diff --git a/fla/models/delta_net/__pycache__/__init__.cpython-312.pyc b/fla/models/delta_net/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06816586a52cfd70b8ea61e5bf1b04405f07d4e7 Binary files /dev/null and b/fla/models/delta_net/__pycache__/__init__.cpython-312.pyc differ diff --git a/fla/models/delta_net/__pycache__/configuration_delta_net.cpython-312.pyc b/fla/models/delta_net/__pycache__/configuration_delta_net.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b369400dc646c06fa4a9dfc5cd6a58adea10365 Binary files /dev/null and b/fla/models/delta_net/__pycache__/configuration_delta_net.cpython-312.pyc differ diff --git a/fla/models/delta_net/__pycache__/modeling_delta_net.cpython-312.pyc b/fla/models/delta_net/__pycache__/modeling_delta_net.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9344119efb80ac32f666d06ea1f5813426e97f20 Binary files /dev/null and b/fla/models/delta_net/__pycache__/modeling_delta_net.cpython-312.pyc differ diff --git a/fla/models/gated_deltanet/__pycache__/configuration_gated_deltanet.cpython-312.pyc b/fla/models/gated_deltanet/__pycache__/configuration_gated_deltanet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b169999bd0b1009eaf15ba44d8f5ac739282f2bc Binary files /dev/null and b/fla/models/gated_deltanet/__pycache__/configuration_gated_deltanet.cpython-312.pyc differ diff --git a/fla/models/gated_deltanet/modeling_gated_deltanet.py b/fla/models/gated_deltanet/modeling_gated_deltanet.py new file mode 100644 index 0000000000000000000000000000000000000000..64d8f4d4f7cb22ed536a91ccde9c64d33ad7b2d8 --- /dev/null +++ b/fla/models/gated_deltanet/modeling_gated_deltanet.py @@ -0,0 +1,412 @@ +# -*- coding: utf-8 -*- + +from __future__ import annotations + +import math +import warnings +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union + +import torch 
import torch.nn as nn
import torch.utils.checkpoint
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import logging
from transformers.utils.deprecation import deprecate_kwarg

from fla.layers.attn import Attention
from fla.layers.gated_deltanet import GatedDeltaNet
from fla.models.gated_deltanet.configuration_gated_deltanet import GatedDeltaNetConfig
from fla.models.utils import Cache
from fla.modules import FusedCrossEntropyLoss, FusedLinearCrossEntropyLoss
from fla.modules import GatedMLP as GatedDeltaNetMLP
from fla.modules import RMSNorm

if TYPE_CHECKING:
    from transformers.processing_utils import Unpack


logger = logging.get_logger(__name__)


class GatedDeltaNetBlock(nn.Module):
    """One pre-norm residual layer: token mixer (attention or GatedDeltaNet) followed by a gated MLP.

    The mixer is a softmax `Attention` layer when `config.attn` is set and lists this
    `layer_idx` in `config.attn['layers']`; otherwise it is a `GatedDeltaNet` layer.
    """

    def __init__(self, config: GatedDeltaNetConfig, layer_idx: int):
        super().__init__()

        self.config = config
        self.layer_idx = layer_idx

        # `config.fuse_norm` picks the project's RMSNorm over `torch.nn.RMSNorm`.
        self.attn_norm = (RMSNorm if config.fuse_norm else nn.RMSNorm)(config.hidden_size, eps=config.norm_eps)
        if config.attn is not None and layer_idx in config.attn['layers']:
            self.attn = Attention(
                hidden_size=config.hidden_size,
                num_heads=config.attn['num_heads'],
                num_kv_heads=config.attn['num_kv_heads'],
                qkv_bias=config.attn['qkv_bias'],
                window_size=config.attn['window_size'],
                rope_theta=config.attn['rope_theta'],
                max_position_embeddings=config.max_position_embeddings,
                layer_idx=layer_idx
            )
        else:
            self.attn = GatedDeltaNet(
                mode=config.attn_mode,
                hidden_size=config.hidden_size,
                expand_v=config.expand_v,
                head_dim=config.head_dim,
                num_heads=config.num_heads,
                use_gate=config.use_gate,
                use_short_conv=config.use_short_conv,
                conv_size=config.conv_size,
                norm_eps=config.norm_eps,
                layer_idx=layer_idx
            )
        self.mlp_norm = (RMSNorm if config.fuse_norm else nn.RMSNorm)(config.hidden_size, eps=config.norm_eps)
        self.mlp = GatedDeltaNetMLP(
            hidden_size=config.hidden_size,
            hidden_ratio=config.hidden_ratio,
            intermediate_size=config.intermediate_size,
            hidden_act=config.hidden_act,
            fuse_swiglu=config.fuse_swiglu
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = False,
        output_attentions: Optional[bool] = False,
        **kwargs: Unpack[Dict]
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        """Run the mixer and the MLP, each wrapped in a residual connection.

        Returns:
            A 3-tuple `(hidden_states, attentions, past_key_values)`; the last two are
            whatever the mixer returned.
        """
        residual = hidden_states
        hidden_states = self.attn_norm(hidden_states)
        hidden_states, attentions, past_key_values = self.attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            **kwargs
        )
        if self.config.fuse_norm:
            # The fused norm takes the residual as well and hands back both the normalized
            # activations and the updated residual in one call.
            # NOTE(review): the positional `True` is presumably the `prenorm` flag — confirm in fla.modules.
            hidden_states, residual = self.mlp_norm(hidden_states, residual, True)
        else:
            hidden_states = residual + hidden_states
            residual = hidden_states
            hidden_states = self.mlp_norm(hidden_states)
        hidden_states = self.mlp(hidden_states, **kwargs)
        hidden_states = residual + hidden_states

        outputs = (hidden_states, attentions, past_key_values)

        return outputs


class GatedDeltaNetPreTrainedModel(PreTrainedModel):
    """Shared base class: binds the config class and defines weight initialization."""

    config_class = GatedDeltaNetConfig
    base_model_prefix = 'model'
    supports_gradient_checkpointing = True
    _no_split_modules = ['GatedDeltaNetBlock']
    _supports_cache_class = True

    def __init__(self, *inputs, **kwargs):
        super().__init__(*inputs, **kwargs)

    def _init_weights(
        self,
        module: nn.Module,
        prenorm_residual_strategy: Optional[str] = 'rescale',
        num_residuals_per_layer: int = 2,
    ):
        """Initialize `module` and, if it owns an output projection, re-initialize that projection.

        Args:
            module: The sub-module being initialized.
            prenorm_residual_strategy: `'rescale'` (Kaiming-uniform, then divide by
                `sqrt(num_residuals_per_layer * num_hidden_layers)`), `'zero'`, or `None`
                to skip the special treatment of `o_proj` / `down_proj`.
            num_residuals_per_layer: Number of residual additions per layer used in the rescale factor.

        Raises:
            ValueError: If `prenorm_residual_strategy` is not one of the values above and the
                module has an `o_proj` or `down_proj`.
        """
        if isinstance(module, (nn.Linear, nn.Conv1d)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            nn.init.normal_(module.weight, mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=self.config.initializer_range)
        elif hasattr(module, 'reset_parameters'):
            module.reset_parameters()

        if prenorm_residual_strategy is not None:
            # Reinitialize selected weights subject to the OpenAI GPT-2 Paper Scheme:
            #   > A modified initialization which accounts for the accumulation on the residual path with model depth. Scale
            #   > the weights of residual layers at initialization by a factor of 1/√N where N is the # of residual layers.
            #   >   -- GPT-2 :: https://openai.com/blog/better-language-models/
            #
            # Reference (Megatron-LM): https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/model/gpt_model.py
            p = None
            if hasattr(module, 'o_proj'):
                p = module.o_proj.weight
            elif hasattr(module, 'down_proj'):
                p = module.down_proj.weight
            if p is not None:
                # Special Scaled Initialization --> There are 2 Layer Norms per Transformer Block
                # Following Pytorch init, except scale by 1/sqrt(2 * n_layer)
                # We need to reinit p since this code could be called multiple times
                # Having just p *= scale would repeatedly scale it down
                if prenorm_residual_strategy == 'rescale':
                    nn.init.kaiming_uniform_(p, a=math.sqrt(5))
                    with torch.no_grad():
                        p /= math.sqrt(num_residuals_per_layer * self.config.num_hidden_layers)
                elif prenorm_residual_strategy == 'zero':
                    nn.init.zeros_(p)
                else:
                    raise ValueError(f"Invalid prenorm_residual_strategy: {prenorm_residual_strategy}")


class GatedDeltaNetModel(GatedDeltaNetPreTrainedModel):
    """Token embedding, a stack of `GatedDeltaNetBlock`s, and a final norm (no LM head)."""

    def __init__(self, config: GatedDeltaNetConfig):
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embeddings = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = nn.ModuleList([GatedDeltaNetBlock(config, layer_idx) for layer_idx in range(config.num_hidden_layers)])
        self.norm = (RMSNorm if config.fuse_norm else nn.RMSNorm)(config.hidden_size, eps=config.norm_eps)

        self.gradient_checkpointing = False

        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings

    def set_input_embeddings(self, value):
        self.embeddings = value

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,  # noqa
        inputs_embeds: Optional[torch.FloatTensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs: Unpack[Dict]
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Embed the input (unless `inputs_embeds` is given), run every layer, and apply the final norm.

        Exactly one of `input_ids` / `inputs_embeds` must be provided.

        Returns:
            A `BaseModelOutputWithPast`, or, when `return_dict` is false, a tuple of the
            non-`None` values among (last hidden state, cache, all hidden states, all attentions).

        Raises:
            ValueError: If both or neither of `input_ids` and `inputs_embeds` are given.
        """
        if output_attentions:
            warnings.warn("`GatedDeltaNetModel` does not `output_attentions` now, setting it to `False`.")
            output_attentions = False
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        # Caching defaults to off while training, regardless of the config.
        use_cache = use_cache if use_cache is not None else (self.config.use_cache if not self.training else False)
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        if input_ids is None and inputs_embeds is None:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if inputs_embeds is None:
            inputs_embeds = self.embeddings(input_ids)
        hidden_states = inputs_embeds

        # Anything that is not already a `Cache` (including `None`) is wrapped into one.
        if use_cache and not isinstance(past_key_values, Cache):
            past_key_values = Cache.from_legacy_cache(past_key_values)

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once("`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...")
            use_cache = False

        all_hidden_states = () if output_hidden_states else None
        all_attns = () if output_attentions else None
        for layer in self.layers:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                hidden_states, attentions, past_key_values = self._gradient_checkpointing_func(
                    layer.__call__,
                    hidden_states,
                    attention_mask,
                    past_key_values,
                    use_cache,
                    output_attentions,
                    **kwargs
                )
            else:
                hidden_states, attentions, past_key_values = layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    past_key_values=past_key_values,
                    use_cache=use_cache,
                    output_attentions=output_attentions,
                    **kwargs
                )

            if output_attentions:
                all_attns += (attentions,)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        if not return_dict:
            return tuple(i for i in [hidden_states, past_key_values, all_hidden_states, all_attns] if i is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values,
            hidden_states=all_hidden_states,
            attentions=all_attns
        )


class GatedDeltaNetForCausalLM(GatedDeltaNetPreTrainedModel, GenerationMixin):
    """`GatedDeltaNetModel` with a bias-free linear LM head and a next-token loss."""

    _tied_weights_keys = ["lm_head.weight"]

    def __init__(self, config):
        super().__init__(config)
        self.model = GatedDeltaNetModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
        # Optional externally supplied loss; when left as `None`, `forward` builds one per call.
        self.criterion = None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embeddings

    def set_input_embeddings(self, value):
        self.model.embeddings = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        self.model = decoder

    def get_decoder(self):
        return self.model

    def generate(self, *args, **kwargs):
        """Delegate to `GenerationMixin.generate`, rewording cache-related `AttributeError`s.

        Raises:
            AttributeError: With an explanatory message (chained to the original error) when
                the original message mentions `past_key_values`; otherwise the original
                error is re-raised unchanged.
        """
        try:
            return super().generate(*args, **kwargs)
        except AttributeError as exception:
            if 'past_key_values' in str(exception):
                # Chain explicitly so the underlying failure stays visible in the traceback.
                raise AttributeError(
                    f"You tried to call `generate` with a decoding strategy that manipulates `past_key_values`, "
                    f"which is not supported for {self.__class__.__name__}. "
                    f"Try another generation strategy instead. "
                    f"For the available generation strategies, check this doc: "
                    f"https://huggingface.co/docs/transformers/en/generation_strategies#decoding-strategies"
                ) from exception
            else:
                raise

    @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
    def prepare_inputs_for_generation(
        self,
        input_ids: torch.LongTensor = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        use_cache: bool = True,
        logits_to_keep: Optional[int] = None,
        **kwargs
    ):
        """Build the keyword arguments for one decoding step.

        With a non-empty cache only the last token of `input_ids` is fed. `inputs_embeds`
        is used only while the cache is missing or empty (i.e. on the first step).

        Returns:
            A dict with either `inputs_embeds` or `input_ids`, plus `past_key_values`,
            `use_cache`, `attention_mask`, and `logits_to_keep` when it is not `None`.
        """
        # A missing cache is treated like an empty one. Taking `len(past_key_values)`
        # unconditionally would raise `TypeError` when the cache is `None` and
        # `inputs_embeds` is supplied.
        has_cache = past_key_values is not None and len(past_key_values) > 0
        # only last token for `input_ids` if the `past_key_values` is not empty.
        if has_cache:
            input_ids = input_ids[:, -1:]
        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and not has_cache:
            model_inputs = {'inputs_embeds': inputs_embeds}
        else:
            # The `contiguous()` here is necessary to have a static stride during decoding. torchdynamo otherwise
            # recompiles graphs as the stride of the inputs is a guard.
            # Ref: https://github.com/huggingface/transformers/pull/29114
            # TODO: use `next_tokens` directly instead.
            model_inputs = {'input_ids': input_ids.contiguous()}

        if logits_to_keep is not None:
            model_inputs['logits_to_keep'] = logits_to_keep

        model_inputs.update({
            'past_key_values': past_key_values,
            'use_cache': use_cache,
            'attention_mask': attention_mask,
        })
        return model_inputs

    @deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        logits_to_keep: Optional[int] = 0,
        **kwargs: Unpack[Dict]
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """Run the backbone, then compute logits and/or the next-token loss.

        When `config.fuse_cross_entropy` is set, the module is training, and `labels` are
        given, the LM head and loss are evaluated together by `FusedLinearCrossEntropyLoss`
        and `logits` is returned as `None`.

        Returns:
            A `CausalLMOutputWithPast`, or, when `return_dict` is false, the tuple
            `(loss?, logits, *backbone_outputs[1:])` with `loss` present only if computed.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            past_key_values=past_key_values,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            **kwargs
        )

        hidden_states = outputs[0]
        fuse_linear_and_cross_entropy = self.config.fuse_cross_entropy and self.training

        loss, logits = None, None
        if not fuse_linear_and_cross_entropy or labels is None:
            # With the default `logits_to_keep=0` the slice `[:, -0:]` keeps every position.
            logits = self.lm_head(hidden_states if logits_to_keep is None else hidden_states[:, -logits_to_keep:])
        if labels is not None:
            if getattr(self, 'criterion', None) is None:
                if fuse_linear_and_cross_entropy:
                    criterion = FusedLinearCrossEntropyLoss()
                elif self.config.fuse_cross_entropy:
                    criterion = FusedCrossEntropyLoss(inplace_backward=True)
                else:
                    criterion = nn.CrossEntropyLoss()
            else:
                criterion = self.criterion
            labels = labels.to(hidden_states.device)
            # Shift labels left by one so position t is scored against token t+1; the last
            # position has no target and is filled with the criterion's `ignore_index`.
            labels = torch.cat((labels[..., 1:], torch.full_like(labels[:, :1], criterion.ignore_index)), 1)
            if fuse_linear_and_cross_entropy:
                loss = criterion(hidden_states, labels, self.lm_head.weight, self.lm_head.bias)
            else:
                loss = criterion(logits.view(labels.numel(), -1), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
a/fla/models/gsa/__pycache__/__init__.cpython-312.pyc b/fla/models/gsa/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..917d30bac46dd2cb4e1b693665f1c1e3fbcd6276 Binary files /dev/null and b/fla/models/gsa/__pycache__/__init__.cpython-312.pyc differ diff --git a/fla/models/gsa/__pycache__/configuration_gsa.cpython-312.pyc b/fla/models/gsa/__pycache__/configuration_gsa.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c92b18bc40269ecd42ab44314e9caadbca3d2865 Binary files /dev/null and b/fla/models/gsa/__pycache__/configuration_gsa.cpython-312.pyc differ diff --git a/fla/models/hgrn2/__pycache__/modeling_hgrn2.cpython-312.pyc b/fla/models/hgrn2/__pycache__/modeling_hgrn2.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d7e10a8155e81c9eac01a7cb265b90c8f229b77 Binary files /dev/null and b/fla/models/hgrn2/__pycache__/modeling_hgrn2.cpython-312.pyc differ diff --git a/fla/models/lightnet/__pycache__/configuration_lightnet.cpython-312.pyc b/fla/models/lightnet/__pycache__/configuration_lightnet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee2044d5ca4c3d82fa8b2cc5dc504903f1ef54ad Binary files /dev/null and b/fla/models/lightnet/__pycache__/configuration_lightnet.cpython-312.pyc differ diff --git a/fla/models/lightnet/__pycache__/modeling_lightnet.cpython-312.pyc b/fla/models/lightnet/__pycache__/modeling_lightnet.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0c900725bbd3961f84ce4f15930c960a6902b171 Binary files /dev/null and b/fla/models/lightnet/__pycache__/modeling_lightnet.cpython-312.pyc differ diff --git a/fla/models/linear_attn/__pycache__/configuration_linear_attn.cpython-312.pyc b/fla/models/linear_attn/__pycache__/configuration_linear_attn.cpython-312.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..cbdd8e34d3d9282d50b38b74988cb12d706b216e Binary files /dev/null and b/fla/models/linear_attn/__pycache__/configuration_linear_attn.cpython-312.pyc differ diff --git a/fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc b/fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b431f68259f3c9301390c7fdc475853c775d612 Binary files /dev/null and b/fla/models/linear_attn/__pycache__/modeling_linear_attn.cpython-312.pyc differ diff --git a/fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc b/fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4a1cd37e2fdc2308bd511fbf03275b4114953af9 Binary files /dev/null and b/fla/models/mamba/__pycache__/configuration_mamba.cpython-312.pyc differ diff --git a/fla/models/mamba2/configuration_mamba2.py b/fla/models/mamba2/configuration_mamba2.py new file mode 100644 index 0000000000000000000000000000000000000000..4541257e687c5ca5121cb2eb92ea190839935345 --- /dev/null +++ b/fla/models/mamba2/configuration_mamba2.py @@ -0,0 +1,170 @@ +# Copyright 2024 The HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""MAMBA2 configuration"""

import math

from transformers.configuration_utils import PretrainedConfig


class Mamba2Config(PretrainedConfig):
    """
    This is the configuration class to store the configuration of a [`Mamba2Model`]. It is used to instantiate a MAMBA2
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the MAMBA2
    [state-spaces/mamba2-2.8b](https://huggingface.co/state-spaces/mamba2-2.8b) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.


    Args:
        num_heads (`int`, *optional*, defaults to 64):
            Number of heads for the evolution matrices of mamba 2.
        head_dim (`int`, *optional*, defaults to 64):
            Dimension of each head.
        vocab_size (`int`, *optional*, defaults to 32000):
            Vocabulary size of the MAMBA2 model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling [`Mamba2Model`].
        hidden_size (`int`, *optional*, defaults to 2048):
            Dimensionality of the embeddings and hidden states.
        state_size (`int`, *optional*, defaults to 128): shape of the state space latents.
        num_hidden_layers (`int`, *optional*, defaults to 48):
            Number of hidden layers in the model.
        layer_norm_epsilon (`float`, *optional*, defaults to 1e-05):
            The epsilon to use in the layer normalization layers.
        pad_token_id (`int`, *optional*, defaults to 0):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 1):
            The id of the beginning of sentence token in the vocabulary.
        eos_token_id (`int`, *optional*, defaults to 2):
            The id of the end of sentence token in the vocabulary.
        expand (`int`, *optional*, defaults to 2): Expanding factor used to determine the intermediate size.
        conv_kernel (`int`, *optional*, defaults to 4): Size of the convolution kernel.
        n_groups (`int`, *optional*, defaults to 1):
            Number of groups for the evolution matrices of mamba 2.
        use_bias (`bool`, *optional*, defaults to `False`):
            Whether or not to use bias in ["in_proj", "out_proj"] of the mixer block
        use_conv_bias (`bool`, *optional*, defaults to `True`):
            Whether or not to use bias in the convolution layer of the mixer block.
        hidden_act (`str`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        initializer_range (`float`, *optional*, defaults to 0.1):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        residual_in_fp32 (`bool`, *optional*, defaults to `True`):
            Whether or not residuals should be in `float32`.
            If set to `False` residuals will keep the same `dtype` as the rest of the model
        time_step_rank (`Union[int,str]`, *optional*, defaults to `"auto"`):
            Rank of the discretization projection matrix.
            `"auto"` means that it will default to `math.ceil(self.hidden_size / 16)`
        time_step_min (`float`, *optional*, defaults to 0.001):
            Minimum `time_step` used to bound `dt_proj.bias`.
        time_step_max (`float`, *optional*, defaults to 0.1):
            Maximum `time_step` used to bound `dt_proj.bias`.
        time_step_floor (`float`, *optional*, defaults to 0.0001):
            Minimum clamping value of the `dt_proj.bias` layer initialization.
        time_step_limit (`tuple`, *optional*, defaults to `(0.0, inf)`):
            Accepted range of time step values.
        rescale_prenorm_residual (`bool`, *optional*, defaults to `True`):
            Whether or not to rescale `out_proj` weights when initializing.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the cache should be used.
        rms_norm (`bool`, *optional*, defaults to `True`):
            Whether to use RMS norm or not.
        chunk_size (`int`, *optional*, defaults to 256):
            Size of the chunks that will comprise the sequence.
        fuse_norm (`bool`, *optional*, defaults to `True`):
            Stored on the config as-is. Presumably selects a fused normalization implementation in the
            model code -- confirm against the Mamba2 modeling module.
        fuse_cross_entropy (`bool`, *optional*, defaults to `True`):
            Stored on the config as-is. Presumably selects a fused cross-entropy loss in the
            model code -- confirm against the Mamba2 modeling module.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie word embeddings or not.
    """

    model_type = "mamba2"

    def __init__(
        self,
        num_heads: int = 64,
        head_dim: int = 64,
        vocab_size: int = 32000,
        hidden_size: int = 2048,
        state_size: int = 128,
        num_hidden_layers: int = 48,
        layer_norm_epsilon: float = 1e-5,
        pad_token_id: int = 0,
        bos_token_id: int = 1,
        eos_token_id: int = 2,
        expand: int = 2,
        conv_kernel: int = 4,
        n_groups: int = 1,
        use_bias: bool = False,
        use_conv_bias: bool = True,
        hidden_act: str = "silu",
        initializer_range: float = 0.1,
        residual_in_fp32: bool = True,
        time_step_rank: str = "auto",
        time_step_min: float = 0.001,
        time_step_max: float = 0.1,
        time_step_floor: float = 1e-4,
        time_step_limit=(0.0, float("inf")),
        rescale_prenorm_residual: bool = True,
        use_cache: bool = True,
        rms_norm: bool = True,
        chunk_size: int = 256,
        fuse_norm: bool = True,
        fuse_cross_entropy: bool = True,
        tie_word_embeddings: bool = False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.state_size = state_size
        self.num_hidden_layers = num_hidden_layers
        self.layer_norm_epsilon = layer_norm_epsilon
        self.conv_kernel = conv_kernel
        self.expand = expand

        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.pad_token_id = pad_token_id
        self.use_bias = use_bias
        self.use_conv_bias = use_conv_bias
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # "auto" resolves to ceil(hidden_size / 16); any other value is stored unchanged.
        self.time_step_rank = (
            math.ceil(self.hidden_size / 16)
            if time_step_rank == "auto"
            else time_step_rank
        )
        self.time_step_min = time_step_min
        self.time_step_max = time_step_max
        self.time_step_floor = time_step_floor
        self.rescale_prenorm_residual = rescale_prenorm_residual
        self.residual_in_fp32 = residual_in_fp32
        self.use_cache = use_cache
        self.n_groups = n_groups
        self.num_heads = num_heads
        self.head_dim = head_dim
        self.rms_norm = rms_norm
        self.chunk_size = chunk_size
        self.time_step_limit = time_step_limit
        self.fuse_norm = fuse_norm
        self.fuse_cross_entropy = fuse_cross_entropy
        self.tie_word_embeddings = tie_word_embeddings

        # Token ids and embedding tying are also forwarded so the base class can register them.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            pad_token_id=pad_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
b/fla/models/rwkv7/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ef17492e7b007c56ba6c33189aded2c0979f005d Binary files /dev/null and b/fla/models/rwkv7/__pycache__/__init__.cpython-312.pyc differ diff --git a/fla/models/rwkv7/__pycache__/modeling_rwkv7.cpython-312.pyc b/fla/models/rwkv7/__pycache__/modeling_rwkv7.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e8878b056dbe85d9ee0d9a762478be82f681899 Binary files /dev/null and b/fla/models/rwkv7/__pycache__/modeling_rwkv7.cpython-312.pyc differ diff --git a/fla/models/transformer/__pycache__/__init__.cpython-312.pyc b/fla/models/transformer/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4870acbae2712350041cec9f33217909a4cb566a Binary files /dev/null and b/fla/models/transformer/__pycache__/__init__.cpython-312.pyc differ diff --git a/fla/models/transformer/__pycache__/configuration_transformer.cpython-312.pyc b/fla/models/transformer/__pycache__/configuration_transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4ed08d5b8951a0c74a0552e86adb60d0ccb23c5 Binary files /dev/null and b/fla/models/transformer/__pycache__/configuration_transformer.cpython-312.pyc differ diff --git a/fla/models/transformer_dsmtp/__pycache__/__init__.cpython-312.pyc b/fla/models/transformer_dsmtp/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..92e44d632e7bd3ab507c2c49d2e262735a16a0dc Binary files /dev/null and b/fla/models/transformer_dsmtp/__pycache__/__init__.cpython-312.pyc differ diff --git a/fla/models/transformer_dsmtp/__pycache__/configuration_transformer.cpython-312.pyc b/fla/models/transformer_dsmtp/__pycache__/configuration_transformer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6481732d8f392e1fa5706fd72440b67797c344e Binary files 
# -*- coding: utf-8 -*-
# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang

from typing import Optional, Tuple

import torch
import triton
import triton.language as tl

from fla.ops.utils import logcumsumexp_fwd_kernel, softmax_bwd, softmax_fwd
from fla.ops.utils.op import exp
from fla.utils import input_guard


# Chunk-wise recurrence over a [BK, BV] state tile.
#
# Each program handles one (value-block i_v, key-block i_k, i_bh) triple taken from
# program ids 0/1/2. For every chunk i_t in [0, NT) it
#   1. writes the state *entering* the chunk to `h[i_bh, i_t]`,
#   2. rescales the state by exp(z_prev - z_cur), where z_cur is the row of `z` at the
#      last position of the chunk (i_t * BT + BT - 1),
#   3. adds k^T @ v for the chunk, with k (NORMK) or v (otherwise) first multiplied by
#      exp(. - z_cur).
# `h0` seeds the state when USE_INITIAL_STATE is set and `ht` receives the state after
# the last chunk when STORE_FINAL_STATE is set.
# NOTE(review): i_bh presumably enumerates batch * heads; tensor layouts are inferred
# only from the pointer arithmetic below -- confirm against the launching wrapper.
@triton.jit(do_not_specialize=['T'])
def chunk_abc_fwd_kernel_h(
    k,
    v,
    z,
    h,
    h0,
    ht,
    T,
    K: tl.constexpr,
    V: tl.constexpr,
    BT: tl.constexpr,
    BK: tl.constexpr,
    BV: tl.constexpr,
    NT: tl.constexpr,
    NORMK: tl.constexpr,
    USE_INITIAL_STATE: tl.constexpr,
    STORE_FINAL_STATE: tl.constexpr
):
    i_v, i_k, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2)

    # running state tile, accumulated in float32
    b_h = tl.zeros([BK, BV], dtype=tl.float32)
    if USE_INITIAL_STATE:
        p_h = tl.make_block_ptr(h0 + i_bh * K * V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))
        b_h += tl.load(p_h, boundary_check=(0, 1)).to(tl.float32)
    # `z` is addressed as a flat vector; offset i_k * BK (or i_v * BV) selects this
    # program's slice of the very first row, which serves as the initial "previous" z.
    if NORMK:
        p_z0 = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), (i_k * BK,), (BK,), (0,))
    else:
        p_z0 = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), (i_v * BV,), (BV,), (0,))
    b_zp = tl.load(p_z0).to(tl.float32)
    for i_t in range(NT):
        # k is read through swapped strides (1, K), i.e. as a [BK, BT] (transposed) tile
        p_k = tl.make_block_ptr(k + i_bh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1))
        p_v = tl.make_block_ptr(v + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))
        p_h = tl.make_block_ptr(h + i_bh * NT*K*V + i_t * K * V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))

        # store the state *before* this chunk's contribution is folded in
        tl.store(p_h, b_h.to(p_h.dtype.element_ty), boundary_check=(0, 1))
        # [BK, BT]
        b_k = tl.load(p_k, boundary_check=(0, 1))
        # [BT, BV]
        b_v = tl.load(p_v, boundary_check=(0, 1))
        if NORMK:
            # z row at the last position of the current chunk, restricted to this key block
            p_zc = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), ((i_t * BT + BT - 1) * K + i_k * BK,), (BK,), (0,))
            # [BK,]
            b_zc = tl.load(p_zc, boundary_check=(0,))
            # decay factor from the previous reference to the current one; then advance the reference
            b_r, b_zp = exp(b_zp - b_zc), b_zc
            # [BK, BV]
            b_h = b_h * b_r[:, None]
            b_k = exp(b_k - b_zc[:, None]).to(b_k.dtype)
        else:
            # same as above, but the normalizer runs along the value dimension
            p_zc = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), ((i_t * BT + BT - 1) * V + i_v * BV,), (BV,), (0,))
            # [BV,]
            b_zc = tl.load(p_zc, boundary_check=(0,))
            b_r, b_zp = exp(b_zp - b_zc), b_zc
            # [BK, BV]
            b_h = b_h * b_r[None, :]
            b_v = exp(b_v - b_zc[None, :]).to(b_v.dtype)
        # [BK, BV]
        b_h += tl.dot(b_k, b_v, allow_tf32=False)

    if STORE_FINAL_STATE:
        p_h = tl.make_block_ptr(ht + i_bh * K * V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))
        tl.store(p_h, b_h.to(p_h.dtype.element_ty), boundary_check=(0, 1))
+@triton.jit(do_not_specialize=['T']) +def chunk_abc_fwd_kernel_intra_K( + v, + z, + o, + A, + T, + V: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BV: tl.constexpr, + NC: tl.constexpr +): + i_v, i_c, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_t, i_i = i_c // NC, i_c % NC + + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), ((i_t * BT + i_i * BC) * V + i_v * BV,), (BV,), (0,)) + # [BV,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + # [BC, BV] + b_o = tl.zeros([BC, BV], dtype=tl.float32) + for i_j in range(0, i_i): + p_A = tl.make_block_ptr(A + i_bh * T * BT, (T, BT), (BT, 1), (i_t * BT + i_i * BC, i_j * BC), (BC, BC), (1, 0)) + p_v = tl.make_block_ptr(v + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_j * BC, i_v * BV), (BC, BV), (1, 0)) + # [BC, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BC, BC] + b_A = tl.load(p_A, boundary_check=(0, 1)) + b_o += tl.dot(b_A, exp(b_v - b_zn[None, :]).to(b_v.dtype), allow_tf32=False) + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_o *= exp(b_zn[None, :] - b_z) + + o_i = tl.arange(0, BC) + o_A = i_bh * T * BT + (i_t * BT + i_i * BC + tl.arange(0, BC)) * BT + i_i * BC + m_A = (i_t * BT + i_i * BC + tl.arange(0, BC)) < T + for j in range(0, BC): + p_v = tl.make_block_ptr(v + i_bh * T*V, (T * V,), (1,), ((i_t * BT + i_i * BC + j) * V + i_v * BV,), (BV,), (0,)) + # [BC,] + b_A = tl.load(A + o_A + j, mask=m_A, other=0) + # [BV,] + b_v = tl.load(p_v, boundary_check=(0,)).to(tl.float32) + # [BC, BV] + # avoid 0 * inf = inf + m_i = o_i[:, None] >= j + b_o += tl.where(m_i, b_A[:, None] * exp(b_v[None, :] - b_z), 0) + p_o = tl.make_block_ptr(o + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_fwd_kernel_K( + q, + k, + z, + h, + o, + A, + 
scale, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NT: tl.constexpr +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_p = tl.maximum(i_t * BT - 1, 0) + + o_i = tl.arange(0, BT) + m_s = o_i[:, None] >= o_i[None, :] + + b_o = tl.zeros([BT, BV], dtype=tl.float32) + b_A = tl.zeros([BT, BT], dtype=tl.float32) + for i_k in range(tl.cdiv(K, BK)): + p_q = tl.make_block_ptr(q + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_k = tl.make_block_ptr(k + i_bh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_h = tl.make_block_ptr(h + i_bh * NT*K*V + i_t * K * V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + + # [BT, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BK, BT] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BK, BV] + b_h = tl.load(p_h, boundary_check=(0, 1)) + # [BT, BV] + b_o += tl.dot(b_q, b_h, allow_tf32=False) + # [BT, BT] + b_A += tl.dot(b_q, b_k, allow_tf32=False) + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_o = tl.make_block_ptr(o + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + # [BT, BV] + b_z = tl.load(p_z, boundary_check=(0, 1)) + # [BT, BV] + p_zp = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), (i_p * V + i_v * BV,), (BV,), (0,)) + b_zp = tl.load(p_zp, boundary_check=(0,)) + b_o = b_o * exp(b_zp[None, :] - b_z) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + + p_A = tl.make_block_ptr(A + i_bh * T * BT, (T, BT), (BT, 1), (i_t * BT, 0), (BT, BT), (1, 0)) + # [BT, BT] + b_A = tl.where(m_s, b_A, 0.) 
+ if i_v == 0: + tl.store(p_A, b_A.to(p_A.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_fwd_kernel_intra_V( + q, + k, + z, + A, + scale, + T, + K: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BK: tl.constexpr, + NC: tl.constexpr +): + i_k, i_c, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_t, i_i, i_j = i_c // (NC * NC), (i_c % (NC * NC)) // NC, (i_c % (NC * NC)) % NC + n_bh = tl.num_programs(2) + + if i_i > i_j: + p_q = tl.make_block_ptr(q + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + p_k = tl.make_block_ptr(k + i_bh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT + i_j * BC), (BK, BC), (0, 1)) + p_z = tl.make_block_ptr(z + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + p_A = tl.make_block_ptr(A + (i_k*n_bh+i_bh)*T*BT, (T, BT), (BT, 1), (i_t * BT + i_i * BC, i_j * BC), (BC, BC), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), ((i_t * BT + i_i * BC) * K + i_k * BK,), (BK,), (0,)) + # [BK,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + # [BC, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_q = (b_q * exp(b_zn[None, :] - b_z) * scale).to(b_q.dtype) + # [BK, BC] + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_k = exp(b_k - b_zn[:, None]).to(b_k.dtype) + # [BC, BC] + b_A = tl.dot(b_q, b_k, allow_tf32=False) + tl.store(p_A, b_A.to(A.dtype.element_ty), boundary_check=(0, 1)) + elif i_i == i_j: + p_q = tl.make_block_ptr(q + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + p_k = tl.make_block_ptr(k + i_bh * T*K, (T * K,), (1,), ((i_t * BT + i_j * BC) * K + i_k * BK,), (BK,), (0,)) + p_z = tl.make_block_ptr(z + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + # [BC, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_z = tl.load(p_z, boundary_check=(0, 1)) + + o_i = tl.arange(0, BC) + o_A = (i_bh + 
i_k * n_bh) * T * BT + (i_t * BT + i_i * BC + tl.arange(0, BC)) * BT + i_j * BC + m_A = (i_t * BT + i_i * BC + tl.arange(0, BC)) < T + for j in range(0, BC): + # [BK,] + b_k = tl.load(p_k, boundary_check=(0,)).to(tl.float32) + # [BC,] + b_A = tl.sum(b_q * exp(b_k[None, :] - b_z) * scale, 1) + b_A = tl.where(o_i >= j, b_A, 0.) + tl.store(A + o_A + j, b_A.to(b_q.dtype), mask=m_A) + + p_k = tl.advance(p_k, (K,)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_fwd_kernel_V( + q, + v, + z, + h, + o, + A, + scale, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NT: tl.constexpr +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_p = tl.maximum(i_t * BT - 1, 0) + + b_o = tl.zeros([BT, BV], dtype=tl.float32) + for i_k in range(tl.cdiv(K, BK)): + p_q = tl.make_block_ptr(q + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_z = tl.make_block_ptr(z + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_h = tl.make_block_ptr(h + i_bh * NT*K*V + i_t * K * V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + p_zp = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), (i_p * K + i_k * BK,), (BK,), (0,)) + + # [BT, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BT, BK] + b_z = tl.load(p_z, boundary_check=(0, 1)) + # [BT, BK] + b_zp = tl.load(p_zp, boundary_check=(0,)) + b_q = (b_q * exp(b_zp[None, :] - b_z)).to(b_q.dtype) + # [BK, BV] + b_h = tl.load(p_h, boundary_check=(0, 1)) + # works but dkw, owing to divine benevolence + # [BT, BV] + if i_k >= 0: + b_o += tl.dot(b_q, b_h, allow_tf32=False) + p_v = tl.make_block_ptr(v + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_o = tl.make_block_ptr(o + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_A = tl.make_block_ptr(A + i_bh * T * BT, (T, BT), (BT, 1), (i_t * BT, 0), (BT, BT), (1, 0)) + # [BT, BV] + b_v = 
tl.load(p_v, boundary_check=(0, 1)) + # [BT, BT] + b_A = tl.load(p_A, boundary_check=(0, 1)) + b_o += tl.dot(b_A.to(b_v.dtype), b_v, allow_tf32=False) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_dh( + q, + z, + do, + dh, + scale, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NT: tl.constexpr, + NORMK: tl.constexpr +): + i_k, i_v, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + + b_dh = tl.zeros([BK, BV], dtype=tl.float32) + b_zp = tl.full([BK if NORMK else BV], float('inf'), dtype=tl.float32) + for i_t in range(NT - 1, -1, -1): + i_p = tl.maximum(i_t * BT - 1, 0) + p_q = tl.make_block_ptr(q + i_bh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_do = tl.make_block_ptr(do + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_dh = tl.make_block_ptr(dh + i_bh * NT*K*V + i_t * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + + # [BK, BT] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BT, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + + tl.store(p_dh, b_dh.to(p_dh.dtype.element_ty), boundary_check=(0, 1)) + if NORMK: + p_z = tl.make_block_ptr(z + i_bh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_zc = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), (i_p * K + i_k * BK,), (BK,), (0,)) + # [BK,] + b_zc = tl.load(p_zc, boundary_check=(0,)) + b_r, b_zp = exp(b_zc - b_zp), b_zc + # [BK, BT] + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_q = (b_q * exp(b_zc[:, None] - b_z)).to(b_q.dtype) + # [BK, BV] + b_dh = b_dh * b_r[:, None] + else: + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_zc = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), (i_p * V + i_v * BV,), (BV,), (0,)) + # [BV,] + b_zc = tl.load(p_zc, boundary_check=(0,)) + b_r, b_zp = 
exp(b_zc - b_zp), b_zc + # [BT, BV] + b_z = tl.load(p_z, boundary_check=(0,)) + b_do = (b_do * exp(b_zc[None, :] - b_z)).to(b_do.dtype) + # [BK, BV] + b_dh = b_dh * b_r[None, :] + # [BK, BV] + b_dh += tl.dot(b_q, b_do, allow_tf32=False) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_V( + k, + v, + z, + h, + A, + do, + dh, + dq, + dk, + dv, + dA, + scale, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NT: tl.constexpr +): + i_k, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_p = tl.maximum(i_t * BT - 1, 0) + n_bh = tl.num_programs(2) + + p_k = tl.make_block_ptr(k + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_zc = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), ((i_t * BT + BT - 1) * K + i_k * BK,), (BK,), (0,)) + p_A = tl.make_block_ptr(A + i_bh * T * BT, (BT, T), (1, BT), (0, i_t * BT), (BT, BT), (0, 1)) + + # [BK,] + b_zc = tl.load(p_zc, boundary_check=(0,)) + # [BT, BK] + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_k = exp(b_k - b_zc[None, :]).to(b_k.dtype) + # [BT, BT] + b_A = tl.load(p_A, boundary_check=(0, 1)) + + b_dq = tl.zeros([BT, BK], dtype=tl.float32) + b_dk = tl.zeros([BT, BK], dtype=tl.float32) + b_dA = tl.zeros([BT, BT], dtype=tl.float32) + for i_v in range(tl.cdiv(V, BV)): + p_v = tl.make_block_ptr(v + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_h = tl.make_block_ptr(h + i_bh * NT*K*V + i_t * V * K, (V, K), (1, V), (i_v * BV, i_k * BK), (BV, BK), (0, 1)) + p_do = tl.make_block_ptr(do + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_dh = tl.make_block_ptr(dh + i_bh * NT*K*V + i_t * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + p_dv = tl.make_block_ptr(dv + (i_k*n_bh+i_bh) * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + + # [BT, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BV, BK] + b_h = tl.load(p_h, boundary_check=(0, 1)) + # 
[BT, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BK, BV] + b_dh = tl.load(p_dh, boundary_check=(0, 1)) + + # [BT, BV] + b_dv = tl.dot(b_k, b_dh, allow_tf32=False) + if i_k == 0: + b_dv += tl.dot(b_A.to(b_do.dtype), b_do, allow_tf32=False) + b_do = (b_do * scale).to(b_do.dtype) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + # [BT, BT] + b_dA += tl.dot(b_do, tl.trans(b_v), allow_tf32=False) + # [BT, BK] + b_dq += tl.dot(b_do, b_h, allow_tf32=False) + # [BT, BK] + b_dk += tl.dot(b_v, tl.trans(b_dh), allow_tf32=False) + p_z = tl.make_block_ptr(z + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_zp = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), (i_p * K + i_k * BK,), (BK,), (0,)) + # [BK,] + b_zp = tl.load(p_zp, boundary_check=(0,)) + # [BT, BK] + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_z = exp(b_zp[None, :] - b_z) + # [BT, BK] + b_dq = b_dq * b_z + b_dk = b_dk * b_k + + p_dq = tl.make_block_ptr(dq + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_dk = tl.make_block_ptr(dk + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_dA = tl.make_block_ptr(dA + i_bh * T * BT, (T, BT,), (BT, 1), (i_t * BT, 0), (BT, BT), (1, 0)) + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + + o_i = tl.arange(0, BT) + m_s = o_i[:, None] >= o_i[None, :] + # [BT, BT] + b_dA = tl.where(m_s, b_dA, 0.).to(b_k.dtype) + if i_k == 0: + tl.store(p_dA, b_dA.to(p_dA.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_intra_V( + q, + k, + z, + dA, + dq, + dk, + T, + K: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BK: tl.constexpr, + NC: tl.constexpr +): + i_k, i_c, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_t, i_i = i_c // NC, i_c % NC + + p_z = tl.make_block_ptr(z + i_bh * T*K, (T, K), (K, 1), (i_t * BT + 
i_i * BC, i_k * BK), (BC, BK), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), ((i_t * BT + i_i * BC) * K + i_k * BK,), (BK,), (0,)) + # [BK,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + # [BC, BK] + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_zq = exp(b_zn[None, :] - b_z) + b_dq = tl.zeros([BC, BK], dtype=tl.float32) + for i_j in range(0, i_i): + p_k = tl.make_block_ptr(k + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_j * BC, i_k * BK), (BC, BK), (1, 0)) + p_dA = tl.make_block_ptr(dA + i_bh * T * BT, (T, BT), (BT, 1), (i_t * BT + i_i * BC, i_j * BC), (BC, BC), (1, 0)) + # [BC, BK] + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_kz = exp(b_k - b_zn[None, :]).to(b_k.dtype) + # [BC, BC] + b_dA = tl.load(p_dA, boundary_check=(0, 1)) + # [BC, BK] + b_dq += tl.dot(b_dA, b_kz, allow_tf32=False) + b_dq *= b_zq + + o_i = tl.arange(0, BC) + o_dA = i_bh * T * BT + (i_t * BT + i_i * BC + tl.arange(0, BC)) * BT + i_i * BC + m_dA = (i_t * BT + i_i * BC + tl.arange(0, BC)) < T + for j in range(0, BC): + p_kj = tl.make_block_ptr(k + i_bh * T*K, (T * K,), (1,), ((i_t * BT + i_i*BC+j) * K + i_k * BK,), (BK,), (0,)) + # [BC,] + b_dA = tl.load(dA + o_dA + j, mask=m_dA, other=0) + # [BK,] + b_kj = tl.load(p_kj, boundary_check=(0,)).to(tl.float32) + # [BC, BK] + m_i = o_i[:, None] >= j + # [BC, BK] + b_dq += tl.where(m_i, b_dA[:, None] * exp(b_kj[None, :] - b_z), 0.) 
+ p_dq = tl.make_block_ptr(dq + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + + tl.debug_barrier() + p_k = tl.make_block_ptr(k + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*K, (T*K,), (1,), ((i_t * BT + i_i * BC + BC - 1) * K + i_k * BK,), (BK,), (0,)) + # [BK,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + # [BC, BK] + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_kz = exp(b_k - b_zn[None, :]) + b_dk = tl.zeros([BC, BK], dtype=tl.float32) + for i_j in range(i_i + 1, NC): + p_q = tl.make_block_ptr(q + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_j * BC, i_k * BK), (BC, BK), (1, 0)) + p_z = tl.make_block_ptr(z + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_j * BC, i_k * BK), (BC, BK), (1, 0)) + p_dA = tl.make_block_ptr(dA + i_bh * T * BT, (T, BT), (BT, 1), (i_t * BT + i_j * BC, i_i * BC), (BC, BC), (1, 0)) + # [BC, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_qz = (b_q * exp(b_zn[None, :] - b_z)).to(b_q.dtype) + # [BC, BC] + b_dA = tl.load(p_dA, boundary_check=(0, 1)) + # [BC, BK] + b_dk += tl.dot(tl.trans(b_dA), b_qz, allow_tf32=False) + b_dk *= b_kz + + o_dA = i_bh * T * BT + (i_t * BT + i_i * BC) * BT + i_i * BC + tl.arange(0, BC) + for j in range(0, BC): + p_qj = tl.make_block_ptr(q + i_bh * T*K, (T * K,), (1,), ((i_t * BT + i_i * BC + j) * K + i_k * BK,), (BK,), (0,)) + p_zj = tl.make_block_ptr(z + i_bh * T*K, (T * K,), (1,), ((i_t * BT + i_i * BC + j) * K + i_k * BK,), (BK,), (0,)) + # [BC,] + b_dA = tl.load(dA + o_dA + j * BT, mask=(i_t * BT + i_i * BC + j < T), other=0) + # [BK,] + b_qj = tl.load(p_qj, boundary_check=(0,)).to(tl.float32) + b_zj = tl.load(p_zj, boundary_check=(0,)).to(tl.float32) + # [BC, BK] + m_i = o_i[:, None] <= j + b_dk += tl.where(m_i, b_dA[:, None] * b_qj[None, :] * exp(b_k - b_zj[None, :]), 0.) 
+ p_dk = tl.make_block_ptr(dk + i_bh * T*K, (T, K), (K, 1), (i_t * BT + i_i * BC, i_k * BK), (BC, BK), (1, 0)) + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_intra_K( + v, + z, + do, + dA, + scale, + T, + V: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BV: tl.constexpr, + NC: tl.constexpr +): + i_v, i_c, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_t, i_i, i_j = i_c // (NC * NC), (i_c % (NC * NC)) // NC, (i_c % (NC * NC)) % NC + n_bh = tl.num_programs(2) + + if i_i > i_j: + p_v = tl.make_block_ptr(v + i_bh * T*V, (V, T), (1, V), (i_v * BV, i_t * BT + i_j * BC), (BV, BC), (0, 1)) + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), ((i_t * BT + i_i * BC) * V + i_v * BV,), (BV,), (0,)) + p_do = tl.make_block_ptr(do + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + p_dA = tl.make_block_ptr(dA+(i_bh+i_v*n_bh)*T*BT, (T, BT), (BT, 1), (i_t * BT + i_i * BC, i_j * BC), (BC, BC), (1, 0)) + # [BV,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + # [BC, BV] + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_do = tl.load(p_do, boundary_check=(0, 1)) + b_do = (b_do * exp(b_zn[None, :] - b_z) * scale).to(b_do.dtype) + # [BV, BC] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_v = exp(b_v - b_zn[:, None]).to(b_v.dtype) + # [BC, BC] + b_dA = tl.dot(b_do, b_v, allow_tf32=False) + tl.store(p_dA, b_dA.to(dA.dtype.element_ty), boundary_check=(0, 1)) + elif i_i == i_j: + p_v = tl.make_block_ptr(v + i_bh * T*V, (T * V,), (1,), ((i_t * BT + i_j * BC) * V + i_v * BV,), (BV,), (0,)) + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + p_do = tl.make_block_ptr(do + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + # [BC, BV] + b_z = 
tl.load(p_z, boundary_check=(0, 1)) + b_do = tl.load(p_do, boundary_check=(0, 1)) * scale + + o_i = tl.arange(0, BC) + o_A = (i_bh + i_v * n_bh) * T * BT + (i_t * BT + i_i * BC + tl.arange(0, BC)) * BT + i_j * BC + m_A = (i_t * BT + i_i * BC + tl.arange(0, BC)) < T + for j in range(0, BC): + # [BV,] + b_v = tl.load(p_v, boundary_check=(0,)).to(tl.float32) + # [BC,] + b_dA = tl.sum(b_do * exp(b_v[None, :] - b_z), 1) + b_dA = tl.where(o_i >= j, b_dA, 0) + tl.store(dA + o_A + j, b_dA.to(b_do.dtype), mask=m_A) + + p_v = tl.advance(p_v, (V,)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_K( + q, + k, + v, + z, + h, + A, + do, + dh, + dq, + dk, + dv, + dA, + scale, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NT: tl.constexpr +): + i_k, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_p = tl.maximum(i_t * BT - 1, 0) + n_bh = tl.num_programs(2) + + o_i = tl.arange(0, BT) + m_s = o_i[:, None] >= o_i[None, :] + + p_q = tl.make_block_ptr(q + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_k = tl.make_block_ptr(k + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_A = tl.make_block_ptr(A + (i_k*n_bh+i_bh) * T * BT, (T, BT, ), (BT, 1), (i_t * BT, 0), (BT, BT), (1, 0)) + + # [BT, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BT, BT] + b_A = tl.dot((b_q * scale).to(b_q.dtype), tl.trans(b_k), allow_tf32=False) + b_A = tl.where(m_s, b_A, 0.) 
+ tl.store(p_A, b_A.to(p_A.dtype.element_ty), boundary_check=(0, 1)) + + b_dq = tl.zeros([BT, BK], dtype=tl.float32) + b_dk = tl.zeros([BT, BK], dtype=tl.float32) + for i_v in range(tl.cdiv(V, BV)): + p_v = tl.make_block_ptr(v + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_zp = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), (i_p * V + i_v * BV,), (BV,), (0,)) + p_zc = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), ((i_t * BT + BT - 1) * V + i_v * BV,), (BV,), (0,)) + p_h = tl.make_block_ptr(h + i_bh * NT*K*V + i_t * K*V, (V, K), (1, V), (i_v * BV, i_k * BK), (BV, BK), (0, 1)) + + p_do = tl.make_block_ptr(do + i_bh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_dh = tl.make_block_ptr(dh + i_bh * NT*K*V + i_t * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + p_dv = tl.make_block_ptr(dv + (i_k*n_bh+i_bh) * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + + # [BV,] + b_zp = tl.load(p_zp, boundary_check=(0,)) + b_zc = tl.load(p_zc, boundary_check=(0,)) + # [BT, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_v = exp(b_v - b_zc[None, :]).to(b_v.dtype) + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_z = exp(b_zp[None, :] - b_z) + # [BV, BK] + b_h = tl.load(p_h, boundary_check=(0, 1)) + # [BT, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + b_do = (b_do * b_z * scale).to(b_do.dtype) + # [BK, BV] + b_dh = tl.load(p_dh, boundary_check=(0, 1)) + + # [BT, BK] + b_dq += tl.dot(b_do, b_h, allow_tf32=False) + b_dk += tl.dot(b_v, tl.trans(b_dh), allow_tf32=False) + # [BT, BV] + b_dv = b_v * tl.dot(b_k, b_dh, allow_tf32=False) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + p_dA = tl.make_block_ptr(dA + i_bh * T * BT, (T, BT, ), (BT, 1), (i_t * BT, 0), (BT, BT), (1, 0)) + # [BT, BT] + b_dA = tl.load(p_dA, boundary_check=(0, 1)) + # [BT, BK] + b_dq += tl.dot(b_dA, b_k, 
allow_tf32=False) + b_dk += tl.dot(tl.trans(b_dA).to(b_k.dtype), b_q, allow_tf32=False) + + p_dq = tl.make_block_ptr(dq + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_dk = tl.make_block_ptr(dk + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_intra_KV( + v, + z, + A, + do, + dv, + T, + V: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BV: tl.constexpr, + NC: tl.constexpr +): + i_v, i_c, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_t, i_i = i_c // NC, i_c % NC + + p_v = tl.make_block_ptr(v + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*V, (T*V,), (1,), ((i_t * BT + i_i * BC + BC - 1) * V + i_v * BV,), (BV,), (0,)) + # [BV,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + # [BC, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_dv = tl.zeros([BC, BV], dtype=tl.float32) + for i_j in range(i_i + 1, NC): + p_z = tl.make_block_ptr(z + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_j * BC, i_v * BV), (BC, BV), (1, 0)) + p_A = tl.make_block_ptr(A + i_bh * T * BT, (BT, T), (1, BT), (i_i * BC, i_t * BT + i_j * BC), (BC, BC), (0, 1)) + p_do = tl.make_block_ptr(do + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_j * BC, i_v * BV), (BC, BV), (1, 0)) + # [BC, BV] + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_do = tl.load(p_do, boundary_check=(0, 1)) + b_do = (b_do * exp(b_zn[None, :] - b_z)).to(b_do.dtype) + # [BC, BC] + b_A = tl.load(p_A, boundary_check=(0, 1)) + b_dv += tl.dot(b_A, b_do, allow_tf32=False) + b_dv *= exp(b_v - b_zn[None, :]) + + o_i = tl.arange(0, BC) + for j in range(0, BC): + p_z = tl.make_block_ptr(z + i_bh * T*V, (T * V,), (1,), ((i_t * BT + i_i * BC + j) * V + i_v * BV,), (BV,), (0,)) + p_A = 
tl.make_block_ptr(A + i_bh * T * BT, (T * BT,), (1,), ((i_t * BT + i_i * BC + j) * BT + i_i * BC,), (BC,), (0,)) + p_do = tl.make_block_ptr(do + i_bh * T*V, (T * V,), (1,), ((i_t * BT + i_i * BC + j) * V + i_v * BV,), (BV,), (0,)) + # [BC,] + b_A = tl.load(p_A, boundary_check=(0,)) + # [BV,] + b_z = tl.load(p_z, boundary_check=(0,)) + b_do = tl.load(p_do, boundary_check=(0,)) + # [BC, BV] + m_i = o_i[:, None] <= j + b_dv += tl.where(m_i, exp(b_v - b_z[None, :]) * b_A[:, None] * b_do[None, :], 0.) + p_dv = tl.make_block_ptr(dv + i_bh * T*V, (T, V), (V, 1), (i_t * BT + i_i * BC, i_v * BV), (BC, BV), (1, 0)) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_rcum_inter( + s, + z, + ss, + doo, + T, + S: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + NT: tl.constexpr +): + i_m, i_bh = tl.program_id(0), tl.program_id(1) + + b_sp = tl.zeros([BS,], dtype=tl.float32) + b_zp = tl.full([BS,], float('inf'), dtype=tl.float32) + for i_t in range(NT - 1, -1, -1): + p_s = tl.make_block_ptr(s + i_bh * T*S, (T, S), (S, 1), (i_t * BT, i_m * BS), (BT, BS), (1, 0)) + p_z = tl.make_block_ptr(z + i_bh * T*S, (T, S), (S, 1), (i_t * BT, i_m * BS), (BT, BS), (1, 0)) + p_zc = tl.make_block_ptr(z + i_bh * T*S, (T*S,), (1,), ((i_t * BT) * S + i_m * BS,), (BS,), (0,)) + p_ss = tl.make_block_ptr(ss + i_bh * T*S, (T, S), (S, 1), (i_t * BT, i_m * BS), (BT, BS), (1, 0)) + p_doo = tl.make_block_ptr(doo + i_bh * T*S, (T, S), (S, 1), (i_t * BT, i_m * BS), (BT, BS), (1, 0)) + # [BS,] + b_zc = tl.load(p_zc, boundary_check=(0,)) + # [BT, BS] + b_s = tl.load(p_s, boundary_check=(0, 1)) + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_ss = tl.load(p_ss, boundary_check=(0, 1)) + + b_doo = exp(b_s - b_zp[None, :]) * b_sp[None, :] + tl.store(p_doo, b_doo.to(p_doo.dtype.element_ty), boundary_check=(0, 1)) + # [BS,] + b_sp = b_sp * exp(b_zc - b_zp) + tl.sum(b_ss * exp(b_zc[None, :] - b_z), 0) + b_zp = b_zc + + 
+@triton.jit(do_not_specialize=['T']) +def chunk_abc_bwd_kernel_rcum_intra( + s, + z, + ss, + doo, + T, + S: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BS: tl.constexpr, + NC: tl.constexpr +): + i_s, i_c, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_t, i_i = i_c // NC, i_c % NC + + o_i = tl.arange(0, BC) + m_o = tl.full([BC, BC], 1., dtype=tl.float32) + + p_s = tl.make_block_ptr(s + i_bh * T*S, (T, S), (S, 1), (i_t * BT + i_i * BC, i_s * BS), (BC, BS), (1, 0)) + p_zn = tl.make_block_ptr(z + i_bh * T*S, (T*S,), (1,), ((i_t * BT + i_i * BC + BC - 1) * S + i_s * BS,), (BS,), (0,)) + p_doo = tl.make_block_ptr(doo + i_bh * T*S, (T, S), (S, 1), (i_t * BT + i_i * BC, i_s * BS), (BC, BS), (1, 0)) + # [BC, BS] + b_s = tl.load(p_s, boundary_check=(0, 1)) + # [BS,] + b_zn = tl.load(p_zn, boundary_check=(0,)) + + b_doo = tl.zeros([BC, BS], dtype=tl.float32) + for i_j in range(i_i + 1, NC): + p_z = tl.make_block_ptr(z + i_bh * T*S, (T, S), (S, 1), (i_t * BT + i_j * BC, i_s * BS), (BC, BS), (1, 0)) + p_ss = tl.make_block_ptr(ss + i_bh * T*S, (T, S), (S, 1), (i_t * BT + i_j * BC, i_s * BS), (BC, BS), (1, 0)) + # [BC, BS] + b_z = tl.load(p_z, boundary_check=(0, 1)) + b_ss = tl.load(p_ss, boundary_check=(0, 1)) + # [BC, BS] + b_doo += b_ss * exp(b_zn[None, :] - b_z) + b_doo = exp(b_s - b_zn[None, :]) * tl.dot(m_o.to(b_s.dtype), b_doo.to(b_s.dtype), allow_tf32=False) + + for j in range(0, BC): + p_z = tl.make_block_ptr(z + i_bh * T*S, (T*S,), (1,), ((i_t * BT + i_i * BC + j) * S + i_s * BS,), (BS,), (0,)) + p_ss = tl.make_block_ptr(ss + i_bh * T*S, (T*S,), (1,), ((i_t * BT + i_i * BC + j) * S + i_s * BS,), (BS,), (0,)) + # [BS,] + b_z = tl.load(p_z, boundary_check=(0,)) + b_ss = tl.load(p_ss, boundary_check=(0,)) + # [BC, BS] + m_i = o_i[:, None] <= j + b_doo += tl.where(m_i, exp(b_s - b_z[None, :]) * b_ss[None, :], 0.) 
+ b_doo += tl.load(p_doo, boundary_check=(0, 1)) + tl.store(p_doo, b_doo.to(p_doo.dtype.element_ty), boundary_check=(0, 1)) + + +class ChunkABCFunction(torch.autograd.Function): + + @staticmethod + @input_guard + def forward(ctx, q, k, v, s, initial_state, output_final_state): + B, H, T, K, V, M = *q.shape, v.shape[-1], s.shape[-1] + BT, BC = 64, 16 + BK = min(64, triton.next_power_of_2(K)) + BV = min(64, triton.next_power_of_2(V)) + BM = min(64, triton.next_power_of_2(M)) + NT, NC = triton.cdiv(T, BT), triton.cdiv(BT, BC) + NV, NM = triton.cdiv(V, BV), triton.cdiv(M, BM) + num_warps = 4 if BK == 64 else 2 + num_stages = 1 + + def fwd_pre(s, B, H, T, S): + # keep cummulative normalizer in fp32 + z = torch.empty_like(s, dtype=torch.float) + grid = (B * H,) + logcumsumexp_fwd_kernel[grid]( + s, z, + T=T, S=S + ) + return z + + def fwd_inner(q, k, v, z, B, H, T, K, V, BT, BK, BV, NT, normk=False, h0=None, ht=None): + NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV) + h = q.new_empty(B, H, NT * K, V) + grid = (NV, NK, B * H) + chunk_abc_fwd_kernel_h[grid]( + k, v, z, h, h0, ht, + T=T, K=K, V=V, BT=BT, BK=BK, BV=BV, NT=NT, + NORMK=normk, + USE_INITIAL_STATE=h0 is not None, + STORE_FINAL_STATE=ht is not None, + num_warps=num_warps, + num_stages=num_stages + ) + return h + + final_state = None + if output_final_state: + final_state = (q.new_empty(B, H, K, M, dtype=torch.float), + q.new_empty(B, H, M, V, dtype=torch.float)) + + z = fwd_pre(s, B, H, T, M) + scale = K ** -0.5 + hk = fwd_inner( + q=q, k=k, v=s, z=z, + B=B, H=H, T=T, K=K, V=M, BT=BT, BK=BK, BV=BM, NT=NT, + normk=False, + h0=initial_state[0] if initial_state is not None else None, + ht=final_state[0] if final_state is not None else None + ) + ok1 = torch.empty_like(s) + Ak = q.new_empty(B, H, T, BT) + grid = (NM, NT, B * H) + chunk_abc_fwd_kernel_K[grid]( + q, k, z, hk, ok1, Ak, + scale=scale, + T=T, K=K, V=M, BT=BT, BK=BK, BV=BM, NT=NT, + num_warps=num_warps, + num_stages=num_stages + ) + ok0 = 
torch.empty_like(s) + grid = (NM, NT * NC, B * H) + chunk_abc_fwd_kernel_intra_K[grid]( + s, z, ok0, Ak, + T=T, V=M, BT=BT, BC=BC, BV=BM, NC=NC, + num_warps=2, + num_stages=num_stages + ) + ok = ok0.add_(ok1) + + scale = 1. + # p is kept in fp32 for safe softmax backward + p = softmax_fwd(ok, dtype=torch.float) + qv = p.to(q.dtype) + + scale = 1. + hv = fwd_inner( + q=qv, k=s, v=v, z=z, + B=B, H=H, T=T, K=M, V=V, BT=BT, BK=BM, BV=BV, NT=NT, + normk=True, + h0=initial_state[1] if initial_state is not None else None, + ht=final_state[1] if final_state is not None else None + ) + Av = q.new_zeros(NM, B, H, T, BT) + grid = (NM, NT * NC * NC, B * H) + chunk_abc_fwd_kernel_intra_V[grid]( + qv, s, z, Av, + scale=scale, + T=T, K=M, BT=BT, BC=BC, BK=BM, NC=NC, + num_warps=2, + num_stages=num_stages + ) + Av = Av.sum(0) + ov = torch.empty_like(v) + grid = (NV, NT, B * H) + chunk_abc_fwd_kernel_V[grid]( + qv, v, z, hv, ov, Av, + scale=scale, + T=T, + K=M, + V=V, + BT=BT, + BK=BM, + BV=BV, + NT=NT, + num_warps=num_warps, + num_stages=num_stages + ) + ctx.save_for_backward(q, k, v, s, z, ok, p, hk, hv, Av) + ctx.BT = BT + return ov, final_state + + @staticmethod + @input_guard + def backward(ctx, dov, dht=None): + q, k, v, s, z, ok, p, hk, hv, Av = ctx.saved_tensors + B, H, T, K, V, M = *q.shape, v.shape[-1], s.shape[-1] + BT, BC = ctx.BT, 16 + BK = min(64, triton.next_power_of_2(K)) + BV = min(64, triton.next_power_of_2(V)) + BM = min(64, triton.next_power_of_2(M)) + NT, NC = triton.cdiv(T, BT), triton.cdiv(BT, BC) + NK, NM = triton.cdiv(K, BK), triton.cdiv(M, BM) + num_warps = 4 if BK == 64 else 2 + num_stages = 1 + + def bwd_inner(q, z, do, B, H, T, K, V, BT, BK, BV, NT, scale, normk=False): + NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV) + dh = q.new_empty(B, H, NT * K, V) + grid = (NK, NV, B * H) + chunk_abc_bwd_kernel_dh[grid]( + q, z, do, dh, + scale=scale, + T=T, K=K, V=V, BT=BT, BK=BK, BV=BV, NT=NT, + NORMK=normk, + num_warps=num_warps, + num_stages=num_stages + ) + 
return dh + + def bwd_post(s, z, ss, B, H, T, S, BT, BC, BS, NT, NC, NS): + doo = torch.empty_like(s) + grid = (NS, B * H) + chunk_abc_bwd_kernel_rcum_inter[grid]( + s, z, ss, doo, + T=T, S=S, BT=BT, BS=BS, NT=NT, + num_warps=num_warps, + num_stages=num_stages + ) + grid = (NS, NT * NC, B * H) + chunk_abc_bwd_kernel_rcum_intra[grid]( + s, z, ss, doo, + T=T, S=S, BT=BT, BC=BC, BS=BS, NC=NC, + num_warps=num_warps, + num_stages=num_stages + ) + return doo + + scale = 1. + qv = p.to(q.dtype) + dhv = bwd_inner( + qv, z, dov, + B=B, H=H, T=T, K=M, V=V, BT=BT, BK=BM, BV=BV, NT=NT, + scale=scale, + normk=True + ) + dp1 = torch.empty_like(p) + dsv1 = torch.empty_like(s, dtype=torch.float) + dv = v.new_empty(NM, *v.shape) + dAv = q.new_zeros(B, H, T, BT) + grid = (NM, NT, B * H) + chunk_abc_bwd_kernel_V[grid]( + s, v, z, hv, Av, dov, dhv, dp1, dsv1, dv, dAv, + scale=scale, + T=T, K=M, V=V, BT=BT, BK=BM, BV=BV, NT=NT, + num_warps=num_warps, + num_stages=num_stages + ) + dv = dv.sum(0) + dp0 = torch.empty_like(p) + dsv0 = s.new_zeros(s.shape, dtype=torch.float) + grid = (NM, NT * NC, B * H) + chunk_abc_bwd_kernel_intra_V[grid]( + qv, s, z, dAv, dp0, dsv0, + T=T, K=M, BT=BT, BC=BC, BK=BM, NC=NC, + num_warps=2, + num_stages=num_stages + ) + dp = dp1.add_(dp0) + dsv = dsv1.add_(dsv0) + + # softmax gradient, equivalent to: + # dok = p * (dp - (p * dp).sum(-1, True)) + dok = softmax_bwd(p, dp, dtype=ok.dtype) + + scale = K ** -0.5 + dhk = bwd_inner( + q, z, dok, + B=B, H=H, T=T, K=K, V=M, BT=BT, BK=BK, BV=BM, NT=NT, + scale=scale, + normk=False + ) + dAk = q.new_zeros(NM, B, H, T, BT) + grid = (NM, NT * NC * NC, B * H) + chunk_abc_bwd_kernel_intra_K[grid]( + s, z, dok, dAk, + scale=scale, + T=T, V=M, BT=BT, BC=BC, BV=BM, NC=NC, + num_warps=2, + num_stages=num_stages + ) + dAk = dAk.sum(0) + + Ak = q.new_zeros(NK, B, H, T, BT) + dq = torch.empty_like(q) + dk = torch.empty_like(k) + dsk1 = s.new_empty(NK, *s.shape, dtype=torch.float) + grid = (NK, NT, B * H) + 
@torch.compiler.disable
def chunk_abc(
    q: torch.Tensor,
    k: torch.Tensor,
    v: torch.Tensor,
    s: torch.Tensor,
    initial_state: Optional[Tuple[torch.Tensor]] = None,
    output_final_state: bool = False,
    head_first: bool = True
) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""
    Chunked ABC attention, dispatched through `ChunkABCFunction`.

    Args:
        q (torch.Tensor):
            Queries, `[B, H, T, K]` when `head_first=True`, otherwise `[B, T, H, K]`.
        k (torch.Tensor):
            Keys, `[B, H, T, K]` when `head_first=True`, otherwise `[B, T, H, K]`.
        v (torch.Tensor):
            Values, `[B, H, T, V]` when `head_first=True`, otherwise `[B, T, H, V]`.
        s (torch.Tensor):
            Slot representations, `[B, H, T, M]` when `head_first=True`, otherwise `[B, T, H, M]`.
        initial_state (Optional[Tuple[torch.Tensor, torch.Tensor]]):
            Pair of initial states shaped `[B, H, K, M]` and `[B, H, M, V]`. Default: `None`.
        output_final_state (Optional[bool]):
            When set, the final states `[B, H, K, M]` and `[B, H, M, V]` are returned as well.
            Default: `False`.
        head_first (Optional[bool]):
            Layout flag of the inputs. Time-first inputs are transposed to head-first
            before the call and the output is transposed back afterwards.
            Default: `True`.

    Returns:
        o (torch.Tensor):
            Outputs, `[B, H, T, V]` when `head_first=True`, otherwise `[B, T, H, V]`.
        final_state:
            The pair of final states (`[B, H, K, M]`, `[B, H, M, V]`) when
            `output_final_state=True`, otherwise `None`.
    """
    if head_first:
        # inputs already have the layout the autograd function expects
        out, last_state = ChunkABCFunction.apply(q, k, v, s, initial_state, output_final_state)
        return out, last_state

    # time-first layout: swap the T and H axes on the way in ...
    q, k, v, s = (tensor.transpose(1, 2) for tensor in (q, k, v, s))
    out, last_state = ChunkABCFunction.apply(q, k, v, s, initial_state, output_final_state)
    # ... and swap them back on the way out
    return out.transpose(1, 2), last_state
key=['H', 'K', 'V', 'BT', 'BK', 'BV', 'USE_G'], + use_cuda_graph=use_cuda_graph, +) +@triton.jit(do_not_specialize=['T']) +def chunk_gated_delta_rule_fwd_kernel_h( + k, + v, + d, + v_new, + g, + h, + h0, + ht, + offsets, + chunk_offsets, + T, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NT: tl.constexpr, + USE_G: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + STORE_FINAL_STATE: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr, +): + i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_n, i_h = i_nh // H, i_nh % H + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + boh = tl.load(chunk_offsets + i_n).to(tl.int32) + else: + bos, eos = i_n * T, i_n * T + T + NT = tl.cdiv(T, BT) + boh = i_n * NT + + # [BK, BV] + b_h = tl.zeros([BK, BV], dtype=tl.float32) + if USE_INITIAL_STATE: + p_h0 = tl.make_block_ptr(h0 + i_nh * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + b_h = tl.load(p_h0, boundary_check=(0, 1)).to(tl.float32) + + for i_t in range(NT): + if HEAD_FIRST: + p_h = tl.make_block_ptr(h + (i_nh * NT + i_t) * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + else: + p_h = tl.make_block_ptr(h + ((boh + i_t) * H + i_h) * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + tl.store(p_h, b_h.to(p_h.dtype.element_ty), boundary_check=(0, 1)) + b_hc = tl.zeros([BK, BV], dtype=tl.float32) + if USE_G: + last_idx = min((i_t + 1) * BT, T) - 1 + if HEAD_FIRST: + b_g_last = tl.load(g + i_nh * T + last_idx) + else: + b_g_last = tl.load(g + bos * H + last_idx * H + i_h) + else: + b_g_last = None + last_idx = None + # since we need to make all DK in the SRAM. we face serve SRAM memory burden. 
By subchunking we allievate such burden + for i_c in range(tl.cdiv(min(BT, T - i_t * BT), BC)): + if HEAD_FIRST: + p_k = tl.make_block_ptr(k + i_nh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT + i_c * BC), (BK, BC), (0, 1)) + p_d = tl.make_block_ptr(d + i_nh * T*K, (T, K), (K, 1), (i_t * BT + i_c * BC, i_k * BK), (BC, BK), (1, 0)) + p_v = tl.make_block_ptr(v + i_nh * T*V, (T, V), (V, 1), (i_t * BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_v_new = tl.make_block_ptr(v_new+i_nh*T*V, (T, V), (V, 1), (i_t * BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_g = tl.make_block_ptr(g + i_nh * T, (T,), (1,), (i_t * BT + i_c * BC,), (BC,), (0,)) if USE_G else None + else: + p_k = tl.make_block_ptr(k+(bos*H+i_h)*K, (K, T), (1, H*K), (i_k * BK, i_t * BT + i_c * BC), (BK, BC), (0, 1)) + p_d = tl.make_block_ptr(d+(bos*H+i_h)*K, (T, K), (H*K, 1), (i_t * BT + i_c * BC, i_k * BK), (BC, BK), (1, 0)) + p_v = tl.make_block_ptr(v+(bos*H+i_h)*V, (T, V), (H*V, 1), (i_t * BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_v_new = tl.make_block_ptr(v_new+(bos*H+i_h)*V, (T, V), (H*V, 1), (i_t*BT+i_c*BC, i_v * BV), (BC, BV), (1, 0)) + p_g = tl.make_block_ptr(g+bos*H+i_h, (T,), (H,), (i_t*BT+i_c*BC, ), (BC,), (0,)) if USE_G else None + b_g = tl.load(p_g, boundary_check=(0, )) if USE_G else None + # [BK, BC] + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_k = (b_k * exp(b_g_last - b_g)[None, :]).to(b_k.dtype) if USE_G else b_k + # [BC, BK] + b_d = tl.load(p_d, boundary_check=(0, 1)) + b_d = (b_d * exp(b_g)[:, None]).to(b_d.dtype) if USE_G else b_d + # [BC, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_v2 = b_v - tl.dot(b_d, b_h.to(b_d.dtype)) + # [BK, BV] + tl.store(p_v_new, b_v2.to(p_v_new.dtype.element_ty), boundary_check=(0, 1)) + b_hc += tl.dot(b_k, b_v2.to(b_k.dtype), allow_tf32=False) + b_h *= exp(b_g_last) if USE_G else 1 + b_h += b_hc + + if STORE_FINAL_STATE: + p_ht = tl.make_block_ptr(ht + i_nh * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + tl.store(p_ht, 
b_h.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.heuristics({ + 'USE_G': lambda args: args['g'] is not None, + 'USE_INITIAL_STATE': lambda args: args['dh0'] is not None, + 'USE_FINAL_STATE_GRADIENT': lambda args: args['dht'] is not None, + 'USE_OFFSETS': lambda args: args['offsets'] is not None, +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in NUM_WARPS + for num_stages in [2, 3, 4] + ], + key=['BT', 'BK', 'BV', 'USE_G'], + use_cuda_graph=use_cuda_graph, +) +@triton.jit(do_not_specialize=['T']) +def chunk_gated_delta_rule_bwd_kernel_dhu( + q, + k, + d, + g, + dht, + dh0, + do, + dh, + dv, + dv2, + offsets, + chunk_offsets, + scale, + T, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BC: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + USE_FINAL_STATE_GRADIENT: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr +): + i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_n, i_h = i_nh // H, i_nh % H + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + boh = tl.load(chunk_offsets + i_n).to(tl.int32) + else: + bos, eos = i_n * T, i_n * T + T + NT = tl.cdiv(T, BT) + boh = i_n * NT + + # [BK, BV] + b_dh = tl.zeros([BK, BV], dtype=tl.float32) + if USE_FINAL_STATE_GRADIENT: + p_dht = tl.make_block_ptr(dht + i_nh * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + b_dh += tl.load(p_dht, boundary_check=(0, 1)) + + for i_t in range(NT - 1, -1, -1): + if HEAD_FIRST: + p_dh = tl.make_block_ptr(dh + (i_nh * NT + i_t) * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + else: + p_dh = tl.make_block_ptr(dh + ((boh+i_t) * H + i_h) * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + tl.store(p_dh, b_dh.to(p_dh.dtype.element_ty), 
boundary_check=(0, 1)) + b_dh_tmp = tl.zeros([BK, BV], dtype=tl.float32) + if USE_G: + last_idx = min((i_t + 1) * BT, T) - 1 + if HEAD_FIRST: + bg_last = tl.load(g + i_nh * T + last_idx) + else: + bg_last = tl.load(g + (bos + last_idx) * H + i_h) + else: + bg_last = None + last_idx = None + for i_c in range(tl.cdiv(BT, BC) - 1, -1, -1): + if HEAD_FIRST: + p_q = tl.make_block_ptr(q + i_nh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT + i_c * BC), (BK, BC), (0, 1)) + p_k = tl.make_block_ptr(k + i_nh * T*K, (T, K), (K, 1), (i_t * BT + i_c * BC, i_k * BK), (BC, BK), (1, 0)) + p_d = tl.make_block_ptr(d + i_nh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT + i_c * BC), (BK, BC), (0, 1)) + p_dv = tl.make_block_ptr(dv + i_nh * T*V, (T, V), (V, 1), (i_t * BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_do = tl.make_block_ptr(do + i_nh * T*V, (T, V), (V, 1), (i_t * BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_g = tl.make_block_ptr(g + i_nh * T, (T,), (1,), (i_t * BT + i_c * BC,), (BC,), (0,)) if USE_G else None + p_dv2 = tl.make_block_ptr(dv2 + i_nh * T*V, (T, V), (V, 1), (i_t * BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + else: + p_q = tl.make_block_ptr(q+(bos*H+i_h)*K, (K, T), (1, H*K), (i_k * BK, i_t * BT + i_c * BC), (BK, BC), (0, 1)) + p_k = tl.make_block_ptr(k+(bos*H+i_h)*K, (T, K), (H*K, 1), (i_t * BT + i_c * BC, i_k * BK), (BC, BK), (1, 0)) + p_d = tl.make_block_ptr(d+(bos*H+i_h)*K, (K, T), (1, H*K), (i_k * BK, i_t * BT + i_c * BC), (BK, BC), (0, 1)) + p_dv = tl.make_block_ptr(dv+(bos*H+i_h)*V, (T, V), (H*V, 1), (i_t*BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_do = tl.make_block_ptr(do+(bos*H+i_h)*V, (T, V), (H*V, 1), (i_t*BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + p_g = tl.make_block_ptr(g+bos*H+i_h, (T,), (H,), (i_t*BT + i_c * BC,), (BC,), (0,)) if USE_G else None + p_dv2 = tl.make_block_ptr(dv2+(bos*H+i_h)*V, (T, V), (H*V, 1), (i_t*BT + i_c * BC, i_v * BV), (BC, BV), (1, 0)) + b_g = tl.load(p_g, boundary_check=(0,)) if USE_G else None + # [BK, BT] + b_q = 
def chunk_gated_delta_rule_fwd_h(
    k: torch.Tensor,
    w: torch.Tensor,
    u: torch.Tensor,
    g: Optional[torch.Tensor] = None,
    initial_state: Optional[torch.Tensor] = None,
    output_final_state: bool = False,
    offsets: Optional[torch.LongTensor] = None,
    indices: Optional[torch.LongTensor] = None,
    head_first: bool = True,
    chunk_size: int = 64
) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
    """
    Host-side launcher for `chunk_gated_delta_rule_fwd_kernel_h`.

    Args:
        k: keys, unpacked as `[B, H, T, K]` if `head_first` else `[B, T, H, K]`.
        w: passed to the kernel as its `d` argument.
        u: passed to the kernel as its `v` argument; its last dimension is `V`.
        g: optional gate tensor; the kernel enables its gated path only when
            this is not `None`.
        initial_state: optional initial state, forwarded as `h0`.
        output_final_state: allocate and return the final state when `True`.
        offsets: optional sequence boundaries for variable-length batches.
        indices: chunk index table; required whenever `offsets` is given
            because the total number of chunks is taken from its length.
        head_first: layout flag, see `k`.
        chunk_size: upper bound for the chunk length `BT`.

    Returns:
        A 3-tuple `(h, v_new, final_state)`:
        `h` is `[B, H, NT, K, V]` (head-first) or `[B, NT, H, K, V]`, in the dtype of `k`;
        `v_new` has the shape and dtype of `u`;
        `final_state` is a float32 `[N, H, K, V]` tensor, or `None` when
        `output_final_state` is `False`.

    Raises:
        ValueError: if `offsets` is given without `indices`.
        AssertionError: if the padded head dimension exceeds 256.
    """
    if head_first:
        B, H, T, K, V = *k.shape, u.shape[-1]
    else:
        B, T, H, K, V = *k.shape, u.shape[-1]
    BT = min(chunk_size, max(triton.next_power_of_2(T), 16))
    # N: the actual number of sequences in the batch with either equal or variable lengths
    if offsets is None:
        N, NT, chunk_offsets = B, triton.cdiv(T, BT), None
    else:
        if indices is None:
            # previously surfaced as an opaque `len(None)` TypeError
            raise ValueError("`indices` must be provided together with `offsets`.")
        N, NT, chunk_offsets = len(offsets) - 1, len(indices), prepare_chunk_offsets(offsets, BT)
    BK = triton.next_power_of_2(K)
    assert BK <= 256, "current kernel does not support head dimension larger than 256."
    # H100 can have larger block size
    if check_shared_mem('hopper', k.device.index):
        BV = 64
        BC = 64 if K <= 128 else 32
    # A100
    elif check_shared_mem('ampere', k.device.index):
        BV = 32
        BC = 64
    else:
        BV = 32
        BC = 32 if K <= 128 else 16
    # a sub-chunk can never be longer than the chunk itself
    BC = min(BT, BC)
    NK = triton.cdiv(K, BK)
    NV = triton.cdiv(V, BV)
    assert NK == 1, 'NK > 1 is not supported because it involves time-consuming synchronization'

    if head_first:
        h = k.new_empty(B, H, NT, K, V)
    else:
        h = k.new_empty(B, NT, H, K, V)
    final_state = k.new_empty(N, H, K, V, dtype=torch.float32) if output_final_state else None

    v_new = torch.empty_like(u)
    # one program per (K block, V block, sequence * head)
    grid = (NK, NV, N * H)

    chunk_gated_delta_rule_fwd_kernel_h[grid](
        k=k,
        v=u,
        d=w,
        v_new=v_new,
        g=g,
        h=h,
        h0=initial_state,
        ht=final_state,
        offsets=offsets,
        chunk_offsets=chunk_offsets,
        T=T,
        H=H,
        K=K,
        V=V,
        BT=BT,
        BC=BC,
        BK=BK,
        BV=BV,
        NT=NT,
        HEAD_FIRST=head_first
    )
    return h, v_new, final_state
# Forward pass over the chunked recurrent state.
#
# Each program owns one [BK, BV] tile of the state for one (sequence, head)
# pair and walks the sequence chunk by chunk (BT positions at a time):
#   * at the first chunk of every split (every BS // BT chunks) the state as
#     it is *before* that chunk's update is written to `h`;
#   * the state is then decayed (optional g / gk / gv paths) and updated with
#     the chunk's k/v product;
#   * after the last chunk the state is optionally written to `ht`.
#
# Grid decoding: axis 0 -> K tile (i_k), axis 1 -> V tile (i_v),
# axis 2 -> flattened (sequence, head) index (i_nh = i_n * H + i_h).
# USE_INITIAL_STATE / STORE_FINAL_STATE / USE_OFFSETS are derived from the
# arguments by the heuristics; BK / BV are chosen by the autotuner.
# NOTE(review): g / gk / gv are used as log-space quantities (only differences
# and exp() of them appear); presumably cumulative log-decays — confirm with caller.
@triton.heuristics({
    'USE_INITIAL_STATE': lambda args: args['h0'] is not None,
    'STORE_FINAL_STATE': lambda args: args['ht'] is not None,
    'USE_OFFSETS': lambda args: args['offsets'] is not None
})
@triton.autotune(
    configs=[
        triton.Config({'BK': BK, 'BV': BV}, num_warps=num_warps, num_stages=num_stages)
        for BK in BKV_LIST
        for BV in BKV_LIST
        for num_warps in [1, 2, 4, 8]
        for num_stages in [2, 3, 4]
    ],
    key=['BT', 'USE_G', 'USE_GK', 'USE_GV']
)
@triton.jit(do_not_specialize=['T'])
def chunk_fwd_kernel_h(
    k,
    v,
    h,
    g,
    gk,
    gv,
    h0,
    ht,
    offsets,
    split_offsets,
    T,
    H: tl.constexpr,
    K: tl.constexpr,
    V: tl.constexpr,
    BT: tl.constexpr,
    BS: tl.constexpr,
    BK: tl.constexpr,
    BV: tl.constexpr,
    USE_G: tl.constexpr,
    USE_GK: tl.constexpr,
    USE_GV: tl.constexpr,
    USE_INITIAL_STATE: tl.constexpr,
    STORE_FINAL_STATE: tl.constexpr,
    USE_OFFSETS: tl.constexpr,
    HEAD_FIRST: tl.constexpr
):
    i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
    i_n, i_h = i_nh // H, i_nh % H
    if USE_OFFSETS:
        # variable-length mode: this sequence spans [bos, eos) and T becomes its own length
        bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32)
        T = eos - bos
        NT = tl.cdiv(T, BT)
        NS = tl.cdiv(T, BS)
        # index of this sequence's first split inside `h`
        boh = tl.load(split_offsets + i_n).to(tl.int32)
    else:
        bos, eos = i_n * T, i_n * T + T
        NT = tl.cdiv(T, BT)
        NS = tl.cdiv(T, BS)
        boh = i_n * NS

    # [BK, BV]
    b_h = tl.zeros([BK, BV], dtype=tl.float32)
    if USE_INITIAL_STATE:
        p_h0 = tl.make_block_ptr(h0 + i_nh * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))
        b_h = tl.load(p_h0, boundary_check=(0, 1)).to(tl.float32)

    for i_t in range(NT):
        # split that chunk i_t belongs to
        i_s = i_t // (BS // BT)
        if HEAD_FIRST:
            # k is read through a transposed view so the tile comes out as [BK, BT]
            p_k = tl.make_block_ptr(k + i_nh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1))
            p_v = tl.make_block_ptr(v + i_nh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))

            # 64-bit offset into h
            o_h = (i_nh * NS + i_s).to(tl.int64) * K*V
            p_h = tl.make_block_ptr(h + o_h, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))
        else:
            p_k = tl.make_block_ptr(k + (bos*H + i_h) * K, (K, T), (1, H*K), (i_k * BK, i_t * BT), (BK, BT), (0, 1))
            p_v = tl.make_block_ptr(v + (bos*H + i_h) * V, (T, V), (H*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))

            o_h = ((boh + i_s) * H + i_h).to(tl.int64) * K*V
            p_h = tl.make_block_ptr(h + o_h, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))

        # only the first chunk of each split snapshots the (pre-update) state
        if i_t % (BS // BT) == 0:
            tl.store(p_h, b_h.to(p_h.dtype.element_ty), boundary_check=(0, 1))
        # [BK, BT]
        b_k = tl.load(p_k, boundary_check=(0, 1))
        # [BT, BV]
        b_v = tl.load(p_v, boundary_check=(0, 1))
        # last valid position of this chunk (the final chunk may be partial)
        last_idx = min((i_t + 1) * BT, T) - 1

        # scalar decay
        if USE_G:
            if HEAD_FIRST:
                b_g_last = tl.load(g + i_nh * T + last_idx)
                p_g = g + i_nh * T + i_t * BT + tl.arange(0, BT)
                p_g = tl.max_contiguous(tl.multiple_of(p_g, BT), BT)
            else:
                b_g_last = tl.load(g + bos * H + last_idx * H + i_h)
                p_g = g + bos*H + (i_t * BT + tl.arange(0, BT)) * H + i_h
            # decay the carried state by the gate at the end of the chunk ...
            b_h *= exp(b_g_last)
            b_g = tl.load(p_g, mask=(i_t * BT + tl.arange(0, BT) < T), other=0.)
            # ... and each value row by the gap between its own gate and the chunk-end gate
            b_v = (b_v * exp(b_g_last - b_g)[:, None]).to(b_v.dtype)

        # vector decay, h = Diag(gk) @ h
        if USE_GK:
            if HEAD_FIRST:
                p_gk = tl.make_block_ptr(gk + i_nh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1))
                p_gk_last = gk + i_nh * T*K + last_idx * K + i_k * BK + tl.arange(0, BK)
                p_gk_last = tl.max_contiguous(tl.multiple_of(p_gk_last, BK), BK)
            else:
                p_gk = tl.make_block_ptr(gk + (bos*H + i_h) * K, (K, T), (1, H*K), (i_k * BK, i_t * BT), (BK, BT), (0, 1))
                p_gk_last = gk + (bos + last_idx) * H*K + i_h * K + i_k * BK + tl.arange(0, BK)

            b_gk_last = tl.load(p_gk_last, mask=(i_k * BK + tl.arange(0, BK) < K), other=0.)
            b_h *= exp(b_gk_last)[:, None]

            b_gk = tl.load(p_gk, boundary_check=(0, 1))
            b_k = (b_k * exp(b_gk_last[:, None] - b_gk)).to(b_k.dtype)

        # vector decay, h = h @ Diag(gv)
        if USE_GV:
            if HEAD_FIRST:
                p_gv = tl.make_block_ptr(gv + i_nh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))
                p_gv_last = gv + i_nh * T*V + last_idx * V + i_v * BV + tl.arange(0, BV)
                p_gv_last = tl.max_contiguous(tl.multiple_of(p_gv_last, BV), BV)
            else:
                p_gv = tl.make_block_ptr(gv + (bos*H + i_h) * V, (T, V), (H*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))
                p_gv_last = gv + (bos + last_idx) * H*V + i_h * V + i_v * BV + tl.arange(0, BV)

            b_gv_last = tl.load(p_gv_last, mask=(i_v * BV + tl.arange(0, BV) < V), other=0.)
            b_h *= exp(b_gv_last)[None, :]

            b_gv = tl.load(p_gv, boundary_check=(0, 1))
            b_v = (b_v * exp(b_gv_last[None, :] - b_gv)).to(b_v.dtype)

        # [BK, BT] @ [BT, BV]: add this chunk's contribution to the state
        b_h += tl.dot(b_k, b_v)

    if STORE_FINAL_STATE:
        p_ht = tl.make_block_ptr(ht + i_nh * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))
        tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), boundary_check=(0, 1))
+ STORE_INITIAL_STATE_GRADIENT: tl.constexpr, + USE_FINAL_STATE_GRADIENT: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr +): + i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_bg = i_nh // NG + i_n, i_hq = i_nh // HQ, i_nh % HQ + i_h = i_hq // NG + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + NS = tl.cdiv(T, BS) + boh = tl.load(split_offsets + i_n).to(tl.int32) + else: + bos, eos = i_n * T, i_n * T + T + NT = tl.cdiv(T, BT) + NS = tl.cdiv(T, BS) + boh = i_n * NS + + # [BK, BV] + b_dh = tl.zeros([BK, BV], dtype=tl.float32) + if USE_FINAL_STATE_GRADIENT: + p_dht = tl.make_block_ptr(dht + i_nh * K*V, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + b_dh += tl.load(p_dht, boundary_check=(0, 1)).to(tl.float32) + + for i_t in range(NT - 1, -1, -1): + i_s = i_t // (BS // BT) + if HEAD_FIRST: + o_dh = (i_nh * NS + i_s).to(tl.int64) * K*V + p_dh = tl.make_block_ptr(dh + o_dh, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + else: + o_dh = ((boh + i_s) * H + i_h).to(tl.int64) * K*V + p_dh = tl.make_block_ptr(dh + o_dh, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + + if i_t % (BS // BT) == 0: + tl.store(p_dh, b_dh.to(p_dh.dtype.element_ty), boundary_check=(0, 1)) + last_idx = min(i_t * BT + BT, T) - 1 + # [BK, BT] + if HEAD_FIRST: + p_q = tl.make_block_ptr(q + i_nh * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_do = tl.make_block_ptr(do + i_nh * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + else: + p_q = tl.make_block_ptr(q + (bos*HQ + i_hq) * K, (K, T), (1, HQ*K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_do = tl.make_block_ptr(do + (bos*HQ + i_hq) * V, (T, V), (HQ*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BT, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + + if USE_G: 
+ if HEAD_FIRST: + p_g = g + i_bg * T + i_t * BT + tl.arange(0, BT) + p_g = tl.max_contiguous(tl.multiple_of(p_g, BT), BT) + b_g_last = tl.load(g + i_bg * T + last_idx) + else: + p_g = g + (bos + i_t * BT + tl.arange(0, BT)) * H + i_h + b_g_last = tl.load(g + (bos + last_idx) * H + i_h) + b_g = tl.load(p_g, mask=(i_t * BT + tl.arange(0, BT) < T), other=0.) + b_q = (b_q * exp(b_g)[None, :]).to(b_q.dtype) + + b_dh *= exp(b_g_last) + + if USE_GK: + if HEAD_FIRST: + p_gk = tl.make_block_ptr(gk + i_bg * T*K, (K, T), (1, K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_gk_last = gk + (i_bg * T + last_idx) * K + i_k * BK + tl.arange(0, BK) + p_gk_last = tl.max_contiguous(tl.multiple_of(p_gk_last, BK), BK) + else: + p_gk = tl.make_block_ptr(gk + (bos*H + i_h) * K, (K, T), (1, H*K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + p_gk_last = gk + (bos + last_idx) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + + b_gk = tl.load(p_gk, boundary_check=(0, 1)) + b_q = (b_q * exp(b_gk)).to(b_q.dtype) + b_gk_last = tl.load(p_gk_last, mask=(i_k * BK + tl.arange(0, BK) < K), other=0.) + b_dh *= exp(b_gk_last)[:, None] + + if USE_GV: + if HEAD_FIRST: + p_gv = tl.make_block_ptr(gv + i_bg * T*V, (T, V), (V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_gv_last = gv + (i_bg * T + last_idx) * V + i_v * BV + tl.arange(0, BV) + p_gv_last = tl.max_contiguous(tl.multiple_of(p_gv_last, BV), BV) + else: + p_gv = tl.make_block_ptr(gv + (bos*H + i_h) * V, (T, V), (H*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_gv_last = gv + (bos + last_idx) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + + b_gv = tl.load(p_gv, boundary_check=(0, 1)) + b_do = (b_do * exp(b_gv)).to(b_do.dtype) + + b_gv_last = tl.load(p_gv_last, mask=(i_v * BV + tl.arange(0, BV) < V), other=0.) 
def chunk_bwd_dh(
    q: torch.Tensor,
    k: torch.Tensor,
    v: torch.Tensor,
    g: Optional[torch.Tensor],
    gk: Optional[torch.Tensor],
    gv: Optional[torch.Tensor],
    do: torch.Tensor,
    h0: Optional[torch.Tensor],
    dht: Optional[torch.Tensor],
    scale: float,
    offsets: Optional[torch.Tensor] = None,
    head_first: bool = True,
    chunk_size: int = 64,
    split_size: Optional[int] = None,
    states_in_fp32: bool = False
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    """
    Host-side launcher for `chunk_bwd_kernel_dh` (gradients of the chunked state).

    Args:
        q: queries; the number of query heads `HQ` is read from axis 1
            (head-first) or axis 2 (time-first).
        k: keys, unpacked as `[B, H, T, K]` if `head_first` else `[B, T, H, K]`.
        v: values; only its last dimension `V` is used here.
        g, gk, gv: optional gate tensors; each one enables the matching kernel
            path (`USE_G` / `USE_GK` / `USE_GV`) when it is not `None`.
        do: gradient of the output.
        h0: optional initial state; when given, a float32 gradient buffer of
            the same shape is allocated and returned.
        dht: optional gradient of the final state.
        scale: scaling factor applied to `q` inside the kernel.
        offsets: optional sequence boundaries for variable-length batches.
        head_first: layout flag, see `k`.
        chunk_size: upper bound for the chunk length `BT`.
        split_size: optional upper bound for the split length `BS`
            (defaults to `BT`); `BS` must be a multiple of `BT`.
        states_in_fp32: store `dh` in float32 instead of the dtype of `k`.

    Returns:
        `(dh, dh0)` where `dh` is `[B, HQ, NS, K, V]` (head-first) or
        `[B, NS, HQ, K, V]`, and `dh0` is `None` when `h0` is `None`.
    """
    if head_first:
        B, H, T, K, V = *k.shape, v.shape[-1]
        HQ = q.shape[1]
    else:
        B, T, H, K, V = *k.shape, v.shape[-1]
        HQ = q.shape[2]
    BT = min(chunk_size, max(16, triton.next_power_of_2(T)))
    BS = BT if split_size is None else min(split_size, max(16, triton.next_power_of_2(T)))
    assert BS % BT == 0, f"The `split_size` (got {BS}) must be a multiple of `chunk_size` {BT}"
    # N: the actual number of sequences in the batch with either equal or variable lengths
    # NG: number of groups in GQA
    if offsets is None:
        split_offsets, N, NS = None, B, triton.cdiv(T, BS)
    else:
        split_offsets = prepare_chunk_offsets(offsets, BS)
        N, NS = len(offsets) - 1, split_offsets[-1]
    NG = HQ // H

    if head_first:
        dh = k.new_empty(B, HQ, NS, K, V, dtype=k.dtype if not states_in_fp32 else torch.float)
    else:
        dh = k.new_empty(B, NS, HQ, K, V, dtype=k.dtype if not states_in_fp32 else torch.float)
    dh0 = torch.empty_like(h0, dtype=torch.float) if h0 is not None else None

    def grid(meta):
        # The kernel decodes axis 2 as `i_nh // HQ, i_nh % HQ` and `dh` holds HQ
        # heads, so one program per *query* head is required. Launching only
        # N * H programs leaves part of `dh` unwritten whenever HQ > H.
        return (triton.cdiv(K, meta['BK']), triton.cdiv(V, meta['BV']), N * HQ)

    chunk_bwd_kernel_dh[grid](
        q=q,
        g=g,
        gk=gk,
        gv=gv,
        do=do,
        dh=dh,
        dht=dht,
        dh0=dh0,
        offsets=offsets,
        split_offsets=split_offsets,
        scale=scale,
        T=T,
        HQ=HQ,
        H=H,
        K=K,
        V=V,
        BT=BT,
        BS=BS,
        NG=NG,
        USE_G=g is not None,
        USE_GK=gk is not None,
        USE_GV=gv is not None,
        HEAD_FIRST=head_first
    )
    return dh, dh0
# -*- coding: utf-8 -*-
# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang

from typing import Optional, Tuple

import torch
import triton
import triton.language as tl

from fla.ops.utils.op import exp, safe_exp
from fla.utils import check_shared_mem, is_nvidia_hopper

# Candidate K/V tile widths and warp counts swept by the autotuners in this module.
# NOTE(review): `check_shared_mem()` presumably reports whether the device has enough shared
# memory for the larger tiles -- confirm against fla.utils.
BKV_LIST = [64, 128] if check_shared_mem() else [32, 64]
NUM_WARPS = [2, 4] if is_nvidia_hopper else [2, 4, 8]


# Forward output kernel: for one (V tile, time chunk, batch*head) program it accumulates
# q @ h (contribution of the stored state) and the causally masked (q @ k^T) @ v
# (contribution from inside the chunk), optionally gated by `g`, and writes the scaled sum to `o`.
@triton.heuristics({
    'USE_G': lambda args: args['g'] is not None,
    'USE_OFFSETS': lambda args: args['offsets'] is not None
})
@triton.autotune(
    configs=[
        triton.Config({'BK': BK, 'BV': BV}, num_warps=num_warps, num_stages=num_stages)
        for BK in BKV_LIST
        for BV in BKV_LIST
        for num_warps in NUM_WARPS
        for num_stages in [2, 3, 4]
    ],
    key=['H', 'K', 'V', 'BT'],
)
@triton.jit(do_not_specialize=['T'])
def chunk_fwd_kernel_o(
    q,
    k,
    v,
    h,
    g,
    o,
    offsets,
    indices,
    scale,
    T,
    H: tl.constexpr,
    K: tl.constexpr,
    V: tl.constexpr,
    BT: tl.constexpr,
    BK: tl.constexpr,
    BV: tl.constexpr,
    USE_G: tl.constexpr,
    USE_OFFSETS: tl.constexpr,
    HEAD_FIRST: tl.constexpr
):
    # Program ids: V tile, time chunk, flattened (batch, head).
    i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
    i_b, i_h = i_bh // H, i_bh % H

    if USE_OFFSETS:
        # Variable-length path: the launch index is kept as the global chunk id, then replaced by
        # the (sequence id, chunk-within-sequence) pair read from `indices`; T becomes the
        # length of that sequence.
        i_tg = i_t
        i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32)
        bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32)
        T = eos - bos
        NT = tl.cdiv(T, BT)
    else:
        # Fixed-length path: every batch entry owns NT consecutive chunks.
        NT = tl.cdiv(T, BT)
        i_tg = i_b * NT + i_t
        bos, eos = i_b * T, i_b * T + T

    # Row strides along the time axis for q/k, v/o and g in the selected layout.
    s_qk = K if HEAD_FIRST else H*K
    s_vo = V if HEAD_FIRST else H*V
    s_g = 1 if HEAD_FIRST else H
    # offset calculation
    q += (i_bh * T*K) if HEAD_FIRST else ((bos * H + i_h) * K)
    k += (i_bh * T*K) if HEAD_FIRST else ((bos * H + i_h) * K)
    v += (i_bh * T*V) if HEAD_FIRST else ((bos * H + i_h) * V)
    o += (i_bh * T*V) if HEAD_FIRST else ((bos * H + i_h) * V)
    # The state offset is computed in int64 before multiplying by K*V.
    h += ((i_bh * NT + i_t).to(tl.int64) * K*V) if HEAD_FIRST else ((i_tg * H + i_h).to(tl.int64) * K*V)

    # float32 accumulators for the state contribution and the intra-chunk score matrix.
    b_o = tl.zeros([BT, BV], dtype=tl.float32)
    b_A = tl.zeros([BT, BT], dtype=tl.float32)

    for i_k in range(tl.cdiv(K, BK)):
        p_q = tl.make_block_ptr(q, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0))
        # k is addressed as its transpose (K, T) so that q @ k needs no explicit transpose.
        p_k = tl.make_block_ptr(k, (K, T), (1, s_qk), (i_k * BK, i_t * BT), (BK, BT), (0, 1))
        p_h = tl.make_block_ptr(h, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0))
        # [BT, BK]
        b_q = tl.load(p_q, boundary_check=(0, 1))
        # [BK, BT]
        b_k = tl.load(p_k, boundary_check=(0, 1))
        # [BK, BV]
        b_h = tl.load(p_h, boundary_check=(0, 1))

        # [BT, BK] @ [BK, BV] -> [BT, BV]
        b_o += tl.dot(b_q, b_h)
        # [BT, BK] @ [BK, BT] -> [BT, BT]
        b_A += tl.dot(b_q, b_k)

    if USE_G:
        g += (i_bh * T) if HEAD_FIRST else (bos * H + i_h)
        p_g = tl.make_block_ptr(g, (T,), (s_g,), (i_t * BT,), (BT,), (0,))
        b_g = tl.load(p_g, boundary_check=(0,))
        # Row i of the state term is scaled by exp(g_i); entry (i, j) of the score matrix by
        # the gate difference g_i - g_j.
        # NOTE(review): `safe_exp` is imported from fla.ops.utils.op; presumably it guards
        # against overflow for positive arguments -- confirm there.
        b_o = b_o * exp(b_g)[:, None]
        b_A = b_A * safe_exp(b_g[:, None] - b_g[None, :])

    # Causal mask: keep entries where the query row is at or after the key column.
    o_i = tl.arange(0, BT)
    m_A = o_i[:, None] >= o_i[None, :]
    b_A = tl.where(m_A, b_A, 0)

    p_v = tl.make_block_ptr(v, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))
    p_o = tl.make_block_ptr(o, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0))
    b_v = tl.load(p_v, boundary_check=(0, 1))

    # to fix mma -> mma layout conversion
    # already solved by triton v3.2 or higher
    b_o = b_o * scale + tl.dot(b_A.to(b_v.dtype), b_v) * scale
    tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1))
+@triton.jit(do_not_specialize=['T']) +def chunk_bwd_kernel_dqkwg( + q, + k, + v, + h, + g, + do, + dh, + dq, + dk, + dg, + w, + dv, + dw, + offsets, + indices, + scale, + B: tl.constexpr, + T, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_DW: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr +): + i_k, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_h = i_bh // H, i_bh % H + if USE_G: + dg += i_k * B * H * T + if USE_OFFSETS: + i_tg = i_t + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + else: + NT = tl.cdiv(T, BT) + i_tg = i_b * NT + i_t + bos, eos = i_b * T, i_b * T + T + + # offset calculation + v += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + do += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + h += (i_bh * NT + i_t).to(tl.int64) * K*V if HEAD_FIRST else (i_tg * H + i_h).to(tl.int64) * K*V + dh += (i_bh * NT + i_t).to(tl.int64) * K*V if HEAD_FIRST else (i_tg * H + i_h).to(tl.int64) * K*V + q += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + k += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + dq += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + dk += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + s_qk = K if HEAD_FIRST else H*K + s_vo = V if HEAD_FIRST else H*V + s_g = 1 if HEAD_FIRST else H + + # for delta rule only + if USE_DW: + dw += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + dv += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + w += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + + b_dq = tl.zeros([BT, BK], dtype=tl.float32) + b_dk = tl.zeros([BT, BK], dtype=tl.float32) + b_ds = tl.zeros([BT, BT], dtype=tl.float32) + b_dg_last = tl.zeros([1,], dtype=tl.float32) if USE_G else None + b_dw = 
tl.zeros([BT, BK], dtype=tl.float32) if USE_DW else None + + for i_v in range(tl.cdiv(V, BV)): + p_v = tl.make_block_ptr(v, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_do = tl.make_block_ptr(do, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_h = tl.make_block_ptr(h, (V, K), (1, V), (i_v * BV, i_k * BK), (BV, BK), (0, 1)) + p_dh = tl.make_block_ptr(dh, (V, K), (1, V), (i_v * BV, i_k * BK), (BV, BK), (0, 1)) + # [BT, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BV, BK] + b_h = tl.load(p_h, boundary_check=(0, 1)) + b_dh = tl.load(p_dh, boundary_check=(0, 1)) + if USE_G: + b_dg_last += (tl.sum(b_h * b_dh)) + # [BT, BV] @ [BV, BT] -> [BT, BT] + b_ds += tl.dot(b_do, tl.trans(b_v)) + # [BT, BV] @ [BV, BK] -> [BT, BK] + b_dq += tl.dot(b_do, b_h.to(b_do.dtype)) + # [BT, BV] @ [BV, BK] -> [BT, BK] + b_dk += tl.dot(b_v, b_dh.to(b_v.dtype)) + if USE_DW: + p_dv = tl.make_block_ptr(dv, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + b_dv = tl.load(p_dv, boundary_check=(0, 1)) + b_dw += tl.dot(b_dv.to(b_v.dtype), b_h.to(b_v.dtype)) + + if USE_DW and not USE_G: + p_dw = tl.make_block_ptr(dw, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + tl.store(p_dw, -b_dw.to(p_dw.dtype.element_ty), boundary_check=(0, 1)) + + tl.debug_barrier() + o_i = tl.arange(0, BT) + p_q = tl.make_block_ptr(q, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_k = tl.make_block_ptr(k, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + + p_dq = tl.make_block_ptr(dq, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_dk = tl.make_block_ptr(dk, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + + if USE_G: + b_dg = tl.zeros([BT,], dtype=tl.float32) + g += i_bh * T if HEAD_FIRST else bos * H + i_h + dg += i_bh * T if HEAD_FIRST else bos * H + i_h + p_g = 
tl.make_block_ptr(g, (T,), (s_g,), (i_t * BT,), (BT,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + b_g_last = tl.load(g + (min(i_t * BT + BT, T) - 1) * s_g) + b_dg_last *= exp(b_g_last) + + if USE_DW: + p_w = tl.make_block_ptr(w, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_dw = tl.make_block_ptr(dw, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + b_w = tl.load(p_w, boundary_check=(0, 1)) + b_dw = b_dw * exp(b_g)[:, None] + tl.store(p_dw, -b_dw.to(p_dw.dtype.element_ty), boundary_check=(0, 1)) + b_dg -= tl.sum(b_w * b_dw, axis=1) + + b_dq = b_dq * exp(b_g)[:, None] * scale + b_dg += tl.sum(b_dq * b_q, axis=1) + + b_dk = b_dk * safe_exp(-b_g + b_g_last)[:, None] + b_dg -= tl.sum(b_k * b_dk, axis=1) + b_dg_last += tl.sum(b_dk * b_k) + + b_ds = tl.where(o_i[:, None] >= o_i[None, :], b_ds * safe_exp(b_g[:, None] - b_g[None, :]), 0) * scale + b_ds2 = b_ds * tl.dot(b_q, tl.trans(b_k)) + b_dg += tl.sum(b_ds2, axis=1) + b_dg -= tl.sum(b_ds2, axis=0) + + b_ds = b_ds.to(b_k.dtype) + # [BT, BK] + b_dq += tl.dot(b_ds, b_k) + b_dk += tl.dot(tl.trans(b_ds), b_q) + p_dg = tl.make_block_ptr(dg, (T,), (s_g,), (i_t * BT,), (BT,), (0,)) + # (SY 09/21) revcumsum in a separate kernel due to strange triton compiler issue + # b_dg = tl.dot(tl.where(o_i[:, None] <= o_i[None, :], 1., 0.), b_dg, allow_tf32=False) + b_dg_last) + b_dg = tl.where(o_i < min(BT, T-i_t*BT) - 1, b_dg, b_dg + b_dg_last) + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dg, b_dg.to(p_dg.dtype.element_ty), boundary_check=(0,)) + else: + b_ds = tl.where(o_i[:, None] >= o_i[None, :], b_ds, 0) + b_ds = b_ds.to(b_k.dtype) + b_dq += tl.dot(b_ds, b_k) + b_dk += tl.dot(tl.trans(b_ds), b_q) * scale + b_dq *= scale + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.heuristics({ + 
'USE_OFFSETS': lambda args: args['offsets'] is not None, + 'USE_G': lambda args: args['g'] is not None, +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in [2, 4, 8] + for num_stages in [2, 3, 4] + ], + key=['H', 'K', 'V', 'BT', 'BK', 'BV', 'USE_G'], +) +@triton.jit(do_not_specialize=['T']) +def chunk_bwd_kernel_dv( + q, + k, + g, + do, + dv, + dh, + offsets, + indices, + scale, + T, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_h = i_bh // H, i_bh % H + if USE_OFFSETS: + i_tg = i_t + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + else: + NT = tl.cdiv(T, BT) + i_tg = i_b * NT + i_t + bos, eos = i_b * T, i_b * T + T + + b_dv = tl.zeros([BT, BV], dtype=tl.float32) + + # offset calculation + q += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + k += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + do += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + dv += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + s_qk = K if HEAD_FIRST else H*K + s_vo = V if HEAD_FIRST else H*V + s_g = 1 if HEAD_FIRST else H + dh += (i_bh * NT + i_t).to(tl.int64) * K*V if HEAD_FIRST else (i_tg * H + i_h).to(tl.int64) * K*V + + b_A = tl.zeros([BT, BT], dtype=tl.float32) + for i_k in range(tl.cdiv(K, BK)): + p_k = tl.make_block_ptr(k, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_q = tl.make_block_ptr(q, (K, T), (1, s_qk), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_A += tl.dot(b_k, b_q) + p_dh = 
tl.make_block_ptr(dh, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + b_dh = tl.load(p_dh, boundary_check=(0, 1)) + b_dv += tl.dot(b_k, b_dh.to(b_k.dtype)) + + if USE_G: + g += (i_bh * T) if HEAD_FIRST else (bos * H + i_h) + p_g = tl.make_block_ptr(g, (T,), (s_g,), (i_t * BT,), (BT,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + b_g_last = tl.load(g + (min(i_t * BT + BT, T) - 1) * s_g) + b_dv *= safe_exp(-b_g + b_g_last)[:, None] + + mask = (tl.arange(0, BT)[:, None] <= tl.arange(0, BT)[None, :]) + if USE_G: + b_A = tl.where(mask, b_A * safe_exp(b_g[None, :] - b_g[:, None]) * scale, 0).to(do.dtype.element_ty) + else: + b_A = tl.where(mask, b_A * scale, 0).to(do.dtype.element_ty) + p_do = tl.make_block_ptr(do, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_dv = tl.make_block_ptr(dv, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + b_do = tl.load(p_do, boundary_check=(0, 1)) + b_dv += tl.dot(b_A.to(b_do.dtype), b_do) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.heuristics({ + 'USE_G': lambda args: args['g'] is not None, + 'USE_OFFSETS': lambda args: args['offsets'] is not None, +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in NUM_WARPS + for num_stages in [2, 3, 4] + ], + key=['H', 'K', 'V', 'BT', 'BK', 'BV', 'USE_G'], +) +@triton.jit(do_not_specialize=['T']) +def chunk_bwd_kernel_dv_local( + q, + k, + g, + do, + dv, + offsets, + indices, + scale, + T, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr +): + i_t, i_bh = tl.program_id(0), tl.program_id(1) + i_b, i_h = i_bh // H, i_bh % H + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 
1).to(tl.int32) + T = eos - bos + else: + bos, eos = i_b * T, i_b * T + T + + # offset calculation + q += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + k += i_bh * T*K if HEAD_FIRST else (bos * H + i_h) * K + do += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + dv += i_bh * T*V if HEAD_FIRST else (bos * H + i_h) * V + s_qk = K if HEAD_FIRST else H*K + s_vo = V if HEAD_FIRST else H*V + s_g = 1 if HEAD_FIRST else H + + b_A = tl.zeros([BT, BT], dtype=tl.float32) + for i_k in range(tl.cdiv(K, BK)): + p_k = tl.make_block_ptr(k, (T, K), (s_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_q = tl.make_block_ptr(q, (K, T), (1, s_qk), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_A += tl.dot(b_k, b_q) + + if USE_G: + g += (i_bh * T) if HEAD_FIRST else (bos * H + i_h) + p_g = tl.make_block_ptr(g, (T,), (s_g,), (i_t * BT,), (BT,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + + mask = (tl.arange(0, BT)[:, None] <= tl.arange(0, BT)[None, :]) + if USE_G: + b_A = tl.where(mask, b_A * safe_exp(b_g[None, :] - b_g[:, None]) * scale, 0).to(do.dtype.element_ty) + else: + b_A = tl.where(mask, b_A * scale, 0).to(do.dtype.element_ty) + + for i_v in range(tl.cdiv(V, BV)): + p_do = tl.make_block_ptr(do, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_dv = tl.make_block_ptr(dv, (T, V), (s_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + b_do = tl.load(p_do, boundary_check=(0, 1)) + b_dv = tl.dot(b_A.to(b_do.dtype), b_do) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + + +def chunk_fwd_o( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + h: torch.Tensor, + g: Optional[torch.Tensor] = None, # cumsum of log decay + scale: Optional[float] = None, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, + head_first: bool = True, + chunk_size: int = 64 +) -> torch.Tensor: + if head_first: + B, H, T, K, V = 
def chunk_bwd_dv(
    q: torch.Tensor,
    k: torch.Tensor,
    g: torch.Tensor,
    do: torch.Tensor,
    dh: torch.Tensor,
    scale: float,
    offsets: Optional[torch.LongTensor] = None,
    indices: Optional[torch.LongTensor] = None,
    head_first: bool = True,
    chunk_size: int = 64
) -> torch.Tensor:
    """Launch `chunk_bwd_kernel_dv` and return the gradient with respect to `v`.

    Args:
        q: Query tensor, forwarded to the kernel.
        k: 4-D key tensor, unpacked as `[B, H, T, K]` when `head_first`, else `[B, T, H, K]`.
        g: Optional gate tensor, forwarded to the kernel.
        do: Output gradient; its last dimension gives `V` and the result has its shape and dtype.
        dh: State gradient, forwarded to the kernel.
        scale: Scalar forwarded to the kernel.
        offsets: Optional sequence boundaries for variable-length batches.
        indices: Chunk index table; its length is the number of time chunks when `offsets` is given.
        head_first: Selects the layout described for `k`.
        chunk_size: Upper bound for the time block size `BT`.

    Returns:
        `dv`, allocated with `torch.empty_like(do)` and filled by the kernel.
    """
    if head_first:
        B, H, T, K, V = *k.shape, do.shape[-1]
    else:
        B, T, H, K, V = *k.shape, do.shape[-1]
    BT = min(chunk_size, max(16, triton.next_power_of_2(T)))
    # H100 can have larger block size
    if check_shared_mem('hopper', k.device.index):
        CONST_TILING = 128
    # The probe must be *called*: the bare function object is always truthy, which made the
    # 32-wide fallback below unreachable.
    # NOTE(review): the no-argument form mirrors the other call sites in this module; confirm
    # whether this probe should also receive `k.device.index`.
    elif check_shared_mem():
        CONST_TILING = 64
    else:
        CONST_TILING = 32
    BK = min(triton.next_power_of_2(K), CONST_TILING)
    BV = min(triton.next_power_of_2(V), CONST_TILING)
    NT = triton.cdiv(T, BT) if offsets is None else len(indices)
    NV = triton.cdiv(V, BV)

    dv = torch.empty_like(do)
    # One program per (V tile, time chunk, batch * head).
    grid = (NV, NT, B * H)
    chunk_bwd_kernel_dv[grid](
        q,
        k,
        g,
        do,
        dv,
        dh,
        offsets,
        indices,
        scale,
        T=T,
        H=H,
        K=K,
        V=V,
        BT=BT,
        BK=BK,
        BV=BV,
        HEAD_FIRST=head_first
    )
    return dv


def chunk_bwd_dv_local(
    q: torch.Tensor,
    k: torch.Tensor,
    g: torch.Tensor,
    do: torch.Tensor,
    dh: torch.Tensor,
    scale: float,
    offsets: Optional[torch.LongTensor] = None,
    indices: Optional[torch.LongTensor] = None,
    head_first: bool = True,
    chunk_size: int = 64
) -> torch.Tensor:
    """Launch `chunk_bwd_kernel_dv_local` and return the gradient with respect to `v`.

    Args:
        q: Query tensor, forwarded to the kernel.
        k: 4-D key tensor, unpacked as `[B, H, T, K]` when `head_first`, else `[B, T, H, K]`.
        g: Optional gate tensor, forwarded to the kernel.
        do: Output gradient; its last dimension gives `V` and the result has its shape and dtype.
        dh: Accepted for signature parity with `chunk_bwd_dv`; it is not passed to the kernel.
        scale: Scalar forwarded to the kernel.
        offsets: Optional sequence boundaries for variable-length batches.
        indices: Chunk index table; its length is the number of time chunks when `offsets` is given.
        head_first: Selects the layout described for `k`.
        chunk_size: Upper bound for the time block size `BT`.

    Returns:
        `dv`, allocated with `torch.empty_like(do)` and filled by the kernel.
    """
    if head_first:
        B, H, T, K, V = *k.shape, do.shape[-1]
    else:
        B, T, H, K, V = *k.shape, do.shape[-1]
    BT = min(chunk_size, max(16, triton.next_power_of_2(T)))
    # H100 can have larger block size
    if check_shared_mem('hopper', k.device.index):
        CONST_TILING = 128
    # The probe must be *called*: the bare function object is always truthy, which made the
    # 32-wide fallback below unreachable.
    # NOTE(review): the no-argument form mirrors the other call sites in this module; confirm
    # whether this probe should also receive `k.device.index`.
    elif check_shared_mem():
        CONST_TILING = 64
    else:
        CONST_TILING = 32
    BK = min(triton.next_power_of_2(K), CONST_TILING)
    BV = min(triton.next_power_of_2(V), CONST_TILING)
    NT = triton.cdiv(T, BT) if offsets is None else len(indices)

    dv = torch.empty_like(do)
    # One program per (time chunk, batch * head); the kernel loops over V tiles itself.
    grid = (NT, B * H)
    chunk_bwd_kernel_dv_local[grid](
        q,
        k,
        g,
        do,
        dv,
        offsets,
        indices,
        scale,
        T=T,
        H=H,
        K=K,
        V=V,
        BT=BT,
        BK=BK,
        BV=BV,
        HEAD_FIRST=head_first
    )
    return dv
HEAD_FIRST=head_first + ) + + if dg is not None: + dg = dg.sum(0) + return dq, dk, dw, dg diff --git a/fla/ops/common/chunk_scaled_dot_kkt.py b/fla/ops/common/chunk_scaled_dot_kkt.py new file mode 100644 index 0000000000000000000000000000000000000000..ff30664dce50a8869dd6198aaecea2ab6a171704 --- /dev/null +++ b/fla/ops/common/chunk_scaled_dot_kkt.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional + +import torch +import triton +import triton.language as tl + +from fla.ops.common.utils import prepare_chunk_indices + + +@triton.heuristics({ + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({'BK': BK}, num_warps=num_warps, num_stages=num_stages) + for BK in [32, 64, 128] + for num_warps in [2, 4, 8] + for num_stages in [2, 3, 4] + ], + key=['H', 'K', 'BT', 'USE_OFFSETS'], +) +@triton.jit(do_not_specialize=['T']) +def chunk_scaled_dot_kkt_fwd_kernel( + k, + beta, + A, + offsets, + indices, + T, + H: tl.constexpr, + K: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + HEAD_FIRST: tl.constexpr, + USE_OFFSETS: tl.constexpr, +): + i_t, i_bh = tl.program_id(0), tl.program_id(1) + i_b, i_h = i_bh // H, i_bh % H + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + else: + bos, eos = i_b * T, i_b * T + T + o_t = tl.arange(0, BT) + + if HEAD_FIRST: + p_beta = tl.make_block_ptr(beta + i_bh * T, (T,), (1,), (i_t * BT,), (BT,), (0,)) + else: + p_beta = tl.make_block_ptr(beta + bos*H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,)) + b_beta = tl.load(p_beta, boundary_check=(0,)) + + b_A = tl.zeros([BT, BT], dtype=tl.float32) + for i_k in range(tl.cdiv(K, BK)): + if HEAD_FIRST: + p_k = tl.make_block_ptr(k + i_bh * T*K, (T, K), (K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + 
def chunk_scaled_dot_kkt_fwd(
    k: torch.Tensor,
    beta: torch.Tensor,
    cu_seqlens: Optional[torch.LongTensor] = None,
    head_first: bool = False,
    chunk_size: int = 64,
    output_dtype: torch.dtype = torch.float32
) -> torch.Tensor:
    r"""
    Compute beta * K * K^T chunk by chunk.

    Within each chunk only the strictly lower-triangular part is kept; entries on and
    above the diagonal are written as zero by the kernel.

    Args:
        k (torch.Tensor):
            The key tensor of shape `[B, T, H, K]` if not `head_first` else `[B, H, T, K]`.
        beta (torch.Tensor):
            The beta tensor of shape `[B, T, H]` if not `head_first` else `[B, H, T]`.
        cu_seqlens (torch.LongTensor):
            The cumulative sequence lengths of the input tensor.
            When given, chunk indices are derived from it via `prepare_chunk_indices`.
            Default: None
        head_first (bool):
            If False, the input/output tensor is in the shape of `[B, T, H, K]`.
            If True, the input/output tensor is in the shape of `[B, H, T, K]`.
            Default: False
        chunk_size (int):
            The chunk size. Default: 64.
        output_dtype (torch.dtype):
            The dtype of the output tensor. Default: `torch.float32`

    Returns:
        beta * K * K^T of shape `[B, T, H, BT]` if not `head_first` else `[B, H, T, BT]`,
        where `BT` is the chunk size.
    """
    if head_first:
        B, H, T, K = k.shape
    else:
        B, T, H, K = k.shape
    # Unlike the other launchers, the chunk size is used as-is (no power-of-two clamping).
    BT = chunk_size
    indices = prepare_chunk_indices(cu_seqlens, BT) if cu_seqlens is not None else None
    # With packed sequences the number of chunks comes from the index table, not from T.
    NT = triton.cdiv(T, BT) if cu_seqlens is None else len(indices)
    A = torch.empty(B, *((H, T) if head_first else (T, H)), BT, device=k.device, dtype=output_dtype)
    # One program per (time chunk, batch * head).
    chunk_scaled_dot_kkt_fwd_kernel[(NT, B * H)](
        k=k,
        beta=beta,
        A=A,
        offsets=cu_seqlens,
        indices=indices,
        T=T,
        H=H,
        K=K,
        BT=BT,
        HEAD_FIRST=head_first
    )
    return A
i_k, i_nh = tl.program_id(0).to(tl.int64), tl.program_id(1).to(tl.int64), tl.program_id(2).to(tl.int64) + i_n, i_h = i_nh // H, i_nh % H + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int64), tl.load(offsets + i_n + 1).to(tl.int64) + all = T + T = eos - bos + else: + bos, eos = i_n * T, i_n * T + T + all = B * T + + if HEAD_FIRST: + p_q = q + i_nh * T*K + ((T-1) * K if REVERSE else 0) + i_k * BK + tl.arange(0, BK) + p_k = k + i_nh * T*K + ((T-1) * K if REVERSE else 0) + i_k * BK + tl.arange(0, BK) + p_v = v + i_nh * T*V + ((T-1) * V if REVERSE else 0) + i_v * BV + tl.arange(0, BV) + p_o = o + (i_k * B*H + i_nh) * T*V + ((T-1) * V if REVERSE else 0) + i_v * BV + tl.arange(0, BV) + if USE_G: + p_g = g + i_nh * T + ((T-1) if REVERSE else 0) + if USE_GK: + p_gk = gk + i_nh * T*K + ((T-1) * K if REVERSE else 0) + i_k * BK + tl.arange(0, BK) + if USE_GV: + p_gv = gv + i_nh * T*V + ((T-1) * V if REVERSE else 0) + i_v * BV + tl.arange(0, BV) + else: + p_q = q + (bos + ((T-1) if REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + p_k = k + (bos + ((T-1) if REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + p_v = v + (bos + ((T-1) if REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + p_o = o + ((i_k * all + bos) + ((T-1) if REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + if USE_G: + p_g = g + (bos + ((T-1) if REVERSE else 0)) * H + i_h + if USE_GK: + p_gk = gk + (bos + ((T-1) if REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + if USE_GV: + p_gv = gv + (bos + ((T-1) if REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + + mask_k = (i_k * BK + tl.arange(0, BK)) < K + mask_v = (i_v * BV + tl.arange(0, BV)) < V + mask_h = mask_k[None, :] & mask_v[:, None] + b_h = tl.zeros([BV, BK], dtype=tl.float32) + + if USE_INITIAL_STATE: + p_h0 = h0 + i_nh * K*V + (i_k * BK + tl.arange(0, BK)[None, :]) * V + (i_v * BV + tl.arange(0, BV)[:, None]) + b_h += tl.load(p_h0, mask=mask_h, 
other=0).to(tl.float32) + + for _ in range(0, T): + b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32) * scale + b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32) + b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32) + if USE_GK: + b_gk = tl.load(p_gk, mask=mask_k, other=0).to(tl.float32) + b_h = b_h * exp(b_gk[None, :]) + if USE_GV: + b_gv = tl.load(p_gv, mask=mask_v, other=0).to(tl.float32) + b_h = b_h * exp(b_gv[:, None]) + if USE_G: + b_g = tl.load(p_g).to(tl.float32) + b_h = b_h * exp(b_g) + b_h += b_k[None, :] * b_v[:, None] + b_o = b_h * b_q[None, :] + b_o = tl.sum(b_o, axis=1) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v) + p_q += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * K + p_k += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * K + p_v += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * V + p_o += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * V + if USE_GK: + p_gk += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * K + if USE_GV: + p_gv += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * V + if USE_G: + p_g += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) + + if STORE_FINAL_STATE: + p_ht = ht + i_nh * K*V + (i_k * BK + tl.arange(0, BK)[None, :]) * V + (i_v * BV + tl.arange(0, BV)[:, None]) + tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask_h) + + +@triton.heuristics({ + 'USE_INITIAL_STATE': lambda args: args['h0'] is not None, + 'STORE_INITIAL_STATE_GRADIENT': lambda args: args['dh0'] is not None, + 'USE_FINAL_STATE_GRADIENT': lambda args: args['dht'] is not None, + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps) + for num_warps in [1, 2, 4] + ], + key=['BK', 'BV', 'USE_GK', 'USE_GV', 'USE_G'], +) +@triton.jit(do_not_specialize=['T']) +def fused_recurrent_bwd_kernel( + q, + k, + v, + g, + gk, + gv, + h0, + do, + dq, + dk, + dv, + dht, + dh0, + offsets, + scale, + T, + B: tl.constexpr, + H: tl.constexpr, + 
K: tl.constexpr, + V: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + REVERSE: tl.constexpr, + USE_G: tl.constexpr, + USE_GK: tl.constexpr, + USE_GV: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + STORE_INITIAL_STATE_GRADIENT: tl.constexpr, + USE_FINAL_STATE_GRADIENT: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr +): + i_v, i_k, i_nh = tl.program_id(0).to(tl.int64), tl.program_id(1).to(tl.int64), tl.program_id(2).to(tl.int64) + i_n, i_h = i_nh // H, i_nh % H + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int64), tl.load(offsets + i_n + 1).to(tl.int64) + all = T + T = eos - bos + else: + bos, eos = i_n * T, i_n * T + T + all = B * T + + if HEAD_FIRST: + p_k = k + i_nh * T*K + ((T-1) * K if REVERSE else 0) + i_k * BK + tl.arange(0, BK) + p_v = v + i_nh * T*V + ((T-1) * V if REVERSE else 0) + i_v * BV + tl.arange(0, BV) + p_do = do + i_nh * T*V + ((T-1) * V if REVERSE else 0) + i_v * BV + tl.arange(0, BV) + p_dq = dq + (i_v * B*H + i_nh) * T*K + ((T-1) * K if REVERSE else 0) + i_k * BK + tl.arange(0, BK) + if USE_G: + p_g = g + i_nh * T + ((T-1) if REVERSE else 0) + if USE_GK: + p_gk = gk + i_nh * T*K + ((T-1) * K if REVERSE else 0) + i_k * BK + tl.arange(0, BK) + if USE_GV: + p_gv = gv + i_nh * T*V + ((T-1) * V if REVERSE else 0) + i_v * BV + tl.arange(0, BV) + else: + p_k = k + (bos + ((T-1) if REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + p_v = v + (bos + ((T-1) if REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + p_do = do + (bos + ((T-1) if REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + p_dq = dq + ((i_v * all + bos) + ((T-1) if REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + if USE_G: + p_g = g + (bos + ((T-1) if REVERSE else 0)) * H + i_h + if USE_GK: + p_gk = gk + (bos + ((T-1) if REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + if USE_GV: + p_gv = gv + (bos + ((T-1) if REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, 
BV) + + mask_k = i_k * BK + tl.arange(0, BK) < K + mask_v = i_v * BV + tl.arange(0, BV) < V + mask_h = mask_k[:, None] & mask_v[None, :] + + b_h = tl.zeros([BK, BV], dtype=tl.float32) + if USE_INITIAL_STATE: + p_h0 = h0 + i_nh * K*V + (i_k * BK + tl.arange(0, BK)[:, None]) * V + (i_v * BV + tl.arange(0, BV)[None, :]) + b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32) + + for _ in range(0, T): + b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32) + b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32) + b_do = tl.load(p_do, mask=mask_v, other=0).to(tl.float32) + if USE_G: + b_g = tl.load(p_g).to(tl.float32) + b_h = b_h * exp(b_g) + if USE_GK: + b_gk = tl.load(p_gk, mask=mask_k, other=0).to(tl.float32) + b_h = b_h * exp(b_gk[:, None]) + if USE_GV: + b_gv = tl.load(p_gv, mask=mask_v, other=0).to(tl.float32) + b_h = b_h * exp(b_gv[None, :]) + b_h += b_k[:, None] * b_v[None, :] + b_dq = b_h * b_do[None, :] + b_dq = tl.sum(b_dq, axis=1) * scale + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), mask=mask_k) + + p_k += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * K + p_v += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * V + p_do += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * V + p_dq += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * K + if USE_G: + p_g += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) + if USE_GK: + p_gk += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * K + if USE_GV: + p_gv += (-1 if REVERSE else 1) * (1 if HEAD_FIRST else H) * V + + # sync threads + tl.debug_barrier() + + if HEAD_FIRST: + p_q = q + i_nh * T*K + ((T - 1) * K if not REVERSE else 0) + i_k * BK + tl.arange(0, BK) + p_k = k + i_nh * T*K + ((T - 1) * K if not REVERSE else 0) + i_k * BK + tl.arange(0, BK) + p_v = v + i_nh * T*V + ((T - 1) * V if not REVERSE else 0) + i_v * BV + tl.arange(0, BV) + p_do = do + i_nh * T*V + ((T - 1) * V if not REVERSE else 0) + i_v * BV + tl.arange(0, BV) + p_dk = dk + (i_v * B*H + i_nh) * T*K + ((T - 1) * K if not 
REVERSE else 0) + i_k * BK + tl.arange(0, BK) + p_dv = dv + (i_k * B*H + i_nh) * T*V + ((T - 1) * V if not REVERSE else 0) + i_v * BV + tl.arange(0, BV) + if USE_G: + p_g = g + i_nh * T + ((T - 1) if not REVERSE else 0) + if USE_GK: + p_gk = gk + i_nh * T*K + ((T - 1) * K if not REVERSE else 0) + i_k * BK + tl.arange(0, BK) + if USE_GV: + p_gv = gv + i_nh * T*V + ((T - 1) * V if not REVERSE else 0) + i_v * BV + tl.arange(0, BV) + else: + p_q = q + (bos + ((T - 1) if not REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + p_k = k + (bos + ((T - 1) if not REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + p_v = v + (bos + ((T - 1) if not REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + p_do = do + (bos + ((T - 1) if not REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + p_dk = dk + ((i_v * all + bos) + ((T - 1) if not REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + p_dv = dv + ((i_k * all + bos) + ((T - 1) if not REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + if USE_G: + p_g = g + (bos + ((T - 1) if not REVERSE else 0)) * H + i_h + if USE_GK: + p_gk = gk + (bos + ((T - 1) if not REVERSE else 0)) * H*K + i_h * K + i_k * BK + tl.arange(0, BK) + if USE_GV: + p_gv = gv + (bos + ((T - 1) if not REVERSE else 0)) * H*V + i_h * V + i_v * BV + tl.arange(0, BV) + + b_dh = tl.zeros([BK, BV], dtype=tl.float32) + if USE_FINAL_STATE_GRADIENT: + p_dht = dht + i_nh * K*V + (i_k * BK + tl.arange(0, BK)[:, None]) * V + (i_v * BV + tl.arange(0, BV)[None, :]) + b_dh += tl.load(p_dht, mask=mask_h, other=0).to(tl.float32) + + for _ in range(T): + b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32) * scale + b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32) + b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32) + b_do = tl.load(p_do, mask=mask_v, other=0).to(tl.float32) + b_dh += b_q[:, None] * b_do[None, :] + b_dk = tl.sum(b_dh * b_v[None, :], axis=1) + b_dv = tl.sum(b_dh * b_k[:, 
None], axis=0) + if USE_G: + b_g = tl.load(p_g).to(tl.float32) + b_dh *= exp(b_g) + if USE_GK: + b_gk = tl.load(p_gk, mask=mask_k, other=0).to(tl.float32) + b_dh *= exp(b_gk)[:, None] + if USE_GV: + b_gv = tl.load(p_gv, mask=mask_v, other=0).to(tl.float32) + b_dh *= exp(b_gv)[None, :] + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), mask=mask_k) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), mask=mask_v) + + p_q += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * K + p_k += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * K + p_v += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * V + p_do += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * V + p_dk += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * K + p_dv += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * V + if USE_G: + p_g += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) + if USE_GK: + p_gk += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * K + if USE_GV: + p_gv += (1 if REVERSE else -1) * (1 if HEAD_FIRST else H) * V + + if STORE_INITIAL_STATE_GRADIENT: + p_dh0 = dh0 + i_nh * K*V + (i_k * BK + tl.arange(0, BK)[:, None]) * V + (i_v * BV + tl.arange(0, BV)[None, :]) + tl.store(p_dh0, b_dh.to(p_dh0.dtype.element_ty), mask=mask_h) + + +def fused_recurrent_fwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: Optional[torch.Tensor] = None, + gk: Optional[torch.Tensor] = None, + gv: Optional[torch.Tensor] = None, + scale: Optional[float] = None, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False, + reverse: bool = False, + offsets: Optional[torch.LongTensor] = None, + head_first: bool = True +): + if head_first: + B, H, T, K, V = *k.shape, v.shape[-1] + else: + B, T, H, K, V = *k.shape, v.shape[-1] + N = B if offsets is None else len(offsets) - 1 + BK, BV = min(K, 64), min(V, 64) + NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV) + + h0 = initial_state + if output_final_state: + ht = q.new_empty(N, H, K, V, dtype=torch.float32) + else: + ht = 
None + o = q.new_empty(NK, *v.shape, dtype=torch.float32) + + grid = (NV, NK, N * H) + fused_recurrent_fwd_kernel[grid]( + q, + k, + v, + g, + gk, + gv, + o, + h0, + ht, + offsets, + scale, + T=T, + B=B, + H=H, + K=K, + V=V, + BK=BK, + BV=BV, + USE_G=g is not None, + USE_GK=gk is not None, + USE_GV=gv is not None, + REVERSE=reverse, + HEAD_FIRST=head_first + ) + o = o.sum(0) + return o, ht + + +def fused_recurrent_bwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: Optional[torch.Tensor] = None, + gk: Optional[torch.Tensor] = None, + gv: Optional[torch.Tensor] = None, + o: Optional[torch.Tensor] = None, + do: Optional[torch.Tensor] = None, + dht: Optional[torch.Tensor] = None, + scale: Optional[float] = None, + initial_state: Optional[torch.Tensor] = None, + reverse: bool = False, + offsets: Optional[torch.LongTensor] = None, + head_first: bool = True +): + if head_first: + B, H, T, K, V = *k.shape, v.shape[-1] + else: + B, T, H, K, V = *k.shape, v.shape[-1] + N = B if offsets is None else len(offsets) - 1 + + BK, BV = min(K, 64), min(V, 64) + NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV) + + dq = q.new_empty(NV, *q.shape, dtype=torch.float32) + dk = q.new_empty(NV, *k.shape, dtype=torch.float32) + dv = q.new_empty(NK, *v.shape, dtype=torch.float32) + h0 = initial_state + dh0 = torch.empty_like(initial_state) if initial_state is not None else None + + grid = (NV, NK, N * H) + fused_recurrent_bwd_kernel[grid]( + q, + k, + v, + g, + gk, + gv, + h0, + do, + dq, + dk, + dv, + dht, + dh0, + offsets, + scale, + B=B, + T=T, + H=H, + K=K, + V=V, + BK=BK, + BV=BV, + USE_G=g is not None, + USE_GK=gk is not None, + USE_GV=gv is not None, + REVERSE=reverse, + HEAD_FIRST=head_first + ) + dq = dq.sum(0) + dk = dk.sum(0) + dv = dv.sum(0) + dg, dgk, dgv = None, None, None + if g is not None: + dg = chunk_global_cumsum( + (dq * q.float() - dk * k.float()).sum(-1), + reverse=not reverse, + offsets=offsets, + head_first=head_first + ) + if gk is not None: + dgk = 
chunk_global_cumsum( + dq * q.float() - dk * k.float(), + reverse=not reverse, + offsets=offsets, + head_first=head_first + ) + if gv is not None: + dgv = chunk_global_cumsum( + do.float() * o.float() - dv * v.float(), + reverse=not reverse, + offsets=offsets, + head_first=head_first + ) + + return dq, dk, dv, dg, dgk, dgv, dh0 + + +class FusedRecurrentFunction(torch.autograd.Function): + + @staticmethod + @input_guard + @autocast_custom_fwd + def forward( + ctx, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: Optional[torch.Tensor] = None, + gk: Optional[torch.Tensor] = None, + gv: Optional[torch.Tensor] = None, + scale: Optional[float] = None, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False, + reverse: bool = False, + offsets: Optional[torch.LongTensor] = None, + head_first: bool = True + ): + o, ht = fused_recurrent_fwd( + q=q, + k=k, + v=v, + g=g, + gk=gk, + gv=gv, + scale=scale, + initial_state=initial_state, + output_final_state=output_final_state, + reverse=reverse, + offsets=offsets, + head_first=head_first + ) + ctx.save_for_backward(q, k, v, g, gk, gv, initial_state, o) + ctx.scale = scale + ctx.reverse = reverse + ctx.offsets = offsets + ctx.head_first = head_first + return o.to(q.dtype), ht + + @staticmethod + @input_guard + @autocast_custom_bwd + def backward(ctx, do, dht): + q, k, v, g, gk, gv, initial_state, o = ctx.saved_tensors + # not supported yet. 
+ if dht is not None: + if not dht.eq(0).all(): + if g is not None: + assert g.requires_grad is False, "Cannot load final state gradient and use gates at the same time" + if gk is not None: + assert gk.requires_grad is False, "Cannot load final state gradient and use gates at the same time" + if gv is not None: + assert gv.requires_grad is False, "Cannot load final state gradient and use gates at the same time" + dq, dk, dv, dg, dgk, dgv, dh0 = fused_recurrent_bwd( + q=q, + k=k, + v=v, + g=g, + gk=gk, + gv=gv, + o=o, + do=do, + dht=dht, + scale=ctx.scale, + initial_state=initial_state, + reverse=ctx.reverse, + offsets=ctx.offsets, + head_first=ctx.head_first + ) + return dq.to(q.dtype), dk.to(k.dtype), dv.to(v.dtype), dg, dgk, dgv, None, dh0, None, None, None, None + + +def fused_recurrent( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: Optional[torch.Tensor] = None, + gk: Optional[torch.Tensor] = None, + gv: Optional[torch.Tensor] = None, + scale: Optional[float] = None, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False, + reverse: bool = False, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = True +): + if scale is None: + scale = k.shape[-1] ** -0.5 + return FusedRecurrentFunction.apply( + q, + k, + v, + g, + gk, + gv, + scale, + initial_state, + output_final_state, + reverse, + cu_seqlens, + head_first + ) diff --git a/fla/ops/common/utils.py b/fla/ops/common/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c61cf9a36b4a79578f8692070bce68a6d39830b8 --- /dev/null +++ b/fla/ops/common/utils.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +import torch +import triton +import triton.language as tl + +from fla.utils import tensor_cache + + +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps) + for num_warps in [4, 8, 16, 32] + ], + key=['B'], +) +@triton.jit +def prepare_position_ids_kernel( + y, + 
offsets, + B: tl.constexpr +): + i_n = tl.program_id(0) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + + o = tl.arange(0, B) + for i in range(0, tl.cdiv(T, B) * B, B): + o_i = o + i + tl.store(y + bos + o_i, o_i, o_i < T) + + +@tensor_cache +def prepare_lens(offsets: torch.LongTensor) -> torch.LongTensor: + return offsets[1:] - offsets[:-1] + + +@tensor_cache +def prepare_position_ids(offsets: torch.LongTensor) -> torch.LongTensor: + return torch.cat([torch.arange(n, dtype=offsets.dtype, device=offsets.device) for n in prepare_lens(offsets).unbind()]) + + +@tensor_cache +def prepare_sequence_ids(position_ids: torch.LongTensor) -> torch.LongTensor: + return position_ids.eq(0).cumsum(0) - 1 + + +@tensor_cache +def prepare_token_indices(offsets: torch.LongTensor) -> torch.LongTensor: + position_ids = prepare_position_ids(offsets) + return torch.stack([prepare_sequence_ids(position_ids), position_ids], 1).to(offsets) + + +@tensor_cache +def prepare_chunk_indices( + offsets: torch.LongTensor, + chunk_size: int +) -> torch.LongTensor: + indices = torch.cat([torch.arange(n) for n in triton.cdiv(prepare_lens(offsets), chunk_size).tolist()]) + return torch.stack([prepare_sequence_ids(indices), indices], 1).to(offsets) + + +@tensor_cache +def prepare_chunk_offsets( + offsets: torch.LongTensor, + chunk_size: int +) -> torch.LongTensor: + return torch.cat([offsets.new_tensor([0]), triton.cdiv(prepare_lens(offsets), chunk_size)]).cumsum(-1) diff --git a/fla/ops/delta_rule/__pycache__/fused_recurrent.cpython-312.pyc b/fla/ops/delta_rule/__pycache__/fused_recurrent.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd616a716cdb79657deb8ad9e2a3a945fbd9e3bc Binary files /dev/null and b/fla/ops/delta_rule/__pycache__/fused_recurrent.cpython-312.pyc differ diff --git a/fla/ops/delta_rule/fused_chunk.py b/fla/ops/delta_rule/fused_chunk.py new file mode 100644 index 
0000000000000000000000000000000000000000..6347fb9af47d3d9f82c03ea9aedbfa09fc1bfbc1 --- /dev/null +++ b/fla/ops/delta_rule/fused_chunk.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- + +def fused_chunk_delta_rule( + **kwargs +): + raise NotImplementedError("fused_chunk_delta_rule is deprecated. Please use chunk_delta_rule instead.") diff --git a/fla/ops/forgetting_attn/parallel.py b/fla/ops/forgetting_attn/parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..88fea7f29b238e488848711ed894cb6cae7ea91b --- /dev/null +++ b/fla/ops/forgetting_attn/parallel.py @@ -0,0 +1,708 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional + +import torch +import triton +import triton.language as tl +from einops import rearrange, reduce + +from fla.ops.common.utils import prepare_chunk_indices +from fla.ops.utils import chunk_global_cumsum, chunk_local_cumsum +from fla.ops.utils.op import div, exp, log +from fla.utils import autocast_custom_bwd, autocast_custom_fwd, check_shared_mem, input_guard + + +@triton.heuristics({ + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in [1, 2, 4] + ([8] if check_shared_mem('hopper') else []) + for num_stages in [2, 3, 4, 5] + ], + key=['B', 'H', 'G', 'K', 'V', 'BK', 'BV'], +) +@triton.jit +def parallel_forgetting_attn_fwd_kernel( + q, + k, + v, + g, + o, + lse, + scale, + offsets, + indices, + T, + B: tl.constexpr, + H: tl.constexpr, + HQ: tl.constexpr, + G: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_OFFSETS: tl.constexpr +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_hq = i_bh // HQ, i_bh % HQ + i_h = i_hq // G + + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 
1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + else: + i_n = i_b + bos, eos = i_n * T, i_n * T + T + + p_q = tl.make_block_ptr(q + (bos * HQ + i_hq) * K, (T, K), (HQ*K, 1), (i_t * BT, 0), (BT, BK), (1, 0)) + p_g = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + p_o = tl.make_block_ptr(o + (bos * HQ + i_hq) * V, (T, V), (HQ*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_lse = tl.make_block_ptr(lse + bos * HQ + i_hq, (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + + # the Q block is kept in the shared memory throughout the whole kernel + # [BT, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BT,] + b_gq = tl.load(p_g, boundary_check=(0,)).to(tl.float32) + # [BT, BV] + b_o = tl.zeros([BT, BV], dtype=tl.float32) + + b_m = tl.full([BT], float('-inf'), dtype=tl.float32) + b_acc = tl.zeros([BT], dtype=tl.float32) + + # [BT] + o_q = i_t * BT + tl.arange(0, BT) + for i_s in range(i_t * BT, min((i_t + 1) * BT, T), BS): + p_k = tl.make_block_ptr(k + (bos * H + i_h) * K, (K, T), (1, H*K), (0, i_s), (BK, BS), (0, 1)) + p_v = tl.make_block_ptr(v + (bos * H + i_h) * V, (T, V), (H*V, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + p_gk = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + + # [BS] + o_k = i_s + tl.arange(0, BS) + # [BK, BS] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BS, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BS,] + b_gk = tl.load(p_gk, boundary_check=(0,)) + # [BT, BS] + b_s = tl.dot(b_q, b_k) + b_gq[:, None] - b_gk[None, :] + b_s = tl.where(o_q[:, None] >= o_k[None, :], b_s, float('-inf')) + + # [BT] + b_m, b_mp = tl.maximum(b_m, tl.max(b_s, 1)), b_m + b_r = exp(b_mp - b_m) + # [BT, BS] + b_p = exp(b_s - b_m[:, None]) + # [BT] + b_acc = b_acc * b_r + tl.sum(b_p, 1) + # [BT, BV] + b_o = b_o * b_r[:, None] + tl.dot(b_p.to(b_q.dtype), b_v) + + b_mp = b_m + + for i_s in range(i_t * BT - 
BS, -BS, -BS): + p_k = tl.make_block_ptr(k + (bos * H + i_h) * K, (K, T), (1, H*K), (0, i_s), (BK, BS), (0, 1)) + p_v = tl.make_block_ptr(v + (bos * H + i_h) * V, (T, V), (H*V, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + p_gk = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + + # [BK, BS] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BS, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BS,] + b_gk = tl.load(p_gk, boundary_check=(0,)).to(tl.float32) + + b_gn = tl.load(g + (bos + min(i_s + BS, T) - 1) * HQ + i_hq).to(tl.float32) + b_gp = tl.load(g + (bos + i_s - 1) * HQ + i_hq).to(tl.float32) if i_s % BT > 0 else 0. + # [BT, BS] + b_s = tl.dot(b_q, b_k) + b_gq[:, None] + (b_gn - b_gk)[None, :] + + b_gq += b_gn - b_gp + b_m, b_mp = tl.maximum(b_m, tl.max(b_s, 1)), b_m + b_r = exp(b_mp - b_m) + # [BT, BS] + b_p = exp(b_s - b_m[:, None]) + # [BT] + b_acc = b_acc * b_r + tl.sum(b_p, 1) + # [BT, BV] + b_o = b_o * b_r[:, None] + tl.dot(b_p.to(b_q.dtype), b_v) + + b_mp = b_m + + b_o = div(b_o, b_acc[:, None]) + b_m += log(b_acc) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_lse, b_m.to(p_lse.dtype.element_ty), boundary_check=(0,)) + + +@triton.jit +def parallel_forgetting_attn_bwd_kernel_preprocess( + o, + do, + delta, + B: tl.constexpr, + V: tl.constexpr +): + i_n = tl.program_id(0) + o_d = tl.arange(0, B) + m_d = o_d < V + + b_o = tl.load(o + i_n * V + o_d, mask=m_d, other=0) + b_do = tl.load(do + i_n * V + o_d, mask=m_d, other=0).to(tl.float32) + b_delta = tl.sum(b_o * b_do) + + tl.store(delta + i_n, b_delta.to(delta.dtype.element_ty)) + + +@triton.heuristics({ + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in [1, 2, 4] + ([8] if check_shared_mem('hopper') else []) + for num_stages in [2, 3, 4] + ], + key=['B', 'H', 'G', 'K', 'V', 'BK', 'BV'], +) 
+@triton.jit(do_not_specialize=['T']) +def parallel_forgetting_attn_bwd_kernel_dq( + q, + k, + v, + g, + lse, + delta, + do, + dq, + dg, + scale, + offsets, + indices, + T, + B: tl.constexpr, + H: tl.constexpr, + HQ: tl.constexpr, + G: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_OFFSETS: tl.constexpr +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_hq = i_bh // HQ, i_bh % HQ + i_h = i_hq // G + + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + else: + i_n = i_b + bos, eos = i_n * T, i_n * T + T + + p_q = tl.make_block_ptr(q + (bos * HQ + i_hq) * K, (T, K), (HQ*K, 1), (i_t * BT, 0), (BT, BK), (1, 0)) + p_g = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + p_dq = tl.make_block_ptr(dq + (bos * HQ + i_hq) * K, (T, K), (HQ*K, 1), (i_t * BT, 0), (BT, BK), (1, 0)) + p_dg = tl.make_block_ptr(dg + (bos * HQ + i_hq), (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + p_do = tl.make_block_ptr(do + (bos * HQ + i_hq) * V, (T, V), (HQ*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_lse = tl.make_block_ptr(lse + bos * HQ + i_hq, (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + p_delta = tl.make_block_ptr(delta + bos * HQ + i_hq, (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + + # [BT, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BT, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BT] + b_gq = tl.load(p_g, boundary_check=(0,)).to(tl.float32) + b_lse = tl.load(p_lse, boundary_check=(0,)) + b_delta = tl.load(p_delta, boundary_check=(0,)) + + # [BT] + o_q = i_t * BT + tl.arange(0, BT) + # [BT, BK] + b_dq = tl.zeros([BT, BK], dtype=tl.float32) + # [BT] + b_dg = tl.zeros([BT,], dtype=tl.float32) + for i_s in range(i_t * BT, 
min((i_t + 1) * BT, T), BS): + p_k = tl.make_block_ptr(k + (bos * H + i_h) * K, (K, T), (1, H*K), (0, i_s), (BK, BS), (0, 1)) + p_v = tl.make_block_ptr(v + (bos * H + i_h) * V, (V, T), (1, H*V), (i_v * BV, i_s), (BV, BS), (0, 1)) + p_gk = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + + # [BS] + o_k = i_s + tl.arange(0, BS) + # [BK, BS] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BV, BS] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BS,] + b_gk = tl.load(p_gk, boundary_check=(0,)) + # [BT, BS] + b_s = tl.dot(b_q, b_k) + (b_gq - b_lse)[:, None] - b_gk[None, :] + b_p = exp(tl.where(o_q[:, None] >= o_k[None, :], b_s, float('-inf'))) + + # [BT, BV] @ [BV, BS] -> [BT, BS] + b_dp = tl.dot(b_do, b_v) + b_ds = b_p * (b_dp.to(tl.float32) - b_delta[:, None]) + # [BT, BS] @ [BS, BK] -> [BT, BK] + b_dq += tl.dot(b_ds.to(b_k.dtype), tl.trans(b_k)) + # [BT] + b_dg += tl.sum(b_ds, 1) + + for i_s in range(i_t * BT - BS, -BS, -BS): + p_k = tl.make_block_ptr(k + (bos * H + i_h) * K, (K, T), (1, H*K), (0, i_s), (BK, BS), (0, 1)) + p_v = tl.make_block_ptr(v + (bos * H + i_h) * V, (V, T), (1, H*V), (i_v * BV, i_s), (BV, BS), (0, 1)) + p_gk = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + + # [BK, BS] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BV, BS] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BS,] + b_gk = tl.load(p_gk, boundary_check=(0,)).to(tl.float32) + + b_gn = tl.load(g + (bos + min(i_s + BS, T) - 1) * HQ + i_hq).to(tl.float32) + b_gp = tl.load(g + (bos + i_s - 1) * HQ + i_hq).to(tl.float32) if i_s % BT > 0 else 0. 
+ # [BT, BS] + b_s = tl.dot(b_q, b_k) + (b_gq - b_lse)[:, None] + (b_gn - b_gk)[None, :] + b_p = exp(b_s) + # [BT, BV] @ [BV, BS] -> [BT, BS] + b_dp = tl.dot(b_do, b_v) + b_ds = b_p * (b_dp - b_delta[:, None]) + # [BT, BS] @ [BS, BK] -> [BT, BK] + b_dq += tl.dot(b_ds.to(b_k.dtype), tl.trans(b_k)) + # [BT] + b_dg += tl.sum(b_ds, 1) + + b_gq += b_gn - b_gp + + b_dq *= scale + + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dg, b_dg.to(p_dg.dtype.element_ty), boundary_check=(0,)) + + +@triton.heuristics({ + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in [1, 2, 4, 8] + for num_stages in [2, 3, 4] + ], + key=['B', 'H', 'G', 'K', 'V', 'BK', 'BV'], +) +@triton.jit(do_not_specialize=['T']) +def parallel_forgetting_attn_bwd_kernel_dkv( + q, + k, + v, + g, + lse, + delta, + do, + dk, + dv, + dg, + offsets, + indices, + scale, + T, + B: tl.constexpr, + H: tl.constexpr, + HQ: tl.constexpr, + G: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_OFFSETS: tl.constexpr +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_hq = i_bh // HQ, i_bh % HQ + i_h = i_hq // G + + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + else: + i_n = i_b + bos, eos = i_n * T, i_n * T + T + + p_k = tl.make_block_ptr(k + (bos * H + i_h) * K, (T, K), (H*K, 1), (i_t * BT, 0), (BT, BK), (1, 0)) + p_v = tl.make_block_ptr(v + (bos * H + i_h) * V, (T, V), (H*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_gk = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + p_dk = tl.make_block_ptr(dk + (bos * HQ + i_hq) * K, (T, K), (HQ*K, 
1), (i_t * BT, 0), (BT, BK), (1, 0)) + p_dv = tl.make_block_ptr(dv + (bos * HQ + i_hq) * V, (T, V), (HQ*V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_dg = tl.make_block_ptr(dg + (bos * HQ + i_hq), (T,), (HQ,), (i_t * BT,), (BT,), (0,)) + + # [BT, BK] + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_dk = tl.zeros([BT, BK], dtype=tl.float32) + # [BT, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_dv = tl.zeros([BT, BV], dtype=tl.float32) + # [BT] + b_gk = tl.load(p_gk, boundary_check=(0,)).to(tl.float32) + b_dg = tl.zeros([BT,], dtype=tl.float32) + + o_k = i_t * BT + tl.arange(0, BT) + m_k = o_k < T + for i_s in range(i_t * BT, min((i_t + 1) * BT, T), BS): + p_q = tl.make_block_ptr(q + (bos * HQ + i_hq) * K, (T, K), (HQ*K, 1), (i_s, 0), (BS, BK), (1, 0)) + p_do = tl.make_block_ptr(do + (bos * HQ + i_hq) * V, (T, V), (HQ*V, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + p_lse = tl.make_block_ptr(lse + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + p_delta = tl.make_block_ptr(delta + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + p_gq = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + + # [BS] + o_q = i_s + tl.arange(0, BS) + # [BS, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BS, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BS] + b_lse = tl.load(p_lse, boundary_check=(0,)) + b_delta = tl.load(p_delta, boundary_check=(0,)) + b_gq = tl.load(p_gq, boundary_check=(0,)).to(tl.float32) + + m_q = o_q < T + m_s = (o_k[:, None] <= o_q[None, :]) & m_k[:, None] & m_q[None, :] + # [BT, BS] + b_s = tl.dot(b_k, tl.trans(b_q)) - b_gk[:, None] + (b_gq - b_lse)[None, :] + b_p = tl.where(m_s, exp(b_s), 0) + # [BT, BS] @ [BS, BV] -> [BT, BV] + b_dv += tl.dot(b_p.to(b_do.dtype), b_do) + # [BT, BV] @ [BV, BS] -> [BT, BS] + b_dp = tl.dot(b_v, tl.trans(b_do)) + # [BT, BS] + b_ds = b_p * (b_dp - b_delta[None, :]) + # [BT, BS] @ [BS, BK] -> [BT, BK] + b_dk += tl.dot(b_ds.to(b_q.dtype), b_q) + # [BT] 
+ b_dg -= tl.sum(b_ds, 1) + + b_gk -= tl.load(g + (bos + min((i_t + 1) * BT, T) - 1) * HQ + i_hq).to(tl.float32) + for i_s in range((i_t + 1) * BT, T, BS): + p_q = tl.make_block_ptr(q + (bos * HQ + i_hq) * K, (T, K), (HQ*K, 1), (i_s, 0), (BS, BK), (1, 0)) + p_do = tl.make_block_ptr(do + (bos * HQ + i_hq) * V, (T, V), (HQ*V, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + p_lse = tl.make_block_ptr(lse + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + p_delta = tl.make_block_ptr(delta + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + p_gq = tl.make_block_ptr(g + bos * HQ + i_hq, (T,), (HQ,), (i_s,), (BS,), (0,)) + + # [BS] + o_q = i_s + tl.arange(0, BS) + # [BS, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + # [BS, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BS] + b_lse = tl.load(p_lse, boundary_check=(0,)) + b_delta = tl.load(p_delta, boundary_check=(0,)) + b_gq = tl.load(p_gq, boundary_check=(0,)).to(tl.float32) + + b_gn = tl.load(g + (bos + min(i_s + BS, T) - 1) * HQ + i_hq).to(tl.float32) + b_gp = tl.load(g + (bos + i_s - 1) * HQ + i_hq).to(tl.float32) if i_s % BT > 0 else 0. 
+ # [BT, BS] + b_s = tl.dot(b_k, tl.trans(b_q)) - (b_gk + b_gp)[:, None] + (b_gq - b_lse)[None, :] + b_p = exp(b_s) + # [BT, BS] @ [BS, BV] -> [BT, BV] + b_dv += tl.dot(b_p.to(b_do.dtype), b_do) + # [BT, BV] @ [BV, BS] -> [BT, BS] + b_dp = tl.dot(b_v, tl.trans(b_do)) + # [BT, BS] + b_ds = b_p * (b_dp - b_delta[None, :]) + # [BT, BS] @ [BS, BK] -> [BT, BK] + b_dk += tl.dot(b_ds.to(b_q.dtype), b_q) + # [BT] + b_dg -= tl.sum(b_ds, 1) + + b_gk -= b_gn - b_gp + + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dg, b_dg.to(p_dg.dtype.element_ty), boundary_check=(0,)) + + +def parallel_forgetting_attn_fwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + scale: float, + chunk_size: int = 128, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, +): + B, T, H, K, V = *k.shape, v.shape[-1] + HQ = q.shape[2] + G = HQ // H + BT = chunk_size + BK = max(16, triton.next_power_of_2(K)) + assert V <= 256, "V must be less than or equal to 256" + if check_shared_mem('hopper'): + BS = min(64, max(16, triton.next_power_of_2(T))) + else: + BS = min(32, max(16, triton.next_power_of_2(T))) + BV = min(256, max(16, triton.next_power_of_2(V))) + NV = triton.cdiv(V, BV) + NT = triton.cdiv(T, BT) if offsets is None else len(indices) + + o = torch.empty(B, T, HQ, V, dtype=v.dtype, device=q.device) + lse = torch.empty(B, T, HQ, dtype=torch.float, device=q.device) + + grid = (NV, NT, B * HQ) + parallel_forgetting_attn_fwd_kernel[grid]( + q=q, + k=k, + v=v, + g=g, + o=o, + lse=lse, + scale=scale, + offsets=offsets, + indices=indices, + B=B, + T=T, + H=H, + HQ=HQ, + G=G, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV, + ) + return o, lse + + +def parallel_forgetting_attn_bwd_preprocess( + o: torch.Tensor, + do: torch.Tensor +): + V = o.shape[-1] + delta = torch.empty_like(o[..., 0], dtype=torch.float) + 
parallel_forgetting_attn_bwd_kernel_preprocess[(delta.numel(),)]( + o=o, + do=do, + delta=delta, + B=triton.next_power_of_2(V), + V=V, + ) + return delta + + +def parallel_forgetting_attn_bwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + o: torch.Tensor, + lse: torch.Tensor, + do: torch.Tensor, + scale: float = None, + chunk_size: int = 128, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, +): + B, T, H, K, V = *k.shape, v.shape[-1] + HQ = q.shape[2] + G = HQ // H + BT = chunk_size + BS = min(32, max(16, triton.next_power_of_2(T))) + BK = max(16, triton.next_power_of_2(K)) + BV = max(16, triton.next_power_of_2(V)) + NV = triton.cdiv(V, BV) + NT = triton.cdiv(T, BT) if offsets is None else len(indices) + + delta = parallel_forgetting_attn_bwd_preprocess(o, do) + dq = q.new_empty(B, T, HQ, K, dtype=q.dtype) + dk = q.new_empty(B, T, HQ, K, dtype=k.dtype if H == HQ else torch.float) + dv = q.new_empty(B, T, HQ, V, dtype=v.dtype if H == HQ else torch.float) + dg = q.new_empty(g.shape, dtype=torch.float) + # NOTE: the original `dg` can be destroyed during autotuning + # this is [a known triton issue](https://github.com/triton-lang/triton/issues/5082), which will be fixed in 3.3 (?) 
+ # so we need to make a copy of `dg` + dg2 = q.new_empty(g.shape, dtype=torch.float) + grid = (NV, NT, B * HQ) + parallel_forgetting_attn_bwd_kernel_dq[grid]( + q=q, + k=k, + v=v, + g=g, + lse=lse, + delta=delta, + do=do, + dq=dq, + dg=dg, + offsets=offsets, + indices=indices, + scale=scale, + T=T, + B=B, + H=H, + HQ=HQ, + G=G, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV + ) + parallel_forgetting_attn_bwd_kernel_dkv[grid]( + q=q, + k=k, + v=v, + g=g, + lse=lse, + delta=delta, + do=do, + dk=dk, + dv=dv, + dg=dg2, + offsets=offsets, + indices=indices, + scale=scale, + T=T, + B=B, + H=H, + HQ=HQ, + G=G, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV + ) + dk = reduce(dk, 'b t (h g) k -> b t h k', g=G, reduction='sum') + dv = reduce(dv, 'b t (h g) v -> b t h v', g=G, reduction='sum') + dg = dg.add_(dg2) + return dq, dk, dv, dg + + +@torch.compile +class ParallelForgettingAttentionFunction(torch.autograd.Function): + + @staticmethod + @input_guard + @autocast_custom_fwd + def forward(ctx, q, k, v, g, scale, offsets): + ctx.dtype = q.dtype + if check_shared_mem('hopper'): + chunk_size = min(128, max(16, triton.next_power_of_2(q.shape[1]))) + else: + chunk_size = min(64, max(16, triton.next_power_of_2(q.shape[1]))) + # 2-d indices denoting the offsets of chunks in each sequence + # for example, if the passed `offsets` is [0, 100, 356] and `chunk_size` is 64, + # then there are 2 and 4 chunks in the 1st and 2nd sequences respectively, and `indices` will be + # [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]] + indices = prepare_chunk_indices(offsets, chunk_size) if offsets is not None else None + + g = chunk_local_cumsum(g, chunk_size, offsets=offsets, indices=indices, head_first=False) + o, lse = parallel_forgetting_attn_fwd( + q=q, + k=k, + v=v, + g=g, + scale=scale, + chunk_size=chunk_size, + offsets=offsets, + indices=indices + ) + ctx.save_for_backward(q, k, v, g, o, lse) + ctx.chunk_size = chunk_size + ctx.offsets = offsets + ctx.indices = indices + ctx.scale 
= scale + return o.to(q.dtype) + + @staticmethod + @input_guard + @autocast_custom_bwd + def backward(ctx, do): + q, k, v, g, o, lse = ctx.saved_tensors + dq, dk, dv, dg = parallel_forgetting_attn_bwd( + q=q, + k=k, + v=v, + g=g, + o=o, + lse=lse, + do=do, + scale=ctx.scale, + chunk_size=ctx.chunk_size, + offsets=ctx.offsets, + indices=ctx.indices + ) + dg = chunk_global_cumsum(dg, reverse=True, head_first=False, offsets=ctx.offsets) + return dq.to(q), dk.to(k), dv.to(v), dg.to(g), None, None, None, None, None, None, None, None + + +def parallel_forgetting_attn( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + scale: Optional[float] = None, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = False +) -> torch.Tensor: + r""" + Args: + q (torch.Tensor): + queries of shape `[B, T, HQ, K]` if `head_first=False` else `[B, HQ, T, K]`. + k (torch.Tensor): + keys of shape `[B, T, H, K]` if `head_first=False` else `[B, H, T, K]`. + GQA will be applied if HQ is divisible by H. + v (torch.Tensor): + values of shape `[B, T, H, V]` if `head_first=False` else `[B, H, T, V]`. + g (torch.Tensor): + Forget gates (in **log space**) of shape `[B, T, HQ]` if `head_first=False` else `[B, HQ, T]`. + scale (Optional[int]): + Scale factor for attention scores. + If not provided, it will default to `1 / sqrt(K)`. Default: `None`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + head_first (Optional[bool]): + Whether the inputs are in the head-first format. Default: `False`. + + Returns: + o (torch.Tensor): + Outputs of shape `[B, T, HQ, V]` if `head_first=False` else `[B, HQ, T, V]`. 
+ """ + if scale is None: + scale = k.shape[-1] ** -0.5 + if cu_seqlens is not None: + assert q.shape[0] == 1, "batch size must be 1 when cu_seqlens are provided" + if g is not None: + g = g.float() + if head_first: + q, k, v = map(lambda x: rearrange(x, 'b h t d -> b t h d'), (q, k, v)) + g = rearrange(g, 'b h t -> b t h') + o = ParallelForgettingAttentionFunction.apply(q, k, v, g, scale, cu_seqlens) + if head_first: + o = rearrange(o, 'b t h d -> b h t d') + return o diff --git a/fla/ops/gated_delta_rule/__init__.py b/fla/ops/gated_delta_rule/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad7f86639b3482c78768cf0511d2eb2650305e7f --- /dev/null +++ b/fla/ops/gated_delta_rule/__init__.py @@ -0,0 +1,7 @@ +from .chunk import chunk_gated_delta_rule +from .fused_recurrent import fused_recurrent_gated_delta_rule + +__all__ = [ + "chunk_gated_delta_rule", + "fused_recurrent_gated_delta_rule" +] diff --git a/fla/ops/gated_delta_rule/chunk.py b/fla/ops/gated_delta_rule/chunk.py new file mode 100644 index 0000000000000000000000000000000000000000..abbb52a56fbaf62a4c818c32217dc8c95a0e2292 --- /dev/null +++ b/fla/ops/gated_delta_rule/chunk.py @@ -0,0 +1,392 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional + +import torch +import triton +from einops import rearrange + +from fla.modules.l2norm import l2norm_bwd, l2norm_fwd +from fla.ops.common.chunk_delta_h import chunk_gated_delta_rule_bwd_dhu, chunk_gated_delta_rule_fwd_h +from fla.ops.common.chunk_o import chunk_bwd_dqkwg, chunk_bwd_dv_local, chunk_fwd_o +from fla.ops.gated_delta_rule.wy_fast import bwd_prepare_wy_repr, fwd_prepare_wy_repr, fwd_recompute_w_u +from fla.ops.utils import chunk_local_cumsum +from fla.utils import autocast_custom_bwd, autocast_custom_fwd, input_guard + + +def chunk_gated_delta_rule_fwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + beta: torch.Tensor, + scale: float, + 
initial_state: torch.Tensor, + output_final_state: bool, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, + head_first: bool = True, + chunk_size: int = 64 +): + g = chunk_local_cumsum(g, chunk_size, offsets=offsets, indices=indices, head_first=head_first) + # obtain WY representation. u is actually the new v. + w, u, Aw, Au = fwd_prepare_wy_repr( + k=k, + v=v, + beta=beta, + g=g, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=chunk_size + ) + + h, v_new, final_state = chunk_gated_delta_rule_fwd_h( + k=k, + w=w, + u=u, + g=g, + initial_state=initial_state, + output_final_state=output_final_state, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=chunk_size + ) + + # obtain output + o = chunk_fwd_o( + q=q, + k=k, + v=v_new, + h=h, + g=g, + scale=scale, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=chunk_size + ) + return g, o, Aw, Au, final_state + + +def chunk_gated_delta_rule_bwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + beta: torch.Tensor, + Aw: torch.Tensor, + Au: torch.Tensor, + scale: float, + initial_state: torch.Tensor, + do: torch.Tensor, + dht: torch.Tensor, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, + head_first: bool = True, + chunk_size: int = 64 +): + T = q.shape[2] if head_first else q.shape[1] + BT = min(chunk_size, max(triton.next_power_of_2(T), 16)) + w, u = fwd_recompute_w_u( + k=k, + v=v, + beta=beta, + Aw=Aw, + Au=Au, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=BT + ) + h, v_new, _ = chunk_gated_delta_rule_fwd_h( + k=k, + w=w, + u=u, + g=g, + initial_state=initial_state, + output_final_state=False, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=BT + ) + dv = chunk_bwd_dv_local( + q=q, + k=k, + g=g, + do=do, + dh=None, + scale=scale, + offsets=offsets, + indices=indices, + 
head_first=head_first, + chunk_size=BT + ) + dh, dh0, dv = chunk_gated_delta_rule_bwd_dhu( + q=q, + k=k, + w=w, + g=g, + h0=initial_state, + dht=dht, + do=do, + dv=dv, + scale=scale, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=BT + ) + dq, dk, dw, dg = chunk_bwd_dqkwg( + q=q, + k=k, + v=v_new, + w=w, + g=g, + h=h, + dv=dv, + do=do, + dh=dh, + scale=scale, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=BT + ) + dk2, dv, db, dg2 = bwd_prepare_wy_repr( + k=k, + v=v, + beta=beta, + g=g, + Aw=Aw, + Au=Au, + dw=dw, + du=dv, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=BT + ) + dk.add_(dk2) + dg.add_(dg2) + assert dg.dtype == torch.float32, "dg should be fp32" + dg = chunk_local_cumsum(dg, chunk_size, reverse=True, offsets=offsets, indices=indices, head_first=head_first) + return dq, dk, dv, db, dg, dh0 + + +class ChunkGatedDeltaRuleFunction(torch.autograd.Function): + + @staticmethod + @input_guard + @autocast_custom_fwd + def forward( + ctx, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + beta: torch.Tensor, + scale: float, + initial_state: torch.Tensor, + output_final_state: bool, + offsets: Optional[torch.LongTensor] = None, + head_first: bool = True, + use_qk_l2norm_in_kernel: bool = False + ): + chunk_size = 64 + q_orig = q + k_orig = k + + if use_qk_l2norm_in_kernel: + q = l2norm_fwd(q) + k = l2norm_fwd(k) + + # 2-d indices denoting the offsets of chunks in each sequence + # for example, if the passed `offsets` is [0, 100, 356] and `chunk_size` is 64, + # then there are 2 and 4 chunks in the 1st and 2nd sequences respectively, and `indices` will be + # [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]] + indices = None + if offsets is not None: + indices = torch.cat([torch.arange(n) for n in triton.cdiv(offsets[1:] - offsets[:-1], chunk_size).tolist()]) + indices = torch.stack([indices.eq(0).cumsum(0) - 1, indices], 1).to(offsets) + + g, o, Aw, 
Au, final_state = chunk_gated_delta_rule_fwd( + q=q, + k=k, + v=v, + g=g, + beta=beta, + scale=scale, + initial_state=initial_state, + output_final_state=output_final_state, + offsets=offsets, + indices=indices, + head_first=head_first, + chunk_size=chunk_size, + ) + ctx.save_for_backward(q_orig, k_orig, v, g, beta, Aw, Au, initial_state, offsets, indices) + ctx.chunk_size = chunk_size + ctx.scale = scale + ctx.head_first = head_first + ctx.use_qk_l2norm_in_kernel = use_qk_l2norm_in_kernel + return o.to(q.dtype), final_state + + @staticmethod + @input_guard + @autocast_custom_bwd + def backward( + ctx, + do: torch.Tensor, + dht: torch.Tensor + ): + q, k, v, g, beta, Aw, Au, initial_state, offsets, indices = ctx.saved_tensors + if ctx.use_qk_l2norm_in_kernel: + q, q_orig = l2norm_fwd(q), q + k, k_orig = l2norm_fwd(k), k + dq, dk, dv, db, dg, dh0 = chunk_gated_delta_rule_bwd( + q=q, + k=k, + v=v, + g=g, + beta=beta, + Aw=Aw, + Au=Au, + scale=ctx.scale, + initial_state=initial_state, + do=do, + dht=dht, + offsets=offsets, + indices=indices, + head_first=ctx.head_first, + chunk_size=ctx.chunk_size + ) + if ctx.use_qk_l2norm_in_kernel: + dq = l2norm_bwd(q_orig, dq) + dk = l2norm_bwd(k_orig, dk) + return dq.to(q), dk.to(k), dv.to(v), dg.to(g), db.to(beta), None, dh0, None, None, None, None + + +@torch.compiler.disable +def chunk_gated_delta_rule( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + beta: torch.Tensor, + scale: float = None, + initial_state: torch.Tensor = None, + output_final_state: bool = False, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = False, + use_qk_l2norm_in_kernel: bool = False +): + r""" + Args: + q (torch.Tensor): + queries of shape `[B, T, H, K]` if `head_first=False` else `[B, H, T, K]`. + k (torch.Tensor): + keys of shape `[B, T, H, K]` if `head_first=False` else `[B, H, T, K]`. + v (torch.Tensor): + values of shape `[B, T, H, V]` if `head_first=False` else `[B, H, T, V]`. 
+ g (torch.Tensor): + (forget) gating tensor (in log space!) of shape `[B, T, H]` if `head_first=False` else `[B, H, T]`. + beta (torch.Tensor): + betas of shape `[B, T, H]` if `head_first=False` else `[B, H, T]`. + scale (Optional[int]): + Scale factor for the RetNet attention scores. + If not provided, it will default to `1 / sqrt(K)`. Default: `None`. + initial_state (Optional[torch.Tensor]): + Initial state of shape `[N, H, K, V]` for `N` input sequences. + For equal-length input sequences, `N` equals the batch size `B`. + Default: `None`. + output_final_state (Optional[bool]): + Whether to output the final state of shape `[N, H, K, V]`. Default: `False`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + head_first (Optional[bool]): + Whether the inputs are in the head-first format, which is not supported for variable-length inputs. + Default: `False`. + + Returns: + o (torch.Tensor): + Outputs of shape `[B, T, H, V]` if `head_first=False` else `[B, H, T, V]`. + final_state (torch.Tensor): + Final state of shape `[N, H, K, V]` if `output_final_state=True` else `None`. 
+ + Examples:: + >>> import torch + >>> import torch.nn.functional as F + >>> from einops import rearrange + >>> from fla.ops.gated_delta_rule import chunk_gated_delta_rule + # inputs with equal lengths + >>> B, T, H, K, V = 4, 2048, 4, 512, 512 + >>> q = torch.randn(B, T, H, K, dtype=torch.bfloat16, device='cuda') + >>> k = F.normalize(torch.randn(B, T, H, K, dtype=torch.bfloat16, device='cuda'), p=2, dim=-1) + >>> v = torch.randn(B, T, H, V, dtype=torch.bfloat16, device='cuda') + >>> beta = torch.rand(B, T, H, dtype=torch.bfloat16, device='cuda').sigmoid() + >>> g = F.logsigmoid(torch.rand(B, T, H, dtype=torch.bfloat16, device='cuda')) + >>> h0 = torch.randn(B, H, K, V, dtype=torch.bfloat16, device='cuda') + >>> o, ht = chunk_gated_delta_rule( + q, k, v, g, beta, + initial_state=h0, + output_final_state=True, + head_first=False + ) + # for variable-length inputs, the batch size `B` is expected to be 1 and `cu_seqlens` is required + >>> q, k, v, beta, g = map(lambda x: rearrange(x, 'b t ... -> 1 (b t) ...'), (q, k, v, beta, g)) + # for a batch with 4 sequences, `cu_seqlens` with 5 start/end positions are expected + >>> cu_seqlens = q.new_tensor([0, 2048, 4096, 6144, 8192], dtype=torch.long) + >>> o_var, ht_var = chunk_gated_delta_rule( + q, k, v, g, beta, + initial_state=h0, + output_final_state=True, + cu_seqlens=cu_seqlens, + head_first=False + ) + """ + assert q.dtype == k.dtype == v.dtype + assert q.dtype != torch.float32, "ChunkGatedDeltaRuleFunction does not support float32. Please use bfloat16." + assert len(beta.shape) == 3, "beta must be of shape [B, H, T] if head_first=True, or [B, T, H] if head_first=False." + + if cu_seqlens is not None: + if q.shape[0] != 1: + raise ValueError( + f"The batch size is expected to be 1 rather than {q.shape[0]} when using `cu_seqlens`." + f"Please flatten variable-length inputs before processing." 
+ ) + if head_first: + raise RuntimeError( + "Sequences with variable lengths are not supported for head-first mode" + ) + if initial_state is not None and initial_state.shape[0] != len(cu_seqlens) - 1: + raise ValueError( + f"The number of initial states is expected to be equal to the number of input sequences, " + f"i.e., {len(cu_seqlens) - 1} rather than {initial_state.shape[0]}." + ) + if head_first: + q, k, v = map(lambda x: rearrange(x, 'b h t d -> b t h d'), (q, k, v)) + beta, g = map(lambda x: rearrange(x, 'b h t -> b t h'), (beta, g)) + if scale is None: + scale = k.shape[-1] ** -0.5 + else: + assert scale > 0, "Scale must be positive." + o, final_state = ChunkGatedDeltaRuleFunction.apply( + q, + k, + v, + g, + beta, + scale, + initial_state, + output_final_state, + cu_seqlens, + False, + use_qk_l2norm_in_kernel + ) + if head_first: + o = rearrange(o, 'b t h v -> b h t v') + return o, final_state diff --git a/fla/ops/generalized_delta_rule/__init__.py b/fla/ops/generalized_delta_rule/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4b4155a215ca8c44ea45d6b151b1e584872ed6c --- /dev/null +++ b/fla/ops/generalized_delta_rule/__init__.py @@ -0,0 +1,9 @@ +from .dplr import chunk_dplr_delta_rule, fused_recurrent_dplr_delta_rule +from .iplr import chunk_iplr_delta_rule, fused_recurrent_iplr_delta_rule + +__all__ = [ + 'chunk_dplr_delta_rule', + 'fused_recurrent_dplr_delta_rule', + 'chunk_iplr_delta_rule', + 'fused_recurrent_iplr_delta_rule' +] diff --git a/fla/ops/gla/fused_recurrent.py b/fla/ops/gla/fused_recurrent.py new file mode 100644 index 0000000000000000000000000000000000000000..d211541d789809ee89a688b380626026b1dbed88 --- /dev/null +++ b/fla/ops/gla/fused_recurrent.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Songlin Yang, Yu Zhang + +from typing import Optional, Tuple + +import torch + +from fla.ops.common.fused_recurrent import fused_recurrent + + +def fused_recurrent_gla( + q: torch.Tensor, + k: 
torch.Tensor, + v: torch.Tensor, + gk: Optional[torch.Tensor] = None, + gv: Optional[torch.Tensor] = None, + scale: Optional[int] = None, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False, + reverse: bool = False, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = True +) -> Tuple[torch.Tensor, torch.Tensor]: + r""" + Args: + q (torch.Tensor): + queries of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + k (torch.Tensor): + keys of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + v (torch.Tensor): + values of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]`. + gk (torch.Tensor): + Forget gates of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]` applied to keys. + gv (torch.Tensor): + Forget gates of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]` applied to values. + scale (Optional[int]): + Scale factor for the attention scores. + If not provided, it will default to `1 / sqrt(K)`. Default: `None`. + initial_state (Optional[torch.Tensor]): + Initial state of shape `[N, H, K, V]` for `N` input sequences. + For equal-length input sequences, `N` equals the batch size `B`. + Default: `None`. + output_final_state (Optional[bool]): + Whether to output the final state of shape `[N, H, K, V]`. Default: `False`. + reverse (Optional[bool]): + If `True`, process the state passing in reverse order. Default: `False`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + head_first (Optional[bool]): + Whether the inputs are in the head-first format, which is not supported for variable-length inputs. + Default: `True`. + + Returns: + o (torch.Tensor): + Outputs of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]`. + final_state (torch.Tensor): + Final state of shape `[N, H, K, V]` if `output_final_state=True` else `None`. 
+ + Examples:: + >>> import torch + >>> import torch.nn.functional as F + >>> from einops import rearrange + >>> from fla.ops.gla import fused_recurrent_gla + # inputs with equal lengths + >>> B, T, H, K, V = 4, 2048, 4, 512, 512 + >>> q = torch.randn(B, T, H, K, device='cuda') + >>> k = torch.randn(B, T, H, K, device='cuda') + >>> v = torch.randn(B, T, H, V, device='cuda') + >>> g = F.logsigmoid(torch.randn(B, T, H, K, device='cuda')) + >>> h0 = torch.randn(B, H, K, V, device='cuda') + >>> o, ht = fused_recurrent_gla(q, k, v, g, + initial_state=h0, + output_final_state=True, + head_first=False) + # for variable-length inputs, the batch size `B` is expected to be 1 and `cu_seqlens` is required + >>> q, k, v, g = map(lambda x: rearrange(x, 'b t h d -> 1 (b t) h d'), (q, k, v, g)) + # for a batch with 4 sequences, `cu_seqlens` with 5 start/end positions are expected + >>> cu_seqlens = q.new_tensor([0, 2048, 4096, 6144, 8192], dtype=torch.long) + >>> o_var, ht_var = fused_recurrent_gla(q, k, v, g, + initial_state=h0, + output_final_state=True, + cu_seqlens=cu_seqlens, + head_first=False) + >>> assert o.allclose(o_var.view(o.shape)) + >>> assert ht.allclose(ht_var) + """ + if cu_seqlens is not None: + if q.shape[0] != 1: + raise ValueError(f"The batch size is expected to be 1 rather than {q.shape[0]} when using `cu_seqlens`." 
+ f"Please flatten variable-length inputs before processing.") + if head_first: + raise RuntimeError("Sequences with variable lengths are not supported for head-first mode") + if initial_state is not None and initial_state.shape[0] != len(cu_seqlens) - 1: + raise ValueError(f"The number of initial states is expected to be equal to the number of input sequences, " + f"i.e., {len(cu_seqlens) - 1} rather than {initial_state.shape[0]}.") + if scale is None: + scale = k.shape[-1] ** -0.5 + o, final_state = fused_recurrent( + q=q, + k=k, + v=v, + g=None, + gk=gk, + gv=gv, + scale=scale, + initial_state=initial_state, + output_final_state=output_final_state, + reverse=reverse, + cu_seqlens=cu_seqlens, + head_first=head_first + ) + return o, final_state diff --git a/fla/ops/gla/naive.py b/fla/ops/gla/naive.py new file mode 100644 index 0000000000000000000000000000000000000000..507a7395c0c28b0a9c54008e1735098cd3fbdc85 --- /dev/null +++ b/fla/ops/gla/naive.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +from typing import Optional + +import torch + + +def ceildiv(a, b): + return -(a // -b) + + +def naive_recurrent_gla( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + gk: torch.Tensor, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False +): + dtype = q.dtype + q, k, v, gk = map(lambda x: x.float(), (q, k, v, gk)) + B, H, T, K, V = *q.shape, v.shape[-1] + o = torch.zeros_like(v) + scale = K ** -0.5 + + h = q.new_zeros(B, H, K, V, dtype=torch.float32) + if initial_state is not None: + h += initial_state.float() + + for i in range(T): + q_i = q[:, :, i] * scale + k_i = k[:, :, i] + v_i = v[:, :, i] + gk_i = gk[:, :, i].exp() + kv_i = k_i[..., None] * v_i[..., None, :] + h = h * gk_i[..., None] + kv_i + o[:, :, i] = (q_i[..., None] * h).sum(-2) + + if not output_final_state: + h = None + return o.to(dtype), h diff --git a/fla/ops/gsa/__init__.py b/fla/ops/gsa/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..ed8a88014ddfc3143e67d3a48c38a54b75d7f3d6 --- /dev/null +++ b/fla/ops/gsa/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from .chunk import chunk_gsa +from .fused_recurrent import fused_recurrent_gsa + +__all__ = [ + 'chunk_gsa', + 'fused_recurrent_gsa' +] diff --git a/fla/ops/hgrn/__init__.py b/fla/ops/hgrn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f2012c3c15f125271df225ce755ed3b2dbe01a83 --- /dev/null +++ b/fla/ops/hgrn/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from .chunk import chunk_hgrn +from .fused_recurrent import fused_recurrent_hgrn + +__all__ = [ + 'chunk_hgrn', + 'fused_recurrent_hgrn' +] diff --git a/fla/ops/hgrn/fused_recurrent.py b/fla/ops/hgrn/fused_recurrent.py new file mode 100644 index 0000000000000000000000000000000000000000..a6a70f0c7e4e12fc3648f1f0c19fc946fb85eb97 --- /dev/null +++ b/fla/ops/hgrn/fused_recurrent.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional, Tuple + +import torch +import triton +import triton.language as tl + +from fla.ops.utils.op import exp +from fla.utils import input_guard + + +@triton.heuristics({ + 'USE_INITIAL_STATE': lambda args: args['h0'] is not None, + 'STORE_FINAL_STATE': lambda args: args['ht'] is not None, + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({'BD': BD}, num_warps=num_warps) + for BD in [32, 64, 128] + for num_warps in [1, 2, 4, 8] + ], + key=['D'] +) +@triton.jit(do_not_specialize=['T']) +def fused_recurrent_hgrn_fwd_kernel( + x, + g, + o, + h0, + ht, + offsets, + T, + D: tl.constexpr, + BD: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + STORE_FINAL_STATE: tl.constexpr, + USE_OFFSETS: tl.constexpr +): + i_d, i_n = tl.program_id(0), tl.program_id(1) + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int64), tl.load(offsets + i_n + 1).to(tl.int64) + 
T = eos - bos + else: + bos, eos = i_n * T, i_n * T + T + + o_d = i_d * BD + tl.arange(0, BD) + mask = o_d < D + + p_x = x + bos * D + o_d + p_g = g + bos * D + o_d + p_o = o + bos * D + o_d + + b_h = tl.zeros([BD], dtype=tl.float32) + if USE_INITIAL_STATE: + p_h0 = h0 + i_n * D + o_d + b_h += tl.load(p_h0, mask=mask, other=0).to(tl.float32) + for _ in range(0, T): + b_x = tl.load(p_x, mask=mask, other=0).to(tl.float32) + b_g = tl.load(p_g, mask=mask, other=0).to(tl.float32) + b_h = exp(b_g) * b_h + b_x + tl.store(p_o, b_h.to(p_o.dtype.element_ty), mask=mask) + + p_x += D + p_g += D + p_o += D + + if STORE_FINAL_STATE: + p_ht = ht + i_n * D + o_d + tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask) + + +@triton.heuristics({ + 'USE_INITIAL_STATE': lambda args: args['h0'] is not None, + 'USE_FINAL_STATE_GRADIENT': lambda args: args['dht'] is not None, + 'USE_OFFSETS': lambda args: args['offsets'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({'BD': BD}, num_warps=num_warps) + for BD in [32, 64, 128] + for num_warps in [1, 2, 4, 8] + ], + key=['D'] +) +@triton.jit(do_not_specialize=['T']) +def fused_recurrent_hgrn_bwd_kernel( + g, + o, + h0, + dx, + dg, + do, + dht, + dh0, + offsets, + T, + D: tl.constexpr, + BD: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + USE_FINAL_STATE_GRADIENT: tl.constexpr, + USE_OFFSETS: tl.constexpr +): + i_d, i_n = tl.program_id(0), tl.program_id(1) + if USE_OFFSETS: + bos, eos = tl.load(offsets + i_n).to(tl.int64), tl.load(offsets + i_n + 1).to(tl.int64) + T = eos - bos + else: + bos, eos = i_n * T, i_n * T + T + + o_d = i_d * BD + tl.arange(0, BD) + mask = o_d < D + + p_g = g + (bos + T - 1) * D + o_d + p_o = o + (bos + T - 2) * D + o_d + p_dx = dx + (bos + T - 1) * D + o_d + p_dg = dg + (bos + T - 1) * D + o_d + p_do = do + (bos + T - 1) * D + o_d + + b_dh = tl.zeros([BD], dtype=tl.float32) + if USE_FINAL_STATE_GRADIENT: + p_dht = dht + i_n * D + o_d + b_dh += tl.load(p_dht, mask=mask, other=0).to(tl.float32) + 
+ for i in range(T - 1, -1, -1): + b_g = tl.load(p_g, mask=mask, other=0).to(tl.float32) + b_do = tl.load(p_do, mask=mask, other=0).to(tl.float32) + if i > 0: + b_o = tl.load(p_o, mask=mask, other=0).to(tl.float32) + elif USE_INITIAL_STATE: + b_o = tl.load(h0 + i_n * D + o_d, mask=mask, other=0).to(tl.float32) + else: + b_o = tl.zeros([BD], dtype=tl.float32) + + b_dh = b_dh + b_do + b_dx = b_dh + b_dh = b_dh * exp(b_g) + b_dg = b_dh * b_o + tl.store(p_dx, b_dx.to(p_dx.dtype.element_ty), mask=mask) + tl.store(p_dg, b_dg.to(p_dg.dtype.element_ty), mask=mask) + + p_g -= D + p_o -= D + p_dx -= D + p_dg -= D + p_do -= D + + if USE_INITIAL_STATE: + p_dh0 = dh0 + i_n * D + o_d + tl.store(p_dh0, b_dh.to(p_dh0.dtype.element_ty), mask=mask) + + +def fused_recurrent_hgrn_fwd( + x: torch.Tensor, + g: torch.Tensor, + initial_state: torch.Tensor = None, + output_final_state: bool = False, + offsets: Optional[torch.LongTensor] = None, +) -> Tuple[torch.Tensor, torch.Tensor]: + B, T, D = x.shape + N = B if offsets is None else len(offsets) - 1 + + o = torch.empty_like(x) + final_state = x.new_empty(N, D) if output_final_state else None + + def grid(meta): return (triton.cdiv(D, meta['BD']), N) + fused_recurrent_hgrn_fwd_kernel[grid]( + x=x, + g=g, + o=o, + h0=initial_state, + ht=final_state, + offsets=offsets, + T=T, + D=D + ) + return o, final_state + + +def fused_recurrent_hgrn_bwd( + g: torch.Tensor, + o: torch.Tensor, + do: torch.Tensor, + dht: torch.Tensor = None, + initial_state: torch.Tensor = None, + offsets: Optional[torch.LongTensor] = None +) -> Tuple[torch.Tensor, torch.Tensor]: + B, T, D = do.shape + N = B if offsets is None else len(offsets) - 1 + + dx = torch.empty_like(o, dtype=torch.float) + dg = torch.empty_like(g, dtype=torch.float) + dh0 = torch.empty_like(initial_state, dtype=torch.float) if initial_state is not None else None + def grid(meta): return (triton.cdiv(D, meta['BD']), N) + fused_recurrent_hgrn_bwd_kernel[grid]( + g=g, + o=o, + h0=initial_state, + 
dx=dx, + dg=dg, + do=do, + dht=dht, + dh0=dh0, + offsets=offsets, + T=T, + D=D + ) + return dx, dg, dh0 + + +class FusedRecurrentHGRNFunction(torch.autograd.Function): + + @staticmethod + @input_guard + def forward( + ctx, + x: torch.Tensor, + g: torch.Tensor, + initial_state: torch.Tensor = None, + output_final_state: bool = False, + offsets: Optional[torch.LongTensor] = None + ): + o, ht = fused_recurrent_hgrn_fwd( + x=x, + g=g, + initial_state=initial_state, + output_final_state=output_final_state, + offsets=offsets + ) + ctx.save_for_backward(g, o, initial_state) + ctx.offsets = offsets + return o, ht + + @staticmethod + @input_guard + def backward(ctx, do, dht=None): + g, o, initial_state = ctx.saved_tensors + offsets = ctx.offsets + + dx, dg, dh0 = fused_recurrent_hgrn_bwd( + g=g, + o=o, + do=do, + dht=dht, + initial_state=initial_state, + offsets=offsets + ) + return dx, dg, dh0, None, None + + +@torch.compiler.disable +def fused_recurrent_hgrn( + x: torch.Tensor, + g: torch.Tensor, + initial_state: torch.Tensor = None, + output_final_state: bool = False, + cu_seqlens: Optional[torch.LongTensor] = None, +) -> Tuple[torch.Tensor, torch.Tensor]: + r""" + Args: + x (torch.Tensor): + inputs of shape `[B, T, D]. + g (torch.Tensor): + Forget gates of shape `[B, T, D]`. + initial_state (Optional[torch.Tensor]): + Initial state of shape `[N, D]` for `N` input sequences. + For equal-length input sequences, `N` equals the batch size `B`. + Default: `None`. + output_final_state (Optional[bool]): + Whether to output the final state of shape `[N, D]`. Default: `False`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + + Returns: + o (torch.Tensor): + Outputs of shape `[B, T, D]`. + final_state (torch.Tensor): + Final state of shape `[N, D]` if `output_final_state=True` else `None`. 
+ + Examples:: + >>> import torch + >>> import torch.nn.functional as F + >>> from einops import rearrange + >>> from fla.ops.hgrn import fused_recurrent_hgrn + # inputs with equal lengths + >>> B, T, D = 4, 2048, 512 + >>> x = torch.randn(B, T, D, device='cuda') + >>> g = F.logsigmoid(torch.randn(B, T, D, device='cuda')) + >>> h0 = torch.randn(B, D, device='cuda') + >>> o, ht = fused_recurrent_hgrn(x, g, initial_state=h0, output_final_state=True) + # for variable-length inputs, the batch size `B` is expected to be 1 and `cu_seqlens` is required + >>> x, g = map(lambda x: rearrange(x, 'b t d -> 1 (b t) d'), (x, g)) + # for a batch with 4 sequences, `cu_seqlens` with 5 start/end positions are expected + >>> cu_seqlens = x.new_tensor([0, 2048, 4096, 6144, 8192], dtype=torch.long) + >>> o_var, ht_var = fused_recurrent_hgrn(x, g, initial_state=h0, output_final_state=True, cu_seqlens=cu_seqlens) + >>> assert o.allclose(o_var.view(o.shape)) + >>> assert ht.allclose(ht_var) + """ + return FusedRecurrentHGRNFunction.apply( + x, + g, + initial_state, + output_final_state, + cu_seqlens + ) diff --git a/fla/ops/lightning_attn/fused_recurrent.py b/fla/ops/lightning_attn/fused_recurrent.py new file mode 100644 index 0000000000000000000000000000000000000000..6548188b7b617a994316696b8ee1237b064029c4 --- /dev/null +++ b/fla/ops/lightning_attn/fused_recurrent.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional, Tuple + +import torch + +from fla.ops.simple_gla.fused_recurrent import fused_recurrent_simple_gla + + +def fused_recurrent_lightning_attn( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + layer_idx: int, + num_layers: int, + scale: Optional[float] = None, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False, + reverse: bool = False, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = True +) -> Tuple[torch.Tensor, torch.Tensor]: + r""" + Args: 
+ q (torch.Tensor): + queries of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + k (torch.Tensor): + keys of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + v (torch.Tensor): + values of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]`. + layer_idx (int): + The index of the current layer. + num_layers (int): + The total number of layers. Both `layer_idx` and `num_layers` are used to compute the decay factor. + scale (Optional[int]): + Scale factor for the attention scores. + If not provided, it will default to `1 / sqrt(K)`. Default: `None`. + initial_state (Optional[torch.Tensor]): + Initial state of shape `[N, H, K, V]` for `N` input sequences. + For equal-length input sequences, `N` equals the batch size `B`. + Default: `None`. + output_final_state (Optional[bool]): + Whether to output the final state of shape `[N, H, K, V]`. Default: `False`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + head_first (Optional[bool]): + Whether the inputs are in the head-first format, which is not supported for variable-length inputs. + Default: `True`. + + Returns: + o (torch.Tensor): + Outputs of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]`. + final_state (torch.Tensor): + Final state of shape `[N, H, K, V]` if `output_final_state=True` else `None`. 
+ """ + H = q.shape[1] if head_first else q.shape[2] + s = -(8 / H * (1 - layer_idx / num_layers)) * q.new_tensor(range(H), dtype=torch.float) + if head_first: + g = s[None, :, None].expand(q.shape[0], q.shape[1], q.shape[2]).contiguous() + else: + g = s[None, None, :].expand(q.shape[0], q.shape[1], q.shape[2]).contiguous() + return fused_recurrent_simple_gla( + q=q, + k=k, + v=v, + g=g, + scale=scale, + initial_state=initial_state, + output_final_state=output_final_state, + reverse=reverse, + cu_seqlens=cu_seqlens, + head_first=head_first + ) diff --git a/fla/ops/nsa/__init__.py b/fla/ops/nsa/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..941a1be41e1650961af0d28e64837421826ffab2 --- /dev/null +++ b/fla/ops/nsa/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +from .naive import naive_nsa +from .parallel import parallel_nsa + +__all__ = [ + 'naive_nsa', + 'parallel_nsa' +] diff --git a/fla/ops/retention/__init__.py b/fla/ops/retention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a38ab43c9982c9751bb9db146b9d9fe05663964a --- /dev/null +++ b/fla/ops/retention/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- + +from .chunk import chunk_retention +from .fused_chunk import fused_chunk_retention +from .fused_recurrent import fused_recurrent_retention +from .parallel import parallel_retention + +__all__ = [ + 'chunk_retention', + 'fused_chunk_retention', + 'parallel_retention', + 'fused_recurrent_retention' +] diff --git a/fla/ops/retention/fused_recurrent.py b/fla/ops/retention/fused_recurrent.py new file mode 100644 index 0000000000000000000000000000000000000000..b84eb83e739d16ad44485c8a7166be7e9e08e775 --- /dev/null +++ b/fla/ops/retention/fused_recurrent.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional, Tuple + +import torch + +from fla.ops.simple_gla.fused_recurrent import fused_recurrent_simple_gla + + +def 
fused_recurrent_retention( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + scale: Optional[float] = None, + initial_state: Optional[torch.Tensor] = None, + output_final_state: bool = False, + reverse: bool = False, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = True +) -> Tuple[torch.Tensor, torch.Tensor]: + if head_first: + n_heads = q.shape[1] + else: + n_heads = q.shape[2] + s = (1 - q.new_tensor(2., dtype=torch.float).pow(-5. - q.new_tensor(range(n_heads), dtype=torch.float))).log() + if head_first: + g = s[None, :, None].expand(q.shape[0], q.shape[1], q.shape[2]).contiguous() + else: + g = s[None, None, :].expand(q.shape[0], q.shape[1], q.shape[2]).contiguous() + return fused_recurrent_simple_gla( + q=q, + k=k, + v=v, + g=g, + scale=scale, + initial_state=initial_state, + output_final_state=output_final_state, + reverse=reverse, + cu_seqlens=cu_seqlens, + head_first=head_first + ) diff --git a/fla/ops/retention/naive.py b/fla/ops/retention/naive.py new file mode 100644 index 0000000000000000000000000000000000000000..15611bf649779d2d956d2ab390b7d72dbb12201d --- /dev/null +++ b/fla/ops/retention/naive.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +import torch + + +def naive_retention(q, k, v): + orig_type = q.dtype + q, k, v = q.float(), k.float(), v.float() + _, n_heads, seq_len, d_head = q.shape + s = (1 - q.new_tensor(2., dtype=torch.float).pow(-5. 
- q.new_tensor(range(n_heads), dtype=torch.float))).log2() + n = q.new_tensor(range(seq_len), dtype=torch.float) + n = torch.exp2((n.unsqueeze(-1) - n) * s.view(-1, 1, 1)) * n.unsqueeze(-1).ge(n) + s = torch.einsum('bhqd,bhkd,hqk->bhqk', q * d_head ** -0.5, k, n.to(q.dtype)) + o = torch.einsum('bhqk,bhkd->bhqd', s, v) + return o.to(orig_type) diff --git a/fla/ops/rwkv7/channel_mixing.py b/fla/ops/rwkv7/channel_mixing.py new file mode 100644 index 0000000000000000000000000000000000000000..991ea426f086859b8eaf0623f5acf059f9bddc5c --- /dev/null +++ b/fla/ops/rwkv7/channel_mixing.py @@ -0,0 +1,323 @@ +import logging + +import torch +import triton +import triton.language as tl + +from fla.utils import autocast_custom_bwd, autocast_custom_fwd, check_pytorch_version, input_guard, use_cuda_graph + +logger = logging.getLogger(__name__) + +if not check_pytorch_version('2.4'): + logger.warning('PyTorch < 2.4 detected - computations may be slower due to lack of optimizations') + + +@triton.autotune( + configs=[ + triton.Config({'BLOCK_SIZE': block_size}) + for block_size in [128, 256, 512, 1024, 2048, 4096, 8192] + ], + key=['hidden_dim'], + use_cuda_graph=use_cuda_graph, +) +@triton.jit +def rwkv_seq_mix_kernel( + x_ptr, + x_prev_ptr, + mix_k_ptr, + output_ptr, + batch_size: tl.constexpr, + token_length, + hidden_dim: tl.constexpr, + BLOCK_SIZE: tl.constexpr +): + block_start = tl.program_id(0) * BLOCK_SIZE + block_idx = block_start + tl.arange(0, BLOCK_SIZE)[:] + + total_seq_dim = token_length * hidden_dim + batch_idx = block_idx // total_seq_dim + seq_and_feat = block_idx % total_seq_dim + seq_idx = seq_and_feat // hidden_dim + feat_idx = seq_and_feat % hidden_dim + + is_valid = (batch_idx < batch_size) & (seq_idx < token_length) + + x_idx = batch_idx * total_seq_dim + seq_idx * hidden_dim + feat_idx + + curr_x = tl.load(x_ptr + x_idx, mask=is_valid, other=0.0).to(tl.float32) + k_value = tl.load(mix_k_ptr + feat_idx).to(tl.float32) + + is_first = seq_idx < 1 + 
prev_state_idx = batch_idx * hidden_dim + feat_idx + prev_state = tl.load(x_prev_ptr + prev_state_idx, + mask=(is_first & is_valid), + other=0.0).to(tl.float32) + + prev_x_idx = x_idx - hidden_dim + prev_x = tl.load(x_ptr + prev_x_idx, + mask=(~is_first & is_valid), + other=0.0).to(tl.float32) + + prev_value = tl.where(is_first, prev_state, prev_x) + state_diff = prev_value - curr_x + mixed = state_diff * k_value + result = tl.cast(curr_x + mixed, dtype=output_ptr.dtype.element_ty, fp_downcast_rounding='rtne') + tl.store(output_ptr + x_idx, result, mask=is_valid) + + +@triton.jit +def rwkv_channel_mixing_pow_and_relu( + in_ptr, + out_ptr, + BLOCK_SIZE: tl.constexpr +): + """Fused ReLU and Power operation: x = ReLU(x)^2""" + xoffset = tl.program_id(0) * BLOCK_SIZE + xindex = xoffset + tl.arange(0, BLOCK_SIZE) + x0 = xindex + x = tl.load(in_ptr + (x0), None) + x = tl.maximum(x, 0.0).to(tl.float32) + x = tl.cast(x * x, dtype=out_ptr.dtype.element_ty, fp_downcast_rounding='rtne') + tl.store(out_ptr + (x0), x, None) + + +def rwkv_mix_torch(x: torch.Tensor, x_prev: torch.Tensor, x_k: torch.Tensor): + if x_prev.dim() == 2: + x_prev = x_prev.unsqueeze(1) # (batch_size, 1, hidden_dim) + xx = torch.cat((x_prev, x[:, :-1, :]), dim=1) - x + k = x.addcmul(xx, x_k) + return k + + +def rwkv_relu_and_square_torch(x: torch.Tensor): + return torch.relu(x) ** 2 + + +def rwkv_mix_fwd(x, x_prev, x_k): + has_batch = x.dim() == 3 + + if has_batch: + batch_size, token_length, hidden_dim = x.shape + else: + token_length, hidden_dim = x.shape + batch_size = 1 + x = x.unsqueeze(0) + x_prev = x_prev.unsqueeze(0) + + token_length = x.shape[1] + hidden_dim = x.shape[2] + total_elements = batch_size * token_length * hidden_dim + + output = torch.empty_like(x) + + def grid(meta): return ( + (total_elements + meta['BLOCK_SIZE'] - 1) // meta['BLOCK_SIZE'], # grid_0 + 1, # grid_1 + 1 # grid_2 + ) + + rwkv_seq_mix_kernel[grid]( + x.contiguous(), + x_prev.contiguous(), + x_k.squeeze(), + output, + 
batch_size=batch_size, + token_length=token_length, + hidden_dim=hidden_dim, + ) + if not has_batch: + output = output.squeeze(0) + return output + + +def rwkv_relu_and_square_fwd(x: torch.Tensor, inplace: bool = True): + """ + Triton implementation of RWKV's ReLU and square operation + Args: + x: Input tensor + Returns: + Tensor after ReLU and square operations + """ + x = x.contiguous() + output = x if inplace else torch.empty_like(x) + + def grid(meta): return ( + (output.numel() + meta['BLOCK_SIZE'] - 1) // meta['BLOCK_SIZE'], # grid_0 + 1, # grid_1 + 1 # grid_2 + ) + rwkv_channel_mixing_pow_and_relu[grid]( + x, + output, + BLOCK_SIZE=4096, + ) + + return output + + +@triton.jit +def relu_square_bwd_kernel( + out_ptr, + forward_input_ptr, + BLOCK_SIZE: tl.constexpr +): + """ReLU(x)^2 backward kernel + grad_input = grad_output * 2 * x if x > 0 else 0 + """ + pid = tl.program_id(0) + block_start = pid * BLOCK_SIZE + offsets = block_start + tl.arange(0, BLOCK_SIZE) + + x = tl.load(forward_input_ptr + offsets).to(tl.float32) + grad = tl.load(out_ptr + offsets).to(tl.float32) + + x = tl.maximum(x, 0.0) + + grad_input = grad * 2 * x + + tl.store(out_ptr + offsets, grad_input.to(out_ptr.dtype.element_ty)) + + +@triton.autotune( + configs=[ + triton.Config({'BLOCK_SIZE': block_size}) + for block_size in [128, 256, 512, 1024, 2048, 4096, 8192] + ], + key=['hidden_dim'], + use_cuda_graph=use_cuda_graph, +) +@triton.jit +def rwkv_mix_bwd_kenel( + dk1_ptr0, + xk_ptr, + dx_ptr, + dx_prev_ptr, + batch_size, + token_length, + hidden_dim: tl.constexpr, + BLOCK_SIZE: tl.constexpr +): + pid = tl.program_id(0) + offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) + + batch_idx = offsets // (token_length * hidden_dim) + seq_feat = offsets % (token_length * hidden_dim) + seq_idx = seq_feat // hidden_dim + feat_idx = seq_feat % hidden_dim + + is_valid = offsets < (batch_size * token_length * hidden_dim) + + dk1 = tl.load(dk1_ptr0 + offsets, mask=is_valid) + xk = tl.load(xk_ptr + 
feat_idx, mask=is_valid) + prod = dk1 * xk + + mask_next = seq_idx < (token_length - 1) + next_offset = offsets + hidden_dim + dk1_next = tl.load(dk1_ptr0 + next_offset, mask=mask_next & is_valid, other=0.0) + prod_next = dk1_next * xk + dx_val = dk1 - prod + tl.where(mask_next, prod_next, 0.0) + dx_val = tl.cast(dx_val, dtype=dx_ptr.dtype.element_ty, fp_downcast_rounding='rtne') + tl.store(dx_ptr + offsets, dx_val, mask=is_valid) + + dx_prev_offset = batch_idx * hidden_dim + feat_idx + is_first_step = seq_idx == 0 + + tl.store( + dx_prev_ptr + dx_prev_offset, + tl.cast(prod, dtype=dx_prev_ptr.dtype.element_ty), + mask=is_first_step + ) + + +@torch.compile(fullgraph=True) +def compute_x_k_grad(dk1, x, x_prev): + """ + Args: + dk1: (batch*seq_len, hidden_dim) + x: (batch, seq_len, hidden_dim) + x_prev: (batch, hidden_dim) or (batch, 1, hidden_dim) + """ + + if x_prev.dim() == 2: + x_prev = x_prev.unsqueeze(1) # (batch, 1, hidden_dim) + xx = torch.cat((x_prev, x[:, :-1, :]), dim=1) - x # (batch, seq_len, hidden_dim) + + # (hidden_dim,) --> (1, 1, hidden_dim) + grad_x_k = (dk1 * xx.reshape(-1, x.shape[2])).sum(dim=0).view(1, 1, -1) + return grad_x_k + + +def rwkv_channel_mixing_bwd(grad_output, x, x_prev, x_k, key_weight, value_weight, k1, k1_K, k, inplace=True): + batch_size = x.shape[0] if x.dim() == 3 else 1 + seq_len, n_embd = x.shape[-2], x.shape[-1] + + dV = k.transpose(-2, -1) @ grad_output + dk = grad_output @ value_weight.transpose(-2, -1) + + BLOCK_SIZE = 4096 + grid = ((dk.numel() + BLOCK_SIZE - 1) // BLOCK_SIZE,) + relu_square_bwd_kernel[grid]( + dk, + k1_K, + BLOCK_SIZE=BLOCK_SIZE + ) + + dK = k1.transpose(-2, -1) @ dk + dk1 = dk @ key_weight.transpose(-2, -1) + dk1 = dk1.view(-1, n_embd).contiguous() + + dk_reduced = compute_x_k_grad(dk1, x, x_prev) + dx_prev = torch.empty_like(x_prev) if not inplace else x_prev + dx = torch.empty_like(x) if not inplace else x + + def grid(meta): return ((batch_size * seq_len * n_embd + meta['BLOCK_SIZE'] - 1) // 
meta['BLOCK_SIZE'], 1, 1) + rwkv_mix_bwd_kenel[grid]( + dk1, + x_k.squeeze(), + dx, + dx_prev, + batch_size, + seq_len, + n_embd, + ) + # dx_prev.shape batch_size, seq_len, n_embd + return dx, dx_prev, dk_reduced, dK, dV + + +class Rwkv7ChannelMixing(torch.autograd.Function): + @staticmethod + @input_guard + @autocast_custom_fwd + def forward(ctx, x, x_prev, x_k, key_weight, value_weight, inplace: bool = True): + k1 = rwkv_mix_fwd(x, x_prev, x_k) + k1_K = k1 @ key_weight + k = rwkv_relu_and_square_fwd(k1_K, inplace=True) + ctx.save_for_backward(x, x_prev, x_k, key_weight, value_weight) + ctx.inplace = inplace + return k @ value_weight + + @staticmethod + @input_guard + @autocast_custom_bwd + def backward(ctx, dkv): + x, x_prev, x_k, key_weight, value_weight = ctx.saved_tensors + k1 = rwkv_mix_fwd(x, x_prev, x_k) + k1_K = k1 @ key_weight + k = rwkv_relu_and_square_fwd(k1_K, inplace=False) + dx, dx_prev, dk_reduced, dK, dV = rwkv_channel_mixing_bwd( + dkv, x, x_prev, x_k, key_weight, value_weight, k1, k1_K, k, ctx.inplace) + return dx, dx_prev, dk_reduced, dK, dV, None + + +def channel_mixing_rwkv7(x: torch.Tensor, x_prev: torch.Tensor, x_k: torch.Tensor, + key_weight: torch.Tensor, value_weight: torch.Tensor, inplace: bool = True): + assert x.dim() == 3 + + return Rwkv7ChannelMixing.apply(x, x_prev, x_k, key_weight, value_weight, inplace), x[-1, :] + + +def channel_mixing_rwkv7_torch(x, x_prev, x_k, key_weight, value_weight): + k1 = rwkv_mix_torch(x, x_prev, x_k) + k1_K = k1 @ key_weight + k = rwkv_relu_and_square_torch(k1_K) + return k @ value_weight, x[-1, :] diff --git a/fla/ops/rwkv7/chunk.py b/fla/ops/rwkv7/chunk.py new file mode 100644 index 0000000000000000000000000000000000000000..956c458974316d1f83121bd71ef1ec433cb6cdde --- /dev/null +++ b/fla/ops/rwkv7/chunk.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional + +import torch + +from fla.ops.generalized_delta_rule import 
chunk_dplr_delta_rule + + +def chunk_rwkv7( + r: torch.Tensor, + w: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + scale: float = 1.0, + initial_state: torch.Tensor = None, + output_final_state: bool = True, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = False +): + """ + Args: + r (torch.Tensor): + r of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + w (torch.Tensor): + log decay of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + k (torch.Tensor): + k of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + v (torch.Tensor): + v of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]`. + a (torch.Tensor): + a of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + b (torch.Tensor): + b of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]`. + scale (float): + scale of the attention. + initial_state (Optional[torch.Tensor]): + Initial state of shape `[N, H, K, V]` for `N` input sequences. + For equal-length input sequences, `N` equals the batch size `B`. + Default: `None`. + output_final_state (Optional[bool]): + Whether to output the final state of shape `[N, H, K, V]`. Default: `False`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + head_first (bool): + whether to use head first. Recommended to be False to avoid extra transposes. 
+ """ + return chunk_dplr_delta_rule( + q=r, + k=k, + v=v, + a=a, + b=b, + gk=w, + scale=scale, + initial_state=initial_state, + output_final_state=output_final_state, + cu_seqlens=cu_seqlens, + head_first=head_first + ) diff --git a/fla/ops/simple_gla/parallel.py b/fla/ops/simple_gla/parallel.py new file mode 100644 index 0000000000000000000000000000000000000000..d0ad1c8c33dd846eb1d1cf3c582836b6110017d7 --- /dev/null +++ b/fla/ops/simple_gla/parallel.py @@ -0,0 +1,722 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +from typing import Optional, Tuple + +import torch +import triton +import triton.language as tl + +from fla.ops.utils import chunk_global_cumsum, chunk_local_cumsum +from fla.ops.utils.op import safe_exp +from fla.utils import autocast_custom_bwd, autocast_custom_fwd, check_shared_mem, input_guard, is_intel_alchemist + +# https://github.com/intel/intel-xpu-backend-for-triton/issues/3449 +triton_config = {'grf_mode': 'large'} if is_intel_alchemist else {} + + +@triton.heuristics({ + 'NV': lambda args: triton.cdiv(args['V'], args['BV']), + 'OUTPUT_ATTENTIONS': lambda args: args['attn'] is not None, + 'USE_OFFSETS': lambda args: args['offsets'] is not None, + 'USE_G': lambda args: args['g'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps, num_stages=num_stages) + for num_warps in [2, 4, 8, 16] + for num_stages in [2, 3, 4] + ], + key=["BT", "BS", "BK", "BV", "USE_G"], +) +@triton.jit +def parallel_simple_gla_fwd_kernel( + q, + k, + v, + g, + o, + attn, + scale, + offsets, + indices, + T, + B: tl.constexpr, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NV: tl.constexpr, + OUTPUT_ATTENTIONS: tl.constexpr, + HEAD_FIRST: tl.constexpr, + USE_OFFSETS: tl.constexpr, + USE_G: tl.constexpr +): + tl.static_assert(not (USE_OFFSETS and HEAD_FIRST), "USE_OFFSETS and HEAD_FIRST cannot be True at the same 
time") + i_kv, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_k, i_v = i_kv // NV, i_kv % NV + i_b, i_h = i_bh // H, i_bh % H + o += i_k * B * T * H * V + + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + else: + bos, eos = i_b * T, i_b * T + T + + q += i_bh * T * K if HEAD_FIRST else (bos * H + i_h) * K + k += i_bh * T * K if HEAD_FIRST else (bos * H + i_h) * K + v += i_bh * T * V if HEAD_FIRST else (bos * H + i_h) * V + o += i_bh * T * V if HEAD_FIRST else (bos * H + i_h) * V + if USE_G: + g += i_bh * T if HEAD_FIRST else bos * H + i_h + if OUTPUT_ATTENTIONS: + attn += (bos * H + i_h * T) * T + i_k * B * H * T * T + stride_qk = K if HEAD_FIRST else H * K + stride_vo = V if HEAD_FIRST else H * V + stride_g = 1 if HEAD_FIRST else H + + p_q = tl.make_block_ptr(q, (T, K), (stride_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + + # the Q block is kept in the shared memory throughout the whole kernel + # [BT, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_q = (b_q * scale).to(b_q.dtype) + b_o = tl.zeros([BT, BV], dtype=tl.float32) + + # [BT] + o_q = i_t * BT + tl.arange(0, BT) + # [BS] + o_k = i_t * BT + tl.arange(0, BS) + # Q block and K block have overlap. 
+ # masks required + if USE_G: + p_gq = tl.make_block_ptr(g, (T,), (stride_g,), (i_t * BT,), (BT,), (0,)) + # [BT,] + b_gq = tl.load(p_gq, boundary_check=(0,)).to(tl.float32) + # rescale interchunk output + else: + b_gq = None + + for i_s in range(i_t * BT, min((i_t + 1) * BT, T), BS): + p_k = tl.make_block_ptr(k, (K, T), (1, stride_qk), (i_k * BK, i_s), (BK, BS), (0, 1)) + p_v = tl.make_block_ptr(v, (T, V), (stride_vo, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + # [BK, BS] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BS, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BT, BS] + m_s = o_q[:, None] >= o_k[None, :] + b_s = tl.dot(b_q, b_k) + if USE_G: + p_gk = tl.make_block_ptr(g, (T,), (stride_g,), (i_s,), (BS,), (0,)) + b_gk = tl.load(p_gk, boundary_check=(0,)) + b_s *= safe_exp(b_gq[:, None] - b_gk[None, :]) + b_s = tl.where(m_s, b_s, 0) + else: + b_s = tl.where(m_s, b_s, 0) + # [BT, BV] + if i_s >= 0: + b_o += tl.dot(b_s.to(b_q.dtype), b_v) + if OUTPUT_ATTENTIONS: + p_a = tl.make_block_ptr(attn, (T, T), (T, 1), (i_t * BT, i_s), (BT, BS), (1, 0)) + tl.store(p_a, b_s.to(p_a.dtype.element_ty), boundary_check=(0, 1)) + o_k += BS + + for i_s in range(i_t * BT - BS, -BS, -BS): + p_k = tl.make_block_ptr(k, (K, T), (1, stride_qk), (i_k * BK, i_s), (BK, BS), (0, 1)) + p_v = tl.make_block_ptr(v, (T, V), (stride_vo, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + # [BK, BS] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BS, BV] + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_s = tl.dot(b_q, b_k) + if USE_G: + p_g = tl.make_block_ptr(g, (T,), (stride_g,), (i_s,), (BS,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + b_gn = tl.load(g + (min(i_s + BS, T) - 1) * stride_g) + b_gp = tl.load(g + (i_s-1) * stride_g) if i_s % BT > 0 else 0. + # No concrete meaning. Just to avoid some layout bugs. 
+ b_s *= safe_exp(b_gq[:, None] + (b_gn - b_g)[None, :]) + b_gq += (b_gn - b_gp) + if OUTPUT_ATTENTIONS: + p_a = tl.make_block_ptr(attn, (T, T), (T, 1), (i_t * BT, i_s), (BT, BS), (1, 0)) + tl.store(p_a, b_s.to(p_a.dtype.element_ty), boundary_check=(0, 1)) + if i_s >= 0: + b_o += tl.dot(b_s.to(b_v.dtype), b_v) + p_o = tl.make_block_ptr(o, (T, V), (stride_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + + +@triton.jit(do_not_specialize=['T']) +def parallel_simple_gla_bwd_kernel_dq( + i_t, + i_k, + i_v, + q, + k, + v, + g, + do, + dq, + dg, + stride_qk, + stride_vo, + stride_g, + scale, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr +): + p_do = tl.make_block_ptr(do, (T, V), (stride_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + # [BT, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BT, BK] + b_dq = tl.zeros([BT, BK], dtype=tl.float32) + + for i_s in range(0, i_t * BT, BS): + p_k = tl.make_block_ptr(k, (T, K), (stride_qk, 1), (i_s, i_k * BK), (BS, BK), (1, 0)) + p_v = tl.make_block_ptr(v, (V, T), (1, stride_vo), (i_v * BV, i_s), (BV, BS), (0, 1)) + # [BS, BK] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BV, BS] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BT, BV] @ [BV, BS] = [BT, BS] + b_ds = tl.dot(b_do, b_v) + if USE_G: + p_g = tl.make_block_ptr(g, (T,), (stride_g,), (i_s,), (BS,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + b_gn = tl.load(g + (min(i_s + BS, T) - 1) * stride_g) + b_gp = tl.load(g + (i_s - 1) * stride_g) if i_s % BT > 0 else 0. 
+ b_ds *= safe_exp(b_gn - b_g)[None, :] + if i_s > 0: + b_dq *= safe_exp(b_gn - b_gp) + # [BT, BS] @ [BS, BK] = [BT, BK] + b_dq += tl.dot(b_ds.to(b_v.dtype), b_k) + + if USE_G: + p_gq = tl.make_block_ptr(g, (T,), (stride_g,), (i_t * BT,), (BT,), (0,)) + # [BT,] + b_gq = tl.load(p_gq, boundary_check=(0,)) + # [BT, BK] + b_dq *= safe_exp(b_gq)[:, None] + + # [BT] + o_q = i_t * BT + tl.arange(0, BT) + # [BS] + o_k = i_t * BT + tl.arange(0, BS) + # Q block and K block have overlap. masks required + for i_s in range(i_t * BT, min((i_t + 1) * BT, T), BS): + p_k = tl.make_block_ptr(k, (T, K), (stride_qk, 1), (i_s, i_k * BK), (BS, BK), (1, 0)) + p_v = tl.make_block_ptr(v, (V, T), (1, stride_vo), (i_v * BV, i_s), (BV, BS), (0, 1)) + # [BS, BK] + b_k = tl.load(p_k, boundary_check=(0, 1)) + # [BV, BS] + b_v = tl.load(p_v, boundary_check=(0, 1)) + # [BT, BV] @ [BV, BS] = [BT, BS] + b_ds = tl.dot(b_do, b_v) + if USE_G: + p_gk = tl.make_block_ptr(g, (T,), (stride_g,), (i_s,), (BS,), (0,)) + b_gk = tl.load(p_gk, boundary_check=(0,)) + b_ds *= safe_exp(b_gq[:, None] - b_gk[None, :]) + b_ds = tl.where(o_q[:, None] >= o_k[None, :], b_ds, 0) + # [BT, BK] + b_dq += tl.dot(b_ds.to(b_k.dtype), b_k) + o_k += BS + + b_dq *= scale + p_dq = tl.make_block_ptr(dq, (T, K), (stride_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + tl.store(p_dq, b_dq.to(p_dq.dtype.element_ty), boundary_check=(0, 1)) + if USE_G: + p_q = tl.make_block_ptr(q, (T, K), (stride_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_dg = tl.sum(b_dq * b_q, 1) + p_dg = tl.make_block_ptr(dg, (T,), (stride_g,), (i_t * BT,), (BT,), (0,)) + tl.store(p_dg, b_dg.to(p_dg.dtype.element_ty), boundary_check=(0,)) + + +@triton.jit(do_not_specialize=['T']) +def parallel_simple_gla_bwd_kernel_dkv( + i_t, + i_k, + i_v, + q, + k, + v, + g, + do, + dk, + dv, + dg, + scale, + stride_qk, + stride_vo, + stride_g, + T, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + 
BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr +): + # [BT, BK] + p_k = tl.make_block_ptr(k, (T, K), (stride_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_dk = tl.zeros([BT, BK], dtype=tl.float32) + # [BT, BV] + p_v = tl.make_block_ptr(v, (T, V), (stride_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_dv = tl.zeros([BT, BV], dtype=tl.float32) + if USE_G: + p_gk = tl.make_block_ptr(g, (T,), (stride_g,), (i_t * BT,), (BT,), (0,)) + b_gk = tl.load(p_gk, boundary_check=(0,)) + NTS = tl.cdiv(T, BS) + # [BT, BK] + for i_s in range(NTS * BS - BS, (i_t + 1) * BT - BS, -BS): + p_q = tl.make_block_ptr(q, (T, K), (stride_qk, 1), (i_s, i_k * BK), (BS, BK), (1, 0)) + p_do = tl.make_block_ptr(do, (T, V), (stride_vo, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_do = tl.load(p_do, boundary_check=(0, 1)) + b_ds = tl.dot(b_v, tl.trans(b_do)) + b_s = tl.dot(b_k, tl.trans(b_q)) + if USE_G: + p_gq = tl.make_block_ptr(g, (T,), (stride_g,), (i_s,), (BS,), (0,)) + b_gq = tl.load(p_gq, boundary_check=(0,)) + b_gp = tl.load(g + (min(i_s + BS, T) - 1) * stride_g) + b_gn = tl.load(g + (i_s - 1) * stride_g) if i_s % BT > 0 else 0. 
+ if i_s >= 0: + tmp = safe_exp(b_gp - b_gn) + b_dk *= tmp + b_dv *= tmp + tmp2 = safe_exp(b_gq - b_gn) + b_ds *= tmp2[None, :] + b_s *= tmp2[None, :] + # [BT, BK] + b_dk += tl.dot(b_ds.to(b_q.dtype), b_q) + # [BT, BV] + b_dv += tl.dot(b_s.to(b_do.dtype), b_do) + + if USE_G: + b_g_last = tl.load(g + (min(i_t * BT + BT, T) - 1) * stride_g) + if i_t >= 0: + tmp2 = safe_exp(b_g_last - b_gk)[:, None] + b_dk *= tmp2 + b_dv *= tmp2 + + o_q = i_t * BT + tl.arange(0, BS) + o_k = i_t * BT + tl.arange(0, BT) + for i_s in range(i_t * BT, min((i_t + 1) * BT, T), BS): + p_q = tl.make_block_ptr(q, (T, K), (stride_qk, 1), (i_s, i_k * BK), (BS, BK), (1, 0)) + p_do = tl.make_block_ptr(do, (T, V), (stride_vo, 1), (i_s, i_v * BV), (BS, BV), (1, 0)) + # [BS, BK] + b_q = tl.load(p_q, boundary_check=(0, 1)) + # [BS, BV] + b_do = tl.load(p_do, boundary_check=(0, 1)) + # [BS] + b_ds = tl.dot(b_v, tl.trans(b_do)) + b_s = tl.dot(b_k, tl.trans(b_q)) + if USE_G: + p_gq = tl.make_block_ptr(g, (T,), (stride_g,), (i_s,), (BS,), (0,)) + b_gq = tl.load(p_gq, boundary_check=(0,)) + if i_s >= 0: + tmp = safe_exp(-b_gk[:, None] + b_gq[None, :]) + b_ds *= tmp + b_s *= tmp + m_s = o_k[:, None] <= o_q[None, :] + b_s = tl.where(m_s, b_s, 0) + b_ds = tl.where(m_s, b_ds, 0) + # [BT, BK] + b_dk += tl.dot(b_ds.to(b_q.dtype), b_q) + b_dv += tl.dot(b_s.to(b_do.dtype), b_do) + o_q += BS + b_dk *= scale + b_dv *= scale + p_dk = tl.make_block_ptr(dk, (T, K), (stride_qk, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_dv = tl.make_block_ptr(dv, (T, V), (stride_vo, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + tl.store(p_dk, b_dk.to(p_dk.dtype.element_ty), boundary_check=(0, 1)) + tl.store(p_dv, b_dv.to(p_dv.dtype.element_ty), boundary_check=(0, 1)) + if USE_G: + p_dg = tl.make_block_ptr(dg, (T,), (stride_g,), (i_t * BT,), (BT,), (0,)) + b_dg = tl.load(p_dg, boundary_check=(0,)) + b_dg -= tl.sum(b_dk * b_k, 1) + tl.store(p_dg, b_dg.to(p_dg.dtype.element_ty), boundary_check=(0,)) + + +@triton.heuristics({ + 'NV': 
lambda args: triton.cdiv(args['V'], args['BV']), + 'USE_OFFSETS': lambda args: args['offsets'] is not None, + 'USE_G': lambda args: args['g'] is not None +}) +@triton.autotune( + configs=[ + triton.Config(triton_config, num_warps=num_warps) + for num_warps in [2, 4, 8, 16] + ], + key=['BT', 'BS', 'BK', 'BV', 'USE_G'], +) +@triton.jit(do_not_specialize=['T']) +def parallel_simple_gla_bwd_kernel( + q, + k, + v, + g, + do, + dq, + dk, + dv, + dg, + scale, + offsets, + indices, + T, + B: tl.constexpr, + H: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BS: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + NV: tl.constexpr, + USE_OFFSETS: tl.constexpr, + HEAD_FIRST: tl.constexpr, + USE_G: tl.constexpr +): + tl.static_assert(not (USE_OFFSETS and HEAD_FIRST), "USE_OFFSETS and HEAD_FIRST cannot be True at the same time") + i_kv, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_k, i_v = i_kv // NV, i_kv % NV + i_b, i_h = i_bh // H, i_bh % H + dq += i_v * B * H * T * K + dk += i_v * B * H * T * K + dv += i_k * B * H * T * V + if USE_G: + dg += i_kv * B * H * T + + if USE_OFFSETS: + i_n, i_t = tl.load(indices + i_t * 2).to(tl.int32), tl.load(indices + i_t * 2 + 1).to(tl.int32) + bos, eos = tl.load(offsets + i_n).to(tl.int32), tl.load(offsets + i_n + 1).to(tl.int32) + T = eos - bos + else: + bos, eos = i_b * T, i_b * T + T + + q += (i_bh * T * K) if HEAD_FIRST else (bos * H + i_h) * K + k += (i_bh * T * K) if HEAD_FIRST else (bos * H + i_h) * K + v += (i_bh * T * V) if HEAD_FIRST else (bos * H + i_h) * V + do += (i_bh * T * V) if HEAD_FIRST else (bos * H + i_h) * V + dq += (i_bh * T * K) if HEAD_FIRST else (bos * H + i_h) * K + dk += (i_bh * T * K) if HEAD_FIRST else (bos * H + i_h) * K + dv += (i_bh * T * V) if HEAD_FIRST else (bos * H + i_h) * V + if USE_G: + g += (i_bh * T) if HEAD_FIRST else (bos * H + i_h) + dg += (i_bh * T) if HEAD_FIRST else (bos * H + i_h) + stride_qk = K if HEAD_FIRST else H * K + stride_vo = V if 
HEAD_FIRST else H * V + stride_g = 1 if HEAD_FIRST else H + + parallel_simple_gla_bwd_kernel_dq( + i_t=i_t, + i_k=i_k, + i_v=i_v, + q=q, + k=k, + v=v, + g=g, + do=do, + dq=dq, + dg=dg, + scale=scale, + stride_qk=stride_qk, + stride_vo=stride_vo, + stride_g=stride_g, + T=T, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV, + USE_G=USE_G + ) + tl.debug_barrier() + parallel_simple_gla_bwd_kernel_dkv( + i_t=i_t, + i_k=i_k, + i_v=i_v, + q=q, + k=k, + v=v, + g=g, + do=do, + dk=dk, + dv=dv, + dg=dg, + scale=scale, + stride_qk=stride_qk, + stride_vo=stride_vo, + stride_g=stride_g, + T=T, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV, + USE_G=USE_G + ) + + +def parallel_simple_gla_fwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + scale: float, + output_attentions: bool = False, + chunk_size: int = 128, + head_first: bool = True, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, +): + if head_first: + B, H, T, K, V = *k.shape, v.shape[-1] + else: + B, T, H, K, V = *k.shape, v.shape[-1] + BT, BS = chunk_size, 32 + if check_shared_mem('hopper', k.device.index): + BK = min(256, triton.next_power_of_2(K)) + BV = min(256, triton.next_power_of_2(V)) + elif check_shared_mem('ampere', k.device.index): + BK = min(128, triton.next_power_of_2(K)) + BV = min(128, triton.next_power_of_2(V)) + else: + BK = min(64, triton.next_power_of_2(K)) + BV = min(64, triton.next_power_of_2(V)) + + NK = triton.cdiv(K, BK) + NV = triton.cdiv(V, BV) + assert BT % BS == 0 + + NT = triton.cdiv(T, BT) if offsets is None else len(indices) + + # local cumulative decay in log space + if g is not None: + g = chunk_local_cumsum(g, chunk_size, offsets=offsets, indices=indices, head_first=head_first) + grid = (NK * NV, NT, B * H) + o = torch.empty(NK, *v.shape, dtype=v.dtype if NK == 1 else torch.float, device=q.device) + attn = q.new_zeros(NK, B, H, T, T) if output_attentions else None + + parallel_simple_gla_fwd_kernel[grid]( + q=q, + 
k=k, + v=v, + g=g, + o=o, + attn=attn, + scale=scale, + offsets=offsets, + indices=indices, + B=B, + H=H, + T=T, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV, + HEAD_FIRST=head_first, + ) + o = o.sum(0) + + if output_attentions: + attn = attn.sum(0) + return o, g, attn + + +def parallel_simple_gla_bwd( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: torch.Tensor, + do: torch.Tensor, + scale: float, + chunk_size: int = 128, + head_first: bool = True, + offsets: Optional[torch.LongTensor] = None, + indices: Optional[torch.LongTensor] = None, +): + if head_first: + B, H, T, K, V = *k.shape, v.shape[-1] + else: + B, T, H, K, V = *k.shape, v.shape[-1] + BT, BS = chunk_size, 32 + if check_shared_mem('hopper', k.device.index): + BK = min(256, triton.next_power_of_2(K)) + BV = min(256, triton.next_power_of_2(V)) + elif check_shared_mem('ampere', k.device.index): + BK = min(128, triton.next_power_of_2(K)) + BV = min(128, triton.next_power_of_2(V)) + elif check_shared_mem('ada', k.device.index): + BK = min(64, triton.next_power_of_2(K)) + BV = min(64, triton.next_power_of_2(V)) + else: + BK = min(32, triton.next_power_of_2(K)) + BV = min(32, triton.next_power_of_2(V)) + + NK = triton.cdiv(K, BK) + NV = triton.cdiv(V, BV) + assert BT % BS == 0 + + dq = torch.empty(NV, * q.shape, dtype=q.dtype if NV == 1 else torch.float, device=q.device) + dk = torch.empty(NV, * k.shape, dtype=k.dtype if NV == 1 else torch.float, device=q.device) + dv = torch.empty(NK, * v.shape, dtype=v.dtype if NK == 1 else torch.float, device=q.device) + dg = torch.empty(NK*NV, *g.shape, dtype=torch.float, device=q.device) if g is not None else None + + NT = triton.cdiv(T, BT) if offsets is None else len(indices) + + grid = (NK * NV, NT, B * H) + parallel_simple_gla_bwd_kernel[grid]( + q=q, + k=k, + v=v, + g=g, + do=do, + dq=dq, + dk=dk, + dv=dv, + dg=dg, + offsets=offsets, + indices=indices, + scale=scale, + T=T, + B=B, + H=H, + K=K, + V=V, + BT=BT, + BS=BS, + BK=BK, + BV=BV, + 
HEAD_FIRST=head_first + ) + dq = dq.sum(0) + dk = dk.sum(0) + dv = dv.sum(0) + dg = chunk_global_cumsum(dg.sum(0), reverse=True, head_first=head_first, offsets=offsets) if g is not None else None + return dq, dk, dv, dg + + +class ParallelSimpleGLAFunction(torch.autograd.Function): + + @staticmethod + @input_guard + @autocast_custom_fwd + def forward(ctx, q, k, v, g, scale, output_attentions, head_first, offsets): + chunk_size = 128 + ctx.dtype = q.dtype + + # 2-d indices denoting the offsets of chunks in each sequence + # for example, if the passed `offsets` is [0, 100, 356] and `chunk_size` is 64, + # then there are 2 and 4 chunks in the 1st and 2nd sequences respectively, and `indices` will be + # [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [1, 3]] + indices = None + if offsets is not None: + indices = torch.cat([torch.arange(n) for n in triton.cdiv(offsets[1:] - offsets[:-1], chunk_size).tolist()]) + indices = torch.stack([indices.eq(0).cumsum(0) - 1, indices], 1).to(offsets) + + o, g, attn = parallel_simple_gla_fwd( + q=q, + k=k, + v=v, + g=g, + scale=scale, + output_attentions=output_attentions, + head_first=head_first, + offsets=offsets, + indices=indices, + chunk_size=chunk_size) + ctx.save_for_backward(q, k, v, g, offsets, indices) + ctx.scale = scale + ctx.chunk_size = chunk_size + ctx.head_first = head_first + return o.to(q.dtype), attn + + @staticmethod + @input_guard + @autocast_custom_bwd + def backward(ctx, do, da=None): + q, k, v, g, offsets, indices = ctx.saved_tensors + dq, dk, dv, dg = parallel_simple_gla_bwd( + q=q, + k=k, + v=v, + g=g, + do=do, + scale=ctx.scale, + chunk_size=ctx.chunk_size, + offsets=offsets, + indices=indices, + head_first=ctx.head_first) + return dq.to(q), dk.to(k), dv.to(v), dg.to(ctx.dtype) if dg is not None else None, None, None, None, None + + +def parallel_simple_gla( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + g: Optional[torch.Tensor] = None, + scale: Optional[float] = None, + output_attentions: bool = 
False, + cu_seqlens: Optional[torch.LongTensor] = None, + head_first: bool = True +) -> Tuple[torch.Tensor, torch.Tensor]: + r""" + Args: + q (torch.Tensor): + queries of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]` + k (torch.Tensor): + keys of shape `[B, H, T, K]` if `head_first=True` else `[B, T, H, K]` + v (torch.Tensor): + values of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]` + g (torch.Tensor): + Forget gates of shape `[B, H, T]` if `head_first=True` else `[B, T, H]`. + Compared to GLA, the gating is head-wise instead of elementwise. + scale (Optional[int]): + Scale factor for attention scores. + If not provided, it will default to `1 / sqrt(K)`. Default: `None`. + output_attentions (bool): + Whether to output the materialized attention scores of shape [B, H, T, T]. Default: `False`. + head_first (Optional[bool]): + Whether the inputs are in the head-first format. Default: `True`. + cu_seqlens (torch.LongTensor): + Cumulative sequence lengths of shape `[N+1]` used for variable-length training, + consistent with the FlashAttention API. + + Returns: + o (torch.Tensor): + Outputs of shape `[B, H, T, V]` if `head_first=True` else `[B, T, H, V]`. 
+ attn (torch.Tensor): + Attention scores of shape `[B, H, T, T]` if `output_attentions=True` else `None` + """ + if scale is None: + scale = k.shape[-1] ** -0.5 + if cu_seqlens is not None: + assert q.shape[0] == 1, "batch size must be 1 when cu_seqlens are provided" + assert not head_first, "head_first must be False when cu_seqlens are provided" + if g is not None: + g = g.float() + if output_attentions: + assert cu_seqlens is None, "output_attentions=True is not supported with variable-length sequences" + o, attn = ParallelSimpleGLAFunction.apply(q, k, v, g, scale, output_attentions, head_first, cu_seqlens) + return o, attn diff --git a/fla/ops/titans/__init__.py b/fla/ops/titans/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55aa4e0588f0b27d61ba190c5987ae7d637ca3d8 --- /dev/null +++ b/fla/ops/titans/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +from .naive import chunk_titans_linear + +__all__ = [ + 'chunk_titans_linear' +] diff --git a/fla/ops/utils/__init__.py b/fla/ops/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ca4b0ff5fcf03073efdcf657043ecdd482c8eec1 --- /dev/null +++ b/fla/ops/utils/__init__.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- + +from .asm import fp32_to_tf32_asm +from .cumsum import ( + chunk_global_cumsum, + chunk_global_cumsum_scalar, + chunk_global_cumsum_scalar_kernel, + chunk_global_cumsum_vector, + chunk_global_cumsum_vector_kernel, + chunk_local_cumsum, + chunk_local_cumsum_scalar, + chunk_local_cumsum_scalar_kernel, + chunk_local_cumsum_vector, + chunk_local_cumsum_vector_kernel +) +from .logcumsumexp import logcumsumexp_fwd_kernel +from .logsumexp import logsumexp_fwd, logsumexp_fwd_kernel +from .matmul import addmm, matmul, matmul_kernel +from .pooling import mean_pooling +from .softmax import softmax_bwd, softmax_bwd_kernel, softmax_fwd, softmax_fwd_kernel + +__all__ = [ + 'chunk_global_cumsum', + 'chunk_global_cumsum_scalar', + 
'chunk_global_cumsum_scalar_kernel', + 'chunk_global_cumsum_vector', + 'chunk_global_cumsum_vector_kernel', + 'chunk_local_cumsum', + 'chunk_local_cumsum_scalar', + 'chunk_local_cumsum_scalar_kernel', + 'chunk_local_cumsum_vector', + 'chunk_local_cumsum_vector_kernel', + 'logcumsumexp_fwd_kernel', + 'logsumexp_fwd', + 'logsumexp_fwd_kernel', + 'addmm', + 'matmul', + 'matmul_kernel', + 'mean_pooling', + 'softmax_bwd', + 'softmax_bwd_kernel', + 'softmax_fwd', + 'softmax_fwd_kernel', + 'fp32_to_tf32_asm', +] diff --git a/fla/ops/utils/asm.py b/fla/ops/utils/asm.py new file mode 100644 index 0000000000000000000000000000000000000000..c4a96bad2cecf24733832b6817f8d4b855685f05 --- /dev/null +++ b/fla/ops/utils/asm.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- + +from fla.utils import device_platform + + +def fp32_to_tf32_asm() -> str: + """ + Get the assembly code for converting FP32 to TF32. + """ + ASM_DICT = { + 'nvidia': 'cvt.rna.tf32.f32 $0, $1;' + } + if device_platform in ASM_DICT: + return ASM_DICT[device_platform] + else: + # return empty string if the device is not supported + return "" diff --git a/fla/ops/utils/logcumsumexp.py b/fla/ops/utils/logcumsumexp.py new file mode 100644 index 0000000000000000000000000000000000000000..7476d3220599aa78e8f8ae5b10d0d15297cc47b4 --- /dev/null +++ b/fla/ops/utils/logcumsumexp.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2025, Songlin Yang, Yu Zhang + +import triton +import triton.language as tl + +from fla.ops.utils.op import exp, log + + +@triton.autotune( + configs=[ + triton.Config({'BT': BT}, num_warps=num_warps) + for BT in [16, 32, 64] + for num_warps in [2, 4, 8] + ], + key=['S'] +) +@triton.jit(do_not_specialize=['T']) +def logcumsumexp_fwd_kernel( + s, + z, + T, + S: tl.constexpr, + BT: tl.constexpr +): + i_bh = tl.program_id(0) + o_i = tl.arange(0, BT) + m_s = tl.where(o_i[:, None] >= o_i[None, :], 1., 0.) 
+ + b_mp = tl.full([S,], float('-inf'), dtype=tl.float32) + b_zp = tl.zeros([S,], dtype=tl.float32) + for i_t in range(tl.cdiv(T, BT)): + p_s = tl.make_block_ptr(s + i_bh * T*S, (T, S), (S, 1), (i_t * BT, 0), (BT, S), (1, 0)) + p_z = tl.make_block_ptr(z + i_bh * T*S, (T, S), (S, 1), (i_t * BT, 0), (BT, S), (1, 0)) + + # [BT, S] + b_s = tl.load(p_s, boundary_check=(0, 1)).to(tl.float32) + # [S,] + b_mc = tl.max(b_s, 0) + b_mc = tl.maximum(b_mp, b_mc) + b_zp = b_zp * exp(b_mp - b_mc) + # [BT, S] + b_s = exp(b_s - b_mc) + b_z = tl.dot(m_s, b_s, allow_tf32=False) + b_zp + # [S,] + b_zc = tl.max(b_z, 0) + b_mp = b_mc + b_zp = b_zc + # [BT, BS] + # small eps to prevent underflows + b_z = log(tl.where(b_z != 0, b_z, 1e-20)) + b_mc + tl.store(p_z, b_z.to(p_z.dtype.element_ty), boundary_check=(0, 1)) diff --git a/fla/ops/utils/logsumexp.py b/fla/ops/utils/logsumexp.py new file mode 100644 index 0000000000000000000000000000000000000000..b647012b68c05ee59783d3d3615961962895a185 --- /dev/null +++ b/fla/ops/utils/logsumexp.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2023-2024, Songlin Yang, Yu Zhang + +from typing import Optional + +import torch +import triton +import triton.language as tl + +from fla.ops.utils.op import exp, log + + +@triton.heuristics({ + 'HAS_SCALE': lambda args: args['scale'] is not None +}) +@triton.autotune( + configs=[ + triton.Config({}, num_warps=num_warps) + for num_warps in [1, 2, 4, 8, 16, 32] + ], + key=['D'] +) +@triton.jit +def logsumexp_fwd_kernel( + x, + z, + scale, + D: tl.constexpr, + B: tl.constexpr, + HAS_SCALE: tl.constexpr +): + i_n, i_d = tl.program_id(0).to(tl.int64), tl.program_id(1).to(tl.int64) + o_d = i_d * B + tl.arange(0, B) + m_d = o_d < D + + b_x = tl.load(x + i_n * D + o_d, mask=m_d, other=-float('inf')) + if HAS_SCALE: + b_x = b_x * scale + b_m = tl.max(b_x, 0) + b_z = log(tl.sum(exp(b_x - b_m), 0)) + b_m + tl.store(z + i_n * tl.cdiv(D, B) + i_d, b_z) + + +def logsumexp_fwd( + x, + scale: Optional[float] = 
None, + dtype: Optional[torch.dtype] = None +): + r""" + Compute the logsumexp of the input tensor over the last dimension. + + Args: + x (Tensor): + The input tensor of any shape. + scale (Optional[float]): + The scale applied to the input tensor. Default: `None`. + dtype (Optional[torch.dtype]): + The data type of the output tensor. Default: `None`. + Returns: + Tensor: The logsumexp of the input tensor. + """ + + shape = x.shape + x = x.view(-1, shape[-1]) + N, D = x.shape + B = min(triton.next_power_of_2(D), 64 * 1024) + ND = triton.cdiv(D, B) + + z = x.new_empty(N, ND, dtype=torch.float) + logsumexp_fwd_kernel[(N, ND)]( + x=x, + z=z, + scale=scale, + D=D, + B=B + ) + z = z.logsumexp(-1).view(*shape[:-1]) + if dtype is not None and dtype != torch.float: + z = z.to(dtype) + return z diff --git a/fla/ops/utils/op.py b/fla/ops/utils/op.py new file mode 100644 index 0000000000000000000000000000000000000000..f0fe269ed8756b6a7b3ea396dffdfdd56b924ea9 --- /dev/null +++ b/fla/ops/utils/op.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Copyright (c) 2024, Songlin Yang, Yu Zhang + +import os + +import triton +import triton.language as tl +import triton.language.extra.libdevice as tldevice + +from fla.utils import is_gather_supported + +if os.environ.get('FLA_USE_FAST_OPS', '0') == '1': + div = tldevice.fast_dividef + exp = tldevice.fast_expf + log = tldevice.fast_logf + log2 = tldevice.fast_log2f +else: + @triton.jit + def div_normal(x, y): + return x / y + div = div_normal + exp = tl.exp + log = tl.log + log2 = tl.log2 + + +@triton.jit +def safe_exp(x): + return exp(tl.where(x <= 0, x, float('-inf'))) + + +if not is_gather_supported: + def gather(*args, **kwargs): + pass +else: + gather = tl.gather diff --git a/fla/ops/utils/testing.py b/fla/ops/utils/testing.py new file mode 100644 index 0000000000000000000000000000000000000000..6f4fb01202e6bfbea3351defe8a424ca648ce7d1 --- /dev/null +++ b/fla/ops/utils/testing.py @@ -0,0 +1,26 @@ +import os + +compiled_mode = 
os.getenv("COMPILER_MODE") == "1" +ci_env = os.getenv("CI_ENV") == "1" + + +def get_abs_err(x, y): + return (x.detach()-y.detach()).flatten().abs().max().item() + + +def get_err_ratio(x, y): + err = (x-y).flatten().square().mean().sqrt().item() + base = (x).flatten().square().mean().sqrt().item() + return err / (base + 1e-15) + + +def assert_close(prefix, ref, tri, ratio, warning=False): + msg = f"{prefix} diff: {get_abs_err(ref, tri):.6f} ratio: {get_err_ratio(ref, tri):.6f}" + print(msg) + error_rate = get_err_ratio(ref, tri) + if warning or str(prefix).strip().lower() == "dh0" or (ci_env and error_rate < 0.01): + if error_rate > ratio: + import warnings + warnings.warn(msg) + else: + assert error_rate < ratio, msg diff --git a/logs/none_99omtdbz/attempt_0/1/stderr.log b/logs/none_99omtdbz/attempt_0/1/stderr.log new file mode 100644 index 0000000000000000000000000000000000000000..09b2d44a4054b768d9825814a91b386a43ab4e5f --- /dev/null +++ b/logs/none_99omtdbz/attempt_0/1/stderr.log @@ -0,0 +1,17257 @@ +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc +wandb: Currently logged in as: zaydzuhri to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured. 
+[titan] 2025-10-04 22:33:17,413 - root - INFO - Starting job: default job +[titan] 2025-10-04 22:33:17,413 - root - INFO - { + "activation_checkpoint": { + "mode": "none", + "selective_ac_option": "2" + }, + "activation_offload": { + "mode": "none" + }, + "checkpoint": { + "async_mode": "disabled", + "convert_to_hf_on_save": false, + "create_seed_checkpoint": false, + "enable_checkpoint": true, + "exclude_from_loading": [], + "export_dtype": "float32", + "folder": "checkpoint", + "hf_repo_base_name": "zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000", + "hf_upload_enabled": true, + "hf_upload_format": "dcp", + "interval": 5000, + "interval_type": "steps", + "keep_latest_k": 0, + "load_step": -1, + "model_weights_only": false + }, + "comm": { + "init_timeout_seconds": 6000, + "trace_buf_size": 20000, + "train_timeout_seconds": 6000 + }, + "experimental": { + "context_parallel_degree": 1, + "context_parallel_rotate_method": "allgather", + "custom_model_path": "", + "enable_async_tensor_parallel": false, + "enable_compiled_autograd": false, + "pipeline_parallel_degree": 1, + "pipeline_parallel_microbatches": null, + "pipeline_parallel_schedule": "1F1B", + "pipeline_parallel_schedule_csv": "", + "pipeline_parallel_split_points": [] + }, + "fault_tolerance": { + "enable": false, + "group_size": 0, + "min_replica_size": 1, + "replica_id": 0 + }, + "float8": { + "enable_fsdp_float8_all_gather": false, + "force_recompute_fp8_weight_in_bwd": false, + "precompute_float8_dynamic_scale_for_fsdp": false, + "recipe_name": null + }, + "job": { + "config_file": "flame/models/fla.toml", + "description": "default job", + "dump_folder": "exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine", + "print_args": true, + "use_for_integration_test": false + }, + "lr_scheduler": { + "decay_ratio": null, + "decay_type": "cosine", + "lr_min": 0.1, + "warmup_steps": 400 + }, + "memory_estimation": { + "disable_fake_mode": false, + "enabled": false + }, + 
"metrics": { + "disable_color_printing": false, + "enable_tensorboard": false, + "enable_wandb": true, + "log_freq": 5, + "save_for_all_ranks": false, + "save_tb_folder": "tb" + }, + "model": { + "config": "configs/mtp_transformer_1B.json", + "converters": [], + "name": "fla", + "print_after_conversion": false, + "tokenizer_path": "fla-hub/transformer-1.3B-100B" + }, + "optimizer": { + "early_step_in_backward": false, + "eps": 1e-15, + "implementation": "fused", + "lr": 5e-05, + "name": "AdamW" + }, + "profiling": { + "enable_memory_snapshot": false, + "enable_profiling": true, + "profile_freq": 512, + "save_memory_snapshot_folder": "memory_snapshot", + "save_traces_folder": "profile_trace" + }, + "training": { + "batch_size": 16, + "compile": true, + "context_len": 4096, + "data_dir": null, + "data_files": null, + "data_parallel_replicate_degree": 1, + "data_parallel_shard_degree": -1, + "data_probs": null, + "dataset": "/root/.cache/zaydzuhri___open_math_instruct-2-text/default", + "dataset_name": "default", + "dataset_split": "train", + "deterministic": false, + "disable_loss_parallel": false, + "enable_cpu_offload": false, + "fsdp_reshard_after_forward": "default", + "gc_freq": 50, + "gradient_accumulation_steps": 1, + "max_norm": 1.0, + "mixed_precision_param": "bfloat16", + "mixed_precision_reduce": "float32", + "num_workers": 32, + "persistent_workers": false, + "pin_memory": false, + "prefetch_factor": 2, + "seed": 79, + "seq_len": 4096, + "skip_nan_inf": true, + "steps": 40000, + "streaming": false, + "tensor_parallel_degree": 1, + "varlen": false + } +} +[titan] 2025-10-04 22:33:17,413 - root - INFO - [GC] Initial GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:33:46,031 - root - INFO - Target Hugging Face repository for this run: zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000-20251004-223346 +[titan] 2025-10-04 22:33:46,032 - root - WARNING - ENV[TORCH_NCCL_ASYNC_ERROR_HANDLING] = 1 will be overridden to 3 based on job config +[titan] 2025-10-04 22:33:46,033 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:33:46,034 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:33:46,034 - root - INFO - Peak FLOPS used for computing MFU: 9.890e+14 +[titan] 2025-10-04 22:33:46,034 - root - INFO - Building 1-D device mesh with ['dp_shard'], [8] +[titan] 2025-10-04 22:33:46,098 - root - INFO - Loading tokenizer... +[titan] 2025-10-04 22:33:46,244 - root - INFO - LlamaTokenizerFast(name_or_path='fla-hub/transformer-1.3B-100B', vocab_size=32000, model_max_length=10000000000, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': ''}, clean_up_tokenization_spaces=False, added_tokens_decoder={ + 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 1: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), +} +) +[titan] 2025-10-04 22:33:46,245 - root - INFO - Loading dataset /root/.cache/zaydzuhri___open_math_instruct-2-text/default:default +`trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. 
+[titan] 2025-10-04 22:33:46,245 - datasets.load - ERROR - `trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:47,043 - root - INFO - Dataset({ + features: ['text'], + num_rows: 21972791 +}) +[titan] 2025-10-04 22:33:47,044 - root - INFO - Shuffling the dataset with seed 79 +[titan] 2025-10-04 22:33:53,238 - root - INFO - Loading model config from configs/mtp_transformer_1B.json +[titan] 2025-10-04 22:33:53,241 - root - INFO - Building dataloader... +[titan] 2025-10-04 22:33:53,242 - root - INFO - Building model from the config +MTPTransformerConfig { + "bos_token_id": 1, + "elementwise_affine": true, + "eos_token_id": 2, + "fuse_cross_entropy": true, + "fuse_norm": true, + "fuse_swiglu": true, + "hidden_act": "swish", + "hidden_ratio": 4, + "hidden_size": 2048, + "initializer_range": 0.006, + "intermediate_size": null, + "max_position_embeddings": 8192, + "model_type": "mtp_transformer", + "n_future_tokens": 4, + "norm_eps": 1e-06, + "num_heads": 32, + "num_hidden_layers": 32, + "num_kv_heads": null, + "pad_token_id": 2, + "qk_norm": false, + "qkv_bias": false, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "transformers_version": "4.51.3", + "use_cache": true, + "use_custom_backward": false, + "vocab_size": 32000, + "window_size": null +} + +[titan] 2025-10-04 22:33:53,371 - root - INFO -  +MTPTransformerForCausalLM( + (model): MTPTransformerModel( + (embeddings): Embedding(32000, 2048, padding_idx=2) + (layers): ModuleList( + (0-27): 28 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, 
out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (extra_heads): ModuleList( + (0-3): 4 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (norm): RMSNorm(2048, eps=1e-06) + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + (criterion): FusedLinearCrossEntropyLoss() +) + +[titan] 2025-10-04 22:33:53,399 - root - INFO - Compiling each block with torch.compile +[titan] 2025-10-04 22:33:53,399 - root - INFO - Compiling the embedding, norm, and lm_head layers with torch.compile +[titan] 2025-10-04 22:33:53,400 - root - INFO - Compiling the entire model with torch.compile +[titan] 2025-10-04 22:33:53,474 - root - INFO - Applied FSDP to the model +[titan] 2025-10-04 22:33:53,694 - root - INFO - CUDA memory usage for model: 
0.84GiB(0.60%) +[titan] 2025-10-04 22:33:53,714 - root - INFO - Checkpointing active. Checkpoints will be loaded from and saved to exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/checkpoint +[titan] 2025-10-04 22:33:53,714 - root - INFO - Loading the checkpoint at step 0. +[titan] 2025-10-04 22:34:08,154 - root - INFO - [GC] GC collection for checkpoint loading. 0.65 seconds. +[titan] 2025-10-04 22:34:08,154 - root - INFO - Finished loading the checkpoint in 14.44 seconds. +[titan] 2025-10-04 22:34:08,154 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:34:08,155 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:34:09,521 - root - INFO - ***** Running training ***** +[titan] 2025-10-04 22:34:09,526 - root - INFO -  Training starts at step 1 +[titan] 2025-10-04 22:34:09,538 - root - INFO -  Number of tokens per sequence = 4,096 +[titan] 2025-10-04 22:34:09,544 - root - INFO -  Gradient Accumulation steps = 1 +[titan] 2025-10-04 22:34:09,544 - root - INFO -  Instantaneous batch size (per device) = 16 +[titan] 2025-10-04 22:34:09,544 - root - INFO -  Global batch size (w. parallel, distributed & accumulation) = 128 (524,288 tokens) +[titan] 2025-10-04 22:34:09,544 - root - INFO -  Total optimization steps = 40,000 (20,971,520,000 tokens) +[titan] 2025-10-04 22:34:09,544 - root - INFO -  Warmup steps = 400 (209,715,200 tokens) +[titan] 2025-10-04 22:34:09,545 - root - INFO -  Number of parameters = 1,775,372,288  +[titan] 2025-10-04 22:34:09,545 - root - INFO - Profiling active. 
Traces will be saved at exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace +[titan] 2025-10-04 22:34:47,724 - root - INFO - step: 1 loss: 12.0105 memory: 116.89GiB(83.88%) tps: 1,656 tflops: 22.98 mfu: 2.32% global_avg_ntp_loss: 2.1249 global_avg_mtp_loss: 9.8856 +[titan] 2025-10-04 22:34:47,724 - root - INFO - lr: 2.4938e-07 gnorm: 20.89 [ 0:00:39<18 days, 7:37:51] +[titan] 2025-10-04 22:34:47,724 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-04 22:34:53,087 - root - INFO - [GC] GC collection invoked by checkpointer. 0.19 seconds. +[titan] 2025-10-04 22:34:53,088 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 5.36 seconds. +[titan] 2025-10-04 22:34:53,088 - root - INFO - Synchronizing and adjusting timeout for all ProcessGroups to 1:40:00 +[titan] 2025-10-04 22:36:58,991 - root - INFO - step: 5 loss: 11.7564 memory: 118.84GiB(85.28%) tps: 1,997 tflops: 27.71 mfu: 2.80% global_avg_ntp_loss: 2.0697 global_avg_mtp_loss: 9.6867 +[titan] 2025-10-04 22:36:58,992 - root - INFO - lr: 7.4813e-07 gnorm: 19.96 [ 0:02:50<15 days, 19:35:07] +[titan] 2025-10-04 22:37:09,851 - root - INFO - step: 10 loss: 11.2335 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 1.9192 global_avg_mtp_loss: 9.3143 +[titan] 2025-10-04 22:37:09,851 - root - INFO - lr: 1.3716e-06 gnorm: 18.16 [ 0:03:01<8 days, 9:49:54] +[titan] 2025-10-04 22:37:20,642 - root - INFO - step: 15 loss: 10.8309 memory: 118.84GiB(85.28%) tps: 30,368 tflops: 421.30 mfu: 42.60% global_avg_ntp_loss: 1.7960 global_avg_mtp_loss: 9.0349 +[titan] 2025-10-04 22:37:20,642 - root - INFO - lr: 1.9950e-06 gnorm: 10.62 [ 0:03:12<5 days, 22:31:39] +[titan] 2025-10-04 22:37:31,508 - root - INFO - step: 20 loss: 10.3172 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 1.6641 global_avg_mtp_loss: 8.6531 +[titan] 2025-10-04 22:37:31,508 - 
root - INFO - lr: 2.6185e-06 gnorm: 8.22 [ 0:03:23<4 days, 16:54:56] +[titan] 2025-10-04 22:37:42,328 - root - INFO - step: 25 loss: 9.9294 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 1.5801 global_avg_mtp_loss: 8.3492 +[titan] 2025-10-04 22:37:42,328 - root - INFO - lr: 3.2419e-06 gnorm: 7.10 [ 0:03:34<3 days, 23:07:37] +[titan] 2025-10-04 22:37:53,161 - root - INFO - step: 30 loss: 9.5763 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 1.4997 global_avg_mtp_loss: 8.0766 +[titan] 2025-10-04 22:37:53,161 - root - INFO - lr: 3.8653e-06 gnorm: 6.23 [ 0:03:45<3 days, 11:16:17] +[titan] 2025-10-04 22:38:04,056 - root - INFO - step: 35 loss: 9.3711 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 1.4603 global_avg_mtp_loss: 7.9108 +[titan] 2025-10-04 22:38:04,056 - root - INFO - lr: 4.4888e-06 gnorm: 6.20 [ 0:03:55<3 days, 2:49:20] +[titan] 2025-10-04 22:38:14,933 - root - INFO - step: 40 loss: 9.0179 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 1.3853 global_avg_mtp_loss: 7.6325 +[titan] 2025-10-04 22:38:14,933 - root - INFO - lr: 5.1122e-06 gnorm: 5.60 [ 0:04:06<2 days, 20:28:45] +[titan] 2025-10-04 22:38:25,789 - root - INFO - step: 45 loss: 8.7524 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 1.3406 global_avg_mtp_loss: 7.4118 +[titan] 2025-10-04 22:38:25,789 - root - INFO - lr: 5.7357e-06 gnorm: 5.43 [ 0:04:17<2 days, 15:32:25] +[titan] 2025-10-04 22:38:34,543 - root - INFO - [GC] Peforming periodical GC collection. 0.04 seconds. 
+[titan] 2025-10-04 22:38:36,745 - root - INFO - step: 50 loss: 8.5439 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.94 mfu: 41.96% global_avg_ntp_loss: 1.3050 global_avg_mtp_loss: 7.2389 +[titan] 2025-10-04 22:38:36,746 - root - INFO - lr: 6.3591e-06 gnorm: 5.74 [ 0:04:28<2 days, 11:36:38] +[titan] 2025-10-04 22:38:47,618 - root - INFO - step: 55 loss: 8.3158 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 1.2609 global_avg_mtp_loss: 7.0549 +[titan] 2025-10-04 22:38:47,619 - root - INFO - lr: 6.9825e-06 gnorm: 5.52 [ 0:04:39<2 days, 8:22:41] +[titan] 2025-10-04 22:38:58,482 - root - INFO - step: 60 loss: 8.2006 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 1.2373 global_avg_mtp_loss: 6.9633 +[titan] 2025-10-04 22:38:58,482 - root - INFO - lr: 7.6060e-06 gnorm: 5.72 [ 0:04:50<2 days, 5:40:56] +[titan] 2025-10-04 22:39:09,360 - root - INFO - step: 65 loss: 8.1393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 1.2182 global_avg_mtp_loss: 6.9211 +[titan] 2025-10-04 22:39:09,360 - root - INFO - lr: 8.2294e-06 gnorm: 5.66 [ 0:05:01<2 days, 3:24:11] +[titan] 2025-10-04 22:39:20,248 - root - INFO - step: 70 loss: 7.7608 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 1.1495 global_avg_mtp_loss: 6.6112 +[titan] 2025-10-04 22:39:20,248 - root - INFO - lr: 8.8529e-06 gnorm: 5.54 [ 0:05:12<2 days, 1:27:02] +[titan] 2025-10-04 22:39:31,185 - root - INFO - step: 75 loss: 7.6862 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 1.1395 global_avg_mtp_loss: 6.5467 +[titan] 2025-10-04 22:39:31,185 - root - INFO - lr: 9.4763e-06 gnorm: 6.04 [ 0:05:23<1 day, 23:45:55] +[titan] 2025-10-04 22:39:42,063 - root - INFO - step: 80 loss: 7.4352 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.0959 global_avg_mtp_loss: 6.3393 +[titan] 2025-10-04 22:39:42,063 - 
root - INFO - lr: 1.0100e-05 gnorm: 5.61 [ 0:05:33<1 day, 22:16:55] +[titan] 2025-10-04 22:39:52,933 - root - INFO - step: 85 loss: 7.3232 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 1.0671 global_avg_mtp_loss: 6.2561 +[titan] 2025-10-04 22:39:52,934 - root - INFO - lr: 1.0723e-05 gnorm: 5.89 [ 0:05:44<1 day, 20:58:19] +[titan] 2025-10-04 22:40:03,808 - root - INFO - step: 90 loss: 7.1910 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 1.0545 global_avg_mtp_loss: 6.1364 +[titan] 2025-10-04 22:40:03,808 - root - INFO - lr: 1.1347e-05 gnorm: 6.24 [ 0:05:55<1 day, 19:48:27] +[titan] 2025-10-04 22:40:14,668 - root - INFO - step: 95 loss: 7.0637 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 1.0179 global_avg_mtp_loss: 6.0458 +[titan] 2025-10-04 22:40:14,668 - root - INFO - lr: 1.1970e-05 gnorm: 5.80 [ 0:06:06<1 day, 18:45:50] +[titan] 2025-10-04 22:40:23,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:40:25,567 - root - INFO - step: 100 loss: 7.0183 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 1.0144 global_avg_mtp_loss: 6.0039 +[titan] 2025-10-04 22:40:25,567 - root - INFO - lr: 1.2594e-05 gnorm: 5.49 [ 0:06:17<1 day, 17:49:42] +[titan] 2025-10-04 22:40:36,554 - root - INFO - step: 105 loss: 6.7845 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.77 mfu: 41.84% global_avg_ntp_loss: 0.9684 global_avg_mtp_loss: 5.8161 +[titan] 2025-10-04 22:40:36,554 - root - INFO - lr: 1.3217e-05 gnorm: 5.66 [ 0:06:28<1 day, 16:59:28] +[titan] 2025-10-04 22:40:47,440 - root - INFO - step: 110 loss: 6.7610 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.9616 global_avg_mtp_loss: 5.7993 +[titan] 2025-10-04 22:40:47,440 - root - INFO - lr: 1.3840e-05 gnorm: 5.76 [ 0:06:39<1 day, 16:13:10] +[titan] 2025-10-04 22:40:58,316 - root - INFO - step: 115 loss: 6.7822 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.9526 global_avg_mtp_loss: 5.8296 +[titan] 2025-10-04 22:40:58,316 - root - INFO - lr: 1.4464e-05 gnorm: 5.41 [ 0:06:50<1 day, 15:30:50] +[titan] 2025-10-04 22:41:09,192 - root - INFO - step: 120 loss: 6.5921 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.9190 global_avg_mtp_loss: 5.6731 +[titan] 2025-10-04 22:41:09,193 - root - INFO - lr: 1.5087e-05 gnorm: 5.18 [ 0:07:01<1 day, 14:52:00] +[titan] 2025-10-04 22:41:20,086 - root - INFO - step: 125 loss: 6.3759 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8880 global_avg_mtp_loss: 5.4878 +[titan] 2025-10-04 22:41:20,086 - root - INFO - lr: 1.5711e-05 gnorm: 4.91 [ 0:07:11<1 day, 14:16:21] +[titan] 2025-10-04 22:41:31,181 - root - INFO - step: 130 loss: 6.3566 memory: 118.84GiB(85.28%) tps: 29,536 tflops: 409.77 mfu: 41.43% global_avg_ntp_loss: 0.8781 global_avg_mtp_loss: 5.4786 +[titan] 2025-10-04 
22:41:31,181 - root - INFO - lr: 1.6334e-05 gnorm: 4.37 [ 0:07:23<1 day, 13:44:28] +[titan] 2025-10-04 22:41:42,074 - root - INFO - step: 135 loss: 6.3044 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8713 global_avg_mtp_loss: 5.4331 +[titan] 2025-10-04 22:41:42,075 - root - INFO - lr: 1.6958e-05 gnorm: 4.29 [ 0:07:33<1 day, 13:13:56] +[titan] 2025-10-04 22:41:52,936 - root - INFO - step: 140 loss: 6.3158 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.8632 global_avg_mtp_loss: 5.4526 +[titan] 2025-10-04 22:41:52,936 - root - INFO - lr: 1.7581e-05 gnorm: 3.03 [ 0:07:44<1 day, 12:45:25] +[titan] 2025-10-04 22:42:03,814 - root - INFO - step: 145 loss: 6.2266 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.8508 global_avg_mtp_loss: 5.3758 +[titan] 2025-10-04 22:42:03,815 - root - INFO - lr: 1.8204e-05 gnorm: 3.86 [ 0:07:55<1 day, 12:18:56] +[titan] 2025-10-04 22:42:12,515 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:42:14,709 - root - INFO - step: 150 loss: 6.0872 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.8237 global_avg_mtp_loss: 5.2635 +[titan] 2025-10-04 22:42:14,710 - root - INFO - lr: 1.8828e-05 gnorm: 3.31 [ 0:08:06<1 day, 11:54:16] +[titan] 2025-10-04 22:42:25,613 - root - INFO - step: 155 loss: 6.0870 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.8286 global_avg_mtp_loss: 5.2584 +[titan] 2025-10-04 22:42:25,613 - root - INFO - lr: 1.9451e-05 gnorm: 3.04 [ 0:08:17<1 day, 11:31:14] +[titan] 2025-10-04 22:42:36,528 - root - INFO - step: 160 loss: 5.9733 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.8032 global_avg_mtp_loss: 5.1701 +[titan] 2025-10-04 22:42:36,529 - root - INFO - lr: 2.0075e-05 gnorm: 3.06 [ 0:08:28<1 day, 11:09:40] +[titan] 2025-10-04 22:42:47,448 - root - INFO - step: 165 loss: 5.8683 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.7907 global_avg_mtp_loss: 5.0776 +[titan] 2025-10-04 22:42:47,448 - root - INFO - lr: 2.0698e-05 gnorm: 3.39 [ 0:08:39<1 day, 10:49:25] +[titan] 2025-10-04 22:42:58,343 - root - INFO - step: 170 loss: 5.8536 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.7847 global_avg_mtp_loss: 5.0689 +[titan] 2025-10-04 22:42:58,343 - root - INFO - lr: 2.1322e-05 gnorm: 2.80 [ 0:08:50<1 day, 10:30:15] +[titan] 2025-10-04 22:43:09,215 - root - INFO - step: 175 loss: 5.7812 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.7716 global_avg_mtp_loss: 5.0096 +[titan] 2025-10-04 22:43:09,216 - root - INFO - lr: 2.1945e-05 gnorm: 4.02 [ 0:09:01<1 day, 10:12:05] +[titan] 2025-10-04 22:43:20,097 - root - INFO - step: 180 loss: 5.7994 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.7711 global_avg_mtp_loss: 5.0283 +[titan] 2025-10-04 
22:43:20,098 - root - INFO - lr: 2.2569e-05 gnorm: 3.36 [ 0:09:11<1 day, 9:54:57] +[titan] 2025-10-04 22:43:31,003 - root - INFO - step: 185 loss: 5.6617 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9134 +[titan] 2025-10-04 22:43:31,004 - root - INFO - lr: 2.3192e-05 gnorm: 2.73 [ 0:09:22<1 day, 9:38:49] +[titan] 2025-10-04 22:43:41,902 - root - INFO - step: 190 loss: 5.6564 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9080 +[titan] 2025-10-04 22:43:41,903 - root - INFO - lr: 2.3815e-05 gnorm: 3.17 [ 0:09:33<1 day, 9:23:30] +[titan] 2025-10-04 22:43:52,788 - root - INFO - step: 195 loss: 5.6643 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.7475 global_avg_mtp_loss: 4.9168 +[titan] 2025-10-04 22:43:52,788 - root - INFO - lr: 2.4439e-05 gnorm: 2.43 [ 0:09:44<1 day, 9:08:55] +[titan] 2025-10-04 22:44:01,482 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:44:03,675 - root - INFO - step: 200 loss: 5.6189 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.7360 global_avg_mtp_loss: 4.8830 +[titan] 2025-10-04 22:44:03,675 - root - INFO - lr: 2.5062e-05 gnorm: 3.47 [ 0:09:55<1 day, 8:55:04] +[titan] 2025-10-04 22:44:14,559 - root - INFO - step: 205 loss: 5.5215 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.7213 global_avg_mtp_loss: 4.8002 +[titan] 2025-10-04 22:44:14,559 - root - INFO - lr: 2.5686e-05 gnorm: 3.09 [ 0:10:06<1 day, 8:41:52] +[titan] 2025-10-04 22:44:25,433 - root - INFO - step: 210 loss: 5.5044 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.7198 global_avg_mtp_loss: 4.7846 +[titan] 2025-10-04 22:44:25,433 - root - INFO - lr: 2.6309e-05 gnorm: 2.66 [ 0:10:17<1 day, 8:29:15] +[titan] 2025-10-04 22:44:36,338 - root - INFO - step: 215 loss: 5.4728 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.7115 global_avg_mtp_loss: 4.7613 +[titan] 2025-10-04 22:44:36,338 - root - INFO - lr: 2.6933e-05 gnorm: 2.45 [ 0:10:28<1 day, 8:17:18] +[titan] 2025-10-04 22:44:47,225 - root - INFO - step: 220 loss: 5.3310 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.6944 global_avg_mtp_loss: 4.6366 +[titan] 2025-10-04 22:44:47,225 - root - INFO - lr: 2.7556e-05 gnorm: 2.66 [ 0:10:39<1 day, 8:05:51] +[titan] 2025-10-04 22:44:58,124 - root - INFO - step: 225 loss: 5.3739 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.6934 global_avg_mtp_loss: 4.6805 +[titan] 2025-10-04 22:44:58,125 - root - INFO - lr: 2.8180e-05 gnorm: 2.95 [ 0:10:49<1 day, 7:54:55] +[titan] 2025-10-04 22:45:09,004 - root - INFO - step: 230 loss: 5.4216 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.7014 global_avg_mtp_loss: 4.7202 +[titan] 2025-10-04 22:45:09,004 - 
root - INFO - lr: 2.8803e-05 gnorm: 2.60 [ 0:11:00<1 day, 7:44:25] +[titan] 2025-10-04 22:45:19,907 - root - INFO - step: 235 loss: 5.3090 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.6909 global_avg_mtp_loss: 4.6180 +[titan] 2025-10-04 22:45:19,907 - root - INFO - lr: 2.9426e-05 gnorm: 2.68 [ 0:11:11<1 day, 7:34:24] +[titan] 2025-10-04 22:45:30,796 - root - INFO - step: 240 loss: 5.2690 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.6785 global_avg_mtp_loss: 4.5905 +[titan] 2025-10-04 22:45:30,796 - root - INFO - lr: 3.0050e-05 gnorm: 2.38 [ 0:11:22<1 day, 7:24:46] +[titan] 2025-10-04 22:45:41,709 - root - INFO - step: 245 loss: 5.1965 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.6691 global_avg_mtp_loss: 4.5274 +[titan] 2025-10-04 22:45:41,710 - root - INFO - lr: 3.0673e-05 gnorm: 2.47 [ 0:11:33<1 day, 7:15:35] +[titan] 2025-10-04 22:45:50,403 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:45:52,597 - root - INFO - step: 250 loss: 5.1858 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6654 global_avg_mtp_loss: 4.5204 +[titan] 2025-10-04 22:45:52,597 - root - INFO - lr: 3.1297e-05 gnorm: 3.00 [ 0:11:44<1 day, 7:06:42] +[titan] 2025-10-04 22:46:03,496 - root - INFO - step: 255 loss: 5.1706 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.6625 global_avg_mtp_loss: 4.5081 +[titan] 2025-10-04 22:46:03,496 - root - INFO - lr: 3.1920e-05 gnorm: 2.61 [ 0:11:55<1 day, 6:58:10] +[titan] 2025-10-04 22:46:14,369 - root - INFO - step: 260 loss: 5.1473 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.6607 global_avg_mtp_loss: 4.4865 +[titan] 2025-10-04 22:46:14,369 - root - INFO - lr: 3.2544e-05 gnorm: 2.39 [ 0:12:06<1 day, 6:49:54] +[titan] 2025-10-04 22:46:25,252 - root - INFO - step: 265 loss: 5.1300 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.6565 global_avg_mtp_loss: 4.4735 +[titan] 2025-10-04 22:46:25,253 - root - INFO - lr: 3.3167e-05 gnorm: 2.29 [ 0:12:17<1 day, 6:41:58] +[titan] 2025-10-04 22:46:36,152 - root - INFO - step: 270 loss: 5.1579 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6598 global_avg_mtp_loss: 4.4981 +[titan] 2025-10-04 22:46:36,152 - root - INFO - lr: 3.3791e-05 gnorm: 2.51 [ 0:12:27<1 day, 6:34:22] +[titan] 2025-10-04 22:46:47,010 - root - INFO - step: 275 loss: 5.0167 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.6398 global_avg_mtp_loss: 4.3769 +[titan] 2025-10-04 22:46:47,011 - root - INFO - lr: 3.4414e-05 gnorm: 2.10 [ 0:12:38<1 day, 6:26:55] +[titan] 2025-10-04 22:46:57,896 - root - INFO - step: 280 loss: 5.0898 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.6486 global_avg_mtp_loss: 4.4413 +[titan] 2025-10-04 22:46:57,896 - 
root - INFO - lr: 3.5037e-05 gnorm: 3.07 [ 0:12:49<1 day, 6:19:49] +[titan] 2025-10-04 22:47:08,770 - root - INFO - step: 285 loss: 5.1105 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.6521 global_avg_mtp_loss: 4.4584 +[titan] 2025-10-04 22:47:08,770 - root - INFO - lr: 3.5661e-05 gnorm: 2.23 [ 0:13:00<1 day, 6:12:55] +[titan] 2025-10-04 22:47:19,662 - root - INFO - step: 290 loss: 5.0807 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6456 global_avg_mtp_loss: 4.4352 +[titan] 2025-10-04 22:47:19,662 - root - INFO - lr: 3.6284e-05 gnorm: 2.82 [ 0:13:11<1 day, 6:06:17] +[titan] 2025-10-04 22:47:30,549 - root - INFO - step: 295 loss: 5.0464 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6427 global_avg_mtp_loss: 4.4037 +[titan] 2025-10-04 22:47:30,550 - root - INFO - lr: 3.6908e-05 gnorm: 2.35 [ 0:13:22<1 day, 5:59:52] +[titan] 2025-10-04 22:47:39,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:47:41,466 - root - INFO - step: 300 loss: 5.1119 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.6529 global_avg_mtp_loss: 4.4589 +[titan] 2025-10-04 22:47:41,466 - root - INFO - lr: 3.7531e-05 gnorm: 2.72 [ 0:13:33<1 day, 5:53:44] +[titan] 2025-10-04 22:47:52,331 - root - INFO - step: 305 loss: 4.9831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.6338 global_avg_mtp_loss: 4.3492 +[titan] 2025-10-04 22:47:52,331 - root - INFO - lr: 3.8155e-05 gnorm: 2.81 [ 0:13:44<1 day, 5:47:40] +[titan] 2025-10-04 22:48:03,188 - root - INFO - step: 310 loss: 4.9896 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.6364 global_avg_mtp_loss: 4.3532 +[titan] 2025-10-04 22:48:03,188 - root - INFO - lr: 3.8778e-05 gnorm: 2.39 [ 0:13:55<1 day, 5:41:47] +[titan] 2025-10-04 22:48:14,051 - root - INFO - step: 315 loss: 4.8865 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.6207 global_avg_mtp_loss: 4.2658 +[titan] 2025-10-04 22:48:14,051 - root - INFO - lr: 3.9401e-05 gnorm: 3.11 [ 0:14:05<1 day, 5:36:05] +[titan] 2025-10-04 22:48:24,948 - root - INFO - step: 320 loss: 4.9416 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.6290 global_avg_mtp_loss: 4.3126 +[titan] 2025-10-04 22:48:24,948 - root - INFO - lr: 4.0025e-05 gnorm: 2.57 [ 0:14:16<1 day, 5:30:38] +[titan] 2025-10-04 22:48:35,879 - root - INFO - step: 325 loss: 4.8914 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.6229 global_avg_mtp_loss: 4.2686 +[titan] 2025-10-04 22:48:35,879 - root - INFO - lr: 4.0648e-05 gnorm: 2.22 [ 0:14:27<1 day, 5:25:24] +[titan] 2025-10-04 22:48:46,771 - root - INFO - step: 330 loss: 4.8494 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.6146 global_avg_mtp_loss: 4.2348 +[titan] 2025-10-04 22:48:46,771 - 
root - INFO - lr: 4.1272e-05 gnorm: 2.17 [ 0:14:38<1 day, 5:20:16] +[titan] 2025-10-04 22:48:57,658 - root - INFO - step: 335 loss: 4.9431 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6274 global_avg_mtp_loss: 4.3157 +[titan] 2025-10-04 22:48:57,658 - root - INFO - lr: 4.1895e-05 gnorm: 2.41 [ 0:14:49<1 day, 5:15:15] +[titan] 2025-10-04 22:49:08,546 - root - INFO - step: 340 loss: 4.8429 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6110 global_avg_mtp_loss: 4.2319 +[titan] 2025-10-04 22:49:08,546 - root - INFO - lr: 4.2519e-05 gnorm: 2.38 [ 0:15:00<1 day, 5:10:23] +[titan] 2025-10-04 22:49:19,437 - root - INFO - step: 345 loss: 4.7699 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.6044 global_avg_mtp_loss: 4.1656 +[titan] 2025-10-04 22:49:19,437 - root - INFO - lr: 4.3142e-05 gnorm: 2.47 [ 0:15:11<1 day, 5:05:40] +[titan] 2025-10-04 22:49:28,142 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:49:30,329 - root - INFO - step: 350 loss: 4.8354 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6129 global_avg_mtp_loss: 4.2225 +[titan] 2025-10-04 22:49:30,329 - root - INFO - lr: 4.3766e-05 gnorm: 2.30 [ 0:15:22<1 day, 5:01:05] +[titan] 2025-10-04 22:49:41,264 - root - INFO - step: 355 loss: 4.8409 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.6123 global_avg_mtp_loss: 4.2286 +[titan] 2025-10-04 22:49:41,264 - root - INFO - lr: 4.4389e-05 gnorm: 2.44 [ 0:15:33<1 day, 4:56:41] +[titan] 2025-10-04 22:49:52,147 - root - INFO - step: 360 loss: 4.6777 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.5902 global_avg_mtp_loss: 4.0875 +[titan] 2025-10-04 22:49:52,148 - root - INFO - lr: 4.5012e-05 gnorm: 1.96 [ 0:15:43<1 day, 4:52:19] +[titan] 2025-10-04 22:50:03,033 - root - INFO - step: 365 loss: 4.8152 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.6116 global_avg_mtp_loss: 4.2037 +[titan] 2025-10-04 22:50:03,033 - root - INFO - lr: 4.5636e-05 gnorm: 2.14 [ 0:15:54<1 day, 4:48:05] +[titan] 2025-10-04 22:50:13,908 - root - INFO - step: 370 loss: 4.7797 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.6024 global_avg_mtp_loss: 4.1773 +[titan] 2025-10-04 22:50:13,908 - root - INFO - lr: 4.6259e-05 gnorm: 2.37 [ 0:16:05<1 day, 4:43:55] +[titan] 2025-10-04 22:50:24,783 - root - INFO - step: 375 loss: 4.6716 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.5906 global_avg_mtp_loss: 4.0810 +[titan] 2025-10-04 22:50:24,783 - root - INFO - lr: 4.6883e-05 gnorm: 2.26 [ 0:16:16<1 day, 4:39:52] +[titan] 2025-10-04 22:50:35,652 - root - INFO - step: 380 loss: 4.7162 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.5950 global_avg_mtp_loss: 4.1212 +[titan] 2025-10-04 22:50:35,652 - 
root - INFO - lr: 4.7506e-05 gnorm: 2.15 [ 0:16:27<1 day, 4:35:55] +[titan] 2025-10-04 22:50:46,574 - root - INFO - step: 385 loss: 4.8016 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.6054 global_avg_mtp_loss: 4.1962 +[titan] 2025-10-04 22:50:46,574 - root - INFO - lr: 4.8130e-05 gnorm: 2.50 [ 0:16:38<1 day, 4:32:09] +[titan] 2025-10-04 22:50:57,443 - root - INFO - step: 390 loss: 4.7078 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.5929 global_avg_mtp_loss: 4.1150 +[titan] 2025-10-04 22:50:57,444 - root - INFO - lr: 4.8753e-05 gnorm: 2.00 [ 0:16:49<1 day, 4:28:23] +[titan] 2025-10-04 22:51:08,305 - root - INFO - step: 395 loss: 4.6384 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.5834 global_avg_mtp_loss: 4.0551 +[titan] 2025-10-04 22:51:08,305 - root - INFO - lr: 4.9377e-05 gnorm: 2.37 [ 0:17:00<1 day, 4:24:42] +[titan] 2025-10-04 22:51:16,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:51:19,152 - root - INFO - step: 400 loss: 4.6918 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.5928 global_avg_mtp_loss: 4.0990 +[titan] 2025-10-04 22:51:19,152 - root - INFO - lr: 5.0000e-05 gnorm: 2.36 [ 0:17:10<1 day, 4:21:04] +[titan] 2025-10-04 22:51:30,025 - root - INFO - step: 405 loss: 4.6284 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.5843 global_avg_mtp_loss: 4.0441 +[titan] 2025-10-04 22:51:30,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.37 [ 0:17:21<1 day, 4:17:34] +[titan] 2025-10-04 22:51:40,903 - root - INFO - step: 410 loss: 4.5757 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.5764 global_avg_mtp_loss: 3.9993 +[titan] 2025-10-04 22:51:40,903 - root - INFO - lr: 5.0000e-05 gnorm: 2.16 [ 0:17:32<1 day, 4:14:10] +[titan] 2025-10-04 22:51:51,757 - root - INFO - step: 415 loss: 4.6798 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.5875 global_avg_mtp_loss: 4.0923 +[titan] 2025-10-04 22:51:51,758 - root - INFO - lr: 5.0000e-05 gnorm: 2.18 [ 0:17:43<1 day, 4:10:48] +[titan] 2025-10-04 22:52:02,632 - root - INFO - step: 420 loss: 4.6984 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.5914 global_avg_mtp_loss: 4.1070 +[titan] 2025-10-04 22:52:02,632 - root - INFO - lr: 5.0000e-05 gnorm: 2.08 [ 0:17:54<1 day, 4:07:32] +[titan] 2025-10-04 22:52:13,523 - root - INFO - step: 425 loss: 4.6583 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.5870 global_avg_mtp_loss: 4.0713 +[titan] 2025-10-04 22:52:13,523 - root - INFO - lr: 5.0000e-05 gnorm: 1.97 [ 0:18:05<1 day, 4:04:22] +[titan] 2025-10-04 22:52:24,408 - root - INFO - step: 430 loss: 4.5843 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.5750 global_avg_mtp_loss: 4.0093 +[titan] 2025-10-04 22:52:24,408 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.22 [ 0:18:16<1 day, 4:01:16] +[titan] 2025-10-04 22:52:35,258 - root - INFO - step: 435 loss: 4.5321 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.5697 global_avg_mtp_loss: 3.9625 +[titan] 2025-10-04 22:52:35,258 - root - INFO - lr: 5.0000e-05 gnorm: 2.13 [ 0:18:27<1 day, 3:58:11] +[titan] 2025-10-04 22:52:46,145 - root - INFO - step: 440 loss: 4.5606 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.5730 global_avg_mtp_loss: 3.9875 +[titan] 2025-10-04 22:52:46,146 - root - INFO - lr: 5.0000e-05 gnorm: 2.40 [ 0:18:37<1 day, 3:55:13] +[titan] 2025-10-04 22:52:57,025 - root - INFO - step: 445 loss: 4.5406 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.5687 global_avg_mtp_loss: 3.9718 +[titan] 2025-10-04 22:52:57,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:18:48<1 day, 3:52:18] +[titan] 2025-10-04 22:53:05,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:53:07,904 - root - INFO - step: 450 loss: 4.5707 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.5740 global_avg_mtp_loss: 3.9967 +[titan] 2025-10-04 22:53:07,904 - root - INFO - lr: 5.0000e-05 gnorm: 2.34 [ 0:18:59<1 day, 3:49:27] +[titan] 2025-10-04 22:53:18,769 - root - INFO - step: 455 loss: 4.4743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.5620 global_avg_mtp_loss: 3.9123 +[titan] 2025-10-04 22:53:18,770 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:19:10<1 day, 3:46:38] +[titan] 2025-10-04 22:53:29,609 - root - INFO - step: 460 loss: 4.4303 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8761 +[titan] 2025-10-04 22:53:29,609 - root - INFO - lr: 5.0000e-05 gnorm: 2.25 [ 0:19:21<1 day, 3:43:50] +[titan] 2025-10-04 22:53:40,497 - root - INFO - step: 465 loss: 4.4283 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.5552 global_avg_mtp_loss: 3.8731 +[titan] 2025-10-04 22:53:40,497 - root - INFO - lr: 5.0000e-05 gnorm: 1.84 [ 0:19:32<1 day, 3:41:10] +[titan] 2025-10-04 22:53:51,344 - root - INFO - step: 470 loss: 4.4176 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8632 +[titan] 2025-10-04 22:53:51,344 - root - INFO - lr: 5.0000e-05 gnorm: 2.15 [ 0:19:43<1 day, 3:38:29] +[titan] 2025-10-04 22:54:02,202 - root - INFO - step: 475 loss: 4.4882 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.5655 global_avg_mtp_loss: 3.9227 +[titan] 2025-10-04 22:54:02,202 - root - INFO - lr: 5.0000e-05 gnorm: 1.78 [ 0:19:53<1 day, 3:35:53] +[titan] 2025-10-04 22:54:13,066 - root - INFO - step: 480 loss: 4.4600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.5572 global_avg_mtp_loss: 3.9028 +[titan] 2025-10-04 22:54:13,066 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.00 [ 0:20:04<1 day, 3:33:20] +[titan] 2025-10-04 22:54:23,913 - root - INFO - step: 485 loss: 4.3781 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.5484 global_avg_mtp_loss: 3.8297 +[titan] 2025-10-04 22:54:23,913 - root - INFO - lr: 4.9999e-05 gnorm: 1.60 [ 0:20:15<1 day, 3:30:48] +[titan] 2025-10-04 22:54:34,742 - root - INFO - step: 490 loss: 4.4068 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.5524 global_avg_mtp_loss: 3.8544 +[titan] 2025-10-04 22:54:34,742 - root - INFO - lr: 4.9999e-05 gnorm: 2.19 [ 0:20:26<1 day, 3:28:18] +[titan] 2025-10-04 22:54:45,647 - root - INFO - step: 495 loss: 4.3459 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.5461 global_avg_mtp_loss: 3.7998 +[titan] 2025-10-04 22:54:45,647 - root - INFO - lr: 4.9999e-05 gnorm: 1.79 [ 0:20:37<1 day, 3:25:57] +[titan] 2025-10-04 22:54:54,303 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:54:56,481 - root - INFO - step: 500 loss: 4.5195 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.5664 global_avg_mtp_loss: 3.9531 +[titan] 2025-10-04 22:54:56,481 - root - INFO - lr: 4.9999e-05 gnorm: 1.81 [ 0:20:48<1 day, 3:23:33] +[titan] 2025-10-04 22:55:07,316 - root - INFO - step: 505 loss: 4.3727 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.5468 global_avg_mtp_loss: 3.8259 +[titan] 2025-10-04 22:55:07,316 - root - INFO - lr: 4.9999e-05 gnorm: 1.99 [ 0:20:59<1 day, 3:21:12] +[titan] 2025-10-04 22:55:18,908 - root - INFO - step: 510 loss: 4.3913 memory: 118.84GiB(85.28%) tps: 28,269 tflops: 392.18 mfu: 39.65% global_avg_ntp_loss: 0.5477 global_avg_mtp_loss: 3.8435 +[titan] 2025-10-04 22:55:18,908 - root - INFO - lr: 4.9999e-05 gnorm: 1.64 [ 0:21:10<1 day, 3:19:52] +[titan] 2025-10-04 22:55:23,513 - root - INFO - Dumping profiler traces at step 512 +[titan] 2025-10-04 22:55:23,549 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 22:55:30,219 - root - INFO - step: 515 loss: 4.3744 memory: 118.84GiB(85.28%) tps: 28,972 tflops: 401.94 mfu: 40.64% global_avg_ntp_loss: 0.5458 global_avg_mtp_loss: 3.8286 +[titan] 2025-10-04 22:55:30,219 - root - INFO - lr: 4.9999e-05 gnorm: 1.67 [ 0:21:22<1 day, 3:18:11] +[titan] 2025-10-04 22:55:41,134 - root - INFO - step: 520 loss: 4.3427 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.5439 global_avg_mtp_loss: 3.7988 +[titan] 2025-10-04 22:55:41,135 - root - INFO - lr: 4.9999e-05 gnorm: 2.16 [ 0:21:32<1 day, 3:16:02] +[titan] 2025-10-04 22:55:52,306 - root - INFO - step: 525 loss: 4.3706 memory: 118.84GiB(85.28%) tps: 29,331 tflops: 406.92 mfu: 41.14% global_avg_ntp_loss: 0.5472 global_avg_mtp_loss: 3.8234 +[titan] 2025-10-04 22:55:52,307 - root - INFO - lr: 4.9999e-05 gnorm: 1.88 [ 0:21:44<1 day, 3:14:15] +[titan] 2025-10-04 22:56:03,131 - root - INFO 
- step: 530 loss: 4.3726 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.5471 global_avg_mtp_loss: 3.8256 +[titan] 2025-10-04 22:56:03,131 - root - INFO - lr: 4.9999e-05 gnorm: 2.18 [ 0:21:54<1 day, 3:12:04] +[titan] 2025-10-04 22:56:13,930 - root - INFO - step: 535 loss: 4.4086 memory: 118.84GiB(85.28%) tps: 30,344 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.5498 global_avg_mtp_loss: 3.8588 +[titan] 2025-10-04 22:56:13,930 - root - INFO - lr: 4.9999e-05 gnorm: 1.95 [ 0:22:05<1 day, 3:09:53] +[titan] 2025-10-04 22:56:24,765 - root - INFO - step: 540 loss: 4.4155 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.5521 global_avg_mtp_loss: 3.8634 +[titan] 2025-10-04 22:56:24,765 - root - INFO - lr: 4.9999e-05 gnorm: 2.04 [ 0:22:16<1 day, 3:07:47] +[titan] 2025-10-04 22:56:35,621 - root - INFO - step: 545 loss: 4.3565 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.5455 global_avg_mtp_loss: 3.8109 +[titan] 2025-10-04 22:56:35,621 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:22:27<1 day, 3:05:45] +[titan] 2025-10-04 22:56:44,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:56:46,499 - root - INFO - step: 550 loss: 4.2924 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.5365 global_avg_mtp_loss: 3.7559 +[titan] 2025-10-04 22:56:46,499 - root - INFO - lr: 4.9998e-05 gnorm: 1.96 [ 0:22:38<1 day, 3:03:46] +[titan] 2025-10-04 22:56:57,360 - root - INFO - step: 555 loss: 4.3086 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.5367 global_avg_mtp_loss: 3.7719 +[titan] 2025-10-04 22:56:57,361 - root - INFO - lr: 4.9998e-05 gnorm: 1.94 [ 0:22:49<1 day, 3:01:48] +[titan] 2025-10-04 22:57:08,185 - root - INFO - step: 560 loss: 4.2981 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.5349 global_avg_mtp_loss: 3.7631 +[titan] 2025-10-04 22:57:08,185 - root - INFO - lr: 4.9998e-05 gnorm: 1.84 [ 0:22:59<1 day, 2:59:49] +[titan] 2025-10-04 22:57:19,007 - root - INFO - step: 565 loss: 4.3383 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7969 +[titan] 2025-10-04 22:57:19,007 - root - INFO - lr: 4.9998e-05 gnorm: 1.66 [ 0:23:10<1 day, 2:57:52] +[titan] 2025-10-04 22:57:29,825 - root - INFO - step: 570 loss: 4.3634 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.5450 global_avg_mtp_loss: 3.8184 +[titan] 2025-10-04 22:57:29,825 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:23:21<1 day, 2:55:57] +[titan] 2025-10-04 22:57:40,662 - root - INFO - step: 575 loss: 4.2261 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.5285 global_avg_mtp_loss: 3.6977 +[titan] 2025-10-04 22:57:40,663 - root - INFO - lr: 4.9998e-05 gnorm: 1.67 [ 0:23:32<1 day, 2:54:04] +[titan] 2025-10-04 22:57:51,566 - root - INFO - step: 580 loss: 4.2298 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.5294 global_avg_mtp_loss: 3.7005 +[titan] 2025-10-04 22:57:51,566 - 
root - INFO - lr: 4.9998e-05 gnorm: 1.98 [ 0:23:43<1 day, 2:52:18] +[titan] 2025-10-04 22:58:02,405 - root - INFO - step: 585 loss: 4.3315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7902 +[titan] 2025-10-04 22:58:02,405 - root - INFO - lr: 4.9998e-05 gnorm: 1.72 [ 0:23:54<1 day, 2:50:30] +[titan] 2025-10-04 22:58:13,269 - root - INFO - step: 590 loss: 4.2600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.5322 global_avg_mtp_loss: 3.7278 +[titan] 2025-10-04 22:58:13,270 - root - INFO - lr: 4.9997e-05 gnorm: 1.95 [ 0:24:05<1 day, 2:48:44] +[titan] 2025-10-04 22:58:24,105 - root - INFO - step: 595 loss: 4.1808 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.5216 global_avg_mtp_loss: 3.6592 +[titan] 2025-10-04 22:58:24,105 - root - INFO - lr: 4.9997e-05 gnorm: 1.65 [ 0:24:15<1 day, 2:46:59] +[titan] 2025-10-04 22:58:32,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:58:34,964 - root - INFO - step: 600 loss: 4.1976 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.5240 global_avg_mtp_loss: 3.6736 +[titan] 2025-10-04 22:58:34,964 - root - INFO - lr: 4.9997e-05 gnorm: 1.83 [ 0:24:26<1 day, 2:45:16] +[titan] 2025-10-04 22:58:45,870 - root - INFO - step: 605 loss: 4.3159 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.5391 global_avg_mtp_loss: 3.7769 +[titan] 2025-10-04 22:58:45,870 - root - INFO - lr: 4.9997e-05 gnorm: 1.87 [ 0:24:37<1 day, 2:43:38] +[titan] 2025-10-04 22:58:56,733 - root - INFO - step: 610 loss: 4.1166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.5131 global_avg_mtp_loss: 3.6035 +[titan] 2025-10-04 22:58:56,733 - root - INFO - lr: 4.9997e-05 gnorm: 1.62 [ 0:24:48<1 day, 2:41:59] +[titan] 2025-10-04 22:59:07,585 - root - INFO - step: 615 loss: 4.2340 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.5275 global_avg_mtp_loss: 3.7065 +[titan] 2025-10-04 22:59:07,585 - root - INFO - lr: 4.9997e-05 gnorm: 1.88 [ 0:24:59<1 day, 2:40:20] +[titan] 2025-10-04 22:59:18,424 - root - INFO - step: 620 loss: 4.2004 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5249 global_avg_mtp_loss: 3.6756 +[titan] 2025-10-04 22:59:18,424 - root - INFO - lr: 4.9997e-05 gnorm: 1.91 [ 0:25:10<1 day, 2:38:42] +[titan] 2025-10-04 22:59:29,245 - root - INFO - step: 625 loss: 4.2113 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.5247 global_avg_mtp_loss: 3.6866 +[titan] 2025-10-04 22:59:29,245 - root - INFO - lr: 4.9996e-05 gnorm: 1.62 [ 0:25:21<1 day, 2:37:04] +[titan] 2025-10-04 22:59:40,085 - root - INFO - step: 630 loss: 4.1954 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.5210 global_avg_mtp_loss: 3.6745 +[titan] 2025-10-04 22:59:40,085 - 
root - INFO - lr: 4.9996e-05 gnorm: 1.68 [ 0:25:31<1 day, 2:35:29] +[titan] 2025-10-04 22:59:51,004 - root - INFO - step: 635 loss: 4.0965 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.5096 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 22:59:51,004 - root - INFO - lr: 4.9996e-05 gnorm: 1.82 [ 0:25:42<1 day, 2:34:00] +[titan] 2025-10-04 23:00:01,832 - root - INFO - step: 640 loss: 4.2067 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.5236 global_avg_mtp_loss: 3.6831 +[titan] 2025-10-04 23:00:01,832 - root - INFO - lr: 4.9996e-05 gnorm: 1.87 [ 0:25:53<1 day, 2:32:27] +[titan] 2025-10-04 23:00:12,683 - root - INFO - step: 645 loss: 4.0562 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.5030 global_avg_mtp_loss: 3.5532 +[titan] 2025-10-04 23:00:12,683 - root - INFO - lr: 4.9996e-05 gnorm: 1.73 [ 0:26:04<1 day, 2:30:56] +[titan] 2025-10-04 23:00:21,312 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:00:23,493 - root - INFO - step: 650 loss: 4.1298 memory: 118.84GiB(85.28%) tps: 30,314 tflops: 420.56 mfu: 42.52% global_avg_ntp_loss: 0.5128 global_avg_mtp_loss: 3.6170 +[titan] 2025-10-04 23:00:23,493 - root - INFO - lr: 4.9996e-05 gnorm: 1.75 [ 0:26:15<1 day, 2:29:24] +[titan] 2025-10-04 23:00:34,283 - root - INFO - step: 655 loss: 4.0941 memory: 118.84GiB(85.28%) tps: 30,369 tflops: 421.33 mfu: 42.60% global_avg_ntp_loss: 0.5089 global_avg_mtp_loss: 3.5852 +[titan] 2025-10-04 23:00:34,283 - root - INFO - lr: 4.9995e-05 gnorm: 1.70 [ 0:26:26<1 day, 2:27:52] +[titan] 2025-10-04 23:00:45,102 - root - INFO - step: 660 loss: 4.1313 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.5130 global_avg_mtp_loss: 3.6184 +[titan] 2025-10-04 23:00:45,102 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:36<1 day, 2:26:23] +[titan] 2025-10-04 23:00:55,946 - root - INFO - step: 665 loss: 4.1367 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5147 global_avg_mtp_loss: 3.6220 +[titan] 2025-10-04 23:00:55,946 - root - INFO - lr: 4.9995e-05 gnorm: 1.99 [ 0:26:47<1 day, 2:24:57] +[titan] 2025-10-04 23:01:06,742 - root - INFO - step: 670 loss: 4.0904 memory: 118.84GiB(85.28%) tps: 30,352 tflops: 421.09 mfu: 42.58% global_avg_ntp_loss: 0.5075 global_avg_mtp_loss: 3.5829 +[titan] 2025-10-04 23:01:06,743 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:58<1 day, 2:23:29] +[titan] 2025-10-04 23:01:17,585 - root - INFO - step: 675 loss: 4.0638 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.5042 global_avg_mtp_loss: 3.5596 +[titan] 2025-10-04 23:01:17,585 - root - INFO - lr: 4.9995e-05 gnorm: 2.15 [ 0:27:09<1 day, 2:22:05] +[titan] 2025-10-04 23:01:28,410 - root - INFO - step: 680 loss: 4.0064 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4976 global_avg_mtp_loss: 3.5088 +[titan] 2025-10-04 23:01:28,410 - 
root - INFO - lr: 4.9994e-05 gnorm: 1.81 [ 0:27:20<1 day, 2:20:41] +[titan] 2025-10-04 23:01:39,214 - root - INFO - step: 685 loss: 4.1427 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.5134 global_avg_mtp_loss: 3.6293 +[titan] 2025-10-04 23:01:39,214 - root - INFO - lr: 4.9994e-05 gnorm: 1.69 [ 0:27:30<1 day, 2:19:17] +[titan] 2025-10-04 23:01:50,056 - root - INFO - step: 690 loss: 4.0571 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.5019 global_avg_mtp_loss: 3.5553 +[titan] 2025-10-04 23:01:50,056 - root - INFO - lr: 4.9994e-05 gnorm: 1.63 [ 0:27:41<1 day, 2:17:56] +[titan] 2025-10-04 23:02:00,900 - root - INFO - step: 695 loss: 4.0380 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5011 global_avg_mtp_loss: 3.5369 +[titan] 2025-10-04 23:02:00,900 - root - INFO - lr: 4.9994e-05 gnorm: 1.77 [ 0:27:52<1 day, 2:16:36] +[titan] 2025-10-04 23:02:09,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:02:11,753 - root - INFO - step: 700 loss: 4.0879 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.5070 global_avg_mtp_loss: 3.5810 +[titan] 2025-10-04 23:02:11,753 - root - INFO - lr: 4.9994e-05 gnorm: 1.96 [ 0:28:03<1 day, 2:15:18] +[titan] 2025-10-04 23:02:22,605 - root - INFO - step: 705 loss: 4.0241 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4988 global_avg_mtp_loss: 3.5252 +[titan] 2025-10-04 23:02:22,605 - root - INFO - lr: 4.9993e-05 gnorm: 1.83 [ 0:28:14<1 day, 2:14:00] +[titan] 2025-10-04 23:02:33,405 - root - INFO - step: 710 loss: 4.0903 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.5058 global_avg_mtp_loss: 3.5844 +[titan] 2025-10-04 23:02:33,405 - root - INFO - lr: 4.9993e-05 gnorm: 1.64 [ 0:28:25<1 day, 2:12:41] +[titan] 2025-10-04 23:02:44,244 - root - INFO - step: 715 loss: 4.0535 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5015 global_avg_mtp_loss: 3.5520 +[titan] 2025-10-04 23:02:44,244 - root - INFO - lr: 4.9993e-05 gnorm: 1.50 [ 0:28:36<1 day, 2:11:24] +[titan] 2025-10-04 23:02:55,077 - root - INFO - step: 720 loss: 4.0093 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.4957 global_avg_mtp_loss: 3.5137 +[titan] 2025-10-04 23:02:55,077 - root - INFO - lr: 4.9993e-05 gnorm: 1.58 [ 0:28:46<1 day, 2:10:09] +[titan] 2025-10-04 23:03:05,902 - root - INFO - step: 725 loss: 3.9529 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4894 global_avg_mtp_loss: 3.4635 +[titan] 2025-10-04 23:03:05,902 - root - INFO - lr: 4.9992e-05 gnorm: 1.53 [ 0:28:57<1 day, 2:08:53] +[titan] 2025-10-04 23:03:16,765 - root - INFO - step: 730 loss: 3.9701 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.4916 global_avg_mtp_loss: 3.4785 +[titan] 2025-10-04 23:03:16,765 - 
root - INFO - lr: 4.9992e-05 gnorm: 1.57 [ 0:29:08<1 day, 2:07:41] +[titan] 2025-10-04 23:03:27,585 - root - INFO - step: 735 loss: 4.0191 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.4982 global_avg_mtp_loss: 3.5209 +[titan] 2025-10-04 23:03:27,585 - root - INFO - lr: 4.9992e-05 gnorm: 1.59 [ 0:29:19<1 day, 2:06:27] +[titan] 2025-10-04 23:03:38,404 - root - INFO - step: 740 loss: 3.9770 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.4912 global_avg_mtp_loss: 3.4857 +[titan] 2025-10-04 23:03:38,404 - root - INFO - lr: 4.9992e-05 gnorm: 1.61 [ 0:29:30<1 day, 2:05:14] +[titan] 2025-10-04 23:03:49,265 - root - INFO - step: 745 loss: 4.0755 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.5054 global_avg_mtp_loss: 3.5701 +[titan] 2025-10-04 23:03:49,265 - root - INFO - lr: 4.9992e-05 gnorm: 1.52 [ 0:29:41<1 day, 2:04:05] +[titan] 2025-10-04 23:03:57,894 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:04:00,081 - root - INFO - step: 750 loss: 3.9375 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.4868 global_avg_mtp_loss: 3.4508 +[titan] 2025-10-04 23:04:00,081 - root - INFO - lr: 4.9991e-05 gnorm: 1.67 [ 0:29:51<1 day, 2:02:53] +[titan] 2025-10-04 23:04:10,923 - root - INFO - step: 755 loss: 4.0060 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.4974 global_avg_mtp_loss: 3.5087 +[titan] 2025-10-04 23:04:10,923 - root - INFO - lr: 4.9991e-05 gnorm: 1.62 [ 0:30:02<1 day, 2:01:44] +[titan] 2025-10-04 23:04:21,765 - root - INFO - step: 760 loss: 3.9826 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.4928 global_avg_mtp_loss: 3.4897 +[titan] 2025-10-04 23:04:21,765 - root - INFO - lr: 4.9991e-05 gnorm: 1.57 [ 0:30:13<1 day, 2:00:35] +[titan] 2025-10-04 23:04:32,624 - root - INFO - step: 765 loss: 3.9503 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4871 global_avg_mtp_loss: 3.4633 +[titan] 2025-10-04 23:04:32,625 - root - INFO - lr: 4.9991e-05 gnorm: 1.73 [ 0:30:24<1 day, 1:59:28] +[titan] 2025-10-04 23:04:43,499 - root - INFO - step: 770 loss: 4.0928 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.5059 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 23:04:43,499 - root - INFO - lr: 4.9990e-05 gnorm: 1.68 [ 0:30:35<1 day, 1:58:23] +[titan] 2025-10-04 23:04:54,364 - root - INFO - step: 775 loss: 4.0138 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4966 global_avg_mtp_loss: 3.5172 +[titan] 2025-10-04 23:04:54,364 - root - INFO - lr: 4.9990e-05 gnorm: 1.84 [ 0:30:46<1 day, 1:57:18] +[titan] 2025-10-04 23:05:05,165 - root - INFO - step: 780 loss: 3.9609 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.4878 global_avg_mtp_loss: 3.4731 +[titan] 2025-10-04 23:05:05,165 - 
root - INFO - lr: 4.9990e-05 gnorm: 1.66 [ 0:30:56<1 day, 1:56:10] +[titan] 2025-10-04 23:05:16,001 - root - INFO - step: 785 loss: 4.0392 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.5003 global_avg_mtp_loss: 3.5389 +[titan] 2025-10-04 23:05:16,002 - root - INFO - lr: 4.9989e-05 gnorm: 1.74 [ 0:31:07<1 day, 1:55:05] +[titan] 2025-10-04 23:05:26,809 - root - INFO - step: 790 loss: 3.9123 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.4820 global_avg_mtp_loss: 3.4303 +[titan] 2025-10-04 23:05:26,809 - root - INFO - lr: 4.9989e-05 gnorm: 1.71 [ 0:31:18<1 day, 1:53:59] +[titan] 2025-10-04 23:05:37,659 - root - INFO - step: 795 loss: 3.9513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.4870 global_avg_mtp_loss: 3.4643 +[titan] 2025-10-04 23:05:37,659 - root - INFO - lr: 4.9989e-05 gnorm: 1.57 [ 0:31:29<1 day, 1:52:55] +[titan] 2025-10-04 23:05:46,349 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:05:48,524 - root - INFO - step: 800 loss: 3.8805 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4013 +[titan] 2025-10-04 23:05:48,524 - root - INFO - lr: 4.9989e-05 gnorm: 1.63 [ 0:31:40<1 day, 1:51:54] +[titan] 2025-10-04 23:05:59,423 - root - INFO - step: 805 loss: 4.0567 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.5041 global_avg_mtp_loss: 3.5527 +[titan] 2025-10-04 23:05:59,424 - root - INFO - lr: 4.9988e-05 gnorm: 1.65 [ 0:31:51<1 day, 1:50:54] +[titan] 2025-10-04 23:06:10,267 - root - INFO - step: 810 loss: 3.9384 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4865 global_avg_mtp_loss: 3.4519 +[titan] 2025-10-04 23:06:10,267 - root - INFO - lr: 4.9988e-05 gnorm: 1.62 [ 0:32:02<1 day, 1:49:53] +[titan] 2025-10-04 23:06:21,120 - root - INFO - step: 815 loss: 3.9402 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.4841 global_avg_mtp_loss: 3.4561 +[titan] 2025-10-04 23:06:21,120 - root - INFO - lr: 4.9988e-05 gnorm: 1.83 [ 0:32:12<1 day, 1:48:52] +[titan] 2025-10-04 23:06:31,962 - root - INFO - step: 820 loss: 3.8907 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.4804 global_avg_mtp_loss: 3.4102 +[titan] 2025-10-04 23:06:31,962 - root - INFO - lr: 4.9987e-05 gnorm: 1.56 [ 0:32:23<1 day, 1:47:52] +[titan] 2025-10-04 23:06:42,804 - root - INFO - step: 825 loss: 3.9391 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.4866 global_avg_mtp_loss: 3.4525 +[titan] 2025-10-04 23:06:42,804 - root - INFO - lr: 4.9987e-05 gnorm: 1.73 [ 0:32:34<1 day, 1:46:52] +[titan] 2025-10-04 23:06:53,697 - root - INFO - step: 830 loss: 3.8534 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.4757 global_avg_mtp_loss: 3.3777 +[titan] 2025-10-04 23:06:53,697 - 
root - INFO - lr: 4.9987e-05 gnorm: 1.46 [ 0:32:45<1 day, 1:45:55] +[titan] 2025-10-04 23:07:04,599 - root - INFO - step: 835 loss: 3.9680 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.4909 global_avg_mtp_loss: 3.4770 +[titan] 2025-10-04 23:07:04,599 - root - INFO - lr: 4.9987e-05 gnorm: 1.69 [ 0:32:56<1 day, 1:44:59] +[titan] 2025-10-04 23:07:15,482 - root - INFO - step: 840 loss: 3.8804 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4793 global_avg_mtp_loss: 3.4011 +[titan] 2025-10-04 23:07:15,482 - root - INFO - lr: 4.9986e-05 gnorm: 1.65 [ 0:33:07<1 day, 1:44:03] +[titan] 2025-10-04 23:07:26,345 - root - INFO - step: 845 loss: 3.9335 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.4859 global_avg_mtp_loss: 3.4476 +[titan] 2025-10-04 23:07:26,345 - root - INFO - lr: 4.9986e-05 gnorm: 1.67 [ 0:33:18<1 day, 1:43:06] +[titan] 2025-10-04 23:07:35,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:07:37,166 - root - INFO - step: 850 loss: 3.9466 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.4899 global_avg_mtp_loss: 3.4568 +[titan] 2025-10-04 23:07:37,166 - root - INFO - lr: 4.9986e-05 gnorm: 1.53 [ 0:33:28<1 day, 1:42:08] +[titan] 2025-10-04 23:07:48,038 - root - INFO - step: 855 loss: 3.8553 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3805 +[titan] 2025-10-04 23:07:48,038 - root - INFO - lr: 4.9985e-05 gnorm: 1.54 [ 0:33:39<1 day, 1:41:13] +[titan] 2025-10-04 23:07:58,950 - root - INFO - step: 860 loss: 3.9192 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.12% global_avg_ntp_loss: 0.4837 global_avg_mtp_loss: 3.4355 +[titan] 2025-10-04 23:07:58,951 - root - INFO - lr: 4.9985e-05 gnorm: 1.63 [ 0:33:50<1 day, 1:40:20] +[titan] 2025-10-04 23:08:09,863 - root - INFO - step: 865 loss: 3.8398 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.4747 global_avg_mtp_loss: 3.3651 +[titan] 2025-10-04 23:08:09,863 - root - INFO - lr: 4.9985e-05 gnorm: 1.57 [ 0:34:01<1 day, 1:39:28] +[titan] 2025-10-04 23:08:20,763 - root - INFO - step: 870 loss: 3.9660 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.4876 global_avg_mtp_loss: 3.4784 +[titan] 2025-10-04 23:08:20,763 - root - INFO - lr: 4.9984e-05 gnorm: 1.70 [ 0:34:12<1 day, 1:38:36] +[titan] 2025-10-04 23:08:31,644 - root - INFO - step: 875 loss: 3.8236 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4708 global_avg_mtp_loss: 3.3528 +[titan] 2025-10-04 23:08:31,644 - root - INFO - lr: 4.9984e-05 gnorm: 1.58 [ 0:34:23<1 day, 1:37:43] +[titan] 2025-10-04 23:08:42,521 - root - INFO - step: 880 loss: 3.8393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4731 global_avg_mtp_loss: 3.3662 +[titan] 2025-10-04 23:08:42,522 - 
root - INFO - lr: 4.9984e-05 gnorm: 1.66 [ 0:34:34<1 day, 1:36:51] +[titan] 2025-10-04 23:08:53,411 - root - INFO - step: 885 loss: 3.9181 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4832 global_avg_mtp_loss: 3.4349 +[titan] 2025-10-04 23:08:53,412 - root - INFO - lr: 4.9983e-05 gnorm: 1.81 [ 0:34:45<1 day, 1:35:59] +[titan] 2025-10-04 23:09:04,287 - root - INFO - step: 890 loss: 3.8540 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4756 global_avg_mtp_loss: 3.3784 +[titan] 2025-10-04 23:09:04,287 - root - INFO - lr: 4.9983e-05 gnorm: 1.63 [ 0:34:56<1 day, 1:35:08] +[titan] 2025-10-04 23:09:15,149 - root - INFO - step: 895 loss: 3.7956 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.4646 global_avg_mtp_loss: 3.3310 +[titan] 2025-10-04 23:09:15,149 - root - INFO - lr: 4.9983e-05 gnorm: 1.59 [ 0:35:06<1 day, 1:34:16] +[titan] 2025-10-04 23:09:23,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:09:26,036 - root - INFO - step: 900 loss: 3.8814 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4023 +[titan] 2025-10-04 23:09:26,036 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:17<1 day, 1:33:26] +[titan] 2025-10-04 23:09:36,928 - root - INFO - step: 905 loss: 3.8547 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3797 +[titan] 2025-10-04 23:09:36,928 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:28<1 day, 1:32:36] +[titan] 2025-10-04 23:09:47,795 - root - INFO - step: 910 loss: 3.7503 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4614 global_avg_mtp_loss: 3.2890 +[titan] 2025-10-04 23:09:47,795 - root - INFO - lr: 4.9982e-05 gnorm: 1.63 [ 0:35:39<1 day, 1:31:46] +[titan] 2025-10-04 23:09:58,664 - root - INFO - step: 915 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3219 +[titan] 2025-10-04 23:09:58,664 - root - INFO - lr: 4.9981e-05 gnorm: 1.57 [ 0:35:50<1 day, 1:30:56] +[titan] 2025-10-04 23:10:09,537 - root - INFO - step: 920 loss: 3.8477 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.4753 global_avg_mtp_loss: 3.3723 +[titan] 2025-10-04 23:10:09,537 - root - INFO - lr: 4.9981e-05 gnorm: 1.56 [ 0:36:01<1 day, 1:30:07] +[titan] 2025-10-04 23:10:20,420 - root - INFO - step: 925 loss: 3.8141 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3439 +[titan] 2025-10-04 23:10:20,420 - root - INFO - lr: 4.9980e-05 gnorm: 1.53 [ 0:36:12<1 day, 1:29:19] +[titan] 2025-10-04 23:10:31,298 - root - INFO - step: 930 loss: 3.8185 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3482 +[titan] 2025-10-04 23:10:31,298 - 
root - INFO - lr: 4.9980e-05 gnorm: 1.56 [ 0:36:23<1 day, 1:28:31] +[titan] 2025-10-04 23:10:42,186 - root - INFO - step: 935 loss: 3.7234 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.4574 global_avg_mtp_loss: 3.2661 +[titan] 2025-10-04 23:10:42,186 - root - INFO - lr: 4.9980e-05 gnorm: 1.52 [ 0:36:33<1 day, 1:27:44] +[titan] 2025-10-04 23:10:53,053 - root - INFO - step: 940 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4666 global_avg_mtp_loss: 3.3211 +[titan] 2025-10-04 23:10:53,053 - root - INFO - lr: 4.9979e-05 gnorm: 1.69 [ 0:36:44<1 day, 1:26:56] +[titan] 2025-10-04 23:11:03,935 - root - INFO - step: 945 loss: 3.7815 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.4635 global_avg_mtp_loss: 3.3180 +[titan] 2025-10-04 23:11:03,935 - root - INFO - lr: 4.9979e-05 gnorm: 1.45 [ 0:36:55<1 day, 1:26:09] +[titan] 2025-10-04 23:11:12,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:11:14,787 - root - INFO - step: 950 loss: 3.8345 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4716 global_avg_mtp_loss: 3.3629 +[titan] 2025-10-04 23:11:14,787 - root - INFO - lr: 4.9979e-05 gnorm: 1.54 [ 0:37:06<1 day, 1:25:22] +[titan] 2025-10-04 23:11:25,662 - root - INFO - step: 955 loss: 3.7153 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.4570 global_avg_mtp_loss: 3.2583 +[titan] 2025-10-04 23:11:25,662 - root - INFO - lr: 4.9978e-05 gnorm: 1.40 [ 0:37:17<1 day, 1:24:36] +[titan] 2025-10-04 23:11:36,506 - root - INFO - step: 960 loss: 3.7474 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4595 global_avg_mtp_loss: 3.2878 +[titan] 2025-10-04 23:11:36,506 - root - INFO - lr: 4.9978e-05 gnorm: 1.39 [ 0:37:28<1 day, 1:23:49] +[titan] 2025-10-04 23:11:47,428 - root - INFO - step: 965 loss: 3.7469 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.4597 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:11:47,429 - root - INFO - lr: 4.9977e-05 gnorm: 1.60 [ 0:37:39<1 day, 1:23:05] +[titan] 2025-10-04 23:11:58,339 - root - INFO - step: 970 loss: 3.7767 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.4638 global_avg_mtp_loss: 3.3129 +[titan] 2025-10-04 23:11:58,339 - root - INFO - lr: 4.9977e-05 gnorm: 1.59 [ 0:37:50<1 day, 1:22:21] +[titan] 2025-10-04 23:12:09,214 - root - INFO - step: 975 loss: 3.7198 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4568 global_avg_mtp_loss: 3.2630 +[titan] 2025-10-04 23:12:09,214 - root - INFO - lr: 4.9977e-05 gnorm: 1.44 [ 0:38:00<1 day, 1:21:36] +[titan] 2025-10-04 23:12:20,081 - root - INFO - step: 980 loss: 3.7702 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4623 global_avg_mtp_loss: 3.3079 +[titan] 2025-10-04 23:12:20,081 - 
root - INFO - lr: 4.9976e-05 gnorm: 1.42 [ 0:38:11<1 day, 1:20:52] +[titan] 2025-10-04 23:12:30,946 - root - INFO - step: 985 loss: 3.8212 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3515 +[titan] 2025-10-04 23:12:30,947 - root - INFO - lr: 4.9976e-05 gnorm: 1.39 [ 0:38:22<1 day, 1:20:07] +[titan] 2025-10-04 23:12:41,799 - root - INFO - step: 990 loss: 3.7716 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.4659 global_avg_mtp_loss: 3.3057 +[titan] 2025-10-04 23:12:41,799 - root - INFO - lr: 4.9975e-05 gnorm: 1.50 [ 0:38:33<1 day, 1:19:23] +[titan] 2025-10-04 23:12:52,700 - root - INFO - step: 995 loss: 3.8144 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3447 +[titan] 2025-10-04 23:12:52,701 - root - INFO - lr: 4.9975e-05 gnorm: 1.47 [ 0:38:44<1 day, 1:18:40] +[titan] 2025-10-04 23:13:01,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:13:03,568 - root - INFO - step: 1000 loss: 3.6411 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4473 global_avg_mtp_loss: 3.1938 +[titan] 2025-10-04 23:13:03,569 - root - INFO - lr: 4.9974e-05 gnorm: 1.70 [ 0:38:55<1 day, 1:17:57] +[titan] 2025-10-04 23:13:14,441 - root - INFO - step: 1005 loss: 3.7872 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4636 global_avg_mtp_loss: 3.3236 +[titan] 2025-10-04 23:13:14,442 - root - INFO - lr: 4.9974e-05 gnorm: 1.62 [ 0:39:06<1 day, 1:17:14] +[titan] 2025-10-04 23:13:25,308 - root - INFO - step: 1010 loss: 3.8240 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4681 global_avg_mtp_loss: 3.3560 +[titan] 2025-10-04 23:13:25,308 - root - INFO - lr: 4.9974e-05 gnorm: 1.51 [ 0:39:17<1 day, 1:16:31] +[titan] 2025-10-04 23:13:36,156 - root - INFO - step: 1015 loss: 3.7026 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.4566 global_avg_mtp_loss: 3.2461 +[titan] 2025-10-04 23:13:36,157 - root - INFO - lr: 4.9973e-05 gnorm: 1.61 [ 0:39:27<1 day, 1:15:48] +[titan] 2025-10-04 23:13:47,024 - root - INFO - step: 1020 loss: 3.8204 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4698 global_avg_mtp_loss: 3.3506 +[titan] 2025-10-04 23:13:47,025 - root - INFO - lr: 4.9973e-05 gnorm: 1.58 [ 0:39:38<1 day, 1:15:06] +[titan] 2025-10-04 23:13:55,946 - root - INFO - Dumping profiler traces at step 1024 +[titan] 2025-10-04 23:13:55,982 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:13:58,196 - root - INFO - step: 1025 loss: 3.7098 memory: 118.84GiB(85.28%) tps: 29,332 tflops: 406.94 mfu: 41.15% global_avg_ntp_loss: 0.4550 global_avg_mtp_loss: 3.2548 +[titan] 2025-10-04 23:13:58,196 - root - INFO - lr: 4.9972e-05 gnorm: 1.53 [ 0:39:49<1 day, 1:14:35] +[titan] 2025-10-04 23:14:09,055 - root 
- INFO - step: 1030 loss: 3.6684 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4505 global_avg_mtp_loss: 3.2179 +[titan] 2025-10-04 23:14:09,056 - root - INFO - lr: 4.9972e-05 gnorm: 1.49 [ 0:40:00<1 day, 1:13:54] +[titan] 2025-10-04 23:14:19,917 - root - INFO - step: 1035 loss: 3.7778 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4632 global_avg_mtp_loss: 3.3146 +[titan] 2025-10-04 23:14:19,917 - root - INFO - lr: 4.9971e-05 gnorm: 1.64 [ 0:40:11<1 day, 1:13:12] +[titan] 2025-10-04 23:14:30,784 - root - INFO - step: 1040 loss: 3.7600 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4596 global_avg_mtp_loss: 3.3004 +[titan] 2025-10-04 23:14:30,784 - root - INFO - lr: 4.9971e-05 gnorm: 1.73 [ 0:40:22<1 day, 1:12:31] +[titan] 2025-10-04 23:14:41,642 - root - INFO - step: 1045 loss: 3.7970 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3312 +[titan] 2025-10-04 23:14:41,642 - root - INFO - lr: 4.9970e-05 gnorm: 1.60 [ 0:40:33<1 day, 1:11:50] +[titan] 2025-10-04 23:14:50,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:14:52,527 - root - INFO - step: 1050 loss: 3.7607 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.4629 global_avg_mtp_loss: 3.2979 +[titan] 2025-10-04 23:14:52,527 - root - INFO - lr: 4.9970e-05 gnorm: 1.86 [ 0:40:44<1 day, 1:11:10] +[titan] 2025-10-04 23:15:03,398 - root - INFO - step: 1055 loss: 3.6921 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.4533 global_avg_mtp_loss: 3.2388 +[titan] 2025-10-04 23:15:03,398 - root - INFO - lr: 4.9970e-05 gnorm: 1.59 [ 0:40:55<1 day, 1:10:30] +[titan] 2025-10-04 23:15:14,306 - root - INFO - step: 1060 loss: 3.7138 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.4561 global_avg_mtp_loss: 3.2577 +[titan] 2025-10-04 23:15:14,306 - root - INFO - lr: 4.9969e-05 gnorm: 1.89 [ 0:41:06<1 day, 1:09:52] +[titan] 2025-10-04 23:15:25,186 - root - INFO - step: 1065 loss: 3.7455 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4584 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:15:25,186 - root - INFO - lr: 4.9969e-05 gnorm: 1.72 [ 0:41:16<1 day, 1:09:13] +[titan] 2025-10-04 23:15:36,061 - root - INFO - step: 1070 loss: 3.6510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4474 global_avg_mtp_loss: 3.2036 +[titan] 2025-10-04 23:15:36,061 - root - INFO - lr: 4.9968e-05 gnorm: 1.70 [ 0:41:27<1 day, 1:08:34] +[titan] 2025-10-04 23:15:46,950 - root - INFO - step: 1075 loss: 3.7757 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4630 global_avg_mtp_loss: 3.3127 +[titan] 2025-10-04 23:15:46,950 - root - INFO - lr: 4.9968e-05 gnorm: 1.53 [ 0:41:38<1 day, 1:07:55] +[titan] 2025-10-04 23:15:57,821 - root - INFO - step: 1080 loss: 3.6997 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4542 global_avg_mtp_loss: 3.2455 +[titan] 2025-10-04 
23:15:57,821 - root - INFO - lr: 4.9967e-05 gnorm: 1.40 [ 0:41:49<1 day, 1:07:17] +[titan] 2025-10-04 23:16:08,691 - root - INFO - step: 1085 loss: 3.7768 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.4652 global_avg_mtp_loss: 3.3116 +[titan] 2025-10-04 23:16:08,691 - root - INFO - lr: 4.9967e-05 gnorm: 1.71 [ 0:42:00<1 day, 1:06:38] +[titan] 2025-10-04 23:16:19,625 - root - INFO - step: 1090 loss: 3.7891 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.4653 global_avg_mtp_loss: 3.3238 +[titan] 2025-10-04 23:16:19,625 - root - INFO - lr: 4.9966e-05 gnorm: 1.32 [ 0:42:11<1 day, 1:06:02] +[titan] 2025-10-04 23:16:30,524 - root - INFO - step: 1095 loss: 3.6348 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.4440 global_avg_mtp_loss: 3.1907 +[titan] 2025-10-04 23:16:30,525 - root - INFO - lr: 4.9966e-05 gnorm: 1.55 [ 0:42:22<1 day, 1:05:25] +[titan] 2025-10-04 23:16:39,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:16:41,421 - root - INFO - step: 1100 loss: 3.7357 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.4573 global_avg_mtp_loss: 3.2785 +[titan] 2025-10-04 23:16:41,422 - root - INFO - lr: 4.9965e-05 gnorm: 1.50 [ 0:42:33<1 day, 1:04:48] +[titan] 2025-10-04 23:16:52,335 - root - INFO - step: 1105 loss: 3.6253 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1800 +[titan] 2025-10-04 23:16:52,335 - root - INFO - lr: 4.9965e-05 gnorm: 1.52 [ 0:42:44<1 day, 1:04:12] +[titan] 2025-10-04 23:17:03,265 - root - INFO - step: 1110 loss: 3.6786 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.4500 global_avg_mtp_loss: 3.2285 +[titan] 2025-10-04 23:17:03,266 - root - INFO - lr: 4.9964e-05 gnorm: 1.41 [ 0:42:55<1 day, 1:03:37] +[titan] 2025-10-04 23:17:14,175 - root - INFO - step: 1115 loss: 3.6578 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.4465 global_avg_mtp_loss: 3.2112 +[titan] 2025-10-04 23:17:14,175 - root - INFO - lr: 4.9964e-05 gnorm: 1.35 [ 0:43:05<1 day, 1:03:02] +[titan] 2025-10-04 23:17:25,067 - root - INFO - step: 1120 loss: 3.6849 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.4511 global_avg_mtp_loss: 3.2339 +[titan] 2025-10-04 23:17:25,067 - root - INFO - lr: 4.9963e-05 gnorm: 1.51 [ 0:43:16<1 day, 1:02:26] +[titan] 2025-10-04 23:17:35,980 - root - INFO - step: 1125 loss: 3.6812 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.4516 global_avg_mtp_loss: 3.2296 +[titan] 2025-10-04 23:17:35,980 - root - INFO - lr: 4.9963e-05 gnorm: 1.53 [ 0:43:27<1 day, 1:01:51] +[titan] 2025-10-04 23:17:46,863 - root - INFO - step: 1130 loss: 3.6167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4428 global_avg_mtp_loss: 3.1739 +[titan] 2025-10-04 
23:17:46,863 - root - INFO - lr: 4.9962e-05 gnorm: 1.69 [ 0:43:38<1 day, 1:01:15] +[titan] 2025-10-04 23:17:57,754 - root - INFO - step: 1135 loss: 3.5668 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.4385 global_avg_mtp_loss: 3.1284 +[titan] 2025-10-04 23:17:57,754 - root - INFO - lr: 4.9962e-05 gnorm: 1.44 [ 0:43:49<1 day, 1:00:39] +[titan] 2025-10-04 23:18:08,676 - root - INFO - step: 1140 loss: 3.6958 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.4522 global_avg_mtp_loss: 3.2436 +[titan] 2025-10-04 23:18:08,676 - root - INFO - lr: 4.9961e-05 gnorm: 1.51 [ 0:44:00<1 day, 1:00:05] +[titan] 2025-10-04 23:18:19,548 - root - INFO - step: 1145 loss: 3.7386 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.4725 global_avg_mtp_loss: 3.2662 +[titan] 2025-10-04 23:18:19,548 - root - INFO - lr: 4.9961e-05 gnorm: 1.52 [ 0:44:11<1 day, 0:59:29] +[titan] 2025-10-04 23:18:28,249 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:18:30,436 - root - INFO - step: 1150 loss: 3.6554 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.4491 global_avg_mtp_loss: 3.2063 +[titan] 2025-10-04 23:18:30,436 - root - INFO - lr: 4.9960e-05 gnorm: 1.51 [ 0:44:22<1 day, 0:58:54] +[titan] 2025-10-04 23:18:41,365 - root - INFO - step: 1155 loss: 3.6986 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.4535 global_avg_mtp_loss: 3.2451 +[titan] 2025-10-04 23:18:41,365 - root - INFO - lr: 4.9960e-05 gnorm: 1.49 [ 0:44:33<1 day, 0:58:21] +[titan] 2025-10-04 23:18:52,242 - root - INFO - step: 1160 loss: 3.6068 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4415 global_avg_mtp_loss: 3.1653 +[titan] 2025-10-04 23:18:52,243 - root - INFO - lr: 4.9959e-05 gnorm: 1.49 [ 0:44:43<1 day, 0:57:46] +[titan] 2025-10-04 23:19:03,171 - root - INFO - step: 1165 loss: 3.5931 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.4398 global_avg_mtp_loss: 3.1533 +[titan] 2025-10-04 23:19:03,171 - root - INFO - lr: 4.9958e-05 gnorm: 1.54 [ 0:44:54<1 day, 0:57:13] +[titan] 2025-10-04 23:19:14,054 - root - INFO - step: 1170 loss: 3.6446 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4453 global_avg_mtp_loss: 3.1993 +[titan] 2025-10-04 23:19:14,054 - root - INFO - lr: 4.9958e-05 gnorm: 1.49 [ 0:45:05<1 day, 0:56:39] +[titan] 2025-10-04 23:19:24,934 - root - INFO - step: 1175 loss: 3.6211 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1757 +[titan] 2025-10-04 23:19:24,934 - root - INFO - lr: 4.9957e-05 gnorm: 1.48 [ 0:45:16<1 day, 0:56:05] +[titan] 2025-10-04 23:19:35,805 - root - INFO - step: 1180 loss: 3.6634 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4499 global_avg_mtp_loss: 3.2135 +[titan] 2025-10-04 
23:19:35,805 - root - INFO - lr: 4.9957e-05 gnorm: 1.55 [ 0:45:27<1 day, 0:55:31] +[titan] 2025-10-04 23:19:46,722 - root - INFO - step: 1185 loss: 3.6182 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1740 +[titan] 2025-10-04 23:19:46,722 - root - INFO - lr: 4.9956e-05 gnorm: 1.56 [ 0:45:38<1 day, 0:54:58] +[titan] 2025-10-04 23:19:57,577 - root - INFO - step: 1190 loss: 3.6307 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.4437 global_avg_mtp_loss: 3.1870 +[titan] 2025-10-04 23:19:57,578 - root - INFO - lr: 4.9956e-05 gnorm: 1.44 [ 0:45:49<1 day, 0:54:24] +[titan] 2025-10-04 23:20:08,587 - root - INFO - step: 1195 loss: 3.6947 memory: 118.84GiB(85.28%) tps: 29,765 tflops: 412.95 mfu: 41.75% global_avg_ntp_loss: 0.4519 global_avg_mtp_loss: 3.2429 +[titan] 2025-10-04 23:20:08,587 - root - INFO - lr: 4.9955e-05 gnorm: 1.42 [ 0:46:00<1 day, 0:53:55] +[titan] 2025-10-04 23:20:17,298 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:20:19,487 - root - INFO - step: 1200 loss: 3.6239 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1796 +[titan] 2025-10-04 23:20:19,487 - root - INFO - lr: 4.9955e-05 gnorm: 1.44 [ 0:46:11<1 day, 0:53:22] +[titan] 2025-10-04 23:20:30,366 - root - INFO - step: 1205 loss: 3.6270 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4452 global_avg_mtp_loss: 3.1819 +[titan] 2025-10-04 23:20:30,366 - root - INFO - lr: 4.9954e-05 gnorm: 1.60 [ 0:46:22<1 day, 0:52:49] +[titan] 2025-10-04 23:20:41,259 - root - INFO - step: 1210 loss: 3.6144 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1709 +[titan] 2025-10-04 23:20:41,259 - root - INFO - lr: 4.9953e-05 gnorm: 1.66 [ 0:46:32<1 day, 0:52:17] +[titan] 2025-10-04 23:20:52,152 - root - INFO - step: 1215 loss: 3.6886 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4528 global_avg_mtp_loss: 3.2359 +[titan] 2025-10-04 23:20:52,152 - root - INFO - lr: 4.9953e-05 gnorm: 1.48 [ 0:46:43<1 day, 0:51:44] +[titan] 2025-10-04 23:21:03,098 - root - INFO - step: 1220 loss: 3.5263 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.4324 global_avg_mtp_loss: 3.0939 +[titan] 2025-10-04 23:21:03,098 - root - INFO - lr: 4.9952e-05 gnorm: 1.62 [ 0:46:54<1 day, 0:51:14] +[titan] 2025-10-04 23:21:14,014 - root - INFO - step: 1225 loss: 3.6228 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.4426 global_avg_mtp_loss: 3.1801 +[titan] 2025-10-04 23:21:14,014 - root - INFO - lr: 4.9952e-05 gnorm: 1.53 [ 0:47:05<1 day, 0:50:43] +[titan] 2025-10-04 23:21:24,903 - root - INFO - step: 1230 loss: 3.5398 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.4327 global_avg_mtp_loss: 3.1072 +[titan] 2025-10-04 
23:21:24,904 - root - INFO - lr: 4.9951e-05 gnorm: 1.39 [ 0:47:16<1 day, 0:50:11] +[titan] 2025-10-04 23:21:35,790 - root - INFO - step: 1235 loss: 3.5790 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.4389 global_avg_mtp_loss: 3.1401 +[titan] 2025-10-04 23:21:35,790 - root - INFO - lr: 4.9951e-05 gnorm: 1.42 [ 0:47:27<1 day, 0:49:39] +[titan] 2025-10-04 23:21:46,666 - root - INFO - step: 1240 loss: 3.6434 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4448 global_avg_mtp_loss: 3.1987 +[titan] 2025-10-04 23:21:46,666 - root - INFO - lr: 4.9950e-05 gnorm: 1.43 [ 0:47:38<1 day, 0:49:07] +[titan] 2025-10-04 23:21:57,577 - root - INFO - step: 1245 loss: 3.5452 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4350 global_avg_mtp_loss: 3.1102 +[titan] 2025-10-04 23:21:57,577 - root - INFO - lr: 4.9949e-05 gnorm: 1.40 [ 0:47:49<1 day, 0:48:37] +[titan] 2025-10-04 23:22:06,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:22:08,563 - root - INFO - step: 1250 loss: 3.5844 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.4369 global_avg_mtp_loss: 3.1475 +[titan] 2025-10-04 23:22:08,564 - root - INFO - lr: 4.9949e-05 gnorm: 1.48 [ 0:48:00<1 day, 0:48:08] +[titan] 2025-10-04 23:22:19,438 - root - INFO - step: 1255 loss: 3.6078 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1643 +[titan] 2025-10-04 23:22:19,438 - root - INFO - lr: 4.9948e-05 gnorm: 1.59 [ 0:48:11<1 day, 0:47:37] +[titan] 2025-10-04 23:22:30,309 - root - INFO - step: 1260 loss: 3.5536 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4346 global_avg_mtp_loss: 3.1191 +[titan] 2025-10-04 23:22:30,309 - root - INFO - lr: 4.9948e-05 gnorm: 1.57 [ 0:48:22<1 day, 0:47:05] +[titan] 2025-10-04 23:22:41,203 - root - INFO - step: 1265 loss: 3.5861 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.4376 global_avg_mtp_loss: 3.1485 +[titan] 2025-10-04 23:22:41,203 - root - INFO - lr: 4.9947e-05 gnorm: 1.47 [ 0:48:32<1 day, 0:46:35] +[titan] 2025-10-04 23:22:52,080 - root - INFO - step: 1270 loss: 3.6181 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4419 global_avg_mtp_loss: 3.1762 +[titan] 2025-10-04 23:22:52,081 - root - INFO - lr: 4.9946e-05 gnorm: 1.38 [ 0:48:43<1 day, 0:46:04] +[titan] 2025-10-04 23:23:02,961 - root - INFO - step: 1275 loss: 3.5508 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1168 +[titan] 2025-10-04 23:23:02,961 - root - INFO - lr: 4.9946e-05 gnorm: 1.48 [ 0:48:54<1 day, 0:45:33] +[titan] 2025-10-04 23:23:13,913 - root - INFO - step: 1280 loss: 3.5362 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.4318 global_avg_mtp_loss: 3.1044 +[titan] 2025-10-04 
23:23:13,913 - root - INFO - lr: 4.9945e-05 gnorm: 1.47 [ 0:49:05<1 day, 0:45:05] +[titan] 2025-10-04 23:23:24,835 - root - INFO - step: 1285 loss: 3.5593 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4354 global_avg_mtp_loss: 3.1239 +[titan] 2025-10-04 23:23:24,835 - root - INFO - lr: 4.9944e-05 gnorm: 1.48 [ 0:49:16<1 day, 0:44:36] +[titan] 2025-10-04 23:23:35,699 - root - INFO - step: 1290 loss: 3.5751 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4357 global_avg_mtp_loss: 3.1395 +[titan] 2025-10-04 23:23:35,700 - root - INFO - lr: 4.9944e-05 gnorm: 1.42 [ 0:49:27<1 day, 0:44:05] +[titan] 2025-10-04 23:23:46,610 - root - INFO - step: 1295 loss: 3.5938 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4377 global_avg_mtp_loss: 3.1562 +[titan] 2025-10-04 23:23:46,610 - root - INFO - lr: 4.9943e-05 gnorm: 1.35 [ 0:49:38<1 day, 0:43:36] +[titan] 2025-10-04 23:23:55,309 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:23:57,488 - root - INFO - step: 1300 loss: 3.5542 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4336 global_avg_mtp_loss: 3.1206 +[titan] 2025-10-04 23:23:57,489 - root - INFO - lr: 4.9943e-05 gnorm: 1.38 [ 0:49:49<1 day, 0:43:06] +[titan] 2025-10-04 23:24:08,378 - root - INFO - step: 1305 loss: 3.5644 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4344 global_avg_mtp_loss: 3.1301 +[titan] 2025-10-04 23:24:08,379 - root - INFO - lr: 4.9942e-05 gnorm: 1.38 [ 0:50:00<1 day, 0:42:36] +[titan] 2025-10-04 23:24:19,247 - root - INFO - step: 1310 loss: 3.5464 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1124 +[titan] 2025-10-04 23:24:19,247 - root - INFO - lr: 4.9941e-05 gnorm: 1.43 [ 0:50:10<1 day, 0:42:06] +[titan] 2025-10-04 23:24:30,161 - root - INFO - step: 1315 loss: 3.5898 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.4372 global_avg_mtp_loss: 3.1527 +[titan] 2025-10-04 23:24:30,161 - root - INFO - lr: 4.9941e-05 gnorm: 1.34 [ 0:50:21<1 day, 0:41:38] +[titan] 2025-10-04 23:24:41,039 - root - INFO - step: 1320 loss: 3.6159 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4427 global_avg_mtp_loss: 3.1731 +[titan] 2025-10-04 23:24:41,039 - root - INFO - lr: 4.9940e-05 gnorm: 1.34 [ 0:50:32<1 day, 0:41:08] +[titan] 2025-10-04 23:24:51,938 - root - INFO - step: 1325 loss: 3.4618 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4229 global_avg_mtp_loss: 3.0389 +[titan] 2025-10-04 23:24:51,938 - root - INFO - lr: 4.9939e-05 gnorm: 1.36 [ 0:50:43<1 day, 0:40:40] +[titan] 2025-10-04 23:25:02,828 - root - INFO - step: 1330 loss: 3.5160 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4280 global_avg_mtp_loss: 3.0880 +[titan] 2025-10-04 
23:25:02,829 - root - INFO - lr: 4.9939e-05 gnorm: 1.38 [ 0:50:54<1 day, 0:40:11] +[titan] 2025-10-04 23:25:13,745 - root - INFO - step: 1335 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4282 global_avg_mtp_loss: 3.0763 +[titan] 2025-10-04 23:25:13,746 - root - INFO - lr: 4.9938e-05 gnorm: 1.46 [ 0:51:05<1 day, 0:39:43] +[titan] 2025-10-04 23:25:24,642 - root - INFO - step: 1340 loss: 3.5440 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4337 global_avg_mtp_loss: 3.1103 +[titan] 2025-10-04 23:25:24,642 - root - INFO - lr: 4.9937e-05 gnorm: 1.42 [ 0:51:16<1 day, 0:39:15] +[titan] 2025-10-04 23:25:35,576 - root - INFO - step: 1345 loss: 3.6036 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.4395 global_avg_mtp_loss: 3.1641 +[titan] 2025-10-04 23:25:35,576 - root - INFO - lr: 4.9937e-05 gnorm: 1.35 [ 0:51:27<1 day, 0:38:47] +[titan] 2025-10-04 23:25:44,278 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:25:46,465 - root - INFO - step: 1350 loss: 3.5202 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4296 global_avg_mtp_loss: 3.0905 +[titan] 2025-10-04 23:25:46,465 - root - INFO - lr: 4.9936e-05 gnorm: 1.31 [ 0:51:38<1 day, 0:38:19] +[titan] 2025-10-04 23:25:57,344 - root - INFO - step: 1355 loss: 3.5459 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.4309 global_avg_mtp_loss: 3.1149 +[titan] 2025-10-04 23:25:57,344 - root - INFO - lr: 4.9935e-05 gnorm: 1.30 [ 0:51:49<1 day, 0:37:51] +[titan] 2025-10-04 23:26:08,268 - root - INFO - step: 1360 loss: 3.5720 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.4351 global_avg_mtp_loss: 3.1369 +[titan] 2025-10-04 23:26:08,269 - root - INFO - lr: 4.9935e-05 gnorm: 1.39 [ 0:51:59<1 day, 0:37:24] +[titan] 2025-10-04 23:26:19,143 - root - INFO - step: 1365 loss: 3.4497 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0288 +[titan] 2025-10-04 23:26:19,143 - root - INFO - lr: 4.9934e-05 gnorm: 1.37 [ 0:52:10<1 day, 0:36:55] +[titan] 2025-10-04 23:26:30,030 - root - INFO - step: 1370 loss: 3.5847 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.4370 global_avg_mtp_loss: 3.1477 +[titan] 2025-10-04 23:26:30,030 - root - INFO - lr: 4.9933e-05 gnorm: 1.49 [ 0:52:21<1 day, 0:36:27] +[titan] 2025-10-04 23:26:40,913 - root - INFO - step: 1375 loss: 3.4970 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4269 global_avg_mtp_loss: 3.0701 +[titan] 2025-10-04 23:26:40,913 - root - INFO - lr: 4.9933e-05 gnorm: 1.38 [ 0:52:32<1 day, 0:36:00] +[titan] 2025-10-04 23:26:51,832 - root - INFO - step: 1380 loss: 3.4520 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 3.0312 +[titan] 2025-10-04 
23:26:51,833 - root - INFO - lr: 4.9932e-05 gnorm: 1.36 [ 0:52:43<1 day, 0:35:33] +[titan] 2025-10-04 23:27:02,721 - root - INFO - step: 1385 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.4169 global_avg_mtp_loss: 3.0149 +[titan] 2025-10-04 23:27:02,721 - root - INFO - lr: 4.9931e-05 gnorm: 1.42 [ 0:52:54<1 day, 0:35:05] +[titan] 2025-10-04 23:27:13,641 - root - INFO - step: 1390 loss: 3.4046 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.4139 global_avg_mtp_loss: 2.9907 +[titan] 2025-10-04 23:27:13,641 - root - INFO - lr: 4.9931e-05 gnorm: 1.37 [ 0:53:05<1 day, 0:34:39] +[titan] 2025-10-04 23:27:24,527 - root - INFO - step: 1395 loss: 3.4971 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.4253 global_avg_mtp_loss: 3.0717 +[titan] 2025-10-04 23:27:24,527 - root - INFO - lr: 4.9930e-05 gnorm: 1.41 [ 0:53:16<1 day, 0:34:12] +[titan] 2025-10-04 23:27:33,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:27:35,394 - root - INFO - step: 1400 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.4290 global_avg_mtp_loss: 3.0755 +[titan] 2025-10-04 23:27:35,394 - root - INFO - lr: 4.9929e-05 gnorm: 1.40 [ 0:53:27<1 day, 0:33:44] +[titan] 2025-10-04 23:27:46,287 - root - INFO - step: 1405 loss: 3.4686 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4233 global_avg_mtp_loss: 3.0453 +[titan] 2025-10-04 23:27:46,287 - root - INFO - lr: 4.9928e-05 gnorm: 1.49 [ 0:53:37<1 day, 0:33:17] +[titan] 2025-10-04 23:27:57,198 - root - INFO - step: 1410 loss: 3.5153 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4300 global_avg_mtp_loss: 3.0853 +[titan] 2025-10-04 23:27:57,198 - root - INFO - lr: 4.9928e-05 gnorm: 1.47 [ 0:53:48<1 day, 0:32:51] +[titan] 2025-10-04 23:28:08,061 - root - INFO - step: 1415 loss: 3.4739 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4242 global_avg_mtp_loss: 3.0497 +[titan] 2025-10-04 23:28:08,061 - root - INFO - lr: 4.9927e-05 gnorm: 1.34 [ 0:53:59<1 day, 0:32:23] +[titan] 2025-10-04 23:28:18,978 - root - INFO - step: 1420 loss: 3.5053 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.4276 global_avg_mtp_loss: 3.0778 +[titan] 2025-10-04 23:28:18,978 - root - INFO - lr: 4.9926e-05 gnorm: 1.41 [ 0:54:10<1 day, 0:31:57] +[titan] 2025-10-04 23:28:29,841 - root - INFO - step: 1425 loss: 3.4083 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4170 global_avg_mtp_loss: 2.9913 +[titan] 2025-10-04 23:28:29,841 - root - INFO - lr: 4.9926e-05 gnorm: 1.42 [ 0:54:21<1 day, 0:31:30] +[titan] 2025-10-04 23:28:40,714 - root - INFO - step: 1430 loss: 3.4627 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0411 +[titan] 2025-10-04 
23:28:40,714 - root - INFO - lr: 4.9925e-05 gnorm: 1.43 [ 0:54:32<1 day, 0:31:03] +[titan] 2025-10-04 23:28:51,581 - root - INFO - step: 1435 loss: 3.4919 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0684 +[titan] 2025-10-04 23:28:51,582 - root - INFO - lr: 4.9924e-05 gnorm: 1.37 [ 0:54:43<1 day, 0:30:36] +[titan] 2025-10-04 23:29:02,457 - root - INFO - step: 1440 loss: 3.4907 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.4267 global_avg_mtp_loss: 3.0640 +[titan] 2025-10-04 23:29:02,457 - root - INFO - lr: 4.9923e-05 gnorm: 1.37 [ 0:54:54<1 day, 0:30:10] +[titan] 2025-10-04 23:29:13,408 - root - INFO - step: 1445 loss: 3.4656 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0447 +[titan] 2025-10-04 23:29:13,408 - root - INFO - lr: 4.9923e-05 gnorm: 1.40 [ 0:55:05<1 day, 0:29:45] +[titan] 2025-10-04 23:29:22,093 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:29:24,281 - root - INFO - step: 1450 loss: 3.4814 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4230 global_avg_mtp_loss: 3.0585 +[titan] 2025-10-04 23:29:24,281 - root - INFO - lr: 4.9922e-05 gnorm: 1.47 [ 0:55:15<1 day, 0:29:19] +[titan] 2025-10-04 23:29:35,145 - root - INFO - step: 1455 loss: 3.4419 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.4184 global_avg_mtp_loss: 3.0235 +[titan] 2025-10-04 23:29:35,145 - root - INFO - lr: 4.9921e-05 gnorm: 1.37 [ 0:55:26<1 day, 0:28:52] +[titan] 2025-10-04 23:29:46,030 - root - INFO - step: 1460 loss: 3.5546 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.4320 global_avg_mtp_loss: 3.1226 +[titan] 2025-10-04 23:29:46,030 - root - INFO - lr: 4.9920e-05 gnorm: 1.41 [ 0:55:37<1 day, 0:28:26] +[titan] 2025-10-04 23:29:56,926 - root - INFO - step: 1465 loss: 3.5290 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4295 global_avg_mtp_loss: 3.0995 +[titan] 2025-10-04 23:29:56,927 - root - INFO - lr: 4.9920e-05 gnorm: 1.36 [ 0:55:48<1 day, 0:28:01] +[titan] 2025-10-04 23:30:07,807 - root - INFO - step: 1470 loss: 3.4674 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0442 +[titan] 2025-10-04 23:30:07,807 - root - INFO - lr: 4.9919e-05 gnorm: 1.41 [ 0:55:59<1 day, 0:27:35] +[titan] 2025-10-04 23:30:18,704 - root - INFO - step: 1475 loss: 3.4400 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0220 +[titan] 2025-10-04 23:30:18,704 - root - INFO - lr: 4.9918e-05 gnorm: 1.36 [ 0:56:10<1 day, 0:27:10] +[titan] 2025-10-04 23:30:29,561 - root - INFO - step: 1480 loss: 3.4692 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.4228 global_avg_mtp_loss: 3.0463 +[titan] 2025-10-04 
23:30:29,562 - root - INFO - lr: 4.9917e-05 gnorm: 1.30 [ 0:56:21<1 day, 0:26:44] +[titan] 2025-10-04 23:30:40,438 - root - INFO - step: 1485 loss: 3.4861 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4263 global_avg_mtp_loss: 3.0598 +[titan] 2025-10-04 23:30:40,438 - root - INFO - lr: 4.9917e-05 gnorm: 1.35 [ 0:56:32<1 day, 0:26:18] +[titan] 2025-10-04 23:30:51,302 - root - INFO - step: 1490 loss: 3.4181 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4151 global_avg_mtp_loss: 3.0030 +[titan] 2025-10-04 23:30:51,303 - root - INFO - lr: 4.9916e-05 gnorm: 1.47 [ 0:56:43<1 day, 0:25:52] +[titan] 2025-10-04 23:31:02,175 - root - INFO - step: 1495 loss: 3.4587 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4192 global_avg_mtp_loss: 3.0394 +[titan] 2025-10-04 23:31:02,176 - root - INFO - lr: 4.9915e-05 gnorm: 1.30 [ 0:56:53<1 day, 0:25:27] +[titan] 2025-10-04 23:31:10,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:31:13,053 - root - INFO - step: 1500 loss: 3.4454 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4203 global_avg_mtp_loss: 3.0251 +[titan] 2025-10-04 23:31:13,053 - root - INFO - lr: 4.9914e-05 gnorm: 1.32 [ 0:57:04<1 day, 0:25:01] +[titan] 2025-10-04 23:31:23,959 - root - INFO - step: 1505 loss: 3.5094 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.4278 global_avg_mtp_loss: 3.0816 +[titan] 2025-10-04 23:31:23,959 - root - INFO - lr: 4.9913e-05 gnorm: 1.39 [ 0:57:15<1 day, 0:24:37] +[titan] 2025-10-04 23:31:34,816 - root - INFO - step: 1510 loss: 3.4203 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 2.9996 +[titan] 2025-10-04 23:31:34,816 - root - INFO - lr: 4.9913e-05 gnorm: 1.40 [ 0:57:26<1 day, 0:24:11] +[titan] 2025-10-04 23:31:45,697 - root - INFO - step: 1515 loss: 3.4819 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4246 global_avg_mtp_loss: 3.0574 +[titan] 2025-10-04 23:31:45,697 - root - INFO - lr: 4.9912e-05 gnorm: 1.42 [ 0:57:37<1 day, 0:23:46] +[titan] 2025-10-04 23:31:56,581 - root - INFO - step: 1520 loss: 3.4715 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0513 +[titan] 2025-10-04 23:31:56,581 - root - INFO - lr: 4.9911e-05 gnorm: 1.54 [ 0:57:48<1 day, 0:23:22] +[titan] 2025-10-04 23:32:07,443 - root - INFO - step: 1525 loss: 3.3887 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4123 global_avg_mtp_loss: 2.9763 +[titan] 2025-10-04 23:32:07,443 - root - INFO - lr: 4.9910e-05 gnorm: 1.50 [ 0:57:59<1 day, 0:22:56] +[titan] 2025-10-04 23:32:18,324 - root - INFO - step: 1530 loss: 3.4137 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4161 global_avg_mtp_loss: 2.9977 +[titan] 2025-10-04 
23:32:18,324 - root - INFO - lr: 4.9909e-05 gnorm: 1.39 [ 0:58:10<1 day, 0:22:32] +[titan] 2025-10-04 23:32:29,266 - root - INFO - step: 1535 loss: 3.4241 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.4172 global_avg_mtp_loss: 3.0069 +[titan] 2025-10-04 23:32:29,266 - root - INFO - lr: 4.9909e-05 gnorm: 1.38 [ 0:58:20<1 day, 0:22:09] +[titan] 2025-10-04 23:32:31,609 - root - INFO - Dumping profiler traces at step 1536 +[titan] 2025-10-04 23:32:31,645 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:32:40,423 - root - INFO - step: 1540 loss: 3.4722 memory: 118.84GiB(85.28%) tps: 29,370 tflops: 407.47 mfu: 41.20% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0490 +[titan] 2025-10-04 23:32:40,423 - root - INFO - lr: 4.9908e-05 gnorm: 1.48 [ 0:58:32<1 day, 0:21:51] +[titan] 2025-10-04 23:32:51,288 - root - INFO - step: 1545 loss: 3.4793 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4234 global_avg_mtp_loss: 3.0559 +[titan] 2025-10-04 23:32:51,288 - root - INFO - lr: 4.9907e-05 gnorm: 1.37 [ 0:58:42<1 day, 0:21:26] +[titan] 2025-10-04 23:32:59,970 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:33:02,152 - root - INFO - step: 1550 loss: 3.4035 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4133 global_avg_mtp_loss: 2.9902 +[titan] 2025-10-04 23:33:02,152 - root - INFO - lr: 4.9906e-05 gnorm: 1.32 [ 0:58:53<1 day, 0:21:02] +[titan] 2025-10-04 23:33:13,032 - root - INFO - step: 1555 loss: 3.4850 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.4225 global_avg_mtp_loss: 3.0625 +[titan] 2025-10-04 23:33:13,032 - root - INFO - lr: 4.9905e-05 gnorm: 1.34 [ 0:59:04<1 day, 0:20:37] +[titan] 2025-10-04 23:33:23,946 - root - INFO - step: 1560 loss: 3.5272 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4289 global_avg_mtp_loss: 3.0982 +[titan] 2025-10-04 23:33:23,946 - root - INFO - lr: 4.9905e-05 gnorm: 1.37 [ 0:59:15<1 day, 0:20:14] +[titan] 2025-10-04 23:33:34,861 - root - INFO - step: 1565 loss: 3.5253 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.4294 global_avg_mtp_loss: 3.0959 +[titan] 2025-10-04 23:33:34,861 - root - INFO - lr: 4.9904e-05 gnorm: 1.37 [ 0:59:26<1 day, 0:19:51] +[titan] 2025-10-04 23:33:45,801 - root - INFO - step: 1570 loss: 3.4320 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.4173 global_avg_mtp_loss: 3.0147 +[titan] 2025-10-04 23:33:45,801 - root - INFO - lr: 4.9903e-05 gnorm: 1.35 [ 0:59:37<1 day, 0:19:28] +[titan] 2025-10-04 23:33:56,697 - root - INFO - step: 1575 loss: 3.4044 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4154 global_avg_mtp_loss: 2.9890 +[titan] 2025-10-04 23:33:56,697 - root - INFO - lr: 4.9902e-05 gnorm: 1.32 [ 0:59:48<1 day, 0:19:05] +[titan] 2025-10-04 23:34:07,560 - root - INFO - step: 1580 loss: 3.5820 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4484 global_avg_mtp_loss: 3.1336 +[titan] 2025-10-04 
23:34:07,560 - root - INFO - lr: 4.9901e-05 gnorm: 1.32 [ 0:59:59<1 day, 0:18:41] +[titan] 2025-10-04 23:34:18,478 - root - INFO - step: 1585 loss: 3.3932 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.4134 global_avg_mtp_loss: 2.9798 +[titan] 2025-10-04 23:34:18,479 - root - INFO - lr: 4.9900e-05 gnorm: 1.40 [ 1:00:10<1 day, 0:18:18] +[titan] 2025-10-04 23:34:29,342 - root - INFO - step: 1590 loss: 3.4358 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.4195 global_avg_mtp_loss: 3.0163 +[titan] 2025-10-04 23:34:29,342 - root - INFO - lr: 4.9900e-05 gnorm: 1.38 [ 1:00:21<1 day, 0:17:54] +[titan] 2025-10-04 23:34:40,218 - root - INFO - step: 1595 loss: 3.3310 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4056 global_avg_mtp_loss: 2.9254 +[titan] 2025-10-04 23:34:40,218 - root - INFO - lr: 4.9899e-05 gnorm: 1.38 [ 1:00:31<1 day, 0:17:30] +[titan] 2025-10-04 23:34:48,909 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:34:51,087 - root - INFO - step: 1600 loss: 3.4555 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4196 global_avg_mtp_loss: 3.0358 +[titan] 2025-10-04 23:34:51,088 - root - INFO - lr: 4.9898e-05 gnorm: 1.39 [ 1:00:42<1 day, 0:17:06] +[titan] 2025-10-04 23:35:01,992 - root - INFO - step: 1605 loss: 3.4766 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0531 +[titan] 2025-10-04 23:35:01,992 - root - INFO - lr: 4.9897e-05 gnorm: 1.33 [ 1:00:53<1 day, 0:16:43] +[titan] 2025-10-04 23:35:12,867 - root - INFO - step: 1610 loss: 3.3824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4111 global_avg_mtp_loss: 2.9713 +[titan] 2025-10-04 23:35:12,867 - root - INFO - lr: 4.9896e-05 gnorm: 1.41 [ 1:01:04<1 day, 0:16:20] +[titan] 2025-10-04 23:35:23,778 - root - INFO - step: 1615 loss: 3.4363 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.4168 global_avg_mtp_loss: 3.0195 +[titan] 2025-10-04 23:35:23,779 - root - INFO - lr: 4.9895e-05 gnorm: 1.27 [ 1:01:15<1 day, 0:15:57] +[titan] 2025-10-04 23:35:34,649 - root - INFO - step: 1620 loss: 3.3175 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.4028 global_avg_mtp_loss: 2.9147 +[titan] 2025-10-04 23:35:34,649 - root - INFO - lr: 4.9895e-05 gnorm: 1.32 [ 1:01:26<1 day, 0:15:34] +[titan] 2025-10-04 23:35:45,526 - root - INFO - step: 1625 loss: 3.3715 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4086 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:35:45,527 - root - INFO - lr: 4.9894e-05 gnorm: 1.41 [ 1:01:37<1 day, 0:15:11] +[titan] 2025-10-04 23:35:56,405 - root - INFO - step: 1630 loss: 3.3383 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4038 global_avg_mtp_loss: 2.9345 +[titan] 2025-10-04 
23:35:56,405 - root - INFO - lr: 4.9893e-05 gnorm: 1.32 [ 1:01:48<1 day, 0:14:48] +[titan] 2025-10-04 23:36:07,309 - root - INFO - step: 1635 loss: 3.4176 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4148 global_avg_mtp_loss: 3.0028 +[titan] 2025-10-04 23:36:07,309 - root - INFO - lr: 4.9892e-05 gnorm: 1.40 [ 1:01:58<1 day, 0:14:25] +[titan] 2025-10-04 23:36:18,303 - root - INFO - step: 1640 loss: 3.3374 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.4052 global_avg_mtp_loss: 2.9322 +[titan] 2025-10-04 23:36:18,304 - root - INFO - lr: 4.9891e-05 gnorm: 1.45 [ 1:02:09<1 day, 0:14:05] +[titan] 2025-10-04 23:36:29,175 - root - INFO - step: 1645 loss: 3.4862 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.4238 global_avg_mtp_loss: 3.0624 +[titan] 2025-10-04 23:36:29,175 - root - INFO - lr: 4.9890e-05 gnorm: 1.49 [ 1:02:20<1 day, 0:13:42] +[titan] 2025-10-04 23:36:37,867 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:36:40,054 - root - INFO - step: 1650 loss: 3.2615 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8657 +[titan] 2025-10-04 23:36:40,054 - root - INFO - lr: 4.9889e-05 gnorm: 1.45 [ 1:02:31<1 day, 0:13:19] +[titan] 2025-10-04 23:36:50,937 - root - INFO - step: 1655 loss: 3.4016 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4127 global_avg_mtp_loss: 2.9889 +[titan] 2025-10-04 23:36:50,937 - root - INFO - lr: 4.9888e-05 gnorm: 1.34 [ 1:02:42<1 day, 0:12:56] +[titan] 2025-10-04 23:37:01,815 - root - INFO - step: 1660 loss: 3.3760 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4106 global_avg_mtp_loss: 2.9654 +[titan] 2025-10-04 23:37:01,815 - root - INFO - lr: 4.9888e-05 gnorm: 1.33 [ 1:02:53<1 day, 0:12:34] +[titan] 2025-10-04 23:37:12,722 - root - INFO - step: 1665 loss: 3.3861 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.4119 global_avg_mtp_loss: 2.9742 +[titan] 2025-10-04 23:37:12,722 - root - INFO - lr: 4.9887e-05 gnorm: 1.28 [ 1:03:04<1 day, 0:12:12] +[titan] 2025-10-04 23:37:23,672 - root - INFO - step: 1670 loss: 3.3993 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.20 mfu: 41.98% global_avg_ntp_loss: 0.4125 global_avg_mtp_loss: 2.9867 +[titan] 2025-10-04 23:37:23,672 - root - INFO - lr: 4.9886e-05 gnorm: 1.29 [ 1:03:15<1 day, 0:11:51] +[titan] 2025-10-04 23:37:34,543 - root - INFO - step: 1675 loss: 3.3445 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9396 +[titan] 2025-10-04 23:37:34,543 - root - INFO - lr: 4.9885e-05 gnorm: 1.45 [ 1:03:26<1 day, 0:11:28] +[titan] 2025-10-04 23:37:45,421 - root - INFO - step: 1680 loss: 3.4052 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4135 global_avg_mtp_loss: 2.9917 +[titan] 2025-10-04 
23:37:45,421 - root - INFO - lr: 4.9884e-05 gnorm: 1.41 [ 1:03:37<1 day, 0:11:06] +[titan] 2025-10-04 23:37:56,304 - root - INFO - step: 1685 loss: 3.3465 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4066 global_avg_mtp_loss: 2.9399 +[titan] 2025-10-04 23:37:56,305 - root - INFO - lr: 4.9883e-05 gnorm: 1.35 [ 1:03:47<1 day, 0:10:44] +[titan] 2025-10-04 23:38:07,165 - root - INFO - step: 1690 loss: 3.4157 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.4162 global_avg_mtp_loss: 2.9995 +[titan] 2025-10-04 23:38:07,165 - root - INFO - lr: 4.9882e-05 gnorm: 1.31 [ 1:03:58<1 day, 0:10:21] +[titan] 2025-10-04 23:38:18,032 - root - INFO - step: 1695 loss: 3.3211 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.4037 global_avg_mtp_loss: 2.9174 +[titan] 2025-10-04 23:38:18,032 - root - INFO - lr: 4.9881e-05 gnorm: 1.27 [ 1:04:09<1 day, 0:09:58] +[titan] 2025-10-04 23:38:26,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:38:28,977 - root - INFO - step: 1700 loss: 3.4333 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0154 +[titan] 2025-10-04 23:38:28,977 - root - INFO - lr: 4.9880e-05 gnorm: 1.47 [ 1:04:20<1 day, 0:09:38] +[titan] 2025-10-04 23:38:39,826 - root - INFO - step: 1705 loss: 3.3912 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.4113 global_avg_mtp_loss: 2.9799 +[titan] 2025-10-04 23:38:39,826 - root - INFO - lr: 4.9879e-05 gnorm: 1.35 [ 1:04:31<1 day, 0:09:15] +[titan] 2025-10-04 23:38:50,670 - root - INFO - step: 1710 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4158 global_avg_mtp_loss: 3.0159 +[titan] 2025-10-04 23:38:50,670 - root - INFO - lr: 4.9878e-05 gnorm: 1.41 [ 1:04:42<1 day, 0:08:52] +[titan] 2025-10-04 23:39:01,517 - root - INFO - step: 1715 loss: 3.4588 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0386 +[titan] 2025-10-04 23:39:01,517 - root - INFO - lr: 4.9877e-05 gnorm: 1.41 [ 1:04:53<1 day, 0:08:30] +[titan] 2025-10-04 23:39:12,377 - root - INFO - step: 1720 loss: 3.3718 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.4092 global_avg_mtp_loss: 2.9625 +[titan] 2025-10-04 23:39:12,377 - root - INFO - lr: 4.9877e-05 gnorm: 1.24 [ 1:05:04<1 day, 0:08:07] +[titan] 2025-10-04 23:39:23,301 - root - INFO - step: 1725 loss: 3.3446 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.4100 global_avg_mtp_loss: 2.9346 +[titan] 2025-10-04 23:39:23,301 - root - INFO - lr: 4.9876e-05 gnorm: 1.27 [ 1:05:14<1 day, 0:07:47] +[titan] 2025-10-04 23:39:34,194 - root - INFO - step: 1730 loss: 3.4582 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0367 +[titan] 2025-10-04 
23:39:34,195 - root - INFO - lr: 4.9875e-05 gnorm: 1.32 [ 1:05:25<1 day, 0:07:25] +[titan] 2025-10-04 23:39:45,081 - root - INFO - step: 1735 loss: 3.4372 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4157 global_avg_mtp_loss: 3.0215 +[titan] 2025-10-04 23:39:45,081 - root - INFO - lr: 4.9874e-05 gnorm: 1.37 [ 1:05:36<1 day, 0:07:04] +[titan] 2025-10-04 23:39:55,972 - root - INFO - step: 1740 loss: 3.3532 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9463 +[titan] 2025-10-04 23:39:55,972 - root - INFO - lr: 4.9873e-05 gnorm: 1.36 [ 1:05:47<1 day, 0:06:42] +[titan] 2025-10-04 23:40:06,852 - root - INFO - step: 1745 loss: 3.3083 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4022 global_avg_mtp_loss: 2.9061 +[titan] 2025-10-04 23:40:06,853 - root - INFO - lr: 4.9872e-05 gnorm: 1.33 [ 1:05:58<1 day, 0:06:21] +[titan] 2025-10-04 23:40:15,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:40:17,731 - root - INFO - step: 1750 loss: 3.4480 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4191 global_avg_mtp_loss: 3.0289 +[titan] 2025-10-04 23:40:17,731 - root - INFO - lr: 4.9871e-05 gnorm: 1.35 [ 1:06:09<1 day, 0:05:59] +[titan] 2025-10-04 23:40:28,641 - root - INFO - step: 1755 loss: 3.3860 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4107 global_avg_mtp_loss: 2.9753 +[titan] 2025-10-04 23:40:28,641 - root - INFO - lr: 4.9870e-05 gnorm: 1.31 [ 1:06:20<1 day, 0:05:39] +[titan] 2025-10-04 23:40:39,515 - root - INFO - step: 1760 loss: 3.3596 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9525 +[titan] 2025-10-04 23:40:39,516 - root - INFO - lr: 4.9869e-05 gnorm: 1.44 [ 1:06:31<1 day, 0:05:17] +[titan] 2025-10-04 23:40:50,423 - root - INFO - step: 1765 loss: 3.2984 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3986 global_avg_mtp_loss: 2.8998 +[titan] 2025-10-04 23:40:50,423 - root - INFO - lr: 4.9868e-05 gnorm: 1.40 [ 1:06:42<1 day, 0:04:56] +[titan] 2025-10-04 23:41:01,295 - root - INFO - step: 1770 loss: 3.3670 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.4093 global_avg_mtp_loss: 2.9577 +[titan] 2025-10-04 23:41:01,295 - root - INFO - lr: 4.9867e-05 gnorm: 1.37 [ 1:06:52<1 day, 0:04:35] +[titan] 2025-10-04 23:41:12,156 - root - INFO - step: 1775 loss: 3.3745 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.4116 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:41:12,156 - root - INFO - lr: 4.9866e-05 gnorm: 1.36 [ 1:07:03<1 day, 0:04:13] +[titan] 2025-10-04 23:41:23,073 - root - INFO - step: 1780 loss: 3.2774 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 
23:41:23,074 - root - INFO - lr: 4.9865e-05 gnorm: 1.44 [ 1:07:14<1 day, 0:03:53] +[titan] 2025-10-04 23:41:33,936 - root - INFO - step: 1785 loss: 3.3608 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9538 +[titan] 2025-10-04 23:41:33,936 - root - INFO - lr: 4.9864e-05 gnorm: 1.39 [ 1:07:25<1 day, 0:03:32] +[titan] 2025-10-04 23:41:44,811 - root - INFO - step: 1790 loss: 3.3548 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9479 +[titan] 2025-10-04 23:41:44,812 - root - INFO - lr: 4.9863e-05 gnorm: 1.46 [ 1:07:36<1 day, 0:03:11] +[titan] 2025-10-04 23:41:55,714 - root - INFO - step: 1795 loss: 3.4000 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.4121 global_avg_mtp_loss: 2.9879 +[titan] 2025-10-04 23:41:55,715 - root - INFO - lr: 4.9862e-05 gnorm: 1.53 [ 1:07:47<1 day, 0:02:50] +[titan] 2025-10-04 23:42:04,388 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:42:06,574 - root - INFO - step: 1800 loss: 3.3948 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.4124 global_avg_mtp_loss: 2.9824 +[titan] 2025-10-04 23:42:06,574 - root - INFO - lr: 4.9861e-05 gnorm: 1.37 [ 1:07:58<1 day, 0:02:29] +[titan] 2025-10-04 23:42:17,436 - root - INFO - step: 1805 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.4017 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:42:17,437 - root - INFO - lr: 4.9860e-05 gnorm: 1.29 [ 1:08:09<1 day, 0:02:08] +[titan] 2025-10-04 23:42:28,375 - root - INFO - step: 1810 loss: 3.3561 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:42:28,375 - root - INFO - lr: 4.9859e-05 gnorm: 1.39 [ 1:08:20<1 day, 0:01:48] +[titan] 2025-10-04 23:42:39,216 - root - INFO - step: 1815 loss: 3.3053 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.3995 global_avg_mtp_loss: 2.9058 +[titan] 2025-10-04 23:42:39,217 - root - INFO - lr: 4.9858e-05 gnorm: 1.34 [ 1:08:30<1 day, 0:01:27] +[titan] 2025-10-04 23:42:50,059 - root - INFO - step: 1820 loss: 3.2854 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8887 +[titan] 2025-10-04 23:42:50,059 - root - INFO - lr: 4.9857e-05 gnorm: 1.37 [ 1:08:41<1 day, 0:01:05] +[titan] 2025-10-04 23:43:00,958 - root - INFO - step: 1825 loss: 3.3393 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4035 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 23:43:00,958 - root - INFO - lr: 4.9856e-05 gnorm: 1.37 [ 1:08:52<1 day, 0:00:45] +[titan] 2025-10-04 23:43:11,802 - root - INFO - step: 1830 loss: 3.3421 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4062 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 
23:43:11,802 - root - INFO - lr: 4.9855e-05 gnorm: 1.36 [ 1:09:03<1 day, 0:00:24] +[titan] 2025-10-04 23:43:22,644 - root - INFO - step: 1835 loss: 3.3492 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.4055 global_avg_mtp_loss: 2.9437 +[titan] 2025-10-04 23:43:22,645 - root - INFO - lr: 4.9854e-05 gnorm: 1.32 [ 1:09:14<1 day, 0:00:02] +[titan] 2025-10-04 23:43:33,561 - root - INFO - step: 1840 loss: 3.2612 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3942 global_avg_mtp_loss: 2.8670 +[titan] 2025-10-04 23:43:33,562 - root - INFO - lr: 4.9853e-05 gnorm: 1.27 [ 1:09:25<23:59:43] +[titan] 2025-10-04 23:43:44,438 - root - INFO - step: 1845 loss: 3.3605 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9537 +[titan] 2025-10-04 23:43:44,438 - root - INFO - lr: 4.9852e-05 gnorm: 1.27 [ 1:09:36<23:59:22] +[titan] 2025-10-04 23:43:53,129 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:43:55,315 - root - INFO - step: 1850 loss: 3.3556 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4063 global_avg_mtp_loss: 2.9493 +[titan] 2025-10-04 23:43:55,315 - root - INFO - lr: 4.9851e-05 gnorm: 1.32 [ 1:09:46<23:59:02] +[titan] 2025-10-04 23:44:06,182 - root - INFO - step: 1855 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4016 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:44:06,182 - root - INFO - lr: 4.9850e-05 gnorm: 1.40 [ 1:09:57<23:58:41] +[titan] 2025-10-04 23:44:17,099 - root - INFO - step: 1860 loss: 3.3782 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4085 global_avg_mtp_loss: 2.9697 +[titan] 2025-10-04 23:44:17,099 - root - INFO - lr: 4.9849e-05 gnorm: 1.35 [ 1:10:08<23:58:22] +[titan] 2025-10-04 23:44:28,008 - root - INFO - step: 1865 loss: 3.2855 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3997 global_avg_mtp_loss: 2.8857 +[titan] 2025-10-04 23:44:28,009 - root - INFO - lr: 4.9848e-05 gnorm: 1.35 [ 1:10:19<23:58:02] +[titan] 2025-10-04 23:44:38,889 - root - INFO - step: 1870 loss: 3.3023 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.9043 +[titan] 2025-10-04 23:44:38,889 - root - INFO - lr: 4.9847e-05 gnorm: 1.24 [ 1:10:30<23:57:42] +[titan] 2025-10-04 23:44:49,776 - root - INFO - step: 1875 loss: 3.3134 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.4008 global_avg_mtp_loss: 2.9126 +[titan] 2025-10-04 23:44:49,776 - root - INFO - lr: 4.9846e-05 gnorm: 1.32 [ 1:10:41<23:57:22] +[titan] 2025-10-04 23:45:00,642 - root - INFO - step: 1880 loss: 3.2097 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8207 +[titan] 2025-10-04 23:45:00,642 - root - INFO - lr: 4.9845e-05 
gnorm: 1.33 [ 1:10:52<23:57:02] +[titan] 2025-10-04 23:45:11,496 - root - INFO - step: 1885 loss: 3.2568 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8621 +[titan] 2025-10-04 23:45:11,497 - root - INFO - lr: 4.9844e-05 gnorm: 1.34 [ 1:11:03<23:56:41] +[titan] 2025-10-04 23:45:22,417 - root - INFO - step: 1890 loss: 3.3180 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.4019 global_avg_mtp_loss: 2.9160 +[titan] 2025-10-04 23:45:22,417 - root - INFO - lr: 4.9843e-05 gnorm: 1.39 [ 1:11:14<23:56:22] +[titan] 2025-10-04 23:45:33,318 - root - INFO - step: 1895 loss: 3.2706 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3955 global_avg_mtp_loss: 2.8752 +[titan] 2025-10-04 23:45:33,318 - root - INFO - lr: 4.9842e-05 gnorm: 1.50 [ 1:11:24<23:56:03] +[titan] 2025-10-04 23:45:41,992 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:45:44,171 - root - INFO - step: 1900 loss: 3.2793 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8827 +[titan] 2025-10-04 23:45:44,171 - root - INFO - lr: 4.9841e-05 gnorm: 1.29 [ 1:11:35<23:55:42] +[titan] 2025-10-04 23:45:55,048 - root - INFO - step: 1905 loss: 3.3144 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4029 global_avg_mtp_loss: 2.9115 +[titan] 2025-10-04 23:45:55,048 - root - INFO - lr: 4.9840e-05 gnorm: 1.32 [ 1:11:46<23:55:22] +[titan] 2025-10-04 23:46:05,920 - root - INFO - step: 1910 loss: 3.2864 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3983 global_avg_mtp_loss: 2.8881 +[titan] 2025-10-04 23:46:05,920 - root - INFO - lr: 4.9839e-05 gnorm: 1.32 [ 1:11:57<23:55:02] +[titan] 2025-10-04 23:46:16,784 - root - INFO - step: 1915 loss: 3.2475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8558 +[titan] 2025-10-04 23:46:16,785 - root - INFO - lr: 4.9837e-05 gnorm: 1.28 [ 1:12:08<23:54:42] +[titan] 2025-10-04 23:46:27,699 - root - INFO - step: 1920 loss: 3.3007 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.11% global_avg_ntp_loss: 0.3987 global_avg_mtp_loss: 2.9020 +[titan] 2025-10-04 23:46:27,700 - root - INFO - lr: 4.9836e-05 gnorm: 1.39 [ 1:12:19<23:54:23] +[titan] 2025-10-04 23:46:38,626 - root - INFO - step: 1925 loss: 3.2659 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3951 global_avg_mtp_loss: 2.8709 +[titan] 2025-10-04 23:46:38,626 - root - INFO - lr: 4.9835e-05 gnorm: 1.32 [ 1:12:30<23:54:05] +[titan] 2025-10-04 23:46:49,497 - root - INFO - step: 1930 loss: 3.2880 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8914 +[titan] 2025-10-04 23:46:49,497 - root - INFO - lr: 4.9834e-05 
gnorm: 1.31 [ 1:12:41<23:53:45] +[titan] 2025-10-04 23:47:00,373 - root - INFO - step: 1935 loss: 3.2719 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8754 +[titan] 2025-10-04 23:47:00,374 - root - INFO - lr: 4.9833e-05 gnorm: 1.33 [ 1:12:52<23:53:25] +[titan] 2025-10-04 23:47:11,263 - root - INFO - step: 1940 loss: 3.3395 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4048 global_avg_mtp_loss: 2.9347 +[titan] 2025-10-04 23:47:11,263 - root - INFO - lr: 4.9832e-05 gnorm: 1.41 [ 1:13:02<23:53:06] +[titan] 2025-10-04 23:47:22,130 - root - INFO - step: 1945 loss: 3.2947 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8976 +[titan] 2025-10-04 23:47:22,130 - root - INFO - lr: 4.9831e-05 gnorm: 1.48 [ 1:13:13<23:52:46] +[titan] 2025-10-04 23:47:30,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:47:33,036 - root - INFO - step: 1950 loss: 3.3613 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.4054 global_avg_mtp_loss: 2.9558 +[titan] 2025-10-04 23:47:33,037 - root - INFO - lr: 4.9830e-05 gnorm: 1.34 [ 1:13:24<23:52:27] +[titan] 2025-10-04 23:47:43,944 - root - INFO - step: 1955 loss: 3.2920 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3990 global_avg_mtp_loss: 2.8929 +[titan] 2025-10-04 23:47:43,944 - root - INFO - lr: 4.9829e-05 gnorm: 1.29 [ 1:13:35<23:52:09] +[titan] 2025-10-04 23:47:54,843 - root - INFO - step: 1960 loss: 3.2473 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3941 global_avg_mtp_loss: 2.8533 +[titan] 2025-10-04 23:47:54,843 - root - INFO - lr: 4.9828e-05 gnorm: 1.30 [ 1:13:46<23:51:50] +[titan] 2025-10-04 23:48:05,717 - root - INFO - step: 1965 loss: 3.2766 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3954 global_avg_mtp_loss: 2.8812 +[titan] 2025-10-04 23:48:05,717 - root - INFO - lr: 4.9827e-05 gnorm: 1.23 [ 1:13:57<23:51:30] +[titan] 2025-10-04 23:48:16,623 - root - INFO - step: 1970 loss: 3.2148 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3891 global_avg_mtp_loss: 2.8257 +[titan] 2025-10-04 23:48:16,623 - root - INFO - lr: 4.9825e-05 gnorm: 1.38 [ 1:14:08<23:51:12] +[titan] 2025-10-04 23:48:27,497 - root - INFO - step: 1975 loss: 3.2117 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3887 global_avg_mtp_loss: 2.8230 +[titan] 2025-10-04 23:48:27,497 - root - INFO - lr: 4.9824e-05 gnorm: 1.35 [ 1:14:19<23:50:52] +[titan] 2025-10-04 23:48:38,417 - root - INFO - step: 1980 loss: 3.3095 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.4021 global_avg_mtp_loss: 2.9075 +[titan] 2025-10-04 23:48:38,417 - root - INFO - lr: 4.9823e-05 
gnorm: 1.35 [ 1:14:30<23:50:34] +[titan] 2025-10-04 23:48:49,319 - root - INFO - step: 1985 loss: 3.2797 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 23:48:49,319 - root - INFO - lr: 4.9822e-05 gnorm: 1.26 [ 1:14:40<23:50:15] +[titan] 2025-10-04 23:49:00,192 - root - INFO - step: 1990 loss: 3.3317 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4032 global_avg_mtp_loss: 2.9285 +[titan] 2025-10-04 23:49:00,193 - root - INFO - lr: 4.9821e-05 gnorm: 1.36 [ 1:14:51<23:49:56] +[titan] 2025-10-04 23:49:11,083 - root - INFO - step: 1995 loss: 3.2394 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-04 23:49:11,084 - root - INFO - lr: 4.9820e-05 gnorm: 1.25 [ 1:15:02<23:49:37] +[titan] 2025-10-04 23:49:19,762 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:49:21,941 - root - INFO - step: 2000 loss: 3.2905 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.3991 global_avg_mtp_loss: 2.8913 +[titan] 2025-10-04 23:49:21,941 - root - INFO - lr: 4.9819e-05 gnorm: 1.41 [ 1:15:13<23:49:18] +[titan] 2025-10-04 23:49:32,868 - root - INFO - step: 2005 loss: 3.2217 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8299 +[titan] 2025-10-04 23:49:32,868 - root - INFO - lr: 4.9818e-05 gnorm: 1.41 [ 1:15:24<23:49:00] +[titan] 2025-10-04 23:49:43,749 - root - INFO - step: 2010 loss: 3.2369 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3913 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:49:43,750 - root - INFO - lr: 4.9816e-05 gnorm: 1.33 [ 1:15:35<23:48:41] +[titan] 2025-10-04 23:49:54,661 - root - INFO - step: 2015 loss: 3.2498 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3947 global_avg_mtp_loss: 2.8551 +[titan] 2025-10-04 23:49:54,661 - root - INFO - lr: 4.9815e-05 gnorm: 1.34 [ 1:15:46<23:48:23] +[titan] 2025-10-04 23:50:05,578 - root - INFO - step: 2020 loss: 3.2711 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3939 global_avg_mtp_loss: 2.8772 +[titan] 2025-10-04 23:50:05,579 - root - INFO - lr: 4.9814e-05 gnorm: 1.36 [ 1:15:57<23:48:04] +[titan] 2025-10-04 23:50:16,459 - root - INFO - step: 2025 loss: 3.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3962 global_avg_mtp_loss: 2.8751 +[titan] 2025-10-04 23:50:16,459 - root - INFO - lr: 4.9813e-05 gnorm: 1.26 [ 1:16:08<23:47:46] +[titan] 2025-10-04 23:50:27,328 - root - INFO - step: 2030 loss: 3.2606 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3932 global_avg_mtp_loss: 2.8674 +[titan] 2025-10-04 23:50:27,329 - root - INFO - lr: 4.9812e-05 
gnorm: 1.27 [ 1:16:18<23:47:27] +[titan] 2025-10-04 23:50:38,283 - root - INFO - step: 2035 loss: 3.3063 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3996 global_avg_mtp_loss: 2.9067 +[titan] 2025-10-04 23:50:38,284 - root - INFO - lr: 4.9811e-05 gnorm: 1.35 [ 1:16:29<23:47:09] +[titan] 2025-10-04 23:50:49,166 - root - INFO - step: 2040 loss: 3.1900 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3852 global_avg_mtp_loss: 2.8048 +[titan] 2025-10-04 23:50:49,166 - root - INFO - lr: 4.9810e-05 gnorm: 1.37 [ 1:16:40<23:46:51] +[titan] 2025-10-04 23:51:00,136 - root - INFO - step: 2045 loss: 3.2396 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.3910 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:00,136 - root - INFO - lr: 4.9808e-05 gnorm: 1.30 [ 1:16:51<23:46:34] +[titan] 2025-10-04 23:51:06,833 - root - INFO - Dumping profiler traces at step 2048 +[titan] 2025-10-04 23:51:06,869 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:51:09,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:51:11,279 - root - INFO - step: 2050 loss: 3.2428 memory: 118.84GiB(85.28%) tps: 29,407 tflops: 407.98 mfu: 41.25% global_avg_ntp_loss: 0.3943 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:11,280 - root - INFO - lr: 4.9807e-05 gnorm: 1.39 [ 1:17:02<23:46:20] +[titan] 2025-10-04 23:51:22,173 - root - INFO - step: 2055 loss: 3.3541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:51:22,173 - root - INFO - lr: 4.9806e-05 gnorm: 1.42 [ 1:17:13<23:46:02] +[titan] 2025-10-04 23:51:33,068 - root - INFO - step: 2060 loss: 3.2810 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3960 global_avg_mtp_loss: 2.8850 +[titan] 2025-10-04 23:51:33,069 - root - INFO - lr: 4.9805e-05 gnorm: 1.33 [ 1:17:24<23:45:43] +[titan] 2025-10-04 23:51:43,943 - root - INFO - step: 2065 loss: 3.2366 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:51:43,943 - root - INFO - lr: 4.9804e-05 gnorm: 1.45 [ 1:17:35<23:45:25] +[titan] 2025-10-04 23:51:54,802 - root - INFO - step: 2070 loss: 3.2400 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3911 global_avg_mtp_loss: 2.8489 +[titan] 2025-10-04 23:51:54,802 - root - INFO - lr: 4.9803e-05 gnorm: 1.37 [ 1:17:46<23:45:06] +[titan] 2025-10-04 23:52:05,671 - root - INFO - step: 2075 loss: 3.2363 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3927 global_avg_mtp_loss: 2.8436 +[titan] 2025-10-04 23:52:05,671 - root - INFO - lr: 4.9801e-05 gnorm: 1.32 [ 1:17:57<23:44:47] +[titan] 2025-10-04 23:52:16,539 - root - INFO - step: 2080 loss: 3.1819 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3837 global_avg_mtp_loss: 2.7983 +[titan] 2025-10-04 23:52:16,539 - root - INFO - lr: 4.9800e-05 
gnorm: 1.25 [ 1:18:08<23:44:29] +[titan] 2025-10-04 23:52:27,458 - root - INFO - step: 2085 loss: 3.2817 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8845 +[titan] 2025-10-04 23:52:27,458 - root - INFO - lr: 4.9799e-05 gnorm: 1.31 [ 1:18:19<23:44:11] +[titan] 2025-10-04 23:52:38,351 - root - INFO - step: 2090 loss: 3.2776 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3974 global_avg_mtp_loss: 2.8802 +[titan] 2025-10-04 23:52:38,351 - root - INFO - lr: 4.9798e-05 gnorm: 1.27 [ 1:18:29<23:43:53] +[titan] 2025-10-04 23:52:49,245 - root - INFO - step: 2095 loss: 3.2401 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8479 +[titan] 2025-10-04 23:52:49,245 - root - INFO - lr: 4.9797e-05 gnorm: 1.35 [ 1:18:40<23:43:35] +[titan] 2025-10-04 23:52:57,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:53:00,142 - root - INFO - step: 2100 loss: 3.1666 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3827 global_avg_mtp_loss: 2.7839 +[titan] 2025-10-04 23:53:00,142 - root - INFO - lr: 4.9795e-05 gnorm: 1.31 [ 1:18:51<23:43:17] +[titan] 2025-10-04 23:53:11,021 - root - INFO - step: 2105 loss: 3.1171 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3762 global_avg_mtp_loss: 2.7409 +[titan] 2025-10-04 23:53:11,021 - root - INFO - lr: 4.9794e-05 gnorm: 1.45 [ 1:19:02<23:42:59] +[titan] 2025-10-04 23:53:21,893 - root - INFO - step: 2110 loss: 3.2816 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3984 global_avg_mtp_loss: 2.8833 +[titan] 2025-10-04 23:53:21,894 - root - INFO - lr: 4.9793e-05 gnorm: 1.35 [ 1:19:13<23:42:40] +[titan] 2025-10-04 23:53:32,852 - root - INFO - step: 2115 loss: 3.2607 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8663 +[titan] 2025-10-04 23:53:32,852 - root - INFO - lr: 4.9792e-05 gnorm: 1.27 [ 1:19:24<23:42:24] +[titan] 2025-10-04 23:53:43,730 - root - INFO - step: 2120 loss: 3.2629 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8683 +[titan] 2025-10-04 23:53:43,730 - root - INFO - lr: 4.9791e-05 gnorm: 1.25 [ 1:19:35<23:42:05] +[titan] 2025-10-04 23:53:54,620 - root - INFO - step: 2125 loss: 3.0920 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3725 global_avg_mtp_loss: 2.7195 +[titan] 2025-10-04 23:53:54,620 - root - INFO - lr: 4.9789e-05 gnorm: 1.37 [ 1:19:46<23:41:48] +[titan] 2025-10-04 23:54:05,507 - root - INFO - step: 2130 loss: 3.2038 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8165 +[titan] 2025-10-04 23:54:05,508 - root - INFO - lr: 4.9788e-05 
gnorm: 1.28 [ 1:19:57<23:41:30] +[titan] 2025-10-04 23:54:16,404 - root - INFO - step: 2135 loss: 3.1616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.3810 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-04 23:54:16,404 - root - INFO - lr: 4.9787e-05 gnorm: 1.27 [ 1:20:08<23:41:12] +[titan] 2025-10-04 23:54:27,282 - root - INFO - step: 2140 loss: 3.1455 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3792 global_avg_mtp_loss: 2.7663 +[titan] 2025-10-04 23:54:27,282 - root - INFO - lr: 4.9786e-05 gnorm: 1.36 [ 1:20:18<23:40:54] +[titan] 2025-10-04 23:54:38,216 - root - INFO - step: 2145 loss: 3.1443 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7655 +[titan] 2025-10-04 23:54:38,216 - root - INFO - lr: 4.9785e-05 gnorm: 1.24 [ 1:20:29<23:40:37] +[titan] 2025-10-04 23:54:46,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:54:49,106 - root - INFO - step: 2150 loss: 3.2432 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3921 global_avg_mtp_loss: 2.8511 +[titan] 2025-10-04 23:54:49,106 - root - INFO - lr: 4.9783e-05 gnorm: 1.23 [ 1:20:40<23:40:19] +[titan] 2025-10-04 23:54:59,985 - root - INFO - step: 2155 loss: 3.1416 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7620 +[titan] 2025-10-04 23:54:59,985 - root - INFO - lr: 4.9782e-05 gnorm: 1.23 [ 1:20:51<23:40:01] +[titan] 2025-10-04 23:55:10,860 - root - INFO - step: 2160 loss: 3.1386 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7597 +[titan] 2025-10-04 23:55:10,860 - root - INFO - lr: 4.9781e-05 gnorm: 1.27 [ 1:21:02<23:39:43] +[titan] 2025-10-04 23:55:21,730 - root - INFO - step: 2165 loss: 3.2482 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8559 +[titan] 2025-10-04 23:55:21,730 - root - INFO - lr: 4.9780e-05 gnorm: 1.29 [ 1:21:13<23:39:25] +[titan] 2025-10-04 23:55:32,617 - root - INFO - step: 2170 loss: 3.2349 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3914 global_avg_mtp_loss: 2.8435 +[titan] 2025-10-04 23:55:32,617 - root - INFO - lr: 4.9778e-05 gnorm: 1.22 [ 1:21:24<23:39:08] +[titan] 2025-10-04 23:55:43,541 - root - INFO - step: 2175 loss: 3.2325 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.3901 global_avg_mtp_loss: 2.8424 +[titan] 2025-10-04 23:55:43,542 - root - INFO - lr: 4.9777e-05 gnorm: 1.32 [ 1:21:35<23:38:51] +[titan] 2025-10-04 23:55:54,482 - root - INFO - step: 2180 loss: 3.1551 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7749 +[titan] 2025-10-04 23:55:54,482 - root - INFO - lr: 4.9776e-05 
gnorm: 1.29 [ 1:21:46<23:38:34] +[titan] 2025-10-04 23:56:05,357 - root - INFO - step: 2185 loss: 3.2187 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3882 global_avg_mtp_loss: 2.8305 +[titan] 2025-10-04 23:56:05,357 - root - INFO - lr: 4.9775e-05 gnorm: 1.37 [ 1:21:56<23:38:16] +[titan] 2025-10-04 23:56:16,252 - root - INFO - step: 2190 loss: 3.1722 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7900 +[titan] 2025-10-04 23:56:16,253 - root - INFO - lr: 4.9773e-05 gnorm: 1.44 [ 1:22:07<23:37:59] +[titan] 2025-10-04 23:56:27,132 - root - INFO - step: 2195 loss: 3.1685 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3823 global_avg_mtp_loss: 2.7862 +[titan] 2025-10-04 23:56:27,132 - root - INFO - lr: 4.9772e-05 gnorm: 1.33 [ 1:22:18<23:37:41] +[titan] 2025-10-04 23:56:35,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:56:38,038 - root - INFO - step: 2200 loss: 3.1985 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3856 global_avg_mtp_loss: 2.8129 +[titan] 2025-10-04 23:56:38,038 - root - INFO - lr: 4.9771e-05 gnorm: 1.31 [ 1:22:29<23:37:24] +[titan] 2025-10-04 23:56:48,912 - root - INFO - step: 2205 loss: 3.2059 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.8192 +[titan] 2025-10-04 23:56:48,913 - root - INFO - lr: 4.9769e-05 gnorm: 1.26 [ 1:22:40<23:37:06] +[titan] 2025-10-04 23:56:59,839 - root - INFO - step: 2210 loss: 3.1541 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7729 +[titan] 2025-10-04 23:56:59,840 - root - INFO - lr: 4.9768e-05 gnorm: 1.42 [ 1:22:51<23:36:49] +[titan] 2025-10-04 23:57:10,737 - root - INFO - step: 2215 loss: 3.2356 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3903 global_avg_mtp_loss: 2.8453 +[titan] 2025-10-04 23:57:10,737 - root - INFO - lr: 4.9767e-05 gnorm: 1.51 [ 1:23:02<23:36:32] +[titan] 2025-10-04 23:57:21,630 - root - INFO - step: 2220 loss: 3.1859 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.7992 +[titan] 2025-10-04 23:57:21,630 - root - INFO - lr: 4.9766e-05 gnorm: 1.39 [ 1:23:13<23:36:15] +[titan] 2025-10-04 23:57:32,532 - root - INFO - step: 2225 loss: 3.1779 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3842 global_avg_mtp_loss: 2.7936 +[titan] 2025-10-04 23:57:32,532 - root - INFO - lr: 4.9764e-05 gnorm: 1.24 [ 1:23:24<23:35:58] +[titan] 2025-10-04 23:57:43,450 - root - INFO - step: 2230 loss: 3.2176 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.3895 global_avg_mtp_loss: 2.8282 +[titan] 2025-10-04 23:57:43,450 - root - INFO - lr: 4.9763e-05 
gnorm: 1.28 [ 1:23:35<23:35:41] +[titan] 2025-10-04 23:57:54,366 - root - INFO - step: 2235 loss: 3.2212 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3879 global_avg_mtp_loss: 2.8333 +[titan] 2025-10-04 23:57:54,366 - root - INFO - lr: 4.9762e-05 gnorm: 1.35 [ 1:23:45<23:35:24] +[titan] 2025-10-04 23:58:05,251 - root - INFO - step: 2240 loss: 3.2781 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8824 +[titan] 2025-10-04 23:58:05,252 - root - INFO - lr: 4.9760e-05 gnorm: 1.39 [ 1:23:56<23:35:07] +[titan] 2025-10-04 23:58:16,173 - root - INFO - step: 2245 loss: 3.1710 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7885 +[titan] 2025-10-04 23:58:16,174 - root - INFO - lr: 4.9759e-05 gnorm: 1.30 [ 1:24:07<23:34:50] +[titan] 2025-10-04 23:58:24,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:58:27,069 - root - INFO - step: 2250 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7772 +[titan] 2025-10-04 23:58:27,069 - root - INFO - lr: 4.9758e-05 gnorm: 1.33 [ 1:24:18<23:34:33] +[titan] 2025-10-04 23:58:37,973 - root - INFO - step: 2255 loss: 3.2917 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3969 global_avg_mtp_loss: 2.8947 +[titan] 2025-10-04 23:58:37,973 - root - INFO - lr: 4.9757e-05 gnorm: 1.34 [ 1:24:29<23:34:16] +[titan] 2025-10-04 23:58:48,849 - root - INFO - step: 2260 loss: 3.1742 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3832 global_avg_mtp_loss: 2.7911 +[titan] 2025-10-04 23:58:48,849 - root - INFO - lr: 4.9755e-05 gnorm: 1.32 [ 1:24:40<23:33:59] +[titan] 2025-10-04 23:58:59,727 - root - INFO - step: 2265 loss: 3.1716 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3830 global_avg_mtp_loss: 2.7886 +[titan] 2025-10-04 23:58:59,727 - root - INFO - lr: 4.9754e-05 gnorm: 1.31 [ 1:24:51<23:33:42] +[titan] 2025-10-04 23:59:10,618 - root - INFO - step: 2270 loss: 3.2242 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8352 +[titan] 2025-10-04 23:59:10,618 - root - INFO - lr: 4.9753e-05 gnorm: 1.31 [ 1:25:02<23:33:25] +[titan] 2025-10-04 23:59:21,547 - root - INFO - step: 2275 loss: 3.2006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3858 global_avg_mtp_loss: 2.8148 +[titan] 2025-10-04 23:59:21,547 - root - INFO - lr: 4.9751e-05 gnorm: 1.30 [ 1:25:13<23:33:08] +[titan] 2025-10-04 23:59:32,439 - root - INFO - step: 2280 loss: 3.1251 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7491 +[titan] 2025-10-04 23:59:32,439 - root - INFO - lr: 4.9750e-05 
gnorm: 1.25 [ 1:25:24<23:32:51] +[titan] 2025-10-04 23:59:43,315 - root - INFO - step: 2285 loss: 3.1971 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3843 global_avg_mtp_loss: 2.8128 +[titan] 2025-10-04 23:59:43,315 - root - INFO - lr: 4.9749e-05 gnorm: 1.24 [ 1:25:34<23:32:34] +[titan] 2025-10-04 23:59:54,211 - root - INFO - step: 2290 loss: 3.1138 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3754 global_avg_mtp_loss: 2.7384 +[titan] 2025-10-04 23:59:54,211 - root - INFO - lr: 4.9747e-05 gnorm: 1.30 [ 1:25:45<23:32:17] +[titan] 2025-10-05 00:00:05,066 - root - INFO - step: 2295 loss: 3.1381 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3802 global_avg_mtp_loss: 2.7579 +[titan] 2025-10-05 00:00:05,067 - root - INFO - lr: 4.9746e-05 gnorm: 1.34 [ 1:25:56<23:32:00] +[titan] 2025-10-05 00:00:13,755 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:00:15,940 - root - INFO - step: 2300 loss: 3.1684 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7859 +[titan] 2025-10-05 00:00:15,940 - root - INFO - lr: 4.9745e-05 gnorm: 1.25 [ 1:26:07<23:31:43] +[titan] 2025-10-05 00:00:26,871 - root - INFO - step: 2305 loss: 3.1673 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3819 global_avg_mtp_loss: 2.7853 +[titan] 2025-10-05 00:00:26,871 - root - INFO - lr: 4.9743e-05 gnorm: 1.22 [ 1:26:18<23:31:26] +[titan] 2025-10-05 00:00:37,762 - root - INFO - step: 2310 loss: 3.1531 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7719 +[titan] 2025-10-05 00:00:37,762 - root - INFO - lr: 4.9742e-05 gnorm: 1.30 [ 1:26:29<23:31:10] +[titan] 2025-10-05 00:00:48,669 - root - INFO - step: 2315 loss: 3.1583 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3795 global_avg_mtp_loss: 2.7788 +[titan] 2025-10-05 00:00:48,669 - root - INFO - lr: 4.9741e-05 gnorm: 1.22 [ 1:26:40<23:30:53] +[titan] 2025-10-05 00:00:59,522 - root - INFO - step: 2320 loss: 3.1995 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3851 global_avg_mtp_loss: 2.8144 +[titan] 2025-10-05 00:00:59,522 - root - INFO - lr: 4.9739e-05 gnorm: 1.29 [ 1:26:51<23:30:36] +[titan] 2025-10-05 00:01:10,409 - root - INFO - step: 2325 loss: 3.1550 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3800 global_avg_mtp_loss: 2.7750 +[titan] 2025-10-05 00:01:10,409 - root - INFO - lr: 4.9738e-05 gnorm: 1.29 [ 1:27:02<23:30:19] +[titan] 2025-10-05 00:01:21,286 - root - INFO - step: 2330 loss: 3.1042 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3734 global_avg_mtp_loss: 2.7308 +[titan] 2025-10-05 00:01:21,286 - root - INFO - lr: 4.9737e-05 
gnorm: 1.25 [ 1:27:12<23:30:02] +[titan] 2025-10-05 00:01:32,170 - root - INFO - step: 2335 loss: 3.1428 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3775 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:01:32,170 - root - INFO - lr: 4.9735e-05 gnorm: 1.24 [ 1:27:23<23:29:45] +[titan] 2025-10-05 00:01:43,255 - root - INFO - step: 2340 loss: 3.2357 memory: 118.84GiB(85.28%) tps: 29,561 tflops: 410.12 mfu: 41.47% global_avg_ntp_loss: 0.3959 global_avg_mtp_loss: 2.8398 +[titan] 2025-10-05 00:01:43,255 - root - INFO - lr: 4.9734e-05 gnorm: 1.31 [ 1:27:34<23:29:32] +[titan] 2025-10-05 00:01:54,139 - root - INFO - step: 2345 loss: 3.2594 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3938 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:01:54,139 - root - INFO - lr: 4.9732e-05 gnorm: 1.30 [ 1:27:45<23:29:15] +[titan] 2025-10-05 00:02:02,829 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:02:05,010 - root - INFO - step: 2350 loss: 3.1385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3798 global_avg_mtp_loss: 2.7587 +[titan] 2025-10-05 00:02:05,010 - root - INFO - lr: 4.9731e-05 gnorm: 1.30 [ 1:27:56<23:28:58] +[titan] 2025-10-05 00:02:15,898 - root - INFO - step: 2355 loss: 3.1702 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3826 global_avg_mtp_loss: 2.7876 +[titan] 2025-10-05 00:02:15,898 - root - INFO - lr: 4.9730e-05 gnorm: 1.32 [ 1:28:07<23:28:41] +[titan] 2025-10-05 00:02:26,769 - root - INFO - step: 2360 loss: 3.1893 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8043 +[titan] 2025-10-05 00:02:26,769 - root - INFO - lr: 4.9728e-05 gnorm: 1.43 [ 1:28:18<23:28:24] +[titan] 2025-10-05 00:02:37,640 - root - INFO - step: 2365 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7481 +[titan] 2025-10-05 00:02:37,640 - root - INFO - lr: 4.9727e-05 gnorm: 1.39 [ 1:28:29<23:28:07] +[titan] 2025-10-05 00:02:48,598 - root - INFO - step: 2370 loss: 3.1988 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3869 global_avg_mtp_loss: 2.8118 +[titan] 2025-10-05 00:02:48,598 - root - INFO - lr: 4.9726e-05 gnorm: 1.28 [ 1:28:40<23:27:52] +[titan] 2025-10-05 00:02:59,464 - root - INFO - step: 2375 loss: 3.1613 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3814 global_avg_mtp_loss: 2.7799 +[titan] 2025-10-05 00:02:59,464 - root - INFO - lr: 4.9724e-05 gnorm: 1.31 [ 1:28:51<23:27:35] +[titan] 2025-10-05 00:03:10,332 - root - INFO - step: 2380 loss: 3.2049 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3860 global_avg_mtp_loss: 2.8189 +[titan] 2025-10-05 00:03:10,332 - root - INFO - lr: 4.9723e-05 
gnorm: 1.34 [ 1:29:01<23:27:18] +[titan] 2025-10-05 00:03:21,196 - root - INFO - step: 2385 loss: 3.1936 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3835 global_avg_mtp_loss: 2.8101 +[titan] 2025-10-05 00:03:21,196 - root - INFO - lr: 4.9721e-05 gnorm: 1.30 [ 1:29:12<23:27:01] +[titan] 2025-10-05 00:03:32,051 - root - INFO - step: 2390 loss: 3.2440 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3919 global_avg_mtp_loss: 2.8521 +[titan] 2025-10-05 00:03:32,051 - root - INFO - lr: 4.9720e-05 gnorm: 1.33 [ 1:29:23<23:26:44] +[titan] 2025-10-05 00:03:42,978 - root - INFO - step: 2395 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3771 global_avg_mtp_loss: 2.7475 +[titan] 2025-10-05 00:03:42,978 - root - INFO - lr: 4.9719e-05 gnorm: 1.31 [ 1:29:34<23:26:28] +[titan] 2025-10-05 00:03:51,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:03:53,851 - root - INFO - step: 2400 loss: 3.2662 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8718 +[titan] 2025-10-05 00:03:53,851 - root - INFO - lr: 4.9717e-05 gnorm: 1.40 [ 1:29:45<23:26:12] +[titan] 2025-10-05 00:04:04,749 - root - INFO - step: 2405 loss: 3.2406 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-05 00:04:04,749 - root - INFO - lr: 4.9716e-05 gnorm: 1.38 [ 1:29:56<23:25:56] +[titan] 2025-10-05 00:04:15,630 - root - INFO - step: 2410 loss: 3.1271 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7504 +[titan] 2025-10-05 00:04:15,630 - root - INFO - lr: 4.9714e-05 gnorm: 1.27 [ 1:30:07<23:25:39] +[titan] 2025-10-05 00:04:26,491 - root - INFO - step: 2415 loss: 3.1402 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3779 global_avg_mtp_loss: 2.7623 +[titan] 2025-10-05 00:04:26,491 - root - INFO - lr: 4.9713e-05 gnorm: 1.39 [ 1:30:18<23:25:22] +[titan] 2025-10-05 00:04:37,350 - root - INFO - step: 2420 loss: 3.1746 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7924 +[titan] 2025-10-05 00:04:37,350 - root - INFO - lr: 4.9711e-05 gnorm: 1.45 [ 1:30:28<23:25:05] +[titan] 2025-10-05 00:04:48,268 - root - INFO - step: 2425 loss: 3.1765 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3831 global_avg_mtp_loss: 2.7934 +[titan] 2025-10-05 00:04:48,269 - root - INFO - lr: 4.9710e-05 gnorm: 1.42 [ 1:30:39<23:24:50] +[titan] 2025-10-05 00:04:59,129 - root - INFO - step: 2430 loss: 3.2456 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8539 +[titan] 2025-10-05 00:04:59,129 - root - INFO - lr: 4.9709e-05 
gnorm: 1.29 [ 1:30:50<23:24:33] +[titan] 2025-10-05 00:05:10,040 - root - INFO - step: 2435 loss: 3.0885 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3728 global_avg_mtp_loss: 2.7157 +[titan] 2025-10-05 00:05:10,040 - root - INFO - lr: 4.9707e-05 gnorm: 1.30 [ 1:31:01<23:24:17] +[titan] 2025-10-05 00:05:20,901 - root - INFO - step: 2440 loss: 3.1883 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3839 global_avg_mtp_loss: 2.8044 +[titan] 2025-10-05 00:05:20,901 - root - INFO - lr: 4.9706e-05 gnorm: 1.29 [ 1:31:12<23:24:00] +[titan] 2025-10-05 00:05:31,767 - root - INFO - step: 2445 loss: 3.1123 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3742 global_avg_mtp_loss: 2.7381 +[titan] 2025-10-05 00:05:31,767 - root - INFO - lr: 4.9704e-05 gnorm: 1.28 [ 1:31:23<23:23:44] +[titan] 2025-10-05 00:05:40,460 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:05:42,650 - root - INFO - step: 2450 loss: 3.1786 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3845 global_avg_mtp_loss: 2.7941 +[titan] 2025-10-05 00:05:42,650 - root - INFO - lr: 4.9703e-05 gnorm: 1.27 [ 1:31:34<23:23:27] +[titan] 2025-10-05 00:05:53,572 - root - INFO - step: 2455 loss: 3.1398 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3790 global_avg_mtp_loss: 2.7608 +[titan] 2025-10-05 00:05:53,573 - root - INFO - lr: 4.9701e-05 gnorm: 1.27 [ 1:31:45<23:23:12] +[titan] 2025-10-05 00:06:04,454 - root - INFO - step: 2460 loss: 3.2308 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8350 +[titan] 2025-10-05 00:06:04,455 - root - INFO - lr: 4.9700e-05 gnorm: 2.69 [ 1:31:56<23:22:55] +[titan] 2025-10-05 00:06:15,398 - root - INFO - step: 2465 loss: 3.1213 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7452 +[titan] 2025-10-05 00:06:15,398 - root - INFO - lr: 4.9698e-05 gnorm: 1.28 [ 1:32:07<23:22:40] +[titan] 2025-10-05 00:06:26,299 - root - INFO - step: 2470 loss: 3.1059 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3749 global_avg_mtp_loss: 2.7310 +[titan] 2025-10-05 00:06:26,299 - root - INFO - lr: 4.9697e-05 gnorm: 1.29 [ 1:32:17<23:22:24] +[titan] 2025-10-05 00:06:37,192 - root - INFO - step: 2475 loss: 3.1051 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3745 global_avg_mtp_loss: 2.7306 +[titan] 2025-10-05 00:06:37,192 - root - INFO - lr: 4.9696e-05 gnorm: 1.31 [ 1:32:28<23:22:08] +[titan] 2025-10-05 00:06:48,155 - root - INFO - step: 2480 loss: 3.1093 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.69 mfu: 41.93% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7359 +[titan] 2025-10-05 00:06:48,155 - root - INFO - lr: 4.9694e-05 
gnorm: 1.32 [ 1:32:39<23:21:53] +[titan] 2025-10-05 00:06:59,038 - root - INFO - step: 2485 loss: 3.1283 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3767 global_avg_mtp_loss: 2.7516 +[titan] 2025-10-05 00:06:59,038 - root - INFO - lr: 4.9693e-05 gnorm: 1.34 [ 1:32:50<23:21:37] +[titan] 2025-10-05 00:07:09,901 - root - INFO - step: 2490 loss: 3.1376 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7607 +[titan] 2025-10-05 00:07:09,901 - root - INFO - lr: 4.9691e-05 gnorm: 1.34 [ 1:33:01<23:21:21] +[titan] 2025-10-05 00:07:20,803 - root - INFO - step: 2495 loss: 3.1543 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7736 +[titan] 2025-10-05 00:07:20,803 - root - INFO - lr: 4.9690e-05 gnorm: 1.36 [ 1:33:12<23:21:05] +[titan] 2025-10-05 00:07:29,528 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:07:31,706 - root - INFO - step: 2500 loss: 3.1575 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7787 +[titan] 2025-10-05 00:07:31,706 - root - INFO - lr: 4.9688e-05 gnorm: 1.31 [ 1:33:23<23:20:49] +[titan] 2025-10-05 00:07:42,568 - root - INFO - step: 2505 loss: 3.1325 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3760 global_avg_mtp_loss: 2.7566 +[titan] 2025-10-05 00:07:42,568 - root - INFO - lr: 4.9687e-05 gnorm: 1.22 [ 1:33:34<23:20:33] +[titan] 2025-10-05 00:07:53,496 - root - INFO - step: 2510 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3718 global_avg_mtp_loss: 2.7142 +[titan] 2025-10-05 00:07:53,497 - root - INFO - lr: 4.9685e-05 gnorm: 1.31 [ 1:33:45<23:20:17] +[titan] 2025-10-05 00:08:04,377 - root - INFO - step: 2515 loss: 3.2003 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3862 global_avg_mtp_loss: 2.8141 +[titan] 2025-10-05 00:08:04,378 - root - INFO - lr: 4.9684e-05 gnorm: 1.43 [ 1:33:55<23:20:01] +[titan] 2025-10-05 00:08:15,255 - root - INFO - step: 2520 loss: 3.1816 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3847 global_avg_mtp_loss: 2.7969 +[titan] 2025-10-05 00:08:15,255 - root - INFO - lr: 4.9682e-05 gnorm: 1.38 [ 1:34:06<23:19:45] +[titan] 2025-10-05 00:08:26,136 - root - INFO - step: 2525 loss: 3.2579 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:08:26,136 - root - INFO - lr: 4.9681e-05 gnorm: 1.37 [ 1:34:17<23:19:29] +[titan] 2025-10-05 00:08:37,049 - root - INFO - step: 2530 loss: 3.1078 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7346 +[titan] 2025-10-05 00:08:37,049 - root - INFO - lr: 4.9679e-05 
gnorm: 1.28 [ 1:34:28<23:19:14] +[titan] 2025-10-05 00:08:48,046 - root - INFO - step: 2535 loss: 3.0953 memory: 118.84GiB(85.28%) tps: 29,797 tflops: 413.39 mfu: 41.80% global_avg_ntp_loss: 0.3719 global_avg_mtp_loss: 2.7233 +[titan] 2025-10-05 00:08:48,047 - root - INFO - lr: 4.9678e-05 gnorm: 1.25 [ 1:34:39<23:19:00] +[titan] 2025-10-05 00:08:58,919 - root - INFO - step: 2540 loss: 3.1620 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3803 global_avg_mtp_loss: 2.7817 +[titan] 2025-10-05 00:08:58,919 - root - INFO - lr: 4.9676e-05 gnorm: 1.26 [ 1:34:50<23:18:43] +[titan] 2025-10-05 00:09:09,786 - root - INFO - step: 2545 loss: 3.1667 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3815 global_avg_mtp_loss: 2.7852 +[titan] 2025-10-05 00:09:09,786 - root - INFO - lr: 4.9675e-05 gnorm: 1.40 [ 1:35:01<23:18:27] +[titan] 2025-10-05 00:09:18,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:09:20,677 - root - INFO - step: 2550 loss: 3.0790 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3704 global_avg_mtp_loss: 2.7086 +[titan] 2025-10-05 00:09:20,677 - root - INFO - lr: 4.9673e-05 gnorm: 1.34 [ 1:35:12<23:18:12] +[titan] 2025-10-05 00:09:31,556 - root - INFO - step: 2555 loss: 3.0389 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3662 global_avg_mtp_loss: 2.6727 +[titan] 2025-10-05 00:09:31,557 - root - INFO - lr: 4.9672e-05 gnorm: 1.31 [ 1:35:23<23:17:56] +[titan] 2025-10-05 00:09:42,516 - root - INFO - step: 2560 loss: 3.1285 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.3755 global_avg_mtp_loss: 2.7530 +[titan] 2025-10-05 00:09:42,516 - root - INFO - lr: 4.9670e-05 gnorm: 1.23 [ 1:35:34<23:17:41] +[titan] 2025-10-05 00:09:42,679 - root - INFO - Dumping profiler traces at step 2560 +[titan] 2025-10-05 00:09:42,716 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:09:53,687 - root - INFO - step: 2565 loss: 3.0635 memory: 118.84GiB(85.28%) tps: 29,334 tflops: 406.97 mfu: 41.15% global_avg_ntp_loss: 0.3659 global_avg_mtp_loss: 2.6976 +[titan] 2025-10-05 00:09:53,687 - root - INFO - lr: 4.9669e-05 gnorm: 1.33 [ 1:35:45<23:17:29] +[titan] 2025-10-05 00:10:04,566 - root - INFO - step: 2570 loss: 3.0420 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6772 +[titan] 2025-10-05 00:10:04,566 - root - INFO - lr: 4.9667e-05 gnorm: 1.29 [ 1:35:56<23:17:13] +[titan] 2025-10-05 00:10:15,470 - root - INFO - step: 2575 loss: 3.2085 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8213 +[titan] 2025-10-05 00:10:15,471 - root - INFO - lr: 4.9666e-05 gnorm: 1.30 [ 1:36:07<23:16:58] +[titan] 2025-10-05 00:10:26,384 - root - INFO - step: 2580 loss: 3.2105 
memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3855 global_avg_mtp_loss: 2.8250 +[titan] 2025-10-05 00:10:26,384 - root - INFO - lr: 4.9664e-05 gnorm: 1.29 [ 1:36:17<23:16:43] +[titan] 2025-10-05 00:10:37,260 - root - INFO - step: 2585 loss: 3.0856 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3698 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:10:37,261 - root - INFO - lr: 4.9663e-05 gnorm: 1.30 [ 1:36:28<23:16:27] +[titan] 2025-10-05 00:10:48,212 - root - INFO - step: 2590 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.3650 global_avg_mtp_loss: 2.6717 +[titan] 2025-10-05 00:10:48,212 - root - INFO - lr: 4.9661e-05 gnorm: 1.26 [ 1:36:39<23:16:12] +[titan] 2025-10-05 00:10:59,142 - root - INFO - step: 2595 loss: 3.1492 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.3799 global_avg_mtp_loss: 2.7693 +[titan] 2025-10-05 00:10:59,142 - root - INFO - lr: 4.9659e-05 gnorm: 1.24 [ 1:36:50<23:15:57] +[titan] 2025-10-05 00:11:07,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:11:10,032 - root - INFO - step: 2600 loss: 3.0911 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3727 global_avg_mtp_loss: 2.7185 +[titan] 2025-10-05 00:11:10,032 - root - INFO - lr: 4.9658e-05 gnorm: 1.22 [ 1:37:01<23:15:41] +[titan] 2025-10-05 00:11:20,915 - root - INFO - step: 2605 loss: 3.1578 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3808 global_avg_mtp_loss: 2.7770 +[titan] 2025-10-05 00:11:20,915 - root - INFO - lr: 4.9656e-05 gnorm: 1.26 [ 1:37:12<23:15:26] +[titan] 2025-10-05 00:11:31,815 - root - INFO - step: 2610 loss: 3.1088 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3741 global_avg_mtp_loss: 2.7348 +[titan] 2025-10-05 00:11:31,815 - root - INFO - lr: 4.9655e-05 gnorm: 1.25 [ 1:37:23<23:15:10] +[titan] 2025-10-05 00:11:42,699 - root - INFO - step: 2615 loss: 3.1165 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7413 +[titan] 2025-10-05 00:11:42,699 - root - INFO - lr: 4.9653e-05 gnorm: 1.30 [ 1:37:34<23:14:55] +[titan] 2025-10-05 00:11:53,594 - root - INFO - step: 2620 loss: 3.1397 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7617 +[titan] 2025-10-05 00:11:53,594 - root - INFO - lr: 4.9652e-05 gnorm: 1.27 [ 1:37:45<23:14:39] +[titan] 2025-10-05 00:12:04,505 - root - INFO - step: 2625 loss: 3.1215 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7463 +[titan] 2025-10-05 00:12:04,505 - root - INFO - lr: 4.9650e-05 gnorm: 1.33 [ 1:37:56<23:14:24] +[titan] 2025-10-05 00:12:15,389 - root - INFO - step: 2630 loss: 3.1525 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7737 +[titan] 2025-10-05 00:12:15,390 - root - INFO - lr: 4.9649e-05 
gnorm: 1.27 [ 1:38:06<23:14:08] +[titan] 2025-10-05 00:12:26,270 - root - INFO - step: 2635 loss: 3.1176 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3751 global_avg_mtp_loss: 2.7424 +[titan] 2025-10-05 00:12:26,271 - root - INFO - lr: 4.9647e-05 gnorm: 1.30 [ 1:38:17<23:13:53] +[titan] 2025-10-05 00:12:37,153 - root - INFO - step: 2640 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6977 +[titan] 2025-10-05 00:12:37,153 - root - INFO - lr: 4.9645e-05 gnorm: 1.28 [ 1:38:28<23:13:37] +[titan] 2025-10-05 00:12:48,055 - root - INFO - step: 2645 loss: 3.1119 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7387 +[titan] 2025-10-05 00:12:48,055 - root - INFO - lr: 4.9644e-05 gnorm: 1.30 [ 1:38:39<23:13:22] +[titan] 2025-10-05 00:12:56,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:12:58,983 - root - INFO - step: 2650 loss: 3.0548 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6872 +[titan] 2025-10-05 00:12:58,983 - root - INFO - lr: 4.9642e-05 gnorm: 1.23 [ 1:38:50<23:13:07] +[titan] 2025-10-05 00:13:09,879 - root - INFO - step: 2655 loss: 3.0496 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3671 global_avg_mtp_loss: 2.6826 +[titan] 2025-10-05 00:13:09,879 - root - INFO - lr: 4.9641e-05 gnorm: 1.28 [ 1:39:01<23:12:52] +[titan] 2025-10-05 00:13:20,805 - root - INFO - step: 2660 loss: 3.1186 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.3759 global_avg_mtp_loss: 2.7427 +[titan] 2025-10-05 00:13:20,805 - root - INFO - lr: 4.9639e-05 gnorm: 1.25 [ 1:39:12<23:12:37] +[titan] 2025-10-05 00:13:31,679 - root - INFO - step: 2665 loss: 3.0573 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3678 global_avg_mtp_loss: 2.6895 +[titan] 2025-10-05 00:13:31,680 - root - INFO - lr: 4.9637e-05 gnorm: 1.25 [ 1:39:23<23:12:21] +[titan] 2025-10-05 00:13:42,558 - root - INFO - step: 2670 loss: 3.0570 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3663 global_avg_mtp_loss: 2.6907 +[titan] 2025-10-05 00:13:42,558 - root - INFO - lr: 4.9636e-05 gnorm: 1.26 [ 1:39:34<23:12:06] +[titan] 2025-10-05 00:13:53,472 - root - INFO - step: 2675 loss: 3.1878 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8028 +[titan] 2025-10-05 00:13:53,472 - root - INFO - lr: 4.9634e-05 gnorm: 1.31 [ 1:39:45<23:11:51] +[titan] 2025-10-05 00:14:04,364 - root - INFO - step: 2680 loss: 3.1135 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3738 global_avg_mtp_loss: 2.7397 +[titan] 2025-10-05 00:14:04,365 - root - INFO - lr: 4.9633e-05 
gnorm: 1.22 [ 1:39:55<23:11:35] +[titan] 2025-10-05 00:14:15,279 - root - INFO - step: 2685 loss: 3.0010 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3606 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:14:15,279 - root - INFO - lr: 4.9631e-05 gnorm: 1.32 [ 1:40:06<23:11:20] +[titan] 2025-10-05 00:14:26,223 - root - INFO - step: 2690 loss: 3.1084 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.3737 global_avg_mtp_loss: 2.7347 +[titan] 2025-10-05 00:14:26,223 - root - INFO - lr: 4.9629e-05 gnorm: 1.28 [ 1:40:17<23:11:06] +[titan] 2025-10-05 00:14:37,114 - root - INFO - step: 2695 loss: 3.1301 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3758 global_avg_mtp_loss: 2.7543 +[titan] 2025-10-05 00:14:37,114 - root - INFO - lr: 4.9628e-05 gnorm: 1.31 [ 1:40:28<23:10:51] +[titan] 2025-10-05 00:14:45,831 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:14:48,028 - root - INFO - step: 2700 loss: 3.0874 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3690 global_avg_mtp_loss: 2.7184 +[titan] 2025-10-05 00:14:48,029 - root - INFO - lr: 4.9626e-05 gnorm: 1.38 [ 1:40:39<23:10:36] +[titan] 2025-10-05 00:14:58,931 - root - INFO - step: 2705 loss: 3.1260 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3750 global_avg_mtp_loss: 2.7509 +[titan] 2025-10-05 00:14:58,931 - root - INFO - lr: 4.9625e-05 gnorm: 1.28 [ 1:40:50<23:10:21] +[titan] 2025-10-05 00:15:09,812 - root - INFO - step: 2710 loss: 3.0477 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3643 global_avg_mtp_loss: 2.6834 +[titan] 2025-10-05 00:15:09,812 - root - INFO - lr: 4.9623e-05 gnorm: 1.29 [ 1:41:01<23:10:05] +[titan] 2025-10-05 00:15:20,681 - root - INFO - step: 2715 loss: 2.9784 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3575 global_avg_mtp_loss: 2.6209 +[titan] 2025-10-05 00:15:20,681 - root - INFO - lr: 4.9621e-05 gnorm: 1.39 [ 1:41:12<23:09:50] +[titan] 2025-10-05 00:15:31,544 - root - INFO - step: 2720 loss: 3.0989 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3712 global_avg_mtp_loss: 2.7276 +[titan] 2025-10-05 00:15:31,544 - root - INFO - lr: 4.9620e-05 gnorm: 1.28 [ 1:41:23<23:09:34] +[titan] 2025-10-05 00:15:42,481 - root - INFO - step: 2725 loss: 3.0279 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.3634 global_avg_mtp_loss: 2.6645 +[titan] 2025-10-05 00:15:42,482 - root - INFO - lr: 4.9618e-05 gnorm: 1.38 [ 1:41:34<23:09:20] +[titan] 2025-10-05 00:15:53,371 - root - INFO - step: 2730 loss: 3.0629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3682 global_avg_mtp_loss: 2.6946 +[titan] 2025-10-05 00:15:53,371 - root - INFO - lr: 4.9616e-05 
gnorm: 1.27 [ 1:41:44<23:09:04] +[titan] 2025-10-05 00:16:04,250 - root - INFO - step: 2735 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6257 +[titan] 2025-10-05 00:16:04,250 - root - INFO - lr: 4.9615e-05 gnorm: 1.32 [ 1:41:55<23:08:49] +[titan] 2025-10-05 00:16:15,152 - root - INFO - step: 2740 loss: 3.0246 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6608 +[titan] 2025-10-05 00:16:15,152 - root - INFO - lr: 4.9613e-05 gnorm: 1.29 [ 1:42:06<23:08:34] +[titan] 2025-10-05 00:16:26,041 - root - INFO - step: 2745 loss: 3.1571 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7775 +[titan] 2025-10-05 00:16:26,041 - root - INFO - lr: 4.9611e-05 gnorm: 1.28 [ 1:42:17<23:08:19] +[titan] 2025-10-05 00:16:34,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:16:36,918 - root - INFO - step: 2750 loss: 3.0736 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3686 global_avg_mtp_loss: 2.7050 +[titan] 2025-10-05 00:16:36,919 - root - INFO - lr: 4.9610e-05 gnorm: 1.24 [ 1:42:28<23:08:04] +[titan] 2025-10-05 00:16:47,865 - root - INFO - step: 2755 loss: 2.9899 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6309 +[titan] 2025-10-05 00:16:47,865 - root - INFO - lr: 4.9608e-05 gnorm: 1.22 [ 1:42:39<23:07:49] +[titan] 2025-10-05 00:16:58,851 - root - INFO - step: 2760 loss: 3.0390 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.3657 global_avg_mtp_loss: 2.6733 +[titan] 2025-10-05 00:16:58,851 - root - INFO - lr: 4.9606e-05 gnorm: 1.33 [ 1:42:50<23:07:36] +[titan] 2025-10-05 00:17:09,727 - root - INFO - step: 2765 loss: 3.1133 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3740 global_avg_mtp_loss: 2.7394 +[titan] 2025-10-05 00:17:09,727 - root - INFO - lr: 4.9605e-05 gnorm: 1.30 [ 1:43:01<23:07:20] +[titan] 2025-10-05 00:17:20,607 - root - INFO - step: 2770 loss: 3.0638 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3695 global_avg_mtp_loss: 2.6943 +[titan] 2025-10-05 00:17:20,607 - root - INFO - lr: 4.9603e-05 gnorm: 1.35 [ 1:43:12<23:07:05] +[titan] 2025-10-05 00:17:31,517 - root - INFO - step: 2775 loss: 3.0938 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3716 global_avg_mtp_loss: 2.7222 +[titan] 2025-10-05 00:17:31,517 - root - INFO - lr: 4.9601e-05 gnorm: 1.26 [ 1:43:23<23:06:50] +[titan] 2025-10-05 00:17:42,399 - root - INFO - step: 2780 loss: 3.0126 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6519 +[titan] 2025-10-05 00:17:42,399 - root - INFO - lr: 4.9600e-05 
gnorm: 1.30 [ 1:43:33<23:06:35] +[titan] 2025-10-05 00:17:53,331 - root - INFO - step: 2785 loss: 3.0873 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3714 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:17:53,331 - root - INFO - lr: 4.9598e-05 gnorm: 1.28 [ 1:43:44<23:06:21] +[titan] 2025-10-05 00:18:04,263 - root - INFO - step: 2790 loss: 3.0185 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.3627 global_avg_mtp_loss: 2.6559 +[titan] 2025-10-05 00:18:04,263 - root - INFO - lr: 4.9596e-05 gnorm: 1.33 [ 1:43:55<23:06:06] +[titan] 2025-10-05 00:18:15,157 - root - INFO - step: 2795 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3677 global_avg_mtp_loss: 2.6975 +[titan] 2025-10-05 00:18:15,157 - root - INFO - lr: 4.9595e-05 gnorm: 1.25 [ 1:44:06<23:05:51] +[titan] 2025-10-05 00:18:23,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:18:26,054 - root - INFO - step: 2800 loss: 3.0213 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6588 +[titan] 2025-10-05 00:18:26,054 - root - INFO - lr: 4.9593e-05 gnorm: 1.28 [ 1:44:17<23:05:37] +[titan] 2025-10-05 00:18:36,954 - root - INFO - step: 2805 loss: 3.1425 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3783 global_avg_mtp_loss: 2.7642 +[titan] 2025-10-05 00:18:36,954 - root - INFO - lr: 4.9591e-05 gnorm: 1.28 [ 1:44:28<23:05:22] +[titan] 2025-10-05 00:18:47,864 - root - INFO - step: 2810 loss: 3.0392 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3638 global_avg_mtp_loss: 2.6754 +[titan] 2025-10-05 00:18:47,864 - root - INFO - lr: 4.9590e-05 gnorm: 1.27 [ 1:44:39<23:05:07] +[titan] 2025-10-05 00:18:58,796 - root - INFO - step: 2815 loss: 3.0728 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3684 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:18:58,796 - root - INFO - lr: 4.9588e-05 gnorm: 1.28 [ 1:44:50<23:04:53] +[titan] 2025-10-05 00:19:09,768 - root - INFO - step: 2820 loss: 3.0759 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.3697 global_avg_mtp_loss: 2.7062 +[titan] 2025-10-05 00:19:09,768 - root - INFO - lr: 4.9586e-05 gnorm: 1.28 [ 1:45:01<23:04:39] +[titan] 2025-10-05 00:19:20,659 - root - INFO - step: 2825 loss: 3.0518 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3667 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:19:20,660 - root - INFO - lr: 4.9585e-05 gnorm: 1.38 [ 1:45:12<23:04:24] +[titan] 2025-10-05 00:19:31,538 - root - INFO - step: 2830 loss: 3.1035 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7302 +[titan] 2025-10-05 00:19:31,538 - root - INFO - lr: 4.9583e-05 
gnorm: 1.34 [ 1:45:23<23:04:09] +[titan] 2025-10-05 00:19:42,419 - root - INFO - step: 2835 loss: 3.0685 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3683 global_avg_mtp_loss: 2.7002 +[titan] 2025-10-05 00:19:42,419 - root - INFO - lr: 4.9581e-05 gnorm: 1.37 [ 1:45:33<23:03:54] +[titan] 2025-10-05 00:19:53,306 - root - INFO - step: 2840 loss: 3.0223 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3652 global_avg_mtp_loss: 2.6571 +[titan] 2025-10-05 00:19:53,306 - root - INFO - lr: 4.9579e-05 gnorm: 1.32 [ 1:45:44<23:03:39] +[titan] 2025-10-05 00:20:04,219 - root - INFO - step: 2845 loss: 3.0274 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3633 global_avg_mtp_loss: 2.6641 +[titan] 2025-10-05 00:20:04,219 - root - INFO - lr: 4.9578e-05 gnorm: 1.28 [ 1:45:55<23:03:25] +[titan] 2025-10-05 00:20:12,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:20:15,145 - root - INFO - step: 2850 loss: 3.0430 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3661 global_avg_mtp_loss: 2.6770 +[titan] 2025-10-05 00:20:15,145 - root - INFO - lr: 4.9576e-05 gnorm: 1.26 [ 1:46:06<23:03:10] +[titan] 2025-10-05 00:20:26,027 - root - INFO - step: 2855 loss: 3.0893 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7209 +[titan] 2025-10-05 00:20:26,027 - root - INFO - lr: 4.9574e-05 gnorm: 1.27 [ 1:46:17<23:02:55] +[titan] 2025-10-05 00:20:36,904 - root - INFO - step: 2860 loss: 3.0960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3724 global_avg_mtp_loss: 2.7236 +[titan] 2025-10-05 00:20:36,904 - root - INFO - lr: 4.9573e-05 gnorm: 1.28 [ 1:46:28<23:02:40] +[titan] 2025-10-05 00:20:47,806 - root - INFO - step: 2865 loss: 3.1434 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:20:47,806 - root - INFO - lr: 4.9571e-05 gnorm: 1.30 [ 1:46:39<23:02:26] +[titan] 2025-10-05 00:20:58,761 - root - INFO - step: 2870 loss: 2.9969 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.99 mfu: 41.96% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:20:58,761 - root - INFO - lr: 4.9569e-05 gnorm: 1.30 [ 1:46:50<23:02:12] +[titan] 2025-10-05 00:21:09,643 - root - INFO - step: 2875 loss: 3.0232 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3635 global_avg_mtp_loss: 2.6597 +[titan] 2025-10-05 00:21:09,643 - root - INFO - lr: 4.9567e-05 gnorm: 1.30 [ 1:47:01<23:01:57] +[titan] 2025-10-05 00:21:20,548 - root - INFO - step: 2880 loss: 2.9737 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3570 global_avg_mtp_loss: 2.6167 +[titan] 2025-10-05 00:21:20,548 - root - INFO - lr: 4.9566e-05 
gnorm: 1.28 [ 1:47:12<23:01:42] +[titan] 2025-10-05 00:21:31,529 - root - INFO - step: 2885 loss: 3.0875 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.3720 global_avg_mtp_loss: 2.7155 +[titan] 2025-10-05 00:21:31,530 - root - INFO - lr: 4.9564e-05 gnorm: 1.25 [ 1:47:23<23:01:29] +[titan] 2025-10-05 00:21:42,407 - root - INFO - step: 2890 loss: 3.0347 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6705 +[titan] 2025-10-05 00:21:42,407 - root - INFO - lr: 4.9562e-05 gnorm: 1.38 [ 1:47:33<23:01:14] +[titan] 2025-10-05 00:21:53,280 - root - INFO - step: 2895 loss: 3.0145 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3610 global_avg_mtp_loss: 2.6535 +[titan] 2025-10-05 00:21:53,280 - root - INFO - lr: 4.9560e-05 gnorm: 1.22 [ 1:47:44<23:00:59] +[titan] 2025-10-05 00:22:02,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:22:04,199 - root - INFO - step: 2900 loss: 3.1605 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-05 00:22:04,199 - root - INFO - lr: 4.9559e-05 gnorm: 1.35 [ 1:47:55<23:00:45] +[titan] 2025-10-05 00:22:15,084 - root - INFO - step: 2905 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3703 global_avg_mtp_loss: 2.7158 +[titan] 2025-10-05 00:22:15,084 - root - INFO - lr: 4.9557e-05 gnorm: 1.29 [ 1:48:06<23:00:30] +[titan] 2025-10-05 00:22:25,962 - root - INFO - step: 2910 loss: 3.0022 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6423 +[titan] 2025-10-05 00:22:25,962 - root - INFO - lr: 4.9555e-05 gnorm: 1.31 [ 1:48:17<23:00:15] +[titan] 2025-10-05 00:22:36,871 - root - INFO - step: 2915 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3797 global_avg_mtp_loss: 2.7783 +[titan] 2025-10-05 00:22:36,871 - root - INFO - lr: 4.9553e-05 gnorm: 1.42 [ 1:48:28<23:00:01] +[titan] 2025-10-05 00:22:47,815 - root - INFO - step: 2920 loss: 3.0326 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3653 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:22:47,815 - root - INFO - lr: 4.9552e-05 gnorm: 1.30 [ 1:48:39<22:59:47] +[titan] 2025-10-05 00:22:58,703 - root - INFO - step: 2925 loss: 3.0724 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3681 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:22:58,703 - root - INFO - lr: 4.9550e-05 gnorm: 1.34 [ 1:48:50<22:59:32] +[titan] 2025-10-05 00:23:09,632 - root - INFO - step: 2930 loss: 3.0482 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6818 +[titan] 2025-10-05 00:23:09,632 - root - INFO - lr: 4.9548e-05 
gnorm: 1.23 [ 1:49:01<22:59:18] +[titan] 2025-10-05 00:23:20,517 - root - INFO - step: 2935 loss: 2.9200 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5708 +[titan] 2025-10-05 00:23:20,517 - root - INFO - lr: 4.9546e-05 gnorm: 1.28 [ 1:49:12<22:59:03] +[titan] 2025-10-05 00:23:31,391 - root - INFO - step: 2940 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6615 +[titan] 2025-10-05 00:23:31,391 - root - INFO - lr: 4.9544e-05 gnorm: 1.25 [ 1:49:22<22:58:48] +[titan] 2025-10-05 00:23:42,322 - root - INFO - step: 2945 loss: 3.1473 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7704 +[titan] 2025-10-05 00:23:42,322 - root - INFO - lr: 4.9543e-05 gnorm: 1.35 [ 1:49:33<22:58:34] +[titan] 2025-10-05 00:23:51,004 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:23:53,182 - root - INFO - step: 2950 loss: 3.0250 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6620 +[titan] 2025-10-05 00:23:53,183 - root - INFO - lr: 4.9541e-05 gnorm: 1.26 [ 1:49:44<22:58:19] +[titan] 2025-10-05 00:24:04,100 - root - INFO - step: 2955 loss: 2.9887 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3579 global_avg_mtp_loss: 2.6308 +[titan] 2025-10-05 00:24:04,100 - root - INFO - lr: 4.9539e-05 gnorm: 1.32 [ 1:49:55<22:58:05] +[titan] 2025-10-05 00:24:14,957 - root - INFO - step: 2960 loss: 2.9752 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6188 +[titan] 2025-10-05 00:24:14,957 - root - INFO - lr: 4.9537e-05 gnorm: 1.29 [ 1:50:06<22:57:50] +[titan] 2025-10-05 00:24:25,824 - root - INFO - step: 2965 loss: 3.0670 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3670 global_avg_mtp_loss: 2.7000 +[titan] 2025-10-05 00:24:25,824 - root - INFO - lr: 4.9535e-05 gnorm: 1.36 [ 1:50:17<22:57:35] +[titan] 2025-10-05 00:24:36,677 - root - INFO - step: 2970 loss: 3.0105 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3605 global_avg_mtp_loss: 2.6500 +[titan] 2025-10-05 00:24:36,677 - root - INFO - lr: 4.9534e-05 gnorm: 1.28 [ 1:50:28<22:57:20] +[titan] 2025-10-05 00:24:47,550 - root - INFO - step: 2975 loss: 3.0798 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7113 +[titan] 2025-10-05 00:24:47,550 - root - INFO - lr: 4.9532e-05 gnorm: 1.26 [ 1:50:39<22:57:06] +[titan] 2025-10-05 00:24:58,508 - root - INFO - step: 2980 loss: 3.0933 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7212 +[titan] 2025-10-05 00:24:58,508 - root - INFO - lr: 4.9530e-05 
gnorm: 1.34 [ 1:50:50<22:56:52] +[titan] 2025-10-05 00:25:09,436 - root - INFO - step: 2985 loss: 2.9918 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6318 +[titan] 2025-10-05 00:25:09,436 - root - INFO - lr: 4.9528e-05 gnorm: 1.29 [ 1:51:00<22:56:38] +[titan] 2025-10-05 00:25:20,336 - root - INFO - step: 2990 loss: 3.0864 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3699 global_avg_mtp_loss: 2.7165 +[titan] 2025-10-05 00:25:20,336 - root - INFO - lr: 4.9526e-05 gnorm: 1.30 [ 1:51:11<22:56:24] +[titan] 2025-10-05 00:25:31,210 - root - INFO - step: 2995 loss: 3.0152 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3613 global_avg_mtp_loss: 2.6538 +[titan] 2025-10-05 00:25:31,210 - root - INFO - lr: 4.9525e-05 gnorm: 1.34 [ 1:51:22<22:56:09] +[titan] 2025-10-05 00:25:39,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:25:42,077 - root - INFO - step: 3000 loss: 2.9639 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6087 +[titan] 2025-10-05 00:25:42,077 - root - INFO - lr: 4.9523e-05 gnorm: 1.20 [ 1:51:33<22:55:54] +[titan] 2025-10-05 00:25:52,956 - root - INFO - step: 3005 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6274 +[titan] 2025-10-05 00:25:52,956 - root - INFO - lr: 4.9521e-05 gnorm: 1.25 [ 1:51:44<22:55:40] +[titan] 2025-10-05 00:26:03,943 - root - INFO - step: 3010 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:26:03,943 - root - INFO - lr: 4.9519e-05 gnorm: 1.25 [ 1:51:55<22:55:26] +[titan] 2025-10-05 00:26:14,799 - root - INFO - step: 3015 loss: 2.9622 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6081 +[titan] 2025-10-05 00:26:14,799 - root - INFO - lr: 4.9517e-05 gnorm: 1.20 [ 1:52:06<22:55:12] +[titan] 2025-10-05 00:26:25,658 - root - INFO - step: 3020 loss: 3.1014 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7293 +[titan] 2025-10-05 00:26:25,658 - root - INFO - lr: 4.9515e-05 gnorm: 1.29 [ 1:52:17<22:54:57] +[titan] 2025-10-05 00:26:36,501 - root - INFO - step: 3025 loss: 3.0035 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.3588 global_avg_mtp_loss: 2.6447 +[titan] 2025-10-05 00:26:36,501 - root - INFO - lr: 4.9514e-05 gnorm: 1.22 [ 1:52:28<22:54:42] +[titan] 2025-10-05 00:26:47,370 - root - INFO - step: 3030 loss: 2.9868 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3589 global_avg_mtp_loss: 2.6279 +[titan] 2025-10-05 00:26:47,370 - root - INFO - lr: 4.9512e-05 
gnorm: 1.28 [ 1:52:38<22:54:27] +[titan] 2025-10-05 00:26:58,255 - root - INFO - step: 3035 loss: 3.0690 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.7021 +[titan] 2025-10-05 00:26:58,255 - root - INFO - lr: 4.9510e-05 gnorm: 1.29 [ 1:52:49<22:54:13] +[titan] 2025-10-05 00:27:09,176 - root - INFO - step: 3040 loss: 2.9415 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5893 +[titan] 2025-10-05 00:27:09,176 - root - INFO - lr: 4.9508e-05 gnorm: 1.23 [ 1:53:00<22:53:59] +[titan] 2025-10-05 00:27:20,081 - root - INFO - step: 3045 loss: 2.9565 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.6029 +[titan] 2025-10-05 00:27:20,081 - root - INFO - lr: 4.9506e-05 gnorm: 1.31 [ 1:53:11<22:53:45] +[titan] 2025-10-05 00:27:28,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:27:30,926 - root - INFO - step: 3050 loss: 3.0382 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.6713 +[titan] 2025-10-05 00:27:30,926 - root - INFO - lr: 4.9504e-05 gnorm: 1.32 [ 1:53:22<22:53:30] +[titan] 2025-10-05 00:27:41,788 - root - INFO - step: 3055 loss: 2.9038 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5556 +[titan] 2025-10-05 00:27:41,788 - root - INFO - lr: 4.9502e-05 gnorm: 1.27 [ 1:53:33<22:53:15] +[titan] 2025-10-05 00:27:52,674 - root - INFO - step: 3060 loss: 3.0259 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3639 global_avg_mtp_loss: 2.6619 +[titan] 2025-10-05 00:27:52,674 - root - INFO - lr: 4.9501e-05 gnorm: 1.32 [ 1:53:44<22:53:01] +[titan] 2025-10-05 00:28:03,564 - root - INFO - step: 3065 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6732 +[titan] 2025-10-05 00:28:03,564 - root - INFO - lr: 4.9499e-05 gnorm: 1.39 [ 1:53:55<22:52:47] +[titan] 2025-10-05 00:28:14,505 - root - INFO - step: 3070 loss: 2.9931 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.3595 global_avg_mtp_loss: 2.6336 +[titan] 2025-10-05 00:28:14,505 - root - INFO - lr: 4.9497e-05 gnorm: 1.46 [ 1:54:06<22:52:33] +[titan] 2025-10-05 00:28:19,015 - root - INFO - Dumping profiler traces at step 3072 +[titan] 2025-10-05 00:28:19,051 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:28:25,635 - root - INFO - step: 3075 loss: 2.9714 memory: 118.84GiB(85.28%) tps: 29,442 tflops: 408.46 mfu: 41.30% global_avg_ntp_loss: 0.3583 global_avg_mtp_loss: 2.6131 +[titan] 2025-10-05 00:28:25,635 - root - INFO - lr: 4.9495e-05 gnorm: 1.38 [ 1:54:17<22:52:21] +[titan] 2025-10-05 00:28:36,484 - root - INFO - step: 3080 loss: 3.0383 
memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3644 global_avg_mtp_loss: 2.6739 +[titan] 2025-10-05 00:28:36,484 - root - INFO - lr: 4.9493e-05 gnorm: 1.27 [ 1:54:28<22:52:07] +[titan] 2025-10-05 00:28:47,350 - root - INFO - step: 3085 loss: 3.0016 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:28:47,350 - root - INFO - lr: 4.9491e-05 gnorm: 1.28 [ 1:54:38<22:51:52] +[titan] 2025-10-05 00:28:58,198 - root - INFO - step: 3090 loss: 2.8733 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 00:28:58,198 - root - INFO - lr: 4.9489e-05 gnorm: 1.28 [ 1:54:49<22:51:37] +[titan] 2025-10-05 00:29:09,096 - root - INFO - step: 3095 loss: 3.0415 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6767 +[titan] 2025-10-05 00:29:09,096 - root - INFO - lr: 4.9487e-05 gnorm: 1.33 [ 1:55:00<22:51:23] +[titan] 2025-10-05 00:29:17,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:29:19,960 - root - INFO - step: 3100 loss: 2.9482 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.5947 +[titan] 2025-10-05 00:29:19,960 - root - INFO - lr: 4.9485e-05 gnorm: 1.33 [ 1:55:11<22:51:09] +[titan] 2025-10-05 00:29:30,867 - root - INFO - step: 3105 loss: 2.9859 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6283 +[titan] 2025-10-05 00:29:30,868 - root - INFO - lr: 4.9484e-05 gnorm: 1.27 [ 1:55:22<22:50:55] +[titan] 2025-10-05 00:29:41,783 - root - INFO - step: 3110 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6306 +[titan] 2025-10-05 00:29:41,783 - root - INFO - lr: 4.9482e-05 gnorm: 1.30 [ 1:55:33<22:50:41] +[titan] 2025-10-05 00:29:52,657 - root - INFO - step: 3115 loss: 2.9941 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6352 +[titan] 2025-10-05 00:29:52,657 - root - INFO - lr: 4.9480e-05 gnorm: 1.24 [ 1:55:44<22:50:26] +[titan] 2025-10-05 00:30:03,529 - root - INFO - step: 3120 loss: 3.0041 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3611 global_avg_mtp_loss: 2.6431 +[titan] 2025-10-05 00:30:03,529 - root - INFO - lr: 4.9478e-05 gnorm: 1.22 [ 1:55:55<22:50:12] +[titan] 2025-10-05 00:30:14,438 - root - INFO - step: 3125 loss: 2.9712 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6162 +[titan] 2025-10-05 00:30:14,438 - root - INFO - lr: 4.9476e-05 gnorm: 1.28 [ 1:56:05<22:49:58] +[titan] 2025-10-05 00:30:25,289 - root - INFO - step: 3130 loss: 2.9425 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:30:25,289 - root - INFO - lr: 4.9474e-05 
gnorm: 1.28 [ 1:56:16<22:49:43] +[titan] 2025-10-05 00:30:36,160 - root - INFO - step: 3135 loss: 3.0775 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3696 global_avg_mtp_loss: 2.7079 +[titan] 2025-10-05 00:30:36,160 - root - INFO - lr: 4.9472e-05 gnorm: 1.26 [ 1:56:27<22:49:29] +[titan] 2025-10-05 00:30:47,054 - root - INFO - step: 3140 loss: 3.0122 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6492 +[titan] 2025-10-05 00:30:47,054 - root - INFO - lr: 4.9470e-05 gnorm: 1.22 [ 1:56:38<22:49:15] +[titan] 2025-10-05 00:30:57,914 - root - INFO - step: 3145 loss: 3.0169 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6560 +[titan] 2025-10-05 00:30:57,914 - root - INFO - lr: 4.9468e-05 gnorm: 1.27 [ 1:56:49<22:49:01] +[titan] 2025-10-05 00:31:06,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:31:08,816 - root - INFO - step: 3150 loss: 2.9327 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3545 global_avg_mtp_loss: 2.5782 +[titan] 2025-10-05 00:31:08,816 - root - INFO - lr: 4.9466e-05 gnorm: 1.26 [ 1:57:00<22:48:47] +[titan] 2025-10-05 00:31:19,715 - root - INFO - step: 3155 loss: 3.0434 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3631 global_avg_mtp_loss: 2.6803 +[titan] 2025-10-05 00:31:19,715 - root - INFO - lr: 4.9464e-05 gnorm: 1.33 [ 1:57:11<22:48:33] +[titan] 2025-10-05 00:31:30,598 - root - INFO - step: 3160 loss: 2.9152 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5676 +[titan] 2025-10-05 00:31:30,598 - root - INFO - lr: 4.9462e-05 gnorm: 1.28 [ 1:57:22<22:48:18] +[titan] 2025-10-05 00:31:41,469 - root - INFO - step: 3165 loss: 3.0228 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6602 +[titan] 2025-10-05 00:31:41,469 - root - INFO - lr: 4.9460e-05 gnorm: 1.32 [ 1:57:33<22:48:04] +[titan] 2025-10-05 00:31:52,401 - root - INFO - step: 3170 loss: 2.9954 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:31:52,401 - root - INFO - lr: 4.9459e-05 gnorm: 1.39 [ 1:57:43<22:47:50] +[titan] 2025-10-05 00:32:03,274 - root - INFO - step: 3175 loss: 2.9805 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.6231 +[titan] 2025-10-05 00:32:03,274 - root - INFO - lr: 4.9457e-05 gnorm: 1.26 [ 1:57:54<22:47:36] +[titan] 2025-10-05 00:32:14,178 - root - INFO - step: 3180 loss: 3.0141 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3598 global_avg_mtp_loss: 2.6543 +[titan] 2025-10-05 00:32:14,178 - root - INFO - lr: 4.9455e-05 
gnorm: 1.31 [ 1:58:05<22:47:22] +[titan] 2025-10-05 00:32:25,055 - root - INFO - step: 3185 loss: 3.0493 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3641 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:32:25,055 - root - INFO - lr: 4.9453e-05 gnorm: 1.37 [ 1:58:16<22:47:08] +[titan] 2025-10-05 00:32:35,936 - root - INFO - step: 3190 loss: 2.9654 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6093 +[titan] 2025-10-05 00:32:35,936 - root - INFO - lr: 4.9451e-05 gnorm: 1.29 [ 1:58:27<22:46:54] +[titan] 2025-10-05 00:32:46,815 - root - INFO - step: 3195 loss: 2.9889 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3566 global_avg_mtp_loss: 2.6323 +[titan] 2025-10-05 00:32:46,815 - root - INFO - lr: 4.9449e-05 gnorm: 1.28 [ 1:58:38<22:46:40] +[titan] 2025-10-05 00:32:55,521 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:32:57,705 - root - INFO - step: 3200 loss: 2.9502 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.5953 +[titan] 2025-10-05 00:32:57,705 - root - INFO - lr: 4.9447e-05 gnorm: 1.30 [ 1:58:49<22:46:26] +[titan] 2025-10-05 00:33:08,681 - root - INFO - step: 3205 loss: 2.9709 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.22 mfu: 41.88% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6155 +[titan] 2025-10-05 00:33:08,681 - root - INFO - lr: 4.9445e-05 gnorm: 1.23 [ 1:59:00<22:46:13] +[titan] 2025-10-05 00:33:19,557 - root - INFO - step: 3210 loss: 2.9185 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5693 +[titan] 2025-10-05 00:33:19,558 - root - INFO - lr: 4.9443e-05 gnorm: 1.28 [ 1:59:11<22:45:59] +[titan] 2025-10-05 00:33:30,432 - root - INFO - step: 3215 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3514 global_avg_mtp_loss: 2.5956 +[titan] 2025-10-05 00:33:30,432 - root - INFO - lr: 4.9441e-05 gnorm: 1.39 [ 1:59:21<22:45:44] +[titan] 2025-10-05 00:33:41,300 - root - INFO - step: 3220 loss: 3.0300 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3628 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:33:41,300 - root - INFO - lr: 4.9439e-05 gnorm: 1.32 [ 1:59:32<22:45:30] +[titan] 2025-10-05 00:33:52,166 - root - INFO - step: 3225 loss: 3.0123 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6515 +[titan] 2025-10-05 00:33:52,166 - root - INFO - lr: 4.9437e-05 gnorm: 1.29 [ 1:59:43<22:45:16] +[titan] 2025-10-05 00:34:03,015 - root - INFO - step: 3230 loss: 3.0282 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3622 global_avg_mtp_loss: 2.6660 +[titan] 2025-10-05 00:34:03,015 - root - INFO - lr: 4.9435e-05 
gnorm: 1.29 [ 1:59:54<22:45:02] +[titan] 2025-10-05 00:34:13,972 - root - INFO - step: 3235 loss: 3.0440 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3646 global_avg_mtp_loss: 2.6794 +[titan] 2025-10-05 00:34:13,973 - root - INFO - lr: 4.9433e-05 gnorm: 1.27 [ 2:00:05<22:44:48] +[titan] 2025-10-05 00:34:24,817 - root - INFO - step: 3240 loss: 2.9616 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:34:24,817 - root - INFO - lr: 4.9431e-05 gnorm: 1.21 [ 2:00:16<22:44:34] +[titan] 2025-10-05 00:34:35,664 - root - INFO - step: 3245 loss: 3.0402 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6760 +[titan] 2025-10-05 00:34:35,665 - root - INFO - lr: 4.9429e-05 gnorm: 1.23 [ 2:00:27<22:44:19] +[titan] 2025-10-05 00:34:44,349 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:34:46,540 - root - INFO - step: 3250 loss: 3.0298 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3617 global_avg_mtp_loss: 2.6681 +[titan] 2025-10-05 00:34:46,540 - root - INFO - lr: 4.9427e-05 gnorm: 1.26 [ 2:00:38<22:44:05] +[titan] 2025-10-05 00:34:57,421 - root - INFO - step: 3255 loss: 2.9633 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6078 +[titan] 2025-10-05 00:34:57,421 - root - INFO - lr: 4.9425e-05 gnorm: 1.31 [ 2:00:48<22:43:51] +[titan] 2025-10-05 00:35:08,296 - root - INFO - step: 3260 loss: 2.9911 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6340 +[titan] 2025-10-05 00:35:08,296 - root - INFO - lr: 4.9423e-05 gnorm: 1.27 [ 2:00:59<22:43:37] +[titan] 2025-10-05 00:35:19,241 - root - INFO - step: 3265 loss: 2.9592 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6039 +[titan] 2025-10-05 00:35:19,242 - root - INFO - lr: 4.9421e-05 gnorm: 1.30 [ 2:01:10<22:43:24] +[titan] 2025-10-05 00:35:30,115 - root - INFO - step: 3270 loss: 2.9685 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6125 +[titan] 2025-10-05 00:35:30,115 - root - INFO - lr: 4.9419e-05 gnorm: 1.33 [ 2:01:21<22:43:10] +[titan] 2025-10-05 00:35:40,981 - root - INFO - step: 3275 loss: 3.0649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3675 global_avg_mtp_loss: 2.6974 +[titan] 2025-10-05 00:35:40,981 - root - INFO - lr: 4.9417e-05 gnorm: 1.36 [ 2:01:32<22:42:56] +[titan] 2025-10-05 00:35:51,879 - root - INFO - step: 3280 loss: 2.9994 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6401 +[titan] 2025-10-05 00:35:51,879 - root - INFO - lr: 4.9415e-05 
gnorm: 1.31 [ 2:01:43<22:42:42] +[titan] 2025-10-05 00:36:02,779 - root - INFO - step: 3285 loss: 2.9516 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3524 global_avg_mtp_loss: 2.5992 +[titan] 2025-10-05 00:36:02,779 - root - INFO - lr: 4.9413e-05 gnorm: 1.24 [ 2:01:54<22:42:28] +[titan] 2025-10-05 00:36:13,718 - root - INFO - step: 3290 loss: 3.0135 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6533 +[titan] 2025-10-05 00:36:13,718 - root - INFO - lr: 4.9411e-05 gnorm: 1.32 [ 2:02:05<22:42:15] +[titan] 2025-10-05 00:36:24,612 - root - INFO - step: 3295 loss: 2.9374 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3515 global_avg_mtp_loss: 2.5859 +[titan] 2025-10-05 00:36:24,613 - root - INFO - lr: 4.9409e-05 gnorm: 1.30 [ 2:02:16<22:42:01] +[titan] 2025-10-05 00:36:33,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:36:35,569 - root - INFO - step: 3300 loss: 3.0216 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6591 +[titan] 2025-10-05 00:36:35,570 - root - INFO - lr: 4.9407e-05 gnorm: 1.29 [ 2:02:27<22:41:48] +[titan] 2025-10-05 00:36:46,479 - root - INFO - step: 3305 loss: 2.9748 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6193 +[titan] 2025-10-05 00:36:46,480 - root - INFO - lr: 4.9405e-05 gnorm: 1.29 [ 2:02:38<22:41:35] +[titan] 2025-10-05 00:36:57,349 - root - INFO - step: 3310 loss: 2.9636 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3540 global_avg_mtp_loss: 2.6095 +[titan] 2025-10-05 00:36:57,350 - root - INFO - lr: 4.9403e-05 gnorm: 1.18 [ 2:02:48<22:41:20] +[titan] 2025-10-05 00:37:08,233 - root - INFO - step: 3315 loss: 2.9774 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6224 +[titan] 2025-10-05 00:37:08,233 - root - INFO - lr: 4.9401e-05 gnorm: 1.24 [ 2:02:59<22:41:07] +[titan] 2025-10-05 00:37:19,133 - root - INFO - step: 3320 loss: 2.9377 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:37:19,133 - root - INFO - lr: 4.9399e-05 gnorm: 1.24 [ 2:03:10<22:40:53] +[titan] 2025-10-05 00:37:29,998 - root - INFO - step: 3325 loss: 2.8934 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:37:29,999 - root - INFO - lr: 4.9397e-05 gnorm: 1.31 [ 2:03:21<22:40:39] +[titan] 2025-10-05 00:37:40,921 - root - INFO - step: 3330 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3596 global_avg_mtp_loss: 2.6407 +[titan] 2025-10-05 00:37:40,921 - root - INFO - lr: 4.9395e-05 
gnorm: 1.29 [ 2:03:32<22:40:25] +[titan] 2025-10-05 00:37:51,784 - root - INFO - step: 3335 loss: 2.9450 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5927 +[titan] 2025-10-05 00:37:51,784 - root - INFO - lr: 4.9392e-05 gnorm: 1.29 [ 2:03:43<22:40:11] +[titan] 2025-10-05 00:38:02,640 - root - INFO - step: 3340 loss: 2.9243 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3497 global_avg_mtp_loss: 2.5746 +[titan] 2025-10-05 00:38:02,640 - root - INFO - lr: 4.9390e-05 gnorm: 1.24 [ 2:03:54<22:39:57] +[titan] 2025-10-05 00:38:13,559 - root - INFO - step: 3345 loss: 2.9258 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5767 +[titan] 2025-10-05 00:38:13,559 - root - INFO - lr: 4.9388e-05 gnorm: 1.32 [ 2:04:05<22:39:44] +[titan] 2025-10-05 00:38:22,253 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:38:24,447 - root - INFO - step: 3350 loss: 2.9893 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3573 global_avg_mtp_loss: 2.6320 +[titan] 2025-10-05 00:38:24,447 - root - INFO - lr: 4.9386e-05 gnorm: 1.23 [ 2:04:15<22:39:30] +[titan] 2025-10-05 00:38:35,319 - root - INFO - step: 3355 loss: 2.8550 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3405 global_avg_mtp_loss: 2.5146 +[titan] 2025-10-05 00:38:35,319 - root - INFO - lr: 4.9384e-05 gnorm: 1.25 [ 2:04:26<22:39:16] +[titan] 2025-10-05 00:38:46,199 - root - INFO - step: 3360 loss: 2.8891 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:38:46,199 - root - INFO - lr: 4.9382e-05 gnorm: 1.31 [ 2:04:37<22:39:02] +[titan] 2025-10-05 00:38:57,161 - root - INFO - step: 3365 loss: 2.9521 memory: 118.84GiB(85.28%) tps: 29,893 tflops: 414.72 mfu: 41.93% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.5991 +[titan] 2025-10-05 00:38:57,161 - root - INFO - lr: 4.9380e-05 gnorm: 1.25 [ 2:04:48<22:38:49] +[titan] 2025-10-05 00:39:08,046 - root - INFO - step: 3370 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.5919 +[titan] 2025-10-05 00:39:08,046 - root - INFO - lr: 4.9378e-05 gnorm: 1.32 [ 2:04:59<22:38:36] +[titan] 2025-10-05 00:39:18,937 - root - INFO - step: 3375 loss: 2.9184 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5694 +[titan] 2025-10-05 00:39:18,937 - root - INFO - lr: 4.9376e-05 gnorm: 1.25 [ 2:05:10<22:38:22] +[titan] 2025-10-05 00:39:29,827 - root - INFO - step: 3380 loss: 2.9621 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6080 +[titan] 2025-10-05 00:39:29,827 - root - INFO - lr: 4.9374e-05 
gnorm: 1.24 [ 2:05:21<22:38:08] +[titan] 2025-10-05 00:39:40,719 - root - INFO - step: 3385 loss: 2.9011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3465 global_avg_mtp_loss: 2.5547 +[titan] 2025-10-05 00:39:40,719 - root - INFO - lr: 4.9372e-05 gnorm: 1.22 [ 2:05:32<22:37:54] +[titan] 2025-10-05 00:39:51,594 - root - INFO - step: 3390 loss: 2.9910 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6324 +[titan] 2025-10-05 00:39:51,595 - root - INFO - lr: 4.9370e-05 gnorm: 1.24 [ 2:05:43<22:37:41] +[titan] 2025-10-05 00:40:02,576 - root - INFO - step: 3395 loss: 2.9436 memory: 118.84GiB(85.28%) tps: 29,839 tflops: 413.97 mfu: 41.86% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5907 +[titan] 2025-10-05 00:40:02,577 - root - INFO - lr: 4.9367e-05 gnorm: 1.26 [ 2:05:54<22:37:28] +[titan] 2025-10-05 00:40:11,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:40:13,489 - root - INFO - step: 3400 loss: 2.9838 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3569 global_avg_mtp_loss: 2.6269 +[titan] 2025-10-05 00:40:13,489 - root - INFO - lr: 4.9365e-05 gnorm: 1.27 [ 2:06:05<22:37:15] +[titan] 2025-10-05 00:40:24,371 - root - INFO - step: 3405 loss: 3.0515 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3666 global_avg_mtp_loss: 2.6849 +[titan] 2025-10-05 00:40:24,371 - root - INFO - lr: 4.9363e-05 gnorm: 1.23 [ 2:06:15<22:37:01] +[titan] 2025-10-05 00:40:35,244 - root - INFO - step: 3410 loss: 2.9631 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3556 global_avg_mtp_loss: 2.6074 +[titan] 2025-10-05 00:40:35,244 - root - INFO - lr: 4.9361e-05 gnorm: 1.28 [ 2:06:26<22:36:47] +[titan] 2025-10-05 00:40:46,133 - root - INFO - step: 3415 loss: 2.9578 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3547 global_avg_mtp_loss: 2.6032 +[titan] 2025-10-05 00:40:46,133 - root - INFO - lr: 4.9359e-05 gnorm: 1.23 [ 2:06:37<22:36:33] +[titan] 2025-10-05 00:40:57,009 - root - INFO - step: 3420 loss: 2.9329 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3498 global_avg_mtp_loss: 2.5832 +[titan] 2025-10-05 00:40:57,009 - root - INFO - lr: 4.9357e-05 gnorm: 1.19 [ 2:06:48<22:36:20] +[titan] 2025-10-05 00:41:07,937 - root - INFO - step: 3425 loss: 2.9564 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.6041 +[titan] 2025-10-05 00:41:07,937 - root - INFO - lr: 4.9355e-05 gnorm: 1.27 [ 2:06:59<22:36:06] +[titan] 2025-10-05 00:41:18,921 - root - INFO - step: 3430 loss: 2.9729 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.88 mfu: 41.85% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6166 +[titan] 2025-10-05 00:41:18,921 - root - INFO - lr: 4.9353e-05 
gnorm: 1.26 [ 2:07:10<22:35:54] +[titan] 2025-10-05 00:41:29,788 - root - INFO - step: 3435 loss: 2.9570 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3535 global_avg_mtp_loss: 2.6035 +[titan] 2025-10-05 00:41:29,789 - root - INFO - lr: 4.9351e-05 gnorm: 1.30 [ 2:07:21<22:35:40] +[titan] 2025-10-05 00:41:40,636 - root - INFO - step: 3440 loss: 2.9121 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.3473 global_avg_mtp_loss: 2.5649 +[titan] 2025-10-05 00:41:40,637 - root - INFO - lr: 4.9348e-05 gnorm: 1.25 [ 2:07:32<22:35:26] +[titan] 2025-10-05 00:41:51,497 - root - INFO - step: 3445 loss: 2.9720 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3548 global_avg_mtp_loss: 2.6172 +[titan] 2025-10-05 00:41:51,498 - root - INFO - lr: 4.9346e-05 gnorm: 1.24 [ 2:07:43<22:35:12] +[titan] 2025-10-05 00:42:00,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:42:02,373 - root - INFO - step: 3450 loss: 3.0025 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:42:02,373 - root - INFO - lr: 4.9344e-05 gnorm: 1.40 [ 2:07:53<22:34:58] +[titan] 2025-10-05 00:42:13,236 - root - INFO - step: 3455 loss: 2.8984 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5533 +[titan] 2025-10-05 00:42:13,236 - root - INFO - lr: 4.9342e-05 gnorm: 1.33 [ 2:08:04<22:34:44] +[titan] 2025-10-05 00:42:24,195 - root - INFO - step: 3460 loss: 2.8961 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3468 global_avg_mtp_loss: 2.5493 +[titan] 2025-10-05 00:42:24,195 - root - INFO - lr: 4.9340e-05 gnorm: 1.30 [ 2:08:15<22:34:31] +[titan] 2025-10-05 00:42:35,085 - root - INFO - step: 3465 loss: 3.0085 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3591 global_avg_mtp_loss: 2.6494 +[titan] 2025-10-05 00:42:35,085 - root - INFO - lr: 4.9338e-05 gnorm: 1.28 [ 2:08:26<22:34:18] +[titan] 2025-10-05 00:42:45,952 - root - INFO - step: 3470 loss: 2.9361 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5855 +[titan] 2025-10-05 00:42:45,952 - root - INFO - lr: 4.9336e-05 gnorm: 1.26 [ 2:08:37<22:34:04] +[titan] 2025-10-05 00:42:56,840 - root - INFO - step: 3475 loss: 2.9223 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3496 global_avg_mtp_loss: 2.5727 +[titan] 2025-10-05 00:42:56,841 - root - INFO - lr: 4.9333e-05 gnorm: 1.25 [ 2:08:48<22:33:51] +[titan] 2025-10-05 00:43:07,696 - root - INFO - step: 3480 loss: 2.9007 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5539 +[titan] 2025-10-05 00:43:07,696 - root - INFO - lr: 4.9331e-05 
gnorm: 1.30 [ 2:08:59<22:33:37] +[titan] 2025-10-05 00:43:18,563 - root - INFO - step: 3485 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5412 +[titan] 2025-10-05 00:43:18,564 - root - INFO - lr: 4.9329e-05 gnorm: 1.24 [ 2:09:10<22:33:23] +[titan] 2025-10-05 00:43:29,498 - root - INFO - step: 3490 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3565 global_avg_mtp_loss: 2.6312 +[titan] 2025-10-05 00:43:29,498 - root - INFO - lr: 4.9327e-05 gnorm: 1.34 [ 2:09:21<22:33:10] +[titan] 2025-10-05 00:43:40,371 - root - INFO - step: 3495 loss: 2.8500 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5087 +[titan] 2025-10-05 00:43:40,371 - root - INFO - lr: 4.9325e-05 gnorm: 1.24 [ 2:09:31<22:32:56] +[titan] 2025-10-05 00:43:49,059 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:43:51,242 - root - INFO - step: 3500 loss: 2.9053 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5576 +[titan] 2025-10-05 00:43:51,242 - root - INFO - lr: 4.9323e-05 gnorm: 1.26 [ 2:09:42<22:32:42] +[titan] 2025-10-05 00:44:02,120 - root - INFO - step: 3505 loss: 2.9596 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:44:02,120 - root - INFO - lr: 4.9320e-05 gnorm: 1.27 [ 2:09:53<22:32:29] +[titan] 2025-10-05 00:44:13,041 - root - INFO - step: 3510 loss: 2.9620 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3557 global_avg_mtp_loss: 2.6063 +[titan] 2025-10-05 00:44:13,042 - root - INFO - lr: 4.9318e-05 gnorm: 1.36 [ 2:10:04<22:32:16] +[titan] 2025-10-05 00:44:23,983 - root - INFO - step: 3515 loss: 2.9163 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5681 +[titan] 2025-10-05 00:44:23,983 - root - INFO - lr: 4.9316e-05 gnorm: 1.35 [ 2:10:15<22:32:03] +[titan] 2025-10-05 00:44:34,890 - root - INFO - step: 3520 loss: 2.9840 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6176 +[titan] 2025-10-05 00:44:34,890 - root - INFO - lr: 4.9314e-05 gnorm: 1.30 [ 2:10:26<22:31:49] +[titan] 2025-10-05 00:44:45,807 - root - INFO - step: 3525 loss: 2.8766 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3420 global_avg_mtp_loss: 2.5345 +[titan] 2025-10-05 00:44:45,807 - root - INFO - lr: 4.9312e-05 gnorm: 1.33 [ 2:10:37<22:31:36] +[titan] 2025-10-05 00:44:56,695 - root - INFO - step: 3530 loss: 2.8643 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5214 +[titan] 2025-10-05 00:44:56,696 - root - INFO - lr: 4.9309e-05 
gnorm: 1.31 [ 2:10:48<22:31:23] +[titan] 2025-10-05 00:45:07,556 - root - INFO - step: 3535 loss: 2.9317 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5804 +[titan] 2025-10-05 00:45:07,556 - root - INFO - lr: 4.9307e-05 gnorm: 1.24 [ 2:10:59<22:31:09] +[titan] 2025-10-05 00:45:18,462 - root - INFO - step: 3540 loss: 2.9149 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3487 global_avg_mtp_loss: 2.5661 +[titan] 2025-10-05 00:45:18,463 - root - INFO - lr: 4.9305e-05 gnorm: 1.24 [ 2:11:09<22:30:56] +[titan] 2025-10-05 00:45:29,403 - root - INFO - step: 3545 loss: 2.9166 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5665 +[titan] 2025-10-05 00:45:29,403 - root - INFO - lr: 4.9303e-05 gnorm: 1.31 [ 2:11:20<22:30:43] +[titan] 2025-10-05 00:45:38,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:45:40,270 - root - INFO - step: 3550 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5477 +[titan] 2025-10-05 00:45:40,270 - root - INFO - lr: 4.9301e-05 gnorm: 1.20 [ 2:11:31<22:30:29] +[titan] 2025-10-05 00:45:51,156 - root - INFO - step: 3555 loss: 2.8547 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3414 global_avg_mtp_loss: 2.5133 +[titan] 2025-10-05 00:45:51,156 - root - INFO - lr: 4.9298e-05 gnorm: 1.22 [ 2:11:42<22:30:16] +[titan] 2025-10-05 00:46:02,028 - root - INFO - step: 3560 loss: 2.9708 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3553 global_avg_mtp_loss: 2.6154 +[titan] 2025-10-05 00:46:02,029 - root - INFO - lr: 4.9296e-05 gnorm: 1.25 [ 2:11:53<22:30:02] +[titan] 2025-10-05 00:46:12,872 - root - INFO - step: 3565 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3463 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:46:12,873 - root - INFO - lr: 4.9294e-05 gnorm: 1.22 [ 2:12:04<22:29:48] +[titan] 2025-10-05 00:46:23,793 - root - INFO - step: 3570 loss: 2.9591 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3538 global_avg_mtp_loss: 2.6053 +[titan] 2025-10-05 00:46:23,794 - root - INFO - lr: 4.9292e-05 gnorm: 1.26 [ 2:12:15<22:29:35] +[titan] 2025-10-05 00:46:34,664 - root - INFO - step: 3575 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3519 global_avg_mtp_loss: 2.5886 +[titan] 2025-10-05 00:46:34,664 - root - INFO - lr: 4.9290e-05 gnorm: 1.30 [ 2:12:26<22:29:21] +[titan] 2025-10-05 00:46:45,547 - root - INFO - step: 3580 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3464 global_avg_mtp_loss: 2.5473 +[titan] 2025-10-05 00:46:45,548 - root - INFO - lr: 4.9287e-05 
gnorm: 1.23 [ 2:12:37<22:29:08] +[titan] 2025-10-05 00:46:54,489 - root - INFO - Dumping profiler traces at step 3584 +[titan] 2025-10-05 00:46:54,526 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:46:56,755 - root - INFO - step: 3585 loss: 2.9232 memory: 118.84GiB(85.28%) tps: 29,238 tflops: 405.64 mfu: 41.02% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5729 +[titan] 2025-10-05 00:46:56,755 - root - INFO - lr: 4.9285e-05 gnorm: 1.28 [ 2:12:48<22:28:58] +[titan] 2025-10-05 00:47:07,619 - root - INFO - step: 3590 loss: 2.9273 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3501 global_avg_mtp_loss: 2.5772 +[titan] 2025-10-05 00:47:07,619 - root - INFO - lr: 4.9283e-05 gnorm: 1.25 [ 2:12:59<22:28:44] +[titan] 2025-10-05 00:47:18,508 - root - INFO - step: 3595 loss: 2.9212 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5720 +[titan] 2025-10-05 00:47:18,509 - root - INFO - lr: 4.9281e-05 gnorm: 1.34 [ 2:13:10<22:28:31] +[titan] 2025-10-05 00:47:27,261 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:47:29,446 - root - INFO - step: 3600 loss: 2.8603 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5188 +[titan] 2025-10-05 00:47:29,447 - root - INFO - lr: 4.9278e-05 gnorm: 1.22 [ 2:13:20<22:28:18] +[titan] 2025-10-05 00:47:40,308 - root - INFO - step: 3605 loss: 2.8618 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5190 +[titan] 2025-10-05 00:47:40,308 - root - INFO - lr: 4.9276e-05 gnorm: 1.19 [ 2:13:31<22:28:04] +[titan] 2025-10-05 00:47:51,174 - root - INFO - step: 3610 loss: 2.9114 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5639 +[titan] 2025-10-05 00:47:51,174 - root - INFO - lr: 4.9274e-05 gnorm: 1.25 [ 2:13:42<22:27:51] +[titan] 2025-10-05 00:48:02,028 - root - INFO - step: 3615 loss: 2.8693 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5265 +[titan] 2025-10-05 00:48:02,028 - root - INFO - lr: 4.9272e-05 gnorm: 1.25 [ 2:13:53<22:27:37] +[titan] 2025-10-05 00:48:12,956 - root - INFO - step: 3620 loss: 2.9829 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3582 global_avg_mtp_loss: 2.6247 +[titan] 2025-10-05 00:48:12,956 - root - INFO - lr: 4.9269e-05 gnorm: 1.25 [ 2:14:04<22:27:24] +[titan] 2025-10-05 00:48:23,914 - root - INFO - step: 3625 loss: 2.9614 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.6084 +[titan] 2025-10-05 00:48:23,914 - root - INFO - lr: 4.9267e-05 gnorm: 1.19 [ 2:14:15<22:27:11] +[titan] 2025-10-05 00:48:34,821 - root - INFO - step: 3630 loss: 2.9416 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3505 global_avg_mtp_loss: 2.5911 +[titan] 2025-10-05 00:48:34,821 - root - INFO - lr: 4.9265e-05 
gnorm: 1.30 [ 2:14:26<22:26:58] +[titan] 2025-10-05 00:48:45,728 - root - INFO - step: 3635 loss: 2.8827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5377 +[titan] 2025-10-05 00:48:45,728 - root - INFO - lr: 4.9263e-05 gnorm: 1.22 [ 2:14:37<22:26:45] +[titan] 2025-10-05 00:48:56,629 - root - INFO - step: 3640 loss: 2.8474 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.3379 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 00:48:56,629 - root - INFO - lr: 4.9260e-05 gnorm: 1.32 [ 2:14:48<22:26:32] +[titan] 2025-10-05 00:49:07,530 - root - INFO - step: 3645 loss: 2.9298 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5793 +[titan] 2025-10-05 00:49:07,530 - root - INFO - lr: 4.9258e-05 gnorm: 1.31 [ 2:14:59<22:26:19] +[titan] 2025-10-05 00:49:16,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:49:18,488 - root - INFO - step: 3650 loss: 3.0056 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6456 +[titan] 2025-10-05 00:49:18,488 - root - INFO - lr: 4.9256e-05 gnorm: 1.32 [ 2:15:09<22:26:06] +[titan] 2025-10-05 00:49:29,386 - root - INFO - step: 3655 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5347 +[titan] 2025-10-05 00:49:29,387 - root - INFO - lr: 4.9254e-05 gnorm: 1.23 [ 2:15:20<22:25:53] +[titan] 2025-10-05 00:49:40,255 - root - INFO - step: 3660 loss: 2.8748 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5314 +[titan] 2025-10-05 00:49:40,255 - root - INFO - lr: 4.9251e-05 gnorm: 1.26 [ 2:15:31<22:25:39] +[titan] 2025-10-05 00:49:51,122 - root - INFO - step: 3665 loss: 2.9419 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:49:51,122 - root - INFO - lr: 4.9249e-05 gnorm: 1.24 [ 2:15:42<22:25:26] +[titan] 2025-10-05 00:50:01,986 - root - INFO - step: 3670 loss: 2.8845 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5398 +[titan] 2025-10-05 00:50:01,987 - root - INFO - lr: 4.9247e-05 gnorm: 1.29 [ 2:15:53<22:25:12] +[titan] 2025-10-05 00:50:12,850 - root - INFO - step: 3675 loss: 2.8906 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5453 +[titan] 2025-10-05 00:50:12,851 - root - INFO - lr: 4.9244e-05 gnorm: 1.29 [ 2:16:04<22:24:59] +[titan] 2025-10-05 00:50:23,731 - root - INFO - step: 3680 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3632 global_avg_mtp_loss: 2.6609 +[titan] 2025-10-05 00:50:23,732 - root - INFO - lr: 4.9242e-05 
gnorm: 1.28 [ 2:16:15<22:24:45] +[titan] 2025-10-05 00:50:34,722 - root - INFO - step: 3685 loss: 2.9110 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.3489 global_avg_mtp_loss: 2.5621 +[titan] 2025-10-05 00:50:34,723 - root - INFO - lr: 4.9240e-05 gnorm: 1.25 [ 2:16:26<22:24:33] +[titan] 2025-10-05 00:50:45,616 - root - INFO - step: 3690 loss: 2.8445 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5064 +[titan] 2025-10-05 00:50:45,616 - root - INFO - lr: 4.9238e-05 gnorm: 1.23 [ 2:16:37<22:24:20] +[titan] 2025-10-05 00:50:56,496 - root - INFO - step: 3695 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3361 global_avg_mtp_loss: 2.4870 +[titan] 2025-10-05 00:50:56,496 - root - INFO - lr: 4.9235e-05 gnorm: 1.19 [ 2:16:47<22:24:07] +[titan] 2025-10-05 00:51:05,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:51:07,381 - root - INFO - step: 3700 loss: 2.8874 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5427 +[titan] 2025-10-05 00:51:07,381 - root - INFO - lr: 4.9233e-05 gnorm: 1.31 [ 2:16:58<22:23:53] +[titan] 2025-10-05 00:51:18,258 - root - INFO - step: 3705 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3441 global_avg_mtp_loss: 2.5425 +[titan] 2025-10-05 00:51:18,258 - root - INFO - lr: 4.9231e-05 gnorm: 1.36 [ 2:17:09<22:23:40] +[titan] 2025-10-05 00:51:29,175 - root - INFO - step: 3710 loss: 2.9115 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3467 global_avg_mtp_loss: 2.5648 +[titan] 2025-10-05 00:51:29,175 - root - INFO - lr: 4.9228e-05 gnorm: 1.27 [ 2:17:20<22:23:27] +[titan] 2025-10-05 00:51:40,064 - root - INFO - step: 3715 loss: 2.9140 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5671 +[titan] 2025-10-05 00:51:40,064 - root - INFO - lr: 4.9226e-05 gnorm: 1.23 [ 2:17:31<22:23:14] +[titan] 2025-10-05 00:51:50,950 - root - INFO - step: 3720 loss: 2.8644 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.5220 +[titan] 2025-10-05 00:51:50,950 - root - INFO - lr: 4.9224e-05 gnorm: 1.28 [ 2:17:42<22:23:00] +[titan] 2025-10-05 00:52:01,826 - root - INFO - step: 3725 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5677 +[titan] 2025-10-05 00:52:01,826 - root - INFO - lr: 4.9221e-05 gnorm: 1.30 [ 2:17:53<22:22:47] +[titan] 2025-10-05 00:52:12,692 - root - INFO - step: 3730 loss: 2.8843 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5392 +[titan] 2025-10-05 00:52:12,692 - root - INFO - lr: 4.9219e-05 
gnorm: 1.27 [ 2:18:04<22:22:34] +[titan] 2025-10-05 00:52:23,581 - root - INFO - step: 3735 loss: 2.8622 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3411 global_avg_mtp_loss: 2.5211 +[titan] 2025-10-05 00:52:23,581 - root - INFO - lr: 4.9217e-05 gnorm: 1.29 [ 2:18:15<22:22:20] +[titan] 2025-10-05 00:52:34,507 - root - INFO - step: 3740 loss: 2.8833 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5382 +[titan] 2025-10-05 00:52:34,507 - root - INFO - lr: 4.9214e-05 gnorm: 1.32 [ 2:18:25<22:22:08] +[titan] 2025-10-05 00:52:45,424 - root - INFO - step: 3745 loss: 2.8876 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5439 +[titan] 2025-10-05 00:52:45,424 - root - INFO - lr: 4.9212e-05 gnorm: 1.30 [ 2:18:36<22:21:55] +[titan] 2025-10-05 00:52:54,123 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:52:56,315 - root - INFO - step: 3750 loss: 2.9081 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3472 global_avg_mtp_loss: 2.5609 +[titan] 2025-10-05 00:52:56,315 - root - INFO - lr: 4.9210e-05 gnorm: 1.37 [ 2:18:47<22:21:42] +[titan] 2025-10-05 00:53:07,243 - root - INFO - step: 3755 loss: 2.8797 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 00:53:07,243 - root - INFO - lr: 4.9207e-05 gnorm: 1.27 [ 2:18:58<22:21:29] +[titan] 2025-10-05 00:53:18,154 - root - INFO - step: 3760 loss: 2.8545 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3406 global_avg_mtp_loss: 2.5139 +[titan] 2025-10-05 00:53:18,154 - root - INFO - lr: 4.9205e-05 gnorm: 1.27 [ 2:19:09<22:21:16] +[titan] 2025-10-05 00:53:29,071 - root - INFO - step: 3765 loss: 2.8350 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.4960 +[titan] 2025-10-05 00:53:29,071 - root - INFO - lr: 4.9203e-05 gnorm: 1.27 [ 2:19:20<22:21:03] +[titan] 2025-10-05 00:53:39,977 - root - INFO - step: 3770 loss: 2.8227 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3370 global_avg_mtp_loss: 2.4857 +[titan] 2025-10-05 00:53:39,978 - root - INFO - lr: 4.9200e-05 gnorm: 1.19 [ 2:19:31<22:20:50] +[titan] 2025-10-05 00:53:50,879 - root - INFO - step: 3775 loss: 2.8842 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3430 global_avg_mtp_loss: 2.5411 +[titan] 2025-10-05 00:53:50,879 - root - INFO - lr: 4.9198e-05 gnorm: 1.23 [ 2:19:42<22:20:37] +[titan] 2025-10-05 00:54:01,831 - root - INFO - step: 3780 loss: 2.9375 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5862 +[titan] 2025-10-05 00:54:01,831 - root - INFO - lr: 4.9196e-05 
gnorm: 1.20 [ 2:19:53<22:20:24] +[titan] 2025-10-05 00:54:12,711 - root - INFO - step: 3785 loss: 2.8747 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5318 +[titan] 2025-10-05 00:54:12,711 - root - INFO - lr: 4.9193e-05 gnorm: 1.23 [ 2:20:04<22:20:11] +[titan] 2025-10-05 00:54:23,577 - root - INFO - step: 3790 loss: 2.8207 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4833 +[titan] 2025-10-05 00:54:23,577 - root - INFO - lr: 4.9191e-05 gnorm: 1.27 [ 2:20:15<22:19:58] +[titan] 2025-10-05 00:54:34,480 - root - INFO - step: 3795 loss: 2.9584 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3532 global_avg_mtp_loss: 2.6052 +[titan] 2025-10-05 00:54:34,480 - root - INFO - lr: 4.9188e-05 gnorm: 1.29 [ 2:20:25<22:19:45] +[titan] 2025-10-05 00:54:43,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:54:45,353 - root - INFO - step: 3800 loss: 2.9385 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3511 global_avg_mtp_loss: 2.5874 +[titan] 2025-10-05 00:54:45,353 - root - INFO - lr: 4.9186e-05 gnorm: 1.24 [ 2:20:36<22:19:31] +[titan] 2025-10-05 00:54:56,214 - root - INFO - step: 3805 loss: 2.8516 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3400 global_avg_mtp_loss: 2.5116 +[titan] 2025-10-05 00:54:56,214 - root - INFO - lr: 4.9184e-05 gnorm: 1.32 [ 2:20:47<22:19:18] +[titan] 2025-10-05 00:55:07,134 - root - INFO - step: 3810 loss: 2.8608 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5196 +[titan] 2025-10-05 00:55:07,134 - root - INFO - lr: 4.9181e-05 gnorm: 1.27 [ 2:20:58<22:19:05] +[titan] 2025-10-05 00:55:18,019 - root - INFO - step: 3815 loss: 2.9132 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3495 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 00:55:18,019 - root - INFO - lr: 4.9179e-05 gnorm: 1.33 [ 2:21:09<22:18:52] +[titan] 2025-10-05 00:55:28,882 - root - INFO - step: 3820 loss: 2.8903 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3438 global_avg_mtp_loss: 2.5465 +[titan] 2025-10-05 00:55:28,882 - root - INFO - lr: 4.9176e-05 gnorm: 1.28 [ 2:21:20<22:18:39] +[titan] 2025-10-05 00:55:39,765 - root - INFO - step: 3825 loss: 2.8538 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3396 global_avg_mtp_loss: 2.5142 +[titan] 2025-10-05 00:55:39,765 - root - INFO - lr: 4.9174e-05 gnorm: 1.35 [ 2:21:31<22:18:26] +[titan] 2025-10-05 00:55:50,656 - root - INFO - step: 3830 loss: 2.8951 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5503 +[titan] 2025-10-05 00:55:50,656 - root - INFO - lr: 4.9172e-05 
gnorm: 1.29 [ 2:21:42<22:18:12] +[titan] 2025-10-05 00:56:01,544 - root - INFO - step: 3835 loss: 2.8701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 00:56:01,544 - root - INFO - lr: 4.9169e-05 gnorm: 1.28 [ 2:21:53<22:17:59] +[titan] 2025-10-05 00:56:12,424 - root - INFO - step: 3840 loss: 2.8980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3459 global_avg_mtp_loss: 2.5521 +[titan] 2025-10-05 00:56:12,424 - root - INFO - lr: 4.9167e-05 gnorm: 1.29 [ 2:22:03<22:17:46] +[titan] 2025-10-05 00:56:23,350 - root - INFO - step: 3845 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:56:23,350 - root - INFO - lr: 4.9164e-05 gnorm: 1.33 [ 2:22:14<22:17:33] +[titan] 2025-10-05 00:56:32,044 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:56:34,236 - root - INFO - step: 3850 loss: 2.8817 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5379 +[titan] 2025-10-05 00:56:34,237 - root - INFO - lr: 4.9162e-05 gnorm: 1.28 [ 2:22:25<22:17:20] +[titan] 2025-10-05 00:56:45,120 - root - INFO - step: 3855 loss: 2.8016 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 00:56:45,120 - root - INFO - lr: 4.9160e-05 gnorm: 1.32 [ 2:22:36<22:17:07] +[titan] 2025-10-05 00:56:56,000 - root - INFO - step: 3860 loss: 2.8851 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 00:56:56,000 - root - INFO - lr: 4.9157e-05 gnorm: 1.29 [ 2:22:47<22:16:54] +[titan] 2025-10-05 00:57:06,896 - root - INFO - step: 3865 loss: 2.8534 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3401 global_avg_mtp_loss: 2.5132 +[titan] 2025-10-05 00:57:06,896 - root - INFO - lr: 4.9155e-05 gnorm: 1.25 [ 2:22:58<22:16:41] +[titan] 2025-10-05 00:57:17,779 - root - INFO - step: 3870 loss: 2.9197 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5707 +[titan] 2025-10-05 00:57:17,779 - root - INFO - lr: 4.9152e-05 gnorm: 1.28 [ 2:23:09<22:16:28] +[titan] 2025-10-05 00:57:28,718 - root - INFO - step: 3875 loss: 2.9466 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.3534 global_avg_mtp_loss: 2.5932 +[titan] 2025-10-05 00:57:28,718 - root - INFO - lr: 4.9150e-05 gnorm: 1.21 [ 2:23:20<22:16:15] +[titan] 2025-10-05 00:57:39,599 - root - INFO - step: 3880 loss: 2.8840 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3444 global_avg_mtp_loss: 2.5396 +[titan] 2025-10-05 00:57:39,600 - root - INFO - lr: 4.9148e-05 
gnorm: 1.28 [ 2:23:31<22:16:02] +[titan] 2025-10-05 00:57:50,474 - root - INFO - step: 3885 loss: 2.9370 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3488 global_avg_mtp_loss: 2.5882 +[titan] 2025-10-05 00:57:50,474 - root - INFO - lr: 4.9145e-05 gnorm: 1.25 [ 2:23:41<22:15:49] +[titan] 2025-10-05 00:58:01,351 - root - INFO - step: 3890 loss: 2.9350 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3493 global_avg_mtp_loss: 2.5857 +[titan] 2025-10-05 00:58:01,351 - root - INFO - lr: 4.9143e-05 gnorm: 1.31 [ 2:23:52<22:15:36] +[titan] 2025-10-05 00:58:12,271 - root - INFO - step: 3895 loss: 2.9044 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5592 +[titan] 2025-10-05 00:58:12,271 - root - INFO - lr: 4.9140e-05 gnorm: 1.26 [ 2:24:03<22:15:23] +[titan] 2025-10-05 00:58:20,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:58:23,152 - root - INFO - step: 3900 loss: 2.7993 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 00:58:23,152 - root - INFO - lr: 4.9138e-05 gnorm: 1.27 [ 2:24:14<22:15:10] +[titan] 2025-10-05 00:58:34,070 - root - INFO - step: 3905 loss: 2.9356 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:58:34,070 - root - INFO - lr: 4.9135e-05 gnorm: 1.23 [ 2:24:25<22:14:57] +[titan] 2025-10-05 00:58:44,959 - root - INFO - step: 3910 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3475 global_avg_mtp_loss: 2.5678 +[titan] 2025-10-05 00:58:44,959 - root - INFO - lr: 4.9133e-05 gnorm: 1.26 [ 2:24:36<22:14:44] +[titan] 2025-10-05 00:58:55,830 - root - INFO - step: 3915 loss: 2.8401 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5019 +[titan] 2025-10-05 00:58:55,830 - root - INFO - lr: 4.9130e-05 gnorm: 1.23 [ 2:24:47<22:14:31] +[titan] 2025-10-05 00:59:06,689 - root - INFO - step: 3920 loss: 2.9547 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3597 global_avg_mtp_loss: 2.5950 +[titan] 2025-10-05 00:59:06,690 - root - INFO - lr: 4.9128e-05 gnorm: 1.24 [ 2:24:58<22:14:18] +[titan] 2025-10-05 00:59:17,583 - root - INFO - step: 3925 loss: 2.9231 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3478 global_avg_mtp_loss: 2.5753 +[titan] 2025-10-05 00:59:17,584 - root - INFO - lr: 4.9125e-05 gnorm: 1.29 [ 2:25:09<22:14:05] +[titan] 2025-10-05 00:59:28,459 - root - INFO - step: 3930 loss: 2.8642 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5227 +[titan] 2025-10-05 00:59:28,459 - root - INFO - lr: 4.9123e-05 
gnorm: 1.29 [ 2:25:19<22:13:52] +[titan] 2025-10-05 00:59:39,392 - root - INFO - step: 3935 loss: 2.8806 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 00:59:39,393 - root - INFO - lr: 4.9121e-05 gnorm: 1.31 [ 2:25:30<22:13:39] +[titan] 2025-10-05 00:59:50,302 - root - INFO - step: 3940 loss: 2.9187 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.3484 global_avg_mtp_loss: 2.5703 +[titan] 2025-10-05 00:59:50,302 - root - INFO - lr: 4.9118e-05 gnorm: 1.23 [ 2:25:41<22:13:27] +[titan] 2025-10-05 01:00:01,171 - root - INFO - step: 3945 loss: 2.8435 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:00:01,172 - root - INFO - lr: 4.9116e-05 gnorm: 1.25 [ 2:25:52<22:13:13] +[titan] 2025-10-05 01:00:09,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:00:12,051 - root - INFO - step: 3950 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5446 +[titan] 2025-10-05 01:00:12,051 - root - INFO - lr: 4.9113e-05 gnorm: 1.27 [ 2:26:03<22:13:00] +[titan] 2025-10-05 01:00:22,938 - root - INFO - step: 3955 loss: 2.8946 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5492 +[titan] 2025-10-05 01:00:22,938 - root - INFO - lr: 4.9111e-05 gnorm: 1.31 [ 2:26:14<22:12:47] +[titan] 2025-10-05 01:00:33,863 - root - INFO - step: 3960 loss: 2.9358 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5868 +[titan] 2025-10-05 01:00:33,863 - root - INFO - lr: 4.9108e-05 gnorm: 1.30 [ 2:26:25<22:12:35] +[titan] 2025-10-05 01:00:44,742 - root - INFO - step: 3965 loss: 2.8537 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3415 global_avg_mtp_loss: 2.5123 +[titan] 2025-10-05 01:00:44,743 - root - INFO - lr: 4.9106e-05 gnorm: 1.24 [ 2:26:36<22:12:22] +[titan] 2025-10-05 01:00:55,669 - root - INFO - step: 3970 loss: 2.8697 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5257 +[titan] 2025-10-05 01:00:55,670 - root - INFO - lr: 4.9103e-05 gnorm: 1.26 [ 2:26:47<22:12:09] +[titan] 2025-10-05 01:01:06,531 - root - INFO - step: 3975 loss: 2.8184 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4844 +[titan] 2025-10-05 01:01:06,531 - root - INFO - lr: 4.9101e-05 gnorm: 1.26 [ 2:26:57<22:11:56] +[titan] 2025-10-05 01:01:17,435 - root - INFO - step: 3980 loss: 2.8685 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5264 +[titan] 2025-10-05 01:01:17,435 - root - INFO - lr: 4.9098e-05 
gnorm: 1.32 [ 2:27:08<22:11:43] +[titan] 2025-10-05 01:01:28,313 - root - INFO - step: 3985 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3312 global_avg_mtp_loss: 2.4498 +[titan] 2025-10-05 01:01:28,313 - root - INFO - lr: 4.9096e-05 gnorm: 1.26 [ 2:27:19<22:11:30] +[titan] 2025-10-05 01:01:39,229 - root - INFO - step: 3990 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3337 global_avg_mtp_loss: 2.4610 +[titan] 2025-10-05 01:01:39,229 - root - INFO - lr: 4.9093e-05 gnorm: 1.30 [ 2:27:30<22:11:18] +[titan] 2025-10-05 01:01:50,092 - root - INFO - step: 3995 loss: 2.7943 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3322 global_avg_mtp_loss: 2.4621 +[titan] 2025-10-05 01:01:50,092 - root - INFO - lr: 4.9091e-05 gnorm: 1.21 [ 2:27:41<22:11:04] +[titan] 2025-10-05 01:01:58,771 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:02:00,958 - root - INFO - step: 4000 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5902 +[titan] 2025-10-05 01:02:00,958 - root - INFO - lr: 4.9088e-05 gnorm: 1.27 [ 2:27:52<22:10:51] +[titan] 2025-10-05 01:02:11,850 - root - INFO - step: 4005 loss: 2.8699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5271 +[titan] 2025-10-05 01:02:11,850 - root - INFO - lr: 4.9086e-05 gnorm: 1.29 [ 2:28:03<22:10:38] +[titan] 2025-10-05 01:02:22,761 - root - INFO - step: 4010 loss: 2.8862 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 01:02:22,761 - root - INFO - lr: 4.9083e-05 gnorm: 1.23 [ 2:28:14<22:10:26] +[titan] 2025-10-05 01:02:33,616 - root - INFO - step: 4015 loss: 2.8251 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.4858 +[titan] 2025-10-05 01:02:33,616 - root - INFO - lr: 4.9081e-05 gnorm: 1.23 [ 2:28:25<22:10:12] +[titan] 2025-10-05 01:02:44,524 - root - INFO - step: 4020 loss: 2.8756 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5328 +[titan] 2025-10-05 01:02:44,525 - root - INFO - lr: 4.9078e-05 gnorm: 1.23 [ 2:28:35<22:10:00] +[titan] 2025-10-05 01:02:55,396 - root - INFO - step: 4025 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3363 global_avg_mtp_loss: 2.4949 +[titan] 2025-10-05 01:02:55,396 - root - INFO - lr: 4.9076e-05 gnorm: 1.22 [ 2:28:46<22:09:47] +[titan] 2025-10-05 01:03:06,265 - root - INFO - step: 4030 loss: 2.8674 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5248 +[titan] 2025-10-05 01:03:06,265 - root - INFO - lr: 4.9073e-05 
gnorm: 1.24 [ 2:28:57<22:09:34] +[titan] 2025-10-05 01:03:17,168 - root - INFO - step: 4035 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:03:17,168 - root - INFO - lr: 4.9070e-05 gnorm: 1.29 [ 2:29:08<22:09:21] +[titan] 2025-10-05 01:03:28,097 - root - INFO - step: 4040 loss: 2.8057 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3338 global_avg_mtp_loss: 2.4719 +[titan] 2025-10-05 01:03:28,098 - root - INFO - lr: 4.9068e-05 gnorm: 1.23 [ 2:29:19<22:09:08] +[titan] 2025-10-05 01:03:39,019 - root - INFO - step: 4045 loss: 2.8686 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5273 +[titan] 2025-10-05 01:03:39,019 - root - INFO - lr: 4.9065e-05 gnorm: 1.33 [ 2:29:30<22:08:56] +[titan] 2025-10-05 01:03:47,736 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:03:49,927 - root - INFO - step: 4050 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5088 +[titan] 2025-10-05 01:03:49,928 - root - INFO - lr: 4.9063e-05 gnorm: 1.25 [ 2:29:41<22:08:43] +[titan] 2025-10-05 01:04:00,828 - root - INFO - step: 4055 loss: 2.8040 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4693 +[titan] 2025-10-05 01:04:00,828 - root - INFO - lr: 4.9060e-05 gnorm: 1.23 [ 2:29:52<22:08:30] +[titan] 2025-10-05 01:04:11,717 - root - INFO - step: 4060 loss: 2.8008 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4673 +[titan] 2025-10-05 01:04:11,717 - root - INFO - lr: 4.9058e-05 gnorm: 1.27 [ 2:30:03<22:08:18] +[titan] 2025-10-05 01:04:22,649 - root - INFO - step: 4065 loss: 2.8860 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5433 +[titan] 2025-10-05 01:04:22,649 - root - INFO - lr: 4.9055e-05 gnorm: 1.27 [ 2:30:14<22:08:05] +[titan] 2025-10-05 01:04:33,534 - root - INFO - step: 4070 loss: 2.8482 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.5092 +[titan] 2025-10-05 01:04:33,534 - root - INFO - lr: 4.9053e-05 gnorm: 1.28 [ 2:30:24<22:07:52] +[titan] 2025-10-05 01:04:44,493 - root - INFO - step: 4075 loss: 2.7243 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.3989 +[titan] 2025-10-05 01:04:44,493 - root - INFO - lr: 4.9050e-05 gnorm: 1.28 [ 2:30:35<22:07:40] +[titan] 2025-10-05 01:04:55,369 - root - INFO - step: 4080 loss: 2.9124 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5655 +[titan] 2025-10-05 01:04:55,370 - root - INFO - lr: 4.9047e-05 
gnorm: 1.24 [ 2:30:46<22:07:27] +[titan] 2025-10-05 01:05:06,228 - root - INFO - step: 4085 loss: 2.8731 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 01:05:06,228 - root - INFO - lr: 4.9045e-05 gnorm: 1.27 [ 2:30:57<22:07:14] +[titan] 2025-10-05 01:05:17,102 - root - INFO - step: 4090 loss: 2.7997 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4672 +[titan] 2025-10-05 01:05:17,102 - root - INFO - lr: 4.9042e-05 gnorm: 1.28 [ 2:31:08<22:07:01] +[titan] 2025-10-05 01:05:28,059 - root - INFO - step: 4095 loss: 2.9035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5587 +[titan] 2025-10-05 01:05:28,060 - root - INFO - lr: 4.9040e-05 gnorm: 1.23 [ 2:31:19<22:06:49] +[titan] 2025-10-05 01:05:30,421 - root - INFO - Dumping profiler traces at step 4096 +[titan] 2025-10-05 01:05:30,462 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:05:37,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:05:39,213 - root - INFO - step: 4100 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 29,380 tflops: 407.60 mfu: 41.21% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4763 +[titan] 2025-10-05 01:05:39,213 - root - INFO - lr: 4.9037e-05 gnorm: 1.29 [ 2:31:30<22:06:38] +[titan] 2025-10-05 01:05:50,104 - root - INFO - step: 4105 loss: 2.8434 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5046 +[titan] 2025-10-05 01:05:50,104 - root - INFO - lr: 4.9035e-05 gnorm: 1.25 [ 2:31:41<22:06:25] +[titan] 2025-10-05 01:06:00,954 - root - INFO - step: 4110 loss: 2.8513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3410 global_avg_mtp_loss: 2.5103 +[titan] 2025-10-05 01:06:00,954 - root - INFO - lr: 4.9032e-05 gnorm: 1.30 [ 2:31:52<22:06:12] +[titan] 2025-10-05 01:06:11,792 - root - INFO - step: 4115 loss: 2.8687 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 01:06:11,793 - root - INFO - lr: 4.9029e-05 gnorm: 1.28 [ 2:32:03<22:05:59] +[titan] 2025-10-05 01:06:22,672 - root - INFO - step: 4120 loss: 2.7381 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3273 global_avg_mtp_loss: 2.4108 +[titan] 2025-10-05 01:06:22,673 - root - INFO - lr: 4.9027e-05 gnorm: 1.20 [ 2:32:14<22:05:46] +[titan] 2025-10-05 01:06:33,541 - root - INFO - step: 4125 loss: 2.8811 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 01:06:33,541 - root - INFO - lr: 4.9024e-05 gnorm: 1.27 [ 2:32:24<22:05:33] +[titan] 2025-10-05 01:06:44,458 - root - INFO - step: 4130 loss: 2.7955 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3316 global_avg_mtp_loss: 2.4639 +[titan] 2025-10-05 01:06:44,459 - root - INFO - lr: 4.9022e-05 
gnorm: 1.22 [ 2:32:35<22:05:21] +[titan] 2025-10-05 01:06:55,338 - root - INFO - step: 4135 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3357 global_avg_mtp_loss: 2.4937 +[titan] 2025-10-05 01:06:55,338 - root - INFO - lr: 4.9019e-05 gnorm: 1.26 [ 2:32:46<22:05:08] +[titan] 2025-10-05 01:07:06,209 - root - INFO - step: 4140 loss: 2.8211 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3351 global_avg_mtp_loss: 2.4860 +[titan] 2025-10-05 01:07:06,209 - root - INFO - lr: 4.9016e-05 gnorm: 1.23 [ 2:32:57<22:04:55] +[titan] 2025-10-05 01:07:17,116 - root - INFO - step: 4145 loss: 2.7757 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4442 +[titan] 2025-10-05 01:07:17,116 - root - INFO - lr: 4.9014e-05 gnorm: 1.33 [ 2:33:08<22:04:42] +[titan] 2025-10-05 01:07:25,818 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:07:28,011 - root - INFO - step: 4150 loss: 2.8404 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.5032 +[titan] 2025-10-05 01:07:28,012 - root - INFO - lr: 4.9011e-05 gnorm: 1.29 [ 2:33:19<22:04:29] +[titan] 2025-10-05 01:07:38,919 - root - INFO - step: 4155 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5380 +[titan] 2025-10-05 01:07:38,919 - root - INFO - lr: 4.9009e-05 gnorm: 1.22 [ 2:33:30<22:04:17] +[titan] 2025-10-05 01:07:49,794 - root - INFO - step: 4160 loss: 2.8305 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3386 global_avg_mtp_loss: 2.4919 +[titan] 2025-10-05 01:07:49,794 - root - INFO - lr: 4.9006e-05 gnorm: 1.23 [ 2:33:41<22:04:04] +[titan] 2025-10-05 01:08:00,715 - root - INFO - step: 4165 loss: 2.7568 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4286 +[titan] 2025-10-05 01:08:00,715 - root - INFO - lr: 4.9003e-05 gnorm: 1.22 [ 2:33:52<22:03:52] +[titan] 2025-10-05 01:08:11,575 - root - INFO - step: 4170 loss: 2.8449 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.3395 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:08:11,575 - root - INFO - lr: 4.9001e-05 gnorm: 1.22 [ 2:34:03<22:03:39] +[titan] 2025-10-05 01:08:22,448 - root - INFO - step: 4175 loss: 2.8005 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3330 global_avg_mtp_loss: 2.4675 +[titan] 2025-10-05 01:08:22,448 - root - INFO - lr: 4.8998e-05 gnorm: 1.22 [ 2:34:13<22:03:26] +[titan] 2025-10-05 01:08:33,314 - root - INFO - step: 4180 loss: 2.7794 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4468 +[titan] 2025-10-05 01:08:33,314 - root - INFO - lr: 4.8995e-05 
gnorm: 1.18 [ 2:34:24<22:03:13] +[titan] 2025-10-05 01:08:44,215 - root - INFO - step: 4185 loss: 2.8110 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3342 global_avg_mtp_loss: 2.4768 +[titan] 2025-10-05 01:08:44,215 - root - INFO - lr: 4.8993e-05 gnorm: 1.25 [ 2:34:35<22:03:00] +[titan] 2025-10-05 01:08:55,079 - root - INFO - step: 4190 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4689 +[titan] 2025-10-05 01:08:55,079 - root - INFO - lr: 4.8990e-05 gnorm: 1.20 [ 2:34:46<22:02:47] +[titan] 2025-10-05 01:09:05,968 - root - INFO - step: 4195 loss: 2.7893 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3318 global_avg_mtp_loss: 2.4575 +[titan] 2025-10-05 01:09:05,968 - root - INFO - lr: 4.8987e-05 gnorm: 1.27 [ 2:34:57<22:02:34] +[titan] 2025-10-05 01:09:14,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:09:16,867 - root - INFO - step: 4200 loss: 2.8001 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 01:09:16,867 - root - INFO - lr: 4.8985e-05 gnorm: 1.37 [ 2:35:08<22:02:22] +[titan] 2025-10-05 01:09:27,758 - root - INFO - step: 4205 loss: 2.8414 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5036 +[titan] 2025-10-05 01:09:27,758 - root - INFO - lr: 4.8982e-05 gnorm: 1.27 [ 2:35:19<22:02:09] +[titan] 2025-10-05 01:09:38,614 - root - INFO - step: 4210 loss: 2.8082 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4739 +[titan] 2025-10-05 01:09:38,614 - root - INFO - lr: 4.8980e-05 gnorm: 1.21 [ 2:35:30<22:01:56] +[titan] 2025-10-05 01:09:49,535 - root - INFO - step: 4215 loss: 2.8257 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4886 +[titan] 2025-10-05 01:09:49,535 - root - INFO - lr: 4.8977e-05 gnorm: 1.25 [ 2:35:40<22:01:44] +[titan] 2025-10-05 01:10:00,451 - root - INFO - step: 4220 loss: 2.8238 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3391 global_avg_mtp_loss: 2.4847 +[titan] 2025-10-05 01:10:00,451 - root - INFO - lr: 4.8974e-05 gnorm: 1.27 [ 2:35:51<22:01:31] +[titan] 2025-10-05 01:10:11,409 - root - INFO - step: 4225 loss: 2.7720 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4420 +[titan] 2025-10-05 01:10:11,409 - root - INFO - lr: 4.8972e-05 gnorm: 1.25 [ 2:36:02<22:01:19] +[titan] 2025-10-05 01:10:22,330 - root - INFO - step: 4230 loss: 2.8335 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3375 global_avg_mtp_loss: 2.4961 +[titan] 2025-10-05 01:10:22,330 - root - INFO - lr: 4.8969e-05 
gnorm: 1.22 [ 2:36:13<22:01:07] +[titan] 2025-10-05 01:10:33,205 - root - INFO - step: 4235 loss: 2.9402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5879 +[titan] 2025-10-05 01:10:33,205 - root - INFO - lr: 4.8966e-05 gnorm: 1.26 [ 2:36:24<22:00:54] +[titan] 2025-10-05 01:10:44,111 - root - INFO - step: 4240 loss: 2.8115 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4770 +[titan] 2025-10-05 01:10:44,111 - root - INFO - lr: 4.8964e-05 gnorm: 1.23 [ 2:36:35<22:00:41] +[titan] 2025-10-05 01:10:54,992 - root - INFO - step: 4245 loss: 2.7621 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4328 +[titan] 2025-10-05 01:10:54,993 - root - INFO - lr: 4.8961e-05 gnorm: 1.25 [ 2:36:46<22:00:28] +[titan] 2025-10-05 01:11:03,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:11:05,860 - root - INFO - step: 4250 loss: 2.7919 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:11:05,860 - root - INFO - lr: 4.8958e-05 gnorm: 1.34 [ 2:36:57<22:00:16] +[titan] 2025-10-05 01:11:16,750 - root - INFO - step: 4255 loss: 2.8769 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 01:11:16,751 - root - INFO - lr: 4.8955e-05 gnorm: 1.23 [ 2:37:08<22:00:03] +[titan] 2025-10-05 01:11:27,682 - root - INFO - step: 4260 loss: 2.8447 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5059 +[titan] 2025-10-05 01:11:27,682 - root - INFO - lr: 4.8953e-05 gnorm: 1.29 [ 2:37:19<21:59:51] +[titan] 2025-10-05 01:11:38,566 - root - INFO - step: 4265 loss: 2.8553 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3402 global_avg_mtp_loss: 2.5151 +[titan] 2025-10-05 01:11:38,566 - root - INFO - lr: 4.8950e-05 gnorm: 1.28 [ 2:37:29<21:59:38] +[titan] 2025-10-05 01:11:49,489 - root - INFO - step: 4270 loss: 2.8265 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:11:49,489 - root - INFO - lr: 4.8947e-05 gnorm: 1.23 [ 2:37:40<21:59:25] +[titan] 2025-10-05 01:12:00,379 - root - INFO - step: 4275 loss: 2.7626 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3291 global_avg_mtp_loss: 2.4335 +[titan] 2025-10-05 01:12:00,379 - root - INFO - lr: 4.8945e-05 gnorm: 1.23 [ 2:37:51<21:59:13] +[titan] 2025-10-05 01:12:11,266 - root - INFO - step: 4280 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4955 +[titan] 2025-10-05 01:12:11,266 - root - INFO - lr: 4.8942e-05 
gnorm: 1.25 [ 2:38:02<21:59:00] +[titan] 2025-10-05 01:12:22,135 - root - INFO - step: 4285 loss: 2.8353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3380 global_avg_mtp_loss: 2.4973 +[titan] 2025-10-05 01:12:22,135 - root - INFO - lr: 4.8939e-05 gnorm: 1.27 [ 2:38:13<21:58:47] +[titan] 2025-10-05 01:12:33,063 - root - INFO - step: 4290 loss: 2.7796 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4469 +[titan] 2025-10-05 01:12:33,063 - root - INFO - lr: 4.8937e-05 gnorm: 1.31 [ 2:38:24<21:58:35] +[titan] 2025-10-05 01:12:43,959 - root - INFO - step: 4295 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4492 +[titan] 2025-10-05 01:12:43,959 - root - INFO - lr: 4.8934e-05 gnorm: 1.37 [ 2:38:35<21:58:22] +[titan] 2025-10-05 01:12:52,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:12:54,832 - root - INFO - step: 4300 loss: 2.9113 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 01:12:54,832 - root - INFO - lr: 4.8931e-05 gnorm: 1.32 [ 2:38:46<21:58:10] +[titan] 2025-10-05 01:13:05,696 - root - INFO - step: 4305 loss: 2.8427 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:13:05,696 - root - INFO - lr: 4.8928e-05 gnorm: 1.29 [ 2:38:57<21:57:57] +[titan] 2025-10-05 01:13:16,559 - root - INFO - step: 4310 loss: 2.8552 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5127 +[titan] 2025-10-05 01:13:16,559 - root - INFO - lr: 4.8926e-05 gnorm: 1.25 [ 2:39:07<21:57:44] +[titan] 2025-10-05 01:13:27,434 - root - INFO - step: 4315 loss: 2.7587 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:13:27,434 - root - INFO - lr: 4.8923e-05 gnorm: 1.28 [ 2:39:18<21:57:31] +[titan] 2025-10-05 01:13:38,295 - root - INFO - step: 4320 loss: 2.8361 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3373 global_avg_mtp_loss: 2.4988 +[titan] 2025-10-05 01:13:38,295 - root - INFO - lr: 4.8920e-05 gnorm: 1.33 [ 2:39:29<21:57:18] +[titan] 2025-10-05 01:13:49,212 - root - INFO - step: 4325 loss: 2.8809 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5381 +[titan] 2025-10-05 01:13:49,212 - root - INFO - lr: 4.8918e-05 gnorm: 1.32 [ 2:39:40<21:57:06] +[titan] 2025-10-05 01:14:00,073 - root - INFO - step: 4330 loss: 2.8655 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5232 +[titan] 2025-10-05 01:14:00,073 - root - INFO - lr: 4.8915e-05 
gnorm: 1.25 [ 2:39:51<21:56:53] +[titan] 2025-10-05 01:14:10,949 - root - INFO - step: 4335 loss: 2.8077 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4742 +[titan] 2025-10-05 01:14:10,949 - root - INFO - lr: 4.8912e-05 gnorm: 1.25 [ 2:40:02<21:56:40] +[titan] 2025-10-05 01:14:21,868 - root - INFO - step: 4340 loss: 2.8223 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3346 global_avg_mtp_loss: 2.4877 +[titan] 2025-10-05 01:14:21,868 - root - INFO - lr: 4.8909e-05 gnorm: 1.21 [ 2:40:13<21:56:28] +[titan] 2025-10-05 01:14:32,754 - root - INFO - step: 4345 loss: 2.8555 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3431 global_avg_mtp_loss: 2.5124 +[titan] 2025-10-05 01:14:32,754 - root - INFO - lr: 4.8907e-05 gnorm: 1.26 [ 2:40:24<21:56:15] +[titan] 2025-10-05 01:14:41,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:14:43,631 - root - INFO - step: 4350 loss: 2.7309 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 01:14:43,631 - root - INFO - lr: 4.8904e-05 gnorm: 1.21 [ 2:40:35<21:56:03] +[titan] 2025-10-05 01:14:54,554 - root - INFO - step: 4355 loss: 2.7817 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:14:54,555 - root - INFO - lr: 4.8901e-05 gnorm: 1.31 [ 2:40:45<21:55:50] +[titan] 2025-10-05 01:15:05,471 - root - INFO - step: 4360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:15:05,471 - root - INFO - lr: 4.8898e-05 gnorm: 1.18 [ 2:40:56<21:55:38] +[titan] 2025-10-05 01:15:16,353 - root - INFO - step: 4365 loss: 2.7543 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3265 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:15:16,353 - root - INFO - lr: 4.8896e-05 gnorm: 1.34 [ 2:41:07<21:55:25] +[titan] 2025-10-05 01:15:27,221 - root - INFO - step: 4370 loss: 2.8151 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3349 global_avg_mtp_loss: 2.4802 +[titan] 2025-10-05 01:15:27,222 - root - INFO - lr: 4.8893e-05 gnorm: 1.33 [ 2:41:18<21:55:13] +[titan] 2025-10-05 01:15:38,092 - root - INFO - step: 4375 loss: 2.8402 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:15:38,092 - root - INFO - lr: 4.8890e-05 gnorm: 1.24 [ 2:41:29<21:55:00] +[titan] 2025-10-05 01:15:48,973 - root - INFO - step: 4380 loss: 2.7636 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4347 +[titan] 2025-10-05 01:15:48,973 - root - INFO - lr: 4.8887e-05 
gnorm: 1.28 [ 2:41:40<21:54:47] +[titan] 2025-10-05 01:15:59,862 - root - INFO - step: 4385 loss: 2.7822 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4512 +[titan] 2025-10-05 01:15:59,863 - root - INFO - lr: 4.8884e-05 gnorm: 1.22 [ 2:41:51<21:54:35] +[titan] 2025-10-05 01:16:10,768 - root - INFO - step: 4390 loss: 2.8774 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5334 +[titan] 2025-10-05 01:16:10,768 - root - INFO - lr: 4.8882e-05 gnorm: 1.31 [ 2:42:02<21:54:22] +[titan] 2025-10-05 01:16:21,633 - root - INFO - step: 4395 loss: 2.7736 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4446 +[titan] 2025-10-05 01:16:21,633 - root - INFO - lr: 4.8879e-05 gnorm: 1.27 [ 2:42:13<21:54:09] +[titan] 2025-10-05 01:16:30,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:16:32,515 - root - INFO - step: 4400 loss: 2.8412 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5024 +[titan] 2025-10-05 01:16:32,515 - root - INFO - lr: 4.8876e-05 gnorm: 1.24 [ 2:42:23<21:53:57] +[titan] 2025-10-05 01:16:43,378 - root - INFO - step: 4405 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4765 +[titan] 2025-10-05 01:16:43,378 - root - INFO - lr: 4.8873e-05 gnorm: 1.25 [ 2:42:34<21:53:44] +[titan] 2025-10-05 01:16:54,311 - root - INFO - step: 4410 loss: 2.7984 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3341 global_avg_mtp_loss: 2.4642 +[titan] 2025-10-05 01:16:54,312 - root - INFO - lr: 4.8871e-05 gnorm: 1.22 [ 2:42:45<21:53:32] +[titan] 2025-10-05 01:17:05,164 - root - INFO - step: 4415 loss: 2.7761 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3306 global_avg_mtp_loss: 2.4455 +[titan] 2025-10-05 01:17:05,164 - root - INFO - lr: 4.8868e-05 gnorm: 1.24 [ 2:42:56<21:53:19] +[titan] 2025-10-05 01:17:16,059 - root - INFO - step: 4420 loss: 2.8777 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5351 +[titan] 2025-10-05 01:17:16,059 - root - INFO - lr: 4.8865e-05 gnorm: 1.27 [ 2:43:07<21:53:06] +[titan] 2025-10-05 01:17:26,943 - root - INFO - step: 4425 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4104 +[titan] 2025-10-05 01:17:26,943 - root - INFO - lr: 4.8862e-05 gnorm: 1.25 [ 2:43:18<21:52:54] +[titan] 2025-10-05 01:17:37,810 - root - INFO - step: 4430 loss: 2.8315 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:17:37,810 - root - INFO - lr: 4.8859e-05 
gnorm: 1.24 [ 2:43:29<21:52:41] +[titan] 2025-10-05 01:17:48,674 - root - INFO - step: 4435 loss: 2.7874 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4564 +[titan] 2025-10-05 01:17:48,674 - root - INFO - lr: 4.8857e-05 gnorm: 1.29 [ 2:43:40<21:52:28] +[titan] 2025-10-05 01:17:59,549 - root - INFO - step: 4440 loss: 2.7652 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4365 +[titan] 2025-10-05 01:17:59,549 - root - INFO - lr: 4.8854e-05 gnorm: 1.25 [ 2:43:50<21:52:16] +[titan] 2025-10-05 01:18:10,464 - root - INFO - step: 4445 loss: 2.7634 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4344 +[titan] 2025-10-05 01:18:10,464 - root - INFO - lr: 4.8851e-05 gnorm: 1.21 [ 2:44:01<21:52:03] +[titan] 2025-10-05 01:18:19,181 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:18:21,402 - root - INFO - step: 4450 loss: 2.8198 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.3358 global_avg_mtp_loss: 2.4839 +[titan] 2025-10-05 01:18:21,402 - root - INFO - lr: 4.8848e-05 gnorm: 1.25 [ 2:44:12<21:51:51] +[titan] 2025-10-05 01:18:32,290 - root - INFO - step: 4455 loss: 2.8002 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4666 +[titan] 2025-10-05 01:18:32,290 - root - INFO - lr: 4.8845e-05 gnorm: 1.21 [ 2:44:23<21:51:39] +[titan] 2025-10-05 01:18:43,182 - root - INFO - step: 4460 loss: 2.7924 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:18:43,183 - root - INFO - lr: 4.8842e-05 gnorm: 1.17 [ 2:44:34<21:51:26] +[titan] 2025-10-05 01:18:54,107 - root - INFO - step: 4465 loss: 2.8210 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3364 global_avg_mtp_loss: 2.4846 +[titan] 2025-10-05 01:18:54,107 - root - INFO - lr: 4.8840e-05 gnorm: 1.23 [ 2:44:45<21:51:14] +[titan] 2025-10-05 01:19:04,974 - root - INFO - step: 4470 loss: 2.7913 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4596 +[titan] 2025-10-05 01:19:04,974 - root - INFO - lr: 4.8837e-05 gnorm: 1.21 [ 2:44:56<21:51:01] +[titan] 2025-10-05 01:19:15,845 - root - INFO - step: 4475 loss: 2.8258 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3360 global_avg_mtp_loss: 2.4898 +[titan] 2025-10-05 01:19:15,846 - root - INFO - lr: 4.8834e-05 gnorm: 1.28 [ 2:45:07<21:50:49] +[titan] 2025-10-05 01:19:26,715 - root - INFO - step: 4480 loss: 2.7821 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:19:26,715 - root - INFO - lr: 4.8831e-05 
gnorm: 1.29 [ 2:45:18<21:50:36] +[titan] 2025-10-05 01:19:37,611 - root - INFO - step: 4485 loss: 2.8154 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4798 +[titan] 2025-10-05 01:19:37,611 - root - INFO - lr: 4.8828e-05 gnorm: 1.28 [ 2:45:29<21:50:24] +[titan] 2025-10-05 01:19:48,473 - root - INFO - step: 4490 loss: 2.7910 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3319 global_avg_mtp_loss: 2.4591 +[titan] 2025-10-05 01:19:48,474 - root - INFO - lr: 4.8825e-05 gnorm: 1.39 [ 2:45:39<21:50:11] +[titan] 2025-10-05 01:19:59,363 - root - INFO - step: 4495 loss: 2.7586 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4304 +[titan] 2025-10-05 01:19:59,363 - root - INFO - lr: 4.8823e-05 gnorm: 1.26 [ 2:45:50<21:49:58] +[titan] 2025-10-05 01:20:08,035 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:20:10,224 - root - INFO - step: 4500 loss: 2.8484 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.5091 +[titan] 2025-10-05 01:20:10,224 - root - INFO - lr: 4.8820e-05 gnorm: 1.25 [ 2:46:01<21:49:46] +[titan] 2025-10-05 01:20:21,077 - root - INFO - step: 4505 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3816 +[titan] 2025-10-05 01:20:21,077 - root - INFO - lr: 4.8817e-05 gnorm: 1.25 [ 2:46:12<21:49:33] +[titan] 2025-10-05 01:20:31,932 - root - INFO - step: 4510 loss: 2.8270 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3376 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:20:31,932 - root - INFO - lr: 4.8814e-05 gnorm: 1.26 [ 2:46:23<21:49:20] +[titan] 2025-10-05 01:20:42,845 - root - INFO - step: 4515 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4207 +[titan] 2025-10-05 01:20:42,845 - root - INFO - lr: 4.8811e-05 gnorm: 1.20 [ 2:46:34<21:49:08] +[titan] 2025-10-05 01:20:53,800 - root - INFO - step: 4520 loss: 2.8244 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4888 +[titan] 2025-10-05 01:20:53,800 - root - INFO - lr: 4.8808e-05 gnorm: 1.37 [ 2:46:45<21:48:56] +[titan] 2025-10-05 01:21:04,708 - root - INFO - step: 4525 loss: 2.7186 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3964 +[titan] 2025-10-05 01:21:04,708 - root - INFO - lr: 4.8805e-05 gnorm: 1.27 [ 2:46:56<21:48:44] +[titan] 2025-10-05 01:21:15,602 - root - INFO - step: 4530 loss: 2.7206 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3236 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:21:15,602 - root - INFO - lr: 4.8803e-05 
gnorm: 1.23 [ 2:47:07<21:48:31] +[titan] 2025-10-05 01:21:26,498 - root - INFO - step: 4535 loss: 2.7518 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3249 global_avg_mtp_loss: 2.4269 +[titan] 2025-10-05 01:21:26,498 - root - INFO - lr: 4.8800e-05 gnorm: 1.35 [ 2:47:17<21:48:19] +[titan] 2025-10-05 01:21:37,376 - root - INFO - step: 4540 loss: 2.7814 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3301 global_avg_mtp_loss: 2.4513 +[titan] 2025-10-05 01:21:37,376 - root - INFO - lr: 4.8797e-05 gnorm: 1.23 [ 2:47:28<21:48:06] +[titan] 2025-10-05 01:21:48,331 - root - INFO - step: 4545 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4487 +[titan] 2025-10-05 01:21:48,331 - root - INFO - lr: 4.8794e-05 gnorm: 1.25 [ 2:47:39<21:47:54] +[titan] 2025-10-05 01:21:57,058 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:21:59,248 - root - INFO - step: 4550 loss: 2.8483 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 01:21:59,249 - root - INFO - lr: 4.8791e-05 gnorm: 1.26 [ 2:47:50<21:47:42] +[titan] 2025-10-05 01:22:10,102 - root - INFO - step: 4555 loss: 2.7389 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.4138 +[titan] 2025-10-05 01:22:10,103 - root - INFO - lr: 4.8788e-05 gnorm: 1.20 [ 2:48:01<21:47:29] +[titan] 2025-10-05 01:22:20,974 - root - INFO - step: 4560 loss: 2.7847 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:22:20,974 - root - INFO - lr: 4.8785e-05 gnorm: 1.21 [ 2:48:12<21:47:17] +[titan] 2025-10-05 01:22:31,853 - root - INFO - step: 4565 loss: 2.7537 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:22:31,854 - root - INFO - lr: 4.8782e-05 gnorm: 1.27 [ 2:48:23<21:47:04] +[titan] 2025-10-05 01:22:42,729 - root - INFO - step: 4570 loss: 2.6580 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 01:22:42,729 - root - INFO - lr: 4.8779e-05 gnorm: 1.26 [ 2:48:34<21:46:52] +[titan] 2025-10-05 01:22:53,792 - root - INFO - step: 4575 loss: 2.8422 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.3385 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:22:53,792 - root - INFO - lr: 4.8777e-05 gnorm: 1.26 [ 2:48:45<21:46:41] +[titan] 2025-10-05 01:23:04,721 - root - INFO - step: 4580 loss: 2.6906 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3732 +[titan] 2025-10-05 01:23:04,721 - root - INFO - lr: 4.8774e-05 
gnorm: 1.18 [ 2:48:56<21:46:28] +[titan] 2025-10-05 01:23:15,616 - root - INFO - step: 4585 loss: 2.7509 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:23:15,616 - root - INFO - lr: 4.8771e-05 gnorm: 1.23 [ 2:49:07<21:46:16] +[titan] 2025-10-05 01:23:26,529 - root - INFO - step: 4590 loss: 2.7868 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4570 +[titan] 2025-10-05 01:23:26,530 - root - INFO - lr: 4.8768e-05 gnorm: 1.24 [ 2:49:17<21:46:04] +[titan] 2025-10-05 01:23:37,394 - root - INFO - step: 4595 loss: 2.7525 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3274 global_avg_mtp_loss: 2.4251 +[titan] 2025-10-05 01:23:37,394 - root - INFO - lr: 4.8765e-05 gnorm: 1.22 [ 2:49:28<21:45:51] +[titan] 2025-10-05 01:23:46,091 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:23:48,293 - root - INFO - step: 4600 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4209 +[titan] 2025-10-05 01:23:48,294 - root - INFO - lr: 4.8762e-05 gnorm: 1.18 [ 2:49:39<21:45:39] +[titan] 2025-10-05 01:23:59,314 - root - INFO - step: 4605 loss: 2.8650 memory: 118.84GiB(85.28%) tps: 29,734 tflops: 412.51 mfu: 41.71% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.5076 +[titan] 2025-10-05 01:23:59,314 - root - INFO - lr: 4.8759e-05 gnorm: 1.23 [ 2:49:50<21:45:27] +[titan] 2025-10-05 01:24:06,005 - root - INFO - Dumping profiler traces at step 4608 +[titan] 2025-10-05 01:24:06,040 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 01:24:10,470 - root - INFO - step: 4610 loss: 2.7849 memory: 118.84GiB(85.28%) tps: 29,373 tflops: 407.50 mfu: 41.20% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4534 +[titan] 2025-10-05 01:24:10,471 - root - INFO - lr: 4.8756e-05 gnorm: 1.28 [ 2:50:01<21:45:17] +[titan] 2025-10-05 01:24:21,351 - root - INFO - step: 4615 loss: 2.7549 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4283 +[titan] 2025-10-05 01:24:21,351 - root - INFO - lr: 4.8753e-05 gnorm: 1.21 [ 2:50:12<21:45:05] +[titan] 2025-10-05 01:24:32,230 - root - INFO - step: 4620 loss: 2.6761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3586 +[titan] 2025-10-05 01:24:32,230 - root - INFO - lr: 4.8750e-05 gnorm: 1.22 [ 2:50:23<21:44:52] +[titan] 2025-10-05 01:24:43,126 - root - INFO - step: 4625 loss: 2.6974 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:24:43,126 - root - INFO - lr: 4.8747e-05 gnorm: 1.18 [ 2:50:34<21:44:40] +[titan] 2025-10-05 01:24:54,032 - root - INFO - step: 4630 loss: 2.8650 
memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3412 global_avg_mtp_loss: 2.5238 +[titan] 2025-10-05 01:24:54,032 - root - INFO - lr: 4.8744e-05 gnorm: 1.24 [ 2:50:45<21:44:27] +[titan] 2025-10-05 01:25:04,940 - root - INFO - step: 4635 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4155 +[titan] 2025-10-05 01:25:04,940 - root - INFO - lr: 4.8741e-05 gnorm: 1.21 [ 2:50:56<21:44:15] +[titan] 2025-10-05 01:25:15,817 - root - INFO - step: 4640 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3562 +[titan] 2025-10-05 01:25:15,817 - root - INFO - lr: 4.8739e-05 gnorm: 1.31 [ 2:51:07<21:44:03] +[titan] 2025-10-05 01:25:26,720 - root - INFO - step: 4645 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3352 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:25:26,721 - root - INFO - lr: 4.8736e-05 gnorm: 1.23 [ 2:51:18<21:43:50] +[titan] 2025-10-05 01:25:35,459 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:25:37,643 - root - INFO - step: 4650 loss: 2.6937 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3193 global_avg_mtp_loss: 2.3743 +[titan] 2025-10-05 01:25:37,643 - root - INFO - lr: 4.8733e-05 gnorm: 1.23 [ 2:51:29<21:43:38] +[titan] 2025-10-05 01:25:48,525 - root - INFO - step: 4655 loss: 2.7402 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4144 +[titan] 2025-10-05 01:25:48,525 - root - INFO - lr: 4.8730e-05 gnorm: 1.22 [ 2:51:39<21:43:26] +[titan] 2025-10-05 01:25:59,422 - root - INFO - step: 4660 loss: 2.7820 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4520 +[titan] 2025-10-05 01:25:59,422 - root - INFO - lr: 4.8727e-05 gnorm: 1.30 [ 2:51:50<21:43:14] +[titan] 2025-10-05 01:26:10,311 - root - INFO - step: 4665 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3392 global_avg_mtp_loss: 2.5074 +[titan] 2025-10-05 01:26:10,311 - root - INFO - lr: 4.8724e-05 gnorm: 1.25 [ 2:52:01<21:43:01] +[titan] 2025-10-05 01:26:21,210 - root - INFO - step: 4670 loss: 2.7305 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4066 +[titan] 2025-10-05 01:26:21,210 - root - INFO - lr: 4.8721e-05 gnorm: 1.25 [ 2:52:12<21:42:49] +[titan] 2025-10-05 01:26:32,122 - root - INFO - step: 4675 loss: 2.7530 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4264 +[titan] 2025-10-05 01:26:32,122 - root - INFO - lr: 4.8718e-05 gnorm: 1.25 [ 2:52:23<21:42:37] +[titan] 2025-10-05 01:26:43,055 - root - INFO - step: 4680 loss: 2.8067 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4732 +[titan] 2025-10-05 01:26:43,055 - root - INFO - lr: 4.8715e-05 
gnorm: 1.24 [ 2:52:34<21:42:25] +[titan] 2025-10-05 01:26:53,990 - root - INFO - step: 4685 loss: 2.6707 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 01:26:53,990 - root - INFO - lr: 4.8712e-05 gnorm: 1.36 [ 2:52:45<21:42:13] +[titan] 2025-10-05 01:27:04,906 - root - INFO - step: 4690 loss: 2.7149 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3214 global_avg_mtp_loss: 2.3935 +[titan] 2025-10-05 01:27:04,906 - root - INFO - lr: 4.8709e-05 gnorm: 1.23 [ 2:52:56<21:42:00] +[titan] 2025-10-05 01:27:15,817 - root - INFO - step: 4695 loss: 2.6965 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3761 +[titan] 2025-10-05 01:27:15,817 - root - INFO - lr: 4.8706e-05 gnorm: 1.25 [ 2:53:07<21:41:48] +[titan] 2025-10-05 01:27:24,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:27:26,735 - root - INFO - step: 4700 loss: 2.7982 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 01:27:26,735 - root - INFO - lr: 4.8703e-05 gnorm: 1.22 [ 2:53:18<21:41:36] +[titan] 2025-10-05 01:27:37,672 - root - INFO - step: 4705 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3443 global_avg_mtp_loss: 2.5494 +[titan] 2025-10-05 01:27:37,673 - root - INFO - lr: 4.8700e-05 gnorm: 1.26 [ 2:53:29<21:41:24] +[titan] 2025-10-05 01:27:48,615 - root - INFO - step: 4710 loss: 2.7471 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.3269 global_avg_mtp_loss: 2.4201 +[titan] 2025-10-05 01:27:48,615 - root - INFO - lr: 4.8697e-05 gnorm: 1.21 [ 2:53:40<21:41:12] +[titan] 2025-10-05 01:27:59,548 - root - INFO - step: 4715 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.3271 global_avg_mtp_loss: 2.4303 +[titan] 2025-10-05 01:27:59,548 - root - INFO - lr: 4.8694e-05 gnorm: 1.22 [ 2:53:50<21:41:00] +[titan] 2025-10-05 01:28:10,470 - root - INFO - step: 4720 loss: 2.8297 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3377 global_avg_mtp_loss: 2.4920 +[titan] 2025-10-05 01:28:10,471 - root - INFO - lr: 4.8691e-05 gnorm: 1.25 [ 2:54:01<21:40:48] +[titan] 2025-10-05 01:28:21,389 - root - INFO - step: 4725 loss: 2.8079 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4747 +[titan] 2025-10-05 01:28:21,389 - root - INFO - lr: 4.8688e-05 gnorm: 1.25 [ 2:54:12<21:40:36] +[titan] 2025-10-05 01:28:32,287 - root - INFO - step: 4730 loss: 2.7460 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3255 global_avg_mtp_loss: 2.4205 +[titan] 2025-10-05 01:28:32,287 - root - INFO - lr: 4.8685e-05 
gnorm: 1.27 [ 2:54:23<21:40:24] +[titan] 2025-10-05 01:28:43,251 - root - INFO - step: 4735 loss: 2.6848 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3668 +[titan] 2025-10-05 01:28:43,252 - root - INFO - lr: 4.8682e-05 gnorm: 1.24 [ 2:54:34<21:40:12] +[titan] 2025-10-05 01:28:54,171 - root - INFO - step: 4740 loss: 2.7918 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4618 +[titan] 2025-10-05 01:28:54,171 - root - INFO - lr: 4.8679e-05 gnorm: 1.32 [ 2:54:45<21:40:00] +[titan] 2025-10-05 01:29:05,077 - root - INFO - step: 4745 loss: 2.7361 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4131 +[titan] 2025-10-05 01:29:05,077 - root - INFO - lr: 4.8676e-05 gnorm: 1.29 [ 2:54:56<21:39:47] +[titan] 2025-10-05 01:29:13,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:29:15,984 - root - INFO - step: 4750 loss: 2.7499 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:29:15,985 - root - INFO - lr: 4.8673e-05 gnorm: 1.26 [ 2:55:07<21:39:35] +[titan] 2025-10-05 01:29:26,874 - root - INFO - step: 4755 loss: 2.7721 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3285 global_avg_mtp_loss: 2.4435 +[titan] 2025-10-05 01:29:26,874 - root - INFO - lr: 4.8670e-05 gnorm: 1.19 [ 2:55:18<21:39:23] +[titan] 2025-10-05 01:29:37,761 - root - INFO - step: 4760 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3353 global_avg_mtp_loss: 2.4595 +[titan] 2025-10-05 01:29:37,761 - root - INFO - lr: 4.8667e-05 gnorm: 1.22 [ 2:55:29<21:39:11] +[titan] 2025-10-05 01:29:48,663 - root - INFO - step: 4765 loss: 2.7250 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3240 global_avg_mtp_loss: 2.4010 +[titan] 2025-10-05 01:29:48,664 - root - INFO - lr: 4.8664e-05 gnorm: 1.28 [ 2:55:40<21:38:58] +[titan] 2025-10-05 01:29:59,563 - root - INFO - step: 4770 loss: 2.7157 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3221 global_avg_mtp_loss: 2.3936 +[titan] 2025-10-05 01:29:59,563 - root - INFO - lr: 4.8661e-05 gnorm: 2.78 [ 2:55:50<21:38:46] +[titan] 2025-10-05 01:30:10,469 - root - INFO - step: 4775 loss: 2.8036 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4701 +[titan] 2025-10-05 01:30:10,470 - root - INFO - lr: 4.8658e-05 gnorm: 1.25 [ 2:56:01<21:38:34] +[titan] 2025-10-05 01:30:21,348 - root - INFO - step: 4780 loss: 2.7215 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:30:21,348 - root - INFO - lr: 4.8655e-05 
gnorm: 1.38 [ 2:56:12<21:38:21] +[titan] 2025-10-05 01:30:32,231 - root - INFO - step: 4785 loss: 2.7709 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4425 +[titan] 2025-10-05 01:30:32,231 - root - INFO - lr: 4.8652e-05 gnorm: 1.21 [ 2:56:23<21:38:09] +[titan] 2025-10-05 01:30:43,113 - root - INFO - step: 4790 loss: 2.7171 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.3934 +[titan] 2025-10-05 01:30:43,113 - root - INFO - lr: 4.8649e-05 gnorm: 1.19 [ 2:56:34<21:37:57] +[titan] 2025-10-05 01:30:54,053 - root - INFO - step: 4795 loss: 2.8155 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.4731 +[titan] 2025-10-05 01:30:54,054 - root - INFO - lr: 4.8646e-05 gnorm: 1.20 [ 2:56:45<21:37:45] +[titan] 2025-10-05 01:31:02,753 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:31:04,944 - root - INFO - step: 4800 loss: 2.7229 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4004 +[titan] 2025-10-05 01:31:04,944 - root - INFO - lr: 4.8643e-05 gnorm: 1.27 [ 2:56:56<21:37:33] +[titan] 2025-10-05 01:31:15,845 - root - INFO - step: 4805 loss: 2.7633 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4354 +[titan] 2025-10-05 01:31:15,845 - root - INFO - lr: 4.8639e-05 gnorm: 1.30 [ 2:57:07<21:37:20] +[titan] 2025-10-05 01:31:26,718 - root - INFO - step: 4810 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 01:31:26,718 - root - INFO - lr: 4.8636e-05 gnorm: 1.23 [ 2:57:18<21:37:08] +[titan] 2025-10-05 01:31:37,587 - root - INFO - step: 4815 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4100 +[titan] 2025-10-05 01:31:37,587 - root - INFO - lr: 4.8633e-05 gnorm: 1.25 [ 2:57:28<21:36:55] +[titan] 2025-10-05 01:31:48,487 - root - INFO - step: 4820 loss: 2.7752 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4454 +[titan] 2025-10-05 01:31:48,487 - root - INFO - lr: 4.8630e-05 gnorm: 1.24 [ 2:57:39<21:36:43] +[titan] 2025-10-05 01:31:59,366 - root - INFO - step: 4825 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3299 global_avg_mtp_loss: 2.4440 +[titan] 2025-10-05 01:31:59,366 - root - INFO - lr: 4.8627e-05 gnorm: 1.27 [ 2:57:50<21:36:31] +[titan] 2025-10-05 01:32:10,285 - root - INFO - step: 4830 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3289 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:32:10,285 - root - INFO - lr: 4.8624e-05 
gnorm: 1.25 [ 2:58:01<21:36:19] +[titan] 2025-10-05 01:32:21,158 - root - INFO - step: 4835 loss: 2.7916 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4584 +[titan] 2025-10-05 01:32:21,158 - root - INFO - lr: 4.8621e-05 gnorm: 1.23 [ 2:58:12<21:36:06] +[titan] 2025-10-05 01:32:32,019 - root - INFO - step: 4840 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3305 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:32:32,020 - root - INFO - lr: 4.8618e-05 gnorm: 1.25 [ 2:58:23<21:35:54] +[titan] 2025-10-05 01:32:42,890 - root - INFO - step: 4845 loss: 2.7622 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4341 +[titan] 2025-10-05 01:32:42,890 - root - INFO - lr: 4.8615e-05 gnorm: 1.24 [ 2:58:34<21:35:42] +[titan] 2025-10-05 01:32:51,570 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:32:53,752 - root - INFO - step: 4850 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3209 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 01:32:53,752 - root - INFO - lr: 4.8612e-05 gnorm: 1.26 [ 2:58:45<21:35:29] +[titan] 2025-10-05 01:33:04,624 - root - INFO - step: 4855 loss: 2.7888 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4588 +[titan] 2025-10-05 01:33:04,624 - root - INFO - lr: 4.8609e-05 gnorm: 1.30 [ 2:58:55<21:35:17] +[titan] 2025-10-05 01:33:15,521 - root - INFO - step: 4860 loss: 2.6936 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3751 +[titan] 2025-10-05 01:33:15,521 - root - INFO - lr: 4.8606e-05 gnorm: 1.24 [ 2:59:06<21:35:04] +[titan] 2025-10-05 01:33:26,393 - root - INFO - step: 4865 loss: 2.8919 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3439 global_avg_mtp_loss: 2.5480 +[titan] 2025-10-05 01:33:26,393 - root - INFO - lr: 4.8603e-05 gnorm: 1.25 [ 2:59:17<21:34:52] +[titan] 2025-10-05 01:33:37,259 - root - INFO - step: 4870 loss: 2.7240 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 01:33:37,259 - root - INFO - lr: 4.8599e-05 gnorm: 1.24 [ 2:59:28<21:34:40] +[titan] 2025-10-05 01:33:48,148 - root - INFO - step: 4875 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4417 +[titan] 2025-10-05 01:33:48,148 - root - INFO - lr: 4.8596e-05 gnorm: 1.26 [ 2:59:39<21:34:27] +[titan] 2025-10-05 01:33:59,035 - root - INFO - step: 4880 loss: 2.7227 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4000 +[titan] 2025-10-05 01:33:59,035 - root - INFO - lr: 4.8593e-05 
gnorm: 1.27 [ 2:59:50<21:34:15] +[titan] 2025-10-05 01:34:09,948 - root - INFO - step: 4885 loss: 2.7234 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4009 +[titan] 2025-10-05 01:34:09,948 - root - INFO - lr: 4.8590e-05 gnorm: 1.20 [ 3:00:01<21:34:03] +[titan] 2025-10-05 01:34:20,817 - root - INFO - step: 4890 loss: 2.7314 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4083 +[titan] 2025-10-05 01:34:20,818 - root - INFO - lr: 4.8587e-05 gnorm: 1.33 [ 3:00:12<21:33:51] +[titan] 2025-10-05 01:34:31,730 - root - INFO - step: 4895 loss: 2.7077 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3210 global_avg_mtp_loss: 2.3867 +[titan] 2025-10-05 01:34:31,730 - root - INFO - lr: 4.8584e-05 gnorm: 1.29 [ 3:00:23<21:33:38] +[titan] 2025-10-05 01:34:40,426 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:34:42,619 - root - INFO - step: 4900 loss: 2.7734 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3292 global_avg_mtp_loss: 2.4443 +[titan] 2025-10-05 01:34:42,620 - root - INFO - lr: 4.8581e-05 gnorm: 1.28 [ 3:00:33<21:33:26] +[titan] 2025-10-05 01:34:53,494 - root - INFO - step: 4905 loss: 2.7406 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4146 +[titan] 2025-10-05 01:34:53,495 - root - INFO - lr: 4.8578e-05 gnorm: 1.17 [ 3:00:44<21:33:14] +[titan] 2025-10-05 01:35:04,450 - root - INFO - step: 4910 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3981 +[titan] 2025-10-05 01:35:04,451 - root - INFO - lr: 4.8575e-05 gnorm: 1.20 [ 3:00:55<21:33:02] +[titan] 2025-10-05 01:35:15,335 - root - INFO - step: 4915 loss: 2.7382 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4140 +[titan] 2025-10-05 01:35:15,335 - root - INFO - lr: 4.8571e-05 gnorm: 1.28 [ 3:01:06<21:32:50] +[titan] 2025-10-05 01:35:26,233 - root - INFO - step: 4920 loss: 2.7952 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3320 global_avg_mtp_loss: 2.4631 +[titan] 2025-10-05 01:35:26,233 - root - INFO - lr: 4.8568e-05 gnorm: 1.29 [ 3:01:17<21:32:38] +[titan] 2025-10-05 01:35:37,136 - root - INFO - step: 4925 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4403 +[titan] 2025-10-05 01:35:37,136 - root - INFO - lr: 4.8565e-05 gnorm: 1.25 [ 3:01:28<21:32:26] +[titan] 2025-10-05 01:35:48,014 - root - INFO - step: 4930 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4081 +[titan] 2025-10-05 01:35:48,014 - root - INFO - lr: 4.8562e-05 
gnorm: 1.21 [ 3:01:39<21:32:13] +[titan] 2025-10-05 01:35:58,895 - root - INFO - step: 4935 loss: 2.7204 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:35:58,895 - root - INFO - lr: 4.8559e-05 gnorm: 1.20 [ 3:01:50<21:32:01] +[titan] 2025-10-05 01:36:09,806 - root - INFO - step: 4940 loss: 2.7788 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:36:09,806 - root - INFO - lr: 4.8556e-05 gnorm: 1.21 [ 3:02:01<21:31:49] +[titan] 2025-10-05 01:36:20,731 - root - INFO - step: 4945 loss: 2.7547 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3304 global_avg_mtp_loss: 2.4243 +[titan] 2025-10-05 01:36:20,732 - root - INFO - lr: 4.8553e-05 gnorm: 1.23 [ 3:02:12<21:31:37] +[titan] 2025-10-05 01:36:29,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:36:31,611 - root - INFO - step: 4950 loss: 2.6438 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3129 global_avg_mtp_loss: 2.3309 +[titan] 2025-10-05 01:36:31,611 - root - INFO - lr: 4.8549e-05 gnorm: 1.20 [ 3:02:22<21:31:25] +[titan] 2025-10-05 01:36:42,497 - root - INFO - step: 4955 loss: 2.7743 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:36:42,497 - root - INFO - lr: 4.8546e-05 gnorm: 1.29 [ 3:02:33<21:31:12] +[titan] 2025-10-05 01:36:53,369 - root - INFO - step: 4960 loss: 2.7846 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4551 +[titan] 2025-10-05 01:36:53,369 - root - INFO - lr: 4.8543e-05 gnorm: 1.25 [ 3:02:44<21:31:00] +[titan] 2025-10-05 01:37:04,267 - root - INFO - step: 4965 loss: 2.8172 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3344 global_avg_mtp_loss: 2.4828 +[titan] 2025-10-05 01:37:04,267 - root - INFO - lr: 4.8540e-05 gnorm: 1.25 [ 3:02:55<21:30:48] +[titan] 2025-10-05 01:37:15,212 - root - INFO - step: 4970 loss: 2.6436 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3310 +[titan] 2025-10-05 01:37:15,212 - root - INFO - lr: 4.8537e-05 gnorm: 1.25 [ 3:03:06<21:30:36] +[titan] 2025-10-05 01:37:26,159 - root - INFO - step: 4975 loss: 2.7551 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4289 +[titan] 2025-10-05 01:37:26,159 - root - INFO - lr: 4.8534e-05 gnorm: 1.22 [ 3:03:17<21:30:24] +[titan] 2025-10-05 01:37:37,030 - root - INFO - step: 4980 loss: 2.7052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3218 global_avg_mtp_loss: 2.3834 +[titan] 2025-10-05 01:37:37,031 - root - INFO - lr: 4.8530e-05 
gnorm: 1.26 [ 3:03:28<21:30:12] +[titan] 2025-10-05 01:37:47,943 - root - INFO - step: 4985 loss: 2.7357 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4094 +[titan] 2025-10-05 01:37:47,944 - root - INFO - lr: 4.8527e-05 gnorm: 1.27 [ 3:03:39<21:30:00] +[titan] 2025-10-05 01:37:58,856 - root - INFO - step: 4990 loss: 2.7950 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4628 +[titan] 2025-10-05 01:37:58,857 - root - INFO - lr: 4.8524e-05 gnorm: 1.22 [ 3:03:50<21:29:48] +[titan] 2025-10-05 01:38:09,823 - root - INFO - step: 4995 loss: 2.7375 memory: 118.84GiB(85.28%) tps: 29,881 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.3261 global_avg_mtp_loss: 2.4114 +[titan] 2025-10-05 01:38:09,823 - root - INFO - lr: 4.8521e-05 gnorm: 1.18 [ 3:04:01<21:29:36] +[titan] 2025-10-05 01:38:18,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:38:20,753 - root - INFO - step: 5000 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3200 global_avg_mtp_loss: 2.3792 +[titan] 2025-10-05 01:38:20,753 - root - INFO - lr: 4.8518e-05 gnorm: 1.26 [ 3:04:12<21:29:24] +[titan] 2025-10-05 01:38:20,753 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 01:38:42,122 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 01:38:42,122 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 21.37 seconds. 
+[titan] 2025-10-05 01:40:51,998 - root - INFO - step: 5005 loss: 2.7858 memory: 118.84GiB(85.28%) tps: 2,167 tflops: 30.06 mfu: 3.04% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:40:51,998 - root - INFO - lr: 4.8515e-05 gnorm: 1.27 [ 3:06:43<21:45:33] +[titan] 2025-10-05 01:41:02,796 - root - INFO - step: 5010 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3270 global_avg_mtp_loss: 2.4291 +[titan] 2025-10-05 01:41:02,796 - root - INFO - lr: 4.8511e-05 gnorm: 1.34 [ 3:06:54<21:45:20] +[titan] 2025-10-05 01:41:13,614 - root - INFO - step: 5015 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.3283 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:41:13,614 - root - INFO - lr: 4.8508e-05 gnorm: 1.32 [ 3:07:04<21:45:06] +[titan] 2025-10-05 01:41:24,485 - root - INFO - step: 5020 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4305 +[titan] 2025-10-05 01:41:24,486 - root - INFO - lr: 4.8505e-05 gnorm: 1.31 [ 3:07:15<21:44:52] +[titan] 2025-10-05 01:41:35,321 - root - INFO - step: 5025 loss: 2.7060 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3836 +[titan] 2025-10-05 01:41:35,321 - root - INFO - lr: 4.8502e-05 gnorm: 1.27 [ 3:07:26<21:44:39] +[titan] 2025-10-05 01:41:46,205 - root - INFO - step: 5030 loss: 2.7304 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3247 global_avg_mtp_loss: 2.4057 +[titan] 2025-10-05 01:41:46,205 - root - INFO - lr: 4.8499e-05 gnorm: 1.28 [ 3:07:37<21:44:25] +[titan] 2025-10-05 01:41:57,092 - root - INFO - step: 5035 loss: 2.7485 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4237 +[titan] 2025-10-05 01:41:57,093 - root - INFO - lr: 4.8495e-05 
gnorm: 1.26 [ 3:07:48<21:44:12] +[titan] 2025-10-05 01:42:08,008 - root - INFO - step: 5040 loss: 2.7641 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4360 +[titan] 2025-10-05 01:42:08,008 - root - INFO - lr: 4.8492e-05 gnorm: 1.18 [ 3:07:59<21:43:59] +[titan] 2025-10-05 01:42:18,888 - root - INFO - step: 5045 loss: 2.6254 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3136 +[titan] 2025-10-05 01:42:18,888 - root - INFO - lr: 4.8489e-05 gnorm: 1.29 [ 3:08:10<21:43:46] +[titan] 2025-10-05 01:42:27,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:42:29,897 - root - INFO - step: 5050 loss: 2.7825 memory: 118.84GiB(85.28%) tps: 29,766 tflops: 412.96 mfu: 41.76% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4531 +[titan] 2025-10-05 01:42:29,897 - root - INFO - lr: 4.8486e-05 gnorm: 1.24 [ 3:08:21<21:43:33] +[titan] 2025-10-05 01:42:40,766 - root - INFO - step: 5055 loss: 2.7808 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4505 +[titan] 2025-10-05 01:42:40,766 - root - INFO - lr: 4.8483e-05 gnorm: 1.22 [ 3:08:32<21:43:20] +[titan] 2025-10-05 01:42:51,649 - root - INFO - step: 5060 loss: 2.6497 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3358 +[titan] 2025-10-05 01:42:51,649 - root - INFO - lr: 4.8479e-05 gnorm: 1.25 [ 3:08:43<21:43:06] +[titan] 2025-10-05 01:43:02,533 - root - INFO - step: 5065 loss: 2.7482 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 01:43:02,533 - root - INFO - lr: 4.8476e-05 gnorm: 1.21 [ 3:08:53<21:42:53] +[titan] 2025-10-05 01:43:13,418 - root - INFO - step: 5070 loss: 2.8515 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 
417.67 mfu: 42.23% global_avg_ntp_loss: 0.3494 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:43:13,418 - root - INFO - lr: 4.8473e-05 gnorm: 1.24 [ 3:09:04<21:42:40] +[titan] 2025-10-05 01:43:24,295 - root - INFO - step: 5075 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3955 +[titan] 2025-10-05 01:43:24,295 - root - INFO - lr: 4.8470e-05 gnorm: 1.23 [ 3:09:15<21:42:26] +[titan] 2025-10-05 01:43:35,165 - root - INFO - step: 5080 loss: 2.6731 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3557 +[titan] 2025-10-05 01:43:35,166 - root - INFO - lr: 4.8466e-05 gnorm: 1.24 [ 3:09:26<21:42:13] +[titan] 2025-10-05 01:43:46,043 - root - INFO - step: 5085 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 01:43:46,043 - root - INFO - lr: 4.8463e-05 gnorm: 1.24 [ 3:09:37<21:42:00] +[titan] 2025-10-05 01:43:56,916 - root - INFO - step: 5090 loss: 2.7316 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4079 +[titan] 2025-10-05 01:43:56,916 - root - INFO - lr: 4.8460e-05 gnorm: 1.35 [ 3:09:48<21:41:46] +[titan] 2025-10-05 01:44:07,778 - root - INFO - step: 5095 loss: 2.7611 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4327 +[titan] 2025-10-05 01:44:07,778 - root - INFO - lr: 4.8457e-05 gnorm: 1.27 [ 3:09:59<21:41:33] +[titan] 2025-10-05 01:44:16,486 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:44:18,671 - root - INFO - step: 5100 loss: 2.6824 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3640 +[titan] 2025-10-05 01:44:18,671 - root - INFO - lr: 4.8453e-05 gnorm: 1.28 [ 3:10:10<21:41:20] +[titan] 2025-10-05 01:44:29,534 - root - INFO - step: 5105 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.4782 +[titan] 2025-10-05 01:44:29,534 - root - INFO - lr: 4.8450e-05 gnorm: 1.26 [ 3:10:20<21:41:06] +[titan] 2025-10-05 01:44:40,413 - root - INFO - step: 5110 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.3923 +[titan] 2025-10-05 01:44:40,413 - root - INFO - lr: 4.8447e-05 gnorm: 1.23 [ 3:10:31<21:40:53] +[titan] 2025-10-05 01:44:51,299 - root - INFO - step: 5115 loss: 2.6959 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3758 +[titan] 2025-10-05 01:44:51,299 - root - INFO - lr: 4.8444e-05 gnorm: 1.26 [ 3:10:42<21:40:40] +[titan] 2025-10-05 01:45:02,275 - root - INFO - step: 5120 loss: 2.7516 memory: 118.84GiB(85.28%) tps: 29,856 tflops: 414.21 mfu: 41.88% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4257 +[titan] 2025-10-05 01:45:02,275 - root - INFO - lr: 4.8440e-05 gnorm: 1.21 [ 3:10:53<21:40:27] +[titan] 2025-10-05 01:45:02,442 - root - INFO - Dumping profiler traces at step 5120 +[titan] 2025-10-05 01:45:02,478 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:45:13,379 - root - INFO - step: 5125 loss: 2.7714 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.41 mfu: 41.40% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4427 +[titan] 2025-10-05 01:45:13,379 - root - INFO - lr: 4.8437e-05 gnorm: 1.24 [ 3:11:04<21:40:16] +[titan] 2025-10-05 01:45:24,262 - root - INFO - step: 5130 loss: 2.6786 
memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3612 +[titan] 2025-10-05 01:45:24,263 - root - INFO - lr: 4.8434e-05 gnorm: 1.22 [ 3:11:15<21:40:02] +[titan] 2025-10-05 01:45:35,196 - root - INFO - step: 5135 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4702 +[titan] 2025-10-05 01:45:35,196 - root - INFO - lr: 4.8431e-05 gnorm: 1.27 [ 3:11:26<21:39:49] +[titan] 2025-10-05 01:45:46,094 - root - INFO - step: 5140 loss: 2.7216 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3233 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:45:46,094 - root - INFO - lr: 4.8427e-05 gnorm: 1.26 [ 3:11:37<21:39:36] +[titan] 2025-10-05 01:45:56,991 - root - INFO - step: 5145 loss: 2.7084 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3869 +[titan] 2025-10-05 01:45:56,991 - root - INFO - lr: 4.8424e-05 gnorm: 1.23 [ 3:11:48<21:39:23] +[titan] 2025-10-05 01:46:05,684 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:46:07,870 - root - INFO - step: 5150 loss: 2.7550 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4284 +[titan] 2025-10-05 01:46:07,870 - root - INFO - lr: 4.8421e-05 gnorm: 1.28 [ 3:11:59<21:39:10] +[titan] 2025-10-05 01:46:18,768 - root - INFO - step: 5155 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3142 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 01:46:18,768 - root - INFO - lr: 4.8417e-05 gnorm: 1.20 [ 3:12:10<21:38:57] +[titan] 2025-10-05 01:46:29,716 - root - INFO - step: 5160 loss: 2.7141 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3918 +[titan] 2025-10-05 01:46:29,716 - root - INFO - lr: 4.8414e-05 gnorm: 1.22 [ 3:12:21<21:38:44] +[titan] 2025-10-05 01:46:40,611 - root - INFO - step: 5165 loss: 2.7431 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3245 global_avg_mtp_loss: 2.4185 +[titan] 2025-10-05 01:46:40,611 - root - INFO - lr: 4.8411e-05 gnorm: 1.18 [ 3:12:31<21:38:31] +[titan] 2025-10-05 01:46:51,503 - root - INFO - step: 5170 loss: 2.6610 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 01:46:51,503 - root - INFO - lr: 4.8408e-05 gnorm: 1.21 [ 3:12:42<21:38:18] +[titan] 2025-10-05 01:47:02,418 - root - INFO - step: 5175 loss: 2.7319 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4077 +[titan] 2025-10-05 01:47:02,418 - root - INFO - lr: 4.8404e-05 gnorm: 1.21 [ 3:12:53<21:38:05] +[titan] 2025-10-05 01:47:13,333 - root - INFO - step: 5180 loss: 2.7303 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:47:13,333 - root - INFO - lr: 4.8401e-05 
gnorm: 1.24 [ 3:13:04<21:37:52] +[titan] 2025-10-05 01:47:24,247 - root - INFO - step: 5185 loss: 2.6746 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 01:47:24,247 - root - INFO - lr: 4.8398e-05 gnorm: 1.22 [ 3:13:15<21:37:39] +[titan] 2025-10-05 01:47:35,216 - root - INFO - step: 5190 loss: 2.7738 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4456 +[titan] 2025-10-05 01:47:35,217 - root - INFO - lr: 4.8394e-05 gnorm: 1.31 [ 3:13:26<21:37:26] +[titan] 2025-10-05 01:47:46,124 - root - INFO - step: 5195 loss: 2.8394 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3384 global_avg_mtp_loss: 2.5009 +[titan] 2025-10-05 01:47:46,124 - root - INFO - lr: 4.8391e-05 gnorm: 1.27 [ 3:13:37<21:37:13] +[titan] 2025-10-05 01:47:54,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:47:57,027 - root - INFO - step: 5200 loss: 2.7263 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4026 +[titan] 2025-10-05 01:47:57,027 - root - INFO - lr: 4.8388e-05 gnorm: 1.24 [ 3:13:48<21:37:00] +[titan] 2025-10-05 01:48:07,915 - root - INFO - step: 5205 loss: 2.7277 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.4038 +[titan] 2025-10-05 01:48:07,915 - root - INFO - lr: 4.8384e-05 gnorm: 1.21 [ 3:13:59<21:36:47] +[titan] 2025-10-05 01:48:18,830 - root - INFO - step: 5210 loss: 2.6835 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3172 global_avg_mtp_loss: 2.3663 +[titan] 2025-10-05 01:48:18,830 - root - INFO - lr: 4.8381e-05 gnorm: 1.22 [ 3:14:10<21:36:34] +[titan] 2025-10-05 01:48:29,733 - root - INFO - step: 5215 loss: 2.6886 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3701 +[titan] 2025-10-05 01:48:29,733 - root - INFO - lr: 4.8378e-05 gnorm: 1.23 [ 3:14:21<21:36:21] +[titan] 2025-10-05 01:48:40,645 - root - INFO - step: 5220 loss: 2.7098 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 01:48:40,645 - root - INFO - lr: 4.8374e-05 gnorm: 1.25 [ 3:14:31<21:36:08] +[titan] 2025-10-05 01:48:51,536 - root - INFO - step: 5225 loss: 2.8169 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4830 +[titan] 2025-10-05 01:48:51,536 - root - INFO - lr: 4.8371e-05 gnorm: 1.24 [ 3:14:42<21:35:55] +[titan] 2025-10-05 01:49:02,433 - root - INFO - step: 5230 loss: 2.7455 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4197 +[titan] 2025-10-05 01:49:02,433 - root - INFO - lr: 4.8368e-05 
gnorm: 1.26 [ 3:14:53<21:35:42] +[titan] 2025-10-05 01:49:13,324 - root - INFO - step: 5235 loss: 2.7873 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.3324 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:49:13,325 - root - INFO - lr: 4.8364e-05 gnorm: 1.21 [ 3:15:04<21:35:29] +[titan] 2025-10-05 01:49:24,205 - root - INFO - step: 5240 loss: 2.6851 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3675 +[titan] 2025-10-05 01:49:24,206 - root - INFO - lr: 4.8361e-05 gnorm: 1.22 [ 3:15:15<21:35:16] +[titan] 2025-10-05 01:49:35,124 - root - INFO - step: 5245 loss: 2.7664 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:49:35,124 - root - INFO - lr: 4.8358e-05 gnorm: 1.24 [ 3:15:26<21:35:03] +[titan] 2025-10-05 01:49:43,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:49:45,992 - root - INFO - step: 5250 loss: 2.7297 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4047 +[titan] 2025-10-05 01:49:45,992 - root - INFO - lr: 4.8354e-05 gnorm: 1.29 [ 3:15:37<21:34:49] +[titan] 2025-10-05 01:49:56,896 - root - INFO - step: 5255 loss: 2.7151 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3928 +[titan] 2025-10-05 01:49:56,896 - root - INFO - lr: 4.8351e-05 gnorm: 1.29 [ 3:15:48<21:34:36] +[titan] 2025-10-05 01:50:07,763 - root - INFO - step: 5260 loss: 2.7886 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3308 global_avg_mtp_loss: 2.4578 +[titan] 2025-10-05 01:50:07,763 - root - INFO - lr: 4.8348e-05 gnorm: 1.36 [ 3:15:59<21:34:23] +[titan] 2025-10-05 01:50:18,645 - root - INFO - step: 5265 loss: 2.6117 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3012 +[titan] 2025-10-05 01:50:18,645 - root - INFO - lr: 4.8344e-05 gnorm: 1.24 [ 3:16:09<21:34:10] +[titan] 2025-10-05 01:50:29,515 - root - INFO - step: 5270 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4451 +[titan] 2025-10-05 01:50:29,516 - root - INFO - lr: 4.8341e-05 gnorm: 1.24 [ 3:16:20<21:33:57] +[titan] 2025-10-05 01:50:40,456 - root - INFO - step: 5275 loss: 2.7065 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3874 +[titan] 2025-10-05 01:50:40,457 - root - INFO - lr: 4.8338e-05 gnorm: 1.25 [ 3:16:31<21:33:44] +[titan] 2025-10-05 01:50:51,334 - root - INFO - step: 5280 loss: 2.7674 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4384 +[titan] 2025-10-05 01:50:51,334 - root - INFO - lr: 4.8334e-05 
gnorm: 1.25 [ 3:16:42<21:33:31] +[titan] 2025-10-05 01:51:02,214 - root - INFO - step: 5285 loss: 2.6660 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3493 +[titan] 2025-10-05 01:51:02,214 - root - INFO - lr: 4.8331e-05 gnorm: 1.20 [ 3:16:53<21:33:18] +[titan] 2025-10-05 01:51:13,075 - root - INFO - step: 5290 loss: 2.7457 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4189 +[titan] 2025-10-05 01:51:13,075 - root - INFO - lr: 4.8327e-05 gnorm: 1.25 [ 3:17:04<21:33:05] +[titan] 2025-10-05 01:51:23,938 - root - INFO - step: 5295 loss: 2.7299 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:51:23,938 - root - INFO - lr: 4.8324e-05 gnorm: 1.18 [ 3:17:15<21:32:51] +[titan] 2025-10-05 01:51:32,658 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:51:34,836 - root - INFO - step: 5300 loss: 2.7577 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4310 +[titan] 2025-10-05 01:51:34,836 - root - INFO - lr: 4.8321e-05 gnorm: 1.27 [ 3:17:26<21:32:38] +[titan] 2025-10-05 01:51:45,732 - root - INFO - step: 5305 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.4411 +[titan] 2025-10-05 01:51:45,732 - root - INFO - lr: 4.8317e-05 gnorm: 1.28 [ 3:17:37<21:32:25] +[titan] 2025-10-05 01:51:56,598 - root - INFO - step: 5310 loss: 2.6649 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3486 +[titan] 2025-10-05 01:51:56,598 - root - INFO - lr: 4.8314e-05 gnorm: 1.25 [ 3:17:47<21:32:12] +[titan] 2025-10-05 01:52:07,463 - root - INFO - step: 5315 loss: 2.6130 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3093 global_avg_mtp_loss: 2.3037 +[titan] 2025-10-05 01:52:07,463 - root - INFO - lr: 4.8311e-05 gnorm: 1.23 [ 3:17:58<21:31:59] +[titan] 2025-10-05 01:52:18,354 - root - INFO - step: 5320 loss: 2.7768 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4481 +[titan] 2025-10-05 01:52:18,354 - root - INFO - lr: 4.8307e-05 gnorm: 1.31 [ 3:18:09<21:31:46] +[titan] 2025-10-05 01:52:29,236 - root - INFO - step: 5325 loss: 2.7143 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3924 +[titan] 2025-10-05 01:52:29,236 - root - INFO - lr: 4.8304e-05 gnorm: 1.21 [ 3:18:20<21:31:33] +[titan] 2025-10-05 01:52:40,146 - root - INFO - step: 5330 loss: 2.7556 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4290 +[titan] 2025-10-05 01:52:40,146 - root - INFO - lr: 4.8300e-05 
gnorm: 1.27 [ 3:18:31<21:31:20] +[titan] 2025-10-05 01:52:51,044 - root - INFO - step: 5335 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4168 +[titan] 2025-10-05 01:52:51,044 - root - INFO - lr: 4.8297e-05 gnorm: 1.26 [ 3:18:42<21:31:07] +[titan] 2025-10-05 01:53:01,911 - root - INFO - step: 5340 loss: 2.7097 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3206 global_avg_mtp_loss: 2.3891 +[titan] 2025-10-05 01:53:01,911 - root - INFO - lr: 4.8294e-05 gnorm: 1.30 [ 3:18:53<21:30:54] +[titan] 2025-10-05 01:53:12,786 - root - INFO - step: 5345 loss: 2.6651 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3501 +[titan] 2025-10-05 01:53:12,787 - root - INFO - lr: 4.8290e-05 gnorm: 1.21 [ 3:19:04<21:30:41] +[titan] 2025-10-05 01:53:21,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:53:23,680 - root - INFO - step: 5350 loss: 2.7279 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3226 global_avg_mtp_loss: 2.4053 +[titan] 2025-10-05 01:53:23,680 - root - INFO - lr: 4.8287e-05 gnorm: 1.24 [ 3:19:15<21:30:28] +[titan] 2025-10-05 01:53:34,600 - root - INFO - step: 5355 loss: 2.6227 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 01:53:34,600 - root - INFO - lr: 4.8283e-05 gnorm: 1.28 [ 3:19:25<21:30:15] +[titan] 2025-10-05 01:53:45,495 - root - INFO - step: 5360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4548 +[titan] 2025-10-05 01:53:45,495 - root - INFO - lr: 4.8280e-05 gnorm: 1.23 [ 3:19:36<21:30:02] +[titan] 2025-10-05 01:53:56,371 - root - INFO - step: 5365 loss: 2.7914 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4555 +[titan] 2025-10-05 01:53:56,372 - root - INFO - lr: 4.8276e-05 gnorm: 1.23 [ 3:19:47<21:29:49] +[titan] 2025-10-05 01:54:07,246 - root - INFO - step: 5370 loss: 2.6816 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3627 +[titan] 2025-10-05 01:54:07,246 - root - INFO - lr: 4.8273e-05 gnorm: 1.23 [ 3:19:58<21:29:36] +[titan] 2025-10-05 01:54:18,130 - root - INFO - step: 5375 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3272 +[titan] 2025-10-05 01:54:18,130 - root - INFO - lr: 4.8270e-05 gnorm: 1.27 [ 3:20:09<21:29:23] +[titan] 2025-10-05 01:54:28,973 - root - INFO - step: 5380 loss: 2.7116 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3899 +[titan] 2025-10-05 01:54:28,973 - root - INFO - lr: 4.8266e-05 
gnorm: 1.23 [ 3:20:20<21:29:09] +[titan] 2025-10-05 01:54:39,864 - root - INFO - step: 5385 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3764 +[titan] 2025-10-05 01:54:39,864 - root - INFO - lr: 4.8263e-05 gnorm: 1.24 [ 3:20:31<21:28:56] +[titan] 2025-10-05 01:54:50,734 - root - INFO - step: 5390 loss: 2.7644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4366 +[titan] 2025-10-05 01:54:50,735 - root - INFO - lr: 4.8259e-05 gnorm: 1.25 [ 3:20:42<21:28:43] +[titan] 2025-10-05 01:55:01,593 - root - INFO - step: 5395 loss: 2.7603 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3272 global_avg_mtp_loss: 2.4331 +[titan] 2025-10-05 01:55:01,593 - root - INFO - lr: 4.8256e-05 gnorm: 1.21 [ 3:20:52<21:28:30] +[titan] 2025-10-05 01:55:10,273 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:55:12,472 - root - INFO - step: 5400 loss: 2.7045 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3833 +[titan] 2025-10-05 01:55:12,472 - root - INFO - lr: 4.8252e-05 gnorm: 1.20 [ 3:21:03<21:28:17] +[titan] 2025-10-05 01:55:23,346 - root - INFO - step: 5405 loss: 2.7062 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3213 global_avg_mtp_loss: 2.3849 +[titan] 2025-10-05 01:55:23,346 - root - INFO - lr: 4.8249e-05 gnorm: 1.20 [ 3:21:14<21:28:04] +[titan] 2025-10-05 01:55:34,207 - root - INFO - step: 5410 loss: 2.7345 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4118 +[titan] 2025-10-05 01:55:34,207 - root - INFO - lr: 4.8245e-05 gnorm: 1.26 [ 3:21:25<21:27:51] +[titan] 2025-10-05 01:55:45,114 - root - INFO - step: 5415 loss: 2.6787 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3618 +[titan] 2025-10-05 01:55:45,114 - root - INFO - lr: 4.8242e-05 gnorm: 1.16 [ 3:21:36<21:27:38] +[titan] 2025-10-05 01:55:55,985 - root - INFO - step: 5420 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.2994 +[titan] 2025-10-05 01:55:55,985 - root - INFO - lr: 4.8239e-05 gnorm: 1.23 [ 3:21:47<21:27:25] +[titan] 2025-10-05 01:56:06,858 - root - INFO - step: 5425 loss: 2.6262 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 01:56:06,858 - root - INFO - lr: 4.8235e-05 gnorm: 1.20 [ 3:21:58<21:27:12] +[titan] 2025-10-05 01:56:17,752 - root - INFO - step: 5430 loss: 2.6880 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3706 +[titan] 2025-10-05 01:56:17,752 - root - INFO - lr: 4.8232e-05 
gnorm: 1.21 [ 3:22:09<21:26:59] +[titan] 2025-10-05 01:56:28,647 - root - INFO - step: 5435 loss: 2.6104 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 01:56:28,647 - root - INFO - lr: 4.8228e-05 gnorm: 1.24 [ 3:22:19<21:26:46] +[titan] 2025-10-05 01:56:39,549 - root - INFO - step: 5440 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3753 +[titan] 2025-10-05 01:56:39,550 - root - INFO - lr: 4.8225e-05 gnorm: 1.24 [ 3:22:30<21:26:33] +[titan] 2025-10-05 01:56:50,425 - root - INFO - step: 5445 loss: 2.7005 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 01:56:50,426 - root - INFO - lr: 4.8221e-05 gnorm: 1.24 [ 3:22:41<21:26:20] +[titan] 2025-10-05 01:56:59,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:57:01,307 - root - INFO - step: 5450 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3211 global_avg_mtp_loss: 2.3942 +[titan] 2025-10-05 01:57:01,307 - root - INFO - lr: 4.8218e-05 gnorm: 1.22 [ 3:22:52<21:26:07] +[titan] 2025-10-05 01:57:12,168 - root - INFO - step: 5455 loss: 2.7238 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4003 +[titan] 2025-10-05 01:57:12,168 - root - INFO - lr: 4.8214e-05 gnorm: 1.25 [ 3:23:03<21:25:54] +[titan] 2025-10-05 01:57:23,004 - root - INFO - step: 5460 loss: 2.7013 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 01:57:23,004 - root - INFO - lr: 4.8211e-05 gnorm: 1.21 [ 3:23:14<21:25:41] +[titan] 2025-10-05 01:57:33,870 - root - INFO - step: 5465 loss: 2.7566 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:57:33,870 - root - INFO - lr: 4.8207e-05 gnorm: 1.28 [ 3:23:25<21:25:28] +[titan] 2025-10-05 01:57:44,735 - root - INFO - step: 5470 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 01:57:44,736 - root - INFO - lr: 4.8204e-05 gnorm: 3.95 [ 3:23:36<21:25:15] +[titan] 2025-10-05 01:57:55,597 - root - INFO - step: 5475 loss: 2.7332 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4064 +[titan] 2025-10-05 01:57:55,598 - root - INFO - lr: 4.8200e-05 gnorm: 5.60 [ 3:23:46<21:25:02] +[titan] 2025-10-05 01:58:06,457 - root - INFO - step: 5480 loss: 2.6333 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3196 +[titan] 2025-10-05 01:58:06,457 - root - INFO - lr: 4.8197e-05 
gnorm: 1.42 [ 3:23:57<21:24:49] +[titan] 2025-10-05 01:58:17,326 - root - INFO - step: 5485 loss: 2.6808 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3195 global_avg_mtp_loss: 2.3613 +[titan] 2025-10-05 01:58:17,327 - root - INFO - lr: 4.8193e-05 gnorm: 1.64 [ 3:24:08<21:24:35] +[titan] 2025-10-05 01:58:28,172 - root - INFO - step: 5490 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3789 +[titan] 2025-10-05 01:58:28,172 - root - INFO - lr: 4.8190e-05 gnorm: 1.44 [ 3:24:19<21:24:22] +[titan] 2025-10-05 01:58:39,061 - root - INFO - step: 5495 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:58:39,061 - root - INFO - lr: 4.8186e-05 gnorm: 1.37 [ 3:24:30<21:24:09] +[titan] 2025-10-05 01:58:47,779 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:58:49,967 - root - INFO - step: 5500 loss: 2.7427 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4165 +[titan] 2025-10-05 01:58:49,967 - root - INFO - lr: 4.8183e-05 gnorm: 1.30 [ 3:24:41<21:23:57] +[titan] 2025-10-05 01:59:00,823 - root - INFO - step: 5505 loss: 2.7373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4120 +[titan] 2025-10-05 01:59:00,823 - root - INFO - lr: 4.8179e-05 gnorm: 1.29 [ 3:24:52<21:23:44] +[titan] 2025-10-05 01:59:11,693 - root - INFO - step: 5510 loss: 2.6666 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3511 +[titan] 2025-10-05 01:59:11,693 - root - INFO - lr: 4.8176e-05 gnorm: 1.30 [ 3:25:03<21:23:31] +[titan] 2025-10-05 01:59:22,587 - root - INFO - step: 5515 loss: 2.7189 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.3938 +[titan] 2025-10-05 01:59:22,588 - root - INFO - lr: 4.8172e-05 gnorm: 6.71 [ 3:25:13<21:23:18] +[titan] 2025-10-05 01:59:33,471 - root - INFO - step: 5520 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3125 global_avg_mtp_loss: 2.3281 +[titan] 2025-10-05 01:59:33,471 - root - INFO - lr: 4.8169e-05 gnorm: 1.27 [ 3:25:24<21:23:05] +[titan] 2025-10-05 01:59:44,386 - root - INFO - step: 5525 loss: 2.6236 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3140 +[titan] 2025-10-05 01:59:44,386 - root - INFO - lr: 4.8165e-05 gnorm: 1.21 [ 3:25:35<21:22:52] +[titan] 2025-10-05 01:59:55,268 - root - INFO - step: 5530 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 01:59:55,268 - root - INFO - lr: 4.8162e-05 
gnorm: 1.23 [ 3:25:46<21:22:39] +[titan] 2025-10-05 02:00:06,139 - root - INFO - step: 5535 loss: 2.6010 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3104 global_avg_mtp_loss: 2.2906 +[titan] 2025-10-05 02:00:06,139 - root - INFO - lr: 4.8158e-05 gnorm: 1.28 [ 3:25:57<21:22:26] +[titan] 2025-10-05 02:00:17,012 - root - INFO - step: 5540 loss: 2.6903 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 02:00:17,013 - root - INFO - lr: 4.8155e-05 gnorm: 1.28 [ 3:26:08<21:22:13] +[titan] 2025-10-05 02:00:27,882 - root - INFO - step: 5545 loss: 2.6624 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3482 +[titan] 2025-10-05 02:00:27,883 - root - INFO - lr: 4.8151e-05 gnorm: 1.25 [ 3:26:19<21:22:00] +[titan] 2025-10-05 02:00:36,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:00:38,754 - root - INFO - step: 5550 loss: 2.6437 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3307 +[titan] 2025-10-05 02:00:38,754 - root - INFO - lr: 4.8147e-05 gnorm: 1.23 [ 3:26:30<21:21:47] +[titan] 2025-10-05 02:00:49,688 - root - INFO - step: 5555 loss: 2.6840 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:00:49,689 - root - INFO - lr: 4.8144e-05 gnorm: 1.21 [ 3:26:40<21:21:35] +[titan] 2025-10-05 02:01:00,569 - root - INFO - step: 5560 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3569 +[titan] 2025-10-05 02:01:00,569 - root - INFO - lr: 4.8140e-05 gnorm: 1.21 [ 3:26:51<21:21:22] +[titan] 2025-10-05 02:01:11,488 - root - INFO - step: 5565 loss: 2.6609 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 02:01:11,488 - root - INFO - lr: 4.8137e-05 gnorm: 1.24 [ 3:27:02<21:21:09] +[titan] 2025-10-05 02:01:22,384 - root - INFO - step: 5570 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.3978 +[titan] 2025-10-05 02:01:22,385 - root - INFO - lr: 4.8133e-05 gnorm: 1.26 [ 3:27:13<21:20:56] +[titan] 2025-10-05 02:01:33,286 - root - INFO - step: 5575 loss: 2.6770 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3597 +[titan] 2025-10-05 02:01:33,287 - root - INFO - lr: 4.8130e-05 gnorm: 1.23 [ 3:27:24<21:20:43] +[titan] 2025-10-05 02:01:44,187 - root - INFO - step: 5580 loss: 2.6684 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.3160 global_avg_mtp_loss: 2.3524 +[titan] 2025-10-05 02:01:44,187 - root - INFO - lr: 4.8126e-05 
gnorm: 1.22 [ 3:27:35<21:20:31] +[titan] 2025-10-05 02:01:55,071 - root - INFO - step: 5585 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3821 +[titan] 2025-10-05 02:01:55,072 - root - INFO - lr: 4.8123e-05 gnorm: 1.23 [ 3:27:46<21:20:18] +[titan] 2025-10-05 02:02:05,953 - root - INFO - step: 5590 loss: 2.7020 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 02:02:05,954 - root - INFO - lr: 4.8119e-05 gnorm: 1.29 [ 3:27:57<21:20:05] +[titan] 2025-10-05 02:02:16,866 - root - INFO - step: 5595 loss: 2.6621 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3458 +[titan] 2025-10-05 02:02:16,866 - root - INFO - lr: 4.8115e-05 gnorm: 1.25 [ 3:28:08<21:19:52] +[titan] 2025-10-05 02:02:25,553 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:02:27,738 - root - INFO - step: 5600 loss: 2.7026 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3818 +[titan] 2025-10-05 02:02:27,739 - root - INFO - lr: 4.8112e-05 gnorm: 1.26 [ 3:28:19<21:19:39] +[titan] 2025-10-05 02:02:38,604 - root - INFO - step: 5605 loss: 2.6192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:02:38,604 - root - INFO - lr: 4.8108e-05 gnorm: 1.25 [ 3:28:29<21:19:26] +[titan] 2025-10-05 02:02:49,527 - root - INFO - step: 5610 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3444 +[titan] 2025-10-05 02:02:49,527 - root - INFO - lr: 4.8105e-05 gnorm: 1.32 [ 3:28:40<21:19:14] +[titan] 2025-10-05 02:03:00,407 - root - INFO - step: 5615 loss: 2.6727 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3552 +[titan] 2025-10-05 02:03:00,408 - root - INFO - lr: 4.8101e-05 gnorm: 1.19 [ 3:28:51<21:19:01] +[titan] 2025-10-05 02:03:11,293 - root - INFO - step: 5620 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3098 +[titan] 2025-10-05 02:03:11,293 - root - INFO - lr: 4.8097e-05 gnorm: 1.24 [ 3:29:02<21:18:48] +[titan] 2025-10-05 02:03:22,216 - root - INFO - step: 5625 loss: 2.6235 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 02:03:22,216 - root - INFO - lr: 4.8094e-05 gnorm: 1.21 [ 3:29:13<21:18:35] +[titan] 2025-10-05 02:03:33,165 - root - INFO - step: 5630 loss: 2.7089 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3881 +[titan] 2025-10-05 02:03:33,165 - root - INFO - lr: 4.8090e-05 
gnorm: 1.27 [ 3:29:24<21:18:23] +[titan] 2025-10-05 02:03:37,690 - root - INFO - Dumping profiler traces at step 5632 +[titan] 2025-10-05 02:03:37,727 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:03:44,274 - root - INFO - step: 5635 loss: 2.6796 memory: 118.84GiB(85.28%) tps: 29,498 tflops: 409.23 mfu: 41.38% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3615 +[titan] 2025-10-05 02:03:44,274 - root - INFO - lr: 4.8087e-05 gnorm: 1.25 [ 3:29:35<21:18:12] +[titan] 2025-10-05 02:03:55,158 - root - INFO - step: 5640 loss: 2.6061 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3075 global_avg_mtp_loss: 2.2987 +[titan] 2025-10-05 02:03:55,158 - root - INFO - lr: 4.8083e-05 gnorm: 1.25 [ 3:29:46<21:17:59] +[titan] 2025-10-05 02:04:06,053 - root - INFO - step: 5645 loss: 2.7125 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3908 +[titan] 2025-10-05 02:04:06,053 - root - INFO - lr: 4.8079e-05 gnorm: 1.34 [ 3:29:57<21:17:46] +[titan] 2025-10-05 02:04:14,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:04:16,937 - root - INFO - step: 5650 loss: 2.5977 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:04:16,937 - root - INFO - lr: 4.8076e-05 gnorm: 1.27 [ 3:30:08<21:17:33] +[titan] 2025-10-05 02:04:27,853 - root - INFO - step: 5655 loss: 2.6416 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3299 +[titan] 2025-10-05 02:04:27,854 - root - INFO - lr: 4.8072e-05 gnorm: 1.30 [ 3:30:19<21:17:20] +[titan] 2025-10-05 02:04:38,772 - root - INFO - step: 5660 loss: 2.7230 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 02:04:38,772 - root - INFO - lr: 4.8069e-05 gnorm: 1.24 [ 3:30:30<21:17:08] +[titan] 2025-10-05 02:04:49,685 - root - INFO - step: 5665 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3829 +[titan] 2025-10-05 02:04:49,685 - root - INFO - lr: 4.8065e-05 gnorm: 1.26 [ 3:30:40<21:16:55] +[titan] 2025-10-05 02:05:00,577 - root - INFO - step: 5670 loss: 2.6274 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3178 +[titan] 2025-10-05 02:05:00,577 - root - INFO - lr: 4.8061e-05 gnorm: 1.25 [ 3:30:51<21:16:42] +[titan] 2025-10-05 02:05:11,454 - root - INFO - step: 5675 loss: 2.6289 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:11,454 - root - INFO - lr: 4.8058e-05 gnorm: 1.22 [ 3:31:02<21:16:30] +[titan] 2025-10-05 02:05:22,325 - root - INFO - step: 5680 loss: 2.7071 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3216 global_avg_mtp_loss: 2.3854 +[titan] 2025-10-05 02:05:22,325 - root - INFO - lr: 4.8054e-05 
gnorm: 1.24 [ 3:31:13<21:16:17] +[titan] 2025-10-05 02:05:33,190 - root - INFO - step: 5685 loss: 2.6647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3151 global_avg_mtp_loss: 2.3496 +[titan] 2025-10-05 02:05:33,190 - root - INFO - lr: 4.8051e-05 gnorm: 1.25 [ 3:31:24<21:16:04] +[titan] 2025-10-05 02:05:44,079 - root - INFO - step: 5690 loss: 2.6318 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3204 +[titan] 2025-10-05 02:05:44,079 - root - INFO - lr: 4.8047e-05 gnorm: 1.20 [ 3:31:35<21:15:51] +[titan] 2025-10-05 02:05:54,989 - root - INFO - step: 5695 loss: 2.6284 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:54,989 - root - INFO - lr: 4.8043e-05 gnorm: 1.18 [ 3:31:46<21:15:38] +[titan] 2025-10-05 02:06:03,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:06:05,856 - root - INFO - step: 5700 loss: 2.6425 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:06:05,856 - root - INFO - lr: 4.8040e-05 gnorm: 1.17 [ 3:31:57<21:15:26] +[titan] 2025-10-05 02:06:16,739 - root - INFO - step: 5705 loss: 2.6825 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3176 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:06:16,740 - root - INFO - lr: 4.8036e-05 gnorm: 1.21 [ 3:32:08<21:15:13] +[titan] 2025-10-05 02:06:27,613 - root - INFO - step: 5710 loss: 2.7487 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 02:06:27,613 - root - INFO - lr: 4.8032e-05 gnorm: 1.24 [ 3:32:18<21:15:00] +[titan] 2025-10-05 02:06:38,482 - root - INFO - step: 5715 loss: 2.6692 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3542 +[titan] 2025-10-05 02:06:38,482 - root - INFO - lr: 4.8029e-05 gnorm: 1.27 [ 3:32:29<21:14:47] +[titan] 2025-10-05 02:06:49,408 - root - INFO - step: 5720 loss: 2.6745 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3170 global_avg_mtp_loss: 2.3576 +[titan] 2025-10-05 02:06:49,408 - root - INFO - lr: 4.8025e-05 gnorm: 1.21 [ 3:32:40<21:14:34] +[titan] 2025-10-05 02:07:00,305 - root - INFO - step: 5725 loss: 2.6145 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3063 +[titan] 2025-10-05 02:07:00,305 - root - INFO - lr: 4.8021e-05 gnorm: 1.25 [ 3:32:51<21:14:22] +[titan] 2025-10-05 02:07:11,183 - root - INFO - step: 5730 loss: 2.6939 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3752 +[titan] 2025-10-05 02:07:11,183 - root - INFO - lr: 4.8018e-05 
gnorm: 1.27 [ 3:33:02<21:14:09] +[titan] 2025-10-05 02:07:22,045 - root - INFO - step: 5735 loss: 2.6083 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3002 +[titan] 2025-10-05 02:07:22,046 - root - INFO - lr: 4.8014e-05 gnorm: 1.28 [ 3:33:13<21:13:56] +[titan] 2025-10-05 02:07:32,920 - root - INFO - step: 5740 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3073 global_avg_mtp_loss: 2.2892 +[titan] 2025-10-05 02:07:32,921 - root - INFO - lr: 4.8010e-05 gnorm: 1.17 [ 3:33:24<21:13:43] +[titan] 2025-10-05 02:07:43,786 - root - INFO - step: 5745 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:07:43,786 - root - INFO - lr: 4.8007e-05 gnorm: 1.24 [ 3:33:35<21:13:30] +[titan] 2025-10-05 02:07:52,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:07:54,774 - root - INFO - step: 5750 loss: 2.6142 memory: 118.84GiB(85.28%) tps: 29,822 tflops: 413.73 mfu: 41.83% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 02:07:54,774 - root - INFO - lr: 4.8003e-05 gnorm: 1.24 [ 3:33:46<21:13:18] +[titan] 2025-10-05 02:08:05,686 - root - INFO - step: 5755 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3384 +[titan] 2025-10-05 02:08:05,687 - root - INFO - lr: 4.7999e-05 gnorm: 1.20 [ 3:33:56<21:13:06] +[titan] 2025-10-05 02:08:16,606 - root - INFO - step: 5760 loss: 2.7255 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4025 +[titan] 2025-10-05 02:08:16,606 - root - INFO - lr: 4.7996e-05 gnorm: 1.24 [ 3:34:07<21:12:53] +[titan] 2025-10-05 02:08:27,489 - root - INFO - step: 5765 loss: 2.6698 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3144 global_avg_mtp_loss: 2.3553 +[titan] 2025-10-05 02:08:27,489 - root - INFO - lr: 4.7992e-05 gnorm: 1.25 [ 3:34:18<21:12:40] +[titan] 2025-10-05 02:08:38,371 - root - INFO - step: 5770 loss: 2.7107 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 02:08:38,371 - root - INFO - lr: 4.7988e-05 gnorm: 1.26 [ 3:34:29<21:12:28] +[titan] 2025-10-05 02:08:49,290 - root - INFO - step: 5775 loss: 2.7046 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3203 global_avg_mtp_loss: 2.3843 +[titan] 2025-10-05 02:08:49,291 - root - INFO - lr: 4.7985e-05 gnorm: 1.28 [ 3:34:40<21:12:15] +[titan] 2025-10-05 02:09:00,170 - root - INFO - step: 5780 loss: 2.7717 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4437 +[titan] 2025-10-05 02:09:00,170 - root - INFO - lr: 4.7981e-05 
gnorm: 1.66 [ 3:34:51<21:12:02] +[titan] 2025-10-05 02:09:11,065 - root - INFO - step: 5785 loss: 2.6598 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3451 +[titan] 2025-10-05 02:09:11,066 - root - INFO - lr: 4.7977e-05 gnorm: 1.28 [ 3:35:02<21:11:50] +[titan] 2025-10-05 02:09:21,936 - root - INFO - step: 5790 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3077 +[titan] 2025-10-05 02:09:21,936 - root - INFO - lr: 4.7973e-05 gnorm: 1.24 [ 3:35:13<21:11:37] +[titan] 2025-10-05 02:09:32,809 - root - INFO - step: 5795 loss: 2.6803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3637 +[titan] 2025-10-05 02:09:32,809 - root - INFO - lr: 4.7970e-05 gnorm: 1.27 [ 3:35:24<21:11:24] +[titan] 2025-10-05 02:09:41,491 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:09:43,680 - root - INFO - step: 5800 loss: 2.6313 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3124 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:09:43,680 - root - INFO - lr: 4.7966e-05 gnorm: 1.25 [ 3:35:34<21:11:11] +[titan] 2025-10-05 02:09:54,628 - root - INFO - step: 5805 loss: 2.6182 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.3088 +[titan] 2025-10-05 02:09:54,629 - root - INFO - lr: 4.7962e-05 gnorm: 1.26 [ 3:35:45<21:10:59] +[titan] 2025-10-05 02:10:05,480 - root - INFO - step: 5810 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 02:10:05,481 - root - INFO - lr: 4.7959e-05 gnorm: 1.28 [ 3:35:56<21:10:46] +[titan] 2025-10-05 02:10:16,374 - root - INFO - step: 5815 loss: 2.6620 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3479 +[titan] 2025-10-05 02:10:16,374 - root - INFO - lr: 4.7955e-05 gnorm: 1.22 [ 3:36:07<21:10:33] +[titan] 2025-10-05 02:10:27,283 - root - INFO - step: 5820 loss: 2.6968 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3191 global_avg_mtp_loss: 2.3777 +[titan] 2025-10-05 02:10:27,283 - root - INFO - lr: 4.7951e-05 gnorm: 1.21 [ 3:36:18<21:10:21] +[titan] 2025-10-05 02:10:38,152 - root - INFO - step: 5825 loss: 2.6399 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3263 +[titan] 2025-10-05 02:10:38,152 - root - INFO - lr: 4.7947e-05 gnorm: 1.26 [ 3:36:29<21:10:08] +[titan] 2025-10-05 02:10:49,018 - root - INFO - step: 5830 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3158 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:10:49,018 - root - INFO - lr: 4.7944e-05 
gnorm: 1.27 [ 3:36:40<21:09:55] +[titan] 2025-10-05 02:10:59,943 - root - INFO - step: 5835 loss: 2.6687 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 02:10:59,943 - root - INFO - lr: 4.7940e-05 gnorm: 1.27 [ 3:36:51<21:09:43] +[titan] 2025-10-05 02:11:10,804 - root - INFO - step: 5840 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3646 +[titan] 2025-10-05 02:11:10,804 - root - INFO - lr: 4.7936e-05 gnorm: 1.23 [ 3:37:02<21:09:30] +[titan] 2025-10-05 02:11:21,663 - root - INFO - step: 5845 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:11:21,663 - root - INFO - lr: 4.7933e-05 gnorm: 1.22 [ 3:37:12<21:09:17] +[titan] 2025-10-05 02:11:30,368 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:11:32,557 - root - INFO - step: 5850 loss: 2.5946 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2879 +[titan] 2025-10-05 02:11:32,557 - root - INFO - lr: 4.7929e-05 gnorm: 1.24 [ 3:37:23<21:09:04] +[titan] 2025-10-05 02:11:43,442 - root - INFO - step: 5855 loss: 2.6553 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3404 +[titan] 2025-10-05 02:11:43,442 - root - INFO - lr: 4.7925e-05 gnorm: 1.31 [ 3:37:34<21:08:52] +[titan] 2025-10-05 02:11:54,344 - root - INFO - step: 5860 loss: 2.6942 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3769 +[titan] 2025-10-05 02:11:54,344 - root - INFO - lr: 4.7921e-05 gnorm: 1.23 [ 3:37:45<21:08:39] +[titan] 2025-10-05 02:12:05,223 - root - INFO - step: 5865 loss: 2.5612 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3011 global_avg_mtp_loss: 2.2601 +[titan] 2025-10-05 02:12:05,223 - root - INFO - lr: 4.7918e-05 gnorm: 1.19 [ 3:37:56<21:08:26] +[titan] 2025-10-05 02:12:16,102 - root - INFO - step: 5870 loss: 2.6730 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3578 +[titan] 2025-10-05 02:12:16,102 - root - INFO - lr: 4.7914e-05 gnorm: 1.22 [ 3:38:07<21:08:14] +[titan] 2025-10-05 02:12:26,998 - root - INFO - step: 5875 loss: 2.7092 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3873 +[titan] 2025-10-05 02:12:26,998 - root - INFO - lr: 4.7910e-05 gnorm: 1.27 [ 3:38:18<21:08:01] +[titan] 2025-10-05 02:12:37,886 - root - INFO - step: 5880 loss: 2.6639 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.3140 global_avg_mtp_loss: 2.3499 +[titan] 2025-10-05 02:12:37,886 - root - INFO - lr: 4.7906e-05 
gnorm: 1.23 [ 3:38:29<21:07:48] +[titan] 2025-10-05 02:12:48,782 - root - INFO - step: 5885 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3282 +[titan] 2025-10-05 02:12:48,782 - root - INFO - lr: 4.7903e-05 gnorm: 1.23 [ 3:38:40<21:07:36] +[titan] 2025-10-05 02:12:59,686 - root - INFO - step: 5890 loss: 2.6332 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3225 +[titan] 2025-10-05 02:12:59,686 - root - INFO - lr: 4.7899e-05 gnorm: 1.23 [ 3:38:50<21:07:23] +[titan] 2025-10-05 02:13:10,552 - root - INFO - step: 5895 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3779 +[titan] 2025-10-05 02:13:10,552 - root - INFO - lr: 4.7895e-05 gnorm: 1.20 [ 3:39:01<21:07:10] +[titan] 2025-10-05 02:13:19,229 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:13:21,417 - root - INFO - step: 5900 loss: 2.6773 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3598 +[titan] 2025-10-05 02:13:21,418 - root - INFO - lr: 4.7891e-05 gnorm: 1.21 [ 3:39:12<21:06:58] +[titan] 2025-10-05 02:13:32,300 - root - INFO - step: 5905 loss: 2.6413 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3290 +[titan] 2025-10-05 02:13:32,300 - root - INFO - lr: 4.7888e-05 gnorm: 1.21 [ 3:39:23<21:06:45] +[titan] 2025-10-05 02:13:43,183 - root - INFO - step: 5910 loss: 2.7061 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3853 +[titan] 2025-10-05 02:13:43,184 - root - INFO - lr: 4.7884e-05 gnorm: 1.23 [ 3:39:34<21:06:32] +[titan] 2025-10-05 02:13:54,153 - root - INFO - step: 5915 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 02:13:54,153 - root - INFO - lr: 4.7880e-05 gnorm: 1.20 [ 3:39:45<21:06:20] +[titan] 2025-10-05 02:14:05,035 - root - INFO - step: 5920 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3748 +[titan] 2025-10-05 02:14:05,035 - root - INFO - lr: 4.7876e-05 gnorm: 1.23 [ 3:39:56<21:06:07] +[titan] 2025-10-05 02:14:15,930 - root - INFO - step: 5925 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 02:14:15,930 - root - INFO - lr: 4.7872e-05 gnorm: 1.31 [ 3:40:07<21:05:55] +[titan] 2025-10-05 02:14:26,810 - root - INFO - step: 5930 loss: 2.5791 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 02:14:26,810 - root - INFO - lr: 4.7869e-05 
gnorm: 1.28 [ 3:40:18<21:05:42] +[titan] 2025-10-05 02:14:37,679 - root - INFO - step: 5935 loss: 2.8206 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3526 global_avg_mtp_loss: 2.4680 +[titan] 2025-10-05 02:14:37,679 - root - INFO - lr: 4.7865e-05 gnorm: 1.21 [ 3:40:28<21:05:29] +[titan] 2025-10-05 02:14:48,570 - root - INFO - step: 5940 loss: 2.6562 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3137 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:14:48,571 - root - INFO - lr: 4.7861e-05 gnorm: 1.27 [ 3:40:39<21:05:17] +[titan] 2025-10-05 02:14:59,517 - root - INFO - step: 5945 loss: 2.6955 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3188 global_avg_mtp_loss: 2.3767 +[titan] 2025-10-05 02:14:59,517 - root - INFO - lr: 4.7857e-05 gnorm: 1.24 [ 3:40:50<21:05:05] +[titan] 2025-10-05 02:15:08,203 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:15:10,403 - root - INFO - step: 5950 loss: 2.6441 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3332 +[titan] 2025-10-05 02:15:10,403 - root - INFO - lr: 4.7853e-05 gnorm: 1.24 [ 3:41:01<21:04:52] +[titan] 2025-10-05 02:15:21,261 - root - INFO - step: 5955 loss: 2.6351 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3224 +[titan] 2025-10-05 02:15:21,261 - root - INFO - lr: 4.7850e-05 gnorm: 1.27 [ 3:41:12<21:04:39] +[titan] 2025-10-05 02:15:32,145 - root - INFO - step: 5960 loss: 2.5704 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2660 +[titan] 2025-10-05 02:15:32,146 - root - INFO - lr: 4.7846e-05 gnorm: 1.24 [ 3:41:23<21:04:27] +[titan] 2025-10-05 02:15:43,038 - root - INFO - step: 5965 loss: 2.6451 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3134 global_avg_mtp_loss: 2.3317 +[titan] 2025-10-05 02:15:43,038 - root - INFO - lr: 4.7842e-05 gnorm: 1.24 [ 3:41:34<21:04:14] +[titan] 2025-10-05 02:15:53,932 - root - INFO - step: 5970 loss: 2.6446 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:15:53,932 - root - INFO - lr: 4.7838e-05 gnorm: 1.25 [ 3:41:45<21:04:01] +[titan] 2025-10-05 02:16:04,943 - root - INFO - step: 5975 loss: 2.6984 memory: 118.84GiB(85.28%) tps: 29,760 tflops: 412.88 mfu: 41.75% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 02:16:04,943 - root - INFO - lr: 4.7834e-05 gnorm: 1.22 [ 3:41:56<21:03:49] +[titan] 2025-10-05 02:16:15,864 - root - INFO - step: 5980 loss: 2.6883 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3697 +[titan] 2025-10-05 02:16:15,864 - root - INFO - lr: 4.7831e-05 
gnorm: 1.23 [ 3:42:07<21:03:37] +[titan] 2025-10-05 02:16:26,743 - root - INFO - step: 5985 loss: 2.6999 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3205 global_avg_mtp_loss: 2.3795 +[titan] 2025-10-05 02:16:26,743 - root - INFO - lr: 4.7827e-05 gnorm: 1.25 [ 3:42:18<21:03:24] +[titan] 2025-10-05 02:16:37,616 - root - INFO - step: 5990 loss: 2.6514 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3368 +[titan] 2025-10-05 02:16:37,616 - root - INFO - lr: 4.7823e-05 gnorm: 1.24 [ 3:42:28<21:03:12] +[titan] 2025-10-05 02:16:48,504 - root - INFO - step: 5995 loss: 2.6633 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3143 global_avg_mtp_loss: 2.3490 +[titan] 2025-10-05 02:16:48,504 - root - INFO - lr: 4.7819e-05 gnorm: 1.24 [ 3:42:39<21:02:59] +[titan] 2025-10-05 02:16:57,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:16:59,424 - root - INFO - step: 6000 loss: 2.7331 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4096 +[titan] 2025-10-05 02:16:59,424 - root - INFO - lr: 4.7815e-05 gnorm: 1.20 [ 3:42:50<21:02:47] +[titan] 2025-10-05 02:17:10,295 - root - INFO - step: 6005 loss: 2.6202 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3105 +[titan] 2025-10-05 02:17:10,295 - root - INFO - lr: 4.7811e-05 gnorm: 1.18 [ 3:43:01<21:02:34] +[titan] 2025-10-05 02:17:21,201 - root - INFO - step: 6010 loss: 2.5634 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2608 +[titan] 2025-10-05 02:17:21,201 - root - INFO - lr: 4.7808e-05 gnorm: 1.22 [ 3:43:12<21:02:22] +[titan] 2025-10-05 02:17:32,081 - root - INFO - step: 6015 loss: 2.6412 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3292 +[titan] 2025-10-05 02:17:32,082 - root - INFO - lr: 4.7804e-05 gnorm: 1.19 [ 3:43:23<21:02:09] +[titan] 2025-10-05 02:17:42,964 - root - INFO - step: 6020 loss: 2.7137 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3909 +[titan] 2025-10-05 02:17:42,964 - root - INFO - lr: 4.7800e-05 gnorm: 1.23 [ 3:43:34<21:01:56] +[titan] 2025-10-05 02:17:53,873 - root - INFO - step: 6025 loss: 2.6409 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3232 +[titan] 2025-10-05 02:17:53,873 - root - INFO - lr: 4.7796e-05 gnorm: 1.20 [ 3:43:45<21:01:44] +[titan] 2025-10-05 02:18:04,793 - root - INFO - step: 6030 loss: 2.6673 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3165 global_avg_mtp_loss: 2.3508 +[titan] 2025-10-05 02:18:04,793 - root - INFO - lr: 4.7792e-05 
gnorm: 1.27 [ 3:43:56<21:01:32] +[titan] 2025-10-05 02:18:15,648 - root - INFO - step: 6035 loss: 2.5627 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2603 +[titan] 2025-10-05 02:18:15,648 - root - INFO - lr: 4.7788e-05 gnorm: 1.20 [ 3:44:06<21:01:19] +[titan] 2025-10-05 02:18:26,520 - root - INFO - step: 6040 loss: 2.6300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:18:26,520 - root - INFO - lr: 4.7784e-05 gnorm: 1.19 [ 3:44:17<21:01:06] +[titan] 2025-10-05 02:18:37,421 - root - INFO - step: 6045 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2925 +[titan] 2025-10-05 02:18:37,421 - root - INFO - lr: 4.7781e-05 gnorm: 1.23 [ 3:44:28<21:00:54] +[titan] 2025-10-05 02:18:46,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:18:48,292 - root - INFO - step: 6050 loss: 2.6234 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3122 +[titan] 2025-10-05 02:18:48,292 - root - INFO - lr: 4.7777e-05 gnorm: 1.22 [ 3:44:39<21:00:41] +[titan] 2025-10-05 02:18:59,214 - root - INFO - step: 6055 loss: 2.7909 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3399 global_avg_mtp_loss: 2.4510 +[titan] 2025-10-05 02:18:59,214 - root - INFO - lr: 4.7773e-05 gnorm: 1.28 [ 3:44:50<21:00:29] +[titan] 2025-10-05 02:19:10,081 - root - INFO - step: 6060 loss: 2.7169 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 02:19:10,081 - root - INFO - lr: 4.7769e-05 gnorm: 1.19 [ 3:45:01<21:00:16] +[titan] 2025-10-05 02:19:20,960 - root - INFO - step: 6065 loss: 2.5899 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3057 global_avg_mtp_loss: 2.2843 +[titan] 2025-10-05 02:19:20,960 - root - INFO - lr: 4.7765e-05 gnorm: 1.20 [ 3:45:12<21:00:03] +[titan] 2025-10-05 02:19:31,815 - root - INFO - step: 6070 loss: 2.5974 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2900 +[titan] 2025-10-05 02:19:31,815 - root - INFO - lr: 4.7761e-05 gnorm: 1.19 [ 3:45:23<20:59:51] +[titan] 2025-10-05 02:19:42,704 - root - INFO - step: 6075 loss: 2.5388 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2387 +[titan] 2025-10-05 02:19:42,705 - root - INFO - lr: 4.7757e-05 gnorm: 1.24 [ 3:45:33<20:59:38] +[titan] 2025-10-05 02:19:53,571 - root - INFO - step: 6080 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2553 +[titan] 2025-10-05 02:19:53,571 - root - INFO - lr: 4.7753e-05 
gnorm: 1.24 [ 3:45:44<20:59:25] +[titan] 2025-10-05 02:20:04,484 - root - INFO - step: 6085 loss: 2.6574 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 02:20:04,484 - root - INFO - lr: 4.7750e-05 gnorm: 1.25 [ 3:45:55<20:59:13] +[titan] 2025-10-05 02:20:15,352 - root - INFO - step: 6090 loss: 2.6004 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2933 +[titan] 2025-10-05 02:20:15,352 - root - INFO - lr: 4.7746e-05 gnorm: 1.29 [ 3:46:06<20:59:00] +[titan] 2025-10-05 02:20:26,230 - root - INFO - step: 6095 loss: 2.6515 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:20:26,230 - root - INFO - lr: 4.7742e-05 gnorm: 1.25 [ 3:46:17<20:58:48] +[titan] 2025-10-05 02:20:34,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:20:37,079 - root - INFO - step: 6100 loss: 2.6900 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3703 +[titan] 2025-10-05 02:20:37,079 - root - INFO - lr: 4.7738e-05 gnorm: 1.19 [ 3:46:28<20:58:35] +[titan] 2025-10-05 02:20:47,995 - root - INFO - step: 6105 loss: 2.7058 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3851 +[titan] 2025-10-05 02:20:47,995 - root - INFO - lr: 4.7734e-05 gnorm: 1.26 [ 3:46:39<20:58:23] +[titan] 2025-10-05 02:20:58,928 - root - INFO - step: 6110 loss: 2.6693 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3537 +[titan] 2025-10-05 02:20:58,928 - root - INFO - lr: 4.7730e-05 gnorm: 1.27 [ 3:46:50<20:58:10] +[titan] 2025-10-05 02:21:09,804 - root - INFO - step: 6115 loss: 2.5456 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:21:09,804 - root - INFO - lr: 4.7726e-05 gnorm: 1.13 [ 3:47:01<20:57:58] +[titan] 2025-10-05 02:21:20,686 - root - INFO - step: 6120 loss: 2.6377 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3244 +[titan] 2025-10-05 02:21:20,686 - root - INFO - lr: 4.7722e-05 gnorm: 1.17 [ 3:47:11<20:57:45] +[titan] 2025-10-05 02:21:31,544 - root - INFO - step: 6125 loss: 2.5803 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:21:31,544 - root - INFO - lr: 4.7718e-05 gnorm: 1.19 [ 3:47:22<20:57:33] +[titan] 2025-10-05 02:21:42,406 - root - INFO - step: 6130 loss: 2.6986 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3790 +[titan] 2025-10-05 02:21:42,406 - root - INFO - lr: 4.7714e-05 
gnorm: 1.30 [ 3:47:33<20:57:20] +[titan] 2025-10-05 02:21:53,244 - root - INFO - step: 6135 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:21:53,244 - root - INFO - lr: 4.7710e-05 gnorm: 1.24 [ 3:47:44<20:57:07] +[titan] 2025-10-05 02:22:04,175 - root - INFO - step: 6140 loss: 2.5814 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3040 global_avg_mtp_loss: 2.2775 +[titan] 2025-10-05 02:22:04,175 - root - INFO - lr: 4.7707e-05 gnorm: 1.23 [ 3:47:55<20:56:55] +[titan] 2025-10-05 02:22:13,108 - root - INFO - Dumping profiler traces at step 6144 +[titan] 2025-10-05 02:22:13,143 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 02:22:15,343 - root - INFO - step: 6145 loss: 2.6735 memory: 118.84GiB(85.28%) tps: 29,342 tflops: 407.07 mfu: 41.16% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3580 +[titan] 2025-10-05 02:22:15,343 - root - INFO - lr: 4.7703e-05 gnorm: 1.26 [ 3:48:06<20:56:44] +[titan] 2025-10-05 02:22:24,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:22:26,217 - root - INFO - step: 6150 loss: 2.6490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3135 global_avg_mtp_loss: 2.3355 +[titan] 2025-10-05 02:22:26,217 - root - INFO - lr: 4.7699e-05 gnorm: 1.24 [ 3:48:17<20:56:31] +[titan] 2025-10-05 02:22:37,096 - root - INFO - step: 6155 loss: 2.6463 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3340 +[titan] 2025-10-05 02:22:37,096 - root - INFO - lr: 4.7695e-05 gnorm: 1.18 [ 3:48:28<20:56:19] +[titan] 2025-10-05 02:22:47,962 - root - INFO - step: 6160 loss: 2.6975 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:22:47,962 - root - INFO - lr: 4.7691e-05 gnorm: 1.25 [ 3:48:39<20:56:06] +[titan] 2025-10-05 02:22:58,842 - root - INFO - step: 6165 loss: 2.6719 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 02:22:58,842 - root - INFO - lr: 4.7687e-05 gnorm: 1.27 [ 3:48:50<20:55:54] +[titan] 2025-10-05 02:23:09,781 - root - INFO - step: 6170 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3183 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:23:09,781 - root - INFO - lr: 4.7683e-05 gnorm: 1.18 [ 3:49:01<20:55:41] +[titan] 2025-10-05 02:23:20,657 - root - INFO - step: 6175 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2762 +[titan] 2025-10-05 02:23:20,657 - root - INFO - lr: 4.7679e-05 gnorm: 1.25 [ 3:49:11<20:55:29] +[titan] 2025-10-05 02:23:31,536 - root - INFO - step: 6180 loss: 2.6338 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3119 global_avg_mtp_loss: 2.3219 +[titan] 2025-10-05 02:23:31,536 - root - INFO - lr: 4.7675e-05 
gnorm: 1.21 [ 3:49:22<20:55:16] +[titan] 2025-10-05 02:23:42,416 - root - INFO - step: 6185 loss: 2.6751 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3584 +[titan] 2025-10-05 02:23:42,416 - root - INFO - lr: 4.7671e-05 gnorm: 1.23 [ 3:49:33<20:55:04] +[titan] 2025-10-05 02:23:53,282 - root - INFO - step: 6190 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:23:53,282 - root - INFO - lr: 4.7667e-05 gnorm: 1.94 [ 3:49:44<20:54:51] +[titan] 2025-10-05 02:24:04,176 - root - INFO - step: 6195 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.3001 +[titan] 2025-10-05 02:24:04,177 - root - INFO - lr: 4.7663e-05 gnorm: 1.30 [ 3:49:55<20:54:39] +[titan] 2025-10-05 02:24:12,861 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:24:15,046 - root - INFO - step: 6200 loss: 2.6013 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2936 +[titan] 2025-10-05 02:24:15,047 - root - INFO - lr: 4.7659e-05 gnorm: 1.22 [ 3:50:06<20:54:26] +[titan] 2025-10-05 02:24:25,976 - root - INFO - step: 6205 loss: 2.6406 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:24:25,976 - root - INFO - lr: 4.7655e-05 gnorm: 1.21 [ 3:50:17<20:54:14] +[titan] 2025-10-05 02:24:36,842 - root - INFO - step: 6210 loss: 2.5418 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 02:24:36,842 - root - INFO - lr: 4.7651e-05 gnorm: 1.17 [ 3:50:28<20:54:01] +[titan] 2025-10-05 02:24:47,725 - root - INFO - step: 6215 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2744 +[titan] 2025-10-05 02:24:47,725 - root - INFO - lr: 4.7647e-05 gnorm: 1.20 [ 3:50:38<20:53:49] +[titan] 2025-10-05 02:24:58,595 - root - INFO - step: 6220 loss: 2.6116 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 02:24:58,595 - root - INFO - lr: 4.7643e-05 gnorm: 1.26 [ 3:50:49<20:53:36] +[titan] 2025-10-05 02:25:09,462 - root - INFO - step: 6225 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3158 +[titan] 2025-10-05 02:25:09,462 - root - INFO - lr: 4.7639e-05 gnorm: 1.26 [ 3:51:00<20:53:24] +[titan] 2025-10-05 02:25:20,338 - root - INFO - step: 6230 loss: 2.6316 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3207 +[titan] 2025-10-05 02:25:20,338 - root - INFO - lr: 4.7635e-05 
gnorm: 1.26 [ 3:51:11<20:53:11] +[titan] 2025-10-05 02:25:31,243 - root - INFO - step: 6235 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:25:31,243 - root - INFO - lr: 4.7631e-05 gnorm: 1.24 [ 3:51:22<20:52:59] +[titan] 2025-10-05 02:25:42,123 - root - INFO - step: 6240 loss: 2.6737 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3161 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:25:42,123 - root - INFO - lr: 4.7627e-05 gnorm: 1.21 [ 3:51:33<20:52:46] +[titan] 2025-10-05 02:25:53,008 - root - INFO - step: 6245 loss: 2.6264 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3138 +[titan] 2025-10-05 02:25:53,008 - root - INFO - lr: 4.7623e-05 gnorm: 1.21 [ 3:51:44<20:52:34] +[titan] 2025-10-05 02:26:01,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:26:03,944 - root - INFO - step: 6250 loss: 2.6166 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 02:26:03,944 - root - INFO - lr: 4.7619e-05 gnorm: 1.24 [ 3:51:55<20:52:21] +[titan] 2025-10-05 02:26:14,837 - root - INFO - step: 6255 loss: 2.5876 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2818 +[titan] 2025-10-05 02:26:14,837 - root - INFO - lr: 4.7615e-05 gnorm: 1.18 [ 3:52:06<20:52:09] +[titan] 2025-10-05 02:26:25,726 - root - INFO - step: 6260 loss: 2.7070 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3866 +[titan] 2025-10-05 02:26:25,726 - root - INFO - lr: 4.7611e-05 gnorm: 1.20 [ 3:52:16<20:51:57] +[titan] 2025-10-05 02:26:36,613 - root - INFO - step: 6265 loss: 2.6830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3168 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:26:36,613 - root - INFO - lr: 4.7607e-05 gnorm: 1.22 [ 3:52:27<20:51:44] +[titan] 2025-10-05 02:26:47,499 - root - INFO - step: 6270 loss: 2.4995 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2939 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 02:26:47,499 - root - INFO - lr: 4.7603e-05 gnorm: 1.22 [ 3:52:38<20:51:32] +[titan] 2025-10-05 02:26:58,361 - root - INFO - step: 6275 loss: 2.5337 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2345 +[titan] 2025-10-05 02:26:58,361 - root - INFO - lr: 4.7599e-05 gnorm: 1.19 [ 3:52:49<20:51:19] +[titan] 2025-10-05 02:27:09,255 - root - INFO - step: 6280 loss: 2.5465 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 02:27:09,255 - root - INFO - lr: 4.7595e-05 
gnorm: 1.18 [ 3:53:00<20:51:07] +[titan] 2025-10-05 02:27:20,123 - root - INFO - step: 6285 loss: 2.6725 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3560 +[titan] 2025-10-05 02:27:20,123 - root - INFO - lr: 4.7591e-05 gnorm: 1.25 [ 3:53:11<20:50:54] +[titan] 2025-10-05 02:27:30,985 - root - INFO - step: 6290 loss: 2.6086 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.2999 +[titan] 2025-10-05 02:27:30,985 - root - INFO - lr: 4.7587e-05 gnorm: 1.20 [ 3:53:22<20:50:42] +[titan] 2025-10-05 02:27:41,851 - root - INFO - step: 6295 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:27:41,851 - root - INFO - lr: 4.7583e-05 gnorm: 1.19 [ 3:53:33<20:50:29] +[titan] 2025-10-05 02:27:50,574 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:27:52,765 - root - INFO - step: 6300 loss: 2.6057 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2983 +[titan] 2025-10-05 02:27:52,765 - root - INFO - lr: 4.7579e-05 gnorm: 1.25 [ 3:53:44<20:50:17] +[titan] 2025-10-05 02:28:03,660 - root - INFO - step: 6305 loss: 2.6038 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3067 global_avg_mtp_loss: 2.2971 +[titan] 2025-10-05 02:28:03,661 - root - INFO - lr: 4.7575e-05 gnorm: 1.34 [ 3:53:54<20:50:04] +[titan] 2025-10-05 02:28:14,554 - root - INFO - step: 6310 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3056 +[titan] 2025-10-05 02:28:14,554 - root - INFO - lr: 4.7571e-05 gnorm: 1.26 [ 3:54:05<20:49:52] +[titan] 2025-10-05 02:28:25,460 - root - INFO - step: 6315 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 02:28:25,460 - root - INFO - lr: 4.7567e-05 gnorm: 1.30 [ 3:54:16<20:49:40] +[titan] 2025-10-05 02:28:36,327 - root - INFO - step: 6320 loss: 2.6294 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3180 +[titan] 2025-10-05 02:28:36,327 - root - INFO - lr: 4.7563e-05 gnorm: 1.20 [ 3:54:27<20:49:27] +[titan] 2025-10-05 02:28:47,212 - root - INFO - step: 6325 loss: 2.5971 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:28:47,212 - root - INFO - lr: 4.7559e-05 gnorm: 1.24 [ 3:54:38<20:49:15] +[titan] 2025-10-05 02:28:58,148 - root - INFO - step: 6330 loss: 2.5947 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2896 +[titan] 2025-10-05 02:28:58,148 - root - INFO - lr: 4.7555e-05 
gnorm: 1.17 [ 3:54:49<20:49:03] +[titan] 2025-10-05 02:29:09,045 - root - INFO - step: 6335 loss: 2.6560 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3421 +[titan] 2025-10-05 02:29:09,045 - root - INFO - lr: 4.7551e-05 gnorm: 1.23 [ 3:55:00<20:48:50] +[titan] 2025-10-05 02:29:19,929 - root - INFO - step: 6340 loss: 2.5919 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2841 +[titan] 2025-10-05 02:29:19,929 - root - INFO - lr: 4.7547e-05 gnorm: 1.21 [ 3:55:11<20:48:38] +[titan] 2025-10-05 02:29:30,803 - root - INFO - step: 6345 loss: 2.6337 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3228 +[titan] 2025-10-05 02:29:30,803 - root - INFO - lr: 4.7543e-05 gnorm: 1.20 [ 3:55:22<20:48:25] +[titan] 2025-10-05 02:29:39,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:29:41,698 - root - INFO - step: 6350 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.2911 +[titan] 2025-10-05 02:29:41,698 - root - INFO - lr: 4.7539e-05 gnorm: 1.21 [ 3:55:32<20:48:13] +[titan] 2025-10-05 02:29:52,582 - root - INFO - step: 6355 loss: 2.5766 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2722 +[titan] 2025-10-05 02:29:52,582 - root - INFO - lr: 4.7535e-05 gnorm: 1.31 [ 3:55:43<20:48:00] +[titan] 2025-10-05 02:30:03,454 - root - INFO - step: 6360 loss: 2.6402 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3288 +[titan] 2025-10-05 02:30:03,454 - root - INFO - lr: 4.7531e-05 gnorm: 1.19 [ 3:55:54<20:47:48] +[titan] 2025-10-05 02:30:14,404 - root - INFO - step: 6365 loss: 2.5756 memory: 118.84GiB(85.28%) tps: 29,925 tflops: 415.16 mfu: 41.98% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2718 +[titan] 2025-10-05 02:30:14,405 - root - INFO - lr: 4.7527e-05 gnorm: 1.21 [ 3:56:05<20:47:36] +[titan] 2025-10-05 02:30:25,300 - root - INFO - step: 6370 loss: 2.6721 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3566 +[titan] 2025-10-05 02:30:25,301 - root - INFO - lr: 4.7523e-05 gnorm: 1.26 [ 3:56:16<20:47:24] +[titan] 2025-10-05 02:30:36,188 - root - INFO - step: 6375 loss: 2.6701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3548 +[titan] 2025-10-05 02:30:36,189 - root - INFO - lr: 4.7519e-05 gnorm: 1.26 [ 3:56:27<20:47:11] +[titan] 2025-10-05 02:30:47,063 - root - INFO - step: 6380 loss: 2.6577 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:30:47,063 - root - INFO - lr: 4.7514e-05 
gnorm: 1.19 [ 3:56:38<20:46:59] +[titan] 2025-10-05 02:30:57,930 - root - INFO - step: 6385 loss: 2.5739 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:30:57,930 - root - INFO - lr: 4.7510e-05 gnorm: 1.20 [ 3:56:49<20:46:46] +[titan] 2025-10-05 02:31:08,797 - root - INFO - step: 6390 loss: 2.6461 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3331 +[titan] 2025-10-05 02:31:08,797 - root - INFO - lr: 4.7506e-05 gnorm: 1.18 [ 3:57:00<20:46:34] +[titan] 2025-10-05 02:31:19,713 - root - INFO - step: 6395 loss: 2.6359 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3246 +[titan] 2025-10-05 02:31:19,713 - root - INFO - lr: 4.7502e-05 gnorm: 1.18 [ 3:57:10<20:46:21] +[titan] 2025-10-05 02:31:28,409 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:31:30,590 - root - INFO - step: 6400 loss: 2.6427 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.3304 +[titan] 2025-10-05 02:31:30,590 - root - INFO - lr: 4.7498e-05 gnorm: 1.20 [ 3:57:21<20:46:09] +[titan] 2025-10-05 02:31:41,458 - root - INFO - step: 6405 loss: 2.5702 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:31:41,458 - root - INFO - lr: 4.7494e-05 gnorm: 1.26 [ 3:57:32<20:45:57] +[titan] 2025-10-05 02:31:52,328 - root - INFO - step: 6410 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2830 +[titan] 2025-10-05 02:31:52,329 - root - INFO - lr: 4.7490e-05 gnorm: 1.28 [ 3:57:43<20:45:44] +[titan] 2025-10-05 02:32:03,197 - root - INFO - step: 6415 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:32:03,197 - root - INFO - lr: 4.7486e-05 gnorm: 1.24 [ 3:57:54<20:45:32] +[titan] 2025-10-05 02:32:14,081 - root - INFO - step: 6420 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2355 +[titan] 2025-10-05 02:32:14,082 - root - INFO - lr: 4.7482e-05 gnorm: 1.22 [ 3:58:05<20:45:19] +[titan] 2025-10-05 02:32:25,006 - root - INFO - step: 6425 loss: 2.6729 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:32:25,007 - root - INFO - lr: 4.7478e-05 gnorm: 1.28 [ 3:58:16<20:45:07] +[titan] 2025-10-05 02:32:35,882 - root - INFO - step: 6430 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2763 +[titan] 2025-10-05 02:32:35,883 - root - INFO - lr: 4.7474e-05 
gnorm: 1.22 [ 3:58:27<20:44:55] +[titan] 2025-10-05 02:32:46,767 - root - INFO - step: 6435 loss: 2.5922 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2859 +[titan] 2025-10-05 02:32:46,767 - root - INFO - lr: 4.7469e-05 gnorm: 1.22 [ 3:58:37<20:44:42] +[titan] 2025-10-05 02:32:57,635 - root - INFO - step: 6440 loss: 2.5566 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2552 +[titan] 2025-10-05 02:32:57,635 - root - INFO - lr: 4.7465e-05 gnorm: 1.19 [ 3:58:48<20:44:30] +[titan] 2025-10-05 02:33:08,509 - root - INFO - step: 6445 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3471 +[titan] 2025-10-05 02:33:08,509 - root - INFO - lr: 4.7461e-05 gnorm: 1.18 [ 3:58:59<20:44:17] +[titan] 2025-10-05 02:33:17,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:33:19,369 - root - INFO - step: 6450 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2865 +[titan] 2025-10-05 02:33:19,369 - root - INFO - lr: 4.7457e-05 gnorm: 1.22 [ 3:59:10<20:44:05] +[titan] 2025-10-05 02:33:30,229 - root - INFO - step: 6455 loss: 2.6465 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3333 +[titan] 2025-10-05 02:33:30,229 - root - INFO - lr: 4.7453e-05 gnorm: 1.20 [ 3:59:21<20:43:52] +[titan] 2025-10-05 02:33:41,125 - root - INFO - step: 6460 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2613 +[titan] 2025-10-05 02:33:41,125 - root - INFO - lr: 4.7449e-05 gnorm: 1.21 [ 3:59:32<20:43:40] +[titan] 2025-10-05 02:33:51,972 - root - INFO - step: 6465 loss: 2.6340 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.3110 global_avg_mtp_loss: 2.3230 +[titan] 2025-10-05 02:33:51,972 - root - INFO - lr: 4.7445e-05 gnorm: 1.25 [ 3:59:43<20:43:27] +[titan] 2025-10-05 02:34:02,829 - root - INFO - step: 6470 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:34:02,830 - root - INFO - lr: 4.7441e-05 gnorm: 1.22 [ 3:59:54<20:43:15] +[titan] 2025-10-05 02:34:13,713 - root - INFO - step: 6475 loss: 2.6622 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3477 +[titan] 2025-10-05 02:34:13,713 - root - INFO - lr: 4.7436e-05 gnorm: 1.22 [ 4:00:04<20:43:03] +[titan] 2025-10-05 02:34:24,581 - root - INFO - step: 6480 loss: 2.5985 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2920 +[titan] 2025-10-05 02:34:24,581 - root - INFO - lr: 4.7432e-05 
gnorm: 1.22 [ 4:00:15<20:42:50] +[titan] 2025-10-05 02:34:35,430 - root - INFO - step: 6485 loss: 2.5699 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2657 +[titan] 2025-10-05 02:34:35,430 - root - INFO - lr: 4.7428e-05 gnorm: 1.26 [ 4:00:26<20:42:38] +[titan] 2025-10-05 02:34:46,317 - root - INFO - step: 6490 loss: 2.5393 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:34:46,318 - root - INFO - lr: 4.7424e-05 gnorm: 1.22 [ 4:00:37<20:42:25] +[titan] 2025-10-05 02:34:57,192 - root - INFO - step: 6495 loss: 2.6369 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3116 global_avg_mtp_loss: 2.3253 +[titan] 2025-10-05 02:34:57,193 - root - INFO - lr: 4.7420e-05 gnorm: 1.23 [ 4:00:48<20:42:13] +[titan] 2025-10-05 02:35:05,878 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:35:08,068 - root - INFO - step: 6500 loss: 2.5435 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3003 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 02:35:08,068 - root - INFO - lr: 4.7416e-05 gnorm: 1.25 [ 4:00:59<20:42:00] +[titan] 2025-10-05 02:35:18,953 - root - INFO - step: 6505 loss: 2.6050 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2979 +[titan] 2025-10-05 02:35:18,953 - root - INFO - lr: 4.7412e-05 gnorm: 1.26 [ 4:01:10<20:41:48] +[titan] 2025-10-05 02:35:29,825 - root - INFO - step: 6510 loss: 2.5818 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2782 +[titan] 2025-10-05 02:35:29,825 - root - INFO - lr: 4.7407e-05 gnorm: 1.19 [ 4:01:21<20:41:36] +[titan] 2025-10-05 02:35:40,705 - root - INFO - step: 6515 loss: 2.5167 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2195 +[titan] 2025-10-05 02:35:40,705 - root - INFO - lr: 4.7403e-05 gnorm: 1.18 [ 4:01:31<20:41:23] +[titan] 2025-10-05 02:35:51,579 - root - INFO - step: 6520 loss: 2.6889 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3704 +[titan] 2025-10-05 02:35:51,580 - root - INFO - lr: 4.7399e-05 gnorm: 1.25 [ 4:01:42<20:41:11] +[titan] 2025-10-05 02:36:02,521 - root - INFO - step: 6525 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3046 global_avg_mtp_loss: 2.2801 +[titan] 2025-10-05 02:36:02,521 - root - INFO - lr: 4.7395e-05 gnorm: 1.21 [ 4:01:53<20:40:59] +[titan] 2025-10-05 02:36:13,407 - root - INFO - step: 6530 loss: 2.5064 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2103 +[titan] 2025-10-05 02:36:13,407 - root - INFO - lr: 4.7391e-05 
gnorm: 1.17 [ 4:02:04<20:40:47] +[titan] 2025-10-05 02:36:24,288 - root - INFO - step: 6535 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:36:24,289 - root - INFO - lr: 4.7387e-05 gnorm: 1.26 [ 4:02:15<20:40:34] +[titan] 2025-10-05 02:36:35,150 - root - INFO - step: 6540 loss: 2.6944 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 02:36:35,150 - root - INFO - lr: 4.7382e-05 gnorm: 1.22 [ 4:02:26<20:40:22] +[titan] 2025-10-05 02:36:46,030 - root - INFO - step: 6545 loss: 2.5975 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2909 +[titan] 2025-10-05 02:36:46,030 - root - INFO - lr: 4.7378e-05 gnorm: 1.25 [ 4:02:37<20:40:09] +[titan] 2025-10-05 02:36:54,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:36:56,903 - root - INFO - step: 6550 loss: 2.5802 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2749 +[titan] 2025-10-05 02:36:56,904 - root - INFO - lr: 4.7374e-05 gnorm: 1.18 [ 4:02:48<20:39:57] +[titan] 2025-10-05 02:37:07,790 - root - INFO - step: 6555 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 02:37:07,790 - root - INFO - lr: 4.7370e-05 gnorm: 1.23 [ 4:02:59<20:39:45] +[titan] 2025-10-05 02:37:18,673 - root - INFO - step: 6560 loss: 2.6310 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3210 +[titan] 2025-10-05 02:37:18,673 - root - INFO - lr: 4.7366e-05 gnorm: 1.22 [ 4:03:09<20:39:32] +[titan] 2025-10-05 02:37:29,519 - root - INFO - step: 6565 loss: 2.6348 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3242 +[titan] 2025-10-05 02:37:29,520 - root - INFO - lr: 4.7361e-05 gnorm: 1.24 [ 4:03:20<20:39:20] +[titan] 2025-10-05 02:37:40,400 - root - INFO - step: 6570 loss: 2.5419 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2417 +[titan] 2025-10-05 02:37:40,400 - root - INFO - lr: 4.7357e-05 gnorm: 1.19 [ 4:03:31<20:39:07] +[titan] 2025-10-05 02:37:51,268 - root - INFO - step: 6575 loss: 2.5865 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2812 +[titan] 2025-10-05 02:37:51,269 - root - INFO - lr: 4.7353e-05 gnorm: 1.28 [ 4:03:42<20:38:55] +[titan] 2025-10-05 02:38:02,128 - root - INFO - step: 6580 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:38:02,128 - root - INFO - lr: 4.7349e-05 
gnorm: 1.20 [ 4:03:53<20:38:43] +[titan] 2025-10-05 02:38:13,063 - root - INFO - step: 6585 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3373 +[titan] 2025-10-05 02:38:13,063 - root - INFO - lr: 4.7345e-05 gnorm: 1.24 [ 4:04:04<20:38:31] +[titan] 2025-10-05 02:38:23,973 - root - INFO - step: 6590 loss: 2.6349 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3243 +[titan] 2025-10-05 02:38:23,974 - root - INFO - lr: 4.7340e-05 gnorm: 1.19 [ 4:04:15<20:38:18] +[titan] 2025-10-05 02:38:34,826 - root - INFO - step: 6595 loss: 2.7415 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4095 +[titan] 2025-10-05 02:38:34,826 - root - INFO - lr: 4.7336e-05 gnorm: 1.21 [ 4:04:26<20:38:06] +[titan] 2025-10-05 02:38:43,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:38:45,682 - root - INFO - step: 6600 loss: 2.5758 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2716 +[titan] 2025-10-05 02:38:45,682 - root - INFO - lr: 4.7332e-05 gnorm: 1.18 [ 4:04:36<20:37:53] +[titan] 2025-10-05 02:38:56,550 - root - INFO - step: 6605 loss: 2.5294 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2310 +[titan] 2025-10-05 02:38:56,550 - root - INFO - lr: 4.7328e-05 gnorm: 1.19 [ 4:04:47<20:37:41] +[titan] 2025-10-05 02:39:07,416 - root - INFO - step: 6610 loss: 2.5451 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:39:07,416 - root - INFO - lr: 4.7324e-05 gnorm: 1.20 [ 4:04:58<20:37:29] +[titan] 2025-10-05 02:39:18,347 - root - INFO - step: 6615 loss: 2.7044 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3846 +[titan] 2025-10-05 02:39:18,347 - root - INFO - lr: 4.7319e-05 gnorm: 1.23 [ 4:05:09<20:37:17] +[titan] 2025-10-05 02:39:29,249 - root - INFO - step: 6620 loss: 2.5846 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3048 global_avg_mtp_loss: 2.2799 +[titan] 2025-10-05 02:39:29,249 - root - INFO - lr: 4.7315e-05 gnorm: 1.16 [ 4:05:20<20:37:04] +[titan] 2025-10-05 02:39:40,113 - root - INFO - step: 6625 loss: 2.6491 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3371 +[titan] 2025-10-05 02:39:40,113 - root - INFO - lr: 4.7311e-05 gnorm: 1.27 [ 4:05:31<20:36:52] +[titan] 2025-10-05 02:39:50,990 - root - INFO - step: 6630 loss: 2.5891 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:39:50,990 - root - INFO - lr: 4.7307e-05 
gnorm: 1.21 [ 4:05:42<20:36:40] +[titan] 2025-10-05 02:40:01,853 - root - INFO - step: 6635 loss: 2.6888 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3719 +[titan] 2025-10-05 02:40:01,853 - root - INFO - lr: 4.7302e-05 gnorm: 1.21 [ 4:05:53<20:36:27] +[titan] 2025-10-05 02:40:12,718 - root - INFO - step: 6640 loss: 2.5610 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2587 +[titan] 2025-10-05 02:40:12,718 - root - INFO - lr: 4.7298e-05 gnorm: 1.19 [ 4:06:03<20:36:15] +[titan] 2025-10-05 02:40:23,628 - root - INFO - step: 6645 loss: 2.5680 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2644 +[titan] 2025-10-05 02:40:23,628 - root - INFO - lr: 4.7294e-05 gnorm: 1.27 [ 4:06:14<20:36:03] +[titan] 2025-10-05 02:40:32,372 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:40:34,565 - root - INFO - step: 6650 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3119 +[titan] 2025-10-05 02:40:34,566 - root - INFO - lr: 4.7290e-05 gnorm: 1.21 [ 4:06:25<20:35:51] +[titan] 2025-10-05 02:40:45,524 - root - INFO - step: 6655 loss: 2.6619 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:40:45,524 - root - INFO - lr: 4.7285e-05 gnorm: 1.16 [ 4:06:36<20:35:39] +[titan] 2025-10-05 02:40:47,884 - root - INFO - Dumping profiler traces at step 6656 +[titan] 2025-10-05 02:40:47,923 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:40:56,627 - root - INFO - step: 6660 loss: 2.5796 memory: 118.84GiB(85.28%) tps: 29,513 tflops: 409.44 mfu: 41.40% global_avg_ntp_loss: 0.3032 global_avg_mtp_loss: 2.2764 +[titan] 2025-10-05 02:40:56,627 - root - INFO - lr: 4.7281e-05 gnorm: 1.18 [ 4:06:47<20:35:28] +[titan] 2025-10-05 02:41:07,500 - root - INFO - step: 6665 loss: 2.5859 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2808 +[titan] 2025-10-05 02:41:07,500 - root - INFO - lr: 4.7277e-05 gnorm: 1.21 [ 4:06:58<20:35:15] +[titan] 2025-10-05 02:41:18,462 - root - INFO - step: 6670 loss: 2.5619 memory: 118.84GiB(85.28%) tps: 29,894 tflops: 414.73 mfu: 41.93% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:41:18,462 - root - INFO - lr: 4.7273e-05 gnorm: 1.20 [ 4:07:09<20:35:03] +[titan] 2025-10-05 02:41:29,324 - root - INFO - step: 6675 loss: 2.4816 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.1887 +[titan] 2025-10-05 02:41:29,325 - root - INFO - lr: 4.7268e-05 gnorm: 1.20 [ 4:07:20<20:34:51] +[titan] 2025-10-05 02:41:40,209 - root - INFO - step: 6680 loss: 2.6410 
memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3279 +[titan] 2025-10-05 02:41:40,209 - root - INFO - lr: 4.7264e-05 gnorm: 1.26 [ 4:07:31<20:34:39] +[titan] 2025-10-05 02:41:51,110 - root - INFO - step: 6685 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3116 +[titan] 2025-10-05 02:41:51,110 - root - INFO - lr: 4.7260e-05 gnorm: 1.25 [ 4:07:42<20:34:26] +[titan] 2025-10-05 02:42:01,973 - root - INFO - step: 6690 loss: 2.6096 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3014 +[titan] 2025-10-05 02:42:01,973 - root - INFO - lr: 4.7256e-05 gnorm: 1.20 [ 4:07:53<20:34:14] +[titan] 2025-10-05 02:42:12,862 - root - INFO - step: 6695 loss: 2.5175 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2207 +[titan] 2025-10-05 02:42:12,862 - root - INFO - lr: 4.7251e-05 gnorm: 1.18 [ 4:08:04<20:34:02] +[titan] 2025-10-05 02:42:21,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:42:23,816 - root - INFO - step: 6700 loss: 2.6088 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3010 +[titan] 2025-10-05 02:42:23,816 - root - INFO - lr: 4.7247e-05 gnorm: 1.21 [ 4:08:15<20:33:50] +[titan] 2025-10-05 02:42:34,693 - root - INFO - step: 6705 loss: 2.6071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.2991 +[titan] 2025-10-05 02:42:34,693 - root - INFO - lr: 4.7243e-05 gnorm: 1.19 [ 4:08:25<20:33:38] +[titan] 2025-10-05 02:42:45,561 - root - INFO - step: 6710 loss: 2.5118 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2161 +[titan] 2025-10-05 02:42:45,561 - root - INFO - lr: 4.7238e-05 gnorm: 1.24 [ 4:08:36<20:33:25] +[titan] 2025-10-05 02:42:56,442 - root - INFO - step: 6715 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.2997 +[titan] 2025-10-05 02:42:56,442 - root - INFO - lr: 4.7234e-05 gnorm: 1.21 [ 4:08:47<20:33:13] +[titan] 2025-10-05 02:43:07,287 - root - INFO - step: 6720 loss: 2.5570 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2554 +[titan] 2025-10-05 02:43:07,287 - root - INFO - lr: 4.7230e-05 gnorm: 1.22 [ 4:08:58<20:33:01] +[titan] 2025-10-05 02:43:18,136 - root - INFO - step: 6725 loss: 2.5707 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3021 global_avg_mtp_loss: 2.2686 +[titan] 2025-10-05 02:43:18,136 - root - INFO - lr: 4.7226e-05 gnorm: 1.53 [ 4:09:09<20:32:48] +[titan] 2025-10-05 02:43:29,070 - root - INFO - step: 6730 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2464 +[titan] 2025-10-05 02:43:29,070 - root - INFO - lr: 4.7221e-05 
gnorm: 1.26 [ 4:09:20<20:32:36] +[titan] 2025-10-05 02:43:39,913 - root - INFO - step: 6735 loss: 2.5430 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 02:43:39,913 - root - INFO - lr: 4.7217e-05 gnorm: 1.22 [ 4:09:31<20:32:24] +[titan] 2025-10-05 02:43:50,772 - root - INFO - step: 6740 loss: 2.5235 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2275 +[titan] 2025-10-05 02:43:50,772 - root - INFO - lr: 4.7213e-05 gnorm: 1.21 [ 4:09:41<20:32:11] +[titan] 2025-10-05 02:44:01,659 - root - INFO - step: 6745 loss: 2.6439 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3121 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:44:01,660 - root - INFO - lr: 4.7208e-05 gnorm: 1.19 [ 4:09:52<20:31:59] +[titan] 2025-10-05 02:44:10,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:44:12,526 - root - INFO - step: 6750 loss: 2.5875 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 02:44:12,526 - root - INFO - lr: 4.7204e-05 gnorm: 1.21 [ 4:10:03<20:31:47] +[titan] 2025-10-05 02:44:23,443 - root - INFO - step: 6755 loss: 2.4956 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2014 +[titan] 2025-10-05 02:44:23,443 - root - INFO - lr: 4.7200e-05 gnorm: 1.19 [ 4:10:14<20:31:35] +[titan] 2025-10-05 02:44:34,289 - root - INFO - step: 6760 loss: 2.5401 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2414 +[titan] 2025-10-05 02:44:34,289 - root - INFO - lr: 4.7196e-05 gnorm: 1.22 [ 4:10:25<20:31:22] +[titan] 2025-10-05 02:44:45,167 - root - INFO - step: 6765 loss: 2.5998 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2924 +[titan] 2025-10-05 02:44:45,167 - root - INFO - lr: 4.7191e-05 gnorm: 1.30 [ 4:10:36<20:31:10] +[titan] 2025-10-05 02:44:56,029 - root - INFO - step: 6770 loss: 2.5743 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2714 +[titan] 2025-10-05 02:44:56,030 - root - INFO - lr: 4.7187e-05 gnorm: 1.21 [ 4:10:47<20:30:58] +[titan] 2025-10-05 02:45:06,886 - root - INFO - step: 6775 loss: 2.5839 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3045 global_avg_mtp_loss: 2.2794 +[titan] 2025-10-05 02:45:06,886 - root - INFO - lr: 4.7183e-05 gnorm: 1.17 [ 4:10:58<20:30:45] +[titan] 2025-10-05 02:45:17,790 - root - INFO - step: 6780 loss: 2.5182 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2228 +[titan] 2025-10-05 02:45:17,790 - root - INFO - lr: 4.7178e-05 
gnorm: 1.24 [ 4:11:08<20:30:33] +[titan] 2025-10-05 02:45:28,696 - root - INFO - step: 6785 loss: 2.5460 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2463 +[titan] 2025-10-05 02:45:28,696 - root - INFO - lr: 4.7174e-05 gnorm: 1.20 [ 4:11:19<20:30:21] +[titan] 2025-10-05 02:45:39,548 - root - INFO - step: 6790 loss: 2.5312 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2336 +[titan] 2025-10-05 02:45:39,548 - root - INFO - lr: 4.7170e-05 gnorm: 1.16 [ 4:11:30<20:30:09] +[titan] 2025-10-05 02:45:50,426 - root - INFO - step: 6795 loss: 2.5011 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:45:50,427 - root - INFO - lr: 4.7165e-05 gnorm: 1.18 [ 4:11:41<20:29:56] +[titan] 2025-10-05 02:45:59,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:46:01,303 - root - INFO - step: 6800 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2649 +[titan] 2025-10-05 02:46:01,304 - root - INFO - lr: 4.7161e-05 gnorm: 1.23 [ 4:11:52<20:29:44] +[titan] 2025-10-05 02:46:12,162 - root - INFO - step: 6805 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:46:12,162 - root - INFO - lr: 4.7157e-05 gnorm: 1.28 [ 4:12:03<20:29:32] +[titan] 2025-10-05 02:46:23,159 - root - INFO - step: 6810 loss: 2.5521 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.40 mfu: 41.80% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2496 +[titan] 2025-10-05 02:46:23,159 - root - INFO - lr: 4.7152e-05 gnorm: 1.22 [ 4:12:14<20:29:20] +[titan] 2025-10-05 02:46:34,046 - root - INFO - step: 6815 loss: 2.6067 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.3007 +[titan] 2025-10-05 02:46:34,046 - root - INFO - lr: 4.7148e-05 gnorm: 1.17 [ 4:12:25<20:29:08] +[titan] 2025-10-05 02:46:44,908 - root - INFO - step: 6820 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2807 +[titan] 2025-10-05 02:46:44,908 - root - INFO - lr: 4.7143e-05 gnorm: 1.17 [ 4:12:36<20:28:55] +[titan] 2025-10-05 02:46:55,788 - root - INFO - step: 6825 loss: 2.5910 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2852 +[titan] 2025-10-05 02:46:55,788 - root - INFO - lr: 4.7139e-05 gnorm: 1.16 [ 4:12:46<20:28:43] +[titan] 2025-10-05 02:47:06,620 - root - INFO - step: 6830 loss: 2.5384 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 02:47:06,620 - root - INFO - lr: 4.7135e-05 
gnorm: 1.18 [ 4:12:57<20:28:31] +[titan] 2025-10-05 02:47:17,469 - root - INFO - step: 6835 loss: 2.5733 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 02:47:17,469 - root - INFO - lr: 4.7130e-05 gnorm: 1.23 [ 4:13:08<20:28:18] +[titan] 2025-10-05 02:47:28,380 - root - INFO - step: 6840 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2835 +[titan] 2025-10-05 02:47:28,380 - root - INFO - lr: 4.7126e-05 gnorm: 1.26 [ 4:13:19<20:28:06] +[titan] 2025-10-05 02:47:39,283 - root - INFO - step: 6845 loss: 2.5574 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2546 +[titan] 2025-10-05 02:47:39,283 - root - INFO - lr: 4.7122e-05 gnorm: 1.19 [ 4:13:30<20:27:54] +[titan] 2025-10-05 02:47:47,974 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:47:50,155 - root - INFO - step: 6850 loss: 2.5366 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2372 +[titan] 2025-10-05 02:47:50,155 - root - INFO - lr: 4.7117e-05 gnorm: 1.18 [ 4:13:41<20:27:42] +[titan] 2025-10-05 02:48:01,026 - root - INFO - step: 6855 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2793 +[titan] 2025-10-05 02:48:01,026 - root - INFO - lr: 4.7113e-05 gnorm: 1.17 [ 4:13:52<20:27:30] +[titan] 2025-10-05 02:48:11,906 - root - INFO - step: 6860 loss: 2.5452 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2453 +[titan] 2025-10-05 02:48:11,906 - root - INFO - lr: 4.7109e-05 gnorm: 1.21 [ 4:14:03<20:27:17] +[titan] 2025-10-05 02:48:22,764 - root - INFO - step: 6865 loss: 2.5903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2839 +[titan] 2025-10-05 02:48:22,764 - root - INFO - lr: 4.7104e-05 gnorm: 1.22 [ 4:14:13<20:27:05] +[titan] 2025-10-05 02:48:33,663 - root - INFO - step: 6870 loss: 2.5282 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2305 +[titan] 2025-10-05 02:48:33,663 - root - INFO - lr: 4.7100e-05 gnorm: 1.21 [ 4:14:24<20:26:53] +[titan] 2025-10-05 02:48:44,571 - root - INFO - step: 6875 loss: 2.5842 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2787 +[titan] 2025-10-05 02:48:44,571 - root - INFO - lr: 4.7095e-05 gnorm: 1.20 [ 4:14:35<20:26:41] +[titan] 2025-10-05 02:48:55,419 - root - INFO - step: 6880 loss: 2.5406 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2421 +[titan] 2025-10-05 02:48:55,419 - root - INFO - lr: 4.7091e-05 
gnorm: 1.23 [ 4:14:46<20:26:28] +[titan] 2025-10-05 02:49:06,283 - root - INFO - step: 6885 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2631 +[titan] 2025-10-05 02:49:06,283 - root - INFO - lr: 4.7087e-05 gnorm: 1.33 [ 4:14:57<20:26:16] +[titan] 2025-10-05 02:49:17,141 - root - INFO - step: 6890 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.3033 global_avg_mtp_loss: 2.2679 +[titan] 2025-10-05 02:49:17,141 - root - INFO - lr: 4.7082e-05 gnorm: 1.20 [ 4:15:08<20:26:04] +[titan] 2025-10-05 02:49:28,078 - root - INFO - step: 6895 loss: 2.5483 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2478 +[titan] 2025-10-05 02:49:28,078 - root - INFO - lr: 4.7078e-05 gnorm: 1.18 [ 4:15:19<20:25:52] +[titan] 2025-10-05 02:49:36,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:49:38,935 - root - INFO - step: 6900 loss: 2.5983 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:49:38,935 - root - INFO - lr: 4.7073e-05 gnorm: 1.24 [ 4:15:30<20:25:40] +[titan] 2025-10-05 02:49:49,829 - root - INFO - step: 6905 loss: 2.5554 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2534 +[titan] 2025-10-05 02:49:49,830 - root - INFO - lr: 4.7069e-05 gnorm: 1.19 [ 4:15:41<20:25:27] +[titan] 2025-10-05 02:50:00,703 - root - INFO - step: 6910 loss: 2.6056 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.2975 +[titan] 2025-10-05 02:50:00,703 - root - INFO - lr: 4.7065e-05 gnorm: 1.20 [ 4:15:51<20:25:15] +[titan] 2025-10-05 02:50:11,565 - root - INFO - step: 6915 loss: 2.5960 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2897 +[titan] 2025-10-05 02:50:11,566 - root - INFO - lr: 4.7060e-05 gnorm: 1.24 [ 4:16:02<20:25:03] +[titan] 2025-10-05 02:50:22,427 - root - INFO - step: 6920 loss: 2.5924 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2856 +[titan] 2025-10-05 02:50:22,427 - root - INFO - lr: 4.7056e-05 gnorm: 1.19 [ 4:16:13<20:24:51] +[titan] 2025-10-05 02:50:33,321 - root - INFO - step: 6925 loss: 2.4869 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1938 +[titan] 2025-10-05 02:50:33,321 - root - INFO - lr: 4.7051e-05 gnorm: 1.18 [ 4:16:24<20:24:39] +[titan] 2025-10-05 02:50:44,192 - root - INFO - step: 6930 loss: 2.5543 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 02:50:44,193 - root - INFO - lr: 4.7047e-05 
gnorm: 1.24 [ 4:16:35<20:24:26] +[titan] 2025-10-05 02:50:55,042 - root - INFO - step: 6935 loss: 2.5426 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2428 +[titan] 2025-10-05 02:50:55,042 - root - INFO - lr: 4.7043e-05 gnorm: 1.21 [ 4:16:46<20:24:14] +[titan] 2025-10-05 02:51:05,935 - root - INFO - step: 6940 loss: 2.6667 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3517 +[titan] 2025-10-05 02:51:05,935 - root - INFO - lr: 4.7038e-05 gnorm: 1.24 [ 4:16:57<20:24:02] +[titan] 2025-10-05 02:51:16,790 - root - INFO - step: 6945 loss: 2.6473 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3345 +[titan] 2025-10-05 02:51:16,790 - root - INFO - lr: 4.7034e-05 gnorm: 1.27 [ 4:17:07<20:23:50] +[titan] 2025-10-05 02:51:25,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:51:27,720 - root - INFO - step: 6950 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2047 +[titan] 2025-10-05 02:51:27,721 - root - INFO - lr: 4.7029e-05 gnorm: 1.25 [ 4:17:18<20:23:38] +[titan] 2025-10-05 02:51:38,573 - root - INFO - step: 6955 loss: 2.6408 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:51:38,573 - root - INFO - lr: 4.7025e-05 gnorm: 1.22 [ 4:17:29<20:23:25] +[titan] 2025-10-05 02:51:49,457 - root - INFO - step: 6960 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3017 global_avg_mtp_loss: 2.2580 +[titan] 2025-10-05 02:51:49,458 - root - INFO - lr: 4.7020e-05 gnorm: 1.20 [ 4:17:40<20:23:13] +[titan] 2025-10-05 02:52:00,296 - root - INFO - step: 6965 loss: 2.6601 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:52:00,296 - root - INFO - lr: 4.7016e-05 gnorm: 1.30 [ 4:17:51<20:23:01] +[titan] 2025-10-05 02:52:11,200 - root - INFO - step: 6970 loss: 2.5501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2484 +[titan] 2025-10-05 02:52:11,200 - root - INFO - lr: 4.7012e-05 gnorm: 1.25 [ 4:18:02<20:22:49] +[titan] 2025-10-05 02:52:22,059 - root - INFO - step: 6975 loss: 2.5650 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2627 +[titan] 2025-10-05 02:52:22,059 - root - INFO - lr: 4.7007e-05 gnorm: 1.18 [ 4:18:13<20:22:36] +[titan] 2025-10-05 02:52:32,953 - root - INFO - step: 6980 loss: 2.6856 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.3581 +[titan] 2025-10-05 02:52:32,953 - root - INFO - lr: 4.7003e-05 
gnorm: 1.24 [ 4:18:24<20:22:24] +[titan] 2025-10-05 02:52:43,790 - root - INFO - step: 6985 loss: 2.5169 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2194 +[titan] 2025-10-05 02:52:43,790 - root - INFO - lr: 4.6998e-05 gnorm: 1.33 [ 4:18:34<20:22:12] +[titan] 2025-10-05 02:52:54,642 - root - INFO - step: 6990 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2300 +[titan] 2025-10-05 02:52:54,642 - root - INFO - lr: 4.6994e-05 gnorm: 1.18 [ 4:18:45<20:22:00] +[titan] 2025-10-05 02:53:05,477 - root - INFO - step: 6995 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3774 +[titan] 2025-10-05 02:53:05,477 - root - INFO - lr: 4.6989e-05 gnorm: 1.28 [ 4:18:56<20:21:47] +[titan] 2025-10-05 02:53:14,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:53:16,323 - root - INFO - step: 7000 loss: 2.6331 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.3240 +[titan] 2025-10-05 02:53:16,323 - root - INFO - lr: 4.6985e-05 gnorm: 1.28 [ 4:19:07<20:21:35] +[titan] 2025-10-05 02:53:27,204 - root - INFO - step: 7005 loss: 2.5777 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2742 +[titan] 2025-10-05 02:53:27,204 - root - INFO - lr: 4.6980e-05 gnorm: 1.20 [ 4:19:18<20:21:23] +[titan] 2025-10-05 02:53:38,086 - root - INFO - step: 7010 loss: 2.5633 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:53:38,086 - root - INFO - lr: 4.6976e-05 gnorm: 1.21 [ 4:19:29<20:21:11] +[titan] 2025-10-05 02:53:48,973 - root - INFO - step: 7015 loss: 2.5508 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2507 +[titan] 2025-10-05 02:53:48,973 - root - INFO - lr: 4.6971e-05 gnorm: 1.17 [ 4:19:40<20:20:58] +[titan] 2025-10-05 02:53:59,845 - root - INFO - step: 7020 loss: 2.6141 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3057 +[titan] 2025-10-05 02:53:59,845 - root - INFO - lr: 4.6967e-05 gnorm: 1.23 [ 4:19:51<20:20:46] +[titan] 2025-10-05 02:54:10,698 - root - INFO - step: 7025 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2184 +[titan] 2025-10-05 02:54:10,698 - root - INFO - lr: 4.6962e-05 gnorm: 1.18 [ 4:20:01<20:20:34] +[titan] 2025-10-05 02:54:21,549 - root - INFO - step: 7030 loss: 2.5250 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2274 +[titan] 2025-10-05 02:54:21,550 - root - INFO - lr: 4.6958e-05 
gnorm: 1.18 [ 4:20:12<20:20:22] +[titan] 2025-10-05 02:54:32,510 - root - INFO - step: 7035 loss: 2.4583 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2895 global_avg_mtp_loss: 2.1687 +[titan] 2025-10-05 02:54:32,510 - root - INFO - lr: 4.6954e-05 gnorm: 1.13 [ 4:20:23<20:20:10] +[titan] 2025-10-05 02:54:43,391 - root - INFO - step: 7040 loss: 2.5911 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2860 +[titan] 2025-10-05 02:54:43,391 - root - INFO - lr: 4.6949e-05 gnorm: 1.24 [ 4:20:34<20:19:58] +[titan] 2025-10-05 02:54:54,247 - root - INFO - step: 7045 loss: 2.5161 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2197 +[titan] 2025-10-05 02:54:54,247 - root - INFO - lr: 4.6945e-05 gnorm: 1.19 [ 4:20:45<20:19:45] +[titan] 2025-10-05 02:55:02,933 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:55:05,120 - root - INFO - step: 7050 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2600 +[titan] 2025-10-05 02:55:05,120 - root - INFO - lr: 4.6940e-05 gnorm: 1.19 [ 4:20:56<20:19:33] +[titan] 2025-10-05 02:55:15,988 - root - INFO - step: 7055 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:55:15,988 - root - INFO - lr: 4.6936e-05 gnorm: 1.18 [ 4:21:07<20:19:21] +[titan] 2025-10-05 02:55:26,853 - root - INFO - step: 7060 loss: 2.6283 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3103 global_avg_mtp_loss: 2.3181 +[titan] 2025-10-05 02:55:26,853 - root - INFO - lr: 4.6931e-05 gnorm: 1.21 [ 4:21:18<20:19:09] +[titan] 2025-10-05 02:55:37,782 - root - INFO - step: 7065 loss: 2.5429 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2433 +[titan] 2025-10-05 02:55:37,782 - root - INFO - lr: 4.6927e-05 gnorm: 1.17 [ 4:21:28<20:18:57] +[titan] 2025-10-05 02:55:48,649 - root - INFO - step: 7070 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2882 +[titan] 2025-10-05 02:55:48,649 - root - INFO - lr: 4.6922e-05 gnorm: 1.22 [ 4:21:39<20:18:45] +[titan] 2025-10-05 02:55:59,510 - root - INFO - step: 7075 loss: 2.5409 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 02:55:59,510 - root - INFO - lr: 4.6918e-05 gnorm: 1.20 [ 4:21:50<20:18:32] +[titan] 2025-10-05 02:56:10,352 - root - INFO - step: 7080 loss: 2.5976 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:56:10,353 - root - INFO - lr: 4.6913e-05 
gnorm: 1.19 [ 4:22:01<20:18:20] +[titan] 2025-10-05 02:56:21,217 - root - INFO - step: 7085 loss: 2.5675 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2647 +[titan] 2025-10-05 02:56:21,217 - root - INFO - lr: 4.6909e-05 gnorm: 1.26 [ 4:22:12<20:18:08] +[titan] 2025-10-05 02:56:32,130 - root - INFO - step: 7090 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 02:56:32,130 - root - INFO - lr: 4.6904e-05 gnorm: 1.19 [ 4:22:23<20:17:56] +[titan] 2025-10-05 02:56:43,006 - root - INFO - step: 7095 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 02:56:43,006 - root - INFO - lr: 4.6899e-05 gnorm: 1.16 [ 4:22:34<20:17:44] +[titan] 2025-10-05 02:56:51,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:56:53,930 - root - INFO - step: 7100 loss: 2.6150 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3102 global_avg_mtp_loss: 2.3048 +[titan] 2025-10-05 02:56:53,930 - root - INFO - lr: 4.6895e-05 gnorm: 1.24 [ 4:22:45<20:17:32] +[titan] 2025-10-05 02:57:04,810 - root - INFO - step: 7105 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2524 +[titan] 2025-10-05 02:57:04,810 - root - INFO - lr: 4.6890e-05 gnorm: 1.23 [ 4:22:55<20:17:20] +[titan] 2025-10-05 02:57:15,679 - root - INFO - step: 7110 loss: 2.6249 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3152 +[titan] 2025-10-05 02:57:15,679 - root - INFO - lr: 4.6886e-05 gnorm: 1.23 [ 4:23:06<20:17:08] +[titan] 2025-10-05 02:57:26,560 - root - INFO - step: 7115 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 02:57:26,560 - root - INFO - lr: 4.6881e-05 gnorm: 1.21 [ 4:23:17<20:16:55] +[titan] 2025-10-05 02:57:37,504 - root - INFO - step: 7120 loss: 2.5642 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 02:57:37,504 - root - INFO - lr: 4.6877e-05 gnorm: 1.25 [ 4:23:28<20:16:44] +[titan] 2025-10-05 02:57:48,407 - root - INFO - step: 7125 loss: 2.5252 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2269 +[titan] 2025-10-05 02:57:48,407 - root - INFO - lr: 4.6872e-05 gnorm: 1.23 [ 4:23:39<20:16:32] +[titan] 2025-10-05 02:57:59,324 - root - INFO - step: 7130 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2910 +[titan] 2025-10-05 02:57:59,324 - root - INFO - lr: 4.6868e-05 
gnorm: 1.23 [ 4:23:50<20:16:20] +[titan] 2025-10-05 02:58:10,198 - root - INFO - step: 7135 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3080 +[titan] 2025-10-05 02:58:10,198 - root - INFO - lr: 4.6863e-05 gnorm: 1.30 [ 4:24:01<20:16:07] +[titan] 2025-10-05 02:58:21,069 - root - INFO - step: 7140 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:58:21,069 - root - INFO - lr: 4.6859e-05 gnorm: 1.21 [ 4:24:12<20:15:55] +[titan] 2025-10-05 02:58:31,936 - root - INFO - step: 7145 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2622 +[titan] 2025-10-05 02:58:31,936 - root - INFO - lr: 4.6854e-05 gnorm: 1.20 [ 4:24:23<20:15:43] +[titan] 2025-10-05 02:58:40,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:58:42,870 - root - INFO - step: 7150 loss: 2.5513 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2508 +[titan] 2025-10-05 02:58:42,871 - root - INFO - lr: 4.6850e-05 gnorm: 1.21 [ 4:24:34<20:15:31] +[titan] 2025-10-05 02:58:53,743 - root - INFO - step: 7155 loss: 2.5589 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2563 +[titan] 2025-10-05 02:58:53,743 - root - INFO - lr: 4.6845e-05 gnorm: 1.16 [ 4:24:44<20:15:19] +[titan] 2025-10-05 02:59:04,618 - root - INFO - step: 7160 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2723 +[titan] 2025-10-05 02:59:04,618 - root - INFO - lr: 4.6840e-05 gnorm: 1.21 [ 4:24:55<20:15:07] +[titan] 2025-10-05 02:59:15,628 - root - INFO - step: 7165 loss: 2.5541 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.91 mfu: 41.75% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2529 +[titan] 2025-10-05 02:59:15,629 - root - INFO - lr: 4.6836e-05 gnorm: 1.17 [ 4:25:06<20:14:55] +[titan] 2025-10-05 02:59:22,344 - root - INFO - Dumping profiler traces at step 7168 +[titan] 2025-10-05 02:59:22,380 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:59:26,736 - root - INFO - step: 7170 loss: 2.6199 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.30 mfu: 41.38% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3097 +[titan] 2025-10-05 02:59:26,736 - root - INFO - lr: 4.6831e-05 gnorm: 1.20 [ 4:25:17<20:14:44] +[titan] 2025-10-05 02:59:37,672 - root - INFO - step: 7175 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1801 +[titan] 2025-10-05 02:59:37,672 - root - INFO - lr: 4.6827e-05 gnorm: 1.15 [ 4:25:28<20:14:32] +[titan] 2025-10-05 02:59:48,576 - root - INFO - step: 7180 loss: 2.6188 
memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3101 +[titan] 2025-10-05 02:59:48,576 - root - INFO - lr: 4.6822e-05 gnorm: 1.22 [ 4:25:39<20:14:20] +[titan] 2025-10-05 02:59:59,450 - root - INFO - step: 7185 loss: 2.5330 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2333 +[titan] 2025-10-05 02:59:59,450 - root - INFO - lr: 4.6818e-05 gnorm: 1.21 [ 4:25:50<20:14:08] +[titan] 2025-10-05 03:00:10,322 - root - INFO - step: 7190 loss: 2.6028 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2970 +[titan] 2025-10-05 03:00:10,322 - root - INFO - lr: 4.6813e-05 gnorm: 1.20 [ 4:26:01<20:13:56] +[titan] 2025-10-05 03:00:21,233 - root - INFO - step: 7195 loss: 2.6073 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2996 +[titan] 2025-10-05 03:00:21,233 - root - INFO - lr: 4.6808e-05 gnorm: 1.23 [ 4:26:12<20:13:44] +[titan] 2025-10-05 03:00:29,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:00:32,108 - root - INFO - step: 7200 loss: 2.5130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:00:32,108 - root - INFO - lr: 4.6804e-05 gnorm: 1.32 [ 4:26:23<20:13:32] +[titan] 2025-10-05 03:00:43,038 - root - INFO - step: 7205 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2785 +[titan] 2025-10-05 03:00:43,038 - root - INFO - lr: 4.6799e-05 gnorm: 1.29 [ 4:26:34<20:13:20] +[titan] 2025-10-05 03:00:53,933 - root - INFO - step: 7210 loss: 2.5257 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:00:53,933 - root - INFO - lr: 4.6795e-05 gnorm: 1.20 [ 4:26:45<20:13:08] +[titan] 2025-10-05 03:01:04,827 - root - INFO - step: 7215 loss: 2.5854 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3039 global_avg_mtp_loss: 2.2815 +[titan] 2025-10-05 03:01:04,827 - root - INFO - lr: 4.6790e-05 gnorm: 1.21 [ 4:26:55<20:12:56] +[titan] 2025-10-05 03:01:15,711 - root - INFO - step: 7220 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2982 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:01:15,712 - root - INFO - lr: 4.6786e-05 gnorm: 1.19 [ 4:27:06<20:12:44] +[titan] 2025-10-05 03:01:26,615 - root - INFO - step: 7225 loss: 2.4967 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2040 +[titan] 2025-10-05 03:01:26,615 - root - INFO - lr: 4.6781e-05 gnorm: 1.24 [ 4:27:17<20:12:32] +[titan] 2025-10-05 03:01:37,539 - root - INFO - step: 7230 loss: 2.6118 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.3044 +[titan] 2025-10-05 03:01:37,539 - root - INFO - lr: 4.6776e-05 
gnorm: 1.20 [ 4:27:28<20:12:20] +[titan] 2025-10-05 03:01:48,431 - root - INFO - step: 7235 loss: 2.5240 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2979 global_avg_mtp_loss: 2.2261 +[titan] 2025-10-05 03:01:48,431 - root - INFO - lr: 4.6772e-05 gnorm: 1.18 [ 4:27:39<20:12:08] +[titan] 2025-10-05 03:01:59,313 - root - INFO - step: 7240 loss: 2.5262 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2284 +[titan] 2025-10-05 03:01:59,313 - root - INFO - lr: 4.6767e-05 gnorm: 1.17 [ 4:27:50<20:11:56] +[titan] 2025-10-05 03:02:10,185 - root - INFO - step: 7245 loss: 2.5139 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2193 +[titan] 2025-10-05 03:02:10,185 - root - INFO - lr: 4.6762e-05 gnorm: 1.26 [ 4:28:01<20:11:44] +[titan] 2025-10-05 03:02:18,882 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:02:21,067 - root - INFO - step: 7250 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:02:21,067 - root - INFO - lr: 4.6758e-05 gnorm: 1.26 [ 4:28:12<20:11:32] +[titan] 2025-10-05 03:02:31,931 - root - INFO - step: 7255 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2327 +[titan] 2025-10-05 03:02:31,931 - root - INFO - lr: 4.6753e-05 gnorm: 1.22 [ 4:28:23<20:11:20] +[titan] 2025-10-05 03:02:42,869 - root - INFO - step: 7260 loss: 2.5329 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.2991 global_avg_mtp_loss: 2.2339 +[titan] 2025-10-05 03:02:42,869 - root - INFO - lr: 4.6749e-05 gnorm: 1.22 [ 4:28:34<20:11:08] +[titan] 2025-10-05 03:02:53,734 - root - INFO - step: 7265 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.2033 +[titan] 2025-10-05 03:02:53,734 - root - INFO - lr: 4.6744e-05 gnorm: 1.30 [ 4:28:44<20:10:56] +[titan] 2025-10-05 03:03:04,623 - root - INFO - step: 7270 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3079 global_avg_mtp_loss: 2.3083 +[titan] 2025-10-05 03:03:04,623 - root - INFO - lr: 4.6739e-05 gnorm: 1.34 [ 4:28:55<20:10:44] +[titan] 2025-10-05 03:03:15,505 - root - INFO - step: 7275 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3072 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 03:03:15,505 - root - INFO - lr: 4.6735e-05 gnorm: 1.22 [ 4:29:06<20:10:32] +[titan] 2025-10-05 03:03:26,372 - root - INFO - step: 7280 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2876 +[titan] 2025-10-05 03:03:26,372 - root - INFO - lr: 4.6730e-05 
gnorm: 1.19 [ 4:29:17<20:10:20] +[titan] 2025-10-05 03:03:37,274 - root - INFO - step: 7285 loss: 2.6024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2948 +[titan] 2025-10-05 03:03:37,274 - root - INFO - lr: 4.6725e-05 gnorm: 1.27 [ 4:29:28<20:10:08] +[titan] 2025-10-05 03:03:48,171 - root - INFO - step: 7290 loss: 2.5142 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2183 +[titan] 2025-10-05 03:03:48,171 - root - INFO - lr: 4.6721e-05 gnorm: 1.18 [ 4:29:39<20:09:56] +[titan] 2025-10-05 03:03:59,037 - root - INFO - step: 7295 loss: 2.5672 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3019 global_avg_mtp_loss: 2.2653 +[titan] 2025-10-05 03:03:59,037 - root - INFO - lr: 4.6716e-05 gnorm: 1.21 [ 4:29:50<20:09:43] +[titan] 2025-10-05 03:04:07,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:04:09,893 - root - INFO - step: 7300 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2362 +[titan] 2025-10-05 03:04:09,893 - root - INFO - lr: 4.6712e-05 gnorm: 1.19 [ 4:30:01<20:09:31] +[titan] 2025-10-05 03:04:20,770 - root - INFO - step: 7305 loss: 2.5190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:04:20,770 - root - INFO - lr: 4.6707e-05 gnorm: 1.20 [ 4:30:11<20:09:19] +[titan] 2025-10-05 03:04:31,636 - root - INFO - step: 7310 loss: 2.5542 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2522 +[titan] 2025-10-05 03:04:31,637 - root - INFO - lr: 4.6702e-05 gnorm: 1.16 [ 4:30:22<20:09:07] +[titan] 2025-10-05 03:04:42,538 - root - INFO - step: 7315 loss: 2.5823 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3047 global_avg_mtp_loss: 2.2776 +[titan] 2025-10-05 03:04:42,538 - root - INFO - lr: 4.6698e-05 gnorm: 1.19 [ 4:30:33<20:08:55] +[titan] 2025-10-05 03:04:53,396 - root - INFO - step: 7320 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.2988 +[titan] 2025-10-05 03:04:53,396 - root - INFO - lr: 4.6693e-05 gnorm: 1.20 [ 4:30:44<20:08:43] +[titan] 2025-10-05 03:05:04,291 - root - INFO - step: 7325 loss: 2.6131 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 03:05:04,291 - root - INFO - lr: 4.6688e-05 gnorm: 1.20 [ 4:30:55<20:08:31] +[titan] 2025-10-05 03:05:15,170 - root - INFO - step: 7330 loss: 2.5664 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2639 +[titan] 2025-10-05 03:05:15,170 - root - INFO - lr: 4.6684e-05 
gnorm: 1.19 [ 4:31:06<20:08:19] +[titan] 2025-10-05 03:05:26,057 - root - INFO - step: 7335 loss: 2.5718 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:05:26,057 - root - INFO - lr: 4.6679e-05 gnorm: 1.19 [ 4:31:17<20:08:07] +[titan] 2025-10-05 03:05:36,944 - root - INFO - step: 7340 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:05:36,944 - root - INFO - lr: 4.6674e-05 gnorm: 1.21 [ 4:31:28<20:07:55] +[titan] 2025-10-05 03:05:47,861 - root - INFO - step: 7345 loss: 2.4951 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2008 +[titan] 2025-10-05 03:05:47,862 - root - INFO - lr: 4.6670e-05 gnorm: 1.18 [ 4:31:38<20:07:43] +[titan] 2025-10-05 03:05:56,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:05:58,742 - root - INFO - step: 7350 loss: 2.6375 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3271 +[titan] 2025-10-05 03:05:58,742 - root - INFO - lr: 4.6665e-05 gnorm: 1.20 [ 4:31:49<20:07:31] +[titan] 2025-10-05 03:06:09,631 - root - INFO - step: 7355 loss: 2.5204 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2241 +[titan] 2025-10-05 03:06:09,631 - root - INFO - lr: 4.6660e-05 gnorm: 1.13 [ 4:32:00<20:07:19] +[titan] 2025-10-05 03:06:20,514 - root - INFO - step: 7360 loss: 2.5761 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2736 +[titan] 2025-10-05 03:06:20,514 - root - INFO - lr: 4.6656e-05 gnorm: 1.20 [ 4:32:11<20:07:07] +[titan] 2025-10-05 03:06:31,396 - root - INFO - step: 7365 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2678 +[titan] 2025-10-05 03:06:31,397 - root - INFO - lr: 4.6651e-05 gnorm: 1.18 [ 4:32:22<20:06:55] +[titan] 2025-10-05 03:06:42,281 - root - INFO - step: 7370 loss: 2.5449 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2457 +[titan] 2025-10-05 03:06:42,282 - root - INFO - lr: 4.6646e-05 gnorm: 1.20 [ 4:32:33<20:06:43] +[titan] 2025-10-05 03:06:53,156 - root - INFO - step: 7375 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2076 +[titan] 2025-10-05 03:06:53,156 - root - INFO - lr: 4.6642e-05 gnorm: 1.20 [ 4:32:44<20:06:31] +[titan] 2025-10-05 03:07:04,009 - root - INFO - step: 7380 loss: 2.4884 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:07:04,009 - root - INFO - lr: 4.6637e-05 
gnorm: 1.18 [ 4:32:55<20:06:19] +[titan] 2025-10-05 03:07:14,887 - root - INFO - step: 7385 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2624 +[titan] 2025-10-05 03:07:14,887 - root - INFO - lr: 4.6632e-05 gnorm: 1.29 [ 4:33:06<20:06:06] +[titan] 2025-10-05 03:07:25,781 - root - INFO - step: 7390 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:07:25,781 - root - INFO - lr: 4.6627e-05 gnorm: 1.19 [ 4:33:16<20:05:54] +[titan] 2025-10-05 03:07:36,668 - root - INFO - step: 7395 loss: 2.5215 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2253 +[titan] 2025-10-05 03:07:36,668 - root - INFO - lr: 4.6623e-05 gnorm: 1.18 [ 4:33:27<20:05:42] +[titan] 2025-10-05 03:07:45,410 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:07:47,595 - root - INFO - step: 7400 loss: 2.5552 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 03:07:47,595 - root - INFO - lr: 4.6618e-05 gnorm: 1.25 [ 4:33:38<20:05:31] +[titan] 2025-10-05 03:07:58,479 - root - INFO - step: 7405 loss: 2.5722 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2664 +[titan] 2025-10-05 03:07:58,479 - root - INFO - lr: 4.6613e-05 gnorm: 1.23 [ 4:33:49<20:05:19] +[titan] 2025-10-05 03:08:09,352 - root - INFO - step: 7410 loss: 2.6173 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 03:08:09,352 - root - INFO - lr: 4.6609e-05 gnorm: 1.26 [ 4:34:00<20:05:07] +[titan] 2025-10-05 03:08:20,245 - root - INFO - step: 7415 loss: 2.6371 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.3115 global_avg_mtp_loss: 2.3256 +[titan] 2025-10-05 03:08:20,245 - root - INFO - lr: 4.6604e-05 gnorm: 1.18 [ 4:34:11<20:04:55] +[titan] 2025-10-05 03:08:31,148 - root - INFO - step: 7420 loss: 2.5121 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:08:31,148 - root - INFO - lr: 4.6599e-05 gnorm: 1.18 [ 4:34:22<20:04:43] +[titan] 2025-10-05 03:08:42,047 - root - INFO - step: 7425 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2028 +[titan] 2025-10-05 03:08:42,047 - root - INFO - lr: 4.6594e-05 gnorm: 1.17 [ 4:34:33<20:04:31] +[titan] 2025-10-05 03:08:52,923 - root - INFO - step: 7430 loss: 2.5993 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2935 +[titan] 2025-10-05 03:08:52,923 - root - INFO - lr: 4.6590e-05 
gnorm: 1.19 [ 4:34:44<20:04:19] +[titan] 2025-10-05 03:09:03,806 - root - INFO - step: 7435 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2641 +[titan] 2025-10-05 03:09:03,806 - root - INFO - lr: 4.6585e-05 gnorm: 1.23 [ 4:34:54<20:04:07] +[titan] 2025-10-05 03:09:14,682 - root - INFO - step: 7440 loss: 2.4458 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:09:14,682 - root - INFO - lr: 4.6580e-05 gnorm: 1.21 [ 4:35:05<20:03:55] +[titan] 2025-10-05 03:09:25,563 - root - INFO - step: 7445 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2988 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:09:25,564 - root - INFO - lr: 4.6576e-05 gnorm: 1.20 [ 4:35:16<20:03:43] +[titan] 2025-10-05 03:09:34,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:09:36,483 - root - INFO - step: 7450 loss: 2.4992 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2041 +[titan] 2025-10-05 03:09:36,483 - root - INFO - lr: 4.6571e-05 gnorm: 1.13 [ 4:35:27<20:03:31] +[titan] 2025-10-05 03:09:47,415 - root - INFO - step: 7455 loss: 2.5685 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:09:47,415 - root - INFO - lr: 4.6566e-05 gnorm: 1.21 [ 4:35:38<20:03:19] +[titan] 2025-10-05 03:09:58,322 - root - INFO - step: 7460 loss: 2.5530 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.15% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2521 +[titan] 2025-10-05 03:09:58,322 - root - INFO - lr: 4.6561e-05 gnorm: 1.19 [ 4:35:49<20:03:07] +[titan] 2025-10-05 03:10:09,217 - root - INFO - step: 7465 loss: 2.5984 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2930 +[titan] 2025-10-05 03:10:09,217 - root - INFO - lr: 4.6557e-05 gnorm: 1.33 [ 4:36:00<20:02:55] +[titan] 2025-10-05 03:10:20,126 - root - INFO - step: 7470 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:10:20,127 - root - INFO - lr: 4.6552e-05 gnorm: 1.25 [ 4:36:11<20:02:43] +[titan] 2025-10-05 03:10:31,009 - root - INFO - step: 7475 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3233 +[titan] 2025-10-05 03:10:31,009 - root - INFO - lr: 4.6547e-05 gnorm: 1.21 [ 4:36:22<20:02:31] +[titan] 2025-10-05 03:10:41,908 - root - INFO - step: 7480 loss: 2.6221 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3088 global_avg_mtp_loss: 2.3133 +[titan] 2025-10-05 03:10:41,908 - root - INFO - lr: 4.6542e-05 
gnorm: 1.24 [ 4:36:33<20:02:19] +[titan] 2025-10-05 03:10:52,859 - root - INFO - step: 7485 loss: 2.6267 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.13 mfu: 41.97% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3150 +[titan] 2025-10-05 03:10:52,859 - root - INFO - lr: 4.6538e-05 gnorm: 1.23 [ 4:36:43<20:02:08] +[titan] 2025-10-05 03:11:03,748 - root - INFO - step: 7490 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:11:03,748 - root - INFO - lr: 4.6533e-05 gnorm: 1.16 [ 4:36:54<20:01:56] +[titan] 2025-10-05 03:11:14,653 - root - INFO - step: 7495 loss: 2.5041 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2100 +[titan] 2025-10-05 03:11:14,654 - root - INFO - lr: 4.6528e-05 gnorm: 1.17 [ 4:37:05<20:01:44] +[titan] 2025-10-05 03:11:23,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:11:25,557 - root - INFO - step: 7500 loss: 2.5279 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 03:11:25,558 - root - INFO - lr: 4.6523e-05 gnorm: 1.17 [ 4:37:16<20:01:32] +[titan] 2025-10-05 03:11:36,447 - root - INFO - step: 7505 loss: 2.5670 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:11:36,447 - root - INFO - lr: 4.6519e-05 gnorm: 1.26 [ 4:37:27<20:01:20] +[titan] 2025-10-05 03:11:47,366 - root - INFO - step: 7510 loss: 2.5107 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 03:11:47,366 - root - INFO - lr: 4.6514e-05 gnorm: 1.18 [ 4:37:38<20:01:08] +[titan] 2025-10-05 03:11:58,284 - root - INFO - step: 7515 loss: 2.6471 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3343 +[titan] 2025-10-05 03:11:58,284 - root - INFO - lr: 4.6509e-05 gnorm: 1.26 [ 4:37:49<20:00:56] +[titan] 2025-10-05 03:12:09,176 - root - INFO - step: 7520 loss: 2.5022 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:12:09,177 - root - INFO - lr: 4.6504e-05 gnorm: 1.24 [ 4:38:00<20:00:44] +[titan] 2025-10-05 03:12:20,065 - root - INFO - step: 7525 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2289 +[titan] 2025-10-05 03:12:20,065 - root - INFO - lr: 4.6499e-05 gnorm: 1.20 [ 4:38:11<20:00:32] +[titan] 2025-10-05 03:12:30,937 - root - INFO - step: 7530 loss: 2.5858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2803 +[titan] 2025-10-05 03:12:30,937 - root - INFO - lr: 4.6495e-05 
gnorm: 1.25 [ 4:38:22<20:00:20] +[titan] 2025-10-05 03:12:41,813 - root - INFO - step: 7535 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:12:41,814 - root - INFO - lr: 4.6490e-05 gnorm: 1.20 [ 4:38:32<20:00:08] +[titan] 2025-10-05 03:12:52,684 - root - INFO - step: 7540 loss: 2.5356 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:12:52,684 - root - INFO - lr: 4.6485e-05 gnorm: 1.23 [ 4:38:43<19:59:56] +[titan] 2025-10-05 03:13:03,580 - root - INFO - step: 7545 loss: 2.5425 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2420 +[titan] 2025-10-05 03:13:03,580 - root - INFO - lr: 4.6480e-05 gnorm: 1.22 [ 4:38:54<19:59:44] +[titan] 2025-10-05 03:13:12,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:13:14,425 - root - INFO - step: 7550 loss: 2.5098 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:14,425 - root - INFO - lr: 4.6476e-05 gnorm: 1.21 [ 4:39:05<19:59:32] +[titan] 2025-10-05 03:13:25,285 - root - INFO - step: 7555 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2953 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:25,285 - root - INFO - lr: 4.6471e-05 gnorm: 1.32 [ 4:39:16<19:59:20] +[titan] 2025-10-05 03:13:36,128 - root - INFO - step: 7560 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1394 +[titan] 2025-10-05 03:13:36,128 - root - INFO - lr: 4.6466e-05 gnorm: 1.23 [ 4:39:27<19:59:08] +[titan] 2025-10-05 03:13:47,004 - root - INFO - step: 7565 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2297 +[titan] 2025-10-05 03:13:47,005 - root - INFO - lr: 4.6461e-05 gnorm: 1.21 [ 4:39:38<19:58:56] +[titan] 2025-10-05 03:13:57,856 - root - INFO - step: 7570 loss: 2.4658 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:13:57,856 - root - INFO - lr: 4.6456e-05 gnorm: 1.15 [ 4:39:48<19:58:44] +[titan] 2025-10-05 03:14:08,701 - root - INFO - step: 7575 loss: 2.5486 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2477 +[titan] 2025-10-05 03:14:08,701 - root - INFO - lr: 4.6452e-05 gnorm: 1.16 [ 4:39:59<19:58:32] +[titan] 2025-10-05 03:14:19,585 - root - INFO - step: 7580 loss: 2.4950 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:14:19,586 - root - INFO - lr: 4.6447e-05 
gnorm: 1.20 [ 4:40:10<19:58:20] +[titan] 2025-10-05 03:14:30,487 - root - INFO - step: 7585 loss: 2.5519 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3008 global_avg_mtp_loss: 2.2511 +[titan] 2025-10-05 03:14:30,487 - root - INFO - lr: 4.6442e-05 gnorm: 1.18 [ 4:40:21<19:58:08] +[titan] 2025-10-05 03:14:41,356 - root - INFO - step: 7590 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2224 +[titan] 2025-10-05 03:14:41,356 - root - INFO - lr: 4.6437e-05 gnorm: 1.18 [ 4:40:32<19:57:56] +[titan] 2025-10-05 03:14:52,221 - root - INFO - step: 7595 loss: 2.5646 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 03:14:52,221 - root - INFO - lr: 4.6432e-05 gnorm: 1.16 [ 4:40:43<19:57:44] +[titan] 2025-10-05 03:15:00,910 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:15:03,087 - root - INFO - step: 7600 loss: 2.5198 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:15:03,087 - root - INFO - lr: 4.6427e-05 gnorm: 1.22 [ 4:40:54<19:57:32] +[titan] 2025-10-05 03:15:13,944 - root - INFO - step: 7605 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2354 +[titan] 2025-10-05 03:15:13,944 - root - INFO - lr: 4.6423e-05 gnorm: 1.19 [ 4:41:05<19:57:20] +[titan] 2025-10-05 03:15:24,824 - root - INFO - step: 7610 loss: 2.4376 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1514 +[titan] 2025-10-05 03:15:24,824 - root - INFO - lr: 4.6418e-05 gnorm: 1.19 [ 4:41:15<19:57:08] +[titan] 2025-10-05 03:15:35,666 - root - INFO - step: 7615 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2192 +[titan] 2025-10-05 03:15:35,666 - root - INFO - lr: 4.6413e-05 gnorm: 1.17 [ 4:41:26<19:56:55] +[titan] 2025-10-05 03:15:46,512 - root - INFO - step: 7620 loss: 2.5412 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2413 +[titan] 2025-10-05 03:15:46,512 - root - INFO - lr: 4.6408e-05 gnorm: 1.18 [ 4:41:37<19:56:43] +[titan] 2025-10-05 03:15:57,356 - root - INFO - step: 7625 loss: 2.6165 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3094 global_avg_mtp_loss: 2.3070 +[titan] 2025-10-05 03:15:57,356 - root - INFO - lr: 4.6403e-05 gnorm: 1.26 [ 4:41:48<19:56:31] +[titan] 2025-10-05 03:16:08,215 - root - INFO - step: 7630 loss: 2.5181 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:16:08,215 - root - INFO - lr: 4.6398e-05 
gnorm: 1.21 [ 4:41:59<19:56:19] +[titan] 2025-10-05 03:16:19,087 - root - INFO - step: 7635 loss: 2.4574 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1676 +[titan] 2025-10-05 03:16:19,088 - root - INFO - lr: 4.6394e-05 gnorm: 1.19 [ 4:42:10<19:56:07] +[titan] 2025-10-05 03:16:29,923 - root - INFO - step: 7640 loss: 2.4611 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1719 +[titan] 2025-10-05 03:16:29,923 - root - INFO - lr: 4.6389e-05 gnorm: 1.17 [ 4:42:21<19:55:55] +[titan] 2025-10-05 03:16:40,805 - root - INFO - step: 7645 loss: 2.5518 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2500 +[titan] 2025-10-05 03:16:40,805 - root - INFO - lr: 4.6384e-05 gnorm: 1.19 [ 4:42:31<19:55:43] +[titan] 2025-10-05 03:16:49,484 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:16:51,676 - root - INFO - step: 7650 loss: 2.5593 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2579 +[titan] 2025-10-05 03:16:51,676 - root - INFO - lr: 4.6379e-05 gnorm: 1.21 [ 4:42:42<19:55:31] +[titan] 2025-10-05 03:17:02,521 - root - INFO - step: 7655 loss: 2.5404 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 03:17:02,521 - root - INFO - lr: 4.6374e-05 gnorm: 1.24 [ 4:42:53<19:55:19] +[titan] 2025-10-05 03:17:13,367 - root - INFO - step: 7660 loss: 2.5051 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2097 +[titan] 2025-10-05 03:17:13,367 - root - INFO - lr: 4.6369e-05 gnorm: 1.23 [ 4:43:04<19:55:07] +[titan] 2025-10-05 03:17:24,235 - root - INFO - step: 7665 loss: 2.6218 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.3131 +[titan] 2025-10-05 03:17:24,235 - root - INFO - lr: 4.6364e-05 gnorm: 1.19 [ 4:43:15<19:54:55] +[titan] 2025-10-05 03:17:35,066 - root - INFO - step: 7670 loss: 2.5900 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2847 +[titan] 2025-10-05 03:17:35,066 - root - INFO - lr: 4.6360e-05 gnorm: 1.23 [ 4:43:26<19:54:43] +[titan] 2025-10-05 03:17:45,893 - root - INFO - step: 7675 loss: 2.5953 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 03:17:45,893 - root - INFO - lr: 4.6355e-05 gnorm: 1.19 [ 4:43:37<19:54:30] +[titan] 2025-10-05 03:17:56,861 - root - INFO - step: 7680 loss: 2.5148 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2185 +[titan] 2025-10-05 03:17:56,861 - root - INFO - lr: 4.6350e-05 
gnorm: 1.23 [ 4:43:47<19:54:19] +[titan] 2025-10-05 03:17:57,032 - root - INFO - Dumping profiler traces at step 7680 +[titan] 2025-10-05 03:17:57,070 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:18:07,907 - root - INFO - step: 7685 loss: 2.4389 memory: 118.84GiB(85.28%) tps: 29,665 tflops: 411.56 mfu: 41.61% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 03:18:07,907 - root - INFO - lr: 4.6345e-05 gnorm: 1.17 [ 4:43:59<19:54:08] +[titan] 2025-10-05 03:18:18,756 - root - INFO - step: 7690 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:18:18,757 - root - INFO - lr: 4.6340e-05 gnorm: 1.18 [ 4:44:09<19:53:56] +[titan] 2025-10-05 03:18:29,608 - root - INFO - step: 7695 loss: 2.5730 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:18:29,609 - root - INFO - lr: 4.6335e-05 gnorm: 1.36 [ 4:44:20<19:53:43] +[titan] 2025-10-05 03:18:38,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:18:40,472 - root - INFO - step: 7700 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2643 +[titan] 2025-10-05 03:18:40,473 - root - INFO - lr: 4.6330e-05 gnorm: 1.19 [ 4:44:31<19:53:31] +[titan] 2025-10-05 03:18:51,364 - root - INFO - step: 7705 loss: 2.5443 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 03:18:51,364 - root - INFO - lr: 4.6325e-05 gnorm: 1.19 [ 4:44:42<19:53:20] +[titan] 2025-10-05 03:19:02,224 - root - INFO - step: 7710 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2637 +[titan] 2025-10-05 03:19:02,225 - root - INFO - lr: 4.6321e-05 gnorm: 1.20 [ 4:44:53<19:53:08] +[titan] 2025-10-05 03:19:13,098 - root - INFO - step: 7715 loss: 2.5489 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2486 +[titan] 2025-10-05 03:19:13,098 - root - INFO - lr: 4.6316e-05 gnorm: 1.20 [ 4:45:04<19:52:56] +[titan] 2025-10-05 03:19:23,973 - root - INFO - step: 7720 loss: 2.4402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1529 +[titan] 2025-10-05 03:19:23,974 - root - INFO - lr: 4.6311e-05 gnorm: 1.21 [ 4:45:15<19:52:44] +[titan] 2025-10-05 03:19:34,816 - root - INFO - step: 7725 loss: 2.5551 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:19:34,816 - root - INFO - lr: 4.6306e-05 gnorm: 1.19 [ 4:45:25<19:52:31] +[titan] 2025-10-05 03:19:45,679 - root - INFO - step: 7730 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2814 +[titan] 2025-10-05 03:19:45,679 - root - INFO - lr: 4.6301e-05 
gnorm: 1.17 [ 4:45:36<19:52:19] +[titan] 2025-10-05 03:19:56,502 - root - INFO - step: 7735 loss: 2.5206 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2970 global_avg_mtp_loss: 2.2236 +[titan] 2025-10-05 03:19:56,502 - root - INFO - lr: 4.6296e-05 gnorm: 1.24 [ 4:45:47<19:52:07] +[titan] 2025-10-05 03:20:07,337 - root - INFO - step: 7740 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2545 +[titan] 2025-10-05 03:20:07,337 - root - INFO - lr: 4.6291e-05 gnorm: 1.19 [ 4:45:58<19:51:55] +[titan] 2025-10-05 03:20:18,166 - root - INFO - step: 7745 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.79 mfu: 42.45% global_avg_ntp_loss: 0.2938 global_avg_mtp_loss: 2.1964 +[titan] 2025-10-05 03:20:18,166 - root - INFO - lr: 4.6286e-05 gnorm: 1.20 [ 4:46:09<19:51:43] +[titan] 2025-10-05 03:20:26,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:20:29,014 - root - INFO - step: 7750 loss: 2.4800 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.1876 +[titan] 2025-10-05 03:20:29,015 - root - INFO - lr: 4.6281e-05 gnorm: 1.17 [ 4:46:20<19:51:31] +[titan] 2025-10-05 03:20:39,856 - root - INFO - step: 7755 loss: 2.4850 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1929 +[titan] 2025-10-05 03:20:39,857 - root - INFO - lr: 4.6276e-05 gnorm: 1.24 [ 4:46:30<19:51:19] +[titan] 2025-10-05 03:20:50,697 - root - INFO - step: 7760 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2144 +[titan] 2025-10-05 03:20:50,697 - root - INFO - lr: 4.6271e-05 gnorm: 1.13 [ 4:46:41<19:51:07] +[titan] 2025-10-05 03:21:01,572 - root - INFO - step: 7765 loss: 2.5168 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2200 +[titan] 2025-10-05 03:21:01,573 - root - INFO - lr: 4.6267e-05 gnorm: 1.17 [ 4:46:52<19:50:55] +[titan] 2025-10-05 03:21:12,426 - root - INFO - step: 7770 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:21:12,426 - root - INFO - lr: 4.6262e-05 gnorm: 1.24 [ 4:47:03<19:50:43] +[titan] 2025-10-05 03:21:23,262 - root - INFO - step: 7775 loss: 2.5468 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2462 +[titan] 2025-10-05 03:21:23,262 - root - INFO - lr: 4.6257e-05 gnorm: 1.22 [ 4:47:14<19:50:31] +[titan] 2025-10-05 03:21:34,121 - root - INFO - step: 7780 loss: 2.5186 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2965 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:21:34,121 - root - INFO - lr: 4.6252e-05 
gnorm: 1.22 [ 4:47:25<19:50:19] +[titan] 2025-10-05 03:21:44,959 - root - INFO - step: 7785 loss: 2.5555 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2532 +[titan] 2025-10-05 03:21:44,960 - root - INFO - lr: 4.6247e-05 gnorm: 1.19 [ 4:47:36<19:50:07] +[titan] 2025-10-05 03:21:55,841 - root - INFO - step: 7790 loss: 2.5595 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2574 +[titan] 2025-10-05 03:21:55,841 - root - INFO - lr: 4.6242e-05 gnorm: 1.21 [ 4:47:46<19:49:55] +[titan] 2025-10-05 03:22:06,686 - root - INFO - step: 7795 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2407 +[titan] 2025-10-05 03:22:06,686 - root - INFO - lr: 4.6237e-05 gnorm: 1.20 [ 4:47:57<19:49:43] +[titan] 2025-10-05 03:22:15,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:22:17,501 - root - INFO - step: 7800 loss: 2.4671 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1775 +[titan] 2025-10-05 03:22:17,501 - root - INFO - lr: 4.6232e-05 gnorm: 1.31 [ 4:48:08<19:49:30] +[titan] 2025-10-05 03:22:28,367 - root - INFO - step: 7805 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2366 +[titan] 2025-10-05 03:22:28,367 - root - INFO - lr: 4.6227e-05 gnorm: 1.21 [ 4:48:19<19:49:18] +[titan] 2025-10-05 03:22:39,182 - root - INFO - step: 7810 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:22:39,182 - root - INFO - lr: 4.6222e-05 gnorm: 1.23 [ 4:48:30<19:49:06] +[titan] 2025-10-05 03:22:50,001 - root - INFO - step: 7815 loss: 2.5037 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2093 +[titan] 2025-10-05 03:22:50,001 - root - INFO - lr: 4.6217e-05 gnorm: 1.17 [ 4:48:41<19:48:54] +[titan] 2025-10-05 03:23:00,861 - root - INFO - step: 7820 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 03:23:00,861 - root - INFO - lr: 4.6212e-05 gnorm: 1.15 [ 4:48:51<19:48:42] +[titan] 2025-10-05 03:23:11,665 - root - INFO - step: 7825 loss: 2.5549 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:23:11,665 - root - INFO - lr: 4.6207e-05 gnorm: 1.18 [ 4:49:02<19:48:30] +[titan] 2025-10-05 03:23:22,463 - root - INFO - step: 7830 loss: 2.5877 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2813 +[titan] 2025-10-05 03:23:22,464 - root - INFO - lr: 4.6202e-05 
gnorm: 1.22 [ 4:49:13<19:48:18] +[titan] 2025-10-05 03:23:33,276 - root - INFO - step: 7835 loss: 2.5278 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:23:33,276 - root - INFO - lr: 4.6197e-05 gnorm: 1.28 [ 4:49:24<19:48:05] +[titan] 2025-10-05 03:23:44,101 - root - INFO - step: 7840 loss: 2.5759 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.46% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 03:23:44,101 - root - INFO - lr: 4.6192e-05 gnorm: 1.19 [ 4:49:35<19:47:53] +[titan] 2025-10-05 03:23:54,974 - root - INFO - step: 7845 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 03:23:54,975 - root - INFO - lr: 4.6187e-05 gnorm: 1.19 [ 4:49:46<19:47:41] +[titan] 2025-10-05 03:24:03,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:24:05,782 - root - INFO - step: 7850 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2873 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 03:24:05,783 - root - INFO - lr: 4.6182e-05 gnorm: 1.17 [ 4:49:56<19:47:29] +[titan] 2025-10-05 03:24:16,593 - root - INFO - step: 7855 loss: 2.4523 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1636 +[titan] 2025-10-05 03:24:16,593 - root - INFO - lr: 4.6177e-05 gnorm: 1.14 [ 4:50:07<19:47:17] +[titan] 2025-10-05 03:24:27,424 - root - INFO - step: 7860 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2509 +[titan] 2025-10-05 03:24:27,424 - root - INFO - lr: 4.6172e-05 gnorm: 1.24 [ 4:50:18<19:47:05] +[titan] 2025-10-05 03:24:38,249 - root - INFO - step: 7865 loss: 2.5375 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2394 +[titan] 2025-10-05 03:24:38,249 - root - INFO - lr: 4.6167e-05 gnorm: 1.22 [ 4:50:29<19:46:53] +[titan] 2025-10-05 03:24:49,117 - root - INFO - step: 7870 loss: 2.4208 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1365 +[titan] 2025-10-05 03:24:49,117 - root - INFO - lr: 4.6163e-05 gnorm: 1.17 [ 4:50:40<19:46:41] +[titan] 2025-10-05 03:25:00,043 - root - INFO - step: 7875 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:25:00,043 - root - INFO - lr: 4.6158e-05 gnorm: 1.19 [ 4:50:51<19:46:29] +[titan] 2025-10-05 03:25:10,889 - root - INFO - step: 7880 loss: 2.5464 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2467 +[titan] 2025-10-05 03:25:10,889 - root - INFO - lr: 4.6153e-05 
gnorm: 1.19 [ 4:51:01<19:46:17] +[titan] 2025-10-05 03:25:21,745 - root - INFO - step: 7885 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:25:21,745 - root - INFO - lr: 4.6148e-05 gnorm: 1.18 [ 4:51:12<19:46:05] +[titan] 2025-10-05 03:25:32,610 - root - INFO - step: 7890 loss: 2.5321 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2346 +[titan] 2025-10-05 03:25:32,610 - root - INFO - lr: 4.6143e-05 gnorm: 1.20 [ 4:51:23<19:45:53] +[titan] 2025-10-05 03:25:43,443 - root - INFO - step: 7895 loss: 2.5115 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:25:43,443 - root - INFO - lr: 4.6138e-05 gnorm: 1.14 [ 4:51:34<19:45:41] +[titan] 2025-10-05 03:25:52,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:25:54,248 - root - INFO - step: 7900 loss: 2.5320 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.75 mfu: 42.54% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2324 +[titan] 2025-10-05 03:25:54,248 - root - INFO - lr: 4.6133e-05 gnorm: 1.18 [ 4:51:45<19:45:29] +[titan] 2025-10-05 03:26:05,135 - root - INFO - step: 7905 loss: 2.5694 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2673 +[titan] 2025-10-05 03:26:05,135 - root - INFO - lr: 4.6128e-05 gnorm: 1.17 [ 4:51:56<19:45:17] +[titan] 2025-10-05 03:26:15,976 - root - INFO - step: 7910 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2989 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:26:15,976 - root - INFO - lr: 4.6123e-05 gnorm: 1.24 [ 4:52:07<19:45:05] +[titan] 2025-10-05 03:26:26,803 - root - INFO - step: 7915 loss: 2.5234 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2262 +[titan] 2025-10-05 03:26:26,803 - root - INFO - lr: 4.6118e-05 gnorm: 1.20 [ 4:52:17<19:44:53] +[titan] 2025-10-05 03:26:37,605 - root - INFO - step: 7920 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2145 +[titan] 2025-10-05 03:26:37,605 - root - INFO - lr: 4.6113e-05 gnorm: 1.21 [ 4:52:28<19:44:41] +[titan] 2025-10-05 03:26:48,452 - root - INFO - step: 7925 loss: 2.4185 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 03:26:48,452 - root - INFO - lr: 4.6107e-05 gnorm: 1.15 [ 4:52:39<19:44:29] +[titan] 2025-10-05 03:26:59,330 - root - INFO - step: 7930 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 03:26:59,330 - root - INFO - lr: 4.6102e-05 
gnorm: 1.26 [ 4:52:50<19:44:17] +[titan] 2025-10-05 03:27:10,155 - root - INFO - step: 7935 loss: 2.4620 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2890 global_avg_mtp_loss: 2.1731 +[titan] 2025-10-05 03:27:10,155 - root - INFO - lr: 4.6097e-05 gnorm: 1.18 [ 4:53:01<19:44:05] +[titan] 2025-10-05 03:27:20,964 - root - INFO - step: 7940 loss: 2.4808 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.61 mfu: 42.53% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:27:20,964 - root - INFO - lr: 4.6092e-05 gnorm: 1.15 [ 4:53:12<19:43:52] +[titan] 2025-10-05 03:27:31,803 - root - INFO - step: 7945 loss: 2.5084 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2142 +[titan] 2025-10-05 03:27:31,803 - root - INFO - lr: 4.6087e-05 gnorm: 1.16 [ 4:53:22<19:43:40] +[titan] 2025-10-05 03:27:40,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:27:42,583 - root - INFO - step: 7950 loss: 2.5326 memory: 118.84GiB(85.28%) tps: 30,397 tflops: 421.71 mfu: 42.64% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2359 +[titan] 2025-10-05 03:27:42,583 - root - INFO - lr: 4.6082e-05 gnorm: 1.21 [ 4:53:33<19:43:28] +[titan] 2025-10-05 03:27:53,381 - root - INFO - step: 7955 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2591 +[titan] 2025-10-05 03:27:53,382 - root - INFO - lr: 4.6077e-05 gnorm: 1.18 [ 4:53:44<19:43:16] +[titan] 2025-10-05 03:28:04,227 - root - INFO - step: 7960 loss: 2.4969 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2940 global_avg_mtp_loss: 2.2030 +[titan] 2025-10-05 03:28:04,227 - root - INFO - lr: 4.6072e-05 gnorm: 1.15 [ 4:53:55<19:43:04] +[titan] 2025-10-05 03:28:15,055 - root - INFO - step: 7965 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1237 +[titan] 2025-10-05 03:28:15,055 - root - INFO - lr: 4.6067e-05 gnorm: 1.13 [ 4:54:06<19:42:52] +[titan] 2025-10-05 03:28:25,883 - root - INFO - step: 7970 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.2034 +[titan] 2025-10-05 03:28:25,883 - root - INFO - lr: 4.6062e-05 gnorm: 1.17 [ 4:54:16<19:42:40] +[titan] 2025-10-05 03:28:36,715 - root - INFO - step: 7975 loss: 2.5491 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2491 +[titan] 2025-10-05 03:28:36,715 - root - INFO - lr: 4.6057e-05 gnorm: 1.19 [ 4:54:27<19:42:28] +[titan] 2025-10-05 03:28:47,543 - root - INFO - step: 7980 loss: 2.4817 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:28:47,543 - root - INFO - lr: 4.6052e-05 
gnorm: 1.16 [ 4:54:38<19:42:16] +[titan] 2025-10-05 03:28:58,364 - root - INFO - step: 7985 loss: 2.5422 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2429 +[titan] 2025-10-05 03:28:58,364 - root - INFO - lr: 4.6047e-05 gnorm: 1.18 [ 4:54:49<19:42:03] +[titan] 2025-10-05 03:29:09,176 - root - INFO - step: 7990 loss: 2.5558 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2558 +[titan] 2025-10-05 03:29:09,176 - root - INFO - lr: 4.6042e-05 gnorm: 1.18 [ 4:55:00<19:41:51] +[titan] 2025-10-05 03:29:19,983 - root - INFO - step: 7995 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.68 mfu: 42.54% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:29:19,983 - root - INFO - lr: 4.6037e-05 gnorm: 1.16 [ 4:55:11<19:41:39] +[titan] 2025-10-05 03:29:28,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:29:30,811 - root - INFO - step: 8000 loss: 2.5669 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.3034 global_avg_mtp_loss: 2.2635 +[titan] 2025-10-05 03:29:30,811 - root - INFO - lr: 4.6032e-05 gnorm: 1.20 [ 4:55:21<19:41:27] +[titan] 2025-10-05 03:29:41,667 - root - INFO - step: 8005 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2699 +[titan] 2025-10-05 03:29:41,667 - root - INFO - lr: 4.6027e-05 gnorm: 1.25 [ 4:55:32<19:41:15] +[titan] 2025-10-05 03:29:52,487 - root - INFO - step: 8010 loss: 2.5006 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2058 +[titan] 2025-10-05 03:29:52,487 - root - INFO - lr: 4.6022e-05 gnorm: 1.26 [ 4:55:43<19:41:03] +[titan] 2025-10-05 03:30:03,339 - root - INFO - step: 8015 loss: 2.4914 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:30:03,339 - root - INFO - lr: 4.6017e-05 gnorm: 1.18 [ 4:55:54<19:40:51] +[titan] 2025-10-05 03:30:14,162 - root - INFO - step: 8020 loss: 2.4809 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:30:14,162 - root - INFO - lr: 4.6012e-05 gnorm: 1.20 [ 4:56:05<19:40:39] +[titan] 2025-10-05 03:30:25,003 - root - INFO - step: 8025 loss: 2.4991 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2042 +[titan] 2025-10-05 03:30:25,003 - root - INFO - lr: 4.6007e-05 gnorm: 1.17 [ 4:56:16<19:40:27] +[titan] 2025-10-05 03:30:35,840 - root - INFO - step: 8030 loss: 2.4390 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1512 +[titan] 2025-10-05 03:30:35,841 - root - INFO - lr: 4.6001e-05 
gnorm: 1.18 [ 4:56:26<19:40:15] +[titan] 2025-10-05 03:30:46,678 - root - INFO - step: 8035 loss: 2.5127 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:30:46,678 - root - INFO - lr: 4.5996e-05 gnorm: 1.21 [ 4:56:37<19:40:03] +[titan] 2025-10-05 03:30:57,494 - root - INFO - step: 8040 loss: 2.4745 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1848 +[titan] 2025-10-05 03:30:57,495 - root - INFO - lr: 4.5991e-05 gnorm: 1.17 [ 4:56:48<19:39:51] +[titan] 2025-10-05 03:31:08,359 - root - INFO - step: 8045 loss: 2.5034 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2082 +[titan] 2025-10-05 03:31:08,360 - root - INFO - lr: 4.5986e-05 gnorm: 1.19 [ 4:56:59<19:39:39] +[titan] 2025-10-05 03:31:17,027 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:31:19,196 - root - INFO - step: 8050 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1741 +[titan] 2025-10-05 03:31:19,196 - root - INFO - lr: 4.5981e-05 gnorm: 1.19 [ 4:57:10<19:39:27] +[titan] 2025-10-05 03:31:30,047 - root - INFO - step: 8055 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2206 +[titan] 2025-10-05 03:31:30,047 - root - INFO - lr: 4.5976e-05 gnorm: 1.16 [ 4:57:21<19:39:15] +[titan] 2025-10-05 03:31:40,901 - root - INFO - step: 8060 loss: 2.4474 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 03:31:40,901 - root - INFO - lr: 4.5971e-05 gnorm: 1.14 [ 4:57:31<19:39:03] +[titan] 2025-10-05 03:31:51,725 - root - INFO - step: 8065 loss: 2.5411 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2406 +[titan] 2025-10-05 03:31:51,725 - root - INFO - lr: 4.5966e-05 gnorm: 1.17 [ 4:57:42<19:38:51] +[titan] 2025-10-05 03:32:02,621 - root - INFO - step: 8070 loss: 2.4864 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1942 +[titan] 2025-10-05 03:32:02,621 - root - INFO - lr: 4.5961e-05 gnorm: 1.20 [ 4:57:53<19:38:39] +[titan] 2025-10-05 03:32:13,441 - root - INFO - step: 8075 loss: 2.5540 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 03:32:13,441 - root - INFO - lr: 4.5956e-05 gnorm: 1.17 [ 4:58:04<19:38:27] +[titan] 2025-10-05 03:32:24,287 - root - INFO - step: 8080 loss: 2.4398 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1536 +[titan] 2025-10-05 03:32:24,287 - root - INFO - lr: 4.5951e-05 
gnorm: 1.14 [ 4:58:15<19:38:15] +[titan] 2025-10-05 03:32:35,118 - root - INFO - step: 8085 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2356 +[titan] 2025-10-05 03:32:35,118 - root - INFO - lr: 4.5945e-05 gnorm: 1.21 [ 4:58:26<19:38:03] +[titan] 2025-10-05 03:32:45,958 - root - INFO - step: 8090 loss: 2.5225 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2257 +[titan] 2025-10-05 03:32:45,959 - root - INFO - lr: 4.5940e-05 gnorm: 1.12 [ 4:58:37<19:37:51] +[titan] 2025-10-05 03:32:56,823 - root - INFO - step: 8095 loss: 2.5506 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2506 +[titan] 2025-10-05 03:32:56,824 - root - INFO - lr: 4.5935e-05 gnorm: 1.21 [ 4:58:47<19:37:39] +[titan] 2025-10-05 03:33:05,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:33:07,719 - root - INFO - step: 8100 loss: 2.5049 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 03:33:07,719 - root - INFO - lr: 4.5930e-05 gnorm: 1.20 [ 4:58:58<19:37:27] +[titan] 2025-10-05 03:33:18,615 - root - INFO - step: 8105 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:33:18,615 - root - INFO - lr: 4.5925e-05 gnorm: 1.12 [ 4:59:09<19:37:16] +[titan] 2025-10-05 03:33:29,481 - root - INFO - step: 8110 loss: 2.4795 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 03:33:29,481 - root - INFO - lr: 4.5920e-05 gnorm: 1.16 [ 4:59:20<19:37:04] +[titan] 2025-10-05 03:33:40,332 - root - INFO - step: 8115 loss: 2.4748 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1833 +[titan] 2025-10-05 03:33:40,332 - root - INFO - lr: 4.5915e-05 gnorm: 1.18 [ 4:59:31<19:36:52] +[titan] 2025-10-05 03:33:51,164 - root - INFO - step: 8120 loss: 2.5292 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2311 +[titan] 2025-10-05 03:33:51,164 - root - INFO - lr: 4.5910e-05 gnorm: 1.19 [ 4:59:42<19:36:40] +[titan] 2025-10-05 03:34:02,020 - root - INFO - step: 8125 loss: 2.4881 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:34:02,020 - root - INFO - lr: 4.5904e-05 gnorm: 1.21 [ 4:59:53<19:36:28] +[titan] 2025-10-05 03:34:12,891 - root - INFO - step: 8130 loss: 2.5727 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:34:12,891 - root - INFO - lr: 4.5899e-05 
gnorm: 1.22 [ 5:00:03<19:36:16] +[titan] 2025-10-05 03:34:23,761 - root - INFO - step: 8135 loss: 2.4550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1663 +[titan] 2025-10-05 03:34:23,761 - root - INFO - lr: 4.5894e-05 gnorm: 1.21 [ 5:00:14<19:36:04] +[titan] 2025-10-05 03:34:34,624 - root - INFO - step: 8140 loss: 2.4669 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:34:34,624 - root - INFO - lr: 4.5889e-05 gnorm: 1.16 [ 5:00:25<19:35:52] +[titan] 2025-10-05 03:34:45,506 - root - INFO - step: 8145 loss: 2.5656 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:34:45,506 - root - INFO - lr: 4.5884e-05 gnorm: 1.18 [ 5:00:36<19:35:40] +[titan] 2025-10-05 03:34:54,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:34:56,340 - root - INFO - step: 8150 loss: 2.4846 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1926 +[titan] 2025-10-05 03:34:56,340 - root - INFO - lr: 4.5879e-05 gnorm: 1.16 [ 5:00:47<19:35:28] +[titan] 2025-10-05 03:35:07,237 - root - INFO - step: 8155 loss: 2.5131 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2164 +[titan] 2025-10-05 03:35:07,237 - root - INFO - lr: 4.5874e-05 gnorm: 1.17 [ 5:00:58<19:35:17] +[titan] 2025-10-05 03:35:18,098 - root - INFO - step: 8160 loss: 2.6082 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3005 +[titan] 2025-10-05 03:35:18,098 - root - INFO - lr: 4.5868e-05 gnorm: 1.18 [ 5:01:09<19:35:05] +[titan] 2025-10-05 03:35:28,978 - root - INFO - step: 8165 loss: 2.5372 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2397 +[titan] 2025-10-05 03:35:28,978 - root - INFO - lr: 4.5863e-05 gnorm: 1.17 [ 5:01:20<19:34:53] +[titan] 2025-10-05 03:35:39,844 - root - INFO - step: 8170 loss: 2.4152 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 03:35:39,844 - root - INFO - lr: 4.5858e-05 gnorm: 1.18 [ 5:01:30<19:34:41] +[titan] 2025-10-05 03:35:50,781 - root - INFO - step: 8175 loss: 2.5578 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3013 global_avg_mtp_loss: 2.2565 +[titan] 2025-10-05 03:35:50,781 - root - INFO - lr: 4.5853e-05 gnorm: 1.27 [ 5:01:41<19:34:29] +[titan] 2025-10-05 03:36:01,663 - root - INFO - step: 8180 loss: 2.4462 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1586 +[titan] 2025-10-05 03:36:01,663 - root - INFO - lr: 4.5848e-05 
gnorm: 1.13 [ 5:01:52<19:34:18] +[titan] 2025-10-05 03:36:12,582 - root - INFO - step: 8185 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:36:12,582 - root - INFO - lr: 4.5843e-05 gnorm: 1.20 [ 5:02:03<19:34:06] +[titan] 2025-10-05 03:36:23,548 - root - INFO - step: 8190 loss: 2.4035 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1217 +[titan] 2025-10-05 03:36:23,549 - root - INFO - lr: 4.5837e-05 gnorm: 1.16 [ 5:02:14<19:33:54] +[titan] 2025-10-05 03:36:28,079 - root - INFO - Dumping profiler traces at step 8192 +[titan] 2025-10-05 03:36:28,117 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:36:34,646 - root - INFO - step: 8195 loss: 2.4867 memory: 118.84GiB(85.28%) tps: 29,528 tflops: 409.66 mfu: 41.42% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 03:36:34,646 - root - INFO - lr: 4.5832e-05 gnorm: 1.16 [ 5:02:25<19:33:43] +[titan] 2025-10-05 03:36:43,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:36:45,533 - root - INFO - step: 8200 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2809 +[titan] 2025-10-05 03:36:45,533 - root - INFO - lr: 4.5827e-05 gnorm: 1.15 [ 5:02:36<19:33:32] +[titan] 2025-10-05 03:36:56,421 - root - INFO - step: 8205 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1517 +[titan] 2025-10-05 03:36:56,421 - root - INFO - lr: 4.5822e-05 gnorm: 1.15 [ 5:02:47<19:33:20] +[titan] 2025-10-05 03:37:07,262 - root - INFO - step: 8210 loss: 2.4422 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2866 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:37:07,262 - root - INFO - lr: 4.5817e-05 gnorm: 1.16 [ 5:02:58<19:33:08] +[titan] 2025-10-05 03:37:18,124 - root - INFO - step: 8215 loss: 2.5901 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3062 global_avg_mtp_loss: 2.2840 +[titan] 2025-10-05 03:37:18,124 - root - INFO - lr: 4.5812e-05 gnorm: 1.23 [ 5:03:09<19:32:56] +[titan] 2025-10-05 03:37:29,001 - root - INFO - step: 8220 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2582 +[titan] 2025-10-05 03:37:29,001 - root - INFO - lr: 4.5806e-05 gnorm: 1.20 [ 5:03:20<19:32:44] +[titan] 2025-10-05 03:37:39,844 - root - INFO - step: 8225 loss: 2.4659 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 03:37:39,845 - root - INFO - lr: 4.5801e-05 gnorm: 1.23 [ 5:03:30<19:32:32] +[titan] 2025-10-05 03:37:50,743 - root - INFO - step: 8230 loss: 2.5410 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 03:37:50,743 - root - INFO - lr: 4.5796e-05 
gnorm: 1.19 [ 5:03:41<19:32:21] +[titan] 2025-10-05 03:38:01,585 - root - INFO - step: 8235 loss: 2.5291 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2326 +[titan] 2025-10-05 03:38:01,585 - root - INFO - lr: 4.5791e-05 gnorm: 1.15 [ 5:03:52<19:32:09] +[titan] 2025-10-05 03:38:12,474 - root - INFO - step: 8240 loss: 2.5137 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:38:12,474 - root - INFO - lr: 4.5786e-05 gnorm: 1.17 [ 5:04:03<19:31:57] +[titan] 2025-10-05 03:38:23,335 - root - INFO - step: 8245 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:38:23,335 - root - INFO - lr: 4.5780e-05 gnorm: 1.17 [ 5:04:14<19:31:45] +[titan] 2025-10-05 03:38:32,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:38:34,223 - root - INFO - step: 8250 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2131 +[titan] 2025-10-05 03:38:34,223 - root - INFO - lr: 4.5775e-05 gnorm: 1.18 [ 5:04:25<19:31:33] +[titan] 2025-10-05 03:38:45,088 - root - INFO - step: 8255 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2198 +[titan] 2025-10-05 03:38:45,088 - root - INFO - lr: 4.5770e-05 gnorm: 1.20 [ 5:04:36<19:31:21] +[titan] 2025-10-05 03:38:55,962 - root - INFO - step: 8260 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1524 +[titan] 2025-10-05 03:38:55,962 - root - INFO - lr: 4.5765e-05 gnorm: 1.19 [ 5:04:47<19:31:09] +[titan] 2025-10-05 03:39:06,818 - root - INFO - step: 8265 loss: 2.6017 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2949 +[titan] 2025-10-05 03:39:06,818 - root - INFO - lr: 4.5760e-05 gnorm: 1.23 [ 5:04:57<19:30:58] +[titan] 2025-10-05 03:39:17,707 - root - INFO - step: 8270 loss: 2.4450 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1572 +[titan] 2025-10-05 03:39:17,707 - root - INFO - lr: 4.5754e-05 gnorm: 1.18 [ 5:05:08<19:30:46] +[titan] 2025-10-05 03:39:28,574 - root - INFO - step: 8275 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1559 +[titan] 2025-10-05 03:39:28,574 - root - INFO - lr: 4.5749e-05 gnorm: 1.20 [ 5:05:19<19:30:34] +[titan] 2025-10-05 03:39:39,438 - root - INFO - step: 8280 loss: 2.4782 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2902 global_avg_mtp_loss: 2.1880 +[titan] 2025-10-05 03:39:39,438 - root - INFO - lr: 4.5744e-05 
gnorm: 1.20 [ 5:05:30<19:30:22] +[titan] 2025-10-05 03:39:50,344 - root - INFO - step: 8285 loss: 2.4818 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:39:50,345 - root - INFO - lr: 4.5739e-05 gnorm: 1.16 [ 5:05:41<19:30:10] +[titan] 2025-10-05 03:40:01,252 - root - INFO - step: 8290 loss: 2.4954 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2009 +[titan] 2025-10-05 03:40:01,252 - root - INFO - lr: 4.5733e-05 gnorm: 1.16 [ 5:05:52<19:29:59] +[titan] 2025-10-05 03:40:12,143 - root - INFO - step: 8295 loss: 2.5302 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2317 +[titan] 2025-10-05 03:40:12,143 - root - INFO - lr: 4.5728e-05 gnorm: 1.18 [ 5:06:03<19:29:47] +[titan] 2025-10-05 03:40:20,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:40:23,034 - root - INFO - step: 8300 loss: 2.4874 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:40:23,034 - root - INFO - lr: 4.5723e-05 gnorm: 1.19 [ 5:06:14<19:29:35] +[titan] 2025-10-05 03:40:33,937 - root - INFO - step: 8305 loss: 2.5831 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2800 +[titan] 2025-10-05 03:40:33,938 - root - INFO - lr: 4.5718e-05 gnorm: 1.17 [ 5:06:24<19:29:24] +[titan] 2025-10-05 03:40:44,825 - root - INFO - step: 8310 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2514 +[titan] 2025-10-05 03:40:44,825 - root - INFO - lr: 4.5713e-05 gnorm: 1.17 [ 5:06:35<19:29:12] +[titan] 2025-10-05 03:40:55,729 - root - INFO - step: 8315 loss: 2.5111 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:40:55,729 - root - INFO - lr: 4.5707e-05 gnorm: 1.14 [ 5:06:46<19:29:00] +[titan] 2025-10-05 03:41:06,596 - root - INFO - step: 8320 loss: 2.5003 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2067 +[titan] 2025-10-05 03:41:06,596 - root - INFO - lr: 4.5702e-05 gnorm: 1.19 [ 5:06:57<19:28:48] +[titan] 2025-10-05 03:41:17,525 - root - INFO - step: 8325 loss: 2.4974 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 03:41:17,525 - root - INFO - lr: 4.5697e-05 gnorm: 1.26 [ 5:07:08<19:28:37] +[titan] 2025-10-05 03:41:28,416 - root - INFO - step: 8330 loss: 2.4791 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1878 +[titan] 2025-10-05 03:41:28,416 - root - INFO - lr: 4.5692e-05 
gnorm: 1.19 [ 5:07:19<19:28:25] +[titan] 2025-10-05 03:41:39,305 - root - INFO - step: 8335 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:41:39,305 - root - INFO - lr: 4.5686e-05 gnorm: 1.25 [ 5:07:30<19:28:13] +[titan] 2025-10-05 03:41:50,197 - root - INFO - step: 8340 loss: 2.4762 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:41:50,197 - root - INFO - lr: 4.5681e-05 gnorm: 1.22 [ 5:07:41<19:28:01] +[titan] 2025-10-05 03:42:01,087 - root - INFO - step: 8345 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:42:01,087 - root - INFO - lr: 4.5676e-05 gnorm: 1.33 [ 5:07:52<19:27:50] +[titan] 2025-10-05 03:42:09,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:42:11,958 - root - INFO - step: 8350 loss: 2.5178 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2229 +[titan] 2025-10-05 03:42:11,958 - root - INFO - lr: 4.5671e-05 gnorm: 1.20 [ 5:08:03<19:27:38] +[titan] 2025-10-05 03:42:22,859 - root - INFO - step: 8355 loss: 2.5012 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2065 +[titan] 2025-10-05 03:42:22,859 - root - INFO - lr: 4.5665e-05 gnorm: 1.16 [ 5:08:13<19:27:26] +[titan] 2025-10-05 03:42:33,723 - root - INFO - step: 8360 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2087 +[titan] 2025-10-05 03:42:33,724 - root - INFO - lr: 4.5660e-05 gnorm: 1.21 [ 5:08:24<19:27:14] +[titan] 2025-10-05 03:42:44,605 - root - INFO - step: 8365 loss: 2.4169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1341 +[titan] 2025-10-05 03:42:44,605 - root - INFO - lr: 4.5655e-05 gnorm: 1.27 [ 5:08:35<19:27:03] +[titan] 2025-10-05 03:42:55,502 - root - INFO - step: 8370 loss: 2.4654 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:42:55,503 - root - INFO - lr: 4.5649e-05 gnorm: 1.13 [ 5:08:46<19:26:51] +[titan] 2025-10-05 03:43:06,377 - root - INFO - step: 8375 loss: 2.4547 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1658 +[titan] 2025-10-05 03:43:06,377 - root - INFO - lr: 4.5644e-05 gnorm: 1.15 [ 5:08:57<19:26:39] +[titan] 2025-10-05 03:43:17,279 - root - INFO - step: 8380 loss: 2.5065 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2118 +[titan] 2025-10-05 03:43:17,279 - root - INFO - lr: 4.5639e-05 
gnorm: 1.18 [ 5:09:08<19:26:27] +[titan] 2025-10-05 03:43:28,170 - root - INFO - step: 8385 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.1973 +[titan] 2025-10-05 03:43:28,171 - root - INFO - lr: 4.5634e-05 gnorm: 1.19 [ 5:09:19<19:26:16] +[titan] 2025-10-05 03:43:39,058 - root - INFO - step: 8390 loss: 2.3818 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 03:43:39,058 - root - INFO - lr: 4.5628e-05 gnorm: 1.18 [ 5:09:30<19:26:04] +[titan] 2025-10-05 03:43:49,941 - root - INFO - step: 8395 loss: 2.4979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2044 +[titan] 2025-10-05 03:43:49,941 - root - INFO - lr: 4.5623e-05 gnorm: 1.24 [ 5:09:40<19:25:52] +[titan] 2025-10-05 03:43:58,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:44:00,835 - root - INFO - step: 8400 loss: 2.4609 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1710 +[titan] 2025-10-05 03:44:00,835 - root - INFO - lr: 4.5618e-05 gnorm: 1.21 [ 5:09:51<19:25:40] +[titan] 2025-10-05 03:44:11,708 - root - INFO - step: 8405 loss: 2.4714 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1804 +[titan] 2025-10-05 03:44:11,708 - root - INFO - lr: 4.5612e-05 gnorm: 1.18 [ 5:10:02<19:25:29] +[titan] 2025-10-05 03:44:22,628 - root - INFO - step: 8410 loss: 2.4894 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1963 +[titan] 2025-10-05 03:44:22,628 - root - INFO - lr: 4.5607e-05 gnorm: 1.17 [ 5:10:13<19:25:17] +[titan] 2025-10-05 03:44:33,498 - root - INFO - step: 8415 loss: 2.4601 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1709 +[titan] 2025-10-05 03:44:33,498 - root - INFO - lr: 4.5602e-05 gnorm: 1.15 [ 5:10:24<19:25:05] +[titan] 2025-10-05 03:44:44,372 - root - INFO - step: 8420 loss: 2.4695 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1796 +[titan] 2025-10-05 03:44:44,372 - root - INFO - lr: 4.5597e-05 gnorm: 1.21 [ 5:10:35<19:24:53] +[titan] 2025-10-05 03:44:55,241 - root - INFO - step: 8425 loss: 2.6043 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.2890 +[titan] 2025-10-05 03:44:55,241 - root - INFO - lr: 4.5591e-05 gnorm: 1.22 [ 5:10:46<19:24:42] +[titan] 2025-10-05 03:45:06,108 - root - INFO - step: 8430 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1610 +[titan] 2025-10-05 03:45:06,108 - root - INFO - lr: 4.5586e-05 
gnorm: 1.22 [ 5:10:57<19:24:30] +[titan] 2025-10-05 03:45:17,033 - root - INFO - step: 8435 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1622 +[titan] 2025-10-05 03:45:17,033 - root - INFO - lr: 4.5581e-05 gnorm: 1.17 [ 5:11:08<19:24:18] +[titan] 2025-10-05 03:45:27,906 - root - INFO - step: 8440 loss: 2.4384 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1521 +[titan] 2025-10-05 03:45:27,906 - root - INFO - lr: 4.5575e-05 gnorm: 1.18 [ 5:11:18<19:24:06] +[titan] 2025-10-05 03:45:38,796 - root - INFO - step: 8445 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2251 +[titan] 2025-10-05 03:45:38,796 - root - INFO - lr: 4.5570e-05 gnorm: 1.18 [ 5:11:29<19:23:55] +[titan] 2025-10-05 03:45:47,504 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:45:49,701 - root - INFO - step: 8450 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1765 +[titan] 2025-10-05 03:45:49,701 - root - INFO - lr: 4.5565e-05 gnorm: 1.15 [ 5:11:40<19:23:43] +[titan] 2025-10-05 03:46:00,576 - root - INFO - step: 8455 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1407 +[titan] 2025-10-05 03:46:00,576 - root - INFO - lr: 4.5559e-05 gnorm: 1.16 [ 5:11:51<19:23:31] +[titan] 2025-10-05 03:46:11,464 - root - INFO - step: 8460 loss: 2.4581 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1695 +[titan] 2025-10-05 03:46:11,465 - root - INFO - lr: 4.5554e-05 gnorm: 1.18 [ 5:12:02<19:23:19] +[titan] 2025-10-05 03:46:22,405 - root - INFO - step: 8465 loss: 2.4681 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2912 global_avg_mtp_loss: 2.1769 +[titan] 2025-10-05 03:46:22,406 - root - INFO - lr: 4.5549e-05 gnorm: 1.26 [ 5:12:13<19:23:08] +[titan] 2025-10-05 03:46:33,303 - root - INFO - step: 8470 loss: 2.4812 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:46:33,303 - root - INFO - lr: 4.5543e-05 gnorm: 1.18 [ 5:12:24<19:22:56] +[titan] 2025-10-05 03:46:44,215 - root - INFO - step: 8475 loss: 2.4456 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:46:44,215 - root - INFO - lr: 4.5538e-05 gnorm: 1.19 [ 5:12:35<19:22:45] +[titan] 2025-10-05 03:46:55,102 - root - INFO - step: 8480 loss: 2.5134 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2167 +[titan] 2025-10-05 03:46:55,103 - root - INFO - lr: 4.5533e-05 
gnorm: 1.22 [ 5:12:46<19:22:33] +[titan] 2025-10-05 03:47:05,998 - root - INFO - step: 8485 loss: 2.4337 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 03:47:05,998 - root - INFO - lr: 4.5527e-05 gnorm: 1.16 [ 5:12:57<19:22:21] +[titan] 2025-10-05 03:47:16,904 - root - INFO - step: 8490 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1235 +[titan] 2025-10-05 03:47:16,904 - root - INFO - lr: 4.5522e-05 gnorm: 1.17 [ 5:13:07<19:22:10] +[titan] 2025-10-05 03:47:27,782 - root - INFO - step: 8495 loss: 2.4698 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1791 +[titan] 2025-10-05 03:47:27,783 - root - INFO - lr: 4.5517e-05 gnorm: 1.17 [ 5:13:18<19:21:58] +[titan] 2025-10-05 03:47:36,458 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:47:38,638 - root - INFO - step: 8500 loss: 2.3537 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0773 +[titan] 2025-10-05 03:47:38,638 - root - INFO - lr: 4.5511e-05 gnorm: 1.20 [ 5:13:29<19:21:46] +[titan] 2025-10-05 03:47:49,538 - root - INFO - step: 8505 loss: 2.5368 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2395 +[titan] 2025-10-05 03:47:49,538 - root - INFO - lr: 4.5506e-05 gnorm: 1.16 [ 5:13:40<19:21:34] +[titan] 2025-10-05 03:48:00,412 - root - INFO - step: 8510 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.1961 +[titan] 2025-10-05 03:48:00,413 - root - INFO - lr: 4.5501e-05 gnorm: 1.19 [ 5:13:51<19:21:22] +[titan] 2025-10-05 03:48:11,277 - root - INFO - step: 8515 loss: 2.4264 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:48:11,277 - root - INFO - lr: 4.5495e-05 gnorm: 1.17 [ 5:14:02<19:21:11] +[titan] 2025-10-05 03:48:22,187 - root - INFO - step: 8520 loss: 2.4968 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2043 +[titan] 2025-10-05 03:48:22,188 - root - INFO - lr: 4.5490e-05 gnorm: 1.24 [ 5:14:13<19:20:59] +[titan] 2025-10-05 03:48:33,044 - root - INFO - step: 8525 loss: 2.5002 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2934 global_avg_mtp_loss: 2.2068 +[titan] 2025-10-05 03:48:33,044 - root - INFO - lr: 4.5485e-05 gnorm: 1.16 [ 5:14:24<19:20:47] +[titan] 2025-10-05 03:48:43,906 - root - INFO - step: 8530 loss: 2.5203 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2249 +[titan] 2025-10-05 03:48:43,906 - root - INFO - lr: 4.5479e-05 
gnorm: 1.18 [ 5:14:34<19:20:35] +[titan] 2025-10-05 03:48:54,778 - root - INFO - step: 8535 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:48:54,778 - root - INFO - lr: 4.5474e-05 gnorm: 1.23 [ 5:14:45<19:20:24] +[titan] 2025-10-05 03:49:05,664 - root - INFO - step: 8540 loss: 2.5027 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2084 +[titan] 2025-10-05 03:49:05,664 - root - INFO - lr: 4.5468e-05 gnorm: 1.19 [ 5:14:56<19:20:12] +[titan] 2025-10-05 03:49:16,537 - root - INFO - step: 8545 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2823 global_avg_mtp_loss: 2.1266 +[titan] 2025-10-05 03:49:16,537 - root - INFO - lr: 4.5463e-05 gnorm: 1.19 [ 5:15:07<19:20:00] +[titan] 2025-10-05 03:49:25,283 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:49:27,468 - root - INFO - step: 8550 loss: 2.4984 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2049 +[titan] 2025-10-05 03:49:27,468 - root - INFO - lr: 4.5458e-05 gnorm: 1.21 [ 5:15:18<19:19:49] +[titan] 2025-10-05 03:49:38,338 - root - INFO - step: 8555 loss: 2.4539 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1653 +[titan] 2025-10-05 03:49:38,338 - root - INFO - lr: 4.5452e-05 gnorm: 1.20 [ 5:15:29<19:19:37] +[titan] 2025-10-05 03:49:49,202 - root - INFO - step: 8560 loss: 2.4721 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:49:49,202 - root - INFO - lr: 4.5447e-05 gnorm: 1.17 [ 5:15:40<19:19:25] +[titan] 2025-10-05 03:50:00,074 - root - INFO - step: 8565 loss: 2.5405 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 03:50:00,074 - root - INFO - lr: 4.5442e-05 gnorm: 1.15 [ 5:15:51<19:19:13] +[titan] 2025-10-05 03:50:10,978 - root - INFO - step: 8570 loss: 2.4470 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 03:50:10,979 - root - INFO - lr: 4.5436e-05 gnorm: 1.22 [ 5:16:02<19:19:02] +[titan] 2025-10-05 03:50:21,887 - root - INFO - step: 8575 loss: 2.4633 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1735 +[titan] 2025-10-05 03:50:21,887 - root - INFO - lr: 4.5431e-05 gnorm: 1.21 [ 5:16:12<19:18:50] +[titan] 2025-10-05 03:50:32,776 - root - INFO - step: 8580 loss: 2.4711 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1810 +[titan] 2025-10-05 03:50:32,776 - root - INFO - lr: 4.5425e-05 
gnorm: 1.18 [ 5:16:23<19:18:38] +[titan] 2025-10-05 03:50:43,667 - root - INFO - step: 8585 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:50:43,667 - root - INFO - lr: 4.5420e-05 gnorm: 1.22 [ 5:16:34<19:18:27] +[titan] 2025-10-05 03:50:54,557 - root - INFO - step: 8590 loss: 2.5385 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2399 +[titan] 2025-10-05 03:50:54,558 - root - INFO - lr: 4.5415e-05 gnorm: 1.18 [ 5:16:45<19:18:15] +[titan] 2025-10-05 03:51:05,424 - root - INFO - step: 8595 loss: 2.4767 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 03:51:05,425 - root - INFO - lr: 4.5409e-05 gnorm: 1.16 [ 5:16:56<19:18:03] +[titan] 2025-10-05 03:51:14,103 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:51:16,290 - root - INFO - step: 8600 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:51:16,290 - root - INFO - lr: 4.5404e-05 gnorm: 1.14 [ 5:17:07<19:17:51] +[titan] 2025-10-05 03:51:27,250 - root - INFO - step: 8605 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2352 +[titan] 2025-10-05 03:51:27,251 - root - INFO - lr: 4.5398e-05 gnorm: 1.15 [ 5:17:18<19:17:40] +[titan] 2025-10-05 03:51:38,134 - root - INFO - step: 8610 loss: 2.4373 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1515 +[titan] 2025-10-05 03:51:38,134 - root - INFO - lr: 4.5393e-05 gnorm: 1.14 [ 5:17:29<19:17:28] +[titan] 2025-10-05 03:51:49,035 - root - INFO - step: 8615 loss: 2.5154 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2980 global_avg_mtp_loss: 2.2174 +[titan] 2025-10-05 03:51:49,036 - root - INFO - lr: 4.5388e-05 gnorm: 1.21 [ 5:17:40<19:17:17] +[titan] 2025-10-05 03:51:59,908 - root - INFO - step: 8620 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1329 +[titan] 2025-10-05 03:51:59,908 - root - INFO - lr: 4.5382e-05 gnorm: 1.19 [ 5:17:50<19:17:05] +[titan] 2025-10-05 03:52:10,800 - root - INFO - step: 8625 loss: 2.4772 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:52:10,800 - root - INFO - lr: 4.5377e-05 gnorm: 1.19 [ 5:18:01<19:16:53] +[titan] 2025-10-05 03:52:21,724 - root - INFO - step: 8630 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1254 +[titan] 2025-10-05 03:52:21,724 - root - INFO - lr: 4.5371e-05 
gnorm: 1.17 [ 5:18:12<19:16:42] +[titan] 2025-10-05 03:52:32,629 - root - INFO - step: 8635 loss: 2.4666 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 03:52:32,629 - root - INFO - lr: 4.5366e-05 gnorm: 1.18 [ 5:18:23<19:16:30] +[titan] 2025-10-05 03:52:43,516 - root - INFO - step: 8640 loss: 2.5035 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:52:43,516 - root - INFO - lr: 4.5360e-05 gnorm: 1.16 [ 5:18:34<19:16:18] +[titan] 2025-10-05 03:52:54,413 - root - INFO - step: 8645 loss: 2.4079 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1260 +[titan] 2025-10-05 03:52:54,414 - root - INFO - lr: 4.5355e-05 gnorm: 1.18 [ 5:18:45<19:16:07] +[titan] 2025-10-05 03:53:03,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:53:05,278 - root - INFO - step: 8650 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:53:05,278 - root - INFO - lr: 4.5350e-05 gnorm: 1.17 [ 5:18:56<19:15:55] +[titan] 2025-10-05 03:53:16,166 - root - INFO - step: 8655 loss: 2.4949 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2017 +[titan] 2025-10-05 03:53:16,166 - root - INFO - lr: 4.5344e-05 gnorm: 1.17 [ 5:19:07<19:15:43] +[titan] 2025-10-05 03:53:27,098 - root - INFO - step: 8660 loss: 2.4590 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1706 +[titan] 2025-10-05 03:53:27,098 - root - INFO - lr: 4.5339e-05 gnorm: 1.20 [ 5:19:18<19:15:32] +[titan] 2025-10-05 03:53:38,012 - root - INFO - step: 8665 loss: 2.5151 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2199 +[titan] 2025-10-05 03:53:38,012 - root - INFO - lr: 4.5333e-05 gnorm: 1.19 [ 5:19:29<19:15:20] +[titan] 2025-10-05 03:53:48,872 - root - INFO - step: 8670 loss: 2.4344 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 03:53:48,872 - root - INFO - lr: 4.5328e-05 gnorm: 1.15 [ 5:19:39<19:15:08] +[titan] 2025-10-05 03:53:59,744 - root - INFO - step: 8675 loss: 2.4632 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1742 +[titan] 2025-10-05 03:53:59,744 - root - INFO - lr: 4.5322e-05 gnorm: 1.17 [ 5:19:50<19:14:56] +[titan] 2025-10-05 03:54:10,610 - root - INFO - step: 8680 loss: 2.4556 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 03:54:10,611 - root - INFO - lr: 4.5317e-05 
gnorm: 1.17 [ 5:20:01<19:14:45] +[titan] 2025-10-05 03:54:21,508 - root - INFO - step: 8685 loss: 2.4742 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1837 +[titan] 2025-10-05 03:54:21,508 - root - INFO - lr: 4.5311e-05 gnorm: 1.20 [ 5:20:12<19:14:33] +[titan] 2025-10-05 03:54:32,411 - root - INFO - step: 8690 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2972 global_avg_mtp_loss: 2.2331 +[titan] 2025-10-05 03:54:32,411 - root - INFO - lr: 4.5306e-05 gnorm: 1.22 [ 5:20:23<19:14:21] +[titan] 2025-10-05 03:54:43,289 - root - INFO - step: 8695 loss: 2.4873 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1952 +[titan] 2025-10-05 03:54:43,290 - root - INFO - lr: 4.5301e-05 gnorm: 1.21 [ 5:20:34<19:14:10] +[titan] 2025-10-05 03:54:52,023 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:54:54,213 - root - INFO - step: 8700 loss: 2.4737 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1831 +[titan] 2025-10-05 03:54:54,213 - root - INFO - lr: 4.5295e-05 gnorm: 1.19 [ 5:20:45<19:13:58] +[titan] 2025-10-05 03:55:03,159 - root - INFO - Dumping profiler traces at step 8704 +[titan] 2025-10-05 03:55:03,195 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:55:05,378 - root - INFO - step: 8705 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 29,348 tflops: 407.16 mfu: 41.17% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:55:05,378 - root - INFO - lr: 4.5290e-05 gnorm: 1.17 [ 5:20:56<19:13:47] +[titan] 2025-10-05 03:55:16,259 - root - INFO - step: 8710 loss: 2.3993 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1181 +[titan] 2025-10-05 03:55:16,259 - root - INFO - lr: 4.5284e-05 gnorm: 1.16 [ 5:21:07<19:13:36] +[titan] 2025-10-05 03:55:27,179 - root - INFO - step: 8715 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1461 +[titan] 2025-10-05 03:55:27,179 - root - INFO - lr: 4.5279e-05 gnorm: 1.17 [ 5:21:18<19:13:24] +[titan] 2025-10-05 03:55:38,073 - root - INFO - step: 8720 loss: 2.3963 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 03:55:38,073 - root - INFO - lr: 4.5273e-05 gnorm: 1.24 [ 5:21:29<19:13:12] +[titan] 2025-10-05 03:55:48,962 - root - INFO - step: 8725 loss: 2.4482 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 03:55:48,962 - root - INFO - lr: 4.5268e-05 gnorm: 1.19 [ 5:21:39<19:13:01] +[titan] 2025-10-05 03:55:59,898 - root - INFO - step: 8730 loss: 2.4827 
memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:55:59,898 - root - INFO - lr: 4.5262e-05 gnorm: 1.18 [ 5:21:50<19:12:49] +[titan] 2025-10-05 03:56:10,791 - root - INFO - step: 8735 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:56:10,791 - root - INFO - lr: 4.5257e-05 gnorm: 1.13 [ 5:22:01<19:12:38] +[titan] 2025-10-05 03:56:21,690 - root - INFO - step: 8740 loss: 2.5138 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:56:21,690 - root - INFO - lr: 4.5251e-05 gnorm: 1.17 [ 5:22:12<19:12:26] +[titan] 2025-10-05 03:56:32,598 - root - INFO - step: 8745 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:56:32,598 - root - INFO - lr: 4.5246e-05 gnorm: 1.21 [ 5:22:23<19:12:14] +[titan] 2025-10-05 03:56:41,299 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:56:43,483 - root - INFO - step: 8750 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 03:56:43,483 - root - INFO - lr: 4.5240e-05 gnorm: 1.23 [ 5:22:34<19:12:03] +[titan] 2025-10-05 03:56:54,343 - root - INFO - step: 8755 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1820 +[titan] 2025-10-05 03:56:54,343 - root - INFO - lr: 4.5235e-05 gnorm: 1.20 [ 5:22:45<19:11:51] +[titan] 2025-10-05 03:57:05,209 - root - INFO - step: 8760 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:57:05,210 - root - INFO - lr: 4.5229e-05 gnorm: 1.14 [ 5:22:56<19:11:39] +[titan] 2025-10-05 03:57:16,152 - root - INFO - step: 8765 loss: 2.5128 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:57:16,153 - root - INFO - lr: 4.5224e-05 gnorm: 1.17 [ 5:23:07<19:11:28] +[titan] 2025-10-05 03:57:27,083 - root - INFO - step: 8770 loss: 2.4066 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 03:57:27,083 - root - INFO - lr: 4.5218e-05 gnorm: 1.11 [ 5:23:18<19:11:16] +[titan] 2025-10-05 03:57:37,931 - root - INFO - step: 8775 loss: 2.4260 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 03:57:37,932 - root - INFO - lr: 4.5213e-05 gnorm: 1.17 [ 5:23:28<19:11:04] +[titan] 2025-10-05 03:57:48,805 - root - INFO - step: 8780 loss: 2.4759 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1847 +[titan] 2025-10-05 03:57:48,805 - root - INFO - lr: 4.5207e-05 
gnorm: 1.24 [ 5:23:39<19:10:53] +[titan] 2025-10-05 03:57:59,678 - root - INFO - step: 8785 loss: 2.4875 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:57:59,678 - root - INFO - lr: 4.5202e-05 gnorm: 1.16 [ 5:23:50<19:10:41] +[titan] 2025-10-05 03:58:10,559 - root - INFO - step: 8790 loss: 2.4424 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:58:10,559 - root - INFO - lr: 4.5196e-05 gnorm: 1.16 [ 5:24:01<19:10:29] +[titan] 2025-10-05 03:58:21,459 - root - INFO - step: 8795 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1293 +[titan] 2025-10-05 03:58:21,459 - root - INFO - lr: 4.5191e-05 gnorm: 1.13 [ 5:24:12<19:10:18] +[titan] 2025-10-05 03:58:30,179 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:58:32,360 - root - INFO - step: 8800 loss: 2.3926 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1119 +[titan] 2025-10-05 03:58:32,360 - root - INFO - lr: 4.5185e-05 gnorm: 1.16 [ 5:24:23<19:10:06] +[titan] 2025-10-05 03:58:43,220 - root - INFO - step: 8805 loss: 2.5057 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2110 +[titan] 2025-10-05 03:58:43,221 - root - INFO - lr: 4.5180e-05 gnorm: 1.16 [ 5:24:34<19:09:54] +[titan] 2025-10-05 03:58:54,092 - root - INFO - step: 8810 loss: 2.4643 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:58:54,092 - root - INFO - lr: 4.5174e-05 gnorm: 1.21 [ 5:24:45<19:09:42] +[titan] 2025-10-05 03:59:04,956 - root - INFO - step: 8815 loss: 2.5184 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2227 +[titan] 2025-10-05 03:59:04,956 - root - INFO - lr: 4.5169e-05 gnorm: 1.20 [ 5:24:55<19:09:31] +[titan] 2025-10-05 03:59:15,807 - root - INFO - step: 8820 loss: 2.3921 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 03:59:15,807 - root - INFO - lr: 4.5163e-05 gnorm: 1.12 [ 5:25:06<19:09:19] +[titan] 2025-10-05 03:59:26,817 - root - INFO - step: 8825 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.90 mfu: 41.75% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1453 +[titan] 2025-10-05 03:59:26,817 - root - INFO - lr: 4.5158e-05 gnorm: 1.14 [ 5:25:17<19:09:08] +[titan] 2025-10-05 03:59:37,700 - root - INFO - step: 8830 loss: 2.4161 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 03:59:37,700 - root - INFO - lr: 4.5152e-05 
gnorm: 1.17 [ 5:25:28<19:08:56] +[titan] 2025-10-05 03:59:48,610 - root - INFO - step: 8835 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:59:48,610 - root - INFO - lr: 4.5147e-05 gnorm: 1.20 [ 5:25:39<19:08:44] +[titan] 2025-10-05 03:59:59,499 - root - INFO - step: 8840 loss: 2.4555 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 03:59:59,499 - root - INFO - lr: 4.5141e-05 gnorm: 1.16 [ 5:25:50<19:08:33] +[titan] 2025-10-05 04:00:10,375 - root - INFO - step: 8845 loss: 2.5058 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2957 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 04:00:10,376 - root - INFO - lr: 4.5136e-05 gnorm: 1.15 [ 5:26:01<19:08:21] +[titan] 2025-10-05 04:00:19,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:00:21,274 - root - INFO - step: 8850 loss: 2.4134 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:00:21,274 - root - INFO - lr: 4.5130e-05 gnorm: 1.16 [ 5:26:12<19:08:09] +[titan] 2025-10-05 04:00:32,174 - root - INFO - step: 8855 loss: 2.3939 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1130 +[titan] 2025-10-05 04:00:32,174 - root - INFO - lr: 4.5124e-05 gnorm: 1.14 [ 5:26:23<19:07:58] +[titan] 2025-10-05 04:00:43,105 - root - INFO - step: 8860 loss: 2.4901 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.1965 +[titan] 2025-10-05 04:00:43,105 - root - INFO - lr: 4.5119e-05 gnorm: 1.13 [ 5:26:34<19:07:46] +[titan] 2025-10-05 04:00:53,982 - root - INFO - step: 8865 loss: 2.4318 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1455 +[titan] 2025-10-05 04:00:53,982 - root - INFO - lr: 4.5113e-05 gnorm: 1.20 [ 5:26:44<19:07:35] +[titan] 2025-10-05 04:01:04,884 - root - INFO - step: 8870 loss: 2.4552 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 04:01:04,884 - root - INFO - lr: 4.5108e-05 gnorm: 1.17 [ 5:26:55<19:07:23] +[titan] 2025-10-05 04:01:15,755 - root - INFO - step: 8875 loss: 2.4361 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1505 +[titan] 2025-10-05 04:01:15,755 - root - INFO - lr: 4.5102e-05 gnorm: 1.11 [ 5:27:06<19:07:11] +[titan] 2025-10-05 04:01:26,620 - root - INFO - step: 8880 loss: 2.4652 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 04:01:26,621 - root - INFO - lr: 4.5097e-05 
gnorm: 1.18 [ 5:27:17<19:07:00] +[titan] 2025-10-05 04:01:37,500 - root - INFO - step: 8885 loss: 2.4777 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1862 +[titan] 2025-10-05 04:01:37,500 - root - INFO - lr: 4.5091e-05 gnorm: 1.16 [ 5:27:28<19:06:48] +[titan] 2025-10-05 04:01:48,415 - root - INFO - step: 8890 loss: 2.4058 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:01:48,416 - root - INFO - lr: 4.5086e-05 gnorm: 1.17 [ 5:27:39<19:06:36] +[titan] 2025-10-05 04:01:59,279 - root - INFO - step: 8895 loss: 2.4655 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1759 +[titan] 2025-10-05 04:01:59,280 - root - INFO - lr: 4.5080e-05 gnorm: 1.19 [ 5:27:50<19:06:25] +[titan] 2025-10-05 04:02:07,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:02:10,126 - root - INFO - step: 8900 loss: 2.4494 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:02:10,126 - root - INFO - lr: 4.5074e-05 gnorm: 1.24 [ 5:28:01<19:06:13] +[titan] 2025-10-05 04:02:20,976 - root - INFO - step: 8905 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 04:02:20,976 - root - INFO - lr: 4.5069e-05 gnorm: 1.18 [ 5:28:11<19:06:01] +[titan] 2025-10-05 04:02:31,857 - root - INFO - step: 8910 loss: 2.4530 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1646 +[titan] 2025-10-05 04:02:31,858 - root - INFO - lr: 4.5063e-05 gnorm: 1.18 [ 5:28:22<19:05:49] +[titan] 2025-10-05 04:02:42,714 - root - INFO - step: 8915 loss: 2.4292 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:02:42,714 - root - INFO - lr: 4.5058e-05 gnorm: 1.18 [ 5:28:33<19:05:38] +[titan] 2025-10-05 04:02:53,586 - root - INFO - step: 8920 loss: 2.4665 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 04:02:53,586 - root - INFO - lr: 4.5052e-05 gnorm: 1.14 [ 5:28:44<19:05:26] +[titan] 2025-10-05 04:03:04,511 - root - INFO - step: 8925 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1484 +[titan] 2025-10-05 04:03:04,511 - root - INFO - lr: 4.5047e-05 gnorm: 1.20 [ 5:28:55<19:05:14] +[titan] 2025-10-05 04:03:15,417 - root - INFO - step: 8930 loss: 2.5325 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 04:03:15,417 - root - INFO - lr: 4.5041e-05 
gnorm: 1.18 [ 5:29:06<19:05:03] +[titan] 2025-10-05 04:03:26,302 - root - INFO - step: 8935 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:03:26,303 - root - INFO - lr: 4.5035e-05 gnorm: 1.21 [ 5:29:17<19:04:51] +[titan] 2025-10-05 04:03:37,172 - root - INFO - step: 8940 loss: 2.6656 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3417 +[titan] 2025-10-05 04:03:37,172 - root - INFO - lr: 4.5030e-05 gnorm: 1.16 [ 5:29:28<19:04:39] +[titan] 2025-10-05 04:03:48,057 - root - INFO - step: 8945 loss: 2.4401 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1543 +[titan] 2025-10-05 04:03:48,057 - root - INFO - lr: 4.5024e-05 gnorm: 1.12 [ 5:29:39<19:04:28] +[titan] 2025-10-05 04:03:56,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:03:58,919 - root - INFO - step: 8950 loss: 2.4061 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1253 +[titan] 2025-10-05 04:03:58,919 - root - INFO - lr: 4.5019e-05 gnorm: 1.11 [ 5:29:49<19:04:16] +[titan] 2025-10-05 04:04:09,819 - root - INFO - step: 8955 loss: 2.4957 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 04:04:09,820 - root - INFO - lr: 4.5013e-05 gnorm: 1.12 [ 5:30:00<19:04:05] +[titan] 2025-10-05 04:04:20,693 - root - INFO - step: 8960 loss: 2.4047 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1227 +[titan] 2025-10-05 04:04:20,693 - root - INFO - lr: 4.5007e-05 gnorm: 1.15 [ 5:30:11<19:03:53] +[titan] 2025-10-05 04:04:31,580 - root - INFO - step: 8965 loss: 2.4637 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1740 +[titan] 2025-10-05 04:04:31,580 - root - INFO - lr: 4.5002e-05 gnorm: 1.15 [ 5:30:22<19:03:41] +[titan] 2025-10-05 04:04:42,434 - root - INFO - step: 8970 loss: 2.4642 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 04:04:42,434 - root - INFO - lr: 4.4996e-05 gnorm: 1.19 [ 5:30:33<19:03:29] +[titan] 2025-10-05 04:04:53,298 - root - INFO - step: 8975 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 04:04:53,298 - root - INFO - lr: 4.4991e-05 gnorm: 1.20 [ 5:30:44<19:03:18] +[titan] 2025-10-05 04:05:04,159 - root - INFO - step: 8980 loss: 2.4094 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1265 +[titan] 2025-10-05 04:05:04,159 - root - INFO - lr: 4.4985e-05 
gnorm: 1.14 [ 5:30:55<19:03:06] +[titan] 2025-10-05 04:05:15,056 - root - INFO - step: 8985 loss: 2.4593 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1707 +[titan] 2025-10-05 04:05:15,057 - root - INFO - lr: 4.4979e-05 gnorm: 1.20 [ 5:31:06<19:02:54] +[titan] 2025-10-05 04:05:25,930 - root - INFO - step: 8990 loss: 2.3911 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 04:05:25,930 - root - INFO - lr: 4.4974e-05 gnorm: 1.15 [ 5:31:16<19:02:43] +[titan] 2025-10-05 04:05:36,797 - root - INFO - step: 8995 loss: 2.4428 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1566 +[titan] 2025-10-05 04:05:36,798 - root - INFO - lr: 4.4968e-05 gnorm: 1.17 [ 5:31:27<19:02:31] +[titan] 2025-10-05 04:05:45,469 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:05:47,651 - root - INFO - step: 9000 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0801 +[titan] 2025-10-05 04:05:47,652 - root - INFO - lr: 4.4962e-05 gnorm: 1.23 [ 5:31:38<19:02:19] +[titan] 2025-10-05 04:05:58,519 - root - INFO - step: 9005 loss: 2.4431 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1560 +[titan] 2025-10-05 04:05:58,519 - root - INFO - lr: 4.4957e-05 gnorm: 1.17 [ 5:31:49<19:02:08] +[titan] 2025-10-05 04:06:09,392 - root - INFO - step: 9010 loss: 2.4584 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1697 +[titan] 2025-10-05 04:06:09,392 - root - INFO - lr: 4.4951e-05 gnorm: 1.17 [ 5:32:00<19:01:56] +[titan] 2025-10-05 04:06:20,257 - root - INFO - step: 9015 loss: 2.4693 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1794 +[titan] 2025-10-05 04:06:20,257 - root - INFO - lr: 4.4946e-05 gnorm: 1.13 [ 5:32:11<19:01:44] +[titan] 2025-10-05 04:06:31,158 - root - INFO - step: 9020 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1487 +[titan] 2025-10-05 04:06:31,158 - root - INFO - lr: 4.4940e-05 gnorm: 1.15 [ 5:32:22<19:01:33] +[titan] 2025-10-05 04:06:42,018 - root - INFO - step: 9025 loss: 2.3968 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:06:42,018 - root - INFO - lr: 4.4934e-05 gnorm: 1.11 [ 5:32:32<19:01:21] +[titan] 2025-10-05 04:06:52,886 - root - INFO - step: 9030 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:06:52,886 - root - INFO - lr: 4.4929e-05 
gnorm: 1.08 [ 5:32:43<19:01:09] +[titan] 2025-10-05 04:07:03,747 - root - INFO - step: 9035 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 04:07:03,747 - root - INFO - lr: 4.4923e-05 gnorm: 1.16 [ 5:32:54<19:00:57] +[titan] 2025-10-05 04:07:14,610 - root - INFO - step: 9040 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1859 +[titan] 2025-10-05 04:07:14,610 - root - INFO - lr: 4.4917e-05 gnorm: 1.17 [ 5:33:05<19:00:46] +[titan] 2025-10-05 04:07:25,476 - root - INFO - step: 9045 loss: 2.4520 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1641 +[titan] 2025-10-05 04:07:25,476 - root - INFO - lr: 4.4912e-05 gnorm: 1.19 [ 5:33:16<19:00:34] +[titan] 2025-10-05 04:07:34,201 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:07:36,379 - root - INFO - step: 9050 loss: 2.4771 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:07:36,379 - root - INFO - lr: 4.4906e-05 gnorm: 1.19 [ 5:33:27<19:00:22] +[titan] 2025-10-05 04:07:47,258 - root - INFO - step: 9055 loss: 2.4168 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1339 +[titan] 2025-10-05 04:07:47,258 - root - INFO - lr: 4.4900e-05 gnorm: 1.14 [ 5:33:38<19:00:11] +[titan] 2025-10-05 04:07:58,123 - root - INFO - step: 9060 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:07:58,123 - root - INFO - lr: 4.4895e-05 gnorm: 1.16 [ 5:33:49<18:59:59] +[titan] 2025-10-05 04:08:09,003 - root - INFO - step: 9065 loss: 2.4858 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2911 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:08:09,003 - root - INFO - lr: 4.4889e-05 gnorm: 1.18 [ 5:33:59<18:59:47] +[titan] 2025-10-05 04:08:19,858 - root - INFO - step: 9070 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 04:08:19,858 - root - INFO - lr: 4.4883e-05 gnorm: 1.18 [ 5:34:10<18:59:36] +[titan] 2025-10-05 04:08:30,739 - root - INFO - step: 9075 loss: 2.4338 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1483 +[titan] 2025-10-05 04:08:30,739 - root - INFO - lr: 4.4878e-05 gnorm: 1.16 [ 5:34:21<18:59:24] +[titan] 2025-10-05 04:08:41,605 - root - INFO - step: 9080 loss: 2.3786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 04:08:41,605 - root - INFO - lr: 4.4872e-05 
gnorm: 1.24 [ 5:34:32<18:59:12] +[titan] 2025-10-05 04:08:52,482 - root - INFO - step: 9085 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2055 +[titan] 2025-10-05 04:08:52,482 - root - INFO - lr: 4.4866e-05 gnorm: 1.20 [ 5:34:43<18:59:01] +[titan] 2025-10-05 04:09:03,324 - root - INFO - step: 9090 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1628 +[titan] 2025-10-05 04:09:03,325 - root - INFO - lr: 4.4861e-05 gnorm: 1.24 [ 5:34:54<18:58:49] +[titan] 2025-10-05 04:09:14,169 - root - INFO - step: 9095 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1886 +[titan] 2025-10-05 04:09:14,169 - root - INFO - lr: 4.4855e-05 gnorm: 1.21 [ 5:35:05<18:58:37] +[titan] 2025-10-05 04:09:22,830 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:09:25,022 - root - INFO - step: 9100 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:09:25,022 - root - INFO - lr: 4.4849e-05 gnorm: 1.19 [ 5:35:15<18:58:25] +[titan] 2025-10-05 04:09:35,891 - root - INFO - step: 9105 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1300 +[titan] 2025-10-05 04:09:35,891 - root - INFO - lr: 4.4844e-05 gnorm: 1.18 [ 5:35:26<18:58:14] +[titan] 2025-10-05 04:09:46,754 - root - INFO - step: 9110 loss: 2.3843 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1029 +[titan] 2025-10-05 04:09:46,754 - root - INFO - lr: 4.4838e-05 gnorm: 1.28 [ 5:35:37<18:58:02] +[titan] 2025-10-05 04:09:57,624 - root - INFO - step: 9115 loss: 2.4036 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1228 +[titan] 2025-10-05 04:09:57,624 - root - INFO - lr: 4.4832e-05 gnorm: 1.19 [ 5:35:48<18:57:50] +[titan] 2025-10-05 04:10:08,470 - root - INFO - step: 9120 loss: 2.4158 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1325 +[titan] 2025-10-05 04:10:08,470 - root - INFO - lr: 4.4827e-05 gnorm: 1.14 [ 5:35:59<18:57:39] +[titan] 2025-10-05 04:10:19,323 - root - INFO - step: 9125 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 04:10:19,323 - root - INFO - lr: 4.4821e-05 gnorm: 1.19 [ 5:36:10<18:57:27] +[titan] 2025-10-05 04:10:30,178 - root - INFO - step: 9130 loss: 2.4437 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 04:10:30,178 - root - INFO - lr: 4.4815e-05 
gnorm: 1.22 [ 5:36:21<18:57:15] +[titan] 2025-10-05 04:10:41,058 - root - INFO - step: 9135 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1518 +[titan] 2025-10-05 04:10:41,058 - root - INFO - lr: 4.4809e-05 gnorm: 1.15 [ 5:36:32<18:57:04] +[titan] 2025-10-05 04:10:51,913 - root - INFO - step: 9140 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:10:51,913 - root - INFO - lr: 4.4804e-05 gnorm: 1.14 [ 5:36:42<18:56:52] +[titan] 2025-10-05 04:11:02,801 - root - INFO - step: 9145 loss: 2.4160 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1331 +[titan] 2025-10-05 04:11:02,801 - root - INFO - lr: 4.4798e-05 gnorm: 1.18 [ 5:36:53<18:56:40] +[titan] 2025-10-05 04:11:11,463 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:11:13,643 - root - INFO - step: 9150 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1327 +[titan] 2025-10-05 04:11:13,643 - root - INFO - lr: 4.4792e-05 gnorm: 1.15 [ 5:37:04<18:56:28] +[titan] 2025-10-05 04:11:24,500 - root - INFO - step: 9155 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1789 +[titan] 2025-10-05 04:11:24,500 - root - INFO - lr: 4.4787e-05 gnorm: 1.16 [ 5:37:15<18:56:17] +[titan] 2025-10-05 04:11:35,333 - root - INFO - step: 9160 loss: 2.4173 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1345 +[titan] 2025-10-05 04:11:35,333 - root - INFO - lr: 4.4781e-05 gnorm: 1.15 [ 5:37:26<18:56:05] +[titan] 2025-10-05 04:11:46,194 - root - INFO - step: 9165 loss: 2.4180 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 04:11:46,195 - root - INFO - lr: 4.4775e-05 gnorm: 1.13 [ 5:37:37<18:55:53] +[titan] 2025-10-05 04:11:57,056 - root - INFO - step: 9170 loss: 2.3989 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 04:11:57,057 - root - INFO - lr: 4.4769e-05 gnorm: 1.15 [ 5:37:48<18:55:42] +[titan] 2025-10-05 04:12:07,928 - root - INFO - step: 9175 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 04:12:07,928 - root - INFO - lr: 4.4764e-05 gnorm: 1.11 [ 5:37:58<18:55:30] +[titan] 2025-10-05 04:12:18,847 - root - INFO - step: 9180 loss: 2.5568 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2549 +[titan] 2025-10-05 04:12:18,847 - root - INFO - lr: 4.4758e-05 
gnorm: 1.20 [ 5:38:09<18:55:18] +[titan] 2025-10-05 04:12:29,719 - root - INFO - step: 9185 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1545 +[titan] 2025-10-05 04:12:29,719 - root - INFO - lr: 4.4752e-05 gnorm: 1.16 [ 5:38:20<18:55:07] +[titan] 2025-10-05 04:12:40,611 - root - INFO - step: 9190 loss: 2.3798 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.1027 +[titan] 2025-10-05 04:12:40,611 - root - INFO - lr: 4.4747e-05 gnorm: 1.15 [ 5:38:31<18:54:55] +[titan] 2025-10-05 04:12:51,477 - root - INFO - step: 9195 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1621 +[titan] 2025-10-05 04:12:51,477 - root - INFO - lr: 4.4741e-05 gnorm: 1.15 [ 5:38:42<18:54:44] +[titan] 2025-10-05 04:13:00,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:13:02,353 - root - INFO - step: 9200 loss: 2.4374 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1525 +[titan] 2025-10-05 04:13:02,353 - root - INFO - lr: 4.4735e-05 gnorm: 1.20 [ 5:38:53<18:54:32] +[titan] 2025-10-05 04:13:13,230 - root - INFO - step: 9205 loss: 2.4854 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1940 +[titan] 2025-10-05 04:13:13,230 - root - INFO - lr: 4.4729e-05 gnorm: 1.22 [ 5:39:04<18:54:20] +[titan] 2025-10-05 04:13:24,132 - root - INFO - step: 9210 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:13:24,132 - root - INFO - lr: 4.4724e-05 gnorm: 1.19 [ 5:39:15<18:54:09] +[titan] 2025-10-05 04:13:35,087 - root - INFO - step: 9215 loss: 2.4851 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1930 +[titan] 2025-10-05 04:13:35,087 - root - INFO - lr: 4.4718e-05 gnorm: 1.15 [ 5:39:26<18:53:57] +[titan] 2025-10-05 04:13:37,451 - root - INFO - Dumping profiler traces at step 9216 +[titan] 2025-10-05 04:13:37,489 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:13:46,201 - root - INFO - step: 9220 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 29,486 tflops: 409.07 mfu: 41.36% global_avg_ntp_loss: 0.2928 global_avg_mtp_loss: 2.2073 +[titan] 2025-10-05 04:13:46,201 - root - INFO - lr: 4.4712e-05 gnorm: 1.17 [ 5:39:37<18:53:47] +[titan] 2025-10-05 04:13:57,080 - root - INFO - step: 9225 loss: 2.3856 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1071 +[titan] 2025-10-05 04:13:57,080 - root - INFO - lr: 4.4706e-05 gnorm: 1.15 [ 5:39:48<18:53:35] +[titan] 2025-10-05 04:14:07,953 - root - INFO - step: 9230 loss: 2.4302 
memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1452 +[titan] 2025-10-05 04:14:07,953 - root - INFO - lr: 4.4701e-05 gnorm: 1.15 [ 5:39:58<18:53:23] +[titan] 2025-10-05 04:14:18,819 - root - INFO - step: 9235 loss: 2.4502 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1635 +[titan] 2025-10-05 04:14:18,819 - root - INFO - lr: 4.4695e-05 gnorm: 1.22 [ 5:40:09<18:53:12] +[titan] 2025-10-05 04:14:29,678 - root - INFO - step: 9240 loss: 2.4452 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1584 +[titan] 2025-10-05 04:14:29,678 - root - INFO - lr: 4.4689e-05 gnorm: 1.17 [ 5:40:20<18:53:00] +[titan] 2025-10-05 04:14:40,618 - root - INFO - step: 9245 loss: 2.4345 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1494 +[titan] 2025-10-05 04:14:40,618 - root - INFO - lr: 4.4683e-05 gnorm: 1.14 [ 5:40:31<18:52:48] +[titan] 2025-10-05 04:14:49,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:14:51,484 - root - INFO - step: 9250 loss: 2.5104 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 04:14:51,484 - root - INFO - lr: 4.4678e-05 gnorm: 1.18 [ 5:40:42<18:52:37] +[titan] 2025-10-05 04:15:02,363 - root - INFO - step: 9255 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1304 +[titan] 2025-10-05 04:15:02,363 - root - INFO - lr: 4.4672e-05 gnorm: 1.16 [ 5:40:53<18:52:25] +[titan] 2025-10-05 04:15:13,235 - root - INFO - step: 9260 loss: 2.4511 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:15:13,236 - root - INFO - lr: 4.4666e-05 gnorm: 1.17 [ 5:41:04<18:52:14] +[titan] 2025-10-05 04:15:24,134 - root - INFO - step: 9265 loss: 2.5208 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2247 +[titan] 2025-10-05 04:15:24,134 - root - INFO - lr: 4.4660e-05 gnorm: 1.12 [ 5:41:15<18:52:02] +[titan] 2025-10-05 04:15:35,014 - root - INFO - step: 9270 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1064 +[titan] 2025-10-05 04:15:35,014 - root - INFO - lr: 4.4655e-05 gnorm: 1.14 [ 5:41:25<18:51:50] +[titan] 2025-10-05 04:15:45,940 - root - INFO - step: 9275 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2155 +[titan] 2025-10-05 04:15:45,940 - root - INFO - lr: 4.4649e-05 gnorm: 3.57 [ 5:41:36<18:51:39] +[titan] 2025-10-05 04:15:56,816 - root - INFO - step: 9280 loss: 2.4602 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1702 +[titan] 2025-10-05 04:15:56,816 - root - INFO - lr: 4.4643e-05 
gnorm: 1.17 [ 5:41:47<18:51:27] +[titan] 2025-10-05 04:16:07,687 - root - INFO - step: 9285 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1597 +[titan] 2025-10-05 04:16:07,687 - root - INFO - lr: 4.4637e-05 gnorm: 1.16 [ 5:41:58<18:51:16] +[titan] 2025-10-05 04:16:18,550 - root - INFO - step: 9290 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1896 +[titan] 2025-10-05 04:16:18,550 - root - INFO - lr: 4.4631e-05 gnorm: 1.19 [ 5:42:09<18:51:04] +[titan] 2025-10-05 04:16:29,436 - root - INFO - step: 9295 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1040 +[titan] 2025-10-05 04:16:29,436 - root - INFO - lr: 4.4626e-05 gnorm: 1.23 [ 5:42:20<18:50:52] +[titan] 2025-10-05 04:16:38,131 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:16:40,321 - root - INFO - step: 9300 loss: 2.4653 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:16:40,321 - root - INFO - lr: 4.4620e-05 gnorm: 1.12 [ 5:42:31<18:50:41] +[titan] 2025-10-05 04:16:51,231 - root - INFO - step: 9305 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:16:51,232 - root - INFO - lr: 4.4614e-05 gnorm: 1.15 [ 5:42:42<18:50:29] +[titan] 2025-10-05 04:17:02,103 - root - INFO - step: 9310 loss: 2.4882 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1966 +[titan] 2025-10-05 04:17:02,103 - root - INFO - lr: 4.4608e-05 gnorm: 1.14 [ 5:42:53<18:50:18] +[titan] 2025-10-05 04:17:13,000 - root - INFO - step: 9315 loss: 2.4906 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1975 +[titan] 2025-10-05 04:17:13,000 - root - INFO - lr: 4.4602e-05 gnorm: 1.19 [ 5:43:03<18:50:06] +[titan] 2025-10-05 04:17:23,889 - root - INFO - step: 9320 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:17:23,890 - root - INFO - lr: 4.4597e-05 gnorm: 1.23 [ 5:43:14<18:49:55] +[titan] 2025-10-05 04:17:34,759 - root - INFO - step: 9325 loss: 2.4923 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2926 global_avg_mtp_loss: 2.1997 +[titan] 2025-10-05 04:17:34,759 - root - INFO - lr: 4.4591e-05 gnorm: 1.20 [ 5:43:25<18:49:43] +[titan] 2025-10-05 04:17:45,670 - root - INFO - step: 9330 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1813 +[titan] 2025-10-05 04:17:45,670 - root - INFO - lr: 4.4585e-05 
gnorm: 1.15 [ 5:43:36<18:49:31] +[titan] 2025-10-05 04:17:56,531 - root - INFO - step: 9335 loss: 2.5353 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:17:56,531 - root - INFO - lr: 4.4579e-05 gnorm: 1.15 [ 5:43:47<18:49:20] +[titan] 2025-10-05 04:18:07,423 - root - INFO - step: 9340 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 04:18:07,423 - root - INFO - lr: 4.4573e-05 gnorm: 1.22 [ 5:43:58<18:49:08] +[titan] 2025-10-05 04:18:18,296 - root - INFO - step: 9345 loss: 2.4834 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1917 +[titan] 2025-10-05 04:18:18,296 - root - INFO - lr: 4.4568e-05 gnorm: 1.16 [ 5:44:09<18:48:57] +[titan] 2025-10-05 04:18:27,002 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:18:29,196 - root - INFO - step: 9350 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:18:29,196 - root - INFO - lr: 4.4562e-05 gnorm: 1.12 [ 5:44:20<18:48:45] +[titan] 2025-10-05 04:18:40,056 - root - INFO - step: 9355 loss: 2.4321 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1466 +[titan] 2025-10-05 04:18:40,056 - root - INFO - lr: 4.4556e-05 gnorm: 1.12 [ 5:44:31<18:48:33] +[titan] 2025-10-05 04:18:50,968 - root - INFO - step: 9360 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2060 +[titan] 2025-10-05 04:18:50,968 - root - INFO - lr: 4.4550e-05 gnorm: 1.14 [ 5:44:41<18:48:22] +[titan] 2025-10-05 04:19:01,819 - root - INFO - step: 9365 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1401 +[titan] 2025-10-05 04:19:01,819 - root - INFO - lr: 4.4544e-05 gnorm: 1.14 [ 5:44:52<18:48:10] +[titan] 2025-10-05 04:19:12,717 - root - INFO - step: 9370 loss: 2.5021 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 04:19:12,718 - root - INFO - lr: 4.4538e-05 gnorm: 1.13 [ 5:45:03<18:47:59] +[titan] 2025-10-05 04:19:23,592 - root - INFO - step: 9375 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 04:19:23,592 - root - INFO - lr: 4.4533e-05 gnorm: 1.15 [ 5:45:14<18:47:47] +[titan] 2025-10-05 04:19:34,464 - root - INFO - step: 9380 loss: 2.4564 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1688 +[titan] 2025-10-05 04:19:34,465 - root - INFO - lr: 4.4527e-05 
gnorm: 1.21 [ 5:45:25<18:47:35] +[titan] 2025-10-05 04:19:45,394 - root - INFO - step: 9385 loss: 2.4197 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1348 +[titan] 2025-10-05 04:19:45,394 - root - INFO - lr: 4.4521e-05 gnorm: 1.16 [ 5:45:36<18:47:24] +[titan] 2025-10-05 04:19:56,282 - root - INFO - step: 9390 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:19:56,282 - root - INFO - lr: 4.4515e-05 gnorm: 1.15 [ 5:45:47<18:47:12] +[titan] 2025-10-05 04:20:07,169 - root - INFO - step: 9395 loss: 2.4327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1475 +[titan] 2025-10-05 04:20:07,169 - root - INFO - lr: 4.4509e-05 gnorm: 1.21 [ 5:45:58<18:47:01] +[titan] 2025-10-05 04:20:15,874 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:20:18,060 - root - INFO - step: 9400 loss: 2.5009 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2079 +[titan] 2025-10-05 04:20:18,061 - root - INFO - lr: 4.4503e-05 gnorm: 1.18 [ 5:46:09<18:46:49] +[titan] 2025-10-05 04:20:28,966 - root - INFO - step: 9405 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1469 +[titan] 2025-10-05 04:20:28,966 - root - INFO - lr: 4.4498e-05 gnorm: 1.14 [ 5:46:19<18:46:38] +[titan] 2025-10-05 04:20:39,882 - root - INFO - step: 9410 loss: 2.4983 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 04:20:39,882 - root - INFO - lr: 4.4492e-05 gnorm: 1.20 [ 5:46:30<18:46:26] +[titan] 2025-10-05 04:20:50,800 - root - INFO - step: 9415 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:20:50,800 - root - INFO - lr: 4.4486e-05 gnorm: 1.13 [ 5:46:41<18:46:15] +[titan] 2025-10-05 04:21:01,668 - root - INFO - step: 9420 loss: 2.3688 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0904 +[titan] 2025-10-05 04:21:01,668 - root - INFO - lr: 4.4480e-05 gnorm: 1.16 [ 5:46:52<18:46:03] +[titan] 2025-10-05 04:21:12,542 - root - INFO - step: 9425 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 04:21:12,543 - root - INFO - lr: 4.4474e-05 gnorm: 1.16 [ 5:47:03<18:45:52] +[titan] 2025-10-05 04:21:23,412 - root - INFO - step: 9430 loss: 2.4415 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1551 +[titan] 2025-10-05 04:21:23,412 - root - INFO - lr: 4.4468e-05 
gnorm: 1.20 [ 5:47:14<18:45:40] +[titan] 2025-10-05 04:21:34,322 - root - INFO - step: 9435 loss: 2.3669 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 04:21:34,322 - root - INFO - lr: 4.4462e-05 gnorm: 1.10 [ 5:47:25<18:45:28] +[titan] 2025-10-05 04:21:45,197 - root - INFO - step: 9440 loss: 2.3883 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1070 +[titan] 2025-10-05 04:21:45,197 - root - INFO - lr: 4.4457e-05 gnorm: 1.17 [ 5:47:36<18:45:17] +[titan] 2025-10-05 04:21:56,142 - root - INFO - step: 9445 loss: 2.4394 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1537 +[titan] 2025-10-05 04:21:56,142 - root - INFO - lr: 4.4451e-05 gnorm: 1.15 [ 5:47:47<18:45:05] +[titan] 2025-10-05 04:22:04,824 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:22:07,011 - root - INFO - step: 9450 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1302 +[titan] 2025-10-05 04:22:07,011 - root - INFO - lr: 4.4445e-05 gnorm: 1.11 [ 5:47:57<18:44:54] +[titan] 2025-10-05 04:22:17,891 - root - INFO - step: 9455 loss: 2.4826 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1916 +[titan] 2025-10-05 04:22:17,891 - root - INFO - lr: 4.4439e-05 gnorm: 1.14 [ 5:48:08<18:44:42] +[titan] 2025-10-05 04:22:28,768 - root - INFO - step: 9460 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 04:22:28,768 - root - INFO - lr: 4.4433e-05 gnorm: 1.12 [ 5:48:19<18:44:31] +[titan] 2025-10-05 04:22:39,662 - root - INFO - step: 9465 loss: 2.4758 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:22:39,663 - root - INFO - lr: 4.4427e-05 gnorm: 1.12 [ 5:48:30<18:44:19] +[titan] 2025-10-05 04:22:50,623 - root - INFO - step: 9470 loss: 2.4549 memory: 118.84GiB(85.28%) tps: 29,899 tflops: 414.80 mfu: 41.94% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1672 +[titan] 2025-10-05 04:22:50,623 - root - INFO - lr: 4.4421e-05 gnorm: 1.19 [ 5:48:41<18:44:08] +[titan] 2025-10-05 04:23:01,499 - root - INFO - step: 9475 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1431 +[titan] 2025-10-05 04:23:01,500 - root - INFO - lr: 4.4415e-05 gnorm: 1.12 [ 5:48:52<18:43:56] +[titan] 2025-10-05 04:23:12,360 - root - INFO - step: 9480 loss: 2.4464 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1579 +[titan] 2025-10-05 04:23:12,360 - root - INFO - lr: 4.4410e-05 
gnorm: 1.18 [ 5:49:03<18:43:45] +[titan] 2025-10-05 04:23:23,239 - root - INFO - step: 9485 loss: 2.4527 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1643 +[titan] 2025-10-05 04:23:23,239 - root - INFO - lr: 4.4404e-05 gnorm: 1.19 [ 5:49:14<18:43:33] +[titan] 2025-10-05 04:23:34,114 - root - INFO - step: 9490 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:23:34,114 - root - INFO - lr: 4.4398e-05 gnorm: 1.11 [ 5:49:25<18:43:21] +[titan] 2025-10-05 04:23:44,977 - root - INFO - step: 9495 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1819 +[titan] 2025-10-05 04:23:44,977 - root - INFO - lr: 4.4392e-05 gnorm: 1.11 [ 5:49:35<18:43:10] +[titan] 2025-10-05 04:23:53,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:23:55,915 - root - INFO - step: 9500 loss: 2.4279 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1438 +[titan] 2025-10-05 04:23:55,915 - root - INFO - lr: 4.4386e-05 gnorm: 1.12 [ 5:49:46<18:42:58] +[titan] 2025-10-05 04:24:06,759 - root - INFO - step: 9505 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:24:06,759 - root - INFO - lr: 4.4380e-05 gnorm: 1.15 [ 5:49:57<18:42:47] +[titan] 2025-10-05 04:24:17,624 - root - INFO - step: 9510 loss: 2.4001 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1187 +[titan] 2025-10-05 04:24:17,624 - root - INFO - lr: 4.4374e-05 gnorm: 1.13 [ 5:50:08<18:42:35] +[titan] 2025-10-05 04:24:28,498 - root - INFO - step: 9515 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 04:24:28,498 - root - INFO - lr: 4.4368e-05 gnorm: 1.17 [ 5:50:19<18:42:23] +[titan] 2025-10-05 04:24:39,377 - root - INFO - step: 9520 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:24:39,377 - root - INFO - lr: 4.4362e-05 gnorm: 1.13 [ 5:50:30<18:42:12] +[titan] 2025-10-05 04:24:50,308 - root - INFO - step: 9525 loss: 2.3498 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0740 +[titan] 2025-10-05 04:24:50,308 - root - INFO - lr: 4.4357e-05 gnorm: 1.18 [ 5:50:41<18:42:00] +[titan] 2025-10-05 04:25:01,216 - root - INFO - step: 9530 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1821 +[titan] 2025-10-05 04:25:01,216 - root - INFO - lr: 4.4351e-05 
gnorm: 1.23 [ 5:50:52<18:41:49] +[titan] 2025-10-05 04:25:12,092 - root - INFO - step: 9535 loss: 2.4240 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1392 +[titan] 2025-10-05 04:25:12,092 - root - INFO - lr: 4.4345e-05 gnorm: 1.17 [ 5:51:03<18:41:37] +[titan] 2025-10-05 04:25:22,993 - root - INFO - step: 9540 loss: 2.4342 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1477 +[titan] 2025-10-05 04:25:22,993 - root - INFO - lr: 4.4339e-05 gnorm: 1.18 [ 5:51:13<18:41:26] +[titan] 2025-10-05 04:25:33,873 - root - INFO - step: 9545 loss: 2.4536 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1647 +[titan] 2025-10-05 04:25:33,873 - root - INFO - lr: 4.4333e-05 gnorm: 1.18 [ 5:51:24<18:41:14] +[titan] 2025-10-05 04:25:42,543 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:25:44,728 - root - INFO - step: 9550 loss: 2.4518 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1638 +[titan] 2025-10-05 04:25:44,728 - root - INFO - lr: 4.4327e-05 gnorm: 1.19 [ 5:51:35<18:41:03] +[titan] 2025-10-05 04:25:55,649 - root - INFO - step: 9555 loss: 2.4091 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 04:25:55,650 - root - INFO - lr: 4.4321e-05 gnorm: 1.19 [ 5:51:46<18:40:51] +[titan] 2025-10-05 04:26:06,497 - root - INFO - step: 9560 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1277 +[titan] 2025-10-05 04:26:06,497 - root - INFO - lr: 4.4315e-05 gnorm: 1.14 [ 5:51:57<18:40:40] +[titan] 2025-10-05 04:26:17,403 - root - INFO - step: 9565 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1464 +[titan] 2025-10-05 04:26:17,403 - root - INFO - lr: 4.4309e-05 gnorm: 1.18 [ 5:52:08<18:40:28] +[titan] 2025-10-05 04:26:28,292 - root - INFO - step: 9570 loss: 2.4323 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 04:26:28,292 - root - INFO - lr: 4.4303e-05 gnorm: 1.17 [ 5:52:19<18:40:16] +[titan] 2025-10-05 04:26:39,137 - root - INFO - step: 9575 loss: 2.4565 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:26:39,137 - root - INFO - lr: 4.4297e-05 gnorm: 1.17 [ 5:52:30<18:40:05] +[titan] 2025-10-05 04:26:50,002 - root - INFO - step: 9580 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.2636 +[titan] 2025-10-05 04:26:50,002 - root - INFO - lr: 4.4291e-05 
gnorm: 1.16 [ 5:52:40<18:39:53] +[titan] 2025-10-05 04:27:00,916 - root - INFO - step: 9585 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 04:27:00,916 - root - INFO - lr: 4.4285e-05 gnorm: 1.16 [ 5:52:51<18:39:42] +[titan] 2025-10-05 04:27:11,772 - root - INFO - step: 9590 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2492 +[titan] 2025-10-05 04:27:11,773 - root - INFO - lr: 4.4279e-05 gnorm: 1.14 [ 5:53:02<18:39:30] +[titan] 2025-10-05 04:27:22,632 - root - INFO - step: 9595 loss: 2.4580 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1692 +[titan] 2025-10-05 04:27:22,632 - root - INFO - lr: 4.4273e-05 gnorm: 1.16 [ 5:53:13<18:39:18] +[titan] 2025-10-05 04:27:31,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:27:33,503 - root - INFO - step: 9600 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:27:33,503 - root - INFO - lr: 4.4268e-05 gnorm: 1.14 [ 5:53:24<18:39:07] +[titan] 2025-10-05 04:27:44,378 - root - INFO - step: 9605 loss: 2.4209 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1359 +[titan] 2025-10-05 04:27:44,378 - root - INFO - lr: 4.4262e-05 gnorm: 1.14 [ 5:53:35<18:38:55] +[titan] 2025-10-05 04:27:55,281 - root - INFO - step: 9610 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1495 +[titan] 2025-10-05 04:27:55,281 - root - INFO - lr: 4.4256e-05 gnorm: 1.27 [ 5:53:46<18:38:44] +[titan] 2025-10-05 04:28:06,144 - root - INFO - step: 9615 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1414 +[titan] 2025-10-05 04:28:06,145 - root - INFO - lr: 4.4250e-05 gnorm: 1.12 [ 5:53:57<18:38:32] +[titan] 2025-10-05 04:28:17,025 - root - INFO - step: 9620 loss: 2.4380 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1520 +[titan] 2025-10-05 04:28:17,025 - root - INFO - lr: 4.4244e-05 gnorm: 1.17 [ 5:54:07<18:38:21] +[titan] 2025-10-05 04:28:27,900 - root - INFO - step: 9625 loss: 2.4092 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1278 +[titan] 2025-10-05 04:28:27,900 - root - INFO - lr: 4.4238e-05 gnorm: 1.17 [ 5:54:18<18:38:09] +[titan] 2025-10-05 04:28:38,759 - root - INFO - step: 9630 loss: 2.3955 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1144 +[titan] 2025-10-05 04:28:38,759 - root - INFO - lr: 4.4232e-05 
gnorm: 1.18 [ 5:54:29<18:37:57] +[titan] 2025-10-05 04:28:49,641 - root - INFO - step: 9635 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 04:28:49,641 - root - INFO - lr: 4.4226e-05 gnorm: 1.17 [ 5:54:40<18:37:46] +[titan] 2025-10-05 04:29:00,565 - root - INFO - step: 9640 loss: 2.5391 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2382 +[titan] 2025-10-05 04:29:00,565 - root - INFO - lr: 4.4220e-05 gnorm: 1.17 [ 5:54:51<18:37:34] +[titan] 2025-10-05 04:29:11,410 - root - INFO - step: 9645 loss: 2.4192 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 04:29:11,410 - root - INFO - lr: 4.4214e-05 gnorm: 1.18 [ 5:55:02<18:37:23] +[titan] 2025-10-05 04:29:20,108 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:29:22,295 - root - INFO - step: 9650 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:29:22,295 - root - INFO - lr: 4.4208e-05 gnorm: 1.14 [ 5:55:13<18:37:11] +[titan] 2025-10-05 04:29:33,192 - root - INFO - step: 9655 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1795 +[titan] 2025-10-05 04:29:33,192 - root - INFO - lr: 4.4202e-05 gnorm: 1.18 [ 5:55:24<18:37:00] +[titan] 2025-10-05 04:29:44,075 - root - INFO - step: 9660 loss: 2.5077 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2141 +[titan] 2025-10-05 04:29:44,076 - root - INFO - lr: 4.4196e-05 gnorm: 1.19 [ 5:55:34<18:36:48] +[titan] 2025-10-05 04:29:55,012 - root - INFO - step: 9665 loss: 2.3987 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2812 global_avg_mtp_loss: 2.1174 +[titan] 2025-10-05 04:29:55,012 - root - INFO - lr: 4.4190e-05 gnorm: 1.13 [ 5:55:45<18:36:37] +[titan] 2025-10-05 04:30:05,890 - root - INFO - step: 9670 loss: 2.4206 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1355 +[titan] 2025-10-05 04:30:05,891 - root - INFO - lr: 4.4184e-05 gnorm: 1.15 [ 5:55:56<18:36:25] +[titan] 2025-10-05 04:30:16,776 - root - INFO - step: 9675 loss: 2.3409 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 04:30:16,776 - root - INFO - lr: 4.4178e-05 gnorm: 1.12 [ 5:56:07<18:36:14] +[titan] 2025-10-05 04:30:27,638 - root - INFO - step: 9680 loss: 2.4055 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1224 +[titan] 2025-10-05 04:30:27,639 - root - INFO - lr: 4.4172e-05 
gnorm: 1.11 [ 5:56:18<18:36:02] +[titan] 2025-10-05 04:30:38,514 - root - INFO - step: 9685 loss: 2.4020 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1213 +[titan] 2025-10-05 04:30:38,514 - root - INFO - lr: 4.4166e-05 gnorm: 1.10 [ 5:56:29<18:35:51] +[titan] 2025-10-05 04:30:49,397 - root - INFO - step: 9690 loss: 2.3894 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 04:30:49,397 - root - INFO - lr: 4.4160e-05 gnorm: 1.14 [ 5:56:40<18:35:39] +[titan] 2025-10-05 04:31:00,376 - root - INFO - step: 9695 loss: 2.4118 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.10 mfu: 41.87% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:31:00,376 - root - INFO - lr: 4.4154e-05 gnorm: 1.13 [ 5:56:51<18:35:28] +[titan] 2025-10-05 04:31:09,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:31:11,234 - root - INFO - step: 9700 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 04:31:11,235 - root - INFO - lr: 4.4148e-05 gnorm: 1.17 [ 5:57:02<18:35:16] +[titan] 2025-10-05 04:31:22,095 - root - INFO - step: 9705 loss: 2.4525 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1644 +[titan] 2025-10-05 04:31:22,095 - root - INFO - lr: 4.4142e-05 gnorm: 1.18 [ 5:57:13<18:35:05] +[titan] 2025-10-05 04:31:32,925 - root - INFO - step: 9710 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.77 mfu: 42.44% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:31:32,925 - root - INFO - lr: 4.4136e-05 gnorm: 1.17 [ 5:57:23<18:34:53] +[titan] 2025-10-05 04:31:43,787 - root - INFO - step: 9715 loss: 2.4891 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 04:31:43,787 - root - INFO - lr: 4.4130e-05 gnorm: 1.38 [ 5:57:34<18:34:41] +[titan] 2025-10-05 04:31:54,630 - root - INFO - step: 9720 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0983 +[titan] 2025-10-05 04:31:54,630 - root - INFO - lr: 4.4124e-05 gnorm: 1.14 [ 5:57:45<18:34:30] +[titan] 2025-10-05 04:32:05,581 - root - INFO - step: 9725 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 04:32:05,582 - root - INFO - lr: 4.4118e-05 gnorm: 1.14 [ 5:57:56<18:34:18] +[titan] 2025-10-05 04:32:12,282 - root - INFO - Dumping profiler traces at step 9728 +[titan] 2025-10-05 04:32:12,320 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:32:16,691 - root - INFO - step: 9730 loss: 2.4883 
memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.1950 +[titan] 2025-10-05 04:32:16,691 - root - INFO - lr: 4.4112e-05 gnorm: 1.25 [ 5:58:07<18:34:07] +[titan] 2025-10-05 04:32:27,533 - root - INFO - step: 9735 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:32:27,534 - root - INFO - lr: 4.4106e-05 gnorm: 1.17 [ 5:58:18<18:33:56] +[titan] 2025-10-05 04:32:38,369 - root - INFO - step: 9740 loss: 2.4600 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1703 +[titan] 2025-10-05 04:32:38,369 - root - INFO - lr: 4.4100e-05 gnorm: 1.17 [ 5:58:29<18:33:44] +[titan] 2025-10-05 04:32:49,220 - root - INFO - step: 9745 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 04:32:49,220 - root - INFO - lr: 4.4094e-05 gnorm: 1.16 [ 5:58:40<18:33:32] +[titan] 2025-10-05 04:32:57,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:33:00,142 - root - INFO - step: 9750 loss: 2.3885 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1078 +[titan] 2025-10-05 04:33:00,143 - root - INFO - lr: 4.4088e-05 gnorm: 1.14 [ 5:58:51<18:33:21] +[titan] 2025-10-05 04:33:10,995 - root - INFO - step: 9755 loss: 2.5700 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3070 global_avg_mtp_loss: 2.2630 +[titan] 2025-10-05 04:33:10,995 - root - INFO - lr: 4.4082e-05 gnorm: 1.38 [ 5:59:01<18:33:09] +[titan] 2025-10-05 04:33:21,841 - root - INFO - step: 9760 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:33:21,841 - root - INFO - lr: 4.4076e-05 gnorm: 1.10 [ 5:59:12<18:32:58] +[titan] 2025-10-05 04:33:32,699 - root - INFO - step: 9765 loss: 2.4074 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:33:32,699 - root - INFO - lr: 4.4070e-05 gnorm: 1.12 [ 5:59:23<18:32:46] +[titan] 2025-10-05 04:33:43,562 - root - INFO - step: 9770 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 04:33:43,563 - root - INFO - lr: 4.4064e-05 gnorm: 1.20 [ 5:59:34<18:32:34] +[titan] 2025-10-05 04:33:54,429 - root - INFO - step: 9775 loss: 2.3924 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1125 +[titan] 2025-10-05 04:33:54,429 - root - INFO - lr: 4.4058e-05 gnorm: 1.13 [ 5:59:45<18:32:23] +[titan] 2025-10-05 04:34:05,364 - root - INFO - step: 9780 loss: 2.4335 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1473 +[titan] 2025-10-05 04:34:05,365 - root - INFO - lr: 4.4052e-05 
gnorm: 1.19 [ 5:59:56<18:32:12] +[titan] 2025-10-05 04:34:16,251 - root - INFO - step: 9785 loss: 2.4309 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:34:16,251 - root - INFO - lr: 4.4046e-05 gnorm: 1.30 [ 6:00:07<18:32:00] +[titan] 2025-10-05 04:34:27,120 - root - INFO - step: 9790 loss: 2.4512 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2882 global_avg_mtp_loss: 2.1629 +[titan] 2025-10-05 04:34:27,120 - root - INFO - lr: 4.4039e-05 gnorm: 1.21 [ 6:00:18<18:31:48] +[titan] 2025-10-05 04:34:37,999 - root - INFO - step: 9795 loss: 2.3456 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 04:34:37,999 - root - INFO - lr: 4.4033e-05 gnorm: 1.14 [ 6:00:28<18:31:37] +[titan] 2025-10-05 04:34:46,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:34:48,883 - root - INFO - step: 9800 loss: 2.4057 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:34:48,883 - root - INFO - lr: 4.4027e-05 gnorm: 1.18 [ 6:00:39<18:31:25] +[titan] 2025-10-05 04:34:59,779 - root - INFO - step: 9805 loss: 2.5371 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2995 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:34:59,779 - root - INFO - lr: 4.4021e-05 gnorm: 1.15 [ 6:00:50<18:31:14] +[titan] 2025-10-05 04:35:10,650 - root - INFO - step: 9810 loss: 2.4142 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1309 +[titan] 2025-10-05 04:35:10,651 - root - INFO - lr: 4.4015e-05 gnorm: 1.16 [ 6:01:01<18:31:02] +[titan] 2025-10-05 04:35:21,521 - root - INFO - step: 9815 loss: 2.4068 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1250 +[titan] 2025-10-05 04:35:21,521 - root - INFO - lr: 4.4009e-05 gnorm: 1.16 [ 6:01:12<18:30:51] +[titan] 2025-10-05 04:35:32,405 - root - INFO - step: 9820 loss: 2.4191 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:35:32,405 - root - INFO - lr: 4.4003e-05 gnorm: 1.14 [ 6:01:23<18:30:39] +[titan] 2025-10-05 04:35:43,265 - root - INFO - step: 9825 loss: 2.4557 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:35:43,266 - root - INFO - lr: 4.3997e-05 gnorm: 1.11 [ 6:01:34<18:30:28] +[titan] 2025-10-05 04:35:54,144 - root - INFO - step: 9830 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 04:35:54,144 - root - INFO - lr: 4.3991e-05 
gnorm: 1.10 [ 6:01:45<18:30:16] +[titan] 2025-10-05 04:36:05,038 - root - INFO - step: 9835 loss: 2.3594 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0837 +[titan] 2025-10-05 04:36:05,038 - root - INFO - lr: 4.3985e-05 gnorm: 1.17 [ 6:01:55<18:30:05] +[titan] 2025-10-05 04:36:15,903 - root - INFO - step: 9840 loss: 2.3943 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1136 +[titan] 2025-10-05 04:36:15,903 - root - INFO - lr: 4.3979e-05 gnorm: 1.13 [ 6:02:06<18:29:53] +[titan] 2025-10-05 04:36:26,766 - root - INFO - step: 9845 loss: 2.3607 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0841 +[titan] 2025-10-05 04:36:26,766 - root - INFO - lr: 4.3973e-05 gnorm: 1.11 [ 6:02:17<18:29:41] +[titan] 2025-10-05 04:36:35,444 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:36:37,631 - root - INFO - step: 9850 loss: 2.4018 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1206 +[titan] 2025-10-05 04:36:37,631 - root - INFO - lr: 4.3967e-05 gnorm: 1.18 [ 6:02:28<18:29:30] +[titan] 2025-10-05 04:36:48,494 - root - INFO - step: 9855 loss: 2.3920 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 04:36:48,494 - root - INFO - lr: 4.3961e-05 gnorm: 1.14 [ 6:02:39<18:29:18] +[titan] 2025-10-05 04:36:59,366 - root - INFO - step: 9860 loss: 2.3928 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1132 +[titan] 2025-10-05 04:36:59,366 - root - INFO - lr: 4.3955e-05 gnorm: 1.16 [ 6:02:50<18:29:07] +[titan] 2025-10-05 04:37:10,292 - root - INFO - step: 9865 loss: 2.3430 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0689 +[titan] 2025-10-05 04:37:10,292 - root - INFO - lr: 4.3948e-05 gnorm: 1.14 [ 6:03:01<18:28:55] +[titan] 2025-10-05 04:37:21,111 - root - INFO - step: 9870 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.0953 +[titan] 2025-10-05 04:37:21,111 - root - INFO - lr: 4.3942e-05 gnorm: 1.23 [ 6:03:12<18:28:44] +[titan] 2025-10-05 04:37:31,972 - root - INFO - step: 9875 loss: 2.4673 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2893 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:37:31,972 - root - INFO - lr: 4.3936e-05 gnorm: 1.12 [ 6:03:22<18:28:32] +[titan] 2025-10-05 04:37:42,800 - root - INFO - step: 9880 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 04:37:42,800 - root - INFO - lr: 4.3930e-05 
gnorm: 1.18 [ 6:03:33<18:28:20] +[titan] 2025-10-05 04:37:53,645 - root - INFO - step: 9885 loss: 2.3888 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:37:53,645 - root - INFO - lr: 4.3924e-05 gnorm: 1.14 [ 6:03:44<18:28:09] +[titan] 2025-10-05 04:38:04,551 - root - INFO - step: 9890 loss: 2.3882 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 04:38:04,552 - root - INFO - lr: 4.3918e-05 gnorm: 1.12 [ 6:03:55<18:27:57] +[titan] 2025-10-05 04:38:15,412 - root - INFO - step: 9895 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1157 +[titan] 2025-10-05 04:38:15,412 - root - INFO - lr: 4.3912e-05 gnorm: 1.14 [ 6:04:06<18:27:46] +[titan] 2025-10-05 04:38:24,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:38:26,242 - root - INFO - step: 9900 loss: 2.3816 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1025 +[titan] 2025-10-05 04:38:26,242 - root - INFO - lr: 4.3906e-05 gnorm: 1.14 [ 6:04:17<18:27:34] +[titan] 2025-10-05 04:38:37,109 - root - INFO - step: 9905 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 04:38:37,109 - root - INFO - lr: 4.3900e-05 gnorm: 1.17 [ 6:04:28<18:27:22] +[titan] 2025-10-05 04:38:47,968 - root - INFO - step: 9910 loss: 2.4451 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 04:38:47,968 - root - INFO - lr: 4.3894e-05 gnorm: 1.17 [ 6:04:38<18:27:11] +[titan] 2025-10-05 04:38:58,828 - root - INFO - step: 9915 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0776 +[titan] 2025-10-05 04:38:58,828 - root - INFO - lr: 4.3887e-05 gnorm: 1.15 [ 6:04:49<18:26:59] +[titan] 2025-10-05 04:39:09,703 - root - INFO - step: 9920 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 04:39:09,703 - root - INFO - lr: 4.3881e-05 gnorm: 1.13 [ 6:05:00<18:26:48] +[titan] 2025-10-05 04:39:20,593 - root - INFO - step: 9925 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 04:39:20,593 - root - INFO - lr: 4.3875e-05 gnorm: 1.14 [ 6:05:11<18:26:36] +[titan] 2025-10-05 04:39:31,464 - root - INFO - step: 9930 loss: 2.2894 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 04:39:31,464 - root - INFO - lr: 4.3869e-05 
gnorm: 1.11 [ 6:05:22<18:26:25] +[titan] 2025-10-05 04:39:42,337 - root - INFO - step: 9935 loss: 2.3475 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 04:39:42,338 - root - INFO - lr: 4.3863e-05 gnorm: 1.10 [ 6:05:33<18:26:13] +[titan] 2025-10-05 04:39:53,224 - root - INFO - step: 9940 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0849 +[titan] 2025-10-05 04:39:53,224 - root - INFO - lr: 4.3857e-05 gnorm: 1.13 [ 6:05:44<18:26:02] +[titan] 2025-10-05 04:40:04,154 - root - INFO - step: 9945 loss: 2.3821 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1033 +[titan] 2025-10-05 04:40:04,154 - root - INFO - lr: 4.3851e-05 gnorm: 1.14 [ 6:05:55<18:25:50] +[titan] 2025-10-05 04:40:12,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:40:15,025 - root - INFO - step: 9950 loss: 2.4179 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:40:15,026 - root - INFO - lr: 4.3845e-05 gnorm: 1.11 [ 6:06:05<18:25:39] +[titan] 2025-10-05 04:40:25,938 - root - INFO - step: 9955 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1159 +[titan] 2025-10-05 04:40:25,938 - root - INFO - lr: 4.3838e-05 gnorm: 1.14 [ 6:06:16<18:25:27] +[titan] 2025-10-05 04:40:36,795 - root - INFO - step: 9960 loss: 2.3949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 04:40:36,795 - root - INFO - lr: 4.3832e-05 gnorm: 1.17 [ 6:06:27<18:25:16] +[titan] 2025-10-05 04:40:47,648 - root - INFO - step: 9965 loss: 2.4110 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:40:47,648 - root - INFO - lr: 4.3826e-05 gnorm: 1.15 [ 6:06:38<18:25:04] +[titan] 2025-10-05 04:40:58,540 - root - INFO - step: 9970 loss: 2.3944 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1127 +[titan] 2025-10-05 04:40:58,540 - root - INFO - lr: 4.3820e-05 gnorm: 1.18 [ 6:06:49<18:24:53] +[titan] 2025-10-05 04:41:09,431 - root - INFO - step: 9975 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 04:41:09,432 - root - INFO - lr: 4.3814e-05 gnorm: 1.19 [ 6:07:00<18:24:41] +[titan] 2025-10-05 04:41:20,364 - root - INFO - step: 9980 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 04:41:20,364 - root - INFO - lr: 4.3808e-05 
gnorm: 1.18 [ 6:07:11<18:24:30] +[titan] 2025-10-05 04:41:31,259 - root - INFO - step: 9985 loss: 2.4484 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:41:31,260 - root - INFO - lr: 4.3802e-05 gnorm: 1.15 [ 6:07:22<18:24:18] +[titan] 2025-10-05 04:41:42,148 - root - INFO - step: 9990 loss: 2.4717 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:41:42,148 - root - INFO - lr: 4.3795e-05 gnorm: 1.17 [ 6:07:33<18:24:07] +[titan] 2025-10-05 04:41:53,059 - root - INFO - step: 9995 loss: 2.3948 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1147 +[titan] 2025-10-05 04:41:53,059 - root - INFO - lr: 4.3789e-05 gnorm: 1.17 [ 6:07:43<18:23:55] +[titan] 2025-10-05 04:42:01,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:42:03,951 - root - INFO - step: 10000 loss: 2.4699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1790 +[titan] 2025-10-05 04:42:03,951 - root - INFO - lr: 4.3783e-05 gnorm: 1.18 [ 6:07:54<18:23:44] +[titan] 2025-10-05 04:42:03,951 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 04:42:23,085 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 04:42:23,085 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.13 seconds. 
+[titan] 2025-10-05 04:44:28,943 - root - INFO - step: 10005 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 2,260 tflops: 31.35 mfu: 3.17% global_avg_ntp_loss: 0.2840 global_avg_mtp_loss: 2.1282 +[titan] 2025-10-05 04:44:28,943 - root - INFO - lr: 4.3777e-05 gnorm: 1.15 [ 6:10:19<18:30:15] +[titan] 2025-10-05 04:44:39,758 - root - INFO - step: 10010 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0843 +[titan] 2025-10-05 04:44:39,759 - root - INFO - lr: 4.3771e-05 gnorm: 1.10 [ 6:10:30<18:30:03] +[titan] 2025-10-05 04:44:50,583 - root - INFO - step: 10015 loss: 2.4606 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1726 +[titan] 2025-10-05 04:44:50,583 - root - INFO - lr: 4.3765e-05 gnorm: 1.17 [ 6:10:41<18:29:51] +[titan] 2025-10-05 04:45:01,371 - root - INFO - step: 10020 loss: 2.3595 memory: 118.84GiB(85.28%) tps: 30,376 tflops: 421.43 mfu: 42.61% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0816 +[titan] 2025-10-05 04:45:01,371 - root - INFO - lr: 4.3758e-05 gnorm: 1.12 [ 6:10:52<18:29:39] +[titan] 2025-10-05 04:45:12,207 - root - INFO - step: 10025 loss: 2.3890 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1095 +[titan] 2025-10-05 04:45:12,207 - root - INFO - lr: 4.3752e-05 gnorm: 1.13 [ 6:11:03<18:29:27] +[titan] 2025-10-05 04:45:23,056 - root - INFO - step: 10030 loss: 2.4171 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:45:23,056 - root - INFO - lr: 4.3746e-05 gnorm: 1.14 [ 6:11:13<18:29:15] +[titan] 2025-10-05 04:45:33,878 - root - INFO - step: 10035 loss: 2.4258 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1417 +[titan] 2025-10-05 04:45:33,879 - root - INFO - lr: 
4.3740e-05 gnorm: 1.18 [ 6:11:24<18:29:03] +[titan] 2025-10-05 04:45:44,722 - root - INFO - step: 10040 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:45:44,722 - root - INFO - lr: 4.3734e-05 gnorm: 1.13 [ 6:11:35<18:28:51] +[titan] 2025-10-05 04:45:55,531 - root - INFO - step: 10045 loss: 2.3962 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:45:55,531 - root - INFO - lr: 4.3728e-05 gnorm: 1.14 [ 6:11:46<18:28:39] +[titan] 2025-10-05 04:46:04,196 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:46:06,376 - root - INFO - step: 10050 loss: 2.4217 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1372 +[titan] 2025-10-05 04:46:06,376 - root - INFO - lr: 4.3721e-05 gnorm: 1.19 [ 6:11:57<18:28:27] +[titan] 2025-10-05 04:46:17,244 - root - INFO - step: 10055 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1117 +[titan] 2025-10-05 04:46:17,245 - root - INFO - lr: 4.3715e-05 gnorm: 1.09 [ 6:12:08<18:28:15] +[titan] 2025-10-05 04:46:28,093 - root - INFO - step: 10060 loss: 2.4776 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 04:46:28,093 - root - INFO - lr: 4.3709e-05 gnorm: 1.12 [ 6:12:18<18:28:03] +[titan] 2025-10-05 04:46:38,949 - root - INFO - step: 10065 loss: 2.3571 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0814 +[titan] 2025-10-05 04:46:38,949 - root - INFO - lr: 4.3703e-05 gnorm: 1.17 [ 6:12:29<18:27:52] +[titan] 2025-10-05 04:46:49,820 - root - INFO - step: 10070 loss: 2.4101 memory: 118.84GiB(85.28%) tps: 
30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:46:49,820 - root - INFO - lr: 4.3697e-05 gnorm: 1.14 [ 6:12:40<18:27:40] +[titan] 2025-10-05 04:47:00,671 - root - INFO - step: 10075 loss: 2.4112 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:47:00,671 - root - INFO - lr: 4.3690e-05 gnorm: 1.17 [ 6:12:51<18:27:28] +[titan] 2025-10-05 04:47:11,530 - root - INFO - step: 10080 loss: 2.3867 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1065 +[titan] 2025-10-05 04:47:11,530 - root - INFO - lr: 4.3684e-05 gnorm: 1.12 [ 6:13:02<18:27:16] +[titan] 2025-10-05 04:47:22,402 - root - INFO - step: 10085 loss: 2.3591 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 04:47:22,403 - root - INFO - lr: 4.3678e-05 gnorm: 1.14 [ 6:13:13<18:27:04] +[titan] 2025-10-05 04:47:33,304 - root - INFO - step: 10090 loss: 2.3953 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1153 +[titan] 2025-10-05 04:47:33,305 - root - INFO - lr: 4.3672e-05 gnorm: 1.12 [ 6:13:24<18:26:53] +[titan] 2025-10-05 04:47:44,169 - root - INFO - step: 10095 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2874 global_avg_mtp_loss: 2.1668 +[titan] 2025-10-05 04:47:44,169 - root - INFO - lr: 4.3666e-05 gnorm: 1.20 [ 6:13:35<18:26:41] +[titan] 2025-10-05 04:47:52,901 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:47:55,091 - root - INFO - step: 10100 loss: 2.4560 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1684 +[titan] 2025-10-05 04:47:55,091 - root - INFO - lr: 4.3659e-05 gnorm: 1.18 [ 6:13:45<18:26:29] +[titan] 2025-10-05 04:48:05,969 - root - INFO - step: 10105 loss: 2.4312 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:48:05,969 - root - INFO - lr: 4.3653e-05 gnorm: 1.10 [ 6:13:56<18:26:17] +[titan] 2025-10-05 04:48:16,842 - root - INFO - step: 10110 loss: 2.3985 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1175 +[titan] 2025-10-05 04:48:16,842 - root - INFO - lr: 4.3647e-05 gnorm: 1.15 [ 6:14:07<18:26:06] +[titan] 2025-10-05 04:48:27,739 - root - INFO - step: 10115 loss: 2.4183 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1354 +[titan] 2025-10-05 04:48:27,739 - root - INFO - lr: 4.3641e-05 gnorm: 1.11 [ 6:14:18<18:25:54] +[titan] 2025-10-05 04:48:38,638 - root - INFO - step: 10120 loss: 2.3862 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 04:48:38,639 - root - INFO - lr: 4.3635e-05 gnorm: 1.15 [ 6:14:29<18:25:42] +[titan] 2025-10-05 04:48:49,495 - root - INFO - step: 10125 loss: 2.4046 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1229 +[titan] 2025-10-05 04:48:49,495 - root - INFO - lr: 4.3628e-05 gnorm: 1.13 [ 6:14:40<18:25:30] +[titan] 2025-10-05 04:49:00,374 - root - INFO - step: 10130 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 04:49:00,374 - root - INFO - lr: 
4.3622e-05 gnorm: 1.12 [ 6:14:51<18:25:19] +[titan] 2025-10-05 04:49:11,231 - root - INFO - step: 10135 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1240 +[titan] 2025-10-05 04:49:11,231 - root - INFO - lr: 4.3616e-05 gnorm: 1.11 [ 6:15:02<18:25:07] +[titan] 2025-10-05 04:49:22,073 - root - INFO - step: 10140 loss: 2.4295 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1448 +[titan] 2025-10-05 04:49:22,073 - root - INFO - lr: 4.3610e-05 gnorm: 1.19 [ 6:15:12<18:24:55] +[titan] 2025-10-05 04:49:32,953 - root - INFO - step: 10145 loss: 2.4182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:49:32,953 - root - INFO - lr: 4.3603e-05 gnorm: 1.13 [ 6:15:23<18:24:43] +[titan] 2025-10-05 04:49:41,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:49:43,809 - root - INFO - step: 10150 loss: 2.4033 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1220 +[titan] 2025-10-05 04:49:43,810 - root - INFO - lr: 4.3597e-05 gnorm: 1.18 [ 6:15:34<18:24:31] +[titan] 2025-10-05 04:49:54,722 - root - INFO - step: 10155 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1491 +[titan] 2025-10-05 04:49:54,722 - root - INFO - lr: 4.3591e-05 gnorm: 1.19 [ 6:15:45<18:24:20] +[titan] 2025-10-05 04:50:05,570 - root - INFO - step: 10160 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2925 global_avg_mtp_loss: 2.2000 +[titan] 2025-10-05 04:50:05,570 - root - INFO - lr: 4.3585e-05 gnorm: 1.18 [ 6:15:56<18:24:08] +[titan] 2025-10-05 04:50:16,417 - root - INFO - step: 10165 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 04:50:16,418 - root - INFO - lr: 4.3578e-05 gnorm: 1.15 [ 6:16:07<18:23:56] +[titan] 2025-10-05 04:50:27,286 - root - INFO - step: 10170 loss: 2.4892 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:50:27,286 - root - INFO - lr: 4.3572e-05 gnorm: 1.17 [ 6:16:18<18:23:44] +[titan] 2025-10-05 04:50:38,151 - root - INFO - step: 10175 loss: 2.4728 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1825 +[titan] 2025-10-05 04:50:38,151 - root - INFO - lr: 4.3566e-05 gnorm: 1.14 [ 6:16:29<18:23:33] +[titan] 2025-10-05 04:50:49,013 - root - INFO - step: 10180 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1066 +[titan] 2025-10-05 04:50:49,013 - root - INFO - lr: 
4.3560e-05 gnorm: 1.11 [ 6:16:39<18:23:21] +[titan] 2025-10-05 04:50:59,879 - root - INFO - step: 10185 loss: 2.3308 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0576 +[titan] 2025-10-05 04:50:59,879 - root - INFO - lr: 4.3553e-05 gnorm: 1.10 [ 6:16:50<18:23:09] +[titan] 2025-10-05 04:51:10,735 - root - INFO - step: 10190 loss: 2.4005 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1191 +[titan] 2025-10-05 04:51:10,735 - root - INFO - lr: 4.3547e-05 gnorm: 1.12 [ 6:17:01<18:22:57] +[titan] 2025-10-05 04:51:21,605 - root - INFO - step: 10195 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:51:21,605 - root - INFO - lr: 4.3541e-05 gnorm: 1.07 [ 6:17:12<18:22:45] +[titan] 2025-10-05 04:51:30,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:51:32,491 - root - INFO - step: 10200 loss: 2.4592 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1708 +[titan] 2025-10-05 04:51:32,491 - root - INFO - lr: 4.3535e-05 gnorm: 1.19 [ 6:17:23<18:22:34] +[titan] 2025-10-05 04:51:43,357 - root - INFO - step: 10205 loss: 2.3585 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0823 +[titan] 2025-10-05 04:51:43,357 - root - INFO - lr: 4.3528e-05 gnorm: 1.08 [ 6:17:34<18:22:22] +[titan] 2025-10-05 04:51:54,234 - root - INFO - step: 10210 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 04:51:54,234 - root - INFO - lr: 4.3522e-05 gnorm: 1.13 [ 6:17:45<18:22:10] +[titan] 2025-10-05 04:52:05,148 - root - INFO - step: 10215 loss: 2.4224 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1397 +[titan] 2025-10-05 04:52:05,148 - root - INFO - lr: 4.3516e-05 gnorm: 1.15 [ 6:17:56<18:21:58] +[titan] 2025-10-05 04:52:16,011 - root - INFO - step: 10220 loss: 2.3880 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:52:16,012 - root - INFO - lr: 4.3510e-05 gnorm: 1.17 [ 6:18:06<18:21:47] +[titan] 2025-10-05 04:52:26,919 - root - INFO - step: 10225 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0969 +[titan] 2025-10-05 04:52:26,919 - root - INFO - lr: 4.3503e-05 gnorm: 1.13 [ 6:18:17<18:21:35] +[titan] 2025-10-05 04:52:37,795 - root - INFO - step: 10230 loss: 2.4827 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2964 global_avg_mtp_loss: 2.1864 +[titan] 2025-10-05 04:52:37,795 - root - INFO - lr: 
4.3497e-05 gnorm: 1.23 [ 6:18:28<18:21:23] +[titan] 2025-10-05 04:52:48,650 - root - INFO - step: 10235 loss: 2.3739 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0960 +[titan] 2025-10-05 04:52:48,651 - root - INFO - lr: 4.3491e-05 gnorm: 1.14 [ 6:18:39<18:21:11] +[titan] 2025-10-05 04:52:59,594 - root - INFO - step: 10240 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0761 +[titan] 2025-10-05 04:52:59,594 - root - INFO - lr: 4.3485e-05 gnorm: 1.17 [ 6:18:50<18:21:00] +[titan] 2025-10-05 04:52:59,770 - root - INFO - Dumping profiler traces at step 10240 +[titan] 2025-10-05 04:52:59,806 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:53:10,672 - root - INFO - step: 10245 loss: 2.4638 memory: 118.84GiB(85.28%) tps: 29,580 tflops: 410.38 mfu: 41.49% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1752 +[titan] 2025-10-05 04:53:10,672 - root - INFO - lr: 4.3478e-05 gnorm: 1.18 [ 6:19:01<18:20:49] +[titan] 2025-10-05 04:53:19,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:53:21,556 - root - INFO - step: 10250 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0638 +[titan] 2025-10-05 04:53:21,556 - root - INFO - lr: 4.3472e-05 gnorm: 1.18 [ 6:19:12<18:20:37] +[titan] 2025-10-05 04:53:32,460 - root - INFO - step: 10255 loss: 2.3782 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.0997 +[titan] 2025-10-05 04:53:32,460 - root - INFO - lr: 4.3466e-05 gnorm: 1.11 [ 6:19:23<18:20:25] +[titan] 2025-10-05 04:53:43,321 - root - INFO - step: 10260 loss: 2.3383 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 04:53:43,321 - root - INFO - lr: 4.3459e-05 gnorm: 1.16 [ 6:19:34<18:20:14] +[titan] 2025-10-05 04:53:54,178 - root - INFO - step: 10265 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 04:53:54,178 - root - INFO - lr: 4.3453e-05 gnorm: 1.16 [ 6:19:45<18:20:02] +[titan] 2025-10-05 04:54:05,007 - root - INFO - step: 10270 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 04:54:05,007 - root - INFO - lr: 4.3447e-05 gnorm: 1.17 [ 6:19:55<18:19:50] +[titan] 2025-10-05 04:54:15,842 - root - INFO - step: 10275 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:54:15,843 - root - INFO - lr: 4.3440e-05 gnorm: 1.14 [ 6:20:06<18:19:38] +[titan] 2025-10-05 04:54:26,778 - root - INFO - step: 10280 loss: 2.3590 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0825 +[titan] 2025-10-05 04:54:26,778 - root - INFO - lr: 
4.3434e-05 gnorm: 1.09 [ 6:20:17<18:19:26] +[titan] 2025-10-05 04:54:37,611 - root - INFO - step: 10285 loss: 2.3467 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 04:54:37,611 - root - INFO - lr: 4.3428e-05 gnorm: 1.17 [ 6:20:28<18:19:15] +[titan] 2025-10-05 04:54:48,457 - root - INFO - step: 10290 loss: 2.3098 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 04:54:48,457 - root - INFO - lr: 4.3422e-05 gnorm: 1.13 [ 6:20:39<18:19:03] +[titan] 2025-10-05 04:54:59,307 - root - INFO - step: 10295 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 04:54:59,307 - root - INFO - lr: 4.3415e-05 gnorm: 1.19 [ 6:20:50<18:18:51] +[titan] 2025-10-05 04:55:07,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:55:10,154 - root - INFO - step: 10300 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:55:10,154 - root - INFO - lr: 4.3409e-05 gnorm: 1.11 [ 6:21:01<18:18:39] +[titan] 2025-10-05 04:55:20,995 - root - INFO - step: 10305 loss: 2.4115 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1272 +[titan] 2025-10-05 04:55:20,995 - root - INFO - lr: 4.3403e-05 gnorm: 1.16 [ 6:21:11<18:18:27] +[titan] 2025-10-05 04:55:31,895 - root - INFO - step: 10310 loss: 2.3942 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:55:31,895 - root - INFO - lr: 4.3396e-05 gnorm: 1.11 [ 6:21:22<18:18:16] +[titan] 2025-10-05 04:55:42,797 - root - INFO - step: 10315 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0890 +[titan] 2025-10-05 04:55:42,797 - root - INFO - lr: 4.3390e-05 gnorm: 1.14 [ 6:21:33<18:18:04] +[titan] 2025-10-05 04:55:53,631 - root - INFO - step: 10320 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0675 +[titan] 2025-10-05 04:55:53,631 - root - INFO - lr: 4.3384e-05 gnorm: 1.13 [ 6:21:44<18:17:52] +[titan] 2025-10-05 04:56:04,495 - root - INFO - step: 10325 loss: 2.3236 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 04:56:04,495 - root - INFO - lr: 4.3377e-05 gnorm: 1.11 [ 6:21:55<18:17:40] +[titan] 2025-10-05 04:56:15,368 - root - INFO - step: 10330 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0554 +[titan] 2025-10-05 04:56:15,368 - root - INFO - lr: 
4.3371e-05 gnorm: 1.11 [ 6:22:06<18:17:29] +[titan] 2025-10-05 04:56:26,235 - root - INFO - step: 10335 loss: 2.3812 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1021 +[titan] 2025-10-05 04:56:26,235 - root - INFO - lr: 4.3365e-05 gnorm: 1.13 [ 6:22:17<18:17:17] +[titan] 2025-10-05 04:56:37,100 - root - INFO - step: 10340 loss: 2.4139 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 04:56:37,100 - root - INFO - lr: 4.3358e-05 gnorm: 1.15 [ 6:22:27<18:17:05] +[titan] 2025-10-05 04:56:48,014 - root - INFO - step: 10345 loss: 2.3627 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0861 +[titan] 2025-10-05 04:56:48,014 - root - INFO - lr: 4.3352e-05 gnorm: 1.15 [ 6:22:38<18:16:53] +[titan] 2025-10-05 04:56:56,705 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:56:58,888 - root - INFO - step: 10350 loss: 2.3704 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0924 +[titan] 2025-10-05 04:56:58,888 - root - INFO - lr: 4.3346e-05 gnorm: 1.12 [ 6:22:49<18:16:42] +[titan] 2025-10-05 04:57:09,755 - root - INFO - step: 10355 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0403 +[titan] 2025-10-05 04:57:09,755 - root - INFO - lr: 4.3339e-05 gnorm: 1.14 [ 6:23:00<18:16:30] +[titan] 2025-10-05 04:57:20,636 - root - INFO - step: 10360 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1400 +[titan] 2025-10-05 04:57:20,637 - root - INFO - lr: 4.3333e-05 gnorm: 1.16 [ 6:23:11<18:16:18] +[titan] 2025-10-05 04:57:31,520 - root - INFO - step: 10365 loss: 2.3992 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 04:57:31,521 - root - INFO - lr: 4.3327e-05 gnorm: 1.14 [ 6:23:22<18:16:07] +[titan] 2025-10-05 04:57:42,396 - root - INFO - step: 10370 loss: 2.4732 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1823 +[titan] 2025-10-05 04:57:42,396 - root - INFO - lr: 4.3320e-05 gnorm: 1.14 [ 6:23:33<18:15:55] +[titan] 2025-10-05 04:57:53,311 - root - INFO - step: 10375 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1107 +[titan] 2025-10-05 04:57:53,311 - root - INFO - lr: 4.3314e-05 gnorm: 1.17 [ 6:23:44<18:15:43] +[titan] 2025-10-05 04:58:04,191 - root - INFO - step: 10380 loss: 2.3285 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 04:58:04,191 - root - INFO - lr: 
4.3308e-05 gnorm: 1.15 [ 6:23:55<18:15:31] +[titan] 2025-10-05 04:58:15,071 - root - INFO - step: 10385 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:58:15,072 - root - INFO - lr: 4.3301e-05 gnorm: 2.89 [ 6:24:05<18:15:20] +[titan] 2025-10-05 04:58:25,961 - root - INFO - step: 10390 loss: 2.4472 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 04:58:25,961 - root - INFO - lr: 4.3295e-05 gnorm: 1.19 [ 6:24:16<18:15:08] +[titan] 2025-10-05 04:58:36,832 - root - INFO - step: 10395 loss: 2.4116 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:58:36,832 - root - INFO - lr: 4.3289e-05 gnorm: 1.19 [ 6:24:27<18:14:56] +[titan] 2025-10-05 04:58:45,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:58:47,732 - root - INFO - step: 10400 loss: 2.3889 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:58:47,732 - root - INFO - lr: 4.3282e-05 gnorm: 1.15 [ 6:24:38<18:14:45] +[titan] 2025-10-05 04:58:58,620 - root - INFO - step: 10405 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1661 +[titan] 2025-10-05 04:58:58,620 - root - INFO - lr: 4.3276e-05 gnorm: 1.15 [ 6:24:49<18:14:33] +[titan] 2025-10-05 04:59:09,537 - root - INFO - step: 10410 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0667 +[titan] 2025-10-05 04:59:09,538 - root - INFO - lr: 4.3270e-05 gnorm: 1.09 [ 6:25:00<18:14:21] +[titan] 2025-10-05 04:59:20,430 - root - INFO - step: 10415 loss: 2.4412 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1531 +[titan] 2025-10-05 04:59:20,430 - root - INFO - lr: 4.3263e-05 gnorm: 1.11 [ 6:25:11<18:14:10] +[titan] 2025-10-05 04:59:31,331 - root - INFO - step: 10420 loss: 2.4559 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1675 +[titan] 2025-10-05 04:59:31,331 - root - INFO - lr: 4.3257e-05 gnorm: 1.18 [ 6:25:22<18:13:58] +[titan] 2025-10-05 04:59:42,198 - root - INFO - step: 10425 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0782 +[titan] 2025-10-05 04:59:42,198 - root - INFO - lr: 4.3250e-05 gnorm: 1.15 [ 6:25:33<18:13:46] +[titan] 2025-10-05 04:59:53,072 - root - INFO - step: 10430 loss: 2.3763 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0987 +[titan] 2025-10-05 04:59:53,072 - root - INFO - lr: 
4.3244e-05 gnorm: 1.14 [ 6:25:43<18:13:35] +[titan] 2025-10-05 05:00:03,938 - root - INFO - step: 10435 loss: 2.4170 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2835 global_avg_mtp_loss: 2.1335 +[titan] 2025-10-05 05:00:03,939 - root - INFO - lr: 4.3238e-05 gnorm: 1.15 [ 6:25:54<18:13:23] +[titan] 2025-10-05 05:00:14,820 - root - INFO - step: 10440 loss: 2.4296 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 05:00:14,820 - root - INFO - lr: 4.3231e-05 gnorm: 1.12 [ 6:26:05<18:13:11] +[titan] 2025-10-05 05:00:25,686 - root - INFO - step: 10445 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0958 +[titan] 2025-10-05 05:00:25,686 - root - INFO - lr: 4.3225e-05 gnorm: 1.15 [ 6:26:16<18:12:59] +[titan] 2025-10-05 05:00:34,395 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:00:36,573 - root - INFO - step: 10450 loss: 2.3225 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:00:36,574 - root - INFO - lr: 4.3219e-05 gnorm: 1.12 [ 6:26:27<18:12:48] +[titan] 2025-10-05 05:00:47,453 - root - INFO - step: 10455 loss: 2.2956 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 05:00:47,453 - root - INFO - lr: 4.3212e-05 gnorm: 1.12 [ 6:26:38<18:12:36] +[titan] 2025-10-05 05:00:58,326 - root - INFO - step: 10460 loss: 2.4231 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1381 +[titan] 2025-10-05 05:00:58,326 - root - INFO - lr: 4.3206e-05 gnorm: 1.13 [ 6:26:49<18:12:24] +[titan] 2025-10-05 05:01:09,212 - root - INFO - step: 10465 loss: 2.3984 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1166 +[titan] 2025-10-05 05:01:09,212 - root - INFO - lr: 4.3199e-05 gnorm: 1.16 [ 6:27:00<18:12:13] +[titan] 2025-10-05 05:01:20,082 - root - INFO - step: 10470 loss: 2.3857 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1068 +[titan] 2025-10-05 05:01:20,082 - root - INFO - lr: 4.3193e-05 gnorm: 1.17 [ 6:27:10<18:12:01] +[titan] 2025-10-05 05:01:31,025 - root - INFO - step: 10475 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0845 +[titan] 2025-10-05 05:01:31,025 - root - INFO - lr: 4.3187e-05 gnorm: 1.18 [ 6:27:21<18:11:49] +[titan] 2025-10-05 05:01:41,900 - root - INFO - step: 10480 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1605 +[titan] 2025-10-05 05:01:41,900 - root - INFO - lr: 
4.3180e-05 gnorm: 1.14 [ 6:27:32<18:11:38] +[titan] 2025-10-05 05:01:52,794 - root - INFO - step: 10485 loss: 2.3469 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0715 +[titan] 2025-10-05 05:01:52,794 - root - INFO - lr: 4.3174e-05 gnorm: 1.11 [ 6:27:43<18:11:26] +[titan] 2025-10-05 05:02:03,640 - root - INFO - step: 10490 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:02:03,641 - root - INFO - lr: 4.3167e-05 gnorm: 1.12 [ 6:27:54<18:11:14] +[titan] 2025-10-05 05:02:14,499 - root - INFO - step: 10495 loss: 2.4247 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2861 global_avg_mtp_loss: 2.1386 +[titan] 2025-10-05 05:02:14,499 - root - INFO - lr: 4.3161e-05 gnorm: 1.11 [ 6:28:05<18:11:02] +[titan] 2025-10-05 05:02:23,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:02:25,357 - root - INFO - step: 10500 loss: 2.3813 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1019 +[titan] 2025-10-05 05:02:25,357 - root - INFO - lr: 4.3155e-05 gnorm: 1.11 [ 6:28:16<18:10:51] +[titan] 2025-10-05 05:02:36,309 - root - INFO - step: 10505 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1463 +[titan] 2025-10-05 05:02:36,309 - root - INFO - lr: 4.3148e-05 gnorm: 1.31 [ 6:28:27<18:10:39] +[titan] 2025-10-05 05:02:47,169 - root - INFO - step: 10510 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0752 +[titan] 2025-10-05 05:02:47,169 - root - INFO - lr: 4.3142e-05 gnorm: 1.12 [ 6:28:38<18:10:27] +[titan] 2025-10-05 05:02:58,035 - root - INFO - step: 10515 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1262 +[titan] 2025-10-05 05:02:58,035 - root - INFO - lr: 4.3135e-05 gnorm: 1.20 [ 6:28:48<18:10:16] +[titan] 2025-10-05 05:03:08,894 - root - INFO - step: 10520 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0523 +[titan] 2025-10-05 05:03:08,894 - root - INFO - lr: 4.3129e-05 gnorm: 1.10 [ 6:28:59<18:10:04] +[titan] 2025-10-05 05:03:19,768 - root - INFO - step: 10525 loss: 2.4870 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 05:03:19,768 - root - INFO - lr: 4.3122e-05 gnorm: 1.18 [ 6:29:10<18:09:52] +[titan] 2025-10-05 05:03:30,631 - root - INFO - step: 10530 loss: 2.3951 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:03:30,631 - root - INFO - lr: 
4.3116e-05 gnorm: 1.13 [ 6:29:21<18:09:41] +[titan] 2025-10-05 05:03:41,571 - root - INFO - step: 10535 loss: 2.3677 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:03:41,571 - root - INFO - lr: 4.3110e-05 gnorm: 1.19 [ 6:29:32<18:09:29] +[titan] 2025-10-05 05:03:52,432 - root - INFO - step: 10540 loss: 2.4252 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1411 +[titan] 2025-10-05 05:03:52,432 - root - INFO - lr: 4.3103e-05 gnorm: 1.19 [ 6:29:43<18:09:17] +[titan] 2025-10-05 05:04:03,276 - root - INFO - step: 10545 loss: 2.4280 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1435 +[titan] 2025-10-05 05:04:03,277 - root - INFO - lr: 4.3097e-05 gnorm: 1.16 [ 6:29:54<18:09:06] +[titan] 2025-10-05 05:04:11,963 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:04:14,149 - root - INFO - step: 10550 loss: 2.2936 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0258 +[titan] 2025-10-05 05:04:14,149 - root - INFO - lr: 4.3090e-05 gnorm: 1.14 [ 6:30:04<18:08:54] +[titan] 2025-10-05 05:04:25,007 - root - INFO - step: 10555 loss: 2.3687 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0910 +[titan] 2025-10-05 05:04:25,007 - root - INFO - lr: 4.3084e-05 gnorm: 1.18 [ 6:30:15<18:08:42] +[titan] 2025-10-05 05:04:35,912 - root - INFO - step: 10560 loss: 2.4093 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1275 +[titan] 2025-10-05 05:04:35,912 - root - INFO - lr: 4.3077e-05 gnorm: 1.23 [ 6:30:26<18:08:30] +[titan] 2025-10-05 05:04:46,752 - root - INFO - step: 10565 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1548 +[titan] 2025-10-05 05:04:46,752 - root - INFO - lr: 4.3071e-05 gnorm: 1.10 [ 6:30:37<18:08:19] +[titan] 2025-10-05 05:04:57,630 - root - INFO - step: 10570 loss: 2.3849 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1052 +[titan] 2025-10-05 05:04:57,630 - root - INFO - lr: 4.3065e-05 gnorm: 1.13 [ 6:30:48<18:08:07] +[titan] 2025-10-05 05:05:08,469 - root - INFO - step: 10575 loss: 2.4749 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1842 +[titan] 2025-10-05 05:05:08,469 - root - INFO - lr: 4.3058e-05 gnorm: 1.19 [ 6:30:59<18:07:55] +[titan] 2025-10-05 05:05:19,334 - root - INFO - step: 10580 loss: 2.3851 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1046 +[titan] 2025-10-05 05:05:19,335 - root - INFO - lr: 
4.3052e-05 gnorm: 1.12 [ 6:31:10<18:07:43] +[titan] 2025-10-05 05:05:30,220 - root - INFO - step: 10585 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0900 +[titan] 2025-10-05 05:05:30,220 - root - INFO - lr: 4.3045e-05 gnorm: 1.17 [ 6:31:21<18:07:32] +[titan] 2025-10-05 05:05:41,134 - root - INFO - step: 10590 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0866 +[titan] 2025-10-05 05:05:41,134 - root - INFO - lr: 4.3039e-05 gnorm: 1.10 [ 6:31:31<18:07:20] +[titan] 2025-10-05 05:05:51,981 - root - INFO - step: 10595 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 05:05:51,981 - root - INFO - lr: 4.3032e-05 gnorm: 1.13 [ 6:31:42<18:07:08] +[titan] 2025-10-05 05:06:00,680 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:06:02,853 - root - INFO - step: 10600 loss: 2.4272 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1436 +[titan] 2025-10-05 05:06:02,853 - root - INFO - lr: 4.3026e-05 gnorm: 1.13 [ 6:31:53<18:06:57] +[titan] 2025-10-05 05:06:13,702 - root - INFO - step: 10605 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1154 +[titan] 2025-10-05 05:06:13,702 - root - INFO - lr: 4.3019e-05 gnorm: 1.18 [ 6:32:04<18:06:45] +[titan] 2025-10-05 05:06:24,546 - root - INFO - step: 10610 loss: 2.4439 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 05:06:24,547 - root - INFO - lr: 4.3013e-05 gnorm: 1.17 [ 6:32:15<18:06:33] +[titan] 2025-10-05 05:06:35,421 - root - INFO - step: 10615 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0903 +[titan] 2025-10-05 05:06:35,421 - root - INFO - lr: 4.3006e-05 gnorm: 1.10 [ 6:32:26<18:06:21] +[titan] 2025-10-05 05:06:46,307 - root - INFO - step: 10620 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 05:06:46,307 - root - INFO - lr: 4.3000e-05 gnorm: 1.15 [ 6:32:37<18:06:10] +[titan] 2025-10-05 05:06:57,167 - root - INFO - step: 10625 loss: 2.3874 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1077 +[titan] 2025-10-05 05:06:57,168 - root - INFO - lr: 4.2993e-05 gnorm: 1.16 [ 6:32:47<18:05:58] +[titan] 2025-10-05 05:07:08,027 - root - INFO - step: 10630 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0719 +[titan] 2025-10-05 05:07:08,027 - root - INFO - lr: 
4.2987e-05 gnorm: 1.17 [ 6:32:58<18:05:46] +[titan] 2025-10-05 05:07:18,912 - root - INFO - step: 10635 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0503 +[titan] 2025-10-05 05:07:18,912 - root - INFO - lr: 4.2981e-05 gnorm: 1.11 [ 6:33:09<18:05:35] +[titan] 2025-10-05 05:07:29,770 - root - INFO - step: 10640 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0909 +[titan] 2025-10-05 05:07:29,770 - root - INFO - lr: 4.2974e-05 gnorm: 1.11 [ 6:33:20<18:05:23] +[titan] 2025-10-05 05:07:40,642 - root - INFO - step: 10645 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1251 +[titan] 2025-10-05 05:07:40,642 - root - INFO - lr: 4.2968e-05 gnorm: 1.15 [ 6:33:31<18:05:11] +[titan] 2025-10-05 05:07:49,320 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:07:51,513 - root - INFO - step: 10650 loss: 2.3800 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 05:07:51,513 - root - INFO - lr: 4.2961e-05 gnorm: 1.13 [ 6:33:42<18:05:00] +[titan] 2025-10-05 05:08:02,386 - root - INFO - step: 10655 loss: 2.2876 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0198 +[titan] 2025-10-05 05:08:02,387 - root - INFO - lr: 4.2955e-05 gnorm: 1.11 [ 6:33:53<18:04:48] +[titan] 2025-10-05 05:08:13,251 - root - INFO - step: 10660 loss: 2.3831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 05:08:13,252 - root - INFO - lr: 4.2948e-05 gnorm: 1.14 [ 6:34:04<18:04:36] +[titan] 2025-10-05 05:08:24,145 - root - INFO - step: 10665 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 05:08:24,145 - root - INFO - lr: 4.2942e-05 gnorm: 1.11 [ 6:34:14<18:04:25] +[titan] 2025-10-05 05:08:34,996 - root - INFO - step: 10670 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 05:08:34,996 - root - INFO - lr: 4.2935e-05 gnorm: 1.10 [ 6:34:25<18:04:13] +[titan] 2025-10-05 05:08:45,876 - root - INFO - step: 10675 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1231 +[titan] 2025-10-05 05:08:45,876 - root - INFO - lr: 4.2929e-05 gnorm: 1.11 [ 6:34:36<18:04:01] +[titan] 2025-10-05 05:08:56,738 - root - INFO - step: 10680 loss: 2.4221 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1374 +[titan] 2025-10-05 05:08:56,738 - root - INFO - lr: 
4.2922e-05 gnorm: 1.12 [ 6:34:47<18:03:49] +[titan] 2025-10-05 05:09:07,575 - root - INFO - step: 10685 loss: 2.4893 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1990 +[titan] 2025-10-05 05:09:07,575 - root - INFO - lr: 4.2916e-05 gnorm: 1.14 [ 6:34:58<18:03:38] +[titan] 2025-10-05 05:09:18,438 - root - INFO - step: 10690 loss: 2.3907 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1112 +[titan] 2025-10-05 05:09:18,438 - root - INFO - lr: 4.2909e-05 gnorm: 1.15 [ 6:35:09<18:03:26] +[titan] 2025-10-05 05:09:29,320 - root - INFO - step: 10695 loss: 2.3485 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0733 +[titan] 2025-10-05 05:09:29,320 - root - INFO - lr: 4.2903e-05 gnorm: 1.12 [ 6:35:20<18:03:14] +[titan] 2025-10-05 05:09:38,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:09:40,188 - root - INFO - step: 10700 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0915 +[titan] 2025-10-05 05:09:40,188 - root - INFO - lr: 4.2896e-05 gnorm: 1.13 [ 6:35:31<18:03:03] +[titan] 2025-10-05 05:09:51,053 - root - INFO - step: 10705 loss: 2.4598 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1721 +[titan] 2025-10-05 05:09:51,054 - root - INFO - lr: 4.2890e-05 gnorm: 1.14 [ 6:35:41<18:02:51] +[titan] 2025-10-05 05:10:01,930 - root - INFO - step: 10710 loss: 2.4459 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 05:10:01,930 - root - INFO - lr: 4.2883e-05 gnorm: 1.13 [ 6:35:52<18:02:39] +[titan] 2025-10-05 05:10:12,779 - root - INFO - step: 10715 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:10:12,779 - root - INFO - lr: 4.2877e-05 gnorm: 1.10 [ 6:36:03<18:02:27] +[titan] 2025-10-05 05:10:23,641 - root - INFO - step: 10720 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0671 +[titan] 2025-10-05 05:10:23,641 - root - INFO - lr: 4.2870e-05 gnorm: 1.07 [ 6:36:14<18:02:16] +[titan] 2025-10-05 05:10:34,518 - root - INFO - step: 10725 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 05:10:34,518 - root - INFO - lr: 4.2864e-05 gnorm: 1.07 [ 6:36:25<18:02:04] +[titan] 2025-10-05 05:10:45,426 - root - INFO - step: 10730 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0982 +[titan] 2025-10-05 05:10:45,426 - root - INFO - lr: 
4.2857e-05 gnorm: 1.17 [ 6:36:36<18:01:52] +[titan] 2025-10-05 05:10:56,306 - root - INFO - step: 10735 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 05:10:56,306 - root - INFO - lr: 4.2851e-05 gnorm: 1.12 [ 6:36:47<18:01:41] +[titan] 2025-10-05 05:11:07,161 - root - INFO - step: 10740 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:11:07,161 - root - INFO - lr: 4.2844e-05 gnorm: 1.17 [ 6:36:57<18:01:29] +[titan] 2025-10-05 05:11:18,031 - root - INFO - step: 10745 loss: 2.3429 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0692 +[titan] 2025-10-05 05:11:18,031 - root - INFO - lr: 4.2837e-05 gnorm: 1.13 [ 6:37:08<18:01:17] +[titan] 2025-10-05 05:11:26,767 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:11:28,948 - root - INFO - step: 10750 loss: 2.2983 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 05:11:28,948 - root - INFO - lr: 4.2831e-05 gnorm: 1.14 [ 6:37:19<18:01:06] +[titan] 2025-10-05 05:11:33,458 - root - INFO - Dumping profiler traces at step 10752 +[titan] 2025-10-05 05:11:33,497 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:11:40,090 - root - INFO - step: 10755 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 29,410 tflops: 408.02 mfu: 41.26% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 05:11:40,090 - root - INFO - lr: 4.2824e-05 gnorm: 1.14 [ 6:37:30<18:00:55] +[titan] 2025-10-05 05:11:50,993 - root - INFO - step: 10760 loss: 2.3455 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0704 +[titan] 2025-10-05 05:11:50,993 - root - INFO - lr: 4.2818e-05 gnorm: 1.14 [ 6:37:41<18:00:43] +[titan] 2025-10-05 05:12:01,856 - root - INFO - step: 10765 loss: 2.3069 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0370 +[titan] 2025-10-05 05:12:01,857 - root - INFO - lr: 4.2811e-05 gnorm: 1.12 [ 6:37:52<18:00:32] +[titan] 2025-10-05 05:12:12,697 - root - INFO - step: 10770 loss: 2.3339 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 05:12:12,697 - root - INFO - lr: 4.2805e-05 gnorm: 1.09 [ 6:38:03<18:00:20] +[titan] 2025-10-05 05:12:23,573 - root - INFO - step: 10775 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1096 +[titan] 2025-10-05 05:12:23,573 - root - INFO - lr: 4.2798e-05 gnorm: 1.09 [ 6:38:14<18:00:08] +[titan] 2025-10-05 05:12:34,428 - root - INFO - step: 10780 loss: 
2.2969 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0279 +[titan] 2025-10-05 05:12:34,428 - root - INFO - lr: 4.2792e-05 gnorm: 1.09 [ 6:38:25<17:59:56] +[titan] 2025-10-05 05:12:45,414 - root - INFO - step: 10785 loss: 2.3471 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 05:12:45,414 - root - INFO - lr: 4.2785e-05 gnorm: 1.13 [ 6:38:36<17:59:45] +[titan] 2025-10-05 05:12:56,296 - root - INFO - step: 10790 loss: 2.3752 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0968 +[titan] 2025-10-05 05:12:56,297 - root - INFO - lr: 4.2779e-05 gnorm: 1.12 [ 6:38:47<17:59:33] +[titan] 2025-10-05 05:13:07,167 - root - INFO - step: 10795 loss: 2.3683 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:13:07,167 - root - INFO - lr: 4.2772e-05 gnorm: 1.15 [ 6:38:57<17:59:22] +[titan] 2025-10-05 05:13:15,828 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:13:18,033 - root - INFO - step: 10800 loss: 2.3892 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1094 +[titan] 2025-10-05 05:13:18,033 - root - INFO - lr: 4.2765e-05 gnorm: 1.12 [ 6:39:08<17:59:10] +[titan] 2025-10-05 05:13:28,909 - root - INFO - step: 10805 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0879 +[titan] 2025-10-05 05:13:28,909 - root - INFO - lr: 4.2759e-05 gnorm: 1.13 [ 6:39:19<17:58:58] +[titan] 2025-10-05 05:13:39,766 - root - INFO - step: 10810 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 05:13:39,766 - root - INFO - lr: 4.2752e-05 gnorm: 1.11 [ 6:39:30<17:58:47] +[titan] 2025-10-05 05:13:50,697 - root - INFO - step: 10815 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1334 +[titan] 2025-10-05 05:13:50,697 - root - INFO - lr: 4.2746e-05 gnorm: 1.13 [ 6:39:41<17:58:35] +[titan] 2025-10-05 05:14:01,553 - root - INFO - step: 10820 loss: 2.3463 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:14:01,554 - root - INFO - lr: 4.2739e-05 gnorm: 1.09 [ 6:39:52<17:58:23] +[titan] 2025-10-05 05:14:12,442 - root - INFO - step: 10825 loss: 2.3705 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0925 +[titan] 2025-10-05 05:14:12,442 - root - INFO - lr: 4.2733e-05 gnorm: 1.17 [ 6:40:03<17:58:12] +[titan] 2025-10-05 05:14:23,285 - root - INFO - step: 10830 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:14:23,285 - root - INFO - lr: 
4.2726e-05 gnorm: 1.13 [ 6:40:14<17:58:00] +[titan] 2025-10-05 05:14:34,165 - root - INFO - step: 10835 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 05:14:34,166 - root - INFO - lr: 4.2720e-05 gnorm: 1.16 [ 6:40:24<17:57:48] +[titan] 2025-10-05 05:14:45,051 - root - INFO - step: 10840 loss: 2.3728 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0952 +[titan] 2025-10-05 05:14:45,052 - root - INFO - lr: 4.2713e-05 gnorm: 1.13 [ 6:40:35<17:57:37] +[titan] 2025-10-05 05:14:55,878 - root - INFO - step: 10845 loss: 2.4128 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 05:14:55,878 - root - INFO - lr: 4.2706e-05 gnorm: 1.10 [ 6:40:46<17:57:25] +[titan] 2025-10-05 05:15:04,525 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:15:06,705 - root - INFO - step: 10850 loss: 2.3718 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:15:06,705 - root - INFO - lr: 4.2700e-05 gnorm: 1.12 [ 6:40:57<17:57:13] +[titan] 2025-10-05 05:15:17,575 - root - INFO - step: 10855 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0802 +[titan] 2025-10-05 05:15:17,575 - root - INFO - lr: 4.2693e-05 gnorm: 1.14 [ 6:41:08<17:57:02] +[titan] 2025-10-05 05:15:28,456 - root - INFO - step: 10860 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0820 +[titan] 2025-10-05 05:15:28,456 - root - INFO - lr: 4.2687e-05 gnorm: 1.13 [ 6:41:19<17:56:50] +[titan] 2025-10-05 05:15:39,313 - root - INFO - step: 10865 loss: 2.4256 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 05:15:39,313 - root - INFO - lr: 4.2680e-05 gnorm: 1.10 [ 6:41:30<17:56:38] +[titan] 2025-10-05 05:15:50,205 - root - INFO - step: 10870 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 05:15:50,205 - root - INFO - lr: 4.2673e-05 gnorm: 1.13 [ 6:41:41<17:56:27] +[titan] 2025-10-05 05:16:01,082 - root - INFO - step: 10875 loss: 2.3634 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:16:01,082 - root - INFO - lr: 4.2667e-05 gnorm: 1.15 [ 6:41:51<17:56:15] +[titan] 2025-10-05 05:16:11,946 - root - INFO - step: 10880 loss: 2.3075 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 05:16:11,946 - root - INFO - lr: 
4.2660e-05 gnorm: 1.14 [ 6:42:02<17:56:03] +[titan] 2025-10-05 05:16:22,841 - root - INFO - step: 10885 loss: 2.4065 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1258 +[titan] 2025-10-05 05:16:22,841 - root - INFO - lr: 4.2654e-05 gnorm: 1.21 [ 6:42:13<17:55:52] +[titan] 2025-10-05 05:16:33,734 - root - INFO - step: 10890 loss: 2.3635 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0864 +[titan] 2025-10-05 05:16:33,734 - root - INFO - lr: 4.2647e-05 gnorm: 1.10 [ 6:42:24<17:55:40] +[titan] 2025-10-05 05:16:44,609 - root - INFO - step: 10895 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1223 +[titan] 2025-10-05 05:16:44,609 - root - INFO - lr: 4.2640e-05 gnorm: 1.11 [ 6:42:35<17:55:28] +[titan] 2025-10-05 05:16:53,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:16:55,473 - root - INFO - step: 10900 loss: 2.3494 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0759 +[titan] 2025-10-05 05:16:55,473 - root - INFO - lr: 4.2634e-05 gnorm: 1.15 [ 6:42:46<17:55:17] +[titan] 2025-10-05 05:17:06,345 - root - INFO - step: 10905 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 05:17:06,345 - root - INFO - lr: 4.2627e-05 gnorm: 1.13 [ 6:42:57<17:55:05] +[titan] 2025-10-05 05:17:17,231 - root - INFO - step: 10910 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0659 +[titan] 2025-10-05 05:17:17,231 - root - INFO - lr: 4.2621e-05 gnorm: 1.17 [ 6:43:08<17:54:54] +[titan] 2025-10-05 05:17:28,109 - root - INFO - step: 10915 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0894 +[titan] 2025-10-05 05:17:28,110 - root - INFO - lr: 4.2614e-05 gnorm: 1.19 [ 6:43:18<17:54:42] +[titan] 2025-10-05 05:17:39,014 - root - INFO - step: 10920 loss: 2.3277 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:17:39,014 - root - INFO - lr: 4.2607e-05 gnorm: 1.14 [ 6:43:29<17:54:30] +[titan] 2025-10-05 05:17:49,944 - root - INFO - step: 10925 loss: 2.3202 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0487 +[titan] 2025-10-05 05:17:49,944 - root - INFO - lr: 4.2601e-05 gnorm: 1.12 [ 6:43:40<17:54:19] +[titan] 2025-10-05 05:18:00,806 - root - INFO - step: 10930 loss: 2.3343 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0611 +[titan] 2025-10-05 05:18:00,807 - root - INFO - lr: 
4.2594e-05 gnorm: 1.12 [ 6:43:51<17:54:07] +[titan] 2025-10-05 05:18:11,668 - root - INFO - step: 10935 loss: 2.4012 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1192 +[titan] 2025-10-05 05:18:11,669 - root - INFO - lr: 4.2588e-05 gnorm: 1.13 [ 6:44:02<17:53:55] +[titan] 2025-10-05 05:18:22,533 - root - INFO - step: 10940 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:18:22,533 - root - INFO - lr: 4.2581e-05 gnorm: 1.10 [ 6:44:13<17:53:44] +[titan] 2025-10-05 05:18:33,393 - root - INFO - step: 10945 loss: 2.3284 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0574 +[titan] 2025-10-05 05:18:33,393 - root - INFO - lr: 4.2574e-05 gnorm: 1.16 [ 6:44:24<17:53:32] +[titan] 2025-10-05 05:18:42,067 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:18:44,246 - root - INFO - step: 10950 loss: 2.3482 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0732 +[titan] 2025-10-05 05:18:44,246 - root - INFO - lr: 4.2568e-05 gnorm: 1.17 [ 6:44:35<17:53:20] +[titan] 2025-10-05 05:18:55,149 - root - INFO - step: 10955 loss: 2.4275 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 05:18:55,149 - root - INFO - lr: 4.2561e-05 gnorm: 1.19 [ 6:44:45<17:53:09] +[titan] 2025-10-05 05:19:06,006 - root - INFO - step: 10960 loss: 2.3559 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 05:19:06,006 - root - INFO - lr: 4.2554e-05 gnorm: 1.17 [ 6:44:56<17:52:57] +[titan] 2025-10-05 05:19:16,844 - root - INFO - step: 10965 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0690 +[titan] 2025-10-05 05:19:16,844 - root - INFO - lr: 4.2548e-05 gnorm: 1.13 [ 6:45:07<17:52:45] +[titan] 2025-10-05 05:19:27,707 - root - INFO - step: 10970 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0665 +[titan] 2025-10-05 05:19:27,707 - root - INFO - lr: 4.2541e-05 gnorm: 1.11 [ 6:45:18<17:52:34] +[titan] 2025-10-05 05:19:38,565 - root - INFO - step: 10975 loss: 2.4017 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1202 +[titan] 2025-10-05 05:19:38,565 - root - INFO - lr: 4.2535e-05 gnorm: 1.13 [ 6:45:29<17:52:22] +[titan] 2025-10-05 05:19:49,430 - root - INFO - step: 10980 loss: 2.3707 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0929 +[titan] 2025-10-05 05:19:49,430 - root - INFO - lr: 
4.2528e-05 gnorm: 1.14 [ 6:45:40<17:52:10] +[titan] 2025-10-05 05:20:00,329 - root - INFO - step: 10985 loss: 2.3910 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 05:20:00,329 - root - INFO - lr: 4.2521e-05 gnorm: 1.11 [ 6:45:51<17:51:59] +[titan] 2025-10-05 05:20:11,199 - root - INFO - step: 10990 loss: 2.2943 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 05:20:11,199 - root - INFO - lr: 4.2515e-05 gnorm: 1.15 [ 6:46:01<17:51:47] +[titan] 2025-10-05 05:20:22,060 - root - INFO - step: 10995 loss: 2.4220 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1390 +[titan] 2025-10-05 05:20:22,060 - root - INFO - lr: 4.2508e-05 gnorm: 1.17 [ 6:46:12<17:51:36] +[titan] 2025-10-05 05:20:30,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:20:32,950 - root - INFO - step: 11000 loss: 2.4329 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 05:20:32,950 - root - INFO - lr: 4.2501e-05 gnorm: 1.13 [ 6:46:23<17:51:24] +[titan] 2025-10-05 05:20:43,793 - root - INFO - step: 11005 loss: 2.3674 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0912 +[titan] 2025-10-05 05:20:43,793 - root - INFO - lr: 4.2495e-05 gnorm: 1.13 [ 6:46:34<17:51:12] +[titan] 2025-10-05 05:20:54,676 - root - INFO - step: 11010 loss: 2.3859 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.1074 +[titan] 2025-10-05 05:20:54,677 - root - INFO - lr: 4.2488e-05 gnorm: 1.23 [ 6:46:45<17:51:01] +[titan] 2025-10-05 05:21:05,537 - root - INFO - step: 11015 loss: 2.4219 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 05:21:05,537 - root - INFO - lr: 4.2481e-05 gnorm: 1.14 [ 6:46:56<17:50:49] +[titan] 2025-10-05 05:21:16,444 - root - INFO - step: 11020 loss: 2.3693 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0921 +[titan] 2025-10-05 05:21:16,444 - root - INFO - lr: 4.2475e-05 gnorm: 1.15 [ 6:47:07<17:50:37] +[titan] 2025-10-05 05:21:27,322 - root - INFO - step: 11025 loss: 2.4120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1303 +[titan] 2025-10-05 05:21:27,323 - root - INFO - lr: 4.2468e-05 gnorm: 1.14 [ 6:47:18<17:50:26] +[titan] 2025-10-05 05:21:38,201 - root - INFO - step: 11030 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2721 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:21:38,202 - root - INFO - lr: 
4.2461e-05 gnorm: 1.11 [ 6:47:28<17:50:14] +[titan] 2025-10-05 05:21:49,263 - root - INFO - step: 11035 loss: 2.3662 memory: 118.84GiB(85.28%) tps: 29,623 tflops: 410.98 mfu: 41.55% global_avg_ntp_loss: 0.2773 global_avg_mtp_loss: 2.0889 +[titan] 2025-10-05 05:21:49,264 - root - INFO - lr: 4.2455e-05 gnorm: 1.06 [ 6:47:40<17:50:03] +[titan] 2025-10-05 05:22:00,112 - root - INFO - step: 11040 loss: 2.3713 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0938 +[titan] 2025-10-05 05:22:00,112 - root - INFO - lr: 4.2448e-05 gnorm: 1.16 [ 6:47:50<17:49:51] +[titan] 2025-10-05 05:22:10,978 - root - INFO - step: 11045 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0794 +[titan] 2025-10-05 05:22:10,978 - root - INFO - lr: 4.2441e-05 gnorm: 1.12 [ 6:48:01<17:49:40] +[titan] 2025-10-05 05:22:19,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:22:21,866 - root - INFO - step: 11050 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:22:21,866 - root - INFO - lr: 4.2435e-05 gnorm: 1.18 [ 6:48:12<17:49:28] +[titan] 2025-10-05 05:22:32,725 - root - INFO - step: 11055 loss: 2.4619 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1734 +[titan] 2025-10-05 05:22:32,725 - root - INFO - lr: 4.2428e-05 gnorm: 1.17 [ 6:48:23<17:49:16] +[titan] 2025-10-05 05:22:43,603 - root - INFO - step: 11060 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 05:22:43,604 - root - INFO - lr: 4.2421e-05 gnorm: 1.18 [ 6:48:34<17:49:05] +[titan] 2025-10-05 05:22:54,557 - root - INFO - step: 11065 loss: 2.3059 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0363 +[titan] 2025-10-05 05:22:54,558 - root - INFO - lr: 4.2415e-05 gnorm: 1.11 [ 6:48:45<17:48:53] +[titan] 2025-10-05 05:23:05,447 - root - INFO - step: 11070 loss: 2.3833 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1048 +[titan] 2025-10-05 05:23:05,447 - root - INFO - lr: 4.2408e-05 gnorm: 1.15 [ 6:48:56<17:48:42] +[titan] 2025-10-05 05:23:16,319 - root - INFO - step: 11075 loss: 2.3472 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:23:16,319 - root - INFO - lr: 4.2401e-05 gnorm: 1.12 [ 6:49:07<17:48:30] +[titan] 2025-10-05 05:23:27,231 - root - INFO - step: 11080 loss: 2.3159 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0451 +[titan] 2025-10-05 05:23:27,231 - root - INFO - lr: 
4.2395e-05 gnorm: 1.15 [ 6:49:18<17:48:19] +[titan] 2025-10-05 05:23:38,120 - root - INFO - step: 11085 loss: 2.3918 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 05:23:38,120 - root - INFO - lr: 4.2388e-05 gnorm: 1.10 [ 6:49:28<17:48:07] +[titan] 2025-10-05 05:23:48,999 - root - INFO - step: 11090 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:23:48,999 - root - INFO - lr: 4.2381e-05 gnorm: 1.12 [ 6:49:39<17:47:55] +[titan] 2025-10-05 05:23:59,936 - root - INFO - step: 11095 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:23:59,936 - root - INFO - lr: 4.2375e-05 gnorm: 1.15 [ 6:49:50<17:47:44] +[titan] 2025-10-05 05:24:08,638 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:24:10,828 - root - INFO - step: 11100 loss: 2.3700 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:24:10,828 - root - INFO - lr: 4.2368e-05 gnorm: 1.16 [ 6:50:01<17:47:32] +[titan] 2025-10-05 05:24:21,716 - root - INFO - step: 11105 loss: 2.3080 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 05:24:21,716 - root - INFO - lr: 4.2361e-05 gnorm: 1.11 [ 6:50:12<17:47:21] +[titan] 2025-10-05 05:24:32,601 - root - INFO - step: 11110 loss: 2.3389 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0656 +[titan] 2025-10-05 05:24:32,602 - root - INFO - lr: 4.2354e-05 gnorm: 1.18 [ 6:50:23<17:47:09] +[titan] 2025-10-05 05:24:43,497 - root - INFO - step: 11115 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:24:43,497 - root - INFO - lr: 4.2348e-05 gnorm: 1.16 [ 6:50:34<17:46:58] +[titan] 2025-10-05 05:24:54,381 - root - INFO - step: 11120 loss: 2.3434 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0676 +[titan] 2025-10-05 05:24:54,382 - root - INFO - lr: 4.2341e-05 gnorm: 1.17 [ 6:50:45<17:46:46] +[titan] 2025-10-05 05:25:05,236 - root - INFO - step: 11125 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:25:05,236 - root - INFO - lr: 4.2334e-05 gnorm: 1.14 [ 6:50:56<17:46:34] +[titan] 2025-10-05 05:25:16,090 - root - INFO - step: 11130 loss: 2.3586 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0834 +[titan] 2025-10-05 05:25:16,091 - root - INFO - lr: 
4.2328e-05 gnorm: 1.10 [ 6:51:06<17:46:23] +[titan] 2025-10-05 05:25:26,938 - root - INFO - step: 11135 loss: 2.3923 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 05:25:26,939 - root - INFO - lr: 4.2321e-05 gnorm: 1.15 [ 6:51:17<17:46:11] +[titan] 2025-10-05 05:25:37,783 - root - INFO - step: 11140 loss: 2.3864 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 05:25:37,783 - root - INFO - lr: 4.2314e-05 gnorm: 1.15 [ 6:51:28<17:45:59] +[titan] 2025-10-05 05:25:48,642 - root - INFO - step: 11145 loss: 2.3257 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0532 +[titan] 2025-10-05 05:25:48,642 - root - INFO - lr: 4.2307e-05 gnorm: 1.12 [ 6:51:39<17:45:48] +[titan] 2025-10-05 05:25:57,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:25:59,544 - root - INFO - step: 11150 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0565 +[titan] 2025-10-05 05:25:59,544 - root - INFO - lr: 4.2301e-05 gnorm: 1.12 [ 6:51:50<17:45:36] +[titan] 2025-10-05 05:26:10,397 - root - INFO - step: 11155 loss: 2.3187 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0478 +[titan] 2025-10-05 05:26:10,397 - root - INFO - lr: 4.2294e-05 gnorm: 1.08 [ 6:52:01<17:45:24] +[titan] 2025-10-05 05:26:21,273 - root - INFO - step: 11160 loss: 2.3623 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0869 +[titan] 2025-10-05 05:26:21,273 - root - INFO - lr: 4.2287e-05 gnorm: 1.14 [ 6:52:12<17:45:13] +[titan] 2025-10-05 05:26:32,142 - root - INFO - step: 11165 loss: 2.3541 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:26:32,142 - root - INFO - lr: 4.2281e-05 gnorm: 1.13 [ 6:52:22<17:45:01] +[titan] 2025-10-05 05:26:43,035 - root - INFO - step: 11170 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 05:26:43,035 - root - INFO - lr: 4.2274e-05 gnorm: 1.13 [ 6:52:33<17:44:50] +[titan] 2025-10-05 05:26:53,989 - root - INFO - step: 11175 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.97% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 05:26:53,989 - root - INFO - lr: 4.2267e-05 gnorm: 1.12 [ 6:52:44<17:44:38] +[titan] 2025-10-05 05:27:04,880 - root - INFO - step: 11180 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0666 +[titan] 2025-10-05 05:27:04,880 - root - INFO - lr: 
4.2260e-05 gnorm: 1.19 [ 6:52:55<17:44:27] +[titan] 2025-10-05 05:27:15,757 - root - INFO - step: 11185 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0699 +[titan] 2025-10-05 05:27:15,757 - root - INFO - lr: 4.2254e-05 gnorm: 1.15 [ 6:53:06<17:44:15] +[titan] 2025-10-05 05:27:26,622 - root - INFO - step: 11190 loss: 2.3961 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1162 +[titan] 2025-10-05 05:27:26,622 - root - INFO - lr: 4.2247e-05 gnorm: 1.10 [ 6:53:17<17:44:03] +[titan] 2025-10-05 05:27:37,484 - root - INFO - step: 11195 loss: 2.3721 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 05:27:37,484 - root - INFO - lr: 4.2240e-05 gnorm: 1.15 [ 6:53:28<17:43:52] +[titan] 2025-10-05 05:27:46,182 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:27:48,372 - root - INFO - step: 11200 loss: 2.3645 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:27:48,372 - root - INFO - lr: 4.2233e-05 gnorm: 1.17 [ 6:53:39<17:43:40] +[titan] 2025-10-05 05:27:59,307 - root - INFO - step: 11205 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:27:59,307 - root - INFO - lr: 4.2227e-05 gnorm: 1.09 [ 6:53:50<17:43:29] +[titan] 2025-10-05 05:28:10,176 - root - INFO - step: 11210 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0464 +[titan] 2025-10-05 05:28:10,176 - root - INFO - lr: 4.2220e-05 gnorm: 1.15 [ 6:54:00<17:43:17] +[titan] 2025-10-05 05:28:21,076 - root - INFO - step: 11215 loss: 2.3354 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 05:28:21,076 - root - INFO - lr: 4.2213e-05 gnorm: 1.14 [ 6:54:11<17:43:06] +[titan] 2025-10-05 05:28:31,935 - root - INFO - step: 11220 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0592 +[titan] 2025-10-05 05:28:31,935 - root - INFO - lr: 4.2206e-05 gnorm: 1.10 [ 6:54:22<17:42:54] +[titan] 2025-10-05 05:28:42,804 - root - INFO - step: 11225 loss: 2.2877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 05:28:42,805 - root - INFO - lr: 4.2200e-05 gnorm: 1.15 [ 6:54:33<17:42:42] +[titan] 2025-10-05 05:28:53,662 - root - INFO - step: 11230 loss: 2.3995 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 05:28:53,662 - root - INFO - lr: 
4.2193e-05 gnorm: 1.17 [ 6:54:44<17:42:31] +[titan] 2025-10-05 05:29:04,634 - root - INFO - step: 11235 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 05:29:04,634 - root - INFO - lr: 4.2186e-05 gnorm: 1.17 [ 6:54:55<17:42:19] +[titan] 2025-10-05 05:29:15,534 - root - INFO - step: 11240 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0842 +[titan] 2025-10-05 05:29:15,535 - root - INFO - lr: 4.2179e-05 gnorm: 1.12 [ 6:55:06<17:42:08] +[titan] 2025-10-05 05:29:26,383 - root - INFO - step: 11245 loss: 2.3641 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0871 +[titan] 2025-10-05 05:29:26,383 - root - INFO - lr: 4.2173e-05 gnorm: 1.08 [ 6:55:17<17:41:56] +[titan] 2025-10-05 05:29:35,041 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:29:37,226 - root - INFO - step: 11250 loss: 2.3893 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 05:29:37,226 - root - INFO - lr: 4.2166e-05 gnorm: 1.11 [ 6:55:28<17:41:44] +[titan] 2025-10-05 05:29:48,080 - root - INFO - step: 11255 loss: 2.3315 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0589 +[titan] 2025-10-05 05:29:48,080 - root - INFO - lr: 4.2159e-05 gnorm: 1.15 [ 6:55:38<17:41:33] +[titan] 2025-10-05 05:29:58,912 - root - INFO - step: 11260 loss: 2.3790 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1000 +[titan] 2025-10-05 05:29:58,912 - root - INFO - lr: 4.2152e-05 gnorm: 1.11 [ 6:55:49<17:41:21] +[titan] 2025-10-05 05:30:07,834 - root - INFO - Dumping profiler traces at step 11264 +[titan] 2025-10-05 05:30:07,870 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:30:10,050 - root - INFO - step: 11265 loss: 2.2811 memory: 118.84GiB(85.28%) tps: 29,420 tflops: 408.16 mfu: 41.27% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 05:30:10,051 - root - INFO - lr: 4.2146e-05 gnorm: 1.10 [ 6:56:00<17:41:10] +[titan] 2025-10-05 05:30:20,892 - root - INFO - step: 11270 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0809 +[titan] 2025-10-05 05:30:20,892 - root - INFO - lr: 4.2139e-05 gnorm: 1.12 [ 6:56:11<17:40:58] +[titan] 2025-10-05 05:30:31,735 - root - INFO - step: 11275 loss: 2.3738 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0947 +[titan] 2025-10-05 05:30:31,735 - root - INFO - lr: 4.2132e-05 gnorm: 1.10 [ 6:56:22<17:40:47] +[titan] 2025-10-05 05:30:42,574 - root - INFO - step: 11280 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 05:30:42,574 - root - INFO - lr: 4.2125e-05 gnorm: 1.10 [ 6:56:33<17:40:35] +[titan] 2025-10-05 05:30:53,426 - root - INFO - step: 11285 loss: 2.3915 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1121 +[titan] 2025-10-05 05:30:53,426 - root - INFO - lr: 4.2118e-05 gnorm: 1.14 [ 6:56:44<17:40:23] +[titan] 2025-10-05 05:31:04,306 - root - INFO - step: 11290 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1164 +[titan] 2025-10-05 05:31:04,307 - root - INFO - lr: 4.2112e-05 gnorm: 1.16 [ 6:56:55<17:40:12] +[titan] 2025-10-05 05:31:15,165 - root - INFO - step: 11295 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 05:31:15,165 - root - INFO - lr: 4.2105e-05 gnorm: 1.16 [ 6:57:05<17:40:00] +[titan] 2025-10-05 05:31:23,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:31:26,028 - root - INFO - step: 11300 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1297 +[titan] 2025-10-05 05:31:26,028 - root - INFO - lr: 4.2098e-05 gnorm: 1.16 [ 6:57:16<17:39:49] +[titan] 2025-10-05 05:31:36,890 - root - INFO - step: 11305 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 05:31:36,890 - root - INFO - lr: 4.2091e-05 gnorm: 1.19 [ 6:57:27<17:39:37] +[titan] 2025-10-05 05:31:47,751 - root - INFO - step: 11310 loss: 2.3629 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0867 +[titan] 2025-10-05 05:31:47,751 - root - INFO - lr: 4.2084e-05 gnorm: 1.13 [ 6:57:38<17:39:25] +[titan] 2025-10-05 05:31:58,646 - root - INFO - step: 11315 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0333 +[titan] 2025-10-05 05:31:58,646 - root - INFO - lr: 4.2078e-05 gnorm: 1.14 [ 6:57:49<17:39:14] +[titan] 2025-10-05 05:32:09,512 - root - INFO - step: 11320 loss: 2.4605 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 05:32:09,512 - root - INFO - lr: 4.2071e-05 gnorm: 1.15 [ 6:58:00<17:39:02] +[titan] 2025-10-05 05:32:20,392 - root - INFO - step: 11325 loss: 2.3568 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0807 +[titan] 2025-10-05 05:32:20,392 - root - INFO - lr: 4.2064e-05 gnorm: 1.12 [ 6:58:11<17:38:51] +[titan] 2025-10-05 05:32:31,290 - root - INFO - step: 11330 loss: 2.4028 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1208 +[titan] 2025-10-05 05:32:31,290 - root - INFO - lr: 
4.2057e-05 gnorm: 1.14 [ 6:58:22<17:38:39] +[titan] 2025-10-05 05:32:42,174 - root - INFO - step: 11335 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:32:42,174 - root - INFO - lr: 4.2050e-05 gnorm: 1.16 [ 6:58:32<17:38:27] +[titan] 2025-10-05 05:32:53,063 - root - INFO - step: 11340 loss: 2.3303 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0571 +[titan] 2025-10-05 05:32:53,064 - root - INFO - lr: 4.2044e-05 gnorm: 1.10 [ 6:58:43<17:38:16] +[titan] 2025-10-05 05:33:03,971 - root - INFO - step: 11345 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.1089 +[titan] 2025-10-05 05:33:03,972 - root - INFO - lr: 4.2037e-05 gnorm: 1.10 [ 6:58:54<17:38:04] +[titan] 2025-10-05 05:33:12,662 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:33:14,854 - root - INFO - step: 11350 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:33:14,854 - root - INFO - lr: 4.2030e-05 gnorm: 1.16 [ 6:59:05<17:37:53] +[titan] 2025-10-05 05:33:25,725 - root - INFO - step: 11355 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:33:25,725 - root - INFO - lr: 4.2023e-05 gnorm: 1.14 [ 6:59:16<17:37:41] +[titan] 2025-10-05 05:33:36,578 - root - INFO - step: 11360 loss: 2.2858 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0185 +[titan] 2025-10-05 05:33:36,578 - root - INFO - lr: 4.2016e-05 gnorm: 1.08 [ 6:59:27<17:37:30] +[titan] 2025-10-05 05:33:47,452 - root - INFO - step: 11365 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:33:47,452 - root - INFO - lr: 4.2010e-05 gnorm: 1.07 [ 6:59:38<17:37:18] +[titan] 2025-10-05 05:33:58,347 - root - INFO - step: 11370 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0475 +[titan] 2025-10-05 05:33:58,347 - root - INFO - lr: 4.2003e-05 gnorm: 1.09 [ 6:59:49<17:37:06] +[titan] 2025-10-05 05:34:09,277 - root - INFO - step: 11375 loss: 2.4178 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1340 +[titan] 2025-10-05 05:34:09,277 - root - INFO - lr: 4.1996e-05 gnorm: 1.13 [ 7:00:00<17:36:55] +[titan] 2025-10-05 05:34:20,157 - root - INFO - step: 11380 loss: 2.3349 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:34:20,157 - root - INFO - lr: 
4.1989e-05 gnorm: 1.18 [ 7:00:10<17:36:43] +[titan] 2025-10-05 05:34:31,049 - root - INFO - step: 11385 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:34:31,049 - root - INFO - lr: 4.1982e-05 gnorm: 1.10 [ 7:00:21<17:36:32] +[titan] 2025-10-05 05:34:41,929 - root - INFO - step: 11390 loss: 2.4099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1279 +[titan] 2025-10-05 05:34:41,929 - root - INFO - lr: 4.1975e-05 gnorm: 1.10 [ 7:00:32<17:36:20] +[titan] 2025-10-05 05:34:52,785 - root - INFO - step: 11395 loss: 2.3564 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:34:52,785 - root - INFO - lr: 4.1969e-05 gnorm: 1.15 [ 7:00:43<17:36:09] +[titan] 2025-10-05 05:35:01,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:35:03,690 - root - INFO - step: 11400 loss: 2.4143 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1315 +[titan] 2025-10-05 05:35:03,690 - root - INFO - lr: 4.1962e-05 gnorm: 1.14 [ 7:00:54<17:35:57] +[titan] 2025-10-05 05:35:14,535 - root - INFO - step: 11405 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 05:35:14,536 - root - INFO - lr: 4.1955e-05 gnorm: 1.17 [ 7:01:05<17:35:45] +[titan] 2025-10-05 05:35:25,412 - root - INFO - step: 11410 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0477 +[titan] 2025-10-05 05:35:25,412 - root - INFO - lr: 4.1948e-05 gnorm: 1.13 [ 7:01:16<17:35:34] +[titan] 2025-10-05 05:35:36,263 - root - INFO - step: 11415 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0351 +[titan] 2025-10-05 05:35:36,263 - root - INFO - lr: 4.1941e-05 gnorm: 1.12 [ 7:01:27<17:35:22] +[titan] 2025-10-05 05:35:47,122 - root - INFO - step: 11420 loss: 2.3875 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 05:35:47,122 - root - INFO - lr: 4.1934e-05 gnorm: 1.14 [ 7:01:37<17:35:11] +[titan] 2025-10-05 05:35:57,974 - root - INFO - step: 11425 loss: 2.3552 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0795 +[titan] 2025-10-05 05:35:57,974 - root - INFO - lr: 4.1928e-05 gnorm: 1.13 [ 7:01:48<17:34:59] +[titan] 2025-10-05 05:36:08,849 - root - INFO - step: 11430 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 05:36:08,849 - root - INFO - lr: 
4.1921e-05 gnorm: 1.17 [ 7:01:59<17:34:47] +[titan] 2025-10-05 05:36:19,695 - root - INFO - step: 11435 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:36:19,695 - root - INFO - lr: 4.1914e-05 gnorm: 1.16 [ 7:02:10<17:34:36] +[titan] 2025-10-05 05:36:30,564 - root - INFO - step: 11440 loss: 2.3449 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0705 +[titan] 2025-10-05 05:36:30,564 - root - INFO - lr: 4.1907e-05 gnorm: 1.08 [ 7:02:21<17:34:24] +[titan] 2025-10-05 05:36:41,427 - root - INFO - step: 11445 loss: 2.4403 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1539 +[titan] 2025-10-05 05:36:41,427 - root - INFO - lr: 4.1900e-05 gnorm: 1.15 [ 7:02:32<17:34:13] +[titan] 2025-10-05 05:36:50,092 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:36:52,270 - root - INFO - step: 11450 loss: 2.3496 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:36:52,270 - root - INFO - lr: 4.1893e-05 gnorm: 1.14 [ 7:02:43<17:34:01] +[titan] 2025-10-05 05:37:03,144 - root - INFO - step: 11455 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 05:37:03,145 - root - INFO - lr: 4.1886e-05 gnorm: 1.13 [ 7:02:53<17:33:49] +[titan] 2025-10-05 05:37:13,972 - root - INFO - step: 11460 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 05:37:13,973 - root - INFO - lr: 4.1880e-05 gnorm: 1.13 [ 7:03:04<17:33:38] +[titan] 2025-10-05 05:37:24,845 - root - INFO - step: 11465 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0606 +[titan] 2025-10-05 05:37:24,845 - root - INFO - lr: 4.1873e-05 gnorm: 1.16 [ 7:03:15<17:33:26] +[titan] 2025-10-05 05:37:35,703 - root - INFO - step: 11470 loss: 2.3317 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0582 +[titan] 2025-10-05 05:37:35,703 - root - INFO - lr: 4.1866e-05 gnorm: 1.10 [ 7:03:26<17:33:14] +[titan] 2025-10-05 05:37:46,570 - root - INFO - step: 11475 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 05:37:46,570 - root - INFO - lr: 4.1859e-05 gnorm: 1.18 [ 7:03:37<17:33:03] +[titan] 2025-10-05 05:37:57,446 - root - INFO - step: 11480 loss: 2.3142 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0433 +[titan] 2025-10-05 05:37:57,447 - root - INFO - lr: 
4.1852e-05 gnorm: 1.10 [ 7:03:48<17:32:51] +[titan] 2025-10-05 05:38:08,329 - root - INFO - step: 11485 loss: 2.3042 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0343 +[titan] 2025-10-05 05:38:08,329 - root - INFO - lr: 4.1845e-05 gnorm: 1.16 [ 7:03:59<17:32:40] +[titan] 2025-10-05 05:38:19,195 - root - INFO - step: 11490 loss: 2.4232 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1402 +[titan] 2025-10-05 05:38:19,195 - root - INFO - lr: 4.1838e-05 gnorm: 1.17 [ 7:04:09<17:32:28] +[titan] 2025-10-05 05:38:30,073 - root - INFO - step: 11495 loss: 2.3563 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0808 +[titan] 2025-10-05 05:38:30,073 - root - INFO - lr: 4.1831e-05 gnorm: 1.12 [ 7:04:20<17:32:17] +[titan] 2025-10-05 05:38:38,740 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:38:40,929 - root - INFO - step: 11500 loss: 2.3519 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0760 +[titan] 2025-10-05 05:38:40,929 - root - INFO - lr: 4.1825e-05 gnorm: 1.09 [ 7:04:31<17:32:05] +[titan] 2025-10-05 05:38:51,791 - root - INFO - step: 11505 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 05:38:51,792 - root - INFO - lr: 4.1818e-05 gnorm: 1.18 [ 7:04:42<17:31:53] +[titan] 2025-10-05 05:39:02,689 - root - INFO - step: 11510 loss: 2.3200 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0488 +[titan] 2025-10-05 05:39:02,689 - root - INFO - lr: 4.1811e-05 gnorm: 1.13 [ 7:04:53<17:31:42] +[titan] 2025-10-05 05:39:13,585 - root - INFO - step: 11515 loss: 2.4548 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1671 +[titan] 2025-10-05 05:39:13,586 - root - INFO - lr: 4.1804e-05 gnorm: 1.13 [ 7:05:04<17:31:30] +[titan] 2025-10-05 05:39:24,449 - root - INFO - step: 11520 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0252 +[titan] 2025-10-05 05:39:24,449 - root - INFO - lr: 4.1797e-05 gnorm: 1.15 [ 7:05:15<17:31:19] +[titan] 2025-10-05 05:39:35,295 - root - INFO - step: 11525 loss: 2.2866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 05:39:35,295 - root - INFO - lr: 4.1790e-05 gnorm: 1.07 [ 7:05:26<17:31:07] +[titan] 2025-10-05 05:39:46,183 - root - INFO - step: 11530 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0847 +[titan] 2025-10-05 05:39:46,183 - root - INFO - lr: 
4.1783e-05 gnorm: 1.14 [ 7:05:36<17:30:56] +[titan] 2025-10-05 05:39:57,043 - root - INFO - step: 11535 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 05:39:57,043 - root - INFO - lr: 4.1776e-05 gnorm: 1.14 [ 7:05:47<17:30:44] +[titan] 2025-10-05 05:40:07,934 - root - INFO - step: 11540 loss: 2.3581 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0821 +[titan] 2025-10-05 05:40:07,934 - root - INFO - lr: 4.1769e-05 gnorm: 1.10 [ 7:05:58<17:30:32] +[titan] 2025-10-05 05:40:18,821 - root - INFO - step: 11545 loss: 2.4229 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:40:18,821 - root - INFO - lr: 4.1763e-05 gnorm: 1.15 [ 7:06:09<17:30:21] +[titan] 2025-10-05 05:40:27,479 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:40:29,677 - root - INFO - step: 11550 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0971 +[titan] 2025-10-05 05:40:29,677 - root - INFO - lr: 4.1756e-05 gnorm: 1.16 [ 7:06:20<17:30:09] +[titan] 2025-10-05 05:40:40,531 - root - INFO - step: 11555 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0225 +[titan] 2025-10-05 05:40:40,531 - root - INFO - lr: 4.1749e-05 gnorm: 1.07 [ 7:06:31<17:29:58] +[titan] 2025-10-05 05:40:51,372 - root - INFO - step: 11560 loss: 2.3640 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 05:40:51,372 - root - INFO - lr: 4.1742e-05 gnorm: 1.13 [ 7:06:42<17:29:46] +[titan] 2025-10-05 05:41:02,211 - root - INFO - step: 11565 loss: 2.3067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0371 +[titan] 2025-10-05 05:41:02,211 - root - INFO - lr: 4.1735e-05 gnorm: 1.09 [ 7:06:52<17:29:34] +[titan] 2025-10-05 05:41:13,062 - root - INFO - step: 11570 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:41:13,062 - root - INFO - lr: 4.1728e-05 gnorm: 1.08 [ 7:07:03<17:29:23] +[titan] 2025-10-05 05:41:23,914 - root - INFO - step: 11575 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 05:41:23,914 - root - INFO - lr: 4.1721e-05 gnorm: 1.11 [ 7:07:14<17:29:11] +[titan] 2025-10-05 05:41:34,780 - root - INFO - step: 11580 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0896 +[titan] 2025-10-05 05:41:34,780 - root - INFO - lr: 
4.1714e-05 gnorm: 1.16 [ 7:07:25<17:29:00] +[titan] 2025-10-05 05:41:45,632 - root - INFO - step: 11585 loss: 2.3149 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0440 +[titan] 2025-10-05 05:41:45,632 - root - INFO - lr: 4.1707e-05 gnorm: 1.12 [ 7:07:36<17:28:48] +[titan] 2025-10-05 05:41:56,483 - root - INFO - step: 11590 loss: 2.2891 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0207 +[titan] 2025-10-05 05:41:56,483 - root - INFO - lr: 4.1700e-05 gnorm: 1.11 [ 7:07:47<17:28:36] +[titan] 2025-10-05 05:42:07,367 - root - INFO - step: 11595 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 05:42:07,367 - root - INFO - lr: 4.1693e-05 gnorm: 1.09 [ 7:07:58<17:28:25] +[titan] 2025-10-05 05:42:16,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:42:18,229 - root - INFO - step: 11600 loss: 2.3596 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0839 +[titan] 2025-10-05 05:42:18,229 - root - INFO - lr: 4.1686e-05 gnorm: 1.13 [ 7:08:08<17:28:13] +[titan] 2025-10-05 05:42:29,091 - root - INFO - step: 11605 loss: 2.3723 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0941 +[titan] 2025-10-05 05:42:29,091 - root - INFO - lr: 4.1680e-05 gnorm: 1.11 [ 7:08:19<17:28:02] +[titan] 2025-10-05 05:42:39,944 - root - INFO - step: 11610 loss: 2.3331 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0601 +[titan] 2025-10-05 05:42:39,944 - root - INFO - lr: 4.1673e-05 gnorm: 1.12 [ 7:08:30<17:27:50] +[titan] 2025-10-05 05:42:50,809 - root - INFO - step: 11615 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0661 +[titan] 2025-10-05 05:42:50,809 - root - INFO - lr: 4.1666e-05 gnorm: 1.14 [ 7:08:41<17:27:38] +[titan] 2025-10-05 05:43:01,660 - root - INFO - step: 11620 loss: 2.3817 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1022 +[titan] 2025-10-05 05:43:01,660 - root - INFO - lr: 4.1659e-05 gnorm: 1.16 [ 7:08:52<17:27:27] +[titan] 2025-10-05 05:43:12,542 - root - INFO - step: 11625 loss: 2.3129 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0422 +[titan] 2025-10-05 05:43:12,542 - root - INFO - lr: 4.1652e-05 gnorm: 1.15 [ 7:09:03<17:27:15] +[titan] 2025-10-05 05:43:23,381 - root - INFO - step: 11630 loss: 2.3032 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 05:43:23,381 - root - INFO - lr: 
4.1645e-05 gnorm: 1.17 [ 7:09:14<17:27:04] +[titan] 2025-10-05 05:43:34,203 - root - INFO - step: 11635 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0697 +[titan] 2025-10-05 05:43:34,203 - root - INFO - lr: 4.1638e-05 gnorm: 1.17 [ 7:09:24<17:26:52] +[titan] 2025-10-05 05:43:45,042 - root - INFO - step: 11640 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0979 +[titan] 2025-10-05 05:43:45,042 - root - INFO - lr: 4.1631e-05 gnorm: 1.09 [ 7:09:35<17:26:40] +[titan] 2025-10-05 05:43:55,889 - root - INFO - step: 11645 loss: 2.3366 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 05:43:55,889 - root - INFO - lr: 4.1624e-05 gnorm: 1.12 [ 7:09:46<17:26:29] +[titan] 2025-10-05 05:44:04,549 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:44:06,731 - root - INFO - step: 11650 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0964 +[titan] 2025-10-05 05:44:06,731 - root - INFO - lr: 4.1617e-05 gnorm: 1.13 [ 7:09:57<17:26:17] +[titan] 2025-10-05 05:44:17,623 - root - INFO - step: 11655 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0423 +[titan] 2025-10-05 05:44:17,623 - root - INFO - lr: 4.1610e-05 gnorm: 1.16 [ 7:10:08<17:26:06] +[titan] 2025-10-05 05:44:28,491 - root - INFO - step: 11660 loss: 2.3791 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 05:44:28,491 - root - INFO - lr: 4.1603e-05 gnorm: 1.14 [ 7:10:19<17:25:54] +[titan] 2025-10-05 05:44:39,349 - root - INFO - step: 11665 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0347 +[titan] 2025-10-05 05:44:39,349 - root - INFO - lr: 4.1596e-05 gnorm: 1.14 [ 7:10:30<17:25:42] +[titan] 2025-10-05 05:44:50,212 - root - INFO - step: 11670 loss: 2.2728 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0088 +[titan] 2025-10-05 05:44:50,212 - root - INFO - lr: 4.1589e-05 gnorm: 1.12 [ 7:10:40<17:25:31] +[titan] 2025-10-05 05:45:01,081 - root - INFO - step: 11675 loss: 2.3589 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:45:01,081 - root - INFO - lr: 4.1582e-05 gnorm: 1.11 [ 7:10:51<17:25:19] +[titan] 2025-10-05 05:45:11,965 - root - INFO - step: 11680 loss: 2.3297 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0575 +[titan] 2025-10-05 05:45:11,965 - root - INFO - lr: 
4.1575e-05 gnorm: 1.10 [ 7:11:02<17:25:08] +[titan] 2025-10-05 05:45:22,811 - root - INFO - step: 11685 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0746 +[titan] 2025-10-05 05:45:22,811 - root - INFO - lr: 4.1568e-05 gnorm: 1.11 [ 7:11:13<17:24:56] +[titan] 2025-10-05 05:45:33,673 - root - INFO - step: 11690 loss: 2.3753 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2787 global_avg_mtp_loss: 2.0966 +[titan] 2025-10-05 05:45:33,674 - root - INFO - lr: 4.1561e-05 gnorm: 1.10 [ 7:11:24<17:24:44] +[titan] 2025-10-05 05:45:44,536 - root - INFO - step: 11695 loss: 2.3906 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1103 +[titan] 2025-10-05 05:45:44,537 - root - INFO - lr: 4.1554e-05 gnorm: 1.11 [ 7:11:35<17:24:33] +[titan] 2025-10-05 05:45:53,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:45:55,410 - root - INFO - step: 11700 loss: 2.3089 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 05:45:55,410 - root - INFO - lr: 4.1547e-05 gnorm: 1.16 [ 7:11:46<17:24:21] +[titan] 2025-10-05 05:46:06,262 - root - INFO - step: 11705 loss: 2.3134 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 05:46:06,262 - root - INFO - lr: 4.1540e-05 gnorm: 1.11 [ 7:11:56<17:24:10] +[titan] 2025-10-05 05:46:17,130 - root - INFO - step: 11710 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:46:17,130 - root - INFO - lr: 4.1534e-05 gnorm: 1.07 [ 7:12:07<17:23:58] +[titan] 2025-10-05 05:46:27,969 - root - INFO - step: 11715 loss: 2.3153 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0455 +[titan] 2025-10-05 05:46:27,969 - root - INFO - lr: 4.1527e-05 gnorm: 1.10 [ 7:12:18<17:23:47] +[titan] 2025-10-05 05:46:38,818 - root - INFO - step: 11720 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1170 +[titan] 2025-10-05 05:46:38,818 - root - INFO - lr: 4.1520e-05 gnorm: 1.16 [ 7:12:29<17:23:35] +[titan] 2025-10-05 05:46:49,675 - root - INFO - step: 11725 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0144 +[titan] 2025-10-05 05:46:49,675 - root - INFO - lr: 4.1513e-05 gnorm: 1.16 [ 7:12:40<17:23:23] +[titan] 2025-10-05 05:47:00,544 - root - INFO - step: 11730 loss: 2.4145 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1312 +[titan] 2025-10-05 05:47:00,544 - root - INFO - lr: 
4.1506e-05 gnorm: 1.10 [ 7:12:51<17:23:12] +[titan] 2025-10-05 05:47:11,419 - root - INFO - step: 11735 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0736 +[titan] 2025-10-05 05:47:11,419 - root - INFO - lr: 4.1499e-05 gnorm: 1.08 [ 7:13:02<17:23:00] +[titan] 2025-10-05 05:47:22,265 - root - INFO - step: 11740 loss: 2.3154 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 05:47:22,265 - root - INFO - lr: 4.1492e-05 gnorm: 1.11 [ 7:13:12<17:22:49] +[titan] 2025-10-05 05:47:33,131 - root - INFO - step: 11745 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 05:47:33,131 - root - INFO - lr: 4.1485e-05 gnorm: 1.13 [ 7:13:23<17:22:37] +[titan] 2025-10-05 05:47:41,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:47:43,985 - root - INFO - step: 11750 loss: 2.3279 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0557 +[titan] 2025-10-05 05:47:43,985 - root - INFO - lr: 4.1478e-05 gnorm: 1.13 [ 7:13:34<17:22:26] +[titan] 2025-10-05 05:47:54,868 - root - INFO - step: 11755 loss: 2.3253 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0524 +[titan] 2025-10-05 05:47:54,869 - root - INFO - lr: 4.1471e-05 gnorm: 1.15 [ 7:13:45<17:22:14] +[titan] 2025-10-05 05:48:05,705 - root - INFO - step: 11760 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 05:48:05,705 - root - INFO - lr: 4.1464e-05 gnorm: 1.11 [ 7:13:56<17:22:02] +[titan] 2025-10-05 05:48:16,588 - root - INFO - step: 11765 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0673 +[titan] 2025-10-05 05:48:16,588 - root - INFO - lr: 4.1457e-05 gnorm: 1.08 [ 7:14:07<17:21:51] +[titan] 2025-10-05 05:48:27,456 - root - INFO - step: 11770 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:48:27,456 - root - INFO - lr: 4.1450e-05 gnorm: 1.13 [ 7:14:18<17:21:39] +[titan] 2025-10-05 05:48:38,410 - root - INFO - step: 11775 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 29,914 tflops: 415.01 mfu: 41.96% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 05:48:38,411 - root - INFO - lr: 4.1443e-05 gnorm: 1.12 [ 7:14:29<17:21:28] +[titan] 2025-10-05 05:48:40,781 - root - INFO - Dumping profiler traces at step 11776 +[titan] 2025-10-05 05:48:40,818 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:48:49,532 - root - INFO - step: 11780 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0553 +[titan] 2025-10-05 05:48:49,532 - root - INFO - lr: 4.1436e-05 gnorm: 1.10 [ 7:14:40<17:21:17] +[titan] 2025-10-05 05:49:00,425 - root - INFO - step: 11785 loss: 2.3316 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0583 +[titan] 2025-10-05 05:49:00,425 - root - INFO - lr: 4.1429e-05 gnorm: 1.11 [ 7:14:51<17:21:05] +[titan] 2025-10-05 05:49:11,301 - root - INFO - step: 11790 loss: 2.2637 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 05:49:11,301 - root - INFO - lr: 4.1422e-05 gnorm: 1.08 [ 7:15:02<17:20:54] +[titan] 2025-10-05 05:49:22,173 - root - INFO - step: 11795 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1151 +[titan] 2025-10-05 05:49:22,173 - root - INFO - lr: 4.1415e-05 gnorm: 1.13 [ 7:15:12<17:20:42] +[titan] 2025-10-05 05:49:30,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:49:33,049 - root - INFO - step: 11800 loss: 2.3168 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0453 +[titan] 2025-10-05 05:49:33,050 - root - INFO - lr: 4.1408e-05 gnorm: 1.14 [ 7:15:23<17:20:31] +[titan] 2025-10-05 05:49:43,908 - root - INFO - step: 11805 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:49:43,909 - root - INFO - lr: 4.1401e-05 gnorm: 1.11 [ 7:15:34<17:20:19] +[titan] 2025-10-05 05:49:54,777 - root - INFO - step: 11810 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 05:49:54,777 - root - INFO - lr: 4.1394e-05 gnorm: 1.14 [ 7:15:45<17:20:08] +[titan] 2025-10-05 05:50:05,641 - root - INFO - step: 11815 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:50:05,641 - root - INFO - lr: 4.1387e-05 gnorm: 1.10 [ 7:15:56<17:19:56] +[titan] 2025-10-05 05:50:16,549 - root - INFO - step: 11820 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 05:50:16,549 - root - INFO - lr: 4.1379e-05 gnorm: 1.14 [ 7:16:07<17:19:45] +[titan] 2025-10-05 05:50:27,410 - root - INFO - step: 11825 loss: 2.3545 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:50:27,410 - root - INFO - lr: 4.1372e-05 gnorm: 1.11 [ 7:16:18<17:19:33] +[titan] 2025-10-05 05:50:38,296 - root - INFO - step: 11830 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 05:50:38,297 - root - INFO - lr: 
4.1365e-05 gnorm: 1.17 [ 7:16:29<17:19:22] +[titan] 2025-10-05 05:50:49,183 - root - INFO - step: 11835 loss: 2.4085 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1264 +[titan] 2025-10-05 05:50:49,183 - root - INFO - lr: 4.1358e-05 gnorm: 1.12 [ 7:16:39<17:19:10] +[titan] 2025-10-05 05:51:00,086 - root - INFO - step: 11840 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 05:51:00,086 - root - INFO - lr: 4.1351e-05 gnorm: 1.11 [ 7:16:50<17:18:59] +[titan] 2025-10-05 05:51:10,957 - root - INFO - step: 11845 loss: 2.3242 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0501 +[titan] 2025-10-05 05:51:10,957 - root - INFO - lr: 4.1344e-05 gnorm: 1.08 [ 7:17:01<17:18:47] +[titan] 2025-10-05 05:51:19,706 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:51:21,898 - root - INFO - step: 11850 loss: 2.3518 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0767 +[titan] 2025-10-05 05:51:21,898 - root - INFO - lr: 4.1337e-05 gnorm: 1.12 [ 7:17:12<17:18:36] +[titan] 2025-10-05 05:51:32,790 - root - INFO - step: 11855 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 05:51:32,790 - root - INFO - lr: 4.1330e-05 gnorm: 1.12 [ 7:17:23<17:18:24] +[titan] 2025-10-05 05:51:43,664 - root - INFO - step: 11860 loss: 2.3095 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 05:51:43,664 - root - INFO - lr: 4.1323e-05 gnorm: 1.18 [ 7:17:34<17:18:13] +[titan] 2025-10-05 05:51:54,563 - root - INFO - step: 11865 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0318 +[titan] 2025-10-05 05:51:54,563 - root - INFO - lr: 4.1316e-05 gnorm: 1.09 [ 7:17:45<17:18:01] +[titan] 2025-10-05 05:52:05,455 - root - INFO - step: 11870 loss: 2.3710 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0936 +[titan] 2025-10-05 05:52:05,455 - root - INFO - lr: 4.1309e-05 gnorm: 1.11 [ 7:17:56<17:17:50] +[titan] 2025-10-05 05:52:16,379 - root - INFO - step: 11875 loss: 2.3659 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0895 +[titan] 2025-10-05 05:52:16,379 - root - INFO - lr: 4.1302e-05 gnorm: 1.15 [ 7:18:07<17:17:38] +[titan] 2025-10-05 05:52:27,265 - root - INFO - step: 11880 loss: 2.4011 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1135 +[titan] 2025-10-05 05:52:27,265 - root - INFO - lr: 
4.1295e-05 gnorm: 3.35 [ 7:18:17<17:17:27] +[titan] 2025-10-05 05:52:38,136 - root - INFO - step: 11885 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0521 +[titan] 2025-10-05 05:52:38,137 - root - INFO - lr: 4.1288e-05 gnorm: 1.14 [ 7:18:28<17:17:15] +[titan] 2025-10-05 05:52:49,001 - root - INFO - step: 11890 loss: 2.3415 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0669 +[titan] 2025-10-05 05:52:49,002 - root - INFO - lr: 4.1281e-05 gnorm: 1.11 [ 7:18:39<17:17:04] +[titan] 2025-10-05 05:52:59,880 - root - INFO - step: 11895 loss: 2.3264 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2720 global_avg_mtp_loss: 2.0545 +[titan] 2025-10-05 05:52:59,880 - root - INFO - lr: 4.1274e-05 gnorm: 1.12 [ 7:18:50<17:16:52] +[titan] 2025-10-05 05:53:08,562 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:53:10,763 - root - INFO - step: 11900 loss: 2.2583 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9932 +[titan] 2025-10-05 05:53:10,763 - root - INFO - lr: 4.1267e-05 gnorm: 1.12 [ 7:19:01<17:16:41] +[titan] 2025-10-05 05:53:21,692 - root - INFO - step: 11905 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 05:53:21,692 - root - INFO - lr: 4.1260e-05 gnorm: 1.14 [ 7:19:12<17:16:29] +[titan] 2025-10-05 05:53:32,550 - root - INFO - step: 11910 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:53:32,550 - root - INFO - lr: 4.1253e-05 gnorm: 1.07 [ 7:19:23<17:16:18] +[titan] 2025-10-05 05:53:43,445 - root - INFO - step: 11915 loss: 2.3927 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:53:43,445 - root - INFO - lr: 4.1246e-05 gnorm: 1.12 [ 7:19:34<17:16:06] +[titan] 2025-10-05 05:53:54,326 - root - INFO - step: 11920 loss: 2.4016 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:53:54,326 - root - INFO - lr: 4.1239e-05 gnorm: 1.11 [ 7:19:45<17:15:55] +[titan] 2025-10-05 05:54:05,201 - root - INFO - step: 11925 loss: 2.3896 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 05:54:05,201 - root - INFO - lr: 4.1232e-05 gnorm: 1.10 [ 7:19:55<17:15:43] +[titan] 2025-10-05 05:54:16,091 - root - INFO - step: 11930 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:54:16,091 - root - INFO - lr: 
4.1224e-05 gnorm: 1.18 [ 7:20:06<17:15:32] +[titan] 2025-10-05 05:54:27,039 - root - INFO - step: 11935 loss: 2.3186 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 05:54:27,039 - root - INFO - lr: 4.1217e-05 gnorm: 1.13 [ 7:20:17<17:15:20] +[titan] 2025-10-05 05:54:37,903 - root - INFO - step: 11940 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1239 +[titan] 2025-10-05 05:54:37,903 - root - INFO - lr: 4.1210e-05 gnorm: 1.14 [ 7:20:28<17:15:09] +[titan] 2025-10-05 05:54:48,775 - root - INFO - step: 11945 loss: 2.3374 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:54:48,776 - root - INFO - lr: 4.1203e-05 gnorm: 1.16 [ 7:20:39<17:14:57] +[titan] 2025-10-05 05:54:57,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:54:59,643 - root - INFO - step: 11950 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0633 +[titan] 2025-10-05 05:54:59,643 - root - INFO - lr: 4.1196e-05 gnorm: 1.10 [ 7:20:50<17:14:46] +[titan] 2025-10-05 05:55:10,528 - root - INFO - step: 11955 loss: 2.3258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 05:55:10,528 - root - INFO - lr: 4.1189e-05 gnorm: 1.08 [ 7:21:01<17:14:34] +[titan] 2025-10-05 05:55:21,456 - root - INFO - step: 11960 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:55:21,456 - root - INFO - lr: 4.1182e-05 gnorm: 1.12 [ 7:21:12<17:14:23] +[titan] 2025-10-05 05:55:32,338 - root - INFO - step: 11965 loss: 2.3022 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 05:55:32,338 - root - INFO - lr: 4.1175e-05 gnorm: 1.06 [ 7:21:23<17:14:11] +[titan] 2025-10-05 05:55:43,237 - root - INFO - step: 11970 loss: 2.3819 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 05:55:43,238 - root - INFO - lr: 4.1168e-05 gnorm: 1.11 [ 7:21:33<17:14:00] +[titan] 2025-10-05 05:55:54,122 - root - INFO - step: 11975 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0302 +[titan] 2025-10-05 05:55:54,122 - root - INFO - lr: 4.1161e-05 gnorm: 1.07 [ 7:21:44<17:13:49] +[titan] 2025-10-05 05:56:04,989 - root - INFO - step: 11980 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:56:04,989 - root - INFO - lr: 
4.1154e-05 gnorm: 1.08 [ 7:21:55<17:13:37] +[titan] 2025-10-05 05:56:15,876 - root - INFO - step: 11985 loss: 2.3487 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:56:15,876 - root - INFO - lr: 4.1147e-05 gnorm: 1.11 [ 7:22:06<17:13:26] +[titan] 2025-10-05 05:56:26,799 - root - INFO - step: 11990 loss: 2.3624 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 05:56:26,799 - root - INFO - lr: 4.1139e-05 gnorm: 1.07 [ 7:22:17<17:13:14] +[titan] 2025-10-05 05:56:37,664 - root - INFO - step: 11995 loss: 2.3352 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:56:37,664 - root - INFO - lr: 4.1132e-05 gnorm: 1.15 [ 7:22:28<17:13:03] +[titan] 2025-10-05 05:56:46,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:56:48,559 - root - INFO - step: 12000 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0434 +[titan] 2025-10-05 05:56:48,559 - root - INFO - lr: 4.1125e-05 gnorm: 1.15 [ 7:22:39<17:12:51] +[titan] 2025-10-05 05:56:59,430 - root - INFO - step: 12005 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0683 +[titan] 2025-10-05 05:56:59,430 - root - INFO - lr: 4.1118e-05 gnorm: 1.12 [ 7:22:50<17:12:40] +[titan] 2025-10-05 05:57:10,327 - root - INFO - step: 12010 loss: 2.3294 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0572 +[titan] 2025-10-05 05:57:10,327 - root - INFO - lr: 4.1111e-05 gnorm: 1.11 [ 7:23:01<17:12:28] +[titan] 2025-10-05 05:57:21,254 - root - INFO - step: 12015 loss: 2.3689 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:57:21,254 - root - INFO - lr: 4.1104e-05 gnorm: 1.08 [ 7:23:11<17:12:17] +[titan] 2025-10-05 05:57:32,120 - root - INFO - step: 12020 loss: 2.3542 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0781 +[titan] 2025-10-05 05:57:32,120 - root - INFO - lr: 4.1097e-05 gnorm: 1.08 [ 7:23:22<17:12:05] +[titan] 2025-10-05 05:57:43,004 - root - INFO - step: 12025 loss: 2.3233 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:57:43,004 - root - INFO - lr: 4.1090e-05 gnorm: 1.13 [ 7:23:33<17:11:54] +[titan] 2025-10-05 05:57:53,894 - root - INFO - step: 12030 loss: 2.3526 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:57:53,895 - root - INFO - lr: 
4.1083e-05 gnorm: 1.09 [ 7:23:44<17:11:42] +[titan] 2025-10-05 05:58:04,763 - root - INFO - step: 12035 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 05:58:04,764 - root - INFO - lr: 4.1075e-05 gnorm: 1.11 [ 7:23:55<17:11:31] +[titan] 2025-10-05 05:58:15,655 - root - INFO - step: 12040 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 05:58:15,655 - root - INFO - lr: 4.1068e-05 gnorm: 1.13 [ 7:24:06<17:11:19] +[titan] 2025-10-05 05:58:26,581 - root - INFO - step: 12045 loss: 2.2551 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9914 +[titan] 2025-10-05 05:58:26,582 - root - INFO - lr: 4.1061e-05 gnorm: 1.10 [ 7:24:17<17:11:08] +[titan] 2025-10-05 05:58:35,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:58:37,445 - root - INFO - step: 12050 loss: 2.2791 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 05:58:37,445 - root - INFO - lr: 4.1054e-05 gnorm: 1.12 [ 7:24:28<17:10:56] +[titan] 2025-10-05 05:58:48,333 - root - INFO - step: 12055 loss: 2.3027 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0322 +[titan] 2025-10-05 05:58:48,334 - root - INFO - lr: 4.1047e-05 gnorm: 1.09 [ 7:24:39<17:10:45] +[titan] 2025-10-05 05:58:59,215 - root - INFO - step: 12060 loss: 2.3599 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:58:59,215 - root - INFO - lr: 4.1040e-05 gnorm: 1.13 [ 7:24:49<17:10:33] +[titan] 2025-10-05 05:59:10,066 - root - INFO - step: 12065 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 05:59:10,066 - root - INFO - lr: 4.1033e-05 gnorm: 1.14 [ 7:25:00<17:10:22] +[titan] 2025-10-05 05:59:20,922 - root - INFO - step: 12070 loss: 2.3313 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:59:20,923 - root - INFO - lr: 4.1026e-05 gnorm: 1.12 [ 7:25:11<17:10:10] +[titan] 2025-10-05 05:59:31,844 - root - INFO - step: 12075 loss: 2.4140 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 05:59:31,844 - root - INFO - lr: 4.1018e-05 gnorm: 1.14 [ 7:25:22<17:09:59] +[titan] 2025-10-05 05:59:42,686 - root - INFO - step: 12080 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0618 +[titan] 2025-10-05 05:59:42,686 - root - INFO - lr: 
4.1011e-05 gnorm: 1.13 [ 7:25:33<17:09:47] +[titan] 2025-10-05 05:59:53,539 - root - INFO - step: 12085 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0449 +[titan] 2025-10-05 05:59:53,540 - root - INFO - lr: 4.1004e-05 gnorm: 1.11 [ 7:25:44<17:09:36] +[titan] 2025-10-05 06:00:04,392 - root - INFO - step: 12090 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0005 +[titan] 2025-10-05 06:00:04,392 - root - INFO - lr: 4.0997e-05 gnorm: 1.08 [ 7:25:55<17:09:24] +[titan] 2025-10-05 06:00:15,254 - root - INFO - step: 12095 loss: 2.3576 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0822 +[titan] 2025-10-05 06:00:15,254 - root - INFO - lr: 4.0990e-05 gnorm: 1.07 [ 7:26:05<17:09:13] +[titan] 2025-10-05 06:00:23,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:00:26,169 - root - INFO - step: 12100 loss: 2.3299 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0570 +[titan] 2025-10-05 06:00:26,169 - root - INFO - lr: 4.0983e-05 gnorm: 1.12 [ 7:26:16<17:09:01] +[titan] 2025-10-05 06:00:37,019 - root - INFO - step: 12105 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 06:00:37,019 - root - INFO - lr: 4.0976e-05 gnorm: 1.10 [ 7:26:27<17:08:50] +[titan] 2025-10-05 06:00:47,875 - root - INFO - step: 12110 loss: 2.3109 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0407 +[titan] 2025-10-05 06:00:47,875 - root - INFO - lr: 4.0968e-05 gnorm: 1.14 [ 7:26:38<17:08:38] +[titan] 2025-10-05 06:00:58,710 - root - INFO - step: 12115 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0231 +[titan] 2025-10-05 06:00:58,710 - root - INFO - lr: 4.0961e-05 gnorm: 1.09 [ 7:26:49<17:08:26] +[titan] 2025-10-05 06:01:09,539 - root - INFO - step: 12120 loss: 2.3227 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0502 +[titan] 2025-10-05 06:01:09,539 - root - INFO - lr: 4.0954e-05 gnorm: 1.11 [ 7:27:00<17:08:15] +[titan] 2025-10-05 06:01:20,374 - root - INFO - step: 12125 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 06:01:20,374 - root - INFO - lr: 4.0947e-05 gnorm: 1.07 [ 7:27:11<17:08:03] +[titan] 2025-10-05 06:01:31,270 - root - INFO - step: 12130 loss: 2.2677 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0025 +[titan] 2025-10-05 06:01:31,270 - root - INFO - lr: 
4.0940e-05 gnorm: 1.31 [ 7:27:21<17:07:52] +[titan] 2025-10-05 06:01:42,106 - root - INFO - step: 12135 loss: 2.2796 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:01:42,106 - root - INFO - lr: 4.0933e-05 gnorm: 1.13 [ 7:27:32<17:07:40] +[titan] 2025-10-05 06:01:52,949 - root - INFO - step: 12140 loss: 2.3222 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:01:52,949 - root - INFO - lr: 4.0926e-05 gnorm: 1.09 [ 7:27:43<17:07:29] +[titan] 2025-10-05 06:02:03,787 - root - INFO - step: 12145 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:02:03,787 - root - INFO - lr: 4.0918e-05 gnorm: 1.12 [ 7:27:54<17:07:17] +[titan] 2025-10-05 06:02:12,468 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:02:14,649 - root - INFO - step: 12150 loss: 2.3633 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2765 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 06:02:14,649 - root - INFO - lr: 4.0911e-05 gnorm: 1.10 [ 7:28:05<17:07:06] +[titan] 2025-10-05 06:02:25,544 - root - INFO - step: 12155 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 06:02:25,544 - root - INFO - lr: 4.0904e-05 gnorm: 1.08 [ 7:28:16<17:06:54] +[titan] 2025-10-05 06:02:36,407 - root - INFO - step: 12160 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:02:36,407 - root - INFO - lr: 4.0897e-05 gnorm: 1.12 [ 7:28:27<17:06:43] +[titan] 2025-10-05 06:02:47,265 - root - INFO - step: 12165 loss: 2.3191 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:02:47,265 - root - INFO - lr: 4.0890e-05 gnorm: 1.13 [ 7:28:37<17:06:31] +[titan] 2025-10-05 06:02:58,124 - root - INFO - step: 12170 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0270 +[titan] 2025-10-05 06:02:58,124 - root - INFO - lr: 4.0883e-05 gnorm: 1.13 [ 7:28:48<17:06:20] +[titan] 2025-10-05 06:03:08,999 - root - INFO - step: 12175 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 06:03:08,999 - root - INFO - lr: 4.0875e-05 gnorm: 1.10 [ 7:28:59<17:06:08] +[titan] 2025-10-05 06:03:19,864 - root - INFO - step: 12180 loss: 2.3860 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1055 +[titan] 2025-10-05 06:03:19,864 - root - INFO - lr: 
4.0868e-05 gnorm: 1.08 [ 7:29:10<17:05:57] +[titan] 2025-10-05 06:03:30,733 - root - INFO - step: 12185 loss: 2.2786 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 06:03:30,733 - root - INFO - lr: 4.0861e-05 gnorm: 1.09 [ 7:29:21<17:05:45] +[titan] 2025-10-05 06:03:41,601 - root - INFO - step: 12190 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 06:03:41,601 - root - INFO - lr: 4.0854e-05 gnorm: 1.13 [ 7:29:32<17:05:33] +[titan] 2025-10-05 06:03:52,503 - root - INFO - step: 12195 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9906 +[titan] 2025-10-05 06:03:52,503 - root - INFO - lr: 4.0847e-05 gnorm: 1.13 [ 7:29:43<17:05:22] +[titan] 2025-10-05 06:04:01,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:04:03,365 - root - INFO - step: 12200 loss: 2.3747 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0972 +[titan] 2025-10-05 06:04:03,365 - root - INFO - lr: 4.0839e-05 gnorm: 1.12 [ 7:29:54<17:05:11] +[titan] 2025-10-05 06:04:14,208 - root - INFO - step: 12205 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0049 +[titan] 2025-10-05 06:04:14,208 - root - INFO - lr: 4.0832e-05 gnorm: 1.10 [ 7:30:04<17:04:59] +[titan] 2025-10-05 06:04:25,065 - root - INFO - step: 12210 loss: 2.3060 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:04:25,065 - root - INFO - lr: 4.0825e-05 gnorm: 1.06 [ 7:30:15<17:04:47] +[titan] 2025-10-05 06:04:35,929 - root - INFO - step: 12215 loss: 2.2793 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 06:04:35,929 - root - INFO - lr: 4.0818e-05 gnorm: 1.04 [ 7:30:26<17:04:36] +[titan] 2025-10-05 06:04:46,809 - root - INFO - step: 12220 loss: 2.3271 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 06:04:46,809 - root - INFO - lr: 4.0811e-05 gnorm: 1.14 [ 7:30:37<17:04:24] +[titan] 2025-10-05 06:04:57,691 - root - INFO - step: 12225 loss: 2.2624 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9984 +[titan] 2025-10-05 06:04:57,691 - root - INFO - lr: 4.0803e-05 gnorm: 1.17 [ 7:30:48<17:04:13] +[titan] 2025-10-05 06:05:08,549 - root - INFO - step: 12230 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 06:05:08,549 - root - INFO - lr: 
4.0796e-05 gnorm: 1.09 [ 7:30:59<17:04:01] +[titan] 2025-10-05 06:05:19,441 - root - INFO - step: 12235 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0739 +[titan] 2025-10-05 06:05:19,441 - root - INFO - lr: 4.0789e-05 gnorm: 1.10 [ 7:31:10<17:03:50] +[titan] 2025-10-05 06:05:30,318 - root - INFO - step: 12240 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 06:05:30,318 - root - INFO - lr: 4.0782e-05 gnorm: 1.09 [ 7:31:20<17:03:38] +[titan] 2025-10-05 06:05:41,191 - root - INFO - step: 12245 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 06:05:41,191 - root - INFO - lr: 4.0775e-05 gnorm: 1.08 [ 7:31:31<17:03:27] +[titan] 2025-10-05 06:05:49,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:05:52,078 - root - INFO - step: 12250 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:05:52,079 - root - INFO - lr: 4.0767e-05 gnorm: 1.17 [ 7:31:42<17:03:16] +[titan] 2025-10-05 06:06:02,966 - root - INFO - step: 12255 loss: 2.3830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 06:06:02,966 - root - INFO - lr: 4.0760e-05 gnorm: 1.12 [ 7:31:53<17:03:04] +[titan] 2025-10-05 06:06:13,829 - root - INFO - step: 12260 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9702 +[titan] 2025-10-05 06:06:13,829 - root - INFO - lr: 4.0753e-05 gnorm: 1.10 [ 7:32:04<17:02:53] +[titan] 2025-10-05 06:06:24,716 - root - INFO - step: 12265 loss: 2.3897 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1102 +[titan] 2025-10-05 06:06:24,716 - root - INFO - lr: 4.0746e-05 gnorm: 1.13 [ 7:32:15<17:02:41] +[titan] 2025-10-05 06:06:35,605 - root - INFO - step: 12270 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0629 +[titan] 2025-10-05 06:06:35,605 - root - INFO - lr: 4.0739e-05 gnorm: 1.15 [ 7:32:26<17:02:30] +[titan] 2025-10-05 06:06:46,502 - root - INFO - step: 12275 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 06:06:46,502 - root - INFO - lr: 4.0731e-05 gnorm: 1.17 [ 7:32:37<17:02:18] +[titan] 2025-10-05 06:06:57,383 - root - INFO - step: 12280 loss: 2.3419 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0674 +[titan] 2025-10-05 06:06:57,383 - root - INFO - lr: 
4.0724e-05 gnorm: 1.16 [ 7:32:48<17:02:07] +[titan] 2025-10-05 06:07:08,352 - root - INFO - step: 12285 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.44 mfu: 41.91% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 06:07:08,353 - root - INFO - lr: 4.0717e-05 gnorm: 1.14 [ 7:32:59<17:01:55] +[titan] 2025-10-05 06:07:15,075 - root - INFO - Dumping profiler traces at step 12288 +[titan] 2025-10-05 06:07:15,114 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:07:19,500 - root - INFO - step: 12290 loss: 2.3565 memory: 118.84GiB(85.28%) tps: 29,395 tflops: 407.81 mfu: 41.23% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:07:19,500 - root - INFO - lr: 4.0710e-05 gnorm: 1.08 [ 7:33:10<17:01:45] +[titan] 2025-10-05 06:07:30,465 - root - INFO - step: 12295 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 29,886 tflops: 414.62 mfu: 41.92% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 06:07:30,465 - root - INFO - lr: 4.0702e-05 gnorm: 1.07 [ 7:33:21<17:01:33] +[titan] 2025-10-05 06:07:39,154 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:07:41,347 - root - INFO - step: 12300 loss: 2.3244 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:07:41,347 - root - INFO - lr: 4.0695e-05 gnorm: 1.16 [ 7:33:32<17:01:22] +[titan] 2025-10-05 06:07:52,196 - root - INFO - step: 12305 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 06:07:52,196 - root - INFO - lr: 4.0688e-05 gnorm: 1.09 [ 7:33:42<17:01:10] +[titan] 2025-10-05 06:08:03,050 - root - INFO - step: 12310 loss: 2.3555 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:08:03,050 - root - INFO - lr: 4.0681e-05 gnorm: 1.12 [ 7:33:53<17:00:59] +[titan] 2025-10-05 06:08:13,913 - root - INFO - step: 12315 loss: 2.3066 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0365 +[titan] 2025-10-05 06:08:13,914 - root - INFO - lr: 4.0674e-05 gnorm: 1.08 [ 7:34:04<17:00:47] +[titan] 2025-10-05 06:08:24,841 - root - INFO - step: 12320 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0745 +[titan] 2025-10-05 06:08:24,841 - root - INFO - lr: 4.0666e-05 gnorm: 1.11 [ 7:34:15<17:00:36] +[titan] 2025-10-05 06:08:35,938 - root - INFO - step: 12325 loss: 2.4352 memory: 118.84GiB(85.28%) tps: 29,531 tflops: 409.69 mfu: 41.42% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1501 +[titan] 2025-10-05 06:08:35,938 - root - INFO - lr: 4.0659e-05 gnorm: 1.15 [ 7:34:26<17:00:25] +[titan] 2025-10-05 06:08:46,800 - root - INFO - step: 12330 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:08:46,800 - root - INFO - lr: 
4.0652e-05 gnorm: 1.09 [ 7:34:37<17:00:13] +[titan] 2025-10-05 06:08:57,665 - root - INFO - step: 12335 loss: 2.3478 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 06:08:57,665 - root - INFO - lr: 4.0645e-05 gnorm: 1.09 [ 7:34:48<17:00:02] +[titan] 2025-10-05 06:09:08,538 - root - INFO - step: 12340 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0577 +[titan] 2025-10-05 06:09:08,539 - root - INFO - lr: 4.0637e-05 gnorm: 1.13 [ 7:34:59<16:59:50] +[titan] 2025-10-05 06:09:19,441 - root - INFO - step: 12345 loss: 2.3988 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1189 +[titan] 2025-10-05 06:09:19,441 - root - INFO - lr: 4.0630e-05 gnorm: 1.13 [ 7:35:10<16:59:39] +[titan] 2025-10-05 06:09:28,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:09:30,418 - root - INFO - step: 12350 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.17 mfu: 41.88% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0382 +[titan] 2025-10-05 06:09:30,418 - root - INFO - lr: 4.0623e-05 gnorm: 1.12 [ 7:35:21<16:59:28] +[titan] 2025-10-05 06:09:41,340 - root - INFO - step: 12355 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0087 +[titan] 2025-10-05 06:09:41,340 - root - INFO - lr: 4.0616e-05 gnorm: 1.16 [ 7:35:32<16:59:16] +[titan] 2025-10-05 06:09:52,209 - root - INFO - step: 12360 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0389 +[titan] 2025-10-05 06:09:52,209 - root - INFO - lr: 4.0608e-05 gnorm: 1.09 [ 7:35:42<16:59:05] +[titan] 2025-10-05 06:10:03,072 - root - INFO - step: 12365 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 06:10:03,073 - root - INFO - lr: 4.0601e-05 gnorm: 1.09 [ 7:35:53<16:58:53] +[titan] 2025-10-05 06:10:13,928 - root - INFO - step: 12370 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 06:10:13,928 - root - INFO - lr: 4.0594e-05 gnorm: 1.09 [ 7:36:04<16:58:42] +[titan] 2025-10-05 06:10:24,802 - root - INFO - step: 12375 loss: 2.3408 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:10:24,803 - root - INFO - lr: 4.0587e-05 gnorm: 1.10 [ 7:36:15<16:58:30] +[titan] 2025-10-05 06:10:35,777 - root - INFO - step: 12380 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.24 mfu: 41.89% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:10:35,777 - root - INFO - lr: 
4.0579e-05 gnorm: 1.08 [ 7:36:26<16:58:19] +[titan] 2025-10-05 06:10:46,648 - root - INFO - step: 12385 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:10:46,649 - root - INFO - lr: 4.0572e-05 gnorm: 1.13 [ 7:36:37<16:58:08] +[titan] 2025-10-05 06:10:57,506 - root - INFO - step: 12390 loss: 2.3730 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 06:10:57,506 - root - INFO - lr: 4.0565e-05 gnorm: 1.14 [ 7:36:48<16:57:56] +[titan] 2025-10-05 06:11:08,373 - root - INFO - step: 12395 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:11:08,373 - root - INFO - lr: 4.0558e-05 gnorm: 1.06 [ 7:36:59<16:57:45] +[titan] 2025-10-05 06:11:17,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:11:19,239 - root - INFO - step: 12400 loss: 2.3820 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 06:11:19,239 - root - INFO - lr: 4.0550e-05 gnorm: 1.12 [ 7:37:09<16:57:33] +[titan] 2025-10-05 06:11:30,093 - root - INFO - step: 12405 loss: 2.3346 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0597 +[titan] 2025-10-05 06:11:30,094 - root - INFO - lr: 4.0543e-05 gnorm: 1.09 [ 7:37:20<16:57:22] +[titan] 2025-10-05 06:11:41,037 - root - INFO - step: 12410 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0232 +[titan] 2025-10-05 06:11:41,037 - root - INFO - lr: 4.0536e-05 gnorm: 1.14 [ 7:37:31<16:57:10] +[titan] 2025-10-05 06:11:51,926 - root - INFO - step: 12415 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0550 +[titan] 2025-10-05 06:11:51,926 - root - INFO - lr: 4.0528e-05 gnorm: 1.18 [ 7:37:42<16:56:59] +[titan] 2025-10-05 06:12:02,805 - root - INFO - step: 12420 loss: 2.3265 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 06:12:02,806 - root - INFO - lr: 4.0521e-05 gnorm: 1.08 [ 7:37:53<16:56:47] +[titan] 2025-10-05 06:12:13,684 - root - INFO - step: 12425 loss: 2.3185 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0481 +[titan] 2025-10-05 06:12:13,684 - root - INFO - lr: 4.0514e-05 gnorm: 1.14 [ 7:38:04<16:56:36] +[titan] 2025-10-05 06:12:24,578 - root - INFO - step: 12430 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:12:24,578 - root - INFO - lr: 
4.0507e-05 gnorm: 1.12 [ 7:38:15<16:56:25] +[titan] 2025-10-05 06:12:35,467 - root - INFO - step: 12435 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0765 +[titan] 2025-10-05 06:12:35,468 - root - INFO - lr: 4.0499e-05 gnorm: 1.10 [ 7:38:26<16:56:13] +[titan] 2025-10-05 06:12:46,337 - root - INFO - step: 12440 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:12:46,337 - root - INFO - lr: 4.0492e-05 gnorm: 1.07 [ 7:38:36<16:56:02] +[titan] 2025-10-05 06:12:57,242 - root - INFO - step: 12445 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 06:12:57,242 - root - INFO - lr: 4.0485e-05 gnorm: 1.08 [ 7:38:47<16:55:50] +[titan] 2025-10-05 06:13:05,952 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:13:08,132 - root - INFO - step: 12450 loss: 2.3232 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0520 +[titan] 2025-10-05 06:13:08,132 - root - INFO - lr: 4.0477e-05 gnorm: 1.09 [ 7:38:58<16:55:39] +[titan] 2025-10-05 06:13:19,019 - root - INFO - step: 12455 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:13:19,019 - root - INFO - lr: 4.0470e-05 gnorm: 1.08 [ 7:39:09<16:55:27] +[titan] 2025-10-05 06:13:29,895 - root - INFO - step: 12460 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:13:29,895 - root - INFO - lr: 4.0463e-05 gnorm: 1.12 [ 7:39:20<16:55:16] +[titan] 2025-10-05 06:13:40,820 - root - INFO - step: 12465 loss: 2.3135 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0425 +[titan] 2025-10-05 06:13:40,820 - root - INFO - lr: 4.0456e-05 gnorm: 1.11 [ 7:39:31<16:55:04] +[titan] 2025-10-05 06:13:51,710 - root - INFO - step: 12470 loss: 2.3792 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 06:13:51,710 - root - INFO - lr: 4.0448e-05 gnorm: 1.07 [ 7:39:42<16:54:53] +[titan] 2025-10-05 06:14:02,592 - root - INFO - step: 12475 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0962 +[titan] 2025-10-05 06:14:02,592 - root - INFO - lr: 4.0441e-05 gnorm: 1.11 [ 7:39:53<16:54:42] +[titan] 2025-10-05 06:14:13,496 - root - INFO - step: 12480 loss: 2.2332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9712 +[titan] 2025-10-05 06:14:13,497 - root - INFO - lr: 
4.0434e-05 gnorm: 1.08 [ 7:40:04<16:54:30] +[titan] 2025-10-05 06:14:24,366 - root - INFO - step: 12485 loss: 2.3235 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 06:14:24,366 - root - INFO - lr: 4.0426e-05 gnorm: 1.11 [ 7:40:15<16:54:19] +[titan] 2025-10-05 06:14:35,268 - root - INFO - step: 12490 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0610 +[titan] 2025-10-05 06:14:35,269 - root - INFO - lr: 4.0419e-05 gnorm: 1.09 [ 7:40:25<16:54:07] +[titan] 2025-10-05 06:14:46,143 - root - INFO - step: 12495 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 06:14:46,144 - root - INFO - lr: 4.0412e-05 gnorm: 1.12 [ 7:40:36<16:53:56] +[titan] 2025-10-05 06:14:54,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:14:57,025 - root - INFO - step: 12500 loss: 2.2990 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0321 +[titan] 2025-10-05 06:14:57,025 - root - INFO - lr: 4.0404e-05 gnorm: 1.12 [ 7:40:47<16:53:44] +[titan] 2025-10-05 06:15:07,897 - root - INFO - step: 12505 loss: 2.3230 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 06:15:07,897 - root - INFO - lr: 4.0397e-05 gnorm: 1.14 [ 7:40:58<16:53:33] +[titan] 2025-10-05 06:15:18,787 - root - INFO - step: 12510 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0680 +[titan] 2025-10-05 06:15:18,787 - root - INFO - lr: 4.0390e-05 gnorm: 1.11 [ 7:41:09<16:53:21] +[titan] 2025-10-05 06:15:29,657 - root - INFO - step: 12515 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:15:29,657 - root - INFO - lr: 4.0383e-05 gnorm: 1.11 [ 7:41:20<16:53:10] +[titan] 2025-10-05 06:15:40,564 - root - INFO - step: 12520 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 06:15:40,564 - root - INFO - lr: 4.0375e-05 gnorm: 1.09 [ 7:41:31<16:52:59] +[titan] 2025-10-05 06:15:51,439 - root - INFO - step: 12525 loss: 2.2600 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 06:15:51,439 - root - INFO - lr: 4.0368e-05 gnorm: 1.12 [ 7:41:42<16:52:47] +[titan] 2025-10-05 06:16:02,309 - root - INFO - step: 12530 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2748 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 06:16:02,309 - root - INFO - lr: 
4.0361e-05 gnorm: 1.12 [ 7:41:52<16:52:36] +[titan] 2025-10-05 06:16:13,212 - root - INFO - step: 12535 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:16:13,212 - root - INFO - lr: 4.0353e-05 gnorm: 1.11 [ 7:42:03<16:52:24] +[titan] 2025-10-05 06:16:24,126 - root - INFO - step: 12540 loss: 2.3391 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 06:16:24,127 - root - INFO - lr: 4.0346e-05 gnorm: 1.13 [ 7:42:14<16:52:13] +[titan] 2025-10-05 06:16:35,001 - root - INFO - step: 12545 loss: 2.3246 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 06:16:35,001 - root - INFO - lr: 4.0339e-05 gnorm: 1.12 [ 7:42:25<16:52:01] +[titan] 2025-10-05 06:16:43,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:16:45,891 - root - INFO - step: 12550 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0627 +[titan] 2025-10-05 06:16:45,891 - root - INFO - lr: 4.0331e-05 gnorm: 1.10 [ 7:42:36<16:51:50] +[titan] 2025-10-05 06:16:56,777 - root - INFO - step: 12555 loss: 2.2647 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 06:16:56,777 - root - INFO - lr: 4.0324e-05 gnorm: 1.14 [ 7:42:47<16:51:39] +[titan] 2025-10-05 06:17:07,666 - root - INFO - step: 12560 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 06:17:07,667 - root - INFO - lr: 4.0317e-05 gnorm: 1.12 [ 7:42:58<16:51:27] +[titan] 2025-10-05 06:17:18,556 - root - INFO - step: 12565 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:17:18,557 - root - INFO - lr: 4.0309e-05 gnorm: 1.11 [ 7:43:09<16:51:16] +[titan] 2025-10-05 06:17:29,439 - root - INFO - step: 12570 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 06:17:29,439 - root - INFO - lr: 4.0302e-05 gnorm: 1.11 [ 7:43:20<16:51:04] +[titan] 2025-10-05 06:17:40,372 - root - INFO - step: 12575 loss: 2.2819 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 06:17:40,372 - root - INFO - lr: 4.0295e-05 gnorm: 1.11 [ 7:43:31<16:50:53] +[titan] 2025-10-05 06:17:51,237 - root - INFO - step: 12580 loss: 2.3250 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0525 +[titan] 2025-10-05 06:17:51,238 - root - INFO - lr: 
4.0287e-05 gnorm: 1.11 [ 7:43:41<16:50:41] +[titan] 2025-10-05 06:18:02,105 - root - INFO - step: 12585 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:18:02,105 - root - INFO - lr: 4.0280e-05 gnorm: 1.09 [ 7:43:52<16:50:30] +[titan] 2025-10-05 06:18:12,984 - root - INFO - step: 12590 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.0880 +[titan] 2025-10-05 06:18:12,984 - root - INFO - lr: 4.0273e-05 gnorm: 1.17 [ 7:44:03<16:50:19] +[titan] 2025-10-05 06:18:23,839 - root - INFO - step: 12595 loss: 2.3742 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0956 +[titan] 2025-10-05 06:18:23,839 - root - INFO - lr: 4.0265e-05 gnorm: 1.11 [ 7:44:14<16:50:07] +[titan] 2025-10-05 06:18:32,532 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:18:34,727 - root - INFO - step: 12600 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9762 +[titan] 2025-10-05 06:18:34,727 - root - INFO - lr: 4.0258e-05 gnorm: 1.14 [ 7:44:25<16:49:56] +[titan] 2025-10-05 06:18:45,664 - root - INFO - step: 12605 loss: 2.3207 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0497 +[titan] 2025-10-05 06:18:45,664 - root - INFO - lr: 4.0250e-05 gnorm: 1.17 [ 7:44:36<16:49:44] +[titan] 2025-10-05 06:18:56,552 - root - INFO - step: 12610 loss: 2.3981 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 06:18:56,552 - root - INFO - lr: 4.0243e-05 gnorm: 1.13 [ 7:44:47<16:49:33] +[titan] 2025-10-05 06:19:07,421 - root - INFO - step: 12615 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 06:19:07,421 - root - INFO - lr: 4.0236e-05 gnorm: 1.09 [ 7:44:58<16:49:21] +[titan] 2025-10-05 06:19:18,306 - root - INFO - step: 12620 loss: 2.3150 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 06:19:18,306 - root - INFO - lr: 4.0228e-05 gnorm: 1.12 [ 7:45:08<16:49:10] +[titan] 2025-10-05 06:19:29,188 - root - INFO - step: 12625 loss: 2.3979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1165 +[titan] 2025-10-05 06:19:29,189 - root - INFO - lr: 4.0221e-05 gnorm: 1.12 [ 7:45:19<16:48:59] +[titan] 2025-10-05 06:19:40,105 - root - INFO - step: 12630 loss: 2.2606 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9964 +[titan] 2025-10-05 06:19:40,105 - root - INFO - lr: 
4.0214e-05 gnorm: 1.14 [ 7:45:30<16:48:47] +[titan] 2025-10-05 06:19:50,986 - root - INFO - step: 12635 loss: 2.3546 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:19:50,986 - root - INFO - lr: 4.0206e-05 gnorm: 1.11 [ 7:45:41<16:48:36] +[titan] 2025-10-05 06:20:01,908 - root - INFO - step: 12640 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:20:01,908 - root - INFO - lr: 4.0199e-05 gnorm: 1.11 [ 7:45:52<16:48:24] +[titan] 2025-10-05 06:20:12,799 - root - INFO - step: 12645 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0332 +[titan] 2025-10-05 06:20:12,799 - root - INFO - lr: 4.0192e-05 gnorm: 1.10 [ 7:46:03<16:48:13] +[titan] 2025-10-05 06:20:21,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:20:23,685 - root - INFO - step: 12650 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 06:20:23,685 - root - INFO - lr: 4.0184e-05 gnorm: 1.11 [ 7:46:14<16:48:02] +[titan] 2025-10-05 06:20:34,581 - root - INFO - step: 12655 loss: 2.2611 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 06:20:34,581 - root - INFO - lr: 4.0177e-05 gnorm: 1.08 [ 7:46:25<16:47:50] +[titan] 2025-10-05 06:20:45,479 - root - INFO - step: 12660 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:20:45,480 - root - INFO - lr: 4.0169e-05 gnorm: 1.09 [ 7:46:36<16:47:39] +[titan] 2025-10-05 06:20:56,352 - root - INFO - step: 12665 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:20:56,353 - root - INFO - lr: 4.0162e-05 gnorm: 1.10 [ 7:46:46<16:47:27] +[titan] 2025-10-05 06:21:07,226 - root - INFO - step: 12670 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9534 +[titan] 2025-10-05 06:21:07,226 - root - INFO - lr: 4.0155e-05 gnorm: 1.08 [ 7:46:57<16:47:16] +[titan] 2025-10-05 06:21:18,106 - root - INFO - step: 12675 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9903 +[titan] 2025-10-05 06:21:18,106 - root - INFO - lr: 4.0147e-05 gnorm: 1.16 [ 7:47:08<16:47:04] +[titan] 2025-10-05 06:21:28,978 - root - INFO - step: 12680 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 06:21:28,978 - root - INFO - lr: 
4.0140e-05 gnorm: 1.11 [ 7:47:19<16:46:53] +[titan] 2025-10-05 06:21:39,844 - root - INFO - step: 12685 loss: 2.3348 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0622 +[titan] 2025-10-05 06:21:39,844 - root - INFO - lr: 4.0133e-05 gnorm: 1.13 [ 7:47:30<16:46:41] +[titan] 2025-10-05 06:21:50,731 - root - INFO - step: 12690 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 06:21:50,731 - root - INFO - lr: 4.0125e-05 gnorm: 1.14 [ 7:47:41<16:46:30] +[titan] 2025-10-05 06:22:01,611 - root - INFO - step: 12695 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 06:22:01,612 - root - INFO - lr: 4.0118e-05 gnorm: 1.10 [ 7:47:52<16:46:19] +[titan] 2025-10-05 06:22:10,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:22:12,500 - root - INFO - step: 12700 loss: 2.3396 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0653 +[titan] 2025-10-05 06:22:12,501 - root - INFO - lr: 4.0110e-05 gnorm: 1.11 [ 7:48:03<16:46:07] +[titan] 2025-10-05 06:22:23,372 - root - INFO - step: 12705 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0153 +[titan] 2025-10-05 06:22:23,372 - root - INFO - lr: 4.0103e-05 gnorm: 1.11 [ 7:48:14<16:45:56] +[titan] 2025-10-05 06:22:34,241 - root - INFO - step: 12710 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:22:34,241 - root - INFO - lr: 4.0096e-05 gnorm: 1.10 [ 7:48:24<16:45:44] +[titan] 2025-10-05 06:22:45,141 - root - INFO - step: 12715 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0103 +[titan] 2025-10-05 06:22:45,141 - root - INFO - lr: 4.0088e-05 gnorm: 1.14 [ 7:48:35<16:45:33] +[titan] 2025-10-05 06:22:56,018 - root - INFO - step: 12720 loss: 2.2452 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 06:22:56,018 - root - INFO - lr: 4.0081e-05 gnorm: 1.10 [ 7:48:46<16:45:21] +[titan] 2025-10-05 06:23:06,904 - root - INFO - step: 12725 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0456 +[titan] 2025-10-05 06:23:06,905 - root - INFO - lr: 4.0073e-05 gnorm: 1.10 [ 7:48:57<16:45:10] +[titan] 2025-10-05 06:23:17,777 - root - INFO - step: 12730 loss: 2.3547 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0804 +[titan] 2025-10-05 06:23:17,778 - root - INFO - lr: 
4.0066e-05 gnorm: 1.09 [ 7:49:08<16:44:59] +[titan] 2025-10-05 06:23:28,700 - root - INFO - step: 12735 loss: 2.4579 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1714 +[titan] 2025-10-05 06:23:28,700 - root - INFO - lr: 4.0059e-05 gnorm: 1.12 [ 7:49:19<16:44:47] +[titan] 2025-10-05 06:23:39,577 - root - INFO - step: 12740 loss: 2.2807 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 06:23:39,577 - root - INFO - lr: 4.0051e-05 gnorm: 1.08 [ 7:49:30<16:44:36] +[titan] 2025-10-05 06:23:50,466 - root - INFO - step: 12745 loss: 2.2580 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9955 +[titan] 2025-10-05 06:23:50,467 - root - INFO - lr: 4.0044e-05 gnorm: 1.13 [ 7:49:41<16:44:24] +[titan] 2025-10-05 06:23:59,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:24:01,332 - root - INFO - step: 12750 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:24:01,332 - root - INFO - lr: 4.0036e-05 gnorm: 1.16 [ 7:49:51<16:44:13] +[titan] 2025-10-05 06:24:12,211 - root - INFO - step: 12755 loss: 2.3122 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 06:24:12,211 - root - INFO - lr: 4.0029e-05 gnorm: 1.10 [ 7:50:02<16:44:01] +[titan] 2025-10-05 06:24:23,070 - root - INFO - step: 12760 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 06:24:23,070 - root - INFO - lr: 4.0022e-05 gnorm: 1.11 [ 7:50:13<16:43:50] +[titan] 2025-10-05 06:24:33,960 - root - INFO - step: 12765 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0308 +[titan] 2025-10-05 06:24:33,960 - root - INFO - lr: 4.0014e-05 gnorm: 1.11 [ 7:50:24<16:43:39] +[titan] 2025-10-05 06:24:44,855 - root - INFO - step: 12770 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0202 +[titan] 2025-10-05 06:24:44,855 - root - INFO - lr: 4.0007e-05 gnorm: 1.10 [ 7:50:35<16:43:27] +[titan] 2025-10-05 06:24:55,732 - root - INFO - step: 12775 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 06:24:55,732 - root - INFO - lr: 3.9999e-05 gnorm: 1.13 [ 7:50:46<16:43:16] +[titan] 2025-10-05 06:25:06,578 - root - INFO - step: 12780 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:06,578 - root - INFO - lr: 
3.9992e-05 gnorm: 1.10 [ 7:50:57<16:43:04] +[titan] 2025-10-05 06:25:17,446 - root - INFO - step: 12785 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0703 +[titan] 2025-10-05 06:25:17,446 - root - INFO - lr: 3.9984e-05 gnorm: 1.15 [ 7:51:08<16:42:53] +[titan] 2025-10-05 06:25:28,322 - root - INFO - step: 12790 loss: 2.1995 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 06:25:28,322 - root - INFO - lr: 3.9977e-05 gnorm: 1.08 [ 7:51:18<16:42:41] +[titan] 2025-10-05 06:25:39,196 - root - INFO - step: 12795 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0818 +[titan] 2025-10-05 06:25:39,196 - root - INFO - lr: 3.9970e-05 gnorm: 1.11 [ 7:51:29<16:42:30] +[titan] 2025-10-05 06:25:48,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:25:50,190 - root - INFO - step: 12800 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.49 mfu: 41.81% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:50,191 - root - INFO - lr: 3.9962e-05 gnorm: 1.12 [ 7:51:40<16:42:19] +[titan] 2025-10-05 06:25:50,369 - root - INFO - Dumping profiler traces at step 12800 +[titan] 2025-10-05 06:25:50,406 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:26:01,279 - root - INFO - step: 12805 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 29,551 tflops: 409.97 mfu: 41.45% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 06:26:01,280 - root - INFO - lr: 3.9955e-05 gnorm: 1.13 [ 7:51:51<16:42:08] +[titan] 2025-10-05 06:26:12,154 - root - INFO - step: 12810 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:26:12,154 - root - INFO - lr: 3.9947e-05 gnorm: 1.08 [ 7:52:02<16:41:56] +[titan] 2025-10-05 06:26:23,032 - root - INFO - step: 12815 loss: 2.3306 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0579 +[titan] 2025-10-05 06:26:23,033 - root - INFO - lr: 3.9940e-05 gnorm: 1.06 [ 7:52:13<16:41:45] +[titan] 2025-10-05 06:26:33,940 - root - INFO - step: 12820 loss: 2.3775 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0989 +[titan] 2025-10-05 06:26:33,940 - root - INFO - lr: 3.9932e-05 gnorm: 1.15 [ 7:52:24<16:41:34] +[titan] 2025-10-05 06:26:44,836 - root - INFO - step: 12825 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0008 +[titan] 2025-10-05 06:26:44,836 - root - INFO - lr: 3.9925e-05 gnorm: 1.05 [ 7:52:35<16:41:22] +[titan] 2025-10-05 06:26:55,799 - root - INFO - step: 12830 loss: 
2.3367 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0630 +[titan] 2025-10-05 06:26:55,799 - root - INFO - lr: 3.9918e-05 gnorm: 1.14 [ 7:52:46<16:41:11] +[titan] 2025-10-05 06:27:06,678 - root - INFO - step: 12835 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 06:27:06,678 - root - INFO - lr: 3.9910e-05 gnorm: 1.07 [ 7:52:57<16:40:59] +[titan] 2025-10-05 06:27:17,590 - root - INFO - step: 12840 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:27:17,590 - root - INFO - lr: 3.9903e-05 gnorm: 1.10 [ 7:53:08<16:40:48] +[titan] 2025-10-05 06:27:28,495 - root - INFO - step: 12845 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0092 +[titan] 2025-10-05 06:27:28,495 - root - INFO - lr: 3.9895e-05 gnorm: 1.10 [ 7:53:19<16:40:37] +[titan] 2025-10-05 06:27:37,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:27:39,368 - root - INFO - step: 12850 loss: 2.2958 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0276 +[titan] 2025-10-05 06:27:39,368 - root - INFO - lr: 3.9888e-05 gnorm: 1.09 [ 7:53:29<16:40:25] +[titan] 2025-10-05 06:27:50,338 - root - INFO - step: 12855 loss: 2.2825 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0128 +[titan] 2025-10-05 06:27:50,339 - root - INFO - lr: 3.9880e-05 gnorm: 1.16 [ 7:53:40<16:40:14] +[titan] 2025-10-05 06:28:01,245 - root - INFO - step: 12860 loss: 2.3056 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:28:01,245 - root - INFO - lr: 3.9873e-05 gnorm: 1.08 [ 7:53:51<16:40:03] +[titan] 2025-10-05 06:28:12,135 - root - INFO - step: 12865 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9912 +[titan] 2025-10-05 06:28:12,135 - root - INFO - lr: 3.9865e-05 gnorm: 1.10 [ 7:54:02<16:39:51] +[titan] 2025-10-05 06:28:23,005 - root - INFO - step: 12870 loss: 2.3501 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 06:28:23,005 - root - INFO - lr: 3.9858e-05 gnorm: 1.07 [ 7:54:13<16:39:40] +[titan] 2025-10-05 06:28:33,877 - root - INFO - step: 12875 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0229 +[titan] 2025-10-05 06:28:33,877 - root - INFO - lr: 3.9850e-05 gnorm: 1.13 [ 7:54:24<16:39:28] +[titan] 2025-10-05 06:28:44,761 - root - INFO - step: 12880 loss: 2.3117 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:28:44,761 - root - INFO - lr: 
3.9843e-05 gnorm: 1.15 [ 7:54:35<16:39:17] +[titan] 2025-10-05 06:28:55,685 - root - INFO - step: 12885 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:28:55,685 - root - INFO - lr: 3.9836e-05 gnorm: 1.11 [ 7:54:46<16:39:06] +[titan] 2025-10-05 06:29:06,556 - root - INFO - step: 12890 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:29:06,556 - root - INFO - lr: 3.9828e-05 gnorm: 1.12 [ 7:54:57<16:38:54] +[titan] 2025-10-05 06:29:17,466 - root - INFO - step: 12895 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:29:17,467 - root - INFO - lr: 3.9821e-05 gnorm: 1.11 [ 7:55:08<16:38:43] +[titan] 2025-10-05 06:29:26,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:29:28,342 - root - INFO - step: 12900 loss: 2.3579 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0826 +[titan] 2025-10-05 06:29:28,342 - root - INFO - lr: 3.9813e-05 gnorm: 1.11 [ 7:55:18<16:38:31] +[titan] 2025-10-05 06:29:39,206 - root - INFO - step: 12905 loss: 2.2414 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 06:29:39,206 - root - INFO - lr: 3.9806e-05 gnorm: 1.08 [ 7:55:29<16:38:20] +[titan] 2025-10-05 06:29:50,114 - root - INFO - step: 12910 loss: 2.2702 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:29:50,114 - root - INFO - lr: 3.9798e-05 gnorm: 1.13 [ 7:55:40<16:38:09] +[titan] 2025-10-05 06:30:00,993 - root - INFO - step: 12915 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0072 +[titan] 2025-10-05 06:30:00,993 - root - INFO - lr: 3.9791e-05 gnorm: 1.07 [ 7:55:51<16:37:57] +[titan] 2025-10-05 06:30:11,897 - root - INFO - step: 12920 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0307 +[titan] 2025-10-05 06:30:11,898 - root - INFO - lr: 3.9783e-05 gnorm: 1.04 [ 7:56:02<16:37:46] +[titan] 2025-10-05 06:30:22,817 - root - INFO - step: 12925 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0581 +[titan] 2025-10-05 06:30:22,817 - root - INFO - lr: 3.9776e-05 gnorm: 1.13 [ 7:56:13<16:37:34] +[titan] 2025-10-05 06:30:33,737 - root - INFO - step: 12930 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:30:33,737 - root - INFO - lr: 
3.9768e-05 gnorm: 1.13 [ 7:56:24<16:37:23] +[titan] 2025-10-05 06:30:44,627 - root - INFO - step: 12935 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:30:44,627 - root - INFO - lr: 3.9761e-05 gnorm: 1.10 [ 7:56:35<16:37:12] +[titan] 2025-10-05 06:30:55,585 - root - INFO - step: 12940 loss: 2.3356 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 06:30:55,586 - root - INFO - lr: 3.9753e-05 gnorm: 1.11 [ 7:56:46<16:37:01] +[titan] 2025-10-05 06:31:06,518 - root - INFO - step: 12945 loss: 2.2859 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:31:06,518 - root - INFO - lr: 3.9746e-05 gnorm: 1.10 [ 7:56:57<16:36:49] +[titan] 2025-10-05 06:31:15,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:31:17,447 - root - INFO - step: 12950 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0289 +[titan] 2025-10-05 06:31:17,447 - root - INFO - lr: 3.9738e-05 gnorm: 1.11 [ 7:57:08<16:36:38] +[titan] 2025-10-05 06:31:28,381 - root - INFO - step: 12955 loss: 2.3005 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 06:31:28,382 - root - INFO - lr: 3.9731e-05 gnorm: 1.07 [ 7:57:18<16:36:27] +[titan] 2025-10-05 06:31:39,314 - root - INFO - step: 12960 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0616 +[titan] 2025-10-05 06:31:39,315 - root - INFO - lr: 3.9723e-05 gnorm: 1.13 [ 7:57:29<16:36:15] +[titan] 2025-10-05 06:31:50,231 - root - INFO - step: 12965 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 06:31:50,231 - root - INFO - lr: 3.9716e-05 gnorm: 1.14 [ 7:57:40<16:36:04] +[titan] 2025-10-05 06:32:01,132 - root - INFO - step: 12970 loss: 2.3312 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:32:01,132 - root - INFO - lr: 3.9708e-05 gnorm: 1.16 [ 7:57:51<16:35:53] +[titan] 2025-10-05 06:32:12,023 - root - INFO - step: 12975 loss: 2.2497 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 06:32:12,023 - root - INFO - lr: 3.9701e-05 gnorm: 1.13 [ 7:58:02<16:35:41] +[titan] 2025-10-05 06:32:22,921 - root - INFO - step: 12980 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0314 +[titan] 2025-10-05 06:32:22,922 - root - INFO - lr: 
3.9693e-05 gnorm: 1.08 [ 7:58:13<16:35:30] +[titan] 2025-10-05 06:32:33,791 - root - INFO - step: 12985 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 06:32:33,791 - root - INFO - lr: 3.9686e-05 gnorm: 1.06 [ 7:58:24<16:35:18] +[titan] 2025-10-05 06:32:44,706 - root - INFO - step: 12990 loss: 2.3628 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 06:32:44,706 - root - INFO - lr: 3.9678e-05 gnorm: 1.12 [ 7:58:35<16:35:07] +[titan] 2025-10-05 06:32:55,609 - root - INFO - step: 12995 loss: 2.2830 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:32:55,609 - root - INFO - lr: 3.9671e-05 gnorm: 1.13 [ 7:58:46<16:34:56] +[titan] 2025-10-05 06:33:04,309 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:33:06,487 - root - INFO - step: 13000 loss: 2.2887 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:33:06,487 - root - INFO - lr: 3.9663e-05 gnorm: 1.09 [ 7:58:57<16:34:44] +[titan] 2025-10-05 06:33:17,365 - root - INFO - step: 13005 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9733 +[titan] 2025-10-05 06:33:17,365 - root - INFO - lr: 3.9656e-05 gnorm: 1.09 [ 7:59:07<16:34:33] +[titan] 2025-10-05 06:33:28,255 - root - INFO - step: 13010 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0677 +[titan] 2025-10-05 06:33:28,255 - root - INFO - lr: 3.9648e-05 gnorm: 1.10 [ 7:59:18<16:34:21] +[titan] 2025-10-05 06:33:39,107 - root - INFO - step: 13015 loss: 2.3870 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 06:33:39,107 - root - INFO - lr: 3.9641e-05 gnorm: 1.14 [ 7:59:29<16:34:10] +[titan] 2025-10-05 06:33:49,999 - root - INFO - step: 13020 loss: 2.2362 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9754 +[titan] 2025-10-05 06:33:49,999 - root - INFO - lr: 3.9633e-05 gnorm: 1.04 [ 7:59:40<16:33:59] +[titan] 2025-10-05 06:34:00,906 - root - INFO - step: 13025 loss: 2.3058 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0372 +[titan] 2025-10-05 06:34:00,906 - root - INFO - lr: 3.9626e-05 gnorm: 1.10 [ 7:59:51<16:33:47] +[titan] 2025-10-05 06:34:11,756 - root - INFO - step: 13030 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9901 +[titan] 2025-10-05 06:34:11,756 - root - INFO - lr: 
3.9618e-05 gnorm: 1.10 [ 8:00:02<16:33:36] +[titan] 2025-10-05 06:34:22,620 - root - INFO - step: 13035 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:34:22,620 - root - INFO - lr: 3.9611e-05 gnorm: 1.07 [ 8:00:13<16:33:24] +[titan] 2025-10-05 06:34:33,499 - root - INFO - step: 13040 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:34:33,499 - root - INFO - lr: 3.9603e-05 gnorm: 1.10 [ 8:00:24<16:33:13] +[titan] 2025-10-05 06:34:44,364 - root - INFO - step: 13045 loss: 2.3062 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 06:34:44,365 - root - INFO - lr: 3.9596e-05 gnorm: 1.12 [ 8:00:34<16:33:01] +[titan] 2025-10-05 06:34:53,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:34:55,288 - root - INFO - step: 13050 loss: 2.2984 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:34:55,289 - root - INFO - lr: 3.9588e-05 gnorm: 1.10 [ 8:00:45<16:32:50] +[titan] 2025-10-05 06:35:06,196 - root - INFO - step: 13055 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9765 +[titan] 2025-10-05 06:35:06,196 - root - INFO - lr: 3.9581e-05 gnorm: 1.05 [ 8:00:56<16:32:39] +[titan] 2025-10-05 06:35:17,080 - root - INFO - step: 13060 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 06:35:17,080 - root - INFO - lr: 3.9573e-05 gnorm: 1.10 [ 8:01:07<16:32:27] +[titan] 2025-10-05 06:35:27,969 - root - INFO - step: 13065 loss: 2.2499 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:35:27,969 - root - INFO - lr: 3.9566e-05 gnorm: 1.09 [ 8:01:18<16:32:16] +[titan] 2025-10-05 06:35:38,866 - root - INFO - step: 13070 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0717 +[titan] 2025-10-05 06:35:38,867 - root - INFO - lr: 3.9558e-05 gnorm: 1.12 [ 8:01:29<16:32:05] +[titan] 2025-10-05 06:35:49,752 - root - INFO - step: 13075 loss: 2.3177 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0458 +[titan] 2025-10-05 06:35:49,752 - root - INFO - lr: 3.9551e-05 gnorm: 1.11 [ 8:01:40<16:31:53] +[titan] 2025-10-05 06:36:00,668 - root - INFO - step: 13080 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 06:36:00,669 - root - INFO - lr: 
3.9543e-05 gnorm: 1.07 [ 8:01:51<16:31:42] +[titan] 2025-10-05 06:36:11,580 - root - INFO - step: 13085 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 06:36:11,580 - root - INFO - lr: 3.9535e-05 gnorm: 1.11 [ 8:02:02<16:31:31] +[titan] 2025-10-05 06:36:22,465 - root - INFO - step: 13090 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:36:22,465 - root - INFO - lr: 3.9528e-05 gnorm: 1.09 [ 8:02:13<16:31:19] +[titan] 2025-10-05 06:36:33,326 - root - INFO - step: 13095 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9630 +[titan] 2025-10-05 06:36:33,326 - root - INFO - lr: 3.9520e-05 gnorm: 1.10 [ 8:02:23<16:31:08] +[titan] 2025-10-05 06:36:41,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:36:44,174 - root - INFO - step: 13100 loss: 2.3105 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:36:44,174 - root - INFO - lr: 3.9513e-05 gnorm: 1.14 [ 8:02:34<16:30:56] +[titan] 2025-10-05 06:36:55,075 - root - INFO - step: 13105 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:36:55,075 - root - INFO - lr: 3.9505e-05 gnorm: 1.18 [ 8:02:45<16:30:45] +[titan] 2025-10-05 06:37:05,918 - root - INFO - step: 13110 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 06:37:05,918 - root - INFO - lr: 3.9498e-05 gnorm: 1.08 [ 8:02:56<16:30:33] +[titan] 2025-10-05 06:37:16,786 - root - INFO - step: 13115 loss: 2.2582 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 06:37:16,786 - root - INFO - lr: 3.9490e-05 gnorm: 1.10 [ 8:03:07<16:30:22] +[titan] 2025-10-05 06:37:27,685 - root - INFO - step: 13120 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 06:37:27,685 - root - INFO - lr: 3.9483e-05 gnorm: 1.11 [ 8:03:18<16:30:11] +[titan] 2025-10-05 06:37:38,554 - root - INFO - step: 13125 loss: 2.3124 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 06:37:38,554 - root - INFO - lr: 3.9475e-05 gnorm: 1.10 [ 8:03:29<16:29:59] +[titan] 2025-10-05 06:37:49,418 - root - INFO - step: 13130 loss: 2.3195 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:37:49,418 - root - INFO - lr: 
3.9468e-05 gnorm: 1.09 [ 8:03:40<16:29:48] +[titan] 2025-10-05 06:38:00,337 - root - INFO - step: 13135 loss: 2.2981 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:38:00,337 - root - INFO - lr: 3.9460e-05 gnorm: 1.09 [ 8:03:50<16:29:36] +[titan] 2025-10-05 06:38:11,199 - root - INFO - step: 13140 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9924 +[titan] 2025-10-05 06:38:11,200 - root - INFO - lr: 3.9452e-05 gnorm: 1.06 [ 8:04:01<16:29:25] +[titan] 2025-10-05 06:38:22,080 - root - INFO - step: 13145 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2759 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 06:38:22,080 - root - INFO - lr: 3.9445e-05 gnorm: 1.08 [ 8:04:12<16:29:14] +[titan] 2025-10-05 06:38:30,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:38:32,990 - root - INFO - step: 13150 loss: 2.2897 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0219 +[titan] 2025-10-05 06:38:32,990 - root - INFO - lr: 3.9437e-05 gnorm: 1.12 [ 8:04:23<16:29:02] +[titan] 2025-10-05 06:38:43,859 - root - INFO - step: 13155 loss: 2.2817 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0147 +[titan] 2025-10-05 06:38:43,859 - root - INFO - lr: 3.9430e-05 gnorm: 1.08 [ 8:04:34<16:28:51] +[titan] 2025-10-05 06:38:54,735 - root - INFO - step: 13160 loss: 2.3131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0428 +[titan] 2025-10-05 06:38:54,736 - root - INFO - lr: 3.9422e-05 gnorm: 1.11 [ 8:04:45<16:28:39] +[titan] 2025-10-05 06:39:05,628 - root - INFO - step: 13165 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 06:39:05,628 - root - INFO - lr: 3.9415e-05 gnorm: 1.10 [ 8:04:56<16:28:28] +[titan] 2025-10-05 06:39:16,489 - root - INFO - step: 13170 loss: 2.3292 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:39:16,489 - root - INFO - lr: 3.9407e-05 gnorm: 1.11 [ 8:05:07<16:28:17] +[titan] 2025-10-05 06:39:27,377 - root - INFO - step: 13175 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9764 +[titan] 2025-10-05 06:39:27,377 - root - INFO - lr: 3.9399e-05 gnorm: 1.07 [ 8:05:17<16:28:05] +[titan] 2025-10-05 06:39:38,260 - root - INFO - step: 13180 loss: 2.2929 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0261 +[titan] 2025-10-05 06:39:38,260 - root - INFO - lr: 
3.9392e-05 gnorm: 1.18 [ 8:05:28<16:27:54] +[titan] 2025-10-05 06:39:49,151 - root - INFO - step: 13185 loss: 2.2880 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0212 +[titan] 2025-10-05 06:39:49,152 - root - INFO - lr: 3.9384e-05 gnorm: 1.13 [ 8:05:39<16:27:42] +[titan] 2025-10-05 06:40:00,050 - root - INFO - step: 13190 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 06:40:00,050 - root - INFO - lr: 3.9377e-05 gnorm: 1.11 [ 8:05:50<16:27:31] +[titan] 2025-10-05 06:40:10,934 - root - INFO - step: 13195 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9939 +[titan] 2025-10-05 06:40:10,934 - root - INFO - lr: 3.9369e-05 gnorm: 1.10 [ 8:06:01<16:27:20] +[titan] 2025-10-05 06:40:19,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:40:21,820 - root - INFO - step: 13200 loss: 2.2675 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0021 +[titan] 2025-10-05 06:40:21,820 - root - INFO - lr: 3.9362e-05 gnorm: 1.13 [ 8:06:12<16:27:08] +[titan] 2025-10-05 06:40:32,683 - root - INFO - step: 13205 loss: 2.3004 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:40:32,683 - root - INFO - lr: 3.9354e-05 gnorm: 1.11 [ 8:06:23<16:26:57] +[titan] 2025-10-05 06:40:43,552 - root - INFO - step: 13210 loss: 2.3321 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0596 +[titan] 2025-10-05 06:40:43,553 - root - INFO - lr: 3.9346e-05 gnorm: 1.09 [ 8:06:34<16:26:45] +[titan] 2025-10-05 06:40:54,441 - root - INFO - step: 13215 loss: 2.3746 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 06:40:54,441 - root - INFO - lr: 3.9339e-05 gnorm: 1.09 [ 8:06:45<16:26:34] +[titan] 2025-10-05 06:41:05,315 - root - INFO - step: 13220 loss: 2.3394 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0660 +[titan] 2025-10-05 06:41:05,315 - root - INFO - lr: 3.9331e-05 gnorm: 1.13 [ 8:06:55<16:26:23] +[titan] 2025-10-05 06:41:16,174 - root - INFO - step: 13225 loss: 2.2522 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 06:41:16,174 - root - INFO - lr: 3.9324e-05 gnorm: 1.10 [ 8:07:06<16:26:11] +[titan] 2025-10-05 06:41:27,031 - root - INFO - step: 13230 loss: 2.2903 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:41:27,031 - root - INFO - lr: 
3.9316e-05 gnorm: 1.10 [ 8:07:17<16:26:00] +[titan] 2025-10-05 06:41:37,890 - root - INFO - step: 13235 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0078 +[titan] 2025-10-05 06:41:37,890 - root - INFO - lr: 3.9308e-05 gnorm: 1.09 [ 8:07:28<16:25:48] +[titan] 2025-10-05 06:41:48,764 - root - INFO - step: 13240 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 06:41:48,764 - root - INFO - lr: 3.9301e-05 gnorm: 1.10 [ 8:07:39<16:25:37] +[titan] 2025-10-05 06:41:59,671 - root - INFO - step: 13245 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0046 +[titan] 2025-10-05 06:41:59,672 - root - INFO - lr: 3.9293e-05 gnorm: 1.13 [ 8:07:50<16:25:26] +[titan] 2025-10-05 06:42:08,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:42:10,557 - root - INFO - step: 13250 loss: 2.3326 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0599 +[titan] 2025-10-05 06:42:10,557 - root - INFO - lr: 3.9286e-05 gnorm: 1.14 [ 8:08:01<16:25:14] +[titan] 2025-10-05 06:42:21,421 - root - INFO - step: 13255 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 06:42:21,421 - root - INFO - lr: 3.9278e-05 gnorm: 1.14 [ 8:08:12<16:25:03] +[titan] 2025-10-05 06:42:32,317 - root - INFO - step: 13260 loss: 2.2022 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9453 +[titan] 2025-10-05 06:42:32,317 - root - INFO - lr: 3.9270e-05 gnorm: 1.07 [ 8:08:22<16:24:51] +[titan] 2025-10-05 06:42:43,197 - root - INFO - step: 13265 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 06:42:43,198 - root - INFO - lr: 3.9263e-05 gnorm: 1.11 [ 8:08:33<16:24:40] +[titan] 2025-10-05 06:42:54,090 - root - INFO - step: 13270 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 06:42:54,091 - root - INFO - lr: 3.9255e-05 gnorm: 1.10 [ 8:08:44<16:24:29] +[titan] 2025-10-05 06:43:05,001 - root - INFO - step: 13275 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 06:43:05,001 - root - INFO - lr: 3.9248e-05 gnorm: 1.10 [ 8:08:55<16:24:17] +[titan] 2025-10-05 06:43:15,880 - root - INFO - step: 13280 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:15,880 - root - INFO - lr: 
3.9240e-05 gnorm: 1.07 [ 8:09:06<16:24:06] +[titan] 2025-10-05 06:43:26,737 - root - INFO - step: 13285 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:26,737 - root - INFO - lr: 3.9232e-05 gnorm: 1.11 [ 8:09:17<16:23:55] +[titan] 2025-10-05 06:43:37,602 - root - INFO - step: 13290 loss: 2.3086 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:43:37,602 - root - INFO - lr: 3.9225e-05 gnorm: 1.10 [ 8:09:28<16:23:43] +[titan] 2025-10-05 06:43:48,473 - root - INFO - step: 13295 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 06:43:48,473 - root - INFO - lr: 3.9217e-05 gnorm: 1.11 [ 8:09:39<16:23:32] +[titan] 2025-10-05 06:43:57,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:43:59,317 - root - INFO - step: 13300 loss: 2.3797 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 06:43:59,318 - root - INFO - lr: 3.9209e-05 gnorm: 1.11 [ 8:09:49<16:23:20] +[titan] 2025-10-05 06:44:10,186 - root - INFO - step: 13305 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0141 +[titan] 2025-10-05 06:44:10,186 - root - INFO - lr: 3.9202e-05 gnorm: 1.09 [ 8:10:00<16:23:09] +[titan] 2025-10-05 06:44:21,180 - root - INFO - step: 13310 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0337 +[titan] 2025-10-05 06:44:21,180 - root - INFO - lr: 3.9194e-05 gnorm: 1.09 [ 8:10:11<16:22:58] +[titan] 2025-10-05 06:44:25,693 - root - INFO - Dumping profiler traces at step 13312 +[titan] 2025-10-05 06:44:25,732 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:44:32,265 - root - INFO - step: 13315 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 29,562 tflops: 410.13 mfu: 41.47% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 06:44:32,265 - root - INFO - lr: 3.9187e-05 gnorm: 1.04 [ 8:10:22<16:22:47] +[titan] 2025-10-05 06:44:43,144 - root - INFO - step: 13320 loss: 2.3112 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 06:44:43,144 - root - INFO - lr: 3.9179e-05 gnorm: 1.13 [ 8:10:33<16:22:35] +[titan] 2025-10-05 06:44:54,006 - root - INFO - step: 13325 loss: 2.3530 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0783 +[titan] 2025-10-05 06:44:54,006 - root - INFO - lr: 3.9171e-05 gnorm: 1.06 [ 8:10:44<16:22:24] +[titan] 2025-10-05 06:45:04,897 - root - INFO - step: 13330 loss: 
2.3671 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 06:45:04,897 - root - INFO - lr: 3.9164e-05 gnorm: 1.11 [ 8:10:55<16:22:13] +[titan] 2025-10-05 06:45:15,754 - root - INFO - step: 13335 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0644 +[titan] 2025-10-05 06:45:15,754 - root - INFO - lr: 3.9156e-05 gnorm: 1.16 [ 8:11:06<16:22:01] +[titan] 2025-10-05 06:45:26,632 - root - INFO - step: 13340 loss: 2.2623 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:45:26,633 - root - INFO - lr: 3.9148e-05 gnorm: 1.12 [ 8:11:17<16:21:50] +[titan] 2025-10-05 06:45:37,522 - root - INFO - step: 13345 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 06:45:37,522 - root - INFO - lr: 3.9141e-05 gnorm: 1.07 [ 8:11:28<16:21:38] +[titan] 2025-10-05 06:45:46,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:45:48,374 - root - INFO - step: 13350 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:45:48,374 - root - INFO - lr: 3.9133e-05 gnorm: 1.10 [ 8:11:38<16:21:27] +[titan] 2025-10-05 06:45:59,227 - root - INFO - step: 13355 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0173 +[titan] 2025-10-05 06:45:59,227 - root - INFO - lr: 3.9126e-05 gnorm: 1.11 [ 8:11:49<16:21:15] +[titan] 2025-10-05 06:46:10,100 - root - INFO - step: 13360 loss: 2.3111 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:46:10,100 - root - INFO - lr: 3.9118e-05 gnorm: 1.11 [ 8:12:00<16:21:04] +[titan] 2025-10-05 06:46:20,957 - root - INFO - step: 13365 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 06:46:20,958 - root - INFO - lr: 3.9110e-05 gnorm: 1.10 [ 8:12:11<16:20:53] +[titan] 2025-10-05 06:46:31,838 - root - INFO - step: 13370 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0254 +[titan] 2025-10-05 06:46:31,838 - root - INFO - lr: 3.9103e-05 gnorm: 1.13 [ 8:12:22<16:20:41] +[titan] 2025-10-05 06:46:42,735 - root - INFO - step: 13375 loss: 2.3437 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0696 +[titan] 2025-10-05 06:46:42,735 - root - INFO - lr: 3.9095e-05 gnorm: 1.12 [ 8:12:33<16:20:30] +[titan] 2025-10-05 06:46:53,595 - root - INFO - step: 13380 loss: 2.2952 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0281 +[titan] 2025-10-05 06:46:53,595 - root - INFO - lr: 
3.9087e-05 gnorm: 1.07 [ 8:12:44<16:20:18] +[titan] 2025-10-05 06:47:04,484 - root - INFO - step: 13385 loss: 2.3167 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0450 +[titan] 2025-10-05 06:47:04,485 - root - INFO - lr: 3.9080e-05 gnorm: 1.12 [ 8:12:55<16:20:07] +[titan] 2025-10-05 06:47:15,385 - root - INFO - step: 13390 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:47:15,385 - root - INFO - lr: 3.9072e-05 gnorm: 1.13 [ 8:13:05<16:19:56] +[titan] 2025-10-05 06:47:26,291 - root - INFO - step: 13395 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:47:26,291 - root - INFO - lr: 3.9064e-05 gnorm: 1.09 [ 8:13:16<16:19:44] +[titan] 2025-10-05 06:47:34,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:47:37,159 - root - INFO - step: 13400 loss: 2.2934 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0255 +[titan] 2025-10-05 06:47:37,159 - root - INFO - lr: 3.9057e-05 gnorm: 1.10 [ 8:13:27<16:19:33] +[titan] 2025-10-05 06:47:48,051 - root - INFO - step: 13405 loss: 2.1829 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 06:47:48,051 - root - INFO - lr: 3.9049e-05 gnorm: 1.13 [ 8:13:38<16:19:22] +[titan] 2025-10-05 06:47:58,962 - root - INFO - step: 13410 loss: 2.3403 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0664 +[titan] 2025-10-05 06:47:58,962 - root - INFO - lr: 3.9041e-05 gnorm: 1.08 [ 8:13:49<16:19:10] +[titan] 2025-10-05 06:48:09,859 - root - INFO - step: 13415 loss: 2.2971 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:48:09,859 - root - INFO - lr: 3.9034e-05 gnorm: 1.09 [ 8:14:00<16:18:59] +[titan] 2025-10-05 06:48:20,742 - root - INFO - step: 13420 loss: 2.3033 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0348 +[titan] 2025-10-05 06:48:20,742 - root - INFO - lr: 3.9026e-05 gnorm: 1.09 [ 8:14:11<16:18:48] +[titan] 2025-10-05 06:48:31,616 - root - INFO - step: 13425 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0709 +[titan] 2025-10-05 06:48:31,616 - root - INFO - lr: 3.9018e-05 gnorm: 1.11 [ 8:14:22<16:18:36] +[titan] 2025-10-05 06:48:42,471 - root - INFO - step: 13430 loss: 2.2153 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 06:48:42,471 - root - INFO - lr: 
3.9011e-05 gnorm: 1.09 [ 8:14:33<16:18:25] +[titan] 2025-10-05 06:48:53,334 - root - INFO - step: 13435 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 06:48:53,334 - root - INFO - lr: 3.9003e-05 gnorm: 1.10 [ 8:14:43<16:18:13] +[titan] 2025-10-05 06:49:04,235 - root - INFO - step: 13440 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 06:49:04,235 - root - INFO - lr: 3.8995e-05 gnorm: 1.10 [ 8:14:54<16:18:02] +[titan] 2025-10-05 06:49:15,122 - root - INFO - step: 13445 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0366 +[titan] 2025-10-05 06:49:15,122 - root - INFO - lr: 3.8988e-05 gnorm: 1.10 [ 8:15:05<16:17:51] +[titan] 2025-10-05 06:49:23,790 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:49:25,981 - root - INFO - step: 13450 loss: 2.2828 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0115 +[titan] 2025-10-05 06:49:25,981 - root - INFO - lr: 3.8980e-05 gnorm: 1.07 [ 8:15:16<16:17:39] +[titan] 2025-10-05 06:49:36,831 - root - INFO - step: 13455 loss: 2.2498 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9867 +[titan] 2025-10-05 06:49:36,831 - root - INFO - lr: 3.8972e-05 gnorm: 1.03 [ 8:15:27<16:17:28] +[titan] 2025-10-05 06:49:47,714 - root - INFO - step: 13460 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0682 +[titan] 2025-10-05 06:49:47,714 - root - INFO - lr: 3.8965e-05 gnorm: 1.14 [ 8:15:38<16:17:16] +[titan] 2025-10-05 06:49:58,585 - root - INFO - step: 13465 loss: 2.2324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 06:49:58,585 - root - INFO - lr: 3.8957e-05 gnorm: 1.11 [ 8:15:49<16:17:05] +[titan] 2025-10-05 06:50:09,688 - root - INFO - step: 13470 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 29,514 tflops: 409.46 mfu: 41.40% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9928 +[titan] 2025-10-05 06:50:09,688 - root - INFO - lr: 3.8949e-05 gnorm: 1.07 [ 8:16:00<16:16:54] +[titan] 2025-10-05 06:50:20,551 - root - INFO - step: 13475 loss: 2.2930 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0250 +[titan] 2025-10-05 06:50:20,551 - root - INFO - lr: 3.8942e-05 gnorm: 1.12 [ 8:16:11<16:16:43] +[titan] 2025-10-05 06:50:31,416 - root - INFO - step: 13480 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:50:31,416 - root - INFO - lr: 
3.8934e-05 gnorm: 1.09 [ 8:16:21<16:16:31] +[titan] 2025-10-05 06:50:42,269 - root - INFO - step: 13485 loss: 2.2218 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9617 +[titan] 2025-10-05 06:50:42,269 - root - INFO - lr: 3.8926e-05 gnorm: 1.10 [ 8:16:32<16:16:20] +[titan] 2025-10-05 06:50:53,127 - root - INFO - step: 13490 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 06:50:53,128 - root - INFO - lr: 3.8919e-05 gnorm: 1.07 [ 8:16:43<16:16:08] +[titan] 2025-10-05 06:51:03,982 - root - INFO - step: 13495 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:51:03,982 - root - INFO - lr: 3.8911e-05 gnorm: 1.09 [ 8:16:54<16:15:57] +[titan] 2025-10-05 06:51:12,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:51:14,857 - root - INFO - step: 13500 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 06:51:14,857 - root - INFO - lr: 3.8903e-05 gnorm: 1.09 [ 8:17:05<16:15:46] +[titan] 2025-10-05 06:51:25,746 - root - INFO - step: 13505 loss: 2.2715 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 06:51:25,746 - root - INFO - lr: 3.8896e-05 gnorm: 1.09 [ 8:17:16<16:15:34] +[titan] 2025-10-05 06:51:36,614 - root - INFO - step: 13510 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 06:51:36,614 - root - INFO - lr: 3.8888e-05 gnorm: 1.08 [ 8:17:27<16:15:23] +[titan] 2025-10-05 06:51:47,494 - root - INFO - step: 13515 loss: 2.2519 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 06:51:47,494 - root - INFO - lr: 3.8880e-05 gnorm: 1.12 [ 8:17:38<16:15:12] +[titan] 2025-10-05 06:51:58,360 - root - INFO - step: 13520 loss: 2.2323 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:51:58,360 - root - INFO - lr: 3.8872e-05 gnorm: 1.05 [ 8:17:48<16:15:00] +[titan] 2025-10-05 06:52:09,236 - root - INFO - step: 13525 loss: 2.2346 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 06:52:09,236 - root - INFO - lr: 3.8865e-05 gnorm: 1.07 [ 8:17:59<16:14:49] +[titan] 2025-10-05 06:52:20,103 - root - INFO - step: 13530 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9669 +[titan] 2025-10-05 06:52:20,103 - root - INFO - lr: 
3.8857e-05 gnorm: 1.08 [ 8:18:10<16:14:37] +[titan] 2025-10-05 06:52:30,992 - root - INFO - step: 13535 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9949 +[titan] 2025-10-05 06:52:30,992 - root - INFO - lr: 3.8849e-05 gnorm: 1.10 [ 8:18:21<16:14:26] +[titan] 2025-10-05 06:52:41,845 - root - INFO - step: 13540 loss: 2.2743 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0096 +[titan] 2025-10-05 06:52:41,846 - root - INFO - lr: 3.8842e-05 gnorm: 1.16 [ 8:18:32<16:14:15] +[titan] 2025-10-05 06:52:52,731 - root - INFO - step: 13545 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:52:52,731 - root - INFO - lr: 3.8834e-05 gnorm: 1.19 [ 8:18:43<16:14:03] +[titan] 2025-10-05 06:53:01,407 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:53:03,584 - root - INFO - step: 13550 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:53:03,584 - root - INFO - lr: 3.8826e-05 gnorm: 1.12 [ 8:18:54<16:13:52] +[titan] 2025-10-05 06:53:14,560 - root - INFO - step: 13555 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.18 mfu: 41.88% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:53:14,560 - root - INFO - lr: 3.8818e-05 gnorm: 1.18 [ 8:19:05<16:13:41] +[titan] 2025-10-05 06:53:25,426 - root - INFO - step: 13560 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:53:25,426 - root - INFO - lr: 3.8811e-05 gnorm: 1.10 [ 8:19:15<16:13:29] +[titan] 2025-10-05 06:53:36,319 - root - INFO - step: 13565 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0242 +[titan] 2025-10-05 06:53:36,320 - root - INFO - lr: 3.8803e-05 gnorm: 1.11 [ 8:19:26<16:13:18] +[titan] 2025-10-05 06:53:47,222 - root - INFO - step: 13570 loss: 2.2893 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:53:47,222 - root - INFO - lr: 3.8795e-05 gnorm: 1.11 [ 8:19:37<16:13:07] +[titan] 2025-10-05 06:53:58,096 - root - INFO - step: 13575 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9963 +[titan] 2025-10-05 06:53:58,096 - root - INFO - lr: 3.8788e-05 gnorm: 1.11 [ 8:19:48<16:12:55] +[titan] 2025-10-05 06:54:08,974 - root - INFO - step: 13580 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:54:08,975 - root - INFO - lr: 
3.8780e-05 gnorm: 1.11 [ 8:19:59<16:12:44] +[titan] 2025-10-05 06:54:19,877 - root - INFO - step: 13585 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0437 +[titan] 2025-10-05 06:54:19,877 - root - INFO - lr: 3.8772e-05 gnorm: 1.15 [ 8:20:10<16:12:32] +[titan] 2025-10-05 06:54:30,750 - root - INFO - step: 13590 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0181 +[titan] 2025-10-05 06:54:30,750 - root - INFO - lr: 3.8764e-05 gnorm: 1.09 [ 8:20:21<16:12:21] +[titan] 2025-10-05 06:54:41,615 - root - INFO - step: 13595 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0386 +[titan] 2025-10-05 06:54:41,615 - root - INFO - lr: 3.8757e-05 gnorm: 1.12 [ 8:20:32<16:12:10] +[titan] 2025-10-05 06:54:50,323 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:54:52,501 - root - INFO - step: 13600 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:54:52,501 - root - INFO - lr: 3.8749e-05 gnorm: 1.12 [ 8:20:43<16:11:58] +[titan] 2025-10-05 06:55:03,350 - root - INFO - step: 13605 loss: 2.2279 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 06:55:03,350 - root - INFO - lr: 3.8741e-05 gnorm: 1.09 [ 8:20:53<16:11:47] +[titan] 2025-10-05 06:55:14,228 - root - INFO - step: 13610 loss: 2.3259 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0547 +[titan] 2025-10-05 06:55:14,228 - root - INFO - lr: 3.8734e-05 gnorm: 1.14 [ 8:21:04<16:11:36] +[titan] 2025-10-05 06:55:25,123 - root - INFO - step: 13615 loss: 2.2661 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0007 +[titan] 2025-10-05 06:55:25,123 - root - INFO - lr: 3.8726e-05 gnorm: 1.11 [ 8:21:15<16:11:24] +[titan] 2025-10-05 06:55:35,976 - root - INFO - step: 13620 loss: 2.3686 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0920 +[titan] 2025-10-05 06:55:35,976 - root - INFO - lr: 3.8718e-05 gnorm: 1.15 [ 8:21:26<16:11:13] +[titan] 2025-10-05 06:55:46,835 - root - INFO - step: 13625 loss: 2.2851 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0187 +[titan] 2025-10-05 06:55:46,835 - root - INFO - lr: 3.8710e-05 gnorm: 1.07 [ 8:21:37<16:11:01] +[titan] 2025-10-05 06:55:57,740 - root - INFO - step: 13630 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0176 +[titan] 2025-10-05 06:55:57,740 - root - INFO - lr: 
3.8703e-05 gnorm: 1.08 [ 8:21:48<16:10:50] +[titan] 2025-10-05 06:56:08,602 - root - INFO - step: 13635 loss: 2.3123 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 06:56:08,602 - root - INFO - lr: 3.8695e-05 gnorm: 1.12 [ 8:21:59<16:10:39] +[titan] 2025-10-05 06:56:19,485 - root - INFO - step: 13640 loss: 2.2360 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 06:56:19,486 - root - INFO - lr: 3.8687e-05 gnorm: 1.08 [ 8:22:10<16:10:27] +[titan] 2025-10-05 06:56:30,339 - root - INFO - step: 13645 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0101 +[titan] 2025-10-05 06:56:30,339 - root - INFO - lr: 3.8679e-05 gnorm: 1.20 [ 8:22:20<16:10:16] +[titan] 2025-10-05 06:56:39,024 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:56:41,218 - root - INFO - step: 13650 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0411 +[titan] 2025-10-05 06:56:41,218 - root - INFO - lr: 3.8672e-05 gnorm: 1.10 [ 8:22:31<16:10:05] +[titan] 2025-10-05 06:56:52,068 - root - INFO - step: 13655 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 06:56:52,068 - root - INFO - lr: 3.8664e-05 gnorm: 1.09 [ 8:22:42<16:09:53] +[titan] 2025-10-05 06:57:02,942 - root - INFO - step: 13660 loss: 2.3364 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0632 +[titan] 2025-10-05 06:57:02,942 - root - INFO - lr: 3.8656e-05 gnorm: 1.13 [ 8:22:53<16:09:42] +[titan] 2025-10-05 06:57:13,852 - root - INFO - step: 13665 loss: 2.2401 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 06:57:13,852 - root - INFO - lr: 3.8648e-05 gnorm: 1.09 [ 8:23:04<16:09:30] +[titan] 2025-10-05 06:57:24,731 - root - INFO - step: 13670 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9808 +[titan] 2025-10-05 06:57:24,731 - root - INFO - lr: 3.8641e-05 gnorm: 1.12 [ 8:23:15<16:09:19] +[titan] 2025-10-05 06:57:35,601 - root - INFO - step: 13675 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 06:57:35,601 - root - INFO - lr: 3.8633e-05 gnorm: 1.12 [ 8:23:26<16:09:08] +[titan] 2025-10-05 06:57:46,492 - root - INFO - step: 13680 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9915 +[titan] 2025-10-05 06:57:46,493 - root - INFO - lr: 
3.8625e-05 gnorm: 1.09 [ 8:23:37<16:08:56] +[titan] 2025-10-05 06:57:57,361 - root - INFO - step: 13685 loss: 2.2907 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:57:57,361 - root - INFO - lr: 3.8617e-05 gnorm: 1.05 [ 8:23:47<16:08:45] +[titan] 2025-10-05 06:58:08,244 - root - INFO - step: 13690 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 06:58:08,244 - root - INFO - lr: 3.8610e-05 gnorm: 1.12 [ 8:23:58<16:08:34] +[titan] 2025-10-05 06:58:19,163 - root - INFO - step: 13695 loss: 2.2749 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0089 +[titan] 2025-10-05 06:58:19,164 - root - INFO - lr: 3.8602e-05 gnorm: 1.09 [ 8:24:09<16:08:22] +[titan] 2025-10-05 06:58:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:58:30,056 - root - INFO - step: 13700 loss: 2.3146 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 06:58:30,056 - root - INFO - lr: 3.8594e-05 gnorm: 1.10 [ 8:24:20<16:08:11] +[titan] 2025-10-05 06:58:40,938 - root - INFO - step: 13705 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 06:58:40,938 - root - INFO - lr: 3.8586e-05 gnorm: 1.07 [ 8:24:31<16:08:00] +[titan] 2025-10-05 06:58:51,816 - root - INFO - step: 13710 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 06:58:51,816 - root - INFO - lr: 3.8578e-05 gnorm: 1.10 [ 8:24:42<16:07:48] +[titan] 2025-10-05 06:59:02,700 - root - INFO - step: 13715 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 06:59:02,700 - root - INFO - lr: 3.8571e-05 gnorm: 1.12 [ 8:24:53<16:07:37] +[titan] 2025-10-05 06:59:13,554 - root - INFO - step: 13720 loss: 2.3118 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:59:13,554 - root - INFO - lr: 3.8563e-05 gnorm: 1.14 [ 8:25:04<16:07:26] +[titan] 2025-10-05 06:59:24,420 - root - INFO - step: 13725 loss: 2.2285 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9687 +[titan] 2025-10-05 06:59:24,420 - root - INFO - lr: 3.8555e-05 gnorm: 1.11 [ 8:25:14<16:07:14] +[titan] 2025-10-05 06:59:35,307 - root - INFO - step: 13730 loss: 2.2243 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 06:59:35,307 - root - INFO - lr: 
3.8547e-05 gnorm: 1.10 [ 8:25:25<16:07:03] +[titan] 2025-10-05 06:59:46,179 - root - INFO - step: 13735 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 06:59:46,179 - root - INFO - lr: 3.8540e-05 gnorm: 1.08 [ 8:25:36<16:06:51] +[titan] 2025-10-05 06:59:57,061 - root - INFO - step: 13740 loss: 2.2450 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9828 +[titan] 2025-10-05 06:59:57,061 - root - INFO - lr: 3.8532e-05 gnorm: 1.15 [ 8:25:47<16:06:40] +[titan] 2025-10-05 07:00:07,935 - root - INFO - step: 13745 loss: 2.3278 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:00:07,935 - root - INFO - lr: 3.8524e-05 gnorm: 1.10 [ 8:25:58<16:06:29] +[titan] 2025-10-05 07:00:16,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:00:18,832 - root - INFO - step: 13750 loss: 2.3084 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 07:00:18,833 - root - INFO - lr: 3.8516e-05 gnorm: 1.10 [ 8:26:09<16:06:17] +[titan] 2025-10-05 07:00:29,706 - root - INFO - step: 13755 loss: 2.3204 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0493 +[titan] 2025-10-05 07:00:29,706 - root - INFO - lr: 3.8509e-05 gnorm: 1.11 [ 8:26:20<16:06:06] +[titan] 2025-10-05 07:00:40,608 - root - INFO - step: 13760 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0305 +[titan] 2025-10-05 07:00:40,608 - root - INFO - lr: 3.8501e-05 gnorm: 1.15 [ 8:26:31<16:05:55] +[titan] 2025-10-05 07:00:51,487 - root - INFO - step: 13765 loss: 2.2771 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 07:00:51,487 - root - INFO - lr: 3.8493e-05 gnorm: 1.08 [ 8:26:42<16:05:43] +[titan] 2025-10-05 07:01:02,367 - root - INFO - step: 13770 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0107 +[titan] 2025-10-05 07:01:02,367 - root - INFO - lr: 3.8485e-05 gnorm: 1.52 [ 8:26:52<16:05:32] +[titan] 2025-10-05 07:01:13,257 - root - INFO - step: 13775 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0460 +[titan] 2025-10-05 07:01:13,257 - root - INFO - lr: 3.8477e-05 gnorm: 1.11 [ 8:27:03<16:05:21] +[titan] 2025-10-05 07:01:24,150 - root - INFO - step: 13780 loss: 2.3133 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 07:01:24,150 - root - INFO - lr: 
3.8470e-05 gnorm: 1.05 [ 8:27:14<16:05:09] +[titan] 2025-10-05 07:01:35,054 - root - INFO - step: 13785 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9600 +[titan] 2025-10-05 07:01:35,054 - root - INFO - lr: 3.8462e-05 gnorm: 1.10 [ 8:27:25<16:04:58] +[titan] 2025-10-05 07:01:45,974 - root - INFO - step: 13790 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0014 +[titan] 2025-10-05 07:01:45,974 - root - INFO - lr: 3.8454e-05 gnorm: 1.09 [ 8:27:36<16:04:47] +[titan] 2025-10-05 07:01:56,865 - root - INFO - step: 13795 loss: 2.2879 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:01:56,865 - root - INFO - lr: 3.8446e-05 gnorm: 1.08 [ 8:27:47<16:04:35] +[titan] 2025-10-05 07:02:05,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:02:07,773 - root - INFO - step: 13800 loss: 2.2846 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0177 +[titan] 2025-10-05 07:02:07,773 - root - INFO - lr: 3.8438e-05 gnorm: 1.09 [ 8:27:58<16:04:24] +[titan] 2025-10-05 07:02:18,700 - root - INFO - step: 13805 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 07:02:18,700 - root - INFO - lr: 3.8431e-05 gnorm: 1.09 [ 8:28:09<16:04:13] +[titan] 2025-10-05 07:02:29,593 - root - INFO - step: 13810 loss: 2.2868 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 07:02:29,593 - root - INFO - lr: 3.8423e-05 gnorm: 1.08 [ 8:28:20<16:04:02] +[titan] 2025-10-05 07:02:40,489 - root - INFO - step: 13815 loss: 2.3125 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 07:02:40,489 - root - INFO - lr: 3.8415e-05 gnorm: 1.08 [ 8:28:31<16:03:50] +[titan] 2025-10-05 07:02:51,396 - root - INFO - step: 13820 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1047 +[titan] 2025-10-05 07:02:51,396 - root - INFO - lr: 3.8407e-05 gnorm: 1.13 [ 8:28:41<16:03:39] +[titan] 2025-10-05 07:03:00,403 - root - INFO - Dumping profiler traces at step 13824 +[titan] 2025-10-05 07:03:00,440 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:03:02,625 - root - INFO - step: 13825 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 29,181 tflops: 404.84 mfu: 40.93% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:03:02,626 - root - INFO - lr: 3.8399e-05 gnorm: 1.09 [ 8:28:53<16:03:28] +[titan] 2025-10-05 07:03:13,525 - root - INFO - step: 13830 loss: 
2.3225 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0510 +[titan] 2025-10-05 07:03:13,526 - root - INFO - lr: 3.8392e-05 gnorm: 1.08 [ 8:29:04<16:03:17] +[titan] 2025-10-05 07:03:24,466 - root - INFO - step: 13835 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:03:24,466 - root - INFO - lr: 3.8384e-05 gnorm: 1.07 [ 8:29:14<16:03:06] +[titan] 2025-10-05 07:03:35,347 - root - INFO - step: 13840 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:03:35,348 - root - INFO - lr: 3.8376e-05 gnorm: 1.09 [ 8:29:25<16:02:54] +[titan] 2025-10-05 07:03:46,225 - root - INFO - step: 13845 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 07:03:46,225 - root - INFO - lr: 3.8368e-05 gnorm: 1.11 [ 8:29:36<16:02:43] +[titan] 2025-10-05 07:03:54,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:03:57,111 - root - INFO - step: 13850 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0113 +[titan] 2025-10-05 07:03:57,111 - root - INFO - lr: 3.8360e-05 gnorm: 1.11 [ 8:29:47<16:02:32] +[titan] 2025-10-05 07:04:08,025 - root - INFO - step: 13855 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0769 +[titan] 2025-10-05 07:04:08,025 - root - INFO - lr: 3.8353e-05 gnorm: 1.11 [ 8:29:58<16:02:20] +[titan] 2025-10-05 07:04:18,937 - root - INFO - step: 13860 loss: 2.2484 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9860 +[titan] 2025-10-05 07:04:18,937 - root - INFO - lr: 3.8345e-05 gnorm: 1.13 [ 8:30:09<16:02:09] +[titan] 2025-10-05 07:04:29,819 - root - INFO - step: 13865 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9412 +[titan] 2025-10-05 07:04:29,819 - root - INFO - lr: 3.8337e-05 gnorm: 1.13 [ 8:30:20<16:01:58] +[titan] 2025-10-05 07:04:40,706 - root - INFO - step: 13870 loss: 2.1522 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 07:04:40,706 - root - INFO - lr: 3.8329e-05 gnorm: 1.10 [ 8:30:31<16:01:46] +[titan] 2025-10-05 07:04:51,600 - root - INFO - step: 13875 loss: 2.2926 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:04:51,601 - root - INFO - lr: 3.8321e-05 gnorm: 1.13 [ 8:30:42<16:01:35] +[titan] 2025-10-05 07:05:02,483 - root - INFO - step: 13880 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 07:05:02,484 - root - INFO - lr: 
3.8313e-05 gnorm: 1.05 [ 8:30:53<16:01:24] +[titan] 2025-10-05 07:05:13,375 - root - INFO - step: 13885 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 07:05:13,375 - root - INFO - lr: 3.8306e-05 gnorm: 1.09 [ 8:31:03<16:01:12] +[titan] 2025-10-05 07:05:24,346 - root - INFO - step: 13890 loss: 2.3386 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 07:05:24,347 - root - INFO - lr: 3.8298e-05 gnorm: 1.09 [ 8:31:14<16:01:01] +[titan] 2025-10-05 07:05:35,221 - root - INFO - step: 13895 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 07:05:35,221 - root - INFO - lr: 3.8290e-05 gnorm: 1.10 [ 8:31:25<16:00:50] +[titan] 2025-10-05 07:05:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:05:46,101 - root - INFO - step: 13900 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 07:05:46,101 - root - INFO - lr: 3.8282e-05 gnorm: 1.11 [ 8:31:36<16:00:38] +[titan] 2025-10-05 07:05:56,991 - root - INFO - step: 13905 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 07:05:56,991 - root - INFO - lr: 3.8274e-05 gnorm: 1.09 [ 8:31:47<16:00:27] +[titan] 2025-10-05 07:06:07,860 - root - INFO - step: 13910 loss: 2.2822 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0150 +[titan] 2025-10-05 07:06:07,860 - root - INFO - lr: 3.8266e-05 gnorm: 1.06 [ 8:31:58<16:00:16] +[titan] 2025-10-05 07:06:18,755 - root - INFO - step: 13915 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0626 +[titan] 2025-10-05 07:06:18,755 - root - INFO - lr: 3.8259e-05 gnorm: 1.12 [ 8:32:09<16:00:04] +[titan] 2025-10-05 07:06:29,694 - root - INFO - step: 13920 loss: 2.3240 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0527 +[titan] 2025-10-05 07:06:29,694 - root - INFO - lr: 3.8251e-05 gnorm: 1.13 [ 8:32:20<15:59:53] +[titan] 2025-10-05 07:06:40,578 - root - INFO - step: 13925 loss: 2.2091 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9511 +[titan] 2025-10-05 07:06:40,578 - root - INFO - lr: 3.8243e-05 gnorm: 1.13 [ 8:32:31<15:59:42] +[titan] 2025-10-05 07:06:51,433 - root - INFO - step: 13930 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:06:51,434 - root - INFO - lr: 
3.8235e-05 gnorm: 1.12 [ 8:32:41<15:59:31] +[titan] 2025-10-05 07:07:02,325 - root - INFO - step: 13935 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0277 +[titan] 2025-10-05 07:07:02,326 - root - INFO - lr: 3.8227e-05 gnorm: 1.12 [ 8:32:52<15:59:19] +[titan] 2025-10-05 07:07:13,223 - root - INFO - step: 13940 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 07:07:13,223 - root - INFO - lr: 3.8219e-05 gnorm: 1.05 [ 8:33:03<15:59:08] +[titan] 2025-10-05 07:07:24,144 - root - INFO - step: 13945 loss: 2.2627 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 07:07:24,144 - root - INFO - lr: 3.8212e-05 gnorm: 1.07 [ 8:33:14<15:58:57] +[titan] 2025-10-05 07:07:32,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:07:35,060 - root - INFO - step: 13950 loss: 2.3247 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0535 +[titan] 2025-10-05 07:07:35,061 - root - INFO - lr: 3.8204e-05 gnorm: 1.15 [ 8:33:25<15:58:45] +[titan] 2025-10-05 07:07:45,949 - root - INFO - step: 13955 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 1.9994 +[titan] 2025-10-05 07:07:45,949 - root - INFO - lr: 3.8196e-05 gnorm: 1.12 [ 8:33:36<15:58:34] +[titan] 2025-10-05 07:07:56,827 - root - INFO - step: 13960 loss: 2.2073 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 07:07:56,827 - root - INFO - lr: 3.8188e-05 gnorm: 1.13 [ 8:33:47<15:58:23] +[titan] 2025-10-05 07:08:07,719 - root - INFO - step: 13965 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 07:08:07,720 - root - INFO - lr: 3.8180e-05 gnorm: 1.05 [ 8:33:58<15:58:11] +[titan] 2025-10-05 07:08:18,609 - root - INFO - step: 13970 loss: 2.3210 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0494 +[titan] 2025-10-05 07:08:18,609 - root - INFO - lr: 3.8172e-05 gnorm: 1.11 [ 8:34:09<15:58:00] +[titan] 2025-10-05 07:08:29,526 - root - INFO - step: 13975 loss: 2.3414 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0678 +[titan] 2025-10-05 07:08:29,526 - root - INFO - lr: 3.8164e-05 gnorm: 1.06 [ 8:34:20<15:57:49] +[titan] 2025-10-05 07:08:40,409 - root - INFO - step: 13980 loss: 2.2904 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0234 +[titan] 2025-10-05 07:08:40,409 - root - INFO - lr: 
3.8157e-05 gnorm: 1.10 [ 8:34:30<15:57:37] +[titan] 2025-10-05 07:08:51,305 - root - INFO - step: 13985 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9806 +[titan] 2025-10-05 07:08:51,305 - root - INFO - lr: 3.8149e-05 gnorm: 1.08 [ 8:34:41<15:57:26] +[titan] 2025-10-05 07:09:02,176 - root - INFO - step: 13990 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:09:02,177 - root - INFO - lr: 3.8141e-05 gnorm: 1.06 [ 8:34:52<15:57:15] +[titan] 2025-10-05 07:09:13,061 - root - INFO - step: 13995 loss: 2.2816 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0139 +[titan] 2025-10-05 07:09:13,062 - root - INFO - lr: 3.8133e-05 gnorm: 1.14 [ 8:35:03<15:57:03] +[titan] 2025-10-05 07:09:21,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:09:23,969 - root - INFO - step: 14000 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 07:09:23,969 - root - INFO - lr: 3.8125e-05 gnorm: 1.09 [ 8:35:14<15:56:52] +[titan] 2025-10-05 07:09:34,866 - root - INFO - step: 14005 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0102 +[titan] 2025-10-05 07:09:34,866 - root - INFO - lr: 3.8117e-05 gnorm: 1.06 [ 8:35:25<15:56:41] +[titan] 2025-10-05 07:09:45,752 - root - INFO - step: 14010 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0239 +[titan] 2025-10-05 07:09:45,752 - root - INFO - lr: 3.8109e-05 gnorm: 1.14 [ 8:35:36<15:56:29] +[titan] 2025-10-05 07:09:56,681 - root - INFO - step: 14015 loss: 2.2388 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9782 +[titan] 2025-10-05 07:09:56,681 - root - INFO - lr: 3.8101e-05 gnorm: 1.10 [ 8:35:47<15:56:18] +[titan] 2025-10-05 07:10:07,561 - root - INFO - step: 14020 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0100 +[titan] 2025-10-05 07:10:07,561 - root - INFO - lr: 3.8094e-05 gnorm: 1.10 [ 8:35:58<15:56:07] +[titan] 2025-10-05 07:10:18,446 - root - INFO - step: 14025 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0031 +[titan] 2025-10-05 07:10:18,446 - root - INFO - lr: 3.8086e-05 gnorm: 1.06 [ 8:36:08<15:55:56] +[titan] 2025-10-05 07:10:29,418 - root - INFO - step: 14030 loss: 2.3296 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 07:10:29,419 - root - INFO - lr: 
3.8078e-05 gnorm: 1.11 [ 8:36:19<15:55:44] +[titan] 2025-10-05 07:10:40,286 - root - INFO - step: 14035 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0286 +[titan] 2025-10-05 07:10:40,286 - root - INFO - lr: 3.8070e-05 gnorm: 1.08 [ 8:36:30<15:55:33] +[titan] 2025-10-05 07:10:51,186 - root - INFO - step: 14040 loss: 2.3219 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 07:10:51,186 - root - INFO - lr: 3.8062e-05 gnorm: 1.08 [ 8:36:41<15:55:22] +[titan] 2025-10-05 07:11:02,100 - root - INFO - step: 14045 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:11:02,100 - root - INFO - lr: 3.8054e-05 gnorm: 1.03 [ 8:36:52<15:55:10] +[titan] 2025-10-05 07:11:10,810 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:11:13,002 - root - INFO - step: 14050 loss: 2.2598 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:13,002 - root - INFO - lr: 3.8046e-05 gnorm: 1.08 [ 8:37:03<15:54:59] +[titan] 2025-10-05 07:11:23,889 - root - INFO - step: 14055 loss: 2.2829 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0164 +[titan] 2025-10-05 07:11:23,889 - root - INFO - lr: 3.8038e-05 gnorm: 1.06 [ 8:37:14<15:54:48] +[titan] 2025-10-05 07:11:34,797 - root - INFO - step: 14060 loss: 2.2612 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:34,797 - root - INFO - lr: 3.8031e-05 gnorm: 1.08 [ 8:37:25<15:54:37] +[titan] 2025-10-05 07:11:45,686 - root - INFO - step: 14065 loss: 2.2504 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:11:45,686 - root - INFO - lr: 3.8023e-05 gnorm: 1.10 [ 8:37:36<15:54:25] +[titan] 2025-10-05 07:11:56,588 - root - INFO - step: 14070 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 07:11:56,588 - root - INFO - lr: 3.8015e-05 gnorm: 1.10 [ 8:37:47<15:54:14] +[titan] 2025-10-05 07:12:07,484 - root - INFO - step: 14075 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:12:07,484 - root - INFO - lr: 3.8007e-05 gnorm: 1.05 [ 8:37:57<15:54:03] +[titan] 2025-10-05 07:12:18,379 - root - INFO - step: 14080 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 07:12:18,379 - root - INFO - lr: 
3.7999e-05 gnorm: 1.09 [ 8:38:08<15:53:51] +[titan] 2025-10-05 07:12:29,280 - root - INFO - step: 14085 loss: 2.2541 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 07:12:29,281 - root - INFO - lr: 3.7991e-05 gnorm: 1.11 [ 8:38:19<15:53:40] +[titan] 2025-10-05 07:12:40,158 - root - INFO - step: 14090 loss: 2.2892 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0223 +[titan] 2025-10-05 07:12:40,159 - root - INFO - lr: 3.7983e-05 gnorm: 1.07 [ 8:38:30<15:53:29] +[titan] 2025-10-05 07:12:51,038 - root - INFO - step: 14095 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0060 +[titan] 2025-10-05 07:12:51,038 - root - INFO - lr: 3.7975e-05 gnorm: 1.08 [ 8:38:41<15:53:17] +[titan] 2025-10-05 07:12:59,737 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:13:01,923 - root - INFO - step: 14100 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 07:13:01,924 - root - INFO - lr: 3.7967e-05 gnorm: 1.10 [ 8:38:52<15:53:06] +[titan] 2025-10-05 07:13:12,819 - root - INFO - step: 14105 loss: 2.2680 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0026 +[titan] 2025-10-05 07:13:12,819 - root - INFO - lr: 3.7959e-05 gnorm: 1.10 [ 8:39:03<15:52:55] +[titan] 2025-10-05 07:13:23,712 - root - INFO - step: 14110 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:13:23,712 - root - INFO - lr: 3.7952e-05 gnorm: 1.06 [ 8:39:14<15:52:43] +[titan] 2025-10-05 07:13:34,613 - root - INFO - step: 14115 loss: 2.3226 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0504 +[titan] 2025-10-05 07:13:34,613 - root - INFO - lr: 3.7944e-05 gnorm: 1.17 [ 8:39:25<15:52:32] +[titan] 2025-10-05 07:13:45,510 - root - INFO - step: 14120 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 07:13:45,510 - root - INFO - lr: 3.7936e-05 gnorm: 1.12 [ 8:39:36<15:52:21] +[titan] 2025-10-05 07:13:56,397 - root - INFO - step: 14125 loss: 2.2697 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0052 +[titan] 2025-10-05 07:13:56,397 - root - INFO - lr: 3.7928e-05 gnorm: 1.11 [ 8:39:46<15:52:09] +[titan] 2025-10-05 07:14:07,282 - root - INFO - step: 14130 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 07:14:07,282 - root - INFO - lr: 
3.7920e-05 gnorm: 1.09 [ 8:39:57<15:51:58] +[titan] 2025-10-05 07:14:18,161 - root - INFO - step: 14135 loss: 2.2782 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 07:14:18,162 - root - INFO - lr: 3.7912e-05 gnorm: 1.13 [ 8:40:08<15:51:47] +[titan] 2025-10-05 07:14:29,064 - root - INFO - step: 14140 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 07:14:29,064 - root - INFO - lr: 3.7904e-05 gnorm: 1.12 [ 8:40:19<15:51:36] +[titan] 2025-10-05 07:14:39,953 - root - INFO - step: 14145 loss: 2.2613 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9977 +[titan] 2025-10-05 07:14:39,953 - root - INFO - lr: 3.7896e-05 gnorm: 1.07 [ 8:40:30<15:51:24] +[titan] 2025-10-05 07:14:48,667 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:14:50,851 - root - INFO - step: 14150 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 07:14:50,851 - root - INFO - lr: 3.7888e-05 gnorm: 1.09 [ 8:40:41<15:51:13] +[titan] 2025-10-05 07:15:01,723 - root - INFO - step: 14155 loss: 2.3499 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0768 +[titan] 2025-10-05 07:15:01,723 - root - INFO - lr: 3.7880e-05 gnorm: 1.07 [ 8:40:52<15:51:02] +[titan] 2025-10-05 07:15:12,596 - root - INFO - step: 14160 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 07:15:12,597 - root - INFO - lr: 3.7872e-05 gnorm: 1.07 [ 8:41:03<15:50:50] +[titan] 2025-10-05 07:15:23,478 - root - INFO - step: 14165 loss: 2.2806 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 07:15:23,478 - root - INFO - lr: 3.7865e-05 gnorm: 1.09 [ 8:41:13<15:50:39] +[titan] 2025-10-05 07:15:34,374 - root - INFO - step: 14170 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:15:34,374 - root - INFO - lr: 3.7857e-05 gnorm: 1.08 [ 8:41:24<15:50:28] +[titan] 2025-10-05 07:15:45,286 - root - INFO - step: 14175 loss: 2.2571 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9933 +[titan] 2025-10-05 07:15:45,287 - root - INFO - lr: 3.7849e-05 gnorm: 1.11 [ 8:41:35<15:50:16] +[titan] 2025-10-05 07:15:56,187 - root - INFO - step: 14180 loss: 2.3045 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 07:15:56,187 - root - INFO - lr: 
3.7841e-05 gnorm: 1.13 [ 8:41:46<15:50:05] +[titan] 2025-10-05 07:16:07,077 - root - INFO - step: 14185 loss: 2.2313 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9715 +[titan] 2025-10-05 07:16:07,077 - root - INFO - lr: 3.7833e-05 gnorm: 1.08 [ 8:41:57<15:49:54] +[titan] 2025-10-05 07:16:17,954 - root - INFO - step: 14190 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9824 +[titan] 2025-10-05 07:16:17,954 - root - INFO - lr: 3.7825e-05 gnorm: 1.05 [ 8:42:08<15:49:42] +[titan] 2025-10-05 07:16:28,838 - root - INFO - step: 14195 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 07:16:28,839 - root - INFO - lr: 3.7817e-05 gnorm: 1.10 [ 8:42:19<15:49:31] +[titan] 2025-10-05 07:16:37,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:16:39,702 - root - INFO - step: 14200 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9987 +[titan] 2025-10-05 07:16:39,703 - root - INFO - lr: 3.7809e-05 gnorm: 1.10 [ 8:42:30<15:49:20] +[titan] 2025-10-05 07:16:50,596 - root - INFO - step: 14205 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 07:16:50,596 - root - INFO - lr: 3.7801e-05 gnorm: 1.05 [ 8:42:41<15:49:08] +[titan] 2025-10-05 07:17:01,477 - root - INFO - step: 14210 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:17:01,477 - root - INFO - lr: 3.7793e-05 gnorm: 1.09 [ 8:42:51<15:48:57] +[titan] 2025-10-05 07:17:12,357 - root - INFO - step: 14215 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 07:17:12,357 - root - INFO - lr: 3.7785e-05 gnorm: 1.09 [ 8:43:02<15:48:46] +[titan] 2025-10-05 07:17:23,224 - root - INFO - step: 14220 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0228 +[titan] 2025-10-05 07:17:23,224 - root - INFO - lr: 3.7777e-05 gnorm: 1.12 [ 8:43:13<15:48:34] +[titan] 2025-10-05 07:17:34,101 - root - INFO - step: 14225 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9842 +[titan] 2025-10-05 07:17:34,101 - root - INFO - lr: 3.7769e-05 gnorm: 1.11 [ 8:43:24<15:48:23] +[titan] 2025-10-05 07:17:44,966 - root - INFO - step: 14230 loss: 2.2228 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9631 +[titan] 2025-10-05 07:17:44,966 - root - INFO - lr: 
3.7761e-05 gnorm: 1.06 [ 8:43:35<15:48:12] +[titan] 2025-10-05 07:17:55,865 - root - INFO - step: 14235 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:17:55,865 - root - INFO - lr: 3.7753e-05 gnorm: 1.15 [ 8:43:46<15:48:00] +[titan] 2025-10-05 07:18:06,742 - root - INFO - step: 14240 loss: 2.2274 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9683 +[titan] 2025-10-05 07:18:06,742 - root - INFO - lr: 3.7746e-05 gnorm: 1.07 [ 8:43:57<15:47:49] +[titan] 2025-10-05 07:18:17,634 - root - INFO - step: 14245 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0317 +[titan] 2025-10-05 07:18:17,634 - root - INFO - lr: 3.7738e-05 gnorm: 1.09 [ 8:44:08<15:47:38] +[titan] 2025-10-05 07:18:26,321 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:18:28,505 - root - INFO - step: 14250 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:18:28,505 - root - INFO - lr: 3.7730e-05 gnorm: 1.10 [ 8:44:18<15:47:26] +[titan] 2025-10-05 07:18:39,411 - root - INFO - step: 14255 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 07:18:39,412 - root - INFO - lr: 3.7722e-05 gnorm: 1.06 [ 8:44:29<15:47:15] +[titan] 2025-10-05 07:18:50,297 - root - INFO - step: 14260 loss: 2.3010 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0327 +[titan] 2025-10-05 07:18:50,297 - root - INFO - lr: 3.7714e-05 gnorm: 1.07 [ 8:44:40<15:47:04] +[titan] 2025-10-05 07:19:01,201 - root - INFO - step: 14265 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0104 +[titan] 2025-10-05 07:19:01,201 - root - INFO - lr: 3.7706e-05 gnorm: 1.10 [ 8:44:51<15:46:53] +[titan] 2025-10-05 07:19:12,083 - root - INFO - step: 14270 loss: 2.2667 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0022 +[titan] 2025-10-05 07:19:12,084 - root - INFO - lr: 3.7698e-05 gnorm: 1.08 [ 8:45:02<15:46:41] +[titan] 2025-10-05 07:19:22,993 - root - INFO - step: 14275 loss: 2.1944 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9385 +[titan] 2025-10-05 07:19:22,993 - root - INFO - lr: 3.7690e-05 gnorm: 1.09 [ 8:45:13<15:46:30] +[titan] 2025-10-05 07:19:33,913 - root - INFO - step: 14280 loss: 2.2467 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9851 +[titan] 2025-10-05 07:19:33,913 - root - INFO - lr: 
3.7682e-05 gnorm: 1.07 [ 8:45:24<15:46:19] +[titan] 2025-10-05 07:19:44,768 - root - INFO - step: 14285 loss: 2.2223 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9628 +[titan] 2025-10-05 07:19:44,768 - root - INFO - lr: 3.7674e-05 gnorm: 1.09 [ 8:45:35<15:46:07] +[titan] 2025-10-05 07:19:55,630 - root - INFO - step: 14290 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 07:19:55,630 - root - INFO - lr: 3.7666e-05 gnorm: 1.10 [ 8:45:46<15:45:56] +[titan] 2025-10-05 07:20:06,491 - root - INFO - step: 14295 loss: 2.2948 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 07:20:06,491 - root - INFO - lr: 3.7658e-05 gnorm: 1.11 [ 8:45:56<15:45:45] +[titan] 2025-10-05 07:20:15,198 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:20:17,375 - root - INFO - step: 14300 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 07:20:17,376 - root - INFO - lr: 3.7650e-05 gnorm: 1.15 [ 8:46:07<15:45:33] +[titan] 2025-10-05 07:20:28,246 - root - INFO - step: 14305 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0609 +[titan] 2025-10-05 07:20:28,246 - root - INFO - lr: 3.7642e-05 gnorm: 1.12 [ 8:46:18<15:45:22] +[titan] 2025-10-05 07:20:39,160 - root - INFO - step: 14310 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0267 +[titan] 2025-10-05 07:20:39,160 - root - INFO - lr: 3.7634e-05 gnorm: 1.14 [ 8:46:29<15:45:11] +[titan] 2025-10-05 07:20:50,006 - root - INFO - step: 14315 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 07:20:50,006 - root - INFO - lr: 3.7626e-05 gnorm: 1.07 [ 8:46:40<15:44:59] +[titan] 2025-10-05 07:21:00,866 - root - INFO - step: 14320 loss: 2.2698 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 07:21:00,866 - root - INFO - lr: 3.7618e-05 gnorm: 1.09 [ 8:46:51<15:44:48] +[titan] 2025-10-05 07:21:11,703 - root - INFO - step: 14325 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:21:11,703 - root - INFO - lr: 3.7610e-05 gnorm: 1.09 [ 8:47:02<15:44:37] +[titan] 2025-10-05 07:21:22,593 - root - INFO - step: 14330 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0330 +[titan] 2025-10-05 07:21:22,594 - root - INFO - lr: 
3.7602e-05 gnorm: 1.14 [ 8:47:13<15:44:25] +[titan] 2025-10-05 07:21:33,559 - root - INFO - step: 14335 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 07:21:33,560 - root - INFO - lr: 3.7594e-05 gnorm: 1.09 [ 8:47:24<15:44:14] +[titan] 2025-10-05 07:21:35,961 - root - INFO - Dumping profiler traces at step 14336 +[titan] 2025-10-05 07:21:36,010 - root - INFO - Finished dumping profiler traces in 0.05 seconds +[titan] 2025-10-05 07:21:44,698 - root - INFO - step: 14340 loss: 2.3096 memory: 118.84GiB(85.28%) tps: 29,418 tflops: 408.13 mfu: 41.27% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 07:21:44,699 - root - INFO - lr: 3.7586e-05 gnorm: 1.13 [ 8:47:35<15:44:03] +[titan] 2025-10-05 07:21:55,565 - root - INFO - step: 14345 loss: 2.3329 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 07:21:55,565 - root - INFO - lr: 3.7578e-05 gnorm: 1.11 [ 8:47:46<15:43:52] +[titan] 2025-10-05 07:22:04,240 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:22:06,418 - root - INFO - step: 14350 loss: 2.2380 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9769 +[titan] 2025-10-05 07:22:06,419 - root - INFO - lr: 3.7570e-05 gnorm: 1.07 [ 8:47:56<15:43:41] +[titan] 2025-10-05 07:22:17,273 - root - INFO - step: 14355 loss: 2.2325 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 07:22:17,273 - root - INFO - lr: 3.7562e-05 gnorm: 1.12 [ 8:48:07<15:43:29] +[titan] 2025-10-05 07:22:28,142 - root - INFO - step: 14360 loss: 2.3425 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0700 +[titan] 2025-10-05 07:22:28,143 - root - INFO - lr: 3.7554e-05 gnorm: 1.12 [ 8:48:18<15:43:18] +[titan] 2025-10-05 07:22:39,138 - root - INFO - step: 14365 loss: 2.2707 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.44 mfu: 41.80% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:22:39,139 - root - INFO - lr: 3.7546e-05 gnorm: 1.08 [ 8:48:29<15:43:07] +[titan] 2025-10-05 07:22:50,009 - root - INFO - step: 14370 loss: 2.2987 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 07:22:50,009 - root - INFO - lr: 3.7538e-05 gnorm: 1.13 [ 8:48:40<15:42:55] +[titan] 2025-10-05 07:23:00,863 - root - INFO - step: 14375 loss: 2.2114 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 07:23:00,864 - root - INFO - lr: 3.7530e-05 gnorm: 1.09 [ 8:48:51<15:42:44] +[titan] 2025-10-05 07:23:11,714 - root - INFO - step: 14380 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:23:11,714 - root - INFO - lr: 
3.7522e-05 gnorm: 1.11 [ 8:49:02<15:42:33] +[titan] 2025-10-05 07:23:22,597 - root - INFO - step: 14385 loss: 2.3245 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0537 +[titan] 2025-10-05 07:23:22,597 - root - INFO - lr: 3.7514e-05 gnorm: 1.07 [ 8:49:13<15:42:21] +[titan] 2025-10-05 07:23:33,453 - root - INFO - step: 14390 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:23:33,453 - root - INFO - lr: 3.7506e-05 gnorm: 1.10 [ 8:49:23<15:42:10] +[titan] 2025-10-05 07:23:44,404 - root - INFO - step: 14395 loss: 2.3155 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0444 +[titan] 2025-10-05 07:23:44,404 - root - INFO - lr: 3.7498e-05 gnorm: 1.07 [ 8:49:34<15:41:59] +[titan] 2025-10-05 07:23:53,093 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:23:55,271 - root - INFO - step: 14400 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:23:55,271 - root - INFO - lr: 3.7490e-05 gnorm: 1.09 [ 8:49:45<15:41:47] +[titan] 2025-10-05 07:24:06,109 - root - INFO - step: 14405 loss: 2.3174 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0466 +[titan] 2025-10-05 07:24:06,109 - root - INFO - lr: 3.7482e-05 gnorm: 1.11 [ 8:49:56<15:41:36] +[titan] 2025-10-05 07:24:16,949 - root - INFO - step: 14410 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0222 +[titan] 2025-10-05 07:24:16,949 - root - INFO - lr: 3.7474e-05 gnorm: 1.22 [ 8:50:07<15:41:25] +[titan] 2025-10-05 07:24:27,813 - root - INFO - step: 14415 loss: 2.2533 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9898 +[titan] 2025-10-05 07:24:27,813 - root - INFO - lr: 3.7466e-05 gnorm: 1.06 [ 8:50:18<15:41:13] +[titan] 2025-10-05 07:24:38,740 - root - INFO - step: 14420 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9292 +[titan] 2025-10-05 07:24:38,740 - root - INFO - lr: 3.7458e-05 gnorm: 1.07 [ 8:50:29<15:41:02] +[titan] 2025-10-05 07:24:49,616 - root - INFO - step: 14425 loss: 2.2439 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 07:24:49,616 - root - INFO - lr: 3.7450e-05 gnorm: 1.09 [ 8:50:40<15:40:51] +[titan] 2025-10-05 07:25:00,495 - root - INFO - step: 14430 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0617 +[titan] 2025-10-05 07:25:00,495 - root - INFO - lr: 
3.7442e-05 gnorm: 1.10 [ 8:50:50<15:40:40] +[titan] 2025-10-05 07:25:11,357 - root - INFO - step: 14435 loss: 2.2516 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9889 +[titan] 2025-10-05 07:25:11,357 - root - INFO - lr: 3.7434e-05 gnorm: 1.10 [ 8:51:01<15:40:28] +[titan] 2025-10-05 07:25:22,214 - root - INFO - step: 14440 loss: 2.2632 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:25:22,214 - root - INFO - lr: 3.7426e-05 gnorm: 1.10 [ 8:51:12<15:40:17] +[titan] 2025-10-05 07:25:33,076 - root - INFO - step: 14445 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 07:25:33,076 - root - INFO - lr: 3.7418e-05 gnorm: 1.09 [ 8:51:23<15:40:05] +[titan] 2025-10-05 07:25:41,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:25:44,001 - root - INFO - step: 14450 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 07:25:44,001 - root - INFO - lr: 3.7410e-05 gnorm: 1.07 [ 8:51:34<15:39:54] +[titan] 2025-10-05 07:25:54,893 - root - INFO - step: 14455 loss: 2.2554 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9922 +[titan] 2025-10-05 07:25:54,893 - root - INFO - lr: 3.7402e-05 gnorm: 1.13 [ 8:51:45<15:39:43] +[titan] 2025-10-05 07:26:05,774 - root - INFO - step: 14460 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9572 +[titan] 2025-10-05 07:26:05,774 - root - INFO - lr: 3.7394e-05 gnorm: 1.08 [ 8:51:56<15:39:32] +[titan] 2025-10-05 07:26:16,651 - root - INFO - step: 14465 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 07:26:16,651 - root - INFO - lr: 3.7386e-05 gnorm: 1.11 [ 8:52:07<15:39:20] +[titan] 2025-10-05 07:26:27,521 - root - INFO - step: 14470 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 07:26:27,521 - root - INFO - lr: 3.7378e-05 gnorm: 1.08 [ 8:52:17<15:39:09] +[titan] 2025-10-05 07:26:38,394 - root - INFO - step: 14475 loss: 2.2013 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 07:26:38,395 - root - INFO - lr: 3.7370e-05 gnorm: 1.08 [ 8:52:28<15:38:58] +[titan] 2025-10-05 07:26:49,332 - root - INFO - step: 14480 loss: 2.2812 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0154 +[titan] 2025-10-05 07:26:49,332 - root - INFO - lr: 
3.7362e-05 gnorm: 1.12 [ 8:52:39<15:38:46] +[titan] 2025-10-05 07:27:00,212 - root - INFO - step: 14485 loss: 2.2411 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 07:27:00,213 - root - INFO - lr: 3.7354e-05 gnorm: 1.05 [ 8:52:50<15:38:35] +[titan] 2025-10-05 07:27:11,129 - root - INFO - step: 14490 loss: 2.2405 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9788 +[titan] 2025-10-05 07:27:11,129 - root - INFO - lr: 3.7346e-05 gnorm: 1.09 [ 8:53:01<15:38:24] +[titan] 2025-10-05 07:27:22,004 - root - INFO - step: 14495 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 07:27:22,004 - root - INFO - lr: 3.7338e-05 gnorm: 1.09 [ 8:53:12<15:38:13] +[titan] 2025-10-05 07:27:30,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:27:32,894 - root - INFO - step: 14500 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 07:27:32,894 - root - INFO - lr: 3.7330e-05 gnorm: 1.10 [ 8:53:23<15:38:01] +[titan] 2025-10-05 07:27:43,812 - root - INFO - step: 14505 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0409 +[titan] 2025-10-05 07:27:43,812 - root - INFO - lr: 3.7322e-05 gnorm: 1.06 [ 8:53:34<15:37:50] +[titan] 2025-10-05 07:27:54,688 - root - INFO - step: 14510 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9921 +[titan] 2025-10-05 07:27:54,688 - root - INFO - lr: 3.7314e-05 gnorm: 1.08 [ 8:53:45<15:37:39] +[titan] 2025-10-05 07:28:05,542 - root - INFO - step: 14515 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 07:28:05,542 - root - INFO - lr: 3.7306e-05 gnorm: 1.05 [ 8:53:56<15:37:27] +[titan] 2025-10-05 07:28:16,397 - root - INFO - step: 14520 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:28:16,398 - root - INFO - lr: 3.7298e-05 gnorm: 1.08 [ 8:54:06<15:37:16] +[titan] 2025-10-05 07:28:27,301 - root - INFO - step: 14525 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9512 +[titan] 2025-10-05 07:28:27,301 - root - INFO - lr: 3.7290e-05 gnorm: 1.05 [ 8:54:17<15:37:05] +[titan] 2025-10-05 07:28:38,153 - root - INFO - step: 14530 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0325 +[titan] 2025-10-05 07:28:38,153 - root - INFO - lr: 
3.7282e-05 gnorm: 1.12 [ 8:54:28<15:36:53] +[titan] 2025-10-05 07:28:49,042 - root - INFO - step: 14535 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 07:28:49,042 - root - INFO - lr: 3.7274e-05 gnorm: 1.10 [ 8:54:39<15:36:42] +[titan] 2025-10-05 07:28:59,909 - root - INFO - step: 14540 loss: 2.2631 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9992 +[titan] 2025-10-05 07:28:59,910 - root - INFO - lr: 3.7266e-05 gnorm: 1.09 [ 8:54:50<15:36:31] +[titan] 2025-10-05 07:29:10,771 - root - INFO - step: 14545 loss: 2.2017 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9458 +[titan] 2025-10-05 07:29:10,771 - root - INFO - lr: 3.7258e-05 gnorm: 1.10 [ 8:55:01<15:36:19] +[titan] 2025-10-05 07:29:19,437 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:29:21,621 - root - INFO - step: 14550 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 07:29:21,621 - root - INFO - lr: 3.7250e-05 gnorm: 1.09 [ 8:55:12<15:36:08] +[titan] 2025-10-05 07:29:32,531 - root - INFO - step: 14555 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0226 +[titan] 2025-10-05 07:29:32,531 - root - INFO - lr: 3.7242e-05 gnorm: 1.14 [ 8:55:22<15:35:57] +[titan] 2025-10-05 07:29:43,472 - root - INFO - step: 14560 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8975 +[titan] 2025-10-05 07:29:43,472 - root - INFO - lr: 3.7234e-05 gnorm: 1.02 [ 8:55:33<15:35:46] +[titan] 2025-10-05 07:29:54,345 - root - INFO - step: 14565 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 07:29:54,345 - root - INFO - lr: 3.7226e-05 gnorm: 1.13 [ 8:55:44<15:35:34] +[titan] 2025-10-05 07:30:05,208 - root - INFO - step: 14570 loss: 2.3031 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0344 +[titan] 2025-10-05 07:30:05,209 - root - INFO - lr: 3.7218e-05 gnorm: 1.12 [ 8:55:55<15:35:23] +[titan] 2025-10-05 07:30:16,066 - root - INFO - step: 14575 loss: 2.2367 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 07:30:16,066 - root - INFO - lr: 3.7210e-05 gnorm: 1.10 [ 8:56:06<15:35:12] +[titan] 2025-10-05 07:30:26,932 - root - INFO - step: 14580 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 07:30:26,933 - root - INFO - lr: 
3.7202e-05 gnorm: 1.07 [ 8:56:17<15:35:00] +[titan] 2025-10-05 07:30:37,811 - root - INFO - step: 14585 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9566 +[titan] 2025-10-05 07:30:37,811 - root - INFO - lr: 3.7194e-05 gnorm: 1.08 [ 8:56:28<15:34:49] +[titan] 2025-10-05 07:30:48,772 - root - INFO - step: 14590 loss: 2.3418 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0695 +[titan] 2025-10-05 07:30:48,772 - root - INFO - lr: 3.7185e-05 gnorm: 1.20 [ 8:56:39<15:34:38] +[titan] 2025-10-05 07:30:59,630 - root - INFO - step: 14595 loss: 2.2116 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 07:30:59,630 - root - INFO - lr: 3.7177e-05 gnorm: 1.09 [ 8:56:50<15:34:26] +[titan] 2025-10-05 07:31:08,301 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:31:10,494 - root - INFO - step: 14600 loss: 2.1772 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 07:31:10,494 - root - INFO - lr: 3.7169e-05 gnorm: 1.08 [ 8:57:00<15:34:15] +[titan] 2025-10-05 07:31:21,365 - root - INFO - step: 14605 loss: 2.3083 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:31:21,365 - root - INFO - lr: 3.7161e-05 gnorm: 1.09 [ 8:57:11<15:34:04] +[titan] 2025-10-05 07:31:32,251 - root - INFO - step: 14610 loss: 2.3039 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 07:31:32,251 - root - INFO - lr: 3.7153e-05 gnorm: 1.15 [ 8:57:22<15:33:53] +[titan] 2025-10-05 07:31:43,124 - root - INFO - step: 14615 loss: 2.2982 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0301 +[titan] 2025-10-05 07:31:43,125 - root - INFO - lr: 3.7145e-05 gnorm: 1.12 [ 8:57:33<15:33:41] +[titan] 2025-10-05 07:31:54,094 - root - INFO - step: 14620 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9332 +[titan] 2025-10-05 07:31:54,094 - root - INFO - lr: 3.7137e-05 gnorm: 1.09 [ 8:57:44<15:33:30] +[titan] 2025-10-05 07:32:04,989 - root - INFO - step: 14625 loss: 2.2391 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:32:04,989 - root - INFO - lr: 3.7129e-05 gnorm: 1.10 [ 8:57:55<15:33:19] +[titan] 2025-10-05 07:32:15,888 - root - INFO - step: 14630 loss: 2.3113 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 07:32:15,888 - root - INFO - lr: 
3.7121e-05 gnorm: 1.10 [ 8:58:06<15:33:08] +[titan] 2025-10-05 07:32:26,771 - root - INFO - step: 14635 loss: 2.2726 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:32:26,771 - root - INFO - lr: 3.7113e-05 gnorm: 1.12 [ 8:58:17<15:32:56] +[titan] 2025-10-05 07:32:37,649 - root - INFO - step: 14640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 07:32:37,649 - root - INFO - lr: 3.7105e-05 gnorm: 1.08 [ 8:58:28<15:32:45] +[titan] 2025-10-05 07:32:48,613 - root - INFO - step: 14645 loss: 2.1989 memory: 118.84GiB(85.28%) tps: 29,888 tflops: 414.65 mfu: 41.93% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9422 +[titan] 2025-10-05 07:32:48,613 - root - INFO - lr: 3.7097e-05 gnorm: 1.05 [ 8:58:39<15:32:34] +[titan] 2025-10-05 07:32:57,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:32:59,535 - root - INFO - step: 14650 loss: 2.3040 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0349 +[titan] 2025-10-05 07:32:59,535 - root - INFO - lr: 3.7089e-05 gnorm: 1.06 [ 8:58:49<15:32:23] +[titan] 2025-10-05 07:33:10,438 - root - INFO - step: 14655 loss: 2.2889 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0209 +[titan] 2025-10-05 07:33:10,439 - root - INFO - lr: 3.7081e-05 gnorm: 1.13 [ 8:59:00<15:32:11] +[titan] 2025-10-05 07:33:21,348 - root - INFO - step: 14660 loss: 2.2514 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:33:21,348 - root - INFO - lr: 3.7073e-05 gnorm: 1.12 [ 8:59:11<15:32:00] +[titan] 2025-10-05 07:33:32,227 - root - INFO - step: 14665 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 07:33:32,227 - root - INFO - lr: 3.7064e-05 gnorm: 1.12 [ 8:59:22<15:31:49] +[titan] 2025-10-05 07:33:43,130 - root - INFO - step: 14670 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:33:43,130 - root - INFO - lr: 3.7056e-05 gnorm: 1.13 [ 8:59:33<15:31:37] +[titan] 2025-10-05 07:33:54,091 - root - INFO - step: 14675 loss: 2.2801 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 07:33:54,091 - root - INFO - lr: 3.7048e-05 gnorm: 1.08 [ 8:59:44<15:31:26] +[titan] 2025-10-05 07:34:04,932 - root - INFO - step: 14680 loss: 2.1187 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 07:34:04,932 - root - INFO - lr: 
3.7040e-05 gnorm: 1.06 [ 8:59:55<15:31:15] +[titan] 2025-10-05 07:34:15,806 - root - INFO - step: 14685 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9938 +[titan] 2025-10-05 07:34:15,806 - root - INFO - lr: 3.7032e-05 gnorm: 1.10 [ 9:00:06<15:31:04] +[titan] 2025-10-05 07:34:26,671 - root - INFO - step: 14690 loss: 2.2095 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9509 +[titan] 2025-10-05 07:34:26,671 - root - INFO - lr: 3.7024e-05 gnorm: 1.06 [ 9:00:17<15:30:52] +[titan] 2025-10-05 07:34:37,510 - root - INFO - step: 14695 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 07:34:37,510 - root - INFO - lr: 3.7016e-05 gnorm: 1.06 [ 9:00:27<15:30:41] +[titan] 2025-10-05 07:34:46,191 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:34:48,450 - root - INFO - step: 14700 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9405 +[titan] 2025-10-05 07:34:48,450 - root - INFO - lr: 3.7008e-05 gnorm: 1.10 [ 9:00:38<15:30:30] +[titan] 2025-10-05 07:34:59,300 - root - INFO - step: 14705 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0240 +[titan] 2025-10-05 07:34:59,300 - root - INFO - lr: 3.7000e-05 gnorm: 1.14 [ 9:00:49<15:30:18] +[titan] 2025-10-05 07:35:10,167 - root - INFO - step: 14710 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0017 +[titan] 2025-10-05 07:35:10,168 - root - INFO - lr: 3.6992e-05 gnorm: 1.09 [ 9:01:00<15:30:07] +[titan] 2025-10-05 07:35:21,048 - root - INFO - step: 14715 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0027 +[titan] 2025-10-05 07:35:21,048 - root - INFO - lr: 3.6984e-05 gnorm: 1.13 [ 9:01:11<15:29:56] +[titan] 2025-10-05 07:35:31,930 - root - INFO - step: 14720 loss: 2.2273 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9675 +[titan] 2025-10-05 07:35:31,930 - root - INFO - lr: 3.6976e-05 gnorm: 1.08 [ 9:01:22<15:29:44] +[titan] 2025-10-05 07:35:42,810 - root - INFO - step: 14725 loss: 2.3179 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 07:35:42,810 - root - INFO - lr: 3.6967e-05 gnorm: 1.12 [ 9:01:33<15:29:33] +[titan] 2025-10-05 07:35:53,724 - root - INFO - step: 14730 loss: 2.2620 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9989 +[titan] 2025-10-05 07:35:53,724 - root - INFO - lr: 
3.6959e-05 gnorm: 1.11 [ 9:01:44<15:29:22] +[titan] 2025-10-05 07:36:04,629 - root - INFO - step: 14735 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 07:36:04,629 - root - INFO - lr: 3.6951e-05 gnorm: 1.06 [ 9:01:55<15:29:11] +[titan] 2025-10-05 07:36:15,522 - root - INFO - step: 14740 loss: 2.2768 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 07:36:15,522 - root - INFO - lr: 3.6943e-05 gnorm: 1.09 [ 9:02:05<15:28:59] +[titan] 2025-10-05 07:36:26,431 - root - INFO - step: 14745 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 07:36:26,431 - root - INFO - lr: 3.6935e-05 gnorm: 1.07 [ 9:02:16<15:28:48] +[titan] 2025-10-05 07:36:35,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:36:37,321 - root - INFO - step: 14750 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 07:36:37,321 - root - INFO - lr: 3.6927e-05 gnorm: 1.10 [ 9:02:27<15:28:37] +[titan] 2025-10-05 07:36:48,227 - root - INFO - step: 14755 loss: 2.2186 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:36:48,228 - root - INFO - lr: 3.6919e-05 gnorm: 1.04 [ 9:02:38<15:28:26] +[titan] 2025-10-05 07:36:59,096 - root - INFO - step: 14760 loss: 2.2696 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0036 +[titan] 2025-10-05 07:36:59,096 - root - INFO - lr: 3.6911e-05 gnorm: 1.08 [ 9:02:49<15:28:14] +[titan] 2025-10-05 07:37:09,945 - root - INFO - step: 14765 loss: 2.2510 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9893 +[titan] 2025-10-05 07:37:09,945 - root - INFO - lr: 3.6903e-05 gnorm: 1.13 [ 9:03:00<15:28:03] +[titan] 2025-10-05 07:37:20,822 - root - INFO - step: 14770 loss: 2.2169 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:37:20,822 - root - INFO - lr: 3.6894e-05 gnorm: 1.08 [ 9:03:11<15:27:52] +[titan] 2025-10-05 07:37:31,692 - root - INFO - step: 14775 loss: 2.2524 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 07:37:31,692 - root - INFO - lr: 3.6886e-05 gnorm: 1.10 [ 9:03:22<15:27:40] +[titan] 2025-10-05 07:37:42,588 - root - INFO - step: 14780 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 07:37:42,588 - root - INFO - lr: 
3.6878e-05 gnorm: 1.12 [ 9:03:33<15:27:29] +[titan] 2025-10-05 07:37:53,516 - root - INFO - step: 14785 loss: 2.1691 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9163 +[titan] 2025-10-05 07:37:53,516 - root - INFO - lr: 3.6870e-05 gnorm: 1.06 [ 9:03:43<15:27:18] +[titan] 2025-10-05 07:38:04,385 - root - INFO - step: 14790 loss: 2.1764 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 07:38:04,386 - root - INFO - lr: 3.6862e-05 gnorm: 1.05 [ 9:03:54<15:27:07] +[titan] 2025-10-05 07:38:15,271 - root - INFO - step: 14795 loss: 2.2615 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9988 +[titan] 2025-10-05 07:38:15,271 - root - INFO - lr: 3.6854e-05 gnorm: 1.11 [ 9:04:05<15:26:55] +[titan] 2025-10-05 07:38:23,946 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:38:26,148 - root - INFO - step: 14800 loss: 2.2171 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 07:38:26,148 - root - INFO - lr: 3.6846e-05 gnorm: 1.11 [ 9:04:16<15:26:44] +[titan] 2025-10-05 07:38:37,018 - root - INFO - step: 14805 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0292 +[titan] 2025-10-05 07:38:37,018 - root - INFO - lr: 3.6838e-05 gnorm: 1.12 [ 9:04:27<15:26:33] +[titan] 2025-10-05 07:38:47,933 - root - INFO - step: 14810 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 07:38:47,933 - root - INFO - lr: 3.6830e-05 gnorm: 1.11 [ 9:04:38<15:26:21] +[titan] 2025-10-05 07:38:58,873 - root - INFO - step: 14815 loss: 2.2872 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:38:58,873 - root - INFO - lr: 3.6821e-05 gnorm: 1.08 [ 9:04:49<15:26:10] +[titan] 2025-10-05 07:39:09,749 - root - INFO - step: 14820 loss: 2.2863 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:39:09,749 - root - INFO - lr: 3.6813e-05 gnorm: 1.08 [ 9:05:00<15:25:59] +[titan] 2025-10-05 07:39:20,633 - root - INFO - step: 14825 loss: 2.3248 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 07:39:20,633 - root - INFO - lr: 3.6805e-05 gnorm: 1.06 [ 9:05:11<15:25:48] +[titan] 2025-10-05 07:39:31,524 - root - INFO - step: 14830 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:39:31,525 - root - INFO - lr: 
3.6797e-05 gnorm: 1.05 [ 9:05:21<15:25:36] +[titan] 2025-10-05 07:39:42,407 - root - INFO - step: 14835 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0410 +[titan] 2025-10-05 07:39:42,408 - root - INFO - lr: 3.6789e-05 gnorm: 1.09 [ 9:05:32<15:25:25] +[titan] 2025-10-05 07:39:53,316 - root - INFO - step: 14840 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:39:53,316 - root - INFO - lr: 3.6781e-05 gnorm: 1.06 [ 9:05:43<15:25:14] +[titan] 2025-10-05 07:40:04,271 - root - INFO - step: 14845 loss: 2.2304 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.97 mfu: 41.96% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9723 +[titan] 2025-10-05 07:40:04,271 - root - INFO - lr: 3.6773e-05 gnorm: 1.12 [ 9:05:54<15:25:03] +[titan] 2025-10-05 07:40:10,992 - root - INFO - Dumping profiler traces at step 14848 +[titan] 2025-10-05 07:40:11,031 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:40:13,209 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:40:15,391 - root - INFO - step: 14850 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 29,468 tflops: 408.82 mfu: 41.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:40:15,392 - root - INFO - lr: 3.6765e-05 gnorm: 1.08 [ 9:06:05<15:24:52] +[titan] 2025-10-05 07:40:26,262 - root - INFO - step: 14855 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 07:40:26,262 - root - INFO - lr: 3.6756e-05 gnorm: 1.09 [ 9:06:16<15:24:40] +[titan] 2025-10-05 07:40:37,129 - root - INFO - step: 14860 loss: 2.2444 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 07:40:37,129 - root - INFO - lr: 3.6748e-05 gnorm: 1.08 [ 9:06:27<15:24:29] +[titan] 2025-10-05 07:40:47,995 - root - INFO - step: 14865 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0098 +[titan] 2025-10-05 07:40:47,995 - root - INFO - lr: 3.6740e-05 gnorm: 1.10 [ 9:06:38<15:24:18] +[titan] 2025-10-05 07:40:58,905 - root - INFO - step: 14870 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:40:58,905 - root - INFO - lr: 3.6732e-05 gnorm: 1.10 [ 9:06:49<15:24:07] +[titan] 2025-10-05 07:41:09,784 - root - INFO - step: 14875 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 07:41:09,784 - root - INFO - lr: 3.6724e-05 gnorm: 1.10 [ 9:07:00<15:23:55] +[titan] 2025-10-05 07:41:20,683 - root - INFO - step: 14880 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 07:41:20,683 - root - INFO - lr: 
3.6716e-05 gnorm: 1.08 [ 9:07:11<15:23:44] +[titan] 2025-10-05 07:41:31,553 - root - INFO - step: 14885 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 07:41:31,553 - root - INFO - lr: 3.6708e-05 gnorm: 1.05 [ 9:07:21<15:23:33] +[titan] 2025-10-05 07:41:42,413 - root - INFO - step: 14890 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 07:41:42,413 - root - INFO - lr: 3.6699e-05 gnorm: 1.15 [ 9:07:32<15:23:21] +[titan] 2025-10-05 07:41:53,308 - root - INFO - step: 14895 loss: 2.2418 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:41:53,308 - root - INFO - lr: 3.6691e-05 gnorm: 1.07 [ 9:07:43<15:23:10] +[titan] 2025-10-05 07:42:01,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:42:04,160 - root - INFO - step: 14900 loss: 2.2908 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0236 +[titan] 2025-10-05 07:42:04,160 - root - INFO - lr: 3.6683e-05 gnorm: 1.13 [ 9:07:54<15:22:59] +[titan] 2025-10-05 07:42:15,031 - root - INFO - step: 14905 loss: 2.3078 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0380 +[titan] 2025-10-05 07:42:15,031 - root - INFO - lr: 3.6675e-05 gnorm: 1.12 [ 9:08:05<15:22:48] +[titan] 2025-10-05 07:42:25,895 - root - INFO - step: 14910 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9881 +[titan] 2025-10-05 07:42:25,895 - root - INFO - lr: 3.6667e-05 gnorm: 1.09 [ 9:08:16<15:22:36] +[titan] 2025-10-05 07:42:36,754 - root - INFO - step: 14915 loss: 2.2480 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 07:42:36,754 - root - INFO - lr: 3.6659e-05 gnorm: 1.09 [ 9:08:27<15:22:25] +[titan] 2025-10-05 07:42:47,621 - root - INFO - step: 14920 loss: 2.4317 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 07:42:47,621 - root - INFO - lr: 3.6651e-05 gnorm: 1.11 [ 9:08:38<15:22:14] +[titan] 2025-10-05 07:42:58,504 - root - INFO - step: 14925 loss: 2.2167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9576 +[titan] 2025-10-05 07:42:58,504 - root - INFO - lr: 3.6642e-05 gnorm: 1.09 [ 9:08:48<15:22:02] +[titan] 2025-10-05 07:43:09,387 - root - INFO - step: 14930 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 07:43:09,388 - root - INFO - lr: 
3.6634e-05 gnorm: 1.08 [ 9:08:59<15:21:51] +[titan] 2025-10-05 07:43:20,273 - root - INFO - step: 14935 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:43:20,273 - root - INFO - lr: 3.6626e-05 gnorm: 1.11 [ 9:09:10<15:21:40] +[titan] 2025-10-05 07:43:31,152 - root - INFO - step: 14940 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:43:31,152 - root - INFO - lr: 3.6618e-05 gnorm: 1.09 [ 9:09:21<15:21:28] +[titan] 2025-10-05 07:43:42,038 - root - INFO - step: 14945 loss: 2.2476 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 07:43:42,038 - root - INFO - lr: 3.6610e-05 gnorm: 1.04 [ 9:09:32<15:21:17] +[titan] 2025-10-05 07:43:50,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:43:52,910 - root - INFO - step: 14950 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9350 +[titan] 2025-10-05 07:43:52,910 - root - INFO - lr: 3.6602e-05 gnorm: 1.07 [ 9:09:43<15:21:06] +[titan] 2025-10-05 07:44:03,804 - root - INFO - step: 14955 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 07:44:03,804 - root - INFO - lr: 3.6593e-05 gnorm: 1.06 [ 9:09:54<15:20:55] +[titan] 2025-10-05 07:44:14,704 - root - INFO - step: 14960 loss: 2.2966 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0290 +[titan] 2025-10-05 07:44:14,704 - root - INFO - lr: 3.6585e-05 gnorm: 1.08 [ 9:10:05<15:20:43] +[titan] 2025-10-05 07:44:25,575 - root - INFO - step: 14965 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 07:44:25,576 - root - INFO - lr: 3.6577e-05 gnorm: 1.07 [ 9:10:15<15:20:32] +[titan] 2025-10-05 07:44:36,428 - root - INFO - step: 14970 loss: 2.2508 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:44:36,428 - root - INFO - lr: 3.6569e-05 gnorm: 1.09 [ 9:10:26<15:20:21] +[titan] 2025-10-05 07:44:47,311 - root - INFO - step: 14975 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:44:47,311 - root - INFO - lr: 3.6561e-05 gnorm: 1.08 [ 9:10:37<15:20:09] +[titan] 2025-10-05 07:44:58,219 - root - INFO - step: 14980 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:44:58,219 - root - INFO - lr: 
3.6553e-05 gnorm: 1.11 [ 9:10:48<15:19:58] +[titan] 2025-10-05 07:45:09,066 - root - INFO - step: 14985 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9745 +[titan] 2025-10-05 07:45:09,066 - root - INFO - lr: 3.6544e-05 gnorm: 1.07 [ 9:10:59<15:19:47] +[titan] 2025-10-05 07:45:19,933 - root - INFO - step: 14990 loss: 2.1985 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 07:45:19,934 - root - INFO - lr: 3.6536e-05 gnorm: 1.07 [ 9:11:10<15:19:36] +[titan] 2025-10-05 07:45:30,815 - root - INFO - step: 14995 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:45:30,816 - root - INFO - lr: 3.6528e-05 gnorm: 1.05 [ 9:11:21<15:19:24] +[titan] 2025-10-05 07:45:39,478 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:45:41,661 - root - INFO - step: 15000 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 07:45:41,661 - root - INFO - lr: 3.6520e-05 gnorm: 1.11 [ 9:11:32<15:19:13] +[titan] 2025-10-05 07:45:41,661 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 07:46:00,827 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 07:46:00,827 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.17 seconds. 
+[titan] 2025-10-05 07:48:03,855 - root - INFO - step: 15005 loss: 2.1283 memory: 118.84GiB(85.28%) tps: 2,304 tflops: 31.97 mfu: 3.23% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 07:48:03,856 - root - INFO - lr: 3.6512e-05 gnorm: 1.06 [ 9:13:54<15:22:40] +[titan] 2025-10-05 07:48:14,694 - root - INFO - step: 15010 loss: 2.2315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 07:48:14,695 - root - INFO - lr: 3.6504e-05 gnorm: 1.12 [ 9:14:05<15:22:29] +[titan] 2025-10-05 07:48:25,509 - root - INFO - step: 15015 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 07:48:25,510 - root - INFO - lr: 3.6495e-05 gnorm: 1.09 [ 9:14:15<15:22:17] +[titan] 2025-10-05 07:48:36,334 - root - INFO - step: 15020 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 07:48:36,334 - root - INFO - lr: 3.6487e-05 gnorm: 1.09 [ 9:14:26<15:22:06] +[titan] 2025-10-05 07:48:47,212 - root - INFO - step: 15025 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:48:47,212 - root - INFO - lr: 3.6479e-05 gnorm: 1.07 [ 9:14:37<15:21:55] +[titan] 2025-10-05 07:48:58,068 - root - INFO - step: 15030 loss: 2.2843 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:48:58,069 - root - INFO - lr: 3.6471e-05 gnorm: 1.12 [ 9:14:48<15:21:43] +[titan] 2025-10-05 07:49:08,916 - root - INFO - step: 15035 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 07:49:08,916 - root - INFO - lr: 
3.6463e-05 gnorm: 1.06 [ 9:14:59<15:21:32] +[titan] 2025-10-05 07:49:19,759 - root - INFO - step: 15040 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9876 +[titan] 2025-10-05 07:49:19,759 - root - INFO - lr: 3.6454e-05 gnorm: 1.06 [ 9:15:10<15:21:20] +[titan] 2025-10-05 07:49:30,613 - root - INFO - step: 15045 loss: 2.2689 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 07:49:30,613 - root - INFO - lr: 3.6446e-05 gnorm: 1.09 [ 9:15:21<15:21:09] +[titan] 2025-10-05 07:49:39,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:49:41,450 - root - INFO - step: 15050 loss: 2.2266 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 07:49:41,450 - root - INFO - lr: 3.6438e-05 gnorm: 1.08 [ 9:15:31<15:20:57] +[titan] 2025-10-05 07:49:52,326 - root - INFO - step: 15055 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0056 +[titan] 2025-10-05 07:49:52,326 - root - INFO - lr: 3.6430e-05 gnorm: 1.10 [ 9:15:42<15:20:46] +[titan] 2025-10-05 07:50:03,204 - root - INFO - step: 15060 loss: 2.2848 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:50:03,204 - root - INFO - lr: 3.6422e-05 gnorm: 1.09 [ 9:15:53<15:20:35] +[titan] 2025-10-05 07:50:14,065 - root - INFO - step: 15065 loss: 2.2635 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 07:50:14,066 - root - INFO - lr: 3.6413e-05 gnorm: 1.09 [ 9:16:04<15:20:23] +[titan] 2025-10-05 07:50:24,948 - root - INFO - step: 15070 loss: 2.2568 memory: 118.84GiB(85.28%) tps: 
30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 07:50:24,948 - root - INFO - lr: 3.6405e-05 gnorm: 1.08 [ 9:16:15<15:20:12] +[titan] 2025-10-05 07:50:35,794 - root - INFO - step: 15075 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0211 +[titan] 2025-10-05 07:50:35,794 - root - INFO - lr: 3.6397e-05 gnorm: 1.09 [ 9:16:26<15:20:00] +[titan] 2025-10-05 07:50:46,648 - root - INFO - step: 15080 loss: 2.2769 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0110 +[titan] 2025-10-05 07:50:46,649 - root - INFO - lr: 3.6389e-05 gnorm: 1.07 [ 9:16:37<15:19:49] +[titan] 2025-10-05 07:50:57,498 - root - INFO - step: 15085 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 07:50:57,498 - root - INFO - lr: 3.6381e-05 gnorm: 1.07 [ 9:16:47<15:19:37] +[titan] 2025-10-05 07:51:08,384 - root - INFO - step: 15090 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:51:08,385 - root - INFO - lr: 3.6373e-05 gnorm: 1.08 [ 9:16:58<15:19:26] +[titan] 2025-10-05 07:51:19,234 - root - INFO - step: 15095 loss: 2.2363 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9753 +[titan] 2025-10-05 07:51:19,234 - root - INFO - lr: 3.6364e-05 gnorm: 1.07 [ 9:17:09<15:19:15] +[titan] 2025-10-05 07:51:27,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:51:30,098 - root - INFO - step: 15100 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:51:30,098 - root - INFO - lr: 3.6356e-05 gnorm: 1.15 [ 9:17:20<15:19:03] +[titan] 2025-10-05 07:51:40,977 - root - INFO - step: 15105 loss: 2.2586 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9945 +[titan] 2025-10-05 07:51:40,977 - root - INFO - lr: 3.6348e-05 gnorm: 1.11 [ 9:17:31<15:18:52] +[titan] 2025-10-05 07:51:51,845 - root - INFO - step: 15110 loss: 2.2404 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 07:51:51,845 - root - INFO - lr: 3.6340e-05 gnorm: 1.07 [ 9:17:42<15:18:40] +[titan] 2025-10-05 07:52:02,714 - root - INFO - step: 15115 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0275 +[titan] 2025-10-05 07:52:02,715 - root - INFO - lr: 3.6331e-05 gnorm: 1.14 [ 9:17:53<15:18:29] +[titan] 2025-10-05 07:52:13,605 - root - INFO - step: 15120 loss: 2.2957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 07:52:13,605 - root - INFO - lr: 3.6323e-05 gnorm: 1.15 [ 9:18:04<15:18:18] +[titan] 2025-10-05 07:52:24,497 - root - INFO - step: 15125 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:52:24,498 - root - INFO - lr: 3.6315e-05 gnorm: 1.03 [ 9:18:14<15:18:06] +[titan] 2025-10-05 07:52:35,368 - root - INFO - step: 15130 loss: 2.2438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:35,368 - root - INFO - lr: 
3.6307e-05 gnorm: 1.11 [ 9:18:25<15:17:55] +[titan] 2025-10-05 07:52:46,243 - root - INFO - step: 15135 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 07:52:46,243 - root - INFO - lr: 3.6299e-05 gnorm: 1.09 [ 9:18:36<15:17:43] +[titan] 2025-10-05 07:52:57,118 - root - INFO - step: 15140 loss: 2.2420 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:57,118 - root - INFO - lr: 3.6290e-05 gnorm: 1.10 [ 9:18:47<15:17:32] +[titan] 2025-10-05 07:53:07,980 - root - INFO - step: 15145 loss: 2.3012 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 07:53:07,981 - root - INFO - lr: 3.6282e-05 gnorm: 1.07 [ 9:18:58<15:17:21] +[titan] 2025-10-05 07:53:16,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:53:18,858 - root - INFO - step: 15150 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9690 +[titan] 2025-10-05 07:53:18,858 - root - INFO - lr: 3.6274e-05 gnorm: 1.07 [ 9:19:09<15:17:09] +[titan] 2025-10-05 07:53:29,759 - root - INFO - step: 15155 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:53:29,759 - root - INFO - lr: 3.6266e-05 gnorm: 1.07 [ 9:19:20<15:16:58] +[titan] 2025-10-05 07:53:40,639 - root - INFO - step: 15160 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:53:40,639 - root - INFO - lr: 3.6258e-05 gnorm: 1.11 [ 9:19:31<15:16:46] +[titan] 2025-10-05 07:53:51,512 - root - INFO - step: 15165 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:53:51,512 - root - INFO - lr: 3.6249e-05 gnorm: 1.10 [ 9:19:41<15:16:35] +[titan] 2025-10-05 07:54:02,384 - root - INFO - step: 15170 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0038 +[titan] 2025-10-05 07:54:02,384 - root - INFO - lr: 3.6241e-05 gnorm: 1.11 [ 9:19:52<15:16:24] +[titan] 2025-10-05 07:54:13,267 - root - INFO - step: 15175 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:54:13,267 - root - INFO - lr: 3.6233e-05 gnorm: 1.08 [ 9:20:03<15:16:12] +[titan] 2025-10-05 07:54:24,133 - root - INFO - step: 15180 loss: 2.3028 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 07:54:24,133 - root - INFO - lr: 
3.6225e-05 gnorm: 1.09 [ 9:20:14<15:16:01] +[titan] 2025-10-05 07:54:35,038 - root - INFO - step: 15185 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:54:35,038 - root - INFO - lr: 3.6216e-05 gnorm: 1.07 [ 9:20:25<15:15:49] +[titan] 2025-10-05 07:54:45,892 - root - INFO - step: 15190 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0272 +[titan] 2025-10-05 07:54:45,892 - root - INFO - lr: 3.6208e-05 gnorm: 1.08 [ 9:20:36<15:15:38] +[titan] 2025-10-05 07:54:56,749 - root - INFO - step: 15195 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9845 +[titan] 2025-10-05 07:54:56,749 - root - INFO - lr: 3.6200e-05 gnorm: 1.08 [ 9:20:47<15:15:27] +[titan] 2025-10-05 07:55:05,429 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:55:07,614 - root - INFO - step: 15200 loss: 2.2230 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 07:55:07,615 - root - INFO - lr: 3.6192e-05 gnorm: 1.08 [ 9:20:58<15:15:15] +[titan] 2025-10-05 07:55:18,475 - root - INFO - step: 15205 loss: 2.2720 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 07:55:18,475 - root - INFO - lr: 3.6184e-05 gnorm: 1.09 [ 9:21:08<15:15:04] +[titan] 2025-10-05 07:55:29,333 - root - INFO - step: 15210 loss: 2.2496 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9871 +[titan] 2025-10-05 07:55:29,333 - root - INFO - lr: 3.6175e-05 gnorm: 1.10 [ 9:21:19<15:14:52] +[titan] 2025-10-05 07:55:40,201 - root - INFO - step: 15215 loss: 2.2704 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 07:55:40,201 - root - INFO - lr: 3.6167e-05 gnorm: 1.10 [ 9:21:30<15:14:41] +[titan] 2025-10-05 07:55:51,102 - root - INFO - step: 15220 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9944 +[titan] 2025-10-05 07:55:51,103 - root - INFO - lr: 3.6159e-05 gnorm: 1.09 [ 9:21:41<15:14:30] +[titan] 2025-10-05 07:56:01,979 - root - INFO - step: 15225 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:56:01,979 - root - INFO - lr: 3.6151e-05 gnorm: 1.08 [ 9:21:52<15:14:18] +[titan] 2025-10-05 07:56:12,856 - root - INFO - step: 15230 loss: 2.3282 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0531 +[titan] 2025-10-05 07:56:12,856 - root - INFO - lr: 
3.6142e-05 gnorm: 1.08 [ 9:22:03<15:14:07] +[titan] 2025-10-05 07:56:23,718 - root - INFO - step: 15235 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0077 +[titan] 2025-10-05 07:56:23,718 - root - INFO - lr: 3.6134e-05 gnorm: 1.04 [ 9:22:14<15:13:55] +[titan] 2025-10-05 07:56:34,605 - root - INFO - step: 15240 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9751 +[titan] 2025-10-05 07:56:34,605 - root - INFO - lr: 3.6126e-05 gnorm: 1.07 [ 9:22:25<15:13:44] +[titan] 2025-10-05 07:56:45,472 - root - INFO - step: 15245 loss: 2.3360 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 07:56:45,472 - root - INFO - lr: 3.6118e-05 gnorm: 1.05 [ 9:22:35<15:13:33] +[titan] 2025-10-05 07:56:54,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:56:56,368 - root - INFO - step: 15250 loss: 2.2490 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9870 +[titan] 2025-10-05 07:56:56,368 - root - INFO - lr: 3.6109e-05 gnorm: 1.08 [ 9:22:46<15:13:21] +[titan] 2025-10-05 07:57:07,241 - root - INFO - step: 15255 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9655 +[titan] 2025-10-05 07:57:07,241 - root - INFO - lr: 3.6101e-05 gnorm: 1.06 [ 9:22:57<15:13:10] +[titan] 2025-10-05 07:57:18,136 - root - INFO - step: 15260 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:57:18,136 - root - INFO - lr: 3.6093e-05 gnorm: 1.10 [ 9:23:08<15:12:59] +[titan] 2025-10-05 07:57:29,037 - root - INFO - step: 15265 loss: 2.2358 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9744 +[titan] 2025-10-05 07:57:29,037 - root - INFO - lr: 3.6085e-05 gnorm: 1.07 [ 9:23:19<15:12:47] +[titan] 2025-10-05 07:57:39,909 - root - INFO - step: 15270 loss: 2.3087 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 07:57:39,910 - root - INFO - lr: 3.6076e-05 gnorm: 1.08 [ 9:23:30<15:12:36] +[titan] 2025-10-05 07:57:50,767 - root - INFO - step: 15275 loss: 2.2564 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 07:57:50,768 - root - INFO - lr: 3.6068e-05 gnorm: 1.08 [ 9:23:41<15:12:24] +[titan] 2025-10-05 07:58:01,662 - root - INFO - step: 15280 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9973 +[titan] 2025-10-05 07:58:01,662 - root - INFO - lr: 
3.6060e-05 gnorm: 1.20 [ 9:23:52<15:12:13] +[titan] 2025-10-05 07:58:12,584 - root - INFO - step: 15285 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:58:12,584 - root - INFO - lr: 3.6052e-05 gnorm: 1.10 [ 9:24:02<15:12:02] +[titan] 2025-10-05 07:58:23,438 - root - INFO - step: 15290 loss: 2.2206 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 07:58:23,438 - root - INFO - lr: 3.6043e-05 gnorm: 1.06 [ 9:24:13<15:11:50] +[titan] 2025-10-05 07:58:34,298 - root - INFO - step: 15295 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 07:58:34,299 - root - INFO - lr: 3.6035e-05 gnorm: 1.06 [ 9:24:24<15:11:39] +[titan] 2025-10-05 07:58:42,983 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:58:45,167 - root - INFO - step: 15300 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 07:58:45,167 - root - INFO - lr: 3.6027e-05 gnorm: 1.08 [ 9:24:35<15:11:27] +[titan] 2025-10-05 07:58:56,031 - root - INFO - step: 15305 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 07:58:56,032 - root - INFO - lr: 3.6019e-05 gnorm: 1.10 [ 9:24:46<15:11:16] +[titan] 2025-10-05 07:59:06,887 - root - INFO - step: 15310 loss: 2.2775 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 07:59:06,887 - root - INFO - lr: 3.6010e-05 gnorm: 1.09 [ 9:24:57<15:11:05] +[titan] 2025-10-05 07:59:17,809 - root - INFO - step: 15315 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9766 +[titan] 2025-10-05 07:59:17,809 - root - INFO - lr: 3.6002e-05 gnorm: 1.10 [ 9:25:08<15:10:53] +[titan] 2025-10-05 07:59:28,686 - root - INFO - step: 15320 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 07:59:28,686 - root - INFO - lr: 3.5994e-05 gnorm: 1.07 [ 9:25:19<15:10:42] +[titan] 2025-10-05 07:59:39,537 - root - INFO - step: 15325 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 07:59:39,537 - root - INFO - lr: 3.5986e-05 gnorm: 1.11 [ 9:25:29<15:10:31] +[titan] 2025-10-05 07:59:50,400 - root - INFO - step: 15330 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:59:50,400 - root - INFO - lr: 
3.5977e-05 gnorm: 1.11 [ 9:25:40<15:10:19] +[titan] 2025-10-05 08:00:01,278 - root - INFO - step: 15335 loss: 2.2792 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0137 +[titan] 2025-10-05 08:00:01,278 - root - INFO - lr: 3.5969e-05 gnorm: 1.15 [ 9:25:51<15:10:08] +[titan] 2025-10-05 08:00:12,178 - root - INFO - step: 15340 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 08:00:12,178 - root - INFO - lr: 3.5961e-05 gnorm: 1.06 [ 9:26:02<15:09:56] +[titan] 2025-10-05 08:00:23,116 - root - INFO - step: 15345 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:00:23,116 - root - INFO - lr: 3.5952e-05 gnorm: 1.09 [ 9:26:13<15:09:45] +[titan] 2025-10-05 08:00:31,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:00:33,989 - root - INFO - step: 15350 loss: 2.2871 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 08:00:33,989 - root - INFO - lr: 3.5944e-05 gnorm: 1.10 [ 9:26:24<15:09:34] +[titan] 2025-10-05 08:00:44,860 - root - INFO - step: 15355 loss: 2.1883 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 08:00:44,860 - root - INFO - lr: 3.5936e-05 gnorm: 1.10 [ 9:26:35<15:09:22] +[titan] 2025-10-05 08:00:55,829 - root - INFO - step: 15360 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9417 +[titan] 2025-10-05 08:00:55,830 - root - INFO - lr: 3.5928e-05 gnorm: 1.04 [ 9:26:46<15:09:11] +[titan] 2025-10-05 08:00:56,007 - root - INFO - Dumping profiler traces at step 15360 +[titan] 2025-10-05 08:00:56,047 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:01:06,999 - root - INFO - step: 15365 loss: 2.2781 memory: 118.84GiB(85.28%) tps: 29,338 tflops: 407.01 mfu: 41.15% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 08:01:06,999 - root - INFO - lr: 3.5919e-05 gnorm: 1.07 [ 9:26:57<15:09:00] +[titan] 2025-10-05 08:01:17,972 - root - INFO - step: 15370 loss: 2.2166 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 08:01:17,972 - root - INFO - lr: 3.5911e-05 gnorm: 1.06 [ 9:27:08<15:08:49] +[titan] 2025-10-05 08:01:28,828 - root - INFO - step: 15375 loss: 2.2257 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 08:01:28,829 - root - INFO - lr: 3.5903e-05 gnorm: 1.04 [ 9:27:19<15:08:38] +[titan] 2025-10-05 08:01:39,731 - root - INFO - step: 15380 loss: 
2.2608 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9985 +[titan] 2025-10-05 08:01:39,732 - root - INFO - lr: 3.5895e-05 gnorm: 1.14 [ 9:27:30<15:08:26] +[titan] 2025-10-05 08:01:50,600 - root - INFO - step: 15385 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9684 +[titan] 2025-10-05 08:01:50,601 - root - INFO - lr: 3.5886e-05 gnorm: 1.10 [ 9:27:40<15:08:15] +[titan] 2025-10-05 08:02:01,449 - root - INFO - step: 15390 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 08:02:01,449 - root - INFO - lr: 3.5878e-05 gnorm: 1.10 [ 9:27:51<15:08:03] +[titan] 2025-10-05 08:02:12,375 - root - INFO - step: 15395 loss: 2.2776 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 08:02:12,375 - root - INFO - lr: 3.5870e-05 gnorm: 1.10 [ 9:28:02<15:07:52] +[titan] 2025-10-05 08:02:21,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:02:23,262 - root - INFO - step: 15400 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9035 +[titan] 2025-10-05 08:02:23,262 - root - INFO - lr: 3.5861e-05 gnorm: 1.05 [ 9:28:13<15:07:41] +[titan] 2025-10-05 08:02:34,123 - root - INFO - step: 15405 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 08:02:34,123 - root - INFO - lr: 3.5853e-05 gnorm: 1.05 [ 9:28:24<15:07:29] +[titan] 2025-10-05 08:02:45,027 - root - INFO - step: 15410 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:02:45,027 - root - INFO - lr: 3.5845e-05 gnorm: 1.09 [ 9:28:35<15:07:18] +[titan] 2025-10-05 08:02:55,897 - root - INFO - step: 15415 loss: 2.2676 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:02:55,897 - root - INFO - lr: 3.5837e-05 gnorm: 1.09 [ 9:28:46<15:07:07] +[titan] 2025-10-05 08:03:06,761 - root - INFO - step: 15420 loss: 2.2135 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9565 +[titan] 2025-10-05 08:03:06,761 - root - INFO - lr: 3.5828e-05 gnorm: 1.09 [ 9:28:57<15:06:55] +[titan] 2025-10-05 08:03:17,688 - root - INFO - step: 15425 loss: 2.2445 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9820 +[titan] 2025-10-05 08:03:17,688 - root - INFO - lr: 3.5820e-05 gnorm: 1.04 [ 9:29:08<15:06:44] +[titan] 2025-10-05 08:03:28,565 - root - INFO - step: 15430 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 08:03:28,565 - root - INFO - lr: 
3.5812e-05 gnorm: 1.11 [ 9:29:18<15:06:33] +[titan] 2025-10-05 08:03:39,425 - root - INFO - step: 15435 loss: 2.2327 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:03:39,425 - root - INFO - lr: 3.5803e-05 gnorm: 1.06 [ 9:29:29<15:06:21] +[titan] 2025-10-05 08:03:50,323 - root - INFO - step: 15440 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 08:03:50,323 - root - INFO - lr: 3.5795e-05 gnorm: 1.04 [ 9:29:40<15:06:10] +[titan] 2025-10-05 08:04:01,195 - root - INFO - step: 15445 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 08:04:01,195 - root - INFO - lr: 3.5787e-05 gnorm: 1.04 [ 9:29:51<15:05:58] +[titan] 2025-10-05 08:04:09,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:04:12,045 - root - INFO - step: 15450 loss: 2.2815 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0162 +[titan] 2025-10-05 08:04:12,045 - root - INFO - lr: 3.5778e-05 gnorm: 1.12 [ 9:30:02<15:05:47] +[titan] 2025-10-05 08:04:22,960 - root - INFO - step: 15455 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 08:04:22,960 - root - INFO - lr: 3.5770e-05 gnorm: 1.11 [ 9:30:13<15:05:36] +[titan] 2025-10-05 08:04:33,818 - root - INFO - step: 15460 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9338 +[titan] 2025-10-05 08:04:33,818 - root - INFO - lr: 3.5762e-05 gnorm: 1.08 [ 9:30:24<15:05:24] +[titan] 2025-10-05 08:04:44,689 - root - INFO - step: 15465 loss: 2.1902 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:04:44,689 - root - INFO - lr: 3.5754e-05 gnorm: 1.09 [ 9:30:35<15:05:13] +[titan] 2025-10-05 08:04:55,581 - root - INFO - step: 15470 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:04:55,581 - root - INFO - lr: 3.5745e-05 gnorm: 1.07 [ 9:30:45<15:05:02] +[titan] 2025-10-05 08:05:06,501 - root - INFO - step: 15475 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0023 +[titan] 2025-10-05 08:05:06,501 - root - INFO - lr: 3.5737e-05 gnorm: 1.12 [ 9:30:56<15:04:50] +[titan] 2025-10-05 08:05:17,432 - root - INFO - step: 15480 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 08:05:17,432 - root - INFO - lr: 
3.5729e-05 gnorm: 1.10 [ 9:31:07<15:04:39] +[titan] 2025-10-05 08:05:28,358 - root - INFO - step: 15485 loss: 2.2121 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:05:28,358 - root - INFO - lr: 3.5720e-05 gnorm: 1.05 [ 9:31:18<15:04:28] +[titan] 2025-10-05 08:05:39,256 - root - INFO - step: 15490 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 08:05:39,257 - root - INFO - lr: 3.5712e-05 gnorm: 1.11 [ 9:31:29<15:04:16] +[titan] 2025-10-05 08:05:50,140 - root - INFO - step: 15495 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 08:05:50,140 - root - INFO - lr: 3.5704e-05 gnorm: 1.04 [ 9:31:40<15:04:05] +[titan] 2025-10-05 08:05:58,823 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:06:01,006 - root - INFO - step: 15500 loss: 2.1526 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 08:06:01,006 - root - INFO - lr: 3.5695e-05 gnorm: 1.06 [ 9:31:51<15:03:54] +[titan] 2025-10-05 08:06:11,916 - root - INFO - step: 15505 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 08:06:11,916 - root - INFO - lr: 3.5687e-05 gnorm: 1.09 [ 9:32:02<15:03:42] +[titan] 2025-10-05 08:06:22,849 - root - INFO - step: 15510 loss: 2.2409 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 08:06:22,849 - root - INFO - lr: 3.5679e-05 gnorm: 1.06 [ 9:32:13<15:03:31] +[titan] 2025-10-05 08:06:33,698 - root - INFO - step: 15515 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 08:06:33,698 - root - INFO - lr: 3.5670e-05 gnorm: 1.08 [ 9:32:24<15:03:20] +[titan] 2025-10-05 08:06:44,607 - root - INFO - step: 15520 loss: 2.3868 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 08:06:44,607 - root - INFO - lr: 3.5662e-05 gnorm: 2.99 [ 9:32:34<15:03:08] +[titan] 2025-10-05 08:06:55,464 - root - INFO - step: 15525 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9210 +[titan] 2025-10-05 08:06:55,464 - root - INFO - lr: 3.5654e-05 gnorm: 1.10 [ 9:32:45<15:02:57] +[titan] 2025-10-05 08:07:06,333 - root - INFO - step: 15530 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:07:06,333 - root - INFO - lr: 
3.5646e-05 gnorm: 1.03 [ 9:32:56<15:02:45] +[titan] 2025-10-05 08:07:17,232 - root - INFO - step: 15535 loss: 2.2054 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 08:07:17,232 - root - INFO - lr: 3.5637e-05 gnorm: 1.08 [ 9:33:07<15:02:34] +[titan] 2025-10-05 08:07:28,161 - root - INFO - step: 15540 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:07:28,161 - root - INFO - lr: 3.5629e-05 gnorm: 1.10 [ 9:33:18<15:02:23] +[titan] 2025-10-05 08:07:39,016 - root - INFO - step: 15545 loss: 2.2280 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 08:07:39,016 - root - INFO - lr: 3.5621e-05 gnorm: 1.07 [ 9:33:29<15:02:11] +[titan] 2025-10-05 08:07:47,694 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:07:49,876 - root - INFO - step: 15550 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 08:07:49,876 - root - INFO - lr: 3.5612e-05 gnorm: 1.07 [ 9:33:40<15:02:00] +[titan] 2025-10-05 08:08:00,741 - root - INFO - step: 15555 loss: 2.2855 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 08:08:00,741 - root - INFO - lr: 3.5604e-05 gnorm: 1.08 [ 9:33:51<15:01:49] +[titan] 2025-10-05 08:08:11,595 - root - INFO - step: 15560 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 08:08:11,595 - root - INFO - lr: 3.5596e-05 gnorm: 1.08 [ 9:34:01<15:01:37] +[titan] 2025-10-05 08:08:22,482 - root - INFO - step: 15565 loss: 2.2410 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:08:22,482 - root - INFO - lr: 3.5587e-05 gnorm: 1.08 [ 9:34:12<15:01:26] +[titan] 2025-10-05 08:08:33,383 - root - INFO - step: 15570 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 08:08:33,383 - root - INFO - lr: 3.5579e-05 gnorm: 1.11 [ 9:34:23<15:01:15] +[titan] 2025-10-05 08:08:44,242 - root - INFO - step: 15575 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 08:08:44,242 - root - INFO - lr: 3.5571e-05 gnorm: 1.06 [ 9:34:34<15:01:03] +[titan] 2025-10-05 08:08:55,120 - root - INFO - step: 15580 loss: 2.2133 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9541 +[titan] 2025-10-05 08:08:55,120 - root - INFO - lr: 
3.5562e-05 gnorm: 1.05 [ 9:34:45<15:00:52] +[titan] 2025-10-05 08:09:06,000 - root - INFO - step: 15585 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9699 +[titan] 2025-10-05 08:09:06,000 - root - INFO - lr: 3.5554e-05 gnorm: 1.10 [ 9:34:56<15:00:40] +[titan] 2025-10-05 08:09:16,881 - root - INFO - step: 15590 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 08:09:16,881 - root - INFO - lr: 3.5546e-05 gnorm: 1.07 [ 9:35:07<15:00:29] +[titan] 2025-10-05 08:09:27,784 - root - INFO - step: 15595 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 08:09:27,784 - root - INFO - lr: 3.5537e-05 gnorm: 1.12 [ 9:35:18<15:00:18] +[titan] 2025-10-05 08:09:36,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:09:38,690 - root - INFO - step: 15600 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 08:09:38,690 - root - INFO - lr: 3.5529e-05 gnorm: 1.09 [ 9:35:29<15:00:06] +[titan] 2025-10-05 08:09:49,593 - root - INFO - step: 15605 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 08:09:49,594 - root - INFO - lr: 3.5521e-05 gnorm: 1.08 [ 9:35:39<14:59:55] +[titan] 2025-10-05 08:10:00,479 - root - INFO - step: 15610 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 08:10:00,480 - root - INFO - lr: 3.5512e-05 gnorm: 1.08 [ 9:35:50<14:59:44] +[titan] 2025-10-05 08:10:11,374 - root - INFO - step: 15615 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9198 +[titan] 2025-10-05 08:10:11,374 - root - INFO - lr: 3.5504e-05 gnorm: 1.05 [ 9:36:01<14:59:32] +[titan] 2025-10-05 08:10:22,287 - root - INFO - step: 15620 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9771 +[titan] 2025-10-05 08:10:22,287 - root - INFO - lr: 3.5496e-05 gnorm: 1.08 [ 9:36:12<14:59:21] +[titan] 2025-10-05 08:10:33,166 - root - INFO - step: 15625 loss: 2.2767 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 08:10:33,166 - root - INFO - lr: 3.5487e-05 gnorm: 1.12 [ 9:36:23<14:59:10] +[titan] 2025-10-05 08:10:44,056 - root - INFO - step: 15630 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 08:10:44,056 - root - INFO - lr: 
3.5479e-05 gnorm: 1.07 [ 9:36:34<14:58:58] +[titan] 2025-10-05 08:10:54,998 - root - INFO - step: 15635 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 08:10:54,998 - root - INFO - lr: 3.5471e-05 gnorm: 1.07 [ 9:36:45<14:58:47] +[titan] 2025-10-05 08:11:05,867 - root - INFO - step: 15640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:11:05,867 - root - INFO - lr: 3.5462e-05 gnorm: 1.08 [ 9:36:56<14:58:36] +[titan] 2025-10-05 08:11:16,727 - root - INFO - step: 15645 loss: 2.1832 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:11:16,727 - root - INFO - lr: 3.5454e-05 gnorm: 1.08 [ 9:37:07<14:58:24] +[titan] 2025-10-05 08:11:25,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:11:27,605 - root - INFO - step: 15650 loss: 2.1882 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:11:27,605 - root - INFO - lr: 3.5445e-05 gnorm: 1.03 [ 9:37:17<14:58:13] +[titan] 2025-10-05 08:11:38,458 - root - INFO - step: 15655 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:11:38,458 - root - INFO - lr: 3.5437e-05 gnorm: 1.05 [ 9:37:28<14:58:02] +[titan] 2025-10-05 08:11:49,330 - root - INFO - step: 15660 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 08:11:49,330 - root - INFO - lr: 3.5429e-05 gnorm: 1.11 [ 9:37:39<14:57:50] +[titan] 2025-10-05 08:12:00,235 - root - INFO - step: 15665 loss: 2.1792 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9268 +[titan] 2025-10-05 08:12:00,235 - root - INFO - lr: 3.5420e-05 gnorm: 1.07 [ 9:37:50<14:57:39] +[titan] 2025-10-05 08:12:11,114 - root - INFO - step: 15670 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9779 +[titan] 2025-10-05 08:12:11,114 - root - INFO - lr: 3.5412e-05 gnorm: 1.03 [ 9:38:01<14:57:28] +[titan] 2025-10-05 08:12:21,997 - root - INFO - step: 15675 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 08:12:21,997 - root - INFO - lr: 3.5404e-05 gnorm: 1.07 [ 9:38:12<14:57:16] +[titan] 2025-10-05 08:12:32,868 - root - INFO - step: 15680 loss: 2.2075 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:12:32,868 - root - INFO - lr: 
3.5395e-05 gnorm: 1.07 [ 9:38:23<14:57:05] +[titan] 2025-10-05 08:12:43,747 - root - INFO - step: 15685 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 08:12:43,747 - root - INFO - lr: 3.5387e-05 gnorm: 1.10 [ 9:38:34<14:56:54] +[titan] 2025-10-05 08:12:54,610 - root - INFO - step: 15690 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0090 +[titan] 2025-10-05 08:12:54,610 - root - INFO - lr: 3.5379e-05 gnorm: 1.08 [ 9:38:44<14:56:42] +[titan] 2025-10-05 08:13:05,466 - root - INFO - step: 15695 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 08:13:05,466 - root - INFO - lr: 3.5370e-05 gnorm: 1.06 [ 9:38:55<14:56:31] +[titan] 2025-10-05 08:13:14,194 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:13:16,388 - root - INFO - step: 15700 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0069 +[titan] 2025-10-05 08:13:16,388 - root - INFO - lr: 3.5362e-05 gnorm: 1.10 [ 9:39:06<14:56:19] +[titan] 2025-10-05 08:13:27,274 - root - INFO - step: 15705 loss: 2.2396 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9780 +[titan] 2025-10-05 08:13:27,275 - root - INFO - lr: 3.5354e-05 gnorm: 1.10 [ 9:39:17<14:56:08] +[titan] 2025-10-05 08:13:38,136 - root - INFO - step: 15710 loss: 2.2474 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 08:13:38,136 - root - INFO - lr: 3.5345e-05 gnorm: 1.12 [ 9:39:28<14:55:57] +[titan] 2025-10-05 08:13:49,010 - root - INFO - step: 15715 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9203 +[titan] 2025-10-05 08:13:49,010 - root - INFO - lr: 3.5337e-05 gnorm: 1.10 [ 9:39:39<14:55:45] +[titan] 2025-10-05 08:13:59,875 - root - INFO - step: 15720 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9736 +[titan] 2025-10-05 08:13:59,875 - root - INFO - lr: 3.5328e-05 gnorm: 1.11 [ 9:39:50<14:55:34] +[titan] 2025-10-05 08:14:10,743 - root - INFO - step: 15725 loss: 2.2138 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9568 +[titan] 2025-10-05 08:14:10,743 - root - INFO - lr: 3.5320e-05 gnorm: 1.10 [ 9:40:01<14:55:23] +[titan] 2025-10-05 08:14:21,639 - root - INFO - step: 15730 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 08:14:21,640 - root - INFO - lr: 
3.5312e-05 gnorm: 1.09 [ 9:40:11<14:55:11] +[titan] 2025-10-05 08:14:32,539 - root - INFO - step: 15735 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 08:14:32,539 - root - INFO - lr: 3.5303e-05 gnorm: 1.05 [ 9:40:22<14:55:00] +[titan] 2025-10-05 08:14:43,413 - root - INFO - step: 15740 loss: 2.2798 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:14:43,414 - root - INFO - lr: 3.5295e-05 gnorm: 1.11 [ 9:40:33<14:54:49] +[titan] 2025-10-05 08:14:54,293 - root - INFO - step: 15745 loss: 2.2448 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9833 +[titan] 2025-10-05 08:14:54,293 - root - INFO - lr: 3.5287e-05 gnorm: 1.15 [ 9:40:44<14:54:37] +[titan] 2025-10-05 08:15:02,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:15:05,161 - root - INFO - step: 15750 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9407 +[titan] 2025-10-05 08:15:05,161 - root - INFO - lr: 3.5278e-05 gnorm: 1.07 [ 9:40:55<14:54:26] +[titan] 2025-10-05 08:15:16,026 - root - INFO - step: 15755 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9732 +[titan] 2025-10-05 08:15:16,026 - root - INFO - lr: 3.5270e-05 gnorm: 1.08 [ 9:41:06<14:54:15] +[titan] 2025-10-05 08:15:26,950 - root - INFO - step: 15760 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9790 +[titan] 2025-10-05 08:15:26,951 - root - INFO - lr: 3.5261e-05 gnorm: 1.13 [ 9:41:17<14:54:03] +[titan] 2025-10-05 08:15:37,835 - root - INFO - step: 15765 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0588 +[titan] 2025-10-05 08:15:37,835 - root - INFO - lr: 3.5253e-05 gnorm: 1.07 [ 9:41:28<14:53:52] +[titan] 2025-10-05 08:15:48,693 - root - INFO - step: 15770 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:15:48,694 - root - INFO - lr: 3.5245e-05 gnorm: 1.07 [ 9:41:39<14:53:41] +[titan] 2025-10-05 08:15:59,558 - root - INFO - step: 15775 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 08:15:59,558 - root - INFO - lr: 3.5236e-05 gnorm: 1.09 [ 9:41:49<14:53:29] +[titan] 2025-10-05 08:16:10,424 - root - INFO - step: 15780 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:16:10,425 - root - INFO - lr: 
3.5228e-05 gnorm: 1.07 [ 9:42:00<14:53:18] +[titan] 2025-10-05 08:16:21,284 - root - INFO - step: 15785 loss: 2.2235 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9639 +[titan] 2025-10-05 08:16:21,284 - root - INFO - lr: 3.5220e-05 gnorm: 1.08 [ 9:42:11<14:53:06] +[titan] 2025-10-05 08:16:32,182 - root - INFO - step: 15790 loss: 2.2629 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 08:16:32,182 - root - INFO - lr: 3.5211e-05 gnorm: 1.05 [ 9:42:22<14:52:55] +[titan] 2025-10-05 08:16:43,101 - root - INFO - step: 15795 loss: 2.1715 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9180 +[titan] 2025-10-05 08:16:43,101 - root - INFO - lr: 3.5203e-05 gnorm: 1.08 [ 9:42:33<14:52:44] +[titan] 2025-10-05 08:16:51,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:16:53,985 - root - INFO - step: 15800 loss: 2.2694 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 08:16:53,985 - root - INFO - lr: 3.5194e-05 gnorm: 1.09 [ 9:42:44<14:52:32] +[titan] 2025-10-05 08:17:04,888 - root - INFO - step: 15805 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 08:17:04,888 - root - INFO - lr: 3.5186e-05 gnorm: 1.07 [ 9:42:55<14:52:21] +[titan] 2025-10-05 08:17:15,782 - root - INFO - step: 15810 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9612 +[titan] 2025-10-05 08:17:15,782 - root - INFO - lr: 3.5178e-05 gnorm: 1.11 [ 9:43:06<14:52:10] +[titan] 2025-10-05 08:17:26,682 - root - INFO - step: 15815 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 08:17:26,682 - root - INFO - lr: 3.5169e-05 gnorm: 1.09 [ 9:43:17<14:51:59] +[titan] 2025-10-05 08:17:37,542 - root - INFO - step: 15820 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9730 +[titan] 2025-10-05 08:17:37,542 - root - INFO - lr: 3.5161e-05 gnorm: 1.11 [ 9:43:27<14:51:47] +[titan] 2025-10-05 08:17:48,471 - root - INFO - step: 15825 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 08:17:48,471 - root - INFO - lr: 3.5152e-05 gnorm: 1.07 [ 9:43:38<14:51:36] +[titan] 2025-10-05 08:17:59,372 - root - INFO - step: 15830 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0266 +[titan] 2025-10-05 08:17:59,373 - root - INFO - lr: 
3.5144e-05 gnorm: 1.07 [ 9:43:49<14:51:25] +[titan] 2025-10-05 08:18:10,255 - root - INFO - step: 15835 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9594 +[titan] 2025-10-05 08:18:10,256 - root - INFO - lr: 3.5136e-05 gnorm: 1.11 [ 9:44:00<14:51:13] +[titan] 2025-10-05 08:18:21,144 - root - INFO - step: 15840 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 08:18:21,144 - root - INFO - lr: 3.5127e-05 gnorm: 1.07 [ 9:44:11<14:51:02] +[titan] 2025-10-05 08:18:32,042 - root - INFO - step: 15845 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 08:18:32,043 - root - INFO - lr: 3.5119e-05 gnorm: 1.07 [ 9:44:22<14:50:51] +[titan] 2025-10-05 08:18:40,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:18:42,921 - root - INFO - step: 15850 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9352 +[titan] 2025-10-05 08:18:42,921 - root - INFO - lr: 3.5111e-05 gnorm: 1.08 [ 9:44:33<14:50:39] +[titan] 2025-10-05 08:18:53,795 - root - INFO - step: 15855 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 08:18:53,795 - root - INFO - lr: 3.5102e-05 gnorm: 1.15 [ 9:44:44<14:50:28] +[titan] 2025-10-05 08:19:04,726 - root - INFO - step: 15860 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0324 +[titan] 2025-10-05 08:19:04,726 - root - INFO - lr: 3.5094e-05 gnorm: 1.13 [ 9:44:55<14:50:17] +[titan] 2025-10-05 08:19:15,610 - root - INFO - step: 15865 loss: 2.2234 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 08:19:15,610 - root - INFO - lr: 3.5085e-05 gnorm: 1.07 [ 9:45:05<14:50:05] +[titan] 2025-10-05 08:19:26,577 - root - INFO - step: 15870 loss: 2.2122 memory: 118.84GiB(85.28%) tps: 29,880 tflops: 414.54 mfu: 41.91% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:19:26,577 - root - INFO - lr: 3.5077e-05 gnorm: 1.09 [ 9:45:16<14:49:54] +[titan] 2025-10-05 08:19:31,104 - root - INFO - Dumping profiler traces at step 15872 +[titan] 2025-10-05 08:19:31,143 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:19:37,696 - root - INFO - step: 15875 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 08:19:37,697 - root - INFO - lr: 3.5068e-05 gnorm: 1.05 [ 9:45:28<14:49:43] +[titan] 2025-10-05 08:19:48,571 - root - INFO - step: 15880 loss: 
2.2001 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9437 +[titan] 2025-10-05 08:19:48,571 - root - INFO - lr: 3.5060e-05 gnorm: 1.08 [ 9:45:38<14:49:32] +[titan] 2025-10-05 08:19:59,444 - root - INFO - step: 15885 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9707 +[titan] 2025-10-05 08:19:59,444 - root - INFO - lr: 3.5052e-05 gnorm: 1.06 [ 9:45:49<14:49:20] +[titan] 2025-10-05 08:20:10,353 - root - INFO - step: 15890 loss: 2.2269 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 08:20:10,354 - root - INFO - lr: 3.5043e-05 gnorm: 1.09 [ 9:46:00<14:49:09] +[titan] 2025-10-05 08:20:21,229 - root - INFO - step: 15895 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9759 +[titan] 2025-10-05 08:20:21,230 - root - INFO - lr: 3.5035e-05 gnorm: 1.07 [ 9:46:11<14:48:58] +[titan] 2025-10-05 08:20:29,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:20:32,140 - root - INFO - step: 15900 loss: 2.1957 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 08:20:32,140 - root - INFO - lr: 3.5026e-05 gnorm: 1.09 [ 9:46:22<14:48:46] +[titan] 2025-10-05 08:20:43,027 - root - INFO - step: 15905 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:20:43,027 - root - INFO - lr: 3.5018e-05 gnorm: 1.06 [ 9:46:33<14:48:35] +[titan] 2025-10-05 08:20:53,932 - root - INFO - step: 15910 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 08:20:53,932 - root - INFO - lr: 3.5010e-05 gnorm: 1.11 [ 9:46:44<14:48:24] +[titan] 2025-10-05 08:21:04,803 - root - INFO - step: 15915 loss: 2.1550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9045 +[titan] 2025-10-05 08:21:04,803 - root - INFO - lr: 3.5001e-05 gnorm: 1.05 [ 9:46:55<14:48:12] +[titan] 2025-10-05 08:21:15,707 - root - INFO - step: 15920 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 08:21:15,707 - root - INFO - lr: 3.4993e-05 gnorm: 1.08 [ 9:47:06<14:48:01] +[titan] 2025-10-05 08:21:26,598 - root - INFO - step: 15925 loss: 2.2282 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 08:21:26,599 - root - INFO - lr: 3.4984e-05 gnorm: 1.10 [ 9:47:16<14:47:50] +[titan] 2025-10-05 08:21:37,540 - root - INFO - step: 15930 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0083 +[titan] 2025-10-05 08:21:37,540 - root - INFO - lr: 
3.4976e-05 gnorm: 1.11 [ 9:47:27<14:47:39] +[titan] 2025-10-05 08:21:48,426 - root - INFO - step: 15935 loss: 2.2034 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9455 +[titan] 2025-10-05 08:21:48,426 - root - INFO - lr: 3.4968e-05 gnorm: 1.07 [ 9:47:38<14:47:27] +[titan] 2025-10-05 08:21:59,298 - root - INFO - step: 15940 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9502 +[titan] 2025-10-05 08:21:59,298 - root - INFO - lr: 3.4959e-05 gnorm: 1.07 [ 9:47:49<14:47:16] +[titan] 2025-10-05 08:22:10,199 - root - INFO - step: 15945 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9384 +[titan] 2025-10-05 08:22:10,199 - root - INFO - lr: 3.4951e-05 gnorm: 1.09 [ 9:48:00<14:47:05] +[titan] 2025-10-05 08:22:18,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:22:21,106 - root - INFO - step: 15950 loss: 2.2603 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9966 +[titan] 2025-10-05 08:22:21,107 - root - INFO - lr: 3.4942e-05 gnorm: 1.06 [ 9:48:11<14:46:53] +[titan] 2025-10-05 08:22:32,066 - root - INFO - step: 15955 loss: 2.1766 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 08:22:32,066 - root - INFO - lr: 3.4934e-05 gnorm: 1.07 [ 9:48:22<14:46:42] +[titan] 2025-10-05 08:22:42,935 - root - INFO - step: 15960 loss: 2.2164 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:22:42,935 - root - INFO - lr: 3.4925e-05 gnorm: 1.06 [ 9:48:33<14:46:31] +[titan] 2025-10-05 08:22:53,820 - root - INFO - step: 15965 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 08:22:53,820 - root - INFO - lr: 3.4917e-05 gnorm: 1.07 [ 9:48:44<14:46:19] +[titan] 2025-10-05 08:23:04,735 - root - INFO - step: 15970 loss: 2.2899 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0224 +[titan] 2025-10-05 08:23:04,735 - root - INFO - lr: 3.4909e-05 gnorm: 1.17 [ 9:48:55<14:46:08] +[titan] 2025-10-05 08:23:15,637 - root - INFO - step: 15975 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9298 +[titan] 2025-10-05 08:23:15,637 - root - INFO - lr: 3.4900e-05 gnorm: 1.05 [ 9:49:05<14:45:57] +[titan] 2025-10-05 08:23:26,529 - root - INFO - step: 15980 loss: 2.2468 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 08:23:26,529 - root - INFO - lr: 
3.4892e-05 gnorm: 1.08 [ 9:49:16<14:45:45] +[titan] 2025-10-05 08:23:37,517 - root - INFO - step: 15985 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.76 mfu: 41.84% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:23:37,517 - root - INFO - lr: 3.4883e-05 gnorm: 1.09 [ 9:49:27<14:45:34] +[titan] 2025-10-05 08:23:48,403 - root - INFO - step: 15990 loss: 2.2605 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 08:23:48,403 - root - INFO - lr: 3.4875e-05 gnorm: 1.12 [ 9:49:38<14:45:23] +[titan] 2025-10-05 08:23:59,284 - root - INFO - step: 15995 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:23:59,284 - root - INFO - lr: 3.4866e-05 gnorm: 1.08 [ 9:49:49<14:45:12] +[titan] 2025-10-05 08:24:07,984 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:24:10,169 - root - INFO - step: 16000 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9270 +[titan] 2025-10-05 08:24:10,169 - root - INFO - lr: 3.4858e-05 gnorm: 1.05 [ 9:50:00<14:45:00] +[titan] 2025-10-05 08:24:21,053 - root - INFO - step: 16005 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 08:24:21,053 - root - INFO - lr: 3.4850e-05 gnorm: 1.09 [ 9:50:11<14:44:49] +[titan] 2025-10-05 08:24:31,978 - root - INFO - step: 16010 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 08:24:31,979 - root - INFO - lr: 3.4841e-05 gnorm: 1.05 [ 9:50:22<14:44:38] +[titan] 2025-10-05 08:24:42,866 - root - INFO - step: 16015 loss: 2.2354 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:24:42,866 - root - INFO - lr: 3.4833e-05 gnorm: 1.04 [ 9:50:33<14:44:26] +[titan] 2025-10-05 08:24:53,773 - root - INFO - step: 16020 loss: 2.2147 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9542 +[titan] 2025-10-05 08:24:53,773 - root - INFO - lr: 3.4824e-05 gnorm: 1.08 [ 9:50:44<14:44:15] +[titan] 2025-10-05 08:25:04,656 - root - INFO - step: 16025 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 08:25:04,656 - root - INFO - lr: 3.4816e-05 gnorm: 1.08 [ 9:50:54<14:44:04] +[titan] 2025-10-05 08:25:15,527 - root - INFO - step: 16030 loss: 2.2616 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 08:25:15,527 - root - INFO - lr: 
3.4807e-05 gnorm: 1.05 [ 9:51:05<14:43:52] +[titan] 2025-10-05 08:25:26,410 - root - INFO - step: 16035 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0174 +[titan] 2025-10-05 08:25:26,411 - root - INFO - lr: 3.4799e-05 gnorm: 1.10 [ 9:51:16<14:43:41] +[titan] 2025-10-05 08:25:37,315 - root - INFO - step: 16040 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9599 +[titan] 2025-10-05 08:25:37,315 - root - INFO - lr: 3.4790e-05 gnorm: 1.09 [ 9:51:27<14:43:30] +[titan] 2025-10-05 08:25:48,166 - root - INFO - step: 16045 loss: 2.2422 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:25:48,166 - root - INFO - lr: 3.4782e-05 gnorm: 1.07 [ 9:51:38<14:43:18] +[titan] 2025-10-05 08:25:56,884 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:25:59,065 - root - INFO - step: 16050 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0259 +[titan] 2025-10-05 08:25:59,065 - root - INFO - lr: 3.4774e-05 gnorm: 1.08 [ 9:51:49<14:43:07] +[titan] 2025-10-05 08:26:09,947 - root - INFO - step: 16055 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 08:26:09,947 - root - INFO - lr: 3.4765e-05 gnorm: 1.09 [ 9:52:00<14:42:56] +[titan] 2025-10-05 08:26:20,832 - root - INFO - step: 16060 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 08:26:20,832 - root - INFO - lr: 3.4757e-05 gnorm: 1.17 [ 9:52:11<14:42:44] +[titan] 2025-10-05 08:26:31,707 - root - INFO - step: 16065 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 08:26:31,707 - root - INFO - lr: 3.4748e-05 gnorm: 1.08 [ 9:52:22<14:42:33] +[titan] 2025-10-05 08:26:42,617 - root - INFO - step: 16070 loss: 2.2299 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 08:26:42,618 - root - INFO - lr: 3.4740e-05 gnorm: 1.09 [ 9:52:32<14:42:22] +[titan] 2025-10-05 08:26:53,494 - root - INFO - step: 16075 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9418 +[titan] 2025-10-05 08:26:53,495 - root - INFO - lr: 3.4731e-05 gnorm: 1.08 [ 9:52:43<14:42:10] +[titan] 2025-10-05 08:27:04,387 - root - INFO - step: 16080 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 08:27:04,387 - root - INFO - lr: 
3.4723e-05 gnorm: 1.09 [ 9:52:54<14:41:59] +[titan] 2025-10-05 08:27:15,275 - root - INFO - step: 16085 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 08:27:15,276 - root - INFO - lr: 3.4714e-05 gnorm: 1.08 [ 9:53:05<14:41:48] +[titan] 2025-10-05 08:27:26,154 - root - INFO - step: 16090 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9521 +[titan] 2025-10-05 08:27:26,154 - root - INFO - lr: 3.4706e-05 gnorm: 1.05 [ 9:53:16<14:41:36] +[titan] 2025-10-05 08:27:37,046 - root - INFO - step: 16095 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:27:37,047 - root - INFO - lr: 3.4698e-05 gnorm: 1.07 [ 9:53:27<14:41:25] +[titan] 2025-10-05 08:27:45,745 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:27:47,929 - root - INFO - step: 16100 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 08:27:47,930 - root - INFO - lr: 3.4689e-05 gnorm: 1.08 [ 9:53:38<14:41:14] +[titan] 2025-10-05 08:27:58,796 - root - INFO - step: 16105 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9585 +[titan] 2025-10-05 08:27:58,796 - root - INFO - lr: 3.4681e-05 gnorm: 1.07 [ 9:53:49<14:41:02] +[titan] 2025-10-05 08:28:09,669 - root - INFO - step: 16110 loss: 2.2129 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9548 +[titan] 2025-10-05 08:28:09,669 - root - INFO - lr: 3.4672e-05 gnorm: 1.07 [ 9:53:59<14:40:51] +[titan] 2025-10-05 08:28:20,594 - root - INFO - step: 16115 loss: 2.1544 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:28:20,594 - root - INFO - lr: 3.4664e-05 gnorm: 1.05 [ 9:54:10<14:40:40] +[titan] 2025-10-05 08:28:31,485 - root - INFO - step: 16120 loss: 2.2760 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 08:28:31,485 - root - INFO - lr: 3.4655e-05 gnorm: 1.09 [ 9:54:21<14:40:29] +[titan] 2025-10-05 08:28:42,397 - root - INFO - step: 16125 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 08:28:42,398 - root - INFO - lr: 3.4647e-05 gnorm: 1.10 [ 9:54:32<14:40:17] +[titan] 2025-10-05 08:28:53,284 - root - INFO - step: 16130 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 08:28:53,284 - root - INFO - lr: 
3.4638e-05 gnorm: 1.14 [ 9:54:43<14:40:06] +[titan] 2025-10-05 08:29:04,160 - root - INFO - step: 16135 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0235 +[titan] 2025-10-05 08:29:04,161 - root - INFO - lr: 3.4630e-05 gnorm: 1.09 [ 9:54:54<14:39:55] +[titan] 2025-10-05 08:29:15,049 - root - INFO - step: 16140 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 08:29:15,049 - root - INFO - lr: 3.4621e-05 gnorm: 1.06 [ 9:55:05<14:39:43] +[titan] 2025-10-05 08:29:25,956 - root - INFO - step: 16145 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9694 +[titan] 2025-10-05 08:29:25,956 - root - INFO - lr: 3.4613e-05 gnorm: 1.10 [ 9:55:16<14:39:32] +[titan] 2025-10-05 08:29:34,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:29:36,856 - root - INFO - step: 16150 loss: 2.1905 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 08:29:36,857 - root - INFO - lr: 3.4604e-05 gnorm: 1.12 [ 9:55:27<14:39:21] +[titan] 2025-10-05 08:29:47,747 - root - INFO - step: 16155 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 08:29:47,748 - root - INFO - lr: 3.4596e-05 gnorm: 1.06 [ 9:55:38<14:39:09] +[titan] 2025-10-05 08:29:58,621 - root - INFO - step: 16160 loss: 2.2108 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9539 +[titan] 2025-10-05 08:29:58,621 - root - INFO - lr: 3.4588e-05 gnorm: 1.06 [ 9:55:48<14:38:58] +[titan] 2025-10-05 08:30:09,500 - root - INFO - step: 16165 loss: 2.2802 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:30:09,500 - root - INFO - lr: 3.4579e-05 gnorm: 1.11 [ 9:55:59<14:38:47] +[titan] 2025-10-05 08:30:20,377 - root - INFO - step: 16170 loss: 2.2485 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9864 +[titan] 2025-10-05 08:30:20,377 - root - INFO - lr: 3.4571e-05 gnorm: 1.07 [ 9:56:10<14:38:35] +[titan] 2025-10-05 08:30:31,256 - root - INFO - step: 16175 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0133 +[titan] 2025-10-05 08:30:31,256 - root - INFO - lr: 3.4562e-05 gnorm: 1.07 [ 9:56:21<14:38:24] +[titan] 2025-10-05 08:30:42,181 - root - INFO - step: 16180 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 08:30:42,181 - root - INFO - lr: 
3.4554e-05 gnorm: 1.08 [ 9:56:32<14:38:13] +[titan] 2025-10-05 08:30:53,053 - root - INFO - step: 16185 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 08:30:53,053 - root - INFO - lr: 3.4545e-05 gnorm: 1.05 [ 9:56:43<14:38:01] +[titan] 2025-10-05 08:31:03,931 - root - INFO - step: 16190 loss: 2.1765 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9234 +[titan] 2025-10-05 08:31:03,931 - root - INFO - lr: 3.4537e-05 gnorm: 1.08 [ 9:56:54<14:37:50] +[titan] 2025-10-05 08:31:14,795 - root - INFO - step: 16195 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9284 +[titan] 2025-10-05 08:31:14,796 - root - INFO - lr: 3.4528e-05 gnorm: 1.09 [ 9:57:05<14:37:39] +[titan] 2025-10-05 08:31:23,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:31:25,652 - root - INFO - step: 16200 loss: 2.3077 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0390 +[titan] 2025-10-05 08:31:25,653 - root - INFO - lr: 3.4520e-05 gnorm: 1.10 [ 9:57:15<14:37:27] +[titan] 2025-10-05 08:31:36,508 - root - INFO - step: 16205 loss: 2.2864 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0206 +[titan] 2025-10-05 08:31:36,508 - root - INFO - lr: 3.4511e-05 gnorm: 1.04 [ 9:57:26<14:37:16] +[titan] 2025-10-05 08:31:47,457 - root - INFO - step: 16210 loss: 2.2341 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 08:31:47,457 - root - INFO - lr: 3.4503e-05 gnorm: 1.09 [ 9:57:37<14:37:05] +[titan] 2025-10-05 08:31:58,346 - root - INFO - step: 16215 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 08:31:58,346 - root - INFO - lr: 3.4494e-05 gnorm: 1.08 [ 9:57:48<14:36:53] +[titan] 2025-10-05 08:32:09,203 - root - INFO - step: 16220 loss: 2.1804 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 08:32:09,204 - root - INFO - lr: 3.4486e-05 gnorm: 1.07 [ 9:57:59<14:36:42] +[titan] 2025-10-05 08:32:20,094 - root - INFO - step: 16225 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 08:32:20,094 - root - INFO - lr: 3.4477e-05 gnorm: 1.07 [ 9:58:10<14:36:31] +[titan] 2025-10-05 08:32:30,976 - root - INFO - step: 16230 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9488 +[titan] 2025-10-05 08:32:30,977 - root - INFO - lr: 
3.4469e-05 gnorm: 1.05 [ 9:58:21<14:36:20] +[titan] 2025-10-05 08:32:41,910 - root - INFO - step: 16235 loss: 2.2424 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:32:41,911 - root - INFO - lr: 3.4460e-05 gnorm: 1.06 [ 9:58:32<14:36:08] +[titan] 2025-10-05 08:32:52,835 - root - INFO - step: 16240 loss: 2.1658 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9139 +[titan] 2025-10-05 08:32:52,835 - root - INFO - lr: 3.4452e-05 gnorm: 1.04 [ 9:58:43<14:35:57] +[titan] 2025-10-05 08:33:03,725 - root - INFO - step: 16245 loss: 2.2254 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:33:03,725 - root - INFO - lr: 3.4443e-05 gnorm: 1.08 [ 9:58:54<14:35:46] +[titan] 2025-10-05 08:33:12,441 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:33:14,633 - root - INFO - step: 16250 loss: 2.2316 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 08:33:14,634 - root - INFO - lr: 3.4435e-05 gnorm: 1.10 [ 9:59:04<14:35:34] +[titan] 2025-10-05 08:33:25,534 - root - INFO - step: 16255 loss: 2.3076 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0385 +[titan] 2025-10-05 08:33:25,534 - root - INFO - lr: 3.4426e-05 gnorm: 1.10 [ 9:59:15<14:35:23] +[titan] 2025-10-05 08:33:36,432 - root - INFO - step: 16260 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 08:33:36,433 - root - INFO - lr: 3.4418e-05 gnorm: 1.13 [ 9:59:26<14:35:12] +[titan] 2025-10-05 08:33:47,313 - root - INFO - step: 16265 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9849 +[titan] 2025-10-05 08:33:47,313 - root - INFO - lr: 3.4409e-05 gnorm: 1.10 [ 9:59:37<14:35:00] +[titan] 2025-10-05 08:33:58,157 - root - INFO - step: 16270 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:33:58,157 - root - INFO - lr: 3.4401e-05 gnorm: 1.09 [ 9:59:48<14:34:49] +[titan] 2025-10-05 08:34:09,059 - root - INFO - step: 16275 loss: 2.2042 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 08:34:09,059 - root - INFO - lr: 3.4392e-05 gnorm: 1.05 [ 9:59:59<14:34:38] +[titan] 2025-10-05 08:34:19,912 - root - INFO - step: 16280 loss: 2.2416 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:34:19,912 - root - INFO - lr: 
3.4384e-05 gnorm: 1.07 [10:00:10<14:34:26] +[titan] 2025-10-05 08:34:30,777 - root - INFO - step: 16285 loss: 2.1576 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:34:30,777 - root - INFO - lr: 3.4375e-05 gnorm: 1.09 [10:00:21<14:34:15] +[titan] 2025-10-05 08:34:41,653 - root - INFO - step: 16290 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 08:34:41,653 - root - INFO - lr: 3.4367e-05 gnorm: 1.05 [10:00:31<14:34:04] +[titan] 2025-10-05 08:34:52,516 - root - INFO - step: 16295 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 08:34:52,516 - root - INFO - lr: 3.4358e-05 gnorm: 1.05 [10:00:42<14:33:52] +[titan] 2025-10-05 08:35:01,184 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:35:03,369 - root - INFO - step: 16300 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0097 +[titan] 2025-10-05 08:35:03,370 - root - INFO - lr: 3.4350e-05 gnorm: 1.13 [10:00:53<14:33:41] +[titan] 2025-10-05 08:35:14,258 - root - INFO - step: 16305 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:35:14,258 - root - INFO - lr: 3.4341e-05 gnorm: 1.10 [10:01:04<14:33:30] +[titan] 2025-10-05 08:35:25,117 - root - INFO - step: 16310 loss: 2.2039 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 08:35:25,117 - root - INFO - lr: 3.4333e-05 gnorm: 1.07 [10:01:15<14:33:18] +[titan] 2025-10-05 08:35:35,923 - root - INFO - step: 16315 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:35:35,923 - root - INFO - lr: 3.4324e-05 gnorm: 1.06 [10:01:26<14:33:07] +[titan] 2025-10-05 08:35:46,803 - root - INFO - step: 16320 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9004 +[titan] 2025-10-05 08:35:46,803 - root - INFO - lr: 3.4316e-05 gnorm: 1.06 [10:01:37<14:32:56] +[titan] 2025-10-05 08:35:57,651 - root - INFO - step: 16325 loss: 2.2716 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0076 +[titan] 2025-10-05 08:35:57,651 - root - INFO - lr: 3.4307e-05 gnorm: 1.08 [10:01:47<14:32:44] +[titan] 2025-10-05 08:36:08,474 - root - INFO - step: 16330 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8746 +[titan] 2025-10-05 08:36:08,474 - root - INFO - lr: 
3.4299e-05 gnorm: 1.05 [10:01:58<14:32:33] +[titan] 2025-10-05 08:36:19,326 - root - INFO - step: 16335 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 08:36:19,326 - root - INFO - lr: 3.4290e-05 gnorm: 1.05 [10:02:09<14:32:22] +[titan] 2025-10-05 08:36:30,202 - root - INFO - step: 16340 loss: 2.2109 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9528 +[titan] 2025-10-05 08:36:30,202 - root - INFO - lr: 3.4282e-05 gnorm: 1.09 [10:02:20<14:32:10] +[titan] 2025-10-05 08:36:41,056 - root - INFO - step: 16345 loss: 2.2287 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9688 +[titan] 2025-10-05 08:36:41,056 - root - INFO - lr: 3.4273e-05 gnorm: 1.09 [10:02:31<14:31:59] +[titan] 2025-10-05 08:36:49,742 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:36:51,933 - root - INFO - step: 16350 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 08:36:51,933 - root - INFO - lr: 3.4265e-05 gnorm: 1.08 [10:02:42<14:31:48] +[titan] 2025-10-05 08:37:02,815 - root - INFO - step: 16355 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0175 +[titan] 2025-10-05 08:37:02,815 - root - INFO - lr: 3.4256e-05 gnorm: 1.09 [10:02:53<14:31:36] +[titan] 2025-10-05 08:37:13,670 - root - INFO - step: 16360 loss: 2.1862 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:37:13,671 - root - INFO - lr: 3.4248e-05 gnorm: 1.04 [10:03:03<14:31:25] +[titan] 2025-10-05 08:37:24,518 - root - INFO - step: 16365 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:37:24,518 - root - INFO - lr: 3.4239e-05 gnorm: 1.12 [10:03:14<14:31:14] +[titan] 2025-10-05 08:37:35,400 - root - INFO - step: 16370 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9962 +[titan] 2025-10-05 08:37:35,401 - root - INFO - lr: 3.4231e-05 gnorm: 1.08 [10:03:25<14:31:02] +[titan] 2025-10-05 08:37:46,321 - root - INFO - step: 16375 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 08:37:46,321 - root - INFO - lr: 3.4222e-05 gnorm: 1.06 [10:03:36<14:30:51] +[titan] 2025-10-05 08:37:57,173 - root - INFO - step: 16380 loss: 2.2402 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9783 +[titan] 2025-10-05 08:37:57,173 - root - INFO - lr: 
3.4214e-05 gnorm: 1.11 [10:03:47<14:30:40] +[titan] 2025-10-05 08:38:06,121 - root - INFO - Dumping profiler traces at step 16384 +[titan] 2025-10-05 08:38:06,159 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:38:08,373 - root - INFO - step: 16385 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 29,257 tflops: 405.90 mfu: 41.04% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9247 +[titan] 2025-10-05 08:38:08,373 - root - INFO - lr: 3.4205e-05 gnorm: 1.11 [10:03:58<14:30:29] +[titan] 2025-10-05 08:38:19,239 - root - INFO - step: 16390 loss: 2.2560 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 08:38:19,239 - root - INFO - lr: 3.4197e-05 gnorm: 1.08 [10:04:09<14:30:17] +[titan] 2025-10-05 08:38:30,091 - root - INFO - step: 16395 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 08:38:30,091 - root - INFO - lr: 3.4188e-05 gnorm: 1.06 [10:04:20<14:30:06] +[titan] 2025-10-05 08:38:38,779 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:38:41,007 - root - INFO - step: 16400 loss: 2.1921 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 08:38:41,007 - root - INFO - lr: 3.4180e-05 gnorm: 1.12 [10:04:31<14:29:55] +[titan] 2025-10-05 08:38:51,898 - root - INFO - step: 16405 loss: 2.2523 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9902 +[titan] 2025-10-05 08:38:51,898 - root - INFO - lr: 3.4171e-05 gnorm: 1.10 [10:04:42<14:29:43] +[titan] 2025-10-05 08:39:02,751 - root - INFO - step: 16410 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 08:39:02,751 - root - INFO - lr: 3.4163e-05 gnorm: 1.10 [10:04:53<14:29:32] +[titan] 2025-10-05 08:39:13,601 - root - INFO - step: 16415 loss: 2.1622 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 08:39:13,601 - root - INFO - lr: 3.4154e-05 gnorm: 1.06 [10:05:03<14:29:21] +[titan] 2025-10-05 08:39:24,471 - root - INFO - step: 16420 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9579 +[titan] 2025-10-05 08:39:24,471 - root - INFO - lr: 3.4146e-05 gnorm: 1.06 [10:05:14<14:29:09] +[titan] 2025-10-05 08:39:35,332 - root - INFO - step: 16425 loss: 2.1912 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9364 +[titan] 2025-10-05 08:39:35,333 - root - INFO - lr: 3.4137e-05 gnorm: 1.06 [10:05:25<14:28:58] +[titan] 2025-10-05 08:39:46,223 - root - INFO - step: 16430 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 08:39:46,223 - root - INFO - lr: 
3.4129e-05 gnorm: 1.07 [10:05:36<14:28:47] +[titan] 2025-10-05 08:39:57,116 - root - INFO - step: 16435 loss: 2.2229 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9601 +[titan] 2025-10-05 08:39:57,116 - root - INFO - lr: 3.4120e-05 gnorm: 1.10 [10:05:47<14:28:36] +[titan] 2025-10-05 08:40:07,956 - root - INFO - step: 16440 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9241 +[titan] 2025-10-05 08:40:07,956 - root - INFO - lr: 3.4111e-05 gnorm: 1.06 [10:05:58<14:28:24] +[titan] 2025-10-05 08:40:18,791 - root - INFO - step: 16445 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:40:18,792 - root - INFO - lr: 3.4103e-05 gnorm: 1.08 [10:06:09<14:28:13] +[titan] 2025-10-05 08:40:27,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:40:29,625 - root - INFO - step: 16450 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9165 +[titan] 2025-10-05 08:40:29,626 - root - INFO - lr: 3.4094e-05 gnorm: 1.09 [10:06:19<14:28:01] +[titan] 2025-10-05 08:40:40,476 - root - INFO - step: 16455 loss: 2.1561 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 08:40:40,476 - root - INFO - lr: 3.4086e-05 gnorm: 1.05 [10:06:30<14:27:50] +[titan] 2025-10-05 08:40:51,351 - root - INFO - step: 16460 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.9013 +[titan] 2025-10-05 08:40:51,351 - root - INFO - lr: 3.4077e-05 gnorm: 1.06 [10:06:41<14:27:39] +[titan] 2025-10-05 08:41:02,252 - root - INFO - step: 16465 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:41:02,252 - root - INFO - lr: 3.4069e-05 gnorm: 1.05 [10:06:52<14:27:27] +[titan] 2025-10-05 08:41:13,112 - root - INFO - step: 16470 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9462 +[titan] 2025-10-05 08:41:13,113 - root - INFO - lr: 3.4060e-05 gnorm: 1.10 [10:07:03<14:27:16] +[titan] 2025-10-05 08:41:23,980 - root - INFO - step: 16475 loss: 2.2132 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:41:23,981 - root - INFO - lr: 3.4052e-05 gnorm: 1.05 [10:07:14<14:27:05] +[titan] 2025-10-05 08:41:34,850 - root - INFO - step: 16480 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 08:41:34,850 - root - INFO - lr: 
3.4043e-05 gnorm: 1.07 [10:07:25<14:26:53] +[titan] 2025-10-05 08:41:45,728 - root - INFO - step: 16485 loss: 2.1837 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 08:41:45,728 - root - INFO - lr: 3.4035e-05 gnorm: 1.10 [10:07:36<14:26:42] +[titan] 2025-10-05 08:41:56,603 - root - INFO - step: 16490 loss: 2.2265 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 08:41:56,603 - root - INFO - lr: 3.4026e-05 gnorm: 1.08 [10:07:46<14:26:31] +[titan] 2025-10-05 08:42:07,468 - root - INFO - step: 16495 loss: 2.2288 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 08:42:07,468 - root - INFO - lr: 3.4018e-05 gnorm: 1.10 [10:07:57<14:26:19] +[titan] 2025-10-05 08:42:16,188 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:42:18,373 - root - INFO - step: 16500 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9920 +[titan] 2025-10-05 08:42:18,373 - root - INFO - lr: 3.4009e-05 gnorm: 1.10 [10:08:08<14:26:08] +[titan] 2025-10-05 08:42:29,248 - root - INFO - step: 16505 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 08:42:29,248 - root - INFO - lr: 3.4000e-05 gnorm: 1.06 [10:08:19<14:25:57] +[titan] 2025-10-05 08:42:40,112 - root - INFO - step: 16510 loss: 2.1951 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9387 +[titan] 2025-10-05 08:42:40,112 - root - INFO - lr: 3.3992e-05 gnorm: 1.06 [10:08:30<14:25:46] +[titan] 2025-10-05 08:42:51,000 - root - INFO - step: 16515 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9731 +[titan] 2025-10-05 08:42:51,000 - root - INFO - lr: 3.3983e-05 gnorm: 1.06 [10:08:41<14:25:34] +[titan] 2025-10-05 08:43:01,864 - root - INFO - step: 16520 loss: 2.2392 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 1.9746 +[titan] 2025-10-05 08:43:01,864 - root - INFO - lr: 3.3975e-05 gnorm: 1.07 [10:08:52<14:25:23] +[titan] 2025-10-05 08:43:12,727 - root - INFO - step: 16525 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0203 +[titan] 2025-10-05 08:43:12,727 - root - INFO - lr: 3.3966e-05 gnorm: 1.13 [10:09:03<14:25:12] +[titan] 2025-10-05 08:43:23,632 - root - INFO - step: 16530 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 08:43:23,632 - root - INFO - lr: 
3.3958e-05 gnorm: 1.08 [10:09:13<14:25:00] +[titan] 2025-10-05 08:43:34,515 - root - INFO - step: 16535 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:43:34,515 - root - INFO - lr: 3.3949e-05 gnorm: 1.08 [10:09:24<14:24:49] +[titan] 2025-10-05 08:43:45,404 - root - INFO - step: 16540 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 08:43:45,404 - root - INFO - lr: 3.3941e-05 gnorm: 1.14 [10:09:35<14:24:38] +[titan] 2025-10-05 08:43:56,319 - root - INFO - step: 16545 loss: 2.1857 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 08:43:56,319 - root - INFO - lr: 3.3932e-05 gnorm: 1.07 [10:09:46<14:24:26] +[titan] 2025-10-05 08:44:05,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:44:07,197 - root - INFO - step: 16550 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 08:44:07,197 - root - INFO - lr: 3.3924e-05 gnorm: 1.05 [10:09:57<14:24:15] +[titan] 2025-10-05 08:44:18,066 - root - INFO - step: 16555 loss: 2.2226 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9632 +[titan] 2025-10-05 08:44:18,066 - root - INFO - lr: 3.3915e-05 gnorm: 1.09 [10:10:08<14:24:04] +[titan] 2025-10-05 08:44:28,972 - root - INFO - step: 16560 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 08:44:28,972 - root - INFO - lr: 3.3906e-05 gnorm: 1.05 [10:10:19<14:23:53] +[titan] 2025-10-05 08:44:39,817 - root - INFO - step: 16565 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9362 +[titan] 2025-10-05 08:44:39,817 - root - INFO - lr: 3.3898e-05 gnorm: 1.07 [10:10:30<14:23:41] +[titan] 2025-10-05 08:44:50,691 - root - INFO - step: 16570 loss: 2.1798 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9260 +[titan] 2025-10-05 08:44:50,691 - root - INFO - lr: 3.3889e-05 gnorm: 1.08 [10:10:40<14:23:30] +[titan] 2025-10-05 08:45:01,549 - root - INFO - step: 16575 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:45:01,549 - root - INFO - lr: 3.3881e-05 gnorm: 1.05 [10:10:51<14:23:19] +[titan] 2025-10-05 08:45:12,413 - root - INFO - step: 16580 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:45:12,413 - root - INFO - lr: 
3.3872e-05 gnorm: 1.08 [10:11:02<14:23:07] +[titan] 2025-10-05 08:45:23,289 - root - INFO - step: 16585 loss: 2.1742 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9208 +[titan] 2025-10-05 08:45:23,289 - root - INFO - lr: 3.3864e-05 gnorm: 1.07 [10:11:13<14:22:56] +[titan] 2025-10-05 08:45:34,149 - root - INFO - step: 16590 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 08:45:34,149 - root - INFO - lr: 3.3855e-05 gnorm: 1.11 [10:11:24<14:22:45] +[titan] 2025-10-05 08:45:45,091 - root - INFO - step: 16595 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 08:45:45,091 - root - INFO - lr: 3.3847e-05 gnorm: 1.06 [10:11:35<14:22:33] +[titan] 2025-10-05 08:45:53,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:45:55,993 - root - INFO - step: 16600 loss: 2.1689 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9160 +[titan] 2025-10-05 08:45:55,993 - root - INFO - lr: 3.3838e-05 gnorm: 1.04 [10:11:46<14:22:22] +[titan] 2025-10-05 08:46:06,866 - root - INFO - step: 16605 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:46:06,866 - root - INFO - lr: 3.3829e-05 gnorm: 1.04 [10:11:57<14:22:11] +[titan] 2025-10-05 08:46:17,754 - root - INFO - step: 16610 loss: 2.2141 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 08:46:17,755 - root - INFO - lr: 3.3821e-05 gnorm: 1.09 [10:12:08<14:21:59] +[titan] 2025-10-05 08:46:28,629 - root - INFO - step: 16615 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9330 +[titan] 2025-10-05 08:46:28,629 - root - INFO - lr: 3.3812e-05 gnorm: 1.09 [10:12:18<14:21:48] +[titan] 2025-10-05 08:46:39,510 - root - INFO - step: 16620 loss: 2.1330 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 08:46:39,510 - root - INFO - lr: 3.3804e-05 gnorm: 1.07 [10:12:29<14:21:37] +[titan] 2025-10-05 08:46:50,420 - root - INFO - step: 16625 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9610 +[titan] 2025-10-05 08:46:50,420 - root - INFO - lr: 3.3795e-05 gnorm: 1.09 [10:12:40<14:21:26] +[titan] 2025-10-05 08:47:01,324 - root - INFO - step: 16630 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 08:47:01,324 - root - INFO - lr: 
3.3787e-05 gnorm: 1.10 [10:12:51<14:21:14] +[titan] 2025-10-05 08:47:12,217 - root - INFO - step: 16635 loss: 2.1195 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 08:47:12,217 - root - INFO - lr: 3.3778e-05 gnorm: 1.09 [10:13:02<14:21:03] +[titan] 2025-10-05 08:47:23,110 - root - INFO - step: 16640 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:47:23,110 - root - INFO - lr: 3.3769e-05 gnorm: 1.12 [10:13:13<14:20:52] +[titan] 2025-10-05 08:47:34,010 - root - INFO - step: 16645 loss: 2.1744 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 08:47:34,010 - root - INFO - lr: 3.3761e-05 gnorm: 1.10 [10:13:24<14:20:40] +[titan] 2025-10-05 08:47:42,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:47:44,910 - root - INFO - step: 16650 loss: 2.1803 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 08:47:44,910 - root - INFO - lr: 3.3752e-05 gnorm: 1.11 [10:13:35<14:20:29] +[titan] 2025-10-05 08:47:55,812 - root - INFO - step: 16655 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 08:47:55,812 - root - INFO - lr: 3.3744e-05 gnorm: 1.10 [10:13:46<14:20:18] +[titan] 2025-10-05 08:48:06,738 - root - INFO - step: 16660 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0120 +[titan] 2025-10-05 08:48:06,738 - root - INFO - lr: 3.3735e-05 gnorm: 1.11 [10:13:57<14:20:07] +[titan] 2025-10-05 08:48:17,635 - root - INFO - step: 16665 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:48:17,635 - root - INFO - lr: 3.3727e-05 gnorm: 1.10 [10:14:07<14:19:55] +[titan] 2025-10-05 08:48:28,518 - root - INFO - step: 16670 loss: 2.2203 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:48:28,518 - root - INFO - lr: 3.3718e-05 gnorm: 1.10 [10:14:18<14:19:44] +[titan] 2025-10-05 08:48:39,418 - root - INFO - step: 16675 loss: 2.2253 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2593 global_avg_mtp_loss: 1.9660 +[titan] 2025-10-05 08:48:39,419 - root - INFO - lr: 3.3709e-05 gnorm: 1.14 [10:14:29<14:19:33] +[titan] 2025-10-05 08:48:50,307 - root - INFO - step: 16680 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 08:48:50,307 - root - INFO - lr: 
3.3701e-05 gnorm: 1.09 [10:14:40<14:19:22] +[titan] 2025-10-05 08:49:01,231 - root - INFO - step: 16685 loss: 2.2071 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 08:49:01,231 - root - INFO - lr: 3.3692e-05 gnorm: 1.06 [10:14:51<14:19:10] +[titan] 2025-10-05 08:49:12,142 - root - INFO - step: 16690 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9875 +[titan] 2025-10-05 08:49:12,142 - root - INFO - lr: 3.3684e-05 gnorm: 1.05 [10:15:02<14:18:59] +[titan] 2025-10-05 08:49:23,035 - root - INFO - step: 16695 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0009 +[titan] 2025-10-05 08:49:23,036 - root - INFO - lr: 3.3675e-05 gnorm: 1.04 [10:15:13<14:18:48] +[titan] 2025-10-05 08:49:31,750 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:49:33,935 - root - INFO - step: 16700 loss: 2.1213 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8753 +[titan] 2025-10-05 08:49:33,935 - root - INFO - lr: 3.3667e-05 gnorm: 1.05 [10:15:24<14:18:36] +[titan] 2025-10-05 08:49:44,821 - root - INFO - step: 16705 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 08:49:44,821 - root - INFO - lr: 3.3658e-05 gnorm: 1.04 [10:15:35<14:18:25] +[titan] 2025-10-05 08:49:55,770 - root - INFO - step: 16710 loss: 2.1830 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9279 +[titan] 2025-10-05 08:49:55,770 - root - INFO - lr: 3.3649e-05 gnorm: 1.06 [10:15:46<14:18:14] +[titan] 2025-10-05 08:50:06,646 - root - INFO - step: 16715 loss: 2.1474 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 08:50:06,647 - root - INFO - lr: 3.3641e-05 gnorm: 1.05 [10:15:56<14:18:03] +[titan] 2025-10-05 08:50:17,562 - root - INFO - step: 16720 loss: 2.2478 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9859 +[titan] 2025-10-05 08:50:17,562 - root - INFO - lr: 3.3632e-05 gnorm: 1.08 [10:16:07<14:17:51] +[titan] 2025-10-05 08:50:28,447 - root - INFO - step: 16725 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 08:50:28,447 - root - INFO - lr: 3.3624e-05 gnorm: 1.03 [10:16:18<14:17:40] +[titan] 2025-10-05 08:50:39,327 - root - INFO - step: 16730 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 08:50:39,327 - root - INFO - lr: 
3.3615e-05 gnorm: 1.07 [10:16:29<14:17:29] +[titan] 2025-10-05 08:50:50,218 - root - INFO - step: 16735 loss: 2.1919 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:50:50,218 - root - INFO - lr: 3.3606e-05 gnorm: 1.08 [10:16:40<14:17:18] +[titan] 2025-10-05 08:51:01,116 - root - INFO - step: 16740 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9342 +[titan] 2025-10-05 08:51:01,116 - root - INFO - lr: 3.3598e-05 gnorm: 1.01 [10:16:51<14:17:06] +[titan] 2025-10-05 08:51:11,988 - root - INFO - step: 16745 loss: 2.1719 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 08:51:11,988 - root - INFO - lr: 3.3589e-05 gnorm: 1.09 [10:17:02<14:16:55] +[titan] 2025-10-05 08:51:20,683 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:51:22,867 - root - INFO - step: 16750 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:51:22,867 - root - INFO - lr: 3.3581e-05 gnorm: 1.07 [10:17:13<14:16:44] +[titan] 2025-10-05 08:51:33,766 - root - INFO - step: 16755 loss: 2.1698 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:51:33,766 - root - INFO - lr: 3.3572e-05 gnorm: 1.08 [10:17:24<14:16:32] +[titan] 2025-10-05 08:51:44,647 - root - INFO - step: 16760 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 08:51:44,647 - root - INFO - lr: 3.3563e-05 gnorm: 1.07 [10:17:34<14:16:21] +[titan] 2025-10-05 08:51:55,539 - root - INFO - step: 16765 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 08:51:55,539 - root - INFO - lr: 3.3555e-05 gnorm: 1.08 [10:17:45<14:16:10] +[titan] 2025-10-05 08:52:06,452 - root - INFO - step: 16770 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9905 +[titan] 2025-10-05 08:52:06,452 - root - INFO - lr: 3.3546e-05 gnorm: 1.10 [10:17:56<14:15:59] +[titan] 2025-10-05 08:52:17,344 - root - INFO - step: 16775 loss: 2.2357 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 08:52:17,344 - root - INFO - lr: 3.3538e-05 gnorm: 1.12 [10:18:07<14:15:47] +[titan] 2025-10-05 08:52:28,243 - root - INFO - step: 16780 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 08:52:28,243 - root - INFO - lr: 
3.3529e-05 gnorm: 1.05 [10:18:18<14:15:36] +[titan] 2025-10-05 08:52:39,158 - root - INFO - step: 16785 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 08:52:39,158 - root - INFO - lr: 3.3520e-05 gnorm: 1.08 [10:18:29<14:15:25] +[titan] 2025-10-05 08:52:50,027 - root - INFO - step: 16790 loss: 2.3254 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 08:52:50,027 - root - INFO - lr: 3.3512e-05 gnorm: 1.08 [10:18:40<14:15:13] +[titan] 2025-10-05 08:53:00,972 - root - INFO - step: 16795 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8815 +[titan] 2025-10-05 08:53:00,972 - root - INFO - lr: 3.3503e-05 gnorm: 1.05 [10:18:51<14:15:02] +[titan] 2025-10-05 08:53:09,655 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:53:11,847 - root - INFO - step: 16800 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9137 +[titan] 2025-10-05 08:53:11,847 - root - INFO - lr: 3.3495e-05 gnorm: 1.04 [10:19:02<14:14:51] +[titan] 2025-10-05 08:53:22,744 - root - INFO - step: 16805 loss: 2.2778 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0130 +[titan] 2025-10-05 08:53:22,744 - root - INFO - lr: 3.3486e-05 gnorm: 1.06 [10:19:12<14:14:40] +[titan] 2025-10-05 08:53:33,623 - root - INFO - step: 16810 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 08:53:33,623 - root - INFO - lr: 3.3477e-05 gnorm: 1.10 [10:19:23<14:14:28] +[titan] 2025-10-05 08:53:44,493 - root - INFO - step: 16815 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 08:53:44,493 - root - INFO - lr: 3.3469e-05 gnorm: 1.08 [10:19:34<14:14:17] +[titan] 2025-10-05 08:53:55,405 - root - INFO - step: 16820 loss: 2.3161 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2704 global_avg_mtp_loss: 2.0457 +[titan] 2025-10-05 08:53:55,405 - root - INFO - lr: 3.3460e-05 gnorm: 1.05 [10:19:45<14:14:06] +[titan] 2025-10-05 08:54:06,325 - root - INFO - step: 16825 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:54:06,325 - root - INFO - lr: 3.3452e-05 gnorm: 1.06 [10:19:56<14:13:55] +[titan] 2025-10-05 08:54:17,199 - root - INFO - step: 16830 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 08:54:17,199 - root - INFO - lr: 
3.3443e-05 gnorm: 1.14 [10:20:07<14:13:43] +[titan] 2025-10-05 08:54:28,086 - root - INFO - step: 16835 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 08:54:28,087 - root - INFO - lr: 3.3434e-05 gnorm: 1.11 [10:20:18<14:13:32] +[titan] 2025-10-05 08:54:38,979 - root - INFO - step: 16840 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:54:38,979 - root - INFO - lr: 3.3426e-05 gnorm: 1.10 [10:20:29<14:13:21] +[titan] 2025-10-05 08:54:49,879 - root - INFO - step: 16845 loss: 2.2348 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9741 +[titan] 2025-10-05 08:54:49,879 - root - INFO - lr: 3.3417e-05 gnorm: 1.14 [10:20:40<14:13:09] +[titan] 2025-10-05 08:54:58,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:55:00,873 - root - INFO - step: 16850 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 08:55:00,873 - root - INFO - lr: 3.3409e-05 gnorm: 1.06 [10:20:51<14:12:58] +[titan] 2025-10-05 08:55:11,763 - root - INFO - step: 16855 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:55:11,763 - root - INFO - lr: 3.3400e-05 gnorm: 1.10 [10:21:02<14:12:47] +[titan] 2025-10-05 08:55:22,662 - root - INFO - step: 16860 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:55:22,662 - root - INFO - lr: 3.3391e-05 gnorm: 1.05 [10:21:12<14:12:36] +[titan] 2025-10-05 08:55:33,543 - root - INFO - step: 16865 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8902 +[titan] 2025-10-05 08:55:33,543 - root - INFO - lr: 3.3383e-05 gnorm: 1.08 [10:21:23<14:12:24] +[titan] 2025-10-05 08:55:44,433 - root - INFO - step: 16870 loss: 2.2119 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:55:44,433 - root - INFO - lr: 3.3374e-05 gnorm: 1.08 [10:21:34<14:12:13] +[titan] 2025-10-05 08:55:55,318 - root - INFO - step: 16875 loss: 2.2256 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:55:55,319 - root - INFO - lr: 3.3366e-05 gnorm: 1.09 [10:21:45<14:12:02] +[titan] 2025-10-05 08:56:06,283 - root - INFO - step: 16880 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:56:06,283 - root - INFO - lr: 
3.3357e-05 gnorm: 1.08 [10:21:56<14:11:51] +[titan] 2025-10-05 08:56:17,168 - root - INFO - step: 16885 loss: 2.2361 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9760 +[titan] 2025-10-05 08:56:17,168 - root - INFO - lr: 3.3348e-05 gnorm: 1.07 [10:22:07<14:11:39] +[titan] 2025-10-05 08:56:28,070 - root - INFO - step: 16890 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:56:28,070 - root - INFO - lr: 3.3340e-05 gnorm: 1.03 [10:22:18<14:11:28] +[titan] 2025-10-05 08:56:39,053 - root - INFO - step: 16895 loss: 2.2559 memory: 118.84GiB(85.28%) tps: 29,836 tflops: 413.93 mfu: 41.85% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 08:56:39,054 - root - INFO - lr: 3.3331e-05 gnorm: 1.10 [10:22:29<14:11:17] +[titan] 2025-10-05 08:56:41,408 - root - INFO - Dumping profiler traces at step 16896 +[titan] 2025-10-05 08:56:41,446 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:56:47,993 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:56:50,179 - root - INFO - step: 16900 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 29,452 tflops: 408.61 mfu: 41.32% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9188 +[titan] 2025-10-05 08:56:50,180 - root - INFO - lr: 3.3322e-05 gnorm: 1.02 [10:22:40<14:11:06] +[titan] 2025-10-05 08:57:01,083 - root - INFO - step: 16905 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9226 +[titan] 2025-10-05 08:57:01,084 - root - INFO - lr: 3.3314e-05 gnorm: 1.15 [10:22:51<14:10:55] +[titan] 2025-10-05 08:57:11,941 - root - INFO - step: 16910 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9271 +[titan] 2025-10-05 08:57:11,942 - root - INFO - lr: 3.3305e-05 gnorm: 1.04 [10:23:02<14:10:44] +[titan] 2025-10-05 08:57:22,821 - root - INFO - step: 16915 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 08:57:22,822 - root - INFO - lr: 3.3297e-05 gnorm: 1.10 [10:23:13<14:10:32] +[titan] 2025-10-05 08:57:33,708 - root - INFO - step: 16920 loss: 2.1768 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9231 +[titan] 2025-10-05 08:57:33,708 - root - INFO - lr: 3.3288e-05 gnorm: 1.07 [10:23:23<14:10:21] +[titan] 2025-10-05 08:57:44,586 - root - INFO - step: 16925 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 08:57:44,586 - root - INFO - lr: 3.3279e-05 gnorm: 1.10 [10:23:34<14:10:10] +[titan] 2025-10-05 08:57:55,466 - root - INFO - step: 16930 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 08:57:55,466 - root - INFO - lr: 
3.3271e-05 gnorm: 1.08 [10:23:45<14:09:58] +[titan] 2025-10-05 08:58:06,365 - root - INFO - step: 16935 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9721 +[titan] 2025-10-05 08:58:06,365 - root - INFO - lr: 3.3262e-05 gnorm: 1.09 [10:23:56<14:09:47] +[titan] 2025-10-05 08:58:17,240 - root - INFO - step: 16940 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 08:58:17,240 - root - INFO - lr: 3.3253e-05 gnorm: 1.07 [10:24:07<14:09:36] +[titan] 2025-10-05 08:58:28,143 - root - INFO - step: 16945 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9826 +[titan] 2025-10-05 08:58:28,143 - root - INFO - lr: 3.3245e-05 gnorm: 1.07 [10:24:18<14:09:25] +[titan] 2025-10-05 08:58:36,824 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:58:39,030 - root - INFO - step: 16950 loss: 2.2032 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:58:39,030 - root - INFO - lr: 3.3236e-05 gnorm: 1.07 [10:24:29<14:09:13] +[titan] 2025-10-05 08:58:49,927 - root - INFO - step: 16955 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 08:58:49,928 - root - INFO - lr: 3.3228e-05 gnorm: 1.13 [10:24:40<14:09:02] +[titan] 2025-10-05 08:59:00,813 - root - INFO - step: 16960 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 08:59:00,813 - root - INFO - lr: 3.3219e-05 gnorm: 1.07 [10:24:51<14:08:51] +[titan] 2025-10-05 08:59:11,725 - root - INFO - step: 16965 loss: 2.1770 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9229 +[titan] 2025-10-05 08:59:11,725 - root - INFO - lr: 3.3210e-05 gnorm: 1.09 [10:25:01<14:08:39] +[titan] 2025-10-05 08:59:22,600 - root - INFO - step: 16970 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 08:59:22,601 - root - INFO - lr: 3.3202e-05 gnorm: 1.13 [10:25:12<14:08:28] +[titan] 2025-10-05 08:59:33,459 - root - INFO - step: 16975 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9184 +[titan] 2025-10-05 08:59:33,460 - root - INFO - lr: 3.3193e-05 gnorm: 1.10 [10:25:23<14:08:17] +[titan] 2025-10-05 08:59:44,382 - root - INFO - step: 16980 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9735 +[titan] 2025-10-05 08:59:44,382 - root - INFO - lr: 
3.3184e-05 gnorm: 1.04 [10:25:34<14:08:06] +[titan] 2025-10-05 08:59:55,274 - root - INFO - step: 16985 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8945 +[titan] 2025-10-05 08:59:55,274 - root - INFO - lr: 3.3176e-05 gnorm: 1.06 [10:25:45<14:07:54] +[titan] 2025-10-05 09:00:06,182 - root - INFO - step: 16990 loss: 2.2652 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0002 +[titan] 2025-10-05 09:00:06,183 - root - INFO - lr: 3.3167e-05 gnorm: 1.09 [10:25:56<14:07:43] +[titan] 2025-10-05 09:00:17,071 - root - INFO - step: 16995 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:00:17,071 - root - INFO - lr: 3.3158e-05 gnorm: 1.08 [10:26:07<14:07:32] +[titan] 2025-10-05 09:00:25,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:00:27,948 - root - INFO - step: 17000 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:00:27,948 - root - INFO - lr: 3.3150e-05 gnorm: 1.11 [10:26:18<14:07:21] +[titan] 2025-10-05 09:00:38,826 - root - INFO - step: 17005 loss: 2.2227 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 09:00:38,826 - root - INFO - lr: 3.3141e-05 gnorm: 1.07 [10:26:29<14:07:09] +[titan] 2025-10-05 09:00:49,742 - root - INFO - step: 17010 loss: 2.2205 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:00:49,742 - root - INFO - lr: 3.3133e-05 gnorm: 1.05 [10:26:39<14:06:58] +[titan] 2025-10-05 09:01:00,622 - root - INFO - step: 17015 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9094 +[titan] 2025-10-05 09:01:00,622 - root - INFO - lr: 3.3124e-05 gnorm: 1.08 [10:26:50<14:06:47] +[titan] 2025-10-05 09:01:11,523 - root - INFO - step: 17020 loss: 2.1800 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9266 +[titan] 2025-10-05 09:01:11,523 - root - INFO - lr: 3.3115e-05 gnorm: 1.07 [10:27:01<14:06:35] +[titan] 2025-10-05 09:01:22,424 - root - INFO - step: 17025 loss: 2.2024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9461 +[titan] 2025-10-05 09:01:22,425 - root - INFO - lr: 3.3107e-05 gnorm: 1.04 [10:27:12<14:06:24] +[titan] 2025-10-05 09:01:33,324 - root - INFO - step: 17030 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 09:01:33,324 - root - INFO - lr: 
3.3098e-05 gnorm: 1.07 [10:27:23<14:06:13] +[titan] 2025-10-05 09:01:44,236 - root - INFO - step: 17035 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.12% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9083 +[titan] 2025-10-05 09:01:44,236 - root - INFO - lr: 3.3089e-05 gnorm: 1.04 [10:27:34<14:06:02] +[titan] 2025-10-05 09:01:55,136 - root - INFO - step: 17040 loss: 2.1831 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 09:01:55,136 - root - INFO - lr: 3.3081e-05 gnorm: 1.08 [10:27:45<14:05:50] +[titan] 2025-10-05 09:02:06,035 - root - INFO - step: 17045 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:02:06,035 - root - INFO - lr: 3.3072e-05 gnorm: 1.06 [10:27:56<14:05:39] +[titan] 2025-10-05 09:02:14,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:02:16,917 - root - INFO - step: 17050 loss: 2.2428 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:02:16,917 - root - INFO - lr: 3.3063e-05 gnorm: 1.04 [10:28:07<14:05:28] +[titan] 2025-10-05 09:02:27,783 - root - INFO - step: 17055 loss: 2.2213 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9627 +[titan] 2025-10-05 09:02:27,783 - root - INFO - lr: 3.3055e-05 gnorm: 1.05 [10:28:18<14:05:17] +[titan] 2025-10-05 09:02:38,654 - root - INFO - step: 17060 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8924 +[titan] 2025-10-05 09:02:38,654 - root - INFO - lr: 3.3046e-05 gnorm: 1.07 [10:28:28<14:05:05] +[titan] 2025-10-05 09:02:49,542 - root - INFO - step: 17065 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9244 +[titan] 2025-10-05 09:02:49,542 - root - INFO - lr: 3.3037e-05 gnorm: 1.10 [10:28:39<14:04:54] +[titan] 2025-10-05 09:03:00,423 - root - INFO - step: 17070 loss: 2.2506 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 09:03:00,423 - root - INFO - lr: 3.3029e-05 gnorm: 1.08 [10:28:50<14:04:43] +[titan] 2025-10-05 09:03:11,347 - root - INFO - step: 17075 loss: 2.1585 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:03:11,347 - root - INFO - lr: 3.3020e-05 gnorm: 1.09 [10:29:01<14:04:32] +[titan] 2025-10-05 09:03:22,220 - root - INFO - step: 17080 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 09:03:22,221 - root - INFO - lr: 
3.3011e-05 gnorm: 1.07 [10:29:12<14:04:20] +[titan] 2025-10-05 09:03:33,091 - root - INFO - step: 17085 loss: 2.1813 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:03:33,091 - root - INFO - lr: 3.3003e-05 gnorm: 1.12 [10:29:23<14:04:09] +[titan] 2025-10-05 09:03:43,968 - root - INFO - step: 17090 loss: 2.2621 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 1.9971 +[titan] 2025-10-05 09:03:43,968 - root - INFO - lr: 3.2994e-05 gnorm: 1.09 [10:29:34<14:03:58] +[titan] 2025-10-05 09:03:54,850 - root - INFO - step: 17095 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:03:54,850 - root - INFO - lr: 3.2986e-05 gnorm: 1.05 [10:29:45<14:03:46] +[titan] 2025-10-05 09:04:03,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:04:05,728 - root - INFO - step: 17100 loss: 2.1531 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 09:04:05,728 - root - INFO - lr: 3.2977e-05 gnorm: 1.07 [10:29:55<14:03:35] +[titan] 2025-10-05 09:04:16,647 - root - INFO - step: 17105 loss: 2.1923 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 09:04:16,647 - root - INFO - lr: 3.2968e-05 gnorm: 1.11 [10:30:06<14:03:24] +[titan] 2025-10-05 09:04:27,507 - root - INFO - step: 17110 loss: 2.1551 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9046 +[titan] 2025-10-05 09:04:27,507 - root - INFO - lr: 3.2960e-05 gnorm: 1.11 [10:30:17<14:03:13] +[titan] 2025-10-05 09:04:38,376 - root - INFO - step: 17115 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 09:04:38,376 - root - INFO - lr: 3.2951e-05 gnorm: 1.09 [10:30:28<14:03:01] +[titan] 2025-10-05 09:04:49,249 - root - INFO - step: 17120 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:04:49,249 - root - INFO - lr: 3.2942e-05 gnorm: 1.04 [10:30:39<14:02:50] +[titan] 2025-10-05 09:05:00,120 - root - INFO - step: 17125 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 09:05:00,120 - root - INFO - lr: 3.2934e-05 gnorm: 1.09 [10:30:50<14:02:39] +[titan] 2025-10-05 09:05:10,996 - root - INFO - step: 17130 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:05:10,997 - root - INFO - lr: 
3.2925e-05 gnorm: 6.19 [10:31:01<14:02:27] +[titan] 2025-10-05 09:05:21,855 - root - INFO - step: 17135 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 09:05:21,856 - root - INFO - lr: 3.2916e-05 gnorm: 1.04 [10:31:12<14:02:16] +[titan] 2025-10-05 09:05:32,760 - root - INFO - step: 17140 loss: 2.2847 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0182 +[titan] 2025-10-05 09:05:32,760 - root - INFO - lr: 3.2908e-05 gnorm: 1.13 [10:31:22<14:02:05] +[titan] 2025-10-05 09:05:43,616 - root - INFO - step: 17145 loss: 2.1628 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 09:05:43,616 - root - INFO - lr: 3.2899e-05 gnorm: 1.13 [10:31:33<14:01:54] +[titan] 2025-10-05 09:05:52,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:05:54,484 - root - INFO - step: 17150 loss: 2.2557 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 09:05:54,484 - root - INFO - lr: 3.2890e-05 gnorm: 1.04 [10:31:44<14:01:42] +[titan] 2025-10-05 09:06:05,356 - root - INFO - step: 17155 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 09:06:05,356 - root - INFO - lr: 3.2882e-05 gnorm: 1.06 [10:31:55<14:01:31] +[titan] 2025-10-05 09:06:16,249 - root - INFO - step: 17160 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:06:16,249 - root - INFO - lr: 3.2873e-05 gnorm: 1.06 [10:32:06<14:01:20] +[titan] 2025-10-05 09:06:27,125 - root - INFO - step: 17165 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:06:27,125 - root - INFO - lr: 3.2864e-05 gnorm: 1.06 [10:32:17<14:01:08] +[titan] 2025-10-05 09:06:38,025 - root - INFO - step: 17170 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:06:38,025 - root - INFO - lr: 3.2856e-05 gnorm: 1.14 [10:32:28<14:00:57] +[titan] 2025-10-05 09:06:48,880 - root - INFO - step: 17175 loss: 2.1394 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 09:06:48,880 - root - INFO - lr: 3.2847e-05 gnorm: 1.07 [10:32:39<14:00:46] +[titan] 2025-10-05 09:06:59,724 - root - INFO - step: 17180 loss: 2.1898 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 09:06:59,724 - root - INFO - lr: 
3.2838e-05 gnorm: 1.07 [10:32:49<14:00:35] +[titan] 2025-10-05 09:07:10,582 - root - INFO - step: 17185 loss: 2.1634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9111 +[titan] 2025-10-05 09:07:10,583 - root - INFO - lr: 3.2830e-05 gnorm: 1.03 [10:33:00<14:00:23] +[titan] 2025-10-05 09:07:21,443 - root - INFO - step: 17190 loss: 2.1666 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:07:21,443 - root - INFO - lr: 3.2821e-05 gnorm: 1.09 [10:33:11<14:00:12] +[titan] 2025-10-05 09:07:32,307 - root - INFO - step: 17195 loss: 2.2954 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 09:07:32,308 - root - INFO - lr: 3.2812e-05 gnorm: 1.05 [10:33:22<14:00:01] +[titan] 2025-10-05 09:07:40,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:07:43,204 - root - INFO - step: 17200 loss: 2.2434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:07:43,204 - root - INFO - lr: 3.2804e-05 gnorm: 1.02 [10:33:33<13:59:49] +[titan] 2025-10-05 09:07:54,076 - root - INFO - step: 17205 loss: 2.2300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:07:54,076 - root - INFO - lr: 3.2795e-05 gnorm: 1.07 [10:33:44<13:59:38] +[titan] 2025-10-05 09:08:04,949 - root - INFO - step: 17210 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:08:04,949 - root - INFO - lr: 3.2786e-05 gnorm: 1.14 [10:33:55<13:59:27] +[titan] 2025-10-05 09:08:15,833 - root - INFO - step: 17215 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 09:08:15,833 - root - INFO - lr: 3.2778e-05 gnorm: 1.07 [10:34:06<13:59:16] +[titan] 2025-10-05 09:08:26,702 - root - INFO - step: 17220 loss: 2.1866 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9324 +[titan] 2025-10-05 09:08:26,702 - root - INFO - lr: 3.2769e-05 gnorm: 1.12 [10:34:16<13:59:04] +[titan] 2025-10-05 09:08:37,566 - root - INFO - step: 17225 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 09:08:37,566 - root - INFO - lr: 3.2760e-05 gnorm: 1.09 [10:34:27<13:58:53] +[titan] 2025-10-05 09:08:48,419 - root - INFO - step: 17230 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 09:08:48,419 - root - INFO - lr: 
3.2752e-05 gnorm: 1.06 [10:34:38<13:58:42] +[titan] 2025-10-05 09:08:59,310 - root - INFO - step: 17235 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:08:59,310 - root - INFO - lr: 3.2743e-05 gnorm: 1.11 [10:34:49<13:58:30] +[titan] 2025-10-05 09:09:10,177 - root - INFO - step: 17240 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9319 +[titan] 2025-10-05 09:09:10,177 - root - INFO - lr: 3.2734e-05 gnorm: 1.07 [10:35:00<13:58:19] +[titan] 2025-10-05 09:09:21,054 - root - INFO - step: 17245 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:09:21,054 - root - INFO - lr: 3.2725e-05 gnorm: 1.03 [10:35:11<13:58:08] +[titan] 2025-10-05 09:09:29,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:09:31,915 - root - INFO - step: 17250 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9306 +[titan] 2025-10-05 09:09:31,915 - root - INFO - lr: 3.2717e-05 gnorm: 1.06 [10:35:22<13:57:57] +[titan] 2025-10-05 09:09:42,794 - root - INFO - step: 17255 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 09:09:42,794 - root - INFO - lr: 3.2708e-05 gnorm: 1.07 [10:35:32<13:57:45] +[titan] 2025-10-05 09:09:53,683 - root - INFO - step: 17260 loss: 2.1486 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 09:09:53,683 - root - INFO - lr: 3.2699e-05 gnorm: 1.09 [10:35:43<13:57:34] +[titan] 2025-10-05 09:10:04,613 - root - INFO - step: 17265 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 09:10:04,613 - root - INFO - lr: 3.2691e-05 gnorm: 1.10 [10:35:54<13:57:23] +[titan] 2025-10-05 09:10:15,520 - root - INFO - step: 17270 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:10:15,520 - root - INFO - lr: 3.2682e-05 gnorm: 1.07 [10:36:05<13:57:12] +[titan] 2025-10-05 09:10:26,410 - root - INFO - step: 17275 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9303 +[titan] 2025-10-05 09:10:26,410 - root - INFO - lr: 3.2673e-05 gnorm: 1.08 [10:36:16<13:57:00] +[titan] 2025-10-05 09:10:37,314 - root - INFO - step: 17280 loss: 2.3099 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 09:10:37,314 - root - INFO - lr: 
3.2665e-05 gnorm: 1.11 [10:36:27<13:56:49] +[titan] 2025-10-05 09:10:48,218 - root - INFO - step: 17285 loss: 2.2025 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 09:10:48,218 - root - INFO - lr: 3.2656e-05 gnorm: 1.04 [10:36:38<13:56:38] +[titan] 2025-10-05 09:10:59,106 - root - INFO - step: 17290 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 09:10:59,106 - root - INFO - lr: 3.2647e-05 gnorm: 1.08 [10:36:49<13:56:27] +[titan] 2025-10-05 09:11:09,991 - root - INFO - step: 17295 loss: 2.2277 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 09:11:09,991 - root - INFO - lr: 3.2639e-05 gnorm: 1.09 [10:37:00<13:56:15] +[titan] 2025-10-05 09:11:18,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:11:20,963 - root - INFO - step: 17300 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 09:11:20,963 - root - INFO - lr: 3.2630e-05 gnorm: 1.10 [10:37:11<13:56:04] +[titan] 2025-10-05 09:11:31,859 - root - INFO - step: 17305 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:11:31,859 - root - INFO - lr: 3.2621e-05 gnorm: 1.04 [10:37:22<13:55:53] +[titan] 2025-10-05 09:11:42,726 - root - INFO - step: 17310 loss: 2.2050 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 09:11:42,726 - root - INFO - lr: 3.2613e-05 gnorm: 1.08 [10:37:32<13:55:42] +[titan] 2025-10-05 09:11:53,604 - root - INFO - step: 17315 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:11:53,604 - root - INFO - lr: 3.2604e-05 gnorm: 1.06 [10:37:43<13:55:30] +[titan] 2025-10-05 09:12:04,491 - root - INFO - step: 17320 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:12:04,491 - root - INFO - lr: 3.2595e-05 gnorm: 1.08 [10:37:54<13:55:19] +[titan] 2025-10-05 09:12:15,414 - root - INFO - step: 17325 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:12:15,414 - root - INFO - lr: 3.2586e-05 gnorm: 1.03 [10:38:05<13:55:08] +[titan] 2025-10-05 09:12:26,330 - root - INFO - step: 17330 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9711 +[titan] 2025-10-05 09:12:26,330 - root - INFO - lr: 
3.2578e-05 gnorm: 1.08 [10:38:16<13:54:57] +[titan] 2025-10-05 09:12:37,205 - root - INFO - step: 17335 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9425 +[titan] 2025-10-05 09:12:37,206 - root - INFO - lr: 3.2569e-05 gnorm: 1.08 [10:38:27<13:54:45] +[titan] 2025-10-05 09:12:48,107 - root - INFO - step: 17340 loss: 2.2311 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 09:12:48,107 - root - INFO - lr: 3.2560e-05 gnorm: 1.07 [10:38:38<13:54:34] +[titan] 2025-10-05 09:12:58,971 - root - INFO - step: 17345 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:12:58,971 - root - INFO - lr: 3.2552e-05 gnorm: 1.02 [10:38:49<13:54:23] +[titan] 2025-10-05 09:13:07,640 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:13:09,828 - root - INFO - step: 17350 loss: 2.1864 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9313 +[titan] 2025-10-05 09:13:09,828 - root - INFO - lr: 3.2543e-05 gnorm: 1.12 [10:39:00<13:54:11] +[titan] 2025-10-05 09:13:20,766 - root - INFO - step: 17355 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:13:20,766 - root - INFO - lr: 3.2534e-05 gnorm: 1.05 [10:39:10<13:54:00] +[titan] 2025-10-05 09:13:31,647 - root - INFO - step: 17360 loss: 2.1890 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9339 +[titan] 2025-10-05 09:13:31,647 - root - INFO - lr: 3.2526e-05 gnorm: 1.06 [10:39:21<13:53:49] +[titan] 2025-10-05 09:13:42,494 - root - INFO - step: 17365 loss: 2.2669 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 09:13:42,494 - root - INFO - lr: 3.2517e-05 gnorm: 1.11 [10:39:32<13:53:38] +[titan] 2025-10-05 09:13:53,353 - root - INFO - step: 17370 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0243 +[titan] 2025-10-05 09:13:53,353 - root - INFO - lr: 3.2508e-05 gnorm: 1.16 [10:39:43<13:53:26] +[titan] 2025-10-05 09:14:04,232 - root - INFO - step: 17375 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9603 +[titan] 2025-10-05 09:14:04,232 - root - INFO - lr: 3.2500e-05 gnorm: 1.06 [10:39:54<13:53:15] +[titan] 2025-10-05 09:14:15,120 - root - INFO - step: 17380 loss: 2.2381 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 09:14:15,120 - root - INFO - lr: 
3.2491e-05 gnorm: 1.09 [10:40:05<13:53:04] +[titan] 2025-10-05 09:14:26,052 - root - INFO - step: 17385 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 09:14:26,052 - root - INFO - lr: 3.2482e-05 gnorm: 1.07 [10:40:16<13:52:53] +[titan] 2025-10-05 09:14:36,924 - root - INFO - step: 17390 loss: 2.1808 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:14:36,924 - root - INFO - lr: 3.2473e-05 gnorm: 1.07 [10:40:27<13:52:41] +[titan] 2025-10-05 09:14:47,853 - root - INFO - step: 17395 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9403 +[titan] 2025-10-05 09:14:47,853 - root - INFO - lr: 3.2465e-05 gnorm: 1.04 [10:40:38<13:52:30] +[titan] 2025-10-05 09:14:56,560 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:14:58,759 - root - INFO - step: 17400 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 09:14:58,759 - root - INFO - lr: 3.2456e-05 gnorm: 1.05 [10:40:48<13:52:19] +[titan] 2025-10-05 09:15:09,749 - root - INFO - step: 17405 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 29,817 tflops: 413.66 mfu: 41.83% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 09:15:09,750 - root - INFO - lr: 3.2447e-05 gnorm: 1.05 [10:40:59<13:52:08] +[titan] 2025-10-05 09:15:16,479 - root - INFO - Dumping profiler traces at step 17408 +[titan] 2025-10-05 09:15:16,517 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:15:20,898 - root - INFO - step: 17410 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 29,392 tflops: 407.77 mfu: 41.23% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:15:20,899 - root - INFO - lr: 3.2439e-05 gnorm: 1.10 [10:41:11<13:51:57] +[titan] 2025-10-05 09:15:31,784 - root - INFO - step: 17415 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 09:15:31,784 - root - INFO - lr: 3.2430e-05 gnorm: 1.11 [10:41:21<13:51:46] +[titan] 2025-10-05 09:15:42,678 - root - INFO - step: 17420 loss: 2.1926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9383 +[titan] 2025-10-05 09:15:42,678 - root - INFO - lr: 3.2421e-05 gnorm: 1.05 [10:41:32<13:51:34] +[titan] 2025-10-05 09:15:53,585 - root - INFO - step: 17425 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 09:15:53,585 - root - INFO - lr: 3.2412e-05 gnorm: 1.05 [10:41:43<13:51:23] +[titan] 2025-10-05 09:16:04,476 - root - INFO - step: 17430 loss: 
2.1543 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9031 +[titan] 2025-10-05 09:16:04,476 - root - INFO - lr: 3.2404e-05 gnorm: 1.06 [10:41:54<13:51:12] +[titan] 2025-10-05 09:16:15,351 - root - INFO - step: 17435 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 09:16:15,351 - root - INFO - lr: 3.2395e-05 gnorm: 1.09 [10:42:05<13:51:01] +[titan] 2025-10-05 09:16:26,256 - root - INFO - step: 17440 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:16:26,257 - root - INFO - lr: 3.2386e-05 gnorm: 1.08 [10:42:16<13:50:49] +[titan] 2025-10-05 09:16:37,135 - root - INFO - step: 17445 loss: 2.1787 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9254 +[titan] 2025-10-05 09:16:37,135 - root - INFO - lr: 3.2378e-05 gnorm: 1.06 [10:42:27<13:50:38] +[titan] 2025-10-05 09:16:45,826 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:16:48,014 - root - INFO - step: 17450 loss: 2.1992 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9428 +[titan] 2025-10-05 09:16:48,014 - root - INFO - lr: 3.2369e-05 gnorm: 1.03 [10:42:38<13:50:27] +[titan] 2025-10-05 09:16:58,900 - root - INFO - step: 17455 loss: 2.2831 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:16:58,900 - root - INFO - lr: 3.2360e-05 gnorm: 1.09 [10:42:49<13:50:16] +[titan] 2025-10-05 09:17:09,817 - root - INFO - step: 17460 loss: 2.2252 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:17:09,817 - root - INFO - lr: 3.2351e-05 gnorm: 1.08 [10:43:00<13:50:04] +[titan] 2025-10-05 09:17:20,746 - root - INFO - step: 17465 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:17:20,746 - root - INFO - lr: 3.2343e-05 gnorm: 1.05 [10:43:10<13:49:53] +[titan] 2025-10-05 09:17:31,624 - root - INFO - step: 17470 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 09:17:31,624 - root - INFO - lr: 3.2334e-05 gnorm: 1.07 [10:43:21<13:49:42] +[titan] 2025-10-05 09:17:42,511 - root - INFO - step: 17475 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:17:42,511 - root - INFO - lr: 3.2325e-05 gnorm: 1.07 [10:43:32<13:49:31] +[titan] 2025-10-05 09:17:53,406 - root - INFO - step: 17480 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 09:17:53,407 - root - INFO - lr: 
3.2317e-05 gnorm: 1.09 [10:43:43<13:49:19] +[titan] 2025-10-05 09:18:04,291 - root - INFO - step: 17485 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:18:04,292 - root - INFO - lr: 3.2308e-05 gnorm: 1.09 [10:43:54<13:49:08] +[titan] 2025-10-05 09:18:15,232 - root - INFO - step: 17490 loss: 2.1875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 09:18:15,232 - root - INFO - lr: 3.2299e-05 gnorm: 1.09 [10:44:05<13:48:57] +[titan] 2025-10-05 09:18:26,148 - root - INFO - step: 17495 loss: 2.1821 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9280 +[titan] 2025-10-05 09:18:26,148 - root - INFO - lr: 3.2290e-05 gnorm: 1.06 [10:44:16<13:48:46] +[titan] 2025-10-05 09:18:34,840 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:18:37,024 - root - INFO - step: 17500 loss: 2.2275 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9680 +[titan] 2025-10-05 09:18:37,024 - root - INFO - lr: 3.2282e-05 gnorm: 1.08 [10:44:27<13:48:34] +[titan] 2025-10-05 09:18:47,898 - root - INFO - step: 17505 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9235 +[titan] 2025-10-05 09:18:47,898 - root - INFO - lr: 3.2273e-05 gnorm: 1.10 [10:44:38<13:48:23] +[titan] 2025-10-05 09:18:58,787 - root - INFO - step: 17510 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 09:18:58,787 - root - INFO - lr: 3.2264e-05 gnorm: 1.07 [10:44:48<13:48:12] +[titan] 2025-10-05 09:19:09,664 - root - INFO - step: 17515 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9713 +[titan] 2025-10-05 09:19:09,664 - root - INFO - lr: 3.2256e-05 gnorm: 1.11 [10:44:59<13:48:01] +[titan] 2025-10-05 09:19:20,602 - root - INFO - step: 17520 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 09:19:20,603 - root - INFO - lr: 3.2247e-05 gnorm: 1.06 [10:45:10<13:47:50] +[titan] 2025-10-05 09:19:31,492 - root - INFO - step: 17525 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9837 +[titan] 2025-10-05 09:19:31,492 - root - INFO - lr: 3.2238e-05 gnorm: 1.06 [10:45:21<13:47:38] +[titan] 2025-10-05 09:19:42,388 - root - INFO - step: 17530 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:19:42,388 - root - INFO - lr: 
3.2229e-05 gnorm: 1.04 [10:45:32<13:47:27] +[titan] 2025-10-05 09:19:53,275 - root - INFO - step: 17535 loss: 2.1899 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9365 +[titan] 2025-10-05 09:19:53,275 - root - INFO - lr: 3.2221e-05 gnorm: 1.11 [10:45:43<13:47:16] +[titan] 2025-10-05 09:20:04,158 - root - INFO - step: 17540 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:20:04,158 - root - INFO - lr: 3.2212e-05 gnorm: 1.08 [10:45:54<13:47:04] +[titan] 2025-10-05 09:20:15,047 - root - INFO - step: 17545 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 09:20:15,047 - root - INFO - lr: 3.2203e-05 gnorm: 1.14 [10:46:05<13:46:53] +[titan] 2025-10-05 09:20:23,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:20:25,948 - root - INFO - step: 17550 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:20:25,949 - root - INFO - lr: 3.2194e-05 gnorm: 1.06 [10:46:16<13:46:42] +[titan] 2025-10-05 09:20:36,875 - root - INFO - step: 17555 loss: 2.1706 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 09:20:36,875 - root - INFO - lr: 3.2186e-05 gnorm: 1.05 [10:46:27<13:46:31] +[titan] 2025-10-05 09:20:47,778 - root - INFO - step: 17560 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:20:47,778 - root - INFO - lr: 3.2177e-05 gnorm: 1.04 [10:46:37<13:46:20] +[titan] 2025-10-05 09:20:58,670 - root - INFO - step: 17565 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 09:20:58,670 - root - INFO - lr: 3.2168e-05 gnorm: 1.11 [10:46:48<13:46:08] +[titan] 2025-10-05 09:21:09,567 - root - INFO - step: 17570 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9227 +[titan] 2025-10-05 09:21:09,567 - root - INFO - lr: 3.2160e-05 gnorm: 1.03 [10:46:59<13:45:57] +[titan] 2025-10-05 09:21:20,447 - root - INFO - step: 17575 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:21:20,447 - root - INFO - lr: 3.2151e-05 gnorm: 1.06 [10:47:10<13:45:46] +[titan] 2025-10-05 09:21:31,358 - root - INFO - step: 17580 loss: 2.1219 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 09:21:31,358 - root - INFO - lr: 
3.2142e-05 gnorm: 1.07 [10:47:21<13:45:35] +[titan] 2025-10-05 09:21:42,250 - root - INFO - step: 17585 loss: 2.2406 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 09:21:42,250 - root - INFO - lr: 3.2133e-05 gnorm: 1.10 [10:47:32<13:45:23] +[titan] 2025-10-05 09:21:53,130 - root - INFO - step: 17590 loss: 2.2175 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 09:21:53,130 - root - INFO - lr: 3.2125e-05 gnorm: 1.08 [10:47:43<13:45:12] +[titan] 2025-10-05 09:22:04,011 - root - INFO - step: 17595 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9755 +[titan] 2025-10-05 09:22:04,011 - root - INFO - lr: 3.2116e-05 gnorm: 1.05 [10:47:54<13:45:01] +[titan] 2025-10-05 09:22:12,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:22:14,893 - root - INFO - step: 17600 loss: 2.2663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0020 +[titan] 2025-10-05 09:22:14,894 - root - INFO - lr: 3.2107e-05 gnorm: 1.08 [10:48:05<13:44:50] +[titan] 2025-10-05 09:22:25,790 - root - INFO - step: 17605 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 09:22:25,790 - root - INFO - lr: 3.2098e-05 gnorm: 1.11 [10:48:15<13:44:38] +[titan] 2025-10-05 09:22:36,676 - root - INFO - step: 17610 loss: 2.2048 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9482 +[titan] 2025-10-05 09:22:36,676 - root - INFO - lr: 3.2090e-05 gnorm: 1.08 [10:48:26<13:44:27] +[titan] 2025-10-05 09:22:47,556 - root - INFO - step: 17615 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 09:22:47,556 - root - INFO - lr: 3.2081e-05 gnorm: 1.10 [10:48:37<13:44:16] +[titan] 2025-10-05 09:22:58,451 - root - INFO - step: 17620 loss: 2.1471 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:22:58,451 - root - INFO - lr: 3.2072e-05 gnorm: 1.10 [10:48:48<13:44:05] +[titan] 2025-10-05 09:23:09,330 - root - INFO - step: 17625 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 09:23:09,330 - root - INFO - lr: 3.2063e-05 gnorm: 1.04 [10:48:59<13:43:53] +[titan] 2025-10-05 09:23:20,210 - root - INFO - step: 17630 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9426 +[titan] 2025-10-05 09:23:20,210 - root - INFO - lr: 
3.2055e-05 gnorm: 1.05 [10:49:10<13:43:42] +[titan] 2025-10-05 09:23:31,084 - root - INFO - step: 17635 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 09:23:31,084 - root - INFO - lr: 3.2046e-05 gnorm: 1.06 [10:49:21<13:43:31] +[titan] 2025-10-05 09:23:41,968 - root - INFO - step: 17640 loss: 2.2575 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 09:23:41,969 - root - INFO - lr: 3.2037e-05 gnorm: 1.12 [10:49:32<13:43:20] +[titan] 2025-10-05 09:23:52,856 - root - INFO - step: 17645 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9799 +[titan] 2025-10-05 09:23:52,856 - root - INFO - lr: 3.2029e-05 gnorm: 1.10 [10:49:43<13:43:08] +[titan] 2025-10-05 09:24:01,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:24:03,759 - root - INFO - step: 17650 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:24:03,760 - root - INFO - lr: 3.2020e-05 gnorm: 1.14 [10:49:53<13:42:57] +[titan] 2025-10-05 09:24:14,635 - root - INFO - step: 17655 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 09:24:14,636 - root - INFO - lr: 3.2011e-05 gnorm: 1.12 [10:50:04<13:42:46] +[titan] 2025-10-05 09:24:25,539 - root - INFO - step: 17660 loss: 2.1876 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9327 +[titan] 2025-10-05 09:24:25,539 - root - INFO - lr: 3.2002e-05 gnorm: 1.06 [10:50:15<13:42:35] +[titan] 2025-10-05 09:24:36,410 - root - INFO - step: 17665 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9523 +[titan] 2025-10-05 09:24:36,410 - root - INFO - lr: 3.1994e-05 gnorm: 1.09 [10:50:26<13:42:23] +[titan] 2025-10-05 09:24:47,292 - root - INFO - step: 17670 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9587 +[titan] 2025-10-05 09:24:47,292 - root - INFO - lr: 3.1985e-05 gnorm: 1.03 [10:50:37<13:42:12] +[titan] 2025-10-05 09:24:58,185 - root - INFO - step: 17675 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 09:24:58,185 - root - INFO - lr: 3.1976e-05 gnorm: 1.17 [10:50:48<13:42:01] +[titan] 2025-10-05 09:25:09,105 - root - INFO - step: 17680 loss: 2.2810 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 09:25:09,105 - root - INFO - lr: 
3.1967e-05 gnorm: 1.11 [10:50:59<13:41:50] +[titan] 2025-10-05 09:25:19,979 - root - INFO - step: 17685 loss: 2.1693 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:25:19,980 - root - INFO - lr: 3.1959e-05 gnorm: 1.07 [10:51:10<13:41:38] +[titan] 2025-10-05 09:25:30,867 - root - INFO - step: 17690 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9351 +[titan] 2025-10-05 09:25:30,867 - root - INFO - lr: 3.1950e-05 gnorm: 1.10 [10:51:21<13:41:27] +[titan] 2025-10-05 09:25:41,737 - root - INFO - step: 17695 loss: 2.1997 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 09:25:41,737 - root - INFO - lr: 3.1941e-05 gnorm: 1.03 [10:51:31<13:41:16] +[titan] 2025-10-05 09:25:50,446 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:25:52,639 - root - INFO - step: 17700 loss: 2.1679 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 09:25:52,639 - root - INFO - lr: 3.1932e-05 gnorm: 1.08 [10:51:42<13:41:05] +[titan] 2025-10-05 09:26:03,528 - root - INFO - step: 17705 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9507 +[titan] 2025-10-05 09:26:03,528 - root - INFO - lr: 3.1924e-05 gnorm: 1.14 [10:51:53<13:40:53] +[titan] 2025-10-05 09:26:14,424 - root - INFO - step: 17710 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:26:14,424 - root - INFO - lr: 3.1915e-05 gnorm: 1.05 [10:52:04<13:40:42] +[titan] 2025-10-05 09:26:25,353 - root - INFO - step: 17715 loss: 2.1118 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 09:26:25,353 - root - INFO - lr: 3.1906e-05 gnorm: 1.09 [10:52:15<13:40:31] +[titan] 2025-10-05 09:26:36,227 - root - INFO - step: 17720 loss: 2.1460 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8958 +[titan] 2025-10-05 09:26:36,227 - root - INFO - lr: 3.1897e-05 gnorm: 1.09 [10:52:26<13:40:20] +[titan] 2025-10-05 09:26:47,086 - root - INFO - step: 17725 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:26:47,086 - root - INFO - lr: 3.1889e-05 gnorm: 1.04 [10:52:37<13:40:08] +[titan] 2025-10-05 09:26:57,951 - root - INFO - step: 17730 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 09:26:57,951 - root - INFO - lr: 
3.1880e-05 gnorm: 1.13 [10:52:48<13:39:57] +[titan] 2025-10-05 09:27:08,802 - root - INFO - step: 17735 loss: 2.2199 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 09:27:08,802 - root - INFO - lr: 3.1871e-05 gnorm: 1.04 [10:52:58<13:39:46] +[titan] 2025-10-05 09:27:19,665 - root - INFO - step: 17740 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 09:27:19,665 - root - INFO - lr: 3.1862e-05 gnorm: 1.09 [10:53:09<13:39:35] +[titan] 2025-10-05 09:27:30,612 - root - INFO - step: 17745 loss: 2.1677 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9150 +[titan] 2025-10-05 09:27:30,612 - root - INFO - lr: 3.1854e-05 gnorm: 1.09 [10:53:20<13:39:23] +[titan] 2025-10-05 09:27:39,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:27:41,462 - root - INFO - step: 17750 loss: 2.1954 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 09:27:41,462 - root - INFO - lr: 3.1845e-05 gnorm: 1.09 [10:53:31<13:39:12] +[titan] 2025-10-05 09:27:52,328 - root - INFO - step: 17755 loss: 2.1602 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9091 +[titan] 2025-10-05 09:27:52,328 - root - INFO - lr: 3.1836e-05 gnorm: 1.04 [10:53:42<13:39:01] +[titan] 2025-10-05 09:28:03,186 - root - INFO - step: 17760 loss: 2.2440 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:28:03,186 - root - INFO - lr: 3.1827e-05 gnorm: 1.08 [10:53:53<13:38:50] +[titan] 2025-10-05 09:28:14,043 - root - INFO - step: 17765 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9224 +[titan] 2025-10-05 09:28:14,043 - root - INFO - lr: 3.1818e-05 gnorm: 1.07 [10:54:04<13:38:38] +[titan] 2025-10-05 09:28:24,918 - root - INFO - step: 17770 loss: 2.1581 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9064 +[titan] 2025-10-05 09:28:24,918 - root - INFO - lr: 3.1810e-05 gnorm: 1.06 [10:54:15<13:38:27] +[titan] 2025-10-05 09:28:35,788 - root - INFO - step: 17775 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9652 +[titan] 2025-10-05 09:28:35,788 - root - INFO - lr: 3.1801e-05 gnorm: 1.15 [10:54:25<13:38:16] +[titan] 2025-10-05 09:28:46,696 - root - INFO - step: 17780 loss: 2.2055 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:28:46,696 - root - INFO - lr: 
3.1792e-05 gnorm: 1.10 [10:54:36<13:38:05] +[titan] 2025-10-05 09:28:57,567 - root - INFO - step: 17785 loss: 2.1809 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9261 +[titan] 2025-10-05 09:28:57,567 - root - INFO - lr: 3.1783e-05 gnorm: 1.05 [10:54:47<13:37:53] +[titan] 2025-10-05 09:29:08,437 - root - INFO - step: 17790 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:29:08,437 - root - INFO - lr: 3.1775e-05 gnorm: 1.09 [10:54:58<13:37:42] +[titan] 2025-10-05 09:29:19,347 - root - INFO - step: 17795 loss: 2.1437 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 09:29:19,348 - root - INFO - lr: 3.1766e-05 gnorm: 1.05 [10:55:09<13:37:31] +[titan] 2025-10-05 09:29:28,050 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:29:30,284 - root - INFO - step: 17800 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 09:29:30,284 - root - INFO - lr: 3.1757e-05 gnorm: 1.07 [10:55:20<13:37:20] +[titan] 2025-10-05 09:29:41,184 - root - INFO - step: 17805 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9269 +[titan] 2025-10-05 09:29:41,184 - root - INFO - lr: 3.1748e-05 gnorm: 1.07 [10:55:31<13:37:08] +[titan] 2025-10-05 09:29:52,086 - root - INFO - step: 17810 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:29:52,086 - root - INFO - lr: 3.1740e-05 gnorm: 1.06 [10:55:42<13:36:57] +[titan] 2025-10-05 09:30:03,070 - root - INFO - step: 17815 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9441 +[titan] 2025-10-05 09:30:03,070 - root - INFO - lr: 3.1731e-05 gnorm: 1.04 [10:55:53<13:36:46] +[titan] 2025-10-05 09:30:13,933 - root - INFO - step: 17820 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 09:30:13,933 - root - INFO - lr: 3.1722e-05 gnorm: 1.09 [10:56:04<13:36:35] +[titan] 2025-10-05 09:30:24,824 - root - INFO - step: 17825 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:30:24,824 - root - INFO - lr: 3.1713e-05 gnorm: 1.05 [10:56:14<13:36:24] +[titan] 2025-10-05 09:30:35,792 - root - INFO - step: 17830 loss: 2.1738 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:30:35,792 - root - INFO - lr: 
3.1705e-05 gnorm: 1.09 [10:56:25<13:36:12] +[titan] 2025-10-05 09:30:46,656 - root - INFO - step: 17835 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 09:30:46,656 - root - INFO - lr: 3.1696e-05 gnorm: 1.04 [10:56:36<13:36:01] +[titan] 2025-10-05 09:30:57,542 - root - INFO - step: 17840 loss: 2.1750 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 09:30:57,542 - root - INFO - lr: 3.1687e-05 gnorm: 1.05 [10:56:47<13:35:50] +[titan] 2025-10-05 09:31:08,403 - root - INFO - step: 17845 loss: 2.2534 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 09:31:08,403 - root - INFO - lr: 3.1678e-05 gnorm: 1.10 [10:56:58<13:35:39] +[titan] 2025-10-05 09:31:17,089 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:31:19,279 - root - INFO - step: 17850 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9894 +[titan] 2025-10-05 09:31:19,279 - root - INFO - lr: 3.1670e-05 gnorm: 1.07 [10:57:09<13:35:27] +[titan] 2025-10-05 09:31:30,156 - root - INFO - step: 17855 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9467 +[titan] 2025-10-05 09:31:30,156 - root - INFO - lr: 3.1661e-05 gnorm: 1.03 [10:57:20<13:35:16] +[titan] 2025-10-05 09:31:41,087 - root - INFO - step: 17860 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9406 +[titan] 2025-10-05 09:31:41,088 - root - INFO - lr: 3.1652e-05 gnorm: 1.07 [10:57:31<13:35:05] +[titan] 2025-10-05 09:31:51,960 - root - INFO - step: 17865 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:31:51,961 - root - INFO - lr: 3.1643e-05 gnorm: 1.08 [10:57:42<13:34:54] +[titan] 2025-10-05 09:32:02,826 - root - INFO - step: 17870 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 09:32:02,826 - root - INFO - lr: 3.1634e-05 gnorm: 1.08 [10:57:52<13:34:42] +[titan] 2025-10-05 09:32:13,722 - root - INFO - step: 17875 loss: 2.2074 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:32:13,722 - root - INFO - lr: 3.1626e-05 gnorm: 1.04 [10:58:03<13:34:31] +[titan] 2025-10-05 09:32:24,584 - root - INFO - step: 17880 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 09:32:24,584 - root - INFO - lr: 
3.1617e-05 gnorm: 1.06 [10:58:14<13:34:20] +[titan] 2025-10-05 09:32:35,482 - root - INFO - step: 17885 loss: 2.2057 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 09:32:35,482 - root - INFO - lr: 3.1608e-05 gnorm: 1.05 [10:58:25<13:34:09] +[titan] 2025-10-05 09:32:46,344 - root - INFO - step: 17890 loss: 2.2259 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9659 +[titan] 2025-10-05 09:32:46,344 - root - INFO - lr: 3.1599e-05 gnorm: 1.05 [10:58:36<13:33:57] +[titan] 2025-10-05 09:32:57,245 - root - INFO - step: 17895 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8967 +[titan] 2025-10-05 09:32:57,245 - root - INFO - lr: 3.1591e-05 gnorm: 1.07 [10:58:47<13:33:46] +[titan] 2025-10-05 09:33:05,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:33:08,118 - root - INFO - step: 17900 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 09:33:08,118 - root - INFO - lr: 3.1582e-05 gnorm: 1.01 [10:58:58<13:33:35] +[titan] 2025-10-05 09:33:19,021 - root - INFO - step: 17905 loss: 2.1704 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 09:33:19,022 - root - INFO - lr: 3.1573e-05 gnorm: 1.15 [10:59:09<13:33:24] +[titan] 2025-10-05 09:33:29,882 - root - INFO - step: 17910 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 09:33:29,882 - root - INFO - lr: 3.1564e-05 gnorm: 1.05 [10:59:20<13:33:12] +[titan] 2025-10-05 09:33:40,805 - root - INFO - step: 17915 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 09:33:40,805 - root - INFO - lr: 3.1555e-05 gnorm: 1.08 [10:59:30<13:33:01] +[titan] 2025-10-05 09:33:51,745 - root - INFO - step: 17920 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 09:33:51,746 - root - INFO - lr: 3.1547e-05 gnorm: 1.12 [10:59:41<13:32:50] +[titan] 2025-10-05 09:33:51,928 - root - INFO - Dumping profiler traces at step 17920 +[titan] 2025-10-05 09:33:51,965 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:34:02,867 - root - INFO - step: 17925 loss: 2.1932 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 09:34:02,867 - root - INFO - lr: 3.1538e-05 gnorm: 1.06 [10:59:53<13:32:39] +[titan] 2025-10-05 09:34:13,739 - root - INFO - step: 17930 loss: 
2.2006 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:34:13,739 - root - INFO - lr: 3.1529e-05 gnorm: 1.12 [11:00:03<13:32:28] +[titan] 2025-10-05 09:34:24,600 - root - INFO - step: 17935 loss: 2.2250 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:34:24,600 - root - INFO - lr: 3.1520e-05 gnorm: 1.06 [11:00:14<13:32:17] +[titan] 2025-10-05 09:34:35,525 - root - INFO - step: 17940 loss: 2.1726 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 09:34:35,525 - root - INFO - lr: 3.1512e-05 gnorm: 1.05 [11:00:25<13:32:05] +[titan] 2025-10-05 09:34:46,392 - root - INFO - step: 17945 loss: 2.0902 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 09:34:46,392 - root - INFO - lr: 3.1503e-05 gnorm: 1.04 [11:00:36<13:31:54] +[titan] 2025-10-05 09:34:55,075 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:34:57,263 - root - INFO - step: 17950 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 09:34:57,264 - root - INFO - lr: 3.1494e-05 gnorm: 1.09 [11:00:47<13:31:43] +[titan] 2025-10-05 09:35:08,110 - root - INFO - step: 17955 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 09:35:08,110 - root - INFO - lr: 3.1485e-05 gnorm: 1.05 [11:00:58<13:31:32] +[titan] 2025-10-05 09:35:18,976 - root - INFO - step: 17960 loss: 2.2219 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:35:18,976 - root - INFO - lr: 3.1476e-05 gnorm: 1.08 [11:01:09<13:31:20] +[titan] 2025-10-05 09:35:29,838 - root - INFO - step: 17965 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9589 +[titan] 2025-10-05 09:35:29,838 - root - INFO - lr: 3.1468e-05 gnorm: 1.08 [11:01:19<13:31:09] +[titan] 2025-10-05 09:35:40,783 - root - INFO - step: 17970 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.35 mfu: 42.00% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9404 +[titan] 2025-10-05 09:35:40,783 - root - INFO - lr: 3.1459e-05 gnorm: 1.07 [11:01:30<13:30:58] +[titan] 2025-10-05 09:35:51,637 - root - INFO - step: 17975 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.0326 +[titan] 2025-10-05 09:35:51,637 - root - INFO - lr: 3.1450e-05 gnorm: 1.14 [11:01:41<13:30:47] +[titan] 2025-10-05 09:36:02,514 - root - INFO - step: 17980 loss: 2.1848 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 09:36:02,514 - root - INFO - lr: 
3.1441e-05 gnorm: 1.06 [11:01:52<13:30:35] +[titan] 2025-10-05 09:36:13,381 - root - INFO - step: 17985 loss: 2.1655 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9132 +[titan] 2025-10-05 09:36:13,381 - root - INFO - lr: 3.1432e-05 gnorm: 1.07 [11:02:03<13:30:24] +[titan] 2025-10-05 09:36:24,242 - root - INFO - step: 17990 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 09:36:24,242 - root - INFO - lr: 3.1424e-05 gnorm: 1.05 [11:02:14<13:30:13] +[titan] 2025-10-05 09:36:35,118 - root - INFO - step: 17995 loss: 2.2044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9479 +[titan] 2025-10-05 09:36:35,118 - root - INFO - lr: 3.1415e-05 gnorm: 1.05 [11:02:25<13:30:02] +[titan] 2025-10-05 09:36:43,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:36:46,012 - root - INFO - step: 18000 loss: 2.1302 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 09:36:46,012 - root - INFO - lr: 3.1406e-05 gnorm: 1.05 [11:02:36<13:29:50] +[titan] 2025-10-05 09:36:56,870 - root - INFO - step: 18005 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9388 +[titan] 2025-10-05 09:36:56,871 - root - INFO - lr: 3.1397e-05 gnorm: 1.04 [11:02:47<13:29:39] +[titan] 2025-10-05 09:37:07,717 - root - INFO - step: 18010 loss: 2.2185 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9597 +[titan] 2025-10-05 09:37:07,718 - root - INFO - lr: 3.1389e-05 gnorm: 1.06 [11:02:57<13:29:28] +[titan] 2025-10-05 09:37:18,576 - root - INFO - step: 18015 loss: 2.2301 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:37:18,576 - root - INFO - lr: 3.1380e-05 gnorm: 1.13 [11:03:08<13:29:17] +[titan] 2025-10-05 09:37:29,423 - root - INFO - step: 18020 loss: 2.2014 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9439 +[titan] 2025-10-05 09:37:29,423 - root - INFO - lr: 3.1371e-05 gnorm: 1.04 [11:03:19<13:29:05] +[titan] 2025-10-05 09:37:40,354 - root - INFO - step: 18025 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:37:40,354 - root - INFO - lr: 3.1362e-05 gnorm: 1.05 [11:03:30<13:28:54] +[titan] 2025-10-05 09:37:51,204 - root - INFO - step: 18030 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9219 +[titan] 2025-10-05 09:37:51,204 - root - INFO - lr: 
3.1353e-05 gnorm: 1.05 [11:03:41<13:28:43] +[titan] 2025-10-05 09:38:02,089 - root - INFO - step: 18035 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:38:02,089 - root - INFO - lr: 3.1345e-05 gnorm: 1.09 [11:03:52<13:28:32] +[titan] 2025-10-05 09:38:12,956 - root - INFO - step: 18040 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:38:12,956 - root - INFO - lr: 3.1336e-05 gnorm: 1.11 [11:04:03<13:28:20] +[titan] 2025-10-05 09:38:23,803 - root - INFO - step: 18045 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 09:38:23,803 - root - INFO - lr: 3.1327e-05 gnorm: 1.04 [11:04:13<13:28:09] +[titan] 2025-10-05 09:38:32,482 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:38:34,663 - root - INFO - step: 18050 loss: 2.1705 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:38:34,663 - root - INFO - lr: 3.1318e-05 gnorm: 1.05 [11:04:24<13:27:58] +[titan] 2025-10-05 09:38:45,582 - root - INFO - step: 18055 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 09:38:45,582 - root - INFO - lr: 3.1309e-05 gnorm: 1.10 [11:04:35<13:27:47] +[titan] 2025-10-05 09:38:56,461 - root - INFO - step: 18060 loss: 2.1737 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 09:38:56,462 - root - INFO - lr: 3.1301e-05 gnorm: 1.10 [11:04:46<13:27:35] +[titan] 2025-10-05 09:39:07,387 - root - INFO - step: 18065 loss: 2.2727 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0073 +[titan] 2025-10-05 09:39:07,388 - root - INFO - lr: 3.1292e-05 gnorm: 1.11 [11:04:57<13:27:24] +[titan] 2025-10-05 09:39:18,267 - root - INFO - step: 18070 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9196 +[titan] 2025-10-05 09:39:18,267 - root - INFO - lr: 3.1283e-05 gnorm: 1.03 [11:05:08<13:27:13] +[titan] 2025-10-05 09:39:29,177 - root - INFO - step: 18075 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 09:39:29,177 - root - INFO - lr: 3.1274e-05 gnorm: 1.09 [11:05:19<13:27:02] +[titan] 2025-10-05 09:39:40,351 - root - INFO - step: 18080 loss: 2.1525 memory: 118.84GiB(85.28%) tps: 29,326 tflops: 406.85 mfu: 41.14% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 09:39:40,352 - root - INFO - lr: 
3.1265e-05 gnorm: 1.07 [11:05:30<13:26:51] +[titan] 2025-10-05 09:39:51,220 - root - INFO - step: 18085 loss: 2.1539 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 09:39:51,221 - root - INFO - lr: 3.1257e-05 gnorm: 1.06 [11:05:41<13:26:40] +[titan] 2025-10-05 09:40:02,072 - root - INFO - step: 18090 loss: 2.1462 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8962 +[titan] 2025-10-05 09:40:02,072 - root - INFO - lr: 3.1248e-05 gnorm: 1.09 [11:05:52<13:26:28] +[titan] 2025-10-05 09:40:12,918 - root - INFO - step: 18095 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:40:12,918 - root - INFO - lr: 3.1239e-05 gnorm: 1.10 [11:06:03<13:26:17] +[titan] 2025-10-05 09:40:21,622 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:40:23,799 - root - INFO - step: 18100 loss: 2.2201 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:40:23,799 - root - INFO - lr: 3.1230e-05 gnorm: 1.06 [11:06:13<13:26:06] +[titan] 2025-10-05 09:40:34,657 - root - INFO - step: 18105 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9333 +[titan] 2025-10-05 09:40:34,657 - root - INFO - lr: 3.1221e-05 gnorm: 1.02 [11:06:24<13:25:54] +[titan] 2025-10-05 09:40:45,595 - root - INFO - step: 18110 loss: 2.2690 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0044 +[titan] 2025-10-05 09:40:45,595 - root - INFO - lr: 3.1213e-05 gnorm: 1.11 [11:06:35<13:25:43] +[titan] 2025-10-05 09:40:56,456 - root - INFO - step: 18115 loss: 2.1375 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 09:40:56,457 - root - INFO - lr: 3.1204e-05 gnorm: 1.07 [11:06:46<13:25:32] +[titan] 2025-10-05 09:41:07,318 - root - INFO - step: 18120 loss: 2.2233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9634 +[titan] 2025-10-05 09:41:07,318 - root - INFO - lr: 3.1195e-05 gnorm: 1.08 [11:06:57<13:25:21] +[titan] 2025-10-05 09:41:18,178 - root - INFO - step: 18125 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9584 +[titan] 2025-10-05 09:41:18,178 - root - INFO - lr: 3.1186e-05 gnorm: 1.07 [11:07:08<13:25:10] +[titan] 2025-10-05 09:41:29,063 - root - INFO - step: 18130 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9142 +[titan] 2025-10-05 09:41:29,063 - root - INFO - lr: 
3.1177e-05 gnorm: 1.07 [11:07:19<13:24:58] +[titan] 2025-10-05 09:41:39,951 - root - INFO - step: 18135 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9334 +[titan] 2025-10-05 09:41:39,951 - root - INFO - lr: 3.1169e-05 gnorm: 1.06 [11:07:30<13:24:47] +[titan] 2025-10-05 09:41:50,877 - root - INFO - step: 18140 loss: 2.2241 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 09:41:50,877 - root - INFO - lr: 3.1160e-05 gnorm: 1.13 [11:07:40<13:24:36] +[titan] 2025-10-05 09:42:01,736 - root - INFO - step: 18145 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 09:42:01,736 - root - INFO - lr: 3.1151e-05 gnorm: 1.11 [11:07:51<13:24:25] +[titan] 2025-10-05 09:42:10,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:42:12,607 - root - INFO - step: 18150 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8818 +[titan] 2025-10-05 09:42:12,608 - root - INFO - lr: 3.1142e-05 gnorm: 1.06 [11:08:02<13:24:13] +[titan] 2025-10-05 09:42:23,477 - root - INFO - step: 18155 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9019 +[titan] 2025-10-05 09:42:23,477 - root - INFO - lr: 3.1133e-05 gnorm: 1.02 [11:08:13<13:24:02] +[titan] 2025-10-05 09:42:34,354 - root - INFO - step: 18160 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:42:34,354 - root - INFO - lr: 3.1125e-05 gnorm: 1.06 [11:08:24<13:23:51] +[titan] 2025-10-05 09:42:45,261 - root - INFO - step: 18165 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 09:42:45,261 - root - INFO - lr: 3.1116e-05 gnorm: 1.07 [11:08:35<13:23:40] +[titan] 2025-10-05 09:42:56,138 - root - INFO - step: 18170 loss: 2.1630 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9103 +[titan] 2025-10-05 09:42:56,138 - root - INFO - lr: 3.1107e-05 gnorm: 1.06 [11:08:46<13:23:28] +[titan] 2025-10-05 09:43:06,985 - root - INFO - step: 18175 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 09:43:06,985 - root - INFO - lr: 3.1098e-05 gnorm: 1.05 [11:08:57<13:23:17] +[titan] 2025-10-05 09:43:17,840 - root - INFO - step: 18180 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 09:43:17,840 - root - INFO - lr: 
3.1089e-05 gnorm: 1.06 [11:09:07<13:23:06] +[titan] 2025-10-05 09:43:28,708 - root - INFO - step: 18185 loss: 2.2232 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9641 +[titan] 2025-10-05 09:43:28,708 - root - INFO - lr: 3.1080e-05 gnorm: 1.07 [11:09:18<13:22:55] +[titan] 2025-10-05 09:43:39,585 - root - INFO - step: 18190 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:43:39,586 - root - INFO - lr: 3.1072e-05 gnorm: 1.04 [11:09:29<13:22:43] +[titan] 2025-10-05 09:43:50,582 - root - INFO - step: 18195 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.41 mfu: 41.80% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 09:43:50,582 - root - INFO - lr: 3.1063e-05 gnorm: 1.10 [11:09:40<13:22:32] +[titan] 2025-10-05 09:43:59,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:44:01,443 - root - INFO - step: 18200 loss: 2.1663 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9138 +[titan] 2025-10-05 09:44:01,443 - root - INFO - lr: 3.1054e-05 gnorm: 1.04 [11:09:51<13:22:21] +[titan] 2025-10-05 09:44:12,364 - root - INFO - step: 18205 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 09:44:12,364 - root - INFO - lr: 3.1045e-05 gnorm: 1.07 [11:10:02<13:22:10] +[titan] 2025-10-05 09:44:23,235 - root - INFO - step: 18210 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 09:44:23,235 - root - INFO - lr: 3.1036e-05 gnorm: 1.04 [11:10:13<13:21:59] +[titan] 2025-10-05 09:44:34,114 - root - INFO - step: 18215 loss: 2.1970 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 09:44:34,114 - root - INFO - lr: 3.1028e-05 gnorm: 1.08 [11:10:24<13:21:47] +[titan] 2025-10-05 09:44:45,034 - root - INFO - step: 18220 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:44:45,034 - root - INFO - lr: 3.1019e-05 gnorm: 1.02 [11:10:35<13:21:36] +[titan] 2025-10-05 09:44:55,926 - root - INFO - step: 18225 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 09:44:55,926 - root - INFO - lr: 3.1010e-05 gnorm: 1.06 [11:10:46<13:21:25] +[titan] 2025-10-05 09:45:06,773 - root - INFO - step: 18230 loss: 2.2584 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 09:45:06,773 - root - INFO - lr: 
3.1001e-05 gnorm: 1.08 [11:10:56<13:21:14] +[titan] 2025-10-05 09:45:17,639 - root - INFO - step: 18235 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:45:17,639 - root - INFO - lr: 3.0992e-05 gnorm: 1.10 [11:11:07<13:21:02] +[titan] 2025-10-05 09:45:28,516 - root - INFO - step: 18240 loss: 2.1421 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 09:45:28,516 - root - INFO - lr: 3.0984e-05 gnorm: 1.05 [11:11:18<13:20:51] +[titan] 2025-10-05 09:45:39,379 - root - INFO - step: 18245 loss: 2.1122 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8654 +[titan] 2025-10-05 09:45:39,379 - root - INFO - lr: 3.0975e-05 gnorm: 1.06 [11:11:29<13:20:40] +[titan] 2025-10-05 09:45:48,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:45:50,324 - root - INFO - step: 18250 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9443 +[titan] 2025-10-05 09:45:50,324 - root - INFO - lr: 3.0966e-05 gnorm: 1.03 [11:11:40<13:20:29] +[titan] 2025-10-05 09:46:01,186 - root - INFO - step: 18255 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9065 +[titan] 2025-10-05 09:46:01,186 - root - INFO - lr: 3.0957e-05 gnorm: 1.04 [11:11:51<13:20:18] +[titan] 2025-10-05 09:46:12,076 - root - INFO - step: 18260 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:46:12,077 - root - INFO - lr: 3.0948e-05 gnorm: 1.05 [11:12:02<13:20:06] +[titan] 2025-10-05 09:46:22,981 - root - INFO - step: 18265 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 09:46:22,981 - root - INFO - lr: 3.0939e-05 gnorm: 1.12 [11:12:13<13:19:55] +[titan] 2025-10-05 09:46:33,839 - root - INFO - step: 18270 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:46:33,840 - root - INFO - lr: 3.0931e-05 gnorm: 1.05 [11:12:23<13:19:44] +[titan] 2025-10-05 09:46:44,752 - root - INFO - step: 18275 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 09:46:44,752 - root - INFO - lr: 3.0922e-05 gnorm: 1.10 [11:12:34<13:19:33] +[titan] 2025-10-05 09:46:55,639 - root - INFO - step: 18280 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 09:46:55,639 - root - INFO - lr: 
3.0913e-05 gnorm: 1.08 [11:12:45<13:19:21] +[titan] 2025-10-05 09:47:06,489 - root - INFO - step: 18285 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 09:47:06,489 - root - INFO - lr: 3.0904e-05 gnorm: 1.06 [11:12:56<13:19:10] +[titan] 2025-10-05 09:47:17,381 - root - INFO - step: 18290 loss: 2.2060 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:47:17,381 - root - INFO - lr: 3.0895e-05 gnorm: 1.12 [11:13:07<13:18:59] +[titan] 2025-10-05 09:47:28,261 - root - INFO - step: 18295 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9274 +[titan] 2025-10-05 09:47:28,261 - root - INFO - lr: 3.0887e-05 gnorm: 1.07 [11:13:18<13:18:48] +[titan] 2025-10-05 09:47:36,941 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:47:39,132 - root - INFO - step: 18300 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 09:47:39,132 - root - INFO - lr: 3.0878e-05 gnorm: 1.07 [11:13:29<13:18:36] +[titan] 2025-10-05 09:47:50,070 - root - INFO - step: 18305 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:47:50,070 - root - INFO - lr: 3.0869e-05 gnorm: 1.04 [11:13:40<13:18:25] +[titan] 2025-10-05 09:48:00,953 - root - INFO - step: 18310 loss: 2.2365 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 09:48:00,953 - root - INFO - lr: 3.0860e-05 gnorm: 1.10 [11:13:51<13:18:14] +[titan] 2025-10-05 09:48:11,821 - root - INFO - step: 18315 loss: 2.1228 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 09:48:11,821 - root - INFO - lr: 3.0851e-05 gnorm: 1.10 [11:14:01<13:18:03] +[titan] 2025-10-05 09:48:22,702 - root - INFO - step: 18320 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:22,702 - root - INFO - lr: 3.0842e-05 gnorm: 1.10 [11:14:12<13:17:52] +[titan] 2025-10-05 09:48:33,562 - root - INFO - step: 18325 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 09:48:33,562 - root - INFO - lr: 3.0834e-05 gnorm: 1.06 [11:14:23<13:17:40] +[titan] 2025-10-05 09:48:44,442 - root - INFO - step: 18330 loss: 2.1384 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:48:44,442 - root - INFO - lr: 
3.0825e-05 gnorm: 1.07 [11:14:34<13:17:29] +[titan] 2025-10-05 09:48:55,377 - root - INFO - step: 18335 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:55,377 - root - INFO - lr: 3.0816e-05 gnorm: 1.05 [11:14:45<13:17:18] +[titan] 2025-10-05 09:49:06,255 - root - INFO - step: 18340 loss: 2.1540 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:49:06,255 - root - INFO - lr: 3.0807e-05 gnorm: 1.02 [11:14:56<13:17:07] +[titan] 2025-10-05 09:49:17,139 - root - INFO - step: 18345 loss: 2.1319 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8845 +[titan] 2025-10-05 09:49:17,139 - root - INFO - lr: 3.0798e-05 gnorm: 1.06 [11:15:07<13:16:55] +[titan] 2025-10-05 09:49:25,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:49:28,007 - root - INFO - step: 18350 loss: 2.2255 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9664 +[titan] 2025-10-05 09:49:28,007 - root - INFO - lr: 3.0789e-05 gnorm: 1.07 [11:15:18<13:16:44] +[titan] 2025-10-05 09:49:38,916 - root - INFO - step: 18355 loss: 2.1700 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9168 +[titan] 2025-10-05 09:49:38,916 - root - INFO - lr: 3.0781e-05 gnorm: 1.03 [11:15:29<13:16:33] +[titan] 2025-10-05 09:49:49,833 - root - INFO - step: 18360 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9456 +[titan] 2025-10-05 09:49:49,834 - root - INFO - lr: 3.0772e-05 gnorm: 1.12 [11:15:39<13:16:22] +[titan] 2025-10-05 09:50:00,730 - root - INFO - step: 18365 loss: 2.2105 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 09:50:00,730 - root - INFO - lr: 3.0763e-05 gnorm: 1.05 [11:15:50<13:16:11] +[titan] 2025-10-05 09:50:11,596 - root - INFO - step: 18370 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9432 +[titan] 2025-10-05 09:50:11,596 - root - INFO - lr: 3.0754e-05 gnorm: 1.09 [11:16:01<13:15:59] +[titan] 2025-10-05 09:50:22,481 - root - INFO - step: 18375 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8909 +[titan] 2025-10-05 09:50:22,481 - root - INFO - lr: 3.0745e-05 gnorm: 1.07 [11:16:12<13:15:48] +[titan] 2025-10-05 09:50:33,379 - root - INFO - step: 18380 loss: 2.1743 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 09:50:33,379 - root - INFO - lr: 
3.0736e-05 gnorm: 1.05 [11:16:23<13:15:37] +[titan] 2025-10-05 09:50:44,278 - root - INFO - step: 18385 loss: 2.2455 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9831 +[titan] 2025-10-05 09:50:44,278 - root - INFO - lr: 3.0728e-05 gnorm: 1.08 [11:16:34<13:15:26] +[titan] 2025-10-05 09:50:55,194 - root - INFO - step: 18390 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 09:50:55,194 - root - INFO - lr: 3.0719e-05 gnorm: 1.04 [11:16:45<13:15:15] +[titan] 2025-10-05 09:51:06,079 - root - INFO - step: 18395 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9155 +[titan] 2025-10-05 09:51:06,080 - root - INFO - lr: 3.0710e-05 gnorm: 1.07 [11:16:56<13:15:03] +[titan] 2025-10-05 09:51:14,780 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:51:16,960 - root - INFO - step: 18400 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:51:16,961 - root - INFO - lr: 3.0701e-05 gnorm: 1.03 [11:17:07<13:14:52] +[titan] 2025-10-05 09:51:27,817 - root - INFO - step: 18405 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 09:51:27,817 - root - INFO - lr: 3.0692e-05 gnorm: 1.07 [11:17:17<13:14:41] +[titan] 2025-10-05 09:51:38,681 - root - INFO - step: 18410 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 09:51:38,682 - root - INFO - lr: 3.0683e-05 gnorm: 1.06 [11:17:28<13:14:30] +[titan] 2025-10-05 09:51:49,581 - root - INFO - step: 18415 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8667 +[titan] 2025-10-05 09:51:49,582 - root - INFO - lr: 3.0675e-05 gnorm: 1.10 [11:17:39<13:14:18] +[titan] 2025-10-05 09:52:00,490 - root - INFO - step: 18420 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:52:00,490 - root - INFO - lr: 3.0666e-05 gnorm: 1.06 [11:17:50<13:14:07] +[titan] 2025-10-05 09:52:11,358 - root - INFO - step: 18425 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9183 +[titan] 2025-10-05 09:52:11,358 - root - INFO - lr: 3.0657e-05 gnorm: 1.09 [11:18:01<13:13:56] +[titan] 2025-10-05 09:52:22,299 - root - INFO - step: 18430 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 09:52:22,299 - root - INFO - lr: 
3.0648e-05 gnorm: 1.10 [11:18:12<13:13:45] +[titan] 2025-10-05 09:52:26,818 - root - INFO - Dumping profiler traces at step 18432 +[titan] 2025-10-05 09:52:26,856 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:52:33,371 - root - INFO - step: 18435 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 29,597 tflops: 410.61 mfu: 41.52% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 09:52:33,371 - root - INFO - lr: 3.0639e-05 gnorm: 1.09 [11:18:23<13:13:34] +[titan] 2025-10-05 09:52:44,226 - root - INFO - step: 18440 loss: 2.1224 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8757 +[titan] 2025-10-05 09:52:44,226 - root - INFO - lr: 3.0630e-05 gnorm: 1.05 [11:18:34<13:13:23] +[titan] 2025-10-05 09:52:55,086 - root - INFO - step: 18445 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:52:55,086 - root - INFO - lr: 3.0622e-05 gnorm: 1.16 [11:18:45<13:13:11] +[titan] 2025-10-05 09:53:03,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:53:05,965 - root - INFO - step: 18450 loss: 2.1736 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:53:05,965 - root - INFO - lr: 3.0613e-05 gnorm: 1.04 [11:18:56<13:13:00] +[titan] 2025-10-05 09:53:16,799 - root - INFO - step: 18455 loss: 2.2016 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9454 +[titan] 2025-10-05 09:53:16,799 - root - INFO - lr: 3.0604e-05 gnorm: 1.08 [11:19:06<13:12:49] +[titan] 2025-10-05 09:53:27,669 - root - INFO - step: 18460 loss: 2.1859 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9308 +[titan] 2025-10-05 09:53:27,669 - root - INFO - lr: 3.0595e-05 gnorm: 1.09 [11:19:17<13:12:38] +[titan] 2025-10-05 09:53:38,507 - root - INFO - step: 18465 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:53:38,508 - root - INFO - lr: 3.0586e-05 gnorm: 1.11 [11:19:28<13:12:26] +[titan] 2025-10-05 09:53:49,352 - root - INFO - step: 18470 loss: 2.2070 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:53:49,352 - root - INFO - lr: 3.0577e-05 gnorm: 1.15 [11:19:39<13:12:15] +[titan] 2025-10-05 09:54:00,241 - root - INFO - step: 18475 loss: 2.2443 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:54:00,242 - root - INFO - lr: 3.0569e-05 gnorm: 1.07 [11:19:50<13:12:04] +[titan] 2025-10-05 09:54:11,123 - root - INFO - step: 18480 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 09:54:11,123 - root - INFO - lr: 
3.0560e-05 gnorm: 1.02 [11:20:01<13:11:53] +[titan] 2025-10-05 09:54:21,973 - root - INFO - step: 18485 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:54:21,973 - root - INFO - lr: 3.0551e-05 gnorm: 3.61 [11:20:12<13:11:41] +[titan] 2025-10-05 09:54:32,841 - root - INFO - step: 18490 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:54:32,841 - root - INFO - lr: 3.0542e-05 gnorm: 1.08 [11:20:22<13:11:30] +[titan] 2025-10-05 09:54:43,710 - root - INFO - step: 18495 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:54:43,710 - root - INFO - lr: 3.0533e-05 gnorm: 1.03 [11:20:33<13:11:19] +[titan] 2025-10-05 09:54:52,411 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:54:54,598 - root - INFO - step: 18500 loss: 2.1801 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:54:54,598 - root - INFO - lr: 3.0524e-05 gnorm: 1.07 [11:20:44<13:11:08] +[titan] 2025-10-05 09:55:05,447 - root - INFO - step: 18505 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 09:55:05,447 - root - INFO - lr: 3.0515e-05 gnorm: 1.05 [11:20:55<13:10:56] +[titan] 2025-10-05 09:55:16,304 - root - INFO - step: 18510 loss: 2.2328 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9729 +[titan] 2025-10-05 09:55:16,304 - root - INFO - lr: 3.0507e-05 gnorm: 1.12 [11:21:06<13:10:45] +[titan] 2025-10-05 09:55:27,210 - root - INFO - step: 18515 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 09:55:27,211 - root - INFO - lr: 3.0498e-05 gnorm: 1.04 [11:21:17<13:10:34] +[titan] 2025-10-05 09:55:38,070 - root - INFO - step: 18520 loss: 2.1990 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:55:38,071 - root - INFO - lr: 3.0489e-05 gnorm: 1.06 [11:21:28<13:10:23] +[titan] 2025-10-05 09:55:48,946 - root - INFO - step: 18525 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8803 +[titan] 2025-10-05 09:55:48,946 - root - INFO - lr: 3.0480e-05 gnorm: 1.10 [11:21:39<13:10:11] +[titan] 2025-10-05 09:55:59,819 - root - INFO - step: 18530 loss: 2.1569 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:55:59,819 - root - INFO - lr: 
3.0471e-05 gnorm: 1.09 [11:21:49<13:10:00] +[titan] 2025-10-05 09:56:10,667 - root - INFO - step: 18535 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 09:56:10,668 - root - INFO - lr: 3.0462e-05 gnorm: 1.08 [11:22:00<13:09:49] +[titan] 2025-10-05 09:56:21,514 - root - INFO - step: 18540 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8789 +[titan] 2025-10-05 09:56:21,515 - root - INFO - lr: 3.0454e-05 gnorm: 1.06 [11:22:11<13:09:38] +[titan] 2025-10-05 09:56:32,416 - root - INFO - step: 18545 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 09:56:32,416 - root - INFO - lr: 3.0445e-05 gnorm: 1.09 [11:22:22<13:09:27] +[titan] 2025-10-05 09:56:41,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:56:43,301 - root - INFO - step: 18550 loss: 2.2123 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9545 +[titan] 2025-10-05 09:56:43,301 - root - INFO - lr: 3.0436e-05 gnorm: 1.11 [11:22:33<13:09:15] +[titan] 2025-10-05 09:56:54,209 - root - INFO - step: 18555 loss: 2.1250 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8774 +[titan] 2025-10-05 09:56:54,209 - root - INFO - lr: 3.0427e-05 gnorm: 1.05 [11:22:44<13:09:04] +[titan] 2025-10-05 09:57:05,059 - root - INFO - step: 18560 loss: 2.1067 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 09:57:05,060 - root - INFO - lr: 3.0418e-05 gnorm: 1.11 [11:22:55<13:08:53] +[titan] 2025-10-05 09:57:15,909 - root - INFO - step: 18565 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8970 +[titan] 2025-10-05 09:57:15,909 - root - INFO - lr: 3.0409e-05 gnorm: 1.06 [11:23:05<13:08:42] +[titan] 2025-10-05 09:57:26,796 - root - INFO - step: 18570 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:57:26,796 - root - INFO - lr: 3.0400e-05 gnorm: 1.05 [11:23:16<13:08:30] +[titan] 2025-10-05 09:57:37,659 - root - INFO - step: 18575 loss: 2.1669 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:57:37,659 - root - INFO - lr: 3.0392e-05 gnorm: 1.07 [11:23:27<13:08:19] +[titan] 2025-10-05 09:57:48,558 - root - INFO - step: 18580 loss: 2.1694 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:57:48,559 - root - INFO - lr: 
3.0383e-05 gnorm: 1.08 [11:23:38<13:08:08] +[titan] 2025-10-05 09:57:59,464 - root - INFO - step: 18585 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 09:57:59,464 - root - INFO - lr: 3.0374e-05 gnorm: 1.08 [11:23:49<13:07:57] +[titan] 2025-10-05 09:58:10,319 - root - INFO - step: 18590 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 09:58:10,319 - root - INFO - lr: 3.0365e-05 gnorm: 1.09 [11:24:00<13:07:45] +[titan] 2025-10-05 09:58:21,169 - root - INFO - step: 18595 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 09:58:21,169 - root - INFO - lr: 3.0356e-05 gnorm: 1.06 [11:24:11<13:07:34] +[titan] 2025-10-05 09:58:29,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:58:32,051 - root - INFO - step: 18600 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:58:32,052 - root - INFO - lr: 3.0347e-05 gnorm: 1.06 [11:24:22<13:07:23] +[titan] 2025-10-05 09:58:42,941 - root - INFO - step: 18605 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 09:58:42,941 - root - INFO - lr: 3.0339e-05 gnorm: 1.07 [11:24:33<13:07:12] +[titan] 2025-10-05 09:58:53,849 - root - INFO - step: 18610 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 09:58:53,849 - root - INFO - lr: 3.0330e-05 gnorm: 1.13 [11:24:43<13:07:01] +[titan] 2025-10-05 09:59:04,767 - root - INFO - step: 18615 loss: 2.1618 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9098 +[titan] 2025-10-05 09:59:04,767 - root - INFO - lr: 3.0321e-05 gnorm: 1.07 [11:24:54<13:06:49] +[titan] 2025-10-05 09:59:15,655 - root - INFO - step: 18620 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:59:15,655 - root - INFO - lr: 3.0312e-05 gnorm: 1.08 [11:25:05<13:06:38] +[titan] 2025-10-05 09:59:26,502 - root - INFO - step: 18625 loss: 2.1982 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 09:59:26,502 - root - INFO - lr: 3.0303e-05 gnorm: 1.06 [11:25:16<13:06:27] +[titan] 2025-10-05 09:59:37,342 - root - INFO - step: 18630 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:59:37,342 - root - INFO - lr: 
3.0294e-05 gnorm: 1.02 [11:25:27<13:06:16] +[titan] 2025-10-05 09:59:48,204 - root - INFO - step: 18635 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:59:48,205 - root - INFO - lr: 3.0285e-05 gnorm: 1.06 [11:25:38<13:06:04] +[titan] 2025-10-05 09:59:59,141 - root - INFO - step: 18640 loss: 2.1586 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:59:59,142 - root - INFO - lr: 3.0277e-05 gnorm: 1.09 [11:25:49<13:05:53] +[titan] 2025-10-05 10:00:09,999 - root - INFO - step: 18645 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:00:09,999 - root - INFO - lr: 3.0268e-05 gnorm: 1.04 [11:26:00<13:05:42] +[titan] 2025-10-05 10:00:18,687 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:00:20,868 - root - INFO - step: 18650 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 10:00:20,868 - root - INFO - lr: 3.0259e-05 gnorm: 1.06 [11:26:10<13:05:31] +[titan] 2025-10-05 10:00:31,751 - root - INFO - step: 18655 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8955 +[titan] 2025-10-05 10:00:31,751 - root - INFO - lr: 3.0250e-05 gnorm: 1.10 [11:26:21<13:05:20] +[titan] 2025-10-05 10:00:42,624 - root - INFO - step: 18660 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9236 +[titan] 2025-10-05 10:00:42,624 - root - INFO - lr: 3.0241e-05 gnorm: 1.06 [11:26:32<13:05:08] +[titan] 2025-10-05 10:00:53,508 - root - INFO - step: 18665 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 10:00:53,508 - root - INFO - lr: 3.0232e-05 gnorm: 1.07 [11:26:43<13:04:57] +[titan] 2025-10-05 10:01:04,417 - root - INFO - step: 18670 loss: 2.1073 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8611 +[titan] 2025-10-05 10:01:04,417 - root - INFO - lr: 3.0223e-05 gnorm: 1.05 [11:26:54<13:04:46] +[titan] 2025-10-05 10:01:15,322 - root - INFO - step: 18675 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9018 +[titan] 2025-10-05 10:01:15,322 - root - INFO - lr: 3.0215e-05 gnorm: 1.01 [11:27:05<13:04:35] +[titan] 2025-10-05 10:01:26,187 - root - INFO - step: 18680 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 10:01:26,188 - root - INFO - lr: 
3.0206e-05 gnorm: 1.06 [11:27:16<13:04:24] +[titan] 2025-10-05 10:01:37,075 - root - INFO - step: 18685 loss: 2.2297 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9695 +[titan] 2025-10-05 10:01:37,076 - root - INFO - lr: 3.0197e-05 gnorm: 1.09 [11:27:27<13:04:12] +[titan] 2025-10-05 10:01:47,947 - root - INFO - step: 18690 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 10:01:47,947 - root - INFO - lr: 3.0188e-05 gnorm: 1.13 [11:27:38<13:04:01] +[titan] 2025-10-05 10:01:58,853 - root - INFO - step: 18695 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 10:01:58,853 - root - INFO - lr: 3.0179e-05 gnorm: 1.11 [11:27:48<13:03:50] +[titan] 2025-10-05 10:02:07,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:02:09,720 - root - INFO - step: 18700 loss: 2.1760 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9218 +[titan] 2025-10-05 10:02:09,720 - root - INFO - lr: 3.0170e-05 gnorm: 1.05 [11:27:59<13:03:39] +[titan] 2025-10-05 10:02:20,646 - root - INFO - step: 18705 loss: 2.1878 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:02:20,646 - root - INFO - lr: 3.0161e-05 gnorm: 1.13 [11:28:10<13:03:28] +[titan] 2025-10-05 10:02:31,519 - root - INFO - step: 18710 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9250 +[titan] 2025-10-05 10:02:31,519 - root - INFO - lr: 3.0153e-05 gnorm: 1.03 [11:28:21<13:03:16] +[titan] 2025-10-05 10:02:42,408 - root - INFO - step: 18715 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 10:02:42,408 - root - INFO - lr: 3.0144e-05 gnorm: 1.14 [11:28:32<13:03:05] +[titan] 2025-10-05 10:02:53,291 - root - INFO - step: 18720 loss: 2.1198 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 10:02:53,292 - root - INFO - lr: 3.0135e-05 gnorm: 1.04 [11:28:43<13:02:54] +[titan] 2025-10-05 10:03:04,164 - root - INFO - step: 18725 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 10:03:04,164 - root - INFO - lr: 3.0126e-05 gnorm: 1.09 [11:28:54<13:02:43] +[titan] 2025-10-05 10:03:15,026 - root - INFO - step: 18730 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 10:03:15,026 - root - INFO - lr: 
3.0117e-05 gnorm: 1.09 [11:29:05<13:02:31] +[titan] 2025-10-05 10:03:25,889 - root - INFO - step: 18735 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:03:25,889 - root - INFO - lr: 3.0108e-05 gnorm: 1.09 [11:29:15<13:02:20] +[titan] 2025-10-05 10:03:36,788 - root - INFO - step: 18740 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 10:03:36,789 - root - INFO - lr: 3.0099e-05 gnorm: 1.05 [11:29:26<13:02:09] +[titan] 2025-10-05 10:03:47,682 - root - INFO - step: 18745 loss: 2.1174 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:03:47,683 - root - INFO - lr: 3.0090e-05 gnorm: 1.02 [11:29:37<13:01:58] +[titan] 2025-10-05 10:03:56,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:03:58,557 - root - INFO - step: 18750 loss: 2.1769 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 10:03:58,557 - root - INFO - lr: 3.0082e-05 gnorm: 1.06 [11:29:48<13:01:47] +[titan] 2025-10-05 10:04:09,464 - root - INFO - step: 18755 loss: 2.1852 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9314 +[titan] 2025-10-05 10:04:09,464 - root - INFO - lr: 3.0073e-05 gnorm: 1.08 [11:29:59<13:01:35] +[titan] 2025-10-05 10:04:20,357 - root - INFO - step: 18760 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:04:20,357 - root - INFO - lr: 3.0064e-05 gnorm: 1.07 [11:30:10<13:01:24] +[titan] 2025-10-05 10:04:31,231 - root - INFO - step: 18765 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 10:04:31,232 - root - INFO - lr: 3.0055e-05 gnorm: 1.11 [11:30:21<13:01:13] +[titan] 2025-10-05 10:04:42,135 - root - INFO - step: 18770 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 10:04:42,136 - root - INFO - lr: 3.0046e-05 gnorm: 1.10 [11:30:32<13:01:02] +[titan] 2025-10-05 10:04:53,006 - root - INFO - step: 18775 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 10:04:53,006 - root - INFO - lr: 3.0037e-05 gnorm: 1.03 [11:30:43<13:00:51] +[titan] 2025-10-05 10:05:03,904 - root - INFO - step: 18780 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 10:05:03,904 - root - INFO - lr: 
3.0028e-05 gnorm: 1.07 [11:30:53<13:00:39] +[titan] 2025-10-05 10:05:14,747 - root - INFO - step: 18785 loss: 2.1812 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 10:05:14,748 - root - INFO - lr: 3.0020e-05 gnorm: 1.09 [11:31:04<13:00:28] +[titan] 2025-10-05 10:05:25,598 - root - INFO - step: 18790 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 10:05:25,598 - root - INFO - lr: 3.0011e-05 gnorm: 1.06 [11:31:15<13:00:17] +[titan] 2025-10-05 10:05:36,466 - root - INFO - step: 18795 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 10:05:36,466 - root - INFO - lr: 3.0002e-05 gnorm: 1.04 [11:31:26<13:00:06] +[titan] 2025-10-05 10:05:45,130 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:05:47,339 - root - INFO - step: 18800 loss: 2.2290 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 10:05:47,340 - root - INFO - lr: 2.9993e-05 gnorm: 1.07 [11:31:37<12:59:54] +[titan] 2025-10-05 10:05:58,210 - root - INFO - step: 18805 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 10:05:58,210 - root - INFO - lr: 2.9984e-05 gnorm: 1.07 [11:31:48<12:59:43] +[titan] 2025-10-05 10:06:09,120 - root - INFO - step: 18810 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 10:06:09,121 - root - INFO - lr: 2.9975e-05 gnorm: 1.04 [11:31:59<12:59:32] +[titan] 2025-10-05 10:06:19,986 - root - INFO - step: 18815 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 10:06:19,986 - root - INFO - lr: 2.9966e-05 gnorm: 1.08 [11:32:10<12:59:21] +[titan] 2025-10-05 10:06:30,847 - root - INFO - step: 18820 loss: 2.1851 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 10:06:30,847 - root - INFO - lr: 2.9957e-05 gnorm: 1.05 [11:32:20<12:59:10] +[titan] 2025-10-05 10:06:41,727 - root - INFO - step: 18825 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:06:41,727 - root - INFO - lr: 2.9949e-05 gnorm: 1.07 [11:32:31<12:58:58] +[titan] 2025-10-05 10:06:52,600 - root - INFO - step: 18830 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 10:06:52,601 - root - INFO - lr: 
2.9940e-05 gnorm: 1.10 [11:32:42<12:58:47] +[titan] 2025-10-05 10:07:03,527 - root - INFO - step: 18835 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9505 +[titan] 2025-10-05 10:07:03,527 - root - INFO - lr: 2.9931e-05 gnorm: 1.07 [11:32:53<12:58:36] +[titan] 2025-10-05 10:07:14,367 - root - INFO - step: 18840 loss: 2.2003 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 10:07:14,367 - root - INFO - lr: 2.9922e-05 gnorm: 1.08 [11:33:04<12:58:25] +[titan] 2025-10-05 10:07:25,248 - root - INFO - step: 18845 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 10:07:25,248 - root - INFO - lr: 2.9913e-05 gnorm: 1.08 [11:33:15<12:58:14] +[titan] 2025-10-05 10:07:33,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:07:36,110 - root - INFO - step: 18850 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 10:07:36,110 - root - INFO - lr: 2.9904e-05 gnorm: 1.09 [11:33:26<12:58:02] +[titan] 2025-10-05 10:07:46,979 - root - INFO - step: 18855 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:07:46,979 - root - INFO - lr: 2.9895e-05 gnorm: 1.07 [11:33:37<12:57:51] +[titan] 2025-10-05 10:07:57,853 - root - INFO - step: 18860 loss: 2.1443 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:07:57,853 - root - INFO - lr: 2.9886e-05 gnorm: 1.06 [11:33:47<12:57:40] +[titan] 2025-10-05 10:08:08,767 - root - INFO - step: 18865 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 10:08:08,767 - root - INFO - lr: 2.9878e-05 gnorm: 1.05 [11:33:58<12:57:29] +[titan] 2025-10-05 10:08:19,643 - root - INFO - step: 18870 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:08:19,643 - root - INFO - lr: 2.9869e-05 gnorm: 1.09 [11:34:09<12:57:17] +[titan] 2025-10-05 10:08:30,505 - root - INFO - step: 18875 loss: 2.1432 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:08:30,505 - root - INFO - lr: 2.9860e-05 gnorm: 1.11 [11:34:20<12:57:06] +[titan] 2025-10-05 10:08:41,373 - root - INFO - step: 18880 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8639 +[titan] 2025-10-05 10:08:41,373 - root - INFO - lr: 
2.9851e-05 gnorm: 1.04 [11:34:31<12:56:55] +[titan] 2025-10-05 10:08:52,227 - root - INFO - step: 18885 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:08:52,227 - root - INFO - lr: 2.9842e-05 gnorm: 1.09 [11:34:42<12:56:44] +[titan] 2025-10-05 10:09:03,099 - root - INFO - step: 18890 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 10:09:03,099 - root - INFO - lr: 2.9833e-05 gnorm: 1.06 [11:34:53<12:56:33] +[titan] 2025-10-05 10:09:13,982 - root - INFO - step: 18895 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 10:09:13,982 - root - INFO - lr: 2.9824e-05 gnorm: 1.06 [11:35:04<12:56:21] +[titan] 2025-10-05 10:09:22,708 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:09:24,889 - root - INFO - step: 18900 loss: 2.2596 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 10:09:24,889 - root - INFO - lr: 2.9815e-05 gnorm: 1.09 [11:35:14<12:56:10] +[titan] 2025-10-05 10:09:35,723 - root - INFO - step: 18905 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9049 +[titan] 2025-10-05 10:09:35,723 - root - INFO - lr: 2.9807e-05 gnorm: 2.16 [11:35:25<12:55:59] +[titan] 2025-10-05 10:09:46,616 - root - INFO - step: 18910 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 10:09:46,616 - root - INFO - lr: 2.9798e-05 gnorm: 1.10 [11:35:36<12:55:48] +[titan] 2025-10-05 10:09:57,505 - root - INFO - step: 18915 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8990 +[titan] 2025-10-05 10:09:57,506 - root - INFO - lr: 2.9789e-05 gnorm: 1.06 [11:35:47<12:55:36] +[titan] 2025-10-05 10:10:08,408 - root - INFO - step: 18920 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 10:10:08,409 - root - INFO - lr: 2.9780e-05 gnorm: 1.11 [11:35:58<12:55:25] +[titan] 2025-10-05 10:10:19,290 - root - INFO - step: 18925 loss: 2.1401 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8910 +[titan] 2025-10-05 10:10:19,290 - root - INFO - lr: 2.9771e-05 gnorm: 1.09 [11:36:09<12:55:14] +[titan] 2025-10-05 10:10:30,188 - root - INFO - step: 18930 loss: 2.1578 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9075 +[titan] 2025-10-05 10:10:30,188 - root - INFO - lr: 
2.9762e-05 gnorm: 1.08 [11:36:20<12:55:03] +[titan] 2025-10-05 10:10:41,057 - root - INFO - step: 18935 loss: 2.1455 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:10:41,057 - root - INFO - lr: 2.9753e-05 gnorm: 1.08 [11:36:31<12:54:52] +[titan] 2025-10-05 10:10:51,943 - root - INFO - step: 18940 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 10:10:51,943 - root - INFO - lr: 2.9744e-05 gnorm: 1.09 [11:36:41<12:54:40] +[titan] 2025-10-05 10:11:00,935 - root - INFO - Dumping profiler traces at step 18944 +[titan] 2025-10-05 10:11:00,975 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:11:03,165 - root - INFO - step: 18945 loss: 2.2146 memory: 118.84GiB(85.28%) tps: 29,200 tflops: 405.11 mfu: 40.96% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 10:11:03,166 - root - INFO - lr: 2.9736e-05 gnorm: 1.05 [11:36:53<12:54:30] +[titan] 2025-10-05 10:11:11,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:11:14,040 - root - INFO - step: 18950 loss: 2.2217 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9615 +[titan] 2025-10-05 10:11:14,040 - root - INFO - lr: 2.9727e-05 gnorm: 1.10 [11:37:04<12:54:18] +[titan] 2025-10-05 10:11:24,927 - root - INFO - step: 18955 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 10:11:24,927 - root - INFO - lr: 2.9718e-05 gnorm: 1.04 [11:37:14<12:54:07] +[titan] 2025-10-05 10:11:35,836 - root - INFO - step: 18960 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:11:35,836 - root - INFO - lr: 2.9709e-05 gnorm: 1.08 [11:37:25<12:53:56] +[titan] 2025-10-05 10:11:46,725 - root - INFO - step: 18965 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9561 +[titan] 2025-10-05 10:11:46,725 - root - INFO - lr: 2.9700e-05 gnorm: 1.06 [11:37:36<12:53:45] +[titan] 2025-10-05 10:11:57,607 - root - INFO - step: 18970 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 10:11:57,608 - root - INFO - lr: 2.9691e-05 gnorm: 1.06 [11:37:47<12:53:34] +[titan] 2025-10-05 10:12:08,492 - root - INFO - step: 18975 loss: 2.1885 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9336 +[titan] 2025-10-05 10:12:08,492 - root - INFO - lr: 2.9682e-05 gnorm: 1.05 [11:37:58<12:53:22] +[titan] 2025-10-05 10:12:19,363 - root - INFO - step: 18980 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 10:12:19,364 - root - INFO - lr: 
2.9673e-05 gnorm: 1.05 [11:38:09<12:53:11] +[titan] 2025-10-05 10:12:30,251 - root - INFO - step: 18985 loss: 2.2178 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 10:12:30,252 - root - INFO - lr: 2.9664e-05 gnorm: 1.08 [11:38:20<12:53:00] +[titan] 2025-10-05 10:12:41,145 - root - INFO - step: 18990 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 10:12:41,146 - root - INFO - lr: 2.9656e-05 gnorm: 1.04 [11:38:31<12:52:49] +[titan] 2025-10-05 10:12:52,037 - root - INFO - step: 18995 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:12:52,037 - root - INFO - lr: 2.9647e-05 gnorm: 1.06 [11:38:42<12:52:38] +[titan] 2025-10-05 10:13:00,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:13:02,914 - root - INFO - step: 19000 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9377 +[titan] 2025-10-05 10:13:02,914 - root - INFO - lr: 2.9638e-05 gnorm: 1.06 [11:38:52<12:52:26] +[titan] 2025-10-05 10:13:13,797 - root - INFO - step: 19005 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 10:13:13,797 - root - INFO - lr: 2.9629e-05 gnorm: 1.09 [11:39:03<12:52:15] +[titan] 2025-10-05 10:13:24,684 - root - INFO - step: 19010 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 10:13:24,684 - root - INFO - lr: 2.9620e-05 gnorm: 1.04 [11:39:14<12:52:04] +[titan] 2025-10-05 10:13:35,565 - root - INFO - step: 19015 loss: 2.1615 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 10:13:35,565 - root - INFO - lr: 2.9611e-05 gnorm: 1.06 [11:39:25<12:51:53] +[titan] 2025-10-05 10:13:46,458 - root - INFO - step: 19020 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:13:46,458 - root - INFO - lr: 2.9602e-05 gnorm: 1.09 [11:39:36<12:51:42] +[titan] 2025-10-05 10:13:57,365 - root - INFO - step: 19025 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9259 +[titan] 2025-10-05 10:13:57,365 - root - INFO - lr: 2.9593e-05 gnorm: 1.11 [11:39:47<12:51:30] +[titan] 2025-10-05 10:14:08,243 - root - INFO - step: 19030 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 10:14:08,244 - root - INFO - lr: 
2.9585e-05 gnorm: 1.11 [11:39:58<12:51:19] +[titan] 2025-10-05 10:14:19,180 - root - INFO - step: 19035 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.68 mfu: 42.03% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 10:14:19,180 - root - INFO - lr: 2.9576e-05 gnorm: 1.08 [11:40:09<12:51:08] +[titan] 2025-10-05 10:14:30,057 - root - INFO - step: 19040 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 10:14:30,058 - root - INFO - lr: 2.9567e-05 gnorm: 1.08 [11:40:20<12:50:57] +[titan] 2025-10-05 10:14:40,934 - root - INFO - step: 19045 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:14:40,934 - root - INFO - lr: 2.9558e-05 gnorm: 1.05 [11:40:30<12:50:46] +[titan] 2025-10-05 10:14:49,618 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:14:51,803 - root - INFO - step: 19050 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 10:14:51,803 - root - INFO - lr: 2.9549e-05 gnorm: 1.10 [11:40:41<12:50:34] +[titan] 2025-10-05 10:15:02,687 - root - INFO - step: 19055 loss: 2.1320 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:15:02,687 - root - INFO - lr: 2.9540e-05 gnorm: 1.03 [11:40:52<12:50:23] +[titan] 2025-10-05 10:15:13,599 - root - INFO - step: 19060 loss: 2.1731 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:15:13,600 - root - INFO - lr: 2.9531e-05 gnorm: 1.05 [11:41:03<12:50:12] +[titan] 2025-10-05 10:15:24,470 - root - INFO - step: 19065 loss: 2.0790 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 10:15:24,470 - root - INFO - lr: 2.9522e-05 gnorm: 1.02 [11:41:14<12:50:01] +[titan] 2025-10-05 10:15:35,340 - root - INFO - step: 19070 loss: 2.1215 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 10:15:35,340 - root - INFO - lr: 2.9513e-05 gnorm: 1.03 [11:41:25<12:49:50] +[titan] 2025-10-05 10:15:46,220 - root - INFO - step: 19075 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 10:15:46,220 - root - INFO - lr: 2.9505e-05 gnorm: 1.03 [11:41:36<12:49:38] +[titan] 2025-10-05 10:15:57,087 - root - INFO - step: 19080 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 10:15:57,087 - root - INFO - lr: 
2.9496e-05 gnorm: 1.08 [11:41:47<12:49:27] +[titan] 2025-10-05 10:16:07,949 - root - INFO - step: 19085 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:16:07,949 - root - INFO - lr: 2.9487e-05 gnorm: 1.03 [11:41:57<12:49:16] +[titan] 2025-10-05 10:16:18,866 - root - INFO - step: 19090 loss: 2.1027 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:16:18,866 - root - INFO - lr: 2.9478e-05 gnorm: 1.05 [11:42:08<12:49:05] +[titan] 2025-10-05 10:16:29,722 - root - INFO - step: 19095 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 10:16:29,722 - root - INFO - lr: 2.9469e-05 gnorm: 1.07 [11:42:19<12:48:54] +[titan] 2025-10-05 10:16:38,404 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:16:40,594 - root - INFO - step: 19100 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 10:16:40,594 - root - INFO - lr: 2.9460e-05 gnorm: 1.12 [11:42:30<12:48:42] +[titan] 2025-10-05 10:16:51,467 - root - INFO - step: 19105 loss: 2.1659 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 10:16:51,467 - root - INFO - lr: 2.9451e-05 gnorm: 1.07 [11:42:41<12:48:31] +[titan] 2025-10-05 10:17:02,333 - root - INFO - step: 19110 loss: 2.1571 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:17:02,333 - root - INFO - lr: 2.9442e-05 gnorm: 1.07 [11:42:52<12:48:20] +[titan] 2025-10-05 10:17:13,254 - root - INFO - step: 19115 loss: 2.1907 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9353 +[titan] 2025-10-05 10:17:13,254 - root - INFO - lr: 2.9433e-05 gnorm: 1.05 [11:43:03<12:48:09] +[titan] 2025-10-05 10:17:24,144 - root - INFO - step: 19120 loss: 2.2215 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 10:17:24,145 - root - INFO - lr: 2.9424e-05 gnorm: 1.08 [11:43:14<12:47:58] +[titan] 2025-10-05 10:17:34,985 - root - INFO - step: 19125 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 10:17:34,985 - root - INFO - lr: 2.9416e-05 gnorm: 1.07 [11:43:25<12:47:46] +[titan] 2025-10-05 10:17:45,834 - root - INFO - step: 19130 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 10:17:45,834 - root - INFO - lr: 
2.9407e-05 gnorm: 1.04 [11:43:35<12:47:35] +[titan] 2025-10-05 10:17:56,697 - root - INFO - step: 19135 loss: 2.1835 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 10:17:56,697 - root - INFO - lr: 2.9398e-05 gnorm: 1.04 [11:43:46<12:47:24] +[titan] 2025-10-05 10:18:07,545 - root - INFO - step: 19140 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 10:18:07,545 - root - INFO - lr: 2.9389e-05 gnorm: 1.06 [11:43:57<12:47:13] +[titan] 2025-10-05 10:18:18,458 - root - INFO - step: 19145 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9199 +[titan] 2025-10-05 10:18:18,458 - root - INFO - lr: 2.9380e-05 gnorm: 1.08 [11:44:08<12:47:02] +[titan] 2025-10-05 10:18:27,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:18:29,337 - root - INFO - step: 19150 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8515 +[titan] 2025-10-05 10:18:29,338 - root - INFO - lr: 2.9371e-05 gnorm: 1.34 [11:44:19<12:46:50] +[titan] 2025-10-05 10:18:40,256 - root - INFO - step: 19155 loss: 2.1332 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8849 +[titan] 2025-10-05 10:18:40,256 - root - INFO - lr: 2.9362e-05 gnorm: 1.09 [11:44:30<12:46:39] +[titan] 2025-10-05 10:18:51,145 - root - INFO - step: 19160 loss: 2.1481 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 10:18:51,145 - root - INFO - lr: 2.9353e-05 gnorm: 1.07 [11:44:41<12:46:28] +[titan] 2025-10-05 10:19:02,037 - root - INFO - step: 19165 loss: 2.1516 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 10:19:02,037 - root - INFO - lr: 2.9344e-05 gnorm: 1.05 [11:44:52<12:46:17] +[titan] 2025-10-05 10:19:12,937 - root - INFO - step: 19170 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9133 +[titan] 2025-10-05 10:19:12,937 - root - INFO - lr: 2.9336e-05 gnorm: 1.08 [11:45:02<12:46:06] +[titan] 2025-10-05 10:19:23,875 - root - INFO - step: 19175 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 10:19:23,875 - root - INFO - lr: 2.9327e-05 gnorm: 1.04 [11:45:13<12:45:54] +[titan] 2025-10-05 10:19:34,776 - root - INFO - step: 19180 loss: 2.1428 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 10:19:34,776 - root - INFO - lr: 
2.9318e-05 gnorm: 1.07 [11:45:24<12:45:43] +[titan] 2025-10-05 10:19:45,696 - root - INFO - step: 19185 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 10:19:45,696 - root - INFO - lr: 2.9309e-05 gnorm: 1.05 [11:45:35<12:45:32] +[titan] 2025-10-05 10:19:56,559 - root - INFO - step: 19190 loss: 2.2063 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 10:19:56,559 - root - INFO - lr: 2.9300e-05 gnorm: 1.05 [11:45:46<12:45:21] +[titan] 2025-10-05 10:20:07,440 - root - INFO - step: 19195 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 10:20:07,440 - root - INFO - lr: 2.9291e-05 gnorm: 1.06 [11:45:57<12:45:10] +[titan] 2025-10-05 10:20:16,203 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:20:18,388 - root - INFO - step: 19200 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:20:18,389 - root - INFO - lr: 2.9282e-05 gnorm: 1.10 [11:46:08<12:44:59] +[titan] 2025-10-05 10:20:29,261 - root - INFO - step: 19205 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9722 +[titan] 2025-10-05 10:20:29,261 - root - INFO - lr: 2.9273e-05 gnorm: 1.05 [11:46:19<12:44:47] +[titan] 2025-10-05 10:20:40,136 - root - INFO - step: 19210 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 10:20:40,136 - root - INFO - lr: 2.9264e-05 gnorm: 1.05 [11:46:30<12:44:36] +[titan] 2025-10-05 10:20:51,016 - root - INFO - step: 19215 loss: 2.1099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8637 +[titan] 2025-10-05 10:20:51,016 - root - INFO - lr: 2.9255e-05 gnorm: 1.02 [11:46:41<12:44:25] +[titan] 2025-10-05 10:21:01,917 - root - INFO - step: 19220 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 10:21:01,918 - root - INFO - lr: 2.9247e-05 gnorm: 1.06 [11:46:51<12:44:14] +[titan] 2025-10-05 10:21:12,779 - root - INFO - step: 19225 loss: 2.1977 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9427 +[titan] 2025-10-05 10:21:12,780 - root - INFO - lr: 2.9238e-05 gnorm: 1.08 [11:47:02<12:44:03] +[titan] 2025-10-05 10:21:23,714 - root - INFO - step: 19230 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 10:21:23,714 - root - INFO - lr: 
2.9229e-05 gnorm: 1.09 [11:47:13<12:43:51] +[titan] 2025-10-05 10:21:34,597 - root - INFO - step: 19235 loss: 2.1070 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 10:21:34,597 - root - INFO - lr: 2.9220e-05 gnorm: 1.08 [11:47:24<12:43:40] +[titan] 2025-10-05 10:21:45,492 - root - INFO - step: 19240 loss: 2.0962 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 10:21:45,492 - root - INFO - lr: 2.9211e-05 gnorm: 1.04 [11:47:35<12:43:29] +[titan] 2025-10-05 10:21:56,367 - root - INFO - step: 19245 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 10:21:56,367 - root - INFO - lr: 2.9202e-05 gnorm: 1.08 [11:47:46<12:43:18] +[titan] 2025-10-05 10:22:05,105 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:22:07,295 - root - INFO - step: 19250 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:22:07,296 - root - INFO - lr: 2.9193e-05 gnorm: 1.04 [11:47:57<12:43:07] +[titan] 2025-10-05 10:22:18,238 - root - INFO - step: 19255 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 10:22:18,238 - root - INFO - lr: 2.9184e-05 gnorm: 1.06 [11:48:08<12:42:56] +[titan] 2025-10-05 10:22:29,120 - root - INFO - step: 19260 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 10:22:29,120 - root - INFO - lr: 2.9175e-05 gnorm: 1.10 [11:48:19<12:42:44] +[titan] 2025-10-05 10:22:40,008 - root - INFO - step: 19265 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:22:40,008 - root - INFO - lr: 2.9167e-05 gnorm: 1.08 [11:48:30<12:42:33] +[titan] 2025-10-05 10:22:50,875 - root - INFO - step: 19270 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 10:22:50,875 - root - INFO - lr: 2.9158e-05 gnorm: 1.07 [11:48:40<12:42:22] +[titan] 2025-10-05 10:23:01,737 - root - INFO - step: 19275 loss: 2.1975 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:23:01,737 - root - INFO - lr: 2.9149e-05 gnorm: 1.08 [11:48:51<12:42:11] +[titan] 2025-10-05 10:23:12,658 - root - INFO - step: 19280 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 10:23:12,658 - root - INFO - lr: 
2.9140e-05 gnorm: 1.06 [11:49:02<12:42:00] +[titan] 2025-10-05 10:23:23,594 - root - INFO - step: 19285 loss: 2.1554 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9047 +[titan] 2025-10-05 10:23:23,595 - root - INFO - lr: 2.9131e-05 gnorm: 1.11 [11:49:13<12:41:48] +[titan] 2025-10-05 10:23:34,471 - root - INFO - step: 19290 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9525 +[titan] 2025-10-05 10:23:34,471 - root - INFO - lr: 2.9122e-05 gnorm: 1.08 [11:49:24<12:41:37] +[titan] 2025-10-05 10:23:45,370 - root - INFO - step: 19295 loss: 2.2145 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 10:23:45,370 - root - INFO - lr: 2.9113e-05 gnorm: 1.11 [11:49:35<12:41:26] +[titan] 2025-10-05 10:23:54,147 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:23:56,333 - root - INFO - step: 19300 loss: 2.1524 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 10:23:56,333 - root - INFO - lr: 2.9104e-05 gnorm: 1.12 [11:49:46<12:41:15] +[titan] 2025-10-05 10:24:07,214 - root - INFO - step: 19305 loss: 2.1152 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 10:24:07,214 - root - INFO - lr: 2.9095e-05 gnorm: 1.06 [11:49:57<12:41:04] +[titan] 2025-10-05 10:24:18,150 - root - INFO - step: 19310 loss: 2.1360 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8875 +[titan] 2025-10-05 10:24:18,150 - root - INFO - lr: 2.9086e-05 gnorm: 1.11 [11:50:08<12:40:53] +[titan] 2025-10-05 10:24:29,081 - root - INFO - step: 19315 loss: 2.1682 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9158 +[titan] 2025-10-05 10:24:29,081 - root - INFO - lr: 2.9077e-05 gnorm: 1.08 [11:50:19<12:40:42] +[titan] 2025-10-05 10:24:39,944 - root - INFO - step: 19320 loss: 2.1420 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 10:24:39,945 - root - INFO - lr: 2.9069e-05 gnorm: 1.05 [11:50:29<12:40:30] +[titan] 2025-10-05 10:24:50,812 - root - INFO - step: 19325 loss: 2.1255 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 10:24:50,812 - root - INFO - lr: 2.9060e-05 gnorm: 1.04 [11:50:40<12:40:19] +[titan] 2025-10-05 10:25:01,681 - root - INFO - step: 19330 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 10:25:01,681 - root - INFO - lr: 
2.9051e-05 gnorm: 1.04 [11:50:51<12:40:08] +[titan] 2025-10-05 10:25:12,540 - root - INFO - step: 19335 loss: 2.1642 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 10:25:12,540 - root - INFO - lr: 2.9042e-05 gnorm: 1.05 [11:51:02<12:39:57] +[titan] 2025-10-05 10:25:23,440 - root - INFO - step: 19340 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 10:25:23,441 - root - INFO - lr: 2.9033e-05 gnorm: 1.08 [11:51:13<12:39:46] +[titan] 2025-10-05 10:25:34,323 - root - INFO - step: 19345 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 10:25:34,323 - root - INFO - lr: 2.9024e-05 gnorm: 1.06 [11:51:24<12:39:34] +[titan] 2025-10-05 10:25:42,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:25:45,172 - root - INFO - step: 19350 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 10:25:45,172 - root - INFO - lr: 2.9015e-05 gnorm: 1.06 [11:51:35<12:39:23] +[titan] 2025-10-05 10:25:56,041 - root - INFO - step: 19355 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 10:25:56,041 - root - INFO - lr: 2.9006e-05 gnorm: 1.05 [11:51:46<12:39:12] +[titan] 2025-10-05 10:26:06,901 - root - INFO - step: 19360 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 10:26:06,901 - root - INFO - lr: 2.8997e-05 gnorm: 1.09 [11:51:56<12:39:01] +[titan] 2025-10-05 10:26:17,768 - root - INFO - step: 19365 loss: 2.2565 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 10:26:17,768 - root - INFO - lr: 2.8988e-05 gnorm: 1.06 [11:52:07<12:38:49] +[titan] 2025-10-05 10:26:28,693 - root - INFO - step: 19370 loss: 2.1913 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 10:26:28,693 - root - INFO - lr: 2.8980e-05 gnorm: 1.07 [11:52:18<12:38:38] +[titan] 2025-10-05 10:26:39,550 - root - INFO - step: 19375 loss: 2.2098 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:26:39,550 - root - INFO - lr: 2.8971e-05 gnorm: 1.10 [11:52:29<12:38:27] +[titan] 2025-10-05 10:26:50,433 - root - INFO - step: 19380 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 10:26:50,433 - root - INFO - lr: 
2.8962e-05 gnorm: 1.07 [11:52:40<12:38:16] +[titan] 2025-10-05 10:27:01,279 - root - INFO - step: 19385 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 10:27:01,279 - root - INFO - lr: 2.8953e-05 gnorm: 1.04 [11:52:51<12:38:05] +[titan] 2025-10-05 10:27:12,140 - root - INFO - step: 19390 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 10:27:12,141 - root - INFO - lr: 2.8944e-05 gnorm: 1.10 [11:53:02<12:37:53] +[titan] 2025-10-05 10:27:23,043 - root - INFO - step: 19395 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:27:23,043 - root - INFO - lr: 2.8935e-05 gnorm: 1.07 [11:53:13<12:37:42] +[titan] 2025-10-05 10:27:31,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:27:33,895 - root - INFO - step: 19400 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:27:33,895 - root - INFO - lr: 2.8926e-05 gnorm: 1.06 [11:53:23<12:37:31] +[titan] 2025-10-05 10:27:44,768 - root - INFO - step: 19405 loss: 2.0933 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 10:27:44,768 - root - INFO - lr: 2.8917e-05 gnorm: 1.05 [11:53:34<12:37:20] +[titan] 2025-10-05 10:27:55,669 - root - INFO - step: 19410 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 10:27:55,669 - root - INFO - lr: 2.8908e-05 gnorm: 1.05 [11:53:45<12:37:09] +[titan] 2025-10-05 10:28:06,538 - root - INFO - step: 19415 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 10:28:06,538 - root - INFO - lr: 2.8899e-05 gnorm: 1.07 [11:53:56<12:36:57] +[titan] 2025-10-05 10:28:17,407 - root - INFO - step: 19420 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:28:17,407 - root - INFO - lr: 2.8890e-05 gnorm: 1.09 [11:54:07<12:36:46] +[titan] 2025-10-05 10:28:28,336 - root - INFO - step: 19425 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9504 +[titan] 2025-10-05 10:28:28,336 - root - INFO - lr: 2.8882e-05 gnorm: 1.11 [11:54:18<12:36:35] +[titan] 2025-10-05 10:28:39,218 - root - INFO - step: 19430 loss: 2.1045 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8601 +[titan] 2025-10-05 10:28:39,218 - root - INFO - lr: 
2.8873e-05 gnorm: 1.08 [11:54:29<12:36:24] +[titan] 2025-10-05 10:28:50,126 - root - INFO - step: 19435 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9318 +[titan] 2025-10-05 10:28:50,127 - root - INFO - lr: 2.8864e-05 gnorm: 1.07 [11:54:40<12:36:13] +[titan] 2025-10-05 10:29:01,033 - root - INFO - step: 19440 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9099 +[titan] 2025-10-05 10:29:01,033 - root - INFO - lr: 2.8855e-05 gnorm: 1.04 [11:54:51<12:36:02] +[titan] 2025-10-05 10:29:11,909 - root - INFO - step: 19445 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 10:29:11,909 - root - INFO - lr: 2.8846e-05 gnorm: 1.06 [11:55:01<12:35:50] +[titan] 2025-10-05 10:29:20,622 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:29:22,834 - root - INFO - step: 19450 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8914 +[titan] 2025-10-05 10:29:22,834 - root - INFO - lr: 2.8837e-05 gnorm: 1.04 [11:55:12<12:35:39] +[titan] 2025-10-05 10:29:33,814 - root - INFO - step: 19455 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 29,845 tflops: 414.05 mfu: 41.87% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:29:33,814 - root - INFO - lr: 2.8828e-05 gnorm: 1.06 [11:55:23<12:35:28] +[titan] 2025-10-05 10:29:36,170 - root - INFO - Dumping profiler traces at step 19456 +[titan] 2025-10-05 10:29:36,209 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:29:44,902 - root - INFO - step: 19460 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 29,553 tflops: 410.00 mfu: 41.46% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:29:44,902 - root - INFO - lr: 2.8819e-05 gnorm: 1.04 [11:55:34<12:35:17] +[titan] 2025-10-05 10:29:55,774 - root - INFO - step: 19465 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:29:55,774 - root - INFO - lr: 2.8810e-05 gnorm: 1.05 [11:55:45<12:35:06] +[titan] 2025-10-05 10:30:06,632 - root - INFO - step: 19470 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8999 +[titan] 2025-10-05 10:30:06,632 - root - INFO - lr: 2.8801e-05 gnorm: 1.06 [11:55:56<12:34:55] +[titan] 2025-10-05 10:30:17,527 - root - INFO - step: 19475 loss: 2.0697 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8296 +[titan] 2025-10-05 10:30:17,527 - root - INFO - lr: 2.8792e-05 gnorm: 1.03 [11:56:07<12:34:44] +[titan] 2025-10-05 10:30:28,427 - root - INFO - step: 19480 loss: 
2.2055 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 10:30:28,427 - root - INFO - lr: 2.8784e-05 gnorm: 1.06 [11:56:18<12:34:32] +[titan] 2025-10-05 10:30:39,303 - root - INFO - step: 19485 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 10:30:39,303 - root - INFO - lr: 2.8775e-05 gnorm: 1.05 [11:56:29<12:34:21] +[titan] 2025-10-05 10:30:50,176 - root - INFO - step: 19490 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:30:50,176 - root - INFO - lr: 2.8766e-05 gnorm: 1.09 [11:56:40<12:34:10] +[titan] 2025-10-05 10:31:01,024 - root - INFO - step: 19495 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:31:01,024 - root - INFO - lr: 2.8757e-05 gnorm: 1.06 [11:56:51<12:33:59] +[titan] 2025-10-05 10:31:09,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:31:11,884 - root - INFO - step: 19500 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9032 +[titan] 2025-10-05 10:31:11,884 - root - INFO - lr: 2.8748e-05 gnorm: 1.04 [11:57:01<12:33:48] +[titan] 2025-10-05 10:31:22,776 - root - INFO - step: 19505 loss: 2.1755 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 10:31:22,776 - root - INFO - lr: 2.8739e-05 gnorm: 1.05 [11:57:12<12:33:36] +[titan] 2025-10-05 10:31:33,671 - root - INFO - step: 19510 loss: 2.1889 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:31:33,672 - root - INFO - lr: 2.8730e-05 gnorm: 1.09 [11:57:23<12:33:25] +[titan] 2025-10-05 10:31:44,519 - root - INFO - step: 19515 loss: 2.1331 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8848 +[titan] 2025-10-05 10:31:44,519 - root - INFO - lr: 2.8721e-05 gnorm: 1.09 [11:57:34<12:33:14] +[titan] 2025-10-05 10:31:55,368 - root - INFO - step: 19520 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:31:55,368 - root - INFO - lr: 2.8712e-05 gnorm: 1.09 [11:57:45<12:33:03] +[titan] 2025-10-05 10:32:06,223 - root - INFO - step: 19525 loss: 2.1590 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 10:32:06,223 - root - INFO - lr: 2.8703e-05 gnorm: 1.04 [11:57:56<12:32:52] +[titan] 2025-10-05 10:32:17,080 - root - INFO - step: 19530 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:32:17,081 - root - INFO - lr: 
2.8694e-05 gnorm: 1.03 [11:58:07<12:32:40] +[titan] 2025-10-05 10:32:27,960 - root - INFO - step: 19535 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 10:32:27,960 - root - INFO - lr: 2.8686e-05 gnorm: 1.12 [11:58:17<12:32:29] +[titan] 2025-10-05 10:32:38,837 - root - INFO - step: 19540 loss: 2.1660 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 10:32:38,837 - root - INFO - lr: 2.8677e-05 gnorm: 1.05 [11:58:28<12:32:18] +[titan] 2025-10-05 10:32:49,713 - root - INFO - step: 19545 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 10:32:49,713 - root - INFO - lr: 2.8668e-05 gnorm: 1.05 [11:58:39<12:32:07] +[titan] 2025-10-05 10:32:58,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:33:00,607 - root - INFO - step: 19550 loss: 2.1396 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8912 +[titan] 2025-10-05 10:33:00,607 - root - INFO - lr: 2.8659e-05 gnorm: 1.07 [11:58:50<12:31:56] +[titan] 2025-10-05 10:33:11,463 - root - INFO - step: 19555 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8942 +[titan] 2025-10-05 10:33:11,463 - root - INFO - lr: 2.8650e-05 gnorm: 1.05 [11:59:01<12:31:44] +[titan] 2025-10-05 10:33:22,332 - root - INFO - step: 19560 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9357 +[titan] 2025-10-05 10:33:22,332 - root - INFO - lr: 2.8641e-05 gnorm: 1.08 [11:59:12<12:31:33] +[titan] 2025-10-05 10:33:33,247 - root - INFO - step: 19565 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 10:33:33,247 - root - INFO - lr: 2.8632e-05 gnorm: 1.12 [11:59:23<12:31:22] +[titan] 2025-10-05 10:33:44,148 - root - INFO - step: 19570 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:33:44,148 - root - INFO - lr: 2.8623e-05 gnorm: 1.14 [11:59:34<12:31:11] +[titan] 2025-10-05 10:33:55,019 - root - INFO - step: 19575 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 10:33:55,019 - root - INFO - lr: 2.8614e-05 gnorm: 1.09 [11:59:45<12:31:00] +[titan] 2025-10-05 10:34:05,890 - root - INFO - step: 19580 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8641 +[titan] 2025-10-05 10:34:05,890 - root - INFO - lr: 
2.8605e-05 gnorm: 1.05 [11:59:55<12:30:49] +[titan] 2025-10-05 10:34:16,751 - root - INFO - step: 19585 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 10:34:16,752 - root - INFO - lr: 2.8596e-05 gnorm: 1.07 [12:00:06<12:30:37] +[titan] 2025-10-05 10:34:27,618 - root - INFO - step: 19590 loss: 2.1741 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 10:34:27,619 - root - INFO - lr: 2.8588e-05 gnorm: 1.06 [12:00:17<12:30:26] +[titan] 2025-10-05 10:34:38,478 - root - INFO - step: 19595 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 10:34:38,478 - root - INFO - lr: 2.8579e-05 gnorm: 1.07 [12:00:28<12:30:15] +[titan] 2025-10-05 10:34:47,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:34:49,385 - root - INFO - step: 19600 loss: 2.1233 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:34:49,385 - root - INFO - lr: 2.8570e-05 gnorm: 1.04 [12:00:39<12:30:04] +[titan] 2025-10-05 10:35:00,251 - root - INFO - step: 19605 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9248 +[titan] 2025-10-05 10:35:00,251 - root - INFO - lr: 2.8561e-05 gnorm: 1.04 [12:00:50<12:29:53] +[titan] 2025-10-05 10:35:11,113 - root - INFO - step: 19610 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9104 +[titan] 2025-10-05 10:35:11,113 - root - INFO - lr: 2.8552e-05 gnorm: 1.09 [12:01:01<12:29:41] +[titan] 2025-10-05 10:35:21,983 - root - INFO - step: 19615 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 10:35:21,983 - root - INFO - lr: 2.8543e-05 gnorm: 1.05 [12:01:11<12:29:30] +[titan] 2025-10-05 10:35:32,879 - root - INFO - step: 19620 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 10:35:32,879 - root - INFO - lr: 2.8534e-05 gnorm: 1.11 [12:01:22<12:29:19] +[titan] 2025-10-05 10:35:43,764 - root - INFO - step: 19625 loss: 2.1033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:35:43,764 - root - INFO - lr: 2.8525e-05 gnorm: 1.05 [12:01:33<12:29:08] +[titan] 2025-10-05 10:35:54,636 - root - INFO - step: 19630 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 10:35:54,637 - root - INFO - lr: 
2.8516e-05 gnorm: 1.10 [12:01:44<12:28:57] +[titan] 2025-10-05 10:36:05,532 - root - INFO - step: 19635 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 10:36:05,532 - root - INFO - lr: 2.8507e-05 gnorm: 1.02 [12:01:55<12:28:45] +[titan] 2025-10-05 10:36:16,411 - root - INFO - step: 19640 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 10:36:16,411 - root - INFO - lr: 2.8498e-05 gnorm: 1.05 [12:02:06<12:28:34] +[titan] 2025-10-05 10:36:27,270 - root - INFO - step: 19645 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 10:36:27,270 - root - INFO - lr: 2.8489e-05 gnorm: 1.06 [12:02:17<12:28:23] +[titan] 2025-10-05 10:36:35,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:36:38,157 - root - INFO - step: 19650 loss: 2.0890 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8470 +[titan] 2025-10-05 10:36:38,157 - root - INFO - lr: 2.8481e-05 gnorm: 1.01 [12:02:28<12:28:12] +[titan] 2025-10-05 10:36:49,018 - root - INFO - step: 19655 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 10:36:49,019 - root - INFO - lr: 2.8472e-05 gnorm: 1.07 [12:02:38<12:28:01] +[titan] 2025-10-05 10:36:59,878 - root - INFO - step: 19660 loss: 2.2289 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 10:36:59,879 - root - INFO - lr: 2.8463e-05 gnorm: 1.08 [12:02:49<12:27:49] +[titan] 2025-10-05 10:37:10,783 - root - INFO - step: 19665 loss: 2.1435 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8951 +[titan] 2025-10-05 10:37:10,783 - root - INFO - lr: 2.8454e-05 gnorm: 1.08 [12:03:00<12:27:38] +[titan] 2025-10-05 10:37:21,656 - root - INFO - step: 19670 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8722 +[titan] 2025-10-05 10:37:21,657 - root - INFO - lr: 2.8445e-05 gnorm: 1.08 [12:03:11<12:27:27] +[titan] 2025-10-05 10:37:32,556 - root - INFO - step: 19675 loss: 2.2272 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9633 +[titan] 2025-10-05 10:37:32,556 - root - INFO - lr: 2.8436e-05 gnorm: 1.12 [12:03:22<12:27:16] +[titan] 2025-10-05 10:37:43,429 - root - INFO - step: 19680 loss: 2.1453 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:37:43,429 - root - INFO - lr: 
2.8427e-05 gnorm: 1.06 [12:03:33<12:27:05] +[titan] 2025-10-05 10:37:54,290 - root - INFO - step: 19685 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 10:37:54,290 - root - INFO - lr: 2.8418e-05 gnorm: 1.10 [12:03:44<12:26:54] +[titan] 2025-10-05 10:38:05,156 - root - INFO - step: 19690 loss: 2.1517 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:38:05,157 - root - INFO - lr: 2.8409e-05 gnorm: 1.07 [12:03:55<12:26:42] +[titan] 2025-10-05 10:38:16,025 - root - INFO - step: 19695 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 10:38:16,025 - root - INFO - lr: 2.8400e-05 gnorm: 1.11 [12:04:05<12:26:31] +[titan] 2025-10-05 10:38:24,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:38:26,916 - root - INFO - step: 19700 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:38:26,916 - root - INFO - lr: 2.8391e-05 gnorm: 1.06 [12:04:16<12:26:20] +[titan] 2025-10-05 10:38:37,813 - root - INFO - step: 19705 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 10:38:37,814 - root - INFO - lr: 2.8382e-05 gnorm: 1.04 [12:04:27<12:26:09] +[titan] 2025-10-05 10:38:48,686 - root - INFO - step: 19710 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8558 +[titan] 2025-10-05 10:38:48,686 - root - INFO - lr: 2.8374e-05 gnorm: 1.10 [12:04:38<12:25:58] +[titan] 2025-10-05 10:38:59,549 - root - INFO - step: 19715 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 10:38:59,549 - root - INFO - lr: 2.8365e-05 gnorm: 1.07 [12:04:49<12:25:46] +[titan] 2025-10-05 10:39:10,404 - root - INFO - step: 19720 loss: 2.2251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 10:39:10,404 - root - INFO - lr: 2.8356e-05 gnorm: 1.05 [12:05:00<12:25:35] +[titan] 2025-10-05 10:39:21,281 - root - INFO - step: 19725 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8864 +[titan] 2025-10-05 10:39:21,281 - root - INFO - lr: 2.8347e-05 gnorm: 1.06 [12:05:11<12:25:24] +[titan] 2025-10-05 10:39:32,209 - root - INFO - step: 19730 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9186 +[titan] 2025-10-05 10:39:32,209 - root - INFO - lr: 
2.8338e-05 gnorm: 1.05 [12:05:22<12:25:13] +[titan] 2025-10-05 10:39:43,074 - root - INFO - step: 19735 loss: 2.1410 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8919 +[titan] 2025-10-05 10:39:43,074 - root - INFO - lr: 2.8329e-05 gnorm: 1.09 [12:05:33<12:25:02] +[titan] 2025-10-05 10:39:53,944 - root - INFO - step: 19740 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9376 +[titan] 2025-10-05 10:39:53,945 - root - INFO - lr: 2.8320e-05 gnorm: 1.05 [12:05:43<12:24:50] +[titan] 2025-10-05 10:40:04,859 - root - INFO - step: 19745 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:40:04,859 - root - INFO - lr: 2.8311e-05 gnorm: 1.04 [12:05:54<12:24:39] +[titan] 2025-10-05 10:40:13,560 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:40:15,748 - root - INFO - step: 19750 loss: 2.1520 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:40:15,748 - root - INFO - lr: 2.8302e-05 gnorm: 1.04 [12:06:05<12:24:28] +[titan] 2025-10-05 10:40:26,639 - root - INFO - step: 19755 loss: 2.1342 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8866 +[titan] 2025-10-05 10:40:26,639 - root - INFO - lr: 2.8293e-05 gnorm: 1.04 [12:06:16<12:24:17] +[titan] 2025-10-05 10:40:37,586 - root - INFO - step: 19760 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.28 mfu: 41.99% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:40:37,587 - root - INFO - lr: 2.8284e-05 gnorm: 1.06 [12:06:27<12:24:06] +[titan] 2025-10-05 10:40:48,488 - root - INFO - step: 19765 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8834 +[titan] 2025-10-05 10:40:48,489 - root - INFO - lr: 2.8275e-05 gnorm: 1.06 [12:06:38<12:23:55] +[titan] 2025-10-05 10:40:59,376 - root - INFO - step: 19770 loss: 2.2031 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 10:40:59,377 - root - INFO - lr: 2.8266e-05 gnorm: 1.07 [12:06:49<12:23:44] +[titan] 2025-10-05 10:41:10,261 - root - INFO - step: 19775 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 10:41:10,262 - root - INFO - lr: 2.8258e-05 gnorm: 1.05 [12:07:00<12:23:32] +[titan] 2025-10-05 10:41:21,161 - root - INFO - step: 19780 loss: 2.2202 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 10:41:21,161 - root - INFO - lr: 
2.8249e-05 gnorm: 1.08 [12:07:11<12:23:21] +[titan] 2025-10-05 10:41:32,049 - root - INFO - step: 19785 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:41:32,049 - root - INFO - lr: 2.8240e-05 gnorm: 1.07 [12:07:22<12:23:10] +[titan] 2025-10-05 10:41:42,943 - root - INFO - step: 19790 loss: 2.0669 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 10:41:42,943 - root - INFO - lr: 2.8231e-05 gnorm: 1.04 [12:07:32<12:22:59] +[titan] 2025-10-05 10:41:53,847 - root - INFO - step: 19795 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 10:41:53,847 - root - INFO - lr: 2.8222e-05 gnorm: 1.09 [12:07:43<12:22:48] +[titan] 2025-10-05 10:42:02,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:42:04,726 - root - INFO - step: 19800 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9124 +[titan] 2025-10-05 10:42:04,726 - root - INFO - lr: 2.8213e-05 gnorm: 1.07 [12:07:54<12:22:37] +[titan] 2025-10-05 10:42:15,602 - root - INFO - step: 19805 loss: 2.1292 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8809 +[titan] 2025-10-05 10:42:15,602 - root - INFO - lr: 2.8204e-05 gnorm: 1.06 [12:08:05<12:22:25] +[titan] 2025-10-05 10:42:26,476 - root - INFO - step: 19810 loss: 2.1988 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:42:26,476 - root - INFO - lr: 2.8195e-05 gnorm: 1.07 [12:08:16<12:22:14] +[titan] 2025-10-05 10:42:37,355 - root - INFO - step: 19815 loss: 2.1111 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 10:42:37,355 - root - INFO - lr: 2.8186e-05 gnorm: 1.08 [12:08:27<12:22:03] +[titan] 2025-10-05 10:42:48,237 - root - INFO - step: 19820 loss: 2.1257 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8783 +[titan] 2025-10-05 10:42:48,237 - root - INFO - lr: 2.8177e-05 gnorm: 1.07 [12:08:38<12:21:52] +[titan] 2025-10-05 10:42:59,142 - root - INFO - step: 19825 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:42:59,142 - root - INFO - lr: 2.8168e-05 gnorm: 1.06 [12:08:49<12:21:41] +[titan] 2025-10-05 10:43:09,994 - root - INFO - step: 19830 loss: 2.1713 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9190 +[titan] 2025-10-05 10:43:09,994 - root - INFO - lr: 
2.8159e-05 gnorm: 1.12 [12:08:59<12:21:29] +[titan] 2025-10-05 10:43:20,854 - root - INFO - step: 19835 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9251 +[titan] 2025-10-05 10:43:20,854 - root - INFO - lr: 2.8151e-05 gnorm: 1.09 [12:09:10<12:21:18] +[titan] 2025-10-05 10:43:31,720 - root - INFO - step: 19840 loss: 2.1270 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:43:31,720 - root - INFO - lr: 2.8142e-05 gnorm: 1.04 [12:09:21<12:21:07] +[titan] 2025-10-05 10:43:42,583 - root - INFO - step: 19845 loss: 2.1653 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9125 +[titan] 2025-10-05 10:43:42,583 - root - INFO - lr: 2.8133e-05 gnorm: 1.03 [12:09:32<12:20:56] +[titan] 2025-10-05 10:43:51,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:43:53,475 - root - INFO - step: 19850 loss: 2.1376 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 10:43:53,475 - root - INFO - lr: 2.8124e-05 gnorm: 1.05 [12:09:43<12:20:45] +[titan] 2025-10-05 10:44:04,341 - root - INFO - step: 19855 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 10:44:04,341 - root - INFO - lr: 2.8115e-05 gnorm: 1.09 [12:09:54<12:20:33] +[titan] 2025-10-05 10:44:15,250 - root - INFO - step: 19860 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9089 +[titan] 2025-10-05 10:44:15,250 - root - INFO - lr: 2.8106e-05 gnorm: 1.09 [12:10:05<12:20:22] +[titan] 2025-10-05 10:44:26,122 - root - INFO - step: 19865 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 10:44:26,122 - root - INFO - lr: 2.8097e-05 gnorm: 1.06 [12:10:16<12:20:11] +[titan] 2025-10-05 10:44:37,015 - root - INFO - step: 19870 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 10:44:37,015 - root - INFO - lr: 2.8088e-05 gnorm: 1.07 [12:10:26<12:20:00] +[titan] 2025-10-05 10:44:47,890 - root - INFO - step: 19875 loss: 2.1479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8987 +[titan] 2025-10-05 10:44:47,890 - root - INFO - lr: 2.8079e-05 gnorm: 1.11 [12:10:37<12:19:49] +[titan] 2025-10-05 10:44:58,757 - root - INFO - step: 19880 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 10:44:58,757 - root - INFO - lr: 
2.8070e-05 gnorm: 1.06 [12:10:48<12:19:38] +[titan] 2025-10-05 10:45:09,633 - root - INFO - step: 19885 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:45:09,633 - root - INFO - lr: 2.8061e-05 gnorm: 1.08 [12:10:59<12:19:26] +[titan] 2025-10-05 10:45:20,533 - root - INFO - step: 19890 loss: 2.1170 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:45:20,533 - root - INFO - lr: 2.8052e-05 gnorm: 1.07 [12:11:10<12:19:15] +[titan] 2025-10-05 10:45:31,359 - root - INFO - step: 19895 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:45:31,360 - root - INFO - lr: 2.8043e-05 gnorm: 1.10 [12:11:21<12:19:04] +[titan] 2025-10-05 10:45:40,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:45:42,231 - root - INFO - step: 19900 loss: 2.1514 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 10:45:42,231 - root - INFO - lr: 2.8035e-05 gnorm: 1.08 [12:11:32<12:18:53] +[titan] 2025-10-05 10:45:53,088 - root - INFO - step: 19905 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 10:45:53,088 - root - INFO - lr: 2.8026e-05 gnorm: 1.03 [12:11:43<12:18:42] +[titan] 2025-10-05 10:46:03,910 - root - INFO - step: 19910 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 10:46:03,910 - root - INFO - lr: 2.8017e-05 gnorm: 1.08 [12:11:53<12:18:30] +[titan] 2025-10-05 10:46:14,770 - root - INFO - step: 19915 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 10:46:14,771 - root - INFO - lr: 2.8008e-05 gnorm: 1.08 [12:12:04<12:18:19] +[titan] 2025-10-05 10:46:25,652 - root - INFO - step: 19920 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:46:25,652 - root - INFO - lr: 2.7999e-05 gnorm: 1.09 [12:12:15<12:18:08] +[titan] 2025-10-05 10:46:36,496 - root - INFO - step: 19925 loss: 2.2094 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 10:46:36,497 - root - INFO - lr: 2.7990e-05 gnorm: 1.06 [12:12:26<12:17:57] +[titan] 2025-10-05 10:46:47,345 - root - INFO - step: 19930 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 10:46:47,345 - root - INFO - lr: 
2.7981e-05 gnorm: 1.11 [12:12:37<12:17:46] +[titan] 2025-10-05 10:46:58,221 - root - INFO - step: 19935 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 10:46:58,221 - root - INFO - lr: 2.7972e-05 gnorm: 1.05 [12:12:48<12:17:34] +[titan] 2025-10-05 10:47:09,102 - root - INFO - step: 19940 loss: 2.1225 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 10:47:09,102 - root - INFO - lr: 2.7963e-05 gnorm: 1.05 [12:12:59<12:17:23] +[titan] 2025-10-05 10:47:19,968 - root - INFO - step: 19945 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8581 +[titan] 2025-10-05 10:47:19,968 - root - INFO - lr: 2.7954e-05 gnorm: 1.09 [12:13:09<12:17:12] +[titan] 2025-10-05 10:47:28,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:47:30,850 - root - INFO - step: 19950 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8703 +[titan] 2025-10-05 10:47:30,850 - root - INFO - lr: 2.7945e-05 gnorm: 1.07 [12:13:20<12:17:01] +[titan] 2025-10-05 10:47:41,822 - root - INFO - step: 19955 loss: 2.1253 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8777 +[titan] 2025-10-05 10:47:41,822 - root - INFO - lr: 2.7936e-05 gnorm: 1.09 [12:13:31<12:16:50] +[titan] 2025-10-05 10:47:52,686 - root - INFO - step: 19960 loss: 2.1316 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:47:52,686 - root - INFO - lr: 2.7927e-05 gnorm: 1.11 [12:13:42<12:16:39] +[titan] 2025-10-05 10:48:03,639 - root - INFO - step: 19965 loss: 2.1229 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8756 +[titan] 2025-10-05 10:48:03,639 - root - INFO - lr: 2.7919e-05 gnorm: 1.08 [12:13:53<12:16:27] +[titan] 2025-10-05 10:48:10,345 - root - INFO - Dumping profiler traces at step 19968 +[titan] 2025-10-05 10:48:10,384 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:48:14,746 - root - INFO - step: 19970 loss: 2.1632 memory: 118.84GiB(85.28%) tps: 29,504 tflops: 409.32 mfu: 41.39% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9112 +[titan] 2025-10-05 10:48:14,746 - root - INFO - lr: 2.7910e-05 gnorm: 1.01 [12:14:04<12:16:17] +[titan] 2025-10-05 10:48:25,610 - root - INFO - step: 19975 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 10:48:25,610 - root - INFO - lr: 2.7901e-05 gnorm: 1.06 [12:14:15<12:16:05] +[titan] 2025-10-05 10:48:36,506 - root - INFO - step: 19980 loss: 
2.1359 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 10:48:36,507 - root - INFO - lr: 2.7892e-05 gnorm: 1.07 [12:14:26<12:15:54] +[titan] 2025-10-05 10:48:47,491 - root - INFO - step: 19985 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,831 tflops: 413.86 mfu: 41.85% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:48:47,491 - root - INFO - lr: 2.7883e-05 gnorm: 1.06 [12:14:37<12:15:43] +[titan] 2025-10-05 10:48:58,374 - root - INFO - step: 19990 loss: 2.1671 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:48:58,374 - root - INFO - lr: 2.7874e-05 gnorm: 1.08 [12:14:48<12:15:32] +[titan] 2025-10-05 10:49:09,251 - root - INFO - step: 19995 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:49:09,251 - root - INFO - lr: 2.7865e-05 gnorm: 1.06 [12:14:59<12:15:21] +[titan] 2025-10-05 10:49:17,928 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:49:20,117 - root - INFO - step: 20000 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8613 +[titan] 2025-10-05 10:49:20,118 - root - INFO - lr: 2.7856e-05 gnorm: 1.09 [12:15:10<12:15:10] +[titan] 2025-10-05 10:49:20,118 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 10:49:39,403 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 10:49:39,403 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.29 seconds. 
+[titan] 2025-10-05 10:51:35,525 - root - INFO - step: 20005 loss: 2.1785 memory: 118.84GiB(85.28%) tps: 2,420 tflops: 33.57 mfu: 3.39% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 10:51:35,525 - root - INFO - lr: 2.7847e-05 gnorm: 1.02 [12:17:25<12:17:03] +[titan] 2025-10-05 10:51:46,302 - root - INFO - step: 20010 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9317 +[titan] 2025-10-05 10:51:46,302 - root - INFO - lr: 2.7838e-05 gnorm: 1.08 [12:17:36<12:16:52] +[titan] 2025-10-05 10:51:57,112 - root - INFO - step: 20015 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 10:51:57,113 - root - INFO - lr: 2.7829e-05 gnorm: 1.07 [12:17:47<12:16:40] +[titan] 2025-10-05 10:52:07,924 - root - INFO - step: 20020 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.47 mfu: 42.51% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8728 +[titan] 2025-10-05 10:52:07,925 - root - INFO - lr: 2.7820e-05 gnorm: 1.05 [12:17:57<12:16:29] +[titan] 2025-10-05 10:52:18,739 - root - INFO - step: 20025 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 10:52:18,739 - root - INFO - lr: 2.7811e-05 gnorm: 1.08 [12:18:08<12:16:18] +[titan] 2025-10-05 10:52:29,561 - root - INFO - step: 20030 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 10:52:29,562 - root - INFO - lr: 2.7803e-05 gnorm: 1.05 [12:18:19<12:16:06] +[titan] 2025-10-05 10:52:40,397 - root - INFO - step: 20035 loss: 2.1681 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:52:40,397 - root - INFO - lr: 
2.7794e-05 gnorm: 1.09 [12:18:30<12:15:55] +[titan] 2025-10-05 10:52:51,270 - root - INFO - step: 20040 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:52:51,270 - root - INFO - lr: 2.7785e-05 gnorm: 1.08 [12:18:41<12:15:44] +[titan] 2025-10-05 10:53:02,099 - root - INFO - step: 20045 loss: 2.1535 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 10:53:02,099 - root - INFO - lr: 2.7776e-05 gnorm: 1.06 [12:18:52<12:15:32] +[titan] 2025-10-05 10:53:10,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:53:12,953 - root - INFO - step: 20050 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:53:12,953 - root - INFO - lr: 2.7767e-05 gnorm: 1.06 [12:19:02<12:15:21] +[titan] 2025-10-05 10:53:23,781 - root - INFO - step: 20055 loss: 2.1359 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8880 +[titan] 2025-10-05 10:53:23,781 - root - INFO - lr: 2.7758e-05 gnorm: 1.07 [12:19:13<12:15:10] +[titan] 2025-10-05 10:53:34,615 - root - INFO - step: 20060 loss: 2.2260 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 10:53:34,615 - root - INFO - lr: 2.7749e-05 gnorm: 1.08 [12:19:24<12:14:59] +[titan] 2025-10-05 10:53:45,482 - root - INFO - step: 20065 loss: 2.1538 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9039 +[titan] 2025-10-05 10:53:45,482 - root - INFO - lr: 2.7740e-05 gnorm: 1.07 [12:19:35<12:14:47] +[titan] 2025-10-05 10:53:56,339 - root - INFO - step: 20070 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 
30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 10:53:56,339 - root - INFO - lr: 2.7731e-05 gnorm: 1.04 [12:19:46<12:14:36] +[titan] 2025-10-05 10:54:07,188 - root - INFO - step: 20075 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 10:54:07,188 - root - INFO - lr: 2.7722e-05 gnorm: 1.06 [12:19:57<12:14:25] +[titan] 2025-10-05 10:54:18,059 - root - INFO - step: 20080 loss: 2.1485 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:54:18,059 - root - INFO - lr: 2.7713e-05 gnorm: 1.06 [12:20:07<12:14:14] +[titan] 2025-10-05 10:54:28,894 - root - INFO - step: 20085 loss: 2.2267 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9673 +[titan] 2025-10-05 10:54:28,894 - root - INFO - lr: 2.7704e-05 gnorm: 1.85 [12:20:18<12:14:02] +[titan] 2025-10-05 10:54:39,760 - root - INFO - step: 20090 loss: 2.1383 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 10:54:39,760 - root - INFO - lr: 2.7695e-05 gnorm: 1.09 [12:20:29<12:13:51] +[titan] 2025-10-05 10:54:50,700 - root - INFO - step: 20095 loss: 2.1379 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8897 +[titan] 2025-10-05 10:54:50,700 - root - INFO - lr: 2.7687e-05 gnorm: 1.04 [12:20:40<12:13:40] +[titan] 2025-10-05 10:54:59,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:55:01,599 - root - INFO - step: 20100 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:55:01,599 - root - INFO - lr: 2.7678e-05 gnorm: 1.11 [12:20:51<12:13:29] +[titan] 2025-10-05 10:55:12,449 - root - INFO - step: 20105 loss: 2.1710 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 10:55:12,449 - root - INFO - lr: 2.7669e-05 gnorm: 1.03 [12:21:02<12:13:17] +[titan] 2025-10-05 10:55:23,313 - root - INFO - step: 20110 loss: 2.0931 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 10:55:23,313 - root - INFO - lr: 2.7660e-05 gnorm: 1.04 [12:21:13<12:13:06] +[titan] 2025-10-05 10:55:34,176 - root - INFO - step: 20115 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 10:55:34,176 - root - INFO - lr: 2.7651e-05 gnorm: 1.05 [12:21:24<12:12:55] +[titan] 2025-10-05 10:55:45,039 - root - INFO - step: 20120 loss: 2.1203 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 10:55:45,039 - root - INFO - lr: 2.7642e-05 gnorm: 1.06 [12:21:34<12:12:44] +[titan] 2025-10-05 10:55:55,943 - root - INFO - step: 20125 loss: 2.1150 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8697 +[titan] 2025-10-05 10:55:55,943 - root - INFO - lr: 2.7633e-05 gnorm: 1.05 [12:21:45<12:12:33] +[titan] 2025-10-05 10:56:06,800 - root - INFO - step: 20130 loss: 2.1880 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 10:56:06,800 - root - INFO - lr: 
2.7624e-05 gnorm: 1.08 [12:21:56<12:12:21] +[titan] 2025-10-05 10:56:17,695 - root - INFO - step: 20135 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8794 +[titan] 2025-10-05 10:56:17,696 - root - INFO - lr: 2.7615e-05 gnorm: 1.08 [12:22:07<12:12:10] +[titan] 2025-10-05 10:56:28,544 - root - INFO - step: 20140 loss: 2.1589 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9087 +[titan] 2025-10-05 10:56:28,544 - root - INFO - lr: 2.7606e-05 gnorm: 1.04 [12:22:18<12:11:59] +[titan] 2025-10-05 10:56:39,421 - root - INFO - step: 20145 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8560 +[titan] 2025-10-05 10:56:39,422 - root - INFO - lr: 2.7597e-05 gnorm: 1.08 [12:22:29<12:11:48] +[titan] 2025-10-05 10:56:48,102 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:56:50,277 - root - INFO - step: 20150 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:56:50,277 - root - INFO - lr: 2.7588e-05 gnorm: 1.05 [12:22:40<12:11:36] +[titan] 2025-10-05 10:57:01,155 - root - INFO - step: 20155 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 10:57:01,155 - root - INFO - lr: 2.7579e-05 gnorm: 1.09 [12:22:51<12:11:25] +[titan] 2025-10-05 10:57:12,015 - root - INFO - step: 20160 loss: 2.1842 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 10:57:12,015 - root - INFO - lr: 2.7571e-05 gnorm: 1.05 [12:23:01<12:11:14] +[titan] 2025-10-05 10:57:22,907 - root - INFO - step: 20165 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 10:57:22,907 - root - INFO - lr: 2.7562e-05 gnorm: 1.05 [12:23:12<12:11:03] +[titan] 2025-10-05 10:57:33,769 - root - INFO - step: 20170 loss: 2.1734 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9209 +[titan] 2025-10-05 10:57:33,769 - root - INFO - lr: 2.7553e-05 gnorm: 1.10 [12:23:23<12:10:51] +[titan] 2025-10-05 10:57:44,629 - root - INFO - step: 20175 loss: 2.1616 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:57:44,629 - root - INFO - lr: 2.7544e-05 gnorm: 1.10 [12:23:34<12:10:40] +[titan] 2025-10-05 10:57:55,575 - root - INFO - step: 20180 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 10:57:55,575 - root - INFO - lr: 
2.7535e-05 gnorm: 1.09 [12:23:45<12:10:29] +[titan] 2025-10-05 10:58:06,449 - root - INFO - step: 20185 loss: 2.0747 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 10:58:06,449 - root - INFO - lr: 2.7526e-05 gnorm: 1.09 [12:23:56<12:10:18] +[titan] 2025-10-05 10:58:17,339 - root - INFO - step: 20190 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 10:58:17,339 - root - INFO - lr: 2.7517e-05 gnorm: 1.11 [12:24:07<12:10:06] +[titan] 2025-10-05 10:58:28,224 - root - INFO - step: 20195 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 10:58:28,224 - root - INFO - lr: 2.7508e-05 gnorm: 1.09 [12:24:18<12:09:55] +[titan] 2025-10-05 10:58:36,913 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:58:39,105 - root - INFO - step: 20200 loss: 2.1272 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 10:58:39,105 - root - INFO - lr: 2.7499e-05 gnorm: 1.10 [12:24:29<12:09:44] +[titan] 2025-10-05 10:58:49,983 - root - INFO - step: 20205 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9320 +[titan] 2025-10-05 10:58:49,983 - root - INFO - lr: 2.7490e-05 gnorm: 1.10 [12:24:39<12:09:33] +[titan] 2025-10-05 10:59:00,935 - root - INFO - step: 20210 loss: 2.0945 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 10:59:00,935 - root - INFO - lr: 2.7481e-05 gnorm: 1.07 [12:24:50<12:09:22] +[titan] 2025-10-05 10:59:11,794 - root - INFO - step: 20215 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:59:11,794 - root - INFO - lr: 2.7472e-05 gnorm: 1.08 [12:25:01<12:09:10] +[titan] 2025-10-05 10:59:22,679 - root - INFO - step: 20220 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9899 +[titan] 2025-10-05 10:59:22,679 - root - INFO - lr: 2.7463e-05 gnorm: 1.09 [12:25:12<12:08:59] +[titan] 2025-10-05 10:59:33,536 - root - INFO - step: 20225 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 10:59:33,536 - root - INFO - lr: 2.7454e-05 gnorm: 1.10 [12:25:23<12:08:48] +[titan] 2025-10-05 10:59:44,381 - root - INFO - step: 20230 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 10:59:44,381 - root - INFO - lr: 
2.7446e-05 gnorm: 1.07 [12:25:34<12:08:37] +[titan] 2025-10-05 10:59:55,274 - root - INFO - step: 20235 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8518 +[titan] 2025-10-05 10:59:55,275 - root - INFO - lr: 2.7437e-05 gnorm: 1.10 [12:25:45<12:08:25] +[titan] 2025-10-05 11:00:06,163 - root - INFO - step: 20240 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8998 +[titan] 2025-10-05 11:00:06,164 - root - INFO - lr: 2.7428e-05 gnorm: 1.09 [12:25:56<12:08:14] +[titan] 2025-10-05 11:00:17,039 - root - INFO - step: 20245 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 11:00:17,040 - root - INFO - lr: 2.7419e-05 gnorm: 1.06 [12:26:06<12:08:03] +[titan] 2025-10-05 11:00:25,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:00:27,899 - root - INFO - step: 20250 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 11:00:27,900 - root - INFO - lr: 2.7410e-05 gnorm: 1.06 [12:26:17<12:07:52] +[titan] 2025-10-05 11:00:38,739 - root - INFO - step: 20255 loss: 2.1856 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 11:00:38,739 - root - INFO - lr: 2.7401e-05 gnorm: 1.07 [12:26:28<12:07:40] +[titan] 2025-10-05 11:00:49,595 - root - INFO - step: 20260 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 11:00:49,595 - root - INFO - lr: 2.7392e-05 gnorm: 1.05 [12:26:39<12:07:29] +[titan] 2025-10-05 11:01:00,505 - root - INFO - step: 20265 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 11:01:00,505 - root - INFO - lr: 2.7383e-05 gnorm: 1.05 [12:26:50<12:07:18] +[titan] 2025-10-05 11:01:11,382 - root - INFO - step: 20270 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8446 +[titan] 2025-10-05 11:01:11,382 - root - INFO - lr: 2.7374e-05 gnorm: 1.08 [12:27:01<12:07:07] +[titan] 2025-10-05 11:01:22,284 - root - INFO - step: 20275 loss: 2.1344 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:01:22,284 - root - INFO - lr: 2.7365e-05 gnorm: 1.10 [12:27:12<12:06:56] +[titan] 2025-10-05 11:01:33,138 - root - INFO - step: 20280 loss: 2.1211 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:01:33,138 - root - INFO - lr: 
2.7356e-05 gnorm: 1.03 [12:27:23<12:06:44] +[titan] 2025-10-05 11:01:44,002 - root - INFO - step: 20285 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:01:44,002 - root - INFO - lr: 2.7347e-05 gnorm: 1.05 [12:27:33<12:06:33] +[titan] 2025-10-05 11:01:54,890 - root - INFO - step: 20290 loss: 2.1434 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 11:01:54,890 - root - INFO - lr: 2.7338e-05 gnorm: 1.08 [12:27:44<12:06:22] +[titan] 2025-10-05 11:02:06,133 - root - INFO - step: 20295 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 29,147 tflops: 404.38 mfu: 40.89% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 11:02:06,133 - root - INFO - lr: 2.7330e-05 gnorm: 1.06 [12:27:56<12:06:11] +[titan] 2025-10-05 11:02:14,822 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:02:17,010 - root - INFO - step: 20300 loss: 2.1482 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 11:02:17,010 - root - INFO - lr: 2.7321e-05 gnorm: 1.33 [12:28:06<12:06:00] +[titan] 2025-10-05 11:02:27,926 - root - INFO - step: 20305 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 11:02:27,927 - root - INFO - lr: 2.7312e-05 gnorm: 1.05 [12:28:17<12:05:49] +[titan] 2025-10-05 11:02:38,794 - root - INFO - step: 20310 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8580 +[titan] 2025-10-05 11:02:38,794 - root - INFO - lr: 2.7303e-05 gnorm: 1.02 [12:28:28<12:05:37] +[titan] 2025-10-05 11:02:49,655 - root - INFO - step: 20315 loss: 2.1038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:02:49,655 - root - INFO - lr: 2.7294e-05 gnorm: 1.06 [12:28:39<12:05:26] +[titan] 2025-10-05 11:03:00,551 - root - INFO - step: 20320 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 11:03:00,551 - root - INFO - lr: 2.7285e-05 gnorm: 1.07 [12:28:50<12:05:15] +[titan] 2025-10-05 11:03:11,416 - root - INFO - step: 20325 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9040 +[titan] 2025-10-05 11:03:11,417 - root - INFO - lr: 2.7276e-05 gnorm: 1.04 [12:29:01<12:05:04] +[titan] 2025-10-05 11:03:22,259 - root - INFO - step: 20330 loss: 2.1001 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8555 +[titan] 2025-10-05 11:03:22,259 - root - INFO - lr: 
2.7267e-05 gnorm: 1.07 [12:29:12<12:04:52] +[titan] 2025-10-05 11:03:33,113 - root - INFO - step: 20335 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8112 +[titan] 2025-10-05 11:03:33,113 - root - INFO - lr: 2.7258e-05 gnorm: 1.06 [12:29:23<12:04:41] +[titan] 2025-10-05 11:03:44,014 - root - INFO - step: 20340 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 11:03:44,014 - root - INFO - lr: 2.7249e-05 gnorm: 1.02 [12:29:33<12:04:30] +[titan] 2025-10-05 11:03:54,889 - root - INFO - step: 20345 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9095 +[titan] 2025-10-05 11:03:54,889 - root - INFO - lr: 2.7240e-05 gnorm: 1.05 [12:29:44<12:04:19] +[titan] 2025-10-05 11:04:03,595 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:04:05,779 - root - INFO - step: 20350 loss: 2.1910 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9371 +[titan] 2025-10-05 11:04:05,779 - root - INFO - lr: 2.7231e-05 gnorm: 1.07 [12:29:55<12:04:07] +[titan] 2025-10-05 11:04:16,637 - root - INFO - step: 20355 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 11:04:16,638 - root - INFO - lr: 2.7222e-05 gnorm: 1.05 [12:30:06<12:03:56] +[titan] 2025-10-05 11:04:27,458 - root - INFO - step: 20360 loss: 2.1358 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8882 +[titan] 2025-10-05 11:04:27,458 - root - INFO - lr: 2.7214e-05 gnorm: 1.06 [12:30:17<12:03:45] +[titan] 2025-10-05 11:04:38,299 - root - INFO - step: 20365 loss: 2.1403 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 11:04:38,299 - root - INFO - lr: 2.7205e-05 gnorm: 1.10 [12:30:28<12:03:34] +[titan] 2025-10-05 11:04:49,208 - root - INFO - step: 20370 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 11:04:49,208 - root - INFO - lr: 2.7196e-05 gnorm: 1.09 [12:30:39<12:03:22] +[titan] 2025-10-05 11:05:00,089 - root - INFO - step: 20375 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:05:00,089 - root - INFO - lr: 2.7187e-05 gnorm: 1.06 [12:30:49<12:03:11] +[titan] 2025-10-05 11:05:10,946 - root - INFO - step: 20380 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:05:10,946 - root - INFO - lr: 
2.7178e-05 gnorm: 1.11 [12:31:00<12:03:00] +[titan] 2025-10-05 11:05:21,800 - root - INFO - step: 20385 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:05:21,800 - root - INFO - lr: 2.7169e-05 gnorm: 1.08 [12:31:11<12:02:49] +[titan] 2025-10-05 11:05:32,664 - root - INFO - step: 20390 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 11:05:32,664 - root - INFO - lr: 2.7160e-05 gnorm: 1.05 [12:31:22<12:02:37] +[titan] 2025-10-05 11:05:43,530 - root - INFO - step: 20395 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 11:05:43,530 - root - INFO - lr: 2.7151e-05 gnorm: 1.10 [12:31:33<12:02:26] +[titan] 2025-10-05 11:05:52,200 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:05:54,413 - root - INFO - step: 20400 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 11:05:54,413 - root - INFO - lr: 2.7142e-05 gnorm: 1.05 [12:31:44<12:02:15] +[titan] 2025-10-05 11:06:05,284 - root - INFO - step: 20405 loss: 2.1600 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 11:06:05,284 - root - INFO - lr: 2.7133e-05 gnorm: 1.08 [12:31:55<12:02:04] +[titan] 2025-10-05 11:06:16,130 - root - INFO - step: 20410 loss: 2.1684 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 11:06:16,130 - root - INFO - lr: 2.7124e-05 gnorm: 1.07 [12:32:06<12:01:53] +[titan] 2025-10-05 11:06:26,974 - root - INFO - step: 20415 loss: 2.1914 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:06:26,975 - root - INFO - lr: 2.7115e-05 gnorm: 1.09 [12:32:16<12:01:41] +[titan] 2025-10-05 11:06:37,832 - root - INFO - step: 20420 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 11:06:37,832 - root - INFO - lr: 2.7106e-05 gnorm: 1.09 [12:32:27<12:01:30] +[titan] 2025-10-05 11:06:48,689 - root - INFO - step: 20425 loss: 2.1157 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 11:06:48,689 - root - INFO - lr: 2.7098e-05 gnorm: 1.08 [12:32:38<12:01:19] +[titan] 2025-10-05 11:06:59,539 - root - INFO - step: 20430 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 11:06:59,539 - root - INFO - lr: 
2.7089e-05 gnorm: 1.05 [12:32:49<12:01:08] +[titan] 2025-10-05 11:07:10,461 - root - INFO - step: 20435 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 11:07:10,461 - root - INFO - lr: 2.7080e-05 gnorm: 1.06 [12:33:00<12:00:56] +[titan] 2025-10-05 11:07:21,318 - root - INFO - step: 20440 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:07:21,318 - root - INFO - lr: 2.7071e-05 gnorm: 1.07 [12:33:11<12:00:45] +[titan] 2025-10-05 11:07:32,168 - root - INFO - step: 20445 loss: 2.0912 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:07:32,168 - root - INFO - lr: 2.7062e-05 gnorm: 1.09 [12:33:22<12:00:34] +[titan] 2025-10-05 11:07:40,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:07:43,023 - root - INFO - step: 20450 loss: 2.1251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 11:07:43,023 - root - INFO - lr: 2.7053e-05 gnorm: 1.07 [12:33:32<12:00:23] +[titan] 2025-10-05 11:07:53,871 - root - INFO - step: 20455 loss: 2.1649 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 11:07:53,871 - root - INFO - lr: 2.7044e-05 gnorm: 1.07 [12:33:43<12:00:11] +[titan] 2025-10-05 11:08:04,763 - root - INFO - step: 20460 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 11:08:04,763 - root - INFO - lr: 2.7035e-05 gnorm: 1.03 [12:33:54<12:00:00] +[titan] 2025-10-05 11:08:15,662 - root - INFO - step: 20465 loss: 2.1274 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 11:08:15,662 - root - INFO - lr: 2.7026e-05 gnorm: 1.03 [12:34:05<11:59:49] +[titan] 2025-10-05 11:08:26,490 - root - INFO - step: 20470 loss: 2.1025 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8578 +[titan] 2025-10-05 11:08:26,490 - root - INFO - lr: 2.7017e-05 gnorm: 1.06 [12:34:16<11:59:38] +[titan] 2025-10-05 11:08:37,320 - root - INFO - step: 20475 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 11:08:37,321 - root - INFO - lr: 2.7008e-05 gnorm: 1.11 [12:34:27<11:59:26] +[titan] 2025-10-05 11:08:48,242 - root - INFO - step: 20480 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:08:48,242 - root - INFO - lr: 
2.6999e-05 gnorm: 1.04 [12:34:38<11:59:15] +[titan] 2025-10-05 11:08:48,430 - root - INFO - Dumping profiler traces at step 20480 +[titan] 2025-10-05 11:08:48,469 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:08:59,308 - root - INFO - step: 20485 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 29,611 tflops: 410.81 mfu: 41.54% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 11:08:59,308 - root - INFO - lr: 2.6990e-05 gnorm: 1.06 [12:34:49<11:59:04] +[titan] 2025-10-05 11:09:10,168 - root - INFO - step: 20490 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.8976 +[titan] 2025-10-05 11:09:10,168 - root - INFO - lr: 2.6982e-05 gnorm: 1.06 [12:35:00<11:58:53] +[titan] 2025-10-05 11:09:21,026 - root - INFO - step: 20495 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9159 +[titan] 2025-10-05 11:09:21,027 - root - INFO - lr: 2.6973e-05 gnorm: 1.10 [12:35:10<11:58:42] +[titan] 2025-10-05 11:09:29,736 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:09:31,923 - root - INFO - step: 20500 loss: 2.0830 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 11:09:31,923 - root - INFO - lr: 2.6964e-05 gnorm: 1.09 [12:35:21<11:58:30] +[titan] 2025-10-05 11:09:42,776 - root - INFO - step: 20505 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8339 +[titan] 2025-10-05 11:09:42,776 - root - INFO - lr: 2.6955e-05 gnorm: 1.10 [12:35:32<11:58:19] +[titan] 2025-10-05 11:09:53,605 - root - INFO - step: 20510 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8968 +[titan] 2025-10-05 11:09:53,605 - root - INFO - lr: 2.6946e-05 gnorm: 1.06 [12:35:43<11:58:08] +[titan] 2025-10-05 11:10:04,473 - root - INFO - step: 20515 loss: 2.1247 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8771 +[titan] 2025-10-05 11:10:04,473 - root - INFO - lr: 2.6937e-05 gnorm: 1.06 [12:35:54<11:57:57] +[titan] 2025-10-05 11:10:15,308 - root - INFO - step: 20520 loss: 2.1987 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9434 +[titan] 2025-10-05 11:10:15,308 - root - INFO - lr: 2.6928e-05 gnorm: 1.06 [12:36:05<11:57:45] +[titan] 2025-10-05 11:10:26,169 - root - INFO - step: 20525 loss: 2.1470 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8974 +[titan] 2025-10-05 11:10:26,170 - root - INFO - lr: 2.6919e-05 gnorm: 1.04 [12:36:16<11:57:34] +[titan] 2025-10-05 11:10:37,027 - root - INFO - step: 20530 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8850 +[titan] 2025-10-05 11:10:37,027 - root - INFO - lr: 
2.6910e-05 gnorm: 1.13 [12:36:26<11:57:23] +[titan] 2025-10-05 11:10:47,875 - root - INFO - step: 20535 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 11:10:47,875 - root - INFO - lr: 2.6901e-05 gnorm: 1.03 [12:36:37<11:57:12] +[titan] 2025-10-05 11:10:58,732 - root - INFO - step: 20540 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:10:58,733 - root - INFO - lr: 2.6892e-05 gnorm: 1.06 [12:36:48<11:57:01] +[titan] 2025-10-05 11:11:09,619 - root - INFO - step: 20545 loss: 2.1707 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:11:09,619 - root - INFO - lr: 2.6883e-05 gnorm: 1.10 [12:36:59<11:56:49] +[titan] 2025-10-05 11:11:18,306 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:11:20,486 - root - INFO - step: 20550 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 11:11:20,487 - root - INFO - lr: 2.6874e-05 gnorm: 2.06 [12:37:10<11:56:38] +[titan] 2025-10-05 11:11:31,328 - root - INFO - step: 20555 loss: 2.2027 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 11:11:31,329 - root - INFO - lr: 2.6866e-05 gnorm: 1.09 [12:37:21<11:56:27] +[titan] 2025-10-05 11:11:42,212 - root - INFO - step: 20560 loss: 2.0837 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 11:11:42,212 - root - INFO - lr: 2.6857e-05 gnorm: 1.05 [12:37:32<11:56:16] +[titan] 2025-10-05 11:11:53,051 - root - INFO - step: 20565 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 11:11:53,051 - root - INFO - lr: 2.6848e-05 gnorm: 1.08 [12:37:42<11:56:04] +[titan] 2025-10-05 11:12:03,886 - root - INFO - step: 20570 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 11:12:03,887 - root - INFO - lr: 2.6839e-05 gnorm: 1.14 [12:37:53<11:55:53] +[titan] 2025-10-05 11:12:14,773 - root - INFO - step: 20575 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 11:12:14,773 - root - INFO - lr: 2.6830e-05 gnorm: 1.09 [12:38:04<11:55:42] +[titan] 2025-10-05 11:12:25,620 - root - INFO - step: 20580 loss: 2.0736 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8329 +[titan] 2025-10-05 11:12:25,620 - root - INFO - lr: 
2.6821e-05 gnorm: 1.09 [12:38:15<11:55:31] +[titan] 2025-10-05 11:12:36,467 - root - INFO - step: 20585 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 11:12:36,467 - root - INFO - lr: 2.6812e-05 gnorm: 1.05 [12:38:26<11:55:19] +[titan] 2025-10-05 11:12:47,318 - root - INFO - step: 20590 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 11:12:47,318 - root - INFO - lr: 2.6803e-05 gnorm: 1.07 [12:38:37<11:55:08] +[titan] 2025-10-05 11:12:58,203 - root - INFO - step: 20595 loss: 2.1151 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8687 +[titan] 2025-10-05 11:12:58,203 - root - INFO - lr: 2.6794e-05 gnorm: 1.07 [12:38:48<11:54:57] +[titan] 2025-10-05 11:13:06,877 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:13:09,064 - root - INFO - step: 20600 loss: 2.1894 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 11:13:09,064 - root - INFO - lr: 2.6785e-05 gnorm: 1.09 [12:38:58<11:54:46] +[titan] 2025-10-05 11:13:19,929 - root - INFO - step: 20605 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 11:13:19,929 - root - INFO - lr: 2.6776e-05 gnorm: 1.07 [12:39:09<11:54:34] +[titan] 2025-10-05 11:13:30,796 - root - INFO - step: 20610 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 11:13:30,796 - root - INFO - lr: 2.6767e-05 gnorm: 1.06 [12:39:20<11:54:23] +[titan] 2025-10-05 11:13:41,654 - root - INFO - step: 20615 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8398 +[titan] 2025-10-05 11:13:41,654 - root - INFO - lr: 2.6758e-05 gnorm: 1.03 [12:39:31<11:54:12] +[titan] 2025-10-05 11:13:52,508 - root - INFO - step: 20620 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 11:13:52,508 - root - INFO - lr: 2.6750e-05 gnorm: 1.06 [12:39:42<11:54:01] +[titan] 2025-10-05 11:14:03,381 - root - INFO - step: 20625 loss: 2.1197 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 11:14:03,381 - root - INFO - lr: 2.6741e-05 gnorm: 1.06 [12:39:53<11:53:50] +[titan] 2025-10-05 11:14:14,251 - root - INFO - step: 20630 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:14:14,251 - root - INFO - lr: 
2.6732e-05 gnorm: 1.06 [12:40:04<11:53:38] +[titan] 2025-10-05 11:14:25,097 - root - INFO - step: 20635 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:14:25,098 - root - INFO - lr: 2.6723e-05 gnorm: 1.08 [12:40:14<11:53:27] +[titan] 2025-10-05 11:14:35,947 - root - INFO - step: 20640 loss: 2.0980 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8548 +[titan] 2025-10-05 11:14:35,947 - root - INFO - lr: 2.6714e-05 gnorm: 1.09 [12:40:25<11:53:16] +[titan] 2025-10-05 11:14:46,798 - root - INFO - step: 20645 loss: 2.1242 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8770 +[titan] 2025-10-05 11:14:46,799 - root - INFO - lr: 2.6705e-05 gnorm: 1.09 [12:40:36<11:53:05] +[titan] 2025-10-05 11:14:55,473 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:14:57,653 - root - INFO - step: 20650 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:14:57,653 - root - INFO - lr: 2.6696e-05 gnorm: 1.08 [12:40:47<11:52:53] +[titan] 2025-10-05 11:15:08,530 - root - INFO - step: 20655 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 11:15:08,530 - root - INFO - lr: 2.6687e-05 gnorm: 1.08 [12:40:58<11:52:42] +[titan] 2025-10-05 11:15:19,423 - root - INFO - step: 20660 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 11:15:19,423 - root - INFO - lr: 2.6678e-05 gnorm: 1.15 [12:41:09<11:52:31] +[titan] 2025-10-05 11:15:30,279 - root - INFO - step: 20665 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:15:30,279 - root - INFO - lr: 2.6669e-05 gnorm: 1.06 [12:41:20<11:52:20] +[titan] 2025-10-05 11:15:41,155 - root - INFO - step: 20670 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9037 +[titan] 2025-10-05 11:15:41,156 - root - INFO - lr: 2.6660e-05 gnorm: 1.05 [12:41:31<11:52:08] +[titan] 2025-10-05 11:15:52,007 - root - INFO - step: 20675 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 11:15:52,007 - root - INFO - lr: 2.6651e-05 gnorm: 1.04 [12:41:41<11:51:57] +[titan] 2025-10-05 11:16:02,840 - root - INFO - step: 20680 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:16:02,840 - root - INFO - lr: 
2.6643e-05 gnorm: 1.03 [12:41:52<11:51:46] +[titan] 2025-10-05 11:16:13,755 - root - INFO - step: 20685 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9115 +[titan] 2025-10-05 11:16:13,756 - root - INFO - lr: 2.6634e-05 gnorm: 1.04 [12:42:03<11:51:35] +[titan] 2025-10-05 11:16:24,631 - root - INFO - step: 20690 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:16:24,632 - root - INFO - lr: 2.6625e-05 gnorm: 1.05 [12:42:14<11:51:24] +[titan] 2025-10-05 11:16:35,463 - root - INFO - step: 20695 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 11:16:35,464 - root - INFO - lr: 2.6616e-05 gnorm: 1.10 [12:42:25<11:51:12] +[titan] 2025-10-05 11:16:44,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:16:46,315 - root - INFO - step: 20700 loss: 2.1496 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:16:46,316 - root - INFO - lr: 2.6607e-05 gnorm: 1.10 [12:42:36<11:51:01] +[titan] 2025-10-05 11:16:57,157 - root - INFO - step: 20705 loss: 2.0983 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 11:16:57,157 - root - INFO - lr: 2.6598e-05 gnorm: 1.04 [12:42:47<11:50:50] +[titan] 2025-10-05 11:17:08,007 - root - INFO - step: 20710 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 11:17:08,007 - root - INFO - lr: 2.6589e-05 gnorm: 1.07 [12:42:57<11:50:39] +[titan] 2025-10-05 11:17:18,892 - root - INFO - step: 20715 loss: 2.1366 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8867 +[titan] 2025-10-05 11:17:18,892 - root - INFO - lr: 2.6580e-05 gnorm: 1.14 [12:43:08<11:50:27] +[titan] 2025-10-05 11:17:29,767 - root - INFO - step: 20720 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:17:29,768 - root - INFO - lr: 2.6571e-05 gnorm: 1.04 [12:43:19<11:50:16] +[titan] 2025-10-05 11:17:40,628 - root - INFO - step: 20725 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9005 +[titan] 2025-10-05 11:17:40,628 - root - INFO - lr: 2.6562e-05 gnorm: 1.09 [12:43:30<11:50:05] +[titan] 2025-10-05 11:17:51,474 - root - INFO - step: 20730 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:17:51,474 - root - INFO - lr: 
2.6553e-05 gnorm: 1.10 [12:43:41<11:49:54] +[titan] 2025-10-05 11:18:02,326 - root - INFO - step: 20735 loss: 2.1204 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:18:02,326 - root - INFO - lr: 2.6544e-05 gnorm: 1.06 [12:43:52<11:49:42] +[titan] 2025-10-05 11:18:13,213 - root - INFO - step: 20740 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8841 +[titan] 2025-10-05 11:18:13,213 - root - INFO - lr: 2.6536e-05 gnorm: 1.08 [12:44:03<11:49:31] +[titan] 2025-10-05 11:18:24,093 - root - INFO - step: 20745 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 11:18:24,093 - root - INFO - lr: 2.6527e-05 gnorm: 1.05 [12:44:13<11:49:20] +[titan] 2025-10-05 11:18:32,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:18:34,984 - root - INFO - step: 20750 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:18:34,984 - root - INFO - lr: 2.6518e-05 gnorm: 1.06 [12:44:24<11:49:09] +[titan] 2025-10-05 11:18:45,854 - root - INFO - step: 20755 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:18:45,854 - root - INFO - lr: 2.6509e-05 gnorm: 1.09 [12:44:35<11:48:58] +[titan] 2025-10-05 11:18:56,673 - root - INFO - step: 20760 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:18:56,674 - root - INFO - lr: 2.6500e-05 gnorm: 1.04 [12:44:46<11:48:46] +[titan] 2025-10-05 11:19:07,503 - root - INFO - step: 20765 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8648 +[titan] 2025-10-05 11:19:07,504 - root - INFO - lr: 2.6491e-05 gnorm: 1.08 [12:44:57<11:48:35] +[titan] 2025-10-05 11:19:18,411 - root - INFO - step: 20770 loss: 2.2056 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9489 +[titan] 2025-10-05 11:19:18,411 - root - INFO - lr: 2.6482e-05 gnorm: 1.12 [12:45:08<11:48:24] +[titan] 2025-10-05 11:19:29,234 - root - INFO - step: 20775 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8766 +[titan] 2025-10-05 11:19:29,234 - root - INFO - lr: 2.6473e-05 gnorm: 1.05 [12:45:19<11:48:13] +[titan] 2025-10-05 11:19:40,065 - root - INFO - step: 20780 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 11:19:40,065 - root - INFO - lr: 
2.6464e-05 gnorm: 1.08 [12:45:29<11:48:01] +[titan] 2025-10-05 11:19:50,928 - root - INFO - step: 20785 loss: 2.1284 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:19:50,928 - root - INFO - lr: 2.6455e-05 gnorm: 1.03 [12:45:40<11:47:50] +[titan] 2025-10-05 11:20:01,769 - root - INFO - step: 20790 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 11:20:01,769 - root - INFO - lr: 2.6446e-05 gnorm: 1.07 [12:45:51<11:47:39] +[titan] 2025-10-05 11:20:12,646 - root - INFO - step: 20795 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 11:20:12,646 - root - INFO - lr: 2.6437e-05 gnorm: 1.10 [12:46:02<11:47:28] +[titan] 2025-10-05 11:20:21,353 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:20:23,533 - root - INFO - step: 20800 loss: 2.0768 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 11:20:23,533 - root - INFO - lr: 2.6429e-05 gnorm: 1.06 [12:46:13<11:47:16] +[titan] 2025-10-05 11:20:34,392 - root - INFO - step: 20805 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:20:34,392 - root - INFO - lr: 2.6420e-05 gnorm: 1.09 [12:46:24<11:47:05] +[titan] 2025-10-05 11:20:45,231 - root - INFO - step: 20810 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 11:20:45,231 - root - INFO - lr: 2.6411e-05 gnorm: 1.04 [12:46:35<11:46:54] +[titan] 2025-10-05 11:20:56,074 - root - INFO - step: 20815 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:20:56,074 - root - INFO - lr: 2.6402e-05 gnorm: 1.08 [12:46:45<11:46:43] +[titan] 2025-10-05 11:21:06,980 - root - INFO - step: 20820 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:21:06,980 - root - INFO - lr: 2.6393e-05 gnorm: 1.07 [12:46:56<11:46:32] +[titan] 2025-10-05 11:21:17,884 - root - INFO - step: 20825 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 11:21:17,884 - root - INFO - lr: 2.6384e-05 gnorm: 1.08 [12:47:07<11:46:20] +[titan] 2025-10-05 11:21:28,741 - root - INFO - step: 20830 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:21:28,741 - root - INFO - lr: 
2.6375e-05 gnorm: 1.08 [12:47:18<11:46:09] +[titan] 2025-10-05 11:21:39,613 - root - INFO - step: 20835 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8881 +[titan] 2025-10-05 11:21:39,613 - root - INFO - lr: 2.6366e-05 gnorm: 1.07 [12:47:29<11:45:58] +[titan] 2025-10-05 11:21:50,471 - root - INFO - step: 20840 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 11:21:50,471 - root - INFO - lr: 2.6357e-05 gnorm: 1.03 [12:47:40<11:45:47] +[titan] 2025-10-05 11:22:01,325 - root - INFO - step: 20845 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:22:01,325 - root - INFO - lr: 2.6348e-05 gnorm: 1.07 [12:47:51<11:45:35] +[titan] 2025-10-05 11:22:10,042 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:22:12,228 - root - INFO - step: 20850 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:22:12,228 - root - INFO - lr: 2.6339e-05 gnorm: 1.01 [12:48:02<11:45:24] +[titan] 2025-10-05 11:22:23,145 - root - INFO - step: 20855 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 11:22:23,145 - root - INFO - lr: 2.6330e-05 gnorm: 1.08 [12:48:13<11:45:13] +[titan] 2025-10-05 11:22:33,976 - root - INFO - step: 20860 loss: 2.1509 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:22:33,976 - root - INFO - lr: 2.6322e-05 gnorm: 1.08 [12:48:23<11:45:02] +[titan] 2025-10-05 11:22:44,818 - root - INFO - step: 20865 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 11:22:44,819 - root - INFO - lr: 2.6313e-05 gnorm: 1.08 [12:48:34<11:44:51] +[titan] 2025-10-05 11:22:55,670 - root - INFO - step: 20870 loss: 2.1029 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 11:22:55,671 - root - INFO - lr: 2.6304e-05 gnorm: 1.04 [12:48:45<11:44:39] +[titan] 2025-10-05 11:23:06,495 - root - INFO - step: 20875 loss: 2.1668 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 11:23:06,495 - root - INFO - lr: 2.6295e-05 gnorm: 1.03 [12:48:56<11:44:28] +[titan] 2025-10-05 11:23:17,426 - root - INFO - step: 20880 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 11:23:17,426 - root - INFO - lr: 
2.6286e-05 gnorm: 1.06 [12:49:07<11:44:17] +[titan] 2025-10-05 11:23:28,304 - root - INFO - step: 20885 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:23:28,305 - root - INFO - lr: 2.6277e-05 gnorm: 1.02 [12:49:18<11:44:06] +[titan] 2025-10-05 11:23:39,146 - root - INFO - step: 20890 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 11:23:39,147 - root - INFO - lr: 2.6268e-05 gnorm: 1.04 [12:49:29<11:43:55] +[titan] 2025-10-05 11:23:50,019 - root - INFO - step: 20895 loss: 2.1373 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:23:50,019 - root - INFO - lr: 2.6259e-05 gnorm: 1.05 [12:49:39<11:43:43] +[titan] 2025-10-05 11:23:58,682 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:24:00,862 - root - INFO - step: 20900 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 11:24:00,862 - root - INFO - lr: 2.6250e-05 gnorm: 1.08 [12:49:50<11:43:32] +[titan] 2025-10-05 11:24:11,693 - root - INFO - step: 20905 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8801 +[titan] 2025-10-05 11:24:11,693 - root - INFO - lr: 2.6241e-05 gnorm: 1.09 [12:50:01<11:43:21] +[titan] 2025-10-05 11:24:22,592 - root - INFO - step: 20910 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8505 +[titan] 2025-10-05 11:24:22,592 - root - INFO - lr: 2.6232e-05 gnorm: 1.06 [12:50:12<11:43:10] +[titan] 2025-10-05 11:24:33,463 - root - INFO - step: 20915 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 11:24:33,463 - root - INFO - lr: 2.6224e-05 gnorm: 1.05 [12:50:23<11:42:58] +[titan] 2025-10-05 11:24:44,313 - root - INFO - step: 20920 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:24:44,314 - root - INFO - lr: 2.6215e-05 gnorm: 1.05 [12:50:34<11:42:47] +[titan] 2025-10-05 11:24:55,176 - root - INFO - step: 20925 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8450 +[titan] 2025-10-05 11:24:55,176 - root - INFO - lr: 2.6206e-05 gnorm: 1.05 [12:50:45<11:42:36] +[titan] 2025-10-05 11:25:06,030 - root - INFO - step: 20930 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8992 +[titan] 2025-10-05 11:25:06,030 - root - INFO - lr: 
2.6197e-05 gnorm: 1.10 [12:50:55<11:42:25] +[titan] 2025-10-05 11:25:16,898 - root - INFO - step: 20935 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8741 +[titan] 2025-10-05 11:25:16,898 - root - INFO - lr: 2.6188e-05 gnorm: 1.05 [12:51:06<11:42:14] +[titan] 2025-10-05 11:25:27,781 - root - INFO - step: 20940 loss: 2.1440 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:25:27,781 - root - INFO - lr: 2.6179e-05 gnorm: 1.04 [12:51:17<11:42:02] +[titan] 2025-10-05 11:25:38,668 - root - INFO - step: 20945 loss: 2.1635 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 11:25:38,668 - root - INFO - lr: 2.6170e-05 gnorm: 1.04 [12:51:28<11:41:51] +[titan] 2025-10-05 11:25:47,372 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:25:49,579 - root - INFO - step: 20950 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8588 +[titan] 2025-10-05 11:25:49,579 - root - INFO - lr: 2.6161e-05 gnorm: 1.02 [12:51:39<11:41:40] +[titan] 2025-10-05 11:26:00,466 - root - INFO - step: 20955 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9476 +[titan] 2025-10-05 11:26:00,466 - root - INFO - lr: 2.6152e-05 gnorm: 1.08 [12:51:50<11:41:29] +[titan] 2025-10-05 11:26:11,358 - root - INFO - step: 20960 loss: 2.1680 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 11:26:11,358 - root - INFO - lr: 2.6143e-05 gnorm: 1.07 [12:52:01<11:41:18] +[titan] 2025-10-05 11:26:22,285 - root - INFO - step: 20965 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 11:26:22,285 - root - INFO - lr: 2.6134e-05 gnorm: 1.03 [12:52:12<11:41:06] +[titan] 2025-10-05 11:26:33,153 - root - INFO - step: 20970 loss: 2.0712 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 11:26:33,153 - root - INFO - lr: 2.6126e-05 gnorm: 1.04 [12:52:23<11:40:55] +[titan] 2025-10-05 11:26:44,020 - root - INFO - step: 20975 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:26:44,021 - root - INFO - lr: 2.6117e-05 gnorm: 1.09 [12:52:33<11:40:44] +[titan] 2025-10-05 11:26:54,991 - root - INFO - step: 20980 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 11:26:54,992 - root - INFO - lr: 
2.6108e-05 gnorm: 1.07 [12:52:44<11:40:33] +[titan] 2025-10-05 11:27:05,851 - root - INFO - step: 20985 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8877 +[titan] 2025-10-05 11:27:05,851 - root - INFO - lr: 2.6099e-05 gnorm: 1.11 [12:52:55<11:40:22] +[titan] 2025-10-05 11:27:16,808 - root - INFO - step: 20990 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:27:16,809 - root - INFO - lr: 2.6090e-05 gnorm: 1.08 [12:53:06<11:40:10] +[titan] 2025-10-05 11:27:21,388 - root - INFO - Dumping profiler traces at step 20992 +[titan] 2025-10-05 11:27:21,423 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:27:27,954 - root - INFO - step: 20995 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,401 tflops: 407.90 mfu: 41.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 11:27:27,954 - root - INFO - lr: 2.6081e-05 gnorm: 1.05 [12:53:17<11:40:00] +[titan] 2025-10-05 11:27:36,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:27:38,817 - root - INFO - step: 21000 loss: 2.1220 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8750 +[titan] 2025-10-05 11:27:38,817 - root - INFO - lr: 2.6072e-05 gnorm: 1.05 [12:53:28<11:39:48] +[titan] 2025-10-05 11:27:49,677 - root - INFO - step: 21005 loss: 2.1703 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:27:49,677 - root - INFO - lr: 2.6063e-05 gnorm: 1.10 [12:53:39<11:39:37] +[titan] 2025-10-05 11:28:00,541 - root - INFO - step: 21010 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 11:28:00,541 - root - INFO - lr: 2.6054e-05 gnorm: 1.05 [12:53:50<11:39:26] +[titan] 2025-10-05 11:28:11,383 - root - INFO - step: 21015 loss: 2.1081 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8636 +[titan] 2025-10-05 11:28:11,384 - root - INFO - lr: 2.6045e-05 gnorm: 1.04 [12:54:01<11:39:15] +[titan] 2025-10-05 11:28:22,286 - root - INFO - step: 21020 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:28:22,286 - root - INFO - lr: 2.6036e-05 gnorm: 1.10 [12:54:12<11:39:03] +[titan] 2025-10-05 11:28:33,136 - root - INFO - step: 21025 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 11:28:33,136 - root - INFO - lr: 2.6028e-05 gnorm: 1.07 [12:54:22<11:38:52] +[titan] 2025-10-05 11:28:43,995 - root - INFO - step: 21030 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8395 +[titan] 2025-10-05 11:28:43,995 - root - INFO - lr: 
2.6019e-05 gnorm: 1.06 [12:54:33<11:38:41] +[titan] 2025-10-05 11:28:54,868 - root - INFO - step: 21035 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8811 +[titan] 2025-10-05 11:28:54,868 - root - INFO - lr: 2.6010e-05 gnorm: 1.09 [12:54:44<11:38:30] +[titan] 2025-10-05 11:29:05,770 - root - INFO - step: 21040 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 11:29:05,770 - root - INFO - lr: 2.6001e-05 gnorm: 1.06 [12:54:55<11:38:19] +[titan] 2025-10-05 11:29:16,625 - root - INFO - step: 21045 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:29:16,625 - root - INFO - lr: 2.5992e-05 gnorm: 1.04 [12:55:06<11:38:07] +[titan] 2025-10-05 11:29:25,365 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:29:27,546 - root - INFO - step: 21050 loss: 2.1350 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:29:27,546 - root - INFO - lr: 2.5983e-05 gnorm: 1.09 [12:55:17<11:37:56] +[titan] 2025-10-05 11:29:38,415 - root - INFO - step: 21055 loss: 2.0977 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8537 +[titan] 2025-10-05 11:29:38,415 - root - INFO - lr: 2.5974e-05 gnorm: 1.05 [12:55:28<11:37:45] +[titan] 2025-10-05 11:29:49,289 - root - INFO - step: 21060 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 11:29:49,289 - root - INFO - lr: 2.5965e-05 gnorm: 1.09 [12:55:39<11:37:34] +[titan] 2025-10-05 11:30:00,149 - root - INFO - step: 21065 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 11:30:00,149 - root - INFO - lr: 2.5956e-05 gnorm: 1.09 [12:55:49<11:37:23] +[titan] 2025-10-05 11:30:11,032 - root - INFO - step: 21070 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:30:11,032 - root - INFO - lr: 2.5947e-05 gnorm: 1.08 [12:56:00<11:37:11] +[titan] 2025-10-05 11:30:21,932 - root - INFO - step: 21075 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 11:30:21,933 - root - INFO - lr: 2.5939e-05 gnorm: 1.07 [12:56:11<11:37:00] +[titan] 2025-10-05 11:30:32,855 - root - INFO - step: 21080 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8856 +[titan] 2025-10-05 11:30:32,855 - root - INFO - lr: 
2.5930e-05 gnorm: 1.07 [12:56:22<11:36:49] +[titan] 2025-10-05 11:30:43,698 - root - INFO - step: 21085 loss: 2.1181 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:30:43,699 - root - INFO - lr: 2.5921e-05 gnorm: 1.11 [12:56:33<11:36:38] +[titan] 2025-10-05 11:30:54,563 - root - INFO - step: 21090 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 11:30:54,563 - root - INFO - lr: 2.5912e-05 gnorm: 1.03 [12:56:44<11:36:27] +[titan] 2025-10-05 11:31:05,426 - root - INFO - step: 21095 loss: 2.2239 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9636 +[titan] 2025-10-05 11:31:05,427 - root - INFO - lr: 2.5903e-05 gnorm: 1.06 [12:56:55<11:36:15] +[titan] 2025-10-05 11:31:14,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:31:16,304 - root - INFO - step: 21100 loss: 2.0959 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 11:31:16,304 - root - INFO - lr: 2.5894e-05 gnorm: 1.03 [12:57:06<11:36:04] +[titan] 2025-10-05 11:31:27,255 - root - INFO - step: 21105 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 11:31:27,256 - root - INFO - lr: 2.5885e-05 gnorm: 1.07 [12:57:17<11:35:53] +[titan] 2025-10-05 11:31:38,131 - root - INFO - step: 21110 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8621 +[titan] 2025-10-05 11:31:38,132 - root - INFO - lr: 2.5876e-05 gnorm: 1.06 [12:57:27<11:35:42] +[titan] 2025-10-05 11:31:49,004 - root - INFO - step: 21115 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:31:49,004 - root - INFO - lr: 2.5867e-05 gnorm: 1.07 [12:57:38<11:35:31] +[titan] 2025-10-05 11:31:59,893 - root - INFO - step: 21120 loss: 2.0727 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8315 +[titan] 2025-10-05 11:31:59,893 - root - INFO - lr: 2.5858e-05 gnorm: 1.07 [12:57:49<11:35:19] +[titan] 2025-10-05 11:32:10,768 - root - INFO - step: 21125 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 11:32:10,769 - root - INFO - lr: 2.5850e-05 gnorm: 1.07 [12:58:00<11:35:08] +[titan] 2025-10-05 11:32:21,633 - root - INFO - step: 21130 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8324 +[titan] 2025-10-05 11:32:21,633 - root - INFO - lr: 
2.5841e-05 gnorm: 1.05 [12:58:11<11:34:57] +[titan] 2025-10-05 11:32:32,656 - root - INFO - step: 21135 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 29,729 tflops: 412.45 mfu: 41.70% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 11:32:32,656 - root - INFO - lr: 2.5832e-05 gnorm: 1.08 [12:58:22<11:34:46] +[titan] 2025-10-05 11:32:43,550 - root - INFO - step: 21140 loss: 2.1392 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 11:32:43,550 - root - INFO - lr: 2.5823e-05 gnorm: 1.07 [12:58:33<11:34:35] +[titan] 2025-10-05 11:32:54,408 - root - INFO - step: 21145 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 11:32:54,408 - root - INFO - lr: 2.5814e-05 gnorm: 1.06 [12:58:44<11:34:24] +[titan] 2025-10-05 11:33:03,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:33:05,258 - root - INFO - step: 21150 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8836 +[titan] 2025-10-05 11:33:05,258 - root - INFO - lr: 2.5805e-05 gnorm: 1.09 [12:58:55<11:34:12] +[titan] 2025-10-05 11:33:16,124 - root - INFO - step: 21155 loss: 2.1477 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 11:33:16,124 - root - INFO - lr: 2.5796e-05 gnorm: 1.07 [12:59:05<11:34:01] +[titan] 2025-10-05 11:33:27,050 - root - INFO - step: 21160 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 11:33:27,050 - root - INFO - lr: 2.5787e-05 gnorm: 1.06 [12:59:16<11:33:50] +[titan] 2025-10-05 11:33:37,906 - root - INFO - step: 21165 loss: 2.1021 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 11:33:37,906 - root - INFO - lr: 2.5778e-05 gnorm: 1.06 [12:59:27<11:33:39] +[titan] 2025-10-05 11:33:48,805 - root - INFO - step: 21170 loss: 2.1153 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8695 +[titan] 2025-10-05 11:33:48,805 - root - INFO - lr: 2.5769e-05 gnorm: 1.10 [12:59:38<11:33:28] +[titan] 2025-10-05 11:33:59,670 - root - INFO - step: 21175 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 11:33:59,670 - root - INFO - lr: 2.5761e-05 gnorm: 1.05 [12:59:49<11:33:16] +[titan] 2025-10-05 11:34:10,542 - root - INFO - step: 21180 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8645 +[titan] 2025-10-05 11:34:10,542 - root - INFO - lr: 
2.5752e-05 gnorm: 1.07 [13:00:00<11:33:05] +[titan] 2025-10-05 11:34:21,425 - root - INFO - step: 21185 loss: 2.0963 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8530 +[titan] 2025-10-05 11:34:21,425 - root - INFO - lr: 2.5743e-05 gnorm: 1.01 [13:00:11<11:32:54] +[titan] 2025-10-05 11:34:32,352 - root - INFO - step: 21190 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:34:32,352 - root - INFO - lr: 2.5734e-05 gnorm: 1.08 [13:00:22<11:32:43] +[titan] 2025-10-05 11:34:43,216 - root - INFO - step: 21195 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 11:34:43,216 - root - INFO - lr: 2.5725e-05 gnorm: 1.04 [13:00:33<11:32:32] +[titan] 2025-10-05 11:34:51,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:34:54,111 - root - INFO - step: 21200 loss: 2.0921 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 11:34:54,111 - root - INFO - lr: 2.5716e-05 gnorm: 1.07 [13:00:43<11:32:20] +[titan] 2025-10-05 11:35:04,964 - root - INFO - step: 21205 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 11:35:04,964 - root - INFO - lr: 2.5707e-05 gnorm: 1.09 [13:00:54<11:32:09] +[titan] 2025-10-05 11:35:15,826 - root - INFO - step: 21210 loss: 2.1528 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 11:35:15,827 - root - INFO - lr: 2.5698e-05 gnorm: 1.09 [13:01:05<11:31:58] +[titan] 2025-10-05 11:35:26,686 - root - INFO - step: 21215 loss: 2.1911 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:35:26,686 - root - INFO - lr: 2.5689e-05 gnorm: 1.11 [13:01:16<11:31:47] +[titan] 2025-10-05 11:35:37,615 - root - INFO - step: 21220 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 11:35:37,615 - root - INFO - lr: 2.5680e-05 gnorm: 1.08 [13:01:27<11:31:36] +[titan] 2025-10-05 11:35:48,489 - root - INFO - step: 21225 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 11:35:48,489 - root - INFO - lr: 2.5672e-05 gnorm: 1.11 [13:01:38<11:31:24] +[titan] 2025-10-05 11:35:59,356 - root - INFO - step: 21230 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:35:59,356 - root - INFO - lr: 
2.5663e-05 gnorm: 1.08 [13:01:49<11:31:13] +[titan] 2025-10-05 11:36:10,239 - root - INFO - step: 21235 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8980 +[titan] 2025-10-05 11:36:10,239 - root - INFO - lr: 2.5654e-05 gnorm: 1.09 [13:02:00<11:31:02] +[titan] 2025-10-05 11:36:21,092 - root - INFO - step: 21240 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:36:21,092 - root - INFO - lr: 2.5645e-05 gnorm: 1.05 [13:02:10<11:30:51] +[titan] 2025-10-05 11:36:32,021 - root - INFO - step: 21245 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 11:36:32,021 - root - INFO - lr: 2.5636e-05 gnorm: 1.09 [13:02:21<11:30:40] +[titan] 2025-10-05 11:36:40,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:36:42,889 - root - INFO - step: 21250 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 11:36:42,889 - root - INFO - lr: 2.5627e-05 gnorm: 1.07 [13:02:32<11:30:28] +[titan] 2025-10-05 11:36:53,745 - root - INFO - step: 21255 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 11:36:53,746 - root - INFO - lr: 2.5618e-05 gnorm: 1.05 [13:02:43<11:30:17] +[titan] 2025-10-05 11:37:04,622 - root - INFO - step: 21260 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 11:37:04,622 - root - INFO - lr: 2.5609e-05 gnorm: 1.08 [13:02:54<11:30:06] +[titan] 2025-10-05 11:37:15,535 - root - INFO - step: 21265 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 11:37:15,536 - root - INFO - lr: 2.5600e-05 gnorm: 1.08 [13:03:05<11:29:55] +[titan] 2025-10-05 11:37:26,391 - root - INFO - step: 21270 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 11:37:26,392 - root - INFO - lr: 2.5592e-05 gnorm: 1.05 [13:03:16<11:29:44] +[titan] 2025-10-05 11:37:37,276 - root - INFO - step: 21275 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 11:37:37,276 - root - INFO - lr: 2.5583e-05 gnorm: 1.07 [13:03:27<11:29:32] +[titan] 2025-10-05 11:37:48,150 - root - INFO - step: 21280 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8442 +[titan] 2025-10-05 11:37:48,150 - root - INFO - lr: 
2.5574e-05 gnorm: 1.05 [13:03:37<11:29:21] +[titan] 2025-10-05 11:37:59,010 - root - INFO - step: 21285 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 11:37:59,010 - root - INFO - lr: 2.5565e-05 gnorm: 1.07 [13:03:48<11:29:10] +[titan] 2025-10-05 11:38:09,872 - root - INFO - step: 21290 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8545 +[titan] 2025-10-05 11:38:09,872 - root - INFO - lr: 2.5556e-05 gnorm: 1.10 [13:03:59<11:28:59] +[titan] 2025-10-05 11:38:20,741 - root - INFO - step: 21295 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 11:38:20,741 - root - INFO - lr: 2.5547e-05 gnorm: 1.12 [13:04:10<11:28:48] +[titan] 2025-10-05 11:38:29,453 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:38:31,672 - root - INFO - step: 21300 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 11:38:31,672 - root - INFO - lr: 2.5538e-05 gnorm: 1.05 [13:04:21<11:28:36] +[titan] 2025-10-05 11:38:42,540 - root - INFO - step: 21305 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:38:42,540 - root - INFO - lr: 2.5529e-05 gnorm: 1.08 [13:04:32<11:28:25] +[titan] 2025-10-05 11:38:53,411 - root - INFO - step: 21310 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 11:38:53,411 - root - INFO - lr: 2.5520e-05 gnorm: 1.07 [13:04:43<11:28:14] +[titan] 2025-10-05 11:39:04,301 - root - INFO - step: 21315 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8790 +[titan] 2025-10-05 11:39:04,301 - root - INFO - lr: 2.5511e-05 gnorm: 1.10 [13:04:54<11:28:03] +[titan] 2025-10-05 11:39:15,170 - root - INFO - step: 21320 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8772 +[titan] 2025-10-05 11:39:15,170 - root - INFO - lr: 2.5503e-05 gnorm: 1.06 [13:05:04<11:27:52] +[titan] 2025-10-05 11:39:26,035 - root - INFO - step: 21325 loss: 2.1518 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:39:26,035 - root - INFO - lr: 2.5494e-05 gnorm: 1.08 [13:05:15<11:27:40] +[titan] 2025-10-05 11:39:36,994 - root - INFO - step: 21330 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8956 +[titan] 2025-10-05 11:39:36,994 - root - INFO - lr: 
2.5485e-05 gnorm: 1.06 [13:05:26<11:27:29] +[titan] 2025-10-05 11:39:47,849 - root - INFO - step: 21335 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 11:39:47,849 - root - INFO - lr: 2.5476e-05 gnorm: 1.03 [13:05:37<11:27:18] +[titan] 2025-10-05 11:39:58,709 - root - INFO - step: 21340 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 11:39:58,709 - root - INFO - lr: 2.5467e-05 gnorm: 1.07 [13:05:48<11:27:07] +[titan] 2025-10-05 11:40:09,576 - root - INFO - step: 21345 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 11:40:09,576 - root - INFO - lr: 2.5458e-05 gnorm: 1.05 [13:05:59<11:26:56] +[titan] 2025-10-05 11:40:18,258 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:40:20,450 - root - INFO - step: 21350 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9393 +[titan] 2025-10-05 11:40:20,450 - root - INFO - lr: 2.5449e-05 gnorm: 1.09 [13:06:10<11:26:44] +[titan] 2025-10-05 11:40:31,323 - root - INFO - step: 21355 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 11:40:31,324 - root - INFO - lr: 2.5440e-05 gnorm: 1.11 [13:06:21<11:26:33] +[titan] 2025-10-05 11:40:42,303 - root - INFO - step: 21360 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 29,846 tflops: 414.07 mfu: 41.87% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:40:42,303 - root - INFO - lr: 2.5431e-05 gnorm: 1.09 [13:06:32<11:26:22] +[titan] 2025-10-05 11:40:53,190 - root - INFO - step: 21365 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 11:40:53,190 - root - INFO - lr: 2.5423e-05 gnorm: 1.04 [13:06:43<11:26:11] +[titan] 2025-10-05 11:41:04,057 - root - INFO - step: 21370 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8252 +[titan] 2025-10-05 11:41:04,057 - root - INFO - lr: 2.5414e-05 gnorm: 1.06 [13:06:53<11:26:00] +[titan] 2025-10-05 11:41:14,914 - root - INFO - step: 21375 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 11:41:14,914 - root - INFO - lr: 2.5405e-05 gnorm: 1.05 [13:07:04<11:25:49] +[titan] 2025-10-05 11:41:25,788 - root - INFO - step: 21380 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 11:41:25,788 - root - INFO - lr: 
2.5396e-05 gnorm: 1.08 [13:07:15<11:25:37] +[titan] 2025-10-05 11:41:36,680 - root - INFO - step: 21385 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8705 +[titan] 2025-10-05 11:41:36,680 - root - INFO - lr: 2.5387e-05 gnorm: 1.06 [13:07:26<11:25:26] +[titan] 2025-10-05 11:41:47,564 - root - INFO - step: 21390 loss: 2.0660 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 11:41:47,564 - root - INFO - lr: 2.5378e-05 gnorm: 1.06 [13:07:37<11:25:15] +[titan] 2025-10-05 11:41:58,477 - root - INFO - step: 21395 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:41:58,477 - root - INFO - lr: 2.5369e-05 gnorm: 1.05 [13:07:48<11:25:04] +[titan] 2025-10-05 11:42:07,157 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:42:09,346 - root - INFO - step: 21400 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 11:42:09,346 - root - INFO - lr: 2.5360e-05 gnorm: 1.06 [13:07:59<11:24:53] +[titan] 2025-10-05 11:42:20,225 - root - INFO - step: 21405 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:42:20,226 - root - INFO - lr: 2.5352e-05 gnorm: 1.09 [13:08:10<11:24:41] +[titan] 2025-10-05 11:42:31,111 - root - INFO - step: 21410 loss: 2.1240 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:42:31,112 - root - INFO - lr: 2.5343e-05 gnorm: 1.12 [13:08:20<11:24:30] +[titan] 2025-10-05 11:42:42,010 - root - INFO - step: 21415 loss: 2.0961 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8538 +[titan] 2025-10-05 11:42:42,011 - root - INFO - lr: 2.5334e-05 gnorm: 1.06 [13:08:31<11:24:19] +[titan] 2025-10-05 11:42:52,881 - root - INFO - step: 21420 loss: 2.1163 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 11:42:52,881 - root - INFO - lr: 2.5325e-05 gnorm: 1.06 [13:08:42<11:24:08] +[titan] 2025-10-05 11:43:03,753 - root - INFO - step: 21425 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8953 +[titan] 2025-10-05 11:43:03,753 - root - INFO - lr: 2.5316e-05 gnorm: 1.05 [13:08:53<11:23:57] +[titan] 2025-10-05 11:43:14,617 - root - INFO - step: 21430 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8631 +[titan] 2025-10-05 11:43:14,617 - root - INFO - lr: 
2.5307e-05 gnorm: 1.07 [13:09:04<11:23:45] +[titan] 2025-10-05 11:43:25,474 - root - INFO - step: 21435 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 11:43:25,474 - root - INFO - lr: 2.5298e-05 gnorm: 1.04 [13:09:15<11:23:34] +[titan] 2025-10-05 11:43:36,449 - root - INFO - step: 21440 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.25 mfu: 41.89% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 11:43:36,449 - root - INFO - lr: 2.5289e-05 gnorm: 1.05 [13:09:26<11:23:23] +[titan] 2025-10-05 11:43:47,314 - root - INFO - step: 21445 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:43:47,314 - root - INFO - lr: 2.5280e-05 gnorm: 1.04 [13:09:37<11:23:12] +[titan] 2025-10-05 11:43:56,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:43:58,209 - root - INFO - step: 21450 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 11:43:58,209 - root - INFO - lr: 2.5272e-05 gnorm: 1.06 [13:09:48<11:23:01] +[titan] 2025-10-05 11:44:09,061 - root - INFO - step: 21455 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 11:44:09,061 - root - INFO - lr: 2.5263e-05 gnorm: 1.08 [13:09:58<11:22:50] +[titan] 2025-10-05 11:44:19,965 - root - INFO - step: 21460 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9120 +[titan] 2025-10-05 11:44:19,966 - root - INFO - lr: 2.5254e-05 gnorm: 1.07 [13:10:09<11:22:38] +[titan] 2025-10-05 11:44:30,808 - root - INFO - step: 21465 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:44:30,808 - root - INFO - lr: 2.5245e-05 gnorm: 1.04 [13:10:20<11:22:27] +[titan] 2025-10-05 11:44:41,706 - root - INFO - step: 21470 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:44:41,706 - root - INFO - lr: 2.5236e-05 gnorm: 1.08 [13:10:31<11:22:16] +[titan] 2025-10-05 11:44:52,552 - root - INFO - step: 21475 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8376 +[titan] 2025-10-05 11:44:52,552 - root - INFO - lr: 2.5227e-05 gnorm: 1.04 [13:10:42<11:22:05] +[titan] 2025-10-05 11:45:03,391 - root - INFO - step: 21480 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:45:03,391 - root - INFO - lr: 
2.5218e-05 gnorm: 1.07 [13:10:53<11:21:54] +[titan] 2025-10-05 11:45:14,218 - root - INFO - step: 21485 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 11:45:14,218 - root - INFO - lr: 2.5209e-05 gnorm: 1.09 [13:11:04<11:21:42] +[titan] 2025-10-05 11:45:25,127 - root - INFO - step: 21490 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 11:45:25,127 - root - INFO - lr: 2.5201e-05 gnorm: 1.06 [13:11:14<11:21:31] +[titan] 2025-10-05 11:45:35,950 - root - INFO - step: 21495 loss: 2.1076 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 11:45:35,951 - root - INFO - lr: 2.5192e-05 gnorm: 1.05 [13:11:25<11:21:20] +[titan] 2025-10-05 11:45:44,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:45:46,815 - root - INFO - step: 21500 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9073 +[titan] 2025-10-05 11:45:46,815 - root - INFO - lr: 2.5183e-05 gnorm: 1.08 [13:11:36<11:21:09] +[titan] 2025-10-05 11:45:55,763 - root - INFO - Dumping profiler traces at step 21504 +[titan] 2025-10-05 11:45:55,804 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:45:57,977 - root - INFO - step: 21505 loss: 2.1378 memory: 118.84GiB(85.28%) tps: 29,357 tflops: 407.29 mfu: 41.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8896 +[titan] 2025-10-05 11:45:57,977 - root - INFO - lr: 2.5174e-05 gnorm: 1.10 [13:11:47<11:20:58] +[titan] 2025-10-05 11:46:08,810 - root - INFO - step: 21510 loss: 2.1100 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:46:08,810 - root - INFO - lr: 2.5165e-05 gnorm: 1.08 [13:11:58<11:20:47] +[titan] 2025-10-05 11:46:19,644 - root - INFO - step: 21515 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:46:19,645 - root - INFO - lr: 2.5156e-05 gnorm: 1.05 [13:12:09<11:20:35] +[titan] 2025-10-05 11:46:30,518 - root - INFO - step: 21520 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 11:46:30,518 - root - INFO - lr: 2.5147e-05 gnorm: 1.08 [13:12:20<11:20:24] +[titan] 2025-10-05 11:46:41,409 - root - INFO - step: 21525 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 11:46:41,409 - root - INFO - lr: 2.5138e-05 gnorm: 1.08 [13:12:31<11:20:13] +[titan] 2025-10-05 11:46:52,228 - root - INFO - step: 21530 loss: 
2.0507 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 11:46:52,228 - root - INFO - lr: 2.5130e-05 gnorm: 1.06 [13:12:42<11:20:02] +[titan] 2025-10-05 11:47:03,059 - root - INFO - step: 21535 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 11:47:03,059 - root - INFO - lr: 2.5121e-05 gnorm: 1.03 [13:12:52<11:19:50] +[titan] 2025-10-05 11:47:13,907 - root - INFO - step: 21540 loss: 2.1549 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 11:47:13,907 - root - INFO - lr: 2.5112e-05 gnorm: 1.09 [13:13:03<11:19:39] +[titan] 2025-10-05 11:47:24,716 - root - INFO - step: 21545 loss: 2.1223 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 11:47:24,716 - root - INFO - lr: 2.5103e-05 gnorm: 1.07 [13:13:14<11:19:28] +[titan] 2025-10-05 11:47:33,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:47:35,549 - root - INFO - step: 21550 loss: 2.1493 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8994 +[titan] 2025-10-05 11:47:35,549 - root - INFO - lr: 2.5094e-05 gnorm: 1.05 [13:13:25<11:19:17] +[titan] 2025-10-05 11:47:46,489 - root - INFO - step: 21555 loss: 2.0469 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 11:47:46,490 - root - INFO - lr: 2.5085e-05 gnorm: 1.04 [13:13:36<11:19:06] +[titan] 2025-10-05 11:47:57,291 - root - INFO - step: 21560 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:47:57,291 - root - INFO - lr: 2.5076e-05 gnorm: 1.08 [13:13:47<11:18:54] +[titan] 2025-10-05 11:48:08,089 - root - INFO - step: 21565 loss: 2.0826 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 11:48:08,090 - root - INFO - lr: 2.5067e-05 gnorm: 1.06 [13:13:57<11:18:43] +[titan] 2025-10-05 11:48:18,889 - root - INFO - step: 21570 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:48:18,889 - root - INFO - lr: 2.5059e-05 gnorm: 1.09 [13:14:08<11:18:32] +[titan] 2025-10-05 11:48:29,708 - root - INFO - step: 21575 loss: 2.1425 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:48:29,709 - root - INFO - lr: 2.5050e-05 gnorm: 1.06 [13:14:19<11:18:21] +[titan] 2025-10-05 11:48:40,539 - root - INFO - step: 21580 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.76 mfu: 42.44% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 11:48:40,539 - root - INFO - lr: 
2.5041e-05 gnorm: 1.11 [13:14:30<11:18:09] +[titan] 2025-10-05 11:48:51,410 - root - INFO - step: 21585 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:48:51,410 - root - INFO - lr: 2.5032e-05 gnorm: 1.06 [13:14:41<11:17:58] +[titan] 2025-10-05 11:49:02,256 - root - INFO - step: 21590 loss: 2.1780 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9253 +[titan] 2025-10-05 11:49:02,256 - root - INFO - lr: 2.5023e-05 gnorm: 1.12 [13:14:52<11:17:47] +[titan] 2025-10-05 11:49:13,089 - root - INFO - step: 21595 loss: 2.1172 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 11:49:13,089 - root - INFO - lr: 2.5014e-05 gnorm: 1.10 [13:15:02<11:17:36] +[titan] 2025-10-05 11:49:21,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:49:23,936 - root - INFO - step: 21600 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 11:49:23,936 - root - INFO - lr: 2.5005e-05 gnorm: 1.09 [13:15:13<11:17:25] +[titan] 2025-10-05 11:49:34,750 - root - INFO - step: 21605 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8838 +[titan] 2025-10-05 11:49:34,751 - root - INFO - lr: 2.4996e-05 gnorm: 1.08 [13:15:24<11:17:13] +[titan] 2025-10-05 11:49:45,562 - root - INFO - step: 21610 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8037 +[titan] 2025-10-05 11:49:45,563 - root - INFO - lr: 2.4988e-05 gnorm: 1.02 [13:15:35<11:17:02] +[titan] 2025-10-05 11:49:56,369 - root - INFO - step: 21615 loss: 2.1371 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8883 +[titan] 2025-10-05 11:49:56,370 - root - INFO - lr: 2.4979e-05 gnorm: 1.04 [13:15:46<11:16:51] +[titan] 2025-10-05 11:50:07,237 - root - INFO - step: 21620 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:50:07,237 - root - INFO - lr: 2.4970e-05 gnorm: 1.05 [13:15:57<11:16:40] +[titan] 2025-10-05 11:50:18,053 - root - INFO - step: 21625 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8344 +[titan] 2025-10-05 11:50:18,053 - root - INFO - lr: 2.4961e-05 gnorm: 1.06 [13:16:07<11:16:28] +[titan] 2025-10-05 11:50:28,850 - root - INFO - step: 21630 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.05 mfu: 42.57% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:50:28,851 - root - INFO - lr: 
2.4952e-05 gnorm: 1.04 [13:16:18<11:16:17] +[titan] 2025-10-05 11:50:39,656 - root - INFO - step: 21635 loss: 2.0898 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:50:39,656 - root - INFO - lr: 2.4943e-05 gnorm: 1.09 [13:16:29<11:16:06] +[titan] 2025-10-05 11:50:50,529 - root - INFO - step: 21640 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 11:50:50,529 - root - INFO - lr: 2.4934e-05 gnorm: 1.06 [13:16:40<11:15:55] +[titan] 2025-10-05 11:51:01,328 - root - INFO - step: 21645 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.56% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:51:01,329 - root - INFO - lr: 2.4926e-05 gnorm: 1.04 [13:16:51<11:15:43] +[titan] 2025-10-05 11:51:09,997 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:51:12,168 - root - INFO - step: 21650 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 11:51:12,168 - root - INFO - lr: 2.4917e-05 gnorm: 1.07 [13:17:01<11:15:32] +[titan] 2025-10-05 11:51:23,012 - root - INFO - step: 21655 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 11:51:23,012 - root - INFO - lr: 2.4908e-05 gnorm: 1.06 [13:17:12<11:15:21] +[titan] 2025-10-05 11:51:33,829 - root - INFO - step: 21660 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 11:51:33,830 - root - INFO - lr: 2.4899e-05 gnorm: 1.06 [13:17:23<11:15:10] +[titan] 2025-10-05 11:51:44,687 - root - INFO - step: 21665 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 11:51:44,687 - root - INFO - lr: 2.4890e-05 gnorm: 1.03 [13:17:34<11:14:59] +[titan] 2025-10-05 11:51:55,529 - root - INFO - step: 21670 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 11:51:55,529 - root - INFO - lr: 2.4881e-05 gnorm: 1.04 [13:17:45<11:14:47] +[titan] 2025-10-05 11:52:06,368 - root - INFO - step: 21675 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 11:52:06,368 - root - INFO - lr: 2.4872e-05 gnorm: 1.05 [13:17:56<11:14:36] +[titan] 2025-10-05 11:52:17,248 - root - INFO - step: 21680 loss: 2.0964 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8526 +[titan] 2025-10-05 11:52:17,248 - root - INFO - lr: 
2.4863e-05 gnorm: 1.08 [13:18:07<11:14:25] +[titan] 2025-10-05 11:52:28,077 - root - INFO - step: 21685 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 11:52:28,077 - root - INFO - lr: 2.4855e-05 gnorm: 1.04 [13:18:17<11:14:14] +[titan] 2025-10-05 11:52:38,897 - root - INFO - step: 21690 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 11:52:38,897 - root - INFO - lr: 2.4846e-05 gnorm: 1.12 [13:18:28<11:14:02] +[titan] 2025-10-05 11:52:49,731 - root - INFO - step: 21695 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:52:49,731 - root - INFO - lr: 2.4837e-05 gnorm: 1.14 [13:18:39<11:13:51] +[titan] 2025-10-05 11:52:58,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:53:00,558 - root - INFO - step: 21700 loss: 2.0942 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 11:53:00,558 - root - INFO - lr: 2.4828e-05 gnorm: 1.04 [13:18:50<11:13:40] +[titan] 2025-10-05 11:53:11,384 - root - INFO - step: 21705 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8616 +[titan] 2025-10-05 11:53:11,384 - root - INFO - lr: 2.4819e-05 gnorm: 1.01 [13:19:01<11:13:29] +[titan] 2025-10-05 11:53:22,180 - root - INFO - step: 21710 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,354 tflops: 421.11 mfu: 42.58% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 11:53:22,180 - root - INFO - lr: 2.4810e-05 gnorm: 1.08 [13:19:11<11:13:18] +[titan] 2025-10-05 11:53:33,006 - root - INFO - step: 21715 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 11:53:33,006 - root - INFO - lr: 2.4801e-05 gnorm: 1.07 [13:19:22<11:13:06] +[titan] 2025-10-05 11:53:43,863 - root - INFO - step: 21720 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8820 +[titan] 2025-10-05 11:53:43,864 - root - INFO - lr: 2.4793e-05 gnorm: 1.07 [13:19:33<11:12:55] +[titan] 2025-10-05 11:53:54,726 - root - INFO - step: 21725 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 11:53:54,726 - root - INFO - lr: 2.4784e-05 gnorm: 1.07 [13:19:44<11:12:44] +[titan] 2025-10-05 11:54:05,529 - root - INFO - step: 21730 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 11:54:05,529 - root - INFO - lr: 
2.4775e-05 gnorm: 1.10 [13:19:55<11:12:33] +[titan] 2025-10-05 11:54:16,329 - root - INFO - step: 21735 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,341 tflops: 420.94 mfu: 42.56% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8624 +[titan] 2025-10-05 11:54:16,329 - root - INFO - lr: 2.4766e-05 gnorm: 1.08 [13:20:06<11:12:21] +[titan] 2025-10-05 11:54:27,148 - root - INFO - step: 21740 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.19 mfu: 42.49% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9108 +[titan] 2025-10-05 11:54:27,149 - root - INFO - lr: 2.4757e-05 gnorm: 1.08 [13:20:16<11:12:10] +[titan] 2025-10-05 11:54:38,009 - root - INFO - step: 21745 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 11:54:38,009 - root - INFO - lr: 2.4748e-05 gnorm: 1.09 [13:20:27<11:11:59] +[titan] 2025-10-05 11:54:46,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:54:48,886 - root - INFO - step: 21750 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8871 +[titan] 2025-10-05 11:54:48,886 - root - INFO - lr: 2.4739e-05 gnorm: 1.11 [13:20:38<11:11:48] +[titan] 2025-10-05 11:54:59,687 - root - INFO - step: 21755 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 11:54:59,688 - root - INFO - lr: 2.4731e-05 gnorm: 1.03 [13:20:49<11:11:37] +[titan] 2025-10-05 11:55:10,503 - root - INFO - step: 21760 loss: 2.0855 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:55:10,504 - root - INFO - lr: 2.4722e-05 gnorm: 1.08 [13:21:00<11:11:25] +[titan] 2025-10-05 11:55:21,303 - root - INFO - step: 21765 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 11:55:21,303 - root - INFO - lr: 2.4713e-05 gnorm: 1.06 [13:21:11<11:11:14] +[titan] 2025-10-05 11:55:32,128 - root - INFO - step: 21770 loss: 2.0394 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 11:55:32,129 - root - INFO - lr: 2.4704e-05 gnorm: 1.07 [13:21:21<11:11:03] +[titan] 2025-10-05 11:55:42,948 - root - INFO - step: 21775 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 11:55:42,949 - root - INFO - lr: 2.4695e-05 gnorm: 1.13 [13:21:32<11:10:52] +[titan] 2025-10-05 11:55:53,849 - root - INFO - step: 21780 loss: 2.1107 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 11:55:53,849 - root - INFO - lr: 
2.4686e-05 gnorm: 1.06 [13:21:43<11:10:40] +[titan] 2025-10-05 11:56:04,670 - root - INFO - step: 21785 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 11:56:04,670 - root - INFO - lr: 2.4677e-05 gnorm: 1.11 [13:21:54<11:10:29] +[titan] 2025-10-05 11:56:15,465 - root - INFO - step: 21790 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,355 tflops: 421.13 mfu: 42.58% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 11:56:15,465 - root - INFO - lr: 2.4669e-05 gnorm: 1.08 [13:22:05<11:10:18] +[titan] 2025-10-05 11:56:26,269 - root - INFO - step: 21795 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 11:56:26,269 - root - INFO - lr: 2.4660e-05 gnorm: 1.04 [13:22:16<11:10:07] +[titan] 2025-10-05 11:56:34,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:56:37,050 - root - INFO - step: 21800 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 11:56:37,050 - root - INFO - lr: 2.4651e-05 gnorm: 1.03 [13:22:26<11:09:55] +[titan] 2025-10-05 11:56:47,849 - root - INFO - step: 21805 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8423 +[titan] 2025-10-05 11:56:47,849 - root - INFO - lr: 2.4642e-05 gnorm: 1.06 [13:22:37<11:09:44] +[titan] 2025-10-05 11:56:58,686 - root - INFO - step: 21810 loss: 2.0632 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 11:56:58,686 - root - INFO - lr: 2.4633e-05 gnorm: 1.08 [13:22:48<11:09:33] +[titan] 2025-10-05 11:57:09,468 - root - INFO - step: 21815 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,391 tflops: 421.64 mfu: 42.63% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 11:57:09,469 - root - INFO - lr: 2.4624e-05 gnorm: 1.04 [13:22:59<11:09:22] +[titan] 2025-10-05 11:57:20,268 - root - INFO - step: 21820 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,344 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 11:57:20,268 - root - INFO - lr: 2.4615e-05 gnorm: 1.06 [13:23:10<11:09:11] +[titan] 2025-10-05 11:57:31,069 - root - INFO - step: 21825 loss: 2.0588 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 11:57:31,069 - root - INFO - lr: 2.4607e-05 gnorm: 1.03 [13:23:20<11:08:59] +[titan] 2025-10-05 11:57:41,865 - root - INFO - step: 21830 loss: 2.1085 memory: 118.84GiB(85.28%) tps: 30,353 tflops: 421.10 mfu: 42.58% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 11:57:41,865 - root - INFO - lr: 
2.4598e-05 gnorm: 1.03 [13:23:31<11:08:48] +[titan] 2025-10-05 11:57:52,686 - root - INFO - step: 21835 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:57:52,687 - root - INFO - lr: 2.4589e-05 gnorm: 1.03 [13:23:42<11:08:37] +[titan] 2025-10-05 11:58:03,531 - root - INFO - step: 21840 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:58:03,532 - root - INFO - lr: 2.4580e-05 gnorm: 1.05 [13:23:53<11:08:26] +[titan] 2025-10-05 11:58:14,308 - root - INFO - step: 21845 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,407 tflops: 421.85 mfu: 42.65% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9173 +[titan] 2025-10-05 11:58:14,309 - root - INFO - lr: 2.4571e-05 gnorm: 1.09 [13:24:04<11:08:14] +[titan] 2025-10-05 11:58:22,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:58:25,109 - root - INFO - step: 21850 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:58:25,110 - root - INFO - lr: 2.4562e-05 gnorm: 1.08 [13:24:14<11:08:03] +[titan] 2025-10-05 11:58:35,880 - root - INFO - step: 21855 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,424 tflops: 422.09 mfu: 42.68% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 11:58:35,881 - root - INFO - lr: 2.4554e-05 gnorm: 1.08 [13:24:25<11:07:52] +[titan] 2025-10-05 11:58:46,714 - root - INFO - step: 21860 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:58:46,714 - root - INFO - lr: 2.4545e-05 gnorm: 1.03 [13:24:36<11:07:41] +[titan] 2025-10-05 11:58:57,569 - root - INFO - step: 21865 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8989 +[titan] 2025-10-05 11:58:57,569 - root - INFO - lr: 2.4536e-05 gnorm: 1.07 [13:24:47<11:07:29] +[titan] 2025-10-05 11:59:08,390 - root - INFO - step: 21870 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 11:59:08,390 - root - INFO - lr: 2.4527e-05 gnorm: 1.04 [13:24:58<11:07:18] +[titan] 2025-10-05 11:59:19,246 - root - INFO - step: 21875 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8438 +[titan] 2025-10-05 11:59:19,246 - root - INFO - lr: 2.4518e-05 gnorm: 1.06 [13:25:09<11:07:07] +[titan] 2025-10-05 11:59:30,047 - root - INFO - step: 21880 loss: 2.0852 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:59:30,048 - root - INFO - lr: 
2.4509e-05 gnorm: 1.08 [13:25:19<11:06:56] +[titan] 2025-10-05 11:59:40,863 - root - INFO - step: 21885 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 11:59:40,863 - root - INFO - lr: 2.4500e-05 gnorm: 1.05 [13:25:30<11:06:44] +[titan] 2025-10-05 11:59:51,744 - root - INFO - step: 21890 loss: 2.1740 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9204 +[titan] 2025-10-05 11:59:51,745 - root - INFO - lr: 2.4492e-05 gnorm: 1.10 [13:25:41<11:06:33] +[titan] 2025-10-05 12:00:02,570 - root - INFO - step: 21895 loss: 2.2128 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9549 +[titan] 2025-10-05 12:00:02,570 - root - INFO - lr: 2.4483e-05 gnorm: 1.10 [13:25:52<11:06:22] +[titan] 2025-10-05 12:00:11,224 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:00:13,387 - root - INFO - step: 21900 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 12:00:13,387 - root - INFO - lr: 2.4474e-05 gnorm: 1.04 [13:26:03<11:06:11] +[titan] 2025-10-05 12:00:24,246 - root - INFO - step: 21905 loss: 2.1321 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 12:00:24,246 - root - INFO - lr: 2.4465e-05 gnorm: 1.05 [13:26:14<11:06:00] +[titan] 2025-10-05 12:00:35,064 - root - INFO - step: 21910 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 12:00:35,064 - root - INFO - lr: 2.4456e-05 gnorm: 1.06 [13:26:24<11:05:48] +[titan] 2025-10-05 12:00:45,889 - root - INFO - step: 21915 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:00:45,889 - root - INFO - lr: 2.4447e-05 gnorm: 1.07 [13:26:35<11:05:37] +[titan] 2025-10-05 12:00:56,747 - root - INFO - step: 21920 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 12:00:56,748 - root - INFO - lr: 2.4439e-05 gnorm: 1.11 [13:26:46<11:05:26] +[titan] 2025-10-05 12:01:07,566 - root - INFO - step: 21925 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:01:07,567 - root - INFO - lr: 2.4430e-05 gnorm: 1.06 [13:26:57<11:05:15] +[titan] 2025-10-05 12:01:18,394 - root - INFO - step: 21930 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 12:01:18,394 - root - INFO - lr: 
2.4421e-05 gnorm: 1.08 [13:27:08<11:05:04] +[titan] 2025-10-05 12:01:29,213 - root - INFO - step: 21935 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 12:01:29,213 - root - INFO - lr: 2.4412e-05 gnorm: 1.05 [13:27:18<11:04:52] +[titan] 2025-10-05 12:01:40,068 - root - INFO - step: 21940 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 12:01:40,068 - root - INFO - lr: 2.4403e-05 gnorm: 1.06 [13:27:29<11:04:41] +[titan] 2025-10-05 12:01:50,925 - root - INFO - step: 21945 loss: 2.1040 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8614 +[titan] 2025-10-05 12:01:50,925 - root - INFO - lr: 2.4394e-05 gnorm: 1.09 [13:27:40<11:04:30] +[titan] 2025-10-05 12:01:59,596 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:02:01,768 - root - INFO - step: 21950 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:02:01,768 - root - INFO - lr: 2.4385e-05 gnorm: 1.08 [13:27:51<11:04:19] +[titan] 2025-10-05 12:02:12,595 - root - INFO - step: 21955 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8121 +[titan] 2025-10-05 12:02:12,595 - root - INFO - lr: 2.4377e-05 gnorm: 1.04 [13:28:02<11:04:08] +[titan] 2025-10-05 12:02:23,415 - root - INFO - step: 21960 loss: 2.0883 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8460 +[titan] 2025-10-05 12:02:23,415 - root - INFO - lr: 2.4368e-05 gnorm: 1.02 [13:28:13<11:03:56] +[titan] 2025-10-05 12:02:34,233 - root - INFO - step: 21965 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:02:34,234 - root - INFO - lr: 2.4359e-05 gnorm: 1.07 [13:28:23<11:03:45] +[titan] 2025-10-05 12:02:45,129 - root - INFO - step: 21970 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8679 +[titan] 2025-10-05 12:02:45,129 - root - INFO - lr: 2.4350e-05 gnorm: 1.06 [13:28:34<11:03:34] +[titan] 2025-10-05 12:02:56,069 - root - INFO - step: 21975 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8574 +[titan] 2025-10-05 12:02:56,069 - root - INFO - lr: 2.4341e-05 gnorm: 1.04 [13:28:45<11:03:23] +[titan] 2025-10-05 12:03:06,899 - root - INFO - step: 21980 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9135 +[titan] 2025-10-05 12:03:06,899 - root - INFO - lr: 
2.4332e-05 gnorm: 1.08 [13:28:56<11:03:12] +[titan] 2025-10-05 12:03:17,738 - root - INFO - step: 21985 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:03:17,738 - root - INFO - lr: 2.4324e-05 gnorm: 1.09 [13:29:07<11:03:00] +[titan] 2025-10-05 12:03:28,567 - root - INFO - step: 21990 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 12:03:28,567 - root - INFO - lr: 2.4315e-05 gnorm: 1.02 [13:29:18<11:02:49] +[titan] 2025-10-05 12:03:39,369 - root - INFO - step: 21995 loss: 2.1137 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 12:03:39,369 - root - INFO - lr: 2.4306e-05 gnorm: 1.06 [13:29:29<11:02:38] +[titan] 2025-10-05 12:03:48,026 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:03:50,242 - root - INFO - step: 22000 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:03:50,242 - root - INFO - lr: 2.4297e-05 gnorm: 1.08 [13:29:39<11:02:27] +[titan] 2025-10-05 12:04:01,127 - root - INFO - step: 22005 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:04:01,127 - root - INFO - lr: 2.4288e-05 gnorm: 1.01 [13:29:50<11:02:16] +[titan] 2025-10-05 12:04:11,950 - root - INFO - step: 22010 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8594 +[titan] 2025-10-05 12:04:11,950 - root - INFO - lr: 2.4279e-05 gnorm: 1.06 [13:30:01<11:02:04] +[titan] 2025-10-05 12:04:22,889 - root - INFO - step: 22015 loss: 2.0810 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:04:22,889 - root - INFO - lr: 2.4271e-05 gnorm: 1.07 [13:30:12<11:01:53] +[titan] 2025-10-05 12:04:25,239 - root - INFO - Dumping profiler traces at step 22016 +[titan] 2025-10-05 12:04:25,276 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:04:33,972 - root - INFO - step: 22020 loss: 2.1387 memory: 118.84GiB(85.28%) tps: 29,567 tflops: 410.19 mfu: 41.48% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8891 +[titan] 2025-10-05 12:04:33,972 - root - INFO - lr: 2.4262e-05 gnorm: 1.03 [13:30:23<11:01:42] +[titan] 2025-10-05 12:04:44,810 - root - INFO - step: 22025 loss: 2.1465 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 12:04:44,810 - root - INFO - lr: 2.4253e-05 gnorm: 1.08 [13:30:34<11:01:31] +[titan] 2025-10-05 12:04:55,694 - root - INFO - step: 22030 loss: 
2.1289 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 12:04:55,694 - root - INFO - lr: 2.4244e-05 gnorm: 1.05 [13:30:45<11:01:20] +[titan] 2025-10-05 12:05:06,571 - root - INFO - step: 22035 loss: 2.0627 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8224 +[titan] 2025-10-05 12:05:06,571 - root - INFO - lr: 2.4235e-05 gnorm: 1.04 [13:30:56<11:01:09] +[titan] 2025-10-05 12:05:17,439 - root - INFO - step: 22040 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 12:05:17,439 - root - INFO - lr: 2.4226e-05 gnorm: 1.05 [13:31:07<11:00:58] +[titan] 2025-10-05 12:05:28,290 - root - INFO - step: 22045 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 12:05:28,290 - root - INFO - lr: 2.4218e-05 gnorm: 1.06 [13:31:18<11:00:46] +[titan] 2025-10-05 12:05:36,955 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:05:39,138 - root - INFO - step: 22050 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:05:39,138 - root - INFO - lr: 2.4209e-05 gnorm: 1.06 [13:31:28<11:00:35] +[titan] 2025-10-05 12:05:49,987 - root - INFO - step: 22055 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 12:05:49,987 - root - INFO - lr: 2.4200e-05 gnorm: 1.05 [13:31:39<11:00:24] +[titan] 2025-10-05 12:06:00,891 - root - INFO - step: 22060 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 12:06:00,891 - root - INFO - lr: 2.4191e-05 gnorm: 1.10 [13:31:50<11:00:13] +[titan] 2025-10-05 12:06:11,774 - root - INFO - step: 22065 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:06:11,775 - root - INFO - lr: 2.4182e-05 gnorm: 1.05 [13:32:01<11:00:02] +[titan] 2025-10-05 12:06:22,629 - root - INFO - step: 22070 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:06:22,629 - root - INFO - lr: 2.4173e-05 gnorm: 1.06 [13:32:12<10:59:50] +[titan] 2025-10-05 12:06:33,471 - root - INFO - step: 22075 loss: 2.0401 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8033 +[titan] 2025-10-05 12:06:33,471 - root - INFO - lr: 2.4165e-05 gnorm: 1.07 [13:32:23<10:59:39] +[titan] 2025-10-05 12:06:44,307 - root - INFO - step: 22080 loss: 2.1317 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8847 +[titan] 2025-10-05 12:06:44,307 - root - INFO - lr: 
2.4156e-05 gnorm: 1.08 [13:32:34<10:59:28] +[titan] 2025-10-05 12:06:55,150 - root - INFO - step: 22085 loss: 2.0997 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8559 +[titan] 2025-10-05 12:06:55,151 - root - INFO - lr: 2.4147e-05 gnorm: 1.06 [13:32:44<10:59:17] +[titan] 2025-10-05 12:07:06,029 - root - INFO - step: 22090 loss: 2.1094 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 12:07:06,030 - root - INFO - lr: 2.4138e-05 gnorm: 1.09 [13:32:55<10:59:06] +[titan] 2025-10-05 12:07:16,895 - root - INFO - step: 22095 loss: 2.1217 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:07:16,895 - root - INFO - lr: 2.4129e-05 gnorm: 1.14 [13:33:06<10:58:54] +[titan] 2025-10-05 12:07:25,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:07:27,824 - root - INFO - step: 22100 loss: 2.1006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 12:07:27,824 - root - INFO - lr: 2.4121e-05 gnorm: 1.06 [13:33:17<10:58:43] +[titan] 2025-10-05 12:07:38,689 - root - INFO - step: 22105 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 12:07:38,689 - root - INFO - lr: 2.4112e-05 gnorm: 1.07 [13:33:28<10:58:32] +[titan] 2025-10-05 12:07:49,564 - root - INFO - step: 22110 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 12:07:49,564 - root - INFO - lr: 2.4103e-05 gnorm: 1.12 [13:33:39<10:58:21] +[titan] 2025-10-05 12:08:00,491 - root - INFO - step: 22115 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8523 +[titan] 2025-10-05 12:08:00,491 - root - INFO - lr: 2.4094e-05 gnorm: 1.06 [13:33:50<10:58:10] +[titan] 2025-10-05 12:08:11,388 - root - INFO - step: 22120 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 12:08:11,388 - root - INFO - lr: 2.4085e-05 gnorm: 1.07 [13:34:01<10:57:59] +[titan] 2025-10-05 12:08:22,246 - root - INFO - step: 22125 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 12:08:22,246 - root - INFO - lr: 2.4076e-05 gnorm: 1.08 [13:34:11<10:57:47] +[titan] 2025-10-05 12:08:33,148 - root - INFO - step: 22130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:08:33,148 - root - INFO - lr: 
2.4068e-05 gnorm: 1.08 [13:34:22<10:57:36] +[titan] 2025-10-05 12:08:44,031 - root - INFO - step: 22135 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:08:44,031 - root - INFO - lr: 2.4059e-05 gnorm: 1.02 [13:34:33<10:57:25] +[titan] 2025-10-05 12:08:54,887 - root - INFO - step: 22140 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:08:54,888 - root - INFO - lr: 2.4050e-05 gnorm: 1.06 [13:34:44<10:57:14] +[titan] 2025-10-05 12:09:05,796 - root - INFO - step: 22145 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 12:09:05,796 - root - INFO - lr: 2.4041e-05 gnorm: 1.02 [13:34:55<10:57:03] +[titan] 2025-10-05 12:09:14,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:09:16,655 - root - INFO - step: 22150 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 12:09:16,655 - root - INFO - lr: 2.4032e-05 gnorm: 1.06 [13:35:06<10:56:52] +[titan] 2025-10-05 12:09:27,522 - root - INFO - step: 22155 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8661 +[titan] 2025-10-05 12:09:27,522 - root - INFO - lr: 2.4024e-05 gnorm: 1.02 [13:35:17<10:56:40] +[titan] 2025-10-05 12:09:38,432 - root - INFO - step: 22160 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 12:09:38,432 - root - INFO - lr: 2.4015e-05 gnorm: 1.04 [13:35:28<10:56:29] +[titan] 2025-10-05 12:09:49,301 - root - INFO - step: 22165 loss: 2.1166 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:09:49,302 - root - INFO - lr: 2.4006e-05 gnorm: 1.12 [13:35:39<10:56:18] +[titan] 2025-10-05 12:10:00,224 - root - INFO - step: 22170 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:10:00,224 - root - INFO - lr: 2.3997e-05 gnorm: 1.09 [13:35:49<10:56:07] +[titan] 2025-10-05 12:10:11,087 - root - INFO - step: 22175 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:10:11,087 - root - INFO - lr: 2.3988e-05 gnorm: 1.05 [13:36:00<10:55:56] +[titan] 2025-10-05 12:10:21,968 - root - INFO - step: 22180 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9855 +[titan] 2025-10-05 12:10:21,968 - root - INFO - lr: 
2.3979e-05 gnorm: 1.15 [13:36:11<10:55:45] +[titan] 2025-10-05 12:10:32,857 - root - INFO - step: 22185 loss: 2.1657 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9136 +[titan] 2025-10-05 12:10:32,857 - root - INFO - lr: 2.3971e-05 gnorm: 1.08 [13:36:22<10:55:34] +[titan] 2025-10-05 12:10:43,721 - root - INFO - step: 22190 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:10:43,721 - root - INFO - lr: 2.3962e-05 gnorm: 1.07 [13:36:33<10:55:22] +[titan] 2025-10-05 12:10:54,626 - root - INFO - step: 22195 loss: 2.1296 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 12:10:54,626 - root - INFO - lr: 2.3953e-05 gnorm: 1.11 [13:36:44<10:55:11] +[titan] 2025-10-05 12:11:03,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:11:05,537 - root - INFO - step: 22200 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 12:11:05,537 - root - INFO - lr: 2.3944e-05 gnorm: 1.09 [13:36:55<10:55:00] +[titan] 2025-10-05 12:11:16,410 - root - INFO - step: 22205 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 12:11:16,410 - root - INFO - lr: 2.3935e-05 gnorm: 1.10 [13:37:06<10:54:49] +[titan] 2025-10-05 12:11:27,277 - root - INFO - step: 22210 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 12:11:27,277 - root - INFO - lr: 2.3927e-05 gnorm: 1.10 [13:37:17<10:54:38] +[titan] 2025-10-05 12:11:38,149 - root - INFO - step: 22215 loss: 2.0858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8426 +[titan] 2025-10-05 12:11:38,149 - root - INFO - lr: 2.3918e-05 gnorm: 1.08 [13:37:27<10:54:27] +[titan] 2025-10-05 12:11:49,017 - root - INFO - step: 22220 loss: 2.1032 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 12:11:49,017 - root - INFO - lr: 2.3909e-05 gnorm: 1.08 [13:37:38<10:54:15] +[titan] 2025-10-05 12:11:59,905 - root - INFO - step: 22225 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8675 +[titan] 2025-10-05 12:11:59,905 - root - INFO - lr: 2.3900e-05 gnorm: 1.08 [13:37:49<10:54:04] +[titan] 2025-10-05 12:12:10,823 - root - INFO - step: 22230 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:12:10,823 - root - INFO - lr: 
2.3891e-05 gnorm: 1.09 [13:38:00<10:53:53] +[titan] 2025-10-05 12:12:21,651 - root - INFO - step: 22235 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 12:12:21,651 - root - INFO - lr: 2.3883e-05 gnorm: 1.08 [13:38:11<10:53:42] +[titan] 2025-10-05 12:12:32,529 - root - INFO - step: 22240 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 12:12:32,529 - root - INFO - lr: 2.3874e-05 gnorm: 1.11 [13:38:22<10:53:31] +[titan] 2025-10-05 12:12:43,387 - root - INFO - step: 22245 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 12:12:43,388 - root - INFO - lr: 2.3865e-05 gnorm: 1.06 [13:38:33<10:53:19] +[titan] 2025-10-05 12:12:52,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:12:54,236 - root - INFO - step: 22250 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 12:12:54,236 - root - INFO - lr: 2.3856e-05 gnorm: 1.09 [13:38:43<10:53:08] +[titan] 2025-10-05 12:13:05,203 - root - INFO - step: 22255 loss: 2.2062 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 12:13:05,204 - root - INFO - lr: 2.3847e-05 gnorm: 1.10 [13:38:54<10:52:57] +[titan] 2025-10-05 12:13:16,105 - root - INFO - step: 22260 loss: 2.0839 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8418 +[titan] 2025-10-05 12:13:16,105 - root - INFO - lr: 2.3838e-05 gnorm: 1.06 [13:39:05<10:52:46] +[titan] 2025-10-05 12:13:26,969 - root - INFO - step: 22265 loss: 2.1143 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 12:13:26,969 - root - INFO - lr: 2.3830e-05 gnorm: 1.09 [13:39:16<10:52:35] +[titan] 2025-10-05 12:13:37,833 - root - INFO - step: 22270 loss: 2.1822 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 12:13:37,833 - root - INFO - lr: 2.3821e-05 gnorm: 1.10 [13:39:27<10:52:24] +[titan] 2025-10-05 12:13:48,696 - root - INFO - step: 22275 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 12:13:48,696 - root - INFO - lr: 2.3812e-05 gnorm: 1.05 [13:39:38<10:52:12] +[titan] 2025-10-05 12:13:59,557 - root - INFO - step: 22280 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 12:13:59,557 - root - INFO - lr: 
2.3803e-05 gnorm: 1.06 [13:39:49<10:52:01] +[titan] 2025-10-05 12:14:10,442 - root - INFO - step: 22285 loss: 2.1340 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:14:10,442 - root - INFO - lr: 2.3794e-05 gnorm: 1.08 [13:40:00<10:51:50] +[titan] 2025-10-05 12:14:21,358 - root - INFO - step: 22290 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:14:21,358 - root - INFO - lr: 2.3786e-05 gnorm: 1.08 [13:40:11<10:51:39] +[titan] 2025-10-05 12:14:32,225 - root - INFO - step: 22295 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 12:14:32,225 - root - INFO - lr: 2.3777e-05 gnorm: 1.07 [13:40:21<10:51:28] +[titan] 2025-10-05 12:14:40,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:14:43,076 - root - INFO - step: 22300 loss: 2.0949 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 12:14:43,076 - root - INFO - lr: 2.3768e-05 gnorm: 1.08 [13:40:32<10:51:17] +[titan] 2025-10-05 12:14:53,944 - root - INFO - step: 22305 loss: 2.2081 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 12:14:53,944 - root - INFO - lr: 2.3759e-05 gnorm: 1.05 [13:40:43<10:51:05] +[titan] 2025-10-05 12:15:04,844 - root - INFO - step: 22310 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 12:15:04,844 - root - INFO - lr: 2.3750e-05 gnorm: 1.11 [13:40:54<10:50:54] +[titan] 2025-10-05 12:15:15,692 - root - INFO - step: 22315 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:15:15,693 - root - INFO - lr: 2.3742e-05 gnorm: 1.06 [13:41:05<10:50:43] +[titan] 2025-10-05 12:15:26,567 - root - INFO - step: 22320 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 12:15:26,567 - root - INFO - lr: 2.3733e-05 gnorm: 1.04 [13:41:16<10:50:32] +[titan] 2025-10-05 12:15:37,421 - root - INFO - step: 22325 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 12:15:37,422 - root - INFO - lr: 2.3724e-05 gnorm: 1.08 [13:41:27<10:50:21] +[titan] 2025-10-05 12:15:48,281 - root - INFO - step: 22330 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 12:15:48,281 - root - INFO - lr: 
2.3715e-05 gnorm: 1.08 [13:41:37<10:50:10] +[titan] 2025-10-05 12:15:59,149 - root - INFO - step: 22335 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:15:59,149 - root - INFO - lr: 2.3706e-05 gnorm: 1.04 [13:41:48<10:49:58] +[titan] 2025-10-05 12:16:10,046 - root - INFO - step: 22340 loss: 2.0616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 12:16:10,046 - root - INFO - lr: 2.3698e-05 gnorm: 1.10 [13:41:59<10:49:47] +[titan] 2025-10-05 12:16:20,913 - root - INFO - step: 22345 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 12:16:20,913 - root - INFO - lr: 2.3689e-05 gnorm: 1.09 [13:42:10<10:49:36] +[titan] 2025-10-05 12:16:29,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:16:31,773 - root - INFO - step: 22350 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:16:31,773 - root - INFO - lr: 2.3680e-05 gnorm: 1.09 [13:42:21<10:49:25] +[titan] 2025-10-05 12:16:42,676 - root - INFO - step: 22355 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8541 +[titan] 2025-10-05 12:16:42,676 - root - INFO - lr: 2.3671e-05 gnorm: 1.06 [13:42:32<10:49:14] +[titan] 2025-10-05 12:16:53,529 - root - INFO - step: 22360 loss: 2.1363 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:16:53,529 - root - INFO - lr: 2.3662e-05 gnorm: 1.08 [13:42:43<10:49:03] +[titan] 2025-10-05 12:17:04,373 - root - INFO - step: 22365 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:17:04,373 - root - INFO - lr: 2.3654e-05 gnorm: 1.08 [13:42:54<10:48:51] +[titan] 2025-10-05 12:17:15,272 - root - INFO - step: 22370 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:17:15,272 - root - INFO - lr: 2.3645e-05 gnorm: 1.06 [13:43:04<10:48:40] +[titan] 2025-10-05 12:17:26,145 - root - INFO - step: 22375 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 12:17:26,145 - root - INFO - lr: 2.3636e-05 gnorm: 1.05 [13:43:15<10:48:29] +[titan] 2025-10-05 12:17:36,995 - root - INFO - step: 22380 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 12:17:36,995 - root - INFO - lr: 
2.3627e-05 gnorm: 1.07 [13:43:26<10:48:18] +[titan] 2025-10-05 12:17:47,877 - root - INFO - step: 22385 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:17:47,877 - root - INFO - lr: 2.3619e-05 gnorm: 1.08 [13:43:37<10:48:07] +[titan] 2025-10-05 12:17:58,732 - root - INFO - step: 22390 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8286 +[titan] 2025-10-05 12:17:58,732 - root - INFO - lr: 2.3610e-05 gnorm: 1.02 [13:43:48<10:47:56] +[titan] 2025-10-05 12:18:09,619 - root - INFO - step: 22395 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8656 +[titan] 2025-10-05 12:18:09,619 - root - INFO - lr: 2.3601e-05 gnorm: 1.08 [13:43:59<10:47:44] +[titan] 2025-10-05 12:18:18,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:18:20,461 - root - INFO - step: 22400 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8304 +[titan] 2025-10-05 12:18:20,461 - root - INFO - lr: 2.3592e-05 gnorm: 1.03 [13:44:10<10:47:33] +[titan] 2025-10-05 12:18:31,320 - root - INFO - step: 22405 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:18:31,320 - root - INFO - lr: 2.3583e-05 gnorm: 1.06 [13:44:21<10:47:22] +[titan] 2025-10-05 12:18:42,176 - root - INFO - step: 22410 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 12:18:42,176 - root - INFO - lr: 2.3575e-05 gnorm: 1.06 [13:44:31<10:47:11] +[titan] 2025-10-05 12:18:53,029 - root - INFO - step: 22415 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 12:18:53,029 - root - INFO - lr: 2.3566e-05 gnorm: 1.07 [13:44:42<10:47:00] +[titan] 2025-10-05 12:19:03,920 - root - INFO - step: 22420 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 12:19:03,920 - root - INFO - lr: 2.3557e-05 gnorm: 1.04 [13:44:53<10:46:49] +[titan] 2025-10-05 12:19:14,809 - root - INFO - step: 22425 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 12:19:14,809 - root - INFO - lr: 2.3548e-05 gnorm: 1.08 [13:45:04<10:46:37] +[titan] 2025-10-05 12:19:25,666 - root - INFO - step: 22430 loss: 2.1054 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 12:19:25,666 - root - INFO - lr: 
2.3539e-05 gnorm: 1.11 [13:45:15<10:46:26] +[titan] 2025-10-05 12:19:36,537 - root - INFO - step: 22435 loss: 2.0990 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:19:36,537 - root - INFO - lr: 2.3531e-05 gnorm: 1.07 [13:45:26<10:46:15] +[titan] 2025-10-05 12:19:47,408 - root - INFO - step: 22440 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:19:47,408 - root - INFO - lr: 2.3522e-05 gnorm: 1.07 [13:45:37<10:46:04] +[titan] 2025-10-05 12:19:58,267 - root - INFO - step: 22445 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:19:58,267 - root - INFO - lr: 2.3513e-05 gnorm: 1.04 [13:45:47<10:45:53] +[titan] 2025-10-05 12:20:06,985 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:20:09,202 - root - INFO - step: 22450 loss: 2.1175 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:20:09,202 - root - INFO - lr: 2.3504e-05 gnorm: 1.08 [13:45:58<10:45:42] +[titan] 2025-10-05 12:20:20,059 - root - INFO - step: 22455 loss: 2.1341 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8869 +[titan] 2025-10-05 12:20:20,059 - root - INFO - lr: 2.3495e-05 gnorm: 1.04 [13:46:09<10:45:30] +[titan] 2025-10-05 12:20:30,913 - root - INFO - step: 22460 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:20:30,913 - root - INFO - lr: 2.3487e-05 gnorm: 1.14 [13:46:20<10:45:19] +[titan] 2025-10-05 12:20:41,788 - root - INFO - step: 22465 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8576 +[titan] 2025-10-05 12:20:41,788 - root - INFO - lr: 2.3478e-05 gnorm: 1.02 [13:46:31<10:45:08] +[titan] 2025-10-05 12:20:52,649 - root - INFO - step: 22470 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 12:20:52,649 - root - INFO - lr: 2.3469e-05 gnorm: 1.04 [13:46:42<10:44:57] +[titan] 2025-10-05 12:21:03,515 - root - INFO - step: 22475 loss: 2.0698 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8297 +[titan] 2025-10-05 12:21:03,515 - root - INFO - lr: 2.3460e-05 gnorm: 1.09 [13:46:53<10:44:46] +[titan] 2025-10-05 12:21:14,426 - root - INFO - step: 22480 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8596 +[titan] 2025-10-05 12:21:14,426 - root - INFO - lr: 
2.3452e-05 gnorm: 1.07 [13:47:04<10:44:35] +[titan] 2025-10-05 12:21:25,291 - root - INFO - step: 22485 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8221 +[titan] 2025-10-05 12:21:25,291 - root - INFO - lr: 2.3443e-05 gnorm: 1.07 [13:47:14<10:44:23] +[titan] 2025-10-05 12:21:36,157 - root - INFO - step: 22490 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:21:36,157 - root - INFO - lr: 2.3434e-05 gnorm: 1.07 [13:47:25<10:44:12] +[titan] 2025-10-05 12:21:47,031 - root - INFO - step: 22495 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8952 +[titan] 2025-10-05 12:21:47,032 - root - INFO - lr: 2.3425e-05 gnorm: 1.05 [13:47:36<10:44:01] +[titan] 2025-10-05 12:21:55,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:21:57,921 - root - INFO - step: 22500 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 12:21:57,921 - root - INFO - lr: 2.3416e-05 gnorm: 1.10 [13:47:47<10:43:50] +[titan] 2025-10-05 12:22:08,788 - root - INFO - step: 22505 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 12:22:08,788 - root - INFO - lr: 2.3408e-05 gnorm: 1.07 [13:47:58<10:43:39] +[titan] 2025-10-05 12:22:19,750 - root - INFO - step: 22510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 29,895 tflops: 414.75 mfu: 41.94% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:22:19,750 - root - INFO - lr: 2.3399e-05 gnorm: 1.09 [13:48:09<10:43:28] +[titan] 2025-10-05 12:22:30,622 - root - INFO - step: 22515 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:22:30,622 - root - INFO - lr: 2.3390e-05 gnorm: 1.11 [13:48:20<10:43:16] +[titan] 2025-10-05 12:22:41,466 - root - INFO - step: 22520 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 12:22:41,466 - root - INFO - lr: 2.3381e-05 gnorm: 1.06 [13:48:31<10:43:05] +[titan] 2025-10-05 12:22:52,408 - root - INFO - step: 22525 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:22:52,408 - root - INFO - lr: 2.3373e-05 gnorm: 1.06 [13:48:42<10:42:54] +[titan] 2025-10-05 12:22:59,134 - root - INFO - Dumping profiler traces at step 22528 +[titan] 2025-10-05 12:22:59,170 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:23:03,544 - root - INFO - step: 22530 loss: 
2.1241 memory: 118.84GiB(85.28%) tps: 29,427 tflops: 408.25 mfu: 41.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 12:23:03,544 - root - INFO - lr: 2.3364e-05 gnorm: 1.08 [13:48:53<10:42:43] +[titan] 2025-10-05 12:23:14,458 - root - INFO - step: 22535 loss: 2.1311 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8827 +[titan] 2025-10-05 12:23:14,458 - root - INFO - lr: 2.3355e-05 gnorm: 1.33 [13:49:04<10:42:32] +[titan] 2025-10-05 12:23:25,322 - root - INFO - step: 22540 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 12:23:25,322 - root - INFO - lr: 2.3346e-05 gnorm: 1.10 [13:49:15<10:42:21] +[titan] 2025-10-05 12:23:36,189 - root - INFO - step: 22545 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 12:23:36,190 - root - INFO - lr: 2.3338e-05 gnorm: 1.04 [13:49:25<10:42:10] +[titan] 2025-10-05 12:23:44,857 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:23:47,032 - root - INFO - step: 22550 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 12:23:47,032 - root - INFO - lr: 2.3329e-05 gnorm: 1.08 [13:49:36<10:41:59] +[titan] 2025-10-05 12:23:57,904 - root - INFO - step: 22555 loss: 2.0817 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:23:57,904 - root - INFO - lr: 2.3320e-05 gnorm: 1.06 [13:49:47<10:41:47] +[titan] 2025-10-05 12:24:08,764 - root - INFO - step: 22560 loss: 2.0564 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 12:24:08,764 - root - INFO - lr: 2.3311e-05 gnorm: 1.08 [13:49:58<10:41:36] +[titan] 2025-10-05 12:24:19,652 - root - INFO - step: 22565 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8615 +[titan] 2025-10-05 12:24:19,652 - root - INFO - lr: 2.3302e-05 gnorm: 1.10 [13:50:09<10:41:25] +[titan] 2025-10-05 12:24:30,523 - root - INFO - step: 22570 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 12:24:30,523 - root - INFO - lr: 2.3294e-05 gnorm: 1.05 [13:50:20<10:41:14] +[titan] 2025-10-05 12:24:41,397 - root - INFO - step: 22575 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 12:24:41,397 - root - INFO - lr: 2.3285e-05 gnorm: 1.06 [13:50:31<10:41:03] +[titan] 2025-10-05 12:24:52,282 - root - INFO - step: 22580 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8584 +[titan] 2025-10-05 12:24:52,283 - root - INFO - lr: 
2.3276e-05 gnorm: 1.02 [13:50:41<10:40:52] +[titan] 2025-10-05 12:25:03,150 - root - INFO - step: 22585 loss: 2.0722 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:25:03,150 - root - INFO - lr: 2.3267e-05 gnorm: 1.07 [13:50:52<10:40:40] +[titan] 2025-10-05 12:25:14,069 - root - INFO - step: 22590 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 12:25:14,069 - root - INFO - lr: 2.3259e-05 gnorm: 1.07 [13:51:03<10:40:29] +[titan] 2025-10-05 12:25:24,944 - root - INFO - step: 22595 loss: 2.0307 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 12:25:24,945 - root - INFO - lr: 2.3250e-05 gnorm: 1.06 [13:51:14<10:40:18] +[titan] 2025-10-05 12:25:33,616 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:25:35,800 - root - INFO - step: 22600 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 12:25:35,800 - root - INFO - lr: 2.3241e-05 gnorm: 1.09 [13:51:25<10:40:07] +[titan] 2025-10-05 12:25:46,666 - root - INFO - step: 22605 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 12:25:46,666 - root - INFO - lr: 2.3232e-05 gnorm: 1.08 [13:51:36<10:39:56] +[titan] 2025-10-05 12:25:57,545 - root - INFO - step: 22610 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8253 +[titan] 2025-10-05 12:25:57,545 - root - INFO - lr: 2.3224e-05 gnorm: 1.04 [13:51:47<10:39:45] +[titan] 2025-10-05 12:26:08,410 - root - INFO - step: 22615 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 12:26:08,411 - root - INFO - lr: 2.3215e-05 gnorm: 1.05 [13:51:58<10:39:33] +[titan] 2025-10-05 12:26:19,368 - root - INFO - step: 22620 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 12:26:19,368 - root - INFO - lr: 2.3206e-05 gnorm: 1.13 [13:52:09<10:39:22] +[titan] 2025-10-05 12:26:30,266 - root - INFO - step: 22625 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8493 +[titan] 2025-10-05 12:26:30,266 - root - INFO - lr: 2.3197e-05 gnorm: 1.07 [13:52:19<10:39:11] +[titan] 2025-10-05 12:26:41,175 - root - INFO - step: 22630 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 12:26:41,175 - root - INFO - lr: 
2.3189e-05 gnorm: 1.06 [13:52:30<10:39:00] +[titan] 2025-10-05 12:26:52,070 - root - INFO - step: 22635 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8261 +[titan] 2025-10-05 12:26:52,070 - root - INFO - lr: 2.3180e-05 gnorm: 1.03 [13:52:41<10:38:49] +[titan] 2025-10-05 12:27:02,956 - root - INFO - step: 22640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 12:27:02,956 - root - INFO - lr: 2.3171e-05 gnorm: 1.05 [13:52:52<10:38:38] +[titan] 2025-10-05 12:27:13,822 - root - INFO - step: 22645 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 12:27:13,822 - root - INFO - lr: 2.3162e-05 gnorm: 1.02 [13:53:03<10:38:27] +[titan] 2025-10-05 12:27:22,552 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:27:24,736 - root - INFO - step: 22650 loss: 2.0501 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 12:27:24,737 - root - INFO - lr: 2.3153e-05 gnorm: 1.10 [13:53:14<10:38:15] +[titan] 2025-10-05 12:27:35,626 - root - INFO - step: 22655 loss: 2.0835 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 12:27:35,626 - root - INFO - lr: 2.3145e-05 gnorm: 1.05 [13:53:25<10:38:04] +[titan] 2025-10-05 12:27:46,518 - root - INFO - step: 22660 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:27:46,518 - root - INFO - lr: 2.3136e-05 gnorm: 1.11 [13:53:36<10:37:53] +[titan] 2025-10-05 12:27:57,386 - root - INFO - step: 22665 loss: 2.1687 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9171 +[titan] 2025-10-05 12:27:57,386 - root - INFO - lr: 2.3127e-05 gnorm: 1.10 [13:53:47<10:37:42] +[titan] 2025-10-05 12:28:08,227 - root - INFO - step: 22670 loss: 2.0850 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8425 +[titan] 2025-10-05 12:28:08,227 - root - INFO - lr: 2.3118e-05 gnorm: 1.05 [13:53:57<10:37:31] +[titan] 2025-10-05 12:28:19,140 - root - INFO - step: 22675 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:28:19,140 - root - INFO - lr: 2.3110e-05 gnorm: 1.08 [13:54:08<10:37:20] +[titan] 2025-10-05 12:28:30,016 - root - INFO - step: 22680 loss: 2.1382 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 12:28:30,016 - root - INFO - lr: 
2.3101e-05 gnorm: 1.16 [13:54:19<10:37:09] +[titan] 2025-10-05 12:28:40,902 - root - INFO - step: 22685 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8186 +[titan] 2025-10-05 12:28:40,902 - root - INFO - lr: 2.3092e-05 gnorm: 1.08 [13:54:30<10:36:57] +[titan] 2025-10-05 12:28:51,765 - root - INFO - step: 22690 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 12:28:51,766 - root - INFO - lr: 2.3083e-05 gnorm: 1.03 [13:54:41<10:36:46] +[titan] 2025-10-05 12:29:02,626 - root - INFO - step: 22695 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:29:02,626 - root - INFO - lr: 2.3075e-05 gnorm: 1.06 [13:54:52<10:36:35] +[titan] 2025-10-05 12:29:11,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:29:13,457 - root - INFO - step: 22700 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 12:29:13,457 - root - INFO - lr: 2.3066e-05 gnorm: 1.07 [13:55:03<10:36:24] +[titan] 2025-10-05 12:29:24,373 - root - INFO - step: 22705 loss: 2.0814 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:29:24,373 - root - INFO - lr: 2.3057e-05 gnorm: 1.08 [13:55:14<10:36:13] +[titan] 2025-10-05 12:29:35,226 - root - INFO - step: 22710 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 12:29:35,226 - root - INFO - lr: 2.3048e-05 gnorm: 1.06 [13:55:24<10:36:02] +[titan] 2025-10-05 12:29:46,086 - root - INFO - step: 22715 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8506 +[titan] 2025-10-05 12:29:46,087 - root - INFO - lr: 2.3040e-05 gnorm: 1.12 [13:55:35<10:35:50] +[titan] 2025-10-05 12:29:56,956 - root - INFO - step: 22720 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 12:29:56,956 - root - INFO - lr: 2.3031e-05 gnorm: 1.05 [13:55:46<10:35:39] +[titan] 2025-10-05 12:30:07,794 - root - INFO - step: 22725 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8635 +[titan] 2025-10-05 12:30:07,794 - root - INFO - lr: 2.3022e-05 gnorm: 1.08 [13:55:57<10:35:28] +[titan] 2025-10-05 12:30:18,701 - root - INFO - step: 22730 loss: 2.0684 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:30:18,701 - root - INFO - lr: 
2.3013e-05 gnorm: 1.06 [13:56:08<10:35:17] +[titan] 2025-10-05 12:30:29,526 - root - INFO - step: 22735 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9056 +[titan] 2025-10-05 12:30:29,526 - root - INFO - lr: 2.3005e-05 gnorm: 1.08 [13:56:19<10:35:06] +[titan] 2025-10-05 12:30:40,389 - root - INFO - step: 22740 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 12:30:40,389 - root - INFO - lr: 2.2996e-05 gnorm: 1.08 [13:56:30<10:34:55] +[titan] 2025-10-05 12:30:51,240 - root - INFO - step: 22745 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 12:30:51,240 - root - INFO - lr: 2.2987e-05 gnorm: 1.07 [13:56:40<10:34:43] +[titan] 2025-10-05 12:30:59,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:31:02,123 - root - INFO - step: 22750 loss: 2.1101 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:31:02,123 - root - INFO - lr: 2.2978e-05 gnorm: 1.09 [13:56:51<10:34:32] +[titan] 2025-10-05 12:31:12,994 - root - INFO - step: 22755 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8489 +[titan] 2025-10-05 12:31:12,994 - root - INFO - lr: 2.2970e-05 gnorm: 1.07 [13:57:02<10:34:21] +[titan] 2025-10-05 12:31:23,866 - root - INFO - step: 22760 loss: 2.0378 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 12:31:23,866 - root - INFO - lr: 2.2961e-05 gnorm: 1.07 [13:57:13<10:34:10] +[titan] 2025-10-05 12:31:34,726 - root - INFO - step: 22765 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8417 +[titan] 2025-10-05 12:31:34,726 - root - INFO - lr: 2.2952e-05 gnorm: 1.06 [13:57:24<10:33:59] +[titan] 2025-10-05 12:31:45,584 - root - INFO - step: 22770 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 12:31:45,585 - root - INFO - lr: 2.2944e-05 gnorm: 1.08 [13:57:35<10:33:48] +[titan] 2025-10-05 12:31:56,424 - root - INFO - step: 22775 loss: 2.0368 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 12:31:56,424 - root - INFO - lr: 2.2935e-05 gnorm: 1.06 [13:57:46<10:33:36] +[titan] 2025-10-05 12:32:07,271 - root - INFO - step: 22780 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8625 +[titan] 2025-10-05 12:32:07,271 - root - INFO - lr: 
2.2926e-05 gnorm: 1.09 [13:57:56<10:33:25] +[titan] 2025-10-05 12:32:18,125 - root - INFO - step: 22785 loss: 2.0749 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:32:18,126 - root - INFO - lr: 2.2917e-05 gnorm: 1.06 [13:58:07<10:33:14] +[titan] 2025-10-05 12:32:29,041 - root - INFO - step: 22790 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 12:32:29,041 - root - INFO - lr: 2.2909e-05 gnorm: 1.01 [13:58:18<10:33:03] +[titan] 2025-10-05 12:32:39,901 - root - INFO - step: 22795 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8495 +[titan] 2025-10-05 12:32:39,901 - root - INFO - lr: 2.2900e-05 gnorm: 1.05 [13:58:29<10:32:52] +[titan] 2025-10-05 12:32:48,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:32:50,763 - root - INFO - step: 22800 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:32:50,764 - root - INFO - lr: 2.2891e-05 gnorm: 1.04 [13:58:40<10:32:41] +[titan] 2025-10-05 12:33:01,622 - root - INFO - step: 22805 loss: 2.0900 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8482 +[titan] 2025-10-05 12:33:01,622 - root - INFO - lr: 2.2882e-05 gnorm: 1.02 [13:58:51<10:32:29] +[titan] 2025-10-05 12:33:12,469 - root - INFO - step: 22810 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 12:33:12,469 - root - INFO - lr: 2.2874e-05 gnorm: 1.05 [13:59:02<10:32:18] +[titan] 2025-10-05 12:33:23,367 - root - INFO - step: 22815 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8893 +[titan] 2025-10-05 12:33:23,367 - root - INFO - lr: 2.2865e-05 gnorm: 1.08 [13:59:13<10:32:07] +[titan] 2025-10-05 12:33:34,205 - root - INFO - step: 22820 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:33:34,206 - root - INFO - lr: 2.2856e-05 gnorm: 1.08 [13:59:23<10:31:56] +[titan] 2025-10-05 12:33:45,062 - root - INFO - step: 22825 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:33:45,062 - root - INFO - lr: 2.2847e-05 gnorm: 1.06 [13:59:34<10:31:45] +[titan] 2025-10-05 12:33:55,902 - root - INFO - step: 22830 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 12:33:55,902 - root - INFO - lr: 
2.2839e-05 gnorm: 1.08 [13:59:45<10:31:34] +[titan] 2025-10-05 12:34:06,747 - root - INFO - step: 22835 loss: 2.0824 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 12:34:06,747 - root - INFO - lr: 2.2830e-05 gnorm: 1.04 [13:59:56<10:31:22] +[titan] 2025-10-05 12:34:17,586 - root - INFO - step: 22840 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8552 +[titan] 2025-10-05 12:34:17,586 - root - INFO - lr: 2.2821e-05 gnorm: 1.04 [14:00:07<10:31:11] +[titan] 2025-10-05 12:34:28,454 - root - INFO - step: 22845 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8428 +[titan] 2025-10-05 12:34:28,454 - root - INFO - lr: 2.2813e-05 gnorm: 1.11 [14:00:18<10:31:00] +[titan] 2025-10-05 12:34:37,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:34:39,324 - root - INFO - step: 22850 loss: 2.0362 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 12:34:39,324 - root - INFO - lr: 2.2804e-05 gnorm: 1.07 [14:00:28<10:30:49] +[titan] 2025-10-05 12:34:50,183 - root - INFO - step: 22855 loss: 2.0829 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:34:50,183 - root - INFO - lr: 2.2795e-05 gnorm: 1.04 [14:00:39<10:30:38] +[titan] 2025-10-05 12:35:01,017 - root - INFO - step: 22860 loss: 1.9834 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 12:35:01,017 - root - INFO - lr: 2.2786e-05 gnorm: 1.01 [14:00:50<10:30:26] +[titan] 2025-10-05 12:35:11,885 - root - INFO - step: 22865 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:35:11,885 - root - INFO - lr: 2.2778e-05 gnorm: 1.04 [14:01:01<10:30:15] +[titan] 2025-10-05 12:35:22,742 - root - INFO - step: 22870 loss: 2.1227 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8759 +[titan] 2025-10-05 12:35:22,742 - root - INFO - lr: 2.2769e-05 gnorm: 1.09 [14:01:12<10:30:04] +[titan] 2025-10-05 12:35:33,625 - root - INFO - step: 22875 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 12:35:33,625 - root - INFO - lr: 2.2760e-05 gnorm: 1.12 [14:01:23<10:29:53] +[titan] 2025-10-05 12:35:44,473 - root - INFO - step: 22880 loss: 2.0907 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8483 +[titan] 2025-10-05 12:35:44,473 - root - INFO - lr: 
2.2751e-05 gnorm: 1.09 [14:01:34<10:29:42] +[titan] 2025-10-05 12:35:55,316 - root - INFO - step: 22885 loss: 2.1475 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 12:35:55,316 - root - INFO - lr: 2.2743e-05 gnorm: 1.08 [14:01:44<10:29:31] +[titan] 2025-10-05 12:36:06,165 - root - INFO - step: 22890 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8494 +[titan] 2025-10-05 12:36:06,165 - root - INFO - lr: 2.2734e-05 gnorm: 1.10 [14:01:55<10:29:19] +[titan] 2025-10-05 12:36:17,010 - root - INFO - step: 22895 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:36:17,011 - root - INFO - lr: 2.2725e-05 gnorm: 1.06 [14:02:06<10:29:08] +[titan] 2025-10-05 12:36:25,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:36:27,886 - root - INFO - step: 22900 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 12:36:27,887 - root - INFO - lr: 2.2717e-05 gnorm: 1.09 [14:02:17<10:28:57] +[titan] 2025-10-05 12:36:38,741 - root - INFO - step: 22905 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8109 +[titan] 2025-10-05 12:36:38,741 - root - INFO - lr: 2.2708e-05 gnorm: 1.06 [14:02:28<10:28:46] +[titan] 2025-10-05 12:36:49,633 - root - INFO - step: 22910 loss: 2.0954 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:36:49,633 - root - INFO - lr: 2.2699e-05 gnorm: 1.13 [14:02:39<10:28:35] +[titan] 2025-10-05 12:37:00,494 - root - INFO - step: 22915 loss: 2.1261 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8788 +[titan] 2025-10-05 12:37:00,494 - root - INFO - lr: 2.2690e-05 gnorm: 1.09 [14:02:50<10:28:24] +[titan] 2025-10-05 12:37:11,343 - root - INFO - step: 22920 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8270 +[titan] 2025-10-05 12:37:11,343 - root - INFO - lr: 2.2682e-05 gnorm: 1.05 [14:03:01<10:28:13] +[titan] 2025-10-05 12:37:22,183 - root - INFO - step: 22925 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8036 +[titan] 2025-10-05 12:37:22,183 - root - INFO - lr: 2.2673e-05 gnorm: 1.04 [14:03:11<10:28:01] +[titan] 2025-10-05 12:37:33,033 - root - INFO - step: 22930 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9081 +[titan] 2025-10-05 12:37:33,033 - root - INFO - lr: 
2.2664e-05 gnorm: 1.08 [14:03:22<10:27:50] +[titan] 2025-10-05 12:37:43,902 - root - INFO - step: 22935 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:37:43,903 - root - INFO - lr: 2.2656e-05 gnorm: 1.04 [14:03:33<10:27:39] +[titan] 2025-10-05 12:37:54,792 - root - INFO - step: 22940 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 12:37:54,792 - root - INFO - lr: 2.2647e-05 gnorm: 1.09 [14:03:44<10:27:28] +[titan] 2025-10-05 12:38:05,627 - root - INFO - step: 22945 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8431 +[titan] 2025-10-05 12:38:05,628 - root - INFO - lr: 2.2638e-05 gnorm: 1.08 [14:03:55<10:27:17] +[titan] 2025-10-05 12:38:14,306 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:38:16,484 - root - INFO - step: 22950 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7713 +[titan] 2025-10-05 12:38:16,484 - root - INFO - lr: 2.2629e-05 gnorm: 1.08 [14:04:06<10:27:06] +[titan] 2025-10-05 12:38:27,334 - root - INFO - step: 22955 loss: 2.0812 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:38:27,334 - root - INFO - lr: 2.2621e-05 gnorm: 1.09 [14:04:16<10:26:54] +[titan] 2025-10-05 12:38:38,180 - root - INFO - step: 22960 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8473 +[titan] 2025-10-05 12:38:38,180 - root - INFO - lr: 2.2612e-05 gnorm: 1.14 [14:04:27<10:26:43] +[titan] 2025-10-05 12:38:49,045 - root - INFO - step: 22965 loss: 2.0894 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 12:38:49,045 - root - INFO - lr: 2.2603e-05 gnorm: 1.02 [14:04:38<10:26:32] +[titan] 2025-10-05 12:38:59,904 - root - INFO - step: 22970 loss: 2.0347 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7986 +[titan] 2025-10-05 12:38:59,904 - root - INFO - lr: 2.2595e-05 gnorm: 1.08 [14:04:49<10:26:21] +[titan] 2025-10-05 12:39:10,774 - root - INFO - step: 22975 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9048 +[titan] 2025-10-05 12:39:10,774 - root - INFO - lr: 2.2586e-05 gnorm: 1.09 [14:05:00<10:26:10] +[titan] 2025-10-05 12:39:21,640 - root - INFO - step: 22980 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:39:21,640 - root - INFO - lr: 
2.2577e-05 gnorm: 1.09 [14:05:11<10:25:59] +[titan] 2025-10-05 12:39:32,525 - root - INFO - step: 22985 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8780 +[titan] 2025-10-05 12:39:32,525 - root - INFO - lr: 2.2568e-05 gnorm: 1.07 [14:05:22<10:25:47] +[titan] 2025-10-05 12:39:43,368 - root - INFO - step: 22990 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 12:39:43,369 - root - INFO - lr: 2.2560e-05 gnorm: 1.08 [14:05:33<10:25:36] +[titan] 2025-10-05 12:39:54,216 - root - INFO - step: 22995 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 12:39:54,217 - root - INFO - lr: 2.2551e-05 gnorm: 1.02 [14:05:43<10:25:25] +[titan] 2025-10-05 12:40:02,884 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:40:05,066 - root - INFO - step: 23000 loss: 2.1507 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 12:40:05,066 - root - INFO - lr: 2.2542e-05 gnorm: 1.06 [14:05:54<10:25:14] +[titan] 2025-10-05 12:40:15,916 - root - INFO - step: 23005 loss: 2.1008 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 12:40:15,916 - root - INFO - lr: 2.2534e-05 gnorm: 1.09 [14:06:05<10:25:03] +[titan] 2025-10-05 12:40:26,775 - root - INFO - step: 23010 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8717 +[titan] 2025-10-05 12:40:26,775 - root - INFO - lr: 2.2525e-05 gnorm: 1.06 [14:06:16<10:24:52] +[titan] 2025-10-05 12:40:37,622 - root - INFO - step: 23015 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8876 +[titan] 2025-10-05 12:40:37,622 - root - INFO - lr: 2.2516e-05 gnorm: 1.06 [14:06:27<10:24:40] +[titan] 2025-10-05 12:40:48,479 - root - INFO - step: 23020 loss: 2.1422 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:40:48,480 - root - INFO - lr: 2.2507e-05 gnorm: 1.08 [14:06:38<10:24:29] +[titan] 2025-10-05 12:40:59,327 - root - INFO - step: 23025 loss: 2.0668 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:40:59,327 - root - INFO - lr: 2.2499e-05 gnorm: 1.05 [14:06:48<10:24:18] +[titan] 2025-10-05 12:41:10,188 - root - INFO - step: 23030 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:41:10,188 - root - INFO - lr: 
2.2490e-05 gnorm: 1.06 [14:06:59<10:24:07] +[titan] 2025-10-05 12:41:21,085 - root - INFO - step: 23035 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 12:41:21,085 - root - INFO - lr: 2.2481e-05 gnorm: 1.07 [14:07:10<10:23:56] +[titan] 2025-10-05 12:41:32,099 - root - INFO - step: 23040 loss: 2.1136 memory: 118.84GiB(85.28%) tps: 29,752 tflops: 412.76 mfu: 41.74% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:41:32,100 - root - INFO - lr: 2.2473e-05 gnorm: 1.05 [14:07:21<10:23:45] +[titan] 2025-10-05 12:41:32,279 - root - INFO - Dumping profiler traces at step 23040 +[titan] 2025-10-05 12:41:32,319 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:41:43,199 - root - INFO - step: 23045 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.58 mfu: 41.41% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 12:41:43,199 - root - INFO - lr: 2.2464e-05 gnorm: 1.07 [14:07:32<10:23:34] +[titan] 2025-10-05 12:41:51,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:41:54,062 - root - INFO - step: 23050 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:41:54,062 - root - INFO - lr: 2.2455e-05 gnorm: 1.10 [14:07:43<10:23:23] +[titan] 2025-10-05 12:42:04,939 - root - INFO - step: 23055 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 12:42:04,940 - root - INFO - lr: 2.2447e-05 gnorm: 1.10 [14:07:54<10:23:11] +[titan] 2025-10-05 12:42:15,807 - root - INFO - step: 23060 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 12:42:15,807 - root - INFO - lr: 2.2438e-05 gnorm: 1.09 [14:08:05<10:23:00] +[titan] 2025-10-05 12:42:26,648 - root - INFO - step: 23065 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 12:42:26,648 - root - INFO - lr: 2.2429e-05 gnorm: 1.06 [14:08:16<10:22:49] +[titan] 2025-10-05 12:42:37,523 - root - INFO - step: 23070 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 12:42:37,523 - root - INFO - lr: 2.2420e-05 gnorm: 1.06 [14:08:27<10:22:38] +[titan] 2025-10-05 12:42:48,380 - root - INFO - step: 23075 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 12:42:48,380 - root - INFO - lr: 2.2412e-05 gnorm: 1.08 [14:08:38<10:22:27] +[titan] 2025-10-05 12:42:59,255 - root - INFO - step: 23080 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 12:42:59,255 - root - INFO - lr: 
2.2403e-05 gnorm: 1.11 [14:08:48<10:22:16] +[titan] 2025-10-05 12:43:10,104 - root - INFO - step: 23085 loss: 2.0492 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 12:43:10,104 - root - INFO - lr: 2.2394e-05 gnorm: 1.06 [14:08:59<10:22:04] +[titan] 2025-10-05 12:43:20,963 - root - INFO - step: 23090 loss: 2.0906 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8478 +[titan] 2025-10-05 12:43:20,963 - root - INFO - lr: 2.2386e-05 gnorm: 1.07 [14:09:10<10:21:53] +[titan] 2025-10-05 12:43:31,830 - root - INFO - step: 23095 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:43:31,830 - root - INFO - lr: 2.2377e-05 gnorm: 1.06 [14:09:21<10:21:42] +[titan] 2025-10-05 12:43:40,505 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:43:42,684 - root - INFO - step: 23100 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 12:43:42,684 - root - INFO - lr: 2.2368e-05 gnorm: 1.08 [14:09:32<10:21:31] +[titan] 2025-10-05 12:43:53,521 - root - INFO - step: 23105 loss: 2.1541 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9043 +[titan] 2025-10-05 12:43:53,521 - root - INFO - lr: 2.2360e-05 gnorm: 1.12 [14:09:43<10:21:20] +[titan] 2025-10-05 12:44:04,389 - root - INFO - step: 23110 loss: 2.0636 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:44:04,389 - root - INFO - lr: 2.2351e-05 gnorm: 1.09 [14:09:54<10:21:09] +[titan] 2025-10-05 12:44:15,253 - root - INFO - step: 23115 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 12:44:15,254 - root - INFO - lr: 2.2342e-05 gnorm: 1.06 [14:10:04<10:20:57] +[titan] 2025-10-05 12:44:26,116 - root - INFO - step: 23120 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 12:44:26,116 - root - INFO - lr: 2.2334e-05 gnorm: 1.03 [14:10:15<10:20:46] +[titan] 2025-10-05 12:44:36,986 - root - INFO - step: 23125 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:44:36,986 - root - INFO - lr: 2.2325e-05 gnorm: 1.04 [14:10:26<10:20:35] +[titan] 2025-10-05 12:44:47,859 - root - INFO - step: 23130 loss: 2.1268 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 12:44:47,860 - root - INFO - lr: 
2.2316e-05 gnorm: 1.08 [14:10:37<10:20:24] +[titan] 2025-10-05 12:44:58,729 - root - INFO - step: 23135 loss: 2.1048 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 12:44:58,729 - root - INFO - lr: 2.2308e-05 gnorm: 1.10 [14:10:48<10:20:13] +[titan] 2025-10-05 12:45:09,592 - root - INFO - step: 23140 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8194 +[titan] 2025-10-05 12:45:09,592 - root - INFO - lr: 2.2299e-05 gnorm: 1.09 [14:10:59<10:20:02] +[titan] 2025-10-05 12:45:20,444 - root - INFO - step: 23145 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:45:20,444 - root - INFO - lr: 2.2290e-05 gnorm: 1.10 [14:11:10<10:19:51] +[titan] 2025-10-05 12:45:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:45:31,320 - root - INFO - step: 23150 loss: 2.0752 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:45:31,320 - root - INFO - lr: 2.2281e-05 gnorm: 1.05 [14:11:20<10:19:39] +[titan] 2025-10-05 12:45:42,180 - root - INFO - step: 23155 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:45:42,181 - root - INFO - lr: 2.2273e-05 gnorm: 1.10 [14:11:31<10:19:28] +[titan] 2025-10-05 12:45:53,058 - root - INFO - step: 23160 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 12:45:53,058 - root - INFO - lr: 2.2264e-05 gnorm: 1.06 [14:11:42<10:19:17] +[titan] 2025-10-05 12:46:03,966 - root - INFO - step: 23165 loss: 1.9940 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 12:46:03,966 - root - INFO - lr: 2.2255e-05 gnorm: 1.07 [14:11:53<10:19:06] +[titan] 2025-10-05 12:46:14,825 - root - INFO - step: 23170 loss: 2.1123 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8673 +[titan] 2025-10-05 12:46:14,825 - root - INFO - lr: 2.2247e-05 gnorm: 1.06 [14:12:04<10:18:55] +[titan] 2025-10-05 12:46:25,704 - root - INFO - step: 23175 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:46:25,705 - root - INFO - lr: 2.2238e-05 gnorm: 1.10 [14:12:15<10:18:44] +[titan] 2025-10-05 12:46:36,828 - root - INFO - step: 23180 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 29,460 tflops: 408.71 mfu: 41.33% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8561 +[titan] 2025-10-05 12:46:36,828 - root - INFO - lr: 
2.2229e-05 gnorm: 1.09 [14:12:26<10:18:33] +[titan] 2025-10-05 12:46:47,680 - root - INFO - step: 23185 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 12:46:47,680 - root - INFO - lr: 2.2221e-05 gnorm: 1.07 [14:12:37<10:18:22] +[titan] 2025-10-05 12:46:58,544 - root - INFO - step: 23190 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8817 +[titan] 2025-10-05 12:46:58,544 - root - INFO - lr: 2.2212e-05 gnorm: 1.07 [14:12:48<10:18:10] +[titan] 2025-10-05 12:47:09,423 - root - INFO - step: 23195 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 12:47:09,424 - root - INFO - lr: 2.2203e-05 gnorm: 1.07 [14:12:59<10:17:59] +[titan] 2025-10-05 12:47:18,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:47:20,285 - root - INFO - step: 23200 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:47:20,285 - root - INFO - lr: 2.2195e-05 gnorm: 1.05 [14:13:09<10:17:48] +[titan] 2025-10-05 12:47:31,161 - root - INFO - step: 23205 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:47:31,161 - root - INFO - lr: 2.2186e-05 gnorm: 1.07 [14:13:20<10:17:37] +[titan] 2025-10-05 12:47:42,031 - root - INFO - step: 23210 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 12:47:42,031 - root - INFO - lr: 2.2177e-05 gnorm: 1.08 [14:13:31<10:17:26] +[titan] 2025-10-05 12:47:52,877 - root - INFO - step: 23215 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8652 +[titan] 2025-10-05 12:47:52,877 - root - INFO - lr: 2.2169e-05 gnorm: 1.07 [14:13:42<10:17:15] +[titan] 2025-10-05 12:48:03,720 - root - INFO - step: 23220 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:48:03,720 - root - INFO - lr: 2.2160e-05 gnorm: 1.06 [14:13:53<10:17:03] +[titan] 2025-10-05 12:48:14,569 - root - INFO - step: 23225 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8277 +[titan] 2025-10-05 12:48:14,569 - root - INFO - lr: 2.2151e-05 gnorm: 1.07 [14:14:04<10:16:52] +[titan] 2025-10-05 12:48:25,456 - root - INFO - step: 23230 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:48:25,456 - root - INFO - lr: 
2.2143e-05 gnorm: 1.09 [14:14:15<10:16:41] +[titan] 2025-10-05 12:48:36,322 - root - INFO - step: 23235 loss: 2.0597 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 12:48:36,322 - root - INFO - lr: 2.2134e-05 gnorm: 1.05 [14:14:25<10:16:30] +[titan] 2025-10-05 12:48:47,191 - root - INFO - step: 23240 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 12:48:47,191 - root - INFO - lr: 2.2125e-05 gnorm: 1.06 [14:14:36<10:16:19] +[titan] 2025-10-05 12:48:58,072 - root - INFO - step: 23245 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8758 +[titan] 2025-10-05 12:48:58,072 - root - INFO - lr: 2.2117e-05 gnorm: 1.08 [14:14:47<10:16:08] +[titan] 2025-10-05 12:49:06,748 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:49:08,942 - root - INFO - step: 23250 loss: 2.0918 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 12:49:08,943 - root - INFO - lr: 2.2108e-05 gnorm: 1.10 [14:14:58<10:15:57] +[titan] 2025-10-05 12:49:19,822 - root - INFO - step: 23255 loss: 2.1127 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8665 +[titan] 2025-10-05 12:49:19,822 - root - INFO - lr: 2.2099e-05 gnorm: 1.05 [14:15:09<10:15:45] +[titan] 2025-10-05 12:49:30,722 - root - INFO - step: 23260 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8961 +[titan] 2025-10-05 12:49:30,723 - root - INFO - lr: 2.2091e-05 gnorm: 1.10 [14:15:20<10:15:34] +[titan] 2025-10-05 12:49:41,642 - root - INFO - step: 23265 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 12:49:41,642 - root - INFO - lr: 2.2082e-05 gnorm: 1.09 [14:15:31<10:15:23] +[titan] 2025-10-05 12:49:52,513 - root - INFO - step: 23270 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 12:49:52,513 - root - INFO - lr: 2.2073e-05 gnorm: 1.08 [14:15:42<10:15:12] +[titan] 2025-10-05 12:50:03,384 - root - INFO - step: 23275 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8161 +[titan] 2025-10-05 12:50:03,384 - root - INFO - lr: 2.2065e-05 gnorm: 1.07 [14:15:53<10:15:01] +[titan] 2025-10-05 12:50:14,264 - root - INFO - step: 23280 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 12:50:14,264 - root - INFO - lr: 
2.2056e-05 gnorm: 1.06 [14:16:03<10:14:50] +[titan] 2025-10-05 12:50:25,152 - root - INFO - step: 23285 loss: 2.1398 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 12:50:25,152 - root - INFO - lr: 2.2047e-05 gnorm: 1.05 [14:16:14<10:14:39] +[titan] 2025-10-05 12:50:36,029 - root - INFO - step: 23290 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 12:50:36,029 - root - INFO - lr: 2.2039e-05 gnorm: 1.05 [14:16:25<10:14:27] +[titan] 2025-10-05 12:50:46,933 - root - INFO - step: 23295 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:50:46,933 - root - INFO - lr: 2.2030e-05 gnorm: 1.10 [14:16:36<10:14:16] +[titan] 2025-10-05 12:50:55,612 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:50:57,805 - root - INFO - step: 23300 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:50:57,805 - root - INFO - lr: 2.2021e-05 gnorm: 1.10 [14:16:47<10:14:05] +[titan] 2025-10-05 12:51:08,673 - root - INFO - step: 23305 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:51:08,673 - root - INFO - lr: 2.2013e-05 gnorm: 1.06 [14:16:58<10:13:54] +[titan] 2025-10-05 12:51:19,553 - root - INFO - step: 23310 loss: 2.0851 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:51:19,553 - root - INFO - lr: 2.2004e-05 gnorm: 1.07 [14:17:09<10:13:43] +[titan] 2025-10-05 12:51:30,434 - root - INFO - step: 23315 loss: 2.0776 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 12:51:30,435 - root - INFO - lr: 2.1995e-05 gnorm: 1.07 [14:17:20<10:13:32] +[titan] 2025-10-05 12:51:41,297 - root - INFO - step: 23320 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:51:41,298 - root - INFO - lr: 2.1987e-05 gnorm: 1.06 [14:17:30<10:13:21] +[titan] 2025-10-05 12:51:52,171 - root - INFO - step: 23325 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8412 +[titan] 2025-10-05 12:51:52,172 - root - INFO - lr: 2.1978e-05 gnorm: 1.06 [14:17:41<10:13:09] +[titan] 2025-10-05 12:52:03,034 - root - INFO - step: 23330 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 12:52:03,034 - root - INFO - lr: 
2.1969e-05 gnorm: 1.06 [14:17:52<10:12:58] +[titan] 2025-10-05 12:52:13,904 - root - INFO - step: 23335 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:52:13,905 - root - INFO - lr: 2.1961e-05 gnorm: 1.08 [14:18:03<10:12:47] +[titan] 2025-10-05 12:52:24,765 - root - INFO - step: 23340 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 12:52:24,765 - root - INFO - lr: 2.1952e-05 gnorm: 1.05 [14:18:14<10:12:36] +[titan] 2025-10-05 12:52:35,613 - root - INFO - step: 23345 loss: 2.0713 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 12:52:35,613 - root - INFO - lr: 2.1944e-05 gnorm: 1.08 [14:18:25<10:12:25] +[titan] 2025-10-05 12:52:44,296 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:52:46,481 - root - INFO - step: 23350 loss: 2.0693 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:52:46,481 - root - INFO - lr: 2.1935e-05 gnorm: 1.06 [14:18:36<10:12:14] +[titan] 2025-10-05 12:52:57,341 - root - INFO - step: 23355 loss: 2.1206 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 12:52:57,341 - root - INFO - lr: 2.1926e-05 gnorm: 1.09 [14:18:46<10:12:03] +[titan] 2025-10-05 12:53:08,214 - root - INFO - step: 23360 loss: 2.1012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:53:08,214 - root - INFO - lr: 2.1918e-05 gnorm: 1.05 [14:18:57<10:11:51] +[titan] 2025-10-05 12:53:19,079 - root - INFO - step: 23365 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8477 +[titan] 2025-10-05 12:53:19,079 - root - INFO - lr: 2.1909e-05 gnorm: 1.11 [14:19:08<10:11:40] +[titan] 2025-10-05 12:53:29,939 - root - INFO - step: 23370 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8916 +[titan] 2025-10-05 12:53:29,939 - root - INFO - lr: 2.1900e-05 gnorm: 1.11 [14:19:19<10:11:29] +[titan] 2025-10-05 12:53:40,836 - root - INFO - step: 23375 loss: 2.0922 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 12:53:40,836 - root - INFO - lr: 2.1892e-05 gnorm: 1.13 [14:19:30<10:11:18] +[titan] 2025-10-05 12:53:51,725 - root - INFO - step: 23380 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 12:53:51,725 - root - INFO - lr: 
2.1883e-05 gnorm: 1.07 [14:19:41<10:11:07] +[titan] 2025-10-05 12:54:02,631 - root - INFO - step: 23385 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 12:54:02,631 - root - INFO - lr: 2.1874e-05 gnorm: 1.07 [14:19:52<10:10:56] +[titan] 2025-10-05 12:54:13,542 - root - INFO - step: 23390 loss: 2.0791 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:54:13,543 - root - INFO - lr: 2.1866e-05 gnorm: 1.12 [14:20:03<10:10:45] +[titan] 2025-10-05 12:54:24,401 - root - INFO - step: 23395 loss: 2.0662 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 12:54:24,401 - root - INFO - lr: 2.1857e-05 gnorm: 1.08 [14:20:14<10:10:33] +[titan] 2025-10-05 12:54:33,071 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:54:35,256 - root - INFO - step: 23400 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 12:54:35,256 - root - INFO - lr: 2.1848e-05 gnorm: 1.09 [14:20:24<10:10:22] +[titan] 2025-10-05 12:54:46,105 - root - INFO - step: 23405 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8085 +[titan] 2025-10-05 12:54:46,105 - root - INFO - lr: 2.1840e-05 gnorm: 1.03 [14:20:35<10:10:11] +[titan] 2025-10-05 12:54:56,966 - root - INFO - step: 23410 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8644 +[titan] 2025-10-05 12:54:56,966 - root - INFO - lr: 2.1831e-05 gnorm: 1.09 [14:20:46<10:10:00] +[titan] 2025-10-05 12:55:07,809 - root - INFO - step: 23415 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8747 +[titan] 2025-10-05 12:55:07,810 - root - INFO - lr: 2.1823e-05 gnorm: 1.09 [14:20:57<10:09:49] +[titan] 2025-10-05 12:55:18,648 - root - INFO - step: 23420 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:55:18,648 - root - INFO - lr: 2.1814e-05 gnorm: 1.09 [14:21:08<10:09:38] +[titan] 2025-10-05 12:55:29,531 - root - INFO - step: 23425 loss: 2.1312 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 12:55:29,531 - root - INFO - lr: 2.1805e-05 gnorm: 1.07 [14:21:19<10:09:26] +[titan] 2025-10-05 12:55:40,423 - root - INFO - step: 23430 loss: 2.0740 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:55:40,423 - root - INFO - lr: 
2.1797e-05 gnorm: 1.07 [14:21:30<10:09:15] +[titan] 2025-10-05 12:55:51,286 - root - INFO - step: 23435 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 12:55:51,286 - root - INFO - lr: 2.1788e-05 gnorm: 1.05 [14:21:40<10:09:04] +[titan] 2025-10-05 12:56:02,131 - root - INFO - step: 23440 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8791 +[titan] 2025-10-05 12:56:02,131 - root - INFO - lr: 2.1779e-05 gnorm: 1.05 [14:21:51<10:08:53] +[titan] 2025-10-05 12:56:12,982 - root - INFO - step: 23445 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 12:56:12,982 - root - INFO - lr: 2.1771e-05 gnorm: 1.07 [14:22:02<10:08:42] +[titan] 2025-10-05 12:56:21,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:56:23,837 - root - INFO - step: 23450 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 12:56:23,837 - root - INFO - lr: 2.1762e-05 gnorm: 1.03 [14:22:13<10:08:31] +[titan] 2025-10-05 12:56:34,722 - root - INFO - step: 23455 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 12:56:34,722 - root - INFO - lr: 2.1753e-05 gnorm: 1.08 [14:22:24<10:08:20] +[titan] 2025-10-05 12:56:45,579 - root - INFO - step: 23460 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 12:56:45,579 - root - INFO - lr: 2.1745e-05 gnorm: 1.05 [14:22:35<10:08:08] +[titan] 2025-10-05 12:56:56,409 - root - INFO - step: 23465 loss: 2.0982 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 12:56:56,409 - root - INFO - lr: 2.1736e-05 gnorm: 1.07 [14:22:46<10:07:57] +[titan] 2025-10-05 12:57:07,229 - root - INFO - step: 23470 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:57:07,229 - root - INFO - lr: 2.1728e-05 gnorm: 1.08 [14:22:56<10:07:46] +[titan] 2025-10-05 12:57:18,049 - root - INFO - step: 23475 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 12:57:18,049 - root - INFO - lr: 2.1719e-05 gnorm: 1.09 [14:23:07<10:07:35] +[titan] 2025-10-05 12:57:28,860 - root - INFO - step: 23480 loss: 2.0930 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:57:28,860 - root - INFO - lr: 
2.1710e-05 gnorm: 1.07 [14:23:18<10:07:24] +[titan] 2025-10-05 12:57:39,712 - root - INFO - step: 23485 loss: 2.1212 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8737 +[titan] 2025-10-05 12:57:39,712 - root - INFO - lr: 2.1702e-05 gnorm: 1.09 [14:23:29<10:07:13] +[titan] 2025-10-05 12:57:50,568 - root - INFO - step: 23490 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 12:57:50,568 - root - INFO - lr: 2.1693e-05 gnorm: 1.06 [14:23:40<10:07:01] +[titan] 2025-10-05 12:58:01,418 - root - INFO - step: 23495 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 12:58:01,418 - root - INFO - lr: 2.1684e-05 gnorm: 1.13 [14:23:51<10:06:50] +[titan] 2025-10-05 12:58:10,087 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:58:12,263 - root - INFO - step: 23500 loss: 2.0793 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:58:12,263 - root - INFO - lr: 2.1676e-05 gnorm: 1.05 [14:24:01<10:06:39] +[titan] 2025-10-05 12:58:23,119 - root - INFO - step: 23505 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 12:58:23,119 - root - INFO - lr: 2.1667e-05 gnorm: 1.09 [14:24:12<10:06:28] +[titan] 2025-10-05 12:58:33,963 - root - INFO - step: 23510 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 12:58:33,964 - root - INFO - lr: 2.1659e-05 gnorm: 1.07 [14:24:23<10:06:17] +[titan] 2025-10-05 12:58:44,834 - root - INFO - step: 23515 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 12:58:44,834 - root - INFO - lr: 2.1650e-05 gnorm: 1.06 [14:24:34<10:06:06] +[titan] 2025-10-05 12:58:55,708 - root - INFO - step: 23520 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8363 +[titan] 2025-10-05 12:58:55,708 - root - INFO - lr: 2.1641e-05 gnorm: 1.05 [14:24:45<10:05:55] +[titan] 2025-10-05 12:59:06,592 - root - INFO - step: 23525 loss: 2.0619 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 12:59:06,592 - root - INFO - lr: 2.1633e-05 gnorm: 1.06 [14:24:56<10:05:43] +[titan] 2025-10-05 12:59:17,459 - root - INFO - step: 23530 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8941 +[titan] 2025-10-05 12:59:17,459 - root - INFO - lr: 
2.1624e-05 gnorm: 1.08 [14:25:07<10:05:32] +[titan] 2025-10-05 12:59:28,332 - root - INFO - step: 23535 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:59:28,332 - root - INFO - lr: 2.1616e-05 gnorm: 1.07 [14:25:17<10:05:21] +[titan] 2025-10-05 12:59:39,189 - root - INFO - step: 23540 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 12:59:39,190 - root - INFO - lr: 2.1607e-05 gnorm: 1.06 [14:25:28<10:05:10] +[titan] 2025-10-05 12:59:50,087 - root - INFO - step: 23545 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8219 +[titan] 2025-10-05 12:59:50,087 - root - INFO - lr: 2.1598e-05 gnorm: 1.07 [14:25:39<10:04:59] +[titan] 2025-10-05 12:59:58,865 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:00:01,039 - root - INFO - step: 23550 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 13:00:01,039 - root - INFO - lr: 2.1590e-05 gnorm: 1.13 [14:25:50<10:04:48] +[titan] 2025-10-05 13:00:05,582 - root - INFO - Dumping profiler traces at step 23552 +[titan] 2025-10-05 13:00:05,624 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:00:12,148 - root - INFO - step: 23555 loss: 2.0620 memory: 118.84GiB(85.28%) tps: 29,498 tflops: 409.24 mfu: 41.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 13:00:12,148 - root - INFO - lr: 2.1581e-05 gnorm: 1.07 [14:26:01<10:04:37] +[titan] 2025-10-05 13:00:23,000 - root - INFO - step: 23560 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 13:00:23,000 - root - INFO - lr: 2.1572e-05 gnorm: 1.11 [14:26:12<10:04:26] +[titan] 2025-10-05 13:00:33,832 - root - INFO - step: 23565 loss: 2.1010 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.69 mfu: 42.44% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 13:00:33,833 - root - INFO - lr: 2.1564e-05 gnorm: 1.09 [14:26:23<10:04:14] +[titan] 2025-10-05 13:00:44,700 - root - INFO - step: 23570 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 13:00:44,700 - root - INFO - lr: 2.1555e-05 gnorm: 1.03 [14:26:34<10:04:03] +[titan] 2025-10-05 13:00:55,558 - root - INFO - step: 23575 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 13:00:55,558 - root - INFO - lr: 2.1547e-05 gnorm: 1.08 [14:26:45<10:03:52] +[titan] 2025-10-05 13:01:06,406 - root - INFO - step: 23580 loss: 
2.1114 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 13:01:06,406 - root - INFO - lr: 2.1538e-05 gnorm: 1.10 [14:26:56<10:03:41] +[titan] 2025-10-05 13:01:17,310 - root - INFO - step: 23585 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 13:01:17,310 - root - INFO - lr: 2.1529e-05 gnorm: 1.11 [14:27:06<10:03:30] +[titan] 2025-10-05 13:01:28,160 - root - INFO - step: 23590 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:01:28,161 - root - INFO - lr: 2.1521e-05 gnorm: 1.07 [14:27:17<10:03:19] +[titan] 2025-10-05 13:01:39,026 - root - INFO - step: 23595 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 13:01:39,026 - root - INFO - lr: 2.1512e-05 gnorm: 1.08 [14:27:28<10:03:08] +[titan] 2025-10-05 13:01:47,708 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:01:49,899 - root - INFO - step: 23600 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 13:01:49,899 - root - INFO - lr: 2.1504e-05 gnorm: 1.10 [14:27:39<10:02:56] +[titan] 2025-10-05 13:02:00,769 - root - INFO - step: 23605 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 13:02:00,769 - root - INFO - lr: 2.1495e-05 gnorm: 1.04 [14:27:50<10:02:45] +[titan] 2025-10-05 13:02:11,618 - root - INFO - step: 23610 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 13:02:11,618 - root - INFO - lr: 2.1486e-05 gnorm: 1.09 [14:28:01<10:02:34] +[titan] 2025-10-05 13:02:22,509 - root - INFO - step: 23615 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 13:02:22,509 - root - INFO - lr: 2.1478e-05 gnorm: 1.07 [14:28:12<10:02:23] +[titan] 2025-10-05 13:02:33,369 - root - INFO - step: 23620 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 13:02:33,369 - root - INFO - lr: 2.1469e-05 gnorm: 1.06 [14:28:22<10:02:12] +[titan] 2025-10-05 13:02:44,239 - root - INFO - step: 23625 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8846 +[titan] 2025-10-05 13:02:44,239 - root - INFO - lr: 2.1461e-05 gnorm: 1.08 [14:28:33<10:02:01] +[titan] 2025-10-05 13:02:55,117 - root - INFO - step: 23630 loss: 2.0120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:02:55,117 - root - INFO - lr: 
2.1452e-05 gnorm: 1.06 [14:28:44<10:01:50] +[titan] 2025-10-05 13:03:05,938 - root - INFO - step: 23635 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 13:03:05,938 - root - INFO - lr: 2.1443e-05 gnorm: 1.04 [14:28:55<10:01:38] +[titan] 2025-10-05 13:03:16,775 - root - INFO - step: 23640 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 13:03:16,775 - root - INFO - lr: 2.1435e-05 gnorm: 1.06 [14:29:06<10:01:27] +[titan] 2025-10-05 13:03:27,645 - root - INFO - step: 23645 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8228 +[titan] 2025-10-05 13:03:27,645 - root - INFO - lr: 2.1426e-05 gnorm: 1.08 [14:29:17<10:01:16] +[titan] 2025-10-05 13:03:36,315 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:03:38,490 - root - INFO - step: 23650 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 13:03:38,491 - root - INFO - lr: 2.1418e-05 gnorm: 1.14 [14:29:28<10:01:05] +[titan] 2025-10-05 13:03:49,367 - root - INFO - step: 23655 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 13:03:49,367 - root - INFO - lr: 2.1409e-05 gnorm: 1.07 [14:29:38<10:00:54] +[titan] 2025-10-05 13:04:00,220 - root - INFO - step: 23660 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 13:04:00,220 - root - INFO - lr: 2.1400e-05 gnorm: 1.05 [14:29:49<10:00:43] +[titan] 2025-10-05 13:04:11,080 - root - INFO - step: 23665 loss: 2.0569 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:04:11,080 - root - INFO - lr: 2.1392e-05 gnorm: 1.05 [14:30:00<10:00:32] +[titan] 2025-10-05 13:04:21,931 - root - INFO - step: 23670 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 13:04:21,931 - root - INFO - lr: 2.1383e-05 gnorm: 1.08 [14:30:11<10:00:20] +[titan] 2025-10-05 13:04:32,799 - root - INFO - step: 23675 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:04:32,799 - root - INFO - lr: 2.1375e-05 gnorm: 1.09 [14:30:22<10:00:09] +[titan] 2025-10-05 13:04:43,687 - root - INFO - step: 23680 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8405 +[titan] 2025-10-05 13:04:43,687 - root - INFO - lr: 
2.1366e-05 gnorm: 1.09 [14:30:33< 9:59:58] +[titan] 2025-10-05 13:04:54,557 - root - INFO - step: 23685 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8546 +[titan] 2025-10-05 13:04:54,557 - root - INFO - lr: 2.1358e-05 gnorm: 1.06 [14:30:44< 9:59:47] +[titan] 2025-10-05 13:05:05,423 - root - INFO - step: 23690 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 13:05:05,423 - root - INFO - lr: 2.1349e-05 gnorm: 1.11 [14:30:55< 9:59:36] +[titan] 2025-10-05 13:05:16,292 - root - INFO - step: 23695 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 13:05:16,292 - root - INFO - lr: 2.1340e-05 gnorm: 1.07 [14:31:05< 9:59:25] +[titan] 2025-10-05 13:05:24,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:05:27,152 - root - INFO - step: 23700 loss: 2.0847 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 13:05:27,152 - root - INFO - lr: 2.1332e-05 gnorm: 1.06 [14:31:16< 9:59:14] +[titan] 2025-10-05 13:05:38,037 - root - INFO - step: 23705 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 13:05:38,037 - root - INFO - lr: 2.1323e-05 gnorm: 1.07 [14:31:27< 9:59:02] +[titan] 2025-10-05 13:05:48,993 - root - INFO - step: 23710 loss: 2.0935 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8500 +[titan] 2025-10-05 13:05:48,993 - root - INFO - lr: 2.1315e-05 gnorm: 1.06 [14:31:38< 9:58:51] +[titan] 2025-10-05 13:05:59,853 - root - INFO - step: 23715 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 13:05:59,853 - root - INFO - lr: 2.1306e-05 gnorm: 1.12 [14:31:49< 9:58:40] +[titan] 2025-10-05 13:06:10,728 - root - INFO - step: 23720 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 13:06:10,729 - root - INFO - lr: 2.1297e-05 gnorm: 1.05 [14:32:00< 9:58:29] +[titan] 2025-10-05 13:06:21,603 - root - INFO - step: 23725 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8106 +[titan] 2025-10-05 13:06:21,603 - root - INFO - lr: 2.1289e-05 gnorm: 1.04 [14:32:11< 9:58:18] +[titan] 2025-10-05 13:06:32,482 - root - INFO - step: 23730 loss: 2.0312 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 13:06:32,482 - root - INFO - lr: 
2.1280e-05 gnorm: 1.09 [14:32:22< 9:58:07] +[titan] 2025-10-05 13:06:43,351 - root - INFO - step: 23735 loss: 2.0992 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 13:06:43,351 - root - INFO - lr: 2.1272e-05 gnorm: 1.09 [14:32:32< 9:57:56] +[titan] 2025-10-05 13:06:54,243 - root - INFO - step: 23740 loss: 2.0278 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 13:06:54,243 - root - INFO - lr: 2.1263e-05 gnorm: 1.08 [14:32:43< 9:57:45] +[titan] 2025-10-05 13:07:05,147 - root - INFO - step: 23745 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:07:05,147 - root - INFO - lr: 2.1255e-05 gnorm: 1.08 [14:32:54< 9:57:33] +[titan] 2025-10-05 13:07:13,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:07:16,019 - root - INFO - step: 23750 loss: 2.0022 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 13:07:16,019 - root - INFO - lr: 2.1246e-05 gnorm: 1.06 [14:33:05< 9:57:22] +[titan] 2025-10-05 13:07:26,891 - root - INFO - step: 23755 loss: 2.0412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 13:07:26,891 - root - INFO - lr: 2.1237e-05 gnorm: 1.10 [14:33:16< 9:57:11] +[titan] 2025-10-05 13:07:37,752 - root - INFO - step: 23760 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:07:37,753 - root - INFO - lr: 2.1229e-05 gnorm: 1.10 [14:33:27< 9:57:00] +[titan] 2025-10-05 13:07:48,618 - root - INFO - step: 23765 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 13:07:48,618 - root - INFO - lr: 2.1220e-05 gnorm: 1.07 [14:33:38< 9:56:49] +[titan] 2025-10-05 13:07:59,505 - root - INFO - step: 23770 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:07:59,505 - root - INFO - lr: 2.1212e-05 gnorm: 1.10 [14:33:49< 9:56:38] +[titan] 2025-10-05 13:08:10,407 - root - INFO - step: 23775 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 13:08:10,408 - root - INFO - lr: 2.1203e-05 gnorm: 1.07 [14:33:59< 9:56:27] +[titan] 2025-10-05 13:08:21,270 - root - INFO - step: 23780 loss: 2.0507 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 13:08:21,270 - root - INFO - lr: 
2.1195e-05 gnorm: 1.12 [14:34:10< 9:56:15] +[titan] 2025-10-05 13:08:32,141 - root - INFO - step: 23785 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:08:32,141 - root - INFO - lr: 2.1186e-05 gnorm: 1.07 [14:34:21< 9:56:04] +[titan] 2025-10-05 13:08:43,013 - root - INFO - step: 23790 loss: 2.0543 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8158 +[titan] 2025-10-05 13:08:43,013 - root - INFO - lr: 2.1177e-05 gnorm: 1.08 [14:34:32< 9:55:53] +[titan] 2025-10-05 13:08:53,898 - root - INFO - step: 23795 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 13:08:53,898 - root - INFO - lr: 2.1169e-05 gnorm: 1.08 [14:34:43< 9:55:42] +[titan] 2025-10-05 13:09:02,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:09:04,767 - root - INFO - step: 23800 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:09:04,767 - root - INFO - lr: 2.1160e-05 gnorm: 1.07 [14:34:54< 9:55:31] +[titan] 2025-10-05 13:09:15,675 - root - INFO - step: 23805 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:09:15,675 - root - INFO - lr: 2.1152e-05 gnorm: 1.07 [14:35:05< 9:55:20] +[titan] 2025-10-05 13:09:26,546 - root - INFO - step: 23810 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8689 +[titan] 2025-10-05 13:09:26,546 - root - INFO - lr: 2.1143e-05 gnorm: 1.06 [14:35:16< 9:55:09] +[titan] 2025-10-05 13:09:37,416 - root - INFO - step: 23815 loss: 2.0689 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:09:37,416 - root - INFO - lr: 2.1135e-05 gnorm: 1.04 [14:35:26< 9:54:58] +[titan] 2025-10-05 13:09:48,302 - root - INFO - step: 23820 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 13:09:48,302 - root - INFO - lr: 2.1126e-05 gnorm: 1.05 [14:35:37< 9:54:46] +[titan] 2025-10-05 13:09:59,200 - root - INFO - step: 23825 loss: 2.1145 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8692 +[titan] 2025-10-05 13:09:59,200 - root - INFO - lr: 2.1118e-05 gnorm: 1.10 [14:35:48< 9:54:35] +[titan] 2025-10-05 13:10:10,087 - root - INFO - step: 23830 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:10:10,087 - root - INFO - lr: 
2.1109e-05 gnorm: 1.07 [14:35:59< 9:54:24] +[titan] 2025-10-05 13:10:20,968 - root - INFO - step: 23835 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8802 +[titan] 2025-10-05 13:10:20,968 - root - INFO - lr: 2.1100e-05 gnorm: 1.11 [14:36:10< 9:54:13] +[titan] 2025-10-05 13:10:31,877 - root - INFO - step: 23840 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 13:10:31,877 - root - INFO - lr: 2.1092e-05 gnorm: 1.07 [14:36:21< 9:54:02] +[titan] 2025-10-05 13:10:42,750 - root - INFO - step: 23845 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8826 +[titan] 2025-10-05 13:10:42,751 - root - INFO - lr: 2.1083e-05 gnorm: 1.08 [14:36:32< 9:53:51] +[titan] 2025-10-05 13:10:51,448 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:10:53,641 - root - INFO - step: 23850 loss: 2.0254 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 13:10:53,642 - root - INFO - lr: 2.1075e-05 gnorm: 1.07 [14:36:43< 9:53:40] +[titan] 2025-10-05 13:11:04,523 - root - INFO - step: 23855 loss: 2.0986 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 13:11:04,523 - root - INFO - lr: 2.1066e-05 gnorm: 1.09 [14:36:54< 9:53:29] +[titan] 2025-10-05 13:11:15,407 - root - INFO - step: 23860 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 13:11:15,407 - root - INFO - lr: 2.1058e-05 gnorm: 1.07 [14:37:04< 9:53:17] +[titan] 2025-10-05 13:11:26,299 - root - INFO - step: 23865 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8171 +[titan] 2025-10-05 13:11:26,299 - root - INFO - lr: 2.1049e-05 gnorm: 1.08 [14:37:15< 9:53:06] +[titan] 2025-10-05 13:11:37,198 - root - INFO - step: 23870 loss: 2.1119 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:11:37,198 - root - INFO - lr: 2.1041e-05 gnorm: 1.10 [14:37:26< 9:52:55] +[titan] 2025-10-05 13:11:48,068 - root - INFO - step: 23875 loss: 2.0789 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 13:11:48,068 - root - INFO - lr: 2.1032e-05 gnorm: 1.03 [14:37:37< 9:52:44] +[titan] 2025-10-05 13:11:58,937 - root - INFO - step: 23880 loss: 2.1572 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9068 +[titan] 2025-10-05 13:11:58,937 - root - INFO - lr: 
2.1023e-05 gnorm: 1.10 [14:37:48< 9:52:33] +[titan] 2025-10-05 13:12:09,818 - root - INFO - step: 23885 loss: 2.1050 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:12:09,818 - root - INFO - lr: 2.1015e-05 gnorm: 1.07 [14:37:59< 9:52:22] +[titan] 2025-10-05 13:12:20,691 - root - INFO - step: 23890 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 13:12:20,691 - root - INFO - lr: 2.1006e-05 gnorm: 1.04 [14:38:10< 9:52:11] +[titan] 2025-10-05 13:12:31,575 - root - INFO - step: 23895 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 13:12:31,575 - root - INFO - lr: 2.0998e-05 gnorm: 1.07 [14:38:21< 9:52:00] +[titan] 2025-10-05 13:12:40,266 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:12:42,455 - root - INFO - step: 23900 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 13:12:42,455 - root - INFO - lr: 2.0989e-05 gnorm: 1.07 [14:38:32< 9:51:48] +[titan] 2025-10-05 13:12:53,357 - root - INFO - step: 23905 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 13:12:53,357 - root - INFO - lr: 2.0981e-05 gnorm: 1.10 [14:38:42< 9:51:37] +[titan] 2025-10-05 13:13:04,239 - root - INFO - step: 23910 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8487 +[titan] 2025-10-05 13:13:04,239 - root - INFO - lr: 2.0972e-05 gnorm: 1.07 [14:38:53< 9:51:26] +[titan] 2025-10-05 13:13:15,113 - root - INFO - step: 23915 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 13:13:15,113 - root - INFO - lr: 2.0964e-05 gnorm: 1.10 [14:39:04< 9:51:15] +[titan] 2025-10-05 13:13:25,979 - root - INFO - step: 23920 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 13:13:25,980 - root - INFO - lr: 2.0955e-05 gnorm: 1.05 [14:39:15< 9:51:04] +[titan] 2025-10-05 13:13:36,839 - root - INFO - step: 23925 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8255 +[titan] 2025-10-05 13:13:36,839 - root - INFO - lr: 2.0947e-05 gnorm: 1.08 [14:39:26< 9:50:53] +[titan] 2025-10-05 13:13:47,718 - root - INFO - step: 23930 loss: 2.0539 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 13:13:47,718 - root - INFO - lr: 
2.0938e-05 gnorm: 1.07 [14:39:37< 9:50:42] +[titan] 2025-10-05 13:13:58,659 - root - INFO - step: 23935 loss: 2.1295 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 13:13:58,659 - root - INFO - lr: 2.0929e-05 gnorm: 1.09 [14:39:48< 9:50:31] +[titan] 2025-10-05 13:14:09,537 - root - INFO - step: 23940 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7883 +[titan] 2025-10-05 13:14:09,537 - root - INFO - lr: 2.0921e-05 gnorm: 1.06 [14:39:59< 9:50:19] +[titan] 2025-10-05 13:14:20,423 - root - INFO - step: 23945 loss: 2.0391 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8022 +[titan] 2025-10-05 13:14:20,423 - root - INFO - lr: 2.0912e-05 gnorm: 1.08 [14:40:09< 9:50:08] +[titan] 2025-10-05 13:14:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:14:31,294 - root - INFO - step: 23950 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8110 +[titan] 2025-10-05 13:14:31,294 - root - INFO - lr: 2.0904e-05 gnorm: 1.02 [14:40:20< 9:49:57] +[titan] 2025-10-05 13:14:42,149 - root - INFO - step: 23955 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:14:42,149 - root - INFO - lr: 2.0895e-05 gnorm: 1.11 [14:40:31< 9:49:46] +[titan] 2025-10-05 13:14:53,021 - root - INFO - step: 23960 loss: 2.0544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 13:14:53,021 - root - INFO - lr: 2.0887e-05 gnorm: 1.07 [14:40:42< 9:49:35] +[titan] 2025-10-05 13:15:03,924 - root - INFO - step: 23965 loss: 2.0186 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 13:15:03,925 - root - INFO - lr: 2.0878e-05 gnorm: 1.08 [14:40:53< 9:49:24] +[titan] 2025-10-05 13:15:14,778 - root - INFO - step: 23970 loss: 2.0244 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 13:15:14,778 - root - INFO - lr: 2.0870e-05 gnorm: 1.10 [14:41:04< 9:49:13] +[titan] 2025-10-05 13:15:25,658 - root - INFO - step: 23975 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:15:25,658 - root - INFO - lr: 2.0861e-05 gnorm: 1.05 [14:41:15< 9:49:02] +[titan] 2025-10-05 13:15:36,526 - root - INFO - step: 23980 loss: 2.1043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 13:15:36,526 - root - INFO - lr: 
2.0853e-05 gnorm: 1.11 [14:41:26< 9:48:50] +[titan] 2025-10-05 13:15:47,390 - root - INFO - step: 23985 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 13:15:47,390 - root - INFO - lr: 2.0844e-05 gnorm: 1.10 [14:41:36< 9:48:39] +[titan] 2025-10-05 13:15:58,289 - root - INFO - step: 23990 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 13:15:58,289 - root - INFO - lr: 2.0836e-05 gnorm: 1.06 [14:41:47< 9:48:28] +[titan] 2025-10-05 13:16:09,157 - root - INFO - step: 23995 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8039 +[titan] 2025-10-05 13:16:09,157 - root - INFO - lr: 2.0827e-05 gnorm: 1.11 [14:41:58< 9:48:17] +[titan] 2025-10-05 13:16:17,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:16:20,053 - root - INFO - step: 24000 loss: 2.0037 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:16:20,053 - root - INFO - lr: 2.0819e-05 gnorm: 1.08 [14:42:09< 9:48:06] +[titan] 2025-10-05 13:16:30,898 - root - INFO - step: 24005 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 13:16:30,899 - root - INFO - lr: 2.0810e-05 gnorm: 1.07 [14:42:20< 9:47:55] +[titan] 2025-10-05 13:16:41,756 - root - INFO - step: 24010 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 13:16:41,757 - root - INFO - lr: 2.0802e-05 gnorm: 1.05 [14:42:31< 9:47:44] +[titan] 2025-10-05 13:16:52,618 - root - INFO - step: 24015 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8347 +[titan] 2025-10-05 13:16:52,618 - root - INFO - lr: 2.0793e-05 gnorm: 1.12 [14:42:42< 9:47:32] +[titan] 2025-10-05 13:17:03,489 - root - INFO - step: 24020 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:17:03,489 - root - INFO - lr: 2.0785e-05 gnorm: 1.10 [14:42:53< 9:47:21] +[titan] 2025-10-05 13:17:14,356 - root - INFO - step: 24025 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 13:17:14,356 - root - INFO - lr: 2.0776e-05 gnorm: 1.08 [14:43:03< 9:47:10] +[titan] 2025-10-05 13:17:25,293 - root - INFO - step: 24030 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 13:17:25,293 - root - INFO - lr: 
2.0767e-05 gnorm: 1.14 [14:43:14< 9:46:59] +[titan] 2025-10-05 13:17:36,153 - root - INFO - step: 24035 loss: 2.0553 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8156 +[titan] 2025-10-05 13:17:36,153 - root - INFO - lr: 2.0759e-05 gnorm: 1.07 [14:43:25< 9:46:48] +[titan] 2025-10-05 13:17:47,022 - root - INFO - step: 24040 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 13:17:47,022 - root - INFO - lr: 2.0750e-05 gnorm: 1.08 [14:43:36< 9:46:37] +[titan] 2025-10-05 13:17:57,898 - root - INFO - step: 24045 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 13:17:57,899 - root - INFO - lr: 2.0742e-05 gnorm: 1.07 [14:43:47< 9:46:26] +[titan] 2025-10-05 13:18:06,588 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:18:08,771 - root - INFO - step: 24050 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:18:08,771 - root - INFO - lr: 2.0733e-05 gnorm: 1.05 [14:43:58< 9:46:15] +[titan] 2025-10-05 13:18:19,609 - root - INFO - step: 24055 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 13:18:19,609 - root - INFO - lr: 2.0725e-05 gnorm: 1.10 [14:44:09< 9:46:03] +[titan] 2025-10-05 13:18:30,457 - root - INFO - step: 24060 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 13:18:30,457 - root - INFO - lr: 2.0716e-05 gnorm: 1.12 [14:44:20< 9:45:52] +[titan] 2025-10-05 13:18:39,435 - root - INFO - Dumping profiler traces at step 24064 +[titan] 2025-10-05 13:18:39,477 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:18:41,660 - root - INFO - step: 24065 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,250 tflops: 405.80 mfu: 41.03% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7997 +[titan] 2025-10-05 13:18:41,661 - root - INFO - lr: 2.0708e-05 gnorm: 1.05 [14:44:31< 9:45:41] +[titan] 2025-10-05 13:18:52,499 - root - INFO - step: 24070 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 13:18:52,499 - root - INFO - lr: 2.0699e-05 gnorm: 1.05 [14:44:42< 9:45:30] +[titan] 2025-10-05 13:19:03,398 - root - INFO - step: 24075 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:19:03,398 - root - INFO - lr: 2.0691e-05 gnorm: 1.08 [14:44:52< 9:45:19] +[titan] 2025-10-05 13:19:14,221 - root - INFO - step: 24080 loss: 
2.0782 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 13:19:14,221 - root - INFO - lr: 2.0682e-05 gnorm: 1.08 [14:45:03< 9:45:08] +[titan] 2025-10-05 13:19:25,059 - root - INFO - step: 24085 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 13:19:25,059 - root - INFO - lr: 2.0674e-05 gnorm: 1.05 [14:45:14< 9:44:57] +[titan] 2025-10-05 13:19:35,885 - root - INFO - step: 24090 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 13:19:35,885 - root - INFO - lr: 2.0665e-05 gnorm: 1.08 [14:45:25< 9:44:46] +[titan] 2025-10-05 13:19:46,755 - root - INFO - step: 24095 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 13:19:46,755 - root - INFO - lr: 2.0657e-05 gnorm: 1.09 [14:45:36< 9:44:35] +[titan] 2025-10-05 13:19:55,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:19:57,605 - root - INFO - step: 24100 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 13:19:57,605 - root - INFO - lr: 2.0648e-05 gnorm: 1.05 [14:45:47< 9:44:23] +[titan] 2025-10-05 13:20:08,458 - root - INFO - step: 24105 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 13:20:08,458 - root - INFO - lr: 2.0640e-05 gnorm: 1.11 [14:45:58< 9:44:12] +[titan] 2025-10-05 13:20:19,304 - root - INFO - step: 24110 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:20:19,305 - root - INFO - lr: 2.0631e-05 gnorm: 1.04 [14:46:08< 9:44:01] +[titan] 2025-10-05 13:20:30,155 - root - INFO - step: 24115 loss: 2.0297 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 13:20:30,155 - root - INFO - lr: 2.0623e-05 gnorm: 1.07 [14:46:19< 9:43:50] +[titan] 2025-10-05 13:20:41,004 - root - INFO - step: 24120 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:20:41,005 - root - INFO - lr: 2.0614e-05 gnorm: 1.07 [14:46:30< 9:43:39] +[titan] 2025-10-05 13:20:51,867 - root - INFO - step: 24125 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8402 +[titan] 2025-10-05 13:20:51,867 - root - INFO - lr: 2.0606e-05 gnorm: 1.12 [14:46:41< 9:43:28] +[titan] 2025-10-05 13:21:02,698 - root - INFO - step: 24130 loss: 2.0869 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 13:21:02,699 - root - INFO - lr: 
2.0597e-05 gnorm: 1.06 [14:46:52< 9:43:17] +[titan] 2025-10-05 13:21:13,527 - root - INFO - step: 24135 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 13:21:13,527 - root - INFO - lr: 2.0589e-05 gnorm: 1.10 [14:47:03< 9:43:05] +[titan] 2025-10-05 13:21:24,355 - root - INFO - step: 24140 loss: 2.0475 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8102 +[titan] 2025-10-05 13:21:24,355 - root - INFO - lr: 2.0580e-05 gnorm: 1.07 [14:47:13< 9:42:54] +[titan] 2025-10-05 13:21:35,208 - root - INFO - step: 24145 loss: 2.1059 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:21:35,208 - root - INFO - lr: 2.0572e-05 gnorm: 1.10 [14:47:24< 9:42:43] +[titan] 2025-10-05 13:21:43,854 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:21:46,037 - root - INFO - step: 24150 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 13:21:46,037 - root - INFO - lr: 2.0563e-05 gnorm: 1.05 [14:47:35< 9:42:32] +[titan] 2025-10-05 13:21:56,862 - root - INFO - step: 24155 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8062 +[titan] 2025-10-05 13:21:56,862 - root - INFO - lr: 2.0555e-05 gnorm: 1.05 [14:47:46< 9:42:21] +[titan] 2025-10-05 13:22:07,697 - root - INFO - step: 24160 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:22:07,697 - root - INFO - lr: 2.0546e-05 gnorm: 1.07 [14:47:57< 9:42:10] +[titan] 2025-10-05 13:22:18,551 - root - INFO - step: 24165 loss: 2.0865 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 13:22:18,551 - root - INFO - lr: 2.0538e-05 gnorm: 1.09 [14:48:08< 9:41:58] +[titan] 2025-10-05 13:22:29,396 - root - INFO - step: 24170 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:22:29,396 - root - INFO - lr: 2.0529e-05 gnorm: 1.08 [14:48:18< 9:41:47] +[titan] 2025-10-05 13:22:40,227 - root - INFO - step: 24175 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8696 +[titan] 2025-10-05 13:22:40,227 - root - INFO - lr: 2.0521e-05 gnorm: 1.09 [14:48:29< 9:41:36] +[titan] 2025-10-05 13:22:51,092 - root - INFO - step: 24180 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 13:22:51,092 - root - INFO - lr: 
2.0512e-05 gnorm: 1.09 [14:48:40< 9:41:25] +[titan] 2025-10-05 13:23:01,952 - root - INFO - step: 24185 loss: 1.9953 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 13:23:01,953 - root - INFO - lr: 2.0504e-05 gnorm: 1.07 [14:48:51< 9:41:14] +[titan] 2025-10-05 13:23:12,844 - root - INFO - step: 24190 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 13:23:12,844 - root - INFO - lr: 2.0496e-05 gnorm: 1.15 [14:49:02< 9:41:03] +[titan] 2025-10-05 13:23:23,695 - root - INFO - step: 24195 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 13:23:23,695 - root - INFO - lr: 2.0487e-05 gnorm: 1.07 [14:49:13< 9:40:52] +[titan] 2025-10-05 13:23:32,375 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:23:34,564 - root - INFO - step: 24200 loss: 2.0236 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:23:34,564 - root - INFO - lr: 2.0479e-05 gnorm: 1.07 [14:49:24< 9:40:41] +[titan] 2025-10-05 13:23:45,424 - root - INFO - step: 24205 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 13:23:45,424 - root - INFO - lr: 2.0470e-05 gnorm: 1.07 [14:49:34< 9:40:29] +[titan] 2025-10-05 13:23:56,267 - root - INFO - step: 24210 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 13:23:56,267 - root - INFO - lr: 2.0462e-05 gnorm: 1.03 [14:49:45< 9:40:18] +[titan] 2025-10-05 13:24:07,115 - root - INFO - step: 24215 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8655 +[titan] 2025-10-05 13:24:07,115 - root - INFO - lr: 2.0453e-05 gnorm: 1.12 [14:49:56< 9:40:07] +[titan] 2025-10-05 13:24:17,952 - root - INFO - step: 24220 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 13:24:17,952 - root - INFO - lr: 2.0445e-05 gnorm: 1.13 [14:50:07< 9:39:56] +[titan] 2025-10-05 13:24:28,825 - root - INFO - step: 24225 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8509 +[titan] 2025-10-05 13:24:28,825 - root - INFO - lr: 2.0436e-05 gnorm: 1.06 [14:50:18< 9:39:45] +[titan] 2025-10-05 13:24:39,649 - root - INFO - step: 24230 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 13:24:39,649 - root - INFO - lr: 
2.0428e-05 gnorm: 1.10 [14:50:29< 9:39:34] +[titan] 2025-10-05 13:24:50,487 - root - INFO - step: 24235 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 13:24:50,487 - root - INFO - lr: 2.0419e-05 gnorm: 1.07 [14:50:40< 9:39:23] +[titan] 2025-10-05 13:25:01,334 - root - INFO - step: 24240 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:25:01,335 - root - INFO - lr: 2.0411e-05 gnorm: 1.02 [14:50:50< 9:39:11] +[titan] 2025-10-05 13:25:12,172 - root - INFO - step: 24245 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 13:25:12,172 - root - INFO - lr: 2.0402e-05 gnorm: 1.07 [14:51:01< 9:39:00] +[titan] 2025-10-05 13:25:20,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:25:23,024 - root - INFO - step: 24250 loss: 2.1386 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:25:23,025 - root - INFO - lr: 2.0394e-05 gnorm: 1.10 [14:51:12< 9:38:49] +[titan] 2025-10-05 13:25:33,889 - root - INFO - step: 24255 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:25:33,889 - root - INFO - lr: 2.0385e-05 gnorm: 1.11 [14:51:23< 9:38:38] +[titan] 2025-10-05 13:25:44,730 - root - INFO - step: 24260 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 13:25:44,730 - root - INFO - lr: 2.0377e-05 gnorm: 1.07 [14:51:34< 9:38:27] +[titan] 2025-10-05 13:25:55,582 - root - INFO - step: 24265 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 13:25:55,582 - root - INFO - lr: 2.0368e-05 gnorm: 1.07 [14:51:45< 9:38:16] +[titan] 2025-10-05 13:26:06,446 - root - INFO - step: 24270 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:26:06,447 - root - INFO - lr: 2.0360e-05 gnorm: 1.08 [14:51:55< 9:38:05] +[titan] 2025-10-05 13:26:17,296 - root - INFO - step: 24275 loss: 2.0367 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8000 +[titan] 2025-10-05 13:26:17,296 - root - INFO - lr: 2.0352e-05 gnorm: 1.08 [14:52:06< 9:37:53] +[titan] 2025-10-05 13:26:28,151 - root - INFO - step: 24280 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 13:26:28,151 - root - INFO - lr: 
2.0343e-05 gnorm: 1.09 [14:52:17< 9:37:42] +[titan] 2025-10-05 13:26:39,051 - root - INFO - step: 24285 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 13:26:39,051 - root - INFO - lr: 2.0335e-05 gnorm: 1.10 [14:52:28< 9:37:31] +[titan] 2025-10-05 13:26:49,902 - root - INFO - step: 24290 loss: 2.0746 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:26:49,902 - root - INFO - lr: 2.0326e-05 gnorm: 1.07 [14:52:39< 9:37:20] +[titan] 2025-10-05 13:27:00,733 - root - INFO - step: 24295 loss: 2.1061 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 13:27:00,734 - root - INFO - lr: 2.0318e-05 gnorm: 1.11 [14:52:50< 9:37:09] +[titan] 2025-10-05 13:27:09,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:27:11,587 - root - INFO - step: 24300 loss: 2.0702 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 13:27:11,587 - root - INFO - lr: 2.0309e-05 gnorm: 1.10 [14:53:01< 9:36:58] +[titan] 2025-10-05 13:27:22,433 - root - INFO - step: 24305 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 13:27:22,433 - root - INFO - lr: 2.0301e-05 gnorm: 1.05 [14:53:11< 9:36:47] +[titan] 2025-10-05 13:27:33,270 - root - INFO - step: 24310 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 13:27:33,270 - root - INFO - lr: 2.0292e-05 gnorm: 1.06 [14:53:22< 9:36:35] +[titan] 2025-10-05 13:27:44,105 - root - INFO - step: 24315 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 13:27:44,105 - root - INFO - lr: 2.0284e-05 gnorm: 1.07 [14:53:33< 9:36:24] +[titan] 2025-10-05 13:27:54,981 - root - INFO - step: 24320 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 13:27:54,981 - root - INFO - lr: 2.0275e-05 gnorm: 1.13 [14:53:44< 9:36:13] +[titan] 2025-10-05 13:28:05,837 - root - INFO - step: 24325 loss: 2.1113 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:28:05,838 - root - INFO - lr: 2.0267e-05 gnorm: 1.14 [14:53:55< 9:36:02] +[titan] 2025-10-05 13:28:16,705 - root - INFO - step: 24330 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 13:28:16,705 - root - INFO - lr: 
2.0258e-05 gnorm: 1.05 [14:54:06< 9:35:51] +[titan] 2025-10-05 13:28:27,566 - root - INFO - step: 24335 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8683 +[titan] 2025-10-05 13:28:27,566 - root - INFO - lr: 2.0250e-05 gnorm: 1.15 [14:54:17< 9:35:40] +[titan] 2025-10-05 13:28:38,418 - root - INFO - step: 24340 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:28:38,418 - root - INFO - lr: 2.0242e-05 gnorm: 1.08 [14:54:27< 9:35:29] +[titan] 2025-10-05 13:28:49,296 - root - INFO - step: 24345 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 13:28:49,296 - root - INFO - lr: 2.0233e-05 gnorm: 1.14 [14:54:38< 9:35:18] +[titan] 2025-10-05 13:28:58,013 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:29:00,192 - root - INFO - step: 24350 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:29:00,192 - root - INFO - lr: 2.0225e-05 gnorm: 1.18 [14:54:49< 9:35:06] +[titan] 2025-10-05 13:29:11,072 - root - INFO - step: 24355 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 13:29:11,072 - root - INFO - lr: 2.0216e-05 gnorm: 1.09 [14:55:00< 9:34:55] +[titan] 2025-10-05 13:29:21,925 - root - INFO - step: 24360 loss: 2.1089 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 13:29:21,925 - root - INFO - lr: 2.0208e-05 gnorm: 1.07 [14:55:11< 9:34:44] +[titan] 2025-10-05 13:29:32,780 - root - INFO - step: 24365 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:29:32,781 - root - INFO - lr: 2.0199e-05 gnorm: 1.10 [14:55:22< 9:34:33] +[titan] 2025-10-05 13:29:43,663 - root - INFO - step: 24370 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:29:43,663 - root - INFO - lr: 2.0191e-05 gnorm: 1.10 [14:55:33< 9:34:22] +[titan] 2025-10-05 13:29:54,539 - root - INFO - step: 24375 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:29:54,540 - root - INFO - lr: 2.0182e-05 gnorm: 1.12 [14:55:44< 9:34:11] +[titan] 2025-10-05 13:30:05,417 - root - INFO - step: 24380 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 13:30:05,417 - root - INFO - lr: 
2.0174e-05 gnorm: 1.12 [14:55:54< 9:34:00] +[titan] 2025-10-05 13:30:16,350 - root - INFO - step: 24385 loss: 2.1282 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 13:30:16,350 - root - INFO - lr: 2.0166e-05 gnorm: 1.05 [14:56:05< 9:33:49] +[titan] 2025-10-05 13:30:27,217 - root - INFO - step: 24390 loss: 2.0751 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:30:27,217 - root - INFO - lr: 2.0157e-05 gnorm: 1.12 [14:56:16< 9:33:37] +[titan] 2025-10-05 13:30:38,065 - root - INFO - step: 24395 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:30:38,065 - root - INFO - lr: 2.0149e-05 gnorm: 1.08 [14:56:27< 9:33:26] +[titan] 2025-10-05 13:30:46,747 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:30:48,926 - root - INFO - step: 24400 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 13:30:48,926 - root - INFO - lr: 2.0140e-05 gnorm: 1.09 [14:56:38< 9:33:15] +[titan] 2025-10-05 13:30:59,781 - root - INFO - step: 24405 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8166 +[titan] 2025-10-05 13:30:59,781 - root - INFO - lr: 2.0132e-05 gnorm: 1.07 [14:56:49< 9:33:04] +[titan] 2025-10-05 13:31:10,656 - root - INFO - step: 24410 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 13:31:10,656 - root - INFO - lr: 2.0123e-05 gnorm: 1.11 [14:57:00< 9:32:53] +[titan] 2025-10-05 13:31:21,555 - root - INFO - step: 24415 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8268 +[titan] 2025-10-05 13:31:21,555 - root - INFO - lr: 2.0115e-05 gnorm: 1.09 [14:57:11< 9:32:42] +[titan] 2025-10-05 13:31:32,426 - root - INFO - step: 24420 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 13:31:32,426 - root - INFO - lr: 2.0107e-05 gnorm: 1.07 [14:57:21< 9:32:31] +[titan] 2025-10-05 13:31:43,323 - root - INFO - step: 24425 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 13:31:43,323 - root - INFO - lr: 2.0098e-05 gnorm: 1.31 [14:57:32< 9:32:20] +[titan] 2025-10-05 13:31:54,203 - root - INFO - step: 24430 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 13:31:54,204 - root - INFO - lr: 
2.0090e-05 gnorm: 1.05 [14:57:43< 9:32:09] +[titan] 2025-10-05 13:32:05,075 - root - INFO - step: 24435 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 13:32:05,075 - root - INFO - lr: 2.0081e-05 gnorm: 1.07 [14:57:54< 9:31:57] +[titan] 2025-10-05 13:32:15,980 - root - INFO - step: 24440 loss: 2.1665 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 13:32:15,980 - root - INFO - lr: 2.0073e-05 gnorm: 1.09 [14:58:05< 9:31:46] +[titan] 2025-10-05 13:32:26,906 - root - INFO - step: 24445 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 13:32:26,906 - root - INFO - lr: 2.0064e-05 gnorm: 1.08 [14:58:16< 9:31:35] +[titan] 2025-10-05 13:32:35,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:32:37,766 - root - INFO - step: 24450 loss: 2.0220 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 13:32:37,766 - root - INFO - lr: 2.0056e-05 gnorm: 1.06 [14:58:27< 9:31:24] +[titan] 2025-10-05 13:32:48,638 - root - INFO - step: 24455 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 13:32:48,638 - root - INFO - lr: 2.0048e-05 gnorm: 1.07 [14:58:38< 9:31:13] +[titan] 2025-10-05 13:32:59,507 - root - INFO - step: 24460 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:32:59,507 - root - INFO - lr: 2.0039e-05 gnorm: 1.07 [14:58:49< 9:31:02] +[titan] 2025-10-05 13:33:10,393 - root - INFO - step: 24465 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:33:10,393 - root - INFO - lr: 2.0031e-05 gnorm: 1.05 [14:58:59< 9:30:51] +[titan] 2025-10-05 13:33:21,345 - root - INFO - step: 24470 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8103 +[titan] 2025-10-05 13:33:21,345 - root - INFO - lr: 2.0022e-05 gnorm: 1.06 [14:59:10< 9:30:40] +[titan] 2025-10-05 13:33:32,228 - root - INFO - step: 24475 loss: 2.0788 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:33:32,228 - root - INFO - lr: 2.0014e-05 gnorm: 1.09 [14:59:21< 9:30:29] +[titan] 2025-10-05 13:33:43,179 - root - INFO - step: 24480 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8709 +[titan] 2025-10-05 13:33:43,180 - root - INFO - lr: 
2.0006e-05 gnorm: 1.10 [14:59:32< 9:30:17] +[titan] 2025-10-05 13:33:54,062 - root - INFO - step: 24485 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 13:33:54,062 - root - INFO - lr: 1.9997e-05 gnorm: 1.07 [14:59:43< 9:30:06] +[titan] 2025-10-05 13:34:04,940 - root - INFO - step: 24490 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 13:34:04,940 - root - INFO - lr: 1.9989e-05 gnorm: 1.06 [14:59:54< 9:29:55] +[titan] 2025-10-05 13:34:15,844 - root - INFO - step: 24495 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8069 +[titan] 2025-10-05 13:34:15,844 - root - INFO - lr: 1.9980e-05 gnorm: 1.09 [15:00:05< 9:29:44] +[titan] 2025-10-05 13:34:24,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:34:26,706 - root - INFO - step: 24500 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 13:34:26,706 - root - INFO - lr: 1.9972e-05 gnorm: 1.11 [15:00:16< 9:29:33] +[titan] 2025-10-05 13:34:37,585 - root - INFO - step: 24505 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 13:34:37,585 - root - INFO - lr: 1.9963e-05 gnorm: 1.08 [15:00:27< 9:29:22] +[titan] 2025-10-05 13:34:48,499 - root - INFO - step: 24510 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 13:34:48,500 - root - INFO - lr: 1.9955e-05 gnorm: 1.12 [15:00:38< 9:29:11] +[titan] 2025-10-05 13:34:59,379 - root - INFO - step: 24515 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 13:34:59,379 - root - INFO - lr: 1.9947e-05 gnorm: 1.09 [15:00:48< 9:29:00] +[titan] 2025-10-05 13:35:10,244 - root - INFO - step: 24520 loss: 2.0374 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8014 +[titan] 2025-10-05 13:35:10,245 - root - INFO - lr: 1.9938e-05 gnorm: 1.03 [15:00:59< 9:28:49] +[titan] 2025-10-05 13:35:21,112 - root - INFO - step: 24525 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 13:35:21,113 - root - INFO - lr: 1.9930e-05 gnorm: 1.06 [15:01:10< 9:28:37] +[titan] 2025-10-05 13:35:31,956 - root - INFO - step: 24530 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8681 +[titan] 2025-10-05 13:35:31,957 - root - INFO - lr: 
1.9921e-05 gnorm: 1.08 [15:01:21< 9:28:26] +[titan] 2025-10-05 13:35:42,842 - root - INFO - step: 24535 loss: 2.0794 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8375 +[titan] 2025-10-05 13:35:42,842 - root - INFO - lr: 1.9913e-05 gnorm: 1.10 [15:01:32< 9:28:15] +[titan] 2025-10-05 13:35:53,706 - root - INFO - step: 24540 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 13:35:53,706 - root - INFO - lr: 1.9905e-05 gnorm: 1.11 [15:01:43< 9:28:04] +[titan] 2025-10-05 13:36:04,625 - root - INFO - step: 24545 loss: 2.1385 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:36:04,625 - root - INFO - lr: 1.9896e-05 gnorm: 1.07 [15:01:54< 9:27:53] +[titan] 2025-10-05 13:36:13,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:36:15,477 - root - INFO - step: 24550 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 13:36:15,477 - root - INFO - lr: 1.9888e-05 gnorm: 1.08 [15:02:04< 9:27:42] +[titan] 2025-10-05 13:36:26,344 - root - INFO - step: 24555 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 13:36:26,344 - root - INFO - lr: 1.9879e-05 gnorm: 1.07 [15:02:15< 9:27:31] +[titan] 2025-10-05 13:36:37,204 - root - INFO - step: 24560 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 13:36:37,204 - root - INFO - lr: 1.9871e-05 gnorm: 1.07 [15:02:26< 9:27:20] +[titan] 2025-10-05 13:36:48,082 - root - INFO - step: 24565 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 13:36:48,082 - root - INFO - lr: 1.9863e-05 gnorm: 1.05 [15:02:37< 9:27:09] +[titan] 2025-10-05 13:36:58,948 - root - INFO - step: 24570 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 13:36:58,948 - root - INFO - lr: 1.9854e-05 gnorm: 1.07 [15:02:48< 9:26:57] +[titan] 2025-10-05 13:37:09,939 - root - INFO - step: 24575 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 29,815 tflops: 413.64 mfu: 41.82% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 13:37:09,939 - root - INFO - lr: 1.9846e-05 gnorm: 1.08 [15:02:59< 9:26:46] +[titan] 2025-10-05 13:37:12,310 - root - INFO - Dumping profiler traces at step 24576 +[titan] 2025-10-05 13:37:12,350 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:37:21,087 - root - INFO - step: 24580 loss: 
2.0679 memory: 118.84GiB(85.28%) tps: 29,394 tflops: 407.79 mfu: 41.23% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:37:21,087 - root - INFO - lr: 1.9837e-05 gnorm: 1.10 [15:03:10< 9:26:35] +[titan] 2025-10-05 13:37:31,945 - root - INFO - step: 24585 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:37:31,945 - root - INFO - lr: 1.9829e-05 gnorm: 1.04 [15:03:21< 9:26:24] +[titan] 2025-10-05 13:37:42,812 - root - INFO - step: 24590 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8024 +[titan] 2025-10-05 13:37:42,812 - root - INFO - lr: 1.9821e-05 gnorm: 1.05 [15:03:32< 9:26:13] +[titan] 2025-10-05 13:37:53,676 - root - INFO - step: 24595 loss: 2.0523 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 13:37:53,677 - root - INFO - lr: 1.9812e-05 gnorm: 1.07 [15:03:43< 9:26:02] +[titan] 2025-10-05 13:38:02,369 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:38:04,554 - root - INFO - step: 24600 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8521 +[titan] 2025-10-05 13:38:04,554 - root - INFO - lr: 1.9804e-05 gnorm: 1.09 [15:03:54< 9:25:51] +[titan] 2025-10-05 13:38:15,471 - root - INFO - step: 24605 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 13:38:15,471 - root - INFO - lr: 1.9796e-05 gnorm: 1.07 [15:04:04< 9:25:40] +[titan] 2025-10-05 13:38:26,377 - root - INFO - step: 24610 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8870 +[titan] 2025-10-05 13:38:26,377 - root - INFO - lr: 1.9787e-05 gnorm: 1.12 [15:04:15< 9:25:29] +[titan] 2025-10-05 13:38:37,243 - root - INFO - step: 24615 loss: 2.0786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8367 +[titan] 2025-10-05 13:38:37,243 - root - INFO - lr: 1.9779e-05 gnorm: 1.09 [15:04:26< 9:25:18] +[titan] 2025-10-05 13:38:48,119 - root - INFO - step: 24620 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 13:38:48,119 - root - INFO - lr: 1.9770e-05 gnorm: 1.07 [15:04:37< 9:25:06] +[titan] 2025-10-05 13:38:58,977 - root - INFO - step: 24625 loss: 2.0721 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8321 +[titan] 2025-10-05 13:38:58,977 - root - INFO - lr: 1.9762e-05 gnorm: 1.11 [15:04:48< 9:24:55] +[titan] 2025-10-05 13:39:09,830 - root - INFO - step: 24630 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8930 +[titan] 2025-10-05 13:39:09,830 - root - INFO - lr: 
1.9754e-05 gnorm: 1.13 [15:04:59< 9:24:44] +[titan] 2025-10-05 13:39:20,732 - root - INFO - step: 24635 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 13:39:20,733 - root - INFO - lr: 1.9745e-05 gnorm: 1.10 [15:05:10< 9:24:33] +[titan] 2025-10-05 13:39:31,629 - root - INFO - step: 24640 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 13:39:31,629 - root - INFO - lr: 1.9737e-05 gnorm: 1.08 [15:05:21< 9:24:22] +[titan] 2025-10-05 13:39:42,484 - root - INFO - step: 24645 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 13:39:42,484 - root - INFO - lr: 1.9728e-05 gnorm: 1.05 [15:05:31< 9:24:11] +[titan] 2025-10-05 13:39:51,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:39:53,346 - root - INFO - step: 24650 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 13:39:53,346 - root - INFO - lr: 1.9720e-05 gnorm: 1.06 [15:05:42< 9:24:00] +[titan] 2025-10-05 13:40:04,203 - root - INFO - step: 24655 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 13:40:04,203 - root - INFO - lr: 1.9712e-05 gnorm: 1.12 [15:05:53< 9:23:49] +[titan] 2025-10-05 13:40:15,073 - root - INFO - step: 24660 loss: 2.0882 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 13:40:15,073 - root - INFO - lr: 1.9703e-05 gnorm: 1.10 [15:06:04< 9:23:38] +[titan] 2025-10-05 13:40:25,992 - root - INFO - step: 24665 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:40:25,992 - root - INFO - lr: 1.9695e-05 gnorm: 1.06 [15:06:15< 9:23:26] +[titan] 2025-10-05 13:40:36,894 - root - INFO - step: 24670 loss: 2.0856 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 13:40:36,894 - root - INFO - lr: 1.9687e-05 gnorm: 1.12 [15:06:26< 9:23:15] +[titan] 2025-10-05 13:40:47,766 - root - INFO - step: 24675 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 13:40:47,766 - root - INFO - lr: 1.9678e-05 gnorm: 1.09 [15:06:37< 9:23:04] +[titan] 2025-10-05 13:40:58,618 - root - INFO - step: 24680 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8134 +[titan] 2025-10-05 13:40:58,618 - root - INFO - lr: 
1.9670e-05 gnorm: 1.07 [15:06:48< 9:22:53] +[titan] 2025-10-05 13:41:09,490 - root - INFO - step: 24685 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 13:41:09,490 - root - INFO - lr: 1.9662e-05 gnorm: 1.13 [15:06:58< 9:22:42] +[titan] 2025-10-05 13:41:20,418 - root - INFO - step: 24690 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 13:41:20,418 - root - INFO - lr: 1.9653e-05 gnorm: 1.06 [15:07:09< 9:22:31] +[titan] 2025-10-05 13:41:31,285 - root - INFO - step: 24695 loss: 2.0651 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:41:31,285 - root - INFO - lr: 1.9645e-05 gnorm: 1.08 [15:07:20< 9:22:20] +[titan] 2025-10-05 13:41:39,977 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:41:42,156 - root - INFO - step: 24700 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 13:41:42,156 - root - INFO - lr: 1.9636e-05 gnorm: 1.11 [15:07:31< 9:22:09] +[titan] 2025-10-05 13:41:53,063 - root - INFO - step: 24705 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 13:41:53,063 - root - INFO - lr: 1.9628e-05 gnorm: 1.08 [15:07:42< 9:21:58] +[titan] 2025-10-05 13:42:03,922 - root - INFO - step: 24710 loss: 2.0804 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 13:42:03,922 - root - INFO - lr: 1.9620e-05 gnorm: 1.06 [15:07:53< 9:21:46] +[titan] 2025-10-05 13:42:14,790 - root - INFO - step: 24715 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8274 +[titan] 2025-10-05 13:42:14,790 - root - INFO - lr: 1.9611e-05 gnorm: 1.09 [15:08:04< 9:21:35] +[titan] 2025-10-05 13:42:25,702 - root - INFO - step: 24720 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 13:42:25,702 - root - INFO - lr: 1.9603e-05 gnorm: 1.11 [15:08:15< 9:21:24] +[titan] 2025-10-05 13:42:36,573 - root - INFO - step: 24725 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 13:42:36,573 - root - INFO - lr: 1.9595e-05 gnorm: 1.08 [15:08:26< 9:21:13] +[titan] 2025-10-05 13:42:47,423 - root - INFO - step: 24730 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 13:42:47,424 - root - INFO - lr: 
1.9586e-05 gnorm: 1.12 [15:08:36< 9:21:02] +[titan] 2025-10-05 13:42:58,321 - root - INFO - step: 24735 loss: 2.1290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 13:42:58,321 - root - INFO - lr: 1.9578e-05 gnorm: 1.08 [15:08:47< 9:20:51] +[titan] 2025-10-05 13:43:09,170 - root - INFO - step: 24740 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:43:09,171 - root - INFO - lr: 1.9570e-05 gnorm: 1.12 [15:08:58< 9:20:40] +[titan] 2025-10-05 13:43:20,002 - root - INFO - step: 24745 loss: 2.0612 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8216 +[titan] 2025-10-05 13:43:20,002 - root - INFO - lr: 1.9561e-05 gnorm: 1.11 [15:09:09< 9:20:29] +[titan] 2025-10-05 13:43:28,699 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:43:30,874 - root - INFO - step: 24750 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 13:43:30,874 - root - INFO - lr: 1.9553e-05 gnorm: 1.08 [15:09:20< 9:20:18] +[titan] 2025-10-05 13:43:41,719 - root - INFO - step: 24755 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 13:43:41,719 - root - INFO - lr: 1.9545e-05 gnorm: 1.11 [15:09:31< 9:20:06] +[titan] 2025-10-05 13:43:52,574 - root - INFO - step: 24760 loss: 2.0568 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 13:43:52,574 - root - INFO - lr: 1.9536e-05 gnorm: 1.07 [15:09:42< 9:19:55] +[titan] 2025-10-05 13:44:03,465 - root - INFO - step: 24765 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 13:44:03,465 - root - INFO - lr: 1.9528e-05 gnorm: 1.07 [15:09:52< 9:19:44] +[titan] 2025-10-05 13:44:14,316 - root - INFO - step: 24770 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 13:44:14,316 - root - INFO - lr: 1.9519e-05 gnorm: 1.05 [15:10:03< 9:19:33] +[titan] 2025-10-05 13:44:25,153 - root - INFO - step: 24775 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8456 +[titan] 2025-10-05 13:44:25,154 - root - INFO - lr: 1.9511e-05 gnorm: 1.07 [15:10:14< 9:19:22] +[titan] 2025-10-05 13:44:36,002 - root - INFO - step: 24780 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 13:44:36,003 - root - INFO - lr: 
1.9503e-05 gnorm: 1.08 [15:10:25< 9:19:11] +[titan] 2025-10-05 13:44:46,858 - root - INFO - step: 24785 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 13:44:46,858 - root - INFO - lr: 1.9494e-05 gnorm: 1.07 [15:10:36< 9:19:00] +[titan] 2025-10-05 13:44:57,702 - root - INFO - step: 24790 loss: 2.0838 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 13:44:57,702 - root - INFO - lr: 1.9486e-05 gnorm: 1.08 [15:10:47< 9:18:48] +[titan] 2025-10-05 13:45:08,535 - root - INFO - step: 24795 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 13:45:08,535 - root - INFO - lr: 1.9478e-05 gnorm: 1.06 [15:10:58< 9:18:37] +[titan] 2025-10-05 13:45:17,248 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:45:19,421 - root - INFO - step: 24800 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8718 +[titan] 2025-10-05 13:45:19,421 - root - INFO - lr: 1.9469e-05 gnorm: 1.08 [15:11:08< 9:18:26] +[titan] 2025-10-05 13:45:30,265 - root - INFO - step: 24805 loss: 2.0238 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 13:45:30,265 - root - INFO - lr: 1.9461e-05 gnorm: 1.07 [15:11:19< 9:18:15] +[titan] 2025-10-05 13:45:41,104 - root - INFO - step: 24810 loss: 2.0540 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8154 +[titan] 2025-10-05 13:45:41,104 - root - INFO - lr: 1.9453e-05 gnorm: 1.07 [15:11:30< 9:18:04] +[titan] 2025-10-05 13:45:51,953 - root - INFO - step: 24815 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8778 +[titan] 2025-10-05 13:45:51,953 - root - INFO - lr: 1.9444e-05 gnorm: 1.11 [15:11:41< 9:17:53] +[titan] 2025-10-05 13:46:02,816 - root - INFO - step: 24820 loss: 2.1004 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 13:46:02,816 - root - INFO - lr: 1.9436e-05 gnorm: 1.07 [15:11:52< 9:17:42] +[titan] 2025-10-05 13:46:13,676 - root - INFO - step: 24825 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 13:46:13,676 - root - INFO - lr: 1.9428e-05 gnorm: 1.10 [15:12:03< 9:17:31] +[titan] 2025-10-05 13:46:24,572 - root - INFO - step: 24830 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 13:46:24,572 - root - INFO - lr: 
1.9419e-05 gnorm: 1.10 [15:12:14< 9:17:20] +[titan] 2025-10-05 13:46:35,432 - root - INFO - step: 24835 loss: 2.1026 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:46:35,432 - root - INFO - lr: 1.9411e-05 gnorm: 1.07 [15:12:24< 9:17:08] +[titan] 2025-10-05 13:46:46,286 - root - INFO - step: 24840 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:46:46,287 - root - INFO - lr: 1.9403e-05 gnorm: 1.10 [15:12:35< 9:16:57] +[titan] 2025-10-05 13:46:57,123 - root - INFO - step: 24845 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8212 +[titan] 2025-10-05 13:46:57,123 - root - INFO - lr: 1.9394e-05 gnorm: 1.07 [15:12:46< 9:16:46] +[titan] 2025-10-05 13:47:05,788 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:47:07,960 - root - INFO - step: 24850 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 13:47:07,960 - root - INFO - lr: 1.9386e-05 gnorm: 1.13 [15:12:57< 9:16:35] +[titan] 2025-10-05 13:47:18,794 - root - INFO - step: 24855 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 13:47:18,794 - root - INFO - lr: 1.9378e-05 gnorm: 1.07 [15:13:08< 9:16:24] +[titan] 2025-10-05 13:47:29,672 - root - INFO - step: 24860 loss: 2.1559 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 13:47:29,672 - root - INFO - lr: 1.9369e-05 gnorm: 1.08 [15:13:19< 9:16:13] +[titan] 2025-10-05 13:47:40,555 - root - INFO - step: 24865 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 13:47:40,556 - root - INFO - lr: 1.9361e-05 gnorm: 1.14 [15:13:30< 9:16:02] +[titan] 2025-10-05 13:47:51,413 - root - INFO - step: 24870 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 13:47:51,413 - root - INFO - lr: 1.9353e-05 gnorm: 1.07 [15:13:40< 9:15:51] +[titan] 2025-10-05 13:48:02,253 - root - INFO - step: 24875 loss: 2.0532 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 13:48:02,253 - root - INFO - lr: 1.9345e-05 gnorm: 1.10 [15:13:51< 9:15:39] +[titan] 2025-10-05 13:48:13,099 - root - INFO - step: 24880 loss: 2.0338 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 13:48:13,099 - root - INFO - lr: 
1.9336e-05 gnorm: 1.08 [15:14:02< 9:15:28] +[titan] 2025-10-05 13:48:23,933 - root - INFO - step: 24885 loss: 2.0834 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 13:48:23,933 - root - INFO - lr: 1.9328e-05 gnorm: 1.08 [15:14:13< 9:15:17] +[titan] 2025-10-05 13:48:34,822 - root - INFO - step: 24890 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7910 +[titan] 2025-10-05 13:48:34,822 - root - INFO - lr: 1.9320e-05 gnorm: 1.05 [15:14:24< 9:15:06] +[titan] 2025-10-05 13:48:45,673 - root - INFO - step: 24895 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:48:45,673 - root - INFO - lr: 1.9311e-05 gnorm: 1.13 [15:14:35< 9:14:55] +[titan] 2025-10-05 13:48:54,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:48:56,513 - root - INFO - step: 24900 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 13:48:56,513 - root - INFO - lr: 1.9303e-05 gnorm: 1.08 [15:14:45< 9:14:44] +[titan] 2025-10-05 13:49:07,354 - root - INFO - step: 24905 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 13:49:07,354 - root - INFO - lr: 1.9295e-05 gnorm: 1.09 [15:14:56< 9:14:33] +[titan] 2025-10-05 13:49:18,206 - root - INFO - step: 24910 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 13:49:18,206 - root - INFO - lr: 1.9286e-05 gnorm: 1.06 [15:15:07< 9:14:22] +[titan] 2025-10-05 13:49:29,079 - root - INFO - step: 24915 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 13:49:29,080 - root - INFO - lr: 1.9278e-05 gnorm: 1.07 [15:15:18< 9:14:10] +[titan] 2025-10-05 13:49:39,928 - root - INFO - step: 24920 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8887 +[titan] 2025-10-05 13:49:39,928 - root - INFO - lr: 1.9270e-05 gnorm: 1.10 [15:15:29< 9:13:59] +[titan] 2025-10-05 13:49:50,803 - root - INFO - step: 24925 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7714 +[titan] 2025-10-05 13:49:50,804 - root - INFO - lr: 1.9261e-05 gnorm: 1.05 [15:15:40< 9:13:48] +[titan] 2025-10-05 13:50:01,632 - root - INFO - step: 24930 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7977 +[titan] 2025-10-05 13:50:01,632 - root - INFO - lr: 
1.9253e-05 gnorm: 1.12 [15:15:51< 9:13:37] +[titan] 2025-10-05 13:50:12,484 - root - INFO - step: 24935 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 13:50:12,484 - root - INFO - lr: 1.9245e-05 gnorm: 1.08 [15:16:01< 9:13:26] +[titan] 2025-10-05 13:50:23,352 - root - INFO - step: 24940 loss: 2.0643 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 13:50:23,352 - root - INFO - lr: 1.9236e-05 gnorm: 1.08 [15:16:12< 9:13:15] +[titan] 2025-10-05 13:50:34,241 - root - INFO - step: 24945 loss: 2.0637 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:50:34,241 - root - INFO - lr: 1.9228e-05 gnorm: 1.09 [15:16:23< 9:13:04] +[titan] 2025-10-05 13:50:42,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:50:45,094 - root - INFO - step: 24950 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:50:45,094 - root - INFO - lr: 1.9220e-05 gnorm: 1.09 [15:16:34< 9:12:53] +[titan] 2025-10-05 13:50:55,957 - root - INFO - step: 24955 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 13:50:55,957 - root - INFO - lr: 1.9212e-05 gnorm: 1.10 [15:16:45< 9:12:41] +[titan] 2025-10-05 13:51:06,846 - root - INFO - step: 24960 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:51:06,846 - root - INFO - lr: 1.9203e-05 gnorm: 1.18 [15:16:56< 9:12:30] +[titan] 2025-10-05 13:51:17,738 - root - INFO - step: 24965 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:51:17,738 - root - INFO - lr: 1.9195e-05 gnorm: 1.09 [15:17:07< 9:12:19] +[titan] 2025-10-05 13:51:28,604 - root - INFO - step: 24970 loss: 2.1023 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:51:28,604 - root - INFO - lr: 1.9187e-05 gnorm: 1.11 [15:17:18< 9:12:08] +[titan] 2025-10-05 13:51:39,453 - root - INFO - step: 24975 loss: 2.0306 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 13:51:39,453 - root - INFO - lr: 1.9178e-05 gnorm: 1.12 [15:17:28< 9:11:57] +[titan] 2025-10-05 13:51:50,305 - root - INFO - step: 24980 loss: 2.0966 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8535 +[titan] 2025-10-05 13:51:50,305 - root - INFO - lr: 
1.9170e-05 gnorm: 1.08 [15:17:39< 9:11:46] +[titan] 2025-10-05 13:52:01,147 - root - INFO - step: 24985 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7712 +[titan] 2025-10-05 13:52:01,147 - root - INFO - lr: 1.9162e-05 gnorm: 1.09 [15:17:50< 9:11:35] +[titan] 2025-10-05 13:52:12,002 - root - INFO - step: 24990 loss: 2.0567 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 13:52:12,002 - root - INFO - lr: 1.9154e-05 gnorm: 1.09 [15:18:01< 9:11:24] +[titan] 2025-10-05 13:52:22,852 - root - INFO - step: 24995 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:52:22,852 - root - INFO - lr: 1.9145e-05 gnorm: 1.08 [15:18:12< 9:11:13] +[titan] 2025-10-05 13:52:31,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:52:33,740 - root - INFO - step: 25000 loss: 2.0319 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7959 +[titan] 2025-10-05 13:52:33,740 - root - INFO - lr: 1.9137e-05 gnorm: 1.07 [15:18:23< 9:11:01] +[titan] 2025-10-05 13:52:33,740 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 13:52:51,416 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 13:52:51,417 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.68 seconds. 
+[titan] 2025-10-05 13:54:51,998 - root - INFO - step: 25005 loss: 2.0275 memory: 118.84GiB(85.28%) tps: 2,370 tflops: 32.88 mfu: 3.32% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7926 +[titan] 2025-10-05 13:54:51,999 - root - INFO - lr: 1.9129e-05 gnorm: 1.11 [15:20:41< 9:12:07] +[titan] 2025-10-05 13:55:02,804 - root - INFO - step: 25010 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8047 +[titan] 2025-10-05 13:55:02,804 - root - INFO - lr: 1.9120e-05 gnorm: 1.11 [15:20:52< 9:11:56] +[titan] 2025-10-05 13:55:13,603 - root - INFO - step: 25015 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:55:13,603 - root - INFO - lr: 1.9112e-05 gnorm: 1.08 [15:21:03< 9:11:44] +[titan] 2025-10-05 13:55:24,411 - root - INFO - step: 25020 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 13:55:24,411 - root - INFO - lr: 1.9104e-05 gnorm: 1.12 [15:21:13< 9:11:33] +[titan] 2025-10-05 13:55:35,262 - root - INFO - step: 25025 loss: 2.0508 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:55:35,263 - root - INFO - lr: 1.9096e-05 gnorm: 1.09 [15:21:24< 9:11:22] +[titan] 2025-10-05 13:55:46,139 - root - INFO - step: 25030 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 13:55:46,139 - root - INFO - lr: 1.9087e-05 gnorm: 1.12 [15:21:35< 9:11:11] +[titan] 2025-10-05 13:55:56,971 - root - INFO - step: 25035 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 13:55:56,971 - root - INFO - lr: 
1.9079e-05 gnorm: 1.06 [15:21:46< 9:11:00] +[titan] 2025-10-05 13:56:07,833 - root - INFO - step: 25040 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7890 +[titan] 2025-10-05 13:56:07,833 - root - INFO - lr: 1.9071e-05 gnorm: 1.09 [15:21:57< 9:10:48] +[titan] 2025-10-05 13:56:18,697 - root - INFO - step: 25045 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 13:56:18,697 - root - INFO - lr: 1.9062e-05 gnorm: 1.07 [15:22:08< 9:10:37] +[titan] 2025-10-05 13:56:27,381 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:56:29,566 - root - INFO - step: 25050 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 13:56:29,566 - root - INFO - lr: 1.9054e-05 gnorm: 1.09 [15:22:19< 9:10:26] +[titan] 2025-10-05 13:56:40,477 - root - INFO - step: 25055 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 13:56:40,478 - root - INFO - lr: 1.9046e-05 gnorm: 1.10 [15:22:29< 9:10:15] +[titan] 2025-10-05 13:56:51,355 - root - INFO - step: 25060 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 13:56:51,355 - root - INFO - lr: 1.9038e-05 gnorm: 1.09 [15:22:40< 9:10:04] +[titan] 2025-10-05 13:57:02,218 - root - INFO - step: 25065 loss: 2.1039 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:57:02,218 - root - INFO - lr: 1.9029e-05 gnorm: 1.13 [15:22:51< 9:09:53] +[titan] 2025-10-05 13:57:13,100 - root - INFO - step: 25070 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 
30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 13:57:13,100 - root - INFO - lr: 1.9021e-05 gnorm: 1.08 [15:23:02< 9:09:42] +[titan] 2025-10-05 13:57:23,991 - root - INFO - step: 25075 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 13:57:23,991 - root - INFO - lr: 1.9013e-05 gnorm: 1.07 [15:23:13< 9:09:30] +[titan] 2025-10-05 13:57:34,864 - root - INFO - step: 25080 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 13:57:34,864 - root - INFO - lr: 1.9005e-05 gnorm: 1.05 [15:23:24< 9:09:19] +[titan] 2025-10-05 13:57:45,884 - root - INFO - step: 25085 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 29,737 tflops: 412.55 mfu: 41.71% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8452 +[titan] 2025-10-05 13:57:45,884 - root - INFO - lr: 1.8996e-05 gnorm: 1.11 [15:23:35< 9:09:08] +[titan] 2025-10-05 13:57:52,579 - root - INFO - Dumping profiler traces at step 25088 +[titan] 2025-10-05 13:57:52,617 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:57:56,994 - root - INFO - step: 25090 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.20 mfu: 41.38% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 13:57:56,994 - root - INFO - lr: 1.8988e-05 gnorm: 1.10 [15:23:46< 9:08:57] +[titan] 2025-10-05 13:58:07,853 - root - INFO - step: 25095 loss: 2.0873 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 13:58:07,853 - root - INFO - lr: 1.8980e-05 gnorm: 1.09 [15:23:57< 9:08:46] +[titan] 2025-10-05 13:58:16,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:58:18,699 - root - INFO - step: 25100 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7915 +[titan] 2025-10-05 13:58:18,700 - root - INFO - lr: 1.8972e-05 gnorm: 1.07 [15:24:08< 9:08:35] +[titan] 2025-10-05 13:58:29,551 - root - INFO - step: 25105 loss: 2.0232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 13:58:29,551 - root - INFO - lr: 1.8963e-05 gnorm: 1.09 [15:24:19< 9:08:24] +[titan] 2025-10-05 13:58:40,400 - root - INFO - step: 25110 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 13:58:40,400 - root - INFO - lr: 1.8955e-05 gnorm: 1.11 [15:24:29< 9:08:13] +[titan] 2025-10-05 13:58:51,352 - root - INFO - step: 25115 loss: 2.0288 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 13:58:51,352 - root - INFO - lr: 1.8947e-05 gnorm: 1.09 [15:24:40< 9:08:02] +[titan] 2025-10-05 13:59:02,234 - root - INFO - step: 25120 loss: 2.0905 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8475 +[titan] 2025-10-05 13:59:02,234 - root - INFO - lr: 1.8939e-05 gnorm: 1.09 [15:24:51< 9:07:50] +[titan] 2025-10-05 13:59:13,120 - root - INFO - step: 25125 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8215 +[titan] 2025-10-05 13:59:13,120 - root - INFO - lr: 1.8930e-05 gnorm: 1.07 [15:25:02< 9:07:39] +[titan] 2025-10-05 13:59:23,995 - root - INFO - step: 25130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 13:59:23,996 - root - INFO - lr: 
1.8922e-05 gnorm: 1.07 [15:25:13< 9:07:28] +[titan] 2025-10-05 13:59:34,878 - root - INFO - step: 25135 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8860 +[titan] 2025-10-05 13:59:34,879 - root - INFO - lr: 1.8914e-05 gnorm: 1.12 [15:25:24< 9:07:17] +[titan] 2025-10-05 13:59:45,774 - root - INFO - step: 25140 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 13:59:45,775 - root - INFO - lr: 1.8905e-05 gnorm: 1.07 [15:25:35< 9:07:06] +[titan] 2025-10-05 13:59:56,648 - root - INFO - step: 25145 loss: 2.0630 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:59:56,648 - root - INFO - lr: 1.8897e-05 gnorm: 1.08 [15:25:46< 9:06:55] +[titan] 2025-10-05 14:00:05,339 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:00:07,518 - root - INFO - step: 25150 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8682 +[titan] 2025-10-05 14:00:07,519 - root - INFO - lr: 1.8889e-05 gnorm: 1.15 [15:25:56< 9:06:44] +[titan] 2025-10-05 14:00:18,376 - root - INFO - step: 25155 loss: 2.0122 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 14:00:18,376 - root - INFO - lr: 1.8881e-05 gnorm: 1.04 [15:26:07< 9:06:32] +[titan] 2025-10-05 14:00:29,255 - root - INFO - step: 25160 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 14:00:29,255 - root - INFO - lr: 1.8873e-05 gnorm: 1.08 [15:26:18< 9:06:21] +[titan] 2025-10-05 14:00:40,131 - root - INFO - step: 25165 loss: 2.0645 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 14:00:40,131 - root - INFO - lr: 1.8864e-05 gnorm: 1.09 [15:26:29< 9:06:10] +[titan] 2025-10-05 14:00:51,071 - root - INFO - step: 25170 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8358 +[titan] 2025-10-05 14:00:51,071 - root - INFO - lr: 1.8856e-05 gnorm: 1.06 [15:26:40< 9:05:59] +[titan] 2025-10-05 14:01:01,932 - root - INFO - step: 25175 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 14:01:01,932 - root - INFO - lr: 1.8848e-05 gnorm: 1.09 [15:26:51< 9:05:48] +[titan] 2025-10-05 14:01:12,823 - root - INFO - step: 25180 loss: 2.0514 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8133 +[titan] 2025-10-05 14:01:12,824 - root - INFO - lr: 
1.8840e-05 gnorm: 1.08 [15:27:02< 9:05:37] +[titan] 2025-10-05 14:01:23,713 - root - INFO - step: 25185 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8457 +[titan] 2025-10-05 14:01:23,713 - root - INFO - lr: 1.8831e-05 gnorm: 1.04 [15:27:13< 9:05:26] +[titan] 2025-10-05 14:01:34,565 - root - INFO - step: 25190 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8554 +[titan] 2025-10-05 14:01:34,565 - root - INFO - lr: 1.8823e-05 gnorm: 1.08 [15:27:24< 9:05:14] +[titan] 2025-10-05 14:01:45,489 - root - INFO - step: 25195 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 14:01:45,489 - root - INFO - lr: 1.8815e-05 gnorm: 1.10 [15:27:34< 9:05:03] +[titan] 2025-10-05 14:01:54,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:01:56,348 - root - INFO - step: 25200 loss: 2.1289 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8821 +[titan] 2025-10-05 14:01:56,348 - root - INFO - lr: 1.8807e-05 gnorm: 1.13 [15:27:45< 9:04:52] +[titan] 2025-10-05 14:02:07,198 - root - INFO - step: 25205 loss: 2.0344 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:02:07,199 - root - INFO - lr: 1.8798e-05 gnorm: 1.06 [15:27:56< 9:04:41] +[titan] 2025-10-05 14:02:18,072 - root - INFO - step: 25210 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 14:02:18,073 - root - INFO - lr: 1.8790e-05 gnorm: 1.09 [15:28:07< 9:04:30] +[titan] 2025-10-05 14:02:28,950 - root - INFO - step: 25215 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:02:28,951 - root - INFO - lr: 1.8782e-05 gnorm: 1.11 [15:28:18< 9:04:19] +[titan] 2025-10-05 14:02:39,828 - root - INFO - step: 25220 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 14:02:39,828 - root - INFO - lr: 1.8774e-05 gnorm: 1.10 [15:28:29< 9:04:08] +[titan] 2025-10-05 14:02:50,798 - root - INFO - step: 25225 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 14:02:50,798 - root - INFO - lr: 1.8765e-05 gnorm: 1.10 [15:28:40< 9:03:56] +[titan] 2025-10-05 14:03:01,706 - root - INFO - step: 25230 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 14:03:01,706 - root - INFO - lr: 
1.8757e-05 gnorm: 1.07 [15:28:51< 9:03:45] +[titan] 2025-10-05 14:03:12,597 - root - INFO - step: 25235 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 14:03:12,597 - root - INFO - lr: 1.8749e-05 gnorm: 1.08 [15:29:02< 9:03:34] +[titan] 2025-10-05 14:03:23,476 - root - INFO - step: 25240 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:03:23,477 - root - INFO - lr: 1.8741e-05 gnorm: 1.05 [15:29:12< 9:03:23] +[titan] 2025-10-05 14:03:34,394 - root - INFO - step: 25245 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 14:03:34,395 - root - INFO - lr: 1.8733e-05 gnorm: 1.06 [15:29:23< 9:03:12] +[titan] 2025-10-05 14:03:43,075 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:03:45,291 - root - INFO - step: 25250 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7908 +[titan] 2025-10-05 14:03:45,292 - root - INFO - lr: 1.8724e-05 gnorm: 1.08 [15:29:34< 9:03:01] +[titan] 2025-10-05 14:03:56,215 - root - INFO - step: 25255 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8068 +[titan] 2025-10-05 14:03:56,215 - root - INFO - lr: 1.8716e-05 gnorm: 1.07 [15:29:45< 9:02:50] +[titan] 2025-10-05 14:04:07,069 - root - INFO - step: 25260 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7296 +[titan] 2025-10-05 14:04:07,070 - root - INFO - lr: 1.8708e-05 gnorm: 1.09 [15:29:56< 9:02:39] +[titan] 2025-10-05 14:04:17,929 - root - INFO - step: 25265 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 14:04:17,929 - root - INFO - lr: 1.8700e-05 gnorm: 1.05 [15:30:07< 9:02:27] +[titan] 2025-10-05 14:04:28,778 - root - INFO - step: 25270 loss: 2.0659 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8256 +[titan] 2025-10-05 14:04:28,778 - root - INFO - lr: 1.8692e-05 gnorm: 1.05 [15:30:18< 9:02:16] +[titan] 2025-10-05 14:04:39,663 - root - INFO - step: 25275 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:04:39,663 - root - INFO - lr: 1.8683e-05 gnorm: 1.10 [15:30:29< 9:02:05] +[titan] 2025-10-05 14:04:50,619 - root - INFO - step: 25280 loss: 2.0423 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 14:04:50,620 - root - INFO - lr: 
1.8675e-05 gnorm: 1.10 [15:30:40< 9:01:54] +[titan] 2025-10-05 14:05:01,490 - root - INFO - step: 25285 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 14:05:01,490 - root - INFO - lr: 1.8667e-05 gnorm: 1.07 [15:30:50< 9:01:43] +[titan] 2025-10-05 14:05:12,363 - root - INFO - step: 25290 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 14:05:12,363 - root - INFO - lr: 1.8659e-05 gnorm: 1.08 [15:31:01< 9:01:32] +[titan] 2025-10-05 14:05:23,239 - root - INFO - step: 25295 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 14:05:23,240 - root - INFO - lr: 1.8650e-05 gnorm: 1.12 [15:31:12< 9:01:21] +[titan] 2025-10-05 14:05:31,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:05:34,099 - root - INFO - step: 25300 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:05:34,099 - root - INFO - lr: 1.8642e-05 gnorm: 1.10 [15:31:23< 9:01:09] +[titan] 2025-10-05 14:05:44,978 - root - INFO - step: 25305 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 14:05:44,978 - root - INFO - lr: 1.8634e-05 gnorm: 1.07 [15:31:34< 9:00:58] +[titan] 2025-10-05 14:05:55,924 - root - INFO - step: 25310 loss: 2.0792 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 41.99% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8356 +[titan] 2025-10-05 14:05:55,924 - root - INFO - lr: 1.8626e-05 gnorm: 1.11 [15:31:45< 9:00:47] +[titan] 2025-10-05 14:06:06,777 - root - INFO - step: 25315 loss: 2.0737 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8330 +[titan] 2025-10-05 14:06:06,777 - root - INFO - lr: 1.8618e-05 gnorm: 1.08 [15:31:56< 9:00:36] +[titan] 2025-10-05 14:06:17,654 - root - INFO - step: 25320 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 14:06:17,654 - root - INFO - lr: 1.8609e-05 gnorm: 1.06 [15:32:07< 9:00:25] +[titan] 2025-10-05 14:06:28,537 - root - INFO - step: 25325 loss: 2.1056 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 14:06:28,537 - root - INFO - lr: 1.8601e-05 gnorm: 1.08 [15:32:17< 9:00:14] +[titan] 2025-10-05 14:06:39,411 - root - INFO - step: 25330 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:06:39,411 - root - INFO - lr: 
1.8593e-05 gnorm: 1.11 [15:32:28< 9:00:03] +[titan] 2025-10-05 14:06:50,340 - root - INFO - step: 25335 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:06:50,341 - root - INFO - lr: 1.8585e-05 gnorm: 1.10 [15:32:39< 8:59:51] +[titan] 2025-10-05 14:07:01,212 - root - INFO - step: 25340 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 14:07:01,212 - root - INFO - lr: 1.8577e-05 gnorm: 1.08 [15:32:50< 8:59:40] +[titan] 2025-10-05 14:07:12,114 - root - INFO - step: 25345 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 14:07:12,114 - root - INFO - lr: 1.8568e-05 gnorm: 1.06 [15:33:01< 8:59:29] +[titan] 2025-10-05 14:07:20,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:07:22,994 - root - INFO - step: 25350 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 14:07:22,994 - root - INFO - lr: 1.8560e-05 gnorm: 1.06 [15:33:12< 8:59:18] +[titan] 2025-10-05 14:07:33,878 - root - INFO - step: 25355 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8658 +[titan] 2025-10-05 14:07:33,878 - root - INFO - lr: 1.8552e-05 gnorm: 1.11 [15:33:23< 8:59:07] +[titan] 2025-10-05 14:07:44,774 - root - INFO - step: 25360 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 14:07:44,774 - root - INFO - lr: 1.8544e-05 gnorm: 1.08 [15:33:34< 8:58:56] +[titan] 2025-10-05 14:07:55,691 - root - INFO - step: 25365 loss: 2.0709 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:07:55,691 - root - INFO - lr: 1.8536e-05 gnorm: 1.08 [15:33:45< 8:58:45] +[titan] 2025-10-05 14:08:06,574 - root - INFO - step: 25370 loss: 2.0036 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 14:08:06,574 - root - INFO - lr: 1.8528e-05 gnorm: 1.08 [15:33:56< 8:58:34] +[titan] 2025-10-05 14:08:17,490 - root - INFO - step: 25375 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 14:08:17,490 - root - INFO - lr: 1.8519e-05 gnorm: 1.13 [15:34:06< 8:58:22] +[titan] 2025-10-05 14:08:28,356 - root - INFO - step: 25380 loss: 2.1491 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 14:08:28,357 - root - INFO - lr: 
1.8511e-05 gnorm: 1.09 [15:34:17< 8:58:11] +[titan] 2025-10-05 14:08:39,210 - root - INFO - step: 25385 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:08:39,210 - root - INFO - lr: 1.8503e-05 gnorm: 1.09 [15:34:28< 8:58:00] +[titan] 2025-10-05 14:08:50,100 - root - INFO - step: 25390 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 14:08:50,100 - root - INFO - lr: 1.8495e-05 gnorm: 1.11 [15:34:39< 8:57:49] +[titan] 2025-10-05 14:09:00,958 - root - INFO - step: 25395 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 14:09:00,959 - root - INFO - lr: 1.8487e-05 gnorm: 1.09 [15:34:50< 8:57:38] +[titan] 2025-10-05 14:09:09,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:09:11,824 - root - INFO - step: 25400 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:09:11,824 - root - INFO - lr: 1.8478e-05 gnorm: 1.09 [15:35:01< 8:57:27] +[titan] 2025-10-05 14:09:22,722 - root - INFO - step: 25405 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 14:09:22,722 - root - INFO - lr: 1.8470e-05 gnorm: 1.06 [15:35:12< 8:57:16] +[titan] 2025-10-05 14:09:33,582 - root - INFO - step: 25410 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 14:09:33,583 - root - INFO - lr: 1.8462e-05 gnorm: 1.07 [15:35:23< 8:57:04] +[titan] 2025-10-05 14:09:44,445 - root - INFO - step: 25415 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 14:09:44,445 - root - INFO - lr: 1.8454e-05 gnorm: 1.07 [15:35:33< 8:56:53] +[titan] 2025-10-05 14:09:55,342 - root - INFO - step: 25420 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8650 +[titan] 2025-10-05 14:09:55,342 - root - INFO - lr: 1.8446e-05 gnorm: 1.08 [15:35:44< 8:56:42] +[titan] 2025-10-05 14:10:06,229 - root - INFO - step: 25425 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 14:10:06,229 - root - INFO - lr: 1.8438e-05 gnorm: 1.09 [15:35:55< 8:56:31] +[titan] 2025-10-05 14:10:17,110 - root - INFO - step: 25430 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 14:10:17,110 - root - INFO - lr: 
1.8429e-05 gnorm: 1.09 [15:36:06< 8:56:20] +[titan] 2025-10-05 14:10:28,014 - root - INFO - step: 25435 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 14:10:28,014 - root - INFO - lr: 1.8421e-05 gnorm: 1.05 [15:36:17< 8:56:09] +[titan] 2025-10-05 14:10:38,939 - root - INFO - step: 25440 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8011 +[titan] 2025-10-05 14:10:38,939 - root - INFO - lr: 1.8413e-05 gnorm: 1.10 [15:36:28< 8:55:58] +[titan] 2025-10-05 14:10:49,824 - root - INFO - step: 25445 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 14:10:49,825 - root - INFO - lr: 1.8405e-05 gnorm: 1.08 [15:36:39< 8:55:46] +[titan] 2025-10-05 14:10:58,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:11:00,730 - root - INFO - step: 25450 loss: 2.0470 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 14:11:00,730 - root - INFO - lr: 1.8397e-05 gnorm: 1.07 [15:36:50< 8:55:35] +[titan] 2025-10-05 14:11:11,607 - root - INFO - step: 25455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 14:11:11,607 - root - INFO - lr: 1.8389e-05 gnorm: 1.07 [15:37:01< 8:55:24] +[titan] 2025-10-05 14:11:22,482 - root - INFO - step: 25460 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 14:11:22,482 - root - INFO - lr: 1.8380e-05 gnorm: 1.10 [15:37:11< 8:55:13] +[titan] 2025-10-05 14:11:33,348 - root - INFO - step: 25465 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:11:33,348 - root - INFO - lr: 1.8372e-05 gnorm: 1.09 [15:37:22< 8:55:02] +[titan] 2025-10-05 14:11:44,248 - root - INFO - step: 25470 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 14:11:44,248 - root - INFO - lr: 1.8364e-05 gnorm: 1.09 [15:37:33< 8:54:51] +[titan] 2025-10-05 14:11:55,157 - root - INFO - step: 25475 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 14:11:55,157 - root - INFO - lr: 1.8356e-05 gnorm: 1.09 [15:37:44< 8:54:40] +[titan] 2025-10-05 14:12:06,026 - root - INFO - step: 25480 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 14:12:06,026 - root - INFO - lr: 
1.8348e-05 gnorm: 1.07 [15:37:55< 8:54:29] +[titan] 2025-10-05 14:12:16,908 - root - INFO - step: 25485 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:12:16,909 - root - INFO - lr: 1.8340e-05 gnorm: 1.10 [15:38:06< 8:54:17] +[titan] 2025-10-05 14:12:27,776 - root - INFO - step: 25490 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7677 +[titan] 2025-10-05 14:12:27,776 - root - INFO - lr: 1.8332e-05 gnorm: 1.09 [15:38:17< 8:54:06] +[titan] 2025-10-05 14:12:38,651 - root - INFO - step: 25495 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 14:12:38,651 - root - INFO - lr: 1.8323e-05 gnorm: 1.08 [15:38:28< 8:53:55] +[titan] 2025-10-05 14:12:47,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:12:49,537 - root - INFO - step: 25500 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7501 +[titan] 2025-10-05 14:12:49,537 - root - INFO - lr: 1.8315e-05 gnorm: 1.13 [15:38:38< 8:53:44] +[titan] 2025-10-05 14:13:00,470 - root - INFO - step: 25505 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:13:00,470 - root - INFO - lr: 1.8307e-05 gnorm: 1.08 [15:38:49< 8:53:33] +[titan] 2025-10-05 14:13:11,338 - root - INFO - step: 25510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 14:13:11,338 - root - INFO - lr: 1.8299e-05 gnorm: 1.11 [15:39:00< 8:53:22] +[titan] 2025-10-05 14:13:22,196 - root - INFO - step: 25515 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 14:13:22,197 - root - INFO - lr: 1.8291e-05 gnorm: 1.17 [15:39:11< 8:53:11] +[titan] 2025-10-05 14:13:33,046 - root - INFO - step: 25520 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 14:13:33,047 - root - INFO - lr: 1.8283e-05 gnorm: 1.07 [15:39:22< 8:52:59] +[titan] 2025-10-05 14:13:43,917 - root - INFO - step: 25525 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 14:13:43,917 - root - INFO - lr: 1.8275e-05 gnorm: 1.12 [15:39:33< 8:52:48] +[titan] 2025-10-05 14:13:54,888 - root - INFO - step: 25530 loss: 2.1016 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 14:13:54,888 - root - INFO - lr: 
1.8266e-05 gnorm: 1.14 [15:39:44< 8:52:37] +[titan] 2025-10-05 14:14:05,796 - root - INFO - step: 25535 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:14:05,796 - root - INFO - lr: 1.8258e-05 gnorm: 1.11 [15:39:55< 8:52:26] +[titan] 2025-10-05 14:14:16,658 - root - INFO - step: 25540 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 14:14:16,658 - root - INFO - lr: 1.8250e-05 gnorm: 1.12 [15:40:06< 8:52:15] +[titan] 2025-10-05 14:14:27,520 - root - INFO - step: 25545 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 14:14:27,521 - root - INFO - lr: 1.8242e-05 gnorm: 1.08 [15:40:16< 8:52:04] +[titan] 2025-10-05 14:14:36,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:14:38,398 - root - INFO - step: 25550 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 14:14:38,398 - root - INFO - lr: 1.8234e-05 gnorm: 1.07 [15:40:27< 8:51:53] +[titan] 2025-10-05 14:14:49,271 - root - INFO - step: 25555 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:14:49,271 - root - INFO - lr: 1.8226e-05 gnorm: 1.10 [15:40:38< 8:51:42] +[titan] 2025-10-05 14:15:00,189 - root - INFO - step: 25560 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:15:00,189 - root - INFO - lr: 1.8218e-05 gnorm: 1.05 [15:40:49< 8:51:30] +[titan] 2025-10-05 14:15:11,120 - root - INFO - step: 25565 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 14:15:11,121 - root - INFO - lr: 1.8209e-05 gnorm: 1.07 [15:41:00< 8:51:19] +[titan] 2025-10-05 14:15:21,997 - root - INFO - step: 25570 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 14:15:21,997 - root - INFO - lr: 1.8201e-05 gnorm: 1.56 [15:41:11< 8:51:08] +[titan] 2025-10-05 14:15:32,888 - root - INFO - step: 25575 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 14:15:32,888 - root - INFO - lr: 1.8193e-05 gnorm: 1.07 [15:41:22< 8:50:57] +[titan] 2025-10-05 14:15:43,769 - root - INFO - step: 25580 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8710 +[titan] 2025-10-05 14:15:43,769 - root - INFO - lr: 
1.8185e-05 gnorm: 1.07 [15:41:33< 8:50:46] +[titan] 2025-10-05 14:15:54,652 - root - INFO - step: 25585 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 14:15:54,652 - root - INFO - lr: 1.8177e-05 gnorm: 1.05 [15:41:44< 8:50:35] +[titan] 2025-10-05 14:16:05,536 - root - INFO - step: 25590 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 14:16:05,536 - root - INFO - lr: 1.8169e-05 gnorm: 1.07 [15:41:54< 8:50:24] +[titan] 2025-10-05 14:16:16,420 - root - INFO - step: 25595 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:16:16,420 - root - INFO - lr: 1.8161e-05 gnorm: 1.09 [15:42:05< 8:50:13] +[titan] 2025-10-05 14:16:25,234 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:16:27,418 - root - INFO - step: 25600 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 29,795 tflops: 413.36 mfu: 41.80% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 14:16:27,418 - root - INFO - lr: 1.8153e-05 gnorm: 1.10 [15:42:16< 8:50:01] +[titan] 2025-10-05 14:16:27,612 - root - INFO - Dumping profiler traces at step 25600 +[titan] 2025-10-05 14:16:27,654 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:16:38,481 - root - INFO - step: 25605 loss: 2.0476 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.94 mfu: 41.55% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:16:38,482 - root - INFO - lr: 1.8144e-05 gnorm: 1.11 [15:42:27< 8:49:50] +[titan] 2025-10-05 14:16:49,316 - root - INFO - step: 25610 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8107 +[titan] 2025-10-05 14:16:49,316 - root - INFO - lr: 1.8136e-05 gnorm: 1.06 [15:42:38< 8:49:39] +[titan] 2025-10-05 14:17:00,171 - root - INFO - step: 25615 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 14:17:00,172 - root - INFO - lr: 1.8128e-05 gnorm: 1.06 [15:42:49< 8:49:28] +[titan] 2025-10-05 14:17:11,028 - root - INFO - step: 25620 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 14:17:11,028 - root - INFO - lr: 1.8120e-05 gnorm: 1.08 [15:43:00< 8:49:17] +[titan] 2025-10-05 14:17:21,893 - root - INFO - step: 25625 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 14:17:21,894 - root - INFO - lr: 1.8112e-05 gnorm: 1.08 [15:43:11< 8:49:06] +[titan] 2025-10-05 14:17:32,791 - root - INFO - step: 25630 loss: 
2.0937 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8510 +[titan] 2025-10-05 14:17:32,791 - root - INFO - lr: 1.8104e-05 gnorm: 1.17 [15:43:22< 8:48:55] +[titan] 2025-10-05 14:17:43,645 - root - INFO - step: 25635 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 14:17:43,645 - root - INFO - lr: 1.8096e-05 gnorm: 1.09 [15:43:33< 8:48:44] +[titan] 2025-10-05 14:17:54,490 - root - INFO - step: 25640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:17:54,490 - root - INFO - lr: 1.8088e-05 gnorm: 1.07 [15:43:43< 8:48:32] +[titan] 2025-10-05 14:18:05,362 - root - INFO - step: 25645 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:18:05,362 - root - INFO - lr: 1.8080e-05 gnorm: 1.09 [15:43:54< 8:48:21] +[titan] 2025-10-05 14:18:14,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:18:16,215 - root - INFO - step: 25650 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 14:18:16,215 - root - INFO - lr: 1.8071e-05 gnorm: 1.09 [15:44:05< 8:48:10] +[titan] 2025-10-05 14:18:27,067 - root - INFO - step: 25655 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 14:18:27,068 - root - INFO - lr: 1.8063e-05 gnorm: 1.05 [15:44:16< 8:47:59] +[titan] 2025-10-05 14:18:37,921 - root - INFO - step: 25660 loss: 2.0284 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7927 +[titan] 2025-10-05 14:18:37,921 - root - INFO - lr: 1.8055e-05 gnorm: 1.09 [15:44:27< 8:47:48] +[titan] 2025-10-05 14:18:48,835 - root - INFO - step: 25665 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 14:18:48,835 - root - INFO - lr: 1.8047e-05 gnorm: 1.08 [15:44:38< 8:47:37] +[titan] 2025-10-05 14:18:59,735 - root - INFO - step: 25670 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:18:59,735 - root - INFO - lr: 1.8039e-05 gnorm: 1.11 [15:44:49< 8:47:26] +[titan] 2025-10-05 14:19:10,621 - root - INFO - step: 25675 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 14:19:10,621 - root - INFO - lr: 1.8031e-05 gnorm: 1.12 [15:45:00< 8:47:14] +[titan] 2025-10-05 14:19:21,506 - root - INFO - step: 25680 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8389 +[titan] 2025-10-05 14:19:21,507 - root - INFO - lr: 
1.8023e-05 gnorm: 1.07 [15:45:10< 8:47:03] +[titan] 2025-10-05 14:19:32,375 - root - INFO - step: 25685 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 14:19:32,375 - root - INFO - lr: 1.8015e-05 gnorm: 1.07 [15:45:21< 8:46:52] +[titan] 2025-10-05 14:19:43,253 - root - INFO - step: 25690 loss: 1.9973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7661 +[titan] 2025-10-05 14:19:43,254 - root - INFO - lr: 1.8007e-05 gnorm: 1.09 [15:45:32< 8:46:41] +[titan] 2025-10-05 14:19:54,175 - root - INFO - step: 25695 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7719 +[titan] 2025-10-05 14:19:54,175 - root - INFO - lr: 1.7999e-05 gnorm: 1.09 [15:45:43< 8:46:30] +[titan] 2025-10-05 14:20:02,862 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:20:05,037 - root - INFO - step: 25700 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 14:20:05,037 - root - INFO - lr: 1.7991e-05 gnorm: 1.10 [15:45:54< 8:46:19] +[titan] 2025-10-05 14:20:15,889 - root - INFO - step: 25705 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9084 +[titan] 2025-10-05 14:20:15,889 - root - INFO - lr: 1.7982e-05 gnorm: 1.09 [15:46:05< 8:46:08] +[titan] 2025-10-05 14:20:26,754 - root - INFO - step: 25710 loss: 2.0748 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 14:20:26,754 - root - INFO - lr: 1.7974e-05 gnorm: 1.08 [15:46:16< 8:45:57] +[titan] 2025-10-05 14:20:37,621 - root - INFO - step: 25715 loss: 2.0337 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7984 +[titan] 2025-10-05 14:20:37,621 - root - INFO - lr: 1.7966e-05 gnorm: 1.06 [15:46:27< 8:45:45] +[titan] 2025-10-05 14:20:48,501 - root - INFO - step: 25720 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 14:20:48,501 - root - INFO - lr: 1.7958e-05 gnorm: 1.07 [15:46:37< 8:45:34] +[titan] 2025-10-05 14:20:59,442 - root - INFO - step: 25725 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8266 +[titan] 2025-10-05 14:20:59,442 - root - INFO - lr: 1.7950e-05 gnorm: 1.11 [15:46:48< 8:45:23] +[titan] 2025-10-05 14:21:10,316 - root - INFO - step: 25730 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8362 +[titan] 2025-10-05 14:21:10,316 - root - INFO - lr: 
1.7942e-05 gnorm: 1.10 [15:46:59< 8:45:12] +[titan] 2025-10-05 14:21:21,179 - root - INFO - step: 25735 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 14:21:21,179 - root - INFO - lr: 1.7934e-05 gnorm: 1.11 [15:47:10< 8:45:01] +[titan] 2025-10-05 14:21:32,060 - root - INFO - step: 25740 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 14:21:32,060 - root - INFO - lr: 1.7926e-05 gnorm: 1.05 [15:47:21< 8:44:50] +[titan] 2025-10-05 14:21:42,940 - root - INFO - step: 25745 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 14:21:42,940 - root - INFO - lr: 1.7918e-05 gnorm: 1.13 [15:47:32< 8:44:39] +[titan] 2025-10-05 14:21:51,620 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:21:53,800 - root - INFO - step: 25750 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 14:21:53,800 - root - INFO - lr: 1.7910e-05 gnorm: 1.09 [15:47:43< 8:44:27] +[titan] 2025-10-05 14:22:04,676 - root - INFO - step: 25755 loss: 2.0272 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:22:04,676 - root - INFO - lr: 1.7902e-05 gnorm: 1.10 [15:47:54< 8:44:16] +[titan] 2025-10-05 14:22:15,594 - root - INFO - step: 25760 loss: 2.0342 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7988 +[titan] 2025-10-05 14:22:15,594 - root - INFO - lr: 1.7894e-05 gnorm: 1.07 [15:48:04< 8:44:05] +[titan] 2025-10-05 14:22:26,449 - root - INFO - step: 25765 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 14:22:26,449 - root - INFO - lr: 1.7885e-05 gnorm: 1.09 [15:48:15< 8:43:54] +[titan] 2025-10-05 14:22:37,310 - root - INFO - step: 25770 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 14:22:37,310 - root - INFO - lr: 1.7877e-05 gnorm: 1.05 [15:48:26< 8:43:43] +[titan] 2025-10-05 14:22:48,182 - root - INFO - step: 25775 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 14:22:48,182 - root - INFO - lr: 1.7869e-05 gnorm: 1.11 [15:48:37< 8:43:32] +[titan] 2025-10-05 14:22:59,049 - root - INFO - step: 25780 loss: 2.0127 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 14:22:59,049 - root - INFO - lr: 
1.7861e-05 gnorm: 1.06 [15:48:48< 8:43:21] +[titan] 2025-10-05 14:23:09,928 - root - INFO - step: 25785 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 14:23:09,928 - root - INFO - lr: 1.7853e-05 gnorm: 1.04 [15:48:59< 8:43:10] +[titan] 2025-10-05 14:23:20,861 - root - INFO - step: 25790 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 14:23:20,861 - root - INFO - lr: 1.7845e-05 gnorm: 1.11 [15:49:10< 8:42:58] +[titan] 2025-10-05 14:23:31,734 - root - INFO - step: 25795 loss: 2.0316 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 14:23:31,734 - root - INFO - lr: 1.7837e-05 gnorm: 1.08 [15:49:21< 8:42:47] +[titan] 2025-10-05 14:23:40,435 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:23:42,620 - root - INFO - step: 25800 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.8738 +[titan] 2025-10-05 14:23:42,620 - root - INFO - lr: 1.7829e-05 gnorm: 2.05 [15:49:32< 8:42:36] +[titan] 2025-10-05 14:23:53,479 - root - INFO - step: 25805 loss: 2.0499 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8114 +[titan] 2025-10-05 14:23:53,479 - root - INFO - lr: 1.7821e-05 gnorm: 1.10 [15:49:42< 8:42:25] +[titan] 2025-10-05 14:24:04,354 - root - INFO - step: 25810 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8199 +[titan] 2025-10-05 14:24:04,354 - root - INFO - lr: 1.7813e-05 gnorm: 1.10 [15:49:53< 8:42:14] +[titan] 2025-10-05 14:24:15,228 - root - INFO - step: 25815 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:24:15,228 - root - INFO - lr: 1.7805e-05 gnorm: 1.07 [15:50:04< 8:42:03] +[titan] 2025-10-05 14:24:26,126 - root - INFO - step: 25820 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:24:26,126 - root - INFO - lr: 1.7797e-05 gnorm: 1.11 [15:50:15< 8:41:52] +[titan] 2025-10-05 14:24:37,054 - root - INFO - step: 25825 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 14:24:37,055 - root - INFO - lr: 1.7789e-05 gnorm: 1.09 [15:50:26< 8:41:41] +[titan] 2025-10-05 14:24:47,925 - root - INFO - step: 25830 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 14:24:47,925 - root - INFO - lr: 
1.7781e-05 gnorm: 1.08 [15:50:37< 8:41:29] +[titan] 2025-10-05 14:24:58,795 - root - INFO - step: 25835 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 14:24:58,795 - root - INFO - lr: 1.7773e-05 gnorm: 1.15 [15:50:48< 8:41:18] +[titan] 2025-10-05 14:25:09,680 - root - INFO - step: 25840 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 14:25:09,680 - root - INFO - lr: 1.7765e-05 gnorm: 1.04 [15:50:59< 8:41:07] +[titan] 2025-10-05 14:25:20,542 - root - INFO - step: 25845 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7882 +[titan] 2025-10-05 14:25:20,543 - root - INFO - lr: 1.7757e-05 gnorm: 1.08 [15:51:09< 8:40:56] +[titan] 2025-10-05 14:25:29,239 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:25:31,434 - root - INFO - step: 25850 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8073 +[titan] 2025-10-05 14:25:31,434 - root - INFO - lr: 1.7749e-05 gnorm: 1.08 [15:51:20< 8:40:45] +[titan] 2025-10-05 14:25:42,355 - root - INFO - step: 25855 loss: 2.0565 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:25:42,356 - root - INFO - lr: 1.7740e-05 gnorm: 1.09 [15:51:31< 8:40:34] +[titan] 2025-10-05 14:25:53,227 - root - INFO - step: 25860 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 14:25:53,227 - root - INFO - lr: 1.7732e-05 gnorm: 1.11 [15:51:42< 8:40:23] +[titan] 2025-10-05 14:26:04,104 - root - INFO - step: 25865 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8184 +[titan] 2025-10-05 14:26:04,105 - root - INFO - lr: 1.7724e-05 gnorm: 1.11 [15:51:53< 8:40:12] +[titan] 2025-10-05 14:26:15,028 - root - INFO - step: 25870 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 14:26:15,028 - root - INFO - lr: 1.7716e-05 gnorm: 1.04 [15:52:04< 8:40:00] +[titan] 2025-10-05 14:26:25,939 - root - INFO - step: 25875 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8127 +[titan] 2025-10-05 14:26:25,939 - root - INFO - lr: 1.7708e-05 gnorm: 1.08 [15:52:15< 8:39:49] +[titan] 2025-10-05 14:26:36,815 - root - INFO - step: 25880 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:26:36,815 - root - INFO - lr: 
1.7700e-05 gnorm: 1.07 [15:52:26< 8:39:38] +[titan] 2025-10-05 14:26:47,749 - root - INFO - step: 25885 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8378 +[titan] 2025-10-05 14:26:47,749 - root - INFO - lr: 1.7692e-05 gnorm: 1.10 [15:52:37< 8:39:27] +[titan] 2025-10-05 14:26:58,622 - root - INFO - step: 25890 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:26:58,622 - root - INFO - lr: 1.7684e-05 gnorm: 1.07 [15:52:48< 8:39:16] +[titan] 2025-10-05 14:27:09,541 - root - INFO - step: 25895 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7826 +[titan] 2025-10-05 14:27:09,541 - root - INFO - lr: 1.7676e-05 gnorm: 1.10 [15:52:58< 8:39:05] +[titan] 2025-10-05 14:27:18,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:27:20,420 - root - INFO - step: 25900 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 14:27:20,420 - root - INFO - lr: 1.7668e-05 gnorm: 1.08 [15:53:09< 8:38:54] +[titan] 2025-10-05 14:27:31,298 - root - INFO - step: 25905 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 14:27:31,299 - root - INFO - lr: 1.7660e-05 gnorm: 1.08 [15:53:20< 8:38:43] +[titan] 2025-10-05 14:27:42,163 - root - INFO - step: 25910 loss: 2.0892 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 14:27:42,163 - root - INFO - lr: 1.7652e-05 gnorm: 1.12 [15:53:31< 8:38:31] +[titan] 2025-10-05 14:27:53,040 - root - INFO - step: 25915 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 14:27:53,041 - root - INFO - lr: 1.7644e-05 gnorm: 1.09 [15:53:42< 8:38:20] +[titan] 2025-10-05 14:28:03,938 - root - INFO - step: 25920 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 14:28:03,938 - root - INFO - lr: 1.7636e-05 gnorm: 1.05 [15:53:53< 8:38:09] +[titan] 2025-10-05 14:28:14,994 - root - INFO - step: 25925 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.19 mfu: 41.58% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8390 +[titan] 2025-10-05 14:28:14,995 - root - INFO - lr: 1.7628e-05 gnorm: 1.11 [15:54:04< 8:37:58] +[titan] 2025-10-05 14:28:25,864 - root - INFO - step: 25930 loss: 2.0995 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 14:28:25,864 - root - INFO - lr: 
1.7620e-05 gnorm: 1.09 [15:54:15< 8:37:47] +[titan] 2025-10-05 14:28:36,720 - root - INFO - step: 25935 loss: 2.0585 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 14:28:36,720 - root - INFO - lr: 1.7612e-05 gnorm: 1.12 [15:54:26< 8:37:36] +[titan] 2025-10-05 14:28:47,595 - root - INFO - step: 25940 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 14:28:47,596 - root - INFO - lr: 1.7604e-05 gnorm: 1.13 [15:54:36< 8:37:25] +[titan] 2025-10-05 14:28:58,468 - root - INFO - step: 25945 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7629 +[titan] 2025-10-05 14:28:58,469 - root - INFO - lr: 1.7596e-05 gnorm: 1.11 [15:54:47< 8:37:14] +[titan] 2025-10-05 14:29:07,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:29:09,436 - root - INFO - step: 25950 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 29,879 tflops: 414.52 mfu: 41.91% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:29:09,436 - root - INFO - lr: 1.7588e-05 gnorm: 1.14 [15:54:58< 8:37:03] +[titan] 2025-10-05 14:29:20,286 - root - INFO - step: 25955 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 14:29:20,286 - root - INFO - lr: 1.7580e-05 gnorm: 1.08 [15:55:09< 8:36:51] +[titan] 2025-10-05 14:29:31,140 - root - INFO - step: 25960 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:29:31,140 - root - INFO - lr: 1.7572e-05 gnorm: 1.08 [15:55:20< 8:36:40] +[titan] 2025-10-05 14:29:42,013 - root - INFO - step: 25965 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8359 +[titan] 2025-10-05 14:29:42,013 - root - INFO - lr: 1.7564e-05 gnorm: 1.10 [15:55:31< 8:36:29] +[titan] 2025-10-05 14:29:52,914 - root - INFO - step: 25970 loss: 2.1034 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 14:29:52,914 - root - INFO - lr: 1.7556e-05 gnorm: 1.06 [15:55:42< 8:36:18] +[titan] 2025-10-05 14:30:03,792 - root - INFO - step: 25975 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 14:30:03,792 - root - INFO - lr: 1.7548e-05 gnorm: 1.08 [15:55:53< 8:36:07] +[titan] 2025-10-05 14:30:14,715 - root - INFO - step: 25980 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8170 +[titan] 2025-10-05 14:30:14,715 - root - INFO - lr: 
1.7540e-05 gnorm: 1.11 [15:56:04< 8:35:56] +[titan] 2025-10-05 14:30:25,638 - root - INFO - step: 25985 loss: 2.0484 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:30:25,638 - root - INFO - lr: 1.7532e-05 gnorm: 1.07 [15:56:15< 8:35:45] +[titan] 2025-10-05 14:30:36,501 - root - INFO - step: 25990 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:30:36,501 - root - INFO - lr: 1.7524e-05 gnorm: 1.10 [15:56:25< 8:35:34] +[titan] 2025-10-05 14:30:47,379 - root - INFO - step: 25995 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7746 +[titan] 2025-10-05 14:30:47,379 - root - INFO - lr: 1.7516e-05 gnorm: 1.07 [15:56:36< 8:35:22] +[titan] 2025-10-05 14:30:56,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:30:58,259 - root - INFO - step: 26000 loss: 2.0535 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8148 +[titan] 2025-10-05 14:30:58,259 - root - INFO - lr: 1.7508e-05 gnorm: 1.14 [15:56:47< 8:35:11] +[titan] 2025-10-05 14:31:09,132 - root - INFO - step: 26005 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 14:31:09,132 - root - INFO - lr: 1.7500e-05 gnorm: 1.09 [15:56:58< 8:35:00] +[titan] 2025-10-05 14:31:20,058 - root - INFO - step: 26010 loss: 2.0243 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 14:31:20,058 - root - INFO - lr: 1.7492e-05 gnorm: 1.12 [15:57:09< 8:34:49] +[titan] 2025-10-05 14:31:30,973 - root - INFO - step: 26015 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:31:30,974 - root - INFO - lr: 1.7484e-05 gnorm: 1.10 [15:57:20< 8:34:38] +[titan] 2025-10-05 14:31:41,835 - root - INFO - step: 26020 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8519 +[titan] 2025-10-05 14:31:41,835 - root - INFO - lr: 1.7476e-05 gnorm: 1.16 [15:57:31< 8:34:27] +[titan] 2025-10-05 14:31:52,692 - root - INFO - step: 26025 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 14:31:52,692 - root - INFO - lr: 1.7468e-05 gnorm: 1.07 [15:57:42< 8:34:16] +[titan] 2025-10-05 14:32:03,561 - root - INFO - step: 26030 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 14:32:03,561 - root - INFO - lr: 
1.7460e-05 gnorm: 1.09 [15:57:52< 8:34:05] +[titan] 2025-10-05 14:32:14,505 - root - INFO - step: 26035 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:32:14,505 - root - INFO - lr: 1.7452e-05 gnorm: 1.10 [15:58:03< 8:33:53] +[titan] 2025-10-05 14:32:25,388 - root - INFO - step: 26040 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:32:25,388 - root - INFO - lr: 1.7444e-05 gnorm: 1.09 [15:58:14< 8:33:42] +[titan] 2025-10-05 14:32:36,316 - root - INFO - step: 26045 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8342 +[titan] 2025-10-05 14:32:36,316 - root - INFO - lr: 1.7436e-05 gnorm: 1.10 [15:58:25< 8:33:31] +[titan] 2025-10-05 14:32:45,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:32:47,196 - root - INFO - step: 26050 loss: 2.0388 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:32:47,196 - root - INFO - lr: 1.7428e-05 gnorm: 1.08 [15:58:36< 8:33:20] +[titan] 2025-10-05 14:32:58,069 - root - INFO - step: 26055 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8131 +[titan] 2025-10-05 14:32:58,069 - root - INFO - lr: 1.7420e-05 gnorm: 1.06 [15:58:47< 8:33:09] +[titan] 2025-10-05 14:33:08,972 - root - INFO - step: 26060 loss: 2.0150 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 14:33:08,972 - root - INFO - lr: 1.7412e-05 gnorm: 1.10 [15:58:58< 8:32:58] +[titan] 2025-10-05 14:33:19,953 - root - INFO - step: 26065 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:33:19,953 - root - INFO - lr: 1.7404e-05 gnorm: 1.12 [15:59:09< 8:32:47] +[titan] 2025-10-05 14:33:30,852 - root - INFO - step: 26070 loss: 2.0795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 14:33:30,852 - root - INFO - lr: 1.7396e-05 gnorm: 1.11 [15:59:20< 8:32:36] +[titan] 2025-10-05 14:33:41,755 - root - INFO - step: 26075 loss: 2.0764 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 14:33:41,755 - root - INFO - lr: 1.7388e-05 gnorm: 1.11 [15:59:31< 8:32:25] +[titan] 2025-10-05 14:33:52,678 - root - INFO - step: 26080 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 14:33:52,678 - root - INFO - lr: 
1.7380e-05 gnorm: 1.08 [15:59:42< 8:32:13] +[titan] 2025-10-05 14:34:03,540 - root - INFO - step: 26085 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:34:03,540 - root - INFO - lr: 1.7372e-05 gnorm: 1.14 [15:59:52< 8:32:02] +[titan] 2025-10-05 14:34:14,468 - root - INFO - step: 26090 loss: 2.0497 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8118 +[titan] 2025-10-05 14:34:14,468 - root - INFO - lr: 1.7364e-05 gnorm: 1.11 [16:00:03< 8:31:51] +[titan] 2025-10-05 14:34:25,355 - root - INFO - step: 26095 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7776 +[titan] 2025-10-05 14:34:25,355 - root - INFO - lr: 1.7356e-05 gnorm: 1.07 [16:00:14< 8:31:40] +[titan] 2025-10-05 14:34:34,040 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:34:36,218 - root - INFO - step: 26100 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 14:34:36,218 - root - INFO - lr: 1.7348e-05 gnorm: 1.06 [16:00:25< 8:31:29] +[titan] 2025-10-05 14:34:47,106 - root - INFO - step: 26105 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 14:34:47,107 - root - INFO - lr: 1.7340e-05 gnorm: 1.06 [16:00:36< 8:31:18] +[titan] 2025-10-05 14:34:58,140 - root - INFO - step: 26110 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,699 tflops: 412.03 mfu: 41.66% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:34:58,140 - root - INFO - lr: 1.7332e-05 gnorm: 1.12 [16:00:47< 8:31:07] +[titan] 2025-10-05 14:35:02,663 - root - INFO - Dumping profiler traces at step 26112 +[titan] 2025-10-05 14:35:02,702 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:35:09,255 - root - INFO - step: 26115 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 29,482 tflops: 409.02 mfu: 41.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 14:35:09,255 - root - INFO - lr: 1.7324e-05 gnorm: 1.07 [16:00:58< 8:30:56] +[titan] 2025-10-05 14:35:20,180 - root - INFO - step: 26120 loss: 1.9396 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 14:35:20,180 - root - INFO - lr: 1.7316e-05 gnorm: 1.06 [16:01:09< 8:30:45] +[titan] 2025-10-05 14:35:31,056 - root - INFO - step: 26125 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 14:35:31,056 - root - INFO - lr: 1.7309e-05 gnorm: 1.12 [16:01:20< 8:30:34] +[titan] 2025-10-05 14:35:41,959 - root - INFO - step: 26130 loss: 
2.0350 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:35:41,959 - root - INFO - lr: 1.7301e-05 gnorm: 1.07 [16:01:31< 8:30:22] +[titan] 2025-10-05 14:35:52,846 - root - INFO - step: 26135 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:35:52,847 - root - INFO - lr: 1.7293e-05 gnorm: 1.10 [16:01:42< 8:30:11] +[titan] 2025-10-05 14:36:03,715 - root - INFO - step: 26140 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7686 +[titan] 2025-10-05 14:36:03,715 - root - INFO - lr: 1.7285e-05 gnorm: 1.07 [16:01:53< 8:30:00] +[titan] 2025-10-05 14:36:14,674 - root - INFO - step: 26145 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7630 +[titan] 2025-10-05 14:36:14,674 - root - INFO - lr: 1.7277e-05 gnorm: 1.05 [16:02:04< 8:29:49] +[titan] 2025-10-05 14:36:23,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:36:25,541 - root - INFO - step: 26150 loss: 2.1124 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 14:36:25,541 - root - INFO - lr: 1.7269e-05 gnorm: 1.13 [16:02:14< 8:29:38] +[titan] 2025-10-05 14:36:36,415 - root - INFO - step: 26155 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:36:36,415 - root - INFO - lr: 1.7261e-05 gnorm: 1.12 [16:02:25< 8:29:27] +[titan] 2025-10-05 14:36:47,302 - root - INFO - step: 26160 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 14:36:47,302 - root - INFO - lr: 1.7253e-05 gnorm: 1.08 [16:02:36< 8:29:16] +[titan] 2025-10-05 14:36:58,194 - root - INFO - step: 26165 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 14:36:58,194 - root - INFO - lr: 1.7245e-05 gnorm: 1.08 [16:02:47< 8:29:05] +[titan] 2025-10-05 14:37:09,092 - root - INFO - step: 26170 loss: 2.1112 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8651 +[titan] 2025-10-05 14:37:09,092 - root - INFO - lr: 1.7237e-05 gnorm: 1.11 [16:02:58< 8:28:54] +[titan] 2025-10-05 14:37:20,018 - root - INFO - step: 26175 loss: 2.0516 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 14:37:20,018 - root - INFO - lr: 1.7229e-05 gnorm: 1.08 [16:03:09< 8:28:42] +[titan] 2025-10-05 14:37:30,897 - root - INFO - step: 26180 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:37:30,897 - root - INFO - lr: 
1.7221e-05 gnorm: 1.09 [16:03:20< 8:28:31] +[titan] 2025-10-05 14:37:41,783 - root - INFO - step: 26185 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 14:37:41,783 - root - INFO - lr: 1.7213e-05 gnorm: 1.08 [16:03:31< 8:28:20] +[titan] 2025-10-05 14:37:52,662 - root - INFO - step: 26190 loss: 1.9604 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7331 +[titan] 2025-10-05 14:37:52,662 - root - INFO - lr: 1.7205e-05 gnorm: 1.08 [16:03:42< 8:28:09] +[titan] 2025-10-05 14:38:03,547 - root - INFO - step: 26195 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 14:38:03,548 - root - INFO - lr: 1.7197e-05 gnorm: 1.08 [16:03:52< 8:27:58] +[titan] 2025-10-05 14:38:12,253 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:38:14,439 - root - INFO - step: 26200 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8117 +[titan] 2025-10-05 14:38:14,440 - root - INFO - lr: 1.7189e-05 gnorm: 1.06 [16:04:03< 8:27:47] +[titan] 2025-10-05 14:38:25,405 - root - INFO - step: 26205 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:38:25,405 - root - INFO - lr: 1.7181e-05 gnorm: 1.10 [16:04:14< 8:27:36] +[titan] 2025-10-05 14:38:36,296 - root - INFO - step: 26210 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:38:36,296 - root - INFO - lr: 1.7173e-05 gnorm: 1.08 [16:04:25< 8:27:25] +[titan] 2025-10-05 14:38:47,187 - root - INFO - step: 26215 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 14:38:47,187 - root - INFO - lr: 1.7166e-05 gnorm: 1.13 [16:04:36< 8:27:14] +[titan] 2025-10-05 14:38:58,082 - root - INFO - step: 26220 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:38:58,082 - root - INFO - lr: 1.7158e-05 gnorm: 1.07 [16:04:47< 8:27:02] +[titan] 2025-10-05 14:39:08,974 - root - INFO - step: 26225 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7872 +[titan] 2025-10-05 14:39:08,975 - root - INFO - lr: 1.7150e-05 gnorm: 1.06 [16:04:58< 8:26:51] +[titan] 2025-10-05 14:39:19,875 - root - INFO - step: 26230 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 14:39:19,875 - root - INFO - lr: 
1.7142e-05 gnorm: 1.07 [16:05:09< 8:26:40] +[titan] 2025-10-05 14:39:30,758 - root - INFO - step: 26235 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 14:39:30,759 - root - INFO - lr: 1.7134e-05 gnorm: 1.07 [16:05:20< 8:26:29] +[titan] 2025-10-05 14:39:41,666 - root - INFO - step: 26240 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7444 +[titan] 2025-10-05 14:39:41,666 - root - INFO - lr: 1.7126e-05 gnorm: 1.05 [16:05:31< 8:26:18] +[titan] 2025-10-05 14:39:52,544 - root - INFO - step: 26245 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 14:39:52,544 - root - INFO - lr: 1.7118e-05 gnorm: 1.08 [16:05:41< 8:26:07] +[titan] 2025-10-05 14:40:01,233 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:40:03,416 - root - INFO - step: 26250 loss: 2.0445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:40:03,416 - root - INFO - lr: 1.7110e-05 gnorm: 1.05 [16:05:52< 8:25:56] +[titan] 2025-10-05 14:40:14,284 - root - INFO - step: 26255 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9243 +[titan] 2025-10-05 14:40:14,284 - root - INFO - lr: 1.7102e-05 gnorm: 1.15 [16:06:03< 8:25:45] +[titan] 2025-10-05 14:40:25,201 - root - INFO - step: 26260 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 14:40:25,201 - root - INFO - lr: 1.7094e-05 gnorm: 1.31 [16:06:14< 8:25:33] +[titan] 2025-10-05 14:40:36,067 - root - INFO - step: 26265 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 14:40:36,067 - root - INFO - lr: 1.7086e-05 gnorm: 1.07 [16:06:25< 8:25:22] +[titan] 2025-10-05 14:40:46,986 - root - INFO - step: 26270 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 14:40:46,986 - root - INFO - lr: 1.7078e-05 gnorm: 1.12 [16:06:36< 8:25:11] +[titan] 2025-10-05 14:40:57,856 - root - INFO - step: 26275 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 14:40:57,856 - root - INFO - lr: 1.7071e-05 gnorm: 1.05 [16:06:47< 8:25:00] +[titan] 2025-10-05 14:41:08,711 - root - INFO - step: 26280 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8421 +[titan] 2025-10-05 14:41:08,711 - root - INFO - lr: 
1.7063e-05 gnorm: 1.08 [16:06:58< 8:24:49] +[titan] 2025-10-05 14:41:19,613 - root - INFO - step: 26285 loss: 2.0172 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 14:41:19,613 - root - INFO - lr: 1.7055e-05 gnorm: 1.10 [16:07:08< 8:24:38] +[titan] 2025-10-05 14:41:30,475 - root - INFO - step: 26290 loss: 2.0509 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 14:41:30,476 - root - INFO - lr: 1.7047e-05 gnorm: 1.10 [16:07:19< 8:24:27] +[titan] 2025-10-05 14:41:41,347 - root - INFO - step: 26295 loss: 2.0334 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7973 +[titan] 2025-10-05 14:41:41,347 - root - INFO - lr: 1.7039e-05 gnorm: 1.04 [16:07:30< 8:24:16] +[titan] 2025-10-05 14:41:50,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:41:52,244 - root - INFO - step: 26300 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 14:41:52,245 - root - INFO - lr: 1.7031e-05 gnorm: 1.10 [16:07:41< 8:24:05] +[titan] 2025-10-05 14:42:03,172 - root - INFO - step: 26305 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 14:42:03,172 - root - INFO - lr: 1.7023e-05 gnorm: 1.10 [16:07:52< 8:23:53] +[titan] 2025-10-05 14:42:14,032 - root - INFO - step: 26310 loss: 2.0276 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:42:14,032 - root - INFO - lr: 1.7015e-05 gnorm: 1.10 [16:08:03< 8:23:42] +[titan] 2025-10-05 14:42:24,897 - root - INFO - step: 26315 loss: 2.0611 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:42:24,897 - root - INFO - lr: 1.7007e-05 gnorm: 1.05 [16:08:14< 8:23:31] +[titan] 2025-10-05 14:42:35,737 - root - INFO - step: 26320 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 14:42:35,737 - root - INFO - lr: 1.6999e-05 gnorm: 1.08 [16:08:25< 8:23:20] +[titan] 2025-10-05 14:42:46,593 - root - INFO - step: 26325 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8364 +[titan] 2025-10-05 14:42:46,593 - root - INFO - lr: 1.6992e-05 gnorm: 1.11 [16:08:35< 8:23:09] +[titan] 2025-10-05 14:42:57,467 - root - INFO - step: 26330 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 14:42:57,467 - root - INFO - lr: 
1.6984e-05 gnorm: 1.05 [16:08:46< 8:22:58] +[titan] 2025-10-05 14:43:08,377 - root - INFO - step: 26335 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7862 +[titan] 2025-10-05 14:43:08,377 - root - INFO - lr: 1.6976e-05 gnorm: 1.10 [16:08:57< 8:22:47] +[titan] 2025-10-05 14:43:19,276 - root - INFO - step: 26340 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:43:19,276 - root - INFO - lr: 1.6968e-05 gnorm: 1.09 [16:09:08< 8:22:36] +[titan] 2025-10-05 14:43:30,150 - root - INFO - step: 26345 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 14:43:30,150 - root - INFO - lr: 1.6960e-05 gnorm: 1.09 [16:09:19< 8:22:24] +[titan] 2025-10-05 14:43:38,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:43:41,032 - root - INFO - step: 26350 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 14:43:41,033 - root - INFO - lr: 1.6952e-05 gnorm: 1.12 [16:09:30< 8:22:13] +[titan] 2025-10-05 14:43:51,910 - root - INFO - step: 26355 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 14:43:51,911 - root - INFO - lr: 1.6944e-05 gnorm: 1.10 [16:09:41< 8:22:02] +[titan] 2025-10-05 14:44:02,775 - root - INFO - step: 26360 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 14:44:02,775 - root - INFO - lr: 1.6936e-05 gnorm: 1.11 [16:09:52< 8:21:51] +[titan] 2025-10-05 14:44:13,677 - root - INFO - step: 26365 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 14:44:13,678 - root - INFO - lr: 1.6928e-05 gnorm: 1.09 [16:10:03< 8:21:40] +[titan] 2025-10-05 14:44:24,544 - root - INFO - step: 26370 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 14:44:24,544 - root - INFO - lr: 1.6921e-05 gnorm: 1.08 [16:10:13< 8:21:29] +[titan] 2025-10-05 14:44:35,405 - root - INFO - step: 26375 loss: 2.0563 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8183 +[titan] 2025-10-05 14:44:35,405 - root - INFO - lr: 1.6913e-05 gnorm: 1.09 [16:10:24< 8:21:18] +[titan] 2025-10-05 14:44:46,277 - root - INFO - step: 26380 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 14:44:46,277 - root - INFO - lr: 
1.6905e-05 gnorm: 1.10 [16:10:35< 8:21:07] +[titan] 2025-10-05 14:44:57,156 - root - INFO - step: 26385 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 14:44:57,156 - root - INFO - lr: 1.6897e-05 gnorm: 1.09 [16:10:46< 8:20:55] +[titan] 2025-10-05 14:45:07,991 - root - INFO - step: 26390 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:45:07,991 - root - INFO - lr: 1.6889e-05 gnorm: 1.09 [16:10:57< 8:20:44] +[titan] 2025-10-05 14:45:18,850 - root - INFO - step: 26395 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:45:18,850 - root - INFO - lr: 1.6881e-05 gnorm: 1.09 [16:11:08< 8:20:33] +[titan] 2025-10-05 14:45:27,579 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:45:29,758 - root - INFO - step: 26400 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 14:45:29,758 - root - INFO - lr: 1.6873e-05 gnorm: 1.09 [16:11:19< 8:20:22] +[titan] 2025-10-05 14:45:40,628 - root - INFO - step: 26405 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 14:45:40,628 - root - INFO - lr: 1.6865e-05 gnorm: 1.09 [16:11:29< 8:20:11] +[titan] 2025-10-05 14:45:51,472 - root - INFO - step: 26410 loss: 2.0493 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 14:45:51,472 - root - INFO - lr: 1.6858e-05 gnorm: 1.09 [16:11:40< 8:20:00] +[titan] 2025-10-05 14:46:02,329 - root - INFO - step: 26415 loss: 2.0718 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8312 +[titan] 2025-10-05 14:46:02,329 - root - INFO - lr: 1.6850e-05 gnorm: 1.09 [16:11:51< 8:19:49] +[titan] 2025-10-05 14:46:13,208 - root - INFO - step: 26420 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 14:46:13,208 - root - INFO - lr: 1.6842e-05 gnorm: 1.09 [16:12:02< 8:19:38] +[titan] 2025-10-05 14:46:24,077 - root - INFO - step: 26425 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:46:24,077 - root - INFO - lr: 1.6834e-05 gnorm: 1.09 [16:12:13< 8:19:26] +[titan] 2025-10-05 14:46:34,964 - root - INFO - step: 26430 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8192 +[titan] 2025-10-05 14:46:34,964 - root - INFO - lr: 
1.6826e-05 gnorm: 1.08 [16:12:24< 8:19:15] +[titan] 2025-10-05 14:46:45,809 - root - INFO - step: 26435 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 14:46:45,810 - root - INFO - lr: 1.6818e-05 gnorm: 1.08 [16:12:35< 8:19:04] +[titan] 2025-10-05 14:46:56,653 - root - INFO - step: 26440 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 14:46:56,653 - root - INFO - lr: 1.6810e-05 gnorm: 1.07 [16:12:45< 8:18:53] +[titan] 2025-10-05 14:47:07,510 - root - INFO - step: 26445 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 14:47:07,511 - root - INFO - lr: 1.6803e-05 gnorm: 1.09 [16:12:56< 8:18:42] +[titan] 2025-10-05 14:47:16,212 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:47:18,390 - root - INFO - step: 26450 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:47:18,390 - root - INFO - lr: 1.6795e-05 gnorm: 1.07 [16:13:07< 8:18:31] +[titan] 2025-10-05 14:47:29,255 - root - INFO - step: 26455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:47:29,255 - root - INFO - lr: 1.6787e-05 gnorm: 1.10 [16:13:18< 8:18:20] +[titan] 2025-10-05 14:47:40,123 - root - INFO - step: 26460 loss: 2.0742 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 14:47:40,123 - root - INFO - lr: 1.6779e-05 gnorm: 1.14 [16:13:29< 8:18:09] +[titan] 2025-10-05 14:47:51,023 - root - INFO - step: 26465 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 14:47:51,023 - root - INFO - lr: 1.6771e-05 gnorm: 1.10 [16:13:40< 8:17:57] +[titan] 2025-10-05 14:48:01,888 - root - INFO - step: 26470 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 14:48:01,888 - root - INFO - lr: 1.6763e-05 gnorm: 1.05 [16:13:51< 8:17:46] +[titan] 2025-10-05 14:48:12,750 - root - INFO - step: 26475 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:48:12,750 - root - INFO - lr: 1.6756e-05 gnorm: 1.10 [16:14:02< 8:17:35] +[titan] 2025-10-05 14:48:23,596 - root - INFO - step: 26480 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 14:48:23,597 - root - INFO - lr: 
1.6748e-05 gnorm: 1.05 [16:14:12< 8:17:24] +[titan] 2025-10-05 14:48:34,475 - root - INFO - step: 26485 loss: 2.0429 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:48:34,475 - root - INFO - lr: 1.6740e-05 gnorm: 1.11 [16:14:23< 8:17:13] +[titan] 2025-10-05 14:48:45,347 - root - INFO - step: 26490 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 14:48:45,348 - root - INFO - lr: 1.6732e-05 gnorm: 1.11 [16:14:34< 8:17:02] +[titan] 2025-10-05 14:48:56,251 - root - INFO - step: 26495 loss: 2.1088 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 14:48:56,252 - root - INFO - lr: 1.6724e-05 gnorm: 1.15 [16:14:45< 8:16:51] +[titan] 2025-10-05 14:49:04,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:49:07,097 - root - INFO - step: 26500 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8508 +[titan] 2025-10-05 14:49:07,097 - root - INFO - lr: 1.6716e-05 gnorm: 1.14 [16:14:56< 8:16:40] +[titan] 2025-10-05 14:49:17,975 - root - INFO - step: 26505 loss: 2.0105 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 14:49:17,976 - root - INFO - lr: 1.6709e-05 gnorm: 1.12 [16:15:07< 8:16:28] +[titan] 2025-10-05 14:49:28,870 - root - INFO - step: 26510 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7377 +[titan] 2025-10-05 14:49:28,870 - root - INFO - lr: 1.6701e-05 gnorm: 1.05 [16:15:18< 8:16:17] +[titan] 2025-10-05 14:49:39,744 - root - INFO - step: 26515 loss: 2.0774 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 14:49:39,744 - root - INFO - lr: 1.6693e-05 gnorm: 1.14 [16:15:29< 8:16:06] +[titan] 2025-10-05 14:49:50,606 - root - INFO - step: 26520 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:49:50,606 - root - INFO - lr: 1.6685e-05 gnorm: 1.11 [16:15:39< 8:15:55] +[titan] 2025-10-05 14:50:01,497 - root - INFO - step: 26525 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:50:01,497 - root - INFO - lr: 1.6677e-05 gnorm: 1.07 [16:15:50< 8:15:44] +[titan] 2025-10-05 14:50:12,351 - root - INFO - step: 26530 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:50:12,351 - root - INFO - lr: 
1.6669e-05 gnorm: 1.11 [16:16:01< 8:15:33] +[titan] 2025-10-05 14:50:23,197 - root - INFO - step: 26535 loss: 2.0146 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 14:50:23,197 - root - INFO - lr: 1.6662e-05 gnorm: 1.28 [16:16:12< 8:15:22] +[titan] 2025-10-05 14:50:34,070 - root - INFO - step: 26540 loss: 2.0363 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 14:50:34,070 - root - INFO - lr: 1.6654e-05 gnorm: 1.09 [16:16:23< 8:15:11] +[titan] 2025-10-05 14:50:44,935 - root - INFO - step: 26545 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 14:50:44,935 - root - INFO - lr: 1.6646e-05 gnorm: 1.07 [16:16:34< 8:14:59] +[titan] 2025-10-05 14:50:53,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:50:55,778 - root - INFO - step: 26550 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 14:50:55,778 - root - INFO - lr: 1.6638e-05 gnorm: 1.07 [16:16:45< 8:14:48] +[titan] 2025-10-05 14:51:06,624 - root - INFO - step: 26555 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 14:51:06,625 - root - INFO - lr: 1.6630e-05 gnorm: 1.11 [16:16:55< 8:14:37] +[titan] 2025-10-05 14:51:17,534 - root - INFO - step: 26560 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 14:51:17,534 - root - INFO - lr: 1.6622e-05 gnorm: 1.12 [16:17:06< 8:14:26] +[titan] 2025-10-05 14:51:28,410 - root - INFO - step: 26565 loss: 2.1178 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 14:51:28,410 - root - INFO - lr: 1.6615e-05 gnorm: 1.09 [16:17:17< 8:14:15] +[titan] 2025-10-05 14:51:39,262 - root - INFO - step: 26570 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 14:51:39,262 - root - INFO - lr: 1.6607e-05 gnorm: 1.11 [16:17:28< 8:14:04] +[titan] 2025-10-05 14:51:50,113 - root - INFO - step: 26575 loss: 2.1052 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 14:51:50,113 - root - INFO - lr: 1.6599e-05 gnorm: 1.15 [16:17:39< 8:13:53] +[titan] 2025-10-05 14:52:00,978 - root - INFO - step: 26580 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:52:00,978 - root - INFO - lr: 
1.6591e-05 gnorm: 1.09 [16:17:50< 8:13:42] +[titan] 2025-10-05 14:52:11,826 - root - INFO - step: 26585 loss: 2.0519 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 14:52:11,826 - root - INFO - lr: 1.6583e-05 gnorm: 1.14 [16:18:01< 8:13:30] +[titan] 2025-10-05 14:52:22,714 - root - INFO - step: 26590 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 14:52:22,714 - root - INFO - lr: 1.6576e-05 gnorm: 1.09 [16:18:12< 8:13:19] +[titan] 2025-10-05 14:52:33,578 - root - INFO - step: 26595 loss: 2.0442 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 14:52:33,578 - root - INFO - lr: 1.6568e-05 gnorm: 1.08 [16:18:22< 8:13:08] +[titan] 2025-10-05 14:52:42,212 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:52:44,379 - root - INFO - step: 26600 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,340 tflops: 420.92 mfu: 42.56% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:52:44,379 - root - INFO - lr: 1.6560e-05 gnorm: 1.10 [16:18:33< 8:12:57] +[titan] 2025-10-05 14:52:55,225 - root - INFO - step: 26605 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 14:52:55,225 - root - INFO - lr: 1.6552e-05 gnorm: 1.10 [16:18:44< 8:12:46] +[titan] 2025-10-05 14:53:06,069 - root - INFO - step: 26610 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:53:06,070 - root - INFO - lr: 1.6544e-05 gnorm: 1.07 [16:18:55< 8:12:35] +[titan] 2025-10-05 14:53:16,896 - root - INFO - step: 26615 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 14:53:16,896 - root - INFO - lr: 1.6537e-05 gnorm: 1.08 [16:19:06< 8:12:24] +[titan] 2025-10-05 14:53:27,734 - root - INFO - step: 26620 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7725 +[titan] 2025-10-05 14:53:27,734 - root - INFO - lr: 1.6529e-05 gnorm: 1.15 [16:19:17< 8:12:13] +[titan] 2025-10-05 14:53:36,731 - root - INFO - Dumping profiler traces at step 26624 +[titan] 2025-10-05 14:53:36,779 - root - INFO - Finished dumping profiler traces in 0.05 seconds +[titan] 2025-10-05 14:53:38,970 - root - INFO - step: 26625 loss: 2.0899 memory: 118.84GiB(85.28%) tps: 29,164 tflops: 404.60 mfu: 40.91% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8466 +[titan] 2025-10-05 14:53:38,971 - root - INFO - lr: 1.6521e-05 gnorm: 1.13 [16:19:28< 8:12:02] +[titan] 2025-10-05 14:53:49,827 - root - INFO - step: 26630 loss: 
2.0019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 14:53:49,827 - root - INFO - lr: 1.6513e-05 gnorm: 1.09 [16:19:39< 8:11:50] +[titan] 2025-10-05 14:54:00,657 - root - INFO - step: 26635 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 14:54:00,657 - root - INFO - lr: 1.6505e-05 gnorm: 1.12 [16:19:49< 8:11:39] +[titan] 2025-10-05 14:54:11,514 - root - INFO - step: 26640 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8209 +[titan] 2025-10-05 14:54:11,514 - root - INFO - lr: 1.6498e-05 gnorm: 1.10 [16:20:00< 8:11:28] +[titan] 2025-10-05 14:54:22,378 - root - INFO - step: 26645 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 14:54:22,378 - root - INFO - lr: 1.6490e-05 gnorm: 1.06 [16:20:11< 8:11:17] +[titan] 2025-10-05 14:54:31,073 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:54:33,259 - root - INFO - step: 26650 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 14:54:33,259 - root - INFO - lr: 1.6482e-05 gnorm: 1.12 [16:20:22< 8:11:06] +[titan] 2025-10-05 14:54:44,181 - root - INFO - step: 26655 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 14:54:44,181 - root - INFO - lr: 1.6474e-05 gnorm: 1.10 [16:20:33< 8:10:55] +[titan] 2025-10-05 14:54:55,045 - root - INFO - step: 26660 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:54:55,045 - root - INFO - lr: 1.6467e-05 gnorm: 1.09 [16:20:44< 8:10:44] +[titan] 2025-10-05 14:55:05,921 - root - INFO - step: 26665 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8484 +[titan] 2025-10-05 14:55:05,921 - root - INFO - lr: 1.6459e-05 gnorm: 1.12 [16:20:55< 8:10:33] +[titan] 2025-10-05 14:55:16,810 - root - INFO - step: 26670 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 14:55:16,810 - root - INFO - lr: 1.6451e-05 gnorm: 1.08 [16:21:06< 8:10:22] +[titan] 2025-10-05 14:55:27,678 - root - INFO - step: 26675 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 14:55:27,679 - root - INFO - lr: 1.6443e-05 gnorm: 1.09 [16:21:16< 8:10:10] +[titan] 2025-10-05 14:55:38,537 - root - INFO - step: 26680 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 14:55:38,537 - root - INFO - lr: 
1.6435e-05 gnorm: 1.10 [16:21:27< 8:09:59] +[titan] 2025-10-05 14:55:49,438 - root - INFO - step: 26685 loss: 2.0107 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 14:55:49,439 - root - INFO - lr: 1.6428e-05 gnorm: 1.11 [16:21:38< 8:09:48] +[titan] 2025-10-05 14:56:00,304 - root - INFO - step: 26690 loss: 2.0743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:56:00,304 - root - INFO - lr: 1.6420e-05 gnorm: 1.11 [16:21:49< 8:09:37] +[titan] 2025-10-05 14:56:11,149 - root - INFO - step: 26695 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8035 +[titan] 2025-10-05 14:56:11,149 - root - INFO - lr: 1.6412e-05 gnorm: 1.10 [16:22:00< 8:09:26] +[titan] 2025-10-05 14:56:19,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:56:22,000 - root - INFO - step: 26700 loss: 2.0496 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8111 +[titan] 2025-10-05 14:56:22,000 - root - INFO - lr: 1.6404e-05 gnorm: 1.07 [16:22:11< 8:09:15] +[titan] 2025-10-05 14:56:32,858 - root - INFO - step: 26705 loss: 1.9909 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 14:56:32,858 - root - INFO - lr: 1.6397e-05 gnorm: 1.07 [16:22:22< 8:09:04] +[titan] 2025-10-05 14:56:43,728 - root - INFO - step: 26710 loss: 2.1246 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:56:43,729 - root - INFO - lr: 1.6389e-05 gnorm: 1.12 [16:22:33< 8:08:53] +[titan] 2025-10-05 14:56:54,594 - root - INFO - step: 26715 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 14:56:54,594 - root - INFO - lr: 1.6381e-05 gnorm: 1.08 [16:22:43< 8:08:41] +[titan] 2025-10-05 14:57:05,497 - root - INFO - step: 26720 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 14:57:05,497 - root - INFO - lr: 1.6373e-05 gnorm: 1.08 [16:22:54< 8:08:30] +[titan] 2025-10-05 14:57:16,361 - root - INFO - step: 26725 loss: 2.0885 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 14:57:16,361 - root - INFO - lr: 1.6366e-05 gnorm: 1.07 [16:23:05< 8:08:19] +[titan] 2025-10-05 14:57:27,210 - root - INFO - step: 26730 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 14:57:27,211 - root - INFO - lr: 
1.6358e-05 gnorm: 1.10 [16:23:16< 8:08:08] +[titan] 2025-10-05 14:57:38,049 - root - INFO - step: 26735 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 14:57:38,050 - root - INFO - lr: 1.6350e-05 gnorm: 1.11 [16:23:27< 8:07:57] +[titan] 2025-10-05 14:57:48,918 - root - INFO - step: 26740 loss: 2.0984 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8556 +[titan] 2025-10-05 14:57:48,918 - root - INFO - lr: 1.6342e-05 gnorm: 1.15 [16:23:38< 8:07:46] +[titan] 2025-10-05 14:57:59,773 - root - INFO - step: 26745 loss: 2.0328 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 14:57:59,773 - root - INFO - lr: 1.6335e-05 gnorm: 1.10 [16:23:49< 8:07:35] +[titan] 2025-10-05 14:58:08,498 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:58:10,688 - root - INFO - step: 26750 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 14:58:10,688 - root - INFO - lr: 1.6327e-05 gnorm: 1.10 [16:23:59< 8:07:24] +[titan] 2025-10-05 14:58:21,558 - root - INFO - step: 26755 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 14:58:21,559 - root - INFO - lr: 1.6319e-05 gnorm: 1.10 [16:24:10< 8:07:12] +[titan] 2025-10-05 14:58:32,424 - root - INFO - step: 26760 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:58:32,425 - root - INFO - lr: 1.6311e-05 gnorm: 1.08 [16:24:21< 8:07:01] +[titan] 2025-10-05 14:58:43,310 - root - INFO - step: 26765 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:58:43,310 - root - INFO - lr: 1.6304e-05 gnorm: 1.07 [16:24:32< 8:06:50] +[titan] 2025-10-05 14:58:54,204 - root - INFO - step: 26770 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7566 +[titan] 2025-10-05 14:58:54,204 - root - INFO - lr: 1.6296e-05 gnorm: 1.08 [16:24:43< 8:06:39] +[titan] 2025-10-05 14:59:05,077 - root - INFO - step: 26775 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:59:05,077 - root - INFO - lr: 1.6288e-05 gnorm: 1.09 [16:24:54< 8:06:28] +[titan] 2025-10-05 14:59:15,970 - root - INFO - step: 26780 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:59:15,970 - root - INFO - lr: 
1.6280e-05 gnorm: 1.13 [16:25:05< 8:06:17] +[titan] 2025-10-05 14:59:26,894 - root - INFO - step: 26785 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 14:59:26,895 - root - INFO - lr: 1.6273e-05 gnorm: 1.11 [16:25:16< 8:06:06] +[titan] 2025-10-05 14:59:37,753 - root - INFO - step: 26790 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 14:59:37,753 - root - INFO - lr: 1.6265e-05 gnorm: 1.03 [16:25:27< 8:05:55] +[titan] 2025-10-05 14:59:48,629 - root - INFO - step: 26795 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:59:48,629 - root - INFO - lr: 1.6257e-05 gnorm: 1.08 [16:25:37< 8:05:44] +[titan] 2025-10-05 14:59:57,327 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:59:59,514 - root - INFO - step: 26800 loss: 1.9889 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:59:59,514 - root - INFO - lr: 1.6249e-05 gnorm: 1.12 [16:25:48< 8:05:32] +[titan] 2025-10-05 15:00:10,404 - root - INFO - step: 26805 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 15:00:10,405 - root - INFO - lr: 1.6242e-05 gnorm: 1.09 [16:25:59< 8:05:21] +[titan] 2025-10-05 15:00:21,298 - root - INFO - step: 26810 loss: 2.0441 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8079 +[titan] 2025-10-05 15:00:21,298 - root - INFO - lr: 1.6234e-05 gnorm: 1.09 [16:26:10< 8:05:10] +[titan] 2025-10-05 15:00:32,228 - root - INFO - step: 26815 loss: 2.0782 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 15:00:32,228 - root - INFO - lr: 1.6226e-05 gnorm: 1.15 [16:26:21< 8:04:59] +[titan] 2025-10-05 15:00:43,121 - root - INFO - step: 26820 loss: 2.0556 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 15:00:43,121 - root - INFO - lr: 1.6219e-05 gnorm: 1.10 [16:26:32< 8:04:48] +[titan] 2025-10-05 15:00:54,008 - root - INFO - step: 26825 loss: 2.0473 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8094 +[titan] 2025-10-05 15:00:54,008 - root - INFO - lr: 1.6211e-05 gnorm: 1.16 [16:26:43< 8:04:37] +[titan] 2025-10-05 15:01:04,889 - root - INFO - step: 26830 loss: 2.0024 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 15:01:04,889 - root - INFO - lr: 
1.6203e-05 gnorm: 1.09 [16:26:54< 8:04:26] +[titan] 2025-10-05 15:01:15,765 - root - INFO - step: 26835 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 15:01:15,765 - root - INFO - lr: 1.6195e-05 gnorm: 1.07 [16:27:05< 8:04:15] +[titan] 2025-10-05 15:01:26,630 - root - INFO - step: 26840 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8050 +[titan] 2025-10-05 15:01:26,631 - root - INFO - lr: 1.6188e-05 gnorm: 1.11 [16:27:15< 8:04:04] +[titan] 2025-10-05 15:01:37,602 - root - INFO - step: 26845 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:01:37,602 - root - INFO - lr: 1.6180e-05 gnorm: 1.12 [16:27:26< 8:03:53] +[titan] 2025-10-05 15:01:46,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:01:48,489 - root - INFO - step: 26850 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 15:01:48,490 - root - INFO - lr: 1.6172e-05 gnorm: 1.08 [16:27:37< 8:03:41] +[titan] 2025-10-05 15:01:59,381 - root - INFO - step: 26855 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 15:01:59,382 - root - INFO - lr: 1.6165e-05 gnorm: 1.07 [16:27:48< 8:03:30] +[titan] 2025-10-05 15:02:10,248 - root - INFO - step: 26860 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:02:10,248 - root - INFO - lr: 1.6157e-05 gnorm: 1.10 [16:27:59< 8:03:19] +[titan] 2025-10-05 15:02:21,138 - root - INFO - step: 26865 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 15:02:21,139 - root - INFO - lr: 1.6149e-05 gnorm: 1.12 [16:28:10< 8:03:08] +[titan] 2025-10-05 15:02:32,019 - root - INFO - step: 26870 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 15:02:32,019 - root - INFO - lr: 1.6141e-05 gnorm: 1.07 [16:28:21< 8:02:57] +[titan] 2025-10-05 15:02:42,942 - root - INFO - step: 26875 loss: 2.0517 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 15:02:42,942 - root - INFO - lr: 1.6134e-05 gnorm: 1.12 [16:28:32< 8:02:46] +[titan] 2025-10-05 15:02:53,877 - root - INFO - step: 26880 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 15:02:53,877 - root - INFO - lr: 
1.6126e-05 gnorm: 1.13 [16:28:43< 8:02:35] +[titan] 2025-10-05 15:03:04,754 - root - INFO - step: 26885 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 15:03:04,754 - root - INFO - lr: 1.6118e-05 gnorm: 1.16 [16:28:54< 8:02:24] +[titan] 2025-10-05 15:03:15,633 - root - INFO - step: 26890 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:03:15,633 - root - INFO - lr: 1.6111e-05 gnorm: 1.11 [16:29:04< 8:02:13] +[titan] 2025-10-05 15:03:26,500 - root - INFO - step: 26895 loss: 2.0231 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 15:03:26,500 - root - INFO - lr: 1.6103e-05 gnorm: 1.12 [16:29:15< 8:02:02] +[titan] 2025-10-05 15:03:35,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:03:37,367 - root - INFO - step: 26900 loss: 2.0325 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 15:03:37,368 - root - INFO - lr: 1.6095e-05 gnorm: 1.11 [16:29:26< 8:01:50] +[titan] 2025-10-05 15:03:48,289 - root - INFO - step: 26905 loss: 2.0322 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7960 +[titan] 2025-10-05 15:03:48,289 - root - INFO - lr: 1.6088e-05 gnorm: 1.12 [16:29:37< 8:01:39] +[titan] 2025-10-05 15:03:59,203 - root - INFO - step: 26910 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 15:03:59,203 - root - INFO - lr: 1.6080e-05 gnorm: 1.17 [16:29:48< 8:01:28] +[titan] 2025-10-05 15:04:10,072 - root - INFO - step: 26915 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 15:04:10,072 - root - INFO - lr: 1.6072e-05 gnorm: 1.08 [16:29:59< 8:01:17] +[titan] 2025-10-05 15:04:20,947 - root - INFO - step: 26920 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8313 +[titan] 2025-10-05 15:04:20,948 - root - INFO - lr: 1.6065e-05 gnorm: 1.11 [16:30:10< 8:01:06] +[titan] 2025-10-05 15:04:31,818 - root - INFO - step: 26925 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:04:31,818 - root - INFO - lr: 1.6057e-05 gnorm: 1.10 [16:30:21< 8:00:55] +[titan] 2025-10-05 15:04:42,737 - root - INFO - step: 26930 loss: 1.9755 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 15:04:42,737 - root - INFO - lr: 
1.6049e-05 gnorm: 1.07 [16:30:32< 8:00:44] +[titan] 2025-10-05 15:04:53,614 - root - INFO - step: 26935 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 15:04:53,614 - root - INFO - lr: 1.6041e-05 gnorm: 1.10 [16:30:42< 8:00:33] +[titan] 2025-10-05 15:05:04,493 - root - INFO - step: 26940 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 15:05:04,493 - root - INFO - lr: 1.6034e-05 gnorm: 1.16 [16:30:53< 8:00:22] +[titan] 2025-10-05 15:05:15,413 - root - INFO - step: 26945 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 15:05:15,413 - root - INFO - lr: 1.6026e-05 gnorm: 1.09 [16:31:04< 8:00:10] +[titan] 2025-10-05 15:05:24,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:05:26,301 - root - INFO - step: 26950 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:05:26,302 - root - INFO - lr: 1.6018e-05 gnorm: 1.13 [16:31:15< 7:59:59] +[titan] 2025-10-05 15:05:37,170 - root - INFO - step: 26955 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:05:37,170 - root - INFO - lr: 1.6011e-05 gnorm: 1.07 [16:31:26< 7:59:48] +[titan] 2025-10-05 15:05:48,097 - root - INFO - step: 26960 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 15:05:48,097 - root - INFO - lr: 1.6003e-05 gnorm: 1.11 [16:31:37< 7:59:37] +[titan] 2025-10-05 15:05:58,956 - root - INFO - step: 26965 loss: 2.0670 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 15:05:58,956 - root - INFO - lr: 1.5995e-05 gnorm: 1.13 [16:31:48< 7:59:26] +[titan] 2025-10-05 15:06:09,830 - root - INFO - step: 26970 loss: 1.9712 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 15:06:09,830 - root - INFO - lr: 1.5988e-05 gnorm: 1.09 [16:31:59< 7:59:15] +[titan] 2025-10-05 15:06:20,738 - root - INFO - step: 26975 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 15:06:20,738 - root - INFO - lr: 1.5980e-05 gnorm: 1.14 [16:32:10< 7:59:04] +[titan] 2025-10-05 15:06:31,607 - root - INFO - step: 26980 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 15:06:31,607 - root - INFO - lr: 
1.5972e-05 gnorm: 1.13 [16:32:20< 7:58:53] +[titan] 2025-10-05 15:06:42,557 - root - INFO - step: 26985 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.18 mfu: 41.98% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8708 +[titan] 2025-10-05 15:06:42,557 - root - INFO - lr: 1.5965e-05 gnorm: 1.13 [16:32:31< 7:58:42] +[titan] 2025-10-05 15:06:53,430 - root - INFO - step: 26990 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:06:53,431 - root - INFO - lr: 1.5957e-05 gnorm: 1.11 [16:32:42< 7:58:31] +[titan] 2025-10-05 15:07:04,312 - root - INFO - step: 26995 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 15:07:04,312 - root - INFO - lr: 1.5949e-05 gnorm: 1.11 [16:32:53< 7:58:19] +[titan] 2025-10-05 15:07:13,022 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:07:15,207 - root - INFO - step: 27000 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 15:07:15,207 - root - INFO - lr: 1.5942e-05 gnorm: 1.13 [16:33:04< 7:58:08] +[titan] 2025-10-05 15:07:26,138 - root - INFO - step: 27005 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:07:26,138 - root - INFO - lr: 1.5934e-05 gnorm: 1.14 [16:33:15< 7:57:57] +[titan] 2025-10-05 15:07:37,028 - root - INFO - step: 27010 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 15:07:37,028 - root - INFO - lr: 1.5926e-05 gnorm: 1.14 [16:33:26< 7:57:46] +[titan] 2025-10-05 15:07:47,970 - root - INFO - step: 27015 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 15:07:47,971 - root - INFO - lr: 1.5919e-05 gnorm: 1.12 [16:33:37< 7:57:35] +[titan] 2025-10-05 15:07:58,854 - root - INFO - step: 27020 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7853 +[titan] 2025-10-05 15:07:58,855 - root - INFO - lr: 1.5911e-05 gnorm: 1.15 [16:33:48< 7:57:24] +[titan] 2025-10-05 15:08:09,736 - root - INFO - step: 27025 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:08:09,736 - root - INFO - lr: 1.5903e-05 gnorm: 1.12 [16:33:59< 7:57:13] +[titan] 2025-10-05 15:08:20,606 - root - INFO - step: 27030 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 15:08:20,606 - root - INFO - lr: 
1.5896e-05 gnorm: 1.08 [16:34:09< 7:57:02] +[titan] 2025-10-05 15:08:31,489 - root - INFO - step: 27035 loss: 1.9763 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 15:08:31,490 - root - INFO - lr: 1.5888e-05 gnorm: 1.09 [16:34:20< 7:56:51] +[titan] 2025-10-05 15:08:42,436 - root - INFO - step: 27040 loss: 2.0880 memory: 118.84GiB(85.28%) tps: 29,936 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 15:08:42,436 - root - INFO - lr: 1.5881e-05 gnorm: 1.12 [16:34:31< 7:56:40] +[titan] 2025-10-05 15:08:53,408 - root - INFO - step: 27045 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 15:08:53,409 - root - INFO - lr: 1.5873e-05 gnorm: 1.16 [16:34:42< 7:56:28] +[titan] 2025-10-05 15:09:02,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:09:04,301 - root - INFO - step: 27050 loss: 2.0295 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7940 +[titan] 2025-10-05 15:09:04,301 - root - INFO - lr: 1.5865e-05 gnorm: 1.11 [16:34:53< 7:56:17] +[titan] 2025-10-05 15:09:15,193 - root - INFO - step: 27055 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 15:09:15,194 - root - INFO - lr: 1.5858e-05 gnorm: 1.10 [16:35:04< 7:56:06] +[titan] 2025-10-05 15:09:26,100 - root - INFO - step: 27060 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8091 +[titan] 2025-10-05 15:09:26,100 - root - INFO - lr: 1.5850e-05 gnorm: 1.12 [16:35:15< 7:55:55] +[titan] 2025-10-05 15:09:36,976 - root - INFO - step: 27065 loss: 1.9733 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7439 +[titan] 2025-10-05 15:09:36,976 - root - INFO - lr: 1.5842e-05 gnorm: 1.09 [16:35:26< 7:55:44] +[titan] 2025-10-05 15:09:47,942 - root - INFO - step: 27070 loss: 2.0633 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 15:09:47,942 - root - INFO - lr: 1.5835e-05 gnorm: 1.09 [16:35:37< 7:55:33] +[titan] 2025-10-05 15:09:58,812 - root - INFO - step: 27075 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 15:09:58,812 - root - INFO - lr: 1.5827e-05 gnorm: 1.08 [16:35:48< 7:55:22] +[titan] 2025-10-05 15:10:09,685 - root - INFO - step: 27080 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 15:10:09,685 - root - INFO - lr: 
1.5819e-05 gnorm: 1.09 [16:35:58< 7:55:11] +[titan] 2025-10-05 15:10:20,555 - root - INFO - step: 27085 loss: 2.0147 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 15:10:20,556 - root - INFO - lr: 1.5812e-05 gnorm: 1.07 [16:36:09< 7:55:00] +[titan] 2025-10-05 15:10:31,449 - root - INFO - step: 27090 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 15:10:31,449 - root - INFO - lr: 1.5804e-05 gnorm: 1.06 [16:36:20< 7:54:49] +[titan] 2025-10-05 15:10:42,317 - root - INFO - step: 27095 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:10:42,317 - root - INFO - lr: 1.5797e-05 gnorm: 1.08 [16:36:31< 7:54:37] +[titan] 2025-10-05 15:10:51,064 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:10:53,251 - root - INFO - step: 27100 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:10:53,252 - root - INFO - lr: 1.5789e-05 gnorm: 1.15 [16:36:42< 7:54:26] +[titan] 2025-10-05 15:11:04,173 - root - INFO - step: 27105 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 15:11:04,174 - root - INFO - lr: 1.5781e-05 gnorm: 1.12 [16:36:53< 7:54:15] +[titan] 2025-10-05 15:11:15,060 - root - INFO - step: 27110 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:11:15,060 - root - INFO - lr: 1.5774e-05 gnorm: 1.15 [16:37:04< 7:54:04] +[titan] 2025-10-05 15:11:25,971 - root - INFO - step: 27115 loss: 2.0649 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:11:25,971 - root - INFO - lr: 1.5766e-05 gnorm: 1.12 [16:37:15< 7:53:53] +[titan] 2025-10-05 15:11:36,858 - root - INFO - step: 27120 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 15:11:36,859 - root - INFO - lr: 1.5759e-05 gnorm: 1.09 [16:37:26< 7:53:42] +[titan] 2025-10-05 15:11:47,776 - root - INFO - step: 27125 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7663 +[titan] 2025-10-05 15:11:47,777 - root - INFO - lr: 1.5751e-05 gnorm: 1.08 [16:37:37< 7:53:31] +[titan] 2025-10-05 15:11:58,636 - root - INFO - step: 27130 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:11:58,636 - root - INFO - lr: 
1.5743e-05 gnorm: 1.13 [16:37:47< 7:53:20] +[titan] 2025-10-05 15:12:09,626 - root - INFO - step: 27135 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 29,816 tflops: 413.65 mfu: 41.83% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 15:12:09,626 - root - INFO - lr: 1.5736e-05 gnorm: 1.11 [16:37:58< 7:53:09] +[titan] 2025-10-05 15:12:11,990 - root - INFO - Dumping profiler traces at step 27136 +[titan] 2025-10-05 15:12:12,029 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:12:20,793 - root - INFO - step: 27140 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 29,344 tflops: 407.10 mfu: 41.16% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 15:12:20,794 - root - INFO - lr: 1.5728e-05 gnorm: 1.11 [16:38:10< 7:52:58] +[titan] 2025-10-05 15:12:31,651 - root - INFO - step: 27145 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 15:12:31,651 - root - INFO - lr: 1.5720e-05 gnorm: 1.09 [16:38:20< 7:52:47] +[titan] 2025-10-05 15:12:40,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:12:42,534 - root - INFO - step: 27150 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7607 +[titan] 2025-10-05 15:12:42,534 - root - INFO - lr: 1.5713e-05 gnorm: 1.06 [16:38:31< 7:52:36] +[titan] 2025-10-05 15:12:53,435 - root - INFO - step: 27155 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:12:53,435 - root - INFO - lr: 1.5705e-05 gnorm: 1.08 [16:38:42< 7:52:24] +[titan] 2025-10-05 15:13:04,284 - root - INFO - step: 27160 loss: 2.0466 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8071 +[titan] 2025-10-05 15:13:04,284 - root - INFO - lr: 1.5698e-05 gnorm: 1.11 [16:38:53< 7:52:13] +[titan] 2025-10-05 15:13:15,182 - root - INFO - step: 27165 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 15:13:15,182 - root - INFO - lr: 1.5690e-05 gnorm: 1.08 [16:39:04< 7:52:02] +[titan] 2025-10-05 15:13:26,046 - root - INFO - step: 27170 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 15:13:26,046 - root - INFO - lr: 1.5682e-05 gnorm: 1.08 [16:39:15< 7:51:51] +[titan] 2025-10-05 15:13:36,900 - root - INFO - step: 27175 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7416 +[titan] 2025-10-05 15:13:36,900 - root - INFO - lr: 1.5675e-05 gnorm: 1.10 [16:39:26< 7:51:40] +[titan] 2025-10-05 15:13:47,796 - root - INFO - step: 27180 loss: 2.1244 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8768 +[titan] 2025-10-05 15:13:47,796 - root - INFO - lr: 
1.5667e-05 gnorm: 1.09 [16:39:37< 7:51:29] +[titan] 2025-10-05 15:13:58,664 - root - INFO - step: 27185 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:13:58,664 - root - INFO - lr: 1.5660e-05 gnorm: 1.10 [16:39:47< 7:51:18] +[titan] 2025-10-05 15:14:09,527 - root - INFO - step: 27190 loss: 2.0164 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:14:09,527 - root - INFO - lr: 1.5652e-05 gnorm: 1.09 [16:39:58< 7:51:07] +[titan] 2025-10-05 15:14:20,387 - root - INFO - step: 27195 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:14:20,387 - root - INFO - lr: 1.5645e-05 gnorm: 1.07 [16:40:09< 7:50:56] +[titan] 2025-10-05 15:14:29,129 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:14:31,314 - root - INFO - step: 27200 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 15:14:31,314 - root - INFO - lr: 1.5637e-05 gnorm: 1.10 [16:40:20< 7:50:44] +[titan] 2025-10-05 15:14:42,185 - root - INFO - step: 27205 loss: 2.0377 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:14:42,186 - root - INFO - lr: 1.5629e-05 gnorm: 1.08 [16:40:31< 7:50:33] +[titan] 2025-10-05 15:14:53,065 - root - INFO - step: 27210 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 15:14:53,065 - root - INFO - lr: 1.5622e-05 gnorm: 1.12 [16:40:42< 7:50:22] +[titan] 2025-10-05 15:15:03,941 - root - INFO - step: 27215 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 15:15:03,941 - root - INFO - lr: 1.5614e-05 gnorm: 1.11 [16:40:53< 7:50:11] +[titan] 2025-10-05 15:15:14,801 - root - INFO - step: 27220 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 15:15:14,801 - root - INFO - lr: 1.5607e-05 gnorm: 1.10 [16:41:04< 7:50:00] +[titan] 2025-10-05 15:15:25,653 - root - INFO - step: 27225 loss: 1.9878 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:15:25,653 - root - INFO - lr: 1.5599e-05 gnorm: 1.09 [16:41:14< 7:49:49] +[titan] 2025-10-05 15:15:36,551 - root - INFO - step: 27230 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8280 +[titan] 2025-10-05 15:15:36,552 - root - INFO - lr: 
1.5591e-05 gnorm: 1.13 [16:41:25< 7:49:38] +[titan] 2025-10-05 15:15:47,426 - root - INFO - step: 27235 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 15:15:47,426 - root - INFO - lr: 1.5584e-05 gnorm: 1.10 [16:41:36< 7:49:27] +[titan] 2025-10-05 15:15:58,353 - root - INFO - step: 27240 loss: 2.0437 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 15:15:58,354 - root - INFO - lr: 1.5576e-05 gnorm: 1.08 [16:41:47< 7:49:16] +[titan] 2025-10-05 15:16:09,201 - root - INFO - step: 27245 loss: 2.0207 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 15:16:09,202 - root - INFO - lr: 1.5569e-05 gnorm: 1.11 [16:41:58< 7:49:05] +[titan] 2025-10-05 15:16:17,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:16:20,072 - root - INFO - step: 27250 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7443 +[titan] 2025-10-05 15:16:20,072 - root - INFO - lr: 1.5561e-05 gnorm: 1.08 [16:42:09< 7:48:53] +[titan] 2025-10-05 15:16:30,931 - root - INFO - step: 27255 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:16:30,931 - root - INFO - lr: 1.5554e-05 gnorm: 1.12 [16:42:20< 7:48:42] +[titan] 2025-10-05 15:16:41,802 - root - INFO - step: 27260 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 15:16:41,803 - root - INFO - lr: 1.5546e-05 gnorm: 1.15 [16:42:31< 7:48:31] +[titan] 2025-10-05 15:16:52,730 - root - INFO - step: 27265 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 15:16:52,730 - root - INFO - lr: 1.5539e-05 gnorm: 1.08 [16:42:41< 7:48:20] +[titan] 2025-10-05 15:17:03,595 - root - INFO - step: 27270 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 15:17:03,596 - root - INFO - lr: 1.5531e-05 gnorm: 1.08 [16:42:52< 7:48:09] +[titan] 2025-10-05 15:17:14,444 - root - INFO - step: 27275 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 15:17:14,444 - root - INFO - lr: 1.5523e-05 gnorm: 1.11 [16:43:03< 7:47:58] +[titan] 2025-10-05 15:17:25,317 - root - INFO - step: 27280 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 15:17:25,317 - root - INFO - lr: 
1.5516e-05 gnorm: 1.11 [16:43:14< 7:47:47] +[titan] 2025-10-05 15:17:36,180 - root - INFO - step: 27285 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 15:17:36,180 - root - INFO - lr: 1.5508e-05 gnorm: 1.10 [16:43:25< 7:47:36] +[titan] 2025-10-05 15:17:47,037 - root - INFO - step: 27290 loss: 2.0421 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 15:17:47,037 - root - INFO - lr: 1.5501e-05 gnorm: 1.10 [16:43:36< 7:47:25] +[titan] 2025-10-05 15:17:57,971 - root - INFO - step: 27295 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:17:57,971 - root - INFO - lr: 1.5493e-05 gnorm: 1.10 [16:43:47< 7:47:13] +[titan] 2025-10-05 15:18:06,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:18:08,853 - root - INFO - step: 27300 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 15:18:08,854 - root - INFO - lr: 1.5486e-05 gnorm: 1.09 [16:43:58< 7:47:02] +[titan] 2025-10-05 15:18:19,735 - root - INFO - step: 27305 loss: 2.0092 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 15:18:19,735 - root - INFO - lr: 1.5478e-05 gnorm: 1.11 [16:44:08< 7:46:51] +[titan] 2025-10-05 15:18:30,612 - root - INFO - step: 27310 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 15:18:30,612 - root - INFO - lr: 1.5471e-05 gnorm: 1.10 [16:44:19< 7:46:40] +[titan] 2025-10-05 15:18:41,512 - root - INFO - step: 27315 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 15:18:41,512 - root - INFO - lr: 1.5463e-05 gnorm: 1.10 [16:44:30< 7:46:29] +[titan] 2025-10-05 15:18:52,399 - root - INFO - step: 27320 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 15:18:52,399 - root - INFO - lr: 1.5455e-05 gnorm: 1.12 [16:44:41< 7:46:18] +[titan] 2025-10-05 15:19:03,317 - root - INFO - step: 27325 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 15:19:03,317 - root - INFO - lr: 1.5448e-05 gnorm: 1.13 [16:44:52< 7:46:07] +[titan] 2025-10-05 15:19:14,195 - root - INFO - step: 27330 loss: 2.1168 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 15:19:14,195 - root - INFO - lr: 
1.5440e-05 gnorm: 1.12 [16:45:03< 7:45:56] +[titan] 2025-10-05 15:19:25,053 - root - INFO - step: 27335 loss: 2.0622 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8236 +[titan] 2025-10-05 15:19:25,053 - root - INFO - lr: 1.5433e-05 gnorm: 1.14 [16:45:14< 7:45:45] +[titan] 2025-10-05 15:19:35,914 - root - INFO - step: 27340 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:19:35,914 - root - INFO - lr: 1.5425e-05 gnorm: 1.09 [16:45:25< 7:45:34] +[titan] 2025-10-05 15:19:46,795 - root - INFO - step: 27345 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8129 +[titan] 2025-10-05 15:19:46,796 - root - INFO - lr: 1.5418e-05 gnorm: 1.14 [16:45:36< 7:45:22] +[titan] 2025-10-05 15:19:55,519 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:19:57,711 - root - INFO - step: 27350 loss: 2.0646 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:19:57,711 - root - INFO - lr: 1.5410e-05 gnorm: 1.12 [16:45:46< 7:45:11] +[titan] 2025-10-05 15:20:08,574 - root - INFO - step: 27355 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:20:08,574 - root - INFO - lr: 1.5403e-05 gnorm: 1.10 [16:45:57< 7:45:00] +[titan] 2025-10-05 15:20:19,517 - root - INFO - step: 27360 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8070 +[titan] 2025-10-05 15:20:19,518 - root - INFO - lr: 1.5395e-05 gnorm: 1.10 [16:46:08< 7:44:49] +[titan] 2025-10-05 15:20:30,400 - root - INFO - step: 27365 loss: 2.0266 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 15:20:30,401 - root - INFO - lr: 1.5388e-05 gnorm: 1.10 [16:46:19< 7:44:38] +[titan] 2025-10-05 15:20:41,257 - root - INFO - step: 27370 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 15:20:41,257 - root - INFO - lr: 1.5380e-05 gnorm: 1.09 [16:46:30< 7:44:27] +[titan] 2025-10-05 15:20:52,147 - root - INFO - step: 27375 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8366 +[titan] 2025-10-05 15:20:52,147 - root - INFO - lr: 1.5373e-05 gnorm: 1.09 [16:46:41< 7:44:16] +[titan] 2025-10-05 15:21:03,049 - root - INFO - step: 27380 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 15:21:03,049 - root - INFO - lr: 
1.5365e-05 gnorm: 1.10 [16:46:52< 7:44:05] +[titan] 2025-10-05 15:21:13,927 - root - INFO - step: 27385 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 15:21:13,928 - root - INFO - lr: 1.5358e-05 gnorm: 1.10 [16:47:03< 7:43:54] +[titan] 2025-10-05 15:21:24,850 - root - INFO - step: 27390 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:21:24,850 - root - INFO - lr: 1.5350e-05 gnorm: 1.13 [16:47:14< 7:43:43] +[titan] 2025-10-05 15:21:35,739 - root - INFO - step: 27395 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 15:21:35,740 - root - INFO - lr: 1.5343e-05 gnorm: 1.13 [16:47:24< 7:43:31] +[titan] 2025-10-05 15:21:44,436 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:21:46,630 - root - INFO - step: 27400 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:21:46,630 - root - INFO - lr: 1.5335e-05 gnorm: 1.08 [16:47:35< 7:43:20] +[titan] 2025-10-05 15:21:57,526 - root - INFO - step: 27405 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7972 +[titan] 2025-10-05 15:21:57,526 - root - INFO - lr: 1.5328e-05 gnorm: 1.12 [16:47:46< 7:43:09] +[titan] 2025-10-05 15:22:08,415 - root - INFO - step: 27410 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7583 +[titan] 2025-10-05 15:22:08,415 - root - INFO - lr: 1.5320e-05 gnorm: 1.07 [16:47:57< 7:42:58] +[titan] 2025-10-05 15:22:19,305 - root - INFO - step: 27415 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 15:22:19,305 - root - INFO - lr: 1.5313e-05 gnorm: 1.12 [16:48:08< 7:42:47] +[titan] 2025-10-05 15:22:30,195 - root - INFO - step: 27420 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8052 +[titan] 2025-10-05 15:22:30,196 - root - INFO - lr: 1.5305e-05 gnorm: 1.11 [16:48:19< 7:42:36] +[titan] 2025-10-05 15:22:41,161 - root - INFO - step: 27425 loss: 2.0339 memory: 118.84GiB(85.28%) tps: 29,884 tflops: 414.60 mfu: 41.92% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 15:22:41,161 - root - INFO - lr: 1.5298e-05 gnorm: 1.12 [16:48:30< 7:42:25] +[titan] 2025-10-05 15:22:52,043 - root - INFO - step: 27430 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 15:22:52,043 - root - INFO - lr: 
1.5290e-05 gnorm: 1.10 [16:48:41< 7:42:14] +[titan] 2025-10-05 15:23:02,939 - root - INFO - step: 27435 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 15:23:02,939 - root - INFO - lr: 1.5283e-05 gnorm: 1.12 [16:48:52< 7:42:03] +[titan] 2025-10-05 15:23:13,837 - root - INFO - step: 27440 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 15:23:13,837 - root - INFO - lr: 1.5275e-05 gnorm: 1.12 [16:49:03< 7:41:52] +[titan] 2025-10-05 15:23:24,732 - root - INFO - step: 27445 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 15:23:24,733 - root - INFO - lr: 1.5268e-05 gnorm: 1.10 [16:49:13< 7:41:41] +[titan] 2025-10-05 15:23:33,428 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:23:35,615 - root - INFO - step: 27450 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8394 +[titan] 2025-10-05 15:23:35,615 - root - INFO - lr: 1.5260e-05 gnorm: 1.12 [16:49:24< 7:41:29] +[titan] 2025-10-05 15:23:46,565 - root - INFO - step: 27455 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.19 mfu: 41.98% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 15:23:46,565 - root - INFO - lr: 1.5253e-05 gnorm: 1.14 [16:49:35< 7:41:18] +[titan] 2025-10-05 15:23:57,475 - root - INFO - step: 27460 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 15:23:57,475 - root - INFO - lr: 1.5245e-05 gnorm: 1.11 [16:49:46< 7:41:07] +[titan] 2025-10-05 15:24:08,327 - root - INFO - step: 27465 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 15:24:08,327 - root - INFO - lr: 1.5238e-05 gnorm: 1.11 [16:49:57< 7:40:56] +[titan] 2025-10-05 15:24:19,201 - root - INFO - step: 27470 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:24:19,201 - root - INFO - lr: 1.5230e-05 gnorm: 1.08 [16:50:08< 7:40:45] +[titan] 2025-10-05 15:24:30,080 - root - INFO - step: 27475 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:24:30,080 - root - INFO - lr: 1.5223e-05 gnorm: 1.09 [16:50:19< 7:40:34] +[titan] 2025-10-05 15:24:40,967 - root - INFO - step: 27480 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8213 +[titan] 2025-10-05 15:24:40,967 - root - INFO - lr: 
1.5215e-05 gnorm: 1.10 [16:50:30< 7:40:23] +[titan] 2025-10-05 15:24:51,863 - root - INFO - step: 27485 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 15:24:51,863 - root - INFO - lr: 1.5208e-05 gnorm: 1.11 [16:50:41< 7:40:12] +[titan] 2025-10-05 15:25:02,777 - root - INFO - step: 27490 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8587 +[titan] 2025-10-05 15:25:02,778 - root - INFO - lr: 1.5200e-05 gnorm: 1.16 [16:50:52< 7:40:01] +[titan] 2025-10-05 15:25:13,681 - root - INFO - step: 27495 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:25:13,681 - root - INFO - lr: 1.5193e-05 gnorm: 1.12 [16:51:02< 7:39:50] +[titan] 2025-10-05 15:25:22,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:25:24,569 - root - INFO - step: 27500 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 15:25:24,570 - root - INFO - lr: 1.5185e-05 gnorm: 1.09 [16:51:13< 7:39:39] +[titan] 2025-10-05 15:25:35,452 - root - INFO - step: 27505 loss: 2.0528 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:25:35,452 - root - INFO - lr: 1.5178e-05 gnorm: 1.14 [16:51:24< 7:39:27] +[titan] 2025-10-05 15:25:46,344 - root - INFO - step: 27510 loss: 2.2224 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 15:25:46,344 - root - INFO - lr: 1.5170e-05 gnorm: 7.42 [16:51:35< 7:39:16] +[titan] 2025-10-05 15:25:57,223 - root - INFO - step: 27515 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7946 +[titan] 2025-10-05 15:25:57,223 - root - INFO - lr: 1.5163e-05 gnorm: 1.13 [16:51:46< 7:39:05] +[titan] 2025-10-05 15:26:08,195 - root - INFO - step: 27520 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 15:26:08,195 - root - INFO - lr: 1.5155e-05 gnorm: 1.07 [16:51:57< 7:38:54] +[titan] 2025-10-05 15:26:19,076 - root - INFO - step: 27525 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 15:26:19,076 - root - INFO - lr: 1.5148e-05 gnorm: 1.13 [16:52:08< 7:38:43] +[titan] 2025-10-05 15:26:29,960 - root - INFO - step: 27530 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7786 +[titan] 2025-10-05 15:26:29,960 - root - INFO - lr: 
1.5141e-05 gnorm: 1.17 [16:52:19< 7:38:32] +[titan] 2025-10-05 15:26:40,847 - root - INFO - step: 27535 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 15:26:40,847 - root - INFO - lr: 1.5133e-05 gnorm: 1.09 [16:52:30< 7:38:21] +[titan] 2025-10-05 15:26:51,718 - root - INFO - step: 27540 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 15:26:51,718 - root - INFO - lr: 1.5126e-05 gnorm: 1.13 [16:52:40< 7:38:10] +[titan] 2025-10-05 15:27:02,650 - root - INFO - step: 27545 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 15:27:02,650 - root - INFO - lr: 1.5118e-05 gnorm: 1.12 [16:52:51< 7:37:59] +[titan] 2025-10-05 15:27:11,363 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:27:13,551 - root - INFO - step: 27550 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 15:27:13,551 - root - INFO - lr: 1.5111e-05 gnorm: 1.14 [16:53:02< 7:37:48] +[titan] 2025-10-05 15:27:24,430 - root - INFO - step: 27555 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:27:24,430 - root - INFO - lr: 1.5103e-05 gnorm: 1.10 [16:53:13< 7:37:36] +[titan] 2025-10-05 15:27:35,290 - root - INFO - step: 27560 loss: 2.0098 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7771 +[titan] 2025-10-05 15:27:35,290 - root - INFO - lr: 1.5096e-05 gnorm: 1.11 [16:53:24< 7:37:25] +[titan] 2025-10-05 15:27:46,162 - root - INFO - step: 27565 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 15:27:46,162 - root - INFO - lr: 1.5088e-05 gnorm: 1.14 [16:53:35< 7:37:14] +[titan] 2025-10-05 15:27:57,049 - root - INFO - step: 27570 loss: 2.0327 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7970 +[titan] 2025-10-05 15:27:57,049 - root - INFO - lr: 1.5081e-05 gnorm: 1.09 [16:53:46< 7:37:03] +[titan] 2025-10-05 15:28:07,972 - root - INFO - step: 27575 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8349 +[titan] 2025-10-05 15:28:07,972 - root - INFO - lr: 1.5074e-05 gnorm: 1.15 [16:53:57< 7:36:52] +[titan] 2025-10-05 15:28:18,854 - root - INFO - step: 27580 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 15:28:18,855 - root - INFO - lr: 
1.5066e-05 gnorm: 1.16 [16:54:08< 7:36:41] +[titan] 2025-10-05 15:28:29,772 - root - INFO - step: 27585 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8547 +[titan] 2025-10-05 15:28:29,772 - root - INFO - lr: 1.5059e-05 gnorm: 1.13 [16:54:18< 7:36:30] +[titan] 2025-10-05 15:28:40,678 - root - INFO - step: 27590 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 15:28:40,678 - root - INFO - lr: 1.5051e-05 gnorm: 1.09 [16:54:29< 7:36:19] +[titan] 2025-10-05 15:28:51,534 - root - INFO - step: 27595 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8331 +[titan] 2025-10-05 15:28:51,534 - root - INFO - lr: 1.5044e-05 gnorm: 1.10 [16:54:40< 7:36:08] +[titan] 2025-10-05 15:29:00,205 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:29:02,428 - root - INFO - step: 27600 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7298 +[titan] 2025-10-05 15:29:02,429 - root - INFO - lr: 1.5036e-05 gnorm: 1.12 [16:54:51< 7:35:57] +[titan] 2025-10-05 15:29:13,305 - root - INFO - step: 27605 loss: 2.0663 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:29:13,305 - root - INFO - lr: 1.5029e-05 gnorm: 1.09 [16:55:02< 7:35:46] +[titan] 2025-10-05 15:29:24,170 - root - INFO - step: 27610 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 15:29:24,170 - root - INFO - lr: 1.5021e-05 gnorm: 1.06 [16:55:13< 7:35:34] +[titan] 2025-10-05 15:29:35,069 - root - INFO - step: 27615 loss: 1.9817 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 15:29:35,070 - root - INFO - lr: 1.5014e-05 gnorm: 1.13 [16:55:24< 7:35:23] +[titan] 2025-10-05 15:29:45,908 - root - INFO - step: 27620 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 15:29:45,908 - root - INFO - lr: 1.5007e-05 gnorm: 1.14 [16:55:35< 7:35:12] +[titan] 2025-10-05 15:29:56,752 - root - INFO - step: 27625 loss: 2.0304 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7949 +[titan] 2025-10-05 15:29:56,752 - root - INFO - lr: 1.4999e-05 gnorm: 1.14 [16:55:45< 7:35:01] +[titan] 2025-10-05 15:30:07,651 - root - INFO - step: 27630 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 15:30:07,652 - root - INFO - lr: 
1.4992e-05 gnorm: 1.10 [16:55:56< 7:34:50] +[titan] 2025-10-05 15:30:18,514 - root - INFO - step: 27635 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8163 +[titan] 2025-10-05 15:30:18,514 - root - INFO - lr: 1.4984e-05 gnorm: 1.11 [16:56:07< 7:34:39] +[titan] 2025-10-05 15:30:29,382 - root - INFO - step: 27640 loss: 2.0889 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 15:30:29,382 - root - INFO - lr: 1.4977e-05 gnorm: 1.09 [16:56:18< 7:34:28] +[titan] 2025-10-05 15:30:40,353 - root - INFO - step: 27645 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7813 +[titan] 2025-10-05 15:30:40,354 - root - INFO - lr: 1.4970e-05 gnorm: 1.12 [16:56:29< 7:34:17] +[titan] 2025-10-05 15:30:47,046 - root - INFO - Dumping profiler traces at step 27648 +[titan] 2025-10-05 15:30:47,083 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:30:49,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:30:51,455 - root - INFO - step: 27650 loss: 2.0385 memory: 118.84GiB(85.28%) tps: 29,516 tflops: 409.49 mfu: 41.40% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:30:51,456 - root - INFO - lr: 1.4962e-05 gnorm: 1.14 [16:56:40< 7:34:06] +[titan] 2025-10-05 15:31:02,322 - root - INFO - step: 27655 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 15:31:02,323 - root - INFO - lr: 1.4955e-05 gnorm: 1.07 [16:56:51< 7:33:55] +[titan] 2025-10-05 15:31:13,179 - root - INFO - step: 27660 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:31:13,179 - root - INFO - lr: 1.4947e-05 gnorm: 1.10 [16:57:02< 7:33:44] +[titan] 2025-10-05 15:31:24,033 - root - INFO - step: 27665 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 15:31:24,033 - root - INFO - lr: 1.4940e-05 gnorm: 1.09 [16:57:13< 7:33:32] +[titan] 2025-10-05 15:31:34,896 - root - INFO - step: 27670 loss: 2.0761 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 15:31:34,896 - root - INFO - lr: 1.4933e-05 gnorm: 1.11 [16:57:24< 7:33:21] +[titan] 2025-10-05 15:31:45,721 - root - INFO - step: 27675 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 15:31:45,721 - root - INFO - lr: 1.4925e-05 gnorm: 1.11 [16:57:34< 7:33:10] +[titan] 2025-10-05 15:31:56,620 - root - INFO - step: 27680 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7791 +[titan] 2025-10-05 15:31:56,620 - root - INFO - lr: 
1.4918e-05 gnorm: 1.10 [16:57:45< 7:32:59] +[titan] 2025-10-05 15:32:07,525 - root - INFO - step: 27685 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 15:32:07,525 - root - INFO - lr: 1.4910e-05 gnorm: 1.10 [16:57:56< 7:32:48] +[titan] 2025-10-05 15:32:18,367 - root - INFO - step: 27690 loss: 2.0707 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 15:32:18,367 - root - INFO - lr: 1.4903e-05 gnorm: 1.09 [16:58:07< 7:32:37] +[titan] 2025-10-05 15:32:29,238 - root - INFO - step: 27695 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 15:32:29,238 - root - INFO - lr: 1.4896e-05 gnorm: 1.13 [16:58:18< 7:32:26] +[titan] 2025-10-05 15:32:37,923 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:32:40,104 - root - INFO - step: 27700 loss: 2.0988 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 15:32:40,104 - root - INFO - lr: 1.4888e-05 gnorm: 1.12 [16:58:29< 7:32:15] +[titan] 2025-10-05 15:32:50,965 - root - INFO - step: 27705 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 15:32:50,965 - root - INFO - lr: 1.4881e-05 gnorm: 1.13 [16:58:40< 7:32:04] +[titan] 2025-10-05 15:33:01,872 - root - INFO - step: 27710 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 15:33:01,872 - root - INFO - lr: 1.4873e-05 gnorm: 1.13 [16:58:51< 7:31:52] +[titan] 2025-10-05 15:33:12,825 - root - INFO - step: 27715 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8034 +[titan] 2025-10-05 15:33:12,825 - root - INFO - lr: 1.4866e-05 gnorm: 1.12 [16:59:02< 7:31:41] +[titan] 2025-10-05 15:33:23,696 - root - INFO - step: 27720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 15:33:23,696 - root - INFO - lr: 1.4859e-05 gnorm: 1.13 [16:59:12< 7:31:30] +[titan] 2025-10-05 15:33:34,571 - root - INFO - step: 27725 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:33:34,571 - root - INFO - lr: 1.4851e-05 gnorm: 1.13 [16:59:23< 7:31:19] +[titan] 2025-10-05 15:33:45,446 - root - INFO - step: 27730 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 15:33:45,446 - root - INFO - lr: 
1.4844e-05 gnorm: 1.07 [16:59:34< 7:31:08] +[titan] 2025-10-05 15:33:56,288 - root - INFO - step: 27735 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7851 +[titan] 2025-10-05 15:33:56,288 - root - INFO - lr: 1.4836e-05 gnorm: 1.13 [16:59:45< 7:30:57] +[titan] 2025-10-05 15:34:07,199 - root - INFO - step: 27740 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 15:34:07,199 - root - INFO - lr: 1.4829e-05 gnorm: 1.11 [16:59:56< 7:30:46] +[titan] 2025-10-05 15:34:18,088 - root - INFO - step: 27745 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:34:18,088 - root - INFO - lr: 1.4822e-05 gnorm: 1.14 [17:00:07< 7:30:35] +[titan] 2025-10-05 15:34:26,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:34:28,919 - root - INFO - step: 27750 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7660 +[titan] 2025-10-05 15:34:28,919 - root - INFO - lr: 1.4814e-05 gnorm: 1.10 [17:00:18< 7:30:24] +[titan] 2025-10-05 15:34:39,788 - root - INFO - step: 27755 loss: 2.0085 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:34:39,788 - root - INFO - lr: 1.4807e-05 gnorm: 1.13 [17:00:28< 7:30:13] +[titan] 2025-10-05 15:34:50,649 - root - INFO - step: 27760 loss: 2.0229 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7889 +[titan] 2025-10-05 15:34:50,649 - root - INFO - lr: 1.4800e-05 gnorm: 1.08 [17:00:39< 7:30:02] +[titan] 2025-10-05 15:35:01,518 - root - INFO - step: 27765 loss: 2.0372 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:35:01,518 - root - INFO - lr: 1.4792e-05 gnorm: 1.09 [17:00:50< 7:29:50] +[titan] 2025-10-05 15:35:12,435 - root - INFO - step: 27770 loss: 2.0491 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 15:35:12,435 - root - INFO - lr: 1.4785e-05 gnorm: 1.11 [17:01:01< 7:29:39] +[titan] 2025-10-05 15:35:23,343 - root - INFO - step: 27775 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:35:23,344 - root - INFO - lr: 1.4777e-05 gnorm: 1.10 [17:01:12< 7:29:28] +[titan] 2025-10-05 15:35:34,184 - root - INFO - step: 27780 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 15:35:34,184 - root - INFO - lr: 
1.4770e-05 gnorm: 1.09 [17:01:23< 7:29:17] +[titan] 2025-10-05 15:35:45,050 - root - INFO - step: 27785 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 15:35:45,050 - root - INFO - lr: 1.4763e-05 gnorm: 1.08 [17:01:34< 7:29:06] +[titan] 2025-10-05 15:35:55,912 - root - INFO - step: 27790 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:35:55,912 - root - INFO - lr: 1.4755e-05 gnorm: 1.10 [17:01:45< 7:28:55] +[titan] 2025-10-05 15:36:06,779 - root - INFO - step: 27795 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7727 +[titan] 2025-10-05 15:36:06,780 - root - INFO - lr: 1.4748e-05 gnorm: 1.09 [17:01:55< 7:28:44] +[titan] 2025-10-05 15:36:15,502 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:36:17,685 - root - INFO - step: 27800 loss: 2.0545 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8152 +[titan] 2025-10-05 15:36:17,685 - root - INFO - lr: 1.4741e-05 gnorm: 1.11 [17:02:06< 7:28:33] +[titan] 2025-10-05 15:36:28,568 - root - INFO - step: 27805 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 15:36:28,568 - root - INFO - lr: 1.4733e-05 gnorm: 1.09 [17:02:17< 7:28:22] +[titan] 2025-10-05 15:36:39,435 - root - INFO - step: 27810 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 15:36:39,435 - root - INFO - lr: 1.4726e-05 gnorm: 1.12 [17:02:28< 7:28:11] +[titan] 2025-10-05 15:36:50,300 - root - INFO - step: 27815 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:36:50,300 - root - INFO - lr: 1.4719e-05 gnorm: 1.08 [17:02:39< 7:27:59] +[titan] 2025-10-05 15:37:01,162 - root - INFO - step: 27820 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 15:37:01,162 - root - INFO - lr: 1.4711e-05 gnorm: 1.10 [17:02:50< 7:27:48] +[titan] 2025-10-05 15:37:12,068 - root - INFO - step: 27825 loss: 2.0443 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8065 +[titan] 2025-10-05 15:37:12,068 - root - INFO - lr: 1.4704e-05 gnorm: 1.12 [17:03:01< 7:27:37] +[titan] 2025-10-05 15:37:22,924 - root - INFO - step: 27830 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:37:22,924 - root - INFO - lr: 
1.4697e-05 gnorm: 1.12 [17:03:12< 7:27:26] +[titan] 2025-10-05 15:37:33,784 - root - INFO - step: 27835 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 15:37:33,784 - root - INFO - lr: 1.4689e-05 gnorm: 1.09 [17:03:22< 7:27:15] +[titan] 2025-10-05 15:37:44,693 - root - INFO - step: 27840 loss: 1.9926 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 15:37:44,694 - root - INFO - lr: 1.4682e-05 gnorm: 1.08 [17:03:33< 7:27:04] +[titan] 2025-10-05 15:37:55,562 - root - INFO - step: 27845 loss: 1.9782 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 15:37:55,562 - root - INFO - lr: 1.4675e-05 gnorm: 1.11 [17:03:44< 7:26:53] +[titan] 2025-10-05 15:38:04,219 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:38:06,403 - root - INFO - step: 27850 loss: 1.9362 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 15:38:06,403 - root - INFO - lr: 1.4667e-05 gnorm: 1.10 [17:03:55< 7:26:42] +[titan] 2025-10-05 15:38:17,311 - root - INFO - step: 27855 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 15:38:17,311 - root - INFO - lr: 1.4660e-05 gnorm: 1.11 [17:04:06< 7:26:31] +[titan] 2025-10-05 15:38:28,219 - root - INFO - step: 27860 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 15:38:28,219 - root - INFO - lr: 1.4653e-05 gnorm: 1.12 [17:04:17< 7:26:20] +[titan] 2025-10-05 15:38:39,092 - root - INFO - step: 27865 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 15:38:39,092 - root - INFO - lr: 1.4645e-05 gnorm: 1.10 [17:04:28< 7:26:08] +[titan] 2025-10-05 15:38:49,999 - root - INFO - step: 27870 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 15:38:49,999 - root - INFO - lr: 1.4638e-05 gnorm: 1.15 [17:04:39< 7:25:57] +[titan] 2025-10-05 15:39:00,879 - root - INFO - step: 27875 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:39:00,879 - root - INFO - lr: 1.4631e-05 gnorm: 1.10 [17:04:50< 7:25:46] +[titan] 2025-10-05 15:39:11,796 - root - INFO - step: 27880 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 15:39:11,796 - root - INFO - lr: 
1.4623e-05 gnorm: 1.09 [17:05:00< 7:25:35] +[titan] 2025-10-05 15:39:22,647 - root - INFO - step: 27885 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8326 +[titan] 2025-10-05 15:39:22,647 - root - INFO - lr: 1.4616e-05 gnorm: 1.15 [17:05:11< 7:25:24] +[titan] 2025-10-05 15:39:33,473 - root - INFO - step: 27890 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 15:39:33,473 - root - INFO - lr: 1.4609e-05 gnorm: 1.11 [17:05:22< 7:25:13] +[titan] 2025-10-05 15:39:44,338 - root - INFO - step: 27895 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 15:39:44,338 - root - INFO - lr: 1.4601e-05 gnorm: 1.12 [17:05:33< 7:25:02] +[titan] 2025-10-05 15:39:53,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:39:55,213 - root - INFO - step: 27900 loss: 2.0345 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 15:39:55,214 - root - INFO - lr: 1.4594e-05 gnorm: 1.12 [17:05:44< 7:24:51] +[titan] 2025-10-05 15:40:06,088 - root - INFO - step: 27905 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:40:06,088 - root - INFO - lr: 1.4587e-05 gnorm: 1.11 [17:05:55< 7:24:40] +[titan] 2025-10-05 15:40:16,993 - root - INFO - step: 27910 loss: 2.0536 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8146 +[titan] 2025-10-05 15:40:16,993 - root - INFO - lr: 1.4579e-05 gnorm: 1.11 [17:06:06< 7:24:29] +[titan] 2025-10-05 15:40:27,815 - root - INFO - step: 27915 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 15:40:27,815 - root - INFO - lr: 1.4572e-05 gnorm: 1.11 [17:06:17< 7:24:18] +[titan] 2025-10-05 15:40:38,643 - root - INFO - step: 27920 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8155 +[titan] 2025-10-05 15:40:38,643 - root - INFO - lr: 1.4565e-05 gnorm: 1.07 [17:06:27< 7:24:06] +[titan] 2025-10-05 15:40:49,515 - root - INFO - step: 27925 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7489 +[titan] 2025-10-05 15:40:49,515 - root - INFO - lr: 1.4558e-05 gnorm: 1.15 [17:06:38< 7:23:55] +[titan] 2025-10-05 15:41:00,387 - root - INFO - step: 27930 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 15:41:00,388 - root - INFO - lr: 
1.4550e-05 gnorm: 1.08 [17:06:49< 7:23:44] +[titan] 2025-10-05 15:41:11,313 - root - INFO - step: 27935 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 15:41:11,313 - root - INFO - lr: 1.4543e-05 gnorm: 1.11 [17:07:00< 7:23:33] +[titan] 2025-10-05 15:41:22,241 - root - INFO - step: 27940 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:41:22,241 - root - INFO - lr: 1.4536e-05 gnorm: 1.12 [17:07:11< 7:23:22] +[titan] 2025-10-05 15:41:33,099 - root - INFO - step: 27945 loss: 2.0587 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 15:41:33,100 - root - INFO - lr: 1.4528e-05 gnorm: 1.10 [17:07:22< 7:23:11] +[titan] 2025-10-05 15:41:41,785 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:41:43,968 - root - INFO - step: 27950 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:41:43,968 - root - INFO - lr: 1.4521e-05 gnorm: 1.11 [17:07:33< 7:23:00] +[titan] 2025-10-05 15:41:54,868 - root - INFO - step: 27955 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 15:41:54,868 - root - INFO - lr: 1.4514e-05 gnorm: 1.12 [17:07:44< 7:22:49] +[titan] 2025-10-05 15:42:05,736 - root - INFO - step: 27960 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:05,736 - root - INFO - lr: 1.4507e-05 gnorm: 1.10 [17:07:54< 7:22:38] +[titan] 2025-10-05 15:42:16,692 - root - INFO - step: 27965 loss: 1.9991 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:42:16,692 - root - INFO - lr: 1.4499e-05 gnorm: 1.12 [17:08:05< 7:22:27] +[titan] 2025-10-05 15:42:27,570 - root - INFO - step: 27970 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 15:42:27,570 - root - INFO - lr: 1.4492e-05 gnorm: 1.12 [17:08:16< 7:22:16] +[titan] 2025-10-05 15:42:38,440 - root - INFO - step: 27975 loss: 2.0135 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:38,440 - root - INFO - lr: 1.4485e-05 gnorm: 1.13 [17:08:27< 7:22:04] +[titan] 2025-10-05 15:42:49,330 - root - INFO - step: 27980 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 15:42:49,330 - root - INFO - lr: 
1.4477e-05 gnorm: 1.12 [17:08:38< 7:21:53] +[titan] 2025-10-05 15:43:00,202 - root - INFO - step: 27985 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 15:43:00,202 - root - INFO - lr: 1.4470e-05 gnorm: 1.10 [17:08:49< 7:21:42] +[titan] 2025-10-05 15:43:11,083 - root - INFO - step: 27990 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:43:11,083 - root - INFO - lr: 1.4463e-05 gnorm: 1.11 [17:09:00< 7:21:31] +[titan] 2025-10-05 15:43:21,971 - root - INFO - step: 27995 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 15:43:21,972 - root - INFO - lr: 1.4456e-05 gnorm: 1.10 [17:09:11< 7:21:20] +[titan] 2025-10-05 15:43:30,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:43:32,877 - root - INFO - step: 28000 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 15:43:32,878 - root - INFO - lr: 1.4448e-05 gnorm: 1.08 [17:09:22< 7:21:09] +[titan] 2025-10-05 15:43:43,741 - root - INFO - step: 28005 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:43:43,741 - root - INFO - lr: 1.4441e-05 gnorm: 1.13 [17:09:32< 7:20:58] +[titan] 2025-10-05 15:43:54,582 - root - INFO - step: 28010 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:43:54,582 - root - INFO - lr: 1.4434e-05 gnorm: 1.11 [17:09:43< 7:20:47] +[titan] 2025-10-05 15:44:05,429 - root - INFO - step: 28015 loss: 2.0300 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 15:44:05,429 - root - INFO - lr: 1.4426e-05 gnorm: 1.11 [17:09:54< 7:20:36] +[titan] 2025-10-05 15:44:16,331 - root - INFO - step: 28020 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 15:44:16,331 - root - INFO - lr: 1.4419e-05 gnorm: 1.09 [17:10:05< 7:20:25] +[titan] 2025-10-05 15:44:27,186 - root - INFO - step: 28025 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 15:44:27,186 - root - INFO - lr: 1.4412e-05 gnorm: 1.08 [17:10:16< 7:20:13] +[titan] 2025-10-05 15:44:38,082 - root - INFO - step: 28030 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 15:44:38,082 - root - INFO - lr: 
1.4405e-05 gnorm: 1.17 [17:10:27< 7:20:02] +[titan] 2025-10-05 15:44:48,943 - root - INFO - step: 28035 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 15:44:48,943 - root - INFO - lr: 1.4397e-05 gnorm: 1.12 [17:10:38< 7:19:51] +[titan] 2025-10-05 15:44:59,808 - root - INFO - step: 28040 loss: 2.0729 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8327 +[titan] 2025-10-05 15:44:59,808 - root - INFO - lr: 1.4390e-05 gnorm: 1.14 [17:10:48< 7:19:40] +[titan] 2025-10-05 15:45:10,680 - root - INFO - step: 28045 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 15:45:10,681 - root - INFO - lr: 1.4383e-05 gnorm: 1.11 [17:10:59< 7:19:29] +[titan] 2025-10-05 15:45:19,402 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:45:21,586 - root - INFO - step: 28050 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7741 +[titan] 2025-10-05 15:45:21,587 - root - INFO - lr: 1.4376e-05 gnorm: 1.06 [17:11:10< 7:19:18] +[titan] 2025-10-05 15:45:32,449 - root - INFO - step: 28055 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:45:32,449 - root - INFO - lr: 1.4368e-05 gnorm: 1.10 [17:11:21< 7:19:07] +[titan] 2025-10-05 15:45:43,299 - root - INFO - step: 28060 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:45:43,299 - root - INFO - lr: 1.4361e-05 gnorm: 1.14 [17:11:32< 7:18:56] +[titan] 2025-10-05 15:45:54,193 - root - INFO - step: 28065 loss: 2.0655 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 15:45:54,193 - root - INFO - lr: 1.4354e-05 gnorm: 1.10 [17:11:43< 7:18:45] +[titan] 2025-10-05 15:46:05,045 - root - INFO - step: 28070 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 15:46:05,045 - root - INFO - lr: 1.4347e-05 gnorm: 1.08 [17:11:54< 7:18:34] +[titan] 2025-10-05 15:46:15,889 - root - INFO - step: 28075 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 15:46:15,889 - root - INFO - lr: 1.4339e-05 gnorm: 1.09 [17:12:05< 7:18:22] +[titan] 2025-10-05 15:46:26,781 - root - INFO - step: 28080 loss: 1.9684 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 15:46:26,781 - root - INFO - lr: 
1.4332e-05 gnorm: 1.10 [17:12:15< 7:18:11] +[titan] 2025-10-05 15:46:37,625 - root - INFO - step: 28085 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 15:46:37,625 - root - INFO - lr: 1.4325e-05 gnorm: 1.08 [17:12:26< 7:18:00] +[titan] 2025-10-05 15:46:48,485 - root - INFO - step: 28090 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:46:48,486 - root - INFO - lr: 1.4318e-05 gnorm: 1.09 [17:12:37< 7:17:49] +[titan] 2025-10-05 15:46:59,393 - root - INFO - step: 28095 loss: 1.9937 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 15:46:59,393 - root - INFO - lr: 1.4311e-05 gnorm: 1.12 [17:12:48< 7:17:38] +[titan] 2025-10-05 15:47:08,069 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:47:10,253 - root - INFO - step: 28100 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:47:10,253 - root - INFO - lr: 1.4303e-05 gnorm: 1.13 [17:12:59< 7:17:27] +[titan] 2025-10-05 15:47:21,183 - root - INFO - step: 28105 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 15:47:21,183 - root - INFO - lr: 1.4296e-05 gnorm: 1.14 [17:13:10< 7:17:16] +[titan] 2025-10-05 15:47:32,061 - root - INFO - step: 28110 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 15:47:32,061 - root - INFO - lr: 1.4289e-05 gnorm: 1.07 [17:13:21< 7:17:05] +[titan] 2025-10-05 15:47:42,930 - root - INFO - step: 28115 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:47:42,930 - root - INFO - lr: 1.4282e-05 gnorm: 1.11 [17:13:32< 7:16:54] +[titan] 2025-10-05 15:47:53,817 - root - INFO - step: 28120 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7942 +[titan] 2025-10-05 15:47:53,817 - root - INFO - lr: 1.4274e-05 gnorm: 1.09 [17:13:42< 7:16:43] +[titan] 2025-10-05 15:48:04,758 - root - INFO - step: 28125 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 15:48:04,758 - root - INFO - lr: 1.4267e-05 gnorm: 1.14 [17:13:53< 7:16:32] +[titan] 2025-10-05 15:48:15,645 - root - INFO - step: 28130 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 15:48:15,645 - root - INFO - lr: 
1.4260e-05 gnorm: 1.09 [17:14:04< 7:16:21] +[titan] 2025-10-05 15:48:26,569 - root - INFO - step: 28135 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 15:48:26,569 - root - INFO - lr: 1.4253e-05 gnorm: 1.15 [17:14:15< 7:16:09] +[titan] 2025-10-05 15:48:37,430 - root - INFO - step: 28140 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 15:48:37,430 - root - INFO - lr: 1.4246e-05 gnorm: 1.13 [17:14:26< 7:15:58] +[titan] 2025-10-05 15:48:48,297 - root - INFO - step: 28145 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7912 +[titan] 2025-10-05 15:48:48,297 - root - INFO - lr: 1.4238e-05 gnorm: 1.13 [17:14:37< 7:15:47] +[titan] 2025-10-05 15:48:56,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:48:59,187 - root - INFO - step: 28150 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 15:48:59,187 - root - INFO - lr: 1.4231e-05 gnorm: 1.13 [17:14:48< 7:15:36] +[titan] 2025-10-05 15:49:10,034 - root - INFO - step: 28155 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 15:49:10,035 - root - INFO - lr: 1.4224e-05 gnorm: 1.11 [17:14:59< 7:15:25] +[titan] 2025-10-05 15:49:21,051 - root - INFO - step: 28160 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 29,745 tflops: 412.66 mfu: 41.73% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 15:49:21,051 - root - INFO - lr: 1.4217e-05 gnorm: 1.09 [17:15:10< 7:15:14] +[titan] 2025-10-05 15:49:21,234 - root - INFO - Dumping profiler traces at step 28160 +[titan] 2025-10-05 15:49:21,275 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:49:32,111 - root - INFO - step: 28165 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 29,630 tflops: 411.07 mfu: 41.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 15:49:32,111 - root - INFO - lr: 1.4210e-05 gnorm: 1.11 [17:15:21< 7:15:03] +[titan] 2025-10-05 15:49:42,959 - root - INFO - step: 28170 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:49:42,959 - root - INFO - lr: 1.4202e-05 gnorm: 1.08 [17:15:32< 7:14:52] +[titan] 2025-10-05 15:49:53,795 - root - INFO - step: 28175 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:49:53,795 - root - INFO - lr: 1.4195e-05 gnorm: 1.14 [17:15:42< 7:14:41] +[titan] 2025-10-05 15:50:04,659 - root - INFO - step: 28180 loss: 
2.0355 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7994 +[titan] 2025-10-05 15:50:04,659 - root - INFO - lr: 1.4188e-05 gnorm: 1.10 [17:15:53< 7:14:30] +[titan] 2025-10-05 15:50:15,529 - root - INFO - step: 28185 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 15:50:15,529 - root - INFO - lr: 1.4181e-05 gnorm: 1.09 [17:16:04< 7:14:19] +[titan] 2025-10-05 15:50:26,481 - root - INFO - step: 28190 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 15:50:26,482 - root - INFO - lr: 1.4174e-05 gnorm: 1.11 [17:16:15< 7:14:08] +[titan] 2025-10-05 15:50:37,355 - root - INFO - step: 28195 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 15:50:37,355 - root - INFO - lr: 1.4166e-05 gnorm: 1.06 [17:16:26< 7:13:56] +[titan] 2025-10-05 15:50:46,040 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:50:48,223 - root - INFO - step: 28200 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:50:48,223 - root - INFO - lr: 1.4159e-05 gnorm: 1.12 [17:16:37< 7:13:45] +[titan] 2025-10-05 15:50:59,085 - root - INFO - step: 28205 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 15:50:59,085 - root - INFO - lr: 1.4152e-05 gnorm: 1.12 [17:16:48< 7:13:34] +[titan] 2025-10-05 15:51:09,953 - root - INFO - step: 28210 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 15:51:09,953 - root - INFO - lr: 1.4145e-05 gnorm: 1.14 [17:16:59< 7:13:23] +[titan] 2025-10-05 15:51:20,822 - root - INFO - step: 28215 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:51:20,822 - root - INFO - lr: 1.4138e-05 gnorm: 1.10 [17:17:09< 7:13:12] +[titan] 2025-10-05 15:51:31,728 - root - INFO - step: 28220 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 15:51:31,728 - root - INFO - lr: 1.4130e-05 gnorm: 1.18 [17:17:20< 7:13:01] +[titan] 2025-10-05 15:51:42,649 - root - INFO - step: 28225 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 15:51:42,650 - root - INFO - lr: 1.4123e-05 gnorm: 1.10 [17:17:31< 7:12:50] +[titan] 2025-10-05 15:51:53,522 - root - INFO - step: 28230 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 15:51:53,522 - root - INFO - lr: 
1.4116e-05 gnorm: 1.09 [17:17:42< 7:12:39] +[titan] 2025-10-05 15:52:04,406 - root - INFO - step: 28235 loss: 2.0389 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:52:04,406 - root - INFO - lr: 1.4109e-05 gnorm: 1.11 [17:17:53< 7:12:28] +[titan] 2025-10-05 15:52:15,280 - root - INFO - step: 28240 loss: 1.9948 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 15:52:15,281 - root - INFO - lr: 1.4102e-05 gnorm: 1.11 [17:18:04< 7:12:17] +[titan] 2025-10-05 15:52:26,225 - root - INFO - step: 28245 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 15:52:26,225 - root - INFO - lr: 1.4095e-05 gnorm: 1.12 [17:18:15< 7:12:06] +[titan] 2025-10-05 15:52:34,921 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:52:37,105 - root - INFO - step: 28250 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 15:52:37,105 - root - INFO - lr: 1.4087e-05 gnorm: 1.08 [17:18:26< 7:11:54] +[titan] 2025-10-05 15:52:48,023 - root - INFO - step: 28255 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 15:52:48,023 - root - INFO - lr: 1.4080e-05 gnorm: 1.10 [17:18:37< 7:11:43] +[titan] 2025-10-05 15:52:58,912 - root - INFO - step: 28260 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 15:52:58,913 - root - INFO - lr: 1.4073e-05 gnorm: 1.10 [17:18:48< 7:11:32] +[titan] 2025-10-05 15:53:09,803 - root - INFO - step: 28265 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 15:53:09,803 - root - INFO - lr: 1.4066e-05 gnorm: 1.11 [17:18:58< 7:11:21] +[titan] 2025-10-05 15:53:20,692 - root - INFO - step: 28270 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 15:53:20,692 - root - INFO - lr: 1.4059e-05 gnorm: 1.34 [17:19:09< 7:11:10] +[titan] 2025-10-05 15:53:31,636 - root - INFO - step: 28275 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 15:53:31,636 - root - INFO - lr: 1.4052e-05 gnorm: 1.11 [17:19:20< 7:10:59] +[titan] 2025-10-05 15:53:42,507 - root - INFO - step: 28280 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7702 +[titan] 2025-10-05 15:53:42,508 - root - INFO - lr: 
1.4044e-05 gnorm: 1.09 [17:19:31< 7:10:48] +[titan] 2025-10-05 15:53:53,408 - root - INFO - step: 28285 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:53:53,408 - root - INFO - lr: 1.4037e-05 gnorm: 1.12 [17:19:42< 7:10:37] +[titan] 2025-10-05 15:54:04,269 - root - INFO - step: 28290 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:54:04,269 - root - INFO - lr: 1.4030e-05 gnorm: 1.11 [17:19:53< 7:10:26] +[titan] 2025-10-05 15:54:15,140 - root - INFO - step: 28295 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:54:15,140 - root - INFO - lr: 1.4023e-05 gnorm: 1.09 [17:20:04< 7:10:15] +[titan] 2025-10-05 15:54:23,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:54:26,111 - root - INFO - step: 28300 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7860 +[titan] 2025-10-05 15:54:26,111 - root - INFO - lr: 1.4016e-05 gnorm: 1.07 [17:20:15< 7:10:04] +[titan] 2025-10-05 15:54:36,983 - root - INFO - step: 28305 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 15:54:36,984 - root - INFO - lr: 1.4009e-05 gnorm: 1.12 [17:20:26< 7:09:53] +[titan] 2025-10-05 15:54:47,858 - root - INFO - step: 28310 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:54:47,858 - root - INFO - lr: 1.4002e-05 gnorm: 1.07 [17:20:37< 7:09:42] +[titan] 2025-10-05 15:54:58,731 - root - INFO - step: 28315 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 15:54:58,732 - root - INFO - lr: 1.3994e-05 gnorm: 1.09 [17:20:47< 7:09:30] +[titan] 2025-10-05 15:55:09,654 - root - INFO - step: 28320 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 15:55:09,655 - root - INFO - lr: 1.3987e-05 gnorm: 1.46 [17:20:58< 7:09:19] +[titan] 2025-10-05 15:55:20,530 - root - INFO - step: 28325 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 15:55:20,530 - root - INFO - lr: 1.3980e-05 gnorm: 1.12 [17:21:09< 7:09:08] +[titan] 2025-10-05 15:55:31,457 - root - INFO - step: 28330 loss: 1.9576 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7300 +[titan] 2025-10-05 15:55:31,457 - root - INFO - lr: 
1.3973e-05 gnorm: 1.10 [17:21:20< 7:08:57] +[titan] 2025-10-05 15:55:42,330 - root - INFO - step: 28335 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 15:55:42,330 - root - INFO - lr: 1.3966e-05 gnorm: 1.12 [17:21:31< 7:08:46] +[titan] 2025-10-05 15:55:53,200 - root - INFO - step: 28340 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:55:53,200 - root - INFO - lr: 1.3959e-05 gnorm: 1.15 [17:21:42< 7:08:35] +[titan] 2025-10-05 15:56:04,083 - root - INFO - step: 28345 loss: 2.0214 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 15:56:04,083 - root - INFO - lr: 1.3952e-05 gnorm: 1.17 [17:21:53< 7:08:24] +[titan] 2025-10-05 15:56:12,817 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:56:15,006 - root - INFO - step: 28350 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 15:56:15,006 - root - INFO - lr: 1.3944e-05 gnorm: 1.14 [17:22:04< 7:08:13] +[titan] 2025-10-05 15:56:25,936 - root - INFO - step: 28355 loss: 1.9838 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 15:56:25,936 - root - INFO - lr: 1.3937e-05 gnorm: 1.10 [17:22:15< 7:08:02] +[titan] 2025-10-05 15:56:36,882 - root - INFO - step: 28360 loss: 2.0896 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 15:56:36,882 - root - INFO - lr: 1.3930e-05 gnorm: 1.15 [17:22:26< 7:07:51] +[titan] 2025-10-05 15:56:47,760 - root - INFO - step: 28365 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 15:56:47,760 - root - INFO - lr: 1.3923e-05 gnorm: 1.11 [17:22:36< 7:07:40] +[titan] 2025-10-05 15:56:58,635 - root - INFO - step: 28370 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 15:56:58,635 - root - INFO - lr: 1.3916e-05 gnorm: 1.09 [17:22:47< 7:07:29] +[titan] 2025-10-05 15:57:09,503 - root - INFO - step: 28375 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:57:09,503 - root - INFO - lr: 1.3909e-05 gnorm: 1.05 [17:22:58< 7:07:17] +[titan] 2025-10-05 15:57:20,365 - root - INFO - step: 28380 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7154 +[titan] 2025-10-05 15:57:20,365 - root - INFO - lr: 
1.3902e-05 gnorm: 1.13 [17:23:09< 7:07:06] +[titan] 2025-10-05 15:57:31,331 - root - INFO - step: 28385 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 29,881 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:57:31,331 - root - INFO - lr: 1.3895e-05 gnorm: 1.12 [17:23:20< 7:06:55] +[titan] 2025-10-05 15:57:42,204 - root - INFO - step: 28390 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 15:57:42,205 - root - INFO - lr: 1.3888e-05 gnorm: 1.13 [17:23:31< 7:06:44] +[titan] 2025-10-05 15:57:53,067 - root - INFO - step: 28395 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:57:53,067 - root - INFO - lr: 1.3880e-05 gnorm: 1.09 [17:23:42< 7:06:33] +[titan] 2025-10-05 15:58:01,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:58:03,923 - root - INFO - step: 28400 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8317 +[titan] 2025-10-05 15:58:03,923 - root - INFO - lr: 1.3873e-05 gnorm: 1.11 [17:23:53< 7:06:22] +[titan] 2025-10-05 15:58:14,796 - root - INFO - step: 28405 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 15:58:14,796 - root - INFO - lr: 1.3866e-05 gnorm: 1.09 [17:24:03< 7:06:11] +[titan] 2025-10-05 15:58:25,653 - root - INFO - step: 28410 loss: 1.9984 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7669 +[titan] 2025-10-05 15:58:25,653 - root - INFO - lr: 1.3859e-05 gnorm: 1.10 [17:24:14< 7:06:00] +[titan] 2025-10-05 15:58:36,589 - root - INFO - step: 28415 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 15:58:36,589 - root - INFO - lr: 1.3852e-05 gnorm: 1.13 [17:24:25< 7:05:49] +[titan] 2025-10-05 15:58:47,471 - root - INFO - step: 28420 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7957 +[titan] 2025-10-05 15:58:47,471 - root - INFO - lr: 1.3845e-05 gnorm: 1.10 [17:24:36< 7:05:38] +[titan] 2025-10-05 15:58:58,364 - root - INFO - step: 28425 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 15:58:58,364 - root - INFO - lr: 1.3838e-05 gnorm: 1.14 [17:24:47< 7:05:27] +[titan] 2025-10-05 15:59:09,235 - root - INFO - step: 28430 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 15:59:09,235 - root - INFO - lr: 
1.3831e-05 gnorm: 1.13 [17:24:58< 7:05:16] +[titan] 2025-10-05 15:59:20,106 - root - INFO - step: 28435 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 15:59:20,106 - root - INFO - lr: 1.3824e-05 gnorm: 1.13 [17:25:09< 7:05:04] +[titan] 2025-10-05 15:59:31,054 - root - INFO - step: 28440 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7996 +[titan] 2025-10-05 15:59:31,054 - root - INFO - lr: 1.3817e-05 gnorm: 1.10 [17:25:20< 7:04:53] +[titan] 2025-10-05 15:59:41,947 - root - INFO - step: 28445 loss: 2.0638 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 15:59:41,947 - root - INFO - lr: 1.3810e-05 gnorm: 1.16 [17:25:31< 7:04:42] +[titan] 2025-10-05 15:59:50,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:59:52,821 - root - INFO - step: 28450 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 15:59:52,821 - root - INFO - lr: 1.3802e-05 gnorm: 1.11 [17:25:41< 7:04:31] +[titan] 2025-10-05 16:00:03,693 - root - INFO - step: 28455 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 16:00:03,693 - root - INFO - lr: 1.3795e-05 gnorm: 1.09 [17:25:52< 7:04:20] +[titan] 2025-10-05 16:00:14,540 - root - INFO - step: 28460 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 16:00:14,540 - root - INFO - lr: 1.3788e-05 gnorm: 1.09 [17:26:03< 7:04:09] +[titan] 2025-10-05 16:00:25,402 - root - INFO - step: 28465 loss: 2.0314 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 16:00:25,402 - root - INFO - lr: 1.3781e-05 gnorm: 1.12 [17:26:14< 7:03:58] +[titan] 2025-10-05 16:00:36,344 - root - INFO - step: 28470 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:00:36,344 - root - INFO - lr: 1.3774e-05 gnorm: 1.11 [17:26:25< 7:03:47] +[titan] 2025-10-05 16:00:47,202 - root - INFO - step: 28475 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:00:47,202 - root - INFO - lr: 1.3767e-05 gnorm: 1.13 [17:26:36< 7:03:36] +[titan] 2025-10-05 16:00:58,120 - root - INFO - step: 28480 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 16:00:58,120 - root - INFO - lr: 
1.3760e-05 gnorm: 1.14 [17:26:47< 7:03:25] +[titan] 2025-10-05 16:01:09,012 - root - INFO - step: 28485 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 16:01:09,012 - root - INFO - lr: 1.3753e-05 gnorm: 1.14 [17:26:58< 7:03:14] +[titan] 2025-10-05 16:01:19,903 - root - INFO - step: 28490 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 16:01:19,903 - root - INFO - lr: 1.3746e-05 gnorm: 1.12 [17:27:09< 7:03:03] +[titan] 2025-10-05 16:01:30,819 - root - INFO - step: 28495 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 16:01:30,819 - root - INFO - lr: 1.3739e-05 gnorm: 1.18 [17:27:19< 7:02:51] +[titan] 2025-10-05 16:01:39,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:01:41,746 - root - INFO - step: 28500 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:01:41,746 - root - INFO - lr: 1.3732e-05 gnorm: 1.13 [17:27:30< 7:02:40] +[titan] 2025-10-05 16:01:52,631 - root - INFO - step: 28505 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7896 +[titan] 2025-10-05 16:01:52,631 - root - INFO - lr: 1.3725e-05 gnorm: 1.13 [17:27:41< 7:02:29] +[titan] 2025-10-05 16:02:03,551 - root - INFO - step: 28510 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:02:03,551 - root - INFO - lr: 1.3718e-05 gnorm: 1.12 [17:27:52< 7:02:18] +[titan] 2025-10-05 16:02:14,435 - root - INFO - step: 28515 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 16:02:14,435 - root - INFO - lr: 1.3711e-05 gnorm: 1.16 [17:28:03< 7:02:07] +[titan] 2025-10-05 16:02:25,309 - root - INFO - step: 28520 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 16:02:25,310 - root - INFO - lr: 1.3704e-05 gnorm: 1.09 [17:28:14< 7:01:56] +[titan] 2025-10-05 16:02:36,209 - root - INFO - step: 28525 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 16:02:36,209 - root - INFO - lr: 1.3696e-05 gnorm: 1.09 [17:28:25< 7:01:45] +[titan] 2025-10-05 16:02:47,089 - root - INFO - step: 28530 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 16:02:47,089 - root - INFO - lr: 
1.3689e-05 gnorm: 1.11 [17:28:36< 7:01:34] +[titan] 2025-10-05 16:02:57,982 - root - INFO - step: 28535 loss: 2.0168 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 16:02:57,982 - root - INFO - lr: 1.3682e-05 gnorm: 1.14 [17:28:47< 7:01:23] +[titan] 2025-10-05 16:03:08,840 - root - INFO - step: 28540 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 16:03:08,840 - root - INFO - lr: 1.3675e-05 gnorm: 1.13 [17:28:57< 7:01:12] +[titan] 2025-10-05 16:03:19,756 - root - INFO - step: 28545 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 16:03:19,756 - root - INFO - lr: 1.3668e-05 gnorm: 1.15 [17:29:08< 7:01:01] +[titan] 2025-10-05 16:03:28,456 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:03:30,650 - root - INFO - step: 28550 loss: 1.9538 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:03:30,650 - root - INFO - lr: 1.3661e-05 gnorm: 1.08 [17:29:19< 7:00:50] +[titan] 2025-10-05 16:03:41,553 - root - INFO - step: 28555 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 16:03:41,553 - root - INFO - lr: 1.3654e-05 gnorm: 1.11 [17:29:30< 7:00:38] +[titan] 2025-10-05 16:03:52,429 - root - INFO - step: 28560 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 16:03:52,429 - root - INFO - lr: 1.3647e-05 gnorm: 1.07 [17:29:41< 7:00:27] +[titan] 2025-10-05 16:04:03,288 - root - INFO - step: 28565 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 16:04:03,288 - root - INFO - lr: 1.3640e-05 gnorm: 1.10 [17:29:52< 7:00:16] +[titan] 2025-10-05 16:04:14,124 - root - INFO - step: 28570 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:04:14,124 - root - INFO - lr: 1.3633e-05 gnorm: 1.11 [17:30:03< 7:00:05] +[titan] 2025-10-05 16:04:25,006 - root - INFO - step: 28575 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 16:04:25,006 - root - INFO - lr: 1.3626e-05 gnorm: 1.95 [17:30:14< 6:59:54] +[titan] 2025-10-05 16:04:35,875 - root - INFO - step: 28580 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:04:35,875 - root - INFO - lr: 
1.3619e-05 gnorm: 1.09 [17:30:25< 6:59:43] +[titan] 2025-10-05 16:04:46,735 - root - INFO - step: 28585 loss: 1.9918 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 16:04:46,735 - root - INFO - lr: 1.3612e-05 gnorm: 1.12 [17:30:35< 6:59:32] +[titan] 2025-10-05 16:04:57,585 - root - INFO - step: 28590 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 16:04:57,585 - root - INFO - lr: 1.3605e-05 gnorm: 1.06 [17:30:46< 6:59:21] +[titan] 2025-10-05 16:05:08,445 - root - INFO - step: 28595 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 16:05:08,445 - root - INFO - lr: 1.3598e-05 gnorm: 1.15 [17:30:57< 6:59:10] +[titan] 2025-10-05 16:05:17,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:05:19,314 - root - INFO - step: 28600 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:05:19,314 - root - INFO - lr: 1.3591e-05 gnorm: 1.10 [17:31:08< 6:58:59] +[titan] 2025-10-05 16:05:30,213 - root - INFO - step: 28605 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 16:05:30,213 - root - INFO - lr: 1.3584e-05 gnorm: 1.11 [17:31:19< 6:58:48] +[titan] 2025-10-05 16:05:41,137 - root - INFO - step: 28610 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 16:05:41,137 - root - INFO - lr: 1.3577e-05 gnorm: 1.11 [17:31:30< 6:58:37] +[titan] 2025-10-05 16:05:52,006 - root - INFO - step: 28615 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:05:52,007 - root - INFO - lr: 1.3570e-05 gnorm: 1.10 [17:31:41< 6:58:25] +[titan] 2025-10-05 16:06:02,858 - root - INFO - step: 28620 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 16:06:02,859 - root - INFO - lr: 1.3563e-05 gnorm: 1.12 [17:31:51< 6:58:14] +[titan] 2025-10-05 16:06:13,712 - root - INFO - step: 28625 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 16:06:13,712 - root - INFO - lr: 1.3556e-05 gnorm: 1.10 [17:32:02< 6:58:03] +[titan] 2025-10-05 16:06:24,582 - root - INFO - step: 28630 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 16:06:24,582 - root - INFO - lr: 
1.3549e-05 gnorm: 1.11 [17:32:13< 6:57:52] +[titan] 2025-10-05 16:06:35,472 - root - INFO - step: 28635 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7192 +[titan] 2025-10-05 16:06:35,472 - root - INFO - lr: 1.3542e-05 gnorm: 1.12 [17:32:24< 6:57:41] +[titan] 2025-10-05 16:06:46,399 - root - INFO - step: 28640 loss: 2.0089 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7766 +[titan] 2025-10-05 16:06:46,399 - root - INFO - lr: 1.3535e-05 gnorm: 1.13 [17:32:35< 6:57:30] +[titan] 2025-10-05 16:06:57,266 - root - INFO - step: 28645 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 16:06:57,266 - root - INFO - lr: 1.3528e-05 gnorm: 1.16 [17:32:46< 6:57:19] +[titan] 2025-10-05 16:07:05,934 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:07:08,125 - root - INFO - step: 28650 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 16:07:08,125 - root - INFO - lr: 1.3521e-05 gnorm: 1.08 [17:32:57< 6:57:08] +[titan] 2025-10-05 16:07:18,989 - root - INFO - step: 28655 loss: 1.9921 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 16:07:18,989 - root - INFO - lr: 1.3514e-05 gnorm: 1.13 [17:33:08< 6:56:57] +[titan] 2025-10-05 16:07:29,863 - root - INFO - step: 28660 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:07:29,863 - root - INFO - lr: 1.3507e-05 gnorm: 1.11 [17:33:18< 6:56:46] +[titan] 2025-10-05 16:07:40,776 - root - INFO - step: 28665 loss: 2.0607 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8160 +[titan] 2025-10-05 16:07:40,776 - root - INFO - lr: 1.3500e-05 gnorm: 1.24 [17:33:29< 6:56:35] +[titan] 2025-10-05 16:07:51,762 - root - INFO - step: 28670 loss: 2.0573 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.81 mfu: 41.84% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 16:07:51,762 - root - INFO - lr: 1.3493e-05 gnorm: 1.20 [17:33:40< 6:56:24] +[titan] 2025-10-05 16:07:56,299 - root - INFO - Dumping profiler traces at step 28672 +[titan] 2025-10-05 16:07:56,337 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:08:02,863 - root - INFO - step: 28675 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,520 tflops: 409.54 mfu: 41.41% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 16:08:02,863 - root - INFO - lr: 1.3486e-05 gnorm: 1.09 [17:33:51< 6:56:13] +[titan] 2025-10-05 16:08:13,727 - root - INFO - step: 28680 loss: 
1.9747 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 16:08:13,728 - root - INFO - lr: 1.3479e-05 gnorm: 1.10 [17:34:02< 6:56:01] +[titan] 2025-10-05 16:08:24,599 - root - INFO - step: 28685 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7697 +[titan] 2025-10-05 16:08:24,599 - root - INFO - lr: 1.3472e-05 gnorm: 1.11 [17:34:13< 6:55:50] +[titan] 2025-10-05 16:08:35,472 - root - INFO - step: 28690 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 16:08:35,472 - root - INFO - lr: 1.3465e-05 gnorm: 1.12 [17:34:24< 6:55:39] +[titan] 2025-10-05 16:08:46,365 - root - INFO - step: 28695 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:08:46,366 - root - INFO - lr: 1.3458e-05 gnorm: 1.10 [17:34:35< 6:55:28] +[titan] 2025-10-05 16:08:55,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:08:57,207 - root - INFO - step: 28700 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 16:08:57,207 - root - INFO - lr: 1.3451e-05 gnorm: 1.16 [17:34:46< 6:55:17] +[titan] 2025-10-05 16:09:08,084 - root - INFO - step: 28705 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 16:09:08,084 - root - INFO - lr: 1.3444e-05 gnorm: 1.11 [17:34:57< 6:55:06] +[titan] 2025-10-05 16:09:18,920 - root - INFO - step: 28710 loss: 1.8967 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 16:09:18,920 - root - INFO - lr: 1.3437e-05 gnorm: 1.12 [17:35:08< 6:54:55] +[titan] 2025-10-05 16:09:29,743 - root - INFO - step: 28715 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 16:09:29,743 - root - INFO - lr: 1.3430e-05 gnorm: 1.24 [17:35:18< 6:54:44] +[titan] 2025-10-05 16:09:40,610 - root - INFO - step: 28720 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:09:40,610 - root - INFO - lr: 1.3423e-05 gnorm: 1.10 [17:35:29< 6:54:33] +[titan] 2025-10-05 16:09:51,475 - root - INFO - step: 28725 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 16:09:51,475 - root - INFO - lr: 1.3416e-05 gnorm: 1.07 [17:35:40< 6:54:22] +[titan] 2025-10-05 16:10:02,333 - root - INFO - step: 28730 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:10:02,333 - root - INFO - lr: 
1.3409e-05 gnorm: 1.13 [17:35:51< 6:54:11] +[titan] 2025-10-05 16:10:13,264 - root - INFO - step: 28735 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:10:13,265 - root - INFO - lr: 1.3402e-05 gnorm: 1.14 [17:36:02< 6:53:59] +[titan] 2025-10-05 16:10:24,137 - root - INFO - step: 28740 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:10:24,137 - root - INFO - lr: 1.3395e-05 gnorm: 1.07 [17:36:13< 6:53:48] +[titan] 2025-10-05 16:10:34,996 - root - INFO - step: 28745 loss: 2.0343 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7979 +[titan] 2025-10-05 16:10:34,996 - root - INFO - lr: 1.3389e-05 gnorm: 1.14 [17:36:24< 6:53:37] +[titan] 2025-10-05 16:10:43,931 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:10:46,124 - root - INFO - step: 28750 loss: 2.0411 memory: 118.84GiB(85.28%) tps: 29,446 tflops: 408.52 mfu: 41.31% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 16:10:46,125 - root - INFO - lr: 1.3382e-05 gnorm: 1.10 [17:36:35< 6:53:26] +[titan] 2025-10-05 16:10:56,975 - root - INFO - step: 28755 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7186 +[titan] 2025-10-05 16:10:56,975 - root - INFO - lr: 1.3375e-05 gnorm: 1.11 [17:36:46< 6:53:15] +[titan] 2025-10-05 16:11:07,804 - root - INFO - step: 28760 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 16:11:07,804 - root - INFO - lr: 1.3368e-05 gnorm: 1.13 [17:36:56< 6:53:04] +[titan] 2025-10-05 16:11:18,644 - root - INFO - step: 28765 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 16:11:18,644 - root - INFO - lr: 1.3361e-05 gnorm: 1.14 [17:37:07< 6:52:53] +[titan] 2025-10-05 16:11:29,465 - root - INFO - step: 28770 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 16:11:29,465 - root - INFO - lr: 1.3354e-05 gnorm: 1.11 [17:37:18< 6:52:42] +[titan] 2025-10-05 16:11:40,342 - root - INFO - step: 28775 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 16:11:40,342 - root - INFO - lr: 1.3347e-05 gnorm: 1.10 [17:37:29< 6:52:31] +[titan] 2025-10-05 16:11:51,163 - root - INFO - step: 28780 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 16:11:51,164 - root - INFO - lr: 
1.3340e-05 gnorm: 1.10 [17:37:40< 6:52:20] +[titan] 2025-10-05 16:12:01,972 - root - INFO - step: 28785 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:12:01,972 - root - INFO - lr: 1.3333e-05 gnorm: 1.09 [17:37:51< 6:52:09] +[titan] 2025-10-05 16:12:12,796 - root - INFO - step: 28790 loss: 2.0542 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 16:12:12,796 - root - INFO - lr: 1.3326e-05 gnorm: 1.14 [17:38:01< 6:51:58] +[titan] 2025-10-05 16:12:23,627 - root - INFO - step: 28795 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 16:12:23,627 - root - INFO - lr: 1.3319e-05 gnorm: 5.74 [17:38:12< 6:51:46] +[titan] 2025-10-05 16:12:32,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:12:34,510 - root - INFO - step: 28800 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 16:12:34,510 - root - INFO - lr: 1.3312e-05 gnorm: 1.12 [17:38:23< 6:51:35] +[titan] 2025-10-05 16:12:45,424 - root - INFO - step: 28805 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 16:12:45,424 - root - INFO - lr: 1.3305e-05 gnorm: 1.12 [17:38:34< 6:51:24] +[titan] 2025-10-05 16:12:56,285 - root - INFO - step: 28810 loss: 1.9337 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 16:12:56,285 - root - INFO - lr: 1.3298e-05 gnorm: 1.11 [17:38:45< 6:51:13] +[titan] 2025-10-05 16:13:07,115 - root - INFO - step: 28815 loss: 2.0821 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8406 +[titan] 2025-10-05 16:13:07,115 - root - INFO - lr: 1.3291e-05 gnorm: 1.14 [17:38:56< 6:51:02] +[titan] 2025-10-05 16:13:17,934 - root - INFO - step: 28820 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 16:13:17,934 - root - INFO - lr: 1.3284e-05 gnorm: 1.14 [17:39:07< 6:50:51] +[titan] 2025-10-05 16:13:28,784 - root - INFO - step: 28825 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7000 +[titan] 2025-10-05 16:13:28,784 - root - INFO - lr: 1.3278e-05 gnorm: 1.09 [17:39:17< 6:50:40] +[titan] 2025-10-05 16:13:39,674 - root - INFO - step: 28830 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 16:13:39,675 - root - INFO - lr: 
1.3271e-05 gnorm: 1.13 [17:39:28< 6:50:29] +[titan] 2025-10-05 16:13:50,584 - root - INFO - step: 28835 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 16:13:50,584 - root - INFO - lr: 1.3264e-05 gnorm: 1.14 [17:39:39< 6:50:18] +[titan] 2025-10-05 16:14:01,435 - root - INFO - step: 28840 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 16:14:01,435 - root - INFO - lr: 1.3257e-05 gnorm: 1.11 [17:39:50< 6:50:07] +[titan] 2025-10-05 16:14:12,264 - root - INFO - step: 28845 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 16:14:12,264 - root - INFO - lr: 1.3250e-05 gnorm: 1.14 [17:40:01< 6:49:56] +[titan] 2025-10-05 16:14:20,891 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:14:23,077 - root - INFO - step: 28850 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,305 tflops: 420.44 mfu: 42.51% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 16:14:23,077 - root - INFO - lr: 1.3243e-05 gnorm: 1.13 [17:40:12< 6:49:44] +[titan] 2025-10-05 16:14:33,903 - root - INFO - step: 28855 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 16:14:33,903 - root - INFO - lr: 1.3236e-05 gnorm: 1.10 [17:40:23< 6:49:33] +[titan] 2025-10-05 16:14:44,758 - root - INFO - step: 28860 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 16:14:44,758 - root - INFO - lr: 1.3229e-05 gnorm: 1.18 [17:40:33< 6:49:22] +[titan] 2025-10-05 16:14:55,624 - root - INFO - step: 28865 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 16:14:55,624 - root - INFO - lr: 1.3222e-05 gnorm: 1.13 [17:40:44< 6:49:11] +[titan] 2025-10-05 16:15:06,429 - root - INFO - step: 28870 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,328 tflops: 420.76 mfu: 42.54% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 16:15:06,429 - root - INFO - lr: 1.3215e-05 gnorm: 1.12 [17:40:55< 6:49:00] +[titan] 2025-10-05 16:15:17,255 - root - INFO - step: 28875 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 16:15:17,255 - root - INFO - lr: 1.3209e-05 gnorm: 1.14 [17:41:06< 6:48:49] +[titan] 2025-10-05 16:15:28,083 - root - INFO - step: 28880 loss: 2.0444 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8077 +[titan] 2025-10-05 16:15:28,084 - root - INFO - lr: 
1.3202e-05 gnorm: 1.11 [17:41:17< 6:48:38] +[titan] 2025-10-05 16:15:38,890 - root - INFO - step: 28885 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 16:15:38,890 - root - INFO - lr: 1.3195e-05 gnorm: 1.12 [17:41:27< 6:48:27] +[titan] 2025-10-05 16:15:49,744 - root - INFO - step: 28890 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7735 +[titan] 2025-10-05 16:15:49,744 - root - INFO - lr: 1.3188e-05 gnorm: 1.17 [17:41:38< 6:48:16] +[titan] 2025-10-05 16:16:00,598 - root - INFO - step: 28895 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 16:16:00,598 - root - INFO - lr: 1.3181e-05 gnorm: 1.11 [17:41:49< 6:48:05] +[titan] 2025-10-05 16:16:09,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:16:11,420 - root - INFO - step: 28900 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 16:16:11,421 - root - INFO - lr: 1.3174e-05 gnorm: 1.14 [17:42:00< 6:47:53] +[titan] 2025-10-05 16:16:22,244 - root - INFO - step: 28905 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:16:22,245 - root - INFO - lr: 1.3167e-05 gnorm: 1.10 [17:42:11< 6:47:42] +[titan] 2025-10-05 16:16:33,059 - root - INFO - step: 28910 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 16:16:33,059 - root - INFO - lr: 1.3160e-05 gnorm: 1.14 [17:42:22< 6:47:31] +[titan] 2025-10-05 16:16:43,886 - root - INFO - step: 28915 loss: 1.9331 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7085 +[titan] 2025-10-05 16:16:43,887 - root - INFO - lr: 1.3153e-05 gnorm: 1.09 [17:42:32< 6:47:20] +[titan] 2025-10-05 16:16:54,738 - root - INFO - step: 28920 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 16:16:54,738 - root - INFO - lr: 1.3147e-05 gnorm: 1.09 [17:42:43< 6:47:09] +[titan] 2025-10-05 16:17:05,576 - root - INFO - step: 28925 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 16:17:05,576 - root - INFO - lr: 1.3140e-05 gnorm: 1.13 [17:42:54< 6:46:58] +[titan] 2025-10-05 16:17:16,436 - root - INFO - step: 28930 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 16:17:16,436 - root - INFO - lr: 
1.3133e-05 gnorm: 1.12 [17:43:05< 6:46:47] +[titan] 2025-10-05 16:17:27,262 - root - INFO - step: 28935 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 16:17:27,263 - root - INFO - lr: 1.3126e-05 gnorm: 1.13 [17:43:16< 6:46:36] +[titan] 2025-10-05 16:17:38,068 - root - INFO - step: 28940 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 16:17:38,069 - root - INFO - lr: 1.3119e-05 gnorm: 1.12 [17:43:27< 6:46:25] +[titan] 2025-10-05 16:17:48,901 - root - INFO - step: 28945 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 16:17:48,902 - root - INFO - lr: 1.3112e-05 gnorm: 1.11 [17:43:37< 6:46:14] +[titan] 2025-10-05 16:17:57,530 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:17:59,707 - root - INFO - step: 28950 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 16:17:59,708 - root - INFO - lr: 1.3105e-05 gnorm: 1.13 [17:43:48< 6:46:02] +[titan] 2025-10-05 16:18:10,530 - root - INFO - step: 28955 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 16:18:10,531 - root - INFO - lr: 1.3099e-05 gnorm: 1.13 [17:43:59< 6:45:51] +[titan] 2025-10-05 16:18:21,362 - root - INFO - step: 28960 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7806 +[titan] 2025-10-05 16:18:21,362 - root - INFO - lr: 1.3092e-05 gnorm: 1.11 [17:44:10< 6:45:40] +[titan] 2025-10-05 16:18:32,177 - root - INFO - step: 28965 loss: 2.0315 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 16:18:32,177 - root - INFO - lr: 1.3085e-05 gnorm: 1.15 [17:44:21< 6:45:29] +[titan] 2025-10-05 16:18:43,001 - root - INFO - step: 28970 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7932 +[titan] 2025-10-05 16:18:43,002 - root - INFO - lr: 1.3078e-05 gnorm: 1.12 [17:44:32< 6:45:18] +[titan] 2025-10-05 16:18:53,823 - root - INFO - step: 28975 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7847 +[titan] 2025-10-05 16:18:53,823 - root - INFO - lr: 1.3071e-05 gnorm: 1.15 [17:44:42< 6:45:07] +[titan] 2025-10-05 16:19:04,658 - root - INFO - step: 28980 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7964 +[titan] 2025-10-05 16:19:04,658 - root - INFO - lr: 
1.3064e-05 gnorm: 1.09 [17:44:53< 6:44:56] +[titan] 2025-10-05 16:19:15,441 - root - INFO - step: 28985 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,389 tflops: 421.61 mfu: 42.63% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 16:19:15,441 - root - INFO - lr: 1.3057e-05 gnorm: 1.14 [17:45:04< 6:44:45] +[titan] 2025-10-05 16:19:26,267 - root - INFO - step: 28990 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:19:26,267 - root - INFO - lr: 1.3051e-05 gnorm: 1.12 [17:45:15< 6:44:34] +[titan] 2025-10-05 16:19:37,046 - root - INFO - step: 28995 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,401 tflops: 421.76 mfu: 42.65% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 16:19:37,046 - root - INFO - lr: 1.3044e-05 gnorm: 1.09 [17:45:26< 6:44:23] +[titan] 2025-10-05 16:19:45,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:19:47,884 - root - INFO - step: 29000 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 16:19:47,884 - root - INFO - lr: 1.3037e-05 gnorm: 1.10 [17:45:36< 6:44:11] +[titan] 2025-10-05 16:19:58,682 - root - INFO - step: 29005 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 16:19:58,682 - root - INFO - lr: 1.3030e-05 gnorm: 1.14 [17:45:47< 6:44:00] +[titan] 2025-10-05 16:20:09,482 - root - INFO - step: 29010 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 16:20:09,482 - root - INFO - lr: 1.3023e-05 gnorm: 1.08 [17:45:58< 6:43:49] +[titan] 2025-10-05 16:20:20,322 - root - INFO - step: 29015 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 16:20:20,323 - root - INFO - lr: 1.3016e-05 gnorm: 1.11 [17:46:09< 6:43:38] +[titan] 2025-10-05 16:20:31,122 - root - INFO - step: 29020 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 16:20:31,122 - root - INFO - lr: 1.3010e-05 gnorm: 1.14 [17:46:20< 6:43:27] +[titan] 2025-10-05 16:20:42,001 - root - INFO - step: 29025 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:20:42,001 - root - INFO - lr: 1.3003e-05 gnorm: 1.11 [17:46:31< 6:43:16] +[titan] 2025-10-05 16:20:52,862 - root - INFO - step: 29030 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 16:20:52,862 - root - INFO - lr: 
1.2996e-05 gnorm: 1.17 [17:46:41< 6:43:05] +[titan] 2025-10-05 16:21:03,692 - root - INFO - step: 29035 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8113 +[titan] 2025-10-05 16:21:03,692 - root - INFO - lr: 1.2989e-05 gnorm: 1.19 [17:46:52< 6:42:54] +[titan] 2025-10-05 16:21:14,546 - root - INFO - step: 29040 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 16:21:14,546 - root - INFO - lr: 1.2982e-05 gnorm: 1.13 [17:47:03< 6:42:43] +[titan] 2025-10-05 16:21:25,382 - root - INFO - step: 29045 loss: 2.0710 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 16:21:25,382 - root - INFO - lr: 1.2975e-05 gnorm: 1.11 [17:47:14< 6:42:32] +[titan] 2025-10-05 16:21:34,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:21:36,183 - root - INFO - step: 29050 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7903 +[titan] 2025-10-05 16:21:36,183 - root - INFO - lr: 1.2969e-05 gnorm: 1.15 [17:47:25< 6:42:20] +[titan] 2025-10-05 16:21:47,040 - root - INFO - step: 29055 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 16:21:47,040 - root - INFO - lr: 1.2962e-05 gnorm: 1.13 [17:47:36< 6:42:09] +[titan] 2025-10-05 16:21:57,945 - root - INFO - step: 29060 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 16:21:57,945 - root - INFO - lr: 1.2955e-05 gnorm: 1.09 [17:47:47< 6:41:58] +[titan] 2025-10-05 16:22:08,763 - root - INFO - step: 29065 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:22:08,763 - root - INFO - lr: 1.2948e-05 gnorm: 1.10 [17:47:57< 6:41:47] +[titan] 2025-10-05 16:22:19,602 - root - INFO - step: 29070 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 16:22:19,602 - root - INFO - lr: 1.2941e-05 gnorm: 1.12 [17:48:08< 6:41:36] +[titan] 2025-10-05 16:22:30,424 - root - INFO - step: 29075 loss: 1.9436 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.11 mfu: 42.48% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 16:22:30,424 - root - INFO - lr: 1.2935e-05 gnorm: 1.08 [17:48:19< 6:41:25] +[titan] 2025-10-05 16:22:41,221 - root - INFO - step: 29080 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.04 mfu: 42.57% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 16:22:41,221 - root - INFO - lr: 
1.2928e-05 gnorm: 1.12 [17:48:30< 6:41:14] +[titan] 2025-10-05 16:22:52,143 - root - INFO - step: 29085 loss: 2.0455 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 16:22:52,143 - root - INFO - lr: 1.2921e-05 gnorm: 1.13 [17:48:41< 6:41:03] +[titan] 2025-10-05 16:23:02,963 - root - INFO - step: 29090 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:23:02,964 - root - INFO - lr: 1.2914e-05 gnorm: 1.15 [17:48:52< 6:40:52] +[titan] 2025-10-05 16:23:13,781 - root - INFO - step: 29095 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7612 +[titan] 2025-10-05 16:23:13,781 - root - INFO - lr: 1.2907e-05 gnorm: 1.12 [17:49:02< 6:40:41] +[titan] 2025-10-05 16:23:22,389 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:23:24,561 - root - INFO - step: 29100 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 16:23:24,562 - root - INFO - lr: 1.2901e-05 gnorm: 1.15 [17:49:13< 6:40:30] +[titan] 2025-10-05 16:23:35,362 - root - INFO - step: 29105 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:23:35,363 - root - INFO - lr: 1.2894e-05 gnorm: 1.15 [17:49:24< 6:40:18] +[titan] 2025-10-05 16:23:46,147 - root - INFO - step: 29110 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,385 tflops: 421.54 mfu: 42.62% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 16:23:46,147 - root - INFO - lr: 1.2887e-05 gnorm: 1.13 [17:49:35< 6:40:07] +[titan] 2025-10-05 16:23:56,986 - root - INFO - step: 29115 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 16:23:56,986 - root - INFO - lr: 1.2880e-05 gnorm: 1.11 [17:49:46< 6:39:56] +[titan] 2025-10-05 16:24:07,804 - root - INFO - step: 29120 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 16:24:07,804 - root - INFO - lr: 1.2873e-05 gnorm: 1.09 [17:49:56< 6:39:45] +[titan] 2025-10-05 16:24:18,657 - root - INFO - step: 29125 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 16:24:18,657 - root - INFO - lr: 1.2867e-05 gnorm: 1.08 [17:50:07< 6:39:34] +[titan] 2025-10-05 16:24:29,461 - root - INFO - step: 29130 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 16:24:29,461 - root - INFO - lr: 
1.2860e-05 gnorm: 1.11 [17:50:18< 6:39:23] +[titan] 2025-10-05 16:24:40,248 - root - INFO - step: 29135 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,379 tflops: 421.46 mfu: 42.61% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:24:40,248 - root - INFO - lr: 1.2853e-05 gnorm: 1.23 [17:50:29< 6:39:12] +[titan] 2025-10-05 16:24:51,066 - root - INFO - step: 29140 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 16:24:51,066 - root - INFO - lr: 1.2846e-05 gnorm: 1.11 [17:50:40< 6:39:01] +[titan] 2025-10-05 16:25:01,882 - root - INFO - step: 29145 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.33 mfu: 42.50% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 16:25:01,882 - root - INFO - lr: 1.2840e-05 gnorm: 1.14 [17:50:50< 6:38:50] +[titan] 2025-10-05 16:25:10,529 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:25:12,729 - root - INFO - step: 29150 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:12,730 - root - INFO - lr: 1.2833e-05 gnorm: 1.16 [17:51:01< 6:38:39] +[titan] 2025-10-05 16:25:23,552 - root - INFO - step: 29155 loss: 1.9771 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:25:23,552 - root - INFO - lr: 1.2826e-05 gnorm: 1.11 [17:51:12< 6:38:27] +[titan] 2025-10-05 16:25:34,364 - root - INFO - step: 29160 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:34,364 - root - INFO - lr: 1.2819e-05 gnorm: 1.13 [17:51:23< 6:38:16] +[titan] 2025-10-05 16:25:45,141 - root - INFO - step: 29165 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:25:45,141 - root - INFO - lr: 1.2813e-05 gnorm: 1.10 [17:51:34< 6:38:05] +[titan] 2025-10-05 16:25:55,942 - root - INFO - step: 29170 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 16:25:55,943 - root - INFO - lr: 1.2806e-05 gnorm: 1.12 [17:51:45< 6:37:54] +[titan] 2025-10-05 16:26:06,754 - root - INFO - step: 29175 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:26:06,754 - root - INFO - lr: 1.2799e-05 gnorm: 1.13 [17:51:55< 6:37:43] +[titan] 2025-10-05 16:26:17,565 - root - INFO - step: 29180 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,310 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 16:26:17,566 - root - INFO - lr: 
1.2792e-05 gnorm: 1.11 [17:52:06< 6:37:32] +[titan] 2025-10-05 16:26:26,489 - root - INFO - Dumping profiler traces at step 29184 +[titan] 2025-10-05 16:26:26,528 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:26:28,703 - root - INFO - step: 29185 loss: 2.0239 memory: 118.84GiB(85.28%) tps: 29,423 tflops: 408.20 mfu: 41.27% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:26:28,703 - root - INFO - lr: 1.2786e-05 gnorm: 1.13 [17:52:17< 6:37:21] +[titan] 2025-10-05 16:26:39,480 - root - INFO - step: 29190 loss: 2.0459 memory: 118.84GiB(85.28%) tps: 30,405 tflops: 421.82 mfu: 42.65% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 16:26:39,480 - root - INFO - lr: 1.2779e-05 gnorm: 1.08 [17:52:28< 6:37:10] +[titan] 2025-10-05 16:26:50,281 - root - INFO - step: 29195 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 16:26:50,282 - root - INFO - lr: 1.2772e-05 gnorm: 1.11 [17:52:39< 6:36:59] +[titan] 2025-10-05 16:26:58,915 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:27:01,083 - root - INFO - step: 29200 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:27:01,084 - root - INFO - lr: 1.2765e-05 gnorm: 1.10 [17:52:50< 6:36:48] +[titan] 2025-10-05 16:27:11,900 - root - INFO - step: 29205 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,295 tflops: 420.29 mfu: 42.50% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:27:11,900 - root - INFO - lr: 1.2759e-05 gnorm: 1.11 [17:53:00< 6:36:37] +[titan] 2025-10-05 16:27:22,704 - root - INFO - step: 29210 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:27:22,704 - root - INFO - lr: 1.2752e-05 gnorm: 1.13 [17:53:11< 6:36:25] +[titan] 2025-10-05 16:27:33,520 - root - INFO - step: 29215 loss: 1.9806 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 16:27:33,520 - root - INFO - lr: 1.2745e-05 gnorm: 1.13 [17:53:22< 6:36:14] +[titan] 2025-10-05 16:27:44,343 - root - INFO - step: 29220 loss: 2.0330 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:27:44,344 - root - INFO - lr: 1.2738e-05 gnorm: 1.11 [17:53:33< 6:36:03] +[titan] 2025-10-05 16:27:55,246 - root - INFO - step: 29225 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 16:27:55,246 - root - INFO - lr: 1.2732e-05 gnorm: 1.14 [17:53:44< 6:35:52] +[titan] 2025-10-05 16:28:06,063 - root - INFO - step: 29230 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 16:28:06,064 - root - INFO - lr: 
1.2725e-05 gnorm: 1.10 [17:53:55< 6:35:41] +[titan] 2025-10-05 16:28:16,881 - root - INFO - step: 29235 loss: 1.9977 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7668 +[titan] 2025-10-05 16:28:16,882 - root - INFO - lr: 1.2718e-05 gnorm: 1.12 [17:54:05< 6:35:30] +[titan] 2025-10-05 16:28:27,741 - root - INFO - step: 29240 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 16:28:27,741 - root - INFO - lr: 1.2711e-05 gnorm: 1.12 [17:54:16< 6:35:19] +[titan] 2025-10-05 16:28:38,608 - root - INFO - step: 29245 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 16:28:38,608 - root - INFO - lr: 1.2705e-05 gnorm: 1.14 [17:54:27< 6:35:08] +[titan] 2025-10-05 16:28:47,296 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:28:49,481 - root - INFO - step: 29250 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7535 +[titan] 2025-10-05 16:28:49,482 - root - INFO - lr: 1.2698e-05 gnorm: 1.12 [17:54:38< 6:34:57] +[titan] 2025-10-05 16:29:00,345 - root - INFO - step: 29255 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 16:29:00,345 - root - INFO - lr: 1.2691e-05 gnorm: 1.13 [17:54:49< 6:34:46] +[titan] 2025-10-05 16:29:11,181 - root - INFO - step: 29260 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 16:29:11,181 - root - INFO - lr: 1.2684e-05 gnorm: 1.10 [17:55:00< 6:34:35] +[titan] 2025-10-05 16:29:22,010 - root - INFO - step: 29265 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 16:29:22,010 - root - INFO - lr: 1.2678e-05 gnorm: 1.10 [17:55:11< 6:34:23] +[titan] 2025-10-05 16:29:32,844 - root - INFO - step: 29270 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8340 +[titan] 2025-10-05 16:29:32,845 - root - INFO - lr: 1.2671e-05 gnorm: 1.14 [17:55:21< 6:34:12] +[titan] 2025-10-05 16:29:43,662 - root - INFO - step: 29275 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 16:29:43,662 - root - INFO - lr: 1.2664e-05 gnorm: 1.10 [17:55:32< 6:34:01] +[titan] 2025-10-05 16:29:54,552 - root - INFO - step: 29280 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 16:29:54,553 - root - INFO - lr: 
1.2658e-05 gnorm: 1.15 [17:55:43< 6:33:50] +[titan] 2025-10-05 16:30:05,442 - root - INFO - step: 29285 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:30:05,442 - root - INFO - lr: 1.2651e-05 gnorm: 1.15 [17:55:54< 6:33:39] +[titan] 2025-10-05 16:30:16,285 - root - INFO - step: 29290 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:30:16,285 - root - INFO - lr: 1.2644e-05 gnorm: 1.13 [17:56:05< 6:33:28] +[titan] 2025-10-05 16:30:27,122 - root - INFO - step: 29295 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 16:30:27,122 - root - INFO - lr: 1.2638e-05 gnorm: 1.16 [17:56:16< 6:33:17] +[titan] 2025-10-05 16:30:35,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:30:37,974 - root - INFO - step: 29300 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:30:37,975 - root - INFO - lr: 1.2631e-05 gnorm: 1.13 [17:56:27< 6:33:06] +[titan] 2025-10-05 16:30:48,835 - root - INFO - step: 29305 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 16:30:48,835 - root - INFO - lr: 1.2624e-05 gnorm: 1.12 [17:56:37< 6:32:55] +[titan] 2025-10-05 16:30:59,735 - root - INFO - step: 29310 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 16:30:59,735 - root - INFO - lr: 1.2617e-05 gnorm: 1.16 [17:56:48< 6:32:44] +[titan] 2025-10-05 16:31:10,585 - root - INFO - step: 29315 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 16:31:10,585 - root - INFO - lr: 1.2611e-05 gnorm: 1.11 [17:56:59< 6:32:33] +[titan] 2025-10-05 16:31:21,451 - root - INFO - step: 29320 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:31:21,452 - root - INFO - lr: 1.2604e-05 gnorm: 1.14 [17:57:10< 6:32:22] +[titan] 2025-10-05 16:31:32,282 - root - INFO - step: 29325 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 16:31:32,283 - root - INFO - lr: 1.2597e-05 gnorm: 1.08 [17:57:21< 6:32:10] +[titan] 2025-10-05 16:31:43,142 - root - INFO - step: 29330 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7937 +[titan] 2025-10-05 16:31:43,143 - root - INFO - lr: 
1.2591e-05 gnorm: 1.15 [17:57:32< 6:31:59] +[titan] 2025-10-05 16:31:54,012 - root - INFO - step: 29335 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 16:31:54,012 - root - INFO - lr: 1.2584e-05 gnorm: 1.12 [17:57:43< 6:31:48] +[titan] 2025-10-05 16:32:04,880 - root - INFO - step: 29340 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 16:32:04,880 - root - INFO - lr: 1.2577e-05 gnorm: 1.18 [17:57:53< 6:31:37] +[titan] 2025-10-05 16:32:15,774 - root - INFO - step: 29345 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 16:32:15,774 - root - INFO - lr: 1.2571e-05 gnorm: 1.14 [17:58:04< 6:31:26] +[titan] 2025-10-05 16:32:24,447 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:32:26,631 - root - INFO - step: 29350 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 16:32:26,632 - root - INFO - lr: 1.2564e-05 gnorm: 1.11 [17:58:15< 6:31:15] +[titan] 2025-10-05 16:32:37,480 - root - INFO - step: 29355 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:32:37,480 - root - INFO - lr: 1.2557e-05 gnorm: 1.10 [17:58:26< 6:31:04] +[titan] 2025-10-05 16:32:48,323 - root - INFO - step: 29360 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:32:48,323 - root - INFO - lr: 1.2551e-05 gnorm: 1.14 [17:58:37< 6:30:53] +[titan] 2025-10-05 16:32:59,199 - root - INFO - step: 29365 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 16:32:59,199 - root - INFO - lr: 1.2544e-05 gnorm: 1.13 [17:58:48< 6:30:42] +[titan] 2025-10-05 16:33:10,048 - root - INFO - step: 29370 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 16:33:10,048 - root - INFO - lr: 1.2537e-05 gnorm: 1.12 [17:58:59< 6:30:31] +[titan] 2025-10-05 16:33:20,934 - root - INFO - step: 29375 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 16:33:20,934 - root - INFO - lr: 1.2531e-05 gnorm: 1.15 [17:59:09< 6:30:20] +[titan] 2025-10-05 16:33:31,794 - root - INFO - step: 29380 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:33:31,794 - root - INFO - lr: 
1.2524e-05 gnorm: 1.11 [17:59:20< 6:30:09] +[titan] 2025-10-05 16:33:42,652 - root - INFO - step: 29385 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7647 +[titan] 2025-10-05 16:33:42,652 - root - INFO - lr: 1.2517e-05 gnorm: 1.13 [17:59:31< 6:29:58] +[titan] 2025-10-05 16:33:53,484 - root - INFO - step: 29390 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:33:53,484 - root - INFO - lr: 1.2511e-05 gnorm: 1.15 [17:59:42< 6:29:46] +[titan] 2025-10-05 16:34:04,355 - root - INFO - step: 29395 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 16:34:04,355 - root - INFO - lr: 1.2504e-05 gnorm: 1.11 [17:59:53< 6:29:35] +[titan] 2025-10-05 16:34:13,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:34:15,217 - root - INFO - step: 29400 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 16:34:15,218 - root - INFO - lr: 1.2497e-05 gnorm: 1.12 [18:00:04< 6:29:24] +[titan] 2025-10-05 16:34:26,084 - root - INFO - step: 29405 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 16:34:26,084 - root - INFO - lr: 1.2491e-05 gnorm: 1.13 [18:00:15< 6:29:13] +[titan] 2025-10-05 16:34:36,985 - root - INFO - step: 29410 loss: 1.9746 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:34:36,985 - root - INFO - lr: 1.2484e-05 gnorm: 1.14 [18:00:26< 6:29:02] +[titan] 2025-10-05 16:34:47,862 - root - INFO - step: 29415 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 16:34:47,862 - root - INFO - lr: 1.2477e-05 gnorm: 1.14 [18:00:36< 6:28:51] +[titan] 2025-10-05 16:34:58,716 - root - INFO - step: 29420 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:34:58,716 - root - INFO - lr: 1.2471e-05 gnorm: 1.10 [18:00:47< 6:28:40] +[titan] 2025-10-05 16:35:09,612 - root - INFO - step: 29425 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 16:35:09,613 - root - INFO - lr: 1.2464e-05 gnorm: 1.13 [18:00:58< 6:28:29] +[titan] 2025-10-05 16:35:20,487 - root - INFO - step: 29430 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7675 +[titan] 2025-10-05 16:35:20,488 - root - INFO - lr: 
1.2457e-05 gnorm: 1.12 [18:01:09< 6:28:18] +[titan] 2025-10-05 16:35:31,364 - root - INFO - step: 29435 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:35:31,364 - root - INFO - lr: 1.2451e-05 gnorm: 1.13 [18:01:20< 6:28:07] +[titan] 2025-10-05 16:35:42,266 - root - INFO - step: 29440 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 16:35:42,266 - root - INFO - lr: 1.2444e-05 gnorm: 1.13 [18:01:31< 6:27:56] +[titan] 2025-10-05 16:35:53,139 - root - INFO - step: 29445 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 16:35:53,139 - root - INFO - lr: 1.2438e-05 gnorm: 1.10 [18:01:42< 6:27:45] +[titan] 2025-10-05 16:36:01,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:36:04,036 - root - INFO - step: 29450 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:36:04,036 - root - INFO - lr: 1.2431e-05 gnorm: 1.10 [18:01:53< 6:27:34] +[titan] 2025-10-05 16:36:14,913 - root - INFO - step: 29455 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 16:36:14,913 - root - INFO - lr: 1.2424e-05 gnorm: 1.13 [18:02:03< 6:27:23] +[titan] 2025-10-05 16:36:25,795 - root - INFO - step: 29460 loss: 2.0213 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7869 +[titan] 2025-10-05 16:36:25,795 - root - INFO - lr: 1.2418e-05 gnorm: 1.13 [18:02:14< 6:27:11] +[titan] 2025-10-05 16:36:36,668 - root - INFO - step: 29465 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 16:36:36,668 - root - INFO - lr: 1.2411e-05 gnorm: 1.14 [18:02:25< 6:27:00] +[titan] 2025-10-05 16:36:47,594 - root - INFO - step: 29470 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 16:36:47,594 - root - INFO - lr: 1.2404e-05 gnorm: 1.17 [18:02:36< 6:26:49] +[titan] 2025-10-05 16:36:58,488 - root - INFO - step: 29475 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8272 +[titan] 2025-10-05 16:36:58,489 - root - INFO - lr: 1.2398e-05 gnorm: 1.14 [18:02:47< 6:26:38] +[titan] 2025-10-05 16:37:09,396 - root - INFO - step: 29480 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 16:37:09,396 - root - INFO - lr: 
1.2391e-05 gnorm: 1.10 [18:02:58< 6:26:27] +[titan] 2025-10-05 16:37:20,276 - root - INFO - step: 29485 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7523 +[titan] 2025-10-05 16:37:20,276 - root - INFO - lr: 1.2385e-05 gnorm: 1.14 [18:03:09< 6:26:16] +[titan] 2025-10-05 16:37:31,149 - root - INFO - step: 29490 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:37:31,149 - root - INFO - lr: 1.2378e-05 gnorm: 1.18 [18:03:20< 6:26:05] +[titan] 2025-10-05 16:37:42,032 - root - INFO - step: 29495 loss: 1.9702 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:37:42,032 - root - INFO - lr: 1.2371e-05 gnorm: 1.12 [18:03:31< 6:25:54] +[titan] 2025-10-05 16:37:50,726 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:37:52,909 - root - INFO - step: 29500 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 16:37:52,910 - root - INFO - lr: 1.2365e-05 gnorm: 1.18 [18:03:41< 6:25:43] +[titan] 2025-10-05 16:38:03,862 - root - INFO - step: 29505 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7779 +[titan] 2025-10-05 16:38:03,862 - root - INFO - lr: 1.2358e-05 gnorm: 1.08 [18:03:52< 6:25:32] +[titan] 2025-10-05 16:38:14,737 - root - INFO - step: 29510 loss: 2.0280 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 16:38:14,737 - root - INFO - lr: 1.2352e-05 gnorm: 1.12 [18:04:03< 6:25:21] +[titan] 2025-10-05 16:38:25,629 - root - INFO - step: 29515 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 16:38:25,629 - root - INFO - lr: 1.2345e-05 gnorm: 1.10 [18:04:14< 6:25:10] +[titan] 2025-10-05 16:38:36,496 - root - INFO - step: 29520 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 16:38:36,497 - root - INFO - lr: 1.2338e-05 gnorm: 1.14 [18:04:25< 6:24:59] +[titan] 2025-10-05 16:38:47,375 - root - INFO - step: 29525 loss: 2.0360 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 16:38:47,375 - root - INFO - lr: 1.2332e-05 gnorm: 1.12 [18:04:36< 6:24:48] +[titan] 2025-10-05 16:38:58,269 - root - INFO - step: 29530 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 16:38:58,270 - root - INFO - lr: 
1.2325e-05 gnorm: 1.14 [18:04:47< 6:24:37] +[titan] 2025-10-05 16:39:09,198 - root - INFO - step: 29535 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:39:09,198 - root - INFO - lr: 1.2319e-05 gnorm: 1.14 [18:04:58< 6:24:25] +[titan] 2025-10-05 16:39:20,067 - root - INFO - step: 29540 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7690 +[titan] 2025-10-05 16:39:20,068 - root - INFO - lr: 1.2312e-05 gnorm: 1.11 [18:05:09< 6:24:14] +[titan] 2025-10-05 16:39:30,927 - root - INFO - step: 29545 loss: 1.9548 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:39:30,927 - root - INFO - lr: 1.2305e-05 gnorm: 1.08 [18:05:19< 6:24:03] +[titan] 2025-10-05 16:39:39,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:39:41,783 - root - INFO - step: 29550 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 16:39:41,783 - root - INFO - lr: 1.2299e-05 gnorm: 1.16 [18:05:30< 6:23:52] +[titan] 2025-10-05 16:39:52,647 - root - INFO - step: 29555 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:39:52,647 - root - INFO - lr: 1.2292e-05 gnorm: 1.11 [18:05:41< 6:23:41] +[titan] 2025-10-05 16:40:03,511 - root - INFO - step: 29560 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 16:40:03,511 - root - INFO - lr: 1.2286e-05 gnorm: 1.11 [18:05:52< 6:23:30] +[titan] 2025-10-05 16:40:14,393 - root - INFO - step: 29565 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 16:40:14,393 - root - INFO - lr: 1.2279e-05 gnorm: 1.09 [18:06:03< 6:23:19] +[titan] 2025-10-05 16:40:25,289 - root - INFO - step: 29570 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 16:40:25,289 - root - INFO - lr: 1.2273e-05 gnorm: 1.15 [18:06:14< 6:23:08] +[titan] 2025-10-05 16:40:36,151 - root - INFO - step: 29575 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 16:40:36,151 - root - INFO - lr: 1.2266e-05 gnorm: 1.12 [18:06:25< 6:22:57] +[titan] 2025-10-05 16:40:47,014 - root - INFO - step: 29580 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:40:47,014 - root - INFO - lr: 
1.2259e-05 gnorm: 1.15 [18:06:36< 6:22:46] +[titan] 2025-10-05 16:40:57,884 - root - INFO - step: 29585 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7835 +[titan] 2025-10-05 16:40:57,884 - root - INFO - lr: 1.2253e-05 gnorm: 1.13 [18:06:46< 6:22:35] +[titan] 2025-10-05 16:41:08,765 - root - INFO - step: 29590 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 16:41:08,765 - root - INFO - lr: 1.2246e-05 gnorm: 1.12 [18:06:57< 6:22:24] +[titan] 2025-10-05 16:41:19,628 - root - INFO - step: 29595 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:41:19,628 - root - INFO - lr: 1.2240e-05 gnorm: 1.14 [18:07:08< 6:22:13] +[titan] 2025-10-05 16:41:28,344 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:41:30,522 - root - INFO - step: 29600 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 16:41:30,522 - root - INFO - lr: 1.2233e-05 gnorm: 1.11 [18:07:19< 6:22:02] +[titan] 2025-10-05 16:41:41,388 - root - INFO - step: 29605 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 16:41:41,389 - root - INFO - lr: 1.2227e-05 gnorm: 1.11 [18:07:30< 6:21:50] +[titan] 2025-10-05 16:41:52,245 - root - INFO - step: 29610 loss: 1.9448 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 16:41:52,245 - root - INFO - lr: 1.2220e-05 gnorm: 1.09 [18:07:41< 6:21:39] +[titan] 2025-10-05 16:42:03,126 - root - INFO - step: 29615 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8316 +[titan] 2025-10-05 16:42:03,126 - root - INFO - lr: 1.2214e-05 gnorm: 1.15 [18:07:52< 6:21:28] +[titan] 2025-10-05 16:42:13,989 - root - INFO - step: 29620 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7570 +[titan] 2025-10-05 16:42:13,989 - root - INFO - lr: 1.2207e-05 gnorm: 1.13 [18:08:03< 6:21:17] +[titan] 2025-10-05 16:42:24,845 - root - INFO - step: 29625 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 16:42:24,845 - root - INFO - lr: 1.2200e-05 gnorm: 1.11 [18:08:13< 6:21:06] +[titan] 2025-10-05 16:42:35,740 - root - INFO - step: 29630 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 16:42:35,740 - root - INFO - lr: 
1.2194e-05 gnorm: 1.16 [18:08:24< 6:20:55] +[titan] 2025-10-05 16:42:46,609 - root - INFO - step: 29635 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 16:42:46,609 - root - INFO - lr: 1.2187e-05 gnorm: 1.13 [18:08:35< 6:20:44] +[titan] 2025-10-05 16:42:57,451 - root - INFO - step: 29640 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:42:57,451 - root - INFO - lr: 1.2181e-05 gnorm: 1.11 [18:08:46< 6:20:33] +[titan] 2025-10-05 16:43:08,337 - root - INFO - step: 29645 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 16:43:08,337 - root - INFO - lr: 1.2174e-05 gnorm: 1.10 [18:08:57< 6:20:22] +[titan] 2025-10-05 16:43:17,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:43:19,192 - root - INFO - step: 29650 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7561 +[titan] 2025-10-05 16:43:19,192 - root - INFO - lr: 1.2168e-05 gnorm: 1.14 [18:09:08< 6:20:11] +[titan] 2025-10-05 16:43:30,040 - root - INFO - step: 29655 loss: 1.9877 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 16:43:30,040 - root - INFO - lr: 1.2161e-05 gnorm: 1.13 [18:09:19< 6:20:00] +[titan] 2025-10-05 16:43:40,896 - root - INFO - step: 29660 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:43:40,896 - root - INFO - lr: 1.2155e-05 gnorm: 1.16 [18:09:29< 6:19:49] +[titan] 2025-10-05 16:43:51,775 - root - INFO - step: 29665 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 16:43:51,776 - root - INFO - lr: 1.2148e-05 gnorm: 1.12 [18:09:40< 6:19:38] +[titan] 2025-10-05 16:44:02,650 - root - INFO - step: 29670 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7764 +[titan] 2025-10-05 16:44:02,651 - root - INFO - lr: 1.2142e-05 gnorm: 1.12 [18:09:51< 6:19:26] +[titan] 2025-10-05 16:44:13,541 - root - INFO - step: 29675 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 16:44:13,542 - root - INFO - lr: 1.2135e-05 gnorm: 1.12 [18:10:02< 6:19:15] +[titan] 2025-10-05 16:44:24,406 - root - INFO - step: 29680 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:44:24,406 - root - INFO - lr: 
1.2129e-05 gnorm: 1.10 [18:10:13< 6:19:04] +[titan] 2025-10-05 16:44:35,270 - root - INFO - step: 29685 loss: 2.0294 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 16:44:35,271 - root - INFO - lr: 1.2122e-05 gnorm: 1.14 [18:10:24< 6:18:53] +[titan] 2025-10-05 16:44:46,146 - root - INFO - step: 29690 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:44:46,146 - root - INFO - lr: 1.2116e-05 gnorm: 1.14 [18:10:35< 6:18:42] +[titan] 2025-10-05 16:44:57,137 - root - INFO - step: 29695 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:44:57,138 - root - INFO - lr: 1.2109e-05 gnorm: 1.16 [18:10:46< 6:18:31] +[titan] 2025-10-05 16:44:59,499 - root - INFO - Dumping profiler traces at step 29696 +[titan] 2025-10-05 16:44:59,539 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:45:06,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:45:08,245 - root - INFO - step: 29700 loss: 2.0615 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.30 mfu: 41.38% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8226 +[titan] 2025-10-05 16:45:08,245 - root - INFO - lr: 1.2103e-05 gnorm: 1.15 [18:10:57< 6:18:20] +[titan] 2025-10-05 16:45:19,144 - root - INFO - step: 29705 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 16:45:19,144 - root - INFO - lr: 1.2096e-05 gnorm: 1.11 [18:11:08< 6:18:09] +[titan] 2025-10-05 16:45:30,019 - root - INFO - step: 29710 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 16:45:30,019 - root - INFO - lr: 1.2090e-05 gnorm: 1.15 [18:11:19< 6:17:58] +[titan] 2025-10-05 16:45:40,886 - root - INFO - step: 29715 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:45:40,886 - root - INFO - lr: 1.2083e-05 gnorm: 1.09 [18:11:29< 6:17:47] +[titan] 2025-10-05 16:45:51,774 - root - INFO - step: 29720 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8207 +[titan] 2025-10-05 16:45:51,775 - root - INFO - lr: 1.2077e-05 gnorm: 1.13 [18:11:40< 6:17:36] +[titan] 2025-10-05 16:46:02,667 - root - INFO - step: 29725 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 16:46:02,667 - root - INFO - lr: 1.2070e-05 gnorm: 1.11 [18:11:51< 6:17:25] +[titan] 2025-10-05 16:46:13,605 - root - INFO - step: 29730 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 16:46:13,605 - root - INFO - lr: 
1.2064e-05 gnorm: 1.10 [18:12:02< 6:17:14] +[titan] 2025-10-05 16:46:24,504 - root - INFO - step: 29735 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 16:46:24,505 - root - INFO - lr: 1.2057e-05 gnorm: 1.14 [18:12:13< 6:17:03] +[titan] 2025-10-05 16:46:35,396 - root - INFO - step: 29740 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 16:46:35,397 - root - INFO - lr: 1.2051e-05 gnorm: 1.16 [18:12:24< 6:16:52] +[titan] 2025-10-05 16:46:46,263 - root - INFO - step: 29745 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:46:46,263 - root - INFO - lr: 1.2044e-05 gnorm: 1.14 [18:12:35< 6:16:41] +[titan] 2025-10-05 16:46:54,956 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:46:57,142 - root - INFO - step: 29750 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:46:57,142 - root - INFO - lr: 1.2038e-05 gnorm: 1.14 [18:12:46< 6:16:30] +[titan] 2025-10-05 16:47:08,011 - root - INFO - step: 29755 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 16:47:08,011 - root - INFO - lr: 1.2031e-05 gnorm: 1.14 [18:12:57< 6:16:18] +[titan] 2025-10-05 16:47:18,928 - root - INFO - step: 29760 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 16:47:18,929 - root - INFO - lr: 1.2025e-05 gnorm: 1.15 [18:13:07< 6:16:07] +[titan] 2025-10-05 16:47:29,805 - root - INFO - step: 29765 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 16:47:29,806 - root - INFO - lr: 1.2018e-05 gnorm: 1.11 [18:13:18< 6:15:56] +[titan] 2025-10-05 16:47:40,695 - root - INFO - step: 29770 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 16:47:40,695 - root - INFO - lr: 1.2012e-05 gnorm: 1.12 [18:13:29< 6:15:45] +[titan] 2025-10-05 16:47:51,568 - root - INFO - step: 29775 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 16:47:51,568 - root - INFO - lr: 1.2005e-05 gnorm: 1.13 [18:13:40< 6:15:34] +[titan] 2025-10-05 16:48:02,434 - root - INFO - step: 29780 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 16:48:02,435 - root - INFO - lr: 
1.1999e-05 gnorm: 1.13 [18:13:51< 6:15:23] +[titan] 2025-10-05 16:48:13,326 - root - INFO - step: 29785 loss: 2.0923 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 16:48:13,326 - root - INFO - lr: 1.1992e-05 gnorm: 1.17 [18:14:02< 6:15:12] +[titan] 2025-10-05 16:48:24,246 - root - INFO - step: 29790 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 16:48:24,246 - root - INFO - lr: 1.1986e-05 gnorm: 1.21 [18:14:13< 6:15:01] +[titan] 2025-10-05 16:48:35,115 - root - INFO - step: 29795 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7438 +[titan] 2025-10-05 16:48:35,115 - root - INFO - lr: 1.1979e-05 gnorm: 1.16 [18:14:24< 6:14:50] +[titan] 2025-10-05 16:48:43,808 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:48:45,984 - root - INFO - step: 29800 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:48:45,984 - root - INFO - lr: 1.1973e-05 gnorm: 1.17 [18:14:34< 6:14:39] +[titan] 2025-10-05 16:48:56,850 - root - INFO - step: 29805 loss: 2.0467 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 16:48:56,850 - root - INFO - lr: 1.1966e-05 gnorm: 1.13 [18:14:45< 6:14:28] +[titan] 2025-10-05 16:49:07,720 - root - INFO - step: 29810 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 16:49:07,720 - root - INFO - lr: 1.1960e-05 gnorm: 1.14 [18:14:56< 6:14:17] +[titan] 2025-10-05 16:49:18,594 - root - INFO - step: 29815 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 16:49:18,594 - root - INFO - lr: 1.1954e-05 gnorm: 1.11 [18:15:07< 6:14:06] +[titan] 2025-10-05 16:49:29,475 - root - INFO - step: 29820 loss: 2.0086 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7751 +[titan] 2025-10-05 16:49:29,475 - root - INFO - lr: 1.1947e-05 gnorm: 1.16 [18:15:18< 6:13:55] +[titan] 2025-10-05 16:49:40,387 - root - INFO - step: 29825 loss: 1.9867 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7563 +[titan] 2025-10-05 16:49:40,388 - root - INFO - lr: 1.1941e-05 gnorm: 1.10 [18:15:29< 6:13:44] +[titan] 2025-10-05 16:49:51,279 - root - INFO - step: 29830 loss: 1.9675 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 16:49:51,279 - root - INFO - lr: 
1.1934e-05 gnorm: 1.09 [18:15:40< 6:13:32] +[titan] 2025-10-05 16:50:02,138 - root - INFO - step: 29835 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7622 +[titan] 2025-10-05 16:50:02,138 - root - INFO - lr: 1.1928e-05 gnorm: 1.12 [18:15:51< 6:13:21] +[titan] 2025-10-05 16:50:13,006 - root - INFO - step: 29840 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 16:50:13,006 - root - INFO - lr: 1.1921e-05 gnorm: 1.13 [18:16:02< 6:13:10] +[titan] 2025-10-05 16:50:23,932 - root - INFO - step: 29845 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6997 +[titan] 2025-10-05 16:50:23,933 - root - INFO - lr: 1.1915e-05 gnorm: 1.10 [18:16:12< 6:12:59] +[titan] 2025-10-05 16:50:32,610 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:50:34,782 - root - INFO - step: 29850 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8188 +[titan] 2025-10-05 16:50:34,782 - root - INFO - lr: 1.1908e-05 gnorm: 1.18 [18:16:23< 6:12:48] +[titan] 2025-10-05 16:50:45,679 - root - INFO - step: 29855 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7933 +[titan] 2025-10-05 16:50:45,680 - root - INFO - lr: 1.1902e-05 gnorm: 1.17 [18:16:34< 6:12:37] +[titan] 2025-10-05 16:50:56,541 - root - INFO - step: 29860 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:50:56,541 - root - INFO - lr: 1.1896e-05 gnorm: 1.11 [18:16:45< 6:12:26] +[titan] 2025-10-05 16:51:07,402 - root - INFO - step: 29865 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 16:51:07,402 - root - INFO - lr: 1.1889e-05 gnorm: 1.18 [18:16:56< 6:12:15] +[titan] 2025-10-05 16:51:18,320 - root - INFO - step: 29870 loss: 1.9395 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 16:51:18,320 - root - INFO - lr: 1.1883e-05 gnorm: 1.13 [18:17:07< 6:12:04] +[titan] 2025-10-05 16:51:29,178 - root - INFO - step: 29875 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 16:51:29,178 - root - INFO - lr: 1.1876e-05 gnorm: 1.13 [18:17:18< 6:11:53] +[titan] 2025-10-05 16:51:40,033 - root - INFO - step: 29880 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 16:51:40,033 - root - INFO - lr: 
1.1870e-05 gnorm: 1.12 [18:17:29< 6:11:42] +[titan] 2025-10-05 16:51:50,881 - root - INFO - step: 29885 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 16:51:50,881 - root - INFO - lr: 1.1863e-05 gnorm: 1.10 [18:17:39< 6:11:31] +[titan] 2025-10-05 16:52:01,762 - root - INFO - step: 29890 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 16:52:01,762 - root - INFO - lr: 1.1857e-05 gnorm: 1.15 [18:17:50< 6:11:20] +[titan] 2025-10-05 16:52:12,608 - root - INFO - step: 29895 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 16:52:12,608 - root - INFO - lr: 1.1851e-05 gnorm: 1.13 [18:18:01< 6:11:09] +[titan] 2025-10-05 16:52:21,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:52:23,480 - root - INFO - step: 29900 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 16:52:23,481 - root - INFO - lr: 1.1844e-05 gnorm: 1.13 [18:18:12< 6:10:57] +[titan] 2025-10-05 16:52:34,301 - root - INFO - step: 29905 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 16:52:34,301 - root - INFO - lr: 1.1838e-05 gnorm: 1.15 [18:18:23< 6:10:46] +[titan] 2025-10-05 16:52:45,148 - root - INFO - step: 29910 loss: 1.9512 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 16:52:45,148 - root - INFO - lr: 1.1831e-05 gnorm: 1.11 [18:18:34< 6:10:35] +[titan] 2025-10-05 16:52:55,998 - root - INFO - step: 29915 loss: 2.0610 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8211 +[titan] 2025-10-05 16:52:55,998 - root - INFO - lr: 1.1825e-05 gnorm: 1.13 [18:18:45< 6:10:24] +[titan] 2025-10-05 16:53:06,867 - root - INFO - step: 29920 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 16:53:06,867 - root - INFO - lr: 1.1819e-05 gnorm: 1.12 [18:18:55< 6:10:13] +[titan] 2025-10-05 16:53:17,736 - root - INFO - step: 29925 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 16:53:17,736 - root - INFO - lr: 1.1812e-05 gnorm: 1.12 [18:19:06< 6:10:02] +[titan] 2025-10-05 16:53:28,570 - root - INFO - step: 29930 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7831 +[titan] 2025-10-05 16:53:28,570 - root - INFO - lr: 
1.1806e-05 gnorm: 1.12 [18:19:17< 6:09:51] +[titan] 2025-10-05 16:53:39,418 - root - INFO - step: 29935 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 16:53:39,418 - root - INFO - lr: 1.1799e-05 gnorm: 1.25 [18:19:28< 6:09:40] +[titan] 2025-10-05 16:53:50,272 - root - INFO - step: 29940 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 16:53:50,272 - root - INFO - lr: 1.1793e-05 gnorm: 1.12 [18:19:39< 6:09:29] +[titan] 2025-10-05 16:54:01,117 - root - INFO - step: 29945 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:54:01,117 - root - INFO - lr: 1.1787e-05 gnorm: 1.14 [18:19:50< 6:09:18] +[titan] 2025-10-05 16:54:09,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:54:12,029 - root - INFO - step: 29950 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 16:54:12,029 - root - INFO - lr: 1.1780e-05 gnorm: 1.18 [18:20:01< 6:09:07] +[titan] 2025-10-05 16:54:22,840 - root - INFO - step: 29955 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 16:54:22,840 - root - INFO - lr: 1.1774e-05 gnorm: 1.14 [18:20:11< 6:08:56] +[titan] 2025-10-05 16:54:33,694 - root - INFO - step: 29960 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 16:54:33,694 - root - INFO - lr: 1.1767e-05 gnorm: 1.14 [18:20:22< 6:08:45] +[titan] 2025-10-05 16:54:44,540 - root - INFO - step: 29965 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 16:54:44,540 - root - INFO - lr: 1.1761e-05 gnorm: 1.14 [18:20:33< 6:08:34] +[titan] 2025-10-05 16:54:55,380 - root - INFO - step: 29970 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 16:54:55,380 - root - INFO - lr: 1.1755e-05 gnorm: 1.13 [18:20:44< 6:08:22] +[titan] 2025-10-05 16:55:06,200 - root - INFO - step: 29975 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7975 +[titan] 2025-10-05 16:55:06,200 - root - INFO - lr: 1.1748e-05 gnorm: 1.16 [18:20:55< 6:08:11] +[titan] 2025-10-05 16:55:17,035 - root - INFO - step: 29980 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 16:55:17,035 - root - INFO - lr: 
1.1742e-05 gnorm: 1.16 [18:21:06< 6:08:00] +[titan] 2025-10-05 16:55:27,861 - root - INFO - step: 29985 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 16:55:27,861 - root - INFO - lr: 1.1736e-05 gnorm: 1.11 [18:21:16< 6:07:49] +[titan] 2025-10-05 16:55:38,685 - root - INFO - step: 29990 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 16:55:38,685 - root - INFO - lr: 1.1729e-05 gnorm: 1.08 [18:21:27< 6:07:38] +[titan] 2025-10-05 16:55:49,531 - root - INFO - step: 29995 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 16:55:49,531 - root - INFO - lr: 1.1723e-05 gnorm: 1.11 [18:21:38< 6:07:27] +[titan] 2025-10-05 16:55:58,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 16:56:00,346 - root - INFO - step: 30000 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 16:56:00,346 - root - INFO - lr: 1.1716e-05 gnorm: 1.14 [18:21:49< 6:07:16] +[titan] 2025-10-05 16:56:00,346 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 16:56:17,594 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 16:56:17,594 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.25 seconds. 
+[titan] 2025-10-05 16:58:26,179 - root - INFO - step: 30005 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 2,247 tflops: 31.17 mfu: 3.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 16:58:26,179 - root - INFO - lr: 1.1710e-05 gnorm: 1.15 [18:24:15< 6:07:50] +[titan] 2025-10-05 16:58:36,943 - root - INFO - step: 30010 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,443 tflops: 422.35 mfu: 42.71% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 16:58:36,943 - root - INFO - lr: 1.1704e-05 gnorm: 1.13 [18:24:25< 6:07:39] +[titan] 2025-10-05 16:58:47,757 - root - INFO - step: 30015 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7859 +[titan] 2025-10-05 16:58:47,757 - root - INFO - lr: 1.1697e-05 gnorm: 1.19 [18:24:36< 6:07:28] +[titan] 2025-10-05 16:58:58,551 - root - INFO - step: 30020 loss: 2.0398 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.19 mfu: 42.59% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 16:58:58,551 - root - INFO - lr: 1.1691e-05 gnorm: 1.16 [18:24:47< 6:07:16] +[titan] 2025-10-05 16:59:09,338 - root - INFO - step: 30025 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,378 tflops: 421.45 mfu: 42.61% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7804 +[titan] 2025-10-05 16:59:09,338 - root - INFO - lr: 1.1685e-05 gnorm: 1.17 [18:24:58< 6:07:05] +[titan] 2025-10-05 16:59:20,123 - root - INFO - step: 30030 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,384 tflops: 421.53 mfu: 42.62% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 16:59:20,123 - root - INFO - lr: 1.1678e-05 gnorm: 1.14 [18:25:09< 6:06:54] +[titan] 2025-10-05 16:59:30,956 - root - INFO - step: 30035 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 16:59:30,956 - root - INFO - lr: 
1.1672e-05 gnorm: 1.17 [18:25:19< 6:06:43] +[titan] 2025-10-05 16:59:41,784 - root - INFO - step: 30040 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7750 +[titan] 2025-10-05 16:59:41,784 - root - INFO - lr: 1.1666e-05 gnorm: 1.10 [18:25:30< 6:06:32] +[titan] 2025-10-05 16:59:52,578 - root - INFO - step: 30045 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.18 mfu: 42.59% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 16:59:52,578 - root - INFO - lr: 1.1659e-05 gnorm: 1.20 [18:25:41< 6:06:21] +[titan] 2025-10-05 17:00:01,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 17:00:03,430 - root - INFO - step: 30050 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:00:03,431 - root - INFO - lr: 1.1653e-05 gnorm: 1.13 [18:25:52< 6:06:10] +[titan] 2025-10-05 17:00:14,272 - root - INFO - step: 30055 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:00:14,273 - root - INFO - lr: 1.1647e-05 gnorm: 1.14 [18:26:03< 6:05:59] +[titan] 2025-10-05 17:00:25,096 - root - INFO - step: 30060 loss: 2.0424 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 17:00:25,096 - root - INFO - lr: 1.1640e-05 gnorm: 1.13 [18:26:14< 6:05:48] +[titan] 2025-10-05 17:00:35,911 - root - INFO - step: 30065 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.37 mfu: 42.50% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 17:00:35,911 - root - INFO - lr: 1.1634e-05 gnorm: 1.13 [18:26:24< 6:05:36] +[titan] 2025-10-05 17:00:46,749 - root - INFO - step: 30070 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 
30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:00:46,749 - root - INFO - lr: 1.1628e-05 gnorm: 1.12 [18:26:35< 6:05:25] +[titan] 2025-10-05 17:00:57,558 - root - INFO - step: 30075 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 17:00:57,558 - root - INFO - lr: 1.1621e-05 gnorm: 1.11 [18:26:46< 6:05:14] +[titan] 2025-10-05 17:01:08,392 - root - INFO - step: 30080 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7726 +[titan] 2025-10-05 17:01:08,392 - root - INFO - lr: 1.1615e-05 gnorm: 1.15 [18:26:57< 6:05:03] +[titan] 2025-10-05 17:01:19,229 - root - INFO - step: 30085 loss: 2.0397 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 17:01:19,229 - root - INFO - lr: 1.1609e-05 gnorm: 1.15 [18:27:08< 6:04:52] +[titan] 2025-10-05 17:01:30,104 - root - INFO - step: 30090 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:01:30,104 - root - INFO - lr: 1.1602e-05 gnorm: 1.11 [18:27:19< 6:04:41] +[titan] 2025-10-05 17:01:40,932 - root - INFO - step: 30095 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 17:01:40,932 - root - INFO - lr: 1.1596e-05 gnorm: 1.14 [18:27:29< 6:04:30] +[titan] 2025-10-05 17:01:49,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:01:51,740 - root - INFO - step: 30100 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:01:51,740 - root - INFO - lr: 1.1590e-05 gnorm: 1.12 [18:27:40< 6:04:19] +[titan] 2025-10-05 17:02:02,591 - root - INFO - step: 30105 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7463 +[titan] 2025-10-05 17:02:02,591 - root - INFO - lr: 1.1583e-05 gnorm: 1.13 [18:27:51< 6:04:08] +[titan] 2025-10-05 17:02:13,423 - root - INFO - step: 30110 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 17:02:13,423 - root - INFO - lr: 1.1577e-05 gnorm: 1.16 [18:28:02< 6:03:56] +[titan] 2025-10-05 17:02:24,227 - root - INFO - step: 30115 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.78 mfu: 42.55% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 17:02:24,227 - root - INFO - lr: 1.1571e-05 gnorm: 1.12 [18:28:13< 6:03:45] +[titan] 2025-10-05 17:02:35,077 - root - INFO - step: 30120 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 17:02:35,077 - root - INFO - lr: 1.1565e-05 gnorm: 1.14 [18:28:24< 6:03:34] +[titan] 2025-10-05 17:02:45,895 - root - INFO - step: 30125 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 17:02:45,895 - root - INFO - lr: 1.1558e-05 gnorm: 1.13 [18:28:34< 6:03:23] +[titan] 2025-10-05 17:02:56,710 - root - INFO - step: 30130 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 17:02:56,710 - root - INFO - lr: 
1.1552e-05 gnorm: 1.13 [18:28:45< 6:03:12] +[titan] 2025-10-05 17:03:07,565 - root - INFO - step: 30135 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8142 +[titan] 2025-10-05 17:03:07,565 - root - INFO - lr: 1.1546e-05 gnorm: 1.11 [18:28:56< 6:03:01] +[titan] 2025-10-05 17:03:18,382 - root - INFO - step: 30140 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 17:03:18,382 - root - INFO - lr: 1.1539e-05 gnorm: 1.21 [18:29:07< 6:02:50] +[titan] 2025-10-05 17:03:29,277 - root - INFO - step: 30145 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 17:03:29,277 - root - INFO - lr: 1.1533e-05 gnorm: 1.14 [18:29:18< 6:02:39] +[titan] 2025-10-05 17:03:37,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:03:40,104 - root - INFO - step: 30150 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 17:03:40,104 - root - INFO - lr: 1.1527e-05 gnorm: 1.13 [18:29:29< 6:02:28] +[titan] 2025-10-05 17:03:50,940 - root - INFO - step: 30155 loss: 2.0613 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:03:50,940 - root - INFO - lr: 1.1521e-05 gnorm: 1.15 [18:29:39< 6:02:17] +[titan] 2025-10-05 17:04:01,762 - root - INFO - step: 30160 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 17:04:01,762 - root - INFO - lr: 1.1514e-05 gnorm: 1.14 [18:29:50< 6:02:05] +[titan] 2025-10-05 17:04:12,567 - root - INFO - step: 30165 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 17:04:12,567 - root - INFO - lr: 1.1508e-05 gnorm: 1.12 [18:30:01< 6:01:54] +[titan] 2025-10-05 17:04:23,420 - root - INFO - step: 30170 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:04:23,420 - root - INFO - lr: 1.1502e-05 gnorm: 1.12 [18:30:12< 6:01:43] +[titan] 2025-10-05 17:04:34,282 - root - INFO - step: 30175 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 17:04:34,282 - root - INFO - lr: 1.1495e-05 gnorm: 1.12 [18:30:23< 6:01:32] +[titan] 2025-10-05 17:04:45,111 - root - INFO - step: 30180 loss: 1.9784 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:04:45,111 - root - INFO - lr: 
1.1489e-05 gnorm: 1.16 [18:30:34< 6:01:21] +[titan] 2025-10-05 17:04:55,961 - root - INFO - step: 30185 loss: 2.0025 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:04:55,962 - root - INFO - lr: 1.1483e-05 gnorm: 1.13 [18:30:44< 6:01:10] +[titan] 2025-10-05 17:05:06,781 - root - INFO - step: 30190 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.18 mfu: 42.48% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 17:05:06,781 - root - INFO - lr: 1.1477e-05 gnorm: 1.16 [18:30:55< 6:00:59] +[titan] 2025-10-05 17:05:17,581 - root - INFO - step: 30195 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:05:17,581 - root - INFO - lr: 1.1470e-05 gnorm: 1.16 [18:31:06< 6:00:48] +[titan] 2025-10-05 17:05:26,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:05:28,480 - root - INFO - step: 30200 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 17:05:28,480 - root - INFO - lr: 1.1464e-05 gnorm: 1.13 [18:31:17< 6:00:37] +[titan] 2025-10-05 17:05:39,462 - root - INFO - step: 30205 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 29,840 tflops: 413.98 mfu: 41.86% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 17:05:39,462 - root - INFO - lr: 1.1458e-05 gnorm: 1.11 [18:31:28< 6:00:25] +[titan] 2025-10-05 17:05:46,166 - root - INFO - Dumping profiler traces at step 30208 +[titan] 2025-10-05 17:05:46,205 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:05:50,671 - root - INFO - step: 30210 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 29,233 tflops: 405.56 mfu: 41.01% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7633 +[titan] 2025-10-05 17:05:50,672 - root - INFO - lr: 1.1452e-05 gnorm: 1.14 [18:31:39< 6:00:15] +[titan] 2025-10-05 17:06:01,511 - root - INFO - step: 30215 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 17:06:01,511 - root - INFO - lr: 1.1445e-05 gnorm: 1.17 [18:31:50< 6:00:03] +[titan] 2025-10-05 17:06:12,360 - root - INFO - step: 30220 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7378 +[titan] 2025-10-05 17:06:12,360 - root - INFO - lr: 1.1439e-05 gnorm: 1.11 [18:32:01< 5:59:52] +[titan] 2025-10-05 17:06:23,184 - root - INFO - step: 30225 loss: 2.0049 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 17:06:23,184 - root - INFO - lr: 1.1433e-05 gnorm: 1.13 [18:32:12< 5:59:41] +[titan] 2025-10-05 17:06:34,073 - root - INFO - step: 30230 loss: 
1.9745 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 17:06:34,073 - root - INFO - lr: 1.1427e-05 gnorm: 1.15 [18:32:23< 5:59:30] +[titan] 2025-10-05 17:06:44,900 - root - INFO - step: 30235 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7525 +[titan] 2025-10-05 17:06:44,900 - root - INFO - lr: 1.1420e-05 gnorm: 1.11 [18:32:33< 5:59:19] +[titan] 2025-10-05 17:06:55,740 - root - INFO - step: 30240 loss: 1.9188 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 17:06:55,740 - root - INFO - lr: 1.1414e-05 gnorm: 1.16 [18:32:44< 5:59:08] +[titan] 2025-10-05 17:07:06,541 - root - INFO - step: 30245 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:07:06,541 - root - INFO - lr: 1.1408e-05 gnorm: 1.13 [18:32:55< 5:58:57] +[titan] 2025-10-05 17:07:15,217 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:07:17,391 - root - INFO - step: 30250 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 17:07:17,392 - root - INFO - lr: 1.1402e-05 gnorm: 1.17 [18:33:06< 5:58:46] +[titan] 2025-10-05 17:07:28,241 - root - INFO - step: 30255 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 17:07:28,241 - root - INFO - lr: 1.1395e-05 gnorm: 1.18 [18:33:17< 5:58:35] +[titan] 2025-10-05 17:07:39,102 - root - INFO - step: 30260 loss: 2.0013 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 17:07:39,103 - root - INFO - lr: 1.1389e-05 gnorm: 1.12 [18:33:28< 5:58:23] +[titan] 2025-10-05 17:07:49,999 - root - INFO - step: 30265 loss: 1.9338 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 17:07:49,999 - root - INFO - lr: 1.1383e-05 gnorm: 1.16 [18:33:38< 5:58:12] +[titan] 2025-10-05 17:08:00,848 - root - INFO - step: 30270 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 17:08:00,848 - root - INFO - lr: 1.1377e-05 gnorm: 1.17 [18:33:49< 5:58:01] +[titan] 2025-10-05 17:08:11,692 - root - INFO - step: 30275 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 17:08:11,692 - root - INFO - lr: 1.1370e-05 gnorm: 1.17 [18:34:00< 5:57:50] +[titan] 2025-10-05 17:08:22,552 - root - INFO - step: 30280 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:08:22,552 - root - INFO - lr: 
1.1364e-05 gnorm: 1.18 [18:34:11< 5:57:39] +[titan] 2025-10-05 17:08:33,450 - root - INFO - step: 30285 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 17:08:33,450 - root - INFO - lr: 1.1358e-05 gnorm: 1.11 [18:34:22< 5:57:28] +[titan] 2025-10-05 17:08:44,280 - root - INFO - step: 30290 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 17:08:44,280 - root - INFO - lr: 1.1352e-05 gnorm: 1.13 [18:34:33< 5:57:17] +[titan] 2025-10-05 17:08:55,139 - root - INFO - step: 30295 loss: 2.0245 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 17:08:55,139 - root - INFO - lr: 1.1346e-05 gnorm: 1.13 [18:34:44< 5:57:06] +[titan] 2025-10-05 17:09:03,787 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:09:05,964 - root - INFO - step: 30300 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 17:09:05,964 - root - INFO - lr: 1.1339e-05 gnorm: 1.17 [18:34:54< 5:56:55] +[titan] 2025-10-05 17:09:16,818 - root - INFO - step: 30305 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 17:09:16,818 - root - INFO - lr: 1.1333e-05 gnorm: 1.16 [18:35:05< 5:56:44] +[titan] 2025-10-05 17:09:27,662 - root - INFO - step: 30310 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 17:09:27,662 - root - INFO - lr: 1.1327e-05 gnorm: 1.15 [18:35:16< 5:56:33] +[titan] 2025-10-05 17:09:38,520 - root - INFO - step: 30315 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 17:09:38,520 - root - INFO - lr: 1.1321e-05 gnorm: 1.14 [18:35:27< 5:56:21] +[titan] 2025-10-05 17:09:49,395 - root - INFO - step: 30320 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 17:09:49,395 - root - INFO - lr: 1.1315e-05 gnorm: 1.14 [18:35:38< 5:56:10] +[titan] 2025-10-05 17:10:00,277 - root - INFO - step: 30325 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 17:10:00,277 - root - INFO - lr: 1.1308e-05 gnorm: 1.15 [18:35:49< 5:55:59] +[titan] 2025-10-05 17:10:11,173 - root - INFO - step: 30330 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:10:11,174 - root - INFO - lr: 
1.1302e-05 gnorm: 1.15 [18:36:00< 5:55:48] +[titan] 2025-10-05 17:10:22,000 - root - INFO - step: 30335 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 17:10:22,000 - root - INFO - lr: 1.1296e-05 gnorm: 1.18 [18:36:10< 5:55:37] +[titan] 2025-10-05 17:10:32,877 - root - INFO - step: 30340 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 17:10:32,877 - root - INFO - lr: 1.1290e-05 gnorm: 1.13 [18:36:21< 5:55:26] +[titan] 2025-10-05 17:10:43,769 - root - INFO - step: 30345 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:10:43,769 - root - INFO - lr: 1.1284e-05 gnorm: 1.15 [18:36:32< 5:55:15] +[titan] 2025-10-05 17:10:52,407 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:10:54,603 - root - INFO - step: 30350 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 17:10:54,603 - root - INFO - lr: 1.1277e-05 gnorm: 1.15 [18:36:43< 5:55:04] +[titan] 2025-10-05 17:11:05,438 - root - INFO - step: 30355 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:11:05,438 - root - INFO - lr: 1.1271e-05 gnorm: 1.14 [18:36:54< 5:54:53] +[titan] 2025-10-05 17:11:16,300 - root - INFO - step: 30360 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 17:11:16,300 - root - INFO - lr: 1.1265e-05 gnorm: 1.11 [18:37:05< 5:54:42] +[titan] 2025-10-05 17:11:27,159 - root - INFO - step: 30365 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:11:27,159 - root - INFO - lr: 1.1259e-05 gnorm: 1.11 [18:37:16< 5:54:30] +[titan] 2025-10-05 17:11:38,071 - root - INFO - step: 30370 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 17:11:38,071 - root - INFO - lr: 1.1253e-05 gnorm: 1.18 [18:37:27< 5:54:19] +[titan] 2025-10-05 17:11:48,937 - root - INFO - step: 30375 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:11:48,937 - root - INFO - lr: 1.1247e-05 gnorm: 1.15 [18:37:37< 5:54:08] +[titan] 2025-10-05 17:11:59,780 - root - INFO - step: 30380 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 17:11:59,780 - root - INFO - lr: 
1.1240e-05 gnorm: 1.13 [18:37:48< 5:53:57] +[titan] 2025-10-05 17:12:10,619 - root - INFO - step: 30385 loss: 1.9947 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 17:12:10,620 - root - INFO - lr: 1.1234e-05 gnorm: 1.15 [18:37:59< 5:53:46] +[titan] 2025-10-05 17:12:21,479 - root - INFO - step: 30390 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 17:12:21,479 - root - INFO - lr: 1.1228e-05 gnorm: 1.11 [18:38:10< 5:53:35] +[titan] 2025-10-05 17:12:32,330 - root - INFO - step: 30395 loss: 1.9584 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:12:32,330 - root - INFO - lr: 1.1222e-05 gnorm: 1.12 [18:38:21< 5:53:24] +[titan] 2025-10-05 17:12:41,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:12:43,230 - root - INFO - step: 30400 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 17:12:43,230 - root - INFO - lr: 1.1216e-05 gnorm: 1.16 [18:38:32< 5:53:13] +[titan] 2025-10-05 17:12:54,073 - root - INFO - step: 30405 loss: 1.9890 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 17:12:54,073 - root - INFO - lr: 1.1210e-05 gnorm: 1.19 [18:38:43< 5:53:02] +[titan] 2025-10-05 17:13:04,941 - root - INFO - step: 30410 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 17:13:04,942 - root - INFO - lr: 1.1203e-05 gnorm: 1.16 [18:38:53< 5:52:51] +[titan] 2025-10-05 17:13:15,791 - root - INFO - step: 30415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 17:13:15,791 - root - INFO - lr: 1.1197e-05 gnorm: 1.18 [18:39:04< 5:52:40] +[titan] 2025-10-05 17:13:26,642 - root - INFO - step: 30420 loss: 2.0087 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 17:13:26,642 - root - INFO - lr: 1.1191e-05 gnorm: 1.13 [18:39:15< 5:52:28] +[titan] 2025-10-05 17:13:37,590 - root - INFO - step: 30425 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.26 mfu: 41.99% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 17:13:37,590 - root - INFO - lr: 1.1185e-05 gnorm: 1.13 [18:39:26< 5:52:17] +[titan] 2025-10-05 17:13:48,481 - root - INFO - step: 30430 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7083 +[titan] 2025-10-05 17:13:48,481 - root - INFO - lr: 
1.1179e-05 gnorm: 1.22 [18:39:37< 5:52:06] +[titan] 2025-10-05 17:13:59,341 - root - INFO - step: 30435 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 17:13:59,341 - root - INFO - lr: 1.1173e-05 gnorm: 1.10 [18:39:48< 5:51:55] +[titan] 2025-10-05 17:14:10,199 - root - INFO - step: 30440 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 17:14:10,199 - root - INFO - lr: 1.1166e-05 gnorm: 1.15 [18:39:59< 5:51:44] +[titan] 2025-10-05 17:14:21,050 - root - INFO - step: 30445 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:14:21,050 - root - INFO - lr: 1.1160e-05 gnorm: 1.17 [18:40:10< 5:51:33] +[titan] 2025-10-05 17:14:29,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:14:31,915 - root - INFO - step: 30450 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:14:31,915 - root - INFO - lr: 1.1154e-05 gnorm: 1.13 [18:40:20< 5:51:22] +[titan] 2025-10-05 17:14:42,853 - root - INFO - step: 30455 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 17:14:42,853 - root - INFO - lr: 1.1148e-05 gnorm: 1.15 [18:40:31< 5:51:11] +[titan] 2025-10-05 17:14:53,689 - root - INFO - step: 30460 loss: 1.9279 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:14:53,689 - root - INFO - lr: 1.1142e-05 gnorm: 1.16 [18:40:42< 5:51:00] +[titan] 2025-10-05 17:15:04,539 - root - INFO - step: 30465 loss: 1.9730 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7446 +[titan] 2025-10-05 17:15:04,539 - root - INFO - lr: 1.1136e-05 gnorm: 1.13 [18:40:53< 5:50:49] +[titan] 2025-10-05 17:15:15,418 - root - INFO - step: 30470 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 17:15:15,418 - root - INFO - lr: 1.1130e-05 gnorm: 1.20 [18:41:04< 5:50:38] +[titan] 2025-10-05 17:15:26,296 - root - INFO - step: 30475 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 17:15:26,296 - root - INFO - lr: 1.1124e-05 gnorm: 1.13 [18:41:15< 5:50:26] +[titan] 2025-10-05 17:15:37,128 - root - INFO - step: 30480 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 17:15:37,129 - root - INFO - lr: 
1.1117e-05 gnorm: 1.16 [18:41:26< 5:50:15] +[titan] 2025-10-05 17:15:48,020 - root - INFO - step: 30485 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 17:15:48,020 - root - INFO - lr: 1.1111e-05 gnorm: 1.16 [18:41:36< 5:50:04] +[titan] 2025-10-05 17:15:58,881 - root - INFO - step: 30490 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 17:15:58,881 - root - INFO - lr: 1.1105e-05 gnorm: 1.13 [18:41:47< 5:49:53] +[titan] 2025-10-05 17:16:09,737 - root - INFO - step: 30495 loss: 2.0163 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7828 +[titan] 2025-10-05 17:16:09,738 - root - INFO - lr: 1.1099e-05 gnorm: 1.13 [18:41:58< 5:49:42] +[titan] 2025-10-05 17:16:18,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:16:20,594 - root - INFO - step: 30500 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:16:20,594 - root - INFO - lr: 1.1093e-05 gnorm: 1.15 [18:42:09< 5:49:31] +[titan] 2025-10-05 17:16:31,472 - root - INFO - step: 30505 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7582 +[titan] 2025-10-05 17:16:31,472 - root - INFO - lr: 1.1087e-05 gnorm: 1.19 [18:42:20< 5:49:20] +[titan] 2025-10-05 17:16:42,399 - root - INFO - step: 30510 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:16:42,399 - root - INFO - lr: 1.1081e-05 gnorm: 1.14 [18:42:31< 5:49:09] +[titan] 2025-10-05 17:16:53,259 - root - INFO - step: 30515 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 17:16:53,259 - root - INFO - lr: 1.1075e-05 gnorm: 1.15 [18:42:42< 5:48:58] +[titan] 2025-10-05 17:17:04,140 - root - INFO - step: 30520 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 17:17:04,140 - root - INFO - lr: 1.1069e-05 gnorm: 1.13 [18:42:53< 5:48:47] +[titan] 2025-10-05 17:17:14,989 - root - INFO - step: 30525 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 17:17:14,989 - root - INFO - lr: 1.1063e-05 gnorm: 1.36 [18:43:03< 5:48:36] +[titan] 2025-10-05 17:17:25,901 - root - INFO - step: 30530 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:17:25,902 - root - INFO - lr: 
1.1056e-05 gnorm: 1.14 [18:43:14< 5:48:24] +[titan] 2025-10-05 17:17:36,768 - root - INFO - step: 30535 loss: 2.0575 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8191 +[titan] 2025-10-05 17:17:36,768 - root - INFO - lr: 1.1050e-05 gnorm: 1.17 [18:43:25< 5:48:13] +[titan] 2025-10-05 17:17:47,700 - root - INFO - step: 30540 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 17:17:47,700 - root - INFO - lr: 1.1044e-05 gnorm: 1.12 [18:43:36< 5:48:02] +[titan] 2025-10-05 17:17:58,569 - root - INFO - step: 30545 loss: 1.9982 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 17:17:58,569 - root - INFO - lr: 1.1038e-05 gnorm: 1.13 [18:43:47< 5:47:51] +[titan] 2025-10-05 17:18:07,246 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:18:09,461 - root - INFO - step: 30550 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7963 +[titan] 2025-10-05 17:18:09,461 - root - INFO - lr: 1.1032e-05 gnorm: 1.15 [18:43:58< 5:47:40] +[titan] 2025-10-05 17:18:20,334 - root - INFO - step: 30555 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 17:18:20,335 - root - INFO - lr: 1.1026e-05 gnorm: 1.13 [18:44:09< 5:47:29] +[titan] 2025-10-05 17:18:31,222 - root - INFO - step: 30560 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:18:31,222 - root - INFO - lr: 1.1020e-05 gnorm: 1.16 [18:44:20< 5:47:18] +[titan] 2025-10-05 17:18:42,115 - root - INFO - step: 30565 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 17:18:42,116 - root - INFO - lr: 1.1014e-05 gnorm: 1.18 [18:44:31< 5:47:07] +[titan] 2025-10-05 17:18:52,976 - root - INFO - step: 30570 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7939 +[titan] 2025-10-05 17:18:52,977 - root - INFO - lr: 1.1008e-05 gnorm: 1.15 [18:44:41< 5:46:56] +[titan] 2025-10-05 17:19:03,822 - root - INFO - step: 30575 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 17:19:03,822 - root - INFO - lr: 1.1002e-05 gnorm: 1.13 [18:44:52< 5:46:45] +[titan] 2025-10-05 17:19:14,680 - root - INFO - step: 30580 loss: 1.9714 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 17:19:14,680 - root - INFO - lr: 
1.0996e-05 gnorm: 1.15 [18:45:03< 5:46:34] +[titan] 2025-10-05 17:19:25,560 - root - INFO - step: 30585 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 17:19:25,560 - root - INFO - lr: 1.0990e-05 gnorm: 1.12 [18:45:14< 5:46:23] +[titan] 2025-10-05 17:19:36,432 - root - INFO - step: 30590 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7819 +[titan] 2025-10-05 17:19:36,432 - root - INFO - lr: 1.0984e-05 gnorm: 1.18 [18:45:25< 5:46:11] +[titan] 2025-10-05 17:19:47,343 - root - INFO - step: 30595 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 17:19:47,343 - root - INFO - lr: 1.0977e-05 gnorm: 1.12 [18:45:36< 5:46:00] +[titan] 2025-10-05 17:19:56,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:19:58,231 - root - INFO - step: 30600 loss: 2.0557 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 17:19:58,231 - root - INFO - lr: 1.0971e-05 gnorm: 1.17 [18:45:47< 5:45:49] +[titan] 2025-10-05 17:20:09,100 - root - INFO - step: 30605 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:20:09,100 - root - INFO - lr: 1.0965e-05 gnorm: 1.15 [18:45:58< 5:45:38] +[titan] 2025-10-05 17:20:19,957 - root - INFO - step: 30610 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 17:20:19,957 - root - INFO - lr: 1.0959e-05 gnorm: 1.11 [18:46:08< 5:45:27] +[titan] 2025-10-05 17:20:30,886 - root - INFO - step: 30615 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 17:20:30,886 - root - INFO - lr: 1.0953e-05 gnorm: 1.14 [18:46:19< 5:45:16] +[titan] 2025-10-05 17:20:41,762 - root - INFO - step: 30620 loss: 1.9612 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:20:41,763 - root - INFO - lr: 1.0947e-05 gnorm: 1.19 [18:46:30< 5:45:05] +[titan] 2025-10-05 17:20:52,672 - root - INFO - step: 30625 loss: 1.9688 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7412 +[titan] 2025-10-05 17:20:52,672 - root - INFO - lr: 1.0941e-05 gnorm: 1.14 [18:46:41< 5:44:54] +[titan] 2025-10-05 17:21:03,551 - root - INFO - step: 30630 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 17:21:03,551 - root - INFO - lr: 
1.0935e-05 gnorm: 1.13 [18:46:52< 5:44:43] +[titan] 2025-10-05 17:21:14,413 - root - INFO - step: 30635 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 17:21:14,413 - root - INFO - lr: 1.0929e-05 gnorm: 1.13 [18:47:03< 5:44:32] +[titan] 2025-10-05 17:21:25,276 - root - INFO - step: 30640 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 17:21:25,276 - root - INFO - lr: 1.0923e-05 gnorm: 1.18 [18:47:14< 5:44:21] +[titan] 2025-10-05 17:21:36,129 - root - INFO - step: 30645 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 17:21:36,129 - root - INFO - lr: 1.0917e-05 gnorm: 1.13 [18:47:25< 5:44:10] +[titan] 2025-10-05 17:21:44,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:21:47,049 - root - INFO - step: 30650 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:21:47,049 - root - INFO - lr: 1.0911e-05 gnorm: 1.12 [18:47:35< 5:43:58] +[titan] 2025-10-05 17:21:57,919 - root - INFO - step: 30655 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 17:21:57,919 - root - INFO - lr: 1.0905e-05 gnorm: 1.17 [18:47:46< 5:43:47] +[titan] 2025-10-05 17:22:08,772 - root - INFO - step: 30660 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:22:08,772 - root - INFO - lr: 1.0899e-05 gnorm: 1.14 [18:47:57< 5:43:36] +[titan] 2025-10-05 17:22:19,639 - root - INFO - step: 30665 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7649 +[titan] 2025-10-05 17:22:19,639 - root - INFO - lr: 1.0893e-05 gnorm: 1.17 [18:48:08< 5:43:25] +[titan] 2025-10-05 17:22:30,511 - root - INFO - step: 30670 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 17:22:30,511 - root - INFO - lr: 1.0887e-05 gnorm: 1.15 [18:48:19< 5:43:14] +[titan] 2025-10-05 17:22:41,385 - root - INFO - step: 30675 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 17:22:41,385 - root - INFO - lr: 1.0881e-05 gnorm: 1.13 [18:48:30< 5:43:03] +[titan] 2025-10-05 17:22:52,312 - root - INFO - step: 30680 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:22:52,312 - root - INFO - lr: 
1.0875e-05 gnorm: 1.15 [18:48:41< 5:42:52] +[titan] 2025-10-05 17:23:03,165 - root - INFO - step: 30685 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 17:23:03,165 - root - INFO - lr: 1.0869e-05 gnorm: 1.13 [18:48:52< 5:42:41] +[titan] 2025-10-05 17:23:14,020 - root - INFO - step: 30690 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7825 +[titan] 2025-10-05 17:23:14,020 - root - INFO - lr: 1.0863e-05 gnorm: 1.14 [18:49:02< 5:42:30] +[titan] 2025-10-05 17:23:24,876 - root - INFO - step: 30695 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:23:24,876 - root - INFO - lr: 1.0857e-05 gnorm: 1.15 [18:49:13< 5:42:19] +[titan] 2025-10-05 17:23:33,557 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:23:35,744 - root - INFO - step: 30700 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:23:35,744 - root - INFO - lr: 1.0851e-05 gnorm: 1.12 [18:49:24< 5:42:08] +[titan] 2025-10-05 17:23:46,630 - root - INFO - step: 30705 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 17:23:46,630 - root - INFO - lr: 1.0845e-05 gnorm: 1.14 [18:49:35< 5:41:56] +[titan] 2025-10-05 17:23:57,506 - root - INFO - step: 30710 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:23:57,506 - root - INFO - lr: 1.0839e-05 gnorm: 1.17 [18:49:46< 5:41:45] +[titan] 2025-10-05 17:24:08,364 - root - INFO - step: 30715 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:24:08,364 - root - INFO - lr: 1.0833e-05 gnorm: 1.13 [18:49:57< 5:41:34] +[titan] 2025-10-05 17:24:19,332 - root - INFO - step: 30720 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 29,876 tflops: 414.48 mfu: 41.91% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7148 +[titan] 2025-10-05 17:24:19,332 - root - INFO - lr: 1.0827e-05 gnorm: 1.11 [18:50:08< 5:41:23] +[titan] 2025-10-05 17:24:19,514 - root - INFO - Dumping profiler traces at step 30720 +[titan] 2025-10-05 17:24:19,553 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:24:30,456 - root - INFO - step: 30725 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 29,458 tflops: 408.69 mfu: 41.32% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 17:24:30,456 - root - INFO - lr: 1.0821e-05 gnorm: 1.13 [18:50:19< 5:41:12] +[titan] 2025-10-05 17:24:41,338 - root - INFO - step: 30730 loss: 
1.9783 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 17:24:41,338 - root - INFO - lr: 1.0815e-05 gnorm: 1.11 [18:50:30< 5:41:01] +[titan] 2025-10-05 17:24:52,229 - root - INFO - step: 30735 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7376 +[titan] 2025-10-05 17:24:52,229 - root - INFO - lr: 1.0809e-05 gnorm: 1.15 [18:50:41< 5:40:50] +[titan] 2025-10-05 17:25:03,105 - root - INFO - step: 30740 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7807 +[titan] 2025-10-05 17:25:03,105 - root - INFO - lr: 1.0803e-05 gnorm: 1.23 [18:50:52< 5:40:39] +[titan] 2025-10-05 17:25:13,996 - root - INFO - step: 30745 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:25:13,996 - root - INFO - lr: 1.0797e-05 gnorm: 1.16 [18:51:02< 5:40:28] +[titan] 2025-10-05 17:25:22,692 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:25:24,892 - root - INFO - step: 30750 loss: 2.0403 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 17:25:24,892 - root - INFO - lr: 1.0791e-05 gnorm: 1.21 [18:51:13< 5:40:17] +[titan] 2025-10-05 17:25:35,755 - root - INFO - step: 30755 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 17:25:35,755 - root - INFO - lr: 1.0785e-05 gnorm: 1.15 [18:51:24< 5:40:06] +[titan] 2025-10-05 17:25:46,627 - root - INFO - step: 30760 loss: 1.9424 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:25:46,627 - root - INFO - lr: 1.0779e-05 gnorm: 1.14 [18:51:35< 5:39:55] +[titan] 2025-10-05 17:25:57,513 - root - INFO - step: 30765 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8128 +[titan] 2025-10-05 17:25:57,513 - root - INFO - lr: 1.0773e-05 gnorm: 1.16 [18:51:46< 5:39:44] +[titan] 2025-10-05 17:26:08,369 - root - INFO - step: 30770 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:26:08,369 - root - INFO - lr: 1.0767e-05 gnorm: 1.15 [18:51:57< 5:39:32] +[titan] 2025-10-05 17:26:19,291 - root - INFO - step: 30775 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 17:26:19,291 - root - INFO - lr: 1.0761e-05 gnorm: 1.16 [18:52:08< 5:39:21] +[titan] 2025-10-05 17:26:30,180 - root - INFO - step: 30780 loss: 1.9939 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7628 +[titan] 2025-10-05 17:26:30,180 - root - INFO - lr: 
1.0755e-05 gnorm: 1.14 [18:52:19< 5:39:10] +[titan] 2025-10-05 17:26:41,064 - root - INFO - step: 30785 loss: 2.0227 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 17:26:41,065 - root - INFO - lr: 1.0749e-05 gnorm: 1.14 [18:52:29< 5:38:59] +[titan] 2025-10-05 17:26:51,961 - root - INFO - step: 30790 loss: 1.9654 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:26:51,961 - root - INFO - lr: 1.0743e-05 gnorm: 1.11 [18:52:40< 5:38:48] +[titan] 2025-10-05 17:27:02,841 - root - INFO - step: 30795 loss: 2.0724 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 17:27:02,841 - root - INFO - lr: 1.0737e-05 gnorm: 1.16 [18:52:51< 5:38:37] +[titan] 2025-10-05 17:27:11,522 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:27:13,697 - root - INFO - step: 30800 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 17:27:13,697 - root - INFO - lr: 1.0731e-05 gnorm: 1.14 [18:53:02< 5:38:26] +[titan] 2025-10-05 17:27:24,566 - root - INFO - step: 30805 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 17:27:24,566 - root - INFO - lr: 1.0725e-05 gnorm: 1.15 [18:53:13< 5:38:15] +[titan] 2025-10-05 17:27:35,469 - root - INFO - step: 30810 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 17:27:35,469 - root - INFO - lr: 1.0719e-05 gnorm: 1.14 [18:53:24< 5:38:04] +[titan] 2025-10-05 17:27:46,339 - root - INFO - step: 30815 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 17:27:46,340 - root - INFO - lr: 1.0713e-05 gnorm: 1.16 [18:53:35< 5:37:53] +[titan] 2025-10-05 17:27:57,238 - root - INFO - step: 30820 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 17:27:57,239 - root - INFO - lr: 1.0707e-05 gnorm: 1.12 [18:53:46< 5:37:42] +[titan] 2025-10-05 17:28:08,105 - root - INFO - step: 30825 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 17:28:08,105 - root - INFO - lr: 1.0702e-05 gnorm: 1.12 [18:53:57< 5:37:31] +[titan] 2025-10-05 17:28:18,970 - root - INFO - step: 30830 loss: 1.8472 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6336 +[titan] 2025-10-05 17:28:18,971 - root - INFO - lr: 
1.0696e-05 gnorm: 1.15 [18:54:07< 5:37:19] +[titan] 2025-10-05 17:28:29,843 - root - INFO - step: 30835 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:28:29,844 - root - INFO - lr: 1.0690e-05 gnorm: 1.13 [18:54:18< 5:37:08] +[titan] 2025-10-05 17:28:40,744 - root - INFO - step: 30840 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 17:28:40,744 - root - INFO - lr: 1.0684e-05 gnorm: 1.13 [18:54:29< 5:36:57] +[titan] 2025-10-05 17:28:51,648 - root - INFO - step: 30845 loss: 1.9017 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6818 +[titan] 2025-10-05 17:28:51,648 - root - INFO - lr: 1.0678e-05 gnorm: 1.14 [18:54:40< 5:36:46] +[titan] 2025-10-05 17:29:00,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:29:02,544 - root - INFO - step: 30850 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 17:29:02,544 - root - INFO - lr: 1.0672e-05 gnorm: 1.15 [18:54:51< 5:36:35] +[titan] 2025-10-05 17:29:13,430 - root - INFO - step: 30855 loss: 1.9892 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:13,431 - root - INFO - lr: 1.0666e-05 gnorm: 1.16 [18:55:02< 5:36:24] +[titan] 2025-10-05 17:29:24,310 - root - INFO - step: 30860 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 17:29:24,310 - root - INFO - lr: 1.0660e-05 gnorm: 1.12 [18:55:13< 5:36:13] +[titan] 2025-10-05 17:29:35,178 - root - INFO - step: 30865 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:35,178 - root - INFO - lr: 1.0654e-05 gnorm: 1.16 [18:55:24< 5:36:02] +[titan] 2025-10-05 17:29:46,070 - root - INFO - step: 30870 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 17:29:46,070 - root - INFO - lr: 1.0648e-05 gnorm: 1.13 [18:55:34< 5:35:51] +[titan] 2025-10-05 17:29:56,949 - root - INFO - step: 30875 loss: 1.9562 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7292 +[titan] 2025-10-05 17:29:56,949 - root - INFO - lr: 1.0642e-05 gnorm: 1.14 [18:55:45< 5:35:40] +[titan] 2025-10-05 17:30:07,804 - root - INFO - step: 30880 loss: 2.0097 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 17:30:07,804 - root - INFO - lr: 
1.0636e-05 gnorm: 1.15 [18:55:56< 5:35:29] +[titan] 2025-10-05 17:30:18,658 - root - INFO - step: 30885 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 17:30:18,658 - root - INFO - lr: 1.0630e-05 gnorm: 1.17 [18:56:07< 5:35:18] +[titan] 2025-10-05 17:30:29,536 - root - INFO - step: 30890 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 17:30:29,536 - root - INFO - lr: 1.0625e-05 gnorm: 1.16 [18:56:18< 5:35:07] +[titan] 2025-10-05 17:30:40,429 - root - INFO - step: 30895 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7591 +[titan] 2025-10-05 17:30:40,429 - root - INFO - lr: 1.0619e-05 gnorm: 1.14 [18:56:29< 5:34:55] +[titan] 2025-10-05 17:30:49,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:30:51,303 - root - INFO - step: 30900 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 17:30:51,303 - root - INFO - lr: 1.0613e-05 gnorm: 1.15 [18:56:40< 5:34:44] +[titan] 2025-10-05 17:31:02,242 - root - INFO - step: 30905 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:31:02,242 - root - INFO - lr: 1.0607e-05 gnorm: 1.14 [18:56:51< 5:34:33] +[titan] 2025-10-05 17:31:13,130 - root - INFO - step: 30910 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 17:31:13,130 - root - INFO - lr: 1.0601e-05 gnorm: 1.25 [18:57:02< 5:34:22] +[titan] 2025-10-05 17:31:24,016 - root - INFO - step: 30915 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7966 +[titan] 2025-10-05 17:31:24,017 - root - INFO - lr: 1.0595e-05 gnorm: 1.13 [18:57:12< 5:34:11] +[titan] 2025-10-05 17:31:34,902 - root - INFO - step: 30920 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 17:31:34,903 - root - INFO - lr: 1.0589e-05 gnorm: 1.11 [18:57:23< 5:34:00] +[titan] 2025-10-05 17:31:45,757 - root - INFO - step: 30925 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:31:45,758 - root - INFO - lr: 1.0583e-05 gnorm: 1.14 [18:57:34< 5:33:49] +[titan] 2025-10-05 17:31:56,639 - root - INFO - step: 30930 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:31:56,639 - root - INFO - lr: 
1.0577e-05 gnorm: 1.15 [18:57:45< 5:33:38] +[titan] 2025-10-05 17:32:07,510 - root - INFO - step: 30935 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 17:32:07,510 - root - INFO - lr: 1.0572e-05 gnorm: 1.14 [18:57:56< 5:33:27] +[titan] 2025-10-05 17:32:18,361 - root - INFO - step: 30940 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 17:32:18,361 - root - INFO - lr: 1.0566e-05 gnorm: 1.16 [18:58:07< 5:33:16] +[titan] 2025-10-05 17:32:29,229 - root - INFO - step: 30945 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:32:29,229 - root - INFO - lr: 1.0560e-05 gnorm: 1.17 [18:58:18< 5:33:05] +[titan] 2025-10-05 17:32:37,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:32:40,069 - root - INFO - step: 30950 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:32:40,069 - root - INFO - lr: 1.0554e-05 gnorm: 1.13 [18:58:28< 5:32:54] +[titan] 2025-10-05 17:32:50,918 - root - INFO - step: 30955 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 17:32:50,918 - root - INFO - lr: 1.0548e-05 gnorm: 1.15 [18:58:39< 5:32:42] +[titan] 2025-10-05 17:33:01,839 - root - INFO - step: 30960 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6694 +[titan] 2025-10-05 17:33:01,839 - root - INFO - lr: 1.0542e-05 gnorm: 1.11 [18:58:50< 5:32:31] +[titan] 2025-10-05 17:33:12,698 - root - INFO - step: 30965 loss: 1.9487 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 17:33:12,698 - root - INFO - lr: 1.0536e-05 gnorm: 1.13 [18:59:01< 5:32:20] +[titan] 2025-10-05 17:33:23,587 - root - INFO - step: 30970 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7005 +[titan] 2025-10-05 17:33:23,587 - root - INFO - lr: 1.0530e-05 gnorm: 1.17 [18:59:12< 5:32:09] +[titan] 2025-10-05 17:33:34,467 - root - INFO - step: 30975 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 17:33:34,467 - root - INFO - lr: 1.0525e-05 gnorm: 1.19 [18:59:23< 5:31:58] +[titan] 2025-10-05 17:33:45,329 - root - INFO - step: 30980 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 17:33:45,329 - root - INFO - lr: 
1.0519e-05 gnorm: 1.21 [18:59:34< 5:31:47] +[titan] 2025-10-05 17:33:56,227 - root - INFO - step: 30985 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7673 +[titan] 2025-10-05 17:33:56,227 - root - INFO - lr: 1.0513e-05 gnorm: 1.14 [18:59:45< 5:31:36] +[titan] 2025-10-05 17:34:07,068 - root - INFO - step: 30990 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 17:34:07,068 - root - INFO - lr: 1.0507e-05 gnorm: 1.14 [18:59:55< 5:31:25] +[titan] 2025-10-05 17:34:17,920 - root - INFO - step: 30995 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 17:34:17,920 - root - INFO - lr: 1.0501e-05 gnorm: 1.13 [19:00:06< 5:31:14] +[titan] 2025-10-05 17:34:26,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:34:28,785 - root - INFO - step: 31000 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:34:28,785 - root - INFO - lr: 1.0495e-05 gnorm: 1.14 [19:00:17< 5:31:03] +[titan] 2025-10-05 17:34:39,677 - root - INFO - step: 31005 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 17:34:39,677 - root - INFO - lr: 1.0490e-05 gnorm: 1.12 [19:00:28< 5:30:52] +[titan] 2025-10-05 17:34:50,557 - root - INFO - step: 31010 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 17:34:50,557 - root - INFO - lr: 1.0484e-05 gnorm: 1.13 [19:00:39< 5:30:41] +[titan] 2025-10-05 17:35:01,441 - root - INFO - step: 31015 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 17:35:01,441 - root - INFO - lr: 1.0478e-05 gnorm: 1.11 [19:00:50< 5:30:29] +[titan] 2025-10-05 17:35:12,298 - root - INFO - step: 31020 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 17:35:12,298 - root - INFO - lr: 1.0472e-05 gnorm: 1.13 [19:01:01< 5:30:18] +[titan] 2025-10-05 17:35:23,148 - root - INFO - step: 31025 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 17:35:23,149 - root - INFO - lr: 1.0466e-05 gnorm: 1.19 [19:01:12< 5:30:07] +[titan] 2025-10-05 17:35:34,041 - root - INFO - step: 31030 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:35:34,042 - root - INFO - lr: 
1.0460e-05 gnorm: 1.14 [19:01:22< 5:29:56] +[titan] 2025-10-05 17:35:44,917 - root - INFO - step: 31035 loss: 2.0130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 17:35:44,917 - root - INFO - lr: 1.0455e-05 gnorm: 1.15 [19:01:33< 5:29:45] +[titan] 2025-10-05 17:35:55,789 - root - INFO - step: 31040 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 17:35:55,789 - root - INFO - lr: 1.0449e-05 gnorm: 1.14 [19:01:44< 5:29:34] +[titan] 2025-10-05 17:36:06,662 - root - INFO - step: 31045 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 17:36:06,663 - root - INFO - lr: 1.0443e-05 gnorm: 1.12 [19:01:55< 5:29:23] +[titan] 2025-10-05 17:36:15,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:36:17,541 - root - INFO - step: 31050 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 17:36:17,541 - root - INFO - lr: 1.0437e-05 gnorm: 1.15 [19:02:06< 5:29:12] +[titan] 2025-10-05 17:36:28,426 - root - INFO - step: 31055 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:36:28,426 - root - INFO - lr: 1.0431e-05 gnorm: 1.15 [19:02:17< 5:29:01] +[titan] 2025-10-05 17:36:39,289 - root - INFO - step: 31060 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 17:36:39,290 - root - INFO - lr: 1.0425e-05 gnorm: 1.14 [19:02:28< 5:28:50] +[titan] 2025-10-05 17:36:50,187 - root - INFO - step: 31065 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 17:36:50,187 - root - INFO - lr: 1.0420e-05 gnorm: 1.16 [19:02:39< 5:28:39] +[titan] 2025-10-05 17:37:01,103 - root - INFO - step: 31070 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 17:37:01,103 - root - INFO - lr: 1.0414e-05 gnorm: 1.19 [19:02:49< 5:28:28] +[titan] 2025-10-05 17:37:11,969 - root - INFO - step: 31075 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:37:11,969 - root - INFO - lr: 1.0408e-05 gnorm: 1.16 [19:03:00< 5:28:16] +[titan] 2025-10-05 17:37:22,843 - root - INFO - step: 31080 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 17:37:22,843 - root - INFO - lr: 
1.0402e-05 gnorm: 1.15 [19:03:11< 5:28:05] +[titan] 2025-10-05 17:37:33,710 - root - INFO - step: 31085 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:37:33,710 - root - INFO - lr: 1.0396e-05 gnorm: 1.14 [19:03:22< 5:27:54] +[titan] 2025-10-05 17:37:44,589 - root - INFO - step: 31090 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 17:37:44,589 - root - INFO - lr: 1.0391e-05 gnorm: 1.15 [19:03:33< 5:27:43] +[titan] 2025-10-05 17:37:55,476 - root - INFO - step: 31095 loss: 1.9001 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6797 +[titan] 2025-10-05 17:37:55,476 - root - INFO - lr: 1.0385e-05 gnorm: 1.14 [19:03:44< 5:27:32] +[titan] 2025-10-05 17:38:04,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:38:06,373 - root - INFO - step: 31100 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:38:06,373 - root - INFO - lr: 1.0379e-05 gnorm: 1.18 [19:03:55< 5:27:21] +[titan] 2025-10-05 17:38:17,276 - root - INFO - step: 31105 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:38:17,277 - root - INFO - lr: 1.0373e-05 gnorm: 1.12 [19:04:06< 5:27:10] +[titan] 2025-10-05 17:38:28,149 - root - INFO - step: 31110 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 17:38:28,149 - root - INFO - lr: 1.0367e-05 gnorm: 1.11 [19:04:17< 5:26:59] +[titan] 2025-10-05 17:38:39,025 - root - INFO - step: 31115 loss: 1.9815 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 17:38:39,025 - root - INFO - lr: 1.0362e-05 gnorm: 1.15 [19:04:27< 5:26:48] +[titan] 2025-10-05 17:38:49,892 - root - INFO - step: 31120 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:38:49,892 - root - INFO - lr: 1.0356e-05 gnorm: 1.13 [19:04:38< 5:26:37] +[titan] 2025-10-05 17:39:00,809 - root - INFO - step: 31125 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 17:39:00,809 - root - INFO - lr: 1.0350e-05 gnorm: 1.12 [19:04:49< 5:26:26] +[titan] 2025-10-05 17:39:11,704 - root - INFO - step: 31130 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 17:39:11,705 - root - INFO - lr: 
1.0344e-05 gnorm: 1.13 [19:05:00< 5:26:15] +[titan] 2025-10-05 17:39:22,594 - root - INFO - step: 31135 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:39:22,595 - root - INFO - lr: 1.0339e-05 gnorm: 1.19 [19:05:11< 5:26:04] +[titan] 2025-10-05 17:39:33,466 - root - INFO - step: 31140 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7578 +[titan] 2025-10-05 17:39:33,467 - root - INFO - lr: 1.0333e-05 gnorm: 1.15 [19:05:22< 5:25:52] +[titan] 2025-10-05 17:39:44,337 - root - INFO - step: 31145 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 17:39:44,337 - root - INFO - lr: 1.0327e-05 gnorm: 1.16 [19:05:33< 5:25:41] +[titan] 2025-10-05 17:39:53,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:39:55,199 - root - INFO - step: 31150 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7763 +[titan] 2025-10-05 17:39:55,199 - root - INFO - lr: 1.0321e-05 gnorm: 1.14 [19:05:44< 5:25:30] +[titan] 2025-10-05 17:40:06,057 - root - INFO - step: 31155 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 17:40:06,057 - root - INFO - lr: 1.0315e-05 gnorm: 1.17 [19:05:54< 5:25:19] +[titan] 2025-10-05 17:40:16,910 - root - INFO - step: 31160 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 17:40:16,910 - root - INFO - lr: 1.0310e-05 gnorm: 1.10 [19:06:05< 5:25:08] +[titan] 2025-10-05 17:40:27,753 - root - INFO - step: 31165 loss: 1.8951 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6755 +[titan] 2025-10-05 17:40:27,753 - root - INFO - lr: 1.0304e-05 gnorm: 1.16 [19:06:16< 5:24:57] +[titan] 2025-10-05 17:40:38,617 - root - INFO - step: 31170 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 17:40:38,617 - root - INFO - lr: 1.0298e-05 gnorm: 1.17 [19:06:27< 5:24:46] +[titan] 2025-10-05 17:40:49,491 - root - INFO - step: 31175 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7707 +[titan] 2025-10-05 17:40:49,491 - root - INFO - lr: 1.0292e-05 gnorm: 1.18 [19:06:38< 5:24:35] +[titan] 2025-10-05 17:41:00,364 - root - INFO - step: 31180 loss: 2.0114 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 17:41:00,365 - root - INFO - lr: 
1.0287e-05 gnorm: 1.12 [19:06:49< 5:24:24] +[titan] 2025-10-05 17:41:11,255 - root - INFO - step: 31185 loss: 2.0026 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 17:41:11,255 - root - INFO - lr: 1.0281e-05 gnorm: 1.19 [19:07:00< 5:24:13] +[titan] 2025-10-05 17:41:22,116 - root - INFO - step: 31190 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:41:22,116 - root - INFO - lr: 1.0275e-05 gnorm: 1.10 [19:07:11< 5:24:02] +[titan] 2025-10-05 17:41:32,999 - root - INFO - step: 31195 loss: 1.9088 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 17:41:32,999 - root - INFO - lr: 1.0269e-05 gnorm: 1.13 [19:07:21< 5:23:51] +[titan] 2025-10-05 17:41:41,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:41:43,892 - root - INFO - step: 31200 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:41:43,892 - root - INFO - lr: 1.0264e-05 gnorm: 1.13 [19:07:32< 5:23:40] +[titan] 2025-10-05 17:41:54,767 - root - INFO - step: 31205 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 17:41:54,767 - root - INFO - lr: 1.0258e-05 gnorm: 1.13 [19:07:43< 5:23:28] +[titan] 2025-10-05 17:42:05,616 - root - INFO - step: 31210 loss: 1.9827 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 17:42:05,617 - root - INFO - lr: 1.0252e-05 gnorm: 1.12 [19:07:54< 5:23:17] +[titan] 2025-10-05 17:42:16,473 - root - INFO - step: 31215 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:42:16,473 - root - INFO - lr: 1.0247e-05 gnorm: 1.17 [19:08:05< 5:23:06] +[titan] 2025-10-05 17:42:27,363 - root - INFO - step: 31220 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:42:27,363 - root - INFO - lr: 1.0241e-05 gnorm: 1.16 [19:08:16< 5:22:55] +[titan] 2025-10-05 17:42:38,236 - root - INFO - step: 31225 loss: 1.8762 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 17:42:38,237 - root - INFO - lr: 1.0235e-05 gnorm: 1.18 [19:08:27< 5:22:44] +[titan] 2025-10-05 17:42:49,232 - root - INFO - step: 31230 loss: 2.0595 memory: 118.84GiB(85.28%) tps: 29,802 tflops: 413.45 mfu: 41.80% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 17:42:49,232 - root - INFO - lr: 
1.0229e-05 gnorm: 1.23 [19:08:38< 5:22:33] +[titan] 2025-10-05 17:42:53,765 - root - INFO - Dumping profiler traces at step 31232 +[titan] 2025-10-05 17:42:53,805 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:43:00,360 - root - INFO - step: 31235 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 29,447 tflops: 408.54 mfu: 41.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 17:43:00,360 - root - INFO - lr: 1.0224e-05 gnorm: 1.10 [19:08:49< 5:22:22] +[titan] 2025-10-05 17:43:11,236 - root - INFO - step: 31240 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 17:43:11,236 - root - INFO - lr: 1.0218e-05 gnorm: 1.12 [19:09:00< 5:22:11] +[titan] 2025-10-05 17:43:22,106 - root - INFO - step: 31245 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 17:43:22,106 - root - INFO - lr: 1.0212e-05 gnorm: 1.14 [19:09:10< 5:22:00] +[titan] 2025-10-05 17:43:30,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:43:32,976 - root - INFO - step: 31250 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:43:32,977 - root - INFO - lr: 1.0207e-05 gnorm: 1.15 [19:09:21< 5:21:49] +[titan] 2025-10-05 17:43:43,850 - root - INFO - step: 31255 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:43:43,850 - root - INFO - lr: 1.0201e-05 gnorm: 1.17 [19:09:32< 5:21:38] +[titan] 2025-10-05 17:43:54,726 - root - INFO - step: 31260 loss: 2.0422 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:43:54,727 - root - INFO - lr: 1.0195e-05 gnorm: 1.16 [19:09:43< 5:21:27] +[titan] 2025-10-05 17:44:05,648 - root - INFO - step: 31265 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:44:05,649 - root - INFO - lr: 1.0189e-05 gnorm: 1.18 [19:09:54< 5:21:16] +[titan] 2025-10-05 17:44:16,492 - root - INFO - step: 31270 loss: 1.9624 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 17:44:16,493 - root - INFO - lr: 1.0184e-05 gnorm: 1.11 [19:10:05< 5:21:05] +[titan] 2025-10-05 17:44:27,352 - root - INFO - step: 31275 loss: 1.9671 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 17:44:27,352 - root - INFO - lr: 1.0178e-05 gnorm: 1.17 [19:10:16< 5:20:53] +[titan] 2025-10-05 17:44:38,191 - root - INFO - step: 31280 loss: 1.9559 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:44:38,191 - root - INFO - lr: 
1.0172e-05 gnorm: 1.11 [19:10:27< 5:20:42] +[titan] 2025-10-05 17:44:49,058 - root - INFO - step: 31285 loss: 2.0070 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 17:44:49,058 - root - INFO - lr: 1.0167e-05 gnorm: 1.16 [19:10:37< 5:20:31] +[titan] 2025-10-05 17:44:59,922 - root - INFO - step: 31290 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 17:44:59,922 - root - INFO - lr: 1.0161e-05 gnorm: 1.16 [19:10:48< 5:20:20] +[titan] 2025-10-05 17:45:10,831 - root - INFO - step: 31295 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 17:45:10,831 - root - INFO - lr: 1.0155e-05 gnorm: 1.14 [19:10:59< 5:20:09] +[titan] 2025-10-05 17:45:19,500 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:45:21,690 - root - INFO - step: 31300 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:45:21,690 - root - INFO - lr: 1.0150e-05 gnorm: 1.13 [19:11:10< 5:19:58] +[titan] 2025-10-05 17:45:32,589 - root - INFO - step: 31305 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 17:45:32,590 - root - INFO - lr: 1.0144e-05 gnorm: 1.14 [19:11:21< 5:19:47] +[titan] 2025-10-05 17:45:43,458 - root - INFO - step: 31310 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 17:45:43,458 - root - INFO - lr: 1.0138e-05 gnorm: 1.16 [19:11:32< 5:19:36] +[titan] 2025-10-05 17:45:54,309 - root - INFO - step: 31315 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7610 +[titan] 2025-10-05 17:45:54,309 - root - INFO - lr: 1.0133e-05 gnorm: 1.15 [19:11:43< 5:19:25] +[titan] 2025-10-05 17:46:05,142 - root - INFO - step: 31320 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 17:46:05,143 - root - INFO - lr: 1.0127e-05 gnorm: 1.11 [19:11:54< 5:19:14] +[titan] 2025-10-05 17:46:16,012 - root - INFO - step: 31325 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 17:46:16,012 - root - INFO - lr: 1.0121e-05 gnorm: 1.22 [19:12:04< 5:19:03] +[titan] 2025-10-05 17:46:26,886 - root - INFO - step: 31330 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 17:46:26,886 - root - INFO - lr: 
1.0116e-05 gnorm: 1.16 [19:12:15< 5:18:52] +[titan] 2025-10-05 17:46:37,770 - root - INFO - step: 31335 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 17:46:37,770 - root - INFO - lr: 1.0110e-05 gnorm: 1.14 [19:12:26< 5:18:40] +[titan] 2025-10-05 17:46:48,608 - root - INFO - step: 31340 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 17:46:48,608 - root - INFO - lr: 1.0104e-05 gnorm: 1.12 [19:12:37< 5:18:29] +[titan] 2025-10-05 17:46:59,446 - root - INFO - step: 31345 loss: 1.9908 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 17:46:59,446 - root - INFO - lr: 1.0099e-05 gnorm: 1.14 [19:12:48< 5:18:18] +[titan] 2025-10-05 17:47:08,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:47:10,307 - root - INFO - step: 31350 loss: 2.0078 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7753 +[titan] 2025-10-05 17:47:10,307 - root - INFO - lr: 1.0093e-05 gnorm: 1.18 [19:12:59< 5:18:07] +[titan] 2025-10-05 17:47:21,149 - root - INFO - step: 31355 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 17:47:21,149 - root - INFO - lr: 1.0087e-05 gnorm: 1.14 [19:13:10< 5:17:56] +[titan] 2025-10-05 17:47:32,020 - root - INFO - step: 31360 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 17:47:32,020 - root - INFO - lr: 1.0082e-05 gnorm: 1.16 [19:13:20< 5:17:45] +[titan] 2025-10-05 17:47:42,860 - root - INFO - step: 31365 loss: 2.0383 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 17:47:42,861 - root - INFO - lr: 1.0076e-05 gnorm: 1.15 [19:13:31< 5:17:34] +[titan] 2025-10-05 17:47:53,707 - root - INFO - step: 31370 loss: 2.0511 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 17:47:53,708 - root - INFO - lr: 1.0070e-05 gnorm: 1.16 [19:13:42< 5:17:23] +[titan] 2025-10-05 17:48:04,561 - root - INFO - step: 31375 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7875 +[titan] 2025-10-05 17:48:04,561 - root - INFO - lr: 1.0065e-05 gnorm: 1.20 [19:13:53< 5:17:12] +[titan] 2025-10-05 17:48:15,405 - root - INFO - step: 31380 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:48:15,405 - root - INFO - lr: 
1.0059e-05 gnorm: 1.15 [19:14:04< 5:17:01] +[titan] 2025-10-05 17:48:26,264 - root - INFO - step: 31385 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 17:48:26,264 - root - INFO - lr: 1.0053e-05 gnorm: 1.15 [19:14:15< 5:16:50] +[titan] 2025-10-05 17:48:37,141 - root - INFO - step: 31390 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 17:48:37,141 - root - INFO - lr: 1.0048e-05 gnorm: 1.19 [19:14:26< 5:16:39] +[titan] 2025-10-05 17:48:47,988 - root - INFO - step: 31395 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 17:48:47,988 - root - INFO - lr: 1.0042e-05 gnorm: 1.17 [19:14:36< 5:16:27] +[titan] 2025-10-05 17:48:56,653 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:48:58,841 - root - INFO - step: 31400 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 17:48:58,841 - root - INFO - lr: 1.0036e-05 gnorm: 1.19 [19:14:47< 5:16:16] +[titan] 2025-10-05 17:49:09,687 - root - INFO - step: 31405 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:49:09,687 - root - INFO - lr: 1.0031e-05 gnorm: 1.19 [19:14:58< 5:16:05] +[titan] 2025-10-05 17:49:20,527 - root - INFO - step: 31410 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 17:49:20,527 - root - INFO - lr: 1.0025e-05 gnorm: 1.12 [19:15:09< 5:15:54] +[titan] 2025-10-05 17:49:31,368 - root - INFO - step: 31415 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:49:31,368 - root - INFO - lr: 1.0020e-05 gnorm: 1.12 [19:15:20< 5:15:43] +[titan] 2025-10-05 17:49:42,213 - root - INFO - step: 31420 loss: 1.9250 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7033 +[titan] 2025-10-05 17:49:42,213 - root - INFO - lr: 1.0014e-05 gnorm: 1.15 [19:15:31< 5:15:32] +[titan] 2025-10-05 17:49:53,106 - root - INFO - step: 31425 loss: 1.9352 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 17:49:53,106 - root - INFO - lr: 1.0008e-05 gnorm: 1.16 [19:15:41< 5:15:21] +[titan] 2025-10-05 17:50:03,949 - root - INFO - step: 31430 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:50:03,949 - root - INFO - lr: 
1.0003e-05 gnorm: 1.17 [19:15:52< 5:15:10] +[titan] 2025-10-05 17:50:14,831 - root - INFO - step: 31435 loss: 1.9571 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 17:50:14,832 - root - INFO - lr: 9.9971e-06 gnorm: 1.15 [19:16:03< 5:14:59] +[titan] 2025-10-05 17:50:25,694 - root - INFO - step: 31440 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 17:50:25,694 - root - INFO - lr: 9.9915e-06 gnorm: 1.18 [19:16:14< 5:14:48] +[titan] 2025-10-05 17:50:36,538 - root - INFO - step: 31445 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 17:50:36,538 - root - INFO - lr: 9.9859e-06 gnorm: 2.11 [19:16:25< 5:14:37] +[titan] 2025-10-05 17:50:45,221 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:50:47,411 - root - INFO - step: 31450 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:50:47,411 - root - INFO - lr: 9.9803e-06 gnorm: 1.16 [19:16:36< 5:14:26] +[titan] 2025-10-05 17:50:58,315 - root - INFO - step: 31455 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:50:58,315 - root - INFO - lr: 9.9747e-06 gnorm: 1.15 [19:16:47< 5:14:15] +[titan] 2025-10-05 17:51:09,156 - root - INFO - step: 31460 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7552 +[titan] 2025-10-05 17:51:09,156 - root - INFO - lr: 9.9691e-06 gnorm: 1.20 [19:16:58< 5:14:03] +[titan] 2025-10-05 17:51:20,027 - root - INFO - step: 31465 loss: 2.0529 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 17:51:20,027 - root - INFO - lr: 9.9635e-06 gnorm: 1.21 [19:17:08< 5:13:52] +[titan] 2025-10-05 17:51:30,891 - root - INFO - step: 31470 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 17:51:30,891 - root - INFO - lr: 9.9579e-06 gnorm: 1.16 [19:17:19< 5:13:41] +[titan] 2025-10-05 17:51:41,738 - root - INFO - step: 31475 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 17:51:41,738 - root - INFO - lr: 9.9524e-06 gnorm: 1.16 [19:17:30< 5:13:30] +[titan] 2025-10-05 17:51:52,590 - root - INFO - step: 31480 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 17:51:52,590 - root - INFO - lr: 
9.9468e-06 gnorm: 1.15 [19:17:41< 5:13:19] +[titan] 2025-10-05 17:52:03,461 - root - INFO - step: 31485 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 17:52:03,461 - root - INFO - lr: 9.9412e-06 gnorm: 1.15 [19:17:52< 5:13:08] +[titan] 2025-10-05 17:52:14,350 - root - INFO - step: 31490 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 17:52:14,350 - root - INFO - lr: 9.9356e-06 gnorm: 1.18 [19:18:03< 5:12:57] +[titan] 2025-10-05 17:52:25,212 - root - INFO - step: 31495 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 17:52:25,212 - root - INFO - lr: 9.9300e-06 gnorm: 1.13 [19:18:14< 5:12:46] +[titan] 2025-10-05 17:52:33,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:52:36,054 - root - INFO - step: 31500 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 17:52:36,054 - root - INFO - lr: 9.9245e-06 gnorm: 1.18 [19:18:24< 5:12:35] +[titan] 2025-10-05 17:52:46,921 - root - INFO - step: 31505 loss: 1.9036 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 17:52:46,921 - root - INFO - lr: 9.9189e-06 gnorm: 1.12 [19:18:35< 5:12:24] +[titan] 2025-10-05 17:52:57,775 - root - INFO - step: 31510 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:52:57,775 - root - INFO - lr: 9.9133e-06 gnorm: 1.14 [19:18:46< 5:12:13] +[titan] 2025-10-05 17:53:08,630 - root - INFO - step: 31515 loss: 1.8954 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6762 +[titan] 2025-10-05 17:53:08,630 - root - INFO - lr: 9.9078e-06 gnorm: 1.14 [19:18:57< 5:12:02] +[titan] 2025-10-05 17:53:19,545 - root - INFO - step: 31520 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 17:53:19,545 - root - INFO - lr: 9.9022e-06 gnorm: 1.13 [19:19:08< 5:11:50] +[titan] 2025-10-05 17:53:30,414 - root - INFO - step: 31525 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:53:30,414 - root - INFO - lr: 9.8966e-06 gnorm: 1.16 [19:19:19< 5:11:39] +[titan] 2025-10-05 17:53:41,302 - root - INFO - step: 31530 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 17:53:41,302 - root - INFO - lr: 
9.8911e-06 gnorm: 1.16 [19:19:30< 5:11:28] +[titan] 2025-10-05 17:53:52,151 - root - INFO - step: 31535 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:53:52,151 - root - INFO - lr: 9.8855e-06 gnorm: 1.18 [19:19:41< 5:11:17] +[titan] 2025-10-05 17:54:03,009 - root - INFO - step: 31540 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 17:54:03,009 - root - INFO - lr: 9.8800e-06 gnorm: 1.19 [19:19:51< 5:11:06] +[titan] 2025-10-05 17:54:13,869 - root - INFO - step: 31545 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6744 +[titan] 2025-10-05 17:54:13,869 - root - INFO - lr: 9.8744e-06 gnorm: 1.15 [19:20:02< 5:10:55] +[titan] 2025-10-05 17:54:22,536 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:54:24,751 - root - INFO - step: 31550 loss: 2.0225 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 17:54:24,751 - root - INFO - lr: 9.8689e-06 gnorm: 1.23 [19:20:13< 5:10:44] +[titan] 2025-10-05 17:54:35,610 - root - INFO - step: 31555 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 17:54:35,610 - root - INFO - lr: 9.8633e-06 gnorm: 1.15 [19:20:24< 5:10:33] +[titan] 2025-10-05 17:54:46,473 - root - INFO - step: 31560 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 17:54:46,473 - root - INFO - lr: 9.8578e-06 gnorm: 1.13 [19:20:35< 5:10:22] +[titan] 2025-10-05 17:54:57,341 - root - INFO - step: 31565 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:54:57,341 - root - INFO - lr: 9.8523e-06 gnorm: 1.14 [19:20:46< 5:10:11] +[titan] 2025-10-05 17:55:08,193 - root - INFO - step: 31570 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 17:55:08,193 - root - INFO - lr: 9.8467e-06 gnorm: 1.13 [19:20:57< 5:10:00] +[titan] 2025-10-05 17:55:19,059 - root - INFO - step: 31575 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 17:55:19,059 - root - INFO - lr: 9.8412e-06 gnorm: 1.16 [19:21:07< 5:09:49] +[titan] 2025-10-05 17:55:29,929 - root - INFO - step: 31580 loss: 2.0143 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7811 +[titan] 2025-10-05 17:55:29,929 - root - INFO - lr: 
9.8357e-06 gnorm: 1.16 [19:21:18< 5:09:38] +[titan] 2025-10-05 17:55:40,810 - root - INFO - step: 31585 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 17:55:40,810 - root - INFO - lr: 9.8301e-06 gnorm: 1.18 [19:21:29< 5:09:26] +[titan] 2025-10-05 17:55:51,674 - root - INFO - step: 31590 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 17:55:51,674 - root - INFO - lr: 9.8246e-06 gnorm: 1.14 [19:21:40< 5:09:15] +[titan] 2025-10-05 17:56:02,575 - root - INFO - step: 31595 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 17:56:02,575 - root - INFO - lr: 9.8191e-06 gnorm: 1.17 [19:21:51< 5:09:04] +[titan] 2025-10-05 17:56:11,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:56:13,445 - root - INFO - step: 31600 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 17:56:13,445 - root - INFO - lr: 9.8136e-06 gnorm: 1.13 [19:22:02< 5:08:53] +[titan] 2025-10-05 17:56:24,318 - root - INFO - step: 31605 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:56:24,318 - root - INFO - lr: 9.8081e-06 gnorm: 1.14 [19:22:13< 5:08:42] +[titan] 2025-10-05 17:56:35,171 - root - INFO - step: 31610 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 17:56:35,172 - root - INFO - lr: 9.8025e-06 gnorm: 1.19 [19:22:24< 5:08:31] +[titan] 2025-10-05 17:56:46,086 - root - INFO - step: 31615 loss: 2.0603 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:56:46,087 - root - INFO - lr: 9.7970e-06 gnorm: 1.20 [19:22:34< 5:08:20] +[titan] 2025-10-05 17:56:56,970 - root - INFO - step: 31620 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 17:56:56,970 - root - INFO - lr: 9.7915e-06 gnorm: 1.15 [19:22:45< 5:08:09] +[titan] 2025-10-05 17:57:07,861 - root - INFO - step: 31625 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:57:07,862 - root - INFO - lr: 9.7860e-06 gnorm: 1.16 [19:22:56< 5:07:58] +[titan] 2025-10-05 17:57:18,731 - root - INFO - step: 31630 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 17:57:18,731 - root - INFO - lr: 
9.7805e-06 gnorm: 1.15 [19:23:07< 5:07:47] +[titan] 2025-10-05 17:57:29,588 - root - INFO - step: 31635 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7639 +[titan] 2025-10-05 17:57:29,588 - root - INFO - lr: 9.7750e-06 gnorm: 1.13 [19:23:18< 5:07:36] +[titan] 2025-10-05 17:57:40,435 - root - INFO - step: 31640 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7360 +[titan] 2025-10-05 17:57:40,435 - root - INFO - lr: 9.7695e-06 gnorm: 1.15 [19:23:29< 5:07:25] +[titan] 2025-10-05 17:57:51,307 - root - INFO - step: 31645 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:57:51,307 - root - INFO - lr: 9.7640e-06 gnorm: 1.15 [19:23:40< 5:07:14] +[titan] 2025-10-05 17:58:00,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:58:02,211 - root - INFO - step: 31650 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 17:58:02,211 - root - INFO - lr: 9.7585e-06 gnorm: 1.14 [19:23:51< 5:07:03] +[titan] 2025-10-05 17:58:13,078 - root - INFO - step: 31655 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:58:13,078 - root - INFO - lr: 9.7531e-06 gnorm: 1.13 [19:24:01< 5:06:51] +[titan] 2025-10-05 17:58:23,965 - root - INFO - step: 31660 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:58:23,966 - root - INFO - lr: 9.7476e-06 gnorm: 1.16 [19:24:12< 5:06:40] +[titan] 2025-10-05 17:58:34,856 - root - INFO - step: 31665 loss: 1.9619 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 17:58:34,857 - root - INFO - lr: 9.7421e-06 gnorm: 1.15 [19:24:23< 5:06:29] +[titan] 2025-10-05 17:58:45,708 - root - INFO - step: 31670 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7203 +[titan] 2025-10-05 17:58:45,708 - root - INFO - lr: 9.7366e-06 gnorm: 1.17 [19:24:34< 5:06:18] +[titan] 2025-10-05 17:58:56,585 - root - INFO - step: 31675 loss: 1.9360 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7121 +[titan] 2025-10-05 17:58:56,586 - root - INFO - lr: 9.7311e-06 gnorm: 1.19 [19:24:45< 5:06:07] +[titan] 2025-10-05 17:59:07,478 - root - INFO - step: 31680 loss: 2.0159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 17:59:07,478 - root - INFO - lr: 
9.7257e-06 gnorm: 1.17 [19:24:56< 5:05:56] +[titan] 2025-10-05 17:59:18,333 - root - INFO - step: 31685 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 17:59:18,333 - root - INFO - lr: 9.7202e-06 gnorm: 1.14 [19:25:07< 5:05:45] +[titan] 2025-10-05 17:59:29,202 - root - INFO - step: 31690 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 17:59:29,202 - root - INFO - lr: 9.7147e-06 gnorm: 1.15 [19:25:18< 5:05:34] +[titan] 2025-10-05 17:59:40,087 - root - INFO - step: 31695 loss: 2.0267 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 17:59:40,087 - root - INFO - lr: 9.7093e-06 gnorm: 1.18 [19:25:28< 5:05:23] +[titan] 2025-10-05 17:59:48,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:59:50,987 - root - INFO - step: 31700 loss: 2.0901 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 17:59:50,987 - root - INFO - lr: 9.7038e-06 gnorm: 1.21 [19:25:39< 5:05:12] +[titan] 2025-10-05 18:00:01,873 - root - INFO - step: 31705 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 18:00:01,873 - root - INFO - lr: 9.6983e-06 gnorm: 1.15 [19:25:50< 5:05:01] +[titan] 2025-10-05 18:00:12,786 - root - INFO - step: 31710 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 18:00:12,786 - root - INFO - lr: 9.6929e-06 gnorm: 1.19 [19:26:01< 5:04:50] +[titan] 2025-10-05 18:00:23,689 - root - INFO - step: 31715 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 18:00:23,689 - root - INFO - lr: 9.6874e-06 gnorm: 1.20 [19:26:12< 5:04:39] +[titan] 2025-10-05 18:00:34,603 - root - INFO - step: 31720 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:00:34,603 - root - INFO - lr: 9.6820e-06 gnorm: 1.17 [19:26:23< 5:04:28] +[titan] 2025-10-05 18:00:45,500 - root - INFO - step: 31725 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 18:00:45,500 - root - INFO - lr: 9.6765e-06 gnorm: 1.17 [19:26:34< 5:04:16] +[titan] 2025-10-05 18:00:56,416 - root - INFO - step: 31730 loss: 1.8776 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6601 +[titan] 2025-10-05 18:00:56,416 - root - INFO - lr: 
9.6711e-06 gnorm: 1.13 [19:26:45< 5:04:05] +[titan] 2025-10-05 18:01:07,298 - root - INFO - step: 31735 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:01:07,298 - root - INFO - lr: 9.6656e-06 gnorm: 1.14 [19:26:56< 5:03:54] +[titan] 2025-10-05 18:01:18,193 - root - INFO - step: 31740 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 18:01:18,193 - root - INFO - lr: 9.6602e-06 gnorm: 1.19 [19:27:07< 5:03:43] +[titan] 2025-10-05 18:01:27,251 - root - INFO - Dumping profiler traces at step 31744 +[titan] 2025-10-05 18:01:27,288 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:01:29,483 - root - INFO - step: 31745 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 29,024 tflops: 402.66 mfu: 40.71% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:01:29,484 - root - INFO - lr: 9.6548e-06 gnorm: 1.14 [19:27:18< 5:03:32] +[titan] 2025-10-05 18:01:38,164 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:01:40,345 - root - INFO - step: 31750 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 18:01:40,345 - root - INFO - lr: 9.6493e-06 gnorm: 1.15 [19:27:29< 5:03:21] +[titan] 2025-10-05 18:01:51,212 - root - INFO - step: 31755 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 18:01:51,212 - root - INFO - lr: 9.6439e-06 gnorm: 1.16 [19:27:40< 5:03:10] +[titan] 2025-10-05 18:02:02,067 - root - INFO - step: 31760 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 18:02:02,067 - root - INFO - lr: 9.6385e-06 gnorm: 1.20 [19:27:50< 5:02:59] +[titan] 2025-10-05 18:02:12,939 - root - INFO - step: 31765 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 18:02:12,939 - root - INFO - lr: 9.6330e-06 gnorm: 1.12 [19:28:01< 5:02:48] +[titan] 2025-10-05 18:02:24,117 - root - INFO - step: 31770 loss: 1.9667 memory: 118.84GiB(85.28%) tps: 29,315 tflops: 406.70 mfu: 41.12% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 18:02:24,118 - root - INFO - lr: 9.6276e-06 gnorm: 1.16 [19:28:12< 5:02:37] +[titan] 2025-10-05 18:02:35,008 - root - INFO - step: 31775 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 18:02:35,009 - root - INFO - lr: 9.6222e-06 gnorm: 1.21 [19:28:23< 5:02:26] +[titan] 2025-10-05 18:02:45,869 - root - INFO - step: 31780 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:02:45,869 - root - INFO - lr: 
9.6168e-06 gnorm: 1.16 [19:28:34< 5:02:15] +[titan] 2025-10-05 18:02:56,730 - root - INFO - step: 31785 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:02:56,730 - root - INFO - lr: 9.6114e-06 gnorm: 1.17 [19:28:45< 5:02:04] +[titan] 2025-10-05 18:03:07,583 - root - INFO - step: 31790 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 18:03:07,583 - root - INFO - lr: 9.6059e-06 gnorm: 1.14 [19:28:56< 5:01:53] +[titan] 2025-10-05 18:03:18,426 - root - INFO - step: 31795 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 18:03:18,426 - root - INFO - lr: 9.6005e-06 gnorm: 1.12 [19:29:07< 5:01:42] +[titan] 2025-10-05 18:03:27,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:03:29,339 - root - INFO - step: 31800 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 18:03:29,339 - root - INFO - lr: 9.5951e-06 gnorm: 1.13 [19:29:18< 5:01:31] +[titan] 2025-10-05 18:03:40,190 - root - INFO - step: 31805 loss: 1.9797 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:03:40,190 - root - INFO - lr: 9.5897e-06 gnorm: 1.15 [19:29:29< 5:01:20] +[titan] 2025-10-05 18:03:51,090 - root - INFO - step: 31810 loss: 2.0140 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 18:03:51,090 - root - INFO - lr: 9.5843e-06 gnorm: 1.18 [19:29:39< 5:01:08] +[titan] 2025-10-05 18:04:01,948 - root - INFO - step: 31815 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 18:04:01,948 - root - INFO - lr: 9.5789e-06 gnorm: 1.12 [19:29:50< 5:00:57] +[titan] 2025-10-05 18:04:12,805 - root - INFO - step: 31820 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 18:04:12,805 - root - INFO - lr: 9.5735e-06 gnorm: 1.15 [19:30:01< 5:00:46] +[titan] 2025-10-05 18:04:23,715 - root - INFO - step: 31825 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7202 +[titan] 2025-10-05 18:04:23,715 - root - INFO - lr: 9.5681e-06 gnorm: 1.12 [19:30:12< 5:00:35] +[titan] 2025-10-05 18:04:34,585 - root - INFO - step: 31830 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 18:04:34,585 - root - INFO - lr: 
9.5628e-06 gnorm: 1.16 [19:30:23< 5:00:24] +[titan] 2025-10-05 18:04:45,454 - root - INFO - step: 31835 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:04:45,454 - root - INFO - lr: 9.5574e-06 gnorm: 1.18 [19:30:34< 5:00:13] +[titan] 2025-10-05 18:04:56,357 - root - INFO - step: 31840 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 18:04:56,357 - root - INFO - lr: 9.5520e-06 gnorm: 1.16 [19:30:45< 5:00:02] +[titan] 2025-10-05 18:05:07,225 - root - INFO - step: 31845 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 18:05:07,226 - root - INFO - lr: 9.5466e-06 gnorm: 1.18 [19:30:56< 4:59:51] +[titan] 2025-10-05 18:05:15,912 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:05:18,103 - root - INFO - step: 31850 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 18:05:18,103 - root - INFO - lr: 9.5412e-06 gnorm: 1.16 [19:31:06< 4:59:40] +[titan] 2025-10-05 18:05:29,031 - root - INFO - step: 31855 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7969 +[titan] 2025-10-05 18:05:29,031 - root - INFO - lr: 9.5359e-06 gnorm: 1.17 [19:31:17< 4:59:29] +[titan] 2025-10-05 18:05:39,898 - root - INFO - step: 31860 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7375 +[titan] 2025-10-05 18:05:39,898 - root - INFO - lr: 9.5305e-06 gnorm: 1.17 [19:31:28< 4:59:18] +[titan] 2025-10-05 18:05:50,764 - root - INFO - step: 31865 loss: 1.9005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6802 +[titan] 2025-10-05 18:05:50,764 - root - INFO - lr: 9.5251e-06 gnorm: 1.14 [19:31:39< 4:59:07] +[titan] 2025-10-05 18:06:01,663 - root - INFO - step: 31870 loss: 1.9427 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:06:01,664 - root - INFO - lr: 9.5197e-06 gnorm: 1.17 [19:31:50< 4:58:56] +[titan] 2025-10-05 18:06:12,506 - root - INFO - step: 31875 loss: 2.0201 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 18:06:12,507 - root - INFO - lr: 9.5144e-06 gnorm: 1.20 [19:32:01< 4:58:45] +[titan] 2025-10-05 18:06:23,358 - root - INFO - step: 31880 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 18:06:23,358 - root - INFO - lr: 
9.5090e-06 gnorm: 1.12 [19:32:12< 4:58:33] +[titan] 2025-10-05 18:06:34,258 - root - INFO - step: 31885 loss: 1.8475 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 18:06:34,258 - root - INFO - lr: 9.5037e-06 gnorm: 1.13 [19:32:23< 4:58:22] +[titan] 2025-10-05 18:06:45,106 - root - INFO - step: 31890 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 18:06:45,106 - root - INFO - lr: 9.4983e-06 gnorm: 1.19 [19:32:33< 4:58:11] +[titan] 2025-10-05 18:06:55,965 - root - INFO - step: 31895 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 18:06:55,965 - root - INFO - lr: 9.4930e-06 gnorm: 1.16 [19:32:44< 4:58:00] +[titan] 2025-10-05 18:07:04,625 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:07:06,799 - root - INFO - step: 31900 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:07:06,799 - root - INFO - lr: 9.4876e-06 gnorm: 1.18 [19:32:55< 4:57:49] +[titan] 2025-10-05 18:07:17,698 - root - INFO - step: 31905 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 18:07:17,698 - root - INFO - lr: 9.4823e-06 gnorm: 1.20 [19:33:06< 4:57:38] +[titan] 2025-10-05 18:07:28,596 - root - INFO - step: 31910 loss: 1.9594 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 18:07:28,597 - root - INFO - lr: 9.4769e-06 gnorm: 1.14 [19:33:17< 4:57:27] +[titan] 2025-10-05 18:07:39,465 - root - INFO - step: 31915 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 18:07:39,466 - root - INFO - lr: 9.4716e-06 gnorm: 1.15 [19:33:28< 4:57:16] +[titan] 2025-10-05 18:07:50,320 - root - INFO - step: 31920 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:07:50,320 - root - INFO - lr: 9.4662e-06 gnorm: 1.18 [19:33:39< 4:57:05] +[titan] 2025-10-05 18:08:01,166 - root - INFO - step: 31925 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:08:01,166 - root - INFO - lr: 9.4609e-06 gnorm: 1.19 [19:33:49< 4:56:54] +[titan] 2025-10-05 18:08:12,045 - root - INFO - step: 31930 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 18:08:12,045 - root - INFO - lr: 
9.4556e-06 gnorm: 1.11 [19:34:00< 4:56:43] +[titan] 2025-10-05 18:08:22,957 - root - INFO - step: 31935 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7321 +[titan] 2025-10-05 18:08:22,957 - root - INFO - lr: 9.4502e-06 gnorm: 1.14 [19:34:11< 4:56:32] +[titan] 2025-10-05 18:08:33,867 - root - INFO - step: 31940 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7987 +[titan] 2025-10-05 18:08:33,867 - root - INFO - lr: 9.4449e-06 gnorm: 1.15 [19:34:22< 4:56:21] +[titan] 2025-10-05 18:08:44,766 - root - INFO - step: 31945 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 18:08:44,766 - root - INFO - lr: 9.4396e-06 gnorm: 1.16 [19:34:33< 4:56:10] +[titan] 2025-10-05 18:08:53,450 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:08:55,628 - root - INFO - step: 31950 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 18:08:55,628 - root - INFO - lr: 9.4343e-06 gnorm: 1.18 [19:34:44< 4:55:58] +[titan] 2025-10-05 18:09:06,503 - root - INFO - step: 31955 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 18:09:06,503 - root - INFO - lr: 9.4289e-06 gnorm: 1.12 [19:34:55< 4:55:47] +[titan] 2025-10-05 18:09:17,363 - root - INFO - step: 31960 loss: 2.0329 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 18:09:17,363 - root - INFO - lr: 9.4236e-06 gnorm: 1.18 [19:35:06< 4:55:36] +[titan] 2025-10-05 18:09:28,265 - root - INFO - step: 31965 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 18:09:28,265 - root - INFO - lr: 9.4183e-06 gnorm: 1.18 [19:35:17< 4:55:25] +[titan] 2025-10-05 18:09:39,153 - root - INFO - step: 31970 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 18:09:39,153 - root - INFO - lr: 9.4130e-06 gnorm: 1.15 [19:35:27< 4:55:14] +[titan] 2025-10-05 18:09:50,010 - root - INFO - step: 31975 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 18:09:50,010 - root - INFO - lr: 9.4077e-06 gnorm: 1.18 [19:35:38< 4:55:03] +[titan] 2025-10-05 18:10:00,880 - root - INFO - step: 31980 loss: 1.9569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 18:10:00,880 - root - INFO - lr: 
9.4024e-06 gnorm: 1.14 [19:35:49< 4:54:52] +[titan] 2025-10-05 18:10:11,742 - root - INFO - step: 31985 loss: 1.9260 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 18:10:11,742 - root - INFO - lr: 9.3971e-06 gnorm: 1.13 [19:36:00< 4:54:41] +[titan] 2025-10-05 18:10:22,613 - root - INFO - step: 31990 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 18:10:22,613 - root - INFO - lr: 9.3918e-06 gnorm: 1.16 [19:36:11< 4:54:30] +[titan] 2025-10-05 18:10:33,523 - root - INFO - step: 31995 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 18:10:33,524 - root - INFO - lr: 9.3865e-06 gnorm: 1.14 [19:36:22< 4:54:19] +[titan] 2025-10-05 18:10:42,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:10:44,426 - root - INFO - step: 32000 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 18:10:44,426 - root - INFO - lr: 9.3812e-06 gnorm: 1.14 [19:36:33< 4:54:08] +[titan] 2025-10-05 18:10:55,314 - root - INFO - step: 32005 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 18:10:55,315 - root - INFO - lr: 9.3759e-06 gnorm: 1.17 [19:36:44< 4:53:57] +[titan] 2025-10-05 18:11:06,182 - root - INFO - step: 32010 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:11:06,182 - root - INFO - lr: 9.3706e-06 gnorm: 1.16 [19:36:54< 4:53:46] +[titan] 2025-10-05 18:11:17,044 - root - INFO - step: 32015 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 18:11:17,044 - root - INFO - lr: 9.3653e-06 gnorm: 1.16 [19:37:05< 4:53:35] +[titan] 2025-10-05 18:11:27,919 - root - INFO - step: 32020 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 18:11:27,919 - root - INFO - lr: 9.3601e-06 gnorm: 1.21 [19:37:16< 4:53:24] +[titan] 2025-10-05 18:11:38,842 - root - INFO - step: 32025 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:11:38,842 - root - INFO - lr: 9.3548e-06 gnorm: 1.19 [19:37:27< 4:53:12] +[titan] 2025-10-05 18:11:49,758 - root - INFO - step: 32030 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7644 +[titan] 2025-10-05 18:11:49,758 - root - INFO - lr: 
9.3495e-06 gnorm: 1.19 [19:37:38< 4:53:01] +[titan] 2025-10-05 18:12:00,638 - root - INFO - step: 32035 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:12:00,638 - root - INFO - lr: 9.3442e-06 gnorm: 1.14 [19:37:49< 4:52:50] +[titan] 2025-10-05 18:12:11,546 - root - INFO - step: 32040 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:12:11,547 - root - INFO - lr: 9.3390e-06 gnorm: 1.15 [19:38:00< 4:52:39] +[titan] 2025-10-05 18:12:22,450 - root - INFO - step: 32045 loss: 1.8868 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 18:12:22,450 - root - INFO - lr: 9.3337e-06 gnorm: 1.14 [19:38:11< 4:52:28] +[titan] 2025-10-05 18:12:31,215 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:12:33,403 - root - INFO - step: 32050 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 18:12:33,403 - root - INFO - lr: 9.3284e-06 gnorm: 1.15 [19:38:22< 4:52:17] +[titan] 2025-10-05 18:12:44,298 - root - INFO - step: 32055 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 18:12:44,298 - root - INFO - lr: 9.3232e-06 gnorm: 1.15 [19:38:33< 4:52:06] +[titan] 2025-10-05 18:12:55,164 - root - INFO - step: 32060 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:12:55,164 - root - INFO - lr: 9.3179e-06 gnorm: 1.16 [19:38:43< 4:51:55] +[titan] 2025-10-05 18:13:06,043 - root - INFO - step: 32065 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7655 +[titan] 2025-10-05 18:13:06,043 - root - INFO - lr: 9.3127e-06 gnorm: 1.15 [19:38:54< 4:51:44] +[titan] 2025-10-05 18:13:16,898 - root - INFO - step: 32070 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 18:13:16,898 - root - INFO - lr: 9.3074e-06 gnorm: 1.18 [19:39:05< 4:51:33] +[titan] 2025-10-05 18:13:27,792 - root - INFO - step: 32075 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 18:13:27,793 - root - INFO - lr: 9.3022e-06 gnorm: 1.19 [19:39:16< 4:51:22] +[titan] 2025-10-05 18:13:38,698 - root - INFO - step: 32080 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 18:13:38,698 - root - INFO - lr: 
9.2969e-06 gnorm: 1.19 [19:39:27< 4:51:11] +[titan] 2025-10-05 18:13:49,556 - root - INFO - step: 32085 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 18:13:49,557 - root - INFO - lr: 9.2917e-06 gnorm: 1.17 [19:39:38< 4:51:00] +[titan] 2025-10-05 18:14:00,441 - root - INFO - step: 32090 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 18:14:00,441 - root - INFO - lr: 9.2864e-06 gnorm: 1.16 [19:39:49< 4:50:49] +[titan] 2025-10-05 18:14:11,340 - root - INFO - step: 32095 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 18:14:11,340 - root - INFO - lr: 9.2812e-06 gnorm: 1.14 [19:40:00< 4:50:38] +[titan] 2025-10-05 18:14:20,021 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:14:22,203 - root - INFO - step: 32100 loss: 1.9882 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 18:14:22,203 - root - INFO - lr: 9.2759e-06 gnorm: 1.14 [19:40:11< 4:50:27] +[titan] 2025-10-05 18:14:33,146 - root - INFO - step: 32105 loss: 2.0008 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:14:33,146 - root - INFO - lr: 9.2707e-06 gnorm: 1.18 [19:40:21< 4:50:15] +[titan] 2025-10-05 18:14:44,011 - root - INFO - step: 32110 loss: 1.9522 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:14:44,011 - root - INFO - lr: 9.2655e-06 gnorm: 1.14 [19:40:32< 4:50:04] +[titan] 2025-10-05 18:14:54,863 - root - INFO - step: 32115 loss: 1.9586 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:14:54,864 - root - INFO - lr: 9.2603e-06 gnorm: 1.15 [19:40:43< 4:49:53] +[titan] 2025-10-05 18:15:05,716 - root - INFO - step: 32120 loss: 1.9321 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 18:15:05,716 - root - INFO - lr: 9.2550e-06 gnorm: 1.13 [19:40:54< 4:49:42] +[titan] 2025-10-05 18:15:16,556 - root - INFO - step: 32125 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:15:16,556 - root - INFO - lr: 9.2498e-06 gnorm: 1.15 [19:41:05< 4:49:31] +[titan] 2025-10-05 18:15:27,432 - root - INFO - step: 32130 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 18:15:27,433 - root - INFO - lr: 
9.2446e-06 gnorm: 1.20 [19:41:16< 4:49:20] +[titan] 2025-10-05 18:15:38,339 - root - INFO - step: 32135 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7023 +[titan] 2025-10-05 18:15:38,339 - root - INFO - lr: 9.2394e-06 gnorm: 1.17 [19:41:27< 4:49:09] +[titan] 2025-10-05 18:15:49,183 - root - INFO - step: 32140 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 18:15:49,184 - root - INFO - lr: 9.2342e-06 gnorm: 1.19 [19:41:37< 4:48:58] +[titan] 2025-10-05 18:16:00,016 - root - INFO - step: 32145 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:16:00,017 - root - INFO - lr: 9.2290e-06 gnorm: 1.19 [19:41:48< 4:48:47] +[titan] 2025-10-05 18:16:08,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:16:10,859 - root - INFO - step: 32150 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7520 +[titan] 2025-10-05 18:16:10,859 - root - INFO - lr: 9.2237e-06 gnorm: 1.17 [19:41:59< 4:48:36] +[titan] 2025-10-05 18:16:21,712 - root - INFO - step: 32155 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 18:16:21,712 - root - INFO - lr: 9.2185e-06 gnorm: 1.14 [19:42:10< 4:48:25] +[titan] 2025-10-05 18:16:32,617 - root - INFO - step: 32160 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 18:16:32,618 - root - INFO - lr: 9.2133e-06 gnorm: 1.19 [19:42:21< 4:48:14] +[titan] 2025-10-05 18:16:43,525 - root - INFO - step: 32165 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 18:16:43,525 - root - INFO - lr: 9.2081e-06 gnorm: 1.20 [19:42:32< 4:48:03] +[titan] 2025-10-05 18:16:54,419 - root - INFO - step: 32170 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 18:16:54,419 - root - INFO - lr: 9.2029e-06 gnorm: 1.15 [19:42:43< 4:47:52] +[titan] 2025-10-05 18:17:05,289 - root - INFO - step: 32175 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 18:17:05,289 - root - INFO - lr: 9.1978e-06 gnorm: 1.15 [19:42:54< 4:47:40] +[titan] 2025-10-05 18:17:16,163 - root - INFO - step: 32180 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 18:17:16,163 - root - INFO - lr: 
9.1926e-06 gnorm: 1.17 [19:43:04< 4:47:29] +[titan] 2025-10-05 18:17:27,035 - root - INFO - step: 32185 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7293 +[titan] 2025-10-05 18:17:27,035 - root - INFO - lr: 9.1874e-06 gnorm: 1.11 [19:43:15< 4:47:18] +[titan] 2025-10-05 18:17:37,986 - root - INFO - step: 32190 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:17:37,986 - root - INFO - lr: 9.1822e-06 gnorm: 1.20 [19:43:26< 4:47:07] +[titan] 2025-10-05 18:17:48,863 - root - INFO - step: 32195 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 18:17:48,863 - root - INFO - lr: 9.1770e-06 gnorm: 1.14 [19:43:37< 4:46:56] +[titan] 2025-10-05 18:17:57,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:17:59,752 - root - INFO - step: 32200 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 18:17:59,752 - root - INFO - lr: 9.1718e-06 gnorm: 1.14 [19:43:48< 4:46:45] +[titan] 2025-10-05 18:18:10,633 - root - INFO - step: 32205 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 18:18:10,633 - root - INFO - lr: 9.1667e-06 gnorm: 1.16 [19:43:59< 4:46:34] +[titan] 2025-10-05 18:18:21,504 - root - INFO - step: 32210 loss: 1.9549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 18:18:21,505 - root - INFO - lr: 9.1615e-06 gnorm: 1.14 [19:44:10< 4:46:23] +[titan] 2025-10-05 18:18:32,363 - root - INFO - step: 32215 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 18:18:32,363 - root - INFO - lr: 9.1563e-06 gnorm: 1.15 [19:44:21< 4:46:12] +[titan] 2025-10-05 18:18:43,287 - root - INFO - step: 32220 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 18:18:43,287 - root - INFO - lr: 9.1512e-06 gnorm: 1.16 [19:44:32< 4:46:01] +[titan] 2025-10-05 18:18:54,183 - root - INFO - step: 32225 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 18:18:54,183 - root - INFO - lr: 9.1460e-06 gnorm: 1.14 [19:44:42< 4:45:50] +[titan] 2025-10-05 18:19:05,065 - root - INFO - step: 32230 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:19:05,066 - root - INFO - lr: 
9.1408e-06 gnorm: 1.13 [19:44:53< 4:45:39] +[titan] 2025-10-05 18:19:15,932 - root - INFO - step: 32235 loss: 1.9942 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:19:15,932 - root - INFO - lr: 9.1357e-06 gnorm: 1.19 [19:45:04< 4:45:28] +[titan] 2025-10-05 18:19:26,783 - root - INFO - step: 32240 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 18:19:26,783 - root - INFO - lr: 9.1305e-06 gnorm: 1.16 [19:45:15< 4:45:17] +[titan] 2025-10-05 18:19:37,703 - root - INFO - step: 32245 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:19:37,703 - root - INFO - lr: 9.1254e-06 gnorm: 1.16 [19:45:26< 4:45:06] +[titan] 2025-10-05 18:19:46,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:19:48,556 - root - INFO - step: 32250 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 18:19:48,556 - root - INFO - lr: 9.1202e-06 gnorm: 1.15 [19:45:37< 4:44:55] +[titan] 2025-10-05 18:19:59,512 - root - INFO - step: 32255 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:19:59,512 - root - INFO - lr: 9.1151e-06 gnorm: 1.19 [19:45:48< 4:44:43] +[titan] 2025-10-05 18:20:01,862 - root - INFO - Dumping profiler traces at step 32256 +[titan] 2025-10-05 18:20:01,900 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:20:10,612 - root - INFO - step: 32260 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.57 mfu: 41.41% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 18:20:10,612 - root - INFO - lr: 9.1099e-06 gnorm: 1.15 [19:45:59< 4:44:32] +[titan] 2025-10-05 18:20:21,505 - root - INFO - step: 32265 loss: 1.9661 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 18:20:21,505 - root - INFO - lr: 9.1048e-06 gnorm: 1.18 [19:46:10< 4:44:21] +[titan] 2025-10-05 18:20:32,390 - root - INFO - step: 32270 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 18:20:32,390 - root - INFO - lr: 9.0996e-06 gnorm: 1.19 [19:46:21< 4:44:10] +[titan] 2025-10-05 18:20:43,343 - root - INFO - step: 32275 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 18:20:43,343 - root - INFO - lr: 9.0945e-06 gnorm: 1.17 [19:46:32< 4:43:59] +[titan] 2025-10-05 18:20:54,195 - root - INFO - step: 32280 loss: 
1.9368 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 18:20:54,195 - root - INFO - lr: 9.0894e-06 gnorm: 1.13 [19:46:42< 4:43:48] +[titan] 2025-10-05 18:21:05,056 - root - INFO - step: 32285 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:21:05,056 - root - INFO - lr: 9.0842e-06 gnorm: 1.14 [19:46:53< 4:43:37] +[titan] 2025-10-05 18:21:15,905 - root - INFO - step: 32290 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 18:21:15,905 - root - INFO - lr: 9.0791e-06 gnorm: 1.14 [19:47:04< 4:43:26] +[titan] 2025-10-05 18:21:26,822 - root - INFO - step: 32295 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:21:26,822 - root - INFO - lr: 9.0740e-06 gnorm: 1.15 [19:47:15< 4:43:15] +[titan] 2025-10-05 18:21:35,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:21:37,747 - root - INFO - step: 32300 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:21:37,747 - root - INFO - lr: 9.0689e-06 gnorm: 1.15 [19:47:26< 4:43:04] +[titan] 2025-10-05 18:21:48,651 - root - INFO - step: 32305 loss: 1.9420 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:21:48,651 - root - INFO - lr: 9.0638e-06 gnorm: 1.13 [19:47:37< 4:42:53] +[titan] 2025-10-05 18:21:59,526 - root - INFO - step: 32310 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:21:59,526 - root - INFO - lr: 9.0586e-06 gnorm: 1.20 [19:47:48< 4:42:42] +[titan] 2025-10-05 18:22:10,410 - root - INFO - step: 32315 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 18:22:10,410 - root - INFO - lr: 9.0535e-06 gnorm: 1.16 [19:47:59< 4:42:31] +[titan] 2025-10-05 18:22:21,310 - root - INFO - step: 32320 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6910 +[titan] 2025-10-05 18:22:21,310 - root - INFO - lr: 9.0484e-06 gnorm: 1.16 [19:48:10< 4:42:20] +[titan] 2025-10-05 18:22:32,228 - root - INFO - step: 32325 loss: 1.9625 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 18:22:32,229 - root - INFO - lr: 9.0433e-06 gnorm: 1.17 [19:48:21< 4:42:09] +[titan] 2025-10-05 18:22:43,163 - root - INFO - step: 32330 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 18:22:43,164 - root - INFO - lr: 
9.0382e-06 gnorm: 1.16 [19:48:31< 4:41:58] +[titan] 2025-10-05 18:22:54,059 - root - INFO - step: 32335 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:22:54,059 - root - INFO - lr: 9.0331e-06 gnorm: 1.18 [19:48:42< 4:41:47] +[titan] 2025-10-05 18:23:04,937 - root - INFO - step: 32340 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 18:23:04,938 - root - INFO - lr: 9.0280e-06 gnorm: 1.22 [19:48:53< 4:41:35] +[titan] 2025-10-05 18:23:15,809 - root - INFO - step: 32345 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:23:15,809 - root - INFO - lr: 9.0229e-06 gnorm: 1.18 [19:49:04< 4:41:24] +[titan] 2025-10-05 18:23:24,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:23:26,693 - root - INFO - step: 32350 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 18:23:26,693 - root - INFO - lr: 9.0178e-06 gnorm: 1.24 [19:49:15< 4:41:13] +[titan] 2025-10-05 18:23:37,572 - root - INFO - step: 32355 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:23:37,573 - root - INFO - lr: 9.0127e-06 gnorm: 1.15 [19:49:26< 4:41:02] +[titan] 2025-10-05 18:23:48,530 - root - INFO - step: 32360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 18:23:48,530 - root - INFO - lr: 9.0077e-06 gnorm: 1.15 [19:49:37< 4:40:51] +[titan] 2025-10-05 18:23:59,408 - root - INFO - step: 32365 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 18:23:59,408 - root - INFO - lr: 9.0026e-06 gnorm: 1.20 [19:49:48< 4:40:40] +[titan] 2025-10-05 18:24:10,292 - root - INFO - step: 32370 loss: 1.9796 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 18:24:10,292 - root - INFO - lr: 8.9975e-06 gnorm: 1.14 [19:49:59< 4:40:29] +[titan] 2025-10-05 18:24:21,173 - root - INFO - step: 32375 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:24:21,173 - root - INFO - lr: 8.9924e-06 gnorm: 1.18 [19:50:09< 4:40:18] +[titan] 2025-10-05 18:24:32,033 - root - INFO - step: 32380 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7551 +[titan] 2025-10-05 18:24:32,033 - root - INFO - lr: 
8.9873e-06 gnorm: 1.17 [19:50:20< 4:40:07] +[titan] 2025-10-05 18:24:42,932 - root - INFO - step: 32385 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 18:24:42,932 - root - INFO - lr: 8.9823e-06 gnorm: 1.17 [19:50:31< 4:39:56] +[titan] 2025-10-05 18:24:53,801 - root - INFO - step: 32390 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 18:24:53,801 - root - INFO - lr: 8.9772e-06 gnorm: 1.16 [19:50:42< 4:39:45] +[titan] 2025-10-05 18:25:04,696 - root - INFO - step: 32395 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 18:25:04,696 - root - INFO - lr: 8.9721e-06 gnorm: 1.18 [19:50:53< 4:39:34] +[titan] 2025-10-05 18:25:13,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:25:15,566 - root - INFO - step: 32400 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:25:15,566 - root - INFO - lr: 8.9671e-06 gnorm: 1.16 [19:51:04< 4:39:23] +[titan] 2025-10-05 18:25:26,448 - root - INFO - step: 32405 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 18:25:26,449 - root - INFO - lr: 8.9620e-06 gnorm: 1.13 [19:51:15< 4:39:12] +[titan] 2025-10-05 18:25:37,323 - root - INFO - step: 32410 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 18:25:37,323 - root - INFO - lr: 8.9570e-06 gnorm: 1.15 [19:51:26< 4:39:01] +[titan] 2025-10-05 18:25:48,505 - root - INFO - step: 32415 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 29,304 tflops: 406.54 mfu: 41.11% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 18:25:48,505 - root - INFO - lr: 8.9519e-06 gnorm: 1.17 [19:51:37< 4:38:50] +[titan] 2025-10-05 18:25:59,396 - root - INFO - step: 32420 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 18:25:59,396 - root - INFO - lr: 8.9469e-06 gnorm: 1.17 [19:51:48< 4:38:39] +[titan] 2025-10-05 18:26:10,310 - root - INFO - step: 32425 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 18:26:10,310 - root - INFO - lr: 8.9418e-06 gnorm: 1.15 [19:51:59< 4:38:28] +[titan] 2025-10-05 18:26:21,195 - root - INFO - step: 32430 loss: 1.9222 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 18:26:21,195 - root - INFO - lr: 
8.9368e-06 gnorm: 1.14 [19:52:09< 4:38:16] +[titan] 2025-10-05 18:26:32,089 - root - INFO - step: 32435 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:26:32,089 - root - INFO - lr: 8.9317e-06 gnorm: 1.17 [19:52:20< 4:38:05] +[titan] 2025-10-05 18:26:42,997 - root - INFO - step: 32440 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:26:42,997 - root - INFO - lr: 8.9267e-06 gnorm: 1.18 [19:52:31< 4:37:54] +[titan] 2025-10-05 18:26:53,888 - root - INFO - step: 32445 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 18:26:53,888 - root - INFO - lr: 8.9217e-06 gnorm: 1.18 [19:52:42< 4:37:43] +[titan] 2025-10-05 18:27:02,624 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:27:04,807 - root - INFO - step: 32450 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 18:27:04,807 - root - INFO - lr: 8.9166e-06 gnorm: 1.18 [19:52:53< 4:37:32] +[titan] 2025-10-05 18:27:15,706 - root - INFO - step: 32455 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7589 +[titan] 2025-10-05 18:27:15,706 - root - INFO - lr: 8.9116e-06 gnorm: 1.15 [19:53:04< 4:37:21] +[titan] 2025-10-05 18:27:26,608 - root - INFO - step: 32460 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 18:27:26,608 - root - INFO - lr: 8.9066e-06 gnorm: 1.14 [19:53:15< 4:37:10] +[titan] 2025-10-05 18:27:37,484 - root - INFO - step: 32465 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 18:27:37,485 - root - INFO - lr: 8.9015e-06 gnorm: 1.11 [19:53:26< 4:36:59] +[titan] 2025-10-05 18:27:48,368 - root - INFO - step: 32470 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:27:48,368 - root - INFO - lr: 8.8965e-06 gnorm: 1.13 [19:53:37< 4:36:48] +[titan] 2025-10-05 18:27:59,231 - root - INFO - step: 32475 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 18:27:59,231 - root - INFO - lr: 8.8915e-06 gnorm: 1.15 [19:53:47< 4:36:37] +[titan] 2025-10-05 18:28:10,108 - root - INFO - step: 32480 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7775 +[titan] 2025-10-05 18:28:10,108 - root - INFO - lr: 
8.8865e-06 gnorm: 1.18 [19:53:58< 4:36:26] +[titan] 2025-10-05 18:28:20,988 - root - INFO - step: 32485 loss: 1.9823 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 18:28:20,988 - root - INFO - lr: 8.8815e-06 gnorm: 1.15 [19:54:09< 4:36:15] +[titan] 2025-10-05 18:28:31,851 - root - INFO - step: 32490 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 18:28:31,851 - root - INFO - lr: 8.8765e-06 gnorm: 1.16 [19:54:20< 4:36:04] +[titan] 2025-10-05 18:28:42,715 - root - INFO - step: 32495 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:28:42,716 - root - INFO - lr: 8.8715e-06 gnorm: 1.16 [19:54:31< 4:35:53] +[titan] 2025-10-05 18:28:51,431 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:28:53,617 - root - INFO - step: 32500 loss: 1.9959 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 18:28:53,617 - root - INFO - lr: 8.8665e-06 gnorm: 1.21 [19:54:42< 4:35:42] +[titan] 2025-10-05 18:29:04,488 - root - INFO - step: 32505 loss: 1.9052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6852 +[titan] 2025-10-05 18:29:04,489 - root - INFO - lr: 8.8615e-06 gnorm: 1.15 [19:54:53< 4:35:31] +[titan] 2025-10-05 18:29:15,355 - root - INFO - step: 32510 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 18:29:15,355 - root - INFO - lr: 8.8565e-06 gnorm: 1.20 [19:55:04< 4:35:19] +[titan] 2025-10-05 18:29:26,197 - root - INFO - step: 32515 loss: 1.9015 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 18:29:26,198 - root - INFO - lr: 8.8515e-06 gnorm: 1.16 [19:55:14< 4:35:08] +[titan] 2025-10-05 18:29:37,043 - root - INFO - step: 32520 loss: 1.9322 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:29:37,043 - root - INFO - lr: 8.8465e-06 gnorm: 1.14 [19:55:25< 4:34:57] +[titan] 2025-10-05 18:29:47,915 - root - INFO - step: 32525 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:29:47,915 - root - INFO - lr: 8.8415e-06 gnorm: 1.17 [19:55:36< 4:34:46] +[titan] 2025-10-05 18:29:58,786 - root - INFO - step: 32530 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:29:58,786 - root - INFO - lr: 
8.8365e-06 gnorm: 1.14 [19:55:47< 4:34:35] +[titan] 2025-10-05 18:30:09,635 - root - INFO - step: 32535 loss: 1.9367 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 18:30:09,635 - root - INFO - lr: 8.8315e-06 gnorm: 1.18 [19:55:58< 4:34:24] +[titan] 2025-10-05 18:30:20,517 - root - INFO - step: 32540 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:30:20,517 - root - INFO - lr: 8.8265e-06 gnorm: 1.22 [19:56:09< 4:34:13] +[titan] 2025-10-05 18:30:31,388 - root - INFO - step: 32545 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 18:30:31,388 - root - INFO - lr: 8.8216e-06 gnorm: 1.18 [19:56:20< 4:34:02] +[titan] 2025-10-05 18:30:40,099 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:30:42,279 - root - INFO - step: 32550 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 18:30:42,279 - root - INFO - lr: 8.8166e-06 gnorm: 1.22 [19:56:31< 4:33:51] +[titan] 2025-10-05 18:30:53,167 - root - INFO - step: 32555 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7816 +[titan] 2025-10-05 18:30:53,168 - root - INFO - lr: 8.8116e-06 gnorm: 1.16 [19:56:41< 4:33:40] +[titan] 2025-10-05 18:31:04,037 - root - INFO - step: 32560 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7457 +[titan] 2025-10-05 18:31:04,037 - root - INFO - lr: 8.8066e-06 gnorm: 1.15 [19:56:52< 4:33:29] +[titan] 2025-10-05 18:31:14,905 - root - INFO - step: 32565 loss: 2.0104 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 18:31:14,905 - root - INFO - lr: 8.8017e-06 gnorm: 1.18 [19:57:03< 4:33:18] +[titan] 2025-10-05 18:31:25,750 - root - INFO - step: 32570 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:31:25,750 - root - INFO - lr: 8.7967e-06 gnorm: 1.18 [19:57:14< 4:33:07] +[titan] 2025-10-05 18:31:36,615 - root - INFO - step: 32575 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 18:31:36,615 - root - INFO - lr: 8.7917e-06 gnorm: 1.21 [19:57:25< 4:32:56] +[titan] 2025-10-05 18:31:47,505 - root - INFO - step: 32580 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 18:31:47,505 - root - INFO - lr: 
8.7868e-06 gnorm: 1.18 [19:57:36< 4:32:45] +[titan] 2025-10-05 18:31:58,405 - root - INFO - step: 32585 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 18:31:58,406 - root - INFO - lr: 8.7818e-06 gnorm: 1.15 [19:57:47< 4:32:33] +[titan] 2025-10-05 18:32:09,289 - root - INFO - step: 32590 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 18:32:09,289 - root - INFO - lr: 8.7769e-06 gnorm: 1.17 [19:57:58< 4:32:22] +[titan] 2025-10-05 18:32:20,143 - root - INFO - step: 32595 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 18:32:20,143 - root - INFO - lr: 8.7719e-06 gnorm: 1.15 [19:58:08< 4:32:11] +[titan] 2025-10-05 18:32:28,834 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:32:31,015 - root - INFO - step: 32600 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:32:31,015 - root - INFO - lr: 8.7670e-06 gnorm: 1.15 [19:58:19< 4:32:00] +[titan] 2025-10-05 18:32:41,866 - root - INFO - step: 32605 loss: 1.9357 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 18:32:41,866 - root - INFO - lr: 8.7621e-06 gnorm: 1.17 [19:58:30< 4:31:49] +[titan] 2025-10-05 18:32:52,744 - root - INFO - step: 32610 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 18:32:52,744 - root - INFO - lr: 8.7571e-06 gnorm: 1.17 [19:58:41< 4:31:38] +[titan] 2025-10-05 18:33:03,626 - root - INFO - step: 32615 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:33:03,626 - root - INFO - lr: 8.7522e-06 gnorm: 1.16 [19:58:52< 4:31:27] +[titan] 2025-10-05 18:33:14,510 - root - INFO - step: 32620 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 18:33:14,510 - root - INFO - lr: 8.7472e-06 gnorm: 1.19 [19:59:03< 4:31:16] +[titan] 2025-10-05 18:33:25,381 - root - INFO - step: 32625 loss: 1.9774 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 18:33:25,381 - root - INFO - lr: 8.7423e-06 gnorm: 1.16 [19:59:14< 4:31:05] +[titan] 2025-10-05 18:33:36,243 - root - INFO - step: 32630 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 18:33:36,243 - root - INFO - lr: 
8.7374e-06 gnorm: 1.17 [19:59:24< 4:30:54] +[titan] 2025-10-05 18:33:47,116 - root - INFO - step: 32635 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:33:47,116 - root - INFO - lr: 8.7325e-06 gnorm: 1.15 [19:59:35< 4:30:43] +[titan] 2025-10-05 18:33:58,057 - root - INFO - step: 32640 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:33:58,057 - root - INFO - lr: 8.7275e-06 gnorm: 1.20 [19:59:46< 4:30:32] +[titan] 2025-10-05 18:34:08,946 - root - INFO - step: 32645 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:34:08,946 - root - INFO - lr: 8.7226e-06 gnorm: 1.17 [19:59:57< 4:30:21] +[titan] 2025-10-05 18:34:17,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:34:19,814 - root - INFO - step: 32650 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7704 +[titan] 2025-10-05 18:34:19,814 - root - INFO - lr: 8.7177e-06 gnorm: 1.14 [20:00:08< 4:30:10] +[titan] 2025-10-05 18:34:30,684 - root - INFO - step: 32655 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 18:34:30,685 - root - INFO - lr: 8.7128e-06 gnorm: 1.21 [20:00:19< 4:29:59] +[titan] 2025-10-05 18:34:41,540 - root - INFO - step: 32660 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 18:34:41,540 - root - INFO - lr: 8.7079e-06 gnorm: 1.18 [20:00:30< 4:29:48] +[titan] 2025-10-05 18:34:52,419 - root - INFO - step: 32665 loss: 1.9116 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 18:34:52,419 - root - INFO - lr: 8.7030e-06 gnorm: 1.19 [20:00:41< 4:29:37] +[titan] 2025-10-05 18:35:03,284 - root - INFO - step: 32670 loss: 1.9841 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 18:35:03,284 - root - INFO - lr: 8.6981e-06 gnorm: 1.23 [20:00:52< 4:29:25] +[titan] 2025-10-05 18:35:14,164 - root - INFO - step: 32675 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:35:14,164 - root - INFO - lr: 8.6932e-06 gnorm: 1.14 [20:01:02< 4:29:14] +[titan] 2025-10-05 18:35:25,030 - root - INFO - step: 32680 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 18:35:25,030 - root - INFO - lr: 
8.6883e-06 gnorm: 1.17 [20:01:13< 4:29:03] +[titan] 2025-10-05 18:35:35,901 - root - INFO - step: 32685 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 18:35:35,901 - root - INFO - lr: 8.6834e-06 gnorm: 1.19 [20:01:24< 4:28:52] +[titan] 2025-10-05 18:35:46,769 - root - INFO - step: 32690 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 18:35:46,769 - root - INFO - lr: 8.6785e-06 gnorm: 1.17 [20:01:35< 4:28:41] +[titan] 2025-10-05 18:35:57,644 - root - INFO - step: 32695 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 18:35:57,644 - root - INFO - lr: 8.6736e-06 gnorm: 1.18 [20:01:46< 4:28:30] +[titan] 2025-10-05 18:36:06,324 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:36:08,504 - root - INFO - step: 32700 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 18:36:08,504 - root - INFO - lr: 8.6687e-06 gnorm: 1.16 [20:01:57< 4:28:19] +[titan] 2025-10-05 18:36:19,368 - root - INFO - step: 32705 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6516 +[titan] 2025-10-05 18:36:19,368 - root - INFO - lr: 8.6638e-06 gnorm: 1.12 [20:02:08< 4:28:08] +[titan] 2025-10-05 18:36:30,228 - root - INFO - step: 32710 loss: 1.9004 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:36:30,228 - root - INFO - lr: 8.6590e-06 gnorm: 1.17 [20:02:18< 4:27:57] +[titan] 2025-10-05 18:36:41,098 - root - INFO - step: 32715 loss: 1.9595 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7326 +[titan] 2025-10-05 18:36:41,098 - root - INFO - lr: 8.6541e-06 gnorm: 1.21 [20:02:29< 4:27:46] +[titan] 2025-10-05 18:36:51,987 - root - INFO - step: 32720 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 18:36:51,987 - root - INFO - lr: 8.6492e-06 gnorm: 1.15 [20:02:40< 4:27:35] +[titan] 2025-10-05 18:37:02,853 - root - INFO - step: 32725 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7097 +[titan] 2025-10-05 18:37:02,853 - root - INFO - lr: 8.6443e-06 gnorm: 1.18 [20:02:51< 4:27:24] +[titan] 2025-10-05 18:37:13,725 - root - INFO - step: 32730 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 18:37:13,725 - root - INFO - lr: 
8.6395e-06 gnorm: 1.18 [20:03:02< 4:27:13] +[titan] 2025-10-05 18:37:24,602 - root - INFO - step: 32735 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 18:37:24,603 - root - INFO - lr: 8.6346e-06 gnorm: 1.18 [20:03:13< 4:27:02] +[titan] 2025-10-05 18:37:35,486 - root - INFO - step: 32740 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 18:37:35,486 - root - INFO - lr: 8.6297e-06 gnorm: 1.15 [20:03:24< 4:26:51] +[titan] 2025-10-05 18:37:46,374 - root - INFO - step: 32745 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 18:37:46,374 - root - INFO - lr: 8.6249e-06 gnorm: 1.18 [20:03:35< 4:26:40] +[titan] 2025-10-05 18:37:55,082 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:37:57,268 - root - INFO - step: 32750 loss: 1.9951 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 18:37:57,269 - root - INFO - lr: 8.6200e-06 gnorm: 1.16 [20:03:46< 4:26:28] +[titan] 2025-10-05 18:38:08,165 - root - INFO - step: 32755 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 18:38:08,165 - root - INFO - lr: 8.6152e-06 gnorm: 1.15 [20:03:56< 4:26:17] +[titan] 2025-10-05 18:38:19,048 - root - INFO - step: 32760 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 18:38:19,048 - root - INFO - lr: 8.6103e-06 gnorm: 1.17 [20:04:07< 4:26:06] +[titan] 2025-10-05 18:38:30,021 - root - INFO - step: 32765 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 29,864 tflops: 414.31 mfu: 41.89% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 18:38:30,021 - root - INFO - lr: 8.6055e-06 gnorm: 1.19 [20:04:18< 4:25:55] +[titan] 2025-10-05 18:38:36,739 - root - INFO - Dumping profiler traces at step 32768 +[titan] 2025-10-05 18:38:36,778 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:38:41,140 - root - INFO - step: 32770 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 18:38:41,140 - root - INFO - lr: 8.6006e-06 gnorm: 1.18 [20:04:29< 4:25:44] +[titan] 2025-10-05 18:38:52,028 - root - INFO - step: 32775 loss: 1.8866 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 18:38:52,028 - root - INFO - lr: 8.5958e-06 gnorm: 1.16 [20:04:40< 4:25:33] +[titan] 2025-10-05 18:39:02,921 - root - INFO - step: 32780 loss: 
1.9463 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 18:39:02,921 - root - INFO - lr: 8.5909e-06 gnorm: 1.17 [20:04:51< 4:25:22] +[titan] 2025-10-05 18:39:13,797 - root - INFO - step: 32785 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 18:39:13,797 - root - INFO - lr: 8.5861e-06 gnorm: 1.19 [20:05:02< 4:25:11] +[titan] 2025-10-05 18:39:24,687 - root - INFO - step: 32790 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 18:39:24,688 - root - INFO - lr: 8.5813e-06 gnorm: 1.18 [20:05:13< 4:25:00] +[titan] 2025-10-05 18:39:35,548 - root - INFO - step: 32795 loss: 1.9151 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 18:39:35,548 - root - INFO - lr: 8.5764e-06 gnorm: 1.17 [20:05:24< 4:24:49] +[titan] 2025-10-05 18:39:44,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:39:46,421 - root - INFO - step: 32800 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:39:46,421 - root - INFO - lr: 8.5716e-06 gnorm: 1.14 [20:05:35< 4:24:38] +[titan] 2025-10-05 18:39:57,301 - root - INFO - step: 32805 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 18:39:57,301 - root - INFO - lr: 8.5668e-06 gnorm: 1.20 [20:05:46< 4:24:27] +[titan] 2025-10-05 18:40:08,174 - root - INFO - step: 32810 loss: 1.8700 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6536 +[titan] 2025-10-05 18:40:08,175 - root - INFO - lr: 8.5620e-06 gnorm: 1.15 [20:05:56< 4:24:16] +[titan] 2025-10-05 18:40:19,054 - root - INFO - step: 32815 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 18:40:19,054 - root - INFO - lr: 8.5572e-06 gnorm: 1.17 [20:06:07< 4:24:05] +[titan] 2025-10-05 18:40:29,922 - root - INFO - step: 32820 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:40:29,922 - root - INFO - lr: 8.5523e-06 gnorm: 1.20 [20:06:18< 4:23:54] +[titan] 2025-10-05 18:40:40,814 - root - INFO - step: 32825 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7226 +[titan] 2025-10-05 18:40:40,814 - root - INFO - lr: 8.5475e-06 gnorm: 1.14 [20:06:29< 4:23:43] +[titan] 2025-10-05 18:40:51,678 - root - INFO - step: 32830 loss: 1.9398 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:40:51,678 - root - INFO - lr: 
8.5427e-06 gnorm: 1.23 [20:06:40< 4:23:32] +[titan] 2025-10-05 18:41:02,570 - root - INFO - step: 32835 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:41:02,570 - root - INFO - lr: 8.5379e-06 gnorm: 1.15 [20:06:51< 4:23:21] +[titan] 2025-10-05 18:41:13,446 - root - INFO - step: 32840 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 18:41:13,447 - root - INFO - lr: 8.5331e-06 gnorm: 1.21 [20:07:02< 4:23:09] +[titan] 2025-10-05 18:41:24,359 - root - INFO - step: 32845 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:41:24,359 - root - INFO - lr: 8.5283e-06 gnorm: 1.16 [20:07:13< 4:22:58] +[titan] 2025-10-05 18:41:33,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:41:35,240 - root - INFO - step: 32850 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:41:35,240 - root - INFO - lr: 8.5235e-06 gnorm: 1.14 [20:07:23< 4:22:47] +[titan] 2025-10-05 18:41:46,124 - root - INFO - step: 32855 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 18:41:46,125 - root - INFO - lr: 8.5187e-06 gnorm: 1.17 [20:07:34< 4:22:36] +[titan] 2025-10-05 18:41:56,993 - root - INFO - step: 32860 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:41:56,993 - root - INFO - lr: 8.5139e-06 gnorm: 1.22 [20:07:45< 4:22:25] +[titan] 2025-10-05 18:42:07,859 - root - INFO - step: 32865 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 18:42:07,859 - root - INFO - lr: 8.5091e-06 gnorm: 1.20 [20:07:56< 4:22:14] +[titan] 2025-10-05 18:42:18,752 - root - INFO - step: 32870 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 18:42:18,752 - root - INFO - lr: 8.5044e-06 gnorm: 1.13 [20:08:07< 4:22:03] +[titan] 2025-10-05 18:42:29,644 - root - INFO - step: 32875 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 18:42:29,644 - root - INFO - lr: 8.4996e-06 gnorm: 1.19 [20:08:18< 4:21:52] +[titan] 2025-10-05 18:42:40,538 - root - INFO - step: 32880 loss: 1.9506 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 18:42:40,539 - root - INFO - lr: 
8.4948e-06 gnorm: 1.15 [20:08:29< 4:21:41] +[titan] 2025-10-05 18:42:51,405 - root - INFO - step: 32885 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:42:51,405 - root - INFO - lr: 8.4900e-06 gnorm: 1.14 [20:08:40< 4:21:30] +[titan] 2025-10-05 18:43:02,281 - root - INFO - step: 32890 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 18:43:02,281 - root - INFO - lr: 8.4853e-06 gnorm: 1.17 [20:08:51< 4:21:19] +[titan] 2025-10-05 18:43:13,144 - root - INFO - step: 32895 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7334 +[titan] 2025-10-05 18:43:13,144 - root - INFO - lr: 8.4805e-06 gnorm: 1.22 [20:09:01< 4:21:08] +[titan] 2025-10-05 18:43:21,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:43:24,029 - root - INFO - step: 32900 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:43:24,030 - root - INFO - lr: 8.4757e-06 gnorm: 1.16 [20:09:12< 4:20:57] +[titan] 2025-10-05 18:43:34,912 - root - INFO - step: 32905 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:43:34,912 - root - INFO - lr: 8.4710e-06 gnorm: 1.22 [20:09:23< 4:20:46] +[titan] 2025-10-05 18:43:45,784 - root - INFO - step: 32910 loss: 1.9113 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6908 +[titan] 2025-10-05 18:43:45,784 - root - INFO - lr: 8.4662e-06 gnorm: 1.19 [20:09:34< 4:20:35] +[titan] 2025-10-05 18:43:56,656 - root - INFO - step: 32915 loss: 1.9080 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6877 +[titan] 2025-10-05 18:43:56,657 - root - INFO - lr: 8.4614e-06 gnorm: 1.15 [20:09:45< 4:20:24] +[titan] 2025-10-05 18:44:07,519 - root - INFO - step: 32920 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 18:44:07,520 - root - INFO - lr: 8.4567e-06 gnorm: 1.14 [20:09:56< 4:20:13] +[titan] 2025-10-05 18:44:18,364 - root - INFO - step: 32925 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 18:44:18,364 - root - INFO - lr: 8.4519e-06 gnorm: 1.16 [20:10:07< 4:20:01] +[titan] 2025-10-05 18:44:29,222 - root - INFO - step: 32930 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:44:29,222 - root - INFO - lr: 
8.4472e-06 gnorm: 1.19 [20:10:17< 4:19:50] +[titan] 2025-10-05 18:44:40,084 - root - INFO - step: 32935 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 18:44:40,084 - root - INFO - lr: 8.4424e-06 gnorm: 1.20 [20:10:28< 4:19:39] +[titan] 2025-10-05 18:44:50,961 - root - INFO - step: 32940 loss: 2.0407 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 18:44:50,961 - root - INFO - lr: 8.4377e-06 gnorm: 1.21 [20:10:39< 4:19:28] +[titan] 2025-10-05 18:45:01,827 - root - INFO - step: 32945 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:45:01,827 - root - INFO - lr: 8.4330e-06 gnorm: 1.18 [20:10:50< 4:19:17] +[titan] 2025-10-05 18:45:10,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:45:12,694 - root - INFO - step: 32950 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:45:12,695 - root - INFO - lr: 8.4282e-06 gnorm: 1.17 [20:11:01< 4:19:06] +[titan] 2025-10-05 18:45:23,553 - root - INFO - step: 32955 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 18:45:23,553 - root - INFO - lr: 8.4235e-06 gnorm: 1.19 [20:11:12< 4:18:55] +[titan] 2025-10-05 18:45:34,381 - root - INFO - step: 32960 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 18:45:34,381 - root - INFO - lr: 8.4187e-06 gnorm: 1.16 [20:11:23< 4:18:44] +[titan] 2025-10-05 18:45:45,262 - root - INFO - step: 32965 loss: 2.0361 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 18:45:45,262 - root - INFO - lr: 8.4140e-06 gnorm: 1.21 [20:11:33< 4:18:33] +[titan] 2025-10-05 18:45:56,104 - root - INFO - step: 32970 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 18:45:56,105 - root - INFO - lr: 8.4093e-06 gnorm: 1.16 [20:11:44< 4:18:22] +[titan] 2025-10-05 18:46:06,947 - root - INFO - step: 32975 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7958 +[titan] 2025-10-05 18:46:06,948 - root - INFO - lr: 8.4046e-06 gnorm: 1.24 [20:11:55< 4:18:11] +[titan] 2025-10-05 18:46:17,797 - root - INFO - step: 32980 loss: 1.9700 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7415 +[titan] 2025-10-05 18:46:17,797 - root - INFO - lr: 
8.3999e-06 gnorm: 1.16 [20:12:06< 4:18:00] +[titan] 2025-10-05 18:46:28,662 - root - INFO - step: 32985 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 18:46:28,662 - root - INFO - lr: 8.3951e-06 gnorm: 1.18 [20:12:17< 4:17:49] +[titan] 2025-10-05 18:46:39,537 - root - INFO - step: 32990 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 18:46:39,537 - root - INFO - lr: 8.3904e-06 gnorm: 1.22 [20:12:28< 4:17:38] +[titan] 2025-10-05 18:46:50,425 - root - INFO - step: 32995 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:46:50,425 - root - INFO - lr: 8.3857e-06 gnorm: 1.15 [20:12:39< 4:17:27] +[titan] 2025-10-05 18:46:59,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:47:01,344 - root - INFO - step: 33000 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 18:47:01,344 - root - INFO - lr: 8.3810e-06 gnorm: 1.17 [20:12:50< 4:17:16] +[titan] 2025-10-05 18:47:12,220 - root - INFO - step: 33005 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:47:12,220 - root - INFO - lr: 8.3763e-06 gnorm: 1.17 [20:13:00< 4:17:05] +[titan] 2025-10-05 18:47:23,105 - root - INFO - step: 33010 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 18:47:23,105 - root - INFO - lr: 8.3716e-06 gnorm: 1.16 [20:13:11< 4:16:53] +[titan] 2025-10-05 18:47:33,991 - root - INFO - step: 33015 loss: 1.9630 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 18:47:33,991 - root - INFO - lr: 8.3669e-06 gnorm: 1.21 [20:13:22< 4:16:42] +[titan] 2025-10-05 18:47:44,854 - root - INFO - step: 33020 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:47:44,854 - root - INFO - lr: 8.3622e-06 gnorm: 1.20 [20:13:33< 4:16:31] +[titan] 2025-10-05 18:47:55,728 - root - INFO - step: 33025 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:47:55,728 - root - INFO - lr: 8.3575e-06 gnorm: 1.16 [20:13:44< 4:16:20] +[titan] 2025-10-05 18:48:06,621 - root - INFO - step: 33030 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 18:48:06,622 - root - INFO - lr: 
8.3528e-06 gnorm: 1.15 [20:13:55< 4:16:09] +[titan] 2025-10-05 18:48:17,519 - root - INFO - step: 33035 loss: 2.0726 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 18:48:17,519 - root - INFO - lr: 8.3481e-06 gnorm: 1.22 [20:14:06< 4:15:58] +[titan] 2025-10-05 18:48:28,405 - root - INFO - step: 33040 loss: 1.9946 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 18:48:28,405 - root - INFO - lr: 8.3435e-06 gnorm: 1.19 [20:14:17< 4:15:47] +[titan] 2025-10-05 18:48:39,282 - root - INFO - step: 33045 loss: 1.9543 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 18:48:39,282 - root - INFO - lr: 8.3388e-06 gnorm: 1.16 [20:14:27< 4:15:36] +[titan] 2025-10-05 18:48:47,979 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:48:50,167 - root - INFO - step: 33050 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:48:50,167 - root - INFO - lr: 8.3341e-06 gnorm: 1.22 [20:14:38< 4:15:25] +[titan] 2025-10-05 18:49:01,041 - root - INFO - step: 33055 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7799 +[titan] 2025-10-05 18:49:01,041 - root - INFO - lr: 8.3294e-06 gnorm: 1.21 [20:14:49< 4:15:14] +[titan] 2025-10-05 18:49:11,966 - root - INFO - step: 33060 loss: 1.9156 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6929 +[titan] 2025-10-05 18:49:11,967 - root - INFO - lr: 8.3248e-06 gnorm: 1.15 [20:15:00< 4:15:03] +[titan] 2025-10-05 18:49:22,851 - root - INFO - step: 33065 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 18:49:22,851 - root - INFO - lr: 8.3201e-06 gnorm: 1.22 [20:15:11< 4:14:52] +[titan] 2025-10-05 18:49:33,699 - root - INFO - step: 33070 loss: 1.9488 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 18:49:33,699 - root - INFO - lr: 8.3154e-06 gnorm: 1.17 [20:15:22< 4:14:41] +[titan] 2025-10-05 18:49:44,562 - root - INFO - step: 33075 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 18:49:44,562 - root - INFO - lr: 8.3108e-06 gnorm: 1.18 [20:15:33< 4:14:30] +[titan] 2025-10-05 18:49:55,434 - root - INFO - step: 33080 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6975 +[titan] 2025-10-05 18:49:55,434 - root - INFO - lr: 
8.3061e-06 gnorm: 1.15 [20:15:44< 4:14:19] +[titan] 2025-10-05 18:50:06,341 - root - INFO - step: 33085 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 18:50:06,341 - root - INFO - lr: 8.3015e-06 gnorm: 1.19 [20:15:55< 4:14:08] +[titan] 2025-10-05 18:50:17,204 - root - INFO - step: 33090 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 18:50:17,204 - root - INFO - lr: 8.2968e-06 gnorm: 1.17 [20:16:05< 4:13:57] +[titan] 2025-10-05 18:50:28,085 - root - INFO - step: 33095 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 18:50:28,085 - root - INFO - lr: 8.2922e-06 gnorm: 1.19 [20:16:16< 4:13:46] +[titan] 2025-10-05 18:50:36,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:50:38,970 - root - INFO - step: 33100 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 18:50:38,970 - root - INFO - lr: 8.2875e-06 gnorm: 1.16 [20:16:27< 4:13:34] +[titan] 2025-10-05 18:50:49,853 - root - INFO - step: 33105 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:50:49,853 - root - INFO - lr: 8.2829e-06 gnorm: 1.18 [20:16:38< 4:13:23] +[titan] 2025-10-05 18:51:00,737 - root - INFO - step: 33110 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 18:51:00,737 - root - INFO - lr: 8.2782e-06 gnorm: 1.20 [20:16:49< 4:13:12] +[titan] 2025-10-05 18:51:11,650 - root - INFO - step: 33115 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7236 +[titan] 2025-10-05 18:51:11,650 - root - INFO - lr: 8.2736e-06 gnorm: 1.18 [20:17:00< 4:13:01] +[titan] 2025-10-05 18:51:22,517 - root - INFO - step: 33120 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 18:51:22,518 - root - INFO - lr: 8.2690e-06 gnorm: 1.15 [20:17:11< 4:12:50] +[titan] 2025-10-05 18:51:33,423 - root - INFO - step: 33125 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 18:51:33,423 - root - INFO - lr: 8.2643e-06 gnorm: 1.16 [20:17:22< 4:12:39] +[titan] 2025-10-05 18:51:44,314 - root - INFO - step: 33130 loss: 1.9891 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:51:44,314 - root - INFO - lr: 
8.2597e-06 gnorm: 1.16 [20:17:33< 4:12:28] +[titan] 2025-10-05 18:51:55,207 - root - INFO - step: 33135 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 18:51:55,207 - root - INFO - lr: 8.2551e-06 gnorm: 1.19 [20:17:43< 4:12:17] +[titan] 2025-10-05 18:52:06,124 - root - INFO - step: 33140 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 18:52:06,124 - root - INFO - lr: 8.2504e-06 gnorm: 1.15 [20:17:54< 4:12:06] +[titan] 2025-10-05 18:52:17,015 - root - INFO - step: 33145 loss: 1.8716 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6545 +[titan] 2025-10-05 18:52:17,015 - root - INFO - lr: 8.2458e-06 gnorm: 1.17 [20:18:05< 4:11:55] +[titan] 2025-10-05 18:52:25,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:52:27,896 - root - INFO - step: 33150 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 18:52:27,896 - root - INFO - lr: 8.2412e-06 gnorm: 1.28 [20:18:16< 4:11:44] +[titan] 2025-10-05 18:52:38,755 - root - INFO - step: 33155 loss: 1.9340 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 18:52:38,755 - root - INFO - lr: 8.2366e-06 gnorm: 1.17 [20:18:27< 4:11:33] +[titan] 2025-10-05 18:52:49,651 - root - INFO - step: 33160 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6592 +[titan] 2025-10-05 18:52:49,651 - root - INFO - lr: 8.2320e-06 gnorm: 1.14 [20:18:38< 4:11:22] +[titan] 2025-10-05 18:53:00,524 - root - INFO - step: 33165 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7065 +[titan] 2025-10-05 18:53:00,525 - root - INFO - lr: 8.2274e-06 gnorm: 1.16 [20:18:49< 4:11:11] +[titan] 2025-10-05 18:53:11,434 - root - INFO - step: 33170 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:53:11,435 - root - INFO - lr: 8.2228e-06 gnorm: 1.19 [20:19:00< 4:11:00] +[titan] 2025-10-05 18:53:22,306 - root - INFO - step: 33175 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:53:22,306 - root - INFO - lr: 8.2182e-06 gnorm: 1.21 [20:19:11< 4:10:49] +[titan] 2025-10-05 18:53:33,152 - root - INFO - step: 33180 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 18:53:33,152 - root - INFO - lr: 
8.2136e-06 gnorm: 1.19 [20:19:21< 4:10:38] +[titan] 2025-10-05 18:53:44,004 - root - INFO - step: 33185 loss: 1.9935 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 18:53:44,004 - root - INFO - lr: 8.2090e-06 gnorm: 1.16 [20:19:32< 4:10:27] +[titan] 2025-10-05 18:53:54,872 - root - INFO - step: 33190 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:53:54,872 - root - INFO - lr: 8.2044e-06 gnorm: 1.21 [20:19:43< 4:10:15] +[titan] 2025-10-05 18:54:05,750 - root - INFO - step: 33195 loss: 2.0158 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 18:54:05,750 - root - INFO - lr: 8.1998e-06 gnorm: 1.18 [20:19:54< 4:10:04] +[titan] 2025-10-05 18:54:14,509 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:54:16,691 - root - INFO - step: 33200 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 18:54:16,691 - root - INFO - lr: 8.1952e-06 gnorm: 1.21 [20:20:05< 4:09:53] +[titan] 2025-10-05 18:54:27,562 - root - INFO - step: 33205 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7204 +[titan] 2025-10-05 18:54:27,563 - root - INFO - lr: 8.1906e-06 gnorm: 1.18 [20:20:16< 4:09:42] +[titan] 2025-10-05 18:54:38,424 - root - INFO - step: 33210 loss: 1.9533 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 18:54:38,424 - root - INFO - lr: 8.1861e-06 gnorm: 1.19 [20:20:27< 4:09:31] +[titan] 2025-10-05 18:54:49,288 - root - INFO - step: 33215 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 18:54:49,289 - root - INFO - lr: 8.1815e-06 gnorm: 1.22 [20:20:37< 4:09:20] +[titan] 2025-10-05 18:55:00,163 - root - INFO - step: 33220 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:55:00,163 - root - INFO - lr: 8.1769e-06 gnorm: 1.21 [20:20:48< 4:09:09] +[titan] 2025-10-05 18:55:11,094 - root - INFO - step: 33225 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7865 +[titan] 2025-10-05 18:55:11,094 - root - INFO - lr: 8.1723e-06 gnorm: 1.22 [20:20:59< 4:08:58] +[titan] 2025-10-05 18:55:21,957 - root - INFO - step: 33230 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 18:55:21,957 - root - INFO - lr: 
8.1678e-06 gnorm: 1.18 [20:21:10< 4:08:47] +[titan] 2025-10-05 18:55:32,818 - root - INFO - step: 33235 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 18:55:32,818 - root - INFO - lr: 8.1632e-06 gnorm: 1.16 [20:21:21< 4:08:36] +[titan] 2025-10-05 18:55:43,665 - root - INFO - step: 33240 loss: 2.0182 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 18:55:43,665 - root - INFO - lr: 8.1586e-06 gnorm: 1.18 [20:21:32< 4:08:25] +[titan] 2025-10-05 18:55:54,510 - root - INFO - step: 33245 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 18:55:54,510 - root - INFO - lr: 8.1541e-06 gnorm: 1.19 [20:21:43< 4:08:14] +[titan] 2025-10-05 18:56:03,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:56:05,369 - root - INFO - step: 33250 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 18:56:05,369 - root - INFO - lr: 8.1495e-06 gnorm: 1.22 [20:21:54< 4:08:03] +[titan] 2025-10-05 18:56:16,302 - root - INFO - step: 33255 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 18:56:16,302 - root - INFO - lr: 8.1450e-06 gnorm: 1.21 [20:22:04< 4:07:52] +[titan] 2025-10-05 18:56:27,156 - root - INFO - step: 33260 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 18:56:27,156 - root - INFO - lr: 8.1404e-06 gnorm: 1.17 [20:22:15< 4:07:41] +[titan] 2025-10-05 18:56:37,991 - root - INFO - step: 33265 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 18:56:37,991 - root - INFO - lr: 8.1359e-06 gnorm: 1.18 [20:22:26< 4:07:30] +[titan] 2025-10-05 18:56:48,867 - root - INFO - step: 33270 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 18:56:48,867 - root - INFO - lr: 8.1313e-06 gnorm: 1.14 [20:22:37< 4:07:19] +[titan] 2025-10-05 18:56:59,716 - root - INFO - step: 33275 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 18:56:59,716 - root - INFO - lr: 8.1268e-06 gnorm: 1.16 [20:22:48< 4:07:08] +[titan] 2025-10-05 18:57:10,748 - root - INFO - step: 33280 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 29,703 tflops: 412.08 mfu: 41.67% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:57:10,748 - root - INFO - lr: 
8.1223e-06 gnorm: 1.20 [20:22:59< 4:06:57] +[titan] 2025-10-05 18:57:10,929 - root - INFO - Dumping profiler traces at step 33280 +[titan] 2025-10-05 18:57:10,969 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:57:21,865 - root - INFO - step: 33285 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 29,477 tflops: 408.95 mfu: 41.35% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:57:21,865 - root - INFO - lr: 8.1177e-06 gnorm: 1.18 [20:23:10< 4:06:45] +[titan] 2025-10-05 18:57:32,746 - root - INFO - step: 33290 loss: 1.9692 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 18:57:32,747 - root - INFO - lr: 8.1132e-06 gnorm: 1.19 [20:23:21< 4:06:34] +[titan] 2025-10-05 18:57:43,611 - root - INFO - step: 33295 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 18:57:43,611 - root - INFO - lr: 8.1087e-06 gnorm: 1.19 [20:23:32< 4:06:23] +[titan] 2025-10-05 18:57:52,270 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:57:54,457 - root - INFO - step: 33300 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 18:57:54,457 - root - INFO - lr: 8.1041e-06 gnorm: 1.22 [20:23:43< 4:06:12] +[titan] 2025-10-05 18:58:05,339 - root - INFO - step: 33305 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:58:05,339 - root - INFO - lr: 8.0996e-06 gnorm: 1.21 [20:23:54< 4:06:01] +[titan] 2025-10-05 18:58:16,262 - root - INFO - step: 33310 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 18:58:16,263 - root - INFO - lr: 8.0951e-06 gnorm: 1.24 [20:24:04< 4:05:50] +[titan] 2025-10-05 18:58:27,116 - root - INFO - step: 33315 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 18:58:27,116 - root - INFO - lr: 8.0906e-06 gnorm: 1.20 [20:24:15< 4:05:39] +[titan] 2025-10-05 18:58:38,011 - root - INFO - step: 33320 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 18:58:38,011 - root - INFO - lr: 8.0861e-06 gnorm: 1.18 [20:24:26< 4:05:28] +[titan] 2025-10-05 18:58:48,874 - root - INFO - step: 33325 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 18:58:48,874 - root - INFO - lr: 8.0816e-06 gnorm: 1.17 [20:24:37< 4:05:17] +[titan] 2025-10-05 18:58:59,752 - root - INFO - step: 33330 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 18:58:59,752 - root - INFO - lr: 
8.0771e-06 gnorm: 1.18 [20:24:48< 4:05:06] +[titan] 2025-10-05 18:59:10,641 - root - INFO - step: 33335 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 18:59:10,641 - root - INFO - lr: 8.0725e-06 gnorm: 1.20 [20:24:59< 4:04:55] +[titan] 2025-10-05 18:59:21,564 - root - INFO - step: 33340 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 18:59:21,564 - root - INFO - lr: 8.0680e-06 gnorm: 1.15 [20:25:10< 4:04:44] +[titan] 2025-10-05 18:59:32,450 - root - INFO - step: 33345 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:59:32,450 - root - INFO - lr: 8.0636e-06 gnorm: 1.18 [20:25:21< 4:04:33] +[titan] 2025-10-05 18:59:41,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:59:43,356 - root - INFO - step: 33350 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:59:43,356 - root - INFO - lr: 8.0591e-06 gnorm: 1.16 [20:25:32< 4:04:22] +[titan] 2025-10-05 18:59:54,223 - root - INFO - step: 33355 loss: 1.9358 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7114 +[titan] 2025-10-05 18:59:54,223 - root - INFO - lr: 8.0546e-06 gnorm: 1.18 [20:25:42< 4:04:11] +[titan] 2025-10-05 19:00:05,102 - root - INFO - step: 33360 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 19:00:05,102 - root - INFO - lr: 8.0501e-06 gnorm: 1.17 [20:25:53< 4:04:00] +[titan] 2025-10-05 19:00:16,037 - root - INFO - step: 33365 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:00:16,037 - root - INFO - lr: 8.0456e-06 gnorm: 1.20 [20:26:04< 4:03:49] +[titan] 2025-10-05 19:00:26,915 - root - INFO - step: 33370 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:00:26,916 - root - INFO - lr: 8.0411e-06 gnorm: 1.20 [20:26:15< 4:03:38] +[titan] 2025-10-05 19:00:37,762 - root - INFO - step: 33375 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:00:37,763 - root - INFO - lr: 8.0366e-06 gnorm: 1.21 [20:26:26< 4:03:27] +[titan] 2025-10-05 19:00:48,614 - root - INFO - step: 33380 loss: 1.9232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:00:48,614 - root - INFO - lr: 
8.0322e-06 gnorm: 1.18 [20:26:37< 4:03:15] +[titan] 2025-10-05 19:00:59,483 - root - INFO - step: 33385 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:00:59,483 - root - INFO - lr: 8.0277e-06 gnorm: 1.19 [20:26:48< 4:03:04] +[titan] 2025-10-05 19:01:10,340 - root - INFO - step: 33390 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 19:01:10,340 - root - INFO - lr: 8.0232e-06 gnorm: 1.19 [20:26:59< 4:02:53] +[titan] 2025-10-05 19:01:21,250 - root - INFO - step: 33395 loss: 1.9470 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 19:01:21,250 - root - INFO - lr: 8.0187e-06 gnorm: 1.17 [20:27:09< 4:02:42] +[titan] 2025-10-05 19:01:29,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:01:32,102 - root - INFO - step: 33400 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 19:01:32,102 - root - INFO - lr: 8.0143e-06 gnorm: 1.17 [20:27:20< 4:02:31] +[titan] 2025-10-05 19:01:42,959 - root - INFO - step: 33405 loss: 1.8686 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 19:01:42,960 - root - INFO - lr: 8.0098e-06 gnorm: 1.18 [20:27:31< 4:02:20] +[titan] 2025-10-05 19:01:53,819 - root - INFO - step: 33410 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 19:01:53,819 - root - INFO - lr: 8.0054e-06 gnorm: 1.19 [20:27:42< 4:02:09] +[titan] 2025-10-05 19:02:04,734 - root - INFO - step: 33415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 19:02:04,734 - root - INFO - lr: 8.0009e-06 gnorm: 1.18 [20:27:53< 4:01:58] +[titan] 2025-10-05 19:02:15,660 - root - INFO - step: 33420 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 19:02:15,661 - root - INFO - lr: 7.9965e-06 gnorm: 1.20 [20:28:04< 4:01:47] +[titan] 2025-10-05 19:02:26,561 - root - INFO - step: 33425 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 19:02:26,561 - root - INFO - lr: 7.9920e-06 gnorm: 1.15 [20:28:15< 4:01:36] +[titan] 2025-10-05 19:02:37,445 - root - INFO - step: 33430 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 19:02:37,445 - root - INFO - lr: 
7.9876e-06 gnorm: 1.18 [20:28:26< 4:01:25] +[titan] 2025-10-05 19:02:48,327 - root - INFO - step: 33435 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:02:48,327 - root - INFO - lr: 7.9831e-06 gnorm: 1.16 [20:28:37< 4:01:14] +[titan] 2025-10-05 19:02:59,208 - root - INFO - step: 33440 loss: 1.9304 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 19:02:59,209 - root - INFO - lr: 7.9787e-06 gnorm: 1.20 [20:28:47< 4:01:03] +[titan] 2025-10-05 19:03:10,117 - root - INFO - step: 33445 loss: 2.0526 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8150 +[titan] 2025-10-05 19:03:10,117 - root - INFO - lr: 7.9742e-06 gnorm: 1.21 [20:28:58< 4:00:52] +[titan] 2025-10-05 19:03:18,857 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:03:21,042 - root - INFO - step: 33450 loss: 1.9353 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:03:21,042 - root - INFO - lr: 7.9698e-06 gnorm: 1.16 [20:29:09< 4:00:41] +[titan] 2025-10-05 19:03:31,901 - root - INFO - step: 33455 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:03:31,901 - root - INFO - lr: 7.9654e-06 gnorm: 1.18 [20:29:20< 4:00:30] +[titan] 2025-10-05 19:03:42,767 - root - INFO - step: 33460 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:03:42,767 - root - INFO - lr: 7.9610e-06 gnorm: 1.19 [20:29:31< 4:00:19] +[titan] 2025-10-05 19:03:53,626 - root - INFO - step: 33465 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:03:53,626 - root - INFO - lr: 7.9565e-06 gnorm: 1.20 [20:29:42< 4:00:08] +[titan] 2025-10-05 19:04:04,499 - root - INFO - step: 33470 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:04:04,499 - root - INFO - lr: 7.9521e-06 gnorm: 1.22 [20:29:53< 3:59:57] +[titan] 2025-10-05 19:04:15,390 - root - INFO - step: 33475 loss: 1.9236 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:04:15,390 - root - INFO - lr: 7.9477e-06 gnorm: 1.15 [20:30:04< 3:59:46] +[titan] 2025-10-05 19:04:26,338 - root - INFO - step: 33480 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 19:04:26,338 - root - INFO - lr: 
7.9433e-06 gnorm: 1.18 [20:30:15< 3:59:34] +[titan] 2025-10-05 19:04:37,222 - root - INFO - step: 33485 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 19:04:37,222 - root - INFO - lr: 7.9389e-06 gnorm: 1.12 [20:30:25< 3:59:23] +[titan] 2025-10-05 19:04:48,095 - root - INFO - step: 33490 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 19:04:48,095 - root - INFO - lr: 7.9345e-06 gnorm: 1.17 [20:30:36< 3:59:12] +[titan] 2025-10-05 19:04:58,969 - root - INFO - step: 33495 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 19:04:58,969 - root - INFO - lr: 7.9301e-06 gnorm: 1.19 [20:30:47< 3:59:01] +[titan] 2025-10-05 19:05:07,650 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:05:09,844 - root - INFO - step: 33500 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 19:05:09,844 - root - INFO - lr: 7.9256e-06 gnorm: 1.16 [20:30:58< 3:58:50] +[titan] 2025-10-05 19:05:20,803 - root - INFO - step: 33505 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7103 +[titan] 2025-10-05 19:05:20,803 - root - INFO - lr: 7.9212e-06 gnorm: 1.17 [20:31:09< 3:58:39] +[titan] 2025-10-05 19:05:31,705 - root - INFO - step: 33510 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 19:05:31,706 - root - INFO - lr: 7.9169e-06 gnorm: 1.18 [20:31:20< 3:58:28] +[titan] 2025-10-05 19:05:42,585 - root - INFO - step: 33515 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 19:05:42,585 - root - INFO - lr: 7.9125e-06 gnorm: 1.16 [20:31:31< 3:58:17] +[titan] 2025-10-05 19:05:53,459 - root - INFO - step: 33520 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 19:05:53,459 - root - INFO - lr: 7.9081e-06 gnorm: 1.20 [20:31:42< 3:58:06] +[titan] 2025-10-05 19:06:04,332 - root - INFO - step: 33525 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6778 +[titan] 2025-10-05 19:06:04,332 - root - INFO - lr: 7.9037e-06 gnorm: 1.12 [20:31:52< 3:57:55] +[titan] 2025-10-05 19:06:15,198 - root - INFO - step: 33530 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 19:06:15,198 - root - INFO - lr: 
7.8993e-06 gnorm: 1.17 [20:32:03< 3:57:44] +[titan] 2025-10-05 19:06:26,152 - root - INFO - step: 33535 loss: 1.9859 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 19:06:26,152 - root - INFO - lr: 7.8949e-06 gnorm: 1.24 [20:32:14< 3:57:33] +[titan] 2025-10-05 19:06:37,024 - root - INFO - step: 33540 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7744 +[titan] 2025-10-05 19:06:37,024 - root - INFO - lr: 7.8905e-06 gnorm: 1.22 [20:32:25< 3:57:22] +[titan] 2025-10-05 19:06:47,931 - root - INFO - step: 33545 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 19:06:47,931 - root - INFO - lr: 7.8862e-06 gnorm: 1.24 [20:32:36< 3:57:11] +[titan] 2025-10-05 19:06:56,619 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:06:58,805 - root - INFO - step: 33550 loss: 1.9223 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:06:58,805 - root - INFO - lr: 7.8818e-06 gnorm: 1.17 [20:32:47< 3:57:00] +[titan] 2025-10-05 19:07:09,652 - root - INFO - step: 33555 loss: 1.9140 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:07:09,653 - root - INFO - lr: 7.8774e-06 gnorm: 1.21 [20:32:58< 3:56:49] +[titan] 2025-10-05 19:07:20,562 - root - INFO - step: 33560 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:07:20,563 - root - INFO - lr: 7.8731e-06 gnorm: 1.23 [20:33:09< 3:56:38] +[titan] 2025-10-05 19:07:31,425 - root - INFO - step: 33565 loss: 1.8946 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 19:07:31,425 - root - INFO - lr: 7.8687e-06 gnorm: 1.19 [20:33:20< 3:56:27] +[titan] 2025-10-05 19:07:42,303 - root - INFO - step: 33570 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:07:42,303 - root - INFO - lr: 7.8643e-06 gnorm: 1.20 [20:33:30< 3:56:16] +[titan] 2025-10-05 19:07:53,210 - root - INFO - step: 33575 loss: 1.9262 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:07:53,210 - root - INFO - lr: 7.8600e-06 gnorm: 1.18 [20:33:41< 3:56:05] +[titan] 2025-10-05 19:08:04,072 - root - INFO - step: 33580 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 19:08:04,072 - root - INFO - lr: 
7.8556e-06 gnorm: 1.18 [20:33:52< 3:55:53] +[titan] 2025-10-05 19:08:14,947 - root - INFO - step: 33585 loss: 1.8953 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 19:08:14,947 - root - INFO - lr: 7.8513e-06 gnorm: 1.14 [20:34:03< 3:55:42] +[titan] 2025-10-05 19:08:25,883 - root - INFO - step: 33590 loss: 1.9998 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 19:08:25,883 - root - INFO - lr: 7.8469e-06 gnorm: 1.19 [20:34:14< 3:55:31] +[titan] 2025-10-05 19:08:36,748 - root - INFO - step: 33595 loss: 1.8788 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6618 +[titan] 2025-10-05 19:08:36,748 - root - INFO - lr: 7.8426e-06 gnorm: 1.17 [20:34:25< 3:55:20] +[titan] 2025-10-05 19:08:45,430 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:08:47,610 - root - INFO - step: 33600 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7176 +[titan] 2025-10-05 19:08:47,610 - root - INFO - lr: 7.8382e-06 gnorm: 1.20 [20:34:36< 3:55:09] +[titan] 2025-10-05 19:08:58,491 - root - INFO - step: 33605 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7542 +[titan] 2025-10-05 19:08:58,491 - root - INFO - lr: 7.8339e-06 gnorm: 1.19 [20:34:47< 3:54:58] +[titan] 2025-10-05 19:09:09,347 - root - INFO - step: 33610 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 19:09:09,347 - root - INFO - lr: 7.8296e-06 gnorm: 1.17 [20:34:58< 3:54:47] +[titan] 2025-10-05 19:09:20,217 - root - INFO - step: 33615 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 19:09:20,217 - root - INFO - lr: 7.8252e-06 gnorm: 1.18 [20:35:08< 3:54:36] +[titan] 2025-10-05 19:09:31,144 - root - INFO - step: 33620 loss: 1.9273 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 19:09:31,144 - root - INFO - lr: 7.8209e-06 gnorm: 1.16 [20:35:19< 3:54:25] +[titan] 2025-10-05 19:09:41,985 - root - INFO - step: 33625 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 19:09:41,985 - root - INFO - lr: 7.8166e-06 gnorm: 1.18 [20:35:30< 3:54:14] +[titan] 2025-10-05 19:09:52,855 - root - INFO - step: 33630 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7601 +[titan] 2025-10-05 19:09:52,855 - root - INFO - lr: 
7.8123e-06 gnorm: 1.21 [20:35:41< 3:54:03] +[titan] 2025-10-05 19:10:03,725 - root - INFO - step: 33635 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:10:03,725 - root - INFO - lr: 7.8080e-06 gnorm: 1.19 [20:35:52< 3:53:52] +[titan] 2025-10-05 19:10:14,597 - root - INFO - step: 33640 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6948 +[titan] 2025-10-05 19:10:14,597 - root - INFO - lr: 7.8036e-06 gnorm: 1.18 [20:36:03< 3:53:41] +[titan] 2025-10-05 19:10:25,501 - root - INFO - step: 33645 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 19:10:25,501 - root - INFO - lr: 7.7993e-06 gnorm: 1.17 [20:36:14< 3:53:30] +[titan] 2025-10-05 19:10:34,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:10:36,352 - root - INFO - step: 33650 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:10:36,352 - root - INFO - lr: 7.7950e-06 gnorm: 1.18 [20:36:25< 3:53:19] +[titan] 2025-10-05 19:10:47,197 - root - INFO - step: 33655 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 19:10:47,197 - root - INFO - lr: 7.7907e-06 gnorm: 1.18 [20:36:35< 3:53:08] +[titan] 2025-10-05 19:10:58,037 - root - INFO - step: 33660 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:10:58,037 - root - INFO - lr: 7.7864e-06 gnorm: 1.20 [20:36:46< 3:52:57] +[titan] 2025-10-05 19:11:08,895 - root - INFO - step: 33665 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 19:11:08,895 - root - INFO - lr: 7.7821e-06 gnorm: 1.18 [20:36:57< 3:52:46] +[titan] 2025-10-05 19:11:19,804 - root - INFO - step: 33670 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:11:19,805 - root - INFO - lr: 7.7778e-06 gnorm: 1.13 [20:37:08< 3:52:35] +[titan] 2025-10-05 19:11:30,707 - root - INFO - step: 33675 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:11:30,707 - root - INFO - lr: 7.7735e-06 gnorm: 1.19 [20:37:19< 3:52:23] +[titan] 2025-10-05 19:11:41,571 - root - INFO - step: 33680 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 19:11:41,571 - root - INFO - lr: 
7.7692e-06 gnorm: 1.15 [20:37:30< 3:52:12] +[titan] 2025-10-05 19:11:52,439 - root - INFO - step: 33685 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7395 +[titan] 2025-10-05 19:11:52,439 - root - INFO - lr: 7.7649e-06 gnorm: 1.17 [20:37:41< 3:52:01] +[titan] 2025-10-05 19:12:03,278 - root - INFO - step: 33690 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 19:12:03,278 - root - INFO - lr: 7.7606e-06 gnorm: 1.21 [20:37:51< 3:51:50] +[titan] 2025-10-05 19:12:14,126 - root - INFO - step: 33695 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:12:14,126 - root - INFO - lr: 7.7564e-06 gnorm: 1.23 [20:38:02< 3:51:39] +[titan] 2025-10-05 19:12:22,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:12:25,032 - root - INFO - step: 33700 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 19:12:25,032 - root - INFO - lr: 7.7521e-06 gnorm: 1.18 [20:38:13< 3:51:28] +[titan] 2025-10-05 19:12:35,912 - root - INFO - step: 33705 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 19:12:35,912 - root - INFO - lr: 7.7478e-06 gnorm: 1.21 [20:38:24< 3:51:17] +[titan] 2025-10-05 19:12:46,776 - root - INFO - step: 33710 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 19:12:46,776 - root - INFO - lr: 7.7435e-06 gnorm: 1.19 [20:38:35< 3:51:06] +[titan] 2025-10-05 19:12:57,642 - root - INFO - step: 33715 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 19:12:57,642 - root - INFO - lr: 7.7393e-06 gnorm: 1.17 [20:38:46< 3:50:55] +[titan] 2025-10-05 19:13:08,509 - root - INFO - step: 33720 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6902 +[titan] 2025-10-05 19:13:08,509 - root - INFO - lr: 7.7350e-06 gnorm: 1.19 [20:38:57< 3:50:44] +[titan] 2025-10-05 19:13:19,370 - root - INFO - step: 33725 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:13:19,370 - root - INFO - lr: 7.7307e-06 gnorm: 1.18 [20:39:08< 3:50:33] +[titan] 2025-10-05 19:13:30,375 - root - INFO - step: 33730 loss: 1.9645 memory: 118.84GiB(85.28%) tps: 29,776 tflops: 413.09 mfu: 41.77% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 19:13:30,375 - root - INFO - lr: 
7.7265e-06 gnorm: 1.18 [20:39:19< 3:50:22] +[titan] 2025-10-05 19:13:41,281 - root - INFO - step: 33735 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 19:13:41,281 - root - INFO - lr: 7.7222e-06 gnorm: 1.18 [20:39:29< 3:50:11] +[titan] 2025-10-05 19:13:52,137 - root - INFO - step: 33740 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 19:13:52,137 - root - INFO - lr: 7.7180e-06 gnorm: 1.20 [20:39:40< 3:50:00] +[titan] 2025-10-05 19:14:03,003 - root - INFO - step: 33745 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 19:14:03,003 - root - INFO - lr: 7.7137e-06 gnorm: 1.17 [20:39:51< 3:49:49] +[titan] 2025-10-05 19:14:11,685 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:14:13,871 - root - INFO - step: 33750 loss: 2.0153 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7820 +[titan] 2025-10-05 19:14:13,871 - root - INFO - lr: 7.7095e-06 gnorm: 1.23 [20:40:02< 3:49:38] +[titan] 2025-10-05 19:14:24,752 - root - INFO - step: 33755 loss: 1.8533 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2139 global_avg_mtp_loss: 1.6394 +[titan] 2025-10-05 19:14:24,752 - root - INFO - lr: 7.7052e-06 gnorm: 1.18 [20:40:13< 3:49:27] +[titan] 2025-10-05 19:14:35,653 - root - INFO - step: 33760 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7664 +[titan] 2025-10-05 19:14:35,654 - root - INFO - lr: 7.7010e-06 gnorm: 1.20 [20:40:24< 3:49:16] +[titan] 2025-10-05 19:14:46,559 - root - INFO - step: 33765 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 19:14:46,559 - root - INFO - lr: 7.6967e-06 gnorm: 1.17 [20:40:35< 3:49:05] +[titan] 2025-10-05 19:14:57,429 - root - INFO - step: 33770 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6631 +[titan] 2025-10-05 19:14:57,429 - root - INFO - lr: 7.6925e-06 gnorm: 1.19 [20:40:46< 3:48:54] +[titan] 2025-10-05 19:15:08,283 - root - INFO - step: 33775 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 19:15:08,283 - root - INFO - lr: 7.6883e-06 gnorm: 1.20 [20:40:56< 3:48:43] +[titan] 2025-10-05 19:15:19,145 - root - INFO - step: 33780 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:15:19,145 - root - INFO - lr: 
7.6841e-06 gnorm: 1.21 [20:41:07< 3:48:31] +[titan] 2025-10-05 19:15:30,024 - root - INFO - step: 33785 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6677 +[titan] 2025-10-05 19:15:30,024 - root - INFO - lr: 7.6798e-06 gnorm: 1.18 [20:41:18< 3:48:20] +[titan] 2025-10-05 19:15:40,968 - root - INFO - step: 33790 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:15:40,968 - root - INFO - lr: 7.6756e-06 gnorm: 1.18 [20:41:29< 3:48:09] +[titan] 2025-10-05 19:15:45,497 - root - INFO - Dumping profiler traces at step 33792 +[titan] 2025-10-05 19:15:45,535 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:15:52,106 - root - INFO - step: 33795 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 29,421 tflops: 408.17 mfu: 41.27% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 19:15:52,106 - root - INFO - lr: 7.6714e-06 gnorm: 1.19 [20:41:40< 3:47:58] +[titan] 2025-10-05 19:16:00,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:16:03,008 - root - INFO - step: 33800 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:16:03,008 - root - INFO - lr: 7.6672e-06 gnorm: 1.20 [20:41:51< 3:47:47] +[titan] 2025-10-05 19:16:13,885 - root - INFO - step: 33805 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 19:16:13,885 - root - INFO - lr: 7.6630e-06 gnorm: 1.17 [20:42:02< 3:47:36] +[titan] 2025-10-05 19:16:24,767 - root - INFO - step: 33810 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 19:16:24,767 - root - INFO - lr: 7.6587e-06 gnorm: 1.15 [20:42:13< 3:47:25] +[titan] 2025-10-05 19:16:35,714 - root - INFO - step: 33815 loss: 2.0005 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 19:16:35,714 - root - INFO - lr: 7.6545e-06 gnorm: 1.20 [20:42:24< 3:47:14] +[titan] 2025-10-05 19:16:46,588 - root - INFO - step: 33820 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 19:16:46,588 - root - INFO - lr: 7.6503e-06 gnorm: 1.18 [20:42:35< 3:47:03] +[titan] 2025-10-05 19:16:57,467 - root - INFO - step: 33825 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6954 +[titan] 2025-10-05 19:16:57,467 - root - INFO - lr: 7.6461e-06 gnorm: 1.16 [20:42:46< 3:46:52] +[titan] 2025-10-05 19:17:08,370 - root - INFO - step: 33830 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 19:17:08,371 - root - INFO - lr: 
7.6419e-06 gnorm: 1.15 [20:42:57< 3:46:41] +[titan] 2025-10-05 19:17:19,239 - root - INFO - step: 33835 loss: 1.9118 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:17:19,240 - root - INFO - lr: 7.6377e-06 gnorm: 1.19 [20:43:07< 3:46:30] +[titan] 2025-10-05 19:17:30,168 - root - INFO - step: 33840 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7619 +[titan] 2025-10-05 19:17:30,168 - root - INFO - lr: 7.6335e-06 gnorm: 1.20 [20:43:18< 3:46:19] +[titan] 2025-10-05 19:17:41,047 - root - INFO - step: 33845 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:17:41,047 - root - INFO - lr: 7.6294e-06 gnorm: 1.17 [20:43:29< 3:46:08] +[titan] 2025-10-05 19:17:49,727 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:17:51,911 - root - INFO - step: 33850 loss: 1.9924 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7615 +[titan] 2025-10-05 19:17:51,911 - root - INFO - lr: 7.6252e-06 gnorm: 1.20 [20:43:40< 3:45:57] +[titan] 2025-10-05 19:18:02,789 - root - INFO - step: 33855 loss: 1.9320 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 19:18:02,789 - root - INFO - lr: 7.6210e-06 gnorm: 1.18 [20:43:51< 3:45:46] +[titan] 2025-10-05 19:18:13,634 - root - INFO - step: 33860 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 19:18:13,634 - root - INFO - lr: 7.6168e-06 gnorm: 1.20 [20:44:02< 3:45:35] +[titan] 2025-10-05 19:18:24,528 - root - INFO - step: 33865 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:18:24,528 - root - INFO - lr: 7.6126e-06 gnorm: 1.21 [20:44:13< 3:45:24] +[titan] 2025-10-05 19:18:35,439 - root - INFO - step: 33870 loss: 1.8718 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6559 +[titan] 2025-10-05 19:18:35,440 - root - INFO - lr: 7.6085e-06 gnorm: 1.16 [20:44:24< 3:45:13] +[titan] 2025-10-05 19:18:46,300 - root - INFO - step: 33875 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:18:46,300 - root - INFO - lr: 7.6043e-06 gnorm: 1.18 [20:44:34< 3:45:02] +[titan] 2025-10-05 19:18:57,171 - root - INFO - step: 33880 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 19:18:57,171 - root - INFO - lr: 
7.6001e-06 gnorm: 1.18 [20:44:45< 3:44:51] +[titan] 2025-10-05 19:19:08,027 - root - INFO - step: 33885 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 19:19:08,028 - root - INFO - lr: 7.5960e-06 gnorm: 1.18 [20:44:56< 3:44:40] +[titan] 2025-10-05 19:19:18,884 - root - INFO - step: 33890 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 19:19:18,884 - root - INFO - lr: 7.5918e-06 gnorm: 1.15 [20:45:07< 3:44:28] +[titan] 2025-10-05 19:19:29,764 - root - INFO - step: 33895 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6963 +[titan] 2025-10-05 19:19:29,765 - root - INFO - lr: 7.5877e-06 gnorm: 1.14 [20:45:18< 3:44:17] +[titan] 2025-10-05 19:19:38,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:19:40,700 - root - INFO - step: 33900 loss: 1.9418 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:19:40,700 - root - INFO - lr: 7.5835e-06 gnorm: 1.15 [20:45:29< 3:44:06] +[titan] 2025-10-05 19:19:51,575 - root - INFO - step: 33905 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 19:19:51,575 - root - INFO - lr: 7.5793e-06 gnorm: 1.18 [20:45:40< 3:43:55] +[titan] 2025-10-05 19:20:02,438 - root - INFO - step: 33910 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 19:20:02,438 - root - INFO - lr: 7.5752e-06 gnorm: 1.17 [20:45:51< 3:43:44] +[titan] 2025-10-05 19:20:13,310 - root - INFO - step: 33915 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 19:20:13,310 - root - INFO - lr: 7.5711e-06 gnorm: 1.21 [20:46:01< 3:43:33] +[titan] 2025-10-05 19:20:24,174 - root - INFO - step: 33920 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 19:20:24,174 - root - INFO - lr: 7.5669e-06 gnorm: 1.18 [20:46:12< 3:43:22] +[titan] 2025-10-05 19:20:35,419 - root - INFO - step: 33925 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 29,141 tflops: 404.28 mfu: 40.88% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 19:20:35,419 - root - INFO - lr: 7.5628e-06 gnorm: 1.19 [20:46:24< 3:43:11] +[titan] 2025-10-05 19:20:46,283 - root - INFO - step: 33930 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 19:20:46,283 - root - INFO - lr: 
7.5586e-06 gnorm: 1.17 [20:46:34< 3:43:00] +[titan] 2025-10-05 19:20:57,167 - root - INFO - step: 33935 loss: 1.9676 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 19:20:57,167 - root - INFO - lr: 7.5545e-06 gnorm: 1.23 [20:46:45< 3:42:49] +[titan] 2025-10-05 19:21:08,017 - root - INFO - step: 33940 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:21:08,017 - root - INFO - lr: 7.5504e-06 gnorm: 1.19 [20:46:56< 3:42:38] +[titan] 2025-10-05 19:21:18,883 - root - INFO - step: 33945 loss: 1.9536 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 19:21:18,883 - root - INFO - lr: 7.5463e-06 gnorm: 1.17 [20:47:07< 3:42:27] +[titan] 2025-10-05 19:21:27,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:21:29,744 - root - INFO - step: 33950 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:21:29,745 - root - INFO - lr: 7.5421e-06 gnorm: 1.24 [20:47:18< 3:42:16] +[titan] 2025-10-05 19:21:40,693 - root - INFO - step: 33955 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:21:40,694 - root - INFO - lr: 7.5380e-06 gnorm: 1.21 [20:47:29< 3:42:05] +[titan] 2025-10-05 19:21:51,598 - root - INFO - step: 33960 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 19:21:51,598 - root - INFO - lr: 7.5339e-06 gnorm: 1.22 [20:47:40< 3:41:54] +[titan] 2025-10-05 19:22:02,484 - root - INFO - step: 33965 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 19:22:02,484 - root - INFO - lr: 7.5298e-06 gnorm: 1.17 [20:47:51< 3:41:43] +[titan] 2025-10-05 19:22:13,362 - root - INFO - step: 33970 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 19:22:13,362 - root - INFO - lr: 7.5257e-06 gnorm: 1.15 [20:48:01< 3:41:32] +[titan] 2025-10-05 19:22:24,229 - root - INFO - step: 33975 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:22:24,229 - root - INFO - lr: 7.5216e-06 gnorm: 1.21 [20:48:12< 3:41:21] +[titan] 2025-10-05 19:22:35,177 - root - INFO - step: 33980 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:22:35,178 - root - INFO - lr: 
7.5175e-06 gnorm: 1.26 [20:48:23< 3:41:10] +[titan] 2025-10-05 19:22:46,038 - root - INFO - step: 33985 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:22:46,038 - root - INFO - lr: 7.5134e-06 gnorm: 1.22 [20:48:34< 3:40:59] +[titan] 2025-10-05 19:22:56,932 - root - INFO - step: 33990 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 19:22:56,932 - root - INFO - lr: 7.5093e-06 gnorm: 1.16 [20:48:45< 3:40:48] +[titan] 2025-10-05 19:23:07,777 - root - INFO - step: 33995 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:23:07,778 - root - INFO - lr: 7.5052e-06 gnorm: 1.21 [20:48:56< 3:40:37] +[titan] 2025-10-05 19:23:16,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:23:18,638 - root - INFO - step: 34000 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:23:18,638 - root - INFO - lr: 7.5011e-06 gnorm: 1.17 [20:49:07< 3:40:25] +[titan] 2025-10-05 19:23:29,501 - root - INFO - step: 34005 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:23:29,501 - root - INFO - lr: 7.4970e-06 gnorm: 1.18 [20:49:18< 3:40:14] +[titan] 2025-10-05 19:23:40,426 - root - INFO - step: 34010 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 19:23:40,426 - root - INFO - lr: 7.4929e-06 gnorm: 1.19 [20:49:29< 3:40:03] +[titan] 2025-10-05 19:23:51,314 - root - INFO - step: 34015 loss: 1.9884 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:23:51,315 - root - INFO - lr: 7.4888e-06 gnorm: 1.21 [20:49:39< 3:39:52] +[titan] 2025-10-05 19:24:02,171 - root - INFO - step: 34020 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 19:24:02,171 - root - INFO - lr: 7.4847e-06 gnorm: 1.18 [20:49:50< 3:39:41] +[titan] 2025-10-05 19:24:13,068 - root - INFO - step: 34025 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 19:24:13,068 - root - INFO - lr: 7.4807e-06 gnorm: 1.26 [20:50:01< 3:39:30] +[titan] 2025-10-05 19:24:23,950 - root - INFO - step: 34030 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 19:24:23,951 - root - INFO - lr: 
7.4766e-06 gnorm: 1.18 [20:50:12< 3:39:19] +[titan] 2025-10-05 19:24:34,827 - root - INFO - step: 34035 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6696 +[titan] 2025-10-05 19:24:34,827 - root - INFO - lr: 7.4725e-06 gnorm: 1.20 [20:50:23< 3:39:08] +[titan] 2025-10-05 19:24:45,778 - root - INFO - step: 34040 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 19:24:45,778 - root - INFO - lr: 7.4685e-06 gnorm: 1.19 [20:50:34< 3:38:57] +[titan] 2025-10-05 19:24:56,664 - root - INFO - step: 34045 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 19:24:56,664 - root - INFO - lr: 7.4644e-06 gnorm: 1.20 [20:50:45< 3:38:46] +[titan] 2025-10-05 19:25:05,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:25:07,528 - root - INFO - step: 34050 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:25:07,528 - root - INFO - lr: 7.4603e-06 gnorm: 1.21 [20:50:56< 3:38:35] +[titan] 2025-10-05 19:25:18,416 - root - INFO - step: 34055 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6971 +[titan] 2025-10-05 19:25:18,416 - root - INFO - lr: 7.4563e-06 gnorm: 1.17 [20:51:07< 3:38:24] +[titan] 2025-10-05 19:25:29,290 - root - INFO - step: 34060 loss: 1.9560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 19:25:29,291 - root - INFO - lr: 7.4522e-06 gnorm: 1.17 [20:51:17< 3:38:13] +[titan] 2025-10-05 19:25:40,227 - root - INFO - step: 34065 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6962 +[titan] 2025-10-05 19:25:40,227 - root - INFO - lr: 7.4482e-06 gnorm: 1.16 [20:51:28< 3:38:02] +[titan] 2025-10-05 19:25:51,094 - root - INFO - step: 34070 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 19:25:51,094 - root - INFO - lr: 7.4441e-06 gnorm: 1.17 [20:51:39< 3:37:51] +[titan] 2025-10-05 19:26:01,990 - root - INFO - step: 34075 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 19:26:01,990 - root - INFO - lr: 7.4401e-06 gnorm: 1.19 [20:51:50< 3:37:40] +[titan] 2025-10-05 19:26:12,871 - root - INFO - step: 34080 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6921 +[titan] 2025-10-05 19:26:12,871 - root - INFO - lr: 
7.4361e-06 gnorm: 1.18 [20:52:01< 3:37:29] +[titan] 2025-10-05 19:26:23,746 - root - INFO - step: 34085 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7530 +[titan] 2025-10-05 19:26:23,746 - root - INFO - lr: 7.4320e-06 gnorm: 1.19 [20:52:12< 3:37:18] +[titan] 2025-10-05 19:26:34,615 - root - INFO - step: 34090 loss: 1.9192 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 19:26:34,615 - root - INFO - lr: 7.4280e-06 gnorm: 1.17 [20:52:23< 3:37:07] +[titan] 2025-10-05 19:26:45,574 - root - INFO - step: 34095 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 19:26:45,575 - root - INFO - lr: 7.4239e-06 gnorm: 1.24 [20:52:34< 3:36:56] +[titan] 2025-10-05 19:26:54,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:26:56,447 - root - INFO - step: 34100 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:26:56,447 - root - INFO - lr: 7.4199e-06 gnorm: 1.21 [20:52:45< 3:36:45] +[titan] 2025-10-05 19:27:07,327 - root - INFO - step: 34105 loss: 1.8752 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 19:27:07,327 - root - INFO - lr: 7.4159e-06 gnorm: 1.17 [20:52:55< 3:36:34] +[titan] 2025-10-05 19:27:18,206 - root - INFO - step: 34110 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 19:27:18,206 - root - INFO - lr: 7.4119e-06 gnorm: 1.25 [20:53:06< 3:36:23] +[titan] 2025-10-05 19:27:29,088 - root - INFO - step: 34115 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 19:27:29,088 - root - INFO - lr: 7.4079e-06 gnorm: 1.18 [20:53:17< 3:36:11] +[titan] 2025-10-05 19:27:40,016 - root - INFO - step: 34120 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 19:27:40,017 - root - INFO - lr: 7.4038e-06 gnorm: 1.18 [20:53:28< 3:36:00] +[titan] 2025-10-05 19:27:50,909 - root - INFO - step: 34125 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 19:27:50,909 - root - INFO - lr: 7.3998e-06 gnorm: 1.20 [20:53:39< 3:35:49] +[titan] 2025-10-05 19:28:01,811 - root - INFO - step: 34130 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:28:01,811 - root - INFO - lr: 
7.3958e-06 gnorm: 1.15 [20:53:50< 3:35:38] +[titan] 2025-10-05 19:28:12,709 - root - INFO - step: 34135 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 19:28:12,710 - root - INFO - lr: 7.3918e-06 gnorm: 1.17 [20:54:01< 3:35:27] +[titan] 2025-10-05 19:28:23,581 - root - INFO - step: 34140 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:28:23,582 - root - INFO - lr: 7.3878e-06 gnorm: 1.23 [20:54:12< 3:35:16] +[titan] 2025-10-05 19:28:34,458 - root - INFO - step: 34145 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 19:28:34,458 - root - INFO - lr: 7.3838e-06 gnorm: 1.18 [20:54:23< 3:35:05] +[titan] 2025-10-05 19:28:43,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:28:45,388 - root - INFO - step: 34150 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 19:28:45,388 - root - INFO - lr: 7.3798e-06 gnorm: 1.19 [20:54:33< 3:34:54] +[titan] 2025-10-05 19:28:56,262 - root - INFO - step: 34155 loss: 1.9387 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:28:56,262 - root - INFO - lr: 7.3758e-06 gnorm: 1.16 [20:54:44< 3:34:43] +[titan] 2025-10-05 19:29:07,168 - root - INFO - step: 34160 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 19:29:07,168 - root - INFO - lr: 7.3718e-06 gnorm: 1.18 [20:54:55< 3:34:32] +[titan] 2025-10-05 19:29:18,057 - root - INFO - step: 34165 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 19:29:18,057 - root - INFO - lr: 7.3678e-06 gnorm: 1.19 [20:55:06< 3:34:21] +[titan] 2025-10-05 19:29:28,930 - root - INFO - step: 34170 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 19:29:28,930 - root - INFO - lr: 7.3639e-06 gnorm: 1.18 [20:55:17< 3:34:10] +[titan] 2025-10-05 19:29:39,843 - root - INFO - step: 34175 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 19:29:39,844 - root - INFO - lr: 7.3599e-06 gnorm: 1.25 [20:55:28< 3:33:59] +[titan] 2025-10-05 19:29:50,737 - root - INFO - step: 34180 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 19:29:50,737 - root - INFO - lr: 
7.3559e-06 gnorm: 1.26 [20:55:39< 3:33:48] +[titan] 2025-10-05 19:30:01,652 - root - INFO - step: 34185 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 19:30:01,652 - root - INFO - lr: 7.3519e-06 gnorm: 1.25 [20:55:50< 3:33:37] +[titan] 2025-10-05 19:30:12,505 - root - INFO - step: 34190 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 19:30:12,506 - root - INFO - lr: 7.3480e-06 gnorm: 1.20 [20:56:01< 3:33:26] +[titan] 2025-10-05 19:30:23,389 - root - INFO - step: 34195 loss: 1.9339 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 19:30:23,390 - root - INFO - lr: 7.3440e-06 gnorm: 1.19 [20:56:11< 3:33:15] +[titan] 2025-10-05 19:30:32,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:30:34,246 - root - INFO - step: 34200 loss: 1.9408 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 19:30:34,246 - root - INFO - lr: 7.3400e-06 gnorm: 1.19 [20:56:22< 3:33:04] +[titan] 2025-10-05 19:30:45,157 - root - INFO - step: 34205 loss: 1.9115 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 19:30:45,157 - root - INFO - lr: 7.3361e-06 gnorm: 1.18 [20:56:33< 3:32:53] +[titan] 2025-10-05 19:30:56,027 - root - INFO - step: 34210 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 19:30:56,027 - root - INFO - lr: 7.3321e-06 gnorm: 1.19 [20:56:44< 3:32:42] +[titan] 2025-10-05 19:31:06,908 - root - INFO - step: 34215 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 19:31:06,908 - root - INFO - lr: 7.3281e-06 gnorm: 1.17 [20:56:55< 3:32:31] +[titan] 2025-10-05 19:31:17,775 - root - INFO - step: 34220 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7272 +[titan] 2025-10-05 19:31:17,776 - root - INFO - lr: 7.3242e-06 gnorm: 1.20 [20:57:06< 3:32:20] +[titan] 2025-10-05 19:31:28,639 - root - INFO - step: 34225 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 19:31:28,639 - root - INFO - lr: 7.3202e-06 gnorm: 1.19 [20:57:17< 3:32:09] +[titan] 2025-10-05 19:31:39,529 - root - INFO - step: 34230 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:31:39,529 - root - INFO - lr: 
7.3163e-06 gnorm: 1.20 [20:57:28< 3:31:57] +[titan] 2025-10-05 19:31:50,429 - root - INFO - step: 34235 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 19:31:50,429 - root - INFO - lr: 7.3124e-06 gnorm: 1.19 [20:57:39< 3:31:46] +[titan] 2025-10-05 19:32:01,297 - root - INFO - step: 34240 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6972 +[titan] 2025-10-05 19:32:01,297 - root - INFO - lr: 7.3084e-06 gnorm: 1.22 [20:57:49< 3:31:35] +[titan] 2025-10-05 19:32:12,194 - root - INFO - step: 34245 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 19:32:12,194 - root - INFO - lr: 7.3045e-06 gnorm: 1.21 [20:58:00< 3:31:24] +[titan] 2025-10-05 19:32:20,872 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:32:23,059 - root - INFO - step: 34250 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 19:32:23,059 - root - INFO - lr: 7.3006e-06 gnorm: 1.18 [20:58:11< 3:31:13] +[titan] 2025-10-05 19:32:33,942 - root - INFO - step: 34255 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:32:33,942 - root - INFO - lr: 7.2966e-06 gnorm: 1.16 [20:58:22< 3:31:02] +[titan] 2025-10-05 19:32:44,861 - root - INFO - step: 34260 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7106 +[titan] 2025-10-05 19:32:44,861 - root - INFO - lr: 7.2927e-06 gnorm: 1.18 [20:58:33< 3:30:51] +[titan] 2025-10-05 19:32:55,734 - root - INFO - step: 34265 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 19:32:55,735 - root - INFO - lr: 7.2888e-06 gnorm: 1.16 [20:58:44< 3:30:40] +[titan] 2025-10-05 19:33:06,617 - root - INFO - step: 34270 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 19:33:06,617 - root - INFO - lr: 7.2849e-06 gnorm: 1.22 [20:58:55< 3:30:29] +[titan] 2025-10-05 19:33:17,521 - root - INFO - step: 34275 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 19:33:17,521 - root - INFO - lr: 7.2809e-06 gnorm: 1.19 [20:59:06< 3:30:18] +[titan] 2025-10-05 19:33:28,449 - root - INFO - step: 34280 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 19:33:28,449 - root - INFO - lr: 
7.2770e-06 gnorm: 1.23 [20:59:17< 3:30:07] +[titan] 2025-10-05 19:33:39,327 - root - INFO - step: 34285 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:33:39,328 - root - INFO - lr: 7.2731e-06 gnorm: 1.17 [20:59:27< 3:29:56] +[titan] 2025-10-05 19:33:50,236 - root - INFO - step: 34290 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:33:50,236 - root - INFO - lr: 7.2692e-06 gnorm: 1.23 [20:59:38< 3:29:45] +[titan] 2025-10-05 19:34:01,108 - root - INFO - step: 34295 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 19:34:01,108 - root - INFO - lr: 7.2653e-06 gnorm: 1.17 [20:59:49< 3:29:34] +[titan] 2025-10-05 19:34:09,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:34:11,978 - root - INFO - step: 34300 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 19:34:11,978 - root - INFO - lr: 7.2614e-06 gnorm: 1.19 [21:00:00< 3:29:23] +[titan] 2025-10-05 19:34:20,927 - root - INFO - Dumping profiler traces at step 34304 +[titan] 2025-10-05 19:34:20,967 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:34:23,158 - root - INFO - step: 34305 loss: 1.8387 memory: 118.84GiB(85.28%) tps: 29,312 tflops: 406.65 mfu: 41.12% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6259 +[titan] 2025-10-05 19:34:23,158 - root - INFO - lr: 7.2575e-06 gnorm: 1.17 [21:00:11< 3:29:12] +[titan] 2025-10-05 19:34:34,056 - root - INFO - step: 34310 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 19:34:34,057 - root - INFO - lr: 7.2536e-06 gnorm: 1.17 [21:00:22< 3:29:01] +[titan] 2025-10-05 19:34:44,938 - root - INFO - step: 34315 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 19:34:44,938 - root - INFO - lr: 7.2497e-06 gnorm: 1.18 [21:00:33< 3:28:50] +[titan] 2025-10-05 19:34:55,805 - root - INFO - step: 34320 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:34:55,805 - root - INFO - lr: 7.2458e-06 gnorm: 1.22 [21:00:44< 3:28:39] +[titan] 2025-10-05 19:35:06,664 - root - INFO - step: 34325 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 19:35:06,664 - root - INFO - lr: 7.2419e-06 gnorm: 1.19 [21:00:55< 3:28:28] +[titan] 2025-10-05 19:35:17,530 - root - INFO - step: 34330 loss: 
1.9235 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 19:35:17,530 - root - INFO - lr: 7.2381e-06 gnorm: 1.18 [21:01:06< 3:28:17] +[titan] 2025-10-05 19:35:28,388 - root - INFO - step: 34335 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 19:35:28,388 - root - INFO - lr: 7.2342e-06 gnorm: 1.27 [21:01:16< 3:28:06] +[titan] 2025-10-05 19:35:39,210 - root - INFO - step: 34340 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 19:35:39,210 - root - INFO - lr: 7.2303e-06 gnorm: 1.23 [21:01:27< 3:27:55] +[titan] 2025-10-05 19:35:50,071 - root - INFO - step: 34345 loss: 1.9981 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7671 +[titan] 2025-10-05 19:35:50,072 - root - INFO - lr: 7.2264e-06 gnorm: 1.20 [21:01:38< 3:27:43] +[titan] 2025-10-05 19:35:58,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:36:00,937 - root - INFO - step: 34350 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:00,937 - root - INFO - lr: 7.2226e-06 gnorm: 1.19 [21:01:49< 3:27:32] +[titan] 2025-10-05 19:36:11,779 - root - INFO - step: 34355 loss: 1.9721 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:36:11,779 - root - INFO - lr: 7.2187e-06 gnorm: 1.22 [21:02:00< 3:27:21] +[titan] 2025-10-05 19:36:22,618 - root - INFO - step: 34360 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 19:36:22,618 - root - INFO - lr: 7.2148e-06 gnorm: 1.19 [21:02:11< 3:27:10] +[titan] 2025-10-05 19:36:33,472 - root - INFO - step: 34365 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 19:36:33,472 - root - INFO - lr: 7.2110e-06 gnorm: 1.16 [21:02:22< 3:26:59] +[titan] 2025-10-05 19:36:44,328 - root - INFO - step: 34370 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:44,329 - root - INFO - lr: 7.2071e-06 gnorm: 1.17 [21:02:32< 3:26:48] +[titan] 2025-10-05 19:36:55,235 - root - INFO - step: 34375 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 19:36:55,235 - root - INFO - lr: 7.2033e-06 gnorm: 1.14 [21:02:43< 3:26:37] +[titan] 2025-10-05 19:37:06,084 - root - INFO - step: 34380 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:37:06,084 - root - INFO - lr: 
7.1994e-06 gnorm: 1.17 [21:02:54< 3:26:26] +[titan] 2025-10-05 19:37:16,961 - root - INFO - step: 34385 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7555 +[titan] 2025-10-05 19:37:16,961 - root - INFO - lr: 7.1956e-06 gnorm: 1.22 [21:03:05< 3:26:15] +[titan] 2025-10-05 19:37:27,815 - root - INFO - step: 34390 loss: 2.0305 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 19:37:27,815 - root - INFO - lr: 7.1917e-06 gnorm: 1.20 [21:03:16< 3:26:04] +[titan] 2025-10-05 19:37:38,670 - root - INFO - step: 34395 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 19:37:38,670 - root - INFO - lr: 7.1879e-06 gnorm: 1.19 [21:03:27< 3:25:53] +[titan] 2025-10-05 19:37:47,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:37:49,563 - root - INFO - step: 34400 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:37:49,563 - root - INFO - lr: 7.1840e-06 gnorm: 1.23 [21:03:38< 3:25:42] +[titan] 2025-10-05 19:38:00,476 - root - INFO - step: 34405 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:38:00,476 - root - INFO - lr: 7.1802e-06 gnorm: 1.21 [21:03:49< 3:25:31] +[titan] 2025-10-05 19:38:11,337 - root - INFO - step: 34410 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7067 +[titan] 2025-10-05 19:38:11,337 - root - INFO - lr: 7.1764e-06 gnorm: 1.16 [21:03:59< 3:25:20] +[titan] 2025-10-05 19:38:22,210 - root - INFO - step: 34415 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 19:38:22,210 - root - INFO - lr: 7.1726e-06 gnorm: 1.20 [21:04:10< 3:25:09] +[titan] 2025-10-05 19:38:33,092 - root - INFO - step: 34420 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:38:33,092 - root - INFO - lr: 7.1687e-06 gnorm: 1.23 [21:04:21< 3:24:58] +[titan] 2025-10-05 19:38:43,954 - root - INFO - step: 34425 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 19:38:43,954 - root - INFO - lr: 7.1649e-06 gnorm: 1.19 [21:04:32< 3:24:47] +[titan] 2025-10-05 19:38:54,847 - root - INFO - step: 34430 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 19:38:54,847 - root - INFO - lr: 
7.1611e-06 gnorm: 1.22 [21:04:43< 3:24:36] +[titan] 2025-10-05 19:39:05,711 - root - INFO - step: 34435 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:39:05,711 - root - INFO - lr: 7.1573e-06 gnorm: 1.18 [21:04:54< 3:24:25] +[titan] 2025-10-05 19:39:16,607 - root - INFO - step: 34440 loss: 1.9084 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 19:39:16,607 - root - INFO - lr: 7.1535e-06 gnorm: 1.15 [21:05:05< 3:24:14] +[titan] 2025-10-05 19:39:27,468 - root - INFO - step: 34445 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 19:39:27,469 - root - INFO - lr: 7.1497e-06 gnorm: 1.21 [21:05:16< 3:24:03] +[titan] 2025-10-05 19:39:36,132 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:39:38,309 - root - INFO - step: 34450 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 19:39:38,309 - root - INFO - lr: 7.1458e-06 gnorm: 1.19 [21:05:26< 3:23:52] +[titan] 2025-10-05 19:39:49,168 - root - INFO - step: 34455 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7614 +[titan] 2025-10-05 19:39:49,168 - root - INFO - lr: 7.1420e-06 gnorm: 1.22 [21:05:37< 3:23:41] +[titan] 2025-10-05 19:39:59,988 - root - INFO - step: 34460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:39:59,988 - root - INFO - lr: 7.1382e-06 gnorm: 1.18 [21:05:48< 3:23:29] +[titan] 2025-10-05 19:40:10,837 - root - INFO - step: 34465 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:40:10,837 - root - INFO - lr: 7.1345e-06 gnorm: 1.20 [21:05:59< 3:23:18] +[titan] 2025-10-05 19:40:21,711 - root - INFO - step: 34470 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 19:40:21,711 - root - INFO - lr: 7.1307e-06 gnorm: 1.24 [21:06:10< 3:23:07] +[titan] 2025-10-05 19:40:32,577 - root - INFO - step: 34475 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 19:40:32,577 - root - INFO - lr: 7.1269e-06 gnorm: 1.26 [21:06:21< 3:22:56] +[titan] 2025-10-05 19:40:43,432 - root - INFO - step: 34480 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6998 +[titan] 2025-10-05 19:40:43,432 - root - INFO - lr: 
7.1231e-06 gnorm: 1.19 [21:06:32< 3:22:45] +[titan] 2025-10-05 19:40:54,326 - root - INFO - step: 34485 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:40:54,326 - root - INFO - lr: 7.1193e-06 gnorm: 1.19 [21:06:42< 3:22:34] +[titan] 2025-10-05 19:41:05,190 - root - INFO - step: 34490 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 19:41:05,190 - root - INFO - lr: 7.1155e-06 gnorm: 1.19 [21:06:53< 3:22:23] +[titan] 2025-10-05 19:41:16,016 - root - INFO - step: 34495 loss: 1.9452 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7197 +[titan] 2025-10-05 19:41:16,016 - root - INFO - lr: 7.1117e-06 gnorm: 1.28 [21:07:04< 3:22:12] +[titan] 2025-10-05 19:41:24,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:41:26,864 - root - INFO - step: 34500 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 19:41:26,864 - root - INFO - lr: 7.1080e-06 gnorm: 1.20 [21:07:15< 3:22:01] +[titan] 2025-10-05 19:41:37,746 - root - INFO - step: 34505 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:41:37,746 - root - INFO - lr: 7.1042e-06 gnorm: 1.20 [21:07:26< 3:21:50] +[titan] 2025-10-05 19:41:48,592 - root - INFO - step: 34510 loss: 1.9716 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 19:41:48,592 - root - INFO - lr: 7.1004e-06 gnorm: 1.23 [21:07:37< 3:21:39] +[titan] 2025-10-05 19:41:59,472 - root - INFO - step: 34515 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 19:41:59,472 - root - INFO - lr: 7.0967e-06 gnorm: 1.19 [21:07:48< 3:21:28] +[titan] 2025-10-05 19:42:10,331 - root - INFO - step: 34520 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 19:42:10,331 - root - INFO - lr: 7.0929e-06 gnorm: 1.21 [21:07:58< 3:21:17] +[titan] 2025-10-05 19:42:21,195 - root - INFO - step: 34525 loss: 1.8598 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6441 +[titan] 2025-10-05 19:42:21,195 - root - INFO - lr: 7.0892e-06 gnorm: 1.20 [21:08:09< 3:21:06] +[titan] 2025-10-05 19:42:32,043 - root - INFO - step: 34530 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 19:42:32,043 - root - INFO - lr: 
7.0854e-06 gnorm: 1.18 [21:08:20< 3:20:55] +[titan] 2025-10-05 19:42:42,933 - root - INFO - step: 34535 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:42:42,933 - root - INFO - lr: 7.0816e-06 gnorm: 1.17 [21:08:31< 3:20:44] +[titan] 2025-10-05 19:42:53,805 - root - INFO - step: 34540 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 19:42:53,805 - root - INFO - lr: 7.0779e-06 gnorm: 1.20 [21:08:42< 3:20:33] +[titan] 2025-10-05 19:43:04,676 - root - INFO - step: 34545 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:43:04,676 - root - INFO - lr: 7.0742e-06 gnorm: 1.19 [21:08:53< 3:20:22] +[titan] 2025-10-05 19:43:13,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:43:15,579 - root - INFO - step: 34550 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7242 +[titan] 2025-10-05 19:43:15,579 - root - INFO - lr: 7.0704e-06 gnorm: 1.21 [21:09:04< 3:20:11] +[titan] 2025-10-05 19:43:26,490 - root - INFO - step: 34555 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:43:26,490 - root - INFO - lr: 7.0667e-06 gnorm: 1.18 [21:09:15< 3:20:00] +[titan] 2025-10-05 19:43:37,391 - root - INFO - step: 34560 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8232 +[titan] 2025-10-05 19:43:37,391 - root - INFO - lr: 7.0629e-06 gnorm: 4.37 [21:09:25< 3:19:49] +[titan] 2025-10-05 19:43:48,315 - root - INFO - step: 34565 loss: 1.9033 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 19:43:48,315 - root - INFO - lr: 7.0592e-06 gnorm: 1.24 [21:09:36< 3:19:38] +[titan] 2025-10-05 19:43:59,252 - root - INFO - step: 34570 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 19:43:59,252 - root - INFO - lr: 7.0555e-06 gnorm: 1.17 [21:09:47< 3:19:27] +[titan] 2025-10-05 19:44:10,131 - root - INFO - step: 34575 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 19:44:10,131 - root - INFO - lr: 7.0518e-06 gnorm: 1.21 [21:09:58< 3:19:15] +[titan] 2025-10-05 19:44:20,965 - root - INFO - step: 34580 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7717 +[titan] 2025-10-05 19:44:20,965 - root - INFO - lr: 
7.0480e-06 gnorm: 1.26 [21:10:09< 3:19:04] +[titan] 2025-10-05 19:44:31,829 - root - INFO - step: 34585 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 19:44:31,829 - root - INFO - lr: 7.0443e-06 gnorm: 1.19 [21:10:20< 3:18:53] +[titan] 2025-10-05 19:44:42,679 - root - INFO - step: 34590 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.7230 +[titan] 2025-10-05 19:44:42,680 - root - INFO - lr: 7.0406e-06 gnorm: 2.68 [21:10:31< 3:18:42] +[titan] 2025-10-05 19:44:53,560 - root - INFO - step: 34595 loss: 1.8805 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 19:44:53,561 - root - INFO - lr: 7.0369e-06 gnorm: 1.23 [21:10:42< 3:18:31] +[titan] 2025-10-05 19:45:02,272 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:45:04,448 - root - INFO - step: 34600 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 19:45:04,448 - root - INFO - lr: 7.0332e-06 gnorm: 1.18 [21:10:53< 3:18:20] +[titan] 2025-10-05 19:45:15,326 - root - INFO - step: 34605 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:45:15,326 - root - INFO - lr: 7.0295e-06 gnorm: 1.19 [21:11:03< 3:18:09] +[titan] 2025-10-05 19:45:26,191 - root - INFO - step: 34610 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:45:26,191 - root - INFO - lr: 7.0258e-06 gnorm: 1.20 [21:11:14< 3:17:58] +[titan] 2025-10-05 19:45:37,037 - root - INFO - step: 34615 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:45:37,037 - root - INFO - lr: 7.0221e-06 gnorm: 1.17 [21:11:25< 3:17:47] +[titan] 2025-10-05 19:45:47,905 - root - INFO - step: 34620 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:45:47,905 - root - INFO - lr: 7.0184e-06 gnorm: 1.23 [21:11:36< 3:17:36] +[titan] 2025-10-05 19:45:58,807 - root - INFO - step: 34625 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 19:45:58,807 - root - INFO - lr: 7.0147e-06 gnorm: 1.25 [21:11:47< 3:17:25] +[titan] 2025-10-05 19:46:09,704 - root - INFO - step: 34630 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 19:46:09,704 - root - INFO - lr: 
7.0110e-06 gnorm: 1.24 [21:11:58< 3:17:14] +[titan] 2025-10-05 19:46:20,566 - root - INFO - step: 34635 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 19:46:20,566 - root - INFO - lr: 7.0073e-06 gnorm: 1.25 [21:12:09< 3:17:03] +[titan] 2025-10-05 19:46:31,407 - root - INFO - step: 34640 loss: 1.9051 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 19:46:31,407 - root - INFO - lr: 7.0036e-06 gnorm: 1.18 [21:12:19< 3:16:52] +[titan] 2025-10-05 19:46:42,249 - root - INFO - step: 34645 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 19:46:42,249 - root - INFO - lr: 6.9999e-06 gnorm: 1.17 [21:12:30< 3:16:41] +[titan] 2025-10-05 19:46:50,916 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:46:53,097 - root - INFO - step: 34650 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7558 +[titan] 2025-10-05 19:46:53,097 - root - INFO - lr: 6.9963e-06 gnorm: 1.18 [21:12:41< 3:16:30] +[titan] 2025-10-05 19:47:03,992 - root - INFO - step: 34655 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 19:47:03,992 - root - INFO - lr: 6.9926e-06 gnorm: 1.23 [21:12:52< 3:16:19] +[titan] 2025-10-05 19:47:14,867 - root - INFO - step: 34660 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 19:47:14,867 - root - INFO - lr: 6.9889e-06 gnorm: 1.29 [21:13:03< 3:16:08] +[titan] 2025-10-05 19:47:25,759 - root - INFO - step: 34665 loss: 1.9370 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 19:47:25,759 - root - INFO - lr: 6.9853e-06 gnorm: 1.19 [21:13:14< 3:15:57] +[titan] 2025-10-05 19:47:36,638 - root - INFO - step: 34670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 19:47:36,639 - root - INFO - lr: 6.9816e-06 gnorm: 1.16 [21:13:25< 3:15:46] +[titan] 2025-10-05 19:47:47,526 - root - INFO - step: 34675 loss: 1.9202 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:47:47,526 - root - INFO - lr: 6.9779e-06 gnorm: 1.19 [21:13:36< 3:15:35] +[titan] 2025-10-05 19:47:58,418 - root - INFO - step: 34680 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 19:47:58,418 - root - INFO - lr: 
6.9743e-06 gnorm: 1.20 [21:13:46< 3:15:24] +[titan] 2025-10-05 19:48:09,256 - root - INFO - step: 34685 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6999 +[titan] 2025-10-05 19:48:09,256 - root - INFO - lr: 6.9706e-06 gnorm: 1.21 [21:13:57< 3:15:13] +[titan] 2025-10-05 19:48:20,111 - root - INFO - step: 34690 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 19:48:20,111 - root - INFO - lr: 6.9670e-06 gnorm: 1.22 [21:14:08< 3:15:02] +[titan] 2025-10-05 19:48:31,007 - root - INFO - step: 34695 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 19:48:31,007 - root - INFO - lr: 6.9633e-06 gnorm: 1.22 [21:14:19< 3:14:50] +[titan] 2025-10-05 19:48:39,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:48:41,855 - root - INFO - step: 34700 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 19:48:41,855 - root - INFO - lr: 6.9597e-06 gnorm: 1.18 [21:14:30< 3:14:39] +[titan] 2025-10-05 19:48:52,713 - root - INFO - step: 34705 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7370 +[titan] 2025-10-05 19:48:52,713 - root - INFO - lr: 6.9560e-06 gnorm: 1.17 [21:14:41< 3:14:28] +[titan] 2025-10-05 19:49:03,608 - root - INFO - step: 34710 loss: 1.9120 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6899 +[titan] 2025-10-05 19:49:03,608 - root - INFO - lr: 6.9524e-06 gnorm: 1.17 [21:14:52< 3:14:17] +[titan] 2025-10-05 19:49:14,465 - root - INFO - step: 34715 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:49:14,465 - root - INFO - lr: 6.9488e-06 gnorm: 1.22 [21:15:03< 3:14:06] +[titan] 2025-10-05 19:49:25,305 - root - INFO - step: 34720 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:49:25,305 - root - INFO - lr: 6.9451e-06 gnorm: 1.21 [21:15:13< 3:13:55] +[titan] 2025-10-05 19:49:36,189 - root - INFO - step: 34725 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:49:36,189 - root - INFO - lr: 6.9415e-06 gnorm: 1.18 [21:15:24< 3:13:44] +[titan] 2025-10-05 19:49:47,060 - root - INFO - step: 34730 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 19:49:47,060 - root - INFO - lr: 
6.9379e-06 gnorm: 1.21 [21:15:35< 3:13:33] +[titan] 2025-10-05 19:49:57,949 - root - INFO - step: 34735 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 19:49:57,949 - root - INFO - lr: 6.9343e-06 gnorm: 1.21 [21:15:46< 3:13:22] +[titan] 2025-10-05 19:50:08,803 - root - INFO - step: 34740 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 19:50:08,803 - root - INFO - lr: 6.9306e-06 gnorm: 1.24 [21:15:57< 3:13:11] +[titan] 2025-10-05 19:50:19,673 - root - INFO - step: 34745 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 19:50:19,673 - root - INFO - lr: 6.9270e-06 gnorm: 1.26 [21:16:08< 3:13:00] +[titan] 2025-10-05 19:50:28,354 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:50:30,535 - root - INFO - step: 34750 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7233 +[titan] 2025-10-05 19:50:30,535 - root - INFO - lr: 6.9234e-06 gnorm: 1.24 [21:16:19< 3:12:49] +[titan] 2025-10-05 19:50:41,406 - root - INFO - step: 34755 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:50:41,406 - root - INFO - lr: 6.9198e-06 gnorm: 1.19 [21:16:29< 3:12:38] +[titan] 2025-10-05 19:50:52,304 - root - INFO - step: 34760 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 19:50:52,304 - root - INFO - lr: 6.9162e-06 gnorm: 1.19 [21:16:40< 3:12:27] +[titan] 2025-10-05 19:51:03,222 - root - INFO - step: 34765 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 19:51:03,223 - root - INFO - lr: 6.9126e-06 gnorm: 1.23 [21:16:51< 3:12:16] +[titan] 2025-10-05 19:51:14,086 - root - INFO - step: 34770 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:51:14,086 - root - INFO - lr: 6.9090e-06 gnorm: 1.16 [21:17:02< 3:12:05] +[titan] 2025-10-05 19:51:24,963 - root - INFO - step: 34775 loss: 1.9641 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 19:51:24,963 - root - INFO - lr: 6.9054e-06 gnorm: 1.22 [21:17:13< 3:11:54] +[titan] 2025-10-05 19:51:35,828 - root - INFO - step: 34780 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 19:51:35,828 - root - INFO - lr: 
6.9018e-06 gnorm: 1.21 [21:17:24< 3:11:43] +[titan] 2025-10-05 19:51:46,685 - root - INFO - step: 34785 loss: 1.9053 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 19:51:46,685 - root - INFO - lr: 6.8982e-06 gnorm: 1.20 [21:17:35< 3:11:32] +[titan] 2025-10-05 19:51:57,587 - root - INFO - step: 34790 loss: 1.9201 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 19:51:57,587 - root - INFO - lr: 6.8946e-06 gnorm: 1.18 [21:17:46< 3:11:21] +[titan] 2025-10-05 19:52:08,485 - root - INFO - step: 34795 loss: 1.9967 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 19:52:08,485 - root - INFO - lr: 6.8910e-06 gnorm: 1.22 [21:17:57< 3:11:10] +[titan] 2025-10-05 19:52:17,185 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:52:19,378 - root - INFO - step: 34800 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 19:52:19,378 - root - INFO - lr: 6.8875e-06 gnorm: 1.20 [21:18:07< 3:10:59] +[titan] 2025-10-05 19:52:30,261 - root - INFO - step: 34805 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 19:52:30,261 - root - INFO - lr: 6.8839e-06 gnorm: 1.19 [21:18:18< 3:10:48] +[titan] 2025-10-05 19:52:41,146 - root - INFO - step: 34810 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 19:52:41,146 - root - INFO - lr: 6.8803e-06 gnorm: 1.20 [21:18:29< 3:10:37] +[titan] 2025-10-05 19:52:52,095 - root - INFO - step: 34815 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7667 +[titan] 2025-10-05 19:52:52,095 - root - INFO - lr: 6.8767e-06 gnorm: 1.23 [21:18:40< 3:10:26] +[titan] 2025-10-05 19:52:54,454 - root - INFO - Dumping profiler traces at step 34816 +[titan] 2025-10-05 19:52:54,492 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:53:03,216 - root - INFO - step: 34820 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 29,466 tflops: 408.80 mfu: 41.33% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:53:03,216 - root - INFO - lr: 6.8732e-06 gnorm: 1.20 [21:18:51< 3:10:15] +[titan] 2025-10-05 19:53:14,080 - root - INFO - step: 34825 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7156 +[titan] 2025-10-05 19:53:14,080 - root - INFO - lr: 6.8696e-06 gnorm: 1.17 [21:19:02< 3:10:03] +[titan] 2025-10-05 19:53:24,945 - root - INFO - step: 34830 loss: 
1.8794 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 19:53:24,945 - root - INFO - lr: 6.8661e-06 gnorm: 1.15 [21:19:13< 3:09:52] +[titan] 2025-10-05 19:53:35,780 - root - INFO - step: 34835 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 19:53:35,780 - root - INFO - lr: 6.8625e-06 gnorm: 1.16 [21:19:24< 3:09:41] +[titan] 2025-10-05 19:53:46,625 - root - INFO - step: 34840 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7678 +[titan] 2025-10-05 19:53:46,626 - root - INFO - lr: 6.8589e-06 gnorm: 1.21 [21:19:35< 3:09:30] +[titan] 2025-10-05 19:53:57,479 - root - INFO - step: 34845 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:53:57,479 - root - INFO - lr: 6.8554e-06 gnorm: 1.18 [21:19:46< 3:09:19] +[titan] 2025-10-05 19:54:06,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:54:08,348 - root - INFO - step: 34850 loss: 2.0208 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 19:54:08,349 - root - INFO - lr: 6.8518e-06 gnorm: 1.22 [21:19:56< 3:09:08] +[titan] 2025-10-05 19:54:19,236 - root - INFO - step: 34855 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 19:54:19,236 - root - INFO - lr: 6.8483e-06 gnorm: 1.19 [21:20:07< 3:08:57] +[titan] 2025-10-05 19:54:30,115 - root - INFO - step: 34860 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 19:54:30,115 - root - INFO - lr: 6.8448e-06 gnorm: 1.17 [21:20:18< 3:08:46] +[titan] 2025-10-05 19:54:40,989 - root - INFO - step: 34865 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 19:54:40,989 - root - INFO - lr: 6.8412e-06 gnorm: 1.22 [21:20:29< 3:08:35] +[titan] 2025-10-05 19:54:51,840 - root - INFO - step: 34870 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:54:51,840 - root - INFO - lr: 6.8377e-06 gnorm: 1.23 [21:20:40< 3:08:24] +[titan] 2025-10-05 19:55:02,739 - root - INFO - step: 34875 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:55:02,739 - root - INFO - lr: 6.8342e-06 gnorm: 1.18 [21:20:51< 3:08:13] +[titan] 2025-10-05 19:55:13,616 - root - INFO - step: 34880 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:55:13,616 - root - INFO - lr: 
6.8306e-06 gnorm: 1.19 [21:21:02< 3:08:02] +[titan] 2025-10-05 19:55:24,502 - root - INFO - step: 34885 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7556 +[titan] 2025-10-05 19:55:24,502 - root - INFO - lr: 6.8271e-06 gnorm: 1.20 [21:21:13< 3:07:51] +[titan] 2025-10-05 19:55:35,390 - root - INFO - step: 34890 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 19:55:35,390 - root - INFO - lr: 6.8236e-06 gnorm: 1.20 [21:21:23< 3:07:40] +[titan] 2025-10-05 19:55:46,234 - root - INFO - step: 34895 loss: 1.9281 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 19:55:46,234 - root - INFO - lr: 6.8201e-06 gnorm: 1.21 [21:21:34< 3:07:29] +[titan] 2025-10-05 19:55:54,939 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:55:57,124 - root - INFO - step: 34900 loss: 1.9752 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 19:55:57,124 - root - INFO - lr: 6.8166e-06 gnorm: 1.22 [21:21:45< 3:07:18] +[titan] 2025-10-05 19:56:07,979 - root - INFO - step: 34905 loss: 1.8773 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6604 +[titan] 2025-10-05 19:56:07,979 - root - INFO - lr: 6.8130e-06 gnorm: 1.27 [21:21:56< 3:07:07] +[titan] 2025-10-05 19:56:18,858 - root - INFO - step: 34910 loss: 1.9375 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:56:18,858 - root - INFO - lr: 6.8095e-06 gnorm: 1.28 [21:22:07< 3:06:56] +[titan] 2025-10-05 19:56:29,723 - root - INFO - step: 34915 loss: 1.9603 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:56:29,723 - root - INFO - lr: 6.8060e-06 gnorm: 1.20 [21:22:18< 3:06:45] +[titan] 2025-10-05 19:56:40,632 - root - INFO - step: 34920 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:56:40,632 - root - INFO - lr: 6.8025e-06 gnorm: 1.20 [21:22:29< 3:06:34] +[titan] 2025-10-05 19:56:51,542 - root - INFO - step: 34925 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 19:56:51,542 - root - INFO - lr: 6.7990e-06 gnorm: 1.19 [21:22:40< 3:06:23] +[titan] 2025-10-05 19:57:02,433 - root - INFO - step: 34930 loss: 1.8978 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6780 +[titan] 2025-10-05 19:57:02,433 - root - INFO - lr: 
6.7955e-06 gnorm: 1.19 [21:22:50< 3:06:12] +[titan] 2025-10-05 19:57:13,339 - root - INFO - step: 34935 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 19:57:13,340 - root - INFO - lr: 6.7920e-06 gnorm: 1.19 [21:23:01< 3:06:01] +[titan] 2025-10-05 19:57:24,225 - root - INFO - step: 34940 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 19:57:24,225 - root - INFO - lr: 6.7886e-06 gnorm: 1.21 [21:23:12< 3:05:50] +[titan] 2025-10-05 19:57:35,111 - root - INFO - step: 34945 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:57:35,112 - root - INFO - lr: 6.7851e-06 gnorm: 1.20 [21:23:23< 3:05:39] +[titan] 2025-10-05 19:57:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:57:46,109 - root - INFO - step: 34950 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 29,796 tflops: 413.37 mfu: 41.80% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:57:46,109 - root - INFO - lr: 6.7816e-06 gnorm: 1.21 [21:23:34< 3:05:28] +[titan] 2025-10-05 19:57:56,976 - root - INFO - step: 34955 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 19:57:56,976 - root - INFO - lr: 6.7781e-06 gnorm: 1.21 [21:23:45< 3:05:16] +[titan] 2025-10-05 19:58:07,860 - root - INFO - step: 34960 loss: 1.8843 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 19:58:07,860 - root - INFO - lr: 6.7746e-06 gnorm: 1.18 [21:23:56< 3:05:05] +[titan] 2025-10-05 19:58:18,737 - root - INFO - step: 34965 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 19:58:18,737 - root - INFO - lr: 6.7712e-06 gnorm: 1.21 [21:24:07< 3:04:54] +[titan] 2025-10-05 19:58:29,592 - root - INFO - step: 34970 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 19:58:29,592 - root - INFO - lr: 6.7677e-06 gnorm: 2.00 [21:24:18< 3:04:43] +[titan] 2025-10-05 19:58:40,452 - root - INFO - step: 34975 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 19:58:40,453 - root - INFO - lr: 6.7642e-06 gnorm: 1.24 [21:24:28< 3:04:32] +[titan] 2025-10-05 19:58:51,317 - root - INFO - step: 34980 loss: 1.8424 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6294 +[titan] 2025-10-05 19:58:51,317 - root - INFO - lr: 
6.7608e-06 gnorm: 1.20 [21:24:39< 3:04:21] +[titan] 2025-10-05 19:59:02,209 - root - INFO - step: 34985 loss: 2.0210 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 19:59:02,209 - root - INFO - lr: 6.7573e-06 gnorm: 1.25 [21:24:50< 3:04:10] +[titan] 2025-10-05 19:59:13,085 - root - INFO - step: 34990 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 19:59:13,085 - root - INFO - lr: 6.7538e-06 gnorm: 1.21 [21:25:01< 3:03:59] +[titan] 2025-10-05 19:59:23,963 - root - INFO - step: 34995 loss: 1.9729 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7447 +[titan] 2025-10-05 19:59:23,964 - root - INFO - lr: 6.7504e-06 gnorm: 1.20 [21:25:12< 3:03:48] +[titan] 2025-10-05 19:59:32,669 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 19:59:34,852 - root - INFO - step: 35000 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 19:59:34,852 - root - INFO - lr: 6.7469e-06 gnorm: 1.20 [21:25:23< 3:03:37] +[titan] 2025-10-05 19:59:34,852 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 19:59:52,565 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 19:59:52,565 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.71 seconds. 
+[titan] 2025-10-05 20:02:00,815 - root - INFO - step: 35005 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 2,245 tflops: 31.15 mfu: 3.15% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 20:02:00,815 - root - INFO - lr: 6.7435e-06 gnorm: 1.17 [21:27:49< 3:03:45] +[titan] 2025-10-05 20:02:11,608 - root - INFO - step: 35010 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,362 tflops: 421.22 mfu: 42.59% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 20:02:11,608 - root - INFO - lr: 6.7401e-06 gnorm: 1.25 [21:28:00< 3:03:34] +[titan] 2025-10-05 20:02:22,413 - root - INFO - step: 35015 loss: 1.8869 memory: 118.84GiB(85.28%) tps: 30,329 tflops: 420.77 mfu: 42.55% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 20:02:22,413 - root - INFO - lr: 6.7366e-06 gnorm: 1.21 [21:28:10< 3:03:23] +[titan] 2025-10-05 20:02:33,281 - root - INFO - step: 35020 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 20:02:33,281 - root - INFO - lr: 6.7332e-06 gnorm: 1.19 [21:28:21< 3:03:12] +[titan] 2025-10-05 20:02:44,100 - root - INFO - step: 35025 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:02:44,100 - root - INFO - lr: 6.7297e-06 gnorm: 1.17 [21:28:32< 3:03:01] +[titan] 2025-10-05 20:02:54,948 - root - INFO - step: 35030 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 20:02:54,948 - root - INFO - lr: 6.7263e-06 gnorm: 1.23 [21:28:43< 3:02:50] +[titan] 2025-10-05 20:03:05,780 - root - INFO - step: 35035 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:03:05,780 - root - INFO - lr: 
6.7229e-06 gnorm: 1.24 [21:28:54< 3:02:39] +[titan] 2025-10-05 20:03:16,638 - root - INFO - step: 35040 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 20:03:16,638 - root - INFO - lr: 6.7195e-06 gnorm: 1.23 [21:29:05< 3:02:28] +[titan] 2025-10-05 20:03:27,560 - root - INFO - step: 35045 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 20:03:27,560 - root - INFO - lr: 6.7160e-06 gnorm: 1.19 [21:29:16< 3:02:17] +[titan] 2025-10-05 20:03:36,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 20:03:38,413 - root - INFO - step: 35050 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7064 +[titan] 2025-10-05 20:03:38,413 - root - INFO - lr: 6.7126e-06 gnorm: 1.23 [21:29:26< 3:02:06] +[titan] 2025-10-05 20:03:49,265 - root - INFO - step: 35055 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 20:03:49,265 - root - INFO - lr: 6.7092e-06 gnorm: 1.18 [21:29:37< 3:01:55] +[titan] 2025-10-05 20:04:00,143 - root - INFO - step: 35060 loss: 1.9047 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6842 +[titan] 2025-10-05 20:04:00,143 - root - INFO - lr: 6.7058e-06 gnorm: 1.22 [21:29:48< 3:01:44] +[titan] 2025-10-05 20:04:11,001 - root - INFO - step: 35065 loss: 1.8697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 20:04:11,001 - root - INFO - lr: 6.7024e-06 gnorm: 1.21 [21:29:59< 3:01:33] +[titan] 2025-10-05 20:04:21,863 - root - INFO - step: 35070 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 
30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:04:21,863 - root - INFO - lr: 6.6990e-06 gnorm: 1.24 [21:30:10< 3:01:22] +[titan] 2025-10-05 20:04:32,799 - root - INFO - step: 35075 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 20:04:32,799 - root - INFO - lr: 6.6956e-06 gnorm: 1.21 [21:30:21< 3:01:10] +[titan] 2025-10-05 20:04:43,675 - root - INFO - step: 35080 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 20:04:43,675 - root - INFO - lr: 6.6922e-06 gnorm: 1.15 [21:30:32< 3:00:59] +[titan] 2025-10-05 20:04:54,541 - root - INFO - step: 35085 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 20:04:54,542 - root - INFO - lr: 6.6888e-06 gnorm: 1.19 [21:30:43< 3:00:48] +[titan] 2025-10-05 20:05:05,402 - root - INFO - step: 35090 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:05:05,402 - root - INFO - lr: 6.6854e-06 gnorm: 1.18 [21:30:53< 3:00:37] +[titan] 2025-10-05 20:05:16,263 - root - INFO - step: 35095 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:05:16,263 - root - INFO - lr: 6.6820e-06 gnorm: 1.22 [21:31:04< 3:00:26] +[titan] 2025-10-05 20:05:24,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:05:27,144 - root - INFO - step: 35100 loss: 1.9245 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7014 +[titan] 2025-10-05 20:05:27,145 - root - INFO - lr: 6.6786e-06 gnorm: 1.23 [21:31:15< 3:00:15] +[titan] 2025-10-05 20:05:38,035 - root - INFO - step: 35105 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 20:05:38,035 - root - INFO - lr: 6.6753e-06 gnorm: 1.17 [21:31:26< 3:00:04] +[titan] 2025-10-05 20:05:48,877 - root - INFO - step: 35110 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7243 +[titan] 2025-10-05 20:05:48,877 - root - INFO - lr: 6.6719e-06 gnorm: 1.15 [21:31:37< 2:59:53] +[titan] 2025-10-05 20:05:59,749 - root - INFO - step: 35115 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:05:59,749 - root - INFO - lr: 6.6685e-06 gnorm: 1.20 [21:31:48< 2:59:42] +[titan] 2025-10-05 20:06:10,605 - root - INFO - step: 35120 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:06:10,605 - root - INFO - lr: 6.6651e-06 gnorm: 1.17 [21:31:59< 2:59:31] +[titan] 2025-10-05 20:06:21,451 - root - INFO - step: 35125 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:06:21,451 - root - INFO - lr: 6.6618e-06 gnorm: 1.20 [21:32:09< 2:59:20] +[titan] 2025-10-05 20:06:32,365 - root - INFO - step: 35130 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:06:32,365 - root - INFO - lr: 
6.6584e-06 gnorm: 1.23 [21:32:20< 2:59:09] +[titan] 2025-10-05 20:06:43,231 - root - INFO - step: 35135 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7050 +[titan] 2025-10-05 20:06:43,232 - root - INFO - lr: 6.6550e-06 gnorm: 1.18 [21:32:31< 2:58:58] +[titan] 2025-10-05 20:06:54,140 - root - INFO - step: 35140 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 20:06:54,140 - root - INFO - lr: 6.6517e-06 gnorm: 1.23 [21:32:42< 2:58:47] +[titan] 2025-10-05 20:07:05,022 - root - INFO - step: 35145 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 20:07:05,022 - root - INFO - lr: 6.6483e-06 gnorm: 1.23 [21:32:53< 2:58:36] +[titan] 2025-10-05 20:07:13,698 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:07:15,932 - root - INFO - step: 35150 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6718 +[titan] 2025-10-05 20:07:15,932 - root - INFO - lr: 6.6450e-06 gnorm: 1.21 [21:33:04< 2:58:25] +[titan] 2025-10-05 20:07:26,828 - root - INFO - step: 35155 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:07:26,828 - root - INFO - lr: 6.6416e-06 gnorm: 1.16 [21:33:15< 2:58:14] +[titan] 2025-10-05 20:07:37,740 - root - INFO - step: 35160 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 20:07:37,740 - root - INFO - lr: 6.6383e-06 gnorm: 1.17 [21:33:26< 2:58:02] +[titan] 2025-10-05 20:07:48,623 - root - INFO - step: 35165 loss: 1.9332 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 20:07:48,623 - root - INFO - lr: 6.6349e-06 gnorm: 1.21 [21:33:37< 2:57:51] +[titan] 2025-10-05 20:07:59,524 - root - INFO - step: 35170 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 20:07:59,524 - root - INFO - lr: 6.6316e-06 gnorm: 1.20 [21:33:48< 2:57:40] +[titan] 2025-10-05 20:08:10,396 - root - INFO - step: 35175 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:08:10,396 - root - INFO - lr: 6.6283e-06 gnorm: 1.19 [21:33:58< 2:57:29] +[titan] 2025-10-05 20:08:21,270 - root - INFO - step: 35180 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:08:21,270 - root - INFO - lr: 
6.6249e-06 gnorm: 1.19 [21:34:09< 2:57:18] +[titan] 2025-10-05 20:08:32,174 - root - INFO - step: 35185 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 20:08:32,175 - root - INFO - lr: 6.6216e-06 gnorm: 1.21 [21:34:20< 2:57:07] +[titan] 2025-10-05 20:08:43,054 - root - INFO - step: 35190 loss: 1.9950 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7626 +[titan] 2025-10-05 20:08:43,054 - root - INFO - lr: 6.6183e-06 gnorm: 1.21 [21:34:31< 2:56:56] +[titan] 2025-10-05 20:08:53,935 - root - INFO - step: 35195 loss: 1.9405 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7155 +[titan] 2025-10-05 20:08:53,935 - root - INFO - lr: 6.6150e-06 gnorm: 1.23 [21:34:42< 2:56:45] +[titan] 2025-10-05 20:09:02,614 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:09:04,794 - root - INFO - step: 35200 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 20:09:04,794 - root - INFO - lr: 6.6116e-06 gnorm: 1.17 [21:34:53< 2:56:34] +[titan] 2025-10-05 20:09:15,694 - root - INFO - step: 35205 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:09:15,695 - root - INFO - lr: 6.6083e-06 gnorm: 1.21 [21:35:04< 2:56:23] +[titan] 2025-10-05 20:09:26,591 - root - INFO - step: 35210 loss: 1.9224 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:09:26,591 - root - INFO - lr: 6.6050e-06 gnorm: 1.21 [21:35:15< 2:56:12] +[titan] 2025-10-05 20:09:37,512 - root - INFO - step: 35215 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 20:09:37,512 - root - INFO - lr: 6.6017e-06 gnorm: 1.22 [21:35:26< 2:56:01] +[titan] 2025-10-05 20:09:48,396 - root - INFO - step: 35220 loss: 1.9286 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7057 +[titan] 2025-10-05 20:09:48,396 - root - INFO - lr: 6.5984e-06 gnorm: 1.23 [21:35:36< 2:55:50] +[titan] 2025-10-05 20:09:59,291 - root - INFO - step: 35225 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 20:09:59,291 - root - INFO - lr: 6.5951e-06 gnorm: 1.22 [21:35:47< 2:55:39] +[titan] 2025-10-05 20:10:10,147 - root - INFO - step: 35230 loss: 1.9319 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 20:10:10,147 - root - INFO - lr: 
6.5918e-06 gnorm: 1.26 [21:35:58< 2:55:28] +[titan] 2025-10-05 20:10:21,054 - root - INFO - step: 35235 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 20:10:21,055 - root - INFO - lr: 6.5885e-06 gnorm: 1.18 [21:36:09< 2:55:17] +[titan] 2025-10-05 20:10:31,940 - root - INFO - step: 35240 loss: 1.8612 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:10:31,940 - root - INFO - lr: 6.5852e-06 gnorm: 1.16 [21:36:20< 2:55:06] +[titan] 2025-10-05 20:10:42,806 - root - INFO - step: 35245 loss: 2.0002 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 20:10:42,807 - root - INFO - lr: 6.5819e-06 gnorm: 1.22 [21:36:31< 2:54:55] +[titan] 2025-10-05 20:10:51,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:10:53,701 - root - INFO - step: 35250 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 20:10:53,701 - root - INFO - lr: 6.5786e-06 gnorm: 1.21 [21:36:42< 2:54:43] +[titan] 2025-10-05 20:11:04,581 - root - INFO - step: 35255 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 20:11:04,581 - root - INFO - lr: 6.5754e-06 gnorm: 1.20 [21:36:53< 2:54:32] +[titan] 2025-10-05 20:11:15,487 - root - INFO - step: 35260 loss: 1.9259 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:11:15,487 - root - INFO - lr: 6.5721e-06 gnorm: 1.23 [21:37:03< 2:54:21] +[titan] 2025-10-05 20:11:26,398 - root - INFO - step: 35265 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:11:26,398 - root - INFO - lr: 6.5688e-06 gnorm: 1.23 [21:37:14< 2:54:10] +[titan] 2025-10-05 20:11:37,313 - root - INFO - step: 35270 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:11:37,314 - root - INFO - lr: 6.5655e-06 gnorm: 1.21 [21:37:25< 2:53:59] +[titan] 2025-10-05 20:11:48,214 - root - INFO - step: 35275 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 20:11:48,214 - root - INFO - lr: 6.5623e-06 gnorm: 1.24 [21:37:36< 2:53:48] +[titan] 2025-10-05 20:11:59,075 - root - INFO - step: 35280 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 20:11:59,075 - root - INFO - lr: 
6.5590e-06 gnorm: 1.20 [21:37:47< 2:53:37] +[titan] 2025-10-05 20:12:09,938 - root - INFO - step: 35285 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 20:12:09,939 - root - INFO - lr: 6.5557e-06 gnorm: 1.21 [21:37:58< 2:53:26] +[titan] 2025-10-05 20:12:20,821 - root - INFO - step: 35290 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 20:12:20,821 - root - INFO - lr: 6.5525e-06 gnorm: 1.18 [21:38:09< 2:53:15] +[titan] 2025-10-05 20:12:31,713 - root - INFO - step: 35295 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 20:12:31,713 - root - INFO - lr: 6.5492e-06 gnorm: 1.23 [21:38:20< 2:53:04] +[titan] 2025-10-05 20:12:40,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:12:42,667 - root - INFO - step: 35300 loss: 1.9229 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:12:42,667 - root - INFO - lr: 6.5460e-06 gnorm: 1.23 [21:38:31< 2:52:53] +[titan] 2025-10-05 20:12:53,570 - root - INFO - step: 35305 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 20:12:53,570 - root - INFO - lr: 6.5427e-06 gnorm: 1.21 [21:38:42< 2:52:42] +[titan] 2025-10-05 20:13:04,452 - root - INFO - step: 35310 loss: 1.9317 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7088 +[titan] 2025-10-05 20:13:04,452 - root - INFO - lr: 6.5395e-06 gnorm: 1.22 [21:38:52< 2:52:31] +[titan] 2025-10-05 20:13:15,334 - root - INFO - step: 35315 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 20:13:15,334 - root - INFO - lr: 6.5362e-06 gnorm: 1.20 [21:39:03< 2:52:20] +[titan] 2025-10-05 20:13:26,220 - root - INFO - step: 35320 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 20:13:26,221 - root - INFO - lr: 6.5330e-06 gnorm: 1.19 [21:39:14< 2:52:09] +[titan] 2025-10-05 20:13:37,204 - root - INFO - step: 35325 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 29,834 tflops: 413.90 mfu: 41.85% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 20:13:37,204 - root - INFO - lr: 6.5297e-06 gnorm: 1.20 [21:39:25< 2:51:58] +[titan] 2025-10-05 20:13:43,921 - root - INFO - Dumping profiler traces at step 35328 +[titan] 2025-10-05 20:13:43,962 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:13:48,348 - root - INFO - step: 35330 loss: 
1.9326 memory: 118.84GiB(85.28%) tps: 29,406 tflops: 407.96 mfu: 41.25% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:13:48,348 - root - INFO - lr: 6.5265e-06 gnorm: 1.23 [21:39:36< 2:51:47] +[titan] 2025-10-05 20:13:59,245 - root - INFO - step: 35335 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7238 +[titan] 2025-10-05 20:13:59,246 - root - INFO - lr: 6.5233e-06 gnorm: 1.23 [21:39:47< 2:51:36] +[titan] 2025-10-05 20:14:10,148 - root - INFO - step: 35340 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:14:10,149 - root - INFO - lr: 6.5201e-06 gnorm: 1.24 [21:39:58< 2:51:25] +[titan] 2025-10-05 20:14:21,047 - root - INFO - step: 35345 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7363 +[titan] 2025-10-05 20:14:21,048 - root - INFO - lr: 6.5168e-06 gnorm: 1.20 [21:40:09< 2:51:13] +[titan] 2025-10-05 20:14:29,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:14:31,931 - root - INFO - step: 35350 loss: 1.9071 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:14:31,931 - root - INFO - lr: 6.5136e-06 gnorm: 1.22 [21:40:20< 2:51:02] +[titan] 2025-10-05 20:14:42,833 - root - INFO - step: 35355 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6814 +[titan] 2025-10-05 20:14:42,833 - root - INFO - lr: 6.5104e-06 gnorm: 1.18 [21:40:31< 2:50:51] +[titan] 2025-10-05 20:14:53,713 - root - INFO - step: 35360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 20:14:53,714 - root - INFO - lr: 6.5072e-06 gnorm: 1.22 [21:40:42< 2:50:40] +[titan] 2025-10-05 20:15:04,622 - root - INFO - step: 35365 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:15:04,622 - root - INFO - lr: 6.5040e-06 gnorm: 1.20 [21:40:53< 2:50:29] +[titan] 2025-10-05 20:15:15,532 - root - INFO - step: 35370 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:15:15,532 - root - INFO - lr: 6.5008e-06 gnorm: 1.21 [21:41:04< 2:50:18] +[titan] 2025-10-05 20:15:26,422 - root - INFO - step: 35375 loss: 1.9139 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 20:15:26,422 - root - INFO - lr: 6.4976e-06 gnorm: 1.20 [21:41:14< 2:50:07] +[titan] 2025-10-05 20:15:37,640 - root - INFO - step: 35380 loss: 1.9110 memory: 118.84GiB(85.28%) tps: 29,212 tflops: 405.27 mfu: 40.98% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:15:37,640 - root - INFO - lr: 
6.4944e-06 gnorm: 1.19 [21:41:26< 2:49:56] +[titan] 2025-10-05 20:15:48,502 - root - INFO - step: 35385 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6841 +[titan] 2025-10-05 20:15:48,502 - root - INFO - lr: 6.4912e-06 gnorm: 1.22 [21:41:36< 2:49:45] +[titan] 2025-10-05 20:15:59,387 - root - INFO - step: 35390 loss: 1.9078 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 20:15:59,387 - root - INFO - lr: 6.4880e-06 gnorm: 1.27 [21:41:47< 2:49:34] +[titan] 2025-10-05 20:16:10,282 - root - INFO - step: 35395 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7479 +[titan] 2025-10-05 20:16:10,282 - root - INFO - lr: 6.4848e-06 gnorm: 1.21 [21:41:58< 2:49:23] +[titan] 2025-10-05 20:16:18,973 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:16:21,149 - root - INFO - step: 35400 loss: 1.8914 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6724 +[titan] 2025-10-05 20:16:21,149 - root - INFO - lr: 6.4816e-06 gnorm: 1.20 [21:42:09< 2:49:12] +[titan] 2025-10-05 20:16:32,038 - root - INFO - step: 35405 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7194 +[titan] 2025-10-05 20:16:32,038 - root - INFO - lr: 6.4784e-06 gnorm: 1.23 [21:42:20< 2:49:01] +[titan] 2025-10-05 20:16:42,971 - root - INFO - step: 35410 loss: 1.9290 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:16:42,971 - root - INFO - lr: 6.4752e-06 gnorm: 1.20 [21:42:31< 2:48:50] +[titan] 2025-10-05 20:16:53,840 - root - INFO - step: 35415 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:16:53,840 - root - INFO - lr: 6.4721e-06 gnorm: 1.17 [21:42:42< 2:48:39] +[titan] 2025-10-05 20:17:04,705 - root - INFO - step: 35420 loss: 1.9333 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:17:04,705 - root - INFO - lr: 6.4689e-06 gnorm: 1.23 [21:42:53< 2:48:28] +[titan] 2025-10-05 20:17:15,612 - root - INFO - step: 35425 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.15% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 20:17:15,612 - root - INFO - lr: 6.4657e-06 gnorm: 1.20 [21:43:04< 2:48:17] +[titan] 2025-10-05 20:17:26,485 - root - INFO - step: 35430 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:17:26,486 - root - INFO - lr: 
6.4625e-06 gnorm: 1.18 [21:43:14< 2:48:06] +[titan] 2025-10-05 20:17:37,354 - root - INFO - step: 35435 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:17:37,354 - root - INFO - lr: 6.4594e-06 gnorm: 1.23 [21:43:25< 2:47:55] +[titan] 2025-10-05 20:17:48,271 - root - INFO - step: 35440 loss: 1.9162 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:17:48,271 - root - INFO - lr: 6.4562e-06 gnorm: 1.18 [21:43:36< 2:47:44] +[titan] 2025-10-05 20:17:59,161 - root - INFO - step: 35445 loss: 1.9393 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:17:59,161 - root - INFO - lr: 6.4531e-06 gnorm: 1.21 [21:43:47< 2:47:32] +[titan] 2025-10-05 20:18:07,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:18:10,014 - root - INFO - step: 35450 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:18:10,014 - root - INFO - lr: 6.4499e-06 gnorm: 1.20 [21:43:58< 2:47:21] +[titan] 2025-10-05 20:18:20,865 - root - INFO - step: 35455 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 20:18:20,865 - root - INFO - lr: 6.4468e-06 gnorm: 1.25 [21:44:09< 2:47:10] +[titan] 2025-10-05 20:18:31,752 - root - INFO - step: 35460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.6977 +[titan] 2025-10-05 20:18:31,752 - root - INFO - lr: 6.4436e-06 gnorm: 1.27 [21:44:20< 2:46:59] +[titan] 2025-10-05 20:18:42,672 - root - INFO - step: 35465 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:18:42,672 - root - INFO - lr: 6.4405e-06 gnorm: 1.21 [21:44:31< 2:46:48] +[titan] 2025-10-05 20:18:53,523 - root - INFO - step: 35470 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 20:18:53,523 - root - INFO - lr: 6.4373e-06 gnorm: 1.22 [21:44:42< 2:46:37] +[titan] 2025-10-05 20:19:04,397 - root - INFO - step: 35475 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 20:19:04,397 - root - INFO - lr: 6.4342e-06 gnorm: 1.20 [21:44:52< 2:46:26] +[titan] 2025-10-05 20:19:15,272 - root - INFO - step: 35480 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:19:15,272 - root - INFO - lr: 
6.4311e-06 gnorm: 1.15 [21:45:03< 2:46:15] +[titan] 2025-10-05 20:19:26,134 - root - INFO - step: 35485 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6964 +[titan] 2025-10-05 20:19:26,134 - root - INFO - lr: 6.4279e-06 gnorm: 1.22 [21:45:14< 2:46:04] +[titan] 2025-10-05 20:19:37,003 - root - INFO - step: 35490 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:19:37,003 - root - INFO - lr: 6.4248e-06 gnorm: 1.22 [21:45:25< 2:45:53] +[titan] 2025-10-05 20:19:48,060 - root - INFO - step: 35495 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 29,637 tflops: 411.17 mfu: 41.57% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:19:48,060 - root - INFO - lr: 6.4217e-06 gnorm: 1.17 [21:45:36< 2:45:42] +[titan] 2025-10-05 20:19:56,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:19:58,930 - root - INFO - step: 35500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 20:19:58,930 - root - INFO - lr: 6.4186e-06 gnorm: 1.23 [21:45:47< 2:45:31] +[titan] 2025-10-05 20:20:09,779 - root - INFO - step: 35505 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:20:09,780 - root - INFO - lr: 6.4154e-06 gnorm: 1.19 [21:45:58< 2:45:20] +[titan] 2025-10-05 20:20:20,670 - root - INFO - step: 35510 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:20:20,671 - root - INFO - lr: 6.4123e-06 gnorm: 1.22 [21:46:09< 2:45:09] +[titan] 2025-10-05 20:20:31,543 - root - INFO - step: 35515 loss: 1.8943 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 20:20:31,543 - root - INFO - lr: 6.4092e-06 gnorm: 1.24 [21:46:20< 2:44:58] +[titan] 2025-10-05 20:20:42,433 - root - INFO - step: 35520 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:20:42,433 - root - INFO - lr: 6.4061e-06 gnorm: 1.20 [21:46:30< 2:44:47] +[titan] 2025-10-05 20:20:53,334 - root - INFO - step: 35525 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7142 +[titan] 2025-10-05 20:20:53,334 - root - INFO - lr: 6.4030e-06 gnorm: 1.24 [21:46:41< 2:44:36] +[titan] 2025-10-05 20:21:04,211 - root - INFO - step: 35530 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7544 +[titan] 2025-10-05 20:21:04,211 - root - INFO - lr: 
6.3999e-06 gnorm: 1.20 [21:46:52< 2:44:25] +[titan] 2025-10-05 20:21:15,077 - root - INFO - step: 35535 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 20:21:15,077 - root - INFO - lr: 6.3968e-06 gnorm: 1.22 [21:47:03< 2:44:13] +[titan] 2025-10-05 20:21:25,947 - root - INFO - step: 35540 loss: 2.0043 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 20:21:25,947 - root - INFO - lr: 6.3937e-06 gnorm: 1.23 [21:47:14< 2:44:02] +[titan] 2025-10-05 20:21:36,813 - root - INFO - step: 35545 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:21:36,813 - root - INFO - lr: 6.3906e-06 gnorm: 1.26 [21:47:25< 2:43:51] +[titan] 2025-10-05 20:21:45,547 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:21:47,733 - root - INFO - step: 35550 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 20:21:47,733 - root - INFO - lr: 6.3875e-06 gnorm: 1.29 [21:47:36< 2:43:40] +[titan] 2025-10-05 20:21:58,624 - root - INFO - step: 35555 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6918 +[titan] 2025-10-05 20:21:58,624 - root - INFO - lr: 6.3845e-06 gnorm: 1.20 [21:47:47< 2:43:29] +[titan] 2025-10-05 20:22:09,503 - root - INFO - step: 35560 loss: 1.8840 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 20:22:09,503 - root - INFO - lr: 6.3814e-06 gnorm: 1.18 [21:47:57< 2:43:18] +[titan] 2025-10-05 20:22:20,399 - root - INFO - step: 35565 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7435 +[titan] 2025-10-05 20:22:20,399 - root - INFO - lr: 6.3783e-06 gnorm: 1.22 [21:48:08< 2:43:07] +[titan] 2025-10-05 20:22:31,288 - root - INFO - step: 35570 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 20:22:31,288 - root - INFO - lr: 6.3752e-06 gnorm: 1.21 [21:48:19< 2:42:56] +[titan] 2025-10-05 20:22:42,161 - root - INFO - step: 35575 loss: 1.9928 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 20:22:42,162 - root - INFO - lr: 6.3722e-06 gnorm: 1.31 [21:48:30< 2:42:45] +[titan] 2025-10-05 20:22:53,096 - root - INFO - step: 35580 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 20:22:53,096 - root - INFO - lr: 
6.3691e-06 gnorm: 1.25 [21:48:41< 2:42:34] +[titan] 2025-10-05 20:23:04,005 - root - INFO - step: 35585 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 20:23:04,005 - root - INFO - lr: 6.3660e-06 gnorm: 1.22 [21:48:52< 2:42:23] +[titan] 2025-10-05 20:23:14,874 - root - INFO - step: 35590 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6686 +[titan] 2025-10-05 20:23:14,874 - root - INFO - lr: 6.3630e-06 gnorm: 1.15 [21:49:03< 2:42:12] +[titan] 2025-10-05 20:23:25,760 - root - INFO - step: 35595 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 20:23:25,760 - root - INFO - lr: 6.3599e-06 gnorm: 1.26 [21:49:14< 2:42:01] +[titan] 2025-10-05 20:23:34,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:23:36,637 - root - INFO - step: 35600 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:23:36,637 - root - INFO - lr: 6.3568e-06 gnorm: 1.19 [21:49:25< 2:41:50] +[titan] 2025-10-05 20:23:47,564 - root - INFO - step: 35605 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 20:23:47,564 - root - INFO - lr: 6.3538e-06 gnorm: 1.18 [21:49:36< 2:41:39] +[titan] 2025-10-05 20:23:58,425 - root - INFO - step: 35610 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 20:23:58,425 - root - INFO - lr: 6.3508e-06 gnorm: 1.20 [21:49:46< 2:41:28] +[titan] 2025-10-05 20:24:09,278 - root - INFO - step: 35615 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 20:24:09,278 - root - INFO - lr: 6.3477e-06 gnorm: 1.21 [21:49:57< 2:41:17] +[titan] 2025-10-05 20:24:20,176 - root - INFO - step: 35620 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 20:24:20,176 - root - INFO - lr: 6.3447e-06 gnorm: 1.21 [21:50:08< 2:41:06] +[titan] 2025-10-05 20:24:31,048 - root - INFO - step: 35625 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7146 +[titan] 2025-10-05 20:24:31,049 - root - INFO - lr: 6.3416e-06 gnorm: 1.18 [21:50:19< 2:40:55] +[titan] 2025-10-05 20:24:41,914 - root - INFO - step: 35630 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 20:24:41,914 - root - INFO - lr: 
6.3386e-06 gnorm: 1.22 [21:50:30< 2:40:43] +[titan] 2025-10-05 20:24:52,829 - root - INFO - step: 35635 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:24:52,829 - root - INFO - lr: 6.3356e-06 gnorm: 1.18 [21:50:41< 2:40:32] +[titan] 2025-10-05 20:25:03,707 - root - INFO - step: 35640 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:25:03,707 - root - INFO - lr: 6.3325e-06 gnorm: 1.23 [21:50:52< 2:40:21] +[titan] 2025-10-05 20:25:14,555 - root - INFO - step: 35645 loss: 1.8684 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 20:25:14,556 - root - INFO - lr: 6.3295e-06 gnorm: 1.19 [21:51:03< 2:40:10] +[titan] 2025-10-05 20:25:23,267 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:25:25,451 - root - INFO - step: 35650 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7187 +[titan] 2025-10-05 20:25:25,451 - root - INFO - lr: 6.3265e-06 gnorm: 1.20 [21:51:13< 2:39:59] +[titan] 2025-10-05 20:25:36,291 - root - INFO - step: 35655 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 20:25:36,291 - root - INFO - lr: 6.3235e-06 gnorm: 1.22 [21:51:24< 2:39:48] +[titan] 2025-10-05 20:25:47,198 - root - INFO - step: 35660 loss: 1.9669 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 20:25:47,199 - root - INFO - lr: 6.3205e-06 gnorm: 1.19 [21:51:35< 2:39:37] +[titan] 2025-10-05 20:25:58,057 - root - INFO - step: 35665 loss: 1.9343 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 20:25:58,057 - root - INFO - lr: 6.3174e-06 gnorm: 1.22 [21:51:46< 2:39:26] +[titan] 2025-10-05 20:26:08,933 - root - INFO - step: 35670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:26:08,934 - root - INFO - lr: 6.3144e-06 gnorm: 1.20 [21:51:57< 2:39:15] +[titan] 2025-10-05 20:26:19,799 - root - INFO - step: 35675 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 20:26:19,799 - root - INFO - lr: 6.3114e-06 gnorm: 1.25 [21:52:08< 2:39:04] +[titan] 2025-10-05 20:26:30,670 - root - INFO - step: 35680 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 20:26:30,670 - root - INFO - lr: 
6.3084e-06 gnorm: 1.22 [21:52:19< 2:38:53] +[titan] 2025-10-05 20:26:41,581 - root - INFO - step: 35685 loss: 2.0069 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 20:26:41,581 - root - INFO - lr: 6.3054e-06 gnorm: 1.29 [21:52:30< 2:38:42] +[titan] 2025-10-05 20:26:52,517 - root - INFO - step: 35690 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:26:52,517 - root - INFO - lr: 6.3024e-06 gnorm: 1.23 [21:52:40< 2:38:31] +[titan] 2025-10-05 20:27:03,395 - root - INFO - step: 35695 loss: 1.9599 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7330 +[titan] 2025-10-05 20:27:03,395 - root - INFO - lr: 6.2995e-06 gnorm: 1.23 [21:52:51< 2:38:20] +[titan] 2025-10-05 20:27:12,077 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:27:14,268 - root - INFO - step: 35700 loss: 1.9472 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 20:27:14,268 - root - INFO - lr: 6.2965e-06 gnorm: 1.23 [21:53:02< 2:38:09] +[titan] 2025-10-05 20:27:25,130 - root - INFO - step: 35705 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:27:25,130 - root - INFO - lr: 6.2935e-06 gnorm: 1.19 [21:53:13< 2:37:58] +[titan] 2025-10-05 20:27:35,993 - root - INFO - step: 35710 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 20:27:35,993 - root - INFO - lr: 6.2905e-06 gnorm: 1.28 [21:53:24< 2:37:47] +[titan] 2025-10-05 20:27:46,902 - root - INFO - step: 35715 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.7053 +[titan] 2025-10-05 20:27:46,902 - root - INFO - lr: 6.2875e-06 gnorm: 1.23 [21:53:35< 2:37:36] +[titan] 2025-10-05 20:27:57,813 - root - INFO - step: 35720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 20:27:57,813 - root - INFO - lr: 6.2846e-06 gnorm: 1.21 [21:53:46< 2:37:25] +[titan] 2025-10-05 20:28:08,684 - root - INFO - step: 35725 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 20:28:08,685 - root - INFO - lr: 6.2816e-06 gnorm: 1.21 [21:53:57< 2:37:13] +[titan] 2025-10-05 20:28:19,553 - root - INFO - step: 35730 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 20:28:19,553 - root - INFO - lr: 
6.2786e-06 gnorm: 1.19 [21:54:08< 2:37:02] +[titan] 2025-10-05 20:28:30,452 - root - INFO - step: 35735 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 20:28:30,452 - root - INFO - lr: 6.2756e-06 gnorm: 1.25 [21:54:18< 2:36:51] +[titan] 2025-10-05 20:28:41,341 - root - INFO - step: 35740 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 20:28:41,341 - root - INFO - lr: 6.2727e-06 gnorm: 1.26 [21:54:29< 2:36:40] +[titan] 2025-10-05 20:28:52,320 - root - INFO - step: 35745 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.09 mfu: 41.87% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6496 +[titan] 2025-10-05 20:28:52,320 - root - INFO - lr: 6.2697e-06 gnorm: 1.19 [21:54:40< 2:36:29] +[titan] 2025-10-05 20:29:01,029 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:29:03,215 - root - INFO - step: 35750 loss: 1.8998 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 20:29:03,215 - root - INFO - lr: 6.2668e-06 gnorm: 1.23 [21:54:51< 2:36:18] +[titan] 2025-10-05 20:29:14,102 - root - INFO - step: 35755 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 20:29:14,102 - root - INFO - lr: 6.2638e-06 gnorm: 1.25 [21:55:02< 2:36:07] +[titan] 2025-10-05 20:29:24,977 - root - INFO - step: 35760 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 20:29:24,977 - root - INFO - lr: 6.2609e-06 gnorm: 1.19 [21:55:13< 2:35:56] +[titan] 2025-10-05 20:29:35,865 - root - INFO - step: 35765 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 20:29:35,865 - root - INFO - lr: 6.2579e-06 gnorm: 1.20 [21:55:24< 2:35:45] +[titan] 2025-10-05 20:29:46,743 - root - INFO - step: 35770 loss: 1.9516 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 20:29:46,743 - root - INFO - lr: 6.2550e-06 gnorm: 1.22 [21:55:35< 2:35:34] +[titan] 2025-10-05 20:29:57,662 - root - INFO - step: 35775 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 20:29:57,662 - root - INFO - lr: 6.2521e-06 gnorm: 1.24 [21:55:46< 2:35:23] +[titan] 2025-10-05 20:30:08,549 - root - INFO - step: 35780 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 20:30:08,549 - root - INFO - lr: 
6.2491e-06 gnorm: 1.24 [21:55:57< 2:35:12] +[titan] 2025-10-05 20:30:19,428 - root - INFO - step: 35785 loss: 2.0119 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 20:30:19,428 - root - INFO - lr: 6.2462e-06 gnorm: 1.22 [21:56:07< 2:35:01] +[titan] 2025-10-05 20:30:30,298 - root - INFO - step: 35790 loss: 1.8995 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 20:30:30,298 - root - INFO - lr: 6.2433e-06 gnorm: 1.20 [21:56:18< 2:34:50] +[titan] 2025-10-05 20:30:41,166 - root - INFO - step: 35795 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 20:30:41,166 - root - INFO - lr: 6.2403e-06 gnorm: 1.22 [21:56:29< 2:34:39] +[titan] 2025-10-05 20:30:49,930 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:30:52,145 - root - INFO - step: 35800 loss: 1.8719 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.08 mfu: 41.87% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6562 +[titan] 2025-10-05 20:30:52,145 - root - INFO - lr: 6.2374e-06 gnorm: 1.18 [21:56:40< 2:34:28] +[titan] 2025-10-05 20:31:03,002 - root - INFO - step: 35805 loss: 1.8418 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6290 +[titan] 2025-10-05 20:31:03,002 - root - INFO - lr: 6.2345e-06 gnorm: 1.17 [21:56:51< 2:34:17] +[titan] 2025-10-05 20:31:13,902 - root - INFO - step: 35810 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 20:31:13,902 - root - INFO - lr: 6.2316e-06 gnorm: 1.20 [21:57:02< 2:34:06] +[titan] 2025-10-05 20:31:24,763 - root - INFO - step: 35815 loss: 1.8766 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:31:24,763 - root - INFO - lr: 6.2287e-06 gnorm: 1.21 [21:57:13< 2:33:55] +[titan] 2025-10-05 20:31:35,622 - root - INFO - step: 35820 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 20:31:35,623 - root - INFO - lr: 6.2258e-06 gnorm: 1.20 [21:57:24< 2:33:44] +[titan] 2025-10-05 20:31:46,463 - root - INFO - step: 35825 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 20:31:46,464 - root - INFO - lr: 6.2229e-06 gnorm: 1.23 [21:57:34< 2:33:32] +[titan] 2025-10-05 20:31:57,412 - root - INFO - step: 35830 loss: 1.8980 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 20:31:57,412 - root - INFO - lr: 
6.2200e-06 gnorm: 1.20 [21:57:45< 2:33:21] +[titan] 2025-10-05 20:32:08,273 - root - INFO - step: 35835 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6616 +[titan] 2025-10-05 20:32:08,273 - root - INFO - lr: 6.2171e-06 gnorm: 1.17 [21:57:56< 2:33:10] +[titan] 2025-10-05 20:32:19,226 - root - INFO - step: 35840 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 20:32:19,227 - root - INFO - lr: 6.2142e-06 gnorm: 1.24 [21:58:07< 2:32:59] +[titan] 2025-10-05 20:32:19,412 - root - INFO - Dumping profiler traces at step 35840 +[titan] 2025-10-05 20:32:19,449 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:32:30,380 - root - INFO - step: 35845 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,379 tflops: 407.58 mfu: 41.21% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 20:32:30,381 - root - INFO - lr: 6.2113e-06 gnorm: 1.21 [21:58:18< 2:32:48] +[titan] 2025-10-05 20:32:39,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:32:41,258 - root - INFO - step: 35850 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:32:41,258 - root - INFO - lr: 6.2084e-06 gnorm: 1.18 [21:58:29< 2:32:37] +[titan] 2025-10-05 20:32:52,159 - root - INFO - step: 35855 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 20:32:52,159 - root - INFO - lr: 6.2055e-06 gnorm: 1.21 [21:58:40< 2:32:26] +[titan] 2025-10-05 20:33:03,040 - root - INFO - step: 35860 loss: 1.9254 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:33:03,041 - root - INFO - lr: 6.2026e-06 gnorm: 1.22 [21:58:51< 2:32:15] +[titan] 2025-10-05 20:33:13,902 - root - INFO - step: 35865 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7075 +[titan] 2025-10-05 20:33:13,902 - root - INFO - lr: 6.1998e-06 gnorm: 1.24 [21:59:02< 2:32:04] +[titan] 2025-10-05 20:33:24,764 - root - INFO - step: 35870 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 20:33:24,764 - root - INFO - lr: 6.1969e-06 gnorm: 1.24 [21:59:13< 2:31:53] +[titan] 2025-10-05 20:33:35,665 - root - INFO - step: 35875 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:33:35,666 - root - INFO - lr: 6.1940e-06 gnorm: 1.20 [21:59:24< 2:31:42] +[titan] 2025-10-05 20:33:46,542 - root - INFO - step: 35880 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:33:46,542 - root - INFO - lr: 
6.1911e-06 gnorm: 1.22 [21:59:34< 2:31:31] +[titan] 2025-10-05 20:33:57,456 - root - INFO - step: 35885 loss: 1.9215 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 20:33:57,456 - root - INFO - lr: 6.1883e-06 gnorm: 1.21 [21:59:45< 2:31:20] +[titan] 2025-10-05 20:34:08,320 - root - INFO - step: 35890 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 20:34:08,321 - root - INFO - lr: 6.1854e-06 gnorm: 1.23 [21:59:56< 2:31:09] +[titan] 2025-10-05 20:34:19,183 - root - INFO - step: 35895 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 20:34:19,183 - root - INFO - lr: 6.1826e-06 gnorm: 1.20 [22:00:07< 2:30:58] +[titan] 2025-10-05 20:34:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:34:30,044 - root - INFO - step: 35900 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 20:34:30,044 - root - INFO - lr: 6.1797e-06 gnorm: 1.24 [22:00:18< 2:30:47] +[titan] 2025-10-05 20:34:40,962 - root - INFO - step: 35905 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 20:34:40,962 - root - INFO - lr: 6.1769e-06 gnorm: 1.21 [22:00:29< 2:30:36] +[titan] 2025-10-05 20:34:51,814 - root - INFO - step: 35910 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 20:34:51,814 - root - INFO - lr: 6.1740e-06 gnorm: 1.20 [22:00:40< 2:30:25] +[titan] 2025-10-05 20:35:02,707 - root - INFO - step: 35915 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7636 +[titan] 2025-10-05 20:35:02,708 - root - INFO - lr: 6.1712e-06 gnorm: 1.25 [22:00:51< 2:30:14] +[titan] 2025-10-05 20:35:13,561 - root - INFO - step: 35920 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 20:35:13,561 - root - INFO - lr: 6.1683e-06 gnorm: 1.21 [22:01:02< 2:30:03] +[titan] 2025-10-05 20:35:24,429 - root - INFO - step: 35925 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 20:35:24,429 - root - INFO - lr: 6.1655e-06 gnorm: 1.21 [22:01:12< 2:29:51] +[titan] 2025-10-05 20:35:35,298 - root - INFO - step: 35930 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7410 +[titan] 2025-10-05 20:35:35,298 - root - INFO - lr: 
6.1627e-06 gnorm: 1.20 [22:01:23< 2:29:40] +[titan] 2025-10-05 20:35:46,161 - root - INFO - step: 35935 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6568 +[titan] 2025-10-05 20:35:46,161 - root - INFO - lr: 6.1598e-06 gnorm: 1.22 [22:01:34< 2:29:29] +[titan] 2025-10-05 20:35:57,095 - root - INFO - step: 35940 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7140 +[titan] 2025-10-05 20:35:57,096 - root - INFO - lr: 6.1570e-06 gnorm: 1.21 [22:01:45< 2:29:18] +[titan] 2025-10-05 20:36:07,977 - root - INFO - step: 35945 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 20:36:07,977 - root - INFO - lr: 6.1542e-06 gnorm: 1.23 [22:01:56< 2:29:07] +[titan] 2025-10-05 20:36:16,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:36:18,851 - root - INFO - step: 35950 loss: 1.8140 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6041 +[titan] 2025-10-05 20:36:18,851 - root - INFO - lr: 6.1514e-06 gnorm: 1.21 [22:02:07< 2:28:56] +[titan] 2025-10-05 20:36:29,728 - root - INFO - step: 35955 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:36:29,728 - root - INFO - lr: 6.1485e-06 gnorm: 1.18 [22:02:18< 2:28:45] +[titan] 2025-10-05 20:36:40,603 - root - INFO - step: 35960 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 20:36:40,603 - root - INFO - lr: 6.1457e-06 gnorm: 1.20 [22:02:29< 2:28:34] +[titan] 2025-10-05 20:36:51,466 - root - INFO - step: 35965 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7135 +[titan] 2025-10-05 20:36:51,466 - root - INFO - lr: 6.1429e-06 gnorm: 1.21 [22:02:39< 2:28:23] +[titan] 2025-10-05 20:37:02,418 - root - INFO - step: 35970 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 20:37:02,418 - root - INFO - lr: 6.1401e-06 gnorm: 1.17 [22:02:50< 2:28:12] +[titan] 2025-10-05 20:37:13,277 - root - INFO - step: 35975 loss: 1.9766 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:37:13,277 - root - INFO - lr: 6.1373e-06 gnorm: 1.23 [22:03:01< 2:28:01] +[titan] 2025-10-05 20:37:24,151 - root - INFO - step: 35980 loss: 1.9461 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:37:24,151 - root - INFO - lr: 
6.1345e-06 gnorm: 1.17 [22:03:12< 2:27:50] +[titan] 2025-10-05 20:37:34,997 - root - INFO - step: 35985 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6927 +[titan] 2025-10-05 20:37:34,997 - root - INFO - lr: 6.1317e-06 gnorm: 1.19 [22:03:23< 2:27:39] +[titan] 2025-10-05 20:37:45,857 - root - INFO - step: 35990 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:37:45,857 - root - INFO - lr: 6.1289e-06 gnorm: 1.21 [22:03:34< 2:27:28] +[titan] 2025-10-05 20:37:56,761 - root - INFO - step: 35995 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 20:37:56,761 - root - INFO - lr: 6.1261e-06 gnorm: 1.22 [22:03:45< 2:27:17] +[titan] 2025-10-05 20:38:05,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:38:07,602 - root - INFO - step: 36000 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 20:38:07,602 - root - INFO - lr: 6.1233e-06 gnorm: 1.24 [22:03:56< 2:27:06] +[titan] 2025-10-05 20:38:18,465 - root - INFO - step: 36005 loss: 1.8959 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 20:38:18,465 - root - INFO - lr: 6.1206e-06 gnorm: 1.22 [22:04:06< 2:26:55] +[titan] 2025-10-05 20:38:29,352 - root - INFO - step: 36010 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:38:29,352 - root - INFO - lr: 6.1178e-06 gnorm: 1.19 [22:04:17< 2:26:44] +[titan] 2025-10-05 20:38:40,197 - root - INFO - step: 36015 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 20:38:40,197 - root - INFO - lr: 6.1150e-06 gnorm: 1.22 [22:04:28< 2:26:33] +[titan] 2025-10-05 20:38:51,058 - root - INFO - step: 36020 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 20:38:51,058 - root - INFO - lr: 6.1122e-06 gnorm: 1.21 [22:04:39< 2:26:22] +[titan] 2025-10-05 20:39:01,952 - root - INFO - step: 36025 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6606 +[titan] 2025-10-05 20:39:01,952 - root - INFO - lr: 6.1095e-06 gnorm: 1.20 [22:04:50< 2:26:10] +[titan] 2025-10-05 20:39:12,835 - root - INFO - step: 36030 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 20:39:12,835 - root - INFO - lr: 
6.1067e-06 gnorm: 1.25 [22:05:01< 2:25:59] +[titan] 2025-10-05 20:39:23,710 - root - INFO - step: 36035 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 20:39:23,711 - root - INFO - lr: 6.1039e-06 gnorm: 1.23 [22:05:12< 2:25:48] +[titan] 2025-10-05 20:39:34,575 - root - INFO - step: 36040 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 20:39:34,575 - root - INFO - lr: 6.1012e-06 gnorm: 1.23 [22:05:23< 2:25:37] +[titan] 2025-10-05 20:39:45,433 - root - INFO - step: 36045 loss: 1.8945 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6757 +[titan] 2025-10-05 20:39:45,434 - root - INFO - lr: 6.0984e-06 gnorm: 1.20 [22:05:33< 2:25:26] +[titan] 2025-10-05 20:39:54,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:39:56,290 - root - INFO - step: 36050 loss: 1.9349 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7112 +[titan] 2025-10-05 20:39:56,290 - root - INFO - lr: 6.0957e-06 gnorm: 1.20 [22:05:44< 2:25:15] +[titan] 2025-10-05 20:40:07,175 - root - INFO - step: 36055 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 20:40:07,175 - root - INFO - lr: 6.0929e-06 gnorm: 1.26 [22:05:55< 2:25:04] +[titan] 2025-10-05 20:40:18,043 - root - INFO - step: 36060 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:40:18,044 - root - INFO - lr: 6.0902e-06 gnorm: 1.22 [22:06:06< 2:24:53] +[titan] 2025-10-05 20:40:28,916 - root - INFO - step: 36065 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7253 +[titan] 2025-10-05 20:40:28,916 - root - INFO - lr: 6.0874e-06 gnorm: 1.21 [22:06:17< 2:24:42] +[titan] 2025-10-05 20:40:39,778 - root - INFO - step: 36070 loss: 1.8531 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6374 +[titan] 2025-10-05 20:40:39,778 - root - INFO - lr: 6.0847e-06 gnorm: 1.21 [22:06:28< 2:24:31] +[titan] 2025-10-05 20:40:50,621 - root - INFO - step: 36075 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 20:40:50,621 - root - INFO - lr: 6.0820e-06 gnorm: 1.21 [22:06:39< 2:24:20] +[titan] 2025-10-05 20:41:01,488 - root - INFO - step: 36080 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 20:41:01,489 - root - INFO - lr: 
6.0792e-06 gnorm: 1.24 [22:06:49< 2:24:09] +[titan] 2025-10-05 20:41:12,335 - root - INFO - step: 36085 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 20:41:12,335 - root - INFO - lr: 6.0765e-06 gnorm: 1.21 [22:07:00< 2:23:58] +[titan] 2025-10-05 20:41:23,167 - root - INFO - step: 36090 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 20:41:23,168 - root - INFO - lr: 6.0738e-06 gnorm: 1.24 [22:07:11< 2:23:47] +[titan] 2025-10-05 20:41:34,043 - root - INFO - step: 36095 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:41:34,044 - root - INFO - lr: 6.0710e-06 gnorm: 1.26 [22:07:22< 2:23:36] +[titan] 2025-10-05 20:41:42,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:41:44,911 - root - INFO - step: 36100 loss: 1.9238 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7003 +[titan] 2025-10-05 20:41:44,911 - root - INFO - lr: 6.0683e-06 gnorm: 1.23 [22:07:33< 2:23:25] +[titan] 2025-10-05 20:41:55,794 - root - INFO - step: 36105 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 20:41:55,794 - root - INFO - lr: 6.0656e-06 gnorm: 1.18 [22:07:44< 2:23:14] +[titan] 2025-10-05 20:42:06,656 - root - INFO - step: 36110 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 20:42:06,657 - root - INFO - lr: 6.0629e-06 gnorm: 1.22 [22:07:55< 2:23:03] +[titan] 2025-10-05 20:42:17,515 - root - INFO - step: 36115 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 20:42:17,515 - root - INFO - lr: 6.0602e-06 gnorm: 1.22 [22:08:05< 2:22:52] +[titan] 2025-10-05 20:42:28,350 - root - INFO - step: 36120 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 20:42:28,350 - root - INFO - lr: 6.0575e-06 gnorm: 1.23 [22:08:16< 2:22:41] +[titan] 2025-10-05 20:42:39,197 - root - INFO - step: 36125 loss: 1.8516 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 20:42:39,197 - root - INFO - lr: 6.0548e-06 gnorm: 1.24 [22:08:27< 2:22:29] +[titan] 2025-10-05 20:42:50,082 - root - INFO - step: 36130 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 20:42:50,083 - root - INFO - lr: 
6.0521e-06 gnorm: 1.22 [22:08:38< 2:22:18] +[titan] 2025-10-05 20:43:00,986 - root - INFO - step: 36135 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:43:00,987 - root - INFO - lr: 6.0494e-06 gnorm: 1.22 [22:08:49< 2:22:07] +[titan] 2025-10-05 20:43:11,851 - root - INFO - step: 36140 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 20:43:11,851 - root - INFO - lr: 6.0467e-06 gnorm: 1.89 [22:09:00< 2:21:56] +[titan] 2025-10-05 20:43:22,694 - root - INFO - step: 36145 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 20:43:22,694 - root - INFO - lr: 6.0440e-06 gnorm: 1.18 [22:09:11< 2:21:45] +[titan] 2025-10-05 20:43:31,365 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:43:33,546 - root - INFO - step: 36150 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:43:33,547 - root - INFO - lr: 6.0413e-06 gnorm: 1.25 [22:09:21< 2:21:34] +[titan] 2025-10-05 20:43:44,389 - root - INFO - step: 36155 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 20:43:44,389 - root - INFO - lr: 6.0386e-06 gnorm: 1.20 [22:09:32< 2:21:23] +[titan] 2025-10-05 20:43:55,248 - root - INFO - step: 36160 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 20:43:55,248 - root - INFO - lr: 6.0360e-06 gnorm: 1.26 [22:09:43< 2:21:12] +[titan] 2025-10-05 20:44:06,157 - root - INFO - step: 36165 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:06,158 - root - INFO - lr: 6.0333e-06 gnorm: 1.22 [22:09:54< 2:21:01] +[titan] 2025-10-05 20:44:17,014 - root - INFO - step: 36170 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 20:44:17,014 - root - INFO - lr: 6.0306e-06 gnorm: 1.24 [22:10:05< 2:20:50] +[titan] 2025-10-05 20:44:27,855 - root - INFO - step: 36175 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:27,855 - root - INFO - lr: 6.0279e-06 gnorm: 1.23 [22:10:16< 2:20:39] +[titan] 2025-10-05 20:44:38,720 - root - INFO - step: 36180 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:44:38,720 - root - INFO - lr: 
6.0253e-06 gnorm: 1.23 [22:10:27< 2:20:28] +[titan] 2025-10-05 20:44:49,571 - root - INFO - step: 36185 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:44:49,571 - root - INFO - lr: 6.0226e-06 gnorm: 1.24 [22:10:37< 2:20:17] +[titan] 2025-10-05 20:45:00,410 - root - INFO - step: 36190 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 20:45:00,411 - root - INFO - lr: 6.0200e-06 gnorm: 1.29 [22:10:48< 2:20:06] +[titan] 2025-10-05 20:45:11,321 - root - INFO - step: 36195 loss: 1.8986 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6794 +[titan] 2025-10-05 20:45:11,321 - root - INFO - lr: 6.0173e-06 gnorm: 1.22 [22:10:59< 2:19:55] +[titan] 2025-10-05 20:45:19,982 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:45:22,165 - root - INFO - step: 36200 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 20:45:22,166 - root - INFO - lr: 6.0146e-06 gnorm: 1.25 [22:11:10< 2:19:44] +[titan] 2025-10-05 20:45:33,012 - root - INFO - step: 36205 loss: 1.8677 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6514 +[titan] 2025-10-05 20:45:33,012 - root - INFO - lr: 6.0120e-06 gnorm: 1.21 [22:11:21< 2:19:33] +[titan] 2025-10-05 20:45:43,868 - root - INFO - step: 36210 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 20:45:43,868 - root - INFO - lr: 6.0094e-06 gnorm: 1.23 [22:11:32< 2:19:22] +[titan] 2025-10-05 20:45:54,736 - root - INFO - step: 36215 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6484 +[titan] 2025-10-05 20:45:54,737 - root - INFO - lr: 6.0067e-06 gnorm: 1.18 [22:11:43< 2:19:11] +[titan] 2025-10-05 20:46:05,631 - root - INFO - step: 36220 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:46:05,631 - root - INFO - lr: 6.0041e-06 gnorm: 1.19 [22:11:54< 2:19:00] +[titan] 2025-10-05 20:46:16,518 - root - INFO - step: 36225 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 20:46:16,518 - root - INFO - lr: 6.0014e-06 gnorm: 1.22 [22:12:04< 2:18:48] +[titan] 2025-10-05 20:46:27,370 - root - INFO - step: 36230 loss: 1.9836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:46:27,370 - root - INFO - lr: 
5.9988e-06 gnorm: 1.19 [22:12:15< 2:18:37] +[titan] 2025-10-05 20:46:38,233 - root - INFO - step: 36235 loss: 1.8873 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 20:46:38,233 - root - INFO - lr: 5.9962e-06 gnorm: 1.22 [22:12:26< 2:18:26] +[titan] 2025-10-05 20:46:49,088 - root - INFO - step: 36240 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:46:49,088 - root - INFO - lr: 5.9936e-06 gnorm: 1.21 [22:12:37< 2:18:15] +[titan] 2025-10-05 20:46:59,957 - root - INFO - step: 36245 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6792 +[titan] 2025-10-05 20:46:59,958 - root - INFO - lr: 5.9909e-06 gnorm: 1.22 [22:12:48< 2:18:04] +[titan] 2025-10-05 20:47:08,665 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:47:10,858 - root - INFO - step: 36250 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:47:10,858 - root - INFO - lr: 5.9883e-06 gnorm: 1.19 [22:12:59< 2:17:53] +[titan] 2025-10-05 20:47:21,702 - root - INFO - step: 36255 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 20:47:21,702 - root - INFO - lr: 5.9857e-06 gnorm: 1.26 [22:13:10< 2:17:42] +[titan] 2025-10-05 20:47:32,596 - root - INFO - step: 36260 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 20:47:32,596 - root - INFO - lr: 5.9831e-06 gnorm: 1.22 [22:13:21< 2:17:31] +[titan] 2025-10-05 20:47:43,478 - root - INFO - step: 36265 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 20:47:43,478 - root - INFO - lr: 5.9805e-06 gnorm: 1.28 [22:13:31< 2:17:20] +[titan] 2025-10-05 20:47:54,366 - root - INFO - step: 36270 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:47:54,366 - root - INFO - lr: 5.9779e-06 gnorm: 1.22 [22:13:42< 2:17:09] +[titan] 2025-10-05 20:48:05,288 - root - INFO - step: 36275 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6837 +[titan] 2025-10-05 20:48:05,288 - root - INFO - lr: 5.9753e-06 gnorm: 1.22 [22:13:53< 2:16:58] +[titan] 2025-10-05 20:48:16,197 - root - INFO - step: 36280 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 20:48:16,197 - root - INFO - lr: 
5.9727e-06 gnorm: 1.22 [22:14:04< 2:16:47] +[titan] 2025-10-05 20:48:27,074 - root - INFO - step: 36285 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 20:48:27,075 - root - INFO - lr: 5.9701e-06 gnorm: 1.23 [22:14:15< 2:16:36] +[titan] 2025-10-05 20:48:37,962 - root - INFO - step: 36290 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 20:48:37,962 - root - INFO - lr: 5.9675e-06 gnorm: 1.26 [22:14:26< 2:16:25] +[titan] 2025-10-05 20:48:48,831 - root - INFO - step: 36295 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 20:48:48,832 - root - INFO - lr: 5.9649e-06 gnorm: 1.22 [22:14:37< 2:16:14] +[titan] 2025-10-05 20:48:57,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:48:59,686 - root - INFO - step: 36300 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6884 +[titan] 2025-10-05 20:48:59,686 - root - INFO - lr: 5.9623e-06 gnorm: 1.23 [22:14:48< 2:16:03] +[titan] 2025-10-05 20:49:10,530 - root - INFO - step: 36305 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:49:10,530 - root - INFO - lr: 5.9597e-06 gnorm: 1.21 [22:14:58< 2:15:52] +[titan] 2025-10-05 20:49:21,373 - root - INFO - step: 36310 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7418 +[titan] 2025-10-05 20:49:21,373 - root - INFO - lr: 5.9572e-06 gnorm: 1.26 [22:15:09< 2:15:41] +[titan] 2025-10-05 20:49:32,211 - root - INFO - step: 36315 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 20:49:32,211 - root - INFO - lr: 5.9546e-06 gnorm: 1.21 [22:15:20< 2:15:30] +[titan] 2025-10-05 20:49:43,047 - root - INFO - step: 36320 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7327 +[titan] 2025-10-05 20:49:43,048 - root - INFO - lr: 5.9520e-06 gnorm: 1.23 [22:15:31< 2:15:19] +[titan] 2025-10-05 20:49:53,930 - root - INFO - step: 36325 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 20:49:53,930 - root - INFO - lr: 5.9495e-06 gnorm: 1.21 [22:15:42< 2:15:07] +[titan] 2025-10-05 20:50:04,790 - root - INFO - step: 36330 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 20:50:04,790 - root - INFO - lr: 
5.9469e-06 gnorm: 1.22 [22:15:53< 2:14:56] +[titan] 2025-10-05 20:50:15,657 - root - INFO - step: 36335 loss: 1.9258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7028 +[titan] 2025-10-05 20:50:15,657 - root - INFO - lr: 5.9443e-06 gnorm: 1.28 [22:16:04< 2:14:45] +[titan] 2025-10-05 20:50:26,516 - root - INFO - step: 36340 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 20:50:26,516 - root - INFO - lr: 5.9418e-06 gnorm: 1.22 [22:16:14< 2:14:34] +[titan] 2025-10-05 20:50:37,351 - root - INFO - step: 36345 loss: 1.8859 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 20:50:37,351 - root - INFO - lr: 5.9392e-06 gnorm: 1.21 [22:16:25< 2:14:23] +[titan] 2025-10-05 20:50:46,107 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:50:48,289 - root - INFO - step: 36350 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:50:48,289 - root - INFO - lr: 5.9367e-06 gnorm: 1.26 [22:16:36< 2:14:12] +[titan] 2025-10-05 20:50:52,809 - root - INFO - Dumping profiler traces at step 36352 +[titan] 2025-10-05 20:50:52,852 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:50:59,390 - root - INFO - step: 36355 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 29,519 tflops: 409.53 mfu: 41.41% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6886 +[titan] 2025-10-05 20:50:59,390 - root - INFO - lr: 5.9341e-06 gnorm: 1.21 [22:16:47< 2:14:01] +[titan] 2025-10-05 20:51:10,256 - root - INFO - step: 36360 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6670 +[titan] 2025-10-05 20:51:10,257 - root - INFO - lr: 5.9316e-06 gnorm: 1.20 [22:16:58< 2:13:50] +[titan] 2025-10-05 20:51:21,108 - root - INFO - step: 36365 loss: 1.9715 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7430 +[titan] 2025-10-05 20:51:21,108 - root - INFO - lr: 5.9290e-06 gnorm: 1.25 [22:17:09< 2:13:39] +[titan] 2025-10-05 20:51:31,957 - root - INFO - step: 36370 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 20:51:31,957 - root - INFO - lr: 5.9265e-06 gnorm: 1.22 [22:17:20< 2:13:28] +[titan] 2025-10-05 20:51:42,813 - root - INFO - step: 36375 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:51:42,813 - root - INFO - lr: 5.9240e-06 gnorm: 1.22 [22:17:31< 2:13:17] +[titan] 2025-10-05 20:51:53,656 - root - INFO - step: 36380 loss: 
1.9104 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 20:51:53,656 - root - INFO - lr: 5.9214e-06 gnorm: 1.27 [22:17:42< 2:13:06] +[titan] 2025-10-05 20:52:04,533 - root - INFO - step: 36385 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7274 +[titan] 2025-10-05 20:52:04,533 - root - INFO - lr: 5.9189e-06 gnorm: 1.22 [22:17:52< 2:12:55] +[titan] 2025-10-05 20:52:15,414 - root - INFO - step: 36390 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7019 +[titan] 2025-10-05 20:52:15,414 - root - INFO - lr: 5.9164e-06 gnorm: 1.23 [22:18:03< 2:12:44] +[titan] 2025-10-05 20:52:26,295 - root - INFO - step: 36395 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 20:52:26,295 - root - INFO - lr: 5.9139e-06 gnorm: 1.21 [22:18:14< 2:12:33] +[titan] 2025-10-05 20:52:34,981 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:52:37,152 - root - INFO - step: 36400 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 20:52:37,153 - root - INFO - lr: 5.9114e-06 gnorm: 1.22 [22:18:25< 2:12:22] +[titan] 2025-10-05 20:52:48,028 - root - INFO - step: 36405 loss: 1.9539 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:52:48,028 - root - INFO - lr: 5.9088e-06 gnorm: 1.20 [22:18:36< 2:12:11] +[titan] 2025-10-05 20:52:58,901 - root - INFO - step: 36410 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:52:58,901 - root - INFO - lr: 5.9063e-06 gnorm: 1.21 [22:18:47< 2:12:00] +[titan] 2025-10-05 20:53:10,114 - root - INFO - step: 36415 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 29,223 tflops: 405.42 mfu: 40.99% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6627 +[titan] 2025-10-05 20:53:10,115 - root - INFO - lr: 5.9038e-06 gnorm: 1.21 [22:18:58< 2:11:49] +[titan] 2025-10-05 20:53:21,005 - root - INFO - step: 36420 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 20:53:21,005 - root - INFO - lr: 5.9013e-06 gnorm: 1.27 [22:19:09< 2:11:38] +[titan] 2025-10-05 20:53:31,873 - root - INFO - step: 36425 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 20:53:31,874 - root - INFO - lr: 5.8988e-06 gnorm: 1.24 [22:19:20< 2:11:27] +[titan] 2025-10-05 20:53:42,745 - root - INFO - step: 36430 loss: 1.8831 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6658 +[titan] 2025-10-05 20:53:42,745 - root - INFO - lr: 
5.8963e-06 gnorm: 1.28 [22:19:31< 2:11:16] +[titan] 2025-10-05 20:53:53,613 - root - INFO - step: 36435 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 20:53:53,613 - root - INFO - lr: 5.8938e-06 gnorm: 1.23 [22:19:42< 2:11:05] +[titan] 2025-10-05 20:54:04,481 - root - INFO - step: 36440 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:54:04,481 - root - INFO - lr: 5.8914e-06 gnorm: 1.22 [22:19:52< 2:10:53] +[titan] 2025-10-05 20:54:15,378 - root - INFO - step: 36445 loss: 1.9147 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:54:15,378 - root - INFO - lr: 5.8889e-06 gnorm: 1.24 [22:20:03< 2:10:42] +[titan] 2025-10-05 20:54:24,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:54:26,273 - root - INFO - step: 36450 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 20:54:26,273 - root - INFO - lr: 5.8864e-06 gnorm: 1.25 [22:20:14< 2:10:31] +[titan] 2025-10-05 20:54:37,147 - root - INFO - step: 36455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 20:54:37,147 - root - INFO - lr: 5.8839e-06 gnorm: 1.20 [22:20:25< 2:10:20] +[titan] 2025-10-05 20:54:48,029 - root - INFO - step: 36460 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 20:54:48,029 - root - INFO - lr: 5.8814e-06 gnorm: 1.21 [22:20:36< 2:10:09] +[titan] 2025-10-05 20:54:58,890 - root - INFO - step: 36465 loss: 1.9169 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 20:54:58,890 - root - INFO - lr: 5.8790e-06 gnorm: 1.22 [22:20:47< 2:09:58] +[titan] 2025-10-05 20:55:09,764 - root - INFO - step: 36470 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6935 +[titan] 2025-10-05 20:55:09,764 - root - INFO - lr: 5.8765e-06 gnorm: 1.24 [22:20:58< 2:09:47] +[titan] 2025-10-05 20:55:20,621 - root - INFO - step: 36475 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 20:55:20,622 - root - INFO - lr: 5.8740e-06 gnorm: 1.24 [22:21:09< 2:09:36] +[titan] 2025-10-05 20:55:31,491 - root - INFO - step: 36480 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:55:31,491 - root - INFO - lr: 
5.8716e-06 gnorm: 1.25 [22:21:19< 2:09:25] +[titan] 2025-10-05 20:55:42,366 - root - INFO - step: 36485 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 20:55:42,367 - root - INFO - lr: 5.8691e-06 gnorm: 1.22 [22:21:30< 2:09:14] +[titan] 2025-10-05 20:55:53,240 - root - INFO - step: 36490 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:55:53,241 - root - INFO - lr: 5.8667e-06 gnorm: 1.19 [22:21:41< 2:09:03] +[titan] 2025-10-05 20:56:04,092 - root - INFO - step: 36495 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 20:56:04,092 - root - INFO - lr: 5.8642e-06 gnorm: 1.28 [22:21:52< 2:08:52] +[titan] 2025-10-05 20:56:12,794 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:56:14,974 - root - INFO - step: 36500 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:56:14,974 - root - INFO - lr: 5.8618e-06 gnorm: 1.22 [22:22:03< 2:08:41] +[titan] 2025-10-05 20:56:25,858 - root - INFO - step: 36505 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:56:25,858 - root - INFO - lr: 5.8593e-06 gnorm: 1.21 [22:22:14< 2:08:30] +[titan] 2025-10-05 20:56:36,712 - root - INFO - step: 36510 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 20:56:36,712 - root - INFO - lr: 5.8569e-06 gnorm: 1.26 [22:22:25< 2:08:19] +[titan] 2025-10-05 20:56:47,594 - root - INFO - step: 36515 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:56:47,594 - root - INFO - lr: 5.8544e-06 gnorm: 1.24 [22:22:35< 2:08:08] +[titan] 2025-10-05 20:56:58,464 - root - INFO - step: 36520 loss: 1.8908 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6705 +[titan] 2025-10-05 20:56:58,465 - root - INFO - lr: 5.8520e-06 gnorm: 1.23 [22:22:46< 2:07:57] +[titan] 2025-10-05 20:57:09,332 - root - INFO - step: 36525 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:57:09,332 - root - INFO - lr: 5.8496e-06 gnorm: 1.21 [22:22:57< 2:07:46] +[titan] 2025-10-05 20:57:20,232 - root - INFO - step: 36530 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 20:57:20,232 - root - INFO - lr: 
5.8471e-06 gnorm: 1.21 [22:23:08< 2:07:35] +[titan] 2025-10-05 20:57:31,124 - root - INFO - step: 36535 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 20:57:31,124 - root - INFO - lr: 5.8447e-06 gnorm: 1.23 [22:23:19< 2:07:24] +[titan] 2025-10-05 20:57:42,014 - root - INFO - step: 36540 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 20:57:42,014 - root - INFO - lr: 5.8423e-06 gnorm: 1.25 [22:23:30< 2:07:13] +[titan] 2025-10-05 20:57:52,927 - root - INFO - step: 36545 loss: 1.9727 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 20:57:52,928 - root - INFO - lr: 5.8399e-06 gnorm: 1.24 [22:23:41< 2:07:02] +[titan] 2025-10-05 20:58:01,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:58:03,825 - root - INFO - step: 36550 loss: 1.9288 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7049 +[titan] 2025-10-05 20:58:03,825 - root - INFO - lr: 5.8375e-06 gnorm: 1.24 [22:23:52< 2:06:50] +[titan] 2025-10-05 20:58:14,740 - root - INFO - step: 36555 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 20:58:14,741 - root - INFO - lr: 5.8351e-06 gnorm: 1.26 [22:24:03< 2:06:39] +[titan] 2025-10-05 20:58:25,614 - root - INFO - step: 36560 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 20:58:25,614 - root - INFO - lr: 5.8326e-06 gnorm: 1.18 [22:24:13< 2:06:28] +[titan] 2025-10-05 20:58:36,506 - root - INFO - step: 36565 loss: 1.8964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:58:36,506 - root - INFO - lr: 5.8302e-06 gnorm: 1.20 [22:24:24< 2:06:17] +[titan] 2025-10-05 20:58:47,389 - root - INFO - step: 36570 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 20:58:47,390 - root - INFO - lr: 5.8278e-06 gnorm: 1.25 [22:24:35< 2:06:06] +[titan] 2025-10-05 20:58:58,289 - root - INFO - step: 36575 loss: 1.9029 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:58:58,289 - root - INFO - lr: 5.8254e-06 gnorm: 1.20 [22:24:46< 2:05:55] +[titan] 2025-10-05 20:59:09,190 - root - INFO - step: 36580 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7074 +[titan] 2025-10-05 20:59:09,190 - root - INFO - lr: 
5.8231e-06 gnorm: 1.24 [22:24:57< 2:05:44] +[titan] 2025-10-05 20:59:20,102 - root - INFO - step: 36585 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7440 +[titan] 2025-10-05 20:59:20,103 - root - INFO - lr: 5.8207e-06 gnorm: 1.22 [22:25:08< 2:05:33] +[titan] 2025-10-05 20:59:30,980 - root - INFO - step: 36590 loss: 1.9441 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:59:30,980 - root - INFO - lr: 5.8183e-06 gnorm: 1.22 [22:25:19< 2:05:22] +[titan] 2025-10-05 20:59:41,845 - root - INFO - step: 36595 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6830 +[titan] 2025-10-05 20:59:41,845 - root - INFO - lr: 5.8159e-06 gnorm: 1.20 [22:25:30< 2:05:11] +[titan] 2025-10-05 20:59:50,540 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:59:52,732 - root - INFO - step: 36600 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:59:52,732 - root - INFO - lr: 5.8135e-06 gnorm: 1.20 [22:25:41< 2:05:00] +[titan] 2025-10-05 21:00:03,618 - root - INFO - step: 36605 loss: 1.8614 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6465 +[titan] 2025-10-05 21:00:03,618 - root - INFO - lr: 5.8111e-06 gnorm: 1.22 [22:25:51< 2:04:49] +[titan] 2025-10-05 21:00:14,529 - root - INFO - step: 36610 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 21:00:14,529 - root - INFO - lr: 5.8088e-06 gnorm: 1.24 [22:26:02< 2:04:38] +[titan] 2025-10-05 21:00:25,449 - root - INFO - step: 36615 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:00:25,450 - root - INFO - lr: 5.8064e-06 gnorm: 1.23 [22:26:13< 2:04:27] +[titan] 2025-10-05 21:00:36,361 - root - INFO - step: 36620 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6464 +[titan] 2025-10-05 21:00:36,361 - root - INFO - lr: 5.8040e-06 gnorm: 1.24 [22:26:24< 2:04:16] +[titan] 2025-10-05 21:00:47,259 - root - INFO - step: 36625 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7482 +[titan] 2025-10-05 21:00:47,259 - root - INFO - lr: 5.8017e-06 gnorm: 1.24 [22:26:35< 2:04:05] +[titan] 2025-10-05 21:00:58,160 - root - INFO - step: 36630 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:00:58,160 - root - INFO - lr: 
5.7993e-06 gnorm: 1.21 [22:26:46< 2:03:54] +[titan] 2025-10-05 21:01:09,053 - root - INFO - step: 36635 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:01:09,053 - root - INFO - lr: 5.7969e-06 gnorm: 1.26 [22:26:57< 2:03:43] +[titan] 2025-10-05 21:01:19,958 - root - INFO - step: 36640 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 21:01:19,958 - root - INFO - lr: 5.7946e-06 gnorm: 1.24 [22:27:08< 2:03:32] +[titan] 2025-10-05 21:01:30,865 - root - INFO - step: 36645 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:01:30,865 - root - INFO - lr: 5.7922e-06 gnorm: 1.22 [22:27:19< 2:03:21] +[titan] 2025-10-05 21:01:39,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:01:41,769 - root - INFO - step: 36650 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:01:41,769 - root - INFO - lr: 5.7899e-06 gnorm: 1.25 [22:27:30< 2:03:10] +[titan] 2025-10-05 21:01:52,656 - root - INFO - step: 36655 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6666 +[titan] 2025-10-05 21:01:52,656 - root - INFO - lr: 5.7876e-06 gnorm: 1.26 [22:27:41< 2:02:59] +[titan] 2025-10-05 21:02:03,549 - root - INFO - step: 36660 loss: 1.9170 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 21:02:03,549 - root - INFO - lr: 5.7852e-06 gnorm: 1.24 [22:27:51< 2:02:48] +[titan] 2025-10-05 21:02:14,436 - root - INFO - step: 36665 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6585 +[titan] 2025-10-05 21:02:14,436 - root - INFO - lr: 5.7829e-06 gnorm: 1.20 [22:28:02< 2:02:36] +[titan] 2025-10-05 21:02:25,324 - root - INFO - step: 36670 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 21:02:25,324 - root - INFO - lr: 5.7806e-06 gnorm: 1.25 [22:28:13< 2:02:25] +[titan] 2025-10-05 21:02:36,230 - root - INFO - step: 36675 loss: 1.8517 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6371 +[titan] 2025-10-05 21:02:36,230 - root - INFO - lr: 5.7782e-06 gnorm: 1.21 [22:28:24< 2:02:14] +[titan] 2025-10-05 21:02:47,119 - root - INFO - step: 36680 loss: 1.8308 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6192 +[titan] 2025-10-05 21:02:47,119 - root - INFO - lr: 
5.7759e-06 gnorm: 1.21 [22:28:35< 2:02:03] +[titan] 2025-10-05 21:02:58,028 - root - INFO - step: 36685 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:02:58,028 - root - INFO - lr: 5.7736e-06 gnorm: 1.27 [22:28:46< 2:01:52] +[titan] 2025-10-05 21:03:08,899 - root - INFO - step: 36690 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 21:03:08,899 - root - INFO - lr: 5.7713e-06 gnorm: 1.23 [22:28:57< 2:01:41] +[titan] 2025-10-05 21:03:19,806 - root - INFO - step: 36695 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:03:19,806 - root - INFO - lr: 5.7689e-06 gnorm: 1.23 [22:29:08< 2:01:30] +[titan] 2025-10-05 21:03:28,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:03:30,710 - root - INFO - step: 36700 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 21:03:30,710 - root - INFO - lr: 5.7666e-06 gnorm: 1.27 [22:29:19< 2:01:19] +[titan] 2025-10-05 21:03:41,623 - root - INFO - step: 36705 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 21:03:41,624 - root - INFO - lr: 5.7643e-06 gnorm: 1.24 [22:29:29< 2:01:08] +[titan] 2025-10-05 21:03:52,525 - root - INFO - step: 36710 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:03:52,525 - root - INFO - lr: 5.7620e-06 gnorm: 1.26 [22:29:40< 2:00:57] +[titan] 2025-10-05 21:04:03,447 - root - INFO - step: 36715 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:04:03,447 - root - INFO - lr: 5.7597e-06 gnorm: 1.26 [22:29:51< 2:00:46] +[titan] 2025-10-05 21:04:14,324 - root - INFO - step: 36720 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:04:14,324 - root - INFO - lr: 5.7574e-06 gnorm: 1.20 [22:30:02< 2:00:35] +[titan] 2025-10-05 21:04:25,273 - root - INFO - step: 36725 loss: 1.9301 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:04:25,273 - root - INFO - lr: 5.7551e-06 gnorm: 1.23 [22:30:13< 2:00:24] +[titan] 2025-10-05 21:04:36,157 - root - INFO - step: 36730 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:04:36,157 - root - INFO - lr: 
5.7528e-06 gnorm: 1.24 [22:30:24< 2:00:13] +[titan] 2025-10-05 21:04:47,035 - root - INFO - step: 36735 loss: 1.9023 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 21:04:47,035 - root - INFO - lr: 5.7505e-06 gnorm: 1.26 [22:30:35< 2:00:02] +[titan] 2025-10-05 21:04:57,939 - root - INFO - step: 36740 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 21:04:57,939 - root - INFO - lr: 5.7483e-06 gnorm: 1.21 [22:30:46< 1:59:51] +[titan] 2025-10-05 21:05:08,831 - root - INFO - step: 36745 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 21:05:08,831 - root - INFO - lr: 5.7460e-06 gnorm: 1.25 [22:30:57< 1:59:40] +[titan] 2025-10-05 21:05:17,519 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:05:19,701 - root - INFO - step: 36750 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:05:19,702 - root - INFO - lr: 5.7437e-06 gnorm: 1.22 [22:31:08< 1:59:29] +[titan] 2025-10-05 21:05:30,640 - root - INFO - step: 36755 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 21:05:30,640 - root - INFO - lr: 5.7414e-06 gnorm: 1.23 [22:31:19< 1:59:18] +[titan] 2025-10-05 21:05:41,514 - root - INFO - step: 36760 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:05:41,514 - root - INFO - lr: 5.7392e-06 gnorm: 1.21 [22:31:29< 1:59:07] +[titan] 2025-10-05 21:05:52,376 - root - INFO - step: 36765 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 21:05:52,376 - root - INFO - lr: 5.7369e-06 gnorm: 1.26 [22:31:40< 1:58:56] +[titan] 2025-10-05 21:06:03,266 - root - INFO - step: 36770 loss: 1.8668 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 21:06:03,266 - root - INFO - lr: 5.7346e-06 gnorm: 1.22 [22:31:51< 1:58:45] +[titan] 2025-10-05 21:06:14,143 - root - INFO - step: 36775 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 21:06:14,143 - root - INFO - lr: 5.7324e-06 gnorm: 1.23 [22:32:02< 1:58:34] +[titan] 2025-10-05 21:06:25,098 - root - INFO - step: 36780 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 21:06:25,098 - root - INFO - lr: 
5.7301e-06 gnorm: 1.22 [22:32:13< 1:58:23] +[titan] 2025-10-05 21:06:35,961 - root - INFO - step: 36785 loss: 1.8486 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6355 +[titan] 2025-10-05 21:06:35,961 - root - INFO - lr: 5.7279e-06 gnorm: 1.26 [22:32:24< 1:58:11] +[titan] 2025-10-05 21:06:46,824 - root - INFO - step: 36790 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 21:06:46,825 - root - INFO - lr: 5.7256e-06 gnorm: 1.26 [22:32:35< 1:58:00] +[titan] 2025-10-05 21:06:57,688 - root - INFO - step: 36795 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7115 +[titan] 2025-10-05 21:06:57,688 - root - INFO - lr: 5.7234e-06 gnorm: 1.23 [22:32:46< 1:57:49] +[titan] 2025-10-05 21:07:06,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:07:08,540 - root - INFO - step: 36800 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 21:07:08,541 - root - INFO - lr: 5.7211e-06 gnorm: 1.23 [22:32:56< 1:57:38] +[titan] 2025-10-05 21:07:19,425 - root - INFO - step: 36805 loss: 1.9493 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:07:19,425 - root - INFO - lr: 5.7189e-06 gnorm: 1.24 [22:33:07< 1:57:27] +[titan] 2025-10-05 21:07:30,382 - root - INFO - step: 36810 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:07:30,383 - root - INFO - lr: 5.7166e-06 gnorm: 1.23 [22:33:18< 1:57:16] +[titan] 2025-10-05 21:07:41,263 - root - INFO - step: 36815 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 21:07:41,263 - root - INFO - lr: 5.7144e-06 gnorm: 1.24 [22:33:29< 1:57:05] +[titan] 2025-10-05 21:07:52,120 - root - INFO - step: 36820 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 21:07:52,120 - root - INFO - lr: 5.7122e-06 gnorm: 1.21 [22:33:40< 1:56:54] +[titan] 2025-10-05 21:08:02,998 - root - INFO - step: 36825 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6712 +[titan] 2025-10-05 21:08:02,999 - root - INFO - lr: 5.7100e-06 gnorm: 1.24 [22:33:51< 1:56:43] +[titan] 2025-10-05 21:08:13,877 - root - INFO - step: 36830 loss: 1.9915 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 21:08:13,878 - root - INFO - lr: 
5.7077e-06 gnorm: 1.31 [22:34:02< 1:56:32] +[titan] 2025-10-05 21:08:25,107 - root - INFO - step: 36835 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 29,180 tflops: 404.83 mfu: 40.93% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6650 +[titan] 2025-10-05 21:08:25,108 - root - INFO - lr: 5.7055e-06 gnorm: 1.20 [22:34:13< 1:56:21] +[titan] 2025-10-05 21:08:35,977 - root - INFO - step: 36840 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 21:08:35,977 - root - INFO - lr: 5.7033e-06 gnorm: 1.24 [22:34:24< 1:56:10] +[titan] 2025-10-05 21:08:46,865 - root - INFO - step: 36845 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 21:08:46,865 - root - INFO - lr: 5.7011e-06 gnorm: 1.24 [22:34:35< 1:55:59] +[titan] 2025-10-05 21:08:55,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:08:57,718 - root - INFO - step: 36850 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 21:08:57,718 - root - INFO - lr: 5.6989e-06 gnorm: 1.26 [22:34:46< 1:55:48] +[titan] 2025-10-05 21:09:08,595 - root - INFO - step: 36855 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7308 +[titan] 2025-10-05 21:09:08,595 - root - INFO - lr: 5.6967e-06 gnorm: 1.24 [22:34:56< 1:55:37] +[titan] 2025-10-05 21:09:19,469 - root - INFO - step: 36860 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:09:19,470 - root - INFO - lr: 5.6945e-06 gnorm: 1.27 [22:35:07< 1:55:26] +[titan] 2025-10-05 21:09:28,574 - root - INFO - Dumping profiler traces at step 36864 +[titan] 2025-10-05 21:09:28,615 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:09:30,837 - root - INFO - step: 36865 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 28,827 tflops: 399.93 mfu: 40.44% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 21:09:30,837 - root - INFO - lr: 5.6923e-06 gnorm: 1.23 [22:35:19< 1:55:15] +[titan] 2025-10-05 21:09:41,699 - root - INFO - step: 36870 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 21:09:41,699 - root - INFO - lr: 5.6901e-06 gnorm: 1.24 [22:35:30< 1:55:04] +[titan] 2025-10-05 21:09:52,574 - root - INFO - step: 36875 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 21:09:52,574 - root - INFO - lr: 5.6879e-06 gnorm: 1.24 [22:35:40< 1:54:53] +[titan] 2025-10-05 21:10:03,422 - root - INFO - step: 36880 loss: 
1.9042 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:10:03,422 - root - INFO - lr: 5.6857e-06 gnorm: 1.21 [22:35:51< 1:54:42] +[titan] 2025-10-05 21:10:14,288 - root - INFO - step: 36885 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 21:10:14,288 - root - INFO - lr: 5.6835e-06 gnorm: 1.23 [22:36:02< 1:54:31] +[titan] 2025-10-05 21:10:25,157 - root - INFO - step: 36890 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 21:10:25,157 - root - INFO - lr: 5.6813e-06 gnorm: 1.24 [22:36:13< 1:54:20] +[titan] 2025-10-05 21:10:36,097 - root - INFO - step: 36895 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 21:10:36,097 - root - INFO - lr: 5.6792e-06 gnorm: 1.24 [22:36:24< 1:54:09] +[titan] 2025-10-05 21:10:44,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:10:46,986 - root - INFO - step: 36900 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 21:10:46,987 - root - INFO - lr: 5.6770e-06 gnorm: 1.28 [22:36:35< 1:53:58] +[titan] 2025-10-05 21:10:57,833 - root - INFO - step: 36905 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 21:10:57,833 - root - INFO - lr: 5.6748e-06 gnorm: 1.23 [22:36:46< 1:53:47] +[titan] 2025-10-05 21:11:08,682 - root - INFO - step: 36910 loss: 1.8557 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6407 +[titan] 2025-10-05 21:11:08,682 - root - INFO - lr: 5.6726e-06 gnorm: 1.19 [22:36:57< 1:53:36] +[titan] 2025-10-05 21:11:19,531 - root - INFO - step: 36915 loss: 1.8896 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:11:19,532 - root - INFO - lr: 5.6705e-06 gnorm: 1.18 [22:37:07< 1:53:24] +[titan] 2025-10-05 21:11:30,448 - root - INFO - step: 36920 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:11:30,449 - root - INFO - lr: 5.6683e-06 gnorm: 1.24 [22:37:18< 1:53:13] +[titan] 2025-10-05 21:11:41,323 - root - INFO - step: 36925 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 21:11:41,323 - root - INFO - lr: 5.6662e-06 gnorm: 1.26 [22:37:29< 1:53:02] +[titan] 2025-10-05 21:11:52,243 - root - INFO - step: 36930 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6917 +[titan] 2025-10-05 21:11:52,243 - root - INFO - lr: 
5.6640e-06 gnorm: 1.28 [22:37:40< 1:52:51] +[titan] 2025-10-05 21:12:03,124 - root - INFO - step: 36935 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6806 +[titan] 2025-10-05 21:12:03,124 - root - INFO - lr: 5.6619e-06 gnorm: 1.20 [22:37:51< 1:52:40] +[titan] 2025-10-05 21:12:14,002 - root - INFO - step: 36940 loss: 1.9158 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6952 +[titan] 2025-10-05 21:12:14,002 - root - INFO - lr: 5.6597e-06 gnorm: 1.26 [22:38:02< 1:52:29] +[titan] 2025-10-05 21:12:24,869 - root - INFO - step: 36945 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 21:12:24,870 - root - INFO - lr: 5.6576e-06 gnorm: 1.21 [22:38:13< 1:52:18] +[titan] 2025-10-05 21:12:33,629 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:12:35,810 - root - INFO - step: 36950 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 21:12:35,810 - root - INFO - lr: 5.6554e-06 gnorm: 1.24 [22:38:24< 1:52:07] +[titan] 2025-10-05 21:12:46,684 - root - INFO - step: 36955 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7035 +[titan] 2025-10-05 21:12:46,685 - root - INFO - lr: 5.6533e-06 gnorm: 1.21 [22:38:35< 1:51:56] +[titan] 2025-10-05 21:12:57,552 - root - INFO - step: 36960 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 21:12:57,552 - root - INFO - lr: 5.6512e-06 gnorm: 1.24 [22:38:45< 1:51:45] +[titan] 2025-10-05 21:13:08,463 - root - INFO - step: 36965 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7952 +[titan] 2025-10-05 21:13:08,463 - root - INFO - lr: 5.6490e-06 gnorm: 1.24 [22:38:56< 1:51:34] +[titan] 2025-10-05 21:13:19,335 - root - INFO - step: 36970 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:19,336 - root - INFO - lr: 5.6469e-06 gnorm: 1.21 [22:39:07< 1:51:23] +[titan] 2025-10-05 21:13:30,256 - root - INFO - step: 36975 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:30,256 - root - INFO - lr: 5.6448e-06 gnorm: 1.28 [22:39:18< 1:51:12] +[titan] 2025-10-05 21:13:41,127 - root - INFO - step: 36980 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:13:41,128 - root - INFO - lr: 
5.6427e-06 gnorm: 1.23 [22:39:29< 1:51:01] +[titan] 2025-10-05 21:13:51,994 - root - INFO - step: 36985 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:13:51,995 - root - INFO - lr: 5.6405e-06 gnorm: 1.29 [22:39:40< 1:50:50] +[titan] 2025-10-05 21:14:02,859 - root - INFO - step: 36990 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7044 +[titan] 2025-10-05 21:14:02,859 - root - INFO - lr: 5.6384e-06 gnorm: 1.24 [22:39:51< 1:50:39] +[titan] 2025-10-05 21:14:13,749 - root - INFO - step: 36995 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6987 +[titan] 2025-10-05 21:14:13,749 - root - INFO - lr: 5.6363e-06 gnorm: 1.22 [22:40:02< 1:50:28] +[titan] 2025-10-05 21:14:22,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:14:24,631 - root - INFO - step: 37000 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6388 +[titan] 2025-10-05 21:14:24,631 - root - INFO - lr: 5.6342e-06 gnorm: 1.20 [22:40:12< 1:50:17] +[titan] 2025-10-05 21:14:35,570 - root - INFO - step: 37005 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 21:14:35,570 - root - INFO - lr: 5.6321e-06 gnorm: 1.26 [22:40:23< 1:50:06] +[titan] 2025-10-05 21:14:46,450 - root - INFO - step: 37010 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 21:14:46,450 - root - INFO - lr: 5.6300e-06 gnorm: 1.19 [22:40:34< 1:49:55] +[titan] 2025-10-05 21:14:57,328 - root - INFO - step: 37015 loss: 1.9312 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7076 +[titan] 2025-10-05 21:14:57,329 - root - INFO - lr: 5.6279e-06 gnorm: 1.27 [22:40:45< 1:49:44] +[titan] 2025-10-05 21:15:08,191 - root - INFO - step: 37020 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 21:15:08,191 - root - INFO - lr: 5.6258e-06 gnorm: 1.28 [22:40:56< 1:49:33] +[titan] 2025-10-05 21:15:19,080 - root - INFO - step: 37025 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:15:19,081 - root - INFO - lr: 5.6237e-06 gnorm: 1.20 [22:41:07< 1:49:22] +[titan] 2025-10-05 21:15:29,953 - root - INFO - step: 37030 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 21:15:29,953 - root - INFO - lr: 
5.6216e-06 gnorm: 1.21 [22:41:18< 1:49:11] +[titan] 2025-10-05 21:15:40,885 - root - INFO - step: 37035 loss: 1.8738 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6571 +[titan] 2025-10-05 21:15:40,885 - root - INFO - lr: 5.6196e-06 gnorm: 1.21 [22:41:29< 1:48:59] +[titan] 2025-10-05 21:15:51,738 - root - INFO - step: 37040 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6703 +[titan] 2025-10-05 21:15:51,738 - root - INFO - lr: 5.6175e-06 gnorm: 1.25 [22:41:40< 1:48:48] +[titan] 2025-10-05 21:16:02,623 - root - INFO - step: 37045 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:16:02,624 - root - INFO - lr: 5.6154e-06 gnorm: 1.21 [22:41:50< 1:48:37] +[titan] 2025-10-05 21:16:11,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:16:13,511 - root - INFO - step: 37050 loss: 1.9092 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 21:16:13,511 - root - INFO - lr: 5.6133e-06 gnorm: 1.23 [22:42:01< 1:48:26] +[titan] 2025-10-05 21:16:24,393 - root - INFO - step: 37055 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 21:16:24,393 - root - INFO - lr: 5.6113e-06 gnorm: 1.24 [22:42:12< 1:48:15] +[titan] 2025-10-05 21:16:35,329 - root - INFO - step: 37060 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:16:35,329 - root - INFO - lr: 5.6092e-06 gnorm: 1.25 [22:42:23< 1:48:04] +[titan] 2025-10-05 21:16:46,199 - root - INFO - step: 37065 loss: 1.9535 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:16:46,199 - root - INFO - lr: 5.6071e-06 gnorm: 1.27 [22:42:34< 1:47:53] +[titan] 2025-10-05 21:16:57,064 - root - INFO - step: 37070 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 21:16:57,064 - root - INFO - lr: 5.6051e-06 gnorm: 1.24 [22:42:45< 1:47:42] +[titan] 2025-10-05 21:17:07,940 - root - INFO - step: 37075 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7181 +[titan] 2025-10-05 21:17:07,940 - root - INFO - lr: 5.6030e-06 gnorm: 1.23 [22:42:56< 1:47:31] +[titan] 2025-10-05 21:17:18,806 - root - INFO - step: 37080 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 21:17:18,807 - root - INFO - lr: 
5.6010e-06 gnorm: 1.20 [22:43:07< 1:47:20] +[titan] 2025-10-05 21:17:29,692 - root - INFO - step: 37085 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 21:17:29,692 - root - INFO - lr: 5.5989e-06 gnorm: 1.21 [22:43:18< 1:47:09] +[titan] 2025-10-05 21:17:40,647 - root - INFO - step: 37090 loss: 1.9429 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:17:40,648 - root - INFO - lr: 5.5969e-06 gnorm: 1.27 [22:43:28< 1:46:58] +[titan] 2025-10-05 21:17:51,517 - root - INFO - step: 37095 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 21:17:51,517 - root - INFO - lr: 5.5949e-06 gnorm: 1.27 [22:43:39< 1:46:47] +[titan] 2025-10-05 21:18:00,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:18:02,399 - root - INFO - step: 37100 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6612 +[titan] 2025-10-05 21:18:02,399 - root - INFO - lr: 5.5928e-06 gnorm: 1.26 [22:43:50< 1:46:36] +[titan] 2025-10-05 21:18:13,285 - root - INFO - step: 37105 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6690 +[titan] 2025-10-05 21:18:13,286 - root - INFO - lr: 5.5908e-06 gnorm: 1.20 [22:44:01< 1:46:25] +[titan] 2025-10-05 21:18:24,145 - root - INFO - step: 37110 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 21:18:24,145 - root - INFO - lr: 5.5888e-06 gnorm: 1.25 [22:44:12< 1:46:14] +[titan] 2025-10-05 21:18:35,081 - root - INFO - step: 37115 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 21:18:35,081 - root - INFO - lr: 5.5867e-06 gnorm: 1.20 [22:44:23< 1:46:03] +[titan] 2025-10-05 21:18:45,955 - root - INFO - step: 37120 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:18:45,955 - root - INFO - lr: 5.5847e-06 gnorm: 1.24 [22:44:34< 1:45:52] +[titan] 2025-10-05 21:18:56,847 - root - INFO - step: 37125 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7220 +[titan] 2025-10-05 21:18:56,848 - root - INFO - lr: 5.5827e-06 gnorm: 1.27 [22:44:45< 1:45:41] +[titan] 2025-10-05 21:19:07,708 - root - INFO - step: 37130 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 21:19:07,709 - root - INFO - lr: 
5.5807e-06 gnorm: 1.20 [22:44:56< 1:45:30] +[titan] 2025-10-05 21:19:18,570 - root - INFO - step: 37135 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 21:19:18,570 - root - INFO - lr: 5.5787e-06 gnorm: 1.23 [22:45:06< 1:45:19] +[titan] 2025-10-05 21:19:29,432 - root - INFO - step: 37140 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 21:19:29,432 - root - INFO - lr: 5.5766e-06 gnorm: 1.25 [22:45:17< 1:45:08] +[titan] 2025-10-05 21:19:40,328 - root - INFO - step: 37145 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 21:19:40,328 - root - INFO - lr: 5.5746e-06 gnorm: 1.28 [22:45:28< 1:44:57] +[titan] 2025-10-05 21:19:48,998 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:19:51,178 - root - INFO - step: 37150 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 21:19:51,178 - root - INFO - lr: 5.5726e-06 gnorm: 1.28 [22:45:39< 1:44:46] +[titan] 2025-10-05 21:20:02,074 - root - INFO - step: 37155 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:20:02,074 - root - INFO - lr: 5.5706e-06 gnorm: 1.22 [22:45:50< 1:44:35] +[titan] 2025-10-05 21:20:12,936 - root - INFO - step: 37160 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 21:20:12,937 - root - INFO - lr: 5.5686e-06 gnorm: 1.25 [22:46:01< 1:44:23] +[titan] 2025-10-05 21:20:23,793 - root - INFO - step: 37165 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 21:20:23,793 - root - INFO - lr: 5.5666e-06 gnorm: 1.26 [22:46:12< 1:44:12] +[titan] 2025-10-05 21:20:34,674 - root - INFO - step: 37170 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:20:34,674 - root - INFO - lr: 5.5647e-06 gnorm: 1.21 [22:46:22< 1:44:01] +[titan] 2025-10-05 21:20:45,596 - root - INFO - step: 37175 loss: 1.9773 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 21:20:45,596 - root - INFO - lr: 5.5627e-06 gnorm: 1.27 [22:46:33< 1:43:50] +[titan] 2025-10-05 21:20:56,483 - root - INFO - step: 37180 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:20:56,484 - root - INFO - lr: 
5.5607e-06 gnorm: 1.26 [22:46:44< 1:43:39] +[titan] 2025-10-05 21:21:07,391 - root - INFO - step: 37185 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:21:07,392 - root - INFO - lr: 5.5587e-06 gnorm: 1.24 [22:46:55< 1:43:28] +[titan] 2025-10-05 21:21:18,272 - root - INFO - step: 37190 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 21:21:18,272 - root - INFO - lr: 5.5567e-06 gnorm: 1.22 [22:47:06< 1:43:17] +[titan] 2025-10-05 21:21:29,155 - root - INFO - step: 37195 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:21:29,155 - root - INFO - lr: 5.5548e-06 gnorm: 1.25 [22:47:17< 1:43:06] +[titan] 2025-10-05 21:21:37,896 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:21:40,076 - root - INFO - step: 37200 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:21:40,076 - root - INFO - lr: 5.5528e-06 gnorm: 1.22 [22:47:28< 1:42:55] +[titan] 2025-10-05 21:21:50,943 - root - INFO - step: 37205 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 21:21:50,944 - root - INFO - lr: 5.5508e-06 gnorm: 1.21 [22:47:39< 1:42:44] +[titan] 2025-10-05 21:22:01,837 - root - INFO - step: 37210 loss: 1.9065 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 21:22:01,837 - root - INFO - lr: 5.5489e-06 gnorm: 1.21 [22:47:50< 1:42:33] +[titan] 2025-10-05 21:22:12,716 - root - INFO - step: 37215 loss: 1.8559 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6413 +[titan] 2025-10-05 21:22:12,716 - root - INFO - lr: 5.5469e-06 gnorm: 1.23 [22:48:01< 1:42:22] +[titan] 2025-10-05 21:22:23,615 - root - INFO - step: 37220 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 21:22:23,615 - root - INFO - lr: 5.5450e-06 gnorm: 1.24 [22:48:11< 1:42:11] +[titan] 2025-10-05 21:22:34,482 - root - INFO - step: 37225 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 21:22:34,483 - root - INFO - lr: 5.5430e-06 gnorm: 1.26 [22:48:22< 1:42:00] +[titan] 2025-10-05 21:22:45,400 - root - INFO - step: 37230 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 21:22:45,400 - root - INFO - lr: 
5.5411e-06 gnorm: 1.23 [22:48:33< 1:41:49] +[titan] 2025-10-05 21:22:56,271 - root - INFO - step: 37235 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 21:22:56,272 - root - INFO - lr: 5.5391e-06 gnorm: 1.23 [22:48:44< 1:41:38] +[titan] 2025-10-05 21:23:07,114 - root - INFO - step: 37240 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:23:07,114 - root - INFO - lr: 5.5372e-06 gnorm: 1.23 [22:48:55< 1:41:27] +[titan] 2025-10-05 21:23:17,969 - root - INFO - step: 37245 loss: 1.8827 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 21:23:17,970 - root - INFO - lr: 5.5352e-06 gnorm: 1.23 [22:49:06< 1:41:16] +[titan] 2025-10-05 21:23:26,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:23:28,858 - root - INFO - step: 37250 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6816 +[titan] 2025-10-05 21:23:28,858 - root - INFO - lr: 5.5333e-06 gnorm: 1.21 [22:49:17< 1:41:05] +[titan] 2025-10-05 21:23:39,774 - root - INFO - step: 37255 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 21:23:39,774 - root - INFO - lr: 5.5314e-06 gnorm: 1.21 [22:49:28< 1:40:54] +[titan] 2025-10-05 21:23:50,632 - root - INFO - step: 37260 loss: 1.8928 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 21:23:50,632 - root - INFO - lr: 5.5295e-06 gnorm: 1.20 [22:49:38< 1:40:43] +[titan] 2025-10-05 21:24:01,494 - root - INFO - step: 37265 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 21:24:01,495 - root - INFO - lr: 5.5275e-06 gnorm: 1.21 [22:49:49< 1:40:32] +[titan] 2025-10-05 21:24:12,333 - root - INFO - step: 37270 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 21:24:12,333 - root - INFO - lr: 5.5256e-06 gnorm: 1.24 [22:50:00< 1:40:21] +[titan] 2025-10-05 21:24:23,189 - root - INFO - step: 37275 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 21:24:23,189 - root - INFO - lr: 5.5237e-06 gnorm: 1.22 [22:50:11< 1:40:10] +[titan] 2025-10-05 21:24:34,040 - root - INFO - step: 37280 loss: 1.8747 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 21:24:34,041 - root - INFO - lr: 
5.5218e-06 gnorm: 1.19 [22:50:22< 1:39:59] +[titan] 2025-10-05 21:24:44,965 - root - INFO - step: 37285 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6500 +[titan] 2025-10-05 21:24:44,965 - root - INFO - lr: 5.5199e-06 gnorm: 1.23 [22:50:33< 1:39:48] +[titan] 2025-10-05 21:24:55,829 - root - INFO - step: 37290 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6803 +[titan] 2025-10-05 21:24:55,829 - root - INFO - lr: 5.5180e-06 gnorm: 1.24 [22:50:44< 1:39:36] +[titan] 2025-10-05 21:25:06,686 - root - INFO - step: 37295 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 21:25:06,686 - root - INFO - lr: 5.5161e-06 gnorm: 1.23 [22:50:54< 1:39:25] +[titan] 2025-10-05 21:25:15,357 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:25:17,530 - root - INFO - step: 37300 loss: 1.9230 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 21:25:17,530 - root - INFO - lr: 5.5142e-06 gnorm: 1.29 [22:51:05< 1:39:14] +[titan] 2025-10-05 21:25:28,378 - root - INFO - step: 37305 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 21:25:28,378 - root - INFO - lr: 5.5123e-06 gnorm: 1.28 [22:51:16< 1:39:03] +[titan] 2025-10-05 21:25:39,211 - root - INFO - step: 37310 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 21:25:39,211 - root - INFO - lr: 5.5104e-06 gnorm: 1.28 [22:51:27< 1:38:52] +[titan] 2025-10-05 21:25:50,153 - root - INFO - step: 37315 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 21:25:50,153 - root - INFO - lr: 5.5085e-06 gnorm: 1.24 [22:51:38< 1:38:41] +[titan] 2025-10-05 21:26:01,007 - root - INFO - step: 37320 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 21:26:01,007 - root - INFO - lr: 5.5066e-06 gnorm: 1.22 [22:51:49< 1:38:30] +[titan] 2025-10-05 21:26:11,849 - root - INFO - step: 37325 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7401 +[titan] 2025-10-05 21:26:11,849 - root - INFO - lr: 5.5047e-06 gnorm: 1.28 [22:52:00< 1:38:19] +[titan] 2025-10-05 21:26:22,692 - root - INFO - step: 37330 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 21:26:22,692 - root - INFO - lr: 
5.5028e-06 gnorm: 1.20 [22:52:10< 1:38:08] +[titan] 2025-10-05 21:26:33,566 - root - INFO - step: 37335 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 21:26:33,566 - root - INFO - lr: 5.5010e-06 gnorm: 1.21 [22:52:21< 1:37:57] +[titan] 2025-10-05 21:26:44,447 - root - INFO - step: 37340 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 21:26:44,447 - root - INFO - lr: 5.4991e-06 gnorm: 1.30 [22:52:32< 1:37:46] +[titan] 2025-10-05 21:26:55,353 - root - INFO - step: 37345 loss: 1.8670 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6522 +[titan] 2025-10-05 21:26:55,354 - root - INFO - lr: 5.4972e-06 gnorm: 1.19 [22:52:43< 1:37:35] +[titan] 2025-10-05 21:27:04,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:27:06,210 - root - INFO - step: 37350 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 21:27:06,210 - root - INFO - lr: 5.4954e-06 gnorm: 1.23 [22:52:54< 1:37:24] +[titan] 2025-10-05 21:27:17,097 - root - INFO - step: 37355 loss: 1.8844 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 21:27:17,097 - root - INFO - lr: 5.4935e-06 gnorm: 1.22 [22:53:05< 1:37:13] +[titan] 2025-10-05 21:27:27,968 - root - INFO - step: 37360 loss: 1.8981 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:27:27,968 - root - INFO - lr: 5.4917e-06 gnorm: 1.24 [22:53:16< 1:37:02] +[titan] 2025-10-05 21:27:38,788 - root - INFO - step: 37365 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 21:27:38,788 - root - INFO - lr: 5.4898e-06 gnorm: 1.22 [22:53:27< 1:36:51] +[titan] 2025-10-05 21:27:49,689 - root - INFO - step: 37370 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 21:27:49,689 - root - INFO - lr: 5.4880e-06 gnorm: 1.26 [22:53:37< 1:36:40] +[titan] 2025-10-05 21:28:00,629 - root - INFO - step: 37375 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 21:28:00,630 - root - INFO - lr: 5.4861e-06 gnorm: 1.28 [22:53:48< 1:36:29] +[titan] 2025-10-05 21:28:02,997 - root - INFO - Dumping profiler traces at step 37376 +[titan] 2025-10-05 21:28:03,035 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:28:11,751 - root - INFO - step: 37380 loss: 
1.9803 memory: 118.84GiB(85.28%) tps: 29,465 tflops: 408.78 mfu: 41.33% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 21:28:11,751 - root - INFO - lr: 5.4843e-06 gnorm: 1.26 [22:54:00< 1:36:18] +[titan] 2025-10-05 21:28:22,605 - root - INFO - step: 37385 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 21:28:22,605 - root - INFO - lr: 5.4824e-06 gnorm: 1.23 [22:54:10< 1:36:07] +[titan] 2025-10-05 21:28:33,443 - root - INFO - step: 37390 loss: 1.8450 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6318 +[titan] 2025-10-05 21:28:33,443 - root - INFO - lr: 5.4806e-06 gnorm: 1.24 [22:54:21< 1:35:56] +[titan] 2025-10-05 21:28:44,325 - root - INFO - step: 37395 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:28:44,326 - root - INFO - lr: 5.4788e-06 gnorm: 1.22 [22:54:32< 1:35:45] +[titan] 2025-10-05 21:28:52,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:28:55,171 - root - INFO - step: 37400 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:28:55,171 - root - INFO - lr: 5.4769e-06 gnorm: 1.21 [22:54:43< 1:35:34] +[titan] 2025-10-05 21:29:06,005 - root - INFO - step: 37405 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 21:29:06,006 - root - INFO - lr: 5.4751e-06 gnorm: 1.21 [22:54:54< 1:35:23] +[titan] 2025-10-05 21:29:16,874 - root - INFO - step: 37410 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 21:29:16,874 - root - INFO - lr: 5.4733e-06 gnorm: 1.21 [22:55:05< 1:35:12] +[titan] 2025-10-05 21:29:27,686 - root - INFO - step: 37415 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 21:29:27,686 - root - INFO - lr: 5.4715e-06 gnorm: 1.19 [22:55:15< 1:35:01] +[titan] 2025-10-05 21:29:38,526 - root - INFO - step: 37420 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:29:38,526 - root - INFO - lr: 5.4696e-06 gnorm: 1.22 [22:55:26< 1:34:49] +[titan] 2025-10-05 21:29:49,408 - root - INFO - step: 37425 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 21:29:49,408 - root - INFO - lr: 5.4678e-06 gnorm: 1.25 [22:55:37< 1:34:38] +[titan] 2025-10-05 21:30:00,250 - root - INFO - step: 37430 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 21:30:00,250 - root - INFO - lr: 
5.4660e-06 gnorm: 1.23 [22:55:48< 1:34:27] +[titan] 2025-10-05 21:30:11,084 - root - INFO - step: 37435 loss: 1.9022 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:30:11,084 - root - INFO - lr: 5.4642e-06 gnorm: 1.27 [22:55:59< 1:34:16] +[titan] 2025-10-05 21:30:21,909 - root - INFO - step: 37440 loss: 1.9502 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:30:21,909 - root - INFO - lr: 5.4624e-06 gnorm: 1.24 [22:56:10< 1:34:05] +[titan] 2025-10-05 21:30:32,791 - root - INFO - step: 37445 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7483 +[titan] 2025-10-05 21:30:32,792 - root - INFO - lr: 5.4606e-06 gnorm: 1.30 [22:56:21< 1:33:54] +[titan] 2025-10-05 21:30:41,432 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:30:43,606 - root - INFO - step: 37450 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.37 mfu: 42.51% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 21:30:43,606 - root - INFO - lr: 5.4588e-06 gnorm: 1.25 [22:56:31< 1:33:43] +[titan] 2025-10-05 21:30:54,447 - root - INFO - step: 37455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 21:30:54,447 - root - INFO - lr: 5.4570e-06 gnorm: 1.27 [22:56:42< 1:33:32] +[titan] 2025-10-05 21:31:05,288 - root - INFO - step: 37460 loss: 1.8916 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:31:05,288 - root - INFO - lr: 5.4552e-06 gnorm: 1.22 [22:56:53< 1:33:21] +[titan] 2025-10-05 21:31:16,146 - root - INFO - step: 37465 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 21:31:16,146 - root - INFO - lr: 5.4535e-06 gnorm: 1.26 [22:57:04< 1:33:10] +[titan] 2025-10-05 21:31:26,988 - root - INFO - step: 37470 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 21:31:26,988 - root - INFO - lr: 5.4517e-06 gnorm: 1.26 [22:57:15< 1:32:59] +[titan] 2025-10-05 21:31:37,863 - root - INFO - step: 37475 loss: 1.8457 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2133 global_avg_mtp_loss: 1.6324 +[titan] 2025-10-05 21:31:37,863 - root - INFO - lr: 5.4499e-06 gnorm: 1.20 [22:57:26< 1:32:48] +[titan] 2025-10-05 21:31:48,716 - root - INFO - step: 37480 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6511 +[titan] 2025-10-05 21:31:48,716 - root - INFO - lr: 
5.4481e-06 gnorm: 1.22 [22:57:37< 1:32:37] +[titan] 2025-10-05 21:31:59,576 - root - INFO - step: 37485 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6883 +[titan] 2025-10-05 21:31:59,577 - root - INFO - lr: 5.4463e-06 gnorm: 1.26 [22:57:47< 1:32:26] +[titan] 2025-10-05 21:32:10,434 - root - INFO - step: 37490 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 21:32:10,434 - root - INFO - lr: 5.4446e-06 gnorm: 1.24 [22:57:58< 1:32:15] +[titan] 2025-10-05 21:32:21,290 - root - INFO - step: 37495 loss: 1.9993 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 21:32:21,290 - root - INFO - lr: 5.4428e-06 gnorm: 1.24 [22:58:09< 1:32:04] +[titan] 2025-10-05 21:32:29,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:32:32,151 - root - INFO - step: 37500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 21:32:32,151 - root - INFO - lr: 5.4411e-06 gnorm: 1.29 [22:58:20< 1:31:53] +[titan] 2025-10-05 21:32:43,013 - root - INFO - step: 37505 loss: 1.8923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:32:43,014 - root - INFO - lr: 5.4393e-06 gnorm: 1.21 [22:58:31< 1:31:42] +[titan] 2025-10-05 21:32:53,854 - root - INFO - step: 37510 loss: 1.9490 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7237 +[titan] 2025-10-05 21:32:53,854 - root - INFO - lr: 5.4375e-06 gnorm: 1.21 [22:58:42< 1:31:31] +[titan] 2025-10-05 21:33:04,724 - root - INFO - step: 37515 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 21:33:04,724 - root - INFO - lr: 5.4358e-06 gnorm: 1.24 [22:58:53< 1:31:20] +[titan] 2025-10-05 21:33:15,605 - root - INFO - step: 37520 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:33:15,605 - root - INFO - lr: 5.4341e-06 gnorm: 1.22 [22:59:03< 1:31:09] +[titan] 2025-10-05 21:33:26,465 - root - INFO - step: 37525 loss: 1.8732 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6563 +[titan] 2025-10-05 21:33:26,465 - root - INFO - lr: 5.4323e-06 gnorm: 1.23 [22:59:14< 1:30:58] +[titan] 2025-10-05 21:33:37,315 - root - INFO - step: 37530 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 21:33:37,315 - root - INFO - lr: 
5.4306e-06 gnorm: 1.23 [22:59:25< 1:30:47] +[titan] 2025-10-05 21:33:48,179 - root - INFO - step: 37535 loss: 1.8524 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6376 +[titan] 2025-10-05 21:33:48,179 - root - INFO - lr: 5.4288e-06 gnorm: 1.25 [22:59:36< 1:30:36] +[titan] 2025-10-05 21:33:59,032 - root - INFO - step: 37540 loss: 1.8890 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 21:33:59,032 - root - INFO - lr: 5.4271e-06 gnorm: 1.22 [22:59:47< 1:30:25] +[titan] 2025-10-05 21:34:09,894 - root - INFO - step: 37545 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:34:09,894 - root - INFO - lr: 5.4254e-06 gnorm: 1.24 [22:59:58< 1:30:14] +[titan] 2025-10-05 21:34:18,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:34:20,750 - root - INFO - step: 37550 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6861 +[titan] 2025-10-05 21:34:20,750 - root - INFO - lr: 5.4236e-06 gnorm: 1.24 [23:00:09< 1:30:02] +[titan] 2025-10-05 21:34:31,630 - root - INFO - step: 37555 loss: 1.9520 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 21:34:31,630 - root - INFO - lr: 5.4219e-06 gnorm: 1.21 [23:00:19< 1:29:51] +[titan] 2025-10-05 21:34:42,476 - root - INFO - step: 37560 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7291 +[titan] 2025-10-05 21:34:42,476 - root - INFO - lr: 5.4202e-06 gnorm: 1.23 [23:00:30< 1:29:40] +[titan] 2025-10-05 21:34:53,333 - root - INFO - step: 37565 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 21:34:53,333 - root - INFO - lr: 5.4185e-06 gnorm: 1.26 [23:00:41< 1:29:29] +[titan] 2025-10-05 21:35:04,184 - root - INFO - step: 37570 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 21:35:04,184 - root - INFO - lr: 5.4168e-06 gnorm: 1.30 [23:00:52< 1:29:18] +[titan] 2025-10-05 21:35:15,037 - root - INFO - step: 37575 loss: 1.8778 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6614 +[titan] 2025-10-05 21:35:15,037 - root - INFO - lr: 5.4151e-06 gnorm: 1.21 [23:01:03< 1:29:07] +[titan] 2025-10-05 21:35:25,912 - root - INFO - step: 37580 loss: 1.8864 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6681 +[titan] 2025-10-05 21:35:25,913 - root - INFO - lr: 
5.4134e-06 gnorm: 1.23 [23:01:14< 1:28:56] +[titan] 2025-10-05 21:35:36,806 - root - INFO - step: 37585 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 21:35:36,807 - root - INFO - lr: 5.4117e-06 gnorm: 1.25 [23:01:25< 1:28:45] +[titan] 2025-10-05 21:35:47,715 - root - INFO - step: 37590 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 21:35:47,716 - root - INFO - lr: 5.4100e-06 gnorm: 1.22 [23:01:35< 1:28:34] +[titan] 2025-10-05 21:35:58,598 - root - INFO - step: 37595 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:35:58,598 - root - INFO - lr: 5.4083e-06 gnorm: 1.20 [23:01:46< 1:28:23] +[titan] 2025-10-05 21:36:07,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:36:09,461 - root - INFO - step: 37600 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7104 +[titan] 2025-10-05 21:36:09,461 - root - INFO - lr: 5.4066e-06 gnorm: 1.22 [23:01:57< 1:28:12] +[titan] 2025-10-05 21:36:20,345 - root - INFO - step: 37605 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 21:36:20,345 - root - INFO - lr: 5.4049e-06 gnorm: 1.28 [23:02:08< 1:28:01] +[titan] 2025-10-05 21:36:31,206 - root - INFO - step: 37610 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 21:36:31,206 - root - INFO - lr: 5.4032e-06 gnorm: 1.20 [23:02:19< 1:27:50] +[titan] 2025-10-05 21:36:42,084 - root - INFO - step: 37615 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:36:42,084 - root - INFO - lr: 5.4015e-06 gnorm: 1.28 [23:02:30< 1:27:39] +[titan] 2025-10-05 21:36:52,956 - root - INFO - step: 37620 loss: 2.0281 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 21:36:52,956 - root - INFO - lr: 5.3999e-06 gnorm: 1.25 [23:02:41< 1:27:28] +[titan] 2025-10-05 21:37:03,800 - root - INFO - step: 37625 loss: 1.8956 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6758 +[titan] 2025-10-05 21:37:03,800 - root - INFO - lr: 5.3982e-06 gnorm: 1.23 [23:02:52< 1:27:17] +[titan] 2025-10-05 21:37:14,649 - root - INFO - step: 37630 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 21:37:14,649 - root - INFO - lr: 
5.3965e-06 gnorm: 1.33 [23:03:02< 1:27:06] +[titan] 2025-10-05 21:37:25,497 - root - INFO - step: 37635 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 21:37:25,497 - root - INFO - lr: 5.3948e-06 gnorm: 1.24 [23:03:13< 1:26:55] +[titan] 2025-10-05 21:37:36,353 - root - INFO - step: 37640 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 21:37:36,353 - root - INFO - lr: 5.3932e-06 gnorm: 1.22 [23:03:24< 1:26:44] +[titan] 2025-10-05 21:37:47,208 - root - INFO - step: 37645 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7232 +[titan] 2025-10-05 21:37:47,208 - root - INFO - lr: 5.3915e-06 gnorm: 1.27 [23:03:35< 1:26:33] +[titan] 2025-10-05 21:37:55,906 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:37:58,091 - root - INFO - step: 37650 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6691 +[titan] 2025-10-05 21:37:58,091 - root - INFO - lr: 5.3899e-06 gnorm: 1.23 [23:03:46< 1:26:22] +[titan] 2025-10-05 21:38:08,977 - root - INFO - step: 37655 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 21:38:08,977 - root - INFO - lr: 5.3882e-06 gnorm: 1.28 [23:03:57< 1:26:11] +[titan] 2025-10-05 21:38:19,857 - root - INFO - step: 37660 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 21:38:19,857 - root - INFO - lr: 5.3866e-06 gnorm: 1.29 [23:04:08< 1:26:00] +[titan] 2025-10-05 21:38:30,712 - root - INFO - step: 37665 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6720 +[titan] 2025-10-05 21:38:30,713 - root - INFO - lr: 5.3849e-06 gnorm: 1.23 [23:04:18< 1:25:49] +[titan] 2025-10-05 21:38:41,564 - root - INFO - step: 37670 loss: 1.8372 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6256 +[titan] 2025-10-05 21:38:41,565 - root - INFO - lr: 5.3833e-06 gnorm: 1.21 [23:04:29< 1:25:38] +[titan] 2025-10-05 21:38:52,429 - root - INFO - step: 37675 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 21:38:52,429 - root - INFO - lr: 5.3816e-06 gnorm: 1.26 [23:04:40< 1:25:27] +[titan] 2025-10-05 21:39:03,314 - root - INFO - step: 37680 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 21:39:03,314 - root - INFO - lr: 
5.3800e-06 gnorm: 1.28 [23:04:51< 1:25:16] +[titan] 2025-10-05 21:39:14,212 - root - INFO - step: 37685 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 21:39:14,212 - root - INFO - lr: 5.3784e-06 gnorm: 1.22 [23:05:02< 1:25:05] +[titan] 2025-10-05 21:39:25,088 - root - INFO - step: 37690 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 21:39:25,089 - root - INFO - lr: 5.3767e-06 gnorm: 1.24 [23:05:13< 1:24:53] +[titan] 2025-10-05 21:39:35,965 - root - INFO - step: 37695 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 21:39:35,965 - root - INFO - lr: 5.3751e-06 gnorm: 1.26 [23:05:24< 1:24:42] +[titan] 2025-10-05 21:39:44,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:39:46,832 - root - INFO - step: 37700 loss: 1.8803 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:39:46,832 - root - INFO - lr: 5.3735e-06 gnorm: 1.24 [23:05:35< 1:24:31] +[titan] 2025-10-05 21:39:57,708 - root - INFO - step: 37705 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:39:57,708 - root - INFO - lr: 5.3719e-06 gnorm: 1.24 [23:05:45< 1:24:20] +[titan] 2025-10-05 21:40:08,584 - root - INFO - step: 37710 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6486 +[titan] 2025-10-05 21:40:08,584 - root - INFO - lr: 5.3703e-06 gnorm: 1.23 [23:05:56< 1:24:09] +[titan] 2025-10-05 21:40:19,491 - root - INFO - step: 37715 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 21:40:19,491 - root - INFO - lr: 5.3687e-06 gnorm: 1.24 [23:06:07< 1:23:58] +[titan] 2025-10-05 21:40:30,374 - root - INFO - step: 37720 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 21:40:30,375 - root - INFO - lr: 5.3671e-06 gnorm: 1.21 [23:06:18< 1:23:47] +[titan] 2025-10-05 21:40:41,250 - root - INFO - step: 37725 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 21:40:41,250 - root - INFO - lr: 5.3654e-06 gnorm: 1.21 [23:06:29< 1:23:36] +[titan] 2025-10-05 21:40:52,074 - root - INFO - step: 37730 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 21:40:52,075 - root - INFO - lr: 
5.3638e-06 gnorm: 1.23 [23:06:40< 1:23:25] +[titan] 2025-10-05 21:41:02,927 - root - INFO - step: 37735 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 21:41:02,928 - root - INFO - lr: 5.3622e-06 gnorm: 1.24 [23:06:51< 1:23:14] +[titan] 2025-10-05 21:41:13,783 - root - INFO - step: 37740 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 21:41:13,783 - root - INFO - lr: 5.3607e-06 gnorm: 1.24 [23:07:02< 1:23:03] +[titan] 2025-10-05 21:41:24,647 - root - INFO - step: 37745 loss: 1.8905 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 21:41:24,647 - root - INFO - lr: 5.3591e-06 gnorm: 1.24 [23:07:12< 1:22:52] +[titan] 2025-10-05 21:41:33,361 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:41:35,546 - root - INFO - step: 37750 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 21:41:35,546 - root - INFO - lr: 5.3575e-06 gnorm: 1.24 [23:07:23< 1:22:41] +[titan] 2025-10-05 21:41:46,407 - root - INFO - step: 37755 loss: 1.8127 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6029 +[titan] 2025-10-05 21:41:46,407 - root - INFO - lr: 5.3559e-06 gnorm: 1.23 [23:07:34< 1:22:30] +[titan] 2025-10-05 21:41:57,261 - root - INFO - step: 37760 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 21:41:57,261 - root - INFO - lr: 5.3543e-06 gnorm: 1.27 [23:07:45< 1:22:19] +[titan] 2025-10-05 21:42:08,104 - root - INFO - step: 37765 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6996 +[titan] 2025-10-05 21:42:08,104 - root - INFO - lr: 5.3527e-06 gnorm: 1.22 [23:07:56< 1:22:08] +[titan] 2025-10-05 21:42:18,953 - root - INFO - step: 37770 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:42:18,954 - root - INFO - lr: 5.3512e-06 gnorm: 1.28 [23:08:07< 1:21:57] +[titan] 2025-10-05 21:42:29,811 - root - INFO - step: 37775 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 21:42:29,811 - root - INFO - lr: 5.3496e-06 gnorm: 1.30 [23:08:18< 1:21:46] +[titan] 2025-10-05 21:42:40,701 - root - INFO - step: 37780 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 21:42:40,701 - root - INFO - lr: 
5.3480e-06 gnorm: 1.24 [23:08:28< 1:21:35] +[titan] 2025-10-05 21:42:51,568 - root - INFO - step: 37785 loss: 1.8503 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 21:42:51,568 - root - INFO - lr: 5.3465e-06 gnorm: 1.25 [23:08:39< 1:21:24] +[titan] 2025-10-05 21:43:02,441 - root - INFO - step: 37790 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 21:43:02,441 - root - INFO - lr: 5.3449e-06 gnorm: 1.29 [23:08:50< 1:21:13] +[titan] 2025-10-05 21:43:13,297 - root - INFO - step: 37795 loss: 1.9468 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7219 +[titan] 2025-10-05 21:43:13,297 - root - INFO - lr: 5.3434e-06 gnorm: 1.25 [23:09:01< 1:21:02] +[titan] 2025-10-05 21:43:21,968 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:43:24,171 - root - INFO - step: 37800 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 21:43:24,171 - root - INFO - lr: 5.3418e-06 gnorm: 1.23 [23:09:12< 1:20:51] +[titan] 2025-10-05 21:43:35,037 - root - INFO - step: 37805 loss: 1.9248 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 21:43:35,037 - root - INFO - lr: 5.3403e-06 gnorm: 1.25 [23:09:23< 1:20:40] +[titan] 2025-10-05 21:43:45,919 - root - INFO - step: 37810 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:43:45,919 - root - INFO - lr: 5.3387e-06 gnorm: 1.21 [23:09:34< 1:20:29] +[titan] 2025-10-05 21:43:56,805 - root - INFO - step: 37815 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 21:43:56,805 - root - INFO - lr: 5.3372e-06 gnorm: 1.27 [23:09:45< 1:20:18] +[titan] 2025-10-05 21:44:07,687 - root - INFO - step: 37820 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 21:44:07,687 - root - INFO - lr: 5.3356e-06 gnorm: 1.30 [23:09:55< 1:20:07] +[titan] 2025-10-05 21:44:18,545 - root - INFO - step: 37825 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:44:18,545 - root - INFO - lr: 5.3341e-06 gnorm: 1.27 [23:10:06< 1:19:56] +[titan] 2025-10-05 21:44:29,413 - root - INFO - step: 37830 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 21:44:29,413 - root - INFO - lr: 
5.3326e-06 gnorm: 1.21 [23:10:17< 1:19:44] +[titan] 2025-10-05 21:44:40,283 - root - INFO - step: 37835 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 21:44:40,283 - root - INFO - lr: 5.3310e-06 gnorm: 1.22 [23:10:28< 1:19:33] +[titan] 2025-10-05 21:44:51,148 - root - INFO - step: 37840 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 21:44:51,149 - root - INFO - lr: 5.3295e-06 gnorm: 1.21 [23:10:39< 1:19:22] +[titan] 2025-10-05 21:45:02,046 - root - INFO - step: 37845 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:45:02,047 - root - INFO - lr: 5.3280e-06 gnorm: 1.25 [23:10:50< 1:19:11] +[titan] 2025-10-05 21:45:10,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:45:12,905 - root - INFO - step: 37850 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:45:12,905 - root - INFO - lr: 5.3265e-06 gnorm: 1.23 [23:11:01< 1:19:00] +[titan] 2025-10-05 21:45:23,773 - root - INFO - step: 37855 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6640 +[titan] 2025-10-05 21:45:23,773 - root - INFO - lr: 5.3250e-06 gnorm: 1.22 [23:11:12< 1:18:49] +[titan] 2025-10-05 21:45:34,638 - root - INFO - step: 37860 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7081 +[titan] 2025-10-05 21:45:34,638 - root - INFO - lr: 5.3235e-06 gnorm: 1.24 [23:11:22< 1:18:38] +[titan] 2025-10-05 21:45:45,491 - root - INFO - step: 37865 loss: 1.9514 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7254 +[titan] 2025-10-05 21:45:45,491 - root - INFO - lr: 5.3220e-06 gnorm: 1.24 [23:11:33< 1:18:27] +[titan] 2025-10-05 21:45:56,352 - root - INFO - step: 37870 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6506 +[titan] 2025-10-05 21:45:56,353 - root - INFO - lr: 5.3205e-06 gnorm: 1.21 [23:11:44< 1:18:16] +[titan] 2025-10-05 21:46:07,270 - root - INFO - step: 37875 loss: 1.9195 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 21:46:07,271 - root - INFO - lr: 5.3190e-06 gnorm: 1.24 [23:11:55< 1:18:05] +[titan] 2025-10-05 21:46:18,130 - root - INFO - step: 37880 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 21:46:18,131 - root - INFO - lr: 
5.3175e-06 gnorm: 1.26 [23:12:06< 1:17:54] +[titan] 2025-10-05 21:46:29,081 - root - INFO - step: 37885 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:46:29,082 - root - INFO - lr: 5.3160e-06 gnorm: 1.22 [23:12:17< 1:17:43] +[titan] 2025-10-05 21:46:35,785 - root - INFO - Dumping profiler traces at step 37888 +[titan] 2025-10-05 21:46:35,824 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:46:40,200 - root - INFO - step: 37890 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 29,473 tflops: 408.89 mfu: 41.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 21:46:40,200 - root - INFO - lr: 5.3145e-06 gnorm: 1.28 [23:12:28< 1:17:32] +[titan] 2025-10-05 21:46:51,073 - root - INFO - step: 37895 loss: 1.9689 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 21:46:51,073 - root - INFO - lr: 5.3130e-06 gnorm: 1.23 [23:12:39< 1:17:21] +[titan] 2025-10-05 21:46:59,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:47:01,983 - root - INFO - step: 37900 loss: 1.9609 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:47:01,983 - root - INFO - lr: 5.3115e-06 gnorm: 1.24 [23:12:50< 1:17:10] +[titan] 2025-10-05 21:47:12,859 - root - INFO - step: 37905 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7007 +[titan] 2025-10-05 21:47:12,859 - root - INFO - lr: 5.3100e-06 gnorm: 1.27 [23:13:01< 1:16:59] +[titan] 2025-10-05 21:47:23,757 - root - INFO - step: 37910 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 21:47:23,757 - root - INFO - lr: 5.3086e-06 gnorm: 1.26 [23:13:12< 1:16:48] +[titan] 2025-10-05 21:47:34,635 - root - INFO - step: 37915 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6659 +[titan] 2025-10-05 21:47:34,635 - root - INFO - lr: 5.3071e-06 gnorm: 1.24 [23:13:22< 1:16:37] +[titan] 2025-10-05 21:47:45,522 - root - INFO - step: 37920 loss: 1.8835 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:47:45,522 - root - INFO - lr: 5.3056e-06 gnorm: 1.20 [23:13:33< 1:16:26] +[titan] 2025-10-05 21:47:56,386 - root - INFO - step: 37925 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 21:47:56,386 - root - INFO - lr: 5.3042e-06 gnorm: 1.28 [23:13:44< 1:16:15] +[titan] 2025-10-05 21:48:07,400 - root - INFO - step: 37930 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,753 tflops: 412.78 mfu: 41.74% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 21:48:07,400 - root - INFO - lr: 
5.3027e-06 gnorm: 1.23 [23:13:55< 1:16:04] +[titan] 2025-10-05 21:48:18,249 - root - INFO - step: 37935 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 21:48:18,249 - root - INFO - lr: 5.3012e-06 gnorm: 1.24 [23:14:06< 1:15:53] +[titan] 2025-10-05 21:48:29,154 - root - INFO - step: 37940 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 21:48:29,154 - root - INFO - lr: 5.2998e-06 gnorm: 1.26 [23:14:17< 1:15:42] +[titan] 2025-10-05 21:48:40,024 - root - INFO - step: 37945 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6992 +[titan] 2025-10-05 21:48:40,024 - root - INFO - lr: 5.2983e-06 gnorm: 1.29 [23:14:28< 1:15:31] +[titan] 2025-10-05 21:48:48,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:48:50,876 - root - INFO - step: 37950 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:48:50,876 - root - INFO - lr: 5.2969e-06 gnorm: 1.28 [23:14:39< 1:15:20] +[titan] 2025-10-05 21:49:01,777 - root - INFO - step: 37955 loss: 1.9146 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:49:01,777 - root - INFO - lr: 5.2954e-06 gnorm: 1.23 [23:14:50< 1:15:09] +[titan] 2025-10-05 21:49:12,633 - root - INFO - step: 37960 loss: 1.9032 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:49:12,633 - root - INFO - lr: 5.2940e-06 gnorm: 1.25 [23:15:00< 1:14:58] +[titan] 2025-10-05 21:49:23,498 - root - INFO - step: 37965 loss: 1.8874 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 21:49:23,498 - root - INFO - lr: 5.2926e-06 gnorm: 1.21 [23:15:11< 1:14:47] +[titan] 2025-10-05 21:49:34,372 - root - INFO - step: 37970 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 21:49:34,372 - root - INFO - lr: 5.2911e-06 gnorm: 1.25 [23:15:22< 1:14:36] +[titan] 2025-10-05 21:49:45,244 - root - INFO - step: 37975 loss: 1.9350 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 21:49:45,244 - root - INFO - lr: 5.2897e-06 gnorm: 1.25 [23:15:33< 1:14:25] +[titan] 2025-10-05 21:49:56,122 - root - INFO - step: 37980 loss: 2.0219 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7886 +[titan] 2025-10-05 21:49:56,122 - root - INFO - lr: 
5.2883e-06 gnorm: 1.31 [23:15:44< 1:14:14] +[titan] 2025-10-05 21:50:07,019 - root - INFO - step: 37985 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:50:07,019 - root - INFO - lr: 5.2869e-06 gnorm: 1.24 [23:15:55< 1:14:02] +[titan] 2025-10-05 21:50:17,884 - root - INFO - step: 37990 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 21:50:17,884 - root - INFO - lr: 5.2854e-06 gnorm: 1.22 [23:16:06< 1:13:51] +[titan] 2025-10-05 21:50:28,745 - root - INFO - step: 37995 loss: 1.8863 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 21:50:28,745 - root - INFO - lr: 5.2840e-06 gnorm: 1.21 [23:16:16< 1:13:40] +[titan] 2025-10-05 21:50:37,416 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:50:39,603 - root - INFO - step: 38000 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:50:39,603 - root - INFO - lr: 5.2826e-06 gnorm: 1.24 [23:16:27< 1:13:29] +[titan] 2025-10-05 21:50:50,499 - root - INFO - step: 38005 loss: 1.9446 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:50:50,499 - root - INFO - lr: 5.2812e-06 gnorm: 1.24 [23:16:38< 1:13:18] +[titan] 2025-10-05 21:51:01,361 - root - INFO - step: 38010 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 21:51:01,361 - root - INFO - lr: 5.2798e-06 gnorm: 1.25 [23:16:49< 1:13:07] +[titan] 2025-10-05 21:51:12,250 - root - INFO - step: 38015 loss: 1.9035 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 21:51:12,250 - root - INFO - lr: 5.2784e-06 gnorm: 1.23 [23:17:00< 1:12:56] +[titan] 2025-10-05 21:51:23,111 - root - INFO - step: 38020 loss: 1.9570 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 21:51:23,111 - root - INFO - lr: 5.2770e-06 gnorm: 1.26 [23:17:11< 1:12:45] +[titan] 2025-10-05 21:51:33,966 - root - INFO - step: 38025 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:51:33,966 - root - INFO - lr: 5.2756e-06 gnorm: 1.24 [23:17:22< 1:12:34] +[titan] 2025-10-05 21:51:44,841 - root - INFO - step: 38030 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6303 +[titan] 2025-10-05 21:51:44,841 - root - INFO - lr: 
5.2742e-06 gnorm: 1.22 [23:17:33< 1:12:23] +[titan] 2025-10-05 21:51:55,747 - root - INFO - step: 38035 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 21:51:55,747 - root - INFO - lr: 5.2728e-06 gnorm: 1.23 [23:17:43< 1:12:12] +[titan] 2025-10-05 21:52:06,666 - root - INFO - step: 38040 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:52:06,666 - root - INFO - lr: 5.2714e-06 gnorm: 1.25 [23:17:54< 1:12:01] +[titan] 2025-10-05 21:52:17,555 - root - INFO - step: 38045 loss: 1.8640 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6482 +[titan] 2025-10-05 21:52:17,555 - root - INFO - lr: 5.2701e-06 gnorm: 1.25 [23:18:05< 1:11:50] +[titan] 2025-10-05 21:52:26,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:52:28,442 - root - INFO - step: 38050 loss: 1.8572 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6424 +[titan] 2025-10-05 21:52:28,442 - root - INFO - lr: 5.2687e-06 gnorm: 1.21 [23:18:16< 1:11:39] +[titan] 2025-10-05 21:52:39,324 - root - INFO - step: 38055 loss: 1.9652 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 21:52:39,325 - root - INFO - lr: 5.2673e-06 gnorm: 1.23 [23:18:27< 1:11:28] +[titan] 2025-10-05 21:52:50,189 - root - INFO - step: 38060 loss: 1.9568 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 21:52:50,189 - root - INFO - lr: 5.2659e-06 gnorm: 1.26 [23:18:38< 1:11:17] +[titan] 2025-10-05 21:53:01,061 - root - INFO - step: 38065 loss: 1.8871 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 21:53:01,061 - root - INFO - lr: 5.2646e-06 gnorm: 1.22 [23:18:49< 1:11:06] +[titan] 2025-10-05 21:53:12,017 - root - INFO - step: 38070 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 21:53:12,018 - root - INFO - lr: 5.2632e-06 gnorm: 1.21 [23:19:00< 1:10:55] +[titan] 2025-10-05 21:53:22,903 - root - INFO - step: 38075 loss: 1.8578 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6438 +[titan] 2025-10-05 21:53:22,903 - root - INFO - lr: 5.2619e-06 gnorm: 1.21 [23:19:11< 1:10:44] +[titan] 2025-10-05 21:53:33,778 - root - INFO - step: 38080 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:53:33,779 - root - INFO - lr: 
5.2605e-06 gnorm: 1.25 [23:19:22< 1:10:33] +[titan] 2025-10-05 21:53:44,628 - root - INFO - step: 38085 loss: 1.9527 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:53:44,628 - root - INFO - lr: 5.2591e-06 gnorm: 1.23 [23:19:32< 1:10:22] +[titan] 2025-10-05 21:53:55,480 - root - INFO - step: 38090 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6835 +[titan] 2025-10-05 21:53:55,480 - root - INFO - lr: 5.2578e-06 gnorm: 1.21 [23:19:43< 1:10:11] +[titan] 2025-10-05 21:54:06,381 - root - INFO - step: 38095 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:54:06,381 - root - INFO - lr: 5.2565e-06 gnorm: 1.32 [23:19:54< 1:10:00] +[titan] 2025-10-05 21:54:15,086 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:54:17,264 - root - INFO - step: 38100 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:54:17,264 - root - INFO - lr: 5.2551e-06 gnorm: 1.21 [23:20:05< 1:09:49] +[titan] 2025-10-05 21:54:28,122 - root - INFO - step: 38105 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:54:28,123 - root - INFO - lr: 5.2538e-06 gnorm: 1.25 [23:20:16< 1:09:38] +[titan] 2025-10-05 21:54:38,982 - root - INFO - step: 38110 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:54:38,982 - root - INFO - lr: 5.2524e-06 gnorm: 1.30 [23:20:27< 1:09:27] +[titan] 2025-10-05 21:54:49,840 - root - INFO - step: 38115 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 21:54:49,840 - root - INFO - lr: 5.2511e-06 gnorm: 1.27 [23:20:38< 1:09:16] +[titan] 2025-10-05 21:55:00,693 - root - INFO - step: 38120 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:55:00,694 - root - INFO - lr: 5.2498e-06 gnorm: 1.22 [23:20:48< 1:09:05] +[titan] 2025-10-05 21:55:11,613 - root - INFO - step: 38125 loss: 1.8922 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:55:11,613 - root - INFO - lr: 5.2485e-06 gnorm: 1.22 [23:20:59< 1:08:54] +[titan] 2025-10-05 21:55:22,478 - root - INFO - step: 38130 loss: 1.8761 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6600 +[titan] 2025-10-05 21:55:22,478 - root - INFO - lr: 
5.2471e-06 gnorm: 1.23 [23:21:10< 1:08:43] +[titan] 2025-10-05 21:55:33,363 - root - INFO - step: 38135 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7568 +[titan] 2025-10-05 21:55:33,364 - root - INFO - lr: 5.2458e-06 gnorm: 1.25 [23:21:21< 1:08:32] +[titan] 2025-10-05 21:55:44,229 - root - INFO - step: 38140 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 21:55:44,229 - root - INFO - lr: 5.2445e-06 gnorm: 1.25 [23:21:32< 1:08:20] +[titan] 2025-10-05 21:55:55,104 - root - INFO - step: 38145 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6732 +[titan] 2025-10-05 21:55:55,104 - root - INFO - lr: 5.2432e-06 gnorm: 1.23 [23:21:43< 1:08:09] +[titan] 2025-10-05 21:56:03,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:56:05,959 - root - INFO - step: 38150 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6637 +[titan] 2025-10-05 21:56:05,960 - root - INFO - lr: 5.2419e-06 gnorm: 1.28 [23:21:54< 1:07:58] +[titan] 2025-10-05 21:56:16,858 - root - INFO - step: 38155 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6893 +[titan] 2025-10-05 21:56:16,858 - root - INFO - lr: 5.2406e-06 gnorm: 1.23 [23:22:05< 1:07:47] +[titan] 2025-10-05 21:56:27,718 - root - INFO - step: 38160 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 21:56:27,719 - root - INFO - lr: 5.2393e-06 gnorm: 1.25 [23:22:15< 1:07:36] +[titan] 2025-10-05 21:56:38,596 - root - INFO - step: 38165 loss: 1.8754 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6582 +[titan] 2025-10-05 21:56:38,597 - root - INFO - lr: 5.2380e-06 gnorm: 1.20 [23:22:26< 1:07:25] +[titan] 2025-10-05 21:56:49,479 - root - INFO - step: 38170 loss: 1.9310 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 21:56:49,480 - root - INFO - lr: 5.2367e-06 gnorm: 1.22 [23:22:37< 1:07:14] +[titan] 2025-10-05 21:57:00,354 - root - INFO - step: 38175 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:57:00,354 - root - INFO - lr: 5.2354e-06 gnorm: 1.27 [23:22:48< 1:07:03] +[titan] 2025-10-05 21:57:11,246 - root - INFO - step: 38180 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 21:57:11,247 - root - INFO - lr: 
5.2341e-06 gnorm: 1.25 [23:22:59< 1:06:52] +[titan] 2025-10-05 21:57:22,096 - root - INFO - step: 38185 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7581 +[titan] 2025-10-05 21:57:22,096 - root - INFO - lr: 5.2328e-06 gnorm: 1.27 [23:23:10< 1:06:41] +[titan] 2025-10-05 21:57:32,943 - root - INFO - step: 38190 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:57:32,943 - root - INFO - lr: 5.2316e-06 gnorm: 1.26 [23:23:21< 1:06:30] +[titan] 2025-10-05 21:57:43,812 - root - INFO - step: 38195 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 21:57:43,812 - root - INFO - lr: 5.2303e-06 gnorm: 1.23 [23:23:32< 1:06:19] +[titan] 2025-10-05 21:57:52,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:57:54,668 - root - INFO - step: 38200 loss: 1.9598 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7333 +[titan] 2025-10-05 21:57:54,668 - root - INFO - lr: 5.2290e-06 gnorm: 1.24 [23:23:42< 1:06:08] +[titan] 2025-10-05 21:58:05,542 - root - INFO - step: 38205 loss: 1.8481 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 21:58:05,543 - root - INFO - lr: 5.2277e-06 gnorm: 1.26 [23:23:53< 1:05:57] +[titan] 2025-10-05 21:58:16,438 - root - INFO - step: 38210 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7477 +[titan] 2025-10-05 21:58:16,438 - root - INFO - lr: 5.2265e-06 gnorm: 1.28 [23:24:04< 1:05:46] +[titan] 2025-10-05 21:58:27,285 - root - INFO - step: 38215 loss: 1.9355 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 21:58:27,285 - root - INFO - lr: 5.2252e-06 gnorm: 1.22 [23:24:15< 1:05:35] +[titan] 2025-10-05 21:58:38,133 - root - INFO - step: 38220 loss: 1.8546 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6405 +[titan] 2025-10-05 21:58:38,133 - root - INFO - lr: 5.2240e-06 gnorm: 1.23 [23:24:26< 1:05:24] +[titan] 2025-10-05 21:58:48,997 - root - INFO - step: 38225 loss: 1.8842 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6664 +[titan] 2025-10-05 21:58:48,997 - root - INFO - lr: 5.2227e-06 gnorm: 1.21 [23:24:37< 1:05:13] +[titan] 2025-10-05 21:58:59,888 - root - INFO - step: 38230 loss: 1.9848 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 21:58:59,888 - root - INFO - lr: 
5.2214e-06 gnorm: 1.24 [23:24:48< 1:05:02] +[titan] 2025-10-05 21:59:10,888 - root - INFO - step: 38235 loss: 1.8777 memory: 118.84GiB(85.28%) tps: 29,791 tflops: 413.31 mfu: 41.79% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 21:59:10,888 - root - INFO - lr: 5.2202e-06 gnorm: 1.21 [23:24:59< 1:04:51] +[titan] 2025-10-05 21:59:21,732 - root - INFO - step: 38240 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6791 +[titan] 2025-10-05 21:59:21,732 - root - INFO - lr: 5.2190e-06 gnorm: 1.22 [23:25:09< 1:04:40] +[titan] 2025-10-05 21:59:32,592 - root - INFO - step: 38245 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 21:59:32,592 - root - INFO - lr: 5.2177e-06 gnorm: 1.26 [23:25:20< 1:04:29] +[titan] 2025-10-05 21:59:41,259 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:59:43,442 - root - INFO - step: 38250 loss: 1.8699 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6533 +[titan] 2025-10-05 21:59:43,442 - root - INFO - lr: 5.2165e-06 gnorm: 1.23 [23:25:31< 1:04:18] +[titan] 2025-10-05 21:59:54,302 - root - INFO - step: 38255 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:59:54,303 - root - INFO - lr: 5.2152e-06 gnorm: 1.28 [23:25:42< 1:04:07] +[titan] 2025-10-05 22:00:05,203 - root - INFO - step: 38260 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 22:00:05,203 - root - INFO - lr: 5.2140e-06 gnorm: 1.26 [23:25:53< 1:03:56] +[titan] 2025-10-05 22:00:16,075 - root - INFO - step: 38265 loss: 1.8744 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6580 +[titan] 2025-10-05 22:00:16,076 - root - INFO - lr: 5.2128e-06 gnorm: 1.25 [23:26:04< 1:03:45] +[titan] 2025-10-05 22:00:26,953 - root - INFO - step: 38270 loss: 1.9090 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:00:26,953 - root - INFO - lr: 5.2116e-06 gnorm: 1.26 [23:26:15< 1:03:34] +[titan] 2025-10-05 22:00:37,822 - root - INFO - step: 38275 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6897 +[titan] 2025-10-05 22:00:37,823 - root - INFO - lr: 5.2103e-06 gnorm: 1.27 [23:26:26< 1:03:23] +[titan] 2025-10-05 22:00:48,688 - root - INFO - step: 38280 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 22:00:48,688 - root - INFO - lr: 
5.2091e-06 gnorm: 1.25 [23:26:36< 1:03:12] +[titan] 2025-10-05 22:00:59,558 - root - INFO - step: 38285 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 22:00:59,559 - root - INFO - lr: 5.2079e-06 gnorm: 1.24 [23:26:47< 1:03:01] +[titan] 2025-10-05 22:01:10,423 - root - INFO - step: 38290 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 22:01:10,423 - root - INFO - lr: 5.2067e-06 gnorm: 1.23 [23:26:58< 1:02:50] +[titan] 2025-10-05 22:01:21,364 - root - INFO - step: 38295 loss: 1.9718 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 22:01:21,364 - root - INFO - lr: 5.2055e-06 gnorm: 1.27 [23:27:09< 1:02:39] +[titan] 2025-10-05 22:01:30,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:01:32,234 - root - INFO - step: 38300 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7052 +[titan] 2025-10-05 22:01:32,234 - root - INFO - lr: 5.2043e-06 gnorm: 1.22 [23:27:20< 1:02:28] +[titan] 2025-10-05 22:01:43,099 - root - INFO - step: 38305 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 22:01:43,099 - root - INFO - lr: 5.2031e-06 gnorm: 1.24 [23:27:31< 1:02:16] +[titan] 2025-10-05 22:01:53,973 - root - INFO - step: 38310 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 22:01:53,973 - root - INFO - lr: 5.2019e-06 gnorm: 1.25 [23:27:42< 1:02:05] +[titan] 2025-10-05 22:02:04,844 - root - INFO - step: 38315 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 22:02:04,845 - root - INFO - lr: 5.2007e-06 gnorm: 1.28 [23:27:53< 1:01:54] +[titan] 2025-10-05 22:02:15,752 - root - INFO - step: 38320 loss: 1.9010 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6811 +[titan] 2025-10-05 22:02:15,752 - root - INFO - lr: 5.1995e-06 gnorm: 1.24 [23:28:03< 1:01:43] +[titan] 2025-10-05 22:02:26,644 - root - INFO - step: 38325 loss: 1.8521 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6383 +[titan] 2025-10-05 22:02:26,645 - root - INFO - lr: 5.1983e-06 gnorm: 1.27 [23:28:14< 1:01:32] +[titan] 2025-10-05 22:02:37,525 - root - INFO - step: 38330 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:02:37,525 - root - INFO - lr: 
5.1972e-06 gnorm: 1.27 [23:28:25< 1:01:21] +[titan] 2025-10-05 22:02:48,403 - root - INFO - step: 38335 loss: 1.8947 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6753 +[titan] 2025-10-05 22:02:48,403 - root - INFO - lr: 5.1960e-06 gnorm: 1.31 [23:28:36< 1:01:10] +[titan] 2025-10-05 22:02:59,271 - root - INFO - step: 38340 loss: 1.8646 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 22:02:59,271 - root - INFO - lr: 5.1948e-06 gnorm: 1.22 [23:28:47< 1:00:59] +[titan] 2025-10-05 22:03:10,127 - root - INFO - step: 38345 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 22:03:10,127 - root - INFO - lr: 5.1936e-06 gnorm: 1.28 [23:28:58< 1:00:48] +[titan] 2025-10-05 22:03:18,835 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:03:21,025 - root - INFO - step: 38350 loss: 1.8758 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6588 +[titan] 2025-10-05 22:03:21,025 - root - INFO - lr: 5.1925e-06 gnorm: 1.22 [23:29:09< 1:00:37] +[titan] 2025-10-05 22:03:31,925 - root - INFO - step: 38355 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6874 +[titan] 2025-10-05 22:03:31,925 - root - INFO - lr: 5.1913e-06 gnorm: 1.20 [23:29:20< 1:00:26] +[titan] 2025-10-05 22:03:42,780 - root - INFO - step: 38360 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 22:03:42,780 - root - INFO - lr: 5.1902e-06 gnorm: 1.24 [23:29:30< 1:00:15] +[titan] 2025-10-05 22:03:53,638 - root - INFO - step: 38365 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 22:03:53,638 - root - INFO - lr: 5.1890e-06 gnorm: 1.25 [23:29:41< 1:00:04] +[titan] 2025-10-05 22:04:04,503 - root - INFO - step: 38370 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 22:04:04,503 - root - INFO - lr: 5.1878e-06 gnorm: 1.23 [23:29:52< 0:59:53] +[titan] 2025-10-05 22:04:15,408 - root - INFO - step: 38375 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 22:04:15,409 - root - INFO - lr: 5.1867e-06 gnorm: 1.24 [23:30:03< 0:59:42] +[titan] 2025-10-05 22:04:26,282 - root - INFO - step: 38380 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 22:04:26,282 - root - INFO - lr: 
5.1856e-06 gnorm: 1.23 [23:30:14< 0:59:31] +[titan] 2025-10-05 22:04:37,152 - root - INFO - step: 38385 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6840 +[titan] 2025-10-05 22:04:37,153 - root - INFO - lr: 5.1844e-06 gnorm: 1.26 [23:30:25< 0:59:20] +[titan] 2025-10-05 22:04:48,030 - root - INFO - step: 38390 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:04:48,030 - root - INFO - lr: 5.1833e-06 gnorm: 1.26 [23:30:36< 0:59:09] +[titan] 2025-10-05 22:04:58,887 - root - INFO - step: 38395 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:04:58,887 - root - INFO - lr: 5.1821e-06 gnorm: 1.24 [23:30:47< 0:58:58] +[titan] 2025-10-05 22:05:07,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:05:09,829 - root - INFO - step: 38400 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 22:05:09,829 - root - INFO - lr: 5.1810e-06 gnorm: 1.25 [23:30:58< 0:58:47] +[titan] 2025-10-05 22:05:10,022 - root - INFO - Dumping profiler traces at step 38400 +[titan] 2025-10-05 22:05:10,064 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:05:20,979 - root - INFO - step: 38405 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 29,389 tflops: 407.73 mfu: 41.23% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 22:05:20,979 - root - INFO - lr: 5.1799e-06 gnorm: 1.21 [23:31:09< 0:58:36] +[titan] 2025-10-05 22:05:31,845 - root - INFO - step: 38410 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6711 +[titan] 2025-10-05 22:05:31,845 - root - INFO - lr: 5.1788e-06 gnorm: 1.25 [23:31:20< 0:58:25] +[titan] 2025-10-05 22:05:42,706 - root - INFO - step: 38415 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6821 +[titan] 2025-10-05 22:05:42,706 - root - INFO - lr: 5.1776e-06 gnorm: 1.25 [23:31:30< 0:58:14] +[titan] 2025-10-05 22:05:53,597 - root - INFO - step: 38420 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 22:05:53,598 - root - INFO - lr: 5.1765e-06 gnorm: 1.24 [23:31:41< 0:58:03] +[titan] 2025-10-05 22:06:04,473 - root - INFO - step: 38425 loss: 1.8931 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:06:04,473 - root - INFO - lr: 5.1754e-06 gnorm: 1.24 [23:31:52< 0:57:52] +[titan] 2025-10-05 22:06:15,341 - root - INFO - step: 38430 loss: 
1.9097 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:06:15,342 - root - INFO - lr: 5.1743e-06 gnorm: 1.28 [23:32:03< 0:57:41] +[titan] 2025-10-05 22:06:26,263 - root - INFO - step: 38435 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6607 +[titan] 2025-10-05 22:06:26,263 - root - INFO - lr: 5.1732e-06 gnorm: 1.21 [23:32:14< 0:57:30] +[titan] 2025-10-05 22:06:37,132 - root - INFO - step: 38440 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7207 +[titan] 2025-10-05 22:06:37,132 - root - INFO - lr: 5.1721e-06 gnorm: 1.24 [23:32:25< 0:57:19] +[titan] 2025-10-05 22:06:48,006 - root - INFO - step: 38445 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6765 +[titan] 2025-10-05 22:06:48,006 - root - INFO - lr: 5.1710e-06 gnorm: 1.26 [23:32:36< 0:57:08] +[titan] 2025-10-05 22:06:56,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:06:58,881 - root - INFO - step: 38450 loss: 1.9214 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 22:06:58,881 - root - INFO - lr: 5.1699e-06 gnorm: 1.25 [23:32:47< 0:56:57] +[titan] 2025-10-05 22:07:09,781 - root - INFO - step: 38455 loss: 1.9440 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:07:09,781 - root - INFO - lr: 5.1688e-06 gnorm: 1.24 [23:32:57< 0:56:46] +[titan] 2025-10-05 22:07:20,663 - root - INFO - step: 38460 loss: 1.8888 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6704 +[titan] 2025-10-05 22:07:20,663 - root - INFO - lr: 5.1677e-06 gnorm: 1.25 [23:33:08< 0:56:35] +[titan] 2025-10-05 22:07:31,515 - root - INFO - step: 38465 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:07:31,515 - root - INFO - lr: 5.1666e-06 gnorm: 1.27 [23:33:19< 0:56:24] +[titan] 2025-10-05 22:07:42,351 - root - INFO - step: 38470 loss: 1.8510 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2144 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 22:07:42,351 - root - INFO - lr: 5.1655e-06 gnorm: 1.22 [23:33:30< 0:56:13] +[titan] 2025-10-05 22:07:53,204 - root - INFO - step: 38475 loss: 1.9409 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7151 +[titan] 2025-10-05 22:07:53,204 - root - INFO - lr: 5.1645e-06 gnorm: 1.27 [23:33:41< 0:56:01] +[titan] 2025-10-05 22:08:04,067 - root - INFO - step: 38480 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 22:08:04,067 - root - INFO - lr: 
5.1634e-06 gnorm: 1.21 [23:33:52< 0:55:50] +[titan] 2025-10-05 22:08:14,965 - root - INFO - step: 38485 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:08:14,965 - root - INFO - lr: 5.1623e-06 gnorm: 1.27 [23:34:03< 0:55:39] +[titan] 2025-10-05 22:08:25,908 - root - INFO - step: 38490 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.44 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:08:25,908 - root - INFO - lr: 5.1612e-06 gnorm: 1.26 [23:34:14< 0:55:28] +[titan] 2025-10-05 22:08:36,784 - root - INFO - step: 38495 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6699 +[titan] 2025-10-05 22:08:36,784 - root - INFO - lr: 5.1602e-06 gnorm: 1.32 [23:34:24< 0:55:17] +[titan] 2025-10-05 22:08:45,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:08:47,650 - root - INFO - step: 38500 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 22:08:47,650 - root - INFO - lr: 5.1591e-06 gnorm: 1.33 [23:34:35< 0:55:06] +[titan] 2025-10-05 22:08:58,527 - root - INFO - step: 38505 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 22:08:58,527 - root - INFO - lr: 5.1581e-06 gnorm: 1.28 [23:34:46< 0:54:55] +[titan] 2025-10-05 22:09:09,391 - root - INFO - step: 38510 loss: 1.9323 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:09:09,391 - root - INFO - lr: 5.1570e-06 gnorm: 1.28 [23:34:57< 0:54:44] +[titan] 2025-10-05 22:09:20,365 - root - INFO - step: 38515 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 29,861 tflops: 414.28 mfu: 41.89% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:09:20,365 - root - INFO - lr: 5.1560e-06 gnorm: 1.27 [23:35:08< 0:54:33] +[titan] 2025-10-05 22:09:31,218 - root - INFO - step: 38520 loss: 1.9315 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:09:31,219 - root - INFO - lr: 5.1549e-06 gnorm: 1.25 [23:35:19< 0:54:22] +[titan] 2025-10-05 22:09:42,070 - root - INFO - step: 38525 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6845 +[titan] 2025-10-05 22:09:42,071 - root - INFO - lr: 5.1539e-06 gnorm: 1.23 [23:35:30< 0:54:11] +[titan] 2025-10-05 22:09:52,922 - root - INFO - step: 38530 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 22:09:52,922 - root - INFO - lr: 
5.1528e-06 gnorm: 1.26 [23:35:41< 0:54:00] +[titan] 2025-10-05 22:10:03,769 - root - INFO - step: 38535 loss: 1.9228 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:10:03,769 - root - INFO - lr: 5.1518e-06 gnorm: 1.25 [23:35:51< 0:53:49] +[titan] 2025-10-05 22:10:14,645 - root - INFO - step: 38540 loss: 1.9149 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6940 +[titan] 2025-10-05 22:10:14,645 - root - INFO - lr: 5.1508e-06 gnorm: 1.24 [23:36:02< 0:53:38] +[titan] 2025-10-05 22:10:25,531 - root - INFO - step: 38545 loss: 1.8971 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 22:10:25,532 - root - INFO - lr: 5.1497e-06 gnorm: 1.21 [23:36:13< 0:53:27] +[titan] 2025-10-05 22:10:34,231 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:10:36,410 - root - INFO - step: 38550 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 22:10:36,411 - root - INFO - lr: 5.1487e-06 gnorm: 1.26 [23:36:24< 0:53:16] +[titan] 2025-10-05 22:10:47,265 - root - INFO - step: 38555 loss: 1.9055 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 22:10:47,265 - root - INFO - lr: 5.1477e-06 gnorm: 1.25 [23:36:35< 0:53:05] +[titan] 2025-10-05 22:10:58,113 - root - INFO - step: 38560 loss: 1.8963 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 22:10:58,113 - root - INFO - lr: 5.1467e-06 gnorm: 1.28 [23:36:46< 0:52:54] +[titan] 2025-10-05 22:11:08,954 - root - INFO - step: 38565 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:11:08,954 - root - INFO - lr: 5.1456e-06 gnorm: 1.26 [23:36:57< 0:52:43] +[titan] 2025-10-05 22:11:19,804 - root - INFO - step: 38570 loss: 1.9003 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 22:11:19,805 - root - INFO - lr: 5.1446e-06 gnorm: 1.22 [23:37:07< 0:52:32] +[titan] 2025-10-05 22:11:30,699 - root - INFO - step: 38575 loss: 1.8708 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6535 +[titan] 2025-10-05 22:11:30,699 - root - INFO - lr: 5.1436e-06 gnorm: 1.25 [23:37:18< 0:52:21] +[titan] 2025-10-05 22:11:41,605 - root - INFO - step: 38580 loss: 1.9498 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 22:11:41,605 - root - INFO - lr: 
5.1426e-06 gnorm: 1.26 [23:37:29< 0:52:10] +[titan] 2025-10-05 22:11:52,476 - root - INFO - step: 38585 loss: 1.8659 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2149 global_avg_mtp_loss: 1.6510 +[titan] 2025-10-05 22:11:52,476 - root - INFO - lr: 5.1416e-06 gnorm: 1.27 [23:37:40< 0:51:59] +[titan] 2025-10-05 22:12:03,366 - root - INFO - step: 38590 loss: 1.8820 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6644 +[titan] 2025-10-05 22:12:03,366 - root - INFO - lr: 5.1406e-06 gnorm: 1.30 [23:37:51< 0:51:48] +[titan] 2025-10-05 22:12:14,240 - root - INFO - step: 38595 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 22:12:14,241 - root - INFO - lr: 5.1396e-06 gnorm: 1.20 [23:38:02< 0:51:37] +[titan] 2025-10-05 22:12:22,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:12:25,159 - root - INFO - step: 38600 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6314 +[titan] 2025-10-05 22:12:25,159 - root - INFO - lr: 5.1386e-06 gnorm: 1.21 [23:38:13< 0:51:26] +[titan] 2025-10-05 22:12:36,019 - root - INFO - step: 38605 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 22:12:36,019 - root - INFO - lr: 5.1376e-06 gnorm: 1.24 [23:38:24< 0:51:15] +[titan] 2025-10-05 22:12:46,891 - root - INFO - step: 38610 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 22:12:46,891 - root - INFO - lr: 5.1367e-06 gnorm: 1.19 [23:38:35< 0:51:04] +[titan] 2025-10-05 22:12:57,808 - root - INFO - step: 38615 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 22:12:57,808 - root - INFO - lr: 5.1357e-06 gnorm: 1.29 [23:38:45< 0:50:53] +[titan] 2025-10-05 22:13:08,674 - root - INFO - step: 38620 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 22:13:08,674 - root - INFO - lr: 5.1347e-06 gnorm: 1.29 [23:38:56< 0:50:42] +[titan] 2025-10-05 22:13:19,536 - root - INFO - step: 38625 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 22:13:19,537 - root - INFO - lr: 5.1337e-06 gnorm: 1.26 [23:39:07< 0:50:31] +[titan] 2025-10-05 22:13:30,453 - root - INFO - step: 38630 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6688 +[titan] 2025-10-05 22:13:30,453 - root - INFO - lr: 
5.1328e-06 gnorm: 1.24 [23:39:18< 0:50:20] +[titan] 2025-10-05 22:13:41,303 - root - INFO - step: 38635 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 22:13:41,303 - root - INFO - lr: 5.1318e-06 gnorm: 1.27 [23:39:29< 0:50:09] +[titan] 2025-10-05 22:13:52,138 - root - INFO - step: 38640 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 22:13:52,139 - root - INFO - lr: 5.1308e-06 gnorm: 1.25 [23:39:40< 0:49:58] +[titan] 2025-10-05 22:14:03,026 - root - INFO - step: 38645 loss: 1.8958 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6769 +[titan] 2025-10-05 22:14:03,026 - root - INFO - lr: 5.1299e-06 gnorm: 1.24 [23:39:51< 0:49:47] +[titan] 2025-10-05 22:14:11,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:14:13,845 - root - INFO - step: 38650 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6629 +[titan] 2025-10-05 22:14:13,845 - root - INFO - lr: 5.1289e-06 gnorm: 1.27 [23:40:02< 0:49:36] +[titan] 2025-10-05 22:14:24,687 - root - INFO - step: 38655 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 22:14:24,687 - root - INFO - lr: 5.1280e-06 gnorm: 1.32 [23:40:12< 0:49:24] +[titan] 2025-10-05 22:14:35,527 - root - INFO - step: 38660 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6957 +[titan] 2025-10-05 22:14:35,527 - root - INFO - lr: 5.1270e-06 gnorm: 1.28 [23:40:23< 0:49:13] +[titan] 2025-10-05 22:14:46,388 - root - INFO - step: 38665 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 22:14:46,388 - root - INFO - lr: 5.1261e-06 gnorm: 1.24 [23:40:34< 0:49:02] +[titan] 2025-10-05 22:14:57,230 - root - INFO - step: 38670 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 22:14:57,230 - root - INFO - lr: 5.1251e-06 gnorm: 1.25 [23:40:45< 0:48:51] +[titan] 2025-10-05 22:15:08,076 - root - INFO - step: 38675 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 22:15:08,076 - root - INFO - lr: 5.1242e-06 gnorm: 1.23 [23:40:56< 0:48:40] +[titan] 2025-10-05 22:15:18,905 - root - INFO - step: 38680 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 22:15:18,905 - root - INFO - lr: 
5.1233e-06 gnorm: 1.24 [23:41:07< 0:48:29] +[titan] 2025-10-05 22:15:29,770 - root - INFO - step: 38685 loss: 1.8560 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6416 +[titan] 2025-10-05 22:15:29,770 - root - INFO - lr: 5.1223e-06 gnorm: 1.26 [23:41:17< 0:48:18] +[titan] 2025-10-05 22:15:40,605 - root - INFO - step: 38690 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 22:15:40,606 - root - INFO - lr: 5.1214e-06 gnorm: 1.26 [23:41:28< 0:48:07] +[titan] 2025-10-05 22:15:51,445 - root - INFO - step: 38695 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 22:15:51,445 - root - INFO - lr: 5.1205e-06 gnorm: 1.26 [23:41:39< 0:47:56] +[titan] 2025-10-05 22:16:00,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:16:02,303 - root - INFO - step: 38700 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6625 +[titan] 2025-10-05 22:16:02,303 - root - INFO - lr: 5.1195e-06 gnorm: 1.22 [23:41:50< 0:47:45] +[titan] 2025-10-05 22:16:13,157 - root - INFO - step: 38705 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 22:16:13,157 - root - INFO - lr: 5.1186e-06 gnorm: 1.24 [23:42:01< 0:47:34] +[titan] 2025-10-05 22:16:24,067 - root - INFO - step: 38710 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 22:16:24,067 - root - INFO - lr: 5.1177e-06 gnorm: 1.23 [23:42:12< 0:47:23] +[titan] 2025-10-05 22:16:34,977 - root - INFO - step: 38715 loss: 1.9159 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:16:34,977 - root - INFO - lr: 5.1168e-06 gnorm: 1.27 [23:42:23< 0:47:12] +[titan] 2025-10-05 22:16:45,845 - root - INFO - step: 38720 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 22:16:45,845 - root - INFO - lr: 5.1159e-06 gnorm: 1.26 [23:42:34< 0:47:01] +[titan] 2025-10-05 22:16:56,703 - root - INFO - step: 38725 loss: 1.8703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 22:16:56,703 - root - INFO - lr: 5.1150e-06 gnorm: 1.22 [23:42:44< 0:46:50] +[titan] 2025-10-05 22:17:07,552 - root - INFO - step: 38730 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6731 +[titan] 2025-10-05 22:17:07,553 - root - INFO - lr: 
5.1141e-06 gnorm: 1.23 [23:42:55< 0:46:39] +[titan] 2025-10-05 22:17:18,393 - root - INFO - step: 38735 loss: 1.9710 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 22:17:18,394 - root - INFO - lr: 5.1132e-06 gnorm: 1.32 [23:43:06< 0:46:28] +[titan] 2025-10-05 22:17:29,314 - root - INFO - step: 38740 loss: 1.8794 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 22:17:29,314 - root - INFO - lr: 5.1123e-06 gnorm: 1.24 [23:43:17< 0:46:17] +[titan] 2025-10-05 22:17:40,164 - root - INFO - step: 38745 loss: 1.8962 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6772 +[titan] 2025-10-05 22:17:40,164 - root - INFO - lr: 5.1114e-06 gnorm: 1.27 [23:43:28< 0:46:06] +[titan] 2025-10-05 22:17:48,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:17:51,020 - root - INFO - step: 38750 loss: 1.8652 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6492 +[titan] 2025-10-05 22:17:51,020 - root - INFO - lr: 5.1105e-06 gnorm: 1.30 [23:43:39< 0:45:55] +[titan] 2025-10-05 22:18:01,867 - root - INFO - step: 38755 loss: 1.8715 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6553 +[titan] 2025-10-05 22:18:01,867 - root - INFO - lr: 5.1097e-06 gnorm: 1.24 [23:43:50< 0:45:44] +[titan] 2025-10-05 22:18:12,725 - root - INFO - step: 38760 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 22:18:12,725 - root - INFO - lr: 5.1088e-06 gnorm: 1.25 [23:44:00< 0:45:33] +[titan] 2025-10-05 22:18:23,576 - root - INFO - step: 38765 loss: 1.9134 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 22:18:23,576 - root - INFO - lr: 5.1079e-06 gnorm: 1.24 [23:44:11< 0:45:22] +[titan] 2025-10-05 22:18:34,466 - root - INFO - step: 38770 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 22:18:34,466 - root - INFO - lr: 5.1070e-06 gnorm: 1.21 [23:44:22< 0:45:11] +[titan] 2025-10-05 22:18:45,359 - root - INFO - step: 38775 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 22:18:45,359 - root - INFO - lr: 5.1062e-06 gnorm: 1.22 [23:44:33< 0:45:00] +[titan] 2025-10-05 22:18:56,225 - root - INFO - step: 38780 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 22:18:56,225 - root - INFO - lr: 
5.1053e-06 gnorm: 1.23 [23:44:44< 0:44:49] +[titan] 2025-10-05 22:19:07,063 - root - INFO - step: 38785 loss: 1.8911 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 22:19:07,063 - root - INFO - lr: 5.1044e-06 gnorm: 1.28 [23:44:55< 0:44:38] +[titan] 2025-10-05 22:19:17,908 - root - INFO - step: 38790 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 22:19:17,908 - root - INFO - lr: 5.1036e-06 gnorm: 1.27 [23:45:06< 0:44:27] +[titan] 2025-10-05 22:19:28,765 - root - INFO - step: 38795 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:19:28,765 - root - INFO - lr: 5.1027e-06 gnorm: 1.25 [23:45:16< 0:44:16] +[titan] 2025-10-05 22:19:37,452 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:19:39,646 - root - INFO - step: 38800 loss: 1.9199 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 22:19:39,646 - root - INFO - lr: 5.1019e-06 gnorm: 1.22 [23:45:27< 0:44:05] +[titan] 2025-10-05 22:19:50,541 - root - INFO - step: 38805 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 22:19:50,542 - root - INFO - lr: 5.1010e-06 gnorm: 1.25 [23:45:38< 0:43:54] +[titan] 2025-10-05 22:20:01,404 - root - INFO - step: 38810 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6756 +[titan] 2025-10-05 22:20:01,405 - root - INFO - lr: 5.1002e-06 gnorm: 1.25 [23:45:49< 0:43:43] +[titan] 2025-10-05 22:20:12,258 - root - INFO - step: 38815 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7061 +[titan] 2025-10-05 22:20:12,258 - root - INFO - lr: 5.0993e-06 gnorm: 1.33 [23:46:00< 0:43:32] +[titan] 2025-10-05 22:20:23,109 - root - INFO - step: 38820 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:20:23,109 - root - INFO - lr: 5.0985e-06 gnorm: 1.25 [23:46:11< 0:43:21] +[titan] 2025-10-05 22:20:33,977 - root - INFO - step: 38825 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6488 +[titan] 2025-10-05 22:20:33,977 - root - INFO - lr: 5.0977e-06 gnorm: 1.28 [23:46:22< 0:43:10] +[titan] 2025-10-05 22:20:44,821 - root - INFO - step: 38830 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6682 +[titan] 2025-10-05 22:20:44,822 - root - INFO - lr: 
5.0969e-06 gnorm: 1.27 [23:46:32< 0:42:59] +[titan] 2025-10-05 22:20:55,718 - root - INFO - step: 38835 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 22:20:55,718 - root - INFO - lr: 5.0960e-06 gnorm: 1.26 [23:46:43< 0:42:48] +[titan] 2025-10-05 22:21:06,566 - root - INFO - step: 38840 loss: 1.9277 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 22:21:06,566 - root - INFO - lr: 5.0952e-06 gnorm: 1.27 [23:46:54< 0:42:36] +[titan] 2025-10-05 22:21:17,446 - root - INFO - step: 38845 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6385 +[titan] 2025-10-05 22:21:17,446 - root - INFO - lr: 5.0944e-06 gnorm: 1.24 [23:47:05< 0:42:25] +[titan] 2025-10-05 22:21:26,133 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:21:28,317 - root - INFO - step: 38850 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 22:21:28,317 - root - INFO - lr: 5.0936e-06 gnorm: 1.28 [23:47:16< 0:42:14] +[titan] 2025-10-05 22:21:39,188 - root - INFO - step: 38855 loss: 1.8571 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6436 +[titan] 2025-10-05 22:21:39,188 - root - INFO - lr: 5.0928e-06 gnorm: 1.25 [23:47:27< 0:42:03] +[titan] 2025-10-05 22:21:50,046 - root - INFO - step: 38860 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 22:21:50,047 - root - INFO - lr: 5.0920e-06 gnorm: 1.30 [23:47:38< 0:41:52] +[titan] 2025-10-05 22:22:00,909 - root - INFO - step: 38865 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:22:00,909 - root - INFO - lr: 5.0911e-06 gnorm: 1.28 [23:47:49< 0:41:41] +[titan] 2025-10-05 22:22:11,785 - root - INFO - step: 38870 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 22:22:11,786 - root - INFO - lr: 5.0903e-06 gnorm: 1.26 [23:47:59< 0:41:30] +[titan] 2025-10-05 22:22:22,628 - root - INFO - step: 38875 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7259 +[titan] 2025-10-05 22:22:22,628 - root - INFO - lr: 5.0895e-06 gnorm: 1.24 [23:48:10< 0:41:19] +[titan] 2025-10-05 22:22:33,500 - root - INFO - step: 38880 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 22:22:33,500 - root - INFO - lr: 
5.0888e-06 gnorm: 1.26 [23:48:21< 0:41:08] +[titan] 2025-10-05 22:22:44,338 - root - INFO - step: 38885 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:22:44,338 - root - INFO - lr: 5.0880e-06 gnorm: 1.28 [23:48:32< 0:40:57] +[titan] 2025-10-05 22:22:55,187 - root - INFO - step: 38890 loss: 1.9042 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6843 +[titan] 2025-10-05 22:22:55,187 - root - INFO - lr: 5.0872e-06 gnorm: 1.24 [23:48:43< 0:40:46] +[titan] 2025-10-05 22:23:06,026 - root - INFO - step: 38895 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:23:06,026 - root - INFO - lr: 5.0864e-06 gnorm: 1.26 [23:48:54< 0:40:35] +[titan] 2025-10-05 22:23:14,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:23:16,918 - root - INFO - step: 38900 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 22:23:16,918 - root - INFO - lr: 5.0856e-06 gnorm: 1.25 [23:49:05< 0:40:24] +[titan] 2025-10-05 22:23:27,768 - root - INFO - step: 38905 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 22:23:27,768 - root - INFO - lr: 5.0848e-06 gnorm: 1.28 [23:49:15< 0:40:13] +[titan] 2025-10-05 22:23:38,736 - root - INFO - step: 38910 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 22:23:38,736 - root - INFO - lr: 5.0841e-06 gnorm: 1.25 [23:49:26< 0:40:02] +[titan] 2025-10-05 22:23:43,291 - root - INFO - Dumping profiler traces at step 38912 +[titan] 2025-10-05 22:23:43,329 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:23:49,840 - root - INFO - step: 38915 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:23:49,840 - root - INFO - lr: 5.0833e-06 gnorm: 1.23 [23:49:37< 0:39:51] +[titan] 2025-10-05 22:24:00,685 - root - INFO - step: 38920 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 22:24:00,685 - root - INFO - lr: 5.0825e-06 gnorm: 1.24 [23:49:48< 0:39:40] +[titan] 2025-10-05 22:24:11,518 - root - INFO - step: 38925 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:24:11,518 - root - INFO - lr: 5.0818e-06 gnorm: 1.28 [23:49:59< 0:39:29] +[titan] 2025-10-05 22:24:22,383 - root - INFO - step: 38930 loss: 
1.9030 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:24:22,383 - root - INFO - lr: 5.0810e-06 gnorm: 1.22 [23:50:10< 0:39:18] +[titan] 2025-10-05 22:24:33,286 - root - INFO - step: 38935 loss: 1.9341 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 22:24:33,286 - root - INFO - lr: 5.0803e-06 gnorm: 1.25 [23:50:21< 0:39:07] +[titan] 2025-10-05 22:24:44,145 - root - INFO - step: 38940 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6642 +[titan] 2025-10-05 22:24:44,145 - root - INFO - lr: 5.0795e-06 gnorm: 1.33 [23:50:32< 0:38:56] +[titan] 2025-10-05 22:24:55,011 - root - INFO - step: 38945 loss: 1.8488 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6353 +[titan] 2025-10-05 22:24:55,011 - root - INFO - lr: 5.0788e-06 gnorm: 1.25 [23:50:43< 0:38:45] +[titan] 2025-10-05 22:25:03,688 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:25:05,861 - root - INFO - step: 38950 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 22:25:05,861 - root - INFO - lr: 5.0780e-06 gnorm: 1.26 [23:50:54< 0:38:34] +[titan] 2025-10-05 22:25:16,696 - root - INFO - step: 38955 loss: 1.8763 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6594 +[titan] 2025-10-05 22:25:16,696 - root - INFO - lr: 5.0773e-06 gnorm: 1.25 [23:51:04< 0:38:23] +[titan] 2025-10-05 22:25:27,557 - root - INFO - step: 38960 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:25:27,557 - root - INFO - lr: 5.0765e-06 gnorm: 1.25 [23:51:15< 0:38:12] +[titan] 2025-10-05 22:25:38,467 - root - INFO - step: 38965 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 22:25:38,467 - root - INFO - lr: 5.0758e-06 gnorm: 1.24 [23:51:26< 0:38:01] +[titan] 2025-10-05 22:25:49,317 - root - INFO - step: 38970 loss: 1.8769 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 22:25:49,317 - root - INFO - lr: 5.0751e-06 gnorm: 1.22 [23:51:37< 0:37:50] +[titan] 2025-10-05 22:26:00,183 - root - INFO - step: 38975 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 22:26:00,183 - root - INFO - lr: 5.0743e-06 gnorm: 1.28 [23:51:48< 0:37:39] +[titan] 2025-10-05 22:26:11,057 - root - INFO - step: 38980 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 22:26:11,057 - root - INFO - lr: 
5.0736e-06 gnorm: 1.29 [23:51:59< 0:37:28] +[titan] 2025-10-05 22:26:21,891 - root - INFO - step: 38985 loss: 1.8837 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 22:26:21,891 - root - INFO - lr: 5.0729e-06 gnorm: 1.26 [23:52:10< 0:37:17] +[titan] 2025-10-05 22:26:32,761 - root - INFO - step: 38990 loss: 1.8936 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:26:32,761 - root - INFO - lr: 5.0722e-06 gnorm: 1.25 [23:52:20< 0:37:06] +[titan] 2025-10-05 22:26:43,668 - root - INFO - step: 38995 loss: 1.8343 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2124 global_avg_mtp_loss: 1.6219 +[titan] 2025-10-05 22:26:43,668 - root - INFO - lr: 5.0715e-06 gnorm: 1.22 [23:52:31< 0:36:55] +[titan] 2025-10-05 22:26:52,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:26:54,511 - root - INFO - step: 39000 loss: 1.8692 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6534 +[titan] 2025-10-05 22:26:54,511 - root - INFO - lr: 5.0708e-06 gnorm: 1.23 [23:52:42< 0:36:44] +[titan] 2025-10-05 22:27:05,357 - root - INFO - step: 39005 loss: 1.8448 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6317 +[titan] 2025-10-05 22:27:05,357 - root - INFO - lr: 5.0701e-06 gnorm: 1.24 [23:52:53< 0:36:33] +[titan] 2025-10-05 22:27:16,214 - root - INFO - step: 39010 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 22:27:16,214 - root - INFO - lr: 5.0694e-06 gnorm: 1.27 [23:53:04< 0:36:22] +[titan] 2025-10-05 22:27:27,027 - root - INFO - step: 39015 loss: 1.8935 memory: 118.84GiB(85.28%) tps: 30,304 tflops: 420.43 mfu: 42.51% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 22:27:27,028 - root - INFO - lr: 5.0687e-06 gnorm: 1.29 [23:53:15< 0:36:11] +[titan] 2025-10-05 22:27:37,873 - root - INFO - step: 39020 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 22:27:37,873 - root - INFO - lr: 5.0680e-06 gnorm: 1.25 [23:53:26< 0:36:00] +[titan] 2025-10-05 22:27:48,725 - root - INFO - step: 39025 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 22:27:48,725 - root - INFO - lr: 5.0673e-06 gnorm: 1.23 [23:53:36< 0:35:49] +[titan] 2025-10-05 22:27:59,585 - root - INFO - step: 39030 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7320 +[titan] 2025-10-05 22:27:59,585 - root - INFO - lr: 
5.0666e-06 gnorm: 1.26 [23:53:47< 0:35:38] +[titan] 2025-10-05 22:28:10,411 - root - INFO - step: 39035 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:28:10,411 - root - INFO - lr: 5.0659e-06 gnorm: 1.29 [23:53:58< 0:35:26] +[titan] 2025-10-05 22:28:21,251 - root - INFO - step: 39040 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:28:21,251 - root - INFO - lr: 5.0652e-06 gnorm: 1.26 [23:54:09< 0:35:15] +[titan] 2025-10-05 22:28:32,077 - root - INFO - step: 39045 loss: 1.9016 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6812 +[titan] 2025-10-05 22:28:32,077 - root - INFO - lr: 5.0645e-06 gnorm: 1.24 [23:54:20< 0:35:04] +[titan] 2025-10-05 22:28:40,768 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:28:42,943 - root - INFO - step: 39050 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 22:28:42,943 - root - INFO - lr: 5.0639e-06 gnorm: 1.25 [23:54:31< 0:34:53] +[titan] 2025-10-05 22:28:53,779 - root - INFO - step: 39055 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 22:28:53,779 - root - INFO - lr: 5.0632e-06 gnorm: 1.27 [23:54:41< 0:34:42] +[titan] 2025-10-05 22:29:04,650 - root - INFO - step: 39060 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:29:04,650 - root - INFO - lr: 5.0625e-06 gnorm: 1.28 [23:54:52< 0:34:31] +[titan] 2025-10-05 22:29:15,481 - root - INFO - step: 39065 loss: 1.8892 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:29:15,481 - root - INFO - lr: 5.0619e-06 gnorm: 1.29 [23:55:03< 0:34:20] +[titan] 2025-10-05 22:29:26,319 - root - INFO - step: 39070 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7449 +[titan] 2025-10-05 22:29:26,319 - root - INFO - lr: 5.0612e-06 gnorm: 1.27 [23:55:14< 0:34:09] +[titan] 2025-10-05 22:29:37,169 - root - INFO - step: 39075 loss: 1.8711 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:29:37,169 - root - INFO - lr: 5.0606e-06 gnorm: 1.39 [23:55:25< 0:33:58] +[titan] 2025-10-05 22:29:47,983 - root - INFO - step: 39080 loss: 1.9585 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 22:29:47,983 - root - INFO - lr: 
5.0599e-06 gnorm: 1.27 [23:55:36< 0:33:47] +[titan] 2025-10-05 22:29:58,811 - root - INFO - step: 39085 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 22:29:58,811 - root - INFO - lr: 5.0593e-06 gnorm: 1.28 [23:55:46< 0:33:36] +[titan] 2025-10-05 22:30:09,630 - root - INFO - step: 39090 loss: 1.8996 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:30:09,630 - root - INFO - lr: 5.0586e-06 gnorm: 1.26 [23:55:57< 0:33:25] +[titan] 2025-10-05 22:30:20,468 - root - INFO - step: 39095 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6810 +[titan] 2025-10-05 22:30:20,469 - root - INFO - lr: 5.0580e-06 gnorm: 1.24 [23:56:08< 0:33:14] +[titan] 2025-10-05 22:30:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:30:31,293 - root - INFO - step: 39100 loss: 1.9874 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 22:30:31,293 - root - INFO - lr: 5.0573e-06 gnorm: 1.34 [23:56:19< 0:33:03] +[titan] 2025-10-05 22:30:42,362 - root - INFO - step: 39105 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,605 tflops: 410.73 mfu: 41.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:30:42,362 - root - INFO - lr: 5.0567e-06 gnorm: 1.26 [23:56:30< 0:32:52] +[titan] 2025-10-05 22:30:53,217 - root - INFO - step: 39110 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2156 global_avg_mtp_loss: 1.6453 +[titan] 2025-10-05 22:30:53,217 - root - INFO - lr: 5.0561e-06 gnorm: 1.23 [23:56:41< 0:32:41] +[titan] 2025-10-05 22:31:04,043 - root - INFO - step: 39115 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 22:31:04,043 - root - INFO - lr: 5.0554e-06 gnorm: 1.23 [23:56:52< 0:32:30] +[titan] 2025-10-05 22:31:14,877 - root - INFO - step: 39120 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 22:31:14,877 - root - INFO - lr: 5.0548e-06 gnorm: 1.28 [23:57:03< 0:32:19] +[titan] 2025-10-05 22:31:25,759 - root - INFO - step: 39125 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 22:31:25,759 - root - INFO - lr: 5.0542e-06 gnorm: 1.27 [23:57:13< 0:32:08] +[titan] 2025-10-05 22:31:36,579 - root - INFO - step: 39130 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 22:31:36,579 - root - INFO - lr: 
5.0536e-06 gnorm: 1.28 [23:57:24< 0:31:57] +[titan] 2025-10-05 22:31:47,420 - root - INFO - step: 39135 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7389 +[titan] 2025-10-05 22:31:47,420 - root - INFO - lr: 5.0530e-06 gnorm: 1.29 [23:57:35< 0:31:46] +[titan] 2025-10-05 22:31:58,260 - root - INFO - step: 39140 loss: 1.9505 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7258 +[titan] 2025-10-05 22:31:58,260 - root - INFO - lr: 5.0523e-06 gnorm: 1.27 [23:57:46< 0:31:35] +[titan] 2025-10-05 22:32:09,071 - root - INFO - step: 39145 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 22:32:09,071 - root - INFO - lr: 5.0517e-06 gnorm: 1.24 [23:57:57< 0:31:24] +[titan] 2025-10-05 22:32:17,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:32:19,885 - root - INFO - step: 39150 loss: 1.8924 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:32:19,885 - root - INFO - lr: 5.0511e-06 gnorm: 1.26 [23:58:08< 0:31:13] +[titan] 2025-10-05 22:32:30,741 - root - INFO - step: 39155 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:32:30,741 - root - INFO - lr: 5.0505e-06 gnorm: 1.26 [23:58:18< 0:31:02] +[titan] 2025-10-05 22:32:41,618 - root - INFO - step: 39160 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 22:32:41,618 - root - INFO - lr: 5.0499e-06 gnorm: 1.27 [23:58:29< 0:30:51] +[titan] 2025-10-05 22:32:52,420 - root - INFO - step: 39165 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,335 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7041 +[titan] 2025-10-05 22:32:52,421 - root - INFO - lr: 5.0493e-06 gnorm: 1.31 [23:58:40< 0:30:40] +[titan] 2025-10-05 22:33:03,241 - root - INFO - step: 39170 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6786 +[titan] 2025-10-05 22:33:03,241 - root - INFO - lr: 5.0488e-06 gnorm: 1.28 [23:58:51< 0:30:29] +[titan] 2025-10-05 22:33:14,059 - root - INFO - step: 39175 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:33:14,059 - root - INFO - lr: 5.0482e-06 gnorm: 1.25 [23:59:02< 0:30:18] +[titan] 2025-10-05 22:33:24,854 - root - INFO - step: 39180 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,356 tflops: 421.14 mfu: 42.58% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7066 +[titan] 2025-10-05 22:33:24,854 - root - INFO - lr: 
5.0476e-06 gnorm: 1.25 [23:59:12< 0:30:07] +[titan] 2025-10-05 22:33:35,698 - root - INFO - step: 39185 loss: 1.8822 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6646 +[titan] 2025-10-05 22:33:35,698 - root - INFO - lr: 5.0470e-06 gnorm: 1.24 [23:59:23< 0:29:56] +[titan] 2025-10-05 22:33:46,541 - root - INFO - step: 39190 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 22:33:46,542 - root - INFO - lr: 5.0464e-06 gnorm: 1.26 [23:59:34< 0:29:45] +[titan] 2025-10-05 22:33:57,343 - root - INFO - step: 39195 loss: 1.8734 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6567 +[titan] 2025-10-05 22:33:57,343 - root - INFO - lr: 5.0459e-06 gnorm: 1.26 [23:59:45< 0:29:34] +[titan] 2025-10-05 22:34:05,988 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:34:08,160 - root - INFO - step: 39200 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6909 +[titan] 2025-10-05 22:34:08,161 - root - INFO - lr: 5.0453e-06 gnorm: 1.24 [23:59:56< 0:29:23] +[titan] 2025-10-05 22:34:18,971 - root - INFO - step: 39205 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,312 tflops: 420.53 mfu: 42.52% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:34:18,971 - root - INFO - lr: 5.0447e-06 gnorm: 1.27 [1 day, 0:00:07< 0:29:12] +[titan] 2025-10-05 22:34:29,800 - root - INFO - step: 39210 loss: 1.8480 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 22:34:29,800 - root - INFO - lr: 5.0442e-06 gnorm: 1.20 [1 day, 0:00:17< 0:29:01] +[titan] 2025-10-05 22:34:40,603 - root - INFO - step: 39215 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 22:34:40,603 - root - INFO - lr: 5.0436e-06 gnorm: 1.28 [1 day, 0:00:28< 0:28:50] +[titan] 2025-10-05 22:34:51,467 - root - INFO - step: 39220 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6993 +[titan] 2025-10-05 22:34:51,467 - root - INFO - lr: 5.0431e-06 gnorm: 1.25 [1 day, 0:00:39< 0:28:39] +[titan] 2025-10-05 22:35:02,300 - root - INFO - step: 39225 loss: 1.9143 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 22:35:02,300 - root - INFO - lr: 5.0425e-06 gnorm: 1.26 [1 day, 0:00:50< 0:28:28] +[titan] 2025-10-05 22:35:13,119 - root - INFO - step: 39230 loss: 1.8713 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6548 +[titan] 2025-10-05 
22:35:13,119 - root - INFO - lr: 5.0420e-06 gnorm: 1.29 [1 day, 0:01:01< 0:28:17] +[titan] 2025-10-05 22:35:23,922 - root - INFO - step: 39235 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.83 mfu: 42.55% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6983 +[titan] 2025-10-05 22:35:23,922 - root - INFO - lr: 5.0414e-06 gnorm: 1.26 [1 day, 0:01:12< 0:28:06] +[titan] 2025-10-05 22:35:34,763 - root - INFO - step: 39240 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 22:35:34,763 - root - INFO - lr: 5.0409e-06 gnorm: 1.28 [1 day, 0:01:22< 0:27:54] +[titan] 2025-10-05 22:35:45,607 - root - INFO - step: 39245 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 22:35:45,608 - root - INFO - lr: 5.0403e-06 gnorm: 1.28 [1 day, 0:01:33< 0:27:43] +[titan] 2025-10-05 22:35:54,248 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:35:56,420 - root - INFO - step: 39250 loss: 1.9097 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 22:35:56,420 - root - INFO - lr: 5.0398e-06 gnorm: 1.22 [1 day, 0:01:44< 0:27:32] +[titan] 2025-10-05 22:36:07,280 - root - INFO - step: 39255 loss: 1.8902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6708 +[titan] 2025-10-05 22:36:07,280 - root - INFO - lr: 5.0393e-06 gnorm: 1.24 [1 day, 0:01:55< 0:27:21] +[titan] 2025-10-05 22:36:18,098 - root - INFO - step: 39260 loss: 1.9171 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:36:18,098 - root - INFO - lr: 5.0388e-06 gnorm: 1.26 [1 day, 0:02:06< 0:27:10] +[titan] 2025-10-05 22:36:28,912 - root - INFO - step: 39265 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6622 +[titan] 2025-10-05 22:36:28,912 - root - INFO - lr: 5.0382e-06 gnorm: 1.27 [1 day, 0:02:17< 0:26:59] +[titan] 2025-10-05 22:36:39,738 - root - INFO - step: 39270 loss: 1.8621 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6463 +[titan] 2025-10-05 22:36:39,739 - root - INFO - lr: 5.0377e-06 gnorm: 1.26 [1 day, 0:02:27< 0:26:48] +[titan] 2025-10-05 22:36:50,600 - root - INFO - step: 39275 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 22:36:50,600 - root - INFO - lr: 5.0372e-06 gnorm: 1.28 [1 day, 0:02:38< 0:26:37] +[titan] 2025-10-05 22:37:01,420 - root - INFO - step: 39280 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6663 +[titan] 2025-10-05 
22:37:01,421 - root - INFO - lr: 5.0367e-06 gnorm: 1.20 [1 day, 0:02:49< 0:26:26] +[titan] 2025-10-05 22:37:12,238 - root - INFO - step: 39285 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 22:37:12,238 - root - INFO - lr: 5.0362e-06 gnorm: 1.27 [1 day, 0:03:00< 0:26:15] +[titan] 2025-10-05 22:37:23,040 - root - INFO - step: 39290 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 22:37:23,040 - root - INFO - lr: 5.0357e-06 gnorm: 1.26 [1 day, 0:03:11< 0:26:04] +[titan] 2025-10-05 22:37:33,840 - root - INFO - step: 39295 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:37:33,840 - root - INFO - lr: 5.0352e-06 gnorm: 1.36 [1 day, 0:03:21< 0:25:53] +[titan] 2025-10-05 22:37:42,487 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:37:44,699 - root - INFO - step: 39300 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6904 +[titan] 2025-10-05 22:37:44,699 - root - INFO - lr: 5.0347e-06 gnorm: 1.25 [1 day, 0:03:32< 0:25:42] +[titan] 2025-10-05 22:37:55,513 - root - INFO - step: 39305 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 22:37:55,513 - root - INFO - lr: 5.0342e-06 gnorm: 1.29 [1 day, 0:03:43< 0:25:31] +[titan] 2025-10-05 22:38:06,319 - root - INFO - step: 39310 loss: 1.8070 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2093 global_avg_mtp_loss: 1.5976 +[titan] 2025-10-05 22:38:06,319 - root - INFO - lr: 5.0337e-06 gnorm: 1.22 [1 day, 0:03:54< 0:25:20] +[titan] 2025-10-05 22:38:17,140 - root - INFO - step: 39315 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 22:38:17,140 - root - INFO - lr: 5.0332e-06 gnorm: 1.28 [1 day, 0:04:05< 0:25:09] +[titan] 2025-10-05 22:38:27,940 - root - INFO - step: 39320 loss: 1.8952 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:38:27,940 - root - INFO - lr: 5.0327e-06 gnorm: 1.24 [1 day, 0:04:16< 0:24:58] +[titan] 2025-10-05 22:38:38,794 - root - INFO - step: 39325 loss: 1.8206 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2106 global_avg_mtp_loss: 1.6101 +[titan] 2025-10-05 22:38:38,795 - root - INFO - lr: 5.0323e-06 gnorm: 1.23 [1 day, 0:04:26< 0:24:47] +[titan] 2025-10-05 22:38:49,643 - root - INFO - step: 39330 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7124 +[titan] 2025-10-05 
22:38:49,643 - root - INFO - lr: 5.0318e-06 gnorm: 1.29 [1 day, 0:04:37< 0:24:36] +[titan] 2025-10-05 22:39:00,463 - root - INFO - step: 39335 loss: 1.9117 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:39:00,464 - root - INFO - lr: 5.0313e-06 gnorm: 1.25 [1 day, 0:04:48< 0:24:25] +[titan] 2025-10-05 22:39:11,289 - root - INFO - step: 39340 loss: 1.8200 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2105 global_avg_mtp_loss: 1.6095 +[titan] 2025-10-05 22:39:11,289 - root - INFO - lr: 5.0308e-06 gnorm: 1.26 [1 day, 0:04:59< 0:24:14] +[titan] 2025-10-05 22:39:22,114 - root - INFO - step: 39345 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:39:22,114 - root - INFO - lr: 5.0304e-06 gnorm: 1.27 [1 day, 0:05:10< 0:24:03] +[titan] 2025-10-05 22:39:30,839 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:39:33,015 - root - INFO - step: 39350 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 22:39:33,015 - root - INFO - lr: 5.0299e-06 gnorm: 1.24 [1 day, 0:05:21< 0:23:52] +[titan] 2025-10-05 22:39:43,880 - root - INFO - step: 39355 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 22:39:43,880 - root - INFO - lr: 5.0294e-06 gnorm: 1.21 [1 day, 0:05:31< 0:23:41] +[titan] 2025-10-05 22:39:54,779 - root - INFO - step: 39360 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2123 global_avg_mtp_loss: 1.6258 +[titan] 2025-10-05 22:39:54,779 - root - INFO - lr: 5.0290e-06 gnorm: 1.26 [1 day, 0:05:42< 0:23:30] +[titan] 2025-10-05 22:40:05,616 - root - INFO - step: 39365 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.42% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 22:40:05,617 - root - INFO - lr: 5.0285e-06 gnorm: 1.22 [1 day, 0:05:53< 0:23:19] +[titan] 2025-10-05 22:40:16,459 - root - INFO - step: 39370 loss: 1.8828 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 22:40:16,459 - root - INFO - lr: 5.0281e-06 gnorm: 1.23 [1 day, 0:06:04< 0:23:08] +[titan] 2025-10-05 22:40:27,280 - root - INFO - step: 39375 loss: 1.9073 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 22:40:27,280 - root - INFO - lr: 5.0277e-06 gnorm: 1.28 [1 day, 0:06:15< 0:22:57] +[titan] 2025-10-05 22:40:38,119 - root - INFO - step: 39380 loss: 1.9206 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 
22:40:38,119 - root - INFO - lr: 5.0272e-06 gnorm: 1.23 [1 day, 0:06:26< 0:22:46] +[titan] 2025-10-05 22:40:49,118 - root - INFO - step: 39385 loss: 1.9186 memory: 118.84GiB(85.28%) tps: 29,794 tflops: 413.35 mfu: 41.79% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6970 +[titan] 2025-10-05 22:40:49,118 - root - INFO - lr: 5.0268e-06 gnorm: 1.25 [1 day, 0:06:37< 0:22:35] +[titan] 2025-10-05 22:40:59,990 - root - INFO - step: 39390 loss: 1.9410 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 22:40:59,990 - root - INFO - lr: 5.0263e-06 gnorm: 1.30 [1 day, 0:06:48< 0:22:24] +[titan] 2025-10-05 22:41:10,818 - root - INFO - step: 39395 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6520 +[titan] 2025-10-05 22:41:10,818 - root - INFO - lr: 5.0259e-06 gnorm: 1.23 [1 day, 0:06:58< 0:22:13] +[titan] 2025-10-05 22:41:19,492 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:41:21,663 - root - INFO - step: 39400 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:41:21,663 - root - INFO - lr: 5.0255e-06 gnorm: 1.24 [1 day, 0:07:09< 0:22:02] +[titan] 2025-10-05 22:41:32,499 - root - INFO - step: 39405 loss: 1.8950 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:41:32,500 - root - INFO - lr: 5.0251e-06 gnorm: 1.26 [1 day, 0:07:20< 0:21:51] +[titan] 2025-10-05 22:41:43,338 - root - INFO - step: 39410 loss: 1.9067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:41:43,338 - root - INFO - lr: 5.0246e-06 gnorm: 1.25 [1 day, 0:07:31< 0:21:40] +[titan] 2025-10-05 22:41:54,271 - root - INFO - step: 39415 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 22:41:54,271 - root - INFO - lr: 5.0242e-06 gnorm: 1.26 [1 day, 0:07:42< 0:21:29] +[titan] 2025-10-05 22:42:05,121 - root - INFO - step: 39420 loss: 1.8925 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6733 +[titan] 2025-10-05 22:42:05,121 - root - INFO - lr: 5.0238e-06 gnorm: 1.27 [1 day, 0:07:53< 0:21:18] +[titan] 2025-10-05 22:42:14,073 - root - INFO - Dumping profiler traces at step 39424 +[titan] 2025-10-05 22:42:14,110 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:42:16,301 - root - INFO - step: 39425 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 29,310 tflops: 406.64 mfu: 41.12% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 22:42:16,301 - root - INFO - lr: 5.0234e-06 gnorm: 1.27 [1 day, 0:08:04< 0:21:07] +[titan] 2025-10-05 22:42:27,154 
- root - INFO - step: 39430 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 22:42:27,155 - root - INFO - lr: 5.0230e-06 gnorm: 1.25 [1 day, 0:08:15< 0:20:56] +[titan] 2025-10-05 22:42:37,960 - root - INFO - step: 39435 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 22:42:37,960 - root - INFO - lr: 5.0226e-06 gnorm: 1.27 [1 day, 0:08:26< 0:20:45] +[titan] 2025-10-05 22:42:48,874 - root - INFO - step: 39440 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:42:48,874 - root - INFO - lr: 5.0222e-06 gnorm: 1.24 [1 day, 0:08:36< 0:20:34] +[titan] 2025-10-05 22:42:59,759 - root - INFO - step: 39445 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 22:42:59,759 - root - INFO - lr: 5.0218e-06 gnorm: 1.26 [1 day, 0:08:47< 0:20:23] +[titan] 2025-10-05 22:43:08,434 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:43:10,640 - root - INFO - step: 39450 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 22:43:10,641 - root - INFO - lr: 5.0214e-06 gnorm: 1.28 [1 day, 0:08:58< 0:20:12] +[titan] 2025-10-05 22:43:21,477 - root - INFO - step: 39455 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:43:21,478 - root - INFO - lr: 5.0210e-06 gnorm: 1.32 [1 day, 0:09:09< 0:20:01] +[titan] 2025-10-05 22:43:32,319 - root - INFO - step: 39460 loss: 1.9474 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7223 +[titan] 2025-10-05 22:43:32,319 - root - INFO - lr: 5.0206e-06 gnorm: 1.30 [1 day, 0:09:20< 0:19:50] +[titan] 2025-10-05 22:43:43,178 - root - INFO - step: 39465 loss: 1.8880 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6701 +[titan] 2025-10-05 22:43:43,178 - root - INFO - lr: 5.0203e-06 gnorm: 1.28 [1 day, 0:09:31< 0:19:39] +[titan] 2025-10-05 22:43:54,102 - root - INFO - step: 39470 loss: 1.8901 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6714 +[titan] 2025-10-05 22:43:54,102 - root - INFO - lr: 5.0199e-06 gnorm: 1.25 [1 day, 0:09:42< 0:19:27] +[titan] 2025-10-05 22:44:04,938 - root - INFO - step: 39475 loss: 1.8656 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6497 +[titan] 2025-10-05 22:44:04,938 - root - INFO - lr: 5.0195e-06 gnorm: 1.23 [1 day, 0:09:53< 0:19:16] +[titan] 2025-10-05 22:44:15,757 - root - INFO - step: 39480 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 
22:44:15,757 - root - INFO - lr: 5.0191e-06 gnorm: 1.27 [1 day, 0:10:03< 0:19:05] +[titan] 2025-10-05 22:44:26,640 - root - INFO - step: 39485 loss: 1.8523 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6375 +[titan] 2025-10-05 22:44:26,640 - root - INFO - lr: 5.0188e-06 gnorm: 1.23 [1 day, 0:10:14< 0:18:54] +[titan] 2025-10-05 22:44:37,455 - root - INFO - step: 39490 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6979 +[titan] 2025-10-05 22:44:37,455 - root - INFO - lr: 5.0184e-06 gnorm: 1.26 [1 day, 0:10:25< 0:18:43] +[titan] 2025-10-05 22:44:48,278 - root - INFO - step: 39495 loss: 1.8271 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2112 global_avg_mtp_loss: 1.6159 +[titan] 2025-10-05 22:44:48,279 - root - INFO - lr: 5.0181e-06 gnorm: 1.24 [1 day, 0:10:36< 0:18:32] +[titan] 2025-10-05 22:44:57,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:44:59,302 - root - INFO - step: 39500 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 29,726 tflops: 412.41 mfu: 41.70% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:44:59,302 - root - INFO - lr: 5.0177e-06 gnorm: 1.26 [1 day, 0:10:47< 0:18:21] +[titan] 2025-10-05 22:45:10,138 - root - INFO - step: 39505 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:45:10,138 - root - INFO - lr: 5.0173e-06 gnorm: 1.25 [1 day, 0:10:58< 0:18:10] +[titan] 2025-10-05 22:45:20,973 - root - INFO - step: 39510 loss: 1.9394 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 22:45:20,973 - root - INFO - lr: 5.0170e-06 gnorm: 1.26 [1 day, 0:11:09< 0:17:59] +[titan] 2025-10-05 22:45:31,857 - root - INFO - step: 39515 loss: 1.8345 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6215 +[titan] 2025-10-05 22:45:31,857 - root - INFO - lr: 5.0167e-06 gnorm: 1.32 [1 day, 0:11:19< 0:17:48] +[titan] 2025-10-05 22:45:42,715 - root - INFO - step: 39520 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 22:45:42,715 - root - INFO - lr: 5.0163e-06 gnorm: 1.30 [1 day, 0:11:30< 0:17:37] +[titan] 2025-10-05 22:45:53,618 - root - INFO - step: 39525 loss: 1.8642 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 22:45:53,618 - root - INFO - lr: 5.0160e-06 gnorm: 1.23 [1 day, 0:11:41< 0:17:26] +[titan] 2025-10-05 22:46:04,478 - root - INFO - step: 39530 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 
22:46:04,478 - root - INFO - lr: 5.0156e-06 gnorm: 1.28 [1 day, 0:11:52< 0:17:15] +[titan] 2025-10-05 22:46:15,353 - root - INFO - step: 39535 loss: 1.8455 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6312 +[titan] 2025-10-05 22:46:15,353 - root - INFO - lr: 5.0153e-06 gnorm: 1.27 [1 day, 0:12:03< 0:17:04] +[titan] 2025-10-05 22:46:26,197 - root - INFO - step: 39540 loss: 1.8853 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 22:46:26,197 - root - INFO - lr: 5.0150e-06 gnorm: 1.25 [1 day, 0:12:14< 0:16:53] +[titan] 2025-10-05 22:46:37,052 - root - INFO - step: 39545 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 22:46:37,052 - root - INFO - lr: 5.0147e-06 gnorm: 1.30 [1 day, 0:12:25< 0:16:42] +[titan] 2025-10-05 22:46:45,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:46:47,965 - root - INFO - step: 39550 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 22:46:47,965 - root - INFO - lr: 5.0143e-06 gnorm: 1.34 [1 day, 0:12:36< 0:16:31] +[titan] 2025-10-05 22:46:58,895 - root - INFO - step: 39555 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6838 +[titan] 2025-10-05 22:46:58,895 - root - INFO - lr: 5.0140e-06 gnorm: 1.24 [1 day, 0:12:46< 0:16:20] +[titan] 2025-10-05 22:47:09,746 - root - INFO - step: 39560 loss: 1.9366 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7128 +[titan] 2025-10-05 22:47:09,746 - root - INFO - lr: 5.0137e-06 gnorm: 1.22 [1 day, 0:12:57< 0:16:09] +[titan] 2025-10-05 22:47:20,590 - root - INFO - step: 39565 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 22:47:20,590 - root - INFO - lr: 5.0134e-06 gnorm: 1.26 [1 day, 0:13:08< 0:15:58] +[titan] 2025-10-05 22:47:31,450 - root - INFO - step: 39570 loss: 1.8471 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6330 +[titan] 2025-10-05 22:47:31,450 - root - INFO - lr: 5.0131e-06 gnorm: 1.24 [1 day, 0:13:19< 0:15:47] +[titan] 2025-10-05 22:47:42,294 - root - INFO - step: 39575 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:47:42,294 - root - INFO - lr: 5.0128e-06 gnorm: 1.26 [1 day, 0:13:30< 0:15:36] +[titan] 2025-10-05 22:47:53,185 - root - INFO - step: 39580 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 
22:47:53,186 - root - INFO - lr: 5.0125e-06 gnorm: 1.24 [1 day, 0:13:41< 0:15:25] +[titan] 2025-10-05 22:48:04,125 - root - INFO - step: 39585 loss: 1.8977 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6775 +[titan] 2025-10-05 22:48:04,125 - root - INFO - lr: 5.0122e-06 gnorm: 1.23 [1 day, 0:13:52< 0:15:14] +[titan] 2025-10-05 22:48:14,970 - root - INFO - step: 39590 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7450 +[titan] 2025-10-05 22:48:14,970 - root - INFO - lr: 5.0119e-06 gnorm: 1.27 [1 day, 0:14:03< 0:15:03] +[titan] 2025-10-05 22:48:25,818 - root - INFO - step: 39595 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 22:48:25,818 - root - INFO - lr: 5.0116e-06 gnorm: 1.25 [1 day, 0:14:13< 0:14:52] +[titan] 2025-10-05 22:48:34,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:48:36,655 - root - INFO - step: 39600 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:48:36,655 - root - INFO - lr: 5.0113e-06 gnorm: 1.26 [1 day, 0:14:24< 0:14:41] +[titan] 2025-10-05 22:48:47,505 - root - INFO - step: 39605 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:48:47,505 - root - INFO - lr: 5.0110e-06 gnorm: 1.25 [1 day, 0:14:35< 0:14:30] +[titan] 2025-10-05 22:48:58,396 - root - INFO - step: 39610 loss: 1.9266 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7045 +[titan] 2025-10-05 22:48:58,396 - root - INFO - lr: 5.0108e-06 gnorm: 1.27 [1 day, 0:14:46< 0:14:19] +[titan] 2025-10-05 22:49:09,249 - root - INFO - step: 39615 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:49:09,249 - root - INFO - lr: 5.0105e-06 gnorm: 1.28 [1 day, 0:14:57< 0:14:08] +[titan] 2025-10-05 22:49:20,110 - root - INFO - step: 39620 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 22:49:20,110 - root - INFO - lr: 5.0102e-06 gnorm: 1.26 [1 day, 0:15:08< 0:13:57] +[titan] 2025-10-05 22:49:30,958 - root - INFO - step: 39625 loss: 1.9163 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 22:49:30,958 - root - INFO - lr: 5.0100e-06 gnorm: 1.28 [1 day, 0:15:19< 0:13:46] +[titan] 2025-10-05 22:49:41,804 - root - INFO - step: 39630 loss: 1.8829 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 
22:49:41,804 - root - INFO - lr: 5.0097e-06 gnorm: 1.26 [1 day, 0:15:29< 0:13:35] +[titan] 2025-10-05 22:49:52,655 - root - INFO - step: 39635 loss: 1.8627 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6476 +[titan] 2025-10-05 22:49:52,655 - root - INFO - lr: 5.0094e-06 gnorm: 1.24 [1 day, 0:15:40< 0:13:24] +[titan] 2025-10-05 22:50:03,525 - root - INFO - step: 39640 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 22:50:03,526 - root - INFO - lr: 5.0092e-06 gnorm: 1.26 [1 day, 0:15:51< 0:13:13] +[titan] 2025-10-05 22:50:14,446 - root - INFO - step: 39645 loss: 1.8260 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2114 global_avg_mtp_loss: 1.6146 +[titan] 2025-10-05 22:50:14,446 - root - INFO - lr: 5.0089e-06 gnorm: 1.27 [1 day, 0:16:02< 0:13:02] +[titan] 2025-10-05 22:50:23,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:50:25,308 - root - INFO - step: 39650 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7039 +[titan] 2025-10-05 22:50:25,308 - root - INFO - lr: 5.0087e-06 gnorm: 1.28 [1 day, 0:16:13< 0:12:51] +[titan] 2025-10-05 22:50:36,173 - root - INFO - step: 39655 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:50:36,173 - root - INFO - lr: 5.0084e-06 gnorm: 1.26 [1 day, 0:16:24< 0:12:40] +[titan] 2025-10-05 22:50:47,037 - root - INFO - step: 39660 loss: 1.9006 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:50:47,037 - root - INFO - lr: 5.0082e-06 gnorm: 1.27 [1 day, 0:16:35< 0:12:29] +[titan] 2025-10-05 22:50:57,913 - root - INFO - step: 39665 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 22:50:57,914 - root - INFO - lr: 5.0079e-06 gnorm: 1.29 [1 day, 0:16:45< 0:12:18] +[titan] 2025-10-05 22:51:08,776 - root - INFO - step: 39670 loss: 1.8655 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 22:51:08,776 - root - INFO - lr: 5.0077e-06 gnorm: 1.30 [1 day, 0:16:56< 0:12:07] +[titan] 2025-10-05 22:51:19,670 - root - INFO - step: 39675 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:51:19,670 - root - INFO - lr: 5.0075e-06 gnorm: 1.25 [1 day, 0:17:07< 0:11:56] +[titan] 2025-10-05 22:51:30,542 - root - INFO - step: 39680 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 
22:51:30,542 - root - INFO - lr: 5.0072e-06 gnorm: 1.27 [1 day, 0:17:18< 0:11:45] +[titan] 2025-10-05 22:51:41,391 - root - INFO - step: 39685 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:51:41,391 - root - INFO - lr: 5.0070e-06 gnorm: 1.27 [1 day, 0:17:29< 0:11:34] +[titan] 2025-10-05 22:51:52,276 - root - INFO - step: 39690 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:51:52,276 - root - INFO - lr: 5.0068e-06 gnorm: 1.25 [1 day, 0:17:40< 0:11:23] +[titan] 2025-10-05 22:52:03,144 - root - INFO - step: 39695 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 22:52:03,144 - root - INFO - lr: 5.0066e-06 gnorm: 1.27 [1 day, 0:17:51< 0:11:12] +[titan] 2025-10-05 22:52:11,813 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:52:13,994 - root - INFO - step: 39700 loss: 1.9249 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 22:52:13,994 - root - INFO - lr: 5.0064e-06 gnorm: 1.23 [1 day, 0:18:02< 0:11:01] +[titan] 2025-10-05 22:52:24,851 - root - INFO - step: 39705 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6876 +[titan] 2025-10-05 22:52:24,852 - root - INFO - lr: 5.0062e-06 gnorm: 1.27 [1 day, 0:18:12< 0:10:50] +[titan] 2025-10-05 22:52:35,763 - root - INFO - step: 39710 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6727 +[titan] 2025-10-05 22:52:35,763 - root - INFO - lr: 5.0060e-06 gnorm: 1.30 [1 day, 0:18:23< 0:10:39] +[titan] 2025-10-05 22:52:46,625 - root - INFO - step: 39715 loss: 1.8269 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2119 global_avg_mtp_loss: 1.6151 +[titan] 2025-10-05 22:52:46,625 - root - INFO - lr: 5.0058e-06 gnorm: 1.25 [1 day, 0:18:34< 0:10:28] +[titan] 2025-10-05 22:52:57,517 - root - INFO - step: 39720 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 22:52:57,517 - root - INFO - lr: 5.0056e-06 gnorm: 1.26 [1 day, 0:18:45< 0:10:16] +[titan] 2025-10-05 22:53:08,394 - root - INFO - step: 39725 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 22:53:08,395 - root - INFO - lr: 5.0054e-06 gnorm: 1.27 [1 day, 0:18:56< 0:10:05] +[titan] 2025-10-05 22:53:19,248 - root - INFO - step: 39730 loss: 1.8733 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 
22:53:19,248 - root - INFO - lr: 5.0052e-06 gnorm: 1.26 [1 day, 0:19:07< 0:09:54] +[titan] 2025-10-05 22:53:30,094 - root - INFO - step: 39735 loss: 1.8701 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:53:30,094 - root - INFO - lr: 5.0050e-06 gnorm: 1.25 [1 day, 0:19:18< 0:09:43] +[titan] 2025-10-05 22:53:40,977 - root - INFO - step: 39740 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 22:53:40,977 - root - INFO - lr: 5.0048e-06 gnorm: 1.28 [1 day, 0:19:29< 0:09:32] +[titan] 2025-10-05 22:53:51,816 - root - INFO - step: 39745 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 22:53:51,816 - root - INFO - lr: 5.0046e-06 gnorm: 1.30 [1 day, 0:19:39< 0:09:21] +[titan] 2025-10-05 22:54:00,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:54:02,694 - root - INFO - step: 39750 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 22:54:02,694 - root - INFO - lr: 5.0044e-06 gnorm: 1.25 [1 day, 0:19:50< 0:09:10] +[titan] 2025-10-05 22:54:13,563 - root - INFO - step: 39755 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 22:54:13,563 - root - INFO - lr: 5.0042e-06 gnorm: 1.31 [1 day, 0:20:01< 0:08:59] +[titan] 2025-10-05 22:54:24,438 - root - INFO - step: 39760 loss: 1.8623 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6459 +[titan] 2025-10-05 22:54:24,438 - root - INFO - lr: 5.0041e-06 gnorm: 1.23 [1 day, 0:20:12< 0:08:48] +[titan] 2025-10-05 22:54:35,297 - root - INFO - step: 39765 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:54:35,297 - root - INFO - lr: 5.0039e-06 gnorm: 1.29 [1 day, 0:20:23< 0:08:37] +[titan] 2025-10-05 22:54:46,209 - root - INFO - step: 39770 loss: 1.8709 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6539 +[titan] 2025-10-05 22:54:46,209 - root - INFO - lr: 5.0037e-06 gnorm: 1.26 [1 day, 0:20:34< 0:08:26] +[titan] 2025-10-05 22:54:57,101 - root - INFO - step: 39775 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 22:54:57,101 - root - INFO - lr: 5.0036e-06 gnorm: 1.27 [1 day, 0:20:45< 0:08:15] +[titan] 2025-10-05 22:55:08,022 - root - INFO - step: 39780 loss: 1.9966 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7659 +[titan] 2025-10-05 
22:55:08,023 - root - INFO - lr: 5.0034e-06 gnorm: 1.31 [1 day, 0:20:56< 0:08:04] +[titan] 2025-10-05 22:55:18,865 - root - INFO - step: 39785 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:55:18,865 - root - INFO - lr: 5.0033e-06 gnorm: 1.29 [1 day, 0:21:06< 0:07:53] +[titan] 2025-10-05 22:55:29,721 - root - INFO - step: 39790 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:55:29,721 - root - INFO - lr: 5.0031e-06 gnorm: 1.28 [1 day, 0:21:17< 0:07:42] +[titan] 2025-10-05 22:55:40,588 - root - INFO - step: 39795 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 22:55:40,588 - root - INFO - lr: 5.0030e-06 gnorm: 1.25 [1 day, 0:21:28< 0:07:31] +[titan] 2025-10-05 22:55:49,265 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:55:51,443 - root - INFO - step: 39800 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:55:51,443 - root - INFO - lr: 5.0028e-06 gnorm: 1.25 [1 day, 0:21:39< 0:07:20] +[titan] 2025-10-05 22:56:02,382 - root - INFO - step: 39805 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6609 +[titan] 2025-10-05 22:56:02,382 - root - INFO - lr: 5.0027e-06 gnorm: 1.21 [1 day, 0:21:50< 0:07:09] +[titan] 2025-10-05 22:56:13,242 - root - INFO - step: 39810 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:56:13,242 - root - INFO - lr: 5.0026e-06 gnorm: 1.28 [1 day, 0:22:01< 0:06:58] +[titan] 2025-10-05 22:56:24,068 - root - INFO - step: 39815 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6926 +[titan] 2025-10-05 22:56:24,068 - root - INFO - lr: 5.0024e-06 gnorm: 1.28 [1 day, 0:22:12< 0:06:47] +[titan] 2025-10-05 22:56:34,883 - root - INFO - step: 39820 loss: 1.8589 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6448 +[titan] 2025-10-05 22:56:34,883 - root - INFO - lr: 5.0023e-06 gnorm: 1.23 [1 day, 0:22:22< 0:06:36] +[titan] 2025-10-05 22:56:45,691 - root - INFO - step: 39825 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6254 +[titan] 2025-10-05 22:56:45,691 - root - INFO - lr: 5.0022e-06 gnorm: 1.24 [1 day, 0:22:33< 0:06:25] +[titan] 2025-10-05 22:56:56,537 - root - INFO - step: 39830 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 
22:56:56,537 - root - INFO - lr: 5.0020e-06 gnorm: 1.29 [1 day, 0:22:44< 0:06:14] +[titan] 2025-10-05 22:57:07,418 - root - INFO - step: 39835 loss: 1.8289 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2110 global_avg_mtp_loss: 1.6179 +[titan] 2025-10-05 22:57:07,419 - root - INFO - lr: 5.0019e-06 gnorm: 1.25 [1 day, 0:22:55< 0:06:03] +[titan] 2025-10-05 22:57:18,260 - root - INFO - step: 39840 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6716 +[titan] 2025-10-05 22:57:18,260 - root - INFO - lr: 5.0018e-06 gnorm: 1.28 [1 day, 0:23:06< 0:05:52] +[titan] 2025-10-05 22:57:29,092 - root - INFO - step: 39845 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:57:29,092 - root - INFO - lr: 5.0017e-06 gnorm: 1.31 [1 day, 0:23:17< 0:05:41] +[titan] 2025-10-05 22:57:37,723 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:57:39,898 - root - INFO - step: 39850 loss: 1.8816 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 22:57:39,899 - root - INFO - lr: 5.0016e-06 gnorm: 1.24 [1 day, 0:23:27< 0:05:30] +[titan] 2025-10-05 22:57:50,741 - root - INFO - step: 39855 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 22:57:50,741 - root - INFO - lr: 5.0015e-06 gnorm: 1.32 [1 day, 0:23:38< 0:05:19] +[titan] 2025-10-05 22:58:01,598 - root - INFO - step: 39860 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 22:58:01,598 - root - INFO - lr: 5.0014e-06 gnorm: 1.33 [1 day, 0:23:49< 0:05:08] +[titan] 2025-10-05 22:58:12,433 - root - INFO - step: 39865 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6621 +[titan] 2025-10-05 22:58:12,433 - root - INFO - lr: 5.0013e-06 gnorm: 1.27 [1 day, 0:24:00< 0:04:57] +[titan] 2025-10-05 22:58:23,320 - root - INFO - step: 39870 loss: 1.8085 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2094 global_avg_mtp_loss: 1.5991 +[titan] 2025-10-05 22:58:23,320 - root - INFO - lr: 5.0012e-06 gnorm: 1.27 [1 day, 0:24:11< 0:04:46] +[titan] 2025-10-05 22:58:34,151 - root - INFO - step: 39875 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7171 +[titan] 2025-10-05 22:58:34,151 - root - INFO - lr: 5.0011e-06 gnorm: 1.29 [1 day, 0:24:22< 0:04:35] +[titan] 2025-10-05 22:58:44,982 - root - INFO - step: 39880 loss: 1.8617 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2163 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 
22:58:44,982 - root - INFO - lr: 5.0010e-06 gnorm: 1.24 [1 day, 0:24:33< 0:04:24] +[titan] 2025-10-05 22:58:55,801 - root - INFO - step: 39885 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:58:55,801 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:43< 0:04:13] +[titan] 2025-10-05 22:59:06,655 - root - INFO - step: 39890 loss: 1.8466 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 22:59:06,655 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:54< 0:04:02] +[titan] 2025-10-05 22:59:17,499 - root - INFO - step: 39895 loss: 1.9303 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 22:59:17,499 - root - INFO - lr: 5.0008e-06 gnorm: 1.27 [1 day, 0:25:05< 0:03:51] +[titan] 2025-10-05 22:59:26,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:59:28,385 - root - INFO - step: 39900 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:59:28,385 - root - INFO - lr: 5.0007e-06 gnorm: 1.29 [1 day, 0:25:16< 0:03:40] +[titan] 2025-10-05 22:59:39,223 - root - INFO - step: 39905 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6730 +[titan] 2025-10-05 22:59:39,223 - root - INFO - lr: 5.0006e-06 gnorm: 1.25 [1 day, 0:25:27< 0:03:29] +[titan] 2025-10-05 22:59:50,050 - root - INFO - step: 39910 loss: 1.9026 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6824 +[titan] 2025-10-05 22:59:50,051 - root - INFO - lr: 5.0006e-06 gnorm: 1.33 [1 day, 0:25:38< 0:03:18] +[titan] 2025-10-05 23:00:00,881 - root - INFO - step: 39915 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 23:00:00,881 - root - INFO - lr: 5.0005e-06 gnorm: 1.25 [1 day, 0:25:48< 0:03:07] +[titan] 2025-10-05 23:00:11,722 - root - INFO - step: 39920 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 23:00:11,722 - root - INFO - lr: 5.0005e-06 gnorm: 1.23 [1 day, 0:25:59< 0:02:56] +[titan] 2025-10-05 23:00:22,583 - root - INFO - step: 39925 loss: 1.8682 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 23:00:22,583 - root - INFO - lr: 5.0004e-06 gnorm: 1.24 [1 day, 0:26:10< 0:02:45] +[titan] 2025-10-05 23:00:33,459 - root - INFO - step: 39930 loss: 1.8937 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 
23:00:33,459 - root - INFO - lr: 5.0003e-06 gnorm: 1.28 [1 day, 0:26:21< 0:02:34] +[titan] 2025-10-05 23:00:44,397 - root - INFO - step: 39935 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 23:00:44,397 - root - INFO - lr: 5.0003e-06 gnorm: 1.37 [1 day, 0:26:32< 0:02:23] +[titan] 2025-10-05 23:00:46,750 - root - INFO - Dumping profiler traces at step 39936 +[titan] 2025-10-05 23:00:46,789 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 23:00:55,477 - root - INFO - step: 39940 loss: 1.9007 memory: 118.84GiB(85.28%) tps: 29,576 tflops: 410.32 mfu: 41.49% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 23:00:55,477 - root - INFO - lr: 5.0003e-06 gnorm: 1.22 [1 day, 0:26:43< 0:02:12] +[titan] 2025-10-05 23:01:06,304 - root - INFO - step: 39945 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 23:01:06,305 - root - INFO - lr: 5.0002e-06 gnorm: 1.26 [1 day, 0:26:54< 0:02:01] +[titan] 2025-10-05 23:01:14,966 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 23:01:17,145 - root - INFO - step: 39950 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6639 +[titan] 2025-10-05 23:01:17,145 - root - INFO - lr: 5.0002e-06 gnorm: 1.25 [1 day, 0:27:05< 0:01:50] +[titan] 2025-10-05 23:01:28,000 - root - INFO - step: 39955 loss: 1.8456 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2134 global_avg_mtp_loss: 1.6322 +[titan] 2025-10-05 23:01:28,000 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:16< 0:01:39] +[titan] 2025-10-05 23:01:38,823 - root - INFO - step: 39960 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 23:01:38,823 - root - INFO - lr: 5.0001e-06 gnorm: 1.26 [1 day, 0:27:26< 0:01:28] +[titan] 2025-10-05 23:01:49,702 - root - INFO - step: 39965 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6498 +[titan] 2025-10-05 23:01:49,702 - root - INFO - lr: 5.0001e-06 gnorm: 1.29 [1 day, 0:27:37< 0:01:17] +[titan] 2025-10-05 23:02:00,536 - root - INFO - step: 39970 loss: 1.8845 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 23:02:00,536 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:48< 0:01:06] +[titan] 2025-10-05 23:02:11,385 - root - INFO - step: 39975 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6632 +[titan] 2025-10-05 23:02:11,385 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:27:59< 0:00:55] +[titan] 2025-10-05 23:02:22,224 - root - INFO - step: 39980 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 
23:02:22,224 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:10< 0:00:44] +[titan] 2025-10-05 23:02:33,062 - root - INFO - step: 39985 loss: 1.8577 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6423 +[titan] 2025-10-05 23:02:33,062 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:21< 0:00:33] +[titan] 2025-10-05 23:02:43,924 - root - INFO - step: 39990 loss: 1.9469 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 23:02:43,924 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:28:31< 0:00:22] +[titan] 2025-10-05 23:02:54,801 - root - INFO - step: 39995 loss: 1.8720 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6560 +[titan] 2025-10-05 23:02:54,801 - root - INFO - lr: 5.0000e-06 gnorm: 1.27 [1 day, 0:28:42< 0:00:11] +[titan] 2025-10-05 23:03:03,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 23:03:05,651 - root - INFO - step: 40000 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6752 +[titan] 2025-10-05 23:03:05,651 - root - INFO - lr: 5.0000e-06 gnorm: 1.24 [1 day, 0:28:53< 0:00:00] +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving a full checkpoint at last step, step 40000. +[titan] 2025-10-05 23:03:23,696 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 23:03:23,696 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 18.05 seconds. 
+[titan] 2025-10-05 23:03:23,696 - root - INFO - Training completed diff --git a/logs/none_99omtdbz/attempt_0/1/stdout.log b/logs/none_99omtdbz/attempt_0/1/stdout.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/logs/none_99omtdbz/attempt_0/4/stderr.log b/logs/none_99omtdbz/attempt_0/4/stderr.log new file mode 100644 index 0000000000000000000000000000000000000000..0a9daba7613ab965d17ba8b834adbe59cbe336f0 --- /dev/null +++ b/logs/none_99omtdbz/attempt_0/4/stderr.log @@ -0,0 +1,17257 @@ +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc +wandb: Currently logged in as: zaydzuhri to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured. 
+[titan] 2025-10-04 22:33:17,457 - root - INFO - Starting job: default job +[titan] 2025-10-04 22:33:17,457 - root - INFO - { + "activation_checkpoint": { + "mode": "none", + "selective_ac_option": "2" + }, + "activation_offload": { + "mode": "none" + }, + "checkpoint": { + "async_mode": "disabled", + "convert_to_hf_on_save": false, + "create_seed_checkpoint": false, + "enable_checkpoint": true, + "exclude_from_loading": [], + "export_dtype": "float32", + "folder": "checkpoint", + "hf_repo_base_name": "zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000", + "hf_upload_enabled": true, + "hf_upload_format": "dcp", + "interval": 5000, + "interval_type": "steps", + "keep_latest_k": 0, + "load_step": -1, + "model_weights_only": false + }, + "comm": { + "init_timeout_seconds": 6000, + "trace_buf_size": 20000, + "train_timeout_seconds": 6000 + }, + "experimental": { + "context_parallel_degree": 1, + "context_parallel_rotate_method": "allgather", + "custom_model_path": "", + "enable_async_tensor_parallel": false, + "enable_compiled_autograd": false, + "pipeline_parallel_degree": 1, + "pipeline_parallel_microbatches": null, + "pipeline_parallel_schedule": "1F1B", + "pipeline_parallel_schedule_csv": "", + "pipeline_parallel_split_points": [] + }, + "fault_tolerance": { + "enable": false, + "group_size": 0, + "min_replica_size": 1, + "replica_id": 0 + }, + "float8": { + "enable_fsdp_float8_all_gather": false, + "force_recompute_fp8_weight_in_bwd": false, + "precompute_float8_dynamic_scale_for_fsdp": false, + "recipe_name": null + }, + "job": { + "config_file": "flame/models/fla.toml", + "description": "default job", + "dump_folder": "exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine", + "print_args": true, + "use_for_integration_test": false + }, + "lr_scheduler": { + "decay_ratio": null, + "decay_type": "cosine", + "lr_min": 0.1, + "warmup_steps": 400 + }, + "memory_estimation": { + "disable_fake_mode": false, + "enabled": false + }, + 
"metrics": { + "disable_color_printing": false, + "enable_tensorboard": false, + "enable_wandb": true, + "log_freq": 5, + "save_for_all_ranks": false, + "save_tb_folder": "tb" + }, + "model": { + "config": "configs/mtp_transformer_1B.json", + "converters": [], + "name": "fla", + "print_after_conversion": false, + "tokenizer_path": "fla-hub/transformer-1.3B-100B" + }, + "optimizer": { + "early_step_in_backward": false, + "eps": 1e-15, + "implementation": "fused", + "lr": 5e-05, + "name": "AdamW" + }, + "profiling": { + "enable_memory_snapshot": false, + "enable_profiling": true, + "profile_freq": 512, + "save_memory_snapshot_folder": "memory_snapshot", + "save_traces_folder": "profile_trace" + }, + "training": { + "batch_size": 16, + "compile": true, + "context_len": 4096, + "data_dir": null, + "data_files": null, + "data_parallel_replicate_degree": 1, + "data_parallel_shard_degree": -1, + "data_probs": null, + "dataset": "/root/.cache/zaydzuhri___open_math_instruct-2-text/default", + "dataset_name": "default", + "dataset_split": "train", + "deterministic": false, + "disable_loss_parallel": false, + "enable_cpu_offload": false, + "fsdp_reshard_after_forward": "default", + "gc_freq": 50, + "gradient_accumulation_steps": 1, + "max_norm": 1.0, + "mixed_precision_param": "bfloat16", + "mixed_precision_reduce": "float32", + "num_workers": 32, + "persistent_workers": false, + "pin_memory": false, + "prefetch_factor": 2, + "seed": 79, + "seq_len": 4096, + "skip_nan_inf": true, + "steps": 40000, + "streaming": false, + "tensor_parallel_degree": 1, + "varlen": false + } +} +[titan] 2025-10-04 22:33:17,457 - root - INFO - [GC] Initial GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:33:46,001 - root - INFO - Target Hugging Face repository for this run: zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000-20251004-223346 +[titan] 2025-10-04 22:33:46,001 - root - WARNING - ENV[TORCH_NCCL_ASYNC_ERROR_HANDLING] = 1 will be overridden to 3 based on job config +[titan] 2025-10-04 22:33:46,003 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:33:46,004 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:33:46,004 - root - INFO - Peak FLOPS used for computing MFU: 9.890e+14 +[titan] 2025-10-04 22:33:46,004 - root - INFO - Building 1-D device mesh with ['dp_shard'], [8] +[titan] 2025-10-04 22:33:46,069 - root - INFO - Loading tokenizer... +[titan] 2025-10-04 22:33:46,224 - root - INFO - LlamaTokenizerFast(name_or_path='fla-hub/transformer-1.3B-100B', vocab_size=32000, model_max_length=10000000000, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': ''}, clean_up_tokenization_spaces=False, added_tokens_decoder={ + 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 1: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), +} +) +[titan] 2025-10-04 22:33:46,224 - root - INFO - Loading dataset /root/.cache/zaydzuhri___open_math_instruct-2-text/default:default +`trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. 
+[titan] 2025-10-04 22:33:46,224 - datasets.load - ERROR - `trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:47,012 - root - INFO - Dataset({ + features: ['text'], + num_rows: 21972791 +}) +[titan] 2025-10-04 22:33:47,013 - root - INFO - Shuffling the dataset with seed 79 +[titan] 2025-10-04 22:33:53,134 - root - INFO - Loading model config from configs/mtp_transformer_1B.json +[titan] 2025-10-04 22:33:53,136 - root - INFO - Building dataloader... +[titan] 2025-10-04 22:33:53,138 - root - INFO - Building model from the config +MTPTransformerConfig { + "bos_token_id": 1, + "elementwise_affine": true, + "eos_token_id": 2, + "fuse_cross_entropy": true, + "fuse_norm": true, + "fuse_swiglu": true, + "hidden_act": "swish", + "hidden_ratio": 4, + "hidden_size": 2048, + "initializer_range": 0.006, + "intermediate_size": null, + "max_position_embeddings": 8192, + "model_type": "mtp_transformer", + "n_future_tokens": 4, + "norm_eps": 1e-06, + "num_heads": 32, + "num_hidden_layers": 32, + "num_kv_heads": null, + "pad_token_id": 2, + "qk_norm": false, + "qkv_bias": false, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "transformers_version": "4.51.3", + "use_cache": true, + "use_custom_backward": false, + "vocab_size": 32000, + "window_size": null +} + +[titan] 2025-10-04 22:33:53,271 - root - INFO -  +MTPTransformerForCausalLM( + (model): MTPTransformerModel( + (embeddings): Embedding(32000, 2048, padding_idx=2) + (layers): ModuleList( + (0-27): 28 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, 
out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (extra_heads): ModuleList( + (0-3): 4 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (norm): RMSNorm(2048, eps=1e-06) + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + (criterion): FusedLinearCrossEntropyLoss() +) + +[titan] 2025-10-04 22:33:53,299 - root - INFO - Compiling each block with torch.compile +[titan] 2025-10-04 22:33:53,299 - root - INFO - Compiling the embedding, norm, and lm_head layers with torch.compile +[titan] 2025-10-04 22:33:53,300 - root - INFO - Compiling the entire model with torch.compile +[titan] 2025-10-04 22:33:53,375 - root - INFO - Applied FSDP to the model +[titan] 2025-10-04 22:33:53,617 - root - INFO - CUDA memory usage for model: 
0.84GiB(0.60%) +[titan] 2025-10-04 22:33:53,637 - root - INFO - Checkpointing active. Checkpoints will be loaded from and saved to exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/checkpoint +[titan] 2025-10-04 22:33:53,637 - root - INFO - Loading the checkpoint at step 0. +[titan] 2025-10-04 22:34:08,077 - root - INFO - [GC] GC collection for checkpoint loading. 0.57 seconds. +[titan] 2025-10-04 22:34:08,077 - root - INFO - Finished loading the checkpoint in 14.44 seconds. +[titan] 2025-10-04 22:34:08,077 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:34:08,078 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:34:09,133 - root - INFO - ***** Running training ***** +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Training starts at step 1 +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Number of tokens per sequence = 4,096 +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Gradient Accumulation steps = 1 +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Instantaneous batch size (per device) = 16 +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Global batch size (w. parallel, distributed & accumulation) = 128 (524,288 tokens) +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Total optimization steps = 40,000 (20,971,520,000 tokens) +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Warmup steps = 400 (209,715,200 tokens) +[titan] 2025-10-04 22:34:09,134 - root - INFO -  Number of parameters = 1,775,372,288  +[titan] 2025-10-04 22:34:09,134 - root - INFO - Profiling active. 
Traces will be saved at exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace +[titan] 2025-10-04 22:34:47,724 - root - INFO - step: 1 loss: 12.0105 memory: 116.89GiB(83.88%) tps: 1,653 tflops: 22.93 mfu: 2.32% global_avg_ntp_loss: 2.1249 global_avg_mtp_loss: 9.8856 +[titan] 2025-10-04 22:34:47,724 - root - INFO - lr: 2.4938e-07 gnorm: 20.89 [ 0:00:39<18 days, 8:29:14] +[titan] 2025-10-04 22:34:47,724 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-04 22:34:53,099 - root - INFO - [GC] GC collection invoked by checkpointer. 0.20 seconds. +[titan] 2025-10-04 22:34:53,099 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 5.38 seconds. +[titan] 2025-10-04 22:34:53,100 - root - INFO - Synchronizing and adjusting timeout for all ProcessGroups to 1:40:00 +[titan] 2025-10-04 22:36:58,991 - root - INFO - step: 5 loss: 11.7564 memory: 118.84GiB(85.28%) tps: 1,997 tflops: 27.71 mfu: 2.80% global_avg_ntp_loss: 2.0697 global_avg_mtp_loss: 9.6867 +[titan] 2025-10-04 22:36:58,992 - root - INFO - lr: 7.4813e-07 gnorm: 19.96 [ 0:02:50<15 days, 19:45:23] +[titan] 2025-10-04 22:37:09,851 - root - INFO - step: 10 loss: 11.2335 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 1.9192 global_avg_mtp_loss: 9.3143 +[titan] 2025-10-04 22:37:09,851 - root - INFO - lr: 1.3716e-06 gnorm: 18.16 [ 0:03:01<8 days, 9:55:02] +[titan] 2025-10-04 22:37:20,642 - root - INFO - step: 15 loss: 10.8309 memory: 118.84GiB(85.28%) tps: 30,368 tflops: 421.30 mfu: 42.60% global_avg_ntp_loss: 1.7960 global_avg_mtp_loss: 9.0349 +[titan] 2025-10-04 22:37:20,642 - root - INFO - lr: 1.9950e-06 gnorm: 10.62 [ 0:03:12<5 days, 22:35:04] +[titan] 2025-10-04 22:37:31,508 - root - INFO - step: 20 loss: 10.3172 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 1.6641 global_avg_mtp_loss: 8.6531 +[titan] 2025-10-04 22:37:31,508 - 
root - INFO - lr: 2.6185e-06 gnorm: 8.22 [ 0:03:23<4 days, 16:57:30] +[titan] 2025-10-04 22:37:42,328 - root - INFO - step: 25 loss: 9.9294 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 1.5801 global_avg_mtp_loss: 8.3492 +[titan] 2025-10-04 22:37:42,328 - root - INFO - lr: 3.2419e-06 gnorm: 7.10 [ 0:03:34<3 days, 23:09:40] +[titan] 2025-10-04 22:37:53,161 - root - INFO - step: 30 loss: 9.5763 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 1.4997 global_avg_mtp_loss: 8.0766 +[titan] 2025-10-04 22:37:53,161 - root - INFO - lr: 3.8653e-06 gnorm: 6.23 [ 0:03:45<3 days, 11:17:59] +[titan] 2025-10-04 22:38:04,056 - root - INFO - step: 35 loss: 9.3711 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 1.4603 global_avg_mtp_loss: 7.9108 +[titan] 2025-10-04 22:38:04,056 - root - INFO - lr: 4.4888e-06 gnorm: 6.20 [ 0:03:55<3 days, 2:50:47] +[titan] 2025-10-04 22:38:14,933 - root - INFO - step: 40 loss: 9.0179 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 1.3853 global_avg_mtp_loss: 7.6325 +[titan] 2025-10-04 22:38:14,933 - root - INFO - lr: 5.1122e-06 gnorm: 5.60 [ 0:04:06<2 days, 20:30:02] +[titan] 2025-10-04 22:38:25,789 - root - INFO - step: 45 loss: 8.7524 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 1.3406 global_avg_mtp_loss: 7.4118 +[titan] 2025-10-04 22:38:25,789 - root - INFO - lr: 5.7357e-06 gnorm: 5.43 [ 0:04:17<2 days, 15:33:33] +[titan] 2025-10-04 22:38:34,548 - root - INFO - [GC] Peforming periodical GC collection. 0.05 seconds. 
+[titan] 2025-10-04 22:38:36,745 - root - INFO - step: 50 loss: 8.5439 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.94 mfu: 41.96% global_avg_ntp_loss: 1.3050 global_avg_mtp_loss: 7.2389 +[titan] 2025-10-04 22:38:36,746 - root - INFO - lr: 6.3591e-06 gnorm: 5.74 [ 0:04:28<2 days, 11:37:40] +[titan] 2025-10-04 22:38:47,618 - root - INFO - step: 55 loss: 8.3158 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 1.2609 global_avg_mtp_loss: 7.0549 +[titan] 2025-10-04 22:38:47,619 - root - INFO - lr: 6.9825e-06 gnorm: 5.52 [ 0:04:39<2 days, 8:23:37] +[titan] 2025-10-04 22:38:58,482 - root - INFO - step: 60 loss: 8.2006 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 1.2373 global_avg_mtp_loss: 6.9633 +[titan] 2025-10-04 22:38:58,482 - root - INFO - lr: 7.6060e-06 gnorm: 5.72 [ 0:04:50<2 days, 5:41:47] +[titan] 2025-10-04 22:39:09,360 - root - INFO - step: 65 loss: 8.1393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.2182 global_avg_mtp_loss: 6.9211 +[titan] 2025-10-04 22:39:09,360 - root - INFO - lr: 8.2294e-06 gnorm: 5.66 [ 0:05:01<2 days, 3:24:58] +[titan] 2025-10-04 22:39:20,248 - root - INFO - step: 70 loss: 7.7608 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 1.1495 global_avg_mtp_loss: 6.6112 +[titan] 2025-10-04 22:39:20,248 - root - INFO - lr: 8.8529e-06 gnorm: 5.54 [ 0:05:12<2 days, 1:27:45] +[titan] 2025-10-04 22:39:31,185 - root - INFO - step: 75 loss: 7.6862 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 1.1395 global_avg_mtp_loss: 6.5467 +[titan] 2025-10-04 22:39:31,185 - root - INFO - lr: 9.4763e-06 gnorm: 6.04 [ 0:05:23<1 day, 23:46:35] +[titan] 2025-10-04 22:39:42,063 - root - INFO - step: 80 loss: 7.4352 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.0959 global_avg_mtp_loss: 6.3393 +[titan] 2025-10-04 22:39:42,063 - 
root - INFO - lr: 1.0100e-05 gnorm: 5.61 [ 0:05:33<1 day, 22:17:33] +[titan] 2025-10-04 22:39:52,933 - root - INFO - step: 85 loss: 7.3232 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 1.0671 global_avg_mtp_loss: 6.2561 +[titan] 2025-10-04 22:39:52,934 - root - INFO - lr: 1.0723e-05 gnorm: 5.89 [ 0:05:44<1 day, 20:58:55] +[titan] 2025-10-04 22:40:03,808 - root - INFO - step: 90 loss: 7.1910 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 1.0545 global_avg_mtp_loss: 6.1364 +[titan] 2025-10-04 22:40:03,808 - root - INFO - lr: 1.1347e-05 gnorm: 6.24 [ 0:05:55<1 day, 19:49:01] +[titan] 2025-10-04 22:40:14,668 - root - INFO - step: 95 loss: 7.0637 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 1.0179 global_avg_mtp_loss: 6.0458 +[titan] 2025-10-04 22:40:14,668 - root - INFO - lr: 1.1970e-05 gnorm: 5.80 [ 0:06:06<1 day, 18:46:22] +[titan] 2025-10-04 22:40:23,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:40:25,567 - root - INFO - step: 100 loss: 7.0183 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 1.0144 global_avg_mtp_loss: 6.0039 +[titan] 2025-10-04 22:40:25,567 - root - INFO - lr: 1.2594e-05 gnorm: 5.49 [ 0:06:17<1 day, 17:50:13] +[titan] 2025-10-04 22:40:36,554 - root - INFO - step: 105 loss: 6.7845 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.77 mfu: 41.84% global_avg_ntp_loss: 0.9684 global_avg_mtp_loss: 5.8161 +[titan] 2025-10-04 22:40:36,554 - root - INFO - lr: 1.3217e-05 gnorm: 5.66 [ 0:06:28<1 day, 16:59:57] +[titan] 2025-10-04 22:40:47,440 - root - INFO - step: 110 loss: 6.7610 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.9616 global_avg_mtp_loss: 5.7993 +[titan] 2025-10-04 22:40:47,440 - root - INFO - lr: 1.3840e-05 gnorm: 5.76 [ 0:06:39<1 day, 16:13:38] +[titan] 2025-10-04 22:40:58,316 - root - INFO - step: 115 loss: 6.7822 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.9526 global_avg_mtp_loss: 5.8296 +[titan] 2025-10-04 22:40:58,316 - root - INFO - lr: 1.4464e-05 gnorm: 5.41 [ 0:06:50<1 day, 15:31:16] +[titan] 2025-10-04 22:41:09,192 - root - INFO - step: 120 loss: 6.5921 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.9190 global_avg_mtp_loss: 5.6731 +[titan] 2025-10-04 22:41:09,193 - root - INFO - lr: 1.5087e-05 gnorm: 5.18 [ 0:07:01<1 day, 14:52:25] +[titan] 2025-10-04 22:41:20,086 - root - INFO - step: 125 loss: 6.3759 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8880 global_avg_mtp_loss: 5.4878 +[titan] 2025-10-04 22:41:20,086 - root - INFO - lr: 1.5711e-05 gnorm: 4.91 [ 0:07:11<1 day, 14:16:46] +[titan] 2025-10-04 22:41:31,181 - root - INFO - step: 130 loss: 6.3566 memory: 118.84GiB(85.28%) tps: 29,536 tflops: 409.77 mfu: 41.43% global_avg_ntp_loss: 0.8781 global_avg_mtp_loss: 5.4786 +[titan] 2025-10-04 
22:41:31,181 - root - INFO - lr: 1.6334e-05 gnorm: 4.37 [ 0:07:23<1 day, 13:44:51] +[titan] 2025-10-04 22:41:42,074 - root - INFO - step: 135 loss: 6.3044 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8713 global_avg_mtp_loss: 5.4331 +[titan] 2025-10-04 22:41:42,075 - root - INFO - lr: 1.6958e-05 gnorm: 4.29 [ 0:07:33<1 day, 13:14:18] +[titan] 2025-10-04 22:41:52,936 - root - INFO - step: 140 loss: 6.3158 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.8632 global_avg_mtp_loss: 5.4526 +[titan] 2025-10-04 22:41:52,936 - root - INFO - lr: 1.7581e-05 gnorm: 3.03 [ 0:07:44<1 day, 12:45:47] +[titan] 2025-10-04 22:42:03,814 - root - INFO - step: 145 loss: 6.2266 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.8508 global_avg_mtp_loss: 5.3758 +[titan] 2025-10-04 22:42:03,815 - root - INFO - lr: 1.8204e-05 gnorm: 3.86 [ 0:07:55<1 day, 12:19:17] +[titan] 2025-10-04 22:42:12,516 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:42:14,709 - root - INFO - step: 150 loss: 6.0872 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.8237 global_avg_mtp_loss: 5.2635 +[titan] 2025-10-04 22:42:14,710 - root - INFO - lr: 1.8828e-05 gnorm: 3.31 [ 0:08:06<1 day, 11:54:37] +[titan] 2025-10-04 22:42:25,613 - root - INFO - step: 155 loss: 6.0870 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.8286 global_avg_mtp_loss: 5.2584 +[titan] 2025-10-04 22:42:25,613 - root - INFO - lr: 1.9451e-05 gnorm: 3.04 [ 0:08:17<1 day, 11:31:34] +[titan] 2025-10-04 22:42:36,528 - root - INFO - step: 160 loss: 5.9733 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.8032 global_avg_mtp_loss: 5.1701 +[titan] 2025-10-04 22:42:36,529 - root - INFO - lr: 2.0075e-05 gnorm: 3.06 [ 0:08:28<1 day, 11:09:59] +[titan] 2025-10-04 22:42:47,448 - root - INFO - step: 165 loss: 5.8683 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.7907 global_avg_mtp_loss: 5.0776 +[titan] 2025-10-04 22:42:47,448 - root - INFO - lr: 2.0698e-05 gnorm: 3.39 [ 0:08:39<1 day, 10:49:44] +[titan] 2025-10-04 22:42:58,343 - root - INFO - step: 170 loss: 5.8536 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.7847 global_avg_mtp_loss: 5.0689 +[titan] 2025-10-04 22:42:58,343 - root - INFO - lr: 2.1322e-05 gnorm: 2.80 [ 0:08:50<1 day, 10:30:33] +[titan] 2025-10-04 22:43:09,215 - root - INFO - step: 175 loss: 5.7812 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.7716 global_avg_mtp_loss: 5.0096 +[titan] 2025-10-04 22:43:09,216 - root - INFO - lr: 2.1945e-05 gnorm: 4.02 [ 0:09:01<1 day, 10:12:22] +[titan] 2025-10-04 22:43:20,097 - root - INFO - step: 180 loss: 5.7994 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.7711 global_avg_mtp_loss: 5.0283 +[titan] 2025-10-04 
22:43:20,098 - root - INFO - lr: 2.2569e-05 gnorm: 3.36 [ 0:09:11<1 day, 9:55:14] +[titan] 2025-10-04 22:43:31,003 - root - INFO - step: 185 loss: 5.6617 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9134 +[titan] 2025-10-04 22:43:31,003 - root - INFO - lr: 2.3192e-05 gnorm: 2.73 [ 0:09:22<1 day, 9:39:06] +[titan] 2025-10-04 22:43:41,902 - root - INFO - step: 190 loss: 5.6564 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9080 +[titan] 2025-10-04 22:43:41,903 - root - INFO - lr: 2.3815e-05 gnorm: 3.17 [ 0:09:33<1 day, 9:23:46] +[titan] 2025-10-04 22:43:52,788 - root - INFO - step: 195 loss: 5.6643 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.7475 global_avg_mtp_loss: 4.9168 +[titan] 2025-10-04 22:43:52,788 - root - INFO - lr: 2.4439e-05 gnorm: 2.43 [ 0:09:44<1 day, 9:09:11] +[titan] 2025-10-04 22:44:01,483 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:44:03,675 - root - INFO - step: 200 loss: 5.6189 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.7360 global_avg_mtp_loss: 4.8830 +[titan] 2025-10-04 22:44:03,675 - root - INFO - lr: 2.5062e-05 gnorm: 3.47 [ 0:09:55<1 day, 8:55:19] +[titan] 2025-10-04 22:44:14,559 - root - INFO - step: 205 loss: 5.5215 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.7213 global_avg_mtp_loss: 4.8002 +[titan] 2025-10-04 22:44:14,559 - root - INFO - lr: 2.5686e-05 gnorm: 3.09 [ 0:10:06<1 day, 8:42:06] +[titan] 2025-10-04 22:44:25,433 - root - INFO - step: 210 loss: 5.5044 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.7198 global_avg_mtp_loss: 4.7846 +[titan] 2025-10-04 22:44:25,433 - root - INFO - lr: 2.6309e-05 gnorm: 2.66 [ 0:10:17<1 day, 8:29:29] +[titan] 2025-10-04 22:44:36,338 - root - INFO - step: 215 loss: 5.4728 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.7115 global_avg_mtp_loss: 4.7613 +[titan] 2025-10-04 22:44:36,338 - root - INFO - lr: 2.6933e-05 gnorm: 2.45 [ 0:10:28<1 day, 8:17:32] +[titan] 2025-10-04 22:44:47,225 - root - INFO - step: 220 loss: 5.3310 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.6944 global_avg_mtp_loss: 4.6366 +[titan] 2025-10-04 22:44:47,225 - root - INFO - lr: 2.7556e-05 gnorm: 2.66 [ 0:10:39<1 day, 8:06:05] +[titan] 2025-10-04 22:44:58,124 - root - INFO - step: 225 loss: 5.3739 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6934 global_avg_mtp_loss: 4.6805 +[titan] 2025-10-04 22:44:58,125 - root - INFO - lr: 2.8180e-05 gnorm: 2.95 [ 0:10:50<1 day, 7:55:09] +[titan] 2025-10-04 22:45:09,004 - root - INFO - step: 230 loss: 5.4216 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.7014 global_avg_mtp_loss: 4.7202 +[titan] 2025-10-04 22:45:09,004 - 
root - INFO - lr: 2.8803e-05 gnorm: 2.60 [ 0:11:00<1 day, 7:44:38] +[titan] 2025-10-04 22:45:19,907 - root - INFO - step: 235 loss: 5.3090 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.6909 global_avg_mtp_loss: 4.6180 +[titan] 2025-10-04 22:45:19,907 - root - INFO - lr: 2.9426e-05 gnorm: 2.68 [ 0:11:11<1 day, 7:34:37] +[titan] 2025-10-04 22:45:30,796 - root - INFO - step: 240 loss: 5.2690 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.6785 global_avg_mtp_loss: 4.5905 +[titan] 2025-10-04 22:45:30,796 - root - INFO - lr: 3.0050e-05 gnorm: 2.38 [ 0:11:22<1 day, 7:24:59] +[titan] 2025-10-04 22:45:41,709 - root - INFO - step: 245 loss: 5.1965 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.6691 global_avg_mtp_loss: 4.5274 +[titan] 2025-10-04 22:45:41,710 - root - INFO - lr: 3.0673e-05 gnorm: 2.47 [ 0:11:33<1 day, 7:15:48] +[titan] 2025-10-04 22:45:50,403 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:45:52,597 - root - INFO - step: 250 loss: 5.1858 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6654 global_avg_mtp_loss: 4.5204 +[titan] 2025-10-04 22:45:52,597 - root - INFO - lr: 3.1297e-05 gnorm: 3.00 [ 0:11:44<1 day, 7:06:54] +[titan] 2025-10-04 22:46:03,496 - root - INFO - step: 255 loss: 5.1706 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.6625 global_avg_mtp_loss: 4.5081 +[titan] 2025-10-04 22:46:03,496 - root - INFO - lr: 3.1920e-05 gnorm: 2.61 [ 0:11:55<1 day, 6:58:22] +[titan] 2025-10-04 22:46:14,369 - root - INFO - step: 260 loss: 5.1473 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.6607 global_avg_mtp_loss: 4.4865 +[titan] 2025-10-04 22:46:14,369 - root - INFO - lr: 3.2544e-05 gnorm: 2.39 [ 0:12:06<1 day, 6:50:06] +[titan] 2025-10-04 22:46:25,252 - root - INFO - step: 265 loss: 5.1300 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.6565 global_avg_mtp_loss: 4.4735 +[titan] 2025-10-04 22:46:25,253 - root - INFO - lr: 3.3167e-05 gnorm: 2.29 [ 0:12:17<1 day, 6:42:10] +[titan] 2025-10-04 22:46:36,152 - root - INFO - step: 270 loss: 5.1579 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6598 global_avg_mtp_loss: 4.4981 +[titan] 2025-10-04 22:46:36,152 - root - INFO - lr: 3.3791e-05 gnorm: 2.51 [ 0:12:28<1 day, 6:34:33] +[titan] 2025-10-04 22:46:47,010 - root - INFO - step: 275 loss: 5.0167 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.6398 global_avg_mtp_loss: 4.3769 +[titan] 2025-10-04 22:46:47,011 - root - INFO - lr: 3.4414e-05 gnorm: 2.10 [ 0:12:38<1 day, 6:27:06] +[titan] 2025-10-04 22:46:57,896 - root - INFO - step: 280 loss: 5.0898 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.6486 global_avg_mtp_loss: 4.4413 +[titan] 2025-10-04 22:46:57,896 - 
root - INFO - lr: 3.5037e-05 gnorm: 3.07 [ 0:12:49<1 day, 6:19:59] +[titan] 2025-10-04 22:47:08,770 - root - INFO - step: 285 loss: 5.1105 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.6521 global_avg_mtp_loss: 4.4584 +[titan] 2025-10-04 22:47:08,770 - root - INFO - lr: 3.5661e-05 gnorm: 2.23 [ 0:13:00<1 day, 6:13:05] +[titan] 2025-10-04 22:47:19,662 - root - INFO - step: 290 loss: 5.0807 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6456 global_avg_mtp_loss: 4.4352 +[titan] 2025-10-04 22:47:19,662 - root - INFO - lr: 3.6284e-05 gnorm: 2.82 [ 0:13:11<1 day, 6:06:28] +[titan] 2025-10-04 22:47:30,549 - root - INFO - step: 295 loss: 5.0464 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.6427 global_avg_mtp_loss: 4.4037 +[titan] 2025-10-04 22:47:30,550 - root - INFO - lr: 3.6908e-05 gnorm: 2.35 [ 0:13:22<1 day, 6:00:03] +[titan] 2025-10-04 22:47:39,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:47:41,466 - root - INFO - step: 300 loss: 5.1119 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.6529 global_avg_mtp_loss: 4.4589 +[titan] 2025-10-04 22:47:41,466 - root - INFO - lr: 3.7531e-05 gnorm: 2.72 [ 0:13:33<1 day, 5:53:54] +[titan] 2025-10-04 22:47:52,331 - root - INFO - step: 305 loss: 4.9831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.6338 global_avg_mtp_loss: 4.3492 +[titan] 2025-10-04 22:47:52,331 - root - INFO - lr: 3.8155e-05 gnorm: 2.81 [ 0:13:44<1 day, 5:47:50] +[titan] 2025-10-04 22:48:03,188 - root - INFO - step: 310 loss: 4.9896 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.6364 global_avg_mtp_loss: 4.3532 +[titan] 2025-10-04 22:48:03,188 - root - INFO - lr: 3.8778e-05 gnorm: 2.39 [ 0:13:55<1 day, 5:41:56] +[titan] 2025-10-04 22:48:14,051 - root - INFO - step: 315 loss: 4.8865 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.6207 global_avg_mtp_loss: 4.2658 +[titan] 2025-10-04 22:48:14,051 - root - INFO - lr: 3.9401e-05 gnorm: 3.11 [ 0:14:05<1 day, 5:36:14] +[titan] 2025-10-04 22:48:24,948 - root - INFO - step: 320 loss: 4.9416 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.6290 global_avg_mtp_loss: 4.3126 +[titan] 2025-10-04 22:48:24,948 - root - INFO - lr: 4.0025e-05 gnorm: 2.57 [ 0:14:16<1 day, 5:30:47] +[titan] 2025-10-04 22:48:35,879 - root - INFO - step: 325 loss: 4.8914 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.6229 global_avg_mtp_loss: 4.2686 +[titan] 2025-10-04 22:48:35,879 - root - INFO - lr: 4.0648e-05 gnorm: 2.22 [ 0:14:27<1 day, 5:25:34] +[titan] 2025-10-04 22:48:46,771 - root - INFO - step: 330 loss: 4.8494 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.6146 global_avg_mtp_loss: 4.2348 +[titan] 2025-10-04 22:48:46,771 - 
root - INFO - lr: 4.1272e-05 gnorm: 2.17 [ 0:14:38<1 day, 5:20:25] +[titan] 2025-10-04 22:48:57,658 - root - INFO - step: 335 loss: 4.9431 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6274 global_avg_mtp_loss: 4.3157 +[titan] 2025-10-04 22:48:57,658 - root - INFO - lr: 4.1895e-05 gnorm: 2.41 [ 0:14:49<1 day, 5:15:24] +[titan] 2025-10-04 22:49:08,546 - root - INFO - step: 340 loss: 4.8429 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.6110 global_avg_mtp_loss: 4.2319 +[titan] 2025-10-04 22:49:08,546 - root - INFO - lr: 4.2519e-05 gnorm: 2.38 [ 0:15:00<1 day, 5:10:32] +[titan] 2025-10-04 22:49:19,437 - root - INFO - step: 345 loss: 4.7699 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.6044 global_avg_mtp_loss: 4.1656 +[titan] 2025-10-04 22:49:19,437 - root - INFO - lr: 4.3142e-05 gnorm: 2.47 [ 0:15:11<1 day, 5:05:49] +[titan] 2025-10-04 22:49:28,142 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:49:30,329 - root - INFO - step: 350 loss: 4.8354 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6129 global_avg_mtp_loss: 4.2225 +[titan] 2025-10-04 22:49:30,329 - root - INFO - lr: 4.3766e-05 gnorm: 2.30 [ 0:15:22<1 day, 5:01:13] +[titan] 2025-10-04 22:49:41,264 - root - INFO - step: 355 loss: 4.8409 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.6123 global_avg_mtp_loss: 4.2286 +[titan] 2025-10-04 22:49:41,264 - root - INFO - lr: 4.4389e-05 gnorm: 2.44 [ 0:15:33<1 day, 4:56:50] +[titan] 2025-10-04 22:49:52,147 - root - INFO - step: 360 loss: 4.6777 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.5902 global_avg_mtp_loss: 4.0875 +[titan] 2025-10-04 22:49:52,148 - root - INFO - lr: 4.5012e-05 gnorm: 1.96 [ 0:15:44<1 day, 4:52:28] +[titan] 2025-10-04 22:50:03,033 - root - INFO - step: 365 loss: 4.8152 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.6116 global_avg_mtp_loss: 4.2037 +[titan] 2025-10-04 22:50:03,033 - root - INFO - lr: 4.5636e-05 gnorm: 2.14 [ 0:15:54<1 day, 4:48:13] +[titan] 2025-10-04 22:50:13,908 - root - INFO - step: 370 loss: 4.7797 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.6024 global_avg_mtp_loss: 4.1773 +[titan] 2025-10-04 22:50:13,908 - root - INFO - lr: 4.6259e-05 gnorm: 2.37 [ 0:16:05<1 day, 4:44:04] +[titan] 2025-10-04 22:50:24,783 - root - INFO - step: 375 loss: 4.6716 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.5906 global_avg_mtp_loss: 4.0810 +[titan] 2025-10-04 22:50:24,783 - root - INFO - lr: 4.6883e-05 gnorm: 2.26 [ 0:16:16<1 day, 4:40:01] +[titan] 2025-10-04 22:50:35,652 - root - INFO - step: 380 loss: 4.7162 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.5950 global_avg_mtp_loss: 4.1212 +[titan] 2025-10-04 22:50:35,652 - 
root - INFO - lr: 4.7506e-05 gnorm: 2.15 [ 0:16:27<1 day, 4:36:03] +[titan] 2025-10-04 22:50:46,574 - root - INFO - step: 385 loss: 4.8016 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.6054 global_avg_mtp_loss: 4.1962 +[titan] 2025-10-04 22:50:46,574 - root - INFO - lr: 4.8130e-05 gnorm: 2.50 [ 0:16:38<1 day, 4:32:17] +[titan] 2025-10-04 22:50:57,443 - root - INFO - step: 390 loss: 4.7078 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.5929 global_avg_mtp_loss: 4.1150 +[titan] 2025-10-04 22:50:57,444 - root - INFO - lr: 4.8753e-05 gnorm: 2.00 [ 0:16:49<1 day, 4:28:31] +[titan] 2025-10-04 22:51:08,305 - root - INFO - step: 395 loss: 4.6384 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.5834 global_avg_mtp_loss: 4.0551 +[titan] 2025-10-04 22:51:08,305 - root - INFO - lr: 4.9377e-05 gnorm: 2.37 [ 0:17:00<1 day, 4:24:49] +[titan] 2025-10-04 22:51:16,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:51:19,152 - root - INFO - step: 400 loss: 4.6918 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.5928 global_avg_mtp_loss: 4.0990 +[titan] 2025-10-04 22:51:19,152 - root - INFO - lr: 5.0000e-05 gnorm: 2.36 [ 0:17:11<1 day, 4:21:12] +[titan] 2025-10-04 22:51:30,025 - root - INFO - step: 405 loss: 4.6284 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.5843 global_avg_mtp_loss: 4.0441 +[titan] 2025-10-04 22:51:30,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.37 [ 0:17:21<1 day, 4:17:42] +[titan] 2025-10-04 22:51:40,903 - root - INFO - step: 410 loss: 4.5757 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.5764 global_avg_mtp_loss: 3.9993 +[titan] 2025-10-04 22:51:40,903 - root - INFO - lr: 5.0000e-05 gnorm: 2.16 [ 0:17:32<1 day, 4:14:17] +[titan] 2025-10-04 22:51:51,757 - root - INFO - step: 415 loss: 4.6798 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.5875 global_avg_mtp_loss: 4.0923 +[titan] 2025-10-04 22:51:51,757 - root - INFO - lr: 5.0000e-05 gnorm: 2.18 [ 0:17:43<1 day, 4:10:55] +[titan] 2025-10-04 22:52:02,632 - root - INFO - step: 420 loss: 4.6984 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.5914 global_avg_mtp_loss: 4.1070 +[titan] 2025-10-04 22:52:02,632 - root - INFO - lr: 5.0000e-05 gnorm: 2.08 [ 0:17:54<1 day, 4:07:39] +[titan] 2025-10-04 22:52:13,523 - root - INFO - step: 425 loss: 4.6583 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.5870 global_avg_mtp_loss: 4.0713 +[titan] 2025-10-04 22:52:13,523 - root - INFO - lr: 5.0000e-05 gnorm: 1.97 [ 0:18:05<1 day, 4:04:29] +[titan] 2025-10-04 22:52:24,408 - root - INFO - step: 430 loss: 4.5843 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.5750 global_avg_mtp_loss: 4.0093 +[titan] 2025-10-04 22:52:24,408 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.22 [ 0:18:16<1 day, 4:01:23] +[titan] 2025-10-04 22:52:35,258 - root - INFO - step: 435 loss: 4.5321 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.5697 global_avg_mtp_loss: 3.9625 +[titan] 2025-10-04 22:52:35,258 - root - INFO - lr: 5.0000e-05 gnorm: 2.13 [ 0:18:27<1 day, 3:58:18] +[titan] 2025-10-04 22:52:46,145 - root - INFO - step: 440 loss: 4.5606 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.5730 global_avg_mtp_loss: 3.9875 +[titan] 2025-10-04 22:52:46,146 - root - INFO - lr: 5.0000e-05 gnorm: 2.40 [ 0:18:38<1 day, 3:55:20] +[titan] 2025-10-04 22:52:57,025 - root - INFO - step: 445 loss: 4.5406 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.5687 global_avg_mtp_loss: 3.9718 +[titan] 2025-10-04 22:52:57,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:18:48<1 day, 3:52:25] +[titan] 2025-10-04 22:53:05,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:53:07,904 - root - INFO - step: 450 loss: 4.5707 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.5740 global_avg_mtp_loss: 3.9967 +[titan] 2025-10-04 22:53:07,904 - root - INFO - lr: 5.0000e-05 gnorm: 2.34 [ 0:18:59<1 day, 3:49:33] +[titan] 2025-10-04 22:53:18,769 - root - INFO - step: 455 loss: 4.4743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.5620 global_avg_mtp_loss: 3.9123 +[titan] 2025-10-04 22:53:18,770 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:19:10<1 day, 3:46:44] +[titan] 2025-10-04 22:53:29,609 - root - INFO - step: 460 loss: 4.4303 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8761 +[titan] 2025-10-04 22:53:29,609 - root - INFO - lr: 5.0000e-05 gnorm: 2.25 [ 0:19:21<1 day, 3:43:57] +[titan] 2025-10-04 22:53:40,497 - root - INFO - step: 465 loss: 4.4283 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.5552 global_avg_mtp_loss: 3.8731 +[titan] 2025-10-04 22:53:40,497 - root - INFO - lr: 5.0000e-05 gnorm: 1.84 [ 0:19:32<1 day, 3:41:16] +[titan] 2025-10-04 22:53:51,344 - root - INFO - step: 470 loss: 4.4176 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8632 +[titan] 2025-10-04 22:53:51,344 - root - INFO - lr: 5.0000e-05 gnorm: 2.15 [ 0:19:43<1 day, 3:38:36] +[titan] 2025-10-04 22:54:02,202 - root - INFO - step: 475 loss: 4.4882 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.5655 global_avg_mtp_loss: 3.9227 +[titan] 2025-10-04 22:54:02,202 - root - INFO - lr: 5.0000e-05 gnorm: 1.78 [ 0:19:54<1 day, 3:35:59] +[titan] 2025-10-04 22:54:13,066 - root - INFO - step: 480 loss: 4.4600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.5572 global_avg_mtp_loss: 3.9028 +[titan] 2025-10-04 22:54:13,066 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.00 [ 0:20:04<1 day, 3:33:26] +[titan] 2025-10-04 22:54:23,913 - root - INFO - step: 485 loss: 4.3781 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.5484 global_avg_mtp_loss: 3.8297 +[titan] 2025-10-04 22:54:23,913 - root - INFO - lr: 4.9999e-05 gnorm: 1.60 [ 0:20:15<1 day, 3:30:55] +[titan] 2025-10-04 22:54:34,742 - root - INFO - step: 490 loss: 4.4068 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.5524 global_avg_mtp_loss: 3.8544 +[titan] 2025-10-04 22:54:34,742 - root - INFO - lr: 4.9999e-05 gnorm: 2.19 [ 0:20:26<1 day, 3:28:25] +[titan] 2025-10-04 22:54:45,647 - root - INFO - step: 495 loss: 4.3459 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.5461 global_avg_mtp_loss: 3.7998 +[titan] 2025-10-04 22:54:45,647 - root - INFO - lr: 4.9999e-05 gnorm: 1.79 [ 0:20:37<1 day, 3:26:03] +[titan] 2025-10-04 22:54:54,304 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:54:56,481 - root - INFO - step: 500 loss: 4.5195 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.5664 global_avg_mtp_loss: 3.9531 +[titan] 2025-10-04 22:54:56,481 - root - INFO - lr: 4.9999e-05 gnorm: 1.81 [ 0:20:48<1 day, 3:23:39] +[titan] 2025-10-04 22:55:07,316 - root - INFO - step: 505 loss: 4.3727 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.5468 global_avg_mtp_loss: 3.8259 +[titan] 2025-10-04 22:55:07,316 - root - INFO - lr: 4.9999e-05 gnorm: 1.99 [ 0:20:59<1 day, 3:21:18] +[titan] 2025-10-04 22:55:18,908 - root - INFO - step: 510 loss: 4.3913 memory: 118.84GiB(85.28%) tps: 28,268 tflops: 392.18 mfu: 39.65% global_avg_ntp_loss: 0.5477 global_avg_mtp_loss: 3.8435 +[titan] 2025-10-04 22:55:18,908 - root - INFO - lr: 4.9999e-05 gnorm: 1.64 [ 0:21:10<1 day, 3:19:58] +[titan] 2025-10-04 22:55:23,528 - root - INFO - Dumping profiler traces at step 512 +[titan] 2025-10-04 22:55:23,563 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 22:55:30,219 - root - INFO - step: 515 loss: 4.3744 memory: 118.84GiB(85.28%) tps: 28,972 tflops: 401.94 mfu: 40.64% global_avg_ntp_loss: 0.5458 global_avg_mtp_loss: 3.8286 +[titan] 2025-10-04 22:55:30,219 - root - INFO - lr: 4.9999e-05 gnorm: 1.67 [ 0:21:22<1 day, 3:18:17] +[titan] 2025-10-04 22:55:41,134 - root - INFO - step: 520 loss: 4.3427 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.5439 global_avg_mtp_loss: 3.7988 +[titan] 2025-10-04 22:55:41,135 - root - INFO - lr: 4.9999e-05 gnorm: 2.16 [ 0:21:33<1 day, 3:16:08] +[titan] 2025-10-04 22:55:52,306 - root - INFO - step: 525 loss: 4.3706 memory: 118.84GiB(85.28%) tps: 29,331 tflops: 406.93 mfu: 41.15% global_avg_ntp_loss: 0.5472 global_avg_mtp_loss: 3.8234 +[titan] 2025-10-04 22:55:52,307 - root - INFO - lr: 4.9999e-05 gnorm: 1.88 [ 0:21:44<1 day, 3:14:21] +[titan] 2025-10-04 22:56:03,131 - root - INFO 
- step: 530 loss: 4.3726 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.5471 global_avg_mtp_loss: 3.8256 +[titan] 2025-10-04 22:56:03,131 - root - INFO - lr: 4.9999e-05 gnorm: 2.18 [ 0:21:54<1 day, 3:12:10] +[titan] 2025-10-04 22:56:13,930 - root - INFO - step: 535 loss: 4.4086 memory: 118.84GiB(85.28%) tps: 30,344 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.5498 global_avg_mtp_loss: 3.8588 +[titan] 2025-10-04 22:56:13,930 - root - INFO - lr: 4.9999e-05 gnorm: 1.95 [ 0:22:05<1 day, 3:09:59] +[titan] 2025-10-04 22:56:24,765 - root - INFO - step: 540 loss: 4.4155 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.5521 global_avg_mtp_loss: 3.8634 +[titan] 2025-10-04 22:56:24,765 - root - INFO - lr: 4.9999e-05 gnorm: 2.04 [ 0:22:16<1 day, 3:07:53] +[titan] 2025-10-04 22:56:35,621 - root - INFO - step: 545 loss: 4.3565 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.5455 global_avg_mtp_loss: 3.8109 +[titan] 2025-10-04 22:56:35,621 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:22:27<1 day, 3:05:50] +[titan] 2025-10-04 22:56:44,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:56:46,499 - root - INFO - step: 550 loss: 4.2924 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.5365 global_avg_mtp_loss: 3.7559 +[titan] 2025-10-04 22:56:46,499 - root - INFO - lr: 4.9998e-05 gnorm: 1.96 [ 0:22:38<1 day, 3:03:51] +[titan] 2025-10-04 22:56:57,360 - root - INFO - step: 555 loss: 4.3086 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.5367 global_avg_mtp_loss: 3.7719 +[titan] 2025-10-04 22:56:57,361 - root - INFO - lr: 4.9998e-05 gnorm: 1.94 [ 0:22:49<1 day, 3:01:53] +[titan] 2025-10-04 22:57:08,185 - root - INFO - step: 560 loss: 4.2981 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.5349 global_avg_mtp_loss: 3.7631 +[titan] 2025-10-04 22:57:08,185 - root - INFO - lr: 4.9998e-05 gnorm: 1.84 [ 0:23:00<1 day, 2:59:54] +[titan] 2025-10-04 22:57:19,007 - root - INFO - step: 565 loss: 4.3383 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7969 +[titan] 2025-10-04 22:57:19,007 - root - INFO - lr: 4.9998e-05 gnorm: 1.66 [ 0:23:10<1 day, 2:57:57] +[titan] 2025-10-04 22:57:29,825 - root - INFO - step: 570 loss: 4.3634 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.5450 global_avg_mtp_loss: 3.8184 +[titan] 2025-10-04 22:57:29,825 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:23:21<1 day, 2:56:02] +[titan] 2025-10-04 22:57:40,662 - root - INFO - step: 575 loss: 4.2261 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.5285 global_avg_mtp_loss: 3.6977 +[titan] 2025-10-04 22:57:40,663 - root - INFO - lr: 4.9998e-05 gnorm: 1.67 [ 0:23:32<1 day, 2:54:10] +[titan] 2025-10-04 22:57:51,566 - root - INFO - step: 580 loss: 4.2298 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.5294 global_avg_mtp_loss: 3.7005 +[titan] 2025-10-04 22:57:51,566 - 
root - INFO - lr: 4.9998e-05 gnorm: 1.98 [ 0:23:43<1 day, 2:52:24] +[titan] 2025-10-04 22:58:02,405 - root - INFO - step: 585 loss: 4.3315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7902 +[titan] 2025-10-04 22:58:02,405 - root - INFO - lr: 4.9998e-05 gnorm: 1.72 [ 0:23:54<1 day, 2:50:35] +[titan] 2025-10-04 22:58:13,269 - root - INFO - step: 590 loss: 4.2600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.5322 global_avg_mtp_loss: 3.7278 +[titan] 2025-10-04 22:58:13,270 - root - INFO - lr: 4.9997e-05 gnorm: 1.95 [ 0:24:05<1 day, 2:48:49] +[titan] 2025-10-04 22:58:24,105 - root - INFO - step: 595 loss: 4.1808 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.5216 global_avg_mtp_loss: 3.6592 +[titan] 2025-10-04 22:58:24,105 - root - INFO - lr: 4.9997e-05 gnorm: 1.65 [ 0:24:15<1 day, 2:47:04] +[titan] 2025-10-04 22:58:32,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:58:34,964 - root - INFO - step: 600 loss: 4.1976 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.5240 global_avg_mtp_loss: 3.6736 +[titan] 2025-10-04 22:58:34,964 - root - INFO - lr: 4.9997e-05 gnorm: 1.83 [ 0:24:26<1 day, 2:45:21] +[titan] 2025-10-04 22:58:45,870 - root - INFO - step: 605 loss: 4.3159 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.5391 global_avg_mtp_loss: 3.7769 +[titan] 2025-10-04 22:58:45,870 - root - INFO - lr: 4.9997e-05 gnorm: 1.87 [ 0:24:37<1 day, 2:43:43] +[titan] 2025-10-04 22:58:56,733 - root - INFO - step: 610 loss: 4.1166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.5131 global_avg_mtp_loss: 3.6035 +[titan] 2025-10-04 22:58:56,733 - root - INFO - lr: 4.9997e-05 gnorm: 1.62 [ 0:24:48<1 day, 2:42:04] +[titan] 2025-10-04 22:59:07,585 - root - INFO - step: 615 loss: 4.2340 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.5275 global_avg_mtp_loss: 3.7065 +[titan] 2025-10-04 22:59:07,585 - root - INFO - lr: 4.9997e-05 gnorm: 1.88 [ 0:24:59<1 day, 2:40:25] +[titan] 2025-10-04 22:59:18,424 - root - INFO - step: 620 loss: 4.2004 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5249 global_avg_mtp_loss: 3.6756 +[titan] 2025-10-04 22:59:18,424 - root - INFO - lr: 4.9997e-05 gnorm: 1.91 [ 0:25:10<1 day, 2:38:47] +[titan] 2025-10-04 22:59:29,245 - root - INFO - step: 625 loss: 4.2113 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.5247 global_avg_mtp_loss: 3.6866 +[titan] 2025-10-04 22:59:29,245 - root - INFO - lr: 4.9996e-05 gnorm: 1.62 [ 0:25:21<1 day, 2:37:09] +[titan] 2025-10-04 22:59:40,085 - root - INFO - step: 630 loss: 4.1954 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.5210 global_avg_mtp_loss: 3.6745 +[titan] 2025-10-04 22:59:40,085 - 
root - INFO - lr: 4.9996e-05 gnorm: 1.68 [ 0:25:31<1 day, 2:35:34] +[titan] 2025-10-04 22:59:51,004 - root - INFO - step: 635 loss: 4.0965 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.5096 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 22:59:51,004 - root - INFO - lr: 4.9996e-05 gnorm: 1.82 [ 0:25:42<1 day, 2:34:05] +[titan] 2025-10-04 23:00:01,832 - root - INFO - step: 640 loss: 4.2067 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.5236 global_avg_mtp_loss: 3.6831 +[titan] 2025-10-04 23:00:01,832 - root - INFO - lr: 4.9996e-05 gnorm: 1.87 [ 0:25:53<1 day, 2:32:31] +[titan] 2025-10-04 23:00:12,683 - root - INFO - step: 645 loss: 4.0562 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.5030 global_avg_mtp_loss: 3.5532 +[titan] 2025-10-04 23:00:12,683 - root - INFO - lr: 4.9996e-05 gnorm: 1.73 [ 0:26:04<1 day, 2:31:01] +[titan] 2025-10-04 23:00:21,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:00:23,493 - root - INFO - step: 650 loss: 4.1298 memory: 118.84GiB(85.28%) tps: 30,314 tflops: 420.56 mfu: 42.52% global_avg_ntp_loss: 0.5128 global_avg_mtp_loss: 3.6170 +[titan] 2025-10-04 23:00:23,493 - root - INFO - lr: 4.9996e-05 gnorm: 1.75 [ 0:26:15<1 day, 2:29:29] +[titan] 2025-10-04 23:00:34,283 - root - INFO - step: 655 loss: 4.0941 memory: 118.84GiB(85.28%) tps: 30,370 tflops: 421.33 mfu: 42.60% global_avg_ntp_loss: 0.5089 global_avg_mtp_loss: 3.5852 +[titan] 2025-10-04 23:00:34,283 - root - INFO - lr: 4.9995e-05 gnorm: 1.70 [ 0:26:26<1 day, 2:27:57] +[titan] 2025-10-04 23:00:45,102 - root - INFO - step: 660 loss: 4.1313 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.5130 global_avg_mtp_loss: 3.6184 +[titan] 2025-10-04 23:00:45,102 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:36<1 day, 2:26:28] +[titan] 2025-10-04 23:00:55,946 - root - INFO - step: 665 loss: 4.1367 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5147 global_avg_mtp_loss: 3.6220 +[titan] 2025-10-04 23:00:55,946 - root - INFO - lr: 4.9995e-05 gnorm: 1.99 [ 0:26:47<1 day, 2:25:01] +[titan] 2025-10-04 23:01:06,742 - root - INFO - step: 670 loss: 4.0904 memory: 118.84GiB(85.28%) tps: 30,352 tflops: 421.09 mfu: 42.58% global_avg_ntp_loss: 0.5075 global_avg_mtp_loss: 3.5829 +[titan] 2025-10-04 23:01:06,743 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:58<1 day, 2:23:34] +[titan] 2025-10-04 23:01:17,585 - root - INFO - step: 675 loss: 4.0638 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.5042 global_avg_mtp_loss: 3.5596 +[titan] 2025-10-04 23:01:17,585 - root - INFO - lr: 4.9995e-05 gnorm: 2.15 [ 0:27:09<1 day, 2:22:09] +[titan] 2025-10-04 23:01:28,410 - root - INFO - step: 680 loss: 4.0064 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4976 global_avg_mtp_loss: 3.5088 +[titan] 2025-10-04 23:01:28,410 - 
root - INFO - lr: 4.9994e-05 gnorm: 1.81 [ 0:27:20<1 day, 2:20:45] +[titan] 2025-10-04 23:01:39,214 - root - INFO - step: 685 loss: 4.1427 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.5134 global_avg_mtp_loss: 3.6293 +[titan] 2025-10-04 23:01:39,214 - root - INFO - lr: 4.9994e-05 gnorm: 1.69 [ 0:27:31<1 day, 2:19:21] +[titan] 2025-10-04 23:01:50,056 - root - INFO - step: 690 loss: 4.0571 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.5019 global_avg_mtp_loss: 3.5553 +[titan] 2025-10-04 23:01:50,056 - root - INFO - lr: 4.9994e-05 gnorm: 1.63 [ 0:27:41<1 day, 2:18:00] +[titan] 2025-10-04 23:02:00,900 - root - INFO - step: 695 loss: 4.0380 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5011 global_avg_mtp_loss: 3.5369 +[titan] 2025-10-04 23:02:00,900 - root - INFO - lr: 4.9994e-05 gnorm: 1.77 [ 0:27:52<1 day, 2:16:40] +[titan] 2025-10-04 23:02:09,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:02:11,753 - root - INFO - step: 700 loss: 4.0879 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.5070 global_avg_mtp_loss: 3.5810 +[titan] 2025-10-04 23:02:11,753 - root - INFO - lr: 4.9994e-05 gnorm: 1.96 [ 0:28:03<1 day, 2:15:22] +[titan] 2025-10-04 23:02:22,605 - root - INFO - step: 705 loss: 4.0241 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4988 global_avg_mtp_loss: 3.5252 +[titan] 2025-10-04 23:02:22,605 - root - INFO - lr: 4.9993e-05 gnorm: 1.83 [ 0:28:14<1 day, 2:14:04] +[titan] 2025-10-04 23:02:33,405 - root - INFO - step: 710 loss: 4.0903 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.5058 global_avg_mtp_loss: 3.5844 +[titan] 2025-10-04 23:02:33,405 - root - INFO - lr: 4.9993e-05 gnorm: 1.64 [ 0:28:25<1 day, 2:12:45] +[titan] 2025-10-04 23:02:44,244 - root - INFO - step: 715 loss: 4.0535 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5015 global_avg_mtp_loss: 3.5520 +[titan] 2025-10-04 23:02:44,244 - root - INFO - lr: 4.9993e-05 gnorm: 1.50 [ 0:28:36<1 day, 2:11:29] +[titan] 2025-10-04 23:02:55,077 - root - INFO - step: 720 loss: 4.0093 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.4957 global_avg_mtp_loss: 3.5137 +[titan] 2025-10-04 23:02:55,077 - root - INFO - lr: 4.9993e-05 gnorm: 1.58 [ 0:28:46<1 day, 2:10:13] +[titan] 2025-10-04 23:03:05,902 - root - INFO - step: 725 loss: 3.9529 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4894 global_avg_mtp_loss: 3.4635 +[titan] 2025-10-04 23:03:05,902 - root - INFO - lr: 4.9992e-05 gnorm: 1.53 [ 0:28:57<1 day, 2:08:58] +[titan] 2025-10-04 23:03:16,765 - root - INFO - step: 730 loss: 3.9701 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.4916 global_avg_mtp_loss: 3.4785 +[titan] 2025-10-04 23:03:16,765 - 
root - INFO - lr: 4.9992e-05 gnorm: 1.57 [ 0:29:08<1 day, 2:07:45] +[titan] 2025-10-04 23:03:27,585 - root - INFO - step: 735 loss: 4.0191 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.4982 global_avg_mtp_loss: 3.5209 +[titan] 2025-10-04 23:03:27,585 - root - INFO - lr: 4.9992e-05 gnorm: 1.59 [ 0:29:19<1 day, 2:06:31] +[titan] 2025-10-04 23:03:38,404 - root - INFO - step: 740 loss: 3.9770 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.4912 global_avg_mtp_loss: 3.4857 +[titan] 2025-10-04 23:03:38,404 - root - INFO - lr: 4.9992e-05 gnorm: 1.61 [ 0:29:30<1 day, 2:05:18] +[titan] 2025-10-04 23:03:49,265 - root - INFO - step: 745 loss: 4.0755 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.5054 global_avg_mtp_loss: 3.5701 +[titan] 2025-10-04 23:03:49,265 - root - INFO - lr: 4.9992e-05 gnorm: 1.52 [ 0:29:41<1 day, 2:04:09] +[titan] 2025-10-04 23:03:57,894 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:04:00,081 - root - INFO - step: 750 loss: 3.9375 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.4868 global_avg_mtp_loss: 3.4508 +[titan] 2025-10-04 23:04:00,081 - root - INFO - lr: 4.9991e-05 gnorm: 1.67 [ 0:29:51<1 day, 2:02:57] +[titan] 2025-10-04 23:04:10,923 - root - INFO - step: 755 loss: 4.0060 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.4974 global_avg_mtp_loss: 3.5087 +[titan] 2025-10-04 23:04:10,923 - root - INFO - lr: 4.9991e-05 gnorm: 1.62 [ 0:30:02<1 day, 2:01:48] +[titan] 2025-10-04 23:04:21,765 - root - INFO - step: 760 loss: 3.9826 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.4928 global_avg_mtp_loss: 3.4897 +[titan] 2025-10-04 23:04:21,765 - root - INFO - lr: 4.9991e-05 gnorm: 1.57 [ 0:30:13<1 day, 2:00:39] +[titan] 2025-10-04 23:04:32,624 - root - INFO - step: 765 loss: 3.9503 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4871 global_avg_mtp_loss: 3.4633 +[titan] 2025-10-04 23:04:32,625 - root - INFO - lr: 4.9991e-05 gnorm: 1.73 [ 0:30:24<1 day, 1:59:32] +[titan] 2025-10-04 23:04:43,499 - root - INFO - step: 770 loss: 4.0928 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.5059 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 23:04:43,499 - root - INFO - lr: 4.9990e-05 gnorm: 1.68 [ 0:30:35<1 day, 1:58:27] +[titan] 2025-10-04 23:04:54,363 - root - INFO - step: 775 loss: 4.0138 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.4966 global_avg_mtp_loss: 3.5172 +[titan] 2025-10-04 23:04:54,364 - root - INFO - lr: 4.9990e-05 gnorm: 1.84 [ 0:30:46<1 day, 1:57:21] +[titan] 2025-10-04 23:05:05,165 - root - INFO - step: 780 loss: 3.9609 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.4878 global_avg_mtp_loss: 3.4731 +[titan] 2025-10-04 23:05:05,165 - 
root - INFO - lr: 4.9990e-05 gnorm: 1.66 [ 0:30:57<1 day, 1:56:14] +[titan] 2025-10-04 23:05:16,001 - root - INFO - step: 785 loss: 4.0392 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.5003 global_avg_mtp_loss: 3.5389 +[titan] 2025-10-04 23:05:16,002 - root - INFO - lr: 4.9989e-05 gnorm: 1.74 [ 0:31:07<1 day, 1:55:08] +[titan] 2025-10-04 23:05:26,809 - root - INFO - step: 790 loss: 3.9123 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.4820 global_avg_mtp_loss: 3.4303 +[titan] 2025-10-04 23:05:26,809 - root - INFO - lr: 4.9989e-05 gnorm: 1.71 [ 0:31:18<1 day, 1:54:02] +[titan] 2025-10-04 23:05:37,659 - root - INFO - step: 795 loss: 3.9513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.4870 global_avg_mtp_loss: 3.4643 +[titan] 2025-10-04 23:05:37,659 - root - INFO - lr: 4.9989e-05 gnorm: 1.57 [ 0:31:29<1 day, 1:52:59] +[titan] 2025-10-04 23:05:46,349 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:05:48,524 - root - INFO - step: 800 loss: 3.8805 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4013 +[titan] 2025-10-04 23:05:48,524 - root - INFO - lr: 4.9989e-05 gnorm: 1.63 [ 0:31:40<1 day, 1:51:57] +[titan] 2025-10-04 23:05:59,423 - root - INFO - step: 805 loss: 4.0567 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.5041 global_avg_mtp_loss: 3.5527 +[titan] 2025-10-04 23:05:59,424 - root - INFO - lr: 4.9988e-05 gnorm: 1.65 [ 0:31:51<1 day, 1:50:58] +[titan] 2025-10-04 23:06:10,267 - root - INFO - step: 810 loss: 3.9384 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4865 global_avg_mtp_loss: 3.4519 +[titan] 2025-10-04 23:06:10,267 - root - INFO - lr: 4.9988e-05 gnorm: 1.62 [ 0:32:02<1 day, 1:49:56] +[titan] 2025-10-04 23:06:21,120 - root - INFO - step: 815 loss: 3.9402 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.4841 global_avg_mtp_loss: 3.4561 +[titan] 2025-10-04 23:06:21,120 - root - INFO - lr: 4.9988e-05 gnorm: 1.83 [ 0:32:12<1 day, 1:48:56] +[titan] 2025-10-04 23:06:31,962 - root - INFO - step: 820 loss: 3.8907 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.4804 global_avg_mtp_loss: 3.4102 +[titan] 2025-10-04 23:06:31,962 - root - INFO - lr: 4.9987e-05 gnorm: 1.56 [ 0:32:23<1 day, 1:47:55] +[titan] 2025-10-04 23:06:42,804 - root - INFO - step: 825 loss: 3.9391 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.4866 global_avg_mtp_loss: 3.4525 +[titan] 2025-10-04 23:06:42,804 - root - INFO - lr: 4.9987e-05 gnorm: 1.73 [ 0:32:34<1 day, 1:46:55] +[titan] 2025-10-04 23:06:53,697 - root - INFO - step: 830 loss: 3.8534 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.4757 global_avg_mtp_loss: 3.3777 +[titan] 2025-10-04 23:06:53,697 - 
root - INFO - lr: 4.9987e-05 gnorm: 1.46 [ 0:32:45<1 day, 1:45:59] +[titan] 2025-10-04 23:07:04,599 - root - INFO - step: 835 loss: 3.9680 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.4909 global_avg_mtp_loss: 3.4770 +[titan] 2025-10-04 23:07:04,599 - root - INFO - lr: 4.9987e-05 gnorm: 1.69 [ 0:32:56<1 day, 1:45:03] +[titan] 2025-10-04 23:07:15,482 - root - INFO - step: 840 loss: 3.8804 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4793 global_avg_mtp_loss: 3.4011 +[titan] 2025-10-04 23:07:15,483 - root - INFO - lr: 4.9986e-05 gnorm: 1.65 [ 0:33:07<1 day, 1:44:06] +[titan] 2025-10-04 23:07:26,345 - root - INFO - step: 845 loss: 3.9335 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.4859 global_avg_mtp_loss: 3.4476 +[titan] 2025-10-04 23:07:26,345 - root - INFO - lr: 4.9986e-05 gnorm: 1.67 [ 0:33:18<1 day, 1:43:10] +[titan] 2025-10-04 23:07:35,004 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:07:37,166 - root - INFO - step: 850 loss: 3.9466 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.4899 global_avg_mtp_loss: 3.4568 +[titan] 2025-10-04 23:07:37,166 - root - INFO - lr: 4.9986e-05 gnorm: 1.53 [ 0:33:29<1 day, 1:42:12] +[titan] 2025-10-04 23:07:48,038 - root - INFO - step: 855 loss: 3.8553 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3805 +[titan] 2025-10-04 23:07:48,038 - root - INFO - lr: 4.9985e-05 gnorm: 1.54 [ 0:33:39<1 day, 1:41:17] +[titan] 2025-10-04 23:07:58,950 - root - INFO - step: 860 loss: 3.9192 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.4837 global_avg_mtp_loss: 3.4355 +[titan] 2025-10-04 23:07:58,951 - root - INFO - lr: 4.9985e-05 gnorm: 1.63 [ 0:33:50<1 day, 1:40:24] +[titan] 2025-10-04 23:08:09,863 - root - INFO - step: 865 loss: 3.8398 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.4747 global_avg_mtp_loss: 3.3651 +[titan] 2025-10-04 23:08:09,863 - root - INFO - lr: 4.9985e-05 gnorm: 1.57 [ 0:34:01<1 day, 1:39:32] +[titan] 2025-10-04 23:08:20,763 - root - INFO - step: 870 loss: 3.9660 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.4876 global_avg_mtp_loss: 3.4784 +[titan] 2025-10-04 23:08:20,763 - root - INFO - lr: 4.9984e-05 gnorm: 1.70 [ 0:34:12<1 day, 1:38:39] +[titan] 2025-10-04 23:08:31,644 - root - INFO - step: 875 loss: 3.8236 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4708 global_avg_mtp_loss: 3.3528 +[titan] 2025-10-04 23:08:31,644 - root - INFO - lr: 4.9984e-05 gnorm: 1.58 [ 0:34:23<1 day, 1:37:46] +[titan] 2025-10-04 23:08:42,521 - root - INFO - step: 880 loss: 3.8393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4731 global_avg_mtp_loss: 3.3662 +[titan] 2025-10-04 23:08:42,522 - 
root - INFO - lr: 4.9984e-05 gnorm: 1.66 [ 0:34:34<1 day, 1:36:54] +[titan] 2025-10-04 23:08:53,411 - root - INFO - step: 885 loss: 3.9181 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4832 global_avg_mtp_loss: 3.4349 +[titan] 2025-10-04 23:08:53,412 - root - INFO - lr: 4.9983e-05 gnorm: 1.81 [ 0:34:45<1 day, 1:36:03] +[titan] 2025-10-04 23:09:04,287 - root - INFO - step: 890 loss: 3.8540 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4756 global_avg_mtp_loss: 3.3784 +[titan] 2025-10-04 23:09:04,287 - root - INFO - lr: 4.9983e-05 gnorm: 1.63 [ 0:34:56<1 day, 1:35:11] +[titan] 2025-10-04 23:09:15,149 - root - INFO - step: 895 loss: 3.7956 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4646 global_avg_mtp_loss: 3.3310 +[titan] 2025-10-04 23:09:15,149 - root - INFO - lr: 4.9983e-05 gnorm: 1.59 [ 0:35:06<1 day, 1:34:19] +[titan] 2025-10-04 23:09:23,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:09:26,036 - root - INFO - step: 900 loss: 3.8814 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4023 +[titan] 2025-10-04 23:09:26,036 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:17<1 day, 1:33:29] +[titan] 2025-10-04 23:09:36,928 - root - INFO - step: 905 loss: 3.8547 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3797 +[titan] 2025-10-04 23:09:36,928 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:28<1 day, 1:32:39] +[titan] 2025-10-04 23:09:47,795 - root - INFO - step: 910 loss: 3.7503 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4614 global_avg_mtp_loss: 3.2890 +[titan] 2025-10-04 23:09:47,795 - root - INFO - lr: 4.9982e-05 gnorm: 1.63 [ 0:35:39<1 day, 1:31:49] +[titan] 2025-10-04 23:09:58,664 - root - INFO - step: 915 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3219 +[titan] 2025-10-04 23:09:58,665 - root - INFO - lr: 4.9981e-05 gnorm: 1.57 [ 0:35:50<1 day, 1:31:00] +[titan] 2025-10-04 23:10:09,537 - root - INFO - step: 920 loss: 3.8477 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.4753 global_avg_mtp_loss: 3.3723 +[titan] 2025-10-04 23:10:09,537 - root - INFO - lr: 4.9981e-05 gnorm: 1.56 [ 0:36:01<1 day, 1:30:11] +[titan] 2025-10-04 23:10:20,420 - root - INFO - step: 925 loss: 3.8141 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3439 +[titan] 2025-10-04 23:10:20,420 - root - INFO - lr: 4.9980e-05 gnorm: 1.53 [ 0:36:12<1 day, 1:29:22] +[titan] 2025-10-04 23:10:31,298 - root - INFO - step: 930 loss: 3.8185 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3482 +[titan] 2025-10-04 23:10:31,298 - 
root - INFO - lr: 4.9980e-05 gnorm: 1.56 [ 0:36:23<1 day, 1:28:34] +[titan] 2025-10-04 23:10:42,186 - root - INFO - step: 935 loss: 3.7234 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.4574 global_avg_mtp_loss: 3.2661 +[titan] 2025-10-04 23:10:42,186 - root - INFO - lr: 4.9980e-05 gnorm: 1.52 [ 0:36:34<1 day, 1:27:47] +[titan] 2025-10-04 23:10:53,053 - root - INFO - step: 940 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4666 global_avg_mtp_loss: 3.3211 +[titan] 2025-10-04 23:10:53,053 - root - INFO - lr: 4.9979e-05 gnorm: 1.69 [ 0:36:44<1 day, 1:26:59] +[titan] 2025-10-04 23:11:03,935 - root - INFO - step: 945 loss: 3.7815 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.4635 global_avg_mtp_loss: 3.3180 +[titan] 2025-10-04 23:11:03,935 - root - INFO - lr: 4.9979e-05 gnorm: 1.45 [ 0:36:55<1 day, 1:26:13] +[titan] 2025-10-04 23:11:12,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:11:14,787 - root - INFO - step: 950 loss: 3.8345 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4716 global_avg_mtp_loss: 3.3629 +[titan] 2025-10-04 23:11:14,787 - root - INFO - lr: 4.9979e-05 gnorm: 1.54 [ 0:37:06<1 day, 1:25:25] +[titan] 2025-10-04 23:11:25,662 - root - INFO - step: 955 loss: 3.7153 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4570 global_avg_mtp_loss: 3.2583 +[titan] 2025-10-04 23:11:25,662 - root - INFO - lr: 4.9978e-05 gnorm: 1.40 [ 0:37:17<1 day, 1:24:39] +[titan] 2025-10-04 23:11:36,506 - root - INFO - step: 960 loss: 3.7474 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4595 global_avg_mtp_loss: 3.2878 +[titan] 2025-10-04 23:11:36,506 - root - INFO - lr: 4.9978e-05 gnorm: 1.39 [ 0:37:28<1 day, 1:23:52] +[titan] 2025-10-04 23:11:47,428 - root - INFO - step: 965 loss: 3.7469 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4597 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:11:47,429 - root - INFO - lr: 4.9977e-05 gnorm: 1.60 [ 0:37:39<1 day, 1:23:08] +[titan] 2025-10-04 23:11:58,339 - root - INFO - step: 970 loss: 3.7767 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.4638 global_avg_mtp_loss: 3.3129 +[titan] 2025-10-04 23:11:58,339 - root - INFO - lr: 4.9977e-05 gnorm: 1.59 [ 0:37:50<1 day, 1:22:24] +[titan] 2025-10-04 23:12:09,214 - root - INFO - step: 975 loss: 3.7198 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4568 global_avg_mtp_loss: 3.2630 +[titan] 2025-10-04 23:12:09,214 - root - INFO - lr: 4.9977e-05 gnorm: 1.44 [ 0:38:01<1 day, 1:21:39] +[titan] 2025-10-04 23:12:20,081 - root - INFO - step: 980 loss: 3.7702 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4623 global_avg_mtp_loss: 3.3079 +[titan] 2025-10-04 23:12:20,081 - 
root - INFO - lr: 4.9976e-05 gnorm: 1.42 [ 0:38:11<1 day, 1:20:55] +[titan] 2025-10-04 23:12:30,946 - root - INFO - step: 985 loss: 3.8212 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3515 +[titan] 2025-10-04 23:12:30,947 - root - INFO - lr: 4.9976e-05 gnorm: 1.39 [ 0:38:22<1 day, 1:20:10] +[titan] 2025-10-04 23:12:41,799 - root - INFO - step: 990 loss: 3.7716 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.4659 global_avg_mtp_loss: 3.3057 +[titan] 2025-10-04 23:12:41,799 - root - INFO - lr: 4.9975e-05 gnorm: 1.50 [ 0:38:33<1 day, 1:19:26] +[titan] 2025-10-04 23:12:52,700 - root - INFO - step: 995 loss: 3.8144 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3447 +[titan] 2025-10-04 23:12:52,701 - root - INFO - lr: 4.9975e-05 gnorm: 1.47 [ 0:38:44<1 day, 1:18:43] +[titan] 2025-10-04 23:13:01,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:13:03,568 - root - INFO - step: 1000 loss: 3.6411 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.4473 global_avg_mtp_loss: 3.1938 +[titan] 2025-10-04 23:13:03,569 - root - INFO - lr: 4.9974e-05 gnorm: 1.70 [ 0:38:55<1 day, 1:18:00] +[titan] 2025-10-04 23:13:14,441 - root - INFO - step: 1005 loss: 3.7872 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4636 global_avg_mtp_loss: 3.3236 +[titan] 2025-10-04 23:13:14,442 - root - INFO - lr: 4.9974e-05 gnorm: 1.62 [ 0:39:06<1 day, 1:17:17] +[titan] 2025-10-04 23:13:25,308 - root - INFO - step: 1010 loss: 3.8240 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4681 global_avg_mtp_loss: 3.3560 +[titan] 2025-10-04 23:13:25,308 - root - INFO - lr: 4.9974e-05 gnorm: 1.51 [ 0:39:17<1 day, 1:16:34] +[titan] 2025-10-04 23:13:36,156 - root - INFO - step: 1015 loss: 3.7026 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.4566 global_avg_mtp_loss: 3.2461 +[titan] 2025-10-04 23:13:36,157 - root - INFO - lr: 4.9973e-05 gnorm: 1.61 [ 0:39:27<1 day, 1:15:51] +[titan] 2025-10-04 23:13:47,024 - root - INFO - step: 1020 loss: 3.8204 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4698 global_avg_mtp_loss: 3.3506 +[titan] 2025-10-04 23:13:47,025 - root - INFO - lr: 4.9973e-05 gnorm: 1.58 [ 0:39:38<1 day, 1:15:09] +[titan] 2025-10-04 23:13:55,951 - root - INFO - Dumping profiler traces at step 1024 +[titan] 2025-10-04 23:13:55,986 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 23:13:58,196 - root - INFO - step: 1025 loss: 3.7098 memory: 118.84GiB(85.28%) tps: 29,332 tflops: 406.94 mfu: 41.15% global_avg_ntp_loss: 0.4550 global_avg_mtp_loss: 3.2548 +[titan] 2025-10-04 23:13:58,196 - root - INFO - lr: 4.9972e-05 gnorm: 1.53 [ 0:39:50<1 day, 1:14:38] +[titan] 2025-10-04 23:14:09,055 - root 
- INFO - step: 1030 loss: 3.6684 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4505 global_avg_mtp_loss: 3.2179 +[titan] 2025-10-04 23:14:09,056 - root - INFO - lr: 4.9972e-05 gnorm: 1.49 [ 0:40:00<1 day, 1:13:56] +[titan] 2025-10-04 23:14:19,917 - root - INFO - step: 1035 loss: 3.7778 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.4632 global_avg_mtp_loss: 3.3146 +[titan] 2025-10-04 23:14:19,917 - root - INFO - lr: 4.9971e-05 gnorm: 1.64 [ 0:40:11<1 day, 1:13:15] +[titan] 2025-10-04 23:14:30,784 - root - INFO - step: 1040 loss: 3.7600 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4596 global_avg_mtp_loss: 3.3004 +[titan] 2025-10-04 23:14:30,784 - root - INFO - lr: 4.9971e-05 gnorm: 1.73 [ 0:40:22<1 day, 1:12:34] +[titan] 2025-10-04 23:14:41,642 - root - INFO - step: 1045 loss: 3.7970 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3312 +[titan] 2025-10-04 23:14:41,642 - root - INFO - lr: 4.9970e-05 gnorm: 1.60 [ 0:40:33<1 day, 1:11:53] +[titan] 2025-10-04 23:14:50,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:14:52,527 - root - INFO - step: 1050 loss: 3.7607 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.4629 global_avg_mtp_loss: 3.2979 +[titan] 2025-10-04 23:14:52,527 - root - INFO - lr: 4.9970e-05 gnorm: 1.86 [ 0:40:44<1 day, 1:11:13] +[titan] 2025-10-04 23:15:03,398 - root - INFO - step: 1055 loss: 3.6921 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.4533 global_avg_mtp_loss: 3.2388 +[titan] 2025-10-04 23:15:03,398 - root - INFO - lr: 4.9970e-05 gnorm: 1.59 [ 0:40:55<1 day, 1:10:33] +[titan] 2025-10-04 23:15:14,306 - root - INFO - step: 1060 loss: 3.7138 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.4561 global_avg_mtp_loss: 3.2577 +[titan] 2025-10-04 23:15:14,306 - root - INFO - lr: 4.9969e-05 gnorm: 1.89 [ 0:41:06<1 day, 1:09:55] +[titan] 2025-10-04 23:15:25,186 - root - INFO - step: 1065 loss: 3.7455 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4584 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:15:25,186 - root - INFO - lr: 4.9969e-05 gnorm: 1.72 [ 0:41:17<1 day, 1:09:15] +[titan] 2025-10-04 23:15:36,061 - root - INFO - step: 1070 loss: 3.6510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4474 global_avg_mtp_loss: 3.2036 +[titan] 2025-10-04 23:15:36,061 - root - INFO - lr: 4.9968e-05 gnorm: 1.70 [ 0:41:27<1 day, 1:08:36] +[titan] 2025-10-04 23:15:46,950 - root - INFO - step: 1075 loss: 3.7757 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4630 global_avg_mtp_loss: 3.3127 +[titan] 2025-10-04 23:15:46,950 - root - INFO - lr: 4.9968e-05 gnorm: 1.53 [ 0:41:38<1 day, 1:07:58] +[titan] 2025-10-04 23:15:57,821 - root - INFO - step: 1080 loss: 3.6997 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4542 global_avg_mtp_loss: 3.2455 +[titan] 2025-10-04 
23:15:57,821 - root - INFO - lr: 4.9967e-05 gnorm: 1.40 [ 0:41:49<1 day, 1:07:19] +[titan] 2025-10-04 23:16:08,691 - root - INFO - step: 1085 loss: 3.7768 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.4652 global_avg_mtp_loss: 3.3116 +[titan] 2025-10-04 23:16:08,691 - root - INFO - lr: 4.9967e-05 gnorm: 1.71 [ 0:42:00<1 day, 1:06:41] +[titan] 2025-10-04 23:16:19,625 - root - INFO - step: 1090 loss: 3.7891 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.4653 global_avg_mtp_loss: 3.3238 +[titan] 2025-10-04 23:16:19,625 - root - INFO - lr: 4.9966e-05 gnorm: 1.32 [ 0:42:11<1 day, 1:06:05] +[titan] 2025-10-04 23:16:30,524 - root - INFO - step: 1095 loss: 3.6348 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.4440 global_avg_mtp_loss: 3.1907 +[titan] 2025-10-04 23:16:30,525 - root - INFO - lr: 4.9966e-05 gnorm: 1.55 [ 0:42:22<1 day, 1:05:28] +[titan] 2025-10-04 23:16:39,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:16:41,421 - root - INFO - step: 1100 loss: 3.7357 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.4573 global_avg_mtp_loss: 3.2785 +[titan] 2025-10-04 23:16:41,421 - root - INFO - lr: 4.9965e-05 gnorm: 1.50 [ 0:42:33<1 day, 1:04:51] +[titan] 2025-10-04 23:16:52,335 - root - INFO - step: 1105 loss: 3.6253 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1800 +[titan] 2025-10-04 23:16:52,335 - root - INFO - lr: 4.9965e-05 gnorm: 1.52 [ 0:42:44<1 day, 1:04:15] +[titan] 2025-10-04 23:17:03,265 - root - INFO - step: 1110 loss: 3.6786 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.4500 global_avg_mtp_loss: 3.2285 +[titan] 2025-10-04 23:17:03,266 - root - INFO - lr: 4.9964e-05 gnorm: 1.41 [ 0:42:55<1 day, 1:03:40] +[titan] 2025-10-04 23:17:14,175 - root - INFO - step: 1115 loss: 3.6578 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.4465 global_avg_mtp_loss: 3.2112 +[titan] 2025-10-04 23:17:14,175 - root - INFO - lr: 4.9964e-05 gnorm: 1.35 [ 0:43:05<1 day, 1:03:04] +[titan] 2025-10-04 23:17:25,067 - root - INFO - step: 1120 loss: 3.6849 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.4511 global_avg_mtp_loss: 3.2339 +[titan] 2025-10-04 23:17:25,067 - root - INFO - lr: 4.9963e-05 gnorm: 1.51 [ 0:43:16<1 day, 1:02:28] +[titan] 2025-10-04 23:17:35,980 - root - INFO - step: 1125 loss: 3.6812 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.4516 global_avg_mtp_loss: 3.2296 +[titan] 2025-10-04 23:17:35,980 - root - INFO - lr: 4.9963e-05 gnorm: 1.53 [ 0:43:27<1 day, 1:01:53] +[titan] 2025-10-04 23:17:46,863 - root - INFO - step: 1130 loss: 3.6167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4428 global_avg_mtp_loss: 3.1739 +[titan] 2025-10-04 
23:17:46,863 - root - INFO - lr: 4.9962e-05 gnorm: 1.69 [ 0:43:38<1 day, 1:01:17] +[titan] 2025-10-04 23:17:57,754 - root - INFO - step: 1135 loss: 3.5668 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.4385 global_avg_mtp_loss: 3.1284 +[titan] 2025-10-04 23:17:57,754 - root - INFO - lr: 4.9962e-05 gnorm: 1.44 [ 0:43:49<1 day, 1:00:42] +[titan] 2025-10-04 23:18:08,676 - root - INFO - step: 1140 loss: 3.6958 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.4522 global_avg_mtp_loss: 3.2436 +[titan] 2025-10-04 23:18:08,676 - root - INFO - lr: 4.9961e-05 gnorm: 1.51 [ 0:44:00<1 day, 1:00:08] +[titan] 2025-10-04 23:18:19,548 - root - INFO - step: 1145 loss: 3.7386 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.4725 global_avg_mtp_loss: 3.2662 +[titan] 2025-10-04 23:18:19,548 - root - INFO - lr: 4.9961e-05 gnorm: 1.52 [ 0:44:11<1 day, 0:59:32] +[titan] 2025-10-04 23:18:28,249 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:18:30,436 - root - INFO - step: 1150 loss: 3.6554 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.4491 global_avg_mtp_loss: 3.2063 +[titan] 2025-10-04 23:18:30,436 - root - INFO - lr: 4.9960e-05 gnorm: 1.51 [ 0:44:22<1 day, 0:58:57] +[titan] 2025-10-04 23:18:41,365 - root - INFO - step: 1155 loss: 3.6986 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.4535 global_avg_mtp_loss: 3.2451 +[titan] 2025-10-04 23:18:41,365 - root - INFO - lr: 4.9960e-05 gnorm: 1.49 [ 0:44:33<1 day, 0:58:24] +[titan] 2025-10-04 23:18:52,242 - root - INFO - step: 1160 loss: 3.6068 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4415 global_avg_mtp_loss: 3.1653 +[titan] 2025-10-04 23:18:52,243 - root - INFO - lr: 4.9959e-05 gnorm: 1.49 [ 0:44:44<1 day, 0:57:49] +[titan] 2025-10-04 23:19:03,171 - root - INFO - step: 1165 loss: 3.5931 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.4398 global_avg_mtp_loss: 3.1533 +[titan] 2025-10-04 23:19:03,171 - root - INFO - lr: 4.9958e-05 gnorm: 1.54 [ 0:44:54<1 day, 0:57:16] +[titan] 2025-10-04 23:19:14,054 - root - INFO - step: 1170 loss: 3.6446 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4453 global_avg_mtp_loss: 3.1993 +[titan] 2025-10-04 23:19:14,054 - root - INFO - lr: 4.9958e-05 gnorm: 1.49 [ 0:45:05<1 day, 0:56:42] +[titan] 2025-10-04 23:19:24,934 - root - INFO - step: 1175 loss: 3.6211 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1757 +[titan] 2025-10-04 23:19:24,934 - root - INFO - lr: 4.9957e-05 gnorm: 1.48 [ 0:45:16<1 day, 0:56:07] +[titan] 2025-10-04 23:19:35,805 - root - INFO - step: 1180 loss: 3.6634 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4499 global_avg_mtp_loss: 3.2135 +[titan] 2025-10-04 
23:19:35,805 - root - INFO - lr: 4.9957e-05 gnorm: 1.55 [ 0:45:27<1 day, 0:55:33] +[titan] 2025-10-04 23:19:46,722 - root - INFO - step: 1185 loss: 3.6182 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1740 +[titan] 2025-10-04 23:19:46,722 - root - INFO - lr: 4.9956e-05 gnorm: 1.56 [ 0:45:38<1 day, 0:55:01] +[titan] 2025-10-04 23:19:57,577 - root - INFO - step: 1190 loss: 3.6307 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.4437 global_avg_mtp_loss: 3.1870 +[titan] 2025-10-04 23:19:57,578 - root - INFO - lr: 4.9956e-05 gnorm: 1.44 [ 0:45:49<1 day, 0:54:26] +[titan] 2025-10-04 23:20:08,587 - root - INFO - step: 1195 loss: 3.6947 memory: 118.84GiB(85.28%) tps: 29,765 tflops: 412.95 mfu: 41.75% global_avg_ntp_loss: 0.4519 global_avg_mtp_loss: 3.2429 +[titan] 2025-10-04 23:20:08,587 - root - INFO - lr: 4.9955e-05 gnorm: 1.42 [ 0:46:00<1 day, 0:53:57] +[titan] 2025-10-04 23:20:17,298 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:20:19,487 - root - INFO - step: 1200 loss: 3.6239 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1796 +[titan] 2025-10-04 23:20:19,487 - root - INFO - lr: 4.9955e-05 gnorm: 1.44 [ 0:46:11<1 day, 0:53:25] +[titan] 2025-10-04 23:20:30,366 - root - INFO - step: 1205 loss: 3.6270 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4452 global_avg_mtp_loss: 3.1819 +[titan] 2025-10-04 23:20:30,366 - root - INFO - lr: 4.9954e-05 gnorm: 1.60 [ 0:46:22<1 day, 0:52:51] +[titan] 2025-10-04 23:20:41,259 - root - INFO - step: 1210 loss: 3.6144 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1709 +[titan] 2025-10-04 23:20:41,259 - root - INFO - lr: 4.9953e-05 gnorm: 1.66 [ 0:46:33<1 day, 0:52:19] +[titan] 2025-10-04 23:20:52,152 - root - INFO - step: 1215 loss: 3.6886 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4528 global_avg_mtp_loss: 3.2359 +[titan] 2025-10-04 23:20:52,152 - root - INFO - lr: 4.9953e-05 gnorm: 1.48 [ 0:46:43<1 day, 0:51:47] +[titan] 2025-10-04 23:21:03,098 - root - INFO - step: 1220 loss: 3.5263 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.4324 global_avg_mtp_loss: 3.0939 +[titan] 2025-10-04 23:21:03,098 - root - INFO - lr: 4.9952e-05 gnorm: 1.62 [ 0:46:54<1 day, 0:51:16] +[titan] 2025-10-04 23:21:14,014 - root - INFO - step: 1225 loss: 3.6228 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.4426 global_avg_mtp_loss: 3.1801 +[titan] 2025-10-04 23:21:14,014 - root - INFO - lr: 4.9952e-05 gnorm: 1.53 [ 0:47:05<1 day, 0:50:45] +[titan] 2025-10-04 23:21:24,903 - root - INFO - step: 1230 loss: 3.5398 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.4327 global_avg_mtp_loss: 3.1072 +[titan] 2025-10-04 
23:21:24,904 - root - INFO - lr: 4.9951e-05 gnorm: 1.39 [ 0:47:16<1 day, 0:50:13] +[titan] 2025-10-04 23:21:35,790 - root - INFO - step: 1235 loss: 3.5790 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.4389 global_avg_mtp_loss: 3.1401 +[titan] 2025-10-04 23:21:35,790 - root - INFO - lr: 4.9951e-05 gnorm: 1.42 [ 0:47:27<1 day, 0:49:41] +[titan] 2025-10-04 23:21:46,666 - root - INFO - step: 1240 loss: 3.6434 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4448 global_avg_mtp_loss: 3.1987 +[titan] 2025-10-04 23:21:46,666 - root - INFO - lr: 4.9950e-05 gnorm: 1.43 [ 0:47:38<1 day, 0:49:10] +[titan] 2025-10-04 23:21:57,577 - root - INFO - step: 1245 loss: 3.5452 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4350 global_avg_mtp_loss: 3.1102 +[titan] 2025-10-04 23:21:57,577 - root - INFO - lr: 4.9949e-05 gnorm: 1.40 [ 0:47:49<1 day, 0:48:39] +[titan] 2025-10-04 23:22:06,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:22:08,563 - root - INFO - step: 1250 loss: 3.5844 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.4369 global_avg_mtp_loss: 3.1475 +[titan] 2025-10-04 23:22:08,564 - root - INFO - lr: 4.9949e-05 gnorm: 1.48 [ 0:48:00<1 day, 0:48:11] +[titan] 2025-10-04 23:22:19,438 - root - INFO - step: 1255 loss: 3.6078 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1643 +[titan] 2025-10-04 23:22:19,438 - root - INFO - lr: 4.9948e-05 gnorm: 1.59 [ 0:48:11<1 day, 0:47:39] +[titan] 2025-10-04 23:22:30,309 - root - INFO - step: 1260 loss: 3.5536 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4346 global_avg_mtp_loss: 3.1191 +[titan] 2025-10-04 23:22:30,309 - root - INFO - lr: 4.9948e-05 gnorm: 1.57 [ 0:48:22<1 day, 0:47:08] +[titan] 2025-10-04 23:22:41,203 - root - INFO - step: 1265 loss: 3.5861 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.4376 global_avg_mtp_loss: 3.1485 +[titan] 2025-10-04 23:22:41,203 - root - INFO - lr: 4.9947e-05 gnorm: 1.47 [ 0:48:32<1 day, 0:46:37] +[titan] 2025-10-04 23:22:52,080 - root - INFO - step: 1270 loss: 3.6181 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4419 global_avg_mtp_loss: 3.1762 +[titan] 2025-10-04 23:22:52,081 - root - INFO - lr: 4.9946e-05 gnorm: 1.38 [ 0:48:43<1 day, 0:46:06] +[titan] 2025-10-04 23:23:02,961 - root - INFO - step: 1275 loss: 3.5508 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1168 +[titan] 2025-10-04 23:23:02,961 - root - INFO - lr: 4.9946e-05 gnorm: 1.48 [ 0:48:54<1 day, 0:45:36] +[titan] 2025-10-04 23:23:13,913 - root - INFO - step: 1280 loss: 3.5362 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.4318 global_avg_mtp_loss: 3.1044 +[titan] 2025-10-04 
23:23:13,913 - root - INFO - lr: 4.9945e-05 gnorm: 1.47 [ 0:49:05<1 day, 0:45:07] +[titan] 2025-10-04 23:23:24,835 - root - INFO - step: 1285 loss: 3.5593 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4354 global_avg_mtp_loss: 3.1239 +[titan] 2025-10-04 23:23:24,835 - root - INFO - lr: 4.9944e-05 gnorm: 1.48 [ 0:49:16<1 day, 0:44:38] +[titan] 2025-10-04 23:23:35,699 - root - INFO - step: 1290 loss: 3.5751 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4357 global_avg_mtp_loss: 3.1395 +[titan] 2025-10-04 23:23:35,700 - root - INFO - lr: 4.9944e-05 gnorm: 1.42 [ 0:49:27<1 day, 0:44:07] +[titan] 2025-10-04 23:23:46,610 - root - INFO - step: 1295 loss: 3.5938 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4377 global_avg_mtp_loss: 3.1562 +[titan] 2025-10-04 23:23:46,610 - root - INFO - lr: 4.9943e-05 gnorm: 1.35 [ 0:49:38<1 day, 0:43:38] +[titan] 2025-10-04 23:23:55,309 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:23:57,488 - root - INFO - step: 1300 loss: 3.5542 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4336 global_avg_mtp_loss: 3.1206 +[titan] 2025-10-04 23:23:57,489 - root - INFO - lr: 4.9943e-05 gnorm: 1.38 [ 0:49:49<1 day, 0:43:08] +[titan] 2025-10-04 23:24:08,378 - root - INFO - step: 1305 loss: 3.5644 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4344 global_avg_mtp_loss: 3.1301 +[titan] 2025-10-04 23:24:08,379 - root - INFO - lr: 4.9942e-05 gnorm: 1.38 [ 0:50:00<1 day, 0:42:39] +[titan] 2025-10-04 23:24:19,247 - root - INFO - step: 1310 loss: 3.5464 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1124 +[titan] 2025-10-04 23:24:19,247 - root - INFO - lr: 4.9941e-05 gnorm: 1.43 [ 0:50:11<1 day, 0:42:09] +[titan] 2025-10-04 23:24:30,161 - root - INFO - step: 1315 loss: 3.5898 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.4372 global_avg_mtp_loss: 3.1527 +[titan] 2025-10-04 23:24:30,161 - root - INFO - lr: 4.9941e-05 gnorm: 1.34 [ 0:50:21<1 day, 0:41:40] +[titan] 2025-10-04 23:24:41,039 - root - INFO - step: 1320 loss: 3.6159 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4427 global_avg_mtp_loss: 3.1731 +[titan] 2025-10-04 23:24:41,039 - root - INFO - lr: 4.9940e-05 gnorm: 1.34 [ 0:50:32<1 day, 0:41:11] +[titan] 2025-10-04 23:24:51,938 - root - INFO - step: 1325 loss: 3.4618 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4229 global_avg_mtp_loss: 3.0389 +[titan] 2025-10-04 23:24:51,938 - root - INFO - lr: 4.9939e-05 gnorm: 1.36 [ 0:50:43<1 day, 0:40:42] +[titan] 2025-10-04 23:25:02,828 - root - INFO - step: 1330 loss: 3.5160 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4280 global_avg_mtp_loss: 3.0880 +[titan] 2025-10-04 
23:25:02,829 - root - INFO - lr: 4.9939e-05 gnorm: 1.38 [ 0:50:54<1 day, 0:40:13] +[titan] 2025-10-04 23:25:13,745 - root - INFO - step: 1335 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4282 global_avg_mtp_loss: 3.0763 +[titan] 2025-10-04 23:25:13,746 - root - INFO - lr: 4.9938e-05 gnorm: 1.46 [ 0:51:05<1 day, 0:39:45] +[titan] 2025-10-04 23:25:24,642 - root - INFO - step: 1340 loss: 3.5440 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4337 global_avg_mtp_loss: 3.1103 +[titan] 2025-10-04 23:25:24,642 - root - INFO - lr: 4.9937e-05 gnorm: 1.42 [ 0:51:16<1 day, 0:39:17] +[titan] 2025-10-04 23:25:35,576 - root - INFO - step: 1345 loss: 3.6036 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.4395 global_avg_mtp_loss: 3.1641 +[titan] 2025-10-04 23:25:35,576 - root - INFO - lr: 4.9937e-05 gnorm: 1.35 [ 0:51:27<1 day, 0:38:50] +[titan] 2025-10-04 23:25:44,278 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:25:46,465 - root - INFO - step: 1350 loss: 3.5202 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4296 global_avg_mtp_loss: 3.0905 +[titan] 2025-10-04 23:25:46,465 - root - INFO - lr: 4.9936e-05 gnorm: 1.31 [ 0:51:38<1 day, 0:38:21] +[titan] 2025-10-04 23:25:57,344 - root - INFO - step: 1355 loss: 3.5459 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.4309 global_avg_mtp_loss: 3.1149 +[titan] 2025-10-04 23:25:57,345 - root - INFO - lr: 4.9935e-05 gnorm: 1.30 [ 0:51:49<1 day, 0:37:53] +[titan] 2025-10-04 23:26:08,268 - root - INFO - step: 1360 loss: 3.5720 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.4351 global_avg_mtp_loss: 3.1369 +[titan] 2025-10-04 23:26:08,269 - root - INFO - lr: 4.9935e-05 gnorm: 1.39 [ 0:52:00<1 day, 0:37:26] +[titan] 2025-10-04 23:26:19,143 - root - INFO - step: 1365 loss: 3.4497 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0288 +[titan] 2025-10-04 23:26:19,144 - root - INFO - lr: 4.9934e-05 gnorm: 1.37 [ 0:52:10<1 day, 0:36:57] +[titan] 2025-10-04 23:26:30,030 - root - INFO - step: 1370 loss: 3.5847 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.4370 global_avg_mtp_loss: 3.1477 +[titan] 2025-10-04 23:26:30,030 - root - INFO - lr: 4.9933e-05 gnorm: 1.49 [ 0:52:21<1 day, 0:36:30] +[titan] 2025-10-04 23:26:40,913 - root - INFO - step: 1375 loss: 3.4970 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4269 global_avg_mtp_loss: 3.0701 +[titan] 2025-10-04 23:26:40,913 - root - INFO - lr: 4.9933e-05 gnorm: 1.38 [ 0:52:32<1 day, 0:36:02] +[titan] 2025-10-04 23:26:51,832 - root - INFO - step: 1380 loss: 3.4520 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 3.0312 +[titan] 2025-10-04 
23:26:51,832 - root - INFO - lr: 4.9932e-05 gnorm: 1.36 [ 0:52:43<1 day, 0:35:35] +[titan] 2025-10-04 23:27:02,721 - root - INFO - step: 1385 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4169 global_avg_mtp_loss: 3.0149 +[titan] 2025-10-04 23:27:02,721 - root - INFO - lr: 4.9931e-05 gnorm: 1.42 [ 0:52:54<1 day, 0:35:07] +[titan] 2025-10-04 23:27:13,641 - root - INFO - step: 1390 loss: 3.4046 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.4139 global_avg_mtp_loss: 2.9907 +[titan] 2025-10-04 23:27:13,641 - root - INFO - lr: 4.9931e-05 gnorm: 1.37 [ 0:53:05<1 day, 0:34:41] +[titan] 2025-10-04 23:27:24,527 - root - INFO - step: 1395 loss: 3.4971 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.4253 global_avg_mtp_loss: 3.0717 +[titan] 2025-10-04 23:27:24,527 - root - INFO - lr: 4.9930e-05 gnorm: 1.41 [ 0:53:16<1 day, 0:34:14] +[titan] 2025-10-04 23:27:33,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:27:35,394 - root - INFO - step: 1400 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.4290 global_avg_mtp_loss: 3.0755 +[titan] 2025-10-04 23:27:35,394 - root - INFO - lr: 4.9929e-05 gnorm: 1.40 [ 0:53:27<1 day, 0:33:46] +[titan] 2025-10-04 23:27:46,287 - root - INFO - step: 1405 loss: 3.4686 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4233 global_avg_mtp_loss: 3.0453 +[titan] 2025-10-04 23:27:46,287 - root - INFO - lr: 4.9928e-05 gnorm: 1.49 [ 0:53:38<1 day, 0:33:19] +[titan] 2025-10-04 23:27:57,198 - root - INFO - step: 1410 loss: 3.5153 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4300 global_avg_mtp_loss: 3.0853 +[titan] 2025-10-04 23:27:57,198 - root - INFO - lr: 4.9928e-05 gnorm: 1.47 [ 0:53:48<1 day, 0:32:53] +[titan] 2025-10-04 23:28:08,061 - root - INFO - step: 1415 loss: 3.4739 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4242 global_avg_mtp_loss: 3.0497 +[titan] 2025-10-04 23:28:08,061 - root - INFO - lr: 4.9927e-05 gnorm: 1.34 [ 0:53:59<1 day, 0:32:25] +[titan] 2025-10-04 23:28:18,978 - root - INFO - step: 1420 loss: 3.5053 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.4276 global_avg_mtp_loss: 3.0778 +[titan] 2025-10-04 23:28:18,978 - root - INFO - lr: 4.9926e-05 gnorm: 1.41 [ 0:54:10<1 day, 0:31:59] +[titan] 2025-10-04 23:28:29,841 - root - INFO - step: 1425 loss: 3.4083 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4170 global_avg_mtp_loss: 2.9913 +[titan] 2025-10-04 23:28:29,841 - root - INFO - lr: 4.9926e-05 gnorm: 1.42 [ 0:54:21<1 day, 0:31:32] +[titan] 2025-10-04 23:28:40,714 - root - INFO - step: 1430 loss: 3.4627 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0411 +[titan] 2025-10-04 
23:28:40,714 - root - INFO - lr: 4.9925e-05 gnorm: 1.43 [ 0:54:32<1 day, 0:31:05] +[titan] 2025-10-04 23:28:51,581 - root - INFO - step: 1435 loss: 3.4919 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0684 +[titan] 2025-10-04 23:28:51,582 - root - INFO - lr: 4.9924e-05 gnorm: 1.37 [ 0:54:43<1 day, 0:30:38] +[titan] 2025-10-04 23:29:02,457 - root - INFO - step: 1440 loss: 3.4907 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.4267 global_avg_mtp_loss: 3.0640 +[titan] 2025-10-04 23:29:02,457 - root - INFO - lr: 4.9923e-05 gnorm: 1.37 [ 0:54:54<1 day, 0:30:12] +[titan] 2025-10-04 23:29:13,408 - root - INFO - step: 1445 loss: 3.4656 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0447 +[titan] 2025-10-04 23:29:13,408 - root - INFO - lr: 4.9923e-05 gnorm: 1.40 [ 0:55:05<1 day, 0:29:47] +[titan] 2025-10-04 23:29:22,093 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:29:24,281 - root - INFO - step: 1450 loss: 3.4814 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4230 global_avg_mtp_loss: 3.0585 +[titan] 2025-10-04 23:29:24,281 - root - INFO - lr: 4.9922e-05 gnorm: 1.47 [ 0:55:16<1 day, 0:29:21] +[titan] 2025-10-04 23:29:35,145 - root - INFO - step: 1455 loss: 3.4419 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.4184 global_avg_mtp_loss: 3.0235 +[titan] 2025-10-04 23:29:35,145 - root - INFO - lr: 4.9921e-05 gnorm: 1.37 [ 0:55:26<1 day, 0:28:54] +[titan] 2025-10-04 23:29:46,030 - root - INFO - step: 1460 loss: 3.5546 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.4320 global_avg_mtp_loss: 3.1226 +[titan] 2025-10-04 23:29:46,030 - root - INFO - lr: 4.9920e-05 gnorm: 1.41 [ 0:55:37<1 day, 0:28:28] +[titan] 2025-10-04 23:29:56,926 - root - INFO - step: 1465 loss: 3.5290 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4295 global_avg_mtp_loss: 3.0995 +[titan] 2025-10-04 23:29:56,927 - root - INFO - lr: 4.9920e-05 gnorm: 1.36 [ 0:55:48<1 day, 0:28:03] +[titan] 2025-10-04 23:30:07,807 - root - INFO - step: 1470 loss: 3.4674 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0442 +[titan] 2025-10-04 23:30:07,808 - root - INFO - lr: 4.9919e-05 gnorm: 1.41 [ 0:55:59<1 day, 0:27:37] +[titan] 2025-10-04 23:30:18,704 - root - INFO - step: 1475 loss: 3.4400 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0220 +[titan] 2025-10-04 23:30:18,705 - root - INFO - lr: 4.9918e-05 gnorm: 1.36 [ 0:56:10<1 day, 0:27:12] +[titan] 2025-10-04 23:30:29,561 - root - INFO - step: 1480 loss: 3.4692 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4228 global_avg_mtp_loss: 3.0463 +[titan] 2025-10-04 
23:30:29,562 - root - INFO - lr: 4.9917e-05 gnorm: 1.30 [ 0:56:21<1 day, 0:26:46] +[titan] 2025-10-04 23:30:40,438 - root - INFO - step: 1485 loss: 3.4861 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4263 global_avg_mtp_loss: 3.0598 +[titan] 2025-10-04 23:30:40,438 - root - INFO - lr: 4.9917e-05 gnorm: 1.35 [ 0:56:32<1 day, 0:26:20] +[titan] 2025-10-04 23:30:51,302 - root - INFO - step: 1490 loss: 3.4181 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4151 global_avg_mtp_loss: 3.0030 +[titan] 2025-10-04 23:30:51,303 - root - INFO - lr: 4.9916e-05 gnorm: 1.47 [ 0:56:43<1 day, 0:25:54] +[titan] 2025-10-04 23:31:02,175 - root - INFO - step: 1495 loss: 3.4587 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4192 global_avg_mtp_loss: 3.0394 +[titan] 2025-10-04 23:31:02,176 - root - INFO - lr: 4.9915e-05 gnorm: 1.30 [ 0:56:53<1 day, 0:25:29] +[titan] 2025-10-04 23:31:10,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:31:13,053 - root - INFO - step: 1500 loss: 3.4454 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4203 global_avg_mtp_loss: 3.0251 +[titan] 2025-10-04 23:31:13,053 - root - INFO - lr: 4.9914e-05 gnorm: 1.32 [ 0:57:04<1 day, 0:25:03] +[titan] 2025-10-04 23:31:23,959 - root - INFO - step: 1505 loss: 3.5094 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.4278 global_avg_mtp_loss: 3.0816 +[titan] 2025-10-04 23:31:23,959 - root - INFO - lr: 4.9913e-05 gnorm: 1.39 [ 0:57:15<1 day, 0:24:39] +[titan] 2025-10-04 23:31:34,816 - root - INFO - step: 1510 loss: 3.4203 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 2.9996 +[titan] 2025-10-04 23:31:34,816 - root - INFO - lr: 4.9913e-05 gnorm: 1.40 [ 0:57:26<1 day, 0:24:13] +[titan] 2025-10-04 23:31:45,697 - root - INFO - step: 1515 loss: 3.4819 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4246 global_avg_mtp_loss: 3.0574 +[titan] 2025-10-04 23:31:45,697 - root - INFO - lr: 4.9912e-05 gnorm: 1.42 [ 0:57:37<1 day, 0:23:48] +[titan] 2025-10-04 23:31:56,581 - root - INFO - step: 1520 loss: 3.4715 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0513 +[titan] 2025-10-04 23:31:56,581 - root - INFO - lr: 4.9911e-05 gnorm: 1.54 [ 0:57:48<1 day, 0:23:24] +[titan] 2025-10-04 23:32:07,443 - root - INFO - step: 1525 loss: 3.3887 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4123 global_avg_mtp_loss: 2.9763 +[titan] 2025-10-04 23:32:07,443 - root - INFO - lr: 4.9910e-05 gnorm: 1.50 [ 0:57:59<1 day, 0:22:58] +[titan] 2025-10-04 23:32:18,324 - root - INFO - step: 1530 loss: 3.4137 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4161 global_avg_mtp_loss: 2.9977 +[titan] 2025-10-04 
23:32:18,324 - root - INFO - lr: 4.9909e-05 gnorm: 1.39 [ 0:58:10<1 day, 0:22:34] +[titan] 2025-10-04 23:32:29,266 - root - INFO - step: 1535 loss: 3.4241 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.4172 global_avg_mtp_loss: 3.0069 +[titan] 2025-10-04 23:32:29,266 - root - INFO - lr: 4.9909e-05 gnorm: 1.38 [ 0:58:21<1 day, 0:22:11] +[titan] 2025-10-04 23:32:31,606 - root - INFO - Dumping profiler traces at step 1536 +[titan] 2025-10-04 23:32:31,643 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:32:40,423 - root - INFO - step: 1540 loss: 3.4722 memory: 118.84GiB(85.28%) tps: 29,370 tflops: 407.47 mfu: 41.20% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0490 +[titan] 2025-10-04 23:32:40,423 - root - INFO - lr: 4.9908e-05 gnorm: 1.48 [ 0:58:32<1 day, 0:21:53] +[titan] 2025-10-04 23:32:51,288 - root - INFO - step: 1545 loss: 3.4793 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4234 global_avg_mtp_loss: 3.0559 +[titan] 2025-10-04 23:32:51,288 - root - INFO - lr: 4.9907e-05 gnorm: 1.37 [ 0:58:43<1 day, 0:21:28] +[titan] 2025-10-04 23:32:59,971 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:33:02,152 - root - INFO - step: 1550 loss: 3.4035 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4133 global_avg_mtp_loss: 2.9902 +[titan] 2025-10-04 23:33:02,152 - root - INFO - lr: 4.9906e-05 gnorm: 1.32 [ 0:58:53<1 day, 0:21:04] +[titan] 2025-10-04 23:33:13,032 - root - INFO - step: 1555 loss: 3.4850 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.4225 global_avg_mtp_loss: 3.0625 +[titan] 2025-10-04 23:33:13,032 - root - INFO - lr: 4.9905e-05 gnorm: 1.34 [ 0:59:04<1 day, 0:20:39] +[titan] 2025-10-04 23:33:23,946 - root - INFO - step: 1560 loss: 3.5272 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4289 global_avg_mtp_loss: 3.0982 +[titan] 2025-10-04 23:33:23,946 - root - INFO - lr: 4.9905e-05 gnorm: 1.37 [ 0:59:15<1 day, 0:20:16] +[titan] 2025-10-04 23:33:34,861 - root - INFO - step: 1565 loss: 3.5253 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.4294 global_avg_mtp_loss: 3.0959 +[titan] 2025-10-04 23:33:34,861 - root - INFO - lr: 4.9904e-05 gnorm: 1.37 [ 0:59:26<1 day, 0:19:53] +[titan] 2025-10-04 23:33:45,801 - root - INFO - step: 1570 loss: 3.4320 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.4173 global_avg_mtp_loss: 3.0147 +[titan] 2025-10-04 23:33:45,801 - root - INFO - lr: 4.9903e-05 gnorm: 1.35 [ 0:59:37<1 day, 0:19:30] +[titan] 2025-10-04 23:33:56,697 - root - INFO - step: 1575 loss: 3.4044 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.4154 global_avg_mtp_loss: 2.9890 +[titan] 2025-10-04 23:33:56,697 - root - INFO - lr: 4.9902e-05 gnorm: 1.32 [ 0:59:48<1 day, 0:19:07] +[titan] 2025-10-04 23:34:07,560 - root - INFO - step: 1580 loss: 3.5820 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4484 global_avg_mtp_loss: 3.1336 +[titan] 2025-10-04 
23:34:07,560 - root - INFO - lr: 4.9901e-05 gnorm: 1.32 [ 0:59:59<1 day, 0:18:42] +[titan] 2025-10-04 23:34:18,478 - root - INFO - step: 1585 loss: 3.3932 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.4134 global_avg_mtp_loss: 2.9798 +[titan] 2025-10-04 23:34:18,479 - root - INFO - lr: 4.9900e-05 gnorm: 1.40 [ 1:00:10<1 day, 0:18:19] +[titan] 2025-10-04 23:34:29,342 - root - INFO - step: 1590 loss: 3.4358 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.4195 global_avg_mtp_loss: 3.0163 +[titan] 2025-10-04 23:34:29,342 - root - INFO - lr: 4.9900e-05 gnorm: 1.38 [ 1:00:21<1 day, 0:17:55] +[titan] 2025-10-04 23:34:40,218 - root - INFO - step: 1595 loss: 3.3310 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4056 global_avg_mtp_loss: 2.9254 +[titan] 2025-10-04 23:34:40,218 - root - INFO - lr: 4.9899e-05 gnorm: 1.38 [ 1:00:31<1 day, 0:17:32] +[titan] 2025-10-04 23:34:48,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:34:51,087 - root - INFO - step: 1600 loss: 3.4555 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4196 global_avg_mtp_loss: 3.0358 +[titan] 2025-10-04 23:34:51,088 - root - INFO - lr: 4.9898e-05 gnorm: 1.39 [ 1:00:42<1 day, 0:17:08] +[titan] 2025-10-04 23:35:01,992 - root - INFO - step: 1605 loss: 3.4766 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0531 +[titan] 2025-10-04 23:35:01,992 - root - INFO - lr: 4.9897e-05 gnorm: 1.33 [ 1:00:53<1 day, 0:16:45] +[titan] 2025-10-04 23:35:12,867 - root - INFO - step: 1610 loss: 3.3824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4111 global_avg_mtp_loss: 2.9713 +[titan] 2025-10-04 23:35:12,867 - root - INFO - lr: 4.9896e-05 gnorm: 1.41 [ 1:01:04<1 day, 0:16:22] +[titan] 2025-10-04 23:35:23,778 - root - INFO - step: 1615 loss: 3.4363 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.4168 global_avg_mtp_loss: 3.0195 +[titan] 2025-10-04 23:35:23,779 - root - INFO - lr: 4.9895e-05 gnorm: 1.27 [ 1:01:15<1 day, 0:15:59] +[titan] 2025-10-04 23:35:34,649 - root - INFO - step: 1620 loss: 3.3175 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.4028 global_avg_mtp_loss: 2.9147 +[titan] 2025-10-04 23:35:34,649 - root - INFO - lr: 4.9895e-05 gnorm: 1.32 [ 1:01:26<1 day, 0:15:36] +[titan] 2025-10-04 23:35:45,526 - root - INFO - step: 1625 loss: 3.3715 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4086 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:35:45,527 - root - INFO - lr: 4.9894e-05 gnorm: 1.41 [ 1:01:37<1 day, 0:15:12] +[titan] 2025-10-04 23:35:56,405 - root - INFO - step: 1630 loss: 3.3383 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4038 global_avg_mtp_loss: 2.9345 +[titan] 2025-10-04 
23:35:56,405 - root - INFO - lr: 4.9893e-05 gnorm: 1.32 [ 1:01:48<1 day, 0:14:49] +[titan] 2025-10-04 23:36:07,309 - root - INFO - step: 1635 loss: 3.4176 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.16% global_avg_ntp_loss: 0.4148 global_avg_mtp_loss: 3.0028 +[titan] 2025-10-04 23:36:07,309 - root - INFO - lr: 4.9892e-05 gnorm: 1.40 [ 1:01:59<1 day, 0:14:27] +[titan] 2025-10-04 23:36:18,303 - root - INFO - step: 1640 loss: 3.3374 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.4052 global_avg_mtp_loss: 2.9322 +[titan] 2025-10-04 23:36:18,304 - root - INFO - lr: 4.9891e-05 gnorm: 1.45 [ 1:02:10<1 day, 0:14:07] +[titan] 2025-10-04 23:36:29,175 - root - INFO - step: 1645 loss: 3.4862 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.4238 global_avg_mtp_loss: 3.0624 +[titan] 2025-10-04 23:36:29,175 - root - INFO - lr: 4.9890e-05 gnorm: 1.49 [ 1:02:20<1 day, 0:13:44] +[titan] 2025-10-04 23:36:37,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:36:40,054 - root - INFO - step: 1650 loss: 3.2615 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8657 +[titan] 2025-10-04 23:36:40,054 - root - INFO - lr: 4.9889e-05 gnorm: 1.45 [ 1:02:31<1 day, 0:13:21] +[titan] 2025-10-04 23:36:50,937 - root - INFO - step: 1655 loss: 3.4016 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4127 global_avg_mtp_loss: 2.9889 +[titan] 2025-10-04 23:36:50,937 - root - INFO - lr: 4.9888e-05 gnorm: 1.34 [ 1:02:42<1 day, 0:12:58] +[titan] 2025-10-04 23:37:01,815 - root - INFO - step: 1660 loss: 3.3760 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4106 global_avg_mtp_loss: 2.9654 +[titan] 2025-10-04 23:37:01,815 - root - INFO - lr: 4.9888e-05 gnorm: 1.33 [ 1:02:53<1 day, 0:12:35] +[titan] 2025-10-04 23:37:12,722 - root - INFO - step: 1665 loss: 3.3861 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.4119 global_avg_mtp_loss: 2.9742 +[titan] 2025-10-04 23:37:12,722 - root - INFO - lr: 4.9887e-05 gnorm: 1.28 [ 1:03:04<1 day, 0:12:13] +[titan] 2025-10-04 23:37:23,672 - root - INFO - step: 1670 loss: 3.3993 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.20 mfu: 41.98% global_avg_ntp_loss: 0.4125 global_avg_mtp_loss: 2.9867 +[titan] 2025-10-04 23:37:23,672 - root - INFO - lr: 4.9886e-05 gnorm: 1.29 [ 1:03:15<1 day, 0:11:53] +[titan] 2025-10-04 23:37:34,543 - root - INFO - step: 1675 loss: 3.3445 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9396 +[titan] 2025-10-04 23:37:34,543 - root - INFO - lr: 4.9885e-05 gnorm: 1.45 [ 1:03:26<1 day, 0:11:30] +[titan] 2025-10-04 23:37:45,421 - root - INFO - step: 1680 loss: 3.4052 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4135 global_avg_mtp_loss: 2.9917 +[titan] 2025-10-04 
23:37:45,421 - root - INFO - lr: 4.9884e-05 gnorm: 1.41 [ 1:03:37<1 day, 0:11:07] +[titan] 2025-10-04 23:37:56,304 - root - INFO - step: 1685 loss: 3.3465 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4066 global_avg_mtp_loss: 2.9399 +[titan] 2025-10-04 23:37:56,305 - root - INFO - lr: 4.9883e-05 gnorm: 1.35 [ 1:03:48<1 day, 0:10:45] +[titan] 2025-10-04 23:38:07,165 - root - INFO - step: 1690 loss: 3.4157 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.4162 global_avg_mtp_loss: 2.9995 +[titan] 2025-10-04 23:38:07,165 - root - INFO - lr: 4.9882e-05 gnorm: 1.31 [ 1:03:58<1 day, 0:10:23] +[titan] 2025-10-04 23:38:18,032 - root - INFO - step: 1695 loss: 3.3211 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.4037 global_avg_mtp_loss: 2.9174 +[titan] 2025-10-04 23:38:18,032 - root - INFO - lr: 4.9881e-05 gnorm: 1.27 [ 1:04:09<1 day, 0:10:00] +[titan] 2025-10-04 23:38:26,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:38:28,977 - root - INFO - step: 1700 loss: 3.4333 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0154 +[titan] 2025-10-04 23:38:28,977 - root - INFO - lr: 4.9880e-05 gnorm: 1.47 [ 1:04:20<1 day, 0:09:39] +[titan] 2025-10-04 23:38:39,826 - root - INFO - step: 1705 loss: 3.3912 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.4113 global_avg_mtp_loss: 2.9799 +[titan] 2025-10-04 23:38:39,826 - root - INFO - lr: 4.9879e-05 gnorm: 1.35 [ 1:04:31<1 day, 0:09:17] +[titan] 2025-10-04 23:38:50,670 - root - INFO - step: 1710 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4158 global_avg_mtp_loss: 3.0159 +[titan] 2025-10-04 23:38:50,670 - root - INFO - lr: 4.9878e-05 gnorm: 1.41 [ 1:04:42<1 day, 0:08:54] +[titan] 2025-10-04 23:39:01,517 - root - INFO - step: 1715 loss: 3.4588 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0386 +[titan] 2025-10-04 23:39:01,517 - root - INFO - lr: 4.9877e-05 gnorm: 1.41 [ 1:04:53<1 day, 0:08:31] +[titan] 2025-10-04 23:39:12,377 - root - INFO - step: 1720 loss: 3.3718 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.4092 global_avg_mtp_loss: 2.9625 +[titan] 2025-10-04 23:39:12,377 - root - INFO - lr: 4.9877e-05 gnorm: 1.24 [ 1:05:04<1 day, 0:08:09] +[titan] 2025-10-04 23:39:23,301 - root - INFO - step: 1725 loss: 3.3446 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.4100 global_avg_mtp_loss: 2.9346 +[titan] 2025-10-04 23:39:23,301 - root - INFO - lr: 4.9876e-05 gnorm: 1.27 [ 1:05:15<1 day, 0:07:48] +[titan] 2025-10-04 23:39:34,194 - root - INFO - step: 1730 loss: 3.4582 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0367 +[titan] 2025-10-04 
23:39:34,195 - root - INFO - lr: 4.9875e-05 gnorm: 1.32 [ 1:05:25<1 day, 0:07:27] +[titan] 2025-10-04 23:39:45,081 - root - INFO - step: 1735 loss: 3.4372 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4157 global_avg_mtp_loss: 3.0215 +[titan] 2025-10-04 23:39:45,081 - root - INFO - lr: 4.9874e-05 gnorm: 1.37 [ 1:05:36<1 day, 0:07:05] +[titan] 2025-10-04 23:39:55,972 - root - INFO - step: 1740 loss: 3.3532 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9463 +[titan] 2025-10-04 23:39:55,972 - root - INFO - lr: 4.9873e-05 gnorm: 1.36 [ 1:05:47<1 day, 0:06:44] +[titan] 2025-10-04 23:40:06,852 - root - INFO - step: 1745 loss: 3.3083 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4022 global_avg_mtp_loss: 2.9061 +[titan] 2025-10-04 23:40:06,853 - root - INFO - lr: 4.9872e-05 gnorm: 1.33 [ 1:05:58<1 day, 0:06:22] +[titan] 2025-10-04 23:40:15,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:40:17,731 - root - INFO - step: 1750 loss: 3.4480 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4191 global_avg_mtp_loss: 3.0289 +[titan] 2025-10-04 23:40:17,731 - root - INFO - lr: 4.9871e-05 gnorm: 1.35 [ 1:06:09<1 day, 0:06:01] +[titan] 2025-10-04 23:40:28,641 - root - INFO - step: 1755 loss: 3.3860 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4107 global_avg_mtp_loss: 2.9753 +[titan] 2025-10-04 23:40:28,641 - root - INFO - lr: 4.9870e-05 gnorm: 1.31 [ 1:06:20<1 day, 0:05:40] +[titan] 2025-10-04 23:40:39,515 - root - INFO - step: 1760 loss: 3.3596 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9525 +[titan] 2025-10-04 23:40:39,516 - root - INFO - lr: 4.9869e-05 gnorm: 1.44 [ 1:06:31<1 day, 0:05:19] +[titan] 2025-10-04 23:40:50,423 - root - INFO - step: 1765 loss: 3.2984 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3986 global_avg_mtp_loss: 2.8998 +[titan] 2025-10-04 23:40:50,423 - root - INFO - lr: 4.9868e-05 gnorm: 1.40 [ 1:06:42<1 day, 0:04:58] +[titan] 2025-10-04 23:41:01,295 - root - INFO - step: 1770 loss: 3.3670 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.4093 global_avg_mtp_loss: 2.9577 +[titan] 2025-10-04 23:41:01,295 - root - INFO - lr: 4.9867e-05 gnorm: 1.37 [ 1:06:53<1 day, 0:04:37] +[titan] 2025-10-04 23:41:12,156 - root - INFO - step: 1775 loss: 3.3745 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.4116 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:41:12,156 - root - INFO - lr: 4.9866e-05 gnorm: 1.36 [ 1:07:03<1 day, 0:04:15] +[titan] 2025-10-04 23:41:23,073 - root - INFO - step: 1780 loss: 3.2774 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 
23:41:23,074 - root - INFO - lr: 4.9865e-05 gnorm: 1.44 [ 1:07:14<1 day, 0:03:55] +[titan] 2025-10-04 23:41:33,936 - root - INFO - step: 1785 loss: 3.3608 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9538 +[titan] 2025-10-04 23:41:33,937 - root - INFO - lr: 4.9864e-05 gnorm: 1.39 [ 1:07:25<1 day, 0:03:33] +[titan] 2025-10-04 23:41:44,811 - root - INFO - step: 1790 loss: 3.3548 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9479 +[titan] 2025-10-04 23:41:44,812 - root - INFO - lr: 4.9863e-05 gnorm: 1.46 [ 1:07:36<1 day, 0:03:12] +[titan] 2025-10-04 23:41:55,714 - root - INFO - step: 1795 loss: 3.4000 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.4121 global_avg_mtp_loss: 2.9879 +[titan] 2025-10-04 23:41:55,715 - root - INFO - lr: 4.9862e-05 gnorm: 1.53 [ 1:07:47<1 day, 0:02:52] +[titan] 2025-10-04 23:42:04,388 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:42:06,574 - root - INFO - step: 1800 loss: 3.3948 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.4124 global_avg_mtp_loss: 2.9824 +[titan] 2025-10-04 23:42:06,574 - root - INFO - lr: 4.9861e-05 gnorm: 1.37 [ 1:07:58<1 day, 0:02:30] +[titan] 2025-10-04 23:42:17,436 - root - INFO - step: 1805 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.4017 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:42:17,437 - root - INFO - lr: 4.9860e-05 gnorm: 1.29 [ 1:08:09<1 day, 0:02:09] +[titan] 2025-10-04 23:42:28,375 - root - INFO - step: 1810 loss: 3.3561 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:42:28,375 - root - INFO - lr: 4.9859e-05 gnorm: 1.39 [ 1:08:20<1 day, 0:01:50] +[titan] 2025-10-04 23:42:39,217 - root - INFO - step: 1815 loss: 3.3053 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.3995 global_avg_mtp_loss: 2.9058 +[titan] 2025-10-04 23:42:39,217 - root - INFO - lr: 4.9858e-05 gnorm: 1.34 [ 1:08:30<1 day, 0:01:28] +[titan] 2025-10-04 23:42:50,059 - root - INFO - step: 1820 loss: 3.2854 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8887 +[titan] 2025-10-04 23:42:50,059 - root - INFO - lr: 4.9857e-05 gnorm: 1.37 [ 1:08:41<1 day, 0:01:07] +[titan] 2025-10-04 23:43:00,958 - root - INFO - step: 1825 loss: 3.3393 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4035 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 23:43:00,958 - root - INFO - lr: 4.9856e-05 gnorm: 1.37 [ 1:08:52<1 day, 0:00:47] +[titan] 2025-10-04 23:43:11,802 - root - INFO - step: 1830 loss: 3.3421 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4062 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 
23:43:11,802 - root - INFO - lr: 4.9855e-05 gnorm: 1.36 [ 1:09:03<1 day, 0:00:25] +[titan] 2025-10-04 23:43:22,644 - root - INFO - step: 1835 loss: 3.3492 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.4055 global_avg_mtp_loss: 2.9437 +[titan] 2025-10-04 23:43:22,645 - root - INFO - lr: 4.9854e-05 gnorm: 1.32 [ 1:09:14<1 day, 0:00:04] +[titan] 2025-10-04 23:43:33,561 - root - INFO - step: 1840 loss: 3.2612 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3942 global_avg_mtp_loss: 2.8670 +[titan] 2025-10-04 23:43:33,562 - root - INFO - lr: 4.9853e-05 gnorm: 1.27 [ 1:09:25<23:59:44] +[titan] 2025-10-04 23:43:44,438 - root - INFO - step: 1845 loss: 3.3605 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9537 +[titan] 2025-10-04 23:43:44,438 - root - INFO - lr: 4.9852e-05 gnorm: 1.27 [ 1:09:36<23:59:24] +[titan] 2025-10-04 23:43:53,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:43:55,315 - root - INFO - step: 1850 loss: 3.3556 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4063 global_avg_mtp_loss: 2.9493 +[titan] 2025-10-04 23:43:55,315 - root - INFO - lr: 4.9851e-05 gnorm: 1.32 [ 1:09:47<23:59:03] +[titan] 2025-10-04 23:44:06,182 - root - INFO - step: 1855 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4016 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:44:06,182 - root - INFO - lr: 4.9850e-05 gnorm: 1.40 [ 1:09:57<23:58:43] +[titan] 2025-10-04 23:44:17,099 - root - INFO - step: 1860 loss: 3.3782 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4085 global_avg_mtp_loss: 2.9697 +[titan] 2025-10-04 23:44:17,099 - root - INFO - lr: 4.9849e-05 gnorm: 1.35 [ 1:10:08<23:58:23] +[titan] 2025-10-04 23:44:28,008 - root - INFO - step: 1865 loss: 3.2855 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3997 global_avg_mtp_loss: 2.8857 +[titan] 2025-10-04 23:44:28,009 - root - INFO - lr: 4.9848e-05 gnorm: 1.35 [ 1:10:19<23:58:04] +[titan] 2025-10-04 23:44:38,889 - root - INFO - step: 1870 loss: 3.3023 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.9043 +[titan] 2025-10-04 23:44:38,889 - root - INFO - lr: 4.9847e-05 gnorm: 1.24 [ 1:10:30<23:57:44] +[titan] 2025-10-04 23:44:49,776 - root - INFO - step: 1875 loss: 3.3134 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.4008 global_avg_mtp_loss: 2.9126 +[titan] 2025-10-04 23:44:49,776 - root - INFO - lr: 4.9846e-05 gnorm: 1.32 [ 1:10:41<23:57:24] +[titan] 2025-10-04 23:45:00,642 - root - INFO - step: 1880 loss: 3.2097 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8207 +[titan] 2025-10-04 23:45:00,642 - root - INFO - lr: 4.9845e-05 
gnorm: 1.33 [ 1:10:52<23:57:03] +[titan] 2025-10-04 23:45:11,496 - root - INFO - step: 1885 loss: 3.2568 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8621 +[titan] 2025-10-04 23:45:11,497 - root - INFO - lr: 4.9844e-05 gnorm: 1.34 [ 1:11:03<23:56:43] +[titan] 2025-10-04 23:45:22,417 - root - INFO - step: 1890 loss: 3.3180 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.4019 global_avg_mtp_loss: 2.9160 +[titan] 2025-10-04 23:45:22,417 - root - INFO - lr: 4.9843e-05 gnorm: 1.39 [ 1:11:14<23:56:24] +[titan] 2025-10-04 23:45:33,318 - root - INFO - step: 1895 loss: 3.2706 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3955 global_avg_mtp_loss: 2.8752 +[titan] 2025-10-04 23:45:33,318 - root - INFO - lr: 4.9842e-05 gnorm: 1.50 [ 1:11:25<23:56:04] +[titan] 2025-10-04 23:45:41,992 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:45:44,171 - root - INFO - step: 1900 loss: 3.2793 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8827 +[titan] 2025-10-04 23:45:44,171 - root - INFO - lr: 4.9841e-05 gnorm: 1.29 [ 1:11:35<23:55:44] +[titan] 2025-10-04 23:45:55,048 - root - INFO - step: 1905 loss: 3.3144 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4029 global_avg_mtp_loss: 2.9115 +[titan] 2025-10-04 23:45:55,048 - root - INFO - lr: 4.9840e-05 gnorm: 1.32 [ 1:11:46<23:55:24] +[titan] 2025-10-04 23:46:05,920 - root - INFO - step: 1910 loss: 3.2864 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3983 global_avg_mtp_loss: 2.8881 +[titan] 2025-10-04 23:46:05,920 - root - INFO - lr: 4.9839e-05 gnorm: 1.32 [ 1:11:57<23:55:04] +[titan] 2025-10-04 23:46:16,784 - root - INFO - step: 1915 loss: 3.2475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8558 +[titan] 2025-10-04 23:46:16,785 - root - INFO - lr: 4.9837e-05 gnorm: 1.28 [ 1:12:08<23:54:44] +[titan] 2025-10-04 23:46:27,699 - root - INFO - step: 1920 loss: 3.3007 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.3987 global_avg_mtp_loss: 2.9020 +[titan] 2025-10-04 23:46:27,700 - root - INFO - lr: 4.9836e-05 gnorm: 1.39 [ 1:12:19<23:54:25] +[titan] 2025-10-04 23:46:38,626 - root - INFO - step: 1925 loss: 3.2659 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3951 global_avg_mtp_loss: 2.8709 +[titan] 2025-10-04 23:46:38,626 - root - INFO - lr: 4.9835e-05 gnorm: 1.32 [ 1:12:30<23:54:06] +[titan] 2025-10-04 23:46:49,497 - root - INFO - step: 1930 loss: 3.2880 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8914 +[titan] 2025-10-04 23:46:49,497 - root - INFO - lr: 4.9834e-05 
gnorm: 1.31 [ 1:12:41<23:53:46] +[titan] 2025-10-04 23:47:00,373 - root - INFO - step: 1935 loss: 3.2719 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8754 +[titan] 2025-10-04 23:47:00,374 - root - INFO - lr: 4.9833e-05 gnorm: 1.33 [ 1:12:52<23:53:27] +[titan] 2025-10-04 23:47:11,263 - root - INFO - step: 1940 loss: 3.3395 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4048 global_avg_mtp_loss: 2.9347 +[titan] 2025-10-04 23:47:11,263 - root - INFO - lr: 4.9832e-05 gnorm: 1.41 [ 1:13:02<23:53:07] +[titan] 2025-10-04 23:47:22,130 - root - INFO - step: 1945 loss: 3.2947 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8976 +[titan] 2025-10-04 23:47:22,130 - root - INFO - lr: 4.9831e-05 gnorm: 1.48 [ 1:13:13<23:52:48] +[titan] 2025-10-04 23:47:30,853 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:47:33,036 - root - INFO - step: 1950 loss: 3.3613 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.4054 global_avg_mtp_loss: 2.9558 +[titan] 2025-10-04 23:47:33,037 - root - INFO - lr: 4.9830e-05 gnorm: 1.34 [ 1:13:24<23:52:29] +[titan] 2025-10-04 23:47:43,944 - root - INFO - step: 1955 loss: 3.2920 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3990 global_avg_mtp_loss: 2.8929 +[titan] 2025-10-04 23:47:43,944 - root - INFO - lr: 4.9829e-05 gnorm: 1.29 [ 1:13:35<23:52:10] +[titan] 2025-10-04 23:47:54,843 - root - INFO - step: 1960 loss: 3.2473 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3941 global_avg_mtp_loss: 2.8533 +[titan] 2025-10-04 23:47:54,843 - root - INFO - lr: 4.9828e-05 gnorm: 1.30 [ 1:13:46<23:51:51] +[titan] 2025-10-04 23:48:05,717 - root - INFO - step: 1965 loss: 3.2766 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3954 global_avg_mtp_loss: 2.8812 +[titan] 2025-10-04 23:48:05,717 - root - INFO - lr: 4.9827e-05 gnorm: 1.23 [ 1:13:57<23:51:32] +[titan] 2025-10-04 23:48:16,623 - root - INFO - step: 1970 loss: 3.2148 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3891 global_avg_mtp_loss: 2.8257 +[titan] 2025-10-04 23:48:16,623 - root - INFO - lr: 4.9825e-05 gnorm: 1.38 [ 1:14:08<23:51:13] +[titan] 2025-10-04 23:48:27,497 - root - INFO - step: 1975 loss: 3.2117 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3887 global_avg_mtp_loss: 2.8230 +[titan] 2025-10-04 23:48:27,497 - root - INFO - lr: 4.9824e-05 gnorm: 1.35 [ 1:14:19<23:50:54] +[titan] 2025-10-04 23:48:38,417 - root - INFO - step: 1980 loss: 3.3095 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.4021 global_avg_mtp_loss: 2.9075 +[titan] 2025-10-04 23:48:38,417 - root - INFO - lr: 4.9823e-05 
gnorm: 1.35 [ 1:14:30<23:50:35] +[titan] 2025-10-04 23:48:49,319 - root - INFO - step: 1985 loss: 3.2797 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 23:48:49,320 - root - INFO - lr: 4.9822e-05 gnorm: 1.26 [ 1:14:41<23:50:17] +[titan] 2025-10-04 23:49:00,192 - root - INFO - step: 1990 loss: 3.3317 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4032 global_avg_mtp_loss: 2.9285 +[titan] 2025-10-04 23:49:00,193 - root - INFO - lr: 4.9821e-05 gnorm: 1.36 [ 1:14:51<23:49:57] +[titan] 2025-10-04 23:49:11,083 - root - INFO - step: 1995 loss: 3.2394 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-04 23:49:11,084 - root - INFO - lr: 4.9820e-05 gnorm: 1.25 [ 1:15:02<23:49:39] +[titan] 2025-10-04 23:49:19,762 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:49:21,941 - root - INFO - step: 2000 loss: 3.2905 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.3991 global_avg_mtp_loss: 2.8913 +[titan] 2025-10-04 23:49:21,941 - root - INFO - lr: 4.9819e-05 gnorm: 1.41 [ 1:15:13<23:49:19] +[titan] 2025-10-04 23:49:32,868 - root - INFO - step: 2005 loss: 3.2217 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8299 +[titan] 2025-10-04 23:49:32,868 - root - INFO - lr: 4.9818e-05 gnorm: 1.41 [ 1:15:24<23:49:01] +[titan] 2025-10-04 23:49:43,749 - root - INFO - step: 2010 loss: 3.2369 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3913 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:49:43,750 - root - INFO - lr: 4.9816e-05 gnorm: 1.33 [ 1:15:35<23:48:42] +[titan] 2025-10-04 23:49:54,661 - root - INFO - step: 2015 loss: 3.2498 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3947 global_avg_mtp_loss: 2.8551 +[titan] 2025-10-04 23:49:54,661 - root - INFO - lr: 4.9815e-05 gnorm: 1.34 [ 1:15:46<23:48:24] +[titan] 2025-10-04 23:50:05,578 - root - INFO - step: 2020 loss: 3.2711 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3939 global_avg_mtp_loss: 2.8772 +[titan] 2025-10-04 23:50:05,579 - root - INFO - lr: 4.9814e-05 gnorm: 1.36 [ 1:15:57<23:48:06] +[titan] 2025-10-04 23:50:16,459 - root - INFO - step: 2025 loss: 3.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3962 global_avg_mtp_loss: 2.8751 +[titan] 2025-10-04 23:50:16,459 - root - INFO - lr: 4.9813e-05 gnorm: 1.26 [ 1:16:08<23:47:47] +[titan] 2025-10-04 23:50:27,328 - root - INFO - step: 2030 loss: 3.2606 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3932 global_avg_mtp_loss: 2.8674 +[titan] 2025-10-04 23:50:27,329 - root - INFO - lr: 4.9812e-05 
gnorm: 1.27 [ 1:16:19<23:47:28] +[titan] 2025-10-04 23:50:38,283 - root - INFO - step: 2035 loss: 3.3063 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3996 global_avg_mtp_loss: 2.9067 +[titan] 2025-10-04 23:50:38,284 - root - INFO - lr: 4.9811e-05 gnorm: 1.35 [ 1:16:30<23:47:11] +[titan] 2025-10-04 23:50:49,166 - root - INFO - step: 2040 loss: 3.1900 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3852 global_avg_mtp_loss: 2.8048 +[titan] 2025-10-04 23:50:49,166 - root - INFO - lr: 4.9810e-05 gnorm: 1.37 [ 1:16:40<23:46:52] +[titan] 2025-10-04 23:51:00,136 - root - INFO - step: 2045 loss: 3.2396 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.3910 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:00,136 - root - INFO - lr: 4.9808e-05 gnorm: 1.30 [ 1:16:51<23:46:35] +[titan] 2025-10-04 23:51:06,838 - root - INFO - Dumping profiler traces at step 2048 +[titan] 2025-10-04 23:51:06,873 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:51:09,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:51:11,279 - root - INFO - step: 2050 loss: 3.2428 memory: 118.84GiB(85.28%) tps: 29,407 tflops: 407.98 mfu: 41.25% global_avg_ntp_loss: 0.3943 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:11,280 - root - INFO - lr: 4.9807e-05 gnorm: 1.39 [ 1:17:02<23:46:21] +[titan] 2025-10-04 23:51:22,173 - root - INFO - step: 2055 loss: 3.3541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:51:22,173 - root - INFO - lr: 4.9806e-05 gnorm: 1.42 [ 1:17:13<23:46:03] +[titan] 2025-10-04 23:51:33,068 - root - INFO - step: 2060 loss: 3.2810 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3960 global_avg_mtp_loss: 2.8850 +[titan] 2025-10-04 23:51:33,069 - root - INFO - lr: 4.9805e-05 gnorm: 1.33 [ 1:17:24<23:45:45] +[titan] 2025-10-04 23:51:43,943 - root - INFO - step: 2065 loss: 3.2366 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:51:43,943 - root - INFO - lr: 4.9804e-05 gnorm: 1.45 [ 1:17:35<23:45:26] +[titan] 2025-10-04 23:51:54,802 - root - INFO - step: 2070 loss: 3.2400 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3911 global_avg_mtp_loss: 2.8489 +[titan] 2025-10-04 23:51:54,802 - root - INFO - lr: 4.9803e-05 gnorm: 1.37 [ 1:17:46<23:45:07] +[titan] 2025-10-04 23:52:05,671 - root - INFO - step: 2075 loss: 3.2363 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3927 global_avg_mtp_loss: 2.8436 +[titan] 2025-10-04 23:52:05,671 - root - INFO - lr: 4.9801e-05 gnorm: 1.32 [ 1:17:57<23:44:49] +[titan] 2025-10-04 23:52:16,539 - root - INFO - step: 2080 loss: 3.1819 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3837 global_avg_mtp_loss: 2.7983 +[titan] 2025-10-04 23:52:16,539 - root - INFO - lr: 4.9800e-05 
gnorm: 1.25 [ 1:18:08<23:44:30] +[titan] 2025-10-04 23:52:27,458 - root - INFO - step: 2085 loss: 3.2817 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8845 +[titan] 2025-10-04 23:52:27,458 - root - INFO - lr: 4.9799e-05 gnorm: 1.31 [ 1:18:19<23:44:12] +[titan] 2025-10-04 23:52:38,351 - root - INFO - step: 2090 loss: 3.2776 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3974 global_avg_mtp_loss: 2.8802 +[titan] 2025-10-04 23:52:38,351 - root - INFO - lr: 4.9798e-05 gnorm: 1.27 [ 1:18:30<23:43:54] +[titan] 2025-10-04 23:52:49,245 - root - INFO - step: 2095 loss: 3.2401 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8479 +[titan] 2025-10-04 23:52:49,245 - root - INFO - lr: 4.9797e-05 gnorm: 1.35 [ 1:18:40<23:43:36] +[titan] 2025-10-04 23:52:57,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:53:00,142 - root - INFO - step: 2100 loss: 3.1666 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3827 global_avg_mtp_loss: 2.7839 +[titan] 2025-10-04 23:53:00,142 - root - INFO - lr: 4.9795e-05 gnorm: 1.31 [ 1:18:51<23:43:18] +[titan] 2025-10-04 23:53:11,021 - root - INFO - step: 2105 loss: 3.1171 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3762 global_avg_mtp_loss: 2.7409 +[titan] 2025-10-04 23:53:11,021 - root - INFO - lr: 4.9794e-05 gnorm: 1.45 [ 1:19:02<23:43:00] +[titan] 2025-10-04 23:53:21,893 - root - INFO - step: 2110 loss: 3.2816 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3984 global_avg_mtp_loss: 2.8833 +[titan] 2025-10-04 23:53:21,894 - root - INFO - lr: 4.9793e-05 gnorm: 1.35 [ 1:19:13<23:42:42] +[titan] 2025-10-04 23:53:32,852 - root - INFO - step: 2115 loss: 3.2607 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8663 +[titan] 2025-10-04 23:53:32,852 - root - INFO - lr: 4.9792e-05 gnorm: 1.27 [ 1:19:24<23:42:25] +[titan] 2025-10-04 23:53:43,730 - root - INFO - step: 2120 loss: 3.2629 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8683 +[titan] 2025-10-04 23:53:43,730 - root - INFO - lr: 4.9791e-05 gnorm: 1.25 [ 1:19:35<23:42:07] +[titan] 2025-10-04 23:53:54,620 - root - INFO - step: 2125 loss: 3.0920 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3725 global_avg_mtp_loss: 2.7195 +[titan] 2025-10-04 23:53:54,620 - root - INFO - lr: 4.9789e-05 gnorm: 1.37 [ 1:19:46<23:41:49] +[titan] 2025-10-04 23:54:05,507 - root - INFO - step: 2130 loss: 3.2038 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8165 +[titan] 2025-10-04 23:54:05,508 - root - INFO - lr: 4.9788e-05 
gnorm: 1.28 [ 1:19:57<23:41:31] +[titan] 2025-10-04 23:54:16,404 - root - INFO - step: 2135 loss: 3.1616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.3810 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-04 23:54:16,404 - root - INFO - lr: 4.9787e-05 gnorm: 1.27 [ 1:20:08<23:41:13] +[titan] 2025-10-04 23:54:27,282 - root - INFO - step: 2140 loss: 3.1455 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3792 global_avg_mtp_loss: 2.7663 +[titan] 2025-10-04 23:54:27,282 - root - INFO - lr: 4.9786e-05 gnorm: 1.36 [ 1:20:18<23:40:55] +[titan] 2025-10-04 23:54:38,216 - root - INFO - step: 2145 loss: 3.1443 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7655 +[titan] 2025-10-04 23:54:38,216 - root - INFO - lr: 4.9785e-05 gnorm: 1.24 [ 1:20:29<23:40:38] +[titan] 2025-10-04 23:54:46,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:54:49,106 - root - INFO - step: 2150 loss: 3.2432 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3921 global_avg_mtp_loss: 2.8511 +[titan] 2025-10-04 23:54:49,106 - root - INFO - lr: 4.9783e-05 gnorm: 1.23 [ 1:20:40<23:40:20] +[titan] 2025-10-04 23:54:59,985 - root - INFO - step: 2155 loss: 3.1416 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7620 +[titan] 2025-10-04 23:54:59,985 - root - INFO - lr: 4.9782e-05 gnorm: 1.23 [ 1:20:51<23:40:02] +[titan] 2025-10-04 23:55:10,860 - root - INFO - step: 2160 loss: 3.1386 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7597 +[titan] 2025-10-04 23:55:10,860 - root - INFO - lr: 4.9781e-05 gnorm: 1.27 [ 1:21:02<23:39:44] +[titan] 2025-10-04 23:55:21,730 - root - INFO - step: 2165 loss: 3.2482 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8559 +[titan] 2025-10-04 23:55:21,730 - root - INFO - lr: 4.9780e-05 gnorm: 1.29 [ 1:21:13<23:39:26] +[titan] 2025-10-04 23:55:32,617 - root - INFO - step: 2170 loss: 3.2349 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3914 global_avg_mtp_loss: 2.8435 +[titan] 2025-10-04 23:55:32,617 - root - INFO - lr: 4.9778e-05 gnorm: 1.22 [ 1:21:24<23:39:09] +[titan] 2025-10-04 23:55:43,541 - root - INFO - step: 2175 loss: 3.2325 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.3901 global_avg_mtp_loss: 2.8424 +[titan] 2025-10-04 23:55:43,542 - root - INFO - lr: 4.9777e-05 gnorm: 1.32 [ 1:21:35<23:38:52] +[titan] 2025-10-04 23:55:54,482 - root - INFO - step: 2180 loss: 3.1551 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7749 +[titan] 2025-10-04 23:55:54,482 - root - INFO - lr: 4.9776e-05 
gnorm: 1.29 [ 1:21:46<23:38:35] +[titan] 2025-10-04 23:56:05,357 - root - INFO - step: 2185 loss: 3.2187 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3882 global_avg_mtp_loss: 2.8305 +[titan] 2025-10-04 23:56:05,357 - root - INFO - lr: 4.9775e-05 gnorm: 1.37 [ 1:21:57<23:38:17] +[titan] 2025-10-04 23:56:16,252 - root - INFO - step: 2190 loss: 3.1722 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7900 +[titan] 2025-10-04 23:56:16,253 - root - INFO - lr: 4.9773e-05 gnorm: 1.44 [ 1:22:07<23:38:00] +[titan] 2025-10-04 23:56:27,132 - root - INFO - step: 2195 loss: 3.1685 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3823 global_avg_mtp_loss: 2.7862 +[titan] 2025-10-04 23:56:27,132 - root - INFO - lr: 4.9772e-05 gnorm: 1.33 [ 1:22:18<23:37:42] +[titan] 2025-10-04 23:56:35,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:56:38,038 - root - INFO - step: 2200 loss: 3.1985 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3856 global_avg_mtp_loss: 2.8129 +[titan] 2025-10-04 23:56:38,038 - root - INFO - lr: 4.9771e-05 gnorm: 1.31 [ 1:22:29<23:37:25] +[titan] 2025-10-04 23:56:48,912 - root - INFO - step: 2205 loss: 3.2059 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.8192 +[titan] 2025-10-04 23:56:48,913 - root - INFO - lr: 4.9769e-05 gnorm: 1.26 [ 1:22:40<23:37:07] +[titan] 2025-10-04 23:56:59,840 - root - INFO - step: 2210 loss: 3.1541 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7729 +[titan] 2025-10-04 23:56:59,840 - root - INFO - lr: 4.9768e-05 gnorm: 1.42 [ 1:22:51<23:36:51] +[titan] 2025-10-04 23:57:10,737 - root - INFO - step: 2215 loss: 3.2356 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3903 global_avg_mtp_loss: 2.8453 +[titan] 2025-10-04 23:57:10,737 - root - INFO - lr: 4.9767e-05 gnorm: 1.51 [ 1:23:02<23:36:33] +[titan] 2025-10-04 23:57:21,630 - root - INFO - step: 2220 loss: 3.1859 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.7992 +[titan] 2025-10-04 23:57:21,630 - root - INFO - lr: 4.9766e-05 gnorm: 1.39 [ 1:23:13<23:36:16] +[titan] 2025-10-04 23:57:32,532 - root - INFO - step: 2225 loss: 3.1779 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3842 global_avg_mtp_loss: 2.7936 +[titan] 2025-10-04 23:57:32,532 - root - INFO - lr: 4.9764e-05 gnorm: 1.24 [ 1:23:24<23:35:59] +[titan] 2025-10-04 23:57:43,450 - root - INFO - step: 2230 loss: 3.2176 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.3895 global_avg_mtp_loss: 2.8282 +[titan] 2025-10-04 23:57:43,450 - root - INFO - lr: 4.9763e-05 
gnorm: 1.28 [ 1:23:35<23:35:42] +[titan] 2025-10-04 23:57:54,366 - root - INFO - step: 2235 loss: 3.2212 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3879 global_avg_mtp_loss: 2.8333 +[titan] 2025-10-04 23:57:54,366 - root - INFO - lr: 4.9762e-05 gnorm: 1.35 [ 1:23:46<23:35:25] +[titan] 2025-10-04 23:58:05,251 - root - INFO - step: 2240 loss: 3.2781 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8824 +[titan] 2025-10-04 23:58:05,252 - root - INFO - lr: 4.9760e-05 gnorm: 1.39 [ 1:23:56<23:35:08] +[titan] 2025-10-04 23:58:16,173 - root - INFO - step: 2245 loss: 3.1710 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7885 +[titan] 2025-10-04 23:58:16,174 - root - INFO - lr: 4.9759e-05 gnorm: 1.30 [ 1:24:07<23:34:51] +[titan] 2025-10-04 23:58:24,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:58:27,069 - root - INFO - step: 2250 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7772 +[titan] 2025-10-04 23:58:27,069 - root - INFO - lr: 4.9758e-05 gnorm: 1.33 [ 1:24:18<23:34:34] +[titan] 2025-10-04 23:58:37,973 - root - INFO - step: 2255 loss: 3.2917 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3969 global_avg_mtp_loss: 2.8947 +[titan] 2025-10-04 23:58:37,973 - root - INFO - lr: 4.9757e-05 gnorm: 1.34 [ 1:24:29<23:34:17] +[titan] 2025-10-04 23:58:48,849 - root - INFO - step: 2260 loss: 3.1742 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3832 global_avg_mtp_loss: 2.7911 +[titan] 2025-10-04 23:58:48,849 - root - INFO - lr: 4.9755e-05 gnorm: 1.32 [ 1:24:40<23:34:00] +[titan] 2025-10-04 23:58:59,727 - root - INFO - step: 2265 loss: 3.1716 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3830 global_avg_mtp_loss: 2.7886 +[titan] 2025-10-04 23:58:59,727 - root - INFO - lr: 4.9754e-05 gnorm: 1.31 [ 1:24:51<23:33:43] +[titan] 2025-10-04 23:59:10,618 - root - INFO - step: 2270 loss: 3.2242 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8352 +[titan] 2025-10-04 23:59:10,618 - root - INFO - lr: 4.9753e-05 gnorm: 1.31 [ 1:25:02<23:33:26] +[titan] 2025-10-04 23:59:21,547 - root - INFO - step: 2275 loss: 3.2006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3858 global_avg_mtp_loss: 2.8148 +[titan] 2025-10-04 23:59:21,547 - root - INFO - lr: 4.9751e-05 gnorm: 1.30 [ 1:25:13<23:33:09] +[titan] 2025-10-04 23:59:32,439 - root - INFO - step: 2280 loss: 3.1251 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7491 +[titan] 2025-10-04 23:59:32,439 - root - INFO - lr: 4.9750e-05 
gnorm: 1.25 [ 1:25:24<23:32:52] +[titan] 2025-10-04 23:59:43,315 - root - INFO - step: 2285 loss: 3.1971 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3843 global_avg_mtp_loss: 2.8128 +[titan] 2025-10-04 23:59:43,315 - root - INFO - lr: 4.9749e-05 gnorm: 1.24 [ 1:25:35<23:32:35] +[titan] 2025-10-04 23:59:54,211 - root - INFO - step: 2290 loss: 3.1138 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3754 global_avg_mtp_loss: 2.7384 +[titan] 2025-10-04 23:59:54,211 - root - INFO - lr: 4.9747e-05 gnorm: 1.30 [ 1:25:45<23:32:18] +[titan] 2025-10-05 00:00:05,066 - root - INFO - step: 2295 loss: 3.1381 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3802 global_avg_mtp_loss: 2.7579 +[titan] 2025-10-05 00:00:05,067 - root - INFO - lr: 4.9746e-05 gnorm: 1.34 [ 1:25:56<23:32:01] +[titan] 2025-10-05 00:00:13,755 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:00:15,940 - root - INFO - step: 2300 loss: 3.1684 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7859 +[titan] 2025-10-05 00:00:15,940 - root - INFO - lr: 4.9745e-05 gnorm: 1.25 [ 1:26:07<23:31:44] +[titan] 2025-10-05 00:00:26,871 - root - INFO - step: 2305 loss: 3.1673 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3819 global_avg_mtp_loss: 2.7853 +[titan] 2025-10-05 00:00:26,871 - root - INFO - lr: 4.9743e-05 gnorm: 1.22 [ 1:26:18<23:31:28] +[titan] 2025-10-05 00:00:37,762 - root - INFO - step: 2310 loss: 3.1531 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7719 +[titan] 2025-10-05 00:00:37,762 - root - INFO - lr: 4.9742e-05 gnorm: 1.30 [ 1:26:29<23:31:11] +[titan] 2025-10-05 00:00:48,669 - root - INFO - step: 2315 loss: 3.1583 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3795 global_avg_mtp_loss: 2.7788 +[titan] 2025-10-05 00:00:48,669 - root - INFO - lr: 4.9741e-05 gnorm: 1.22 [ 1:26:40<23:30:54] +[titan] 2025-10-05 00:00:59,522 - root - INFO - step: 2320 loss: 3.1995 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.3851 global_avg_mtp_loss: 2.8144 +[titan] 2025-10-05 00:00:59,522 - root - INFO - lr: 4.9739e-05 gnorm: 1.29 [ 1:26:51<23:30:37] +[titan] 2025-10-05 00:01:10,409 - root - INFO - step: 2325 loss: 3.1550 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3800 global_avg_mtp_loss: 2.7750 +[titan] 2025-10-05 00:01:10,409 - root - INFO - lr: 4.9738e-05 gnorm: 1.29 [ 1:27:02<23:30:20] +[titan] 2025-10-05 00:01:21,286 - root - INFO - step: 2330 loss: 3.1042 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3734 global_avg_mtp_loss: 2.7308 +[titan] 2025-10-05 00:01:21,286 - root - INFO - lr: 4.9737e-05 
gnorm: 1.25 [ 1:27:12<23:30:03] +[titan] 2025-10-05 00:01:32,170 - root - INFO - step: 2335 loss: 3.1428 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.3775 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:01:32,170 - root - INFO - lr: 4.9735e-05 gnorm: 1.24 [ 1:27:23<23:29:46] +[titan] 2025-10-05 00:01:43,255 - root - INFO - step: 2340 loss: 3.2357 memory: 118.84GiB(85.28%) tps: 29,561 tflops: 410.11 mfu: 41.47% global_avg_ntp_loss: 0.3959 global_avg_mtp_loss: 2.8398 +[titan] 2025-10-05 00:01:43,255 - root - INFO - lr: 4.9734e-05 gnorm: 1.31 [ 1:27:34<23:29:33] +[titan] 2025-10-05 00:01:54,139 - root - INFO - step: 2345 loss: 3.2594 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.3938 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:01:54,139 - root - INFO - lr: 4.9732e-05 gnorm: 1.30 [ 1:27:45<23:29:16] +[titan] 2025-10-05 00:02:02,829 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:02:05,010 - root - INFO - step: 2350 loss: 3.1385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3798 global_avg_mtp_loss: 2.7587 +[titan] 2025-10-05 00:02:05,010 - root - INFO - lr: 4.9731e-05 gnorm: 1.30 [ 1:27:56<23:28:59] +[titan] 2025-10-05 00:02:15,898 - root - INFO - step: 2355 loss: 3.1702 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3826 global_avg_mtp_loss: 2.7876 +[titan] 2025-10-05 00:02:15,898 - root - INFO - lr: 4.9730e-05 gnorm: 1.32 [ 1:28:07<23:28:42] +[titan] 2025-10-05 00:02:26,769 - root - INFO - step: 2360 loss: 3.1893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8043 +[titan] 2025-10-05 00:02:26,769 - root - INFO - lr: 4.9728e-05 gnorm: 1.43 [ 1:28:18<23:28:25] +[titan] 2025-10-05 00:02:37,640 - root - INFO - step: 2365 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7481 +[titan] 2025-10-05 00:02:37,640 - root - INFO - lr: 4.9727e-05 gnorm: 1.39 [ 1:28:29<23:28:09] +[titan] 2025-10-05 00:02:48,598 - root - INFO - step: 2370 loss: 3.1988 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3869 global_avg_mtp_loss: 2.8118 +[titan] 2025-10-05 00:02:48,598 - root - INFO - lr: 4.9726e-05 gnorm: 1.28 [ 1:28:40<23:27:53] +[titan] 2025-10-05 00:02:59,464 - root - INFO - step: 2375 loss: 3.1613 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3814 global_avg_mtp_loss: 2.7799 +[titan] 2025-10-05 00:02:59,464 - root - INFO - lr: 4.9724e-05 gnorm: 1.31 [ 1:28:51<23:27:36] +[titan] 2025-10-05 00:03:10,332 - root - INFO - step: 2380 loss: 3.2049 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3860 global_avg_mtp_loss: 2.8189 +[titan] 2025-10-05 00:03:10,332 - root - INFO - lr: 4.9723e-05 
gnorm: 1.34 [ 1:29:02<23:27:19] +[titan] 2025-10-05 00:03:21,196 - root - INFO - step: 2385 loss: 3.1936 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3835 global_avg_mtp_loss: 2.8101 +[titan] 2025-10-05 00:03:21,196 - root - INFO - lr: 4.9721e-05 gnorm: 1.30 [ 1:29:12<23:27:02] +[titan] 2025-10-05 00:03:32,051 - root - INFO - step: 2390 loss: 3.2440 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3919 global_avg_mtp_loss: 2.8521 +[titan] 2025-10-05 00:03:32,051 - root - INFO - lr: 4.9720e-05 gnorm: 1.33 [ 1:29:23<23:26:45] +[titan] 2025-10-05 00:03:42,978 - root - INFO - step: 2395 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3771 global_avg_mtp_loss: 2.7475 +[titan] 2025-10-05 00:03:42,978 - root - INFO - lr: 4.9719e-05 gnorm: 1.31 [ 1:29:34<23:26:30] +[titan] 2025-10-05 00:03:51,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:03:53,851 - root - INFO - step: 2400 loss: 3.2662 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8718 +[titan] 2025-10-05 00:03:53,851 - root - INFO - lr: 4.9717e-05 gnorm: 1.40 [ 1:29:45<23:26:13] +[titan] 2025-10-05 00:04:04,749 - root - INFO - step: 2405 loss: 3.2406 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-05 00:04:04,749 - root - INFO - lr: 4.9716e-05 gnorm: 1.38 [ 1:29:56<23:25:57] +[titan] 2025-10-05 00:04:15,630 - root - INFO - step: 2410 loss: 3.1271 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7504 +[titan] 2025-10-05 00:04:15,630 - root - INFO - lr: 4.9714e-05 gnorm: 1.27 [ 1:30:07<23:25:40] +[titan] 2025-10-05 00:04:26,491 - root - INFO - step: 2415 loss: 3.1402 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3779 global_avg_mtp_loss: 2.7623 +[titan] 2025-10-05 00:04:26,491 - root - INFO - lr: 4.9713e-05 gnorm: 1.39 [ 1:30:18<23:25:23] +[titan] 2025-10-05 00:04:37,350 - root - INFO - step: 2420 loss: 3.1746 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7924 +[titan] 2025-10-05 00:04:37,350 - root - INFO - lr: 4.9711e-05 gnorm: 1.45 [ 1:30:29<23:25:07] +[titan] 2025-10-05 00:04:48,268 - root - INFO - step: 2425 loss: 3.1765 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3831 global_avg_mtp_loss: 2.7934 +[titan] 2025-10-05 00:04:48,269 - root - INFO - lr: 4.9710e-05 gnorm: 1.42 [ 1:30:39<23:24:51] +[titan] 2025-10-05 00:04:59,129 - root - INFO - step: 2430 loss: 3.2456 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8539 +[titan] 2025-10-05 00:04:59,129 - root - INFO - lr: 4.9709e-05 
gnorm: 1.29 [ 1:30:50<23:24:34] +[titan] 2025-10-05 00:05:10,040 - root - INFO - step: 2435 loss: 3.0885 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3728 global_avg_mtp_loss: 2.7157 +[titan] 2025-10-05 00:05:10,040 - root - INFO - lr: 4.9707e-05 gnorm: 1.30 [ 1:31:01<23:24:18] +[titan] 2025-10-05 00:05:20,901 - root - INFO - step: 2440 loss: 3.1883 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3839 global_avg_mtp_loss: 2.8044 +[titan] 2025-10-05 00:05:20,901 - root - INFO - lr: 4.9706e-05 gnorm: 1.29 [ 1:31:12<23:24:01] +[titan] 2025-10-05 00:05:31,767 - root - INFO - step: 2445 loss: 3.1123 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3742 global_avg_mtp_loss: 2.7381 +[titan] 2025-10-05 00:05:31,767 - root - INFO - lr: 4.9704e-05 gnorm: 1.28 [ 1:31:23<23:23:45] +[titan] 2025-10-05 00:05:40,460 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:05:42,650 - root - INFO - step: 2450 loss: 3.1786 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3845 global_avg_mtp_loss: 2.7941 +[titan] 2025-10-05 00:05:42,650 - root - INFO - lr: 4.9703e-05 gnorm: 1.27 [ 1:31:34<23:23:29] +[titan] 2025-10-05 00:05:53,573 - root - INFO - step: 2455 loss: 3.1398 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3790 global_avg_mtp_loss: 2.7608 +[titan] 2025-10-05 00:05:53,573 - root - INFO - lr: 4.9701e-05 gnorm: 1.27 [ 1:31:45<23:23:13] +[titan] 2025-10-05 00:06:04,454 - root - INFO - step: 2460 loss: 3.2308 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8350 +[titan] 2025-10-05 00:06:04,455 - root - INFO - lr: 4.9700e-05 gnorm: 2.69 [ 1:31:56<23:22:57] +[titan] 2025-10-05 00:06:15,398 - root - INFO - step: 2465 loss: 3.1213 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7452 +[titan] 2025-10-05 00:06:15,398 - root - INFO - lr: 4.9698e-05 gnorm: 1.28 [ 1:32:07<23:22:41] +[titan] 2025-10-05 00:06:26,299 - root - INFO - step: 2470 loss: 3.1059 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3749 global_avg_mtp_loss: 2.7310 +[titan] 2025-10-05 00:06:26,299 - root - INFO - lr: 4.9697e-05 gnorm: 1.29 [ 1:32:17<23:22:25] +[titan] 2025-10-05 00:06:37,192 - root - INFO - step: 2475 loss: 3.1051 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3745 global_avg_mtp_loss: 2.7306 +[titan] 2025-10-05 00:06:37,192 - root - INFO - lr: 4.9696e-05 gnorm: 1.31 [ 1:32:28<23:22:09] +[titan] 2025-10-05 00:06:48,155 - root - INFO - step: 2480 loss: 3.1093 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.69 mfu: 41.93% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7359 +[titan] 2025-10-05 00:06:48,155 - root - INFO - lr: 4.9694e-05 
gnorm: 1.32 [ 1:32:39<23:21:54] +[titan] 2025-10-05 00:06:59,038 - root - INFO - step: 2485 loss: 3.1283 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3767 global_avg_mtp_loss: 2.7516 +[titan] 2025-10-05 00:06:59,038 - root - INFO - lr: 4.9693e-05 gnorm: 1.34 [ 1:32:50<23:21:38] +[titan] 2025-10-05 00:07:09,901 - root - INFO - step: 2490 loss: 3.1376 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7607 +[titan] 2025-10-05 00:07:09,901 - root - INFO - lr: 4.9691e-05 gnorm: 1.34 [ 1:33:01<23:21:22] +[titan] 2025-10-05 00:07:20,803 - root - INFO - step: 2495 loss: 3.1543 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7736 +[titan] 2025-10-05 00:07:20,803 - root - INFO - lr: 4.9690e-05 gnorm: 1.36 [ 1:33:12<23:21:06] +[titan] 2025-10-05 00:07:29,527 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:07:31,706 - root - INFO - step: 2500 loss: 3.1575 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7787 +[titan] 2025-10-05 00:07:31,706 - root - INFO - lr: 4.9688e-05 gnorm: 1.31 [ 1:33:23<23:20:50] +[titan] 2025-10-05 00:07:42,568 - root - INFO - step: 2505 loss: 3.1325 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3760 global_avg_mtp_loss: 2.7566 +[titan] 2025-10-05 00:07:42,568 - root - INFO - lr: 4.9687e-05 gnorm: 1.22 [ 1:33:34<23:20:34] +[titan] 2025-10-05 00:07:53,496 - root - INFO - step: 2510 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3718 global_avg_mtp_loss: 2.7142 +[titan] 2025-10-05 00:07:53,497 - root - INFO - lr: 4.9685e-05 gnorm: 1.31 [ 1:33:45<23:20:18] +[titan] 2025-10-05 00:08:04,377 - root - INFO - step: 2515 loss: 3.2003 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3862 global_avg_mtp_loss: 2.8141 +[titan] 2025-10-05 00:08:04,378 - root - INFO - lr: 4.9684e-05 gnorm: 1.43 [ 1:33:56<23:20:02] +[titan] 2025-10-05 00:08:15,255 - root - INFO - step: 2520 loss: 3.1816 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3847 global_avg_mtp_loss: 2.7969 +[titan] 2025-10-05 00:08:15,255 - root - INFO - lr: 4.9682e-05 gnorm: 1.38 [ 1:34:06<23:19:46] +[titan] 2025-10-05 00:08:26,136 - root - INFO - step: 2525 loss: 3.2579 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:08:26,136 - root - INFO - lr: 4.9681e-05 gnorm: 1.37 [ 1:34:17<23:19:30] +[titan] 2025-10-05 00:08:37,049 - root - INFO - step: 2530 loss: 3.1078 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7346 +[titan] 2025-10-05 00:08:37,049 - root - INFO - lr: 4.9679e-05 
gnorm: 1.28 [ 1:34:28<23:19:15] +[titan] 2025-10-05 00:08:48,046 - root - INFO - step: 2535 loss: 3.0953 memory: 118.84GiB(85.28%) tps: 29,797 tflops: 413.39 mfu: 41.80% global_avg_ntp_loss: 0.3719 global_avg_mtp_loss: 2.7233 +[titan] 2025-10-05 00:08:48,047 - root - INFO - lr: 4.9678e-05 gnorm: 1.25 [ 1:34:39<23:19:01] +[titan] 2025-10-05 00:08:58,919 - root - INFO - step: 2540 loss: 3.1620 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3803 global_avg_mtp_loss: 2.7817 +[titan] 2025-10-05 00:08:58,919 - root - INFO - lr: 4.9676e-05 gnorm: 1.26 [ 1:34:50<23:18:45] +[titan] 2025-10-05 00:09:09,786 - root - INFO - step: 2545 loss: 3.1667 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3815 global_avg_mtp_loss: 2.7852 +[titan] 2025-10-05 00:09:09,786 - root - INFO - lr: 4.9675e-05 gnorm: 1.40 [ 1:35:01<23:18:28] +[titan] 2025-10-05 00:09:18,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:09:20,677 - root - INFO - step: 2550 loss: 3.0790 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3704 global_avg_mtp_loss: 2.7086 +[titan] 2025-10-05 00:09:20,677 - root - INFO - lr: 4.9673e-05 gnorm: 1.34 [ 1:35:12<23:18:13] +[titan] 2025-10-05 00:09:31,556 - root - INFO - step: 2555 loss: 3.0389 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3662 global_avg_mtp_loss: 2.6727 +[titan] 2025-10-05 00:09:31,557 - root - INFO - lr: 4.9672e-05 gnorm: 1.31 [ 1:35:23<23:17:57] +[titan] 2025-10-05 00:09:42,516 - root - INFO - step: 2560 loss: 3.1285 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.3755 global_avg_mtp_loss: 2.7530 +[titan] 2025-10-05 00:09:42,516 - root - INFO - lr: 4.9670e-05 gnorm: 1.23 [ 1:35:34<23:17:42] +[titan] 2025-10-05 00:09:42,679 - root - INFO - Dumping profiler traces at step 2560 +[titan] 2025-10-05 00:09:42,715 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:09:53,687 - root - INFO - step: 2565 loss: 3.0635 memory: 118.84GiB(85.28%) tps: 29,334 tflops: 406.97 mfu: 41.15% global_avg_ntp_loss: 0.3659 global_avg_mtp_loss: 2.6976 +[titan] 2025-10-05 00:09:53,687 - root - INFO - lr: 4.9669e-05 gnorm: 1.33 [ 1:35:45<23:17:30] +[titan] 2025-10-05 00:10:04,566 - root - INFO - step: 2570 loss: 3.0420 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6772 +[titan] 2025-10-05 00:10:04,566 - root - INFO - lr: 4.9667e-05 gnorm: 1.29 [ 1:35:56<23:17:14] +[titan] 2025-10-05 00:10:15,470 - root - INFO - step: 2575 loss: 3.2085 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8213 +[titan] 2025-10-05 00:10:15,471 - root - INFO - lr: 4.9666e-05 gnorm: 1.30 [ 1:36:07<23:16:59] +[titan] 2025-10-05 00:10:26,384 - root - INFO - step: 2580 loss: 3.2105 
memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3855 global_avg_mtp_loss: 2.8250 +[titan] 2025-10-05 00:10:26,384 - root - INFO - lr: 4.9664e-05 gnorm: 1.29 [ 1:36:18<23:16:44] +[titan] 2025-10-05 00:10:37,260 - root - INFO - step: 2585 loss: 3.0856 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3698 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:10:37,261 - root - INFO - lr: 4.9663e-05 gnorm: 1.30 [ 1:36:28<23:16:28] +[titan] 2025-10-05 00:10:48,212 - root - INFO - step: 2590 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.3650 global_avg_mtp_loss: 2.6717 +[titan] 2025-10-05 00:10:48,212 - root - INFO - lr: 4.9661e-05 gnorm: 1.26 [ 1:36:39<23:16:13] +[titan] 2025-10-05 00:10:59,142 - root - INFO - step: 2595 loss: 3.1492 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.3799 global_avg_mtp_loss: 2.7693 +[titan] 2025-10-05 00:10:59,142 - root - INFO - lr: 4.9659e-05 gnorm: 1.24 [ 1:36:50<23:15:58] +[titan] 2025-10-05 00:11:07,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:11:10,032 - root - INFO - step: 2600 loss: 3.0911 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3727 global_avg_mtp_loss: 2.7185 +[titan] 2025-10-05 00:11:10,033 - root - INFO - lr: 4.9658e-05 gnorm: 1.22 [ 1:37:01<23:15:42] +[titan] 2025-10-05 00:11:20,915 - root - INFO - step: 2605 loss: 3.1578 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3808 global_avg_mtp_loss: 2.7770 +[titan] 2025-10-05 00:11:20,915 - root - INFO - lr: 4.9656e-05 gnorm: 1.26 [ 1:37:12<23:15:27] +[titan] 2025-10-05 00:11:31,815 - root - INFO - step: 2610 loss: 3.1088 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3741 global_avg_mtp_loss: 2.7348 +[titan] 2025-10-05 00:11:31,815 - root - INFO - lr: 4.9655e-05 gnorm: 1.25 [ 1:37:23<23:15:11] +[titan] 2025-10-05 00:11:42,699 - root - INFO - step: 2615 loss: 3.1165 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7413 +[titan] 2025-10-05 00:11:42,699 - root - INFO - lr: 4.9653e-05 gnorm: 1.30 [ 1:37:34<23:14:56] +[titan] 2025-10-05 00:11:53,594 - root - INFO - step: 2620 loss: 3.1397 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7617 +[titan] 2025-10-05 00:11:53,594 - root - INFO - lr: 4.9652e-05 gnorm: 1.27 [ 1:37:45<23:14:40] +[titan] 2025-10-05 00:12:04,505 - root - INFO - step: 2625 loss: 3.1215 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7463 +[titan] 2025-10-05 00:12:04,505 - root - INFO - lr: 4.9650e-05 gnorm: 1.33 [ 1:37:56<23:14:25] +[titan] 2025-10-05 00:12:15,389 - root - INFO - step: 2630 loss: 3.1525 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7737 +[titan] 2025-10-05 00:12:15,390 - root - INFO - lr: 4.9649e-05 
gnorm: 1.27 [ 1:38:07<23:14:09] +[titan] 2025-10-05 00:12:26,270 - root - INFO - step: 2635 loss: 3.1176 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3751 global_avg_mtp_loss: 2.7424 +[titan] 2025-10-05 00:12:26,271 - root - INFO - lr: 4.9647e-05 gnorm: 1.30 [ 1:38:17<23:13:54] +[titan] 2025-10-05 00:12:37,153 - root - INFO - step: 2640 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6977 +[titan] 2025-10-05 00:12:37,153 - root - INFO - lr: 4.9645e-05 gnorm: 1.28 [ 1:38:28<23:13:38] +[titan] 2025-10-05 00:12:48,055 - root - INFO - step: 2645 loss: 3.1119 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7387 +[titan] 2025-10-05 00:12:48,055 - root - INFO - lr: 4.9644e-05 gnorm: 1.30 [ 1:38:39<23:13:23] +[titan] 2025-10-05 00:12:56,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:12:58,983 - root - INFO - step: 2650 loss: 3.0548 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6872 +[titan] 2025-10-05 00:12:58,983 - root - INFO - lr: 4.9642e-05 gnorm: 1.23 [ 1:38:50<23:13:08] +[titan] 2025-10-05 00:13:09,879 - root - INFO - step: 2655 loss: 3.0496 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3671 global_avg_mtp_loss: 2.6826 +[titan] 2025-10-05 00:13:09,879 - root - INFO - lr: 4.9641e-05 gnorm: 1.28 [ 1:39:01<23:12:53] +[titan] 2025-10-05 00:13:20,805 - root - INFO - step: 2660 loss: 3.1186 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.3759 global_avg_mtp_loss: 2.7427 +[titan] 2025-10-05 00:13:20,805 - root - INFO - lr: 4.9639e-05 gnorm: 1.25 [ 1:39:12<23:12:38] +[titan] 2025-10-05 00:13:31,679 - root - INFO - step: 2665 loss: 3.0573 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3678 global_avg_mtp_loss: 2.6895 +[titan] 2025-10-05 00:13:31,680 - root - INFO - lr: 4.9637e-05 gnorm: 1.25 [ 1:39:23<23:12:22] +[titan] 2025-10-05 00:13:42,558 - root - INFO - step: 2670 loss: 3.0570 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3663 global_avg_mtp_loss: 2.6907 +[titan] 2025-10-05 00:13:42,558 - root - INFO - lr: 4.9636e-05 gnorm: 1.26 [ 1:39:34<23:12:07] +[titan] 2025-10-05 00:13:53,472 - root - INFO - step: 2675 loss: 3.1878 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8028 +[titan] 2025-10-05 00:13:53,472 - root - INFO - lr: 4.9634e-05 gnorm: 1.31 [ 1:39:45<23:11:52] +[titan] 2025-10-05 00:14:04,364 - root - INFO - step: 2680 loss: 3.1135 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3738 global_avg_mtp_loss: 2.7397 +[titan] 2025-10-05 00:14:04,365 - root - INFO - lr: 4.9633e-05 
gnorm: 1.22 [ 1:39:56<23:11:36] +[titan] 2025-10-05 00:14:15,279 - root - INFO - step: 2685 loss: 3.0010 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3606 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:14:15,279 - root - INFO - lr: 4.9631e-05 gnorm: 1.32 [ 1:40:06<23:11:21] +[titan] 2025-10-05 00:14:26,223 - root - INFO - step: 2690 loss: 3.1084 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.3737 global_avg_mtp_loss: 2.7347 +[titan] 2025-10-05 00:14:26,223 - root - INFO - lr: 4.9629e-05 gnorm: 1.28 [ 1:40:17<23:11:07] +[titan] 2025-10-05 00:14:37,114 - root - INFO - step: 2695 loss: 3.1301 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3758 global_avg_mtp_loss: 2.7543 +[titan] 2025-10-05 00:14:37,114 - root - INFO - lr: 4.9628e-05 gnorm: 1.31 [ 1:40:28<23:10:52] +[titan] 2025-10-05 00:14:45,831 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:14:48,028 - root - INFO - step: 2700 loss: 3.0874 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3690 global_avg_mtp_loss: 2.7184 +[titan] 2025-10-05 00:14:48,029 - root - INFO - lr: 4.9626e-05 gnorm: 1.38 [ 1:40:39<23:10:37] +[titan] 2025-10-05 00:14:58,931 - root - INFO - step: 2705 loss: 3.1260 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.3750 global_avg_mtp_loss: 2.7509 +[titan] 2025-10-05 00:14:58,931 - root - INFO - lr: 4.9625e-05 gnorm: 1.28 [ 1:40:50<23:10:22] +[titan] 2025-10-05 00:15:09,812 - root - INFO - step: 2710 loss: 3.0477 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3643 global_avg_mtp_loss: 2.6834 +[titan] 2025-10-05 00:15:09,812 - root - INFO - lr: 4.9623e-05 gnorm: 1.29 [ 1:41:01<23:10:06] +[titan] 2025-10-05 00:15:20,681 - root - INFO - step: 2715 loss: 2.9784 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3575 global_avg_mtp_loss: 2.6209 +[titan] 2025-10-05 00:15:20,681 - root - INFO - lr: 4.9621e-05 gnorm: 1.39 [ 1:41:12<23:09:51] +[titan] 2025-10-05 00:15:31,544 - root - INFO - step: 2720 loss: 3.0989 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3712 global_avg_mtp_loss: 2.7276 +[titan] 2025-10-05 00:15:31,544 - root - INFO - lr: 4.9620e-05 gnorm: 1.28 [ 1:41:23<23:09:35] +[titan] 2025-10-05 00:15:42,481 - root - INFO - step: 2725 loss: 3.0279 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3634 global_avg_mtp_loss: 2.6645 +[titan] 2025-10-05 00:15:42,482 - root - INFO - lr: 4.9618e-05 gnorm: 1.38 [ 1:41:34<23:09:21] +[titan] 2025-10-05 00:15:53,371 - root - INFO - step: 2730 loss: 3.0629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3682 global_avg_mtp_loss: 2.6946 +[titan] 2025-10-05 00:15:53,371 - root - INFO - lr: 4.9616e-05 
gnorm: 1.27 [ 1:41:45<23:09:05] +[titan] 2025-10-05 00:16:04,250 - root - INFO - step: 2735 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6257 +[titan] 2025-10-05 00:16:04,250 - root - INFO - lr: 4.9615e-05 gnorm: 1.32 [ 1:41:55<23:08:50] +[titan] 2025-10-05 00:16:15,152 - root - INFO - step: 2740 loss: 3.0246 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6608 +[titan] 2025-10-05 00:16:15,152 - root - INFO - lr: 4.9613e-05 gnorm: 1.29 [ 1:42:06<23:08:35] +[titan] 2025-10-05 00:16:26,041 - root - INFO - step: 2745 loss: 3.1571 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7775 +[titan] 2025-10-05 00:16:26,041 - root - INFO - lr: 4.9611e-05 gnorm: 1.28 [ 1:42:17<23:08:20] +[titan] 2025-10-05 00:16:34,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:16:36,918 - root - INFO - step: 2750 loss: 3.0736 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3686 global_avg_mtp_loss: 2.7050 +[titan] 2025-10-05 00:16:36,919 - root - INFO - lr: 4.9610e-05 gnorm: 1.24 [ 1:42:28<23:08:05] +[titan] 2025-10-05 00:16:47,865 - root - INFO - step: 2755 loss: 2.9899 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6309 +[titan] 2025-10-05 00:16:47,865 - root - INFO - lr: 4.9608e-05 gnorm: 1.22 [ 1:42:39<23:07:50] +[titan] 2025-10-05 00:16:58,851 - root - INFO - step: 2760 loss: 3.0390 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.3657 global_avg_mtp_loss: 2.6733 +[titan] 2025-10-05 00:16:58,851 - root - INFO - lr: 4.9606e-05 gnorm: 1.33 [ 1:42:50<23:07:37] +[titan] 2025-10-05 00:17:09,727 - root - INFO - step: 2765 loss: 3.1133 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3740 global_avg_mtp_loss: 2.7394 +[titan] 2025-10-05 00:17:09,727 - root - INFO - lr: 4.9605e-05 gnorm: 1.30 [ 1:43:01<23:07:21] +[titan] 2025-10-05 00:17:20,607 - root - INFO - step: 2770 loss: 3.0638 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3695 global_avg_mtp_loss: 2.6943 +[titan] 2025-10-05 00:17:20,607 - root - INFO - lr: 4.9603e-05 gnorm: 1.35 [ 1:43:12<23:07:06] +[titan] 2025-10-05 00:17:31,517 - root - INFO - step: 2775 loss: 3.0938 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3716 global_avg_mtp_loss: 2.7222 +[titan] 2025-10-05 00:17:31,517 - root - INFO - lr: 4.9601e-05 gnorm: 1.26 [ 1:43:23<23:06:51] +[titan] 2025-10-05 00:17:42,399 - root - INFO - step: 2780 loss: 3.0126 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6519 +[titan] 2025-10-05 00:17:42,399 - root - INFO - lr: 4.9600e-05 
gnorm: 1.30 [ 1:43:34<23:06:36] +[titan] 2025-10-05 00:17:53,331 - root - INFO - step: 2785 loss: 3.0873 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3714 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:17:53,331 - root - INFO - lr: 4.9598e-05 gnorm: 1.28 [ 1:43:44<23:06:22] +[titan] 2025-10-05 00:18:04,263 - root - INFO - step: 2790 loss: 3.0185 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.3627 global_avg_mtp_loss: 2.6559 +[titan] 2025-10-05 00:18:04,263 - root - INFO - lr: 4.9596e-05 gnorm: 1.33 [ 1:43:55<23:06:07] +[titan] 2025-10-05 00:18:15,157 - root - INFO - step: 2795 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3677 global_avg_mtp_loss: 2.6975 +[titan] 2025-10-05 00:18:15,157 - root - INFO - lr: 4.9595e-05 gnorm: 1.25 [ 1:44:06<23:05:52] +[titan] 2025-10-05 00:18:23,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:18:26,054 - root - INFO - step: 2800 loss: 3.0213 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6588 +[titan] 2025-10-05 00:18:26,054 - root - INFO - lr: 4.9593e-05 gnorm: 1.28 [ 1:44:17<23:05:38] +[titan] 2025-10-05 00:18:36,954 - root - INFO - step: 2805 loss: 3.1425 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3783 global_avg_mtp_loss: 2.7642 +[titan] 2025-10-05 00:18:36,954 - root - INFO - lr: 4.9591e-05 gnorm: 1.28 [ 1:44:28<23:05:23] +[titan] 2025-10-05 00:18:47,864 - root - INFO - step: 2810 loss: 3.0392 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3638 global_avg_mtp_loss: 2.6754 +[titan] 2025-10-05 00:18:47,864 - root - INFO - lr: 4.9590e-05 gnorm: 1.27 [ 1:44:39<23:05:08] +[titan] 2025-10-05 00:18:58,796 - root - INFO - step: 2815 loss: 3.0728 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3684 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:18:58,796 - root - INFO - lr: 4.9588e-05 gnorm: 1.28 [ 1:44:50<23:04:54] +[titan] 2025-10-05 00:19:09,768 - root - INFO - step: 2820 loss: 3.0759 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.3697 global_avg_mtp_loss: 2.7062 +[titan] 2025-10-05 00:19:09,768 - root - INFO - lr: 4.9586e-05 gnorm: 1.28 [ 1:45:01<23:04:40] +[titan] 2025-10-05 00:19:20,659 - root - INFO - step: 2825 loss: 3.0518 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3667 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:19:20,659 - root - INFO - lr: 4.9585e-05 gnorm: 1.38 [ 1:45:12<23:04:25] +[titan] 2025-10-05 00:19:31,538 - root - INFO - step: 2830 loss: 3.1035 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7302 +[titan] 2025-10-05 00:19:31,538 - root - INFO - lr: 4.9583e-05 
gnorm: 1.34 [ 1:45:23<23:04:10] +[titan] 2025-10-05 00:19:42,419 - root - INFO - step: 2835 loss: 3.0685 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3683 global_avg_mtp_loss: 2.7002 +[titan] 2025-10-05 00:19:42,419 - root - INFO - lr: 4.9581e-05 gnorm: 1.37 [ 1:45:34<23:03:55] +[titan] 2025-10-05 00:19:53,306 - root - INFO - step: 2840 loss: 3.0223 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3652 global_avg_mtp_loss: 2.6571 +[titan] 2025-10-05 00:19:53,306 - root - INFO - lr: 4.9579e-05 gnorm: 1.32 [ 1:45:44<23:03:40] +[titan] 2025-10-05 00:20:04,219 - root - INFO - step: 2845 loss: 3.0274 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3633 global_avg_mtp_loss: 2.6641 +[titan] 2025-10-05 00:20:04,220 - root - INFO - lr: 4.9578e-05 gnorm: 1.28 [ 1:45:55<23:03:25] +[titan] 2025-10-05 00:20:12,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:20:15,145 - root - INFO - step: 2850 loss: 3.0430 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3661 global_avg_mtp_loss: 2.6770 +[titan] 2025-10-05 00:20:15,145 - root - INFO - lr: 4.9576e-05 gnorm: 1.26 [ 1:46:06<23:03:11] +[titan] 2025-10-05 00:20:26,027 - root - INFO - step: 2855 loss: 3.0893 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7209 +[titan] 2025-10-05 00:20:26,027 - root - INFO - lr: 4.9574e-05 gnorm: 1.27 [ 1:46:17<23:02:56] +[titan] 2025-10-05 00:20:36,904 - root - INFO - step: 2860 loss: 3.0960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3724 global_avg_mtp_loss: 2.7236 +[titan] 2025-10-05 00:20:36,904 - root - INFO - lr: 4.9573e-05 gnorm: 1.28 [ 1:46:28<23:02:41] +[titan] 2025-10-05 00:20:47,806 - root - INFO - step: 2865 loss: 3.1434 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:20:47,806 - root - INFO - lr: 4.9571e-05 gnorm: 1.30 [ 1:46:39<23:02:27] +[titan] 2025-10-05 00:20:58,761 - root - INFO - step: 2870 loss: 2.9969 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:20:58,761 - root - INFO - lr: 4.9569e-05 gnorm: 1.30 [ 1:46:50<23:02:13] +[titan] 2025-10-05 00:21:09,643 - root - INFO - step: 2875 loss: 3.0232 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3635 global_avg_mtp_loss: 2.6597 +[titan] 2025-10-05 00:21:09,643 - root - INFO - lr: 4.9567e-05 gnorm: 1.30 [ 1:47:01<23:01:58] +[titan] 2025-10-05 00:21:20,548 - root - INFO - step: 2880 loss: 2.9737 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3570 global_avg_mtp_loss: 2.6167 +[titan] 2025-10-05 00:21:20,548 - root - INFO - lr: 4.9566e-05 
gnorm: 1.28 [ 1:47:12<23:01:43] +[titan] 2025-10-05 00:21:31,529 - root - INFO - step: 2885 loss: 3.0875 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.3720 global_avg_mtp_loss: 2.7155 +[titan] 2025-10-05 00:21:31,530 - root - INFO - lr: 4.9564e-05 gnorm: 1.25 [ 1:47:23<23:01:30] +[titan] 2025-10-05 00:21:42,407 - root - INFO - step: 2890 loss: 3.0347 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6705 +[titan] 2025-10-05 00:21:42,407 - root - INFO - lr: 4.9562e-05 gnorm: 1.38 [ 1:47:34<23:01:15] +[titan] 2025-10-05 00:21:53,280 - root - INFO - step: 2895 loss: 3.0145 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3610 global_avg_mtp_loss: 2.6535 +[titan] 2025-10-05 00:21:53,280 - root - INFO - lr: 4.9560e-05 gnorm: 1.22 [ 1:47:44<23:01:00] +[titan] 2025-10-05 00:22:02,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:22:04,199 - root - INFO - step: 2900 loss: 3.1605 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-05 00:22:04,199 - root - INFO - lr: 4.9559e-05 gnorm: 1.35 [ 1:47:55<23:00:46] +[titan] 2025-10-05 00:22:15,084 - root - INFO - step: 2905 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3703 global_avg_mtp_loss: 2.7158 +[titan] 2025-10-05 00:22:15,084 - root - INFO - lr: 4.9557e-05 gnorm: 1.29 [ 1:48:06<23:00:31] +[titan] 2025-10-05 00:22:25,962 - root - INFO - step: 2910 loss: 3.0022 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6423 +[titan] 2025-10-05 00:22:25,962 - root - INFO - lr: 4.9555e-05 gnorm: 1.31 [ 1:48:17<23:00:16] +[titan] 2025-10-05 00:22:36,871 - root - INFO - step: 2915 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3797 global_avg_mtp_loss: 2.7783 +[titan] 2025-10-05 00:22:36,871 - root - INFO - lr: 4.9553e-05 gnorm: 1.42 [ 1:48:28<23:00:02] +[titan] 2025-10-05 00:22:47,815 - root - INFO - step: 2920 loss: 3.0326 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3653 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:22:47,815 - root - INFO - lr: 4.9552e-05 gnorm: 1.30 [ 1:48:39<22:59:48] +[titan] 2025-10-05 00:22:58,703 - root - INFO - step: 2925 loss: 3.0724 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3681 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:22:58,703 - root - INFO - lr: 4.9550e-05 gnorm: 1.34 [ 1:48:50<22:59:33] +[titan] 2025-10-05 00:23:09,632 - root - INFO - step: 2930 loss: 3.0482 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6818 +[titan] 2025-10-05 00:23:09,633 - root - INFO - lr: 4.9548e-05 
gnorm: 1.23 [ 1:49:01<22:59:19] +[titan] 2025-10-05 00:23:20,517 - root - INFO - step: 2935 loss: 2.9200 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5708 +[titan] 2025-10-05 00:23:20,517 - root - INFO - lr: 4.9546e-05 gnorm: 1.28 [ 1:49:12<22:59:04] +[titan] 2025-10-05 00:23:31,391 - root - INFO - step: 2940 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6615 +[titan] 2025-10-05 00:23:31,391 - root - INFO - lr: 4.9544e-05 gnorm: 1.25 [ 1:49:23<22:58:49] +[titan] 2025-10-05 00:23:42,322 - root - INFO - step: 2945 loss: 3.1473 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7704 +[titan] 2025-10-05 00:23:42,322 - root - INFO - lr: 4.9543e-05 gnorm: 1.35 [ 1:49:33<22:58:35] +[titan] 2025-10-05 00:23:51,004 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:23:53,182 - root - INFO - step: 2950 loss: 3.0250 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6620 +[titan] 2025-10-05 00:23:53,183 - root - INFO - lr: 4.9541e-05 gnorm: 1.26 [ 1:49:44<22:58:20] +[titan] 2025-10-05 00:24:04,100 - root - INFO - step: 2955 loss: 2.9887 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3579 global_avg_mtp_loss: 2.6308 +[titan] 2025-10-05 00:24:04,100 - root - INFO - lr: 4.9539e-05 gnorm: 1.32 [ 1:49:55<22:58:06] +[titan] 2025-10-05 00:24:14,957 - root - INFO - step: 2960 loss: 2.9752 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6188 +[titan] 2025-10-05 00:24:14,957 - root - INFO - lr: 4.9537e-05 gnorm: 1.29 [ 1:50:06<22:57:51] +[titan] 2025-10-05 00:24:25,824 - root - INFO - step: 2965 loss: 3.0670 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3670 global_avg_mtp_loss: 2.7000 +[titan] 2025-10-05 00:24:25,824 - root - INFO - lr: 4.9535e-05 gnorm: 1.36 [ 1:50:17<22:57:36] +[titan] 2025-10-05 00:24:36,677 - root - INFO - step: 2970 loss: 3.0105 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3605 global_avg_mtp_loss: 2.6500 +[titan] 2025-10-05 00:24:36,677 - root - INFO - lr: 4.9534e-05 gnorm: 1.28 [ 1:50:28<22:57:21] +[titan] 2025-10-05 00:24:47,550 - root - INFO - step: 2975 loss: 3.0798 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7113 +[titan] 2025-10-05 00:24:47,550 - root - INFO - lr: 4.9532e-05 gnorm: 1.26 [ 1:50:39<22:57:07] +[titan] 2025-10-05 00:24:58,508 - root - INFO - step: 2980 loss: 3.0933 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7212 +[titan] 2025-10-05 00:24:58,508 - root - INFO - lr: 4.9530e-05 
gnorm: 1.34 [ 1:50:50<22:56:53] +[titan] 2025-10-05 00:25:09,436 - root - INFO - step: 2985 loss: 2.9918 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6318 +[titan] 2025-10-05 00:25:09,436 - root - INFO - lr: 4.9528e-05 gnorm: 1.29 [ 1:51:01<22:56:39] +[titan] 2025-10-05 00:25:20,336 - root - INFO - step: 2990 loss: 3.0864 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3699 global_avg_mtp_loss: 2.7165 +[titan] 2025-10-05 00:25:20,336 - root - INFO - lr: 4.9526e-05 gnorm: 1.30 [ 1:51:11<22:56:25] +[titan] 2025-10-05 00:25:31,210 - root - INFO - step: 2995 loss: 3.0152 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3613 global_avg_mtp_loss: 2.6538 +[titan] 2025-10-05 00:25:31,210 - root - INFO - lr: 4.9525e-05 gnorm: 1.34 [ 1:51:22<22:56:10] +[titan] 2025-10-05 00:25:39,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:25:42,077 - root - INFO - step: 3000 loss: 2.9639 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6087 +[titan] 2025-10-05 00:25:42,077 - root - INFO - lr: 4.9523e-05 gnorm: 1.20 [ 1:51:33<22:55:55] +[titan] 2025-10-05 00:25:52,956 - root - INFO - step: 3005 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6274 +[titan] 2025-10-05 00:25:52,956 - root - INFO - lr: 4.9521e-05 gnorm: 1.25 [ 1:51:44<22:55:41] +[titan] 2025-10-05 00:26:03,943 - root - INFO - step: 3010 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:26:03,943 - root - INFO - lr: 4.9519e-05 gnorm: 1.25 [ 1:51:55<22:55:27] +[titan] 2025-10-05 00:26:14,799 - root - INFO - step: 3015 loss: 2.9622 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6081 +[titan] 2025-10-05 00:26:14,799 - root - INFO - lr: 4.9517e-05 gnorm: 1.20 [ 1:52:06<22:55:13] +[titan] 2025-10-05 00:26:25,658 - root - INFO - step: 3020 loss: 3.1014 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7293 +[titan] 2025-10-05 00:26:25,658 - root - INFO - lr: 4.9515e-05 gnorm: 1.29 [ 1:52:17<22:54:58] +[titan] 2025-10-05 00:26:36,501 - root - INFO - step: 3025 loss: 3.0035 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.3588 global_avg_mtp_loss: 2.6447 +[titan] 2025-10-05 00:26:36,501 - root - INFO - lr: 4.9514e-05 gnorm: 1.22 [ 1:52:28<22:54:43] +[titan] 2025-10-05 00:26:47,370 - root - INFO - step: 3030 loss: 2.9868 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3589 global_avg_mtp_loss: 2.6279 +[titan] 2025-10-05 00:26:47,370 - root - INFO - lr: 4.9512e-05 
gnorm: 1.28 [ 1:52:38<22:54:28] +[titan] 2025-10-05 00:26:58,255 - root - INFO - step: 3035 loss: 3.0690 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.7021 +[titan] 2025-10-05 00:26:58,255 - root - INFO - lr: 4.9510e-05 gnorm: 1.29 [ 1:52:49<22:54:14] +[titan] 2025-10-05 00:27:09,176 - root - INFO - step: 3040 loss: 2.9415 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5893 +[titan] 2025-10-05 00:27:09,176 - root - INFO - lr: 4.9508e-05 gnorm: 1.23 [ 1:53:00<22:54:00] +[titan] 2025-10-05 00:27:20,081 - root - INFO - step: 3045 loss: 2.9565 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.6029 +[titan] 2025-10-05 00:27:20,081 - root - INFO - lr: 4.9506e-05 gnorm: 1.31 [ 1:53:11<22:53:46] +[titan] 2025-10-05 00:27:28,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:27:30,926 - root - INFO - step: 3050 loss: 3.0382 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.6713 +[titan] 2025-10-05 00:27:30,926 - root - INFO - lr: 4.9504e-05 gnorm: 1.32 [ 1:53:22<22:53:31] +[titan] 2025-10-05 00:27:41,788 - root - INFO - step: 3055 loss: 2.9038 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5556 +[titan] 2025-10-05 00:27:41,788 - root - INFO - lr: 4.9502e-05 gnorm: 1.27 [ 1:53:33<22:53:16] +[titan] 2025-10-05 00:27:52,674 - root - INFO - step: 3060 loss: 3.0259 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3639 global_avg_mtp_loss: 2.6619 +[titan] 2025-10-05 00:27:52,674 - root - INFO - lr: 4.9501e-05 gnorm: 1.32 [ 1:53:44<22:53:02] +[titan] 2025-10-05 00:28:03,564 - root - INFO - step: 3065 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6732 +[titan] 2025-10-05 00:28:03,564 - root - INFO - lr: 4.9499e-05 gnorm: 1.39 [ 1:53:55<22:52:47] +[titan] 2025-10-05 00:28:14,505 - root - INFO - step: 3070 loss: 2.9931 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.3595 global_avg_mtp_loss: 2.6336 +[titan] 2025-10-05 00:28:14,505 - root - INFO - lr: 4.9497e-05 gnorm: 1.46 [ 1:54:06<22:52:34] +[titan] 2025-10-05 00:28:19,014 - root - INFO - Dumping profiler traces at step 3072 +[titan] 2025-10-05 00:28:19,050 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:28:25,635 - root - INFO - step: 3075 loss: 2.9714 memory: 118.84GiB(85.28%) tps: 29,442 tflops: 408.46 mfu: 41.30% global_avg_ntp_loss: 0.3583 global_avg_mtp_loss: 2.6131 +[titan] 2025-10-05 00:28:25,635 - root - INFO - lr: 4.9495e-05 gnorm: 1.38 [ 1:54:17<22:52:22] +[titan] 2025-10-05 00:28:36,484 - root - INFO - step: 3080 loss: 3.0383 
memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3644 global_avg_mtp_loss: 2.6739 +[titan] 2025-10-05 00:28:36,484 - root - INFO - lr: 4.9493e-05 gnorm: 1.27 [ 1:54:28<22:52:08] +[titan] 2025-10-05 00:28:47,350 - root - INFO - step: 3085 loss: 3.0016 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:28:47,350 - root - INFO - lr: 4.9491e-05 gnorm: 1.28 [ 1:54:38<22:51:53] +[titan] 2025-10-05 00:28:58,198 - root - INFO - step: 3090 loss: 2.8733 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 00:28:58,198 - root - INFO - lr: 4.9489e-05 gnorm: 1.28 [ 1:54:49<22:51:38] +[titan] 2025-10-05 00:29:09,096 - root - INFO - step: 3095 loss: 3.0415 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6767 +[titan] 2025-10-05 00:29:09,096 - root - INFO - lr: 4.9487e-05 gnorm: 1.33 [ 1:55:00<22:51:24] +[titan] 2025-10-05 00:29:17,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:29:19,960 - root - INFO - step: 3100 loss: 2.9482 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.5947 +[titan] 2025-10-05 00:29:19,960 - root - INFO - lr: 4.9485e-05 gnorm: 1.33 [ 1:55:11<22:51:10] +[titan] 2025-10-05 00:29:30,867 - root - INFO - step: 3105 loss: 2.9859 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6283 +[titan] 2025-10-05 00:29:30,868 - root - INFO - lr: 4.9484e-05 gnorm: 1.27 [ 1:55:22<22:50:56] +[titan] 2025-10-05 00:29:41,783 - root - INFO - step: 3110 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6306 +[titan] 2025-10-05 00:29:41,784 - root - INFO - lr: 4.9482e-05 gnorm: 1.30 [ 1:55:33<22:50:42] +[titan] 2025-10-05 00:29:52,657 - root - INFO - step: 3115 loss: 2.9941 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6352 +[titan] 2025-10-05 00:29:52,657 - root - INFO - lr: 4.9480e-05 gnorm: 1.24 [ 1:55:44<22:50:27] +[titan] 2025-10-05 00:30:03,529 - root - INFO - step: 3120 loss: 3.0041 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3611 global_avg_mtp_loss: 2.6431 +[titan] 2025-10-05 00:30:03,529 - root - INFO - lr: 4.9478e-05 gnorm: 1.22 [ 1:55:55<22:50:13] +[titan] 2025-10-05 00:30:14,438 - root - INFO - step: 3125 loss: 2.9712 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6162 +[titan] 2025-10-05 00:30:14,438 - root - INFO - lr: 4.9476e-05 gnorm: 1.28 [ 1:56:06<22:49:59] +[titan] 2025-10-05 00:30:25,289 - root - INFO - step: 3130 loss: 2.9425 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:30:25,289 - root - INFO - lr: 4.9474e-05 
gnorm: 1.28 [ 1:56:16<22:49:44] +[titan] 2025-10-05 00:30:36,160 - root - INFO - step: 3135 loss: 3.0775 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3696 global_avg_mtp_loss: 2.7079 +[titan] 2025-10-05 00:30:36,160 - root - INFO - lr: 4.9472e-05 gnorm: 1.26 [ 1:56:27<22:49:30] +[titan] 2025-10-05 00:30:47,054 - root - INFO - step: 3140 loss: 3.0122 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6492 +[titan] 2025-10-05 00:30:47,054 - root - INFO - lr: 4.9470e-05 gnorm: 1.22 [ 1:56:38<22:49:16] +[titan] 2025-10-05 00:30:57,914 - root - INFO - step: 3145 loss: 3.0169 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6560 +[titan] 2025-10-05 00:30:57,914 - root - INFO - lr: 4.9468e-05 gnorm: 1.27 [ 1:56:49<22:49:01] +[titan] 2025-10-05 00:31:06,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:31:08,816 - root - INFO - step: 3150 loss: 2.9327 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3545 global_avg_mtp_loss: 2.5782 +[titan] 2025-10-05 00:31:08,816 - root - INFO - lr: 4.9466e-05 gnorm: 1.26 [ 1:57:00<22:48:47] +[titan] 2025-10-05 00:31:19,715 - root - INFO - step: 3155 loss: 3.0434 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3631 global_avg_mtp_loss: 2.6803 +[titan] 2025-10-05 00:31:19,715 - root - INFO - lr: 4.9464e-05 gnorm: 1.33 [ 1:57:11<22:48:33] +[titan] 2025-10-05 00:31:30,598 - root - INFO - step: 3160 loss: 2.9152 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5676 +[titan] 2025-10-05 00:31:30,598 - root - INFO - lr: 4.9462e-05 gnorm: 1.28 [ 1:57:22<22:48:19] +[titan] 2025-10-05 00:31:41,469 - root - INFO - step: 3165 loss: 3.0228 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6602 +[titan] 2025-10-05 00:31:41,469 - root - INFO - lr: 4.9460e-05 gnorm: 1.32 [ 1:57:33<22:48:05] +[titan] 2025-10-05 00:31:52,401 - root - INFO - step: 3170 loss: 2.9954 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:31:52,401 - root - INFO - lr: 4.9459e-05 gnorm: 1.39 [ 1:57:44<22:47:51] +[titan] 2025-10-05 00:32:03,274 - root - INFO - step: 3175 loss: 2.9805 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.6231 +[titan] 2025-10-05 00:32:03,274 - root - INFO - lr: 4.9457e-05 gnorm: 1.26 [ 1:57:54<22:47:37] +[titan] 2025-10-05 00:32:14,178 - root - INFO - step: 3180 loss: 3.0141 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.3598 global_avg_mtp_loss: 2.6543 +[titan] 2025-10-05 00:32:14,178 - root - INFO - lr: 4.9455e-05 
gnorm: 1.31 [ 1:58:05<22:47:23] +[titan] 2025-10-05 00:32:25,055 - root - INFO - step: 3185 loss: 3.0493 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3641 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:32:25,055 - root - INFO - lr: 4.9453e-05 gnorm: 1.37 [ 1:58:16<22:47:09] +[titan] 2025-10-05 00:32:35,936 - root - INFO - step: 3190 loss: 2.9654 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6093 +[titan] 2025-10-05 00:32:35,936 - root - INFO - lr: 4.9451e-05 gnorm: 1.29 [ 1:58:27<22:46:55] +[titan] 2025-10-05 00:32:46,815 - root - INFO - step: 3195 loss: 2.9889 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3566 global_avg_mtp_loss: 2.6323 +[titan] 2025-10-05 00:32:46,815 - root - INFO - lr: 4.9449e-05 gnorm: 1.28 [ 1:58:38<22:46:41] +[titan] 2025-10-05 00:32:55,521 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:32:57,705 - root - INFO - step: 3200 loss: 2.9502 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.5953 +[titan] 2025-10-05 00:32:57,705 - root - INFO - lr: 4.9447e-05 gnorm: 1.30 [ 1:58:49<22:46:27] +[titan] 2025-10-05 00:33:08,681 - root - INFO - step: 3205 loss: 2.9709 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.22 mfu: 41.88% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6155 +[titan] 2025-10-05 00:33:08,681 - root - INFO - lr: 4.9445e-05 gnorm: 1.23 [ 1:59:00<22:46:14] +[titan] 2025-10-05 00:33:19,557 - root - INFO - step: 3210 loss: 2.9185 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5693 +[titan] 2025-10-05 00:33:19,558 - root - INFO - lr: 4.9443e-05 gnorm: 1.28 [ 1:59:11<22:45:59] +[titan] 2025-10-05 00:33:30,432 - root - INFO - step: 3215 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3514 global_avg_mtp_loss: 2.5956 +[titan] 2025-10-05 00:33:30,432 - root - INFO - lr: 4.9441e-05 gnorm: 1.39 [ 1:59:22<22:45:45] +[titan] 2025-10-05 00:33:41,300 - root - INFO - step: 3220 loss: 3.0300 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3628 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:33:41,300 - root - INFO - lr: 4.9439e-05 gnorm: 1.32 [ 1:59:32<22:45:31] +[titan] 2025-10-05 00:33:52,166 - root - INFO - step: 3225 loss: 3.0123 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6515 +[titan] 2025-10-05 00:33:52,166 - root - INFO - lr: 4.9437e-05 gnorm: 1.29 [ 1:59:43<22:45:17] +[titan] 2025-10-05 00:34:03,015 - root - INFO - step: 3230 loss: 3.0282 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3622 global_avg_mtp_loss: 2.6660 +[titan] 2025-10-05 00:34:03,015 - root - INFO - lr: 4.9435e-05 
gnorm: 1.29 [ 1:59:54<22:45:02] +[titan] 2025-10-05 00:34:13,972 - root - INFO - step: 3235 loss: 3.0440 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3646 global_avg_mtp_loss: 2.6794 +[titan] 2025-10-05 00:34:13,973 - root - INFO - lr: 4.9433e-05 gnorm: 1.27 [ 2:00:05<22:44:49] +[titan] 2025-10-05 00:34:24,817 - root - INFO - step: 3240 loss: 2.9616 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:34:24,817 - root - INFO - lr: 4.9431e-05 gnorm: 1.21 [ 2:00:16<22:44:35] +[titan] 2025-10-05 00:34:35,664 - root - INFO - step: 3245 loss: 3.0402 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6760 +[titan] 2025-10-05 00:34:35,664 - root - INFO - lr: 4.9429e-05 gnorm: 1.23 [ 2:00:27<22:44:20] +[titan] 2025-10-05 00:34:44,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:34:46,540 - root - INFO - step: 3250 loss: 3.0298 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3617 global_avg_mtp_loss: 2.6681 +[titan] 2025-10-05 00:34:46,540 - root - INFO - lr: 4.9427e-05 gnorm: 1.26 [ 2:00:38<22:44:06] +[titan] 2025-10-05 00:34:57,421 - root - INFO - step: 3255 loss: 2.9633 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6078 +[titan] 2025-10-05 00:34:57,421 - root - INFO - lr: 4.9425e-05 gnorm: 1.31 [ 2:00:49<22:43:52] +[titan] 2025-10-05 00:35:08,296 - root - INFO - step: 3260 loss: 2.9911 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6340 +[titan] 2025-10-05 00:35:08,296 - root - INFO - lr: 4.9423e-05 gnorm: 1.27 [ 2:00:59<22:43:38] +[titan] 2025-10-05 00:35:19,241 - root - INFO - step: 3265 loss: 2.9592 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6039 +[titan] 2025-10-05 00:35:19,242 - root - INFO - lr: 4.9421e-05 gnorm: 1.30 [ 2:01:10<22:43:25] +[titan] 2025-10-05 00:35:30,115 - root - INFO - step: 3270 loss: 2.9685 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6125 +[titan] 2025-10-05 00:35:30,115 - root - INFO - lr: 4.9419e-05 gnorm: 1.33 [ 2:01:21<22:43:11] +[titan] 2025-10-05 00:35:40,981 - root - INFO - step: 3275 loss: 3.0649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3675 global_avg_mtp_loss: 2.6974 +[titan] 2025-10-05 00:35:40,981 - root - INFO - lr: 4.9417e-05 gnorm: 1.36 [ 2:01:32<22:42:57] +[titan] 2025-10-05 00:35:51,879 - root - INFO - step: 3280 loss: 2.9994 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6401 +[titan] 2025-10-05 00:35:51,879 - root - INFO - lr: 4.9415e-05 
gnorm: 1.31 [ 2:01:43<22:42:43] +[titan] 2025-10-05 00:36:02,779 - root - INFO - step: 3285 loss: 2.9516 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3524 global_avg_mtp_loss: 2.5992 +[titan] 2025-10-05 00:36:02,779 - root - INFO - lr: 4.9413e-05 gnorm: 1.24 [ 2:01:54<22:42:29] +[titan] 2025-10-05 00:36:13,718 - root - INFO - step: 3290 loss: 3.0135 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6533 +[titan] 2025-10-05 00:36:13,718 - root - INFO - lr: 4.9411e-05 gnorm: 1.32 [ 2:02:05<22:42:16] +[titan] 2025-10-05 00:36:24,612 - root - INFO - step: 3295 loss: 2.9374 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3515 global_avg_mtp_loss: 2.5859 +[titan] 2025-10-05 00:36:24,613 - root - INFO - lr: 4.9409e-05 gnorm: 1.30 [ 2:02:16<22:42:02] +[titan] 2025-10-05 00:36:33,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:36:35,569 - root - INFO - step: 3300 loss: 3.0216 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6591 +[titan] 2025-10-05 00:36:35,570 - root - INFO - lr: 4.9407e-05 gnorm: 1.29 [ 2:02:27<22:41:49] +[titan] 2025-10-05 00:36:46,479 - root - INFO - step: 3305 loss: 2.9748 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6193 +[titan] 2025-10-05 00:36:46,480 - root - INFO - lr: 4.9405e-05 gnorm: 1.29 [ 2:02:38<22:41:35] +[titan] 2025-10-05 00:36:57,349 - root - INFO - step: 3310 loss: 2.9636 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3540 global_avg_mtp_loss: 2.6095 +[titan] 2025-10-05 00:36:57,350 - root - INFO - lr: 4.9403e-05 gnorm: 1.18 [ 2:02:48<22:41:21] +[titan] 2025-10-05 00:37:08,233 - root - INFO - step: 3315 loss: 2.9774 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6224 +[titan] 2025-10-05 00:37:08,233 - root - INFO - lr: 4.9401e-05 gnorm: 1.24 [ 2:02:59<22:41:07] +[titan] 2025-10-05 00:37:19,133 - root - INFO - step: 3320 loss: 2.9377 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:37:19,133 - root - INFO - lr: 4.9399e-05 gnorm: 1.24 [ 2:03:10<22:40:54] +[titan] 2025-10-05 00:37:29,998 - root - INFO - step: 3325 loss: 2.8934 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:37:29,999 - root - INFO - lr: 4.9397e-05 gnorm: 1.31 [ 2:03:21<22:40:40] +[titan] 2025-10-05 00:37:40,921 - root - INFO - step: 3330 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3596 global_avg_mtp_loss: 2.6407 +[titan] 2025-10-05 00:37:40,922 - root - INFO - lr: 4.9395e-05 
gnorm: 1.29 [ 2:03:32<22:40:26] +[titan] 2025-10-05 00:37:51,784 - root - INFO - step: 3335 loss: 2.9450 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5927 +[titan] 2025-10-05 00:37:51,784 - root - INFO - lr: 4.9392e-05 gnorm: 1.29 [ 2:03:43<22:40:12] +[titan] 2025-10-05 00:38:02,640 - root - INFO - step: 3340 loss: 2.9243 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3497 global_avg_mtp_loss: 2.5746 +[titan] 2025-10-05 00:38:02,640 - root - INFO - lr: 4.9390e-05 gnorm: 1.24 [ 2:03:54<22:39:58] +[titan] 2025-10-05 00:38:13,559 - root - INFO - step: 3345 loss: 2.9258 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5767 +[titan] 2025-10-05 00:38:13,559 - root - INFO - lr: 4.9388e-05 gnorm: 1.32 [ 2:04:05<22:39:45] +[titan] 2025-10-05 00:38:22,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:38:24,447 - root - INFO - step: 3350 loss: 2.9893 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3573 global_avg_mtp_loss: 2.6320 +[titan] 2025-10-05 00:38:24,447 - root - INFO - lr: 4.9386e-05 gnorm: 1.23 [ 2:04:16<22:39:31] +[titan] 2025-10-05 00:38:35,319 - root - INFO - step: 3355 loss: 2.8550 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3405 global_avg_mtp_loss: 2.5146 +[titan] 2025-10-05 00:38:35,319 - root - INFO - lr: 4.9384e-05 gnorm: 1.25 [ 2:04:26<22:39:17] +[titan] 2025-10-05 00:38:46,199 - root - INFO - step: 3360 loss: 2.8891 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:38:46,199 - root - INFO - lr: 4.9382e-05 gnorm: 1.31 [ 2:04:37<22:39:03] +[titan] 2025-10-05 00:38:57,161 - root - INFO - step: 3365 loss: 2.9521 memory: 118.84GiB(85.28%) tps: 29,893 tflops: 414.71 mfu: 41.93% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.5991 +[titan] 2025-10-05 00:38:57,161 - root - INFO - lr: 4.9380e-05 gnorm: 1.25 [ 2:04:48<22:38:50] +[titan] 2025-10-05 00:39:08,046 - root - INFO - step: 3370 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.5919 +[titan] 2025-10-05 00:39:08,046 - root - INFO - lr: 4.9378e-05 gnorm: 1.32 [ 2:04:59<22:38:36] +[titan] 2025-10-05 00:39:18,937 - root - INFO - step: 3375 loss: 2.9184 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5694 +[titan] 2025-10-05 00:39:18,937 - root - INFO - lr: 4.9376e-05 gnorm: 1.25 [ 2:05:10<22:38:23] +[titan] 2025-10-05 00:39:29,827 - root - INFO - step: 3380 loss: 2.9621 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6080 +[titan] 2025-10-05 00:39:29,827 - root - INFO - lr: 4.9374e-05 
gnorm: 1.24 [ 2:05:21<22:38:09] +[titan] 2025-10-05 00:39:40,719 - root - INFO - step: 3385 loss: 2.9011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3465 global_avg_mtp_loss: 2.5547 +[titan] 2025-10-05 00:39:40,719 - root - INFO - lr: 4.9372e-05 gnorm: 1.22 [ 2:05:32<22:37:55] +[titan] 2025-10-05 00:39:51,594 - root - INFO - step: 3390 loss: 2.9910 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6324 +[titan] 2025-10-05 00:39:51,595 - root - INFO - lr: 4.9370e-05 gnorm: 1.24 [ 2:05:43<22:37:41] +[titan] 2025-10-05 00:40:02,576 - root - INFO - step: 3395 loss: 2.9436 memory: 118.84GiB(85.28%) tps: 29,839 tflops: 413.97 mfu: 41.86% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5907 +[titan] 2025-10-05 00:40:02,577 - root - INFO - lr: 4.9367e-05 gnorm: 1.26 [ 2:05:54<22:37:29] +[titan] 2025-10-05 00:40:11,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:40:13,489 - root - INFO - step: 3400 loss: 2.9838 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3569 global_avg_mtp_loss: 2.6269 +[titan] 2025-10-05 00:40:13,489 - root - INFO - lr: 4.9365e-05 gnorm: 1.27 [ 2:06:05<22:37:15] +[titan] 2025-10-05 00:40:24,371 - root - INFO - step: 3405 loss: 3.0515 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3666 global_avg_mtp_loss: 2.6849 +[titan] 2025-10-05 00:40:24,371 - root - INFO - lr: 4.9363e-05 gnorm: 1.23 [ 2:06:15<22:37:02] +[titan] 2025-10-05 00:40:35,244 - root - INFO - step: 3410 loss: 2.9631 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3556 global_avg_mtp_loss: 2.6074 +[titan] 2025-10-05 00:40:35,244 - root - INFO - lr: 4.9361e-05 gnorm: 1.28 [ 2:06:26<22:36:48] +[titan] 2025-10-05 00:40:46,133 - root - INFO - step: 3415 loss: 2.9578 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3547 global_avg_mtp_loss: 2.6032 +[titan] 2025-10-05 00:40:46,134 - root - INFO - lr: 4.9359e-05 gnorm: 1.23 [ 2:06:37<22:36:34] +[titan] 2025-10-05 00:40:57,009 - root - INFO - step: 3420 loss: 2.9329 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3498 global_avg_mtp_loss: 2.5832 +[titan] 2025-10-05 00:40:57,009 - root - INFO - lr: 4.9357e-05 gnorm: 1.19 [ 2:06:48<22:36:20] +[titan] 2025-10-05 00:41:07,937 - root - INFO - step: 3425 loss: 2.9564 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.6041 +[titan] 2025-10-05 00:41:07,937 - root - INFO - lr: 4.9355e-05 gnorm: 1.27 [ 2:06:59<22:36:07] +[titan] 2025-10-05 00:41:18,921 - root - INFO - step: 3430 loss: 2.9729 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6166 +[titan] 2025-10-05 00:41:18,921 - root - INFO - lr: 4.9353e-05 
gnorm: 1.26 [ 2:07:10<22:35:54] +[titan] 2025-10-05 00:41:29,788 - root - INFO - step: 3435 loss: 2.9570 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3535 global_avg_mtp_loss: 2.6035 +[titan] 2025-10-05 00:41:29,789 - root - INFO - lr: 4.9351e-05 gnorm: 1.30 [ 2:07:21<22:35:41] +[titan] 2025-10-05 00:41:40,636 - root - INFO - step: 3440 loss: 2.9121 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.3473 global_avg_mtp_loss: 2.5649 +[titan] 2025-10-05 00:41:40,637 - root - INFO - lr: 4.9348e-05 gnorm: 1.25 [ 2:07:32<22:35:27] +[titan] 2025-10-05 00:41:51,497 - root - INFO - step: 3445 loss: 2.9720 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3548 global_avg_mtp_loss: 2.6172 +[titan] 2025-10-05 00:41:51,498 - root - INFO - lr: 4.9346e-05 gnorm: 1.24 [ 2:07:43<22:35:13] +[titan] 2025-10-05 00:42:00,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:42:02,373 - root - INFO - step: 3450 loss: 3.0025 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:42:02,373 - root - INFO - lr: 4.9344e-05 gnorm: 1.40 [ 2:07:53<22:34:59] +[titan] 2025-10-05 00:42:13,236 - root - INFO - step: 3455 loss: 2.8984 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5533 +[titan] 2025-10-05 00:42:13,236 - root - INFO - lr: 4.9342e-05 gnorm: 1.33 [ 2:08:04<22:34:45] +[titan] 2025-10-05 00:42:24,195 - root - INFO - step: 3460 loss: 2.8961 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.3468 global_avg_mtp_loss: 2.5493 +[titan] 2025-10-05 00:42:24,195 - root - INFO - lr: 4.9340e-05 gnorm: 1.30 [ 2:08:15<22:34:32] +[titan] 2025-10-05 00:42:35,085 - root - INFO - step: 3465 loss: 3.0085 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3591 global_avg_mtp_loss: 2.6494 +[titan] 2025-10-05 00:42:35,085 - root - INFO - lr: 4.9338e-05 gnorm: 1.28 [ 2:08:26<22:34:19] +[titan] 2025-10-05 00:42:45,952 - root - INFO - step: 3470 loss: 2.9361 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5855 +[titan] 2025-10-05 00:42:45,952 - root - INFO - lr: 4.9336e-05 gnorm: 1.26 [ 2:08:37<22:34:05] +[titan] 2025-10-05 00:42:56,840 - root - INFO - step: 3475 loss: 2.9223 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3496 global_avg_mtp_loss: 2.5727 +[titan] 2025-10-05 00:42:56,841 - root - INFO - lr: 4.9333e-05 gnorm: 1.25 [ 2:08:48<22:33:51] +[titan] 2025-10-05 00:43:07,696 - root - INFO - step: 3480 loss: 2.9007 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5539 +[titan] 2025-10-05 00:43:07,696 - root - INFO - lr: 4.9331e-05 
gnorm: 1.30 [ 2:08:59<22:33:37] +[titan] 2025-10-05 00:43:18,563 - root - INFO - step: 3485 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5412 +[titan] 2025-10-05 00:43:18,564 - root - INFO - lr: 4.9329e-05 gnorm: 1.24 [ 2:09:10<22:33:24] +[titan] 2025-10-05 00:43:29,498 - root - INFO - step: 3490 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3565 global_avg_mtp_loss: 2.6312 +[titan] 2025-10-05 00:43:29,498 - root - INFO - lr: 4.9327e-05 gnorm: 1.34 [ 2:09:21<22:33:11] +[titan] 2025-10-05 00:43:40,371 - root - INFO - step: 3495 loss: 2.8500 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5087 +[titan] 2025-10-05 00:43:40,371 - root - INFO - lr: 4.9325e-05 gnorm: 1.24 [ 2:09:31<22:32:57] +[titan] 2025-10-05 00:43:49,059 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:43:51,242 - root - INFO - step: 3500 loss: 2.9053 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5576 +[titan] 2025-10-05 00:43:51,242 - root - INFO - lr: 4.9323e-05 gnorm: 1.26 [ 2:09:42<22:32:43] +[titan] 2025-10-05 00:44:02,120 - root - INFO - step: 3505 loss: 2.9596 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:44:02,120 - root - INFO - lr: 4.9320e-05 gnorm: 1.27 [ 2:09:53<22:32:30] +[titan] 2025-10-05 00:44:13,041 - root - INFO - step: 3510 loss: 2.9620 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3557 global_avg_mtp_loss: 2.6063 +[titan] 2025-10-05 00:44:13,041 - root - INFO - lr: 4.9318e-05 gnorm: 1.36 [ 2:10:04<22:32:16] +[titan] 2025-10-05 00:44:23,983 - root - INFO - step: 3515 loss: 2.9163 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5681 +[titan] 2025-10-05 00:44:23,984 - root - INFO - lr: 4.9316e-05 gnorm: 1.35 [ 2:10:15<22:32:03] +[titan] 2025-10-05 00:44:34,890 - root - INFO - step: 3520 loss: 2.9840 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6176 +[titan] 2025-10-05 00:44:34,890 - root - INFO - lr: 4.9314e-05 gnorm: 1.30 [ 2:10:26<22:31:50] +[titan] 2025-10-05 00:44:45,807 - root - INFO - step: 3525 loss: 2.8766 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3420 global_avg_mtp_loss: 2.5345 +[titan] 2025-10-05 00:44:45,807 - root - INFO - lr: 4.9312e-05 gnorm: 1.33 [ 2:10:37<22:31:37] +[titan] 2025-10-05 00:44:56,695 - root - INFO - step: 3530 loss: 2.8643 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5214 +[titan] 2025-10-05 00:44:56,696 - root - INFO - lr: 4.9309e-05 
gnorm: 1.31 [ 2:10:48<22:31:23] +[titan] 2025-10-05 00:45:07,556 - root - INFO - step: 3535 loss: 2.9317 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5804 +[titan] 2025-10-05 00:45:07,556 - root - INFO - lr: 4.9307e-05 gnorm: 1.24 [ 2:10:59<22:31:10] +[titan] 2025-10-05 00:45:18,462 - root - INFO - step: 3540 loss: 2.9149 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3487 global_avg_mtp_loss: 2.5661 +[titan] 2025-10-05 00:45:18,463 - root - INFO - lr: 4.9305e-05 gnorm: 1.24 [ 2:11:10<22:30:56] +[titan] 2025-10-05 00:45:29,403 - root - INFO - step: 3545 loss: 2.9166 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5665 +[titan] 2025-10-05 00:45:29,403 - root - INFO - lr: 4.9303e-05 gnorm: 1.31 [ 2:11:20<22:30:44] +[titan] 2025-10-05 00:45:38,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:45:40,270 - root - INFO - step: 3550 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5477 +[titan] 2025-10-05 00:45:40,270 - root - INFO - lr: 4.9301e-05 gnorm: 1.20 [ 2:11:31<22:30:30] +[titan] 2025-10-05 00:45:51,156 - root - INFO - step: 3555 loss: 2.8547 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3414 global_avg_mtp_loss: 2.5133 +[titan] 2025-10-05 00:45:51,156 - root - INFO - lr: 4.9298e-05 gnorm: 1.22 [ 2:11:42<22:30:16] +[titan] 2025-10-05 00:46:02,028 - root - INFO - step: 3560 loss: 2.9708 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3553 global_avg_mtp_loss: 2.6154 +[titan] 2025-10-05 00:46:02,029 - root - INFO - lr: 4.9296e-05 gnorm: 1.25 [ 2:11:53<22:30:03] +[titan] 2025-10-05 00:46:12,872 - root - INFO - step: 3565 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3463 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:46:12,873 - root - INFO - lr: 4.9294e-05 gnorm: 1.22 [ 2:12:04<22:29:49] +[titan] 2025-10-05 00:46:23,793 - root - INFO - step: 3570 loss: 2.9591 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3538 global_avg_mtp_loss: 2.6053 +[titan] 2025-10-05 00:46:23,794 - root - INFO - lr: 4.9292e-05 gnorm: 1.26 [ 2:12:15<22:29:36] +[titan] 2025-10-05 00:46:34,664 - root - INFO - step: 3575 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3519 global_avg_mtp_loss: 2.5886 +[titan] 2025-10-05 00:46:34,664 - root - INFO - lr: 4.9290e-05 gnorm: 1.30 [ 2:12:26<22:29:22] +[titan] 2025-10-05 00:46:45,547 - root - INFO - step: 3580 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3464 global_avg_mtp_loss: 2.5473 +[titan] 2025-10-05 00:46:45,548 - root - INFO - lr: 4.9287e-05 
gnorm: 1.23 [ 2:12:37<22:29:09] +[titan] 2025-10-05 00:46:54,494 - root - INFO - Dumping profiler traces at step 3584 +[titan] 2025-10-05 00:46:54,532 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:46:56,755 - root - INFO - step: 3585 loss: 2.9232 memory: 118.84GiB(85.28%) tps: 29,239 tflops: 405.64 mfu: 41.02% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5729 +[titan] 2025-10-05 00:46:56,755 - root - INFO - lr: 4.9285e-05 gnorm: 1.28 [ 2:12:48<22:28:59] +[titan] 2025-10-05 00:47:07,619 - root - INFO - step: 3590 loss: 2.9273 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3501 global_avg_mtp_loss: 2.5772 +[titan] 2025-10-05 00:47:07,619 - root - INFO - lr: 4.9283e-05 gnorm: 1.25 [ 2:12:59<22:28:45] +[titan] 2025-10-05 00:47:18,508 - root - INFO - step: 3595 loss: 2.9212 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5720 +[titan] 2025-10-05 00:47:18,509 - root - INFO - lr: 4.9281e-05 gnorm: 1.34 [ 2:13:10<22:28:32] +[titan] 2025-10-05 00:47:27,262 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:47:29,446 - root - INFO - step: 3600 loss: 2.8603 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5188 +[titan] 2025-10-05 00:47:29,447 - root - INFO - lr: 4.9278e-05 gnorm: 1.22 [ 2:13:21<22:28:19] +[titan] 2025-10-05 00:47:40,308 - root - INFO - step: 3605 loss: 2.8618 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5190 +[titan] 2025-10-05 00:47:40,309 - root - INFO - lr: 4.9276e-05 gnorm: 1.19 [ 2:13:31<22:28:05] +[titan] 2025-10-05 00:47:51,174 - root - INFO - step: 3610 loss: 2.9114 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5639 +[titan] 2025-10-05 00:47:51,174 - root - INFO - lr: 4.9274e-05 gnorm: 1.25 [ 2:13:42<22:27:51] +[titan] 2025-10-05 00:48:02,028 - root - INFO - step: 3615 loss: 2.8693 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5265 +[titan] 2025-10-05 00:48:02,028 - root - INFO - lr: 4.9272e-05 gnorm: 1.25 [ 2:13:53<22:27:38] +[titan] 2025-10-05 00:48:12,956 - root - INFO - step: 3620 loss: 2.9829 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3582 global_avg_mtp_loss: 2.6247 +[titan] 2025-10-05 00:48:12,956 - root - INFO - lr: 4.9269e-05 gnorm: 1.25 [ 2:14:04<22:27:25] +[titan] 2025-10-05 00:48:23,914 - root - INFO - step: 3625 loss: 2.9614 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.6084 +[titan] 2025-10-05 00:48:23,914 - root - INFO - lr: 4.9267e-05 gnorm: 1.19 [ 2:14:15<22:27:12] +[titan] 2025-10-05 00:48:34,821 - root - INFO - step: 3630 loss: 2.9416 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3505 global_avg_mtp_loss: 2.5911 +[titan] 2025-10-05 00:48:34,821 - root - INFO - lr: 4.9265e-05 
gnorm: 1.30 [ 2:14:26<22:26:59] +[titan] 2025-10-05 00:48:45,728 - root - INFO - step: 3635 loss: 2.8827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5377 +[titan] 2025-10-05 00:48:45,728 - root - INFO - lr: 4.9263e-05 gnorm: 1.22 [ 2:14:37<22:26:46] +[titan] 2025-10-05 00:48:56,629 - root - INFO - step: 3640 loss: 2.8474 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3379 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 00:48:56,629 - root - INFO - lr: 4.9260e-05 gnorm: 1.32 [ 2:14:48<22:26:33] +[titan] 2025-10-05 00:49:07,530 - root - INFO - step: 3645 loss: 2.9298 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5793 +[titan] 2025-10-05 00:49:07,530 - root - INFO - lr: 4.9258e-05 gnorm: 1.31 [ 2:14:59<22:26:19] +[titan] 2025-10-05 00:49:16,263 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:49:18,487 - root - INFO - step: 3650 loss: 3.0056 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6456 +[titan] 2025-10-05 00:49:18,488 - root - INFO - lr: 4.9256e-05 gnorm: 1.32 [ 2:15:10<22:26:07] +[titan] 2025-10-05 00:49:29,386 - root - INFO - step: 3655 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5347 +[titan] 2025-10-05 00:49:29,387 - root - INFO - lr: 4.9254e-05 gnorm: 1.23 [ 2:15:20<22:25:54] +[titan] 2025-10-05 00:49:40,255 - root - INFO - step: 3660 loss: 2.8748 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5314 +[titan] 2025-10-05 00:49:40,255 - root - INFO - lr: 4.9251e-05 gnorm: 1.26 [ 2:15:31<22:25:40] +[titan] 2025-10-05 00:49:51,122 - root - INFO - step: 3665 loss: 2.9419 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:49:51,122 - root - INFO - lr: 4.9249e-05 gnorm: 1.24 [ 2:15:42<22:25:27] +[titan] 2025-10-05 00:50:01,986 - root - INFO - step: 3670 loss: 2.8845 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5398 +[titan] 2025-10-05 00:50:01,986 - root - INFO - lr: 4.9247e-05 gnorm: 1.29 [ 2:15:53<22:25:13] +[titan] 2025-10-05 00:50:12,850 - root - INFO - step: 3675 loss: 2.8906 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5453 +[titan] 2025-10-05 00:50:12,851 - root - INFO - lr: 4.9244e-05 gnorm: 1.29 [ 2:16:04<22:24:59] +[titan] 2025-10-05 00:50:23,731 - root - INFO - step: 3680 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3632 global_avg_mtp_loss: 2.6609 +[titan] 2025-10-05 00:50:23,731 - root - INFO - lr: 4.9242e-05 
gnorm: 1.28 [ 2:16:15<22:24:46] +[titan] 2025-10-05 00:50:34,722 - root - INFO - step: 3685 loss: 2.9110 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.3489 global_avg_mtp_loss: 2.5621 +[titan] 2025-10-05 00:50:34,723 - root - INFO - lr: 4.9240e-05 gnorm: 1.25 [ 2:16:26<22:24:34] +[titan] 2025-10-05 00:50:45,616 - root - INFO - step: 3690 loss: 2.8445 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5064 +[titan] 2025-10-05 00:50:45,616 - root - INFO - lr: 4.9238e-05 gnorm: 1.23 [ 2:16:37<22:24:21] +[titan] 2025-10-05 00:50:56,496 - root - INFO - step: 3695 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3361 global_avg_mtp_loss: 2.4870 +[titan] 2025-10-05 00:50:56,496 - root - INFO - lr: 4.9235e-05 gnorm: 1.19 [ 2:16:48<22:24:07] +[titan] 2025-10-05 00:51:05,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:51:07,381 - root - INFO - step: 3700 loss: 2.8874 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5427 +[titan] 2025-10-05 00:51:07,381 - root - INFO - lr: 4.9233e-05 gnorm: 1.31 [ 2:16:58<22:23:54] +[titan] 2025-10-05 00:51:18,258 - root - INFO - step: 3705 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3441 global_avg_mtp_loss: 2.5425 +[titan] 2025-10-05 00:51:18,258 - root - INFO - lr: 4.9231e-05 gnorm: 1.36 [ 2:17:09<22:23:41] +[titan] 2025-10-05 00:51:29,175 - root - INFO - step: 3710 loss: 2.9115 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3467 global_avg_mtp_loss: 2.5648 +[titan] 2025-10-05 00:51:29,175 - root - INFO - lr: 4.9228e-05 gnorm: 1.27 [ 2:17:20<22:23:28] +[titan] 2025-10-05 00:51:40,064 - root - INFO - step: 3715 loss: 2.9140 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5671 +[titan] 2025-10-05 00:51:40,065 - root - INFO - lr: 4.9226e-05 gnorm: 1.23 [ 2:17:31<22:23:14] +[titan] 2025-10-05 00:51:50,950 - root - INFO - step: 3720 loss: 2.8644 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.5220 +[titan] 2025-10-05 00:51:50,950 - root - INFO - lr: 4.9224e-05 gnorm: 1.28 [ 2:17:42<22:23:01] +[titan] 2025-10-05 00:52:01,826 - root - INFO - step: 3725 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5677 +[titan] 2025-10-05 00:52:01,826 - root - INFO - lr: 4.9221e-05 gnorm: 1.30 [ 2:17:53<22:22:48] +[titan] 2025-10-05 00:52:12,692 - root - INFO - step: 3730 loss: 2.8843 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5392 +[titan] 2025-10-05 00:52:12,692 - root - INFO - lr: 4.9219e-05 
gnorm: 1.27 [ 2:18:04<22:22:34] +[titan] 2025-10-05 00:52:23,581 - root - INFO - step: 3735 loss: 2.8622 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3411 global_avg_mtp_loss: 2.5211 +[titan] 2025-10-05 00:52:23,581 - root - INFO - lr: 4.9217e-05 gnorm: 1.29 [ 2:18:15<22:22:21] +[titan] 2025-10-05 00:52:34,507 - root - INFO - step: 3740 loss: 2.8833 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5382 +[titan] 2025-10-05 00:52:34,507 - root - INFO - lr: 4.9214e-05 gnorm: 1.32 [ 2:18:26<22:22:08] +[titan] 2025-10-05 00:52:45,424 - root - INFO - step: 3745 loss: 2.8876 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5439 +[titan] 2025-10-05 00:52:45,424 - root - INFO - lr: 4.9212e-05 gnorm: 1.30 [ 2:18:36<22:21:55] +[titan] 2025-10-05 00:52:54,123 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:52:56,315 - root - INFO - step: 3750 loss: 2.9081 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3472 global_avg_mtp_loss: 2.5609 +[titan] 2025-10-05 00:52:56,315 - root - INFO - lr: 4.9210e-05 gnorm: 1.37 [ 2:18:47<22:21:42] +[titan] 2025-10-05 00:53:07,243 - root - INFO - step: 3755 loss: 2.8797 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 00:53:07,243 - root - INFO - lr: 4.9207e-05 gnorm: 1.27 [ 2:18:58<22:21:29] +[titan] 2025-10-05 00:53:18,154 - root - INFO - step: 3760 loss: 2.8545 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3406 global_avg_mtp_loss: 2.5139 +[titan] 2025-10-05 00:53:18,154 - root - INFO - lr: 4.9205e-05 gnorm: 1.27 [ 2:19:09<22:21:16] +[titan] 2025-10-05 00:53:29,071 - root - INFO - step: 3765 loss: 2.8350 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.4960 +[titan] 2025-10-05 00:53:29,071 - root - INFO - lr: 4.9203e-05 gnorm: 1.27 [ 2:19:20<22:21:04] +[titan] 2025-10-05 00:53:39,977 - root - INFO - step: 3770 loss: 2.8227 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3370 global_avg_mtp_loss: 2.4857 +[titan] 2025-10-05 00:53:39,978 - root - INFO - lr: 4.9200e-05 gnorm: 1.19 [ 2:19:31<22:20:51] +[titan] 2025-10-05 00:53:50,879 - root - INFO - step: 3775 loss: 2.8842 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3430 global_avg_mtp_loss: 2.5411 +[titan] 2025-10-05 00:53:50,879 - root - INFO - lr: 4.9198e-05 gnorm: 1.23 [ 2:19:42<22:20:38] +[titan] 2025-10-05 00:54:01,831 - root - INFO - step: 3780 loss: 2.9375 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5862 +[titan] 2025-10-05 00:54:01,831 - root - INFO - lr: 4.9196e-05 
gnorm: 1.20 [ 2:19:53<22:20:25] +[titan] 2025-10-05 00:54:12,711 - root - INFO - step: 3785 loss: 2.8747 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5318 +[titan] 2025-10-05 00:54:12,711 - root - INFO - lr: 4.9193e-05 gnorm: 1.23 [ 2:20:04<22:20:12] +[titan] 2025-10-05 00:54:23,577 - root - INFO - step: 3790 loss: 2.8207 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4833 +[titan] 2025-10-05 00:54:23,577 - root - INFO - lr: 4.9191e-05 gnorm: 1.27 [ 2:20:15<22:19:58] +[titan] 2025-10-05 00:54:34,480 - root - INFO - step: 3795 loss: 2.9584 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3532 global_avg_mtp_loss: 2.6052 +[titan] 2025-10-05 00:54:34,480 - root - INFO - lr: 4.9188e-05 gnorm: 1.29 [ 2:20:26<22:19:45] +[titan] 2025-10-05 00:54:43,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:54:45,353 - root - INFO - step: 3800 loss: 2.9385 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3511 global_avg_mtp_loss: 2.5874 +[titan] 2025-10-05 00:54:45,353 - root - INFO - lr: 4.9186e-05 gnorm: 1.24 [ 2:20:36<22:19:32] +[titan] 2025-10-05 00:54:56,214 - root - INFO - step: 3805 loss: 2.8516 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3400 global_avg_mtp_loss: 2.5116 +[titan] 2025-10-05 00:54:56,214 - root - INFO - lr: 4.9184e-05 gnorm: 1.32 [ 2:20:47<22:19:19] +[titan] 2025-10-05 00:55:07,134 - root - INFO - step: 3810 loss: 2.8608 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5196 +[titan] 2025-10-05 00:55:07,134 - root - INFO - lr: 4.9181e-05 gnorm: 1.27 [ 2:20:58<22:19:06] +[titan] 2025-10-05 00:55:18,019 - root - INFO - step: 3815 loss: 2.9132 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3495 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 00:55:18,019 - root - INFO - lr: 4.9179e-05 gnorm: 1.33 [ 2:21:09<22:18:53] +[titan] 2025-10-05 00:55:28,882 - root - INFO - step: 3820 loss: 2.8903 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3438 global_avg_mtp_loss: 2.5465 +[titan] 2025-10-05 00:55:28,882 - root - INFO - lr: 4.9176e-05 gnorm: 1.28 [ 2:21:20<22:18:39] +[titan] 2025-10-05 00:55:39,765 - root - INFO - step: 3825 loss: 2.8538 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3396 global_avg_mtp_loss: 2.5142 +[titan] 2025-10-05 00:55:39,765 - root - INFO - lr: 4.9174e-05 gnorm: 1.35 [ 2:21:31<22:18:26] +[titan] 2025-10-05 00:55:50,656 - root - INFO - step: 3830 loss: 2.8951 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5503 +[titan] 2025-10-05 00:55:50,656 - root - INFO - lr: 4.9172e-05 
gnorm: 1.29 [ 2:21:42<22:18:13] +[titan] 2025-10-05 00:56:01,544 - root - INFO - step: 3835 loss: 2.8701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 00:56:01,544 - root - INFO - lr: 4.9169e-05 gnorm: 1.28 [ 2:21:53<22:18:00] +[titan] 2025-10-05 00:56:12,424 - root - INFO - step: 3840 loss: 2.8980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3459 global_avg_mtp_loss: 2.5521 +[titan] 2025-10-05 00:56:12,424 - root - INFO - lr: 4.9167e-05 gnorm: 1.29 [ 2:22:03<22:17:47] +[titan] 2025-10-05 00:56:23,350 - root - INFO - step: 3845 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:56:23,350 - root - INFO - lr: 4.9164e-05 gnorm: 1.33 [ 2:22:14<22:17:34] +[titan] 2025-10-05 00:56:32,044 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:56:34,236 - root - INFO - step: 3850 loss: 2.8817 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5379 +[titan] 2025-10-05 00:56:34,237 - root - INFO - lr: 4.9162e-05 gnorm: 1.28 [ 2:22:25<22:17:21] +[titan] 2025-10-05 00:56:45,120 - root - INFO - step: 3855 loss: 2.8016 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 00:56:45,120 - root - INFO - lr: 4.9160e-05 gnorm: 1.32 [ 2:22:36<22:17:08] +[titan] 2025-10-05 00:56:56,000 - root - INFO - step: 3860 loss: 2.8851 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 00:56:56,000 - root - INFO - lr: 4.9157e-05 gnorm: 1.29 [ 2:22:47<22:16:55] +[titan] 2025-10-05 00:57:06,896 - root - INFO - step: 3865 loss: 2.8534 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3401 global_avg_mtp_loss: 2.5132 +[titan] 2025-10-05 00:57:06,896 - root - INFO - lr: 4.9155e-05 gnorm: 1.25 [ 2:22:58<22:16:42] +[titan] 2025-10-05 00:57:17,779 - root - INFO - step: 3870 loss: 2.9197 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5707 +[titan] 2025-10-05 00:57:17,779 - root - INFO - lr: 4.9152e-05 gnorm: 1.28 [ 2:23:09<22:16:29] +[titan] 2025-10-05 00:57:28,718 - root - INFO - step: 3875 loss: 2.9466 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.3534 global_avg_mtp_loss: 2.5932 +[titan] 2025-10-05 00:57:28,718 - root - INFO - lr: 4.9150e-05 gnorm: 1.21 [ 2:23:20<22:16:16] +[titan] 2025-10-05 00:57:39,599 - root - INFO - step: 3880 loss: 2.8840 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3444 global_avg_mtp_loss: 2.5396 +[titan] 2025-10-05 00:57:39,600 - root - INFO - lr: 4.9148e-05 
gnorm: 1.28 [ 2:23:31<22:16:03] +[titan] 2025-10-05 00:57:50,474 - root - INFO - step: 3885 loss: 2.9370 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3488 global_avg_mtp_loss: 2.5882 +[titan] 2025-10-05 00:57:50,474 - root - INFO - lr: 4.9145e-05 gnorm: 1.25 [ 2:23:42<22:15:50] +[titan] 2025-10-05 00:58:01,351 - root - INFO - step: 3890 loss: 2.9350 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3493 global_avg_mtp_loss: 2.5857 +[titan] 2025-10-05 00:58:01,351 - root - INFO - lr: 4.9143e-05 gnorm: 1.31 [ 2:23:52<22:15:37] +[titan] 2025-10-05 00:58:12,271 - root - INFO - step: 3895 loss: 2.9044 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5592 +[titan] 2025-10-05 00:58:12,271 - root - INFO - lr: 4.9140e-05 gnorm: 1.26 [ 2:24:03<22:15:24] +[titan] 2025-10-05 00:58:20,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:58:23,152 - root - INFO - step: 3900 loss: 2.7993 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 00:58:23,152 - root - INFO - lr: 4.9138e-05 gnorm: 1.27 [ 2:24:14<22:15:11] +[titan] 2025-10-05 00:58:34,070 - root - INFO - step: 3905 loss: 2.9356 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:58:34,070 - root - INFO - lr: 4.9135e-05 gnorm: 1.23 [ 2:24:25<22:14:58] +[titan] 2025-10-05 00:58:44,959 - root - INFO - step: 3910 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3475 global_avg_mtp_loss: 2.5678 +[titan] 2025-10-05 00:58:44,959 - root - INFO - lr: 4.9133e-05 gnorm: 1.26 [ 2:24:36<22:14:45] +[titan] 2025-10-05 00:58:55,830 - root - INFO - step: 3915 loss: 2.8401 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5019 +[titan] 2025-10-05 00:58:55,830 - root - INFO - lr: 4.9130e-05 gnorm: 1.23 [ 2:24:47<22:14:32] +[titan] 2025-10-05 00:59:06,689 - root - INFO - step: 3920 loss: 2.9547 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3597 global_avg_mtp_loss: 2.5950 +[titan] 2025-10-05 00:59:06,690 - root - INFO - lr: 4.9128e-05 gnorm: 1.24 [ 2:24:58<22:14:19] +[titan] 2025-10-05 00:59:17,583 - root - INFO - step: 3925 loss: 2.9231 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3478 global_avg_mtp_loss: 2.5753 +[titan] 2025-10-05 00:59:17,584 - root - INFO - lr: 4.9125e-05 gnorm: 1.29 [ 2:25:09<22:14:06] +[titan] 2025-10-05 00:59:28,459 - root - INFO - step: 3930 loss: 2.8642 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5227 +[titan] 2025-10-05 00:59:28,459 - root - INFO - lr: 4.9123e-05 
gnorm: 1.29 [ 2:25:19<22:13:53] +[titan] 2025-10-05 00:59:39,392 - root - INFO - step: 3935 loss: 2.8806 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 00:59:39,393 - root - INFO - lr: 4.9121e-05 gnorm: 1.31 [ 2:25:30<22:13:40] +[titan] 2025-10-05 00:59:50,302 - root - INFO - step: 3940 loss: 2.9187 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.3484 global_avg_mtp_loss: 2.5703 +[titan] 2025-10-05 00:59:50,302 - root - INFO - lr: 4.9118e-05 gnorm: 1.23 [ 2:25:41<22:13:27] +[titan] 2025-10-05 01:00:01,171 - root - INFO - step: 3945 loss: 2.8435 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:00:01,172 - root - INFO - lr: 4.9116e-05 gnorm: 1.25 [ 2:25:52<22:13:14] +[titan] 2025-10-05 01:00:09,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:00:12,051 - root - INFO - step: 3950 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5446 +[titan] 2025-10-05 01:00:12,051 - root - INFO - lr: 4.9113e-05 gnorm: 1.27 [ 2:26:03<22:13:01] +[titan] 2025-10-05 01:00:22,938 - root - INFO - step: 3955 loss: 2.8946 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5492 +[titan] 2025-10-05 01:00:22,938 - root - INFO - lr: 4.9111e-05 gnorm: 1.31 [ 2:26:14<22:12:48] +[titan] 2025-10-05 01:00:33,863 - root - INFO - step: 3960 loss: 2.9358 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5868 +[titan] 2025-10-05 01:00:33,863 - root - INFO - lr: 4.9108e-05 gnorm: 1.30 [ 2:26:25<22:12:35] +[titan] 2025-10-05 01:00:44,742 - root - INFO - step: 3965 loss: 2.8537 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3415 global_avg_mtp_loss: 2.5123 +[titan] 2025-10-05 01:00:44,743 - root - INFO - lr: 4.9106e-05 gnorm: 1.24 [ 2:26:36<22:12:22] +[titan] 2025-10-05 01:00:55,669 - root - INFO - step: 3970 loss: 2.8697 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5257 +[titan] 2025-10-05 01:00:55,670 - root - INFO - lr: 4.9103e-05 gnorm: 1.26 [ 2:26:47<22:12:10] +[titan] 2025-10-05 01:01:06,531 - root - INFO - step: 3975 loss: 2.8184 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4844 +[titan] 2025-10-05 01:01:06,531 - root - INFO - lr: 4.9101e-05 gnorm: 1.26 [ 2:26:58<22:11:57] +[titan] 2025-10-05 01:01:17,435 - root - INFO - step: 3980 loss: 2.8685 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5264 +[titan] 2025-10-05 01:01:17,435 - root - INFO - lr: 4.9098e-05 
gnorm: 1.32 [ 2:27:08<22:11:44] +[titan] 2025-10-05 01:01:28,313 - root - INFO - step: 3985 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3312 global_avg_mtp_loss: 2.4498 +[titan] 2025-10-05 01:01:28,313 - root - INFO - lr: 4.9096e-05 gnorm: 1.26 [ 2:27:19<22:11:31] +[titan] 2025-10-05 01:01:39,229 - root - INFO - step: 3990 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.3337 global_avg_mtp_loss: 2.4610 +[titan] 2025-10-05 01:01:39,229 - root - INFO - lr: 4.9093e-05 gnorm: 1.30 [ 2:27:30<22:11:18] +[titan] 2025-10-05 01:01:50,092 - root - INFO - step: 3995 loss: 2.7943 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3322 global_avg_mtp_loss: 2.4621 +[titan] 2025-10-05 01:01:50,092 - root - INFO - lr: 4.9091e-05 gnorm: 1.21 [ 2:27:41<22:11:05] +[titan] 2025-10-05 01:01:58,771 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:02:00,958 - root - INFO - step: 4000 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5902 +[titan] 2025-10-05 01:02:00,958 - root - INFO - lr: 4.9088e-05 gnorm: 1.27 [ 2:27:52<22:10:52] +[titan] 2025-10-05 01:02:11,850 - root - INFO - step: 4005 loss: 2.8699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5271 +[titan] 2025-10-05 01:02:11,850 - root - INFO - lr: 4.9086e-05 gnorm: 1.29 [ 2:28:03<22:10:39] +[titan] 2025-10-05 01:02:22,761 - root - INFO - step: 4010 loss: 2.8862 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 01:02:22,761 - root - INFO - lr: 4.9083e-05 gnorm: 1.23 [ 2:28:14<22:10:26] +[titan] 2025-10-05 01:02:33,616 - root - INFO - step: 4015 loss: 2.8251 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.4858 +[titan] 2025-10-05 01:02:33,616 - root - INFO - lr: 4.9081e-05 gnorm: 1.23 [ 2:28:25<22:10:13] +[titan] 2025-10-05 01:02:44,524 - root - INFO - step: 4020 loss: 2.8756 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5328 +[titan] 2025-10-05 01:02:44,525 - root - INFO - lr: 4.9078e-05 gnorm: 1.23 [ 2:28:36<22:10:00] +[titan] 2025-10-05 01:02:55,396 - root - INFO - step: 4025 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3363 global_avg_mtp_loss: 2.4949 +[titan] 2025-10-05 01:02:55,396 - root - INFO - lr: 4.9076e-05 gnorm: 1.22 [ 2:28:46<22:09:47] +[titan] 2025-10-05 01:03:06,265 - root - INFO - step: 4030 loss: 2.8674 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5248 +[titan] 2025-10-05 01:03:06,265 - root - INFO - lr: 4.9073e-05 
gnorm: 1.24 [ 2:28:57<22:09:34] +[titan] 2025-10-05 01:03:17,168 - root - INFO - step: 4035 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:03:17,168 - root - INFO - lr: 4.9070e-05 gnorm: 1.29 [ 2:29:08<22:09:22] +[titan] 2025-10-05 01:03:28,097 - root - INFO - step: 4040 loss: 2.8057 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3338 global_avg_mtp_loss: 2.4719 +[titan] 2025-10-05 01:03:28,098 - root - INFO - lr: 4.9068e-05 gnorm: 1.23 [ 2:29:19<22:09:09] +[titan] 2025-10-05 01:03:39,019 - root - INFO - step: 4045 loss: 2.8686 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5273 +[titan] 2025-10-05 01:03:39,019 - root - INFO - lr: 4.9065e-05 gnorm: 1.33 [ 2:29:30<22:08:56] +[titan] 2025-10-05 01:03:47,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:03:49,927 - root - INFO - step: 4050 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5088 +[titan] 2025-10-05 01:03:49,927 - root - INFO - lr: 4.9063e-05 gnorm: 1.25 [ 2:29:41<22:08:44] +[titan] 2025-10-05 01:04:00,828 - root - INFO - step: 4055 loss: 2.8040 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4693 +[titan] 2025-10-05 01:04:00,828 - root - INFO - lr: 4.9060e-05 gnorm: 1.23 [ 2:29:52<22:08:31] +[titan] 2025-10-05 01:04:11,717 - root - INFO - step: 4060 loss: 2.8008 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4673 +[titan] 2025-10-05 01:04:11,717 - root - INFO - lr: 4.9058e-05 gnorm: 1.27 [ 2:30:03<22:08:18] +[titan] 2025-10-05 01:04:22,649 - root - INFO - step: 4065 loss: 2.8860 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5433 +[titan] 2025-10-05 01:04:22,649 - root - INFO - lr: 4.9055e-05 gnorm: 1.27 [ 2:30:14<22:08:06] +[titan] 2025-10-05 01:04:33,534 - root - INFO - step: 4070 loss: 2.8482 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.5092 +[titan] 2025-10-05 01:04:33,534 - root - INFO - lr: 4.9053e-05 gnorm: 1.28 [ 2:30:25<22:07:53] +[titan] 2025-10-05 01:04:44,493 - root - INFO - step: 4075 loss: 2.7243 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.3989 +[titan] 2025-10-05 01:04:44,493 - root - INFO - lr: 4.9050e-05 gnorm: 1.28 [ 2:30:36<22:07:41] +[titan] 2025-10-05 01:04:55,369 - root - INFO - step: 4080 loss: 2.9124 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5655 +[titan] 2025-10-05 01:04:55,370 - root - INFO - lr: 4.9047e-05 
gnorm: 1.24 [ 2:30:46<22:07:28] +[titan] 2025-10-05 01:05:06,228 - root - INFO - step: 4085 loss: 2.8731 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 01:05:06,228 - root - INFO - lr: 4.9045e-05 gnorm: 1.27 [ 2:30:57<22:07:15] +[titan] 2025-10-05 01:05:17,102 - root - INFO - step: 4090 loss: 2.7997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4672 +[titan] 2025-10-05 01:05:17,102 - root - INFO - lr: 4.9042e-05 gnorm: 1.28 [ 2:31:08<22:07:02] +[titan] 2025-10-05 01:05:28,059 - root - INFO - step: 4095 loss: 2.9035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5587 +[titan] 2025-10-05 01:05:28,060 - root - INFO - lr: 4.9040e-05 gnorm: 1.23 [ 2:31:19<22:06:49] +[titan] 2025-10-05 01:05:30,407 - root - INFO - Dumping profiler traces at step 4096 +[titan] 2025-10-05 01:05:30,444 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:05:37,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:05:39,213 - root - INFO - step: 4100 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 29,380 tflops: 407.60 mfu: 41.21% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4763 +[titan] 2025-10-05 01:05:39,213 - root - INFO - lr: 4.9037e-05 gnorm: 1.29 [ 2:31:30<22:06:39] +[titan] 2025-10-05 01:05:50,104 - root - INFO - step: 4105 loss: 2.8434 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5046 +[titan] 2025-10-05 01:05:50,104 - root - INFO - lr: 4.9035e-05 gnorm: 1.25 [ 2:31:41<22:06:26] +[titan] 2025-10-05 01:06:00,954 - root - INFO - step: 4110 loss: 2.8513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3410 global_avg_mtp_loss: 2.5103 +[titan] 2025-10-05 01:06:00,954 - root - INFO - lr: 4.9032e-05 gnorm: 1.30 [ 2:31:52<22:06:13] +[titan] 2025-10-05 01:06:11,792 - root - INFO - step: 4115 loss: 2.8687 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 01:06:11,793 - root - INFO - lr: 4.9029e-05 gnorm: 1.28 [ 2:32:03<22:06:00] +[titan] 2025-10-05 01:06:22,672 - root - INFO - step: 4120 loss: 2.7381 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3273 global_avg_mtp_loss: 2.4108 +[titan] 2025-10-05 01:06:22,673 - root - INFO - lr: 4.9027e-05 gnorm: 1.20 [ 2:32:14<22:05:47] +[titan] 2025-10-05 01:06:33,541 - root - INFO - step: 4125 loss: 2.8811 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 01:06:33,541 - root - INFO - lr: 4.9024e-05 gnorm: 1.27 [ 2:32:25<22:05:34] +[titan] 2025-10-05 01:06:44,458 - root - INFO - step: 4130 loss: 2.7955 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3316 global_avg_mtp_loss: 2.4639 +[titan] 2025-10-05 01:06:44,459 - root - INFO - lr: 4.9022e-05 
gnorm: 1.22 [ 2:32:35<22:05:21] +[titan] 2025-10-05 01:06:55,338 - root - INFO - step: 4135 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3357 global_avg_mtp_loss: 2.4937 +[titan] 2025-10-05 01:06:55,338 - root - INFO - lr: 4.9019e-05 gnorm: 1.26 [ 2:32:46<22:05:08] +[titan] 2025-10-05 01:07:06,209 - root - INFO - step: 4140 loss: 2.8211 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3351 global_avg_mtp_loss: 2.4860 +[titan] 2025-10-05 01:07:06,209 - root - INFO - lr: 4.9016e-05 gnorm: 1.23 [ 2:32:57<22:04:55] +[titan] 2025-10-05 01:07:17,116 - root - INFO - step: 4145 loss: 2.7757 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4442 +[titan] 2025-10-05 01:07:17,116 - root - INFO - lr: 4.9014e-05 gnorm: 1.33 [ 2:33:08<22:04:43] +[titan] 2025-10-05 01:07:25,818 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:07:28,011 - root - INFO - step: 4150 loss: 2.8404 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.5032 +[titan] 2025-10-05 01:07:28,012 - root - INFO - lr: 4.9011e-05 gnorm: 1.29 [ 2:33:19<22:04:30] +[titan] 2025-10-05 01:07:38,919 - root - INFO - step: 4155 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5380 +[titan] 2025-10-05 01:07:38,919 - root - INFO - lr: 4.9009e-05 gnorm: 1.22 [ 2:33:30<22:04:18] +[titan] 2025-10-05 01:07:49,794 - root - INFO - step: 4160 loss: 2.8305 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3386 global_avg_mtp_loss: 2.4919 +[titan] 2025-10-05 01:07:49,794 - root - INFO - lr: 4.9006e-05 gnorm: 1.23 [ 2:33:41<22:04:05] +[titan] 2025-10-05 01:08:00,715 - root - INFO - step: 4165 loss: 2.7568 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4286 +[titan] 2025-10-05 01:08:00,715 - root - INFO - lr: 4.9003e-05 gnorm: 1.22 [ 2:33:52<22:03:52] +[titan] 2025-10-05 01:08:11,575 - root - INFO - step: 4170 loss: 2.8449 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.3395 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:08:11,575 - root - INFO - lr: 4.9001e-05 gnorm: 1.22 [ 2:34:03<22:03:39] +[titan] 2025-10-05 01:08:22,448 - root - INFO - step: 4175 loss: 2.8005 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3330 global_avg_mtp_loss: 2.4675 +[titan] 2025-10-05 01:08:22,448 - root - INFO - lr: 4.8998e-05 gnorm: 1.22 [ 2:34:13<22:03:26] +[titan] 2025-10-05 01:08:33,314 - root - INFO - step: 4180 loss: 2.7794 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4468 +[titan] 2025-10-05 01:08:33,314 - root - INFO - lr: 4.8995e-05 
gnorm: 1.18 [ 2:34:24<22:03:13] +[titan] 2025-10-05 01:08:44,215 - root - INFO - step: 4185 loss: 2.8110 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3342 global_avg_mtp_loss: 2.4768 +[titan] 2025-10-05 01:08:44,215 - root - INFO - lr: 4.8993e-05 gnorm: 1.25 [ 2:34:35<22:03:01] +[titan] 2025-10-05 01:08:55,079 - root - INFO - step: 4190 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4689 +[titan] 2025-10-05 01:08:55,079 - root - INFO - lr: 4.8990e-05 gnorm: 1.20 [ 2:34:46<22:02:48] +[titan] 2025-10-05 01:09:05,968 - root - INFO - step: 4195 loss: 2.7893 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3318 global_avg_mtp_loss: 2.4575 +[titan] 2025-10-05 01:09:05,968 - root - INFO - lr: 4.8987e-05 gnorm: 1.27 [ 2:34:57<22:02:35] +[titan] 2025-10-05 01:09:14,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:09:16,867 - root - INFO - step: 4200 loss: 2.8001 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 01:09:16,867 - root - INFO - lr: 4.8985e-05 gnorm: 1.37 [ 2:35:08<22:02:22] +[titan] 2025-10-05 01:09:27,758 - root - INFO - step: 4205 loss: 2.8414 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5036 +[titan] 2025-10-05 01:09:27,758 - root - INFO - lr: 4.8982e-05 gnorm: 1.27 [ 2:35:19<22:02:10] +[titan] 2025-10-05 01:09:38,614 - root - INFO - step: 4210 loss: 2.8082 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4739 +[titan] 2025-10-05 01:09:38,614 - root - INFO - lr: 4.8980e-05 gnorm: 1.21 [ 2:35:30<22:01:57] +[titan] 2025-10-05 01:09:49,535 - root - INFO - step: 4215 loss: 2.8257 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4886 +[titan] 2025-10-05 01:09:49,535 - root - INFO - lr: 4.8977e-05 gnorm: 1.25 [ 2:35:41<22:01:44] +[titan] 2025-10-05 01:10:00,451 - root - INFO - step: 4220 loss: 2.8238 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3391 global_avg_mtp_loss: 2.4847 +[titan] 2025-10-05 01:10:00,451 - root - INFO - lr: 4.8974e-05 gnorm: 1.27 [ 2:35:51<22:01:32] +[titan] 2025-10-05 01:10:11,409 - root - INFO - step: 4225 loss: 2.7720 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4420 +[titan] 2025-10-05 01:10:11,409 - root - INFO - lr: 4.8972e-05 gnorm: 1.25 [ 2:36:02<22:01:20] +[titan] 2025-10-05 01:10:22,330 - root - INFO - step: 4230 loss: 2.8335 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3375 global_avg_mtp_loss: 2.4961 +[titan] 2025-10-05 01:10:22,330 - root - INFO - lr: 4.8969e-05 
gnorm: 1.22 [ 2:36:13<22:01:07] +[titan] 2025-10-05 01:10:33,205 - root - INFO - step: 4235 loss: 2.9402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5879 +[titan] 2025-10-05 01:10:33,205 - root - INFO - lr: 4.8966e-05 gnorm: 1.26 [ 2:36:24<22:00:54] +[titan] 2025-10-05 01:10:44,111 - root - INFO - step: 4240 loss: 2.8115 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4770 +[titan] 2025-10-05 01:10:44,111 - root - INFO - lr: 4.8964e-05 gnorm: 1.23 [ 2:36:35<22:00:42] +[titan] 2025-10-05 01:10:54,992 - root - INFO - step: 4245 loss: 2.7621 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4328 +[titan] 2025-10-05 01:10:54,992 - root - INFO - lr: 4.8961e-05 gnorm: 1.25 [ 2:36:46<22:00:29] +[titan] 2025-10-05 01:11:03,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:11:05,860 - root - INFO - step: 4250 loss: 2.7919 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:11:05,860 - root - INFO - lr: 4.8958e-05 gnorm: 1.34 [ 2:36:57<22:00:16] +[titan] 2025-10-05 01:11:16,750 - root - INFO - step: 4255 loss: 2.8769 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 01:11:16,750 - root - INFO - lr: 4.8955e-05 gnorm: 1.23 [ 2:37:08<22:00:04] +[titan] 2025-10-05 01:11:27,682 - root - INFO - step: 4260 loss: 2.8447 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5059 +[titan] 2025-10-05 01:11:27,682 - root - INFO - lr: 4.8953e-05 gnorm: 1.29 [ 2:37:19<21:59:51] +[titan] 2025-10-05 01:11:38,566 - root - INFO - step: 4265 loss: 2.8553 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3402 global_avg_mtp_loss: 2.5151 +[titan] 2025-10-05 01:11:38,566 - root - INFO - lr: 4.8950e-05 gnorm: 1.28 [ 2:37:30<21:59:38] +[titan] 2025-10-05 01:11:49,489 - root - INFO - step: 4270 loss: 2.8265 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:11:49,489 - root - INFO - lr: 4.8947e-05 gnorm: 1.23 [ 2:37:40<21:59:26] +[titan] 2025-10-05 01:12:00,379 - root - INFO - step: 4275 loss: 2.7626 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3291 global_avg_mtp_loss: 2.4335 +[titan] 2025-10-05 01:12:00,379 - root - INFO - lr: 4.8945e-05 gnorm: 1.23 [ 2:37:51<21:59:13] +[titan] 2025-10-05 01:12:11,266 - root - INFO - step: 4280 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4955 +[titan] 2025-10-05 01:12:11,266 - root - INFO - lr: 4.8942e-05 
gnorm: 1.25 [ 2:38:02<21:59:01] +[titan] 2025-10-05 01:12:22,135 - root - INFO - step: 4285 loss: 2.8353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3380 global_avg_mtp_loss: 2.4973 +[titan] 2025-10-05 01:12:22,135 - root - INFO - lr: 4.8939e-05 gnorm: 1.27 [ 2:38:13<21:58:48] +[titan] 2025-10-05 01:12:33,063 - root - INFO - step: 4290 loss: 2.7796 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4469 +[titan] 2025-10-05 01:12:33,063 - root - INFO - lr: 4.8937e-05 gnorm: 1.31 [ 2:38:24<21:58:36] +[titan] 2025-10-05 01:12:43,959 - root - INFO - step: 4295 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4492 +[titan] 2025-10-05 01:12:43,959 - root - INFO - lr: 4.8934e-05 gnorm: 1.37 [ 2:38:35<21:58:23] +[titan] 2025-10-05 01:12:52,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:12:54,832 - root - INFO - step: 4300 loss: 2.9113 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 01:12:54,832 - root - INFO - lr: 4.8931e-05 gnorm: 1.32 [ 2:38:46<21:58:10] +[titan] 2025-10-05 01:13:05,696 - root - INFO - step: 4305 loss: 2.8427 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:13:05,696 - root - INFO - lr: 4.8928e-05 gnorm: 1.29 [ 2:38:57<21:57:57] +[titan] 2025-10-05 01:13:16,559 - root - INFO - step: 4310 loss: 2.8552 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5127 +[titan] 2025-10-05 01:13:16,559 - root - INFO - lr: 4.8926e-05 gnorm: 1.25 [ 2:39:08<21:57:45] +[titan] 2025-10-05 01:13:27,434 - root - INFO - step: 4315 loss: 2.7587 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:13:27,434 - root - INFO - lr: 4.8923e-05 gnorm: 1.28 [ 2:39:18<21:57:32] +[titan] 2025-10-05 01:13:38,295 - root - INFO - step: 4320 loss: 2.8361 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3373 global_avg_mtp_loss: 2.4988 +[titan] 2025-10-05 01:13:38,295 - root - INFO - lr: 4.8920e-05 gnorm: 1.33 [ 2:39:29<21:57:19] +[titan] 2025-10-05 01:13:49,212 - root - INFO - step: 4325 loss: 2.8809 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5381 +[titan] 2025-10-05 01:13:49,212 - root - INFO - lr: 4.8918e-05 gnorm: 1.32 [ 2:39:40<21:57:07] +[titan] 2025-10-05 01:14:00,073 - root - INFO - step: 4330 loss: 2.8655 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5232 +[titan] 2025-10-05 01:14:00,073 - root - INFO - lr: 4.8915e-05 
gnorm: 1.25 [ 2:39:51<21:56:54] +[titan] 2025-10-05 01:14:10,949 - root - INFO - step: 4335 loss: 2.8077 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4742 +[titan] 2025-10-05 01:14:10,949 - root - INFO - lr: 4.8912e-05 gnorm: 1.25 [ 2:40:02<21:56:41] +[titan] 2025-10-05 01:14:21,868 - root - INFO - step: 4340 loss: 2.8223 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3346 global_avg_mtp_loss: 2.4877 +[titan] 2025-10-05 01:14:21,868 - root - INFO - lr: 4.8909e-05 gnorm: 1.21 [ 2:40:13<21:56:29] +[titan] 2025-10-05 01:14:32,754 - root - INFO - step: 4345 loss: 2.8555 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3431 global_avg_mtp_loss: 2.5124 +[titan] 2025-10-05 01:14:32,754 - root - INFO - lr: 4.8907e-05 gnorm: 1.26 [ 2:40:24<21:56:16] +[titan] 2025-10-05 01:14:41,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:14:43,631 - root - INFO - step: 4350 loss: 2.7309 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 01:14:43,631 - root - INFO - lr: 4.8904e-05 gnorm: 1.21 [ 2:40:35<21:56:03] +[titan] 2025-10-05 01:14:54,554 - root - INFO - step: 4355 loss: 2.7817 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:14:54,555 - root - INFO - lr: 4.8901e-05 gnorm: 1.31 [ 2:40:46<21:55:51] +[titan] 2025-10-05 01:15:05,471 - root - INFO - step: 4360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:15:05,471 - root - INFO - lr: 4.8898e-05 gnorm: 1.18 [ 2:40:56<21:55:39] +[titan] 2025-10-05 01:15:16,353 - root - INFO - step: 4365 loss: 2.7543 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3265 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:15:16,353 - root - INFO - lr: 4.8896e-05 gnorm: 1.34 [ 2:41:07<21:55:26] +[titan] 2025-10-05 01:15:27,221 - root - INFO - step: 4370 loss: 2.8151 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3349 global_avg_mtp_loss: 2.4802 +[titan] 2025-10-05 01:15:27,222 - root - INFO - lr: 4.8893e-05 gnorm: 1.33 [ 2:41:18<21:55:13] +[titan] 2025-10-05 01:15:38,092 - root - INFO - step: 4375 loss: 2.8402 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:15:38,092 - root - INFO - lr: 4.8890e-05 gnorm: 1.24 [ 2:41:29<21:55:00] +[titan] 2025-10-05 01:15:48,973 - root - INFO - step: 4380 loss: 2.7636 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4347 +[titan] 2025-10-05 01:15:48,973 - root - INFO - lr: 4.8887e-05 
gnorm: 1.28 [ 2:41:40<21:54:48] +[titan] 2025-10-05 01:15:59,862 - root - INFO - step: 4385 loss: 2.7822 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4512 +[titan] 2025-10-05 01:15:59,863 - root - INFO - lr: 4.8884e-05 gnorm: 1.22 [ 2:41:51<21:54:35] +[titan] 2025-10-05 01:16:10,768 - root - INFO - step: 4390 loss: 2.8774 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5334 +[titan] 2025-10-05 01:16:10,768 - root - INFO - lr: 4.8882e-05 gnorm: 1.31 [ 2:42:02<21:54:23] +[titan] 2025-10-05 01:16:21,633 - root - INFO - step: 4395 loss: 2.7736 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4446 +[titan] 2025-10-05 01:16:21,633 - root - INFO - lr: 4.8879e-05 gnorm: 1.27 [ 2:42:13<21:54:10] +[titan] 2025-10-05 01:16:30,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:16:32,515 - root - INFO - step: 4400 loss: 2.8412 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5024 +[titan] 2025-10-05 01:16:32,515 - root - INFO - lr: 4.8876e-05 gnorm: 1.24 [ 2:42:24<21:53:57] +[titan] 2025-10-05 01:16:43,378 - root - INFO - step: 4405 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4765 +[titan] 2025-10-05 01:16:43,378 - root - INFO - lr: 4.8873e-05 gnorm: 1.25 [ 2:42:34<21:53:45] +[titan] 2025-10-05 01:16:54,311 - root - INFO - step: 4410 loss: 2.7984 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3341 global_avg_mtp_loss: 2.4642 +[titan] 2025-10-05 01:16:54,312 - root - INFO - lr: 4.8871e-05 gnorm: 1.22 [ 2:42:45<21:53:32] +[titan] 2025-10-05 01:17:05,164 - root - INFO - step: 4415 loss: 2.7761 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3306 global_avg_mtp_loss: 2.4455 +[titan] 2025-10-05 01:17:05,164 - root - INFO - lr: 4.8868e-05 gnorm: 1.24 [ 2:42:56<21:53:20] +[titan] 2025-10-05 01:17:16,059 - root - INFO - step: 4420 loss: 2.8777 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5351 +[titan] 2025-10-05 01:17:16,059 - root - INFO - lr: 4.8865e-05 gnorm: 1.27 [ 2:43:07<21:53:07] +[titan] 2025-10-05 01:17:26,943 - root - INFO - step: 4425 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4104 +[titan] 2025-10-05 01:17:26,943 - root - INFO - lr: 4.8862e-05 gnorm: 1.25 [ 2:43:18<21:52:54] +[titan] 2025-10-05 01:17:37,810 - root - INFO - step: 4430 loss: 2.8315 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:17:37,810 - root - INFO - lr: 4.8859e-05 
gnorm: 1.24 [ 2:43:29<21:52:42] +[titan] 2025-10-05 01:17:48,674 - root - INFO - step: 4435 loss: 2.7874 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4564 +[titan] 2025-10-05 01:17:48,674 - root - INFO - lr: 4.8857e-05 gnorm: 1.29 [ 2:43:40<21:52:29] +[titan] 2025-10-05 01:17:59,549 - root - INFO - step: 4440 loss: 2.7652 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4365 +[titan] 2025-10-05 01:17:59,549 - root - INFO - lr: 4.8854e-05 gnorm: 1.25 [ 2:43:51<21:52:16] +[titan] 2025-10-05 01:18:10,464 - root - INFO - step: 4445 loss: 2.7634 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4344 +[titan] 2025-10-05 01:18:10,464 - root - INFO - lr: 4.8851e-05 gnorm: 1.21 [ 2:44:01<21:52:04] +[titan] 2025-10-05 01:18:19,181 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:18:21,402 - root - INFO - step: 4450 loss: 2.8198 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.3358 global_avg_mtp_loss: 2.4839 +[titan] 2025-10-05 01:18:21,402 - root - INFO - lr: 4.8848e-05 gnorm: 1.25 [ 2:44:12<21:51:52] +[titan] 2025-10-05 01:18:32,290 - root - INFO - step: 4455 loss: 2.8002 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4666 +[titan] 2025-10-05 01:18:32,290 - root - INFO - lr: 4.8845e-05 gnorm: 1.21 [ 2:44:23<21:51:39] +[titan] 2025-10-05 01:18:43,182 - root - INFO - step: 4460 loss: 2.7924 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:18:43,183 - root - INFO - lr: 4.8842e-05 gnorm: 1.17 [ 2:44:34<21:51:27] +[titan] 2025-10-05 01:18:54,107 - root - INFO - step: 4465 loss: 2.8210 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3364 global_avg_mtp_loss: 2.4846 +[titan] 2025-10-05 01:18:54,107 - root - INFO - lr: 4.8840e-05 gnorm: 1.23 [ 2:44:45<21:51:15] +[titan] 2025-10-05 01:19:04,974 - root - INFO - step: 4470 loss: 2.7913 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4596 +[titan] 2025-10-05 01:19:04,974 - root - INFO - lr: 4.8837e-05 gnorm: 1.21 [ 2:44:56<21:51:02] +[titan] 2025-10-05 01:19:15,845 - root - INFO - step: 4475 loss: 2.8258 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3360 global_avg_mtp_loss: 2.4898 +[titan] 2025-10-05 01:19:15,846 - root - INFO - lr: 4.8834e-05 gnorm: 1.28 [ 2:45:07<21:50:49] +[titan] 2025-10-05 01:19:26,715 - root - INFO - step: 4480 loss: 2.7821 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:19:26,715 - root - INFO - lr: 4.8831e-05 
gnorm: 1.29 [ 2:45:18<21:50:37] +[titan] 2025-10-05 01:19:37,611 - root - INFO - step: 4485 loss: 2.8154 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4798 +[titan] 2025-10-05 01:19:37,611 - root - INFO - lr: 4.8828e-05 gnorm: 1.28 [ 2:45:29<21:50:24] +[titan] 2025-10-05 01:19:48,473 - root - INFO - step: 4490 loss: 2.7910 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3319 global_avg_mtp_loss: 2.4591 +[titan] 2025-10-05 01:19:48,474 - root - INFO - lr: 4.8825e-05 gnorm: 1.39 [ 2:45:39<21:50:12] +[titan] 2025-10-05 01:19:59,362 - root - INFO - step: 4495 loss: 2.7586 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4304 +[titan] 2025-10-05 01:19:59,363 - root - INFO - lr: 4.8823e-05 gnorm: 1.26 [ 2:45:50<21:49:59] +[titan] 2025-10-05 01:20:08,035 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:20:10,224 - root - INFO - step: 4500 loss: 2.8484 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.5091 +[titan] 2025-10-05 01:20:10,224 - root - INFO - lr: 4.8820e-05 gnorm: 1.25 [ 2:46:01<21:49:46] +[titan] 2025-10-05 01:20:21,077 - root - INFO - step: 4505 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3816 +[titan] 2025-10-05 01:20:21,077 - root - INFO - lr: 4.8817e-05 gnorm: 1.25 [ 2:46:12<21:49:34] +[titan] 2025-10-05 01:20:31,932 - root - INFO - step: 4510 loss: 2.8270 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3376 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:20:31,932 - root - INFO - lr: 4.8814e-05 gnorm: 1.26 [ 2:46:23<21:49:21] +[titan] 2025-10-05 01:20:42,845 - root - INFO - step: 4515 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4207 +[titan] 2025-10-05 01:20:42,845 - root - INFO - lr: 4.8811e-05 gnorm: 1.20 [ 2:46:34<21:49:08] +[titan] 2025-10-05 01:20:53,800 - root - INFO - step: 4520 loss: 2.8244 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4888 +[titan] 2025-10-05 01:20:53,800 - root - INFO - lr: 4.8808e-05 gnorm: 1.37 [ 2:46:45<21:48:57] +[titan] 2025-10-05 01:21:04,708 - root - INFO - step: 4525 loss: 2.7186 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3964 +[titan] 2025-10-05 01:21:04,708 - root - INFO - lr: 4.8805e-05 gnorm: 1.27 [ 2:46:56<21:48:44] +[titan] 2025-10-05 01:21:15,602 - root - INFO - step: 4530 loss: 2.7206 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3236 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:21:15,602 - root - INFO - lr: 4.8803e-05 
gnorm: 1.23 [ 2:47:07<21:48:32] +[titan] 2025-10-05 01:21:26,498 - root - INFO - step: 4535 loss: 2.7518 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3249 global_avg_mtp_loss: 2.4269 +[titan] 2025-10-05 01:21:26,498 - root - INFO - lr: 4.8800e-05 gnorm: 1.35 [ 2:47:17<21:48:19] +[titan] 2025-10-05 01:21:37,376 - root - INFO - step: 4540 loss: 2.7814 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3301 global_avg_mtp_loss: 2.4513 +[titan] 2025-10-05 01:21:37,376 - root - INFO - lr: 4.8797e-05 gnorm: 1.23 [ 2:47:28<21:48:07] +[titan] 2025-10-05 01:21:48,331 - root - INFO - step: 4545 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4487 +[titan] 2025-10-05 01:21:48,331 - root - INFO - lr: 4.8794e-05 gnorm: 1.25 [ 2:47:39<21:47:55] +[titan] 2025-10-05 01:21:57,058 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:21:59,248 - root - INFO - step: 4550 loss: 2.8483 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 01:21:59,249 - root - INFO - lr: 4.8791e-05 gnorm: 1.26 [ 2:47:50<21:47:43] +[titan] 2025-10-05 01:22:10,102 - root - INFO - step: 4555 loss: 2.7389 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.4138 +[titan] 2025-10-05 01:22:10,103 - root - INFO - lr: 4.8788e-05 gnorm: 1.20 [ 2:48:01<21:47:30] +[titan] 2025-10-05 01:22:20,974 - root - INFO - step: 4560 loss: 2.7847 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:22:20,974 - root - INFO - lr: 4.8785e-05 gnorm: 1.21 [ 2:48:12<21:47:17] +[titan] 2025-10-05 01:22:31,853 - root - INFO - step: 4565 loss: 2.7537 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:22:31,854 - root - INFO - lr: 4.8782e-05 gnorm: 1.27 [ 2:48:23<21:47:05] +[titan] 2025-10-05 01:22:42,729 - root - INFO - step: 4570 loss: 2.6580 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 01:22:42,729 - root - INFO - lr: 4.8779e-05 gnorm: 1.26 [ 2:48:34<21:46:52] +[titan] 2025-10-05 01:22:53,792 - root - INFO - step: 4575 loss: 2.8422 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.3385 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:22:53,792 - root - INFO - lr: 4.8777e-05 gnorm: 1.26 [ 2:48:45<21:46:41] +[titan] 2025-10-05 01:23:04,721 - root - INFO - step: 4580 loss: 2.6906 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3732 +[titan] 2025-10-05 01:23:04,721 - root - INFO - lr: 4.8774e-05 
gnorm: 1.18 [ 2:48:56<21:46:29] +[titan] 2025-10-05 01:23:15,616 - root - INFO - step: 4585 loss: 2.7509 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:23:15,616 - root - INFO - lr: 4.8771e-05 gnorm: 1.23 [ 2:49:07<21:46:17] +[titan] 2025-10-05 01:23:26,529 - root - INFO - step: 4590 loss: 2.7868 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4570 +[titan] 2025-10-05 01:23:26,530 - root - INFO - lr: 4.8768e-05 gnorm: 1.24 [ 2:49:18<21:46:04] +[titan] 2025-10-05 01:23:37,394 - root - INFO - step: 4595 loss: 2.7525 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3274 global_avg_mtp_loss: 2.4251 +[titan] 2025-10-05 01:23:37,394 - root - INFO - lr: 4.8765e-05 gnorm: 1.22 [ 2:49:28<21:45:52] +[titan] 2025-10-05 01:23:46,091 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:23:48,293 - root - INFO - step: 4600 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4209 +[titan] 2025-10-05 01:23:48,294 - root - INFO - lr: 4.8762e-05 gnorm: 1.18 [ 2:49:39<21:45:39] +[titan] 2025-10-05 01:23:59,314 - root - INFO - step: 4605 loss: 2.8650 memory: 118.84GiB(85.28%) tps: 29,734 tflops: 412.51 mfu: 41.71% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.5076 +[titan] 2025-10-05 01:23:59,314 - root - INFO - lr: 4.8759e-05 gnorm: 1.23 [ 2:49:50<21:45:28] +[titan] 2025-10-05 01:24:06,006 - root - INFO - Dumping profiler traces at step 4608 +[titan] 2025-10-05 01:24:06,042 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:24:10,470 - root - INFO - step: 4610 loss: 2.7849 memory: 118.84GiB(85.28%) tps: 29,373 tflops: 407.50 mfu: 41.20% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4534 +[titan] 2025-10-05 01:24:10,471 - root - INFO - lr: 4.8756e-05 gnorm: 1.28 [ 2:50:01<21:45:18] +[titan] 2025-10-05 01:24:21,351 - root - INFO - step: 4615 loss: 2.7549 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4283 +[titan] 2025-10-05 01:24:21,351 - root - INFO - lr: 4.8753e-05 gnorm: 1.21 [ 2:50:12<21:45:05] +[titan] 2025-10-05 01:24:32,230 - root - INFO - step: 4620 loss: 2.6761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3586 +[titan] 2025-10-05 01:24:32,230 - root - INFO - lr: 4.8750e-05 gnorm: 1.22 [ 2:50:23<21:44:53] +[titan] 2025-10-05 01:24:43,126 - root - INFO - step: 4625 loss: 2.6974 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:24:43,126 - root - INFO - lr: 4.8747e-05 gnorm: 1.18 [ 2:50:34<21:44:40] +[titan] 2025-10-05 01:24:54,032 - root - INFO - step: 4630 loss: 2.8650 
memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3412 global_avg_mtp_loss: 2.5238 +[titan] 2025-10-05 01:24:54,032 - root - INFO - lr: 4.8744e-05 gnorm: 1.24 [ 2:50:45<21:44:28] +[titan] 2025-10-05 01:25:04,940 - root - INFO - step: 4635 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4155 +[titan] 2025-10-05 01:25:04,940 - root - INFO - lr: 4.8741e-05 gnorm: 1.21 [ 2:50:56<21:44:16] +[titan] 2025-10-05 01:25:15,817 - root - INFO - step: 4640 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3562 +[titan] 2025-10-05 01:25:15,817 - root - INFO - lr: 4.8739e-05 gnorm: 1.31 [ 2:51:07<21:44:03] +[titan] 2025-10-05 01:25:26,720 - root - INFO - step: 4645 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3352 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:25:26,721 - root - INFO - lr: 4.8736e-05 gnorm: 1.23 [ 2:51:18<21:43:51] +[titan] 2025-10-05 01:25:35,460 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:25:37,643 - root - INFO - step: 4650 loss: 2.6937 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3193 global_avg_mtp_loss: 2.3743 +[titan] 2025-10-05 01:25:37,643 - root - INFO - lr: 4.8733e-05 gnorm: 1.23 [ 2:51:29<21:43:39] +[titan] 2025-10-05 01:25:48,524 - root - INFO - step: 4655 loss: 2.7402 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4144 +[titan] 2025-10-05 01:25:48,525 - root - INFO - lr: 4.8730e-05 gnorm: 1.22 [ 2:51:39<21:43:26] +[titan] 2025-10-05 01:25:59,422 - root - INFO - step: 4660 loss: 2.7820 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4520 +[titan] 2025-10-05 01:25:59,422 - root - INFO - lr: 4.8727e-05 gnorm: 1.30 [ 2:51:50<21:43:14] +[titan] 2025-10-05 01:26:10,311 - root - INFO - step: 4665 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3392 global_avg_mtp_loss: 2.5074 +[titan] 2025-10-05 01:26:10,311 - root - INFO - lr: 4.8724e-05 gnorm: 1.25 [ 2:52:01<21:43:02] +[titan] 2025-10-05 01:26:21,210 - root - INFO - step: 4670 loss: 2.7305 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4066 +[titan] 2025-10-05 01:26:21,210 - root - INFO - lr: 4.8721e-05 gnorm: 1.25 [ 2:52:12<21:42:49] +[titan] 2025-10-05 01:26:32,122 - root - INFO - step: 4675 loss: 2.7530 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4264 +[titan] 2025-10-05 01:26:32,122 - root - INFO - lr: 4.8718e-05 gnorm: 1.25 [ 2:52:23<21:42:37] +[titan] 2025-10-05 01:26:43,055 - root - INFO - step: 4680 loss: 2.8067 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4732 +[titan] 2025-10-05 01:26:43,055 - root - INFO - lr: 4.8715e-05 
gnorm: 1.24 [ 2:52:34<21:42:25] +[titan] 2025-10-05 01:26:53,990 - root - INFO - step: 4685 loss: 2.6707 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 01:26:53,990 - root - INFO - lr: 4.8712e-05 gnorm: 1.36 [ 2:52:45<21:42:13] +[titan] 2025-10-05 01:27:04,906 - root - INFO - step: 4690 loss: 2.7149 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3214 global_avg_mtp_loss: 2.3935 +[titan] 2025-10-05 01:27:04,906 - root - INFO - lr: 4.8709e-05 gnorm: 1.23 [ 2:52:56<21:42:01] +[titan] 2025-10-05 01:27:15,817 - root - INFO - step: 4695 loss: 2.6965 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3761 +[titan] 2025-10-05 01:27:15,817 - root - INFO - lr: 4.8706e-05 gnorm: 1.25 [ 2:53:07<21:41:49] +[titan] 2025-10-05 01:27:24,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:27:26,735 - root - INFO - step: 4700 loss: 2.7982 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 01:27:26,735 - root - INFO - lr: 4.8703e-05 gnorm: 1.22 [ 2:53:18<21:41:37] +[titan] 2025-10-05 01:27:37,672 - root - INFO - step: 4705 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.3443 global_avg_mtp_loss: 2.5494 +[titan] 2025-10-05 01:27:37,673 - root - INFO - lr: 4.8700e-05 gnorm: 1.26 [ 2:53:29<21:41:25] +[titan] 2025-10-05 01:27:48,615 - root - INFO - step: 4710 loss: 2.7471 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.3269 global_avg_mtp_loss: 2.4201 +[titan] 2025-10-05 01:27:48,616 - root - INFO - lr: 4.8697e-05 gnorm: 1.21 [ 2:53:40<21:41:13] +[titan] 2025-10-05 01:27:59,548 - root - INFO - step: 4715 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.3271 global_avg_mtp_loss: 2.4303 +[titan] 2025-10-05 01:27:59,548 - root - INFO - lr: 4.8694e-05 gnorm: 1.22 [ 2:53:51<21:41:01] +[titan] 2025-10-05 01:28:10,470 - root - INFO - step: 4720 loss: 2.8297 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3377 global_avg_mtp_loss: 2.4920 +[titan] 2025-10-05 01:28:10,471 - root - INFO - lr: 4.8691e-05 gnorm: 1.25 [ 2:54:01<21:40:49] +[titan] 2025-10-05 01:28:21,389 - root - INFO - step: 4725 loss: 2.8079 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4747 +[titan] 2025-10-05 01:28:21,389 - root - INFO - lr: 4.8688e-05 gnorm: 1.25 [ 2:54:12<21:40:36] +[titan] 2025-10-05 01:28:32,287 - root - INFO - step: 4730 loss: 2.7460 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3255 global_avg_mtp_loss: 2.4205 +[titan] 2025-10-05 01:28:32,287 - root - INFO - lr: 4.8685e-05 
gnorm: 1.27 [ 2:54:23<21:40:24] +[titan] 2025-10-05 01:28:43,251 - root - INFO - step: 4735 loss: 2.6848 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.64 mfu: 41.92% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3668 +[titan] 2025-10-05 01:28:43,252 - root - INFO - lr: 4.8682e-05 gnorm: 1.24 [ 2:54:34<21:40:12] +[titan] 2025-10-05 01:28:54,171 - root - INFO - step: 4740 loss: 2.7918 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4618 +[titan] 2025-10-05 01:28:54,171 - root - INFO - lr: 4.8679e-05 gnorm: 1.32 [ 2:54:45<21:40:00] +[titan] 2025-10-05 01:29:05,077 - root - INFO - step: 4745 loss: 2.7361 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4131 +[titan] 2025-10-05 01:29:05,077 - root - INFO - lr: 4.8676e-05 gnorm: 1.29 [ 2:54:56<21:39:48] +[titan] 2025-10-05 01:29:13,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:29:15,984 - root - INFO - step: 4750 loss: 2.7499 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:29:15,984 - root - INFO - lr: 4.8673e-05 gnorm: 1.26 [ 2:55:07<21:39:36] +[titan] 2025-10-05 01:29:26,874 - root - INFO - step: 4755 loss: 2.7721 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3285 global_avg_mtp_loss: 2.4435 +[titan] 2025-10-05 01:29:26,874 - root - INFO - lr: 4.8670e-05 gnorm: 1.19 [ 2:55:18<21:39:23] +[titan] 2025-10-05 01:29:37,761 - root - INFO - step: 4760 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3353 global_avg_mtp_loss: 2.4595 +[titan] 2025-10-05 01:29:37,761 - root - INFO - lr: 4.8667e-05 gnorm: 1.22 [ 2:55:29<21:39:11] +[titan] 2025-10-05 01:29:48,663 - root - INFO - step: 4765 loss: 2.7250 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3240 global_avg_mtp_loss: 2.4010 +[titan] 2025-10-05 01:29:48,664 - root - INFO - lr: 4.8664e-05 gnorm: 1.28 [ 2:55:40<21:38:59] +[titan] 2025-10-05 01:29:59,563 - root - INFO - step: 4770 loss: 2.7157 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3221 global_avg_mtp_loss: 2.3936 +[titan] 2025-10-05 01:29:59,563 - root - INFO - lr: 4.8661e-05 gnorm: 2.78 [ 2:55:51<21:38:47] +[titan] 2025-10-05 01:30:10,469 - root - INFO - step: 4775 loss: 2.8036 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4701 +[titan] 2025-10-05 01:30:10,469 - root - INFO - lr: 4.8658e-05 gnorm: 1.25 [ 2:56:01<21:38:34] +[titan] 2025-10-05 01:30:21,348 - root - INFO - step: 4780 loss: 2.7215 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:30:21,348 - root - INFO - lr: 4.8655e-05 
gnorm: 1.38 [ 2:56:12<21:38:22] +[titan] 2025-10-05 01:30:32,231 - root - INFO - step: 4785 loss: 2.7709 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4425 +[titan] 2025-10-05 01:30:32,231 - root - INFO - lr: 4.8652e-05 gnorm: 1.21 [ 2:56:23<21:38:10] +[titan] 2025-10-05 01:30:43,113 - root - INFO - step: 4790 loss: 2.7171 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.3934 +[titan] 2025-10-05 01:30:43,113 - root - INFO - lr: 4.8649e-05 gnorm: 1.19 [ 2:56:34<21:37:57] +[titan] 2025-10-05 01:30:54,053 - root - INFO - step: 4795 loss: 2.8155 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.4731 +[titan] 2025-10-05 01:30:54,054 - root - INFO - lr: 4.8646e-05 gnorm: 1.20 [ 2:56:45<21:37:45] +[titan] 2025-10-05 01:31:02,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:31:04,944 - root - INFO - step: 4800 loss: 2.7229 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4004 +[titan] 2025-10-05 01:31:04,944 - root - INFO - lr: 4.8643e-05 gnorm: 1.27 [ 2:56:56<21:37:33] +[titan] 2025-10-05 01:31:15,845 - root - INFO - step: 4805 loss: 2.7633 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4354 +[titan] 2025-10-05 01:31:15,845 - root - INFO - lr: 4.8639e-05 gnorm: 1.30 [ 2:57:07<21:37:21] +[titan] 2025-10-05 01:31:26,718 - root - INFO - step: 4810 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 01:31:26,718 - root - INFO - lr: 4.8636e-05 gnorm: 1.23 [ 2:57:18<21:37:08] +[titan] 2025-10-05 01:31:37,587 - root - INFO - step: 4815 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4100 +[titan] 2025-10-05 01:31:37,587 - root - INFO - lr: 4.8633e-05 gnorm: 1.25 [ 2:57:29<21:36:56] +[titan] 2025-10-05 01:31:48,487 - root - INFO - step: 4820 loss: 2.7752 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4454 +[titan] 2025-10-05 01:31:48,487 - root - INFO - lr: 4.8630e-05 gnorm: 1.24 [ 2:57:39<21:36:44] +[titan] 2025-10-05 01:31:59,366 - root - INFO - step: 4825 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3299 global_avg_mtp_loss: 2.4440 +[titan] 2025-10-05 01:31:59,366 - root - INFO - lr: 4.8627e-05 gnorm: 1.27 [ 2:57:50<21:36:31] +[titan] 2025-10-05 01:32:10,285 - root - INFO - step: 4830 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3289 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:32:10,285 - root - INFO - lr: 4.8624e-05 
gnorm: 1.25 [ 2:58:01<21:36:19] +[titan] 2025-10-05 01:32:21,158 - root - INFO - step: 4835 loss: 2.7916 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4584 +[titan] 2025-10-05 01:32:21,158 - root - INFO - lr: 4.8621e-05 gnorm: 1.23 [ 2:58:12<21:36:07] +[titan] 2025-10-05 01:32:32,019 - root - INFO - step: 4840 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3305 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:32:32,020 - root - INFO - lr: 4.8618e-05 gnorm: 1.25 [ 2:58:23<21:35:54] +[titan] 2025-10-05 01:32:42,890 - root - INFO - step: 4845 loss: 2.7622 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4341 +[titan] 2025-10-05 01:32:42,890 - root - INFO - lr: 4.8615e-05 gnorm: 1.24 [ 2:58:34<21:35:42] +[titan] 2025-10-05 01:32:51,570 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:32:53,752 - root - INFO - step: 4850 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3209 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 01:32:53,752 - root - INFO - lr: 4.8612e-05 gnorm: 1.26 [ 2:58:45<21:35:30] +[titan] 2025-10-05 01:33:04,624 - root - INFO - step: 4855 loss: 2.7888 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4588 +[titan] 2025-10-05 01:33:04,624 - root - INFO - lr: 4.8609e-05 gnorm: 1.30 [ 2:58:56<21:35:17] +[titan] 2025-10-05 01:33:15,520 - root - INFO - step: 4860 loss: 2.6936 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3751 +[titan] 2025-10-05 01:33:15,521 - root - INFO - lr: 4.8606e-05 gnorm: 1.24 [ 2:59:06<21:35:05] +[titan] 2025-10-05 01:33:26,393 - root - INFO - step: 4865 loss: 2.8919 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3439 global_avg_mtp_loss: 2.5480 +[titan] 2025-10-05 01:33:26,393 - root - INFO - lr: 4.8603e-05 gnorm: 1.25 [ 2:59:17<21:34:53] +[titan] 2025-10-05 01:33:37,259 - root - INFO - step: 4870 loss: 2.7240 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 01:33:37,259 - root - INFO - lr: 4.8599e-05 gnorm: 1.24 [ 2:59:28<21:34:40] +[titan] 2025-10-05 01:33:48,148 - root - INFO - step: 4875 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4417 +[titan] 2025-10-05 01:33:48,148 - root - INFO - lr: 4.8596e-05 gnorm: 1.26 [ 2:59:39<21:34:28] +[titan] 2025-10-05 01:33:59,034 - root - INFO - step: 4880 loss: 2.7227 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4000 +[titan] 2025-10-05 01:33:59,035 - root - INFO - lr: 4.8593e-05 
gnorm: 1.27 [ 2:59:50<21:34:16] +[titan] 2025-10-05 01:34:09,948 - root - INFO - step: 4885 loss: 2.7234 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4009 +[titan] 2025-10-05 01:34:09,948 - root - INFO - lr: 4.8590e-05 gnorm: 1.20 [ 3:00:01<21:34:04] +[titan] 2025-10-05 01:34:20,817 - root - INFO - step: 4890 loss: 2.7314 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4083 +[titan] 2025-10-05 01:34:20,818 - root - INFO - lr: 4.8587e-05 gnorm: 1.33 [ 3:00:12<21:33:51] +[titan] 2025-10-05 01:34:31,730 - root - INFO - step: 4895 loss: 2.7077 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3210 global_avg_mtp_loss: 2.3867 +[titan] 2025-10-05 01:34:31,730 - root - INFO - lr: 4.8584e-05 gnorm: 1.29 [ 3:00:23<21:33:39] +[titan] 2025-10-05 01:34:40,426 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:34:42,620 - root - INFO - step: 4900 loss: 2.7734 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3292 global_avg_mtp_loss: 2.4443 +[titan] 2025-10-05 01:34:42,620 - root - INFO - lr: 4.8581e-05 gnorm: 1.28 [ 3:00:34<21:33:27] +[titan] 2025-10-05 01:34:53,494 - root - INFO - step: 4905 loss: 2.7406 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4146 +[titan] 2025-10-05 01:34:53,495 - root - INFO - lr: 4.8578e-05 gnorm: 1.17 [ 3:00:44<21:33:14] +[titan] 2025-10-05 01:35:04,450 - root - INFO - step: 4910 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3981 +[titan] 2025-10-05 01:35:04,451 - root - INFO - lr: 4.8575e-05 gnorm: 1.20 [ 3:00:55<21:33:03] +[titan] 2025-10-05 01:35:15,335 - root - INFO - step: 4915 loss: 2.7382 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4140 +[titan] 2025-10-05 01:35:15,335 - root - INFO - lr: 4.8571e-05 gnorm: 1.28 [ 3:01:06<21:32:50] +[titan] 2025-10-05 01:35:26,233 - root - INFO - step: 4920 loss: 2.7952 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3320 global_avg_mtp_loss: 2.4631 +[titan] 2025-10-05 01:35:26,233 - root - INFO - lr: 4.8568e-05 gnorm: 1.29 [ 3:01:17<21:32:38] +[titan] 2025-10-05 01:35:37,136 - root - INFO - step: 4925 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4403 +[titan] 2025-10-05 01:35:37,136 - root - INFO - lr: 4.8565e-05 gnorm: 1.25 [ 3:01:28<21:32:26] +[titan] 2025-10-05 01:35:48,014 - root - INFO - step: 4930 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4081 +[titan] 2025-10-05 01:35:48,014 - root - INFO - lr: 4.8562e-05 
gnorm: 1.21 [ 3:01:39<21:32:14] +[titan] 2025-10-05 01:35:58,895 - root - INFO - step: 4935 loss: 2.7204 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:35:58,895 - root - INFO - lr: 4.8559e-05 gnorm: 1.20 [ 3:01:50<21:32:01] +[titan] 2025-10-05 01:36:09,806 - root - INFO - step: 4940 loss: 2.7788 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:36:09,807 - root - INFO - lr: 4.8556e-05 gnorm: 1.21 [ 3:02:01<21:31:49] +[titan] 2025-10-05 01:36:20,731 - root - INFO - step: 4945 loss: 2.7547 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3304 global_avg_mtp_loss: 2.4243 +[titan] 2025-10-05 01:36:20,732 - root - INFO - lr: 4.8553e-05 gnorm: 1.23 [ 3:02:12<21:31:37] +[titan] 2025-10-05 01:36:29,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:36:31,611 - root - INFO - step: 4950 loss: 2.6438 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3129 global_avg_mtp_loss: 2.3309 +[titan] 2025-10-05 01:36:31,611 - root - INFO - lr: 4.8549e-05 gnorm: 1.20 [ 3:02:23<21:31:25] +[titan] 2025-10-05 01:36:42,497 - root - INFO - step: 4955 loss: 2.7743 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:36:42,497 - root - INFO - lr: 4.8546e-05 gnorm: 1.29 [ 3:02:33<21:31:13] +[titan] 2025-10-05 01:36:53,369 - root - INFO - step: 4960 loss: 2.7846 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4551 +[titan] 2025-10-05 01:36:53,369 - root - INFO - lr: 4.8543e-05 gnorm: 1.25 [ 3:02:44<21:31:01] +[titan] 2025-10-05 01:37:04,267 - root - INFO - step: 4965 loss: 2.8172 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3344 global_avg_mtp_loss: 2.4828 +[titan] 2025-10-05 01:37:04,267 - root - INFO - lr: 4.8540e-05 gnorm: 1.25 [ 3:02:55<21:30:48] +[titan] 2025-10-05 01:37:15,212 - root - INFO - step: 4970 loss: 2.6436 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3310 +[titan] 2025-10-05 01:37:15,212 - root - INFO - lr: 4.8537e-05 gnorm: 1.25 [ 3:03:06<21:30:37] +[titan] 2025-10-05 01:37:26,159 - root - INFO - step: 4975 loss: 2.7551 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4289 +[titan] 2025-10-05 01:37:26,159 - root - INFO - lr: 4.8534e-05 gnorm: 1.22 [ 3:03:17<21:30:25] +[titan] 2025-10-05 01:37:37,030 - root - INFO - step: 4980 loss: 2.7052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3218 global_avg_mtp_loss: 2.3834 +[titan] 2025-10-05 01:37:37,031 - root - INFO - lr: 4.8530e-05 
gnorm: 1.26 [ 3:03:28<21:30:12] +[titan] 2025-10-05 01:37:47,943 - root - INFO - step: 4985 loss: 2.7357 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4094 +[titan] 2025-10-05 01:37:47,944 - root - INFO - lr: 4.8527e-05 gnorm: 1.27 [ 3:03:39<21:30:00] +[titan] 2025-10-05 01:37:58,856 - root - INFO - step: 4990 loss: 2.7950 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4628 +[titan] 2025-10-05 01:37:58,857 - root - INFO - lr: 4.8524e-05 gnorm: 1.22 [ 3:03:50<21:29:48] +[titan] 2025-10-05 01:38:09,823 - root - INFO - step: 4995 loss: 2.7375 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.3261 global_avg_mtp_loss: 2.4114 +[titan] 2025-10-05 01:38:09,823 - root - INFO - lr: 4.8521e-05 gnorm: 1.18 [ 3:04:01<21:29:37] +[titan] 2025-10-05 01:38:18,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:38:20,753 - root - INFO - step: 5000 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3200 global_avg_mtp_loss: 2.3792 +[titan] 2025-10-05 01:38:20,753 - root - INFO - lr: 4.8518e-05 gnorm: 1.26 [ 3:04:12<21:29:25] +[titan] 2025-10-05 01:38:20,753 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 01:38:42,127 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 01:38:42,128 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 21.37 seconds. 
+[titan] 2025-10-05 01:40:51,998 - root - INFO - step: 5005 loss: 2.7858 memory: 118.84GiB(85.28%) tps: 2,167 tflops: 30.06 mfu: 3.04% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:40:51,998 - root - INFO - lr: 4.8515e-05 gnorm: 1.27 [ 3:06:43<21:45:34] +[titan] 2025-10-05 01:41:02,796 - root - INFO - step: 5010 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3270 global_avg_mtp_loss: 2.4291 +[titan] 2025-10-05 01:41:02,796 - root - INFO - lr: 4.8511e-05 gnorm: 1.34 [ 3:06:54<21:45:20] +[titan] 2025-10-05 01:41:13,614 - root - INFO - step: 5015 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.3283 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:41:13,614 - root - INFO - lr: 4.8508e-05 gnorm: 1.32 [ 3:07:05<21:45:06] +[titan] 2025-10-05 01:41:24,485 - root - INFO - step: 5020 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4305 +[titan] 2025-10-05 01:41:24,485 - root - INFO - lr: 4.8505e-05 gnorm: 1.31 [ 3:07:15<21:44:53] +[titan] 2025-10-05 01:41:35,321 - root - INFO - step: 5025 loss: 2.7060 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3836 +[titan] 2025-10-05 01:41:35,321 - root - INFO - lr: 4.8502e-05 gnorm: 1.27 [ 3:07:26<21:44:39] +[titan] 2025-10-05 01:41:46,205 - root - INFO - step: 5030 loss: 2.7304 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3247 global_avg_mtp_loss: 2.4057 +[titan] 2025-10-05 01:41:46,205 - root - INFO - lr: 4.8499e-05 gnorm: 1.28 [ 3:07:37<21:44:26] +[titan] 2025-10-05 01:41:57,092 - root - INFO - step: 5035 loss: 2.7485 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4237 +[titan] 2025-10-05 01:41:57,093 - root - INFO - lr: 4.8495e-05 
gnorm: 1.26 [ 3:07:48<21:44:13] +[titan] 2025-10-05 01:42:08,008 - root - INFO - step: 5040 loss: 2.7641 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4360 +[titan] 2025-10-05 01:42:08,008 - root - INFO - lr: 4.8492e-05 gnorm: 1.18 [ 3:07:59<21:43:59] +[titan] 2025-10-05 01:42:18,888 - root - INFO - step: 5045 loss: 2.6254 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3136 +[titan] 2025-10-05 01:42:18,888 - root - INFO - lr: 4.8489e-05 gnorm: 1.29 [ 3:08:10<21:43:46] +[titan] 2025-10-05 01:42:27,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:42:29,897 - root - INFO - step: 5050 loss: 2.7825 memory: 118.84GiB(85.28%) tps: 29,766 tflops: 412.96 mfu: 41.75% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4531 +[titan] 2025-10-05 01:42:29,897 - root - INFO - lr: 4.8486e-05 gnorm: 1.24 [ 3:08:21<21:43:34] +[titan] 2025-10-05 01:42:40,766 - root - INFO - step: 5055 loss: 2.7808 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4505 +[titan] 2025-10-05 01:42:40,766 - root - INFO - lr: 4.8483e-05 gnorm: 1.22 [ 3:08:32<21:43:20] +[titan] 2025-10-05 01:42:51,649 - root - INFO - step: 5060 loss: 2.6497 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3358 +[titan] 2025-10-05 01:42:51,649 - root - INFO - lr: 4.8479e-05 gnorm: 1.25 [ 3:08:43<21:43:07] +[titan] 2025-10-05 01:43:02,533 - root - INFO - step: 5065 loss: 2.7482 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 01:43:02,533 - root - INFO - lr: 4.8476e-05 gnorm: 1.21 [ 3:08:53<21:42:54] +[titan] 2025-10-05 01:43:13,418 - root - INFO - step: 5070 loss: 2.8515 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 
417.66 mfu: 42.23% global_avg_ntp_loss: 0.3494 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:43:13,418 - root - INFO - lr: 4.8473e-05 gnorm: 1.24 [ 3:09:04<21:42:40] +[titan] 2025-10-05 01:43:24,295 - root - INFO - step: 5075 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3955 +[titan] 2025-10-05 01:43:24,295 - root - INFO - lr: 4.8470e-05 gnorm: 1.23 [ 3:09:15<21:42:27] +[titan] 2025-10-05 01:43:35,165 - root - INFO - step: 5080 loss: 2.6731 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3557 +[titan] 2025-10-05 01:43:35,166 - root - INFO - lr: 4.8466e-05 gnorm: 1.24 [ 3:09:26<21:42:14] +[titan] 2025-10-05 01:43:46,043 - root - INFO - step: 5085 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 01:43:46,043 - root - INFO - lr: 4.8463e-05 gnorm: 1.24 [ 3:09:37<21:42:00] +[titan] 2025-10-05 01:43:56,916 - root - INFO - step: 5090 loss: 2.7316 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4079 +[titan] 2025-10-05 01:43:56,916 - root - INFO - lr: 4.8460e-05 gnorm: 1.35 [ 3:09:48<21:41:47] +[titan] 2025-10-05 01:44:07,778 - root - INFO - step: 5095 loss: 2.7611 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4327 +[titan] 2025-10-05 01:44:07,778 - root - INFO - lr: 4.8457e-05 gnorm: 1.27 [ 3:09:59<21:41:34] +[titan] 2025-10-05 01:44:16,486 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:44:18,671 - root - INFO - step: 5100 loss: 2.6824 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3640 +[titan] 2025-10-05 01:44:18,671 - root - INFO - lr: 4.8453e-05 gnorm: 1.28 [ 3:10:10<21:41:20] +[titan] 2025-10-05 01:44:29,534 - root - INFO - step: 5105 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.4782 +[titan] 2025-10-05 01:44:29,534 - root - INFO - lr: 4.8450e-05 gnorm: 1.26 [ 3:10:20<21:41:07] +[titan] 2025-10-05 01:44:40,413 - root - INFO - step: 5110 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.3923 +[titan] 2025-10-05 01:44:40,413 - root - INFO - lr: 4.8447e-05 gnorm: 1.23 [ 3:10:31<21:40:54] +[titan] 2025-10-05 01:44:51,299 - root - INFO - step: 5115 loss: 2.6959 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3758 +[titan] 2025-10-05 01:44:51,300 - root - INFO - lr: 4.8444e-05 gnorm: 1.26 [ 3:10:42<21:40:40] +[titan] 2025-10-05 01:45:02,275 - root - INFO - step: 5120 loss: 2.7516 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.21 mfu: 41.88% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4257 +[titan] 2025-10-05 01:45:02,275 - root - INFO - lr: 4.8440e-05 gnorm: 1.21 [ 3:10:53<21:40:28] +[titan] 2025-10-05 01:45:02,450 - root - INFO - Dumping profiler traces at step 5120 +[titan] 2025-10-05 01:45:02,490 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:45:13,379 - root - INFO - step: 5125 loss: 2.7714 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4427 +[titan] 2025-10-05 01:45:13,379 - root - INFO - lr: 4.8437e-05 gnorm: 1.24 [ 3:11:04<21:40:16] +[titan] 2025-10-05 01:45:24,262 - root - INFO - step: 5130 loss: 2.6786 
memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3612 +[titan] 2025-10-05 01:45:24,263 - root - INFO - lr: 4.8434e-05 gnorm: 1.22 [ 3:11:15<21:40:03] +[titan] 2025-10-05 01:45:35,196 - root - INFO - step: 5135 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4702 +[titan] 2025-10-05 01:45:35,196 - root - INFO - lr: 4.8431e-05 gnorm: 1.27 [ 3:11:26<21:39:50] +[titan] 2025-10-05 01:45:46,094 - root - INFO - step: 5140 loss: 2.7216 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3233 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:45:46,094 - root - INFO - lr: 4.8427e-05 gnorm: 1.26 [ 3:11:37<21:39:37] +[titan] 2025-10-05 01:45:56,991 - root - INFO - step: 5145 loss: 2.7084 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3869 +[titan] 2025-10-05 01:45:56,991 - root - INFO - lr: 4.8424e-05 gnorm: 1.23 [ 3:11:48<21:39:24] +[titan] 2025-10-05 01:46:05,683 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:46:07,870 - root - INFO - step: 5150 loss: 2.7550 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4284 +[titan] 2025-10-05 01:46:07,870 - root - INFO - lr: 4.8421e-05 gnorm: 1.28 [ 3:11:59<21:39:10] +[titan] 2025-10-05 01:46:18,768 - root - INFO - step: 5155 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3142 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 01:46:18,768 - root - INFO - lr: 4.8417e-05 gnorm: 1.20 [ 3:12:10<21:38:57] +[titan] 2025-10-05 01:46:29,716 - root - INFO - step: 5160 loss: 2.7141 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3918 +[titan] 2025-10-05 01:46:29,716 - root - INFO - lr: 4.8414e-05 gnorm: 1.22 [ 3:12:21<21:38:45] +[titan] 2025-10-05 01:46:40,611 - root - INFO - step: 5165 loss: 2.7431 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3245 global_avg_mtp_loss: 2.4185 +[titan] 2025-10-05 01:46:40,611 - root - INFO - lr: 4.8411e-05 gnorm: 1.18 [ 3:12:32<21:38:31] +[titan] 2025-10-05 01:46:51,503 - root - INFO - step: 5170 loss: 2.6610 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 01:46:51,503 - root - INFO - lr: 4.8408e-05 gnorm: 1.21 [ 3:12:42<21:38:18] +[titan] 2025-10-05 01:47:02,418 - root - INFO - step: 5175 loss: 2.7319 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4077 +[titan] 2025-10-05 01:47:02,418 - root - INFO - lr: 4.8404e-05 gnorm: 1.21 [ 3:12:53<21:38:05] +[titan] 2025-10-05 01:47:13,333 - root - INFO - step: 5180 loss: 2.7303 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:47:13,333 - root - INFO - lr: 4.8401e-05 
gnorm: 1.24 [ 3:13:04<21:37:52] +[titan] 2025-10-05 01:47:24,247 - root - INFO - step: 5185 loss: 2.6746 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 01:47:24,247 - root - INFO - lr: 4.8398e-05 gnorm: 1.22 [ 3:13:15<21:37:39] +[titan] 2025-10-05 01:47:35,216 - root - INFO - step: 5190 loss: 2.7738 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4456 +[titan] 2025-10-05 01:47:35,216 - root - INFO - lr: 4.8394e-05 gnorm: 1.31 [ 3:13:26<21:37:27] +[titan] 2025-10-05 01:47:46,124 - root - INFO - step: 5195 loss: 2.8394 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3384 global_avg_mtp_loss: 2.5009 +[titan] 2025-10-05 01:47:46,124 - root - INFO - lr: 4.8391e-05 gnorm: 1.27 [ 3:13:37<21:37:14] +[titan] 2025-10-05 01:47:54,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:47:57,027 - root - INFO - step: 5200 loss: 2.7263 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4026 +[titan] 2025-10-05 01:47:57,027 - root - INFO - lr: 4.8388e-05 gnorm: 1.24 [ 3:13:48<21:37:01] +[titan] 2025-10-05 01:48:07,915 - root - INFO - step: 5205 loss: 2.7277 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.4038 +[titan] 2025-10-05 01:48:07,915 - root - INFO - lr: 4.8384e-05 gnorm: 1.21 [ 3:13:59<21:36:47] +[titan] 2025-10-05 01:48:18,830 - root - INFO - step: 5210 loss: 2.6835 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.3172 global_avg_mtp_loss: 2.3663 +[titan] 2025-10-05 01:48:18,830 - root - INFO - lr: 4.8381e-05 gnorm: 1.22 [ 3:14:10<21:36:35] +[titan] 2025-10-05 01:48:29,733 - root - INFO - step: 5215 loss: 2.6886 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3701 +[titan] 2025-10-05 01:48:29,733 - root - INFO - lr: 4.8378e-05 gnorm: 1.23 [ 3:14:21<21:36:21] +[titan] 2025-10-05 01:48:40,645 - root - INFO - step: 5220 loss: 2.7098 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 01:48:40,645 - root - INFO - lr: 4.8374e-05 gnorm: 1.25 [ 3:14:32<21:36:09] +[titan] 2025-10-05 01:48:51,536 - root - INFO - step: 5225 loss: 2.8169 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4830 +[titan] 2025-10-05 01:48:51,536 - root - INFO - lr: 4.8371e-05 gnorm: 1.24 [ 3:14:42<21:35:55] +[titan] 2025-10-05 01:49:02,433 - root - INFO - step: 5230 loss: 2.7455 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4197 +[titan] 2025-10-05 01:49:02,433 - root - INFO - lr: 4.8368e-05 
gnorm: 1.26 [ 3:14:53<21:35:42] +[titan] 2025-10-05 01:49:13,324 - root - INFO - step: 5235 loss: 2.7873 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.3324 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:49:13,325 - root - INFO - lr: 4.8364e-05 gnorm: 1.21 [ 3:15:04<21:35:29] +[titan] 2025-10-05 01:49:24,205 - root - INFO - step: 5240 loss: 2.6851 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3675 +[titan] 2025-10-05 01:49:24,206 - root - INFO - lr: 4.8361e-05 gnorm: 1.22 [ 3:15:15<21:35:16] +[titan] 2025-10-05 01:49:35,124 - root - INFO - step: 5245 loss: 2.7664 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:49:35,124 - root - INFO - lr: 4.8358e-05 gnorm: 1.24 [ 3:15:26<21:35:03] +[titan] 2025-10-05 01:49:43,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:49:45,992 - root - INFO - step: 5250 loss: 2.7297 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4047 +[titan] 2025-10-05 01:49:45,992 - root - INFO - lr: 4.8354e-05 gnorm: 1.29 [ 3:15:37<21:34:50] +[titan] 2025-10-05 01:49:56,896 - root - INFO - step: 5255 loss: 2.7151 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3928 +[titan] 2025-10-05 01:49:56,896 - root - INFO - lr: 4.8351e-05 gnorm: 1.29 [ 3:15:48<21:34:37] +[titan] 2025-10-05 01:50:07,763 - root - INFO - step: 5260 loss: 2.7886 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3308 global_avg_mtp_loss: 2.4578 +[titan] 2025-10-05 01:50:07,763 - root - INFO - lr: 4.8348e-05 gnorm: 1.36 [ 3:15:59<21:34:24] +[titan] 2025-10-05 01:50:18,645 - root - INFO - step: 5265 loss: 2.6117 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3012 +[titan] 2025-10-05 01:50:18,645 - root - INFO - lr: 4.8344e-05 gnorm: 1.24 [ 3:16:10<21:34:11] +[titan] 2025-10-05 01:50:29,515 - root - INFO - step: 5270 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4451 +[titan] 2025-10-05 01:50:29,516 - root - INFO - lr: 4.8341e-05 gnorm: 1.24 [ 3:16:20<21:33:57] +[titan] 2025-10-05 01:50:40,456 - root - INFO - step: 5275 loss: 2.7065 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.01% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3874 +[titan] 2025-10-05 01:50:40,457 - root - INFO - lr: 4.8338e-05 gnorm: 1.25 [ 3:16:31<21:33:45] +[titan] 2025-10-05 01:50:51,334 - root - INFO - step: 5280 loss: 2.7674 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4384 +[titan] 2025-10-05 01:50:51,334 - root - INFO - lr: 4.8334e-05 
gnorm: 1.25 [ 3:16:42<21:33:31] +[titan] 2025-10-05 01:51:02,214 - root - INFO - step: 5285 loss: 2.6660 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3493 +[titan] 2025-10-05 01:51:02,214 - root - INFO - lr: 4.8331e-05 gnorm: 1.20 [ 3:16:53<21:33:18] +[titan] 2025-10-05 01:51:13,075 - root - INFO - step: 5290 loss: 2.7457 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4189 +[titan] 2025-10-05 01:51:13,075 - root - INFO - lr: 4.8327e-05 gnorm: 1.25 [ 3:17:04<21:33:05] +[titan] 2025-10-05 01:51:23,938 - root - INFO - step: 5295 loss: 2.7299 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:51:23,938 - root - INFO - lr: 4.8324e-05 gnorm: 1.18 [ 3:17:15<21:32:52] +[titan] 2025-10-05 01:51:32,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:51:34,836 - root - INFO - step: 5300 loss: 2.7577 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4310 +[titan] 2025-10-05 01:51:34,836 - root - INFO - lr: 4.8321e-05 gnorm: 1.27 [ 3:17:26<21:32:39] +[titan] 2025-10-05 01:51:45,732 - root - INFO - step: 5305 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.4411 +[titan] 2025-10-05 01:51:45,732 - root - INFO - lr: 4.8317e-05 gnorm: 1.28 [ 3:17:37<21:32:26] +[titan] 2025-10-05 01:51:56,598 - root - INFO - step: 5310 loss: 2.6649 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3486 +[titan] 2025-10-05 01:51:56,598 - root - INFO - lr: 4.8314e-05 gnorm: 1.25 [ 3:17:48<21:32:13] +[titan] 2025-10-05 01:52:07,463 - root - INFO - step: 5315 loss: 2.6130 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3093 global_avg_mtp_loss: 2.3037 +[titan] 2025-10-05 01:52:07,463 - root - INFO - lr: 4.8311e-05 gnorm: 1.23 [ 3:17:58<21:31:59] +[titan] 2025-10-05 01:52:18,354 - root - INFO - step: 5320 loss: 2.7768 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4481 +[titan] 2025-10-05 01:52:18,354 - root - INFO - lr: 4.8307e-05 gnorm: 1.31 [ 3:18:09<21:31:46] +[titan] 2025-10-05 01:52:29,236 - root - INFO - step: 5325 loss: 2.7143 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3924 +[titan] 2025-10-05 01:52:29,236 - root - INFO - lr: 4.8304e-05 gnorm: 1.21 [ 3:18:20<21:31:33] +[titan] 2025-10-05 01:52:40,146 - root - INFO - step: 5330 loss: 2.7556 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4290 +[titan] 2025-10-05 01:52:40,146 - root - INFO - lr: 4.8300e-05 
gnorm: 1.27 [ 3:18:31<21:31:20] +[titan] 2025-10-05 01:52:51,044 - root - INFO - step: 5335 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4168 +[titan] 2025-10-05 01:52:51,044 - root - INFO - lr: 4.8297e-05 gnorm: 1.26 [ 3:18:42<21:31:07] +[titan] 2025-10-05 01:53:01,911 - root - INFO - step: 5340 loss: 2.7097 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3206 global_avg_mtp_loss: 2.3891 +[titan] 2025-10-05 01:53:01,911 - root - INFO - lr: 4.8294e-05 gnorm: 1.30 [ 3:18:53<21:30:54] +[titan] 2025-10-05 01:53:12,786 - root - INFO - step: 5345 loss: 2.6651 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3501 +[titan] 2025-10-05 01:53:12,787 - root - INFO - lr: 4.8290e-05 gnorm: 1.21 [ 3:19:04<21:30:41] +[titan] 2025-10-05 01:53:21,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:53:23,680 - root - INFO - step: 5350 loss: 2.7279 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3226 global_avg_mtp_loss: 2.4053 +[titan] 2025-10-05 01:53:23,680 - root - INFO - lr: 4.8287e-05 gnorm: 1.24 [ 3:19:15<21:30:28] +[titan] 2025-10-05 01:53:34,600 - root - INFO - step: 5355 loss: 2.6227 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 01:53:34,600 - root - INFO - lr: 4.8283e-05 gnorm: 1.28 [ 3:19:26<21:30:15] +[titan] 2025-10-05 01:53:45,495 - root - INFO - step: 5360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4548 +[titan] 2025-10-05 01:53:45,495 - root - INFO - lr: 4.8280e-05 gnorm: 1.23 [ 3:19:36<21:30:02] +[titan] 2025-10-05 01:53:56,371 - root - INFO - step: 5365 loss: 2.7914 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4555 +[titan] 2025-10-05 01:53:56,372 - root - INFO - lr: 4.8276e-05 gnorm: 1.23 [ 3:19:47<21:29:49] +[titan] 2025-10-05 01:54:07,246 - root - INFO - step: 5370 loss: 2.6816 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3627 +[titan] 2025-10-05 01:54:07,246 - root - INFO - lr: 4.8273e-05 gnorm: 1.23 [ 3:19:58<21:29:36] +[titan] 2025-10-05 01:54:18,130 - root - INFO - step: 5375 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3272 +[titan] 2025-10-05 01:54:18,130 - root - INFO - lr: 4.8270e-05 gnorm: 1.27 [ 3:20:09<21:29:23] +[titan] 2025-10-05 01:54:28,973 - root - INFO - step: 5380 loss: 2.7116 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3899 +[titan] 2025-10-05 01:54:28,973 - root - INFO - lr: 4.8266e-05 
gnorm: 1.23 [ 3:20:20<21:29:10] +[titan] 2025-10-05 01:54:39,864 - root - INFO - step: 5385 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3764 +[titan] 2025-10-05 01:54:39,864 - root - INFO - lr: 4.8263e-05 gnorm: 1.24 [ 3:20:31<21:28:57] +[titan] 2025-10-05 01:54:50,734 - root - INFO - step: 5390 loss: 2.7644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4366 +[titan] 2025-10-05 01:54:50,735 - root - INFO - lr: 4.8259e-05 gnorm: 1.25 [ 3:20:42<21:28:44] +[titan] 2025-10-05 01:55:01,593 - root - INFO - step: 5395 loss: 2.7603 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3272 global_avg_mtp_loss: 2.4331 +[titan] 2025-10-05 01:55:01,593 - root - INFO - lr: 4.8256e-05 gnorm: 1.21 [ 3:20:52<21:28:31] +[titan] 2025-10-05 01:55:10,273 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:55:12,472 - root - INFO - step: 5400 loss: 2.7045 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3833 +[titan] 2025-10-05 01:55:12,472 - root - INFO - lr: 4.8252e-05 gnorm: 1.20 [ 3:21:03<21:28:18] +[titan] 2025-10-05 01:55:23,346 - root - INFO - step: 5405 loss: 2.7062 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3213 global_avg_mtp_loss: 2.3849 +[titan] 2025-10-05 01:55:23,346 - root - INFO - lr: 4.8249e-05 gnorm: 1.20 [ 3:21:14<21:28:05] +[titan] 2025-10-05 01:55:34,207 - root - INFO - step: 5410 loss: 2.7345 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4118 +[titan] 2025-10-05 01:55:34,207 - root - INFO - lr: 4.8245e-05 gnorm: 1.26 [ 3:21:25<21:27:51] +[titan] 2025-10-05 01:55:45,114 - root - INFO - step: 5415 loss: 2.6787 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3618 +[titan] 2025-10-05 01:55:45,114 - root - INFO - lr: 4.8242e-05 gnorm: 1.16 [ 3:21:36<21:27:39] +[titan] 2025-10-05 01:55:55,985 - root - INFO - step: 5420 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.2994 +[titan] 2025-10-05 01:55:55,985 - root - INFO - lr: 4.8239e-05 gnorm: 1.23 [ 3:21:47<21:27:25] +[titan] 2025-10-05 01:56:06,858 - root - INFO - step: 5425 loss: 2.6262 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 01:56:06,858 - root - INFO - lr: 4.8235e-05 gnorm: 1.20 [ 3:21:58<21:27:12] +[titan] 2025-10-05 01:56:17,752 - root - INFO - step: 5430 loss: 2.6880 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3706 +[titan] 2025-10-05 01:56:17,752 - root - INFO - lr: 4.8232e-05 
gnorm: 1.21 [ 3:22:09<21:26:59] +[titan] 2025-10-05 01:56:28,647 - root - INFO - step: 5435 loss: 2.6104 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 01:56:28,647 - root - INFO - lr: 4.8228e-05 gnorm: 1.24 [ 3:22:20<21:26:47] +[titan] 2025-10-05 01:56:39,549 - root - INFO - step: 5440 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3753 +[titan] 2025-10-05 01:56:39,550 - root - INFO - lr: 4.8225e-05 gnorm: 1.24 [ 3:22:30<21:26:34] +[titan] 2025-10-05 01:56:50,425 - root - INFO - step: 5445 loss: 2.7005 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 01:56:50,426 - root - INFO - lr: 4.8221e-05 gnorm: 1.24 [ 3:22:41<21:26:21] +[titan] 2025-10-05 01:56:59,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:57:01,307 - root - INFO - step: 5450 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3211 global_avg_mtp_loss: 2.3942 +[titan] 2025-10-05 01:57:01,307 - root - INFO - lr: 4.8218e-05 gnorm: 1.22 [ 3:22:52<21:26:08] +[titan] 2025-10-05 01:57:12,168 - root - INFO - step: 5455 loss: 2.7238 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4003 +[titan] 2025-10-05 01:57:12,168 - root - INFO - lr: 4.8214e-05 gnorm: 1.25 [ 3:23:03<21:25:55] +[titan] 2025-10-05 01:57:23,004 - root - INFO - step: 5460 loss: 2.7013 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 01:57:23,004 - root - INFO - lr: 4.8211e-05 gnorm: 1.21 [ 3:23:14<21:25:41] +[titan] 2025-10-05 01:57:33,870 - root - INFO - step: 5465 loss: 2.7566 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:57:33,870 - root - INFO - lr: 4.8207e-05 gnorm: 1.28 [ 3:23:25<21:25:28] +[titan] 2025-10-05 01:57:44,735 - root - INFO - step: 5470 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 01:57:44,736 - root - INFO - lr: 4.8204e-05 gnorm: 3.95 [ 3:23:36<21:25:15] +[titan] 2025-10-05 01:57:55,597 - root - INFO - step: 5475 loss: 2.7332 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4064 +[titan] 2025-10-05 01:57:55,597 - root - INFO - lr: 4.8200e-05 gnorm: 5.60 [ 3:23:46<21:25:02] +[titan] 2025-10-05 01:58:06,457 - root - INFO - step: 5480 loss: 2.6333 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3196 +[titan] 2025-10-05 01:58:06,457 - root - INFO - lr: 4.8197e-05 
gnorm: 1.42 [ 3:23:57<21:24:49] +[titan] 2025-10-05 01:58:17,326 - root - INFO - step: 5485 loss: 2.6808 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3195 global_avg_mtp_loss: 2.3613 +[titan] 2025-10-05 01:58:17,327 - root - INFO - lr: 4.8193e-05 gnorm: 1.64 [ 3:24:08<21:24:36] +[titan] 2025-10-05 01:58:28,172 - root - INFO - step: 5490 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3789 +[titan] 2025-10-05 01:58:28,172 - root - INFO - lr: 4.8190e-05 gnorm: 1.44 [ 3:24:19<21:24:23] +[titan] 2025-10-05 01:58:39,061 - root - INFO - step: 5495 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:58:39,061 - root - INFO - lr: 4.8186e-05 gnorm: 1.37 [ 3:24:30<21:24:10] +[titan] 2025-10-05 01:58:47,779 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:58:49,967 - root - INFO - step: 5500 loss: 2.7427 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4165 +[titan] 2025-10-05 01:58:49,967 - root - INFO - lr: 4.8183e-05 gnorm: 1.30 [ 3:24:41<21:23:57] +[titan] 2025-10-05 01:59:00,823 - root - INFO - step: 5505 loss: 2.7373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4120 +[titan] 2025-10-05 01:59:00,823 - root - INFO - lr: 4.8179e-05 gnorm: 1.29 [ 3:24:52<21:23:44] +[titan] 2025-10-05 01:59:11,693 - root - INFO - step: 5510 loss: 2.6666 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3511 +[titan] 2025-10-05 01:59:11,693 - root - INFO - lr: 4.8176e-05 gnorm: 1.30 [ 3:25:03<21:23:31] +[titan] 2025-10-05 01:59:22,587 - root - INFO - step: 5515 loss: 2.7189 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.3938 +[titan] 2025-10-05 01:59:22,588 - root - INFO - lr: 4.8172e-05 gnorm: 6.71 [ 3:25:13<21:23:18] +[titan] 2025-10-05 01:59:33,471 - root - INFO - step: 5520 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3125 global_avg_mtp_loss: 2.3281 +[titan] 2025-10-05 01:59:33,471 - root - INFO - lr: 4.8169e-05 gnorm: 1.27 [ 3:25:24<21:23:05] +[titan] 2025-10-05 01:59:44,386 - root - INFO - step: 5525 loss: 2.6236 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3140 +[titan] 2025-10-05 01:59:44,386 - root - INFO - lr: 4.8165e-05 gnorm: 1.21 [ 3:25:35<21:22:52] +[titan] 2025-10-05 01:59:55,268 - root - INFO - step: 5530 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 01:59:55,268 - root - INFO - lr: 4.8162e-05 
gnorm: 1.23 [ 3:25:46<21:22:40] +[titan] 2025-10-05 02:00:06,139 - root - INFO - step: 5535 loss: 2.6010 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3104 global_avg_mtp_loss: 2.2906 +[titan] 2025-10-05 02:00:06,139 - root - INFO - lr: 4.8158e-05 gnorm: 1.28 [ 3:25:57<21:22:27] +[titan] 2025-10-05 02:00:17,012 - root - INFO - step: 5540 loss: 2.6903 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 02:00:17,013 - root - INFO - lr: 4.8155e-05 gnorm: 1.28 [ 3:26:08<21:22:14] +[titan] 2025-10-05 02:00:27,882 - root - INFO - step: 5545 loss: 2.6624 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3482 +[titan] 2025-10-05 02:00:27,882 - root - INFO - lr: 4.8151e-05 gnorm: 1.25 [ 3:26:19<21:22:01] +[titan] 2025-10-05 02:00:36,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:00:38,754 - root - INFO - step: 5550 loss: 2.6437 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3307 +[titan] 2025-10-05 02:00:38,754 - root - INFO - lr: 4.8147e-05 gnorm: 1.23 [ 3:26:30<21:21:48] +[titan] 2025-10-05 02:00:49,688 - root - INFO - step: 5555 loss: 2.6840 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:00:49,689 - root - INFO - lr: 4.8144e-05 gnorm: 1.21 [ 3:26:41<21:21:35] +[titan] 2025-10-05 02:01:00,569 - root - INFO - step: 5560 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3569 +[titan] 2025-10-05 02:01:00,569 - root - INFO - lr: 4.8140e-05 gnorm: 1.21 [ 3:26:51<21:21:22] +[titan] 2025-10-05 02:01:11,488 - root - INFO - step: 5565 loss: 2.6609 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 02:01:11,488 - root - INFO - lr: 4.8137e-05 gnorm: 1.24 [ 3:27:02<21:21:09] +[titan] 2025-10-05 02:01:22,384 - root - INFO - step: 5570 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.3978 +[titan] 2025-10-05 02:01:22,384 - root - INFO - lr: 4.8133e-05 gnorm: 1.26 [ 3:27:13<21:20:57] +[titan] 2025-10-05 02:01:33,286 - root - INFO - step: 5575 loss: 2.6770 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3597 +[titan] 2025-10-05 02:01:33,287 - root - INFO - lr: 4.8130e-05 gnorm: 1.23 [ 3:27:24<21:20:44] +[titan] 2025-10-05 02:01:44,187 - root - INFO - step: 5580 loss: 2.6684 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.3160 global_avg_mtp_loss: 2.3524 +[titan] 2025-10-05 02:01:44,187 - root - INFO - lr: 4.8126e-05 
gnorm: 1.22 [ 3:27:35<21:20:31] +[titan] 2025-10-05 02:01:55,071 - root - INFO - step: 5585 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3821 +[titan] 2025-10-05 02:01:55,072 - root - INFO - lr: 4.8123e-05 gnorm: 1.23 [ 3:27:46<21:20:18] +[titan] 2025-10-05 02:02:05,953 - root - INFO - step: 5590 loss: 2.7020 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 02:02:05,954 - root - INFO - lr: 4.8119e-05 gnorm: 1.29 [ 3:27:57<21:20:05] +[titan] 2025-10-05 02:02:16,866 - root - INFO - step: 5595 loss: 2.6621 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3458 +[titan] 2025-10-05 02:02:16,866 - root - INFO - lr: 4.8115e-05 gnorm: 1.25 [ 3:28:08<21:19:53] +[titan] 2025-10-05 02:02:25,554 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:02:27,738 - root - INFO - step: 5600 loss: 2.7026 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3818 +[titan] 2025-10-05 02:02:27,739 - root - INFO - lr: 4.8112e-05 gnorm: 1.26 [ 3:28:19<21:19:40] +[titan] 2025-10-05 02:02:38,604 - root - INFO - step: 5605 loss: 2.6192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:02:38,604 - root - INFO - lr: 4.8108e-05 gnorm: 1.25 [ 3:28:29<21:19:27] +[titan] 2025-10-05 02:02:49,527 - root - INFO - step: 5610 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3444 +[titan] 2025-10-05 02:02:49,527 - root - INFO - lr: 4.8105e-05 gnorm: 1.32 [ 3:28:40<21:19:14] +[titan] 2025-10-05 02:03:00,407 - root - INFO - step: 5615 loss: 2.6727 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3552 +[titan] 2025-10-05 02:03:00,408 - root - INFO - lr: 4.8101e-05 gnorm: 1.19 [ 3:28:51<21:19:01] +[titan] 2025-10-05 02:03:11,293 - root - INFO - step: 5620 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3098 +[titan] 2025-10-05 02:03:11,293 - root - INFO - lr: 4.8097e-05 gnorm: 1.24 [ 3:29:02<21:18:48] +[titan] 2025-10-05 02:03:22,216 - root - INFO - step: 5625 loss: 2.6235 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 02:03:22,216 - root - INFO - lr: 4.8094e-05 gnorm: 1.21 [ 3:29:13<21:18:36] +[titan] 2025-10-05 02:03:33,165 - root - INFO - step: 5630 loss: 2.7089 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3881 +[titan] 2025-10-05 02:03:33,165 - root - INFO - lr: 4.8090e-05 
gnorm: 1.27 [ 3:29:24<21:18:23] +[titan] 2025-10-05 02:03:37,701 - root - INFO - Dumping profiler traces at step 5632 +[titan] 2025-10-05 02:03:37,740 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:03:44,274 - root - INFO - step: 5635 loss: 2.6796 memory: 118.84GiB(85.28%) tps: 29,497 tflops: 409.23 mfu: 41.38% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3615 +[titan] 2025-10-05 02:03:44,274 - root - INFO - lr: 4.8087e-05 gnorm: 1.25 [ 3:29:35<21:18:12] +[titan] 2025-10-05 02:03:55,158 - root - INFO - step: 5640 loss: 2.6061 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3075 global_avg_mtp_loss: 2.2987 +[titan] 2025-10-05 02:03:55,158 - root - INFO - lr: 4.8083e-05 gnorm: 1.25 [ 3:29:46<21:17:59] +[titan] 2025-10-05 02:04:06,053 - root - INFO - step: 5645 loss: 2.7125 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3908 +[titan] 2025-10-05 02:04:06,053 - root - INFO - lr: 4.8079e-05 gnorm: 1.34 [ 3:29:57<21:17:46] +[titan] 2025-10-05 02:04:14,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:04:16,937 - root - INFO - step: 5650 loss: 2.5977 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:04:16,937 - root - INFO - lr: 4.8076e-05 gnorm: 1.27 [ 3:30:08<21:17:34] +[titan] 2025-10-05 02:04:27,853 - root - INFO - step: 5655 loss: 2.6416 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3299 +[titan] 2025-10-05 02:04:27,854 - root - INFO - lr: 4.8072e-05 gnorm: 1.30 [ 3:30:19<21:17:21] +[titan] 2025-10-05 02:04:38,772 - root - INFO - step: 5660 loss: 2.7230 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 02:04:38,772 - root - INFO - lr: 4.8069e-05 gnorm: 1.24 [ 3:30:30<21:17:08] +[titan] 2025-10-05 02:04:49,685 - root - INFO - step: 5665 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3829 +[titan] 2025-10-05 02:04:49,685 - root - INFO - lr: 4.8065e-05 gnorm: 1.26 [ 3:30:41<21:16:56] +[titan] 2025-10-05 02:05:00,577 - root - INFO - step: 5670 loss: 2.6274 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3178 +[titan] 2025-10-05 02:05:00,577 - root - INFO - lr: 4.8061e-05 gnorm: 1.25 [ 3:30:51<21:16:43] +[titan] 2025-10-05 02:05:11,454 - root - INFO - step: 5675 loss: 2.6289 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:11,454 - root - INFO - lr: 4.8058e-05 gnorm: 1.22 [ 3:31:02<21:16:30] +[titan] 2025-10-05 02:05:22,325 - root - INFO - step: 5680 loss: 2.7071 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3216 global_avg_mtp_loss: 2.3854 +[titan] 2025-10-05 02:05:22,325 - root - INFO - lr: 4.8054e-05 
gnorm: 1.24 [ 3:31:13<21:16:17] +[titan] 2025-10-05 02:05:33,190 - root - INFO - step: 5685 loss: 2.6647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3151 global_avg_mtp_loss: 2.3496 +[titan] 2025-10-05 02:05:33,190 - root - INFO - lr: 4.8051e-05 gnorm: 1.25 [ 3:31:24<21:16:04] +[titan] 2025-10-05 02:05:44,079 - root - INFO - step: 5690 loss: 2.6318 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3204 +[titan] 2025-10-05 02:05:44,079 - root - INFO - lr: 4.8047e-05 gnorm: 1.20 [ 3:31:35<21:15:51] +[titan] 2025-10-05 02:05:54,989 - root - INFO - step: 5695 loss: 2.6284 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:54,989 - root - INFO - lr: 4.8043e-05 gnorm: 1.18 [ 3:31:46<21:15:39] +[titan] 2025-10-05 02:06:03,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:06:05,856 - root - INFO - step: 5700 loss: 2.6425 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:06:05,856 - root - INFO - lr: 4.8040e-05 gnorm: 1.17 [ 3:31:57<21:15:26] +[titan] 2025-10-05 02:06:16,740 - root - INFO - step: 5705 loss: 2.6825 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3176 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:06:16,740 - root - INFO - lr: 4.8036e-05 gnorm: 1.21 [ 3:32:08<21:15:13] +[titan] 2025-10-05 02:06:27,613 - root - INFO - step: 5710 loss: 2.7487 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 02:06:27,613 - root - INFO - lr: 4.8032e-05 gnorm: 1.24 [ 3:32:18<21:15:00] +[titan] 2025-10-05 02:06:38,482 - root - INFO - step: 5715 loss: 2.6692 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3542 +[titan] 2025-10-05 02:06:38,482 - root - INFO - lr: 4.8029e-05 gnorm: 1.27 [ 3:32:29<21:14:47] +[titan] 2025-10-05 02:06:49,408 - root - INFO - step: 5720 loss: 2.6745 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3170 global_avg_mtp_loss: 2.3576 +[titan] 2025-10-05 02:06:49,408 - root - INFO - lr: 4.8025e-05 gnorm: 1.21 [ 3:32:40<21:14:35] +[titan] 2025-10-05 02:07:00,305 - root - INFO - step: 5725 loss: 2.6145 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3063 +[titan] 2025-10-05 02:07:00,305 - root - INFO - lr: 4.8021e-05 gnorm: 1.25 [ 3:32:51<21:14:22] +[titan] 2025-10-05 02:07:11,183 - root - INFO - step: 5730 loss: 2.6939 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3752 +[titan] 2025-10-05 02:07:11,183 - root - INFO - lr: 4.8018e-05 
gnorm: 1.27 [ 3:33:02<21:14:09] +[titan] 2025-10-05 02:07:22,045 - root - INFO - step: 5735 loss: 2.6083 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3002 +[titan] 2025-10-05 02:07:22,046 - root - INFO - lr: 4.8014e-05 gnorm: 1.28 [ 3:33:13<21:13:57] +[titan] 2025-10-05 02:07:32,920 - root - INFO - step: 5740 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3073 global_avg_mtp_loss: 2.2892 +[titan] 2025-10-05 02:07:32,920 - root - INFO - lr: 4.8010e-05 gnorm: 1.17 [ 3:33:24<21:13:44] +[titan] 2025-10-05 02:07:43,786 - root - INFO - step: 5745 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:07:43,786 - root - INFO - lr: 4.8007e-05 gnorm: 1.24 [ 3:33:35<21:13:31] +[titan] 2025-10-05 02:07:52,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:07:54,774 - root - INFO - step: 5750 loss: 2.6142 memory: 118.84GiB(85.28%) tps: 29,822 tflops: 413.73 mfu: 41.83% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 02:07:54,774 - root - INFO - lr: 4.8003e-05 gnorm: 1.24 [ 3:33:46<21:13:19] +[titan] 2025-10-05 02:08:05,686 - root - INFO - step: 5755 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3384 +[titan] 2025-10-05 02:08:05,686 - root - INFO - lr: 4.7999e-05 gnorm: 1.20 [ 3:33:57<21:13:06] +[titan] 2025-10-05 02:08:16,606 - root - INFO - step: 5760 loss: 2.7255 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4025 +[titan] 2025-10-05 02:08:16,606 - root - INFO - lr: 4.7996e-05 gnorm: 1.24 [ 3:34:07<21:12:54] +[titan] 2025-10-05 02:08:27,488 - root - INFO - step: 5765 loss: 2.6698 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3144 global_avg_mtp_loss: 2.3553 +[titan] 2025-10-05 02:08:27,489 - root - INFO - lr: 4.7992e-05 gnorm: 1.25 [ 3:34:18<21:12:41] +[titan] 2025-10-05 02:08:38,371 - root - INFO - step: 5770 loss: 2.7107 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 02:08:38,371 - root - INFO - lr: 4.7988e-05 gnorm: 1.26 [ 3:34:29<21:12:28] +[titan] 2025-10-05 02:08:49,290 - root - INFO - step: 5775 loss: 2.7046 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3203 global_avg_mtp_loss: 2.3843 +[titan] 2025-10-05 02:08:49,291 - root - INFO - lr: 4.7985e-05 gnorm: 1.28 [ 3:34:40<21:12:15] +[titan] 2025-10-05 02:09:00,170 - root - INFO - step: 5780 loss: 2.7717 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4437 +[titan] 2025-10-05 02:09:00,170 - root - INFO - lr: 4.7981e-05 
gnorm: 1.66 [ 3:34:51<21:12:03] +[titan] 2025-10-05 02:09:11,065 - root - INFO - step: 5785 loss: 2.6598 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3451 +[titan] 2025-10-05 02:09:11,066 - root - INFO - lr: 4.7977e-05 gnorm: 1.28 [ 3:35:02<21:11:50] +[titan] 2025-10-05 02:09:21,936 - root - INFO - step: 5790 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3077 +[titan] 2025-10-05 02:09:21,936 - root - INFO - lr: 4.7973e-05 gnorm: 1.24 [ 3:35:13<21:11:37] +[titan] 2025-10-05 02:09:32,809 - root - INFO - step: 5795 loss: 2.6803 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3637 +[titan] 2025-10-05 02:09:32,809 - root - INFO - lr: 4.7970e-05 gnorm: 1.27 [ 3:35:24<21:11:24] +[titan] 2025-10-05 02:09:41,492 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:09:43,680 - root - INFO - step: 5800 loss: 2.6313 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3124 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:09:43,680 - root - INFO - lr: 4.7966e-05 gnorm: 1.25 [ 3:35:35<21:11:12] +[titan] 2025-10-05 02:09:54,628 - root - INFO - step: 5805 loss: 2.6182 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.3088 +[titan] 2025-10-05 02:09:54,629 - root - INFO - lr: 4.7962e-05 gnorm: 1.26 [ 3:35:45<21:10:59] +[titan] 2025-10-05 02:10:05,480 - root - INFO - step: 5810 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 02:10:05,481 - root - INFO - lr: 4.7959e-05 gnorm: 1.28 [ 3:35:56<21:10:46] +[titan] 2025-10-05 02:10:16,374 - root - INFO - step: 5815 loss: 2.6620 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3479 +[titan] 2025-10-05 02:10:16,374 - root - INFO - lr: 4.7955e-05 gnorm: 1.22 [ 3:36:07<21:10:34] +[titan] 2025-10-05 02:10:27,283 - root - INFO - step: 5820 loss: 2.6968 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3191 global_avg_mtp_loss: 2.3777 +[titan] 2025-10-05 02:10:27,283 - root - INFO - lr: 4.7951e-05 gnorm: 1.21 [ 3:36:18<21:10:21] +[titan] 2025-10-05 02:10:38,152 - root - INFO - step: 5825 loss: 2.6399 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3263 +[titan] 2025-10-05 02:10:38,152 - root - INFO - lr: 4.7947e-05 gnorm: 1.26 [ 3:36:29<21:10:08] +[titan] 2025-10-05 02:10:49,018 - root - INFO - step: 5830 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3158 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:10:49,018 - root - INFO - lr: 4.7944e-05 
gnorm: 1.27 [ 3:36:40<21:09:56] +[titan] 2025-10-05 02:10:59,943 - root - INFO - step: 5835 loss: 2.6687 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 02:10:59,943 - root - INFO - lr: 4.7940e-05 gnorm: 1.27 [ 3:36:51<21:09:43] +[titan] 2025-10-05 02:11:10,804 - root - INFO - step: 5840 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3646 +[titan] 2025-10-05 02:11:10,804 - root - INFO - lr: 4.7936e-05 gnorm: 1.23 [ 3:37:02<21:09:30] +[titan] 2025-10-05 02:11:21,663 - root - INFO - step: 5845 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:11:21,663 - root - INFO - lr: 4.7933e-05 gnorm: 1.22 [ 3:37:13<21:09:17] +[titan] 2025-10-05 02:11:30,368 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:11:32,557 - root - INFO - step: 5850 loss: 2.5946 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2879 +[titan] 2025-10-05 02:11:32,557 - root - INFO - lr: 4.7929e-05 gnorm: 1.24 [ 3:37:23<21:09:05] +[titan] 2025-10-05 02:11:43,442 - root - INFO - step: 5855 loss: 2.6553 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3404 +[titan] 2025-10-05 02:11:43,442 - root - INFO - lr: 4.7925e-05 gnorm: 1.31 [ 3:37:34<21:08:52] +[titan] 2025-10-05 02:11:54,344 - root - INFO - step: 5860 loss: 2.6942 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3769 +[titan] 2025-10-05 02:11:54,344 - root - INFO - lr: 4.7921e-05 gnorm: 1.23 [ 3:37:45<21:08:39] +[titan] 2025-10-05 02:12:05,223 - root - INFO - step: 5865 loss: 2.5612 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3011 global_avg_mtp_loss: 2.2601 +[titan] 2025-10-05 02:12:05,223 - root - INFO - lr: 4.7918e-05 gnorm: 1.19 [ 3:37:56<21:08:27] +[titan] 2025-10-05 02:12:16,102 - root - INFO - step: 5870 loss: 2.6730 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3578 +[titan] 2025-10-05 02:12:16,102 - root - INFO - lr: 4.7914e-05 gnorm: 1.22 [ 3:38:07<21:08:14] +[titan] 2025-10-05 02:12:26,998 - root - INFO - step: 5875 loss: 2.7092 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3873 +[titan] 2025-10-05 02:12:26,998 - root - INFO - lr: 4.7910e-05 gnorm: 1.27 [ 3:38:18<21:08:01] +[titan] 2025-10-05 02:12:37,886 - root - INFO - step: 5880 loss: 2.6639 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3140 global_avg_mtp_loss: 2.3499 +[titan] 2025-10-05 02:12:37,886 - root - INFO - lr: 4.7906e-05 
gnorm: 1.23 [ 3:38:29<21:07:49] +[titan] 2025-10-05 02:12:48,782 - root - INFO - step: 5885 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3282 +[titan] 2025-10-05 02:12:48,782 - root - INFO - lr: 4.7903e-05 gnorm: 1.23 [ 3:38:40<21:07:36] +[titan] 2025-10-05 02:12:59,686 - root - INFO - step: 5890 loss: 2.6332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3225 +[titan] 2025-10-05 02:12:59,686 - root - INFO - lr: 4.7899e-05 gnorm: 1.23 [ 3:38:51<21:07:24] +[titan] 2025-10-05 02:13:10,552 - root - INFO - step: 5895 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3779 +[titan] 2025-10-05 02:13:10,552 - root - INFO - lr: 4.7895e-05 gnorm: 1.20 [ 3:39:01<21:07:11] +[titan] 2025-10-05 02:13:19,229 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:13:21,417 - root - INFO - step: 5900 loss: 2.6773 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3598 +[titan] 2025-10-05 02:13:21,418 - root - INFO - lr: 4.7891e-05 gnorm: 1.21 [ 3:39:12<21:06:58] +[titan] 2025-10-05 02:13:32,300 - root - INFO - step: 5905 loss: 2.6413 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3290 +[titan] 2025-10-05 02:13:32,300 - root - INFO - lr: 4.7888e-05 gnorm: 1.21 [ 3:39:23<21:06:45] +[titan] 2025-10-05 02:13:43,183 - root - INFO - step: 5910 loss: 2.7061 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3853 +[titan] 2025-10-05 02:13:43,184 - root - INFO - lr: 4.7884e-05 gnorm: 1.23 [ 3:39:34<21:06:33] +[titan] 2025-10-05 02:13:54,153 - root - INFO - step: 5915 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 02:13:54,153 - root - INFO - lr: 4.7880e-05 gnorm: 1.20 [ 3:39:45<21:06:21] +[titan] 2025-10-05 02:14:05,035 - root - INFO - step: 5920 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3748 +[titan] 2025-10-05 02:14:05,035 - root - INFO - lr: 4.7876e-05 gnorm: 1.23 [ 3:39:56<21:06:08] +[titan] 2025-10-05 02:14:15,930 - root - INFO - step: 5925 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 02:14:15,930 - root - INFO - lr: 4.7872e-05 gnorm: 1.31 [ 3:40:07<21:05:55] +[titan] 2025-10-05 02:14:26,810 - root - INFO - step: 5930 loss: 2.5791 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 02:14:26,810 - root - INFO - lr: 4.7869e-05 
gnorm: 1.28 [ 3:40:18<21:05:43] +[titan] 2025-10-05 02:14:37,679 - root - INFO - step: 5935 loss: 2.8206 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3526 global_avg_mtp_loss: 2.4680 +[titan] 2025-10-05 02:14:37,679 - root - INFO - lr: 4.7865e-05 gnorm: 1.21 [ 3:40:29<21:05:30] +[titan] 2025-10-05 02:14:48,570 - root - INFO - step: 5940 loss: 2.6562 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3137 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:14:48,571 - root - INFO - lr: 4.7861e-05 gnorm: 1.27 [ 3:40:39<21:05:17] +[titan] 2025-10-05 02:14:59,517 - root - INFO - step: 5945 loss: 2.6955 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3188 global_avg_mtp_loss: 2.3767 +[titan] 2025-10-05 02:14:59,518 - root - INFO - lr: 4.7857e-05 gnorm: 1.24 [ 3:40:50<21:05:05] +[titan] 2025-10-05 02:15:08,203 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:15:10,403 - root - INFO - step: 5950 loss: 2.6441 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3332 +[titan] 2025-10-05 02:15:10,403 - root - INFO - lr: 4.7853e-05 gnorm: 1.24 [ 3:41:01<21:04:52] +[titan] 2025-10-05 02:15:21,261 - root - INFO - step: 5955 loss: 2.6351 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3224 +[titan] 2025-10-05 02:15:21,261 - root - INFO - lr: 4.7850e-05 gnorm: 1.27 [ 3:41:12<21:04:40] +[titan] 2025-10-05 02:15:32,145 - root - INFO - step: 5960 loss: 2.5704 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2660 +[titan] 2025-10-05 02:15:32,146 - root - INFO - lr: 4.7846e-05 gnorm: 1.24 [ 3:41:23<21:04:27] +[titan] 2025-10-05 02:15:43,038 - root - INFO - step: 5965 loss: 2.6451 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.3134 global_avg_mtp_loss: 2.3317 +[titan] 2025-10-05 02:15:43,038 - root - INFO - lr: 4.7842e-05 gnorm: 1.24 [ 3:41:34<21:04:14] +[titan] 2025-10-05 02:15:53,932 - root - INFO - step: 5970 loss: 2.6446 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:15:53,932 - root - INFO - lr: 4.7838e-05 gnorm: 1.25 [ 3:41:45<21:04:02] +[titan] 2025-10-05 02:16:04,943 - root - INFO - step: 5975 loss: 2.6984 memory: 118.84GiB(85.28%) tps: 29,760 tflops: 412.88 mfu: 41.75% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 02:16:04,943 - root - INFO - lr: 4.7834e-05 gnorm: 1.22 [ 3:41:56<21:03:50] +[titan] 2025-10-05 02:16:15,864 - root - INFO - step: 5980 loss: 2.6883 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3697 +[titan] 2025-10-05 02:16:15,864 - root - INFO - lr: 4.7831e-05 
gnorm: 1.23 [ 3:42:07<21:03:37] +[titan] 2025-10-05 02:16:26,743 - root - INFO - step: 5985 loss: 2.6999 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3205 global_avg_mtp_loss: 2.3795 +[titan] 2025-10-05 02:16:26,743 - root - INFO - lr: 4.7827e-05 gnorm: 1.25 [ 3:42:18<21:03:25] +[titan] 2025-10-05 02:16:37,616 - root - INFO - step: 5990 loss: 2.6514 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3368 +[titan] 2025-10-05 02:16:37,616 - root - INFO - lr: 4.7823e-05 gnorm: 1.24 [ 3:42:28<21:03:12] +[titan] 2025-10-05 02:16:48,504 - root - INFO - step: 5995 loss: 2.6633 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3143 global_avg_mtp_loss: 2.3490 +[titan] 2025-10-05 02:16:48,504 - root - INFO - lr: 4.7819e-05 gnorm: 1.24 [ 3:42:39<21:03:00] +[titan] 2025-10-05 02:16:57,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:16:59,424 - root - INFO - step: 6000 loss: 2.7331 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4096 +[titan] 2025-10-05 02:16:59,424 - root - INFO - lr: 4.7815e-05 gnorm: 1.20 [ 3:42:50<21:02:47] +[titan] 2025-10-05 02:17:10,295 - root - INFO - step: 6005 loss: 2.6202 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3105 +[titan] 2025-10-05 02:17:10,295 - root - INFO - lr: 4.7811e-05 gnorm: 1.18 [ 3:43:01<21:02:34] +[titan] 2025-10-05 02:17:21,201 - root - INFO - step: 6010 loss: 2.5634 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2608 +[titan] 2025-10-05 02:17:21,201 - root - INFO - lr: 4.7808e-05 gnorm: 1.22 [ 3:43:12<21:02:22] +[titan] 2025-10-05 02:17:32,081 - root - INFO - step: 6015 loss: 2.6412 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3292 +[titan] 2025-10-05 02:17:32,082 - root - INFO - lr: 4.7804e-05 gnorm: 1.19 [ 3:43:23<21:02:09] +[titan] 2025-10-05 02:17:42,964 - root - INFO - step: 6020 loss: 2.7137 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3909 +[titan] 2025-10-05 02:17:42,964 - root - INFO - lr: 4.7800e-05 gnorm: 1.23 [ 3:43:34<21:01:57] +[titan] 2025-10-05 02:17:53,873 - root - INFO - step: 6025 loss: 2.6409 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3232 +[titan] 2025-10-05 02:17:53,873 - root - INFO - lr: 4.7796e-05 gnorm: 1.20 [ 3:43:45<21:01:44] +[titan] 2025-10-05 02:18:04,793 - root - INFO - step: 6030 loss: 2.6673 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3165 global_avg_mtp_loss: 2.3508 +[titan] 2025-10-05 02:18:04,793 - root - INFO - lr: 4.7792e-05 
gnorm: 1.27 [ 3:43:56<21:01:32] +[titan] 2025-10-05 02:18:15,648 - root - INFO - step: 6035 loss: 2.5627 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2603 +[titan] 2025-10-05 02:18:15,648 - root - INFO - lr: 4.7788e-05 gnorm: 1.20 [ 3:44:06<21:01:19] +[titan] 2025-10-05 02:18:26,520 - root - INFO - step: 6040 loss: 2.6300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:18:26,520 - root - INFO - lr: 4.7784e-05 gnorm: 1.19 [ 3:44:17<21:01:07] +[titan] 2025-10-05 02:18:37,421 - root - INFO - step: 6045 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2925 +[titan] 2025-10-05 02:18:37,421 - root - INFO - lr: 4.7781e-05 gnorm: 1.23 [ 3:44:28<21:00:54] +[titan] 2025-10-05 02:18:46,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:18:48,292 - root - INFO - step: 6050 loss: 2.6234 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3122 +[titan] 2025-10-05 02:18:48,292 - root - INFO - lr: 4.7777e-05 gnorm: 1.22 [ 3:44:39<21:00:41] +[titan] 2025-10-05 02:18:59,214 - root - INFO - step: 6055 loss: 2.7909 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3399 global_avg_mtp_loss: 2.4510 +[titan] 2025-10-05 02:18:59,214 - root - INFO - lr: 4.7773e-05 gnorm: 1.28 [ 3:44:50<21:00:29] +[titan] 2025-10-05 02:19:10,081 - root - INFO - step: 6060 loss: 2.7169 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 02:19:10,081 - root - INFO - lr: 4.7769e-05 gnorm: 1.19 [ 3:45:01<21:00:16] +[titan] 2025-10-05 02:19:20,960 - root - INFO - step: 6065 loss: 2.5899 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3057 global_avg_mtp_loss: 2.2843 +[titan] 2025-10-05 02:19:20,960 - root - INFO - lr: 4.7765e-05 gnorm: 1.20 [ 3:45:12<21:00:04] +[titan] 2025-10-05 02:19:31,815 - root - INFO - step: 6070 loss: 2.5974 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2900 +[titan] 2025-10-05 02:19:31,815 - root - INFO - lr: 4.7761e-05 gnorm: 1.19 [ 3:45:23<20:59:51] +[titan] 2025-10-05 02:19:42,704 - root - INFO - step: 6075 loss: 2.5388 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2387 +[titan] 2025-10-05 02:19:42,704 - root - INFO - lr: 4.7757e-05 gnorm: 1.24 [ 3:45:34<20:59:38] +[titan] 2025-10-05 02:19:53,571 - root - INFO - step: 6080 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2553 +[titan] 2025-10-05 02:19:53,571 - root - INFO - lr: 4.7753e-05 
gnorm: 1.24 [ 3:45:44<20:59:26] +[titan] 2025-10-05 02:20:04,484 - root - INFO - step: 6085 loss: 2.6574 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 02:20:04,484 - root - INFO - lr: 4.7750e-05 gnorm: 1.25 [ 3:45:55<20:59:13] +[titan] 2025-10-05 02:20:15,352 - root - INFO - step: 6090 loss: 2.6004 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2933 +[titan] 2025-10-05 02:20:15,352 - root - INFO - lr: 4.7746e-05 gnorm: 1.29 [ 3:46:06<20:59:01] +[titan] 2025-10-05 02:20:26,230 - root - INFO - step: 6095 loss: 2.6515 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:20:26,230 - root - INFO - lr: 4.7742e-05 gnorm: 1.25 [ 3:46:17<20:58:48] +[titan] 2025-10-05 02:20:34,896 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:20:37,079 - root - INFO - step: 6100 loss: 2.6900 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3703 +[titan] 2025-10-05 02:20:37,079 - root - INFO - lr: 4.7738e-05 gnorm: 1.19 [ 3:46:28<20:58:35] +[titan] 2025-10-05 02:20:47,995 - root - INFO - step: 6105 loss: 2.7058 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3851 +[titan] 2025-10-05 02:20:47,995 - root - INFO - lr: 4.7734e-05 gnorm: 1.26 [ 3:46:39<20:58:23] +[titan] 2025-10-05 02:20:58,928 - root - INFO - step: 6110 loss: 2.6693 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3537 +[titan] 2025-10-05 02:20:58,928 - root - INFO - lr: 4.7730e-05 gnorm: 1.27 [ 3:46:50<20:58:11] +[titan] 2025-10-05 02:21:09,804 - root - INFO - step: 6115 loss: 2.5456 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:21:09,805 - root - INFO - lr: 4.7726e-05 gnorm: 1.13 [ 3:47:01<20:57:58] +[titan] 2025-10-05 02:21:20,686 - root - INFO - step: 6120 loss: 2.6377 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3244 +[titan] 2025-10-05 02:21:20,686 - root - INFO - lr: 4.7722e-05 gnorm: 1.17 [ 3:47:12<20:57:46] +[titan] 2025-10-05 02:21:31,544 - root - INFO - step: 6125 loss: 2.5803 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:21:31,544 - root - INFO - lr: 4.7718e-05 gnorm: 1.19 [ 3:47:22<20:57:33] +[titan] 2025-10-05 02:21:42,406 - root - INFO - step: 6130 loss: 2.6986 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3790 +[titan] 2025-10-05 02:21:42,406 - root - INFO - lr: 4.7714e-05 
gnorm: 1.30 [ 3:47:33<20:57:20] +[titan] 2025-10-05 02:21:53,244 - root - INFO - step: 6135 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:21:53,244 - root - INFO - lr: 4.7710e-05 gnorm: 1.24 [ 3:47:44<20:57:07] +[titan] 2025-10-05 02:22:04,175 - root - INFO - step: 6140 loss: 2.5814 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3040 global_avg_mtp_loss: 2.2775 +[titan] 2025-10-05 02:22:04,175 - root - INFO - lr: 4.7707e-05 gnorm: 1.23 [ 3:47:55<20:56:55] +[titan] 2025-10-05 02:22:13,110 - root - INFO - Dumping profiler traces at step 6144 +[titan] 2025-10-05 02:22:13,150 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:22:15,343 - root - INFO - step: 6145 loss: 2.6735 memory: 118.84GiB(85.28%) tps: 29,341 tflops: 407.07 mfu: 41.16% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3580 +[titan] 2025-10-05 02:22:15,343 - root - INFO - lr: 4.7703e-05 gnorm: 1.26 [ 3:48:06<20:56:44] +[titan] 2025-10-05 02:22:24,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:22:26,217 - root - INFO - step: 6150 loss: 2.6490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3135 global_avg_mtp_loss: 2.3355 +[titan] 2025-10-05 02:22:26,217 - root - INFO - lr: 4.7699e-05 gnorm: 1.24 [ 3:48:17<20:56:32] +[titan] 2025-10-05 02:22:37,096 - root - INFO - step: 6155 loss: 2.6463 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3340 +[titan] 2025-10-05 02:22:37,096 - root - INFO - lr: 4.7695e-05 gnorm: 1.18 [ 3:48:28<20:56:19] +[titan] 2025-10-05 02:22:47,962 - root - INFO - step: 6160 loss: 2.6975 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:22:47,962 - root - INFO - lr: 4.7691e-05 gnorm: 1.25 [ 3:48:39<20:56:06] +[titan] 2025-10-05 02:22:58,842 - root - INFO - step: 6165 loss: 2.6719 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 02:22:58,842 - root - INFO - lr: 4.7687e-05 gnorm: 1.27 [ 3:48:50<20:55:54] +[titan] 2025-10-05 02:23:09,781 - root - INFO - step: 6170 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3183 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:23:09,781 - root - INFO - lr: 4.7683e-05 gnorm: 1.18 [ 3:49:01<20:55:42] +[titan] 2025-10-05 02:23:20,657 - root - INFO - step: 6175 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2762 +[titan] 2025-10-05 02:23:20,657 - root - INFO - lr: 4.7679e-05 gnorm: 1.25 [ 3:49:11<20:55:29] +[titan] 2025-10-05 02:23:31,536 - root - INFO - step: 6180 loss: 2.6338 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3119 global_avg_mtp_loss: 2.3219 +[titan] 2025-10-05 02:23:31,536 - root - INFO - lr: 4.7675e-05 
gnorm: 1.21 [ 3:49:22<20:55:17] +[titan] 2025-10-05 02:23:42,416 - root - INFO - step: 6185 loss: 2.6751 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3584 +[titan] 2025-10-05 02:23:42,416 - root - INFO - lr: 4.7671e-05 gnorm: 1.23 [ 3:49:33<20:55:04] +[titan] 2025-10-05 02:23:53,282 - root - INFO - step: 6190 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:23:53,282 - root - INFO - lr: 4.7667e-05 gnorm: 1.94 [ 3:49:44<20:54:51] +[titan] 2025-10-05 02:24:04,176 - root - INFO - step: 6195 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.3001 +[titan] 2025-10-05 02:24:04,177 - root - INFO - lr: 4.7663e-05 gnorm: 1.30 [ 3:49:55<20:54:39] +[titan] 2025-10-05 02:24:12,862 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:24:15,046 - root - INFO - step: 6200 loss: 2.6013 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2936 +[titan] 2025-10-05 02:24:15,047 - root - INFO - lr: 4.7659e-05 gnorm: 1.22 [ 3:50:06<20:54:26] +[titan] 2025-10-05 02:24:25,976 - root - INFO - step: 6205 loss: 2.6406 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:24:25,976 - root - INFO - lr: 4.7655e-05 gnorm: 1.21 [ 3:50:17<20:54:14] +[titan] 2025-10-05 02:24:36,842 - root - INFO - step: 6210 loss: 2.5418 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 02:24:36,842 - root - INFO - lr: 4.7651e-05 gnorm: 1.17 [ 3:50:28<20:54:02] +[titan] 2025-10-05 02:24:47,725 - root - INFO - step: 6215 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2744 +[titan] 2025-10-05 02:24:47,725 - root - INFO - lr: 4.7647e-05 gnorm: 1.20 [ 3:50:39<20:53:49] +[titan] 2025-10-05 02:24:58,595 - root - INFO - step: 6220 loss: 2.6116 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 02:24:58,595 - root - INFO - lr: 4.7643e-05 gnorm: 1.26 [ 3:50:49<20:53:37] +[titan] 2025-10-05 02:25:09,462 - root - INFO - step: 6225 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3158 +[titan] 2025-10-05 02:25:09,462 - root - INFO - lr: 4.7639e-05 gnorm: 1.26 [ 3:51:00<20:53:24] +[titan] 2025-10-05 02:25:20,338 - root - INFO - step: 6230 loss: 2.6316 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3207 +[titan] 2025-10-05 02:25:20,338 - root - INFO - lr: 4.7635e-05 
gnorm: 1.26 [ 3:51:11<20:53:11] +[titan] 2025-10-05 02:25:31,243 - root - INFO - step: 6235 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:25:31,243 - root - INFO - lr: 4.7631e-05 gnorm: 1.24 [ 3:51:22<20:52:59] +[titan] 2025-10-05 02:25:42,123 - root - INFO - step: 6240 loss: 2.6737 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3161 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:25:42,123 - root - INFO - lr: 4.7627e-05 gnorm: 1.21 [ 3:51:33<20:52:47] +[titan] 2025-10-05 02:25:53,008 - root - INFO - step: 6245 loss: 2.6264 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3138 +[titan] 2025-10-05 02:25:53,008 - root - INFO - lr: 4.7623e-05 gnorm: 1.21 [ 3:51:44<20:52:34] +[titan] 2025-10-05 02:26:01,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:26:03,943 - root - INFO - step: 6250 loss: 2.6166 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 02:26:03,944 - root - INFO - lr: 4.7619e-05 gnorm: 1.24 [ 3:51:55<20:52:22] +[titan] 2025-10-05 02:26:14,837 - root - INFO - step: 6255 loss: 2.5876 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2818 +[titan] 2025-10-05 02:26:14,837 - root - INFO - lr: 4.7615e-05 gnorm: 1.18 [ 3:52:06<20:52:09] +[titan] 2025-10-05 02:26:25,726 - root - INFO - step: 6260 loss: 2.7070 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3866 +[titan] 2025-10-05 02:26:25,726 - root - INFO - lr: 4.7611e-05 gnorm: 1.20 [ 3:52:17<20:51:57] +[titan] 2025-10-05 02:26:36,613 - root - INFO - step: 6265 loss: 2.6830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3168 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:26:36,613 - root - INFO - lr: 4.7607e-05 gnorm: 1.22 [ 3:52:27<20:51:45] +[titan] 2025-10-05 02:26:47,499 - root - INFO - step: 6270 loss: 2.4995 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2939 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 02:26:47,499 - root - INFO - lr: 4.7603e-05 gnorm: 1.22 [ 3:52:38<20:51:32] +[titan] 2025-10-05 02:26:58,361 - root - INFO - step: 6275 loss: 2.5337 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2345 +[titan] 2025-10-05 02:26:58,361 - root - INFO - lr: 4.7599e-05 gnorm: 1.19 [ 3:52:49<20:51:20] +[titan] 2025-10-05 02:27:09,255 - root - INFO - step: 6280 loss: 2.5465 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 02:27:09,255 - root - INFO - lr: 4.7595e-05 
gnorm: 1.18 [ 3:53:00<20:51:07] +[titan] 2025-10-05 02:27:20,123 - root - INFO - step: 6285 loss: 2.6725 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3560 +[titan] 2025-10-05 02:27:20,123 - root - INFO - lr: 4.7591e-05 gnorm: 1.25 [ 3:53:11<20:50:55] +[titan] 2025-10-05 02:27:30,985 - root - INFO - step: 6290 loss: 2.6086 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.2999 +[titan] 2025-10-05 02:27:30,985 - root - INFO - lr: 4.7587e-05 gnorm: 1.20 [ 3:53:22<20:50:42] +[titan] 2025-10-05 02:27:41,851 - root - INFO - step: 6295 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:27:41,851 - root - INFO - lr: 4.7583e-05 gnorm: 1.19 [ 3:53:33<20:50:29] +[titan] 2025-10-05 02:27:50,574 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:27:52,765 - root - INFO - step: 6300 loss: 2.6057 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2983 +[titan] 2025-10-05 02:27:52,765 - root - INFO - lr: 4.7579e-05 gnorm: 1.25 [ 3:53:44<20:50:17] +[titan] 2025-10-05 02:28:03,660 - root - INFO - step: 6305 loss: 2.6038 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3067 global_avg_mtp_loss: 2.2971 +[titan] 2025-10-05 02:28:03,661 - root - INFO - lr: 4.7575e-05 gnorm: 1.34 [ 3:53:54<20:50:05] +[titan] 2025-10-05 02:28:14,554 - root - INFO - step: 6310 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3056 +[titan] 2025-10-05 02:28:14,554 - root - INFO - lr: 4.7571e-05 gnorm: 1.26 [ 3:54:05<20:49:52] +[titan] 2025-10-05 02:28:25,460 - root - INFO - step: 6315 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 02:28:25,460 - root - INFO - lr: 4.7567e-05 gnorm: 1.30 [ 3:54:16<20:49:40] +[titan] 2025-10-05 02:28:36,327 - root - INFO - step: 6320 loss: 2.6294 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3180 +[titan] 2025-10-05 02:28:36,327 - root - INFO - lr: 4.7563e-05 gnorm: 1.20 [ 3:54:27<20:49:28] +[titan] 2025-10-05 02:28:47,212 - root - INFO - step: 6325 loss: 2.5971 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:28:47,212 - root - INFO - lr: 4.7559e-05 gnorm: 1.24 [ 3:54:38<20:49:15] +[titan] 2025-10-05 02:28:58,148 - root - INFO - step: 6330 loss: 2.5947 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2896 +[titan] 2025-10-05 02:28:58,148 - root - INFO - lr: 4.7555e-05 
gnorm: 1.17 [ 3:54:49<20:49:03] +[titan] 2025-10-05 02:29:09,045 - root - INFO - step: 6335 loss: 2.6560 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3421 +[titan] 2025-10-05 02:29:09,045 - root - INFO - lr: 4.7551e-05 gnorm: 1.23 [ 3:55:00<20:48:51] +[titan] 2025-10-05 02:29:19,929 - root - INFO - step: 6340 loss: 2.5919 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2841 +[titan] 2025-10-05 02:29:19,929 - root - INFO - lr: 4.7547e-05 gnorm: 1.21 [ 3:55:11<20:48:38] +[titan] 2025-10-05 02:29:30,803 - root - INFO - step: 6345 loss: 2.6337 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3228 +[titan] 2025-10-05 02:29:30,803 - root - INFO - lr: 4.7543e-05 gnorm: 1.20 [ 3:55:22<20:48:26] +[titan] 2025-10-05 02:29:39,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:29:41,698 - root - INFO - step: 6350 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.2911 +[titan] 2025-10-05 02:29:41,698 - root - INFO - lr: 4.7539e-05 gnorm: 1.21 [ 3:55:33<20:48:13] +[titan] 2025-10-05 02:29:52,582 - root - INFO - step: 6355 loss: 2.5766 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2722 +[titan] 2025-10-05 02:29:52,582 - root - INFO - lr: 4.7535e-05 gnorm: 1.31 [ 3:55:43<20:48:01] +[titan] 2025-10-05 02:30:03,454 - root - INFO - step: 6360 loss: 2.6402 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3288 +[titan] 2025-10-05 02:30:03,454 - root - INFO - lr: 4.7531e-05 gnorm: 1.19 [ 3:55:54<20:47:48] +[titan] 2025-10-05 02:30:14,405 - root - INFO - step: 6365 loss: 2.5756 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.16 mfu: 41.98% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2718 +[titan] 2025-10-05 02:30:14,405 - root - INFO - lr: 4.7527e-05 gnorm: 1.21 [ 3:56:05<20:47:36] +[titan] 2025-10-05 02:30:25,300 - root - INFO - step: 6370 loss: 2.6721 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3566 +[titan] 2025-10-05 02:30:25,301 - root - INFO - lr: 4.7523e-05 gnorm: 1.26 [ 3:56:16<20:47:24] +[titan] 2025-10-05 02:30:36,188 - root - INFO - step: 6375 loss: 2.6701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3548 +[titan] 2025-10-05 02:30:36,189 - root - INFO - lr: 4.7519e-05 gnorm: 1.26 [ 3:56:27<20:47:12] +[titan] 2025-10-05 02:30:47,063 - root - INFO - step: 6380 loss: 2.6577 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:30:47,063 - root - INFO - lr: 4.7514e-05 
gnorm: 1.19 [ 3:56:38<20:46:59] +[titan] 2025-10-05 02:30:57,930 - root - INFO - step: 6385 loss: 2.5739 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:30:57,930 - root - INFO - lr: 4.7510e-05 gnorm: 1.20 [ 3:56:49<20:46:47] +[titan] 2025-10-05 02:31:08,797 - root - INFO - step: 6390 loss: 2.6461 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3331 +[titan] 2025-10-05 02:31:08,797 - root - INFO - lr: 4.7506e-05 gnorm: 1.18 [ 3:57:00<20:46:34] +[titan] 2025-10-05 02:31:19,713 - root - INFO - step: 6395 loss: 2.6359 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3246 +[titan] 2025-10-05 02:31:19,713 - root - INFO - lr: 4.7502e-05 gnorm: 1.18 [ 3:57:11<20:46:22] +[titan] 2025-10-05 02:31:28,409 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:31:30,590 - root - INFO - step: 6400 loss: 2.6427 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.3304 +[titan] 2025-10-05 02:31:30,590 - root - INFO - lr: 4.7498e-05 gnorm: 1.20 [ 3:57:21<20:46:09] +[titan] 2025-10-05 02:31:41,458 - root - INFO - step: 6405 loss: 2.5702 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:31:41,458 - root - INFO - lr: 4.7494e-05 gnorm: 1.26 [ 3:57:32<20:45:57] +[titan] 2025-10-05 02:31:52,328 - root - INFO - step: 6410 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2830 +[titan] 2025-10-05 02:31:52,329 - root - INFO - lr: 4.7490e-05 gnorm: 1.28 [ 3:57:43<20:45:44] +[titan] 2025-10-05 02:32:03,197 - root - INFO - step: 6415 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:32:03,197 - root - INFO - lr: 4.7486e-05 gnorm: 1.24 [ 3:57:54<20:45:32] +[titan] 2025-10-05 02:32:14,082 - root - INFO - step: 6420 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2355 +[titan] 2025-10-05 02:32:14,082 - root - INFO - lr: 4.7482e-05 gnorm: 1.22 [ 3:58:05<20:45:20] +[titan] 2025-10-05 02:32:25,006 - root - INFO - step: 6425 loss: 2.6729 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:32:25,007 - root - INFO - lr: 4.7478e-05 gnorm: 1.28 [ 3:58:16<20:45:07] +[titan] 2025-10-05 02:32:35,882 - root - INFO - step: 6430 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2763 +[titan] 2025-10-05 02:32:35,883 - root - INFO - lr: 4.7474e-05 
gnorm: 1.22 [ 3:58:27<20:44:55] +[titan] 2025-10-05 02:32:46,767 - root - INFO - step: 6435 loss: 2.5922 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2859 +[titan] 2025-10-05 02:32:46,767 - root - INFO - lr: 4.7469e-05 gnorm: 1.22 [ 3:58:38<20:44:43] +[titan] 2025-10-05 02:32:57,635 - root - INFO - step: 6440 loss: 2.5566 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2552 +[titan] 2025-10-05 02:32:57,635 - root - INFO - lr: 4.7465e-05 gnorm: 1.19 [ 3:58:48<20:44:30] +[titan] 2025-10-05 02:33:08,509 - root - INFO - step: 6445 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3471 +[titan] 2025-10-05 02:33:08,509 - root - INFO - lr: 4.7461e-05 gnorm: 1.18 [ 3:58:59<20:44:18] +[titan] 2025-10-05 02:33:17,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:33:19,369 - root - INFO - step: 6450 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2865 +[titan] 2025-10-05 02:33:19,369 - root - INFO - lr: 4.7457e-05 gnorm: 1.22 [ 3:59:10<20:44:05] +[titan] 2025-10-05 02:33:30,229 - root - INFO - step: 6455 loss: 2.6465 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3333 +[titan] 2025-10-05 02:33:30,229 - root - INFO - lr: 4.7453e-05 gnorm: 1.20 [ 3:59:21<20:43:53] +[titan] 2025-10-05 02:33:41,125 - root - INFO - step: 6460 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2613 +[titan] 2025-10-05 02:33:41,125 - root - INFO - lr: 4.7449e-05 gnorm: 1.21 [ 3:59:32<20:43:40] +[titan] 2025-10-05 02:33:51,972 - root - INFO - step: 6465 loss: 2.6340 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.3110 global_avg_mtp_loss: 2.3230 +[titan] 2025-10-05 02:33:51,972 - root - INFO - lr: 4.7445e-05 gnorm: 1.25 [ 3:59:43<20:43:28] +[titan] 2025-10-05 02:34:02,829 - root - INFO - step: 6470 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:34:02,830 - root - INFO - lr: 4.7441e-05 gnorm: 1.22 [ 3:59:54<20:43:15] +[titan] 2025-10-05 02:34:13,713 - root - INFO - step: 6475 loss: 2.6622 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3477 +[titan] 2025-10-05 02:34:13,713 - root - INFO - lr: 4.7436e-05 gnorm: 1.22 [ 4:00:05<20:43:03] +[titan] 2025-10-05 02:34:24,581 - root - INFO - step: 6480 loss: 2.5985 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2920 +[titan] 2025-10-05 02:34:24,581 - root - INFO - lr: 4.7432e-05 
gnorm: 1.22 [ 4:00:15<20:42:51] +[titan] 2025-10-05 02:34:35,430 - root - INFO - step: 6485 loss: 2.5699 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2657 +[titan] 2025-10-05 02:34:35,430 - root - INFO - lr: 4.7428e-05 gnorm: 1.26 [ 4:00:26<20:42:38] +[titan] 2025-10-05 02:34:46,317 - root - INFO - step: 6490 loss: 2.5393 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:34:46,318 - root - INFO - lr: 4.7424e-05 gnorm: 1.22 [ 4:00:37<20:42:26] +[titan] 2025-10-05 02:34:57,192 - root - INFO - step: 6495 loss: 2.6369 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3116 global_avg_mtp_loss: 2.3253 +[titan] 2025-10-05 02:34:57,193 - root - INFO - lr: 4.7420e-05 gnorm: 1.23 [ 4:00:48<20:42:13] +[titan] 2025-10-05 02:35:05,878 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:35:08,068 - root - INFO - step: 6500 loss: 2.5435 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3003 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 02:35:08,068 - root - INFO - lr: 4.7416e-05 gnorm: 1.25 [ 4:00:59<20:42:01] +[titan] 2025-10-05 02:35:18,953 - root - INFO - step: 6505 loss: 2.6050 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2979 +[titan] 2025-10-05 02:35:18,953 - root - INFO - lr: 4.7412e-05 gnorm: 1.26 [ 4:01:10<20:41:48] +[titan] 2025-10-05 02:35:29,825 - root - INFO - step: 6510 loss: 2.5818 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2782 +[titan] 2025-10-05 02:35:29,825 - root - INFO - lr: 4.7407e-05 gnorm: 1.19 [ 4:01:21<20:41:36] +[titan] 2025-10-05 02:35:40,705 - root - INFO - step: 6515 loss: 2.5167 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2195 +[titan] 2025-10-05 02:35:40,705 - root - INFO - lr: 4.7403e-05 gnorm: 1.18 [ 4:01:31<20:41:24] +[titan] 2025-10-05 02:35:51,579 - root - INFO - step: 6520 loss: 2.6889 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3704 +[titan] 2025-10-05 02:35:51,580 - root - INFO - lr: 4.7399e-05 gnorm: 1.25 [ 4:01:42<20:41:11] +[titan] 2025-10-05 02:36:02,521 - root - INFO - step: 6525 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3046 global_avg_mtp_loss: 2.2801 +[titan] 2025-10-05 02:36:02,521 - root - INFO - lr: 4.7395e-05 gnorm: 1.21 [ 4:01:53<20:40:59] +[titan] 2025-10-05 02:36:13,407 - root - INFO - step: 6530 loss: 2.5064 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2103 +[titan] 2025-10-05 02:36:13,407 - root - INFO - lr: 4.7391e-05 
gnorm: 1.17 [ 4:02:04<20:40:47] +[titan] 2025-10-05 02:36:24,288 - root - INFO - step: 6535 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:36:24,289 - root - INFO - lr: 4.7387e-05 gnorm: 1.26 [ 4:02:15<20:40:35] +[titan] 2025-10-05 02:36:35,150 - root - INFO - step: 6540 loss: 2.6944 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 02:36:35,150 - root - INFO - lr: 4.7382e-05 gnorm: 1.22 [ 4:02:26<20:40:22] +[titan] 2025-10-05 02:36:46,030 - root - INFO - step: 6545 loss: 2.5975 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2909 +[titan] 2025-10-05 02:36:46,030 - root - INFO - lr: 4.7378e-05 gnorm: 1.25 [ 4:02:37<20:40:10] +[titan] 2025-10-05 02:36:54,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:36:56,903 - root - INFO - step: 6550 loss: 2.5802 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2749 +[titan] 2025-10-05 02:36:56,904 - root - INFO - lr: 4.7374e-05 gnorm: 1.18 [ 4:02:48<20:39:57] +[titan] 2025-10-05 02:37:07,790 - root - INFO - step: 6555 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 02:37:07,790 - root - INFO - lr: 4.7370e-05 gnorm: 1.23 [ 4:02:59<20:39:45] +[titan] 2025-10-05 02:37:18,673 - root - INFO - step: 6560 loss: 2.6310 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3210 +[titan] 2025-10-05 02:37:18,673 - root - INFO - lr: 4.7366e-05 gnorm: 1.22 [ 4:03:09<20:39:33] +[titan] 2025-10-05 02:37:29,519 - root - INFO - step: 6565 loss: 2.6348 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3242 +[titan] 2025-10-05 02:37:29,520 - root - INFO - lr: 4.7361e-05 gnorm: 1.24 [ 4:03:20<20:39:20] +[titan] 2025-10-05 02:37:40,400 - root - INFO - step: 6570 loss: 2.5419 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2417 +[titan] 2025-10-05 02:37:40,400 - root - INFO - lr: 4.7357e-05 gnorm: 1.19 [ 4:03:31<20:39:08] +[titan] 2025-10-05 02:37:51,268 - root - INFO - step: 6575 loss: 2.5865 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2812 +[titan] 2025-10-05 02:37:51,269 - root - INFO - lr: 4.7353e-05 gnorm: 1.28 [ 4:03:42<20:38:55] +[titan] 2025-10-05 02:38:02,128 - root - INFO - step: 6580 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:38:02,128 - root - INFO - lr: 4.7349e-05 
gnorm: 1.20 [ 4:03:53<20:38:43] +[titan] 2025-10-05 02:38:13,063 - root - INFO - step: 6585 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3373 +[titan] 2025-10-05 02:38:13,063 - root - INFO - lr: 4.7345e-05 gnorm: 1.24 [ 4:04:04<20:38:31] +[titan] 2025-10-05 02:38:23,973 - root - INFO - step: 6590 loss: 2.6349 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3243 +[titan] 2025-10-05 02:38:23,973 - root - INFO - lr: 4.7340e-05 gnorm: 1.19 [ 4:04:15<20:38:19] +[titan] 2025-10-05 02:38:34,826 - root - INFO - step: 6595 loss: 2.7415 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4095 +[titan] 2025-10-05 02:38:34,826 - root - INFO - lr: 4.7336e-05 gnorm: 1.21 [ 4:04:26<20:38:06] +[titan] 2025-10-05 02:38:43,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:38:45,682 - root - INFO - step: 6600 loss: 2.5758 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2716 +[titan] 2025-10-05 02:38:45,682 - root - INFO - lr: 4.7332e-05 gnorm: 1.18 [ 4:04:36<20:37:54] +[titan] 2025-10-05 02:38:56,550 - root - INFO - step: 6605 loss: 2.5294 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2310 +[titan] 2025-10-05 02:38:56,550 - root - INFO - lr: 4.7328e-05 gnorm: 1.19 [ 4:04:47<20:37:41] +[titan] 2025-10-05 02:39:07,416 - root - INFO - step: 6610 loss: 2.5451 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:39:07,416 - root - INFO - lr: 4.7324e-05 gnorm: 1.20 [ 4:04:58<20:37:29] +[titan] 2025-10-05 02:39:18,347 - root - INFO - step: 6615 loss: 2.7044 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3846 +[titan] 2025-10-05 02:39:18,347 - root - INFO - lr: 4.7319e-05 gnorm: 1.23 [ 4:05:09<20:37:17] +[titan] 2025-10-05 02:39:29,249 - root - INFO - step: 6620 loss: 2.5846 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3048 global_avg_mtp_loss: 2.2799 +[titan] 2025-10-05 02:39:29,249 - root - INFO - lr: 4.7315e-05 gnorm: 1.16 [ 4:05:20<20:37:05] +[titan] 2025-10-05 02:39:40,113 - root - INFO - step: 6625 loss: 2.6491 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3371 +[titan] 2025-10-05 02:39:40,113 - root - INFO - lr: 4.7311e-05 gnorm: 1.27 [ 4:05:31<20:36:52] +[titan] 2025-10-05 02:39:50,990 - root - INFO - step: 6630 loss: 2.5891 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:39:50,990 - root - INFO - lr: 4.7307e-05 
gnorm: 1.21 [ 4:05:42<20:36:40] +[titan] 2025-10-05 02:40:01,853 - root - INFO - step: 6635 loss: 2.6888 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3719 +[titan] 2025-10-05 02:40:01,853 - root - INFO - lr: 4.7302e-05 gnorm: 1.21 [ 4:05:53<20:36:28] +[titan] 2025-10-05 02:40:12,718 - root - INFO - step: 6640 loss: 2.5610 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2587 +[titan] 2025-10-05 02:40:12,718 - root - INFO - lr: 4.7298e-05 gnorm: 1.19 [ 4:06:03<20:36:15] +[titan] 2025-10-05 02:40:23,628 - root - INFO - step: 6645 loss: 2.5680 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2644 +[titan] 2025-10-05 02:40:23,628 - root - INFO - lr: 4.7294e-05 gnorm: 1.27 [ 4:06:14<20:36:03] +[titan] 2025-10-05 02:40:32,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:40:34,565 - root - INFO - step: 6650 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3119 +[titan] 2025-10-05 02:40:34,566 - root - INFO - lr: 4.7290e-05 gnorm: 1.21 [ 4:06:25<20:35:51] +[titan] 2025-10-05 02:40:45,524 - root - INFO - step: 6655 loss: 2.6619 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:40:45,524 - root - INFO - lr: 4.7285e-05 gnorm: 1.16 [ 4:06:36<20:35:39] +[titan] 2025-10-05 02:40:47,888 - root - INFO - Dumping profiler traces at step 6656 +[titan] 2025-10-05 02:40:47,928 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:40:56,627 - root - INFO - step: 6660 loss: 2.5796 memory: 118.84GiB(85.28%) tps: 29,513 tflops: 409.44 mfu: 41.40% global_avg_ntp_loss: 0.3032 global_avg_mtp_loss: 2.2764 +[titan] 2025-10-05 02:40:56,627 - root - INFO - lr: 4.7281e-05 gnorm: 1.18 [ 4:06:47<20:35:28] +[titan] 2025-10-05 02:41:07,500 - root - INFO - step: 6665 loss: 2.5859 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2808 +[titan] 2025-10-05 02:41:07,500 - root - INFO - lr: 4.7277e-05 gnorm: 1.21 [ 4:06:58<20:35:16] +[titan] 2025-10-05 02:41:18,462 - root - INFO - step: 6670 loss: 2.5619 memory: 118.84GiB(85.28%) tps: 29,894 tflops: 414.73 mfu: 41.93% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:41:18,462 - root - INFO - lr: 4.7273e-05 gnorm: 1.20 [ 4:07:09<20:35:04] +[titan] 2025-10-05 02:41:29,324 - root - INFO - step: 6675 loss: 2.4816 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.1887 +[titan] 2025-10-05 02:41:29,324 - root - INFO - lr: 4.7268e-05 gnorm: 1.20 [ 4:07:20<20:34:51] +[titan] 2025-10-05 02:41:40,209 - root - INFO - step: 6680 loss: 2.6410 
memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3279 +[titan] 2025-10-05 02:41:40,209 - root - INFO - lr: 4.7264e-05 gnorm: 1.26 [ 4:07:31<20:34:39] +[titan] 2025-10-05 02:41:51,110 - root - INFO - step: 6685 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3116 +[titan] 2025-10-05 02:41:51,110 - root - INFO - lr: 4.7260e-05 gnorm: 1.25 [ 4:07:42<20:34:27] +[titan] 2025-10-05 02:42:01,973 - root - INFO - step: 6690 loss: 2.6096 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3014 +[titan] 2025-10-05 02:42:01,973 - root - INFO - lr: 4.7256e-05 gnorm: 1.20 [ 4:07:53<20:34:14] +[titan] 2025-10-05 02:42:12,862 - root - INFO - step: 6695 loss: 2.5175 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2207 +[titan] 2025-10-05 02:42:12,862 - root - INFO - lr: 4.7251e-05 gnorm: 1.18 [ 4:08:04<20:34:02] +[titan] 2025-10-05 02:42:21,626 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:42:23,816 - root - INFO - step: 6700 loss: 2.6088 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3010 +[titan] 2025-10-05 02:42:23,816 - root - INFO - lr: 4.7247e-05 gnorm: 1.21 [ 4:08:15<20:33:50] +[titan] 2025-10-05 02:42:34,693 - root - INFO - step: 6705 loss: 2.6071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.2991 +[titan] 2025-10-05 02:42:34,693 - root - INFO - lr: 4.7243e-05 gnorm: 1.19 [ 4:08:25<20:33:38] +[titan] 2025-10-05 02:42:45,561 - root - INFO - step: 6710 loss: 2.5118 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2161 +[titan] 2025-10-05 02:42:45,561 - root - INFO - lr: 4.7238e-05 gnorm: 1.24 [ 4:08:36<20:33:26] +[titan] 2025-10-05 02:42:56,442 - root - INFO - step: 6715 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.2997 +[titan] 2025-10-05 02:42:56,442 - root - INFO - lr: 4.7234e-05 gnorm: 1.21 [ 4:08:47<20:33:13] +[titan] 2025-10-05 02:43:07,287 - root - INFO - step: 6720 loss: 2.5570 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2554 +[titan] 2025-10-05 02:43:07,287 - root - INFO - lr: 4.7230e-05 gnorm: 1.22 [ 4:08:58<20:33:01] +[titan] 2025-10-05 02:43:18,136 - root - INFO - step: 6725 loss: 2.5707 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3021 global_avg_mtp_loss: 2.2686 +[titan] 2025-10-05 02:43:18,136 - root - INFO - lr: 4.7226e-05 gnorm: 1.53 [ 4:09:09<20:32:49] +[titan] 2025-10-05 02:43:29,070 - root - INFO - step: 6730 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2464 +[titan] 2025-10-05 02:43:29,070 - root - INFO - lr: 4.7221e-05 
gnorm: 1.26 [ 4:09:20<20:32:36] +[titan] 2025-10-05 02:43:39,913 - root - INFO - step: 6735 loss: 2.5430 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 02:43:39,913 - root - INFO - lr: 4.7217e-05 gnorm: 1.22 [ 4:09:31<20:32:24] +[titan] 2025-10-05 02:43:50,772 - root - INFO - step: 6740 loss: 2.5235 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2275 +[titan] 2025-10-05 02:43:50,772 - root - INFO - lr: 4.7213e-05 gnorm: 1.21 [ 4:09:42<20:32:12] +[titan] 2025-10-05 02:44:01,659 - root - INFO - step: 6745 loss: 2.6439 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3121 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:44:01,660 - root - INFO - lr: 4.7208e-05 gnorm: 1.19 [ 4:09:52<20:31:59] +[titan] 2025-10-05 02:44:10,337 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:44:12,526 - root - INFO - step: 6750 loss: 2.5875 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 02:44:12,526 - root - INFO - lr: 4.7204e-05 gnorm: 1.21 [ 4:10:03<20:31:47] +[titan] 2025-10-05 02:44:23,443 - root - INFO - step: 6755 loss: 2.4956 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2014 +[titan] 2025-10-05 02:44:23,443 - root - INFO - lr: 4.7200e-05 gnorm: 1.19 [ 4:10:14<20:31:35] +[titan] 2025-10-05 02:44:34,289 - root - INFO - step: 6760 loss: 2.5401 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2414 +[titan] 2025-10-05 02:44:34,289 - root - INFO - lr: 4.7196e-05 gnorm: 1.22 [ 4:10:25<20:31:23] +[titan] 2025-10-05 02:44:45,167 - root - INFO - step: 6765 loss: 2.5998 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2924 +[titan] 2025-10-05 02:44:45,167 - root - INFO - lr: 4.7191e-05 gnorm: 1.30 [ 4:10:36<20:31:10] +[titan] 2025-10-05 02:44:56,029 - root - INFO - step: 6770 loss: 2.5743 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2714 +[titan] 2025-10-05 02:44:56,030 - root - INFO - lr: 4.7187e-05 gnorm: 1.21 [ 4:10:47<20:30:58] +[titan] 2025-10-05 02:45:06,886 - root - INFO - step: 6775 loss: 2.5839 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3045 global_avg_mtp_loss: 2.2794 +[titan] 2025-10-05 02:45:06,886 - root - INFO - lr: 4.7183e-05 gnorm: 1.17 [ 4:10:58<20:30:46] +[titan] 2025-10-05 02:45:17,790 - root - INFO - step: 6780 loss: 2.5182 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2228 +[titan] 2025-10-05 02:45:17,790 - root - INFO - lr: 4.7178e-05 
gnorm: 1.24 [ 4:11:09<20:30:33] +[titan] 2025-10-05 02:45:28,696 - root - INFO - step: 6785 loss: 2.5460 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2463 +[titan] 2025-10-05 02:45:28,696 - root - INFO - lr: 4.7174e-05 gnorm: 1.20 [ 4:11:19<20:30:21] +[titan] 2025-10-05 02:45:39,548 - root - INFO - step: 6790 loss: 2.5312 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2336 +[titan] 2025-10-05 02:45:39,548 - root - INFO - lr: 4.7170e-05 gnorm: 1.16 [ 4:11:30<20:30:09] +[titan] 2025-10-05 02:45:50,426 - root - INFO - step: 6795 loss: 2.5011 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:45:50,427 - root - INFO - lr: 4.7165e-05 gnorm: 1.18 [ 4:11:41<20:29:57] +[titan] 2025-10-05 02:45:59,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:46:01,303 - root - INFO - step: 6800 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2649 +[titan] 2025-10-05 02:46:01,303 - root - INFO - lr: 4.7161e-05 gnorm: 1.23 [ 4:11:52<20:29:44] +[titan] 2025-10-05 02:46:12,162 - root - INFO - step: 6805 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:46:12,162 - root - INFO - lr: 4.7157e-05 gnorm: 1.28 [ 4:12:03<20:29:32] +[titan] 2025-10-05 02:46:23,159 - root - INFO - step: 6810 loss: 2.5521 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.40 mfu: 41.80% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2496 +[titan] 2025-10-05 02:46:23,159 - root - INFO - lr: 4.7152e-05 gnorm: 1.22 [ 4:12:14<20:29:20] +[titan] 2025-10-05 02:46:34,046 - root - INFO - step: 6815 loss: 2.6067 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.3007 +[titan] 2025-10-05 02:46:34,047 - root - INFO - lr: 4.7148e-05 gnorm: 1.17 [ 4:12:25<20:29:08] +[titan] 2025-10-05 02:46:44,908 - root - INFO - step: 6820 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2807 +[titan] 2025-10-05 02:46:44,908 - root - INFO - lr: 4.7143e-05 gnorm: 1.17 [ 4:12:36<20:28:56] +[titan] 2025-10-05 02:46:55,788 - root - INFO - step: 6825 loss: 2.5910 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2852 +[titan] 2025-10-05 02:46:55,788 - root - INFO - lr: 4.7139e-05 gnorm: 1.16 [ 4:12:47<20:28:44] +[titan] 2025-10-05 02:47:06,620 - root - INFO - step: 6830 loss: 2.5384 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 02:47:06,620 - root - INFO - lr: 4.7135e-05 
gnorm: 1.18 [ 4:12:57<20:28:31] +[titan] 2025-10-05 02:47:17,469 - root - INFO - step: 6835 loss: 2.5733 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 02:47:17,469 - root - INFO - lr: 4.7130e-05 gnorm: 1.23 [ 4:13:08<20:28:19] +[titan] 2025-10-05 02:47:28,380 - root - INFO - step: 6840 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2835 +[titan] 2025-10-05 02:47:28,380 - root - INFO - lr: 4.7126e-05 gnorm: 1.26 [ 4:13:19<20:28:07] +[titan] 2025-10-05 02:47:39,283 - root - INFO - step: 6845 loss: 2.5574 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2546 +[titan] 2025-10-05 02:47:39,283 - root - INFO - lr: 4.7122e-05 gnorm: 1.19 [ 4:13:30<20:27:55] +[titan] 2025-10-05 02:47:47,974 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:47:50,155 - root - INFO - step: 6850 loss: 2.5366 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2372 +[titan] 2025-10-05 02:47:50,155 - root - INFO - lr: 4.7117e-05 gnorm: 1.18 [ 4:13:41<20:27:42] +[titan] 2025-10-05 02:48:01,026 - root - INFO - step: 6855 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2793 +[titan] 2025-10-05 02:48:01,026 - root - INFO - lr: 4.7113e-05 gnorm: 1.17 [ 4:13:52<20:27:30] +[titan] 2025-10-05 02:48:11,906 - root - INFO - step: 6860 loss: 2.5452 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2453 +[titan] 2025-10-05 02:48:11,906 - root - INFO - lr: 4.7109e-05 gnorm: 1.21 [ 4:14:03<20:27:18] +[titan] 2025-10-05 02:48:22,764 - root - INFO - step: 6865 loss: 2.5903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2839 +[titan] 2025-10-05 02:48:22,764 - root - INFO - lr: 4.7104e-05 gnorm: 1.22 [ 4:14:14<20:27:05] +[titan] 2025-10-05 02:48:33,663 - root - INFO - step: 6870 loss: 2.5282 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2305 +[titan] 2025-10-05 02:48:33,663 - root - INFO - lr: 4.7100e-05 gnorm: 1.21 [ 4:14:24<20:26:53] +[titan] 2025-10-05 02:48:44,571 - root - INFO - step: 6875 loss: 2.5842 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2787 +[titan] 2025-10-05 02:48:44,571 - root - INFO - lr: 4.7095e-05 gnorm: 1.20 [ 4:14:35<20:26:41] +[titan] 2025-10-05 02:48:55,419 - root - INFO - step: 6880 loss: 2.5406 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2421 +[titan] 2025-10-05 02:48:55,419 - root - INFO - lr: 4.7091e-05 
gnorm: 1.23 [ 4:14:46<20:26:29] +[titan] 2025-10-05 02:49:06,283 - root - INFO - step: 6885 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2631 +[titan] 2025-10-05 02:49:06,283 - root - INFO - lr: 4.7087e-05 gnorm: 1.33 [ 4:14:57<20:26:17] +[titan] 2025-10-05 02:49:17,141 - root - INFO - step: 6890 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3033 global_avg_mtp_loss: 2.2679 +[titan] 2025-10-05 02:49:17,141 - root - INFO - lr: 4.7082e-05 gnorm: 1.20 [ 4:15:08<20:26:04] +[titan] 2025-10-05 02:49:28,078 - root - INFO - step: 6895 loss: 2.5483 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2478 +[titan] 2025-10-05 02:49:28,078 - root - INFO - lr: 4.7078e-05 gnorm: 1.18 [ 4:15:19<20:25:52] +[titan] 2025-10-05 02:49:36,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:49:38,935 - root - INFO - step: 6900 loss: 2.5983 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:49:38,935 - root - INFO - lr: 4.7073e-05 gnorm: 1.24 [ 4:15:30<20:25:40] +[titan] 2025-10-05 02:49:49,829 - root - INFO - step: 6905 loss: 2.5554 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2534 +[titan] 2025-10-05 02:49:49,830 - root - INFO - lr: 4.7069e-05 gnorm: 1.19 [ 4:15:41<20:25:28] +[titan] 2025-10-05 02:50:00,703 - root - INFO - step: 6910 loss: 2.6056 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.2975 +[titan] 2025-10-05 02:50:00,703 - root - INFO - lr: 4.7065e-05 gnorm: 1.20 [ 4:15:51<20:25:16] +[titan] 2025-10-05 02:50:11,566 - root - INFO - step: 6915 loss: 2.5960 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2897 +[titan] 2025-10-05 02:50:11,566 - root - INFO - lr: 4.7060e-05 gnorm: 1.24 [ 4:16:02<20:25:03] +[titan] 2025-10-05 02:50:22,427 - root - INFO - step: 6920 loss: 2.5924 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2856 +[titan] 2025-10-05 02:50:22,427 - root - INFO - lr: 4.7056e-05 gnorm: 1.19 [ 4:16:13<20:24:51] +[titan] 2025-10-05 02:50:33,321 - root - INFO - step: 6925 loss: 2.4869 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1938 +[titan] 2025-10-05 02:50:33,321 - root - INFO - lr: 4.7051e-05 gnorm: 1.18 [ 4:16:24<20:24:39] +[titan] 2025-10-05 02:50:44,192 - root - INFO - step: 6930 loss: 2.5543 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 02:50:44,193 - root - INFO - lr: 4.7047e-05 
gnorm: 1.24 [ 4:16:35<20:24:27] +[titan] 2025-10-05 02:50:55,042 - root - INFO - step: 6935 loss: 2.5426 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2428 +[titan] 2025-10-05 02:50:55,042 - root - INFO - lr: 4.7043e-05 gnorm: 1.21 [ 4:16:46<20:24:14] +[titan] 2025-10-05 02:51:05,935 - root - INFO - step: 6940 loss: 2.6667 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3517 +[titan] 2025-10-05 02:51:05,935 - root - INFO - lr: 4.7038e-05 gnorm: 1.24 [ 4:16:57<20:24:02] +[titan] 2025-10-05 02:51:16,790 - root - INFO - step: 6945 loss: 2.6473 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3345 +[titan] 2025-10-05 02:51:16,790 - root - INFO - lr: 4.7034e-05 gnorm: 1.27 [ 4:17:08<20:23:50] +[titan] 2025-10-05 02:51:25,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:51:27,720 - root - INFO - step: 6950 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2047 +[titan] 2025-10-05 02:51:27,721 - root - INFO - lr: 4.7029e-05 gnorm: 1.25 [ 4:17:18<20:23:38] +[titan] 2025-10-05 02:51:38,573 - root - INFO - step: 6955 loss: 2.6408 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:51:38,573 - root - INFO - lr: 4.7025e-05 gnorm: 1.22 [ 4:17:29<20:23:26] +[titan] 2025-10-05 02:51:49,457 - root - INFO - step: 6960 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3017 global_avg_mtp_loss: 2.2580 +[titan] 2025-10-05 02:51:49,458 - root - INFO - lr: 4.7020e-05 gnorm: 1.20 [ 4:17:40<20:23:13] +[titan] 2025-10-05 02:52:00,296 - root - INFO - step: 6965 loss: 2.6601 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:52:00,296 - root - INFO - lr: 4.7016e-05 gnorm: 1.30 [ 4:17:51<20:23:01] +[titan] 2025-10-05 02:52:11,200 - root - INFO - step: 6970 loss: 2.5501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2484 +[titan] 2025-10-05 02:52:11,200 - root - INFO - lr: 4.7012e-05 gnorm: 1.25 [ 4:18:02<20:22:49] +[titan] 2025-10-05 02:52:22,059 - root - INFO - step: 6975 loss: 2.5650 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2627 +[titan] 2025-10-05 02:52:22,059 - root - INFO - lr: 4.7007e-05 gnorm: 1.18 [ 4:18:13<20:22:37] +[titan] 2025-10-05 02:52:32,953 - root - INFO - step: 6980 loss: 2.6856 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.3581 +[titan] 2025-10-05 02:52:32,953 - root - INFO - lr: 4.7003e-05 
gnorm: 1.24 [ 4:18:24<20:22:25] +[titan] 2025-10-05 02:52:43,790 - root - INFO - step: 6985 loss: 2.5169 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2194 +[titan] 2025-10-05 02:52:43,790 - root - INFO - lr: 4.6998e-05 gnorm: 1.33 [ 4:18:35<20:22:12] +[titan] 2025-10-05 02:52:54,642 - root - INFO - step: 6990 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2300 +[titan] 2025-10-05 02:52:54,642 - root - INFO - lr: 4.6994e-05 gnorm: 1.18 [ 4:18:45<20:22:00] +[titan] 2025-10-05 02:53:05,477 - root - INFO - step: 6995 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3774 +[titan] 2025-10-05 02:53:05,477 - root - INFO - lr: 4.6989e-05 gnorm: 1.28 [ 4:18:56<20:21:48] +[titan] 2025-10-05 02:53:14,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:53:16,323 - root - INFO - step: 7000 loss: 2.6331 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.3240 +[titan] 2025-10-05 02:53:16,323 - root - INFO - lr: 4.6985e-05 gnorm: 1.28 [ 4:19:07<20:21:35] +[titan] 2025-10-05 02:53:27,204 - root - INFO - step: 7005 loss: 2.5777 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2742 +[titan] 2025-10-05 02:53:27,204 - root - INFO - lr: 4.6980e-05 gnorm: 1.20 [ 4:19:18<20:21:23] +[titan] 2025-10-05 02:53:38,086 - root - INFO - step: 7010 loss: 2.5633 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:53:38,086 - root - INFO - lr: 4.6976e-05 gnorm: 1.21 [ 4:19:29<20:21:11] +[titan] 2025-10-05 02:53:48,973 - root - INFO - step: 7015 loss: 2.5508 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2507 +[titan] 2025-10-05 02:53:48,973 - root - INFO - lr: 4.6971e-05 gnorm: 1.17 [ 4:19:40<20:20:59] +[titan] 2025-10-05 02:53:59,845 - root - INFO - step: 7020 loss: 2.6141 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3057 +[titan] 2025-10-05 02:53:59,845 - root - INFO - lr: 4.6967e-05 gnorm: 1.23 [ 4:19:51<20:20:47] +[titan] 2025-10-05 02:54:10,698 - root - INFO - step: 7025 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2184 +[titan] 2025-10-05 02:54:10,698 - root - INFO - lr: 4.6962e-05 gnorm: 1.18 [ 4:20:01<20:20:34] +[titan] 2025-10-05 02:54:21,549 - root - INFO - step: 7030 loss: 2.5250 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2274 +[titan] 2025-10-05 02:54:21,549 - root - INFO - lr: 4.6958e-05 
gnorm: 1.18 [ 4:20:12<20:20:22] +[titan] 2025-10-05 02:54:32,510 - root - INFO - step: 7035 loss: 2.4583 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2895 global_avg_mtp_loss: 2.1687 +[titan] 2025-10-05 02:54:32,510 - root - INFO - lr: 4.6954e-05 gnorm: 1.13 [ 4:20:23<20:20:10] +[titan] 2025-10-05 02:54:43,391 - root - INFO - step: 7040 loss: 2.5911 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2860 +[titan] 2025-10-05 02:54:43,392 - root - INFO - lr: 4.6949e-05 gnorm: 1.24 [ 4:20:34<20:19:58] +[titan] 2025-10-05 02:54:54,247 - root - INFO - step: 7045 loss: 2.5161 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2197 +[titan] 2025-10-05 02:54:54,247 - root - INFO - lr: 4.6945e-05 gnorm: 1.19 [ 4:20:45<20:19:46] +[titan] 2025-10-05 02:55:02,933 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:55:05,120 - root - INFO - step: 7050 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2600 +[titan] 2025-10-05 02:55:05,120 - root - INFO - lr: 4.6940e-05 gnorm: 1.19 [ 4:20:56<20:19:34] +[titan] 2025-10-05 02:55:15,988 - root - INFO - step: 7055 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:55:15,988 - root - INFO - lr: 4.6936e-05 gnorm: 1.18 [ 4:21:07<20:19:21] +[titan] 2025-10-05 02:55:26,853 - root - INFO - step: 7060 loss: 2.6283 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3103 global_avg_mtp_loss: 2.3181 +[titan] 2025-10-05 02:55:26,853 - root - INFO - lr: 4.6931e-05 gnorm: 1.21 [ 4:21:18<20:19:09] +[titan] 2025-10-05 02:55:37,782 - root - INFO - step: 7065 loss: 2.5429 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2433 +[titan] 2025-10-05 02:55:37,782 - root - INFO - lr: 4.6927e-05 gnorm: 1.17 [ 4:21:29<20:18:57] +[titan] 2025-10-05 02:55:48,649 - root - INFO - step: 7070 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2882 +[titan] 2025-10-05 02:55:48,649 - root - INFO - lr: 4.6922e-05 gnorm: 1.22 [ 4:21:39<20:18:45] +[titan] 2025-10-05 02:55:59,510 - root - INFO - step: 7075 loss: 2.5409 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 02:55:59,510 - root - INFO - lr: 4.6918e-05 gnorm: 1.20 [ 4:21:50<20:18:33] +[titan] 2025-10-05 02:56:10,352 - root - INFO - step: 7080 loss: 2.5976 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:56:10,353 - root - INFO - lr: 4.6913e-05 
gnorm: 1.19 [ 4:22:01<20:18:20] +[titan] 2025-10-05 02:56:21,217 - root - INFO - step: 7085 loss: 2.5675 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2647 +[titan] 2025-10-05 02:56:21,218 - root - INFO - lr: 4.6909e-05 gnorm: 1.26 [ 4:22:12<20:18:08] +[titan] 2025-10-05 02:56:32,130 - root - INFO - step: 7090 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 02:56:32,130 - root - INFO - lr: 4.6904e-05 gnorm: 1.19 [ 4:22:23<20:17:56] +[titan] 2025-10-05 02:56:43,006 - root - INFO - step: 7095 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 02:56:43,006 - root - INFO - lr: 4.6899e-05 gnorm: 1.16 [ 4:22:34<20:17:44] +[titan] 2025-10-05 02:56:51,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:56:53,930 - root - INFO - step: 7100 loss: 2.6150 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3102 global_avg_mtp_loss: 2.3048 +[titan] 2025-10-05 02:56:53,930 - root - INFO - lr: 4.6895e-05 gnorm: 1.24 [ 4:22:45<20:17:32] +[titan] 2025-10-05 02:57:04,810 - root - INFO - step: 7105 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2524 +[titan] 2025-10-05 02:57:04,810 - root - INFO - lr: 4.6890e-05 gnorm: 1.23 [ 4:22:56<20:17:20] +[titan] 2025-10-05 02:57:15,679 - root - INFO - step: 7110 loss: 2.6249 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3152 +[titan] 2025-10-05 02:57:15,679 - root - INFO - lr: 4.6886e-05 gnorm: 1.23 [ 4:23:06<20:17:08] +[titan] 2025-10-05 02:57:26,560 - root - INFO - step: 7115 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 02:57:26,560 - root - INFO - lr: 4.6881e-05 gnorm: 1.21 [ 4:23:17<20:16:56] +[titan] 2025-10-05 02:57:37,504 - root - INFO - step: 7120 loss: 2.5642 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 02:57:37,504 - root - INFO - lr: 4.6877e-05 gnorm: 1.25 [ 4:23:28<20:16:44] +[titan] 2025-10-05 02:57:48,407 - root - INFO - step: 7125 loss: 2.5252 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2269 +[titan] 2025-10-05 02:57:48,408 - root - INFO - lr: 4.6872e-05 gnorm: 1.23 [ 4:23:39<20:16:32] +[titan] 2025-10-05 02:57:59,324 - root - INFO - step: 7130 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2910 +[titan] 2025-10-05 02:57:59,324 - root - INFO - lr: 4.6868e-05 
gnorm: 1.23 [ 4:23:50<20:16:20] +[titan] 2025-10-05 02:58:10,198 - root - INFO - step: 7135 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3080 +[titan] 2025-10-05 02:58:10,198 - root - INFO - lr: 4.6863e-05 gnorm: 1.30 [ 4:24:01<20:16:08] +[titan] 2025-10-05 02:58:21,069 - root - INFO - step: 7140 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:58:21,069 - root - INFO - lr: 4.6859e-05 gnorm: 1.21 [ 4:24:12<20:15:56] +[titan] 2025-10-05 02:58:31,936 - root - INFO - step: 7145 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2622 +[titan] 2025-10-05 02:58:31,936 - root - INFO - lr: 4.6854e-05 gnorm: 1.20 [ 4:24:23<20:15:43] +[titan] 2025-10-05 02:58:40,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:58:42,870 - root - INFO - step: 7150 loss: 2.5513 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2508 +[titan] 2025-10-05 02:58:42,871 - root - INFO - lr: 4.6850e-05 gnorm: 1.21 [ 4:24:34<20:15:32] +[titan] 2025-10-05 02:58:53,743 - root - INFO - step: 7155 loss: 2.5589 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2563 +[titan] 2025-10-05 02:58:53,743 - root - INFO - lr: 4.6845e-05 gnorm: 1.16 [ 4:24:44<20:15:19] +[titan] 2025-10-05 02:59:04,618 - root - INFO - step: 7160 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2723 +[titan] 2025-10-05 02:59:04,618 - root - INFO - lr: 4.6840e-05 gnorm: 1.21 [ 4:24:55<20:15:07] +[titan] 2025-10-05 02:59:15,628 - root - INFO - step: 7165 loss: 2.5541 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.90 mfu: 41.75% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2529 +[titan] 2025-10-05 02:59:15,629 - root - INFO - lr: 4.6836e-05 gnorm: 1.17 [ 4:25:06<20:14:56] +[titan] 2025-10-05 02:59:22,341 - root - INFO - Dumping profiler traces at step 7168 +[titan] 2025-10-05 02:59:22,379 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:59:26,736 - root - INFO - step: 7170 loss: 2.6199 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.30 mfu: 41.38% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3097 +[titan] 2025-10-05 02:59:26,736 - root - INFO - lr: 4.6831e-05 gnorm: 1.20 [ 4:25:17<20:14:45] +[titan] 2025-10-05 02:59:37,672 - root - INFO - step: 7175 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1801 +[titan] 2025-10-05 02:59:37,672 - root - INFO - lr: 4.6827e-05 gnorm: 1.15 [ 4:25:28<20:14:33] +[titan] 2025-10-05 02:59:48,576 - root - INFO - step: 7180 loss: 2.6188 
memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3101 +[titan] 2025-10-05 02:59:48,576 - root - INFO - lr: 4.6822e-05 gnorm: 1.22 [ 4:25:39<20:14:21] +[titan] 2025-10-05 02:59:59,449 - root - INFO - step: 7185 loss: 2.5330 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2333 +[titan] 2025-10-05 02:59:59,450 - root - INFO - lr: 4.6818e-05 gnorm: 1.21 [ 4:25:50<20:14:09] +[titan] 2025-10-05 03:00:10,322 - root - INFO - step: 7190 loss: 2.6028 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2970 +[titan] 2025-10-05 03:00:10,322 - root - INFO - lr: 4.6813e-05 gnorm: 1.20 [ 4:26:01<20:13:57] +[titan] 2025-10-05 03:00:21,233 - root - INFO - step: 7195 loss: 2.6073 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2996 +[titan] 2025-10-05 03:00:21,233 - root - INFO - lr: 4.6808e-05 gnorm: 1.23 [ 4:26:12<20:13:45] +[titan] 2025-10-05 03:00:29,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:00:32,108 - root - INFO - step: 7200 loss: 2.5130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:00:32,108 - root - INFO - lr: 4.6804e-05 gnorm: 1.32 [ 4:26:23<20:13:32] +[titan] 2025-10-05 03:00:43,038 - root - INFO - step: 7205 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2785 +[titan] 2025-10-05 03:00:43,038 - root - INFO - lr: 4.6799e-05 gnorm: 1.29 [ 4:26:34<20:13:21] +[titan] 2025-10-05 03:00:53,933 - root - INFO - step: 7210 loss: 2.5257 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:00:53,933 - root - INFO - lr: 4.6795e-05 gnorm: 1.20 [ 4:26:45<20:13:09] +[titan] 2025-10-05 03:01:04,827 - root - INFO - step: 7215 loss: 2.5854 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3039 global_avg_mtp_loss: 2.2815 +[titan] 2025-10-05 03:01:04,827 - root - INFO - lr: 4.6790e-05 gnorm: 1.21 [ 4:26:56<20:12:57] +[titan] 2025-10-05 03:01:15,711 - root - INFO - step: 7220 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2982 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:01:15,712 - root - INFO - lr: 4.6786e-05 gnorm: 1.19 [ 4:27:06<20:12:44] +[titan] 2025-10-05 03:01:26,615 - root - INFO - step: 7225 loss: 2.4967 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2040 +[titan] 2025-10-05 03:01:26,615 - root - INFO - lr: 4.6781e-05 gnorm: 1.24 [ 4:27:17<20:12:32] +[titan] 2025-10-05 03:01:37,539 - root - INFO - step: 7230 loss: 2.6118 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.3044 +[titan] 2025-10-05 03:01:37,539 - root - INFO - lr: 4.6776e-05 
gnorm: 1.20 [ 4:27:28<20:12:21] +[titan] 2025-10-05 03:01:48,431 - root - INFO - step: 7235 loss: 2.5240 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2979 global_avg_mtp_loss: 2.2261 +[titan] 2025-10-05 03:01:48,431 - root - INFO - lr: 4.6772e-05 gnorm: 1.18 [ 4:27:39<20:12:09] +[titan] 2025-10-05 03:01:59,313 - root - INFO - step: 7240 loss: 2.5262 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2284 +[titan] 2025-10-05 03:01:59,313 - root - INFO - lr: 4.6767e-05 gnorm: 1.17 [ 4:27:50<20:11:56] +[titan] 2025-10-05 03:02:10,185 - root - INFO - step: 7245 loss: 2.5139 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2193 +[titan] 2025-10-05 03:02:10,185 - root - INFO - lr: 4.6762e-05 gnorm: 1.26 [ 4:28:01<20:11:44] +[titan] 2025-10-05 03:02:18,882 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:02:21,067 - root - INFO - step: 7250 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:02:21,067 - root - INFO - lr: 4.6758e-05 gnorm: 1.26 [ 4:28:12<20:11:32] +[titan] 2025-10-05 03:02:31,931 - root - INFO - step: 7255 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2327 +[titan] 2025-10-05 03:02:31,931 - root - INFO - lr: 4.6753e-05 gnorm: 1.22 [ 4:28:23<20:11:20] +[titan] 2025-10-05 03:02:42,869 - root - INFO - step: 7260 loss: 2.5329 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.2991 global_avg_mtp_loss: 2.2339 +[titan] 2025-10-05 03:02:42,869 - root - INFO - lr: 4.6749e-05 gnorm: 1.22 [ 4:28:34<20:11:08] +[titan] 2025-10-05 03:02:53,734 - root - INFO - step: 7265 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.2033 +[titan] 2025-10-05 03:02:53,734 - root - INFO - lr: 4.6744e-05 gnorm: 1.30 [ 4:28:44<20:10:56] +[titan] 2025-10-05 03:03:04,623 - root - INFO - step: 7270 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3079 global_avg_mtp_loss: 2.3083 +[titan] 2025-10-05 03:03:04,623 - root - INFO - lr: 4.6739e-05 gnorm: 1.34 [ 4:28:55<20:10:44] +[titan] 2025-10-05 03:03:15,505 - root - INFO - step: 7275 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3072 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 03:03:15,505 - root - INFO - lr: 4.6735e-05 gnorm: 1.22 [ 4:29:06<20:10:32] +[titan] 2025-10-05 03:03:26,372 - root - INFO - step: 7280 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2876 +[titan] 2025-10-05 03:03:26,372 - root - INFO - lr: 4.6730e-05 
gnorm: 1.19 [ 4:29:17<20:10:20] +[titan] 2025-10-05 03:03:37,274 - root - INFO - step: 7285 loss: 2.6024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2948 +[titan] 2025-10-05 03:03:37,274 - root - INFO - lr: 4.6725e-05 gnorm: 1.27 [ 4:29:28<20:10:08] +[titan] 2025-10-05 03:03:48,171 - root - INFO - step: 7290 loss: 2.5142 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2183 +[titan] 2025-10-05 03:03:48,171 - root - INFO - lr: 4.6721e-05 gnorm: 1.18 [ 4:29:39<20:09:56] +[titan] 2025-10-05 03:03:59,037 - root - INFO - step: 7295 loss: 2.5672 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3019 global_avg_mtp_loss: 2.2653 +[titan] 2025-10-05 03:03:59,037 - root - INFO - lr: 4.6716e-05 gnorm: 1.21 [ 4:29:50<20:09:44] +[titan] 2025-10-05 03:04:07,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:04:09,893 - root - INFO - step: 7300 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2362 +[titan] 2025-10-05 03:04:09,893 - root - INFO - lr: 4.6712e-05 gnorm: 1.19 [ 4:30:01<20:09:32] +[titan] 2025-10-05 03:04:20,770 - root - INFO - step: 7305 loss: 2.5190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:04:20,770 - root - INFO - lr: 4.6707e-05 gnorm: 1.20 [ 4:30:11<20:09:20] +[titan] 2025-10-05 03:04:31,636 - root - INFO - step: 7310 loss: 2.5542 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2522 +[titan] 2025-10-05 03:04:31,637 - root - INFO - lr: 4.6702e-05 gnorm: 1.16 [ 4:30:22<20:09:07] +[titan] 2025-10-05 03:04:42,538 - root - INFO - step: 7315 loss: 2.5823 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3047 global_avg_mtp_loss: 2.2776 +[titan] 2025-10-05 03:04:42,538 - root - INFO - lr: 4.6698e-05 gnorm: 1.19 [ 4:30:33<20:08:55] +[titan] 2025-10-05 03:04:53,396 - root - INFO - step: 7320 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.2988 +[titan] 2025-10-05 03:04:53,396 - root - INFO - lr: 4.6693e-05 gnorm: 1.20 [ 4:30:44<20:08:43] +[titan] 2025-10-05 03:05:04,291 - root - INFO - step: 7325 loss: 2.6131 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 03:05:04,291 - root - INFO - lr: 4.6688e-05 gnorm: 1.20 [ 4:30:55<20:08:31] +[titan] 2025-10-05 03:05:15,170 - root - INFO - step: 7330 loss: 2.5664 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2639 +[titan] 2025-10-05 03:05:15,170 - root - INFO - lr: 4.6684e-05 
gnorm: 1.19 [ 4:31:06<20:08:19] +[titan] 2025-10-05 03:05:26,057 - root - INFO - step: 7335 loss: 2.5718 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:05:26,057 - root - INFO - lr: 4.6679e-05 gnorm: 1.19 [ 4:31:17<20:08:07] +[titan] 2025-10-05 03:05:36,944 - root - INFO - step: 7340 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:05:36,945 - root - INFO - lr: 4.6674e-05 gnorm: 1.21 [ 4:31:28<20:07:55] +[titan] 2025-10-05 03:05:47,861 - root - INFO - step: 7345 loss: 2.4951 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2008 +[titan] 2025-10-05 03:05:47,862 - root - INFO - lr: 4.6670e-05 gnorm: 1.18 [ 4:31:39<20:07:43] +[titan] 2025-10-05 03:05:56,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:05:58,742 - root - INFO - step: 7350 loss: 2.6375 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3271 +[titan] 2025-10-05 03:05:58,742 - root - INFO - lr: 4.6665e-05 gnorm: 1.20 [ 4:31:49<20:07:31] +[titan] 2025-10-05 03:06:09,631 - root - INFO - step: 7355 loss: 2.5204 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2241 +[titan] 2025-10-05 03:06:09,632 - root - INFO - lr: 4.6660e-05 gnorm: 1.13 [ 4:32:00<20:07:19] +[titan] 2025-10-05 03:06:20,514 - root - INFO - step: 7360 loss: 2.5761 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2736 +[titan] 2025-10-05 03:06:20,514 - root - INFO - lr: 4.6656e-05 gnorm: 1.20 [ 4:32:11<20:07:07] +[titan] 2025-10-05 03:06:31,396 - root - INFO - step: 7365 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2678 +[titan] 2025-10-05 03:06:31,397 - root - INFO - lr: 4.6651e-05 gnorm: 1.18 [ 4:32:22<20:06:55] +[titan] 2025-10-05 03:06:42,281 - root - INFO - step: 7370 loss: 2.5449 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2457 +[titan] 2025-10-05 03:06:42,282 - root - INFO - lr: 4.6646e-05 gnorm: 1.20 [ 4:32:33<20:06:43] +[titan] 2025-10-05 03:06:53,156 - root - INFO - step: 7375 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2076 +[titan] 2025-10-05 03:06:53,156 - root - INFO - lr: 4.6642e-05 gnorm: 1.20 [ 4:32:44<20:06:31] +[titan] 2025-10-05 03:07:04,009 - root - INFO - step: 7380 loss: 2.4884 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:07:04,009 - root - INFO - lr: 4.6637e-05 
gnorm: 1.18 [ 4:32:55<20:06:19] +[titan] 2025-10-05 03:07:14,887 - root - INFO - step: 7385 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2624 +[titan] 2025-10-05 03:07:14,887 - root - INFO - lr: 4.6632e-05 gnorm: 1.29 [ 4:33:06<20:06:07] +[titan] 2025-10-05 03:07:25,781 - root - INFO - step: 7390 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:07:25,781 - root - INFO - lr: 4.6627e-05 gnorm: 1.19 [ 4:33:16<20:05:55] +[titan] 2025-10-05 03:07:36,668 - root - INFO - step: 7395 loss: 2.5215 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2253 +[titan] 2025-10-05 03:07:36,668 - root - INFO - lr: 4.6623e-05 gnorm: 1.18 [ 4:33:27<20:05:43] +[titan] 2025-10-05 03:07:45,411 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:07:47,595 - root - INFO - step: 7400 loss: 2.5552 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 03:07:47,595 - root - INFO - lr: 4.6618e-05 gnorm: 1.25 [ 4:33:38<20:05:31] +[titan] 2025-10-05 03:07:58,479 - root - INFO - step: 7405 loss: 2.5722 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2664 +[titan] 2025-10-05 03:07:58,479 - root - INFO - lr: 4.6613e-05 gnorm: 1.23 [ 4:33:49<20:05:19] +[titan] 2025-10-05 03:08:09,352 - root - INFO - step: 7410 loss: 2.6173 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 03:08:09,352 - root - INFO - lr: 4.6609e-05 gnorm: 1.26 [ 4:34:00<20:05:07] +[titan] 2025-10-05 03:08:20,245 - root - INFO - step: 7415 loss: 2.6371 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3115 global_avg_mtp_loss: 2.3256 +[titan] 2025-10-05 03:08:20,245 - root - INFO - lr: 4.6604e-05 gnorm: 1.18 [ 4:34:11<20:04:55] +[titan] 2025-10-05 03:08:31,148 - root - INFO - step: 7420 loss: 2.5121 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:08:31,148 - root - INFO - lr: 4.6599e-05 gnorm: 1.18 [ 4:34:22<20:04:43] +[titan] 2025-10-05 03:08:42,047 - root - INFO - step: 7425 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2028 +[titan] 2025-10-05 03:08:42,047 - root - INFO - lr: 4.6594e-05 gnorm: 1.17 [ 4:34:33<20:04:31] +[titan] 2025-10-05 03:08:52,923 - root - INFO - step: 7430 loss: 2.5993 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2935 +[titan] 2025-10-05 03:08:52,923 - root - INFO - lr: 4.6590e-05 
gnorm: 1.19 [ 4:34:44<20:04:19] +[titan] 2025-10-05 03:09:03,806 - root - INFO - step: 7435 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2641 +[titan] 2025-10-05 03:09:03,806 - root - INFO - lr: 4.6585e-05 gnorm: 1.23 [ 4:34:55<20:04:07] +[titan] 2025-10-05 03:09:14,682 - root - INFO - step: 7440 loss: 2.4458 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:09:14,682 - root - INFO - lr: 4.6580e-05 gnorm: 1.21 [ 4:35:05<20:03:55] +[titan] 2025-10-05 03:09:25,563 - root - INFO - step: 7445 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2988 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:09:25,563 - root - INFO - lr: 4.6576e-05 gnorm: 1.20 [ 4:35:16<20:03:43] +[titan] 2025-10-05 03:09:34,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:09:36,483 - root - INFO - step: 7450 loss: 2.4992 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2041 +[titan] 2025-10-05 03:09:36,483 - root - INFO - lr: 4.6571e-05 gnorm: 1.13 [ 4:35:27<20:03:31] +[titan] 2025-10-05 03:09:47,415 - root - INFO - step: 7455 loss: 2.5685 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:09:47,415 - root - INFO - lr: 4.6566e-05 gnorm: 1.21 [ 4:35:38<20:03:19] +[titan] 2025-10-05 03:09:58,322 - root - INFO - step: 7460 loss: 2.5530 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2521 +[titan] 2025-10-05 03:09:58,322 - root - INFO - lr: 4.6561e-05 gnorm: 1.19 [ 4:35:49<20:03:07] +[titan] 2025-10-05 03:10:09,217 - root - INFO - step: 7465 loss: 2.5984 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2930 +[titan] 2025-10-05 03:10:09,217 - root - INFO - lr: 4.6557e-05 gnorm: 1.33 [ 4:36:00<20:02:55] +[titan] 2025-10-05 03:10:20,126 - root - INFO - step: 7470 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:10:20,127 - root - INFO - lr: 4.6552e-05 gnorm: 1.25 [ 4:36:11<20:02:44] +[titan] 2025-10-05 03:10:31,009 - root - INFO - step: 7475 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3233 +[titan] 2025-10-05 03:10:31,009 - root - INFO - lr: 4.6547e-05 gnorm: 1.21 [ 4:36:22<20:02:32] +[titan] 2025-10-05 03:10:41,908 - root - INFO - step: 7480 loss: 2.6221 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3088 global_avg_mtp_loss: 2.3133 +[titan] 2025-10-05 03:10:41,908 - root - INFO - lr: 4.6542e-05 
gnorm: 1.24 [ 4:36:33<20:02:20] +[titan] 2025-10-05 03:10:52,859 - root - INFO - step: 7485 loss: 2.6267 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3150 +[titan] 2025-10-05 03:10:52,859 - root - INFO - lr: 4.6538e-05 gnorm: 1.23 [ 4:36:44<20:02:08] +[titan] 2025-10-05 03:11:03,748 - root - INFO - step: 7490 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:11:03,748 - root - INFO - lr: 4.6533e-05 gnorm: 1.16 [ 4:36:54<20:01:56] +[titan] 2025-10-05 03:11:14,653 - root - INFO - step: 7495 loss: 2.5041 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2100 +[titan] 2025-10-05 03:11:14,654 - root - INFO - lr: 4.6528e-05 gnorm: 1.17 [ 4:37:05<20:01:44] +[titan] 2025-10-05 03:11:23,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:11:25,558 - root - INFO - step: 7500 loss: 2.5279 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 03:11:25,558 - root - INFO - lr: 4.6523e-05 gnorm: 1.17 [ 4:37:16<20:01:32] +[titan] 2025-10-05 03:11:36,447 - root - INFO - step: 7505 loss: 2.5670 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:11:36,447 - root - INFO - lr: 4.6519e-05 gnorm: 1.26 [ 4:37:27<20:01:20] +[titan] 2025-10-05 03:11:47,366 - root - INFO - step: 7510 loss: 2.5107 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 03:11:47,366 - root - INFO - lr: 4.6514e-05 gnorm: 1.18 [ 4:37:38<20:01:08] +[titan] 2025-10-05 03:11:58,284 - root - INFO - step: 7515 loss: 2.6471 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3343 +[titan] 2025-10-05 03:11:58,284 - root - INFO - lr: 4.6509e-05 gnorm: 1.26 [ 4:37:49<20:00:56] +[titan] 2025-10-05 03:12:09,177 - root - INFO - step: 7520 loss: 2.5022 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:12:09,177 - root - INFO - lr: 4.6504e-05 gnorm: 1.24 [ 4:38:00<20:00:45] +[titan] 2025-10-05 03:12:20,065 - root - INFO - step: 7525 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2289 +[titan] 2025-10-05 03:12:20,065 - root - INFO - lr: 4.6499e-05 gnorm: 1.20 [ 4:38:11<20:00:33] +[titan] 2025-10-05 03:12:30,937 - root - INFO - step: 7530 loss: 2.5858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2803 +[titan] 2025-10-05 03:12:30,937 - root - INFO - lr: 4.6495e-05 
gnorm: 1.25 [ 4:38:22<20:00:21] +[titan] 2025-10-05 03:12:41,813 - root - INFO - step: 7535 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:12:41,814 - root - INFO - lr: 4.6490e-05 gnorm: 1.20 [ 4:38:33<20:00:09] +[titan] 2025-10-05 03:12:52,684 - root - INFO - step: 7540 loss: 2.5356 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:12:52,684 - root - INFO - lr: 4.6485e-05 gnorm: 1.23 [ 4:38:43<19:59:56] +[titan] 2025-10-05 03:13:03,580 - root - INFO - step: 7545 loss: 2.5425 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2420 +[titan] 2025-10-05 03:13:03,580 - root - INFO - lr: 4.6480e-05 gnorm: 1.22 [ 4:38:54<19:59:45] +[titan] 2025-10-05 03:13:12,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:13:14,425 - root - INFO - step: 7550 loss: 2.5098 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:14,425 - root - INFO - lr: 4.6476e-05 gnorm: 1.21 [ 4:39:05<19:59:32] +[titan] 2025-10-05 03:13:25,285 - root - INFO - step: 7555 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2953 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:25,285 - root - INFO - lr: 4.6471e-05 gnorm: 1.32 [ 4:39:16<19:59:20] +[titan] 2025-10-05 03:13:36,128 - root - INFO - step: 7560 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1394 +[titan] 2025-10-05 03:13:36,128 - root - INFO - lr: 4.6466e-05 gnorm: 1.23 [ 4:39:27<19:59:08] +[titan] 2025-10-05 03:13:47,004 - root - INFO - step: 7565 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2297 +[titan] 2025-10-05 03:13:47,005 - root - INFO - lr: 4.6461e-05 gnorm: 1.21 [ 4:39:38<19:58:56] +[titan] 2025-10-05 03:13:57,856 - root - INFO - step: 7570 loss: 2.4658 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:13:57,856 - root - INFO - lr: 4.6456e-05 gnorm: 1.15 [ 4:39:49<19:58:44] +[titan] 2025-10-05 03:14:08,701 - root - INFO - step: 7575 loss: 2.5486 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2477 +[titan] 2025-10-05 03:14:08,701 - root - INFO - lr: 4.6452e-05 gnorm: 1.16 [ 4:39:59<19:58:32] +[titan] 2025-10-05 03:14:19,585 - root - INFO - step: 7580 loss: 2.4950 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:14:19,586 - root - INFO - lr: 4.6447e-05 
gnorm: 1.20 [ 4:40:10<19:58:20] +[titan] 2025-10-05 03:14:30,487 - root - INFO - step: 7585 loss: 2.5519 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3008 global_avg_mtp_loss: 2.2511 +[titan] 2025-10-05 03:14:30,487 - root - INFO - lr: 4.6442e-05 gnorm: 1.18 [ 4:40:21<19:58:08] +[titan] 2025-10-05 03:14:41,356 - root - INFO - step: 7590 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2224 +[titan] 2025-10-05 03:14:41,356 - root - INFO - lr: 4.6437e-05 gnorm: 1.18 [ 4:40:32<19:57:56] +[titan] 2025-10-05 03:14:52,221 - root - INFO - step: 7595 loss: 2.5646 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 03:14:52,221 - root - INFO - lr: 4.6432e-05 gnorm: 1.16 [ 4:40:43<19:57:44] +[titan] 2025-10-05 03:15:00,910 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:15:03,087 - root - INFO - step: 7600 loss: 2.5198 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:15:03,087 - root - INFO - lr: 4.6427e-05 gnorm: 1.22 [ 4:40:54<19:57:32] +[titan] 2025-10-05 03:15:13,944 - root - INFO - step: 7605 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2354 +[titan] 2025-10-05 03:15:13,944 - root - INFO - lr: 4.6423e-05 gnorm: 1.19 [ 4:41:05<19:57:20] +[titan] 2025-10-05 03:15:24,824 - root - INFO - step: 7610 loss: 2.4376 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1514 +[titan] 2025-10-05 03:15:24,824 - root - INFO - lr: 4.6418e-05 gnorm: 1.19 [ 4:41:16<19:57:08] +[titan] 2025-10-05 03:15:35,666 - root - INFO - step: 7615 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2192 +[titan] 2025-10-05 03:15:35,666 - root - INFO - lr: 4.6413e-05 gnorm: 1.17 [ 4:41:26<19:56:56] +[titan] 2025-10-05 03:15:46,512 - root - INFO - step: 7620 loss: 2.5412 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2413 +[titan] 2025-10-05 03:15:46,512 - root - INFO - lr: 4.6408e-05 gnorm: 1.18 [ 4:41:37<19:56:44] +[titan] 2025-10-05 03:15:57,356 - root - INFO - step: 7625 loss: 2.6165 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3094 global_avg_mtp_loss: 2.3070 +[titan] 2025-10-05 03:15:57,356 - root - INFO - lr: 4.6403e-05 gnorm: 1.26 [ 4:41:48<19:56:32] +[titan] 2025-10-05 03:16:08,215 - root - INFO - step: 7630 loss: 2.5181 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:16:08,215 - root - INFO - lr: 4.6398e-05 
gnorm: 1.21 [ 4:41:59<19:56:19] +[titan] 2025-10-05 03:16:19,087 - root - INFO - step: 7635 loss: 2.4574 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1676 +[titan] 2025-10-05 03:16:19,088 - root - INFO - lr: 4.6394e-05 gnorm: 1.19 [ 4:42:10<19:56:07] +[titan] 2025-10-05 03:16:29,923 - root - INFO - step: 7640 loss: 2.4611 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1719 +[titan] 2025-10-05 03:16:29,923 - root - INFO - lr: 4.6389e-05 gnorm: 1.17 [ 4:42:21<19:55:55] +[titan] 2025-10-05 03:16:40,805 - root - INFO - step: 7645 loss: 2.5518 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2500 +[titan] 2025-10-05 03:16:40,805 - root - INFO - lr: 4.6384e-05 gnorm: 1.19 [ 4:42:31<19:55:43] +[titan] 2025-10-05 03:16:49,483 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:16:51,676 - root - INFO - step: 7650 loss: 2.5593 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2579 +[titan] 2025-10-05 03:16:51,677 - root - INFO - lr: 4.6379e-05 gnorm: 1.21 [ 4:42:42<19:55:31] +[titan] 2025-10-05 03:17:02,521 - root - INFO - step: 7655 loss: 2.5404 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 03:17:02,521 - root - INFO - lr: 4.6374e-05 gnorm: 1.24 [ 4:42:53<19:55:19] +[titan] 2025-10-05 03:17:13,367 - root - INFO - step: 7660 loss: 2.5051 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2097 +[titan] 2025-10-05 03:17:13,367 - root - INFO - lr: 4.6369e-05 gnorm: 1.23 [ 4:43:04<19:55:07] +[titan] 2025-10-05 03:17:24,235 - root - INFO - step: 7665 loss: 2.6218 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.3131 +[titan] 2025-10-05 03:17:24,235 - root - INFO - lr: 4.6364e-05 gnorm: 1.19 [ 4:43:15<19:54:55] +[titan] 2025-10-05 03:17:35,066 - root - INFO - step: 7670 loss: 2.5900 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2847 +[titan] 2025-10-05 03:17:35,066 - root - INFO - lr: 4.6360e-05 gnorm: 1.23 [ 4:43:26<19:54:43] +[titan] 2025-10-05 03:17:45,893 - root - INFO - step: 7675 loss: 2.5953 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 03:17:45,893 - root - INFO - lr: 4.6355e-05 gnorm: 1.19 [ 4:43:37<19:54:31] +[titan] 2025-10-05 03:17:56,861 - root - INFO - step: 7680 loss: 2.5148 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2185 +[titan] 2025-10-05 03:17:56,861 - root - INFO - lr: 4.6350e-05 
gnorm: 1.23 [ 4:43:48<19:54:19] +[titan] 2025-10-05 03:17:57,042 - root - INFO - Dumping profiler traces at step 7680 +[titan] 2025-10-05 03:17:57,081 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:18:07,907 - root - INFO - step: 7685 loss: 2.4389 memory: 118.84GiB(85.28%) tps: 29,665 tflops: 411.56 mfu: 41.61% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 03:18:07,907 - root - INFO - lr: 4.6345e-05 gnorm: 1.17 [ 4:43:59<19:54:08] +[titan] 2025-10-05 03:18:18,756 - root - INFO - step: 7690 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:18:18,757 - root - INFO - lr: 4.6340e-05 gnorm: 1.18 [ 4:44:09<19:53:56] +[titan] 2025-10-05 03:18:29,609 - root - INFO - step: 7695 loss: 2.5730 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:18:29,609 - root - INFO - lr: 4.6335e-05 gnorm: 1.36 [ 4:44:20<19:53:44] +[titan] 2025-10-05 03:18:38,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:18:40,472 - root - INFO - step: 7700 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2643 +[titan] 2025-10-05 03:18:40,473 - root - INFO - lr: 4.6330e-05 gnorm: 1.19 [ 4:44:31<19:53:32] +[titan] 2025-10-05 03:18:51,364 - root - INFO - step: 7705 loss: 2.5443 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 03:18:51,364 - root - INFO - lr: 4.6325e-05 gnorm: 1.19 [ 4:44:42<19:53:20] +[titan] 2025-10-05 03:19:02,224 - root - INFO - step: 7710 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2637 +[titan] 2025-10-05 03:19:02,225 - root - INFO - lr: 4.6321e-05 gnorm: 1.20 [ 4:44:53<19:53:08] +[titan] 2025-10-05 03:19:13,098 - root - INFO - step: 7715 loss: 2.5489 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2486 +[titan] 2025-10-05 03:19:13,098 - root - INFO - lr: 4.6316e-05 gnorm: 1.20 [ 4:45:04<19:52:56] +[titan] 2025-10-05 03:19:23,973 - root - INFO - step: 7720 loss: 2.4402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1529 +[titan] 2025-10-05 03:19:23,974 - root - INFO - lr: 4.6311e-05 gnorm: 1.21 [ 4:45:15<19:52:44] +[titan] 2025-10-05 03:19:34,816 - root - INFO - step: 7725 loss: 2.5551 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:19:34,816 - root - INFO - lr: 4.6306e-05 gnorm: 1.19 [ 4:45:25<19:52:32] +[titan] 2025-10-05 03:19:45,679 - root - INFO - step: 7730 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2814 +[titan] 2025-10-05 03:19:45,679 - root - INFO - lr: 4.6301e-05 
gnorm: 1.17 [ 4:45:36<19:52:20] +[titan] 2025-10-05 03:19:56,502 - root - INFO - step: 7735 loss: 2.5206 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2970 global_avg_mtp_loss: 2.2236 +[titan] 2025-10-05 03:19:56,502 - root - INFO - lr: 4.6296e-05 gnorm: 1.24 [ 4:45:47<19:52:08] +[titan] 2025-10-05 03:20:07,337 - root - INFO - step: 7740 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2545 +[titan] 2025-10-05 03:20:07,337 - root - INFO - lr: 4.6291e-05 gnorm: 1.19 [ 4:45:58<19:51:55] +[titan] 2025-10-05 03:20:18,166 - root - INFO - step: 7745 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.79 mfu: 42.45% global_avg_ntp_loss: 0.2938 global_avg_mtp_loss: 2.1964 +[titan] 2025-10-05 03:20:18,166 - root - INFO - lr: 4.6286e-05 gnorm: 1.20 [ 4:46:09<19:51:43] +[titan] 2025-10-05 03:20:26,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:20:29,014 - root - INFO - step: 7750 loss: 2.4800 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.1876 +[titan] 2025-10-05 03:20:29,015 - root - INFO - lr: 4.6281e-05 gnorm: 1.17 [ 4:46:20<19:51:31] +[titan] 2025-10-05 03:20:39,856 - root - INFO - step: 7755 loss: 2.4850 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1929 +[titan] 2025-10-05 03:20:39,857 - root - INFO - lr: 4.6276e-05 gnorm: 1.24 [ 4:46:31<19:51:19] +[titan] 2025-10-05 03:20:50,697 - root - INFO - step: 7760 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2144 +[titan] 2025-10-05 03:20:50,697 - root - INFO - lr: 4.6271e-05 gnorm: 1.13 [ 4:46:41<19:51:07] +[titan] 2025-10-05 03:21:01,572 - root - INFO - step: 7765 loss: 2.5168 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2200 +[titan] 2025-10-05 03:21:01,573 - root - INFO - lr: 4.6267e-05 gnorm: 1.17 [ 4:46:52<19:50:55] +[titan] 2025-10-05 03:21:12,426 - root - INFO - step: 7770 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:21:12,426 - root - INFO - lr: 4.6262e-05 gnorm: 1.24 [ 4:47:03<19:50:43] +[titan] 2025-10-05 03:21:23,262 - root - INFO - step: 7775 loss: 2.5468 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2462 +[titan] 2025-10-05 03:21:23,262 - root - INFO - lr: 4.6257e-05 gnorm: 1.22 [ 4:47:14<19:50:31] +[titan] 2025-10-05 03:21:34,121 - root - INFO - step: 7780 loss: 2.5186 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2965 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:21:34,121 - root - INFO - lr: 4.6252e-05 
gnorm: 1.22 [ 4:47:25<19:50:19] +[titan] 2025-10-05 03:21:44,959 - root - INFO - step: 7785 loss: 2.5555 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2532 +[titan] 2025-10-05 03:21:44,960 - root - INFO - lr: 4.6247e-05 gnorm: 1.19 [ 4:47:36<19:50:07] +[titan] 2025-10-05 03:21:55,841 - root - INFO - step: 7790 loss: 2.5595 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2574 +[titan] 2025-10-05 03:21:55,841 - root - INFO - lr: 4.6242e-05 gnorm: 1.21 [ 4:47:47<19:49:55] +[titan] 2025-10-05 03:22:06,686 - root - INFO - step: 7795 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2407 +[titan] 2025-10-05 03:22:06,686 - root - INFO - lr: 4.6237e-05 gnorm: 1.20 [ 4:47:57<19:49:43] +[titan] 2025-10-05 03:22:15,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:22:17,501 - root - INFO - step: 7800 loss: 2.4671 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1775 +[titan] 2025-10-05 03:22:17,501 - root - INFO - lr: 4.6232e-05 gnorm: 1.31 [ 4:48:08<19:49:31] +[titan] 2025-10-05 03:22:28,367 - root - INFO - step: 7805 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2366 +[titan] 2025-10-05 03:22:28,367 - root - INFO - lr: 4.6227e-05 gnorm: 1.21 [ 4:48:19<19:49:19] +[titan] 2025-10-05 03:22:39,182 - root - INFO - step: 7810 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:22:39,182 - root - INFO - lr: 4.6222e-05 gnorm: 1.23 [ 4:48:30<19:49:07] +[titan] 2025-10-05 03:22:50,001 - root - INFO - step: 7815 loss: 2.5037 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2093 +[titan] 2025-10-05 03:22:50,001 - root - INFO - lr: 4.6217e-05 gnorm: 1.17 [ 4:48:41<19:48:54] +[titan] 2025-10-05 03:23:00,861 - root - INFO - step: 7820 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 03:23:00,861 - root - INFO - lr: 4.6212e-05 gnorm: 1.15 [ 4:48:52<19:48:42] +[titan] 2025-10-05 03:23:11,665 - root - INFO - step: 7825 loss: 2.5549 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:23:11,665 - root - INFO - lr: 4.6207e-05 gnorm: 1.18 [ 4:49:02<19:48:30] +[titan] 2025-10-05 03:23:22,463 - root - INFO - step: 7830 loss: 2.5877 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2813 +[titan] 2025-10-05 03:23:22,464 - root - INFO - lr: 4.6202e-05 
gnorm: 1.22 [ 4:49:13<19:48:18] +[titan] 2025-10-05 03:23:33,276 - root - INFO - step: 7835 loss: 2.5278 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:23:33,276 - root - INFO - lr: 4.6197e-05 gnorm: 1.28 [ 4:49:24<19:48:06] +[titan] 2025-10-05 03:23:44,101 - root - INFO - step: 7840 loss: 2.5759 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 03:23:44,101 - root - INFO - lr: 4.6192e-05 gnorm: 1.19 [ 4:49:35<19:47:54] +[titan] 2025-10-05 03:23:54,974 - root - INFO - step: 7845 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 03:23:54,975 - root - INFO - lr: 4.6187e-05 gnorm: 1.19 [ 4:49:46<19:47:42] +[titan] 2025-10-05 03:24:03,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:24:05,782 - root - INFO - step: 7850 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2873 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 03:24:05,783 - root - INFO - lr: 4.6182e-05 gnorm: 1.17 [ 4:49:56<19:47:29] +[titan] 2025-10-05 03:24:16,593 - root - INFO - step: 7855 loss: 2.4523 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1636 +[titan] 2025-10-05 03:24:16,593 - root - INFO - lr: 4.6177e-05 gnorm: 1.14 [ 4:50:07<19:47:17] +[titan] 2025-10-05 03:24:27,424 - root - INFO - step: 7860 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2509 +[titan] 2025-10-05 03:24:27,424 - root - INFO - lr: 4.6172e-05 gnorm: 1.24 [ 4:50:18<19:47:05] +[titan] 2025-10-05 03:24:38,249 - root - INFO - step: 7865 loss: 2.5375 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2394 +[titan] 2025-10-05 03:24:38,249 - root - INFO - lr: 4.6167e-05 gnorm: 1.22 [ 4:50:29<19:46:53] +[titan] 2025-10-05 03:24:49,117 - root - INFO - step: 7870 loss: 2.4208 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1365 +[titan] 2025-10-05 03:24:49,117 - root - INFO - lr: 4.6163e-05 gnorm: 1.17 [ 4:50:40<19:46:41] +[titan] 2025-10-05 03:25:00,043 - root - INFO - step: 7875 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:25:00,043 - root - INFO - lr: 4.6158e-05 gnorm: 1.19 [ 4:50:51<19:46:29] +[titan] 2025-10-05 03:25:10,889 - root - INFO - step: 7880 loss: 2.5464 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2467 +[titan] 2025-10-05 03:25:10,889 - root - INFO - lr: 4.6153e-05 
gnorm: 1.19 [ 4:51:02<19:46:17] +[titan] 2025-10-05 03:25:21,745 - root - INFO - step: 7885 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:25:21,745 - root - INFO - lr: 4.6148e-05 gnorm: 1.18 [ 4:51:12<19:46:05] +[titan] 2025-10-05 03:25:32,610 - root - INFO - step: 7890 loss: 2.5321 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2346 +[titan] 2025-10-05 03:25:32,610 - root - INFO - lr: 4.6143e-05 gnorm: 1.20 [ 4:51:23<19:45:53] +[titan] 2025-10-05 03:25:43,443 - root - INFO - step: 7895 loss: 2.5115 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:25:43,443 - root - INFO - lr: 4.6138e-05 gnorm: 1.14 [ 4:51:34<19:45:41] +[titan] 2025-10-05 03:25:52,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:25:54,248 - root - INFO - step: 7900 loss: 2.5320 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.75 mfu: 42.54% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2324 +[titan] 2025-10-05 03:25:54,248 - root - INFO - lr: 4.6133e-05 gnorm: 1.18 [ 4:51:45<19:45:29] +[titan] 2025-10-05 03:26:05,135 - root - INFO - step: 7905 loss: 2.5694 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2673 +[titan] 2025-10-05 03:26:05,135 - root - INFO - lr: 4.6128e-05 gnorm: 1.17 [ 4:51:56<19:45:17] +[titan] 2025-10-05 03:26:15,976 - root - INFO - step: 7910 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2989 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:26:15,976 - root - INFO - lr: 4.6123e-05 gnorm: 1.24 [ 4:52:07<19:45:05] +[titan] 2025-10-05 03:26:26,803 - root - INFO - step: 7915 loss: 2.5234 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2262 +[titan] 2025-10-05 03:26:26,803 - root - INFO - lr: 4.6118e-05 gnorm: 1.20 [ 4:52:17<19:44:53] +[titan] 2025-10-05 03:26:37,605 - root - INFO - step: 7920 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2145 +[titan] 2025-10-05 03:26:37,605 - root - INFO - lr: 4.6113e-05 gnorm: 1.21 [ 4:52:28<19:44:41] +[titan] 2025-10-05 03:26:48,452 - root - INFO - step: 7925 loss: 2.4185 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 03:26:48,452 - root - INFO - lr: 4.6107e-05 gnorm: 1.15 [ 4:52:39<19:44:29] +[titan] 2025-10-05 03:26:59,330 - root - INFO - step: 7930 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 03:26:59,330 - root - INFO - lr: 4.6102e-05 
gnorm: 1.26 [ 4:52:50<19:44:17] +[titan] 2025-10-05 03:27:10,155 - root - INFO - step: 7935 loss: 2.4620 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2890 global_avg_mtp_loss: 2.1731 +[titan] 2025-10-05 03:27:10,155 - root - INFO - lr: 4.6097e-05 gnorm: 1.18 [ 4:53:01<19:44:05] +[titan] 2025-10-05 03:27:20,964 - root - INFO - step: 7940 loss: 2.4808 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.61 mfu: 42.53% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:27:20,964 - root - INFO - lr: 4.6092e-05 gnorm: 1.15 [ 4:53:12<19:43:53] +[titan] 2025-10-05 03:27:31,803 - root - INFO - step: 7945 loss: 2.5084 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2142 +[titan] 2025-10-05 03:27:31,803 - root - INFO - lr: 4.6087e-05 gnorm: 1.16 [ 4:53:22<19:43:41] +[titan] 2025-10-05 03:27:40,415 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:27:42,583 - root - INFO - step: 7950 loss: 2.5326 memory: 118.84GiB(85.28%) tps: 30,397 tflops: 421.71 mfu: 42.64% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2359 +[titan] 2025-10-05 03:27:42,583 - root - INFO - lr: 4.6082e-05 gnorm: 1.21 [ 4:53:33<19:43:28] +[titan] 2025-10-05 03:27:53,381 - root - INFO - step: 7955 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2591 +[titan] 2025-10-05 03:27:53,382 - root - INFO - lr: 4.6077e-05 gnorm: 1.18 [ 4:53:44<19:43:16] +[titan] 2025-10-05 03:28:04,227 - root - INFO - step: 7960 loss: 2.4969 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2940 global_avg_mtp_loss: 2.2030 +[titan] 2025-10-05 03:28:04,227 - root - INFO - lr: 4.6072e-05 gnorm: 1.15 [ 4:53:55<19:43:04] +[titan] 2025-10-05 03:28:15,055 - root - INFO - step: 7965 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1237 +[titan] 2025-10-05 03:28:15,055 - root - INFO - lr: 4.6067e-05 gnorm: 1.13 [ 4:54:06<19:42:52] +[titan] 2025-10-05 03:28:25,883 - root - INFO - step: 7970 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.2034 +[titan] 2025-10-05 03:28:25,883 - root - INFO - lr: 4.6062e-05 gnorm: 1.17 [ 4:54:17<19:42:40] +[titan] 2025-10-05 03:28:36,715 - root - INFO - step: 7975 loss: 2.5491 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2491 +[titan] 2025-10-05 03:28:36,715 - root - INFO - lr: 4.6057e-05 gnorm: 1.19 [ 4:54:27<19:42:28] +[titan] 2025-10-05 03:28:47,543 - root - INFO - step: 7980 loss: 2.4817 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:28:47,543 - root - INFO - lr: 4.6052e-05 
gnorm: 1.16 [ 4:54:38<19:42:16] +[titan] 2025-10-05 03:28:58,364 - root - INFO - step: 7985 loss: 2.5422 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2429 +[titan] 2025-10-05 03:28:58,364 - root - INFO - lr: 4.6047e-05 gnorm: 1.18 [ 4:54:49<19:42:04] +[titan] 2025-10-05 03:29:09,176 - root - INFO - step: 7990 loss: 2.5558 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2558 +[titan] 2025-10-05 03:29:09,176 - root - INFO - lr: 4.6042e-05 gnorm: 1.18 [ 4:55:00<19:41:52] +[titan] 2025-10-05 03:29:19,983 - root - INFO - step: 7995 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.68 mfu: 42.54% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:29:19,983 - root - INFO - lr: 4.6037e-05 gnorm: 1.16 [ 4:55:11<19:41:39] +[titan] 2025-10-05 03:29:28,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:29:30,811 - root - INFO - step: 8000 loss: 2.5669 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.3034 global_avg_mtp_loss: 2.2635 +[titan] 2025-10-05 03:29:30,811 - root - INFO - lr: 4.6032e-05 gnorm: 1.20 [ 4:55:21<19:41:27] +[titan] 2025-10-05 03:29:41,667 - root - INFO - step: 8005 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2699 +[titan] 2025-10-05 03:29:41,667 - root - INFO - lr: 4.6027e-05 gnorm: 1.25 [ 4:55:32<19:41:15] +[titan] 2025-10-05 03:29:52,487 - root - INFO - step: 8010 loss: 2.5006 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2058 +[titan] 2025-10-05 03:29:52,487 - root - INFO - lr: 4.6022e-05 gnorm: 1.26 [ 4:55:43<19:41:03] +[titan] 2025-10-05 03:30:03,339 - root - INFO - step: 8015 loss: 2.4914 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:30:03,339 - root - INFO - lr: 4.6017e-05 gnorm: 1.18 [ 4:55:54<19:40:51] +[titan] 2025-10-05 03:30:14,162 - root - INFO - step: 8020 loss: 2.4809 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:30:14,162 - root - INFO - lr: 4.6012e-05 gnorm: 1.20 [ 4:56:05<19:40:39] +[titan] 2025-10-05 03:30:25,003 - root - INFO - step: 8025 loss: 2.4991 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2042 +[titan] 2025-10-05 03:30:25,003 - root - INFO - lr: 4.6007e-05 gnorm: 1.17 [ 4:56:16<19:40:27] +[titan] 2025-10-05 03:30:35,840 - root - INFO - step: 8030 loss: 2.4390 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1512 +[titan] 2025-10-05 03:30:35,841 - root - INFO - lr: 4.6001e-05 
gnorm: 1.18 [ 4:56:26<19:40:15] +[titan] 2025-10-05 03:30:46,678 - root - INFO - step: 8035 loss: 2.5127 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:30:46,678 - root - INFO - lr: 4.5996e-05 gnorm: 1.21 [ 4:56:37<19:40:03] +[titan] 2025-10-05 03:30:57,494 - root - INFO - step: 8040 loss: 2.4745 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1848 +[titan] 2025-10-05 03:30:57,495 - root - INFO - lr: 4.5991e-05 gnorm: 1.17 [ 4:56:48<19:39:51] +[titan] 2025-10-05 03:31:08,359 - root - INFO - step: 8045 loss: 2.5034 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2082 +[titan] 2025-10-05 03:31:08,360 - root - INFO - lr: 4.5986e-05 gnorm: 1.19 [ 4:56:59<19:39:39] +[titan] 2025-10-05 03:31:17,028 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:31:19,196 - root - INFO - step: 8050 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1741 +[titan] 2025-10-05 03:31:19,196 - root - INFO - lr: 4.5981e-05 gnorm: 1.19 [ 4:57:10<19:39:27] +[titan] 2025-10-05 03:31:30,047 - root - INFO - step: 8055 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2206 +[titan] 2025-10-05 03:31:30,047 - root - INFO - lr: 4.5976e-05 gnorm: 1.16 [ 4:57:21<19:39:15] +[titan] 2025-10-05 03:31:40,901 - root - INFO - step: 8060 loss: 2.4474 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 03:31:40,901 - root - INFO - lr: 4.5971e-05 gnorm: 1.14 [ 4:57:32<19:39:03] +[titan] 2025-10-05 03:31:51,725 - root - INFO - step: 8065 loss: 2.5411 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2406 +[titan] 2025-10-05 03:31:51,725 - root - INFO - lr: 4.5966e-05 gnorm: 1.17 [ 4:57:42<19:38:51] +[titan] 2025-10-05 03:32:02,621 - root - INFO - step: 8070 loss: 2.4864 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1942 +[titan] 2025-10-05 03:32:02,621 - root - INFO - lr: 4.5961e-05 gnorm: 1.20 [ 4:57:53<19:38:39] +[titan] 2025-10-05 03:32:13,441 - root - INFO - step: 8075 loss: 2.5540 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 03:32:13,441 - root - INFO - lr: 4.5956e-05 gnorm: 1.17 [ 4:58:04<19:38:27] +[titan] 2025-10-05 03:32:24,287 - root - INFO - step: 8080 loss: 2.4398 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1536 +[titan] 2025-10-05 03:32:24,288 - root - INFO - lr: 4.5951e-05 
gnorm: 1.14 [ 4:58:15<19:38:15] +[titan] 2025-10-05 03:32:35,118 - root - INFO - step: 8085 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2356 +[titan] 2025-10-05 03:32:35,118 - root - INFO - lr: 4.5945e-05 gnorm: 1.21 [ 4:58:26<19:38:03] +[titan] 2025-10-05 03:32:45,958 - root - INFO - step: 8090 loss: 2.5225 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2257 +[titan] 2025-10-05 03:32:45,959 - root - INFO - lr: 4.5940e-05 gnorm: 1.12 [ 4:58:37<19:37:51] +[titan] 2025-10-05 03:32:56,823 - root - INFO - step: 8095 loss: 2.5506 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2506 +[titan] 2025-10-05 03:32:56,824 - root - INFO - lr: 4.5935e-05 gnorm: 1.21 [ 4:58:47<19:37:39] +[titan] 2025-10-05 03:33:05,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:33:07,719 - root - INFO - step: 8100 loss: 2.5049 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 03:33:07,719 - root - INFO - lr: 4.5930e-05 gnorm: 1.20 [ 4:58:58<19:37:28] +[titan] 2025-10-05 03:33:18,615 - root - INFO - step: 8105 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:33:18,615 - root - INFO - lr: 4.5925e-05 gnorm: 1.12 [ 4:59:09<19:37:16] +[titan] 2025-10-05 03:33:29,481 - root - INFO - step: 8110 loss: 2.4795 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 03:33:29,481 - root - INFO - lr: 4.5920e-05 gnorm: 1.16 [ 4:59:20<19:37:04] +[titan] 2025-10-05 03:33:40,332 - root - INFO - step: 8115 loss: 2.4748 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1833 +[titan] 2025-10-05 03:33:40,332 - root - INFO - lr: 4.5915e-05 gnorm: 1.18 [ 4:59:31<19:36:52] +[titan] 2025-10-05 03:33:51,164 - root - INFO - step: 8120 loss: 2.5292 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2311 +[titan] 2025-10-05 03:33:51,164 - root - INFO - lr: 4.5910e-05 gnorm: 1.19 [ 4:59:42<19:36:40] +[titan] 2025-10-05 03:34:02,020 - root - INFO - step: 8125 loss: 2.4881 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:34:02,020 - root - INFO - lr: 4.5904e-05 gnorm: 1.21 [ 4:59:53<19:36:28] +[titan] 2025-10-05 03:34:12,891 - root - INFO - step: 8130 loss: 2.5727 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:34:12,891 - root - INFO - lr: 4.5899e-05 
gnorm: 1.22 [ 5:00:04<19:36:16] +[titan] 2025-10-05 03:34:23,761 - root - INFO - step: 8135 loss: 2.4550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1663 +[titan] 2025-10-05 03:34:23,761 - root - INFO - lr: 4.5894e-05 gnorm: 1.21 [ 5:00:14<19:36:04] +[titan] 2025-10-05 03:34:34,624 - root - INFO - step: 8140 loss: 2.4669 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:34:34,624 - root - INFO - lr: 4.5889e-05 gnorm: 1.16 [ 5:00:25<19:35:52] +[titan] 2025-10-05 03:34:45,506 - root - INFO - step: 8145 loss: 2.5656 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:34:45,506 - root - INFO - lr: 4.5884e-05 gnorm: 1.18 [ 5:00:36<19:35:41] +[titan] 2025-10-05 03:34:54,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:34:56,340 - root - INFO - step: 8150 loss: 2.4846 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1926 +[titan] 2025-10-05 03:34:56,340 - root - INFO - lr: 4.5879e-05 gnorm: 1.16 [ 5:00:47<19:35:29] +[titan] 2025-10-05 03:35:07,237 - root - INFO - step: 8155 loss: 2.5131 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2164 +[titan] 2025-10-05 03:35:07,237 - root - INFO - lr: 4.5874e-05 gnorm: 1.17 [ 5:00:58<19:35:17] +[titan] 2025-10-05 03:35:18,098 - root - INFO - step: 8160 loss: 2.6082 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3005 +[titan] 2025-10-05 03:35:18,098 - root - INFO - lr: 4.5868e-05 gnorm: 1.18 [ 5:01:09<19:35:05] +[titan] 2025-10-05 03:35:28,978 - root - INFO - step: 8165 loss: 2.5372 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2397 +[titan] 2025-10-05 03:35:28,978 - root - INFO - lr: 4.5863e-05 gnorm: 1.17 [ 5:01:20<19:34:53] +[titan] 2025-10-05 03:35:39,844 - root - INFO - step: 8170 loss: 2.4152 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 03:35:39,844 - root - INFO - lr: 4.5858e-05 gnorm: 1.18 [ 5:01:30<19:34:41] +[titan] 2025-10-05 03:35:50,781 - root - INFO - step: 8175 loss: 2.5578 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3013 global_avg_mtp_loss: 2.2565 +[titan] 2025-10-05 03:35:50,781 - root - INFO - lr: 4.5853e-05 gnorm: 1.27 [ 5:01:41<19:34:30] +[titan] 2025-10-05 03:36:01,663 - root - INFO - step: 8180 loss: 2.4462 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1586 +[titan] 2025-10-05 03:36:01,663 - root - INFO - lr: 4.5848e-05 
gnorm: 1.13 [ 5:01:52<19:34:18] +[titan] 2025-10-05 03:36:12,582 - root - INFO - step: 8185 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:36:12,582 - root - INFO - lr: 4.5843e-05 gnorm: 1.20 [ 5:02:03<19:34:06] +[titan] 2025-10-05 03:36:23,548 - root - INFO - step: 8190 loss: 2.4035 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1217 +[titan] 2025-10-05 03:36:23,549 - root - INFO - lr: 4.5837e-05 gnorm: 1.16 [ 5:02:14<19:33:55] +[titan] 2025-10-05 03:36:28,081 - root - INFO - Dumping profiler traces at step 8192 +[titan] 2025-10-05 03:36:28,119 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:36:34,646 - root - INFO - step: 8195 loss: 2.4867 memory: 118.84GiB(85.28%) tps: 29,528 tflops: 409.66 mfu: 41.42% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 03:36:34,646 - root - INFO - lr: 4.5832e-05 gnorm: 1.16 [ 5:02:25<19:33:44] +[titan] 2025-10-05 03:36:43,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:36:45,533 - root - INFO - step: 8200 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2809 +[titan] 2025-10-05 03:36:45,533 - root - INFO - lr: 4.5827e-05 gnorm: 1.15 [ 5:02:36<19:33:32] +[titan] 2025-10-05 03:36:56,421 - root - INFO - step: 8205 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1517 +[titan] 2025-10-05 03:36:56,421 - root - INFO - lr: 4.5822e-05 gnorm: 1.15 [ 5:02:47<19:33:20] +[titan] 2025-10-05 03:37:07,262 - root - INFO - step: 8210 loss: 2.4422 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2866 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:37:07,262 - root - INFO - lr: 4.5817e-05 gnorm: 1.16 [ 5:02:58<19:33:08] +[titan] 2025-10-05 03:37:18,124 - root - INFO - step: 8215 loss: 2.5901 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3062 global_avg_mtp_loss: 2.2840 +[titan] 2025-10-05 03:37:18,124 - root - INFO - lr: 4.5812e-05 gnorm: 1.23 [ 5:03:09<19:32:56] +[titan] 2025-10-05 03:37:29,001 - root - INFO - step: 8220 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2582 +[titan] 2025-10-05 03:37:29,001 - root - INFO - lr: 4.5806e-05 gnorm: 1.20 [ 5:03:20<19:32:44] +[titan] 2025-10-05 03:37:39,844 - root - INFO - step: 8225 loss: 2.4659 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 03:37:39,845 - root - INFO - lr: 4.5801e-05 gnorm: 1.23 [ 5:03:30<19:32:33] +[titan] 2025-10-05 03:37:50,743 - root - INFO - step: 8230 loss: 2.5410 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 03:37:50,743 - root - INFO - lr: 4.5796e-05 
gnorm: 1.19 [ 5:03:41<19:32:21] +[titan] 2025-10-05 03:38:01,585 - root - INFO - step: 8235 loss: 2.5291 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2326 +[titan] 2025-10-05 03:38:01,585 - root - INFO - lr: 4.5791e-05 gnorm: 1.15 [ 5:03:52<19:32:09] +[titan] 2025-10-05 03:38:12,474 - root - INFO - step: 8240 loss: 2.5137 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:38:12,474 - root - INFO - lr: 4.5786e-05 gnorm: 1.17 [ 5:04:03<19:31:57] +[titan] 2025-10-05 03:38:23,335 - root - INFO - step: 8245 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:38:23,335 - root - INFO - lr: 4.5780e-05 gnorm: 1.17 [ 5:04:14<19:31:45] +[titan] 2025-10-05 03:38:32,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:38:34,223 - root - INFO - step: 8250 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2131 +[titan] 2025-10-05 03:38:34,223 - root - INFO - lr: 4.5775e-05 gnorm: 1.18 [ 5:04:25<19:31:33] +[titan] 2025-10-05 03:38:45,088 - root - INFO - step: 8255 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2198 +[titan] 2025-10-05 03:38:45,088 - root - INFO - lr: 4.5770e-05 gnorm: 1.20 [ 5:04:36<19:31:22] +[titan] 2025-10-05 03:38:55,962 - root - INFO - step: 8260 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1524 +[titan] 2025-10-05 03:38:55,962 - root - INFO - lr: 4.5765e-05 gnorm: 1.19 [ 5:04:47<19:31:10] +[titan] 2025-10-05 03:39:06,818 - root - INFO - step: 8265 loss: 2.6017 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2949 +[titan] 2025-10-05 03:39:06,818 - root - INFO - lr: 4.5760e-05 gnorm: 1.23 [ 5:04:57<19:30:58] +[titan] 2025-10-05 03:39:17,707 - root - INFO - step: 8270 loss: 2.4450 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1572 +[titan] 2025-10-05 03:39:17,707 - root - INFO - lr: 4.5754e-05 gnorm: 1.18 [ 5:05:08<19:30:46] +[titan] 2025-10-05 03:39:28,574 - root - INFO - step: 8275 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1559 +[titan] 2025-10-05 03:39:28,575 - root - INFO - lr: 4.5749e-05 gnorm: 1.20 [ 5:05:19<19:30:34] +[titan] 2025-10-05 03:39:39,438 - root - INFO - step: 8280 loss: 2.4782 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2902 global_avg_mtp_loss: 2.1880 +[titan] 2025-10-05 03:39:39,438 - root - INFO - lr: 4.5744e-05 
gnorm: 1.20 [ 5:05:30<19:30:22] +[titan] 2025-10-05 03:39:50,344 - root - INFO - step: 8285 loss: 2.4818 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:39:50,345 - root - INFO - lr: 4.5739e-05 gnorm: 1.16 [ 5:05:41<19:30:11] +[titan] 2025-10-05 03:40:01,252 - root - INFO - step: 8290 loss: 2.4954 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2009 +[titan] 2025-10-05 03:40:01,252 - root - INFO - lr: 4.5733e-05 gnorm: 1.16 [ 5:05:52<19:29:59] +[titan] 2025-10-05 03:40:12,143 - root - INFO - step: 8295 loss: 2.5302 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2317 +[titan] 2025-10-05 03:40:12,143 - root - INFO - lr: 4.5728e-05 gnorm: 1.18 [ 5:06:03<19:29:47] +[titan] 2025-10-05 03:40:20,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:40:23,034 - root - INFO - step: 8300 loss: 2.4874 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:40:23,034 - root - INFO - lr: 4.5723e-05 gnorm: 1.19 [ 5:06:14<19:29:35] +[titan] 2025-10-05 03:40:33,937 - root - INFO - step: 8305 loss: 2.5831 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2800 +[titan] 2025-10-05 03:40:33,938 - root - INFO - lr: 4.5718e-05 gnorm: 1.17 [ 5:06:25<19:29:24] +[titan] 2025-10-05 03:40:44,825 - root - INFO - step: 8310 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2514 +[titan] 2025-10-05 03:40:44,825 - root - INFO - lr: 4.5713e-05 gnorm: 1.17 [ 5:06:35<19:29:12] +[titan] 2025-10-05 03:40:55,729 - root - INFO - step: 8315 loss: 2.5111 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:40:55,729 - root - INFO - lr: 4.5707e-05 gnorm: 1.14 [ 5:06:46<19:29:00] +[titan] 2025-10-05 03:41:06,596 - root - INFO - step: 8320 loss: 2.5003 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2067 +[titan] 2025-10-05 03:41:06,596 - root - INFO - lr: 4.5702e-05 gnorm: 1.19 [ 5:06:57<19:28:48] +[titan] 2025-10-05 03:41:17,525 - root - INFO - step: 8325 loss: 2.4974 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 03:41:17,525 - root - INFO - lr: 4.5697e-05 gnorm: 1.26 [ 5:07:08<19:28:37] +[titan] 2025-10-05 03:41:28,416 - root - INFO - step: 8330 loss: 2.4791 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1878 +[titan] 2025-10-05 03:41:28,416 - root - INFO - lr: 4.5692e-05 
gnorm: 1.19 [ 5:07:19<19:28:25] +[titan] 2025-10-05 03:41:39,305 - root - INFO - step: 8335 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:41:39,305 - root - INFO - lr: 4.5686e-05 gnorm: 1.25 [ 5:07:30<19:28:13] +[titan] 2025-10-05 03:41:50,197 - root - INFO - step: 8340 loss: 2.4762 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:41:50,197 - root - INFO - lr: 4.5681e-05 gnorm: 1.22 [ 5:07:41<19:28:02] +[titan] 2025-10-05 03:42:01,087 - root - INFO - step: 8345 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:42:01,087 - root - INFO - lr: 4.5676e-05 gnorm: 1.33 [ 5:07:52<19:27:50] +[titan] 2025-10-05 03:42:09,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:42:11,958 - root - INFO - step: 8350 loss: 2.5178 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2229 +[titan] 2025-10-05 03:42:11,958 - root - INFO - lr: 4.5671e-05 gnorm: 1.20 [ 5:08:03<19:27:38] +[titan] 2025-10-05 03:42:22,859 - root - INFO - step: 8355 loss: 2.5012 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2065 +[titan] 2025-10-05 03:42:22,859 - root - INFO - lr: 4.5665e-05 gnorm: 1.16 [ 5:08:13<19:27:26] +[titan] 2025-10-05 03:42:33,724 - root - INFO - step: 8360 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2087 +[titan] 2025-10-05 03:42:33,724 - root - INFO - lr: 4.5660e-05 gnorm: 1.21 [ 5:08:24<19:27:15] +[titan] 2025-10-05 03:42:44,605 - root - INFO - step: 8365 loss: 2.4169 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1341 +[titan] 2025-10-05 03:42:44,606 - root - INFO - lr: 4.5655e-05 gnorm: 1.27 [ 5:08:35<19:27:03] +[titan] 2025-10-05 03:42:55,502 - root - INFO - step: 8370 loss: 2.4654 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:42:55,503 - root - INFO - lr: 4.5649e-05 gnorm: 1.13 [ 5:08:46<19:26:51] +[titan] 2025-10-05 03:43:06,377 - root - INFO - step: 8375 loss: 2.4547 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1658 +[titan] 2025-10-05 03:43:06,377 - root - INFO - lr: 4.5644e-05 gnorm: 1.15 [ 5:08:57<19:26:39] +[titan] 2025-10-05 03:43:17,279 - root - INFO - step: 8380 loss: 2.5065 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2118 +[titan] 2025-10-05 03:43:17,279 - root - INFO - lr: 4.5639e-05 
gnorm: 1.18 [ 5:09:08<19:26:28] +[titan] 2025-10-05 03:43:28,170 - root - INFO - step: 8385 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.1973 +[titan] 2025-10-05 03:43:28,171 - root - INFO - lr: 4.5634e-05 gnorm: 1.19 [ 5:09:19<19:26:16] +[titan] 2025-10-05 03:43:39,058 - root - INFO - step: 8390 loss: 2.3818 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 03:43:39,059 - root - INFO - lr: 4.5628e-05 gnorm: 1.18 [ 5:09:30<19:26:04] +[titan] 2025-10-05 03:43:49,941 - root - INFO - step: 8395 loss: 2.4979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2044 +[titan] 2025-10-05 03:43:49,941 - root - INFO - lr: 4.5623e-05 gnorm: 1.24 [ 5:09:41<19:25:52] +[titan] 2025-10-05 03:43:58,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:44:00,835 - root - INFO - step: 8400 loss: 2.4609 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1710 +[titan] 2025-10-05 03:44:00,835 - root - INFO - lr: 4.5618e-05 gnorm: 1.21 [ 5:09:51<19:25:41] +[titan] 2025-10-05 03:44:11,708 - root - INFO - step: 8405 loss: 2.4714 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1804 +[titan] 2025-10-05 03:44:11,708 - root - INFO - lr: 4.5612e-05 gnorm: 1.18 [ 5:10:02<19:25:29] +[titan] 2025-10-05 03:44:22,628 - root - INFO - step: 8410 loss: 2.4894 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1963 +[titan] 2025-10-05 03:44:22,628 - root - INFO - lr: 4.5607e-05 gnorm: 1.17 [ 5:10:13<19:25:17] +[titan] 2025-10-05 03:44:33,498 - root - INFO - step: 8415 loss: 2.4601 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1709 +[titan] 2025-10-05 03:44:33,498 - root - INFO - lr: 4.5602e-05 gnorm: 1.15 [ 5:10:24<19:25:05] +[titan] 2025-10-05 03:44:44,372 - root - INFO - step: 8420 loss: 2.4695 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1796 +[titan] 2025-10-05 03:44:44,372 - root - INFO - lr: 4.5597e-05 gnorm: 1.21 [ 5:10:35<19:24:54] +[titan] 2025-10-05 03:44:55,241 - root - INFO - step: 8425 loss: 2.6043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.2890 +[titan] 2025-10-05 03:44:55,241 - root - INFO - lr: 4.5591e-05 gnorm: 1.22 [ 5:10:46<19:24:42] +[titan] 2025-10-05 03:45:06,108 - root - INFO - step: 8430 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1610 +[titan] 2025-10-05 03:45:06,108 - root - INFO - lr: 4.5586e-05 
gnorm: 1.22 [ 5:10:57<19:24:30] +[titan] 2025-10-05 03:45:17,033 - root - INFO - step: 8435 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1622 +[titan] 2025-10-05 03:45:17,033 - root - INFO - lr: 4.5581e-05 gnorm: 1.17 [ 5:11:08<19:24:18] +[titan] 2025-10-05 03:45:27,906 - root - INFO - step: 8440 loss: 2.4384 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1521 +[titan] 2025-10-05 03:45:27,906 - root - INFO - lr: 4.5575e-05 gnorm: 1.18 [ 5:11:19<19:24:07] +[titan] 2025-10-05 03:45:38,796 - root - INFO - step: 8445 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2251 +[titan] 2025-10-05 03:45:38,796 - root - INFO - lr: 4.5570e-05 gnorm: 1.18 [ 5:11:29<19:23:55] +[titan] 2025-10-05 03:45:47,503 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:45:49,701 - root - INFO - step: 8450 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1765 +[titan] 2025-10-05 03:45:49,701 - root - INFO - lr: 4.5565e-05 gnorm: 1.15 [ 5:11:40<19:23:43] +[titan] 2025-10-05 03:46:00,576 - root - INFO - step: 8455 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1407 +[titan] 2025-10-05 03:46:00,576 - root - INFO - lr: 4.5559e-05 gnorm: 1.16 [ 5:11:51<19:23:31] +[titan] 2025-10-05 03:46:11,464 - root - INFO - step: 8460 loss: 2.4581 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1695 +[titan] 2025-10-05 03:46:11,465 - root - INFO - lr: 4.5554e-05 gnorm: 1.18 [ 5:12:02<19:23:20] +[titan] 2025-10-05 03:46:22,405 - root - INFO - step: 8465 loss: 2.4681 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2912 global_avg_mtp_loss: 2.1769 +[titan] 2025-10-05 03:46:22,406 - root - INFO - lr: 4.5549e-05 gnorm: 1.26 [ 5:12:13<19:23:08] +[titan] 2025-10-05 03:46:33,303 - root - INFO - step: 8470 loss: 2.4812 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:46:33,303 - root - INFO - lr: 4.5543e-05 gnorm: 1.18 [ 5:12:24<19:22:57] +[titan] 2025-10-05 03:46:44,215 - root - INFO - step: 8475 loss: 2.4456 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:46:44,215 - root - INFO - lr: 4.5538e-05 gnorm: 1.19 [ 5:12:35<19:22:45] +[titan] 2025-10-05 03:46:55,102 - root - INFO - step: 8480 loss: 2.5134 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2167 +[titan] 2025-10-05 03:46:55,103 - root - INFO - lr: 4.5533e-05 
gnorm: 1.22 [ 5:12:46<19:22:33] +[titan] 2025-10-05 03:47:05,998 - root - INFO - step: 8485 loss: 2.4337 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 03:47:05,998 - root - INFO - lr: 4.5527e-05 gnorm: 1.16 [ 5:12:57<19:22:21] +[titan] 2025-10-05 03:47:16,904 - root - INFO - step: 8490 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1235 +[titan] 2025-10-05 03:47:16,904 - root - INFO - lr: 4.5522e-05 gnorm: 1.17 [ 5:13:08<19:22:10] +[titan] 2025-10-05 03:47:27,782 - root - INFO - step: 8495 loss: 2.4698 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1791 +[titan] 2025-10-05 03:47:27,783 - root - INFO - lr: 4.5517e-05 gnorm: 1.17 [ 5:13:18<19:21:58] +[titan] 2025-10-05 03:47:36,458 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:47:38,638 - root - INFO - step: 8500 loss: 2.3537 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0773 +[titan] 2025-10-05 03:47:38,638 - root - INFO - lr: 4.5511e-05 gnorm: 1.20 [ 5:13:29<19:21:46] +[titan] 2025-10-05 03:47:49,538 - root - INFO - step: 8505 loss: 2.5368 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2395 +[titan] 2025-10-05 03:47:49,538 - root - INFO - lr: 4.5506e-05 gnorm: 1.16 [ 5:13:40<19:21:35] +[titan] 2025-10-05 03:48:00,412 - root - INFO - step: 8510 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.1961 +[titan] 2025-10-05 03:48:00,412 - root - INFO - lr: 4.5501e-05 gnorm: 1.19 [ 5:13:51<19:21:23] +[titan] 2025-10-05 03:48:11,277 - root - INFO - step: 8515 loss: 2.4264 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:48:11,277 - root - INFO - lr: 4.5495e-05 gnorm: 1.17 [ 5:14:02<19:21:11] +[titan] 2025-10-05 03:48:22,187 - root - INFO - step: 8520 loss: 2.4968 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2043 +[titan] 2025-10-05 03:48:22,188 - root - INFO - lr: 4.5490e-05 gnorm: 1.24 [ 5:14:13<19:20:59] +[titan] 2025-10-05 03:48:33,044 - root - INFO - step: 8525 loss: 2.5002 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2934 global_avg_mtp_loss: 2.2068 +[titan] 2025-10-05 03:48:33,044 - root - INFO - lr: 4.5485e-05 gnorm: 1.16 [ 5:14:24<19:20:47] +[titan] 2025-10-05 03:48:43,906 - root - INFO - step: 8530 loss: 2.5203 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2249 +[titan] 2025-10-05 03:48:43,906 - root - INFO - lr: 4.5479e-05 
gnorm: 1.18 [ 5:14:35<19:20:36] +[titan] 2025-10-05 03:48:54,778 - root - INFO - step: 8535 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:48:54,778 - root - INFO - lr: 4.5474e-05 gnorm: 1.23 [ 5:14:45<19:20:24] +[titan] 2025-10-05 03:49:05,664 - root - INFO - step: 8540 loss: 2.5027 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2084 +[titan] 2025-10-05 03:49:05,664 - root - INFO - lr: 4.5468e-05 gnorm: 1.19 [ 5:14:56<19:20:12] +[titan] 2025-10-05 03:49:16,537 - root - INFO - step: 8545 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2823 global_avg_mtp_loss: 2.1266 +[titan] 2025-10-05 03:49:16,537 - root - INFO - lr: 4.5463e-05 gnorm: 1.19 [ 5:15:07<19:20:00] +[titan] 2025-10-05 03:49:25,283 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:49:27,468 - root - INFO - step: 8550 loss: 2.4984 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2049 +[titan] 2025-10-05 03:49:27,468 - root - INFO - lr: 4.5458e-05 gnorm: 1.21 [ 5:15:18<19:19:49] +[titan] 2025-10-05 03:49:38,338 - root - INFO - step: 8555 loss: 2.4539 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1653 +[titan] 2025-10-05 03:49:38,338 - root - INFO - lr: 4.5452e-05 gnorm: 1.20 [ 5:15:29<19:19:37] +[titan] 2025-10-05 03:49:49,202 - root - INFO - step: 8560 loss: 2.4721 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:49:49,202 - root - INFO - lr: 4.5447e-05 gnorm: 1.17 [ 5:15:40<19:19:25] +[titan] 2025-10-05 03:50:00,074 - root - INFO - step: 8565 loss: 2.5405 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 03:50:00,074 - root - INFO - lr: 4.5442e-05 gnorm: 1.15 [ 5:15:51<19:19:14] +[titan] 2025-10-05 03:50:10,978 - root - INFO - step: 8570 loss: 2.4470 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 03:50:10,979 - root - INFO - lr: 4.5436e-05 gnorm: 1.22 [ 5:16:02<19:19:02] +[titan] 2025-10-05 03:50:21,887 - root - INFO - step: 8575 loss: 2.4633 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1735 +[titan] 2025-10-05 03:50:21,887 - root - INFO - lr: 4.5431e-05 gnorm: 1.21 [ 5:16:12<19:18:50] +[titan] 2025-10-05 03:50:32,776 - root - INFO - step: 8580 loss: 2.4711 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1810 +[titan] 2025-10-05 03:50:32,776 - root - INFO - lr: 4.5425e-05 
gnorm: 1.18 [ 5:16:23<19:18:39] +[titan] 2025-10-05 03:50:43,667 - root - INFO - step: 8585 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:50:43,667 - root - INFO - lr: 4.5420e-05 gnorm: 1.22 [ 5:16:34<19:18:27] +[titan] 2025-10-05 03:50:54,557 - root - INFO - step: 8590 loss: 2.5385 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2399 +[titan] 2025-10-05 03:50:54,558 - root - INFO - lr: 4.5415e-05 gnorm: 1.18 [ 5:16:45<19:18:15] +[titan] 2025-10-05 03:51:05,424 - root - INFO - step: 8595 loss: 2.4767 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 03:51:05,425 - root - INFO - lr: 4.5409e-05 gnorm: 1.16 [ 5:16:56<19:18:03] +[titan] 2025-10-05 03:51:14,103 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:51:16,290 - root - INFO - step: 8600 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:51:16,290 - root - INFO - lr: 4.5404e-05 gnorm: 1.14 [ 5:17:07<19:17:52] +[titan] 2025-10-05 03:51:27,250 - root - INFO - step: 8605 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2352 +[titan] 2025-10-05 03:51:27,251 - root - INFO - lr: 4.5398e-05 gnorm: 1.15 [ 5:17:18<19:17:40] +[titan] 2025-10-05 03:51:38,134 - root - INFO - step: 8610 loss: 2.4373 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1515 +[titan] 2025-10-05 03:51:38,134 - root - INFO - lr: 4.5393e-05 gnorm: 1.14 [ 5:17:29<19:17:28] +[titan] 2025-10-05 03:51:49,035 - root - INFO - step: 8615 loss: 2.5154 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2980 global_avg_mtp_loss: 2.2174 +[titan] 2025-10-05 03:51:49,036 - root - INFO - lr: 4.5388e-05 gnorm: 1.21 [ 5:17:40<19:17:17] +[titan] 2025-10-05 03:51:59,908 - root - INFO - step: 8620 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1329 +[titan] 2025-10-05 03:51:59,909 - root - INFO - lr: 4.5382e-05 gnorm: 1.19 [ 5:17:51<19:17:05] +[titan] 2025-10-05 03:52:10,800 - root - INFO - step: 8625 loss: 2.4772 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:52:10,800 - root - INFO - lr: 4.5377e-05 gnorm: 1.19 [ 5:18:01<19:16:53] +[titan] 2025-10-05 03:52:21,724 - root - INFO - step: 8630 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1254 +[titan] 2025-10-05 03:52:21,724 - root - INFO - lr: 4.5371e-05 
gnorm: 1.17 [ 5:18:12<19:16:42] +[titan] 2025-10-05 03:52:32,629 - root - INFO - step: 8635 loss: 2.4666 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 03:52:32,629 - root - INFO - lr: 4.5366e-05 gnorm: 1.18 [ 5:18:23<19:16:30] +[titan] 2025-10-05 03:52:43,516 - root - INFO - step: 8640 loss: 2.5035 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:52:43,516 - root - INFO - lr: 4.5360e-05 gnorm: 1.16 [ 5:18:34<19:16:18] +[titan] 2025-10-05 03:52:54,413 - root - INFO - step: 8645 loss: 2.4079 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1260 +[titan] 2025-10-05 03:52:54,414 - root - INFO - lr: 4.5355e-05 gnorm: 1.18 [ 5:18:45<19:16:07] +[titan] 2025-10-05 03:53:03,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:53:05,278 - root - INFO - step: 8650 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:53:05,278 - root - INFO - lr: 4.5350e-05 gnorm: 1.17 [ 5:18:56<19:15:55] +[titan] 2025-10-05 03:53:16,166 - root - INFO - step: 8655 loss: 2.4949 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2017 +[titan] 2025-10-05 03:53:16,166 - root - INFO - lr: 4.5344e-05 gnorm: 1.17 [ 5:19:07<19:15:43] +[titan] 2025-10-05 03:53:27,098 - root - INFO - step: 8660 loss: 2.4590 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1706 +[titan] 2025-10-05 03:53:27,098 - root - INFO - lr: 4.5339e-05 gnorm: 1.20 [ 5:19:18<19:15:32] +[titan] 2025-10-05 03:53:38,012 - root - INFO - step: 8665 loss: 2.5151 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2199 +[titan] 2025-10-05 03:53:38,012 - root - INFO - lr: 4.5333e-05 gnorm: 1.19 [ 5:19:29<19:15:20] +[titan] 2025-10-05 03:53:48,872 - root - INFO - step: 8670 loss: 2.4344 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 03:53:48,872 - root - INFO - lr: 4.5328e-05 gnorm: 1.15 [ 5:19:39<19:15:08] +[titan] 2025-10-05 03:53:59,744 - root - INFO - step: 8675 loss: 2.4632 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1742 +[titan] 2025-10-05 03:53:59,744 - root - INFO - lr: 4.5322e-05 gnorm: 1.17 [ 5:19:50<19:14:57] +[titan] 2025-10-05 03:54:10,610 - root - INFO - step: 8680 loss: 2.4556 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 03:54:10,611 - root - INFO - lr: 4.5317e-05 
gnorm: 1.17 [ 5:20:01<19:14:45] +[titan] 2025-10-05 03:54:21,508 - root - INFO - step: 8685 loss: 2.4742 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1837 +[titan] 2025-10-05 03:54:21,508 - root - INFO - lr: 4.5311e-05 gnorm: 1.20 [ 5:20:12<19:14:33] +[titan] 2025-10-05 03:54:32,411 - root - INFO - step: 8690 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2972 global_avg_mtp_loss: 2.2331 +[titan] 2025-10-05 03:54:32,411 - root - INFO - lr: 4.5306e-05 gnorm: 1.22 [ 5:20:23<19:14:22] +[titan] 2025-10-05 03:54:43,289 - root - INFO - step: 8695 loss: 2.4873 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1952 +[titan] 2025-10-05 03:54:43,290 - root - INFO - lr: 4.5301e-05 gnorm: 1.21 [ 5:20:34<19:14:10] +[titan] 2025-10-05 03:54:52,023 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:54:54,213 - root - INFO - step: 8700 loss: 2.4737 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1831 +[titan] 2025-10-05 03:54:54,213 - root - INFO - lr: 4.5295e-05 gnorm: 1.19 [ 5:20:45<19:13:58] +[titan] 2025-10-05 03:55:03,156 - root - INFO - Dumping profiler traces at step 8704 +[titan] 2025-10-05 03:55:03,191 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 03:55:05,378 - root - INFO - step: 8705 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 29,348 tflops: 407.16 mfu: 41.17% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:55:05,378 - root - INFO - lr: 4.5290e-05 gnorm: 1.17 [ 5:20:56<19:13:48] +[titan] 2025-10-05 03:55:16,259 - root - INFO - step: 8710 loss: 2.3993 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1181 +[titan] 2025-10-05 03:55:16,259 - root - INFO - lr: 4.5284e-05 gnorm: 1.16 [ 5:21:07<19:13:36] +[titan] 2025-10-05 03:55:27,179 - root - INFO - step: 8715 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1461 +[titan] 2025-10-05 03:55:27,179 - root - INFO - lr: 4.5279e-05 gnorm: 1.17 [ 5:21:18<19:13:24] +[titan] 2025-10-05 03:55:38,073 - root - INFO - step: 8720 loss: 2.3963 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 03:55:38,073 - root - INFO - lr: 4.5273e-05 gnorm: 1.24 [ 5:21:29<19:13:13] +[titan] 2025-10-05 03:55:48,962 - root - INFO - step: 8725 loss: 2.4482 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 03:55:48,962 - root - INFO - lr: 4.5268e-05 gnorm: 1.19 [ 5:21:40<19:13:01] +[titan] 2025-10-05 03:55:59,898 - root - INFO - step: 8730 loss: 2.4827 
memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:55:59,898 - root - INFO - lr: 4.5262e-05 gnorm: 1.18 [ 5:21:50<19:12:50] +[titan] 2025-10-05 03:56:10,791 - root - INFO - step: 8735 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:56:10,791 - root - INFO - lr: 4.5257e-05 gnorm: 1.13 [ 5:22:01<19:12:38] +[titan] 2025-10-05 03:56:21,690 - root - INFO - step: 8740 loss: 2.5138 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:56:21,690 - root - INFO - lr: 4.5251e-05 gnorm: 1.17 [ 5:22:12<19:12:26] +[titan] 2025-10-05 03:56:32,598 - root - INFO - step: 8745 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:56:32,598 - root - INFO - lr: 4.5246e-05 gnorm: 1.21 [ 5:22:23<19:12:15] +[titan] 2025-10-05 03:56:41,299 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:56:43,483 - root - INFO - step: 8750 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 03:56:43,483 - root - INFO - lr: 4.5240e-05 gnorm: 1.23 [ 5:22:34<19:12:03] +[titan] 2025-10-05 03:56:54,343 - root - INFO - step: 8755 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1820 +[titan] 2025-10-05 03:56:54,343 - root - INFO - lr: 4.5235e-05 gnorm: 1.20 [ 5:22:45<19:11:51] +[titan] 2025-10-05 03:57:05,209 - root - INFO - step: 8760 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:57:05,209 - root - INFO - lr: 4.5229e-05 gnorm: 1.14 [ 5:22:56<19:11:39] +[titan] 2025-10-05 03:57:16,152 - root - INFO - step: 8765 loss: 2.5128 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:57:16,152 - root - INFO - lr: 4.5224e-05 gnorm: 1.17 [ 5:23:07<19:11:28] +[titan] 2025-10-05 03:57:27,083 - root - INFO - step: 8770 loss: 2.4066 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 03:57:27,083 - root - INFO - lr: 4.5218e-05 gnorm: 1.11 [ 5:23:18<19:11:16] +[titan] 2025-10-05 03:57:37,931 - root - INFO - step: 8775 loss: 2.4260 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 03:57:37,932 - root - INFO - lr: 4.5213e-05 gnorm: 1.17 [ 5:23:29<19:11:05] +[titan] 2025-10-05 03:57:48,805 - root - INFO - step: 8780 loss: 2.4759 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1847 +[titan] 2025-10-05 03:57:48,805 - root - INFO - lr: 4.5207e-05 
gnorm: 1.24 [ 5:23:39<19:10:53] +[titan] 2025-10-05 03:57:59,678 - root - INFO - step: 8785 loss: 2.4875 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:57:59,678 - root - INFO - lr: 4.5202e-05 gnorm: 1.16 [ 5:23:50<19:10:41] +[titan] 2025-10-05 03:58:10,559 - root - INFO - step: 8790 loss: 2.4424 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:58:10,559 - root - INFO - lr: 4.5196e-05 gnorm: 1.16 [ 5:24:01<19:10:29] +[titan] 2025-10-05 03:58:21,459 - root - INFO - step: 8795 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1293 +[titan] 2025-10-05 03:58:21,459 - root - INFO - lr: 4.5191e-05 gnorm: 1.13 [ 5:24:12<19:10:18] +[titan] 2025-10-05 03:58:30,179 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:58:32,360 - root - INFO - step: 8800 loss: 2.3926 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1119 +[titan] 2025-10-05 03:58:32,360 - root - INFO - lr: 4.5185e-05 gnorm: 1.16 [ 5:24:23<19:10:06] +[titan] 2025-10-05 03:58:43,220 - root - INFO - step: 8805 loss: 2.5057 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2110 +[titan] 2025-10-05 03:58:43,221 - root - INFO - lr: 4.5180e-05 gnorm: 1.16 [ 5:24:34<19:09:54] +[titan] 2025-10-05 03:58:54,092 - root - INFO - step: 8810 loss: 2.4643 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:58:54,092 - root - INFO - lr: 4.5174e-05 gnorm: 1.21 [ 5:24:45<19:09:43] +[titan] 2025-10-05 03:59:04,956 - root - INFO - step: 8815 loss: 2.5184 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2227 +[titan] 2025-10-05 03:59:04,956 - root - INFO - lr: 4.5169e-05 gnorm: 1.20 [ 5:24:56<19:09:31] +[titan] 2025-10-05 03:59:15,807 - root - INFO - step: 8820 loss: 2.3921 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 03:59:15,807 - root - INFO - lr: 4.5163e-05 gnorm: 1.12 [ 5:25:06<19:09:19] +[titan] 2025-10-05 03:59:26,817 - root - INFO - step: 8825 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.90 mfu: 41.75% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1453 +[titan] 2025-10-05 03:59:26,817 - root - INFO - lr: 4.5158e-05 gnorm: 1.14 [ 5:25:17<19:09:08] +[titan] 2025-10-05 03:59:37,700 - root - INFO - step: 8830 loss: 2.4161 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 03:59:37,700 - root - INFO - lr: 4.5152e-05 
gnorm: 1.17 [ 5:25:28<19:08:56] +[titan] 2025-10-05 03:59:48,610 - root - INFO - step: 8835 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:59:48,610 - root - INFO - lr: 4.5147e-05 gnorm: 1.20 [ 5:25:39<19:08:45] +[titan] 2025-10-05 03:59:59,499 - root - INFO - step: 8840 loss: 2.4555 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 03:59:59,499 - root - INFO - lr: 4.5141e-05 gnorm: 1.16 [ 5:25:50<19:08:33] +[titan] 2025-10-05 04:00:10,376 - root - INFO - step: 8845 loss: 2.5058 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2957 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 04:00:10,376 - root - INFO - lr: 4.5136e-05 gnorm: 1.15 [ 5:26:01<19:08:21] +[titan] 2025-10-05 04:00:19,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:00:21,274 - root - INFO - step: 8850 loss: 2.4134 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:00:21,274 - root - INFO - lr: 4.5130e-05 gnorm: 1.16 [ 5:26:12<19:08:10] +[titan] 2025-10-05 04:00:32,174 - root - INFO - step: 8855 loss: 2.3939 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1130 +[titan] 2025-10-05 04:00:32,174 - root - INFO - lr: 4.5124e-05 gnorm: 1.14 [ 5:26:23<19:07:58] +[titan] 2025-10-05 04:00:43,105 - root - INFO - step: 8860 loss: 2.4901 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.1965 +[titan] 2025-10-05 04:00:43,105 - root - INFO - lr: 4.5119e-05 gnorm: 1.13 [ 5:26:34<19:07:47] +[titan] 2025-10-05 04:00:53,982 - root - INFO - step: 8865 loss: 2.4318 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1455 +[titan] 2025-10-05 04:00:53,982 - root - INFO - lr: 4.5113e-05 gnorm: 1.20 [ 5:26:45<19:07:35] +[titan] 2025-10-05 04:01:04,884 - root - INFO - step: 8870 loss: 2.4552 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 04:01:04,884 - root - INFO - lr: 4.5108e-05 gnorm: 1.17 [ 5:26:55<19:07:23] +[titan] 2025-10-05 04:01:15,755 - root - INFO - step: 8875 loss: 2.4361 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1505 +[titan] 2025-10-05 04:01:15,755 - root - INFO - lr: 4.5102e-05 gnorm: 1.11 [ 5:27:06<19:07:12] +[titan] 2025-10-05 04:01:26,620 - root - INFO - step: 8880 loss: 2.4652 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 04:01:26,621 - root - INFO - lr: 4.5097e-05 
gnorm: 1.18 [ 5:27:17<19:07:00] +[titan] 2025-10-05 04:01:37,500 - root - INFO - step: 8885 loss: 2.4777 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1862 +[titan] 2025-10-05 04:01:37,500 - root - INFO - lr: 4.5091e-05 gnorm: 1.16 [ 5:27:28<19:06:48] +[titan] 2025-10-05 04:01:48,415 - root - INFO - step: 8890 loss: 2.4058 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:01:48,416 - root - INFO - lr: 4.5086e-05 gnorm: 1.17 [ 5:27:39<19:06:37] +[titan] 2025-10-05 04:01:59,279 - root - INFO - step: 8895 loss: 2.4655 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1759 +[titan] 2025-10-05 04:01:59,280 - root - INFO - lr: 4.5080e-05 gnorm: 1.19 [ 5:27:50<19:06:25] +[titan] 2025-10-05 04:02:07,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:02:10,126 - root - INFO - step: 8900 loss: 2.4494 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:02:10,126 - root - INFO - lr: 4.5074e-05 gnorm: 1.24 [ 5:28:01<19:06:13] +[titan] 2025-10-05 04:02:20,976 - root - INFO - step: 8905 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 04:02:20,976 - root - INFO - lr: 4.5069e-05 gnorm: 1.18 [ 5:28:12<19:06:01] +[titan] 2025-10-05 04:02:31,857 - root - INFO - step: 8910 loss: 2.4530 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1646 +[titan] 2025-10-05 04:02:31,857 - root - INFO - lr: 4.5063e-05 gnorm: 1.18 [ 5:28:22<19:05:50] +[titan] 2025-10-05 04:02:42,714 - root - INFO - step: 8915 loss: 2.4292 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:02:42,714 - root - INFO - lr: 4.5058e-05 gnorm: 1.18 [ 5:28:33<19:05:38] +[titan] 2025-10-05 04:02:53,586 - root - INFO - step: 8920 loss: 2.4665 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 04:02:53,586 - root - INFO - lr: 4.5052e-05 gnorm: 1.14 [ 5:28:44<19:05:26] +[titan] 2025-10-05 04:03:04,511 - root - INFO - step: 8925 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1484 +[titan] 2025-10-05 04:03:04,511 - root - INFO - lr: 4.5047e-05 gnorm: 1.20 [ 5:28:55<19:05:15] +[titan] 2025-10-05 04:03:15,417 - root - INFO - step: 8930 loss: 2.5325 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 04:03:15,417 - root - INFO - lr: 4.5041e-05 
gnorm: 1.18 [ 5:29:06<19:05:03] +[titan] 2025-10-05 04:03:26,302 - root - INFO - step: 8935 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:03:26,303 - root - INFO - lr: 4.5035e-05 gnorm: 1.21 [ 5:29:17<19:04:51] +[titan] 2025-10-05 04:03:37,172 - root - INFO - step: 8940 loss: 2.6656 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3417 +[titan] 2025-10-05 04:03:37,172 - root - INFO - lr: 4.5030e-05 gnorm: 1.16 [ 5:29:28<19:04:40] +[titan] 2025-10-05 04:03:48,057 - root - INFO - step: 8945 loss: 2.4401 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1543 +[titan] 2025-10-05 04:03:48,057 - root - INFO - lr: 4.5024e-05 gnorm: 1.12 [ 5:29:39<19:04:28] +[titan] 2025-10-05 04:03:56,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:03:58,919 - root - INFO - step: 8950 loss: 2.4061 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1253 +[titan] 2025-10-05 04:03:58,919 - root - INFO - lr: 4.5019e-05 gnorm: 1.11 [ 5:29:49<19:04:16] +[titan] 2025-10-05 04:04:09,819 - root - INFO - step: 8955 loss: 2.4957 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 04:04:09,820 - root - INFO - lr: 4.5013e-05 gnorm: 1.12 [ 5:30:00<19:04:05] +[titan] 2025-10-05 04:04:20,693 - root - INFO - step: 8960 loss: 2.4047 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1227 +[titan] 2025-10-05 04:04:20,693 - root - INFO - lr: 4.5007e-05 gnorm: 1.15 [ 5:30:11<19:03:53] +[titan] 2025-10-05 04:04:31,580 - root - INFO - step: 8965 loss: 2.4637 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1740 +[titan] 2025-10-05 04:04:31,580 - root - INFO - lr: 4.5002e-05 gnorm: 1.15 [ 5:30:22<19:03:41] +[titan] 2025-10-05 04:04:42,434 - root - INFO - step: 8970 loss: 2.4642 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 04:04:42,434 - root - INFO - lr: 4.4996e-05 gnorm: 1.19 [ 5:30:33<19:03:30] +[titan] 2025-10-05 04:04:53,298 - root - INFO - step: 8975 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 04:04:53,298 - root - INFO - lr: 4.4991e-05 gnorm: 1.20 [ 5:30:44<19:03:18] +[titan] 2025-10-05 04:05:04,159 - root - INFO - step: 8980 loss: 2.4094 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1265 +[titan] 2025-10-05 04:05:04,159 - root - INFO - lr: 4.4985e-05 
gnorm: 1.14 [ 5:30:55<19:03:06] +[titan] 2025-10-05 04:05:15,056 - root - INFO - step: 8985 loss: 2.4593 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1707 +[titan] 2025-10-05 04:05:15,057 - root - INFO - lr: 4.4979e-05 gnorm: 1.20 [ 5:31:06<19:02:55] +[titan] 2025-10-05 04:05:25,930 - root - INFO - step: 8990 loss: 2.3911 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 04:05:25,930 - root - INFO - lr: 4.4974e-05 gnorm: 1.15 [ 5:31:16<19:02:43] +[titan] 2025-10-05 04:05:36,798 - root - INFO - step: 8995 loss: 2.4428 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1566 +[titan] 2025-10-05 04:05:36,798 - root - INFO - lr: 4.4968e-05 gnorm: 1.17 [ 5:31:27<19:02:31] +[titan] 2025-10-05 04:05:45,469 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:05:47,651 - root - INFO - step: 9000 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0801 +[titan] 2025-10-05 04:05:47,652 - root - INFO - lr: 4.4962e-05 gnorm: 1.23 [ 5:31:38<19:02:19] +[titan] 2025-10-05 04:05:58,519 - root - INFO - step: 9005 loss: 2.4431 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1560 +[titan] 2025-10-05 04:05:58,519 - root - INFO - lr: 4.4957e-05 gnorm: 1.17 [ 5:31:49<19:02:08] +[titan] 2025-10-05 04:06:09,392 - root - INFO - step: 9010 loss: 2.4584 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1697 +[titan] 2025-10-05 04:06:09,392 - root - INFO - lr: 4.4951e-05 gnorm: 1.17 [ 5:32:00<19:01:56] +[titan] 2025-10-05 04:06:20,257 - root - INFO - step: 9015 loss: 2.4693 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1794 +[titan] 2025-10-05 04:06:20,257 - root - INFO - lr: 4.4946e-05 gnorm: 1.13 [ 5:32:11<19:01:44] +[titan] 2025-10-05 04:06:31,158 - root - INFO - step: 9020 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1487 +[titan] 2025-10-05 04:06:31,158 - root - INFO - lr: 4.4940e-05 gnorm: 1.15 [ 5:32:22<19:01:33] +[titan] 2025-10-05 04:06:42,018 - root - INFO - step: 9025 loss: 2.3968 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:06:42,018 - root - INFO - lr: 4.4934e-05 gnorm: 1.11 [ 5:32:33<19:01:21] +[titan] 2025-10-05 04:06:52,886 - root - INFO - step: 9030 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:06:52,886 - root - INFO - lr: 4.4929e-05 
gnorm: 1.08 [ 5:32:43<19:01:09] +[titan] 2025-10-05 04:07:03,747 - root - INFO - step: 9035 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 04:07:03,747 - root - INFO - lr: 4.4923e-05 gnorm: 1.16 [ 5:32:54<19:00:58] +[titan] 2025-10-05 04:07:14,610 - root - INFO - step: 9040 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1859 +[titan] 2025-10-05 04:07:14,610 - root - INFO - lr: 4.4917e-05 gnorm: 1.17 [ 5:33:05<19:00:46] +[titan] 2025-10-05 04:07:25,476 - root - INFO - step: 9045 loss: 2.4520 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1641 +[titan] 2025-10-05 04:07:25,476 - root - INFO - lr: 4.4912e-05 gnorm: 1.19 [ 5:33:16<19:00:34] +[titan] 2025-10-05 04:07:34,201 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:07:36,379 - root - INFO - step: 9050 loss: 2.4771 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:07:36,379 - root - INFO - lr: 4.4906e-05 gnorm: 1.19 [ 5:33:27<19:00:23] +[titan] 2025-10-05 04:07:47,258 - root - INFO - step: 9055 loss: 2.4168 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1339 +[titan] 2025-10-05 04:07:47,258 - root - INFO - lr: 4.4900e-05 gnorm: 1.14 [ 5:33:38<19:00:11] +[titan] 2025-10-05 04:07:58,123 - root - INFO - step: 9060 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:07:58,123 - root - INFO - lr: 4.4895e-05 gnorm: 1.16 [ 5:33:49<18:59:59] +[titan] 2025-10-05 04:08:09,003 - root - INFO - step: 9065 loss: 2.4858 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2911 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:08:09,003 - root - INFO - lr: 4.4889e-05 gnorm: 1.18 [ 5:34:00<18:59:48] +[titan] 2025-10-05 04:08:19,858 - root - INFO - step: 9070 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 04:08:19,858 - root - INFO - lr: 4.4883e-05 gnorm: 1.18 [ 5:34:10<18:59:36] +[titan] 2025-10-05 04:08:30,739 - root - INFO - step: 9075 loss: 2.4338 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1483 +[titan] 2025-10-05 04:08:30,739 - root - INFO - lr: 4.4878e-05 gnorm: 1.16 [ 5:34:21<18:59:24] +[titan] 2025-10-05 04:08:41,605 - root - INFO - step: 9080 loss: 2.3786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 04:08:41,605 - root - INFO - lr: 4.4872e-05 
gnorm: 1.24 [ 5:34:32<18:59:13] +[titan] 2025-10-05 04:08:52,482 - root - INFO - step: 9085 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2055 +[titan] 2025-10-05 04:08:52,482 - root - INFO - lr: 4.4866e-05 gnorm: 1.20 [ 5:34:43<18:59:01] +[titan] 2025-10-05 04:09:03,324 - root - INFO - step: 9090 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1628 +[titan] 2025-10-05 04:09:03,325 - root - INFO - lr: 4.4861e-05 gnorm: 1.24 [ 5:34:54<18:58:49] +[titan] 2025-10-05 04:09:14,169 - root - INFO - step: 9095 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1886 +[titan] 2025-10-05 04:09:14,169 - root - INFO - lr: 4.4855e-05 gnorm: 1.21 [ 5:35:05<18:58:37] +[titan] 2025-10-05 04:09:22,830 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:09:25,022 - root - INFO - step: 9100 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:09:25,022 - root - INFO - lr: 4.4849e-05 gnorm: 1.19 [ 5:35:16<18:58:26] +[titan] 2025-10-05 04:09:35,891 - root - INFO - step: 9105 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1300 +[titan] 2025-10-05 04:09:35,891 - root - INFO - lr: 4.4844e-05 gnorm: 1.18 [ 5:35:26<18:58:14] +[titan] 2025-10-05 04:09:46,754 - root - INFO - step: 9110 loss: 2.3843 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1029 +[titan] 2025-10-05 04:09:46,754 - root - INFO - lr: 4.4838e-05 gnorm: 1.28 [ 5:35:37<18:58:02] +[titan] 2025-10-05 04:09:57,624 - root - INFO - step: 9115 loss: 2.4036 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1228 +[titan] 2025-10-05 04:09:57,624 - root - INFO - lr: 4.4832e-05 gnorm: 1.19 [ 5:35:48<18:57:51] +[titan] 2025-10-05 04:10:08,470 - root - INFO - step: 9120 loss: 2.4158 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1325 +[titan] 2025-10-05 04:10:08,470 - root - INFO - lr: 4.4827e-05 gnorm: 1.14 [ 5:35:59<18:57:39] +[titan] 2025-10-05 04:10:19,323 - root - INFO - step: 9125 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 04:10:19,323 - root - INFO - lr: 4.4821e-05 gnorm: 1.19 [ 5:36:10<18:57:27] +[titan] 2025-10-05 04:10:30,178 - root - INFO - step: 9130 loss: 2.4437 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 04:10:30,178 - root - INFO - lr: 4.4815e-05 
gnorm: 1.22 [ 5:36:21<18:57:15] +[titan] 2025-10-05 04:10:41,058 - root - INFO - step: 9135 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1518 +[titan] 2025-10-05 04:10:41,058 - root - INFO - lr: 4.4809e-05 gnorm: 1.15 [ 5:36:32<18:57:04] +[titan] 2025-10-05 04:10:51,913 - root - INFO - step: 9140 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:10:51,913 - root - INFO - lr: 4.4804e-05 gnorm: 1.14 [ 5:36:42<18:56:52] +[titan] 2025-10-05 04:11:02,801 - root - INFO - step: 9145 loss: 2.4160 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1331 +[titan] 2025-10-05 04:11:02,801 - root - INFO - lr: 4.4798e-05 gnorm: 1.18 [ 5:36:53<18:56:40] +[titan] 2025-10-05 04:11:11,463 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:11:13,643 - root - INFO - step: 9150 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1327 +[titan] 2025-10-05 04:11:13,643 - root - INFO - lr: 4.4792e-05 gnorm: 1.15 [ 5:37:04<18:56:29] +[titan] 2025-10-05 04:11:24,500 - root - INFO - step: 9155 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1789 +[titan] 2025-10-05 04:11:24,500 - root - INFO - lr: 4.4787e-05 gnorm: 1.16 [ 5:37:15<18:56:17] +[titan] 2025-10-05 04:11:35,333 - root - INFO - step: 9160 loss: 2.4173 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1345 +[titan] 2025-10-05 04:11:35,333 - root - INFO - lr: 4.4781e-05 gnorm: 1.15 [ 5:37:26<18:56:05] +[titan] 2025-10-05 04:11:46,194 - root - INFO - step: 9165 loss: 2.4180 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 04:11:46,195 - root - INFO - lr: 4.4775e-05 gnorm: 1.13 [ 5:37:37<18:55:54] +[titan] 2025-10-05 04:11:57,056 - root - INFO - step: 9170 loss: 2.3989 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 04:11:57,057 - root - INFO - lr: 4.4769e-05 gnorm: 1.15 [ 5:37:48<18:55:42] +[titan] 2025-10-05 04:12:07,928 - root - INFO - step: 9175 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 04:12:07,928 - root - INFO - lr: 4.4764e-05 gnorm: 1.11 [ 5:37:58<18:55:30] +[titan] 2025-10-05 04:12:18,847 - root - INFO - step: 9180 loss: 2.5568 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2549 +[titan] 2025-10-05 04:12:18,847 - root - INFO - lr: 4.4758e-05 
gnorm: 1.20 [ 5:38:09<18:55:19] +[titan] 2025-10-05 04:12:29,719 - root - INFO - step: 9185 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1545 +[titan] 2025-10-05 04:12:29,719 - root - INFO - lr: 4.4752e-05 gnorm: 1.16 [ 5:38:20<18:55:07] +[titan] 2025-10-05 04:12:40,611 - root - INFO - step: 9190 loss: 2.3798 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.1027 +[titan] 2025-10-05 04:12:40,611 - root - INFO - lr: 4.4747e-05 gnorm: 1.15 [ 5:38:31<18:54:55] +[titan] 2025-10-05 04:12:51,477 - root - INFO - step: 9195 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1621 +[titan] 2025-10-05 04:12:51,477 - root - INFO - lr: 4.4741e-05 gnorm: 1.15 [ 5:38:42<18:54:44] +[titan] 2025-10-05 04:13:00,167 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:13:02,353 - root - INFO - step: 9200 loss: 2.4374 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1525 +[titan] 2025-10-05 04:13:02,353 - root - INFO - lr: 4.4735e-05 gnorm: 1.20 [ 5:38:53<18:54:32] +[titan] 2025-10-05 04:13:13,230 - root - INFO - step: 9205 loss: 2.4854 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1940 +[titan] 2025-10-05 04:13:13,230 - root - INFO - lr: 4.4729e-05 gnorm: 1.22 [ 5:39:04<18:54:21] +[titan] 2025-10-05 04:13:24,132 - root - INFO - step: 9210 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:13:24,132 - root - INFO - lr: 4.4724e-05 gnorm: 1.19 [ 5:39:15<18:54:09] +[titan] 2025-10-05 04:13:35,087 - root - INFO - step: 9215 loss: 2.4851 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1930 +[titan] 2025-10-05 04:13:35,087 - root - INFO - lr: 4.4718e-05 gnorm: 1.15 [ 5:39:26<18:53:58] +[titan] 2025-10-05 04:13:37,437 - root - INFO - Dumping profiler traces at step 9216 +[titan] 2025-10-05 04:13:37,474 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:13:46,201 - root - INFO - step: 9220 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 29,486 tflops: 409.07 mfu: 41.36% global_avg_ntp_loss: 0.2928 global_avg_mtp_loss: 2.2073 +[titan] 2025-10-05 04:13:46,201 - root - INFO - lr: 4.4712e-05 gnorm: 1.17 [ 5:39:37<18:53:47] +[titan] 2025-10-05 04:13:57,080 - root - INFO - step: 9225 loss: 2.3856 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1071 +[titan] 2025-10-05 04:13:57,080 - root - INFO - lr: 4.4706e-05 gnorm: 1.15 [ 5:39:48<18:53:35] +[titan] 2025-10-05 04:14:07,953 - root - INFO - step: 9230 loss: 2.4302 
memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1452 +[titan] 2025-10-05 04:14:07,953 - root - INFO - lr: 4.4701e-05 gnorm: 1.15 [ 5:39:58<18:53:23] +[titan] 2025-10-05 04:14:18,819 - root - INFO - step: 9235 loss: 2.4502 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1635 +[titan] 2025-10-05 04:14:18,819 - root - INFO - lr: 4.4695e-05 gnorm: 1.22 [ 5:40:09<18:53:12] +[titan] 2025-10-05 04:14:29,678 - root - INFO - step: 9240 loss: 2.4452 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1584 +[titan] 2025-10-05 04:14:29,678 - root - INFO - lr: 4.4689e-05 gnorm: 1.17 [ 5:40:20<18:53:00] +[titan] 2025-10-05 04:14:40,618 - root - INFO - step: 9245 loss: 2.4345 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1494 +[titan] 2025-10-05 04:14:40,618 - root - INFO - lr: 4.4683e-05 gnorm: 1.14 [ 5:40:31<18:52:49] +[titan] 2025-10-05 04:14:49,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:14:51,484 - root - INFO - step: 9250 loss: 2.5104 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 04:14:51,484 - root - INFO - lr: 4.4678e-05 gnorm: 1.18 [ 5:40:42<18:52:37] +[titan] 2025-10-05 04:15:02,363 - root - INFO - step: 9255 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1304 +[titan] 2025-10-05 04:15:02,363 - root - INFO - lr: 4.4672e-05 gnorm: 1.16 [ 5:40:53<18:52:25] +[titan] 2025-10-05 04:15:13,235 - root - INFO - step: 9260 loss: 2.4511 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:15:13,236 - root - INFO - lr: 4.4666e-05 gnorm: 1.17 [ 5:41:04<18:52:14] +[titan] 2025-10-05 04:15:24,134 - root - INFO - step: 9265 loss: 2.5208 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2247 +[titan] 2025-10-05 04:15:24,134 - root - INFO - lr: 4.4660e-05 gnorm: 1.12 [ 5:41:15<18:52:02] +[titan] 2025-10-05 04:15:35,014 - root - INFO - step: 9270 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1064 +[titan] 2025-10-05 04:15:35,015 - root - INFO - lr: 4.4655e-05 gnorm: 1.14 [ 5:41:26<18:51:51] +[titan] 2025-10-05 04:15:45,940 - root - INFO - step: 9275 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2155 +[titan] 2025-10-05 04:15:45,940 - root - INFO - lr: 4.4649e-05 gnorm: 3.57 [ 5:41:36<18:51:39] +[titan] 2025-10-05 04:15:56,816 - root - INFO - step: 9280 loss: 2.4602 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1702 +[titan] 2025-10-05 04:15:56,816 - root - INFO - lr: 4.4643e-05 
gnorm: 1.17 [ 5:41:47<18:51:28] +[titan] 2025-10-05 04:16:07,687 - root - INFO - step: 9285 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1597 +[titan] 2025-10-05 04:16:07,687 - root - INFO - lr: 4.4637e-05 gnorm: 1.16 [ 5:41:58<18:51:16] +[titan] 2025-10-05 04:16:18,550 - root - INFO - step: 9290 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1896 +[titan] 2025-10-05 04:16:18,550 - root - INFO - lr: 4.4631e-05 gnorm: 1.19 [ 5:42:09<18:51:04] +[titan] 2025-10-05 04:16:29,436 - root - INFO - step: 9295 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1040 +[titan] 2025-10-05 04:16:29,436 - root - INFO - lr: 4.4626e-05 gnorm: 1.23 [ 5:42:20<18:50:53] +[titan] 2025-10-05 04:16:38,131 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:16:40,321 - root - INFO - step: 9300 loss: 2.4653 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:16:40,321 - root - INFO - lr: 4.4620e-05 gnorm: 1.12 [ 5:42:31<18:50:41] +[titan] 2025-10-05 04:16:51,231 - root - INFO - step: 9305 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:16:51,232 - root - INFO - lr: 4.4614e-05 gnorm: 1.15 [ 5:42:42<18:50:30] +[titan] 2025-10-05 04:17:02,103 - root - INFO - step: 9310 loss: 2.4882 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1966 +[titan] 2025-10-05 04:17:02,103 - root - INFO - lr: 4.4608e-05 gnorm: 1.14 [ 5:42:53<18:50:18] +[titan] 2025-10-05 04:17:13,000 - root - INFO - step: 9315 loss: 2.4906 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1975 +[titan] 2025-10-05 04:17:13,000 - root - INFO - lr: 4.4602e-05 gnorm: 1.19 [ 5:43:04<18:50:06] +[titan] 2025-10-05 04:17:23,889 - root - INFO - step: 9320 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:17:23,889 - root - INFO - lr: 4.4597e-05 gnorm: 1.23 [ 5:43:14<18:49:55] +[titan] 2025-10-05 04:17:34,759 - root - INFO - step: 9325 loss: 2.4923 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2926 global_avg_mtp_loss: 2.1997 +[titan] 2025-10-05 04:17:34,759 - root - INFO - lr: 4.4591e-05 gnorm: 1.20 [ 5:43:25<18:49:43] +[titan] 2025-10-05 04:17:45,670 - root - INFO - step: 9330 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1813 +[titan] 2025-10-05 04:17:45,670 - root - INFO - lr: 4.4585e-05 
gnorm: 1.15 [ 5:43:36<18:49:32] +[titan] 2025-10-05 04:17:56,531 - root - INFO - step: 9335 loss: 2.5353 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:17:56,531 - root - INFO - lr: 4.4579e-05 gnorm: 1.15 [ 5:43:47<18:49:20] +[titan] 2025-10-05 04:18:07,423 - root - INFO - step: 9340 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 04:18:07,423 - root - INFO - lr: 4.4573e-05 gnorm: 1.22 [ 5:43:58<18:49:08] +[titan] 2025-10-05 04:18:18,296 - root - INFO - step: 9345 loss: 2.4834 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1917 +[titan] 2025-10-05 04:18:18,296 - root - INFO - lr: 4.4568e-05 gnorm: 1.16 [ 5:44:09<18:48:57] +[titan] 2025-10-05 04:18:27,002 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:18:29,196 - root - INFO - step: 9350 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:18:29,196 - root - INFO - lr: 4.4562e-05 gnorm: 1.12 [ 5:44:20<18:48:45] +[titan] 2025-10-05 04:18:40,056 - root - INFO - step: 9355 loss: 2.4321 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1466 +[titan] 2025-10-05 04:18:40,056 - root - INFO - lr: 4.4556e-05 gnorm: 1.12 [ 5:44:31<18:48:34] +[titan] 2025-10-05 04:18:50,968 - root - INFO - step: 9360 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2060 +[titan] 2025-10-05 04:18:50,968 - root - INFO - lr: 4.4550e-05 gnorm: 1.14 [ 5:44:41<18:48:22] +[titan] 2025-10-05 04:19:01,819 - root - INFO - step: 9365 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1401 +[titan] 2025-10-05 04:19:01,819 - root - INFO - lr: 4.4544e-05 gnorm: 1.14 [ 5:44:52<18:48:10] +[titan] 2025-10-05 04:19:12,717 - root - INFO - step: 9370 loss: 2.5021 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 04:19:12,718 - root - INFO - lr: 4.4538e-05 gnorm: 1.13 [ 5:45:03<18:47:59] +[titan] 2025-10-05 04:19:23,592 - root - INFO - step: 9375 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 04:19:23,592 - root - INFO - lr: 4.4533e-05 gnorm: 1.15 [ 5:45:14<18:47:47] +[titan] 2025-10-05 04:19:34,464 - root - INFO - step: 9380 loss: 2.4564 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1688 +[titan] 2025-10-05 04:19:34,465 - root - INFO - lr: 4.4527e-05 
gnorm: 1.21 [ 5:45:25<18:47:36] +[titan] 2025-10-05 04:19:45,394 - root - INFO - step: 9385 loss: 2.4197 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1348 +[titan] 2025-10-05 04:19:45,394 - root - INFO - lr: 4.4521e-05 gnorm: 1.16 [ 5:45:36<18:47:24] +[titan] 2025-10-05 04:19:56,282 - root - INFO - step: 9390 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:19:56,282 - root - INFO - lr: 4.4515e-05 gnorm: 1.15 [ 5:45:47<18:47:13] +[titan] 2025-10-05 04:20:07,169 - root - INFO - step: 9395 loss: 2.4327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1475 +[titan] 2025-10-05 04:20:07,169 - root - INFO - lr: 4.4509e-05 gnorm: 1.21 [ 5:45:58<18:47:01] +[titan] 2025-10-05 04:20:15,874 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:20:18,060 - root - INFO - step: 9400 loss: 2.5009 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2079 +[titan] 2025-10-05 04:20:18,061 - root - INFO - lr: 4.4503e-05 gnorm: 1.18 [ 5:46:09<18:46:49] +[titan] 2025-10-05 04:20:28,966 - root - INFO - step: 9405 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1469 +[titan] 2025-10-05 04:20:28,966 - root - INFO - lr: 4.4498e-05 gnorm: 1.14 [ 5:46:19<18:46:38] +[titan] 2025-10-05 04:20:39,882 - root - INFO - step: 9410 loss: 2.4983 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 04:20:39,882 - root - INFO - lr: 4.4492e-05 gnorm: 1.20 [ 5:46:30<18:46:26] +[titan] 2025-10-05 04:20:50,800 - root - INFO - step: 9415 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:20:50,800 - root - INFO - lr: 4.4486e-05 gnorm: 1.13 [ 5:46:41<18:46:15] +[titan] 2025-10-05 04:21:01,668 - root - INFO - step: 9420 loss: 2.3688 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0904 +[titan] 2025-10-05 04:21:01,668 - root - INFO - lr: 4.4480e-05 gnorm: 1.16 [ 5:46:52<18:46:03] +[titan] 2025-10-05 04:21:12,542 - root - INFO - step: 9425 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 04:21:12,543 - root - INFO - lr: 4.4474e-05 gnorm: 1.16 [ 5:47:03<18:45:52] +[titan] 2025-10-05 04:21:23,412 - root - INFO - step: 9430 loss: 2.4415 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1551 +[titan] 2025-10-05 04:21:23,412 - root - INFO - lr: 4.4468e-05 
gnorm: 1.20 [ 5:47:14<18:45:40] +[titan] 2025-10-05 04:21:34,322 - root - INFO - step: 9435 loss: 2.3669 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 04:21:34,322 - root - INFO - lr: 4.4462e-05 gnorm: 1.10 [ 5:47:25<18:45:29] +[titan] 2025-10-05 04:21:45,197 - root - INFO - step: 9440 loss: 2.3883 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1070 +[titan] 2025-10-05 04:21:45,197 - root - INFO - lr: 4.4457e-05 gnorm: 1.17 [ 5:47:36<18:45:17] +[titan] 2025-10-05 04:21:56,142 - root - INFO - step: 9445 loss: 2.4394 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1537 +[titan] 2025-10-05 04:21:56,142 - root - INFO - lr: 4.4451e-05 gnorm: 1.15 [ 5:47:47<18:45:06] +[titan] 2025-10-05 04:22:04,824 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:22:07,011 - root - INFO - step: 9450 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1302 +[titan] 2025-10-05 04:22:07,011 - root - INFO - lr: 4.4445e-05 gnorm: 1.11 [ 5:47:58<18:44:54] +[titan] 2025-10-05 04:22:17,891 - root - INFO - step: 9455 loss: 2.4826 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1916 +[titan] 2025-10-05 04:22:17,891 - root - INFO - lr: 4.4439e-05 gnorm: 1.14 [ 5:48:08<18:44:42] +[titan] 2025-10-05 04:22:28,768 - root - INFO - step: 9460 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 04:22:28,768 - root - INFO - lr: 4.4433e-05 gnorm: 1.12 [ 5:48:19<18:44:31] +[titan] 2025-10-05 04:22:39,662 - root - INFO - step: 9465 loss: 2.4758 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:22:39,663 - root - INFO - lr: 4.4427e-05 gnorm: 1.12 [ 5:48:30<18:44:19] +[titan] 2025-10-05 04:22:50,623 - root - INFO - step: 9470 loss: 2.4549 memory: 118.84GiB(85.28%) tps: 29,899 tflops: 414.80 mfu: 41.94% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1672 +[titan] 2025-10-05 04:22:50,623 - root - INFO - lr: 4.4421e-05 gnorm: 1.19 [ 5:48:41<18:44:08] +[titan] 2025-10-05 04:23:01,499 - root - INFO - step: 9475 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1431 +[titan] 2025-10-05 04:23:01,500 - root - INFO - lr: 4.4415e-05 gnorm: 1.12 [ 5:48:52<18:43:56] +[titan] 2025-10-05 04:23:12,360 - root - INFO - step: 9480 loss: 2.4464 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1579 +[titan] 2025-10-05 04:23:12,361 - root - INFO - lr: 4.4410e-05 
gnorm: 1.18 [ 5:49:03<18:43:45] +[titan] 2025-10-05 04:23:23,239 - root - INFO - step: 9485 loss: 2.4527 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1643 +[titan] 2025-10-05 04:23:23,239 - root - INFO - lr: 4.4404e-05 gnorm: 1.19 [ 5:49:14<18:43:33] +[titan] 2025-10-05 04:23:34,114 - root - INFO - step: 9490 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:23:34,114 - root - INFO - lr: 4.4398e-05 gnorm: 1.11 [ 5:49:25<18:43:22] +[titan] 2025-10-05 04:23:44,977 - root - INFO - step: 9495 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1819 +[titan] 2025-10-05 04:23:44,977 - root - INFO - lr: 4.4392e-05 gnorm: 1.11 [ 5:49:35<18:43:10] +[titan] 2025-10-05 04:23:53,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:23:55,915 - root - INFO - step: 9500 loss: 2.4279 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1438 +[titan] 2025-10-05 04:23:55,915 - root - INFO - lr: 4.4386e-05 gnorm: 1.12 [ 5:49:46<18:42:59] +[titan] 2025-10-05 04:24:06,759 - root - INFO - step: 9505 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:24:06,759 - root - INFO - lr: 4.4380e-05 gnorm: 1.15 [ 5:49:57<18:42:47] +[titan] 2025-10-05 04:24:17,624 - root - INFO - step: 9510 loss: 2.4001 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1187 +[titan] 2025-10-05 04:24:17,624 - root - INFO - lr: 4.4374e-05 gnorm: 1.13 [ 5:50:08<18:42:35] +[titan] 2025-10-05 04:24:28,498 - root - INFO - step: 9515 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 04:24:28,498 - root - INFO - lr: 4.4368e-05 gnorm: 1.17 [ 5:50:19<18:42:24] +[titan] 2025-10-05 04:24:39,377 - root - INFO - step: 9520 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:24:39,377 - root - INFO - lr: 4.4362e-05 gnorm: 1.13 [ 5:50:30<18:42:12] +[titan] 2025-10-05 04:24:50,308 - root - INFO - step: 9525 loss: 2.3498 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0740 +[titan] 2025-10-05 04:24:50,308 - root - INFO - lr: 4.4357e-05 gnorm: 1.18 [ 5:50:41<18:42:01] +[titan] 2025-10-05 04:25:01,216 - root - INFO - step: 9530 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1821 +[titan] 2025-10-05 04:25:01,216 - root - INFO - lr: 4.4351e-05 
gnorm: 1.23 [ 5:50:52<18:41:49] +[titan] 2025-10-05 04:25:12,092 - root - INFO - step: 9535 loss: 2.4240 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1392 +[titan] 2025-10-05 04:25:12,092 - root - INFO - lr: 4.4345e-05 gnorm: 1.17 [ 5:51:03<18:41:38] +[titan] 2025-10-05 04:25:22,993 - root - INFO - step: 9540 loss: 2.4342 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1477 +[titan] 2025-10-05 04:25:22,994 - root - INFO - lr: 4.4339e-05 gnorm: 1.18 [ 5:51:13<18:41:26] +[titan] 2025-10-05 04:25:33,873 - root - INFO - step: 9545 loss: 2.4536 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1647 +[titan] 2025-10-05 04:25:33,873 - root - INFO - lr: 4.4333e-05 gnorm: 1.18 [ 5:51:24<18:41:15] +[titan] 2025-10-05 04:25:42,543 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:25:44,728 - root - INFO - step: 9550 loss: 2.4518 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1638 +[titan] 2025-10-05 04:25:44,728 - root - INFO - lr: 4.4327e-05 gnorm: 1.19 [ 5:51:35<18:41:03] +[titan] 2025-10-05 04:25:55,649 - root - INFO - step: 9555 loss: 2.4091 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 04:25:55,650 - root - INFO - lr: 4.4321e-05 gnorm: 1.19 [ 5:51:46<18:40:51] +[titan] 2025-10-05 04:26:06,497 - root - INFO - step: 9560 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1277 +[titan] 2025-10-05 04:26:06,497 - root - INFO - lr: 4.4315e-05 gnorm: 1.14 [ 5:51:57<18:40:40] +[titan] 2025-10-05 04:26:17,403 - root - INFO - step: 9565 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1464 +[titan] 2025-10-05 04:26:17,403 - root - INFO - lr: 4.4309e-05 gnorm: 1.18 [ 5:52:08<18:40:28] +[titan] 2025-10-05 04:26:28,292 - root - INFO - step: 9570 loss: 2.4323 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 04:26:28,293 - root - INFO - lr: 4.4303e-05 gnorm: 1.17 [ 5:52:19<18:40:17] +[titan] 2025-10-05 04:26:39,137 - root - INFO - step: 9575 loss: 2.4565 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:26:39,137 - root - INFO - lr: 4.4297e-05 gnorm: 1.17 [ 5:52:30<18:40:05] +[titan] 2025-10-05 04:26:50,002 - root - INFO - step: 9580 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.2636 +[titan] 2025-10-05 04:26:50,002 - root - INFO - lr: 4.4291e-05 
gnorm: 1.16 [ 5:52:41<18:39:53] +[titan] 2025-10-05 04:27:00,916 - root - INFO - step: 9585 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 04:27:00,916 - root - INFO - lr: 4.4285e-05 gnorm: 1.16 [ 5:52:51<18:39:42] +[titan] 2025-10-05 04:27:11,772 - root - INFO - step: 9590 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2492 +[titan] 2025-10-05 04:27:11,773 - root - INFO - lr: 4.4279e-05 gnorm: 1.14 [ 5:53:02<18:39:30] +[titan] 2025-10-05 04:27:22,632 - root - INFO - step: 9595 loss: 2.4580 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1692 +[titan] 2025-10-05 04:27:22,632 - root - INFO - lr: 4.4273e-05 gnorm: 1.16 [ 5:53:13<18:39:19] +[titan] 2025-10-05 04:27:31,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:27:33,503 - root - INFO - step: 9600 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:27:33,503 - root - INFO - lr: 4.4268e-05 gnorm: 1.14 [ 5:53:24<18:39:07] +[titan] 2025-10-05 04:27:44,378 - root - INFO - step: 9605 loss: 2.4209 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1359 +[titan] 2025-10-05 04:27:44,378 - root - INFO - lr: 4.4262e-05 gnorm: 1.14 [ 5:53:35<18:38:56] +[titan] 2025-10-05 04:27:55,281 - root - INFO - step: 9610 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1495 +[titan] 2025-10-05 04:27:55,281 - root - INFO - lr: 4.4256e-05 gnorm: 1.27 [ 5:53:46<18:38:44] +[titan] 2025-10-05 04:28:06,144 - root - INFO - step: 9615 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1414 +[titan] 2025-10-05 04:28:06,145 - root - INFO - lr: 4.4250e-05 gnorm: 1.12 [ 5:53:57<18:38:32] +[titan] 2025-10-05 04:28:17,025 - root - INFO - step: 9620 loss: 2.4380 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1520 +[titan] 2025-10-05 04:28:17,025 - root - INFO - lr: 4.4244e-05 gnorm: 1.17 [ 5:54:08<18:38:21] +[titan] 2025-10-05 04:28:27,900 - root - INFO - step: 9625 loss: 2.4092 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1278 +[titan] 2025-10-05 04:28:27,900 - root - INFO - lr: 4.4238e-05 gnorm: 1.17 [ 5:54:18<18:38:09] +[titan] 2025-10-05 04:28:38,759 - root - INFO - step: 9630 loss: 2.3955 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1144 +[titan] 2025-10-05 04:28:38,759 - root - INFO - lr: 4.4232e-05 
gnorm: 1.18 [ 5:54:29<18:37:58] +[titan] 2025-10-05 04:28:49,641 - root - INFO - step: 9635 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 04:28:49,641 - root - INFO - lr: 4.4226e-05 gnorm: 1.17 [ 5:54:40<18:37:46] +[titan] 2025-10-05 04:29:00,565 - root - INFO - step: 9640 loss: 2.5391 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2382 +[titan] 2025-10-05 04:29:00,565 - root - INFO - lr: 4.4220e-05 gnorm: 1.17 [ 5:54:51<18:37:35] +[titan] 2025-10-05 04:29:11,410 - root - INFO - step: 9645 loss: 2.4192 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 04:29:11,410 - root - INFO - lr: 4.4214e-05 gnorm: 1.18 [ 5:55:02<18:37:23] +[titan] 2025-10-05 04:29:20,108 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:29:22,295 - root - INFO - step: 9650 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:29:22,295 - root - INFO - lr: 4.4208e-05 gnorm: 1.14 [ 5:55:13<18:37:11] +[titan] 2025-10-05 04:29:33,192 - root - INFO - step: 9655 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1795 +[titan] 2025-10-05 04:29:33,192 - root - INFO - lr: 4.4202e-05 gnorm: 1.18 [ 5:55:24<18:37:00] +[titan] 2025-10-05 04:29:44,075 - root - INFO - step: 9660 loss: 2.5077 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2141 +[titan] 2025-10-05 04:29:44,076 - root - INFO - lr: 4.4196e-05 gnorm: 1.19 [ 5:55:35<18:36:48] +[titan] 2025-10-05 04:29:55,012 - root - INFO - step: 9665 loss: 2.3987 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2812 global_avg_mtp_loss: 2.1174 +[titan] 2025-10-05 04:29:55,012 - root - INFO - lr: 4.4190e-05 gnorm: 1.13 [ 5:55:46<18:36:37] +[titan] 2025-10-05 04:30:05,890 - root - INFO - step: 9670 loss: 2.4206 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1355 +[titan] 2025-10-05 04:30:05,891 - root - INFO - lr: 4.4184e-05 gnorm: 1.15 [ 5:55:56<18:36:25] +[titan] 2025-10-05 04:30:16,776 - root - INFO - step: 9675 loss: 2.3409 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 04:30:16,776 - root - INFO - lr: 4.4178e-05 gnorm: 1.12 [ 5:56:07<18:36:14] +[titan] 2025-10-05 04:30:27,638 - root - INFO - step: 9680 loss: 2.4055 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1224 +[titan] 2025-10-05 04:30:27,639 - root - INFO - lr: 4.4172e-05 
gnorm: 1.11 [ 5:56:18<18:36:02] +[titan] 2025-10-05 04:30:38,514 - root - INFO - step: 9685 loss: 2.4020 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1213 +[titan] 2025-10-05 04:30:38,514 - root - INFO - lr: 4.4166e-05 gnorm: 1.10 [ 5:56:29<18:35:51] +[titan] 2025-10-05 04:30:49,397 - root - INFO - step: 9690 loss: 2.3894 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 04:30:49,397 - root - INFO - lr: 4.4160e-05 gnorm: 1.14 [ 5:56:40<18:35:39] +[titan] 2025-10-05 04:31:00,376 - root - INFO - step: 9695 loss: 2.4118 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.10 mfu: 41.87% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:31:00,376 - root - INFO - lr: 4.4154e-05 gnorm: 1.13 [ 5:56:51<18:35:28] +[titan] 2025-10-05 04:31:09,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:31:11,234 - root - INFO - step: 9700 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 04:31:11,235 - root - INFO - lr: 4.4148e-05 gnorm: 1.17 [ 5:57:02<18:35:16] +[titan] 2025-10-05 04:31:22,095 - root - INFO - step: 9705 loss: 2.4525 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1644 +[titan] 2025-10-05 04:31:22,095 - root - INFO - lr: 4.4142e-05 gnorm: 1.18 [ 5:57:13<18:35:05] +[titan] 2025-10-05 04:31:32,925 - root - INFO - step: 9710 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.77 mfu: 42.44% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:31:32,925 - root - INFO - lr: 4.4136e-05 gnorm: 1.17 [ 5:57:23<18:34:53] +[titan] 2025-10-05 04:31:43,787 - root - INFO - step: 9715 loss: 2.4891 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 04:31:43,787 - root - INFO - lr: 4.4130e-05 gnorm: 1.38 [ 5:57:34<18:34:41] +[titan] 2025-10-05 04:31:54,630 - root - INFO - step: 9720 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0983 +[titan] 2025-10-05 04:31:54,630 - root - INFO - lr: 4.4124e-05 gnorm: 1.14 [ 5:57:45<18:34:30] +[titan] 2025-10-05 04:32:05,581 - root - INFO - step: 9725 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 04:32:05,582 - root - INFO - lr: 4.4118e-05 gnorm: 1.14 [ 5:57:56<18:34:18] +[titan] 2025-10-05 04:32:12,293 - root - INFO - Dumping profiler traces at step 9728 +[titan] 2025-10-05 04:32:12,331 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:32:16,691 - root - INFO - step: 9730 loss: 2.4883 
memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.1950 +[titan] 2025-10-05 04:32:16,691 - root - INFO - lr: 4.4112e-05 gnorm: 1.25 [ 5:58:07<18:34:08] +[titan] 2025-10-05 04:32:27,533 - root - INFO - step: 9735 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:32:27,534 - root - INFO - lr: 4.4106e-05 gnorm: 1.17 [ 5:58:18<18:33:56] +[titan] 2025-10-05 04:32:38,369 - root - INFO - step: 9740 loss: 2.4600 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1703 +[titan] 2025-10-05 04:32:38,369 - root - INFO - lr: 4.4100e-05 gnorm: 1.17 [ 5:58:29<18:33:44] +[titan] 2025-10-05 04:32:49,220 - root - INFO - step: 9745 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 04:32:49,220 - root - INFO - lr: 4.4094e-05 gnorm: 1.16 [ 5:58:40<18:33:33] +[titan] 2025-10-05 04:32:57,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:33:00,142 - root - INFO - step: 9750 loss: 2.3885 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1078 +[titan] 2025-10-05 04:33:00,143 - root - INFO - lr: 4.4088e-05 gnorm: 1.14 [ 5:58:51<18:33:21] +[titan] 2025-10-05 04:33:10,995 - root - INFO - step: 9755 loss: 2.5700 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3070 global_avg_mtp_loss: 2.2630 +[titan] 2025-10-05 04:33:10,995 - root - INFO - lr: 4.4082e-05 gnorm: 1.38 [ 5:59:01<18:33:10] +[titan] 2025-10-05 04:33:21,841 - root - INFO - step: 9760 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:33:21,841 - root - INFO - lr: 4.4076e-05 gnorm: 1.10 [ 5:59:12<18:32:58] +[titan] 2025-10-05 04:33:32,699 - root - INFO - step: 9765 loss: 2.4074 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:33:32,699 - root - INFO - lr: 4.4070e-05 gnorm: 1.12 [ 5:59:23<18:32:46] +[titan] 2025-10-05 04:33:43,562 - root - INFO - step: 9770 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 04:33:43,563 - root - INFO - lr: 4.4064e-05 gnorm: 1.20 [ 5:59:34<18:32:35] +[titan] 2025-10-05 04:33:54,429 - root - INFO - step: 9775 loss: 2.3924 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1125 +[titan] 2025-10-05 04:33:54,429 - root - INFO - lr: 4.4058e-05 gnorm: 1.13 [ 5:59:45<18:32:23] +[titan] 2025-10-05 04:34:05,364 - root - INFO - step: 9780 loss: 2.4335 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1473 +[titan] 2025-10-05 04:34:05,365 - root - INFO - lr: 4.4052e-05 
gnorm: 1.19 [ 5:59:56<18:32:12] +[titan] 2025-10-05 04:34:16,251 - root - INFO - step: 9785 loss: 2.4309 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:34:16,251 - root - INFO - lr: 4.4046e-05 gnorm: 1.30 [ 6:00:07<18:32:00] +[titan] 2025-10-05 04:34:27,120 - root - INFO - step: 9790 loss: 2.4512 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2882 global_avg_mtp_loss: 2.1629 +[titan] 2025-10-05 04:34:27,120 - root - INFO - lr: 4.4039e-05 gnorm: 1.21 [ 6:00:18<18:31:49] +[titan] 2025-10-05 04:34:37,998 - root - INFO - step: 9795 loss: 2.3456 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 04:34:37,999 - root - INFO - lr: 4.4033e-05 gnorm: 1.14 [ 6:00:28<18:31:37] +[titan] 2025-10-05 04:34:46,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:34:48,883 - root - INFO - step: 9800 loss: 2.4057 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:34:48,883 - root - INFO - lr: 4.4027e-05 gnorm: 1.18 [ 6:00:39<18:31:26] +[titan] 2025-10-05 04:34:59,779 - root - INFO - step: 9805 loss: 2.5371 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2995 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:34:59,779 - root - INFO - lr: 4.4021e-05 gnorm: 1.15 [ 6:00:50<18:31:14] +[titan] 2025-10-05 04:35:10,650 - root - INFO - step: 9810 loss: 2.4142 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1309 +[titan] 2025-10-05 04:35:10,651 - root - INFO - lr: 4.4015e-05 gnorm: 1.16 [ 6:01:01<18:31:03] +[titan] 2025-10-05 04:35:21,521 - root - INFO - step: 9815 loss: 2.4068 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1250 +[titan] 2025-10-05 04:35:21,521 - root - INFO - lr: 4.4009e-05 gnorm: 1.16 [ 6:01:12<18:30:51] +[titan] 2025-10-05 04:35:32,405 - root - INFO - step: 9820 loss: 2.4191 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:35:32,405 - root - INFO - lr: 4.4003e-05 gnorm: 1.14 [ 6:01:23<18:30:39] +[titan] 2025-10-05 04:35:43,265 - root - INFO - step: 9825 loss: 2.4557 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:35:43,266 - root - INFO - lr: 4.3997e-05 gnorm: 1.11 [ 6:01:34<18:30:28] +[titan] 2025-10-05 04:35:54,144 - root - INFO - step: 9830 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 04:35:54,144 - root - INFO - lr: 4.3991e-05 
gnorm: 1.10 [ 6:01:45<18:30:16] +[titan] 2025-10-05 04:36:05,038 - root - INFO - step: 9835 loss: 2.3594 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0837 +[titan] 2025-10-05 04:36:05,038 - root - INFO - lr: 4.3985e-05 gnorm: 1.17 [ 6:01:56<18:30:05] +[titan] 2025-10-05 04:36:15,903 - root - INFO - step: 9840 loss: 2.3943 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1136 +[titan] 2025-10-05 04:36:15,903 - root - INFO - lr: 4.3979e-05 gnorm: 1.13 [ 6:02:06<18:29:53] +[titan] 2025-10-05 04:36:26,766 - root - INFO - step: 9845 loss: 2.3607 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0841 +[titan] 2025-10-05 04:36:26,766 - root - INFO - lr: 4.3973e-05 gnorm: 1.11 [ 6:02:17<18:29:42] +[titan] 2025-10-05 04:36:35,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:36:37,631 - root - INFO - step: 9850 loss: 2.4018 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1206 +[titan] 2025-10-05 04:36:37,631 - root - INFO - lr: 4.3967e-05 gnorm: 1.18 [ 6:02:28<18:29:30] +[titan] 2025-10-05 04:36:48,494 - root - INFO - step: 9855 loss: 2.3920 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 04:36:48,494 - root - INFO - lr: 4.3961e-05 gnorm: 1.14 [ 6:02:39<18:29:19] +[titan] 2025-10-05 04:36:59,366 - root - INFO - step: 9860 loss: 2.3928 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1132 +[titan] 2025-10-05 04:36:59,366 - root - INFO - lr: 4.3955e-05 gnorm: 1.16 [ 6:02:50<18:29:07] +[titan] 2025-10-05 04:37:10,292 - root - INFO - step: 9865 loss: 2.3430 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0689 +[titan] 2025-10-05 04:37:10,292 - root - INFO - lr: 4.3948e-05 gnorm: 1.14 [ 6:03:01<18:28:56] +[titan] 2025-10-05 04:37:21,111 - root - INFO - step: 9870 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.0953 +[titan] 2025-10-05 04:37:21,111 - root - INFO - lr: 4.3942e-05 gnorm: 1.23 [ 6:03:12<18:28:44] +[titan] 2025-10-05 04:37:31,972 - root - INFO - step: 9875 loss: 2.4673 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2893 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:37:31,972 - root - INFO - lr: 4.3936e-05 gnorm: 1.12 [ 6:03:22<18:28:32] +[titan] 2025-10-05 04:37:42,800 - root - INFO - step: 9880 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 04:37:42,800 - root - INFO - lr: 4.3930e-05 
gnorm: 1.18 [ 6:03:33<18:28:21] +[titan] 2025-10-05 04:37:53,645 - root - INFO - step: 9885 loss: 2.3888 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:37:53,645 - root - INFO - lr: 4.3924e-05 gnorm: 1.14 [ 6:03:44<18:28:09] +[titan] 2025-10-05 04:38:04,551 - root - INFO - step: 9890 loss: 2.3882 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 04:38:04,552 - root - INFO - lr: 4.3918e-05 gnorm: 1.12 [ 6:03:55<18:27:58] +[titan] 2025-10-05 04:38:15,412 - root - INFO - step: 9895 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1157 +[titan] 2025-10-05 04:38:15,412 - root - INFO - lr: 4.3912e-05 gnorm: 1.14 [ 6:04:06<18:27:46] +[titan] 2025-10-05 04:38:24,052 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:38:26,242 - root - INFO - step: 9900 loss: 2.3816 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1025 +[titan] 2025-10-05 04:38:26,242 - root - INFO - lr: 4.3906e-05 gnorm: 1.14 [ 6:04:17<18:27:34] +[titan] 2025-10-05 04:38:37,109 - root - INFO - step: 9905 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 04:38:37,109 - root - INFO - lr: 4.3900e-05 gnorm: 1.17 [ 6:04:28<18:27:23] +[titan] 2025-10-05 04:38:47,968 - root - INFO - step: 9910 loss: 2.4451 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 04:38:47,968 - root - INFO - lr: 4.3894e-05 gnorm: 1.17 [ 6:04:38<18:27:11] +[titan] 2025-10-05 04:38:58,828 - root - INFO - step: 9915 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0776 +[titan] 2025-10-05 04:38:58,828 - root - INFO - lr: 4.3887e-05 gnorm: 1.15 [ 6:04:49<18:27:00] +[titan] 2025-10-05 04:39:09,703 - root - INFO - step: 9920 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 04:39:09,703 - root - INFO - lr: 4.3881e-05 gnorm: 1.13 [ 6:05:00<18:26:48] +[titan] 2025-10-05 04:39:20,593 - root - INFO - step: 9925 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 04:39:20,593 - root - INFO - lr: 4.3875e-05 gnorm: 1.14 [ 6:05:11<18:26:36] +[titan] 2025-10-05 04:39:31,464 - root - INFO - step: 9930 loss: 2.2894 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 04:39:31,464 - root - INFO - lr: 4.3869e-05 
gnorm: 1.11 [ 6:05:22<18:26:25] +[titan] 2025-10-05 04:39:42,337 - root - INFO - step: 9935 loss: 2.3475 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 04:39:42,338 - root - INFO - lr: 4.3863e-05 gnorm: 1.10 [ 6:05:33<18:26:13] +[titan] 2025-10-05 04:39:53,224 - root - INFO - step: 9940 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0849 +[titan] 2025-10-05 04:39:53,224 - root - INFO - lr: 4.3857e-05 gnorm: 1.13 [ 6:05:44<18:26:02] +[titan] 2025-10-05 04:40:04,154 - root - INFO - step: 9945 loss: 2.3821 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1033 +[titan] 2025-10-05 04:40:04,154 - root - INFO - lr: 4.3851e-05 gnorm: 1.14 [ 6:05:55<18:25:51] +[titan] 2025-10-05 04:40:12,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:40:15,025 - root - INFO - step: 9950 loss: 2.4179 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:40:15,026 - root - INFO - lr: 4.3845e-05 gnorm: 1.11 [ 6:06:05<18:25:39] +[titan] 2025-10-05 04:40:25,938 - root - INFO - step: 9955 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1159 +[titan] 2025-10-05 04:40:25,938 - root - INFO - lr: 4.3838e-05 gnorm: 1.14 [ 6:06:16<18:25:28] +[titan] 2025-10-05 04:40:36,795 - root - INFO - step: 9960 loss: 2.3949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 04:40:36,795 - root - INFO - lr: 4.3832e-05 gnorm: 1.17 [ 6:06:27<18:25:16] +[titan] 2025-10-05 04:40:47,648 - root - INFO - step: 9965 loss: 2.4110 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:40:47,648 - root - INFO - lr: 4.3826e-05 gnorm: 1.15 [ 6:06:38<18:25:04] +[titan] 2025-10-05 04:40:58,540 - root - INFO - step: 9970 loss: 2.3944 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1127 +[titan] 2025-10-05 04:40:58,540 - root - INFO - lr: 4.3820e-05 gnorm: 1.18 [ 6:06:49<18:24:53] +[titan] 2025-10-05 04:41:09,431 - root - INFO - step: 9975 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 04:41:09,432 - root - INFO - lr: 4.3814e-05 gnorm: 1.19 [ 6:07:00<18:24:41] +[titan] 2025-10-05 04:41:20,364 - root - INFO - step: 9980 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 04:41:20,364 - root - INFO - lr: 4.3808e-05 
gnorm: 1.18 [ 6:07:11<18:24:30] +[titan] 2025-10-05 04:41:31,259 - root - INFO - step: 9985 loss: 2.4484 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:41:31,260 - root - INFO - lr: 4.3802e-05 gnorm: 1.15 [ 6:07:22<18:24:19] +[titan] 2025-10-05 04:41:42,148 - root - INFO - step: 9990 loss: 2.4717 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:41:42,148 - root - INFO - lr: 4.3795e-05 gnorm: 1.17 [ 6:07:33<18:24:07] +[titan] 2025-10-05 04:41:53,059 - root - INFO - step: 9995 loss: 2.3948 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1147 +[titan] 2025-10-05 04:41:53,059 - root - INFO - lr: 4.3789e-05 gnorm: 1.17 [ 6:07:44<18:23:56] +[titan] 2025-10-05 04:42:01,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:42:03,951 - root - INFO - step: 10000 loss: 2.4699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1790 +[titan] 2025-10-05 04:42:03,951 - root - INFO - lr: 4.3783e-05 gnorm: 1.18 [ 6:07:54<18:23:44] +[titan] 2025-10-05 04:42:03,951 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 04:42:23,097 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 04:42:23,097 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.15 seconds. 
+[titan] 2025-10-05 04:44:28,943 - root - INFO - step: 10005 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 2,260 tflops: 31.35 mfu: 3.17% global_avg_ntp_loss: 0.2840 global_avg_mtp_loss: 2.1282 +[titan] 2025-10-05 04:44:28,943 - root - INFO - lr: 4.3777e-05 gnorm: 1.15 [ 6:10:19<18:30:15] +[titan] 2025-10-05 04:44:39,758 - root - INFO - step: 10010 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0843 +[titan] 2025-10-05 04:44:39,759 - root - INFO - lr: 4.3771e-05 gnorm: 1.10 [ 6:10:30<18:30:03] +[titan] 2025-10-05 04:44:50,583 - root - INFO - step: 10015 loss: 2.4606 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1726 +[titan] 2025-10-05 04:44:50,583 - root - INFO - lr: 4.3765e-05 gnorm: 1.17 [ 6:10:41<18:29:51] +[titan] 2025-10-05 04:45:01,371 - root - INFO - step: 10020 loss: 2.3595 memory: 118.84GiB(85.28%) tps: 30,377 tflops: 421.43 mfu: 42.61% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0816 +[titan] 2025-10-05 04:45:01,371 - root - INFO - lr: 4.3758e-05 gnorm: 1.12 [ 6:10:52<18:29:39] +[titan] 2025-10-05 04:45:12,207 - root - INFO - step: 10025 loss: 2.3890 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1095 +[titan] 2025-10-05 04:45:12,207 - root - INFO - lr: 4.3752e-05 gnorm: 1.13 [ 6:11:03<18:29:27] +[titan] 2025-10-05 04:45:23,056 - root - INFO - step: 10030 loss: 2.4171 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:45:23,056 - root - INFO - lr: 4.3746e-05 gnorm: 1.14 [ 6:11:14<18:29:15] +[titan] 2025-10-05 04:45:33,878 - root - INFO - step: 10035 loss: 2.4258 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1417 +[titan] 2025-10-05 04:45:33,879 - root - INFO - lr: 
4.3740e-05 gnorm: 1.18 [ 6:11:24<18:29:03] +[titan] 2025-10-05 04:45:44,722 - root - INFO - step: 10040 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:45:44,722 - root - INFO - lr: 4.3734e-05 gnorm: 1.13 [ 6:11:35<18:28:51] +[titan] 2025-10-05 04:45:55,531 - root - INFO - step: 10045 loss: 2.3962 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.60 mfu: 42.53% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:45:55,531 - root - INFO - lr: 4.3728e-05 gnorm: 1.14 [ 6:11:46<18:28:39] +[titan] 2025-10-05 04:46:04,196 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:46:06,376 - root - INFO - step: 10050 loss: 2.4217 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1372 +[titan] 2025-10-05 04:46:06,376 - root - INFO - lr: 4.3721e-05 gnorm: 1.19 [ 6:11:57<18:28:27] +[titan] 2025-10-05 04:46:17,244 - root - INFO - step: 10055 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1117 +[titan] 2025-10-05 04:46:17,244 - root - INFO - lr: 4.3715e-05 gnorm: 1.09 [ 6:12:08<18:28:16] +[titan] 2025-10-05 04:46:28,093 - root - INFO - step: 10060 loss: 2.4776 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 04:46:28,093 - root - INFO - lr: 4.3709e-05 gnorm: 1.12 [ 6:12:19<18:28:04] +[titan] 2025-10-05 04:46:38,949 - root - INFO - step: 10065 loss: 2.3571 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0814 +[titan] 2025-10-05 04:46:38,949 - root - INFO - lr: 4.3703e-05 gnorm: 1.17 [ 6:12:29<18:27:52] +[titan] 2025-10-05 04:46:49,820 - root - INFO - step: 10070 loss: 2.4101 memory: 118.84GiB(85.28%) tps: 
30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:46:49,820 - root - INFO - lr: 4.3697e-05 gnorm: 1.14 [ 6:12:40<18:27:40] +[titan] 2025-10-05 04:47:00,671 - root - INFO - step: 10075 loss: 2.4112 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:47:00,671 - root - INFO - lr: 4.3690e-05 gnorm: 1.17 [ 6:12:51<18:27:28] +[titan] 2025-10-05 04:47:11,530 - root - INFO - step: 10080 loss: 2.3867 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1065 +[titan] 2025-10-05 04:47:11,530 - root - INFO - lr: 4.3684e-05 gnorm: 1.12 [ 6:13:02<18:27:16] +[titan] 2025-10-05 04:47:22,402 - root - INFO - step: 10085 loss: 2.3591 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 04:47:22,403 - root - INFO - lr: 4.3678e-05 gnorm: 1.14 [ 6:13:13<18:27:05] +[titan] 2025-10-05 04:47:33,304 - root - INFO - step: 10090 loss: 2.3953 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1153 +[titan] 2025-10-05 04:47:33,305 - root - INFO - lr: 4.3672e-05 gnorm: 1.12 [ 6:13:24<18:26:53] +[titan] 2025-10-05 04:47:44,169 - root - INFO - step: 10095 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2874 global_avg_mtp_loss: 2.1668 +[titan] 2025-10-05 04:47:44,169 - root - INFO - lr: 4.3666e-05 gnorm: 1.20 [ 6:13:35<18:26:41] +[titan] 2025-10-05 04:47:52,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:47:55,091 - root - INFO - step: 10100 loss: 2.4560 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1684 +[titan] 2025-10-05 04:47:55,091 - root - INFO - lr: 4.3659e-05 gnorm: 1.18 [ 6:13:46<18:26:29] +[titan] 2025-10-05 04:48:05,969 - root - INFO - step: 10105 loss: 2.4312 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:48:05,969 - root - INFO - lr: 4.3653e-05 gnorm: 1.10 [ 6:13:56<18:26:18] +[titan] 2025-10-05 04:48:16,842 - root - INFO - step: 10110 loss: 2.3985 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1175 +[titan] 2025-10-05 04:48:16,842 - root - INFO - lr: 4.3647e-05 gnorm: 1.15 [ 6:14:07<18:26:06] +[titan] 2025-10-05 04:48:27,739 - root - INFO - step: 10115 loss: 2.4183 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1354 +[titan] 2025-10-05 04:48:27,739 - root - INFO - lr: 4.3641e-05 gnorm: 1.11 [ 6:14:18<18:25:54] +[titan] 2025-10-05 04:48:38,638 - root - INFO - step: 10120 loss: 2.3862 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 04:48:38,639 - root - INFO - lr: 4.3635e-05 gnorm: 1.15 [ 6:14:29<18:25:43] +[titan] 2025-10-05 04:48:49,495 - root - INFO - step: 10125 loss: 2.4046 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1229 +[titan] 2025-10-05 04:48:49,495 - root - INFO - lr: 4.3628e-05 gnorm: 1.13 [ 6:14:40<18:25:31] +[titan] 2025-10-05 04:49:00,374 - root - INFO - step: 10130 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 04:49:00,374 - root - INFO - lr: 
4.3622e-05 gnorm: 1.12 [ 6:14:51<18:25:19] +[titan] 2025-10-05 04:49:11,231 - root - INFO - step: 10135 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1240 +[titan] 2025-10-05 04:49:11,231 - root - INFO - lr: 4.3616e-05 gnorm: 1.11 [ 6:15:02<18:25:07] +[titan] 2025-10-05 04:49:22,073 - root - INFO - step: 10140 loss: 2.4295 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1448 +[titan] 2025-10-05 04:49:22,073 - root - INFO - lr: 4.3610e-05 gnorm: 1.19 [ 6:15:13<18:24:55] +[titan] 2025-10-05 04:49:32,953 - root - INFO - step: 10145 loss: 2.4182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:49:32,953 - root - INFO - lr: 4.3603e-05 gnorm: 1.13 [ 6:15:23<18:24:43] +[titan] 2025-10-05 04:49:41,628 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:49:43,809 - root - INFO - step: 10150 loss: 2.4033 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1220 +[titan] 2025-10-05 04:49:43,810 - root - INFO - lr: 4.3597e-05 gnorm: 1.18 [ 6:15:34<18:24:32] +[titan] 2025-10-05 04:49:54,722 - root - INFO - step: 10155 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1491 +[titan] 2025-10-05 04:49:54,722 - root - INFO - lr: 4.3591e-05 gnorm: 1.19 [ 6:15:45<18:24:20] +[titan] 2025-10-05 04:50:05,570 - root - INFO - step: 10160 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2925 global_avg_mtp_loss: 2.2000 +[titan] 2025-10-05 04:50:05,570 - root - INFO - lr: 4.3585e-05 gnorm: 1.18 [ 6:15:56<18:24:08] +[titan] 2025-10-05 04:50:16,417 - root - INFO - step: 10165 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 04:50:16,418 - root - INFO - lr: 4.3578e-05 gnorm: 1.15 [ 6:16:07<18:23:56] +[titan] 2025-10-05 04:50:27,286 - root - INFO - step: 10170 loss: 2.4892 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:50:27,286 - root - INFO - lr: 4.3572e-05 gnorm: 1.17 [ 6:16:18<18:23:45] +[titan] 2025-10-05 04:50:38,151 - root - INFO - step: 10175 loss: 2.4728 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1825 +[titan] 2025-10-05 04:50:38,151 - root - INFO - lr: 4.3566e-05 gnorm: 1.14 [ 6:16:29<18:23:33] +[titan] 2025-10-05 04:50:49,013 - root - INFO - step: 10180 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1066 +[titan] 2025-10-05 04:50:49,013 - root - INFO - lr: 
4.3560e-05 gnorm: 1.11 [ 6:16:39<18:23:21] +[titan] 2025-10-05 04:50:59,879 - root - INFO - step: 10185 loss: 2.3308 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0576 +[titan] 2025-10-05 04:50:59,880 - root - INFO - lr: 4.3553e-05 gnorm: 1.10 [ 6:16:50<18:23:09] +[titan] 2025-10-05 04:51:10,735 - root - INFO - step: 10190 loss: 2.4005 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1191 +[titan] 2025-10-05 04:51:10,735 - root - INFO - lr: 4.3547e-05 gnorm: 1.12 [ 6:17:01<18:22:57] +[titan] 2025-10-05 04:51:21,605 - root - INFO - step: 10195 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:51:21,605 - root - INFO - lr: 4.3541e-05 gnorm: 1.07 [ 6:17:12<18:22:46] +[titan] 2025-10-05 04:51:30,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:51:32,491 - root - INFO - step: 10200 loss: 2.4592 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1708 +[titan] 2025-10-05 04:51:32,491 - root - INFO - lr: 4.3535e-05 gnorm: 1.19 [ 6:17:23<18:22:34] +[titan] 2025-10-05 04:51:43,357 - root - INFO - step: 10205 loss: 2.3585 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0823 +[titan] 2025-10-05 04:51:43,357 - root - INFO - lr: 4.3528e-05 gnorm: 1.08 [ 6:17:34<18:22:22] +[titan] 2025-10-05 04:51:54,234 - root - INFO - step: 10210 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 04:51:54,234 - root - INFO - lr: 4.3522e-05 gnorm: 1.13 [ 6:17:45<18:22:10] +[titan] 2025-10-05 04:52:05,148 - root - INFO - step: 10215 loss: 2.4224 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1397 +[titan] 2025-10-05 04:52:05,148 - root - INFO - lr: 4.3516e-05 gnorm: 1.15 [ 6:17:56<18:21:59] +[titan] 2025-10-05 04:52:16,012 - root - INFO - step: 10220 loss: 2.3880 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:52:16,012 - root - INFO - lr: 4.3510e-05 gnorm: 1.17 [ 6:18:06<18:21:47] +[titan] 2025-10-05 04:52:26,919 - root - INFO - step: 10225 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0969 +[titan] 2025-10-05 04:52:26,919 - root - INFO - lr: 4.3503e-05 gnorm: 1.13 [ 6:18:17<18:21:35] +[titan] 2025-10-05 04:52:37,795 - root - INFO - step: 10230 loss: 2.4827 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2964 global_avg_mtp_loss: 2.1864 +[titan] 2025-10-05 04:52:37,795 - root - INFO - lr: 
4.3497e-05 gnorm: 1.23 [ 6:18:28<18:21:23] +[titan] 2025-10-05 04:52:48,651 - root - INFO - step: 10235 loss: 2.3739 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0960 +[titan] 2025-10-05 04:52:48,651 - root - INFO - lr: 4.3491e-05 gnorm: 1.14 [ 6:18:39<18:21:12] +[titan] 2025-10-05 04:52:59,594 - root - INFO - step: 10240 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.43 mfu: 42.00% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0761 +[titan] 2025-10-05 04:52:59,594 - root - INFO - lr: 4.3485e-05 gnorm: 1.17 [ 6:18:50<18:21:00] +[titan] 2025-10-05 04:52:59,771 - root - INFO - Dumping profiler traces at step 10240 +[titan] 2025-10-05 04:52:59,811 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:53:10,672 - root - INFO - step: 10245 loss: 2.4638 memory: 118.84GiB(85.28%) tps: 29,580 tflops: 410.38 mfu: 41.49% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1752 +[titan] 2025-10-05 04:53:10,672 - root - INFO - lr: 4.3478e-05 gnorm: 1.18 [ 6:19:01<18:20:49] +[titan] 2025-10-05 04:53:19,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:53:21,556 - root - INFO - step: 10250 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0638 +[titan] 2025-10-05 04:53:21,556 - root - INFO - lr: 4.3472e-05 gnorm: 1.18 [ 6:19:12<18:20:37] +[titan] 2025-10-05 04:53:32,460 - root - INFO - step: 10255 loss: 2.3782 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.0997 +[titan] 2025-10-05 04:53:32,460 - root - INFO - lr: 4.3466e-05 gnorm: 1.11 [ 6:19:23<18:20:26] +[titan] 2025-10-05 04:53:43,321 - root - INFO - step: 10260 loss: 2.3383 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 04:53:43,321 - root - INFO - lr: 4.3459e-05 gnorm: 1.16 [ 6:19:34<18:20:14] +[titan] 2025-10-05 04:53:54,178 - root - INFO - step: 10265 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 04:53:54,178 - root - INFO - lr: 4.3453e-05 gnorm: 1.16 [ 6:19:45<18:20:02] +[titan] 2025-10-05 04:54:05,007 - root - INFO - step: 10270 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 04:54:05,007 - root - INFO - lr: 4.3447e-05 gnorm: 1.17 [ 6:19:55<18:19:50] +[titan] 2025-10-05 04:54:15,842 - root - INFO - step: 10275 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:54:15,843 - root - INFO - lr: 4.3440e-05 gnorm: 1.14 [ 6:20:06<18:19:38] +[titan] 2025-10-05 04:54:26,778 - root - INFO - step: 10280 loss: 2.3590 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0825 +[titan] 2025-10-05 04:54:26,778 - root - INFO - lr: 
4.3434e-05 gnorm: 1.09 [ 6:20:17<18:19:27] +[titan] 2025-10-05 04:54:37,611 - root - INFO - step: 10285 loss: 2.3467 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 04:54:37,611 - root - INFO - lr: 4.3428e-05 gnorm: 1.17 [ 6:20:28<18:19:15] +[titan] 2025-10-05 04:54:48,457 - root - INFO - step: 10290 loss: 2.3098 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 04:54:48,457 - root - INFO - lr: 4.3422e-05 gnorm: 1.13 [ 6:20:39<18:19:03] +[titan] 2025-10-05 04:54:59,307 - root - INFO - step: 10295 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 04:54:59,307 - root - INFO - lr: 4.3415e-05 gnorm: 1.19 [ 6:20:50<18:18:51] +[titan] 2025-10-05 04:55:07,970 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:55:10,154 - root - INFO - step: 10300 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:55:10,154 - root - INFO - lr: 4.3409e-05 gnorm: 1.11 [ 6:21:01<18:18:39] +[titan] 2025-10-05 04:55:20,995 - root - INFO - step: 10305 loss: 2.4115 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1272 +[titan] 2025-10-05 04:55:20,995 - root - INFO - lr: 4.3403e-05 gnorm: 1.16 [ 6:21:11<18:18:27] +[titan] 2025-10-05 04:55:31,895 - root - INFO - step: 10310 loss: 2.3942 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:55:31,895 - root - INFO - lr: 4.3396e-05 gnorm: 1.11 [ 6:21:22<18:18:16] +[titan] 2025-10-05 04:55:42,797 - root - INFO - step: 10315 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0890 +[titan] 2025-10-05 04:55:42,797 - root - INFO - lr: 4.3390e-05 gnorm: 1.14 [ 6:21:33<18:18:04] +[titan] 2025-10-05 04:55:53,631 - root - INFO - step: 10320 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0675 +[titan] 2025-10-05 04:55:53,631 - root - INFO - lr: 4.3384e-05 gnorm: 1.13 [ 6:21:44<18:17:52] +[titan] 2025-10-05 04:56:04,495 - root - INFO - step: 10325 loss: 2.3236 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 04:56:04,495 - root - INFO - lr: 4.3377e-05 gnorm: 1.11 [ 6:21:55<18:17:41] +[titan] 2025-10-05 04:56:15,368 - root - INFO - step: 10330 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0554 +[titan] 2025-10-05 04:56:15,368 - root - INFO - lr: 
4.3371e-05 gnorm: 1.11 [ 6:22:06<18:17:29] +[titan] 2025-10-05 04:56:26,235 - root - INFO - step: 10335 loss: 2.3812 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1021 +[titan] 2025-10-05 04:56:26,235 - root - INFO - lr: 4.3365e-05 gnorm: 1.13 [ 6:22:17<18:17:17] +[titan] 2025-10-05 04:56:37,100 - root - INFO - step: 10340 loss: 2.4139 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 04:56:37,100 - root - INFO - lr: 4.3358e-05 gnorm: 1.15 [ 6:22:28<18:17:05] +[titan] 2025-10-05 04:56:48,014 - root - INFO - step: 10345 loss: 2.3627 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0861 +[titan] 2025-10-05 04:56:48,014 - root - INFO - lr: 4.3352e-05 gnorm: 1.15 [ 6:22:38<18:16:54] +[titan] 2025-10-05 04:56:56,705 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:56:58,888 - root - INFO - step: 10350 loss: 2.3704 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0924 +[titan] 2025-10-05 04:56:58,888 - root - INFO - lr: 4.3346e-05 gnorm: 1.12 [ 6:22:49<18:16:42] +[titan] 2025-10-05 04:57:09,755 - root - INFO - step: 10355 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0403 +[titan] 2025-10-05 04:57:09,755 - root - INFO - lr: 4.3339e-05 gnorm: 1.14 [ 6:23:00<18:16:30] +[titan] 2025-10-05 04:57:20,636 - root - INFO - step: 10360 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1400 +[titan] 2025-10-05 04:57:20,637 - root - INFO - lr: 4.3333e-05 gnorm: 1.16 [ 6:23:11<18:16:18] +[titan] 2025-10-05 04:57:31,521 - root - INFO - step: 10365 loss: 2.3992 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 04:57:31,521 - root - INFO - lr: 4.3327e-05 gnorm: 1.14 [ 6:23:22<18:16:07] +[titan] 2025-10-05 04:57:42,396 - root - INFO - step: 10370 loss: 2.4732 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1823 +[titan] 2025-10-05 04:57:42,396 - root - INFO - lr: 4.3320e-05 gnorm: 1.14 [ 6:23:33<18:15:55] +[titan] 2025-10-05 04:57:53,311 - root - INFO - step: 10375 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1107 +[titan] 2025-10-05 04:57:53,311 - root - INFO - lr: 4.3314e-05 gnorm: 1.17 [ 6:23:44<18:15:43] +[titan] 2025-10-05 04:58:04,191 - root - INFO - step: 10380 loss: 2.3285 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 04:58:04,191 - root - INFO - lr: 
4.3308e-05 gnorm: 1.15 [ 6:23:55<18:15:32] +[titan] 2025-10-05 04:58:15,071 - root - INFO - step: 10385 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:58:15,072 - root - INFO - lr: 4.3301e-05 gnorm: 2.89 [ 6:24:05<18:15:20] +[titan] 2025-10-05 04:58:25,961 - root - INFO - step: 10390 loss: 2.4472 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 04:58:25,961 - root - INFO - lr: 4.3295e-05 gnorm: 1.19 [ 6:24:16<18:15:08] +[titan] 2025-10-05 04:58:36,832 - root - INFO - step: 10395 loss: 2.4116 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:58:36,832 - root - INFO - lr: 4.3289e-05 gnorm: 1.19 [ 6:24:27<18:14:57] +[titan] 2025-10-05 04:58:45,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:58:47,732 - root - INFO - step: 10400 loss: 2.3889 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:58:47,733 - root - INFO - lr: 4.3282e-05 gnorm: 1.15 [ 6:24:38<18:14:45] +[titan] 2025-10-05 04:58:58,620 - root - INFO - step: 10405 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1661 +[titan] 2025-10-05 04:58:58,620 - root - INFO - lr: 4.3276e-05 gnorm: 1.15 [ 6:24:49<18:14:33] +[titan] 2025-10-05 04:59:09,537 - root - INFO - step: 10410 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0667 +[titan] 2025-10-05 04:59:09,538 - root - INFO - lr: 4.3270e-05 gnorm: 1.09 [ 6:25:00<18:14:22] +[titan] 2025-10-05 04:59:20,430 - root - INFO - step: 10415 loss: 2.4412 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1531 +[titan] 2025-10-05 04:59:20,430 - root - INFO - lr: 4.3263e-05 gnorm: 1.11 [ 6:25:11<18:14:10] +[titan] 2025-10-05 04:59:31,331 - root - INFO - step: 10420 loss: 2.4559 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1675 +[titan] 2025-10-05 04:59:31,331 - root - INFO - lr: 4.3257e-05 gnorm: 1.18 [ 6:25:22<18:13:58] +[titan] 2025-10-05 04:59:42,198 - root - INFO - step: 10425 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0782 +[titan] 2025-10-05 04:59:42,198 - root - INFO - lr: 4.3250e-05 gnorm: 1.15 [ 6:25:33<18:13:47] +[titan] 2025-10-05 04:59:53,072 - root - INFO - step: 10430 loss: 2.3763 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0987 +[titan] 2025-10-05 04:59:53,072 - root - INFO - lr: 
4.3244e-05 gnorm: 1.14 [ 6:25:43<18:13:35] +[titan] 2025-10-05 05:00:03,938 - root - INFO - step: 10435 loss: 2.4170 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2835 global_avg_mtp_loss: 2.1335 +[titan] 2025-10-05 05:00:03,939 - root - INFO - lr: 4.3238e-05 gnorm: 1.15 [ 6:25:54<18:13:23] +[titan] 2025-10-05 05:00:14,820 - root - INFO - step: 10440 loss: 2.4296 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 05:00:14,820 - root - INFO - lr: 4.3231e-05 gnorm: 1.12 [ 6:26:05<18:13:11] +[titan] 2025-10-05 05:00:25,686 - root - INFO - step: 10445 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0958 +[titan] 2025-10-05 05:00:25,686 - root - INFO - lr: 4.3225e-05 gnorm: 1.15 [ 6:26:16<18:13:00] +[titan] 2025-10-05 05:00:34,395 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:00:36,573 - root - INFO - step: 10450 loss: 2.3225 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:00:36,574 - root - INFO - lr: 4.3219e-05 gnorm: 1.12 [ 6:26:27<18:12:48] +[titan] 2025-10-05 05:00:47,453 - root - INFO - step: 10455 loss: 2.2956 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 05:00:47,453 - root - INFO - lr: 4.3212e-05 gnorm: 1.12 [ 6:26:38<18:12:36] +[titan] 2025-10-05 05:00:58,326 - root - INFO - step: 10460 loss: 2.4231 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1381 +[titan] 2025-10-05 05:00:58,326 - root - INFO - lr: 4.3206e-05 gnorm: 1.13 [ 6:26:49<18:12:25] +[titan] 2025-10-05 05:01:09,212 - root - INFO - step: 10465 loss: 2.3984 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1166 +[titan] 2025-10-05 05:01:09,212 - root - INFO - lr: 4.3199e-05 gnorm: 1.16 [ 6:27:00<18:12:13] +[titan] 2025-10-05 05:01:20,082 - root - INFO - step: 10470 loss: 2.3857 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1068 +[titan] 2025-10-05 05:01:20,083 - root - INFO - lr: 4.3193e-05 gnorm: 1.17 [ 6:27:10<18:12:01] +[titan] 2025-10-05 05:01:31,025 - root - INFO - step: 10475 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0845 +[titan] 2025-10-05 05:01:31,025 - root - INFO - lr: 4.3187e-05 gnorm: 1.18 [ 6:27:21<18:11:50] +[titan] 2025-10-05 05:01:41,900 - root - INFO - step: 10480 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1605 +[titan] 2025-10-05 05:01:41,900 - root - INFO - lr: 
4.3180e-05 gnorm: 1.14 [ 6:27:32<18:11:38] +[titan] 2025-10-05 05:01:52,794 - root - INFO - step: 10485 loss: 2.3469 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0715 +[titan] 2025-10-05 05:01:52,794 - root - INFO - lr: 4.3174e-05 gnorm: 1.11 [ 6:27:43<18:11:26] +[titan] 2025-10-05 05:02:03,640 - root - INFO - step: 10490 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:02:03,641 - root - INFO - lr: 4.3167e-05 gnorm: 1.12 [ 6:27:54<18:11:14] +[titan] 2025-10-05 05:02:14,499 - root - INFO - step: 10495 loss: 2.4247 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2861 global_avg_mtp_loss: 2.1386 +[titan] 2025-10-05 05:02:14,499 - root - INFO - lr: 4.3161e-05 gnorm: 1.11 [ 6:28:05<18:11:03] +[titan] 2025-10-05 05:02:23,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:02:25,357 - root - INFO - step: 10500 loss: 2.3813 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1019 +[titan] 2025-10-05 05:02:25,357 - root - INFO - lr: 4.3155e-05 gnorm: 1.11 [ 6:28:16<18:10:51] +[titan] 2025-10-05 05:02:36,309 - root - INFO - step: 10505 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1463 +[titan] 2025-10-05 05:02:36,310 - root - INFO - lr: 4.3148e-05 gnorm: 1.31 [ 6:28:27<18:10:39] +[titan] 2025-10-05 05:02:47,169 - root - INFO - step: 10510 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0752 +[titan] 2025-10-05 05:02:47,169 - root - INFO - lr: 4.3142e-05 gnorm: 1.12 [ 6:28:38<18:10:28] +[titan] 2025-10-05 05:02:58,035 - root - INFO - step: 10515 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1262 +[titan] 2025-10-05 05:02:58,035 - root - INFO - lr: 4.3135e-05 gnorm: 1.20 [ 6:28:48<18:10:16] +[titan] 2025-10-05 05:03:08,894 - root - INFO - step: 10520 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0523 +[titan] 2025-10-05 05:03:08,894 - root - INFO - lr: 4.3129e-05 gnorm: 1.10 [ 6:28:59<18:10:04] +[titan] 2025-10-05 05:03:19,768 - root - INFO - step: 10525 loss: 2.4870 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 05:03:19,768 - root - INFO - lr: 4.3122e-05 gnorm: 1.18 [ 6:29:10<18:09:52] +[titan] 2025-10-05 05:03:30,631 - root - INFO - step: 10530 loss: 2.3951 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:03:30,631 - root - INFO - lr: 
4.3116e-05 gnorm: 1.13 [ 6:29:21<18:09:41] +[titan] 2025-10-05 05:03:41,571 - root - INFO - step: 10535 loss: 2.3677 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:03:41,571 - root - INFO - lr: 4.3110e-05 gnorm: 1.19 [ 6:29:32<18:09:29] +[titan] 2025-10-05 05:03:52,432 - root - INFO - step: 10540 loss: 2.4252 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1411 +[titan] 2025-10-05 05:03:52,432 - root - INFO - lr: 4.3103e-05 gnorm: 1.19 [ 6:29:43<18:09:17] +[titan] 2025-10-05 05:04:03,276 - root - INFO - step: 10545 loss: 2.4280 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1435 +[titan] 2025-10-05 05:04:03,277 - root - INFO - lr: 4.3097e-05 gnorm: 1.16 [ 6:29:54<18:09:06] +[titan] 2025-10-05 05:04:11,963 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:04:14,149 - root - INFO - step: 10550 loss: 2.2936 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0258 +[titan] 2025-10-05 05:04:14,149 - root - INFO - lr: 4.3090e-05 gnorm: 1.14 [ 6:30:05<18:08:54] +[titan] 2025-10-05 05:04:25,007 - root - INFO - step: 10555 loss: 2.3687 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0910 +[titan] 2025-10-05 05:04:25,007 - root - INFO - lr: 4.3084e-05 gnorm: 1.18 [ 6:30:15<18:08:42] +[titan] 2025-10-05 05:04:35,912 - root - INFO - step: 10560 loss: 2.4093 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1275 +[titan] 2025-10-05 05:04:35,912 - root - INFO - lr: 4.3077e-05 gnorm: 1.23 [ 6:30:26<18:08:31] +[titan] 2025-10-05 05:04:46,752 - root - INFO - step: 10565 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1548 +[titan] 2025-10-05 05:04:46,752 - root - INFO - lr: 4.3071e-05 gnorm: 1.10 [ 6:30:37<18:08:19] +[titan] 2025-10-05 05:04:57,630 - root - INFO - step: 10570 loss: 2.3849 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1052 +[titan] 2025-10-05 05:04:57,630 - root - INFO - lr: 4.3065e-05 gnorm: 1.13 [ 6:30:48<18:08:07] +[titan] 2025-10-05 05:05:08,469 - root - INFO - step: 10575 loss: 2.4749 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1842 +[titan] 2025-10-05 05:05:08,469 - root - INFO - lr: 4.3058e-05 gnorm: 1.19 [ 6:30:59<18:07:55] +[titan] 2025-10-05 05:05:19,334 - root - INFO - step: 10580 loss: 2.3851 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1046 +[titan] 2025-10-05 05:05:19,334 - root - INFO - lr: 
4.3052e-05 gnorm: 1.12 [ 6:31:10<18:07:44] +[titan] 2025-10-05 05:05:30,220 - root - INFO - step: 10585 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0900 +[titan] 2025-10-05 05:05:30,221 - root - INFO - lr: 4.3045e-05 gnorm: 1.17 [ 6:31:21<18:07:32] +[titan] 2025-10-05 05:05:41,134 - root - INFO - step: 10590 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0866 +[titan] 2025-10-05 05:05:41,134 - root - INFO - lr: 4.3039e-05 gnorm: 1.10 [ 6:31:32<18:07:20] +[titan] 2025-10-05 05:05:51,981 - root - INFO - step: 10595 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 05:05:51,981 - root - INFO - lr: 4.3032e-05 gnorm: 1.13 [ 6:31:42<18:07:09] +[titan] 2025-10-05 05:06:00,680 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:06:02,853 - root - INFO - step: 10600 loss: 2.4272 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1436 +[titan] 2025-10-05 05:06:02,853 - root - INFO - lr: 4.3026e-05 gnorm: 1.13 [ 6:31:53<18:06:57] +[titan] 2025-10-05 05:06:13,702 - root - INFO - step: 10605 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1154 +[titan] 2025-10-05 05:06:13,702 - root - INFO - lr: 4.3019e-05 gnorm: 1.18 [ 6:32:04<18:06:45] +[titan] 2025-10-05 05:06:24,546 - root - INFO - step: 10610 loss: 2.4439 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 05:06:24,547 - root - INFO - lr: 4.3013e-05 gnorm: 1.17 [ 6:32:15<18:06:33] +[titan] 2025-10-05 05:06:35,421 - root - INFO - step: 10615 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0903 +[titan] 2025-10-05 05:06:35,422 - root - INFO - lr: 4.3006e-05 gnorm: 1.10 [ 6:32:26<18:06:22] +[titan] 2025-10-05 05:06:46,307 - root - INFO - step: 10620 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 05:06:46,307 - root - INFO - lr: 4.3000e-05 gnorm: 1.15 [ 6:32:37<18:06:10] +[titan] 2025-10-05 05:06:57,167 - root - INFO - step: 10625 loss: 2.3874 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1077 +[titan] 2025-10-05 05:06:57,168 - root - INFO - lr: 4.2993e-05 gnorm: 1.16 [ 6:32:48<18:05:58] +[titan] 2025-10-05 05:07:08,027 - root - INFO - step: 10630 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0719 +[titan] 2025-10-05 05:07:08,027 - root - INFO - lr: 
4.2987e-05 gnorm: 1.17 [ 6:32:58<18:05:47] +[titan] 2025-10-05 05:07:18,912 - root - INFO - step: 10635 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0503 +[titan] 2025-10-05 05:07:18,912 - root - INFO - lr: 4.2981e-05 gnorm: 1.11 [ 6:33:09<18:05:35] +[titan] 2025-10-05 05:07:29,770 - root - INFO - step: 10640 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0909 +[titan] 2025-10-05 05:07:29,770 - root - INFO - lr: 4.2974e-05 gnorm: 1.11 [ 6:33:20<18:05:23] +[titan] 2025-10-05 05:07:40,642 - root - INFO - step: 10645 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1251 +[titan] 2025-10-05 05:07:40,642 - root - INFO - lr: 4.2968e-05 gnorm: 1.15 [ 6:33:31<18:05:11] +[titan] 2025-10-05 05:07:49,320 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:07:51,513 - root - INFO - step: 10650 loss: 2.3800 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 05:07:51,513 - root - INFO - lr: 4.2961e-05 gnorm: 1.13 [ 6:33:42<18:05:00] +[titan] 2025-10-05 05:08:02,386 - root - INFO - step: 10655 loss: 2.2876 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0198 +[titan] 2025-10-05 05:08:02,387 - root - INFO - lr: 4.2955e-05 gnorm: 1.11 [ 6:33:53<18:04:48] +[titan] 2025-10-05 05:08:13,251 - root - INFO - step: 10660 loss: 2.3831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 05:08:13,252 - root - INFO - lr: 4.2948e-05 gnorm: 1.14 [ 6:34:04<18:04:36] +[titan] 2025-10-05 05:08:24,145 - root - INFO - step: 10665 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 05:08:24,145 - root - INFO - lr: 4.2942e-05 gnorm: 1.11 [ 6:34:15<18:04:25] +[titan] 2025-10-05 05:08:34,996 - root - INFO - step: 10670 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 05:08:34,996 - root - INFO - lr: 4.2935e-05 gnorm: 1.10 [ 6:34:25<18:04:13] +[titan] 2025-10-05 05:08:45,876 - root - INFO - step: 10675 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1231 +[titan] 2025-10-05 05:08:45,876 - root - INFO - lr: 4.2929e-05 gnorm: 1.11 [ 6:34:36<18:04:01] +[titan] 2025-10-05 05:08:56,738 - root - INFO - step: 10680 loss: 2.4221 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1374 +[titan] 2025-10-05 05:08:56,738 - root - INFO - lr: 
4.2922e-05 gnorm: 1.12 [ 6:34:47<18:03:50] +[titan] 2025-10-05 05:09:07,575 - root - INFO - step: 10685 loss: 2.4893 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1990 +[titan] 2025-10-05 05:09:07,575 - root - INFO - lr: 4.2916e-05 gnorm: 1.14 [ 6:34:58<18:03:38] +[titan] 2025-10-05 05:09:18,438 - root - INFO - step: 10690 loss: 2.3907 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1112 +[titan] 2025-10-05 05:09:18,438 - root - INFO - lr: 4.2909e-05 gnorm: 1.15 [ 6:35:09<18:03:26] +[titan] 2025-10-05 05:09:29,320 - root - INFO - step: 10695 loss: 2.3485 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0733 +[titan] 2025-10-05 05:09:29,320 - root - INFO - lr: 4.2903e-05 gnorm: 1.12 [ 6:35:20<18:03:14] +[titan] 2025-10-05 05:09:38,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:09:40,188 - root - INFO - step: 10700 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0915 +[titan] 2025-10-05 05:09:40,188 - root - INFO - lr: 4.2896e-05 gnorm: 1.13 [ 6:35:31<18:03:03] +[titan] 2025-10-05 05:09:51,053 - root - INFO - step: 10705 loss: 2.4598 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1721 +[titan] 2025-10-05 05:09:51,054 - root - INFO - lr: 4.2890e-05 gnorm: 1.14 [ 6:35:41<18:02:51] +[titan] 2025-10-05 05:10:01,930 - root - INFO - step: 10710 loss: 2.4459 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 05:10:01,930 - root - INFO - lr: 4.2883e-05 gnorm: 1.13 [ 6:35:52<18:02:39] +[titan] 2025-10-05 05:10:12,779 - root - INFO - step: 10715 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:10:12,779 - root - INFO - lr: 4.2877e-05 gnorm: 1.10 [ 6:36:03<18:02:28] +[titan] 2025-10-05 05:10:23,641 - root - INFO - step: 10720 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0671 +[titan] 2025-10-05 05:10:23,641 - root - INFO - lr: 4.2870e-05 gnorm: 1.07 [ 6:36:14<18:02:16] +[titan] 2025-10-05 05:10:34,518 - root - INFO - step: 10725 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 05:10:34,518 - root - INFO - lr: 4.2864e-05 gnorm: 1.07 [ 6:36:25<18:02:04] +[titan] 2025-10-05 05:10:45,426 - root - INFO - step: 10730 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0982 +[titan] 2025-10-05 05:10:45,426 - root - INFO - lr: 
4.2857e-05 gnorm: 1.17 [ 6:36:36<18:01:53] +[titan] 2025-10-05 05:10:56,306 - root - INFO - step: 10735 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 05:10:56,306 - root - INFO - lr: 4.2851e-05 gnorm: 1.12 [ 6:36:47<18:01:41] +[titan] 2025-10-05 05:11:07,161 - root - INFO - step: 10740 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:11:07,161 - root - INFO - lr: 4.2844e-05 gnorm: 1.17 [ 6:36:58<18:01:29] +[titan] 2025-10-05 05:11:18,031 - root - INFO - step: 10745 loss: 2.3429 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0692 +[titan] 2025-10-05 05:11:18,031 - root - INFO - lr: 4.2837e-05 gnorm: 1.13 [ 6:37:08<18:01:18] +[titan] 2025-10-05 05:11:26,767 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:11:28,948 - root - INFO - step: 10750 loss: 2.2983 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 05:11:28,948 - root - INFO - lr: 4.2831e-05 gnorm: 1.14 [ 6:37:19<18:01:06] +[titan] 2025-10-05 05:11:33,469 - root - INFO - Dumping profiler traces at step 10752 +[titan] 2025-10-05 05:11:33,511 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:11:40,090 - root - INFO - step: 10755 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 29,411 tflops: 408.03 mfu: 41.26% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 05:11:40,090 - root - INFO - lr: 4.2824e-05 gnorm: 1.14 [ 6:37:30<18:00:55] +[titan] 2025-10-05 05:11:50,993 - root - INFO - step: 10760 loss: 2.3455 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0704 +[titan] 2025-10-05 05:11:50,993 - root - INFO - lr: 4.2818e-05 gnorm: 1.14 [ 6:37:41<18:00:43] +[titan] 2025-10-05 05:12:01,856 - root - INFO - step: 10765 loss: 2.3069 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0370 +[titan] 2025-10-05 05:12:01,857 - root - INFO - lr: 4.2811e-05 gnorm: 1.12 [ 6:37:52<18:00:32] +[titan] 2025-10-05 05:12:12,697 - root - INFO - step: 10770 loss: 2.3339 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 05:12:12,697 - root - INFO - lr: 4.2805e-05 gnorm: 1.09 [ 6:38:03<18:00:20] +[titan] 2025-10-05 05:12:23,573 - root - INFO - step: 10775 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1096 +[titan] 2025-10-05 05:12:23,573 - root - INFO - lr: 4.2798e-05 gnorm: 1.09 [ 6:38:14<18:00:08] +[titan] 2025-10-05 05:12:34,428 - root - INFO - step: 10780 loss: 
2.2969 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0279 +[titan] 2025-10-05 05:12:34,428 - root - INFO - lr: 4.2792e-05 gnorm: 1.09 [ 6:38:25<17:59:57] +[titan] 2025-10-05 05:12:45,414 - root - INFO - step: 10785 loss: 2.3471 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 05:12:45,414 - root - INFO - lr: 4.2785e-05 gnorm: 1.13 [ 6:38:36<17:59:45] +[titan] 2025-10-05 05:12:56,296 - root - INFO - step: 10790 loss: 2.3752 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0968 +[titan] 2025-10-05 05:12:56,297 - root - INFO - lr: 4.2779e-05 gnorm: 1.12 [ 6:38:47<17:59:34] +[titan] 2025-10-05 05:13:07,167 - root - INFO - step: 10795 loss: 2.3683 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:13:07,167 - root - INFO - lr: 4.2772e-05 gnorm: 1.15 [ 6:38:58<17:59:22] +[titan] 2025-10-05 05:13:15,828 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:13:18,033 - root - INFO - step: 10800 loss: 2.3892 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1094 +[titan] 2025-10-05 05:13:18,033 - root - INFO - lr: 4.2765e-05 gnorm: 1.12 [ 6:39:08<17:59:10] +[titan] 2025-10-05 05:13:28,909 - root - INFO - step: 10805 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0879 +[titan] 2025-10-05 05:13:28,909 - root - INFO - lr: 4.2759e-05 gnorm: 1.13 [ 6:39:19<17:58:59] +[titan] 2025-10-05 05:13:39,766 - root - INFO - step: 10810 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 05:13:39,766 - root - INFO - lr: 4.2752e-05 gnorm: 1.11 [ 6:39:30<17:58:47] +[titan] 2025-10-05 05:13:50,697 - root - INFO - step: 10815 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1334 +[titan] 2025-10-05 05:13:50,697 - root - INFO - lr: 4.2746e-05 gnorm: 1.13 [ 6:39:41<17:58:35] +[titan] 2025-10-05 05:14:01,553 - root - INFO - step: 10820 loss: 2.3463 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:14:01,554 - root - INFO - lr: 4.2739e-05 gnorm: 1.09 [ 6:39:52<17:58:24] +[titan] 2025-10-05 05:14:12,442 - root - INFO - step: 10825 loss: 2.3705 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0925 +[titan] 2025-10-05 05:14:12,442 - root - INFO - lr: 4.2733e-05 gnorm: 1.17 [ 6:40:03<17:58:12] +[titan] 2025-10-05 05:14:23,285 - root - INFO - step: 10830 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:14:23,285 - root - INFO - lr: 
4.2726e-05 gnorm: 1.13 [ 6:40:14<17:58:00] +[titan] 2025-10-05 05:14:34,165 - root - INFO - step: 10835 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 05:14:34,166 - root - INFO - lr: 4.2720e-05 gnorm: 1.16 [ 6:40:25<17:57:49] +[titan] 2025-10-05 05:14:45,051 - root - INFO - step: 10840 loss: 2.3728 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0952 +[titan] 2025-10-05 05:14:45,052 - root - INFO - lr: 4.2713e-05 gnorm: 1.13 [ 6:40:35<17:57:37] +[titan] 2025-10-05 05:14:55,878 - root - INFO - step: 10845 loss: 2.4128 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 05:14:55,878 - root - INFO - lr: 4.2706e-05 gnorm: 1.10 [ 6:40:46<17:57:25] +[titan] 2025-10-05 05:15:04,525 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:15:06,705 - root - INFO - step: 10850 loss: 2.3718 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:15:06,705 - root - INFO - lr: 4.2700e-05 gnorm: 1.12 [ 6:40:57<17:57:13] +[titan] 2025-10-05 05:15:17,575 - root - INFO - step: 10855 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0802 +[titan] 2025-10-05 05:15:17,576 - root - INFO - lr: 4.2693e-05 gnorm: 1.14 [ 6:41:08<17:57:02] +[titan] 2025-10-05 05:15:28,456 - root - INFO - step: 10860 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0820 +[titan] 2025-10-05 05:15:28,456 - root - INFO - lr: 4.2687e-05 gnorm: 1.13 [ 6:41:19<17:56:50] +[titan] 2025-10-05 05:15:39,313 - root - INFO - step: 10865 loss: 2.4256 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 05:15:39,313 - root - INFO - lr: 4.2680e-05 gnorm: 1.10 [ 6:41:30<17:56:38] +[titan] 2025-10-05 05:15:50,205 - root - INFO - step: 10870 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 05:15:50,205 - root - INFO - lr: 4.2673e-05 gnorm: 1.13 [ 6:41:41<17:56:27] +[titan] 2025-10-05 05:16:01,082 - root - INFO - step: 10875 loss: 2.3634 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:16:01,082 - root - INFO - lr: 4.2667e-05 gnorm: 1.15 [ 6:41:51<17:56:15] +[titan] 2025-10-05 05:16:11,946 - root - INFO - step: 10880 loss: 2.3075 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 05:16:11,946 - root - INFO - lr: 
4.2660e-05 gnorm: 1.14 [ 6:42:02<17:56:04] +[titan] 2025-10-05 05:16:22,841 - root - INFO - step: 10885 loss: 2.4065 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1258 +[titan] 2025-10-05 05:16:22,841 - root - INFO - lr: 4.2654e-05 gnorm: 1.21 [ 6:42:13<17:55:52] +[titan] 2025-10-05 05:16:33,734 - root - INFO - step: 10890 loss: 2.3635 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0864 +[titan] 2025-10-05 05:16:33,734 - root - INFO - lr: 4.2647e-05 gnorm: 1.10 [ 6:42:24<17:55:40] +[titan] 2025-10-05 05:16:44,609 - root - INFO - step: 10895 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1223 +[titan] 2025-10-05 05:16:44,609 - root - INFO - lr: 4.2640e-05 gnorm: 1.11 [ 6:42:35<17:55:29] +[titan] 2025-10-05 05:16:53,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:16:55,473 - root - INFO - step: 10900 loss: 2.3494 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0759 +[titan] 2025-10-05 05:16:55,473 - root - INFO - lr: 4.2634e-05 gnorm: 1.15 [ 6:42:46<17:55:17] +[titan] 2025-10-05 05:17:06,345 - root - INFO - step: 10905 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 05:17:06,345 - root - INFO - lr: 4.2627e-05 gnorm: 1.13 [ 6:42:57<17:55:05] +[titan] 2025-10-05 05:17:17,231 - root - INFO - step: 10910 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0659 +[titan] 2025-10-05 05:17:17,231 - root - INFO - lr: 4.2621e-05 gnorm: 1.17 [ 6:43:08<17:54:54] +[titan] 2025-10-05 05:17:28,109 - root - INFO - step: 10915 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0894 +[titan] 2025-10-05 05:17:28,110 - root - INFO - lr: 4.2614e-05 gnorm: 1.19 [ 6:43:18<17:54:42] +[titan] 2025-10-05 05:17:39,014 - root - INFO - step: 10920 loss: 2.3277 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:17:39,014 - root - INFO - lr: 4.2607e-05 gnorm: 1.14 [ 6:43:29<17:54:31] +[titan] 2025-10-05 05:17:49,944 - root - INFO - step: 10925 loss: 2.3202 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0487 +[titan] 2025-10-05 05:17:49,944 - root - INFO - lr: 4.2601e-05 gnorm: 1.12 [ 6:43:40<17:54:19] +[titan] 2025-10-05 05:18:00,806 - root - INFO - step: 10930 loss: 2.3343 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0611 +[titan] 2025-10-05 05:18:00,807 - root - INFO - lr: 
4.2594e-05 gnorm: 1.12 [ 6:43:51<17:54:07] +[titan] 2025-10-05 05:18:11,668 - root - INFO - step: 10935 loss: 2.4012 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1192 +[titan] 2025-10-05 05:18:11,669 - root - INFO - lr: 4.2588e-05 gnorm: 1.13 [ 6:44:02<17:53:56] +[titan] 2025-10-05 05:18:22,533 - root - INFO - step: 10940 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:18:22,533 - root - INFO - lr: 4.2581e-05 gnorm: 1.10 [ 6:44:13<17:53:44] +[titan] 2025-10-05 05:18:33,393 - root - INFO - step: 10945 loss: 2.3284 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0574 +[titan] 2025-10-05 05:18:33,393 - root - INFO - lr: 4.2574e-05 gnorm: 1.16 [ 6:44:24<17:53:32] +[titan] 2025-10-05 05:18:42,068 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:18:44,246 - root - INFO - step: 10950 loss: 2.3482 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0732 +[titan] 2025-10-05 05:18:44,246 - root - INFO - lr: 4.2568e-05 gnorm: 1.17 [ 6:44:35<17:53:21] +[titan] 2025-10-05 05:18:55,149 - root - INFO - step: 10955 loss: 2.4275 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 05:18:55,149 - root - INFO - lr: 4.2561e-05 gnorm: 1.19 [ 6:44:46<17:53:09] +[titan] 2025-10-05 05:19:06,006 - root - INFO - step: 10960 loss: 2.3559 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 05:19:06,006 - root - INFO - lr: 4.2554e-05 gnorm: 1.17 [ 6:44:56<17:52:57] +[titan] 2025-10-05 05:19:16,844 - root - INFO - step: 10965 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0690 +[titan] 2025-10-05 05:19:16,844 - root - INFO - lr: 4.2548e-05 gnorm: 1.13 [ 6:45:07<17:52:46] +[titan] 2025-10-05 05:19:27,707 - root - INFO - step: 10970 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0665 +[titan] 2025-10-05 05:19:27,707 - root - INFO - lr: 4.2541e-05 gnorm: 1.11 [ 6:45:18<17:52:34] +[titan] 2025-10-05 05:19:38,565 - root - INFO - step: 10975 loss: 2.4017 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1202 +[titan] 2025-10-05 05:19:38,565 - root - INFO - lr: 4.2535e-05 gnorm: 1.13 [ 6:45:29<17:52:22] +[titan] 2025-10-05 05:19:49,430 - root - INFO - step: 10980 loss: 2.3707 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0929 +[titan] 2025-10-05 05:19:49,430 - root - INFO - lr: 
4.2528e-05 gnorm: 1.14 [ 6:45:40<17:52:11] +[titan] 2025-10-05 05:20:00,329 - root - INFO - step: 10985 loss: 2.3910 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 05:20:00,329 - root - INFO - lr: 4.2521e-05 gnorm: 1.11 [ 6:45:51<17:51:59] +[titan] 2025-10-05 05:20:11,199 - root - INFO - step: 10990 loss: 2.2943 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 05:20:11,199 - root - INFO - lr: 4.2515e-05 gnorm: 1.15 [ 6:46:02<17:51:47] +[titan] 2025-10-05 05:20:22,060 - root - INFO - step: 10995 loss: 2.4220 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1390 +[titan] 2025-10-05 05:20:22,060 - root - INFO - lr: 4.2508e-05 gnorm: 1.17 [ 6:46:12<17:51:36] +[titan] 2025-10-05 05:20:30,769 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:20:32,950 - root - INFO - step: 11000 loss: 2.4329 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 05:20:32,950 - root - INFO - lr: 4.2501e-05 gnorm: 1.13 [ 6:46:23<17:51:24] +[titan] 2025-10-05 05:20:43,793 - root - INFO - step: 11005 loss: 2.3674 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0912 +[titan] 2025-10-05 05:20:43,793 - root - INFO - lr: 4.2495e-05 gnorm: 1.13 [ 6:46:34<17:51:12] +[titan] 2025-10-05 05:20:54,676 - root - INFO - step: 11010 loss: 2.3859 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.1074 +[titan] 2025-10-05 05:20:54,677 - root - INFO - lr: 4.2488e-05 gnorm: 1.23 [ 6:46:45<17:51:01] +[titan] 2025-10-05 05:21:05,537 - root - INFO - step: 11015 loss: 2.4219 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 05:21:05,537 - root - INFO - lr: 4.2481e-05 gnorm: 1.14 [ 6:46:56<17:50:49] +[titan] 2025-10-05 05:21:16,444 - root - INFO - step: 11020 loss: 2.3693 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0921 +[titan] 2025-10-05 05:21:16,444 - root - INFO - lr: 4.2475e-05 gnorm: 1.15 [ 6:47:07<17:50:38] +[titan] 2025-10-05 05:21:27,322 - root - INFO - step: 11025 loss: 2.4120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1303 +[titan] 2025-10-05 05:21:27,323 - root - INFO - lr: 4.2468e-05 gnorm: 1.14 [ 6:47:18<17:50:26] +[titan] 2025-10-05 05:21:38,201 - root - INFO - step: 11030 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2721 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:21:38,202 - root - INFO - lr: 
4.2461e-05 gnorm: 1.11 [ 6:47:29<17:50:14] +[titan] 2025-10-05 05:21:49,263 - root - INFO - step: 11035 loss: 2.3662 memory: 118.84GiB(85.28%) tps: 29,623 tflops: 410.98 mfu: 41.55% global_avg_ntp_loss: 0.2773 global_avg_mtp_loss: 2.0889 +[titan] 2025-10-05 05:21:49,264 - root - INFO - lr: 4.2455e-05 gnorm: 1.06 [ 6:47:40<17:50:03] +[titan] 2025-10-05 05:22:00,112 - root - INFO - step: 11040 loss: 2.3713 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0938 +[titan] 2025-10-05 05:22:00,112 - root - INFO - lr: 4.2448e-05 gnorm: 1.16 [ 6:47:50<17:49:51] +[titan] 2025-10-05 05:22:10,978 - root - INFO - step: 11045 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0794 +[titan] 2025-10-05 05:22:10,978 - root - INFO - lr: 4.2441e-05 gnorm: 1.12 [ 6:48:01<17:49:40] +[titan] 2025-10-05 05:22:19,680 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:22:21,866 - root - INFO - step: 11050 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:22:21,866 - root - INFO - lr: 4.2435e-05 gnorm: 1.18 [ 6:48:12<17:49:28] +[titan] 2025-10-05 05:22:32,725 - root - INFO - step: 11055 loss: 2.4619 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1734 +[titan] 2025-10-05 05:22:32,725 - root - INFO - lr: 4.2428e-05 gnorm: 1.17 [ 6:48:23<17:49:17] +[titan] 2025-10-05 05:22:43,603 - root - INFO - step: 11060 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 05:22:43,604 - root - INFO - lr: 4.2421e-05 gnorm: 1.18 [ 6:48:34<17:49:05] +[titan] 2025-10-05 05:22:54,557 - root - INFO - step: 11065 loss: 2.3059 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0363 +[titan] 2025-10-05 05:22:54,558 - root - INFO - lr: 4.2415e-05 gnorm: 1.11 [ 6:48:45<17:48:54] +[titan] 2025-10-05 05:23:05,447 - root - INFO - step: 11070 loss: 2.3833 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1048 +[titan] 2025-10-05 05:23:05,447 - root - INFO - lr: 4.2408e-05 gnorm: 1.15 [ 6:48:56<17:48:42] +[titan] 2025-10-05 05:23:16,319 - root - INFO - step: 11075 loss: 2.3472 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:23:16,319 - root - INFO - lr: 4.2401e-05 gnorm: 1.12 [ 6:49:07<17:48:30] +[titan] 2025-10-05 05:23:27,231 - root - INFO - step: 11080 loss: 2.3159 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0451 +[titan] 2025-10-05 05:23:27,231 - root - INFO - lr: 
4.2395e-05 gnorm: 1.15 [ 6:49:18<17:48:19] +[titan] 2025-10-05 05:23:38,120 - root - INFO - step: 11085 loss: 2.3918 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 05:23:38,120 - root - INFO - lr: 4.2388e-05 gnorm: 1.10 [ 6:49:28<17:48:07] +[titan] 2025-10-05 05:23:48,999 - root - INFO - step: 11090 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:23:48,999 - root - INFO - lr: 4.2381e-05 gnorm: 1.12 [ 6:49:39<17:47:56] +[titan] 2025-10-05 05:23:59,936 - root - INFO - step: 11095 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:23:59,936 - root - INFO - lr: 4.2375e-05 gnorm: 1.15 [ 6:49:50<17:47:44] +[titan] 2025-10-05 05:24:08,638 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:24:10,828 - root - INFO - step: 11100 loss: 2.3700 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:24:10,828 - root - INFO - lr: 4.2368e-05 gnorm: 1.16 [ 6:50:01<17:47:33] +[titan] 2025-10-05 05:24:21,716 - root - INFO - step: 11105 loss: 2.3080 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 05:24:21,716 - root - INFO - lr: 4.2361e-05 gnorm: 1.11 [ 6:50:12<17:47:21] +[titan] 2025-10-05 05:24:32,601 - root - INFO - step: 11110 loss: 2.3389 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0656 +[titan] 2025-10-05 05:24:32,602 - root - INFO - lr: 4.2354e-05 gnorm: 1.18 [ 6:50:23<17:47:09] +[titan] 2025-10-05 05:24:43,497 - root - INFO - step: 11115 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:24:43,497 - root - INFO - lr: 4.2348e-05 gnorm: 1.16 [ 6:50:34<17:46:58] +[titan] 2025-10-05 05:24:54,382 - root - INFO - step: 11120 loss: 2.3434 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0676 +[titan] 2025-10-05 05:24:54,382 - root - INFO - lr: 4.2341e-05 gnorm: 1.17 [ 6:50:45<17:46:46] +[titan] 2025-10-05 05:25:05,236 - root - INFO - step: 11125 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:25:05,236 - root - INFO - lr: 4.2334e-05 gnorm: 1.14 [ 6:50:56<17:46:35] +[titan] 2025-10-05 05:25:16,090 - root - INFO - step: 11130 loss: 2.3586 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0834 +[titan] 2025-10-05 05:25:16,091 - root - INFO - lr: 
4.2328e-05 gnorm: 1.10 [ 6:51:06<17:46:23] +[titan] 2025-10-05 05:25:26,938 - root - INFO - step: 11135 loss: 2.3923 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 05:25:26,939 - root - INFO - lr: 4.2321e-05 gnorm: 1.15 [ 6:51:17<17:46:11] +[titan] 2025-10-05 05:25:37,783 - root - INFO - step: 11140 loss: 2.3864 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 05:25:37,783 - root - INFO - lr: 4.2314e-05 gnorm: 1.15 [ 6:51:28<17:45:59] +[titan] 2025-10-05 05:25:48,642 - root - INFO - step: 11145 loss: 2.3257 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0532 +[titan] 2025-10-05 05:25:48,642 - root - INFO - lr: 4.2307e-05 gnorm: 1.12 [ 6:51:39<17:45:48] +[titan] 2025-10-05 05:25:57,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:25:59,544 - root - INFO - step: 11150 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0565 +[titan] 2025-10-05 05:25:59,544 - root - INFO - lr: 4.2301e-05 gnorm: 1.12 [ 6:51:50<17:45:36] +[titan] 2025-10-05 05:26:10,397 - root - INFO - step: 11155 loss: 2.3187 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0478 +[titan] 2025-10-05 05:26:10,397 - root - INFO - lr: 4.2294e-05 gnorm: 1.08 [ 6:52:01<17:45:25] +[titan] 2025-10-05 05:26:21,273 - root - INFO - step: 11160 loss: 2.3623 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0869 +[titan] 2025-10-05 05:26:21,273 - root - INFO - lr: 4.2287e-05 gnorm: 1.14 [ 6:52:12<17:45:13] +[titan] 2025-10-05 05:26:32,142 - root - INFO - step: 11165 loss: 2.3541 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:26:32,142 - root - INFO - lr: 4.2281e-05 gnorm: 1.13 [ 6:52:22<17:45:01] +[titan] 2025-10-05 05:26:43,035 - root - INFO - step: 11170 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 05:26:43,035 - root - INFO - lr: 4.2274e-05 gnorm: 1.13 [ 6:52:33<17:44:50] +[titan] 2025-10-05 05:26:53,989 - root - INFO - step: 11175 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.97% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 05:26:53,989 - root - INFO - lr: 4.2267e-05 gnorm: 1.12 [ 6:52:44<17:44:38] +[titan] 2025-10-05 05:27:04,880 - root - INFO - step: 11180 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0666 +[titan] 2025-10-05 05:27:04,880 - root - INFO - lr: 
4.2260e-05 gnorm: 1.19 [ 6:52:55<17:44:27] +[titan] 2025-10-05 05:27:15,757 - root - INFO - step: 11185 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0699 +[titan] 2025-10-05 05:27:15,757 - root - INFO - lr: 4.2254e-05 gnorm: 1.15 [ 6:53:06<17:44:15] +[titan] 2025-10-05 05:27:26,622 - root - INFO - step: 11190 loss: 2.3961 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1162 +[titan] 2025-10-05 05:27:26,622 - root - INFO - lr: 4.2247e-05 gnorm: 1.10 [ 6:53:17<17:44:04] +[titan] 2025-10-05 05:27:37,484 - root - INFO - step: 11195 loss: 2.3721 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 05:27:37,484 - root - INFO - lr: 4.2240e-05 gnorm: 1.15 [ 6:53:28<17:43:52] +[titan] 2025-10-05 05:27:46,182 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:27:48,372 - root - INFO - step: 11200 loss: 2.3645 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:27:48,372 - root - INFO - lr: 4.2233e-05 gnorm: 1.17 [ 6:53:39<17:43:40] +[titan] 2025-10-05 05:27:59,307 - root - INFO - step: 11205 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:27:59,307 - root - INFO - lr: 4.2227e-05 gnorm: 1.09 [ 6:53:50<17:43:29] +[titan] 2025-10-05 05:28:10,176 - root - INFO - step: 11210 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0464 +[titan] 2025-10-05 05:28:10,176 - root - INFO - lr: 4.2220e-05 gnorm: 1.15 [ 6:54:01<17:43:17] +[titan] 2025-10-05 05:28:21,076 - root - INFO - step: 11215 loss: 2.3354 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 05:28:21,076 - root - INFO - lr: 4.2213e-05 gnorm: 1.14 [ 6:54:11<17:43:06] +[titan] 2025-10-05 05:28:31,935 - root - INFO - step: 11220 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0592 +[titan] 2025-10-05 05:28:31,935 - root - INFO - lr: 4.2206e-05 gnorm: 1.10 [ 6:54:22<17:42:54] +[titan] 2025-10-05 05:28:42,804 - root - INFO - step: 11225 loss: 2.2877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 05:28:42,805 - root - INFO - lr: 4.2200e-05 gnorm: 1.15 [ 6:54:33<17:42:42] +[titan] 2025-10-05 05:28:53,662 - root - INFO - step: 11230 loss: 2.3995 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 05:28:53,662 - root - INFO - lr: 
4.2193e-05 gnorm: 1.17 [ 6:54:44<17:42:31] +[titan] 2025-10-05 05:29:04,634 - root - INFO - step: 11235 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 05:29:04,634 - root - INFO - lr: 4.2186e-05 gnorm: 1.17 [ 6:54:55<17:42:19] +[titan] 2025-10-05 05:29:15,534 - root - INFO - step: 11240 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0842 +[titan] 2025-10-05 05:29:15,535 - root - INFO - lr: 4.2179e-05 gnorm: 1.12 [ 6:55:06<17:42:08] +[titan] 2025-10-05 05:29:26,383 - root - INFO - step: 11245 loss: 2.3641 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0871 +[titan] 2025-10-05 05:29:26,383 - root - INFO - lr: 4.2173e-05 gnorm: 1.08 [ 6:55:17<17:41:56] +[titan] 2025-10-05 05:29:35,041 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:29:37,225 - root - INFO - step: 11250 loss: 2.3893 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 05:29:37,226 - root - INFO - lr: 4.2166e-05 gnorm: 1.11 [ 6:55:28<17:41:45] +[titan] 2025-10-05 05:29:48,080 - root - INFO - step: 11255 loss: 2.3315 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0589 +[titan] 2025-10-05 05:29:48,080 - root - INFO - lr: 4.2159e-05 gnorm: 1.15 [ 6:55:38<17:41:33] +[titan] 2025-10-05 05:29:58,912 - root - INFO - step: 11260 loss: 2.3790 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1000 +[titan] 2025-10-05 05:29:58,912 - root - INFO - lr: 4.2152e-05 gnorm: 1.11 [ 6:55:49<17:41:21] +[titan] 2025-10-05 05:30:07,829 - root - INFO - Dumping profiler traces at step 11264 +[titan] 2025-10-05 05:30:07,867 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:30:10,050 - root - INFO - step: 11265 loss: 2.2811 memory: 118.84GiB(85.28%) tps: 29,420 tflops: 408.16 mfu: 41.27% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 05:30:10,051 - root - INFO - lr: 4.2146e-05 gnorm: 1.10 [ 6:56:00<17:41:10] +[titan] 2025-10-05 05:30:20,892 - root - INFO - step: 11270 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0809 +[titan] 2025-10-05 05:30:20,892 - root - INFO - lr: 4.2139e-05 gnorm: 1.12 [ 6:56:11<17:40:59] +[titan] 2025-10-05 05:30:31,735 - root - INFO - step: 11275 loss: 2.3738 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0947 +[titan] 2025-10-05 05:30:31,735 - root - INFO - lr: 4.2132e-05 gnorm: 1.10 [ 6:56:22<17:40:47] +[titan] 2025-10-05 05:30:42,574 - root - INFO - step: 11280 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 05:30:42,574 - root - INFO - lr: 4.2125e-05 gnorm: 1.10 [ 6:56:33<17:40:35] +[titan] 2025-10-05 05:30:53,425 - root - INFO - step: 11285 loss: 2.3915 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1121 +[titan] 2025-10-05 05:30:53,426 - root - INFO - lr: 4.2118e-05 gnorm: 1.14 [ 6:56:44<17:40:24] +[titan] 2025-10-05 05:31:04,306 - root - INFO - step: 11290 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1164 +[titan] 2025-10-05 05:31:04,307 - root - INFO - lr: 4.2112e-05 gnorm: 1.16 [ 6:56:55<17:40:12] +[titan] 2025-10-05 05:31:15,165 - root - INFO - step: 11295 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 05:31:15,165 - root - INFO - lr: 4.2105e-05 gnorm: 1.16 [ 6:57:06<17:40:00] +[titan] 2025-10-05 05:31:23,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:31:26,028 - root - INFO - step: 11300 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1297 +[titan] 2025-10-05 05:31:26,028 - root - INFO - lr: 4.2098e-05 gnorm: 1.16 [ 6:57:16<17:39:49] +[titan] 2025-10-05 05:31:36,890 - root - INFO - step: 11305 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 05:31:36,890 - root - INFO - lr: 4.2091e-05 gnorm: 1.19 [ 6:57:27<17:39:37] +[titan] 2025-10-05 05:31:47,751 - root - INFO - step: 11310 loss: 2.3629 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0867 +[titan] 2025-10-05 05:31:47,751 - root - INFO - lr: 4.2084e-05 gnorm: 1.13 [ 6:57:38<17:39:25] +[titan] 2025-10-05 05:31:58,646 - root - INFO - step: 11315 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0333 +[titan] 2025-10-05 05:31:58,646 - root - INFO - lr: 4.2078e-05 gnorm: 1.14 [ 6:57:49<17:39:14] +[titan] 2025-10-05 05:32:09,512 - root - INFO - step: 11320 loss: 2.4605 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 05:32:09,513 - root - INFO - lr: 4.2071e-05 gnorm: 1.15 [ 6:58:00<17:39:02] +[titan] 2025-10-05 05:32:20,392 - root - INFO - step: 11325 loss: 2.3568 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0807 +[titan] 2025-10-05 05:32:20,392 - root - INFO - lr: 4.2064e-05 gnorm: 1.12 [ 6:58:11<17:38:51] +[titan] 2025-10-05 05:32:31,290 - root - INFO - step: 11330 loss: 2.4028 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1208 +[titan] 2025-10-05 05:32:31,290 - root - INFO - lr: 
4.2057e-05 gnorm: 1.14 [ 6:58:22<17:38:39] +[titan] 2025-10-05 05:32:42,174 - root - INFO - step: 11335 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:32:42,174 - root - INFO - lr: 4.2050e-05 gnorm: 1.16 [ 6:58:33<17:38:28] +[titan] 2025-10-05 05:32:53,063 - root - INFO - step: 11340 loss: 2.3303 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0571 +[titan] 2025-10-05 05:32:53,064 - root - INFO - lr: 4.2044e-05 gnorm: 1.10 [ 6:58:43<17:38:16] +[titan] 2025-10-05 05:33:03,971 - root - INFO - step: 11345 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.1089 +[titan] 2025-10-05 05:33:03,972 - root - INFO - lr: 4.2037e-05 gnorm: 1.10 [ 6:58:54<17:38:05] +[titan] 2025-10-05 05:33:12,662 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:33:14,854 - root - INFO - step: 11350 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:33:14,854 - root - INFO - lr: 4.2030e-05 gnorm: 1.16 [ 6:59:05<17:37:53] +[titan] 2025-10-05 05:33:25,725 - root - INFO - step: 11355 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:33:25,725 - root - INFO - lr: 4.2023e-05 gnorm: 1.14 [ 6:59:16<17:37:41] +[titan] 2025-10-05 05:33:36,578 - root - INFO - step: 11360 loss: 2.2858 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0185 +[titan] 2025-10-05 05:33:36,578 - root - INFO - lr: 4.2016e-05 gnorm: 1.08 [ 6:59:27<17:37:30] +[titan] 2025-10-05 05:33:47,452 - root - INFO - step: 11365 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:33:47,452 - root - INFO - lr: 4.2010e-05 gnorm: 1.07 [ 6:59:38<17:37:18] +[titan] 2025-10-05 05:33:58,347 - root - INFO - step: 11370 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0475 +[titan] 2025-10-05 05:33:58,347 - root - INFO - lr: 4.2003e-05 gnorm: 1.09 [ 6:59:49<17:37:07] +[titan] 2025-10-05 05:34:09,277 - root - INFO - step: 11375 loss: 2.4178 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1340 +[titan] 2025-10-05 05:34:09,277 - root - INFO - lr: 4.1996e-05 gnorm: 1.13 [ 7:00:00<17:36:55] +[titan] 2025-10-05 05:34:20,157 - root - INFO - step: 11380 loss: 2.3349 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:34:20,157 - root - INFO - lr: 
4.1989e-05 gnorm: 1.18 [ 7:00:10<17:36:44] +[titan] 2025-10-05 05:34:31,049 - root - INFO - step: 11385 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:34:31,049 - root - INFO - lr: 4.1982e-05 gnorm: 1.10 [ 7:00:21<17:36:32] +[titan] 2025-10-05 05:34:41,929 - root - INFO - step: 11390 loss: 2.4099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1279 +[titan] 2025-10-05 05:34:41,929 - root - INFO - lr: 4.1975e-05 gnorm: 1.10 [ 7:00:32<17:36:20] +[titan] 2025-10-05 05:34:52,785 - root - INFO - step: 11395 loss: 2.3564 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:34:52,785 - root - INFO - lr: 4.1969e-05 gnorm: 1.15 [ 7:00:43<17:36:09] +[titan] 2025-10-05 05:35:01,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:35:03,690 - root - INFO - step: 11400 loss: 2.4143 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1315 +[titan] 2025-10-05 05:35:03,690 - root - INFO - lr: 4.1962e-05 gnorm: 1.14 [ 7:00:54<17:35:57] +[titan] 2025-10-05 05:35:14,535 - root - INFO - step: 11405 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 05:35:14,536 - root - INFO - lr: 4.1955e-05 gnorm: 1.17 [ 7:01:05<17:35:46] +[titan] 2025-10-05 05:35:25,412 - root - INFO - step: 11410 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0477 +[titan] 2025-10-05 05:35:25,412 - root - INFO - lr: 4.1948e-05 gnorm: 1.13 [ 7:01:16<17:35:34] +[titan] 2025-10-05 05:35:36,263 - root - INFO - step: 11415 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0351 +[titan] 2025-10-05 05:35:36,263 - root - INFO - lr: 4.1941e-05 gnorm: 1.12 [ 7:01:27<17:35:22] +[titan] 2025-10-05 05:35:47,122 - root - INFO - step: 11420 loss: 2.3875 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 05:35:47,122 - root - INFO - lr: 4.1934e-05 gnorm: 1.14 [ 7:01:37<17:35:11] +[titan] 2025-10-05 05:35:57,974 - root - INFO - step: 11425 loss: 2.3552 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0795 +[titan] 2025-10-05 05:35:57,974 - root - INFO - lr: 4.1928e-05 gnorm: 1.13 [ 7:01:48<17:34:59] +[titan] 2025-10-05 05:36:08,849 - root - INFO - step: 11430 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 05:36:08,849 - root - INFO - lr: 
4.1921e-05 gnorm: 1.17 [ 7:01:59<17:34:48] +[titan] 2025-10-05 05:36:19,695 - root - INFO - step: 11435 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:36:19,695 - root - INFO - lr: 4.1914e-05 gnorm: 1.16 [ 7:02:10<17:34:36] +[titan] 2025-10-05 05:36:30,564 - root - INFO - step: 11440 loss: 2.3449 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0705 +[titan] 2025-10-05 05:36:30,564 - root - INFO - lr: 4.1907e-05 gnorm: 1.08 [ 7:02:21<17:34:24] +[titan] 2025-10-05 05:36:41,427 - root - INFO - step: 11445 loss: 2.4403 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1539 +[titan] 2025-10-05 05:36:41,427 - root - INFO - lr: 4.1900e-05 gnorm: 1.15 [ 7:02:32<17:34:13] +[titan] 2025-10-05 05:36:50,092 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:36:52,270 - root - INFO - step: 11450 loss: 2.3496 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:36:52,270 - root - INFO - lr: 4.1893e-05 gnorm: 1.14 [ 7:02:43<17:34:01] +[titan] 2025-10-05 05:37:03,144 - root - INFO - step: 11455 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 05:37:03,145 - root - INFO - lr: 4.1886e-05 gnorm: 1.13 [ 7:02:53<17:33:50] +[titan] 2025-10-05 05:37:13,972 - root - INFO - step: 11460 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 05:37:13,973 - root - INFO - lr: 4.1880e-05 gnorm: 1.13 [ 7:03:04<17:33:38] +[titan] 2025-10-05 05:37:24,845 - root - INFO - step: 11465 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0606 +[titan] 2025-10-05 05:37:24,845 - root - INFO - lr: 4.1873e-05 gnorm: 1.16 [ 7:03:15<17:33:26] +[titan] 2025-10-05 05:37:35,703 - root - INFO - step: 11470 loss: 2.3317 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0582 +[titan] 2025-10-05 05:37:35,703 - root - INFO - lr: 4.1866e-05 gnorm: 1.10 [ 7:03:26<17:33:15] +[titan] 2025-10-05 05:37:46,570 - root - INFO - step: 11475 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 05:37:46,570 - root - INFO - lr: 4.1859e-05 gnorm: 1.18 [ 7:03:37<17:33:03] +[titan] 2025-10-05 05:37:57,446 - root - INFO - step: 11480 loss: 2.3142 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0433 +[titan] 2025-10-05 05:37:57,447 - root - INFO - lr: 
4.1852e-05 gnorm: 1.10 [ 7:03:48<17:32:51] +[titan] 2025-10-05 05:38:08,329 - root - INFO - step: 11485 loss: 2.3042 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0343 +[titan] 2025-10-05 05:38:08,329 - root - INFO - lr: 4.1845e-05 gnorm: 1.16 [ 7:03:59<17:32:40] +[titan] 2025-10-05 05:38:19,195 - root - INFO - step: 11490 loss: 2.4232 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1402 +[titan] 2025-10-05 05:38:19,195 - root - INFO - lr: 4.1838e-05 gnorm: 1.17 [ 7:04:10<17:32:28] +[titan] 2025-10-05 05:38:30,073 - root - INFO - step: 11495 loss: 2.3563 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0808 +[titan] 2025-10-05 05:38:30,073 - root - INFO - lr: 4.1831e-05 gnorm: 1.12 [ 7:04:20<17:32:17] +[titan] 2025-10-05 05:38:38,740 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:38:40,929 - root - INFO - step: 11500 loss: 2.3519 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0760 +[titan] 2025-10-05 05:38:40,929 - root - INFO - lr: 4.1825e-05 gnorm: 1.09 [ 7:04:31<17:32:05] +[titan] 2025-10-05 05:38:51,791 - root - INFO - step: 11505 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 05:38:51,792 - root - INFO - lr: 4.1818e-05 gnorm: 1.18 [ 7:04:42<17:31:54] +[titan] 2025-10-05 05:39:02,689 - root - INFO - step: 11510 loss: 2.3200 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0488 +[titan] 2025-10-05 05:39:02,689 - root - INFO - lr: 4.1811e-05 gnorm: 1.13 [ 7:04:53<17:31:42] +[titan] 2025-10-05 05:39:13,585 - root - INFO - step: 11515 loss: 2.4548 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1671 +[titan] 2025-10-05 05:39:13,586 - root - INFO - lr: 4.1804e-05 gnorm: 1.13 [ 7:05:04<17:31:30] +[titan] 2025-10-05 05:39:24,449 - root - INFO - step: 11520 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0252 +[titan] 2025-10-05 05:39:24,449 - root - INFO - lr: 4.1797e-05 gnorm: 1.15 [ 7:05:15<17:31:19] +[titan] 2025-10-05 05:39:35,295 - root - INFO - step: 11525 loss: 2.2866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 05:39:35,295 - root - INFO - lr: 4.1790e-05 gnorm: 1.07 [ 7:05:26<17:31:07] +[titan] 2025-10-05 05:39:46,183 - root - INFO - step: 11530 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0847 +[titan] 2025-10-05 05:39:46,183 - root - INFO - lr: 
4.1783e-05 gnorm: 1.14 [ 7:05:36<17:30:56] +[titan] 2025-10-05 05:39:57,043 - root - INFO - step: 11535 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 05:39:57,043 - root - INFO - lr: 4.1776e-05 gnorm: 1.14 [ 7:05:47<17:30:44] +[titan] 2025-10-05 05:40:07,933 - root - INFO - step: 11540 loss: 2.3581 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0821 +[titan] 2025-10-05 05:40:07,934 - root - INFO - lr: 4.1769e-05 gnorm: 1.10 [ 7:05:58<17:30:33] +[titan] 2025-10-05 05:40:18,821 - root - INFO - step: 11545 loss: 2.4229 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:40:18,821 - root - INFO - lr: 4.1763e-05 gnorm: 1.15 [ 7:06:09<17:30:21] +[titan] 2025-10-05 05:40:27,479 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:40:29,677 - root - INFO - step: 11550 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0971 +[titan] 2025-10-05 05:40:29,677 - root - INFO - lr: 4.1756e-05 gnorm: 1.16 [ 7:06:20<17:30:09] +[titan] 2025-10-05 05:40:40,531 - root - INFO - step: 11555 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0225 +[titan] 2025-10-05 05:40:40,531 - root - INFO - lr: 4.1749e-05 gnorm: 1.07 [ 7:06:31<17:29:58] +[titan] 2025-10-05 05:40:51,372 - root - INFO - step: 11560 loss: 2.3640 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 05:40:51,372 - root - INFO - lr: 4.1742e-05 gnorm: 1.13 [ 7:06:42<17:29:46] +[titan] 2025-10-05 05:41:02,211 - root - INFO - step: 11565 loss: 2.3067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0371 +[titan] 2025-10-05 05:41:02,211 - root - INFO - lr: 4.1735e-05 gnorm: 1.09 [ 7:06:53<17:29:35] +[titan] 2025-10-05 05:41:13,062 - root - INFO - step: 11570 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:41:13,062 - root - INFO - lr: 4.1728e-05 gnorm: 1.08 [ 7:07:03<17:29:23] +[titan] 2025-10-05 05:41:23,914 - root - INFO - step: 11575 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 05:41:23,914 - root - INFO - lr: 4.1721e-05 gnorm: 1.11 [ 7:07:14<17:29:11] +[titan] 2025-10-05 05:41:34,780 - root - INFO - step: 11580 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0896 +[titan] 2025-10-05 05:41:34,780 - root - INFO - lr: 
4.1714e-05 gnorm: 1.16 [ 7:07:25<17:29:00] +[titan] 2025-10-05 05:41:45,632 - root - INFO - step: 11585 loss: 2.3149 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0440 +[titan] 2025-10-05 05:41:45,632 - root - INFO - lr: 4.1707e-05 gnorm: 1.12 [ 7:07:36<17:28:48] +[titan] 2025-10-05 05:41:56,483 - root - INFO - step: 11590 loss: 2.2891 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0207 +[titan] 2025-10-05 05:41:56,483 - root - INFO - lr: 4.1700e-05 gnorm: 1.11 [ 7:07:47<17:28:36] +[titan] 2025-10-05 05:42:07,367 - root - INFO - step: 11595 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 05:42:07,367 - root - INFO - lr: 4.1693e-05 gnorm: 1.09 [ 7:07:58<17:28:25] +[titan] 2025-10-05 05:42:16,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:42:18,229 - root - INFO - step: 11600 loss: 2.3596 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0839 +[titan] 2025-10-05 05:42:18,229 - root - INFO - lr: 4.1686e-05 gnorm: 1.13 [ 7:08:09<17:28:13] +[titan] 2025-10-05 05:42:29,091 - root - INFO - step: 11605 loss: 2.3723 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0941 +[titan] 2025-10-05 05:42:29,092 - root - INFO - lr: 4.1680e-05 gnorm: 1.11 [ 7:08:19<17:28:02] +[titan] 2025-10-05 05:42:39,944 - root - INFO - step: 11610 loss: 2.3331 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0601 +[titan] 2025-10-05 05:42:39,944 - root - INFO - lr: 4.1673e-05 gnorm: 1.12 [ 7:08:30<17:27:50] +[titan] 2025-10-05 05:42:50,809 - root - INFO - step: 11615 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0661 +[titan] 2025-10-05 05:42:50,809 - root - INFO - lr: 4.1666e-05 gnorm: 1.14 [ 7:08:41<17:27:39] +[titan] 2025-10-05 05:43:01,660 - root - INFO - step: 11620 loss: 2.3817 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1022 +[titan] 2025-10-05 05:43:01,660 - root - INFO - lr: 4.1659e-05 gnorm: 1.16 [ 7:08:52<17:27:27] +[titan] 2025-10-05 05:43:12,542 - root - INFO - step: 11625 loss: 2.3129 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0422 +[titan] 2025-10-05 05:43:12,542 - root - INFO - lr: 4.1652e-05 gnorm: 1.15 [ 7:09:03<17:27:15] +[titan] 2025-10-05 05:43:23,381 - root - INFO - step: 11630 loss: 2.3032 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 05:43:23,381 - root - INFO - lr: 
4.1645e-05 gnorm: 1.17 [ 7:09:14<17:27:04] +[titan] 2025-10-05 05:43:34,203 - root - INFO - step: 11635 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0697 +[titan] 2025-10-05 05:43:34,203 - root - INFO - lr: 4.1638e-05 gnorm: 1.17 [ 7:09:25<17:26:52] +[titan] 2025-10-05 05:43:45,042 - root - INFO - step: 11640 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0979 +[titan] 2025-10-05 05:43:45,042 - root - INFO - lr: 4.1631e-05 gnorm: 1.09 [ 7:09:35<17:26:40] +[titan] 2025-10-05 05:43:55,889 - root - INFO - step: 11645 loss: 2.3366 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 05:43:55,889 - root - INFO - lr: 4.1624e-05 gnorm: 1.12 [ 7:09:46<17:26:29] +[titan] 2025-10-05 05:44:04,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:44:06,731 - root - INFO - step: 11650 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0964 +[titan] 2025-10-05 05:44:06,731 - root - INFO - lr: 4.1617e-05 gnorm: 1.13 [ 7:09:57<17:26:17] +[titan] 2025-10-05 05:44:17,623 - root - INFO - step: 11655 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0423 +[titan] 2025-10-05 05:44:17,623 - root - INFO - lr: 4.1610e-05 gnorm: 1.16 [ 7:10:08<17:26:06] +[titan] 2025-10-05 05:44:28,491 - root - INFO - step: 11660 loss: 2.3791 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 05:44:28,491 - root - INFO - lr: 4.1603e-05 gnorm: 1.14 [ 7:10:19<17:25:54] +[titan] 2025-10-05 05:44:39,349 - root - INFO - step: 11665 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0347 +[titan] 2025-10-05 05:44:39,349 - root - INFO - lr: 4.1596e-05 gnorm: 1.14 [ 7:10:30<17:25:43] +[titan] 2025-10-05 05:44:50,212 - root - INFO - step: 11670 loss: 2.2728 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0088 +[titan] 2025-10-05 05:44:50,212 - root - INFO - lr: 4.1589e-05 gnorm: 1.12 [ 7:10:41<17:25:31] +[titan] 2025-10-05 05:45:01,081 - root - INFO - step: 11675 loss: 2.3589 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:45:01,081 - root - INFO - lr: 4.1582e-05 gnorm: 1.11 [ 7:10:51<17:25:19] +[titan] 2025-10-05 05:45:11,965 - root - INFO - step: 11680 loss: 2.3297 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0575 +[titan] 2025-10-05 05:45:11,965 - root - INFO - lr: 
4.1575e-05 gnorm: 1.10 [ 7:11:02<17:25:08] +[titan] 2025-10-05 05:45:22,811 - root - INFO - step: 11685 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0746 +[titan] 2025-10-05 05:45:22,811 - root - INFO - lr: 4.1568e-05 gnorm: 1.11 [ 7:11:13<17:24:56] +[titan] 2025-10-05 05:45:33,673 - root - INFO - step: 11690 loss: 2.3753 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2787 global_avg_mtp_loss: 2.0966 +[titan] 2025-10-05 05:45:33,674 - root - INFO - lr: 4.1561e-05 gnorm: 1.10 [ 7:11:24<17:24:45] +[titan] 2025-10-05 05:45:44,536 - root - INFO - step: 11695 loss: 2.3906 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1103 +[titan] 2025-10-05 05:45:44,537 - root - INFO - lr: 4.1554e-05 gnorm: 1.11 [ 7:11:35<17:24:33] +[titan] 2025-10-05 05:45:53,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:45:55,410 - root - INFO - step: 11700 loss: 2.3089 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 05:45:55,410 - root - INFO - lr: 4.1547e-05 gnorm: 1.16 [ 7:11:46<17:24:22] +[titan] 2025-10-05 05:46:06,262 - root - INFO - step: 11705 loss: 2.3134 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 05:46:06,262 - root - INFO - lr: 4.1540e-05 gnorm: 1.11 [ 7:11:57<17:24:10] +[titan] 2025-10-05 05:46:17,130 - root - INFO - step: 11710 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:46:17,130 - root - INFO - lr: 4.1534e-05 gnorm: 1.07 [ 7:12:07<17:23:58] +[titan] 2025-10-05 05:46:27,969 - root - INFO - step: 11715 loss: 2.3153 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0455 +[titan] 2025-10-05 05:46:27,969 - root - INFO - lr: 4.1527e-05 gnorm: 1.10 [ 7:12:18<17:23:47] +[titan] 2025-10-05 05:46:38,818 - root - INFO - step: 11720 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1170 +[titan] 2025-10-05 05:46:38,818 - root - INFO - lr: 4.1520e-05 gnorm: 1.16 [ 7:12:29<17:23:35] +[titan] 2025-10-05 05:46:49,675 - root - INFO - step: 11725 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0144 +[titan] 2025-10-05 05:46:49,675 - root - INFO - lr: 4.1513e-05 gnorm: 1.16 [ 7:12:40<17:23:24] +[titan] 2025-10-05 05:47:00,544 - root - INFO - step: 11730 loss: 2.4145 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1312 +[titan] 2025-10-05 05:47:00,544 - root - INFO - lr: 
4.1506e-05 gnorm: 1.10 [ 7:12:51<17:23:12] +[titan] 2025-10-05 05:47:11,419 - root - INFO - step: 11735 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0736 +[titan] 2025-10-05 05:47:11,419 - root - INFO - lr: 4.1499e-05 gnorm: 1.08 [ 7:13:02<17:23:00] +[titan] 2025-10-05 05:47:22,265 - root - INFO - step: 11740 loss: 2.3154 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 05:47:22,265 - root - INFO - lr: 4.1492e-05 gnorm: 1.11 [ 7:13:13<17:22:49] +[titan] 2025-10-05 05:47:33,131 - root - INFO - step: 11745 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 05:47:33,132 - root - INFO - lr: 4.1485e-05 gnorm: 1.13 [ 7:13:23<17:22:37] +[titan] 2025-10-05 05:47:41,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:47:43,985 - root - INFO - step: 11750 loss: 2.3279 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0557 +[titan] 2025-10-05 05:47:43,985 - root - INFO - lr: 4.1478e-05 gnorm: 1.13 [ 7:13:34<17:22:26] +[titan] 2025-10-05 05:47:54,868 - root - INFO - step: 11755 loss: 2.3253 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0524 +[titan] 2025-10-05 05:47:54,869 - root - INFO - lr: 4.1471e-05 gnorm: 1.15 [ 7:13:45<17:22:14] +[titan] 2025-10-05 05:48:05,705 - root - INFO - step: 11760 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 05:48:05,705 - root - INFO - lr: 4.1464e-05 gnorm: 1.11 [ 7:13:56<17:22:03] +[titan] 2025-10-05 05:48:16,588 - root - INFO - step: 11765 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0673 +[titan] 2025-10-05 05:48:16,588 - root - INFO - lr: 4.1457e-05 gnorm: 1.08 [ 7:14:07<17:21:51] +[titan] 2025-10-05 05:48:27,456 - root - INFO - step: 11770 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:48:27,456 - root - INFO - lr: 4.1450e-05 gnorm: 1.13 [ 7:14:18<17:21:39] +[titan] 2025-10-05 05:48:38,410 - root - INFO - step: 11775 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 29,914 tflops: 415.01 mfu: 41.96% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 05:48:38,411 - root - INFO - lr: 4.1443e-05 gnorm: 1.12 [ 7:14:29<17:21:28] +[titan] 2025-10-05 05:48:40,790 - root - INFO - Dumping profiler traces at step 11776 +[titan] 2025-10-05 05:48:40,829 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:48:49,532 - root - INFO - step: 11780 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0553 +[titan] 2025-10-05 05:48:49,532 - root - INFO - lr: 4.1436e-05 gnorm: 1.10 [ 7:14:40<17:21:17] +[titan] 2025-10-05 05:49:00,425 - root - INFO - step: 11785 loss: 2.3316 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0583 +[titan] 2025-10-05 05:49:00,425 - root - INFO - lr: 4.1429e-05 gnorm: 1.11 [ 7:14:51<17:21:06] +[titan] 2025-10-05 05:49:11,300 - root - INFO - step: 11790 loss: 2.2637 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 05:49:11,301 - root - INFO - lr: 4.1422e-05 gnorm: 1.08 [ 7:15:02<17:20:54] +[titan] 2025-10-05 05:49:22,173 - root - INFO - step: 11795 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1151 +[titan] 2025-10-05 05:49:22,173 - root - INFO - lr: 4.1415e-05 gnorm: 1.13 [ 7:15:12<17:20:43] +[titan] 2025-10-05 05:49:30,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:49:33,049 - root - INFO - step: 11800 loss: 2.3168 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0453 +[titan] 2025-10-05 05:49:33,050 - root - INFO - lr: 4.1408e-05 gnorm: 1.14 [ 7:15:23<17:20:31] +[titan] 2025-10-05 05:49:43,908 - root - INFO - step: 11805 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:49:43,909 - root - INFO - lr: 4.1401e-05 gnorm: 1.11 [ 7:15:34<17:20:19] +[titan] 2025-10-05 05:49:54,777 - root - INFO - step: 11810 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 05:49:54,777 - root - INFO - lr: 4.1394e-05 gnorm: 1.14 [ 7:15:45<17:20:08] +[titan] 2025-10-05 05:50:05,641 - root - INFO - step: 11815 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:50:05,641 - root - INFO - lr: 4.1387e-05 gnorm: 1.10 [ 7:15:56<17:19:56] +[titan] 2025-10-05 05:50:16,549 - root - INFO - step: 11820 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 05:50:16,549 - root - INFO - lr: 4.1379e-05 gnorm: 1.14 [ 7:16:07<17:19:45] +[titan] 2025-10-05 05:50:27,410 - root - INFO - step: 11825 loss: 2.3545 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:50:27,410 - root - INFO - lr: 4.1372e-05 gnorm: 1.11 [ 7:16:18<17:19:33] +[titan] 2025-10-05 05:50:38,296 - root - INFO - step: 11830 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 05:50:38,297 - root - INFO - lr: 
4.1365e-05 gnorm: 1.17 [ 7:16:29<17:19:22] +[titan] 2025-10-05 05:50:49,183 - root - INFO - step: 11835 loss: 2.4085 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1264 +[titan] 2025-10-05 05:50:49,184 - root - INFO - lr: 4.1358e-05 gnorm: 1.12 [ 7:16:39<17:19:10] +[titan] 2025-10-05 05:51:00,086 - root - INFO - step: 11840 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 05:51:00,086 - root - INFO - lr: 4.1351e-05 gnorm: 1.11 [ 7:16:50<17:18:59] +[titan] 2025-10-05 05:51:10,957 - root - INFO - step: 11845 loss: 2.3242 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0501 +[titan] 2025-10-05 05:51:10,957 - root - INFO - lr: 4.1344e-05 gnorm: 1.08 [ 7:17:01<17:18:47] +[titan] 2025-10-05 05:51:19,706 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:51:21,898 - root - INFO - step: 11850 loss: 2.3518 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0767 +[titan] 2025-10-05 05:51:21,898 - root - INFO - lr: 4.1337e-05 gnorm: 1.12 [ 7:17:12<17:18:36] +[titan] 2025-10-05 05:51:32,790 - root - INFO - step: 11855 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 05:51:32,790 - root - INFO - lr: 4.1330e-05 gnorm: 1.12 [ 7:17:23<17:18:24] +[titan] 2025-10-05 05:51:43,664 - root - INFO - step: 11860 loss: 2.3095 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 05:51:43,664 - root - INFO - lr: 4.1323e-05 gnorm: 1.18 [ 7:17:34<17:18:13] +[titan] 2025-10-05 05:51:54,563 - root - INFO - step: 11865 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0318 +[titan] 2025-10-05 05:51:54,563 - root - INFO - lr: 4.1316e-05 gnorm: 1.09 [ 7:17:45<17:18:01] +[titan] 2025-10-05 05:52:05,455 - root - INFO - step: 11870 loss: 2.3710 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0936 +[titan] 2025-10-05 05:52:05,455 - root - INFO - lr: 4.1309e-05 gnorm: 1.11 [ 7:17:56<17:17:50] +[titan] 2025-10-05 05:52:16,379 - root - INFO - step: 11875 loss: 2.3659 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0895 +[titan] 2025-10-05 05:52:16,379 - root - INFO - lr: 4.1302e-05 gnorm: 1.15 [ 7:18:07<17:17:39] +[titan] 2025-10-05 05:52:27,265 - root - INFO - step: 11880 loss: 2.4011 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1135 +[titan] 2025-10-05 05:52:27,265 - root - INFO - lr: 
4.1295e-05 gnorm: 3.35 [ 7:18:18<17:17:27] +[titan] 2025-10-05 05:52:38,136 - root - INFO - step: 11885 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0521 +[titan] 2025-10-05 05:52:38,137 - root - INFO - lr: 4.1288e-05 gnorm: 1.14 [ 7:18:28<17:17:16] +[titan] 2025-10-05 05:52:49,001 - root - INFO - step: 11890 loss: 2.3415 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0669 +[titan] 2025-10-05 05:52:49,001 - root - INFO - lr: 4.1281e-05 gnorm: 1.11 [ 7:18:39<17:17:04] +[titan] 2025-10-05 05:52:59,880 - root - INFO - step: 11895 loss: 2.3264 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2720 global_avg_mtp_loss: 2.0545 +[titan] 2025-10-05 05:52:59,880 - root - INFO - lr: 4.1274e-05 gnorm: 1.12 [ 7:18:50<17:16:52] +[titan] 2025-10-05 05:53:08,563 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:53:10,763 - root - INFO - step: 11900 loss: 2.2583 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9932 +[titan] 2025-10-05 05:53:10,763 - root - INFO - lr: 4.1267e-05 gnorm: 1.12 [ 7:19:01<17:16:41] +[titan] 2025-10-05 05:53:21,692 - root - INFO - step: 11905 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 05:53:21,692 - root - INFO - lr: 4.1260e-05 gnorm: 1.14 [ 7:19:12<17:16:30] +[titan] 2025-10-05 05:53:32,550 - root - INFO - step: 11910 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:53:32,550 - root - INFO - lr: 4.1253e-05 gnorm: 1.07 [ 7:19:23<17:16:18] +[titan] 2025-10-05 05:53:43,445 - root - INFO - step: 11915 loss: 2.3927 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:53:43,445 - root - INFO - lr: 4.1246e-05 gnorm: 1.12 [ 7:19:34<17:16:07] +[titan] 2025-10-05 05:53:54,326 - root - INFO - step: 11920 loss: 2.4016 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:53:54,326 - root - INFO - lr: 4.1239e-05 gnorm: 1.11 [ 7:19:45<17:15:55] +[titan] 2025-10-05 05:54:05,201 - root - INFO - step: 11925 loss: 2.3896 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 05:54:05,201 - root - INFO - lr: 4.1232e-05 gnorm: 1.10 [ 7:19:55<17:15:43] +[titan] 2025-10-05 05:54:16,091 - root - INFO - step: 11930 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:54:16,091 - root - INFO - lr: 
4.1224e-05 gnorm: 1.18 [ 7:20:06<17:15:32] +[titan] 2025-10-05 05:54:27,039 - root - INFO - step: 11935 loss: 2.3186 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 05:54:27,039 - root - INFO - lr: 4.1217e-05 gnorm: 1.13 [ 7:20:17<17:15:21] +[titan] 2025-10-05 05:54:37,903 - root - INFO - step: 11940 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1239 +[titan] 2025-10-05 05:54:37,903 - root - INFO - lr: 4.1210e-05 gnorm: 1.14 [ 7:20:28<17:15:09] +[titan] 2025-10-05 05:54:48,775 - root - INFO - step: 11945 loss: 2.3374 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:54:48,775 - root - INFO - lr: 4.1203e-05 gnorm: 1.16 [ 7:20:39<17:14:58] +[titan] 2025-10-05 05:54:57,461 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:54:59,643 - root - INFO - step: 11950 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0633 +[titan] 2025-10-05 05:54:59,643 - root - INFO - lr: 4.1196e-05 gnorm: 1.10 [ 7:20:50<17:14:46] +[titan] 2025-10-05 05:55:10,528 - root - INFO - step: 11955 loss: 2.3258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 05:55:10,528 - root - INFO - lr: 4.1189e-05 gnorm: 1.08 [ 7:21:01<17:14:35] +[titan] 2025-10-05 05:55:21,455 - root - INFO - step: 11960 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:55:21,456 - root - INFO - lr: 4.1182e-05 gnorm: 1.12 [ 7:21:12<17:14:23] +[titan] 2025-10-05 05:55:32,338 - root - INFO - step: 11965 loss: 2.3022 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 05:55:32,338 - root - INFO - lr: 4.1175e-05 gnorm: 1.06 [ 7:21:23<17:14:12] +[titan] 2025-10-05 05:55:43,237 - root - INFO - step: 11970 loss: 2.3819 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 05:55:43,238 - root - INFO - lr: 4.1168e-05 gnorm: 1.11 [ 7:21:34<17:14:00] +[titan] 2025-10-05 05:55:54,122 - root - INFO - step: 11975 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0302 +[titan] 2025-10-05 05:55:54,122 - root - INFO - lr: 4.1161e-05 gnorm: 1.07 [ 7:21:44<17:13:49] +[titan] 2025-10-05 05:56:04,989 - root - INFO - step: 11980 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:56:04,989 - root - INFO - lr: 
4.1154e-05 gnorm: 1.08 [ 7:21:55<17:13:37] +[titan] 2025-10-05 05:56:15,876 - root - INFO - step: 11985 loss: 2.3487 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:56:15,876 - root - INFO - lr: 4.1147e-05 gnorm: 1.11 [ 7:22:06<17:13:26] +[titan] 2025-10-05 05:56:26,799 - root - INFO - step: 11990 loss: 2.3624 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 05:56:26,799 - root - INFO - lr: 4.1139e-05 gnorm: 1.07 [ 7:22:17<17:13:14] +[titan] 2025-10-05 05:56:37,664 - root - INFO - step: 11995 loss: 2.3352 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:56:37,664 - root - INFO - lr: 4.1132e-05 gnorm: 1.15 [ 7:22:28<17:13:03] +[titan] 2025-10-05 05:56:46,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:56:48,559 - root - INFO - step: 12000 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0434 +[titan] 2025-10-05 05:56:48,559 - root - INFO - lr: 4.1125e-05 gnorm: 1.15 [ 7:22:39<17:12:51] +[titan] 2025-10-05 05:56:59,430 - root - INFO - step: 12005 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0683 +[titan] 2025-10-05 05:56:59,430 - root - INFO - lr: 4.1118e-05 gnorm: 1.12 [ 7:22:50<17:12:40] +[titan] 2025-10-05 05:57:10,327 - root - INFO - step: 12010 loss: 2.3294 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0572 +[titan] 2025-10-05 05:57:10,327 - root - INFO - lr: 4.1111e-05 gnorm: 1.11 [ 7:23:01<17:12:28] +[titan] 2025-10-05 05:57:21,254 - root - INFO - step: 12015 loss: 2.3689 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:57:21,254 - root - INFO - lr: 4.1104e-05 gnorm: 1.08 [ 7:23:12<17:12:17] +[titan] 2025-10-05 05:57:32,120 - root - INFO - step: 12020 loss: 2.3542 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0781 +[titan] 2025-10-05 05:57:32,120 - root - INFO - lr: 4.1097e-05 gnorm: 1.08 [ 7:23:22<17:12:05] +[titan] 2025-10-05 05:57:43,004 - root - INFO - step: 12025 loss: 2.3233 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:57:43,004 - root - INFO - lr: 4.1090e-05 gnorm: 1.13 [ 7:23:33<17:11:54] +[titan] 2025-10-05 05:57:53,894 - root - INFO - step: 12030 loss: 2.3526 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:57:53,895 - root - INFO - lr: 
4.1083e-05 gnorm: 1.09 [ 7:23:44<17:11:42] +[titan] 2025-10-05 05:58:04,763 - root - INFO - step: 12035 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 05:58:04,764 - root - INFO - lr: 4.1075e-05 gnorm: 1.11 [ 7:23:55<17:11:31] +[titan] 2025-10-05 05:58:15,655 - root - INFO - step: 12040 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 05:58:15,655 - root - INFO - lr: 4.1068e-05 gnorm: 1.13 [ 7:24:06<17:11:19] +[titan] 2025-10-05 05:58:26,581 - root - INFO - step: 12045 loss: 2.2551 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9914 +[titan] 2025-10-05 05:58:26,582 - root - INFO - lr: 4.1061e-05 gnorm: 1.10 [ 7:24:17<17:11:08] +[titan] 2025-10-05 05:58:35,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:58:37,445 - root - INFO - step: 12050 loss: 2.2791 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 05:58:37,445 - root - INFO - lr: 4.1054e-05 gnorm: 1.12 [ 7:24:28<17:10:56] +[titan] 2025-10-05 05:58:48,333 - root - INFO - step: 12055 loss: 2.3027 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0322 +[titan] 2025-10-05 05:58:48,334 - root - INFO - lr: 4.1047e-05 gnorm: 1.09 [ 7:24:39<17:10:45] +[titan] 2025-10-05 05:58:59,215 - root - INFO - step: 12060 loss: 2.3599 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:58:59,215 - root - INFO - lr: 4.1040e-05 gnorm: 1.13 [ 7:24:49<17:10:33] +[titan] 2025-10-05 05:59:10,066 - root - INFO - step: 12065 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 05:59:10,066 - root - INFO - lr: 4.1033e-05 gnorm: 1.14 [ 7:25:00<17:10:22] +[titan] 2025-10-05 05:59:20,922 - root - INFO - step: 12070 loss: 2.3313 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:59:20,923 - root - INFO - lr: 4.1026e-05 gnorm: 1.12 [ 7:25:11<17:10:10] +[titan] 2025-10-05 05:59:31,844 - root - INFO - step: 12075 loss: 2.4140 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 05:59:31,844 - root - INFO - lr: 4.1018e-05 gnorm: 1.14 [ 7:25:22<17:09:59] +[titan] 2025-10-05 05:59:42,686 - root - INFO - step: 12080 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0618 +[titan] 2025-10-05 05:59:42,686 - root - INFO - lr: 
4.1011e-05 gnorm: 1.13 [ 7:25:33<17:09:47] +[titan] 2025-10-05 05:59:53,539 - root - INFO - step: 12085 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0449 +[titan] 2025-10-05 05:59:53,540 - root - INFO - lr: 4.1004e-05 gnorm: 1.11 [ 7:25:44<17:09:36] +[titan] 2025-10-05 06:00:04,392 - root - INFO - step: 12090 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0005 +[titan] 2025-10-05 06:00:04,392 - root - INFO - lr: 4.0997e-05 gnorm: 1.08 [ 7:25:55<17:09:24] +[titan] 2025-10-05 06:00:15,254 - root - INFO - step: 12095 loss: 2.3576 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0822 +[titan] 2025-10-05 06:00:15,254 - root - INFO - lr: 4.0990e-05 gnorm: 1.07 [ 7:26:06<17:09:13] +[titan] 2025-10-05 06:00:23,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:00:26,169 - root - INFO - step: 12100 loss: 2.3299 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0570 +[titan] 2025-10-05 06:00:26,169 - root - INFO - lr: 4.0983e-05 gnorm: 1.12 [ 7:26:16<17:09:01] +[titan] 2025-10-05 06:00:37,019 - root - INFO - step: 12105 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 06:00:37,019 - root - INFO - lr: 4.0976e-05 gnorm: 1.10 [ 7:26:27<17:08:50] +[titan] 2025-10-05 06:00:47,875 - root - INFO - step: 12110 loss: 2.3109 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0407 +[titan] 2025-10-05 06:00:47,875 - root - INFO - lr: 4.0968e-05 gnorm: 1.14 [ 7:26:38<17:08:38] +[titan] 2025-10-05 06:00:58,710 - root - INFO - step: 12115 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0231 +[titan] 2025-10-05 06:00:58,710 - root - INFO - lr: 4.0961e-05 gnorm: 1.09 [ 7:26:49<17:08:27] +[titan] 2025-10-05 06:01:09,539 - root - INFO - step: 12120 loss: 2.3227 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0502 +[titan] 2025-10-05 06:01:09,539 - root - INFO - lr: 4.0954e-05 gnorm: 1.11 [ 7:27:00<17:08:15] +[titan] 2025-10-05 06:01:20,374 - root - INFO - step: 12125 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 06:01:20,374 - root - INFO - lr: 4.0947e-05 gnorm: 1.07 [ 7:27:11<17:08:03] +[titan] 2025-10-05 06:01:31,270 - root - INFO - step: 12130 loss: 2.2677 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0025 +[titan] 2025-10-05 06:01:31,270 - root - INFO - lr: 
4.0940e-05 gnorm: 1.31 [ 7:27:22<17:07:52] +[titan] 2025-10-05 06:01:42,106 - root - INFO - step: 12135 loss: 2.2796 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:01:42,106 - root - INFO - lr: 4.0933e-05 gnorm: 1.13 [ 7:27:32<17:07:40] +[titan] 2025-10-05 06:01:52,949 - root - INFO - step: 12140 loss: 2.3222 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:01:52,949 - root - INFO - lr: 4.0926e-05 gnorm: 1.09 [ 7:27:43<17:07:29] +[titan] 2025-10-05 06:02:03,787 - root - INFO - step: 12145 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:02:03,787 - root - INFO - lr: 4.0918e-05 gnorm: 1.12 [ 7:27:54<17:07:17] +[titan] 2025-10-05 06:02:12,468 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:02:14,649 - root - INFO - step: 12150 loss: 2.3633 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2765 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 06:02:14,649 - root - INFO - lr: 4.0911e-05 gnorm: 1.10 [ 7:28:05<17:07:06] +[titan] 2025-10-05 06:02:25,544 - root - INFO - step: 12155 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 06:02:25,544 - root - INFO - lr: 4.0904e-05 gnorm: 1.08 [ 7:28:16<17:06:54] +[titan] 2025-10-05 06:02:36,407 - root - INFO - step: 12160 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:02:36,407 - root - INFO - lr: 4.0897e-05 gnorm: 1.12 [ 7:28:27<17:06:43] +[titan] 2025-10-05 06:02:47,265 - root - INFO - step: 12165 loss: 2.3191 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:02:47,265 - root - INFO - lr: 4.0890e-05 gnorm: 1.13 [ 7:28:38<17:06:31] +[titan] 2025-10-05 06:02:58,124 - root - INFO - step: 12170 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0270 +[titan] 2025-10-05 06:02:58,124 - root - INFO - lr: 4.0883e-05 gnorm: 1.13 [ 7:28:48<17:06:20] +[titan] 2025-10-05 06:03:08,999 - root - INFO - step: 12175 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 06:03:08,999 - root - INFO - lr: 4.0875e-05 gnorm: 1.10 [ 7:28:59<17:06:08] +[titan] 2025-10-05 06:03:19,863 - root - INFO - step: 12180 loss: 2.3860 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1055 +[titan] 2025-10-05 06:03:19,864 - root - INFO - lr: 
4.0868e-05 gnorm: 1.08 [ 7:29:10<17:05:57] +[titan] 2025-10-05 06:03:30,733 - root - INFO - step: 12185 loss: 2.2786 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 06:03:30,733 - root - INFO - lr: 4.0861e-05 gnorm: 1.09 [ 7:29:21<17:05:45] +[titan] 2025-10-05 06:03:41,601 - root - INFO - step: 12190 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 06:03:41,601 - root - INFO - lr: 4.0854e-05 gnorm: 1.13 [ 7:29:32<17:05:34] +[titan] 2025-10-05 06:03:52,503 - root - INFO - step: 12195 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9906 +[titan] 2025-10-05 06:03:52,503 - root - INFO - lr: 4.0847e-05 gnorm: 1.13 [ 7:29:43<17:05:22] +[titan] 2025-10-05 06:04:01,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:04:03,365 - root - INFO - step: 12200 loss: 2.3747 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0972 +[titan] 2025-10-05 06:04:03,365 - root - INFO - lr: 4.0839e-05 gnorm: 1.12 [ 7:29:54<17:05:11] +[titan] 2025-10-05 06:04:14,208 - root - INFO - step: 12205 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0049 +[titan] 2025-10-05 06:04:14,208 - root - INFO - lr: 4.0832e-05 gnorm: 1.10 [ 7:30:04<17:04:59] +[titan] 2025-10-05 06:04:25,065 - root - INFO - step: 12210 loss: 2.3060 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:04:25,065 - root - INFO - lr: 4.0825e-05 gnorm: 1.06 [ 7:30:15<17:04:48] +[titan] 2025-10-05 06:04:35,929 - root - INFO - step: 12215 loss: 2.2793 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 06:04:35,929 - root - INFO - lr: 4.0818e-05 gnorm: 1.04 [ 7:30:26<17:04:36] +[titan] 2025-10-05 06:04:46,809 - root - INFO - step: 12220 loss: 2.3271 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 06:04:46,809 - root - INFO - lr: 4.0811e-05 gnorm: 1.14 [ 7:30:37<17:04:25] +[titan] 2025-10-05 06:04:57,691 - root - INFO - step: 12225 loss: 2.2624 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9984 +[titan] 2025-10-05 06:04:57,691 - root - INFO - lr: 4.0803e-05 gnorm: 1.17 [ 7:30:48<17:04:13] +[titan] 2025-10-05 06:05:08,549 - root - INFO - step: 12230 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 06:05:08,549 - root - INFO - lr: 
4.0796e-05 gnorm: 1.09 [ 7:30:59<17:04:02] +[titan] 2025-10-05 06:05:19,441 - root - INFO - step: 12235 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0739 +[titan] 2025-10-05 06:05:19,441 - root - INFO - lr: 4.0789e-05 gnorm: 1.10 [ 7:31:10<17:03:50] +[titan] 2025-10-05 06:05:30,318 - root - INFO - step: 12240 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 06:05:30,318 - root - INFO - lr: 4.0782e-05 gnorm: 1.09 [ 7:31:21<17:03:39] +[titan] 2025-10-05 06:05:41,191 - root - INFO - step: 12245 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 06:05:41,191 - root - INFO - lr: 4.0775e-05 gnorm: 1.08 [ 7:31:31<17:03:27] +[titan] 2025-10-05 06:05:49,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:05:52,078 - root - INFO - step: 12250 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:05:52,079 - root - INFO - lr: 4.0767e-05 gnorm: 1.17 [ 7:31:42<17:03:16] +[titan] 2025-10-05 06:06:02,966 - root - INFO - step: 12255 loss: 2.3830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 06:06:02,966 - root - INFO - lr: 4.0760e-05 gnorm: 1.12 [ 7:31:53<17:03:04] +[titan] 2025-10-05 06:06:13,829 - root - INFO - step: 12260 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9702 +[titan] 2025-10-05 06:06:13,829 - root - INFO - lr: 4.0753e-05 gnorm: 1.10 [ 7:32:04<17:02:53] +[titan] 2025-10-05 06:06:24,716 - root - INFO - step: 12265 loss: 2.3897 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1102 +[titan] 2025-10-05 06:06:24,716 - root - INFO - lr: 4.0746e-05 gnorm: 1.13 [ 7:32:15<17:02:41] +[titan] 2025-10-05 06:06:35,605 - root - INFO - step: 12270 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0629 +[titan] 2025-10-05 06:06:35,605 - root - INFO - lr: 4.0739e-05 gnorm: 1.15 [ 7:32:26<17:02:30] +[titan] 2025-10-05 06:06:46,502 - root - INFO - step: 12275 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 06:06:46,502 - root - INFO - lr: 4.0731e-05 gnorm: 1.17 [ 7:32:37<17:02:18] +[titan] 2025-10-05 06:06:57,383 - root - INFO - step: 12280 loss: 2.3419 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0674 +[titan] 2025-10-05 06:06:57,383 - root - INFO - lr: 
4.0724e-05 gnorm: 1.16 [ 7:32:48<17:02:07] +[titan] 2025-10-05 06:07:08,352 - root - INFO - step: 12285 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.44 mfu: 41.91% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 06:07:08,353 - root - INFO - lr: 4.0717e-05 gnorm: 1.14 [ 7:32:59<17:01:56] +[titan] 2025-10-05 06:07:15,074 - root - INFO - Dumping profiler traces at step 12288 +[titan] 2025-10-05 06:07:15,113 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:07:19,500 - root - INFO - step: 12290 loss: 2.3565 memory: 118.84GiB(85.28%) tps: 29,395 tflops: 407.81 mfu: 41.23% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:07:19,500 - root - INFO - lr: 4.0710e-05 gnorm: 1.08 [ 7:33:10<17:01:45] +[titan] 2025-10-05 06:07:30,465 - root - INFO - step: 12295 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 29,886 tflops: 414.62 mfu: 41.92% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 06:07:30,465 - root - INFO - lr: 4.0702e-05 gnorm: 1.07 [ 7:33:21<17:01:33] +[titan] 2025-10-05 06:07:39,154 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:07:41,347 - root - INFO - step: 12300 loss: 2.3244 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:07:41,347 - root - INFO - lr: 4.0695e-05 gnorm: 1.16 [ 7:33:32<17:01:22] +[titan] 2025-10-05 06:07:52,196 - root - INFO - step: 12305 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 06:07:52,196 - root - INFO - lr: 4.0688e-05 gnorm: 1.09 [ 7:33:42<17:01:10] +[titan] 2025-10-05 06:08:03,050 - root - INFO - step: 12310 loss: 2.3555 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:08:03,050 - root - INFO - lr: 4.0681e-05 gnorm: 1.12 [ 7:33:53<17:00:59] +[titan] 2025-10-05 06:08:13,913 - root - INFO - step: 12315 loss: 2.3066 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0365 +[titan] 2025-10-05 06:08:13,914 - root - INFO - lr: 4.0674e-05 gnorm: 1.08 [ 7:34:04<17:00:47] +[titan] 2025-10-05 06:08:24,841 - root - INFO - step: 12320 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0745 +[titan] 2025-10-05 06:08:24,842 - root - INFO - lr: 4.0666e-05 gnorm: 1.11 [ 7:34:15<17:00:36] +[titan] 2025-10-05 06:08:35,938 - root - INFO - step: 12325 loss: 2.4352 memory: 118.84GiB(85.28%) tps: 29,531 tflops: 409.69 mfu: 41.42% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1501 +[titan] 2025-10-05 06:08:35,938 - root - INFO - lr: 4.0659e-05 gnorm: 1.15 [ 7:34:26<17:00:25] +[titan] 2025-10-05 06:08:46,800 - root - INFO - step: 12330 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:08:46,800 - root - INFO - lr: 
4.0652e-05 gnorm: 1.09 [ 7:34:37<17:00:14] +[titan] 2025-10-05 06:08:57,665 - root - INFO - step: 12335 loss: 2.3478 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 06:08:57,665 - root - INFO - lr: 4.0645e-05 gnorm: 1.09 [ 7:34:48<17:00:02] +[titan] 2025-10-05 06:09:08,538 - root - INFO - step: 12340 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0577 +[titan] 2025-10-05 06:09:08,539 - root - INFO - lr: 4.0637e-05 gnorm: 1.13 [ 7:34:59<16:59:51] +[titan] 2025-10-05 06:09:19,441 - root - INFO - step: 12345 loss: 2.3988 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1189 +[titan] 2025-10-05 06:09:19,441 - root - INFO - lr: 4.0630e-05 gnorm: 1.13 [ 7:35:10<16:59:39] +[titan] 2025-10-05 06:09:28,178 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:09:30,418 - root - INFO - step: 12350 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.17 mfu: 41.88% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0382 +[titan] 2025-10-05 06:09:30,418 - root - INFO - lr: 4.0623e-05 gnorm: 1.12 [ 7:35:21<16:59:28] +[titan] 2025-10-05 06:09:41,340 - root - INFO - step: 12355 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0087 +[titan] 2025-10-05 06:09:41,341 - root - INFO - lr: 4.0616e-05 gnorm: 1.16 [ 7:35:32<16:59:17] +[titan] 2025-10-05 06:09:52,209 - root - INFO - step: 12360 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0389 +[titan] 2025-10-05 06:09:52,209 - root - INFO - lr: 4.0608e-05 gnorm: 1.09 [ 7:35:42<16:59:05] +[titan] 2025-10-05 06:10:03,072 - root - INFO - step: 12365 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 06:10:03,073 - root - INFO - lr: 4.0601e-05 gnorm: 1.09 [ 7:35:53<16:58:54] +[titan] 2025-10-05 06:10:13,928 - root - INFO - step: 12370 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 06:10:13,928 - root - INFO - lr: 4.0594e-05 gnorm: 1.09 [ 7:36:04<16:58:42] +[titan] 2025-10-05 06:10:24,802 - root - INFO - step: 12375 loss: 2.3408 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:10:24,803 - root - INFO - lr: 4.0587e-05 gnorm: 1.10 [ 7:36:15<16:58:31] +[titan] 2025-10-05 06:10:35,777 - root - INFO - step: 12380 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.24 mfu: 41.88% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:10:35,777 - root - INFO - lr: 
4.0579e-05 gnorm: 1.08 [ 7:36:26<16:58:19] +[titan] 2025-10-05 06:10:46,648 - root - INFO - step: 12385 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:10:46,649 - root - INFO - lr: 4.0572e-05 gnorm: 1.13 [ 7:36:37<16:58:08] +[titan] 2025-10-05 06:10:57,506 - root - INFO - step: 12390 loss: 2.3730 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 06:10:57,506 - root - INFO - lr: 4.0565e-05 gnorm: 1.14 [ 7:36:48<16:57:56] +[titan] 2025-10-05 06:11:08,373 - root - INFO - step: 12395 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:11:08,373 - root - INFO - lr: 4.0558e-05 gnorm: 1.06 [ 7:36:59<16:57:45] +[titan] 2025-10-05 06:11:17,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:11:19,239 - root - INFO - step: 12400 loss: 2.3820 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 06:11:19,240 - root - INFO - lr: 4.0550e-05 gnorm: 1.12 [ 7:37:09<16:57:33] +[titan] 2025-10-05 06:11:30,093 - root - INFO - step: 12405 loss: 2.3346 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0597 +[titan] 2025-10-05 06:11:30,094 - root - INFO - lr: 4.0543e-05 gnorm: 1.09 [ 7:37:20<16:57:22] +[titan] 2025-10-05 06:11:41,037 - root - INFO - step: 12410 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0232 +[titan] 2025-10-05 06:11:41,037 - root - INFO - lr: 4.0536e-05 gnorm: 1.14 [ 7:37:31<16:57:10] +[titan] 2025-10-05 06:11:51,926 - root - INFO - step: 12415 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0550 +[titan] 2025-10-05 06:11:51,926 - root - INFO - lr: 4.0528e-05 gnorm: 1.18 [ 7:37:42<16:56:59] +[titan] 2025-10-05 06:12:02,805 - root - INFO - step: 12420 loss: 2.3265 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 06:12:02,806 - root - INFO - lr: 4.0521e-05 gnorm: 1.08 [ 7:37:53<16:56:48] +[titan] 2025-10-05 06:12:13,684 - root - INFO - step: 12425 loss: 2.3185 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0481 +[titan] 2025-10-05 06:12:13,684 - root - INFO - lr: 4.0514e-05 gnorm: 1.14 [ 7:38:04<16:56:36] +[titan] 2025-10-05 06:12:24,578 - root - INFO - step: 12430 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:12:24,578 - root - INFO - lr: 
4.0507e-05 gnorm: 1.12 [ 7:38:15<16:56:25] +[titan] 2025-10-05 06:12:35,467 - root - INFO - step: 12435 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0765 +[titan] 2025-10-05 06:12:35,468 - root - INFO - lr: 4.0499e-05 gnorm: 1.10 [ 7:38:26<16:56:13] +[titan] 2025-10-05 06:12:46,337 - root - INFO - step: 12440 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:12:46,337 - root - INFO - lr: 4.0492e-05 gnorm: 1.07 [ 7:38:37<16:56:02] +[titan] 2025-10-05 06:12:57,242 - root - INFO - step: 12445 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 06:12:57,242 - root - INFO - lr: 4.0485e-05 gnorm: 1.08 [ 7:38:47<16:55:50] +[titan] 2025-10-05 06:13:05,952 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:13:08,132 - root - INFO - step: 12450 loss: 2.3232 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0520 +[titan] 2025-10-05 06:13:08,132 - root - INFO - lr: 4.0477e-05 gnorm: 1.09 [ 7:38:58<16:55:39] +[titan] 2025-10-05 06:13:19,019 - root - INFO - step: 12455 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:13:19,019 - root - INFO - lr: 4.0470e-05 gnorm: 1.08 [ 7:39:09<16:55:27] +[titan] 2025-10-05 06:13:29,895 - root - INFO - step: 12460 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:13:29,895 - root - INFO - lr: 4.0463e-05 gnorm: 1.12 [ 7:39:20<16:55:16] +[titan] 2025-10-05 06:13:40,820 - root - INFO - step: 12465 loss: 2.3135 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0425 +[titan] 2025-10-05 06:13:40,820 - root - INFO - lr: 4.0456e-05 gnorm: 1.11 [ 7:39:31<16:55:05] +[titan] 2025-10-05 06:13:51,710 - root - INFO - step: 12470 loss: 2.3792 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 06:13:51,710 - root - INFO - lr: 4.0448e-05 gnorm: 1.07 [ 7:39:42<16:54:53] +[titan] 2025-10-05 06:14:02,592 - root - INFO - step: 12475 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0962 +[titan] 2025-10-05 06:14:02,592 - root - INFO - lr: 4.0441e-05 gnorm: 1.11 [ 7:39:53<16:54:42] +[titan] 2025-10-05 06:14:13,496 - root - INFO - step: 12480 loss: 2.2332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9712 +[titan] 2025-10-05 06:14:13,497 - root - INFO - lr: 
4.0434e-05 gnorm: 1.08 [ 7:40:04<16:54:30] +[titan] 2025-10-05 06:14:24,366 - root - INFO - step: 12485 loss: 2.3235 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 06:14:24,366 - root - INFO - lr: 4.0426e-05 gnorm: 1.11 [ 7:40:15<16:54:19] +[titan] 2025-10-05 06:14:35,268 - root - INFO - step: 12490 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0610 +[titan] 2025-10-05 06:14:35,269 - root - INFO - lr: 4.0419e-05 gnorm: 1.09 [ 7:40:25<16:54:07] +[titan] 2025-10-05 06:14:46,143 - root - INFO - step: 12495 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 06:14:46,144 - root - INFO - lr: 4.0412e-05 gnorm: 1.12 [ 7:40:36<16:53:56] +[titan] 2025-10-05 06:14:54,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:14:57,025 - root - INFO - step: 12500 loss: 2.2990 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0321 +[titan] 2025-10-05 06:14:57,025 - root - INFO - lr: 4.0404e-05 gnorm: 1.12 [ 7:40:47<16:53:45] +[titan] 2025-10-05 06:15:07,897 - root - INFO - step: 12505 loss: 2.3230 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 06:15:07,897 - root - INFO - lr: 4.0397e-05 gnorm: 1.14 [ 7:40:58<16:53:33] +[titan] 2025-10-05 06:15:18,787 - root - INFO - step: 12510 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0680 +[titan] 2025-10-05 06:15:18,788 - root - INFO - lr: 4.0390e-05 gnorm: 1.11 [ 7:41:09<16:53:22] +[titan] 2025-10-05 06:15:29,657 - root - INFO - step: 12515 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:15:29,657 - root - INFO - lr: 4.0383e-05 gnorm: 1.11 [ 7:41:20<16:53:10] +[titan] 2025-10-05 06:15:40,564 - root - INFO - step: 12520 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 06:15:40,564 - root - INFO - lr: 4.0375e-05 gnorm: 1.09 [ 7:41:31<16:52:59] +[titan] 2025-10-05 06:15:51,439 - root - INFO - step: 12525 loss: 2.2600 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 06:15:51,439 - root - INFO - lr: 4.0368e-05 gnorm: 1.12 [ 7:41:42<16:52:47] +[titan] 2025-10-05 06:16:02,309 - root - INFO - step: 12530 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2748 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 06:16:02,309 - root - INFO - lr: 
4.0361e-05 gnorm: 1.12 [ 7:41:53<16:52:36] +[titan] 2025-10-05 06:16:13,212 - root - INFO - step: 12535 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:16:13,212 - root - INFO - lr: 4.0353e-05 gnorm: 1.11 [ 7:42:03<16:52:24] +[titan] 2025-10-05 06:16:24,126 - root - INFO - step: 12540 loss: 2.3391 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 06:16:24,127 - root - INFO - lr: 4.0346e-05 gnorm: 1.13 [ 7:42:14<16:52:13] +[titan] 2025-10-05 06:16:35,001 - root - INFO - step: 12545 loss: 2.3246 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 06:16:35,001 - root - INFO - lr: 4.0339e-05 gnorm: 1.12 [ 7:42:25<16:52:02] +[titan] 2025-10-05 06:16:43,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:16:45,891 - root - INFO - step: 12550 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0627 +[titan] 2025-10-05 06:16:45,891 - root - INFO - lr: 4.0331e-05 gnorm: 1.10 [ 7:42:36<16:51:50] +[titan] 2025-10-05 06:16:56,777 - root - INFO - step: 12555 loss: 2.2647 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 06:16:56,777 - root - INFO - lr: 4.0324e-05 gnorm: 1.14 [ 7:42:47<16:51:39] +[titan] 2025-10-05 06:17:07,666 - root - INFO - step: 12560 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 06:17:07,667 - root - INFO - lr: 4.0317e-05 gnorm: 1.12 [ 7:42:58<16:51:27] +[titan] 2025-10-05 06:17:18,556 - root - INFO - step: 12565 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:17:18,557 - root - INFO - lr: 4.0309e-05 gnorm: 1.11 [ 7:43:09<16:51:16] +[titan] 2025-10-05 06:17:29,439 - root - INFO - step: 12570 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 06:17:29,439 - root - INFO - lr: 4.0302e-05 gnorm: 1.11 [ 7:43:20<16:51:04] +[titan] 2025-10-05 06:17:40,372 - root - INFO - step: 12575 loss: 2.2819 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 06:17:40,372 - root - INFO - lr: 4.0295e-05 gnorm: 1.11 [ 7:43:31<16:50:53] +[titan] 2025-10-05 06:17:51,237 - root - INFO - step: 12580 loss: 2.3250 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0525 +[titan] 2025-10-05 06:17:51,238 - root - INFO - lr: 
4.0287e-05 gnorm: 1.11 [ 7:43:41<16:50:42] +[titan] 2025-10-05 06:18:02,105 - root - INFO - step: 12585 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:18:02,105 - root - INFO - lr: 4.0280e-05 gnorm: 1.09 [ 7:43:52<16:50:30] +[titan] 2025-10-05 06:18:12,984 - root - INFO - step: 12590 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.0880 +[titan] 2025-10-05 06:18:12,984 - root - INFO - lr: 4.0273e-05 gnorm: 1.17 [ 7:44:03<16:50:19] +[titan] 2025-10-05 06:18:23,839 - root - INFO - step: 12595 loss: 2.3742 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0956 +[titan] 2025-10-05 06:18:23,839 - root - INFO - lr: 4.0265e-05 gnorm: 1.11 [ 7:44:14<16:50:07] +[titan] 2025-10-05 06:18:32,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:18:34,727 - root - INFO - step: 12600 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9762 +[titan] 2025-10-05 06:18:34,727 - root - INFO - lr: 4.0258e-05 gnorm: 1.14 [ 7:44:25<16:49:56] +[titan] 2025-10-05 06:18:45,664 - root - INFO - step: 12605 loss: 2.3207 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0497 +[titan] 2025-10-05 06:18:45,664 - root - INFO - lr: 4.0250e-05 gnorm: 1.17 [ 7:44:36<16:49:44] +[titan] 2025-10-05 06:18:56,552 - root - INFO - step: 12610 loss: 2.3981 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 06:18:56,552 - root - INFO - lr: 4.0243e-05 gnorm: 1.13 [ 7:44:47<16:49:33] +[titan] 2025-10-05 06:19:07,421 - root - INFO - step: 12615 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 06:19:07,421 - root - INFO - lr: 4.0236e-05 gnorm: 1.09 [ 7:44:58<16:49:22] +[titan] 2025-10-05 06:19:18,306 - root - INFO - step: 12620 loss: 2.3150 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 06:19:18,306 - root - INFO - lr: 4.0228e-05 gnorm: 1.12 [ 7:45:09<16:49:10] +[titan] 2025-10-05 06:19:29,188 - root - INFO - step: 12625 loss: 2.3979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1165 +[titan] 2025-10-05 06:19:29,189 - root - INFO - lr: 4.0221e-05 gnorm: 1.12 [ 7:45:19<16:48:59] +[titan] 2025-10-05 06:19:40,105 - root - INFO - step: 12630 loss: 2.2606 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9964 +[titan] 2025-10-05 06:19:40,105 - root - INFO - lr: 
4.0214e-05 gnorm: 1.14 [ 7:45:30<16:48:47] +[titan] 2025-10-05 06:19:50,986 - root - INFO - step: 12635 loss: 2.3546 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:19:50,986 - root - INFO - lr: 4.0206e-05 gnorm: 1.11 [ 7:45:41<16:48:36] +[titan] 2025-10-05 06:20:01,908 - root - INFO - step: 12640 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:20:01,908 - root - INFO - lr: 4.0199e-05 gnorm: 1.11 [ 7:45:52<16:48:25] +[titan] 2025-10-05 06:20:12,799 - root - INFO - step: 12645 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0332 +[titan] 2025-10-05 06:20:12,799 - root - INFO - lr: 4.0192e-05 gnorm: 1.10 [ 7:46:03<16:48:13] +[titan] 2025-10-05 06:20:21,492 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:20:23,685 - root - INFO - step: 12650 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 06:20:23,685 - root - INFO - lr: 4.0184e-05 gnorm: 1.11 [ 7:46:14<16:48:02] +[titan] 2025-10-05 06:20:34,581 - root - INFO - step: 12655 loss: 2.2611 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 06:20:34,581 - root - INFO - lr: 4.0177e-05 gnorm: 1.08 [ 7:46:25<16:47:50] +[titan] 2025-10-05 06:20:45,479 - root - INFO - step: 12660 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:20:45,480 - root - INFO - lr: 4.0169e-05 gnorm: 1.09 [ 7:46:36<16:47:39] +[titan] 2025-10-05 06:20:56,352 - root - INFO - step: 12665 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:20:56,353 - root - INFO - lr: 4.0162e-05 gnorm: 1.10 [ 7:46:47<16:47:27] +[titan] 2025-10-05 06:21:07,226 - root - INFO - step: 12670 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9534 +[titan] 2025-10-05 06:21:07,226 - root - INFO - lr: 4.0155e-05 gnorm: 1.08 [ 7:46:57<16:47:16] +[titan] 2025-10-05 06:21:18,106 - root - INFO - step: 12675 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9903 +[titan] 2025-10-05 06:21:18,106 - root - INFO - lr: 4.0147e-05 gnorm: 1.16 [ 7:47:08<16:47:05] +[titan] 2025-10-05 06:21:28,978 - root - INFO - step: 12680 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 06:21:28,978 - root - INFO - lr: 
4.0140e-05 gnorm: 1.11 [ 7:47:19<16:46:53] +[titan] 2025-10-05 06:21:39,844 - root - INFO - step: 12685 loss: 2.3348 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0622 +[titan] 2025-10-05 06:21:39,844 - root - INFO - lr: 4.0133e-05 gnorm: 1.13 [ 7:47:30<16:46:42] +[titan] 2025-10-05 06:21:50,731 - root - INFO - step: 12690 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 06:21:50,731 - root - INFO - lr: 4.0125e-05 gnorm: 1.14 [ 7:47:41<16:46:30] +[titan] 2025-10-05 06:22:01,611 - root - INFO - step: 12695 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 06:22:01,612 - root - INFO - lr: 4.0118e-05 gnorm: 1.10 [ 7:47:52<16:46:19] +[titan] 2025-10-05 06:22:10,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:22:12,500 - root - INFO - step: 12700 loss: 2.3396 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0653 +[titan] 2025-10-05 06:22:12,501 - root - INFO - lr: 4.0110e-05 gnorm: 1.11 [ 7:48:03<16:46:07] +[titan] 2025-10-05 06:22:23,372 - root - INFO - step: 12705 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0153 +[titan] 2025-10-05 06:22:23,372 - root - INFO - lr: 4.0103e-05 gnorm: 1.11 [ 7:48:14<16:45:56] +[titan] 2025-10-05 06:22:34,241 - root - INFO - step: 12710 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:22:34,241 - root - INFO - lr: 4.0096e-05 gnorm: 1.10 [ 7:48:24<16:45:44] +[titan] 2025-10-05 06:22:45,141 - root - INFO - step: 12715 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0103 +[titan] 2025-10-05 06:22:45,141 - root - INFO - lr: 4.0088e-05 gnorm: 1.14 [ 7:48:35<16:45:33] +[titan] 2025-10-05 06:22:56,018 - root - INFO - step: 12720 loss: 2.2452 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 06:22:56,018 - root - INFO - lr: 4.0081e-05 gnorm: 1.10 [ 7:48:46<16:45:22] +[titan] 2025-10-05 06:23:06,904 - root - INFO - step: 12725 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0456 +[titan] 2025-10-05 06:23:06,904 - root - INFO - lr: 4.0073e-05 gnorm: 1.10 [ 7:48:57<16:45:10] +[titan] 2025-10-05 06:23:17,777 - root - INFO - step: 12730 loss: 2.3547 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0804 +[titan] 2025-10-05 06:23:17,778 - root - INFO - lr: 
4.0066e-05 gnorm: 1.09 [ 7:49:08<16:44:59] +[titan] 2025-10-05 06:23:28,700 - root - INFO - step: 12735 loss: 2.4579 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1714 +[titan] 2025-10-05 06:23:28,700 - root - INFO - lr: 4.0059e-05 gnorm: 1.12 [ 7:49:19<16:44:47] +[titan] 2025-10-05 06:23:39,577 - root - INFO - step: 12740 loss: 2.2807 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 06:23:39,577 - root - INFO - lr: 4.0051e-05 gnorm: 1.08 [ 7:49:30<16:44:36] +[titan] 2025-10-05 06:23:50,466 - root - INFO - step: 12745 loss: 2.2580 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9955 +[titan] 2025-10-05 06:23:50,467 - root - INFO - lr: 4.0044e-05 gnorm: 1.13 [ 7:49:41<16:44:25] +[titan] 2025-10-05 06:23:59,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:24:01,332 - root - INFO - step: 12750 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:24:01,332 - root - INFO - lr: 4.0036e-05 gnorm: 1.16 [ 7:49:52<16:44:13] +[titan] 2025-10-05 06:24:12,211 - root - INFO - step: 12755 loss: 2.3122 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 06:24:12,211 - root - INFO - lr: 4.0029e-05 gnorm: 1.10 [ 7:50:02<16:44:02] +[titan] 2025-10-05 06:24:23,070 - root - INFO - step: 12760 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 06:24:23,070 - root - INFO - lr: 4.0022e-05 gnorm: 1.11 [ 7:50:13<16:43:50] +[titan] 2025-10-05 06:24:33,960 - root - INFO - step: 12765 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0308 +[titan] 2025-10-05 06:24:33,960 - root - INFO - lr: 4.0014e-05 gnorm: 1.11 [ 7:50:24<16:43:39] +[titan] 2025-10-05 06:24:44,855 - root - INFO - step: 12770 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0202 +[titan] 2025-10-05 06:24:44,855 - root - INFO - lr: 4.0007e-05 gnorm: 1.10 [ 7:50:35<16:43:27] +[titan] 2025-10-05 06:24:55,732 - root - INFO - step: 12775 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 06:24:55,732 - root - INFO - lr: 3.9999e-05 gnorm: 1.13 [ 7:50:46<16:43:16] +[titan] 2025-10-05 06:25:06,578 - root - INFO - step: 12780 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:06,578 - root - INFO - lr: 
3.9992e-05 gnorm: 1.10 [ 7:50:57<16:43:04] +[titan] 2025-10-05 06:25:17,446 - root - INFO - step: 12785 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0703 +[titan] 2025-10-05 06:25:17,446 - root - INFO - lr: 3.9984e-05 gnorm: 1.15 [ 7:51:08<16:42:53] +[titan] 2025-10-05 06:25:28,322 - root - INFO - step: 12790 loss: 2.1995 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 06:25:28,322 - root - INFO - lr: 3.9977e-05 gnorm: 1.08 [ 7:51:19<16:42:42] +[titan] 2025-10-05 06:25:39,196 - root - INFO - step: 12795 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0818 +[titan] 2025-10-05 06:25:39,196 - root - INFO - lr: 3.9970e-05 gnorm: 1.11 [ 7:51:29<16:42:30] +[titan] 2025-10-05 06:25:48,007 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:25:50,190 - root - INFO - step: 12800 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:50,191 - root - INFO - lr: 3.9962e-05 gnorm: 1.12 [ 7:51:40<16:42:19] +[titan] 2025-10-05 06:25:50,380 - root - INFO - Dumping profiler traces at step 12800 +[titan] 2025-10-05 06:25:50,419 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:26:01,279 - root - INFO - step: 12805 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 29,551 tflops: 409.98 mfu: 41.45% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 06:26:01,280 - root - INFO - lr: 3.9955e-05 gnorm: 1.13 [ 7:51:51<16:42:08] +[titan] 2025-10-05 06:26:12,154 - root - INFO - step: 12810 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:26:12,154 - root - INFO - lr: 3.9947e-05 gnorm: 1.08 [ 7:52:02<16:41:56] +[titan] 2025-10-05 06:26:23,032 - root - INFO - step: 12815 loss: 2.3306 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0579 +[titan] 2025-10-05 06:26:23,033 - root - INFO - lr: 3.9940e-05 gnorm: 1.06 [ 7:52:13<16:41:45] +[titan] 2025-10-05 06:26:33,940 - root - INFO - step: 12820 loss: 2.3775 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0989 +[titan] 2025-10-05 06:26:33,940 - root - INFO - lr: 3.9932e-05 gnorm: 1.15 [ 7:52:24<16:41:34] +[titan] 2025-10-05 06:26:44,836 - root - INFO - step: 12825 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0008 +[titan] 2025-10-05 06:26:44,836 - root - INFO - lr: 3.9925e-05 gnorm: 1.05 [ 7:52:35<16:41:22] +[titan] 2025-10-05 06:26:55,799 - root - INFO - step: 12830 loss: 
2.3367 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0630 +[titan] 2025-10-05 06:26:55,799 - root - INFO - lr: 3.9918e-05 gnorm: 1.14 [ 7:52:46<16:41:11] +[titan] 2025-10-05 06:27:06,678 - root - INFO - step: 12835 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 06:27:06,678 - root - INFO - lr: 3.9910e-05 gnorm: 1.07 [ 7:52:57<16:41:00] +[titan] 2025-10-05 06:27:17,590 - root - INFO - step: 12840 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:27:17,590 - root - INFO - lr: 3.9903e-05 gnorm: 1.10 [ 7:53:08<16:40:48] +[titan] 2025-10-05 06:27:28,495 - root - INFO - step: 12845 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0092 +[titan] 2025-10-05 06:27:28,495 - root - INFO - lr: 3.9895e-05 gnorm: 1.10 [ 7:53:19<16:40:37] +[titan] 2025-10-05 06:27:37,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:27:39,368 - root - INFO - step: 12850 loss: 2.2958 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0276 +[titan] 2025-10-05 06:27:39,368 - root - INFO - lr: 3.9888e-05 gnorm: 1.09 [ 7:53:30<16:40:25] +[titan] 2025-10-05 06:27:50,338 - root - INFO - step: 12855 loss: 2.2825 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0128 +[titan] 2025-10-05 06:27:50,339 - root - INFO - lr: 3.9880e-05 gnorm: 1.16 [ 7:53:41<16:40:14] +[titan] 2025-10-05 06:28:01,245 - root - INFO - step: 12860 loss: 2.3056 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:28:01,245 - root - INFO - lr: 3.9873e-05 gnorm: 1.08 [ 7:53:51<16:40:03] +[titan] 2025-10-05 06:28:12,135 - root - INFO - step: 12865 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9912 +[titan] 2025-10-05 06:28:12,135 - root - INFO - lr: 3.9865e-05 gnorm: 1.10 [ 7:54:02<16:39:51] +[titan] 2025-10-05 06:28:23,005 - root - INFO - step: 12870 loss: 2.3501 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 06:28:23,005 - root - INFO - lr: 3.9858e-05 gnorm: 1.07 [ 7:54:13<16:39:40] +[titan] 2025-10-05 06:28:33,877 - root - INFO - step: 12875 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0229 +[titan] 2025-10-05 06:28:33,877 - root - INFO - lr: 3.9850e-05 gnorm: 1.13 [ 7:54:24<16:39:29] +[titan] 2025-10-05 06:28:44,761 - root - INFO - step: 12880 loss: 2.3117 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:28:44,761 - root - INFO - lr: 
3.9843e-05 gnorm: 1.15 [ 7:54:35<16:39:17] +[titan] 2025-10-05 06:28:55,685 - root - INFO - step: 12885 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:28:55,685 - root - INFO - lr: 3.9836e-05 gnorm: 1.11 [ 7:54:46<16:39:06] +[titan] 2025-10-05 06:29:06,556 - root - INFO - step: 12890 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:29:06,556 - root - INFO - lr: 3.9828e-05 gnorm: 1.12 [ 7:54:57<16:38:54] +[titan] 2025-10-05 06:29:17,466 - root - INFO - step: 12895 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:29:17,467 - root - INFO - lr: 3.9821e-05 gnorm: 1.11 [ 7:55:08<16:38:43] +[titan] 2025-10-05 06:29:26,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:29:28,342 - root - INFO - step: 12900 loss: 2.3579 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0826 +[titan] 2025-10-05 06:29:28,342 - root - INFO - lr: 3.9813e-05 gnorm: 1.11 [ 7:55:19<16:38:32] +[titan] 2025-10-05 06:29:39,206 - root - INFO - step: 12905 loss: 2.2414 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 06:29:39,206 - root - INFO - lr: 3.9806e-05 gnorm: 1.08 [ 7:55:29<16:38:20] +[titan] 2025-10-05 06:29:50,114 - root - INFO - step: 12910 loss: 2.2702 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:29:50,114 - root - INFO - lr: 3.9798e-05 gnorm: 1.13 [ 7:55:40<16:38:09] +[titan] 2025-10-05 06:30:00,993 - root - INFO - step: 12915 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0072 +[titan] 2025-10-05 06:30:00,993 - root - INFO - lr: 3.9791e-05 gnorm: 1.07 [ 7:55:51<16:37:57] +[titan] 2025-10-05 06:30:11,897 - root - INFO - step: 12920 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0307 +[titan] 2025-10-05 06:30:11,898 - root - INFO - lr: 3.9783e-05 gnorm: 1.04 [ 7:56:02<16:37:46] +[titan] 2025-10-05 06:30:22,817 - root - INFO - step: 12925 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0581 +[titan] 2025-10-05 06:30:22,817 - root - INFO - lr: 3.9776e-05 gnorm: 1.13 [ 7:56:13<16:37:35] +[titan] 2025-10-05 06:30:33,737 - root - INFO - step: 12930 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:30:33,737 - root - INFO - lr: 
3.9768e-05 gnorm: 1.13 [ 7:56:24<16:37:23] +[titan] 2025-10-05 06:30:44,627 - root - INFO - step: 12935 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:30:44,627 - root - INFO - lr: 3.9761e-05 gnorm: 1.10 [ 7:56:35<16:37:12] +[titan] 2025-10-05 06:30:55,585 - root - INFO - step: 12940 loss: 2.3356 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 06:30:55,585 - root - INFO - lr: 3.9753e-05 gnorm: 1.11 [ 7:56:46<16:37:01] +[titan] 2025-10-05 06:31:06,518 - root - INFO - step: 12945 loss: 2.2859 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:31:06,518 - root - INFO - lr: 3.9746e-05 gnorm: 1.10 [ 7:56:57<16:36:49] +[titan] 2025-10-05 06:31:15,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:31:17,447 - root - INFO - step: 12950 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0289 +[titan] 2025-10-05 06:31:17,447 - root - INFO - lr: 3.9738e-05 gnorm: 1.11 [ 7:57:08<16:36:38] +[titan] 2025-10-05 06:31:28,381 - root - INFO - step: 12955 loss: 2.3005 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 06:31:28,382 - root - INFO - lr: 3.9731e-05 gnorm: 1.07 [ 7:57:19<16:36:27] +[titan] 2025-10-05 06:31:39,314 - root - INFO - step: 12960 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0616 +[titan] 2025-10-05 06:31:39,314 - root - INFO - lr: 3.9723e-05 gnorm: 1.13 [ 7:57:29<16:36:15] +[titan] 2025-10-05 06:31:50,231 - root - INFO - step: 12965 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 06:31:50,231 - root - INFO - lr: 3.9716e-05 gnorm: 1.14 [ 7:57:40<16:36:04] +[titan] 2025-10-05 06:32:01,132 - root - INFO - step: 12970 loss: 2.3312 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:32:01,132 - root - INFO - lr: 3.9708e-05 gnorm: 1.16 [ 7:57:51<16:35:53] +[titan] 2025-10-05 06:32:12,023 - root - INFO - step: 12975 loss: 2.2497 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 06:32:12,023 - root - INFO - lr: 3.9701e-05 gnorm: 1.13 [ 7:58:02<16:35:41] +[titan] 2025-10-05 06:32:22,921 - root - INFO - step: 12980 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0314 +[titan] 2025-10-05 06:32:22,922 - root - INFO - lr: 
3.9693e-05 gnorm: 1.08 [ 7:58:13<16:35:30] +[titan] 2025-10-05 06:32:33,791 - root - INFO - step: 12985 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 06:32:33,791 - root - INFO - lr: 3.9686e-05 gnorm: 1.06 [ 7:58:24<16:35:18] +[titan] 2025-10-05 06:32:44,706 - root - INFO - step: 12990 loss: 2.3628 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 06:32:44,706 - root - INFO - lr: 3.9678e-05 gnorm: 1.12 [ 7:58:35<16:35:07] +[titan] 2025-10-05 06:32:55,609 - root - INFO - step: 12995 loss: 2.2830 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:32:55,609 - root - INFO - lr: 3.9671e-05 gnorm: 1.13 [ 7:58:46<16:34:56] +[titan] 2025-10-05 06:33:04,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:33:06,487 - root - INFO - step: 13000 loss: 2.2887 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:33:06,487 - root - INFO - lr: 3.9663e-05 gnorm: 1.09 [ 7:58:57<16:34:44] +[titan] 2025-10-05 06:33:17,365 - root - INFO - step: 13005 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9733 +[titan] 2025-10-05 06:33:17,365 - root - INFO - lr: 3.9656e-05 gnorm: 1.09 [ 7:59:08<16:34:33] +[titan] 2025-10-05 06:33:28,255 - root - INFO - step: 13010 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0677 +[titan] 2025-10-05 06:33:28,255 - root - INFO - lr: 3.9648e-05 gnorm: 1.10 [ 7:59:18<16:34:22] +[titan] 2025-10-05 06:33:39,107 - root - INFO - step: 13015 loss: 2.3870 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 06:33:39,107 - root - INFO - lr: 3.9641e-05 gnorm: 1.14 [ 7:59:29<16:34:10] +[titan] 2025-10-05 06:33:49,999 - root - INFO - step: 13020 loss: 2.2362 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9754 +[titan] 2025-10-05 06:33:49,999 - root - INFO - lr: 3.9633e-05 gnorm: 1.04 [ 7:59:40<16:33:59] +[titan] 2025-10-05 06:34:00,906 - root - INFO - step: 13025 loss: 2.3058 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0372 +[titan] 2025-10-05 06:34:00,906 - root - INFO - lr: 3.9626e-05 gnorm: 1.10 [ 7:59:51<16:33:47] +[titan] 2025-10-05 06:34:11,756 - root - INFO - step: 13030 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9901 +[titan] 2025-10-05 06:34:11,756 - root - INFO - lr: 
3.9618e-05 gnorm: 1.10 [ 8:00:02<16:33:36] +[titan] 2025-10-05 06:34:22,620 - root - INFO - step: 13035 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:34:22,620 - root - INFO - lr: 3.9611e-05 gnorm: 1.07 [ 8:00:13<16:33:24] +[titan] 2025-10-05 06:34:33,499 - root - INFO - step: 13040 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:34:33,499 - root - INFO - lr: 3.9603e-05 gnorm: 1.10 [ 8:00:24<16:33:13] +[titan] 2025-10-05 06:34:44,364 - root - INFO - step: 13045 loss: 2.3062 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 06:34:44,365 - root - INFO - lr: 3.9596e-05 gnorm: 1.12 [ 8:00:35<16:33:02] +[titan] 2025-10-05 06:34:53,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:34:55,288 - root - INFO - step: 13050 loss: 2.2984 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:34:55,289 - root - INFO - lr: 3.9588e-05 gnorm: 1.10 [ 8:00:45<16:32:50] +[titan] 2025-10-05 06:35:06,196 - root - INFO - step: 13055 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9765 +[titan] 2025-10-05 06:35:06,197 - root - INFO - lr: 3.9581e-05 gnorm: 1.05 [ 8:00:56<16:32:39] +[titan] 2025-10-05 06:35:17,080 - root - INFO - step: 13060 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 06:35:17,080 - root - INFO - lr: 3.9573e-05 gnorm: 1.10 [ 8:01:07<16:32:28] +[titan] 2025-10-05 06:35:27,969 - root - INFO - step: 13065 loss: 2.2499 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:35:27,969 - root - INFO - lr: 3.9566e-05 gnorm: 1.09 [ 8:01:18<16:32:16] +[titan] 2025-10-05 06:35:38,866 - root - INFO - step: 13070 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0717 +[titan] 2025-10-05 06:35:38,867 - root - INFO - lr: 3.9558e-05 gnorm: 1.12 [ 8:01:29<16:32:05] +[titan] 2025-10-05 06:35:49,752 - root - INFO - step: 13075 loss: 2.3177 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0458 +[titan] 2025-10-05 06:35:49,752 - root - INFO - lr: 3.9551e-05 gnorm: 1.11 [ 8:01:40<16:31:53] +[titan] 2025-10-05 06:36:00,668 - root - INFO - step: 13080 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 06:36:00,669 - root - INFO - lr: 
3.9543e-05 gnorm: 1.07 [ 8:01:51<16:31:42] +[titan] 2025-10-05 06:36:11,580 - root - INFO - step: 13085 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 06:36:11,580 - root - INFO - lr: 3.9535e-05 gnorm: 1.11 [ 8:02:02<16:31:31] +[titan] 2025-10-05 06:36:22,465 - root - INFO - step: 13090 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:36:22,465 - root - INFO - lr: 3.9528e-05 gnorm: 1.09 [ 8:02:13<16:31:19] +[titan] 2025-10-05 06:36:33,326 - root - INFO - step: 13095 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9630 +[titan] 2025-10-05 06:36:33,326 - root - INFO - lr: 3.9520e-05 gnorm: 1.10 [ 8:02:23<16:31:08] +[titan] 2025-10-05 06:36:41,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:36:44,174 - root - INFO - step: 13100 loss: 2.3105 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:36:44,174 - root - INFO - lr: 3.9513e-05 gnorm: 1.14 [ 8:02:34<16:30:56] +[titan] 2025-10-05 06:36:55,075 - root - INFO - step: 13105 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:36:55,075 - root - INFO - lr: 3.9505e-05 gnorm: 1.18 [ 8:02:45<16:30:45] +[titan] 2025-10-05 06:37:05,918 - root - INFO - step: 13110 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 06:37:05,918 - root - INFO - lr: 3.9498e-05 gnorm: 1.08 [ 8:02:56<16:30:34] +[titan] 2025-10-05 06:37:16,786 - root - INFO - step: 13115 loss: 2.2582 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 06:37:16,786 - root - INFO - lr: 3.9490e-05 gnorm: 1.10 [ 8:03:07<16:30:22] +[titan] 2025-10-05 06:37:27,685 - root - INFO - step: 13120 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 06:37:27,685 - root - INFO - lr: 3.9483e-05 gnorm: 1.11 [ 8:03:18<16:30:11] +[titan] 2025-10-05 06:37:38,554 - root - INFO - step: 13125 loss: 2.3124 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 06:37:38,554 - root - INFO - lr: 3.9475e-05 gnorm: 1.10 [ 8:03:29<16:29:59] +[titan] 2025-10-05 06:37:49,418 - root - INFO - step: 13130 loss: 2.3195 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:37:49,418 - root - INFO - lr: 
3.9468e-05 gnorm: 1.09 [ 8:03:40<16:29:48] +[titan] 2025-10-05 06:38:00,337 - root - INFO - step: 13135 loss: 2.2981 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:38:00,337 - root - INFO - lr: 3.9460e-05 gnorm: 1.09 [ 8:03:50<16:29:37] +[titan] 2025-10-05 06:38:11,199 - root - INFO - step: 13140 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9924 +[titan] 2025-10-05 06:38:11,200 - root - INFO - lr: 3.9452e-05 gnorm: 1.06 [ 8:04:01<16:29:25] +[titan] 2025-10-05 06:38:22,080 - root - INFO - step: 13145 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2759 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 06:38:22,080 - root - INFO - lr: 3.9445e-05 gnorm: 1.08 [ 8:04:12<16:29:14] +[titan] 2025-10-05 06:38:30,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:38:32,990 - root - INFO - step: 13150 loss: 2.2897 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0219 +[titan] 2025-10-05 06:38:32,990 - root - INFO - lr: 3.9437e-05 gnorm: 1.12 [ 8:04:23<16:29:02] +[titan] 2025-10-05 06:38:43,859 - root - INFO - step: 13155 loss: 2.2817 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0147 +[titan] 2025-10-05 06:38:43,859 - root - INFO - lr: 3.9430e-05 gnorm: 1.08 [ 8:04:34<16:28:51] +[titan] 2025-10-05 06:38:54,735 - root - INFO - step: 13160 loss: 2.3131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0428 +[titan] 2025-10-05 06:38:54,736 - root - INFO - lr: 3.9422e-05 gnorm: 1.11 [ 8:04:45<16:28:40] +[titan] 2025-10-05 06:39:05,628 - root - INFO - step: 13165 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 06:39:05,629 - root - INFO - lr: 3.9415e-05 gnorm: 1.10 [ 8:04:56<16:28:28] +[titan] 2025-10-05 06:39:16,489 - root - INFO - step: 13170 loss: 2.3292 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:39:16,489 - root - INFO - lr: 3.9407e-05 gnorm: 1.11 [ 8:05:07<16:28:17] +[titan] 2025-10-05 06:39:27,377 - root - INFO - step: 13175 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9764 +[titan] 2025-10-05 06:39:27,377 - root - INFO - lr: 3.9399e-05 gnorm: 1.07 [ 8:05:18<16:28:05] +[titan] 2025-10-05 06:39:38,260 - root - INFO - step: 13180 loss: 2.2929 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0261 +[titan] 2025-10-05 06:39:38,260 - root - INFO - lr: 
3.9392e-05 gnorm: 1.18 [ 8:05:28<16:27:54] +[titan] 2025-10-05 06:39:49,151 - root - INFO - step: 13185 loss: 2.2880 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0212 +[titan] 2025-10-05 06:39:49,152 - root - INFO - lr: 3.9384e-05 gnorm: 1.13 [ 8:05:39<16:27:43] +[titan] 2025-10-05 06:40:00,050 - root - INFO - step: 13190 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 06:40:00,050 - root - INFO - lr: 3.9377e-05 gnorm: 1.11 [ 8:05:50<16:27:31] +[titan] 2025-10-05 06:40:10,934 - root - INFO - step: 13195 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9939 +[titan] 2025-10-05 06:40:10,934 - root - INFO - lr: 3.9369e-05 gnorm: 1.10 [ 8:06:01<16:27:20] +[titan] 2025-10-05 06:40:19,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:40:21,820 - root - INFO - step: 13200 loss: 2.2675 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0021 +[titan] 2025-10-05 06:40:21,820 - root - INFO - lr: 3.9362e-05 gnorm: 1.13 [ 8:06:12<16:27:08] +[titan] 2025-10-05 06:40:32,683 - root - INFO - step: 13205 loss: 2.3004 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:40:32,683 - root - INFO - lr: 3.9354e-05 gnorm: 1.11 [ 8:06:23<16:26:57] +[titan] 2025-10-05 06:40:43,552 - root - INFO - step: 13210 loss: 2.3321 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0596 +[titan] 2025-10-05 06:40:43,552 - root - INFO - lr: 3.9346e-05 gnorm: 1.09 [ 8:06:34<16:26:46] +[titan] 2025-10-05 06:40:54,441 - root - INFO - step: 13215 loss: 2.3746 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 06:40:54,441 - root - INFO - lr: 3.9339e-05 gnorm: 1.09 [ 8:06:45<16:26:34] +[titan] 2025-10-05 06:41:05,315 - root - INFO - step: 13220 loss: 2.3394 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0660 +[titan] 2025-10-05 06:41:05,315 - root - INFO - lr: 3.9331e-05 gnorm: 1.13 [ 8:06:55<16:26:23] +[titan] 2025-10-05 06:41:16,174 - root - INFO - step: 13225 loss: 2.2522 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 06:41:16,174 - root - INFO - lr: 3.9324e-05 gnorm: 1.10 [ 8:07:06<16:26:11] +[titan] 2025-10-05 06:41:27,031 - root - INFO - step: 13230 loss: 2.2903 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:41:27,031 - root - INFO - lr: 
3.9316e-05 gnorm: 1.10 [ 8:07:17<16:26:00] +[titan] 2025-10-05 06:41:37,890 - root - INFO - step: 13235 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0078 +[titan] 2025-10-05 06:41:37,890 - root - INFO - lr: 3.9308e-05 gnorm: 1.09 [ 8:07:28<16:25:49] +[titan] 2025-10-05 06:41:48,764 - root - INFO - step: 13240 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 06:41:48,764 - root - INFO - lr: 3.9301e-05 gnorm: 1.10 [ 8:07:39<16:25:37] +[titan] 2025-10-05 06:41:59,671 - root - INFO - step: 13245 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0046 +[titan] 2025-10-05 06:41:59,672 - root - INFO - lr: 3.9293e-05 gnorm: 1.13 [ 8:07:50<16:25:26] +[titan] 2025-10-05 06:42:08,368 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:42:10,557 - root - INFO - step: 13250 loss: 2.3326 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0599 +[titan] 2025-10-05 06:42:10,557 - root - INFO - lr: 3.9286e-05 gnorm: 1.14 [ 8:08:01<16:25:14] +[titan] 2025-10-05 06:42:21,421 - root - INFO - step: 13255 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 06:42:21,421 - root - INFO - lr: 3.9278e-05 gnorm: 1.14 [ 8:08:12<16:25:03] +[titan] 2025-10-05 06:42:32,317 - root - INFO - step: 13260 loss: 2.2022 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9453 +[titan] 2025-10-05 06:42:32,317 - root - INFO - lr: 3.9270e-05 gnorm: 1.07 [ 8:08:22<16:24:52] +[titan] 2025-10-05 06:42:43,197 - root - INFO - step: 13265 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 06:42:43,198 - root - INFO - lr: 3.9263e-05 gnorm: 1.11 [ 8:08:33<16:24:40] +[titan] 2025-10-05 06:42:54,090 - root - INFO - step: 13270 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 06:42:54,091 - root - INFO - lr: 3.9255e-05 gnorm: 1.10 [ 8:08:44<16:24:29] +[titan] 2025-10-05 06:43:05,001 - root - INFO - step: 13275 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 06:43:05,001 - root - INFO - lr: 3.9248e-05 gnorm: 1.10 [ 8:08:55<16:24:18] +[titan] 2025-10-05 06:43:15,880 - root - INFO - step: 13280 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:15,880 - root - INFO - lr: 
3.9240e-05 gnorm: 1.07 [ 8:09:06<16:24:06] +[titan] 2025-10-05 06:43:26,737 - root - INFO - step: 13285 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:26,737 - root - INFO - lr: 3.9232e-05 gnorm: 1.11 [ 8:09:17<16:23:55] +[titan] 2025-10-05 06:43:37,602 - root - INFO - step: 13290 loss: 2.3086 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:43:37,602 - root - INFO - lr: 3.9225e-05 gnorm: 1.10 [ 8:09:28<16:23:43] +[titan] 2025-10-05 06:43:48,473 - root - INFO - step: 13295 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 06:43:48,473 - root - INFO - lr: 3.9217e-05 gnorm: 1.11 [ 8:09:39<16:23:32] +[titan] 2025-10-05 06:43:57,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:43:59,317 - root - INFO - step: 13300 loss: 2.3797 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 06:43:59,318 - root - INFO - lr: 3.9209e-05 gnorm: 1.11 [ 8:09:49<16:23:20] +[titan] 2025-10-05 06:44:10,186 - root - INFO - step: 13305 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0141 +[titan] 2025-10-05 06:44:10,186 - root - INFO - lr: 3.9202e-05 gnorm: 1.09 [ 8:10:00<16:23:09] +[titan] 2025-10-05 06:44:21,180 - root - INFO - step: 13310 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0337 +[titan] 2025-10-05 06:44:21,180 - root - INFO - lr: 3.9194e-05 gnorm: 1.09 [ 8:10:11<16:22:58] +[titan] 2025-10-05 06:44:25,695 - root - INFO - Dumping profiler traces at step 13312 +[titan] 2025-10-05 06:44:25,733 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:44:32,265 - root - INFO - step: 13315 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 29,562 tflops: 410.13 mfu: 41.47% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 06:44:32,265 - root - INFO - lr: 3.9187e-05 gnorm: 1.04 [ 8:10:22<16:22:47] +[titan] 2025-10-05 06:44:43,144 - root - INFO - step: 13320 loss: 2.3112 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 06:44:43,145 - root - INFO - lr: 3.9179e-05 gnorm: 1.13 [ 8:10:33<16:22:35] +[titan] 2025-10-05 06:44:54,006 - root - INFO - step: 13325 loss: 2.3530 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0783 +[titan] 2025-10-05 06:44:54,006 - root - INFO - lr: 3.9171e-05 gnorm: 1.06 [ 8:10:44<16:22:24] +[titan] 2025-10-05 06:45:04,897 - root - INFO - step: 13330 loss: 
2.3671 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 06:45:04,897 - root - INFO - lr: 3.9164e-05 gnorm: 1.11 [ 8:10:55<16:22:13] +[titan] 2025-10-05 06:45:15,754 - root - INFO - step: 13335 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0644 +[titan] 2025-10-05 06:45:15,754 - root - INFO - lr: 3.9156e-05 gnorm: 1.16 [ 8:11:06<16:22:01] +[titan] 2025-10-05 06:45:26,632 - root - INFO - step: 13340 loss: 2.2623 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:45:26,632 - root - INFO - lr: 3.9148e-05 gnorm: 1.12 [ 8:11:17<16:21:50] +[titan] 2025-10-05 06:45:37,522 - root - INFO - step: 13345 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 06:45:37,522 - root - INFO - lr: 3.9141e-05 gnorm: 1.07 [ 8:11:28<16:21:38] +[titan] 2025-10-05 06:45:46,192 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:45:48,374 - root - INFO - step: 13350 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:45:48,374 - root - INFO - lr: 3.9133e-05 gnorm: 1.10 [ 8:11:39<16:21:27] +[titan] 2025-10-05 06:45:59,227 - root - INFO - step: 13355 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0173 +[titan] 2025-10-05 06:45:59,227 - root - INFO - lr: 3.9126e-05 gnorm: 1.11 [ 8:11:49<16:21:16] +[titan] 2025-10-05 06:46:10,100 - root - INFO - step: 13360 loss: 2.3111 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:46:10,100 - root - INFO - lr: 3.9118e-05 gnorm: 1.11 [ 8:12:00<16:21:04] +[titan] 2025-10-05 06:46:20,957 - root - INFO - step: 13365 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 06:46:20,958 - root - INFO - lr: 3.9110e-05 gnorm: 1.10 [ 8:12:11<16:20:53] +[titan] 2025-10-05 06:46:31,838 - root - INFO - step: 13370 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0254 +[titan] 2025-10-05 06:46:31,838 - root - INFO - lr: 3.9103e-05 gnorm: 1.13 [ 8:12:22<16:20:41] +[titan] 2025-10-05 06:46:42,735 - root - INFO - step: 13375 loss: 2.3437 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0696 +[titan] 2025-10-05 06:46:42,735 - root - INFO - lr: 3.9095e-05 gnorm: 1.12 [ 8:12:33<16:20:30] +[titan] 2025-10-05 06:46:53,595 - root - INFO - step: 13380 loss: 2.2952 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0281 +[titan] 2025-10-05 06:46:53,595 - root - INFO - lr: 
3.9087e-05 gnorm: 1.07 [ 8:12:44<16:20:19] +[titan] 2025-10-05 06:47:04,484 - root - INFO - step: 13385 loss: 2.3167 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0450 +[titan] 2025-10-05 06:47:04,485 - root - INFO - lr: 3.9080e-05 gnorm: 1.12 [ 8:12:55<16:20:07] +[titan] 2025-10-05 06:47:15,385 - root - INFO - step: 13390 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:47:15,385 - root - INFO - lr: 3.9072e-05 gnorm: 1.13 [ 8:13:06<16:19:56] +[titan] 2025-10-05 06:47:26,291 - root - INFO - step: 13395 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:47:26,291 - root - INFO - lr: 3.9064e-05 gnorm: 1.09 [ 8:13:16<16:19:45] +[titan] 2025-10-05 06:47:34,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:47:37,159 - root - INFO - step: 13400 loss: 2.2934 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0255 +[titan] 2025-10-05 06:47:37,159 - root - INFO - lr: 3.9057e-05 gnorm: 1.10 [ 8:13:27<16:19:33] +[titan] 2025-10-05 06:47:48,051 - root - INFO - step: 13405 loss: 2.1829 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 06:47:48,051 - root - INFO - lr: 3.9049e-05 gnorm: 1.13 [ 8:13:38<16:19:22] +[titan] 2025-10-05 06:47:58,962 - root - INFO - step: 13410 loss: 2.3403 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0664 +[titan] 2025-10-05 06:47:58,962 - root - INFO - lr: 3.9041e-05 gnorm: 1.08 [ 8:13:49<16:19:11] +[titan] 2025-10-05 06:48:09,859 - root - INFO - step: 13415 loss: 2.2971 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:48:09,859 - root - INFO - lr: 3.9034e-05 gnorm: 1.09 [ 8:14:00<16:18:59] +[titan] 2025-10-05 06:48:20,742 - root - INFO - step: 13420 loss: 2.3033 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0348 +[titan] 2025-10-05 06:48:20,742 - root - INFO - lr: 3.9026e-05 gnorm: 1.09 [ 8:14:11<16:18:48] +[titan] 2025-10-05 06:48:31,616 - root - INFO - step: 13425 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0709 +[titan] 2025-10-05 06:48:31,616 - root - INFO - lr: 3.9018e-05 gnorm: 1.11 [ 8:14:22<16:18:36] +[titan] 2025-10-05 06:48:42,471 - root - INFO - step: 13430 loss: 2.2153 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 06:48:42,471 - root - INFO - lr: 
3.9011e-05 gnorm: 1.09 [ 8:14:33<16:18:25] +[titan] 2025-10-05 06:48:53,334 - root - INFO - step: 13435 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 06:48:53,334 - root - INFO - lr: 3.9003e-05 gnorm: 1.10 [ 8:14:43<16:18:14] +[titan] 2025-10-05 06:49:04,235 - root - INFO - step: 13440 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 06:49:04,235 - root - INFO - lr: 3.8995e-05 gnorm: 1.10 [ 8:14:54<16:18:02] +[titan] 2025-10-05 06:49:15,122 - root - INFO - step: 13445 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0366 +[titan] 2025-10-05 06:49:15,122 - root - INFO - lr: 3.8988e-05 gnorm: 1.10 [ 8:15:05<16:17:51] +[titan] 2025-10-05 06:49:23,790 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:49:25,981 - root - INFO - step: 13450 loss: 2.2828 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0115 +[titan] 2025-10-05 06:49:25,981 - root - INFO - lr: 3.8980e-05 gnorm: 1.07 [ 8:15:16<16:17:39] +[titan] 2025-10-05 06:49:36,831 - root - INFO - step: 13455 loss: 2.2498 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9867 +[titan] 2025-10-05 06:49:36,831 - root - INFO - lr: 3.8972e-05 gnorm: 1.03 [ 8:15:27<16:17:28] +[titan] 2025-10-05 06:49:47,714 - root - INFO - step: 13460 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0682 +[titan] 2025-10-05 06:49:47,714 - root - INFO - lr: 3.8965e-05 gnorm: 1.14 [ 8:15:38<16:17:17] +[titan] 2025-10-05 06:49:58,585 - root - INFO - step: 13465 loss: 2.2324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 06:49:58,585 - root - INFO - lr: 3.8957e-05 gnorm: 1.11 [ 8:15:49<16:17:05] +[titan] 2025-10-05 06:50:09,688 - root - INFO - step: 13470 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 29,514 tflops: 409.46 mfu: 41.40% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9928 +[titan] 2025-10-05 06:50:09,688 - root - INFO - lr: 3.8949e-05 gnorm: 1.07 [ 8:16:00<16:16:54] +[titan] 2025-10-05 06:50:20,551 - root - INFO - step: 13475 loss: 2.2930 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0250 +[titan] 2025-10-05 06:50:20,551 - root - INFO - lr: 3.8942e-05 gnorm: 1.12 [ 8:16:11<16:16:43] +[titan] 2025-10-05 06:50:31,416 - root - INFO - step: 13480 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:50:31,416 - root - INFO - lr: 
3.8934e-05 gnorm: 1.09 [ 8:16:22<16:16:31] +[titan] 2025-10-05 06:50:42,269 - root - INFO - step: 13485 loss: 2.2218 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9617 +[titan] 2025-10-05 06:50:42,269 - root - INFO - lr: 3.8926e-05 gnorm: 1.10 [ 8:16:32<16:16:20] +[titan] 2025-10-05 06:50:53,127 - root - INFO - step: 13490 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 06:50:53,128 - root - INFO - lr: 3.8919e-05 gnorm: 1.07 [ 8:16:43<16:16:09] +[titan] 2025-10-05 06:51:03,982 - root - INFO - step: 13495 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:51:03,982 - root - INFO - lr: 3.8911e-05 gnorm: 1.09 [ 8:16:54<16:15:57] +[titan] 2025-10-05 06:51:12,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:51:14,857 - root - INFO - step: 13500 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 06:51:14,857 - root - INFO - lr: 3.8903e-05 gnorm: 1.09 [ 8:17:05<16:15:46] +[titan] 2025-10-05 06:51:25,746 - root - INFO - step: 13505 loss: 2.2715 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 06:51:25,746 - root - INFO - lr: 3.8896e-05 gnorm: 1.09 [ 8:17:16<16:15:34] +[titan] 2025-10-05 06:51:36,614 - root - INFO - step: 13510 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 06:51:36,614 - root - INFO - lr: 3.8888e-05 gnorm: 1.08 [ 8:17:27<16:15:23] +[titan] 2025-10-05 06:51:47,494 - root - INFO - step: 13515 loss: 2.2519 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 06:51:47,494 - root - INFO - lr: 3.8880e-05 gnorm: 1.12 [ 8:17:38<16:15:12] +[titan] 2025-10-05 06:51:58,360 - root - INFO - step: 13520 loss: 2.2323 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:51:58,360 - root - INFO - lr: 3.8872e-05 gnorm: 1.05 [ 8:17:48<16:15:00] +[titan] 2025-10-05 06:52:09,236 - root - INFO - step: 13525 loss: 2.2346 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 06:52:09,236 - root - INFO - lr: 3.8865e-05 gnorm: 1.07 [ 8:17:59<16:14:49] +[titan] 2025-10-05 06:52:20,103 - root - INFO - step: 13530 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9669 +[titan] 2025-10-05 06:52:20,103 - root - INFO - lr: 
3.8857e-05 gnorm: 1.08 [ 8:18:10<16:14:38] +[titan] 2025-10-05 06:52:30,992 - root - INFO - step: 13535 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9949 +[titan] 2025-10-05 06:52:30,992 - root - INFO - lr: 3.8849e-05 gnorm: 1.10 [ 8:18:21<16:14:26] +[titan] 2025-10-05 06:52:41,845 - root - INFO - step: 13540 loss: 2.2743 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0096 +[titan] 2025-10-05 06:52:41,846 - root - INFO - lr: 3.8842e-05 gnorm: 1.16 [ 8:18:32<16:14:15] +[titan] 2025-10-05 06:52:52,731 - root - INFO - step: 13545 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:52:52,731 - root - INFO - lr: 3.8834e-05 gnorm: 1.19 [ 8:18:43<16:14:03] +[titan] 2025-10-05 06:53:01,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:53:03,584 - root - INFO - step: 13550 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:53:03,584 - root - INFO - lr: 3.8826e-05 gnorm: 1.12 [ 8:18:54<16:13:52] +[titan] 2025-10-05 06:53:14,560 - root - INFO - step: 13555 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.17 mfu: 41.88% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:53:14,560 - root - INFO - lr: 3.8818e-05 gnorm: 1.18 [ 8:19:05<16:13:41] +[titan] 2025-10-05 06:53:25,426 - root - INFO - step: 13560 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:53:25,426 - root - INFO - lr: 3.8811e-05 gnorm: 1.10 [ 8:19:16<16:13:29] +[titan] 2025-10-05 06:53:36,319 - root - INFO - step: 13565 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0242 +[titan] 2025-10-05 06:53:36,320 - root - INFO - lr: 3.8803e-05 gnorm: 1.11 [ 8:19:26<16:13:18] +[titan] 2025-10-05 06:53:47,222 - root - INFO - step: 13570 loss: 2.2893 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:53:47,222 - root - INFO - lr: 3.8795e-05 gnorm: 1.11 [ 8:19:37<16:13:07] +[titan] 2025-10-05 06:53:58,096 - root - INFO - step: 13575 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9963 +[titan] 2025-10-05 06:53:58,096 - root - INFO - lr: 3.8788e-05 gnorm: 1.11 [ 8:19:48<16:12:55] +[titan] 2025-10-05 06:54:08,974 - root - INFO - step: 13580 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:54:08,975 - root - INFO - lr: 
3.8780e-05 gnorm: 1.11 [ 8:19:59<16:12:44] +[titan] 2025-10-05 06:54:19,877 - root - INFO - step: 13585 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0437 +[titan] 2025-10-05 06:54:19,877 - root - INFO - lr: 3.8772e-05 gnorm: 1.15 [ 8:20:10<16:12:33] +[titan] 2025-10-05 06:54:30,750 - root - INFO - step: 13590 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0181 +[titan] 2025-10-05 06:54:30,750 - root - INFO - lr: 3.8764e-05 gnorm: 1.09 [ 8:20:21<16:12:21] +[titan] 2025-10-05 06:54:41,615 - root - INFO - step: 13595 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0386 +[titan] 2025-10-05 06:54:41,615 - root - INFO - lr: 3.8757e-05 gnorm: 1.12 [ 8:20:32<16:12:10] +[titan] 2025-10-05 06:54:50,323 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:54:52,501 - root - INFO - step: 13600 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:54:52,501 - root - INFO - lr: 3.8749e-05 gnorm: 1.12 [ 8:20:43<16:11:58] +[titan] 2025-10-05 06:55:03,350 - root - INFO - step: 13605 loss: 2.2279 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 06:55:03,350 - root - INFO - lr: 3.8741e-05 gnorm: 1.09 [ 8:20:53<16:11:47] +[titan] 2025-10-05 06:55:14,228 - root - INFO - step: 13610 loss: 2.3259 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0547 +[titan] 2025-10-05 06:55:14,228 - root - INFO - lr: 3.8734e-05 gnorm: 1.14 [ 8:21:04<16:11:36] +[titan] 2025-10-05 06:55:25,123 - root - INFO - step: 13615 loss: 2.2661 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0007 +[titan] 2025-10-05 06:55:25,123 - root - INFO - lr: 3.8726e-05 gnorm: 1.11 [ 8:21:15<16:11:24] +[titan] 2025-10-05 06:55:35,976 - root - INFO - step: 13620 loss: 2.3686 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0920 +[titan] 2025-10-05 06:55:35,976 - root - INFO - lr: 3.8718e-05 gnorm: 1.15 [ 8:21:26<16:11:13] +[titan] 2025-10-05 06:55:46,835 - root - INFO - step: 13625 loss: 2.2851 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0187 +[titan] 2025-10-05 06:55:46,835 - root - INFO - lr: 3.8710e-05 gnorm: 1.07 [ 8:21:37<16:11:02] +[titan] 2025-10-05 06:55:57,740 - root - INFO - step: 13630 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0176 +[titan] 2025-10-05 06:55:57,740 - root - INFO - lr: 
3.8703e-05 gnorm: 1.08 [ 8:21:48<16:10:50] +[titan] 2025-10-05 06:56:08,602 - root - INFO - step: 13635 loss: 2.3123 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 06:56:08,602 - root - INFO - lr: 3.8695e-05 gnorm: 1.12 [ 8:21:59<16:10:39] +[titan] 2025-10-05 06:56:19,485 - root - INFO - step: 13640 loss: 2.2360 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 06:56:19,486 - root - INFO - lr: 3.8687e-05 gnorm: 1.08 [ 8:22:10<16:10:27] +[titan] 2025-10-05 06:56:30,339 - root - INFO - step: 13645 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0101 +[titan] 2025-10-05 06:56:30,339 - root - INFO - lr: 3.8679e-05 gnorm: 1.20 [ 8:22:20<16:10:16] +[titan] 2025-10-05 06:56:39,024 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:56:41,218 - root - INFO - step: 13650 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0411 +[titan] 2025-10-05 06:56:41,218 - root - INFO - lr: 3.8672e-05 gnorm: 1.10 [ 8:22:31<16:10:05] +[titan] 2025-10-05 06:56:52,068 - root - INFO - step: 13655 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 06:56:52,068 - root - INFO - lr: 3.8664e-05 gnorm: 1.09 [ 8:22:42<16:09:53] +[titan] 2025-10-05 06:57:02,942 - root - INFO - step: 13660 loss: 2.3364 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0632 +[titan] 2025-10-05 06:57:02,942 - root - INFO - lr: 3.8656e-05 gnorm: 1.13 [ 8:22:53<16:09:42] +[titan] 2025-10-05 06:57:13,852 - root - INFO - step: 13665 loss: 2.2401 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 06:57:13,852 - root - INFO - lr: 3.8648e-05 gnorm: 1.09 [ 8:23:04<16:09:31] +[titan] 2025-10-05 06:57:24,731 - root - INFO - step: 13670 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9808 +[titan] 2025-10-05 06:57:24,731 - root - INFO - lr: 3.8641e-05 gnorm: 1.12 [ 8:23:15<16:09:19] +[titan] 2025-10-05 06:57:35,601 - root - INFO - step: 13675 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 06:57:35,601 - root - INFO - lr: 3.8633e-05 gnorm: 1.12 [ 8:23:26<16:09:08] +[titan] 2025-10-05 06:57:46,492 - root - INFO - step: 13680 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9915 +[titan] 2025-10-05 06:57:46,493 - root - INFO - lr: 
3.8625e-05 gnorm: 1.09 [ 8:23:37<16:08:56] +[titan] 2025-10-05 06:57:57,361 - root - INFO - step: 13685 loss: 2.2907 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:57:57,361 - root - INFO - lr: 3.8617e-05 gnorm: 1.05 [ 8:23:47<16:08:45] +[titan] 2025-10-05 06:58:08,244 - root - INFO - step: 13690 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 06:58:08,244 - root - INFO - lr: 3.8610e-05 gnorm: 1.12 [ 8:23:58<16:08:34] +[titan] 2025-10-05 06:58:19,163 - root - INFO - step: 13695 loss: 2.2749 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0089 +[titan] 2025-10-05 06:58:19,163 - root - INFO - lr: 3.8602e-05 gnorm: 1.09 [ 8:24:09<16:08:22] +[titan] 2025-10-05 06:58:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:58:30,056 - root - INFO - step: 13700 loss: 2.3146 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 06:58:30,056 - root - INFO - lr: 3.8594e-05 gnorm: 1.10 [ 8:24:20<16:08:11] +[titan] 2025-10-05 06:58:40,938 - root - INFO - step: 13705 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 06:58:40,938 - root - INFO - lr: 3.8586e-05 gnorm: 1.07 [ 8:24:31<16:08:00] +[titan] 2025-10-05 06:58:51,816 - root - INFO - step: 13710 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 06:58:51,816 - root - INFO - lr: 3.8578e-05 gnorm: 1.10 [ 8:24:42<16:07:48] +[titan] 2025-10-05 06:59:02,700 - root - INFO - step: 13715 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 06:59:02,700 - root - INFO - lr: 3.8571e-05 gnorm: 1.12 [ 8:24:53<16:07:37] +[titan] 2025-10-05 06:59:13,554 - root - INFO - step: 13720 loss: 2.3118 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:59:13,554 - root - INFO - lr: 3.8563e-05 gnorm: 1.14 [ 8:25:04<16:07:26] +[titan] 2025-10-05 06:59:24,420 - root - INFO - step: 13725 loss: 2.2285 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9687 +[titan] 2025-10-05 06:59:24,420 - root - INFO - lr: 3.8555e-05 gnorm: 1.11 [ 8:25:15<16:07:14] +[titan] 2025-10-05 06:59:35,307 - root - INFO - step: 13730 loss: 2.2243 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 06:59:35,307 - root - INFO - lr: 
3.8547e-05 gnorm: 1.10 [ 8:25:25<16:07:03] +[titan] 2025-10-05 06:59:46,179 - root - INFO - step: 13735 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 06:59:46,179 - root - INFO - lr: 3.8540e-05 gnorm: 1.08 [ 8:25:36<16:06:52] +[titan] 2025-10-05 06:59:57,061 - root - INFO - step: 13740 loss: 2.2450 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9828 +[titan] 2025-10-05 06:59:57,061 - root - INFO - lr: 3.8532e-05 gnorm: 1.15 [ 8:25:47<16:06:40] +[titan] 2025-10-05 07:00:07,935 - root - INFO - step: 13745 loss: 2.3278 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:00:07,935 - root - INFO - lr: 3.8524e-05 gnorm: 1.10 [ 8:25:58<16:06:29] +[titan] 2025-10-05 07:00:16,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:00:18,832 - root - INFO - step: 13750 loss: 2.3084 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 07:00:18,833 - root - INFO - lr: 3.8516e-05 gnorm: 1.10 [ 8:26:09<16:06:18] +[titan] 2025-10-05 07:00:29,706 - root - INFO - step: 13755 loss: 2.3204 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0493 +[titan] 2025-10-05 07:00:29,706 - root - INFO - lr: 3.8509e-05 gnorm: 1.11 [ 8:26:20<16:06:06] +[titan] 2025-10-05 07:00:40,608 - root - INFO - step: 13760 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0305 +[titan] 2025-10-05 07:00:40,608 - root - INFO - lr: 3.8501e-05 gnorm: 1.15 [ 8:26:31<16:05:55] +[titan] 2025-10-05 07:00:51,487 - root - INFO - step: 13765 loss: 2.2771 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 07:00:51,487 - root - INFO - lr: 3.8493e-05 gnorm: 1.08 [ 8:26:42<16:05:43] +[titan] 2025-10-05 07:01:02,367 - root - INFO - step: 13770 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0107 +[titan] 2025-10-05 07:01:02,367 - root - INFO - lr: 3.8485e-05 gnorm: 1.52 [ 8:26:52<16:05:32] +[titan] 2025-10-05 07:01:13,257 - root - INFO - step: 13775 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0460 +[titan] 2025-10-05 07:01:13,257 - root - INFO - lr: 3.8477e-05 gnorm: 1.11 [ 8:27:03<16:05:21] +[titan] 2025-10-05 07:01:24,150 - root - INFO - step: 13780 loss: 2.3133 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 07:01:24,150 - root - INFO - lr: 
3.8470e-05 gnorm: 1.05 [ 8:27:14<16:05:09] +[titan] 2025-10-05 07:01:35,054 - root - INFO - step: 13785 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9600 +[titan] 2025-10-05 07:01:35,054 - root - INFO - lr: 3.8462e-05 gnorm: 1.10 [ 8:27:25<16:04:58] +[titan] 2025-10-05 07:01:45,974 - root - INFO - step: 13790 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0014 +[titan] 2025-10-05 07:01:45,974 - root - INFO - lr: 3.8454e-05 gnorm: 1.09 [ 8:27:36<16:04:47] +[titan] 2025-10-05 07:01:56,865 - root - INFO - step: 13795 loss: 2.2879 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:01:56,865 - root - INFO - lr: 3.8446e-05 gnorm: 1.08 [ 8:27:47<16:04:36] +[titan] 2025-10-05 07:02:05,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:02:07,773 - root - INFO - step: 13800 loss: 2.2846 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0177 +[titan] 2025-10-05 07:02:07,773 - root - INFO - lr: 3.8438e-05 gnorm: 1.09 [ 8:27:58<16:04:24] +[titan] 2025-10-05 07:02:18,700 - root - INFO - step: 13805 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 07:02:18,700 - root - INFO - lr: 3.8431e-05 gnorm: 1.09 [ 8:28:09<16:04:13] +[titan] 2025-10-05 07:02:29,593 - root - INFO - step: 13810 loss: 2.2868 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 07:02:29,593 - root - INFO - lr: 3.8423e-05 gnorm: 1.08 [ 8:28:20<16:04:02] +[titan] 2025-10-05 07:02:40,489 - root - INFO - step: 13815 loss: 2.3125 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 07:02:40,489 - root - INFO - lr: 3.8415e-05 gnorm: 1.08 [ 8:28:31<16:03:50] +[titan] 2025-10-05 07:02:51,396 - root - INFO - step: 13820 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1047 +[titan] 2025-10-05 07:02:51,396 - root - INFO - lr: 3.8407e-05 gnorm: 1.13 [ 8:28:41<16:03:39] +[titan] 2025-10-05 07:03:00,399 - root - INFO - Dumping profiler traces at step 13824 +[titan] 2025-10-05 07:03:00,438 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:03:02,625 - root - INFO - step: 13825 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 29,181 tflops: 404.84 mfu: 40.93% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:03:02,626 - root - INFO - lr: 3.8399e-05 gnorm: 1.09 [ 8:28:53<16:03:28] +[titan] 2025-10-05 07:03:13,525 - root - INFO - step: 13830 loss: 
2.3225 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0510 +[titan] 2025-10-05 07:03:13,526 - root - INFO - lr: 3.8392e-05 gnorm: 1.08 [ 8:29:04<16:03:17] +[titan] 2025-10-05 07:03:24,465 - root - INFO - step: 13835 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:03:24,466 - root - INFO - lr: 3.8384e-05 gnorm: 1.07 [ 8:29:15<16:03:06] +[titan] 2025-10-05 07:03:35,347 - root - INFO - step: 13840 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:03:35,347 - root - INFO - lr: 3.8376e-05 gnorm: 1.09 [ 8:29:25<16:02:54] +[titan] 2025-10-05 07:03:46,225 - root - INFO - step: 13845 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 07:03:46,225 - root - INFO - lr: 3.8368e-05 gnorm: 1.11 [ 8:29:36<16:02:43] +[titan] 2025-10-05 07:03:54,924 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:03:57,111 - root - INFO - step: 13850 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0113 +[titan] 2025-10-05 07:03:57,111 - root - INFO - lr: 3.8360e-05 gnorm: 1.11 [ 8:29:47<16:02:32] +[titan] 2025-10-05 07:04:08,025 - root - INFO - step: 13855 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0769 +[titan] 2025-10-05 07:04:08,025 - root - INFO - lr: 3.8353e-05 gnorm: 1.11 [ 8:29:58<16:02:20] +[titan] 2025-10-05 07:04:18,937 - root - INFO - step: 13860 loss: 2.2484 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9860 +[titan] 2025-10-05 07:04:18,937 - root - INFO - lr: 3.8345e-05 gnorm: 1.13 [ 8:30:09<16:02:09] +[titan] 2025-10-05 07:04:29,819 - root - INFO - step: 13865 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9412 +[titan] 2025-10-05 07:04:29,820 - root - INFO - lr: 3.8337e-05 gnorm: 1.13 [ 8:30:20<16:01:58] +[titan] 2025-10-05 07:04:40,706 - root - INFO - step: 13870 loss: 2.1522 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 07:04:40,706 - root - INFO - lr: 3.8329e-05 gnorm: 1.10 [ 8:30:31<16:01:46] +[titan] 2025-10-05 07:04:51,600 - root - INFO - step: 13875 loss: 2.2926 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:04:51,601 - root - INFO - lr: 3.8321e-05 gnorm: 1.13 [ 8:30:42<16:01:35] +[titan] 2025-10-05 07:05:02,483 - root - INFO - step: 13880 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 07:05:02,484 - root - INFO - lr: 
3.8313e-05 gnorm: 1.05 [ 8:30:53<16:01:24] +[titan] 2025-10-05 07:05:13,375 - root - INFO - step: 13885 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 07:05:13,375 - root - INFO - lr: 3.8306e-05 gnorm: 1.09 [ 8:31:03<16:01:12] +[titan] 2025-10-05 07:05:24,346 - root - INFO - step: 13890 loss: 2.3386 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 07:05:24,346 - root - INFO - lr: 3.8298e-05 gnorm: 1.09 [ 8:31:14<16:01:01] +[titan] 2025-10-05 07:05:35,221 - root - INFO - step: 13895 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 07:05:35,221 - root - INFO - lr: 3.8290e-05 gnorm: 1.10 [ 8:31:25<16:00:50] +[titan] 2025-10-05 07:05:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:05:46,101 - root - INFO - step: 13900 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 07:05:46,101 - root - INFO - lr: 3.8282e-05 gnorm: 1.11 [ 8:31:36<16:00:39] +[titan] 2025-10-05 07:05:56,991 - root - INFO - step: 13905 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 07:05:56,991 - root - INFO - lr: 3.8274e-05 gnorm: 1.09 [ 8:31:47<16:00:27] +[titan] 2025-10-05 07:06:07,860 - root - INFO - step: 13910 loss: 2.2822 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0150 +[titan] 2025-10-05 07:06:07,861 - root - INFO - lr: 3.8266e-05 gnorm: 1.06 [ 8:31:58<16:00:16] +[titan] 2025-10-05 07:06:18,755 - root - INFO - step: 13915 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0626 +[titan] 2025-10-05 07:06:18,755 - root - INFO - lr: 3.8259e-05 gnorm: 1.12 [ 8:32:09<16:00:05] +[titan] 2025-10-05 07:06:29,694 - root - INFO - step: 13920 loss: 2.3240 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0527 +[titan] 2025-10-05 07:06:29,694 - root - INFO - lr: 3.8251e-05 gnorm: 1.13 [ 8:32:20<15:59:53] +[titan] 2025-10-05 07:06:40,578 - root - INFO - step: 13925 loss: 2.2091 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9511 +[titan] 2025-10-05 07:06:40,578 - root - INFO - lr: 3.8243e-05 gnorm: 1.13 [ 8:32:31<15:59:42] +[titan] 2025-10-05 07:06:51,433 - root - INFO - step: 13930 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:06:51,434 - root - INFO - lr: 
3.8235e-05 gnorm: 1.12 [ 8:32:42<15:59:31] +[titan] 2025-10-05 07:07:02,325 - root - INFO - step: 13935 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0277 +[titan] 2025-10-05 07:07:02,326 - root - INFO - lr: 3.8227e-05 gnorm: 1.12 [ 8:32:52<15:59:19] +[titan] 2025-10-05 07:07:13,223 - root - INFO - step: 13940 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 07:07:13,223 - root - INFO - lr: 3.8219e-05 gnorm: 1.05 [ 8:33:03<15:59:08] +[titan] 2025-10-05 07:07:24,144 - root - INFO - step: 13945 loss: 2.2627 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 07:07:24,144 - root - INFO - lr: 3.8212e-05 gnorm: 1.07 [ 8:33:14<15:58:57] +[titan] 2025-10-05 07:07:32,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:07:35,060 - root - INFO - step: 13950 loss: 2.3247 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0535 +[titan] 2025-10-05 07:07:35,061 - root - INFO - lr: 3.8204e-05 gnorm: 1.15 [ 8:33:25<15:58:45] +[titan] 2025-10-05 07:07:45,949 - root - INFO - step: 13955 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 1.9994 +[titan] 2025-10-05 07:07:45,949 - root - INFO - lr: 3.8196e-05 gnorm: 1.12 [ 8:33:36<15:58:34] +[titan] 2025-10-05 07:07:56,827 - root - INFO - step: 13960 loss: 2.2073 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 07:07:56,827 - root - INFO - lr: 3.8188e-05 gnorm: 1.13 [ 8:33:47<15:58:23] +[titan] 2025-10-05 07:08:07,719 - root - INFO - step: 13965 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 07:08:07,720 - root - INFO - lr: 3.8180e-05 gnorm: 1.05 [ 8:33:58<15:58:11] +[titan] 2025-10-05 07:08:18,609 - root - INFO - step: 13970 loss: 2.3210 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0494 +[titan] 2025-10-05 07:08:18,609 - root - INFO - lr: 3.8172e-05 gnorm: 1.11 [ 8:34:09<15:58:00] +[titan] 2025-10-05 07:08:29,526 - root - INFO - step: 13975 loss: 2.3414 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0678 +[titan] 2025-10-05 07:08:29,526 - root - INFO - lr: 3.8164e-05 gnorm: 1.06 [ 8:34:20<15:57:49] +[titan] 2025-10-05 07:08:40,409 - root - INFO - step: 13980 loss: 2.2904 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0234 +[titan] 2025-10-05 07:08:40,409 - root - INFO - lr: 
3.8157e-05 gnorm: 1.10 [ 8:34:30<15:57:38] +[titan] 2025-10-05 07:08:51,305 - root - INFO - step: 13985 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9806 +[titan] 2025-10-05 07:08:51,305 - root - INFO - lr: 3.8149e-05 gnorm: 1.08 [ 8:34:41<15:57:26] +[titan] 2025-10-05 07:09:02,176 - root - INFO - step: 13990 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:09:02,177 - root - INFO - lr: 3.8141e-05 gnorm: 1.06 [ 8:34:52<15:57:15] +[titan] 2025-10-05 07:09:13,061 - root - INFO - step: 13995 loss: 2.2816 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0139 +[titan] 2025-10-05 07:09:13,062 - root - INFO - lr: 3.8133e-05 gnorm: 1.14 [ 8:35:03<15:57:04] +[titan] 2025-10-05 07:09:21,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:09:23,969 - root - INFO - step: 14000 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 07:09:23,969 - root - INFO - lr: 3.8125e-05 gnorm: 1.09 [ 8:35:14<15:56:52] +[titan] 2025-10-05 07:09:34,866 - root - INFO - step: 14005 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0102 +[titan] 2025-10-05 07:09:34,866 - root - INFO - lr: 3.8117e-05 gnorm: 1.06 [ 8:35:25<15:56:41] +[titan] 2025-10-05 07:09:45,752 - root - INFO - step: 14010 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0239 +[titan] 2025-10-05 07:09:45,752 - root - INFO - lr: 3.8109e-05 gnorm: 1.14 [ 8:35:36<15:56:30] +[titan] 2025-10-05 07:09:56,681 - root - INFO - step: 14015 loss: 2.2388 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9782 +[titan] 2025-10-05 07:09:56,681 - root - INFO - lr: 3.8101e-05 gnorm: 1.10 [ 8:35:47<15:56:18] +[titan] 2025-10-05 07:10:07,561 - root - INFO - step: 14020 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0100 +[titan] 2025-10-05 07:10:07,561 - root - INFO - lr: 3.8094e-05 gnorm: 1.10 [ 8:35:58<15:56:07] +[titan] 2025-10-05 07:10:18,446 - root - INFO - step: 14025 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0031 +[titan] 2025-10-05 07:10:18,446 - root - INFO - lr: 3.8086e-05 gnorm: 1.06 [ 8:36:09<15:55:56] +[titan] 2025-10-05 07:10:29,418 - root - INFO - step: 14030 loss: 2.3296 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 07:10:29,418 - root - INFO - lr: 
3.8078e-05 gnorm: 1.11 [ 8:36:19<15:55:45] +[titan] 2025-10-05 07:10:40,286 - root - INFO - step: 14035 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0286 +[titan] 2025-10-05 07:10:40,286 - root - INFO - lr: 3.8070e-05 gnorm: 1.08 [ 8:36:30<15:55:33] +[titan] 2025-10-05 07:10:51,186 - root - INFO - step: 14040 loss: 2.3219 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 07:10:51,186 - root - INFO - lr: 3.8062e-05 gnorm: 1.08 [ 8:36:41<15:55:22] +[titan] 2025-10-05 07:11:02,100 - root - INFO - step: 14045 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:11:02,100 - root - INFO - lr: 3.8054e-05 gnorm: 1.03 [ 8:36:52<15:55:11] +[titan] 2025-10-05 07:11:10,810 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:11:13,002 - root - INFO - step: 14050 loss: 2.2598 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:13,002 - root - INFO - lr: 3.8046e-05 gnorm: 1.08 [ 8:37:03<15:54:59] +[titan] 2025-10-05 07:11:23,889 - root - INFO - step: 14055 loss: 2.2829 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0164 +[titan] 2025-10-05 07:11:23,889 - root - INFO - lr: 3.8038e-05 gnorm: 1.06 [ 8:37:14<15:54:48] +[titan] 2025-10-05 07:11:34,797 - root - INFO - step: 14060 loss: 2.2612 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:34,797 - root - INFO - lr: 3.8031e-05 gnorm: 1.08 [ 8:37:25<15:54:37] +[titan] 2025-10-05 07:11:45,686 - root - INFO - step: 14065 loss: 2.2504 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:11:45,686 - root - INFO - lr: 3.8023e-05 gnorm: 1.10 [ 8:37:36<15:54:25] +[titan] 2025-10-05 07:11:56,588 - root - INFO - step: 14070 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 07:11:56,588 - root - INFO - lr: 3.8015e-05 gnorm: 1.10 [ 8:37:47<15:54:14] +[titan] 2025-10-05 07:12:07,484 - root - INFO - step: 14075 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:12:07,484 - root - INFO - lr: 3.8007e-05 gnorm: 1.05 [ 8:37:58<15:54:03] +[titan] 2025-10-05 07:12:18,379 - root - INFO - step: 14080 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 07:12:18,379 - root - INFO - lr: 
3.7999e-05 gnorm: 1.09 [ 8:38:08<15:53:51] +[titan] 2025-10-05 07:12:29,280 - root - INFO - step: 14085 loss: 2.2541 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 07:12:29,281 - root - INFO - lr: 3.7991e-05 gnorm: 1.11 [ 8:38:19<15:53:40] +[titan] 2025-10-05 07:12:40,158 - root - INFO - step: 14090 loss: 2.2892 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0223 +[titan] 2025-10-05 07:12:40,159 - root - INFO - lr: 3.7983e-05 gnorm: 1.07 [ 8:38:30<15:53:29] +[titan] 2025-10-05 07:12:51,038 - root - INFO - step: 14095 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0060 +[titan] 2025-10-05 07:12:51,038 - root - INFO - lr: 3.7975e-05 gnorm: 1.08 [ 8:38:41<15:53:17] +[titan] 2025-10-05 07:12:59,737 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:13:01,923 - root - INFO - step: 14100 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 07:13:01,924 - root - INFO - lr: 3.7967e-05 gnorm: 1.10 [ 8:38:52<15:53:06] +[titan] 2025-10-05 07:13:12,819 - root - INFO - step: 14105 loss: 2.2680 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0026 +[titan] 2025-10-05 07:13:12,819 - root - INFO - lr: 3.7959e-05 gnorm: 1.10 [ 8:39:03<15:52:55] +[titan] 2025-10-05 07:13:23,712 - root - INFO - step: 14110 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:13:23,712 - root - INFO - lr: 3.7952e-05 gnorm: 1.06 [ 8:39:14<15:52:44] +[titan] 2025-10-05 07:13:34,613 - root - INFO - step: 14115 loss: 2.3226 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0504 +[titan] 2025-10-05 07:13:34,613 - root - INFO - lr: 3.7944e-05 gnorm: 1.17 [ 8:39:25<15:52:32] +[titan] 2025-10-05 07:13:45,510 - root - INFO - step: 14120 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 07:13:45,510 - root - INFO - lr: 3.7936e-05 gnorm: 1.12 [ 8:39:36<15:52:21] +[titan] 2025-10-05 07:13:56,397 - root - INFO - step: 14125 loss: 2.2697 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0052 +[titan] 2025-10-05 07:13:56,397 - root - INFO - lr: 3.7928e-05 gnorm: 1.11 [ 8:39:46<15:52:10] +[titan] 2025-10-05 07:14:07,282 - root - INFO - step: 14130 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 07:14:07,282 - root - INFO - lr: 
3.7920e-05 gnorm: 1.09 [ 8:39:57<15:51:58] +[titan] 2025-10-05 07:14:18,161 - root - INFO - step: 14135 loss: 2.2782 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 07:14:18,162 - root - INFO - lr: 3.7912e-05 gnorm: 1.13 [ 8:40:08<15:51:47] +[titan] 2025-10-05 07:14:29,064 - root - INFO - step: 14140 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 07:14:29,064 - root - INFO - lr: 3.7904e-05 gnorm: 1.12 [ 8:40:19<15:51:36] +[titan] 2025-10-05 07:14:39,953 - root - INFO - step: 14145 loss: 2.2613 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9977 +[titan] 2025-10-05 07:14:39,953 - root - INFO - lr: 3.7896e-05 gnorm: 1.07 [ 8:40:30<15:51:24] +[titan] 2025-10-05 07:14:48,667 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:14:50,851 - root - INFO - step: 14150 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 07:14:50,851 - root - INFO - lr: 3.7888e-05 gnorm: 1.09 [ 8:40:41<15:51:13] +[titan] 2025-10-05 07:15:01,722 - root - INFO - step: 14155 loss: 2.3499 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0768 +[titan] 2025-10-05 07:15:01,723 - root - INFO - lr: 3.7880e-05 gnorm: 1.07 [ 8:40:52<15:51:02] +[titan] 2025-10-05 07:15:12,596 - root - INFO - step: 14160 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 07:15:12,597 - root - INFO - lr: 3.7872e-05 gnorm: 1.07 [ 8:41:03<15:50:50] +[titan] 2025-10-05 07:15:23,478 - root - INFO - step: 14165 loss: 2.2806 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 07:15:23,478 - root - INFO - lr: 3.7865e-05 gnorm: 1.09 [ 8:41:14<15:50:39] +[titan] 2025-10-05 07:15:34,374 - root - INFO - step: 14170 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:15:34,374 - root - INFO - lr: 3.7857e-05 gnorm: 1.08 [ 8:41:24<15:50:28] +[titan] 2025-10-05 07:15:45,286 - root - INFO - step: 14175 loss: 2.2571 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9933 +[titan] 2025-10-05 07:15:45,287 - root - INFO - lr: 3.7849e-05 gnorm: 1.11 [ 8:41:35<15:50:16] +[titan] 2025-10-05 07:15:56,187 - root - INFO - step: 14180 loss: 2.3045 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 07:15:56,187 - root - INFO - lr: 
3.7841e-05 gnorm: 1.13 [ 8:41:46<15:50:05] +[titan] 2025-10-05 07:16:07,077 - root - INFO - step: 14185 loss: 2.2313 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9715 +[titan] 2025-10-05 07:16:07,077 - root - INFO - lr: 3.7833e-05 gnorm: 1.08 [ 8:41:57<15:49:54] +[titan] 2025-10-05 07:16:17,954 - root - INFO - step: 14190 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9824 +[titan] 2025-10-05 07:16:17,954 - root - INFO - lr: 3.7825e-05 gnorm: 1.05 [ 8:42:08<15:49:43] +[titan] 2025-10-05 07:16:28,838 - root - INFO - step: 14195 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 07:16:28,839 - root - INFO - lr: 3.7817e-05 gnorm: 1.10 [ 8:42:19<15:49:31] +[titan] 2025-10-05 07:16:37,518 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:16:39,702 - root - INFO - step: 14200 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9987 +[titan] 2025-10-05 07:16:39,703 - root - INFO - lr: 3.7809e-05 gnorm: 1.10 [ 8:42:30<15:49:20] +[titan] 2025-10-05 07:16:50,596 - root - INFO - step: 14205 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 07:16:50,597 - root - INFO - lr: 3.7801e-05 gnorm: 1.05 [ 8:42:41<15:49:09] +[titan] 2025-10-05 07:17:01,477 - root - INFO - step: 14210 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:17:01,477 - root - INFO - lr: 3.7793e-05 gnorm: 1.09 [ 8:42:52<15:48:57] +[titan] 2025-10-05 07:17:12,357 - root - INFO - step: 14215 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 07:17:12,357 - root - INFO - lr: 3.7785e-05 gnorm: 1.09 [ 8:43:02<15:48:46] +[titan] 2025-10-05 07:17:23,224 - root - INFO - step: 14220 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0228 +[titan] 2025-10-05 07:17:23,224 - root - INFO - lr: 3.7777e-05 gnorm: 1.12 [ 8:43:13<15:48:35] +[titan] 2025-10-05 07:17:34,101 - root - INFO - step: 14225 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9842 +[titan] 2025-10-05 07:17:34,101 - root - INFO - lr: 3.7769e-05 gnorm: 1.11 [ 8:43:24<15:48:23] +[titan] 2025-10-05 07:17:44,966 - root - INFO - step: 14230 loss: 2.2228 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9631 +[titan] 2025-10-05 07:17:44,966 - root - INFO - lr: 
3.7761e-05 gnorm: 1.06 [ 8:43:35<15:48:12] +[titan] 2025-10-05 07:17:55,865 - root - INFO - step: 14235 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:17:55,865 - root - INFO - lr: 3.7753e-05 gnorm: 1.15 [ 8:43:46<15:48:01] +[titan] 2025-10-05 07:18:06,742 - root - INFO - step: 14240 loss: 2.2274 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9683 +[titan] 2025-10-05 07:18:06,742 - root - INFO - lr: 3.7746e-05 gnorm: 1.07 [ 8:43:57<15:47:49] +[titan] 2025-10-05 07:18:17,634 - root - INFO - step: 14245 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0317 +[titan] 2025-10-05 07:18:17,634 - root - INFO - lr: 3.7738e-05 gnorm: 1.09 [ 8:44:08<15:47:38] +[titan] 2025-10-05 07:18:26,321 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:18:28,505 - root - INFO - step: 14250 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:18:28,505 - root - INFO - lr: 3.7730e-05 gnorm: 1.10 [ 8:44:19<15:47:27] +[titan] 2025-10-05 07:18:39,411 - root - INFO - step: 14255 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 07:18:39,412 - root - INFO - lr: 3.7722e-05 gnorm: 1.06 [ 8:44:29<15:47:15] +[titan] 2025-10-05 07:18:50,297 - root - INFO - step: 14260 loss: 2.3010 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0327 +[titan] 2025-10-05 07:18:50,297 - root - INFO - lr: 3.7714e-05 gnorm: 1.07 [ 8:44:40<15:47:04] +[titan] 2025-10-05 07:19:01,201 - root - INFO - step: 14265 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0104 +[titan] 2025-10-05 07:19:01,201 - root - INFO - lr: 3.7706e-05 gnorm: 1.10 [ 8:44:51<15:46:53] +[titan] 2025-10-05 07:19:12,083 - root - INFO - step: 14270 loss: 2.2667 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0022 +[titan] 2025-10-05 07:19:12,083 - root - INFO - lr: 3.7698e-05 gnorm: 1.08 [ 8:45:02<15:46:41] +[titan] 2025-10-05 07:19:22,993 - root - INFO - step: 14275 loss: 2.1944 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9385 +[titan] 2025-10-05 07:19:22,993 - root - INFO - lr: 3.7690e-05 gnorm: 1.09 [ 8:45:13<15:46:30] +[titan] 2025-10-05 07:19:33,913 - root - INFO - step: 14280 loss: 2.2467 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9851 +[titan] 2025-10-05 07:19:33,913 - root - INFO - lr: 
3.7682e-05 gnorm: 1.07 [ 8:45:24<15:46:19] +[titan] 2025-10-05 07:19:44,768 - root - INFO - step: 14285 loss: 2.2223 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9628 +[titan] 2025-10-05 07:19:44,768 - root - INFO - lr: 3.7674e-05 gnorm: 1.09 [ 8:45:35<15:46:07] +[titan] 2025-10-05 07:19:55,630 - root - INFO - step: 14290 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 07:19:55,631 - root - INFO - lr: 3.7666e-05 gnorm: 1.10 [ 8:45:46<15:45:56] +[titan] 2025-10-05 07:20:06,491 - root - INFO - step: 14295 loss: 2.2948 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 07:20:06,491 - root - INFO - lr: 3.7658e-05 gnorm: 1.11 [ 8:45:57<15:45:45] +[titan] 2025-10-05 07:20:15,199 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:20:17,375 - root - INFO - step: 14300 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 07:20:17,376 - root - INFO - lr: 3.7650e-05 gnorm: 1.15 [ 8:46:07<15:45:33] +[titan] 2025-10-05 07:20:28,246 - root - INFO - step: 14305 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0609 +[titan] 2025-10-05 07:20:28,246 - root - INFO - lr: 3.7642e-05 gnorm: 1.12 [ 8:46:18<15:45:22] +[titan] 2025-10-05 07:20:39,160 - root - INFO - step: 14310 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0267 +[titan] 2025-10-05 07:20:39,160 - root - INFO - lr: 3.7634e-05 gnorm: 1.14 [ 8:46:29<15:45:11] +[titan] 2025-10-05 07:20:50,006 - root - INFO - step: 14315 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 07:20:50,006 - root - INFO - lr: 3.7626e-05 gnorm: 1.07 [ 8:46:40<15:44:59] +[titan] 2025-10-05 07:21:00,866 - root - INFO - step: 14320 loss: 2.2698 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 07:21:00,866 - root - INFO - lr: 3.7618e-05 gnorm: 1.09 [ 8:46:51<15:44:48] +[titan] 2025-10-05 07:21:11,703 - root - INFO - step: 14325 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:21:11,703 - root - INFO - lr: 3.7610e-05 gnorm: 1.09 [ 8:47:02<15:44:37] +[titan] 2025-10-05 07:21:22,593 - root - INFO - step: 14330 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0330 +[titan] 2025-10-05 07:21:22,594 - root - INFO - lr: 
3.7602e-05 gnorm: 1.14 [ 8:47:13<15:44:25] +[titan] 2025-10-05 07:21:33,559 - root - INFO - step: 14335 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 07:21:33,559 - root - INFO - lr: 3.7594e-05 gnorm: 1.09 [ 8:47:24<15:44:14] +[titan] 2025-10-05 07:21:35,956 - root - INFO - Dumping profiler traces at step 14336 +[titan] 2025-10-05 07:21:35,994 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:21:44,698 - root - INFO - step: 14340 loss: 2.3096 memory: 118.84GiB(85.28%) tps: 29,418 tflops: 408.13 mfu: 41.27% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 07:21:44,699 - root - INFO - lr: 3.7586e-05 gnorm: 1.13 [ 8:47:35<15:44:03] +[titan] 2025-10-05 07:21:55,565 - root - INFO - step: 14345 loss: 2.3329 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 07:21:55,565 - root - INFO - lr: 3.7578e-05 gnorm: 1.11 [ 8:47:46<15:43:52] +[titan] 2025-10-05 07:22:04,241 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:22:06,418 - root - INFO - step: 14350 loss: 2.2380 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9769 +[titan] 2025-10-05 07:22:06,419 - root - INFO - lr: 3.7570e-05 gnorm: 1.07 [ 8:47:56<15:43:41] +[titan] 2025-10-05 07:22:17,273 - root - INFO - step: 14355 loss: 2.2325 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 07:22:17,273 - root - INFO - lr: 3.7562e-05 gnorm: 1.12 [ 8:48:07<15:43:29] +[titan] 2025-10-05 07:22:28,142 - root - INFO - step: 14360 loss: 2.3425 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0700 +[titan] 2025-10-05 07:22:28,143 - root - INFO - lr: 3.7554e-05 gnorm: 1.12 [ 8:48:18<15:43:18] +[titan] 2025-10-05 07:22:39,138 - root - INFO - step: 14365 loss: 2.2707 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.44 mfu: 41.80% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:22:39,139 - root - INFO - lr: 3.7546e-05 gnorm: 1.08 [ 8:48:29<15:43:07] +[titan] 2025-10-05 07:22:50,009 - root - INFO - step: 14370 loss: 2.2987 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 07:22:50,009 - root - INFO - lr: 3.7538e-05 gnorm: 1.13 [ 8:48:40<15:42:56] +[titan] 2025-10-05 07:23:00,863 - root - INFO - step: 14375 loss: 2.2114 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 07:23:00,864 - root - INFO - lr: 3.7530e-05 gnorm: 1.09 [ 8:48:51<15:42:44] +[titan] 2025-10-05 07:23:11,714 - root - INFO - step: 14380 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:23:11,714 - root - INFO - lr: 
3.7522e-05 gnorm: 1.11 [ 8:49:02<15:42:33] +[titan] 2025-10-05 07:23:22,597 - root - INFO - step: 14385 loss: 2.3245 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0537 +[titan] 2025-10-05 07:23:22,597 - root - INFO - lr: 3.7514e-05 gnorm: 1.07 [ 8:49:13<15:42:21] +[titan] 2025-10-05 07:23:33,453 - root - INFO - step: 14390 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:23:33,453 - root - INFO - lr: 3.7506e-05 gnorm: 1.10 [ 8:49:23<15:42:10] +[titan] 2025-10-05 07:23:44,404 - root - INFO - step: 14395 loss: 2.3155 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0444 +[titan] 2025-10-05 07:23:44,404 - root - INFO - lr: 3.7498e-05 gnorm: 1.07 [ 8:49:34<15:41:59] +[titan] 2025-10-05 07:23:53,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:23:55,271 - root - INFO - step: 14400 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:23:55,271 - root - INFO - lr: 3.7490e-05 gnorm: 1.09 [ 8:49:45<15:41:48] +[titan] 2025-10-05 07:24:06,109 - root - INFO - step: 14405 loss: 2.3174 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0466 +[titan] 2025-10-05 07:24:06,109 - root - INFO - lr: 3.7482e-05 gnorm: 1.11 [ 8:49:56<15:41:36] +[titan] 2025-10-05 07:24:16,949 - root - INFO - step: 14410 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0222 +[titan] 2025-10-05 07:24:16,949 - root - INFO - lr: 3.7474e-05 gnorm: 1.22 [ 8:50:07<15:41:25] +[titan] 2025-10-05 07:24:27,813 - root - INFO - step: 14415 loss: 2.2533 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9898 +[titan] 2025-10-05 07:24:27,813 - root - INFO - lr: 3.7466e-05 gnorm: 1.06 [ 8:50:18<15:41:13] +[titan] 2025-10-05 07:24:38,740 - root - INFO - step: 14420 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9292 +[titan] 2025-10-05 07:24:38,740 - root - INFO - lr: 3.7458e-05 gnorm: 1.07 [ 8:50:29<15:41:02] +[titan] 2025-10-05 07:24:49,616 - root - INFO - step: 14425 loss: 2.2439 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 07:24:49,616 - root - INFO - lr: 3.7450e-05 gnorm: 1.09 [ 8:50:40<15:40:51] +[titan] 2025-10-05 07:25:00,495 - root - INFO - step: 14430 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0617 +[titan] 2025-10-05 07:25:00,495 - root - INFO - lr: 
3.7442e-05 gnorm: 1.10 [ 8:50:51<15:40:40] +[titan] 2025-10-05 07:25:11,357 - root - INFO - step: 14435 loss: 2.2516 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9889 +[titan] 2025-10-05 07:25:11,357 - root - INFO - lr: 3.7434e-05 gnorm: 1.10 [ 8:51:01<15:40:28] +[titan] 2025-10-05 07:25:22,214 - root - INFO - step: 14440 loss: 2.2632 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:25:22,214 - root - INFO - lr: 3.7426e-05 gnorm: 1.10 [ 8:51:12<15:40:17] +[titan] 2025-10-05 07:25:33,076 - root - INFO - step: 14445 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 07:25:33,076 - root - INFO - lr: 3.7418e-05 gnorm: 1.09 [ 8:51:23<15:40:06] +[titan] 2025-10-05 07:25:41,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:25:44,001 - root - INFO - step: 14450 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 07:25:44,001 - root - INFO - lr: 3.7410e-05 gnorm: 1.07 [ 8:51:34<15:39:54] +[titan] 2025-10-05 07:25:54,893 - root - INFO - step: 14455 loss: 2.2554 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9922 +[titan] 2025-10-05 07:25:54,893 - root - INFO - lr: 3.7402e-05 gnorm: 1.13 [ 8:51:45<15:39:43] +[titan] 2025-10-05 07:26:05,774 - root - INFO - step: 14460 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9572 +[titan] 2025-10-05 07:26:05,774 - root - INFO - lr: 3.7394e-05 gnorm: 1.08 [ 8:51:56<15:39:32] +[titan] 2025-10-05 07:26:16,651 - root - INFO - step: 14465 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 07:26:16,651 - root - INFO - lr: 3.7386e-05 gnorm: 1.11 [ 8:52:07<15:39:20] +[titan] 2025-10-05 07:26:27,521 - root - INFO - step: 14470 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 07:26:27,521 - root - INFO - lr: 3.7378e-05 gnorm: 1.08 [ 8:52:18<15:39:09] +[titan] 2025-10-05 07:26:38,394 - root - INFO - step: 14475 loss: 2.2013 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 07:26:38,395 - root - INFO - lr: 3.7370e-05 gnorm: 1.08 [ 8:52:28<15:38:58] +[titan] 2025-10-05 07:26:49,332 - root - INFO - step: 14480 loss: 2.2812 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0154 +[titan] 2025-10-05 07:26:49,332 - root - INFO - lr: 
3.7362e-05 gnorm: 1.12 [ 8:52:39<15:38:47] +[titan] 2025-10-05 07:27:00,212 - root - INFO - step: 14485 loss: 2.2411 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 07:27:00,213 - root - INFO - lr: 3.7354e-05 gnorm: 1.05 [ 8:52:50<15:38:35] +[titan] 2025-10-05 07:27:11,129 - root - INFO - step: 14490 loss: 2.2405 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9788 +[titan] 2025-10-05 07:27:11,129 - root - INFO - lr: 3.7346e-05 gnorm: 1.09 [ 8:53:01<15:38:24] +[titan] 2025-10-05 07:27:22,004 - root - INFO - step: 14495 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 07:27:22,004 - root - INFO - lr: 3.7338e-05 gnorm: 1.09 [ 8:53:12<15:38:13] +[titan] 2025-10-05 07:27:30,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:27:32,894 - root - INFO - step: 14500 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 07:27:32,894 - root - INFO - lr: 3.7330e-05 gnorm: 1.10 [ 8:53:23<15:38:01] +[titan] 2025-10-05 07:27:43,812 - root - INFO - step: 14505 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0409 +[titan] 2025-10-05 07:27:43,812 - root - INFO - lr: 3.7322e-05 gnorm: 1.06 [ 8:53:34<15:37:50] +[titan] 2025-10-05 07:27:54,688 - root - INFO - step: 14510 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9921 +[titan] 2025-10-05 07:27:54,688 - root - INFO - lr: 3.7314e-05 gnorm: 1.08 [ 8:53:45<15:37:39] +[titan] 2025-10-05 07:28:05,542 - root - INFO - step: 14515 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 07:28:05,542 - root - INFO - lr: 3.7306e-05 gnorm: 1.05 [ 8:53:56<15:37:27] +[titan] 2025-10-05 07:28:16,397 - root - INFO - step: 14520 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:28:16,398 - root - INFO - lr: 3.7298e-05 gnorm: 1.08 [ 8:54:06<15:37:16] +[titan] 2025-10-05 07:28:27,301 - root - INFO - step: 14525 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9512 +[titan] 2025-10-05 07:28:27,301 - root - INFO - lr: 3.7290e-05 gnorm: 1.05 [ 8:54:17<15:37:05] +[titan] 2025-10-05 07:28:38,153 - root - INFO - step: 14530 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0325 +[titan] 2025-10-05 07:28:38,153 - root - INFO - lr: 
3.7282e-05 gnorm: 1.12 [ 8:54:28<15:36:53] +[titan] 2025-10-05 07:28:49,042 - root - INFO - step: 14535 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 07:28:49,042 - root - INFO - lr: 3.7274e-05 gnorm: 1.10 [ 8:54:39<15:36:42] +[titan] 2025-10-05 07:28:59,909 - root - INFO - step: 14540 loss: 2.2631 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9992 +[titan] 2025-10-05 07:28:59,910 - root - INFO - lr: 3.7266e-05 gnorm: 1.09 [ 8:54:50<15:36:31] +[titan] 2025-10-05 07:29:10,771 - root - INFO - step: 14545 loss: 2.2017 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9458 +[titan] 2025-10-05 07:29:10,771 - root - INFO - lr: 3.7258e-05 gnorm: 1.10 [ 8:55:01<15:36:20] +[titan] 2025-10-05 07:29:19,437 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:29:21,621 - root - INFO - step: 14550 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 07:29:21,621 - root - INFO - lr: 3.7250e-05 gnorm: 1.09 [ 8:55:12<15:36:08] +[titan] 2025-10-05 07:29:32,531 - root - INFO - step: 14555 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0226 +[titan] 2025-10-05 07:29:32,531 - root - INFO - lr: 3.7242e-05 gnorm: 1.14 [ 8:55:23<15:35:57] +[titan] 2025-10-05 07:29:43,472 - root - INFO - step: 14560 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8975 +[titan] 2025-10-05 07:29:43,472 - root - INFO - lr: 3.7234e-05 gnorm: 1.02 [ 8:55:33<15:35:46] +[titan] 2025-10-05 07:29:54,345 - root - INFO - step: 14565 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 07:29:54,346 - root - INFO - lr: 3.7226e-05 gnorm: 1.13 [ 8:55:44<15:35:34] +[titan] 2025-10-05 07:30:05,208 - root - INFO - step: 14570 loss: 2.3031 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0344 +[titan] 2025-10-05 07:30:05,209 - root - INFO - lr: 3.7218e-05 gnorm: 1.12 [ 8:55:55<15:35:23] +[titan] 2025-10-05 07:30:16,066 - root - INFO - step: 14575 loss: 2.2367 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 07:30:16,066 - root - INFO - lr: 3.7210e-05 gnorm: 1.10 [ 8:56:06<15:35:12] +[titan] 2025-10-05 07:30:26,932 - root - INFO - step: 14580 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 07:30:26,933 - root - INFO - lr: 
3.7202e-05 gnorm: 1.07 [ 8:56:17<15:35:00] +[titan] 2025-10-05 07:30:37,811 - root - INFO - step: 14585 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9566 +[titan] 2025-10-05 07:30:37,812 - root - INFO - lr: 3.7194e-05 gnorm: 1.08 [ 8:56:28<15:34:49] +[titan] 2025-10-05 07:30:48,772 - root - INFO - step: 14590 loss: 2.3418 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0695 +[titan] 2025-10-05 07:30:48,772 - root - INFO - lr: 3.7185e-05 gnorm: 1.20 [ 8:56:39<15:34:38] +[titan] 2025-10-05 07:30:59,630 - root - INFO - step: 14595 loss: 2.2116 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 07:30:59,630 - root - INFO - lr: 3.7177e-05 gnorm: 1.09 [ 8:56:50<15:34:27] +[titan] 2025-10-05 07:31:08,301 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:31:10,494 - root - INFO - step: 14600 loss: 2.1772 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 07:31:10,494 - root - INFO - lr: 3.7169e-05 gnorm: 1.08 [ 8:57:01<15:34:15] +[titan] 2025-10-05 07:31:21,365 - root - INFO - step: 14605 loss: 2.3083 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:31:21,365 - root - INFO - lr: 3.7161e-05 gnorm: 1.09 [ 8:57:11<15:34:04] +[titan] 2025-10-05 07:31:32,251 - root - INFO - step: 14610 loss: 2.3039 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 07:31:32,251 - root - INFO - lr: 3.7153e-05 gnorm: 1.15 [ 8:57:22<15:33:53] +[titan] 2025-10-05 07:31:43,124 - root - INFO - step: 14615 loss: 2.2982 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0301 +[titan] 2025-10-05 07:31:43,125 - root - INFO - lr: 3.7145e-05 gnorm: 1.12 [ 8:57:33<15:33:41] +[titan] 2025-10-05 07:31:54,094 - root - INFO - step: 14620 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9332 +[titan] 2025-10-05 07:31:54,094 - root - INFO - lr: 3.7137e-05 gnorm: 1.09 [ 8:57:44<15:33:30] +[titan] 2025-10-05 07:32:04,989 - root - INFO - step: 14625 loss: 2.2391 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:32:04,989 - root - INFO - lr: 3.7129e-05 gnorm: 1.10 [ 8:57:55<15:33:19] +[titan] 2025-10-05 07:32:15,888 - root - INFO - step: 14630 loss: 2.3113 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 07:32:15,888 - root - INFO - lr: 
3.7121e-05 gnorm: 1.10 [ 8:58:06<15:33:08] +[titan] 2025-10-05 07:32:26,771 - root - INFO - step: 14635 loss: 2.2726 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:32:26,771 - root - INFO - lr: 3.7113e-05 gnorm: 1.12 [ 8:58:17<15:32:56] +[titan] 2025-10-05 07:32:37,649 - root - INFO - step: 14640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 07:32:37,649 - root - INFO - lr: 3.7105e-05 gnorm: 1.08 [ 8:58:28<15:32:45] +[titan] 2025-10-05 07:32:48,613 - root - INFO - step: 14645 loss: 2.1989 memory: 118.84GiB(85.28%) tps: 29,888 tflops: 414.65 mfu: 41.93% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9422 +[titan] 2025-10-05 07:32:48,613 - root - INFO - lr: 3.7097e-05 gnorm: 1.05 [ 8:58:39<15:32:34] +[titan] 2025-10-05 07:32:57,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:32:59,535 - root - INFO - step: 14650 loss: 2.3040 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0349 +[titan] 2025-10-05 07:32:59,535 - root - INFO - lr: 3.7089e-05 gnorm: 1.06 [ 8:58:50<15:32:23] +[titan] 2025-10-05 07:33:10,438 - root - INFO - step: 14655 loss: 2.2889 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0209 +[titan] 2025-10-05 07:33:10,439 - root - INFO - lr: 3.7081e-05 gnorm: 1.13 [ 8:59:00<15:32:11] +[titan] 2025-10-05 07:33:21,347 - root - INFO - step: 14660 loss: 2.2514 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:33:21,348 - root - INFO - lr: 3.7073e-05 gnorm: 1.12 [ 8:59:11<15:32:00] +[titan] 2025-10-05 07:33:32,227 - root - INFO - step: 14665 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 07:33:32,227 - root - INFO - lr: 3.7064e-05 gnorm: 1.12 [ 8:59:22<15:31:49] +[titan] 2025-10-05 07:33:43,130 - root - INFO - step: 14670 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:33:43,130 - root - INFO - lr: 3.7056e-05 gnorm: 1.13 [ 8:59:33<15:31:38] +[titan] 2025-10-05 07:33:54,090 - root - INFO - step: 14675 loss: 2.2801 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.79 mfu: 41.94% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 07:33:54,091 - root - INFO - lr: 3.7048e-05 gnorm: 1.08 [ 8:59:44<15:31:26] +[titan] 2025-10-05 07:34:04,932 - root - INFO - step: 14680 loss: 2.1187 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 07:34:04,932 - root - INFO - lr: 
3.7040e-05 gnorm: 1.06 [ 8:59:55<15:31:15] +[titan] 2025-10-05 07:34:15,806 - root - INFO - step: 14685 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9938 +[titan] 2025-10-05 07:34:15,806 - root - INFO - lr: 3.7032e-05 gnorm: 1.10 [ 9:00:06<15:31:04] +[titan] 2025-10-05 07:34:26,671 - root - INFO - step: 14690 loss: 2.2095 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9509 +[titan] 2025-10-05 07:34:26,671 - root - INFO - lr: 3.7024e-05 gnorm: 1.06 [ 9:00:17<15:30:52] +[titan] 2025-10-05 07:34:37,510 - root - INFO - step: 14695 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 07:34:37,510 - root - INFO - lr: 3.7016e-05 gnorm: 1.06 [ 9:00:28<15:30:41] +[titan] 2025-10-05 07:34:46,191 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:34:48,450 - root - INFO - step: 14700 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9405 +[titan] 2025-10-05 07:34:48,450 - root - INFO - lr: 3.7008e-05 gnorm: 1.10 [ 9:00:38<15:30:30] +[titan] 2025-10-05 07:34:59,300 - root - INFO - step: 14705 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0240 +[titan] 2025-10-05 07:34:59,300 - root - INFO - lr: 3.7000e-05 gnorm: 1.14 [ 9:00:49<15:30:18] +[titan] 2025-10-05 07:35:10,167 - root - INFO - step: 14710 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0017 +[titan] 2025-10-05 07:35:10,168 - root - INFO - lr: 3.6992e-05 gnorm: 1.09 [ 9:01:00<15:30:07] +[titan] 2025-10-05 07:35:21,048 - root - INFO - step: 14715 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0027 +[titan] 2025-10-05 07:35:21,048 - root - INFO - lr: 3.6984e-05 gnorm: 1.13 [ 9:01:11<15:29:56] +[titan] 2025-10-05 07:35:31,930 - root - INFO - step: 14720 loss: 2.2273 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9675 +[titan] 2025-10-05 07:35:31,931 - root - INFO - lr: 3.6976e-05 gnorm: 1.08 [ 9:01:22<15:29:45] +[titan] 2025-10-05 07:35:42,810 - root - INFO - step: 14725 loss: 2.3179 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 07:35:42,810 - root - INFO - lr: 3.6967e-05 gnorm: 1.12 [ 9:01:33<15:29:33] +[titan] 2025-10-05 07:35:53,724 - root - INFO - step: 14730 loss: 2.2620 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9989 +[titan] 2025-10-05 07:35:53,724 - root - INFO - lr: 
3.6959e-05 gnorm: 1.11 [ 9:01:44<15:29:22] +[titan] 2025-10-05 07:36:04,629 - root - INFO - step: 14735 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 07:36:04,629 - root - INFO - lr: 3.6951e-05 gnorm: 1.06 [ 9:01:55<15:29:11] +[titan] 2025-10-05 07:36:15,522 - root - INFO - step: 14740 loss: 2.2768 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 07:36:15,523 - root - INFO - lr: 3.6943e-05 gnorm: 1.09 [ 9:02:06<15:28:59] +[titan] 2025-10-05 07:36:26,431 - root - INFO - step: 14745 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 07:36:26,431 - root - INFO - lr: 3.6935e-05 gnorm: 1.07 [ 9:02:16<15:28:48] +[titan] 2025-10-05 07:36:35,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:36:37,321 - root - INFO - step: 14750 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 07:36:37,321 - root - INFO - lr: 3.6927e-05 gnorm: 1.10 [ 9:02:27<15:28:37] +[titan] 2025-10-05 07:36:48,227 - root - INFO - step: 14755 loss: 2.2186 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:36:48,227 - root - INFO - lr: 3.6919e-05 gnorm: 1.04 [ 9:02:38<15:28:26] +[titan] 2025-10-05 07:36:59,096 - root - INFO - step: 14760 loss: 2.2696 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0036 +[titan] 2025-10-05 07:36:59,096 - root - INFO - lr: 3.6911e-05 gnorm: 1.08 [ 9:02:49<15:28:14] +[titan] 2025-10-05 07:37:09,945 - root - INFO - step: 14765 loss: 2.2510 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9893 +[titan] 2025-10-05 07:37:09,945 - root - INFO - lr: 3.6903e-05 gnorm: 1.13 [ 9:03:00<15:28:03] +[titan] 2025-10-05 07:37:20,822 - root - INFO - step: 14770 loss: 2.2169 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:37:20,822 - root - INFO - lr: 3.6894e-05 gnorm: 1.08 [ 9:03:11<15:27:52] +[titan] 2025-10-05 07:37:31,692 - root - INFO - step: 14775 loss: 2.2524 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 07:37:31,692 - root - INFO - lr: 3.6886e-05 gnorm: 1.10 [ 9:03:22<15:27:40] +[titan] 2025-10-05 07:37:42,588 - root - INFO - step: 14780 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 07:37:42,588 - root - INFO - lr: 
3.6878e-05 gnorm: 1.12 [ 9:03:33<15:27:29] +[titan] 2025-10-05 07:37:53,516 - root - INFO - step: 14785 loss: 2.1691 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9163 +[titan] 2025-10-05 07:37:53,516 - root - INFO - lr: 3.6870e-05 gnorm: 1.06 [ 9:03:44<15:27:18] +[titan] 2025-10-05 07:38:04,385 - root - INFO - step: 14790 loss: 2.1764 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 07:38:04,386 - root - INFO - lr: 3.6862e-05 gnorm: 1.05 [ 9:03:54<15:27:07] +[titan] 2025-10-05 07:38:15,271 - root - INFO - step: 14795 loss: 2.2615 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9988 +[titan] 2025-10-05 07:38:15,271 - root - INFO - lr: 3.6854e-05 gnorm: 1.11 [ 9:04:05<15:26:55] +[titan] 2025-10-05 07:38:23,946 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:38:26,148 - root - INFO - step: 14800 loss: 2.2171 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 07:38:26,148 - root - INFO - lr: 3.6846e-05 gnorm: 1.11 [ 9:04:16<15:26:44] +[titan] 2025-10-05 07:38:37,018 - root - INFO - step: 14805 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0292 +[titan] 2025-10-05 07:38:37,018 - root - INFO - lr: 3.6838e-05 gnorm: 1.12 [ 9:04:27<15:26:33] +[titan] 2025-10-05 07:38:47,933 - root - INFO - step: 14810 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 07:38:47,933 - root - INFO - lr: 3.6830e-05 gnorm: 1.11 [ 9:04:38<15:26:22] +[titan] 2025-10-05 07:38:58,873 - root - INFO - step: 14815 loss: 2.2872 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:38:58,873 - root - INFO - lr: 3.6821e-05 gnorm: 1.08 [ 9:04:49<15:26:10] +[titan] 2025-10-05 07:39:09,749 - root - INFO - step: 14820 loss: 2.2863 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:39:09,749 - root - INFO - lr: 3.6813e-05 gnorm: 1.08 [ 9:05:00<15:25:59] +[titan] 2025-10-05 07:39:20,633 - root - INFO - step: 14825 loss: 2.3248 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 07:39:20,633 - root - INFO - lr: 3.6805e-05 gnorm: 1.06 [ 9:05:11<15:25:48] +[titan] 2025-10-05 07:39:31,524 - root - INFO - step: 14830 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:39:31,525 - root - INFO - lr: 
3.6797e-05 gnorm: 1.05 [ 9:05:22<15:25:36] +[titan] 2025-10-05 07:39:42,407 - root - INFO - step: 14835 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0410 +[titan] 2025-10-05 07:39:42,408 - root - INFO - lr: 3.6789e-05 gnorm: 1.09 [ 9:05:32<15:25:25] +[titan] 2025-10-05 07:39:53,316 - root - INFO - step: 14840 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:39:53,316 - root - INFO - lr: 3.6781e-05 gnorm: 1.06 [ 9:05:43<15:25:14] +[titan] 2025-10-05 07:40:04,271 - root - INFO - step: 14845 loss: 2.2304 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.97 mfu: 41.96% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9723 +[titan] 2025-10-05 07:40:04,271 - root - INFO - lr: 3.6773e-05 gnorm: 1.12 [ 9:05:54<15:25:03] +[titan] 2025-10-05 07:40:10,994 - root - INFO - Dumping profiler traces at step 14848 +[titan] 2025-10-05 07:40:11,029 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:40:13,209 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:40:15,391 - root - INFO - step: 14850 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 29,468 tflops: 408.82 mfu: 41.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:40:15,392 - root - INFO - lr: 3.6765e-05 gnorm: 1.08 [ 9:06:05<15:24:52] +[titan] 2025-10-05 07:40:26,262 - root - INFO - step: 14855 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 07:40:26,262 - root - INFO - lr: 3.6756e-05 gnorm: 1.09 [ 9:06:16<15:24:41] +[titan] 2025-10-05 07:40:37,129 - root - INFO - step: 14860 loss: 2.2444 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 07:40:37,129 - root - INFO - lr: 3.6748e-05 gnorm: 1.08 [ 9:06:27<15:24:29] +[titan] 2025-10-05 07:40:47,995 - root - INFO - step: 14865 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0098 +[titan] 2025-10-05 07:40:47,995 - root - INFO - lr: 3.6740e-05 gnorm: 1.10 [ 9:06:38<15:24:18] +[titan] 2025-10-05 07:40:58,905 - root - INFO - step: 14870 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:40:58,905 - root - INFO - lr: 3.6732e-05 gnorm: 1.10 [ 9:06:49<15:24:07] +[titan] 2025-10-05 07:41:09,784 - root - INFO - step: 14875 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 07:41:09,784 - root - INFO - lr: 3.6724e-05 gnorm: 1.10 [ 9:07:00<15:23:55] +[titan] 2025-10-05 07:41:20,683 - root - INFO - step: 14880 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 07:41:20,683 - root - INFO - lr: 
3.6716e-05 gnorm: 1.08 [ 9:07:11<15:23:44] +[titan] 2025-10-05 07:41:31,553 - root - INFO - step: 14885 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 07:41:31,553 - root - INFO - lr: 3.6708e-05 gnorm: 1.05 [ 9:07:22<15:23:33] +[titan] 2025-10-05 07:41:42,413 - root - INFO - step: 14890 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 07:41:42,414 - root - INFO - lr: 3.6699e-05 gnorm: 1.15 [ 9:07:32<15:23:22] +[titan] 2025-10-05 07:41:53,308 - root - INFO - step: 14895 loss: 2.2418 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:41:53,308 - root - INFO - lr: 3.6691e-05 gnorm: 1.07 [ 9:07:43<15:23:10] +[titan] 2025-10-05 07:42:01,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:42:04,160 - root - INFO - step: 14900 loss: 2.2908 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0236 +[titan] 2025-10-05 07:42:04,160 - root - INFO - lr: 3.6683e-05 gnorm: 1.13 [ 9:07:54<15:22:59] +[titan] 2025-10-05 07:42:15,031 - root - INFO - step: 14905 loss: 2.3078 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0380 +[titan] 2025-10-05 07:42:15,031 - root - INFO - lr: 3.6675e-05 gnorm: 1.12 [ 9:08:05<15:22:48] +[titan] 2025-10-05 07:42:25,895 - root - INFO - step: 14910 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9881 +[titan] 2025-10-05 07:42:25,895 - root - INFO - lr: 3.6667e-05 gnorm: 1.09 [ 9:08:16<15:22:36] +[titan] 2025-10-05 07:42:36,754 - root - INFO - step: 14915 loss: 2.2480 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 07:42:36,754 - root - INFO - lr: 3.6659e-05 gnorm: 1.09 [ 9:08:27<15:22:25] +[titan] 2025-10-05 07:42:47,621 - root - INFO - step: 14920 loss: 2.4317 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 07:42:47,621 - root - INFO - lr: 3.6651e-05 gnorm: 1.11 [ 9:08:38<15:22:14] +[titan] 2025-10-05 07:42:58,504 - root - INFO - step: 14925 loss: 2.2167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9576 +[titan] 2025-10-05 07:42:58,504 - root - INFO - lr: 3.6642e-05 gnorm: 1.09 [ 9:08:48<15:22:02] +[titan] 2025-10-05 07:43:09,387 - root - INFO - step: 14930 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 07:43:09,388 - root - INFO - lr: 
3.6634e-05 gnorm: 1.08 [ 9:08:59<15:21:51] +[titan] 2025-10-05 07:43:20,273 - root - INFO - step: 14935 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:43:20,273 - root - INFO - lr: 3.6626e-05 gnorm: 1.11 [ 9:09:10<15:21:40] +[titan] 2025-10-05 07:43:31,152 - root - INFO - step: 14940 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:43:31,152 - root - INFO - lr: 3.6618e-05 gnorm: 1.09 [ 9:09:21<15:21:29] +[titan] 2025-10-05 07:43:42,038 - root - INFO - step: 14945 loss: 2.2476 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 07:43:42,038 - root - INFO - lr: 3.6610e-05 gnorm: 1.04 [ 9:09:32<15:21:17] +[titan] 2025-10-05 07:43:50,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:43:52,910 - root - INFO - step: 14950 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9350 +[titan] 2025-10-05 07:43:52,910 - root - INFO - lr: 3.6602e-05 gnorm: 1.07 [ 9:09:43<15:21:06] +[titan] 2025-10-05 07:44:03,804 - root - INFO - step: 14955 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 07:44:03,804 - root - INFO - lr: 3.6593e-05 gnorm: 1.06 [ 9:09:54<15:20:55] +[titan] 2025-10-05 07:44:14,704 - root - INFO - step: 14960 loss: 2.2966 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0290 +[titan] 2025-10-05 07:44:14,704 - root - INFO - lr: 3.6585e-05 gnorm: 1.08 [ 9:10:05<15:20:43] +[titan] 2025-10-05 07:44:25,575 - root - INFO - step: 14965 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 07:44:25,575 - root - INFO - lr: 3.6577e-05 gnorm: 1.07 [ 9:10:16<15:20:32] +[titan] 2025-10-05 07:44:36,428 - root - INFO - step: 14970 loss: 2.2508 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:44:36,428 - root - INFO - lr: 3.6569e-05 gnorm: 1.09 [ 9:10:26<15:20:21] +[titan] 2025-10-05 07:44:47,311 - root - INFO - step: 14975 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:44:47,311 - root - INFO - lr: 3.6561e-05 gnorm: 1.08 [ 9:10:37<15:20:10] +[titan] 2025-10-05 07:44:58,219 - root - INFO - step: 14980 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:44:58,219 - root - INFO - lr: 
3.6553e-05 gnorm: 1.11 [ 9:10:48<15:19:58] +[titan] 2025-10-05 07:45:09,066 - root - INFO - step: 14985 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9745 +[titan] 2025-10-05 07:45:09,066 - root - INFO - lr: 3.6544e-05 gnorm: 1.07 [ 9:10:59<15:19:47] +[titan] 2025-10-05 07:45:19,933 - root - INFO - step: 14990 loss: 2.1985 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 07:45:19,934 - root - INFO - lr: 3.6536e-05 gnorm: 1.07 [ 9:11:10<15:19:36] +[titan] 2025-10-05 07:45:30,815 - root - INFO - step: 14995 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:45:30,816 - root - INFO - lr: 3.6528e-05 gnorm: 1.05 [ 9:11:21<15:19:24] +[titan] 2025-10-05 07:45:39,479 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:45:41,661 - root - INFO - step: 15000 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 07:45:41,661 - root - INFO - lr: 3.6520e-05 gnorm: 1.11 [ 9:11:32<15:19:13] +[titan] 2025-10-05 07:45:41,661 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 07:46:00,837 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 07:46:00,837 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.18 seconds. 
+[titan] 2025-10-05 07:48:03,855 - root - INFO - step: 15005 loss: 2.1283 memory: 118.84GiB(85.28%) tps: 2,304 tflops: 31.97 mfu: 3.23% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 07:48:03,856 - root - INFO - lr: 3.6512e-05 gnorm: 1.06 [ 9:13:54<15:22:41] +[titan] 2025-10-05 07:48:14,694 - root - INFO - step: 15010 loss: 2.2315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 07:48:14,695 - root - INFO - lr: 3.6504e-05 gnorm: 1.12 [ 9:14:05<15:22:29] +[titan] 2025-10-05 07:48:25,509 - root - INFO - step: 15015 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 07:48:25,510 - root - INFO - lr: 3.6495e-05 gnorm: 1.09 [ 9:14:15<15:22:18] +[titan] 2025-10-05 07:48:36,334 - root - INFO - step: 15020 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 07:48:36,334 - root - INFO - lr: 3.6487e-05 gnorm: 1.09 [ 9:14:26<15:22:06] +[titan] 2025-10-05 07:48:47,212 - root - INFO - step: 15025 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:48:47,212 - root - INFO - lr: 3.6479e-05 gnorm: 1.07 [ 9:14:37<15:21:55] +[titan] 2025-10-05 07:48:58,068 - root - INFO - step: 15030 loss: 2.2843 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:48:58,069 - root - INFO - lr: 3.6471e-05 gnorm: 1.12 [ 9:14:48<15:21:43] +[titan] 2025-10-05 07:49:08,916 - root - INFO - step: 15035 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 07:49:08,916 - root - INFO - lr: 
3.6463e-05 gnorm: 1.06 [ 9:14:59<15:21:32] +[titan] 2025-10-05 07:49:19,759 - root - INFO - step: 15040 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9876 +[titan] 2025-10-05 07:49:19,759 - root - INFO - lr: 3.6454e-05 gnorm: 1.06 [ 9:15:10<15:21:20] +[titan] 2025-10-05 07:49:30,613 - root - INFO - step: 15045 loss: 2.2689 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 07:49:30,613 - root - INFO - lr: 3.6446e-05 gnorm: 1.09 [ 9:15:21<15:21:09] +[titan] 2025-10-05 07:49:39,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:49:41,450 - root - INFO - step: 15050 loss: 2.2266 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 07:49:41,450 - root - INFO - lr: 3.6438e-05 gnorm: 1.08 [ 9:15:31<15:20:57] +[titan] 2025-10-05 07:49:52,326 - root - INFO - step: 15055 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0056 +[titan] 2025-10-05 07:49:52,326 - root - INFO - lr: 3.6430e-05 gnorm: 1.10 [ 9:15:42<15:20:46] +[titan] 2025-10-05 07:50:03,204 - root - INFO - step: 15060 loss: 2.2848 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:50:03,204 - root - INFO - lr: 3.6422e-05 gnorm: 1.09 [ 9:15:53<15:20:35] +[titan] 2025-10-05 07:50:14,065 - root - INFO - step: 15065 loss: 2.2635 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 07:50:14,065 - root - INFO - lr: 3.6413e-05 gnorm: 1.09 [ 9:16:04<15:20:23] +[titan] 2025-10-05 07:50:24,948 - root - INFO - step: 15070 loss: 2.2568 memory: 118.84GiB(85.28%) tps: 
30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 07:50:24,948 - root - INFO - lr: 3.6405e-05 gnorm: 1.08 [ 9:16:15<15:20:12] +[titan] 2025-10-05 07:50:35,794 - root - INFO - step: 15075 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0211 +[titan] 2025-10-05 07:50:35,794 - root - INFO - lr: 3.6397e-05 gnorm: 1.09 [ 9:16:26<15:20:00] +[titan] 2025-10-05 07:50:46,648 - root - INFO - step: 15080 loss: 2.2769 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0110 +[titan] 2025-10-05 07:50:46,649 - root - INFO - lr: 3.6389e-05 gnorm: 1.07 [ 9:16:37<15:19:49] +[titan] 2025-10-05 07:50:57,498 - root - INFO - step: 15085 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 07:50:57,498 - root - INFO - lr: 3.6381e-05 gnorm: 1.07 [ 9:16:47<15:19:37] +[titan] 2025-10-05 07:51:08,384 - root - INFO - step: 15090 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:51:08,385 - root - INFO - lr: 3.6373e-05 gnorm: 1.08 [ 9:16:58<15:19:26] +[titan] 2025-10-05 07:51:19,234 - root - INFO - step: 15095 loss: 2.2363 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9753 +[titan] 2025-10-05 07:51:19,234 - root - INFO - lr: 3.6364e-05 gnorm: 1.07 [ 9:17:09<15:19:15] +[titan] 2025-10-05 07:51:27,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:51:30,098 - root - INFO - step: 15100 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:51:30,098 - root - INFO - lr: 3.6356e-05 gnorm: 1.15 [ 9:17:20<15:19:03] +[titan] 2025-10-05 07:51:40,977 - root - INFO - step: 15105 loss: 2.2586 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9945 +[titan] 2025-10-05 07:51:40,977 - root - INFO - lr: 3.6348e-05 gnorm: 1.11 [ 9:17:31<15:18:52] +[titan] 2025-10-05 07:51:51,845 - root - INFO - step: 15110 loss: 2.2404 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 07:51:51,845 - root - INFO - lr: 3.6340e-05 gnorm: 1.07 [ 9:17:42<15:18:40] +[titan] 2025-10-05 07:52:02,714 - root - INFO - step: 15115 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0275 +[titan] 2025-10-05 07:52:02,715 - root - INFO - lr: 3.6331e-05 gnorm: 1.14 [ 9:17:53<15:18:29] +[titan] 2025-10-05 07:52:13,605 - root - INFO - step: 15120 loss: 2.2957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 07:52:13,605 - root - INFO - lr: 3.6323e-05 gnorm: 1.15 [ 9:18:04<15:18:18] +[titan] 2025-10-05 07:52:24,497 - root - INFO - step: 15125 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:52:24,498 - root - INFO - lr: 3.6315e-05 gnorm: 1.03 [ 9:18:14<15:18:06] +[titan] 2025-10-05 07:52:35,368 - root - INFO - step: 15130 loss: 2.2438 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:35,368 - root - INFO - lr: 
3.6307e-05 gnorm: 1.11 [ 9:18:25<15:17:55] +[titan] 2025-10-05 07:52:46,243 - root - INFO - step: 15135 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 07:52:46,243 - root - INFO - lr: 3.6299e-05 gnorm: 1.09 [ 9:18:36<15:17:43] +[titan] 2025-10-05 07:52:57,118 - root - INFO - step: 15140 loss: 2.2420 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:57,118 - root - INFO - lr: 3.6290e-05 gnorm: 1.10 [ 9:18:47<15:17:32] +[titan] 2025-10-05 07:53:07,980 - root - INFO - step: 15145 loss: 2.3012 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 07:53:07,981 - root - INFO - lr: 3.6282e-05 gnorm: 1.07 [ 9:18:58<15:17:21] +[titan] 2025-10-05 07:53:16,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:53:18,858 - root - INFO - step: 15150 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9690 +[titan] 2025-10-05 07:53:18,858 - root - INFO - lr: 3.6274e-05 gnorm: 1.07 [ 9:19:09<15:17:09] +[titan] 2025-10-05 07:53:29,759 - root - INFO - step: 15155 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:53:29,759 - root - INFO - lr: 3.6266e-05 gnorm: 1.07 [ 9:19:20<15:16:58] +[titan] 2025-10-05 07:53:40,639 - root - INFO - step: 15160 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:53:40,639 - root - INFO - lr: 3.6258e-05 gnorm: 1.11 [ 9:19:31<15:16:47] +[titan] 2025-10-05 07:53:51,512 - root - INFO - step: 15165 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:53:51,512 - root - INFO - lr: 3.6249e-05 gnorm: 1.10 [ 9:19:41<15:16:35] +[titan] 2025-10-05 07:54:02,384 - root - INFO - step: 15170 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0038 +[titan] 2025-10-05 07:54:02,384 - root - INFO - lr: 3.6241e-05 gnorm: 1.11 [ 9:19:52<15:16:24] +[titan] 2025-10-05 07:54:13,267 - root - INFO - step: 15175 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:54:13,267 - root - INFO - lr: 3.6233e-05 gnorm: 1.08 [ 9:20:03<15:16:12] +[titan] 2025-10-05 07:54:24,133 - root - INFO - step: 15180 loss: 2.3028 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 07:54:24,133 - root - INFO - lr: 
3.6225e-05 gnorm: 1.09 [ 9:20:14<15:16:01] +[titan] 2025-10-05 07:54:35,038 - root - INFO - step: 15185 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:54:35,038 - root - INFO - lr: 3.6216e-05 gnorm: 1.07 [ 9:20:25<15:15:50] +[titan] 2025-10-05 07:54:45,892 - root - INFO - step: 15190 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0272 +[titan] 2025-10-05 07:54:45,892 - root - INFO - lr: 3.6208e-05 gnorm: 1.08 [ 9:20:36<15:15:38] +[titan] 2025-10-05 07:54:56,749 - root - INFO - step: 15195 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9845 +[titan] 2025-10-05 07:54:56,749 - root - INFO - lr: 3.6200e-05 gnorm: 1.08 [ 9:20:47<15:15:27] +[titan] 2025-10-05 07:55:05,428 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:55:07,614 - root - INFO - step: 15200 loss: 2.2230 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 07:55:07,615 - root - INFO - lr: 3.6192e-05 gnorm: 1.08 [ 9:20:58<15:15:15] +[titan] 2025-10-05 07:55:18,475 - root - INFO - step: 15205 loss: 2.2720 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 07:55:18,475 - root - INFO - lr: 3.6184e-05 gnorm: 1.09 [ 9:21:08<15:15:04] +[titan] 2025-10-05 07:55:29,333 - root - INFO - step: 15210 loss: 2.2496 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9871 +[titan] 2025-10-05 07:55:29,333 - root - INFO - lr: 3.6175e-05 gnorm: 1.10 [ 9:21:19<15:14:52] +[titan] 2025-10-05 07:55:40,201 - root - INFO - step: 15215 loss: 2.2704 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 07:55:40,201 - root - INFO - lr: 3.6167e-05 gnorm: 1.10 [ 9:21:30<15:14:41] +[titan] 2025-10-05 07:55:51,102 - root - INFO - step: 15220 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9944 +[titan] 2025-10-05 07:55:51,103 - root - INFO - lr: 3.6159e-05 gnorm: 1.09 [ 9:21:41<15:14:30] +[titan] 2025-10-05 07:56:01,979 - root - INFO - step: 15225 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:56:01,979 - root - INFO - lr: 3.6151e-05 gnorm: 1.08 [ 9:21:52<15:14:18] +[titan] 2025-10-05 07:56:12,856 - root - INFO - step: 15230 loss: 2.3282 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0531 +[titan] 2025-10-05 07:56:12,856 - root - INFO - lr: 
3.6142e-05 gnorm: 1.08 [ 9:22:03<15:14:07] +[titan] 2025-10-05 07:56:23,718 - root - INFO - step: 15235 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0077 +[titan] 2025-10-05 07:56:23,718 - root - INFO - lr: 3.6134e-05 gnorm: 1.04 [ 9:22:14<15:13:56] +[titan] 2025-10-05 07:56:34,605 - root - INFO - step: 15240 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9751 +[titan] 2025-10-05 07:56:34,605 - root - INFO - lr: 3.6126e-05 gnorm: 1.07 [ 9:22:25<15:13:44] +[titan] 2025-10-05 07:56:45,472 - root - INFO - step: 15245 loss: 2.3360 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 07:56:45,472 - root - INFO - lr: 3.6118e-05 gnorm: 1.05 [ 9:22:35<15:13:33] +[titan] 2025-10-05 07:56:54,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:56:56,368 - root - INFO - step: 15250 loss: 2.2490 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9870 +[titan] 2025-10-05 07:56:56,368 - root - INFO - lr: 3.6109e-05 gnorm: 1.08 [ 9:22:46<15:13:21] +[titan] 2025-10-05 07:57:07,241 - root - INFO - step: 15255 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9655 +[titan] 2025-10-05 07:57:07,241 - root - INFO - lr: 3.6101e-05 gnorm: 1.06 [ 9:22:57<15:13:10] +[titan] 2025-10-05 07:57:18,136 - root - INFO - step: 15260 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:57:18,136 - root - INFO - lr: 3.6093e-05 gnorm: 1.10 [ 9:23:08<15:12:59] +[titan] 2025-10-05 07:57:29,037 - root - INFO - step: 15265 loss: 2.2358 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9744 +[titan] 2025-10-05 07:57:29,037 - root - INFO - lr: 3.6085e-05 gnorm: 1.07 [ 9:23:19<15:12:47] +[titan] 2025-10-05 07:57:39,909 - root - INFO - step: 15270 loss: 2.3087 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 07:57:39,910 - root - INFO - lr: 3.6076e-05 gnorm: 1.08 [ 9:23:30<15:12:36] +[titan] 2025-10-05 07:57:50,767 - root - INFO - step: 15275 loss: 2.2564 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 07:57:50,768 - root - INFO - lr: 3.6068e-05 gnorm: 1.08 [ 9:23:41<15:12:24] +[titan] 2025-10-05 07:58:01,662 - root - INFO - step: 15280 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9973 +[titan] 2025-10-05 07:58:01,662 - root - INFO - lr: 
3.6060e-05 gnorm: 1.20 [ 9:23:52<15:12:13] +[titan] 2025-10-05 07:58:12,584 - root - INFO - step: 15285 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:58:12,584 - root - INFO - lr: 3.6052e-05 gnorm: 1.10 [ 9:24:03<15:12:02] +[titan] 2025-10-05 07:58:23,438 - root - INFO - step: 15290 loss: 2.2206 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 07:58:23,438 - root - INFO - lr: 3.6043e-05 gnorm: 1.06 [ 9:24:13<15:11:50] +[titan] 2025-10-05 07:58:34,299 - root - INFO - step: 15295 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 07:58:34,299 - root - INFO - lr: 3.6035e-05 gnorm: 1.06 [ 9:24:24<15:11:39] +[titan] 2025-10-05 07:58:42,983 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:58:45,167 - root - INFO - step: 15300 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 07:58:45,167 - root - INFO - lr: 3.6027e-05 gnorm: 1.08 [ 9:24:35<15:11:28] +[titan] 2025-10-05 07:58:56,031 - root - INFO - step: 15305 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 07:58:56,032 - root - INFO - lr: 3.6019e-05 gnorm: 1.10 [ 9:24:46<15:11:16] +[titan] 2025-10-05 07:59:06,887 - root - INFO - step: 15310 loss: 2.2775 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 07:59:06,887 - root - INFO - lr: 3.6010e-05 gnorm: 1.09 [ 9:24:57<15:11:05] +[titan] 2025-10-05 07:59:17,809 - root - INFO - step: 15315 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9766 +[titan] 2025-10-05 07:59:17,809 - root - INFO - lr: 3.6002e-05 gnorm: 1.10 [ 9:25:08<15:10:53] +[titan] 2025-10-05 07:59:28,686 - root - INFO - step: 15320 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 07:59:28,686 - root - INFO - lr: 3.5994e-05 gnorm: 1.07 [ 9:25:19<15:10:42] +[titan] 2025-10-05 07:59:39,537 - root - INFO - step: 15325 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 07:59:39,537 - root - INFO - lr: 3.5986e-05 gnorm: 1.11 [ 9:25:29<15:10:31] +[titan] 2025-10-05 07:59:50,399 - root - INFO - step: 15330 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:59:50,400 - root - INFO - lr: 
3.5977e-05 gnorm: 1.11 [ 9:25:40<15:10:19] +[titan] 2025-10-05 08:00:01,278 - root - INFO - step: 15335 loss: 2.2792 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0137 +[titan] 2025-10-05 08:00:01,278 - root - INFO - lr: 3.5969e-05 gnorm: 1.15 [ 9:25:51<15:10:08] +[titan] 2025-10-05 08:00:12,178 - root - INFO - step: 15340 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 08:00:12,178 - root - INFO - lr: 3.5961e-05 gnorm: 1.06 [ 9:26:02<15:09:57] +[titan] 2025-10-05 08:00:23,116 - root - INFO - step: 15345 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:00:23,116 - root - INFO - lr: 3.5952e-05 gnorm: 1.09 [ 9:26:13<15:09:45] +[titan] 2025-10-05 08:00:31,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:00:33,989 - root - INFO - step: 15350 loss: 2.2871 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 08:00:33,989 - root - INFO - lr: 3.5944e-05 gnorm: 1.10 [ 9:26:24<15:09:34] +[titan] 2025-10-05 08:00:44,860 - root - INFO - step: 15355 loss: 2.1883 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 08:00:44,860 - root - INFO - lr: 3.5936e-05 gnorm: 1.10 [ 9:26:35<15:09:22] +[titan] 2025-10-05 08:00:55,829 - root - INFO - step: 15360 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9417 +[titan] 2025-10-05 08:00:55,830 - root - INFO - lr: 3.5928e-05 gnorm: 1.04 [ 9:26:46<15:09:11] +[titan] 2025-10-05 08:00:56,008 - root - INFO - Dumping profiler traces at step 15360 +[titan] 2025-10-05 08:00:56,048 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:01:06,999 - root - INFO - step: 15365 loss: 2.2781 memory: 118.84GiB(85.28%) tps: 29,337 tflops: 407.01 mfu: 41.15% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 08:01:06,999 - root - INFO - lr: 3.5919e-05 gnorm: 1.07 [ 9:26:57<15:09:00] +[titan] 2025-10-05 08:01:17,972 - root - INFO - step: 15370 loss: 2.2166 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 08:01:17,972 - root - INFO - lr: 3.5911e-05 gnorm: 1.06 [ 9:27:08<15:08:49] +[titan] 2025-10-05 08:01:28,828 - root - INFO - step: 15375 loss: 2.2257 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 08:01:28,829 - root - INFO - lr: 3.5903e-05 gnorm: 1.04 [ 9:27:19<15:08:38] +[titan] 2025-10-05 08:01:39,731 - root - INFO - step: 15380 loss: 
2.2608 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9985 +[titan] 2025-10-05 08:01:39,732 - root - INFO - lr: 3.5895e-05 gnorm: 1.14 [ 9:27:30<15:08:26] +[titan] 2025-10-05 08:01:50,600 - root - INFO - step: 15385 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9684 +[titan] 2025-10-05 08:01:50,601 - root - INFO - lr: 3.5886e-05 gnorm: 1.10 [ 9:27:41<15:08:15] +[titan] 2025-10-05 08:02:01,449 - root - INFO - step: 15390 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 08:02:01,449 - root - INFO - lr: 3.5878e-05 gnorm: 1.10 [ 9:27:51<15:08:04] +[titan] 2025-10-05 08:02:12,375 - root - INFO - step: 15395 loss: 2.2776 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 08:02:12,375 - root - INFO - lr: 3.5870e-05 gnorm: 1.10 [ 9:28:02<15:07:52] +[titan] 2025-10-05 08:02:21,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:02:23,262 - root - INFO - step: 15400 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9035 +[titan] 2025-10-05 08:02:23,262 - root - INFO - lr: 3.5861e-05 gnorm: 1.05 [ 9:28:13<15:07:41] +[titan] 2025-10-05 08:02:34,123 - root - INFO - step: 15405 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 08:02:34,123 - root - INFO - lr: 3.5853e-05 gnorm: 1.05 [ 9:28:24<15:07:29] +[titan] 2025-10-05 08:02:45,026 - root - INFO - step: 15410 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:02:45,027 - root - INFO - lr: 3.5845e-05 gnorm: 1.09 [ 9:28:35<15:07:18] +[titan] 2025-10-05 08:02:55,897 - root - INFO - step: 15415 loss: 2.2676 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:02:55,897 - root - INFO - lr: 3.5837e-05 gnorm: 1.09 [ 9:28:46<15:07:07] +[titan] 2025-10-05 08:03:06,761 - root - INFO - step: 15420 loss: 2.2135 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9565 +[titan] 2025-10-05 08:03:06,761 - root - INFO - lr: 3.5828e-05 gnorm: 1.09 [ 9:28:57<15:06:55] +[titan] 2025-10-05 08:03:17,688 - root - INFO - step: 15425 loss: 2.2445 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9820 +[titan] 2025-10-05 08:03:17,688 - root - INFO - lr: 3.5820e-05 gnorm: 1.04 [ 9:29:08<15:06:44] +[titan] 2025-10-05 08:03:28,565 - root - INFO - step: 15430 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 08:03:28,565 - root - INFO - lr: 
3.5812e-05 gnorm: 1.11 [ 9:29:19<15:06:33] +[titan] 2025-10-05 08:03:39,425 - root - INFO - step: 15435 loss: 2.2327 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:03:39,425 - root - INFO - lr: 3.5803e-05 gnorm: 1.06 [ 9:29:29<15:06:21] +[titan] 2025-10-05 08:03:50,323 - root - INFO - step: 15440 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 08:03:50,323 - root - INFO - lr: 3.5795e-05 gnorm: 1.04 [ 9:29:40<15:06:10] +[titan] 2025-10-05 08:04:01,195 - root - INFO - step: 15445 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 08:04:01,195 - root - INFO - lr: 3.5787e-05 gnorm: 1.04 [ 9:29:51<15:05:59] +[titan] 2025-10-05 08:04:09,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:04:12,045 - root - INFO - step: 15450 loss: 2.2815 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0162 +[titan] 2025-10-05 08:04:12,045 - root - INFO - lr: 3.5778e-05 gnorm: 1.12 [ 9:30:02<15:05:47] +[titan] 2025-10-05 08:04:22,960 - root - INFO - step: 15455 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 08:04:22,960 - root - INFO - lr: 3.5770e-05 gnorm: 1.11 [ 9:30:13<15:05:36] +[titan] 2025-10-05 08:04:33,818 - root - INFO - step: 15460 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9338 +[titan] 2025-10-05 08:04:33,818 - root - INFO - lr: 3.5762e-05 gnorm: 1.08 [ 9:30:24<15:05:24] +[titan] 2025-10-05 08:04:44,689 - root - INFO - step: 15465 loss: 2.1902 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:04:44,689 - root - INFO - lr: 3.5754e-05 gnorm: 1.09 [ 9:30:35<15:05:13] +[titan] 2025-10-05 08:04:55,581 - root - INFO - step: 15470 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:04:55,581 - root - INFO - lr: 3.5745e-05 gnorm: 1.07 [ 9:30:46<15:05:02] +[titan] 2025-10-05 08:05:06,501 - root - INFO - step: 15475 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0023 +[titan] 2025-10-05 08:05:06,501 - root - INFO - lr: 3.5737e-05 gnorm: 1.12 [ 9:30:56<15:04:50] +[titan] 2025-10-05 08:05:17,432 - root - INFO - step: 15480 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 08:05:17,432 - root - INFO - lr: 
3.5729e-05 gnorm: 1.10 [ 9:31:07<15:04:39] +[titan] 2025-10-05 08:05:28,358 - root - INFO - step: 15485 loss: 2.2121 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:05:28,358 - root - INFO - lr: 3.5720e-05 gnorm: 1.05 [ 9:31:18<15:04:28] +[titan] 2025-10-05 08:05:39,256 - root - INFO - step: 15490 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 08:05:39,257 - root - INFO - lr: 3.5712e-05 gnorm: 1.11 [ 9:31:29<15:04:16] +[titan] 2025-10-05 08:05:50,140 - root - INFO - step: 15495 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 08:05:50,140 - root - INFO - lr: 3.5704e-05 gnorm: 1.04 [ 9:31:40<15:04:05] +[titan] 2025-10-05 08:05:58,823 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:06:01,006 - root - INFO - step: 15500 loss: 2.1526 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 08:06:01,006 - root - INFO - lr: 3.5695e-05 gnorm: 1.06 [ 9:31:51<15:03:54] +[titan] 2025-10-05 08:06:11,916 - root - INFO - step: 15505 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 08:06:11,916 - root - INFO - lr: 3.5687e-05 gnorm: 1.09 [ 9:32:02<15:03:42] +[titan] 2025-10-05 08:06:22,849 - root - INFO - step: 15510 loss: 2.2409 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 08:06:22,849 - root - INFO - lr: 3.5679e-05 gnorm: 1.06 [ 9:32:13<15:03:31] +[titan] 2025-10-05 08:06:33,698 - root - INFO - step: 15515 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 08:06:33,698 - root - INFO - lr: 3.5670e-05 gnorm: 1.08 [ 9:32:24<15:03:20] +[titan] 2025-10-05 08:06:44,607 - root - INFO - step: 15520 loss: 2.3868 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 08:06:44,607 - root - INFO - lr: 3.5662e-05 gnorm: 2.99 [ 9:32:35<15:03:08] +[titan] 2025-10-05 08:06:55,464 - root - INFO - step: 15525 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9210 +[titan] 2025-10-05 08:06:55,464 - root - INFO - lr: 3.5654e-05 gnorm: 1.10 [ 9:32:45<15:02:57] +[titan] 2025-10-05 08:07:06,333 - root - INFO - step: 15530 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:07:06,333 - root - INFO - lr: 
3.5646e-05 gnorm: 1.03 [ 9:32:56<15:02:46] +[titan] 2025-10-05 08:07:17,232 - root - INFO - step: 15535 loss: 2.2054 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 08:07:17,232 - root - INFO - lr: 3.5637e-05 gnorm: 1.08 [ 9:33:07<15:02:34] +[titan] 2025-10-05 08:07:28,161 - root - INFO - step: 15540 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:07:28,161 - root - INFO - lr: 3.5629e-05 gnorm: 1.10 [ 9:33:18<15:02:23] +[titan] 2025-10-05 08:07:39,016 - root - INFO - step: 15545 loss: 2.2280 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 08:07:39,016 - root - INFO - lr: 3.5621e-05 gnorm: 1.07 [ 9:33:29<15:02:12] +[titan] 2025-10-05 08:07:47,694 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:07:49,876 - root - INFO - step: 15550 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 08:07:49,876 - root - INFO - lr: 3.5612e-05 gnorm: 1.07 [ 9:33:40<15:02:00] +[titan] 2025-10-05 08:08:00,741 - root - INFO - step: 15555 loss: 2.2855 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 08:08:00,741 - root - INFO - lr: 3.5604e-05 gnorm: 1.08 [ 9:33:51<15:01:49] +[titan] 2025-10-05 08:08:11,595 - root - INFO - step: 15560 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 08:08:11,595 - root - INFO - lr: 3.5596e-05 gnorm: 1.08 [ 9:34:02<15:01:37] +[titan] 2025-10-05 08:08:22,482 - root - INFO - step: 15565 loss: 2.2410 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:08:22,482 - root - INFO - lr: 3.5587e-05 gnorm: 1.08 [ 9:34:12<15:01:26] +[titan] 2025-10-05 08:08:33,383 - root - INFO - step: 15570 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 08:08:33,383 - root - INFO - lr: 3.5579e-05 gnorm: 1.11 [ 9:34:23<15:01:15] +[titan] 2025-10-05 08:08:44,242 - root - INFO - step: 15575 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 08:08:44,242 - root - INFO - lr: 3.5571e-05 gnorm: 1.06 [ 9:34:34<15:01:03] +[titan] 2025-10-05 08:08:55,120 - root - INFO - step: 15580 loss: 2.2133 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9541 +[titan] 2025-10-05 08:08:55,120 - root - INFO - lr: 
3.5562e-05 gnorm: 1.05 [ 9:34:45<15:00:52] +[titan] 2025-10-05 08:09:06,000 - root - INFO - step: 15585 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9699 +[titan] 2025-10-05 08:09:06,000 - root - INFO - lr: 3.5554e-05 gnorm: 1.10 [ 9:34:56<15:00:41] +[titan] 2025-10-05 08:09:16,881 - root - INFO - step: 15590 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 08:09:16,881 - root - INFO - lr: 3.5546e-05 gnorm: 1.07 [ 9:35:07<15:00:29] +[titan] 2025-10-05 08:09:27,784 - root - INFO - step: 15595 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 08:09:27,784 - root - INFO - lr: 3.5537e-05 gnorm: 1.12 [ 9:35:18<15:00:18] +[titan] 2025-10-05 08:09:36,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:09:38,690 - root - INFO - step: 15600 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 08:09:38,690 - root - INFO - lr: 3.5529e-05 gnorm: 1.09 [ 9:35:29<15:00:07] +[titan] 2025-10-05 08:09:49,593 - root - INFO - step: 15605 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 08:09:49,594 - root - INFO - lr: 3.5521e-05 gnorm: 1.08 [ 9:35:40<14:59:55] +[titan] 2025-10-05 08:10:00,479 - root - INFO - step: 15610 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 08:10:00,480 - root - INFO - lr: 3.5512e-05 gnorm: 1.08 [ 9:35:50<14:59:44] +[titan] 2025-10-05 08:10:11,374 - root - INFO - step: 15615 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9198 +[titan] 2025-10-05 08:10:11,374 - root - INFO - lr: 3.5504e-05 gnorm: 1.05 [ 9:36:01<14:59:33] +[titan] 2025-10-05 08:10:22,287 - root - INFO - step: 15620 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9771 +[titan] 2025-10-05 08:10:22,287 - root - INFO - lr: 3.5496e-05 gnorm: 1.08 [ 9:36:12<14:59:21] +[titan] 2025-10-05 08:10:33,166 - root - INFO - step: 15625 loss: 2.2767 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 08:10:33,166 - root - INFO - lr: 3.5487e-05 gnorm: 1.12 [ 9:36:23<14:59:10] +[titan] 2025-10-05 08:10:44,056 - root - INFO - step: 15630 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 08:10:44,056 - root - INFO - lr: 
3.5479e-05 gnorm: 1.07 [ 9:36:34<14:58:59] +[titan] 2025-10-05 08:10:54,998 - root - INFO - step: 15635 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 08:10:54,998 - root - INFO - lr: 3.5471e-05 gnorm: 1.07 [ 9:36:45<14:58:47] +[titan] 2025-10-05 08:11:05,867 - root - INFO - step: 15640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:11:05,867 - root - INFO - lr: 3.5462e-05 gnorm: 1.08 [ 9:36:56<14:58:36] +[titan] 2025-10-05 08:11:16,727 - root - INFO - step: 15645 loss: 2.1832 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:11:16,727 - root - INFO - lr: 3.5454e-05 gnorm: 1.08 [ 9:37:07<14:58:25] +[titan] 2025-10-05 08:11:25,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:11:27,605 - root - INFO - step: 15650 loss: 2.1882 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:11:27,605 - root - INFO - lr: 3.5445e-05 gnorm: 1.03 [ 9:37:18<14:58:13] +[titan] 2025-10-05 08:11:38,458 - root - INFO - step: 15655 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:11:38,458 - root - INFO - lr: 3.5437e-05 gnorm: 1.05 [ 9:37:28<14:58:02] +[titan] 2025-10-05 08:11:49,330 - root - INFO - step: 15660 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 08:11:49,330 - root - INFO - lr: 3.5429e-05 gnorm: 1.11 [ 9:37:39<14:57:50] +[titan] 2025-10-05 08:12:00,235 - root - INFO - step: 15665 loss: 2.1792 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9268 +[titan] 2025-10-05 08:12:00,235 - root - INFO - lr: 3.5420e-05 gnorm: 1.07 [ 9:37:50<14:57:39] +[titan] 2025-10-05 08:12:11,114 - root - INFO - step: 15670 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9779 +[titan] 2025-10-05 08:12:11,114 - root - INFO - lr: 3.5412e-05 gnorm: 1.03 [ 9:38:01<14:57:28] +[titan] 2025-10-05 08:12:21,997 - root - INFO - step: 15675 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 08:12:21,997 - root - INFO - lr: 3.5404e-05 gnorm: 1.07 [ 9:38:12<14:57:16] +[titan] 2025-10-05 08:12:32,868 - root - INFO - step: 15680 loss: 2.2075 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:12:32,868 - root - INFO - lr: 
3.5395e-05 gnorm: 1.07 [ 9:38:23<14:57:05] +[titan] 2025-10-05 08:12:43,747 - root - INFO - step: 15685 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 08:12:43,747 - root - INFO - lr: 3.5387e-05 gnorm: 1.10 [ 9:38:34<14:56:54] +[titan] 2025-10-05 08:12:54,610 - root - INFO - step: 15690 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0090 +[titan] 2025-10-05 08:12:54,610 - root - INFO - lr: 3.5379e-05 gnorm: 1.08 [ 9:38:45<14:56:42] +[titan] 2025-10-05 08:13:05,466 - root - INFO - step: 15695 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 08:13:05,466 - root - INFO - lr: 3.5370e-05 gnorm: 1.06 [ 9:38:55<14:56:31] +[titan] 2025-10-05 08:13:14,193 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:13:16,388 - root - INFO - step: 15700 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0069 +[titan] 2025-10-05 08:13:16,388 - root - INFO - lr: 3.5362e-05 gnorm: 1.10 [ 9:39:06<14:56:20] +[titan] 2025-10-05 08:13:27,274 - root - INFO - step: 15705 loss: 2.2396 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9780 +[titan] 2025-10-05 08:13:27,275 - root - INFO - lr: 3.5354e-05 gnorm: 1.10 [ 9:39:17<14:56:08] +[titan] 2025-10-05 08:13:38,135 - root - INFO - step: 15710 loss: 2.2474 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 08:13:38,136 - root - INFO - lr: 3.5345e-05 gnorm: 1.12 [ 9:39:28<14:55:57] +[titan] 2025-10-05 08:13:49,010 - root - INFO - step: 15715 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9203 +[titan] 2025-10-05 08:13:49,010 - root - INFO - lr: 3.5337e-05 gnorm: 1.10 [ 9:39:39<14:55:45] +[titan] 2025-10-05 08:13:59,874 - root - INFO - step: 15720 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9736 +[titan] 2025-10-05 08:13:59,875 - root - INFO - lr: 3.5328e-05 gnorm: 1.11 [ 9:39:50<14:55:34] +[titan] 2025-10-05 08:14:10,743 - root - INFO - step: 15725 loss: 2.2138 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9568 +[titan] 2025-10-05 08:14:10,743 - root - INFO - lr: 3.5320e-05 gnorm: 1.10 [ 9:40:01<14:55:23] +[titan] 2025-10-05 08:14:21,639 - root - INFO - step: 15730 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 08:14:21,640 - root - INFO - lr: 
3.5312e-05 gnorm: 1.09 [ 9:40:12<14:55:11] +[titan] 2025-10-05 08:14:32,539 - root - INFO - step: 15735 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 08:14:32,539 - root - INFO - lr: 3.5303e-05 gnorm: 1.05 [ 9:40:22<14:55:00] +[titan] 2025-10-05 08:14:43,413 - root - INFO - step: 15740 loss: 2.2798 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:14:43,414 - root - INFO - lr: 3.5295e-05 gnorm: 1.11 [ 9:40:33<14:54:49] +[titan] 2025-10-05 08:14:54,293 - root - INFO - step: 15745 loss: 2.2448 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9833 +[titan] 2025-10-05 08:14:54,293 - root - INFO - lr: 3.5287e-05 gnorm: 1.15 [ 9:40:44<14:54:37] +[titan] 2025-10-05 08:15:02,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:15:05,161 - root - INFO - step: 15750 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9407 +[titan] 2025-10-05 08:15:05,161 - root - INFO - lr: 3.5278e-05 gnorm: 1.07 [ 9:40:55<14:54:26] +[titan] 2025-10-05 08:15:16,026 - root - INFO - step: 15755 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9732 +[titan] 2025-10-05 08:15:16,026 - root - INFO - lr: 3.5270e-05 gnorm: 1.08 [ 9:41:06<14:54:15] +[titan] 2025-10-05 08:15:26,950 - root - INFO - step: 15760 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9790 +[titan] 2025-10-05 08:15:26,951 - root - INFO - lr: 3.5261e-05 gnorm: 1.13 [ 9:41:17<14:54:03] +[titan] 2025-10-05 08:15:37,835 - root - INFO - step: 15765 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0588 +[titan] 2025-10-05 08:15:37,835 - root - INFO - lr: 3.5253e-05 gnorm: 1.07 [ 9:41:28<14:53:52] +[titan] 2025-10-05 08:15:48,693 - root - INFO - step: 15770 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:15:48,694 - root - INFO - lr: 3.5245e-05 gnorm: 1.07 [ 9:41:39<14:53:41] +[titan] 2025-10-05 08:15:59,558 - root - INFO - step: 15775 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 08:15:59,558 - root - INFO - lr: 3.5236e-05 gnorm: 1.09 [ 9:41:49<14:53:29] +[titan] 2025-10-05 08:16:10,424 - root - INFO - step: 15780 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:16:10,425 - root - INFO - lr: 
3.5228e-05 gnorm: 1.07 [ 9:42:00<14:53:18] +[titan] 2025-10-05 08:16:21,284 - root - INFO - step: 15785 loss: 2.2235 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9639 +[titan] 2025-10-05 08:16:21,284 - root - INFO - lr: 3.5220e-05 gnorm: 1.08 [ 9:42:11<14:53:07] +[titan] 2025-10-05 08:16:32,182 - root - INFO - step: 15790 loss: 2.2629 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 08:16:32,183 - root - INFO - lr: 3.5211e-05 gnorm: 1.05 [ 9:42:22<14:52:55] +[titan] 2025-10-05 08:16:43,101 - root - INFO - step: 15795 loss: 2.1715 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9180 +[titan] 2025-10-05 08:16:43,101 - root - INFO - lr: 3.5203e-05 gnorm: 1.08 [ 9:42:33<14:52:44] +[titan] 2025-10-05 08:16:51,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:16:53,985 - root - INFO - step: 15800 loss: 2.2694 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 08:16:53,985 - root - INFO - lr: 3.5194e-05 gnorm: 1.09 [ 9:42:44<14:52:33] +[titan] 2025-10-05 08:17:04,888 - root - INFO - step: 15805 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 08:17:04,888 - root - INFO - lr: 3.5186e-05 gnorm: 1.07 [ 9:42:55<14:52:21] +[titan] 2025-10-05 08:17:15,782 - root - INFO - step: 15810 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9612 +[titan] 2025-10-05 08:17:15,782 - root - INFO - lr: 3.5178e-05 gnorm: 1.11 [ 9:43:06<14:52:10] +[titan] 2025-10-05 08:17:26,682 - root - INFO - step: 15815 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 08:17:26,682 - root - INFO - lr: 3.5169e-05 gnorm: 1.09 [ 9:43:17<14:51:59] +[titan] 2025-10-05 08:17:37,542 - root - INFO - step: 15820 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9730 +[titan] 2025-10-05 08:17:37,542 - root - INFO - lr: 3.5161e-05 gnorm: 1.11 [ 9:43:27<14:51:47] +[titan] 2025-10-05 08:17:48,471 - root - INFO - step: 15825 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 08:17:48,471 - root - INFO - lr: 3.5152e-05 gnorm: 1.07 [ 9:43:38<14:51:36] +[titan] 2025-10-05 08:17:59,372 - root - INFO - step: 15830 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0266 +[titan] 2025-10-05 08:17:59,373 - root - INFO - lr: 
3.5144e-05 gnorm: 1.07 [ 9:43:49<14:51:25] +[titan] 2025-10-05 08:18:10,256 - root - INFO - step: 15835 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9594 +[titan] 2025-10-05 08:18:10,256 - root - INFO - lr: 3.5136e-05 gnorm: 1.11 [ 9:44:00<14:51:13] +[titan] 2025-10-05 08:18:21,144 - root - INFO - step: 15840 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 08:18:21,144 - root - INFO - lr: 3.5127e-05 gnorm: 1.07 [ 9:44:11<14:51:02] +[titan] 2025-10-05 08:18:32,043 - root - INFO - step: 15845 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 08:18:32,043 - root - INFO - lr: 3.5119e-05 gnorm: 1.07 [ 9:44:22<14:50:51] +[titan] 2025-10-05 08:18:40,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:18:42,921 - root - INFO - step: 15850 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9352 +[titan] 2025-10-05 08:18:42,921 - root - INFO - lr: 3.5111e-05 gnorm: 1.08 [ 9:44:33<14:50:39] +[titan] 2025-10-05 08:18:53,795 - root - INFO - step: 15855 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 08:18:53,795 - root - INFO - lr: 3.5102e-05 gnorm: 1.15 [ 9:44:44<14:50:28] +[titan] 2025-10-05 08:19:04,726 - root - INFO - step: 15860 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0324 +[titan] 2025-10-05 08:19:04,726 - root - INFO - lr: 3.5094e-05 gnorm: 1.13 [ 9:44:55<14:50:17] +[titan] 2025-10-05 08:19:15,610 - root - INFO - step: 15865 loss: 2.2234 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 08:19:15,610 - root - INFO - lr: 3.5085e-05 gnorm: 1.07 [ 9:45:06<14:50:05] +[titan] 2025-10-05 08:19:26,577 - root - INFO - step: 15870 loss: 2.2122 memory: 118.84GiB(85.28%) tps: 29,880 tflops: 414.54 mfu: 41.91% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:19:26,577 - root - INFO - lr: 3.5077e-05 gnorm: 1.09 [ 9:45:16<14:49:54] +[titan] 2025-10-05 08:19:31,111 - root - INFO - Dumping profiler traces at step 15872 +[titan] 2025-10-05 08:19:31,148 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:19:37,696 - root - INFO - step: 15875 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 08:19:37,697 - root - INFO - lr: 3.5068e-05 gnorm: 1.05 [ 9:45:28<14:49:43] +[titan] 2025-10-05 08:19:48,571 - root - INFO - step: 15880 loss: 
2.2001 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9437 +[titan] 2025-10-05 08:19:48,571 - root - INFO - lr: 3.5060e-05 gnorm: 1.08 [ 9:45:38<14:49:32] +[titan] 2025-10-05 08:19:59,444 - root - INFO - step: 15885 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9707 +[titan] 2025-10-05 08:19:59,444 - root - INFO - lr: 3.5052e-05 gnorm: 1.06 [ 9:45:49<14:49:20] +[titan] 2025-10-05 08:20:10,353 - root - INFO - step: 15890 loss: 2.2269 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 08:20:10,354 - root - INFO - lr: 3.5043e-05 gnorm: 1.09 [ 9:46:00<14:49:09] +[titan] 2025-10-05 08:20:21,229 - root - INFO - step: 15895 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9759 +[titan] 2025-10-05 08:20:21,230 - root - INFO - lr: 3.5035e-05 gnorm: 1.07 [ 9:46:11<14:48:58] +[titan] 2025-10-05 08:20:29,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:20:32,140 - root - INFO - step: 15900 loss: 2.1957 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 08:20:32,140 - root - INFO - lr: 3.5026e-05 gnorm: 1.09 [ 9:46:22<14:48:46] +[titan] 2025-10-05 08:20:43,027 - root - INFO - step: 15905 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:20:43,027 - root - INFO - lr: 3.5018e-05 gnorm: 1.06 [ 9:46:33<14:48:35] +[titan] 2025-10-05 08:20:53,932 - root - INFO - step: 15910 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 08:20:53,932 - root - INFO - lr: 3.5010e-05 gnorm: 1.11 [ 9:46:44<14:48:24] +[titan] 2025-10-05 08:21:04,803 - root - INFO - step: 15915 loss: 2.1550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9045 +[titan] 2025-10-05 08:21:04,803 - root - INFO - lr: 3.5001e-05 gnorm: 1.05 [ 9:46:55<14:48:12] +[titan] 2025-10-05 08:21:15,707 - root - INFO - step: 15920 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 08:21:15,707 - root - INFO - lr: 3.4993e-05 gnorm: 1.08 [ 9:47:06<14:48:01] +[titan] 2025-10-05 08:21:26,598 - root - INFO - step: 15925 loss: 2.2282 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 08:21:26,599 - root - INFO - lr: 3.4984e-05 gnorm: 1.10 [ 9:47:16<14:47:50] +[titan] 2025-10-05 08:21:37,540 - root - INFO - step: 15930 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0083 +[titan] 2025-10-05 08:21:37,540 - root - INFO - lr: 
3.4976e-05 gnorm: 1.11 [ 9:47:27<14:47:39] +[titan] 2025-10-05 08:21:48,426 - root - INFO - step: 15935 loss: 2.2034 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9455 +[titan] 2025-10-05 08:21:48,426 - root - INFO - lr: 3.4968e-05 gnorm: 1.07 [ 9:47:38<14:47:27] +[titan] 2025-10-05 08:21:59,298 - root - INFO - step: 15940 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9502 +[titan] 2025-10-05 08:21:59,298 - root - INFO - lr: 3.4959e-05 gnorm: 1.07 [ 9:47:49<14:47:16] +[titan] 2025-10-05 08:22:10,199 - root - INFO - step: 15945 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9384 +[titan] 2025-10-05 08:22:10,199 - root - INFO - lr: 3.4951e-05 gnorm: 1.09 [ 9:48:00<14:47:05] +[titan] 2025-10-05 08:22:18,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:22:21,106 - root - INFO - step: 15950 loss: 2.2603 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9966 +[titan] 2025-10-05 08:22:21,106 - root - INFO - lr: 3.4942e-05 gnorm: 1.06 [ 9:48:11<14:46:53] +[titan] 2025-10-05 08:22:32,066 - root - INFO - step: 15955 loss: 2.1766 memory: 118.84GiB(85.28%) tps: 29,899 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 08:22:32,066 - root - INFO - lr: 3.4934e-05 gnorm: 1.07 [ 9:48:22<14:46:42] +[titan] 2025-10-05 08:22:42,935 - root - INFO - step: 15960 loss: 2.2164 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:22:42,935 - root - INFO - lr: 3.4925e-05 gnorm: 1.06 [ 9:48:33<14:46:31] +[titan] 2025-10-05 08:22:53,820 - root - INFO - step: 15965 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 08:22:53,820 - root - INFO - lr: 3.4917e-05 gnorm: 1.07 [ 9:48:44<14:46:19] +[titan] 2025-10-05 08:23:04,735 - root - INFO - step: 15970 loss: 2.2899 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0224 +[titan] 2025-10-05 08:23:04,735 - root - INFO - lr: 3.4909e-05 gnorm: 1.17 [ 9:48:55<14:46:08] +[titan] 2025-10-05 08:23:15,637 - root - INFO - step: 15975 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9298 +[titan] 2025-10-05 08:23:15,637 - root - INFO - lr: 3.4900e-05 gnorm: 1.05 [ 9:49:06<14:45:57] +[titan] 2025-10-05 08:23:26,529 - root - INFO - step: 15980 loss: 2.2468 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 08:23:26,529 - root - INFO - lr: 
3.4892e-05 gnorm: 1.08 [ 9:49:16<14:45:46] +[titan] 2025-10-05 08:23:37,517 - root - INFO - step: 15985 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.76 mfu: 41.84% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:23:37,517 - root - INFO - lr: 3.4883e-05 gnorm: 1.09 [ 9:49:27<14:45:34] +[titan] 2025-10-05 08:23:48,403 - root - INFO - step: 15990 loss: 2.2605 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 08:23:48,403 - root - INFO - lr: 3.4875e-05 gnorm: 1.12 [ 9:49:38<14:45:23] +[titan] 2025-10-05 08:23:59,284 - root - INFO - step: 15995 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:23:59,284 - root - INFO - lr: 3.4866e-05 gnorm: 1.08 [ 9:49:49<14:45:12] +[titan] 2025-10-05 08:24:07,984 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:24:10,169 - root - INFO - step: 16000 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9270 +[titan] 2025-10-05 08:24:10,169 - root - INFO - lr: 3.4858e-05 gnorm: 1.05 [ 9:50:00<14:45:00] +[titan] 2025-10-05 08:24:21,053 - root - INFO - step: 16005 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 08:24:21,053 - root - INFO - lr: 3.4850e-05 gnorm: 1.09 [ 9:50:11<14:44:49] +[titan] 2025-10-05 08:24:31,978 - root - INFO - step: 16010 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 08:24:31,979 - root - INFO - lr: 3.4841e-05 gnorm: 1.05 [ 9:50:22<14:44:38] +[titan] 2025-10-05 08:24:42,866 - root - INFO - step: 16015 loss: 2.2354 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:24:42,866 - root - INFO - lr: 3.4833e-05 gnorm: 1.04 [ 9:50:33<14:44:26] +[titan] 2025-10-05 08:24:53,773 - root - INFO - step: 16020 loss: 2.2147 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9542 +[titan] 2025-10-05 08:24:53,773 - root - INFO - lr: 3.4824e-05 gnorm: 1.08 [ 9:50:44<14:44:15] +[titan] 2025-10-05 08:25:04,656 - root - INFO - step: 16025 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 08:25:04,656 - root - INFO - lr: 3.4816e-05 gnorm: 1.08 [ 9:50:55<14:44:04] +[titan] 2025-10-05 08:25:15,527 - root - INFO - step: 16030 loss: 2.2616 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 08:25:15,527 - root - INFO - lr: 
3.4807e-05 gnorm: 1.05 [ 9:51:05<14:43:52] +[titan] 2025-10-05 08:25:26,410 - root - INFO - step: 16035 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0174 +[titan] 2025-10-05 08:25:26,411 - root - INFO - lr: 3.4799e-05 gnorm: 1.10 [ 9:51:16<14:43:41] +[titan] 2025-10-05 08:25:37,315 - root - INFO - step: 16040 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9599 +[titan] 2025-10-05 08:25:37,315 - root - INFO - lr: 3.4790e-05 gnorm: 1.09 [ 9:51:27<14:43:30] +[titan] 2025-10-05 08:25:48,166 - root - INFO - step: 16045 loss: 2.2422 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:25:48,166 - root - INFO - lr: 3.4782e-05 gnorm: 1.07 [ 9:51:38<14:43:18] +[titan] 2025-10-05 08:25:56,884 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:25:59,065 - root - INFO - step: 16050 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0259 +[titan] 2025-10-05 08:25:59,065 - root - INFO - lr: 3.4774e-05 gnorm: 1.08 [ 9:51:49<14:43:07] +[titan] 2025-10-05 08:26:09,947 - root - INFO - step: 16055 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 08:26:09,947 - root - INFO - lr: 3.4765e-05 gnorm: 1.09 [ 9:52:00<14:42:56] +[titan] 2025-10-05 08:26:20,832 - root - INFO - step: 16060 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 08:26:20,832 - root - INFO - lr: 3.4757e-05 gnorm: 1.17 [ 9:52:11<14:42:44] +[titan] 2025-10-05 08:26:31,707 - root - INFO - step: 16065 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 08:26:31,707 - root - INFO - lr: 3.4748e-05 gnorm: 1.08 [ 9:52:22<14:42:33] +[titan] 2025-10-05 08:26:42,618 - root - INFO - step: 16070 loss: 2.2299 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 08:26:42,618 - root - INFO - lr: 3.4740e-05 gnorm: 1.09 [ 9:52:32<14:42:22] +[titan] 2025-10-05 08:26:53,494 - root - INFO - step: 16075 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9418 +[titan] 2025-10-05 08:26:53,495 - root - INFO - lr: 3.4731e-05 gnorm: 1.08 [ 9:52:43<14:42:10] +[titan] 2025-10-05 08:27:04,387 - root - INFO - step: 16080 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 08:27:04,387 - root - INFO - lr: 
3.4723e-05 gnorm: 1.09 [ 9:52:54<14:41:59] +[titan] 2025-10-05 08:27:15,275 - root - INFO - step: 16085 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 08:27:15,276 - root - INFO - lr: 3.4714e-05 gnorm: 1.08 [ 9:53:05<14:41:48] +[titan] 2025-10-05 08:27:26,154 - root - INFO - step: 16090 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9521 +[titan] 2025-10-05 08:27:26,154 - root - INFO - lr: 3.4706e-05 gnorm: 1.05 [ 9:53:16<14:41:37] +[titan] 2025-10-05 08:27:37,046 - root - INFO - step: 16095 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:27:37,047 - root - INFO - lr: 3.4698e-05 gnorm: 1.07 [ 9:53:27<14:41:25] +[titan] 2025-10-05 08:27:45,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:27:47,929 - root - INFO - step: 16100 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 08:27:47,930 - root - INFO - lr: 3.4689e-05 gnorm: 1.08 [ 9:53:38<14:41:14] +[titan] 2025-10-05 08:27:58,796 - root - INFO - step: 16105 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9585 +[titan] 2025-10-05 08:27:58,796 - root - INFO - lr: 3.4681e-05 gnorm: 1.07 [ 9:53:49<14:41:03] +[titan] 2025-10-05 08:28:09,669 - root - INFO - step: 16110 loss: 2.2129 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9548 +[titan] 2025-10-05 08:28:09,669 - root - INFO - lr: 3.4672e-05 gnorm: 1.07 [ 9:54:00<14:40:51] +[titan] 2025-10-05 08:28:20,594 - root - INFO - step: 16115 loss: 2.1544 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:28:20,594 - root - INFO - lr: 3.4664e-05 gnorm: 1.05 [ 9:54:10<14:40:40] +[titan] 2025-10-05 08:28:31,485 - root - INFO - step: 16120 loss: 2.2760 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 08:28:31,485 - root - INFO - lr: 3.4655e-05 gnorm: 1.09 [ 9:54:21<14:40:29] +[titan] 2025-10-05 08:28:42,397 - root - INFO - step: 16125 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 08:28:42,398 - root - INFO - lr: 3.4647e-05 gnorm: 1.10 [ 9:54:32<14:40:17] +[titan] 2025-10-05 08:28:53,284 - root - INFO - step: 16130 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 08:28:53,284 - root - INFO - lr: 
3.4638e-05 gnorm: 1.14 [ 9:54:43<14:40:06] +[titan] 2025-10-05 08:29:04,160 - root - INFO - step: 16135 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0235 +[titan] 2025-10-05 08:29:04,161 - root - INFO - lr: 3.4630e-05 gnorm: 1.09 [ 9:54:54<14:39:55] +[titan] 2025-10-05 08:29:15,049 - root - INFO - step: 16140 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 08:29:15,049 - root - INFO - lr: 3.4621e-05 gnorm: 1.06 [ 9:55:05<14:39:43] +[titan] 2025-10-05 08:29:25,956 - root - INFO - step: 16145 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9694 +[titan] 2025-10-05 08:29:25,956 - root - INFO - lr: 3.4613e-05 gnorm: 1.10 [ 9:55:16<14:39:32] +[titan] 2025-10-05 08:29:34,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:29:36,856 - root - INFO - step: 16150 loss: 2.1905 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 08:29:36,857 - root - INFO - lr: 3.4604e-05 gnorm: 1.12 [ 9:55:27<14:39:21] +[titan] 2025-10-05 08:29:47,747 - root - INFO - step: 16155 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 08:29:47,748 - root - INFO - lr: 3.4596e-05 gnorm: 1.06 [ 9:55:38<14:39:09] +[titan] 2025-10-05 08:29:58,621 - root - INFO - step: 16160 loss: 2.2108 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9539 +[titan] 2025-10-05 08:29:58,621 - root - INFO - lr: 3.4588e-05 gnorm: 1.06 [ 9:55:48<14:38:58] +[titan] 2025-10-05 08:30:09,500 - root - INFO - step: 16165 loss: 2.2802 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:30:09,500 - root - INFO - lr: 3.4579e-05 gnorm: 1.11 [ 9:55:59<14:38:47] +[titan] 2025-10-05 08:30:20,377 - root - INFO - step: 16170 loss: 2.2485 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9864 +[titan] 2025-10-05 08:30:20,377 - root - INFO - lr: 3.4571e-05 gnorm: 1.07 [ 9:56:10<14:38:35] +[titan] 2025-10-05 08:30:31,256 - root - INFO - step: 16175 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0133 +[titan] 2025-10-05 08:30:31,256 - root - INFO - lr: 3.4562e-05 gnorm: 1.07 [ 9:56:21<14:38:24] +[titan] 2025-10-05 08:30:42,181 - root - INFO - step: 16180 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 08:30:42,181 - root - INFO - lr: 
3.4554e-05 gnorm: 1.08 [ 9:56:32<14:38:13] +[titan] 2025-10-05 08:30:53,053 - root - INFO - step: 16185 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 08:30:53,053 - root - INFO - lr: 3.4545e-05 gnorm: 1.05 [ 9:56:43<14:38:02] +[titan] 2025-10-05 08:31:03,931 - root - INFO - step: 16190 loss: 2.1765 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9234 +[titan] 2025-10-05 08:31:03,931 - root - INFO - lr: 3.4537e-05 gnorm: 1.08 [ 9:56:54<14:37:50] +[titan] 2025-10-05 08:31:14,795 - root - INFO - step: 16195 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9284 +[titan] 2025-10-05 08:31:14,796 - root - INFO - lr: 3.4528e-05 gnorm: 1.09 [ 9:57:05<14:37:39] +[titan] 2025-10-05 08:31:23,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:31:25,652 - root - INFO - step: 16200 loss: 2.3077 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0390 +[titan] 2025-10-05 08:31:25,653 - root - INFO - lr: 3.4520e-05 gnorm: 1.10 [ 9:57:16<14:37:27] +[titan] 2025-10-05 08:31:36,508 - root - INFO - step: 16205 loss: 2.2864 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0206 +[titan] 2025-10-05 08:31:36,508 - root - INFO - lr: 3.4511e-05 gnorm: 1.04 [ 9:57:26<14:37:16] +[titan] 2025-10-05 08:31:47,457 - root - INFO - step: 16210 loss: 2.2341 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 08:31:47,457 - root - INFO - lr: 3.4503e-05 gnorm: 1.09 [ 9:57:37<14:37:05] +[titan] 2025-10-05 08:31:58,346 - root - INFO - step: 16215 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 08:31:58,346 - root - INFO - lr: 3.4494e-05 gnorm: 1.08 [ 9:57:48<14:36:54] +[titan] 2025-10-05 08:32:09,203 - root - INFO - step: 16220 loss: 2.1804 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 08:32:09,204 - root - INFO - lr: 3.4486e-05 gnorm: 1.07 [ 9:57:59<14:36:42] +[titan] 2025-10-05 08:32:20,094 - root - INFO - step: 16225 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 08:32:20,094 - root - INFO - lr: 3.4477e-05 gnorm: 1.07 [ 9:58:10<14:36:31] +[titan] 2025-10-05 08:32:30,976 - root - INFO - step: 16230 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9488 +[titan] 2025-10-05 08:32:30,977 - root - INFO - lr: 
3.4469e-05 gnorm: 1.05 [ 9:58:21<14:36:20] +[titan] 2025-10-05 08:32:41,910 - root - INFO - step: 16235 loss: 2.2424 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:32:41,910 - root - INFO - lr: 3.4460e-05 gnorm: 1.06 [ 9:58:32<14:36:08] +[titan] 2025-10-05 08:32:52,835 - root - INFO - step: 16240 loss: 2.1658 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9139 +[titan] 2025-10-05 08:32:52,835 - root - INFO - lr: 3.4452e-05 gnorm: 1.04 [ 9:58:43<14:35:57] +[titan] 2025-10-05 08:33:03,725 - root - INFO - step: 16245 loss: 2.2254 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:33:03,725 - root - INFO - lr: 3.4443e-05 gnorm: 1.08 [ 9:58:54<14:35:46] +[titan] 2025-10-05 08:33:12,441 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:33:14,633 - root - INFO - step: 16250 loss: 2.2316 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 08:33:14,634 - root - INFO - lr: 3.4435e-05 gnorm: 1.10 [ 9:59:04<14:35:34] +[titan] 2025-10-05 08:33:25,534 - root - INFO - step: 16255 loss: 2.3076 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0385 +[titan] 2025-10-05 08:33:25,534 - root - INFO - lr: 3.4426e-05 gnorm: 1.10 [ 9:59:15<14:35:23] +[titan] 2025-10-05 08:33:36,432 - root - INFO - step: 16260 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 08:33:36,433 - root - INFO - lr: 3.4418e-05 gnorm: 1.13 [ 9:59:26<14:35:12] +[titan] 2025-10-05 08:33:47,313 - root - INFO - step: 16265 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9849 +[titan] 2025-10-05 08:33:47,313 - root - INFO - lr: 3.4409e-05 gnorm: 1.10 [ 9:59:37<14:35:01] +[titan] 2025-10-05 08:33:58,157 - root - INFO - step: 16270 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:33:58,157 - root - INFO - lr: 3.4401e-05 gnorm: 1.09 [ 9:59:48<14:34:49] +[titan] 2025-10-05 08:34:09,059 - root - INFO - step: 16275 loss: 2.2042 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 08:34:09,059 - root - INFO - lr: 3.4392e-05 gnorm: 1.05 [ 9:59:59<14:34:38] +[titan] 2025-10-05 08:34:19,912 - root - INFO - step: 16280 loss: 2.2416 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:34:19,912 - root - INFO - lr: 
3.4384e-05 gnorm: 1.07 [10:00:10<14:34:27] +[titan] 2025-10-05 08:34:30,777 - root - INFO - step: 16285 loss: 2.1576 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:34:30,777 - root - INFO - lr: 3.4375e-05 gnorm: 1.09 [10:00:21<14:34:15] +[titan] 2025-10-05 08:34:41,653 - root - INFO - step: 16290 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 08:34:41,653 - root - INFO - lr: 3.4367e-05 gnorm: 1.05 [10:00:32<14:34:04] +[titan] 2025-10-05 08:34:52,516 - root - INFO - step: 16295 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 08:34:52,516 - root - INFO - lr: 3.4358e-05 gnorm: 1.05 [10:00:42<14:33:53] +[titan] 2025-10-05 08:35:01,183 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:35:03,369 - root - INFO - step: 16300 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0097 +[titan] 2025-10-05 08:35:03,369 - root - INFO - lr: 3.4350e-05 gnorm: 1.13 [10:00:53<14:33:41] +[titan] 2025-10-05 08:35:14,258 - root - INFO - step: 16305 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:35:14,258 - root - INFO - lr: 3.4341e-05 gnorm: 1.10 [10:01:04<14:33:30] +[titan] 2025-10-05 08:35:25,117 - root - INFO - step: 16310 loss: 2.2039 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 08:35:25,117 - root - INFO - lr: 3.4333e-05 gnorm: 1.07 [10:01:15<14:33:19] +[titan] 2025-10-05 08:35:35,923 - root - INFO - step: 16315 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:35:35,923 - root - INFO - lr: 3.4324e-05 gnorm: 1.06 [10:01:26<14:33:07] +[titan] 2025-10-05 08:35:46,803 - root - INFO - step: 16320 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9004 +[titan] 2025-10-05 08:35:46,803 - root - INFO - lr: 3.4316e-05 gnorm: 1.06 [10:01:37<14:32:56] +[titan] 2025-10-05 08:35:57,651 - root - INFO - step: 16325 loss: 2.2716 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0076 +[titan] 2025-10-05 08:35:57,651 - root - INFO - lr: 3.4307e-05 gnorm: 1.08 [10:01:48<14:32:44] +[titan] 2025-10-05 08:36:08,474 - root - INFO - step: 16330 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8746 +[titan] 2025-10-05 08:36:08,474 - root - INFO - lr: 
3.4299e-05 gnorm: 1.05 [10:01:58<14:32:33] +[titan] 2025-10-05 08:36:19,326 - root - INFO - step: 16335 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 08:36:19,326 - root - INFO - lr: 3.4290e-05 gnorm: 1.05 [10:02:09<14:32:22] +[titan] 2025-10-05 08:36:30,202 - root - INFO - step: 16340 loss: 2.2109 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9528 +[titan] 2025-10-05 08:36:30,202 - root - INFO - lr: 3.4282e-05 gnorm: 1.09 [10:02:20<14:32:10] +[titan] 2025-10-05 08:36:41,056 - root - INFO - step: 16345 loss: 2.2287 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9688 +[titan] 2025-10-05 08:36:41,056 - root - INFO - lr: 3.4273e-05 gnorm: 1.09 [10:02:31<14:31:59] +[titan] 2025-10-05 08:36:49,742 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:36:51,933 - root - INFO - step: 16350 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 08:36:51,933 - root - INFO - lr: 3.4265e-05 gnorm: 1.08 [10:02:42<14:31:48] +[titan] 2025-10-05 08:37:02,815 - root - INFO - step: 16355 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0175 +[titan] 2025-10-05 08:37:02,815 - root - INFO - lr: 3.4256e-05 gnorm: 1.09 [10:02:53<14:31:36] +[titan] 2025-10-05 08:37:13,670 - root - INFO - step: 16360 loss: 2.1862 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:37:13,671 - root - INFO - lr: 3.4248e-05 gnorm: 1.04 [10:03:04<14:31:25] +[titan] 2025-10-05 08:37:24,518 - root - INFO - step: 16365 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:37:24,518 - root - INFO - lr: 3.4239e-05 gnorm: 1.12 [10:03:14<14:31:14] +[titan] 2025-10-05 08:37:35,400 - root - INFO - step: 16370 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9962 +[titan] 2025-10-05 08:37:35,401 - root - INFO - lr: 3.4231e-05 gnorm: 1.08 [10:03:25<14:31:02] +[titan] 2025-10-05 08:37:46,321 - root - INFO - step: 16375 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 08:37:46,321 - root - INFO - lr: 3.4222e-05 gnorm: 1.06 [10:03:36<14:30:51] +[titan] 2025-10-05 08:37:57,173 - root - INFO - step: 16380 loss: 2.2402 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9783 +[titan] 2025-10-05 08:37:57,173 - root - INFO - lr: 
3.4214e-05 gnorm: 1.11 [10:03:47<14:30:40] +[titan] 2025-10-05 08:38:06,125 - root - INFO - Dumping profiler traces at step 16384 +[titan] 2025-10-05 08:38:06,165 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:38:08,373 - root - INFO - step: 16385 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 29,257 tflops: 405.90 mfu: 41.04% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9247 +[titan] 2025-10-05 08:38:08,373 - root - INFO - lr: 3.4205e-05 gnorm: 1.11 [10:03:58<14:30:29] +[titan] 2025-10-05 08:38:19,239 - root - INFO - step: 16390 loss: 2.2560 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 08:38:19,239 - root - INFO - lr: 3.4197e-05 gnorm: 1.08 [10:04:09<14:30:17] +[titan] 2025-10-05 08:38:30,091 - root - INFO - step: 16395 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 08:38:30,091 - root - INFO - lr: 3.4188e-05 gnorm: 1.06 [10:04:20<14:30:06] +[titan] 2025-10-05 08:38:38,778 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:38:41,007 - root - INFO - step: 16400 loss: 2.1921 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 08:38:41,007 - root - INFO - lr: 3.4180e-05 gnorm: 1.12 [10:04:31<14:29:55] +[titan] 2025-10-05 08:38:51,898 - root - INFO - step: 16405 loss: 2.2523 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9902 +[titan] 2025-10-05 08:38:51,898 - root - INFO - lr: 3.4171e-05 gnorm: 1.10 [10:04:42<14:29:44] +[titan] 2025-10-05 08:39:02,751 - root - INFO - step: 16410 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 08:39:02,751 - root - INFO - lr: 3.4163e-05 gnorm: 1.10 [10:04:53<14:29:32] +[titan] 2025-10-05 08:39:13,601 - root - INFO - step: 16415 loss: 2.1622 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 08:39:13,601 - root - INFO - lr: 3.4154e-05 gnorm: 1.06 [10:05:03<14:29:21] +[titan] 2025-10-05 08:39:24,471 - root - INFO - step: 16420 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9579 +[titan] 2025-10-05 08:39:24,471 - root - INFO - lr: 3.4146e-05 gnorm: 1.06 [10:05:14<14:29:10] +[titan] 2025-10-05 08:39:35,332 - root - INFO - step: 16425 loss: 2.1912 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9364 +[titan] 2025-10-05 08:39:35,333 - root - INFO - lr: 3.4137e-05 gnorm: 1.06 [10:05:25<14:28:58] +[titan] 2025-10-05 08:39:46,223 - root - INFO - step: 16430 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 08:39:46,223 - root - INFO - lr: 
3.4129e-05 gnorm: 1.07 [10:05:36<14:28:47] +[titan] 2025-10-05 08:39:57,116 - root - INFO - step: 16435 loss: 2.2229 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9601 +[titan] 2025-10-05 08:39:57,116 - root - INFO - lr: 3.4120e-05 gnorm: 1.10 [10:05:47<14:28:36] +[titan] 2025-10-05 08:40:07,956 - root - INFO - step: 16440 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9241 +[titan] 2025-10-05 08:40:07,956 - root - INFO - lr: 3.4111e-05 gnorm: 1.06 [10:05:58<14:28:24] +[titan] 2025-10-05 08:40:18,791 - root - INFO - step: 16445 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:40:18,792 - root - INFO - lr: 3.4103e-05 gnorm: 1.08 [10:06:09<14:28:13] +[titan] 2025-10-05 08:40:27,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:40:29,625 - root - INFO - step: 16450 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9165 +[titan] 2025-10-05 08:40:29,626 - root - INFO - lr: 3.4094e-05 gnorm: 1.09 [10:06:19<14:28:01] +[titan] 2025-10-05 08:40:40,476 - root - INFO - step: 16455 loss: 2.1561 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 08:40:40,476 - root - INFO - lr: 3.4086e-05 gnorm: 1.05 [10:06:30<14:27:50] +[titan] 2025-10-05 08:40:51,351 - root - INFO - step: 16460 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.9013 +[titan] 2025-10-05 08:40:51,351 - root - INFO - lr: 3.4077e-05 gnorm: 1.06 [10:06:41<14:27:39] +[titan] 2025-10-05 08:41:02,252 - root - INFO - step: 16465 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:41:02,252 - root - INFO - lr: 3.4069e-05 gnorm: 1.05 [10:06:52<14:27:28] +[titan] 2025-10-05 08:41:13,112 - root - INFO - step: 16470 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9462 +[titan] 2025-10-05 08:41:13,113 - root - INFO - lr: 3.4060e-05 gnorm: 1.10 [10:07:03<14:27:16] +[titan] 2025-10-05 08:41:23,980 - root - INFO - step: 16475 loss: 2.2132 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:41:23,981 - root - INFO - lr: 3.4052e-05 gnorm: 1.05 [10:07:14<14:27:05] +[titan] 2025-10-05 08:41:34,850 - root - INFO - step: 16480 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 08:41:34,850 - root - INFO - lr: 
3.4043e-05 gnorm: 1.07 [10:07:25<14:26:54] +[titan] 2025-10-05 08:41:45,728 - root - INFO - step: 16485 loss: 2.1837 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 08:41:45,728 - root - INFO - lr: 3.4035e-05 gnorm: 1.10 [10:07:36<14:26:42] +[titan] 2025-10-05 08:41:56,603 - root - INFO - step: 16490 loss: 2.2265 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 08:41:56,603 - root - INFO - lr: 3.4026e-05 gnorm: 1.08 [10:07:46<14:26:31] +[titan] 2025-10-05 08:42:07,468 - root - INFO - step: 16495 loss: 2.2288 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 08:42:07,468 - root - INFO - lr: 3.4018e-05 gnorm: 1.10 [10:07:57<14:26:20] +[titan] 2025-10-05 08:42:16,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:42:18,373 - root - INFO - step: 16500 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9920 +[titan] 2025-10-05 08:42:18,373 - root - INFO - lr: 3.4009e-05 gnorm: 1.10 [10:08:08<14:26:08] +[titan] 2025-10-05 08:42:29,248 - root - INFO - step: 16505 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 08:42:29,248 - root - INFO - lr: 3.4000e-05 gnorm: 1.06 [10:08:19<14:25:57] +[titan] 2025-10-05 08:42:40,112 - root - INFO - step: 16510 loss: 2.1951 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9387 +[titan] 2025-10-05 08:42:40,112 - root - INFO - lr: 3.3992e-05 gnorm: 1.06 [10:08:30<14:25:46] +[titan] 2025-10-05 08:42:51,000 - root - INFO - step: 16515 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9731 +[titan] 2025-10-05 08:42:51,000 - root - INFO - lr: 3.3983e-05 gnorm: 1.06 [10:08:41<14:25:34] +[titan] 2025-10-05 08:43:01,864 - root - INFO - step: 16520 loss: 2.2392 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 1.9746 +[titan] 2025-10-05 08:43:01,864 - root - INFO - lr: 3.3975e-05 gnorm: 1.07 [10:08:52<14:25:23] +[titan] 2025-10-05 08:43:12,727 - root - INFO - step: 16525 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0203 +[titan] 2025-10-05 08:43:12,727 - root - INFO - lr: 3.3966e-05 gnorm: 1.13 [10:09:03<14:25:12] +[titan] 2025-10-05 08:43:23,632 - root - INFO - step: 16530 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 08:43:23,632 - root - INFO - lr: 
3.3958e-05 gnorm: 1.08 [10:09:13<14:25:00] +[titan] 2025-10-05 08:43:34,515 - root - INFO - step: 16535 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:43:34,515 - root - INFO - lr: 3.3949e-05 gnorm: 1.08 [10:09:24<14:24:49] +[titan] 2025-10-05 08:43:45,404 - root - INFO - step: 16540 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 08:43:45,404 - root - INFO - lr: 3.3941e-05 gnorm: 1.14 [10:09:35<14:24:38] +[titan] 2025-10-05 08:43:56,319 - root - INFO - step: 16545 loss: 2.1857 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 08:43:56,319 - root - INFO - lr: 3.3932e-05 gnorm: 1.07 [10:09:46<14:24:27] +[titan] 2025-10-05 08:44:05,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:44:07,197 - root - INFO - step: 16550 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 08:44:07,197 - root - INFO - lr: 3.3924e-05 gnorm: 1.05 [10:09:57<14:24:15] +[titan] 2025-10-05 08:44:18,066 - root - INFO - step: 16555 loss: 2.2226 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9632 +[titan] 2025-10-05 08:44:18,066 - root - INFO - lr: 3.3915e-05 gnorm: 1.09 [10:10:08<14:24:04] +[titan] 2025-10-05 08:44:28,972 - root - INFO - step: 16560 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 08:44:28,972 - root - INFO - lr: 3.3906e-05 gnorm: 1.05 [10:10:19<14:23:53] +[titan] 2025-10-05 08:44:39,817 - root - INFO - step: 16565 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9362 +[titan] 2025-10-05 08:44:39,817 - root - INFO - lr: 3.3898e-05 gnorm: 1.07 [10:10:30<14:23:41] +[titan] 2025-10-05 08:44:50,691 - root - INFO - step: 16570 loss: 2.1798 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9260 +[titan] 2025-10-05 08:44:50,691 - root - INFO - lr: 3.3889e-05 gnorm: 1.08 [10:10:41<14:23:30] +[titan] 2025-10-05 08:45:01,549 - root - INFO - step: 16575 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:45:01,549 - root - INFO - lr: 3.3881e-05 gnorm: 1.05 [10:10:51<14:23:19] +[titan] 2025-10-05 08:45:12,413 - root - INFO - step: 16580 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:45:12,413 - root - INFO - lr: 
3.3872e-05 gnorm: 1.08 [10:11:02<14:23:07] +[titan] 2025-10-05 08:45:23,289 - root - INFO - step: 16585 loss: 2.1742 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9208 +[titan] 2025-10-05 08:45:23,289 - root - INFO - lr: 3.3864e-05 gnorm: 1.07 [10:11:13<14:22:56] +[titan] 2025-10-05 08:45:34,149 - root - INFO - step: 16590 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 08:45:34,149 - root - INFO - lr: 3.3855e-05 gnorm: 1.11 [10:11:24<14:22:45] +[titan] 2025-10-05 08:45:45,091 - root - INFO - step: 16595 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 08:45:45,091 - root - INFO - lr: 3.3847e-05 gnorm: 1.06 [10:11:35<14:22:33] +[titan] 2025-10-05 08:45:53,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:45:55,993 - root - INFO - step: 16600 loss: 2.1689 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9160 +[titan] 2025-10-05 08:45:55,993 - root - INFO - lr: 3.3838e-05 gnorm: 1.04 [10:11:46<14:22:22] +[titan] 2025-10-05 08:46:06,866 - root - INFO - step: 16605 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:46:06,866 - root - INFO - lr: 3.3829e-05 gnorm: 1.04 [10:11:57<14:22:11] +[titan] 2025-10-05 08:46:17,754 - root - INFO - step: 16610 loss: 2.2141 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 08:46:17,755 - root - INFO - lr: 3.3821e-05 gnorm: 1.09 [10:12:08<14:22:00] +[titan] 2025-10-05 08:46:28,629 - root - INFO - step: 16615 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9330 +[titan] 2025-10-05 08:46:28,629 - root - INFO - lr: 3.3812e-05 gnorm: 1.09 [10:12:18<14:21:48] +[titan] 2025-10-05 08:46:39,510 - root - INFO - step: 16620 loss: 2.1330 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 08:46:39,510 - root - INFO - lr: 3.3804e-05 gnorm: 1.07 [10:12:29<14:21:37] +[titan] 2025-10-05 08:46:50,420 - root - INFO - step: 16625 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9610 +[titan] 2025-10-05 08:46:50,420 - root - INFO - lr: 3.3795e-05 gnorm: 1.09 [10:12:40<14:21:26] +[titan] 2025-10-05 08:47:01,324 - root - INFO - step: 16630 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 08:47:01,324 - root - INFO - lr: 
3.3787e-05 gnorm: 1.10 [10:12:51<14:21:14] +[titan] 2025-10-05 08:47:12,217 - root - INFO - step: 16635 loss: 2.1195 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 08:47:12,217 - root - INFO - lr: 3.3778e-05 gnorm: 1.09 [10:13:02<14:21:03] +[titan] 2025-10-05 08:47:23,110 - root - INFO - step: 16640 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:47:23,110 - root - INFO - lr: 3.3769e-05 gnorm: 1.12 [10:13:13<14:20:52] +[titan] 2025-10-05 08:47:34,010 - root - INFO - step: 16645 loss: 2.1744 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 08:47:34,010 - root - INFO - lr: 3.3761e-05 gnorm: 1.10 [10:13:24<14:20:41] +[titan] 2025-10-05 08:47:42,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:47:44,910 - root - INFO - step: 16650 loss: 2.1803 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 08:47:44,910 - root - INFO - lr: 3.3752e-05 gnorm: 1.11 [10:13:35<14:20:29] +[titan] 2025-10-05 08:47:55,812 - root - INFO - step: 16655 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 08:47:55,812 - root - INFO - lr: 3.3744e-05 gnorm: 1.10 [10:13:46<14:20:18] +[titan] 2025-10-05 08:48:06,738 - root - INFO - step: 16660 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0120 +[titan] 2025-10-05 08:48:06,738 - root - INFO - lr: 3.3735e-05 gnorm: 1.11 [10:13:57<14:20:07] +[titan] 2025-10-05 08:48:17,635 - root - INFO - step: 16665 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:48:17,635 - root - INFO - lr: 3.3727e-05 gnorm: 1.10 [10:14:07<14:19:55] +[titan] 2025-10-05 08:48:28,518 - root - INFO - step: 16670 loss: 2.2203 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:48:28,518 - root - INFO - lr: 3.3718e-05 gnorm: 1.10 [10:14:18<14:19:44] +[titan] 2025-10-05 08:48:39,418 - root - INFO - step: 16675 loss: 2.2253 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2593 global_avg_mtp_loss: 1.9660 +[titan] 2025-10-05 08:48:39,419 - root - INFO - lr: 3.3709e-05 gnorm: 1.14 [10:14:29<14:19:33] +[titan] 2025-10-05 08:48:50,307 - root - INFO - step: 16680 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 08:48:50,307 - root - INFO - lr: 
3.3701e-05 gnorm: 1.09 [10:14:40<14:19:22] +[titan] 2025-10-05 08:49:01,231 - root - INFO - step: 16685 loss: 2.2071 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 08:49:01,231 - root - INFO - lr: 3.3692e-05 gnorm: 1.06 [10:14:51<14:19:10] +[titan] 2025-10-05 08:49:12,142 - root - INFO - step: 16690 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9875 +[titan] 2025-10-05 08:49:12,142 - root - INFO - lr: 3.3684e-05 gnorm: 1.05 [10:15:02<14:18:59] +[titan] 2025-10-05 08:49:23,035 - root - INFO - step: 16695 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0009 +[titan] 2025-10-05 08:49:23,036 - root - INFO - lr: 3.3675e-05 gnorm: 1.04 [10:15:13<14:18:48] +[titan] 2025-10-05 08:49:31,751 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:49:33,935 - root - INFO - step: 16700 loss: 2.1213 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8753 +[titan] 2025-10-05 08:49:33,935 - root - INFO - lr: 3.3667e-05 gnorm: 1.05 [10:15:24<14:18:37] +[titan] 2025-10-05 08:49:44,821 - root - INFO - step: 16705 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 08:49:44,821 - root - INFO - lr: 3.3658e-05 gnorm: 1.04 [10:15:35<14:18:25] +[titan] 2025-10-05 08:49:55,770 - root - INFO - step: 16710 loss: 2.1830 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9279 +[titan] 2025-10-05 08:49:55,770 - root - INFO - lr: 3.3649e-05 gnorm: 1.06 [10:15:46<14:18:14] +[titan] 2025-10-05 08:50:06,646 - root - INFO - step: 16715 loss: 2.1474 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 08:50:06,646 - root - INFO - lr: 3.3641e-05 gnorm: 1.05 [10:15:56<14:18:03] +[titan] 2025-10-05 08:50:17,562 - root - INFO - step: 16720 loss: 2.2478 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9859 +[titan] 2025-10-05 08:50:17,562 - root - INFO - lr: 3.3632e-05 gnorm: 1.08 [10:16:07<14:17:52] +[titan] 2025-10-05 08:50:28,447 - root - INFO - step: 16725 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 08:50:28,447 - root - INFO - lr: 3.3624e-05 gnorm: 1.03 [10:16:18<14:17:40] +[titan] 2025-10-05 08:50:39,327 - root - INFO - step: 16730 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 08:50:39,327 - root - INFO - lr: 
3.3615e-05 gnorm: 1.07 [10:16:29<14:17:29] +[titan] 2025-10-05 08:50:50,218 - root - INFO - step: 16735 loss: 2.1919 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:50:50,218 - root - INFO - lr: 3.3606e-05 gnorm: 1.08 [10:16:40<14:17:18] +[titan] 2025-10-05 08:51:01,116 - root - INFO - step: 16740 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9342 +[titan] 2025-10-05 08:51:01,116 - root - INFO - lr: 3.3598e-05 gnorm: 1.01 [10:16:51<14:17:06] +[titan] 2025-10-05 08:51:11,988 - root - INFO - step: 16745 loss: 2.1719 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 08:51:11,988 - root - INFO - lr: 3.3589e-05 gnorm: 1.09 [10:17:02<14:16:55] +[titan] 2025-10-05 08:51:20,684 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:51:22,867 - root - INFO - step: 16750 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:51:22,867 - root - INFO - lr: 3.3581e-05 gnorm: 1.07 [10:17:13<14:16:44] +[titan] 2025-10-05 08:51:33,766 - root - INFO - step: 16755 loss: 2.1698 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:51:33,766 - root - INFO - lr: 3.3572e-05 gnorm: 1.08 [10:17:24<14:16:32] +[titan] 2025-10-05 08:51:44,647 - root - INFO - step: 16760 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 08:51:44,647 - root - INFO - lr: 3.3563e-05 gnorm: 1.07 [10:17:34<14:16:21] +[titan] 2025-10-05 08:51:55,539 - root - INFO - step: 16765 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 08:51:55,539 - root - INFO - lr: 3.3555e-05 gnorm: 1.08 [10:17:45<14:16:10] +[titan] 2025-10-05 08:52:06,452 - root - INFO - step: 16770 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9905 +[titan] 2025-10-05 08:52:06,452 - root - INFO - lr: 3.3546e-05 gnorm: 1.10 [10:17:56<14:15:59] +[titan] 2025-10-05 08:52:17,344 - root - INFO - step: 16775 loss: 2.2357 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 08:52:17,344 - root - INFO - lr: 3.3538e-05 gnorm: 1.12 [10:18:07<14:15:47] +[titan] 2025-10-05 08:52:28,243 - root - INFO - step: 16780 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 08:52:28,243 - root - INFO - lr: 
3.3529e-05 gnorm: 1.05 [10:18:18<14:15:36] +[titan] 2025-10-05 08:52:39,158 - root - INFO - step: 16785 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 08:52:39,158 - root - INFO - lr: 3.3520e-05 gnorm: 1.08 [10:18:29<14:15:25] +[titan] 2025-10-05 08:52:50,027 - root - INFO - step: 16790 loss: 2.3254 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 08:52:50,027 - root - INFO - lr: 3.3512e-05 gnorm: 1.08 [10:18:40<14:15:14] +[titan] 2025-10-05 08:53:00,972 - root - INFO - step: 16795 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8815 +[titan] 2025-10-05 08:53:00,972 - root - INFO - lr: 3.3503e-05 gnorm: 1.05 [10:18:51<14:15:02] +[titan] 2025-10-05 08:53:09,655 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:53:11,847 - root - INFO - step: 16800 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9137 +[titan] 2025-10-05 08:53:11,847 - root - INFO - lr: 3.3495e-05 gnorm: 1.04 [10:19:02<14:14:51] +[titan] 2025-10-05 08:53:22,744 - root - INFO - step: 16805 loss: 2.2778 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0130 +[titan] 2025-10-05 08:53:22,744 - root - INFO - lr: 3.3486e-05 gnorm: 1.06 [10:19:13<14:14:40] +[titan] 2025-10-05 08:53:33,623 - root - INFO - step: 16810 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 08:53:33,623 - root - INFO - lr: 3.3477e-05 gnorm: 1.10 [10:19:23<14:14:28] +[titan] 2025-10-05 08:53:44,493 - root - INFO - step: 16815 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 08:53:44,493 - root - INFO - lr: 3.3469e-05 gnorm: 1.08 [10:19:34<14:14:17] +[titan] 2025-10-05 08:53:55,405 - root - INFO - step: 16820 loss: 2.3161 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2704 global_avg_mtp_loss: 2.0457 +[titan] 2025-10-05 08:53:55,405 - root - INFO - lr: 3.3460e-05 gnorm: 1.05 [10:19:45<14:14:06] +[titan] 2025-10-05 08:54:06,325 - root - INFO - step: 16825 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:54:06,325 - root - INFO - lr: 3.3452e-05 gnorm: 1.06 [10:19:56<14:13:55] +[titan] 2025-10-05 08:54:17,199 - root - INFO - step: 16830 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 08:54:17,199 - root - INFO - lr: 
3.3443e-05 gnorm: 1.14 [10:20:07<14:13:43] +[titan] 2025-10-05 08:54:28,086 - root - INFO - step: 16835 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 08:54:28,087 - root - INFO - lr: 3.3434e-05 gnorm: 1.11 [10:20:18<14:13:32] +[titan] 2025-10-05 08:54:38,979 - root - INFO - step: 16840 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:54:38,979 - root - INFO - lr: 3.3426e-05 gnorm: 1.10 [10:20:29<14:13:21] +[titan] 2025-10-05 08:54:49,879 - root - INFO - step: 16845 loss: 2.2348 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9741 +[titan] 2025-10-05 08:54:49,879 - root - INFO - lr: 3.3417e-05 gnorm: 1.14 [10:20:40<14:13:10] +[titan] 2025-10-05 08:54:58,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:55:00,873 - root - INFO - step: 16850 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 08:55:00,873 - root - INFO - lr: 3.3409e-05 gnorm: 1.06 [10:20:51<14:12:58] +[titan] 2025-10-05 08:55:11,763 - root - INFO - step: 16855 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:55:11,763 - root - INFO - lr: 3.3400e-05 gnorm: 1.10 [10:21:02<14:12:47] +[titan] 2025-10-05 08:55:22,662 - root - INFO - step: 16860 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:55:22,662 - root - INFO - lr: 3.3391e-05 gnorm: 1.05 [10:21:12<14:12:36] +[titan] 2025-10-05 08:55:33,543 - root - INFO - step: 16865 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8902 +[titan] 2025-10-05 08:55:33,543 - root - INFO - lr: 3.3383e-05 gnorm: 1.08 [10:21:23<14:12:25] +[titan] 2025-10-05 08:55:44,433 - root - INFO - step: 16870 loss: 2.2119 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:55:44,433 - root - INFO - lr: 3.3374e-05 gnorm: 1.08 [10:21:34<14:12:13] +[titan] 2025-10-05 08:55:55,318 - root - INFO - step: 16875 loss: 2.2256 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:55:55,318 - root - INFO - lr: 3.3366e-05 gnorm: 1.09 [10:21:45<14:12:02] +[titan] 2025-10-05 08:56:06,283 - root - INFO - step: 16880 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:56:06,283 - root - INFO - lr: 
3.3357e-05 gnorm: 1.08 [10:21:56<14:11:51] +[titan] 2025-10-05 08:56:17,168 - root - INFO - step: 16885 loss: 2.2361 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9760 +[titan] 2025-10-05 08:56:17,168 - root - INFO - lr: 3.3348e-05 gnorm: 1.07 [10:22:07<14:11:40] +[titan] 2025-10-05 08:56:28,070 - root - INFO - step: 16890 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:56:28,070 - root - INFO - lr: 3.3340e-05 gnorm: 1.03 [10:22:18<14:11:28] +[titan] 2025-10-05 08:56:39,053 - root - INFO - step: 16895 loss: 2.2559 memory: 118.84GiB(85.28%) tps: 29,836 tflops: 413.93 mfu: 41.85% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 08:56:39,054 - root - INFO - lr: 3.3331e-05 gnorm: 1.10 [10:22:29<14:11:17] +[titan] 2025-10-05 08:56:41,419 - root - INFO - Dumping profiler traces at step 16896 +[titan] 2025-10-05 08:56:41,458 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:56:47,993 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:56:50,179 - root - INFO - step: 16900 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 29,452 tflops: 408.61 mfu: 41.32% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9188 +[titan] 2025-10-05 08:56:50,180 - root - INFO - lr: 3.3322e-05 gnorm: 1.02 [10:22:40<14:11:06] +[titan] 2025-10-05 08:57:01,083 - root - INFO - step: 16905 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9226 +[titan] 2025-10-05 08:57:01,084 - root - INFO - lr: 3.3314e-05 gnorm: 1.15 [10:22:51<14:10:55] +[titan] 2025-10-05 08:57:11,941 - root - INFO - step: 16910 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9271 +[titan] 2025-10-05 08:57:11,942 - root - INFO - lr: 3.3305e-05 gnorm: 1.04 [10:23:02<14:10:44] +[titan] 2025-10-05 08:57:22,821 - root - INFO - step: 16915 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 08:57:22,822 - root - INFO - lr: 3.3297e-05 gnorm: 1.10 [10:23:13<14:10:32] +[titan] 2025-10-05 08:57:33,708 - root - INFO - step: 16920 loss: 2.1768 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9231 +[titan] 2025-10-05 08:57:33,708 - root - INFO - lr: 3.3288e-05 gnorm: 1.07 [10:23:24<14:10:21] +[titan] 2025-10-05 08:57:44,586 - root - INFO - step: 16925 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 08:57:44,586 - root - INFO - lr: 3.3279e-05 gnorm: 1.10 [10:23:34<14:10:10] +[titan] 2025-10-05 08:57:55,466 - root - INFO - step: 16930 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 08:57:55,466 - root - INFO - lr: 
3.3271e-05 gnorm: 1.08 [10:23:45<14:09:58] +[titan] 2025-10-05 08:58:06,365 - root - INFO - step: 16935 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9721 +[titan] 2025-10-05 08:58:06,365 - root - INFO - lr: 3.3262e-05 gnorm: 1.09 [10:23:56<14:09:47] +[titan] 2025-10-05 08:58:17,240 - root - INFO - step: 16940 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 08:58:17,240 - root - INFO - lr: 3.3253e-05 gnorm: 1.07 [10:24:07<14:09:36] +[titan] 2025-10-05 08:58:28,143 - root - INFO - step: 16945 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9826 +[titan] 2025-10-05 08:58:28,143 - root - INFO - lr: 3.3245e-05 gnorm: 1.07 [10:24:18<14:09:25] +[titan] 2025-10-05 08:58:36,825 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:58:39,030 - root - INFO - step: 16950 loss: 2.2032 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:58:39,030 - root - INFO - lr: 3.3236e-05 gnorm: 1.07 [10:24:29<14:09:13] +[titan] 2025-10-05 08:58:49,927 - root - INFO - step: 16955 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 08:58:49,928 - root - INFO - lr: 3.3228e-05 gnorm: 1.13 [10:24:40<14:09:02] +[titan] 2025-10-05 08:59:00,813 - root - INFO - step: 16960 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 08:59:00,813 - root - INFO - lr: 3.3219e-05 gnorm: 1.07 [10:24:51<14:08:51] +[titan] 2025-10-05 08:59:11,725 - root - INFO - step: 16965 loss: 2.1770 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9229 +[titan] 2025-10-05 08:59:11,725 - root - INFO - lr: 3.3210e-05 gnorm: 1.09 [10:25:02<14:08:40] +[titan] 2025-10-05 08:59:22,600 - root - INFO - step: 16970 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 08:59:22,601 - root - INFO - lr: 3.3202e-05 gnorm: 1.13 [10:25:12<14:08:28] +[titan] 2025-10-05 08:59:33,459 - root - INFO - step: 16975 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9184 +[titan] 2025-10-05 08:59:33,460 - root - INFO - lr: 3.3193e-05 gnorm: 1.10 [10:25:23<14:08:17] +[titan] 2025-10-05 08:59:44,382 - root - INFO - step: 16980 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9735 +[titan] 2025-10-05 08:59:44,382 - root - INFO - lr: 
3.3184e-05 gnorm: 1.04 [10:25:34<14:08:06] +[titan] 2025-10-05 08:59:55,274 - root - INFO - step: 16985 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8945 +[titan] 2025-10-05 08:59:55,274 - root - INFO - lr: 3.3176e-05 gnorm: 1.06 [10:25:45<14:07:54] +[titan] 2025-10-05 09:00:06,182 - root - INFO - step: 16990 loss: 2.2652 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0002 +[titan] 2025-10-05 09:00:06,183 - root - INFO - lr: 3.3167e-05 gnorm: 1.09 [10:25:56<14:07:43] +[titan] 2025-10-05 09:00:17,071 - root - INFO - step: 16995 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:00:17,071 - root - INFO - lr: 3.3158e-05 gnorm: 1.08 [10:26:07<14:07:32] +[titan] 2025-10-05 09:00:25,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:00:27,948 - root - INFO - step: 17000 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:00:27,948 - root - INFO - lr: 3.3150e-05 gnorm: 1.11 [10:26:18<14:07:21] +[titan] 2025-10-05 09:00:38,826 - root - INFO - step: 17005 loss: 2.2227 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 09:00:38,826 - root - INFO - lr: 3.3141e-05 gnorm: 1.07 [10:26:29<14:07:09] +[titan] 2025-10-05 09:00:49,742 - root - INFO - step: 17010 loss: 2.2205 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:00:49,742 - root - INFO - lr: 3.3133e-05 gnorm: 1.05 [10:26:40<14:06:58] +[titan] 2025-10-05 09:01:00,622 - root - INFO - step: 17015 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9094 +[titan] 2025-10-05 09:01:00,622 - root - INFO - lr: 3.3124e-05 gnorm: 1.08 [10:26:50<14:06:47] +[titan] 2025-10-05 09:01:11,523 - root - INFO - step: 17020 loss: 2.1800 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9266 +[titan] 2025-10-05 09:01:11,523 - root - INFO - lr: 3.3115e-05 gnorm: 1.07 [10:27:01<14:06:36] +[titan] 2025-10-05 09:01:22,424 - root - INFO - step: 17025 loss: 2.2024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9461 +[titan] 2025-10-05 09:01:22,425 - root - INFO - lr: 3.3107e-05 gnorm: 1.04 [10:27:12<14:06:24] +[titan] 2025-10-05 09:01:33,324 - root - INFO - step: 17030 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 09:01:33,324 - root - INFO - lr: 
3.3098e-05 gnorm: 1.07 [10:27:23<14:06:13] +[titan] 2025-10-05 09:01:44,236 - root - INFO - step: 17035 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.12% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9083 +[titan] 2025-10-05 09:01:44,236 - root - INFO - lr: 3.3089e-05 gnorm: 1.04 [10:27:34<14:06:02] +[titan] 2025-10-05 09:01:55,136 - root - INFO - step: 17040 loss: 2.1831 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 09:01:55,136 - root - INFO - lr: 3.3081e-05 gnorm: 1.08 [10:27:45<14:05:51] +[titan] 2025-10-05 09:02:06,035 - root - INFO - step: 17045 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:02:06,035 - root - INFO - lr: 3.3072e-05 gnorm: 1.06 [10:27:56<14:05:39] +[titan] 2025-10-05 09:02:14,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:02:16,917 - root - INFO - step: 17050 loss: 2.2428 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:02:16,917 - root - INFO - lr: 3.3063e-05 gnorm: 1.04 [10:28:07<14:05:28] +[titan] 2025-10-05 09:02:27,783 - root - INFO - step: 17055 loss: 2.2213 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9627 +[titan] 2025-10-05 09:02:27,783 - root - INFO - lr: 3.3055e-05 gnorm: 1.05 [10:28:18<14:05:17] +[titan] 2025-10-05 09:02:38,654 - root - INFO - step: 17060 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8924 +[titan] 2025-10-05 09:02:38,654 - root - INFO - lr: 3.3046e-05 gnorm: 1.07 [10:28:28<14:05:05] +[titan] 2025-10-05 09:02:49,542 - root - INFO - step: 17065 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9244 +[titan] 2025-10-05 09:02:49,542 - root - INFO - lr: 3.3037e-05 gnorm: 1.10 [10:28:39<14:04:54] +[titan] 2025-10-05 09:03:00,423 - root - INFO - step: 17070 loss: 2.2506 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 09:03:00,423 - root - INFO - lr: 3.3029e-05 gnorm: 1.08 [10:28:50<14:04:43] +[titan] 2025-10-05 09:03:11,347 - root - INFO - step: 17075 loss: 2.1585 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:03:11,347 - root - INFO - lr: 3.3020e-05 gnorm: 1.09 [10:29:01<14:04:32] +[titan] 2025-10-05 09:03:22,220 - root - INFO - step: 17080 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 09:03:22,221 - root - INFO - lr: 
3.3011e-05 gnorm: 1.07 [10:29:12<14:04:20] +[titan] 2025-10-05 09:03:33,091 - root - INFO - step: 17085 loss: 2.1813 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:03:33,091 - root - INFO - lr: 3.3003e-05 gnorm: 1.12 [10:29:23<14:04:09] +[titan] 2025-10-05 09:03:43,968 - root - INFO - step: 17090 loss: 2.2621 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 1.9971 +[titan] 2025-10-05 09:03:43,968 - root - INFO - lr: 3.2994e-05 gnorm: 1.09 [10:29:34<14:03:58] +[titan] 2025-10-05 09:03:54,850 - root - INFO - step: 17095 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:03:54,850 - root - INFO - lr: 3.2986e-05 gnorm: 1.05 [10:29:45<14:03:46] +[titan] 2025-10-05 09:04:03,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:04:05,728 - root - INFO - step: 17100 loss: 2.1531 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 09:04:05,728 - root - INFO - lr: 3.2977e-05 gnorm: 1.07 [10:29:56<14:03:35] +[titan] 2025-10-05 09:04:16,647 - root - INFO - step: 17105 loss: 2.1923 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 09:04:16,647 - root - INFO - lr: 3.2968e-05 gnorm: 1.11 [10:30:06<14:03:24] +[titan] 2025-10-05 09:04:27,507 - root - INFO - step: 17110 loss: 2.1551 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9046 +[titan] 2025-10-05 09:04:27,507 - root - INFO - lr: 3.2960e-05 gnorm: 1.11 [10:30:17<14:03:13] +[titan] 2025-10-05 09:04:38,376 - root - INFO - step: 17115 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 09:04:38,376 - root - INFO - lr: 3.2951e-05 gnorm: 1.09 [10:30:28<14:03:01] +[titan] 2025-10-05 09:04:49,249 - root - INFO - step: 17120 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:04:49,249 - root - INFO - lr: 3.2942e-05 gnorm: 1.04 [10:30:39<14:02:50] +[titan] 2025-10-05 09:05:00,120 - root - INFO - step: 17125 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 09:05:00,120 - root - INFO - lr: 3.2934e-05 gnorm: 1.09 [10:30:50<14:02:39] +[titan] 2025-10-05 09:05:10,996 - root - INFO - step: 17130 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:05:10,997 - root - INFO - lr: 
3.2925e-05 gnorm: 6.19 [10:31:01<14:02:28] +[titan] 2025-10-05 09:05:21,856 - root - INFO - step: 17135 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 09:05:21,856 - root - INFO - lr: 3.2916e-05 gnorm: 1.04 [10:31:12<14:02:16] +[titan] 2025-10-05 09:05:32,760 - root - INFO - step: 17140 loss: 2.2847 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0182 +[titan] 2025-10-05 09:05:32,760 - root - INFO - lr: 3.2908e-05 gnorm: 1.13 [10:31:23<14:02:05] +[titan] 2025-10-05 09:05:43,616 - root - INFO - step: 17145 loss: 2.1628 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 09:05:43,616 - root - INFO - lr: 3.2899e-05 gnorm: 1.13 [10:31:33<14:01:54] +[titan] 2025-10-05 09:05:52,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:05:54,484 - root - INFO - step: 17150 loss: 2.2557 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 09:05:54,484 - root - INFO - lr: 3.2890e-05 gnorm: 1.04 [10:31:44<14:01:42] +[titan] 2025-10-05 09:06:05,356 - root - INFO - step: 17155 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 09:06:05,356 - root - INFO - lr: 3.2882e-05 gnorm: 1.06 [10:31:55<14:01:31] +[titan] 2025-10-05 09:06:16,249 - root - INFO - step: 17160 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:06:16,249 - root - INFO - lr: 3.2873e-05 gnorm: 1.06 [10:32:06<14:01:20] +[titan] 2025-10-05 09:06:27,125 - root - INFO - step: 17165 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:06:27,125 - root - INFO - lr: 3.2864e-05 gnorm: 1.06 [10:32:17<14:01:09] +[titan] 2025-10-05 09:06:38,025 - root - INFO - step: 17170 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:06:38,025 - root - INFO - lr: 3.2856e-05 gnorm: 1.14 [10:32:28<14:00:57] +[titan] 2025-10-05 09:06:48,880 - root - INFO - step: 17175 loss: 2.1394 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 09:06:48,880 - root - INFO - lr: 3.2847e-05 gnorm: 1.07 [10:32:39<14:00:46] +[titan] 2025-10-05 09:06:59,724 - root - INFO - step: 17180 loss: 2.1898 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 09:06:59,724 - root - INFO - lr: 
3.2838e-05 gnorm: 1.07 [10:32:49<14:00:35] +[titan] 2025-10-05 09:07:10,582 - root - INFO - step: 17185 loss: 2.1634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9111 +[titan] 2025-10-05 09:07:10,583 - root - INFO - lr: 3.2830e-05 gnorm: 1.03 [10:33:00<14:00:23] +[titan] 2025-10-05 09:07:21,443 - root - INFO - step: 17190 loss: 2.1666 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:07:21,443 - root - INFO - lr: 3.2821e-05 gnorm: 1.09 [10:33:11<14:00:12] +[titan] 2025-10-05 09:07:32,307 - root - INFO - step: 17195 loss: 2.2954 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 09:07:32,308 - root - INFO - lr: 3.2812e-05 gnorm: 1.05 [10:33:22<14:00:01] +[titan] 2025-10-05 09:07:40,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:07:43,204 - root - INFO - step: 17200 loss: 2.2434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:07:43,204 - root - INFO - lr: 3.2804e-05 gnorm: 1.02 [10:33:33<13:59:49] +[titan] 2025-10-05 09:07:54,076 - root - INFO - step: 17205 loss: 2.2300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:07:54,076 - root - INFO - lr: 3.2795e-05 gnorm: 1.07 [10:33:44<13:59:38] +[titan] 2025-10-05 09:08:04,949 - root - INFO - step: 17210 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:08:04,949 - root - INFO - lr: 3.2786e-05 gnorm: 1.14 [10:33:55<13:59:27] +[titan] 2025-10-05 09:08:15,833 - root - INFO - step: 17215 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 09:08:15,833 - root - INFO - lr: 3.2778e-05 gnorm: 1.07 [10:34:06<13:59:16] +[titan] 2025-10-05 09:08:26,702 - root - INFO - step: 17220 loss: 2.1866 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9324 +[titan] 2025-10-05 09:08:26,702 - root - INFO - lr: 3.2769e-05 gnorm: 1.12 [10:34:16<13:59:04] +[titan] 2025-10-05 09:08:37,566 - root - INFO - step: 17225 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 09:08:37,566 - root - INFO - lr: 3.2760e-05 gnorm: 1.09 [10:34:27<13:58:53] +[titan] 2025-10-05 09:08:48,419 - root - INFO - step: 17230 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 09:08:48,419 - root - INFO - lr: 
3.2752e-05 gnorm: 1.06 [10:34:38<13:58:42] +[titan] 2025-10-05 09:08:59,310 - root - INFO - step: 17235 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:08:59,310 - root - INFO - lr: 3.2743e-05 gnorm: 1.11 [10:34:49<13:58:30] +[titan] 2025-10-05 09:09:10,177 - root - INFO - step: 17240 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9319 +[titan] 2025-10-05 09:09:10,177 - root - INFO - lr: 3.2734e-05 gnorm: 1.07 [10:35:00<13:58:19] +[titan] 2025-10-05 09:09:21,054 - root - INFO - step: 17245 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:09:21,054 - root - INFO - lr: 3.2725e-05 gnorm: 1.03 [10:35:11<13:58:08] +[titan] 2025-10-05 09:09:29,732 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:09:31,915 - root - INFO - step: 17250 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9306 +[titan] 2025-10-05 09:09:31,915 - root - INFO - lr: 3.2717e-05 gnorm: 1.06 [10:35:22<13:57:57] +[titan] 2025-10-05 09:09:42,794 - root - INFO - step: 17255 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 09:09:42,794 - root - INFO - lr: 3.2708e-05 gnorm: 1.07 [10:35:33<13:57:45] +[titan] 2025-10-05 09:09:53,683 - root - INFO - step: 17260 loss: 2.1486 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 09:09:53,683 - root - INFO - lr: 3.2699e-05 gnorm: 1.09 [10:35:43<13:57:34] +[titan] 2025-10-05 09:10:04,613 - root - INFO - step: 17265 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 09:10:04,613 - root - INFO - lr: 3.2691e-05 gnorm: 1.10 [10:35:54<13:57:23] +[titan] 2025-10-05 09:10:15,520 - root - INFO - step: 17270 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:10:15,520 - root - INFO - lr: 3.2682e-05 gnorm: 1.07 [10:36:05<13:57:12] +[titan] 2025-10-05 09:10:26,410 - root - INFO - step: 17275 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9303 +[titan] 2025-10-05 09:10:26,410 - root - INFO - lr: 3.2673e-05 gnorm: 1.08 [10:36:16<13:57:00] +[titan] 2025-10-05 09:10:37,314 - root - INFO - step: 17280 loss: 2.3099 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 09:10:37,314 - root - INFO - lr: 
3.2665e-05 gnorm: 1.11 [10:36:27<13:56:49] +[titan] 2025-10-05 09:10:48,218 - root - INFO - step: 17285 loss: 2.2025 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 09:10:48,218 - root - INFO - lr: 3.2656e-05 gnorm: 1.04 [10:36:38<13:56:38] +[titan] 2025-10-05 09:10:59,106 - root - INFO - step: 17290 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 09:10:59,106 - root - INFO - lr: 3.2647e-05 gnorm: 1.08 [10:36:49<13:56:27] +[titan] 2025-10-05 09:11:09,991 - root - INFO - step: 17295 loss: 2.2277 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 09:11:09,991 - root - INFO - lr: 3.2639e-05 gnorm: 1.09 [10:37:00<13:56:15] +[titan] 2025-10-05 09:11:18,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:11:20,963 - root - INFO - step: 17300 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 09:11:20,963 - root - INFO - lr: 3.2630e-05 gnorm: 1.10 [10:37:11<13:56:04] +[titan] 2025-10-05 09:11:31,859 - root - INFO - step: 17305 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:11:31,859 - root - INFO - lr: 3.2621e-05 gnorm: 1.04 [10:37:22<13:55:53] +[titan] 2025-10-05 09:11:42,726 - root - INFO - step: 17310 loss: 2.2050 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 09:11:42,726 - root - INFO - lr: 3.2613e-05 gnorm: 1.08 [10:37:32<13:55:42] +[titan] 2025-10-05 09:11:53,604 - root - INFO - step: 17315 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:11:53,604 - root - INFO - lr: 3.2604e-05 gnorm: 1.06 [10:37:43<13:55:30] +[titan] 2025-10-05 09:12:04,491 - root - INFO - step: 17320 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:12:04,491 - root - INFO - lr: 3.2595e-05 gnorm: 1.08 [10:37:54<13:55:19] +[titan] 2025-10-05 09:12:15,414 - root - INFO - step: 17325 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:12:15,414 - root - INFO - lr: 3.2586e-05 gnorm: 1.03 [10:38:05<13:55:08] +[titan] 2025-10-05 09:12:26,330 - root - INFO - step: 17330 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9711 +[titan] 2025-10-05 09:12:26,330 - root - INFO - lr: 
3.2578e-05 gnorm: 1.08 [10:38:16<13:54:57] +[titan] 2025-10-05 09:12:37,205 - root - INFO - step: 17335 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9425 +[titan] 2025-10-05 09:12:37,206 - root - INFO - lr: 3.2569e-05 gnorm: 1.08 [10:38:27<13:54:45] +[titan] 2025-10-05 09:12:48,107 - root - INFO - step: 17340 loss: 2.2311 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 09:12:48,107 - root - INFO - lr: 3.2560e-05 gnorm: 1.07 [10:38:38<13:54:34] +[titan] 2025-10-05 09:12:58,971 - root - INFO - step: 17345 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:12:58,971 - root - INFO - lr: 3.2552e-05 gnorm: 1.02 [10:38:49<13:54:23] +[titan] 2025-10-05 09:13:07,640 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:13:09,828 - root - INFO - step: 17350 loss: 2.1864 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9313 +[titan] 2025-10-05 09:13:09,828 - root - INFO - lr: 3.2543e-05 gnorm: 1.12 [10:39:00<13:54:12] +[titan] 2025-10-05 09:13:20,766 - root - INFO - step: 17355 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:13:20,766 - root - INFO - lr: 3.2534e-05 gnorm: 1.05 [10:39:11<13:54:00] +[titan] 2025-10-05 09:13:31,647 - root - INFO - step: 17360 loss: 2.1890 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9339 +[titan] 2025-10-05 09:13:31,647 - root - INFO - lr: 3.2526e-05 gnorm: 1.06 [10:39:21<13:53:49] +[titan] 2025-10-05 09:13:42,494 - root - INFO - step: 17365 loss: 2.2669 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 09:13:42,494 - root - INFO - lr: 3.2517e-05 gnorm: 1.11 [10:39:32<13:53:38] +[titan] 2025-10-05 09:13:53,353 - root - INFO - step: 17370 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0243 +[titan] 2025-10-05 09:13:53,353 - root - INFO - lr: 3.2508e-05 gnorm: 1.16 [10:39:43<13:53:26] +[titan] 2025-10-05 09:14:04,232 - root - INFO - step: 17375 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9603 +[titan] 2025-10-05 09:14:04,232 - root - INFO - lr: 3.2500e-05 gnorm: 1.06 [10:39:54<13:53:15] +[titan] 2025-10-05 09:14:15,120 - root - INFO - step: 17380 loss: 2.2381 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 09:14:15,120 - root - INFO - lr: 
3.2491e-05 gnorm: 1.09 [10:40:05<13:53:04] +[titan] 2025-10-05 09:14:26,052 - root - INFO - step: 17385 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 09:14:26,052 - root - INFO - lr: 3.2482e-05 gnorm: 1.07 [10:40:16<13:52:53] +[titan] 2025-10-05 09:14:36,924 - root - INFO - step: 17390 loss: 2.1808 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:14:36,924 - root - INFO - lr: 3.2473e-05 gnorm: 1.07 [10:40:27<13:52:41] +[titan] 2025-10-05 09:14:47,853 - root - INFO - step: 17395 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9403 +[titan] 2025-10-05 09:14:47,853 - root - INFO - lr: 3.2465e-05 gnorm: 1.04 [10:40:38<13:52:30] +[titan] 2025-10-05 09:14:56,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:14:58,759 - root - INFO - step: 17400 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 09:14:58,759 - root - INFO - lr: 3.2456e-05 gnorm: 1.05 [10:40:49<13:52:19] +[titan] 2025-10-05 09:15:09,750 - root - INFO - step: 17405 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 29,817 tflops: 413.66 mfu: 41.83% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 09:15:09,750 - root - INFO - lr: 3.2447e-05 gnorm: 1.05 [10:41:00<13:52:08] +[titan] 2025-10-05 09:15:16,473 - root - INFO - Dumping profiler traces at step 17408 +[titan] 2025-10-05 09:15:16,514 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:15:20,898 - root - INFO - step: 17410 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 29,393 tflops: 407.78 mfu: 41.23% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:15:20,898 - root - INFO - lr: 3.2439e-05 gnorm: 1.10 [10:41:11<13:51:57] +[titan] 2025-10-05 09:15:31,784 - root - INFO - step: 17415 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 09:15:31,784 - root - INFO - lr: 3.2430e-05 gnorm: 1.11 [10:41:22<13:51:46] +[titan] 2025-10-05 09:15:42,678 - root - INFO - step: 17420 loss: 2.1926 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9383 +[titan] 2025-10-05 09:15:42,678 - root - INFO - lr: 3.2421e-05 gnorm: 1.05 [10:41:32<13:51:34] +[titan] 2025-10-05 09:15:53,585 - root - INFO - step: 17425 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 09:15:53,585 - root - INFO - lr: 3.2412e-05 gnorm: 1.05 [10:41:43<13:51:23] +[titan] 2025-10-05 09:16:04,476 - root - INFO - step: 17430 loss: 
2.1543 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9031 +[titan] 2025-10-05 09:16:04,476 - root - INFO - lr: 3.2404e-05 gnorm: 1.06 [10:41:54<13:51:12] +[titan] 2025-10-05 09:16:15,351 - root - INFO - step: 17435 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 09:16:15,351 - root - INFO - lr: 3.2395e-05 gnorm: 1.09 [10:42:05<13:51:01] +[titan] 2025-10-05 09:16:26,256 - root - INFO - step: 17440 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:16:26,257 - root - INFO - lr: 3.2386e-05 gnorm: 1.08 [10:42:16<13:50:49] +[titan] 2025-10-05 09:16:37,135 - root - INFO - step: 17445 loss: 2.1787 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9254 +[titan] 2025-10-05 09:16:37,135 - root - INFO - lr: 3.2378e-05 gnorm: 1.06 [10:42:27<13:50:38] +[titan] 2025-10-05 09:16:45,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:16:48,014 - root - INFO - step: 17450 loss: 2.1992 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9428 +[titan] 2025-10-05 09:16:48,014 - root - INFO - lr: 3.2369e-05 gnorm: 1.03 [10:42:38<13:50:27] +[titan] 2025-10-05 09:16:58,900 - root - INFO - step: 17455 loss: 2.2831 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:16:58,900 - root - INFO - lr: 3.2360e-05 gnorm: 1.09 [10:42:49<13:50:16] +[titan] 2025-10-05 09:17:09,817 - root - INFO - step: 17460 loss: 2.2252 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:17:09,817 - root - INFO - lr: 3.2351e-05 gnorm: 1.08 [10:43:00<13:50:04] +[titan] 2025-10-05 09:17:20,746 - root - INFO - step: 17465 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:17:20,746 - root - INFO - lr: 3.2343e-05 gnorm: 1.05 [10:43:10<13:49:53] +[titan] 2025-10-05 09:17:31,624 - root - INFO - step: 17470 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 09:17:31,624 - root - INFO - lr: 3.2334e-05 gnorm: 1.07 [10:43:21<13:49:42] +[titan] 2025-10-05 09:17:42,511 - root - INFO - step: 17475 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:17:42,511 - root - INFO - lr: 3.2325e-05 gnorm: 1.07 [10:43:32<13:49:31] +[titan] 2025-10-05 09:17:53,406 - root - INFO - step: 17480 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 09:17:53,407 - root - INFO - lr: 
3.2317e-05 gnorm: 1.09 [10:43:43<13:49:19] +[titan] 2025-10-05 09:18:04,291 - root - INFO - step: 17485 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:18:04,292 - root - INFO - lr: 3.2308e-05 gnorm: 1.09 [10:43:54<13:49:08] +[titan] 2025-10-05 09:18:15,232 - root - INFO - step: 17490 loss: 2.1875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 09:18:15,232 - root - INFO - lr: 3.2299e-05 gnorm: 1.09 [10:44:05<13:48:57] +[titan] 2025-10-05 09:18:26,148 - root - INFO - step: 17495 loss: 2.1821 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9280 +[titan] 2025-10-05 09:18:26,148 - root - INFO - lr: 3.2290e-05 gnorm: 1.06 [10:44:16<13:48:46] +[titan] 2025-10-05 09:18:34,840 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:18:37,024 - root - INFO - step: 17500 loss: 2.2275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9680 +[titan] 2025-10-05 09:18:37,024 - root - INFO - lr: 3.2282e-05 gnorm: 1.08 [10:44:27<13:48:35] +[titan] 2025-10-05 09:18:47,898 - root - INFO - step: 17505 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9235 +[titan] 2025-10-05 09:18:47,898 - root - INFO - lr: 3.2273e-05 gnorm: 1.10 [10:44:38<13:48:23] +[titan] 2025-10-05 09:18:58,787 - root - INFO - step: 17510 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 09:18:58,787 - root - INFO - lr: 3.2264e-05 gnorm: 1.07 [10:44:49<13:48:12] +[titan] 2025-10-05 09:19:09,664 - root - INFO - step: 17515 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9713 +[titan] 2025-10-05 09:19:09,664 - root - INFO - lr: 3.2256e-05 gnorm: 1.11 [10:44:59<13:48:01] +[titan] 2025-10-05 09:19:20,602 - root - INFO - step: 17520 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 09:19:20,603 - root - INFO - lr: 3.2247e-05 gnorm: 1.06 [10:45:10<13:47:50] +[titan] 2025-10-05 09:19:31,492 - root - INFO - step: 17525 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9837 +[titan] 2025-10-05 09:19:31,492 - root - INFO - lr: 3.2238e-05 gnorm: 1.06 [10:45:21<13:47:38] +[titan] 2025-10-05 09:19:42,388 - root - INFO - step: 17530 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:19:42,388 - root - INFO - lr: 
3.2229e-05 gnorm: 1.04 [10:45:32<13:47:27] +[titan] 2025-10-05 09:19:53,275 - root - INFO - step: 17535 loss: 2.1899 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9365 +[titan] 2025-10-05 09:19:53,275 - root - INFO - lr: 3.2221e-05 gnorm: 1.11 [10:45:43<13:47:16] +[titan] 2025-10-05 09:20:04,158 - root - INFO - step: 17540 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:20:04,158 - root - INFO - lr: 3.2212e-05 gnorm: 1.08 [10:45:54<13:47:05] +[titan] 2025-10-05 09:20:15,047 - root - INFO - step: 17545 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 09:20:15,047 - root - INFO - lr: 3.2203e-05 gnorm: 1.14 [10:46:05<13:46:53] +[titan] 2025-10-05 09:20:23,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:20:25,948 - root - INFO - step: 17550 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:20:25,949 - root - INFO - lr: 3.2194e-05 gnorm: 1.06 [10:46:16<13:46:42] +[titan] 2025-10-05 09:20:36,875 - root - INFO - step: 17555 loss: 2.1706 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 09:20:36,875 - root - INFO - lr: 3.2186e-05 gnorm: 1.05 [10:46:27<13:46:31] +[titan] 2025-10-05 09:20:47,778 - root - INFO - step: 17560 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:20:47,778 - root - INFO - lr: 3.2177e-05 gnorm: 1.04 [10:46:38<13:46:20] +[titan] 2025-10-05 09:20:58,670 - root - INFO - step: 17565 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 09:20:58,670 - root - INFO - lr: 3.2168e-05 gnorm: 1.11 [10:46:48<13:46:08] +[titan] 2025-10-05 09:21:09,567 - root - INFO - step: 17570 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9227 +[titan] 2025-10-05 09:21:09,567 - root - INFO - lr: 3.2160e-05 gnorm: 1.03 [10:46:59<13:45:57] +[titan] 2025-10-05 09:21:20,447 - root - INFO - step: 17575 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:21:20,447 - root - INFO - lr: 3.2151e-05 gnorm: 1.06 [10:47:10<13:45:46] +[titan] 2025-10-05 09:21:31,358 - root - INFO - step: 17580 loss: 2.1219 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 09:21:31,358 - root - INFO - lr: 
3.2142e-05 gnorm: 1.07 [10:47:21<13:45:35] +[titan] 2025-10-05 09:21:42,250 - root - INFO - step: 17585 loss: 2.2406 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 09:21:42,250 - root - INFO - lr: 3.2133e-05 gnorm: 1.10 [10:47:32<13:45:23] +[titan] 2025-10-05 09:21:53,130 - root - INFO - step: 17590 loss: 2.2175 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 09:21:53,130 - root - INFO - lr: 3.2125e-05 gnorm: 1.08 [10:47:43<13:45:12] +[titan] 2025-10-05 09:22:04,011 - root - INFO - step: 17595 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9755 +[titan] 2025-10-05 09:22:04,011 - root - INFO - lr: 3.2116e-05 gnorm: 1.05 [10:47:54<13:45:01] +[titan] 2025-10-05 09:22:12,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:22:14,893 - root - INFO - step: 17600 loss: 2.2663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0020 +[titan] 2025-10-05 09:22:14,894 - root - INFO - lr: 3.2107e-05 gnorm: 1.08 [10:48:05<13:44:50] +[titan] 2025-10-05 09:22:25,790 - root - INFO - step: 17605 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 09:22:25,790 - root - INFO - lr: 3.2098e-05 gnorm: 1.11 [10:48:16<13:44:38] +[titan] 2025-10-05 09:22:36,676 - root - INFO - step: 17610 loss: 2.2048 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9482 +[titan] 2025-10-05 09:22:36,676 - root - INFO - lr: 3.2090e-05 gnorm: 1.08 [10:48:26<13:44:27] +[titan] 2025-10-05 09:22:47,556 - root - INFO - step: 17615 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 09:22:47,556 - root - INFO - lr: 3.2081e-05 gnorm: 1.10 [10:48:37<13:44:16] +[titan] 2025-10-05 09:22:58,451 - root - INFO - step: 17620 loss: 2.1471 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:22:58,451 - root - INFO - lr: 3.2072e-05 gnorm: 1.10 [10:48:48<13:44:05] +[titan] 2025-10-05 09:23:09,330 - root - INFO - step: 17625 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 09:23:09,330 - root - INFO - lr: 3.2063e-05 gnorm: 1.04 [10:48:59<13:43:53] +[titan] 2025-10-05 09:23:20,210 - root - INFO - step: 17630 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9426 +[titan] 2025-10-05 09:23:20,210 - root - INFO - lr: 
3.2055e-05 gnorm: 1.05 [10:49:10<13:43:42] +[titan] 2025-10-05 09:23:31,084 - root - INFO - step: 17635 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 09:23:31,084 - root - INFO - lr: 3.2046e-05 gnorm: 1.06 [10:49:21<13:43:31] +[titan] 2025-10-05 09:23:41,968 - root - INFO - step: 17640 loss: 2.2575 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 09:23:41,969 - root - INFO - lr: 3.2037e-05 gnorm: 1.12 [10:49:32<13:43:20] +[titan] 2025-10-05 09:23:52,856 - root - INFO - step: 17645 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9799 +[titan] 2025-10-05 09:23:52,856 - root - INFO - lr: 3.2029e-05 gnorm: 1.10 [10:49:43<13:43:08] +[titan] 2025-10-05 09:24:01,573 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:24:03,759 - root - INFO - step: 17650 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:24:03,759 - root - INFO - lr: 3.2020e-05 gnorm: 1.14 [10:49:53<13:42:57] +[titan] 2025-10-05 09:24:14,635 - root - INFO - step: 17655 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 09:24:14,636 - root - INFO - lr: 3.2011e-05 gnorm: 1.12 [10:50:04<13:42:46] +[titan] 2025-10-05 09:24:25,539 - root - INFO - step: 17660 loss: 2.1876 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9327 +[titan] 2025-10-05 09:24:25,539 - root - INFO - lr: 3.2002e-05 gnorm: 1.06 [10:50:15<13:42:35] +[titan] 2025-10-05 09:24:36,410 - root - INFO - step: 17665 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9523 +[titan] 2025-10-05 09:24:36,410 - root - INFO - lr: 3.1994e-05 gnorm: 1.09 [10:50:26<13:42:23] +[titan] 2025-10-05 09:24:47,292 - root - INFO - step: 17670 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9587 +[titan] 2025-10-05 09:24:47,292 - root - INFO - lr: 3.1985e-05 gnorm: 1.03 [10:50:37<13:42:12] +[titan] 2025-10-05 09:24:58,185 - root - INFO - step: 17675 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 09:24:58,185 - root - INFO - lr: 3.1976e-05 gnorm: 1.17 [10:50:48<13:42:01] +[titan] 2025-10-05 09:25:09,105 - root - INFO - step: 17680 loss: 2.2810 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 09:25:09,105 - root - INFO - lr: 
3.1967e-05 gnorm: 1.11 [10:50:59<13:41:50] +[titan] 2025-10-05 09:25:19,979 - root - INFO - step: 17685 loss: 2.1693 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:25:19,979 - root - INFO - lr: 3.1959e-05 gnorm: 1.07 [10:51:10<13:41:38] +[titan] 2025-10-05 09:25:30,867 - root - INFO - step: 17690 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9351 +[titan] 2025-10-05 09:25:30,867 - root - INFO - lr: 3.1950e-05 gnorm: 1.10 [10:51:21<13:41:27] +[titan] 2025-10-05 09:25:41,737 - root - INFO - step: 17695 loss: 2.1997 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 09:25:41,737 - root - INFO - lr: 3.1941e-05 gnorm: 1.03 [10:51:31<13:41:16] +[titan] 2025-10-05 09:25:50,446 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:25:52,639 - root - INFO - step: 17700 loss: 2.1679 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 09:25:52,639 - root - INFO - lr: 3.1932e-05 gnorm: 1.08 [10:51:42<13:41:05] +[titan] 2025-10-05 09:26:03,528 - root - INFO - step: 17705 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9507 +[titan] 2025-10-05 09:26:03,528 - root - INFO - lr: 3.1924e-05 gnorm: 1.14 [10:51:53<13:40:53] +[titan] 2025-10-05 09:26:14,424 - root - INFO - step: 17710 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:26:14,424 - root - INFO - lr: 3.1915e-05 gnorm: 1.05 [10:52:04<13:40:42] +[titan] 2025-10-05 09:26:25,353 - root - INFO - step: 17715 loss: 2.1118 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 09:26:25,353 - root - INFO - lr: 3.1906e-05 gnorm: 1.09 [10:52:15<13:40:31] +[titan] 2025-10-05 09:26:36,227 - root - INFO - step: 17720 loss: 2.1460 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8958 +[titan] 2025-10-05 09:26:36,227 - root - INFO - lr: 3.1897e-05 gnorm: 1.09 [10:52:26<13:40:20] +[titan] 2025-10-05 09:26:47,086 - root - INFO - step: 17725 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:26:47,086 - root - INFO - lr: 3.1889e-05 gnorm: 1.04 [10:52:37<13:40:08] +[titan] 2025-10-05 09:26:57,951 - root - INFO - step: 17730 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 09:26:57,951 - root - INFO - lr: 
3.1880e-05 gnorm: 1.13 [10:52:48<13:39:57] +[titan] 2025-10-05 09:27:08,802 - root - INFO - step: 17735 loss: 2.2199 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 09:27:08,802 - root - INFO - lr: 3.1871e-05 gnorm: 1.04 [10:52:59<13:39:46] +[titan] 2025-10-05 09:27:19,665 - root - INFO - step: 17740 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 09:27:19,665 - root - INFO - lr: 3.1862e-05 gnorm: 1.09 [10:53:09<13:39:35] +[titan] 2025-10-05 09:27:30,612 - root - INFO - step: 17745 loss: 2.1677 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9150 +[titan] 2025-10-05 09:27:30,612 - root - INFO - lr: 3.1854e-05 gnorm: 1.09 [10:53:20<13:39:23] +[titan] 2025-10-05 09:27:39,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:27:41,462 - root - INFO - step: 17750 loss: 2.1954 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 09:27:41,462 - root - INFO - lr: 3.1845e-05 gnorm: 1.09 [10:53:31<13:39:12] +[titan] 2025-10-05 09:27:52,328 - root - INFO - step: 17755 loss: 2.1602 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9091 +[titan] 2025-10-05 09:27:52,328 - root - INFO - lr: 3.1836e-05 gnorm: 1.04 [10:53:42<13:39:01] +[titan] 2025-10-05 09:28:03,186 - root - INFO - step: 17760 loss: 2.2440 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:28:03,186 - root - INFO - lr: 3.1827e-05 gnorm: 1.08 [10:53:53<13:38:50] +[titan] 2025-10-05 09:28:14,043 - root - INFO - step: 17765 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9224 +[titan] 2025-10-05 09:28:14,043 - root - INFO - lr: 3.1818e-05 gnorm: 1.07 [10:54:04<13:38:38] +[titan] 2025-10-05 09:28:24,918 - root - INFO - step: 17770 loss: 2.1581 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9064 +[titan] 2025-10-05 09:28:24,918 - root - INFO - lr: 3.1810e-05 gnorm: 1.06 [10:54:15<13:38:27] +[titan] 2025-10-05 09:28:35,788 - root - INFO - step: 17775 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9652 +[titan] 2025-10-05 09:28:35,788 - root - INFO - lr: 3.1801e-05 gnorm: 1.15 [10:54:26<13:38:16] +[titan] 2025-10-05 09:28:46,695 - root - INFO - step: 17780 loss: 2.2055 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:28:46,696 - root - INFO - lr: 
3.1792e-05 gnorm: 1.10 [10:54:36<13:38:05] +[titan] 2025-10-05 09:28:57,567 - root - INFO - step: 17785 loss: 2.1809 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9261 +[titan] 2025-10-05 09:28:57,567 - root - INFO - lr: 3.1783e-05 gnorm: 1.05 [10:54:47<13:37:53] +[titan] 2025-10-05 09:29:08,437 - root - INFO - step: 17790 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:29:08,437 - root - INFO - lr: 3.1775e-05 gnorm: 1.09 [10:54:58<13:37:42] +[titan] 2025-10-05 09:29:19,347 - root - INFO - step: 17795 loss: 2.1437 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 09:29:19,348 - root - INFO - lr: 3.1766e-05 gnorm: 1.05 [10:55:09<13:37:31] +[titan] 2025-10-05 09:29:28,050 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:29:30,284 - root - INFO - step: 17800 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 09:29:30,284 - root - INFO - lr: 3.1757e-05 gnorm: 1.07 [10:55:20<13:37:20] +[titan] 2025-10-05 09:29:41,184 - root - INFO - step: 17805 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9269 +[titan] 2025-10-05 09:29:41,184 - root - INFO - lr: 3.1748e-05 gnorm: 1.07 [10:55:31<13:37:08] +[titan] 2025-10-05 09:29:52,086 - root - INFO - step: 17810 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:29:52,086 - root - INFO - lr: 3.1740e-05 gnorm: 1.06 [10:55:42<13:36:57] +[titan] 2025-10-05 09:30:03,070 - root - INFO - step: 17815 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9441 +[titan] 2025-10-05 09:30:03,070 - root - INFO - lr: 3.1731e-05 gnorm: 1.04 [10:55:53<13:36:46] +[titan] 2025-10-05 09:30:13,933 - root - INFO - step: 17820 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 09:30:13,933 - root - INFO - lr: 3.1722e-05 gnorm: 1.09 [10:56:04<13:36:35] +[titan] 2025-10-05 09:30:24,824 - root - INFO - step: 17825 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:30:24,824 - root - INFO - lr: 3.1713e-05 gnorm: 1.05 [10:56:15<13:36:24] +[titan] 2025-10-05 09:30:35,792 - root - INFO - step: 17830 loss: 2.1738 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:30:35,792 - root - INFO - lr: 
3.1705e-05 gnorm: 1.09 [10:56:26<13:36:12] +[titan] 2025-10-05 09:30:46,656 - root - INFO - step: 17835 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 09:30:46,656 - root - INFO - lr: 3.1696e-05 gnorm: 1.04 [10:56:36<13:36:01] +[titan] 2025-10-05 09:30:57,542 - root - INFO - step: 17840 loss: 2.1750 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 09:30:57,542 - root - INFO - lr: 3.1687e-05 gnorm: 1.05 [10:56:47<13:35:50] +[titan] 2025-10-05 09:31:08,403 - root - INFO - step: 17845 loss: 2.2534 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 09:31:08,403 - root - INFO - lr: 3.1678e-05 gnorm: 1.10 [10:56:58<13:35:39] +[titan] 2025-10-05 09:31:17,089 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:31:19,279 - root - INFO - step: 17850 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9894 +[titan] 2025-10-05 09:31:19,279 - root - INFO - lr: 3.1670e-05 gnorm: 1.07 [10:57:09<13:35:27] +[titan] 2025-10-05 09:31:30,156 - root - INFO - step: 17855 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9467 +[titan] 2025-10-05 09:31:30,156 - root - INFO - lr: 3.1661e-05 gnorm: 1.03 [10:57:20<13:35:16] +[titan] 2025-10-05 09:31:41,087 - root - INFO - step: 17860 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9406 +[titan] 2025-10-05 09:31:41,088 - root - INFO - lr: 3.1652e-05 gnorm: 1.07 [10:57:31<13:35:05] +[titan] 2025-10-05 09:31:51,960 - root - INFO - step: 17865 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:31:51,961 - root - INFO - lr: 3.1643e-05 gnorm: 1.08 [10:57:42<13:34:54] +[titan] 2025-10-05 09:32:02,826 - root - INFO - step: 17870 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 09:32:02,826 - root - INFO - lr: 3.1634e-05 gnorm: 1.08 [10:57:53<13:34:42] +[titan] 2025-10-05 09:32:13,722 - root - INFO - step: 17875 loss: 2.2074 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:32:13,722 - root - INFO - lr: 3.1626e-05 gnorm: 1.04 [10:58:03<13:34:31] +[titan] 2025-10-05 09:32:24,584 - root - INFO - step: 17880 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 09:32:24,584 - root - INFO - lr: 
3.1617e-05 gnorm: 1.06 [10:58:14<13:34:20] +[titan] 2025-10-05 09:32:35,482 - root - INFO - step: 17885 loss: 2.2057 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 09:32:35,482 - root - INFO - lr: 3.1608e-05 gnorm: 1.05 [10:58:25<13:34:09] +[titan] 2025-10-05 09:32:46,344 - root - INFO - step: 17890 loss: 2.2259 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9659 +[titan] 2025-10-05 09:32:46,344 - root - INFO - lr: 3.1599e-05 gnorm: 1.05 [10:58:36<13:33:57] +[titan] 2025-10-05 09:32:57,245 - root - INFO - step: 17895 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8967 +[titan] 2025-10-05 09:32:57,245 - root - INFO - lr: 3.1591e-05 gnorm: 1.07 [10:58:47<13:33:46] +[titan] 2025-10-05 09:33:05,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:33:08,118 - root - INFO - step: 17900 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 09:33:08,118 - root - INFO - lr: 3.1582e-05 gnorm: 1.01 [10:58:58<13:33:35] +[titan] 2025-10-05 09:33:19,021 - root - INFO - step: 17905 loss: 2.1704 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 09:33:19,022 - root - INFO - lr: 3.1573e-05 gnorm: 1.15 [10:59:09<13:33:24] +[titan] 2025-10-05 09:33:29,882 - root - INFO - step: 17910 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 09:33:29,882 - root - INFO - lr: 3.1564e-05 gnorm: 1.05 [10:59:20<13:33:12] +[titan] 2025-10-05 09:33:40,805 - root - INFO - step: 17915 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 09:33:40,805 - root - INFO - lr: 3.1555e-05 gnorm: 1.08 [10:59:31<13:33:01] +[titan] 2025-10-05 09:33:51,745 - root - INFO - step: 17920 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 09:33:51,746 - root - INFO - lr: 3.1547e-05 gnorm: 1.12 [10:59:41<13:32:50] +[titan] 2025-10-05 09:33:51,927 - root - INFO - Dumping profiler traces at step 17920 +[titan] 2025-10-05 09:33:51,967 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:34:02,867 - root - INFO - step: 17925 loss: 2.1932 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.76 mfu: 41.33% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 09:34:02,867 - root - INFO - lr: 3.1538e-05 gnorm: 1.06 [10:59:53<13:32:39] +[titan] 2025-10-05 09:34:13,739 - root - INFO - step: 17930 loss: 
2.2006 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:34:13,739 - root - INFO - lr: 3.1529e-05 gnorm: 1.12 [11:00:03<13:32:28] +[titan] 2025-10-05 09:34:24,600 - root - INFO - step: 17935 loss: 2.2250 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:34:24,600 - root - INFO - lr: 3.1520e-05 gnorm: 1.06 [11:00:14<13:32:17] +[titan] 2025-10-05 09:34:35,525 - root - INFO - step: 17940 loss: 2.1726 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 09:34:35,525 - root - INFO - lr: 3.1512e-05 gnorm: 1.05 [11:00:25<13:32:05] +[titan] 2025-10-05 09:34:46,392 - root - INFO - step: 17945 loss: 2.0902 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 09:34:46,392 - root - INFO - lr: 3.1503e-05 gnorm: 1.04 [11:00:36<13:31:54] +[titan] 2025-10-05 09:34:55,074 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:34:57,263 - root - INFO - step: 17950 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 09:34:57,264 - root - INFO - lr: 3.1494e-05 gnorm: 1.09 [11:00:47<13:31:43] +[titan] 2025-10-05 09:35:08,110 - root - INFO - step: 17955 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 09:35:08,110 - root - INFO - lr: 3.1485e-05 gnorm: 1.05 [11:00:58<13:31:32] +[titan] 2025-10-05 09:35:18,976 - root - INFO - step: 17960 loss: 2.2219 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:35:18,976 - root - INFO - lr: 3.1476e-05 gnorm: 1.08 [11:01:09<13:31:20] +[titan] 2025-10-05 09:35:29,838 - root - INFO - step: 17965 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9589 +[titan] 2025-10-05 09:35:29,838 - root - INFO - lr: 3.1468e-05 gnorm: 1.08 [11:01:20<13:31:09] +[titan] 2025-10-05 09:35:40,783 - root - INFO - step: 17970 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.35 mfu: 42.00% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9404 +[titan] 2025-10-05 09:35:40,783 - root - INFO - lr: 3.1459e-05 gnorm: 1.07 [11:01:30<13:30:58] +[titan] 2025-10-05 09:35:51,637 - root - INFO - step: 17975 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.0326 +[titan] 2025-10-05 09:35:51,637 - root - INFO - lr: 3.1450e-05 gnorm: 1.14 [11:01:41<13:30:47] +[titan] 2025-10-05 09:36:02,514 - root - INFO - step: 17980 loss: 2.1848 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 09:36:02,514 - root - INFO - lr: 
3.1441e-05 gnorm: 1.06 [11:01:52<13:30:35] +[titan] 2025-10-05 09:36:13,381 - root - INFO - step: 17985 loss: 2.1655 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9132 +[titan] 2025-10-05 09:36:13,381 - root - INFO - lr: 3.1432e-05 gnorm: 1.07 [11:02:03<13:30:24] +[titan] 2025-10-05 09:36:24,242 - root - INFO - step: 17990 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 09:36:24,242 - root - INFO - lr: 3.1424e-05 gnorm: 1.05 [11:02:14<13:30:13] +[titan] 2025-10-05 09:36:35,118 - root - INFO - step: 17995 loss: 2.2044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9479 +[titan] 2025-10-05 09:36:35,118 - root - INFO - lr: 3.1415e-05 gnorm: 1.05 [11:02:25<13:30:02] +[titan] 2025-10-05 09:36:43,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:36:46,012 - root - INFO - step: 18000 loss: 2.1302 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 09:36:46,012 - root - INFO - lr: 3.1406e-05 gnorm: 1.05 [11:02:36<13:29:50] +[titan] 2025-10-05 09:36:56,870 - root - INFO - step: 18005 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9388 +[titan] 2025-10-05 09:36:56,871 - root - INFO - lr: 3.1397e-05 gnorm: 1.04 [11:02:47<13:29:39] +[titan] 2025-10-05 09:37:07,717 - root - INFO - step: 18010 loss: 2.2185 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9597 +[titan] 2025-10-05 09:37:07,718 - root - INFO - lr: 3.1389e-05 gnorm: 1.06 [11:02:57<13:29:28] +[titan] 2025-10-05 09:37:18,576 - root - INFO - step: 18015 loss: 2.2301 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:37:18,576 - root - INFO - lr: 3.1380e-05 gnorm: 1.13 [11:03:08<13:29:17] +[titan] 2025-10-05 09:37:29,423 - root - INFO - step: 18020 loss: 2.2014 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9439 +[titan] 2025-10-05 09:37:29,423 - root - INFO - lr: 3.1371e-05 gnorm: 1.04 [11:03:19<13:29:05] +[titan] 2025-10-05 09:37:40,354 - root - INFO - step: 18025 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:37:40,354 - root - INFO - lr: 3.1362e-05 gnorm: 1.05 [11:03:30<13:28:54] +[titan] 2025-10-05 09:37:51,204 - root - INFO - step: 18030 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9219 +[titan] 2025-10-05 09:37:51,204 - root - INFO - lr: 
3.1353e-05 gnorm: 1.05 [11:03:41<13:28:43] +[titan] 2025-10-05 09:38:02,089 - root - INFO - step: 18035 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:38:02,089 - root - INFO - lr: 3.1345e-05 gnorm: 1.09 [11:03:52<13:28:32] +[titan] 2025-10-05 09:38:12,956 - root - INFO - step: 18040 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:38:12,956 - root - INFO - lr: 3.1336e-05 gnorm: 1.11 [11:04:03<13:28:20] +[titan] 2025-10-05 09:38:23,803 - root - INFO - step: 18045 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 09:38:23,803 - root - INFO - lr: 3.1327e-05 gnorm: 1.04 [11:04:13<13:28:09] +[titan] 2025-10-05 09:38:32,482 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:38:34,663 - root - INFO - step: 18050 loss: 2.1705 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:38:34,663 - root - INFO - lr: 3.1318e-05 gnorm: 1.05 [11:04:24<13:27:58] +[titan] 2025-10-05 09:38:45,582 - root - INFO - step: 18055 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 09:38:45,582 - root - INFO - lr: 3.1309e-05 gnorm: 1.10 [11:04:35<13:27:47] +[titan] 2025-10-05 09:38:56,461 - root - INFO - step: 18060 loss: 2.1737 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 09:38:56,462 - root - INFO - lr: 3.1301e-05 gnorm: 1.10 [11:04:46<13:27:35] +[titan] 2025-10-05 09:39:07,387 - root - INFO - step: 18065 loss: 2.2727 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0073 +[titan] 2025-10-05 09:39:07,388 - root - INFO - lr: 3.1292e-05 gnorm: 1.11 [11:04:57<13:27:24] +[titan] 2025-10-05 09:39:18,267 - root - INFO - step: 18070 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9196 +[titan] 2025-10-05 09:39:18,267 - root - INFO - lr: 3.1283e-05 gnorm: 1.03 [11:05:08<13:27:13] +[titan] 2025-10-05 09:39:29,177 - root - INFO - step: 18075 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 09:39:29,177 - root - INFO - lr: 3.1274e-05 gnorm: 1.09 [11:05:19<13:27:02] +[titan] 2025-10-05 09:39:40,351 - root - INFO - step: 18080 loss: 2.1525 memory: 118.84GiB(85.28%) tps: 29,326 tflops: 406.85 mfu: 41.14% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 09:39:40,352 - root - INFO - lr: 
3.1265e-05 gnorm: 1.07 [11:05:30<13:26:51] +[titan] 2025-10-05 09:39:51,220 - root - INFO - step: 18085 loss: 2.1539 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 09:39:51,221 - root - INFO - lr: 3.1257e-05 gnorm: 1.06 [11:05:41<13:26:40] +[titan] 2025-10-05 09:40:02,072 - root - INFO - step: 18090 loss: 2.1462 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8962 +[titan] 2025-10-05 09:40:02,072 - root - INFO - lr: 3.1248e-05 gnorm: 1.09 [11:05:52<13:26:28] +[titan] 2025-10-05 09:40:12,918 - root - INFO - step: 18095 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:40:12,918 - root - INFO - lr: 3.1239e-05 gnorm: 1.10 [11:06:03<13:26:17] +[titan] 2025-10-05 09:40:21,622 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:40:23,799 - root - INFO - step: 18100 loss: 2.2201 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:40:23,799 - root - INFO - lr: 3.1230e-05 gnorm: 1.06 [11:06:13<13:26:06] +[titan] 2025-10-05 09:40:34,657 - root - INFO - step: 18105 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9333 +[titan] 2025-10-05 09:40:34,657 - root - INFO - lr: 3.1221e-05 gnorm: 1.02 [11:06:24<13:25:55] +[titan] 2025-10-05 09:40:45,595 - root - INFO - step: 18110 loss: 2.2690 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0044 +[titan] 2025-10-05 09:40:45,595 - root - INFO - lr: 3.1213e-05 gnorm: 1.11 [11:06:35<13:25:43] +[titan] 2025-10-05 09:40:56,456 - root - INFO - step: 18115 loss: 2.1375 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 09:40:56,457 - root - INFO - lr: 3.1204e-05 gnorm: 1.07 [11:06:46<13:25:32] +[titan] 2025-10-05 09:41:07,318 - root - INFO - step: 18120 loss: 2.2233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9634 +[titan] 2025-10-05 09:41:07,318 - root - INFO - lr: 3.1195e-05 gnorm: 1.08 [11:06:57<13:25:21] +[titan] 2025-10-05 09:41:18,178 - root - INFO - step: 18125 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9584 +[titan] 2025-10-05 09:41:18,178 - root - INFO - lr: 3.1186e-05 gnorm: 1.07 [11:07:08<13:25:10] +[titan] 2025-10-05 09:41:29,063 - root - INFO - step: 18130 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9142 +[titan] 2025-10-05 09:41:29,063 - root - INFO - lr: 
3.1177e-05 gnorm: 1.07 [11:07:19<13:24:58] +[titan] 2025-10-05 09:41:39,951 - root - INFO - step: 18135 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9334 +[titan] 2025-10-05 09:41:39,951 - root - INFO - lr: 3.1169e-05 gnorm: 1.06 [11:07:30<13:24:47] +[titan] 2025-10-05 09:41:50,877 - root - INFO - step: 18140 loss: 2.2241 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 09:41:50,877 - root - INFO - lr: 3.1160e-05 gnorm: 1.13 [11:07:41<13:24:36] +[titan] 2025-10-05 09:42:01,736 - root - INFO - step: 18145 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 09:42:01,736 - root - INFO - lr: 3.1151e-05 gnorm: 1.11 [11:07:51<13:24:25] +[titan] 2025-10-05 09:42:10,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:42:12,607 - root - INFO - step: 18150 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8818 +[titan] 2025-10-05 09:42:12,608 - root - INFO - lr: 3.1142e-05 gnorm: 1.06 [11:08:02<13:24:13] +[titan] 2025-10-05 09:42:23,477 - root - INFO - step: 18155 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9019 +[titan] 2025-10-05 09:42:23,477 - root - INFO - lr: 3.1133e-05 gnorm: 1.02 [11:08:13<13:24:02] +[titan] 2025-10-05 09:42:34,354 - root - INFO - step: 18160 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:42:34,354 - root - INFO - lr: 3.1125e-05 gnorm: 1.06 [11:08:24<13:23:51] +[titan] 2025-10-05 09:42:45,261 - root - INFO - step: 18165 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 09:42:45,261 - root - INFO - lr: 3.1116e-05 gnorm: 1.07 [11:08:35<13:23:40] +[titan] 2025-10-05 09:42:56,138 - root - INFO - step: 18170 loss: 2.1630 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9103 +[titan] 2025-10-05 09:42:56,138 - root - INFO - lr: 3.1107e-05 gnorm: 1.06 [11:08:46<13:23:28] +[titan] 2025-10-05 09:43:06,985 - root - INFO - step: 18175 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 09:43:06,985 - root - INFO - lr: 3.1098e-05 gnorm: 1.05 [11:08:57<13:23:17] +[titan] 2025-10-05 09:43:17,840 - root - INFO - step: 18180 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 09:43:17,840 - root - INFO - lr: 
3.1089e-05 gnorm: 1.06 [11:09:08<13:23:06] +[titan] 2025-10-05 09:43:28,708 - root - INFO - step: 18185 loss: 2.2232 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9641 +[titan] 2025-10-05 09:43:28,708 - root - INFO - lr: 3.1080e-05 gnorm: 1.07 [11:09:18<13:22:55] +[titan] 2025-10-05 09:43:39,585 - root - INFO - step: 18190 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:43:39,586 - root - INFO - lr: 3.1072e-05 gnorm: 1.04 [11:09:29<13:22:43] +[titan] 2025-10-05 09:43:50,582 - root - INFO - step: 18195 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 29,799 tflops: 413.41 mfu: 41.80% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 09:43:50,582 - root - INFO - lr: 3.1063e-05 gnorm: 1.10 [11:09:40<13:22:32] +[titan] 2025-10-05 09:43:59,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:44:01,443 - root - INFO - step: 18200 loss: 2.1663 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9138 +[titan] 2025-10-05 09:44:01,443 - root - INFO - lr: 3.1054e-05 gnorm: 1.04 [11:09:51<13:22:21] +[titan] 2025-10-05 09:44:12,364 - root - INFO - step: 18205 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 09:44:12,364 - root - INFO - lr: 3.1045e-05 gnorm: 1.07 [11:10:02<13:22:10] +[titan] 2025-10-05 09:44:23,235 - root - INFO - step: 18210 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 09:44:23,235 - root - INFO - lr: 3.1036e-05 gnorm: 1.04 [11:10:13<13:21:59] +[titan] 2025-10-05 09:44:34,114 - root - INFO - step: 18215 loss: 2.1970 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 09:44:34,114 - root - INFO - lr: 3.1028e-05 gnorm: 1.08 [11:10:24<13:21:47] +[titan] 2025-10-05 09:44:45,034 - root - INFO - step: 18220 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:44:45,034 - root - INFO - lr: 3.1019e-05 gnorm: 1.02 [11:10:35<13:21:36] +[titan] 2025-10-05 09:44:55,926 - root - INFO - step: 18225 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 09:44:55,926 - root - INFO - lr: 3.1010e-05 gnorm: 1.06 [11:10:46<13:21:25] +[titan] 2025-10-05 09:45:06,773 - root - INFO - step: 18230 loss: 2.2584 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 09:45:06,773 - root - INFO - lr: 
3.1001e-05 gnorm: 1.08 [11:10:56<13:21:14] +[titan] 2025-10-05 09:45:17,639 - root - INFO - step: 18235 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:45:17,639 - root - INFO - lr: 3.0992e-05 gnorm: 1.10 [11:11:07<13:21:03] +[titan] 2025-10-05 09:45:28,516 - root - INFO - step: 18240 loss: 2.1421 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 09:45:28,516 - root - INFO - lr: 3.0984e-05 gnorm: 1.05 [11:11:18<13:20:51] +[titan] 2025-10-05 09:45:39,379 - root - INFO - step: 18245 loss: 2.1122 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8654 +[titan] 2025-10-05 09:45:39,379 - root - INFO - lr: 3.0975e-05 gnorm: 1.06 [11:11:29<13:20:40] +[titan] 2025-10-05 09:45:48,140 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:45:50,324 - root - INFO - step: 18250 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.39 mfu: 42.00% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9443 +[titan] 2025-10-05 09:45:50,324 - root - INFO - lr: 3.0966e-05 gnorm: 1.03 [11:11:40<13:20:29] +[titan] 2025-10-05 09:46:01,186 - root - INFO - step: 18255 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9065 +[titan] 2025-10-05 09:46:01,186 - root - INFO - lr: 3.0957e-05 gnorm: 1.04 [11:11:51<13:20:18] +[titan] 2025-10-05 09:46:12,076 - root - INFO - step: 18260 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:46:12,077 - root - INFO - lr: 3.0948e-05 gnorm: 1.05 [11:12:02<13:20:06] +[titan] 2025-10-05 09:46:22,981 - root - INFO - step: 18265 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 09:46:22,981 - root - INFO - lr: 3.0939e-05 gnorm: 1.12 [11:12:13<13:19:55] +[titan] 2025-10-05 09:46:33,839 - root - INFO - step: 18270 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:46:33,840 - root - INFO - lr: 3.0931e-05 gnorm: 1.05 [11:12:24<13:19:44] +[titan] 2025-10-05 09:46:44,752 - root - INFO - step: 18275 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 09:46:44,752 - root - INFO - lr: 3.0922e-05 gnorm: 1.10 [11:12:34<13:19:33] +[titan] 2025-10-05 09:46:55,639 - root - INFO - step: 18280 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 09:46:55,639 - root - INFO - lr: 
3.0913e-05 gnorm: 1.08 [11:12:45<13:19:21] +[titan] 2025-10-05 09:47:06,489 - root - INFO - step: 18285 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 09:47:06,489 - root - INFO - lr: 3.0904e-05 gnorm: 1.06 [11:12:56<13:19:10] +[titan] 2025-10-05 09:47:17,381 - root - INFO - step: 18290 loss: 2.2060 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:47:17,381 - root - INFO - lr: 3.0895e-05 gnorm: 1.12 [11:13:07<13:18:59] +[titan] 2025-10-05 09:47:28,261 - root - INFO - step: 18295 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9274 +[titan] 2025-10-05 09:47:28,261 - root - INFO - lr: 3.0887e-05 gnorm: 1.07 [11:13:18<13:18:48] +[titan] 2025-10-05 09:47:36,941 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:47:39,132 - root - INFO - step: 18300 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 09:47:39,132 - root - INFO - lr: 3.0878e-05 gnorm: 1.07 [11:13:29<13:18:37] +[titan] 2025-10-05 09:47:50,070 - root - INFO - step: 18305 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:47:50,070 - root - INFO - lr: 3.0869e-05 gnorm: 1.04 [11:13:40<13:18:25] +[titan] 2025-10-05 09:48:00,953 - root - INFO - step: 18310 loss: 2.2365 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 09:48:00,953 - root - INFO - lr: 3.0860e-05 gnorm: 1.10 [11:13:51<13:18:14] +[titan] 2025-10-05 09:48:11,821 - root - INFO - step: 18315 loss: 2.1228 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 09:48:11,821 - root - INFO - lr: 3.0851e-05 gnorm: 1.10 [11:14:01<13:18:03] +[titan] 2025-10-05 09:48:22,702 - root - INFO - step: 18320 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:22,703 - root - INFO - lr: 3.0842e-05 gnorm: 1.10 [11:14:12<13:17:52] +[titan] 2025-10-05 09:48:33,562 - root - INFO - step: 18325 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 09:48:33,562 - root - INFO - lr: 3.0834e-05 gnorm: 1.06 [11:14:23<13:17:40] +[titan] 2025-10-05 09:48:44,442 - root - INFO - step: 18330 loss: 2.1384 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:48:44,442 - root - INFO - lr: 
3.0825e-05 gnorm: 1.07 [11:14:34<13:17:29] +[titan] 2025-10-05 09:48:55,377 - root - INFO - step: 18335 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:55,377 - root - INFO - lr: 3.0816e-05 gnorm: 1.05 [11:14:45<13:17:18] +[titan] 2025-10-05 09:49:06,255 - root - INFO - step: 18340 loss: 2.1540 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:49:06,255 - root - INFO - lr: 3.0807e-05 gnorm: 1.02 [11:14:56<13:17:07] +[titan] 2025-10-05 09:49:17,139 - root - INFO - step: 18345 loss: 2.1319 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8845 +[titan] 2025-10-05 09:49:17,139 - root - INFO - lr: 3.0798e-05 gnorm: 1.06 [11:15:07<13:16:56] +[titan] 2025-10-05 09:49:25,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:49:28,007 - root - INFO - step: 18350 loss: 2.2255 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9664 +[titan] 2025-10-05 09:49:28,007 - root - INFO - lr: 3.0789e-05 gnorm: 1.07 [11:15:18<13:16:44] +[titan] 2025-10-05 09:49:38,916 - root - INFO - step: 18355 loss: 2.1700 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9168 +[titan] 2025-10-05 09:49:38,916 - root - INFO - lr: 3.0781e-05 gnorm: 1.03 [11:15:29<13:16:33] +[titan] 2025-10-05 09:49:49,833 - root - INFO - step: 18360 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9456 +[titan] 2025-10-05 09:49:49,834 - root - INFO - lr: 3.0772e-05 gnorm: 1.12 [11:15:39<13:16:22] +[titan] 2025-10-05 09:50:00,730 - root - INFO - step: 18365 loss: 2.2105 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 09:50:00,730 - root - INFO - lr: 3.0763e-05 gnorm: 1.05 [11:15:50<13:16:11] +[titan] 2025-10-05 09:50:11,596 - root - INFO - step: 18370 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9432 +[titan] 2025-10-05 09:50:11,596 - root - INFO - lr: 3.0754e-05 gnorm: 1.09 [11:16:01<13:15:59] +[titan] 2025-10-05 09:50:22,481 - root - INFO - step: 18375 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8909 +[titan] 2025-10-05 09:50:22,481 - root - INFO - lr: 3.0745e-05 gnorm: 1.07 [11:16:12<13:15:48] +[titan] 2025-10-05 09:50:33,379 - root - INFO - step: 18380 loss: 2.1743 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 09:50:33,379 - root - INFO - lr: 
3.0736e-05 gnorm: 1.05 [11:16:23<13:15:37] +[titan] 2025-10-05 09:50:44,278 - root - INFO - step: 18385 loss: 2.2455 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9831 +[titan] 2025-10-05 09:50:44,278 - root - INFO - lr: 3.0728e-05 gnorm: 1.08 [11:16:34<13:15:26] +[titan] 2025-10-05 09:50:55,194 - root - INFO - step: 18390 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 09:50:55,194 - root - INFO - lr: 3.0719e-05 gnorm: 1.04 [11:16:45<13:15:15] +[titan] 2025-10-05 09:51:06,079 - root - INFO - step: 18395 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9155 +[titan] 2025-10-05 09:51:06,080 - root - INFO - lr: 3.0710e-05 gnorm: 1.07 [11:16:56<13:15:03] +[titan] 2025-10-05 09:51:14,780 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:51:16,961 - root - INFO - step: 18400 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:51:16,961 - root - INFO - lr: 3.0701e-05 gnorm: 1.03 [11:17:07<13:14:52] +[titan] 2025-10-05 09:51:27,817 - root - INFO - step: 18405 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 09:51:27,817 - root - INFO - lr: 3.0692e-05 gnorm: 1.07 [11:17:17<13:14:41] +[titan] 2025-10-05 09:51:38,681 - root - INFO - step: 18410 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 09:51:38,682 - root - INFO - lr: 3.0683e-05 gnorm: 1.06 [11:17:28<13:14:30] +[titan] 2025-10-05 09:51:49,581 - root - INFO - step: 18415 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8667 +[titan] 2025-10-05 09:51:49,582 - root - INFO - lr: 3.0675e-05 gnorm: 1.10 [11:17:39<13:14:18] +[titan] 2025-10-05 09:52:00,490 - root - INFO - step: 18420 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:52:00,490 - root - INFO - lr: 3.0666e-05 gnorm: 1.06 [11:17:50<13:14:07] +[titan] 2025-10-05 09:52:11,358 - root - INFO - step: 18425 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9183 +[titan] 2025-10-05 09:52:11,358 - root - INFO - lr: 3.0657e-05 gnorm: 1.09 [11:18:01<13:13:56] +[titan] 2025-10-05 09:52:22,299 - root - INFO - step: 18430 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 09:52:22,299 - root - INFO - lr: 
3.0648e-05 gnorm: 1.10 [11:18:12<13:13:45] +[titan] 2025-10-05 09:52:26,813 - root - INFO - Dumping profiler traces at step 18432 +[titan] 2025-10-05 09:52:26,850 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:52:33,371 - root - INFO - step: 18435 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 29,597 tflops: 410.61 mfu: 41.52% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 09:52:33,371 - root - INFO - lr: 3.0639e-05 gnorm: 1.09 [11:18:23<13:13:34] +[titan] 2025-10-05 09:52:44,226 - root - INFO - step: 18440 loss: 2.1224 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8757 +[titan] 2025-10-05 09:52:44,226 - root - INFO - lr: 3.0630e-05 gnorm: 1.05 [11:18:34<13:13:23] +[titan] 2025-10-05 09:52:55,086 - root - INFO - step: 18445 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:52:55,086 - root - INFO - lr: 3.0622e-05 gnorm: 1.16 [11:18:45<13:13:11] +[titan] 2025-10-05 09:53:03,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:53:05,965 - root - INFO - step: 18450 loss: 2.1736 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:53:05,965 - root - INFO - lr: 3.0613e-05 gnorm: 1.04 [11:18:56<13:13:00] +[titan] 2025-10-05 09:53:16,799 - root - INFO - step: 18455 loss: 2.2016 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9454 +[titan] 2025-10-05 09:53:16,799 - root - INFO - lr: 3.0604e-05 gnorm: 1.08 [11:19:06<13:12:49] +[titan] 2025-10-05 09:53:27,669 - root - INFO - step: 18460 loss: 2.1859 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9308 +[titan] 2025-10-05 09:53:27,669 - root - INFO - lr: 3.0595e-05 gnorm: 1.09 [11:19:17<13:12:38] +[titan] 2025-10-05 09:53:38,507 - root - INFO - step: 18465 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:53:38,508 - root - INFO - lr: 3.0586e-05 gnorm: 1.11 [11:19:28<13:12:26] +[titan] 2025-10-05 09:53:49,352 - root - INFO - step: 18470 loss: 2.2070 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:53:49,352 - root - INFO - lr: 3.0577e-05 gnorm: 1.15 [11:19:39<13:12:15] +[titan] 2025-10-05 09:54:00,241 - root - INFO - step: 18475 loss: 2.2443 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:54:00,242 - root - INFO - lr: 3.0569e-05 gnorm: 1.07 [11:19:50<13:12:04] +[titan] 2025-10-05 09:54:11,123 - root - INFO - step: 18480 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 09:54:11,123 - root - INFO - lr: 
3.0560e-05 gnorm: 1.02 [11:20:01<13:11:53] +[titan] 2025-10-05 09:54:21,973 - root - INFO - step: 18485 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:54:21,973 - root - INFO - lr: 3.0551e-05 gnorm: 3.61 [11:20:12<13:11:41] +[titan] 2025-10-05 09:54:32,841 - root - INFO - step: 18490 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:54:32,841 - root - INFO - lr: 3.0542e-05 gnorm: 1.08 [11:20:22<13:11:30] +[titan] 2025-10-05 09:54:43,710 - root - INFO - step: 18495 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:54:43,710 - root - INFO - lr: 3.0533e-05 gnorm: 1.03 [11:20:33<13:11:19] +[titan] 2025-10-05 09:54:52,412 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:54:54,598 - root - INFO - step: 18500 loss: 2.1801 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:54:54,598 - root - INFO - lr: 3.0524e-05 gnorm: 1.07 [11:20:44<13:11:08] +[titan] 2025-10-05 09:55:05,447 - root - INFO - step: 18505 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 09:55:05,447 - root - INFO - lr: 3.0515e-05 gnorm: 1.05 [11:20:55<13:10:56] +[titan] 2025-10-05 09:55:16,304 - root - INFO - step: 18510 loss: 2.2328 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9729 +[titan] 2025-10-05 09:55:16,304 - root - INFO - lr: 3.0507e-05 gnorm: 1.12 [11:21:06<13:10:45] +[titan] 2025-10-05 09:55:27,210 - root - INFO - step: 18515 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 09:55:27,210 - root - INFO - lr: 3.0498e-05 gnorm: 1.04 [11:21:17<13:10:34] +[titan] 2025-10-05 09:55:38,070 - root - INFO - step: 18520 loss: 2.1990 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:55:38,071 - root - INFO - lr: 3.0489e-05 gnorm: 1.06 [11:21:28<13:10:23] +[titan] 2025-10-05 09:55:48,946 - root - INFO - step: 18525 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8803 +[titan] 2025-10-05 09:55:48,946 - root - INFO - lr: 3.0480e-05 gnorm: 1.10 [11:21:39<13:10:12] +[titan] 2025-10-05 09:55:59,819 - root - INFO - step: 18530 loss: 2.1569 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:55:59,819 - root - INFO - lr: 
3.0471e-05 gnorm: 1.09 [11:21:49<13:10:00] +[titan] 2025-10-05 09:56:10,667 - root - INFO - step: 18535 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 09:56:10,668 - root - INFO - lr: 3.0462e-05 gnorm: 1.08 [11:22:00<13:09:49] +[titan] 2025-10-05 09:56:21,514 - root - INFO - step: 18540 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8789 +[titan] 2025-10-05 09:56:21,515 - root - INFO - lr: 3.0454e-05 gnorm: 1.06 [11:22:11<13:09:38] +[titan] 2025-10-05 09:56:32,416 - root - INFO - step: 18545 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 09:56:32,416 - root - INFO - lr: 3.0445e-05 gnorm: 1.09 [11:22:22<13:09:27] +[titan] 2025-10-05 09:56:41,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:56:43,301 - root - INFO - step: 18550 loss: 2.2123 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9545 +[titan] 2025-10-05 09:56:43,301 - root - INFO - lr: 3.0436e-05 gnorm: 1.11 [11:22:33<13:09:15] +[titan] 2025-10-05 09:56:54,209 - root - INFO - step: 18555 loss: 2.1250 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8774 +[titan] 2025-10-05 09:56:54,209 - root - INFO - lr: 3.0427e-05 gnorm: 1.05 [11:22:44<13:09:04] +[titan] 2025-10-05 09:57:05,059 - root - INFO - step: 18560 loss: 2.1067 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 09:57:05,060 - root - INFO - lr: 3.0418e-05 gnorm: 1.11 [11:22:55<13:08:53] +[titan] 2025-10-05 09:57:15,909 - root - INFO - step: 18565 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8970 +[titan] 2025-10-05 09:57:15,909 - root - INFO - lr: 3.0409e-05 gnorm: 1.06 [11:23:06<13:08:42] +[titan] 2025-10-05 09:57:26,796 - root - INFO - step: 18570 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:57:26,796 - root - INFO - lr: 3.0400e-05 gnorm: 1.05 [11:23:16<13:08:30] +[titan] 2025-10-05 09:57:37,659 - root - INFO - step: 18575 loss: 2.1669 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:57:37,659 - root - INFO - lr: 3.0392e-05 gnorm: 1.07 [11:23:27<13:08:19] +[titan] 2025-10-05 09:57:48,558 - root - INFO - step: 18580 loss: 2.1694 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:57:48,559 - root - INFO - lr: 
3.0383e-05 gnorm: 1.08 [11:23:38<13:08:08] +[titan] 2025-10-05 09:57:59,464 - root - INFO - step: 18585 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 09:57:59,464 - root - INFO - lr: 3.0374e-05 gnorm: 1.08 [11:23:49<13:07:57] +[titan] 2025-10-05 09:58:10,319 - root - INFO - step: 18590 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 09:58:10,319 - root - INFO - lr: 3.0365e-05 gnorm: 1.09 [11:24:00<13:07:46] +[titan] 2025-10-05 09:58:21,169 - root - INFO - step: 18595 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 09:58:21,169 - root - INFO - lr: 3.0356e-05 gnorm: 1.06 [11:24:11<13:07:34] +[titan] 2025-10-05 09:58:29,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:58:32,051 - root - INFO - step: 18600 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:58:32,051 - root - INFO - lr: 3.0347e-05 gnorm: 1.06 [11:24:22<13:07:23] +[titan] 2025-10-05 09:58:42,941 - root - INFO - step: 18605 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 09:58:42,941 - root - INFO - lr: 3.0339e-05 gnorm: 1.07 [11:24:33<13:07:12] +[titan] 2025-10-05 09:58:53,849 - root - INFO - step: 18610 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 09:58:53,849 - root - INFO - lr: 3.0330e-05 gnorm: 1.13 [11:24:43<13:07:01] +[titan] 2025-10-05 09:59:04,767 - root - INFO - step: 18615 loss: 2.1618 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9098 +[titan] 2025-10-05 09:59:04,767 - root - INFO - lr: 3.0321e-05 gnorm: 1.07 [11:24:54<13:06:50] +[titan] 2025-10-05 09:59:15,655 - root - INFO - step: 18620 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:59:15,655 - root - INFO - lr: 3.0312e-05 gnorm: 1.08 [11:25:05<13:06:38] +[titan] 2025-10-05 09:59:26,502 - root - INFO - step: 18625 loss: 2.1982 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 09:59:26,502 - root - INFO - lr: 3.0303e-05 gnorm: 1.06 [11:25:16<13:06:27] +[titan] 2025-10-05 09:59:37,342 - root - INFO - step: 18630 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:59:37,342 - root - INFO - lr: 
3.0294e-05 gnorm: 1.02 [11:25:27<13:06:16] +[titan] 2025-10-05 09:59:48,204 - root - INFO - step: 18635 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:59:48,205 - root - INFO - lr: 3.0285e-05 gnorm: 1.06 [11:25:38<13:06:05] +[titan] 2025-10-05 09:59:59,141 - root - INFO - step: 18640 loss: 2.1586 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:59:59,142 - root - INFO - lr: 3.0277e-05 gnorm: 1.09 [11:25:49<13:05:53] +[titan] 2025-10-05 10:00:09,999 - root - INFO - step: 18645 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:00:09,999 - root - INFO - lr: 3.0268e-05 gnorm: 1.04 [11:26:00<13:05:42] +[titan] 2025-10-05 10:00:18,686 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:00:20,868 - root - INFO - step: 18650 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 10:00:20,868 - root - INFO - lr: 3.0259e-05 gnorm: 1.06 [11:26:11<13:05:31] +[titan] 2025-10-05 10:00:31,751 - root - INFO - step: 18655 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8955 +[titan] 2025-10-05 10:00:31,751 - root - INFO - lr: 3.0250e-05 gnorm: 1.10 [11:26:21<13:05:20] +[titan] 2025-10-05 10:00:42,624 - root - INFO - step: 18660 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9236 +[titan] 2025-10-05 10:00:42,624 - root - INFO - lr: 3.0241e-05 gnorm: 1.06 [11:26:32<13:05:08] +[titan] 2025-10-05 10:00:53,508 - root - INFO - step: 18665 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 10:00:53,508 - root - INFO - lr: 3.0232e-05 gnorm: 1.07 [11:26:43<13:04:57] +[titan] 2025-10-05 10:01:04,417 - root - INFO - step: 18670 loss: 2.1073 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8611 +[titan] 2025-10-05 10:01:04,417 - root - INFO - lr: 3.0223e-05 gnorm: 1.05 [11:26:54<13:04:46] +[titan] 2025-10-05 10:01:15,322 - root - INFO - step: 18675 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9018 +[titan] 2025-10-05 10:01:15,322 - root - INFO - lr: 3.0215e-05 gnorm: 1.01 [11:27:05<13:04:35] +[titan] 2025-10-05 10:01:26,187 - root - INFO - step: 18680 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 10:01:26,188 - root - INFO - lr: 
3.0206e-05 gnorm: 1.06 [11:27:16<13:04:24] +[titan] 2025-10-05 10:01:37,075 - root - INFO - step: 18685 loss: 2.2297 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9695 +[titan] 2025-10-05 10:01:37,076 - root - INFO - lr: 3.0197e-05 gnorm: 1.09 [11:27:27<13:04:12] +[titan] 2025-10-05 10:01:47,947 - root - INFO - step: 18690 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 10:01:47,947 - root - INFO - lr: 3.0188e-05 gnorm: 1.13 [11:27:38<13:04:01] +[titan] 2025-10-05 10:01:58,853 - root - INFO - step: 18695 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 10:01:58,853 - root - INFO - lr: 3.0179e-05 gnorm: 1.11 [11:27:48<13:03:50] +[titan] 2025-10-05 10:02:07,534 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:02:09,720 - root - INFO - step: 18700 loss: 2.1760 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9218 +[titan] 2025-10-05 10:02:09,720 - root - INFO - lr: 3.0170e-05 gnorm: 1.05 [11:27:59<13:03:39] +[titan] 2025-10-05 10:02:20,646 - root - INFO - step: 18705 loss: 2.1878 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:02:20,646 - root - INFO - lr: 3.0161e-05 gnorm: 1.13 [11:28:10<13:03:28] +[titan] 2025-10-05 10:02:31,519 - root - INFO - step: 18710 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9250 +[titan] 2025-10-05 10:02:31,519 - root - INFO - lr: 3.0153e-05 gnorm: 1.03 [11:28:21<13:03:16] +[titan] 2025-10-05 10:02:42,408 - root - INFO - step: 18715 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 10:02:42,408 - root - INFO - lr: 3.0144e-05 gnorm: 1.14 [11:28:32<13:03:05] +[titan] 2025-10-05 10:02:53,291 - root - INFO - step: 18720 loss: 2.1198 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 10:02:53,292 - root - INFO - lr: 3.0135e-05 gnorm: 1.04 [11:28:43<13:02:54] +[titan] 2025-10-05 10:03:04,164 - root - INFO - step: 18725 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 10:03:04,164 - root - INFO - lr: 3.0126e-05 gnorm: 1.09 [11:28:54<13:02:43] +[titan] 2025-10-05 10:03:15,026 - root - INFO - step: 18730 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 10:03:15,026 - root - INFO - lr: 
3.0117e-05 gnorm: 1.09 [11:29:05<13:02:32] +[titan] 2025-10-05 10:03:25,889 - root - INFO - step: 18735 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:03:25,889 - root - INFO - lr: 3.0108e-05 gnorm: 1.09 [11:29:16<13:02:20] +[titan] 2025-10-05 10:03:36,788 - root - INFO - step: 18740 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 10:03:36,789 - root - INFO - lr: 3.0099e-05 gnorm: 1.05 [11:29:26<13:02:09] +[titan] 2025-10-05 10:03:47,682 - root - INFO - step: 18745 loss: 2.1174 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:03:47,683 - root - INFO - lr: 3.0090e-05 gnorm: 1.02 [11:29:37<13:01:58] +[titan] 2025-10-05 10:03:56,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:03:58,557 - root - INFO - step: 18750 loss: 2.1769 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 10:03:58,557 - root - INFO - lr: 3.0082e-05 gnorm: 1.06 [11:29:48<13:01:47] +[titan] 2025-10-05 10:04:09,464 - root - INFO - step: 18755 loss: 2.1852 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9314 +[titan] 2025-10-05 10:04:09,464 - root - INFO - lr: 3.0073e-05 gnorm: 1.08 [11:29:59<13:01:35] +[titan] 2025-10-05 10:04:20,357 - root - INFO - step: 18760 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:04:20,357 - root - INFO - lr: 3.0064e-05 gnorm: 1.07 [11:30:10<13:01:24] +[titan] 2025-10-05 10:04:31,231 - root - INFO - step: 18765 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 10:04:31,232 - root - INFO - lr: 3.0055e-05 gnorm: 1.11 [11:30:21<13:01:13] +[titan] 2025-10-05 10:04:42,135 - root - INFO - step: 18770 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 10:04:42,136 - root - INFO - lr: 3.0046e-05 gnorm: 1.10 [11:30:32<13:01:02] +[titan] 2025-10-05 10:04:53,006 - root - INFO - step: 18775 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 10:04:53,006 - root - INFO - lr: 3.0037e-05 gnorm: 1.03 [11:30:43<13:00:51] +[titan] 2025-10-05 10:05:03,904 - root - INFO - step: 18780 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 10:05:03,904 - root - INFO - lr: 
3.0028e-05 gnorm: 1.07 [11:30:54<13:00:39] +[titan] 2025-10-05 10:05:14,747 - root - INFO - step: 18785 loss: 2.1812 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 10:05:14,748 - root - INFO - lr: 3.0020e-05 gnorm: 1.09 [11:31:04<13:00:28] +[titan] 2025-10-05 10:05:25,598 - root - INFO - step: 18790 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 10:05:25,598 - root - INFO - lr: 3.0011e-05 gnorm: 1.06 [11:31:15<13:00:17] +[titan] 2025-10-05 10:05:36,466 - root - INFO - step: 18795 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 10:05:36,466 - root - INFO - lr: 3.0002e-05 gnorm: 1.04 [11:31:26<13:00:06] +[titan] 2025-10-05 10:05:45,130 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:05:47,339 - root - INFO - step: 18800 loss: 2.2290 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 10:05:47,340 - root - INFO - lr: 2.9993e-05 gnorm: 1.07 [11:31:37<12:59:55] +[titan] 2025-10-05 10:05:58,210 - root - INFO - step: 18805 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 10:05:58,210 - root - INFO - lr: 2.9984e-05 gnorm: 1.07 [11:31:48<12:59:43] +[titan] 2025-10-05 10:06:09,120 - root - INFO - step: 18810 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 10:06:09,121 - root - INFO - lr: 2.9975e-05 gnorm: 1.04 [11:31:59<12:59:32] +[titan] 2025-10-05 10:06:19,986 - root - INFO - step: 18815 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 10:06:19,986 - root - INFO - lr: 2.9966e-05 gnorm: 1.08 [11:32:10<12:59:21] +[titan] 2025-10-05 10:06:30,847 - root - INFO - step: 18820 loss: 2.1851 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 10:06:30,847 - root - INFO - lr: 2.9957e-05 gnorm: 1.05 [11:32:20<12:59:10] +[titan] 2025-10-05 10:06:41,727 - root - INFO - step: 18825 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:06:41,727 - root - INFO - lr: 2.9949e-05 gnorm: 1.07 [11:32:31<12:58:58] +[titan] 2025-10-05 10:06:52,600 - root - INFO - step: 18830 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 10:06:52,601 - root - INFO - lr: 
2.9940e-05 gnorm: 1.10 [11:32:42<12:58:47] +[titan] 2025-10-05 10:07:03,527 - root - INFO - step: 18835 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9505 +[titan] 2025-10-05 10:07:03,527 - root - INFO - lr: 2.9931e-05 gnorm: 1.07 [11:32:53<12:58:36] +[titan] 2025-10-05 10:07:14,367 - root - INFO - step: 18840 loss: 2.2003 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 10:07:14,367 - root - INFO - lr: 2.9922e-05 gnorm: 1.08 [11:33:04<12:58:25] +[titan] 2025-10-05 10:07:25,248 - root - INFO - step: 18845 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 10:07:25,248 - root - INFO - lr: 2.9913e-05 gnorm: 1.08 [11:33:15<12:58:14] +[titan] 2025-10-05 10:07:33,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:07:36,110 - root - INFO - step: 18850 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 10:07:36,110 - root - INFO - lr: 2.9904e-05 gnorm: 1.09 [11:33:26<12:58:02] +[titan] 2025-10-05 10:07:46,979 - root - INFO - step: 18855 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:07:46,979 - root - INFO - lr: 2.9895e-05 gnorm: 1.07 [11:33:37<12:57:51] +[titan] 2025-10-05 10:07:57,853 - root - INFO - step: 18860 loss: 2.1443 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:07:57,853 - root - INFO - lr: 2.9886e-05 gnorm: 1.06 [11:33:47<12:57:40] +[titan] 2025-10-05 10:08:08,767 - root - INFO - step: 18865 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 10:08:08,767 - root - INFO - lr: 2.9878e-05 gnorm: 1.05 [11:33:58<12:57:29] +[titan] 2025-10-05 10:08:19,643 - root - INFO - step: 18870 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:08:19,643 - root - INFO - lr: 2.9869e-05 gnorm: 1.09 [11:34:09<12:57:18] +[titan] 2025-10-05 10:08:30,505 - root - INFO - step: 18875 loss: 2.1432 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:08:30,505 - root - INFO - lr: 2.9860e-05 gnorm: 1.11 [11:34:20<12:57:06] +[titan] 2025-10-05 10:08:41,373 - root - INFO - step: 18880 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8639 +[titan] 2025-10-05 10:08:41,373 - root - INFO - lr: 
2.9851e-05 gnorm: 1.04 [11:34:31<12:56:55] +[titan] 2025-10-05 10:08:52,227 - root - INFO - step: 18885 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:08:52,227 - root - INFO - lr: 2.9842e-05 gnorm: 1.09 [11:34:42<12:56:44] +[titan] 2025-10-05 10:09:03,099 - root - INFO - step: 18890 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 10:09:03,099 - root - INFO - lr: 2.9833e-05 gnorm: 1.06 [11:34:53<12:56:33] +[titan] 2025-10-05 10:09:13,982 - root - INFO - step: 18895 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 10:09:13,983 - root - INFO - lr: 2.9824e-05 gnorm: 1.06 [11:35:04<12:56:21] +[titan] 2025-10-05 10:09:22,708 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:09:24,889 - root - INFO - step: 18900 loss: 2.2596 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 10:09:24,889 - root - INFO - lr: 2.9815e-05 gnorm: 1.09 [11:35:14<12:56:10] +[titan] 2025-10-05 10:09:35,723 - root - INFO - step: 18905 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9049 +[titan] 2025-10-05 10:09:35,723 - root - INFO - lr: 2.9807e-05 gnorm: 2.16 [11:35:25<12:55:59] +[titan] 2025-10-05 10:09:46,616 - root - INFO - step: 18910 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 10:09:46,616 - root - INFO - lr: 2.9798e-05 gnorm: 1.10 [11:35:36<12:55:48] +[titan] 2025-10-05 10:09:57,505 - root - INFO - step: 18915 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8990 +[titan] 2025-10-05 10:09:57,506 - root - INFO - lr: 2.9789e-05 gnorm: 1.06 [11:35:47<12:55:37] +[titan] 2025-10-05 10:10:08,408 - root - INFO - step: 18920 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 10:10:08,409 - root - INFO - lr: 2.9780e-05 gnorm: 1.11 [11:35:58<12:55:25] +[titan] 2025-10-05 10:10:19,290 - root - INFO - step: 18925 loss: 2.1401 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8910 +[titan] 2025-10-05 10:10:19,290 - root - INFO - lr: 2.9771e-05 gnorm: 1.09 [11:36:09<12:55:14] +[titan] 2025-10-05 10:10:30,188 - root - INFO - step: 18930 loss: 2.1578 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9075 +[titan] 2025-10-05 10:10:30,188 - root - INFO - lr: 
2.9762e-05 gnorm: 1.08 [11:36:20<12:55:03] +[titan] 2025-10-05 10:10:41,057 - root - INFO - step: 18935 loss: 2.1455 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:10:41,057 - root - INFO - lr: 2.9753e-05 gnorm: 1.08 [11:36:31<12:54:52] +[titan] 2025-10-05 10:10:51,943 - root - INFO - step: 18940 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 10:10:51,943 - root - INFO - lr: 2.9744e-05 gnorm: 1.09 [11:36:42<12:54:41] +[titan] 2025-10-05 10:11:00,925 - root - INFO - Dumping profiler traces at step 18944 +[titan] 2025-10-05 10:11:00,963 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:11:03,165 - root - INFO - step: 18945 loss: 2.2146 memory: 118.84GiB(85.28%) tps: 29,200 tflops: 405.11 mfu: 40.96% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 10:11:03,166 - root - INFO - lr: 2.9736e-05 gnorm: 1.05 [11:36:53<12:54:30] +[titan] 2025-10-05 10:11:11,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:11:14,040 - root - INFO - step: 18950 loss: 2.2217 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9615 +[titan] 2025-10-05 10:11:14,040 - root - INFO - lr: 2.9727e-05 gnorm: 1.10 [11:37:04<12:54:19] +[titan] 2025-10-05 10:11:24,927 - root - INFO - step: 18955 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 10:11:24,927 - root - INFO - lr: 2.9718e-05 gnorm: 1.04 [11:37:15<12:54:07] +[titan] 2025-10-05 10:11:35,836 - root - INFO - step: 18960 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:11:35,836 - root - INFO - lr: 2.9709e-05 gnorm: 1.08 [11:37:25<12:53:56] +[titan] 2025-10-05 10:11:46,725 - root - INFO - step: 18965 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9561 +[titan] 2025-10-05 10:11:46,725 - root - INFO - lr: 2.9700e-05 gnorm: 1.06 [11:37:36<12:53:45] +[titan] 2025-10-05 10:11:57,607 - root - INFO - step: 18970 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 10:11:57,608 - root - INFO - lr: 2.9691e-05 gnorm: 1.06 [11:37:47<12:53:34] +[titan] 2025-10-05 10:12:08,492 - root - INFO - step: 18975 loss: 2.1885 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9336 +[titan] 2025-10-05 10:12:08,492 - root - INFO - lr: 2.9682e-05 gnorm: 1.05 [11:37:58<12:53:23] +[titan] 2025-10-05 10:12:19,363 - root - INFO - step: 18980 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 10:12:19,364 - root - INFO - lr: 
2.9673e-05 gnorm: 1.05 [11:38:09<12:53:11] +[titan] 2025-10-05 10:12:30,251 - root - INFO - step: 18985 loss: 2.2178 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 10:12:30,251 - root - INFO - lr: 2.9664e-05 gnorm: 1.08 [11:38:20<12:53:00] +[titan] 2025-10-05 10:12:41,145 - root - INFO - step: 18990 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 10:12:41,146 - root - INFO - lr: 2.9656e-05 gnorm: 1.04 [11:38:31<12:52:49] +[titan] 2025-10-05 10:12:52,037 - root - INFO - step: 18995 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:12:52,037 - root - INFO - lr: 2.9647e-05 gnorm: 1.06 [11:38:42<12:52:38] +[titan] 2025-10-05 10:13:00,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:13:02,914 - root - INFO - step: 19000 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9377 +[titan] 2025-10-05 10:13:02,914 - root - INFO - lr: 2.9638e-05 gnorm: 1.06 [11:38:53<12:52:27] +[titan] 2025-10-05 10:13:13,797 - root - INFO - step: 19005 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 10:13:13,797 - root - INFO - lr: 2.9629e-05 gnorm: 1.09 [11:39:03<12:52:15] +[titan] 2025-10-05 10:13:24,684 - root - INFO - step: 19010 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 10:13:24,684 - root - INFO - lr: 2.9620e-05 gnorm: 1.04 [11:39:14<12:52:04] +[titan] 2025-10-05 10:13:35,565 - root - INFO - step: 19015 loss: 2.1615 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 10:13:35,565 - root - INFO - lr: 2.9611e-05 gnorm: 1.06 [11:39:25<12:51:53] +[titan] 2025-10-05 10:13:46,458 - root - INFO - step: 19020 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:13:46,458 - root - INFO - lr: 2.9602e-05 gnorm: 1.09 [11:39:36<12:51:42] +[titan] 2025-10-05 10:13:57,365 - root - INFO - step: 19025 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9259 +[titan] 2025-10-05 10:13:57,365 - root - INFO - lr: 2.9593e-05 gnorm: 1.11 [11:39:47<12:51:31] +[titan] 2025-10-05 10:14:08,243 - root - INFO - step: 19030 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 10:14:08,244 - root - INFO - lr: 
2.9585e-05 gnorm: 1.11 [11:39:58<12:51:19] +[titan] 2025-10-05 10:14:19,180 - root - INFO - step: 19035 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.68 mfu: 42.03% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 10:14:19,180 - root - INFO - lr: 2.9576e-05 gnorm: 1.08 [11:40:09<12:51:08] +[titan] 2025-10-05 10:14:30,057 - root - INFO - step: 19040 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 10:14:30,058 - root - INFO - lr: 2.9567e-05 gnorm: 1.08 [11:40:20<12:50:57] +[titan] 2025-10-05 10:14:40,934 - root - INFO - step: 19045 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:14:40,934 - root - INFO - lr: 2.9558e-05 gnorm: 1.05 [11:40:31<12:50:46] +[titan] 2025-10-05 10:14:49,618 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:14:51,803 - root - INFO - step: 19050 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 10:14:51,803 - root - INFO - lr: 2.9549e-05 gnorm: 1.10 [11:40:41<12:50:35] +[titan] 2025-10-05 10:15:02,687 - root - INFO - step: 19055 loss: 2.1320 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:15:02,687 - root - INFO - lr: 2.9540e-05 gnorm: 1.03 [11:40:52<12:50:23] +[titan] 2025-10-05 10:15:13,599 - root - INFO - step: 19060 loss: 2.1731 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:15:13,600 - root - INFO - lr: 2.9531e-05 gnorm: 1.05 [11:41:03<12:50:12] +[titan] 2025-10-05 10:15:24,470 - root - INFO - step: 19065 loss: 2.0790 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 10:15:24,470 - root - INFO - lr: 2.9522e-05 gnorm: 1.02 [11:41:14<12:50:01] +[titan] 2025-10-05 10:15:35,340 - root - INFO - step: 19070 loss: 2.1215 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 10:15:35,340 - root - INFO - lr: 2.9513e-05 gnorm: 1.03 [11:41:25<12:49:50] +[titan] 2025-10-05 10:15:46,220 - root - INFO - step: 19075 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 10:15:46,220 - root - INFO - lr: 2.9505e-05 gnorm: 1.03 [11:41:36<12:49:39] +[titan] 2025-10-05 10:15:57,087 - root - INFO - step: 19080 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 10:15:57,087 - root - INFO - lr: 
2.9496e-05 gnorm: 1.08 [11:41:47<12:49:27] +[titan] 2025-10-05 10:16:07,949 - root - INFO - step: 19085 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:16:07,949 - root - INFO - lr: 2.9487e-05 gnorm: 1.03 [11:41:58<12:49:16] +[titan] 2025-10-05 10:16:18,866 - root - INFO - step: 19090 loss: 2.1027 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:16:18,866 - root - INFO - lr: 2.9478e-05 gnorm: 1.05 [11:42:08<12:49:05] +[titan] 2025-10-05 10:16:29,722 - root - INFO - step: 19095 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 10:16:29,722 - root - INFO - lr: 2.9469e-05 gnorm: 1.07 [11:42:19<12:48:54] +[titan] 2025-10-05 10:16:38,404 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:16:40,594 - root - INFO - step: 19100 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 10:16:40,594 - root - INFO - lr: 2.9460e-05 gnorm: 1.12 [11:42:30<12:48:43] +[titan] 2025-10-05 10:16:51,467 - root - INFO - step: 19105 loss: 2.1659 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 10:16:51,467 - root - INFO - lr: 2.9451e-05 gnorm: 1.07 [11:42:41<12:48:31] +[titan] 2025-10-05 10:17:02,333 - root - INFO - step: 19110 loss: 2.1571 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:17:02,333 - root - INFO - lr: 2.9442e-05 gnorm: 1.07 [11:42:52<12:48:20] +[titan] 2025-10-05 10:17:13,254 - root - INFO - step: 19115 loss: 2.1907 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9353 +[titan] 2025-10-05 10:17:13,254 - root - INFO - lr: 2.9433e-05 gnorm: 1.05 [11:43:03<12:48:09] +[titan] 2025-10-05 10:17:24,144 - root - INFO - step: 19120 loss: 2.2215 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 10:17:24,145 - root - INFO - lr: 2.9424e-05 gnorm: 1.08 [11:43:14<12:47:58] +[titan] 2025-10-05 10:17:34,985 - root - INFO - step: 19125 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 10:17:34,985 - root - INFO - lr: 2.9416e-05 gnorm: 1.07 [11:43:25<12:47:46] +[titan] 2025-10-05 10:17:45,834 - root - INFO - step: 19130 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 10:17:45,834 - root - INFO - lr: 
2.9407e-05 gnorm: 1.04 [11:43:35<12:47:35] +[titan] 2025-10-05 10:17:56,697 - root - INFO - step: 19135 loss: 2.1835 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 10:17:56,697 - root - INFO - lr: 2.9398e-05 gnorm: 1.04 [11:43:46<12:47:24] +[titan] 2025-10-05 10:18:07,545 - root - INFO - step: 19140 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 10:18:07,545 - root - INFO - lr: 2.9389e-05 gnorm: 1.06 [11:43:57<12:47:13] +[titan] 2025-10-05 10:18:18,458 - root - INFO - step: 19145 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9199 +[titan] 2025-10-05 10:18:18,458 - root - INFO - lr: 2.9380e-05 gnorm: 1.08 [11:44:08<12:47:02] +[titan] 2025-10-05 10:18:27,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:18:29,337 - root - INFO - step: 19150 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8515 +[titan] 2025-10-05 10:18:29,338 - root - INFO - lr: 2.9371e-05 gnorm: 1.34 [11:44:19<12:46:50] +[titan] 2025-10-05 10:18:40,256 - root - INFO - step: 19155 loss: 2.1332 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8849 +[titan] 2025-10-05 10:18:40,256 - root - INFO - lr: 2.9362e-05 gnorm: 1.09 [11:44:30<12:46:39] +[titan] 2025-10-05 10:18:51,145 - root - INFO - step: 19160 loss: 2.1481 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 10:18:51,145 - root - INFO - lr: 2.9353e-05 gnorm: 1.07 [11:44:41<12:46:28] +[titan] 2025-10-05 10:19:02,037 - root - INFO - step: 19165 loss: 2.1516 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 10:19:02,037 - root - INFO - lr: 2.9344e-05 gnorm: 1.05 [11:44:52<12:46:17] +[titan] 2025-10-05 10:19:12,937 - root - INFO - step: 19170 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9133 +[titan] 2025-10-05 10:19:12,937 - root - INFO - lr: 2.9336e-05 gnorm: 1.08 [11:45:03<12:46:06] +[titan] 2025-10-05 10:19:23,875 - root - INFO - step: 19175 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 10:19:23,875 - root - INFO - lr: 2.9327e-05 gnorm: 1.04 [11:45:13<12:45:55] +[titan] 2025-10-05 10:19:34,776 - root - INFO - step: 19180 loss: 2.1428 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 10:19:34,776 - root - INFO - lr: 
2.9318e-05 gnorm: 1.07 [11:45:24<12:45:43] +[titan] 2025-10-05 10:19:45,696 - root - INFO - step: 19185 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 10:19:45,696 - root - INFO - lr: 2.9309e-05 gnorm: 1.05 [11:45:35<12:45:32] +[titan] 2025-10-05 10:19:56,559 - root - INFO - step: 19190 loss: 2.2063 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 10:19:56,559 - root - INFO - lr: 2.9300e-05 gnorm: 1.05 [11:45:46<12:45:21] +[titan] 2025-10-05 10:20:07,440 - root - INFO - step: 19195 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 10:20:07,440 - root - INFO - lr: 2.9291e-05 gnorm: 1.06 [11:45:57<12:45:10] +[titan] 2025-10-05 10:20:16,203 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:20:18,388 - root - INFO - step: 19200 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:20:18,389 - root - INFO - lr: 2.9282e-05 gnorm: 1.10 [11:46:08<12:44:59] +[titan] 2025-10-05 10:20:29,261 - root - INFO - step: 19205 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9722 +[titan] 2025-10-05 10:20:29,261 - root - INFO - lr: 2.9273e-05 gnorm: 1.05 [11:46:19<12:44:47] +[titan] 2025-10-05 10:20:40,136 - root - INFO - step: 19210 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 10:20:40,136 - root - INFO - lr: 2.9264e-05 gnorm: 1.05 [11:46:30<12:44:36] +[titan] 2025-10-05 10:20:51,016 - root - INFO - step: 19215 loss: 2.1099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8637 +[titan] 2025-10-05 10:20:51,016 - root - INFO - lr: 2.9255e-05 gnorm: 1.02 [11:46:41<12:44:25] +[titan] 2025-10-05 10:21:01,917 - root - INFO - step: 19220 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 10:21:01,918 - root - INFO - lr: 2.9247e-05 gnorm: 1.06 [11:46:51<12:44:14] +[titan] 2025-10-05 10:21:12,779 - root - INFO - step: 19225 loss: 2.1977 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9427 +[titan] 2025-10-05 10:21:12,780 - root - INFO - lr: 2.9238e-05 gnorm: 1.08 [11:47:02<12:44:03] +[titan] 2025-10-05 10:21:23,714 - root - INFO - step: 19230 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 10:21:23,714 - root - INFO - lr: 
2.9229e-05 gnorm: 1.09 [11:47:13<12:43:52] +[titan] 2025-10-05 10:21:34,597 - root - INFO - step: 19235 loss: 2.1070 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 10:21:34,597 - root - INFO - lr: 2.9220e-05 gnorm: 1.08 [11:47:24<12:43:40] +[titan] 2025-10-05 10:21:45,492 - root - INFO - step: 19240 loss: 2.0962 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 10:21:45,492 - root - INFO - lr: 2.9211e-05 gnorm: 1.04 [11:47:35<12:43:29] +[titan] 2025-10-05 10:21:56,367 - root - INFO - step: 19245 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 10:21:56,367 - root - INFO - lr: 2.9202e-05 gnorm: 1.08 [11:47:46<12:43:18] +[titan] 2025-10-05 10:22:05,105 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:22:07,295 - root - INFO - step: 19250 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:22:07,296 - root - INFO - lr: 2.9193e-05 gnorm: 1.04 [11:47:57<12:43:07] +[titan] 2025-10-05 10:22:18,238 - root - INFO - step: 19255 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 10:22:18,238 - root - INFO - lr: 2.9184e-05 gnorm: 1.06 [11:48:08<12:42:56] +[titan] 2025-10-05 10:22:29,120 - root - INFO - step: 19260 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 10:22:29,120 - root - INFO - lr: 2.9175e-05 gnorm: 1.10 [11:48:19<12:42:44] +[titan] 2025-10-05 10:22:40,008 - root - INFO - step: 19265 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:22:40,008 - root - INFO - lr: 2.9167e-05 gnorm: 1.08 [11:48:30<12:42:33] +[titan] 2025-10-05 10:22:50,875 - root - INFO - step: 19270 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 10:22:50,875 - root - INFO - lr: 2.9158e-05 gnorm: 1.07 [11:48:40<12:42:22] +[titan] 2025-10-05 10:23:01,737 - root - INFO - step: 19275 loss: 2.1975 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:23:01,737 - root - INFO - lr: 2.9149e-05 gnorm: 1.08 [11:48:51<12:42:11] +[titan] 2025-10-05 10:23:12,658 - root - INFO - step: 19280 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 10:23:12,658 - root - INFO - lr: 
2.9140e-05 gnorm: 1.06 [11:49:02<12:42:00] +[titan] 2025-10-05 10:23:23,594 - root - INFO - step: 19285 loss: 2.1554 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9047 +[titan] 2025-10-05 10:23:23,595 - root - INFO - lr: 2.9131e-05 gnorm: 1.11 [11:49:13<12:41:49] +[titan] 2025-10-05 10:23:34,471 - root - INFO - step: 19290 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9525 +[titan] 2025-10-05 10:23:34,471 - root - INFO - lr: 2.9122e-05 gnorm: 1.08 [11:49:24<12:41:37] +[titan] 2025-10-05 10:23:45,370 - root - INFO - step: 19295 loss: 2.2145 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 10:23:45,370 - root - INFO - lr: 2.9113e-05 gnorm: 1.11 [11:49:35<12:41:26] +[titan] 2025-10-05 10:23:54,147 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:23:56,333 - root - INFO - step: 19300 loss: 2.1524 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 10:23:56,333 - root - INFO - lr: 2.9104e-05 gnorm: 1.12 [11:49:46<12:41:15] +[titan] 2025-10-05 10:24:07,214 - root - INFO - step: 19305 loss: 2.1152 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 10:24:07,214 - root - INFO - lr: 2.9095e-05 gnorm: 1.06 [11:49:57<12:41:04] +[titan] 2025-10-05 10:24:18,150 - root - INFO - step: 19310 loss: 2.1360 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8875 +[titan] 2025-10-05 10:24:18,150 - root - INFO - lr: 2.9086e-05 gnorm: 1.11 [11:50:08<12:40:53] +[titan] 2025-10-05 10:24:29,081 - root - INFO - step: 19315 loss: 2.1682 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9158 +[titan] 2025-10-05 10:24:29,081 - root - INFO - lr: 2.9077e-05 gnorm: 1.08 [11:50:19<12:40:42] +[titan] 2025-10-05 10:24:39,944 - root - INFO - step: 19320 loss: 2.1420 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 10:24:39,945 - root - INFO - lr: 2.9069e-05 gnorm: 1.05 [11:50:30<12:40:30] +[titan] 2025-10-05 10:24:50,812 - root - INFO - step: 19325 loss: 2.1255 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 10:24:50,812 - root - INFO - lr: 2.9060e-05 gnorm: 1.04 [11:50:40<12:40:19] +[titan] 2025-10-05 10:25:01,681 - root - INFO - step: 19330 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 10:25:01,681 - root - INFO - lr: 
2.9051e-05 gnorm: 1.04 [11:50:51<12:40:08] +[titan] 2025-10-05 10:25:12,540 - root - INFO - step: 19335 loss: 2.1642 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 10:25:12,540 - root - INFO - lr: 2.9042e-05 gnorm: 1.05 [11:51:02<12:39:57] +[titan] 2025-10-05 10:25:23,440 - root - INFO - step: 19340 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 10:25:23,441 - root - INFO - lr: 2.9033e-05 gnorm: 1.08 [11:51:13<12:39:46] +[titan] 2025-10-05 10:25:34,323 - root - INFO - step: 19345 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 10:25:34,323 - root - INFO - lr: 2.9024e-05 gnorm: 1.06 [11:51:24<12:39:34] +[titan] 2025-10-05 10:25:42,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:25:45,172 - root - INFO - step: 19350 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 10:25:45,173 - root - INFO - lr: 2.9015e-05 gnorm: 1.06 [11:51:35<12:39:23] +[titan] 2025-10-05 10:25:56,041 - root - INFO - step: 19355 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 10:25:56,041 - root - INFO - lr: 2.9006e-05 gnorm: 1.05 [11:51:46<12:39:12] +[titan] 2025-10-05 10:26:06,901 - root - INFO - step: 19360 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 10:26:06,901 - root - INFO - lr: 2.8997e-05 gnorm: 1.09 [11:51:56<12:39:01] +[titan] 2025-10-05 10:26:17,768 - root - INFO - step: 19365 loss: 2.2565 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 10:26:17,768 - root - INFO - lr: 2.8988e-05 gnorm: 1.06 [11:52:07<12:38:50] +[titan] 2025-10-05 10:26:28,693 - root - INFO - step: 19370 loss: 2.1913 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 10:26:28,693 - root - INFO - lr: 2.8980e-05 gnorm: 1.07 [11:52:18<12:38:38] +[titan] 2025-10-05 10:26:39,550 - root - INFO - step: 19375 loss: 2.2098 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:26:39,550 - root - INFO - lr: 2.8971e-05 gnorm: 1.10 [11:52:29<12:38:27] +[titan] 2025-10-05 10:26:50,433 - root - INFO - step: 19380 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 10:26:50,433 - root - INFO - lr: 
2.8962e-05 gnorm: 1.07 [11:52:40<12:38:16] +[titan] 2025-10-05 10:27:01,279 - root - INFO - step: 19385 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 10:27:01,279 - root - INFO - lr: 2.8953e-05 gnorm: 1.04 [11:52:51<12:38:05] +[titan] 2025-10-05 10:27:12,140 - root - INFO - step: 19390 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 10:27:12,141 - root - INFO - lr: 2.8944e-05 gnorm: 1.10 [11:53:02<12:37:54] +[titan] 2025-10-05 10:27:23,043 - root - INFO - step: 19395 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:27:23,043 - root - INFO - lr: 2.8935e-05 gnorm: 1.07 [11:53:13<12:37:42] +[titan] 2025-10-05 10:27:31,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:27:33,895 - root - INFO - step: 19400 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:27:33,895 - root - INFO - lr: 2.8926e-05 gnorm: 1.06 [11:53:23<12:37:31] +[titan] 2025-10-05 10:27:44,768 - root - INFO - step: 19405 loss: 2.0933 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 10:27:44,768 - root - INFO - lr: 2.8917e-05 gnorm: 1.05 [11:53:34<12:37:20] +[titan] 2025-10-05 10:27:55,669 - root - INFO - step: 19410 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 10:27:55,669 - root - INFO - lr: 2.8908e-05 gnorm: 1.05 [11:53:45<12:37:09] +[titan] 2025-10-05 10:28:06,538 - root - INFO - step: 19415 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 10:28:06,538 - root - INFO - lr: 2.8899e-05 gnorm: 1.07 [11:53:56<12:36:58] +[titan] 2025-10-05 10:28:17,407 - root - INFO - step: 19420 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:28:17,407 - root - INFO - lr: 2.8890e-05 gnorm: 1.09 [11:54:07<12:36:46] +[titan] 2025-10-05 10:28:28,336 - root - INFO - step: 19425 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9504 +[titan] 2025-10-05 10:28:28,336 - root - INFO - lr: 2.8882e-05 gnorm: 1.11 [11:54:18<12:36:35] +[titan] 2025-10-05 10:28:39,218 - root - INFO - step: 19430 loss: 2.1045 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8601 +[titan] 2025-10-05 10:28:39,218 - root - INFO - lr: 
2.8873e-05 gnorm: 1.08 [11:54:29<12:36:24] +[titan] 2025-10-05 10:28:50,126 - root - INFO - step: 19435 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9318 +[titan] 2025-10-05 10:28:50,126 - root - INFO - lr: 2.8864e-05 gnorm: 1.07 [11:54:40<12:36:13] +[titan] 2025-10-05 10:29:01,033 - root - INFO - step: 19440 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9099 +[titan] 2025-10-05 10:29:01,033 - root - INFO - lr: 2.8855e-05 gnorm: 1.04 [11:54:51<12:36:02] +[titan] 2025-10-05 10:29:11,909 - root - INFO - step: 19445 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 10:29:11,909 - root - INFO - lr: 2.8846e-05 gnorm: 1.06 [11:55:01<12:35:50] +[titan] 2025-10-05 10:29:20,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:29:22,834 - root - INFO - step: 19450 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8914 +[titan] 2025-10-05 10:29:22,834 - root - INFO - lr: 2.8837e-05 gnorm: 1.04 [11:55:12<12:35:39] +[titan] 2025-10-05 10:29:33,814 - root - INFO - step: 19455 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 29,845 tflops: 414.05 mfu: 41.87% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:29:33,814 - root - INFO - lr: 2.8828e-05 gnorm: 1.06 [11:55:23<12:35:28] +[titan] 2025-10-05 10:29:36,174 - root - INFO - Dumping profiler traces at step 19456 +[titan] 2025-10-05 10:29:36,213 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:29:44,902 - root - INFO - step: 19460 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 29,553 tflops: 410.01 mfu: 41.46% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:29:44,902 - root - INFO - lr: 2.8819e-05 gnorm: 1.04 [11:55:34<12:35:17] +[titan] 2025-10-05 10:29:55,774 - root - INFO - step: 19465 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:29:55,774 - root - INFO - lr: 2.8810e-05 gnorm: 1.05 [11:55:45<12:35:06] +[titan] 2025-10-05 10:30:06,632 - root - INFO - step: 19470 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8999 +[titan] 2025-10-05 10:30:06,632 - root - INFO - lr: 2.8801e-05 gnorm: 1.06 [11:55:56<12:34:55] +[titan] 2025-10-05 10:30:17,527 - root - INFO - step: 19475 loss: 2.0697 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8296 +[titan] 2025-10-05 10:30:17,527 - root - INFO - lr: 2.8792e-05 gnorm: 1.03 [11:56:07<12:34:44] +[titan] 2025-10-05 10:30:28,427 - root - INFO - step: 19480 loss: 
2.2055 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 10:30:28,427 - root - INFO - lr: 2.8784e-05 gnorm: 1.06 [11:56:18<12:34:33] +[titan] 2025-10-05 10:30:39,303 - root - INFO - step: 19485 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 10:30:39,303 - root - INFO - lr: 2.8775e-05 gnorm: 1.05 [11:56:29<12:34:21] +[titan] 2025-10-05 10:30:50,176 - root - INFO - step: 19490 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:30:50,176 - root - INFO - lr: 2.8766e-05 gnorm: 1.09 [11:56:40<12:34:10] +[titan] 2025-10-05 10:31:01,024 - root - INFO - step: 19495 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:31:01,024 - root - INFO - lr: 2.8757e-05 gnorm: 1.06 [11:56:51<12:33:59] +[titan] 2025-10-05 10:31:09,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:31:11,884 - root - INFO - step: 19500 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9032 +[titan] 2025-10-05 10:31:11,884 - root - INFO - lr: 2.8748e-05 gnorm: 1.04 [11:57:01<12:33:48] +[titan] 2025-10-05 10:31:22,776 - root - INFO - step: 19505 loss: 2.1755 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 10:31:22,776 - root - INFO - lr: 2.8739e-05 gnorm: 1.05 [11:57:12<12:33:37] +[titan] 2025-10-05 10:31:33,671 - root - INFO - step: 19510 loss: 2.1889 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:31:33,672 - root - INFO - lr: 2.8730e-05 gnorm: 1.09 [11:57:23<12:33:25] +[titan] 2025-10-05 10:31:44,519 - root - INFO - step: 19515 loss: 2.1331 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8848 +[titan] 2025-10-05 10:31:44,519 - root - INFO - lr: 2.8721e-05 gnorm: 1.09 [11:57:34<12:33:14] +[titan] 2025-10-05 10:31:55,368 - root - INFO - step: 19520 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:31:55,368 - root - INFO - lr: 2.8712e-05 gnorm: 1.09 [11:57:45<12:33:03] +[titan] 2025-10-05 10:32:06,223 - root - INFO - step: 19525 loss: 2.1590 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 10:32:06,223 - root - INFO - lr: 2.8703e-05 gnorm: 1.04 [11:57:56<12:32:52] +[titan] 2025-10-05 10:32:17,080 - root - INFO - step: 19530 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:32:17,080 - root - INFO - lr: 
2.8694e-05 gnorm: 1.03 [11:58:07<12:32:40] +[titan] 2025-10-05 10:32:27,960 - root - INFO - step: 19535 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 10:32:27,960 - root - INFO - lr: 2.8686e-05 gnorm: 1.12 [11:58:18<12:32:29] +[titan] 2025-10-05 10:32:38,837 - root - INFO - step: 19540 loss: 2.1660 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 10:32:38,837 - root - INFO - lr: 2.8677e-05 gnorm: 1.05 [11:58:28<12:32:18] +[titan] 2025-10-05 10:32:49,713 - root - INFO - step: 19545 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 10:32:49,713 - root - INFO - lr: 2.8668e-05 gnorm: 1.05 [11:58:39<12:32:07] +[titan] 2025-10-05 10:32:58,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:33:00,607 - root - INFO - step: 19550 loss: 2.1396 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8912 +[titan] 2025-10-05 10:33:00,607 - root - INFO - lr: 2.8659e-05 gnorm: 1.07 [11:58:50<12:31:56] +[titan] 2025-10-05 10:33:11,463 - root - INFO - step: 19555 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8942 +[titan] 2025-10-05 10:33:11,463 - root - INFO - lr: 2.8650e-05 gnorm: 1.05 [11:59:01<12:31:44] +[titan] 2025-10-05 10:33:22,332 - root - INFO - step: 19560 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9357 +[titan] 2025-10-05 10:33:22,332 - root - INFO - lr: 2.8641e-05 gnorm: 1.08 [11:59:12<12:31:33] +[titan] 2025-10-05 10:33:33,247 - root - INFO - step: 19565 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 10:33:33,247 - root - INFO - lr: 2.8632e-05 gnorm: 1.12 [11:59:23<12:31:22] +[titan] 2025-10-05 10:33:44,148 - root - INFO - step: 19570 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:33:44,148 - root - INFO - lr: 2.8623e-05 gnorm: 1.14 [11:59:34<12:31:11] +[titan] 2025-10-05 10:33:55,019 - root - INFO - step: 19575 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 10:33:55,019 - root - INFO - lr: 2.8614e-05 gnorm: 1.09 [11:59:45<12:31:00] +[titan] 2025-10-05 10:34:05,890 - root - INFO - step: 19580 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8641 +[titan] 2025-10-05 10:34:05,890 - root - INFO - lr: 
2.8605e-05 gnorm: 1.05 [11:59:55<12:30:49] +[titan] 2025-10-05 10:34:16,751 - root - INFO - step: 19585 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 10:34:16,752 - root - INFO - lr: 2.8596e-05 gnorm: 1.07 [12:00:06<12:30:37] +[titan] 2025-10-05 10:34:27,618 - root - INFO - step: 19590 loss: 2.1741 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 10:34:27,619 - root - INFO - lr: 2.8588e-05 gnorm: 1.06 [12:00:17<12:30:26] +[titan] 2025-10-05 10:34:38,478 - root - INFO - step: 19595 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 10:34:38,478 - root - INFO - lr: 2.8579e-05 gnorm: 1.07 [12:00:28<12:30:15] +[titan] 2025-10-05 10:34:47,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:34:49,385 - root - INFO - step: 19600 loss: 2.1233 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:34:49,385 - root - INFO - lr: 2.8570e-05 gnorm: 1.04 [12:00:39<12:30:04] +[titan] 2025-10-05 10:35:00,251 - root - INFO - step: 19605 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9248 +[titan] 2025-10-05 10:35:00,251 - root - INFO - lr: 2.8561e-05 gnorm: 1.04 [12:00:50<12:29:53] +[titan] 2025-10-05 10:35:11,113 - root - INFO - step: 19610 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9104 +[titan] 2025-10-05 10:35:11,113 - root - INFO - lr: 2.8552e-05 gnorm: 1.09 [12:01:01<12:29:41] +[titan] 2025-10-05 10:35:21,983 - root - INFO - step: 19615 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 10:35:21,983 - root - INFO - lr: 2.8543e-05 gnorm: 1.05 [12:01:12<12:29:30] +[titan] 2025-10-05 10:35:32,879 - root - INFO - step: 19620 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 10:35:32,879 - root - INFO - lr: 2.8534e-05 gnorm: 1.11 [12:01:22<12:29:19] +[titan] 2025-10-05 10:35:43,764 - root - INFO - step: 19625 loss: 2.1033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:35:43,764 - root - INFO - lr: 2.8525e-05 gnorm: 1.05 [12:01:33<12:29:08] +[titan] 2025-10-05 10:35:54,636 - root - INFO - step: 19630 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 10:35:54,637 - root - INFO - lr: 
2.8516e-05 gnorm: 1.10 [12:01:44<12:28:57] +[titan] 2025-10-05 10:36:05,532 - root - INFO - step: 19635 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 10:36:05,532 - root - INFO - lr: 2.8507e-05 gnorm: 1.02 [12:01:55<12:28:45] +[titan] 2025-10-05 10:36:16,411 - root - INFO - step: 19640 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 10:36:16,411 - root - INFO - lr: 2.8498e-05 gnorm: 1.05 [12:02:06<12:28:34] +[titan] 2025-10-05 10:36:27,270 - root - INFO - step: 19645 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 10:36:27,270 - root - INFO - lr: 2.8489e-05 gnorm: 1.06 [12:02:17<12:28:23] +[titan] 2025-10-05 10:36:35,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:36:38,157 - root - INFO - step: 19650 loss: 2.0890 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8470 +[titan] 2025-10-05 10:36:38,157 - root - INFO - lr: 2.8481e-05 gnorm: 1.01 [12:02:28<12:28:12] +[titan] 2025-10-05 10:36:49,018 - root - INFO - step: 19655 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 10:36:49,019 - root - INFO - lr: 2.8472e-05 gnorm: 1.07 [12:02:39<12:28:01] +[titan] 2025-10-05 10:36:59,878 - root - INFO - step: 19660 loss: 2.2289 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 10:36:59,879 - root - INFO - lr: 2.8463e-05 gnorm: 1.08 [12:02:49<12:27:49] +[titan] 2025-10-05 10:37:10,783 - root - INFO - step: 19665 loss: 2.1435 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8951 +[titan] 2025-10-05 10:37:10,783 - root - INFO - lr: 2.8454e-05 gnorm: 1.08 [12:03:00<12:27:38] +[titan] 2025-10-05 10:37:21,656 - root - INFO - step: 19670 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8722 +[titan] 2025-10-05 10:37:21,657 - root - INFO - lr: 2.8445e-05 gnorm: 1.08 [12:03:11<12:27:27] +[titan] 2025-10-05 10:37:32,556 - root - INFO - step: 19675 loss: 2.2272 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9633 +[titan] 2025-10-05 10:37:32,556 - root - INFO - lr: 2.8436e-05 gnorm: 1.12 [12:03:22<12:27:16] +[titan] 2025-10-05 10:37:43,429 - root - INFO - step: 19680 loss: 2.1453 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:37:43,429 - root - INFO - lr: 
2.8427e-05 gnorm: 1.06 [12:03:33<12:27:05] +[titan] 2025-10-05 10:37:54,290 - root - INFO - step: 19685 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 10:37:54,290 - root - INFO - lr: 2.8418e-05 gnorm: 1.10 [12:03:44<12:26:54] +[titan] 2025-10-05 10:38:05,156 - root - INFO - step: 19690 loss: 2.1517 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:38:05,157 - root - INFO - lr: 2.8409e-05 gnorm: 1.07 [12:03:55<12:26:42] +[titan] 2025-10-05 10:38:16,025 - root - INFO - step: 19695 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 10:38:16,025 - root - INFO - lr: 2.8400e-05 gnorm: 1.11 [12:04:06<12:26:31] +[titan] 2025-10-05 10:38:24,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:38:26,916 - root - INFO - step: 19700 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:38:26,916 - root - INFO - lr: 2.8391e-05 gnorm: 1.06 [12:04:16<12:26:20] +[titan] 2025-10-05 10:38:37,813 - root - INFO - step: 19705 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 10:38:37,813 - root - INFO - lr: 2.8382e-05 gnorm: 1.04 [12:04:27<12:26:09] +[titan] 2025-10-05 10:38:48,686 - root - INFO - step: 19710 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8558 +[titan] 2025-10-05 10:38:48,686 - root - INFO - lr: 2.8374e-05 gnorm: 1.10 [12:04:38<12:25:58] +[titan] 2025-10-05 10:38:59,549 - root - INFO - step: 19715 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 10:38:59,549 - root - INFO - lr: 2.8365e-05 gnorm: 1.07 [12:04:49<12:25:46] +[titan] 2025-10-05 10:39:10,404 - root - INFO - step: 19720 loss: 2.2251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 10:39:10,404 - root - INFO - lr: 2.8356e-05 gnorm: 1.05 [12:05:00<12:25:35] +[titan] 2025-10-05 10:39:21,281 - root - INFO - step: 19725 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8864 +[titan] 2025-10-05 10:39:21,281 - root - INFO - lr: 2.8347e-05 gnorm: 1.06 [12:05:11<12:25:24] +[titan] 2025-10-05 10:39:32,209 - root - INFO - step: 19730 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9186 +[titan] 2025-10-05 10:39:32,209 - root - INFO - lr: 
2.8338e-05 gnorm: 1.05 [12:05:22<12:25:13] +[titan] 2025-10-05 10:39:43,074 - root - INFO - step: 19735 loss: 2.1410 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8919 +[titan] 2025-10-05 10:39:43,074 - root - INFO - lr: 2.8329e-05 gnorm: 1.09 [12:05:33<12:25:02] +[titan] 2025-10-05 10:39:53,944 - root - INFO - step: 19740 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9376 +[titan] 2025-10-05 10:39:53,944 - root - INFO - lr: 2.8320e-05 gnorm: 1.05 [12:05:43<12:24:51] +[titan] 2025-10-05 10:40:04,859 - root - INFO - step: 19745 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:40:04,859 - root - INFO - lr: 2.8311e-05 gnorm: 1.04 [12:05:54<12:24:39] +[titan] 2025-10-05 10:40:13,560 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:40:15,748 - root - INFO - step: 19750 loss: 2.1520 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:40:15,748 - root - INFO - lr: 2.8302e-05 gnorm: 1.04 [12:06:05<12:24:28] +[titan] 2025-10-05 10:40:26,639 - root - INFO - step: 19755 loss: 2.1342 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8866 +[titan] 2025-10-05 10:40:26,639 - root - INFO - lr: 2.8293e-05 gnorm: 1.04 [12:06:16<12:24:17] +[titan] 2025-10-05 10:40:37,586 - root - INFO - step: 19760 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.28 mfu: 41.99% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:40:37,587 - root - INFO - lr: 2.8284e-05 gnorm: 1.06 [12:06:27<12:24:06] +[titan] 2025-10-05 10:40:48,488 - root - INFO - step: 19765 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8834 +[titan] 2025-10-05 10:40:48,488 - root - INFO - lr: 2.8275e-05 gnorm: 1.06 [12:06:38<12:23:55] +[titan] 2025-10-05 10:40:59,376 - root - INFO - step: 19770 loss: 2.2031 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 10:40:59,377 - root - INFO - lr: 2.8266e-05 gnorm: 1.07 [12:06:49<12:23:44] +[titan] 2025-10-05 10:41:10,261 - root - INFO - step: 19775 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 10:41:10,261 - root - INFO - lr: 2.8258e-05 gnorm: 1.05 [12:07:00<12:23:32] +[titan] 2025-10-05 10:41:21,161 - root - INFO - step: 19780 loss: 2.2202 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 10:41:21,161 - root - INFO - lr: 
2.8249e-05 gnorm: 1.08 [12:07:11<12:23:21] +[titan] 2025-10-05 10:41:32,049 - root - INFO - step: 19785 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:41:32,049 - root - INFO - lr: 2.8240e-05 gnorm: 1.07 [12:07:22<12:23:10] +[titan] 2025-10-05 10:41:42,943 - root - INFO - step: 19790 loss: 2.0669 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 10:41:42,943 - root - INFO - lr: 2.8231e-05 gnorm: 1.04 [12:07:32<12:22:59] +[titan] 2025-10-05 10:41:53,847 - root - INFO - step: 19795 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 10:41:53,847 - root - INFO - lr: 2.8222e-05 gnorm: 1.09 [12:07:43<12:22:48] +[titan] 2025-10-05 10:42:02,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:42:04,726 - root - INFO - step: 19800 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9124 +[titan] 2025-10-05 10:42:04,726 - root - INFO - lr: 2.8213e-05 gnorm: 1.07 [12:07:54<12:22:37] +[titan] 2025-10-05 10:42:15,602 - root - INFO - step: 19805 loss: 2.1292 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8809 +[titan] 2025-10-05 10:42:15,602 - root - INFO - lr: 2.8204e-05 gnorm: 1.06 [12:08:05<12:22:25] +[titan] 2025-10-05 10:42:26,476 - root - INFO - step: 19810 loss: 2.1988 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:42:26,476 - root - INFO - lr: 2.8195e-05 gnorm: 1.07 [12:08:16<12:22:14] +[titan] 2025-10-05 10:42:37,355 - root - INFO - step: 19815 loss: 2.1111 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 10:42:37,355 - root - INFO - lr: 2.8186e-05 gnorm: 1.08 [12:08:27<12:22:03] +[titan] 2025-10-05 10:42:48,237 - root - INFO - step: 19820 loss: 2.1257 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8783 +[titan] 2025-10-05 10:42:48,237 - root - INFO - lr: 2.8177e-05 gnorm: 1.07 [12:08:38<12:21:52] +[titan] 2025-10-05 10:42:59,142 - root - INFO - step: 19825 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:42:59,142 - root - INFO - lr: 2.8168e-05 gnorm: 1.06 [12:08:49<12:21:41] +[titan] 2025-10-05 10:43:09,994 - root - INFO - step: 19830 loss: 2.1713 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9190 +[titan] 2025-10-05 10:43:09,994 - root - INFO - lr: 
2.8159e-05 gnorm: 1.12 [12:09:00<12:21:29] +[titan] 2025-10-05 10:43:20,854 - root - INFO - step: 19835 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9251 +[titan] 2025-10-05 10:43:20,854 - root - INFO - lr: 2.8151e-05 gnorm: 1.09 [12:09:10<12:21:18] +[titan] 2025-10-05 10:43:31,720 - root - INFO - step: 19840 loss: 2.1270 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:43:31,720 - root - INFO - lr: 2.8142e-05 gnorm: 1.04 [12:09:21<12:21:07] +[titan] 2025-10-05 10:43:42,583 - root - INFO - step: 19845 loss: 2.1653 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9125 +[titan] 2025-10-05 10:43:42,583 - root - INFO - lr: 2.8133e-05 gnorm: 1.03 [12:09:32<12:20:56] +[titan] 2025-10-05 10:43:51,290 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:43:53,475 - root - INFO - step: 19850 loss: 2.1376 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 10:43:53,475 - root - INFO - lr: 2.8124e-05 gnorm: 1.05 [12:09:43<12:20:45] +[titan] 2025-10-05 10:44:04,341 - root - INFO - step: 19855 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 10:44:04,341 - root - INFO - lr: 2.8115e-05 gnorm: 1.09 [12:09:54<12:20:34] +[titan] 2025-10-05 10:44:15,250 - root - INFO - step: 19860 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9089 +[titan] 2025-10-05 10:44:15,250 - root - INFO - lr: 2.8106e-05 gnorm: 1.09 [12:10:05<12:20:22] +[titan] 2025-10-05 10:44:26,122 - root - INFO - step: 19865 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 10:44:26,122 - root - INFO - lr: 2.8097e-05 gnorm: 1.06 [12:10:16<12:20:11] +[titan] 2025-10-05 10:44:37,015 - root - INFO - step: 19870 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 10:44:37,015 - root - INFO - lr: 2.8088e-05 gnorm: 1.07 [12:10:27<12:20:00] +[titan] 2025-10-05 10:44:47,890 - root - INFO - step: 19875 loss: 2.1479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8987 +[titan] 2025-10-05 10:44:47,890 - root - INFO - lr: 2.8079e-05 gnorm: 1.11 [12:10:37<12:19:49] +[titan] 2025-10-05 10:44:58,757 - root - INFO - step: 19880 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 10:44:58,757 - root - INFO - lr: 
2.8070e-05 gnorm: 1.06 [12:10:48<12:19:38] +[titan] 2025-10-05 10:45:09,633 - root - INFO - step: 19885 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:45:09,633 - root - INFO - lr: 2.8061e-05 gnorm: 1.08 [12:10:59<12:19:26] +[titan] 2025-10-05 10:45:20,533 - root - INFO - step: 19890 loss: 2.1170 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:45:20,533 - root - INFO - lr: 2.8052e-05 gnorm: 1.07 [12:11:10<12:19:15] +[titan] 2025-10-05 10:45:31,359 - root - INFO - step: 19895 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:45:31,360 - root - INFO - lr: 2.8043e-05 gnorm: 1.10 [12:11:21<12:19:04] +[titan] 2025-10-05 10:45:40,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:45:42,231 - root - INFO - step: 19900 loss: 2.1514 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 10:45:42,231 - root - INFO - lr: 2.8035e-05 gnorm: 1.08 [12:11:32<12:18:53] +[titan] 2025-10-05 10:45:53,088 - root - INFO - step: 19905 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 10:45:53,088 - root - INFO - lr: 2.8026e-05 gnorm: 1.03 [12:11:43<12:18:42] +[titan] 2025-10-05 10:46:03,910 - root - INFO - step: 19910 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 10:46:03,910 - root - INFO - lr: 2.8017e-05 gnorm: 1.08 [12:11:53<12:18:30] +[titan] 2025-10-05 10:46:14,770 - root - INFO - step: 19915 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 10:46:14,771 - root - INFO - lr: 2.8008e-05 gnorm: 1.08 [12:12:04<12:18:19] +[titan] 2025-10-05 10:46:25,652 - root - INFO - step: 19920 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:46:25,652 - root - INFO - lr: 2.7999e-05 gnorm: 1.09 [12:12:15<12:18:08] +[titan] 2025-10-05 10:46:36,496 - root - INFO - step: 19925 loss: 2.2094 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 10:46:36,497 - root - INFO - lr: 2.7990e-05 gnorm: 1.06 [12:12:26<12:17:57] +[titan] 2025-10-05 10:46:47,345 - root - INFO - step: 19930 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 10:46:47,345 - root - INFO - lr: 
2.7981e-05 gnorm: 1.11 [12:12:37<12:17:46] +[titan] 2025-10-05 10:46:58,221 - root - INFO - step: 19935 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 10:46:58,221 - root - INFO - lr: 2.7972e-05 gnorm: 1.05 [12:12:48<12:17:34] +[titan] 2025-10-05 10:47:09,102 - root - INFO - step: 19940 loss: 2.1225 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 10:47:09,102 - root - INFO - lr: 2.7963e-05 gnorm: 1.05 [12:12:59<12:17:23] +[titan] 2025-10-05 10:47:19,968 - root - INFO - step: 19945 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8581 +[titan] 2025-10-05 10:47:19,968 - root - INFO - lr: 2.7954e-05 gnorm: 1.09 [12:13:09<12:17:12] +[titan] 2025-10-05 10:47:28,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:47:30,850 - root - INFO - step: 19950 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8703 +[titan] 2025-10-05 10:47:30,850 - root - INFO - lr: 2.7945e-05 gnorm: 1.07 [12:13:20<12:17:01] +[titan] 2025-10-05 10:47:41,822 - root - INFO - step: 19955 loss: 2.1253 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8777 +[titan] 2025-10-05 10:47:41,822 - root - INFO - lr: 2.7936e-05 gnorm: 1.09 [12:13:31<12:16:50] +[titan] 2025-10-05 10:47:52,686 - root - INFO - step: 19960 loss: 2.1316 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:47:52,686 - root - INFO - lr: 2.7927e-05 gnorm: 1.11 [12:13:42<12:16:39] +[titan] 2025-10-05 10:48:03,639 - root - INFO - step: 19965 loss: 2.1229 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8756 +[titan] 2025-10-05 10:48:03,639 - root - INFO - lr: 2.7919e-05 gnorm: 1.08 [12:13:53<12:16:28] +[titan] 2025-10-05 10:48:10,346 - root - INFO - Dumping profiler traces at step 19968 +[titan] 2025-10-05 10:48:10,385 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:48:14,746 - root - INFO - step: 19970 loss: 2.1632 memory: 118.84GiB(85.28%) tps: 29,504 tflops: 409.32 mfu: 41.39% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9112 +[titan] 2025-10-05 10:48:14,746 - root - INFO - lr: 2.7910e-05 gnorm: 1.01 [12:14:04<12:16:17] +[titan] 2025-10-05 10:48:25,610 - root - INFO - step: 19975 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 10:48:25,610 - root - INFO - lr: 2.7901e-05 gnorm: 1.06 [12:14:15<12:16:05] +[titan] 2025-10-05 10:48:36,506 - root - INFO - step: 19980 loss: 
2.1359 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 10:48:36,507 - root - INFO - lr: 2.7892e-05 gnorm: 1.07 [12:14:26<12:15:54] +[titan] 2025-10-05 10:48:47,491 - root - INFO - step: 19985 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,831 tflops: 413.86 mfu: 41.85% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:48:47,491 - root - INFO - lr: 2.7883e-05 gnorm: 1.06 [12:14:37<12:15:43] +[titan] 2025-10-05 10:48:58,374 - root - INFO - step: 19990 loss: 2.1671 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:48:58,374 - root - INFO - lr: 2.7874e-05 gnorm: 1.08 [12:14:48<12:15:32] +[titan] 2025-10-05 10:49:09,251 - root - INFO - step: 19995 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:49:09,251 - root - INFO - lr: 2.7865e-05 gnorm: 1.06 [12:14:59<12:15:21] +[titan] 2025-10-05 10:49:17,928 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:49:20,117 - root - INFO - step: 20000 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8613 +[titan] 2025-10-05 10:49:20,118 - root - INFO - lr: 2.7856e-05 gnorm: 1.09 [12:15:10<12:15:10] +[titan] 2025-10-05 10:49:20,118 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 10:49:39,392 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 10:49:39,392 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.27 seconds. 
+[titan] 2025-10-05 10:51:35,525 - root - INFO - step: 20005 loss: 2.1785 memory: 118.84GiB(85.28%) tps: 2,420 tflops: 33.57 mfu: 3.39% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 10:51:35,525 - root - INFO - lr: 2.7847e-05 gnorm: 1.02 [12:17:25<12:17:03] +[titan] 2025-10-05 10:51:46,302 - root - INFO - step: 20010 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9317 +[titan] 2025-10-05 10:51:46,302 - root - INFO - lr: 2.7838e-05 gnorm: 1.08 [12:17:36<12:16:52] +[titan] 2025-10-05 10:51:57,112 - root - INFO - step: 20015 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 10:51:57,113 - root - INFO - lr: 2.7829e-05 gnorm: 1.07 [12:17:47<12:16:40] +[titan] 2025-10-05 10:52:07,924 - root - INFO - step: 20020 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.47 mfu: 42.51% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8728 +[titan] 2025-10-05 10:52:07,925 - root - INFO - lr: 2.7820e-05 gnorm: 1.05 [12:17:57<12:16:29] +[titan] 2025-10-05 10:52:18,739 - root - INFO - step: 20025 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 10:52:18,739 - root - INFO - lr: 2.7811e-05 gnorm: 1.08 [12:18:08<12:16:18] +[titan] 2025-10-05 10:52:29,561 - root - INFO - step: 20030 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 10:52:29,561 - root - INFO - lr: 2.7803e-05 gnorm: 1.05 [12:18:19<12:16:06] +[titan] 2025-10-05 10:52:40,397 - root - INFO - step: 20035 loss: 2.1681 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:52:40,397 - root - INFO - lr: 
2.7794e-05 gnorm: 1.09 [12:18:30<12:15:55] +[titan] 2025-10-05 10:52:51,270 - root - INFO - step: 20040 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:52:51,270 - root - INFO - lr: 2.7785e-05 gnorm: 1.08 [12:18:41<12:15:44] +[titan] 2025-10-05 10:53:02,099 - root - INFO - step: 20045 loss: 2.1535 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 10:53:02,099 - root - INFO - lr: 2.7776e-05 gnorm: 1.06 [12:18:52<12:15:33] +[titan] 2025-10-05 10:53:10,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:53:12,953 - root - INFO - step: 20050 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:53:12,953 - root - INFO - lr: 2.7767e-05 gnorm: 1.06 [12:19:02<12:15:21] +[titan] 2025-10-05 10:53:23,781 - root - INFO - step: 20055 loss: 2.1359 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8880 +[titan] 2025-10-05 10:53:23,781 - root - INFO - lr: 2.7758e-05 gnorm: 1.07 [12:19:13<12:15:10] +[titan] 2025-10-05 10:53:34,615 - root - INFO - step: 20060 loss: 2.2260 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 10:53:34,615 - root - INFO - lr: 2.7749e-05 gnorm: 1.08 [12:19:24<12:14:59] +[titan] 2025-10-05 10:53:45,482 - root - INFO - step: 20065 loss: 2.1538 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9039 +[titan] 2025-10-05 10:53:45,482 - root - INFO - lr: 2.7740e-05 gnorm: 1.07 [12:19:35<12:14:47] +[titan] 2025-10-05 10:53:56,339 - root - INFO - step: 20070 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 
30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 10:53:56,339 - root - INFO - lr: 2.7731e-05 gnorm: 1.04 [12:19:46<12:14:36] +[titan] 2025-10-05 10:54:07,188 - root - INFO - step: 20075 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 10:54:07,188 - root - INFO - lr: 2.7722e-05 gnorm: 1.06 [12:19:57<12:14:25] +[titan] 2025-10-05 10:54:18,059 - root - INFO - step: 20080 loss: 2.1485 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:54:18,059 - root - INFO - lr: 2.7713e-05 gnorm: 1.06 [12:20:08<12:14:14] +[titan] 2025-10-05 10:54:28,894 - root - INFO - step: 20085 loss: 2.2267 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9673 +[titan] 2025-10-05 10:54:28,894 - root - INFO - lr: 2.7704e-05 gnorm: 1.85 [12:20:18<12:14:02] +[titan] 2025-10-05 10:54:39,760 - root - INFO - step: 20090 loss: 2.1383 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 10:54:39,760 - root - INFO - lr: 2.7695e-05 gnorm: 1.09 [12:20:29<12:13:51] +[titan] 2025-10-05 10:54:50,700 - root - INFO - step: 20095 loss: 2.1379 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8897 +[titan] 2025-10-05 10:54:50,700 - root - INFO - lr: 2.7687e-05 gnorm: 1.04 [12:20:40<12:13:40] +[titan] 2025-10-05 10:54:59,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:55:01,599 - root - INFO - step: 20100 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:55:01,599 - root - INFO - lr: 2.7678e-05 gnorm: 1.11 [12:20:51<12:13:29] +[titan] 2025-10-05 10:55:12,449 - root - INFO - step: 20105 loss: 2.1710 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 10:55:12,449 - root - INFO - lr: 2.7669e-05 gnorm: 1.03 [12:21:02<12:13:18] +[titan] 2025-10-05 10:55:23,313 - root - INFO - step: 20110 loss: 2.0931 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 10:55:23,313 - root - INFO - lr: 2.7660e-05 gnorm: 1.04 [12:21:13<12:13:06] +[titan] 2025-10-05 10:55:34,176 - root - INFO - step: 20115 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 10:55:34,176 - root - INFO - lr: 2.7651e-05 gnorm: 1.05 [12:21:24<12:12:55] +[titan] 2025-10-05 10:55:45,039 - root - INFO - step: 20120 loss: 2.1203 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 10:55:45,039 - root - INFO - lr: 2.7642e-05 gnorm: 1.06 [12:21:35<12:12:44] +[titan] 2025-10-05 10:55:55,943 - root - INFO - step: 20125 loss: 2.1150 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8697 +[titan] 2025-10-05 10:55:55,943 - root - INFO - lr: 2.7633e-05 gnorm: 1.05 [12:21:45<12:12:33] +[titan] 2025-10-05 10:56:06,800 - root - INFO - step: 20130 loss: 2.1880 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 10:56:06,800 - root - INFO - lr: 
2.7624e-05 gnorm: 1.08 [12:21:56<12:12:21] +[titan] 2025-10-05 10:56:17,695 - root - INFO - step: 20135 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8794 +[titan] 2025-10-05 10:56:17,696 - root - INFO - lr: 2.7615e-05 gnorm: 1.08 [12:22:07<12:12:10] +[titan] 2025-10-05 10:56:28,544 - root - INFO - step: 20140 loss: 2.1589 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9087 +[titan] 2025-10-05 10:56:28,544 - root - INFO - lr: 2.7606e-05 gnorm: 1.04 [12:22:18<12:11:59] +[titan] 2025-10-05 10:56:39,421 - root - INFO - step: 20145 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8560 +[titan] 2025-10-05 10:56:39,422 - root - INFO - lr: 2.7597e-05 gnorm: 1.08 [12:22:29<12:11:48] +[titan] 2025-10-05 10:56:48,102 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:56:50,277 - root - INFO - step: 20150 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:56:50,277 - root - INFO - lr: 2.7588e-05 gnorm: 1.05 [12:22:40<12:11:36] +[titan] 2025-10-05 10:57:01,154 - root - INFO - step: 20155 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 10:57:01,155 - root - INFO - lr: 2.7579e-05 gnorm: 1.09 [12:22:51<12:11:25] +[titan] 2025-10-05 10:57:12,015 - root - INFO - step: 20160 loss: 2.1842 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 10:57:12,015 - root - INFO - lr: 2.7571e-05 gnorm: 1.05 [12:23:02<12:11:14] +[titan] 2025-10-05 10:57:22,907 - root - INFO - step: 20165 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 10:57:22,907 - root - INFO - lr: 2.7562e-05 gnorm: 1.05 [12:23:12<12:11:03] +[titan] 2025-10-05 10:57:33,769 - root - INFO - step: 20170 loss: 2.1734 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9209 +[titan] 2025-10-05 10:57:33,769 - root - INFO - lr: 2.7553e-05 gnorm: 1.10 [12:23:23<12:10:51] +[titan] 2025-10-05 10:57:44,629 - root - INFO - step: 20175 loss: 2.1616 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:57:44,629 - root - INFO - lr: 2.7544e-05 gnorm: 1.10 [12:23:34<12:10:40] +[titan] 2025-10-05 10:57:55,575 - root - INFO - step: 20180 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 10:57:55,575 - root - INFO - lr: 
2.7535e-05 gnorm: 1.09 [12:23:45<12:10:29] +[titan] 2025-10-05 10:58:06,449 - root - INFO - step: 20185 loss: 2.0747 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 10:58:06,449 - root - INFO - lr: 2.7526e-05 gnorm: 1.09 [12:23:56<12:10:18] +[titan] 2025-10-05 10:58:17,339 - root - INFO - step: 20190 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 10:58:17,339 - root - INFO - lr: 2.7517e-05 gnorm: 1.11 [12:24:07<12:10:07] +[titan] 2025-10-05 10:58:28,224 - root - INFO - step: 20195 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 10:58:28,224 - root - INFO - lr: 2.7508e-05 gnorm: 1.09 [12:24:18<12:09:55] +[titan] 2025-10-05 10:58:36,913 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:58:39,105 - root - INFO - step: 20200 loss: 2.1272 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 10:58:39,105 - root - INFO - lr: 2.7499e-05 gnorm: 1.10 [12:24:29<12:09:44] +[titan] 2025-10-05 10:58:49,983 - root - INFO - step: 20205 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9320 +[titan] 2025-10-05 10:58:49,983 - root - INFO - lr: 2.7490e-05 gnorm: 1.10 [12:24:39<12:09:33] +[titan] 2025-10-05 10:59:00,935 - root - INFO - step: 20210 loss: 2.0945 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 10:59:00,935 - root - INFO - lr: 2.7481e-05 gnorm: 1.07 [12:24:50<12:09:22] +[titan] 2025-10-05 10:59:11,794 - root - INFO - step: 20215 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:59:11,794 - root - INFO - lr: 2.7472e-05 gnorm: 1.08 [12:25:01<12:09:10] +[titan] 2025-10-05 10:59:22,679 - root - INFO - step: 20220 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9899 +[titan] 2025-10-05 10:59:22,679 - root - INFO - lr: 2.7463e-05 gnorm: 1.09 [12:25:12<12:08:59] +[titan] 2025-10-05 10:59:33,536 - root - INFO - step: 20225 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 10:59:33,536 - root - INFO - lr: 2.7454e-05 gnorm: 1.10 [12:25:23<12:08:48] +[titan] 2025-10-05 10:59:44,381 - root - INFO - step: 20230 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 10:59:44,381 - root - INFO - lr: 
2.7446e-05 gnorm: 1.07 [12:25:34<12:08:37] +[titan] 2025-10-05 10:59:55,274 - root - INFO - step: 20235 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8518 +[titan] 2025-10-05 10:59:55,275 - root - INFO - lr: 2.7437e-05 gnorm: 1.10 [12:25:45<12:08:25] +[titan] 2025-10-05 11:00:06,163 - root - INFO - step: 20240 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8998 +[titan] 2025-10-05 11:00:06,164 - root - INFO - lr: 2.7428e-05 gnorm: 1.09 [12:25:56<12:08:14] +[titan] 2025-10-05 11:00:17,039 - root - INFO - step: 20245 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 11:00:17,040 - root - INFO - lr: 2.7419e-05 gnorm: 1.06 [12:26:07<12:08:03] +[titan] 2025-10-05 11:00:25,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:00:27,899 - root - INFO - step: 20250 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 11:00:27,900 - root - INFO - lr: 2.7410e-05 gnorm: 1.06 [12:26:17<12:07:52] +[titan] 2025-10-05 11:00:38,739 - root - INFO - step: 20255 loss: 2.1856 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 11:00:38,739 - root - INFO - lr: 2.7401e-05 gnorm: 1.07 [12:26:28<12:07:40] +[titan] 2025-10-05 11:00:49,595 - root - INFO - step: 20260 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 11:00:49,595 - root - INFO - lr: 2.7392e-05 gnorm: 1.05 [12:26:39<12:07:29] +[titan] 2025-10-05 11:01:00,505 - root - INFO - step: 20265 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 11:01:00,505 - root - INFO - lr: 2.7383e-05 gnorm: 1.05 [12:26:50<12:07:18] +[titan] 2025-10-05 11:01:11,382 - root - INFO - step: 20270 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8446 +[titan] 2025-10-05 11:01:11,382 - root - INFO - lr: 2.7374e-05 gnorm: 1.08 [12:27:01<12:07:07] +[titan] 2025-10-05 11:01:22,284 - root - INFO - step: 20275 loss: 2.1344 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:01:22,284 - root - INFO - lr: 2.7365e-05 gnorm: 1.10 [12:27:12<12:06:56] +[titan] 2025-10-05 11:01:33,138 - root - INFO - step: 20280 loss: 2.1211 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:01:33,138 - root - INFO - lr: 
2.7356e-05 gnorm: 1.03 [12:27:23<12:06:44] +[titan] 2025-10-05 11:01:44,002 - root - INFO - step: 20285 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:01:44,002 - root - INFO - lr: 2.7347e-05 gnorm: 1.05 [12:27:33<12:06:33] +[titan] 2025-10-05 11:01:54,890 - root - INFO - step: 20290 loss: 2.1434 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 11:01:54,890 - root - INFO - lr: 2.7338e-05 gnorm: 1.08 [12:27:44<12:06:22] +[titan] 2025-10-05 11:02:06,133 - root - INFO - step: 20295 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 29,147 tflops: 404.38 mfu: 40.89% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 11:02:06,133 - root - INFO - lr: 2.7330e-05 gnorm: 1.06 [12:27:56<12:06:11] +[titan] 2025-10-05 11:02:14,822 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:02:17,010 - root - INFO - step: 20300 loss: 2.1482 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 11:02:17,010 - root - INFO - lr: 2.7321e-05 gnorm: 1.33 [12:28:06<12:06:00] +[titan] 2025-10-05 11:02:27,926 - root - INFO - step: 20305 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 11:02:27,927 - root - INFO - lr: 2.7312e-05 gnorm: 1.05 [12:28:17<12:05:49] +[titan] 2025-10-05 11:02:38,794 - root - INFO - step: 20310 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8580 +[titan] 2025-10-05 11:02:38,794 - root - INFO - lr: 2.7303e-05 gnorm: 1.02 [12:28:28<12:05:37] +[titan] 2025-10-05 11:02:49,655 - root - INFO - step: 20315 loss: 2.1038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:02:49,655 - root - INFO - lr: 2.7294e-05 gnorm: 1.06 [12:28:39<12:05:26] +[titan] 2025-10-05 11:03:00,551 - root - INFO - step: 20320 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 11:03:00,551 - root - INFO - lr: 2.7285e-05 gnorm: 1.07 [12:28:50<12:05:15] +[titan] 2025-10-05 11:03:11,416 - root - INFO - step: 20325 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9040 +[titan] 2025-10-05 11:03:11,417 - root - INFO - lr: 2.7276e-05 gnorm: 1.04 [12:29:01<12:05:04] +[titan] 2025-10-05 11:03:22,259 - root - INFO - step: 20330 loss: 2.1001 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8555 +[titan] 2025-10-05 11:03:22,259 - root - INFO - lr: 
2.7267e-05 gnorm: 1.07 [12:29:12<12:04:52] +[titan] 2025-10-05 11:03:33,113 - root - INFO - step: 20335 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8112 +[titan] 2025-10-05 11:03:33,113 - root - INFO - lr: 2.7258e-05 gnorm: 1.06 [12:29:23<12:04:41] +[titan] 2025-10-05 11:03:44,014 - root - INFO - step: 20340 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 11:03:44,014 - root - INFO - lr: 2.7249e-05 gnorm: 1.02 [12:29:33<12:04:30] +[titan] 2025-10-05 11:03:54,889 - root - INFO - step: 20345 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9095 +[titan] 2025-10-05 11:03:54,889 - root - INFO - lr: 2.7240e-05 gnorm: 1.05 [12:29:44<12:04:19] +[titan] 2025-10-05 11:04:03,595 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:04:05,779 - root - INFO - step: 20350 loss: 2.1910 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9371 +[titan] 2025-10-05 11:04:05,779 - root - INFO - lr: 2.7231e-05 gnorm: 1.07 [12:29:55<12:04:07] +[titan] 2025-10-05 11:04:16,637 - root - INFO - step: 20355 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 11:04:16,638 - root - INFO - lr: 2.7222e-05 gnorm: 1.05 [12:30:06<12:03:56] +[titan] 2025-10-05 11:04:27,458 - root - INFO - step: 20360 loss: 2.1358 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8882 +[titan] 2025-10-05 11:04:27,458 - root - INFO - lr: 2.7214e-05 gnorm: 1.06 [12:30:17<12:03:45] +[titan] 2025-10-05 11:04:38,299 - root - INFO - step: 20365 loss: 2.1403 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 11:04:38,299 - root - INFO - lr: 2.7205e-05 gnorm: 1.10 [12:30:28<12:03:34] +[titan] 2025-10-05 11:04:49,208 - root - INFO - step: 20370 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 11:04:49,208 - root - INFO - lr: 2.7196e-05 gnorm: 1.09 [12:30:39<12:03:22] +[titan] 2025-10-05 11:05:00,089 - root - INFO - step: 20375 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:05:00,089 - root - INFO - lr: 2.7187e-05 gnorm: 1.06 [12:30:50<12:03:11] +[titan] 2025-10-05 11:05:10,946 - root - INFO - step: 20380 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:05:10,946 - root - INFO - lr: 
2.7178e-05 gnorm: 1.11 [12:31:00<12:03:00] +[titan] 2025-10-05 11:05:21,800 - root - INFO - step: 20385 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:05:21,800 - root - INFO - lr: 2.7169e-05 gnorm: 1.08 [12:31:11<12:02:49] +[titan] 2025-10-05 11:05:32,664 - root - INFO - step: 20390 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 11:05:32,664 - root - INFO - lr: 2.7160e-05 gnorm: 1.05 [12:31:22<12:02:38] +[titan] 2025-10-05 11:05:43,530 - root - INFO - step: 20395 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 11:05:43,531 - root - INFO - lr: 2.7151e-05 gnorm: 1.10 [12:31:33<12:02:26] +[titan] 2025-10-05 11:05:52,200 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:05:54,413 - root - INFO - step: 20400 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 11:05:54,413 - root - INFO - lr: 2.7142e-05 gnorm: 1.05 [12:31:44<12:02:15] +[titan] 2025-10-05 11:06:05,284 - root - INFO - step: 20405 loss: 2.1600 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 11:06:05,284 - root - INFO - lr: 2.7133e-05 gnorm: 1.08 [12:31:55<12:02:04] +[titan] 2025-10-05 11:06:16,130 - root - INFO - step: 20410 loss: 2.1684 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 11:06:16,130 - root - INFO - lr: 2.7124e-05 gnorm: 1.07 [12:32:06<12:01:53] +[titan] 2025-10-05 11:06:26,974 - root - INFO - step: 20415 loss: 2.1914 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:06:26,975 - root - INFO - lr: 2.7115e-05 gnorm: 1.09 [12:32:16<12:01:41] +[titan] 2025-10-05 11:06:37,832 - root - INFO - step: 20420 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 11:06:37,832 - root - INFO - lr: 2.7106e-05 gnorm: 1.09 [12:32:27<12:01:30] +[titan] 2025-10-05 11:06:48,689 - root - INFO - step: 20425 loss: 2.1157 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 11:06:48,689 - root - INFO - lr: 2.7098e-05 gnorm: 1.08 [12:32:38<12:01:19] +[titan] 2025-10-05 11:06:59,539 - root - INFO - step: 20430 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 11:06:59,539 - root - INFO - lr: 
2.7089e-05 gnorm: 1.05 [12:32:49<12:01:08] +[titan] 2025-10-05 11:07:10,461 - root - INFO - step: 20435 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 11:07:10,461 - root - INFO - lr: 2.7080e-05 gnorm: 1.06 [12:33:00<12:00:56] +[titan] 2025-10-05 11:07:21,318 - root - INFO - step: 20440 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:07:21,318 - root - INFO - lr: 2.7071e-05 gnorm: 1.07 [12:33:11<12:00:45] +[titan] 2025-10-05 11:07:32,168 - root - INFO - step: 20445 loss: 2.0912 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:07:32,168 - root - INFO - lr: 2.7062e-05 gnorm: 1.09 [12:33:22<12:00:34] +[titan] 2025-10-05 11:07:40,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:07:43,023 - root - INFO - step: 20450 loss: 2.1251 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 11:07:43,023 - root - INFO - lr: 2.7053e-05 gnorm: 1.07 [12:33:32<12:00:23] +[titan] 2025-10-05 11:07:53,871 - root - INFO - step: 20455 loss: 2.1649 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 11:07:53,871 - root - INFO - lr: 2.7044e-05 gnorm: 1.07 [12:33:43<12:00:11] +[titan] 2025-10-05 11:08:04,763 - root - INFO - step: 20460 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 11:08:04,763 - root - INFO - lr: 2.7035e-05 gnorm: 1.03 [12:33:54<12:00:00] +[titan] 2025-10-05 11:08:15,662 - root - INFO - step: 20465 loss: 2.1274 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 11:08:15,662 - root - INFO - lr: 2.7026e-05 gnorm: 1.03 [12:34:05<11:59:49] +[titan] 2025-10-05 11:08:26,490 - root - INFO - step: 20470 loss: 2.1025 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8578 +[titan] 2025-10-05 11:08:26,490 - root - INFO - lr: 2.7017e-05 gnorm: 1.06 [12:34:16<11:59:38] +[titan] 2025-10-05 11:08:37,320 - root - INFO - step: 20475 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 11:08:37,321 - root - INFO - lr: 2.7008e-05 gnorm: 1.11 [12:34:27<11:59:26] +[titan] 2025-10-05 11:08:48,242 - root - INFO - step: 20480 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:08:48,242 - root - INFO - lr: 
2.6999e-05 gnorm: 1.04 [12:34:38<11:59:15] +[titan] 2025-10-05 11:08:48,420 - root - INFO - Dumping profiler traces at step 20480 +[titan] 2025-10-05 11:08:48,458 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:08:59,308 - root - INFO - step: 20485 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 29,611 tflops: 410.81 mfu: 41.54% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 11:08:59,308 - root - INFO - lr: 2.6990e-05 gnorm: 1.06 [12:34:49<11:59:04] +[titan] 2025-10-05 11:09:10,168 - root - INFO - step: 20490 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.8976 +[titan] 2025-10-05 11:09:10,168 - root - INFO - lr: 2.6982e-05 gnorm: 1.06 [12:35:00<11:58:53] +[titan] 2025-10-05 11:09:21,026 - root - INFO - step: 20495 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9159 +[titan] 2025-10-05 11:09:21,027 - root - INFO - lr: 2.6973e-05 gnorm: 1.10 [12:35:10<11:58:42] +[titan] 2025-10-05 11:09:29,736 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:09:31,923 - root - INFO - step: 20500 loss: 2.0830 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 11:09:31,923 - root - INFO - lr: 2.6964e-05 gnorm: 1.09 [12:35:21<11:58:31] +[titan] 2025-10-05 11:09:42,776 - root - INFO - step: 20505 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8339 +[titan] 2025-10-05 11:09:42,776 - root - INFO - lr: 2.6955e-05 gnorm: 1.10 [12:35:32<11:58:19] +[titan] 2025-10-05 11:09:53,605 - root - INFO - step: 20510 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8968 +[titan] 2025-10-05 11:09:53,605 - root - INFO - lr: 2.6946e-05 gnorm: 1.06 [12:35:43<11:58:08] +[titan] 2025-10-05 11:10:04,473 - root - INFO - step: 20515 loss: 2.1247 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8771 +[titan] 2025-10-05 11:10:04,473 - root - INFO - lr: 2.6937e-05 gnorm: 1.06 [12:35:54<11:57:57] +[titan] 2025-10-05 11:10:15,308 - root - INFO - step: 20520 loss: 2.1987 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.58 mfu: 42.43% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9434 +[titan] 2025-10-05 11:10:15,308 - root - INFO - lr: 2.6928e-05 gnorm: 1.06 [12:36:05<11:57:46] +[titan] 2025-10-05 11:10:26,169 - root - INFO - step: 20525 loss: 2.1470 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8974 +[titan] 2025-10-05 11:10:26,170 - root - INFO - lr: 2.6919e-05 gnorm: 1.04 [12:36:16<11:57:34] +[titan] 2025-10-05 11:10:37,027 - root - INFO - step: 20530 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8850 +[titan] 2025-10-05 11:10:37,027 - root - INFO - lr: 
2.6910e-05 gnorm: 1.13 [12:36:26<11:57:23] +[titan] 2025-10-05 11:10:47,875 - root - INFO - step: 20535 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 11:10:47,875 - root - INFO - lr: 2.6901e-05 gnorm: 1.03 [12:36:37<11:57:12] +[titan] 2025-10-05 11:10:58,732 - root - INFO - step: 20540 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:10:58,733 - root - INFO - lr: 2.6892e-05 gnorm: 1.06 [12:36:48<11:57:01] +[titan] 2025-10-05 11:11:09,619 - root - INFO - step: 20545 loss: 2.1707 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:11:09,619 - root - INFO - lr: 2.6883e-05 gnorm: 1.10 [12:36:59<11:56:49] +[titan] 2025-10-05 11:11:18,306 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:11:20,486 - root - INFO - step: 20550 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 11:11:20,487 - root - INFO - lr: 2.6874e-05 gnorm: 2.06 [12:37:10<11:56:38] +[titan] 2025-10-05 11:11:31,328 - root - INFO - step: 20555 loss: 2.2027 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 11:11:31,329 - root - INFO - lr: 2.6866e-05 gnorm: 1.09 [12:37:21<11:56:27] +[titan] 2025-10-05 11:11:42,212 - root - INFO - step: 20560 loss: 2.0837 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 11:11:42,212 - root - INFO - lr: 2.6857e-05 gnorm: 1.05 [12:37:32<11:56:16] +[titan] 2025-10-05 11:11:53,051 - root - INFO - step: 20565 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 11:11:53,051 - root - INFO - lr: 2.6848e-05 gnorm: 1.08 [12:37:43<11:56:04] +[titan] 2025-10-05 11:12:03,886 - root - INFO - step: 20570 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 11:12:03,887 - root - INFO - lr: 2.6839e-05 gnorm: 1.14 [12:37:53<11:55:53] +[titan] 2025-10-05 11:12:14,773 - root - INFO - step: 20575 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 11:12:14,773 - root - INFO - lr: 2.6830e-05 gnorm: 1.09 [12:38:04<11:55:42] +[titan] 2025-10-05 11:12:25,620 - root - INFO - step: 20580 loss: 2.0736 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8329 +[titan] 2025-10-05 11:12:25,620 - root - INFO - lr: 
2.6821e-05 gnorm: 1.09 [12:38:15<11:55:31] +[titan] 2025-10-05 11:12:36,467 - root - INFO - step: 20585 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 11:12:36,467 - root - INFO - lr: 2.6812e-05 gnorm: 1.05 [12:38:26<11:55:19] +[titan] 2025-10-05 11:12:47,318 - root - INFO - step: 20590 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 11:12:47,318 - root - INFO - lr: 2.6803e-05 gnorm: 1.07 [12:38:37<11:55:08] +[titan] 2025-10-05 11:12:58,203 - root - INFO - step: 20595 loss: 2.1151 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8687 +[titan] 2025-10-05 11:12:58,203 - root - INFO - lr: 2.6794e-05 gnorm: 1.07 [12:38:48<11:54:57] +[titan] 2025-10-05 11:13:06,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:13:09,064 - root - INFO - step: 20600 loss: 2.1894 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 11:13:09,064 - root - INFO - lr: 2.6785e-05 gnorm: 1.09 [12:38:59<11:54:46] +[titan] 2025-10-05 11:13:19,929 - root - INFO - step: 20605 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 11:13:19,929 - root - INFO - lr: 2.6776e-05 gnorm: 1.07 [12:39:09<11:54:35] +[titan] 2025-10-05 11:13:30,796 - root - INFO - step: 20610 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 11:13:30,796 - root - INFO - lr: 2.6767e-05 gnorm: 1.06 [12:39:20<11:54:23] +[titan] 2025-10-05 11:13:41,654 - root - INFO - step: 20615 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8398 +[titan] 2025-10-05 11:13:41,654 - root - INFO - lr: 2.6758e-05 gnorm: 1.03 [12:39:31<11:54:12] +[titan] 2025-10-05 11:13:52,508 - root - INFO - step: 20620 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 11:13:52,508 - root - INFO - lr: 2.6750e-05 gnorm: 1.06 [12:39:42<11:54:01] +[titan] 2025-10-05 11:14:03,381 - root - INFO - step: 20625 loss: 2.1197 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 11:14:03,381 - root - INFO - lr: 2.6741e-05 gnorm: 1.06 [12:39:53<11:53:50] +[titan] 2025-10-05 11:14:14,251 - root - INFO - step: 20630 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:14:14,251 - root - INFO - lr: 
2.6732e-05 gnorm: 1.06 [12:40:04<11:53:38] +[titan] 2025-10-05 11:14:25,097 - root - INFO - step: 20635 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:14:25,098 - root - INFO - lr: 2.6723e-05 gnorm: 1.08 [12:40:15<11:53:27] +[titan] 2025-10-05 11:14:35,947 - root - INFO - step: 20640 loss: 2.0980 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8548 +[titan] 2025-10-05 11:14:35,947 - root - INFO - lr: 2.6714e-05 gnorm: 1.09 [12:40:25<11:53:16] +[titan] 2025-10-05 11:14:46,798 - root - INFO - step: 20645 loss: 2.1242 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8770 +[titan] 2025-10-05 11:14:46,799 - root - INFO - lr: 2.6705e-05 gnorm: 1.09 [12:40:36<11:53:05] +[titan] 2025-10-05 11:14:55,473 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:14:57,653 - root - INFO - step: 20650 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:14:57,653 - root - INFO - lr: 2.6696e-05 gnorm: 1.08 [12:40:47<11:52:53] +[titan] 2025-10-05 11:15:08,530 - root - INFO - step: 20655 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 11:15:08,530 - root - INFO - lr: 2.6687e-05 gnorm: 1.08 [12:40:58<11:52:42] +[titan] 2025-10-05 11:15:19,423 - root - INFO - step: 20660 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 11:15:19,423 - root - INFO - lr: 2.6678e-05 gnorm: 1.15 [12:41:09<11:52:31] +[titan] 2025-10-05 11:15:30,279 - root - INFO - step: 20665 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:15:30,279 - root - INFO - lr: 2.6669e-05 gnorm: 1.06 [12:41:20<11:52:20] +[titan] 2025-10-05 11:15:41,155 - root - INFO - step: 20670 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9037 +[titan] 2025-10-05 11:15:41,156 - root - INFO - lr: 2.6660e-05 gnorm: 1.05 [12:41:31<11:52:09] +[titan] 2025-10-05 11:15:52,007 - root - INFO - step: 20675 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 11:15:52,007 - root - INFO - lr: 2.6651e-05 gnorm: 1.04 [12:41:41<11:51:57] +[titan] 2025-10-05 11:16:02,840 - root - INFO - step: 20680 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:16:02,840 - root - INFO - lr: 
2.6643e-05 gnorm: 1.03 [12:41:52<11:51:46] +[titan] 2025-10-05 11:16:13,755 - root - INFO - step: 20685 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9115 +[titan] 2025-10-05 11:16:13,756 - root - INFO - lr: 2.6634e-05 gnorm: 1.04 [12:42:03<11:51:35] +[titan] 2025-10-05 11:16:24,631 - root - INFO - step: 20690 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:16:24,632 - root - INFO - lr: 2.6625e-05 gnorm: 1.05 [12:42:14<11:51:24] +[titan] 2025-10-05 11:16:35,463 - root - INFO - step: 20695 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 11:16:35,464 - root - INFO - lr: 2.6616e-05 gnorm: 1.10 [12:42:25<11:51:12] +[titan] 2025-10-05 11:16:44,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:16:46,315 - root - INFO - step: 20700 loss: 2.1496 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:16:46,316 - root - INFO - lr: 2.6607e-05 gnorm: 1.10 [12:42:36<11:51:01] +[titan] 2025-10-05 11:16:57,157 - root - INFO - step: 20705 loss: 2.0983 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 11:16:57,157 - root - INFO - lr: 2.6598e-05 gnorm: 1.04 [12:42:47<11:50:50] +[titan] 2025-10-05 11:17:08,007 - root - INFO - step: 20710 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 11:17:08,007 - root - INFO - lr: 2.6589e-05 gnorm: 1.07 [12:42:57<11:50:39] +[titan] 2025-10-05 11:17:18,892 - root - INFO - step: 20715 loss: 2.1366 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8867 +[titan] 2025-10-05 11:17:18,892 - root - INFO - lr: 2.6580e-05 gnorm: 1.14 [12:43:08<11:50:27] +[titan] 2025-10-05 11:17:29,767 - root - INFO - step: 20720 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:17:29,768 - root - INFO - lr: 2.6571e-05 gnorm: 1.04 [12:43:19<11:50:16] +[titan] 2025-10-05 11:17:40,628 - root - INFO - step: 20725 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9005 +[titan] 2025-10-05 11:17:40,628 - root - INFO - lr: 2.6562e-05 gnorm: 1.09 [12:43:30<11:50:05] +[titan] 2025-10-05 11:17:51,474 - root - INFO - step: 20730 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:17:51,474 - root - INFO - lr: 
2.6553e-05 gnorm: 1.10 [12:43:41<11:49:54] +[titan] 2025-10-05 11:18:02,326 - root - INFO - step: 20735 loss: 2.1204 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:18:02,326 - root - INFO - lr: 2.6544e-05 gnorm: 1.06 [12:43:52<11:49:43] +[titan] 2025-10-05 11:18:13,213 - root - INFO - step: 20740 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8841 +[titan] 2025-10-05 11:18:13,213 - root - INFO - lr: 2.6536e-05 gnorm: 1.08 [12:44:03<11:49:31] +[titan] 2025-10-05 11:18:24,093 - root - INFO - step: 20745 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 11:18:24,093 - root - INFO - lr: 2.6527e-05 gnorm: 1.05 [12:44:14<11:49:20] +[titan] 2025-10-05 11:18:32,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:18:34,984 - root - INFO - step: 20750 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:18:34,984 - root - INFO - lr: 2.6518e-05 gnorm: 1.06 [12:44:24<11:49:09] +[titan] 2025-10-05 11:18:45,854 - root - INFO - step: 20755 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:18:45,854 - root - INFO - lr: 2.6509e-05 gnorm: 1.09 [12:44:35<11:48:58] +[titan] 2025-10-05 11:18:56,673 - root - INFO - step: 20760 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:18:56,674 - root - INFO - lr: 2.6500e-05 gnorm: 1.04 [12:44:46<11:48:46] +[titan] 2025-10-05 11:19:07,503 - root - INFO - step: 20765 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8648 +[titan] 2025-10-05 11:19:07,504 - root - INFO - lr: 2.6491e-05 gnorm: 1.08 [12:44:57<11:48:35] +[titan] 2025-10-05 11:19:18,411 - root - INFO - step: 20770 loss: 2.2056 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9489 +[titan] 2025-10-05 11:19:18,411 - root - INFO - lr: 2.6482e-05 gnorm: 1.12 [12:45:08<11:48:24] +[titan] 2025-10-05 11:19:29,234 - root - INFO - step: 20775 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8766 +[titan] 2025-10-05 11:19:29,234 - root - INFO - lr: 2.6473e-05 gnorm: 1.05 [12:45:19<11:48:13] +[titan] 2025-10-05 11:19:40,065 - root - INFO - step: 20780 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 11:19:40,065 - root - INFO - lr: 
2.6464e-05 gnorm: 1.08 [12:45:29<11:48:01] +[titan] 2025-10-05 11:19:50,928 - root - INFO - step: 20785 loss: 2.1284 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:19:50,928 - root - INFO - lr: 2.6455e-05 gnorm: 1.03 [12:45:40<11:47:50] +[titan] 2025-10-05 11:20:01,769 - root - INFO - step: 20790 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 11:20:01,769 - root - INFO - lr: 2.6446e-05 gnorm: 1.07 [12:45:51<11:47:39] +[titan] 2025-10-05 11:20:12,646 - root - INFO - step: 20795 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 11:20:12,646 - root - INFO - lr: 2.6437e-05 gnorm: 1.10 [12:46:02<11:47:28] +[titan] 2025-10-05 11:20:21,353 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:20:23,533 - root - INFO - step: 20800 loss: 2.0768 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 11:20:23,533 - root - INFO - lr: 2.6429e-05 gnorm: 1.06 [12:46:13<11:47:17] +[titan] 2025-10-05 11:20:34,392 - root - INFO - step: 20805 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:20:34,392 - root - INFO - lr: 2.6420e-05 gnorm: 1.09 [12:46:24<11:47:05] +[titan] 2025-10-05 11:20:45,231 - root - INFO - step: 20810 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 11:20:45,231 - root - INFO - lr: 2.6411e-05 gnorm: 1.04 [12:46:35<11:46:54] +[titan] 2025-10-05 11:20:56,074 - root - INFO - step: 20815 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:20:56,074 - root - INFO - lr: 2.6402e-05 gnorm: 1.08 [12:46:46<11:46:43] +[titan] 2025-10-05 11:21:06,980 - root - INFO - step: 20820 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:21:06,980 - root - INFO - lr: 2.6393e-05 gnorm: 1.07 [12:46:56<11:46:32] +[titan] 2025-10-05 11:21:17,884 - root - INFO - step: 20825 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 11:21:17,884 - root - INFO - lr: 2.6384e-05 gnorm: 1.08 [12:47:07<11:46:20] +[titan] 2025-10-05 11:21:28,741 - root - INFO - step: 20830 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:21:28,741 - root - INFO - lr: 
2.6375e-05 gnorm: 1.08 [12:47:18<11:46:09] +[titan] 2025-10-05 11:21:39,613 - root - INFO - step: 20835 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8881 +[titan] 2025-10-05 11:21:39,613 - root - INFO - lr: 2.6366e-05 gnorm: 1.07 [12:47:29<11:45:58] +[titan] 2025-10-05 11:21:50,471 - root - INFO - step: 20840 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 11:21:50,471 - root - INFO - lr: 2.6357e-05 gnorm: 1.03 [12:47:40<11:45:47] +[titan] 2025-10-05 11:22:01,325 - root - INFO - step: 20845 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:22:01,325 - root - INFO - lr: 2.6348e-05 gnorm: 1.07 [12:47:51<11:45:36] +[titan] 2025-10-05 11:22:10,042 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:22:12,228 - root - INFO - step: 20850 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:22:12,228 - root - INFO - lr: 2.6339e-05 gnorm: 1.01 [12:48:02<11:45:24] +[titan] 2025-10-05 11:22:23,145 - root - INFO - step: 20855 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 11:22:23,145 - root - INFO - lr: 2.6330e-05 gnorm: 1.08 [12:48:13<11:45:13] +[titan] 2025-10-05 11:22:33,976 - root - INFO - step: 20860 loss: 2.1509 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:22:33,976 - root - INFO - lr: 2.6322e-05 gnorm: 1.08 [12:48:23<11:45:02] +[titan] 2025-10-05 11:22:44,818 - root - INFO - step: 20865 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 11:22:44,819 - root - INFO - lr: 2.6313e-05 gnorm: 1.08 [12:48:34<11:44:51] +[titan] 2025-10-05 11:22:55,670 - root - INFO - step: 20870 loss: 2.1029 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 11:22:55,671 - root - INFO - lr: 2.6304e-05 gnorm: 1.04 [12:48:45<11:44:39] +[titan] 2025-10-05 11:23:06,495 - root - INFO - step: 20875 loss: 2.1668 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 11:23:06,495 - root - INFO - lr: 2.6295e-05 gnorm: 1.03 [12:48:56<11:44:28] +[titan] 2025-10-05 11:23:17,425 - root - INFO - step: 20880 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 11:23:17,426 - root - INFO - lr: 
2.6286e-05 gnorm: 1.06 [12:49:07<11:44:17] +[titan] 2025-10-05 11:23:28,304 - root - INFO - step: 20885 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:23:28,305 - root - INFO - lr: 2.6277e-05 gnorm: 1.02 [12:49:18<11:44:06] +[titan] 2025-10-05 11:23:39,146 - root - INFO - step: 20890 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 11:23:39,147 - root - INFO - lr: 2.6268e-05 gnorm: 1.04 [12:49:29<11:43:55] +[titan] 2025-10-05 11:23:50,019 - root - INFO - step: 20895 loss: 2.1373 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:23:50,019 - root - INFO - lr: 2.6259e-05 gnorm: 1.05 [12:49:39<11:43:43] +[titan] 2025-10-05 11:23:58,682 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:24:00,862 - root - INFO - step: 20900 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 11:24:00,862 - root - INFO - lr: 2.6250e-05 gnorm: 1.08 [12:49:50<11:43:32] +[titan] 2025-10-05 11:24:11,693 - root - INFO - step: 20905 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8801 +[titan] 2025-10-05 11:24:11,693 - root - INFO - lr: 2.6241e-05 gnorm: 1.09 [12:50:01<11:43:21] +[titan] 2025-10-05 11:24:22,592 - root - INFO - step: 20910 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8505 +[titan] 2025-10-05 11:24:22,592 - root - INFO - lr: 2.6232e-05 gnorm: 1.06 [12:50:12<11:43:10] +[titan] 2025-10-05 11:24:33,463 - root - INFO - step: 20915 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 11:24:33,463 - root - INFO - lr: 2.6224e-05 gnorm: 1.05 [12:50:23<11:42:58] +[titan] 2025-10-05 11:24:44,313 - root - INFO - step: 20920 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:24:44,314 - root - INFO - lr: 2.6215e-05 gnorm: 1.05 [12:50:34<11:42:47] +[titan] 2025-10-05 11:24:55,176 - root - INFO - step: 20925 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8450 +[titan] 2025-10-05 11:24:55,176 - root - INFO - lr: 2.6206e-05 gnorm: 1.05 [12:50:45<11:42:36] +[titan] 2025-10-05 11:25:06,030 - root - INFO - step: 20930 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8992 +[titan] 2025-10-05 11:25:06,030 - root - INFO - lr: 
2.6197e-05 gnorm: 1.10 [12:50:55<11:42:25] +[titan] 2025-10-05 11:25:16,898 - root - INFO - step: 20935 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8741 +[titan] 2025-10-05 11:25:16,898 - root - INFO - lr: 2.6188e-05 gnorm: 1.05 [12:51:06<11:42:14] +[titan] 2025-10-05 11:25:27,781 - root - INFO - step: 20940 loss: 2.1440 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:25:27,781 - root - INFO - lr: 2.6179e-05 gnorm: 1.04 [12:51:17<11:42:02] +[titan] 2025-10-05 11:25:38,668 - root - INFO - step: 20945 loss: 2.1635 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 11:25:38,668 - root - INFO - lr: 2.6170e-05 gnorm: 1.04 [12:51:28<11:41:51] +[titan] 2025-10-05 11:25:47,372 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:25:49,579 - root - INFO - step: 20950 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8588 +[titan] 2025-10-05 11:25:49,579 - root - INFO - lr: 2.6161e-05 gnorm: 1.02 [12:51:39<11:41:40] +[titan] 2025-10-05 11:26:00,466 - root - INFO - step: 20955 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9476 +[titan] 2025-10-05 11:26:00,466 - root - INFO - lr: 2.6152e-05 gnorm: 1.08 [12:51:50<11:41:29] +[titan] 2025-10-05 11:26:11,358 - root - INFO - step: 20960 loss: 2.1680 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 11:26:11,358 - root - INFO - lr: 2.6143e-05 gnorm: 1.07 [12:52:01<11:41:18] +[titan] 2025-10-05 11:26:22,285 - root - INFO - step: 20965 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 11:26:22,285 - root - INFO - lr: 2.6134e-05 gnorm: 1.03 [12:52:12<11:41:06] +[titan] 2025-10-05 11:26:33,153 - root - INFO - step: 20970 loss: 2.0712 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 11:26:33,153 - root - INFO - lr: 2.6126e-05 gnorm: 1.04 [12:52:23<11:40:55] +[titan] 2025-10-05 11:26:44,020 - root - INFO - step: 20975 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:26:44,021 - root - INFO - lr: 2.6117e-05 gnorm: 1.09 [12:52:33<11:40:44] +[titan] 2025-10-05 11:26:54,991 - root - INFO - step: 20980 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 11:26:54,992 - root - INFO - lr: 
2.6108e-05 gnorm: 1.07 [12:52:44<11:40:33] +[titan] 2025-10-05 11:27:05,851 - root - INFO - step: 20985 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8877 +[titan] 2025-10-05 11:27:05,851 - root - INFO - lr: 2.6099e-05 gnorm: 1.11 [12:52:55<11:40:22] +[titan] 2025-10-05 11:27:16,808 - root - INFO - step: 20990 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:27:16,809 - root - INFO - lr: 2.6090e-05 gnorm: 1.08 [12:53:06<11:40:11] +[titan] 2025-10-05 11:27:21,383 - root - INFO - Dumping profiler traces at step 20992 +[titan] 2025-10-05 11:27:21,423 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:27:27,954 - root - INFO - step: 20995 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,401 tflops: 407.90 mfu: 41.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 11:27:27,954 - root - INFO - lr: 2.6081e-05 gnorm: 1.05 [12:53:17<11:40:00] +[titan] 2025-10-05 11:27:36,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:27:38,817 - root - INFO - step: 21000 loss: 2.1220 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8750 +[titan] 2025-10-05 11:27:38,817 - root - INFO - lr: 2.6072e-05 gnorm: 1.05 [12:53:28<11:39:48] +[titan] 2025-10-05 11:27:49,677 - root - INFO - step: 21005 loss: 2.1703 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:27:49,677 - root - INFO - lr: 2.6063e-05 gnorm: 1.10 [12:53:39<11:39:37] +[titan] 2025-10-05 11:28:00,541 - root - INFO - step: 21010 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 11:28:00,541 - root - INFO - lr: 2.6054e-05 gnorm: 1.05 [12:53:50<11:39:26] +[titan] 2025-10-05 11:28:11,383 - root - INFO - step: 21015 loss: 2.1081 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8636 +[titan] 2025-10-05 11:28:11,384 - root - INFO - lr: 2.6045e-05 gnorm: 1.04 [12:54:01<11:39:15] +[titan] 2025-10-05 11:28:22,286 - root - INFO - step: 21020 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:28:22,286 - root - INFO - lr: 2.6036e-05 gnorm: 1.10 [12:54:12<11:39:03] +[titan] 2025-10-05 11:28:33,136 - root - INFO - step: 21025 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 11:28:33,136 - root - INFO - lr: 2.6028e-05 gnorm: 1.07 [12:54:23<11:38:52] +[titan] 2025-10-05 11:28:43,995 - root - INFO - step: 21030 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8395 +[titan] 2025-10-05 11:28:43,995 - root - INFO - lr: 
2.6019e-05 gnorm: 1.06 [12:54:33<11:38:41] +[titan] 2025-10-05 11:28:54,868 - root - INFO - step: 21035 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8811 +[titan] 2025-10-05 11:28:54,868 - root - INFO - lr: 2.6010e-05 gnorm: 1.09 [12:54:44<11:38:30] +[titan] 2025-10-05 11:29:05,770 - root - INFO - step: 21040 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 11:29:05,770 - root - INFO - lr: 2.6001e-05 gnorm: 1.06 [12:54:55<11:38:19] +[titan] 2025-10-05 11:29:16,625 - root - INFO - step: 21045 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:29:16,625 - root - INFO - lr: 2.5992e-05 gnorm: 1.04 [12:55:06<11:38:07] +[titan] 2025-10-05 11:29:25,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:29:27,546 - root - INFO - step: 21050 loss: 2.1350 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:29:27,546 - root - INFO - lr: 2.5983e-05 gnorm: 1.09 [12:55:17<11:37:56] +[titan] 2025-10-05 11:29:38,415 - root - INFO - step: 21055 loss: 2.0977 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8537 +[titan] 2025-10-05 11:29:38,415 - root - INFO - lr: 2.5974e-05 gnorm: 1.05 [12:55:28<11:37:45] +[titan] 2025-10-05 11:29:49,289 - root - INFO - step: 21060 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 11:29:49,289 - root - INFO - lr: 2.5965e-05 gnorm: 1.09 [12:55:39<11:37:34] +[titan] 2025-10-05 11:30:00,149 - root - INFO - step: 21065 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 11:30:00,149 - root - INFO - lr: 2.5956e-05 gnorm: 1.09 [12:55:50<11:37:23] +[titan] 2025-10-05 11:30:11,032 - root - INFO - step: 21070 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:30:11,032 - root - INFO - lr: 2.5947e-05 gnorm: 1.08 [12:56:00<11:37:11] +[titan] 2025-10-05 11:30:21,932 - root - INFO - step: 21075 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 11:30:21,932 - root - INFO - lr: 2.5939e-05 gnorm: 1.07 [12:56:11<11:37:00] +[titan] 2025-10-05 11:30:32,855 - root - INFO - step: 21080 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8856 +[titan] 2025-10-05 11:30:32,855 - root - INFO - lr: 
2.5930e-05 gnorm: 1.07 [12:56:22<11:36:49] +[titan] 2025-10-05 11:30:43,698 - root - INFO - step: 21085 loss: 2.1181 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:30:43,699 - root - INFO - lr: 2.5921e-05 gnorm: 1.11 [12:56:33<11:36:38] +[titan] 2025-10-05 11:30:54,563 - root - INFO - step: 21090 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 11:30:54,563 - root - INFO - lr: 2.5912e-05 gnorm: 1.03 [12:56:44<11:36:27] +[titan] 2025-10-05 11:31:05,426 - root - INFO - step: 21095 loss: 2.2239 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9636 +[titan] 2025-10-05 11:31:05,427 - root - INFO - lr: 2.5903e-05 gnorm: 1.06 [12:56:55<11:36:15] +[titan] 2025-10-05 11:31:14,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:31:16,304 - root - INFO - step: 21100 loss: 2.0959 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 11:31:16,304 - root - INFO - lr: 2.5894e-05 gnorm: 1.03 [12:57:06<11:36:04] +[titan] 2025-10-05 11:31:27,255 - root - INFO - step: 21105 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.13 mfu: 41.97% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 11:31:27,256 - root - INFO - lr: 2.5885e-05 gnorm: 1.07 [12:57:17<11:35:53] +[titan] 2025-10-05 11:31:38,131 - root - INFO - step: 21110 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8621 +[titan] 2025-10-05 11:31:38,132 - root - INFO - lr: 2.5876e-05 gnorm: 1.06 [12:57:28<11:35:42] +[titan] 2025-10-05 11:31:49,004 - root - INFO - step: 21115 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:31:49,005 - root - INFO - lr: 2.5867e-05 gnorm: 1.07 [12:57:38<11:35:31] +[titan] 2025-10-05 11:31:59,893 - root - INFO - step: 21120 loss: 2.0727 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8315 +[titan] 2025-10-05 11:31:59,893 - root - INFO - lr: 2.5858e-05 gnorm: 1.07 [12:57:49<11:35:19] +[titan] 2025-10-05 11:32:10,768 - root - INFO - step: 21125 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 11:32:10,769 - root - INFO - lr: 2.5850e-05 gnorm: 1.07 [12:58:00<11:35:08] +[titan] 2025-10-05 11:32:21,633 - root - INFO - step: 21130 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8324 +[titan] 2025-10-05 11:32:21,633 - root - INFO - lr: 
2.5841e-05 gnorm: 1.05 [12:58:11<11:34:57] +[titan] 2025-10-05 11:32:32,656 - root - INFO - step: 21135 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 29,729 tflops: 412.44 mfu: 41.70% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 11:32:32,656 - root - INFO - lr: 2.5832e-05 gnorm: 1.08 [12:58:22<11:34:46] +[titan] 2025-10-05 11:32:43,550 - root - INFO - step: 21140 loss: 2.1392 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 11:32:43,550 - root - INFO - lr: 2.5823e-05 gnorm: 1.07 [12:58:33<11:34:35] +[titan] 2025-10-05 11:32:54,408 - root - INFO - step: 21145 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 11:32:54,408 - root - INFO - lr: 2.5814e-05 gnorm: 1.06 [12:58:44<11:34:24] +[titan] 2025-10-05 11:33:03,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:33:05,258 - root - INFO - step: 21150 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8836 +[titan] 2025-10-05 11:33:05,258 - root - INFO - lr: 2.5805e-05 gnorm: 1.09 [12:58:55<11:34:12] +[titan] 2025-10-05 11:33:16,124 - root - INFO - step: 21155 loss: 2.1477 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 11:33:16,124 - root - INFO - lr: 2.5796e-05 gnorm: 1.07 [12:59:06<11:34:01] +[titan] 2025-10-05 11:33:27,050 - root - INFO - step: 21160 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 11:33:27,050 - root - INFO - lr: 2.5787e-05 gnorm: 1.06 [12:59:16<11:33:50] +[titan] 2025-10-05 11:33:37,906 - root - INFO - step: 21165 loss: 2.1021 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 11:33:37,906 - root - INFO - lr: 2.5778e-05 gnorm: 1.06 [12:59:27<11:33:39] +[titan] 2025-10-05 11:33:48,805 - root - INFO - step: 21170 loss: 2.1153 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8695 +[titan] 2025-10-05 11:33:48,805 - root - INFO - lr: 2.5769e-05 gnorm: 1.10 [12:59:38<11:33:28] +[titan] 2025-10-05 11:33:59,670 - root - INFO - step: 21175 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 11:33:59,670 - root - INFO - lr: 2.5761e-05 gnorm: 1.05 [12:59:49<11:33:16] +[titan] 2025-10-05 11:34:10,542 - root - INFO - step: 21180 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8645 +[titan] 2025-10-05 11:34:10,542 - root - INFO - lr: 
2.5752e-05 gnorm: 1.07 [13:00:00<11:33:05] +[titan] 2025-10-05 11:34:21,425 - root - INFO - step: 21185 loss: 2.0963 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8530 +[titan] 2025-10-05 11:34:21,425 - root - INFO - lr: 2.5743e-05 gnorm: 1.01 [13:00:11<11:32:54] +[titan] 2025-10-05 11:34:32,352 - root - INFO - step: 21190 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:34:32,352 - root - INFO - lr: 2.5734e-05 gnorm: 1.08 [13:00:22<11:32:43] +[titan] 2025-10-05 11:34:43,216 - root - INFO - step: 21195 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 11:34:43,216 - root - INFO - lr: 2.5725e-05 gnorm: 1.04 [13:00:33<11:32:32] +[titan] 2025-10-05 11:34:51,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:34:54,111 - root - INFO - step: 21200 loss: 2.0921 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 11:34:54,111 - root - INFO - lr: 2.5716e-05 gnorm: 1.07 [13:00:44<11:32:20] +[titan] 2025-10-05 11:35:04,964 - root - INFO - step: 21205 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 11:35:04,964 - root - INFO - lr: 2.5707e-05 gnorm: 1.09 [13:00:54<11:32:09] +[titan] 2025-10-05 11:35:15,826 - root - INFO - step: 21210 loss: 2.1528 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 11:35:15,827 - root - INFO - lr: 2.5698e-05 gnorm: 1.09 [13:01:05<11:31:58] +[titan] 2025-10-05 11:35:26,686 - root - INFO - step: 21215 loss: 2.1911 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:35:26,686 - root - INFO - lr: 2.5689e-05 gnorm: 1.11 [13:01:16<11:31:47] +[titan] 2025-10-05 11:35:37,615 - root - INFO - step: 21220 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 11:35:37,615 - root - INFO - lr: 2.5680e-05 gnorm: 1.08 [13:01:27<11:31:36] +[titan] 2025-10-05 11:35:48,489 - root - INFO - step: 21225 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 11:35:48,489 - root - INFO - lr: 2.5672e-05 gnorm: 1.11 [13:01:38<11:31:24] +[titan] 2025-10-05 11:35:59,356 - root - INFO - step: 21230 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:35:59,356 - root - INFO - lr: 
2.5663e-05 gnorm: 1.08 [13:01:49<11:31:13] +[titan] 2025-10-05 11:36:10,239 - root - INFO - step: 21235 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8980 +[titan] 2025-10-05 11:36:10,239 - root - INFO - lr: 2.5654e-05 gnorm: 1.09 [13:02:00<11:31:02] +[titan] 2025-10-05 11:36:21,092 - root - INFO - step: 21240 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:36:21,092 - root - INFO - lr: 2.5645e-05 gnorm: 1.05 [13:02:10<11:30:51] +[titan] 2025-10-05 11:36:32,020 - root - INFO - step: 21245 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 11:36:32,021 - root - INFO - lr: 2.5636e-05 gnorm: 1.09 [13:02:21<11:30:40] +[titan] 2025-10-05 11:36:40,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:36:42,889 - root - INFO - step: 21250 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 11:36:42,889 - root - INFO - lr: 2.5627e-05 gnorm: 1.07 [13:02:32<11:30:28] +[titan] 2025-10-05 11:36:53,745 - root - INFO - step: 21255 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 11:36:53,746 - root - INFO - lr: 2.5618e-05 gnorm: 1.05 [13:02:43<11:30:17] +[titan] 2025-10-05 11:37:04,622 - root - INFO - step: 21260 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 11:37:04,622 - root - INFO - lr: 2.5609e-05 gnorm: 1.08 [13:02:54<11:30:06] +[titan] 2025-10-05 11:37:15,535 - root - INFO - step: 21265 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 11:37:15,536 - root - INFO - lr: 2.5600e-05 gnorm: 1.08 [13:03:05<11:29:55] +[titan] 2025-10-05 11:37:26,391 - root - INFO - step: 21270 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 11:37:26,392 - root - INFO - lr: 2.5592e-05 gnorm: 1.05 [13:03:16<11:29:44] +[titan] 2025-10-05 11:37:37,276 - root - INFO - step: 21275 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 11:37:37,276 - root - INFO - lr: 2.5583e-05 gnorm: 1.07 [13:03:27<11:29:32] +[titan] 2025-10-05 11:37:48,150 - root - INFO - step: 21280 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8442 +[titan] 2025-10-05 11:37:48,150 - root - INFO - lr: 
2.5574e-05 gnorm: 1.05 [13:03:38<11:29:21] +[titan] 2025-10-05 11:37:59,010 - root - INFO - step: 21285 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 11:37:59,010 - root - INFO - lr: 2.5565e-05 gnorm: 1.07 [13:03:48<11:29:10] +[titan] 2025-10-05 11:38:09,872 - root - INFO - step: 21290 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8545 +[titan] 2025-10-05 11:38:09,872 - root - INFO - lr: 2.5556e-05 gnorm: 1.10 [13:03:59<11:28:59] +[titan] 2025-10-05 11:38:20,741 - root - INFO - step: 21295 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 11:38:20,741 - root - INFO - lr: 2.5547e-05 gnorm: 1.12 [13:04:10<11:28:48] +[titan] 2025-10-05 11:38:29,453 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:38:31,672 - root - INFO - step: 21300 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 11:38:31,672 - root - INFO - lr: 2.5538e-05 gnorm: 1.05 [13:04:21<11:28:36] +[titan] 2025-10-05 11:38:42,540 - root - INFO - step: 21305 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:38:42,540 - root - INFO - lr: 2.5529e-05 gnorm: 1.08 [13:04:32<11:28:25] +[titan] 2025-10-05 11:38:53,411 - root - INFO - step: 21310 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 11:38:53,411 - root - INFO - lr: 2.5520e-05 gnorm: 1.07 [13:04:43<11:28:14] +[titan] 2025-10-05 11:39:04,301 - root - INFO - step: 21315 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8790 +[titan] 2025-10-05 11:39:04,301 - root - INFO - lr: 2.5511e-05 gnorm: 1.10 [13:04:54<11:28:03] +[titan] 2025-10-05 11:39:15,170 - root - INFO - step: 21320 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8772 +[titan] 2025-10-05 11:39:15,170 - root - INFO - lr: 2.5503e-05 gnorm: 1.06 [13:05:05<11:27:52] +[titan] 2025-10-05 11:39:26,035 - root - INFO - step: 21325 loss: 2.1518 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:39:26,035 - root - INFO - lr: 2.5494e-05 gnorm: 1.08 [13:05:15<11:27:40] +[titan] 2025-10-05 11:39:36,994 - root - INFO - step: 21330 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8956 +[titan] 2025-10-05 11:39:36,995 - root - INFO - lr: 
2.5485e-05 gnorm: 1.06 [13:05:26<11:27:29] +[titan] 2025-10-05 11:39:47,849 - root - INFO - step: 21335 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 11:39:47,849 - root - INFO - lr: 2.5476e-05 gnorm: 1.03 [13:05:37<11:27:18] +[titan] 2025-10-05 11:39:58,709 - root - INFO - step: 21340 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 11:39:58,709 - root - INFO - lr: 2.5467e-05 gnorm: 1.07 [13:05:48<11:27:07] +[titan] 2025-10-05 11:40:09,576 - root - INFO - step: 21345 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 11:40:09,576 - root - INFO - lr: 2.5458e-05 gnorm: 1.05 [13:05:59<11:26:56] +[titan] 2025-10-05 11:40:18,258 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:40:20,450 - root - INFO - step: 21350 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9393 +[titan] 2025-10-05 11:40:20,450 - root - INFO - lr: 2.5449e-05 gnorm: 1.09 [13:06:10<11:26:44] +[titan] 2025-10-05 11:40:31,323 - root - INFO - step: 21355 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 11:40:31,323 - root - INFO - lr: 2.5440e-05 gnorm: 1.11 [13:06:21<11:26:33] +[titan] 2025-10-05 11:40:42,303 - root - INFO - step: 21360 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 29,846 tflops: 414.06 mfu: 41.87% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:40:42,303 - root - INFO - lr: 2.5431e-05 gnorm: 1.09 [13:06:32<11:26:22] +[titan] 2025-10-05 11:40:53,190 - root - INFO - step: 21365 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 11:40:53,190 - root - INFO - lr: 2.5423e-05 gnorm: 1.04 [13:06:43<11:26:11] +[titan] 2025-10-05 11:41:04,057 - root - INFO - step: 21370 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8252 +[titan] 2025-10-05 11:41:04,057 - root - INFO - lr: 2.5414e-05 gnorm: 1.06 [13:06:53<11:26:00] +[titan] 2025-10-05 11:41:14,914 - root - INFO - step: 21375 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 11:41:14,914 - root - INFO - lr: 2.5405e-05 gnorm: 1.05 [13:07:04<11:25:49] +[titan] 2025-10-05 11:41:25,788 - root - INFO - step: 21380 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 11:41:25,788 - root - INFO - lr: 
2.5396e-05 gnorm: 1.08 [13:07:15<11:25:37] +[titan] 2025-10-05 11:41:36,680 - root - INFO - step: 21385 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8705 +[titan] 2025-10-05 11:41:36,680 - root - INFO - lr: 2.5387e-05 gnorm: 1.06 [13:07:26<11:25:26] +[titan] 2025-10-05 11:41:47,564 - root - INFO - step: 21390 loss: 2.0660 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 11:41:47,564 - root - INFO - lr: 2.5378e-05 gnorm: 1.06 [13:07:37<11:25:15] +[titan] 2025-10-05 11:41:58,477 - root - INFO - step: 21395 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:41:58,477 - root - INFO - lr: 2.5369e-05 gnorm: 1.05 [13:07:48<11:25:04] +[titan] 2025-10-05 11:42:07,157 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:42:09,346 - root - INFO - step: 21400 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 11:42:09,346 - root - INFO - lr: 2.5360e-05 gnorm: 1.06 [13:07:59<11:24:53] +[titan] 2025-10-05 11:42:20,225 - root - INFO - step: 21405 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:42:20,226 - root - INFO - lr: 2.5352e-05 gnorm: 1.09 [13:08:10<11:24:41] +[titan] 2025-10-05 11:42:31,111 - root - INFO - step: 21410 loss: 2.1240 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:42:31,112 - root - INFO - lr: 2.5343e-05 gnorm: 1.12 [13:08:20<11:24:30] +[titan] 2025-10-05 11:42:42,010 - root - INFO - step: 21415 loss: 2.0961 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8538 +[titan] 2025-10-05 11:42:42,011 - root - INFO - lr: 2.5334e-05 gnorm: 1.06 [13:08:31<11:24:19] +[titan] 2025-10-05 11:42:52,881 - root - INFO - step: 21420 loss: 2.1163 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 11:42:52,881 - root - INFO - lr: 2.5325e-05 gnorm: 1.06 [13:08:42<11:24:08] +[titan] 2025-10-05 11:43:03,753 - root - INFO - step: 21425 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8953 +[titan] 2025-10-05 11:43:03,753 - root - INFO - lr: 2.5316e-05 gnorm: 1.05 [13:08:53<11:23:57] +[titan] 2025-10-05 11:43:14,617 - root - INFO - step: 21430 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8631 +[titan] 2025-10-05 11:43:14,617 - root - INFO - lr: 
2.5307e-05 gnorm: 1.07 [13:09:04<11:23:45] +[titan] 2025-10-05 11:43:25,474 - root - INFO - step: 21435 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 11:43:25,474 - root - INFO - lr: 2.5298e-05 gnorm: 1.04 [13:09:15<11:23:34] +[titan] 2025-10-05 11:43:36,449 - root - INFO - step: 21440 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.24 mfu: 41.89% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 11:43:36,449 - root - INFO - lr: 2.5289e-05 gnorm: 1.05 [13:09:26<11:23:23] +[titan] 2025-10-05 11:43:47,314 - root - INFO - step: 21445 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:43:47,314 - root - INFO - lr: 2.5280e-05 gnorm: 1.04 [13:09:37<11:23:12] +[titan] 2025-10-05 11:43:56,017 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:43:58,209 - root - INFO - step: 21450 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 11:43:58,209 - root - INFO - lr: 2.5272e-05 gnorm: 1.06 [13:09:48<11:23:01] +[titan] 2025-10-05 11:44:09,061 - root - INFO - step: 21455 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 11:44:09,061 - root - INFO - lr: 2.5263e-05 gnorm: 1.08 [13:09:58<11:22:50] +[titan] 2025-10-05 11:44:19,965 - root - INFO - step: 21460 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9120 +[titan] 2025-10-05 11:44:19,966 - root - INFO - lr: 2.5254e-05 gnorm: 1.07 [13:10:09<11:22:38] +[titan] 2025-10-05 11:44:30,808 - root - INFO - step: 21465 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:44:30,808 - root - INFO - lr: 2.5245e-05 gnorm: 1.04 [13:10:20<11:22:27] +[titan] 2025-10-05 11:44:41,706 - root - INFO - step: 21470 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:44:41,706 - root - INFO - lr: 2.5236e-05 gnorm: 1.08 [13:10:31<11:22:16] +[titan] 2025-10-05 11:44:52,552 - root - INFO - step: 21475 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8376 +[titan] 2025-10-05 11:44:52,552 - root - INFO - lr: 2.5227e-05 gnorm: 1.04 [13:10:42<11:22:05] +[titan] 2025-10-05 11:45:03,391 - root - INFO - step: 21480 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:45:03,391 - root - INFO - lr: 
2.5218e-05 gnorm: 1.07 [13:10:53<11:21:54] +[titan] 2025-10-05 11:45:14,218 - root - INFO - step: 21485 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 11:45:14,218 - root - INFO - lr: 2.5209e-05 gnorm: 1.09 [13:11:04<11:21:42] +[titan] 2025-10-05 11:45:25,127 - root - INFO - step: 21490 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 11:45:25,127 - root - INFO - lr: 2.5201e-05 gnorm: 1.06 [13:11:14<11:21:31] +[titan] 2025-10-05 11:45:35,950 - root - INFO - step: 21495 loss: 2.1076 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 11:45:35,951 - root - INFO - lr: 2.5192e-05 gnorm: 1.05 [13:11:25<11:21:20] +[titan] 2025-10-05 11:45:44,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:45:46,815 - root - INFO - step: 21500 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9073 +[titan] 2025-10-05 11:45:46,815 - root - INFO - lr: 2.5183e-05 gnorm: 1.08 [13:11:36<11:21:09] +[titan] 2025-10-05 11:45:55,756 - root - INFO - Dumping profiler traces at step 21504 +[titan] 2025-10-05 11:45:55,793 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:45:57,977 - root - INFO - step: 21505 loss: 2.1378 memory: 118.84GiB(85.28%) tps: 29,357 tflops: 407.29 mfu: 41.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8896 +[titan] 2025-10-05 11:45:57,978 - root - INFO - lr: 2.5174e-05 gnorm: 1.10 [13:11:47<11:20:58] +[titan] 2025-10-05 11:46:08,810 - root - INFO - step: 21510 loss: 2.1100 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:46:08,810 - root - INFO - lr: 2.5165e-05 gnorm: 1.08 [13:11:58<11:20:47] +[titan] 2025-10-05 11:46:19,644 - root - INFO - step: 21515 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:46:19,645 - root - INFO - lr: 2.5156e-05 gnorm: 1.05 [13:12:09<11:20:35] +[titan] 2025-10-05 11:46:30,518 - root - INFO - step: 21520 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 11:46:30,518 - root - INFO - lr: 2.5147e-05 gnorm: 1.08 [13:12:20<11:20:24] +[titan] 2025-10-05 11:46:41,409 - root - INFO - step: 21525 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 11:46:41,409 - root - INFO - lr: 2.5138e-05 gnorm: 1.08 [13:12:31<11:20:13] +[titan] 2025-10-05 11:46:52,228 - root - INFO - step: 21530 loss: 
2.0507 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 11:46:52,228 - root - INFO - lr: 2.5130e-05 gnorm: 1.06 [13:12:42<11:20:02] +[titan] 2025-10-05 11:47:03,059 - root - INFO - step: 21535 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 11:47:03,059 - root - INFO - lr: 2.5121e-05 gnorm: 1.03 [13:12:52<11:19:50] +[titan] 2025-10-05 11:47:13,907 - root - INFO - step: 21540 loss: 2.1549 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 11:47:13,907 - root - INFO - lr: 2.5112e-05 gnorm: 1.09 [13:13:03<11:19:39] +[titan] 2025-10-05 11:47:24,716 - root - INFO - step: 21545 loss: 2.1223 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 11:47:24,716 - root - INFO - lr: 2.5103e-05 gnorm: 1.07 [13:13:14<11:19:28] +[titan] 2025-10-05 11:47:33,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:47:35,549 - root - INFO - step: 21550 loss: 2.1493 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8994 +[titan] 2025-10-05 11:47:35,549 - root - INFO - lr: 2.5094e-05 gnorm: 1.05 [13:13:25<11:19:17] +[titan] 2025-10-05 11:47:46,489 - root - INFO - step: 21555 loss: 2.0469 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 11:47:46,490 - root - INFO - lr: 2.5085e-05 gnorm: 1.04 [13:13:36<11:19:06] +[titan] 2025-10-05 11:47:57,291 - root - INFO - step: 21560 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:47:57,291 - root - INFO - lr: 2.5076e-05 gnorm: 1.08 [13:13:47<11:18:54] +[titan] 2025-10-05 11:48:08,089 - root - INFO - step: 21565 loss: 2.0826 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 11:48:08,090 - root - INFO - lr: 2.5067e-05 gnorm: 1.06 [13:13:57<11:18:43] +[titan] 2025-10-05 11:48:18,889 - root - INFO - step: 21570 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:48:18,889 - root - INFO - lr: 2.5059e-05 gnorm: 1.09 [13:14:08<11:18:32] +[titan] 2025-10-05 11:48:29,708 - root - INFO - step: 21575 loss: 2.1425 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:48:29,709 - root - INFO - lr: 2.5050e-05 gnorm: 1.06 [13:14:19<11:18:21] +[titan] 2025-10-05 11:48:40,539 - root - INFO - step: 21580 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.76 mfu: 42.44% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 11:48:40,539 - root - INFO - lr: 
2.5041e-05 gnorm: 1.11 [13:14:30<11:18:09] +[titan] 2025-10-05 11:48:51,410 - root - INFO - step: 21585 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:48:51,410 - root - INFO - lr: 2.5032e-05 gnorm: 1.06 [13:14:41<11:17:58] +[titan] 2025-10-05 11:49:02,256 - root - INFO - step: 21590 loss: 2.1780 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9253 +[titan] 2025-10-05 11:49:02,256 - root - INFO - lr: 2.5023e-05 gnorm: 1.12 [13:14:52<11:17:47] +[titan] 2025-10-05 11:49:13,089 - root - INFO - step: 21595 loss: 2.1172 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 11:49:13,089 - root - INFO - lr: 2.5014e-05 gnorm: 1.10 [13:15:02<11:17:36] +[titan] 2025-10-05 11:49:21,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:49:23,936 - root - INFO - step: 21600 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 11:49:23,936 - root - INFO - lr: 2.5005e-05 gnorm: 1.09 [13:15:13<11:17:25] +[titan] 2025-10-05 11:49:34,750 - root - INFO - step: 21605 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8838 +[titan] 2025-10-05 11:49:34,751 - root - INFO - lr: 2.4996e-05 gnorm: 1.08 [13:15:24<11:17:13] +[titan] 2025-10-05 11:49:45,562 - root - INFO - step: 21610 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8037 +[titan] 2025-10-05 11:49:45,563 - root - INFO - lr: 2.4988e-05 gnorm: 1.02 [13:15:35<11:17:02] +[titan] 2025-10-05 11:49:56,369 - root - INFO - step: 21615 loss: 2.1371 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8883 +[titan] 2025-10-05 11:49:56,370 - root - INFO - lr: 2.4979e-05 gnorm: 1.04 [13:15:46<11:16:51] +[titan] 2025-10-05 11:50:07,237 - root - INFO - step: 21620 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:50:07,237 - root - INFO - lr: 2.4970e-05 gnorm: 1.05 [13:15:57<11:16:40] +[titan] 2025-10-05 11:50:18,053 - root - INFO - step: 21625 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8344 +[titan] 2025-10-05 11:50:18,053 - root - INFO - lr: 2.4961e-05 gnorm: 1.06 [13:16:07<11:16:28] +[titan] 2025-10-05 11:50:28,850 - root - INFO - step: 21630 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.05 mfu: 42.57% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:50:28,851 - root - INFO - lr: 
2.4952e-05 gnorm: 1.04 [13:16:18<11:16:17] +[titan] 2025-10-05 11:50:39,656 - root - INFO - step: 21635 loss: 2.0898 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:50:39,656 - root - INFO - lr: 2.4943e-05 gnorm: 1.09 [13:16:29<11:16:06] +[titan] 2025-10-05 11:50:50,529 - root - INFO - step: 21640 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 11:50:50,529 - root - INFO - lr: 2.4934e-05 gnorm: 1.06 [13:16:40<11:15:55] +[titan] 2025-10-05 11:51:01,328 - root - INFO - step: 21645 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.56% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:51:01,329 - root - INFO - lr: 2.4926e-05 gnorm: 1.04 [13:16:51<11:15:43] +[titan] 2025-10-05 11:51:09,997 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:51:12,168 - root - INFO - step: 21650 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 11:51:12,168 - root - INFO - lr: 2.4917e-05 gnorm: 1.07 [13:17:02<11:15:32] +[titan] 2025-10-05 11:51:23,012 - root - INFO - step: 21655 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 11:51:23,012 - root - INFO - lr: 2.4908e-05 gnorm: 1.06 [13:17:12<11:15:21] +[titan] 2025-10-05 11:51:33,829 - root - INFO - step: 21660 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 11:51:33,830 - root - INFO - lr: 2.4899e-05 gnorm: 1.06 [13:17:23<11:15:10] +[titan] 2025-10-05 11:51:44,687 - root - INFO - step: 21665 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 11:51:44,687 - root - INFO - lr: 2.4890e-05 gnorm: 1.03 [13:17:34<11:14:59] +[titan] 2025-10-05 11:51:55,529 - root - INFO - step: 21670 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 11:51:55,529 - root - INFO - lr: 2.4881e-05 gnorm: 1.04 [13:17:45<11:14:47] +[titan] 2025-10-05 11:52:06,368 - root - INFO - step: 21675 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 11:52:06,368 - root - INFO - lr: 2.4872e-05 gnorm: 1.05 [13:17:56<11:14:36] +[titan] 2025-10-05 11:52:17,248 - root - INFO - step: 21680 loss: 2.0964 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8526 +[titan] 2025-10-05 11:52:17,248 - root - INFO - lr: 
2.4863e-05 gnorm: 1.08 [13:18:07<11:14:25] +[titan] 2025-10-05 11:52:28,077 - root - INFO - step: 21685 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 11:52:28,077 - root - INFO - lr: 2.4855e-05 gnorm: 1.04 [13:18:17<11:14:14] +[titan] 2025-10-05 11:52:38,897 - root - INFO - step: 21690 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 11:52:38,897 - root - INFO - lr: 2.4846e-05 gnorm: 1.12 [13:18:28<11:14:03] +[titan] 2025-10-05 11:52:49,731 - root - INFO - step: 21695 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:52:49,731 - root - INFO - lr: 2.4837e-05 gnorm: 1.14 [13:18:39<11:13:51] +[titan] 2025-10-05 11:52:58,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:53:00,557 - root - INFO - step: 21700 loss: 2.0942 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 11:53:00,558 - root - INFO - lr: 2.4828e-05 gnorm: 1.04 [13:18:50<11:13:40] +[titan] 2025-10-05 11:53:11,384 - root - INFO - step: 21705 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8616 +[titan] 2025-10-05 11:53:11,384 - root - INFO - lr: 2.4819e-05 gnorm: 1.01 [13:19:01<11:13:29] +[titan] 2025-10-05 11:53:22,180 - root - INFO - step: 21710 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,354 tflops: 421.11 mfu: 42.58% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 11:53:22,180 - root - INFO - lr: 2.4810e-05 gnorm: 1.08 [13:19:12<11:13:18] +[titan] 2025-10-05 11:53:33,006 - root - INFO - step: 21715 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 11:53:33,006 - root - INFO - lr: 2.4801e-05 gnorm: 1.07 [13:19:22<11:13:06] +[titan] 2025-10-05 11:53:43,863 - root - INFO - step: 21720 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8820 +[titan] 2025-10-05 11:53:43,863 - root - INFO - lr: 2.4793e-05 gnorm: 1.07 [13:19:33<11:12:55] +[titan] 2025-10-05 11:53:54,726 - root - INFO - step: 21725 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 11:53:54,726 - root - INFO - lr: 2.4784e-05 gnorm: 1.07 [13:19:44<11:12:44] +[titan] 2025-10-05 11:54:05,529 - root - INFO - step: 21730 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 11:54:05,529 - root - INFO - lr: 
2.4775e-05 gnorm: 1.10 [13:19:55<11:12:33] +[titan] 2025-10-05 11:54:16,329 - root - INFO - step: 21735 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,341 tflops: 420.94 mfu: 42.56% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8624 +[titan] 2025-10-05 11:54:16,329 - root - INFO - lr: 2.4766e-05 gnorm: 1.08 [13:20:06<11:12:21] +[titan] 2025-10-05 11:54:27,148 - root - INFO - step: 21740 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.19 mfu: 42.49% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9108 +[titan] 2025-10-05 11:54:27,149 - root - INFO - lr: 2.4757e-05 gnorm: 1.08 [13:20:16<11:12:10] +[titan] 2025-10-05 11:54:38,009 - root - INFO - step: 21745 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 11:54:38,009 - root - INFO - lr: 2.4748e-05 gnorm: 1.09 [13:20:27<11:11:59] +[titan] 2025-10-05 11:54:46,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:54:48,886 - root - INFO - step: 21750 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8871 +[titan] 2025-10-05 11:54:48,886 - root - INFO - lr: 2.4739e-05 gnorm: 1.11 [13:20:38<11:11:48] +[titan] 2025-10-05 11:54:59,687 - root - INFO - step: 21755 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 11:54:59,688 - root - INFO - lr: 2.4731e-05 gnorm: 1.03 [13:20:49<11:11:37] +[titan] 2025-10-05 11:55:10,503 - root - INFO - step: 21760 loss: 2.0855 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:55:10,504 - root - INFO - lr: 2.4722e-05 gnorm: 1.08 [13:21:00<11:11:25] +[titan] 2025-10-05 11:55:21,303 - root - INFO - step: 21765 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 11:55:21,303 - root - INFO - lr: 2.4713e-05 gnorm: 1.06 [13:21:11<11:11:14] +[titan] 2025-10-05 11:55:32,128 - root - INFO - step: 21770 loss: 2.0394 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 11:55:32,129 - root - INFO - lr: 2.4704e-05 gnorm: 1.07 [13:21:21<11:11:03] +[titan] 2025-10-05 11:55:42,948 - root - INFO - step: 21775 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 11:55:42,948 - root - INFO - lr: 2.4695e-05 gnorm: 1.13 [13:21:32<11:10:52] +[titan] 2025-10-05 11:55:53,849 - root - INFO - step: 21780 loss: 2.1107 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 11:55:53,849 - root - INFO - lr: 
2.4686e-05 gnorm: 1.06 [13:21:43<11:10:41] +[titan] 2025-10-05 11:56:04,670 - root - INFO - step: 21785 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 11:56:04,670 - root - INFO - lr: 2.4677e-05 gnorm: 1.11 [13:21:54<11:10:29] +[titan] 2025-10-05 11:56:15,465 - root - INFO - step: 21790 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,355 tflops: 421.13 mfu: 42.58% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 11:56:15,465 - root - INFO - lr: 2.4669e-05 gnorm: 1.08 [13:22:05<11:10:18] +[titan] 2025-10-05 11:56:26,269 - root - INFO - step: 21795 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 11:56:26,269 - root - INFO - lr: 2.4660e-05 gnorm: 1.04 [13:22:16<11:10:07] +[titan] 2025-10-05 11:56:34,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:56:37,050 - root - INFO - step: 21800 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 11:56:37,050 - root - INFO - lr: 2.4651e-05 gnorm: 1.03 [13:22:26<11:09:56] +[titan] 2025-10-05 11:56:47,848 - root - INFO - step: 21805 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8423 +[titan] 2025-10-05 11:56:47,849 - root - INFO - lr: 2.4642e-05 gnorm: 1.06 [13:22:37<11:09:44] +[titan] 2025-10-05 11:56:58,686 - root - INFO - step: 21810 loss: 2.0632 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 11:56:58,686 - root - INFO - lr: 2.4633e-05 gnorm: 1.08 [13:22:48<11:09:33] +[titan] 2025-10-05 11:57:09,468 - root - INFO - step: 21815 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,391 tflops: 421.63 mfu: 42.63% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 11:57:09,469 - root - INFO - lr: 2.4624e-05 gnorm: 1.04 [13:22:59<11:09:22] +[titan] 2025-10-05 11:57:20,268 - root - INFO - step: 21820 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.56% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 11:57:20,268 - root - INFO - lr: 2.4615e-05 gnorm: 1.06 [13:23:10<11:09:11] +[titan] 2025-10-05 11:57:31,069 - root - INFO - step: 21825 loss: 2.0588 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 11:57:31,069 - root - INFO - lr: 2.4607e-05 gnorm: 1.03 [13:23:20<11:08:59] +[titan] 2025-10-05 11:57:41,865 - root - INFO - step: 21830 loss: 2.1085 memory: 118.84GiB(85.28%) tps: 30,353 tflops: 421.10 mfu: 42.58% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 11:57:41,865 - root - INFO - lr: 
2.4598e-05 gnorm: 1.03 [13:23:31<11:08:48] +[titan] 2025-10-05 11:57:52,686 - root - INFO - step: 21835 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:57:52,687 - root - INFO - lr: 2.4589e-05 gnorm: 1.03 [13:23:42<11:08:37] +[titan] 2025-10-05 11:58:03,531 - root - INFO - step: 21840 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:58:03,532 - root - INFO - lr: 2.4580e-05 gnorm: 1.05 [13:23:53<11:08:26] +[titan] 2025-10-05 11:58:14,308 - root - INFO - step: 21845 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,407 tflops: 421.85 mfu: 42.65% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9173 +[titan] 2025-10-05 11:58:14,309 - root - INFO - lr: 2.4571e-05 gnorm: 1.09 [13:24:04<11:08:14] +[titan] 2025-10-05 11:58:22,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:58:25,109 - root - INFO - step: 21850 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:58:25,110 - root - INFO - lr: 2.4562e-05 gnorm: 1.08 [13:24:14<11:08:03] +[titan] 2025-10-05 11:58:35,880 - root - INFO - step: 21855 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,424 tflops: 422.09 mfu: 42.68% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 11:58:35,880 - root - INFO - lr: 2.4554e-05 gnorm: 1.08 [13:24:25<11:07:52] +[titan] 2025-10-05 11:58:46,714 - root - INFO - step: 21860 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:58:46,714 - root - INFO - lr: 2.4545e-05 gnorm: 1.03 [13:24:36<11:07:41] +[titan] 2025-10-05 11:58:57,569 - root - INFO - step: 21865 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8989 +[titan] 2025-10-05 11:58:57,569 - root - INFO - lr: 2.4536e-05 gnorm: 1.07 [13:24:47<11:07:29] +[titan] 2025-10-05 11:59:08,390 - root - INFO - step: 21870 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 11:59:08,390 - root - INFO - lr: 2.4527e-05 gnorm: 1.04 [13:24:58<11:07:18] +[titan] 2025-10-05 11:59:19,246 - root - INFO - step: 21875 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8438 +[titan] 2025-10-05 11:59:19,246 - root - INFO - lr: 2.4518e-05 gnorm: 1.06 [13:25:09<11:07:07] +[titan] 2025-10-05 11:59:30,047 - root - INFO - step: 21880 loss: 2.0852 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:59:30,047 - root - INFO - lr: 
2.4509e-05 gnorm: 1.08 [13:25:19<11:06:56] +[titan] 2025-10-05 11:59:40,863 - root - INFO - step: 21885 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 11:59:40,863 - root - INFO - lr: 2.4500e-05 gnorm: 1.05 [13:25:30<11:06:45] +[titan] 2025-10-05 11:59:51,744 - root - INFO - step: 21890 loss: 2.1740 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9204 +[titan] 2025-10-05 11:59:51,744 - root - INFO - lr: 2.4492e-05 gnorm: 1.10 [13:25:41<11:06:33] +[titan] 2025-10-05 12:00:02,570 - root - INFO - step: 21895 loss: 2.2128 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9549 +[titan] 2025-10-05 12:00:02,570 - root - INFO - lr: 2.4483e-05 gnorm: 1.10 [13:25:52<11:06:22] +[titan] 2025-10-05 12:00:11,224 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:00:13,387 - root - INFO - step: 21900 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 12:00:13,387 - root - INFO - lr: 2.4474e-05 gnorm: 1.04 [13:26:03<11:06:11] +[titan] 2025-10-05 12:00:24,246 - root - INFO - step: 21905 loss: 2.1321 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 12:00:24,246 - root - INFO - lr: 2.4465e-05 gnorm: 1.05 [13:26:14<11:06:00] +[titan] 2025-10-05 12:00:35,064 - root - INFO - step: 21910 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 12:00:35,064 - root - INFO - lr: 2.4456e-05 gnorm: 1.06 [13:26:24<11:05:49] +[titan] 2025-10-05 12:00:45,889 - root - INFO - step: 21915 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:00:45,889 - root - INFO - lr: 2.4447e-05 gnorm: 1.07 [13:26:35<11:05:37] +[titan] 2025-10-05 12:00:56,747 - root - INFO - step: 21920 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 12:00:56,748 - root - INFO - lr: 2.4439e-05 gnorm: 1.11 [13:26:46<11:05:26] +[titan] 2025-10-05 12:01:07,566 - root - INFO - step: 21925 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:01:07,567 - root - INFO - lr: 2.4430e-05 gnorm: 1.06 [13:26:57<11:05:15] +[titan] 2025-10-05 12:01:18,394 - root - INFO - step: 21930 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 12:01:18,394 - root - INFO - lr: 
2.4421e-05 gnorm: 1.08 [13:27:08<11:05:04] +[titan] 2025-10-05 12:01:29,213 - root - INFO - step: 21935 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 12:01:29,213 - root - INFO - lr: 2.4412e-05 gnorm: 1.05 [13:27:19<11:04:52] +[titan] 2025-10-05 12:01:40,068 - root - INFO - step: 21940 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 12:01:40,068 - root - INFO - lr: 2.4403e-05 gnorm: 1.06 [13:27:29<11:04:41] +[titan] 2025-10-05 12:01:50,925 - root - INFO - step: 21945 loss: 2.1040 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8614 +[titan] 2025-10-05 12:01:50,925 - root - INFO - lr: 2.4394e-05 gnorm: 1.09 [13:27:40<11:04:30] +[titan] 2025-10-05 12:01:59,596 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:02:01,768 - root - INFO - step: 21950 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:02:01,768 - root - INFO - lr: 2.4385e-05 gnorm: 1.08 [13:27:51<11:04:19] +[titan] 2025-10-05 12:02:12,595 - root - INFO - step: 21955 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8121 +[titan] 2025-10-05 12:02:12,595 - root - INFO - lr: 2.4377e-05 gnorm: 1.04 [13:28:02<11:04:08] +[titan] 2025-10-05 12:02:23,415 - root - INFO - step: 21960 loss: 2.0883 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8460 +[titan] 2025-10-05 12:02:23,415 - root - INFO - lr: 2.4368e-05 gnorm: 1.02 [13:28:13<11:03:56] +[titan] 2025-10-05 12:02:34,233 - root - INFO - step: 21965 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:02:34,234 - root - INFO - lr: 2.4359e-05 gnorm: 1.07 [13:28:24<11:03:45] +[titan] 2025-10-05 12:02:45,129 - root - INFO - step: 21970 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8679 +[titan] 2025-10-05 12:02:45,129 - root - INFO - lr: 2.4350e-05 gnorm: 1.06 [13:28:34<11:03:34] +[titan] 2025-10-05 12:02:56,069 - root - INFO - step: 21975 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8574 +[titan] 2025-10-05 12:02:56,069 - root - INFO - lr: 2.4341e-05 gnorm: 1.04 [13:28:45<11:03:23] +[titan] 2025-10-05 12:03:06,899 - root - INFO - step: 21980 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9135 +[titan] 2025-10-05 12:03:06,899 - root - INFO - lr: 
2.4332e-05 gnorm: 1.08 [13:28:56<11:03:12] +[titan] 2025-10-05 12:03:17,738 - root - INFO - step: 21985 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:03:17,738 - root - INFO - lr: 2.4324e-05 gnorm: 1.09 [13:29:07<11:03:00] +[titan] 2025-10-05 12:03:28,567 - root - INFO - step: 21990 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 12:03:28,567 - root - INFO - lr: 2.4315e-05 gnorm: 1.02 [13:29:18<11:02:49] +[titan] 2025-10-05 12:03:39,369 - root - INFO - step: 21995 loss: 2.1137 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 12:03:39,369 - root - INFO - lr: 2.4306e-05 gnorm: 1.06 [13:29:29<11:02:38] +[titan] 2025-10-05 12:03:48,026 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:03:50,242 - root - INFO - step: 22000 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:03:50,242 - root - INFO - lr: 2.4297e-05 gnorm: 1.08 [13:29:40<11:02:27] +[titan] 2025-10-05 12:04:01,127 - root - INFO - step: 22005 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:04:01,127 - root - INFO - lr: 2.4288e-05 gnorm: 1.01 [13:29:50<11:02:16] +[titan] 2025-10-05 12:04:11,950 - root - INFO - step: 22010 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8594 +[titan] 2025-10-05 12:04:11,950 - root - INFO - lr: 2.4279e-05 gnorm: 1.06 [13:30:01<11:02:04] +[titan] 2025-10-05 12:04:22,889 - root - INFO - step: 22015 loss: 2.0810 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:04:22,889 - root - INFO - lr: 2.4271e-05 gnorm: 1.07 [13:30:12<11:01:53] +[titan] 2025-10-05 12:04:25,236 - root - INFO - Dumping profiler traces at step 22016 +[titan] 2025-10-05 12:04:25,274 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:04:33,972 - root - INFO - step: 22020 loss: 2.1387 memory: 118.84GiB(85.28%) tps: 29,567 tflops: 410.19 mfu: 41.48% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8891 +[titan] 2025-10-05 12:04:33,972 - root - INFO - lr: 2.4262e-05 gnorm: 1.03 [13:30:23<11:01:42] +[titan] 2025-10-05 12:04:44,810 - root - INFO - step: 22025 loss: 2.1465 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 12:04:44,810 - root - INFO - lr: 2.4253e-05 gnorm: 1.08 [13:30:34<11:01:31] +[titan] 2025-10-05 12:04:55,694 - root - INFO - step: 22030 loss: 
2.1289 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 12:04:55,694 - root - INFO - lr: 2.4244e-05 gnorm: 1.05 [13:30:45<11:01:20] +[titan] 2025-10-05 12:05:06,571 - root - INFO - step: 22035 loss: 2.0627 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8224 +[titan] 2025-10-05 12:05:06,571 - root - INFO - lr: 2.4235e-05 gnorm: 1.04 [13:30:56<11:01:09] +[titan] 2025-10-05 12:05:17,439 - root - INFO - step: 22040 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 12:05:17,439 - root - INFO - lr: 2.4226e-05 gnorm: 1.05 [13:31:07<11:00:58] +[titan] 2025-10-05 12:05:28,290 - root - INFO - step: 22045 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 12:05:28,290 - root - INFO - lr: 2.4218e-05 gnorm: 1.06 [13:31:18<11:00:46] +[titan] 2025-10-05 12:05:36,955 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:05:39,138 - root - INFO - step: 22050 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:05:39,138 - root - INFO - lr: 2.4209e-05 gnorm: 1.06 [13:31:28<11:00:35] +[titan] 2025-10-05 12:05:49,987 - root - INFO - step: 22055 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 12:05:49,987 - root - INFO - lr: 2.4200e-05 gnorm: 1.05 [13:31:39<11:00:24] +[titan] 2025-10-05 12:06:00,891 - root - INFO - step: 22060 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 12:06:00,891 - root - INFO - lr: 2.4191e-05 gnorm: 1.10 [13:31:50<11:00:13] +[titan] 2025-10-05 12:06:11,774 - root - INFO - step: 22065 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:06:11,775 - root - INFO - lr: 2.4182e-05 gnorm: 1.05 [13:32:01<11:00:02] +[titan] 2025-10-05 12:06:22,629 - root - INFO - step: 22070 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:06:22,629 - root - INFO - lr: 2.4173e-05 gnorm: 1.06 [13:32:12<10:59:50] +[titan] 2025-10-05 12:06:33,471 - root - INFO - step: 22075 loss: 2.0401 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8033 +[titan] 2025-10-05 12:06:33,471 - root - INFO - lr: 2.4165e-05 gnorm: 1.07 [13:32:23<10:59:39] +[titan] 2025-10-05 12:06:44,307 - root - INFO - step: 22080 loss: 2.1317 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8847 +[titan] 2025-10-05 12:06:44,307 - root - INFO - lr: 
2.4156e-05 gnorm: 1.08 [13:32:34<10:59:28] +[titan] 2025-10-05 12:06:55,150 - root - INFO - step: 22085 loss: 2.0997 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8559 +[titan] 2025-10-05 12:06:55,151 - root - INFO - lr: 2.4147e-05 gnorm: 1.06 [13:32:44<10:59:17] +[titan] 2025-10-05 12:07:06,029 - root - INFO - step: 22090 loss: 2.1094 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 12:07:06,030 - root - INFO - lr: 2.4138e-05 gnorm: 1.09 [13:32:55<10:59:06] +[titan] 2025-10-05 12:07:16,895 - root - INFO - step: 22095 loss: 2.1217 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:07:16,895 - root - INFO - lr: 2.4129e-05 gnorm: 1.14 [13:33:06<10:58:55] +[titan] 2025-10-05 12:07:25,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:07:27,824 - root - INFO - step: 22100 loss: 2.1006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 12:07:27,824 - root - INFO - lr: 2.4121e-05 gnorm: 1.06 [13:33:17<10:58:43] +[titan] 2025-10-05 12:07:38,689 - root - INFO - step: 22105 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 12:07:38,689 - root - INFO - lr: 2.4112e-05 gnorm: 1.07 [13:33:28<10:58:32] +[titan] 2025-10-05 12:07:49,564 - root - INFO - step: 22110 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 12:07:49,564 - root - INFO - lr: 2.4103e-05 gnorm: 1.12 [13:33:39<10:58:21] +[titan] 2025-10-05 12:08:00,491 - root - INFO - step: 22115 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8523 +[titan] 2025-10-05 12:08:00,491 - root - INFO - lr: 2.4094e-05 gnorm: 1.06 [13:33:50<10:58:10] +[titan] 2025-10-05 12:08:11,388 - root - INFO - step: 22120 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 12:08:11,388 - root - INFO - lr: 2.4085e-05 gnorm: 1.07 [13:34:01<10:57:59] +[titan] 2025-10-05 12:08:22,246 - root - INFO - step: 22125 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 12:08:22,246 - root - INFO - lr: 2.4076e-05 gnorm: 1.08 [13:34:12<10:57:48] +[titan] 2025-10-05 12:08:33,148 - root - INFO - step: 22130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:08:33,148 - root - INFO - lr: 
2.4068e-05 gnorm: 1.08 [13:34:22<10:57:36] +[titan] 2025-10-05 12:08:44,031 - root - INFO - step: 22135 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:08:44,031 - root - INFO - lr: 2.4059e-05 gnorm: 1.02 [13:34:33<10:57:25] +[titan] 2025-10-05 12:08:54,887 - root - INFO - step: 22140 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:08:54,888 - root - INFO - lr: 2.4050e-05 gnorm: 1.06 [13:34:44<10:57:14] +[titan] 2025-10-05 12:09:05,796 - root - INFO - step: 22145 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 12:09:05,796 - root - INFO - lr: 2.4041e-05 gnorm: 1.02 [13:34:55<10:57:03] +[titan] 2025-10-05 12:09:14,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:09:16,655 - root - INFO - step: 22150 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 12:09:16,655 - root - INFO - lr: 2.4032e-05 gnorm: 1.06 [13:35:06<10:56:52] +[titan] 2025-10-05 12:09:27,522 - root - INFO - step: 22155 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8661 +[titan] 2025-10-05 12:09:27,522 - root - INFO - lr: 2.4024e-05 gnorm: 1.02 [13:35:17<10:56:41] +[titan] 2025-10-05 12:09:38,432 - root - INFO - step: 22160 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 12:09:38,432 - root - INFO - lr: 2.4015e-05 gnorm: 1.04 [13:35:28<10:56:29] +[titan] 2025-10-05 12:09:49,301 - root - INFO - step: 22165 loss: 2.1166 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:09:49,302 - root - INFO - lr: 2.4006e-05 gnorm: 1.12 [13:35:39<10:56:18] +[titan] 2025-10-05 12:10:00,224 - root - INFO - step: 22170 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:10:00,224 - root - INFO - lr: 2.3997e-05 gnorm: 1.09 [13:35:50<10:56:07] +[titan] 2025-10-05 12:10:11,087 - root - INFO - step: 22175 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:10:11,087 - root - INFO - lr: 2.3988e-05 gnorm: 1.05 [13:36:00<10:55:56] +[titan] 2025-10-05 12:10:21,968 - root - INFO - step: 22180 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9855 +[titan] 2025-10-05 12:10:21,968 - root - INFO - lr: 
2.3979e-05 gnorm: 1.15 [13:36:11<10:55:45] +[titan] 2025-10-05 12:10:32,857 - root - INFO - step: 22185 loss: 2.1657 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9136 +[titan] 2025-10-05 12:10:32,857 - root - INFO - lr: 2.3971e-05 gnorm: 1.08 [13:36:22<10:55:34] +[titan] 2025-10-05 12:10:43,721 - root - INFO - step: 22190 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:10:43,721 - root - INFO - lr: 2.3962e-05 gnorm: 1.07 [13:36:33<10:55:22] +[titan] 2025-10-05 12:10:54,626 - root - INFO - step: 22195 loss: 2.1296 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 12:10:54,626 - root - INFO - lr: 2.3953e-05 gnorm: 1.11 [13:36:44<10:55:11] +[titan] 2025-10-05 12:11:03,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:11:05,537 - root - INFO - step: 22200 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 12:11:05,537 - root - INFO - lr: 2.3944e-05 gnorm: 1.09 [13:36:55<10:55:00] +[titan] 2025-10-05 12:11:16,410 - root - INFO - step: 22205 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 12:11:16,410 - root - INFO - lr: 2.3935e-05 gnorm: 1.10 [13:37:06<10:54:49] +[titan] 2025-10-05 12:11:27,277 - root - INFO - step: 22210 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 12:11:27,277 - root - INFO - lr: 2.3927e-05 gnorm: 1.10 [13:37:17<10:54:38] +[titan] 2025-10-05 12:11:38,149 - root - INFO - step: 22215 loss: 2.0858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8426 +[titan] 2025-10-05 12:11:38,149 - root - INFO - lr: 2.3918e-05 gnorm: 1.08 [13:37:27<10:54:27] +[titan] 2025-10-05 12:11:49,017 - root - INFO - step: 22220 loss: 2.1032 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 12:11:49,017 - root - INFO - lr: 2.3909e-05 gnorm: 1.08 [13:37:38<10:54:15] +[titan] 2025-10-05 12:11:59,905 - root - INFO - step: 22225 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8675 +[titan] 2025-10-05 12:11:59,905 - root - INFO - lr: 2.3900e-05 gnorm: 1.08 [13:37:49<10:54:04] +[titan] 2025-10-05 12:12:10,823 - root - INFO - step: 22230 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:12:10,823 - root - INFO - lr: 
2.3891e-05 gnorm: 1.09 [13:38:00<10:53:53] +[titan] 2025-10-05 12:12:21,651 - root - INFO - step: 22235 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 12:12:21,651 - root - INFO - lr: 2.3883e-05 gnorm: 1.08 [13:38:11<10:53:42] +[titan] 2025-10-05 12:12:32,529 - root - INFO - step: 22240 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 12:12:32,530 - root - INFO - lr: 2.3874e-05 gnorm: 1.11 [13:38:22<10:53:31] +[titan] 2025-10-05 12:12:43,387 - root - INFO - step: 22245 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 12:12:43,388 - root - INFO - lr: 2.3865e-05 gnorm: 1.06 [13:38:33<10:53:20] +[titan] 2025-10-05 12:12:52,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:12:54,236 - root - INFO - step: 22250 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 12:12:54,236 - root - INFO - lr: 2.3856e-05 gnorm: 1.09 [13:38:44<10:53:08] +[titan] 2025-10-05 12:13:05,203 - root - INFO - step: 22255 loss: 2.2062 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 12:13:05,204 - root - INFO - lr: 2.3847e-05 gnorm: 1.10 [13:38:54<10:52:57] +[titan] 2025-10-05 12:13:16,105 - root - INFO - step: 22260 loss: 2.0839 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8418 +[titan] 2025-10-05 12:13:16,105 - root - INFO - lr: 2.3838e-05 gnorm: 1.06 [13:39:05<10:52:46] +[titan] 2025-10-05 12:13:26,969 - root - INFO - step: 22265 loss: 2.1143 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 12:13:26,969 - root - INFO - lr: 2.3830e-05 gnorm: 1.09 [13:39:16<10:52:35] +[titan] 2025-10-05 12:13:37,833 - root - INFO - step: 22270 loss: 2.1822 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 12:13:37,833 - root - INFO - lr: 2.3821e-05 gnorm: 1.10 [13:39:27<10:52:24] +[titan] 2025-10-05 12:13:48,696 - root - INFO - step: 22275 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 12:13:48,696 - root - INFO - lr: 2.3812e-05 gnorm: 1.05 [13:39:38<10:52:13] +[titan] 2025-10-05 12:13:59,557 - root - INFO - step: 22280 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 12:13:59,557 - root - INFO - lr: 
2.3803e-05 gnorm: 1.06 [13:39:49<10:52:01] +[titan] 2025-10-05 12:14:10,442 - root - INFO - step: 22285 loss: 2.1340 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:14:10,442 - root - INFO - lr: 2.3794e-05 gnorm: 1.08 [13:40:00<10:51:50] +[titan] 2025-10-05 12:14:21,358 - root - INFO - step: 22290 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:14:21,358 - root - INFO - lr: 2.3786e-05 gnorm: 1.08 [13:40:11<10:51:39] +[titan] 2025-10-05 12:14:32,225 - root - INFO - step: 22295 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 12:14:32,225 - root - INFO - lr: 2.3777e-05 gnorm: 1.07 [13:40:22<10:51:28] +[titan] 2025-10-05 12:14:40,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:14:43,076 - root - INFO - step: 22300 loss: 2.0949 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 12:14:43,076 - root - INFO - lr: 2.3768e-05 gnorm: 1.08 [13:40:32<10:51:17] +[titan] 2025-10-05 12:14:53,944 - root - INFO - step: 22305 loss: 2.2081 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 12:14:53,944 - root - INFO - lr: 2.3759e-05 gnorm: 1.05 [13:40:43<10:51:06] +[titan] 2025-10-05 12:15:04,844 - root - INFO - step: 22310 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 12:15:04,844 - root - INFO - lr: 2.3750e-05 gnorm: 1.11 [13:40:54<10:50:54] +[titan] 2025-10-05 12:15:15,692 - root - INFO - step: 22315 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:15:15,693 - root - INFO - lr: 2.3742e-05 gnorm: 1.06 [13:41:05<10:50:43] +[titan] 2025-10-05 12:15:26,567 - root - INFO - step: 22320 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 12:15:26,567 - root - INFO - lr: 2.3733e-05 gnorm: 1.04 [13:41:16<10:50:32] +[titan] 2025-10-05 12:15:37,421 - root - INFO - step: 22325 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 12:15:37,422 - root - INFO - lr: 2.3724e-05 gnorm: 1.08 [13:41:27<10:50:21] +[titan] 2025-10-05 12:15:48,281 - root - INFO - step: 22330 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 12:15:48,281 - root - INFO - lr: 
2.3715e-05 gnorm: 1.08 [13:41:38<10:50:10] +[titan] 2025-10-05 12:15:59,149 - root - INFO - step: 22335 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:15:59,149 - root - INFO - lr: 2.3706e-05 gnorm: 1.04 [13:41:48<10:49:58] +[titan] 2025-10-05 12:16:10,046 - root - INFO - step: 22340 loss: 2.0616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 12:16:10,046 - root - INFO - lr: 2.3698e-05 gnorm: 1.10 [13:41:59<10:49:47] +[titan] 2025-10-05 12:16:20,913 - root - INFO - step: 22345 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 12:16:20,913 - root - INFO - lr: 2.3689e-05 gnorm: 1.09 [13:42:10<10:49:36] +[titan] 2025-10-05 12:16:29,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:16:31,773 - root - INFO - step: 22350 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:16:31,773 - root - INFO - lr: 2.3680e-05 gnorm: 1.09 [13:42:21<10:49:25] +[titan] 2025-10-05 12:16:42,676 - root - INFO - step: 22355 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8541 +[titan] 2025-10-05 12:16:42,676 - root - INFO - lr: 2.3671e-05 gnorm: 1.06 [13:42:32<10:49:14] +[titan] 2025-10-05 12:16:53,529 - root - INFO - step: 22360 loss: 2.1363 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:16:53,529 - root - INFO - lr: 2.3662e-05 gnorm: 1.08 [13:42:43<10:49:03] +[titan] 2025-10-05 12:17:04,373 - root - INFO - step: 22365 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:17:04,373 - root - INFO - lr: 2.3654e-05 gnorm: 1.08 [13:42:54<10:48:51] +[titan] 2025-10-05 12:17:15,272 - root - INFO - step: 22370 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:17:15,272 - root - INFO - lr: 2.3645e-05 gnorm: 1.06 [13:43:05<10:48:40] +[titan] 2025-10-05 12:17:26,145 - root - INFO - step: 22375 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 12:17:26,145 - root - INFO - lr: 2.3636e-05 gnorm: 1.05 [13:43:15<10:48:29] +[titan] 2025-10-05 12:17:36,995 - root - INFO - step: 22380 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 12:17:36,995 - root - INFO - lr: 
2.3627e-05 gnorm: 1.07 [13:43:26<10:48:18] +[titan] 2025-10-05 12:17:47,877 - root - INFO - step: 22385 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:17:47,877 - root - INFO - lr: 2.3619e-05 gnorm: 1.08 [13:43:37<10:48:07] +[titan] 2025-10-05 12:17:58,732 - root - INFO - step: 22390 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8286 +[titan] 2025-10-05 12:17:58,732 - root - INFO - lr: 2.3610e-05 gnorm: 1.02 [13:43:48<10:47:56] +[titan] 2025-10-05 12:18:09,619 - root - INFO - step: 22395 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8656 +[titan] 2025-10-05 12:18:09,619 - root - INFO - lr: 2.3601e-05 gnorm: 1.08 [13:43:59<10:47:44] +[titan] 2025-10-05 12:18:18,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:18:20,460 - root - INFO - step: 22400 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8304 +[titan] 2025-10-05 12:18:20,461 - root - INFO - lr: 2.3592e-05 gnorm: 1.03 [13:44:10<10:47:33] +[titan] 2025-10-05 12:18:31,320 - root - INFO - step: 22405 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:18:31,320 - root - INFO - lr: 2.3583e-05 gnorm: 1.06 [13:44:21<10:47:22] +[titan] 2025-10-05 12:18:42,176 - root - INFO - step: 22410 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 12:18:42,177 - root - INFO - lr: 2.3575e-05 gnorm: 1.06 [13:44:31<10:47:11] +[titan] 2025-10-05 12:18:53,029 - root - INFO - step: 22415 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 12:18:53,029 - root - INFO - lr: 2.3566e-05 gnorm: 1.07 [13:44:42<10:47:00] +[titan] 2025-10-05 12:19:03,920 - root - INFO - step: 22420 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 12:19:03,920 - root - INFO - lr: 2.3557e-05 gnorm: 1.04 [13:44:53<10:46:49] +[titan] 2025-10-05 12:19:14,809 - root - INFO - step: 22425 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 12:19:14,809 - root - INFO - lr: 2.3548e-05 gnorm: 1.08 [13:45:04<10:46:37] +[titan] 2025-10-05 12:19:25,666 - root - INFO - step: 22430 loss: 2.1054 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 12:19:25,666 - root - INFO - lr: 
2.3539e-05 gnorm: 1.11 [13:45:15<10:46:26] +[titan] 2025-10-05 12:19:36,537 - root - INFO - step: 22435 loss: 2.0990 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:19:36,537 - root - INFO - lr: 2.3531e-05 gnorm: 1.07 [13:45:26<10:46:15] +[titan] 2025-10-05 12:19:47,408 - root - INFO - step: 22440 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:19:47,408 - root - INFO - lr: 2.3522e-05 gnorm: 1.07 [13:45:37<10:46:04] +[titan] 2025-10-05 12:19:58,267 - root - INFO - step: 22445 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:19:58,267 - root - INFO - lr: 2.3513e-05 gnorm: 1.04 [13:45:48<10:45:53] +[titan] 2025-10-05 12:20:06,985 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:20:09,202 - root - INFO - step: 22450 loss: 2.1175 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:20:09,202 - root - INFO - lr: 2.3504e-05 gnorm: 1.08 [13:45:58<10:45:42] +[titan] 2025-10-05 12:20:20,059 - root - INFO - step: 22455 loss: 2.1341 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8869 +[titan] 2025-10-05 12:20:20,059 - root - INFO - lr: 2.3495e-05 gnorm: 1.04 [13:46:09<10:45:30] +[titan] 2025-10-05 12:20:30,913 - root - INFO - step: 22460 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:20:30,913 - root - INFO - lr: 2.3487e-05 gnorm: 1.14 [13:46:20<10:45:19] +[titan] 2025-10-05 12:20:41,788 - root - INFO - step: 22465 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8576 +[titan] 2025-10-05 12:20:41,788 - root - INFO - lr: 2.3478e-05 gnorm: 1.02 [13:46:31<10:45:08] +[titan] 2025-10-05 12:20:52,649 - root - INFO - step: 22470 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 12:20:52,649 - root - INFO - lr: 2.3469e-05 gnorm: 1.04 [13:46:42<10:44:57] +[titan] 2025-10-05 12:21:03,515 - root - INFO - step: 22475 loss: 2.0698 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8297 +[titan] 2025-10-05 12:21:03,515 - root - INFO - lr: 2.3460e-05 gnorm: 1.09 [13:46:53<10:44:46] +[titan] 2025-10-05 12:21:14,426 - root - INFO - step: 22480 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8596 +[titan] 2025-10-05 12:21:14,426 - root - INFO - lr: 
2.3452e-05 gnorm: 1.07 [13:47:04<10:44:35] +[titan] 2025-10-05 12:21:25,291 - root - INFO - step: 22485 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8221 +[titan] 2025-10-05 12:21:25,291 - root - INFO - lr: 2.3443e-05 gnorm: 1.07 [13:47:15<10:44:23] +[titan] 2025-10-05 12:21:36,157 - root - INFO - step: 22490 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:21:36,157 - root - INFO - lr: 2.3434e-05 gnorm: 1.07 [13:47:25<10:44:12] +[titan] 2025-10-05 12:21:47,031 - root - INFO - step: 22495 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8952 +[titan] 2025-10-05 12:21:47,032 - root - INFO - lr: 2.3425e-05 gnorm: 1.05 [13:47:36<10:44:01] +[titan] 2025-10-05 12:21:55,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:21:57,921 - root - INFO - step: 22500 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 12:21:57,921 - root - INFO - lr: 2.3416e-05 gnorm: 1.10 [13:47:47<10:43:50] +[titan] 2025-10-05 12:22:08,788 - root - INFO - step: 22505 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 12:22:08,789 - root - INFO - lr: 2.3408e-05 gnorm: 1.07 [13:47:58<10:43:39] +[titan] 2025-10-05 12:22:19,750 - root - INFO - step: 22510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 29,895 tflops: 414.75 mfu: 41.94% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:22:19,750 - root - INFO - lr: 2.3399e-05 gnorm: 1.09 [13:48:09<10:43:28] +[titan] 2025-10-05 12:22:30,622 - root - INFO - step: 22515 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:22:30,622 - root - INFO - lr: 2.3390e-05 gnorm: 1.11 [13:48:20<10:43:16] +[titan] 2025-10-05 12:22:41,466 - root - INFO - step: 22520 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 12:22:41,466 - root - INFO - lr: 2.3381e-05 gnorm: 1.06 [13:48:31<10:43:05] +[titan] 2025-10-05 12:22:52,408 - root - INFO - step: 22525 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:22:52,408 - root - INFO - lr: 2.3373e-05 gnorm: 1.06 [13:48:42<10:42:54] +[titan] 2025-10-05 12:22:59,146 - root - INFO - Dumping profiler traces at step 22528 +[titan] 2025-10-05 12:22:59,191 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:23:03,544 - root - INFO - step: 22530 loss: 
2.1241 memory: 118.84GiB(85.28%) tps: 29,427 tflops: 408.25 mfu: 41.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 12:23:03,544 - root - INFO - lr: 2.3364e-05 gnorm: 1.08 [13:48:53<10:42:43] +[titan] 2025-10-05 12:23:14,458 - root - INFO - step: 22535 loss: 2.1311 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8827 +[titan] 2025-10-05 12:23:14,458 - root - INFO - lr: 2.3355e-05 gnorm: 1.33 [13:49:04<10:42:32] +[titan] 2025-10-05 12:23:25,322 - root - INFO - step: 22540 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 12:23:25,322 - root - INFO - lr: 2.3346e-05 gnorm: 1.10 [13:49:15<10:42:21] +[titan] 2025-10-05 12:23:36,189 - root - INFO - step: 22545 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 12:23:36,190 - root - INFO - lr: 2.3338e-05 gnorm: 1.04 [13:49:25<10:42:10] +[titan] 2025-10-05 12:23:44,856 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:23:47,032 - root - INFO - step: 22550 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 12:23:47,032 - root - INFO - lr: 2.3329e-05 gnorm: 1.08 [13:49:36<10:41:59] +[titan] 2025-10-05 12:23:57,904 - root - INFO - step: 22555 loss: 2.0817 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:23:57,904 - root - INFO - lr: 2.3320e-05 gnorm: 1.06 [13:49:47<10:41:47] +[titan] 2025-10-05 12:24:08,764 - root - INFO - step: 22560 loss: 2.0564 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 12:24:08,764 - root - INFO - lr: 2.3311e-05 gnorm: 1.08 [13:49:58<10:41:36] +[titan] 2025-10-05 12:24:19,652 - root - INFO - step: 22565 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8615 +[titan] 2025-10-05 12:24:19,652 - root - INFO - lr: 2.3302e-05 gnorm: 1.10 [13:50:09<10:41:25] +[titan] 2025-10-05 12:24:30,523 - root - INFO - step: 22570 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 12:24:30,523 - root - INFO - lr: 2.3294e-05 gnorm: 1.05 [13:50:20<10:41:14] +[titan] 2025-10-05 12:24:41,397 - root - INFO - step: 22575 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 12:24:41,397 - root - INFO - lr: 2.3285e-05 gnorm: 1.06 [13:50:31<10:41:03] +[titan] 2025-10-05 12:24:52,282 - root - INFO - step: 22580 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8584 +[titan] 2025-10-05 12:24:52,283 - root - INFO - lr: 
2.3276e-05 gnorm: 1.02 [13:50:42<10:40:52] +[titan] 2025-10-05 12:25:03,150 - root - INFO - step: 22585 loss: 2.0722 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:25:03,150 - root - INFO - lr: 2.3267e-05 gnorm: 1.07 [13:50:52<10:40:40] +[titan] 2025-10-05 12:25:14,069 - root - INFO - step: 22590 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 12:25:14,069 - root - INFO - lr: 2.3259e-05 gnorm: 1.07 [13:51:03<10:40:29] +[titan] 2025-10-05 12:25:24,944 - root - INFO - step: 22595 loss: 2.0307 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 12:25:24,945 - root - INFO - lr: 2.3250e-05 gnorm: 1.06 [13:51:14<10:40:18] +[titan] 2025-10-05 12:25:33,616 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:25:35,800 - root - INFO - step: 22600 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 12:25:35,800 - root - INFO - lr: 2.3241e-05 gnorm: 1.09 [13:51:25<10:40:07] +[titan] 2025-10-05 12:25:46,666 - root - INFO - step: 22605 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 12:25:46,666 - root - INFO - lr: 2.3232e-05 gnorm: 1.08 [13:51:36<10:39:56] +[titan] 2025-10-05 12:25:57,545 - root - INFO - step: 22610 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8253 +[titan] 2025-10-05 12:25:57,545 - root - INFO - lr: 2.3224e-05 gnorm: 1.04 [13:51:47<10:39:45] +[titan] 2025-10-05 12:26:08,410 - root - INFO - step: 22615 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 12:26:08,411 - root - INFO - lr: 2.3215e-05 gnorm: 1.05 [13:51:58<10:39:33] +[titan] 2025-10-05 12:26:19,368 - root - INFO - step: 22620 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 12:26:19,368 - root - INFO - lr: 2.3206e-05 gnorm: 1.13 [13:52:09<10:39:22] +[titan] 2025-10-05 12:26:30,266 - root - INFO - step: 22625 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8493 +[titan] 2025-10-05 12:26:30,266 - root - INFO - lr: 2.3197e-05 gnorm: 1.07 [13:52:20<10:39:11] +[titan] 2025-10-05 12:26:41,175 - root - INFO - step: 22630 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 12:26:41,175 - root - INFO - lr: 
2.3189e-05 gnorm: 1.06 [13:52:30<10:39:00] +[titan] 2025-10-05 12:26:52,070 - root - INFO - step: 22635 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8261 +[titan] 2025-10-05 12:26:52,070 - root - INFO - lr: 2.3180e-05 gnorm: 1.03 [13:52:41<10:38:49] +[titan] 2025-10-05 12:27:02,956 - root - INFO - step: 22640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 12:27:02,956 - root - INFO - lr: 2.3171e-05 gnorm: 1.05 [13:52:52<10:38:38] +[titan] 2025-10-05 12:27:13,822 - root - INFO - step: 22645 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 12:27:13,822 - root - INFO - lr: 2.3162e-05 gnorm: 1.02 [13:53:03<10:38:27] +[titan] 2025-10-05 12:27:22,552 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:27:24,736 - root - INFO - step: 22650 loss: 2.0501 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 12:27:24,737 - root - INFO - lr: 2.3153e-05 gnorm: 1.10 [13:53:14<10:38:16] +[titan] 2025-10-05 12:27:35,626 - root - INFO - step: 22655 loss: 2.0835 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 12:27:35,626 - root - INFO - lr: 2.3145e-05 gnorm: 1.05 [13:53:25<10:38:04] +[titan] 2025-10-05 12:27:46,518 - root - INFO - step: 22660 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:27:46,518 - root - INFO - lr: 2.3136e-05 gnorm: 1.11 [13:53:36<10:37:53] +[titan] 2025-10-05 12:27:57,386 - root - INFO - step: 22665 loss: 2.1687 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9171 +[titan] 2025-10-05 12:27:57,386 - root - INFO - lr: 2.3127e-05 gnorm: 1.10 [13:53:47<10:37:42] +[titan] 2025-10-05 12:28:08,227 - root - INFO - step: 22670 loss: 2.0850 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8425 +[titan] 2025-10-05 12:28:08,227 - root - INFO - lr: 2.3118e-05 gnorm: 1.05 [13:53:57<10:37:31] +[titan] 2025-10-05 12:28:19,140 - root - INFO - step: 22675 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:28:19,140 - root - INFO - lr: 2.3110e-05 gnorm: 1.08 [13:54:08<10:37:20] +[titan] 2025-10-05 12:28:30,016 - root - INFO - step: 22680 loss: 2.1382 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 12:28:30,016 - root - INFO - lr: 
2.3101e-05 gnorm: 1.16 [13:54:19<10:37:09] +[titan] 2025-10-05 12:28:40,902 - root - INFO - step: 22685 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8186 +[titan] 2025-10-05 12:28:40,902 - root - INFO - lr: 2.3092e-05 gnorm: 1.08 [13:54:30<10:36:57] +[titan] 2025-10-05 12:28:51,765 - root - INFO - step: 22690 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 12:28:51,766 - root - INFO - lr: 2.3083e-05 gnorm: 1.03 [13:54:41<10:36:46] +[titan] 2025-10-05 12:29:02,626 - root - INFO - step: 22695 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:29:02,626 - root - INFO - lr: 2.3075e-05 gnorm: 1.06 [13:54:52<10:36:35] +[titan] 2025-10-05 12:29:11,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:29:13,457 - root - INFO - step: 22700 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 12:29:13,457 - root - INFO - lr: 2.3066e-05 gnorm: 1.07 [13:55:03<10:36:24] +[titan] 2025-10-05 12:29:24,373 - root - INFO - step: 22705 loss: 2.0814 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:29:24,373 - root - INFO - lr: 2.3057e-05 gnorm: 1.08 [13:55:14<10:36:13] +[titan] 2025-10-05 12:29:35,226 - root - INFO - step: 22710 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 12:29:35,226 - root - INFO - lr: 2.3048e-05 gnorm: 1.06 [13:55:24<10:36:02] +[titan] 2025-10-05 12:29:46,086 - root - INFO - step: 22715 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8506 +[titan] 2025-10-05 12:29:46,087 - root - INFO - lr: 2.3040e-05 gnorm: 1.12 [13:55:35<10:35:50] +[titan] 2025-10-05 12:29:56,956 - root - INFO - step: 22720 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 12:29:56,956 - root - INFO - lr: 2.3031e-05 gnorm: 1.05 [13:55:46<10:35:39] +[titan] 2025-10-05 12:30:07,794 - root - INFO - step: 22725 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8635 +[titan] 2025-10-05 12:30:07,794 - root - INFO - lr: 2.3022e-05 gnorm: 1.08 [13:55:57<10:35:28] +[titan] 2025-10-05 12:30:18,701 - root - INFO - step: 22730 loss: 2.0684 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:30:18,702 - root - INFO - lr: 
2.3013e-05 gnorm: 1.06 [13:56:08<10:35:17] +[titan] 2025-10-05 12:30:29,526 - root - INFO - step: 22735 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9056 +[titan] 2025-10-05 12:30:29,526 - root - INFO - lr: 2.3005e-05 gnorm: 1.08 [13:56:19<10:35:06] +[titan] 2025-10-05 12:30:40,389 - root - INFO - step: 22740 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 12:30:40,389 - root - INFO - lr: 2.2996e-05 gnorm: 1.08 [13:56:30<10:34:55] +[titan] 2025-10-05 12:30:51,240 - root - INFO - step: 22745 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 12:30:51,240 - root - INFO - lr: 2.2987e-05 gnorm: 1.07 [13:56:40<10:34:43] +[titan] 2025-10-05 12:30:59,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:31:02,123 - root - INFO - step: 22750 loss: 2.1101 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:31:02,123 - root - INFO - lr: 2.2978e-05 gnorm: 1.09 [13:56:51<10:34:32] +[titan] 2025-10-05 12:31:12,994 - root - INFO - step: 22755 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8489 +[titan] 2025-10-05 12:31:12,994 - root - INFO - lr: 2.2970e-05 gnorm: 1.07 [13:57:02<10:34:21] +[titan] 2025-10-05 12:31:23,866 - root - INFO - step: 22760 loss: 2.0378 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 12:31:23,866 - root - INFO - lr: 2.2961e-05 gnorm: 1.07 [13:57:13<10:34:10] +[titan] 2025-10-05 12:31:34,726 - root - INFO - step: 22765 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8417 +[titan] 2025-10-05 12:31:34,726 - root - INFO - lr: 2.2952e-05 gnorm: 1.06 [13:57:24<10:33:59] +[titan] 2025-10-05 12:31:45,584 - root - INFO - step: 22770 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 12:31:45,585 - root - INFO - lr: 2.2944e-05 gnorm: 1.08 [13:57:35<10:33:48] +[titan] 2025-10-05 12:31:56,424 - root - INFO - step: 22775 loss: 2.0368 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 12:31:56,424 - root - INFO - lr: 2.2935e-05 gnorm: 1.06 [13:57:46<10:33:36] +[titan] 2025-10-05 12:32:07,271 - root - INFO - step: 22780 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8625 +[titan] 2025-10-05 12:32:07,271 - root - INFO - lr: 
2.2926e-05 gnorm: 1.09 [13:57:57<10:33:25] +[titan] 2025-10-05 12:32:18,125 - root - INFO - step: 22785 loss: 2.0749 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:32:18,126 - root - INFO - lr: 2.2917e-05 gnorm: 1.06 [13:58:07<10:33:14] +[titan] 2025-10-05 12:32:29,041 - root - INFO - step: 22790 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 12:32:29,041 - root - INFO - lr: 2.2909e-05 gnorm: 1.01 [13:58:18<10:33:03] +[titan] 2025-10-05 12:32:39,901 - root - INFO - step: 22795 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8495 +[titan] 2025-10-05 12:32:39,901 - root - INFO - lr: 2.2900e-05 gnorm: 1.05 [13:58:29<10:32:52] +[titan] 2025-10-05 12:32:48,566 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:32:50,763 - root - INFO - step: 22800 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:32:50,764 - root - INFO - lr: 2.2891e-05 gnorm: 1.04 [13:58:40<10:32:41] +[titan] 2025-10-05 12:33:01,622 - root - INFO - step: 22805 loss: 2.0900 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8482 +[titan] 2025-10-05 12:33:01,622 - root - INFO - lr: 2.2882e-05 gnorm: 1.02 [13:58:51<10:32:29] +[titan] 2025-10-05 12:33:12,469 - root - INFO - step: 22810 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 12:33:12,469 - root - INFO - lr: 2.2874e-05 gnorm: 1.05 [13:59:02<10:32:18] +[titan] 2025-10-05 12:33:23,367 - root - INFO - step: 22815 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8893 +[titan] 2025-10-05 12:33:23,367 - root - INFO - lr: 2.2865e-05 gnorm: 1.08 [13:59:13<10:32:07] +[titan] 2025-10-05 12:33:34,205 - root - INFO - step: 22820 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:33:34,206 - root - INFO - lr: 2.2856e-05 gnorm: 1.08 [13:59:23<10:31:56] +[titan] 2025-10-05 12:33:45,062 - root - INFO - step: 22825 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:33:45,062 - root - INFO - lr: 2.2847e-05 gnorm: 1.06 [13:59:34<10:31:45] +[titan] 2025-10-05 12:33:55,902 - root - INFO - step: 22830 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 12:33:55,902 - root - INFO - lr: 
2.2839e-05 gnorm: 1.08 [13:59:45<10:31:34] +[titan] 2025-10-05 12:34:06,747 - root - INFO - step: 22835 loss: 2.0824 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 12:34:06,747 - root - INFO - lr: 2.2830e-05 gnorm: 1.04 [13:59:56<10:31:22] +[titan] 2025-10-05 12:34:17,586 - root - INFO - step: 22840 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8552 +[titan] 2025-10-05 12:34:17,586 - root - INFO - lr: 2.2821e-05 gnorm: 1.04 [14:00:07<10:31:11] +[titan] 2025-10-05 12:34:28,454 - root - INFO - step: 22845 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8428 +[titan] 2025-10-05 12:34:28,454 - root - INFO - lr: 2.2813e-05 gnorm: 1.11 [14:00:18<10:31:00] +[titan] 2025-10-05 12:34:37,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:34:39,324 - root - INFO - step: 22850 loss: 2.0362 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 12:34:39,324 - root - INFO - lr: 2.2804e-05 gnorm: 1.07 [14:00:29<10:30:49] +[titan] 2025-10-05 12:34:50,183 - root - INFO - step: 22855 loss: 2.0829 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:34:50,183 - root - INFO - lr: 2.2795e-05 gnorm: 1.04 [14:00:39<10:30:38] +[titan] 2025-10-05 12:35:01,017 - root - INFO - step: 22860 loss: 1.9834 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 12:35:01,017 - root - INFO - lr: 2.2786e-05 gnorm: 1.01 [14:00:50<10:30:27] +[titan] 2025-10-05 12:35:11,885 - root - INFO - step: 22865 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:35:11,885 - root - INFO - lr: 2.2778e-05 gnorm: 1.04 [14:01:01<10:30:15] +[titan] 2025-10-05 12:35:22,742 - root - INFO - step: 22870 loss: 2.1227 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8759 +[titan] 2025-10-05 12:35:22,742 - root - INFO - lr: 2.2769e-05 gnorm: 1.09 [14:01:12<10:30:04] +[titan] 2025-10-05 12:35:33,625 - root - INFO - step: 22875 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 12:35:33,625 - root - INFO - lr: 2.2760e-05 gnorm: 1.12 [14:01:23<10:29:53] +[titan] 2025-10-05 12:35:44,473 - root - INFO - step: 22880 loss: 2.0907 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8483 +[titan] 2025-10-05 12:35:44,473 - root - INFO - lr: 
2.2751e-05 gnorm: 1.09 [14:01:34<10:29:42] +[titan] 2025-10-05 12:35:55,316 - root - INFO - step: 22885 loss: 2.1475 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 12:35:55,316 - root - INFO - lr: 2.2743e-05 gnorm: 1.08 [14:01:45<10:29:31] +[titan] 2025-10-05 12:36:06,165 - root - INFO - step: 22890 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8494 +[titan] 2025-10-05 12:36:06,165 - root - INFO - lr: 2.2734e-05 gnorm: 1.10 [14:01:55<10:29:20] +[titan] 2025-10-05 12:36:17,010 - root - INFO - step: 22895 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:36:17,011 - root - INFO - lr: 2.2725e-05 gnorm: 1.06 [14:02:06<10:29:08] +[titan] 2025-10-05 12:36:25,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:36:27,886 - root - INFO - step: 22900 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 12:36:27,887 - root - INFO - lr: 2.2717e-05 gnorm: 1.09 [14:02:17<10:28:57] +[titan] 2025-10-05 12:36:38,741 - root - INFO - step: 22905 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8109 +[titan] 2025-10-05 12:36:38,741 - root - INFO - lr: 2.2708e-05 gnorm: 1.06 [14:02:28<10:28:46] +[titan] 2025-10-05 12:36:49,633 - root - INFO - step: 22910 loss: 2.0954 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:36:49,633 - root - INFO - lr: 2.2699e-05 gnorm: 1.13 [14:02:39<10:28:35] +[titan] 2025-10-05 12:37:00,494 - root - INFO - step: 22915 loss: 2.1261 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8788 +[titan] 2025-10-05 12:37:00,494 - root - INFO - lr: 2.2690e-05 gnorm: 1.09 [14:02:50<10:28:24] +[titan] 2025-10-05 12:37:11,342 - root - INFO - step: 22920 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8270 +[titan] 2025-10-05 12:37:11,343 - root - INFO - lr: 2.2682e-05 gnorm: 1.05 [14:03:01<10:28:13] +[titan] 2025-10-05 12:37:22,183 - root - INFO - step: 22925 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8036 +[titan] 2025-10-05 12:37:22,183 - root - INFO - lr: 2.2673e-05 gnorm: 1.04 [14:03:11<10:28:01] +[titan] 2025-10-05 12:37:33,033 - root - INFO - step: 22930 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9081 +[titan] 2025-10-05 12:37:33,033 - root - INFO - lr: 
2.2664e-05 gnorm: 1.08 [14:03:22<10:27:50] +[titan] 2025-10-05 12:37:43,902 - root - INFO - step: 22935 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:37:43,902 - root - INFO - lr: 2.2656e-05 gnorm: 1.04 [14:03:33<10:27:39] +[titan] 2025-10-05 12:37:54,792 - root - INFO - step: 22940 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 12:37:54,792 - root - INFO - lr: 2.2647e-05 gnorm: 1.09 [14:03:44<10:27:28] +[titan] 2025-10-05 12:38:05,628 - root - INFO - step: 22945 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8431 +[titan] 2025-10-05 12:38:05,628 - root - INFO - lr: 2.2638e-05 gnorm: 1.08 [14:03:55<10:27:17] +[titan] 2025-10-05 12:38:14,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:38:16,484 - root - INFO - step: 22950 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7713 +[titan] 2025-10-05 12:38:16,484 - root - INFO - lr: 2.2629e-05 gnorm: 1.08 [14:04:06<10:27:06] +[titan] 2025-10-05 12:38:27,334 - root - INFO - step: 22955 loss: 2.0812 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:38:27,334 - root - INFO - lr: 2.2621e-05 gnorm: 1.09 [14:04:17<10:26:54] +[titan] 2025-10-05 12:38:38,180 - root - INFO - step: 22960 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8473 +[titan] 2025-10-05 12:38:38,180 - root - INFO - lr: 2.2612e-05 gnorm: 1.14 [14:04:27<10:26:43] +[titan] 2025-10-05 12:38:49,045 - root - INFO - step: 22965 loss: 2.0894 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 12:38:49,045 - root - INFO - lr: 2.2603e-05 gnorm: 1.02 [14:04:38<10:26:32] +[titan] 2025-10-05 12:38:59,904 - root - INFO - step: 22970 loss: 2.0347 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7986 +[titan] 2025-10-05 12:38:59,904 - root - INFO - lr: 2.2595e-05 gnorm: 1.08 [14:04:49<10:26:21] +[titan] 2025-10-05 12:39:10,774 - root - INFO - step: 22975 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9048 +[titan] 2025-10-05 12:39:10,774 - root - INFO - lr: 2.2586e-05 gnorm: 1.09 [14:05:00<10:26:10] +[titan] 2025-10-05 12:39:21,640 - root - INFO - step: 22980 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:39:21,640 - root - INFO - lr: 
2.2577e-05 gnorm: 1.09 [14:05:11<10:25:59] +[titan] 2025-10-05 12:39:32,525 - root - INFO - step: 22985 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8780 +[titan] 2025-10-05 12:39:32,525 - root - INFO - lr: 2.2568e-05 gnorm: 1.07 [14:05:22<10:25:47] +[titan] 2025-10-05 12:39:43,368 - root - INFO - step: 22990 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 12:39:43,369 - root - INFO - lr: 2.2560e-05 gnorm: 1.08 [14:05:33<10:25:36] +[titan] 2025-10-05 12:39:54,216 - root - INFO - step: 22995 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 12:39:54,217 - root - INFO - lr: 2.2551e-05 gnorm: 1.02 [14:05:43<10:25:25] +[titan] 2025-10-05 12:40:02,883 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:40:05,066 - root - INFO - step: 23000 loss: 2.1507 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 12:40:05,066 - root - INFO - lr: 2.2542e-05 gnorm: 1.06 [14:05:54<10:25:14] +[titan] 2025-10-05 12:40:15,916 - root - INFO - step: 23005 loss: 2.1008 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 12:40:15,916 - root - INFO - lr: 2.2534e-05 gnorm: 1.09 [14:06:05<10:25:03] +[titan] 2025-10-05 12:40:26,775 - root - INFO - step: 23010 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8717 +[titan] 2025-10-05 12:40:26,775 - root - INFO - lr: 2.2525e-05 gnorm: 1.06 [14:06:16<10:24:52] +[titan] 2025-10-05 12:40:37,622 - root - INFO - step: 23015 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8876 +[titan] 2025-10-05 12:40:37,622 - root - INFO - lr: 2.2516e-05 gnorm: 1.06 [14:06:27<10:24:40] +[titan] 2025-10-05 12:40:48,479 - root - INFO - step: 23020 loss: 2.1422 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:40:48,480 - root - INFO - lr: 2.2507e-05 gnorm: 1.08 [14:06:38<10:24:29] +[titan] 2025-10-05 12:40:59,327 - root - INFO - step: 23025 loss: 2.0668 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:40:59,327 - root - INFO - lr: 2.2499e-05 gnorm: 1.05 [14:06:49<10:24:18] +[titan] 2025-10-05 12:41:10,188 - root - INFO - step: 23030 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:41:10,188 - root - INFO - lr: 
2.2490e-05 gnorm: 1.06 [14:06:59<10:24:07] +[titan] 2025-10-05 12:41:21,085 - root - INFO - step: 23035 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 12:41:21,085 - root - INFO - lr: 2.2481e-05 gnorm: 1.07 [14:07:10<10:23:56] +[titan] 2025-10-05 12:41:32,099 - root - INFO - step: 23040 loss: 2.1136 memory: 118.84GiB(85.28%) tps: 29,752 tflops: 412.76 mfu: 41.74% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:41:32,100 - root - INFO - lr: 2.2473e-05 gnorm: 1.05 [14:07:21<10:23:45] +[titan] 2025-10-05 12:41:32,277 - root - INFO - Dumping profiler traces at step 23040 +[titan] 2025-10-05 12:41:32,317 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:41:43,199 - root - INFO - step: 23045 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.58 mfu: 41.41% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 12:41:43,199 - root - INFO - lr: 2.2464e-05 gnorm: 1.07 [14:07:32<10:23:34] +[titan] 2025-10-05 12:41:51,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:41:54,062 - root - INFO - step: 23050 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:41:54,062 - root - INFO - lr: 2.2455e-05 gnorm: 1.10 [14:07:43<10:23:23] +[titan] 2025-10-05 12:42:04,939 - root - INFO - step: 23055 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 12:42:04,940 - root - INFO - lr: 2.2447e-05 gnorm: 1.10 [14:07:54<10:23:11] +[titan] 2025-10-05 12:42:15,807 - root - INFO - step: 23060 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 12:42:15,807 - root - INFO - lr: 2.2438e-05 gnorm: 1.09 [14:08:05<10:23:00] +[titan] 2025-10-05 12:42:26,648 - root - INFO - step: 23065 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 12:42:26,648 - root - INFO - lr: 2.2429e-05 gnorm: 1.06 [14:08:16<10:22:49] +[titan] 2025-10-05 12:42:37,523 - root - INFO - step: 23070 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 12:42:37,523 - root - INFO - lr: 2.2420e-05 gnorm: 1.06 [14:08:27<10:22:38] +[titan] 2025-10-05 12:42:48,380 - root - INFO - step: 23075 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 12:42:48,380 - root - INFO - lr: 2.2412e-05 gnorm: 1.08 [14:08:38<10:22:27] +[titan] 2025-10-05 12:42:59,255 - root - INFO - step: 23080 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 12:42:59,255 - root - INFO - lr: 
2.2403e-05 gnorm: 1.11 [14:08:48<10:22:16] +[titan] 2025-10-05 12:43:10,104 - root - INFO - step: 23085 loss: 2.0492 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 12:43:10,104 - root - INFO - lr: 2.2394e-05 gnorm: 1.06 [14:08:59<10:22:04] +[titan] 2025-10-05 12:43:20,963 - root - INFO - step: 23090 loss: 2.0906 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8478 +[titan] 2025-10-05 12:43:20,963 - root - INFO - lr: 2.2386e-05 gnorm: 1.07 [14:09:10<10:21:53] +[titan] 2025-10-05 12:43:31,830 - root - INFO - step: 23095 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:43:31,830 - root - INFO - lr: 2.2377e-05 gnorm: 1.06 [14:09:21<10:21:42] +[titan] 2025-10-05 12:43:40,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:43:42,684 - root - INFO - step: 23100 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 12:43:42,684 - root - INFO - lr: 2.2368e-05 gnorm: 1.08 [14:09:32<10:21:31] +[titan] 2025-10-05 12:43:53,521 - root - INFO - step: 23105 loss: 2.1541 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9043 +[titan] 2025-10-05 12:43:53,521 - root - INFO - lr: 2.2360e-05 gnorm: 1.12 [14:09:43<10:21:20] +[titan] 2025-10-05 12:44:04,389 - root - INFO - step: 23110 loss: 2.0636 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:44:04,389 - root - INFO - lr: 2.2351e-05 gnorm: 1.09 [14:09:54<10:21:09] +[titan] 2025-10-05 12:44:15,253 - root - INFO - step: 23115 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 12:44:15,254 - root - INFO - lr: 2.2342e-05 gnorm: 1.06 [14:10:04<10:20:58] +[titan] 2025-10-05 12:44:26,116 - root - INFO - step: 23120 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 12:44:26,116 - root - INFO - lr: 2.2334e-05 gnorm: 1.03 [14:10:15<10:20:46] +[titan] 2025-10-05 12:44:36,986 - root - INFO - step: 23125 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:44:36,986 - root - INFO - lr: 2.2325e-05 gnorm: 1.04 [14:10:26<10:20:35] +[titan] 2025-10-05 12:44:47,859 - root - INFO - step: 23130 loss: 2.1268 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 12:44:47,860 - root - INFO - lr: 
2.2316e-05 gnorm: 1.08 [14:10:37<10:20:24] +[titan] 2025-10-05 12:44:58,729 - root - INFO - step: 23135 loss: 2.1048 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 12:44:58,729 - root - INFO - lr: 2.2308e-05 gnorm: 1.10 [14:10:48<10:20:13] +[titan] 2025-10-05 12:45:09,592 - root - INFO - step: 23140 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8194 +[titan] 2025-10-05 12:45:09,592 - root - INFO - lr: 2.2299e-05 gnorm: 1.09 [14:10:59<10:20:02] +[titan] 2025-10-05 12:45:20,444 - root - INFO - step: 23145 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:45:20,444 - root - INFO - lr: 2.2290e-05 gnorm: 1.10 [14:11:10<10:19:51] +[titan] 2025-10-05 12:45:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:45:31,320 - root - INFO - step: 23150 loss: 2.0752 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:45:31,320 - root - INFO - lr: 2.2281e-05 gnorm: 1.05 [14:11:21<10:19:39] +[titan] 2025-10-05 12:45:42,180 - root - INFO - step: 23155 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:45:42,181 - root - INFO - lr: 2.2273e-05 gnorm: 1.10 [14:11:31<10:19:28] +[titan] 2025-10-05 12:45:53,058 - root - INFO - step: 23160 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 12:45:53,058 - root - INFO - lr: 2.2264e-05 gnorm: 1.06 [14:11:42<10:19:17] +[titan] 2025-10-05 12:46:03,966 - root - INFO - step: 23165 loss: 1.9940 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 12:46:03,966 - root - INFO - lr: 2.2255e-05 gnorm: 1.07 [14:11:53<10:19:06] +[titan] 2025-10-05 12:46:14,825 - root - INFO - step: 23170 loss: 2.1123 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8673 +[titan] 2025-10-05 12:46:14,825 - root - INFO - lr: 2.2247e-05 gnorm: 1.06 [14:12:04<10:18:55] +[titan] 2025-10-05 12:46:25,704 - root - INFO - step: 23175 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:46:25,705 - root - INFO - lr: 2.2238e-05 gnorm: 1.10 [14:12:15<10:18:44] +[titan] 2025-10-05 12:46:36,828 - root - INFO - step: 23180 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 29,460 tflops: 408.71 mfu: 41.33% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8561 +[titan] 2025-10-05 12:46:36,828 - root - INFO - lr: 
2.2229e-05 gnorm: 1.09 [14:12:26<10:18:33] +[titan] 2025-10-05 12:46:47,680 - root - INFO - step: 23185 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 12:46:47,680 - root - INFO - lr: 2.2221e-05 gnorm: 1.07 [14:12:37<10:18:22] +[titan] 2025-10-05 12:46:58,544 - root - INFO - step: 23190 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8817 +[titan] 2025-10-05 12:46:58,544 - root - INFO - lr: 2.2212e-05 gnorm: 1.07 [14:12:48<10:18:10] +[titan] 2025-10-05 12:47:09,423 - root - INFO - step: 23195 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 12:47:09,424 - root - INFO - lr: 2.2203e-05 gnorm: 1.07 [14:12:59<10:17:59] +[titan] 2025-10-05 12:47:18,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:47:20,285 - root - INFO - step: 23200 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:47:20,285 - root - INFO - lr: 2.2195e-05 gnorm: 1.05 [14:13:09<10:17:48] +[titan] 2025-10-05 12:47:31,161 - root - INFO - step: 23205 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:47:31,161 - root - INFO - lr: 2.2186e-05 gnorm: 1.07 [14:13:20<10:17:37] +[titan] 2025-10-05 12:47:42,031 - root - INFO - step: 23210 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 12:47:42,031 - root - INFO - lr: 2.2177e-05 gnorm: 1.08 [14:13:31<10:17:26] +[titan] 2025-10-05 12:47:52,877 - root - INFO - step: 23215 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8652 +[titan] 2025-10-05 12:47:52,877 - root - INFO - lr: 2.2169e-05 gnorm: 1.07 [14:13:42<10:17:15] +[titan] 2025-10-05 12:48:03,720 - root - INFO - step: 23220 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:48:03,720 - root - INFO - lr: 2.2160e-05 gnorm: 1.06 [14:13:53<10:17:03] +[titan] 2025-10-05 12:48:14,569 - root - INFO - step: 23225 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8277 +[titan] 2025-10-05 12:48:14,569 - root - INFO - lr: 2.2151e-05 gnorm: 1.07 [14:14:04<10:16:52] +[titan] 2025-10-05 12:48:25,456 - root - INFO - step: 23230 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:48:25,456 - root - INFO - lr: 
2.2143e-05 gnorm: 1.09 [14:14:15<10:16:41] +[titan] 2025-10-05 12:48:36,322 - root - INFO - step: 23235 loss: 2.0597 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 12:48:36,322 - root - INFO - lr: 2.2134e-05 gnorm: 1.05 [14:14:26<10:16:30] +[titan] 2025-10-05 12:48:47,191 - root - INFO - step: 23240 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 12:48:47,191 - root - INFO - lr: 2.2125e-05 gnorm: 1.06 [14:14:36<10:16:19] +[titan] 2025-10-05 12:48:58,072 - root - INFO - step: 23245 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8758 +[titan] 2025-10-05 12:48:58,072 - root - INFO - lr: 2.2117e-05 gnorm: 1.08 [14:14:47<10:16:08] +[titan] 2025-10-05 12:49:06,748 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:49:08,942 - root - INFO - step: 23250 loss: 2.0918 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 12:49:08,943 - root - INFO - lr: 2.2108e-05 gnorm: 1.10 [14:14:58<10:15:57] +[titan] 2025-10-05 12:49:19,822 - root - INFO - step: 23255 loss: 2.1127 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8665 +[titan] 2025-10-05 12:49:19,822 - root - INFO - lr: 2.2099e-05 gnorm: 1.05 [14:15:09<10:15:45] +[titan] 2025-10-05 12:49:30,722 - root - INFO - step: 23260 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8961 +[titan] 2025-10-05 12:49:30,723 - root - INFO - lr: 2.2091e-05 gnorm: 1.10 [14:15:20<10:15:34] +[titan] 2025-10-05 12:49:41,642 - root - INFO - step: 23265 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 12:49:41,642 - root - INFO - lr: 2.2082e-05 gnorm: 1.09 [14:15:31<10:15:23] +[titan] 2025-10-05 12:49:52,513 - root - INFO - step: 23270 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 12:49:52,513 - root - INFO - lr: 2.2073e-05 gnorm: 1.08 [14:15:42<10:15:12] +[titan] 2025-10-05 12:50:03,384 - root - INFO - step: 23275 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8161 +[titan] 2025-10-05 12:50:03,384 - root - INFO - lr: 2.2065e-05 gnorm: 1.07 [14:15:53<10:15:01] +[titan] 2025-10-05 12:50:14,264 - root - INFO - step: 23280 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 12:50:14,264 - root - INFO - lr: 
2.2056e-05 gnorm: 1.06 [14:16:03<10:14:50] +[titan] 2025-10-05 12:50:25,152 - root - INFO - step: 23285 loss: 2.1398 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 12:50:25,152 - root - INFO - lr: 2.2047e-05 gnorm: 1.05 [14:16:14<10:14:39] +[titan] 2025-10-05 12:50:36,029 - root - INFO - step: 23290 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 12:50:36,029 - root - INFO - lr: 2.2039e-05 gnorm: 1.05 [14:16:25<10:14:27] +[titan] 2025-10-05 12:50:46,933 - root - INFO - step: 23295 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:50:46,933 - root - INFO - lr: 2.2030e-05 gnorm: 1.10 [14:16:36<10:14:16] +[titan] 2025-10-05 12:50:55,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:50:57,805 - root - INFO - step: 23300 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:50:57,805 - root - INFO - lr: 2.2021e-05 gnorm: 1.10 [14:16:47<10:14:05] +[titan] 2025-10-05 12:51:08,673 - root - INFO - step: 23305 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:51:08,673 - root - INFO - lr: 2.2013e-05 gnorm: 1.06 [14:16:58<10:13:54] +[titan] 2025-10-05 12:51:19,553 - root - INFO - step: 23310 loss: 2.0851 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:51:19,553 - root - INFO - lr: 2.2004e-05 gnorm: 1.07 [14:17:09<10:13:43] +[titan] 2025-10-05 12:51:30,434 - root - INFO - step: 23315 loss: 2.0776 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 12:51:30,435 - root - INFO - lr: 2.1995e-05 gnorm: 1.07 [14:17:20<10:13:32] +[titan] 2025-10-05 12:51:41,297 - root - INFO - step: 23320 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:51:41,298 - root - INFO - lr: 2.1987e-05 gnorm: 1.06 [14:17:30<10:13:21] +[titan] 2025-10-05 12:51:52,171 - root - INFO - step: 23325 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8412 +[titan] 2025-10-05 12:51:52,171 - root - INFO - lr: 2.1978e-05 gnorm: 1.06 [14:17:41<10:13:09] +[titan] 2025-10-05 12:52:03,034 - root - INFO - step: 23330 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 12:52:03,034 - root - INFO - lr: 
2.1969e-05 gnorm: 1.06 [14:17:52<10:12:58] +[titan] 2025-10-05 12:52:13,904 - root - INFO - step: 23335 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:52:13,905 - root - INFO - lr: 2.1961e-05 gnorm: 1.08 [14:18:03<10:12:47] +[titan] 2025-10-05 12:52:24,765 - root - INFO - step: 23340 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 12:52:24,765 - root - INFO - lr: 2.1952e-05 gnorm: 1.05 [14:18:14<10:12:36] +[titan] 2025-10-05 12:52:35,613 - root - INFO - step: 23345 loss: 2.0713 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 12:52:35,613 - root - INFO - lr: 2.1944e-05 gnorm: 1.08 [14:18:25<10:12:25] +[titan] 2025-10-05 12:52:44,296 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:52:46,481 - root - INFO - step: 23350 loss: 2.0693 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:52:46,481 - root - INFO - lr: 2.1935e-05 gnorm: 1.06 [14:18:36<10:12:14] +[titan] 2025-10-05 12:52:57,341 - root - INFO - step: 23355 loss: 2.1206 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 12:52:57,341 - root - INFO - lr: 2.1926e-05 gnorm: 1.09 [14:18:47<10:12:03] +[titan] 2025-10-05 12:53:08,214 - root - INFO - step: 23360 loss: 2.1012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:53:08,214 - root - INFO - lr: 2.1918e-05 gnorm: 1.05 [14:18:57<10:11:51] +[titan] 2025-10-05 12:53:19,079 - root - INFO - step: 23365 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8477 +[titan] 2025-10-05 12:53:19,079 - root - INFO - lr: 2.1909e-05 gnorm: 1.11 [14:19:08<10:11:40] +[titan] 2025-10-05 12:53:29,939 - root - INFO - step: 23370 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8916 +[titan] 2025-10-05 12:53:29,939 - root - INFO - lr: 2.1900e-05 gnorm: 1.11 [14:19:19<10:11:29] +[titan] 2025-10-05 12:53:40,836 - root - INFO - step: 23375 loss: 2.0922 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 12:53:40,836 - root - INFO - lr: 2.1892e-05 gnorm: 1.13 [14:19:30<10:11:18] +[titan] 2025-10-05 12:53:51,725 - root - INFO - step: 23380 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 12:53:51,725 - root - INFO - lr: 
2.1883e-05 gnorm: 1.07 [14:19:41<10:11:07] +[titan] 2025-10-05 12:54:02,631 - root - INFO - step: 23385 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 12:54:02,631 - root - INFO - lr: 2.1874e-05 gnorm: 1.07 [14:19:52<10:10:56] +[titan] 2025-10-05 12:54:13,542 - root - INFO - step: 23390 loss: 2.0791 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:54:13,543 - root - INFO - lr: 2.1866e-05 gnorm: 1.12 [14:20:03<10:10:45] +[titan] 2025-10-05 12:54:24,401 - root - INFO - step: 23395 loss: 2.0662 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 12:54:24,401 - root - INFO - lr: 2.1857e-05 gnorm: 1.08 [14:20:14<10:10:33] +[titan] 2025-10-05 12:54:33,071 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:54:35,256 - root - INFO - step: 23400 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 12:54:35,256 - root - INFO - lr: 2.1848e-05 gnorm: 1.09 [14:20:24<10:10:22] +[titan] 2025-10-05 12:54:46,105 - root - INFO - step: 23405 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8085 +[titan] 2025-10-05 12:54:46,105 - root - INFO - lr: 2.1840e-05 gnorm: 1.03 [14:20:35<10:10:11] +[titan] 2025-10-05 12:54:56,966 - root - INFO - step: 23410 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8644 +[titan] 2025-10-05 12:54:56,966 - root - INFO - lr: 2.1831e-05 gnorm: 1.09 [14:20:46<10:10:00] +[titan] 2025-10-05 12:55:07,809 - root - INFO - step: 23415 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8747 +[titan] 2025-10-05 12:55:07,810 - root - INFO - lr: 2.1823e-05 gnorm: 1.09 [14:20:57<10:09:49] +[titan] 2025-10-05 12:55:18,648 - root - INFO - step: 23420 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:55:18,648 - root - INFO - lr: 2.1814e-05 gnorm: 1.09 [14:21:08<10:09:38] +[titan] 2025-10-05 12:55:29,531 - root - INFO - step: 23425 loss: 2.1312 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 12:55:29,531 - root - INFO - lr: 2.1805e-05 gnorm: 1.07 [14:21:19<10:09:27] +[titan] 2025-10-05 12:55:40,423 - root - INFO - step: 23430 loss: 2.0740 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:55:40,423 - root - INFO - lr: 
2.1797e-05 gnorm: 1.07 [14:21:30<10:09:15] +[titan] 2025-10-05 12:55:51,286 - root - INFO - step: 23435 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 12:55:51,286 - root - INFO - lr: 2.1788e-05 gnorm: 1.05 [14:21:40<10:09:04] +[titan] 2025-10-05 12:56:02,131 - root - INFO - step: 23440 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8791 +[titan] 2025-10-05 12:56:02,131 - root - INFO - lr: 2.1779e-05 gnorm: 1.05 [14:21:51<10:08:53] +[titan] 2025-10-05 12:56:12,982 - root - INFO - step: 23445 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 12:56:12,982 - root - INFO - lr: 2.1771e-05 gnorm: 1.07 [14:22:02<10:08:42] +[titan] 2025-10-05 12:56:21,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:56:23,837 - root - INFO - step: 23450 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 12:56:23,837 - root - INFO - lr: 2.1762e-05 gnorm: 1.03 [14:22:13<10:08:31] +[titan] 2025-10-05 12:56:34,722 - root - INFO - step: 23455 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 12:56:34,722 - root - INFO - lr: 2.1753e-05 gnorm: 1.08 [14:22:24<10:08:20] +[titan] 2025-10-05 12:56:45,579 - root - INFO - step: 23460 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 12:56:45,579 - root - INFO - lr: 2.1745e-05 gnorm: 1.05 [14:22:35<10:08:09] +[titan] 2025-10-05 12:56:56,409 - root - INFO - step: 23465 loss: 2.0982 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 12:56:56,409 - root - INFO - lr: 2.1736e-05 gnorm: 1.07 [14:22:46<10:07:57] +[titan] 2025-10-05 12:57:07,229 - root - INFO - step: 23470 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:57:07,229 - root - INFO - lr: 2.1728e-05 gnorm: 1.08 [14:22:56<10:07:46] +[titan] 2025-10-05 12:57:18,049 - root - INFO - step: 23475 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 12:57:18,049 - root - INFO - lr: 2.1719e-05 gnorm: 1.09 [14:23:07<10:07:35] +[titan] 2025-10-05 12:57:28,860 - root - INFO - step: 23480 loss: 2.0930 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:57:28,860 - root - INFO - lr: 
2.1710e-05 gnorm: 1.07 [14:23:18<10:07:24] +[titan] 2025-10-05 12:57:39,712 - root - INFO - step: 23485 loss: 2.1212 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8737 +[titan] 2025-10-05 12:57:39,712 - root - INFO - lr: 2.1702e-05 gnorm: 1.09 [14:23:29<10:07:13] +[titan] 2025-10-05 12:57:50,568 - root - INFO - step: 23490 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 12:57:50,568 - root - INFO - lr: 2.1693e-05 gnorm: 1.06 [14:23:40<10:07:01] +[titan] 2025-10-05 12:58:01,418 - root - INFO - step: 23495 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 12:58:01,418 - root - INFO - lr: 2.1684e-05 gnorm: 1.13 [14:23:51<10:06:50] +[titan] 2025-10-05 12:58:10,087 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:58:12,263 - root - INFO - step: 23500 loss: 2.0793 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:58:12,263 - root - INFO - lr: 2.1676e-05 gnorm: 1.05 [14:24:01<10:06:39] +[titan] 2025-10-05 12:58:23,119 - root - INFO - step: 23505 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 12:58:23,119 - root - INFO - lr: 2.1667e-05 gnorm: 1.09 [14:24:12<10:06:28] +[titan] 2025-10-05 12:58:33,963 - root - INFO - step: 23510 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 12:58:33,964 - root - INFO - lr: 2.1659e-05 gnorm: 1.07 [14:24:23<10:06:17] +[titan] 2025-10-05 12:58:44,834 - root - INFO - step: 23515 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 12:58:44,834 - root - INFO - lr: 2.1650e-05 gnorm: 1.06 [14:24:34<10:06:06] +[titan] 2025-10-05 12:58:55,708 - root - INFO - step: 23520 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8363 +[titan] 2025-10-05 12:58:55,708 - root - INFO - lr: 2.1641e-05 gnorm: 1.05 [14:24:45<10:05:55] +[titan] 2025-10-05 12:59:06,592 - root - INFO - step: 23525 loss: 2.0619 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 12:59:06,593 - root - INFO - lr: 2.1633e-05 gnorm: 1.06 [14:24:56<10:05:43] +[titan] 2025-10-05 12:59:17,459 - root - INFO - step: 23530 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8941 +[titan] 2025-10-05 12:59:17,459 - root - INFO - lr: 
2.1624e-05 gnorm: 1.08 [14:25:07<10:05:32] +[titan] 2025-10-05 12:59:28,332 - root - INFO - step: 23535 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:59:28,332 - root - INFO - lr: 2.1616e-05 gnorm: 1.07 [14:25:18<10:05:21] +[titan] 2025-10-05 12:59:39,189 - root - INFO - step: 23540 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 12:59:39,190 - root - INFO - lr: 2.1607e-05 gnorm: 1.06 [14:25:28<10:05:10] +[titan] 2025-10-05 12:59:50,087 - root - INFO - step: 23545 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8219 +[titan] 2025-10-05 12:59:50,087 - root - INFO - lr: 2.1598e-05 gnorm: 1.07 [14:25:39<10:04:59] +[titan] 2025-10-05 12:59:58,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:00:01,039 - root - INFO - step: 23550 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 13:00:01,039 - root - INFO - lr: 2.1590e-05 gnorm: 1.13 [14:25:50<10:04:48] +[titan] 2025-10-05 13:00:05,573 - root - INFO - Dumping profiler traces at step 23552 +[titan] 2025-10-05 13:00:05,608 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 13:00:12,148 - root - INFO - step: 23555 loss: 2.0620 memory: 118.84GiB(85.28%) tps: 29,498 tflops: 409.24 mfu: 41.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 13:00:12,148 - root - INFO - lr: 2.1581e-05 gnorm: 1.07 [14:26:01<10:04:37] +[titan] 2025-10-05 13:00:23,000 - root - INFO - step: 23560 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 13:00:23,000 - root - INFO - lr: 2.1572e-05 gnorm: 1.11 [14:26:12<10:04:26] +[titan] 2025-10-05 13:00:33,832 - root - INFO - step: 23565 loss: 2.1010 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.69 mfu: 42.44% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 13:00:33,833 - root - INFO - lr: 2.1564e-05 gnorm: 1.09 [14:26:23<10:04:14] +[titan] 2025-10-05 13:00:44,700 - root - INFO - step: 23570 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 13:00:44,700 - root - INFO - lr: 2.1555e-05 gnorm: 1.03 [14:26:34<10:04:03] +[titan] 2025-10-05 13:00:55,558 - root - INFO - step: 23575 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 13:00:55,558 - root - INFO - lr: 2.1547e-05 gnorm: 1.08 [14:26:45<10:03:52] +[titan] 2025-10-05 13:01:06,406 - root - INFO - step: 23580 loss: 
2.1114 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 13:01:06,406 - root - INFO - lr: 2.1538e-05 gnorm: 1.10 [14:26:56<10:03:41] +[titan] 2025-10-05 13:01:17,310 - root - INFO - step: 23585 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 13:01:17,310 - root - INFO - lr: 2.1529e-05 gnorm: 1.11 [14:27:06<10:03:30] +[titan] 2025-10-05 13:01:28,160 - root - INFO - step: 23590 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:01:28,161 - root - INFO - lr: 2.1521e-05 gnorm: 1.07 [14:27:17<10:03:19] +[titan] 2025-10-05 13:01:39,026 - root - INFO - step: 23595 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 13:01:39,026 - root - INFO - lr: 2.1512e-05 gnorm: 1.08 [14:27:28<10:03:08] +[titan] 2025-10-05 13:01:47,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:01:49,899 - root - INFO - step: 23600 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 13:01:49,899 - root - INFO - lr: 2.1504e-05 gnorm: 1.10 [14:27:39<10:02:56] +[titan] 2025-10-05 13:02:00,769 - root - INFO - step: 23605 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 13:02:00,769 - root - INFO - lr: 2.1495e-05 gnorm: 1.04 [14:27:50<10:02:45] +[titan] 2025-10-05 13:02:11,618 - root - INFO - step: 23610 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 13:02:11,618 - root - INFO - lr: 2.1486e-05 gnorm: 1.09 [14:28:01<10:02:34] +[titan] 2025-10-05 13:02:22,509 - root - INFO - step: 23615 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 13:02:22,509 - root - INFO - lr: 2.1478e-05 gnorm: 1.07 [14:28:12<10:02:23] +[titan] 2025-10-05 13:02:33,369 - root - INFO - step: 23620 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 13:02:33,369 - root - INFO - lr: 2.1469e-05 gnorm: 1.06 [14:28:23<10:02:12] +[titan] 2025-10-05 13:02:44,239 - root - INFO - step: 23625 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8846 +[titan] 2025-10-05 13:02:44,239 - root - INFO - lr: 2.1461e-05 gnorm: 1.08 [14:28:33<10:02:01] +[titan] 2025-10-05 13:02:55,117 - root - INFO - step: 23630 loss: 2.0120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:02:55,117 - root - INFO - lr: 
2.1452e-05 gnorm: 1.06 [14:28:44<10:01:50] +[titan] 2025-10-05 13:03:05,938 - root - INFO - step: 23635 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 13:03:05,938 - root - INFO - lr: 2.1443e-05 gnorm: 1.04 [14:28:55<10:01:38] +[titan] 2025-10-05 13:03:16,775 - root - INFO - step: 23640 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 13:03:16,775 - root - INFO - lr: 2.1435e-05 gnorm: 1.06 [14:29:06<10:01:27] +[titan] 2025-10-05 13:03:27,645 - root - INFO - step: 23645 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8228 +[titan] 2025-10-05 13:03:27,645 - root - INFO - lr: 2.1426e-05 gnorm: 1.08 [14:29:17<10:01:16] +[titan] 2025-10-05 13:03:36,315 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:03:38,490 - root - INFO - step: 23650 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 13:03:38,491 - root - INFO - lr: 2.1418e-05 gnorm: 1.14 [14:29:28<10:01:05] +[titan] 2025-10-05 13:03:49,367 - root - INFO - step: 23655 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 13:03:49,367 - root - INFO - lr: 2.1409e-05 gnorm: 1.07 [14:29:39<10:00:54] +[titan] 2025-10-05 13:04:00,220 - root - INFO - step: 23660 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 13:04:00,220 - root - INFO - lr: 2.1400e-05 gnorm: 1.05 [14:29:49<10:00:43] +[titan] 2025-10-05 13:04:11,080 - root - INFO - step: 23665 loss: 2.0569 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:04:11,080 - root - INFO - lr: 2.1392e-05 gnorm: 1.05 [14:30:00<10:00:32] +[titan] 2025-10-05 13:04:21,931 - root - INFO - step: 23670 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 13:04:21,931 - root - INFO - lr: 2.1383e-05 gnorm: 1.08 [14:30:11<10:00:20] +[titan] 2025-10-05 13:04:32,799 - root - INFO - step: 23675 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:04:32,799 - root - INFO - lr: 2.1375e-05 gnorm: 1.09 [14:30:22<10:00:09] +[titan] 2025-10-05 13:04:43,687 - root - INFO - step: 23680 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8405 +[titan] 2025-10-05 13:04:43,687 - root - INFO - lr: 
2.1366e-05 gnorm: 1.09 [14:30:33< 9:59:58] +[titan] 2025-10-05 13:04:54,557 - root - INFO - step: 23685 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8546 +[titan] 2025-10-05 13:04:54,557 - root - INFO - lr: 2.1358e-05 gnorm: 1.06 [14:30:44< 9:59:47] +[titan] 2025-10-05 13:05:05,423 - root - INFO - step: 23690 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 13:05:05,423 - root - INFO - lr: 2.1349e-05 gnorm: 1.11 [14:30:55< 9:59:36] +[titan] 2025-10-05 13:05:16,292 - root - INFO - step: 23695 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 13:05:16,292 - root - INFO - lr: 2.1340e-05 gnorm: 1.07 [14:31:05< 9:59:25] +[titan] 2025-10-05 13:05:24,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:05:27,152 - root - INFO - step: 23700 loss: 2.0847 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 13:05:27,152 - root - INFO - lr: 2.1332e-05 gnorm: 1.06 [14:31:16< 9:59:14] +[titan] 2025-10-05 13:05:38,037 - root - INFO - step: 23705 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 13:05:38,037 - root - INFO - lr: 2.1323e-05 gnorm: 1.07 [14:31:27< 9:59:02] +[titan] 2025-10-05 13:05:48,993 - root - INFO - step: 23710 loss: 2.0935 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8500 +[titan] 2025-10-05 13:05:48,993 - root - INFO - lr: 2.1315e-05 gnorm: 1.06 [14:31:38< 9:58:51] +[titan] 2025-10-05 13:05:59,853 - root - INFO - step: 23715 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 13:05:59,853 - root - INFO - lr: 2.1306e-05 gnorm: 1.12 [14:31:49< 9:58:40] +[titan] 2025-10-05 13:06:10,728 - root - INFO - step: 23720 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 13:06:10,728 - root - INFO - lr: 2.1297e-05 gnorm: 1.05 [14:32:00< 9:58:29] +[titan] 2025-10-05 13:06:21,603 - root - INFO - step: 23725 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8106 +[titan] 2025-10-05 13:06:21,603 - root - INFO - lr: 2.1289e-05 gnorm: 1.04 [14:32:11< 9:58:18] +[titan] 2025-10-05 13:06:32,482 - root - INFO - step: 23730 loss: 2.0312 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 13:06:32,482 - root - INFO - lr: 
2.1280e-05 gnorm: 1.09 [14:32:22< 9:58:07] +[titan] 2025-10-05 13:06:43,351 - root - INFO - step: 23735 loss: 2.0992 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 13:06:43,351 - root - INFO - lr: 2.1272e-05 gnorm: 1.09 [14:32:33< 9:57:56] +[titan] 2025-10-05 13:06:54,243 - root - INFO - step: 23740 loss: 2.0278 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 13:06:54,243 - root - INFO - lr: 2.1263e-05 gnorm: 1.08 [14:32:43< 9:57:45] +[titan] 2025-10-05 13:07:05,147 - root - INFO - step: 23745 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:07:05,147 - root - INFO - lr: 2.1255e-05 gnorm: 1.08 [14:32:54< 9:57:33] +[titan] 2025-10-05 13:07:13,826 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:07:16,019 - root - INFO - step: 23750 loss: 2.0022 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 13:07:16,019 - root - INFO - lr: 2.1246e-05 gnorm: 1.06 [14:33:05< 9:57:22] +[titan] 2025-10-05 13:07:26,891 - root - INFO - step: 23755 loss: 2.0412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 13:07:26,891 - root - INFO - lr: 2.1237e-05 gnorm: 1.10 [14:33:16< 9:57:11] +[titan] 2025-10-05 13:07:37,752 - root - INFO - step: 23760 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:07:37,753 - root - INFO - lr: 2.1229e-05 gnorm: 1.10 [14:33:27< 9:57:00] +[titan] 2025-10-05 13:07:48,618 - root - INFO - step: 23765 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 13:07:48,618 - root - INFO - lr: 2.1220e-05 gnorm: 1.07 [14:33:38< 9:56:49] +[titan] 2025-10-05 13:07:59,505 - root - INFO - step: 23770 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:07:59,505 - root - INFO - lr: 2.1212e-05 gnorm: 1.10 [14:33:49< 9:56:38] +[titan] 2025-10-05 13:08:10,407 - root - INFO - step: 23775 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 13:08:10,408 - root - INFO - lr: 2.1203e-05 gnorm: 1.07 [14:34:00< 9:56:27] +[titan] 2025-10-05 13:08:21,270 - root - INFO - step: 23780 loss: 2.0507 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 13:08:21,271 - root - INFO - lr: 
2.1195e-05 gnorm: 1.12 [14:34:10< 9:56:16] +[titan] 2025-10-05 13:08:32,141 - root - INFO - step: 23785 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:08:32,141 - root - INFO - lr: 2.1186e-05 gnorm: 1.07 [14:34:21< 9:56:04] +[titan] 2025-10-05 13:08:43,013 - root - INFO - step: 23790 loss: 2.0543 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8158 +[titan] 2025-10-05 13:08:43,013 - root - INFO - lr: 2.1177e-05 gnorm: 1.08 [14:34:32< 9:55:53] +[titan] 2025-10-05 13:08:53,898 - root - INFO - step: 23795 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 13:08:53,898 - root - INFO - lr: 2.1169e-05 gnorm: 1.08 [14:34:43< 9:55:42] +[titan] 2025-10-05 13:09:02,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:09:04,767 - root - INFO - step: 23800 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:09:04,767 - root - INFO - lr: 2.1160e-05 gnorm: 1.07 [14:34:54< 9:55:31] +[titan] 2025-10-05 13:09:15,675 - root - INFO - step: 23805 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:09:15,675 - root - INFO - lr: 2.1152e-05 gnorm: 1.07 [14:35:05< 9:55:20] +[titan] 2025-10-05 13:09:26,546 - root - INFO - step: 23810 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8689 +[titan] 2025-10-05 13:09:26,546 - root - INFO - lr: 2.1143e-05 gnorm: 1.06 [14:35:16< 9:55:09] +[titan] 2025-10-05 13:09:37,416 - root - INFO - step: 23815 loss: 2.0689 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:09:37,416 - root - INFO - lr: 2.1135e-05 gnorm: 1.04 [14:35:27< 9:54:58] +[titan] 2025-10-05 13:09:48,302 - root - INFO - step: 23820 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 13:09:48,302 - root - INFO - lr: 2.1126e-05 gnorm: 1.05 [14:35:37< 9:54:46] +[titan] 2025-10-05 13:09:59,200 - root - INFO - step: 23825 loss: 2.1145 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8692 +[titan] 2025-10-05 13:09:59,200 - root - INFO - lr: 2.1118e-05 gnorm: 1.10 [14:35:48< 9:54:35] +[titan] 2025-10-05 13:10:10,087 - root - INFO - step: 23830 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:10:10,087 - root - INFO - lr: 
2.1109e-05 gnorm: 1.07 [14:35:59< 9:54:24] +[titan] 2025-10-05 13:10:20,968 - root - INFO - step: 23835 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8802 +[titan] 2025-10-05 13:10:20,968 - root - INFO - lr: 2.1100e-05 gnorm: 1.11 [14:36:10< 9:54:13] +[titan] 2025-10-05 13:10:31,877 - root - INFO - step: 23840 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 13:10:31,877 - root - INFO - lr: 2.1092e-05 gnorm: 1.07 [14:36:21< 9:54:02] +[titan] 2025-10-05 13:10:42,750 - root - INFO - step: 23845 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8826 +[titan] 2025-10-05 13:10:42,751 - root - INFO - lr: 2.1083e-05 gnorm: 1.08 [14:36:32< 9:53:51] +[titan] 2025-10-05 13:10:51,448 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:10:53,641 - root - INFO - step: 23850 loss: 2.0254 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 13:10:53,642 - root - INFO - lr: 2.1075e-05 gnorm: 1.07 [14:36:43< 9:53:40] +[titan] 2025-10-05 13:11:04,523 - root - INFO - step: 23855 loss: 2.0986 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 13:11:04,523 - root - INFO - lr: 2.1066e-05 gnorm: 1.09 [14:36:54< 9:53:29] +[titan] 2025-10-05 13:11:15,407 - root - INFO - step: 23860 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 13:11:15,407 - root - INFO - lr: 2.1058e-05 gnorm: 1.07 [14:37:05< 9:53:17] +[titan] 2025-10-05 13:11:26,299 - root - INFO - step: 23865 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8171 +[titan] 2025-10-05 13:11:26,299 - root - INFO - lr: 2.1049e-05 gnorm: 1.08 [14:37:15< 9:53:06] +[titan] 2025-10-05 13:11:37,198 - root - INFO - step: 23870 loss: 2.1119 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:11:37,198 - root - INFO - lr: 2.1041e-05 gnorm: 1.10 [14:37:26< 9:52:55] +[titan] 2025-10-05 13:11:48,068 - root - INFO - step: 23875 loss: 2.0789 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 13:11:48,068 - root - INFO - lr: 2.1032e-05 gnorm: 1.03 [14:37:37< 9:52:44] +[titan] 2025-10-05 13:11:58,937 - root - INFO - step: 23880 loss: 2.1572 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9068 +[titan] 2025-10-05 13:11:58,937 - root - INFO - lr: 
2.1023e-05 gnorm: 1.10 [14:37:48< 9:52:33] +[titan] 2025-10-05 13:12:09,818 - root - INFO - step: 23885 loss: 2.1050 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:12:09,818 - root - INFO - lr: 2.1015e-05 gnorm: 1.07 [14:37:59< 9:52:22] +[titan] 2025-10-05 13:12:20,691 - root - INFO - step: 23890 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 13:12:20,691 - root - INFO - lr: 2.1006e-05 gnorm: 1.04 [14:38:10< 9:52:11] +[titan] 2025-10-05 13:12:31,575 - root - INFO - step: 23895 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 13:12:31,575 - root - INFO - lr: 2.0998e-05 gnorm: 1.07 [14:38:21< 9:52:00] +[titan] 2025-10-05 13:12:40,267 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:12:42,455 - root - INFO - step: 23900 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 13:12:42,455 - root - INFO - lr: 2.0989e-05 gnorm: 1.07 [14:38:32< 9:51:48] +[titan] 2025-10-05 13:12:53,357 - root - INFO - step: 23905 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 13:12:53,357 - root - INFO - lr: 2.0981e-05 gnorm: 1.10 [14:38:42< 9:51:37] +[titan] 2025-10-05 13:13:04,239 - root - INFO - step: 23910 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8487 +[titan] 2025-10-05 13:13:04,239 - root - INFO - lr: 2.0972e-05 gnorm: 1.07 [14:38:53< 9:51:26] +[titan] 2025-10-05 13:13:15,113 - root - INFO - step: 23915 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 13:13:15,113 - root - INFO - lr: 2.0964e-05 gnorm: 1.10 [14:39:04< 9:51:15] +[titan] 2025-10-05 13:13:25,979 - root - INFO - step: 23920 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 13:13:25,980 - root - INFO - lr: 2.0955e-05 gnorm: 1.05 [14:39:15< 9:51:04] +[titan] 2025-10-05 13:13:36,839 - root - INFO - step: 23925 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8255 +[titan] 2025-10-05 13:13:36,839 - root - INFO - lr: 2.0947e-05 gnorm: 1.08 [14:39:26< 9:50:53] +[titan] 2025-10-05 13:13:47,718 - root - INFO - step: 23930 loss: 2.0539 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 13:13:47,718 - root - INFO - lr: 
2.0938e-05 gnorm: 1.07 [14:39:37< 9:50:42] +[titan] 2025-10-05 13:13:58,659 - root - INFO - step: 23935 loss: 2.1295 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 13:13:58,659 - root - INFO - lr: 2.0929e-05 gnorm: 1.09 [14:39:48< 9:50:31] +[titan] 2025-10-05 13:14:09,537 - root - INFO - step: 23940 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7883 +[titan] 2025-10-05 13:14:09,537 - root - INFO - lr: 2.0921e-05 gnorm: 1.06 [14:39:59< 9:50:19] +[titan] 2025-10-05 13:14:20,423 - root - INFO - step: 23945 loss: 2.0391 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8022 +[titan] 2025-10-05 13:14:20,423 - root - INFO - lr: 2.0912e-05 gnorm: 1.08 [14:40:10< 9:50:08] +[titan] 2025-10-05 13:14:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:14:31,294 - root - INFO - step: 23950 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8110 +[titan] 2025-10-05 13:14:31,294 - root - INFO - lr: 2.0904e-05 gnorm: 1.02 [14:40:20< 9:49:57] +[titan] 2025-10-05 13:14:42,149 - root - INFO - step: 23955 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:14:42,149 - root - INFO - lr: 2.0895e-05 gnorm: 1.11 [14:40:31< 9:49:46] +[titan] 2025-10-05 13:14:53,021 - root - INFO - step: 23960 loss: 2.0544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 13:14:53,021 - root - INFO - lr: 2.0887e-05 gnorm: 1.07 [14:40:42< 9:49:35] +[titan] 2025-10-05 13:15:03,924 - root - INFO - step: 23965 loss: 2.0186 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 13:15:03,925 - root - INFO - lr: 2.0878e-05 gnorm: 1.08 [14:40:53< 9:49:24] +[titan] 2025-10-05 13:15:14,778 - root - INFO - step: 23970 loss: 2.0244 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 13:15:14,778 - root - INFO - lr: 2.0870e-05 gnorm: 1.10 [14:41:04< 9:49:13] +[titan] 2025-10-05 13:15:25,658 - root - INFO - step: 23975 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:15:25,658 - root - INFO - lr: 2.0861e-05 gnorm: 1.05 [14:41:15< 9:49:02] +[titan] 2025-10-05 13:15:36,526 - root - INFO - step: 23980 loss: 2.1043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 13:15:36,526 - root - INFO - lr: 
2.0853e-05 gnorm: 1.11 [14:41:26< 9:48:50] +[titan] 2025-10-05 13:15:47,390 - root - INFO - step: 23985 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 13:15:47,390 - root - INFO - lr: 2.0844e-05 gnorm: 1.10 [14:41:37< 9:48:39] +[titan] 2025-10-05 13:15:58,289 - root - INFO - step: 23990 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 13:15:58,289 - root - INFO - lr: 2.0836e-05 gnorm: 1.06 [14:41:47< 9:48:28] +[titan] 2025-10-05 13:16:09,157 - root - INFO - step: 23995 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8039 +[titan] 2025-10-05 13:16:09,157 - root - INFO - lr: 2.0827e-05 gnorm: 1.11 [14:41:58< 9:48:17] +[titan] 2025-10-05 13:16:17,877 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:16:20,053 - root - INFO - step: 24000 loss: 2.0037 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:16:20,053 - root - INFO - lr: 2.0819e-05 gnorm: 1.08 [14:42:09< 9:48:06] +[titan] 2025-10-05 13:16:30,898 - root - INFO - step: 24005 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 13:16:30,899 - root - INFO - lr: 2.0810e-05 gnorm: 1.07 [14:42:20< 9:47:55] +[titan] 2025-10-05 13:16:41,756 - root - INFO - step: 24010 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 13:16:41,757 - root - INFO - lr: 2.0802e-05 gnorm: 1.05 [14:42:31< 9:47:44] +[titan] 2025-10-05 13:16:52,618 - root - INFO - step: 24015 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8347 +[titan] 2025-10-05 13:16:52,618 - root - INFO - lr: 2.0793e-05 gnorm: 1.12 [14:42:42< 9:47:33] +[titan] 2025-10-05 13:17:03,489 - root - INFO - step: 24020 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:17:03,489 - root - INFO - lr: 2.0785e-05 gnorm: 1.10 [14:42:53< 9:47:21] +[titan] 2025-10-05 13:17:14,356 - root - INFO - step: 24025 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 13:17:14,356 - root - INFO - lr: 2.0776e-05 gnorm: 1.08 [14:43:03< 9:47:10] +[titan] 2025-10-05 13:17:25,293 - root - INFO - step: 24030 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 13:17:25,293 - root - INFO - lr: 
2.0767e-05 gnorm: 1.14 [14:43:14< 9:46:59] +[titan] 2025-10-05 13:17:36,153 - root - INFO - step: 24035 loss: 2.0553 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8156 +[titan] 2025-10-05 13:17:36,153 - root - INFO - lr: 2.0759e-05 gnorm: 1.07 [14:43:25< 9:46:48] +[titan] 2025-10-05 13:17:47,022 - root - INFO - step: 24040 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 13:17:47,022 - root - INFO - lr: 2.0750e-05 gnorm: 1.08 [14:43:36< 9:46:37] +[titan] 2025-10-05 13:17:57,898 - root - INFO - step: 24045 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 13:17:57,899 - root - INFO - lr: 2.0742e-05 gnorm: 1.07 [14:43:47< 9:46:26] +[titan] 2025-10-05 13:18:06,588 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:18:08,771 - root - INFO - step: 24050 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:18:08,771 - root - INFO - lr: 2.0733e-05 gnorm: 1.05 [14:43:58< 9:46:15] +[titan] 2025-10-05 13:18:19,609 - root - INFO - step: 24055 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 13:18:19,609 - root - INFO - lr: 2.0725e-05 gnorm: 1.10 [14:44:09< 9:46:03] +[titan] 2025-10-05 13:18:30,457 - root - INFO - step: 24060 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 13:18:30,457 - root - INFO - lr: 2.0716e-05 gnorm: 1.12 [14:44:20< 9:45:52] +[titan] 2025-10-05 13:18:39,418 - root - INFO - Dumping profiler traces at step 24064 +[titan] 2025-10-05 13:18:39,453 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 13:18:41,660 - root - INFO - step: 24065 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,250 tflops: 405.80 mfu: 41.03% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7997 +[titan] 2025-10-05 13:18:41,660 - root - INFO - lr: 2.0708e-05 gnorm: 1.05 [14:44:31< 9:45:41] +[titan] 2025-10-05 13:18:52,499 - root - INFO - step: 24070 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 13:18:52,499 - root - INFO - lr: 2.0699e-05 gnorm: 1.05 [14:44:42< 9:45:30] +[titan] 2025-10-05 13:19:03,398 - root - INFO - step: 24075 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:19:03,398 - root - INFO - lr: 2.0691e-05 gnorm: 1.08 [14:44:53< 9:45:19] +[titan] 2025-10-05 13:19:14,221 - root - INFO - step: 24080 loss: 
2.0782 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 13:19:14,221 - root - INFO - lr: 2.0682e-05 gnorm: 1.08 [14:45:03< 9:45:08] +[titan] 2025-10-05 13:19:25,059 - root - INFO - step: 24085 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 13:19:25,059 - root - INFO - lr: 2.0674e-05 gnorm: 1.05 [14:45:14< 9:44:57] +[titan] 2025-10-05 13:19:35,885 - root - INFO - step: 24090 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 13:19:35,885 - root - INFO - lr: 2.0665e-05 gnorm: 1.08 [14:45:25< 9:44:46] +[titan] 2025-10-05 13:19:46,755 - root - INFO - step: 24095 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 13:19:46,755 - root - INFO - lr: 2.0657e-05 gnorm: 1.09 [14:45:36< 9:44:35] +[titan] 2025-10-05 13:19:55,428 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:19:57,605 - root - INFO - step: 24100 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 13:19:57,605 - root - INFO - lr: 2.0648e-05 gnorm: 1.05 [14:45:47< 9:44:23] +[titan] 2025-10-05 13:20:08,458 - root - INFO - step: 24105 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 13:20:08,458 - root - INFO - lr: 2.0640e-05 gnorm: 1.11 [14:45:58< 9:44:12] +[titan] 2025-10-05 13:20:19,304 - root - INFO - step: 24110 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:20:19,305 - root - INFO - lr: 2.0631e-05 gnorm: 1.04 [14:46:08< 9:44:01] +[titan] 2025-10-05 13:20:30,155 - root - INFO - step: 24115 loss: 2.0297 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 13:20:30,155 - root - INFO - lr: 2.0623e-05 gnorm: 1.07 [14:46:19< 9:43:50] +[titan] 2025-10-05 13:20:41,004 - root - INFO - step: 24120 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:20:41,005 - root - INFO - lr: 2.0614e-05 gnorm: 1.07 [14:46:30< 9:43:39] +[titan] 2025-10-05 13:20:51,867 - root - INFO - step: 24125 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8402 +[titan] 2025-10-05 13:20:51,867 - root - INFO - lr: 2.0606e-05 gnorm: 1.12 [14:46:41< 9:43:28] +[titan] 2025-10-05 13:21:02,698 - root - INFO - step: 24130 loss: 2.0869 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 13:21:02,699 - root - INFO - lr: 
2.0597e-05 gnorm: 1.06 [14:46:52< 9:43:17] +[titan] 2025-10-05 13:21:13,527 - root - INFO - step: 24135 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 13:21:13,527 - root - INFO - lr: 2.0589e-05 gnorm: 1.10 [14:47:03< 9:43:05] +[titan] 2025-10-05 13:21:24,355 - root - INFO - step: 24140 loss: 2.0475 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8102 +[titan] 2025-10-05 13:21:24,355 - root - INFO - lr: 2.0580e-05 gnorm: 1.07 [14:47:13< 9:42:54] +[titan] 2025-10-05 13:21:35,208 - root - INFO - step: 24145 loss: 2.1059 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:21:35,208 - root - INFO - lr: 2.0572e-05 gnorm: 1.10 [14:47:24< 9:42:43] +[titan] 2025-10-05 13:21:43,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:21:46,037 - root - INFO - step: 24150 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 13:21:46,037 - root - INFO - lr: 2.0563e-05 gnorm: 1.05 [14:47:35< 9:42:32] +[titan] 2025-10-05 13:21:56,862 - root - INFO - step: 24155 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8062 +[titan] 2025-10-05 13:21:56,862 - root - INFO - lr: 2.0555e-05 gnorm: 1.05 [14:47:46< 9:42:21] +[titan] 2025-10-05 13:22:07,697 - root - INFO - step: 24160 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:22:07,697 - root - INFO - lr: 2.0546e-05 gnorm: 1.07 [14:47:57< 9:42:10] +[titan] 2025-10-05 13:22:18,551 - root - INFO - step: 24165 loss: 2.0865 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 13:22:18,551 - root - INFO - lr: 2.0538e-05 gnorm: 1.09 [14:48:08< 9:41:59] +[titan] 2025-10-05 13:22:29,396 - root - INFO - step: 24170 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:22:29,396 - root - INFO - lr: 2.0529e-05 gnorm: 1.08 [14:48:19< 9:41:47] +[titan] 2025-10-05 13:22:40,227 - root - INFO - step: 24175 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8696 +[titan] 2025-10-05 13:22:40,227 - root - INFO - lr: 2.0521e-05 gnorm: 1.09 [14:48:29< 9:41:36] +[titan] 2025-10-05 13:22:51,092 - root - INFO - step: 24180 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 13:22:51,093 - root - INFO - lr: 
2.0512e-05 gnorm: 1.09 [14:48:40< 9:41:25] +[titan] 2025-10-05 13:23:01,952 - root - INFO - step: 24185 loss: 1.9953 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 13:23:01,953 - root - INFO - lr: 2.0504e-05 gnorm: 1.07 [14:48:51< 9:41:14] +[titan] 2025-10-05 13:23:12,844 - root - INFO - step: 24190 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 13:23:12,844 - root - INFO - lr: 2.0496e-05 gnorm: 1.15 [14:49:02< 9:41:03] +[titan] 2025-10-05 13:23:23,695 - root - INFO - step: 24195 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 13:23:23,695 - root - INFO - lr: 2.0487e-05 gnorm: 1.07 [14:49:13< 9:40:52] +[titan] 2025-10-05 13:23:32,375 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:23:34,564 - root - INFO - step: 24200 loss: 2.0236 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:23:34,564 - root - INFO - lr: 2.0479e-05 gnorm: 1.07 [14:49:24< 9:40:41] +[titan] 2025-10-05 13:23:45,424 - root - INFO - step: 24205 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 13:23:45,424 - root - INFO - lr: 2.0470e-05 gnorm: 1.07 [14:49:35< 9:40:29] +[titan] 2025-10-05 13:23:56,267 - root - INFO - step: 24210 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 13:23:56,267 - root - INFO - lr: 2.0462e-05 gnorm: 1.03 [14:49:45< 9:40:18] +[titan] 2025-10-05 13:24:07,115 - root - INFO - step: 24215 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8655 +[titan] 2025-10-05 13:24:07,115 - root - INFO - lr: 2.0453e-05 gnorm: 1.12 [14:49:56< 9:40:07] +[titan] 2025-10-05 13:24:17,952 - root - INFO - step: 24220 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 13:24:17,952 - root - INFO - lr: 2.0445e-05 gnorm: 1.13 [14:50:07< 9:39:56] +[titan] 2025-10-05 13:24:28,825 - root - INFO - step: 24225 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8509 +[titan] 2025-10-05 13:24:28,826 - root - INFO - lr: 2.0436e-05 gnorm: 1.06 [14:50:18< 9:39:45] +[titan] 2025-10-05 13:24:39,649 - root - INFO - step: 24230 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 13:24:39,649 - root - INFO - lr: 
2.0428e-05 gnorm: 1.10 [14:50:29< 9:39:34] +[titan] 2025-10-05 13:24:50,487 - root - INFO - step: 24235 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 13:24:50,487 - root - INFO - lr: 2.0419e-05 gnorm: 1.07 [14:50:40< 9:39:23] +[titan] 2025-10-05 13:25:01,334 - root - INFO - step: 24240 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:25:01,335 - root - INFO - lr: 2.0411e-05 gnorm: 1.02 [14:50:50< 9:39:11] +[titan] 2025-10-05 13:25:12,172 - root - INFO - step: 24245 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 13:25:12,172 - root - INFO - lr: 2.0402e-05 gnorm: 1.07 [14:51:01< 9:39:00] +[titan] 2025-10-05 13:25:20,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:25:23,024 - root - INFO - step: 24250 loss: 2.1386 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:25:23,025 - root - INFO - lr: 2.0394e-05 gnorm: 1.10 [14:51:12< 9:38:49] +[titan] 2025-10-05 13:25:33,889 - root - INFO - step: 24255 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:25:33,889 - root - INFO - lr: 2.0385e-05 gnorm: 1.11 [14:51:23< 9:38:38] +[titan] 2025-10-05 13:25:44,730 - root - INFO - step: 24260 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 13:25:44,730 - root - INFO - lr: 2.0377e-05 gnorm: 1.07 [14:51:34< 9:38:27] +[titan] 2025-10-05 13:25:55,582 - root - INFO - step: 24265 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 13:25:55,582 - root - INFO - lr: 2.0368e-05 gnorm: 1.07 [14:51:45< 9:38:16] +[titan] 2025-10-05 13:26:06,446 - root - INFO - step: 24270 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:26:06,447 - root - INFO - lr: 2.0360e-05 gnorm: 1.08 [14:51:56< 9:38:05] +[titan] 2025-10-05 13:26:17,296 - root - INFO - step: 24275 loss: 2.0367 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8000 +[titan] 2025-10-05 13:26:17,296 - root - INFO - lr: 2.0352e-05 gnorm: 1.08 [14:52:06< 9:37:53] +[titan] 2025-10-05 13:26:28,151 - root - INFO - step: 24280 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 13:26:28,151 - root - INFO - lr: 
2.0343e-05 gnorm: 1.09 [14:52:17< 9:37:42] +[titan] 2025-10-05 13:26:39,050 - root - INFO - step: 24285 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 13:26:39,051 - root - INFO - lr: 2.0335e-05 gnorm: 1.10 [14:52:28< 9:37:31] +[titan] 2025-10-05 13:26:49,902 - root - INFO - step: 24290 loss: 2.0746 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:26:49,902 - root - INFO - lr: 2.0326e-05 gnorm: 1.07 [14:52:39< 9:37:20] +[titan] 2025-10-05 13:27:00,733 - root - INFO - step: 24295 loss: 2.1061 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 13:27:00,734 - root - INFO - lr: 2.0318e-05 gnorm: 1.11 [14:52:50< 9:37:09] +[titan] 2025-10-05 13:27:09,415 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:27:11,587 - root - INFO - step: 24300 loss: 2.0702 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 13:27:11,587 - root - INFO - lr: 2.0309e-05 gnorm: 1.10 [14:53:01< 9:36:58] +[titan] 2025-10-05 13:27:22,433 - root - INFO - step: 24305 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 13:27:22,433 - root - INFO - lr: 2.0301e-05 gnorm: 1.05 [14:53:12< 9:36:47] +[titan] 2025-10-05 13:27:33,270 - root - INFO - step: 24310 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 13:27:33,270 - root - INFO - lr: 2.0292e-05 gnorm: 1.06 [14:53:22< 9:36:36] +[titan] 2025-10-05 13:27:44,105 - root - INFO - step: 24315 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 13:27:44,105 - root - INFO - lr: 2.0284e-05 gnorm: 1.07 [14:53:33< 9:36:24] +[titan] 2025-10-05 13:27:54,981 - root - INFO - step: 24320 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 13:27:54,981 - root - INFO - lr: 2.0275e-05 gnorm: 1.13 [14:53:44< 9:36:13] +[titan] 2025-10-05 13:28:05,837 - root - INFO - step: 24325 loss: 2.1113 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:28:05,838 - root - INFO - lr: 2.0267e-05 gnorm: 1.14 [14:53:55< 9:36:02] +[titan] 2025-10-05 13:28:16,705 - root - INFO - step: 24330 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 13:28:16,705 - root - INFO - lr: 
2.0258e-05 gnorm: 1.05 [14:54:06< 9:35:51] +[titan] 2025-10-05 13:28:27,566 - root - INFO - step: 24335 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8683 +[titan] 2025-10-05 13:28:27,566 - root - INFO - lr: 2.0250e-05 gnorm: 1.15 [14:54:17< 9:35:40] +[titan] 2025-10-05 13:28:38,418 - root - INFO - step: 24340 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:28:38,418 - root - INFO - lr: 2.0242e-05 gnorm: 1.08 [14:54:28< 9:35:29] +[titan] 2025-10-05 13:28:49,296 - root - INFO - step: 24345 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 13:28:49,296 - root - INFO - lr: 2.0233e-05 gnorm: 1.14 [14:54:38< 9:35:18] +[titan] 2025-10-05 13:28:58,013 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:29:00,192 - root - INFO - step: 24350 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:29:00,192 - root - INFO - lr: 2.0225e-05 gnorm: 1.18 [14:54:49< 9:35:06] +[titan] 2025-10-05 13:29:11,072 - root - INFO - step: 24355 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 13:29:11,072 - root - INFO - lr: 2.0216e-05 gnorm: 1.09 [14:55:00< 9:34:55] +[titan] 2025-10-05 13:29:21,925 - root - INFO - step: 24360 loss: 2.1089 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 13:29:21,925 - root - INFO - lr: 2.0208e-05 gnorm: 1.07 [14:55:11< 9:34:44] +[titan] 2025-10-05 13:29:32,780 - root - INFO - step: 24365 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:29:32,781 - root - INFO - lr: 2.0199e-05 gnorm: 1.10 [14:55:22< 9:34:33] +[titan] 2025-10-05 13:29:43,663 - root - INFO - step: 24370 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:29:43,663 - root - INFO - lr: 2.0191e-05 gnorm: 1.10 [14:55:33< 9:34:22] +[titan] 2025-10-05 13:29:54,539 - root - INFO - step: 24375 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:29:54,540 - root - INFO - lr: 2.0182e-05 gnorm: 1.12 [14:55:44< 9:34:11] +[titan] 2025-10-05 13:30:05,417 - root - INFO - step: 24380 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 13:30:05,417 - root - INFO - lr: 
2.0174e-05 gnorm: 1.12 [14:55:55< 9:34:00] +[titan] 2025-10-05 13:30:16,350 - root - INFO - step: 24385 loss: 2.1282 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 13:30:16,350 - root - INFO - lr: 2.0166e-05 gnorm: 1.05 [14:56:05< 9:33:49] +[titan] 2025-10-05 13:30:27,217 - root - INFO - step: 24390 loss: 2.0751 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:30:27,217 - root - INFO - lr: 2.0157e-05 gnorm: 1.12 [14:56:16< 9:33:38] +[titan] 2025-10-05 13:30:38,065 - root - INFO - step: 24395 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:30:38,065 - root - INFO - lr: 2.0149e-05 gnorm: 1.08 [14:56:27< 9:33:26] +[titan] 2025-10-05 13:30:46,746 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:30:48,926 - root - INFO - step: 24400 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 13:30:48,926 - root - INFO - lr: 2.0140e-05 gnorm: 1.09 [14:56:38< 9:33:15] +[titan] 2025-10-05 13:30:59,781 - root - INFO - step: 24405 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8166 +[titan] 2025-10-05 13:30:59,781 - root - INFO - lr: 2.0132e-05 gnorm: 1.07 [14:56:49< 9:33:04] +[titan] 2025-10-05 13:31:10,656 - root - INFO - step: 24410 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 13:31:10,656 - root - INFO - lr: 2.0123e-05 gnorm: 1.11 [14:57:00< 9:32:53] +[titan] 2025-10-05 13:31:21,555 - root - INFO - step: 24415 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8268 +[titan] 2025-10-05 13:31:21,555 - root - INFO - lr: 2.0115e-05 gnorm: 1.09 [14:57:11< 9:32:42] +[titan] 2025-10-05 13:31:32,426 - root - INFO - step: 24420 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 13:31:32,426 - root - INFO - lr: 2.0107e-05 gnorm: 1.07 [14:57:22< 9:32:31] +[titan] 2025-10-05 13:31:43,323 - root - INFO - step: 24425 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 13:31:43,323 - root - INFO - lr: 2.0098e-05 gnorm: 1.31 [14:57:32< 9:32:20] +[titan] 2025-10-05 13:31:54,203 - root - INFO - step: 24430 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 13:31:54,204 - root - INFO - lr: 
2.0090e-05 gnorm: 1.05 [14:57:43< 9:32:09] +[titan] 2025-10-05 13:32:05,075 - root - INFO - step: 24435 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 13:32:05,075 - root - INFO - lr: 2.0081e-05 gnorm: 1.07 [14:57:54< 9:31:57] +[titan] 2025-10-05 13:32:15,980 - root - INFO - step: 24440 loss: 2.1665 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 13:32:15,980 - root - INFO - lr: 2.0073e-05 gnorm: 1.09 [14:58:05< 9:31:46] +[titan] 2025-10-05 13:32:26,906 - root - INFO - step: 24445 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 13:32:26,906 - root - INFO - lr: 2.0064e-05 gnorm: 1.08 [14:58:16< 9:31:35] +[titan] 2025-10-05 13:32:35,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:32:37,766 - root - INFO - step: 24450 loss: 2.0220 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 13:32:37,766 - root - INFO - lr: 2.0056e-05 gnorm: 1.06 [14:58:27< 9:31:24] +[titan] 2025-10-05 13:32:48,638 - root - INFO - step: 24455 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 13:32:48,638 - root - INFO - lr: 2.0048e-05 gnorm: 1.07 [14:58:38< 9:31:13] +[titan] 2025-10-05 13:32:59,507 - root - INFO - step: 24460 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:32:59,507 - root - INFO - lr: 2.0039e-05 gnorm: 1.07 [14:58:49< 9:31:02] +[titan] 2025-10-05 13:33:10,393 - root - INFO - step: 24465 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:33:10,393 - root - INFO - lr: 2.0031e-05 gnorm: 1.05 [14:58:59< 9:30:51] +[titan] 2025-10-05 13:33:21,345 - root - INFO - step: 24470 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8103 +[titan] 2025-10-05 13:33:21,345 - root - INFO - lr: 2.0022e-05 gnorm: 1.06 [14:59:10< 9:30:40] +[titan] 2025-10-05 13:33:32,228 - root - INFO - step: 24475 loss: 2.0788 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:33:32,228 - root - INFO - lr: 2.0014e-05 gnorm: 1.09 [14:59:21< 9:30:29] +[titan] 2025-10-05 13:33:43,179 - root - INFO - step: 24480 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8709 +[titan] 2025-10-05 13:33:43,179 - root - INFO - lr: 
2.0006e-05 gnorm: 1.10 [14:59:32< 9:30:18] +[titan] 2025-10-05 13:33:54,062 - root - INFO - step: 24485 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 13:33:54,062 - root - INFO - lr: 1.9997e-05 gnorm: 1.07 [14:59:43< 9:30:06] +[titan] 2025-10-05 13:34:04,940 - root - INFO - step: 24490 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 13:34:04,940 - root - INFO - lr: 1.9989e-05 gnorm: 1.06 [14:59:54< 9:29:55] +[titan] 2025-10-05 13:34:15,844 - root - INFO - step: 24495 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8069 +[titan] 2025-10-05 13:34:15,844 - root - INFO - lr: 1.9980e-05 gnorm: 1.09 [15:00:05< 9:29:44] +[titan] 2025-10-05 13:34:24,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:34:26,706 - root - INFO - step: 24500 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 13:34:26,706 - root - INFO - lr: 1.9972e-05 gnorm: 1.11 [15:00:16< 9:29:33] +[titan] 2025-10-05 13:34:37,585 - root - INFO - step: 24505 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 13:34:37,585 - root - INFO - lr: 1.9963e-05 gnorm: 1.08 [15:00:27< 9:29:22] +[titan] 2025-10-05 13:34:48,499 - root - INFO - step: 24510 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 13:34:48,500 - root - INFO - lr: 1.9955e-05 gnorm: 1.12 [15:00:38< 9:29:11] +[titan] 2025-10-05 13:34:59,379 - root - INFO - step: 24515 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 13:34:59,379 - root - INFO - lr: 1.9947e-05 gnorm: 1.09 [15:00:48< 9:29:00] +[titan] 2025-10-05 13:35:10,244 - root - INFO - step: 24520 loss: 2.0374 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8014 +[titan] 2025-10-05 13:35:10,245 - root - INFO - lr: 1.9938e-05 gnorm: 1.03 [15:00:59< 9:28:49] +[titan] 2025-10-05 13:35:21,112 - root - INFO - step: 24525 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 13:35:21,113 - root - INFO - lr: 1.9930e-05 gnorm: 1.06 [15:01:10< 9:28:37] +[titan] 2025-10-05 13:35:31,956 - root - INFO - step: 24530 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8681 +[titan] 2025-10-05 13:35:31,957 - root - INFO - lr: 
1.9921e-05 gnorm: 1.08 [15:01:21< 9:28:26] +[titan] 2025-10-05 13:35:42,842 - root - INFO - step: 24535 loss: 2.0794 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8375 +[titan] 2025-10-05 13:35:42,842 - root - INFO - lr: 1.9913e-05 gnorm: 1.10 [15:01:32< 9:28:15] +[titan] 2025-10-05 13:35:53,706 - root - INFO - step: 24540 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 13:35:53,706 - root - INFO - lr: 1.9905e-05 gnorm: 1.11 [15:01:43< 9:28:04] +[titan] 2025-10-05 13:36:04,625 - root - INFO - step: 24545 loss: 2.1385 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:36:04,625 - root - INFO - lr: 1.9896e-05 gnorm: 1.07 [15:01:54< 9:27:53] +[titan] 2025-10-05 13:36:13,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:36:15,477 - root - INFO - step: 24550 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 13:36:15,477 - root - INFO - lr: 1.9888e-05 gnorm: 1.08 [15:02:05< 9:27:42] +[titan] 2025-10-05 13:36:26,344 - root - INFO - step: 24555 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 13:36:26,344 - root - INFO - lr: 1.9879e-05 gnorm: 1.07 [15:02:15< 9:27:31] +[titan] 2025-10-05 13:36:37,204 - root - INFO - step: 24560 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 13:36:37,204 - root - INFO - lr: 1.9871e-05 gnorm: 1.07 [15:02:26< 9:27:20] +[titan] 2025-10-05 13:36:48,082 - root - INFO - step: 24565 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 13:36:48,082 - root - INFO - lr: 1.9863e-05 gnorm: 1.05 [15:02:37< 9:27:09] +[titan] 2025-10-05 13:36:58,948 - root - INFO - step: 24570 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 13:36:58,948 - root - INFO - lr: 1.9854e-05 gnorm: 1.07 [15:02:48< 9:26:57] +[titan] 2025-10-05 13:37:09,939 - root - INFO - step: 24575 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 29,815 tflops: 413.64 mfu: 41.82% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 13:37:09,939 - root - INFO - lr: 1.9846e-05 gnorm: 1.08 [15:02:59< 9:26:46] +[titan] 2025-10-05 13:37:12,314 - root - INFO - Dumping profiler traces at step 24576 +[titan] 2025-10-05 13:37:12,355 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:37:21,087 - root - INFO - step: 24580 loss: 
2.0679 memory: 118.84GiB(85.28%) tps: 29,394 tflops: 407.79 mfu: 41.23% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:37:21,087 - root - INFO - lr: 1.9837e-05 gnorm: 1.10 [15:03:10< 9:26:35] +[titan] 2025-10-05 13:37:31,945 - root - INFO - step: 24585 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:37:31,945 - root - INFO - lr: 1.9829e-05 gnorm: 1.04 [15:03:21< 9:26:24] +[titan] 2025-10-05 13:37:42,812 - root - INFO - step: 24590 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8024 +[titan] 2025-10-05 13:37:42,812 - root - INFO - lr: 1.9821e-05 gnorm: 1.05 [15:03:32< 9:26:13] +[titan] 2025-10-05 13:37:53,676 - root - INFO - step: 24595 loss: 2.0523 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 13:37:53,677 - root - INFO - lr: 1.9812e-05 gnorm: 1.07 [15:03:43< 9:26:02] +[titan] 2025-10-05 13:38:02,369 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:38:04,554 - root - INFO - step: 24600 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8521 +[titan] 2025-10-05 13:38:04,554 - root - INFO - lr: 1.9804e-05 gnorm: 1.09 [15:03:54< 9:25:51] +[titan] 2025-10-05 13:38:15,471 - root - INFO - step: 24605 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 13:38:15,471 - root - INFO - lr: 1.9796e-05 gnorm: 1.07 [15:04:05< 9:25:40] +[titan] 2025-10-05 13:38:26,377 - root - INFO - step: 24610 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8870 +[titan] 2025-10-05 13:38:26,377 - root - INFO - lr: 1.9787e-05 gnorm: 1.12 [15:04:15< 9:25:29] +[titan] 2025-10-05 13:38:37,243 - root - INFO - step: 24615 loss: 2.0786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8367 +[titan] 2025-10-05 13:38:37,243 - root - INFO - lr: 1.9779e-05 gnorm: 1.09 [15:04:26< 9:25:18] +[titan] 2025-10-05 13:38:48,119 - root - INFO - step: 24620 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 13:38:48,119 - root - INFO - lr: 1.9770e-05 gnorm: 1.07 [15:04:37< 9:25:07] +[titan] 2025-10-05 13:38:58,977 - root - INFO - step: 24625 loss: 2.0721 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8321 +[titan] 2025-10-05 13:38:58,977 - root - INFO - lr: 1.9762e-05 gnorm: 1.11 [15:04:48< 9:24:55] +[titan] 2025-10-05 13:39:09,830 - root - INFO - step: 24630 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8930 +[titan] 2025-10-05 13:39:09,830 - root - INFO - lr: 
1.9754e-05 gnorm: 1.13 [15:04:59< 9:24:44] +[titan] 2025-10-05 13:39:20,732 - root - INFO - step: 24635 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 13:39:20,733 - root - INFO - lr: 1.9745e-05 gnorm: 1.10 [15:05:10< 9:24:33] +[titan] 2025-10-05 13:39:31,629 - root - INFO - step: 24640 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 13:39:31,629 - root - INFO - lr: 1.9737e-05 gnorm: 1.08 [15:05:21< 9:24:22] +[titan] 2025-10-05 13:39:42,484 - root - INFO - step: 24645 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 13:39:42,484 - root - INFO - lr: 1.9728e-05 gnorm: 1.05 [15:05:32< 9:24:11] +[titan] 2025-10-05 13:39:51,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:39:53,346 - root - INFO - step: 24650 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 13:39:53,346 - root - INFO - lr: 1.9720e-05 gnorm: 1.06 [15:05:42< 9:24:00] +[titan] 2025-10-05 13:40:04,203 - root - INFO - step: 24655 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 13:40:04,203 - root - INFO - lr: 1.9712e-05 gnorm: 1.12 [15:05:53< 9:23:49] +[titan] 2025-10-05 13:40:15,073 - root - INFO - step: 24660 loss: 2.0882 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 13:40:15,073 - root - INFO - lr: 1.9703e-05 gnorm: 1.10 [15:06:04< 9:23:38] +[titan] 2025-10-05 13:40:25,992 - root - INFO - step: 24665 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:40:25,992 - root - INFO - lr: 1.9695e-05 gnorm: 1.06 [15:06:15< 9:23:26] +[titan] 2025-10-05 13:40:36,894 - root - INFO - step: 24670 loss: 2.0856 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 13:40:36,894 - root - INFO - lr: 1.9687e-05 gnorm: 1.12 [15:06:26< 9:23:15] +[titan] 2025-10-05 13:40:47,766 - root - INFO - step: 24675 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 13:40:47,766 - root - INFO - lr: 1.9678e-05 gnorm: 1.09 [15:06:37< 9:23:04] +[titan] 2025-10-05 13:40:58,618 - root - INFO - step: 24680 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8134 +[titan] 2025-10-05 13:40:58,618 - root - INFO - lr: 
1.9670e-05 gnorm: 1.07 [15:06:48< 9:22:53] +[titan] 2025-10-05 13:41:09,490 - root - INFO - step: 24685 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 13:41:09,490 - root - INFO - lr: 1.9662e-05 gnorm: 1.13 [15:06:59< 9:22:42] +[titan] 2025-10-05 13:41:20,418 - root - INFO - step: 24690 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 13:41:20,418 - root - INFO - lr: 1.9653e-05 gnorm: 1.06 [15:07:09< 9:22:31] +[titan] 2025-10-05 13:41:31,285 - root - INFO - step: 24695 loss: 2.0651 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:41:31,285 - root - INFO - lr: 1.9645e-05 gnorm: 1.08 [15:07:20< 9:22:20] +[titan] 2025-10-05 13:41:39,977 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:41:42,156 - root - INFO - step: 24700 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 13:41:42,156 - root - INFO - lr: 1.9636e-05 gnorm: 1.11 [15:07:31< 9:22:09] +[titan] 2025-10-05 13:41:53,063 - root - INFO - step: 24705 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 13:41:53,063 - root - INFO - lr: 1.9628e-05 gnorm: 1.08 [15:07:42< 9:21:58] +[titan] 2025-10-05 13:42:03,922 - root - INFO - step: 24710 loss: 2.0804 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 13:42:03,922 - root - INFO - lr: 1.9620e-05 gnorm: 1.06 [15:07:53< 9:21:46] +[titan] 2025-10-05 13:42:14,790 - root - INFO - step: 24715 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8274 +[titan] 2025-10-05 13:42:14,790 - root - INFO - lr: 1.9611e-05 gnorm: 1.09 [15:08:04< 9:21:35] +[titan] 2025-10-05 13:42:25,702 - root - INFO - step: 24720 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 13:42:25,702 - root - INFO - lr: 1.9603e-05 gnorm: 1.11 [15:08:15< 9:21:24] +[titan] 2025-10-05 13:42:36,573 - root - INFO - step: 24725 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 13:42:36,573 - root - INFO - lr: 1.9595e-05 gnorm: 1.08 [15:08:26< 9:21:13] +[titan] 2025-10-05 13:42:47,423 - root - INFO - step: 24730 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 13:42:47,424 - root - INFO - lr: 
1.9586e-05 gnorm: 1.12 [15:08:36< 9:21:02] +[titan] 2025-10-05 13:42:58,321 - root - INFO - step: 24735 loss: 2.1290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 13:42:58,321 - root - INFO - lr: 1.9578e-05 gnorm: 1.08 [15:08:47< 9:20:51] +[titan] 2025-10-05 13:43:09,170 - root - INFO - step: 24740 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:43:09,171 - root - INFO - lr: 1.9570e-05 gnorm: 1.12 [15:08:58< 9:20:40] +[titan] 2025-10-05 13:43:20,002 - root - INFO - step: 24745 loss: 2.0612 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8216 +[titan] 2025-10-05 13:43:20,002 - root - INFO - lr: 1.9561e-05 gnorm: 1.11 [15:09:09< 9:20:29] +[titan] 2025-10-05 13:43:28,699 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:43:30,874 - root - INFO - step: 24750 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 13:43:30,874 - root - INFO - lr: 1.9553e-05 gnorm: 1.08 [15:09:20< 9:20:18] +[titan] 2025-10-05 13:43:41,719 - root - INFO - step: 24755 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 13:43:41,719 - root - INFO - lr: 1.9545e-05 gnorm: 1.11 [15:09:31< 9:20:06] +[titan] 2025-10-05 13:43:52,574 - root - INFO - step: 24760 loss: 2.0568 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 13:43:52,574 - root - INFO - lr: 1.9536e-05 gnorm: 1.07 [15:09:42< 9:19:55] +[titan] 2025-10-05 13:44:03,465 - root - INFO - step: 24765 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 13:44:03,465 - root - INFO - lr: 1.9528e-05 gnorm: 1.07 [15:09:53< 9:19:44] +[titan] 2025-10-05 13:44:14,316 - root - INFO - step: 24770 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 13:44:14,316 - root - INFO - lr: 1.9519e-05 gnorm: 1.05 [15:10:03< 9:19:33] +[titan] 2025-10-05 13:44:25,153 - root - INFO - step: 24775 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8456 +[titan] 2025-10-05 13:44:25,154 - root - INFO - lr: 1.9511e-05 gnorm: 1.07 [15:10:14< 9:19:22] +[titan] 2025-10-05 13:44:36,002 - root - INFO - step: 24780 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 13:44:36,003 - root - INFO - lr: 
1.9503e-05 gnorm: 1.08 [15:10:25< 9:19:11] +[titan] 2025-10-05 13:44:46,858 - root - INFO - step: 24785 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 13:44:46,858 - root - INFO - lr: 1.9494e-05 gnorm: 1.07 [15:10:36< 9:19:00] +[titan] 2025-10-05 13:44:57,702 - root - INFO - step: 24790 loss: 2.0838 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 13:44:57,702 - root - INFO - lr: 1.9486e-05 gnorm: 1.08 [15:10:47< 9:18:49] +[titan] 2025-10-05 13:45:08,535 - root - INFO - step: 24795 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 13:45:08,535 - root - INFO - lr: 1.9478e-05 gnorm: 1.06 [15:10:58< 9:18:37] +[titan] 2025-10-05 13:45:17,247 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:45:19,421 - root - INFO - step: 24800 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8718 +[titan] 2025-10-05 13:45:19,421 - root - INFO - lr: 1.9469e-05 gnorm: 1.08 [15:11:08< 9:18:26] +[titan] 2025-10-05 13:45:30,265 - root - INFO - step: 24805 loss: 2.0238 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 13:45:30,265 - root - INFO - lr: 1.9461e-05 gnorm: 1.07 [15:11:19< 9:18:15] +[titan] 2025-10-05 13:45:41,104 - root - INFO - step: 24810 loss: 2.0540 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8154 +[titan] 2025-10-05 13:45:41,104 - root - INFO - lr: 1.9453e-05 gnorm: 1.07 [15:11:30< 9:18:04] +[titan] 2025-10-05 13:45:51,953 - root - INFO - step: 24815 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8778 +[titan] 2025-10-05 13:45:51,953 - root - INFO - lr: 1.9444e-05 gnorm: 1.11 [15:11:41< 9:17:53] +[titan] 2025-10-05 13:46:02,816 - root - INFO - step: 24820 loss: 2.1004 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 13:46:02,816 - root - INFO - lr: 1.9436e-05 gnorm: 1.07 [15:11:52< 9:17:42] +[titan] 2025-10-05 13:46:13,676 - root - INFO - step: 24825 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 13:46:13,676 - root - INFO - lr: 1.9428e-05 gnorm: 1.10 [15:12:03< 9:17:31] +[titan] 2025-10-05 13:46:24,572 - root - INFO - step: 24830 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 13:46:24,572 - root - INFO - lr: 
1.9419e-05 gnorm: 1.10 [15:12:14< 9:17:20] +[titan] 2025-10-05 13:46:35,432 - root - INFO - step: 24835 loss: 2.1026 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:46:35,432 - root - INFO - lr: 1.9411e-05 gnorm: 1.07 [15:12:24< 9:17:08] +[titan] 2025-10-05 13:46:46,286 - root - INFO - step: 24840 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:46:46,287 - root - INFO - lr: 1.9403e-05 gnorm: 1.10 [15:12:35< 9:16:57] +[titan] 2025-10-05 13:46:57,123 - root - INFO - step: 24845 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8212 +[titan] 2025-10-05 13:46:57,123 - root - INFO - lr: 1.9394e-05 gnorm: 1.07 [15:12:46< 9:16:46] +[titan] 2025-10-05 13:47:05,788 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:47:07,960 - root - INFO - step: 24850 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 13:47:07,960 - root - INFO - lr: 1.9386e-05 gnorm: 1.13 [15:12:57< 9:16:35] +[titan] 2025-10-05 13:47:18,794 - root - INFO - step: 24855 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 13:47:18,794 - root - INFO - lr: 1.9378e-05 gnorm: 1.07 [15:13:08< 9:16:24] +[titan] 2025-10-05 13:47:29,672 - root - INFO - step: 24860 loss: 2.1559 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 13:47:29,672 - root - INFO - lr: 1.9369e-05 gnorm: 1.08 [15:13:19< 9:16:13] +[titan] 2025-10-05 13:47:40,555 - root - INFO - step: 24865 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 13:47:40,556 - root - INFO - lr: 1.9361e-05 gnorm: 1.14 [15:13:30< 9:16:02] +[titan] 2025-10-05 13:47:51,413 - root - INFO - step: 24870 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 13:47:51,413 - root - INFO - lr: 1.9353e-05 gnorm: 1.07 [15:13:40< 9:15:51] +[titan] 2025-10-05 13:48:02,253 - root - INFO - step: 24875 loss: 2.0532 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 13:48:02,253 - root - INFO - lr: 1.9345e-05 gnorm: 1.10 [15:13:51< 9:15:39] +[titan] 2025-10-05 13:48:13,099 - root - INFO - step: 24880 loss: 2.0338 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 13:48:13,099 - root - INFO - lr: 
1.9336e-05 gnorm: 1.08 [15:14:02< 9:15:28] +[titan] 2025-10-05 13:48:23,933 - root - INFO - step: 24885 loss: 2.0834 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 13:48:23,933 - root - INFO - lr: 1.9328e-05 gnorm: 1.08 [15:14:13< 9:15:17] +[titan] 2025-10-05 13:48:34,822 - root - INFO - step: 24890 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7910 +[titan] 2025-10-05 13:48:34,822 - root - INFO - lr: 1.9320e-05 gnorm: 1.05 [15:14:24< 9:15:06] +[titan] 2025-10-05 13:48:45,673 - root - INFO - step: 24895 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:48:45,673 - root - INFO - lr: 1.9311e-05 gnorm: 1.13 [15:14:35< 9:14:55] +[titan] 2025-10-05 13:48:54,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:48:56,513 - root - INFO - step: 24900 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 13:48:56,513 - root - INFO - lr: 1.9303e-05 gnorm: 1.08 [15:14:46< 9:14:44] +[titan] 2025-10-05 13:49:07,354 - root - INFO - step: 24905 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 13:49:07,354 - root - INFO - lr: 1.9295e-05 gnorm: 1.09 [15:14:56< 9:14:33] +[titan] 2025-10-05 13:49:18,206 - root - INFO - step: 24910 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 13:49:18,206 - root - INFO - lr: 1.9286e-05 gnorm: 1.06 [15:15:07< 9:14:22] +[titan] 2025-10-05 13:49:29,079 - root - INFO - step: 24915 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 13:49:29,080 - root - INFO - lr: 1.9278e-05 gnorm: 1.07 [15:15:18< 9:14:10] +[titan] 2025-10-05 13:49:39,928 - root - INFO - step: 24920 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8887 +[titan] 2025-10-05 13:49:39,928 - root - INFO - lr: 1.9270e-05 gnorm: 1.10 [15:15:29< 9:13:59] +[titan] 2025-10-05 13:49:50,803 - root - INFO - step: 24925 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7714 +[titan] 2025-10-05 13:49:50,804 - root - INFO - lr: 1.9261e-05 gnorm: 1.05 [15:15:40< 9:13:48] +[titan] 2025-10-05 13:50:01,632 - root - INFO - step: 24930 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7977 +[titan] 2025-10-05 13:50:01,632 - root - INFO - lr: 
1.9253e-05 gnorm: 1.12 [15:15:51< 9:13:37] +[titan] 2025-10-05 13:50:12,484 - root - INFO - step: 24935 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 13:50:12,484 - root - INFO - lr: 1.9245e-05 gnorm: 1.08 [15:16:02< 9:13:26] +[titan] 2025-10-05 13:50:23,352 - root - INFO - step: 24940 loss: 2.0643 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 13:50:23,352 - root - INFO - lr: 1.9236e-05 gnorm: 1.08 [15:16:12< 9:13:15] +[titan] 2025-10-05 13:50:34,241 - root - INFO - step: 24945 loss: 2.0637 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:50:34,241 - root - INFO - lr: 1.9228e-05 gnorm: 1.09 [15:16:23< 9:13:04] +[titan] 2025-10-05 13:50:42,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:50:45,094 - root - INFO - step: 24950 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:50:45,094 - root - INFO - lr: 1.9220e-05 gnorm: 1.09 [15:16:34< 9:12:53] +[titan] 2025-10-05 13:50:55,957 - root - INFO - step: 24955 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 13:50:55,957 - root - INFO - lr: 1.9212e-05 gnorm: 1.10 [15:16:45< 9:12:41] +[titan] 2025-10-05 13:51:06,846 - root - INFO - step: 24960 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:51:06,846 - root - INFO - lr: 1.9203e-05 gnorm: 1.18 [15:16:56< 9:12:30] +[titan] 2025-10-05 13:51:17,738 - root - INFO - step: 24965 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:51:17,738 - root - INFO - lr: 1.9195e-05 gnorm: 1.09 [15:17:07< 9:12:19] +[titan] 2025-10-05 13:51:28,604 - root - INFO - step: 24970 loss: 2.1023 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:51:28,604 - root - INFO - lr: 1.9187e-05 gnorm: 1.11 [15:17:18< 9:12:08] +[titan] 2025-10-05 13:51:39,453 - root - INFO - step: 24975 loss: 2.0306 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 13:51:39,453 - root - INFO - lr: 1.9178e-05 gnorm: 1.12 [15:17:28< 9:11:57] +[titan] 2025-10-05 13:51:50,305 - root - INFO - step: 24980 loss: 2.0966 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8535 +[titan] 2025-10-05 13:51:50,305 - root - INFO - lr: 
1.9170e-05 gnorm: 1.08 [15:17:39< 9:11:46] +[titan] 2025-10-05 13:52:01,147 - root - INFO - step: 24985 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7712 +[titan] 2025-10-05 13:52:01,147 - root - INFO - lr: 1.9162e-05 gnorm: 1.09 [15:17:50< 9:11:35] +[titan] 2025-10-05 13:52:12,002 - root - INFO - step: 24990 loss: 2.0567 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 13:52:12,002 - root - INFO - lr: 1.9154e-05 gnorm: 1.09 [15:18:01< 9:11:24] +[titan] 2025-10-05 13:52:22,852 - root - INFO - step: 24995 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:52:22,852 - root - INFO - lr: 1.9145e-05 gnorm: 1.08 [15:18:12< 9:11:13] +[titan] 2025-10-05 13:52:31,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:52:33,740 - root - INFO - step: 25000 loss: 2.0319 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7959 +[titan] 2025-10-05 13:52:33,740 - root - INFO - lr: 1.9137e-05 gnorm: 1.07 [15:18:23< 9:11:01] +[titan] 2025-10-05 13:52:33,740 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 13:52:51,437 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 13:52:51,437 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.70 seconds. 
+[titan] 2025-10-05 13:54:51,998 - root - INFO - step: 25005 loss: 2.0275 memory: 118.84GiB(85.28%) tps: 2,370 tflops: 32.88 mfu: 3.32% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7926 +[titan] 2025-10-05 13:54:51,999 - root - INFO - lr: 1.9129e-05 gnorm: 1.11 [15:20:41< 9:12:07] +[titan] 2025-10-05 13:55:02,804 - root - INFO - step: 25010 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8047 +[titan] 2025-10-05 13:55:02,804 - root - INFO - lr: 1.9120e-05 gnorm: 1.11 [15:20:52< 9:11:56] +[titan] 2025-10-05 13:55:13,603 - root - INFO - step: 25015 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:55:13,603 - root - INFO - lr: 1.9112e-05 gnorm: 1.08 [15:21:03< 9:11:44] +[titan] 2025-10-05 13:55:24,411 - root - INFO - step: 25020 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 13:55:24,411 - root - INFO - lr: 1.9104e-05 gnorm: 1.12 [15:21:13< 9:11:33] +[titan] 2025-10-05 13:55:35,262 - root - INFO - step: 25025 loss: 2.0508 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:55:35,263 - root - INFO - lr: 1.9096e-05 gnorm: 1.09 [15:21:24< 9:11:22] +[titan] 2025-10-05 13:55:46,139 - root - INFO - step: 25030 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 13:55:46,139 - root - INFO - lr: 1.9087e-05 gnorm: 1.12 [15:21:35< 9:11:11] +[titan] 2025-10-05 13:55:56,971 - root - INFO - step: 25035 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 13:55:56,971 - root - INFO - lr: 
1.9079e-05 gnorm: 1.06 [15:21:46< 9:11:00] +[titan] 2025-10-05 13:56:07,833 - root - INFO - step: 25040 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7890 +[titan] 2025-10-05 13:56:07,833 - root - INFO - lr: 1.9071e-05 gnorm: 1.09 [15:21:57< 9:10:49] +[titan] 2025-10-05 13:56:18,697 - root - INFO - step: 25045 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 13:56:18,697 - root - INFO - lr: 1.9062e-05 gnorm: 1.07 [15:22:08< 9:10:37] +[titan] 2025-10-05 13:56:27,381 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:56:29,566 - root - INFO - step: 25050 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 13:56:29,566 - root - INFO - lr: 1.9054e-05 gnorm: 1.09 [15:22:19< 9:10:26] +[titan] 2025-10-05 13:56:40,477 - root - INFO - step: 25055 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 13:56:40,478 - root - INFO - lr: 1.9046e-05 gnorm: 1.10 [15:22:30< 9:10:15] +[titan] 2025-10-05 13:56:51,355 - root - INFO - step: 25060 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 13:56:51,355 - root - INFO - lr: 1.9038e-05 gnorm: 1.09 [15:22:40< 9:10:04] +[titan] 2025-10-05 13:57:02,218 - root - INFO - step: 25065 loss: 2.1039 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:57:02,218 - root - INFO - lr: 1.9029e-05 gnorm: 1.13 [15:22:51< 9:09:53] +[titan] 2025-10-05 13:57:13,100 - root - INFO - step: 25070 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 
30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 13:57:13,100 - root - INFO - lr: 1.9021e-05 gnorm: 1.08 [15:23:02< 9:09:42] +[titan] 2025-10-05 13:57:23,991 - root - INFO - step: 25075 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 13:57:23,991 - root - INFO - lr: 1.9013e-05 gnorm: 1.07 [15:23:13< 9:09:31] +[titan] 2025-10-05 13:57:34,864 - root - INFO - step: 25080 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 13:57:34,864 - root - INFO - lr: 1.9005e-05 gnorm: 1.05 [15:23:24< 9:09:19] +[titan] 2025-10-05 13:57:45,884 - root - INFO - step: 25085 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 29,737 tflops: 412.55 mfu: 41.71% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8452 +[titan] 2025-10-05 13:57:45,884 - root - INFO - lr: 1.8996e-05 gnorm: 1.11 [15:23:35< 9:09:08] +[titan] 2025-10-05 13:57:52,574 - root - INFO - Dumping profiler traces at step 25088 +[titan] 2025-10-05 13:57:52,612 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:57:56,994 - root - INFO - step: 25090 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.20 mfu: 41.38% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 13:57:56,994 - root - INFO - lr: 1.8988e-05 gnorm: 1.10 [15:23:46< 9:08:57] +[titan] 2025-10-05 13:58:07,853 - root - INFO - step: 25095 loss: 2.0873 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 13:58:07,853 - root - INFO - lr: 1.8980e-05 gnorm: 1.09 [15:23:57< 9:08:46] +[titan] 2025-10-05 13:58:16,516 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:58:18,699 - root - INFO - step: 25100 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7915 +[titan] 2025-10-05 13:58:18,700 - root - INFO - lr: 1.8972e-05 gnorm: 1.07 [15:24:08< 9:08:35] +[titan] 2025-10-05 13:58:29,551 - root - INFO - step: 25105 loss: 2.0232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 13:58:29,551 - root - INFO - lr: 1.8963e-05 gnorm: 1.09 [15:24:19< 9:08:24] +[titan] 2025-10-05 13:58:40,400 - root - INFO - step: 25110 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 13:58:40,400 - root - INFO - lr: 1.8955e-05 gnorm: 1.11 [15:24:29< 9:08:13] +[titan] 2025-10-05 13:58:51,352 - root - INFO - step: 25115 loss: 2.0288 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 13:58:51,352 - root - INFO - lr: 1.8947e-05 gnorm: 1.09 [15:24:40< 9:08:02] +[titan] 2025-10-05 13:59:02,234 - root - INFO - step: 25120 loss: 2.0905 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8475 +[titan] 2025-10-05 13:59:02,234 - root - INFO - lr: 1.8939e-05 gnorm: 1.09 [15:24:51< 9:07:50] +[titan] 2025-10-05 13:59:13,119 - root - INFO - step: 25125 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8215 +[titan] 2025-10-05 13:59:13,120 - root - INFO - lr: 1.8930e-05 gnorm: 1.07 [15:25:02< 9:07:39] +[titan] 2025-10-05 13:59:23,995 - root - INFO - step: 25130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 13:59:23,996 - root - INFO - lr: 
1.8922e-05 gnorm: 1.07 [15:25:13< 9:07:28] +[titan] 2025-10-05 13:59:34,878 - root - INFO - step: 25135 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8860 +[titan] 2025-10-05 13:59:34,879 - root - INFO - lr: 1.8914e-05 gnorm: 1.12 [15:25:24< 9:07:17] +[titan] 2025-10-05 13:59:45,774 - root - INFO - step: 25140 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 13:59:45,775 - root - INFO - lr: 1.8905e-05 gnorm: 1.07 [15:25:35< 9:07:06] +[titan] 2025-10-05 13:59:56,648 - root - INFO - step: 25145 loss: 2.0630 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:59:56,648 - root - INFO - lr: 1.8897e-05 gnorm: 1.08 [15:25:46< 9:06:55] +[titan] 2025-10-05 14:00:05,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:00:07,518 - root - INFO - step: 25150 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8682 +[titan] 2025-10-05 14:00:07,519 - root - INFO - lr: 1.8889e-05 gnorm: 1.15 [15:25:57< 9:06:44] +[titan] 2025-10-05 14:00:18,376 - root - INFO - step: 25155 loss: 2.0122 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 14:00:18,376 - root - INFO - lr: 1.8881e-05 gnorm: 1.04 [15:26:07< 9:06:32] +[titan] 2025-10-05 14:00:29,255 - root - INFO - step: 25160 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 14:00:29,255 - root - INFO - lr: 1.8873e-05 gnorm: 1.08 [15:26:18< 9:06:21] +[titan] 2025-10-05 14:00:40,131 - root - INFO - step: 25165 loss: 2.0645 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 14:00:40,132 - root - INFO - lr: 1.8864e-05 gnorm: 1.09 [15:26:29< 9:06:10] +[titan] 2025-10-05 14:00:51,071 - root - INFO - step: 25170 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8358 +[titan] 2025-10-05 14:00:51,071 - root - INFO - lr: 1.8856e-05 gnorm: 1.06 [15:26:40< 9:05:59] +[titan] 2025-10-05 14:01:01,932 - root - INFO - step: 25175 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 14:01:01,932 - root - INFO - lr: 1.8848e-05 gnorm: 1.09 [15:26:51< 9:05:48] +[titan] 2025-10-05 14:01:12,823 - root - INFO - step: 25180 loss: 2.0514 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8133 +[titan] 2025-10-05 14:01:12,824 - root - INFO - lr: 
1.8840e-05 gnorm: 1.08 [15:27:02< 9:05:37] +[titan] 2025-10-05 14:01:23,713 - root - INFO - step: 25185 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8457 +[titan] 2025-10-05 14:01:23,713 - root - INFO - lr: 1.8831e-05 gnorm: 1.04 [15:27:13< 9:05:26] +[titan] 2025-10-05 14:01:34,565 - root - INFO - step: 25190 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8554 +[titan] 2025-10-05 14:01:34,565 - root - INFO - lr: 1.8823e-05 gnorm: 1.08 [15:27:24< 9:05:14] +[titan] 2025-10-05 14:01:45,489 - root - INFO - step: 25195 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 14:01:45,489 - root - INFO - lr: 1.8815e-05 gnorm: 1.10 [15:27:35< 9:05:03] +[titan] 2025-10-05 14:01:54,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:01:56,348 - root - INFO - step: 25200 loss: 2.1289 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8821 +[titan] 2025-10-05 14:01:56,348 - root - INFO - lr: 1.8807e-05 gnorm: 1.13 [15:27:45< 9:04:52] +[titan] 2025-10-05 14:02:07,198 - root - INFO - step: 25205 loss: 2.0344 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:02:07,199 - root - INFO - lr: 1.8798e-05 gnorm: 1.06 [15:27:56< 9:04:41] +[titan] 2025-10-05 14:02:18,072 - root - INFO - step: 25210 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 14:02:18,073 - root - INFO - lr: 1.8790e-05 gnorm: 1.09 [15:28:07< 9:04:30] +[titan] 2025-10-05 14:02:28,950 - root - INFO - step: 25215 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:02:28,951 - root - INFO - lr: 1.8782e-05 gnorm: 1.11 [15:28:18< 9:04:19] +[titan] 2025-10-05 14:02:39,828 - root - INFO - step: 25220 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 14:02:39,828 - root - INFO - lr: 1.8774e-05 gnorm: 1.10 [15:28:29< 9:04:08] +[titan] 2025-10-05 14:02:50,798 - root - INFO - step: 25225 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 14:02:50,798 - root - INFO - lr: 1.8765e-05 gnorm: 1.10 [15:28:40< 9:03:56] +[titan] 2025-10-05 14:03:01,706 - root - INFO - step: 25230 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 14:03:01,706 - root - INFO - lr: 
1.8757e-05 gnorm: 1.07 [15:28:51< 9:03:45] +[titan] 2025-10-05 14:03:12,597 - root - INFO - step: 25235 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 14:03:12,597 - root - INFO - lr: 1.8749e-05 gnorm: 1.08 [15:29:02< 9:03:34] +[titan] 2025-10-05 14:03:23,476 - root - INFO - step: 25240 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:03:23,477 - root - INFO - lr: 1.8741e-05 gnorm: 1.05 [15:29:12< 9:03:23] +[titan] 2025-10-05 14:03:34,394 - root - INFO - step: 25245 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 14:03:34,395 - root - INFO - lr: 1.8733e-05 gnorm: 1.06 [15:29:23< 9:03:12] +[titan] 2025-10-05 14:03:43,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:03:45,291 - root - INFO - step: 25250 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7908 +[titan] 2025-10-05 14:03:45,292 - root - INFO - lr: 1.8724e-05 gnorm: 1.08 [15:29:34< 9:03:01] +[titan] 2025-10-05 14:03:56,215 - root - INFO - step: 25255 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8068 +[titan] 2025-10-05 14:03:56,215 - root - INFO - lr: 1.8716e-05 gnorm: 1.07 [15:29:45< 9:02:50] +[titan] 2025-10-05 14:04:07,069 - root - INFO - step: 25260 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7296 +[titan] 2025-10-05 14:04:07,070 - root - INFO - lr: 1.8708e-05 gnorm: 1.09 [15:29:56< 9:02:39] +[titan] 2025-10-05 14:04:17,929 - root - INFO - step: 25265 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 14:04:17,929 - root - INFO - lr: 1.8700e-05 gnorm: 1.05 [15:30:07< 9:02:27] +[titan] 2025-10-05 14:04:28,778 - root - INFO - step: 25270 loss: 2.0659 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8256 +[titan] 2025-10-05 14:04:28,778 - root - INFO - lr: 1.8692e-05 gnorm: 1.05 [15:30:18< 9:02:16] +[titan] 2025-10-05 14:04:39,663 - root - INFO - step: 25275 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:04:39,663 - root - INFO - lr: 1.8683e-05 gnorm: 1.10 [15:30:29< 9:02:05] +[titan] 2025-10-05 14:04:50,619 - root - INFO - step: 25280 loss: 2.0423 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.93 mfu: 41.95% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 14:04:50,620 - root - INFO - lr: 
1.8675e-05 gnorm: 1.10 [15:30:40< 9:01:54] +[titan] 2025-10-05 14:05:01,490 - root - INFO - step: 25285 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 14:05:01,490 - root - INFO - lr: 1.8667e-05 gnorm: 1.07 [15:30:50< 9:01:43] +[titan] 2025-10-05 14:05:12,363 - root - INFO - step: 25290 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 14:05:12,363 - root - INFO - lr: 1.8659e-05 gnorm: 1.08 [15:31:01< 9:01:32] +[titan] 2025-10-05 14:05:23,239 - root - INFO - step: 25295 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 14:05:23,239 - root - INFO - lr: 1.8650e-05 gnorm: 1.12 [15:31:12< 9:01:21] +[titan] 2025-10-05 14:05:31,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:05:34,099 - root - INFO - step: 25300 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:05:34,100 - root - INFO - lr: 1.8642e-05 gnorm: 1.10 [15:31:23< 9:01:09] +[titan] 2025-10-05 14:05:44,978 - root - INFO - step: 25305 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 14:05:44,978 - root - INFO - lr: 1.8634e-05 gnorm: 1.07 [15:31:34< 9:00:58] +[titan] 2025-10-05 14:05:55,924 - root - INFO - step: 25310 loss: 2.0792 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 41.99% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8356 +[titan] 2025-10-05 14:05:55,924 - root - INFO - lr: 1.8626e-05 gnorm: 1.11 [15:31:45< 9:00:47] +[titan] 2025-10-05 14:06:06,777 - root - INFO - step: 25315 loss: 2.0737 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8330 +[titan] 2025-10-05 14:06:06,777 - root - INFO - lr: 1.8618e-05 gnorm: 1.08 [15:31:56< 9:00:36] +[titan] 2025-10-05 14:06:17,654 - root - INFO - step: 25320 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 14:06:17,654 - root - INFO - lr: 1.8609e-05 gnorm: 1.06 [15:32:07< 9:00:25] +[titan] 2025-10-05 14:06:28,537 - root - INFO - step: 25325 loss: 2.1056 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 14:06:28,537 - root - INFO - lr: 1.8601e-05 gnorm: 1.08 [15:32:18< 9:00:14] +[titan] 2025-10-05 14:06:39,411 - root - INFO - step: 25330 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:06:39,411 - root - INFO - lr: 
1.8593e-05 gnorm: 1.11 [15:32:28< 9:00:03] +[titan] 2025-10-05 14:06:50,340 - root - INFO - step: 25335 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:06:50,341 - root - INFO - lr: 1.8585e-05 gnorm: 1.10 [15:32:39< 8:59:51] +[titan] 2025-10-05 14:07:01,212 - root - INFO - step: 25340 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 14:07:01,212 - root - INFO - lr: 1.8577e-05 gnorm: 1.08 [15:32:50< 8:59:40] +[titan] 2025-10-05 14:07:12,114 - root - INFO - step: 25345 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 14:07:12,114 - root - INFO - lr: 1.8568e-05 gnorm: 1.06 [15:33:01< 8:59:29] +[titan] 2025-10-05 14:07:20,807 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:07:22,994 - root - INFO - step: 25350 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 14:07:22,994 - root - INFO - lr: 1.8560e-05 gnorm: 1.06 [15:33:12< 8:59:18] +[titan] 2025-10-05 14:07:33,878 - root - INFO - step: 25355 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8658 +[titan] 2025-10-05 14:07:33,878 - root - INFO - lr: 1.8552e-05 gnorm: 1.11 [15:33:23< 8:59:07] +[titan] 2025-10-05 14:07:44,774 - root - INFO - step: 25360 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 14:07:44,774 - root - INFO - lr: 1.8544e-05 gnorm: 1.08 [15:33:34< 8:58:56] +[titan] 2025-10-05 14:07:55,691 - root - INFO - step: 25365 loss: 2.0709 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:07:55,691 - root - INFO - lr: 1.8536e-05 gnorm: 1.08 [15:33:45< 8:58:45] +[titan] 2025-10-05 14:08:06,574 - root - INFO - step: 25370 loss: 2.0036 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 14:08:06,574 - root - INFO - lr: 1.8528e-05 gnorm: 1.08 [15:33:56< 8:58:34] +[titan] 2025-10-05 14:08:17,490 - root - INFO - step: 25375 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 14:08:17,490 - root - INFO - lr: 1.8519e-05 gnorm: 1.13 [15:34:06< 8:58:22] +[titan] 2025-10-05 14:08:28,356 - root - INFO - step: 25380 loss: 2.1491 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 14:08:28,357 - root - INFO - lr: 
1.8511e-05 gnorm: 1.09 [15:34:17< 8:58:11] +[titan] 2025-10-05 14:08:39,210 - root - INFO - step: 25385 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:08:39,210 - root - INFO - lr: 1.8503e-05 gnorm: 1.09 [15:34:28< 8:58:00] +[titan] 2025-10-05 14:08:50,100 - root - INFO - step: 25390 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 14:08:50,100 - root - INFO - lr: 1.8495e-05 gnorm: 1.11 [15:34:39< 8:57:49] +[titan] 2025-10-05 14:09:00,958 - root - INFO - step: 25395 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 14:09:00,959 - root - INFO - lr: 1.8487e-05 gnorm: 1.09 [15:34:50< 8:57:38] +[titan] 2025-10-05 14:09:09,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:09:11,824 - root - INFO - step: 25400 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:09:11,824 - root - INFO - lr: 1.8478e-05 gnorm: 1.09 [15:35:01< 8:57:27] +[titan] 2025-10-05 14:09:22,722 - root - INFO - step: 25405 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 14:09:22,722 - root - INFO - lr: 1.8470e-05 gnorm: 1.06 [15:35:12< 8:57:16] +[titan] 2025-10-05 14:09:33,582 - root - INFO - step: 25410 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 14:09:33,583 - root - INFO - lr: 1.8462e-05 gnorm: 1.07 [15:35:23< 8:57:04] +[titan] 2025-10-05 14:09:44,445 - root - INFO - step: 25415 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 14:09:44,445 - root - INFO - lr: 1.8454e-05 gnorm: 1.07 [15:35:33< 8:56:53] +[titan] 2025-10-05 14:09:55,342 - root - INFO - step: 25420 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8650 +[titan] 2025-10-05 14:09:55,342 - root - INFO - lr: 1.8446e-05 gnorm: 1.08 [15:35:44< 8:56:42] +[titan] 2025-10-05 14:10:06,229 - root - INFO - step: 25425 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 14:10:06,229 - root - INFO - lr: 1.8438e-05 gnorm: 1.09 [15:35:55< 8:56:31] +[titan] 2025-10-05 14:10:17,110 - root - INFO - step: 25430 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 14:10:17,110 - root - INFO - lr: 
1.8429e-05 gnorm: 1.09 [15:36:06< 8:56:20] +[titan] 2025-10-05 14:10:28,014 - root - INFO - step: 25435 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 14:10:28,014 - root - INFO - lr: 1.8421e-05 gnorm: 1.05 [15:36:17< 8:56:09] +[titan] 2025-10-05 14:10:38,939 - root - INFO - step: 25440 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8011 +[titan] 2025-10-05 14:10:38,939 - root - INFO - lr: 1.8413e-05 gnorm: 1.10 [15:36:28< 8:55:58] +[titan] 2025-10-05 14:10:49,824 - root - INFO - step: 25445 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 14:10:49,825 - root - INFO - lr: 1.8405e-05 gnorm: 1.08 [15:36:39< 8:55:47] +[titan] 2025-10-05 14:10:58,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:11:00,730 - root - INFO - step: 25450 loss: 2.0470 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 14:11:00,730 - root - INFO - lr: 1.8397e-05 gnorm: 1.07 [15:36:50< 8:55:35] +[titan] 2025-10-05 14:11:11,607 - root - INFO - step: 25455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 14:11:11,607 - root - INFO - lr: 1.8389e-05 gnorm: 1.07 [15:37:01< 8:55:24] +[titan] 2025-10-05 14:11:22,482 - root - INFO - step: 25460 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 14:11:22,482 - root - INFO - lr: 1.8380e-05 gnorm: 1.10 [15:37:11< 8:55:13] +[titan] 2025-10-05 14:11:33,348 - root - INFO - step: 25465 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:11:33,348 - root - INFO - lr: 1.8372e-05 gnorm: 1.09 [15:37:22< 8:55:02] +[titan] 2025-10-05 14:11:44,248 - root - INFO - step: 25470 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 14:11:44,248 - root - INFO - lr: 1.8364e-05 gnorm: 1.09 [15:37:33< 8:54:51] +[titan] 2025-10-05 14:11:55,157 - root - INFO - step: 25475 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 14:11:55,157 - root - INFO - lr: 1.8356e-05 gnorm: 1.09 [15:37:44< 8:54:40] +[titan] 2025-10-05 14:12:06,026 - root - INFO - step: 25480 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 14:12:06,027 - root - INFO - lr: 
1.8348e-05 gnorm: 1.07 [15:37:55< 8:54:29] +[titan] 2025-10-05 14:12:16,908 - root - INFO - step: 25485 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:12:16,909 - root - INFO - lr: 1.8340e-05 gnorm: 1.10 [15:38:06< 8:54:17] +[titan] 2025-10-05 14:12:27,776 - root - INFO - step: 25490 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7677 +[titan] 2025-10-05 14:12:27,776 - root - INFO - lr: 1.8332e-05 gnorm: 1.09 [15:38:17< 8:54:06] +[titan] 2025-10-05 14:12:38,651 - root - INFO - step: 25495 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 14:12:38,651 - root - INFO - lr: 1.8323e-05 gnorm: 1.08 [15:38:28< 8:53:55] +[titan] 2025-10-05 14:12:47,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:12:49,537 - root - INFO - step: 25500 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7501 +[titan] 2025-10-05 14:12:49,537 - root - INFO - lr: 1.8315e-05 gnorm: 1.13 [15:38:39< 8:53:44] +[titan] 2025-10-05 14:13:00,470 - root - INFO - step: 25505 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:13:00,470 - root - INFO - lr: 1.8307e-05 gnorm: 1.08 [15:38:49< 8:53:33] +[titan] 2025-10-05 14:13:11,338 - root - INFO - step: 25510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 14:13:11,338 - root - INFO - lr: 1.8299e-05 gnorm: 1.11 [15:39:00< 8:53:22] +[titan] 2025-10-05 14:13:22,196 - root - INFO - step: 25515 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 14:13:22,197 - root - INFO - lr: 1.8291e-05 gnorm: 1.17 [15:39:11< 8:53:11] +[titan] 2025-10-05 14:13:33,046 - root - INFO - step: 25520 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 14:13:33,047 - root - INFO - lr: 1.8283e-05 gnorm: 1.07 [15:39:22< 8:52:59] +[titan] 2025-10-05 14:13:43,917 - root - INFO - step: 25525 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 14:13:43,917 - root - INFO - lr: 1.8275e-05 gnorm: 1.12 [15:39:33< 8:52:48] +[titan] 2025-10-05 14:13:54,888 - root - INFO - step: 25530 loss: 2.1016 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 14:13:54,888 - root - INFO - lr: 
1.8266e-05 gnorm: 1.14 [15:39:44< 8:52:37] +[titan] 2025-10-05 14:14:05,796 - root - INFO - step: 25535 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:14:05,796 - root - INFO - lr: 1.8258e-05 gnorm: 1.11 [15:39:55< 8:52:26] +[titan] 2025-10-05 14:14:16,658 - root - INFO - step: 25540 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 14:14:16,658 - root - INFO - lr: 1.8250e-05 gnorm: 1.12 [15:40:06< 8:52:15] +[titan] 2025-10-05 14:14:27,520 - root - INFO - step: 25545 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 14:14:27,521 - root - INFO - lr: 1.8242e-05 gnorm: 1.08 [15:40:17< 8:52:04] +[titan] 2025-10-05 14:14:36,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:14:38,398 - root - INFO - step: 25550 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 14:14:38,399 - root - INFO - lr: 1.8234e-05 gnorm: 1.07 [15:40:27< 8:51:53] +[titan] 2025-10-05 14:14:49,271 - root - INFO - step: 25555 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:14:49,271 - root - INFO - lr: 1.8226e-05 gnorm: 1.10 [15:40:38< 8:51:42] +[titan] 2025-10-05 14:15:00,189 - root - INFO - step: 25560 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:15:00,189 - root - INFO - lr: 1.8218e-05 gnorm: 1.05 [15:40:49< 8:51:30] +[titan] 2025-10-05 14:15:11,120 - root - INFO - step: 25565 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 14:15:11,121 - root - INFO - lr: 1.8209e-05 gnorm: 1.07 [15:41:00< 8:51:19] +[titan] 2025-10-05 14:15:21,997 - root - INFO - step: 25570 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 14:15:21,997 - root - INFO - lr: 1.8201e-05 gnorm: 1.56 [15:41:11< 8:51:08] +[titan] 2025-10-05 14:15:32,888 - root - INFO - step: 25575 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 14:15:32,888 - root - INFO - lr: 1.8193e-05 gnorm: 1.07 [15:41:22< 8:50:57] +[titan] 2025-10-05 14:15:43,769 - root - INFO - step: 25580 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8710 +[titan] 2025-10-05 14:15:43,769 - root - INFO - lr: 
1.8185e-05 gnorm: 1.07 [15:41:33< 8:50:46] +[titan] 2025-10-05 14:15:54,652 - root - INFO - step: 25585 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 14:15:54,652 - root - INFO - lr: 1.8177e-05 gnorm: 1.05 [15:41:44< 8:50:35] +[titan] 2025-10-05 14:16:05,536 - root - INFO - step: 25590 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 14:16:05,536 - root - INFO - lr: 1.8169e-05 gnorm: 1.07 [15:41:55< 8:50:24] +[titan] 2025-10-05 14:16:16,420 - root - INFO - step: 25595 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:16:16,420 - root - INFO - lr: 1.8161e-05 gnorm: 1.09 [15:42:05< 8:50:13] +[titan] 2025-10-05 14:16:25,234 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:16:27,418 - root - INFO - step: 25600 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 29,795 tflops: 413.36 mfu: 41.80% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 14:16:27,418 - root - INFO - lr: 1.8153e-05 gnorm: 1.10 [15:42:16< 8:50:02] +[titan] 2025-10-05 14:16:27,593 - root - INFO - Dumping profiler traces at step 25600 +[titan] 2025-10-05 14:16:27,629 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:16:38,481 - root - INFO - step: 25605 loss: 2.0476 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:16:38,481 - root - INFO - lr: 1.8144e-05 gnorm: 1.11 [15:42:27< 8:49:50] +[titan] 2025-10-05 14:16:49,316 - root - INFO - step: 25610 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8107 +[titan] 2025-10-05 14:16:49,316 - root - INFO - lr: 1.8136e-05 gnorm: 1.06 [15:42:38< 8:49:39] +[titan] 2025-10-05 14:17:00,171 - root - INFO - step: 25615 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 14:17:00,172 - root - INFO - lr: 1.8128e-05 gnorm: 1.06 [15:42:49< 8:49:28] +[titan] 2025-10-05 14:17:11,028 - root - INFO - step: 25620 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 14:17:11,028 - root - INFO - lr: 1.8120e-05 gnorm: 1.08 [15:43:00< 8:49:17] +[titan] 2025-10-05 14:17:21,893 - root - INFO - step: 25625 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 14:17:21,894 - root - INFO - lr: 1.8112e-05 gnorm: 1.08 [15:43:11< 8:49:06] +[titan] 2025-10-05 14:17:32,791 - root - INFO - step: 25630 loss: 
2.0937 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8510 +[titan] 2025-10-05 14:17:32,791 - root - INFO - lr: 1.8104e-05 gnorm: 1.17 [15:43:22< 8:48:55] +[titan] 2025-10-05 14:17:43,645 - root - INFO - step: 25635 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 14:17:43,645 - root - INFO - lr: 1.8096e-05 gnorm: 1.09 [15:43:33< 8:48:44] +[titan] 2025-10-05 14:17:54,490 - root - INFO - step: 25640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:17:54,490 - root - INFO - lr: 1.8088e-05 gnorm: 1.07 [15:43:43< 8:48:32] +[titan] 2025-10-05 14:18:05,362 - root - INFO - step: 25645 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:18:05,362 - root - INFO - lr: 1.8080e-05 gnorm: 1.09 [15:43:54< 8:48:21] +[titan] 2025-10-05 14:18:14,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:18:16,215 - root - INFO - step: 25650 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 14:18:16,216 - root - INFO - lr: 1.8071e-05 gnorm: 1.09 [15:44:05< 8:48:10] +[titan] 2025-10-05 14:18:27,067 - root - INFO - step: 25655 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 14:18:27,068 - root - INFO - lr: 1.8063e-05 gnorm: 1.05 [15:44:16< 8:47:59] +[titan] 2025-10-05 14:18:37,921 - root - INFO - step: 25660 loss: 2.0284 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7927 +[titan] 2025-10-05 14:18:37,921 - root - INFO - lr: 1.8055e-05 gnorm: 1.09 [15:44:27< 8:47:48] +[titan] 2025-10-05 14:18:48,835 - root - INFO - step: 25665 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 14:18:48,835 - root - INFO - lr: 1.8047e-05 gnorm: 1.08 [15:44:38< 8:47:37] +[titan] 2025-10-05 14:18:59,735 - root - INFO - step: 25670 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:18:59,735 - root - INFO - lr: 1.8039e-05 gnorm: 1.11 [15:44:49< 8:47:26] +[titan] 2025-10-05 14:19:10,621 - root - INFO - step: 25675 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 14:19:10,621 - root - INFO - lr: 1.8031e-05 gnorm: 1.12 [15:45:00< 8:47:15] +[titan] 2025-10-05 14:19:21,506 - root - INFO - step: 25680 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8389 +[titan] 2025-10-05 14:19:21,506 - root - INFO - lr: 
1.8023e-05 gnorm: 1.07 [15:45:10< 8:47:03] +[titan] 2025-10-05 14:19:32,375 - root - INFO - step: 25685 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 14:19:32,375 - root - INFO - lr: 1.8015e-05 gnorm: 1.07 [15:45:21< 8:46:52] +[titan] 2025-10-05 14:19:43,253 - root - INFO - step: 25690 loss: 1.9973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7661 +[titan] 2025-10-05 14:19:43,254 - root - INFO - lr: 1.8007e-05 gnorm: 1.09 [15:45:32< 8:46:41] +[titan] 2025-10-05 14:19:54,175 - root - INFO - step: 25695 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7719 +[titan] 2025-10-05 14:19:54,175 - root - INFO - lr: 1.7999e-05 gnorm: 1.09 [15:45:43< 8:46:30] +[titan] 2025-10-05 14:20:02,862 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:20:05,037 - root - INFO - step: 25700 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 14:20:05,037 - root - INFO - lr: 1.7991e-05 gnorm: 1.10 [15:45:54< 8:46:19] +[titan] 2025-10-05 14:20:15,889 - root - INFO - step: 25705 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9084 +[titan] 2025-10-05 14:20:15,889 - root - INFO - lr: 1.7982e-05 gnorm: 1.09 [15:46:05< 8:46:08] +[titan] 2025-10-05 14:20:26,754 - root - INFO - step: 25710 loss: 2.0748 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 14:20:26,754 - root - INFO - lr: 1.7974e-05 gnorm: 1.08 [15:46:16< 8:45:57] +[titan] 2025-10-05 14:20:37,621 - root - INFO - step: 25715 loss: 2.0337 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7984 +[titan] 2025-10-05 14:20:37,621 - root - INFO - lr: 1.7966e-05 gnorm: 1.06 [15:46:27< 8:45:45] +[titan] 2025-10-05 14:20:48,501 - root - INFO - step: 25720 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 14:20:48,501 - root - INFO - lr: 1.7958e-05 gnorm: 1.07 [15:46:37< 8:45:34] +[titan] 2025-10-05 14:20:59,442 - root - INFO - step: 25725 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8266 +[titan] 2025-10-05 14:20:59,442 - root - INFO - lr: 1.7950e-05 gnorm: 1.11 [15:46:48< 8:45:23] +[titan] 2025-10-05 14:21:10,316 - root - INFO - step: 25730 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8362 +[titan] 2025-10-05 14:21:10,316 - root - INFO - lr: 
1.7942e-05 gnorm: 1.10 [15:46:59< 8:45:12] +[titan] 2025-10-05 14:21:21,179 - root - INFO - step: 25735 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 14:21:21,179 - root - INFO - lr: 1.7934e-05 gnorm: 1.11 [15:47:10< 8:45:01] +[titan] 2025-10-05 14:21:32,060 - root - INFO - step: 25740 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 14:21:32,060 - root - INFO - lr: 1.7926e-05 gnorm: 1.05 [15:47:21< 8:44:50] +[titan] 2025-10-05 14:21:42,940 - root - INFO - step: 25745 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 14:21:42,940 - root - INFO - lr: 1.7918e-05 gnorm: 1.13 [15:47:32< 8:44:39] +[titan] 2025-10-05 14:21:51,620 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:21:53,800 - root - INFO - step: 25750 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 14:21:53,800 - root - INFO - lr: 1.7910e-05 gnorm: 1.09 [15:47:43< 8:44:28] +[titan] 2025-10-05 14:22:04,676 - root - INFO - step: 25755 loss: 2.0272 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:22:04,676 - root - INFO - lr: 1.7902e-05 gnorm: 1.10 [15:47:54< 8:44:16] +[titan] 2025-10-05 14:22:15,594 - root - INFO - step: 25760 loss: 2.0342 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7988 +[titan] 2025-10-05 14:22:15,594 - root - INFO - lr: 1.7894e-05 gnorm: 1.07 [15:48:05< 8:44:05] +[titan] 2025-10-05 14:22:26,449 - root - INFO - step: 25765 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 14:22:26,449 - root - INFO - lr: 1.7885e-05 gnorm: 1.09 [15:48:15< 8:43:54] +[titan] 2025-10-05 14:22:37,310 - root - INFO - step: 25770 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 14:22:37,310 - root - INFO - lr: 1.7877e-05 gnorm: 1.05 [15:48:26< 8:43:43] +[titan] 2025-10-05 14:22:48,182 - root - INFO - step: 25775 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 14:22:48,182 - root - INFO - lr: 1.7869e-05 gnorm: 1.11 [15:48:37< 8:43:32] +[titan] 2025-10-05 14:22:59,049 - root - INFO - step: 25780 loss: 2.0127 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 14:22:59,049 - root - INFO - lr: 
1.7861e-05 gnorm: 1.06 [15:48:48< 8:43:21] +[titan] 2025-10-05 14:23:09,928 - root - INFO - step: 25785 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 14:23:09,928 - root - INFO - lr: 1.7853e-05 gnorm: 1.04 [15:48:59< 8:43:10] +[titan] 2025-10-05 14:23:20,861 - root - INFO - step: 25790 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 14:23:20,861 - root - INFO - lr: 1.7845e-05 gnorm: 1.11 [15:49:10< 8:42:58] +[titan] 2025-10-05 14:23:31,734 - root - INFO - step: 25795 loss: 2.0316 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 14:23:31,734 - root - INFO - lr: 1.7837e-05 gnorm: 1.08 [15:49:21< 8:42:47] +[titan] 2025-10-05 14:23:40,434 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:23:42,620 - root - INFO - step: 25800 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.8738 +[titan] 2025-10-05 14:23:42,620 - root - INFO - lr: 1.7829e-05 gnorm: 2.05 [15:49:32< 8:42:36] +[titan] 2025-10-05 14:23:53,479 - root - INFO - step: 25805 loss: 2.0499 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8114 +[titan] 2025-10-05 14:23:53,479 - root - INFO - lr: 1.7821e-05 gnorm: 1.10 [15:49:42< 8:42:25] +[titan] 2025-10-05 14:24:04,354 - root - INFO - step: 25810 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8199 +[titan] 2025-10-05 14:24:04,354 - root - INFO - lr: 1.7813e-05 gnorm: 1.10 [15:49:53< 8:42:14] +[titan] 2025-10-05 14:24:15,228 - root - INFO - step: 25815 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:24:15,228 - root - INFO - lr: 1.7805e-05 gnorm: 1.07 [15:50:04< 8:42:03] +[titan] 2025-10-05 14:24:26,126 - root - INFO - step: 25820 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:24:26,126 - root - INFO - lr: 1.7797e-05 gnorm: 1.11 [15:50:15< 8:41:52] +[titan] 2025-10-05 14:24:37,054 - root - INFO - step: 25825 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 14:24:37,055 - root - INFO - lr: 1.7789e-05 gnorm: 1.09 [15:50:26< 8:41:41] +[titan] 2025-10-05 14:24:47,925 - root - INFO - step: 25830 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 14:24:47,925 - root - INFO - lr: 
1.7781e-05 gnorm: 1.08 [15:50:37< 8:41:29] +[titan] 2025-10-05 14:24:58,795 - root - INFO - step: 25835 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 14:24:58,795 - root - INFO - lr: 1.7773e-05 gnorm: 1.15 [15:50:48< 8:41:18] +[titan] 2025-10-05 14:25:09,680 - root - INFO - step: 25840 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 14:25:09,680 - root - INFO - lr: 1.7765e-05 gnorm: 1.04 [15:50:59< 8:41:07] +[titan] 2025-10-05 14:25:20,542 - root - INFO - step: 25845 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7882 +[titan] 2025-10-05 14:25:20,543 - root - INFO - lr: 1.7757e-05 gnorm: 1.08 [15:51:09< 8:40:56] +[titan] 2025-10-05 14:25:29,240 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:25:31,434 - root - INFO - step: 25850 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8073 +[titan] 2025-10-05 14:25:31,434 - root - INFO - lr: 1.7749e-05 gnorm: 1.08 [15:51:20< 8:40:45] +[titan] 2025-10-05 14:25:42,355 - root - INFO - step: 25855 loss: 2.0565 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:25:42,356 - root - INFO - lr: 1.7740e-05 gnorm: 1.09 [15:51:31< 8:40:34] +[titan] 2025-10-05 14:25:53,227 - root - INFO - step: 25860 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 14:25:53,227 - root - INFO - lr: 1.7732e-05 gnorm: 1.11 [15:51:42< 8:40:23] +[titan] 2025-10-05 14:26:04,104 - root - INFO - step: 25865 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8184 +[titan] 2025-10-05 14:26:04,105 - root - INFO - lr: 1.7724e-05 gnorm: 1.11 [15:51:53< 8:40:12] +[titan] 2025-10-05 14:26:15,028 - root - INFO - step: 25870 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 14:26:15,028 - root - INFO - lr: 1.7716e-05 gnorm: 1.04 [15:52:04< 8:40:00] +[titan] 2025-10-05 14:26:25,939 - root - INFO - step: 25875 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8127 +[titan] 2025-10-05 14:26:25,939 - root - INFO - lr: 1.7708e-05 gnorm: 1.08 [15:52:15< 8:39:49] +[titan] 2025-10-05 14:26:36,815 - root - INFO - step: 25880 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:26:36,815 - root - INFO - lr: 
1.7700e-05 gnorm: 1.07 [15:52:26< 8:39:38] +[titan] 2025-10-05 14:26:47,749 - root - INFO - step: 25885 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8378 +[titan] 2025-10-05 14:26:47,749 - root - INFO - lr: 1.7692e-05 gnorm: 1.10 [15:52:37< 8:39:27] +[titan] 2025-10-05 14:26:58,622 - root - INFO - step: 25890 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:26:58,622 - root - INFO - lr: 1.7684e-05 gnorm: 1.07 [15:52:48< 8:39:16] +[titan] 2025-10-05 14:27:09,541 - root - INFO - step: 25895 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7826 +[titan] 2025-10-05 14:27:09,541 - root - INFO - lr: 1.7676e-05 gnorm: 1.10 [15:52:58< 8:39:05] +[titan] 2025-10-05 14:27:18,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:27:20,420 - root - INFO - step: 25900 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 14:27:20,420 - root - INFO - lr: 1.7668e-05 gnorm: 1.08 [15:53:09< 8:38:54] +[titan] 2025-10-05 14:27:31,298 - root - INFO - step: 25905 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 14:27:31,299 - root - INFO - lr: 1.7660e-05 gnorm: 1.08 [15:53:20< 8:38:43] +[titan] 2025-10-05 14:27:42,163 - root - INFO - step: 25910 loss: 2.0892 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 14:27:42,163 - root - INFO - lr: 1.7652e-05 gnorm: 1.12 [15:53:31< 8:38:31] +[titan] 2025-10-05 14:27:53,040 - root - INFO - step: 25915 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 14:27:53,041 - root - INFO - lr: 1.7644e-05 gnorm: 1.09 [15:53:42< 8:38:20] +[titan] 2025-10-05 14:28:03,938 - root - INFO - step: 25920 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 14:28:03,938 - root - INFO - lr: 1.7636e-05 gnorm: 1.05 [15:53:53< 8:38:09] +[titan] 2025-10-05 14:28:14,994 - root - INFO - step: 25925 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.19 mfu: 41.58% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8390 +[titan] 2025-10-05 14:28:14,994 - root - INFO - lr: 1.7628e-05 gnorm: 1.11 [15:54:04< 8:37:58] +[titan] 2025-10-05 14:28:25,864 - root - INFO - step: 25930 loss: 2.0995 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 14:28:25,864 - root - INFO - lr: 
1.7620e-05 gnorm: 1.09 [15:54:15< 8:37:47] +[titan] 2025-10-05 14:28:36,720 - root - INFO - step: 25935 loss: 2.0585 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 14:28:36,720 - root - INFO - lr: 1.7612e-05 gnorm: 1.12 [15:54:26< 8:37:36] +[titan] 2025-10-05 14:28:47,595 - root - INFO - step: 25940 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 14:28:47,596 - root - INFO - lr: 1.7604e-05 gnorm: 1.13 [15:54:37< 8:37:25] +[titan] 2025-10-05 14:28:58,468 - root - INFO - step: 25945 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7629 +[titan] 2025-10-05 14:28:58,469 - root - INFO - lr: 1.7596e-05 gnorm: 1.11 [15:54:47< 8:37:14] +[titan] 2025-10-05 14:29:07,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:29:09,436 - root - INFO - step: 25950 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 29,879 tflops: 414.52 mfu: 41.91% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:29:09,436 - root - INFO - lr: 1.7588e-05 gnorm: 1.14 [15:54:58< 8:37:03] +[titan] 2025-10-05 14:29:20,286 - root - INFO - step: 25955 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 14:29:20,286 - root - INFO - lr: 1.7580e-05 gnorm: 1.08 [15:55:09< 8:36:51] +[titan] 2025-10-05 14:29:31,140 - root - INFO - step: 25960 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:29:31,140 - root - INFO - lr: 1.7572e-05 gnorm: 1.08 [15:55:20< 8:36:40] +[titan] 2025-10-05 14:29:42,013 - root - INFO - step: 25965 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8359 +[titan] 2025-10-05 14:29:42,013 - root - INFO - lr: 1.7564e-05 gnorm: 1.10 [15:55:31< 8:36:29] +[titan] 2025-10-05 14:29:52,914 - root - INFO - step: 25970 loss: 2.1034 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 14:29:52,914 - root - INFO - lr: 1.7556e-05 gnorm: 1.06 [15:55:42< 8:36:18] +[titan] 2025-10-05 14:30:03,792 - root - INFO - step: 25975 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 14:30:03,792 - root - INFO - lr: 1.7548e-05 gnorm: 1.08 [15:55:53< 8:36:07] +[titan] 2025-10-05 14:30:14,715 - root - INFO - step: 25980 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8170 +[titan] 2025-10-05 14:30:14,715 - root - INFO - lr: 
1.7540e-05 gnorm: 1.11 [15:56:04< 8:35:56] +[titan] 2025-10-05 14:30:25,638 - root - INFO - step: 25985 loss: 2.0484 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:30:25,638 - root - INFO - lr: 1.7532e-05 gnorm: 1.07 [15:56:15< 8:35:45] +[titan] 2025-10-05 14:30:36,501 - root - INFO - step: 25990 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:30:36,501 - root - INFO - lr: 1.7524e-05 gnorm: 1.10 [15:56:25< 8:35:34] +[titan] 2025-10-05 14:30:47,379 - root - INFO - step: 25995 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7746 +[titan] 2025-10-05 14:30:47,379 - root - INFO - lr: 1.7516e-05 gnorm: 1.07 [15:56:36< 8:35:22] +[titan] 2025-10-05 14:30:56,083 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:30:58,259 - root - INFO - step: 26000 loss: 2.0535 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8148 +[titan] 2025-10-05 14:30:58,259 - root - INFO - lr: 1.7508e-05 gnorm: 1.14 [15:56:47< 8:35:11] +[titan] 2025-10-05 14:31:09,132 - root - INFO - step: 26005 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 14:31:09,132 - root - INFO - lr: 1.7500e-05 gnorm: 1.09 [15:56:58< 8:35:00] +[titan] 2025-10-05 14:31:20,058 - root - INFO - step: 26010 loss: 2.0243 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 14:31:20,058 - root - INFO - lr: 1.7492e-05 gnorm: 1.12 [15:57:09< 8:34:49] +[titan] 2025-10-05 14:31:30,973 - root - INFO - step: 26015 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:31:30,974 - root - INFO - lr: 1.7484e-05 gnorm: 1.10 [15:57:20< 8:34:38] +[titan] 2025-10-05 14:31:41,835 - root - INFO - step: 26020 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8519 +[titan] 2025-10-05 14:31:41,835 - root - INFO - lr: 1.7476e-05 gnorm: 1.16 [15:57:31< 8:34:27] +[titan] 2025-10-05 14:31:52,692 - root - INFO - step: 26025 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 14:31:52,692 - root - INFO - lr: 1.7468e-05 gnorm: 1.07 [15:57:42< 8:34:16] +[titan] 2025-10-05 14:32:03,561 - root - INFO - step: 26030 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 14:32:03,561 - root - INFO - lr: 
1.7460e-05 gnorm: 1.09 [15:57:52< 8:34:05] +[titan] 2025-10-05 14:32:14,505 - root - INFO - step: 26035 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:32:14,505 - root - INFO - lr: 1.7452e-05 gnorm: 1.10 [15:58:03< 8:33:54] +[titan] 2025-10-05 14:32:25,388 - root - INFO - step: 26040 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:32:25,388 - root - INFO - lr: 1.7444e-05 gnorm: 1.09 [15:58:14< 8:33:42] +[titan] 2025-10-05 14:32:36,316 - root - INFO - step: 26045 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8342 +[titan] 2025-10-05 14:32:36,316 - root - INFO - lr: 1.7436e-05 gnorm: 1.10 [15:58:25< 8:33:31] +[titan] 2025-10-05 14:32:45,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:32:47,196 - root - INFO - step: 26050 loss: 2.0388 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:32:47,196 - root - INFO - lr: 1.7428e-05 gnorm: 1.08 [15:58:36< 8:33:20] +[titan] 2025-10-05 14:32:58,069 - root - INFO - step: 26055 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8131 +[titan] 2025-10-05 14:32:58,069 - root - INFO - lr: 1.7420e-05 gnorm: 1.06 [15:58:47< 8:33:09] +[titan] 2025-10-05 14:33:08,972 - root - INFO - step: 26060 loss: 2.0150 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 14:33:08,972 - root - INFO - lr: 1.7412e-05 gnorm: 1.10 [15:58:58< 8:32:58] +[titan] 2025-10-05 14:33:19,953 - root - INFO - step: 26065 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:33:19,953 - root - INFO - lr: 1.7404e-05 gnorm: 1.12 [15:59:09< 8:32:47] +[titan] 2025-10-05 14:33:30,852 - root - INFO - step: 26070 loss: 2.0795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 14:33:30,852 - root - INFO - lr: 1.7396e-05 gnorm: 1.11 [15:59:20< 8:32:36] +[titan] 2025-10-05 14:33:41,755 - root - INFO - step: 26075 loss: 2.0764 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 14:33:41,755 - root - INFO - lr: 1.7388e-05 gnorm: 1.11 [15:59:31< 8:32:25] +[titan] 2025-10-05 14:33:52,678 - root - INFO - step: 26080 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 14:33:52,678 - root - INFO - lr: 
1.7380e-05 gnorm: 1.08 [15:59:42< 8:32:14] +[titan] 2025-10-05 14:34:03,540 - root - INFO - step: 26085 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:34:03,540 - root - INFO - lr: 1.7372e-05 gnorm: 1.14 [15:59:52< 8:32:02] +[titan] 2025-10-05 14:34:14,468 - root - INFO - step: 26090 loss: 2.0497 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8118 +[titan] 2025-10-05 14:34:14,468 - root - INFO - lr: 1.7364e-05 gnorm: 1.11 [16:00:03< 8:31:51] +[titan] 2025-10-05 14:34:25,355 - root - INFO - step: 26095 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7776 +[titan] 2025-10-05 14:34:25,355 - root - INFO - lr: 1.7356e-05 gnorm: 1.07 [16:00:14< 8:31:40] +[titan] 2025-10-05 14:34:34,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:34:36,218 - root - INFO - step: 26100 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 14:34:36,218 - root - INFO - lr: 1.7348e-05 gnorm: 1.06 [16:00:25< 8:31:29] +[titan] 2025-10-05 14:34:47,106 - root - INFO - step: 26105 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 14:34:47,107 - root - INFO - lr: 1.7340e-05 gnorm: 1.06 [16:00:36< 8:31:18] +[titan] 2025-10-05 14:34:58,140 - root - INFO - step: 26110 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,699 tflops: 412.03 mfu: 41.66% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:34:58,140 - root - INFO - lr: 1.7332e-05 gnorm: 1.12 [16:00:47< 8:31:07] +[titan] 2025-10-05 14:35:02,672 - root - INFO - Dumping profiler traces at step 26112 +[titan] 2025-10-05 14:35:02,709 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:35:09,255 - root - INFO - step: 26115 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 29,482 tflops: 409.02 mfu: 41.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 14:35:09,255 - root - INFO - lr: 1.7324e-05 gnorm: 1.07 [16:00:58< 8:30:56] +[titan] 2025-10-05 14:35:20,180 - root - INFO - step: 26120 loss: 1.9396 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 14:35:20,180 - root - INFO - lr: 1.7316e-05 gnorm: 1.06 [16:01:09< 8:30:45] +[titan] 2025-10-05 14:35:31,056 - root - INFO - step: 26125 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 14:35:31,056 - root - INFO - lr: 1.7309e-05 gnorm: 1.12 [16:01:20< 8:30:34] +[titan] 2025-10-05 14:35:41,959 - root - INFO - step: 26130 loss: 
2.0350 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:35:41,959 - root - INFO - lr: 1.7301e-05 gnorm: 1.07 [16:01:31< 8:30:23] +[titan] 2025-10-05 14:35:52,846 - root - INFO - step: 26135 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:35:52,846 - root - INFO - lr: 1.7293e-05 gnorm: 1.10 [16:01:42< 8:30:11] +[titan] 2025-10-05 14:36:03,715 - root - INFO - step: 26140 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7686 +[titan] 2025-10-05 14:36:03,715 - root - INFO - lr: 1.7285e-05 gnorm: 1.07 [16:01:53< 8:30:00] +[titan] 2025-10-05 14:36:14,674 - root - INFO - step: 26145 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7630 +[titan] 2025-10-05 14:36:14,674 - root - INFO - lr: 1.7277e-05 gnorm: 1.05 [16:02:04< 8:29:49] +[titan] 2025-10-05 14:36:23,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:36:25,541 - root - INFO - step: 26150 loss: 2.1124 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 14:36:25,541 - root - INFO - lr: 1.7269e-05 gnorm: 1.13 [16:02:14< 8:29:38] +[titan] 2025-10-05 14:36:36,415 - root - INFO - step: 26155 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:36:36,415 - root - INFO - lr: 1.7261e-05 gnorm: 1.12 [16:02:25< 8:29:27] +[titan] 2025-10-05 14:36:47,302 - root - INFO - step: 26160 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 14:36:47,302 - root - INFO - lr: 1.7253e-05 gnorm: 1.08 [16:02:36< 8:29:16] +[titan] 2025-10-05 14:36:58,194 - root - INFO - step: 26165 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 14:36:58,194 - root - INFO - lr: 1.7245e-05 gnorm: 1.08 [16:02:47< 8:29:05] +[titan] 2025-10-05 14:37:09,092 - root - INFO - step: 26170 loss: 2.1112 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8651 +[titan] 2025-10-05 14:37:09,092 - root - INFO - lr: 1.7237e-05 gnorm: 1.11 [16:02:58< 8:28:54] +[titan] 2025-10-05 14:37:20,018 - root - INFO - step: 26175 loss: 2.0516 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 14:37:20,018 - root - INFO - lr: 1.7229e-05 gnorm: 1.08 [16:03:09< 8:28:42] +[titan] 2025-10-05 14:37:30,897 - root - INFO - step: 26180 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:37:30,897 - root - INFO - lr: 
1.7221e-05 gnorm: 1.09 [16:03:20< 8:28:31] +[titan] 2025-10-05 14:37:41,783 - root - INFO - step: 26185 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 14:37:41,783 - root - INFO - lr: 1.7213e-05 gnorm: 1.08 [16:03:31< 8:28:20] +[titan] 2025-10-05 14:37:52,662 - root - INFO - step: 26190 loss: 1.9604 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7331 +[titan] 2025-10-05 14:37:52,662 - root - INFO - lr: 1.7205e-05 gnorm: 1.08 [16:03:42< 8:28:09] +[titan] 2025-10-05 14:38:03,547 - root - INFO - step: 26195 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 14:38:03,548 - root - INFO - lr: 1.7197e-05 gnorm: 1.08 [16:03:52< 8:27:58] +[titan] 2025-10-05 14:38:12,253 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:38:14,439 - root - INFO - step: 26200 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8117 +[titan] 2025-10-05 14:38:14,440 - root - INFO - lr: 1.7189e-05 gnorm: 1.06 [16:04:03< 8:27:47] +[titan] 2025-10-05 14:38:25,405 - root - INFO - step: 26205 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:38:25,405 - root - INFO - lr: 1.7181e-05 gnorm: 1.10 [16:04:14< 8:27:36] +[titan] 2025-10-05 14:38:36,296 - root - INFO - step: 26210 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:38:36,296 - root - INFO - lr: 1.7173e-05 gnorm: 1.08 [16:04:25< 8:27:25] +[titan] 2025-10-05 14:38:47,187 - root - INFO - step: 26215 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 14:38:47,187 - root - INFO - lr: 1.7166e-05 gnorm: 1.13 [16:04:36< 8:27:14] +[titan] 2025-10-05 14:38:58,082 - root - INFO - step: 26220 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:38:58,082 - root - INFO - lr: 1.7158e-05 gnorm: 1.07 [16:04:47< 8:27:02] +[titan] 2025-10-05 14:39:08,974 - root - INFO - step: 26225 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7872 +[titan] 2025-10-05 14:39:08,975 - root - INFO - lr: 1.7150e-05 gnorm: 1.06 [16:04:58< 8:26:51] +[titan] 2025-10-05 14:39:19,875 - root - INFO - step: 26230 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 14:39:19,875 - root - INFO - lr: 
1.7142e-05 gnorm: 1.07 [16:05:09< 8:26:40] +[titan] 2025-10-05 14:39:30,758 - root - INFO - step: 26235 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 14:39:30,759 - root - INFO - lr: 1.7134e-05 gnorm: 1.07 [16:05:20< 8:26:29] +[titan] 2025-10-05 14:39:41,666 - root - INFO - step: 26240 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7444 +[titan] 2025-10-05 14:39:41,666 - root - INFO - lr: 1.7126e-05 gnorm: 1.05 [16:05:31< 8:26:18] +[titan] 2025-10-05 14:39:52,544 - root - INFO - step: 26245 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 14:39:52,544 - root - INFO - lr: 1.7118e-05 gnorm: 1.08 [16:05:41< 8:26:07] +[titan] 2025-10-05 14:40:01,233 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:40:03,416 - root - INFO - step: 26250 loss: 2.0445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:40:03,416 - root - INFO - lr: 1.7110e-05 gnorm: 1.05 [16:05:52< 8:25:56] +[titan] 2025-10-05 14:40:14,284 - root - INFO - step: 26255 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9243 +[titan] 2025-10-05 14:40:14,284 - root - INFO - lr: 1.7102e-05 gnorm: 1.15 [16:06:03< 8:25:45] +[titan] 2025-10-05 14:40:25,201 - root - INFO - step: 26260 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 14:40:25,201 - root - INFO - lr: 1.7094e-05 gnorm: 1.31 [16:06:14< 8:25:34] +[titan] 2025-10-05 14:40:36,067 - root - INFO - step: 26265 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 14:40:36,067 - root - INFO - lr: 1.7086e-05 gnorm: 1.07 [16:06:25< 8:25:22] +[titan] 2025-10-05 14:40:46,986 - root - INFO - step: 26270 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 14:40:46,986 - root - INFO - lr: 1.7078e-05 gnorm: 1.12 [16:06:36< 8:25:11] +[titan] 2025-10-05 14:40:57,856 - root - INFO - step: 26275 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 14:40:57,856 - root - INFO - lr: 1.7071e-05 gnorm: 1.05 [16:06:47< 8:25:00] +[titan] 2025-10-05 14:41:08,711 - root - INFO - step: 26280 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8421 +[titan] 2025-10-05 14:41:08,711 - root - INFO - lr: 
1.7063e-05 gnorm: 1.08 [16:06:58< 8:24:49] +[titan] 2025-10-05 14:41:19,613 - root - INFO - step: 26285 loss: 2.0172 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 14:41:19,613 - root - INFO - lr: 1.7055e-05 gnorm: 1.10 [16:07:09< 8:24:38] +[titan] 2025-10-05 14:41:30,475 - root - INFO - step: 26290 loss: 2.0509 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 14:41:30,475 - root - INFO - lr: 1.7047e-05 gnorm: 1.10 [16:07:19< 8:24:27] +[titan] 2025-10-05 14:41:41,346 - root - INFO - step: 26295 loss: 2.0334 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7973 +[titan] 2025-10-05 14:41:41,347 - root - INFO - lr: 1.7039e-05 gnorm: 1.04 [16:07:30< 8:24:16] +[titan] 2025-10-05 14:41:50,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:41:52,244 - root - INFO - step: 26300 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 14:41:52,245 - root - INFO - lr: 1.7031e-05 gnorm: 1.10 [16:07:41< 8:24:05] +[titan] 2025-10-05 14:42:03,172 - root - INFO - step: 26305 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 14:42:03,172 - root - INFO - lr: 1.7023e-05 gnorm: 1.10 [16:07:52< 8:23:53] +[titan] 2025-10-05 14:42:14,032 - root - INFO - step: 26310 loss: 2.0276 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:42:14,032 - root - INFO - lr: 1.7015e-05 gnorm: 1.10 [16:08:03< 8:23:42] +[titan] 2025-10-05 14:42:24,897 - root - INFO - step: 26315 loss: 2.0611 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:42:24,897 - root - INFO - lr: 1.7007e-05 gnorm: 1.05 [16:08:14< 8:23:31] +[titan] 2025-10-05 14:42:35,737 - root - INFO - step: 26320 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 14:42:35,737 - root - INFO - lr: 1.6999e-05 gnorm: 1.08 [16:08:25< 8:23:20] +[titan] 2025-10-05 14:42:46,593 - root - INFO - step: 26325 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8364 +[titan] 2025-10-05 14:42:46,593 - root - INFO - lr: 1.6992e-05 gnorm: 1.11 [16:08:36< 8:23:09] +[titan] 2025-10-05 14:42:57,467 - root - INFO - step: 26330 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 14:42:57,467 - root - INFO - lr: 
1.6984e-05 gnorm: 1.05 [16:08:46< 8:22:58] +[titan] 2025-10-05 14:43:08,377 - root - INFO - step: 26335 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7862 +[titan] 2025-10-05 14:43:08,377 - root - INFO - lr: 1.6976e-05 gnorm: 1.10 [16:08:57< 8:22:47] +[titan] 2025-10-05 14:43:19,276 - root - INFO - step: 26340 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:43:19,276 - root - INFO - lr: 1.6968e-05 gnorm: 1.09 [16:09:08< 8:22:36] +[titan] 2025-10-05 14:43:30,150 - root - INFO - step: 26345 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 14:43:30,150 - root - INFO - lr: 1.6960e-05 gnorm: 1.09 [16:09:19< 8:22:24] +[titan] 2025-10-05 14:43:38,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:43:41,032 - root - INFO - step: 26350 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 14:43:41,033 - root - INFO - lr: 1.6952e-05 gnorm: 1.12 [16:09:30< 8:22:13] +[titan] 2025-10-05 14:43:51,910 - root - INFO - step: 26355 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 14:43:51,911 - root - INFO - lr: 1.6944e-05 gnorm: 1.10 [16:09:41< 8:22:02] +[titan] 2025-10-05 14:44:02,775 - root - INFO - step: 26360 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 14:44:02,776 - root - INFO - lr: 1.6936e-05 gnorm: 1.11 [16:09:52< 8:21:51] +[titan] 2025-10-05 14:44:13,677 - root - INFO - step: 26365 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 14:44:13,678 - root - INFO - lr: 1.6928e-05 gnorm: 1.09 [16:10:03< 8:21:40] +[titan] 2025-10-05 14:44:24,544 - root - INFO - step: 26370 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 14:44:24,544 - root - INFO - lr: 1.6921e-05 gnorm: 1.08 [16:10:13< 8:21:29] +[titan] 2025-10-05 14:44:35,405 - root - INFO - step: 26375 loss: 2.0563 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8183 +[titan] 2025-10-05 14:44:35,405 - root - INFO - lr: 1.6913e-05 gnorm: 1.09 [16:10:24< 8:21:18] +[titan] 2025-10-05 14:44:46,277 - root - INFO - step: 26380 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 14:44:46,277 - root - INFO - lr: 
1.6905e-05 gnorm: 1.10 [16:10:35< 8:21:07] +[titan] 2025-10-05 14:44:57,156 - root - INFO - step: 26385 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 14:44:57,156 - root - INFO - lr: 1.6897e-05 gnorm: 1.09 [16:10:46< 8:20:55] +[titan] 2025-10-05 14:45:07,991 - root - INFO - step: 26390 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:45:07,991 - root - INFO - lr: 1.6889e-05 gnorm: 1.09 [16:10:57< 8:20:44] +[titan] 2025-10-05 14:45:18,850 - root - INFO - step: 26395 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:45:18,850 - root - INFO - lr: 1.6881e-05 gnorm: 1.09 [16:11:08< 8:20:33] +[titan] 2025-10-05 14:45:27,579 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:45:29,758 - root - INFO - step: 26400 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 14:45:29,758 - root - INFO - lr: 1.6873e-05 gnorm: 1.09 [16:11:19< 8:20:22] +[titan] 2025-10-05 14:45:40,628 - root - INFO - step: 26405 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 14:45:40,628 - root - INFO - lr: 1.6865e-05 gnorm: 1.09 [16:11:30< 8:20:11] +[titan] 2025-10-05 14:45:51,472 - root - INFO - step: 26410 loss: 2.0493 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 14:45:51,472 - root - INFO - lr: 1.6858e-05 gnorm: 1.09 [16:11:40< 8:20:00] +[titan] 2025-10-05 14:46:02,329 - root - INFO - step: 26415 loss: 2.0718 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8312 +[titan] 2025-10-05 14:46:02,329 - root - INFO - lr: 1.6850e-05 gnorm: 1.09 [16:11:51< 8:19:49] +[titan] 2025-10-05 14:46:13,208 - root - INFO - step: 26420 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 14:46:13,208 - root - INFO - lr: 1.6842e-05 gnorm: 1.09 [16:12:02< 8:19:38] +[titan] 2025-10-05 14:46:24,077 - root - INFO - step: 26425 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:46:24,077 - root - INFO - lr: 1.6834e-05 gnorm: 1.09 [16:12:13< 8:19:26] +[titan] 2025-10-05 14:46:34,964 - root - INFO - step: 26430 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8192 +[titan] 2025-10-05 14:46:34,964 - root - INFO - lr: 
1.6826e-05 gnorm: 1.08 [16:12:24< 8:19:15] +[titan] 2025-10-05 14:46:45,809 - root - INFO - step: 26435 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 14:46:45,810 - root - INFO - lr: 1.6818e-05 gnorm: 1.08 [16:12:35< 8:19:04] +[titan] 2025-10-05 14:46:56,653 - root - INFO - step: 26440 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 14:46:56,654 - root - INFO - lr: 1.6810e-05 gnorm: 1.07 [16:12:46< 8:18:53] +[titan] 2025-10-05 14:47:07,510 - root - INFO - step: 26445 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 14:47:07,511 - root - INFO - lr: 1.6803e-05 gnorm: 1.09 [16:12:56< 8:18:42] +[titan] 2025-10-05 14:47:16,211 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:47:18,390 - root - INFO - step: 26450 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:47:18,390 - root - INFO - lr: 1.6795e-05 gnorm: 1.07 [16:13:07< 8:18:31] +[titan] 2025-10-05 14:47:29,255 - root - INFO - step: 26455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:47:29,255 - root - INFO - lr: 1.6787e-05 gnorm: 1.10 [16:13:18< 8:18:20] +[titan] 2025-10-05 14:47:40,123 - root - INFO - step: 26460 loss: 2.0742 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 14:47:40,123 - root - INFO - lr: 1.6779e-05 gnorm: 1.14 [16:13:29< 8:18:09] +[titan] 2025-10-05 14:47:51,023 - root - INFO - step: 26465 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 14:47:51,023 - root - INFO - lr: 1.6771e-05 gnorm: 1.10 [16:13:40< 8:17:57] +[titan] 2025-10-05 14:48:01,888 - root - INFO - step: 26470 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 14:48:01,888 - root - INFO - lr: 1.6763e-05 gnorm: 1.05 [16:13:51< 8:17:46] +[titan] 2025-10-05 14:48:12,750 - root - INFO - step: 26475 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:48:12,750 - root - INFO - lr: 1.6756e-05 gnorm: 1.10 [16:14:02< 8:17:35] +[titan] 2025-10-05 14:48:23,596 - root - INFO - step: 26480 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 14:48:23,597 - root - INFO - lr: 
1.6748e-05 gnorm: 1.05 [16:14:12< 8:17:24] +[titan] 2025-10-05 14:48:34,475 - root - INFO - step: 26485 loss: 2.0429 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:48:34,475 - root - INFO - lr: 1.6740e-05 gnorm: 1.11 [16:14:23< 8:17:13] +[titan] 2025-10-05 14:48:45,347 - root - INFO - step: 26490 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 14:48:45,348 - root - INFO - lr: 1.6732e-05 gnorm: 1.11 [16:14:34< 8:17:02] +[titan] 2025-10-05 14:48:56,251 - root - INFO - step: 26495 loss: 2.1088 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 14:48:56,252 - root - INFO - lr: 1.6724e-05 gnorm: 1.15 [16:14:45< 8:16:51] +[titan] 2025-10-05 14:49:04,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:49:07,097 - root - INFO - step: 26500 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8508 +[titan] 2025-10-05 14:49:07,097 - root - INFO - lr: 1.6716e-05 gnorm: 1.14 [16:14:56< 8:16:40] +[titan] 2025-10-05 14:49:17,975 - root - INFO - step: 26505 loss: 2.0105 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 14:49:17,976 - root - INFO - lr: 1.6709e-05 gnorm: 1.12 [16:15:07< 8:16:28] +[titan] 2025-10-05 14:49:28,870 - root - INFO - step: 26510 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7377 +[titan] 2025-10-05 14:49:28,870 - root - INFO - lr: 1.6701e-05 gnorm: 1.05 [16:15:18< 8:16:17] +[titan] 2025-10-05 14:49:39,744 - root - INFO - step: 26515 loss: 2.0774 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 14:49:39,744 - root - INFO - lr: 1.6693e-05 gnorm: 1.14 [16:15:29< 8:16:06] +[titan] 2025-10-05 14:49:50,606 - root - INFO - step: 26520 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:49:50,606 - root - INFO - lr: 1.6685e-05 gnorm: 1.11 [16:15:39< 8:15:55] +[titan] 2025-10-05 14:50:01,497 - root - INFO - step: 26525 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:50:01,497 - root - INFO - lr: 1.6677e-05 gnorm: 1.07 [16:15:50< 8:15:44] +[titan] 2025-10-05 14:50:12,351 - root - INFO - step: 26530 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:50:12,351 - root - INFO - lr: 
1.6669e-05 gnorm: 1.11 [16:16:01< 8:15:33] +[titan] 2025-10-05 14:50:23,197 - root - INFO - step: 26535 loss: 2.0146 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 14:50:23,197 - root - INFO - lr: 1.6662e-05 gnorm: 1.28 [16:16:12< 8:15:22] +[titan] 2025-10-05 14:50:34,070 - root - INFO - step: 26540 loss: 2.0363 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 14:50:34,070 - root - INFO - lr: 1.6654e-05 gnorm: 1.09 [16:16:23< 8:15:11] +[titan] 2025-10-05 14:50:44,935 - root - INFO - step: 26545 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 14:50:44,935 - root - INFO - lr: 1.6646e-05 gnorm: 1.07 [16:16:34< 8:15:00] +[titan] 2025-10-05 14:50:53,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:50:55,778 - root - INFO - step: 26550 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 14:50:55,778 - root - INFO - lr: 1.6638e-05 gnorm: 1.07 [16:16:45< 8:14:48] +[titan] 2025-10-05 14:51:06,624 - root - INFO - step: 26555 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 14:51:06,625 - root - INFO - lr: 1.6630e-05 gnorm: 1.11 [16:16:56< 8:14:37] +[titan] 2025-10-05 14:51:17,534 - root - INFO - step: 26560 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 14:51:17,534 - root - INFO - lr: 1.6622e-05 gnorm: 1.12 [16:17:06< 8:14:26] +[titan] 2025-10-05 14:51:28,410 - root - INFO - step: 26565 loss: 2.1178 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 14:51:28,410 - root - INFO - lr: 1.6615e-05 gnorm: 1.09 [16:17:17< 8:14:15] +[titan] 2025-10-05 14:51:39,262 - root - INFO - step: 26570 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 14:51:39,262 - root - INFO - lr: 1.6607e-05 gnorm: 1.11 [16:17:28< 8:14:04] +[titan] 2025-10-05 14:51:50,113 - root - INFO - step: 26575 loss: 2.1052 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 14:51:50,113 - root - INFO - lr: 1.6599e-05 gnorm: 1.15 [16:17:39< 8:13:53] +[titan] 2025-10-05 14:52:00,978 - root - INFO - step: 26580 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:52:00,978 - root - INFO - lr: 
1.6591e-05 gnorm: 1.09 [16:17:50< 8:13:42] +[titan] 2025-10-05 14:52:11,826 - root - INFO - step: 26585 loss: 2.0519 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 14:52:11,826 - root - INFO - lr: 1.6583e-05 gnorm: 1.14 [16:18:01< 8:13:30] +[titan] 2025-10-05 14:52:22,714 - root - INFO - step: 26590 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 14:52:22,714 - root - INFO - lr: 1.6576e-05 gnorm: 1.09 [16:18:12< 8:13:19] +[titan] 2025-10-05 14:52:33,578 - root - INFO - step: 26595 loss: 2.0442 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 14:52:33,578 - root - INFO - lr: 1.6568e-05 gnorm: 1.08 [16:18:22< 8:13:08] +[titan] 2025-10-05 14:52:42,211 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:52:44,378 - root - INFO - step: 26600 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,340 tflops: 420.92 mfu: 42.56% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:52:44,379 - root - INFO - lr: 1.6560e-05 gnorm: 1.10 [16:18:33< 8:12:57] +[titan] 2025-10-05 14:52:55,225 - root - INFO - step: 26605 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 14:52:55,225 - root - INFO - lr: 1.6552e-05 gnorm: 1.10 [16:18:44< 8:12:46] +[titan] 2025-10-05 14:53:06,069 - root - INFO - step: 26610 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:53:06,070 - root - INFO - lr: 1.6544e-05 gnorm: 1.07 [16:18:55< 8:12:35] +[titan] 2025-10-05 14:53:16,896 - root - INFO - step: 26615 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 14:53:16,896 - root - INFO - lr: 1.6537e-05 gnorm: 1.08 [16:19:06< 8:12:24] +[titan] 2025-10-05 14:53:27,734 - root - INFO - step: 26620 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7725 +[titan] 2025-10-05 14:53:27,734 - root - INFO - lr: 1.6529e-05 gnorm: 1.15 [16:19:17< 8:12:13] +[titan] 2025-10-05 14:53:36,729 - root - INFO - Dumping profiler traces at step 26624 +[titan] 2025-10-05 14:53:36,770 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:53:38,970 - root - INFO - step: 26625 loss: 2.0899 memory: 118.84GiB(85.28%) tps: 29,164 tflops: 404.60 mfu: 40.91% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8466 +[titan] 2025-10-05 14:53:38,971 - root - INFO - lr: 1.6521e-05 gnorm: 1.13 [16:19:28< 8:12:02] +[titan] 2025-10-05 14:53:49,827 - root - INFO - step: 26630 loss: 
2.0019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 14:53:49,827 - root - INFO - lr: 1.6513e-05 gnorm: 1.09 [16:19:39< 8:11:51] +[titan] 2025-10-05 14:54:00,657 - root - INFO - step: 26635 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 14:54:00,657 - root - INFO - lr: 1.6505e-05 gnorm: 1.12 [16:19:50< 8:11:39] +[titan] 2025-10-05 14:54:11,514 - root - INFO - step: 26640 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8209 +[titan] 2025-10-05 14:54:11,514 - root - INFO - lr: 1.6498e-05 gnorm: 1.10 [16:20:00< 8:11:28] +[titan] 2025-10-05 14:54:22,378 - root - INFO - step: 26645 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 14:54:22,378 - root - INFO - lr: 1.6490e-05 gnorm: 1.06 [16:20:11< 8:11:17] +[titan] 2025-10-05 14:54:31,073 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:54:33,259 - root - INFO - step: 26650 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 14:54:33,259 - root - INFO - lr: 1.6482e-05 gnorm: 1.12 [16:20:22< 8:11:06] +[titan] 2025-10-05 14:54:44,181 - root - INFO - step: 26655 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 14:54:44,181 - root - INFO - lr: 1.6474e-05 gnorm: 1.10 [16:20:33< 8:10:55] +[titan] 2025-10-05 14:54:55,045 - root - INFO - step: 26660 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:54:55,045 - root - INFO - lr: 1.6467e-05 gnorm: 1.09 [16:20:44< 8:10:44] +[titan] 2025-10-05 14:55:05,921 - root - INFO - step: 26665 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8484 +[titan] 2025-10-05 14:55:05,921 - root - INFO - lr: 1.6459e-05 gnorm: 1.12 [16:20:55< 8:10:33] +[titan] 2025-10-05 14:55:16,810 - root - INFO - step: 26670 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 14:55:16,810 - root - INFO - lr: 1.6451e-05 gnorm: 1.08 [16:21:06< 8:10:22] +[titan] 2025-10-05 14:55:27,678 - root - INFO - step: 26675 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 14:55:27,679 - root - INFO - lr: 1.6443e-05 gnorm: 1.09 [16:21:17< 8:10:10] +[titan] 2025-10-05 14:55:38,537 - root - INFO - step: 26680 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 14:55:38,537 - root - INFO - lr: 
1.6435e-05 gnorm: 1.10 [16:21:27< 8:09:59] +[titan] 2025-10-05 14:55:49,438 - root - INFO - step: 26685 loss: 2.0107 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 14:55:49,439 - root - INFO - lr: 1.6428e-05 gnorm: 1.11 [16:21:38< 8:09:48] +[titan] 2025-10-05 14:56:00,304 - root - INFO - step: 26690 loss: 2.0743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:56:00,304 - root - INFO - lr: 1.6420e-05 gnorm: 1.11 [16:21:49< 8:09:37] +[titan] 2025-10-05 14:56:11,149 - root - INFO - step: 26695 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8035 +[titan] 2025-10-05 14:56:11,149 - root - INFO - lr: 1.6412e-05 gnorm: 1.10 [16:22:00< 8:09:26] +[titan] 2025-10-05 14:56:19,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:56:22,000 - root - INFO - step: 26700 loss: 2.0496 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8111 +[titan] 2025-10-05 14:56:22,000 - root - INFO - lr: 1.6404e-05 gnorm: 1.07 [16:22:11< 8:09:15] +[titan] 2025-10-05 14:56:32,858 - root - INFO - step: 26705 loss: 1.9909 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 14:56:32,858 - root - INFO - lr: 1.6397e-05 gnorm: 1.07 [16:22:22< 8:09:04] +[titan] 2025-10-05 14:56:43,728 - root - INFO - step: 26710 loss: 2.1246 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:56:43,729 - root - INFO - lr: 1.6389e-05 gnorm: 1.12 [16:22:33< 8:08:53] +[titan] 2025-10-05 14:56:54,594 - root - INFO - step: 26715 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 14:56:54,594 - root - INFO - lr: 1.6381e-05 gnorm: 1.08 [16:22:43< 8:08:41] +[titan] 2025-10-05 14:57:05,497 - root - INFO - step: 26720 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 14:57:05,497 - root - INFO - lr: 1.6373e-05 gnorm: 1.08 [16:22:54< 8:08:30] +[titan] 2025-10-05 14:57:16,361 - root - INFO - step: 26725 loss: 2.0885 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 14:57:16,361 - root - INFO - lr: 1.6366e-05 gnorm: 1.07 [16:23:05< 8:08:19] +[titan] 2025-10-05 14:57:27,210 - root - INFO - step: 26730 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 14:57:27,211 - root - INFO - lr: 
1.6358e-05 gnorm: 1.10 [16:23:16< 8:08:08] +[titan] 2025-10-05 14:57:38,049 - root - INFO - step: 26735 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 14:57:38,050 - root - INFO - lr: 1.6350e-05 gnorm: 1.11 [16:23:27< 8:07:57] +[titan] 2025-10-05 14:57:48,918 - root - INFO - step: 26740 loss: 2.0984 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8556 +[titan] 2025-10-05 14:57:48,918 - root - INFO - lr: 1.6342e-05 gnorm: 1.15 [16:23:38< 8:07:46] +[titan] 2025-10-05 14:57:59,773 - root - INFO - step: 26745 loss: 2.0328 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 14:57:59,773 - root - INFO - lr: 1.6335e-05 gnorm: 1.10 [16:23:49< 8:07:35] +[titan] 2025-10-05 14:58:08,499 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:58:10,688 - root - INFO - step: 26750 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 14:58:10,689 - root - INFO - lr: 1.6327e-05 gnorm: 1.10 [16:24:00< 8:07:24] +[titan] 2025-10-05 14:58:21,558 - root - INFO - step: 26755 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 14:58:21,559 - root - INFO - lr: 1.6319e-05 gnorm: 1.10 [16:24:10< 8:07:13] +[titan] 2025-10-05 14:58:32,424 - root - INFO - step: 26760 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:58:32,425 - root - INFO - lr: 1.6311e-05 gnorm: 1.08 [16:24:21< 8:07:01] +[titan] 2025-10-05 14:58:43,310 - root - INFO - step: 26765 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:58:43,310 - root - INFO - lr: 1.6304e-05 gnorm: 1.07 [16:24:32< 8:06:50] +[titan] 2025-10-05 14:58:54,204 - root - INFO - step: 26770 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7566 +[titan] 2025-10-05 14:58:54,204 - root - INFO - lr: 1.6296e-05 gnorm: 1.08 [16:24:43< 8:06:39] +[titan] 2025-10-05 14:59:05,077 - root - INFO - step: 26775 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:59:05,077 - root - INFO - lr: 1.6288e-05 gnorm: 1.09 [16:24:54< 8:06:28] +[titan] 2025-10-05 14:59:15,970 - root - INFO - step: 26780 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:59:15,970 - root - INFO - lr: 
1.6280e-05 gnorm: 1.13 [16:25:05< 8:06:17] +[titan] 2025-10-05 14:59:26,894 - root - INFO - step: 26785 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 14:59:26,894 - root - INFO - lr: 1.6273e-05 gnorm: 1.11 [16:25:16< 8:06:06] +[titan] 2025-10-05 14:59:37,753 - root - INFO - step: 26790 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 14:59:37,753 - root - INFO - lr: 1.6265e-05 gnorm: 1.03 [16:25:27< 8:05:55] +[titan] 2025-10-05 14:59:48,629 - root - INFO - step: 26795 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:59:48,629 - root - INFO - lr: 1.6257e-05 gnorm: 1.08 [16:25:37< 8:05:44] +[titan] 2025-10-05 14:59:57,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:59:59,514 - root - INFO - step: 26800 loss: 1.9889 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:59:59,514 - root - INFO - lr: 1.6249e-05 gnorm: 1.12 [16:25:48< 8:05:33] +[titan] 2025-10-05 15:00:10,404 - root - INFO - step: 26805 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 15:00:10,405 - root - INFO - lr: 1.6242e-05 gnorm: 1.09 [16:25:59< 8:05:21] +[titan] 2025-10-05 15:00:21,298 - root - INFO - step: 26810 loss: 2.0441 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8079 +[titan] 2025-10-05 15:00:21,298 - root - INFO - lr: 1.6234e-05 gnorm: 1.09 [16:26:10< 8:05:10] +[titan] 2025-10-05 15:00:32,228 - root - INFO - step: 26815 loss: 2.0782 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 15:00:32,228 - root - INFO - lr: 1.6226e-05 gnorm: 1.15 [16:26:21< 8:04:59] +[titan] 2025-10-05 15:00:43,121 - root - INFO - step: 26820 loss: 2.0556 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 15:00:43,121 - root - INFO - lr: 1.6219e-05 gnorm: 1.10 [16:26:32< 8:04:48] +[titan] 2025-10-05 15:00:54,008 - root - INFO - step: 26825 loss: 2.0473 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8094 +[titan] 2025-10-05 15:00:54,008 - root - INFO - lr: 1.6211e-05 gnorm: 1.16 [16:26:43< 8:04:37] +[titan] 2025-10-05 15:01:04,889 - root - INFO - step: 26830 loss: 2.0024 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 15:01:04,889 - root - INFO - lr: 
1.6203e-05 gnorm: 1.09 [16:26:54< 8:04:26] +[titan] 2025-10-05 15:01:15,765 - root - INFO - step: 26835 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 15:01:15,765 - root - INFO - lr: 1.6195e-05 gnorm: 1.07 [16:27:05< 8:04:15] +[titan] 2025-10-05 15:01:26,630 - root - INFO - step: 26840 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8050 +[titan] 2025-10-05 15:01:26,631 - root - INFO - lr: 1.6188e-05 gnorm: 1.11 [16:27:15< 8:04:04] +[titan] 2025-10-05 15:01:37,602 - root - INFO - step: 26845 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:01:37,602 - root - INFO - lr: 1.6180e-05 gnorm: 1.12 [16:27:26< 8:03:53] +[titan] 2025-10-05 15:01:46,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:01:48,489 - root - INFO - step: 26850 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 15:01:48,490 - root - INFO - lr: 1.6172e-05 gnorm: 1.08 [16:27:37< 8:03:41] +[titan] 2025-10-05 15:01:59,381 - root - INFO - step: 26855 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 15:01:59,382 - root - INFO - lr: 1.6165e-05 gnorm: 1.07 [16:27:48< 8:03:30] +[titan] 2025-10-05 15:02:10,248 - root - INFO - step: 26860 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:02:10,248 - root - INFO - lr: 1.6157e-05 gnorm: 1.10 [16:27:59< 8:03:19] +[titan] 2025-10-05 15:02:21,138 - root - INFO - step: 26865 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 15:02:21,139 - root - INFO - lr: 1.6149e-05 gnorm: 1.12 [16:28:10< 8:03:08] +[titan] 2025-10-05 15:02:32,019 - root - INFO - step: 26870 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 15:02:32,019 - root - INFO - lr: 1.6141e-05 gnorm: 1.07 [16:28:21< 8:02:57] +[titan] 2025-10-05 15:02:42,942 - root - INFO - step: 26875 loss: 2.0517 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 15:02:42,942 - root - INFO - lr: 1.6134e-05 gnorm: 1.12 [16:28:32< 8:02:46] +[titan] 2025-10-05 15:02:53,877 - root - INFO - step: 26880 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 15:02:53,877 - root - INFO - lr: 
1.6126e-05 gnorm: 1.13 [16:28:43< 8:02:35] +[titan] 2025-10-05 15:03:04,754 - root - INFO - step: 26885 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 15:03:04,754 - root - INFO - lr: 1.6118e-05 gnorm: 1.16 [16:28:54< 8:02:24] +[titan] 2025-10-05 15:03:15,633 - root - INFO - step: 26890 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:03:15,633 - root - INFO - lr: 1.6111e-05 gnorm: 1.11 [16:29:04< 8:02:13] +[titan] 2025-10-05 15:03:26,500 - root - INFO - step: 26895 loss: 2.0231 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 15:03:26,500 - root - INFO - lr: 1.6103e-05 gnorm: 1.12 [16:29:15< 8:02:02] +[titan] 2025-10-05 15:03:35,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:03:37,367 - root - INFO - step: 26900 loss: 2.0325 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 15:03:37,368 - root - INFO - lr: 1.6095e-05 gnorm: 1.11 [16:29:26< 8:01:50] +[titan] 2025-10-05 15:03:48,288 - root - INFO - step: 26905 loss: 2.0322 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7960 +[titan] 2025-10-05 15:03:48,289 - root - INFO - lr: 1.6088e-05 gnorm: 1.12 [16:29:37< 8:01:39] +[titan] 2025-10-05 15:03:59,203 - root - INFO - step: 26910 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 15:03:59,203 - root - INFO - lr: 1.6080e-05 gnorm: 1.17 [16:29:48< 8:01:28] +[titan] 2025-10-05 15:04:10,072 - root - INFO - step: 26915 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 15:04:10,072 - root - INFO - lr: 1.6072e-05 gnorm: 1.08 [16:29:59< 8:01:17] +[titan] 2025-10-05 15:04:20,947 - root - INFO - step: 26920 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8313 +[titan] 2025-10-05 15:04:20,948 - root - INFO - lr: 1.6065e-05 gnorm: 1.11 [16:30:10< 8:01:06] +[titan] 2025-10-05 15:04:31,818 - root - INFO - step: 26925 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:04:31,818 - root - INFO - lr: 1.6057e-05 gnorm: 1.10 [16:30:21< 8:00:55] +[titan] 2025-10-05 15:04:42,737 - root - INFO - step: 26930 loss: 1.9755 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 15:04:42,737 - root - INFO - lr: 
1.6049e-05 gnorm: 1.07 [16:30:32< 8:00:44] +[titan] 2025-10-05 15:04:53,614 - root - INFO - step: 26935 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 15:04:53,614 - root - INFO - lr: 1.6041e-05 gnorm: 1.10 [16:30:42< 8:00:33] +[titan] 2025-10-05 15:05:04,493 - root - INFO - step: 26940 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 15:05:04,494 - root - INFO - lr: 1.6034e-05 gnorm: 1.16 [16:30:53< 8:00:22] +[titan] 2025-10-05 15:05:15,413 - root - INFO - step: 26945 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 15:05:15,413 - root - INFO - lr: 1.6026e-05 gnorm: 1.09 [16:31:04< 8:00:11] +[titan] 2025-10-05 15:05:24,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:05:26,301 - root - INFO - step: 26950 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:05:26,302 - root - INFO - lr: 1.6018e-05 gnorm: 1.13 [16:31:15< 7:59:59] +[titan] 2025-10-05 15:05:37,170 - root - INFO - step: 26955 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:05:37,170 - root - INFO - lr: 1.6011e-05 gnorm: 1.07 [16:31:26< 7:59:48] +[titan] 2025-10-05 15:05:48,097 - root - INFO - step: 26960 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 15:05:48,097 - root - INFO - lr: 1.6003e-05 gnorm: 1.11 [16:31:37< 7:59:37] +[titan] 2025-10-05 15:05:58,956 - root - INFO - step: 26965 loss: 2.0670 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 15:05:58,956 - root - INFO - lr: 1.5995e-05 gnorm: 1.13 [16:31:48< 7:59:26] +[titan] 2025-10-05 15:06:09,830 - root - INFO - step: 26970 loss: 1.9712 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 15:06:09,830 - root - INFO - lr: 1.5988e-05 gnorm: 1.09 [16:31:59< 7:59:15] +[titan] 2025-10-05 15:06:20,738 - root - INFO - step: 26975 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 15:06:20,738 - root - INFO - lr: 1.5980e-05 gnorm: 1.14 [16:32:10< 7:59:04] +[titan] 2025-10-05 15:06:31,607 - root - INFO - step: 26980 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 15:06:31,607 - root - INFO - lr: 
1.5972e-05 gnorm: 1.13 [16:32:20< 7:58:53] +[titan] 2025-10-05 15:06:42,557 - root - INFO - step: 26985 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 29,926 tflops: 415.18 mfu: 41.98% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8708 +[titan] 2025-10-05 15:06:42,557 - root - INFO - lr: 1.5965e-05 gnorm: 1.13 [16:32:31< 7:58:42] +[titan] 2025-10-05 15:06:53,430 - root - INFO - step: 26990 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:06:53,431 - root - INFO - lr: 1.5957e-05 gnorm: 1.11 [16:32:42< 7:58:31] +[titan] 2025-10-05 15:07:04,312 - root - INFO - step: 26995 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 15:07:04,312 - root - INFO - lr: 1.5949e-05 gnorm: 1.11 [16:32:53< 7:58:19] +[titan] 2025-10-05 15:07:13,023 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:07:15,207 - root - INFO - step: 27000 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 15:07:15,207 - root - INFO - lr: 1.5942e-05 gnorm: 1.13 [16:33:04< 7:58:08] +[titan] 2025-10-05 15:07:26,138 - root - INFO - step: 27005 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:07:26,138 - root - INFO - lr: 1.5934e-05 gnorm: 1.14 [16:33:15< 7:57:57] +[titan] 2025-10-05 15:07:37,028 - root - INFO - step: 27010 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 15:07:37,028 - root - INFO - lr: 1.5926e-05 gnorm: 1.14 [16:33:26< 7:57:46] +[titan] 2025-10-05 15:07:47,970 - root - INFO - step: 27015 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 15:07:47,971 - root - INFO - lr: 1.5919e-05 gnorm: 1.12 [16:33:37< 7:57:35] +[titan] 2025-10-05 15:07:58,854 - root - INFO - step: 27020 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7853 +[titan] 2025-10-05 15:07:58,855 - root - INFO - lr: 1.5911e-05 gnorm: 1.15 [16:33:48< 7:57:24] +[titan] 2025-10-05 15:08:09,736 - root - INFO - step: 27025 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:08:09,736 - root - INFO - lr: 1.5903e-05 gnorm: 1.12 [16:33:59< 7:57:13] +[titan] 2025-10-05 15:08:20,606 - root - INFO - step: 27030 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 15:08:20,606 - root - INFO - lr: 
1.5896e-05 gnorm: 1.08 [16:34:09< 7:57:02] +[titan] 2025-10-05 15:08:31,489 - root - INFO - step: 27035 loss: 1.9763 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 15:08:31,490 - root - INFO - lr: 1.5888e-05 gnorm: 1.09 [16:34:20< 7:56:51] +[titan] 2025-10-05 15:08:42,436 - root - INFO - step: 27040 loss: 2.0880 memory: 118.84GiB(85.28%) tps: 29,936 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 15:08:42,436 - root - INFO - lr: 1.5881e-05 gnorm: 1.12 [16:34:31< 7:56:40] +[titan] 2025-10-05 15:08:53,408 - root - INFO - step: 27045 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 15:08:53,409 - root - INFO - lr: 1.5873e-05 gnorm: 1.16 [16:34:42< 7:56:29] +[titan] 2025-10-05 15:09:02,118 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:09:04,301 - root - INFO - step: 27050 loss: 2.0295 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7940 +[titan] 2025-10-05 15:09:04,301 - root - INFO - lr: 1.5865e-05 gnorm: 1.11 [16:34:53< 7:56:17] +[titan] 2025-10-05 15:09:15,194 - root - INFO - step: 27055 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 15:09:15,194 - root - INFO - lr: 1.5858e-05 gnorm: 1.10 [16:35:04< 7:56:06] +[titan] 2025-10-05 15:09:26,100 - root - INFO - step: 27060 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8091 +[titan] 2025-10-05 15:09:26,100 - root - INFO - lr: 1.5850e-05 gnorm: 1.12 [16:35:15< 7:55:55] +[titan] 2025-10-05 15:09:36,976 - root - INFO - step: 27065 loss: 1.9733 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7439 +[titan] 2025-10-05 15:09:36,976 - root - INFO - lr: 1.5842e-05 gnorm: 1.09 [16:35:26< 7:55:44] +[titan] 2025-10-05 15:09:47,942 - root - INFO - step: 27070 loss: 2.0633 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.57 mfu: 41.92% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 15:09:47,942 - root - INFO - lr: 1.5835e-05 gnorm: 1.09 [16:35:37< 7:55:33] +[titan] 2025-10-05 15:09:58,812 - root - INFO - step: 27075 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 15:09:58,812 - root - INFO - lr: 1.5827e-05 gnorm: 1.08 [16:35:48< 7:55:22] +[titan] 2025-10-05 15:10:09,685 - root - INFO - step: 27080 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 15:10:09,685 - root - INFO - lr: 
1.5819e-05 gnorm: 1.09 [16:35:59< 7:55:11] +[titan] 2025-10-05 15:10:20,555 - root - INFO - step: 27085 loss: 2.0147 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 15:10:20,556 - root - INFO - lr: 1.5812e-05 gnorm: 1.07 [16:36:09< 7:55:00] +[titan] 2025-10-05 15:10:31,449 - root - INFO - step: 27090 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 15:10:31,449 - root - INFO - lr: 1.5804e-05 gnorm: 1.06 [16:36:20< 7:54:49] +[titan] 2025-10-05 15:10:42,317 - root - INFO - step: 27095 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:10:42,317 - root - INFO - lr: 1.5797e-05 gnorm: 1.08 [16:36:31< 7:54:37] +[titan] 2025-10-05 15:10:51,064 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:10:53,251 - root - INFO - step: 27100 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:10:53,252 - root - INFO - lr: 1.5789e-05 gnorm: 1.15 [16:36:42< 7:54:26] +[titan] 2025-10-05 15:11:04,173 - root - INFO - step: 27105 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 15:11:04,174 - root - INFO - lr: 1.5781e-05 gnorm: 1.12 [16:36:53< 7:54:15] +[titan] 2025-10-05 15:11:15,060 - root - INFO - step: 27110 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:11:15,060 - root - INFO - lr: 1.5774e-05 gnorm: 1.15 [16:37:04< 7:54:04] +[titan] 2025-10-05 15:11:25,971 - root - INFO - step: 27115 loss: 2.0649 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:11:25,971 - root - INFO - lr: 1.5766e-05 gnorm: 1.12 [16:37:15< 7:53:53] +[titan] 2025-10-05 15:11:36,858 - root - INFO - step: 27120 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 15:11:36,859 - root - INFO - lr: 1.5759e-05 gnorm: 1.09 [16:37:26< 7:53:42] +[titan] 2025-10-05 15:11:47,776 - root - INFO - step: 27125 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7663 +[titan] 2025-10-05 15:11:47,777 - root - INFO - lr: 1.5751e-05 gnorm: 1.08 [16:37:37< 7:53:31] +[titan] 2025-10-05 15:11:58,636 - root - INFO - step: 27130 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:11:58,636 - root - INFO - lr: 
1.5743e-05 gnorm: 1.13 [16:37:47< 7:53:20] +[titan] 2025-10-05 15:12:09,626 - root - INFO - step: 27135 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 29,816 tflops: 413.65 mfu: 41.83% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 15:12:09,626 - root - INFO - lr: 1.5736e-05 gnorm: 1.11 [16:37:58< 7:53:09] +[titan] 2025-10-05 15:12:11,991 - root - INFO - Dumping profiler traces at step 27136 +[titan] 2025-10-05 15:12:12,031 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:12:20,793 - root - INFO - step: 27140 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 29,344 tflops: 407.10 mfu: 41.16% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 15:12:20,794 - root - INFO - lr: 1.5728e-05 gnorm: 1.11 [16:38:10< 7:52:58] +[titan] 2025-10-05 15:12:31,651 - root - INFO - step: 27145 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 15:12:31,651 - root - INFO - lr: 1.5720e-05 gnorm: 1.09 [16:38:20< 7:52:47] +[titan] 2025-10-05 15:12:40,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:12:42,534 - root - INFO - step: 27150 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7607 +[titan] 2025-10-05 15:12:42,534 - root - INFO - lr: 1.5713e-05 gnorm: 1.06 [16:38:31< 7:52:36] +[titan] 2025-10-05 15:12:53,435 - root - INFO - step: 27155 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:12:53,435 - root - INFO - lr: 1.5705e-05 gnorm: 1.08 [16:38:42< 7:52:24] +[titan] 2025-10-05 15:13:04,284 - root - INFO - step: 27160 loss: 2.0466 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8071 +[titan] 2025-10-05 15:13:04,284 - root - INFO - lr: 1.5698e-05 gnorm: 1.11 [16:38:53< 7:52:13] +[titan] 2025-10-05 15:13:15,182 - root - INFO - step: 27165 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 15:13:15,182 - root - INFO - lr: 1.5690e-05 gnorm: 1.08 [16:39:04< 7:52:02] +[titan] 2025-10-05 15:13:26,046 - root - INFO - step: 27170 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 15:13:26,046 - root - INFO - lr: 1.5682e-05 gnorm: 1.08 [16:39:15< 7:51:51] +[titan] 2025-10-05 15:13:36,900 - root - INFO - step: 27175 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7416 +[titan] 2025-10-05 15:13:36,900 - root - INFO - lr: 1.5675e-05 gnorm: 1.10 [16:39:26< 7:51:40] +[titan] 2025-10-05 15:13:47,796 - root - INFO - step: 27180 loss: 2.1244 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8768 +[titan] 2025-10-05 15:13:47,796 - root - INFO - lr: 
1.5667e-05 gnorm: 1.09 [16:39:37< 7:51:29] +[titan] 2025-10-05 15:13:58,664 - root - INFO - step: 27185 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:13:58,664 - root - INFO - lr: 1.5660e-05 gnorm: 1.10 [16:39:47< 7:51:18] +[titan] 2025-10-05 15:14:09,527 - root - INFO - step: 27190 loss: 2.0164 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:14:09,527 - root - INFO - lr: 1.5652e-05 gnorm: 1.09 [16:39:58< 7:51:07] +[titan] 2025-10-05 15:14:20,387 - root - INFO - step: 27195 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:14:20,387 - root - INFO - lr: 1.5645e-05 gnorm: 1.07 [16:40:09< 7:50:56] +[titan] 2025-10-05 15:14:29,129 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:14:31,314 - root - INFO - step: 27200 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 15:14:31,314 - root - INFO - lr: 1.5637e-05 gnorm: 1.10 [16:40:20< 7:50:45] +[titan] 2025-10-05 15:14:42,185 - root - INFO - step: 27205 loss: 2.0377 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:14:42,186 - root - INFO - lr: 1.5629e-05 gnorm: 1.08 [16:40:31< 7:50:33] +[titan] 2025-10-05 15:14:53,065 - root - INFO - step: 27210 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 15:14:53,065 - root - INFO - lr: 1.5622e-05 gnorm: 1.12 [16:40:42< 7:50:22] +[titan] 2025-10-05 15:15:03,941 - root - INFO - step: 27215 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 15:15:03,941 - root - INFO - lr: 1.5614e-05 gnorm: 1.11 [16:40:53< 7:50:11] +[titan] 2025-10-05 15:15:14,801 - root - INFO - step: 27220 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 15:15:14,801 - root - INFO - lr: 1.5607e-05 gnorm: 1.10 [16:41:04< 7:50:00] +[titan] 2025-10-05 15:15:25,653 - root - INFO - step: 27225 loss: 1.9878 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:15:25,653 - root - INFO - lr: 1.5599e-05 gnorm: 1.09 [16:41:14< 7:49:49] +[titan] 2025-10-05 15:15:36,551 - root - INFO - step: 27230 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8280 +[titan] 2025-10-05 15:15:36,552 - root - INFO - lr: 
1.5591e-05 gnorm: 1.13 [16:41:25< 7:49:38] +[titan] 2025-10-05 15:15:47,426 - root - INFO - step: 27235 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 15:15:47,426 - root - INFO - lr: 1.5584e-05 gnorm: 1.10 [16:41:36< 7:49:27] +[titan] 2025-10-05 15:15:58,353 - root - INFO - step: 27240 loss: 2.0437 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 15:15:58,354 - root - INFO - lr: 1.5576e-05 gnorm: 1.08 [16:41:47< 7:49:16] +[titan] 2025-10-05 15:16:09,201 - root - INFO - step: 27245 loss: 2.0207 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 15:16:09,202 - root - INFO - lr: 1.5569e-05 gnorm: 1.11 [16:41:58< 7:49:05] +[titan] 2025-10-05 15:16:17,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:16:20,072 - root - INFO - step: 27250 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7443 +[titan] 2025-10-05 15:16:20,072 - root - INFO - lr: 1.5561e-05 gnorm: 1.08 [16:42:09< 7:48:53] +[titan] 2025-10-05 15:16:30,931 - root - INFO - step: 27255 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:16:30,931 - root - INFO - lr: 1.5554e-05 gnorm: 1.12 [16:42:20< 7:48:42] +[titan] 2025-10-05 15:16:41,802 - root - INFO - step: 27260 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 15:16:41,803 - root - INFO - lr: 1.5546e-05 gnorm: 1.15 [16:42:31< 7:48:31] +[titan] 2025-10-05 15:16:52,730 - root - INFO - step: 27265 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.07% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 15:16:52,730 - root - INFO - lr: 1.5539e-05 gnorm: 1.08 [16:42:42< 7:48:20] +[titan] 2025-10-05 15:17:03,595 - root - INFO - step: 27270 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 15:17:03,596 - root - INFO - lr: 1.5531e-05 gnorm: 1.08 [16:42:52< 7:48:09] +[titan] 2025-10-05 15:17:14,444 - root - INFO - step: 27275 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 15:17:14,444 - root - INFO - lr: 1.5523e-05 gnorm: 1.11 [16:43:03< 7:47:58] +[titan] 2025-10-05 15:17:25,317 - root - INFO - step: 27280 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 15:17:25,317 - root - INFO - lr: 
1.5516e-05 gnorm: 1.11 [16:43:14< 7:47:47] +[titan] 2025-10-05 15:17:36,180 - root - INFO - step: 27285 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 15:17:36,180 - root - INFO - lr: 1.5508e-05 gnorm: 1.10 [16:43:25< 7:47:36] +[titan] 2025-10-05 15:17:47,037 - root - INFO - step: 27290 loss: 2.0421 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 15:17:47,037 - root - INFO - lr: 1.5501e-05 gnorm: 1.10 [16:43:36< 7:47:25] +[titan] 2025-10-05 15:17:57,971 - root - INFO - step: 27295 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:17:57,971 - root - INFO - lr: 1.5493e-05 gnorm: 1.10 [16:43:47< 7:47:13] +[titan] 2025-10-05 15:18:06,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:18:08,853 - root - INFO - step: 27300 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 15:18:08,854 - root - INFO - lr: 1.5486e-05 gnorm: 1.09 [16:43:58< 7:47:02] +[titan] 2025-10-05 15:18:19,735 - root - INFO - step: 27305 loss: 2.0092 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 15:18:19,735 - root - INFO - lr: 1.5478e-05 gnorm: 1.11 [16:44:09< 7:46:51] +[titan] 2025-10-05 15:18:30,612 - root - INFO - step: 27310 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 15:18:30,612 - root - INFO - lr: 1.5471e-05 gnorm: 1.10 [16:44:19< 7:46:40] +[titan] 2025-10-05 15:18:41,512 - root - INFO - step: 27315 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 15:18:41,512 - root - INFO - lr: 1.5463e-05 gnorm: 1.10 [16:44:30< 7:46:29] +[titan] 2025-10-05 15:18:52,399 - root - INFO - step: 27320 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 15:18:52,399 - root - INFO - lr: 1.5455e-05 gnorm: 1.12 [16:44:41< 7:46:18] +[titan] 2025-10-05 15:19:03,317 - root - INFO - step: 27325 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 15:19:03,317 - root - INFO - lr: 1.5448e-05 gnorm: 1.13 [16:44:52< 7:46:07] +[titan] 2025-10-05 15:19:14,195 - root - INFO - step: 27330 loss: 2.1168 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 15:19:14,195 - root - INFO - lr: 
1.5440e-05 gnorm: 1.12 [16:45:03< 7:45:56] +[titan] 2025-10-05 15:19:25,053 - root - INFO - step: 27335 loss: 2.0622 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8236 +[titan] 2025-10-05 15:19:25,053 - root - INFO - lr: 1.5433e-05 gnorm: 1.14 [16:45:14< 7:45:45] +[titan] 2025-10-05 15:19:35,914 - root - INFO - step: 27340 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:19:35,914 - root - INFO - lr: 1.5425e-05 gnorm: 1.09 [16:45:25< 7:45:34] +[titan] 2025-10-05 15:19:46,795 - root - INFO - step: 27345 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8129 +[titan] 2025-10-05 15:19:46,796 - root - INFO - lr: 1.5418e-05 gnorm: 1.14 [16:45:36< 7:45:22] +[titan] 2025-10-05 15:19:55,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:19:57,711 - root - INFO - step: 27350 loss: 2.0646 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:19:57,711 - root - INFO - lr: 1.5410e-05 gnorm: 1.12 [16:45:47< 7:45:11] +[titan] 2025-10-05 15:20:08,574 - root - INFO - step: 27355 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:20:08,574 - root - INFO - lr: 1.5403e-05 gnorm: 1.10 [16:45:57< 7:45:00] +[titan] 2025-10-05 15:20:19,517 - root - INFO - step: 27360 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8070 +[titan] 2025-10-05 15:20:19,518 - root - INFO - lr: 1.5395e-05 gnorm: 1.10 [16:46:08< 7:44:49] +[titan] 2025-10-05 15:20:30,400 - root - INFO - step: 27365 loss: 2.0266 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 15:20:30,401 - root - INFO - lr: 1.5388e-05 gnorm: 1.10 [16:46:19< 7:44:38] +[titan] 2025-10-05 15:20:41,257 - root - INFO - step: 27370 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 15:20:41,257 - root - INFO - lr: 1.5380e-05 gnorm: 1.09 [16:46:30< 7:44:27] +[titan] 2025-10-05 15:20:52,147 - root - INFO - step: 27375 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8366 +[titan] 2025-10-05 15:20:52,147 - root - INFO - lr: 1.5373e-05 gnorm: 1.09 [16:46:41< 7:44:16] +[titan] 2025-10-05 15:21:03,049 - root - INFO - step: 27380 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 15:21:03,049 - root - INFO - lr: 
1.5365e-05 gnorm: 1.10 [16:46:52< 7:44:05] +[titan] 2025-10-05 15:21:13,927 - root - INFO - step: 27385 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 15:21:13,927 - root - INFO - lr: 1.5358e-05 gnorm: 1.10 [16:47:03< 7:43:54] +[titan] 2025-10-05 15:21:24,850 - root - INFO - step: 27390 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:21:24,850 - root - INFO - lr: 1.5350e-05 gnorm: 1.13 [16:47:14< 7:43:43] +[titan] 2025-10-05 15:21:35,739 - root - INFO - step: 27395 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 15:21:35,739 - root - INFO - lr: 1.5343e-05 gnorm: 1.13 [16:47:25< 7:43:32] +[titan] 2025-10-05 15:21:44,436 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:21:46,630 - root - INFO - step: 27400 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:21:46,630 - root - INFO - lr: 1.5335e-05 gnorm: 1.08 [16:47:35< 7:43:20] +[titan] 2025-10-05 15:21:57,526 - root - INFO - step: 27405 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7972 +[titan] 2025-10-05 15:21:57,526 - root - INFO - lr: 1.5328e-05 gnorm: 1.12 [16:47:46< 7:43:09] +[titan] 2025-10-05 15:22:08,415 - root - INFO - step: 27410 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7583 +[titan] 2025-10-05 15:22:08,415 - root - INFO - lr: 1.5320e-05 gnorm: 1.07 [16:47:57< 7:42:58] +[titan] 2025-10-05 15:22:19,304 - root - INFO - step: 27415 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 15:22:19,305 - root - INFO - lr: 1.5313e-05 gnorm: 1.12 [16:48:08< 7:42:47] +[titan] 2025-10-05 15:22:30,195 - root - INFO - step: 27420 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8052 +[titan] 2025-10-05 15:22:30,196 - root - INFO - lr: 1.5305e-05 gnorm: 1.11 [16:48:19< 7:42:36] +[titan] 2025-10-05 15:22:41,161 - root - INFO - step: 27425 loss: 2.0339 memory: 118.84GiB(85.28%) tps: 29,884 tflops: 414.60 mfu: 41.92% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 15:22:41,161 - root - INFO - lr: 1.5298e-05 gnorm: 1.12 [16:48:30< 7:42:25] +[titan] 2025-10-05 15:22:52,043 - root - INFO - step: 27430 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 15:22:52,043 - root - INFO - lr: 
1.5290e-05 gnorm: 1.10 [16:48:41< 7:42:14] +[titan] 2025-10-05 15:23:02,939 - root - INFO - step: 27435 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 15:23:02,939 - root - INFO - lr: 1.5283e-05 gnorm: 1.12 [16:48:52< 7:42:03] +[titan] 2025-10-05 15:23:13,837 - root - INFO - step: 27440 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 15:23:13,837 - root - INFO - lr: 1.5275e-05 gnorm: 1.12 [16:49:03< 7:41:52] +[titan] 2025-10-05 15:23:24,732 - root - INFO - step: 27445 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 15:23:24,733 - root - INFO - lr: 1.5268e-05 gnorm: 1.10 [16:49:14< 7:41:41] +[titan] 2025-10-05 15:23:33,429 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:23:35,615 - root - INFO - step: 27450 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8394 +[titan] 2025-10-05 15:23:35,615 - root - INFO - lr: 1.5260e-05 gnorm: 1.12 [16:49:24< 7:41:29] +[titan] 2025-10-05 15:23:46,565 - root - INFO - step: 27455 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.19 mfu: 41.98% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 15:23:46,565 - root - INFO - lr: 1.5253e-05 gnorm: 1.14 [16:49:35< 7:41:18] +[titan] 2025-10-05 15:23:57,475 - root - INFO - step: 27460 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 15:23:57,475 - root - INFO - lr: 1.5245e-05 gnorm: 1.11 [16:49:46< 7:41:07] +[titan] 2025-10-05 15:24:08,327 - root - INFO - step: 27465 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 15:24:08,327 - root - INFO - lr: 1.5238e-05 gnorm: 1.11 [16:49:57< 7:40:56] +[titan] 2025-10-05 15:24:19,201 - root - INFO - step: 27470 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:24:19,201 - root - INFO - lr: 1.5230e-05 gnorm: 1.08 [16:50:08< 7:40:45] +[titan] 2025-10-05 15:24:30,080 - root - INFO - step: 27475 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:24:30,080 - root - INFO - lr: 1.5223e-05 gnorm: 1.09 [16:50:19< 7:40:34] +[titan] 2025-10-05 15:24:40,967 - root - INFO - step: 27480 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8213 +[titan] 2025-10-05 15:24:40,967 - root - INFO - lr: 
1.5215e-05 gnorm: 1.10 [16:50:30< 7:40:23] +[titan] 2025-10-05 15:24:51,863 - root - INFO - step: 27485 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 15:24:51,863 - root - INFO - lr: 1.5208e-05 gnorm: 1.11 [16:50:41< 7:40:12] +[titan] 2025-10-05 15:25:02,777 - root - INFO - step: 27490 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8587 +[titan] 2025-10-05 15:25:02,778 - root - INFO - lr: 1.5200e-05 gnorm: 1.16 [16:50:52< 7:40:01] +[titan] 2025-10-05 15:25:13,681 - root - INFO - step: 27495 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:25:13,681 - root - INFO - lr: 1.5193e-05 gnorm: 1.12 [16:51:02< 7:39:50] +[titan] 2025-10-05 15:25:22,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:25:24,569 - root - INFO - step: 27500 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 15:25:24,570 - root - INFO - lr: 1.5185e-05 gnorm: 1.09 [16:51:13< 7:39:39] +[titan] 2025-10-05 15:25:35,452 - root - INFO - step: 27505 loss: 2.0528 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:25:35,452 - root - INFO - lr: 1.5178e-05 gnorm: 1.14 [16:51:24< 7:39:27] +[titan] 2025-10-05 15:25:46,344 - root - INFO - step: 27510 loss: 2.2224 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 15:25:46,344 - root - INFO - lr: 1.5170e-05 gnorm: 7.42 [16:51:35< 7:39:16] +[titan] 2025-10-05 15:25:57,223 - root - INFO - step: 27515 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7946 +[titan] 2025-10-05 15:25:57,223 - root - INFO - lr: 1.5163e-05 gnorm: 1.13 [16:51:46< 7:39:05] +[titan] 2025-10-05 15:26:08,195 - root - INFO - step: 27520 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 15:26:08,195 - root - INFO - lr: 1.5155e-05 gnorm: 1.07 [16:51:57< 7:38:54] +[titan] 2025-10-05 15:26:19,076 - root - INFO - step: 27525 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 15:26:19,076 - root - INFO - lr: 1.5148e-05 gnorm: 1.13 [16:52:08< 7:38:43] +[titan] 2025-10-05 15:26:29,960 - root - INFO - step: 27530 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7786 +[titan] 2025-10-05 15:26:29,960 - root - INFO - lr: 
1.5141e-05 gnorm: 1.17 [16:52:19< 7:38:32] +[titan] 2025-10-05 15:26:40,847 - root - INFO - step: 27535 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 15:26:40,847 - root - INFO - lr: 1.5133e-05 gnorm: 1.09 [16:52:30< 7:38:21] +[titan] 2025-10-05 15:26:51,718 - root - INFO - step: 27540 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 15:26:51,718 - root - INFO - lr: 1.5126e-05 gnorm: 1.13 [16:52:41< 7:38:10] +[titan] 2025-10-05 15:27:02,650 - root - INFO - step: 27545 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 15:27:02,650 - root - INFO - lr: 1.5118e-05 gnorm: 1.12 [16:52:51< 7:37:59] +[titan] 2025-10-05 15:27:11,363 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:27:13,551 - root - INFO - step: 27550 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 15:27:13,551 - root - INFO - lr: 1.5111e-05 gnorm: 1.14 [16:53:02< 7:37:48] +[titan] 2025-10-05 15:27:24,430 - root - INFO - step: 27555 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:27:24,430 - root - INFO - lr: 1.5103e-05 gnorm: 1.10 [16:53:13< 7:37:37] +[titan] 2025-10-05 15:27:35,290 - root - INFO - step: 27560 loss: 2.0098 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7771 +[titan] 2025-10-05 15:27:35,290 - root - INFO - lr: 1.5096e-05 gnorm: 1.11 [16:53:24< 7:37:25] +[titan] 2025-10-05 15:27:46,162 - root - INFO - step: 27565 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 15:27:46,162 - root - INFO - lr: 1.5088e-05 gnorm: 1.14 [16:53:35< 7:37:14] +[titan] 2025-10-05 15:27:57,049 - root - INFO - step: 27570 loss: 2.0327 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7970 +[titan] 2025-10-05 15:27:57,049 - root - INFO - lr: 1.5081e-05 gnorm: 1.09 [16:53:46< 7:37:03] +[titan] 2025-10-05 15:28:07,972 - root - INFO - step: 27575 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8349 +[titan] 2025-10-05 15:28:07,972 - root - INFO - lr: 1.5074e-05 gnorm: 1.15 [16:53:57< 7:36:52] +[titan] 2025-10-05 15:28:18,854 - root - INFO - step: 27580 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 15:28:18,855 - root - INFO - lr: 
1.5066e-05 gnorm: 1.16 [16:54:08< 7:36:41] +[titan] 2025-10-05 15:28:29,772 - root - INFO - step: 27585 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8547 +[titan] 2025-10-05 15:28:29,772 - root - INFO - lr: 1.5059e-05 gnorm: 1.13 [16:54:19< 7:36:30] +[titan] 2025-10-05 15:28:40,678 - root - INFO - step: 27590 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 15:28:40,678 - root - INFO - lr: 1.5051e-05 gnorm: 1.09 [16:54:29< 7:36:19] +[titan] 2025-10-05 15:28:51,534 - root - INFO - step: 27595 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8331 +[titan] 2025-10-05 15:28:51,534 - root - INFO - lr: 1.5044e-05 gnorm: 1.10 [16:54:40< 7:36:08] +[titan] 2025-10-05 15:29:00,205 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:29:02,428 - root - INFO - step: 27600 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7298 +[titan] 2025-10-05 15:29:02,428 - root - INFO - lr: 1.5036e-05 gnorm: 1.12 [16:54:51< 7:35:57] +[titan] 2025-10-05 15:29:13,305 - root - INFO - step: 27605 loss: 2.0663 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:29:13,305 - root - INFO - lr: 1.5029e-05 gnorm: 1.09 [16:55:02< 7:35:46] +[titan] 2025-10-05 15:29:24,170 - root - INFO - step: 27610 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 15:29:24,170 - root - INFO - lr: 1.5021e-05 gnorm: 1.06 [16:55:13< 7:35:34] +[titan] 2025-10-05 15:29:35,069 - root - INFO - step: 27615 loss: 1.9817 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 15:29:35,070 - root - INFO - lr: 1.5014e-05 gnorm: 1.13 [16:55:24< 7:35:23] +[titan] 2025-10-05 15:29:45,908 - root - INFO - step: 27620 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 15:29:45,908 - root - INFO - lr: 1.5007e-05 gnorm: 1.14 [16:55:35< 7:35:12] +[titan] 2025-10-05 15:29:56,752 - root - INFO - step: 27625 loss: 2.0304 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7949 +[titan] 2025-10-05 15:29:56,752 - root - INFO - lr: 1.4999e-05 gnorm: 1.14 [16:55:46< 7:35:01] +[titan] 2025-10-05 15:30:07,651 - root - INFO - step: 27630 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 15:30:07,652 - root - INFO - lr: 
1.4992e-05 gnorm: 1.10 [16:55:56< 7:34:50] +[titan] 2025-10-05 15:30:18,514 - root - INFO - step: 27635 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8163 +[titan] 2025-10-05 15:30:18,514 - root - INFO - lr: 1.4984e-05 gnorm: 1.11 [16:56:07< 7:34:39] +[titan] 2025-10-05 15:30:29,382 - root - INFO - step: 27640 loss: 2.0889 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 15:30:29,382 - root - INFO - lr: 1.4977e-05 gnorm: 1.09 [16:56:18< 7:34:28] +[titan] 2025-10-05 15:30:40,353 - root - INFO - step: 27645 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7813 +[titan] 2025-10-05 15:30:40,354 - root - INFO - lr: 1.4970e-05 gnorm: 1.12 [16:56:29< 7:34:17] +[titan] 2025-10-05 15:30:47,046 - root - INFO - Dumping profiler traces at step 27648 +[titan] 2025-10-05 15:30:47,083 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:30:49,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:30:51,455 - root - INFO - step: 27650 loss: 2.0385 memory: 118.84GiB(85.28%) tps: 29,516 tflops: 409.49 mfu: 41.40% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:30:51,456 - root - INFO - lr: 1.4962e-05 gnorm: 1.14 [16:56:40< 7:34:06] +[titan] 2025-10-05 15:31:02,322 - root - INFO - step: 27655 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 15:31:02,323 - root - INFO - lr: 1.4955e-05 gnorm: 1.07 [16:56:51< 7:33:55] +[titan] 2025-10-05 15:31:13,179 - root - INFO - step: 27660 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:31:13,179 - root - INFO - lr: 1.4947e-05 gnorm: 1.10 [16:57:02< 7:33:44] +[titan] 2025-10-05 15:31:24,033 - root - INFO - step: 27665 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 15:31:24,033 - root - INFO - lr: 1.4940e-05 gnorm: 1.09 [16:57:13< 7:33:32] +[titan] 2025-10-05 15:31:34,896 - root - INFO - step: 27670 loss: 2.0761 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 15:31:34,896 - root - INFO - lr: 1.4933e-05 gnorm: 1.11 [16:57:24< 7:33:21] +[titan] 2025-10-05 15:31:45,721 - root - INFO - step: 27675 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 15:31:45,721 - root - INFO - lr: 1.4925e-05 gnorm: 1.11 [16:57:35< 7:33:10] +[titan] 2025-10-05 15:31:56,620 - root - INFO - step: 27680 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7791 +[titan] 2025-10-05 15:31:56,620 - root - INFO - lr: 
1.4918e-05 gnorm: 1.10 [16:57:45< 7:32:59] +[titan] 2025-10-05 15:32:07,525 - root - INFO - step: 27685 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 15:32:07,525 - root - INFO - lr: 1.4910e-05 gnorm: 1.10 [16:57:56< 7:32:48] +[titan] 2025-10-05 15:32:18,367 - root - INFO - step: 27690 loss: 2.0707 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 15:32:18,367 - root - INFO - lr: 1.4903e-05 gnorm: 1.09 [16:58:07< 7:32:37] +[titan] 2025-10-05 15:32:29,238 - root - INFO - step: 27695 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 15:32:29,238 - root - INFO - lr: 1.4896e-05 gnorm: 1.13 [16:58:18< 7:32:26] +[titan] 2025-10-05 15:32:37,922 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:32:40,104 - root - INFO - step: 27700 loss: 2.0988 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 15:32:40,104 - root - INFO - lr: 1.4888e-05 gnorm: 1.12 [16:58:29< 7:32:15] +[titan] 2025-10-05 15:32:50,965 - root - INFO - step: 27705 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 15:32:50,965 - root - INFO - lr: 1.4881e-05 gnorm: 1.13 [16:58:40< 7:32:04] +[titan] 2025-10-05 15:33:01,872 - root - INFO - step: 27710 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 15:33:01,872 - root - INFO - lr: 1.4873e-05 gnorm: 1.13 [16:58:51< 7:31:53] +[titan] 2025-10-05 15:33:12,825 - root - INFO - step: 27715 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8034 +[titan] 2025-10-05 15:33:12,825 - root - INFO - lr: 1.4866e-05 gnorm: 1.12 [16:59:02< 7:31:41] +[titan] 2025-10-05 15:33:23,696 - root - INFO - step: 27720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 15:33:23,696 - root - INFO - lr: 1.4859e-05 gnorm: 1.13 [16:59:12< 7:31:30] +[titan] 2025-10-05 15:33:34,571 - root - INFO - step: 27725 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:33:34,571 - root - INFO - lr: 1.4851e-05 gnorm: 1.13 [16:59:23< 7:31:19] +[titan] 2025-10-05 15:33:45,446 - root - INFO - step: 27730 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 15:33:45,446 - root - INFO - lr: 
1.4844e-05 gnorm: 1.07 [16:59:34< 7:31:08] +[titan] 2025-10-05 15:33:56,288 - root - INFO - step: 27735 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7851 +[titan] 2025-10-05 15:33:56,288 - root - INFO - lr: 1.4836e-05 gnorm: 1.13 [16:59:45< 7:30:57] +[titan] 2025-10-05 15:34:07,199 - root - INFO - step: 27740 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 15:34:07,199 - root - INFO - lr: 1.4829e-05 gnorm: 1.11 [16:59:56< 7:30:46] +[titan] 2025-10-05 15:34:18,088 - root - INFO - step: 27745 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:34:18,088 - root - INFO - lr: 1.4822e-05 gnorm: 1.14 [17:00:07< 7:30:35] +[titan] 2025-10-05 15:34:26,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:34:28,919 - root - INFO - step: 27750 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7660 +[titan] 2025-10-05 15:34:28,919 - root - INFO - lr: 1.4814e-05 gnorm: 1.10 [17:00:18< 7:30:24] +[titan] 2025-10-05 15:34:39,788 - root - INFO - step: 27755 loss: 2.0085 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:34:39,788 - root - INFO - lr: 1.4807e-05 gnorm: 1.13 [17:00:29< 7:30:13] +[titan] 2025-10-05 15:34:50,649 - root - INFO - step: 27760 loss: 2.0229 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7889 +[titan] 2025-10-05 15:34:50,649 - root - INFO - lr: 1.4800e-05 gnorm: 1.08 [17:00:39< 7:30:02] +[titan] 2025-10-05 15:35:01,518 - root - INFO - step: 27765 loss: 2.0372 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:35:01,518 - root - INFO - lr: 1.4792e-05 gnorm: 1.09 [17:00:50< 7:29:50] +[titan] 2025-10-05 15:35:12,435 - root - INFO - step: 27770 loss: 2.0491 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 15:35:12,435 - root - INFO - lr: 1.4785e-05 gnorm: 1.11 [17:01:01< 7:29:39] +[titan] 2025-10-05 15:35:23,343 - root - INFO - step: 27775 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:35:23,344 - root - INFO - lr: 1.4777e-05 gnorm: 1.10 [17:01:12< 7:29:28] +[titan] 2025-10-05 15:35:34,184 - root - INFO - step: 27780 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 15:35:34,184 - root - INFO - lr: 
1.4770e-05 gnorm: 1.09 [17:01:23< 7:29:17] +[titan] 2025-10-05 15:35:45,050 - root - INFO - step: 27785 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 15:35:45,050 - root - INFO - lr: 1.4763e-05 gnorm: 1.08 [17:01:34< 7:29:06] +[titan] 2025-10-05 15:35:55,912 - root - INFO - step: 27790 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:35:55,912 - root - INFO - lr: 1.4755e-05 gnorm: 1.10 [17:01:45< 7:28:55] +[titan] 2025-10-05 15:36:06,779 - root - INFO - step: 27795 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7727 +[titan] 2025-10-05 15:36:06,780 - root - INFO - lr: 1.4748e-05 gnorm: 1.09 [17:01:56< 7:28:44] +[titan] 2025-10-05 15:36:15,502 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:36:17,685 - root - INFO - step: 27800 loss: 2.0545 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8152 +[titan] 2025-10-05 15:36:17,685 - root - INFO - lr: 1.4741e-05 gnorm: 1.11 [17:02:06< 7:28:33] +[titan] 2025-10-05 15:36:28,568 - root - INFO - step: 27805 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 15:36:28,568 - root - INFO - lr: 1.4733e-05 gnorm: 1.09 [17:02:17< 7:28:22] +[titan] 2025-10-05 15:36:39,434 - root - INFO - step: 27810 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 15:36:39,435 - root - INFO - lr: 1.4726e-05 gnorm: 1.12 [17:02:28< 7:28:11] +[titan] 2025-10-05 15:36:50,300 - root - INFO - step: 27815 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:36:50,300 - root - INFO - lr: 1.4719e-05 gnorm: 1.08 [17:02:39< 7:27:59] +[titan] 2025-10-05 15:37:01,162 - root - INFO - step: 27820 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 15:37:01,162 - root - INFO - lr: 1.4711e-05 gnorm: 1.10 [17:02:50< 7:27:48] +[titan] 2025-10-05 15:37:12,068 - root - INFO - step: 27825 loss: 2.0443 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8065 +[titan] 2025-10-05 15:37:12,068 - root - INFO - lr: 1.4704e-05 gnorm: 1.12 [17:03:01< 7:27:37] +[titan] 2025-10-05 15:37:22,924 - root - INFO - step: 27830 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:37:22,924 - root - INFO - lr: 
1.4697e-05 gnorm: 1.12 [17:03:12< 7:27:26] +[titan] 2025-10-05 15:37:33,784 - root - INFO - step: 27835 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 15:37:33,784 - root - INFO - lr: 1.4689e-05 gnorm: 1.09 [17:03:23< 7:27:15] +[titan] 2025-10-05 15:37:44,693 - root - INFO - step: 27840 loss: 1.9926 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 15:37:44,694 - root - INFO - lr: 1.4682e-05 gnorm: 1.08 [17:03:33< 7:27:04] +[titan] 2025-10-05 15:37:55,562 - root - INFO - step: 27845 loss: 1.9782 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 15:37:55,562 - root - INFO - lr: 1.4675e-05 gnorm: 1.11 [17:03:44< 7:26:53] +[titan] 2025-10-05 15:38:04,219 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:38:06,403 - root - INFO - step: 27850 loss: 1.9362 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 15:38:06,403 - root - INFO - lr: 1.4667e-05 gnorm: 1.10 [17:03:55< 7:26:42] +[titan] 2025-10-05 15:38:17,311 - root - INFO - step: 27855 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 15:38:17,311 - root - INFO - lr: 1.4660e-05 gnorm: 1.11 [17:04:06< 7:26:31] +[titan] 2025-10-05 15:38:28,219 - root - INFO - step: 27860 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 15:38:28,219 - root - INFO - lr: 1.4653e-05 gnorm: 1.12 [17:04:17< 7:26:20] +[titan] 2025-10-05 15:38:39,092 - root - INFO - step: 27865 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 15:38:39,092 - root - INFO - lr: 1.4645e-05 gnorm: 1.10 [17:04:28< 7:26:09] +[titan] 2025-10-05 15:38:49,999 - root - INFO - step: 27870 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 15:38:49,999 - root - INFO - lr: 1.4638e-05 gnorm: 1.15 [17:04:39< 7:25:57] +[titan] 2025-10-05 15:39:00,879 - root - INFO - step: 27875 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:39:00,879 - root - INFO - lr: 1.4631e-05 gnorm: 1.10 [17:04:50< 7:25:46] +[titan] 2025-10-05 15:39:11,796 - root - INFO - step: 27880 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 15:39:11,796 - root - INFO - lr: 
1.4623e-05 gnorm: 1.09 [17:05:01< 7:25:35] +[titan] 2025-10-05 15:39:22,647 - root - INFO - step: 27885 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8326 +[titan] 2025-10-05 15:39:22,647 - root - INFO - lr: 1.4616e-05 gnorm: 1.15 [17:05:11< 7:25:24] +[titan] 2025-10-05 15:39:33,473 - root - INFO - step: 27890 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 15:39:33,473 - root - INFO - lr: 1.4609e-05 gnorm: 1.11 [17:05:22< 7:25:13] +[titan] 2025-10-05 15:39:44,338 - root - INFO - step: 27895 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 15:39:44,338 - root - INFO - lr: 1.4601e-05 gnorm: 1.12 [17:05:33< 7:25:02] +[titan] 2025-10-05 15:39:53,007 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:39:55,213 - root - INFO - step: 27900 loss: 2.0345 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 15:39:55,213 - root - INFO - lr: 1.4594e-05 gnorm: 1.12 [17:05:44< 7:24:51] +[titan] 2025-10-05 15:40:06,088 - root - INFO - step: 27905 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:40:06,088 - root - INFO - lr: 1.4587e-05 gnorm: 1.11 [17:05:55< 7:24:40] +[titan] 2025-10-05 15:40:16,993 - root - INFO - step: 27910 loss: 2.0536 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8146 +[titan] 2025-10-05 15:40:16,993 - root - INFO - lr: 1.4579e-05 gnorm: 1.11 [17:06:06< 7:24:29] +[titan] 2025-10-05 15:40:27,815 - root - INFO - step: 27915 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 15:40:27,815 - root - INFO - lr: 1.4572e-05 gnorm: 1.11 [17:06:17< 7:24:18] +[titan] 2025-10-05 15:40:38,643 - root - INFO - step: 27920 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8155 +[titan] 2025-10-05 15:40:38,643 - root - INFO - lr: 1.4565e-05 gnorm: 1.07 [17:06:27< 7:24:06] +[titan] 2025-10-05 15:40:49,515 - root - INFO - step: 27925 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7489 +[titan] 2025-10-05 15:40:49,515 - root - INFO - lr: 1.4558e-05 gnorm: 1.15 [17:06:38< 7:23:55] +[titan] 2025-10-05 15:41:00,387 - root - INFO - step: 27930 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 15:41:00,387 - root - INFO - lr: 
1.4550e-05 gnorm: 1.08 [17:06:49< 7:23:44] +[titan] 2025-10-05 15:41:11,313 - root - INFO - step: 27935 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 15:41:11,313 - root - INFO - lr: 1.4543e-05 gnorm: 1.11 [17:07:00< 7:23:33] +[titan] 2025-10-05 15:41:22,241 - root - INFO - step: 27940 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:41:22,241 - root - INFO - lr: 1.4536e-05 gnorm: 1.12 [17:07:11< 7:23:22] +[titan] 2025-10-05 15:41:33,099 - root - INFO - step: 27945 loss: 2.0587 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 15:41:33,100 - root - INFO - lr: 1.4528e-05 gnorm: 1.10 [17:07:22< 7:23:11] +[titan] 2025-10-05 15:41:41,784 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:41:43,967 - root - INFO - step: 27950 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:41:43,968 - root - INFO - lr: 1.4521e-05 gnorm: 1.11 [17:07:33< 7:23:00] +[titan] 2025-10-05 15:41:54,868 - root - INFO - step: 27955 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 15:41:54,868 - root - INFO - lr: 1.4514e-05 gnorm: 1.12 [17:07:44< 7:22:49] +[titan] 2025-10-05 15:42:05,736 - root - INFO - step: 27960 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:05,736 - root - INFO - lr: 1.4507e-05 gnorm: 1.10 [17:07:54< 7:22:38] +[titan] 2025-10-05 15:42:16,692 - root - INFO - step: 27965 loss: 1.9991 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:42:16,692 - root - INFO - lr: 1.4499e-05 gnorm: 1.12 [17:08:05< 7:22:27] +[titan] 2025-10-05 15:42:27,570 - root - INFO - step: 27970 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 15:42:27,570 - root - INFO - lr: 1.4492e-05 gnorm: 1.12 [17:08:16< 7:22:16] +[titan] 2025-10-05 15:42:38,440 - root - INFO - step: 27975 loss: 2.0135 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:38,440 - root - INFO - lr: 1.4485e-05 gnorm: 1.13 [17:08:27< 7:22:04] +[titan] 2025-10-05 15:42:49,330 - root - INFO - step: 27980 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 15:42:49,330 - root - INFO - lr: 
1.4477e-05 gnorm: 1.12 [17:08:38< 7:21:53] +[titan] 2025-10-05 15:43:00,202 - root - INFO - step: 27985 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 15:43:00,202 - root - INFO - lr: 1.4470e-05 gnorm: 1.10 [17:08:49< 7:21:42] +[titan] 2025-10-05 15:43:11,083 - root - INFO - step: 27990 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:43:11,083 - root - INFO - lr: 1.4463e-05 gnorm: 1.11 [17:09:00< 7:21:31] +[titan] 2025-10-05 15:43:21,971 - root - INFO - step: 27995 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 15:43:21,972 - root - INFO - lr: 1.4456e-05 gnorm: 1.10 [17:09:11< 7:21:20] +[titan] 2025-10-05 15:43:30,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:43:32,877 - root - INFO - step: 28000 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 15:43:32,878 - root - INFO - lr: 1.4448e-05 gnorm: 1.08 [17:09:22< 7:21:09] +[titan] 2025-10-05 15:43:43,741 - root - INFO - step: 28005 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:43:43,741 - root - INFO - lr: 1.4441e-05 gnorm: 1.13 [17:09:32< 7:20:58] +[titan] 2025-10-05 15:43:54,582 - root - INFO - step: 28010 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:43:54,582 - root - INFO - lr: 1.4434e-05 gnorm: 1.11 [17:09:43< 7:20:47] +[titan] 2025-10-05 15:44:05,429 - root - INFO - step: 28015 loss: 2.0300 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 15:44:05,429 - root - INFO - lr: 1.4426e-05 gnorm: 1.11 [17:09:54< 7:20:36] +[titan] 2025-10-05 15:44:16,331 - root - INFO - step: 28020 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 15:44:16,331 - root - INFO - lr: 1.4419e-05 gnorm: 1.09 [17:10:05< 7:20:25] +[titan] 2025-10-05 15:44:27,186 - root - INFO - step: 28025 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 15:44:27,186 - root - INFO - lr: 1.4412e-05 gnorm: 1.08 [17:10:16< 7:20:13] +[titan] 2025-10-05 15:44:38,082 - root - INFO - step: 28030 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 15:44:38,082 - root - INFO - lr: 
1.4405e-05 gnorm: 1.17 [17:10:27< 7:20:02] +[titan] 2025-10-05 15:44:48,943 - root - INFO - step: 28035 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 15:44:48,943 - root - INFO - lr: 1.4397e-05 gnorm: 1.12 [17:10:38< 7:19:51] +[titan] 2025-10-05 15:44:59,808 - root - INFO - step: 28040 loss: 2.0729 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8327 +[titan] 2025-10-05 15:44:59,808 - root - INFO - lr: 1.4390e-05 gnorm: 1.14 [17:10:49< 7:19:40] +[titan] 2025-10-05 15:45:10,680 - root - INFO - step: 28045 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 15:45:10,681 - root - INFO - lr: 1.4383e-05 gnorm: 1.11 [17:10:59< 7:19:29] +[titan] 2025-10-05 15:45:19,402 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:45:21,586 - root - INFO - step: 28050 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7741 +[titan] 2025-10-05 15:45:21,587 - root - INFO - lr: 1.4376e-05 gnorm: 1.06 [17:11:10< 7:19:18] +[titan] 2025-10-05 15:45:32,449 - root - INFO - step: 28055 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:45:32,449 - root - INFO - lr: 1.4368e-05 gnorm: 1.10 [17:11:21< 7:19:07] +[titan] 2025-10-05 15:45:43,299 - root - INFO - step: 28060 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:45:43,299 - root - INFO - lr: 1.4361e-05 gnorm: 1.14 [17:11:32< 7:18:56] +[titan] 2025-10-05 15:45:54,193 - root - INFO - step: 28065 loss: 2.0655 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 15:45:54,193 - root - INFO - lr: 1.4354e-05 gnorm: 1.10 [17:11:43< 7:18:45] +[titan] 2025-10-05 15:46:05,045 - root - INFO - step: 28070 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 15:46:05,045 - root - INFO - lr: 1.4347e-05 gnorm: 1.08 [17:11:54< 7:18:34] +[titan] 2025-10-05 15:46:15,889 - root - INFO - step: 28075 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 15:46:15,889 - root - INFO - lr: 1.4339e-05 gnorm: 1.09 [17:12:05< 7:18:23] +[titan] 2025-10-05 15:46:26,781 - root - INFO - step: 28080 loss: 1.9684 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 15:46:26,781 - root - INFO - lr: 
1.4332e-05 gnorm: 1.10 [17:12:16< 7:18:11] +[titan] 2025-10-05 15:46:37,625 - root - INFO - step: 28085 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 15:46:37,625 - root - INFO - lr: 1.4325e-05 gnorm: 1.08 [17:12:26< 7:18:00] +[titan] 2025-10-05 15:46:48,485 - root - INFO - step: 28090 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:46:48,486 - root - INFO - lr: 1.4318e-05 gnorm: 1.09 [17:12:37< 7:17:49] +[titan] 2025-10-05 15:46:59,393 - root - INFO - step: 28095 loss: 1.9937 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 15:46:59,393 - root - INFO - lr: 1.4311e-05 gnorm: 1.12 [17:12:48< 7:17:38] +[titan] 2025-10-05 15:47:08,069 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:47:10,253 - root - INFO - step: 28100 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:47:10,253 - root - INFO - lr: 1.4303e-05 gnorm: 1.13 [17:12:59< 7:17:27] +[titan] 2025-10-05 15:47:21,183 - root - INFO - step: 28105 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 15:47:21,183 - root - INFO - lr: 1.4296e-05 gnorm: 1.14 [17:13:10< 7:17:16] +[titan] 2025-10-05 15:47:32,061 - root - INFO - step: 28110 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 15:47:32,061 - root - INFO - lr: 1.4289e-05 gnorm: 1.07 [17:13:21< 7:17:05] +[titan] 2025-10-05 15:47:42,930 - root - INFO - step: 28115 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:47:42,930 - root - INFO - lr: 1.4282e-05 gnorm: 1.11 [17:13:32< 7:16:54] +[titan] 2025-10-05 15:47:53,817 - root - INFO - step: 28120 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7942 +[titan] 2025-10-05 15:47:53,817 - root - INFO - lr: 1.4274e-05 gnorm: 1.09 [17:13:43< 7:16:43] +[titan] 2025-10-05 15:48:04,758 - root - INFO - step: 28125 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 15:48:04,758 - root - INFO - lr: 1.4267e-05 gnorm: 1.14 [17:13:53< 7:16:32] +[titan] 2025-10-05 15:48:15,645 - root - INFO - step: 28130 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 15:48:15,645 - root - INFO - lr: 
1.4260e-05 gnorm: 1.09 [17:14:04< 7:16:21] +[titan] 2025-10-05 15:48:26,569 - root - INFO - step: 28135 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 15:48:26,569 - root - INFO - lr: 1.4253e-05 gnorm: 1.15 [17:14:15< 7:16:09] +[titan] 2025-10-05 15:48:37,430 - root - INFO - step: 28140 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 15:48:37,430 - root - INFO - lr: 1.4246e-05 gnorm: 1.13 [17:14:26< 7:15:58] +[titan] 2025-10-05 15:48:48,297 - root - INFO - step: 28145 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7912 +[titan] 2025-10-05 15:48:48,297 - root - INFO - lr: 1.4238e-05 gnorm: 1.13 [17:14:37< 7:15:47] +[titan] 2025-10-05 15:48:56,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:48:59,187 - root - INFO - step: 28150 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 15:48:59,187 - root - INFO - lr: 1.4231e-05 gnorm: 1.13 [17:14:48< 7:15:36] +[titan] 2025-10-05 15:49:10,034 - root - INFO - step: 28155 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 15:49:10,035 - root - INFO - lr: 1.4224e-05 gnorm: 1.11 [17:14:59< 7:15:25] +[titan] 2025-10-05 15:49:21,051 - root - INFO - step: 28160 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 29,745 tflops: 412.66 mfu: 41.73% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 15:49:21,051 - root - INFO - lr: 1.4217e-05 gnorm: 1.09 [17:15:10< 7:15:14] +[titan] 2025-10-05 15:49:21,242 - root - INFO - Dumping profiler traces at step 28160 +[titan] 2025-10-05 15:49:21,280 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:49:32,111 - root - INFO - step: 28165 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 29,630 tflops: 411.07 mfu: 41.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 15:49:32,111 - root - INFO - lr: 1.4210e-05 gnorm: 1.11 [17:15:21< 7:15:03] +[titan] 2025-10-05 15:49:42,959 - root - INFO - step: 28170 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:49:42,959 - root - INFO - lr: 1.4202e-05 gnorm: 1.08 [17:15:32< 7:14:52] +[titan] 2025-10-05 15:49:53,795 - root - INFO - step: 28175 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:49:53,795 - root - INFO - lr: 1.4195e-05 gnorm: 1.14 [17:15:43< 7:14:41] +[titan] 2025-10-05 15:50:04,659 - root - INFO - step: 28180 loss: 
2.0355 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7994 +[titan] 2025-10-05 15:50:04,660 - root - INFO - lr: 1.4188e-05 gnorm: 1.10 [17:15:53< 7:14:30] +[titan] 2025-10-05 15:50:15,529 - root - INFO - step: 28185 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 15:50:15,529 - root - INFO - lr: 1.4181e-05 gnorm: 1.09 [17:16:04< 7:14:19] +[titan] 2025-10-05 15:50:26,481 - root - INFO - step: 28190 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 15:50:26,482 - root - INFO - lr: 1.4174e-05 gnorm: 1.11 [17:16:15< 7:14:08] +[titan] 2025-10-05 15:50:37,355 - root - INFO - step: 28195 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 15:50:37,355 - root - INFO - lr: 1.4166e-05 gnorm: 1.06 [17:16:26< 7:13:56] +[titan] 2025-10-05 15:50:46,040 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:50:48,223 - root - INFO - step: 28200 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:50:48,223 - root - INFO - lr: 1.4159e-05 gnorm: 1.12 [17:16:37< 7:13:45] +[titan] 2025-10-05 15:50:59,085 - root - INFO - step: 28205 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 15:50:59,085 - root - INFO - lr: 1.4152e-05 gnorm: 1.12 [17:16:48< 7:13:34] +[titan] 2025-10-05 15:51:09,953 - root - INFO - step: 28210 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 15:51:09,953 - root - INFO - lr: 1.4145e-05 gnorm: 1.14 [17:16:59< 7:13:23] +[titan] 2025-10-05 15:51:20,822 - root - INFO - step: 28215 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:51:20,822 - root - INFO - lr: 1.4138e-05 gnorm: 1.10 [17:17:10< 7:13:12] +[titan] 2025-10-05 15:51:31,728 - root - INFO - step: 28220 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 15:51:31,728 - root - INFO - lr: 1.4130e-05 gnorm: 1.18 [17:17:20< 7:13:01] +[titan] 2025-10-05 15:51:42,649 - root - INFO - step: 28225 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 15:51:42,650 - root - INFO - lr: 1.4123e-05 gnorm: 1.10 [17:17:31< 7:12:50] +[titan] 2025-10-05 15:51:53,522 - root - INFO - step: 28230 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 15:51:53,522 - root - INFO - lr: 
1.4116e-05 gnorm: 1.09 [17:17:42< 7:12:39] +[titan] 2025-10-05 15:52:04,406 - root - INFO - step: 28235 loss: 2.0389 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:52:04,406 - root - INFO - lr: 1.4109e-05 gnorm: 1.11 [17:17:53< 7:12:28] +[titan] 2025-10-05 15:52:15,280 - root - INFO - step: 28240 loss: 1.9948 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 15:52:15,281 - root - INFO - lr: 1.4102e-05 gnorm: 1.11 [17:18:04< 7:12:17] +[titan] 2025-10-05 15:52:26,225 - root - INFO - step: 28245 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 15:52:26,225 - root - INFO - lr: 1.4095e-05 gnorm: 1.12 [17:18:15< 7:12:06] +[titan] 2025-10-05 15:52:34,921 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:52:37,105 - root - INFO - step: 28250 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 15:52:37,105 - root - INFO - lr: 1.4087e-05 gnorm: 1.08 [17:18:26< 7:11:55] +[titan] 2025-10-05 15:52:48,023 - root - INFO - step: 28255 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 15:52:48,023 - root - INFO - lr: 1.4080e-05 gnorm: 1.10 [17:18:37< 7:11:43] +[titan] 2025-10-05 15:52:58,913 - root - INFO - step: 28260 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 15:52:58,913 - root - INFO - lr: 1.4073e-05 gnorm: 1.10 [17:18:48< 7:11:32] +[titan] 2025-10-05 15:53:09,803 - root - INFO - step: 28265 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 15:53:09,803 - root - INFO - lr: 1.4066e-05 gnorm: 1.11 [17:18:59< 7:11:21] +[titan] 2025-10-05 15:53:20,692 - root - INFO - step: 28270 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 15:53:20,693 - root - INFO - lr: 1.4059e-05 gnorm: 1.34 [17:19:09< 7:11:10] +[titan] 2025-10-05 15:53:31,636 - root - INFO - step: 28275 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 15:53:31,637 - root - INFO - lr: 1.4052e-05 gnorm: 1.11 [17:19:20< 7:10:59] +[titan] 2025-10-05 15:53:42,507 - root - INFO - step: 28280 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7702 +[titan] 2025-10-05 15:53:42,508 - root - INFO - lr: 
1.4044e-05 gnorm: 1.09 [17:19:31< 7:10:48] +[titan] 2025-10-05 15:53:53,408 - root - INFO - step: 28285 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:53:53,408 - root - INFO - lr: 1.4037e-05 gnorm: 1.12 [17:19:42< 7:10:37] +[titan] 2025-10-05 15:54:04,269 - root - INFO - step: 28290 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:54:04,269 - root - INFO - lr: 1.4030e-05 gnorm: 1.11 [17:19:53< 7:10:26] +[titan] 2025-10-05 15:54:15,140 - root - INFO - step: 28295 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:54:15,140 - root - INFO - lr: 1.4023e-05 gnorm: 1.09 [17:20:04< 7:10:15] +[titan] 2025-10-05 15:54:23,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:54:26,111 - root - INFO - step: 28300 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7860 +[titan] 2025-10-05 15:54:26,111 - root - INFO - lr: 1.4016e-05 gnorm: 1.07 [17:20:15< 7:10:04] +[titan] 2025-10-05 15:54:36,983 - root - INFO - step: 28305 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 15:54:36,984 - root - INFO - lr: 1.4009e-05 gnorm: 1.12 [17:20:26< 7:09:53] +[titan] 2025-10-05 15:54:47,858 - root - INFO - step: 28310 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:54:47,858 - root - INFO - lr: 1.4002e-05 gnorm: 1.07 [17:20:37< 7:09:42] +[titan] 2025-10-05 15:54:58,731 - root - INFO - step: 28315 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 15:54:58,732 - root - INFO - lr: 1.3994e-05 gnorm: 1.09 [17:20:47< 7:09:30] +[titan] 2025-10-05 15:55:09,654 - root - INFO - step: 28320 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 15:55:09,655 - root - INFO - lr: 1.3987e-05 gnorm: 1.46 [17:20:58< 7:09:19] +[titan] 2025-10-05 15:55:20,530 - root - INFO - step: 28325 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 15:55:20,530 - root - INFO - lr: 1.3980e-05 gnorm: 1.12 [17:21:09< 7:09:08] +[titan] 2025-10-05 15:55:31,457 - root - INFO - step: 28330 loss: 1.9576 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7300 +[titan] 2025-10-05 15:55:31,457 - root - INFO - lr: 
1.3973e-05 gnorm: 1.10 [17:21:20< 7:08:57] +[titan] 2025-10-05 15:55:42,330 - root - INFO - step: 28335 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 15:55:42,330 - root - INFO - lr: 1.3966e-05 gnorm: 1.12 [17:21:31< 7:08:46] +[titan] 2025-10-05 15:55:53,200 - root - INFO - step: 28340 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:55:53,200 - root - INFO - lr: 1.3959e-05 gnorm: 1.15 [17:21:42< 7:08:35] +[titan] 2025-10-05 15:56:04,083 - root - INFO - step: 28345 loss: 2.0214 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 15:56:04,083 - root - INFO - lr: 1.3952e-05 gnorm: 1.17 [17:21:53< 7:08:24] +[titan] 2025-10-05 15:56:12,817 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:56:15,006 - root - INFO - step: 28350 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 15:56:15,006 - root - INFO - lr: 1.3944e-05 gnorm: 1.14 [17:22:04< 7:08:13] +[titan] 2025-10-05 15:56:25,936 - root - INFO - step: 28355 loss: 1.9838 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 15:56:25,936 - root - INFO - lr: 1.3937e-05 gnorm: 1.10 [17:22:15< 7:08:02] +[titan] 2025-10-05 15:56:36,882 - root - INFO - step: 28360 loss: 2.0896 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 15:56:36,882 - root - INFO - lr: 1.3930e-05 gnorm: 1.15 [17:22:26< 7:07:51] +[titan] 2025-10-05 15:56:47,760 - root - INFO - step: 28365 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 15:56:47,760 - root - INFO - lr: 1.3923e-05 gnorm: 1.11 [17:22:36< 7:07:40] +[titan] 2025-10-05 15:56:58,635 - root - INFO - step: 28370 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 15:56:58,635 - root - INFO - lr: 1.3916e-05 gnorm: 1.09 [17:22:47< 7:07:29] +[titan] 2025-10-05 15:57:09,503 - root - INFO - step: 28375 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:57:09,503 - root - INFO - lr: 1.3909e-05 gnorm: 1.05 [17:22:58< 7:07:17] +[titan] 2025-10-05 15:57:20,365 - root - INFO - step: 28380 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7154 +[titan] 2025-10-05 15:57:20,365 - root - INFO - lr: 
1.3902e-05 gnorm: 1.13 [17:23:09< 7:07:06] +[titan] 2025-10-05 15:57:31,331 - root - INFO - step: 28385 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:57:31,331 - root - INFO - lr: 1.3895e-05 gnorm: 1.12 [17:23:20< 7:06:55] +[titan] 2025-10-05 15:57:42,204 - root - INFO - step: 28390 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 15:57:42,205 - root - INFO - lr: 1.3888e-05 gnorm: 1.13 [17:23:31< 7:06:44] +[titan] 2025-10-05 15:57:53,067 - root - INFO - step: 28395 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:57:53,067 - root - INFO - lr: 1.3880e-05 gnorm: 1.09 [17:23:42< 7:06:33] +[titan] 2025-10-05 15:58:01,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:58:03,923 - root - INFO - step: 28400 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8317 +[titan] 2025-10-05 15:58:03,923 - root - INFO - lr: 1.3873e-05 gnorm: 1.11 [17:23:53< 7:06:22] +[titan] 2025-10-05 15:58:14,796 - root - INFO - step: 28405 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 15:58:14,796 - root - INFO - lr: 1.3866e-05 gnorm: 1.09 [17:24:04< 7:06:11] +[titan] 2025-10-05 15:58:25,653 - root - INFO - step: 28410 loss: 1.9984 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7669 +[titan] 2025-10-05 15:58:25,653 - root - INFO - lr: 1.3859e-05 gnorm: 1.10 [17:24:14< 7:06:00] +[titan] 2025-10-05 15:58:36,589 - root - INFO - step: 28415 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 15:58:36,589 - root - INFO - lr: 1.3852e-05 gnorm: 1.13 [17:24:25< 7:05:49] +[titan] 2025-10-05 15:58:47,471 - root - INFO - step: 28420 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7957 +[titan] 2025-10-05 15:58:47,471 - root - INFO - lr: 1.3845e-05 gnorm: 1.10 [17:24:36< 7:05:38] +[titan] 2025-10-05 15:58:58,364 - root - INFO - step: 28425 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 15:58:58,364 - root - INFO - lr: 1.3838e-05 gnorm: 1.14 [17:24:47< 7:05:27] +[titan] 2025-10-05 15:59:09,235 - root - INFO - step: 28430 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 15:59:09,235 - root - INFO - lr: 
1.3831e-05 gnorm: 1.13 [17:24:58< 7:05:16] +[titan] 2025-10-05 15:59:20,106 - root - INFO - step: 28435 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 15:59:20,106 - root - INFO - lr: 1.3824e-05 gnorm: 1.13 [17:25:09< 7:05:04] +[titan] 2025-10-05 15:59:31,054 - root - INFO - step: 28440 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7996 +[titan] 2025-10-05 15:59:31,054 - root - INFO - lr: 1.3817e-05 gnorm: 1.10 [17:25:20< 7:04:53] +[titan] 2025-10-05 15:59:41,947 - root - INFO - step: 28445 loss: 2.0638 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 15:59:41,947 - root - INFO - lr: 1.3810e-05 gnorm: 1.16 [17:25:31< 7:04:42] +[titan] 2025-10-05 15:59:50,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:59:52,821 - root - INFO - step: 28450 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 15:59:52,821 - root - INFO - lr: 1.3802e-05 gnorm: 1.11 [17:25:42< 7:04:31] +[titan] 2025-10-05 16:00:03,693 - root - INFO - step: 28455 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 16:00:03,693 - root - INFO - lr: 1.3795e-05 gnorm: 1.09 [17:25:52< 7:04:20] +[titan] 2025-10-05 16:00:14,540 - root - INFO - step: 28460 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 16:00:14,540 - root - INFO - lr: 1.3788e-05 gnorm: 1.09 [17:26:03< 7:04:09] +[titan] 2025-10-05 16:00:25,402 - root - INFO - step: 28465 loss: 2.0314 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 16:00:25,402 - root - INFO - lr: 1.3781e-05 gnorm: 1.12 [17:26:14< 7:03:58] +[titan] 2025-10-05 16:00:36,344 - root - INFO - step: 28470 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:00:36,344 - root - INFO - lr: 1.3774e-05 gnorm: 1.11 [17:26:25< 7:03:47] +[titan] 2025-10-05 16:00:47,202 - root - INFO - step: 28475 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:00:47,202 - root - INFO - lr: 1.3767e-05 gnorm: 1.13 [17:26:36< 7:03:36] +[titan] 2025-10-05 16:00:58,120 - root - INFO - step: 28480 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 16:00:58,120 - root - INFO - lr: 
1.3760e-05 gnorm: 1.14 [17:26:47< 7:03:25] +[titan] 2025-10-05 16:01:09,012 - root - INFO - step: 28485 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 16:01:09,012 - root - INFO - lr: 1.3753e-05 gnorm: 1.14 [17:26:58< 7:03:14] +[titan] 2025-10-05 16:01:19,903 - root - INFO - step: 28490 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 16:01:19,903 - root - INFO - lr: 1.3746e-05 gnorm: 1.12 [17:27:09< 7:03:03] +[titan] 2025-10-05 16:01:30,819 - root - INFO - step: 28495 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 16:01:30,819 - root - INFO - lr: 1.3739e-05 gnorm: 1.18 [17:27:20< 7:02:51] +[titan] 2025-10-05 16:01:39,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:01:41,746 - root - INFO - step: 28500 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:01:41,746 - root - INFO - lr: 1.3732e-05 gnorm: 1.13 [17:27:30< 7:02:40] +[titan] 2025-10-05 16:01:52,631 - root - INFO - step: 28505 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7896 +[titan] 2025-10-05 16:01:52,632 - root - INFO - lr: 1.3725e-05 gnorm: 1.13 [17:27:41< 7:02:29] +[titan] 2025-10-05 16:02:03,551 - root - INFO - step: 28510 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:02:03,551 - root - INFO - lr: 1.3718e-05 gnorm: 1.12 [17:27:52< 7:02:18] +[titan] 2025-10-05 16:02:14,435 - root - INFO - step: 28515 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 16:02:14,435 - root - INFO - lr: 1.3711e-05 gnorm: 1.16 [17:28:03< 7:02:07] +[titan] 2025-10-05 16:02:25,309 - root - INFO - step: 28520 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 16:02:25,310 - root - INFO - lr: 1.3704e-05 gnorm: 1.09 [17:28:14< 7:01:56] +[titan] 2025-10-05 16:02:36,209 - root - INFO - step: 28525 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 16:02:36,209 - root - INFO - lr: 1.3696e-05 gnorm: 1.09 [17:28:25< 7:01:45] +[titan] 2025-10-05 16:02:47,089 - root - INFO - step: 28530 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 16:02:47,089 - root - INFO - lr: 
1.3689e-05 gnorm: 1.11 [17:28:36< 7:01:34] +[titan] 2025-10-05 16:02:57,982 - root - INFO - step: 28535 loss: 2.0168 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 16:02:57,982 - root - INFO - lr: 1.3682e-05 gnorm: 1.14 [17:28:47< 7:01:23] +[titan] 2025-10-05 16:03:08,840 - root - INFO - step: 28540 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 16:03:08,840 - root - INFO - lr: 1.3675e-05 gnorm: 1.13 [17:28:58< 7:01:12] +[titan] 2025-10-05 16:03:19,756 - root - INFO - step: 28545 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 16:03:19,756 - root - INFO - lr: 1.3668e-05 gnorm: 1.15 [17:29:08< 7:01:01] +[titan] 2025-10-05 16:03:28,457 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:03:30,650 - root - INFO - step: 28550 loss: 1.9538 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:03:30,650 - root - INFO - lr: 1.3661e-05 gnorm: 1.08 [17:29:19< 7:00:50] +[titan] 2025-10-05 16:03:41,553 - root - INFO - step: 28555 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 16:03:41,553 - root - INFO - lr: 1.3654e-05 gnorm: 1.11 [17:29:30< 7:00:39] +[titan] 2025-10-05 16:03:52,429 - root - INFO - step: 28560 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 16:03:52,429 - root - INFO - lr: 1.3647e-05 gnorm: 1.07 [17:29:41< 7:00:27] +[titan] 2025-10-05 16:04:03,288 - root - INFO - step: 28565 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 16:04:03,288 - root - INFO - lr: 1.3640e-05 gnorm: 1.10 [17:29:52< 7:00:16] +[titan] 2025-10-05 16:04:14,124 - root - INFO - step: 28570 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:04:14,124 - root - INFO - lr: 1.3633e-05 gnorm: 1.11 [17:30:03< 7:00:05] +[titan] 2025-10-05 16:04:25,006 - root - INFO - step: 28575 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 16:04:25,006 - root - INFO - lr: 1.3626e-05 gnorm: 1.95 [17:30:14< 6:59:54] +[titan] 2025-10-05 16:04:35,875 - root - INFO - step: 28580 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:04:35,876 - root - INFO - lr: 
1.3619e-05 gnorm: 1.09 [17:30:25< 6:59:43] +[titan] 2025-10-05 16:04:46,735 - root - INFO - step: 28585 loss: 1.9918 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 16:04:46,735 - root - INFO - lr: 1.3612e-05 gnorm: 1.12 [17:30:35< 6:59:32] +[titan] 2025-10-05 16:04:57,585 - root - INFO - step: 28590 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 16:04:57,585 - root - INFO - lr: 1.3605e-05 gnorm: 1.06 [17:30:46< 6:59:21] +[titan] 2025-10-05 16:05:08,445 - root - INFO - step: 28595 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 16:05:08,445 - root - INFO - lr: 1.3598e-05 gnorm: 1.15 [17:30:57< 6:59:10] +[titan] 2025-10-05 16:05:17,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:05:19,314 - root - INFO - step: 28600 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:05:19,314 - root - INFO - lr: 1.3591e-05 gnorm: 1.10 [17:31:08< 6:58:59] +[titan] 2025-10-05 16:05:30,213 - root - INFO - step: 28605 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 16:05:30,213 - root - INFO - lr: 1.3584e-05 gnorm: 1.11 [17:31:19< 6:58:48] +[titan] 2025-10-05 16:05:41,137 - root - INFO - step: 28610 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 16:05:41,137 - root - INFO - lr: 1.3577e-05 gnorm: 1.11 [17:31:30< 6:58:37] +[titan] 2025-10-05 16:05:52,006 - root - INFO - step: 28615 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:05:52,007 - root - INFO - lr: 1.3570e-05 gnorm: 1.10 [17:31:41< 6:58:25] +[titan] 2025-10-05 16:06:02,858 - root - INFO - step: 28620 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 16:06:02,859 - root - INFO - lr: 1.3563e-05 gnorm: 1.12 [17:31:52< 6:58:14] +[titan] 2025-10-05 16:06:13,712 - root - INFO - step: 28625 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 16:06:13,712 - root - INFO - lr: 1.3556e-05 gnorm: 1.10 [17:32:02< 6:58:03] +[titan] 2025-10-05 16:06:24,582 - root - INFO - step: 28630 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 16:06:24,582 - root - INFO - lr: 
1.3549e-05 gnorm: 1.11 [17:32:13< 6:57:52] +[titan] 2025-10-05 16:06:35,472 - root - INFO - step: 28635 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7192 +[titan] 2025-10-05 16:06:35,472 - root - INFO - lr: 1.3542e-05 gnorm: 1.12 [17:32:24< 6:57:41] +[titan] 2025-10-05 16:06:46,399 - root - INFO - step: 28640 loss: 2.0089 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7766 +[titan] 2025-10-05 16:06:46,399 - root - INFO - lr: 1.3535e-05 gnorm: 1.13 [17:32:35< 6:57:30] +[titan] 2025-10-05 16:06:57,266 - root - INFO - step: 28645 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 16:06:57,266 - root - INFO - lr: 1.3528e-05 gnorm: 1.16 [17:32:46< 6:57:19] +[titan] 2025-10-05 16:07:05,934 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:07:08,125 - root - INFO - step: 28650 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 16:07:08,125 - root - INFO - lr: 1.3521e-05 gnorm: 1.08 [17:32:57< 6:57:08] +[titan] 2025-10-05 16:07:18,989 - root - INFO - step: 28655 loss: 1.9921 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 16:07:18,989 - root - INFO - lr: 1.3514e-05 gnorm: 1.13 [17:33:08< 6:56:57] +[titan] 2025-10-05 16:07:29,863 - root - INFO - step: 28660 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:07:29,863 - root - INFO - lr: 1.3507e-05 gnorm: 1.11 [17:33:19< 6:56:46] +[titan] 2025-10-05 16:07:40,776 - root - INFO - step: 28665 loss: 2.0607 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8160 +[titan] 2025-10-05 16:07:40,776 - root - INFO - lr: 1.3500e-05 gnorm: 1.24 [17:33:29< 6:56:35] +[titan] 2025-10-05 16:07:51,762 - root - INFO - step: 28670 loss: 2.0573 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.81 mfu: 41.84% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 16:07:51,762 - root - INFO - lr: 1.3493e-05 gnorm: 1.20 [17:33:40< 6:56:24] +[titan] 2025-10-05 16:07:56,294 - root - INFO - Dumping profiler traces at step 28672 +[titan] 2025-10-05 16:07:56,335 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:08:02,863 - root - INFO - step: 28675 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,520 tflops: 409.54 mfu: 41.41% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 16:08:02,863 - root - INFO - lr: 1.3486e-05 gnorm: 1.09 [17:33:52< 6:56:13] +[titan] 2025-10-05 16:08:13,727 - root - INFO - step: 28680 loss: 
1.9747 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 16:08:13,728 - root - INFO - lr: 1.3479e-05 gnorm: 1.10 [17:34:02< 6:56:01] +[titan] 2025-10-05 16:08:24,599 - root - INFO - step: 28685 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7697 +[titan] 2025-10-05 16:08:24,599 - root - INFO - lr: 1.3472e-05 gnorm: 1.11 [17:34:13< 6:55:50] +[titan] 2025-10-05 16:08:35,472 - root - INFO - step: 28690 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 16:08:35,472 - root - INFO - lr: 1.3465e-05 gnorm: 1.12 [17:34:24< 6:55:39] +[titan] 2025-10-05 16:08:46,365 - root - INFO - step: 28695 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:08:46,366 - root - INFO - lr: 1.3458e-05 gnorm: 1.10 [17:34:35< 6:55:28] +[titan] 2025-10-05 16:08:55,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:08:57,207 - root - INFO - step: 28700 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 16:08:57,207 - root - INFO - lr: 1.3451e-05 gnorm: 1.16 [17:34:46< 6:55:17] +[titan] 2025-10-05 16:09:08,084 - root - INFO - step: 28705 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 16:09:08,084 - root - INFO - lr: 1.3444e-05 gnorm: 1.11 [17:34:57< 6:55:06] +[titan] 2025-10-05 16:09:18,920 - root - INFO - step: 28710 loss: 1.8967 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 16:09:18,920 - root - INFO - lr: 1.3437e-05 gnorm: 1.12 [17:35:08< 6:54:55] +[titan] 2025-10-05 16:09:29,743 - root - INFO - step: 28715 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 16:09:29,743 - root - INFO - lr: 1.3430e-05 gnorm: 1.24 [17:35:18< 6:54:44] +[titan] 2025-10-05 16:09:40,610 - root - INFO - step: 28720 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:09:40,610 - root - INFO - lr: 1.3423e-05 gnorm: 1.10 [17:35:29< 6:54:33] +[titan] 2025-10-05 16:09:51,475 - root - INFO - step: 28725 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 16:09:51,475 - root - INFO - lr: 1.3416e-05 gnorm: 1.07 [17:35:40< 6:54:22] +[titan] 2025-10-05 16:10:02,333 - root - INFO - step: 28730 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:10:02,333 - root - INFO - lr: 
1.3409e-05 gnorm: 1.13 [17:35:51< 6:54:11] +[titan] 2025-10-05 16:10:13,264 - root - INFO - step: 28735 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:10:13,265 - root - INFO - lr: 1.3402e-05 gnorm: 1.14 [17:36:02< 6:54:00] +[titan] 2025-10-05 16:10:24,137 - root - INFO - step: 28740 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:10:24,137 - root - INFO - lr: 1.3395e-05 gnorm: 1.07 [17:36:13< 6:53:48] +[titan] 2025-10-05 16:10:34,996 - root - INFO - step: 28745 loss: 2.0343 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7979 +[titan] 2025-10-05 16:10:34,996 - root - INFO - lr: 1.3389e-05 gnorm: 1.14 [17:36:24< 6:53:37] +[titan] 2025-10-05 16:10:43,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:10:46,124 - root - INFO - step: 28750 loss: 2.0411 memory: 118.84GiB(85.28%) tps: 29,446 tflops: 408.52 mfu: 41.31% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 16:10:46,125 - root - INFO - lr: 1.3382e-05 gnorm: 1.10 [17:36:35< 6:53:26] +[titan] 2025-10-05 16:10:56,975 - root - INFO - step: 28755 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7186 +[titan] 2025-10-05 16:10:56,975 - root - INFO - lr: 1.3375e-05 gnorm: 1.11 [17:36:46< 6:53:15] +[titan] 2025-10-05 16:11:07,804 - root - INFO - step: 28760 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 16:11:07,804 - root - INFO - lr: 1.3368e-05 gnorm: 1.13 [17:36:56< 6:53:04] +[titan] 2025-10-05 16:11:18,644 - root - INFO - step: 28765 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 16:11:18,644 - root - INFO - lr: 1.3361e-05 gnorm: 1.14 [17:37:07< 6:52:53] +[titan] 2025-10-05 16:11:29,465 - root - INFO - step: 28770 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 16:11:29,465 - root - INFO - lr: 1.3354e-05 gnorm: 1.11 [17:37:18< 6:52:42] +[titan] 2025-10-05 16:11:40,342 - root - INFO - step: 28775 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 16:11:40,342 - root - INFO - lr: 1.3347e-05 gnorm: 1.10 [17:37:29< 6:52:31] +[titan] 2025-10-05 16:11:51,163 - root - INFO - step: 28780 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 16:11:51,163 - root - INFO - lr: 
1.3340e-05 gnorm: 1.10 [17:37:40< 6:52:20] +[titan] 2025-10-05 16:12:01,972 - root - INFO - step: 28785 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:12:01,972 - root - INFO - lr: 1.3333e-05 gnorm: 1.09 [17:37:51< 6:52:09] +[titan] 2025-10-05 16:12:12,796 - root - INFO - step: 28790 loss: 2.0542 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 16:12:12,796 - root - INFO - lr: 1.3326e-05 gnorm: 1.14 [17:38:01< 6:51:58] +[titan] 2025-10-05 16:12:23,627 - root - INFO - step: 28795 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 16:12:23,627 - root - INFO - lr: 1.3319e-05 gnorm: 5.74 [17:38:12< 6:51:46] +[titan] 2025-10-05 16:12:32,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:12:34,510 - root - INFO - step: 28800 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 16:12:34,510 - root - INFO - lr: 1.3312e-05 gnorm: 1.12 [17:38:23< 6:51:35] +[titan] 2025-10-05 16:12:45,424 - root - INFO - step: 28805 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 16:12:45,424 - root - INFO - lr: 1.3305e-05 gnorm: 1.12 [17:38:34< 6:51:24] +[titan] 2025-10-05 16:12:56,285 - root - INFO - step: 28810 loss: 1.9337 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 16:12:56,285 - root - INFO - lr: 1.3298e-05 gnorm: 1.11 [17:38:45< 6:51:13] +[titan] 2025-10-05 16:13:07,115 - root - INFO - step: 28815 loss: 2.0821 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8406 +[titan] 2025-10-05 16:13:07,115 - root - INFO - lr: 1.3291e-05 gnorm: 1.14 [17:38:56< 6:51:02] +[titan] 2025-10-05 16:13:17,934 - root - INFO - step: 28820 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 16:13:17,934 - root - INFO - lr: 1.3284e-05 gnorm: 1.14 [17:39:07< 6:50:51] +[titan] 2025-10-05 16:13:28,784 - root - INFO - step: 28825 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7000 +[titan] 2025-10-05 16:13:28,784 - root - INFO - lr: 1.3278e-05 gnorm: 1.09 [17:39:17< 6:50:40] +[titan] 2025-10-05 16:13:39,674 - root - INFO - step: 28830 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 16:13:39,675 - root - INFO - lr: 
1.3271e-05 gnorm: 1.13 [17:39:28< 6:50:29] +[titan] 2025-10-05 16:13:50,584 - root - INFO - step: 28835 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 16:13:50,584 - root - INFO - lr: 1.3264e-05 gnorm: 1.14 [17:39:39< 6:50:18] +[titan] 2025-10-05 16:14:01,435 - root - INFO - step: 28840 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 16:14:01,435 - root - INFO - lr: 1.3257e-05 gnorm: 1.11 [17:39:50< 6:50:07] +[titan] 2025-10-05 16:14:12,264 - root - INFO - step: 28845 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 16:14:12,264 - root - INFO - lr: 1.3250e-05 gnorm: 1.14 [17:40:01< 6:49:56] +[titan] 2025-10-05 16:14:20,892 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:14:23,077 - root - INFO - step: 28850 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,305 tflops: 420.44 mfu: 42.51% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 16:14:23,077 - root - INFO - lr: 1.3243e-05 gnorm: 1.13 [17:40:12< 6:49:44] +[titan] 2025-10-05 16:14:33,903 - root - INFO - step: 28855 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 16:14:33,903 - root - INFO - lr: 1.3236e-05 gnorm: 1.10 [17:40:23< 6:49:33] +[titan] 2025-10-05 16:14:44,758 - root - INFO - step: 28860 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 16:14:44,758 - root - INFO - lr: 1.3229e-05 gnorm: 1.18 [17:40:33< 6:49:22] +[titan] 2025-10-05 16:14:55,624 - root - INFO - step: 28865 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 16:14:55,624 - root - INFO - lr: 1.3222e-05 gnorm: 1.13 [17:40:44< 6:49:11] +[titan] 2025-10-05 16:15:06,429 - root - INFO - step: 28870 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,328 tflops: 420.76 mfu: 42.54% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 16:15:06,429 - root - INFO - lr: 1.3215e-05 gnorm: 1.12 [17:40:55< 6:49:00] +[titan] 2025-10-05 16:15:17,255 - root - INFO - step: 28875 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 16:15:17,255 - root - INFO - lr: 1.3209e-05 gnorm: 1.14 [17:41:06< 6:48:49] +[titan] 2025-10-05 16:15:28,083 - root - INFO - step: 28880 loss: 2.0444 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8077 +[titan] 2025-10-05 16:15:28,084 - root - INFO - lr: 
1.3202e-05 gnorm: 1.11 [17:41:17< 6:48:38] +[titan] 2025-10-05 16:15:38,890 - root - INFO - step: 28885 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 16:15:38,890 - root - INFO - lr: 1.3195e-05 gnorm: 1.12 [17:41:28< 6:48:27] +[titan] 2025-10-05 16:15:49,744 - root - INFO - step: 28890 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7735 +[titan] 2025-10-05 16:15:49,744 - root - INFO - lr: 1.3188e-05 gnorm: 1.17 [17:41:38< 6:48:16] +[titan] 2025-10-05 16:16:00,598 - root - INFO - step: 28895 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 16:16:00,598 - root - INFO - lr: 1.3181e-05 gnorm: 1.11 [17:41:49< 6:48:05] +[titan] 2025-10-05 16:16:09,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:16:11,420 - root - INFO - step: 28900 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 16:16:11,421 - root - INFO - lr: 1.3174e-05 gnorm: 1.14 [17:42:00< 6:47:53] +[titan] 2025-10-05 16:16:22,244 - root - INFO - step: 28905 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:16:22,245 - root - INFO - lr: 1.3167e-05 gnorm: 1.10 [17:42:11< 6:47:42] +[titan] 2025-10-05 16:16:33,059 - root - INFO - step: 28910 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 16:16:33,059 - root - INFO - lr: 1.3160e-05 gnorm: 1.14 [17:42:22< 6:47:31] +[titan] 2025-10-05 16:16:43,886 - root - INFO - step: 28915 loss: 1.9331 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7085 +[titan] 2025-10-05 16:16:43,887 - root - INFO - lr: 1.3153e-05 gnorm: 1.09 [17:42:33< 6:47:20] +[titan] 2025-10-05 16:16:54,738 - root - INFO - step: 28920 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 16:16:54,738 - root - INFO - lr: 1.3147e-05 gnorm: 1.09 [17:42:43< 6:47:09] +[titan] 2025-10-05 16:17:05,576 - root - INFO - step: 28925 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 16:17:05,576 - root - INFO - lr: 1.3140e-05 gnorm: 1.13 [17:42:54< 6:46:58] +[titan] 2025-10-05 16:17:16,436 - root - INFO - step: 28930 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 16:17:16,436 - root - INFO - lr: 
1.3133e-05 gnorm: 1.12 [17:43:05< 6:46:47] +[titan] 2025-10-05 16:17:27,262 - root - INFO - step: 28935 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 16:17:27,263 - root - INFO - lr: 1.3126e-05 gnorm: 1.13 [17:43:16< 6:46:36] +[titan] 2025-10-05 16:17:38,068 - root - INFO - step: 28940 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 16:17:38,069 - root - INFO - lr: 1.3119e-05 gnorm: 1.12 [17:43:27< 6:46:25] +[titan] 2025-10-05 16:17:48,901 - root - INFO - step: 28945 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 16:17:48,902 - root - INFO - lr: 1.3112e-05 gnorm: 1.11 [17:43:38< 6:46:14] +[titan] 2025-10-05 16:17:57,530 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:17:59,707 - root - INFO - step: 28950 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 16:17:59,708 - root - INFO - lr: 1.3105e-05 gnorm: 1.13 [17:43:48< 6:46:03] +[titan] 2025-10-05 16:18:10,530 - root - INFO - step: 28955 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 16:18:10,531 - root - INFO - lr: 1.3099e-05 gnorm: 1.13 [17:43:59< 6:45:51] +[titan] 2025-10-05 16:18:21,362 - root - INFO - step: 28960 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7806 +[titan] 2025-10-05 16:18:21,362 - root - INFO - lr: 1.3092e-05 gnorm: 1.11 [17:44:10< 6:45:40] +[titan] 2025-10-05 16:18:32,177 - root - INFO - step: 28965 loss: 2.0315 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 16:18:32,178 - root - INFO - lr: 1.3085e-05 gnorm: 1.15 [17:44:21< 6:45:29] +[titan] 2025-10-05 16:18:43,001 - root - INFO - step: 28970 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7932 +[titan] 2025-10-05 16:18:43,002 - root - INFO - lr: 1.3078e-05 gnorm: 1.12 [17:44:32< 6:45:18] +[titan] 2025-10-05 16:18:53,823 - root - INFO - step: 28975 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7847 +[titan] 2025-10-05 16:18:53,823 - root - INFO - lr: 1.3071e-05 gnorm: 1.15 [17:44:42< 6:45:07] +[titan] 2025-10-05 16:19:04,658 - root - INFO - step: 28980 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7964 +[titan] 2025-10-05 16:19:04,658 - root - INFO - lr: 
1.3064e-05 gnorm: 1.09 [17:44:53< 6:44:56] +[titan] 2025-10-05 16:19:15,441 - root - INFO - step: 28985 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,390 tflops: 421.61 mfu: 42.63% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 16:19:15,441 - root - INFO - lr: 1.3057e-05 gnorm: 1.14 [17:45:04< 6:44:45] +[titan] 2025-10-05 16:19:26,267 - root - INFO - step: 28990 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:19:26,267 - root - INFO - lr: 1.3051e-05 gnorm: 1.12 [17:45:15< 6:44:34] +[titan] 2025-10-05 16:19:37,046 - root - INFO - step: 28995 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,401 tflops: 421.76 mfu: 42.65% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 16:19:37,046 - root - INFO - lr: 1.3044e-05 gnorm: 1.09 [17:45:26< 6:44:23] +[titan] 2025-10-05 16:19:45,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:19:47,884 - root - INFO - step: 29000 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 16:19:47,884 - root - INFO - lr: 1.3037e-05 gnorm: 1.10 [17:45:37< 6:44:11] +[titan] 2025-10-05 16:19:58,682 - root - INFO - step: 29005 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 16:19:58,682 - root - INFO - lr: 1.3030e-05 gnorm: 1.14 [17:45:47< 6:44:00] +[titan] 2025-10-05 16:20:09,482 - root - INFO - step: 29010 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 16:20:09,482 - root - INFO - lr: 1.3023e-05 gnorm: 1.08 [17:45:58< 6:43:49] +[titan] 2025-10-05 16:20:20,322 - root - INFO - step: 29015 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 16:20:20,323 - root - INFO - lr: 1.3016e-05 gnorm: 1.11 [17:46:09< 6:43:38] +[titan] 2025-10-05 16:20:31,122 - root - INFO - step: 29020 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 16:20:31,122 - root - INFO - lr: 1.3010e-05 gnorm: 1.14 [17:46:20< 6:43:27] +[titan] 2025-10-05 16:20:42,001 - root - INFO - step: 29025 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:20:42,001 - root - INFO - lr: 1.3003e-05 gnorm: 1.11 [17:46:31< 6:43:16] +[titan] 2025-10-05 16:20:52,862 - root - INFO - step: 29030 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 16:20:52,862 - root - INFO - lr: 
1.2996e-05 gnorm: 1.17 [17:46:42< 6:43:05] +[titan] 2025-10-05 16:21:03,692 - root - INFO - step: 29035 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8113 +[titan] 2025-10-05 16:21:03,692 - root - INFO - lr: 1.2989e-05 gnorm: 1.19 [17:46:52< 6:42:54] +[titan] 2025-10-05 16:21:14,546 - root - INFO - step: 29040 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 16:21:14,546 - root - INFO - lr: 1.2982e-05 gnorm: 1.13 [17:47:03< 6:42:43] +[titan] 2025-10-05 16:21:25,382 - root - INFO - step: 29045 loss: 2.0710 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 16:21:25,382 - root - INFO - lr: 1.2975e-05 gnorm: 1.11 [17:47:14< 6:42:32] +[titan] 2025-10-05 16:21:34,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:21:36,183 - root - INFO - step: 29050 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7903 +[titan] 2025-10-05 16:21:36,183 - root - INFO - lr: 1.2969e-05 gnorm: 1.15 [17:47:25< 6:42:21] +[titan] 2025-10-05 16:21:47,040 - root - INFO - step: 29055 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 16:21:47,040 - root - INFO - lr: 1.2962e-05 gnorm: 1.13 [17:47:36< 6:42:09] +[titan] 2025-10-05 16:21:57,945 - root - INFO - step: 29060 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 16:21:57,945 - root - INFO - lr: 1.2955e-05 gnorm: 1.09 [17:47:47< 6:41:58] +[titan] 2025-10-05 16:22:08,763 - root - INFO - step: 29065 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:22:08,763 - root - INFO - lr: 1.2948e-05 gnorm: 1.10 [17:47:57< 6:41:47] +[titan] 2025-10-05 16:22:19,602 - root - INFO - step: 29070 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 16:22:19,602 - root - INFO - lr: 1.2941e-05 gnorm: 1.12 [17:48:08< 6:41:36] +[titan] 2025-10-05 16:22:30,424 - root - INFO - step: 29075 loss: 1.9436 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.11 mfu: 42.48% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 16:22:30,424 - root - INFO - lr: 1.2935e-05 gnorm: 1.08 [17:48:19< 6:41:25] +[titan] 2025-10-05 16:22:41,221 - root - INFO - step: 29080 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.04 mfu: 42.57% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 16:22:41,221 - root - INFO - lr: 
1.2928e-05 gnorm: 1.12 [17:48:30< 6:41:14] +[titan] 2025-10-05 16:22:52,143 - root - INFO - step: 29085 loss: 2.0455 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 16:22:52,143 - root - INFO - lr: 1.2921e-05 gnorm: 1.13 [17:48:41< 6:41:03] +[titan] 2025-10-05 16:23:02,963 - root - INFO - step: 29090 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:23:02,964 - root - INFO - lr: 1.2914e-05 gnorm: 1.15 [17:48:52< 6:40:52] +[titan] 2025-10-05 16:23:13,781 - root - INFO - step: 29095 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7612 +[titan] 2025-10-05 16:23:13,781 - root - INFO - lr: 1.2907e-05 gnorm: 1.12 [17:49:02< 6:40:41] +[titan] 2025-10-05 16:23:22,389 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:23:24,561 - root - INFO - step: 29100 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 16:23:24,562 - root - INFO - lr: 1.2901e-05 gnorm: 1.15 [17:49:13< 6:40:30] +[titan] 2025-10-05 16:23:35,362 - root - INFO - step: 29105 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:23:35,363 - root - INFO - lr: 1.2894e-05 gnorm: 1.15 [17:49:24< 6:40:18] +[titan] 2025-10-05 16:23:46,147 - root - INFO - step: 29110 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,385 tflops: 421.54 mfu: 42.62% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 16:23:46,147 - root - INFO - lr: 1.2887e-05 gnorm: 1.13 [17:49:35< 6:40:07] +[titan] 2025-10-05 16:23:56,986 - root - INFO - step: 29115 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 16:23:56,986 - root - INFO - lr: 1.2880e-05 gnorm: 1.11 [17:49:46< 6:39:56] +[titan] 2025-10-05 16:24:07,804 - root - INFO - step: 29120 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 16:24:07,804 - root - INFO - lr: 1.2873e-05 gnorm: 1.09 [17:49:56< 6:39:45] +[titan] 2025-10-05 16:24:18,657 - root - INFO - step: 29125 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 16:24:18,657 - root - INFO - lr: 1.2867e-05 gnorm: 1.08 [17:50:07< 6:39:34] +[titan] 2025-10-05 16:24:29,461 - root - INFO - step: 29130 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 16:24:29,461 - root - INFO - lr: 
1.2860e-05 gnorm: 1.11 [17:50:18< 6:39:23] +[titan] 2025-10-05 16:24:40,248 - root - INFO - step: 29135 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,379 tflops: 421.46 mfu: 42.61% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:24:40,248 - root - INFO - lr: 1.2853e-05 gnorm: 1.23 [17:50:29< 6:39:12] +[titan] 2025-10-05 16:24:51,066 - root - INFO - step: 29140 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 16:24:51,066 - root - INFO - lr: 1.2846e-05 gnorm: 1.11 [17:50:40< 6:39:01] +[titan] 2025-10-05 16:25:01,882 - root - INFO - step: 29145 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.33 mfu: 42.50% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 16:25:01,882 - root - INFO - lr: 1.2840e-05 gnorm: 1.14 [17:50:51< 6:38:50] +[titan] 2025-10-05 16:25:10,529 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:25:12,729 - root - INFO - step: 29150 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:12,730 - root - INFO - lr: 1.2833e-05 gnorm: 1.16 [17:51:01< 6:38:39] +[titan] 2025-10-05 16:25:23,552 - root - INFO - step: 29155 loss: 1.9771 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:25:23,552 - root - INFO - lr: 1.2826e-05 gnorm: 1.11 [17:51:12< 6:38:27] +[titan] 2025-10-05 16:25:34,364 - root - INFO - step: 29160 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:34,364 - root - INFO - lr: 1.2819e-05 gnorm: 1.13 [17:51:23< 6:38:16] +[titan] 2025-10-05 16:25:45,141 - root - INFO - step: 29165 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:25:45,141 - root - INFO - lr: 1.2813e-05 gnorm: 1.10 [17:51:34< 6:38:05] +[titan] 2025-10-05 16:25:55,942 - root - INFO - step: 29170 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 16:25:55,943 - root - INFO - lr: 1.2806e-05 gnorm: 1.12 [17:51:45< 6:37:54] +[titan] 2025-10-05 16:26:06,754 - root - INFO - step: 29175 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:26:06,754 - root - INFO - lr: 1.2799e-05 gnorm: 1.13 [17:51:55< 6:37:43] +[titan] 2025-10-05 16:26:17,565 - root - INFO - step: 29180 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,310 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 16:26:17,566 - root - INFO - lr: 
1.2792e-05 gnorm: 1.11 [17:52:06< 6:37:32] +[titan] 2025-10-05 16:26:26,495 - root - INFO - Dumping profiler traces at step 29184 +[titan] 2025-10-05 16:26:26,530 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:26:28,703 - root - INFO - step: 29185 loss: 2.0239 memory: 118.84GiB(85.28%) tps: 29,423 tflops: 408.20 mfu: 41.27% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:26:28,703 - root - INFO - lr: 1.2786e-05 gnorm: 1.13 [17:52:17< 6:37:21] +[titan] 2025-10-05 16:26:39,480 - root - INFO - step: 29190 loss: 2.0459 memory: 118.84GiB(85.28%) tps: 30,405 tflops: 421.82 mfu: 42.65% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 16:26:39,480 - root - INFO - lr: 1.2779e-05 gnorm: 1.08 [17:52:28< 6:37:10] +[titan] 2025-10-05 16:26:50,281 - root - INFO - step: 29195 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 16:26:50,282 - root - INFO - lr: 1.2772e-05 gnorm: 1.11 [17:52:39< 6:36:59] +[titan] 2025-10-05 16:26:58,914 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:27:01,083 - root - INFO - step: 29200 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:27:01,084 - root - INFO - lr: 1.2765e-05 gnorm: 1.10 [17:52:50< 6:36:48] +[titan] 2025-10-05 16:27:11,900 - root - INFO - step: 29205 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,295 tflops: 420.29 mfu: 42.50% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:27:11,900 - root - INFO - lr: 1.2759e-05 gnorm: 1.11 [17:53:01< 6:36:37] +[titan] 2025-10-05 16:27:22,704 - root - INFO - step: 29210 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:27:22,704 - root - INFO - lr: 1.2752e-05 gnorm: 1.13 [17:53:11< 6:36:25] +[titan] 2025-10-05 16:27:33,520 - root - INFO - step: 29215 loss: 1.9806 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 16:27:33,520 - root - INFO - lr: 1.2745e-05 gnorm: 1.13 [17:53:22< 6:36:14] +[titan] 2025-10-05 16:27:44,343 - root - INFO - step: 29220 loss: 2.0330 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:27:44,344 - root - INFO - lr: 1.2738e-05 gnorm: 1.11 [17:53:33< 6:36:03] +[titan] 2025-10-05 16:27:55,246 - root - INFO - step: 29225 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 16:27:55,246 - root - INFO - lr: 1.2732e-05 gnorm: 1.14 [17:53:44< 6:35:52] +[titan] 2025-10-05 16:28:06,063 - root - INFO - step: 29230 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 16:28:06,064 - root - INFO - lr: 
1.2725e-05 gnorm: 1.10 [17:53:55< 6:35:41] +[titan] 2025-10-05 16:28:16,881 - root - INFO - step: 29235 loss: 1.9977 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7668 +[titan] 2025-10-05 16:28:16,882 - root - INFO - lr: 1.2718e-05 gnorm: 1.12 [17:54:06< 6:35:30] +[titan] 2025-10-05 16:28:27,741 - root - INFO - step: 29240 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 16:28:27,741 - root - INFO - lr: 1.2711e-05 gnorm: 1.12 [17:54:16< 6:35:19] +[titan] 2025-10-05 16:28:38,608 - root - INFO - step: 29245 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 16:28:38,608 - root - INFO - lr: 1.2705e-05 gnorm: 1.14 [17:54:27< 6:35:08] +[titan] 2025-10-05 16:28:47,296 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:28:49,481 - root - INFO - step: 29250 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7535 +[titan] 2025-10-05 16:28:49,482 - root - INFO - lr: 1.2698e-05 gnorm: 1.12 [17:54:38< 6:34:57] +[titan] 2025-10-05 16:29:00,345 - root - INFO - step: 29255 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 16:29:00,345 - root - INFO - lr: 1.2691e-05 gnorm: 1.13 [17:54:49< 6:34:46] +[titan] 2025-10-05 16:29:11,181 - root - INFO - step: 29260 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 16:29:11,181 - root - INFO - lr: 1.2684e-05 gnorm: 1.10 [17:55:00< 6:34:35] +[titan] 2025-10-05 16:29:22,010 - root - INFO - step: 29265 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 16:29:22,010 - root - INFO - lr: 1.2678e-05 gnorm: 1.10 [17:55:11< 6:34:24] +[titan] 2025-10-05 16:29:32,844 - root - INFO - step: 29270 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8340 +[titan] 2025-10-05 16:29:32,845 - root - INFO - lr: 1.2671e-05 gnorm: 1.14 [17:55:21< 6:34:12] +[titan] 2025-10-05 16:29:43,662 - root - INFO - step: 29275 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 16:29:43,662 - root - INFO - lr: 1.2664e-05 gnorm: 1.10 [17:55:32< 6:34:01] +[titan] 2025-10-05 16:29:54,552 - root - INFO - step: 29280 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 16:29:54,553 - root - INFO - lr: 
1.2658e-05 gnorm: 1.15 [17:55:43< 6:33:50] +[titan] 2025-10-05 16:30:05,442 - root - INFO - step: 29285 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:30:05,442 - root - INFO - lr: 1.2651e-05 gnorm: 1.15 [17:55:54< 6:33:39] +[titan] 2025-10-05 16:30:16,285 - root - INFO - step: 29290 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:30:16,285 - root - INFO - lr: 1.2644e-05 gnorm: 1.13 [17:56:05< 6:33:28] +[titan] 2025-10-05 16:30:27,122 - root - INFO - step: 29295 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 16:30:27,122 - root - INFO - lr: 1.2638e-05 gnorm: 1.16 [17:56:16< 6:33:17] +[titan] 2025-10-05 16:30:35,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:30:37,974 - root - INFO - step: 29300 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:30:37,975 - root - INFO - lr: 1.2631e-05 gnorm: 1.13 [17:56:27< 6:33:06] +[titan] 2025-10-05 16:30:48,835 - root - INFO - step: 29305 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 16:30:48,835 - root - INFO - lr: 1.2624e-05 gnorm: 1.12 [17:56:37< 6:32:55] +[titan] 2025-10-05 16:30:59,735 - root - INFO - step: 29310 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 16:30:59,735 - root - INFO - lr: 1.2617e-05 gnorm: 1.16 [17:56:48< 6:32:44] +[titan] 2025-10-05 16:31:10,585 - root - INFO - step: 29315 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 16:31:10,585 - root - INFO - lr: 1.2611e-05 gnorm: 1.11 [17:56:59< 6:32:33] +[titan] 2025-10-05 16:31:21,451 - root - INFO - step: 29320 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:31:21,452 - root - INFO - lr: 1.2604e-05 gnorm: 1.14 [17:57:10< 6:32:22] +[titan] 2025-10-05 16:31:32,282 - root - INFO - step: 29325 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 16:31:32,283 - root - INFO - lr: 1.2597e-05 gnorm: 1.08 [17:57:21< 6:32:11] +[titan] 2025-10-05 16:31:43,142 - root - INFO - step: 29330 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7937 +[titan] 2025-10-05 16:31:43,143 - root - INFO - lr: 
1.2591e-05 gnorm: 1.15 [17:57:32< 6:31:59] +[titan] 2025-10-05 16:31:54,012 - root - INFO - step: 29335 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 16:31:54,012 - root - INFO - lr: 1.2584e-05 gnorm: 1.12 [17:57:43< 6:31:48] +[titan] 2025-10-05 16:32:04,880 - root - INFO - step: 29340 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 16:32:04,880 - root - INFO - lr: 1.2577e-05 gnorm: 1.18 [17:57:54< 6:31:37] +[titan] 2025-10-05 16:32:15,774 - root - INFO - step: 29345 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 16:32:15,774 - root - INFO - lr: 1.2571e-05 gnorm: 1.14 [17:58:04< 6:31:26] +[titan] 2025-10-05 16:32:24,447 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:32:26,631 - root - INFO - step: 29350 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 16:32:26,632 - root - INFO - lr: 1.2564e-05 gnorm: 1.11 [17:58:15< 6:31:15] +[titan] 2025-10-05 16:32:37,480 - root - INFO - step: 29355 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:32:37,480 - root - INFO - lr: 1.2557e-05 gnorm: 1.10 [17:58:26< 6:31:04] +[titan] 2025-10-05 16:32:48,323 - root - INFO - step: 29360 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:32:48,323 - root - INFO - lr: 1.2551e-05 gnorm: 1.14 [17:58:37< 6:30:53] +[titan] 2025-10-05 16:32:59,199 - root - INFO - step: 29365 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 16:32:59,199 - root - INFO - lr: 1.2544e-05 gnorm: 1.13 [17:58:48< 6:30:42] +[titan] 2025-10-05 16:33:10,048 - root - INFO - step: 29370 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 16:33:10,048 - root - INFO - lr: 1.2537e-05 gnorm: 1.12 [17:58:59< 6:30:31] +[titan] 2025-10-05 16:33:20,934 - root - INFO - step: 29375 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 16:33:20,935 - root - INFO - lr: 1.2531e-05 gnorm: 1.15 [17:59:10< 6:30:20] +[titan] 2025-10-05 16:33:31,794 - root - INFO - step: 29380 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:33:31,794 - root - INFO - lr: 
1.2524e-05 gnorm: 1.11 [17:59:20< 6:30:09] +[titan] 2025-10-05 16:33:42,652 - root - INFO - step: 29385 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7647 +[titan] 2025-10-05 16:33:42,652 - root - INFO - lr: 1.2517e-05 gnorm: 1.13 [17:59:31< 6:29:58] +[titan] 2025-10-05 16:33:53,484 - root - INFO - step: 29390 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:33:53,484 - root - INFO - lr: 1.2511e-05 gnorm: 1.15 [17:59:42< 6:29:46] +[titan] 2025-10-05 16:34:04,355 - root - INFO - step: 29395 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 16:34:04,355 - root - INFO - lr: 1.2504e-05 gnorm: 1.11 [17:59:53< 6:29:35] +[titan] 2025-10-05 16:34:13,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:34:15,217 - root - INFO - step: 29400 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 16:34:15,218 - root - INFO - lr: 1.2497e-05 gnorm: 1.12 [18:00:04< 6:29:24] +[titan] 2025-10-05 16:34:26,084 - root - INFO - step: 29405 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 16:34:26,084 - root - INFO - lr: 1.2491e-05 gnorm: 1.13 [18:00:15< 6:29:13] +[titan] 2025-10-05 16:34:36,985 - root - INFO - step: 29410 loss: 1.9746 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:34:36,985 - root - INFO - lr: 1.2484e-05 gnorm: 1.14 [18:00:26< 6:29:02] +[titan] 2025-10-05 16:34:47,862 - root - INFO - step: 29415 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 16:34:47,862 - root - INFO - lr: 1.2477e-05 gnorm: 1.14 [18:00:36< 6:28:51] +[titan] 2025-10-05 16:34:58,716 - root - INFO - step: 29420 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:34:58,716 - root - INFO - lr: 1.2471e-05 gnorm: 1.10 [18:00:47< 6:28:40] +[titan] 2025-10-05 16:35:09,613 - root - INFO - step: 29425 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 16:35:09,613 - root - INFO - lr: 1.2464e-05 gnorm: 1.13 [18:00:58< 6:28:29] +[titan] 2025-10-05 16:35:20,487 - root - INFO - step: 29430 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7675 +[titan] 2025-10-05 16:35:20,487 - root - INFO - lr: 
1.2457e-05 gnorm: 1.12 [18:01:09< 6:28:18] +[titan] 2025-10-05 16:35:31,364 - root - INFO - step: 29435 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:35:31,364 - root - INFO - lr: 1.2451e-05 gnorm: 1.13 [18:01:20< 6:28:07] +[titan] 2025-10-05 16:35:42,266 - root - INFO - step: 29440 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 16:35:42,266 - root - INFO - lr: 1.2444e-05 gnorm: 1.13 [18:01:31< 6:27:56] +[titan] 2025-10-05 16:35:53,139 - root - INFO - step: 29445 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 16:35:53,139 - root - INFO - lr: 1.2438e-05 gnorm: 1.10 [18:01:42< 6:27:45] +[titan] 2025-10-05 16:36:01,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:36:04,036 - root - INFO - step: 29450 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:36:04,036 - root - INFO - lr: 1.2431e-05 gnorm: 1.10 [18:01:53< 6:27:34] +[titan] 2025-10-05 16:36:14,913 - root - INFO - step: 29455 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 16:36:14,913 - root - INFO - lr: 1.2424e-05 gnorm: 1.13 [18:02:04< 6:27:23] +[titan] 2025-10-05 16:36:25,795 - root - INFO - step: 29460 loss: 2.0213 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7869 +[titan] 2025-10-05 16:36:25,795 - root - INFO - lr: 1.2418e-05 gnorm: 1.13 [18:02:14< 6:27:11] +[titan] 2025-10-05 16:36:36,668 - root - INFO - step: 29465 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 16:36:36,668 - root - INFO - lr: 1.2411e-05 gnorm: 1.14 [18:02:25< 6:27:00] +[titan] 2025-10-05 16:36:47,594 - root - INFO - step: 29470 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 16:36:47,594 - root - INFO - lr: 1.2404e-05 gnorm: 1.17 [18:02:36< 6:26:49] +[titan] 2025-10-05 16:36:58,488 - root - INFO - step: 29475 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8272 +[titan] 2025-10-05 16:36:58,488 - root - INFO - lr: 1.2398e-05 gnorm: 1.14 [18:02:47< 6:26:38] +[titan] 2025-10-05 16:37:09,396 - root - INFO - step: 29480 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 16:37:09,396 - root - INFO - lr: 
1.2391e-05 gnorm: 1.10 [18:02:58< 6:26:27] +[titan] 2025-10-05 16:37:20,276 - root - INFO - step: 29485 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7523 +[titan] 2025-10-05 16:37:20,276 - root - INFO - lr: 1.2385e-05 gnorm: 1.14 [18:03:09< 6:26:16] +[titan] 2025-10-05 16:37:31,149 - root - INFO - step: 29490 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:37:31,149 - root - INFO - lr: 1.2378e-05 gnorm: 1.18 [18:03:20< 6:26:05] +[titan] 2025-10-05 16:37:42,032 - root - INFO - step: 29495 loss: 1.9702 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:37:42,032 - root - INFO - lr: 1.2371e-05 gnorm: 1.12 [18:03:31< 6:25:54] +[titan] 2025-10-05 16:37:50,726 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:37:52,909 - root - INFO - step: 29500 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 16:37:52,910 - root - INFO - lr: 1.2365e-05 gnorm: 1.18 [18:03:42< 6:25:43] +[titan] 2025-10-05 16:38:03,862 - root - INFO - step: 29505 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7779 +[titan] 2025-10-05 16:38:03,862 - root - INFO - lr: 1.2358e-05 gnorm: 1.08 [18:03:52< 6:25:32] +[titan] 2025-10-05 16:38:14,737 - root - INFO - step: 29510 loss: 2.0280 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 16:38:14,738 - root - INFO - lr: 1.2352e-05 gnorm: 1.12 [18:04:03< 6:25:21] +[titan] 2025-10-05 16:38:25,629 - root - INFO - step: 29515 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 16:38:25,629 - root - INFO - lr: 1.2345e-05 gnorm: 1.10 [18:04:14< 6:25:10] +[titan] 2025-10-05 16:38:36,497 - root - INFO - step: 29520 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 16:38:36,497 - root - INFO - lr: 1.2338e-05 gnorm: 1.14 [18:04:25< 6:24:59] +[titan] 2025-10-05 16:38:47,375 - root - INFO - step: 29525 loss: 2.0360 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 16:38:47,375 - root - INFO - lr: 1.2332e-05 gnorm: 1.12 [18:04:36< 6:24:48] +[titan] 2025-10-05 16:38:58,269 - root - INFO - step: 29530 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 16:38:58,270 - root - INFO - lr: 
1.2325e-05 gnorm: 1.14 [18:04:47< 6:24:37] +[titan] 2025-10-05 16:39:09,198 - root - INFO - step: 29535 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:39:09,198 - root - INFO - lr: 1.2319e-05 gnorm: 1.14 [18:04:58< 6:24:25] +[titan] 2025-10-05 16:39:20,067 - root - INFO - step: 29540 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7690 +[titan] 2025-10-05 16:39:20,068 - root - INFO - lr: 1.2312e-05 gnorm: 1.11 [18:05:09< 6:24:14] +[titan] 2025-10-05 16:39:30,927 - root - INFO - step: 29545 loss: 1.9548 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:39:30,927 - root - INFO - lr: 1.2305e-05 gnorm: 1.08 [18:05:20< 6:24:03] +[titan] 2025-10-05 16:39:39,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:39:41,783 - root - INFO - step: 29550 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 16:39:41,783 - root - INFO - lr: 1.2299e-05 gnorm: 1.16 [18:05:30< 6:23:52] +[titan] 2025-10-05 16:39:52,647 - root - INFO - step: 29555 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:39:52,647 - root - INFO - lr: 1.2292e-05 gnorm: 1.11 [18:05:41< 6:23:41] +[titan] 2025-10-05 16:40:03,511 - root - INFO - step: 29560 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 16:40:03,511 - root - INFO - lr: 1.2286e-05 gnorm: 1.11 [18:05:52< 6:23:30] +[titan] 2025-10-05 16:40:14,393 - root - INFO - step: 29565 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 16:40:14,393 - root - INFO - lr: 1.2279e-05 gnorm: 1.09 [18:06:03< 6:23:19] +[titan] 2025-10-05 16:40:25,289 - root - INFO - step: 29570 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 16:40:25,289 - root - INFO - lr: 1.2273e-05 gnorm: 1.15 [18:06:14< 6:23:08] +[titan] 2025-10-05 16:40:36,151 - root - INFO - step: 29575 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 16:40:36,151 - root - INFO - lr: 1.2266e-05 gnorm: 1.12 [18:06:25< 6:22:57] +[titan] 2025-10-05 16:40:47,014 - root - INFO - step: 29580 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:40:47,014 - root - INFO - lr: 
1.2259e-05 gnorm: 1.15 [18:06:36< 6:22:46] +[titan] 2025-10-05 16:40:57,884 - root - INFO - step: 29585 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7835 +[titan] 2025-10-05 16:40:57,884 - root - INFO - lr: 1.2253e-05 gnorm: 1.13 [18:06:46< 6:22:35] +[titan] 2025-10-05 16:41:08,765 - root - INFO - step: 29590 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 16:41:08,765 - root - INFO - lr: 1.2246e-05 gnorm: 1.12 [18:06:57< 6:22:24] +[titan] 2025-10-05 16:41:19,628 - root - INFO - step: 29595 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:41:19,628 - root - INFO - lr: 1.2240e-05 gnorm: 1.14 [18:07:08< 6:22:13] +[titan] 2025-10-05 16:41:28,344 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:41:30,522 - root - INFO - step: 29600 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 16:41:30,522 - root - INFO - lr: 1.2233e-05 gnorm: 1.11 [18:07:19< 6:22:02] +[titan] 2025-10-05 16:41:41,388 - root - INFO - step: 29605 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 16:41:41,389 - root - INFO - lr: 1.2227e-05 gnorm: 1.11 [18:07:30< 6:21:50] +[titan] 2025-10-05 16:41:52,245 - root - INFO - step: 29610 loss: 1.9448 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 16:41:52,245 - root - INFO - lr: 1.2220e-05 gnorm: 1.09 [18:07:41< 6:21:39] +[titan] 2025-10-05 16:42:03,126 - root - INFO - step: 29615 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8316 +[titan] 2025-10-05 16:42:03,126 - root - INFO - lr: 1.2214e-05 gnorm: 1.15 [18:07:52< 6:21:28] +[titan] 2025-10-05 16:42:13,989 - root - INFO - step: 29620 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7570 +[titan] 2025-10-05 16:42:13,989 - root - INFO - lr: 1.2207e-05 gnorm: 1.13 [18:08:03< 6:21:17] +[titan] 2025-10-05 16:42:24,845 - root - INFO - step: 29625 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 16:42:24,845 - root - INFO - lr: 1.2200e-05 gnorm: 1.11 [18:08:13< 6:21:06] +[titan] 2025-10-05 16:42:35,740 - root - INFO - step: 29630 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 16:42:35,740 - root - INFO - lr: 
1.2194e-05 gnorm: 1.16 [18:08:24< 6:20:55] +[titan] 2025-10-05 16:42:46,609 - root - INFO - step: 29635 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 16:42:46,609 - root - INFO - lr: 1.2187e-05 gnorm: 1.13 [18:08:35< 6:20:44] +[titan] 2025-10-05 16:42:57,451 - root - INFO - step: 29640 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:42:57,451 - root - INFO - lr: 1.2181e-05 gnorm: 1.11 [18:08:46< 6:20:33] +[titan] 2025-10-05 16:43:08,337 - root - INFO - step: 29645 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 16:43:08,337 - root - INFO - lr: 1.2174e-05 gnorm: 1.10 [18:08:57< 6:20:22] +[titan] 2025-10-05 16:43:17,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:43:19,192 - root - INFO - step: 29650 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7561 +[titan] 2025-10-05 16:43:19,192 - root - INFO - lr: 1.2168e-05 gnorm: 1.14 [18:09:08< 6:20:11] +[titan] 2025-10-05 16:43:30,040 - root - INFO - step: 29655 loss: 1.9877 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 16:43:30,040 - root - INFO - lr: 1.2161e-05 gnorm: 1.13 [18:09:19< 6:20:00] +[titan] 2025-10-05 16:43:40,896 - root - INFO - step: 29660 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:43:40,896 - root - INFO - lr: 1.2155e-05 gnorm: 1.16 [18:09:29< 6:19:49] +[titan] 2025-10-05 16:43:51,775 - root - INFO - step: 29665 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 16:43:51,776 - root - INFO - lr: 1.2148e-05 gnorm: 1.12 [18:09:40< 6:19:38] +[titan] 2025-10-05 16:44:02,650 - root - INFO - step: 29670 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7764 +[titan] 2025-10-05 16:44:02,651 - root - INFO - lr: 1.2142e-05 gnorm: 1.12 [18:09:51< 6:19:26] +[titan] 2025-10-05 16:44:13,541 - root - INFO - step: 29675 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 16:44:13,542 - root - INFO - lr: 1.2135e-05 gnorm: 1.12 [18:10:02< 6:19:15] +[titan] 2025-10-05 16:44:24,406 - root - INFO - step: 29680 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:44:24,406 - root - INFO - lr: 
1.2129e-05 gnorm: 1.10 [18:10:13< 6:19:04] +[titan] 2025-10-05 16:44:35,270 - root - INFO - step: 29685 loss: 2.0294 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 16:44:35,270 - root - INFO - lr: 1.2122e-05 gnorm: 1.14 [18:10:24< 6:18:53] +[titan] 2025-10-05 16:44:46,146 - root - INFO - step: 29690 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:44:46,146 - root - INFO - lr: 1.2116e-05 gnorm: 1.14 [18:10:35< 6:18:42] +[titan] 2025-10-05 16:44:57,137 - root - INFO - step: 29695 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 29,813 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:44:57,138 - root - INFO - lr: 1.2109e-05 gnorm: 1.16 [18:10:46< 6:18:31] +[titan] 2025-10-05 16:44:59,502 - root - INFO - Dumping profiler traces at step 29696 +[titan] 2025-10-05 16:44:59,542 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:45:06,052 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:45:08,245 - root - INFO - step: 29700 loss: 2.0615 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.30 mfu: 41.38% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8226 +[titan] 2025-10-05 16:45:08,245 - root - INFO - lr: 1.2103e-05 gnorm: 1.15 [18:10:57< 6:18:20] +[titan] 2025-10-05 16:45:19,144 - root - INFO - step: 29705 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 16:45:19,144 - root - INFO - lr: 1.2096e-05 gnorm: 1.11 [18:11:08< 6:18:09] +[titan] 2025-10-05 16:45:30,018 - root - INFO - step: 29710 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 16:45:30,019 - root - INFO - lr: 1.2090e-05 gnorm: 1.15 [18:11:19< 6:17:58] +[titan] 2025-10-05 16:45:40,886 - root - INFO - step: 29715 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:45:40,886 - root - INFO - lr: 1.2083e-05 gnorm: 1.09 [18:11:29< 6:17:47] +[titan] 2025-10-05 16:45:51,774 - root - INFO - step: 29720 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8207 +[titan] 2025-10-05 16:45:51,775 - root - INFO - lr: 1.2077e-05 gnorm: 1.13 [18:11:40< 6:17:36] +[titan] 2025-10-05 16:46:02,667 - root - INFO - step: 29725 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 16:46:02,667 - root - INFO - lr: 1.2070e-05 gnorm: 1.11 [18:11:51< 6:17:25] +[titan] 2025-10-05 16:46:13,605 - root - INFO - step: 29730 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 16:46:13,605 - root - INFO - lr: 
1.2064e-05 gnorm: 1.10 [18:12:02< 6:17:14] +[titan] 2025-10-05 16:46:24,505 - root - INFO - step: 29735 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 16:46:24,505 - root - INFO - lr: 1.2057e-05 gnorm: 1.14 [18:12:13< 6:17:03] +[titan] 2025-10-05 16:46:35,396 - root - INFO - step: 29740 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 16:46:35,397 - root - INFO - lr: 1.2051e-05 gnorm: 1.16 [18:12:24< 6:16:52] +[titan] 2025-10-05 16:46:46,263 - root - INFO - step: 29745 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:46:46,263 - root - INFO - lr: 1.2044e-05 gnorm: 1.14 [18:12:35< 6:16:41] +[titan] 2025-10-05 16:46:54,956 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:46:57,142 - root - INFO - step: 29750 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:46:57,142 - root - INFO - lr: 1.2038e-05 gnorm: 1.14 [18:12:46< 6:16:30] +[titan] 2025-10-05 16:47:08,011 - root - INFO - step: 29755 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 16:47:08,011 - root - INFO - lr: 1.2031e-05 gnorm: 1.14 [18:12:57< 6:16:18] +[titan] 2025-10-05 16:47:18,928 - root - INFO - step: 29760 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 16:47:18,929 - root - INFO - lr: 1.2025e-05 gnorm: 1.15 [18:13:08< 6:16:07] +[titan] 2025-10-05 16:47:29,805 - root - INFO - step: 29765 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 16:47:29,806 - root - INFO - lr: 1.2018e-05 gnorm: 1.11 [18:13:18< 6:15:56] +[titan] 2025-10-05 16:47:40,695 - root - INFO - step: 29770 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 16:47:40,695 - root - INFO - lr: 1.2012e-05 gnorm: 1.12 [18:13:29< 6:15:45] +[titan] 2025-10-05 16:47:51,568 - root - INFO - step: 29775 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 16:47:51,568 - root - INFO - lr: 1.2005e-05 gnorm: 1.13 [18:13:40< 6:15:34] +[titan] 2025-10-05 16:48:02,434 - root - INFO - step: 29780 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 16:48:02,434 - root - INFO - lr: 
1.1999e-05 gnorm: 1.13 [18:13:51< 6:15:23] +[titan] 2025-10-05 16:48:13,326 - root - INFO - step: 29785 loss: 2.0923 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 16:48:13,326 - root - INFO - lr: 1.1992e-05 gnorm: 1.17 [18:14:02< 6:15:12] +[titan] 2025-10-05 16:48:24,246 - root - INFO - step: 29790 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 16:48:24,246 - root - INFO - lr: 1.1986e-05 gnorm: 1.21 [18:14:13< 6:15:01] +[titan] 2025-10-05 16:48:35,115 - root - INFO - step: 29795 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7438 +[titan] 2025-10-05 16:48:35,116 - root - INFO - lr: 1.1979e-05 gnorm: 1.16 [18:14:24< 6:14:50] +[titan] 2025-10-05 16:48:43,809 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:48:45,984 - root - INFO - step: 29800 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:48:45,984 - root - INFO - lr: 1.1973e-05 gnorm: 1.17 [18:14:35< 6:14:39] +[titan] 2025-10-05 16:48:56,850 - root - INFO - step: 29805 loss: 2.0467 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 16:48:56,850 - root - INFO - lr: 1.1966e-05 gnorm: 1.13 [18:14:45< 6:14:28] +[titan] 2025-10-05 16:49:07,720 - root - INFO - step: 29810 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 16:49:07,720 - root - INFO - lr: 1.1960e-05 gnorm: 1.14 [18:14:56< 6:14:17] +[titan] 2025-10-05 16:49:18,594 - root - INFO - step: 29815 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 16:49:18,594 - root - INFO - lr: 1.1954e-05 gnorm: 1.11 [18:15:07< 6:14:06] +[titan] 2025-10-05 16:49:29,475 - root - INFO - step: 29820 loss: 2.0086 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7751 +[titan] 2025-10-05 16:49:29,475 - root - INFO - lr: 1.1947e-05 gnorm: 1.16 [18:15:18< 6:13:55] +[titan] 2025-10-05 16:49:40,387 - root - INFO - step: 29825 loss: 1.9867 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7563 +[titan] 2025-10-05 16:49:40,388 - root - INFO - lr: 1.1941e-05 gnorm: 1.10 [18:15:29< 6:13:44] +[titan] 2025-10-05 16:49:51,279 - root - INFO - step: 29830 loss: 1.9675 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 16:49:51,279 - root - INFO - lr: 
1.1934e-05 gnorm: 1.09 [18:15:40< 6:13:32] +[titan] 2025-10-05 16:50:02,138 - root - INFO - step: 29835 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7622 +[titan] 2025-10-05 16:50:02,138 - root - INFO - lr: 1.1928e-05 gnorm: 1.12 [18:15:51< 6:13:21] +[titan] 2025-10-05 16:50:13,006 - root - INFO - step: 29840 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 16:50:13,006 - root - INFO - lr: 1.1921e-05 gnorm: 1.13 [18:16:02< 6:13:10] +[titan] 2025-10-05 16:50:23,932 - root - INFO - step: 29845 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6997 +[titan] 2025-10-05 16:50:23,933 - root - INFO - lr: 1.1915e-05 gnorm: 1.10 [18:16:13< 6:12:59] +[titan] 2025-10-05 16:50:32,610 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:50:34,782 - root - INFO - step: 29850 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8188 +[titan] 2025-10-05 16:50:34,782 - root - INFO - lr: 1.1908e-05 gnorm: 1.18 [18:16:23< 6:12:48] +[titan] 2025-10-05 16:50:45,679 - root - INFO - step: 29855 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7933 +[titan] 2025-10-05 16:50:45,680 - root - INFO - lr: 1.1902e-05 gnorm: 1.17 [18:16:34< 6:12:37] +[titan] 2025-10-05 16:50:56,541 - root - INFO - step: 29860 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:50:56,541 - root - INFO - lr: 1.1896e-05 gnorm: 1.11 [18:16:45< 6:12:26] +[titan] 2025-10-05 16:51:07,402 - root - INFO - step: 29865 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 16:51:07,402 - root - INFO - lr: 1.1889e-05 gnorm: 1.18 [18:16:56< 6:12:15] +[titan] 2025-10-05 16:51:18,320 - root - INFO - step: 29870 loss: 1.9395 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 16:51:18,320 - root - INFO - lr: 1.1883e-05 gnorm: 1.13 [18:17:07< 6:12:04] +[titan] 2025-10-05 16:51:29,178 - root - INFO - step: 29875 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 16:51:29,178 - root - INFO - lr: 1.1876e-05 gnorm: 1.13 [18:17:18< 6:11:53] +[titan] 2025-10-05 16:51:40,033 - root - INFO - step: 29880 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 16:51:40,033 - root - INFO - lr: 
1.1870e-05 gnorm: 1.12 [18:17:29< 6:11:42] +[titan] 2025-10-05 16:51:50,881 - root - INFO - step: 29885 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 16:51:50,881 - root - INFO - lr: 1.1863e-05 gnorm: 1.10 [18:17:39< 6:11:31] +[titan] 2025-10-05 16:52:01,762 - root - INFO - step: 29890 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 16:52:01,763 - root - INFO - lr: 1.1857e-05 gnorm: 1.15 [18:17:50< 6:11:20] +[titan] 2025-10-05 16:52:12,608 - root - INFO - step: 29895 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 16:52:12,608 - root - INFO - lr: 1.1851e-05 gnorm: 1.13 [18:18:01< 6:11:09] +[titan] 2025-10-05 16:52:21,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:52:23,480 - root - INFO - step: 29900 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 16:52:23,481 - root - INFO - lr: 1.1844e-05 gnorm: 1.13 [18:18:12< 6:10:58] +[titan] 2025-10-05 16:52:34,301 - root - INFO - step: 29905 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 16:52:34,301 - root - INFO - lr: 1.1838e-05 gnorm: 1.15 [18:18:23< 6:10:46] +[titan] 2025-10-05 16:52:45,148 - root - INFO - step: 29910 loss: 1.9512 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 16:52:45,148 - root - INFO - lr: 1.1831e-05 gnorm: 1.11 [18:18:34< 6:10:35] +[titan] 2025-10-05 16:52:55,998 - root - INFO - step: 29915 loss: 2.0610 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8211 +[titan] 2025-10-05 16:52:55,999 - root - INFO - lr: 1.1825e-05 gnorm: 1.13 [18:18:45< 6:10:24] +[titan] 2025-10-05 16:53:06,867 - root - INFO - step: 29920 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 16:53:06,867 - root - INFO - lr: 1.1819e-05 gnorm: 1.12 [18:18:55< 6:10:13] +[titan] 2025-10-05 16:53:17,736 - root - INFO - step: 29925 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 16:53:17,736 - root - INFO - lr: 1.1812e-05 gnorm: 1.12 [18:19:06< 6:10:02] +[titan] 2025-10-05 16:53:28,570 - root - INFO - step: 29930 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7831 +[titan] 2025-10-05 16:53:28,570 - root - INFO - lr: 
1.1806e-05 gnorm: 1.12 [18:19:17< 6:09:51] +[titan] 2025-10-05 16:53:39,418 - root - INFO - step: 29935 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 16:53:39,418 - root - INFO - lr: 1.1799e-05 gnorm: 1.25 [18:19:28< 6:09:40] +[titan] 2025-10-05 16:53:50,272 - root - INFO - step: 29940 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 16:53:50,272 - root - INFO - lr: 1.1793e-05 gnorm: 1.12 [18:19:39< 6:09:29] +[titan] 2025-10-05 16:54:01,117 - root - INFO - step: 29945 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:54:01,117 - root - INFO - lr: 1.1787e-05 gnorm: 1.14 [18:19:50< 6:09:18] +[titan] 2025-10-05 16:54:09,772 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:54:12,029 - root - INFO - step: 29950 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 16:54:12,029 - root - INFO - lr: 1.1780e-05 gnorm: 1.18 [18:20:01< 6:09:07] +[titan] 2025-10-05 16:54:22,840 - root - INFO - step: 29955 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 16:54:22,840 - root - INFO - lr: 1.1774e-05 gnorm: 1.14 [18:20:11< 6:08:56] +[titan] 2025-10-05 16:54:33,694 - root - INFO - step: 29960 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 16:54:33,694 - root - INFO - lr: 1.1767e-05 gnorm: 1.14 [18:20:22< 6:08:45] +[titan] 2025-10-05 16:54:44,540 - root - INFO - step: 29965 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 16:54:44,540 - root - INFO - lr: 1.1761e-05 gnorm: 1.14 [18:20:33< 6:08:34] +[titan] 2025-10-05 16:54:55,380 - root - INFO - step: 29970 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 16:54:55,380 - root - INFO - lr: 1.1755e-05 gnorm: 1.13 [18:20:44< 6:08:22] +[titan] 2025-10-05 16:55:06,200 - root - INFO - step: 29975 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7975 +[titan] 2025-10-05 16:55:06,200 - root - INFO - lr: 1.1748e-05 gnorm: 1.16 [18:20:55< 6:08:11] +[titan] 2025-10-05 16:55:17,035 - root - INFO - step: 29980 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 16:55:17,035 - root - INFO - lr: 
1.1742e-05 gnorm: 1.16 [18:21:06< 6:08:00] +[titan] 2025-10-05 16:55:27,861 - root - INFO - step: 29985 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 16:55:27,861 - root - INFO - lr: 1.1736e-05 gnorm: 1.11 [18:21:16< 6:07:49] +[titan] 2025-10-05 16:55:38,685 - root - INFO - step: 29990 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 16:55:38,685 - root - INFO - lr: 1.1729e-05 gnorm: 1.08 [18:21:27< 6:07:38] +[titan] 2025-10-05 16:55:49,531 - root - INFO - step: 29995 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 16:55:49,531 - root - INFO - lr: 1.1723e-05 gnorm: 1.11 [18:21:38< 6:07:27] +[titan] 2025-10-05 16:55:58,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 16:56:00,346 - root - INFO - step: 30000 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 16:56:00,346 - root - INFO - lr: 1.1716e-05 gnorm: 1.14 [18:21:49< 6:07:16] +[titan] 2025-10-05 16:56:00,346 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 16:56:17,590 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 16:56:17,590 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.24 seconds. 
+[titan] 2025-10-05 16:58:26,179 - root - INFO - step: 30005 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 2,247 tflops: 31.17 mfu: 3.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 16:58:26,179 - root - INFO - lr: 1.1710e-05 gnorm: 1.15 [18:24:15< 6:07:50] +[titan] 2025-10-05 16:58:36,943 - root - INFO - step: 30010 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,443 tflops: 422.35 mfu: 42.71% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 16:58:36,943 - root - INFO - lr: 1.1704e-05 gnorm: 1.13 [18:24:26< 6:07:39] +[titan] 2025-10-05 16:58:47,757 - root - INFO - step: 30015 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7859 +[titan] 2025-10-05 16:58:47,757 - root - INFO - lr: 1.1697e-05 gnorm: 1.19 [18:24:36< 6:07:28] +[titan] 2025-10-05 16:58:58,551 - root - INFO - step: 30020 loss: 2.0398 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.19 mfu: 42.59% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 16:58:58,551 - root - INFO - lr: 1.1691e-05 gnorm: 1.16 [18:24:47< 6:07:16] +[titan] 2025-10-05 16:59:09,338 - root - INFO - step: 30025 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,378 tflops: 421.45 mfu: 42.61% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7804 +[titan] 2025-10-05 16:59:09,338 - root - INFO - lr: 1.1685e-05 gnorm: 1.17 [18:24:58< 6:07:05] +[titan] 2025-10-05 16:59:20,123 - root - INFO - step: 30030 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,384 tflops: 421.53 mfu: 42.62% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 16:59:20,123 - root - INFO - lr: 1.1678e-05 gnorm: 1.14 [18:25:09< 6:06:54] +[titan] 2025-10-05 16:59:30,956 - root - INFO - step: 30035 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 16:59:30,956 - root - INFO - lr: 
1.1672e-05 gnorm: 1.17 [18:25:20< 6:06:43] +[titan] 2025-10-05 16:59:41,784 - root - INFO - step: 30040 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7750 +[titan] 2025-10-05 16:59:41,784 - root - INFO - lr: 1.1666e-05 gnorm: 1.10 [18:25:30< 6:06:32] +[titan] 2025-10-05 16:59:52,578 - root - INFO - step: 30045 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.18 mfu: 42.59% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 16:59:52,578 - root - INFO - lr: 1.1659e-05 gnorm: 1.20 [18:25:41< 6:06:21] +[titan] 2025-10-05 17:00:01,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 17:00:03,430 - root - INFO - step: 30050 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:00:03,431 - root - INFO - lr: 1.1653e-05 gnorm: 1.13 [18:25:52< 6:06:10] +[titan] 2025-10-05 17:00:14,272 - root - INFO - step: 30055 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:00:14,273 - root - INFO - lr: 1.1647e-05 gnorm: 1.14 [18:26:03< 6:05:59] +[titan] 2025-10-05 17:00:25,096 - root - INFO - step: 30060 loss: 2.0424 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 17:00:25,096 - root - INFO - lr: 1.1640e-05 gnorm: 1.13 [18:26:14< 6:05:48] +[titan] 2025-10-05 17:00:35,911 - root - INFO - step: 30065 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.37 mfu: 42.50% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 17:00:35,911 - root - INFO - lr: 1.1634e-05 gnorm: 1.13 [18:26:24< 6:05:36] +[titan] 2025-10-05 17:00:46,749 - root - INFO - step: 30070 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 
30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:00:46,749 - root - INFO - lr: 1.1628e-05 gnorm: 1.12 [18:26:35< 6:05:25] +[titan] 2025-10-05 17:00:57,558 - root - INFO - step: 30075 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 17:00:57,558 - root - INFO - lr: 1.1621e-05 gnorm: 1.11 [18:26:46< 6:05:14] +[titan] 2025-10-05 17:01:08,392 - root - INFO - step: 30080 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7726 +[titan] 2025-10-05 17:01:08,392 - root - INFO - lr: 1.1615e-05 gnorm: 1.15 [18:26:57< 6:05:03] +[titan] 2025-10-05 17:01:19,229 - root - INFO - step: 30085 loss: 2.0397 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 17:01:19,229 - root - INFO - lr: 1.1609e-05 gnorm: 1.15 [18:27:08< 6:04:52] +[titan] 2025-10-05 17:01:30,104 - root - INFO - step: 30090 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:01:30,104 - root - INFO - lr: 1.1602e-05 gnorm: 1.11 [18:27:19< 6:04:41] +[titan] 2025-10-05 17:01:40,932 - root - INFO - step: 30095 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 17:01:40,932 - root - INFO - lr: 1.1596e-05 gnorm: 1.14 [18:27:29< 6:04:30] +[titan] 2025-10-05 17:01:49,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:01:51,740 - root - INFO - step: 30100 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:01:51,740 - root - INFO - lr: 1.1590e-05 gnorm: 1.12 [18:27:40< 6:04:19] +[titan] 2025-10-05 17:02:02,591 - root - INFO - step: 30105 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7463 +[titan] 2025-10-05 17:02:02,591 - root - INFO - lr: 1.1583e-05 gnorm: 1.13 [18:27:51< 6:04:08] +[titan] 2025-10-05 17:02:13,423 - root - INFO - step: 30110 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 17:02:13,423 - root - INFO - lr: 1.1577e-05 gnorm: 1.16 [18:28:02< 6:03:56] +[titan] 2025-10-05 17:02:24,227 - root - INFO - step: 30115 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 17:02:24,228 - root - INFO - lr: 1.1571e-05 gnorm: 1.12 [18:28:13< 6:03:45] +[titan] 2025-10-05 17:02:35,077 - root - INFO - step: 30120 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 17:02:35,077 - root - INFO - lr: 1.1565e-05 gnorm: 1.14 [18:28:24< 6:03:34] +[titan] 2025-10-05 17:02:45,895 - root - INFO - step: 30125 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 17:02:45,895 - root - INFO - lr: 1.1558e-05 gnorm: 1.13 [18:28:34< 6:03:23] +[titan] 2025-10-05 17:02:56,710 - root - INFO - step: 30130 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 17:02:56,710 - root - INFO - lr: 
1.1552e-05 gnorm: 1.13 [18:28:45< 6:03:12] +[titan] 2025-10-05 17:03:07,565 - root - INFO - step: 30135 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8142 +[titan] 2025-10-05 17:03:07,565 - root - INFO - lr: 1.1546e-05 gnorm: 1.11 [18:28:56< 6:03:01] +[titan] 2025-10-05 17:03:18,382 - root - INFO - step: 30140 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 17:03:18,382 - root - INFO - lr: 1.1539e-05 gnorm: 1.21 [18:29:07< 6:02:50] +[titan] 2025-10-05 17:03:29,277 - root - INFO - step: 30145 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 17:03:29,277 - root - INFO - lr: 1.1533e-05 gnorm: 1.14 [18:29:18< 6:02:39] +[titan] 2025-10-05 17:03:37,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:03:40,104 - root - INFO - step: 30150 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 17:03:40,104 - root - INFO - lr: 1.1527e-05 gnorm: 1.13 [18:29:29< 6:02:28] +[titan] 2025-10-05 17:03:50,940 - root - INFO - step: 30155 loss: 2.0613 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:03:50,940 - root - INFO - lr: 1.1521e-05 gnorm: 1.15 [18:29:39< 6:02:17] +[titan] 2025-10-05 17:04:01,762 - root - INFO - step: 30160 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 17:04:01,762 - root - INFO - lr: 1.1514e-05 gnorm: 1.14 [18:29:50< 6:02:05] +[titan] 2025-10-05 17:04:12,567 - root - INFO - step: 30165 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 17:04:12,567 - root - INFO - lr: 1.1508e-05 gnorm: 1.12 [18:30:01< 6:01:54] +[titan] 2025-10-05 17:04:23,420 - root - INFO - step: 30170 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:04:23,420 - root - INFO - lr: 1.1502e-05 gnorm: 1.12 [18:30:12< 6:01:43] +[titan] 2025-10-05 17:04:34,282 - root - INFO - step: 30175 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 17:04:34,282 - root - INFO - lr: 1.1495e-05 gnorm: 1.12 [18:30:23< 6:01:32] +[titan] 2025-10-05 17:04:45,111 - root - INFO - step: 30180 loss: 1.9784 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:04:45,111 - root - INFO - lr: 
1.1489e-05 gnorm: 1.16 [18:30:34< 6:01:21] +[titan] 2025-10-05 17:04:55,961 - root - INFO - step: 30185 loss: 2.0025 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:04:55,962 - root - INFO - lr: 1.1483e-05 gnorm: 1.13 [18:30:45< 6:01:10] +[titan] 2025-10-05 17:05:06,781 - root - INFO - step: 30190 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 17:05:06,781 - root - INFO - lr: 1.1477e-05 gnorm: 1.16 [18:30:55< 6:00:59] +[titan] 2025-10-05 17:05:17,581 - root - INFO - step: 30195 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:05:17,581 - root - INFO - lr: 1.1470e-05 gnorm: 1.16 [18:31:06< 6:00:48] +[titan] 2025-10-05 17:05:26,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:05:28,480 - root - INFO - step: 30200 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 17:05:28,480 - root - INFO - lr: 1.1464e-05 gnorm: 1.13 [18:31:17< 6:00:37] +[titan] 2025-10-05 17:05:39,462 - root - INFO - step: 30205 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 29,840 tflops: 413.98 mfu: 41.86% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 17:05:39,462 - root - INFO - lr: 1.1458e-05 gnorm: 1.11 [18:31:28< 6:00:26] +[titan] 2025-10-05 17:05:46,166 - root - INFO - Dumping profiler traces at step 30208 +[titan] 2025-10-05 17:05:46,205 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:05:50,671 - root - INFO - step: 30210 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 29,233 tflops: 405.56 mfu: 41.01% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7633 +[titan] 2025-10-05 17:05:50,672 - root - INFO - lr: 1.1452e-05 gnorm: 1.14 [18:31:39< 6:00:15] +[titan] 2025-10-05 17:06:01,511 - root - INFO - step: 30215 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 17:06:01,511 - root - INFO - lr: 1.1445e-05 gnorm: 1.17 [18:31:50< 6:00:03] +[titan] 2025-10-05 17:06:12,360 - root - INFO - step: 30220 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7378 +[titan] 2025-10-05 17:06:12,360 - root - INFO - lr: 1.1439e-05 gnorm: 1.11 [18:32:01< 5:59:52] +[titan] 2025-10-05 17:06:23,184 - root - INFO - step: 30225 loss: 2.0049 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 17:06:23,184 - root - INFO - lr: 1.1433e-05 gnorm: 1.13 [18:32:12< 5:59:41] +[titan] 2025-10-05 17:06:34,073 - root - INFO - step: 30230 loss: 
1.9745 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 17:06:34,073 - root - INFO - lr: 1.1427e-05 gnorm: 1.15 [18:32:23< 5:59:30] +[titan] 2025-10-05 17:06:44,900 - root - INFO - step: 30235 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7525 +[titan] 2025-10-05 17:06:44,900 - root - INFO - lr: 1.1420e-05 gnorm: 1.11 [18:32:33< 5:59:19] +[titan] 2025-10-05 17:06:55,740 - root - INFO - step: 30240 loss: 1.9188 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 17:06:55,740 - root - INFO - lr: 1.1414e-05 gnorm: 1.16 [18:32:44< 5:59:08] +[titan] 2025-10-05 17:07:06,541 - root - INFO - step: 30245 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:07:06,541 - root - INFO - lr: 1.1408e-05 gnorm: 1.13 [18:32:55< 5:58:57] +[titan] 2025-10-05 17:07:15,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:07:17,391 - root - INFO - step: 30250 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 17:07:17,392 - root - INFO - lr: 1.1402e-05 gnorm: 1.17 [18:33:06< 5:58:46] +[titan] 2025-10-05 17:07:28,241 - root - INFO - step: 30255 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 17:07:28,241 - root - INFO - lr: 1.1395e-05 gnorm: 1.18 [18:33:17< 5:58:35] +[titan] 2025-10-05 17:07:39,102 - root - INFO - step: 30260 loss: 2.0013 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 17:07:39,103 - root - INFO - lr: 1.1389e-05 gnorm: 1.12 [18:33:28< 5:58:24] +[titan] 2025-10-05 17:07:49,999 - root - INFO - step: 30265 loss: 1.9338 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 17:07:49,999 - root - INFO - lr: 1.1383e-05 gnorm: 1.16 [18:33:39< 5:58:12] +[titan] 2025-10-05 17:08:00,848 - root - INFO - step: 30270 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 17:08:00,848 - root - INFO - lr: 1.1377e-05 gnorm: 1.17 [18:33:49< 5:58:01] +[titan] 2025-10-05 17:08:11,692 - root - INFO - step: 30275 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 17:08:11,692 - root - INFO - lr: 1.1370e-05 gnorm: 1.17 [18:34:00< 5:57:50] +[titan] 2025-10-05 17:08:22,552 - root - INFO - step: 30280 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:08:22,553 - root - INFO - lr: 
1.1364e-05 gnorm: 1.18 [18:34:11< 5:57:39] +[titan] 2025-10-05 17:08:33,450 - root - INFO - step: 30285 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 17:08:33,450 - root - INFO - lr: 1.1358e-05 gnorm: 1.11 [18:34:22< 5:57:28] +[titan] 2025-10-05 17:08:44,280 - root - INFO - step: 30290 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 17:08:44,280 - root - INFO - lr: 1.1352e-05 gnorm: 1.13 [18:34:33< 5:57:17] +[titan] 2025-10-05 17:08:55,139 - root - INFO - step: 30295 loss: 2.0245 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 17:08:55,139 - root - INFO - lr: 1.1346e-05 gnorm: 1.13 [18:34:44< 5:57:06] +[titan] 2025-10-05 17:09:03,788 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:09:05,964 - root - INFO - step: 30300 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 17:09:05,964 - root - INFO - lr: 1.1339e-05 gnorm: 1.17 [18:34:54< 5:56:55] +[titan] 2025-10-05 17:09:16,818 - root - INFO - step: 30305 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 17:09:16,818 - root - INFO - lr: 1.1333e-05 gnorm: 1.16 [18:35:05< 5:56:44] +[titan] 2025-10-05 17:09:27,662 - root - INFO - step: 30310 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 17:09:27,662 - root - INFO - lr: 1.1327e-05 gnorm: 1.15 [18:35:16< 5:56:33] +[titan] 2025-10-05 17:09:38,520 - root - INFO - step: 30315 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 17:09:38,520 - root - INFO - lr: 1.1321e-05 gnorm: 1.14 [18:35:27< 5:56:21] +[titan] 2025-10-05 17:09:49,395 - root - INFO - step: 30320 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 17:09:49,395 - root - INFO - lr: 1.1315e-05 gnorm: 1.14 [18:35:38< 5:56:10] +[titan] 2025-10-05 17:10:00,277 - root - INFO - step: 30325 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 17:10:00,277 - root - INFO - lr: 1.1308e-05 gnorm: 1.15 [18:35:49< 5:55:59] +[titan] 2025-10-05 17:10:11,173 - root - INFO - step: 30330 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:10:11,174 - root - INFO - lr: 
1.1302e-05 gnorm: 1.15 [18:36:00< 5:55:48] +[titan] 2025-10-05 17:10:22,000 - root - INFO - step: 30335 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 17:10:22,000 - root - INFO - lr: 1.1296e-05 gnorm: 1.18 [18:36:11< 5:55:37] +[titan] 2025-10-05 17:10:32,877 - root - INFO - step: 30340 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 17:10:32,877 - root - INFO - lr: 1.1290e-05 gnorm: 1.13 [18:36:21< 5:55:26] +[titan] 2025-10-05 17:10:43,769 - root - INFO - step: 30345 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:10:43,769 - root - INFO - lr: 1.1284e-05 gnorm: 1.15 [18:36:32< 5:55:15] +[titan] 2025-10-05 17:10:52,408 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:10:54,603 - root - INFO - step: 30350 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 17:10:54,603 - root - INFO - lr: 1.1277e-05 gnorm: 1.15 [18:36:43< 5:55:04] +[titan] 2025-10-05 17:11:05,438 - root - INFO - step: 30355 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:11:05,438 - root - INFO - lr: 1.1271e-05 gnorm: 1.14 [18:36:54< 5:54:53] +[titan] 2025-10-05 17:11:16,300 - root - INFO - step: 30360 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 17:11:16,300 - root - INFO - lr: 1.1265e-05 gnorm: 1.11 [18:37:05< 5:54:42] +[titan] 2025-10-05 17:11:27,159 - root - INFO - step: 30365 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:11:27,159 - root - INFO - lr: 1.1259e-05 gnorm: 1.11 [18:37:16< 5:54:30] +[titan] 2025-10-05 17:11:38,071 - root - INFO - step: 30370 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 17:11:38,071 - root - INFO - lr: 1.1253e-05 gnorm: 1.18 [18:37:27< 5:54:19] +[titan] 2025-10-05 17:11:48,937 - root - INFO - step: 30375 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:11:48,937 - root - INFO - lr: 1.1247e-05 gnorm: 1.15 [18:37:37< 5:54:08] +[titan] 2025-10-05 17:11:59,780 - root - INFO - step: 30380 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 17:11:59,780 - root - INFO - lr: 
1.1240e-05 gnorm: 1.13 [18:37:48< 5:53:57] +[titan] 2025-10-05 17:12:10,619 - root - INFO - step: 30385 loss: 1.9947 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 17:12:10,620 - root - INFO - lr: 1.1234e-05 gnorm: 1.15 [18:37:59< 5:53:46] +[titan] 2025-10-05 17:12:21,479 - root - INFO - step: 30390 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 17:12:21,479 - root - INFO - lr: 1.1228e-05 gnorm: 1.11 [18:38:10< 5:53:35] +[titan] 2025-10-05 17:12:32,330 - root - INFO - step: 30395 loss: 1.9584 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:12:32,330 - root - INFO - lr: 1.1222e-05 gnorm: 1.12 [18:38:21< 5:53:24] +[titan] 2025-10-05 17:12:41,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:12:43,230 - root - INFO - step: 30400 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 17:12:43,230 - root - INFO - lr: 1.1216e-05 gnorm: 1.16 [18:38:32< 5:53:13] +[titan] 2025-10-05 17:12:54,073 - root - INFO - step: 30405 loss: 1.9890 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 17:12:54,073 - root - INFO - lr: 1.1210e-05 gnorm: 1.19 [18:38:43< 5:53:02] +[titan] 2025-10-05 17:13:04,941 - root - INFO - step: 30410 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 17:13:04,942 - root - INFO - lr: 1.1203e-05 gnorm: 1.16 [18:38:53< 5:52:51] +[titan] 2025-10-05 17:13:15,791 - root - INFO - step: 30415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 17:13:15,791 - root - INFO - lr: 1.1197e-05 gnorm: 1.18 [18:39:04< 5:52:40] +[titan] 2025-10-05 17:13:26,642 - root - INFO - step: 30420 loss: 2.0087 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 17:13:26,642 - root - INFO - lr: 1.1191e-05 gnorm: 1.13 [18:39:15< 5:52:28] +[titan] 2025-10-05 17:13:37,590 - root - INFO - step: 30425 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.26 mfu: 41.99% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 17:13:37,590 - root - INFO - lr: 1.1185e-05 gnorm: 1.13 [18:39:26< 5:52:17] +[titan] 2025-10-05 17:13:48,481 - root - INFO - step: 30430 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7083 +[titan] 2025-10-05 17:13:48,481 - root - INFO - lr: 
1.1179e-05 gnorm: 1.22 [18:39:37< 5:52:06] +[titan] 2025-10-05 17:13:59,341 - root - INFO - step: 30435 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 17:13:59,341 - root - INFO - lr: 1.1173e-05 gnorm: 1.10 [18:39:48< 5:51:55] +[titan] 2025-10-05 17:14:10,199 - root - INFO - step: 30440 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 17:14:10,199 - root - INFO - lr: 1.1166e-05 gnorm: 1.15 [18:39:59< 5:51:44] +[titan] 2025-10-05 17:14:21,050 - root - INFO - step: 30445 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:14:21,050 - root - INFO - lr: 1.1160e-05 gnorm: 1.17 [18:40:10< 5:51:33] +[titan] 2025-10-05 17:14:29,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:14:31,915 - root - INFO - step: 30450 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:14:31,915 - root - INFO - lr: 1.1154e-05 gnorm: 1.13 [18:40:20< 5:51:22] +[titan] 2025-10-05 17:14:42,853 - root - INFO - step: 30455 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 17:14:42,853 - root - INFO - lr: 1.1148e-05 gnorm: 1.15 [18:40:31< 5:51:11] +[titan] 2025-10-05 17:14:53,689 - root - INFO - step: 30460 loss: 1.9279 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:14:53,689 - root - INFO - lr: 1.1142e-05 gnorm: 1.16 [18:40:42< 5:51:00] +[titan] 2025-10-05 17:15:04,539 - root - INFO - step: 30465 loss: 1.9730 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7446 +[titan] 2025-10-05 17:15:04,539 - root - INFO - lr: 1.1136e-05 gnorm: 1.13 [18:40:53< 5:50:49] +[titan] 2025-10-05 17:15:15,418 - root - INFO - step: 30470 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 17:15:15,418 - root - INFO - lr: 1.1130e-05 gnorm: 1.20 [18:41:04< 5:50:38] +[titan] 2025-10-05 17:15:26,296 - root - INFO - step: 30475 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 17:15:26,296 - root - INFO - lr: 1.1124e-05 gnorm: 1.13 [18:41:15< 5:50:26] +[titan] 2025-10-05 17:15:37,128 - root - INFO - step: 30480 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 17:15:37,129 - root - INFO - lr: 
1.1117e-05 gnorm: 1.16 [18:41:26< 5:50:15] +[titan] 2025-10-05 17:15:48,020 - root - INFO - step: 30485 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 17:15:48,020 - root - INFO - lr: 1.1111e-05 gnorm: 1.16 [18:41:37< 5:50:04] +[titan] 2025-10-05 17:15:58,881 - root - INFO - step: 30490 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 17:15:58,881 - root - INFO - lr: 1.1105e-05 gnorm: 1.13 [18:41:47< 5:49:53] +[titan] 2025-10-05 17:16:09,737 - root - INFO - step: 30495 loss: 2.0163 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7828 +[titan] 2025-10-05 17:16:09,738 - root - INFO - lr: 1.1099e-05 gnorm: 1.13 [18:41:58< 5:49:42] +[titan] 2025-10-05 17:16:18,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:16:20,594 - root - INFO - step: 30500 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:16:20,594 - root - INFO - lr: 1.1093e-05 gnorm: 1.15 [18:42:09< 5:49:31] +[titan] 2025-10-05 17:16:31,472 - root - INFO - step: 30505 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7582 +[titan] 2025-10-05 17:16:31,472 - root - INFO - lr: 1.1087e-05 gnorm: 1.19 [18:42:20< 5:49:20] +[titan] 2025-10-05 17:16:42,399 - root - INFO - step: 30510 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:16:42,399 - root - INFO - lr: 1.1081e-05 gnorm: 1.14 [18:42:31< 5:49:09] +[titan] 2025-10-05 17:16:53,259 - root - INFO - step: 30515 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 17:16:53,259 - root - INFO - lr: 1.1075e-05 gnorm: 1.15 [18:42:42< 5:48:58] +[titan] 2025-10-05 17:17:04,140 - root - INFO - step: 30520 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 17:17:04,140 - root - INFO - lr: 1.1069e-05 gnorm: 1.13 [18:42:53< 5:48:47] +[titan] 2025-10-05 17:17:14,989 - root - INFO - step: 30525 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 17:17:14,989 - root - INFO - lr: 1.1063e-05 gnorm: 1.36 [18:43:03< 5:48:36] +[titan] 2025-10-05 17:17:25,901 - root - INFO - step: 30530 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:17:25,902 - root - INFO - lr: 
1.1056e-05 gnorm: 1.14 [18:43:14< 5:48:25] +[titan] 2025-10-05 17:17:36,768 - root - INFO - step: 30535 loss: 2.0575 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8191 +[titan] 2025-10-05 17:17:36,768 - root - INFO - lr: 1.1050e-05 gnorm: 1.17 [18:43:25< 5:48:13] +[titan] 2025-10-05 17:17:47,700 - root - INFO - step: 30540 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 17:17:47,700 - root - INFO - lr: 1.1044e-05 gnorm: 1.12 [18:43:36< 5:48:02] +[titan] 2025-10-05 17:17:58,569 - root - INFO - step: 30545 loss: 1.9982 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 17:17:58,569 - root - INFO - lr: 1.1038e-05 gnorm: 1.13 [18:43:47< 5:47:51] +[titan] 2025-10-05 17:18:07,246 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:18:09,461 - root - INFO - step: 30550 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7963 +[titan] 2025-10-05 17:18:09,461 - root - INFO - lr: 1.1032e-05 gnorm: 1.15 [18:43:58< 5:47:40] +[titan] 2025-10-05 17:18:20,334 - root - INFO - step: 30555 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 17:18:20,335 - root - INFO - lr: 1.1026e-05 gnorm: 1.13 [18:44:09< 5:47:29] +[titan] 2025-10-05 17:18:31,222 - root - INFO - step: 30560 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:18:31,222 - root - INFO - lr: 1.1020e-05 gnorm: 1.16 [18:44:20< 5:47:18] +[titan] 2025-10-05 17:18:42,115 - root - INFO - step: 30565 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 17:18:42,116 - root - INFO - lr: 1.1014e-05 gnorm: 1.18 [18:44:31< 5:47:07] +[titan] 2025-10-05 17:18:52,976 - root - INFO - step: 30570 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7939 +[titan] 2025-10-05 17:18:52,977 - root - INFO - lr: 1.1008e-05 gnorm: 1.15 [18:44:41< 5:46:56] +[titan] 2025-10-05 17:19:03,822 - root - INFO - step: 30575 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 17:19:03,822 - root - INFO - lr: 1.1002e-05 gnorm: 1.13 [18:44:52< 5:46:45] +[titan] 2025-10-05 17:19:14,680 - root - INFO - step: 30580 loss: 1.9714 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 17:19:14,680 - root - INFO - lr: 
1.0996e-05 gnorm: 1.15 [18:45:03< 5:46:34] +[titan] 2025-10-05 17:19:25,560 - root - INFO - step: 30585 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 17:19:25,560 - root - INFO - lr: 1.0990e-05 gnorm: 1.12 [18:45:14< 5:46:23] +[titan] 2025-10-05 17:19:36,432 - root - INFO - step: 30590 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7819 +[titan] 2025-10-05 17:19:36,432 - root - INFO - lr: 1.0984e-05 gnorm: 1.18 [18:45:25< 5:46:11] +[titan] 2025-10-05 17:19:47,343 - root - INFO - step: 30595 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 17:19:47,343 - root - INFO - lr: 1.0977e-05 gnorm: 1.12 [18:45:36< 5:46:00] +[titan] 2025-10-05 17:19:56,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:19:58,231 - root - INFO - step: 30600 loss: 2.0557 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 17:19:58,231 - root - INFO - lr: 1.0971e-05 gnorm: 1.17 [18:45:47< 5:45:49] +[titan] 2025-10-05 17:20:09,100 - root - INFO - step: 30605 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:20:09,100 - root - INFO - lr: 1.0965e-05 gnorm: 1.15 [18:45:58< 5:45:38] +[titan] 2025-10-05 17:20:19,957 - root - INFO - step: 30610 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 17:20:19,957 - root - INFO - lr: 1.0959e-05 gnorm: 1.11 [18:46:08< 5:45:27] +[titan] 2025-10-05 17:20:30,886 - root - INFO - step: 30615 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 17:20:30,886 - root - INFO - lr: 1.0953e-05 gnorm: 1.14 [18:46:19< 5:45:16] +[titan] 2025-10-05 17:20:41,762 - root - INFO - step: 30620 loss: 1.9612 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:20:41,763 - root - INFO - lr: 1.0947e-05 gnorm: 1.19 [18:46:30< 5:45:05] +[titan] 2025-10-05 17:20:52,672 - root - INFO - step: 30625 loss: 1.9688 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.13% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7412 +[titan] 2025-10-05 17:20:52,672 - root - INFO - lr: 1.0941e-05 gnorm: 1.14 [18:46:41< 5:44:54] +[titan] 2025-10-05 17:21:03,551 - root - INFO - step: 30630 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 17:21:03,551 - root - INFO - lr: 
1.0935e-05 gnorm: 1.13 [18:46:52< 5:44:43] +[titan] 2025-10-05 17:21:14,413 - root - INFO - step: 30635 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 17:21:14,413 - root - INFO - lr: 1.0929e-05 gnorm: 1.13 [18:47:03< 5:44:32] +[titan] 2025-10-05 17:21:25,276 - root - INFO - step: 30640 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 17:21:25,276 - root - INFO - lr: 1.0923e-05 gnorm: 1.18 [18:47:14< 5:44:21] +[titan] 2025-10-05 17:21:36,129 - root - INFO - step: 30645 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 17:21:36,129 - root - INFO - lr: 1.0917e-05 gnorm: 1.13 [18:47:25< 5:44:10] +[titan] 2025-10-05 17:21:44,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:21:47,049 - root - INFO - step: 30650 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:21:47,049 - root - INFO - lr: 1.0911e-05 gnorm: 1.12 [18:47:36< 5:43:58] +[titan] 2025-10-05 17:21:57,919 - root - INFO - step: 30655 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 17:21:57,919 - root - INFO - lr: 1.0905e-05 gnorm: 1.17 [18:47:46< 5:43:47] +[titan] 2025-10-05 17:22:08,772 - root - INFO - step: 30660 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:22:08,772 - root - INFO - lr: 1.0899e-05 gnorm: 1.14 [18:47:57< 5:43:36] +[titan] 2025-10-05 17:22:19,639 - root - INFO - step: 30665 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7649 +[titan] 2025-10-05 17:22:19,639 - root - INFO - lr: 1.0893e-05 gnorm: 1.17 [18:48:08< 5:43:25] +[titan] 2025-10-05 17:22:30,511 - root - INFO - step: 30670 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 17:22:30,511 - root - INFO - lr: 1.0887e-05 gnorm: 1.15 [18:48:19< 5:43:14] +[titan] 2025-10-05 17:22:41,385 - root - INFO - step: 30675 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 17:22:41,385 - root - INFO - lr: 1.0881e-05 gnorm: 1.13 [18:48:30< 5:43:03] +[titan] 2025-10-05 17:22:52,312 - root - INFO - step: 30680 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:22:52,312 - root - INFO - lr: 
1.0875e-05 gnorm: 1.15 [18:48:41< 5:42:52] +[titan] 2025-10-05 17:23:03,165 - root - INFO - step: 30685 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 17:23:03,165 - root - INFO - lr: 1.0869e-05 gnorm: 1.13 [18:48:52< 5:42:41] +[titan] 2025-10-05 17:23:14,020 - root - INFO - step: 30690 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7825 +[titan] 2025-10-05 17:23:14,020 - root - INFO - lr: 1.0863e-05 gnorm: 1.14 [18:49:03< 5:42:30] +[titan] 2025-10-05 17:23:24,876 - root - INFO - step: 30695 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:23:24,876 - root - INFO - lr: 1.0857e-05 gnorm: 1.15 [18:49:13< 5:42:19] +[titan] 2025-10-05 17:23:33,557 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:23:35,744 - root - INFO - step: 30700 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:23:35,744 - root - INFO - lr: 1.0851e-05 gnorm: 1.12 [18:49:24< 5:42:08] +[titan] 2025-10-05 17:23:46,630 - root - INFO - step: 30705 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 17:23:46,630 - root - INFO - lr: 1.0845e-05 gnorm: 1.14 [18:49:35< 5:41:56] +[titan] 2025-10-05 17:23:57,506 - root - INFO - step: 30710 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:23:57,506 - root - INFO - lr: 1.0839e-05 gnorm: 1.17 [18:49:46< 5:41:45] +[titan] 2025-10-05 17:24:08,364 - root - INFO - step: 30715 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:24:08,364 - root - INFO - lr: 1.0833e-05 gnorm: 1.13 [18:49:57< 5:41:34] +[titan] 2025-10-05 17:24:19,332 - root - INFO - step: 30720 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 29,876 tflops: 414.48 mfu: 41.91% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7148 +[titan] 2025-10-05 17:24:19,332 - root - INFO - lr: 1.0827e-05 gnorm: 1.11 [18:50:08< 5:41:23] +[titan] 2025-10-05 17:24:19,513 - root - INFO - Dumping profiler traces at step 30720 +[titan] 2025-10-05 17:24:19,551 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:24:30,456 - root - INFO - step: 30725 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 29,458 tflops: 408.69 mfu: 41.32% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 17:24:30,456 - root - INFO - lr: 1.0821e-05 gnorm: 1.13 [18:50:19< 5:41:12] +[titan] 2025-10-05 17:24:41,338 - root - INFO - step: 30730 loss: 
1.9783 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 17:24:41,338 - root - INFO - lr: 1.0815e-05 gnorm: 1.11 [18:50:30< 5:41:01] +[titan] 2025-10-05 17:24:52,229 - root - INFO - step: 30735 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7376 +[titan] 2025-10-05 17:24:52,229 - root - INFO - lr: 1.0809e-05 gnorm: 1.15 [18:50:41< 5:40:50] +[titan] 2025-10-05 17:25:03,105 - root - INFO - step: 30740 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7807 +[titan] 2025-10-05 17:25:03,105 - root - INFO - lr: 1.0803e-05 gnorm: 1.23 [18:50:52< 5:40:39] +[titan] 2025-10-05 17:25:13,996 - root - INFO - step: 30745 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:25:13,996 - root - INFO - lr: 1.0797e-05 gnorm: 1.16 [18:51:02< 5:40:28] +[titan] 2025-10-05 17:25:22,693 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:25:24,892 - root - INFO - step: 30750 loss: 2.0403 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 17:25:24,892 - root - INFO - lr: 1.0791e-05 gnorm: 1.21 [18:51:13< 5:40:17] +[titan] 2025-10-05 17:25:35,755 - root - INFO - step: 30755 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 17:25:35,755 - root - INFO - lr: 1.0785e-05 gnorm: 1.15 [18:51:24< 5:40:06] +[titan] 2025-10-05 17:25:46,627 - root - INFO - step: 30760 loss: 1.9424 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:25:46,627 - root - INFO - lr: 1.0779e-05 gnorm: 1.14 [18:51:35< 5:39:55] +[titan] 2025-10-05 17:25:57,513 - root - INFO - step: 30765 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8128 +[titan] 2025-10-05 17:25:57,513 - root - INFO - lr: 1.0773e-05 gnorm: 1.16 [18:51:46< 5:39:44] +[titan] 2025-10-05 17:26:08,369 - root - INFO - step: 30770 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:26:08,369 - root - INFO - lr: 1.0767e-05 gnorm: 1.15 [18:51:57< 5:39:32] +[titan] 2025-10-05 17:26:19,291 - root - INFO - step: 30775 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 17:26:19,291 - root - INFO - lr: 1.0761e-05 gnorm: 1.16 [18:52:08< 5:39:21] +[titan] 2025-10-05 17:26:30,180 - root - INFO - step: 30780 loss: 1.9939 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7628 +[titan] 2025-10-05 17:26:30,180 - root - INFO - lr: 
1.0755e-05 gnorm: 1.14 [18:52:19< 5:39:10] +[titan] 2025-10-05 17:26:41,064 - root - INFO - step: 30785 loss: 2.0227 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 17:26:41,065 - root - INFO - lr: 1.0749e-05 gnorm: 1.14 [18:52:30< 5:38:59] +[titan] 2025-10-05 17:26:51,961 - root - INFO - step: 30790 loss: 1.9654 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:26:51,961 - root - INFO - lr: 1.0743e-05 gnorm: 1.11 [18:52:40< 5:38:48] +[titan] 2025-10-05 17:27:02,841 - root - INFO - step: 30795 loss: 2.0724 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 17:27:02,841 - root - INFO - lr: 1.0737e-05 gnorm: 1.16 [18:52:51< 5:38:37] +[titan] 2025-10-05 17:27:11,522 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:27:13,697 - root - INFO - step: 30800 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 17:27:13,697 - root - INFO - lr: 1.0731e-05 gnorm: 1.14 [18:53:02< 5:38:26] +[titan] 2025-10-05 17:27:24,566 - root - INFO - step: 30805 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 17:27:24,566 - root - INFO - lr: 1.0725e-05 gnorm: 1.15 [18:53:13< 5:38:15] +[titan] 2025-10-05 17:27:35,469 - root - INFO - step: 30810 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 17:27:35,469 - root - INFO - lr: 1.0719e-05 gnorm: 1.14 [18:53:24< 5:38:04] +[titan] 2025-10-05 17:27:46,339 - root - INFO - step: 30815 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 17:27:46,340 - root - INFO - lr: 1.0713e-05 gnorm: 1.16 [18:53:35< 5:37:53] +[titan] 2025-10-05 17:27:57,238 - root - INFO - step: 30820 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 17:27:57,239 - root - INFO - lr: 1.0707e-05 gnorm: 1.12 [18:53:46< 5:37:42] +[titan] 2025-10-05 17:28:08,105 - root - INFO - step: 30825 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 17:28:08,105 - root - INFO - lr: 1.0702e-05 gnorm: 1.12 [18:53:57< 5:37:31] +[titan] 2025-10-05 17:28:18,970 - root - INFO - step: 30830 loss: 1.8472 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6336 +[titan] 2025-10-05 17:28:18,971 - root - INFO - lr: 
1.0696e-05 gnorm: 1.15 [18:54:07< 5:37:20] +[titan] 2025-10-05 17:28:29,843 - root - INFO - step: 30835 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:28:29,844 - root - INFO - lr: 1.0690e-05 gnorm: 1.13 [18:54:18< 5:37:08] +[titan] 2025-10-05 17:28:40,744 - root - INFO - step: 30840 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 17:28:40,744 - root - INFO - lr: 1.0684e-05 gnorm: 1.13 [18:54:29< 5:36:57] +[titan] 2025-10-05 17:28:51,648 - root - INFO - step: 30845 loss: 1.9017 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6818 +[titan] 2025-10-05 17:28:51,648 - root - INFO - lr: 1.0678e-05 gnorm: 1.14 [18:54:40< 5:36:46] +[titan] 2025-10-05 17:29:00,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:29:02,544 - root - INFO - step: 30850 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 17:29:02,544 - root - INFO - lr: 1.0672e-05 gnorm: 1.15 [18:54:51< 5:36:35] +[titan] 2025-10-05 17:29:13,430 - root - INFO - step: 30855 loss: 1.9892 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:13,431 - root - INFO - lr: 1.0666e-05 gnorm: 1.16 [18:55:02< 5:36:24] +[titan] 2025-10-05 17:29:24,310 - root - INFO - step: 30860 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 17:29:24,310 - root - INFO - lr: 1.0660e-05 gnorm: 1.12 [18:55:13< 5:36:13] +[titan] 2025-10-05 17:29:35,178 - root - INFO - step: 30865 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:35,178 - root - INFO - lr: 1.0654e-05 gnorm: 1.16 [18:55:24< 5:36:02] +[titan] 2025-10-05 17:29:46,070 - root - INFO - step: 30870 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 17:29:46,070 - root - INFO - lr: 1.0648e-05 gnorm: 1.13 [18:55:35< 5:35:51] +[titan] 2025-10-05 17:29:56,949 - root - INFO - step: 30875 loss: 1.9562 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7292 +[titan] 2025-10-05 17:29:56,949 - root - INFO - lr: 1.0642e-05 gnorm: 1.14 [18:55:45< 5:35:40] +[titan] 2025-10-05 17:30:07,804 - root - INFO - step: 30880 loss: 2.0097 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 17:30:07,804 - root - INFO - lr: 
1.0636e-05 gnorm: 1.15 [18:55:56< 5:35:29] +[titan] 2025-10-05 17:30:18,658 - root - INFO - step: 30885 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 17:30:18,658 - root - INFO - lr: 1.0630e-05 gnorm: 1.17 [18:56:07< 5:35:18] +[titan] 2025-10-05 17:30:29,536 - root - INFO - step: 30890 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 17:30:29,536 - root - INFO - lr: 1.0625e-05 gnorm: 1.16 [18:56:18< 5:35:07] +[titan] 2025-10-05 17:30:40,429 - root - INFO - step: 30895 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7591 +[titan] 2025-10-05 17:30:40,429 - root - INFO - lr: 1.0619e-05 gnorm: 1.14 [18:56:29< 5:34:55] +[titan] 2025-10-05 17:30:49,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:30:51,303 - root - INFO - step: 30900 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 17:30:51,303 - root - INFO - lr: 1.0613e-05 gnorm: 1.15 [18:56:40< 5:34:44] +[titan] 2025-10-05 17:31:02,242 - root - INFO - step: 30905 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:31:02,242 - root - INFO - lr: 1.0607e-05 gnorm: 1.14 [18:56:51< 5:34:33] +[titan] 2025-10-05 17:31:13,130 - root - INFO - step: 30910 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 17:31:13,130 - root - INFO - lr: 1.0601e-05 gnorm: 1.25 [18:57:02< 5:34:22] +[titan] 2025-10-05 17:31:24,016 - root - INFO - step: 30915 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7966 +[titan] 2025-10-05 17:31:24,017 - root - INFO - lr: 1.0595e-05 gnorm: 1.13 [18:57:12< 5:34:11] +[titan] 2025-10-05 17:31:34,902 - root - INFO - step: 30920 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 17:31:34,903 - root - INFO - lr: 1.0589e-05 gnorm: 1.11 [18:57:23< 5:34:00] +[titan] 2025-10-05 17:31:45,757 - root - INFO - step: 30925 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:31:45,758 - root - INFO - lr: 1.0583e-05 gnorm: 1.14 [18:57:34< 5:33:49] +[titan] 2025-10-05 17:31:56,639 - root - INFO - step: 30930 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:31:56,639 - root - INFO - lr: 
1.0577e-05 gnorm: 1.15 [18:57:45< 5:33:38] +[titan] 2025-10-05 17:32:07,510 - root - INFO - step: 30935 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 17:32:07,510 - root - INFO - lr: 1.0572e-05 gnorm: 1.14 [18:57:56< 5:33:27] +[titan] 2025-10-05 17:32:18,361 - root - INFO - step: 30940 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 17:32:18,361 - root - INFO - lr: 1.0566e-05 gnorm: 1.16 [18:58:07< 5:33:16] +[titan] 2025-10-05 17:32:29,229 - root - INFO - step: 30945 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:32:29,229 - root - INFO - lr: 1.0560e-05 gnorm: 1.17 [18:58:18< 5:33:05] +[titan] 2025-10-05 17:32:37,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:32:40,069 - root - INFO - step: 30950 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:32:40,069 - root - INFO - lr: 1.0554e-05 gnorm: 1.13 [18:58:29< 5:32:54] +[titan] 2025-10-05 17:32:50,918 - root - INFO - step: 30955 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 17:32:50,918 - root - INFO - lr: 1.0548e-05 gnorm: 1.15 [18:58:39< 5:32:42] +[titan] 2025-10-05 17:33:01,839 - root - INFO - step: 30960 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6694 +[titan] 2025-10-05 17:33:01,840 - root - INFO - lr: 1.0542e-05 gnorm: 1.11 [18:58:50< 5:32:31] +[titan] 2025-10-05 17:33:12,698 - root - INFO - step: 30965 loss: 1.9487 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 17:33:12,698 - root - INFO - lr: 1.0536e-05 gnorm: 1.13 [18:59:01< 5:32:20] +[titan] 2025-10-05 17:33:23,587 - root - INFO - step: 30970 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7005 +[titan] 2025-10-05 17:33:23,587 - root - INFO - lr: 1.0530e-05 gnorm: 1.17 [18:59:12< 5:32:09] +[titan] 2025-10-05 17:33:34,467 - root - INFO - step: 30975 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 17:33:34,467 - root - INFO - lr: 1.0525e-05 gnorm: 1.19 [18:59:23< 5:31:58] +[titan] 2025-10-05 17:33:45,329 - root - INFO - step: 30980 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 17:33:45,329 - root - INFO - lr: 
1.0519e-05 gnorm: 1.21 [18:59:34< 5:31:47] +[titan] 2025-10-05 17:33:56,227 - root - INFO - step: 30985 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7673 +[titan] 2025-10-05 17:33:56,227 - root - INFO - lr: 1.0513e-05 gnorm: 1.14 [18:59:45< 5:31:36] +[titan] 2025-10-05 17:34:07,068 - root - INFO - step: 30990 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 17:34:07,068 - root - INFO - lr: 1.0507e-05 gnorm: 1.14 [18:59:56< 5:31:25] +[titan] 2025-10-05 17:34:17,920 - root - INFO - step: 30995 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 17:34:17,920 - root - INFO - lr: 1.0501e-05 gnorm: 1.13 [19:00:06< 5:31:14] +[titan] 2025-10-05 17:34:26,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:34:28,785 - root - INFO - step: 31000 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:34:28,785 - root - INFO - lr: 1.0495e-05 gnorm: 1.14 [19:00:17< 5:31:03] +[titan] 2025-10-05 17:34:39,677 - root - INFO - step: 31005 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 17:34:39,677 - root - INFO - lr: 1.0490e-05 gnorm: 1.12 [19:00:28< 5:30:52] +[titan] 2025-10-05 17:34:50,557 - root - INFO - step: 31010 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 17:34:50,557 - root - INFO - lr: 1.0484e-05 gnorm: 1.13 [19:00:39< 5:30:41] +[titan] 2025-10-05 17:35:01,441 - root - INFO - step: 31015 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 17:35:01,441 - root - INFO - lr: 1.0478e-05 gnorm: 1.11 [19:00:50< 5:30:29] +[titan] 2025-10-05 17:35:12,298 - root - INFO - step: 31020 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 17:35:12,298 - root - INFO - lr: 1.0472e-05 gnorm: 1.13 [19:01:01< 5:30:18] +[titan] 2025-10-05 17:35:23,148 - root - INFO - step: 31025 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 17:35:23,149 - root - INFO - lr: 1.0466e-05 gnorm: 1.19 [19:01:12< 5:30:07] +[titan] 2025-10-05 17:35:34,041 - root - INFO - step: 31030 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:35:34,042 - root - INFO - lr: 
1.0460e-05 gnorm: 1.14 [19:01:23< 5:29:56] +[titan] 2025-10-05 17:35:44,917 - root - INFO - step: 31035 loss: 2.0130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 17:35:44,917 - root - INFO - lr: 1.0455e-05 gnorm: 1.15 [19:01:33< 5:29:45] +[titan] 2025-10-05 17:35:55,789 - root - INFO - step: 31040 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 17:35:55,789 - root - INFO - lr: 1.0449e-05 gnorm: 1.14 [19:01:44< 5:29:34] +[titan] 2025-10-05 17:36:06,662 - root - INFO - step: 31045 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 17:36:06,663 - root - INFO - lr: 1.0443e-05 gnorm: 1.12 [19:01:55< 5:29:23] +[titan] 2025-10-05 17:36:15,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:36:17,541 - root - INFO - step: 31050 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 17:36:17,541 - root - INFO - lr: 1.0437e-05 gnorm: 1.15 [19:02:06< 5:29:12] +[titan] 2025-10-05 17:36:28,426 - root - INFO - step: 31055 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:36:28,426 - root - INFO - lr: 1.0431e-05 gnorm: 1.15 [19:02:17< 5:29:01] +[titan] 2025-10-05 17:36:39,289 - root - INFO - step: 31060 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 17:36:39,290 - root - INFO - lr: 1.0425e-05 gnorm: 1.14 [19:02:28< 5:28:50] +[titan] 2025-10-05 17:36:50,187 - root - INFO - step: 31065 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 17:36:50,187 - root - INFO - lr: 1.0420e-05 gnorm: 1.16 [19:02:39< 5:28:39] +[titan] 2025-10-05 17:37:01,103 - root - INFO - step: 31070 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 17:37:01,103 - root - INFO - lr: 1.0414e-05 gnorm: 1.19 [19:02:50< 5:28:28] +[titan] 2025-10-05 17:37:11,969 - root - INFO - step: 31075 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:37:11,969 - root - INFO - lr: 1.0408e-05 gnorm: 1.16 [19:03:00< 5:28:17] +[titan] 2025-10-05 17:37:22,843 - root - INFO - step: 31080 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 17:37:22,843 - root - INFO - lr: 
1.0402e-05 gnorm: 1.15 [19:03:11< 5:28:05] +[titan] 2025-10-05 17:37:33,710 - root - INFO - step: 31085 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:37:33,710 - root - INFO - lr: 1.0396e-05 gnorm: 1.14 [19:03:22< 5:27:54] +[titan] 2025-10-05 17:37:44,589 - root - INFO - step: 31090 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 17:37:44,589 - root - INFO - lr: 1.0391e-05 gnorm: 1.15 [19:03:33< 5:27:43] +[titan] 2025-10-05 17:37:55,476 - root - INFO - step: 31095 loss: 1.9001 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6797 +[titan] 2025-10-05 17:37:55,476 - root - INFO - lr: 1.0385e-05 gnorm: 1.14 [19:03:44< 5:27:32] +[titan] 2025-10-05 17:38:04,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:38:06,373 - root - INFO - step: 31100 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:38:06,373 - root - INFO - lr: 1.0379e-05 gnorm: 1.18 [19:03:55< 5:27:21] +[titan] 2025-10-05 17:38:17,276 - root - INFO - step: 31105 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:38:17,277 - root - INFO - lr: 1.0373e-05 gnorm: 1.12 [19:04:06< 5:27:10] +[titan] 2025-10-05 17:38:28,149 - root - INFO - step: 31110 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 17:38:28,149 - root - INFO - lr: 1.0367e-05 gnorm: 1.11 [19:04:17< 5:26:59] +[titan] 2025-10-05 17:38:39,025 - root - INFO - step: 31115 loss: 1.9815 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 17:38:39,025 - root - INFO - lr: 1.0362e-05 gnorm: 1.15 [19:04:27< 5:26:48] +[titan] 2025-10-05 17:38:49,892 - root - INFO - step: 31120 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:38:49,892 - root - INFO - lr: 1.0356e-05 gnorm: 1.13 [19:04:38< 5:26:37] +[titan] 2025-10-05 17:39:00,809 - root - INFO - step: 31125 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 17:39:00,809 - root - INFO - lr: 1.0350e-05 gnorm: 1.12 [19:04:49< 5:26:26] +[titan] 2025-10-05 17:39:11,704 - root - INFO - step: 31130 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 17:39:11,705 - root - INFO - lr: 
1.0344e-05 gnorm: 1.13 [19:05:00< 5:26:15] +[titan] 2025-10-05 17:39:22,594 - root - INFO - step: 31135 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:39:22,595 - root - INFO - lr: 1.0339e-05 gnorm: 1.19 [19:05:11< 5:26:04] +[titan] 2025-10-05 17:39:33,466 - root - INFO - step: 31140 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7578 +[titan] 2025-10-05 17:39:33,467 - root - INFO - lr: 1.0333e-05 gnorm: 1.15 [19:05:22< 5:25:53] +[titan] 2025-10-05 17:39:44,337 - root - INFO - step: 31145 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 17:39:44,337 - root - INFO - lr: 1.0327e-05 gnorm: 1.16 [19:05:33< 5:25:41] +[titan] 2025-10-05 17:39:53,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:39:55,199 - root - INFO - step: 31150 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7763 +[titan] 2025-10-05 17:39:55,199 - root - INFO - lr: 1.0321e-05 gnorm: 1.14 [19:05:44< 5:25:30] +[titan] 2025-10-05 17:40:06,057 - root - INFO - step: 31155 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 17:40:06,057 - root - INFO - lr: 1.0315e-05 gnorm: 1.17 [19:05:55< 5:25:19] +[titan] 2025-10-05 17:40:16,910 - root - INFO - step: 31160 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 17:40:16,910 - root - INFO - lr: 1.0310e-05 gnorm: 1.10 [19:06:05< 5:25:08] +[titan] 2025-10-05 17:40:27,753 - root - INFO - step: 31165 loss: 1.8951 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6755 +[titan] 2025-10-05 17:40:27,753 - root - INFO - lr: 1.0304e-05 gnorm: 1.16 [19:06:16< 5:24:57] +[titan] 2025-10-05 17:40:38,617 - root - INFO - step: 31170 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 17:40:38,617 - root - INFO - lr: 1.0298e-05 gnorm: 1.17 [19:06:27< 5:24:46] +[titan] 2025-10-05 17:40:49,491 - root - INFO - step: 31175 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7707 +[titan] 2025-10-05 17:40:49,491 - root - INFO - lr: 1.0292e-05 gnorm: 1.18 [19:06:38< 5:24:35] +[titan] 2025-10-05 17:41:00,364 - root - INFO - step: 31180 loss: 2.0114 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 17:41:00,365 - root - INFO - lr: 
1.0287e-05 gnorm: 1.12 [19:06:49< 5:24:24] +[titan] 2025-10-05 17:41:11,255 - root - INFO - step: 31185 loss: 2.0026 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 17:41:11,255 - root - INFO - lr: 1.0281e-05 gnorm: 1.19 [19:07:00< 5:24:13] +[titan] 2025-10-05 17:41:22,116 - root - INFO - step: 31190 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:41:22,116 - root - INFO - lr: 1.0275e-05 gnorm: 1.10 [19:07:11< 5:24:02] +[titan] 2025-10-05 17:41:32,999 - root - INFO - step: 31195 loss: 1.9088 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 17:41:32,999 - root - INFO - lr: 1.0269e-05 gnorm: 1.13 [19:07:21< 5:23:51] +[titan] 2025-10-05 17:41:41,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:41:43,892 - root - INFO - step: 31200 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:41:43,892 - root - INFO - lr: 1.0264e-05 gnorm: 1.13 [19:07:32< 5:23:40] +[titan] 2025-10-05 17:41:54,767 - root - INFO - step: 31205 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 17:41:54,767 - root - INFO - lr: 1.0258e-05 gnorm: 1.13 [19:07:43< 5:23:28] +[titan] 2025-10-05 17:42:05,616 - root - INFO - step: 31210 loss: 1.9827 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 17:42:05,617 - root - INFO - lr: 1.0252e-05 gnorm: 1.12 [19:07:54< 5:23:17] +[titan] 2025-10-05 17:42:16,473 - root - INFO - step: 31215 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:42:16,473 - root - INFO - lr: 1.0247e-05 gnorm: 1.17 [19:08:05< 5:23:06] +[titan] 2025-10-05 17:42:27,363 - root - INFO - step: 31220 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:42:27,363 - root - INFO - lr: 1.0241e-05 gnorm: 1.16 [19:08:16< 5:22:55] +[titan] 2025-10-05 17:42:38,236 - root - INFO - step: 31225 loss: 1.8762 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 17:42:38,237 - root - INFO - lr: 1.0235e-05 gnorm: 1.18 [19:08:27< 5:22:44] +[titan] 2025-10-05 17:42:49,232 - root - INFO - step: 31230 loss: 2.0595 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.45 mfu: 41.80% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 17:42:49,232 - root - INFO - lr: 
1.0229e-05 gnorm: 1.23 [19:08:38< 5:22:33] +[titan] 2025-10-05 17:42:53,764 - root - INFO - Dumping profiler traces at step 31232 +[titan] 2025-10-05 17:42:53,804 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:43:00,360 - root - INFO - step: 31235 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 29,447 tflops: 408.54 mfu: 41.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 17:43:00,360 - root - INFO - lr: 1.0224e-05 gnorm: 1.10 [19:08:49< 5:22:22] +[titan] 2025-10-05 17:43:11,236 - root - INFO - step: 31240 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 17:43:11,236 - root - INFO - lr: 1.0218e-05 gnorm: 1.12 [19:09:00< 5:22:11] +[titan] 2025-10-05 17:43:22,106 - root - INFO - step: 31245 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 17:43:22,107 - root - INFO - lr: 1.0212e-05 gnorm: 1.14 [19:09:11< 5:22:00] +[titan] 2025-10-05 17:43:30,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:43:32,976 - root - INFO - step: 31250 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:43:32,977 - root - INFO - lr: 1.0207e-05 gnorm: 1.15 [19:09:21< 5:21:49] +[titan] 2025-10-05 17:43:43,850 - root - INFO - step: 31255 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:43:43,850 - root - INFO - lr: 1.0201e-05 gnorm: 1.17 [19:09:32< 5:21:38] +[titan] 2025-10-05 17:43:54,726 - root - INFO - step: 31260 loss: 2.0422 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:43:54,727 - root - INFO - lr: 1.0195e-05 gnorm: 1.16 [19:09:43< 5:21:27] +[titan] 2025-10-05 17:44:05,648 - root - INFO - step: 31265 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:44:05,649 - root - INFO - lr: 1.0189e-05 gnorm: 1.18 [19:09:54< 5:21:16] +[titan] 2025-10-05 17:44:16,493 - root - INFO - step: 31270 loss: 1.9624 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 17:44:16,493 - root - INFO - lr: 1.0184e-05 gnorm: 1.11 [19:10:05< 5:21:05] +[titan] 2025-10-05 17:44:27,352 - root - INFO - step: 31275 loss: 1.9671 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 17:44:27,352 - root - INFO - lr: 1.0178e-05 gnorm: 1.17 [19:10:16< 5:20:53] +[titan] 2025-10-05 17:44:38,191 - root - INFO - step: 31280 loss: 1.9559 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:44:38,191 - root - INFO - lr: 
1.0172e-05 gnorm: 1.11 [19:10:27< 5:20:42] +[titan] 2025-10-05 17:44:49,058 - root - INFO - step: 31285 loss: 2.0070 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 17:44:49,058 - root - INFO - lr: 1.0167e-05 gnorm: 1.16 [19:10:37< 5:20:31] +[titan] 2025-10-05 17:44:59,922 - root - INFO - step: 31290 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 17:44:59,922 - root - INFO - lr: 1.0161e-05 gnorm: 1.16 [19:10:48< 5:20:20] +[titan] 2025-10-05 17:45:10,831 - root - INFO - step: 31295 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 17:45:10,831 - root - INFO - lr: 1.0155e-05 gnorm: 1.14 [19:10:59< 5:20:09] +[titan] 2025-10-05 17:45:19,501 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:45:21,690 - root - INFO - step: 31300 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:45:21,690 - root - INFO - lr: 1.0150e-05 gnorm: 1.13 [19:11:10< 5:19:58] +[titan] 2025-10-05 17:45:32,589 - root - INFO - step: 31305 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 17:45:32,590 - root - INFO - lr: 1.0144e-05 gnorm: 1.14 [19:11:21< 5:19:47] +[titan] 2025-10-05 17:45:43,458 - root - INFO - step: 31310 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 17:45:43,459 - root - INFO - lr: 1.0138e-05 gnorm: 1.16 [19:11:32< 5:19:36] +[titan] 2025-10-05 17:45:54,309 - root - INFO - step: 31315 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7610 +[titan] 2025-10-05 17:45:54,309 - root - INFO - lr: 1.0133e-05 gnorm: 1.15 [19:11:43< 5:19:25] +[titan] 2025-10-05 17:46:05,142 - root - INFO - step: 31320 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 17:46:05,142 - root - INFO - lr: 1.0127e-05 gnorm: 1.11 [19:11:54< 5:19:14] +[titan] 2025-10-05 17:46:16,012 - root - INFO - step: 31325 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 17:46:16,012 - root - INFO - lr: 1.0121e-05 gnorm: 1.22 [19:12:04< 5:19:03] +[titan] 2025-10-05 17:46:26,886 - root - INFO - step: 31330 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 17:46:26,886 - root - INFO - lr: 
1.0116e-05 gnorm: 1.16 [19:12:15< 5:18:52] +[titan] 2025-10-05 17:46:37,770 - root - INFO - step: 31335 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 17:46:37,770 - root - INFO - lr: 1.0110e-05 gnorm: 1.14 [19:12:26< 5:18:40] +[titan] 2025-10-05 17:46:48,608 - root - INFO - step: 31340 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 17:46:48,608 - root - INFO - lr: 1.0104e-05 gnorm: 1.12 [19:12:37< 5:18:29] +[titan] 2025-10-05 17:46:59,446 - root - INFO - step: 31345 loss: 1.9908 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 17:46:59,446 - root - INFO - lr: 1.0099e-05 gnorm: 1.14 [19:12:48< 5:18:18] +[titan] 2025-10-05 17:47:08,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:47:10,307 - root - INFO - step: 31350 loss: 2.0078 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7753 +[titan] 2025-10-05 17:47:10,307 - root - INFO - lr: 1.0093e-05 gnorm: 1.18 [19:12:59< 5:18:07] +[titan] 2025-10-05 17:47:21,149 - root - INFO - step: 31355 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 17:47:21,149 - root - INFO - lr: 1.0087e-05 gnorm: 1.14 [19:13:10< 5:17:56] +[titan] 2025-10-05 17:47:32,020 - root - INFO - step: 31360 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 17:47:32,020 - root - INFO - lr: 1.0082e-05 gnorm: 1.16 [19:13:20< 5:17:45] +[titan] 2025-10-05 17:47:42,860 - root - INFO - step: 31365 loss: 2.0383 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 17:47:42,861 - root - INFO - lr: 1.0076e-05 gnorm: 1.15 [19:13:31< 5:17:34] +[titan] 2025-10-05 17:47:53,707 - root - INFO - step: 31370 loss: 2.0511 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 17:47:53,708 - root - INFO - lr: 1.0070e-05 gnorm: 1.16 [19:13:42< 5:17:23] +[titan] 2025-10-05 17:48:04,561 - root - INFO - step: 31375 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7875 +[titan] 2025-10-05 17:48:04,561 - root - INFO - lr: 1.0065e-05 gnorm: 1.20 [19:13:53< 5:17:12] +[titan] 2025-10-05 17:48:15,405 - root - INFO - step: 31380 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:48:15,405 - root - INFO - lr: 
1.0059e-05 gnorm: 1.15 [19:14:04< 5:17:01] +[titan] 2025-10-05 17:48:26,264 - root - INFO - step: 31385 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 17:48:26,265 - root - INFO - lr: 1.0053e-05 gnorm: 1.15 [19:14:15< 5:16:50] +[titan] 2025-10-05 17:48:37,141 - root - INFO - step: 31390 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 17:48:37,141 - root - INFO - lr: 1.0048e-05 gnorm: 1.19 [19:14:26< 5:16:39] +[titan] 2025-10-05 17:48:47,988 - root - INFO - step: 31395 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 17:48:47,988 - root - INFO - lr: 1.0042e-05 gnorm: 1.17 [19:14:36< 5:16:27] +[titan] 2025-10-05 17:48:56,653 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:48:58,841 - root - INFO - step: 31400 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 17:48:58,841 - root - INFO - lr: 1.0036e-05 gnorm: 1.19 [19:14:47< 5:16:16] +[titan] 2025-10-05 17:49:09,687 - root - INFO - step: 31405 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:49:09,687 - root - INFO - lr: 1.0031e-05 gnorm: 1.19 [19:14:58< 5:16:05] +[titan] 2025-10-05 17:49:20,527 - root - INFO - step: 31410 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 17:49:20,527 - root - INFO - lr: 1.0025e-05 gnorm: 1.12 [19:15:09< 5:15:54] +[titan] 2025-10-05 17:49:31,368 - root - INFO - step: 31415 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:49:31,368 - root - INFO - lr: 1.0020e-05 gnorm: 1.12 [19:15:20< 5:15:43] +[titan] 2025-10-05 17:49:42,213 - root - INFO - step: 31420 loss: 1.9250 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7033 +[titan] 2025-10-05 17:49:42,213 - root - INFO - lr: 1.0014e-05 gnorm: 1.15 [19:15:31< 5:15:32] +[titan] 2025-10-05 17:49:53,106 - root - INFO - step: 31425 loss: 1.9352 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 17:49:53,106 - root - INFO - lr: 1.0008e-05 gnorm: 1.16 [19:15:42< 5:15:21] +[titan] 2025-10-05 17:50:03,949 - root - INFO - step: 31430 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:50:03,949 - root - INFO - lr: 
1.0003e-05 gnorm: 1.17 [19:15:52< 5:15:10] +[titan] 2025-10-05 17:50:14,831 - root - INFO - step: 31435 loss: 1.9571 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 17:50:14,832 - root - INFO - lr: 9.9971e-06 gnorm: 1.15 [19:16:03< 5:14:59] +[titan] 2025-10-05 17:50:25,694 - root - INFO - step: 31440 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 17:50:25,694 - root - INFO - lr: 9.9915e-06 gnorm: 1.18 [19:16:14< 5:14:48] +[titan] 2025-10-05 17:50:36,538 - root - INFO - step: 31445 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 17:50:36,538 - root - INFO - lr: 9.9859e-06 gnorm: 2.11 [19:16:25< 5:14:37] +[titan] 2025-10-05 17:50:45,221 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:50:47,411 - root - INFO - step: 31450 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:50:47,411 - root - INFO - lr: 9.9803e-06 gnorm: 1.16 [19:16:36< 5:14:26] +[titan] 2025-10-05 17:50:58,315 - root - INFO - step: 31455 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:50:58,315 - root - INFO - lr: 9.9747e-06 gnorm: 1.15 [19:16:47< 5:14:15] +[titan] 2025-10-05 17:51:09,156 - root - INFO - step: 31460 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7552 +[titan] 2025-10-05 17:51:09,156 - root - INFO - lr: 9.9691e-06 gnorm: 1.20 [19:16:58< 5:14:03] +[titan] 2025-10-05 17:51:20,027 - root - INFO - step: 31465 loss: 2.0529 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 17:51:20,027 - root - INFO - lr: 9.9635e-06 gnorm: 1.21 [19:17:08< 5:13:52] +[titan] 2025-10-05 17:51:30,891 - root - INFO - step: 31470 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 17:51:30,891 - root - INFO - lr: 9.9579e-06 gnorm: 1.16 [19:17:19< 5:13:41] +[titan] 2025-10-05 17:51:41,738 - root - INFO - step: 31475 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 17:51:41,738 - root - INFO - lr: 9.9524e-06 gnorm: 1.16 [19:17:30< 5:13:30] +[titan] 2025-10-05 17:51:52,590 - root - INFO - step: 31480 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 17:51:52,590 - root - INFO - lr: 
9.9468e-06 gnorm: 1.15 [19:17:41< 5:13:19] +[titan] 2025-10-05 17:52:03,461 - root - INFO - step: 31485 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 17:52:03,461 - root - INFO - lr: 9.9412e-06 gnorm: 1.15 [19:17:52< 5:13:08] +[titan] 2025-10-05 17:52:14,350 - root - INFO - step: 31490 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 17:52:14,350 - root - INFO - lr: 9.9356e-06 gnorm: 1.18 [19:18:03< 5:12:57] +[titan] 2025-10-05 17:52:25,212 - root - INFO - step: 31495 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 17:52:25,212 - root - INFO - lr: 9.9300e-06 gnorm: 1.13 [19:18:14< 5:12:46] +[titan] 2025-10-05 17:52:33,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:52:36,054 - root - INFO - step: 31500 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 17:52:36,054 - root - INFO - lr: 9.9245e-06 gnorm: 1.18 [19:18:24< 5:12:35] +[titan] 2025-10-05 17:52:46,921 - root - INFO - step: 31505 loss: 1.9036 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 17:52:46,921 - root - INFO - lr: 9.9189e-06 gnorm: 1.12 [19:18:35< 5:12:24] +[titan] 2025-10-05 17:52:57,775 - root - INFO - step: 31510 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:52:57,775 - root - INFO - lr: 9.9133e-06 gnorm: 1.14 [19:18:46< 5:12:13] +[titan] 2025-10-05 17:53:08,630 - root - INFO - step: 31515 loss: 1.8954 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6762 +[titan] 2025-10-05 17:53:08,630 - root - INFO - lr: 9.9078e-06 gnorm: 1.14 [19:18:57< 5:12:02] +[titan] 2025-10-05 17:53:19,545 - root - INFO - step: 31520 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 17:53:19,545 - root - INFO - lr: 9.9022e-06 gnorm: 1.13 [19:19:08< 5:11:51] +[titan] 2025-10-05 17:53:30,414 - root - INFO - step: 31525 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:53:30,414 - root - INFO - lr: 9.8966e-06 gnorm: 1.16 [19:19:19< 5:11:39] +[titan] 2025-10-05 17:53:41,302 - root - INFO - step: 31530 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 17:53:41,302 - root - INFO - lr: 
9.8911e-06 gnorm: 1.16 [19:19:30< 5:11:28] +[titan] 2025-10-05 17:53:52,151 - root - INFO - step: 31535 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:53:52,151 - root - INFO - lr: 9.8855e-06 gnorm: 1.18 [19:19:41< 5:11:17] +[titan] 2025-10-05 17:54:03,009 - root - INFO - step: 31540 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 17:54:03,009 - root - INFO - lr: 9.8800e-06 gnorm: 1.19 [19:19:51< 5:11:06] +[titan] 2025-10-05 17:54:13,869 - root - INFO - step: 31545 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6744 +[titan] 2025-10-05 17:54:13,869 - root - INFO - lr: 9.8744e-06 gnorm: 1.15 [19:20:02< 5:10:55] +[titan] 2025-10-05 17:54:22,535 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:54:24,751 - root - INFO - step: 31550 loss: 2.0225 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 17:54:24,751 - root - INFO - lr: 9.8689e-06 gnorm: 1.23 [19:20:13< 5:10:44] +[titan] 2025-10-05 17:54:35,610 - root - INFO - step: 31555 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 17:54:35,610 - root - INFO - lr: 9.8633e-06 gnorm: 1.15 [19:20:24< 5:10:33] +[titan] 2025-10-05 17:54:46,473 - root - INFO - step: 31560 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 17:54:46,473 - root - INFO - lr: 9.8578e-06 gnorm: 1.13 [19:20:35< 5:10:22] +[titan] 2025-10-05 17:54:57,341 - root - INFO - step: 31565 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:54:57,341 - root - INFO - lr: 9.8523e-06 gnorm: 1.14 [19:20:46< 5:10:11] +[titan] 2025-10-05 17:55:08,193 - root - INFO - step: 31570 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 17:55:08,193 - root - INFO - lr: 9.8467e-06 gnorm: 1.13 [19:20:57< 5:10:00] +[titan] 2025-10-05 17:55:19,059 - root - INFO - step: 31575 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 17:55:19,059 - root - INFO - lr: 9.8412e-06 gnorm: 1.16 [19:21:07< 5:09:49] +[titan] 2025-10-05 17:55:29,929 - root - INFO - step: 31580 loss: 2.0143 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7811 +[titan] 2025-10-05 17:55:29,929 - root - INFO - lr: 
9.8357e-06 gnorm: 1.16 [19:21:18< 5:09:38] +[titan] 2025-10-05 17:55:40,810 - root - INFO - step: 31585 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 17:55:40,810 - root - INFO - lr: 9.8301e-06 gnorm: 1.18 [19:21:29< 5:09:27] +[titan] 2025-10-05 17:55:51,674 - root - INFO - step: 31590 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 17:55:51,674 - root - INFO - lr: 9.8246e-06 gnorm: 1.14 [19:21:40< 5:09:15] +[titan] 2025-10-05 17:56:02,575 - root - INFO - step: 31595 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 17:56:02,575 - root - INFO - lr: 9.8191e-06 gnorm: 1.17 [19:21:51< 5:09:04] +[titan] 2025-10-05 17:56:11,262 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:56:13,445 - root - INFO - step: 31600 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 17:56:13,445 - root - INFO - lr: 9.8136e-06 gnorm: 1.13 [19:22:02< 5:08:53] +[titan] 2025-10-05 17:56:24,318 - root - INFO - step: 31605 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:56:24,318 - root - INFO - lr: 9.8081e-06 gnorm: 1.14 [19:22:13< 5:08:42] +[titan] 2025-10-05 17:56:35,171 - root - INFO - step: 31610 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 17:56:35,172 - root - INFO - lr: 9.8025e-06 gnorm: 1.19 [19:22:24< 5:08:31] +[titan] 2025-10-05 17:56:46,087 - root - INFO - step: 31615 loss: 2.0603 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:56:46,087 - root - INFO - lr: 9.7970e-06 gnorm: 1.20 [19:22:34< 5:08:20] +[titan] 2025-10-05 17:56:56,970 - root - INFO - step: 31620 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 17:56:56,970 - root - INFO - lr: 9.7915e-06 gnorm: 1.15 [19:22:45< 5:08:09] +[titan] 2025-10-05 17:57:07,861 - root - INFO - step: 31625 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:57:07,862 - root - INFO - lr: 9.7860e-06 gnorm: 1.16 [19:22:56< 5:07:58] +[titan] 2025-10-05 17:57:18,731 - root - INFO - step: 31630 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 17:57:18,731 - root - INFO - lr: 
9.7805e-06 gnorm: 1.15 [19:23:07< 5:07:47] +[titan] 2025-10-05 17:57:29,588 - root - INFO - step: 31635 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7639 +[titan] 2025-10-05 17:57:29,588 - root - INFO - lr: 9.7750e-06 gnorm: 1.13 [19:23:18< 5:07:36] +[titan] 2025-10-05 17:57:40,435 - root - INFO - step: 31640 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7360 +[titan] 2025-10-05 17:57:40,435 - root - INFO - lr: 9.7695e-06 gnorm: 1.15 [19:23:29< 5:07:25] +[titan] 2025-10-05 17:57:51,307 - root - INFO - step: 31645 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:57:51,307 - root - INFO - lr: 9.7640e-06 gnorm: 1.15 [19:23:40< 5:07:14] +[titan] 2025-10-05 17:58:00,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:58:02,211 - root - INFO - step: 31650 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 17:58:02,211 - root - INFO - lr: 9.7585e-06 gnorm: 1.14 [19:23:51< 5:07:03] +[titan] 2025-10-05 17:58:13,078 - root - INFO - step: 31655 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:58:13,078 - root - INFO - lr: 9.7531e-06 gnorm: 1.13 [19:24:01< 5:06:51] +[titan] 2025-10-05 17:58:23,966 - root - INFO - step: 31660 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:58:23,966 - root - INFO - lr: 9.7476e-06 gnorm: 1.16 [19:24:12< 5:06:40] +[titan] 2025-10-05 17:58:34,856 - root - INFO - step: 31665 loss: 1.9619 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 17:58:34,857 - root - INFO - lr: 9.7421e-06 gnorm: 1.15 [19:24:23< 5:06:29] +[titan] 2025-10-05 17:58:45,708 - root - INFO - step: 31670 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7203 +[titan] 2025-10-05 17:58:45,708 - root - INFO - lr: 9.7366e-06 gnorm: 1.17 [19:24:34< 5:06:18] +[titan] 2025-10-05 17:58:56,585 - root - INFO - step: 31675 loss: 1.9360 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7121 +[titan] 2025-10-05 17:58:56,586 - root - INFO - lr: 9.7311e-06 gnorm: 1.19 [19:24:45< 5:06:07] +[titan] 2025-10-05 17:59:07,478 - root - INFO - step: 31680 loss: 2.0159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 17:59:07,478 - root - INFO - lr: 
9.7257e-06 gnorm: 1.17 [19:24:56< 5:05:56] +[titan] 2025-10-05 17:59:18,333 - root - INFO - step: 31685 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 17:59:18,333 - root - INFO - lr: 9.7202e-06 gnorm: 1.14 [19:25:07< 5:05:45] +[titan] 2025-10-05 17:59:29,202 - root - INFO - step: 31690 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 17:59:29,202 - root - INFO - lr: 9.7147e-06 gnorm: 1.15 [19:25:18< 5:05:34] +[titan] 2025-10-05 17:59:40,087 - root - INFO - step: 31695 loss: 2.0267 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 17:59:40,087 - root - INFO - lr: 9.7093e-06 gnorm: 1.18 [19:25:28< 5:05:23] +[titan] 2025-10-05 17:59:48,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:59:50,987 - root - INFO - step: 31700 loss: 2.0901 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 17:59:50,987 - root - INFO - lr: 9.7038e-06 gnorm: 1.21 [19:25:39< 5:05:12] +[titan] 2025-10-05 18:00:01,873 - root - INFO - step: 31705 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 18:00:01,873 - root - INFO - lr: 9.6983e-06 gnorm: 1.15 [19:25:50< 5:05:01] +[titan] 2025-10-05 18:00:12,786 - root - INFO - step: 31710 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 18:00:12,786 - root - INFO - lr: 9.6929e-06 gnorm: 1.19 [19:26:01< 5:04:50] +[titan] 2025-10-05 18:00:23,689 - root - INFO - step: 31715 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 18:00:23,689 - root - INFO - lr: 9.6874e-06 gnorm: 1.20 [19:26:12< 5:04:39] +[titan] 2025-10-05 18:00:34,603 - root - INFO - step: 31720 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:00:34,603 - root - INFO - lr: 9.6820e-06 gnorm: 1.17 [19:26:23< 5:04:28] +[titan] 2025-10-05 18:00:45,500 - root - INFO - step: 31725 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 18:00:45,500 - root - INFO - lr: 9.6765e-06 gnorm: 1.17 [19:26:34< 5:04:17] +[titan] 2025-10-05 18:00:56,416 - root - INFO - step: 31730 loss: 1.8776 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6601 +[titan] 2025-10-05 18:00:56,416 - root - INFO - lr: 
9.6711e-06 gnorm: 1.13 [19:26:45< 5:04:05] +[titan] 2025-10-05 18:01:07,298 - root - INFO - step: 31735 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:01:07,298 - root - INFO - lr: 9.6656e-06 gnorm: 1.14 [19:26:56< 5:03:54] +[titan] 2025-10-05 18:01:18,193 - root - INFO - step: 31740 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 18:01:18,193 - root - INFO - lr: 9.6602e-06 gnorm: 1.19 [19:27:07< 5:03:43] +[titan] 2025-10-05 18:01:27,259 - root - INFO - Dumping profiler traces at step 31744 +[titan] 2025-10-05 18:01:27,298 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:01:29,483 - root - INFO - step: 31745 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 29,024 tflops: 402.66 mfu: 40.71% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:01:29,484 - root - INFO - lr: 9.6548e-06 gnorm: 1.14 [19:27:18< 5:03:32] +[titan] 2025-10-05 18:01:38,164 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:01:40,345 - root - INFO - step: 31750 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 18:01:40,345 - root - INFO - lr: 9.6493e-06 gnorm: 1.15 [19:27:29< 5:03:21] +[titan] 2025-10-05 18:01:51,212 - root - INFO - step: 31755 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 18:01:51,212 - root - INFO - lr: 9.6439e-06 gnorm: 1.16 [19:27:40< 5:03:10] +[titan] 2025-10-05 18:02:02,067 - root - INFO - step: 31760 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 18:02:02,067 - root - INFO - lr: 9.6385e-06 gnorm: 1.20 [19:27:50< 5:02:59] +[titan] 2025-10-05 18:02:12,939 - root - INFO - step: 31765 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 18:02:12,940 - root - INFO - lr: 9.6330e-06 gnorm: 1.12 [19:28:01< 5:02:48] +[titan] 2025-10-05 18:02:24,117 - root - INFO - step: 31770 loss: 1.9667 memory: 118.84GiB(85.28%) tps: 29,315 tflops: 406.70 mfu: 41.12% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 18:02:24,118 - root - INFO - lr: 9.6276e-06 gnorm: 1.16 [19:28:13< 5:02:37] +[titan] 2025-10-05 18:02:35,008 - root - INFO - step: 31775 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 18:02:35,009 - root - INFO - lr: 9.6222e-06 gnorm: 1.21 [19:28:23< 5:02:26] +[titan] 2025-10-05 18:02:45,869 - root - INFO - step: 31780 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:02:45,869 - root - INFO - lr: 
9.6168e-06 gnorm: 1.16 [19:28:34< 5:02:15] +[titan] 2025-10-05 18:02:56,730 - root - INFO - step: 31785 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:02:56,730 - root - INFO - lr: 9.6114e-06 gnorm: 1.17 [19:28:45< 5:02:04] +[titan] 2025-10-05 18:03:07,583 - root - INFO - step: 31790 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 18:03:07,583 - root - INFO - lr: 9.6059e-06 gnorm: 1.14 [19:28:56< 5:01:53] +[titan] 2025-10-05 18:03:18,426 - root - INFO - step: 31795 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 18:03:18,426 - root - INFO - lr: 9.6005e-06 gnorm: 1.12 [19:29:07< 5:01:42] +[titan] 2025-10-05 18:03:27,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:03:29,339 - root - INFO - step: 31800 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 18:03:29,339 - root - INFO - lr: 9.5951e-06 gnorm: 1.13 [19:29:18< 5:01:31] +[titan] 2025-10-05 18:03:40,190 - root - INFO - step: 31805 loss: 1.9797 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:03:40,190 - root - INFO - lr: 9.5897e-06 gnorm: 1.15 [19:29:29< 5:01:20] +[titan] 2025-10-05 18:03:51,090 - root - INFO - step: 31810 loss: 2.0140 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 18:03:51,090 - root - INFO - lr: 9.5843e-06 gnorm: 1.18 [19:29:39< 5:01:08] +[titan] 2025-10-05 18:04:01,948 - root - INFO - step: 31815 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 18:04:01,948 - root - INFO - lr: 9.5789e-06 gnorm: 1.12 [19:29:50< 5:00:57] +[titan] 2025-10-05 18:04:12,805 - root - INFO - step: 31820 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 18:04:12,805 - root - INFO - lr: 9.5735e-06 gnorm: 1.15 [19:30:01< 5:00:46] +[titan] 2025-10-05 18:04:23,715 - root - INFO - step: 31825 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7202 +[titan] 2025-10-05 18:04:23,715 - root - INFO - lr: 9.5681e-06 gnorm: 1.12 [19:30:12< 5:00:35] +[titan] 2025-10-05 18:04:34,585 - root - INFO - step: 31830 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 18:04:34,585 - root - INFO - lr: 
9.5628e-06 gnorm: 1.16 [19:30:23< 5:00:24] +[titan] 2025-10-05 18:04:45,454 - root - INFO - step: 31835 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:04:45,454 - root - INFO - lr: 9.5574e-06 gnorm: 1.18 [19:30:34< 5:00:13] +[titan] 2025-10-05 18:04:56,357 - root - INFO - step: 31840 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 18:04:56,357 - root - INFO - lr: 9.5520e-06 gnorm: 1.16 [19:30:45< 5:00:02] +[titan] 2025-10-05 18:05:07,225 - root - INFO - step: 31845 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 18:05:07,226 - root - INFO - lr: 9.5466e-06 gnorm: 1.18 [19:30:56< 4:59:51] +[titan] 2025-10-05 18:05:15,912 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:05:18,103 - root - INFO - step: 31850 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 18:05:18,103 - root - INFO - lr: 9.5412e-06 gnorm: 1.16 [19:31:06< 4:59:40] +[titan] 2025-10-05 18:05:29,031 - root - INFO - step: 31855 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7969 +[titan] 2025-10-05 18:05:29,031 - root - INFO - lr: 9.5359e-06 gnorm: 1.17 [19:31:17< 4:59:29] +[titan] 2025-10-05 18:05:39,898 - root - INFO - step: 31860 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7375 +[titan] 2025-10-05 18:05:39,898 - root - INFO - lr: 9.5305e-06 gnorm: 1.17 [19:31:28< 4:59:18] +[titan] 2025-10-05 18:05:50,764 - root - INFO - step: 31865 loss: 1.9005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6802 +[titan] 2025-10-05 18:05:50,764 - root - INFO - lr: 9.5251e-06 gnorm: 1.14 [19:31:39< 4:59:07] +[titan] 2025-10-05 18:06:01,663 - root - INFO - step: 31870 loss: 1.9427 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:06:01,664 - root - INFO - lr: 9.5197e-06 gnorm: 1.17 [19:31:50< 4:58:56] +[titan] 2025-10-05 18:06:12,506 - root - INFO - step: 31875 loss: 2.0201 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 18:06:12,507 - root - INFO - lr: 9.5144e-06 gnorm: 1.20 [19:32:01< 4:58:45] +[titan] 2025-10-05 18:06:23,358 - root - INFO - step: 31880 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 18:06:23,358 - root - INFO - lr: 
9.5090e-06 gnorm: 1.12 [19:32:12< 4:58:33] +[titan] 2025-10-05 18:06:34,258 - root - INFO - step: 31885 loss: 1.8475 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 18:06:34,259 - root - INFO - lr: 9.5037e-06 gnorm: 1.13 [19:32:23< 4:58:22] +[titan] 2025-10-05 18:06:45,106 - root - INFO - step: 31890 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 18:06:45,106 - root - INFO - lr: 9.4983e-06 gnorm: 1.19 [19:32:33< 4:58:11] +[titan] 2025-10-05 18:06:55,965 - root - INFO - step: 31895 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 18:06:55,965 - root - INFO - lr: 9.4930e-06 gnorm: 1.16 [19:32:44< 4:58:00] +[titan] 2025-10-05 18:07:04,626 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:07:06,799 - root - INFO - step: 31900 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:07:06,799 - root - INFO - lr: 9.4876e-06 gnorm: 1.18 [19:32:55< 4:57:49] +[titan] 2025-10-05 18:07:17,698 - root - INFO - step: 31905 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 18:07:17,698 - root - INFO - lr: 9.4823e-06 gnorm: 1.20 [19:33:06< 4:57:38] +[titan] 2025-10-05 18:07:28,596 - root - INFO - step: 31910 loss: 1.9594 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 18:07:28,597 - root - INFO - lr: 9.4769e-06 gnorm: 1.14 [19:33:17< 4:57:27] +[titan] 2025-10-05 18:07:39,465 - root - INFO - step: 31915 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 18:07:39,466 - root - INFO - lr: 9.4716e-06 gnorm: 1.15 [19:33:28< 4:57:16] +[titan] 2025-10-05 18:07:50,320 - root - INFO - step: 31920 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:07:50,320 - root - INFO - lr: 9.4662e-06 gnorm: 1.18 [19:33:39< 4:57:05] +[titan] 2025-10-05 18:08:01,166 - root - INFO - step: 31925 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:08:01,166 - root - INFO - lr: 9.4609e-06 gnorm: 1.19 [19:33:50< 4:56:54] +[titan] 2025-10-05 18:08:12,045 - root - INFO - step: 31930 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 18:08:12,045 - root - INFO - lr: 
9.4556e-06 gnorm: 1.11 [19:34:00< 4:56:43] +[titan] 2025-10-05 18:08:22,957 - root - INFO - step: 31935 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7321 +[titan] 2025-10-05 18:08:22,957 - root - INFO - lr: 9.4502e-06 gnorm: 1.14 [19:34:11< 4:56:32] +[titan] 2025-10-05 18:08:33,867 - root - INFO - step: 31940 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7987 +[titan] 2025-10-05 18:08:33,867 - root - INFO - lr: 9.4449e-06 gnorm: 1.15 [19:34:22< 4:56:21] +[titan] 2025-10-05 18:08:44,766 - root - INFO - step: 31945 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 18:08:44,766 - root - INFO - lr: 9.4396e-06 gnorm: 1.16 [19:34:33< 4:56:10] +[titan] 2025-10-05 18:08:53,450 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:08:55,628 - root - INFO - step: 31950 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 18:08:55,628 - root - INFO - lr: 9.4343e-06 gnorm: 1.18 [19:34:44< 4:55:59] +[titan] 2025-10-05 18:09:06,503 - root - INFO - step: 31955 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 18:09:06,503 - root - INFO - lr: 9.4289e-06 gnorm: 1.12 [19:34:55< 4:55:47] +[titan] 2025-10-05 18:09:17,363 - root - INFO - step: 31960 loss: 2.0329 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 18:09:17,363 - root - INFO - lr: 9.4236e-06 gnorm: 1.18 [19:35:06< 4:55:36] +[titan] 2025-10-05 18:09:28,265 - root - INFO - step: 31965 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 18:09:28,265 - root - INFO - lr: 9.4183e-06 gnorm: 1.18 [19:35:17< 4:55:25] +[titan] 2025-10-05 18:09:39,153 - root - INFO - step: 31970 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 18:09:39,153 - root - INFO - lr: 9.4130e-06 gnorm: 1.15 [19:35:28< 4:55:14] +[titan] 2025-10-05 18:09:50,010 - root - INFO - step: 31975 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 18:09:50,010 - root - INFO - lr: 9.4077e-06 gnorm: 1.18 [19:35:38< 4:55:03] +[titan] 2025-10-05 18:10:00,880 - root - INFO - step: 31980 loss: 1.9569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 18:10:00,880 - root - INFO - lr: 
9.4024e-06 gnorm: 1.14 [19:35:49< 4:54:52] +[titan] 2025-10-05 18:10:11,742 - root - INFO - step: 31985 loss: 1.9260 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 18:10:11,742 - root - INFO - lr: 9.3971e-06 gnorm: 1.13 [19:36:00< 4:54:41] +[titan] 2025-10-05 18:10:22,613 - root - INFO - step: 31990 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 18:10:22,613 - root - INFO - lr: 9.3918e-06 gnorm: 1.16 [19:36:11< 4:54:30] +[titan] 2025-10-05 18:10:33,523 - root - INFO - step: 31995 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 18:10:33,523 - root - INFO - lr: 9.3865e-06 gnorm: 1.14 [19:36:22< 4:54:19] +[titan] 2025-10-05 18:10:42,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:10:44,426 - root - INFO - step: 32000 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 18:10:44,426 - root - INFO - lr: 9.3812e-06 gnorm: 1.14 [19:36:33< 4:54:08] +[titan] 2025-10-05 18:10:55,314 - root - INFO - step: 32005 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 18:10:55,315 - root - INFO - lr: 9.3759e-06 gnorm: 1.17 [19:36:44< 4:53:57] +[titan] 2025-10-05 18:11:06,182 - root - INFO - step: 32010 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:11:06,182 - root - INFO - lr: 9.3706e-06 gnorm: 1.16 [19:36:55< 4:53:46] +[titan] 2025-10-05 18:11:17,044 - root - INFO - step: 32015 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 18:11:17,044 - root - INFO - lr: 9.3653e-06 gnorm: 1.16 [19:37:05< 4:53:35] +[titan] 2025-10-05 18:11:27,919 - root - INFO - step: 32020 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 18:11:27,919 - root - INFO - lr: 9.3601e-06 gnorm: 1.21 [19:37:16< 4:53:24] +[titan] 2025-10-05 18:11:38,842 - root - INFO - step: 32025 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:11:38,842 - root - INFO - lr: 9.3548e-06 gnorm: 1.19 [19:37:27< 4:53:12] +[titan] 2025-10-05 18:11:49,758 - root - INFO - step: 32030 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7644 +[titan] 2025-10-05 18:11:49,758 - root - INFO - lr: 
9.3495e-06 gnorm: 1.19 [19:37:38< 4:53:01] +[titan] 2025-10-05 18:12:00,638 - root - INFO - step: 32035 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:12:00,638 - root - INFO - lr: 9.3442e-06 gnorm: 1.14 [19:37:49< 4:52:50] +[titan] 2025-10-05 18:12:11,546 - root - INFO - step: 32040 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:12:11,547 - root - INFO - lr: 9.3390e-06 gnorm: 1.15 [19:38:00< 4:52:39] +[titan] 2025-10-05 18:12:22,450 - root - INFO - step: 32045 loss: 1.8868 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 18:12:22,450 - root - INFO - lr: 9.3337e-06 gnorm: 1.14 [19:38:11< 4:52:28] +[titan] 2025-10-05 18:12:31,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:12:33,403 - root - INFO - step: 32050 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 18:12:33,403 - root - INFO - lr: 9.3284e-06 gnorm: 1.15 [19:38:22< 4:52:17] +[titan] 2025-10-05 18:12:44,298 - root - INFO - step: 32055 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 18:12:44,298 - root - INFO - lr: 9.3232e-06 gnorm: 1.15 [19:38:33< 4:52:06] +[titan] 2025-10-05 18:12:55,164 - root - INFO - step: 32060 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:12:55,164 - root - INFO - lr: 9.3179e-06 gnorm: 1.16 [19:38:44< 4:51:55] +[titan] 2025-10-05 18:13:06,043 - root - INFO - step: 32065 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7655 +[titan] 2025-10-05 18:13:06,043 - root - INFO - lr: 9.3127e-06 gnorm: 1.15 [19:38:54< 4:51:44] +[titan] 2025-10-05 18:13:16,898 - root - INFO - step: 32070 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 18:13:16,898 - root - INFO - lr: 9.3074e-06 gnorm: 1.18 [19:39:05< 4:51:33] +[titan] 2025-10-05 18:13:27,792 - root - INFO - step: 32075 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 18:13:27,793 - root - INFO - lr: 9.3022e-06 gnorm: 1.19 [19:39:16< 4:51:22] +[titan] 2025-10-05 18:13:38,698 - root - INFO - step: 32080 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 18:13:38,698 - root - INFO - lr: 
9.2969e-06 gnorm: 1.19 [19:39:27< 4:51:11] +[titan] 2025-10-05 18:13:49,556 - root - INFO - step: 32085 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 18:13:49,557 - root - INFO - lr: 9.2917e-06 gnorm: 1.17 [19:39:38< 4:51:00] +[titan] 2025-10-05 18:14:00,441 - root - INFO - step: 32090 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 18:14:00,441 - root - INFO - lr: 9.2864e-06 gnorm: 1.16 [19:39:49< 4:50:49] +[titan] 2025-10-05 18:14:11,340 - root - INFO - step: 32095 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 18:14:11,340 - root - INFO - lr: 9.2812e-06 gnorm: 1.14 [19:40:00< 4:50:38] +[titan] 2025-10-05 18:14:20,021 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:14:22,203 - root - INFO - step: 32100 loss: 1.9882 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 18:14:22,203 - root - INFO - lr: 9.2759e-06 gnorm: 1.14 [19:40:11< 4:50:27] +[titan] 2025-10-05 18:14:33,146 - root - INFO - step: 32105 loss: 2.0008 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:14:33,146 - root - INFO - lr: 9.2707e-06 gnorm: 1.18 [19:40:22< 4:50:15] +[titan] 2025-10-05 18:14:44,011 - root - INFO - step: 32110 loss: 1.9522 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:14:44,011 - root - INFO - lr: 9.2655e-06 gnorm: 1.14 [19:40:32< 4:50:04] +[titan] 2025-10-05 18:14:54,863 - root - INFO - step: 32115 loss: 1.9586 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:14:54,864 - root - INFO - lr: 9.2603e-06 gnorm: 1.15 [19:40:43< 4:49:53] +[titan] 2025-10-05 18:15:05,716 - root - INFO - step: 32120 loss: 1.9321 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 18:15:05,716 - root - INFO - lr: 9.2550e-06 gnorm: 1.13 [19:40:54< 4:49:42] +[titan] 2025-10-05 18:15:16,556 - root - INFO - step: 32125 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:15:16,556 - root - INFO - lr: 9.2498e-06 gnorm: 1.15 [19:41:05< 4:49:31] +[titan] 2025-10-05 18:15:27,432 - root - INFO - step: 32130 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 18:15:27,433 - root - INFO - lr: 
9.2446e-06 gnorm: 1.20 [19:41:16< 4:49:20] +[titan] 2025-10-05 18:15:38,339 - root - INFO - step: 32135 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7023 +[titan] 2025-10-05 18:15:38,339 - root - INFO - lr: 9.2394e-06 gnorm: 1.17 [19:41:27< 4:49:09] +[titan] 2025-10-05 18:15:49,183 - root - INFO - step: 32140 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 18:15:49,184 - root - INFO - lr: 9.2342e-06 gnorm: 1.19 [19:41:38< 4:48:58] +[titan] 2025-10-05 18:16:00,016 - root - INFO - step: 32145 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:16:00,017 - root - INFO - lr: 9.2290e-06 gnorm: 1.19 [19:41:48< 4:48:47] +[titan] 2025-10-05 18:16:08,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:16:10,859 - root - INFO - step: 32150 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7520 +[titan] 2025-10-05 18:16:10,859 - root - INFO - lr: 9.2237e-06 gnorm: 1.17 [19:41:59< 4:48:36] +[titan] 2025-10-05 18:16:21,712 - root - INFO - step: 32155 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 18:16:21,712 - root - INFO - lr: 9.2185e-06 gnorm: 1.14 [19:42:10< 4:48:25] +[titan] 2025-10-05 18:16:32,617 - root - INFO - step: 32160 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 18:16:32,618 - root - INFO - lr: 9.2133e-06 gnorm: 1.19 [19:42:21< 4:48:14] +[titan] 2025-10-05 18:16:43,525 - root - INFO - step: 32165 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 18:16:43,525 - root - INFO - lr: 9.2081e-06 gnorm: 1.20 [19:42:32< 4:48:03] +[titan] 2025-10-05 18:16:54,419 - root - INFO - step: 32170 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 18:16:54,419 - root - INFO - lr: 9.2029e-06 gnorm: 1.15 [19:42:43< 4:47:52] +[titan] 2025-10-05 18:17:05,289 - root - INFO - step: 32175 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 18:17:05,289 - root - INFO - lr: 9.1978e-06 gnorm: 1.15 [19:42:54< 4:47:40] +[titan] 2025-10-05 18:17:16,163 - root - INFO - step: 32180 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 18:17:16,164 - root - INFO - lr: 
9.1926e-06 gnorm: 1.17 [19:43:05< 4:47:29] +[titan] 2025-10-05 18:17:27,035 - root - INFO - step: 32185 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7293 +[titan] 2025-10-05 18:17:27,035 - root - INFO - lr: 9.1874e-06 gnorm: 1.11 [19:43:15< 4:47:18] +[titan] 2025-10-05 18:17:37,986 - root - INFO - step: 32190 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:17:37,987 - root - INFO - lr: 9.1822e-06 gnorm: 1.20 [19:43:26< 4:47:07] +[titan] 2025-10-05 18:17:48,863 - root - INFO - step: 32195 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 18:17:48,863 - root - INFO - lr: 9.1770e-06 gnorm: 1.14 [19:43:37< 4:46:56] +[titan] 2025-10-05 18:17:57,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:17:59,752 - root - INFO - step: 32200 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 18:17:59,752 - root - INFO - lr: 9.1718e-06 gnorm: 1.14 [19:43:48< 4:46:45] +[titan] 2025-10-05 18:18:10,633 - root - INFO - step: 32205 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 18:18:10,633 - root - INFO - lr: 9.1667e-06 gnorm: 1.16 [19:43:59< 4:46:34] +[titan] 2025-10-05 18:18:21,504 - root - INFO - step: 32210 loss: 1.9549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 18:18:21,505 - root - INFO - lr: 9.1615e-06 gnorm: 1.14 [19:44:10< 4:46:23] +[titan] 2025-10-05 18:18:32,363 - root - INFO - step: 32215 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 18:18:32,363 - root - INFO - lr: 9.1563e-06 gnorm: 1.15 [19:44:21< 4:46:12] +[titan] 2025-10-05 18:18:43,287 - root - INFO - step: 32220 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 18:18:43,287 - root - INFO - lr: 9.1512e-06 gnorm: 1.16 [19:44:32< 4:46:01] +[titan] 2025-10-05 18:18:54,183 - root - INFO - step: 32225 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 18:18:54,183 - root - INFO - lr: 9.1460e-06 gnorm: 1.14 [19:44:43< 4:45:50] +[titan] 2025-10-05 18:19:05,065 - root - INFO - step: 32230 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:19:05,066 - root - INFO - lr: 
9.1408e-06 gnorm: 1.13 [19:44:53< 4:45:39] +[titan] 2025-10-05 18:19:15,931 - root - INFO - step: 32235 loss: 1.9942 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:19:15,932 - root - INFO - lr: 9.1357e-06 gnorm: 1.19 [19:45:04< 4:45:28] +[titan] 2025-10-05 18:19:26,783 - root - INFO - step: 32240 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 18:19:26,783 - root - INFO - lr: 9.1305e-06 gnorm: 1.16 [19:45:15< 4:45:17] +[titan] 2025-10-05 18:19:37,703 - root - INFO - step: 32245 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:19:37,703 - root - INFO - lr: 9.1254e-06 gnorm: 1.16 [19:45:26< 4:45:06] +[titan] 2025-10-05 18:19:46,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:19:48,556 - root - INFO - step: 32250 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 18:19:48,556 - root - INFO - lr: 9.1202e-06 gnorm: 1.15 [19:45:37< 4:44:55] +[titan] 2025-10-05 18:19:59,512 - root - INFO - step: 32255 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:19:59,512 - root - INFO - lr: 9.1151e-06 gnorm: 1.19 [19:45:48< 4:44:43] +[titan] 2025-10-05 18:20:01,872 - root - INFO - Dumping profiler traces at step 32256 +[titan] 2025-10-05 18:20:01,911 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:20:10,612 - root - INFO - step: 32260 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.57 mfu: 41.41% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 18:20:10,612 - root - INFO - lr: 9.1099e-06 gnorm: 1.15 [19:45:59< 4:44:32] +[titan] 2025-10-05 18:20:21,505 - root - INFO - step: 32265 loss: 1.9661 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 18:20:21,505 - root - INFO - lr: 9.1048e-06 gnorm: 1.18 [19:46:10< 4:44:21] +[titan] 2025-10-05 18:20:32,390 - root - INFO - step: 32270 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 18:20:32,390 - root - INFO - lr: 9.0996e-06 gnorm: 1.19 [19:46:21< 4:44:10] +[titan] 2025-10-05 18:20:43,343 - root - INFO - step: 32275 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 18:20:43,343 - root - INFO - lr: 9.0945e-06 gnorm: 1.17 [19:46:32< 4:43:59] +[titan] 2025-10-05 18:20:54,195 - root - INFO - step: 32280 loss: 
1.9368 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 18:20:54,195 - root - INFO - lr: 9.0894e-06 gnorm: 1.13 [19:46:43< 4:43:48] +[titan] 2025-10-05 18:21:05,056 - root - INFO - step: 32285 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:21:05,056 - root - INFO - lr: 9.0842e-06 gnorm: 1.14 [19:46:53< 4:43:37] +[titan] 2025-10-05 18:21:15,905 - root - INFO - step: 32290 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 18:21:15,905 - root - INFO - lr: 9.0791e-06 gnorm: 1.14 [19:47:04< 4:43:26] +[titan] 2025-10-05 18:21:26,822 - root - INFO - step: 32295 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:21:26,822 - root - INFO - lr: 9.0740e-06 gnorm: 1.15 [19:47:15< 4:43:15] +[titan] 2025-10-05 18:21:35,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:21:37,747 - root - INFO - step: 32300 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:21:37,747 - root - INFO - lr: 9.0689e-06 gnorm: 1.15 [19:47:26< 4:43:04] +[titan] 2025-10-05 18:21:48,651 - root - INFO - step: 32305 loss: 1.9420 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:21:48,651 - root - INFO - lr: 9.0638e-06 gnorm: 1.13 [19:47:37< 4:42:53] +[titan] 2025-10-05 18:21:59,526 - root - INFO - step: 32310 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:21:59,526 - root - INFO - lr: 9.0586e-06 gnorm: 1.20 [19:47:48< 4:42:42] +[titan] 2025-10-05 18:22:10,410 - root - INFO - step: 32315 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 18:22:10,410 - root - INFO - lr: 9.0535e-06 gnorm: 1.16 [19:47:59< 4:42:31] +[titan] 2025-10-05 18:22:21,310 - root - INFO - step: 32320 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6910 +[titan] 2025-10-05 18:22:21,310 - root - INFO - lr: 9.0484e-06 gnorm: 1.16 [19:48:10< 4:42:20] +[titan] 2025-10-05 18:22:32,228 - root - INFO - step: 32325 loss: 1.9625 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 18:22:32,229 - root - INFO - lr: 9.0433e-06 gnorm: 1.17 [19:48:21< 4:42:09] +[titan] 2025-10-05 18:22:43,163 - root - INFO - step: 32330 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 18:22:43,164 - root - INFO - lr: 
9.0382e-06 gnorm: 1.16 [19:48:32< 4:41:58] +[titan] 2025-10-05 18:22:54,059 - root - INFO - step: 32335 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:22:54,059 - root - INFO - lr: 9.0331e-06 gnorm: 1.18 [19:48:42< 4:41:47] +[titan] 2025-10-05 18:23:04,937 - root - INFO - step: 32340 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 18:23:04,938 - root - INFO - lr: 9.0280e-06 gnorm: 1.22 [19:48:53< 4:41:36] +[titan] 2025-10-05 18:23:15,809 - root - INFO - step: 32345 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:23:15,809 - root - INFO - lr: 9.0229e-06 gnorm: 1.18 [19:49:04< 4:41:24] +[titan] 2025-10-05 18:23:24,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:23:26,693 - root - INFO - step: 32350 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 18:23:26,693 - root - INFO - lr: 9.0178e-06 gnorm: 1.24 [19:49:15< 4:41:13] +[titan] 2025-10-05 18:23:37,572 - root - INFO - step: 32355 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:23:37,573 - root - INFO - lr: 9.0127e-06 gnorm: 1.15 [19:49:26< 4:41:02] +[titan] 2025-10-05 18:23:48,530 - root - INFO - step: 32360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 18:23:48,530 - root - INFO - lr: 9.0077e-06 gnorm: 1.15 [19:49:37< 4:40:51] +[titan] 2025-10-05 18:23:59,408 - root - INFO - step: 32365 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 18:23:59,408 - root - INFO - lr: 9.0026e-06 gnorm: 1.20 [19:49:48< 4:40:40] +[titan] 2025-10-05 18:24:10,292 - root - INFO - step: 32370 loss: 1.9796 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 18:24:10,292 - root - INFO - lr: 8.9975e-06 gnorm: 1.14 [19:49:59< 4:40:29] +[titan] 2025-10-05 18:24:21,173 - root - INFO - step: 32375 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:24:21,173 - root - INFO - lr: 8.9924e-06 gnorm: 1.18 [19:50:10< 4:40:18] +[titan] 2025-10-05 18:24:32,033 - root - INFO - step: 32380 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7551 +[titan] 2025-10-05 18:24:32,033 - root - INFO - lr: 
8.9873e-06 gnorm: 1.17 [19:50:20< 4:40:07] +[titan] 2025-10-05 18:24:42,932 - root - INFO - step: 32385 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 18:24:42,932 - root - INFO - lr: 8.9823e-06 gnorm: 1.17 [19:50:31< 4:39:56] +[titan] 2025-10-05 18:24:53,801 - root - INFO - step: 32390 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 18:24:53,801 - root - INFO - lr: 8.9772e-06 gnorm: 1.16 [19:50:42< 4:39:45] +[titan] 2025-10-05 18:25:04,696 - root - INFO - step: 32395 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 18:25:04,696 - root - INFO - lr: 8.9721e-06 gnorm: 1.18 [19:50:53< 4:39:34] +[titan] 2025-10-05 18:25:13,384 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:25:15,566 - root - INFO - step: 32400 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:25:15,566 - root - INFO - lr: 8.9671e-06 gnorm: 1.16 [19:51:04< 4:39:23] +[titan] 2025-10-05 18:25:26,448 - root - INFO - step: 32405 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 18:25:26,449 - root - INFO - lr: 8.9620e-06 gnorm: 1.13 [19:51:15< 4:39:12] +[titan] 2025-10-05 18:25:37,323 - root - INFO - step: 32410 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 18:25:37,323 - root - INFO - lr: 8.9570e-06 gnorm: 1.15 [19:51:26< 4:39:01] +[titan] 2025-10-05 18:25:48,505 - root - INFO - step: 32415 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 29,304 tflops: 406.55 mfu: 41.11% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 18:25:48,505 - root - INFO - lr: 8.9519e-06 gnorm: 1.17 [19:51:37< 4:38:50] +[titan] 2025-10-05 18:25:59,396 - root - INFO - step: 32420 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 18:25:59,396 - root - INFO - lr: 8.9469e-06 gnorm: 1.17 [19:51:48< 4:38:39] +[titan] 2025-10-05 18:26:10,310 - root - INFO - step: 32425 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 18:26:10,310 - root - INFO - lr: 8.9418e-06 gnorm: 1.15 [19:51:59< 4:38:28] +[titan] 2025-10-05 18:26:21,195 - root - INFO - step: 32430 loss: 1.9222 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 18:26:21,195 - root - INFO - lr: 
8.9368e-06 gnorm: 1.14 [19:52:10< 4:38:16] +[titan] 2025-10-05 18:26:32,089 - root - INFO - step: 32435 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:26:32,089 - root - INFO - lr: 8.9317e-06 gnorm: 1.17 [19:52:20< 4:38:05] +[titan] 2025-10-05 18:26:42,997 - root - INFO - step: 32440 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:26:42,997 - root - INFO - lr: 8.9267e-06 gnorm: 1.18 [19:52:31< 4:37:54] +[titan] 2025-10-05 18:26:53,888 - root - INFO - step: 32445 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 18:26:53,888 - root - INFO - lr: 8.9217e-06 gnorm: 1.18 [19:52:42< 4:37:43] +[titan] 2025-10-05 18:27:02,624 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:27:04,807 - root - INFO - step: 32450 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 18:27:04,807 - root - INFO - lr: 8.9166e-06 gnorm: 1.18 [19:52:53< 4:37:32] +[titan] 2025-10-05 18:27:15,706 - root - INFO - step: 32455 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7589 +[titan] 2025-10-05 18:27:15,706 - root - INFO - lr: 8.9116e-06 gnorm: 1.15 [19:53:04< 4:37:21] +[titan] 2025-10-05 18:27:26,608 - root - INFO - step: 32460 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 18:27:26,608 - root - INFO - lr: 8.9066e-06 gnorm: 1.14 [19:53:15< 4:37:10] +[titan] 2025-10-05 18:27:37,484 - root - INFO - step: 32465 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 18:27:37,485 - root - INFO - lr: 8.9015e-06 gnorm: 1.11 [19:53:26< 4:36:59] +[titan] 2025-10-05 18:27:48,368 - root - INFO - step: 32470 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:27:48,368 - root - INFO - lr: 8.8965e-06 gnorm: 1.13 [19:53:37< 4:36:48] +[titan] 2025-10-05 18:27:59,231 - root - INFO - step: 32475 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 18:27:59,231 - root - INFO - lr: 8.8915e-06 gnorm: 1.15 [19:53:48< 4:36:37] +[titan] 2025-10-05 18:28:10,108 - root - INFO - step: 32480 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7775 +[titan] 2025-10-05 18:28:10,108 - root - INFO - lr: 
8.8865e-06 gnorm: 1.18 [19:53:58< 4:36:26] +[titan] 2025-10-05 18:28:20,988 - root - INFO - step: 32485 loss: 1.9823 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 18:28:20,988 - root - INFO - lr: 8.8815e-06 gnorm: 1.15 [19:54:09< 4:36:15] +[titan] 2025-10-05 18:28:31,851 - root - INFO - step: 32490 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 18:28:31,851 - root - INFO - lr: 8.8765e-06 gnorm: 1.16 [19:54:20< 4:36:04] +[titan] 2025-10-05 18:28:42,715 - root - INFO - step: 32495 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:28:42,716 - root - INFO - lr: 8.8715e-06 gnorm: 1.16 [19:54:31< 4:35:53] +[titan] 2025-10-05 18:28:51,431 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:28:53,617 - root - INFO - step: 32500 loss: 1.9959 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 18:28:53,617 - root - INFO - lr: 8.8665e-06 gnorm: 1.21 [19:54:42< 4:35:42] +[titan] 2025-10-05 18:29:04,488 - root - INFO - step: 32505 loss: 1.9052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6852 +[titan] 2025-10-05 18:29:04,489 - root - INFO - lr: 8.8615e-06 gnorm: 1.15 [19:54:53< 4:35:31] +[titan] 2025-10-05 18:29:15,355 - root - INFO - step: 32510 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 18:29:15,355 - root - INFO - lr: 8.8565e-06 gnorm: 1.20 [19:55:04< 4:35:19] +[titan] 2025-10-05 18:29:26,197 - root - INFO - step: 32515 loss: 1.9015 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 18:29:26,198 - root - INFO - lr: 8.8515e-06 gnorm: 1.16 [19:55:15< 4:35:08] +[titan] 2025-10-05 18:29:37,043 - root - INFO - step: 32520 loss: 1.9322 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:29:37,043 - root - INFO - lr: 8.8465e-06 gnorm: 1.14 [19:55:25< 4:34:57] +[titan] 2025-10-05 18:29:47,915 - root - INFO - step: 32525 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:29:47,915 - root - INFO - lr: 8.8415e-06 gnorm: 1.17 [19:55:36< 4:34:46] +[titan] 2025-10-05 18:29:58,786 - root - INFO - step: 32530 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:29:58,786 - root - INFO - lr: 
8.8365e-06 gnorm: 1.14 [19:55:47< 4:34:35] +[titan] 2025-10-05 18:30:09,635 - root - INFO - step: 32535 loss: 1.9367 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 18:30:09,635 - root - INFO - lr: 8.8315e-06 gnorm: 1.18 [19:55:58< 4:34:24] +[titan] 2025-10-05 18:30:20,517 - root - INFO - step: 32540 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:30:20,517 - root - INFO - lr: 8.8265e-06 gnorm: 1.22 [19:56:09< 4:34:13] +[titan] 2025-10-05 18:30:31,388 - root - INFO - step: 32545 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 18:30:31,388 - root - INFO - lr: 8.8216e-06 gnorm: 1.18 [19:56:20< 4:34:02] +[titan] 2025-10-05 18:30:40,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:30:42,279 - root - INFO - step: 32550 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 18:30:42,279 - root - INFO - lr: 8.8166e-06 gnorm: 1.22 [19:56:31< 4:33:51] +[titan] 2025-10-05 18:30:53,167 - root - INFO - step: 32555 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7816 +[titan] 2025-10-05 18:30:53,168 - root - INFO - lr: 8.8116e-06 gnorm: 1.16 [19:56:41< 4:33:40] +[titan] 2025-10-05 18:31:04,037 - root - INFO - step: 32560 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7457 +[titan] 2025-10-05 18:31:04,037 - root - INFO - lr: 8.8066e-06 gnorm: 1.15 [19:56:52< 4:33:29] +[titan] 2025-10-05 18:31:14,905 - root - INFO - step: 32565 loss: 2.0104 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 18:31:14,905 - root - INFO - lr: 8.8017e-06 gnorm: 1.18 [19:57:03< 4:33:18] +[titan] 2025-10-05 18:31:25,750 - root - INFO - step: 32570 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:31:25,750 - root - INFO - lr: 8.7967e-06 gnorm: 1.18 [19:57:14< 4:33:07] +[titan] 2025-10-05 18:31:36,615 - root - INFO - step: 32575 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 18:31:36,615 - root - INFO - lr: 8.7917e-06 gnorm: 1.21 [19:57:25< 4:32:56] +[titan] 2025-10-05 18:31:47,505 - root - INFO - step: 32580 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 18:31:47,505 - root - INFO - lr: 
8.7868e-06 gnorm: 1.18 [19:57:36< 4:32:45] +[titan] 2025-10-05 18:31:58,405 - root - INFO - step: 32585 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 18:31:58,406 - root - INFO - lr: 8.7818e-06 gnorm: 1.15 [19:57:47< 4:32:34] +[titan] 2025-10-05 18:32:09,289 - root - INFO - step: 32590 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 18:32:09,289 - root - INFO - lr: 8.7769e-06 gnorm: 1.17 [19:57:58< 4:32:22] +[titan] 2025-10-05 18:32:20,143 - root - INFO - step: 32595 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 18:32:20,143 - root - INFO - lr: 8.7719e-06 gnorm: 1.15 [19:58:08< 4:32:11] +[titan] 2025-10-05 18:32:28,834 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:32:31,015 - root - INFO - step: 32600 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:32:31,015 - root - INFO - lr: 8.7670e-06 gnorm: 1.15 [19:58:19< 4:32:00] +[titan] 2025-10-05 18:32:41,866 - root - INFO - step: 32605 loss: 1.9357 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 18:32:41,866 - root - INFO - lr: 8.7621e-06 gnorm: 1.17 [19:58:30< 4:31:49] +[titan] 2025-10-05 18:32:52,744 - root - INFO - step: 32610 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 18:32:52,744 - root - INFO - lr: 8.7571e-06 gnorm: 1.17 [19:58:41< 4:31:38] +[titan] 2025-10-05 18:33:03,626 - root - INFO - step: 32615 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:33:03,626 - root - INFO - lr: 8.7522e-06 gnorm: 1.16 [19:58:52< 4:31:27] +[titan] 2025-10-05 18:33:14,510 - root - INFO - step: 32620 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 18:33:14,510 - root - INFO - lr: 8.7472e-06 gnorm: 1.19 [19:59:03< 4:31:16] +[titan] 2025-10-05 18:33:25,381 - root - INFO - step: 32625 loss: 1.9774 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 18:33:25,381 - root - INFO - lr: 8.7423e-06 gnorm: 1.16 [19:59:14< 4:31:05] +[titan] 2025-10-05 18:33:36,243 - root - INFO - step: 32630 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 18:33:36,243 - root - INFO - lr: 
8.7374e-06 gnorm: 1.17 [19:59:25< 4:30:54] +[titan] 2025-10-05 18:33:47,116 - root - INFO - step: 32635 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:33:47,116 - root - INFO - lr: 8.7325e-06 gnorm: 1.15 [19:59:35< 4:30:43] +[titan] 2025-10-05 18:33:58,057 - root - INFO - step: 32640 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:33:58,058 - root - INFO - lr: 8.7275e-06 gnorm: 1.20 [19:59:46< 4:30:32] +[titan] 2025-10-05 18:34:08,946 - root - INFO - step: 32645 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:34:08,946 - root - INFO - lr: 8.7226e-06 gnorm: 1.17 [19:59:57< 4:30:21] +[titan] 2025-10-05 18:34:17,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:34:19,814 - root - INFO - step: 32650 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7704 +[titan] 2025-10-05 18:34:19,814 - root - INFO - lr: 8.7177e-06 gnorm: 1.14 [20:00:08< 4:30:10] +[titan] 2025-10-05 18:34:30,684 - root - INFO - step: 32655 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 18:34:30,684 - root - INFO - lr: 8.7128e-06 gnorm: 1.21 [20:00:19< 4:29:59] +[titan] 2025-10-05 18:34:41,540 - root - INFO - step: 32660 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 18:34:41,540 - root - INFO - lr: 8.7079e-06 gnorm: 1.18 [20:00:30< 4:29:48] +[titan] 2025-10-05 18:34:52,419 - root - INFO - step: 32665 loss: 1.9116 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 18:34:52,419 - root - INFO - lr: 8.7030e-06 gnorm: 1.19 [20:00:41< 4:29:37] +[titan] 2025-10-05 18:35:03,284 - root - INFO - step: 32670 loss: 1.9841 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 18:35:03,284 - root - INFO - lr: 8.6981e-06 gnorm: 1.23 [20:00:52< 4:29:25] +[titan] 2025-10-05 18:35:14,164 - root - INFO - step: 32675 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:35:14,164 - root - INFO - lr: 8.6932e-06 gnorm: 1.14 [20:01:02< 4:29:14] +[titan] 2025-10-05 18:35:25,030 - root - INFO - step: 32680 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 18:35:25,030 - root - INFO - lr: 
8.6883e-06 gnorm: 1.17 [20:01:13< 4:29:03] +[titan] 2025-10-05 18:35:35,901 - root - INFO - step: 32685 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 18:35:35,902 - root - INFO - lr: 8.6834e-06 gnorm: 1.19 [20:01:24< 4:28:52] +[titan] 2025-10-05 18:35:46,769 - root - INFO - step: 32690 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 18:35:46,769 - root - INFO - lr: 8.6785e-06 gnorm: 1.17 [20:01:35< 4:28:41] +[titan] 2025-10-05 18:35:57,644 - root - INFO - step: 32695 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 18:35:57,644 - root - INFO - lr: 8.6736e-06 gnorm: 1.18 [20:01:46< 4:28:30] +[titan] 2025-10-05 18:36:06,324 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:36:08,504 - root - INFO - step: 32700 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 18:36:08,504 - root - INFO - lr: 8.6687e-06 gnorm: 1.16 [20:01:57< 4:28:19] +[titan] 2025-10-05 18:36:19,368 - root - INFO - step: 32705 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6516 +[titan] 2025-10-05 18:36:19,368 - root - INFO - lr: 8.6638e-06 gnorm: 1.12 [20:02:08< 4:28:08] +[titan] 2025-10-05 18:36:30,228 - root - INFO - step: 32710 loss: 1.9004 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:36:30,228 - root - INFO - lr: 8.6590e-06 gnorm: 1.17 [20:02:19< 4:27:57] +[titan] 2025-10-05 18:36:41,098 - root - INFO - step: 32715 loss: 1.9595 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7326 +[titan] 2025-10-05 18:36:41,098 - root - INFO - lr: 8.6541e-06 gnorm: 1.21 [20:02:29< 4:27:46] +[titan] 2025-10-05 18:36:51,987 - root - INFO - step: 32720 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 18:36:51,987 - root - INFO - lr: 8.6492e-06 gnorm: 1.15 [20:02:40< 4:27:35] +[titan] 2025-10-05 18:37:02,853 - root - INFO - step: 32725 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7097 +[titan] 2025-10-05 18:37:02,853 - root - INFO - lr: 8.6443e-06 gnorm: 1.18 [20:02:51< 4:27:24] +[titan] 2025-10-05 18:37:13,725 - root - INFO - step: 32730 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 18:37:13,725 - root - INFO - lr: 
8.6395e-06 gnorm: 1.18 [20:03:02< 4:27:13] +[titan] 2025-10-05 18:37:24,602 - root - INFO - step: 32735 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 18:37:24,603 - root - INFO - lr: 8.6346e-06 gnorm: 1.18 [20:03:13< 4:27:02] +[titan] 2025-10-05 18:37:35,486 - root - INFO - step: 32740 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 18:37:35,486 - root - INFO - lr: 8.6297e-06 gnorm: 1.15 [20:03:24< 4:26:51] +[titan] 2025-10-05 18:37:46,374 - root - INFO - step: 32745 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 18:37:46,374 - root - INFO - lr: 8.6249e-06 gnorm: 1.18 [20:03:35< 4:26:40] +[titan] 2025-10-05 18:37:55,082 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:37:57,268 - root - INFO - step: 32750 loss: 1.9951 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 18:37:57,269 - root - INFO - lr: 8.6200e-06 gnorm: 1.16 [20:03:46< 4:26:28] +[titan] 2025-10-05 18:38:08,165 - root - INFO - step: 32755 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 18:38:08,165 - root - INFO - lr: 8.6152e-06 gnorm: 1.15 [20:03:56< 4:26:17] +[titan] 2025-10-05 18:38:19,048 - root - INFO - step: 32760 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 18:38:19,048 - root - INFO - lr: 8.6103e-06 gnorm: 1.17 [20:04:07< 4:26:06] +[titan] 2025-10-05 18:38:30,021 - root - INFO - step: 32765 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 29,864 tflops: 414.31 mfu: 41.89% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 18:38:30,021 - root - INFO - lr: 8.6055e-06 gnorm: 1.19 [20:04:18< 4:25:55] +[titan] 2025-10-05 18:38:36,740 - root - INFO - Dumping profiler traces at step 32768 +[titan] 2025-10-05 18:38:36,779 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:38:41,140 - root - INFO - step: 32770 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 18:38:41,140 - root - INFO - lr: 8.6006e-06 gnorm: 1.18 [20:04:29< 4:25:44] +[titan] 2025-10-05 18:38:52,028 - root - INFO - step: 32775 loss: 1.8866 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 18:38:52,028 - root - INFO - lr: 8.5958e-06 gnorm: 1.16 [20:04:40< 4:25:33] +[titan] 2025-10-05 18:39:02,921 - root - INFO - step: 32780 loss: 
1.9463 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 18:39:02,921 - root - INFO - lr: 8.5909e-06 gnorm: 1.17 [20:04:51< 4:25:22] +[titan] 2025-10-05 18:39:13,797 - root - INFO - step: 32785 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 18:39:13,797 - root - INFO - lr: 8.5861e-06 gnorm: 1.19 [20:05:02< 4:25:11] +[titan] 2025-10-05 18:39:24,687 - root - INFO - step: 32790 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 18:39:24,688 - root - INFO - lr: 8.5813e-06 gnorm: 1.18 [20:05:13< 4:25:00] +[titan] 2025-10-05 18:39:35,548 - root - INFO - step: 32795 loss: 1.9151 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 18:39:35,548 - root - INFO - lr: 8.5764e-06 gnorm: 1.17 [20:05:24< 4:24:49] +[titan] 2025-10-05 18:39:44,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:39:46,421 - root - INFO - step: 32800 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:39:46,421 - root - INFO - lr: 8.5716e-06 gnorm: 1.14 [20:05:35< 4:24:38] +[titan] 2025-10-05 18:39:57,301 - root - INFO - step: 32805 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 18:39:57,301 - root - INFO - lr: 8.5668e-06 gnorm: 1.20 [20:05:46< 4:24:27] +[titan] 2025-10-05 18:40:08,174 - root - INFO - step: 32810 loss: 1.8700 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6536 +[titan] 2025-10-05 18:40:08,175 - root - INFO - lr: 8.5620e-06 gnorm: 1.15 [20:05:56< 4:24:16] +[titan] 2025-10-05 18:40:19,054 - root - INFO - step: 32815 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 18:40:19,054 - root - INFO - lr: 8.5572e-06 gnorm: 1.17 [20:06:07< 4:24:05] +[titan] 2025-10-05 18:40:29,922 - root - INFO - step: 32820 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:40:29,922 - root - INFO - lr: 8.5523e-06 gnorm: 1.20 [20:06:18< 4:23:54] +[titan] 2025-10-05 18:40:40,814 - root - INFO - step: 32825 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7226 +[titan] 2025-10-05 18:40:40,814 - root - INFO - lr: 8.5475e-06 gnorm: 1.14 [20:06:29< 4:23:43] +[titan] 2025-10-05 18:40:51,678 - root - INFO - step: 32830 loss: 1.9398 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:40:51,678 - root - INFO - lr: 
8.5427e-06 gnorm: 1.23 [20:06:40< 4:23:32] +[titan] 2025-10-05 18:41:02,570 - root - INFO - step: 32835 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:41:02,570 - root - INFO - lr: 8.5379e-06 gnorm: 1.15 [20:06:51< 4:23:21] +[titan] 2025-10-05 18:41:13,446 - root - INFO - step: 32840 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 18:41:13,447 - root - INFO - lr: 8.5331e-06 gnorm: 1.21 [20:07:02< 4:23:09] +[titan] 2025-10-05 18:41:24,359 - root - INFO - step: 32845 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:41:24,359 - root - INFO - lr: 8.5283e-06 gnorm: 1.16 [20:07:13< 4:22:58] +[titan] 2025-10-05 18:41:33,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:41:35,240 - root - INFO - step: 32850 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:41:35,240 - root - INFO - lr: 8.5235e-06 gnorm: 1.14 [20:07:24< 4:22:47] +[titan] 2025-10-05 18:41:46,124 - root - INFO - step: 32855 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 18:41:46,125 - root - INFO - lr: 8.5187e-06 gnorm: 1.17 [20:07:34< 4:22:36] +[titan] 2025-10-05 18:41:56,993 - root - INFO - step: 32860 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:41:56,993 - root - INFO - lr: 8.5139e-06 gnorm: 1.22 [20:07:45< 4:22:25] +[titan] 2025-10-05 18:42:07,859 - root - INFO - step: 32865 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 18:42:07,859 - root - INFO - lr: 8.5091e-06 gnorm: 1.20 [20:07:56< 4:22:14] +[titan] 2025-10-05 18:42:18,752 - root - INFO - step: 32870 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 18:42:18,752 - root - INFO - lr: 8.5044e-06 gnorm: 1.13 [20:08:07< 4:22:03] +[titan] 2025-10-05 18:42:29,644 - root - INFO - step: 32875 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 18:42:29,644 - root - INFO - lr: 8.4996e-06 gnorm: 1.19 [20:08:18< 4:21:52] +[titan] 2025-10-05 18:42:40,538 - root - INFO - step: 32880 loss: 1.9506 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 18:42:40,539 - root - INFO - lr: 
8.4948e-06 gnorm: 1.15 [20:08:29< 4:21:41] +[titan] 2025-10-05 18:42:51,405 - root - INFO - step: 32885 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:42:51,405 - root - INFO - lr: 8.4900e-06 gnorm: 1.14 [20:08:40< 4:21:30] +[titan] 2025-10-05 18:43:02,281 - root - INFO - step: 32890 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 18:43:02,281 - root - INFO - lr: 8.4853e-06 gnorm: 1.17 [20:08:51< 4:21:19] +[titan] 2025-10-05 18:43:13,144 - root - INFO - step: 32895 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7334 +[titan] 2025-10-05 18:43:13,144 - root - INFO - lr: 8.4805e-06 gnorm: 1.22 [20:09:01< 4:21:08] +[titan] 2025-10-05 18:43:21,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:43:24,029 - root - INFO - step: 32900 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:43:24,030 - root - INFO - lr: 8.4757e-06 gnorm: 1.16 [20:09:12< 4:20:57] +[titan] 2025-10-05 18:43:34,912 - root - INFO - step: 32905 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:43:34,912 - root - INFO - lr: 8.4710e-06 gnorm: 1.22 [20:09:23< 4:20:46] +[titan] 2025-10-05 18:43:45,784 - root - INFO - step: 32910 loss: 1.9113 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6908 +[titan] 2025-10-05 18:43:45,784 - root - INFO - lr: 8.4662e-06 gnorm: 1.19 [20:09:34< 4:20:35] +[titan] 2025-10-05 18:43:56,656 - root - INFO - step: 32915 loss: 1.9080 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6877 +[titan] 2025-10-05 18:43:56,657 - root - INFO - lr: 8.4614e-06 gnorm: 1.15 [20:09:45< 4:20:24] +[titan] 2025-10-05 18:44:07,519 - root - INFO - step: 32920 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 18:44:07,520 - root - INFO - lr: 8.4567e-06 gnorm: 1.14 [20:09:56< 4:20:13] +[titan] 2025-10-05 18:44:18,364 - root - INFO - step: 32925 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 18:44:18,364 - root - INFO - lr: 8.4519e-06 gnorm: 1.16 [20:10:07< 4:20:01] +[titan] 2025-10-05 18:44:29,222 - root - INFO - step: 32930 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:44:29,222 - root - INFO - lr: 
8.4472e-06 gnorm: 1.19 [20:10:18< 4:19:50] +[titan] 2025-10-05 18:44:40,084 - root - INFO - step: 32935 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 18:44:40,084 - root - INFO - lr: 8.4424e-06 gnorm: 1.20 [20:10:28< 4:19:39] +[titan] 2025-10-05 18:44:50,961 - root - INFO - step: 32940 loss: 2.0407 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 18:44:50,961 - root - INFO - lr: 8.4377e-06 gnorm: 1.21 [20:10:39< 4:19:28] +[titan] 2025-10-05 18:45:01,827 - root - INFO - step: 32945 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:45:01,827 - root - INFO - lr: 8.4330e-06 gnorm: 1.18 [20:10:50< 4:19:17] +[titan] 2025-10-05 18:45:10,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:45:12,694 - root - INFO - step: 32950 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:45:12,695 - root - INFO - lr: 8.4282e-06 gnorm: 1.17 [20:11:01< 4:19:06] +[titan] 2025-10-05 18:45:23,553 - root - INFO - step: 32955 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 18:45:23,553 - root - INFO - lr: 8.4235e-06 gnorm: 1.19 [20:11:12< 4:18:55] +[titan] 2025-10-05 18:45:34,381 - root - INFO - step: 32960 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 18:45:34,381 - root - INFO - lr: 8.4187e-06 gnorm: 1.16 [20:11:23< 4:18:44] +[titan] 2025-10-05 18:45:45,262 - root - INFO - step: 32965 loss: 2.0361 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 18:45:45,262 - root - INFO - lr: 8.4140e-06 gnorm: 1.21 [20:11:34< 4:18:33] +[titan] 2025-10-05 18:45:56,104 - root - INFO - step: 32970 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 18:45:56,105 - root - INFO - lr: 8.4093e-06 gnorm: 1.16 [20:11:44< 4:18:22] +[titan] 2025-10-05 18:46:06,947 - root - INFO - step: 32975 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7958 +[titan] 2025-10-05 18:46:06,948 - root - INFO - lr: 8.4046e-06 gnorm: 1.24 [20:11:55< 4:18:11] +[titan] 2025-10-05 18:46:17,797 - root - INFO - step: 32980 loss: 1.9700 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7415 +[titan] 2025-10-05 18:46:17,797 - root - INFO - lr: 
8.3999e-06 gnorm: 1.16 [20:12:06< 4:18:00] +[titan] 2025-10-05 18:46:28,662 - root - INFO - step: 32985 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 18:46:28,662 - root - INFO - lr: 8.3951e-06 gnorm: 1.18 [20:12:17< 4:17:49] +[titan] 2025-10-05 18:46:39,537 - root - INFO - step: 32990 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 18:46:39,537 - root - INFO - lr: 8.3904e-06 gnorm: 1.22 [20:12:28< 4:17:38] +[titan] 2025-10-05 18:46:50,425 - root - INFO - step: 32995 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:46:50,425 - root - INFO - lr: 8.3857e-06 gnorm: 1.15 [20:12:39< 4:17:27] +[titan] 2025-10-05 18:46:59,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:47:01,344 - root - INFO - step: 33000 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 18:47:01,345 - root - INFO - lr: 8.3810e-06 gnorm: 1.17 [20:12:50< 4:17:16] +[titan] 2025-10-05 18:47:12,220 - root - INFO - step: 33005 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:47:12,220 - root - INFO - lr: 8.3763e-06 gnorm: 1.17 [20:13:00< 4:17:05] +[titan] 2025-10-05 18:47:23,105 - root - INFO - step: 33010 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 18:47:23,105 - root - INFO - lr: 8.3716e-06 gnorm: 1.16 [20:13:11< 4:16:53] +[titan] 2025-10-05 18:47:33,991 - root - INFO - step: 33015 loss: 1.9630 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 18:47:33,991 - root - INFO - lr: 8.3669e-06 gnorm: 1.21 [20:13:22< 4:16:42] +[titan] 2025-10-05 18:47:44,854 - root - INFO - step: 33020 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:47:44,854 - root - INFO - lr: 8.3622e-06 gnorm: 1.20 [20:13:33< 4:16:31] +[titan] 2025-10-05 18:47:55,728 - root - INFO - step: 33025 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:47:55,728 - root - INFO - lr: 8.3575e-06 gnorm: 1.16 [20:13:44< 4:16:20] +[titan] 2025-10-05 18:48:06,621 - root - INFO - step: 33030 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 18:48:06,622 - root - INFO - lr: 
8.3528e-06 gnorm: 1.15 [20:13:55< 4:16:09] +[titan] 2025-10-05 18:48:17,519 - root - INFO - step: 33035 loss: 2.0726 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 18:48:17,519 - root - INFO - lr: 8.3481e-06 gnorm: 1.22 [20:14:06< 4:15:58] +[titan] 2025-10-05 18:48:28,405 - root - INFO - step: 33040 loss: 1.9946 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 18:48:28,405 - root - INFO - lr: 8.3435e-06 gnorm: 1.19 [20:14:17< 4:15:47] +[titan] 2025-10-05 18:48:39,282 - root - INFO - step: 33045 loss: 1.9543 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 18:48:39,282 - root - INFO - lr: 8.3388e-06 gnorm: 1.16 [20:14:28< 4:15:36] +[titan] 2025-10-05 18:48:47,979 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:48:50,167 - root - INFO - step: 33050 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:48:50,167 - root - INFO - lr: 8.3341e-06 gnorm: 1.22 [20:14:38< 4:15:25] +[titan] 2025-10-05 18:49:01,041 - root - INFO - step: 33055 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7799 +[titan] 2025-10-05 18:49:01,041 - root - INFO - lr: 8.3294e-06 gnorm: 1.21 [20:14:49< 4:15:14] +[titan] 2025-10-05 18:49:11,966 - root - INFO - step: 33060 loss: 1.9156 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6929 +[titan] 2025-10-05 18:49:11,967 - root - INFO - lr: 8.3248e-06 gnorm: 1.15 [20:15:00< 4:15:03] +[titan] 2025-10-05 18:49:22,851 - root - INFO - step: 33065 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 18:49:22,851 - root - INFO - lr: 8.3201e-06 gnorm: 1.22 [20:15:11< 4:14:52] +[titan] 2025-10-05 18:49:33,699 - root - INFO - step: 33070 loss: 1.9488 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 18:49:33,699 - root - INFO - lr: 8.3154e-06 gnorm: 1.17 [20:15:22< 4:14:41] +[titan] 2025-10-05 18:49:44,562 - root - INFO - step: 33075 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 18:49:44,562 - root - INFO - lr: 8.3108e-06 gnorm: 1.18 [20:15:33< 4:14:30] +[titan] 2025-10-05 18:49:55,434 - root - INFO - step: 33080 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6975 +[titan] 2025-10-05 18:49:55,434 - root - INFO - lr: 
8.3061e-06 gnorm: 1.15 [20:15:44< 4:14:19] +[titan] 2025-10-05 18:50:06,341 - root - INFO - step: 33085 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 18:50:06,341 - root - INFO - lr: 8.3015e-06 gnorm: 1.19 [20:15:55< 4:14:08] +[titan] 2025-10-05 18:50:17,204 - root - INFO - step: 33090 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 18:50:17,204 - root - INFO - lr: 8.2968e-06 gnorm: 1.17 [20:16:05< 4:13:57] +[titan] 2025-10-05 18:50:28,085 - root - INFO - step: 33095 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 18:50:28,085 - root - INFO - lr: 8.2922e-06 gnorm: 1.19 [20:16:16< 4:13:46] +[titan] 2025-10-05 18:50:36,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:50:38,970 - root - INFO - step: 33100 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 18:50:38,970 - root - INFO - lr: 8.2875e-06 gnorm: 1.16 [20:16:27< 4:13:34] +[titan] 2025-10-05 18:50:49,853 - root - INFO - step: 33105 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:50:49,853 - root - INFO - lr: 8.2829e-06 gnorm: 1.18 [20:16:38< 4:13:23] +[titan] 2025-10-05 18:51:00,737 - root - INFO - step: 33110 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 18:51:00,737 - root - INFO - lr: 8.2782e-06 gnorm: 1.20 [20:16:49< 4:13:12] +[titan] 2025-10-05 18:51:11,650 - root - INFO - step: 33115 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7236 +[titan] 2025-10-05 18:51:11,650 - root - INFO - lr: 8.2736e-06 gnorm: 1.18 [20:17:00< 4:13:01] +[titan] 2025-10-05 18:51:22,517 - root - INFO - step: 33120 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 18:51:22,518 - root - INFO - lr: 8.2690e-06 gnorm: 1.15 [20:17:11< 4:12:50] +[titan] 2025-10-05 18:51:33,423 - root - INFO - step: 33125 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 18:51:33,423 - root - INFO - lr: 8.2643e-06 gnorm: 1.16 [20:17:22< 4:12:39] +[titan] 2025-10-05 18:51:44,314 - root - INFO - step: 33130 loss: 1.9891 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:51:44,314 - root - INFO - lr: 
8.2597e-06 gnorm: 1.16 [20:17:33< 4:12:28] +[titan] 2025-10-05 18:51:55,207 - root - INFO - step: 33135 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 18:51:55,207 - root - INFO - lr: 8.2551e-06 gnorm: 1.19 [20:17:43< 4:12:17] +[titan] 2025-10-05 18:52:06,124 - root - INFO - step: 33140 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 18:52:06,124 - root - INFO - lr: 8.2504e-06 gnorm: 1.15 [20:17:54< 4:12:06] +[titan] 2025-10-05 18:52:17,015 - root - INFO - step: 33145 loss: 1.8716 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6545 +[titan] 2025-10-05 18:52:17,015 - root - INFO - lr: 8.2458e-06 gnorm: 1.17 [20:18:05< 4:11:55] +[titan] 2025-10-05 18:52:25,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:52:27,896 - root - INFO - step: 33150 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 18:52:27,896 - root - INFO - lr: 8.2412e-06 gnorm: 1.28 [20:18:16< 4:11:44] +[titan] 2025-10-05 18:52:38,755 - root - INFO - step: 33155 loss: 1.9340 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 18:52:38,755 - root - INFO - lr: 8.2366e-06 gnorm: 1.17 [20:18:27< 4:11:33] +[titan] 2025-10-05 18:52:49,651 - root - INFO - step: 33160 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6592 +[titan] 2025-10-05 18:52:49,651 - root - INFO - lr: 8.2320e-06 gnorm: 1.14 [20:18:38< 4:11:22] +[titan] 2025-10-05 18:53:00,524 - root - INFO - step: 33165 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7065 +[titan] 2025-10-05 18:53:00,525 - root - INFO - lr: 8.2274e-06 gnorm: 1.16 [20:18:49< 4:11:11] +[titan] 2025-10-05 18:53:11,434 - root - INFO - step: 33170 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:53:11,435 - root - INFO - lr: 8.2228e-06 gnorm: 1.19 [20:19:00< 4:11:00] +[titan] 2025-10-05 18:53:22,306 - root - INFO - step: 33175 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:53:22,306 - root - INFO - lr: 8.2182e-06 gnorm: 1.21 [20:19:11< 4:10:49] +[titan] 2025-10-05 18:53:33,152 - root - INFO - step: 33180 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 18:53:33,152 - root - INFO - lr: 
8.2136e-06 gnorm: 1.19 [20:19:21< 4:10:38] +[titan] 2025-10-05 18:53:44,004 - root - INFO - step: 33185 loss: 1.9935 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 18:53:44,004 - root - INFO - lr: 8.2090e-06 gnorm: 1.16 [20:19:32< 4:10:27] +[titan] 2025-10-05 18:53:54,872 - root - INFO - step: 33190 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:53:54,872 - root - INFO - lr: 8.2044e-06 gnorm: 1.21 [20:19:43< 4:10:15] +[titan] 2025-10-05 18:54:05,750 - root - INFO - step: 33195 loss: 2.0158 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 18:54:05,750 - root - INFO - lr: 8.1998e-06 gnorm: 1.18 [20:19:54< 4:10:04] +[titan] 2025-10-05 18:54:14,509 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:54:16,691 - root - INFO - step: 33200 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 18:54:16,691 - root - INFO - lr: 8.1952e-06 gnorm: 1.21 [20:20:05< 4:09:53] +[titan] 2025-10-05 18:54:27,562 - root - INFO - step: 33205 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7204 +[titan] 2025-10-05 18:54:27,563 - root - INFO - lr: 8.1906e-06 gnorm: 1.18 [20:20:16< 4:09:42] +[titan] 2025-10-05 18:54:38,424 - root - INFO - step: 33210 loss: 1.9533 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 18:54:38,424 - root - INFO - lr: 8.1861e-06 gnorm: 1.19 [20:20:27< 4:09:31] +[titan] 2025-10-05 18:54:49,288 - root - INFO - step: 33215 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 18:54:49,288 - root - INFO - lr: 8.1815e-06 gnorm: 1.22 [20:20:38< 4:09:20] +[titan] 2025-10-05 18:55:00,163 - root - INFO - step: 33220 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:55:00,163 - root - INFO - lr: 8.1769e-06 gnorm: 1.21 [20:20:48< 4:09:09] +[titan] 2025-10-05 18:55:11,094 - root - INFO - step: 33225 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7865 +[titan] 2025-10-05 18:55:11,094 - root - INFO - lr: 8.1723e-06 gnorm: 1.22 [20:20:59< 4:08:58] +[titan] 2025-10-05 18:55:21,957 - root - INFO - step: 33230 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 18:55:21,957 - root - INFO - lr: 
8.1678e-06 gnorm: 1.18 [20:21:10< 4:08:47] +[titan] 2025-10-05 18:55:32,818 - root - INFO - step: 33235 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 18:55:32,818 - root - INFO - lr: 8.1632e-06 gnorm: 1.16 [20:21:21< 4:08:36] +[titan] 2025-10-05 18:55:43,665 - root - INFO - step: 33240 loss: 2.0182 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 18:55:43,665 - root - INFO - lr: 8.1586e-06 gnorm: 1.18 [20:21:32< 4:08:25] +[titan] 2025-10-05 18:55:54,510 - root - INFO - step: 33245 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 18:55:54,510 - root - INFO - lr: 8.1541e-06 gnorm: 1.19 [20:21:43< 4:08:14] +[titan] 2025-10-05 18:56:03,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:56:05,369 - root - INFO - step: 33250 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 18:56:05,369 - root - INFO - lr: 8.1495e-06 gnorm: 1.22 [20:21:54< 4:08:03] +[titan] 2025-10-05 18:56:16,302 - root - INFO - step: 33255 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 18:56:16,302 - root - INFO - lr: 8.1450e-06 gnorm: 1.21 [20:22:05< 4:07:52] +[titan] 2025-10-05 18:56:27,156 - root - INFO - step: 33260 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 18:56:27,156 - root - INFO - lr: 8.1404e-06 gnorm: 1.17 [20:22:15< 4:07:41] +[titan] 2025-10-05 18:56:37,991 - root - INFO - step: 33265 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 18:56:37,991 - root - INFO - lr: 8.1359e-06 gnorm: 1.18 [20:22:26< 4:07:30] +[titan] 2025-10-05 18:56:48,867 - root - INFO - step: 33270 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 18:56:48,867 - root - INFO - lr: 8.1313e-06 gnorm: 1.14 [20:22:37< 4:07:19] +[titan] 2025-10-05 18:56:59,716 - root - INFO - step: 33275 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 18:56:59,716 - root - INFO - lr: 8.1268e-06 gnorm: 1.16 [20:22:48< 4:07:08] +[titan] 2025-10-05 18:57:10,748 - root - INFO - step: 33280 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 29,703 tflops: 412.08 mfu: 41.67% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:57:10,748 - root - INFO - lr: 
8.1223e-06 gnorm: 1.20 [20:22:59< 4:06:57] +[titan] 2025-10-05 18:57:10,933 - root - INFO - Dumping profiler traces at step 33280 +[titan] 2025-10-05 18:57:10,972 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:57:21,865 - root - INFO - step: 33285 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 29,477 tflops: 408.95 mfu: 41.35% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:57:21,865 - root - INFO - lr: 8.1177e-06 gnorm: 1.18 [20:23:10< 4:06:46] +[titan] 2025-10-05 18:57:32,746 - root - INFO - step: 33290 loss: 1.9692 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 18:57:32,747 - root - INFO - lr: 8.1132e-06 gnorm: 1.19 [20:23:21< 4:06:34] +[titan] 2025-10-05 18:57:43,611 - root - INFO - step: 33295 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 18:57:43,611 - root - INFO - lr: 8.1087e-06 gnorm: 1.19 [20:23:32< 4:06:23] +[titan] 2025-10-05 18:57:52,270 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:57:54,457 - root - INFO - step: 33300 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 18:57:54,457 - root - INFO - lr: 8.1041e-06 gnorm: 1.22 [20:23:43< 4:06:12] +[titan] 2025-10-05 18:58:05,339 - root - INFO - step: 33305 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:58:05,339 - root - INFO - lr: 8.0996e-06 gnorm: 1.21 [20:23:54< 4:06:01] +[titan] 2025-10-05 18:58:16,262 - root - INFO - step: 33310 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 18:58:16,263 - root - INFO - lr: 8.0951e-06 gnorm: 1.24 [20:24:05< 4:05:50] +[titan] 2025-10-05 18:58:27,116 - root - INFO - step: 33315 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 18:58:27,116 - root - INFO - lr: 8.0906e-06 gnorm: 1.20 [20:24:15< 4:05:39] +[titan] 2025-10-05 18:58:38,011 - root - INFO - step: 33320 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 18:58:38,011 - root - INFO - lr: 8.0861e-06 gnorm: 1.18 [20:24:26< 4:05:28] +[titan] 2025-10-05 18:58:48,874 - root - INFO - step: 33325 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 18:58:48,874 - root - INFO - lr: 8.0816e-06 gnorm: 1.17 [20:24:37< 4:05:17] +[titan] 2025-10-05 18:58:59,752 - root - INFO - step: 33330 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 18:58:59,752 - root - INFO - lr: 
8.0771e-06 gnorm: 1.18 [20:24:48< 4:05:06] +[titan] 2025-10-05 18:59:10,641 - root - INFO - step: 33335 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 18:59:10,641 - root - INFO - lr: 8.0725e-06 gnorm: 1.20 [20:24:59< 4:04:55] +[titan] 2025-10-05 18:59:21,564 - root - INFO - step: 33340 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 18:59:21,564 - root - INFO - lr: 8.0680e-06 gnorm: 1.15 [20:25:10< 4:04:44] +[titan] 2025-10-05 18:59:32,450 - root - INFO - step: 33345 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:59:32,450 - root - INFO - lr: 8.0636e-06 gnorm: 1.18 [20:25:21< 4:04:33] +[titan] 2025-10-05 18:59:41,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:59:43,356 - root - INFO - step: 33350 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:59:43,356 - root - INFO - lr: 8.0591e-06 gnorm: 1.16 [20:25:32< 4:04:22] +[titan] 2025-10-05 18:59:54,223 - root - INFO - step: 33355 loss: 1.9358 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7114 +[titan] 2025-10-05 18:59:54,223 - root - INFO - lr: 8.0546e-06 gnorm: 1.18 [20:25:42< 4:04:11] +[titan] 2025-10-05 19:00:05,102 - root - INFO - step: 33360 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 19:00:05,102 - root - INFO - lr: 8.0501e-06 gnorm: 1.17 [20:25:53< 4:04:00] +[titan] 2025-10-05 19:00:16,037 - root - INFO - step: 33365 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:00:16,037 - root - INFO - lr: 8.0456e-06 gnorm: 1.20 [20:26:04< 4:03:49] +[titan] 2025-10-05 19:00:26,915 - root - INFO - step: 33370 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:00:26,916 - root - INFO - lr: 8.0411e-06 gnorm: 1.20 [20:26:15< 4:03:38] +[titan] 2025-10-05 19:00:37,762 - root - INFO - step: 33375 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:00:37,763 - root - INFO - lr: 8.0366e-06 gnorm: 1.21 [20:26:26< 4:03:27] +[titan] 2025-10-05 19:00:48,614 - root - INFO - step: 33380 loss: 1.9232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:00:48,614 - root - INFO - lr: 
8.0322e-06 gnorm: 1.18 [20:26:37< 4:03:15] +[titan] 2025-10-05 19:00:59,483 - root - INFO - step: 33385 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:00:59,483 - root - INFO - lr: 8.0277e-06 gnorm: 1.19 [20:26:48< 4:03:04] +[titan] 2025-10-05 19:01:10,340 - root - INFO - step: 33390 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 19:01:10,340 - root - INFO - lr: 8.0232e-06 gnorm: 1.19 [20:26:59< 4:02:53] +[titan] 2025-10-05 19:01:21,250 - root - INFO - step: 33395 loss: 1.9470 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 19:01:21,250 - root - INFO - lr: 8.0187e-06 gnorm: 1.17 [20:27:09< 4:02:42] +[titan] 2025-10-05 19:01:29,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:01:32,102 - root - INFO - step: 33400 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 19:01:32,102 - root - INFO - lr: 8.0143e-06 gnorm: 1.17 [20:27:20< 4:02:31] +[titan] 2025-10-05 19:01:42,959 - root - INFO - step: 33405 loss: 1.8686 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 19:01:42,960 - root - INFO - lr: 8.0098e-06 gnorm: 1.18 [20:27:31< 4:02:20] +[titan] 2025-10-05 19:01:53,819 - root - INFO - step: 33410 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 19:01:53,819 - root - INFO - lr: 8.0054e-06 gnorm: 1.19 [20:27:42< 4:02:09] +[titan] 2025-10-05 19:02:04,734 - root - INFO - step: 33415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 19:02:04,734 - root - INFO - lr: 8.0009e-06 gnorm: 1.18 [20:27:53< 4:01:58] +[titan] 2025-10-05 19:02:15,660 - root - INFO - step: 33420 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 19:02:15,661 - root - INFO - lr: 7.9965e-06 gnorm: 1.20 [20:28:04< 4:01:47] +[titan] 2025-10-05 19:02:26,561 - root - INFO - step: 33425 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 19:02:26,561 - root - INFO - lr: 7.9920e-06 gnorm: 1.15 [20:28:15< 4:01:36] +[titan] 2025-10-05 19:02:37,445 - root - INFO - step: 33430 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 19:02:37,446 - root - INFO - lr: 
7.9876e-06 gnorm: 1.18 [20:28:26< 4:01:25] +[titan] 2025-10-05 19:02:48,327 - root - INFO - step: 33435 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:02:48,327 - root - INFO - lr: 7.9831e-06 gnorm: 1.16 [20:28:37< 4:01:14] +[titan] 2025-10-05 19:02:59,208 - root - INFO - step: 33440 loss: 1.9304 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 19:02:59,209 - root - INFO - lr: 7.9787e-06 gnorm: 1.20 [20:28:47< 4:01:03] +[titan] 2025-10-05 19:03:10,117 - root - INFO - step: 33445 loss: 2.0526 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8150 +[titan] 2025-10-05 19:03:10,117 - root - INFO - lr: 7.9742e-06 gnorm: 1.21 [20:28:58< 4:00:52] +[titan] 2025-10-05 19:03:18,858 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:03:21,042 - root - INFO - step: 33450 loss: 1.9353 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:03:21,042 - root - INFO - lr: 7.9698e-06 gnorm: 1.16 [20:29:09< 4:00:41] +[titan] 2025-10-05 19:03:31,901 - root - INFO - step: 33455 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:03:31,901 - root - INFO - lr: 7.9654e-06 gnorm: 1.18 [20:29:20< 4:00:30] +[titan] 2025-10-05 19:03:42,767 - root - INFO - step: 33460 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:03:42,767 - root - INFO - lr: 7.9610e-06 gnorm: 1.19 [20:29:31< 4:00:19] +[titan] 2025-10-05 19:03:53,626 - root - INFO - step: 33465 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:03:53,626 - root - INFO - lr: 7.9565e-06 gnorm: 1.20 [20:29:42< 4:00:08] +[titan] 2025-10-05 19:04:04,499 - root - INFO - step: 33470 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:04:04,499 - root - INFO - lr: 7.9521e-06 gnorm: 1.22 [20:29:53< 3:59:57] +[titan] 2025-10-05 19:04:15,390 - root - INFO - step: 33475 loss: 1.9236 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:04:15,390 - root - INFO - lr: 7.9477e-06 gnorm: 1.15 [20:30:04< 3:59:46] +[titan] 2025-10-05 19:04:26,338 - root - INFO - step: 33480 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 19:04:26,338 - root - INFO - lr: 
7.9433e-06 gnorm: 1.18 [20:30:15< 3:59:34] +[titan] 2025-10-05 19:04:37,222 - root - INFO - step: 33485 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 19:04:37,222 - root - INFO - lr: 7.9389e-06 gnorm: 1.12 [20:30:25< 3:59:23] +[titan] 2025-10-05 19:04:48,095 - root - INFO - step: 33490 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 19:04:48,095 - root - INFO - lr: 7.9345e-06 gnorm: 1.17 [20:30:36< 3:59:12] +[titan] 2025-10-05 19:04:58,969 - root - INFO - step: 33495 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 19:04:58,969 - root - INFO - lr: 7.9301e-06 gnorm: 1.19 [20:30:47< 3:59:01] +[titan] 2025-10-05 19:05:07,650 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:05:09,844 - root - INFO - step: 33500 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 19:05:09,844 - root - INFO - lr: 7.9256e-06 gnorm: 1.16 [20:30:58< 3:58:50] +[titan] 2025-10-05 19:05:20,803 - root - INFO - step: 33505 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7103 +[titan] 2025-10-05 19:05:20,803 - root - INFO - lr: 7.9212e-06 gnorm: 1.17 [20:31:09< 3:58:39] +[titan] 2025-10-05 19:05:31,705 - root - INFO - step: 33510 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 19:05:31,706 - root - INFO - lr: 7.9169e-06 gnorm: 1.18 [20:31:20< 3:58:28] +[titan] 2025-10-05 19:05:42,585 - root - INFO - step: 33515 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 19:05:42,585 - root - INFO - lr: 7.9125e-06 gnorm: 1.16 [20:31:31< 3:58:17] +[titan] 2025-10-05 19:05:53,459 - root - INFO - step: 33520 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 19:05:53,459 - root - INFO - lr: 7.9081e-06 gnorm: 1.20 [20:31:42< 3:58:06] +[titan] 2025-10-05 19:06:04,332 - root - INFO - step: 33525 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6778 +[titan] 2025-10-05 19:06:04,332 - root - INFO - lr: 7.9037e-06 gnorm: 1.12 [20:31:53< 3:57:55] +[titan] 2025-10-05 19:06:15,198 - root - INFO - step: 33530 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 19:06:15,198 - root - INFO - lr: 
7.8993e-06 gnorm: 1.17 [20:32:03< 3:57:44] +[titan] 2025-10-05 19:06:26,152 - root - INFO - step: 33535 loss: 1.9859 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 19:06:26,152 - root - INFO - lr: 7.8949e-06 gnorm: 1.24 [20:32:14< 3:57:33] +[titan] 2025-10-05 19:06:37,024 - root - INFO - step: 33540 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7744 +[titan] 2025-10-05 19:06:37,024 - root - INFO - lr: 7.8905e-06 gnorm: 1.22 [20:32:25< 3:57:22] +[titan] 2025-10-05 19:06:47,931 - root - INFO - step: 33545 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 19:06:47,931 - root - INFO - lr: 7.8862e-06 gnorm: 1.24 [20:32:36< 3:57:11] +[titan] 2025-10-05 19:06:56,619 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:06:58,805 - root - INFO - step: 33550 loss: 1.9223 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:06:58,805 - root - INFO - lr: 7.8818e-06 gnorm: 1.17 [20:32:47< 3:57:00] +[titan] 2025-10-05 19:07:09,652 - root - INFO - step: 33555 loss: 1.9140 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:07:09,653 - root - INFO - lr: 7.8774e-06 gnorm: 1.21 [20:32:58< 3:56:49] +[titan] 2025-10-05 19:07:20,562 - root - INFO - step: 33560 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:07:20,563 - root - INFO - lr: 7.8731e-06 gnorm: 1.23 [20:33:09< 3:56:38] +[titan] 2025-10-05 19:07:31,425 - root - INFO - step: 33565 loss: 1.8946 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 19:07:31,425 - root - INFO - lr: 7.8687e-06 gnorm: 1.19 [20:33:20< 3:56:27] +[titan] 2025-10-05 19:07:42,303 - root - INFO - step: 33570 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:07:42,303 - root - INFO - lr: 7.8643e-06 gnorm: 1.20 [20:33:31< 3:56:16] +[titan] 2025-10-05 19:07:53,210 - root - INFO - step: 33575 loss: 1.9262 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:07:53,211 - root - INFO - lr: 7.8600e-06 gnorm: 1.18 [20:33:41< 3:56:05] +[titan] 2025-10-05 19:08:04,072 - root - INFO - step: 33580 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 19:08:04,072 - root - INFO - lr: 
7.8556e-06 gnorm: 1.18 [20:33:52< 3:55:53] +[titan] 2025-10-05 19:08:14,947 - root - INFO - step: 33585 loss: 1.8953 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 19:08:14,947 - root - INFO - lr: 7.8513e-06 gnorm: 1.14 [20:34:03< 3:55:42] +[titan] 2025-10-05 19:08:25,883 - root - INFO - step: 33590 loss: 1.9998 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 19:08:25,883 - root - INFO - lr: 7.8469e-06 gnorm: 1.19 [20:34:14< 3:55:31] +[titan] 2025-10-05 19:08:36,748 - root - INFO - step: 33595 loss: 1.8788 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6618 +[titan] 2025-10-05 19:08:36,748 - root - INFO - lr: 7.8426e-06 gnorm: 1.17 [20:34:25< 3:55:20] +[titan] 2025-10-05 19:08:45,430 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:08:47,610 - root - INFO - step: 33600 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7176 +[titan] 2025-10-05 19:08:47,610 - root - INFO - lr: 7.8382e-06 gnorm: 1.20 [20:34:36< 3:55:09] +[titan] 2025-10-05 19:08:58,491 - root - INFO - step: 33605 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7542 +[titan] 2025-10-05 19:08:58,491 - root - INFO - lr: 7.8339e-06 gnorm: 1.19 [20:34:47< 3:54:58] +[titan] 2025-10-05 19:09:09,347 - root - INFO - step: 33610 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 19:09:09,347 - root - INFO - lr: 7.8296e-06 gnorm: 1.17 [20:34:58< 3:54:47] +[titan] 2025-10-05 19:09:20,217 - root - INFO - step: 33615 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 19:09:20,217 - root - INFO - lr: 7.8252e-06 gnorm: 1.18 [20:35:08< 3:54:36] +[titan] 2025-10-05 19:09:31,144 - root - INFO - step: 33620 loss: 1.9273 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 19:09:31,144 - root - INFO - lr: 7.8209e-06 gnorm: 1.16 [20:35:19< 3:54:25] +[titan] 2025-10-05 19:09:41,985 - root - INFO - step: 33625 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 19:09:41,985 - root - INFO - lr: 7.8166e-06 gnorm: 1.18 [20:35:30< 3:54:14] +[titan] 2025-10-05 19:09:52,855 - root - INFO - step: 33630 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7601 +[titan] 2025-10-05 19:09:52,855 - root - INFO - lr: 
7.8123e-06 gnorm: 1.21 [20:35:41< 3:54:03] +[titan] 2025-10-05 19:10:03,725 - root - INFO - step: 33635 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:10:03,725 - root - INFO - lr: 7.8080e-06 gnorm: 1.19 [20:35:52< 3:53:52] +[titan] 2025-10-05 19:10:14,597 - root - INFO - step: 33640 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6948 +[titan] 2025-10-05 19:10:14,597 - root - INFO - lr: 7.8036e-06 gnorm: 1.18 [20:36:03< 3:53:41] +[titan] 2025-10-05 19:10:25,501 - root - INFO - step: 33645 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 19:10:25,501 - root - INFO - lr: 7.7993e-06 gnorm: 1.17 [20:36:14< 3:53:30] +[titan] 2025-10-05 19:10:34,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:10:36,352 - root - INFO - step: 33650 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:10:36,352 - root - INFO - lr: 7.7950e-06 gnorm: 1.18 [20:36:25< 3:53:19] +[titan] 2025-10-05 19:10:47,197 - root - INFO - step: 33655 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 19:10:47,197 - root - INFO - lr: 7.7907e-06 gnorm: 1.18 [20:36:35< 3:53:08] +[titan] 2025-10-05 19:10:58,037 - root - INFO - step: 33660 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:10:58,037 - root - INFO - lr: 7.7864e-06 gnorm: 1.20 [20:36:46< 3:52:57] +[titan] 2025-10-05 19:11:08,895 - root - INFO - step: 33665 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 19:11:08,896 - root - INFO - lr: 7.7821e-06 gnorm: 1.18 [20:36:57< 3:52:46] +[titan] 2025-10-05 19:11:19,804 - root - INFO - step: 33670 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:11:19,805 - root - INFO - lr: 7.7778e-06 gnorm: 1.13 [20:37:08< 3:52:35] +[titan] 2025-10-05 19:11:30,707 - root - INFO - step: 33675 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:11:30,707 - root - INFO - lr: 7.7735e-06 gnorm: 1.19 [20:37:19< 3:52:24] +[titan] 2025-10-05 19:11:41,571 - root - INFO - step: 33680 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 19:11:41,571 - root - INFO - lr: 
7.7692e-06 gnorm: 1.15 [20:37:30< 3:52:12] +[titan] 2025-10-05 19:11:52,439 - root - INFO - step: 33685 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7395 +[titan] 2025-10-05 19:11:52,439 - root - INFO - lr: 7.7649e-06 gnorm: 1.17 [20:37:41< 3:52:01] +[titan] 2025-10-05 19:12:03,278 - root - INFO - step: 33690 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 19:12:03,278 - root - INFO - lr: 7.7606e-06 gnorm: 1.21 [20:37:51< 3:51:50] +[titan] 2025-10-05 19:12:14,126 - root - INFO - step: 33695 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:12:14,126 - root - INFO - lr: 7.7564e-06 gnorm: 1.23 [20:38:02< 3:51:39] +[titan] 2025-10-05 19:12:22,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:12:25,032 - root - INFO - step: 33700 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 19:12:25,032 - root - INFO - lr: 7.7521e-06 gnorm: 1.18 [20:38:13< 3:51:28] +[titan] 2025-10-05 19:12:35,912 - root - INFO - step: 33705 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 19:12:35,913 - root - INFO - lr: 7.7478e-06 gnorm: 1.21 [20:38:24< 3:51:17] +[titan] 2025-10-05 19:12:46,776 - root - INFO - step: 33710 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 19:12:46,776 - root - INFO - lr: 7.7435e-06 gnorm: 1.19 [20:38:35< 3:51:06] +[titan] 2025-10-05 19:12:57,642 - root - INFO - step: 33715 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 19:12:57,642 - root - INFO - lr: 7.7393e-06 gnorm: 1.17 [20:38:46< 3:50:55] +[titan] 2025-10-05 19:13:08,509 - root - INFO - step: 33720 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6902 +[titan] 2025-10-05 19:13:08,509 - root - INFO - lr: 7.7350e-06 gnorm: 1.19 [20:38:57< 3:50:44] +[titan] 2025-10-05 19:13:19,370 - root - INFO - step: 33725 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:13:19,370 - root - INFO - lr: 7.7307e-06 gnorm: 1.18 [20:39:08< 3:50:33] +[titan] 2025-10-05 19:13:30,375 - root - INFO - step: 33730 loss: 1.9645 memory: 118.84GiB(85.28%) tps: 29,776 tflops: 413.09 mfu: 41.77% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 19:13:30,375 - root - INFO - lr: 
7.7265e-06 gnorm: 1.18 [20:39:19< 3:50:22] +[titan] 2025-10-05 19:13:41,281 - root - INFO - step: 33735 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 19:13:41,281 - root - INFO - lr: 7.7222e-06 gnorm: 1.18 [20:39:29< 3:50:11] +[titan] 2025-10-05 19:13:52,137 - root - INFO - step: 33740 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 19:13:52,137 - root - INFO - lr: 7.7180e-06 gnorm: 1.20 [20:39:40< 3:50:00] +[titan] 2025-10-05 19:14:03,003 - root - INFO - step: 33745 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 19:14:03,003 - root - INFO - lr: 7.7137e-06 gnorm: 1.17 [20:39:51< 3:49:49] +[titan] 2025-10-05 19:14:11,685 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:14:13,871 - root - INFO - step: 33750 loss: 2.0153 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7820 +[titan] 2025-10-05 19:14:13,871 - root - INFO - lr: 7.7095e-06 gnorm: 1.23 [20:40:02< 3:49:38] +[titan] 2025-10-05 19:14:24,752 - root - INFO - step: 33755 loss: 1.8533 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2139 global_avg_mtp_loss: 1.6394 +[titan] 2025-10-05 19:14:24,752 - root - INFO - lr: 7.7052e-06 gnorm: 1.18 [20:40:13< 3:49:27] +[titan] 2025-10-05 19:14:35,653 - root - INFO - step: 33760 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7664 +[titan] 2025-10-05 19:14:35,654 - root - INFO - lr: 7.7010e-06 gnorm: 1.20 [20:40:24< 3:49:16] +[titan] 2025-10-05 19:14:46,559 - root - INFO - step: 33765 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 19:14:46,559 - root - INFO - lr: 7.6967e-06 gnorm: 1.17 [20:40:35< 3:49:05] +[titan] 2025-10-05 19:14:57,429 - root - INFO - step: 33770 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6631 +[titan] 2025-10-05 19:14:57,429 - root - INFO - lr: 7.6925e-06 gnorm: 1.19 [20:40:46< 3:48:54] +[titan] 2025-10-05 19:15:08,283 - root - INFO - step: 33775 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 19:15:08,283 - root - INFO - lr: 7.6883e-06 gnorm: 1.20 [20:40:56< 3:48:43] +[titan] 2025-10-05 19:15:19,145 - root - INFO - step: 33780 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:15:19,145 - root - INFO - lr: 
7.6841e-06 gnorm: 1.21 [20:41:07< 3:48:31] +[titan] 2025-10-05 19:15:30,024 - root - INFO - step: 33785 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6677 +[titan] 2025-10-05 19:15:30,024 - root - INFO - lr: 7.6798e-06 gnorm: 1.18 [20:41:18< 3:48:20] +[titan] 2025-10-05 19:15:40,968 - root - INFO - step: 33790 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:15:40,968 - root - INFO - lr: 7.6756e-06 gnorm: 1.18 [20:41:29< 3:48:09] +[titan] 2025-10-05 19:15:45,496 - root - INFO - Dumping profiler traces at step 33792 +[titan] 2025-10-05 19:15:45,534 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:15:52,106 - root - INFO - step: 33795 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 29,421 tflops: 408.17 mfu: 41.27% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 19:15:52,106 - root - INFO - lr: 7.6714e-06 gnorm: 1.19 [20:41:40< 3:47:58] +[titan] 2025-10-05 19:16:00,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:16:03,008 - root - INFO - step: 33800 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:16:03,008 - root - INFO - lr: 7.6672e-06 gnorm: 1.20 [20:41:51< 3:47:47] +[titan] 2025-10-05 19:16:13,885 - root - INFO - step: 33805 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 19:16:13,885 - root - INFO - lr: 7.6630e-06 gnorm: 1.17 [20:42:02< 3:47:36] +[titan] 2025-10-05 19:16:24,767 - root - INFO - step: 33810 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 19:16:24,767 - root - INFO - lr: 7.6587e-06 gnorm: 1.15 [20:42:13< 3:47:25] +[titan] 2025-10-05 19:16:35,714 - root - INFO - step: 33815 loss: 2.0005 memory: 118.84GiB(85.28%) tps: 29,934 tflops: 415.29 mfu: 41.99% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 19:16:35,714 - root - INFO - lr: 7.6545e-06 gnorm: 1.20 [20:42:24< 3:47:14] +[titan] 2025-10-05 19:16:46,588 - root - INFO - step: 33820 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 19:16:46,588 - root - INFO - lr: 7.6503e-06 gnorm: 1.18 [20:42:35< 3:47:03] +[titan] 2025-10-05 19:16:57,467 - root - INFO - step: 33825 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6954 +[titan] 2025-10-05 19:16:57,467 - root - INFO - lr: 7.6461e-06 gnorm: 1.16 [20:42:46< 3:46:52] +[titan] 2025-10-05 19:17:08,370 - root - INFO - step: 33830 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 19:17:08,371 - root - INFO - lr: 
7.6419e-06 gnorm: 1.15 [20:42:57< 3:46:41] +[titan] 2025-10-05 19:17:19,239 - root - INFO - step: 33835 loss: 1.9118 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:17:19,239 - root - INFO - lr: 7.6377e-06 gnorm: 1.19 [20:43:07< 3:46:30] +[titan] 2025-10-05 19:17:30,168 - root - INFO - step: 33840 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7619 +[titan] 2025-10-05 19:17:30,168 - root - INFO - lr: 7.6335e-06 gnorm: 1.20 [20:43:18< 3:46:19] +[titan] 2025-10-05 19:17:41,047 - root - INFO - step: 33845 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:17:41,047 - root - INFO - lr: 7.6294e-06 gnorm: 1.17 [20:43:29< 3:46:08] +[titan] 2025-10-05 19:17:49,727 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:17:51,911 - root - INFO - step: 33850 loss: 1.9924 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7615 +[titan] 2025-10-05 19:17:51,911 - root - INFO - lr: 7.6252e-06 gnorm: 1.20 [20:43:40< 3:45:57] +[titan] 2025-10-05 19:18:02,789 - root - INFO - step: 33855 loss: 1.9320 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 19:18:02,789 - root - INFO - lr: 7.6210e-06 gnorm: 1.18 [20:43:51< 3:45:46] +[titan] 2025-10-05 19:18:13,634 - root - INFO - step: 33860 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 19:18:13,634 - root - INFO - lr: 7.6168e-06 gnorm: 1.20 [20:44:02< 3:45:35] +[titan] 2025-10-05 19:18:24,528 - root - INFO - step: 33865 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:18:24,528 - root - INFO - lr: 7.6126e-06 gnorm: 1.21 [20:44:13< 3:45:24] +[titan] 2025-10-05 19:18:35,439 - root - INFO - step: 33870 loss: 1.8718 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6559 +[titan] 2025-10-05 19:18:35,440 - root - INFO - lr: 7.6085e-06 gnorm: 1.16 [20:44:24< 3:45:13] +[titan] 2025-10-05 19:18:46,300 - root - INFO - step: 33875 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:18:46,300 - root - INFO - lr: 7.6043e-06 gnorm: 1.18 [20:44:34< 3:45:02] +[titan] 2025-10-05 19:18:57,171 - root - INFO - step: 33880 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 19:18:57,171 - root - INFO - lr: 
7.6001e-06 gnorm: 1.18 [20:44:45< 3:44:51] +[titan] 2025-10-05 19:19:08,027 - root - INFO - step: 33885 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 19:19:08,028 - root - INFO - lr: 7.5960e-06 gnorm: 1.18 [20:44:56< 3:44:40] +[titan] 2025-10-05 19:19:18,884 - root - INFO - step: 33890 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 19:19:18,884 - root - INFO - lr: 7.5918e-06 gnorm: 1.15 [20:45:07< 3:44:28] +[titan] 2025-10-05 19:19:29,764 - root - INFO - step: 33895 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6963 +[titan] 2025-10-05 19:19:29,765 - root - INFO - lr: 7.5877e-06 gnorm: 1.14 [20:45:18< 3:44:17] +[titan] 2025-10-05 19:19:38,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:19:40,700 - root - INFO - step: 33900 loss: 1.9418 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:19:40,700 - root - INFO - lr: 7.5835e-06 gnorm: 1.15 [20:45:29< 3:44:06] +[titan] 2025-10-05 19:19:51,575 - root - INFO - step: 33905 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 19:19:51,575 - root - INFO - lr: 7.5793e-06 gnorm: 1.18 [20:45:40< 3:43:55] +[titan] 2025-10-05 19:20:02,438 - root - INFO - step: 33910 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 19:20:02,438 - root - INFO - lr: 7.5752e-06 gnorm: 1.17 [20:45:51< 3:43:44] +[titan] 2025-10-05 19:20:13,310 - root - INFO - step: 33915 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 19:20:13,310 - root - INFO - lr: 7.5711e-06 gnorm: 1.21 [20:46:01< 3:43:33] +[titan] 2025-10-05 19:20:24,174 - root - INFO - step: 33920 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 19:20:24,174 - root - INFO - lr: 7.5669e-06 gnorm: 1.18 [20:46:12< 3:43:22] +[titan] 2025-10-05 19:20:35,419 - root - INFO - step: 33925 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 29,141 tflops: 404.28 mfu: 40.88% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 19:20:35,420 - root - INFO - lr: 7.5628e-06 gnorm: 1.19 [20:46:24< 3:43:11] +[titan] 2025-10-05 19:20:46,283 - root - INFO - step: 33930 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 19:20:46,283 - root - INFO - lr: 
7.5586e-06 gnorm: 1.17 [20:46:34< 3:43:00] +[titan] 2025-10-05 19:20:57,167 - root - INFO - step: 33935 loss: 1.9676 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 19:20:57,167 - root - INFO - lr: 7.5545e-06 gnorm: 1.23 [20:46:45< 3:42:49] +[titan] 2025-10-05 19:21:08,017 - root - INFO - step: 33940 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:21:08,017 - root - INFO - lr: 7.5504e-06 gnorm: 1.19 [20:46:56< 3:42:38] +[titan] 2025-10-05 19:21:18,883 - root - INFO - step: 33945 loss: 1.9536 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 19:21:18,883 - root - INFO - lr: 7.5463e-06 gnorm: 1.17 [20:47:07< 3:42:27] +[titan] 2025-10-05 19:21:27,551 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:21:29,744 - root - INFO - step: 33950 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:21:29,745 - root - INFO - lr: 7.5421e-06 gnorm: 1.24 [20:47:18< 3:42:16] +[titan] 2025-10-05 19:21:40,693 - root - INFO - step: 33955 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:21:40,694 - root - INFO - lr: 7.5380e-06 gnorm: 1.21 [20:47:29< 3:42:05] +[titan] 2025-10-05 19:21:51,598 - root - INFO - step: 33960 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 19:21:51,598 - root - INFO - lr: 7.5339e-06 gnorm: 1.22 [20:47:40< 3:41:54] +[titan] 2025-10-05 19:22:02,484 - root - INFO - step: 33965 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 19:22:02,484 - root - INFO - lr: 7.5298e-06 gnorm: 1.17 [20:47:51< 3:41:43] +[titan] 2025-10-05 19:22:13,362 - root - INFO - step: 33970 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 19:22:13,362 - root - INFO - lr: 7.5257e-06 gnorm: 1.15 [20:48:02< 3:41:32] +[titan] 2025-10-05 19:22:24,229 - root - INFO - step: 33975 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:22:24,229 - root - INFO - lr: 7.5216e-06 gnorm: 1.21 [20:48:12< 3:41:21] +[titan] 2025-10-05 19:22:35,177 - root - INFO - step: 33980 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:22:35,178 - root - INFO - lr: 
7.5175e-06 gnorm: 1.26 [20:48:23< 3:41:10] +[titan] 2025-10-05 19:22:46,038 - root - INFO - step: 33985 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:22:46,038 - root - INFO - lr: 7.5134e-06 gnorm: 1.22 [20:48:34< 3:40:59] +[titan] 2025-10-05 19:22:56,932 - root - INFO - step: 33990 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 19:22:56,932 - root - INFO - lr: 7.5093e-06 gnorm: 1.16 [20:48:45< 3:40:48] +[titan] 2025-10-05 19:23:07,777 - root - INFO - step: 33995 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:23:07,778 - root - INFO - lr: 7.5052e-06 gnorm: 1.21 [20:48:56< 3:40:37] +[titan] 2025-10-05 19:23:16,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:23:18,638 - root - INFO - step: 34000 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:23:18,638 - root - INFO - lr: 7.5011e-06 gnorm: 1.17 [20:49:07< 3:40:25] +[titan] 2025-10-05 19:23:29,501 - root - INFO - step: 34005 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:23:29,501 - root - INFO - lr: 7.4970e-06 gnorm: 1.18 [20:49:18< 3:40:14] +[titan] 2025-10-05 19:23:40,426 - root - INFO - step: 34010 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 19:23:40,426 - root - INFO - lr: 7.4929e-06 gnorm: 1.19 [20:49:29< 3:40:03] +[titan] 2025-10-05 19:23:51,314 - root - INFO - step: 34015 loss: 1.9884 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:23:51,315 - root - INFO - lr: 7.4888e-06 gnorm: 1.21 [20:49:39< 3:39:52] +[titan] 2025-10-05 19:24:02,171 - root - INFO - step: 34020 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 19:24:02,171 - root - INFO - lr: 7.4847e-06 gnorm: 1.18 [20:49:50< 3:39:41] +[titan] 2025-10-05 19:24:13,068 - root - INFO - step: 34025 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 19:24:13,068 - root - INFO - lr: 7.4807e-06 gnorm: 1.26 [20:50:01< 3:39:30] +[titan] 2025-10-05 19:24:23,950 - root - INFO - step: 34030 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 19:24:23,951 - root - INFO - lr: 
7.4766e-06 gnorm: 1.18 [20:50:12< 3:39:19] +[titan] 2025-10-05 19:24:34,827 - root - INFO - step: 34035 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6696 +[titan] 2025-10-05 19:24:34,827 - root - INFO - lr: 7.4725e-06 gnorm: 1.20 [20:50:23< 3:39:08] +[titan] 2025-10-05 19:24:45,778 - root - INFO - step: 34040 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 19:24:45,778 - root - INFO - lr: 7.4685e-06 gnorm: 1.19 [20:50:34< 3:38:57] +[titan] 2025-10-05 19:24:56,664 - root - INFO - step: 34045 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 19:24:56,664 - root - INFO - lr: 7.4644e-06 gnorm: 1.20 [20:50:45< 3:38:46] +[titan] 2025-10-05 19:25:05,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:25:07,528 - root - INFO - step: 34050 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:25:07,528 - root - INFO - lr: 7.4603e-06 gnorm: 1.21 [20:50:56< 3:38:35] +[titan] 2025-10-05 19:25:18,416 - root - INFO - step: 34055 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6971 +[titan] 2025-10-05 19:25:18,416 - root - INFO - lr: 7.4563e-06 gnorm: 1.17 [20:51:07< 3:38:24] +[titan] 2025-10-05 19:25:29,290 - root - INFO - step: 34060 loss: 1.9560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 19:25:29,291 - root - INFO - lr: 7.4522e-06 gnorm: 1.17 [20:51:17< 3:38:13] +[titan] 2025-10-05 19:25:40,227 - root - INFO - step: 34065 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6962 +[titan] 2025-10-05 19:25:40,227 - root - INFO - lr: 7.4482e-06 gnorm: 1.16 [20:51:28< 3:38:02] +[titan] 2025-10-05 19:25:51,094 - root - INFO - step: 34070 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 19:25:51,094 - root - INFO - lr: 7.4441e-06 gnorm: 1.17 [20:51:39< 3:37:51] +[titan] 2025-10-05 19:26:01,990 - root - INFO - step: 34075 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 19:26:01,990 - root - INFO - lr: 7.4401e-06 gnorm: 1.19 [20:51:50< 3:37:40] +[titan] 2025-10-05 19:26:12,871 - root - INFO - step: 34080 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6921 +[titan] 2025-10-05 19:26:12,871 - root - INFO - lr: 
7.4361e-06 gnorm: 1.18 [20:52:01< 3:37:29] +[titan] 2025-10-05 19:26:23,746 - root - INFO - step: 34085 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7530 +[titan] 2025-10-05 19:26:23,746 - root - INFO - lr: 7.4320e-06 gnorm: 1.19 [20:52:12< 3:37:18] +[titan] 2025-10-05 19:26:34,615 - root - INFO - step: 34090 loss: 1.9192 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 19:26:34,615 - root - INFO - lr: 7.4280e-06 gnorm: 1.17 [20:52:23< 3:37:07] +[titan] 2025-10-05 19:26:45,574 - root - INFO - step: 34095 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 19:26:45,575 - root - INFO - lr: 7.4239e-06 gnorm: 1.24 [20:52:34< 3:36:56] +[titan] 2025-10-05 19:26:54,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:26:56,447 - root - INFO - step: 34100 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:26:56,447 - root - INFO - lr: 7.4199e-06 gnorm: 1.21 [20:52:45< 3:36:45] +[titan] 2025-10-05 19:27:07,327 - root - INFO - step: 34105 loss: 1.8752 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 19:27:07,327 - root - INFO - lr: 7.4159e-06 gnorm: 1.17 [20:52:55< 3:36:34] +[titan] 2025-10-05 19:27:18,206 - root - INFO - step: 34110 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 19:27:18,206 - root - INFO - lr: 7.4119e-06 gnorm: 1.25 [20:53:06< 3:36:23] +[titan] 2025-10-05 19:27:29,088 - root - INFO - step: 34115 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 19:27:29,088 - root - INFO - lr: 7.4079e-06 gnorm: 1.18 [20:53:17< 3:36:11] +[titan] 2025-10-05 19:27:40,016 - root - INFO - step: 34120 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 19:27:40,017 - root - INFO - lr: 7.4038e-06 gnorm: 1.18 [20:53:28< 3:36:00] +[titan] 2025-10-05 19:27:50,909 - root - INFO - step: 34125 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 19:27:50,909 - root - INFO - lr: 7.3998e-06 gnorm: 1.20 [20:53:39< 3:35:49] +[titan] 2025-10-05 19:28:01,811 - root - INFO - step: 34130 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:28:01,811 - root - INFO - lr: 
7.3958e-06 gnorm: 1.15 [20:53:50< 3:35:38] +[titan] 2025-10-05 19:28:12,709 - root - INFO - step: 34135 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 19:28:12,710 - root - INFO - lr: 7.3918e-06 gnorm: 1.17 [20:54:01< 3:35:27] +[titan] 2025-10-05 19:28:23,581 - root - INFO - step: 34140 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:28:23,582 - root - INFO - lr: 7.3878e-06 gnorm: 1.23 [20:54:12< 3:35:16] +[titan] 2025-10-05 19:28:34,458 - root - INFO - step: 34145 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 19:28:34,458 - root - INFO - lr: 7.3838e-06 gnorm: 1.18 [20:54:23< 3:35:05] +[titan] 2025-10-05 19:28:43,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:28:45,388 - root - INFO - step: 34150 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 19:28:45,388 - root - INFO - lr: 7.3798e-06 gnorm: 1.19 [20:54:34< 3:34:54] +[titan] 2025-10-05 19:28:56,262 - root - INFO - step: 34155 loss: 1.9387 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:28:56,262 - root - INFO - lr: 7.3758e-06 gnorm: 1.16 [20:54:44< 3:34:43] +[titan] 2025-10-05 19:29:07,168 - root - INFO - step: 34160 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 19:29:07,168 - root - INFO - lr: 7.3718e-06 gnorm: 1.18 [20:54:55< 3:34:32] +[titan] 2025-10-05 19:29:18,057 - root - INFO - step: 34165 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 19:29:18,057 - root - INFO - lr: 7.3678e-06 gnorm: 1.19 [20:55:06< 3:34:21] +[titan] 2025-10-05 19:29:28,930 - root - INFO - step: 34170 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 19:29:28,930 - root - INFO - lr: 7.3639e-06 gnorm: 1.18 [20:55:17< 3:34:10] +[titan] 2025-10-05 19:29:39,843 - root - INFO - step: 34175 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 19:29:39,844 - root - INFO - lr: 7.3599e-06 gnorm: 1.25 [20:55:28< 3:33:59] +[titan] 2025-10-05 19:29:50,737 - root - INFO - step: 34180 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 19:29:50,737 - root - INFO - lr: 
7.3559e-06 gnorm: 1.26 [20:55:39< 3:33:48] +[titan] 2025-10-05 19:30:01,652 - root - INFO - step: 34185 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 19:30:01,652 - root - INFO - lr: 7.3519e-06 gnorm: 1.25 [20:55:50< 3:33:37] +[titan] 2025-10-05 19:30:12,505 - root - INFO - step: 34190 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 19:30:12,506 - root - INFO - lr: 7.3480e-06 gnorm: 1.20 [20:56:01< 3:33:26] +[titan] 2025-10-05 19:30:23,389 - root - INFO - step: 34195 loss: 1.9339 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 19:30:23,390 - root - INFO - lr: 7.3440e-06 gnorm: 1.19 [20:56:12< 3:33:15] +[titan] 2025-10-05 19:30:32,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:30:34,246 - root - INFO - step: 34200 loss: 1.9408 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 19:30:34,246 - root - INFO - lr: 7.3400e-06 gnorm: 1.19 [20:56:22< 3:33:04] +[titan] 2025-10-05 19:30:45,157 - root - INFO - step: 34205 loss: 1.9115 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 19:30:45,157 - root - INFO - lr: 7.3361e-06 gnorm: 1.18 [20:56:33< 3:32:53] +[titan] 2025-10-05 19:30:56,027 - root - INFO - step: 34210 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 19:30:56,027 - root - INFO - lr: 7.3321e-06 gnorm: 1.19 [20:56:44< 3:32:42] +[titan] 2025-10-05 19:31:06,908 - root - INFO - step: 34215 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 19:31:06,908 - root - INFO - lr: 7.3281e-06 gnorm: 1.17 [20:56:55< 3:32:31] +[titan] 2025-10-05 19:31:17,775 - root - INFO - step: 34220 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7272 +[titan] 2025-10-05 19:31:17,776 - root - INFO - lr: 7.3242e-06 gnorm: 1.20 [20:57:06< 3:32:20] +[titan] 2025-10-05 19:31:28,639 - root - INFO - step: 34225 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 19:31:28,639 - root - INFO - lr: 7.3202e-06 gnorm: 1.19 [20:57:17< 3:32:09] +[titan] 2025-10-05 19:31:39,529 - root - INFO - step: 34230 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:31:39,529 - root - INFO - lr: 
7.3163e-06 gnorm: 1.20 [20:57:28< 3:31:57] +[titan] 2025-10-05 19:31:50,429 - root - INFO - step: 34235 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 19:31:50,429 - root - INFO - lr: 7.3124e-06 gnorm: 1.19 [20:57:39< 3:31:46] +[titan] 2025-10-05 19:32:01,297 - root - INFO - step: 34240 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6972 +[titan] 2025-10-05 19:32:01,297 - root - INFO - lr: 7.3084e-06 gnorm: 1.22 [20:57:49< 3:31:35] +[titan] 2025-10-05 19:32:12,194 - root - INFO - step: 34245 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 19:32:12,194 - root - INFO - lr: 7.3045e-06 gnorm: 1.21 [20:58:00< 3:31:24] +[titan] 2025-10-05 19:32:20,872 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:32:23,059 - root - INFO - step: 34250 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 19:32:23,059 - root - INFO - lr: 7.3006e-06 gnorm: 1.18 [20:58:11< 3:31:13] +[titan] 2025-10-05 19:32:33,942 - root - INFO - step: 34255 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:32:33,942 - root - INFO - lr: 7.2966e-06 gnorm: 1.16 [20:58:22< 3:31:02] +[titan] 2025-10-05 19:32:44,861 - root - INFO - step: 34260 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7106 +[titan] 2025-10-05 19:32:44,861 - root - INFO - lr: 7.2927e-06 gnorm: 1.18 [20:58:33< 3:30:51] +[titan] 2025-10-05 19:32:55,734 - root - INFO - step: 34265 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 19:32:55,734 - root - INFO - lr: 7.2888e-06 gnorm: 1.16 [20:58:44< 3:30:40] +[titan] 2025-10-05 19:33:06,617 - root - INFO - step: 34270 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 19:33:06,617 - root - INFO - lr: 7.2849e-06 gnorm: 1.22 [20:58:55< 3:30:29] +[titan] 2025-10-05 19:33:17,521 - root - INFO - step: 34275 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 19:33:17,521 - root - INFO - lr: 7.2809e-06 gnorm: 1.19 [20:59:06< 3:30:18] +[titan] 2025-10-05 19:33:28,449 - root - INFO - step: 34280 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 19:33:28,449 - root - INFO - lr: 
7.2770e-06 gnorm: 1.23 [20:59:17< 3:30:07] +[titan] 2025-10-05 19:33:39,328 - root - INFO - step: 34285 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:33:39,328 - root - INFO - lr: 7.2731e-06 gnorm: 1.17 [20:59:27< 3:29:56] +[titan] 2025-10-05 19:33:50,236 - root - INFO - step: 34290 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:33:50,236 - root - INFO - lr: 7.2692e-06 gnorm: 1.23 [20:59:38< 3:29:45] +[titan] 2025-10-05 19:34:01,108 - root - INFO - step: 34295 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 19:34:01,108 - root - INFO - lr: 7.2653e-06 gnorm: 1.17 [20:59:49< 3:29:34] +[titan] 2025-10-05 19:34:09,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:34:11,978 - root - INFO - step: 34300 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 19:34:11,978 - root - INFO - lr: 7.2614e-06 gnorm: 1.19 [21:00:00< 3:29:23] +[titan] 2025-10-05 19:34:20,927 - root - INFO - Dumping profiler traces at step 34304 +[titan] 2025-10-05 19:34:20,968 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:34:23,158 - root - INFO - step: 34305 loss: 1.8387 memory: 118.84GiB(85.28%) tps: 29,312 tflops: 406.65 mfu: 41.12% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6259 +[titan] 2025-10-05 19:34:23,158 - root - INFO - lr: 7.2575e-06 gnorm: 1.17 [21:00:11< 3:29:12] +[titan] 2025-10-05 19:34:34,056 - root - INFO - step: 34310 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 19:34:34,057 - root - INFO - lr: 7.2536e-06 gnorm: 1.17 [21:00:22< 3:29:01] +[titan] 2025-10-05 19:34:44,938 - root - INFO - step: 34315 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 19:34:44,938 - root - INFO - lr: 7.2497e-06 gnorm: 1.18 [21:00:33< 3:28:50] +[titan] 2025-10-05 19:34:55,805 - root - INFO - step: 34320 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:34:55,805 - root - INFO - lr: 7.2458e-06 gnorm: 1.22 [21:00:44< 3:28:39] +[titan] 2025-10-05 19:35:06,664 - root - INFO - step: 34325 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 19:35:06,664 - root - INFO - lr: 7.2419e-06 gnorm: 1.19 [21:00:55< 3:28:28] +[titan] 2025-10-05 19:35:17,530 - root - INFO - step: 34330 loss: 
1.9235 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 19:35:17,530 - root - INFO - lr: 7.2381e-06 gnorm: 1.18 [21:01:06< 3:28:17] +[titan] 2025-10-05 19:35:28,388 - root - INFO - step: 34335 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 19:35:28,388 - root - INFO - lr: 7.2342e-06 gnorm: 1.27 [21:01:17< 3:28:06] +[titan] 2025-10-05 19:35:39,210 - root - INFO - step: 34340 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 19:35:39,210 - root - INFO - lr: 7.2303e-06 gnorm: 1.23 [21:01:27< 3:27:55] +[titan] 2025-10-05 19:35:50,071 - root - INFO - step: 34345 loss: 1.9981 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7671 +[titan] 2025-10-05 19:35:50,072 - root - INFO - lr: 7.2264e-06 gnorm: 1.20 [21:01:38< 3:27:44] +[titan] 2025-10-05 19:35:58,755 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:36:00,937 - root - INFO - step: 34350 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:00,937 - root - INFO - lr: 7.2226e-06 gnorm: 1.19 [21:01:49< 3:27:32] +[titan] 2025-10-05 19:36:11,779 - root - INFO - step: 34355 loss: 1.9721 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:36:11,779 - root - INFO - lr: 7.2187e-06 gnorm: 1.22 [21:02:00< 3:27:21] +[titan] 2025-10-05 19:36:22,618 - root - INFO - step: 34360 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 19:36:22,618 - root - INFO - lr: 7.2148e-06 gnorm: 1.19 [21:02:11< 3:27:10] +[titan] 2025-10-05 19:36:33,472 - root - INFO - step: 34365 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 19:36:33,472 - root - INFO - lr: 7.2110e-06 gnorm: 1.16 [21:02:22< 3:26:59] +[titan] 2025-10-05 19:36:44,328 - root - INFO - step: 34370 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:44,329 - root - INFO - lr: 7.2071e-06 gnorm: 1.17 [21:02:32< 3:26:48] +[titan] 2025-10-05 19:36:55,235 - root - INFO - step: 34375 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 19:36:55,235 - root - INFO - lr: 7.2033e-06 gnorm: 1.14 [21:02:43< 3:26:37] +[titan] 2025-10-05 19:37:06,084 - root - INFO - step: 34380 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:37:06,084 - root - INFO - lr: 
7.1994e-06 gnorm: 1.17 [21:02:54< 3:26:26] +[titan] 2025-10-05 19:37:16,961 - root - INFO - step: 34385 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7555 +[titan] 2025-10-05 19:37:16,961 - root - INFO - lr: 7.1956e-06 gnorm: 1.22 [21:03:05< 3:26:15] +[titan] 2025-10-05 19:37:27,815 - root - INFO - step: 34390 loss: 2.0305 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 19:37:27,815 - root - INFO - lr: 7.1917e-06 gnorm: 1.20 [21:03:16< 3:26:04] +[titan] 2025-10-05 19:37:38,670 - root - INFO - step: 34395 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 19:37:38,670 - root - INFO - lr: 7.1879e-06 gnorm: 1.19 [21:03:27< 3:25:53] +[titan] 2025-10-05 19:37:47,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:37:49,563 - root - INFO - step: 34400 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:37:49,563 - root - INFO - lr: 7.1840e-06 gnorm: 1.23 [21:03:38< 3:25:42] +[titan] 2025-10-05 19:38:00,476 - root - INFO - step: 34405 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:38:00,476 - root - INFO - lr: 7.1802e-06 gnorm: 1.21 [21:03:49< 3:25:31] +[titan] 2025-10-05 19:38:11,337 - root - INFO - step: 34410 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7067 +[titan] 2025-10-05 19:38:11,338 - root - INFO - lr: 7.1764e-06 gnorm: 1.16 [21:03:59< 3:25:20] +[titan] 2025-10-05 19:38:22,210 - root - INFO - step: 34415 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 19:38:22,210 - root - INFO - lr: 7.1726e-06 gnorm: 1.20 [21:04:10< 3:25:09] +[titan] 2025-10-05 19:38:33,092 - root - INFO - step: 34420 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:38:33,092 - root - INFO - lr: 7.1687e-06 gnorm: 1.23 [21:04:21< 3:24:58] +[titan] 2025-10-05 19:38:43,954 - root - INFO - step: 34425 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 19:38:43,954 - root - INFO - lr: 7.1649e-06 gnorm: 1.19 [21:04:32< 3:24:47] +[titan] 2025-10-05 19:38:54,847 - root - INFO - step: 34430 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 19:38:54,847 - root - INFO - lr: 
7.1611e-06 gnorm: 1.22 [21:04:43< 3:24:36] +[titan] 2025-10-05 19:39:05,711 - root - INFO - step: 34435 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:39:05,711 - root - INFO - lr: 7.1573e-06 gnorm: 1.18 [21:04:54< 3:24:25] +[titan] 2025-10-05 19:39:16,607 - root - INFO - step: 34440 loss: 1.9084 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 19:39:16,607 - root - INFO - lr: 7.1535e-06 gnorm: 1.15 [21:05:05< 3:24:14] +[titan] 2025-10-05 19:39:27,468 - root - INFO - step: 34445 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 19:39:27,469 - root - INFO - lr: 7.1497e-06 gnorm: 1.21 [21:05:16< 3:24:03] +[titan] 2025-10-05 19:39:36,132 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:39:38,309 - root - INFO - step: 34450 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 19:39:38,309 - root - INFO - lr: 7.1458e-06 gnorm: 1.19 [21:05:26< 3:23:52] +[titan] 2025-10-05 19:39:49,168 - root - INFO - step: 34455 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7614 +[titan] 2025-10-05 19:39:49,168 - root - INFO - lr: 7.1420e-06 gnorm: 1.22 [21:05:37< 3:23:41] +[titan] 2025-10-05 19:39:59,988 - root - INFO - step: 34460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:39:59,988 - root - INFO - lr: 7.1382e-06 gnorm: 1.18 [21:05:48< 3:23:29] +[titan] 2025-10-05 19:40:10,837 - root - INFO - step: 34465 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:40:10,837 - root - INFO - lr: 7.1345e-06 gnorm: 1.20 [21:05:59< 3:23:18] +[titan] 2025-10-05 19:40:21,711 - root - INFO - step: 34470 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 19:40:21,711 - root - INFO - lr: 7.1307e-06 gnorm: 1.24 [21:06:10< 3:23:07] +[titan] 2025-10-05 19:40:32,577 - root - INFO - step: 34475 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 19:40:32,577 - root - INFO - lr: 7.1269e-06 gnorm: 1.26 [21:06:21< 3:22:56] +[titan] 2025-10-05 19:40:43,432 - root - INFO - step: 34480 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6998 +[titan] 2025-10-05 19:40:43,432 - root - INFO - lr: 
7.1231e-06 gnorm: 1.19 [21:06:32< 3:22:45] +[titan] 2025-10-05 19:40:54,326 - root - INFO - step: 34485 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:40:54,326 - root - INFO - lr: 7.1193e-06 gnorm: 1.19 [21:06:42< 3:22:34] +[titan] 2025-10-05 19:41:05,190 - root - INFO - step: 34490 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 19:41:05,190 - root - INFO - lr: 7.1155e-06 gnorm: 1.19 [21:06:53< 3:22:23] +[titan] 2025-10-05 19:41:16,016 - root - INFO - step: 34495 loss: 1.9452 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7197 +[titan] 2025-10-05 19:41:16,016 - root - INFO - lr: 7.1117e-06 gnorm: 1.28 [21:07:04< 3:22:12] +[titan] 2025-10-05 19:41:24,679 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:41:26,864 - root - INFO - step: 34500 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 19:41:26,864 - root - INFO - lr: 7.1080e-06 gnorm: 1.20 [21:07:15< 3:22:01] +[titan] 2025-10-05 19:41:37,746 - root - INFO - step: 34505 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:41:37,746 - root - INFO - lr: 7.1042e-06 gnorm: 1.20 [21:07:26< 3:21:50] +[titan] 2025-10-05 19:41:48,592 - root - INFO - step: 34510 loss: 1.9716 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 19:41:48,592 - root - INFO - lr: 7.1004e-06 gnorm: 1.23 [21:07:37< 3:21:39] +[titan] 2025-10-05 19:41:59,472 - root - INFO - step: 34515 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 19:41:59,472 - root - INFO - lr: 7.0967e-06 gnorm: 1.19 [21:07:48< 3:21:28] +[titan] 2025-10-05 19:42:10,331 - root - INFO - step: 34520 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 19:42:10,331 - root - INFO - lr: 7.0929e-06 gnorm: 1.21 [21:07:58< 3:21:17] +[titan] 2025-10-05 19:42:21,195 - root - INFO - step: 34525 loss: 1.8598 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6441 +[titan] 2025-10-05 19:42:21,195 - root - INFO - lr: 7.0892e-06 gnorm: 1.20 [21:08:09< 3:21:06] +[titan] 2025-10-05 19:42:32,043 - root - INFO - step: 34530 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 19:42:32,043 - root - INFO - lr: 
7.0854e-06 gnorm: 1.18 [21:08:20< 3:20:55] +[titan] 2025-10-05 19:42:42,933 - root - INFO - step: 34535 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:42:42,933 - root - INFO - lr: 7.0816e-06 gnorm: 1.17 [21:08:31< 3:20:44] +[titan] 2025-10-05 19:42:53,805 - root - INFO - step: 34540 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 19:42:53,805 - root - INFO - lr: 7.0779e-06 gnorm: 1.20 [21:08:42< 3:20:33] +[titan] 2025-10-05 19:43:04,676 - root - INFO - step: 34545 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:43:04,676 - root - INFO - lr: 7.0742e-06 gnorm: 1.19 [21:08:53< 3:20:22] +[titan] 2025-10-05 19:43:13,386 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:43:15,579 - root - INFO - step: 34550 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7242 +[titan] 2025-10-05 19:43:15,579 - root - INFO - lr: 7.0704e-06 gnorm: 1.21 [21:09:04< 3:20:11] +[titan] 2025-10-05 19:43:26,490 - root - INFO - step: 34555 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:43:26,490 - root - INFO - lr: 7.0667e-06 gnorm: 1.18 [21:09:15< 3:20:00] +[titan] 2025-10-05 19:43:37,391 - root - INFO - step: 34560 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8232 +[titan] 2025-10-05 19:43:37,391 - root - INFO - lr: 7.0629e-06 gnorm: 4.37 [21:09:26< 3:19:49] +[titan] 2025-10-05 19:43:48,315 - root - INFO - step: 34565 loss: 1.9033 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 19:43:48,315 - root - INFO - lr: 7.0592e-06 gnorm: 1.24 [21:09:36< 3:19:38] +[titan] 2025-10-05 19:43:59,252 - root - INFO - step: 34570 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 19:43:59,252 - root - INFO - lr: 7.0555e-06 gnorm: 1.17 [21:09:47< 3:19:27] +[titan] 2025-10-05 19:44:10,131 - root - INFO - step: 34575 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 19:44:10,131 - root - INFO - lr: 7.0518e-06 gnorm: 1.21 [21:09:58< 3:19:15] +[titan] 2025-10-05 19:44:20,965 - root - INFO - step: 34580 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7717 +[titan] 2025-10-05 19:44:20,965 - root - INFO - lr: 
7.0480e-06 gnorm: 1.26 [21:10:09< 3:19:04] +[titan] 2025-10-05 19:44:31,829 - root - INFO - step: 34585 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 19:44:31,829 - root - INFO - lr: 7.0443e-06 gnorm: 1.19 [21:10:20< 3:18:53] +[titan] 2025-10-05 19:44:42,679 - root - INFO - step: 34590 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.7230 +[titan] 2025-10-05 19:44:42,680 - root - INFO - lr: 7.0406e-06 gnorm: 2.68 [21:10:31< 3:18:42] +[titan] 2025-10-05 19:44:53,560 - root - INFO - step: 34595 loss: 1.8805 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 19:44:53,561 - root - INFO - lr: 7.0369e-06 gnorm: 1.23 [21:10:42< 3:18:31] +[titan] 2025-10-05 19:45:02,272 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:45:04,448 - root - INFO - step: 34600 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 19:45:04,448 - root - INFO - lr: 7.0332e-06 gnorm: 1.18 [21:10:53< 3:18:20] +[titan] 2025-10-05 19:45:15,326 - root - INFO - step: 34605 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:45:15,326 - root - INFO - lr: 7.0295e-06 gnorm: 1.19 [21:11:03< 3:18:09] +[titan] 2025-10-05 19:45:26,191 - root - INFO - step: 34610 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:45:26,191 - root - INFO - lr: 7.0258e-06 gnorm: 1.20 [21:11:14< 3:17:58] +[titan] 2025-10-05 19:45:37,037 - root - INFO - step: 34615 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:45:37,037 - root - INFO - lr: 7.0221e-06 gnorm: 1.17 [21:11:25< 3:17:47] +[titan] 2025-10-05 19:45:47,905 - root - INFO - step: 34620 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:45:47,905 - root - INFO - lr: 7.0184e-06 gnorm: 1.23 [21:11:36< 3:17:36] +[titan] 2025-10-05 19:45:58,807 - root - INFO - step: 34625 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 19:45:58,807 - root - INFO - lr: 7.0147e-06 gnorm: 1.25 [21:11:47< 3:17:25] +[titan] 2025-10-05 19:46:09,704 - root - INFO - step: 34630 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 19:46:09,704 - root - INFO - lr: 
7.0110e-06 gnorm: 1.24 [21:11:58< 3:17:14] +[titan] 2025-10-05 19:46:20,566 - root - INFO - step: 34635 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 19:46:20,566 - root - INFO - lr: 7.0073e-06 gnorm: 1.25 [21:12:09< 3:17:03] +[titan] 2025-10-05 19:46:31,407 - root - INFO - step: 34640 loss: 1.9051 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 19:46:31,407 - root - INFO - lr: 7.0036e-06 gnorm: 1.18 [21:12:20< 3:16:52] +[titan] 2025-10-05 19:46:42,249 - root - INFO - step: 34645 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 19:46:42,249 - root - INFO - lr: 6.9999e-06 gnorm: 1.17 [21:12:30< 3:16:41] +[titan] 2025-10-05 19:46:50,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:46:53,097 - root - INFO - step: 34650 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7558 +[titan] 2025-10-05 19:46:53,097 - root - INFO - lr: 6.9963e-06 gnorm: 1.18 [21:12:41< 3:16:30] +[titan] 2025-10-05 19:47:03,992 - root - INFO - step: 34655 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 19:47:03,992 - root - INFO - lr: 6.9926e-06 gnorm: 1.23 [21:12:52< 3:16:19] +[titan] 2025-10-05 19:47:14,867 - root - INFO - step: 34660 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 19:47:14,867 - root - INFO - lr: 6.9889e-06 gnorm: 1.29 [21:13:03< 3:16:08] +[titan] 2025-10-05 19:47:25,759 - root - INFO - step: 34665 loss: 1.9370 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 19:47:25,759 - root - INFO - lr: 6.9853e-06 gnorm: 1.19 [21:13:14< 3:15:57] +[titan] 2025-10-05 19:47:36,638 - root - INFO - step: 34670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 19:47:36,639 - root - INFO - lr: 6.9816e-06 gnorm: 1.16 [21:13:25< 3:15:46] +[titan] 2025-10-05 19:47:47,526 - root - INFO - step: 34675 loss: 1.9202 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:47:47,526 - root - INFO - lr: 6.9779e-06 gnorm: 1.19 [21:13:36< 3:15:35] +[titan] 2025-10-05 19:47:58,418 - root - INFO - step: 34680 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 19:47:58,418 - root - INFO - lr: 
6.9743e-06 gnorm: 1.20 [21:13:47< 3:15:24] +[titan] 2025-10-05 19:48:09,256 - root - INFO - step: 34685 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6999 +[titan] 2025-10-05 19:48:09,256 - root - INFO - lr: 6.9706e-06 gnorm: 1.21 [21:13:57< 3:15:13] +[titan] 2025-10-05 19:48:20,111 - root - INFO - step: 34690 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 19:48:20,111 - root - INFO - lr: 6.9670e-06 gnorm: 1.22 [21:14:08< 3:15:02] +[titan] 2025-10-05 19:48:31,007 - root - INFO - step: 34695 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 19:48:31,007 - root - INFO - lr: 6.9633e-06 gnorm: 1.22 [21:14:19< 3:14:50] +[titan] 2025-10-05 19:48:39,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:48:41,855 - root - INFO - step: 34700 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 19:48:41,855 - root - INFO - lr: 6.9597e-06 gnorm: 1.18 [21:14:30< 3:14:39] +[titan] 2025-10-05 19:48:52,713 - root - INFO - step: 34705 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7370 +[titan] 2025-10-05 19:48:52,713 - root - INFO - lr: 6.9560e-06 gnorm: 1.17 [21:14:41< 3:14:28] +[titan] 2025-10-05 19:49:03,608 - root - INFO - step: 34710 loss: 1.9120 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6899 +[titan] 2025-10-05 19:49:03,608 - root - INFO - lr: 6.9524e-06 gnorm: 1.17 [21:14:52< 3:14:17] +[titan] 2025-10-05 19:49:14,465 - root - INFO - step: 34715 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:49:14,465 - root - INFO - lr: 6.9488e-06 gnorm: 1.22 [21:15:03< 3:14:06] +[titan] 2025-10-05 19:49:25,305 - root - INFO - step: 34720 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:49:25,305 - root - INFO - lr: 6.9451e-06 gnorm: 1.21 [21:15:13< 3:13:55] +[titan] 2025-10-05 19:49:36,189 - root - INFO - step: 34725 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:49:36,189 - root - INFO - lr: 6.9415e-06 gnorm: 1.18 [21:15:24< 3:13:44] +[titan] 2025-10-05 19:49:47,060 - root - INFO - step: 34730 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 19:49:47,060 - root - INFO - lr: 
6.9379e-06 gnorm: 1.21 [21:15:35< 3:13:33] +[titan] 2025-10-05 19:49:57,949 - root - INFO - step: 34735 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 19:49:57,949 - root - INFO - lr: 6.9343e-06 gnorm: 1.21 [21:15:46< 3:13:22] +[titan] 2025-10-05 19:50:08,803 - root - INFO - step: 34740 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 19:50:08,803 - root - INFO - lr: 6.9306e-06 gnorm: 1.24 [21:15:57< 3:13:11] +[titan] 2025-10-05 19:50:19,673 - root - INFO - step: 34745 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 19:50:19,673 - root - INFO - lr: 6.9270e-06 gnorm: 1.26 [21:16:08< 3:13:00] +[titan] 2025-10-05 19:50:28,354 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:50:30,535 - root - INFO - step: 34750 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7233 +[titan] 2025-10-05 19:50:30,535 - root - INFO - lr: 6.9234e-06 gnorm: 1.24 [21:16:19< 3:12:49] +[titan] 2025-10-05 19:50:41,406 - root - INFO - step: 34755 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:50:41,406 - root - INFO - lr: 6.9198e-06 gnorm: 1.19 [21:16:30< 3:12:38] +[titan] 2025-10-05 19:50:52,304 - root - INFO - step: 34760 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 19:50:52,304 - root - INFO - lr: 6.9162e-06 gnorm: 1.19 [21:16:40< 3:12:27] +[titan] 2025-10-05 19:51:03,222 - root - INFO - step: 34765 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 19:51:03,223 - root - INFO - lr: 6.9126e-06 gnorm: 1.23 [21:16:51< 3:12:16] +[titan] 2025-10-05 19:51:14,086 - root - INFO - step: 34770 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:51:14,086 - root - INFO - lr: 6.9090e-06 gnorm: 1.16 [21:17:02< 3:12:05] +[titan] 2025-10-05 19:51:24,963 - root - INFO - step: 34775 loss: 1.9641 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 19:51:24,963 - root - INFO - lr: 6.9054e-06 gnorm: 1.22 [21:17:13< 3:11:54] +[titan] 2025-10-05 19:51:35,828 - root - INFO - step: 34780 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 19:51:35,828 - root - INFO - lr: 
6.9018e-06 gnorm: 1.21 [21:17:24< 3:11:43] +[titan] 2025-10-05 19:51:46,685 - root - INFO - step: 34785 loss: 1.9053 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 19:51:46,685 - root - INFO - lr: 6.8982e-06 gnorm: 1.20 [21:17:35< 3:11:32] +[titan] 2025-10-05 19:51:57,587 - root - INFO - step: 34790 loss: 1.9201 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 19:51:57,587 - root - INFO - lr: 6.8946e-06 gnorm: 1.18 [21:17:46< 3:11:21] +[titan] 2025-10-05 19:52:08,485 - root - INFO - step: 34795 loss: 1.9967 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 19:52:08,485 - root - INFO - lr: 6.8910e-06 gnorm: 1.22 [21:17:57< 3:11:10] +[titan] 2025-10-05 19:52:17,185 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:52:19,378 - root - INFO - step: 34800 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 19:52:19,378 - root - INFO - lr: 6.8875e-06 gnorm: 1.20 [21:18:07< 3:10:59] +[titan] 2025-10-05 19:52:30,261 - root - INFO - step: 34805 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 19:52:30,261 - root - INFO - lr: 6.8839e-06 gnorm: 1.19 [21:18:18< 3:10:48] +[titan] 2025-10-05 19:52:41,146 - root - INFO - step: 34810 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 19:52:41,146 - root - INFO - lr: 6.8803e-06 gnorm: 1.20 [21:18:29< 3:10:37] +[titan] 2025-10-05 19:52:52,095 - root - INFO - step: 34815 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7667 +[titan] 2025-10-05 19:52:52,095 - root - INFO - lr: 6.8767e-06 gnorm: 1.23 [21:18:40< 3:10:26] +[titan] 2025-10-05 19:52:54,455 - root - INFO - Dumping profiler traces at step 34816 +[titan] 2025-10-05 19:52:54,493 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:53:03,216 - root - INFO - step: 34820 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 29,466 tflops: 408.80 mfu: 41.33% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:53:03,216 - root - INFO - lr: 6.8732e-06 gnorm: 1.20 [21:18:51< 3:10:15] +[titan] 2025-10-05 19:53:14,080 - root - INFO - step: 34825 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7156 +[titan] 2025-10-05 19:53:14,080 - root - INFO - lr: 6.8696e-06 gnorm: 1.17 [21:19:02< 3:10:03] +[titan] 2025-10-05 19:53:24,945 - root - INFO - step: 34830 loss: 
1.8794 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 19:53:24,945 - root - INFO - lr: 6.8661e-06 gnorm: 1.15 [21:19:13< 3:09:52] +[titan] 2025-10-05 19:53:35,780 - root - INFO - step: 34835 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 19:53:35,780 - root - INFO - lr: 6.8625e-06 gnorm: 1.16 [21:19:24< 3:09:41] +[titan] 2025-10-05 19:53:46,625 - root - INFO - step: 34840 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7678 +[titan] 2025-10-05 19:53:46,626 - root - INFO - lr: 6.8589e-06 gnorm: 1.21 [21:19:35< 3:09:30] +[titan] 2025-10-05 19:53:57,479 - root - INFO - step: 34845 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:53:57,479 - root - INFO - lr: 6.8554e-06 gnorm: 1.18 [21:19:46< 3:09:19] +[titan] 2025-10-05 19:54:06,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:54:08,348 - root - INFO - step: 34850 loss: 2.0208 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 19:54:08,349 - root - INFO - lr: 6.8518e-06 gnorm: 1.22 [21:19:56< 3:09:08] +[titan] 2025-10-05 19:54:19,236 - root - INFO - step: 34855 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 19:54:19,236 - root - INFO - lr: 6.8483e-06 gnorm: 1.19 [21:20:07< 3:08:57] +[titan] 2025-10-05 19:54:30,115 - root - INFO - step: 34860 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 19:54:30,115 - root - INFO - lr: 6.8448e-06 gnorm: 1.17 [21:20:18< 3:08:46] +[titan] 2025-10-05 19:54:40,989 - root - INFO - step: 34865 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 19:54:40,989 - root - INFO - lr: 6.8412e-06 gnorm: 1.22 [21:20:29< 3:08:35] +[titan] 2025-10-05 19:54:51,840 - root - INFO - step: 34870 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:54:51,840 - root - INFO - lr: 6.8377e-06 gnorm: 1.23 [21:20:40< 3:08:24] +[titan] 2025-10-05 19:55:02,739 - root - INFO - step: 34875 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:55:02,739 - root - INFO - lr: 6.8342e-06 gnorm: 1.18 [21:20:51< 3:08:13] +[titan] 2025-10-05 19:55:13,616 - root - INFO - step: 34880 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:55:13,616 - root - INFO - lr: 
6.8306e-06 gnorm: 1.19 [21:21:02< 3:08:02] +[titan] 2025-10-05 19:55:24,502 - root - INFO - step: 34885 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7556 +[titan] 2025-10-05 19:55:24,502 - root - INFO - lr: 6.8271e-06 gnorm: 1.20 [21:21:13< 3:07:51] +[titan] 2025-10-05 19:55:35,390 - root - INFO - step: 34890 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 19:55:35,390 - root - INFO - lr: 6.8236e-06 gnorm: 1.20 [21:21:23< 3:07:40] +[titan] 2025-10-05 19:55:46,234 - root - INFO - step: 34895 loss: 1.9281 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 19:55:46,234 - root - INFO - lr: 6.8201e-06 gnorm: 1.21 [21:21:34< 3:07:29] +[titan] 2025-10-05 19:55:54,939 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:55:57,124 - root - INFO - step: 34900 loss: 1.9752 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 19:55:57,124 - root - INFO - lr: 6.8166e-06 gnorm: 1.22 [21:21:45< 3:07:18] +[titan] 2025-10-05 19:56:07,979 - root - INFO - step: 34905 loss: 1.8773 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6604 +[titan] 2025-10-05 19:56:07,979 - root - INFO - lr: 6.8130e-06 gnorm: 1.27 [21:21:56< 3:07:07] +[titan] 2025-10-05 19:56:18,858 - root - INFO - step: 34910 loss: 1.9375 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:56:18,858 - root - INFO - lr: 6.8095e-06 gnorm: 1.28 [21:22:07< 3:06:56] +[titan] 2025-10-05 19:56:29,723 - root - INFO - step: 34915 loss: 1.9603 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:56:29,723 - root - INFO - lr: 6.8060e-06 gnorm: 1.20 [21:22:18< 3:06:45] +[titan] 2025-10-05 19:56:40,632 - root - INFO - step: 34920 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:56:40,632 - root - INFO - lr: 6.8025e-06 gnorm: 1.20 [21:22:29< 3:06:34] +[titan] 2025-10-05 19:56:51,542 - root - INFO - step: 34925 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 19:56:51,542 - root - INFO - lr: 6.7990e-06 gnorm: 1.19 [21:22:40< 3:06:23] +[titan] 2025-10-05 19:57:02,433 - root - INFO - step: 34930 loss: 1.8978 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6780 +[titan] 2025-10-05 19:57:02,433 - root - INFO - lr: 
6.7955e-06 gnorm: 1.19 [21:22:51< 3:06:12] +[titan] 2025-10-05 19:57:13,339 - root - INFO - step: 34935 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 19:57:13,339 - root - INFO - lr: 6.7920e-06 gnorm: 1.19 [21:23:01< 3:06:01] +[titan] 2025-10-05 19:57:24,225 - root - INFO - step: 34940 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 19:57:24,225 - root - INFO - lr: 6.7886e-06 gnorm: 1.21 [21:23:12< 3:05:50] +[titan] 2025-10-05 19:57:35,111 - root - INFO - step: 34945 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:57:35,112 - root - INFO - lr: 6.7851e-06 gnorm: 1.20 [21:23:23< 3:05:39] +[titan] 2025-10-05 19:57:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:57:46,109 - root - INFO - step: 34950 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 29,796 tflops: 413.37 mfu: 41.80% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:57:46,109 - root - INFO - lr: 6.7816e-06 gnorm: 1.21 [21:23:34< 3:05:28] +[titan] 2025-10-05 19:57:56,976 - root - INFO - step: 34955 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 19:57:56,976 - root - INFO - lr: 6.7781e-06 gnorm: 1.21 [21:23:45< 3:05:16] +[titan] 2025-10-05 19:58:07,860 - root - INFO - step: 34960 loss: 1.8843 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 19:58:07,860 - root - INFO - lr: 6.7746e-06 gnorm: 1.18 [21:23:56< 3:05:05] +[titan] 2025-10-05 19:58:18,737 - root - INFO - step: 34965 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 19:58:18,737 - root - INFO - lr: 6.7712e-06 gnorm: 1.21 [21:24:07< 3:04:54] +[titan] 2025-10-05 19:58:29,592 - root - INFO - step: 34970 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 19:58:29,592 - root - INFO - lr: 6.7677e-06 gnorm: 2.00 [21:24:18< 3:04:43] +[titan] 2025-10-05 19:58:40,452 - root - INFO - step: 34975 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 19:58:40,453 - root - INFO - lr: 6.7642e-06 gnorm: 1.24 [21:24:29< 3:04:32] +[titan] 2025-10-05 19:58:51,317 - root - INFO - step: 34980 loss: 1.8424 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6294 +[titan] 2025-10-05 19:58:51,317 - root - INFO - lr: 
6.7608e-06 gnorm: 1.20 [21:24:39< 3:04:21] +[titan] 2025-10-05 19:59:02,209 - root - INFO - step: 34985 loss: 2.0210 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 19:59:02,209 - root - INFO - lr: 6.7573e-06 gnorm: 1.25 [21:24:50< 3:04:10] +[titan] 2025-10-05 19:59:13,085 - root - INFO - step: 34990 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 19:59:13,085 - root - INFO - lr: 6.7538e-06 gnorm: 1.21 [21:25:01< 3:03:59] +[titan] 2025-10-05 19:59:23,963 - root - INFO - step: 34995 loss: 1.9729 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7447 +[titan] 2025-10-05 19:59:23,964 - root - INFO - lr: 6.7504e-06 gnorm: 1.20 [21:25:12< 3:03:48] +[titan] 2025-10-05 19:59:32,670 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 19:59:34,852 - root - INFO - step: 35000 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 19:59:34,852 - root - INFO - lr: 6.7469e-06 gnorm: 1.20 [21:25:23< 3:03:37] +[titan] 2025-10-05 19:59:34,852 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 19:59:52,575 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 19:59:52,576 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.72 seconds. 
+[titan] 2025-10-05 20:02:00,815 - root - INFO - step: 35005 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 2,245 tflops: 31.15 mfu: 3.15% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 20:02:00,815 - root - INFO - lr: 6.7435e-06 gnorm: 1.17 [21:27:49< 3:03:45] +[titan] 2025-10-05 20:02:11,608 - root - INFO - step: 35010 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,362 tflops: 421.22 mfu: 42.59% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 20:02:11,608 - root - INFO - lr: 6.7401e-06 gnorm: 1.25 [21:28:00< 3:03:34] +[titan] 2025-10-05 20:02:22,413 - root - INFO - step: 35015 loss: 1.8869 memory: 118.84GiB(85.28%) tps: 30,329 tflops: 420.77 mfu: 42.55% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 20:02:22,413 - root - INFO - lr: 6.7366e-06 gnorm: 1.21 [21:28:10< 3:03:23] +[titan] 2025-10-05 20:02:33,281 - root - INFO - step: 35020 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 20:02:33,281 - root - INFO - lr: 6.7332e-06 gnorm: 1.19 [21:28:21< 3:03:12] +[titan] 2025-10-05 20:02:44,100 - root - INFO - step: 35025 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:02:44,100 - root - INFO - lr: 6.7297e-06 gnorm: 1.17 [21:28:32< 3:03:01] +[titan] 2025-10-05 20:02:54,948 - root - INFO - step: 35030 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 20:02:54,948 - root - INFO - lr: 6.7263e-06 gnorm: 1.23 [21:28:43< 3:02:50] +[titan] 2025-10-05 20:03:05,780 - root - INFO - step: 35035 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:03:05,780 - root - INFO - lr: 
6.7229e-06 gnorm: 1.24 [21:28:54< 3:02:39] +[titan] 2025-10-05 20:03:16,638 - root - INFO - step: 35040 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 20:03:16,638 - root - INFO - lr: 6.7195e-06 gnorm: 1.23 [21:29:05< 3:02:28] +[titan] 2025-10-05 20:03:27,560 - root - INFO - step: 35045 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 20:03:27,560 - root - INFO - lr: 6.7160e-06 gnorm: 1.19 [21:29:16< 3:02:17] +[titan] 2025-10-05 20:03:36,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 20:03:38,413 - root - INFO - step: 35050 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7064 +[titan] 2025-10-05 20:03:38,413 - root - INFO - lr: 6.7126e-06 gnorm: 1.23 [21:29:26< 3:02:06] +[titan] 2025-10-05 20:03:49,265 - root - INFO - step: 35055 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 20:03:49,265 - root - INFO - lr: 6.7092e-06 gnorm: 1.18 [21:29:37< 3:01:55] +[titan] 2025-10-05 20:04:00,143 - root - INFO - step: 35060 loss: 1.9047 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6842 +[titan] 2025-10-05 20:04:00,143 - root - INFO - lr: 6.7058e-06 gnorm: 1.22 [21:29:48< 3:01:44] +[titan] 2025-10-05 20:04:11,001 - root - INFO - step: 35065 loss: 1.8697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 20:04:11,001 - root - INFO - lr: 6.7024e-06 gnorm: 1.21 [21:29:59< 3:01:33] +[titan] 2025-10-05 20:04:21,863 - root - INFO - step: 35070 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 
30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:04:21,863 - root - INFO - lr: 6.6990e-06 gnorm: 1.24 [21:30:10< 3:01:22] +[titan] 2025-10-05 20:04:32,799 - root - INFO - step: 35075 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 20:04:32,799 - root - INFO - lr: 6.6956e-06 gnorm: 1.21 [21:30:21< 3:01:10] +[titan] 2025-10-05 20:04:43,675 - root - INFO - step: 35080 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 20:04:43,675 - root - INFO - lr: 6.6922e-06 gnorm: 1.15 [21:30:32< 3:00:59] +[titan] 2025-10-05 20:04:54,541 - root - INFO - step: 35085 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 20:04:54,542 - root - INFO - lr: 6.6888e-06 gnorm: 1.19 [21:30:43< 3:00:48] +[titan] 2025-10-05 20:05:05,402 - root - INFO - step: 35090 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:05:05,402 - root - INFO - lr: 6.6854e-06 gnorm: 1.18 [21:30:53< 3:00:37] +[titan] 2025-10-05 20:05:16,263 - root - INFO - step: 35095 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:05:16,263 - root - INFO - lr: 6.6820e-06 gnorm: 1.22 [21:31:04< 3:00:26] +[titan] 2025-10-05 20:05:24,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:05:27,145 - root - INFO - step: 35100 loss: 1.9245 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7014 +[titan] 2025-10-05 20:05:27,145 - root - INFO - lr: 6.6786e-06 gnorm: 1.23 [21:31:15< 3:00:15] +[titan] 2025-10-05 20:05:38,035 - root - INFO - step: 35105 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 20:05:38,035 - root - INFO - lr: 6.6753e-06 gnorm: 1.17 [21:31:26< 3:00:04] +[titan] 2025-10-05 20:05:48,877 - root - INFO - step: 35110 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7243 +[titan] 2025-10-05 20:05:48,877 - root - INFO - lr: 6.6719e-06 gnorm: 1.15 [21:31:37< 2:59:53] +[titan] 2025-10-05 20:05:59,749 - root - INFO - step: 35115 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:05:59,749 - root - INFO - lr: 6.6685e-06 gnorm: 1.20 [21:31:48< 2:59:42] +[titan] 2025-10-05 20:06:10,605 - root - INFO - step: 35120 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:06:10,606 - root - INFO - lr: 6.6651e-06 gnorm: 1.17 [21:31:59< 2:59:31] +[titan] 2025-10-05 20:06:21,451 - root - INFO - step: 35125 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:06:21,451 - root - INFO - lr: 6.6618e-06 gnorm: 1.20 [21:32:10< 2:59:20] +[titan] 2025-10-05 20:06:32,365 - root - INFO - step: 35130 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:06:32,365 - root - INFO - lr: 
6.6584e-06 gnorm: 1.23 [21:32:20< 2:59:09] +[titan] 2025-10-05 20:06:43,231 - root - INFO - step: 35135 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7050 +[titan] 2025-10-05 20:06:43,232 - root - INFO - lr: 6.6550e-06 gnorm: 1.18 [21:32:31< 2:58:58] +[titan] 2025-10-05 20:06:54,140 - root - INFO - step: 35140 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 20:06:54,140 - root - INFO - lr: 6.6517e-06 gnorm: 1.23 [21:32:42< 2:58:47] +[titan] 2025-10-05 20:07:05,022 - root - INFO - step: 35145 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 20:07:05,022 - root - INFO - lr: 6.6483e-06 gnorm: 1.23 [21:32:53< 2:58:36] +[titan] 2025-10-05 20:07:13,699 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:07:15,932 - root - INFO - step: 35150 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6718 +[titan] 2025-10-05 20:07:15,932 - root - INFO - lr: 6.6450e-06 gnorm: 1.21 [21:33:04< 2:58:25] +[titan] 2025-10-05 20:07:26,828 - root - INFO - step: 35155 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:07:26,828 - root - INFO - lr: 6.6416e-06 gnorm: 1.16 [21:33:15< 2:58:14] +[titan] 2025-10-05 20:07:37,740 - root - INFO - step: 35160 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 20:07:37,740 - root - INFO - lr: 6.6383e-06 gnorm: 1.17 [21:33:26< 2:58:03] +[titan] 2025-10-05 20:07:48,623 - root - INFO - step: 35165 loss: 1.9332 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 20:07:48,623 - root - INFO - lr: 6.6349e-06 gnorm: 1.21 [21:33:37< 2:57:51] +[titan] 2025-10-05 20:07:59,524 - root - INFO - step: 35170 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 20:07:59,524 - root - INFO - lr: 6.6316e-06 gnorm: 1.20 [21:33:48< 2:57:40] +[titan] 2025-10-05 20:08:10,396 - root - INFO - step: 35175 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:08:10,396 - root - INFO - lr: 6.6283e-06 gnorm: 1.19 [21:33:58< 2:57:29] +[titan] 2025-10-05 20:08:21,269 - root - INFO - step: 35180 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:08:21,270 - root - INFO - lr: 
6.6249e-06 gnorm: 1.19 [21:34:09< 2:57:18] +[titan] 2025-10-05 20:08:32,174 - root - INFO - step: 35185 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 20:08:32,175 - root - INFO - lr: 6.6216e-06 gnorm: 1.21 [21:34:20< 2:57:07] +[titan] 2025-10-05 20:08:43,054 - root - INFO - step: 35190 loss: 1.9950 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7626 +[titan] 2025-10-05 20:08:43,054 - root - INFO - lr: 6.6183e-06 gnorm: 1.21 [21:34:31< 2:56:56] +[titan] 2025-10-05 20:08:53,935 - root - INFO - step: 35195 loss: 1.9405 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7155 +[titan] 2025-10-05 20:08:53,935 - root - INFO - lr: 6.6150e-06 gnorm: 1.23 [21:34:42< 2:56:45] +[titan] 2025-10-05 20:09:02,614 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:09:04,794 - root - INFO - step: 35200 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 20:09:04,794 - root - INFO - lr: 6.6116e-06 gnorm: 1.17 [21:34:53< 2:56:34] +[titan] 2025-10-05 20:09:15,694 - root - INFO - step: 35205 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:09:15,695 - root - INFO - lr: 6.6083e-06 gnorm: 1.21 [21:35:04< 2:56:23] +[titan] 2025-10-05 20:09:26,591 - root - INFO - step: 35210 loss: 1.9224 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:09:26,591 - root - INFO - lr: 6.6050e-06 gnorm: 1.21 [21:35:15< 2:56:12] +[titan] 2025-10-05 20:09:37,512 - root - INFO - step: 35215 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 20:09:37,512 - root - INFO - lr: 6.6017e-06 gnorm: 1.22 [21:35:26< 2:56:01] +[titan] 2025-10-05 20:09:48,396 - root - INFO - step: 35220 loss: 1.9286 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7057 +[titan] 2025-10-05 20:09:48,396 - root - INFO - lr: 6.5984e-06 gnorm: 1.23 [21:35:36< 2:55:50] +[titan] 2025-10-05 20:09:59,291 - root - INFO - step: 35225 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 20:09:59,291 - root - INFO - lr: 6.5951e-06 gnorm: 1.22 [21:35:47< 2:55:39] +[titan] 2025-10-05 20:10:10,147 - root - INFO - step: 35230 loss: 1.9319 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 20:10:10,147 - root - INFO - lr: 
6.5918e-06 gnorm: 1.26 [21:35:58< 2:55:28] +[titan] 2025-10-05 20:10:21,054 - root - INFO - step: 35235 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 20:10:21,055 - root - INFO - lr: 6.5885e-06 gnorm: 1.18 [21:36:09< 2:55:17] +[titan] 2025-10-05 20:10:31,940 - root - INFO - step: 35240 loss: 1.8612 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:10:31,940 - root - INFO - lr: 6.5852e-06 gnorm: 1.16 [21:36:20< 2:55:06] +[titan] 2025-10-05 20:10:42,806 - root - INFO - step: 35245 loss: 2.0002 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 20:10:42,807 - root - INFO - lr: 6.5819e-06 gnorm: 1.22 [21:36:31< 2:54:55] +[titan] 2025-10-05 20:10:51,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:10:53,701 - root - INFO - step: 35250 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 20:10:53,701 - root - INFO - lr: 6.5786e-06 gnorm: 1.21 [21:36:42< 2:54:43] +[titan] 2025-10-05 20:11:04,581 - root - INFO - step: 35255 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 20:11:04,581 - root - INFO - lr: 6.5754e-06 gnorm: 1.20 [21:36:53< 2:54:32] +[titan] 2025-10-05 20:11:15,487 - root - INFO - step: 35260 loss: 1.9259 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:11:15,487 - root - INFO - lr: 6.5721e-06 gnorm: 1.23 [21:37:04< 2:54:21] +[titan] 2025-10-05 20:11:26,398 - root - INFO - step: 35265 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:11:26,398 - root - INFO - lr: 6.5688e-06 gnorm: 1.23 [21:37:14< 2:54:10] +[titan] 2025-10-05 20:11:37,313 - root - INFO - step: 35270 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:11:37,314 - root - INFO - lr: 6.5655e-06 gnorm: 1.21 [21:37:25< 2:53:59] +[titan] 2025-10-05 20:11:48,214 - root - INFO - step: 35275 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 20:11:48,214 - root - INFO - lr: 6.5623e-06 gnorm: 1.24 [21:37:36< 2:53:48] +[titan] 2025-10-05 20:11:59,075 - root - INFO - step: 35280 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 20:11:59,075 - root - INFO - lr: 
6.5590e-06 gnorm: 1.20 [21:37:47< 2:53:37] +[titan] 2025-10-05 20:12:09,938 - root - INFO - step: 35285 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 20:12:09,939 - root - INFO - lr: 6.5557e-06 gnorm: 1.21 [21:37:58< 2:53:26] +[titan] 2025-10-05 20:12:20,821 - root - INFO - step: 35290 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 20:12:20,821 - root - INFO - lr: 6.5525e-06 gnorm: 1.18 [21:38:09< 2:53:15] +[titan] 2025-10-05 20:12:31,713 - root - INFO - step: 35295 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 20:12:31,713 - root - INFO - lr: 6.5492e-06 gnorm: 1.23 [21:38:20< 2:53:04] +[titan] 2025-10-05 20:12:40,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:12:42,667 - root - INFO - step: 35300 loss: 1.9229 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:12:42,667 - root - INFO - lr: 6.5460e-06 gnorm: 1.23 [21:38:31< 2:52:53] +[titan] 2025-10-05 20:12:53,570 - root - INFO - step: 35305 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 20:12:53,570 - root - INFO - lr: 6.5427e-06 gnorm: 1.21 [21:38:42< 2:52:42] +[titan] 2025-10-05 20:13:04,452 - root - INFO - step: 35310 loss: 1.9317 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7088 +[titan] 2025-10-05 20:13:04,452 - root - INFO - lr: 6.5395e-06 gnorm: 1.22 [21:38:53< 2:52:31] +[titan] 2025-10-05 20:13:15,334 - root - INFO - step: 35315 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 20:13:15,334 - root - INFO - lr: 6.5362e-06 gnorm: 1.20 [21:39:03< 2:52:20] +[titan] 2025-10-05 20:13:26,220 - root - INFO - step: 35320 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 20:13:26,221 - root - INFO - lr: 6.5330e-06 gnorm: 1.19 [21:39:14< 2:52:09] +[titan] 2025-10-05 20:13:37,204 - root - INFO - step: 35325 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 29,834 tflops: 413.90 mfu: 41.85% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 20:13:37,205 - root - INFO - lr: 6.5297e-06 gnorm: 1.20 [21:39:25< 2:51:58] +[titan] 2025-10-05 20:13:43,910 - root - INFO - Dumping profiler traces at step 35328 +[titan] 2025-10-05 20:13:43,949 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:13:48,348 - root - INFO - step: 35330 loss: 
1.9326 memory: 118.84GiB(85.28%) tps: 29,406 tflops: 407.96 mfu: 41.25% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:13:48,348 - root - INFO - lr: 6.5265e-06 gnorm: 1.23 [21:39:36< 2:51:47] +[titan] 2025-10-05 20:13:59,245 - root - INFO - step: 35335 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7238 +[titan] 2025-10-05 20:13:59,245 - root - INFO - lr: 6.5233e-06 gnorm: 1.23 [21:39:47< 2:51:36] +[titan] 2025-10-05 20:14:10,148 - root - INFO - step: 35340 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:14:10,149 - root - INFO - lr: 6.5201e-06 gnorm: 1.24 [21:39:58< 2:51:25] +[titan] 2025-10-05 20:14:21,047 - root - INFO - step: 35345 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7363 +[titan] 2025-10-05 20:14:21,048 - root - INFO - lr: 6.5168e-06 gnorm: 1.20 [21:40:09< 2:51:14] +[titan] 2025-10-05 20:14:29,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:14:31,931 - root - INFO - step: 35350 loss: 1.9071 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:14:31,931 - root - INFO - lr: 6.5136e-06 gnorm: 1.22 [21:40:20< 2:51:02] +[titan] 2025-10-05 20:14:42,833 - root - INFO - step: 35355 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6814 +[titan] 2025-10-05 20:14:42,833 - root - INFO - lr: 6.5104e-06 gnorm: 1.18 [21:40:31< 2:50:51] +[titan] 2025-10-05 20:14:53,713 - root - INFO - step: 35360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 20:14:53,714 - root - INFO - lr: 6.5072e-06 gnorm: 1.22 [21:40:42< 2:50:40] +[titan] 2025-10-05 20:15:04,622 - root - INFO - step: 35365 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:15:04,622 - root - INFO - lr: 6.5040e-06 gnorm: 1.20 [21:40:53< 2:50:29] +[titan] 2025-10-05 20:15:15,532 - root - INFO - step: 35370 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:15:15,532 - root - INFO - lr: 6.5008e-06 gnorm: 1.21 [21:41:04< 2:50:18] +[titan] 2025-10-05 20:15:26,422 - root - INFO - step: 35375 loss: 1.9139 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 20:15:26,422 - root - INFO - lr: 6.4976e-06 gnorm: 1.20 [21:41:14< 2:50:07] +[titan] 2025-10-05 20:15:37,640 - root - INFO - step: 35380 loss: 1.9110 memory: 118.84GiB(85.28%) tps: 29,212 tflops: 405.27 mfu: 40.98% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:15:37,640 - root - INFO - lr: 
6.4944e-06 gnorm: 1.19 [21:41:26< 2:49:56] +[titan] 2025-10-05 20:15:48,502 - root - INFO - step: 35385 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6841 +[titan] 2025-10-05 20:15:48,503 - root - INFO - lr: 6.4912e-06 gnorm: 1.22 [21:41:37< 2:49:45] +[titan] 2025-10-05 20:15:59,387 - root - INFO - step: 35390 loss: 1.9078 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 20:15:59,387 - root - INFO - lr: 6.4880e-06 gnorm: 1.27 [21:41:47< 2:49:34] +[titan] 2025-10-05 20:16:10,282 - root - INFO - step: 35395 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7479 +[titan] 2025-10-05 20:16:10,282 - root - INFO - lr: 6.4848e-06 gnorm: 1.21 [21:41:58< 2:49:23] +[titan] 2025-10-05 20:16:18,972 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:16:21,149 - root - INFO - step: 35400 loss: 1.8914 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6724 +[titan] 2025-10-05 20:16:21,149 - root - INFO - lr: 6.4816e-06 gnorm: 1.20 [21:42:09< 2:49:12] +[titan] 2025-10-05 20:16:32,037 - root - INFO - step: 35405 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7194 +[titan] 2025-10-05 20:16:32,038 - root - INFO - lr: 6.4784e-06 gnorm: 1.23 [21:42:20< 2:49:01] +[titan] 2025-10-05 20:16:42,971 - root - INFO - step: 35410 loss: 1.9290 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:16:42,971 - root - INFO - lr: 6.4752e-06 gnorm: 1.20 [21:42:31< 2:48:50] +[titan] 2025-10-05 20:16:53,840 - root - INFO - step: 35415 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:16:53,840 - root - INFO - lr: 6.4721e-06 gnorm: 1.17 [21:42:42< 2:48:39] +[titan] 2025-10-05 20:17:04,705 - root - INFO - step: 35420 loss: 1.9333 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:17:04,705 - root - INFO - lr: 6.4689e-06 gnorm: 1.23 [21:42:53< 2:48:28] +[titan] 2025-10-05 20:17:15,612 - root - INFO - step: 35425 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 20:17:15,612 - root - INFO - lr: 6.4657e-06 gnorm: 1.20 [21:43:04< 2:48:17] +[titan] 2025-10-05 20:17:26,485 - root - INFO - step: 35430 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:17:26,486 - root - INFO - lr: 
6.4625e-06 gnorm: 1.18 [21:43:15< 2:48:06] +[titan] 2025-10-05 20:17:37,354 - root - INFO - step: 35435 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:17:37,354 - root - INFO - lr: 6.4594e-06 gnorm: 1.23 [21:43:25< 2:47:55] +[titan] 2025-10-05 20:17:48,271 - root - INFO - step: 35440 loss: 1.9162 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:17:48,271 - root - INFO - lr: 6.4562e-06 gnorm: 1.18 [21:43:36< 2:47:44] +[titan] 2025-10-05 20:17:59,161 - root - INFO - step: 35445 loss: 1.9393 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:17:59,161 - root - INFO - lr: 6.4531e-06 gnorm: 1.21 [21:43:47< 2:47:32] +[titan] 2025-10-05 20:18:07,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:18:10,014 - root - INFO - step: 35450 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:18:10,014 - root - INFO - lr: 6.4499e-06 gnorm: 1.20 [21:43:58< 2:47:21] +[titan] 2025-10-05 20:18:20,865 - root - INFO - step: 35455 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 20:18:20,865 - root - INFO - lr: 6.4468e-06 gnorm: 1.25 [21:44:09< 2:47:10] +[titan] 2025-10-05 20:18:31,752 - root - INFO - step: 35460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.6977 +[titan] 2025-10-05 20:18:31,752 - root - INFO - lr: 6.4436e-06 gnorm: 1.27 [21:44:20< 2:46:59] +[titan] 2025-10-05 20:18:42,672 - root - INFO - step: 35465 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:18:42,672 - root - INFO - lr: 6.4405e-06 gnorm: 1.21 [21:44:31< 2:46:48] +[titan] 2025-10-05 20:18:53,523 - root - INFO - step: 35470 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 20:18:53,523 - root - INFO - lr: 6.4373e-06 gnorm: 1.22 [21:44:42< 2:46:37] +[titan] 2025-10-05 20:19:04,397 - root - INFO - step: 35475 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 20:19:04,397 - root - INFO - lr: 6.4342e-06 gnorm: 1.20 [21:44:52< 2:46:26] +[titan] 2025-10-05 20:19:15,272 - root - INFO - step: 35480 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:19:15,272 - root - INFO - lr: 
6.4311e-06 gnorm: 1.15 [21:45:03< 2:46:15] +[titan] 2025-10-05 20:19:26,134 - root - INFO - step: 35485 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6964 +[titan] 2025-10-05 20:19:26,134 - root - INFO - lr: 6.4279e-06 gnorm: 1.22 [21:45:14< 2:46:04] +[titan] 2025-10-05 20:19:37,003 - root - INFO - step: 35490 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:19:37,003 - root - INFO - lr: 6.4248e-06 gnorm: 1.22 [21:45:25< 2:45:53] +[titan] 2025-10-05 20:19:48,060 - root - INFO - step: 35495 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 29,637 tflops: 411.17 mfu: 41.57% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:19:48,060 - root - INFO - lr: 6.4217e-06 gnorm: 1.17 [21:45:36< 2:45:42] +[titan] 2025-10-05 20:19:56,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:19:58,930 - root - INFO - step: 35500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 20:19:58,930 - root - INFO - lr: 6.4186e-06 gnorm: 1.23 [21:45:47< 2:45:31] +[titan] 2025-10-05 20:20:09,779 - root - INFO - step: 35505 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:20:09,779 - root - INFO - lr: 6.4154e-06 gnorm: 1.19 [21:45:58< 2:45:20] +[titan] 2025-10-05 20:20:20,670 - root - INFO - step: 35510 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:20:20,671 - root - INFO - lr: 6.4123e-06 gnorm: 1.22 [21:46:09< 2:45:09] +[titan] 2025-10-05 20:20:31,543 - root - INFO - step: 35515 loss: 1.8943 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 20:20:31,543 - root - INFO - lr: 6.4092e-06 gnorm: 1.24 [21:46:20< 2:44:58] +[titan] 2025-10-05 20:20:42,433 - root - INFO - step: 35520 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:20:42,433 - root - INFO - lr: 6.4061e-06 gnorm: 1.20 [21:46:30< 2:44:47] +[titan] 2025-10-05 20:20:53,334 - root - INFO - step: 35525 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7142 +[titan] 2025-10-05 20:20:53,334 - root - INFO - lr: 6.4030e-06 gnorm: 1.24 [21:46:41< 2:44:36] +[titan] 2025-10-05 20:21:04,211 - root - INFO - step: 35530 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7544 +[titan] 2025-10-05 20:21:04,211 - root - INFO - lr: 
6.3999e-06 gnorm: 1.20 [21:46:52< 2:44:25] +[titan] 2025-10-05 20:21:15,077 - root - INFO - step: 35535 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 20:21:15,077 - root - INFO - lr: 6.3968e-06 gnorm: 1.22 [21:47:03< 2:44:13] +[titan] 2025-10-05 20:21:25,947 - root - INFO - step: 35540 loss: 2.0043 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 20:21:25,947 - root - INFO - lr: 6.3937e-06 gnorm: 1.23 [21:47:14< 2:44:02] +[titan] 2025-10-05 20:21:36,813 - root - INFO - step: 35545 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:21:36,813 - root - INFO - lr: 6.3906e-06 gnorm: 1.26 [21:47:25< 2:43:51] +[titan] 2025-10-05 20:21:45,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:21:47,733 - root - INFO - step: 35550 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 20:21:47,733 - root - INFO - lr: 6.3875e-06 gnorm: 1.29 [21:47:36< 2:43:40] +[titan] 2025-10-05 20:21:58,624 - root - INFO - step: 35555 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6918 +[titan] 2025-10-05 20:21:58,624 - root - INFO - lr: 6.3845e-06 gnorm: 1.20 [21:47:47< 2:43:29] +[titan] 2025-10-05 20:22:09,503 - root - INFO - step: 35560 loss: 1.8840 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 20:22:09,503 - root - INFO - lr: 6.3814e-06 gnorm: 1.18 [21:47:58< 2:43:18] +[titan] 2025-10-05 20:22:20,399 - root - INFO - step: 35565 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7435 +[titan] 2025-10-05 20:22:20,399 - root - INFO - lr: 6.3783e-06 gnorm: 1.22 [21:48:08< 2:43:07] +[titan] 2025-10-05 20:22:31,288 - root - INFO - step: 35570 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 20:22:31,288 - root - INFO - lr: 6.3752e-06 gnorm: 1.21 [21:48:19< 2:42:56] +[titan] 2025-10-05 20:22:42,161 - root - INFO - step: 35575 loss: 1.9928 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 20:22:42,162 - root - INFO - lr: 6.3722e-06 gnorm: 1.31 [21:48:30< 2:42:45] +[titan] 2025-10-05 20:22:53,096 - root - INFO - step: 35580 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 20:22:53,096 - root - INFO - lr: 
6.3691e-06 gnorm: 1.25 [21:48:41< 2:42:34] +[titan] 2025-10-05 20:23:04,005 - root - INFO - step: 35585 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 20:23:04,005 - root - INFO - lr: 6.3660e-06 gnorm: 1.22 [21:48:52< 2:42:23] +[titan] 2025-10-05 20:23:14,874 - root - INFO - step: 35590 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6686 +[titan] 2025-10-05 20:23:14,874 - root - INFO - lr: 6.3630e-06 gnorm: 1.15 [21:49:03< 2:42:12] +[titan] 2025-10-05 20:23:25,760 - root - INFO - step: 35595 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 20:23:25,760 - root - INFO - lr: 6.3599e-06 gnorm: 1.26 [21:49:14< 2:42:01] +[titan] 2025-10-05 20:23:34,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:23:36,637 - root - INFO - step: 35600 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:23:36,637 - root - INFO - lr: 6.3568e-06 gnorm: 1.19 [21:49:25< 2:41:50] +[titan] 2025-10-05 20:23:47,564 - root - INFO - step: 35605 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 20:23:47,564 - root - INFO - lr: 6.3538e-06 gnorm: 1.18 [21:49:36< 2:41:39] +[titan] 2025-10-05 20:23:58,425 - root - INFO - step: 35610 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 20:23:58,425 - root - INFO - lr: 6.3508e-06 gnorm: 1.20 [21:49:46< 2:41:28] +[titan] 2025-10-05 20:24:09,278 - root - INFO - step: 35615 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 20:24:09,278 - root - INFO - lr: 6.3477e-06 gnorm: 1.21 [21:49:57< 2:41:17] +[titan] 2025-10-05 20:24:20,176 - root - INFO - step: 35620 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 20:24:20,176 - root - INFO - lr: 6.3447e-06 gnorm: 1.21 [21:50:08< 2:41:06] +[titan] 2025-10-05 20:24:31,048 - root - INFO - step: 35625 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7146 +[titan] 2025-10-05 20:24:31,049 - root - INFO - lr: 6.3416e-06 gnorm: 1.18 [21:50:19< 2:40:55] +[titan] 2025-10-05 20:24:41,914 - root - INFO - step: 35630 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 20:24:41,914 - root - INFO - lr: 
6.3386e-06 gnorm: 1.22 [21:50:30< 2:40:43] +[titan] 2025-10-05 20:24:52,829 - root - INFO - step: 35635 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:24:52,829 - root - INFO - lr: 6.3356e-06 gnorm: 1.18 [21:50:41< 2:40:32] +[titan] 2025-10-05 20:25:03,707 - root - INFO - step: 35640 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:25:03,707 - root - INFO - lr: 6.3325e-06 gnorm: 1.23 [21:50:52< 2:40:21] +[titan] 2025-10-05 20:25:14,555 - root - INFO - step: 35645 loss: 1.8684 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 20:25:14,556 - root - INFO - lr: 6.3295e-06 gnorm: 1.19 [21:51:03< 2:40:10] +[titan] 2025-10-05 20:25:23,268 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:25:25,451 - root - INFO - step: 35650 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7187 +[titan] 2025-10-05 20:25:25,451 - root - INFO - lr: 6.3265e-06 gnorm: 1.20 [21:51:13< 2:39:59] +[titan] 2025-10-05 20:25:36,291 - root - INFO - step: 35655 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 20:25:36,291 - root - INFO - lr: 6.3235e-06 gnorm: 1.22 [21:51:24< 2:39:48] +[titan] 2025-10-05 20:25:47,198 - root - INFO - step: 35660 loss: 1.9669 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 20:25:47,199 - root - INFO - lr: 6.3205e-06 gnorm: 1.19 [21:51:35< 2:39:37] +[titan] 2025-10-05 20:25:58,057 - root - INFO - step: 35665 loss: 1.9343 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 20:25:58,057 - root - INFO - lr: 6.3174e-06 gnorm: 1.22 [21:51:46< 2:39:26] +[titan] 2025-10-05 20:26:08,933 - root - INFO - step: 35670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:26:08,934 - root - INFO - lr: 6.3144e-06 gnorm: 1.20 [21:51:57< 2:39:15] +[titan] 2025-10-05 20:26:19,799 - root - INFO - step: 35675 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 20:26:19,799 - root - INFO - lr: 6.3114e-06 gnorm: 1.25 [21:52:08< 2:39:04] +[titan] 2025-10-05 20:26:30,670 - root - INFO - step: 35680 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 20:26:30,670 - root - INFO - lr: 
6.3084e-06 gnorm: 1.22 [21:52:19< 2:38:53] +[titan] 2025-10-05 20:26:41,581 - root - INFO - step: 35685 loss: 2.0069 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 20:26:41,581 - root - INFO - lr: 6.3054e-06 gnorm: 1.29 [21:52:30< 2:38:42] +[titan] 2025-10-05 20:26:52,517 - root - INFO - step: 35690 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:26:52,517 - root - INFO - lr: 6.3024e-06 gnorm: 1.23 [21:52:41< 2:38:31] +[titan] 2025-10-05 20:27:03,395 - root - INFO - step: 35695 loss: 1.9599 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7330 +[titan] 2025-10-05 20:27:03,396 - root - INFO - lr: 6.2995e-06 gnorm: 1.23 [21:52:51< 2:38:20] +[titan] 2025-10-05 20:27:12,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:27:14,268 - root - INFO - step: 35700 loss: 1.9472 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 20:27:14,268 - root - INFO - lr: 6.2965e-06 gnorm: 1.23 [21:53:02< 2:38:09] +[titan] 2025-10-05 20:27:25,130 - root - INFO - step: 35705 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:27:25,130 - root - INFO - lr: 6.2935e-06 gnorm: 1.19 [21:53:13< 2:37:58] +[titan] 2025-10-05 20:27:35,993 - root - INFO - step: 35710 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 20:27:35,993 - root - INFO - lr: 6.2905e-06 gnorm: 1.28 [21:53:24< 2:37:47] +[titan] 2025-10-05 20:27:46,902 - root - INFO - step: 35715 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.7053 +[titan] 2025-10-05 20:27:46,902 - root - INFO - lr: 6.2875e-06 gnorm: 1.23 [21:53:35< 2:37:36] +[titan] 2025-10-05 20:27:57,813 - root - INFO - step: 35720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 20:27:57,813 - root - INFO - lr: 6.2846e-06 gnorm: 1.21 [21:53:46< 2:37:25] +[titan] 2025-10-05 20:28:08,685 - root - INFO - step: 35725 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 20:28:08,685 - root - INFO - lr: 6.2816e-06 gnorm: 1.21 [21:53:57< 2:37:13] +[titan] 2025-10-05 20:28:19,553 - root - INFO - step: 35730 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 20:28:19,553 - root - INFO - lr: 
6.2786e-06 gnorm: 1.19 [21:54:08< 2:37:02] +[titan] 2025-10-05 20:28:30,452 - root - INFO - step: 35735 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 20:28:30,452 - root - INFO - lr: 6.2756e-06 gnorm: 1.25 [21:54:18< 2:36:51] +[titan] 2025-10-05 20:28:41,341 - root - INFO - step: 35740 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 20:28:41,341 - root - INFO - lr: 6.2727e-06 gnorm: 1.26 [21:54:29< 2:36:40] +[titan] 2025-10-05 20:28:52,320 - root - INFO - step: 35745 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.09 mfu: 41.87% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6496 +[titan] 2025-10-05 20:28:52,320 - root - INFO - lr: 6.2697e-06 gnorm: 1.19 [21:54:40< 2:36:29] +[titan] 2025-10-05 20:29:01,029 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:29:03,215 - root - INFO - step: 35750 loss: 1.8998 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 20:29:03,215 - root - INFO - lr: 6.2668e-06 gnorm: 1.23 [21:54:51< 2:36:18] +[titan] 2025-10-05 20:29:14,102 - root - INFO - step: 35755 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 20:29:14,102 - root - INFO - lr: 6.2638e-06 gnorm: 1.25 [21:55:02< 2:36:07] +[titan] 2025-10-05 20:29:24,977 - root - INFO - step: 35760 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 20:29:24,977 - root - INFO - lr: 6.2609e-06 gnorm: 1.19 [21:55:13< 2:35:56] +[titan] 2025-10-05 20:29:35,865 - root - INFO - step: 35765 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 20:29:35,865 - root - INFO - lr: 6.2579e-06 gnorm: 1.20 [21:55:24< 2:35:45] +[titan] 2025-10-05 20:29:46,743 - root - INFO - step: 35770 loss: 1.9516 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 20:29:46,743 - root - INFO - lr: 6.2550e-06 gnorm: 1.22 [21:55:35< 2:35:34] +[titan] 2025-10-05 20:29:57,662 - root - INFO - step: 35775 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 20:29:57,662 - root - INFO - lr: 6.2521e-06 gnorm: 1.24 [21:55:46< 2:35:23] +[titan] 2025-10-05 20:30:08,549 - root - INFO - step: 35780 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 20:30:08,549 - root - INFO - lr: 
6.2491e-06 gnorm: 1.24 [21:55:57< 2:35:12] +[titan] 2025-10-05 20:30:19,428 - root - INFO - step: 35785 loss: 2.0119 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 20:30:19,428 - root - INFO - lr: 6.2462e-06 gnorm: 1.22 [21:56:07< 2:35:01] +[titan] 2025-10-05 20:30:30,298 - root - INFO - step: 35790 loss: 1.8995 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 20:30:30,298 - root - INFO - lr: 6.2433e-06 gnorm: 1.20 [21:56:18< 2:34:50] +[titan] 2025-10-05 20:30:41,166 - root - INFO - step: 35795 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 20:30:41,166 - root - INFO - lr: 6.2403e-06 gnorm: 1.22 [21:56:29< 2:34:39] +[titan] 2025-10-05 20:30:49,930 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:30:52,145 - root - INFO - step: 35800 loss: 1.8719 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.08 mfu: 41.87% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6562 +[titan] 2025-10-05 20:30:52,145 - root - INFO - lr: 6.2374e-06 gnorm: 1.18 [21:56:40< 2:34:28] +[titan] 2025-10-05 20:31:03,002 - root - INFO - step: 35805 loss: 1.8418 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6290 +[titan] 2025-10-05 20:31:03,002 - root - INFO - lr: 6.2345e-06 gnorm: 1.17 [21:56:51< 2:34:17] +[titan] 2025-10-05 20:31:13,902 - root - INFO - step: 35810 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 20:31:13,902 - root - INFO - lr: 6.2316e-06 gnorm: 1.20 [21:57:02< 2:34:06] +[titan] 2025-10-05 20:31:24,762 - root - INFO - step: 35815 loss: 1.8766 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:31:24,763 - root - INFO - lr: 6.2287e-06 gnorm: 1.21 [21:57:13< 2:33:55] +[titan] 2025-10-05 20:31:35,622 - root - INFO - step: 35820 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 20:31:35,623 - root - INFO - lr: 6.2258e-06 gnorm: 1.20 [21:57:24< 2:33:44] +[titan] 2025-10-05 20:31:46,463 - root - INFO - step: 35825 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 20:31:46,464 - root - INFO - lr: 6.2229e-06 gnorm: 1.23 [21:57:34< 2:33:32] +[titan] 2025-10-05 20:31:57,412 - root - INFO - step: 35830 loss: 1.8980 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 20:31:57,412 - root - INFO - lr: 
6.2200e-06 gnorm: 1.20 [21:57:45< 2:33:21] +[titan] 2025-10-05 20:32:08,273 - root - INFO - step: 35835 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6616 +[titan] 2025-10-05 20:32:08,273 - root - INFO - lr: 6.2171e-06 gnorm: 1.17 [21:57:56< 2:33:10] +[titan] 2025-10-05 20:32:19,226 - root - INFO - step: 35840 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 20:32:19,227 - root - INFO - lr: 6.2142e-06 gnorm: 1.24 [21:58:07< 2:32:59] +[titan] 2025-10-05 20:32:19,416 - root - INFO - Dumping profiler traces at step 35840 +[titan] 2025-10-05 20:32:19,454 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:32:30,380 - root - INFO - step: 35845 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,379 tflops: 407.58 mfu: 41.21% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 20:32:30,381 - root - INFO - lr: 6.2113e-06 gnorm: 1.21 [21:58:18< 2:32:48] +[titan] 2025-10-05 20:32:39,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:32:41,258 - root - INFO - step: 35850 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:32:41,258 - root - INFO - lr: 6.2084e-06 gnorm: 1.18 [21:58:29< 2:32:37] +[titan] 2025-10-05 20:32:52,159 - root - INFO - step: 35855 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 20:32:52,159 - root - INFO - lr: 6.2055e-06 gnorm: 1.21 [21:58:40< 2:32:26] +[titan] 2025-10-05 20:33:03,041 - root - INFO - step: 35860 loss: 1.9254 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:33:03,041 - root - INFO - lr: 6.2026e-06 gnorm: 1.22 [21:58:51< 2:32:15] +[titan] 2025-10-05 20:33:13,902 - root - INFO - step: 35865 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7075 +[titan] 2025-10-05 20:33:13,902 - root - INFO - lr: 6.1998e-06 gnorm: 1.24 [21:59:02< 2:32:04] +[titan] 2025-10-05 20:33:24,764 - root - INFO - step: 35870 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 20:33:24,764 - root - INFO - lr: 6.1969e-06 gnorm: 1.24 [21:59:13< 2:31:53] +[titan] 2025-10-05 20:33:35,665 - root - INFO - step: 35875 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:33:35,666 - root - INFO - lr: 6.1940e-06 gnorm: 1.20 [21:59:24< 2:31:42] +[titan] 2025-10-05 20:33:46,542 - root - INFO - step: 35880 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:33:46,542 - root - INFO - lr: 
6.1911e-06 gnorm: 1.22 [21:59:35< 2:31:31] +[titan] 2025-10-05 20:33:57,456 - root - INFO - step: 35885 loss: 1.9215 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 20:33:57,456 - root - INFO - lr: 6.1883e-06 gnorm: 1.21 [21:59:45< 2:31:20] +[titan] 2025-10-05 20:34:08,320 - root - INFO - step: 35890 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 20:34:08,321 - root - INFO - lr: 6.1854e-06 gnorm: 1.23 [21:59:56< 2:31:09] +[titan] 2025-10-05 20:34:19,183 - root - INFO - step: 35895 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 20:34:19,183 - root - INFO - lr: 6.1826e-06 gnorm: 1.20 [22:00:07< 2:30:58] +[titan] 2025-10-05 20:34:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:34:30,044 - root - INFO - step: 35900 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 20:34:30,044 - root - INFO - lr: 6.1797e-06 gnorm: 1.24 [22:00:18< 2:30:47] +[titan] 2025-10-05 20:34:40,962 - root - INFO - step: 35905 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 20:34:40,962 - root - INFO - lr: 6.1769e-06 gnorm: 1.21 [22:00:29< 2:30:36] +[titan] 2025-10-05 20:34:51,814 - root - INFO - step: 35910 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 20:34:51,814 - root - INFO - lr: 6.1740e-06 gnorm: 1.20 [22:00:40< 2:30:25] +[titan] 2025-10-05 20:35:02,707 - root - INFO - step: 35915 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7636 +[titan] 2025-10-05 20:35:02,708 - root - INFO - lr: 6.1712e-06 gnorm: 1.25 [22:00:51< 2:30:14] +[titan] 2025-10-05 20:35:13,561 - root - INFO - step: 35920 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 20:35:13,561 - root - INFO - lr: 6.1683e-06 gnorm: 1.21 [22:01:02< 2:30:03] +[titan] 2025-10-05 20:35:24,429 - root - INFO - step: 35925 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 20:35:24,429 - root - INFO - lr: 6.1655e-06 gnorm: 1.21 [22:01:12< 2:29:51] +[titan] 2025-10-05 20:35:35,298 - root - INFO - step: 35930 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7410 +[titan] 2025-10-05 20:35:35,298 - root - INFO - lr: 
6.1627e-06 gnorm: 1.20 [22:01:23< 2:29:40] +[titan] 2025-10-05 20:35:46,161 - root - INFO - step: 35935 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6568 +[titan] 2025-10-05 20:35:46,161 - root - INFO - lr: 6.1598e-06 gnorm: 1.22 [22:01:34< 2:29:29] +[titan] 2025-10-05 20:35:57,095 - root - INFO - step: 35940 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7140 +[titan] 2025-10-05 20:35:57,096 - root - INFO - lr: 6.1570e-06 gnorm: 1.21 [22:01:45< 2:29:18] +[titan] 2025-10-05 20:36:07,977 - root - INFO - step: 35945 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 20:36:07,977 - root - INFO - lr: 6.1542e-06 gnorm: 1.23 [22:01:56< 2:29:07] +[titan] 2025-10-05 20:36:16,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:36:18,851 - root - INFO - step: 35950 loss: 1.8140 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6041 +[titan] 2025-10-05 20:36:18,851 - root - INFO - lr: 6.1514e-06 gnorm: 1.21 [22:02:07< 2:28:56] +[titan] 2025-10-05 20:36:29,728 - root - INFO - step: 35955 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:36:29,728 - root - INFO - lr: 6.1485e-06 gnorm: 1.18 [22:02:18< 2:28:45] +[titan] 2025-10-05 20:36:40,603 - root - INFO - step: 35960 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 20:36:40,603 - root - INFO - lr: 6.1457e-06 gnorm: 1.20 [22:02:29< 2:28:34] +[titan] 2025-10-05 20:36:51,466 - root - INFO - step: 35965 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7135 +[titan] 2025-10-05 20:36:51,467 - root - INFO - lr: 6.1429e-06 gnorm: 1.21 [22:02:39< 2:28:23] +[titan] 2025-10-05 20:37:02,418 - root - INFO - step: 35970 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 20:37:02,418 - root - INFO - lr: 6.1401e-06 gnorm: 1.17 [22:02:50< 2:28:12] +[titan] 2025-10-05 20:37:13,277 - root - INFO - step: 35975 loss: 1.9766 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:37:13,277 - root - INFO - lr: 6.1373e-06 gnorm: 1.23 [22:03:01< 2:28:01] +[titan] 2025-10-05 20:37:24,151 - root - INFO - step: 35980 loss: 1.9461 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:37:24,151 - root - INFO - lr: 
6.1345e-06 gnorm: 1.17 [22:03:12< 2:27:50] +[titan] 2025-10-05 20:37:34,997 - root - INFO - step: 35985 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6927 +[titan] 2025-10-05 20:37:34,997 - root - INFO - lr: 6.1317e-06 gnorm: 1.19 [22:03:23< 2:27:39] +[titan] 2025-10-05 20:37:45,857 - root - INFO - step: 35990 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:37:45,857 - root - INFO - lr: 6.1289e-06 gnorm: 1.21 [22:03:34< 2:27:28] +[titan] 2025-10-05 20:37:56,761 - root - INFO - step: 35995 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 20:37:56,761 - root - INFO - lr: 6.1261e-06 gnorm: 1.22 [22:03:45< 2:27:17] +[titan] 2025-10-05 20:38:05,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:38:07,602 - root - INFO - step: 36000 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 20:38:07,602 - root - INFO - lr: 6.1233e-06 gnorm: 1.24 [22:03:56< 2:27:06] +[titan] 2025-10-05 20:38:18,465 - root - INFO - step: 36005 loss: 1.8959 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 20:38:18,465 - root - INFO - lr: 6.1206e-06 gnorm: 1.22 [22:04:06< 2:26:55] +[titan] 2025-10-05 20:38:29,352 - root - INFO - step: 36010 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:38:29,352 - root - INFO - lr: 6.1178e-06 gnorm: 1.19 [22:04:17< 2:26:44] +[titan] 2025-10-05 20:38:40,197 - root - INFO - step: 36015 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 20:38:40,197 - root - INFO - lr: 6.1150e-06 gnorm: 1.22 [22:04:28< 2:26:33] +[titan] 2025-10-05 20:38:51,058 - root - INFO - step: 36020 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 20:38:51,058 - root - INFO - lr: 6.1122e-06 gnorm: 1.21 [22:04:39< 2:26:22] +[titan] 2025-10-05 20:39:01,952 - root - INFO - step: 36025 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6606 +[titan] 2025-10-05 20:39:01,952 - root - INFO - lr: 6.1095e-06 gnorm: 1.20 [22:04:50< 2:26:10] +[titan] 2025-10-05 20:39:12,835 - root - INFO - step: 36030 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 20:39:12,835 - root - INFO - lr: 
6.1067e-06 gnorm: 1.25 [22:05:01< 2:25:59] +[titan] 2025-10-05 20:39:23,710 - root - INFO - step: 36035 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 20:39:23,711 - root - INFO - lr: 6.1039e-06 gnorm: 1.23 [22:05:12< 2:25:48] +[titan] 2025-10-05 20:39:34,575 - root - INFO - step: 36040 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 20:39:34,575 - root - INFO - lr: 6.1012e-06 gnorm: 1.23 [22:05:23< 2:25:37] +[titan] 2025-10-05 20:39:45,433 - root - INFO - step: 36045 loss: 1.8945 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6757 +[titan] 2025-10-05 20:39:45,434 - root - INFO - lr: 6.0984e-06 gnorm: 1.20 [22:05:33< 2:25:26] +[titan] 2025-10-05 20:39:54,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:39:56,290 - root - INFO - step: 36050 loss: 1.9349 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7112 +[titan] 2025-10-05 20:39:56,290 - root - INFO - lr: 6.0957e-06 gnorm: 1.20 [22:05:44< 2:25:15] +[titan] 2025-10-05 20:40:07,175 - root - INFO - step: 36055 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 20:40:07,175 - root - INFO - lr: 6.0929e-06 gnorm: 1.26 [22:05:55< 2:25:04] +[titan] 2025-10-05 20:40:18,044 - root - INFO - step: 36060 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:40:18,044 - root - INFO - lr: 6.0902e-06 gnorm: 1.22 [22:06:06< 2:24:53] +[titan] 2025-10-05 20:40:28,916 - root - INFO - step: 36065 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7253 +[titan] 2025-10-05 20:40:28,916 - root - INFO - lr: 6.0874e-06 gnorm: 1.21 [22:06:17< 2:24:42] +[titan] 2025-10-05 20:40:39,778 - root - INFO - step: 36070 loss: 1.8531 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6374 +[titan] 2025-10-05 20:40:39,778 - root - INFO - lr: 6.0847e-06 gnorm: 1.21 [22:06:28< 2:24:31] +[titan] 2025-10-05 20:40:50,621 - root - INFO - step: 36075 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 20:40:50,621 - root - INFO - lr: 6.0820e-06 gnorm: 1.21 [22:06:39< 2:24:20] +[titan] 2025-10-05 20:41:01,488 - root - INFO - step: 36080 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 20:41:01,489 - root - INFO - lr: 
6.0792e-06 gnorm: 1.24 [22:06:49< 2:24:09] +[titan] 2025-10-05 20:41:12,335 - root - INFO - step: 36085 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 20:41:12,335 - root - INFO - lr: 6.0765e-06 gnorm: 1.21 [22:07:00< 2:23:58] +[titan] 2025-10-05 20:41:23,167 - root - INFO - step: 36090 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 20:41:23,168 - root - INFO - lr: 6.0738e-06 gnorm: 1.24 [22:07:11< 2:23:47] +[titan] 2025-10-05 20:41:34,043 - root - INFO - step: 36095 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:41:34,044 - root - INFO - lr: 6.0710e-06 gnorm: 1.26 [22:07:22< 2:23:36] +[titan] 2025-10-05 20:41:42,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:41:44,911 - root - INFO - step: 36100 loss: 1.9238 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7003 +[titan] 2025-10-05 20:41:44,911 - root - INFO - lr: 6.0683e-06 gnorm: 1.23 [22:07:33< 2:23:25] +[titan] 2025-10-05 20:41:55,794 - root - INFO - step: 36105 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 20:41:55,794 - root - INFO - lr: 6.0656e-06 gnorm: 1.18 [22:07:44< 2:23:14] +[titan] 2025-10-05 20:42:06,656 - root - INFO - step: 36110 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 20:42:06,657 - root - INFO - lr: 6.0629e-06 gnorm: 1.22 [22:07:55< 2:23:03] +[titan] 2025-10-05 20:42:17,515 - root - INFO - step: 36115 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 20:42:17,515 - root - INFO - lr: 6.0602e-06 gnorm: 1.22 [22:08:05< 2:22:52] +[titan] 2025-10-05 20:42:28,350 - root - INFO - step: 36120 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 20:42:28,350 - root - INFO - lr: 6.0575e-06 gnorm: 1.23 [22:08:16< 2:22:41] +[titan] 2025-10-05 20:42:39,197 - root - INFO - step: 36125 loss: 1.8516 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 20:42:39,197 - root - INFO - lr: 6.0548e-06 gnorm: 1.24 [22:08:27< 2:22:29] +[titan] 2025-10-05 20:42:50,083 - root - INFO - step: 36130 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 20:42:50,083 - root - INFO - lr: 
6.0521e-06 gnorm: 1.22 [22:08:38< 2:22:18] +[titan] 2025-10-05 20:43:00,986 - root - INFO - step: 36135 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:43:00,987 - root - INFO - lr: 6.0494e-06 gnorm: 1.22 [22:08:49< 2:22:07] +[titan] 2025-10-05 20:43:11,851 - root - INFO - step: 36140 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 20:43:11,851 - root - INFO - lr: 6.0467e-06 gnorm: 1.89 [22:09:00< 2:21:56] +[titan] 2025-10-05 20:43:22,694 - root - INFO - step: 36145 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 20:43:22,694 - root - INFO - lr: 6.0440e-06 gnorm: 1.18 [22:09:11< 2:21:45] +[titan] 2025-10-05 20:43:31,365 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:43:33,546 - root - INFO - step: 36150 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:43:33,547 - root - INFO - lr: 6.0413e-06 gnorm: 1.25 [22:09:22< 2:21:34] +[titan] 2025-10-05 20:43:44,389 - root - INFO - step: 36155 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 20:43:44,389 - root - INFO - lr: 6.0386e-06 gnorm: 1.20 [22:09:32< 2:21:23] +[titan] 2025-10-05 20:43:55,248 - root - INFO - step: 36160 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 20:43:55,248 - root - INFO - lr: 6.0360e-06 gnorm: 1.26 [22:09:43< 2:21:12] +[titan] 2025-10-05 20:44:06,157 - root - INFO - step: 36165 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:06,158 - root - INFO - lr: 6.0333e-06 gnorm: 1.22 [22:09:54< 2:21:01] +[titan] 2025-10-05 20:44:17,014 - root - INFO - step: 36170 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 20:44:17,014 - root - INFO - lr: 6.0306e-06 gnorm: 1.24 [22:10:05< 2:20:50] +[titan] 2025-10-05 20:44:27,855 - root - INFO - step: 36175 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:27,855 - root - INFO - lr: 6.0279e-06 gnorm: 1.23 [22:10:16< 2:20:39] +[titan] 2025-10-05 20:44:38,720 - root - INFO - step: 36180 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:44:38,720 - root - INFO - lr: 
6.0253e-06 gnorm: 1.23 [22:10:27< 2:20:28] +[titan] 2025-10-05 20:44:49,571 - root - INFO - step: 36185 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:44:49,571 - root - INFO - lr: 6.0226e-06 gnorm: 1.24 [22:10:38< 2:20:17] +[titan] 2025-10-05 20:45:00,410 - root - INFO - step: 36190 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 20:45:00,411 - root - INFO - lr: 6.0200e-06 gnorm: 1.29 [22:10:48< 2:20:06] +[titan] 2025-10-05 20:45:11,321 - root - INFO - step: 36195 loss: 1.8986 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6794 +[titan] 2025-10-05 20:45:11,321 - root - INFO - lr: 6.0173e-06 gnorm: 1.22 [22:10:59< 2:19:55] +[titan] 2025-10-05 20:45:19,981 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:45:22,165 - root - INFO - step: 36200 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 20:45:22,166 - root - INFO - lr: 6.0146e-06 gnorm: 1.25 [22:11:10< 2:19:44] +[titan] 2025-10-05 20:45:33,012 - root - INFO - step: 36205 loss: 1.8677 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6514 +[titan] 2025-10-05 20:45:33,012 - root - INFO - lr: 6.0120e-06 gnorm: 1.21 [22:11:21< 2:19:33] +[titan] 2025-10-05 20:45:43,868 - root - INFO - step: 36210 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 20:45:43,868 - root - INFO - lr: 6.0094e-06 gnorm: 1.23 [22:11:32< 2:19:22] +[titan] 2025-10-05 20:45:54,736 - root - INFO - step: 36215 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6484 +[titan] 2025-10-05 20:45:54,737 - root - INFO - lr: 6.0067e-06 gnorm: 1.18 [22:11:43< 2:19:11] +[titan] 2025-10-05 20:46:05,631 - root - INFO - step: 36220 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:46:05,631 - root - INFO - lr: 6.0041e-06 gnorm: 1.19 [22:11:54< 2:19:00] +[titan] 2025-10-05 20:46:16,518 - root - INFO - step: 36225 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 20:46:16,518 - root - INFO - lr: 6.0014e-06 gnorm: 1.22 [22:12:04< 2:18:48] +[titan] 2025-10-05 20:46:27,370 - root - INFO - step: 36230 loss: 1.9836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:46:27,370 - root - INFO - lr: 
5.9988e-06 gnorm: 1.19 [22:12:15< 2:18:37] +[titan] 2025-10-05 20:46:38,233 - root - INFO - step: 36235 loss: 1.8873 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 20:46:38,233 - root - INFO - lr: 5.9962e-06 gnorm: 1.22 [22:12:26< 2:18:26] +[titan] 2025-10-05 20:46:49,088 - root - INFO - step: 36240 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:46:49,088 - root - INFO - lr: 5.9936e-06 gnorm: 1.21 [22:12:37< 2:18:15] +[titan] 2025-10-05 20:46:59,957 - root - INFO - step: 36245 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6792 +[titan] 2025-10-05 20:46:59,958 - root - INFO - lr: 5.9909e-06 gnorm: 1.22 [22:12:48< 2:18:04] +[titan] 2025-10-05 20:47:08,670 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:47:10,858 - root - INFO - step: 36250 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:47:10,858 - root - INFO - lr: 5.9883e-06 gnorm: 1.19 [22:12:59< 2:17:53] +[titan] 2025-10-05 20:47:21,702 - root - INFO - step: 36255 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 20:47:21,702 - root - INFO - lr: 5.9857e-06 gnorm: 1.26 [22:13:10< 2:17:42] +[titan] 2025-10-05 20:47:32,596 - root - INFO - step: 36260 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 20:47:32,596 - root - INFO - lr: 5.9831e-06 gnorm: 1.22 [22:13:21< 2:17:31] +[titan] 2025-10-05 20:47:43,478 - root - INFO - step: 36265 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 20:47:43,478 - root - INFO - lr: 5.9805e-06 gnorm: 1.28 [22:13:31< 2:17:20] +[titan] 2025-10-05 20:47:54,366 - root - INFO - step: 36270 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:47:54,366 - root - INFO - lr: 5.9779e-06 gnorm: 1.22 [22:13:42< 2:17:09] +[titan] 2025-10-05 20:48:05,288 - root - INFO - step: 36275 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6837 +[titan] 2025-10-05 20:48:05,288 - root - INFO - lr: 5.9753e-06 gnorm: 1.22 [22:13:53< 2:16:58] +[titan] 2025-10-05 20:48:16,197 - root - INFO - step: 36280 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 20:48:16,197 - root - INFO - lr: 
5.9727e-06 gnorm: 1.22 [22:14:04< 2:16:47] +[titan] 2025-10-05 20:48:27,074 - root - INFO - step: 36285 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 20:48:27,075 - root - INFO - lr: 5.9701e-06 gnorm: 1.23 [22:14:15< 2:16:36] +[titan] 2025-10-05 20:48:37,962 - root - INFO - step: 36290 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 20:48:37,962 - root - INFO - lr: 5.9675e-06 gnorm: 1.26 [22:14:26< 2:16:25] +[titan] 2025-10-05 20:48:48,831 - root - INFO - step: 36295 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 20:48:48,832 - root - INFO - lr: 5.9649e-06 gnorm: 1.22 [22:14:37< 2:16:14] +[titan] 2025-10-05 20:48:57,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:48:59,686 - root - INFO - step: 36300 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6884 +[titan] 2025-10-05 20:48:59,686 - root - INFO - lr: 5.9623e-06 gnorm: 1.23 [22:14:48< 2:16:03] +[titan] 2025-10-05 20:49:10,530 - root - INFO - step: 36305 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:49:10,530 - root - INFO - lr: 5.9597e-06 gnorm: 1.21 [22:14:58< 2:15:52] +[titan] 2025-10-05 20:49:21,373 - root - INFO - step: 36310 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7418 +[titan] 2025-10-05 20:49:21,373 - root - INFO - lr: 5.9572e-06 gnorm: 1.26 [22:15:09< 2:15:41] +[titan] 2025-10-05 20:49:32,211 - root - INFO - step: 36315 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 20:49:32,211 - root - INFO - lr: 5.9546e-06 gnorm: 1.21 [22:15:20< 2:15:30] +[titan] 2025-10-05 20:49:43,047 - root - INFO - step: 36320 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7327 +[titan] 2025-10-05 20:49:43,048 - root - INFO - lr: 5.9520e-06 gnorm: 1.23 [22:15:31< 2:15:19] +[titan] 2025-10-05 20:49:53,930 - root - INFO - step: 36325 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 20:49:53,930 - root - INFO - lr: 5.9495e-06 gnorm: 1.21 [22:15:42< 2:15:08] +[titan] 2025-10-05 20:50:04,790 - root - INFO - step: 36330 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 20:50:04,790 - root - INFO - lr: 
5.9469e-06 gnorm: 1.22 [22:15:53< 2:14:56] +[titan] 2025-10-05 20:50:15,657 - root - INFO - step: 36335 loss: 1.9258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7028 +[titan] 2025-10-05 20:50:15,657 - root - INFO - lr: 5.9443e-06 gnorm: 1.28 [22:16:04< 2:14:45] +[titan] 2025-10-05 20:50:26,516 - root - INFO - step: 36340 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 20:50:26,516 - root - INFO - lr: 5.9418e-06 gnorm: 1.22 [22:16:14< 2:14:34] +[titan] 2025-10-05 20:50:37,351 - root - INFO - step: 36345 loss: 1.8859 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 20:50:37,351 - root - INFO - lr: 5.9392e-06 gnorm: 1.21 [22:16:25< 2:14:23] +[titan] 2025-10-05 20:50:46,109 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:50:48,289 - root - INFO - step: 36350 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:50:48,289 - root - INFO - lr: 5.9367e-06 gnorm: 1.26 [22:16:36< 2:14:12] +[titan] 2025-10-05 20:50:52,802 - root - INFO - Dumping profiler traces at step 36352 +[titan] 2025-10-05 20:50:52,840 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:50:59,390 - root - INFO - step: 36355 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 29,519 tflops: 409.53 mfu: 41.41% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6886 +[titan] 2025-10-05 20:50:59,390 - root - INFO - lr: 5.9341e-06 gnorm: 1.21 [22:16:47< 2:14:01] +[titan] 2025-10-05 20:51:10,256 - root - INFO - step: 36360 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6670 +[titan] 2025-10-05 20:51:10,257 - root - INFO - lr: 5.9316e-06 gnorm: 1.20 [22:16:58< 2:13:50] +[titan] 2025-10-05 20:51:21,108 - root - INFO - step: 36365 loss: 1.9715 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7430 +[titan] 2025-10-05 20:51:21,108 - root - INFO - lr: 5.9290e-06 gnorm: 1.25 [22:17:09< 2:13:39] +[titan] 2025-10-05 20:51:31,957 - root - INFO - step: 36370 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 20:51:31,957 - root - INFO - lr: 5.9265e-06 gnorm: 1.22 [22:17:20< 2:13:28] +[titan] 2025-10-05 20:51:42,813 - root - INFO - step: 36375 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:51:42,813 - root - INFO - lr: 5.9240e-06 gnorm: 1.22 [22:17:31< 2:13:17] +[titan] 2025-10-05 20:51:53,656 - root - INFO - step: 36380 loss: 
1.9104 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 20:51:53,656 - root - INFO - lr: 5.9214e-06 gnorm: 1.27 [22:17:42< 2:13:06] +[titan] 2025-10-05 20:52:04,533 - root - INFO - step: 36385 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7274 +[titan] 2025-10-05 20:52:04,533 - root - INFO - lr: 5.9189e-06 gnorm: 1.22 [22:17:52< 2:12:55] +[titan] 2025-10-05 20:52:15,414 - root - INFO - step: 36390 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7019 +[titan] 2025-10-05 20:52:15,414 - root - INFO - lr: 5.9164e-06 gnorm: 1.23 [22:18:03< 2:12:44] +[titan] 2025-10-05 20:52:26,295 - root - INFO - step: 36395 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 20:52:26,295 - root - INFO - lr: 5.9139e-06 gnorm: 1.21 [22:18:14< 2:12:33] +[titan] 2025-10-05 20:52:34,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:52:37,152 - root - INFO - step: 36400 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 20:52:37,153 - root - INFO - lr: 5.9114e-06 gnorm: 1.22 [22:18:25< 2:12:22] +[titan] 2025-10-05 20:52:48,028 - root - INFO - step: 36405 loss: 1.9539 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:52:48,028 - root - INFO - lr: 5.9088e-06 gnorm: 1.20 [22:18:36< 2:12:11] +[titan] 2025-10-05 20:52:58,901 - root - INFO - step: 36410 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:52:58,901 - root - INFO - lr: 5.9063e-06 gnorm: 1.21 [22:18:47< 2:12:00] +[titan] 2025-10-05 20:53:10,114 - root - INFO - step: 36415 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 29,223 tflops: 405.42 mfu: 40.99% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6627 +[titan] 2025-10-05 20:53:10,115 - root - INFO - lr: 5.9038e-06 gnorm: 1.21 [22:18:58< 2:11:49] +[titan] 2025-10-05 20:53:21,005 - root - INFO - step: 36420 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 20:53:21,005 - root - INFO - lr: 5.9013e-06 gnorm: 1.27 [22:19:09< 2:11:38] +[titan] 2025-10-05 20:53:31,873 - root - INFO - step: 36425 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 20:53:31,874 - root - INFO - lr: 5.8988e-06 gnorm: 1.24 [22:19:20< 2:11:27] +[titan] 2025-10-05 20:53:42,745 - root - INFO - step: 36430 loss: 1.8831 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6658 +[titan] 2025-10-05 20:53:42,745 - root - INFO - lr: 
5.8963e-06 gnorm: 1.28 [22:19:31< 2:11:16] +[titan] 2025-10-05 20:53:53,613 - root - INFO - step: 36435 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 20:53:53,613 - root - INFO - lr: 5.8938e-06 gnorm: 1.23 [22:19:42< 2:11:05] +[titan] 2025-10-05 20:54:04,481 - root - INFO - step: 36440 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:54:04,481 - root - INFO - lr: 5.8914e-06 gnorm: 1.22 [22:19:52< 2:10:53] +[titan] 2025-10-05 20:54:15,378 - root - INFO - step: 36445 loss: 1.9147 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:54:15,378 - root - INFO - lr: 5.8889e-06 gnorm: 1.24 [22:20:03< 2:10:42] +[titan] 2025-10-05 20:54:24,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:54:26,273 - root - INFO - step: 36450 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 20:54:26,273 - root - INFO - lr: 5.8864e-06 gnorm: 1.25 [22:20:14< 2:10:31] +[titan] 2025-10-05 20:54:37,147 - root - INFO - step: 36455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 20:54:37,147 - root - INFO - lr: 5.8839e-06 gnorm: 1.20 [22:20:25< 2:10:20] +[titan] 2025-10-05 20:54:48,029 - root - INFO - step: 36460 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 20:54:48,029 - root - INFO - lr: 5.8814e-06 gnorm: 1.21 [22:20:36< 2:10:09] +[titan] 2025-10-05 20:54:58,890 - root - INFO - step: 36465 loss: 1.9169 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 20:54:58,890 - root - INFO - lr: 5.8790e-06 gnorm: 1.22 [22:20:47< 2:09:58] +[titan] 2025-10-05 20:55:09,763 - root - INFO - step: 36470 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6935 +[titan] 2025-10-05 20:55:09,764 - root - INFO - lr: 5.8765e-06 gnorm: 1.24 [22:20:58< 2:09:47] +[titan] 2025-10-05 20:55:20,621 - root - INFO - step: 36475 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 20:55:20,622 - root - INFO - lr: 5.8740e-06 gnorm: 1.24 [22:21:09< 2:09:36] +[titan] 2025-10-05 20:55:31,491 - root - INFO - step: 36480 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:55:31,491 - root - INFO - lr: 
5.8716e-06 gnorm: 1.25 [22:21:19< 2:09:25] +[titan] 2025-10-05 20:55:42,366 - root - INFO - step: 36485 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 20:55:42,367 - root - INFO - lr: 5.8691e-06 gnorm: 1.22 [22:21:30< 2:09:14] +[titan] 2025-10-05 20:55:53,240 - root - INFO - step: 36490 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:55:53,241 - root - INFO - lr: 5.8667e-06 gnorm: 1.19 [22:21:41< 2:09:03] +[titan] 2025-10-05 20:56:04,092 - root - INFO - step: 36495 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 20:56:04,092 - root - INFO - lr: 5.8642e-06 gnorm: 1.28 [22:21:52< 2:08:52] +[titan] 2025-10-05 20:56:12,794 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:56:14,974 - root - INFO - step: 36500 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:56:14,974 - root - INFO - lr: 5.8618e-06 gnorm: 1.22 [22:22:03< 2:08:41] +[titan] 2025-10-05 20:56:25,858 - root - INFO - step: 36505 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:56:25,858 - root - INFO - lr: 5.8593e-06 gnorm: 1.21 [22:22:14< 2:08:30] +[titan] 2025-10-05 20:56:36,712 - root - INFO - step: 36510 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 20:56:36,712 - root - INFO - lr: 5.8569e-06 gnorm: 1.26 [22:22:25< 2:08:19] +[titan] 2025-10-05 20:56:47,594 - root - INFO - step: 36515 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:56:47,594 - root - INFO - lr: 5.8544e-06 gnorm: 1.24 [22:22:36< 2:08:08] +[titan] 2025-10-05 20:56:58,464 - root - INFO - step: 36520 loss: 1.8908 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6705 +[titan] 2025-10-05 20:56:58,465 - root - INFO - lr: 5.8520e-06 gnorm: 1.23 [22:22:46< 2:07:57] +[titan] 2025-10-05 20:57:09,332 - root - INFO - step: 36525 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:57:09,332 - root - INFO - lr: 5.8496e-06 gnorm: 1.21 [22:22:57< 2:07:46] +[titan] 2025-10-05 20:57:20,232 - root - INFO - step: 36530 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 20:57:20,232 - root - INFO - lr: 
5.8471e-06 gnorm: 1.21 [22:23:08< 2:07:35] +[titan] 2025-10-05 20:57:31,124 - root - INFO - step: 36535 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 20:57:31,124 - root - INFO - lr: 5.8447e-06 gnorm: 1.23 [22:23:19< 2:07:24] +[titan] 2025-10-05 20:57:42,014 - root - INFO - step: 36540 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 20:57:42,014 - root - INFO - lr: 5.8423e-06 gnorm: 1.25 [22:23:30< 2:07:13] +[titan] 2025-10-05 20:57:52,927 - root - INFO - step: 36545 loss: 1.9727 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 20:57:52,928 - root - INFO - lr: 5.8399e-06 gnorm: 1.24 [22:23:41< 2:07:02] +[titan] 2025-10-05 20:58:01,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:58:03,825 - root - INFO - step: 36550 loss: 1.9288 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7049 +[titan] 2025-10-05 20:58:03,825 - root - INFO - lr: 5.8375e-06 gnorm: 1.24 [22:23:52< 2:06:50] +[titan] 2025-10-05 20:58:14,740 - root - INFO - step: 36555 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 20:58:14,741 - root - INFO - lr: 5.8351e-06 gnorm: 1.26 [22:24:03< 2:06:39] +[titan] 2025-10-05 20:58:25,614 - root - INFO - step: 36560 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 20:58:25,614 - root - INFO - lr: 5.8326e-06 gnorm: 1.18 [22:24:14< 2:06:28] +[titan] 2025-10-05 20:58:36,506 - root - INFO - step: 36565 loss: 1.8964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:58:36,506 - root - INFO - lr: 5.8302e-06 gnorm: 1.20 [22:24:24< 2:06:17] +[titan] 2025-10-05 20:58:47,390 - root - INFO - step: 36570 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 20:58:47,390 - root - INFO - lr: 5.8278e-06 gnorm: 1.25 [22:24:35< 2:06:06] +[titan] 2025-10-05 20:58:58,289 - root - INFO - step: 36575 loss: 1.9029 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:58:58,289 - root - INFO - lr: 5.8254e-06 gnorm: 1.20 [22:24:46< 2:05:55] +[titan] 2025-10-05 20:59:09,190 - root - INFO - step: 36580 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7074 +[titan] 2025-10-05 20:59:09,190 - root - INFO - lr: 
5.8231e-06 gnorm: 1.24 [22:24:57< 2:05:44] +[titan] 2025-10-05 20:59:20,102 - root - INFO - step: 36585 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7440 +[titan] 2025-10-05 20:59:20,103 - root - INFO - lr: 5.8207e-06 gnorm: 1.22 [22:25:08< 2:05:33] +[titan] 2025-10-05 20:59:30,980 - root - INFO - step: 36590 loss: 1.9441 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:59:30,980 - root - INFO - lr: 5.8183e-06 gnorm: 1.22 [22:25:19< 2:05:22] +[titan] 2025-10-05 20:59:41,845 - root - INFO - step: 36595 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6830 +[titan] 2025-10-05 20:59:41,845 - root - INFO - lr: 5.8159e-06 gnorm: 1.20 [22:25:30< 2:05:11] +[titan] 2025-10-05 20:59:50,540 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:59:52,732 - root - INFO - step: 36600 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:59:52,732 - root - INFO - lr: 5.8135e-06 gnorm: 1.20 [22:25:41< 2:05:00] +[titan] 2025-10-05 21:00:03,618 - root - INFO - step: 36605 loss: 1.8614 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6465 +[titan] 2025-10-05 21:00:03,618 - root - INFO - lr: 5.8111e-06 gnorm: 1.22 [22:25:52< 2:04:49] +[titan] 2025-10-05 21:00:14,529 - root - INFO - step: 36610 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 21:00:14,529 - root - INFO - lr: 5.8088e-06 gnorm: 1.24 [22:26:02< 2:04:38] +[titan] 2025-10-05 21:00:25,449 - root - INFO - step: 36615 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:00:25,450 - root - INFO - lr: 5.8064e-06 gnorm: 1.23 [22:26:13< 2:04:27] +[titan] 2025-10-05 21:00:36,361 - root - INFO - step: 36620 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6464 +[titan] 2025-10-05 21:00:36,361 - root - INFO - lr: 5.8040e-06 gnorm: 1.24 [22:26:24< 2:04:16] +[titan] 2025-10-05 21:00:47,259 - root - INFO - step: 36625 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7482 +[titan] 2025-10-05 21:00:47,259 - root - INFO - lr: 5.8017e-06 gnorm: 1.24 [22:26:35< 2:04:05] +[titan] 2025-10-05 21:00:58,160 - root - INFO - step: 36630 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:00:58,160 - root - INFO - lr: 
5.7993e-06 gnorm: 1.21 [22:26:46< 2:03:54] +[titan] 2025-10-05 21:01:09,053 - root - INFO - step: 36635 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:01:09,053 - root - INFO - lr: 5.7969e-06 gnorm: 1.26 [22:26:57< 2:03:43] +[titan] 2025-10-05 21:01:19,958 - root - INFO - step: 36640 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 21:01:19,958 - root - INFO - lr: 5.7946e-06 gnorm: 1.24 [22:27:08< 2:03:32] +[titan] 2025-10-05 21:01:30,865 - root - INFO - step: 36645 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:01:30,865 - root - INFO - lr: 5.7922e-06 gnorm: 1.22 [22:27:19< 2:03:21] +[titan] 2025-10-05 21:01:39,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:01:41,769 - root - INFO - step: 36650 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:01:41,769 - root - INFO - lr: 5.7899e-06 gnorm: 1.25 [22:27:30< 2:03:10] +[titan] 2025-10-05 21:01:52,656 - root - INFO - step: 36655 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6666 +[titan] 2025-10-05 21:01:52,656 - root - INFO - lr: 5.7876e-06 gnorm: 1.26 [22:27:41< 2:02:59] +[titan] 2025-10-05 21:02:03,549 - root - INFO - step: 36660 loss: 1.9170 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 21:02:03,549 - root - INFO - lr: 5.7852e-06 gnorm: 1.24 [22:27:51< 2:02:48] +[titan] 2025-10-05 21:02:14,436 - root - INFO - step: 36665 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6585 +[titan] 2025-10-05 21:02:14,436 - root - INFO - lr: 5.7829e-06 gnorm: 1.20 [22:28:02< 2:02:36] +[titan] 2025-10-05 21:02:25,324 - root - INFO - step: 36670 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 21:02:25,324 - root - INFO - lr: 5.7806e-06 gnorm: 1.25 [22:28:13< 2:02:25] +[titan] 2025-10-05 21:02:36,230 - root - INFO - step: 36675 loss: 1.8517 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6371 +[titan] 2025-10-05 21:02:36,230 - root - INFO - lr: 5.7782e-06 gnorm: 1.21 [22:28:24< 2:02:14] +[titan] 2025-10-05 21:02:47,119 - root - INFO - step: 36680 loss: 1.8308 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6192 +[titan] 2025-10-05 21:02:47,119 - root - INFO - lr: 
5.7759e-06 gnorm: 1.21 [22:28:35< 2:02:03] +[titan] 2025-10-05 21:02:58,028 - root - INFO - step: 36685 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:02:58,028 - root - INFO - lr: 5.7736e-06 gnorm: 1.27 [22:28:46< 2:01:52] +[titan] 2025-10-05 21:03:08,899 - root - INFO - step: 36690 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 21:03:08,899 - root - INFO - lr: 5.7713e-06 gnorm: 1.23 [22:28:57< 2:01:41] +[titan] 2025-10-05 21:03:19,806 - root - INFO - step: 36695 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:03:19,806 - root - INFO - lr: 5.7689e-06 gnorm: 1.23 [22:29:08< 2:01:30] +[titan] 2025-10-05 21:03:28,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:03:30,710 - root - INFO - step: 36700 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 21:03:30,710 - root - INFO - lr: 5.7666e-06 gnorm: 1.27 [22:29:19< 2:01:19] +[titan] 2025-10-05 21:03:41,623 - root - INFO - step: 36705 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 21:03:41,624 - root - INFO - lr: 5.7643e-06 gnorm: 1.24 [22:29:30< 2:01:08] +[titan] 2025-10-05 21:03:52,525 - root - INFO - step: 36710 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:03:52,525 - root - INFO - lr: 5.7620e-06 gnorm: 1.26 [22:29:40< 2:00:57] +[titan] 2025-10-05 21:04:03,447 - root - INFO - step: 36715 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:04:03,447 - root - INFO - lr: 5.7597e-06 gnorm: 1.26 [22:29:51< 2:00:46] +[titan] 2025-10-05 21:04:14,324 - root - INFO - step: 36720 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:04:14,324 - root - INFO - lr: 5.7574e-06 gnorm: 1.20 [22:30:02< 2:00:35] +[titan] 2025-10-05 21:04:25,273 - root - INFO - step: 36725 loss: 1.9301 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:04:25,273 - root - INFO - lr: 5.7551e-06 gnorm: 1.23 [22:30:13< 2:00:24] +[titan] 2025-10-05 21:04:36,157 - root - INFO - step: 36730 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:04:36,157 - root - INFO - lr: 
5.7528e-06 gnorm: 1.24 [22:30:24< 2:00:13] +[titan] 2025-10-05 21:04:47,035 - root - INFO - step: 36735 loss: 1.9023 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 21:04:47,035 - root - INFO - lr: 5.7505e-06 gnorm: 1.26 [22:30:35< 2:00:02] +[titan] 2025-10-05 21:04:57,939 - root - INFO - step: 36740 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 21:04:57,939 - root - INFO - lr: 5.7483e-06 gnorm: 1.21 [22:30:46< 1:59:51] +[titan] 2025-10-05 21:05:08,831 - root - INFO - step: 36745 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 21:05:08,831 - root - INFO - lr: 5.7460e-06 gnorm: 1.25 [22:30:57< 1:59:40] +[titan] 2025-10-05 21:05:17,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:05:19,701 - root - INFO - step: 36750 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:05:19,702 - root - INFO - lr: 5.7437e-06 gnorm: 1.22 [22:31:08< 1:59:29] +[titan] 2025-10-05 21:05:30,640 - root - INFO - step: 36755 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 21:05:30,640 - root - INFO - lr: 5.7414e-06 gnorm: 1.23 [22:31:19< 1:59:18] +[titan] 2025-10-05 21:05:41,514 - root - INFO - step: 36760 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:05:41,514 - root - INFO - lr: 5.7392e-06 gnorm: 1.21 [22:31:29< 1:59:07] +[titan] 2025-10-05 21:05:52,376 - root - INFO - step: 36765 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 21:05:52,376 - root - INFO - lr: 5.7369e-06 gnorm: 1.26 [22:31:40< 1:58:56] +[titan] 2025-10-05 21:06:03,266 - root - INFO - step: 36770 loss: 1.8668 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 21:06:03,266 - root - INFO - lr: 5.7346e-06 gnorm: 1.22 [22:31:51< 1:58:45] +[titan] 2025-10-05 21:06:14,143 - root - INFO - step: 36775 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 21:06:14,143 - root - INFO - lr: 5.7324e-06 gnorm: 1.23 [22:32:02< 1:58:34] +[titan] 2025-10-05 21:06:25,098 - root - INFO - step: 36780 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 21:06:25,098 - root - INFO - lr: 
5.7301e-06 gnorm: 1.22 [22:32:13< 1:58:23] +[titan] 2025-10-05 21:06:35,961 - root - INFO - step: 36785 loss: 1.8486 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6355 +[titan] 2025-10-05 21:06:35,961 - root - INFO - lr: 5.7279e-06 gnorm: 1.26 [22:32:24< 1:58:11] +[titan] 2025-10-05 21:06:46,824 - root - INFO - step: 36790 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 21:06:46,825 - root - INFO - lr: 5.7256e-06 gnorm: 1.26 [22:32:35< 1:58:00] +[titan] 2025-10-05 21:06:57,688 - root - INFO - step: 36795 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7115 +[titan] 2025-10-05 21:06:57,688 - root - INFO - lr: 5.7234e-06 gnorm: 1.23 [22:32:46< 1:57:49] +[titan] 2025-10-05 21:07:06,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:07:08,540 - root - INFO - step: 36800 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 21:07:08,541 - root - INFO - lr: 5.7211e-06 gnorm: 1.23 [22:32:56< 1:57:38] +[titan] 2025-10-05 21:07:19,425 - root - INFO - step: 36805 loss: 1.9493 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:07:19,425 - root - INFO - lr: 5.7189e-06 gnorm: 1.24 [22:33:07< 1:57:27] +[titan] 2025-10-05 21:07:30,382 - root - INFO - step: 36810 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:07:30,383 - root - INFO - lr: 5.7166e-06 gnorm: 1.23 [22:33:18< 1:57:16] +[titan] 2025-10-05 21:07:41,263 - root - INFO - step: 36815 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 21:07:41,263 - root - INFO - lr: 5.7144e-06 gnorm: 1.24 [22:33:29< 1:57:05] +[titan] 2025-10-05 21:07:52,120 - root - INFO - step: 36820 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 21:07:52,120 - root - INFO - lr: 5.7122e-06 gnorm: 1.21 [22:33:40< 1:56:54] +[titan] 2025-10-05 21:08:02,998 - root - INFO - step: 36825 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6712 +[titan] 2025-10-05 21:08:02,999 - root - INFO - lr: 5.7100e-06 gnorm: 1.24 [22:33:51< 1:56:43] +[titan] 2025-10-05 21:08:13,877 - root - INFO - step: 36830 loss: 1.9915 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 21:08:13,878 - root - INFO - lr: 
5.7077e-06 gnorm: 1.31 [22:34:02< 1:56:32] +[titan] 2025-10-05 21:08:25,107 - root - INFO - step: 36835 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 29,180 tflops: 404.83 mfu: 40.93% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6650 +[titan] 2025-10-05 21:08:25,108 - root - INFO - lr: 5.7055e-06 gnorm: 1.20 [22:34:13< 1:56:21] +[titan] 2025-10-05 21:08:35,977 - root - INFO - step: 36840 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 21:08:35,977 - root - INFO - lr: 5.7033e-06 gnorm: 1.24 [22:34:24< 1:56:10] +[titan] 2025-10-05 21:08:46,865 - root - INFO - step: 36845 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 21:08:46,865 - root - INFO - lr: 5.7011e-06 gnorm: 1.24 [22:34:35< 1:55:59] +[titan] 2025-10-05 21:08:55,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:08:57,718 - root - INFO - step: 36850 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 21:08:57,718 - root - INFO - lr: 5.6989e-06 gnorm: 1.26 [22:34:46< 1:55:48] +[titan] 2025-10-05 21:09:08,595 - root - INFO - step: 36855 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7308 +[titan] 2025-10-05 21:09:08,595 - root - INFO - lr: 5.6967e-06 gnorm: 1.24 [22:34:57< 1:55:37] +[titan] 2025-10-05 21:09:19,469 - root - INFO - step: 36860 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:09:19,470 - root - INFO - lr: 5.6945e-06 gnorm: 1.27 [22:35:07< 1:55:26] +[titan] 2025-10-05 21:09:28,576 - root - INFO - Dumping profiler traces at step 36864 +[titan] 2025-10-05 21:09:28,618 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:09:30,837 - root - INFO - step: 36865 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 28,827 tflops: 399.93 mfu: 40.44% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 21:09:30,837 - root - INFO - lr: 5.6923e-06 gnorm: 1.23 [22:35:19< 1:55:15] +[titan] 2025-10-05 21:09:41,699 - root - INFO - step: 36870 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 21:09:41,699 - root - INFO - lr: 5.6901e-06 gnorm: 1.24 [22:35:30< 1:55:04] +[titan] 2025-10-05 21:09:52,574 - root - INFO - step: 36875 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 21:09:52,574 - root - INFO - lr: 5.6879e-06 gnorm: 1.24 [22:35:40< 1:54:53] +[titan] 2025-10-05 21:10:03,422 - root - INFO - step: 36880 loss: 
1.9042 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:10:03,422 - root - INFO - lr: 5.6857e-06 gnorm: 1.21 [22:35:51< 1:54:42] +[titan] 2025-10-05 21:10:14,288 - root - INFO - step: 36885 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 21:10:14,288 - root - INFO - lr: 5.6835e-06 gnorm: 1.23 [22:36:02< 1:54:31] +[titan] 2025-10-05 21:10:25,157 - root - INFO - step: 36890 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 21:10:25,157 - root - INFO - lr: 5.6813e-06 gnorm: 1.24 [22:36:13< 1:54:20] +[titan] 2025-10-05 21:10:36,097 - root - INFO - step: 36895 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 21:10:36,097 - root - INFO - lr: 5.6792e-06 gnorm: 1.24 [22:36:24< 1:54:09] +[titan] 2025-10-05 21:10:44,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:10:46,986 - root - INFO - step: 36900 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 21:10:46,987 - root - INFO - lr: 5.6770e-06 gnorm: 1.28 [22:36:35< 1:53:58] +[titan] 2025-10-05 21:10:57,833 - root - INFO - step: 36905 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 21:10:57,833 - root - INFO - lr: 5.6748e-06 gnorm: 1.23 [22:36:46< 1:53:47] +[titan] 2025-10-05 21:11:08,682 - root - INFO - step: 36910 loss: 1.8557 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6407 +[titan] 2025-10-05 21:11:08,682 - root - INFO - lr: 5.6726e-06 gnorm: 1.19 [22:36:57< 1:53:36] +[titan] 2025-10-05 21:11:19,531 - root - INFO - step: 36915 loss: 1.8896 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:11:19,532 - root - INFO - lr: 5.6705e-06 gnorm: 1.18 [22:37:07< 1:53:24] +[titan] 2025-10-05 21:11:30,448 - root - INFO - step: 36920 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:11:30,449 - root - INFO - lr: 5.6683e-06 gnorm: 1.24 [22:37:18< 1:53:13] +[titan] 2025-10-05 21:11:41,323 - root - INFO - step: 36925 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 21:11:41,323 - root - INFO - lr: 5.6662e-06 gnorm: 1.26 [22:37:29< 1:53:02] +[titan] 2025-10-05 21:11:52,243 - root - INFO - step: 36930 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6917 +[titan] 2025-10-05 21:11:52,243 - root - INFO - lr: 
5.6640e-06 gnorm: 1.28 [22:37:40< 1:52:51] +[titan] 2025-10-05 21:12:03,124 - root - INFO - step: 36935 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6806 +[titan] 2025-10-05 21:12:03,124 - root - INFO - lr: 5.6619e-06 gnorm: 1.20 [22:37:51< 1:52:40] +[titan] 2025-10-05 21:12:14,002 - root - INFO - step: 36940 loss: 1.9158 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6952 +[titan] 2025-10-05 21:12:14,002 - root - INFO - lr: 5.6597e-06 gnorm: 1.26 [22:38:02< 1:52:29] +[titan] 2025-10-05 21:12:24,869 - root - INFO - step: 36945 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 21:12:24,870 - root - INFO - lr: 5.6576e-06 gnorm: 1.21 [22:38:13< 1:52:18] +[titan] 2025-10-05 21:12:33,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:12:35,810 - root - INFO - step: 36950 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 21:12:35,810 - root - INFO - lr: 5.6554e-06 gnorm: 1.24 [22:38:24< 1:52:07] +[titan] 2025-10-05 21:12:46,684 - root - INFO - step: 36955 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7035 +[titan] 2025-10-05 21:12:46,685 - root - INFO - lr: 5.6533e-06 gnorm: 1.21 [22:38:35< 1:51:56] +[titan] 2025-10-05 21:12:57,552 - root - INFO - step: 36960 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 21:12:57,552 - root - INFO - lr: 5.6512e-06 gnorm: 1.24 [22:38:45< 1:51:45] +[titan] 2025-10-05 21:13:08,463 - root - INFO - step: 36965 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7952 +[titan] 2025-10-05 21:13:08,463 - root - INFO - lr: 5.6490e-06 gnorm: 1.24 [22:38:56< 1:51:34] +[titan] 2025-10-05 21:13:19,335 - root - INFO - step: 36970 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:19,336 - root - INFO - lr: 5.6469e-06 gnorm: 1.21 [22:39:07< 1:51:23] +[titan] 2025-10-05 21:13:30,256 - root - INFO - step: 36975 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:30,256 - root - INFO - lr: 5.6448e-06 gnorm: 1.28 [22:39:18< 1:51:12] +[titan] 2025-10-05 21:13:41,127 - root - INFO - step: 36980 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:13:41,128 - root - INFO - lr: 
5.6427e-06 gnorm: 1.23 [22:39:29< 1:51:01] +[titan] 2025-10-05 21:13:51,994 - root - INFO - step: 36985 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:13:51,995 - root - INFO - lr: 5.6405e-06 gnorm: 1.29 [22:39:40< 1:50:50] +[titan] 2025-10-05 21:14:02,859 - root - INFO - step: 36990 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7044 +[titan] 2025-10-05 21:14:02,859 - root - INFO - lr: 5.6384e-06 gnorm: 1.24 [22:39:51< 1:50:39] +[titan] 2025-10-05 21:14:13,749 - root - INFO - step: 36995 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6987 +[titan] 2025-10-05 21:14:13,749 - root - INFO - lr: 5.6363e-06 gnorm: 1.22 [22:40:02< 1:50:28] +[titan] 2025-10-05 21:14:22,444 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:14:24,631 - root - INFO - step: 37000 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6388 +[titan] 2025-10-05 21:14:24,631 - root - INFO - lr: 5.6342e-06 gnorm: 1.20 [22:40:13< 1:50:17] +[titan] 2025-10-05 21:14:35,570 - root - INFO - step: 37005 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 21:14:35,570 - root - INFO - lr: 5.6321e-06 gnorm: 1.26 [22:40:23< 1:50:06] +[titan] 2025-10-05 21:14:46,450 - root - INFO - step: 37010 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 21:14:46,450 - root - INFO - lr: 5.6300e-06 gnorm: 1.19 [22:40:34< 1:49:55] +[titan] 2025-10-05 21:14:57,328 - root - INFO - step: 37015 loss: 1.9312 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7076 +[titan] 2025-10-05 21:14:57,329 - root - INFO - lr: 5.6279e-06 gnorm: 1.27 [22:40:45< 1:49:44] +[titan] 2025-10-05 21:15:08,191 - root - INFO - step: 37020 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 21:15:08,192 - root - INFO - lr: 5.6258e-06 gnorm: 1.28 [22:40:56< 1:49:33] +[titan] 2025-10-05 21:15:19,080 - root - INFO - step: 37025 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:15:19,080 - root - INFO - lr: 5.6237e-06 gnorm: 1.20 [22:41:07< 1:49:22] +[titan] 2025-10-05 21:15:29,953 - root - INFO - step: 37030 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 21:15:29,953 - root - INFO - lr: 
5.6216e-06 gnorm: 1.21 [22:41:18< 1:49:11] +[titan] 2025-10-05 21:15:40,885 - root - INFO - step: 37035 loss: 1.8738 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6571 +[titan] 2025-10-05 21:15:40,885 - root - INFO - lr: 5.6196e-06 gnorm: 1.21 [22:41:29< 1:48:59] +[titan] 2025-10-05 21:15:51,738 - root - INFO - step: 37040 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6703 +[titan] 2025-10-05 21:15:51,738 - root - INFO - lr: 5.6175e-06 gnorm: 1.25 [22:41:40< 1:48:48] +[titan] 2025-10-05 21:16:02,623 - root - INFO - step: 37045 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:16:02,624 - root - INFO - lr: 5.6154e-06 gnorm: 1.21 [22:41:51< 1:48:37] +[titan] 2025-10-05 21:16:11,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:16:13,511 - root - INFO - step: 37050 loss: 1.9092 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 21:16:13,511 - root - INFO - lr: 5.6133e-06 gnorm: 1.23 [22:42:01< 1:48:26] +[titan] 2025-10-05 21:16:24,393 - root - INFO - step: 37055 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 21:16:24,393 - root - INFO - lr: 5.6113e-06 gnorm: 1.24 [22:42:12< 1:48:15] +[titan] 2025-10-05 21:16:35,329 - root - INFO - step: 37060 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:16:35,329 - root - INFO - lr: 5.6092e-06 gnorm: 1.25 [22:42:23< 1:48:04] +[titan] 2025-10-05 21:16:46,199 - root - INFO - step: 37065 loss: 1.9535 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:16:46,199 - root - INFO - lr: 5.6071e-06 gnorm: 1.27 [22:42:34< 1:47:53] +[titan] 2025-10-05 21:16:57,064 - root - INFO - step: 37070 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 21:16:57,064 - root - INFO - lr: 5.6051e-06 gnorm: 1.24 [22:42:45< 1:47:42] +[titan] 2025-10-05 21:17:07,940 - root - INFO - step: 37075 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7181 +[titan] 2025-10-05 21:17:07,940 - root - INFO - lr: 5.6030e-06 gnorm: 1.23 [22:42:56< 1:47:31] +[titan] 2025-10-05 21:17:18,806 - root - INFO - step: 37080 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 21:17:18,807 - root - INFO - lr: 
5.6010e-06 gnorm: 1.20 [22:43:07< 1:47:20] +[titan] 2025-10-05 21:17:29,692 - root - INFO - step: 37085 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 21:17:29,692 - root - INFO - lr: 5.5989e-06 gnorm: 1.21 [22:43:18< 1:47:09] +[titan] 2025-10-05 21:17:40,647 - root - INFO - step: 37090 loss: 1.9429 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:17:40,648 - root - INFO - lr: 5.5969e-06 gnorm: 1.27 [22:43:29< 1:46:58] +[titan] 2025-10-05 21:17:51,517 - root - INFO - step: 37095 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 21:17:51,517 - root - INFO - lr: 5.5949e-06 gnorm: 1.27 [22:43:39< 1:46:47] +[titan] 2025-10-05 21:18:00,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:18:02,399 - root - INFO - step: 37100 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6612 +[titan] 2025-10-05 21:18:02,399 - root - INFO - lr: 5.5928e-06 gnorm: 1.26 [22:43:50< 1:46:36] +[titan] 2025-10-05 21:18:13,285 - root - INFO - step: 37105 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6690 +[titan] 2025-10-05 21:18:13,286 - root - INFO - lr: 5.5908e-06 gnorm: 1.20 [22:44:01< 1:46:25] +[titan] 2025-10-05 21:18:24,145 - root - INFO - step: 37110 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 21:18:24,145 - root - INFO - lr: 5.5888e-06 gnorm: 1.25 [22:44:12< 1:46:14] +[titan] 2025-10-05 21:18:35,081 - root - INFO - step: 37115 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 21:18:35,081 - root - INFO - lr: 5.5867e-06 gnorm: 1.20 [22:44:23< 1:46:03] +[titan] 2025-10-05 21:18:45,955 - root - INFO - step: 37120 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:18:45,955 - root - INFO - lr: 5.5847e-06 gnorm: 1.24 [22:44:34< 1:45:52] +[titan] 2025-10-05 21:18:56,847 - root - INFO - step: 37125 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7220 +[titan] 2025-10-05 21:18:56,848 - root - INFO - lr: 5.5827e-06 gnorm: 1.27 [22:44:45< 1:45:41] +[titan] 2025-10-05 21:19:07,708 - root - INFO - step: 37130 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 21:19:07,709 - root - INFO - lr: 
5.5807e-06 gnorm: 1.20 [22:44:56< 1:45:30] +[titan] 2025-10-05 21:19:18,570 - root - INFO - step: 37135 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 21:19:18,570 - root - INFO - lr: 5.5787e-06 gnorm: 1.23 [22:45:06< 1:45:19] +[titan] 2025-10-05 21:19:29,432 - root - INFO - step: 37140 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 21:19:29,432 - root - INFO - lr: 5.5766e-06 gnorm: 1.25 [22:45:17< 1:45:08] +[titan] 2025-10-05 21:19:40,328 - root - INFO - step: 37145 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 21:19:40,328 - root - INFO - lr: 5.5746e-06 gnorm: 1.28 [22:45:28< 1:44:57] +[titan] 2025-10-05 21:19:48,998 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:19:51,178 - root - INFO - step: 37150 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 21:19:51,178 - root - INFO - lr: 5.5726e-06 gnorm: 1.28 [22:45:39< 1:44:46] +[titan] 2025-10-05 21:20:02,074 - root - INFO - step: 37155 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:20:02,074 - root - INFO - lr: 5.5706e-06 gnorm: 1.22 [22:45:50< 1:44:35] +[titan] 2025-10-05 21:20:12,936 - root - INFO - step: 37160 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 21:20:12,937 - root - INFO - lr: 5.5686e-06 gnorm: 1.25 [22:46:01< 1:44:23] +[titan] 2025-10-05 21:20:23,793 - root - INFO - step: 37165 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 21:20:23,793 - root - INFO - lr: 5.5666e-06 gnorm: 1.26 [22:46:12< 1:44:12] +[titan] 2025-10-05 21:20:34,674 - root - INFO - step: 37170 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:20:34,674 - root - INFO - lr: 5.5647e-06 gnorm: 1.21 [22:46:23< 1:44:01] +[titan] 2025-10-05 21:20:45,596 - root - INFO - step: 37175 loss: 1.9773 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 21:20:45,596 - root - INFO - lr: 5.5627e-06 gnorm: 1.27 [22:46:33< 1:43:50] +[titan] 2025-10-05 21:20:56,483 - root - INFO - step: 37180 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:20:56,484 - root - INFO - lr: 
5.5607e-06 gnorm: 1.26 [22:46:44< 1:43:39] +[titan] 2025-10-05 21:21:07,391 - root - INFO - step: 37185 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:21:07,392 - root - INFO - lr: 5.5587e-06 gnorm: 1.24 [22:46:55< 1:43:28] +[titan] 2025-10-05 21:21:18,272 - root - INFO - step: 37190 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 21:21:18,272 - root - INFO - lr: 5.5567e-06 gnorm: 1.22 [22:47:06< 1:43:17] +[titan] 2025-10-05 21:21:29,155 - root - INFO - step: 37195 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:21:29,155 - root - INFO - lr: 5.5548e-06 gnorm: 1.25 [22:47:17< 1:43:06] +[titan] 2025-10-05 21:21:37,896 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:21:40,076 - root - INFO - step: 37200 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:21:40,076 - root - INFO - lr: 5.5528e-06 gnorm: 1.22 [22:47:28< 1:42:55] +[titan] 2025-10-05 21:21:50,943 - root - INFO - step: 37205 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 21:21:50,944 - root - INFO - lr: 5.5508e-06 gnorm: 1.21 [22:47:39< 1:42:44] +[titan] 2025-10-05 21:22:01,837 - root - INFO - step: 37210 loss: 1.9065 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 21:22:01,837 - root - INFO - lr: 5.5489e-06 gnorm: 1.21 [22:47:50< 1:42:33] +[titan] 2025-10-05 21:22:12,716 - root - INFO - step: 37215 loss: 1.8559 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6413 +[titan] 2025-10-05 21:22:12,716 - root - INFO - lr: 5.5469e-06 gnorm: 1.23 [22:48:01< 1:42:22] +[titan] 2025-10-05 21:22:23,615 - root - INFO - step: 37220 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 21:22:23,615 - root - INFO - lr: 5.5450e-06 gnorm: 1.24 [22:48:11< 1:42:11] +[titan] 2025-10-05 21:22:34,482 - root - INFO - step: 37225 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 21:22:34,483 - root - INFO - lr: 5.5430e-06 gnorm: 1.26 [22:48:22< 1:42:00] +[titan] 2025-10-05 21:22:45,400 - root - INFO - step: 37230 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 21:22:45,400 - root - INFO - lr: 
5.5411e-06 gnorm: 1.23 [22:48:33< 1:41:49] +[titan] 2025-10-05 21:22:56,271 - root - INFO - step: 37235 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 21:22:56,272 - root - INFO - lr: 5.5391e-06 gnorm: 1.23 [22:48:44< 1:41:38] +[titan] 2025-10-05 21:23:07,114 - root - INFO - step: 37240 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:23:07,114 - root - INFO - lr: 5.5372e-06 gnorm: 1.23 [22:48:55< 1:41:27] +[titan] 2025-10-05 21:23:17,969 - root - INFO - step: 37245 loss: 1.8827 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 21:23:17,970 - root - INFO - lr: 5.5352e-06 gnorm: 1.23 [22:49:06< 1:41:16] +[titan] 2025-10-05 21:23:26,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:23:28,858 - root - INFO - step: 37250 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6816 +[titan] 2025-10-05 21:23:28,858 - root - INFO - lr: 5.5333e-06 gnorm: 1.21 [22:49:17< 1:41:05] +[titan] 2025-10-05 21:23:39,774 - root - INFO - step: 37255 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 21:23:39,774 - root - INFO - lr: 5.5314e-06 gnorm: 1.21 [22:49:28< 1:40:54] +[titan] 2025-10-05 21:23:50,632 - root - INFO - step: 37260 loss: 1.8928 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 21:23:50,632 - root - INFO - lr: 5.5295e-06 gnorm: 1.20 [22:49:39< 1:40:43] +[titan] 2025-10-05 21:24:01,494 - root - INFO - step: 37265 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 21:24:01,495 - root - INFO - lr: 5.5275e-06 gnorm: 1.21 [22:49:49< 1:40:32] +[titan] 2025-10-05 21:24:12,333 - root - INFO - step: 37270 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 21:24:12,333 - root - INFO - lr: 5.5256e-06 gnorm: 1.24 [22:50:00< 1:40:21] +[titan] 2025-10-05 21:24:23,189 - root - INFO - step: 37275 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 21:24:23,189 - root - INFO - lr: 5.5237e-06 gnorm: 1.22 [22:50:11< 1:40:10] +[titan] 2025-10-05 21:24:34,040 - root - INFO - step: 37280 loss: 1.8747 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 21:24:34,041 - root - INFO - lr: 
5.5218e-06 gnorm: 1.19 [22:50:22< 1:39:59] +[titan] 2025-10-05 21:24:44,965 - root - INFO - step: 37285 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6500 +[titan] 2025-10-05 21:24:44,965 - root - INFO - lr: 5.5199e-06 gnorm: 1.23 [22:50:33< 1:39:48] +[titan] 2025-10-05 21:24:55,829 - root - INFO - step: 37290 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6803 +[titan] 2025-10-05 21:24:55,829 - root - INFO - lr: 5.5180e-06 gnorm: 1.24 [22:50:44< 1:39:36] +[titan] 2025-10-05 21:25:06,686 - root - INFO - step: 37295 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 21:25:06,686 - root - INFO - lr: 5.5161e-06 gnorm: 1.23 [22:50:55< 1:39:25] +[titan] 2025-10-05 21:25:15,357 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:25:17,530 - root - INFO - step: 37300 loss: 1.9230 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 21:25:17,530 - root - INFO - lr: 5.5142e-06 gnorm: 1.29 [22:51:05< 1:39:14] +[titan] 2025-10-05 21:25:28,378 - root - INFO - step: 37305 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 21:25:28,378 - root - INFO - lr: 5.5123e-06 gnorm: 1.28 [22:51:16< 1:39:03] +[titan] 2025-10-05 21:25:39,211 - root - INFO - step: 37310 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 21:25:39,211 - root - INFO - lr: 5.5104e-06 gnorm: 1.28 [22:51:27< 1:38:52] +[titan] 2025-10-05 21:25:50,153 - root - INFO - step: 37315 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 21:25:50,153 - root - INFO - lr: 5.5085e-06 gnorm: 1.24 [22:51:38< 1:38:41] +[titan] 2025-10-05 21:26:01,007 - root - INFO - step: 37320 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 21:26:01,007 - root - INFO - lr: 5.5066e-06 gnorm: 1.22 [22:51:49< 1:38:30] +[titan] 2025-10-05 21:26:11,849 - root - INFO - step: 37325 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7401 +[titan] 2025-10-05 21:26:11,849 - root - INFO - lr: 5.5047e-06 gnorm: 1.28 [22:52:00< 1:38:19] +[titan] 2025-10-05 21:26:22,692 - root - INFO - step: 37330 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 21:26:22,692 - root - INFO - lr: 
5.5028e-06 gnorm: 1.20 [22:52:11< 1:38:08] +[titan] 2025-10-05 21:26:33,566 - root - INFO - step: 37335 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 21:26:33,566 - root - INFO - lr: 5.5010e-06 gnorm: 1.21 [22:52:21< 1:37:57] +[titan] 2025-10-05 21:26:44,447 - root - INFO - step: 37340 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 21:26:44,447 - root - INFO - lr: 5.4991e-06 gnorm: 1.30 [22:52:32< 1:37:46] +[titan] 2025-10-05 21:26:55,353 - root - INFO - step: 37345 loss: 1.8670 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6522 +[titan] 2025-10-05 21:26:55,354 - root - INFO - lr: 5.4972e-06 gnorm: 1.19 [22:52:43< 1:37:35] +[titan] 2025-10-05 21:27:04,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:27:06,210 - root - INFO - step: 37350 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 21:27:06,210 - root - INFO - lr: 5.4954e-06 gnorm: 1.23 [22:52:54< 1:37:24] +[titan] 2025-10-05 21:27:17,097 - root - INFO - step: 37355 loss: 1.8844 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 21:27:17,097 - root - INFO - lr: 5.4935e-06 gnorm: 1.22 [22:53:05< 1:37:13] +[titan] 2025-10-05 21:27:27,968 - root - INFO - step: 37360 loss: 1.8981 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:27:27,968 - root - INFO - lr: 5.4917e-06 gnorm: 1.24 [22:53:16< 1:37:02] +[titan] 2025-10-05 21:27:38,788 - root - INFO - step: 37365 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 21:27:38,788 - root - INFO - lr: 5.4898e-06 gnorm: 1.22 [22:53:27< 1:36:51] +[titan] 2025-10-05 21:27:49,689 - root - INFO - step: 37370 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 21:27:49,689 - root - INFO - lr: 5.4880e-06 gnorm: 1.26 [22:53:38< 1:36:40] +[titan] 2025-10-05 21:28:00,629 - root - INFO - step: 37375 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 21:28:00,630 - root - INFO - lr: 5.4861e-06 gnorm: 1.28 [22:53:48< 1:36:29] +[titan] 2025-10-05 21:28:03,004 - root - INFO - Dumping profiler traces at step 37376 +[titan] 2025-10-05 21:28:03,043 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:28:11,751 - root - INFO - step: 37380 loss: 
1.9803 memory: 118.84GiB(85.28%) tps: 29,465 tflops: 408.78 mfu: 41.33% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 21:28:11,751 - root - INFO - lr: 5.4843e-06 gnorm: 1.26 [22:54:00< 1:36:18] +[titan] 2025-10-05 21:28:22,605 - root - INFO - step: 37385 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 21:28:22,605 - root - INFO - lr: 5.4824e-06 gnorm: 1.23 [22:54:10< 1:36:07] +[titan] 2025-10-05 21:28:33,443 - root - INFO - step: 37390 loss: 1.8450 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6318 +[titan] 2025-10-05 21:28:33,443 - root - INFO - lr: 5.4806e-06 gnorm: 1.24 [22:54:21< 1:35:56] +[titan] 2025-10-05 21:28:44,325 - root - INFO - step: 37395 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:28:44,326 - root - INFO - lr: 5.4788e-06 gnorm: 1.22 [22:54:32< 1:35:45] +[titan] 2025-10-05 21:28:52,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:28:55,171 - root - INFO - step: 37400 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:28:55,171 - root - INFO - lr: 5.4769e-06 gnorm: 1.21 [22:54:43< 1:35:34] +[titan] 2025-10-05 21:29:06,005 - root - INFO - step: 37405 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 21:29:06,006 - root - INFO - lr: 5.4751e-06 gnorm: 1.21 [22:54:54< 1:35:23] +[titan] 2025-10-05 21:29:16,874 - root - INFO - step: 37410 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 21:29:16,874 - root - INFO - lr: 5.4733e-06 gnorm: 1.21 [22:55:05< 1:35:12] +[titan] 2025-10-05 21:29:27,686 - root - INFO - step: 37415 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 21:29:27,686 - root - INFO - lr: 5.4715e-06 gnorm: 1.19 [22:55:16< 1:35:01] +[titan] 2025-10-05 21:29:38,526 - root - INFO - step: 37420 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:29:38,526 - root - INFO - lr: 5.4696e-06 gnorm: 1.22 [22:55:26< 1:34:49] +[titan] 2025-10-05 21:29:49,408 - root - INFO - step: 37425 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 21:29:49,408 - root - INFO - lr: 5.4678e-06 gnorm: 1.25 [22:55:37< 1:34:38] +[titan] 2025-10-05 21:30:00,250 - root - INFO - step: 37430 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 21:30:00,250 - root - INFO - lr: 
5.4660e-06 gnorm: 1.23 [22:55:48< 1:34:27] +[titan] 2025-10-05 21:30:11,084 - root - INFO - step: 37435 loss: 1.9022 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:30:11,084 - root - INFO - lr: 5.4642e-06 gnorm: 1.27 [22:55:59< 1:34:16] +[titan] 2025-10-05 21:30:21,909 - root - INFO - step: 37440 loss: 1.9502 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:30:21,909 - root - INFO - lr: 5.4624e-06 gnorm: 1.24 [22:56:10< 1:34:05] +[titan] 2025-10-05 21:30:32,791 - root - INFO - step: 37445 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7483 +[titan] 2025-10-05 21:30:32,792 - root - INFO - lr: 5.4606e-06 gnorm: 1.30 [22:56:21< 1:33:54] +[titan] 2025-10-05 21:30:41,432 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:30:43,606 - root - INFO - step: 37450 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 21:30:43,606 - root - INFO - lr: 5.4588e-06 gnorm: 1.25 [22:56:31< 1:33:43] +[titan] 2025-10-05 21:30:54,447 - root - INFO - step: 37455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 21:30:54,447 - root - INFO - lr: 5.4570e-06 gnorm: 1.27 [22:56:42< 1:33:32] +[titan] 2025-10-05 21:31:05,288 - root - INFO - step: 37460 loss: 1.8916 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:31:05,288 - root - INFO - lr: 5.4552e-06 gnorm: 1.22 [22:56:53< 1:33:21] +[titan] 2025-10-05 21:31:16,146 - root - INFO - step: 37465 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 21:31:16,146 - root - INFO - lr: 5.4535e-06 gnorm: 1.26 [22:57:04< 1:33:10] +[titan] 2025-10-05 21:31:26,988 - root - INFO - step: 37470 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 21:31:26,988 - root - INFO - lr: 5.4517e-06 gnorm: 1.26 [22:57:15< 1:32:59] +[titan] 2025-10-05 21:31:37,863 - root - INFO - step: 37475 loss: 1.8457 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2133 global_avg_mtp_loss: 1.6324 +[titan] 2025-10-05 21:31:37,863 - root - INFO - lr: 5.4499e-06 gnorm: 1.20 [22:57:26< 1:32:48] +[titan] 2025-10-05 21:31:48,716 - root - INFO - step: 37480 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6511 +[titan] 2025-10-05 21:31:48,717 - root - INFO - lr: 
5.4481e-06 gnorm: 1.22 [22:57:37< 1:32:37] +[titan] 2025-10-05 21:31:59,577 - root - INFO - step: 37485 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6883 +[titan] 2025-10-05 21:31:59,577 - root - INFO - lr: 5.4463e-06 gnorm: 1.26 [22:57:47< 1:32:26] +[titan] 2025-10-05 21:32:10,434 - root - INFO - step: 37490 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 21:32:10,434 - root - INFO - lr: 5.4446e-06 gnorm: 1.24 [22:57:58< 1:32:15] +[titan] 2025-10-05 21:32:21,290 - root - INFO - step: 37495 loss: 1.9993 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 21:32:21,290 - root - INFO - lr: 5.4428e-06 gnorm: 1.24 [22:58:09< 1:32:04] +[titan] 2025-10-05 21:32:29,977 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:32:32,151 - root - INFO - step: 37500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 21:32:32,151 - root - INFO - lr: 5.4411e-06 gnorm: 1.29 [22:58:20< 1:31:53] +[titan] 2025-10-05 21:32:43,013 - root - INFO - step: 37505 loss: 1.8923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:32:43,014 - root - INFO - lr: 5.4393e-06 gnorm: 1.21 [22:58:31< 1:31:42] +[titan] 2025-10-05 21:32:53,854 - root - INFO - step: 37510 loss: 1.9490 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7237 +[titan] 2025-10-05 21:32:53,854 - root - INFO - lr: 5.4375e-06 gnorm: 1.21 [22:58:42< 1:31:31] +[titan] 2025-10-05 21:33:04,724 - root - INFO - step: 37515 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 21:33:04,724 - root - INFO - lr: 5.4358e-06 gnorm: 1.24 [22:58:53< 1:31:20] +[titan] 2025-10-05 21:33:15,605 - root - INFO - step: 37520 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:33:15,605 - root - INFO - lr: 5.4341e-06 gnorm: 1.22 [22:59:03< 1:31:09] +[titan] 2025-10-05 21:33:26,465 - root - INFO - step: 37525 loss: 1.8732 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6563 +[titan] 2025-10-05 21:33:26,465 - root - INFO - lr: 5.4323e-06 gnorm: 1.23 [22:59:14< 1:30:58] +[titan] 2025-10-05 21:33:37,315 - root - INFO - step: 37530 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 21:33:37,315 - root - INFO - lr: 
5.4306e-06 gnorm: 1.23 [22:59:25< 1:30:47] +[titan] 2025-10-05 21:33:48,179 - root - INFO - step: 37535 loss: 1.8524 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6376 +[titan] 2025-10-05 21:33:48,179 - root - INFO - lr: 5.4288e-06 gnorm: 1.25 [22:59:36< 1:30:36] +[titan] 2025-10-05 21:33:59,032 - root - INFO - step: 37540 loss: 1.8890 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 21:33:59,032 - root - INFO - lr: 5.4271e-06 gnorm: 1.22 [22:59:47< 1:30:25] +[titan] 2025-10-05 21:34:09,894 - root - INFO - step: 37545 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:34:09,894 - root - INFO - lr: 5.4254e-06 gnorm: 1.24 [22:59:58< 1:30:14] +[titan] 2025-10-05 21:34:18,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:34:20,750 - root - INFO - step: 37550 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6861 +[titan] 2025-10-05 21:34:20,750 - root - INFO - lr: 5.4236e-06 gnorm: 1.24 [23:00:09< 1:30:02] +[titan] 2025-10-05 21:34:31,630 - root - INFO - step: 37555 loss: 1.9520 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 21:34:31,630 - root - INFO - lr: 5.4219e-06 gnorm: 1.21 [23:00:19< 1:29:51] +[titan] 2025-10-05 21:34:42,476 - root - INFO - step: 37560 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7291 +[titan] 2025-10-05 21:34:42,476 - root - INFO - lr: 5.4202e-06 gnorm: 1.23 [23:00:30< 1:29:40] +[titan] 2025-10-05 21:34:53,333 - root - INFO - step: 37565 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 21:34:53,333 - root - INFO - lr: 5.4185e-06 gnorm: 1.26 [23:00:41< 1:29:29] +[titan] 2025-10-05 21:35:04,184 - root - INFO - step: 37570 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 21:35:04,184 - root - INFO - lr: 5.4168e-06 gnorm: 1.30 [23:00:52< 1:29:18] +[titan] 2025-10-05 21:35:15,037 - root - INFO - step: 37575 loss: 1.8778 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6614 +[titan] 2025-10-05 21:35:15,037 - root - INFO - lr: 5.4151e-06 gnorm: 1.21 [23:01:03< 1:29:07] +[titan] 2025-10-05 21:35:25,912 - root - INFO - step: 37580 loss: 1.8864 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6681 +[titan] 2025-10-05 21:35:25,913 - root - INFO - lr: 
5.4134e-06 gnorm: 1.23 [23:01:14< 1:28:56] +[titan] 2025-10-05 21:35:36,806 - root - INFO - step: 37585 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 21:35:36,807 - root - INFO - lr: 5.4117e-06 gnorm: 1.25 [23:01:25< 1:28:45] +[titan] 2025-10-05 21:35:47,715 - root - INFO - step: 37590 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 21:35:47,716 - root - INFO - lr: 5.4100e-06 gnorm: 1.22 [23:01:36< 1:28:34] +[titan] 2025-10-05 21:35:58,598 - root - INFO - step: 37595 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:35:58,598 - root - INFO - lr: 5.4083e-06 gnorm: 1.20 [23:01:46< 1:28:23] +[titan] 2025-10-05 21:36:07,282 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:36:09,461 - root - INFO - step: 37600 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7104 +[titan] 2025-10-05 21:36:09,461 - root - INFO - lr: 5.4066e-06 gnorm: 1.22 [23:01:57< 1:28:12] +[titan] 2025-10-05 21:36:20,345 - root - INFO - step: 37605 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 21:36:20,345 - root - INFO - lr: 5.4049e-06 gnorm: 1.28 [23:02:08< 1:28:01] +[titan] 2025-10-05 21:36:31,206 - root - INFO - step: 37610 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 21:36:31,207 - root - INFO - lr: 5.4032e-06 gnorm: 1.20 [23:02:19< 1:27:50] +[titan] 2025-10-05 21:36:42,084 - root - INFO - step: 37615 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:36:42,084 - root - INFO - lr: 5.4015e-06 gnorm: 1.28 [23:02:30< 1:27:39] +[titan] 2025-10-05 21:36:52,956 - root - INFO - step: 37620 loss: 2.0281 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 21:36:52,956 - root - INFO - lr: 5.3999e-06 gnorm: 1.25 [23:02:41< 1:27:28] +[titan] 2025-10-05 21:37:03,800 - root - INFO - step: 37625 loss: 1.8956 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6758 +[titan] 2025-10-05 21:37:03,800 - root - INFO - lr: 5.3982e-06 gnorm: 1.23 [23:02:52< 1:27:17] +[titan] 2025-10-05 21:37:14,649 - root - INFO - step: 37630 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 21:37:14,649 - root - INFO - lr: 
5.3965e-06 gnorm: 1.33 [23:03:02< 1:27:06] +[titan] 2025-10-05 21:37:25,497 - root - INFO - step: 37635 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 21:37:25,497 - root - INFO - lr: 5.3948e-06 gnorm: 1.24 [23:03:13< 1:26:55] +[titan] 2025-10-05 21:37:36,353 - root - INFO - step: 37640 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 21:37:36,353 - root - INFO - lr: 5.3932e-06 gnorm: 1.22 [23:03:24< 1:26:44] +[titan] 2025-10-05 21:37:47,208 - root - INFO - step: 37645 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7232 +[titan] 2025-10-05 21:37:47,208 - root - INFO - lr: 5.3915e-06 gnorm: 1.27 [23:03:35< 1:26:33] +[titan] 2025-10-05 21:37:55,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:37:58,091 - root - INFO - step: 37650 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6691 +[titan] 2025-10-05 21:37:58,091 - root - INFO - lr: 5.3899e-06 gnorm: 1.23 [23:03:46< 1:26:22] +[titan] 2025-10-05 21:38:08,977 - root - INFO - step: 37655 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 21:38:08,977 - root - INFO - lr: 5.3882e-06 gnorm: 1.28 [23:03:57< 1:26:11] +[titan] 2025-10-05 21:38:19,857 - root - INFO - step: 37660 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 21:38:19,858 - root - INFO - lr: 5.3866e-06 gnorm: 1.29 [23:04:08< 1:26:00] +[titan] 2025-10-05 21:38:30,712 - root - INFO - step: 37665 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6720 +[titan] 2025-10-05 21:38:30,712 - root - INFO - lr: 5.3849e-06 gnorm: 1.23 [23:04:19< 1:25:49] +[titan] 2025-10-05 21:38:41,564 - root - INFO - step: 37670 loss: 1.8372 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6256 +[titan] 2025-10-05 21:38:41,564 - root - INFO - lr: 5.3833e-06 gnorm: 1.21 [23:04:29< 1:25:38] +[titan] 2025-10-05 21:38:52,429 - root - INFO - step: 37675 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 21:38:52,429 - root - INFO - lr: 5.3816e-06 gnorm: 1.26 [23:04:40< 1:25:27] +[titan] 2025-10-05 21:39:03,314 - root - INFO - step: 37680 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 21:39:03,314 - root - INFO - lr: 
5.3800e-06 gnorm: 1.28 [23:04:51< 1:25:16] +[titan] 2025-10-05 21:39:14,212 - root - INFO - step: 37685 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 21:39:14,212 - root - INFO - lr: 5.3784e-06 gnorm: 1.22 [23:05:02< 1:25:05] +[titan] 2025-10-05 21:39:25,089 - root - INFO - step: 37690 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 21:39:25,089 - root - INFO - lr: 5.3767e-06 gnorm: 1.24 [23:05:13< 1:24:53] +[titan] 2025-10-05 21:39:35,965 - root - INFO - step: 37695 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 21:39:35,965 - root - INFO - lr: 5.3751e-06 gnorm: 1.26 [23:05:24< 1:24:42] +[titan] 2025-10-05 21:39:44,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:39:46,832 - root - INFO - step: 37700 loss: 1.8803 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:39:46,832 - root - INFO - lr: 5.3735e-06 gnorm: 1.24 [23:05:35< 1:24:31] +[titan] 2025-10-05 21:39:57,708 - root - INFO - step: 37705 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:39:57,708 - root - INFO - lr: 5.3719e-06 gnorm: 1.24 [23:05:46< 1:24:20] +[titan] 2025-10-05 21:40:08,584 - root - INFO - step: 37710 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6486 +[titan] 2025-10-05 21:40:08,584 - root - INFO - lr: 5.3703e-06 gnorm: 1.23 [23:05:56< 1:24:09] +[titan] 2025-10-05 21:40:19,491 - root - INFO - step: 37715 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 21:40:19,491 - root - INFO - lr: 5.3687e-06 gnorm: 1.24 [23:06:07< 1:23:58] +[titan] 2025-10-05 21:40:30,374 - root - INFO - step: 37720 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 21:40:30,375 - root - INFO - lr: 5.3671e-06 gnorm: 1.21 [23:06:18< 1:23:47] +[titan] 2025-10-05 21:40:41,250 - root - INFO - step: 37725 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 21:40:41,250 - root - INFO - lr: 5.3654e-06 gnorm: 1.21 [23:06:29< 1:23:36] +[titan] 2025-10-05 21:40:52,074 - root - INFO - step: 37730 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 21:40:52,075 - root - INFO - lr: 
5.3638e-06 gnorm: 1.23 [23:06:40< 1:23:25] +[titan] 2025-10-05 21:41:02,927 - root - INFO - step: 37735 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 21:41:02,928 - root - INFO - lr: 5.3622e-06 gnorm: 1.24 [23:06:51< 1:23:14] +[titan] 2025-10-05 21:41:13,783 - root - INFO - step: 37740 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 21:41:13,783 - root - INFO - lr: 5.3607e-06 gnorm: 1.24 [23:07:02< 1:23:03] +[titan] 2025-10-05 21:41:24,647 - root - INFO - step: 37745 loss: 1.8905 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 21:41:24,647 - root - INFO - lr: 5.3591e-06 gnorm: 1.24 [23:07:12< 1:22:52] +[titan] 2025-10-05 21:41:33,361 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:41:35,546 - root - INFO - step: 37750 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 21:41:35,546 - root - INFO - lr: 5.3575e-06 gnorm: 1.24 [23:07:23< 1:22:41] +[titan] 2025-10-05 21:41:46,407 - root - INFO - step: 37755 loss: 1.8127 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6029 +[titan] 2025-10-05 21:41:46,407 - root - INFO - lr: 5.3559e-06 gnorm: 1.23 [23:07:34< 1:22:30] +[titan] 2025-10-05 21:41:57,261 - root - INFO - step: 37760 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 21:41:57,261 - root - INFO - lr: 5.3543e-06 gnorm: 1.27 [23:07:45< 1:22:19] +[titan] 2025-10-05 21:42:08,104 - root - INFO - step: 37765 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6996 +[titan] 2025-10-05 21:42:08,104 - root - INFO - lr: 5.3527e-06 gnorm: 1.22 [23:07:56< 1:22:08] +[titan] 2025-10-05 21:42:18,953 - root - INFO - step: 37770 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:42:18,954 - root - INFO - lr: 5.3512e-06 gnorm: 1.28 [23:08:07< 1:21:57] +[titan] 2025-10-05 21:42:29,811 - root - INFO - step: 37775 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 21:42:29,811 - root - INFO - lr: 5.3496e-06 gnorm: 1.30 [23:08:18< 1:21:46] +[titan] 2025-10-05 21:42:40,701 - root - INFO - step: 37780 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 21:42:40,701 - root - INFO - lr: 
5.3480e-06 gnorm: 1.24 [23:08:29< 1:21:35] +[titan] 2025-10-05 21:42:51,568 - root - INFO - step: 37785 loss: 1.8503 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 21:42:51,568 - root - INFO - lr: 5.3465e-06 gnorm: 1.25 [23:08:39< 1:21:24] +[titan] 2025-10-05 21:43:02,441 - root - INFO - step: 37790 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 21:43:02,441 - root - INFO - lr: 5.3449e-06 gnorm: 1.29 [23:08:50< 1:21:13] +[titan] 2025-10-05 21:43:13,297 - root - INFO - step: 37795 loss: 1.9468 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7219 +[titan] 2025-10-05 21:43:13,297 - root - INFO - lr: 5.3434e-06 gnorm: 1.25 [23:09:01< 1:21:02] +[titan] 2025-10-05 21:43:21,968 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:43:24,171 - root - INFO - step: 37800 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 21:43:24,171 - root - INFO - lr: 5.3418e-06 gnorm: 1.23 [23:09:12< 1:20:51] +[titan] 2025-10-05 21:43:35,037 - root - INFO - step: 37805 loss: 1.9248 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 21:43:35,037 - root - INFO - lr: 5.3403e-06 gnorm: 1.25 [23:09:23< 1:20:40] +[titan] 2025-10-05 21:43:45,919 - root - INFO - step: 37810 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:43:45,919 - root - INFO - lr: 5.3387e-06 gnorm: 1.21 [23:09:34< 1:20:29] +[titan] 2025-10-05 21:43:56,805 - root - INFO - step: 37815 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 21:43:56,805 - root - INFO - lr: 5.3372e-06 gnorm: 1.27 [23:09:45< 1:20:18] +[titan] 2025-10-05 21:44:07,687 - root - INFO - step: 37820 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 21:44:07,687 - root - INFO - lr: 5.3356e-06 gnorm: 1.30 [23:09:56< 1:20:07] +[titan] 2025-10-05 21:44:18,545 - root - INFO - step: 37825 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:44:18,545 - root - INFO - lr: 5.3341e-06 gnorm: 1.27 [23:10:06< 1:19:56] +[titan] 2025-10-05 21:44:29,413 - root - INFO - step: 37830 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 21:44:29,413 - root - INFO - lr: 
5.3326e-06 gnorm: 1.21 [23:10:17< 1:19:44] +[titan] 2025-10-05 21:44:40,283 - root - INFO - step: 37835 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 21:44:40,283 - root - INFO - lr: 5.3310e-06 gnorm: 1.22 [23:10:28< 1:19:33] +[titan] 2025-10-05 21:44:51,148 - root - INFO - step: 37840 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 21:44:51,149 - root - INFO - lr: 5.3295e-06 gnorm: 1.21 [23:10:39< 1:19:22] +[titan] 2025-10-05 21:45:02,046 - root - INFO - step: 37845 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:45:02,047 - root - INFO - lr: 5.3280e-06 gnorm: 1.25 [23:10:50< 1:19:11] +[titan] 2025-10-05 21:45:10,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:45:12,905 - root - INFO - step: 37850 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:45:12,905 - root - INFO - lr: 5.3265e-06 gnorm: 1.23 [23:11:01< 1:19:00] +[titan] 2025-10-05 21:45:23,773 - root - INFO - step: 37855 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6640 +[titan] 2025-10-05 21:45:23,773 - root - INFO - lr: 5.3250e-06 gnorm: 1.22 [23:11:12< 1:18:49] +[titan] 2025-10-05 21:45:34,638 - root - INFO - step: 37860 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7081 +[titan] 2025-10-05 21:45:34,638 - root - INFO - lr: 5.3235e-06 gnorm: 1.24 [23:11:22< 1:18:38] +[titan] 2025-10-05 21:45:45,491 - root - INFO - step: 37865 loss: 1.9514 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7254 +[titan] 2025-10-05 21:45:45,491 - root - INFO - lr: 5.3220e-06 gnorm: 1.24 [23:11:33< 1:18:27] +[titan] 2025-10-05 21:45:56,352 - root - INFO - step: 37870 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6506 +[titan] 2025-10-05 21:45:56,353 - root - INFO - lr: 5.3205e-06 gnorm: 1.21 [23:11:44< 1:18:16] +[titan] 2025-10-05 21:46:07,270 - root - INFO - step: 37875 loss: 1.9195 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 21:46:07,271 - root - INFO - lr: 5.3190e-06 gnorm: 1.24 [23:11:55< 1:18:05] +[titan] 2025-10-05 21:46:18,130 - root - INFO - step: 37880 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 21:46:18,131 - root - INFO - lr: 
5.3175e-06 gnorm: 1.26 [23:12:06< 1:17:54] +[titan] 2025-10-05 21:46:29,081 - root - INFO - step: 37885 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:46:29,082 - root - INFO - lr: 5.3160e-06 gnorm: 1.22 [23:12:17< 1:17:43] +[titan] 2025-10-05 21:46:35,780 - root - INFO - Dumping profiler traces at step 37888 +[titan] 2025-10-05 21:46:35,817 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:46:40,200 - root - INFO - step: 37890 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 29,473 tflops: 408.89 mfu: 41.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 21:46:40,200 - root - INFO - lr: 5.3145e-06 gnorm: 1.28 [23:12:28< 1:17:32] +[titan] 2025-10-05 21:46:51,073 - root - INFO - step: 37895 loss: 1.9689 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 21:46:51,073 - root - INFO - lr: 5.3130e-06 gnorm: 1.23 [23:12:39< 1:17:21] +[titan] 2025-10-05 21:46:59,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:47:01,983 - root - INFO - step: 37900 loss: 1.9609 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:47:01,983 - root - INFO - lr: 5.3115e-06 gnorm: 1.24 [23:12:50< 1:17:10] +[titan] 2025-10-05 21:47:12,859 - root - INFO - step: 37905 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7007 +[titan] 2025-10-05 21:47:12,859 - root - INFO - lr: 5.3100e-06 gnorm: 1.27 [23:13:01< 1:16:59] +[titan] 2025-10-05 21:47:23,757 - root - INFO - step: 37910 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 21:47:23,757 - root - INFO - lr: 5.3086e-06 gnorm: 1.26 [23:13:12< 1:16:48] +[titan] 2025-10-05 21:47:34,635 - root - INFO - step: 37915 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6659 +[titan] 2025-10-05 21:47:34,635 - root - INFO - lr: 5.3071e-06 gnorm: 1.24 [23:13:22< 1:16:37] +[titan] 2025-10-05 21:47:45,522 - root - INFO - step: 37920 loss: 1.8835 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:47:45,522 - root - INFO - lr: 5.3056e-06 gnorm: 1.20 [23:13:33< 1:16:26] +[titan] 2025-10-05 21:47:56,386 - root - INFO - step: 37925 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 21:47:56,386 - root - INFO - lr: 5.3042e-06 gnorm: 1.28 [23:13:44< 1:16:15] +[titan] 2025-10-05 21:48:07,400 - root - INFO - step: 37930 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,753 tflops: 412.78 mfu: 41.74% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 21:48:07,400 - root - INFO - lr: 
5.3027e-06 gnorm: 1.23 [23:13:55< 1:16:04] +[titan] 2025-10-05 21:48:18,249 - root - INFO - step: 37935 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 21:48:18,249 - root - INFO - lr: 5.3012e-06 gnorm: 1.24 [23:14:06< 1:15:53] +[titan] 2025-10-05 21:48:29,154 - root - INFO - step: 37940 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 21:48:29,154 - root - INFO - lr: 5.2998e-06 gnorm: 1.26 [23:14:17< 1:15:42] +[titan] 2025-10-05 21:48:40,024 - root - INFO - step: 37945 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6992 +[titan] 2025-10-05 21:48:40,025 - root - INFO - lr: 5.2983e-06 gnorm: 1.29 [23:14:28< 1:15:31] +[titan] 2025-10-05 21:48:48,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:48:50,876 - root - INFO - step: 37950 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:48:50,876 - root - INFO - lr: 5.2969e-06 gnorm: 1.28 [23:14:39< 1:15:20] +[titan] 2025-10-05 21:49:01,777 - root - INFO - step: 37955 loss: 1.9146 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:49:01,777 - root - INFO - lr: 5.2954e-06 gnorm: 1.23 [23:14:50< 1:15:09] +[titan] 2025-10-05 21:49:12,633 - root - INFO - step: 37960 loss: 1.9032 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:49:12,633 - root - INFO - lr: 5.2940e-06 gnorm: 1.25 [23:15:00< 1:14:58] +[titan] 2025-10-05 21:49:23,498 - root - INFO - step: 37965 loss: 1.8874 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 21:49:23,498 - root - INFO - lr: 5.2926e-06 gnorm: 1.21 [23:15:11< 1:14:47] +[titan] 2025-10-05 21:49:34,372 - root - INFO - step: 37970 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 21:49:34,372 - root - INFO - lr: 5.2911e-06 gnorm: 1.25 [23:15:22< 1:14:36] +[titan] 2025-10-05 21:49:45,244 - root - INFO - step: 37975 loss: 1.9350 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 21:49:45,244 - root - INFO - lr: 5.2897e-06 gnorm: 1.25 [23:15:33< 1:14:25] +[titan] 2025-10-05 21:49:56,122 - root - INFO - step: 37980 loss: 2.0219 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7886 +[titan] 2025-10-05 21:49:56,122 - root - INFO - lr: 
5.2883e-06 gnorm: 1.31 [23:15:44< 1:14:14] +[titan] 2025-10-05 21:50:07,019 - root - INFO - step: 37985 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:50:07,019 - root - INFO - lr: 5.2869e-06 gnorm: 1.24 [23:15:55< 1:14:02] +[titan] 2025-10-05 21:50:17,884 - root - INFO - step: 37990 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 21:50:17,884 - root - INFO - lr: 5.2854e-06 gnorm: 1.22 [23:16:06< 1:13:51] +[titan] 2025-10-05 21:50:28,745 - root - INFO - step: 37995 loss: 1.8863 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 21:50:28,745 - root - INFO - lr: 5.2840e-06 gnorm: 1.21 [23:16:17< 1:13:40] +[titan] 2025-10-05 21:50:37,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:50:39,603 - root - INFO - step: 38000 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:50:39,603 - root - INFO - lr: 5.2826e-06 gnorm: 1.24 [23:16:27< 1:13:29] +[titan] 2025-10-05 21:50:50,499 - root - INFO - step: 38005 loss: 1.9446 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:50:50,499 - root - INFO - lr: 5.2812e-06 gnorm: 1.24 [23:16:38< 1:13:18] +[titan] 2025-10-05 21:51:01,361 - root - INFO - step: 38010 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 21:51:01,361 - root - INFO - lr: 5.2798e-06 gnorm: 1.25 [23:16:49< 1:13:07] +[titan] 2025-10-05 21:51:12,250 - root - INFO - step: 38015 loss: 1.9035 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 21:51:12,250 - root - INFO - lr: 5.2784e-06 gnorm: 1.23 [23:17:00< 1:12:56] +[titan] 2025-10-05 21:51:23,111 - root - INFO - step: 38020 loss: 1.9570 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 21:51:23,111 - root - INFO - lr: 5.2770e-06 gnorm: 1.26 [23:17:11< 1:12:45] +[titan] 2025-10-05 21:51:33,966 - root - INFO - step: 38025 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:51:33,966 - root - INFO - lr: 5.2756e-06 gnorm: 1.24 [23:17:22< 1:12:34] +[titan] 2025-10-05 21:51:44,841 - root - INFO - step: 38030 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6303 +[titan] 2025-10-05 21:51:44,841 - root - INFO - lr: 
5.2742e-06 gnorm: 1.22 [23:17:33< 1:12:23] +[titan] 2025-10-05 21:51:55,747 - root - INFO - step: 38035 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 21:51:55,747 - root - INFO - lr: 5.2728e-06 gnorm: 1.23 [23:17:44< 1:12:12] +[titan] 2025-10-05 21:52:06,666 - root - INFO - step: 38040 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:52:06,666 - root - INFO - lr: 5.2714e-06 gnorm: 1.25 [23:17:54< 1:12:01] +[titan] 2025-10-05 21:52:17,555 - root - INFO - step: 38045 loss: 1.8640 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6482 +[titan] 2025-10-05 21:52:17,555 - root - INFO - lr: 5.2701e-06 gnorm: 1.25 [23:18:05< 1:11:50] +[titan] 2025-10-05 21:52:26,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:52:28,442 - root - INFO - step: 38050 loss: 1.8572 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6424 +[titan] 2025-10-05 21:52:28,442 - root - INFO - lr: 5.2687e-06 gnorm: 1.21 [23:18:16< 1:11:39] +[titan] 2025-10-05 21:52:39,324 - root - INFO - step: 38055 loss: 1.9652 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 21:52:39,325 - root - INFO - lr: 5.2673e-06 gnorm: 1.23 [23:18:27< 1:11:28] +[titan] 2025-10-05 21:52:50,189 - root - INFO - step: 38060 loss: 1.9568 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 21:52:50,189 - root - INFO - lr: 5.2659e-06 gnorm: 1.26 [23:18:38< 1:11:17] +[titan] 2025-10-05 21:53:01,061 - root - INFO - step: 38065 loss: 1.8871 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 21:53:01,061 - root - INFO - lr: 5.2646e-06 gnorm: 1.22 [23:18:49< 1:11:06] +[titan] 2025-10-05 21:53:12,018 - root - INFO - step: 38070 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 21:53:12,018 - root - INFO - lr: 5.2632e-06 gnorm: 1.21 [23:19:00< 1:10:55] +[titan] 2025-10-05 21:53:22,903 - root - INFO - step: 38075 loss: 1.8578 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6438 +[titan] 2025-10-05 21:53:22,903 - root - INFO - lr: 5.2619e-06 gnorm: 1.21 [23:19:11< 1:10:44] +[titan] 2025-10-05 21:53:33,778 - root - INFO - step: 38080 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:53:33,779 - root - INFO - lr: 
5.2605e-06 gnorm: 1.25 [23:19:22< 1:10:33] +[titan] 2025-10-05 21:53:44,628 - root - INFO - step: 38085 loss: 1.9527 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:53:44,628 - root - INFO - lr: 5.2591e-06 gnorm: 1.23 [23:19:32< 1:10:22] +[titan] 2025-10-05 21:53:55,480 - root - INFO - step: 38090 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6835 +[titan] 2025-10-05 21:53:55,480 - root - INFO - lr: 5.2578e-06 gnorm: 1.21 [23:19:43< 1:10:11] +[titan] 2025-10-05 21:54:06,381 - root - INFO - step: 38095 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:54:06,381 - root - INFO - lr: 5.2565e-06 gnorm: 1.32 [23:19:54< 1:10:00] +[titan] 2025-10-05 21:54:15,086 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:54:17,264 - root - INFO - step: 38100 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:54:17,264 - root - INFO - lr: 5.2551e-06 gnorm: 1.21 [23:20:05< 1:09:49] +[titan] 2025-10-05 21:54:28,122 - root - INFO - step: 38105 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:54:28,123 - root - INFO - lr: 5.2538e-06 gnorm: 1.25 [23:20:16< 1:09:38] +[titan] 2025-10-05 21:54:38,982 - root - INFO - step: 38110 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:54:38,982 - root - INFO - lr: 5.2524e-06 gnorm: 1.30 [23:20:27< 1:09:27] +[titan] 2025-10-05 21:54:49,840 - root - INFO - step: 38115 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 21:54:49,840 - root - INFO - lr: 5.2511e-06 gnorm: 1.27 [23:20:38< 1:09:16] +[titan] 2025-10-05 21:55:00,694 - root - INFO - step: 38120 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:55:00,694 - root - INFO - lr: 5.2498e-06 gnorm: 1.22 [23:20:48< 1:09:05] +[titan] 2025-10-05 21:55:11,613 - root - INFO - step: 38125 loss: 1.8922 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:55:11,613 - root - INFO - lr: 5.2485e-06 gnorm: 1.22 [23:20:59< 1:08:54] +[titan] 2025-10-05 21:55:22,478 - root - INFO - step: 38130 loss: 1.8761 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6600 +[titan] 2025-10-05 21:55:22,478 - root - INFO - lr: 
5.2471e-06 gnorm: 1.23 [23:21:10< 1:08:43] +[titan] 2025-10-05 21:55:33,363 - root - INFO - step: 38135 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7568 +[titan] 2025-10-05 21:55:33,364 - root - INFO - lr: 5.2458e-06 gnorm: 1.25 [23:21:21< 1:08:32] +[titan] 2025-10-05 21:55:44,229 - root - INFO - step: 38140 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 21:55:44,229 - root - INFO - lr: 5.2445e-06 gnorm: 1.25 [23:21:32< 1:08:20] +[titan] 2025-10-05 21:55:55,104 - root - INFO - step: 38145 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6732 +[titan] 2025-10-05 21:55:55,104 - root - INFO - lr: 5.2432e-06 gnorm: 1.23 [23:21:43< 1:08:09] +[titan] 2025-10-05 21:56:03,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:56:05,959 - root - INFO - step: 38150 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6637 +[titan] 2025-10-05 21:56:05,960 - root - INFO - lr: 5.2419e-06 gnorm: 1.28 [23:21:54< 1:07:58] +[titan] 2025-10-05 21:56:16,858 - root - INFO - step: 38155 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6893 +[titan] 2025-10-05 21:56:16,858 - root - INFO - lr: 5.2406e-06 gnorm: 1.23 [23:22:05< 1:07:47] +[titan] 2025-10-05 21:56:27,718 - root - INFO - step: 38160 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 21:56:27,719 - root - INFO - lr: 5.2393e-06 gnorm: 1.25 [23:22:16< 1:07:36] +[titan] 2025-10-05 21:56:38,596 - root - INFO - step: 38165 loss: 1.8754 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6582 +[titan] 2025-10-05 21:56:38,597 - root - INFO - lr: 5.2380e-06 gnorm: 1.20 [23:22:26< 1:07:25] +[titan] 2025-10-05 21:56:49,479 - root - INFO - step: 38170 loss: 1.9310 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 21:56:49,480 - root - INFO - lr: 5.2367e-06 gnorm: 1.22 [23:22:37< 1:07:14] +[titan] 2025-10-05 21:57:00,354 - root - INFO - step: 38175 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:57:00,354 - root - INFO - lr: 5.2354e-06 gnorm: 1.27 [23:22:48< 1:07:03] +[titan] 2025-10-05 21:57:11,246 - root - INFO - step: 38180 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 21:57:11,247 - root - INFO - lr: 
5.2341e-06 gnorm: 1.25 [23:22:59< 1:06:52] +[titan] 2025-10-05 21:57:22,096 - root - INFO - step: 38185 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7581 +[titan] 2025-10-05 21:57:22,096 - root - INFO - lr: 5.2328e-06 gnorm: 1.27 [23:23:10< 1:06:41] +[titan] 2025-10-05 21:57:32,943 - root - INFO - step: 38190 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:57:32,943 - root - INFO - lr: 5.2316e-06 gnorm: 1.26 [23:23:21< 1:06:30] +[titan] 2025-10-05 21:57:43,812 - root - INFO - step: 38195 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 21:57:43,812 - root - INFO - lr: 5.2303e-06 gnorm: 1.23 [23:23:32< 1:06:19] +[titan] 2025-10-05 21:57:52,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:57:54,668 - root - INFO - step: 38200 loss: 1.9598 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7333 +[titan] 2025-10-05 21:57:54,668 - root - INFO - lr: 5.2290e-06 gnorm: 1.24 [23:23:42< 1:06:08] +[titan] 2025-10-05 21:58:05,542 - root - INFO - step: 38205 loss: 1.8481 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 21:58:05,543 - root - INFO - lr: 5.2277e-06 gnorm: 1.26 [23:23:53< 1:05:57] +[titan] 2025-10-05 21:58:16,438 - root - INFO - step: 38210 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7477 +[titan] 2025-10-05 21:58:16,438 - root - INFO - lr: 5.2265e-06 gnorm: 1.28 [23:24:04< 1:05:46] +[titan] 2025-10-05 21:58:27,285 - root - INFO - step: 38215 loss: 1.9355 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 21:58:27,285 - root - INFO - lr: 5.2252e-06 gnorm: 1.22 [23:24:15< 1:05:35] +[titan] 2025-10-05 21:58:38,133 - root - INFO - step: 38220 loss: 1.8546 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6405 +[titan] 2025-10-05 21:58:38,133 - root - INFO - lr: 5.2240e-06 gnorm: 1.23 [23:24:26< 1:05:24] +[titan] 2025-10-05 21:58:48,997 - root - INFO - step: 38225 loss: 1.8842 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6664 +[titan] 2025-10-05 21:58:48,997 - root - INFO - lr: 5.2227e-06 gnorm: 1.21 [23:24:37< 1:05:13] +[titan] 2025-10-05 21:58:59,888 - root - INFO - step: 38230 loss: 1.9848 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 21:58:59,888 - root - INFO - lr: 
5.2214e-06 gnorm: 1.24 [23:24:48< 1:05:02] +[titan] 2025-10-05 21:59:10,888 - root - INFO - step: 38235 loss: 1.8777 memory: 118.84GiB(85.28%) tps: 29,791 tflops: 413.31 mfu: 41.79% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 21:59:10,888 - root - INFO - lr: 5.2202e-06 gnorm: 1.21 [23:24:59< 1:04:51] +[titan] 2025-10-05 21:59:21,732 - root - INFO - step: 38240 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6791 +[titan] 2025-10-05 21:59:21,732 - root - INFO - lr: 5.2190e-06 gnorm: 1.22 [23:25:10< 1:04:40] +[titan] 2025-10-05 21:59:32,592 - root - INFO - step: 38245 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 21:59:32,592 - root - INFO - lr: 5.2177e-06 gnorm: 1.26 [23:25:20< 1:04:29] +[titan] 2025-10-05 21:59:41,259 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:59:43,442 - root - INFO - step: 38250 loss: 1.8699 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6533 +[titan] 2025-10-05 21:59:43,442 - root - INFO - lr: 5.2165e-06 gnorm: 1.23 [23:25:31< 1:04:18] +[titan] 2025-10-05 21:59:54,302 - root - INFO - step: 38255 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:59:54,303 - root - INFO - lr: 5.2152e-06 gnorm: 1.28 [23:25:42< 1:04:07] +[titan] 2025-10-05 22:00:05,203 - root - INFO - step: 38260 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 22:00:05,203 - root - INFO - lr: 5.2140e-06 gnorm: 1.26 [23:25:53< 1:03:56] +[titan] 2025-10-05 22:00:16,075 - root - INFO - step: 38265 loss: 1.8744 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6580 +[titan] 2025-10-05 22:00:16,076 - root - INFO - lr: 5.2128e-06 gnorm: 1.25 [23:26:04< 1:03:45] +[titan] 2025-10-05 22:00:26,953 - root - INFO - step: 38270 loss: 1.9090 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:00:26,953 - root - INFO - lr: 5.2116e-06 gnorm: 1.26 [23:26:15< 1:03:34] +[titan] 2025-10-05 22:00:37,823 - root - INFO - step: 38275 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6897 +[titan] 2025-10-05 22:00:37,823 - root - INFO - lr: 5.2103e-06 gnorm: 1.27 [23:26:26< 1:03:23] +[titan] 2025-10-05 22:00:48,688 - root - INFO - step: 38280 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 22:00:48,688 - root - INFO - lr: 
5.2091e-06 gnorm: 1.25 [23:26:36< 1:03:12] +[titan] 2025-10-05 22:00:59,558 - root - INFO - step: 38285 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 22:00:59,559 - root - INFO - lr: 5.2079e-06 gnorm: 1.24 [23:26:47< 1:03:01] +[titan] 2025-10-05 22:01:10,423 - root - INFO - step: 38290 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 22:01:10,423 - root - INFO - lr: 5.2067e-06 gnorm: 1.23 [23:26:58< 1:02:50] +[titan] 2025-10-05 22:01:21,364 - root - INFO - step: 38295 loss: 1.9718 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 22:01:21,364 - root - INFO - lr: 5.2055e-06 gnorm: 1.27 [23:27:09< 1:02:39] +[titan] 2025-10-05 22:01:30,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:01:32,234 - root - INFO - step: 38300 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7052 +[titan] 2025-10-05 22:01:32,234 - root - INFO - lr: 5.2043e-06 gnorm: 1.22 [23:27:20< 1:02:28] +[titan] 2025-10-05 22:01:43,099 - root - INFO - step: 38305 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 22:01:43,099 - root - INFO - lr: 5.2031e-06 gnorm: 1.24 [23:27:31< 1:02:16] +[titan] 2025-10-05 22:01:53,973 - root - INFO - step: 38310 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 22:01:53,973 - root - INFO - lr: 5.2019e-06 gnorm: 1.25 [23:27:42< 1:02:05] +[titan] 2025-10-05 22:02:04,844 - root - INFO - step: 38315 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 22:02:04,845 - root - INFO - lr: 5.2007e-06 gnorm: 1.28 [23:27:53< 1:01:54] +[titan] 2025-10-05 22:02:15,752 - root - INFO - step: 38320 loss: 1.9010 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6811 +[titan] 2025-10-05 22:02:15,752 - root - INFO - lr: 5.1995e-06 gnorm: 1.24 [23:28:04< 1:01:43] +[titan] 2025-10-05 22:02:26,644 - root - INFO - step: 38325 loss: 1.8521 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6383 +[titan] 2025-10-05 22:02:26,645 - root - INFO - lr: 5.1983e-06 gnorm: 1.27 [23:28:14< 1:01:32] +[titan] 2025-10-05 22:02:37,525 - root - INFO - step: 38330 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:02:37,525 - root - INFO - lr: 
5.1972e-06 gnorm: 1.27 [23:28:25< 1:01:21] +[titan] 2025-10-05 22:02:48,403 - root - INFO - step: 38335 loss: 1.8947 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6753 +[titan] 2025-10-05 22:02:48,403 - root - INFO - lr: 5.1960e-06 gnorm: 1.31 [23:28:36< 1:01:10] +[titan] 2025-10-05 22:02:59,271 - root - INFO - step: 38340 loss: 1.8646 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 22:02:59,271 - root - INFO - lr: 5.1948e-06 gnorm: 1.22 [23:28:47< 1:00:59] +[titan] 2025-10-05 22:03:10,127 - root - INFO - step: 38345 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 22:03:10,127 - root - INFO - lr: 5.1936e-06 gnorm: 1.28 [23:28:58< 1:00:48] +[titan] 2025-10-05 22:03:18,835 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:03:21,025 - root - INFO - step: 38350 loss: 1.8758 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6588 +[titan] 2025-10-05 22:03:21,025 - root - INFO - lr: 5.1925e-06 gnorm: 1.22 [23:29:09< 1:00:37] +[titan] 2025-10-05 22:03:31,925 - root - INFO - step: 38355 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6874 +[titan] 2025-10-05 22:03:31,925 - root - INFO - lr: 5.1913e-06 gnorm: 1.20 [23:29:20< 1:00:26] +[titan] 2025-10-05 22:03:42,780 - root - INFO - step: 38360 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 22:03:42,780 - root - INFO - lr: 5.1902e-06 gnorm: 1.24 [23:29:31< 1:00:15] +[titan] 2025-10-05 22:03:53,638 - root - INFO - step: 38365 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 22:03:53,638 - root - INFO - lr: 5.1890e-06 gnorm: 1.25 [23:29:41< 1:00:04] +[titan] 2025-10-05 22:04:04,503 - root - INFO - step: 38370 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 22:04:04,503 - root - INFO - lr: 5.1878e-06 gnorm: 1.23 [23:29:52< 0:59:53] +[titan] 2025-10-05 22:04:15,408 - root - INFO - step: 38375 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 22:04:15,409 - root - INFO - lr: 5.1867e-06 gnorm: 1.24 [23:30:03< 0:59:42] +[titan] 2025-10-05 22:04:26,282 - root - INFO - step: 38380 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 22:04:26,282 - root - INFO - lr: 
5.1856e-06 gnorm: 1.23 [23:30:14< 0:59:31] +[titan] 2025-10-05 22:04:37,152 - root - INFO - step: 38385 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6840 +[titan] 2025-10-05 22:04:37,153 - root - INFO - lr: 5.1844e-06 gnorm: 1.26 [23:30:25< 0:59:20] +[titan] 2025-10-05 22:04:48,030 - root - INFO - step: 38390 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:04:48,030 - root - INFO - lr: 5.1833e-06 gnorm: 1.26 [23:30:36< 0:59:09] +[titan] 2025-10-05 22:04:58,887 - root - INFO - step: 38395 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:04:58,887 - root - INFO - lr: 5.1821e-06 gnorm: 1.24 [23:30:47< 0:58:58] +[titan] 2025-10-05 22:05:07,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:05:09,829 - root - INFO - step: 38400 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 22:05:09,829 - root - INFO - lr: 5.1810e-06 gnorm: 1.25 [23:30:58< 0:58:47] +[titan] 2025-10-05 22:05:10,025 - root - INFO - Dumping profiler traces at step 38400 +[titan] 2025-10-05 22:05:10,068 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:05:20,979 - root - INFO - step: 38405 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 29,389 tflops: 407.73 mfu: 41.23% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 22:05:20,979 - root - INFO - lr: 5.1799e-06 gnorm: 1.21 [23:31:09< 0:58:36] +[titan] 2025-10-05 22:05:31,845 - root - INFO - step: 38410 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6711 +[titan] 2025-10-05 22:05:31,845 - root - INFO - lr: 5.1788e-06 gnorm: 1.25 [23:31:20< 0:58:25] +[titan] 2025-10-05 22:05:42,706 - root - INFO - step: 38415 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6821 +[titan] 2025-10-05 22:05:42,706 - root - INFO - lr: 5.1776e-06 gnorm: 1.25 [23:31:30< 0:58:14] +[titan] 2025-10-05 22:05:53,597 - root - INFO - step: 38420 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 22:05:53,598 - root - INFO - lr: 5.1765e-06 gnorm: 1.24 [23:31:41< 0:58:03] +[titan] 2025-10-05 22:06:04,473 - root - INFO - step: 38425 loss: 1.8931 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:06:04,473 - root - INFO - lr: 5.1754e-06 gnorm: 1.24 [23:31:52< 0:57:52] +[titan] 2025-10-05 22:06:15,341 - root - INFO - step: 38430 loss: 
1.9097 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:06:15,342 - root - INFO - lr: 5.1743e-06 gnorm: 1.28 [23:32:03< 0:57:41] +[titan] 2025-10-05 22:06:26,263 - root - INFO - step: 38435 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6607 +[titan] 2025-10-05 22:06:26,263 - root - INFO - lr: 5.1732e-06 gnorm: 1.21 [23:32:14< 0:57:30] +[titan] 2025-10-05 22:06:37,131 - root - INFO - step: 38440 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7207 +[titan] 2025-10-05 22:06:37,132 - root - INFO - lr: 5.1721e-06 gnorm: 1.24 [23:32:25< 0:57:19] +[titan] 2025-10-05 22:06:48,006 - root - INFO - step: 38445 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6765 +[titan] 2025-10-05 22:06:48,006 - root - INFO - lr: 5.1710e-06 gnorm: 1.26 [23:32:36< 0:57:08] +[titan] 2025-10-05 22:06:56,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:06:58,881 - root - INFO - step: 38450 loss: 1.9214 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 22:06:58,881 - root - INFO - lr: 5.1699e-06 gnorm: 1.25 [23:32:47< 0:56:57] +[titan] 2025-10-05 22:07:09,781 - root - INFO - step: 38455 loss: 1.9440 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:07:09,781 - root - INFO - lr: 5.1688e-06 gnorm: 1.24 [23:32:58< 0:56:46] +[titan] 2025-10-05 22:07:20,663 - root - INFO - step: 38460 loss: 1.8888 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6704 +[titan] 2025-10-05 22:07:20,663 - root - INFO - lr: 5.1677e-06 gnorm: 1.25 [23:33:08< 0:56:35] +[titan] 2025-10-05 22:07:31,515 - root - INFO - step: 38465 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:07:31,515 - root - INFO - lr: 5.1666e-06 gnorm: 1.27 [23:33:19< 0:56:24] +[titan] 2025-10-05 22:07:42,351 - root - INFO - step: 38470 loss: 1.8510 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2144 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 22:07:42,351 - root - INFO - lr: 5.1655e-06 gnorm: 1.22 [23:33:30< 0:56:13] +[titan] 2025-10-05 22:07:53,204 - root - INFO - step: 38475 loss: 1.9409 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7151 +[titan] 2025-10-05 22:07:53,204 - root - INFO - lr: 5.1645e-06 gnorm: 1.27 [23:33:41< 0:56:01] +[titan] 2025-10-05 22:08:04,067 - root - INFO - step: 38480 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 22:08:04,067 - root - INFO - lr: 
5.1634e-06 gnorm: 1.21 [23:33:52< 0:55:50] +[titan] 2025-10-05 22:08:14,965 - root - INFO - step: 38485 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:08:14,965 - root - INFO - lr: 5.1623e-06 gnorm: 1.27 [23:34:03< 0:55:39] +[titan] 2025-10-05 22:08:25,908 - root - INFO - step: 38490 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:08:25,908 - root - INFO - lr: 5.1612e-06 gnorm: 1.26 [23:34:14< 0:55:28] +[titan] 2025-10-05 22:08:36,784 - root - INFO - step: 38495 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6699 +[titan] 2025-10-05 22:08:36,784 - root - INFO - lr: 5.1602e-06 gnorm: 1.32 [23:34:25< 0:55:17] +[titan] 2025-10-05 22:08:45,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:08:47,650 - root - INFO - step: 38500 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 22:08:47,650 - root - INFO - lr: 5.1591e-06 gnorm: 1.33 [23:34:35< 0:55:06] +[titan] 2025-10-05 22:08:58,527 - root - INFO - step: 38505 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 22:08:58,527 - root - INFO - lr: 5.1581e-06 gnorm: 1.28 [23:34:46< 0:54:55] +[titan] 2025-10-05 22:09:09,391 - root - INFO - step: 38510 loss: 1.9323 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:09:09,391 - root - INFO - lr: 5.1570e-06 gnorm: 1.28 [23:34:57< 0:54:44] +[titan] 2025-10-05 22:09:20,365 - root - INFO - step: 38515 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 29,861 tflops: 414.28 mfu: 41.89% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:09:20,365 - root - INFO - lr: 5.1560e-06 gnorm: 1.27 [23:35:08< 0:54:33] +[titan] 2025-10-05 22:09:31,218 - root - INFO - step: 38520 loss: 1.9315 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:09:31,219 - root - INFO - lr: 5.1549e-06 gnorm: 1.25 [23:35:19< 0:54:22] +[titan] 2025-10-05 22:09:42,070 - root - INFO - step: 38525 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6845 +[titan] 2025-10-05 22:09:42,070 - root - INFO - lr: 5.1539e-06 gnorm: 1.23 [23:35:30< 0:54:11] +[titan] 2025-10-05 22:09:52,922 - root - INFO - step: 38530 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 22:09:52,922 - root - INFO - lr: 
5.1528e-06 gnorm: 1.26 [23:35:41< 0:54:00] +[titan] 2025-10-05 22:10:03,769 - root - INFO - step: 38535 loss: 1.9228 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:10:03,769 - root - INFO - lr: 5.1518e-06 gnorm: 1.25 [23:35:52< 0:53:49] +[titan] 2025-10-05 22:10:14,645 - root - INFO - step: 38540 loss: 1.9149 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6940 +[titan] 2025-10-05 22:10:14,646 - root - INFO - lr: 5.1508e-06 gnorm: 1.24 [23:36:02< 0:53:38] +[titan] 2025-10-05 22:10:25,531 - root - INFO - step: 38545 loss: 1.8971 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 22:10:25,532 - root - INFO - lr: 5.1497e-06 gnorm: 1.21 [23:36:13< 0:53:27] +[titan] 2025-10-05 22:10:34,232 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:10:36,411 - root - INFO - step: 38550 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 22:10:36,411 - root - INFO - lr: 5.1487e-06 gnorm: 1.26 [23:36:24< 0:53:16] +[titan] 2025-10-05 22:10:47,265 - root - INFO - step: 38555 loss: 1.9055 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 22:10:47,265 - root - INFO - lr: 5.1477e-06 gnorm: 1.25 [23:36:35< 0:53:05] +[titan] 2025-10-05 22:10:58,113 - root - INFO - step: 38560 loss: 1.8963 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 22:10:58,113 - root - INFO - lr: 5.1467e-06 gnorm: 1.28 [23:36:46< 0:52:54] +[titan] 2025-10-05 22:11:08,954 - root - INFO - step: 38565 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:11:08,954 - root - INFO - lr: 5.1456e-06 gnorm: 1.26 [23:36:57< 0:52:43] +[titan] 2025-10-05 22:11:19,804 - root - INFO - step: 38570 loss: 1.9003 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 22:11:19,805 - root - INFO - lr: 5.1446e-06 gnorm: 1.22 [23:37:08< 0:52:32] +[titan] 2025-10-05 22:11:30,699 - root - INFO - step: 38575 loss: 1.8708 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6535 +[titan] 2025-10-05 22:11:30,699 - root - INFO - lr: 5.1436e-06 gnorm: 1.25 [23:37:18< 0:52:21] +[titan] 2025-10-05 22:11:41,605 - root - INFO - step: 38580 loss: 1.9498 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 22:11:41,605 - root - INFO - lr: 
5.1426e-06 gnorm: 1.26 [23:37:29< 0:52:10] +[titan] 2025-10-05 22:11:52,476 - root - INFO - step: 38585 loss: 1.8659 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2149 global_avg_mtp_loss: 1.6510 +[titan] 2025-10-05 22:11:52,476 - root - INFO - lr: 5.1416e-06 gnorm: 1.27 [23:37:40< 0:51:59] +[titan] 2025-10-05 22:12:03,366 - root - INFO - step: 38590 loss: 1.8820 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6644 +[titan] 2025-10-05 22:12:03,366 - root - INFO - lr: 5.1406e-06 gnorm: 1.30 [23:37:51< 0:51:48] +[titan] 2025-10-05 22:12:14,240 - root - INFO - step: 38595 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 22:12:14,241 - root - INFO - lr: 5.1396e-06 gnorm: 1.20 [23:38:02< 0:51:37] +[titan] 2025-10-05 22:12:22,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:12:25,159 - root - INFO - step: 38600 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6314 +[titan] 2025-10-05 22:12:25,159 - root - INFO - lr: 5.1386e-06 gnorm: 1.21 [23:38:13< 0:51:26] +[titan] 2025-10-05 22:12:36,019 - root - INFO - step: 38605 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 22:12:36,019 - root - INFO - lr: 5.1376e-06 gnorm: 1.24 [23:38:24< 0:51:15] +[titan] 2025-10-05 22:12:46,891 - root - INFO - step: 38610 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 22:12:46,891 - root - INFO - lr: 5.1367e-06 gnorm: 1.19 [23:38:35< 0:51:04] +[titan] 2025-10-05 22:12:57,808 - root - INFO - step: 38615 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 22:12:57,808 - root - INFO - lr: 5.1357e-06 gnorm: 1.29 [23:38:46< 0:50:53] +[titan] 2025-10-05 22:13:08,674 - root - INFO - step: 38620 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 22:13:08,674 - root - INFO - lr: 5.1347e-06 gnorm: 1.29 [23:38:56< 0:50:42] +[titan] 2025-10-05 22:13:19,537 - root - INFO - step: 38625 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 22:13:19,537 - root - INFO - lr: 5.1337e-06 gnorm: 1.26 [23:39:07< 0:50:31] +[titan] 2025-10-05 22:13:30,453 - root - INFO - step: 38630 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6688 +[titan] 2025-10-05 22:13:30,453 - root - INFO - lr: 
5.1328e-06 gnorm: 1.24 [23:39:18< 0:50:20] +[titan] 2025-10-05 22:13:41,303 - root - INFO - step: 38635 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 22:13:41,303 - root - INFO - lr: 5.1318e-06 gnorm: 1.27 [23:39:29< 0:50:09] +[titan] 2025-10-05 22:13:52,138 - root - INFO - step: 38640 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 22:13:52,139 - root - INFO - lr: 5.1308e-06 gnorm: 1.25 [23:39:40< 0:49:58] +[titan] 2025-10-05 22:14:03,026 - root - INFO - step: 38645 loss: 1.8958 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6769 +[titan] 2025-10-05 22:14:03,026 - root - INFO - lr: 5.1299e-06 gnorm: 1.24 [23:39:51< 0:49:47] +[titan] 2025-10-05 22:14:11,665 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:14:13,845 - root - INFO - step: 38650 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6629 +[titan] 2025-10-05 22:14:13,845 - root - INFO - lr: 5.1289e-06 gnorm: 1.27 [23:40:02< 0:49:36] +[titan] 2025-10-05 22:14:24,687 - root - INFO - step: 38655 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 22:14:24,687 - root - INFO - lr: 5.1280e-06 gnorm: 1.32 [23:40:12< 0:49:24] +[titan] 2025-10-05 22:14:35,527 - root - INFO - step: 38660 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6957 +[titan] 2025-10-05 22:14:35,527 - root - INFO - lr: 5.1270e-06 gnorm: 1.28 [23:40:23< 0:49:13] +[titan] 2025-10-05 22:14:46,388 - root - INFO - step: 38665 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 22:14:46,388 - root - INFO - lr: 5.1261e-06 gnorm: 1.24 [23:40:34< 0:49:02] +[titan] 2025-10-05 22:14:57,230 - root - INFO - step: 38670 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 22:14:57,230 - root - INFO - lr: 5.1251e-06 gnorm: 1.25 [23:40:45< 0:48:51] +[titan] 2025-10-05 22:15:08,076 - root - INFO - step: 38675 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 22:15:08,077 - root - INFO - lr: 5.1242e-06 gnorm: 1.23 [23:40:56< 0:48:40] +[titan] 2025-10-05 22:15:18,905 - root - INFO - step: 38680 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 22:15:18,905 - root - INFO - lr: 
5.1233e-06 gnorm: 1.24 [23:41:07< 0:48:29] +[titan] 2025-10-05 22:15:29,770 - root - INFO - step: 38685 loss: 1.8560 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6416 +[titan] 2025-10-05 22:15:29,770 - root - INFO - lr: 5.1223e-06 gnorm: 1.26 [23:41:18< 0:48:18] +[titan] 2025-10-05 22:15:40,605 - root - INFO - step: 38690 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 22:15:40,606 - root - INFO - lr: 5.1214e-06 gnorm: 1.26 [23:41:28< 0:48:07] +[titan] 2025-10-05 22:15:51,445 - root - INFO - step: 38695 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 22:15:51,445 - root - INFO - lr: 5.1205e-06 gnorm: 1.26 [23:41:39< 0:47:56] +[titan] 2025-10-05 22:16:00,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:16:02,303 - root - INFO - step: 38700 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6625 +[titan] 2025-10-05 22:16:02,303 - root - INFO - lr: 5.1195e-06 gnorm: 1.22 [23:41:50< 0:47:45] +[titan] 2025-10-05 22:16:13,157 - root - INFO - step: 38705 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 22:16:13,157 - root - INFO - lr: 5.1186e-06 gnorm: 1.24 [23:42:01< 0:47:34] +[titan] 2025-10-05 22:16:24,067 - root - INFO - step: 38710 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 22:16:24,067 - root - INFO - lr: 5.1177e-06 gnorm: 1.23 [23:42:12< 0:47:23] +[titan] 2025-10-05 22:16:34,977 - root - INFO - step: 38715 loss: 1.9159 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:16:34,977 - root - INFO - lr: 5.1168e-06 gnorm: 1.27 [23:42:23< 0:47:12] +[titan] 2025-10-05 22:16:45,845 - root - INFO - step: 38720 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 22:16:45,845 - root - INFO - lr: 5.1159e-06 gnorm: 1.26 [23:42:34< 0:47:01] +[titan] 2025-10-05 22:16:56,703 - root - INFO - step: 38725 loss: 1.8703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 22:16:56,703 - root - INFO - lr: 5.1150e-06 gnorm: 1.22 [23:42:44< 0:46:50] +[titan] 2025-10-05 22:17:07,552 - root - INFO - step: 38730 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6731 +[titan] 2025-10-05 22:17:07,553 - root - INFO - lr: 
5.1141e-06 gnorm: 1.23 [23:42:55< 0:46:39] +[titan] 2025-10-05 22:17:18,393 - root - INFO - step: 38735 loss: 1.9710 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 22:17:18,394 - root - INFO - lr: 5.1132e-06 gnorm: 1.32 [23:43:06< 0:46:28] +[titan] 2025-10-05 22:17:29,314 - root - INFO - step: 38740 loss: 1.8794 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 22:17:29,314 - root - INFO - lr: 5.1123e-06 gnorm: 1.24 [23:43:17< 0:46:17] +[titan] 2025-10-05 22:17:40,164 - root - INFO - step: 38745 loss: 1.8962 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6772 +[titan] 2025-10-05 22:17:40,164 - root - INFO - lr: 5.1114e-06 gnorm: 1.27 [23:43:28< 0:46:06] +[titan] 2025-10-05 22:17:48,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:17:51,020 - root - INFO - step: 38750 loss: 1.8652 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6492 +[titan] 2025-10-05 22:17:51,020 - root - INFO - lr: 5.1105e-06 gnorm: 1.30 [23:43:39< 0:45:55] +[titan] 2025-10-05 22:18:01,867 - root - INFO - step: 38755 loss: 1.8715 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6553 +[titan] 2025-10-05 22:18:01,867 - root - INFO - lr: 5.1097e-06 gnorm: 1.24 [23:43:50< 0:45:44] +[titan] 2025-10-05 22:18:12,725 - root - INFO - step: 38760 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 22:18:12,725 - root - INFO - lr: 5.1088e-06 gnorm: 1.25 [23:44:00< 0:45:33] +[titan] 2025-10-05 22:18:23,576 - root - INFO - step: 38765 loss: 1.9134 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 22:18:23,576 - root - INFO - lr: 5.1079e-06 gnorm: 1.24 [23:44:11< 0:45:22] +[titan] 2025-10-05 22:18:34,466 - root - INFO - step: 38770 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 22:18:34,466 - root - INFO - lr: 5.1070e-06 gnorm: 1.21 [23:44:22< 0:45:11] +[titan] 2025-10-05 22:18:45,359 - root - INFO - step: 38775 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 22:18:45,359 - root - INFO - lr: 5.1062e-06 gnorm: 1.22 [23:44:33< 0:45:00] +[titan] 2025-10-05 22:18:56,225 - root - INFO - step: 38780 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 22:18:56,225 - root - INFO - lr: 
5.1053e-06 gnorm: 1.23 [23:44:44< 0:44:49] +[titan] 2025-10-05 22:19:07,063 - root - INFO - step: 38785 loss: 1.8911 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 22:19:07,063 - root - INFO - lr: 5.1044e-06 gnorm: 1.28 [23:44:55< 0:44:38] +[titan] 2025-10-05 22:19:17,908 - root - INFO - step: 38790 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 22:19:17,908 - root - INFO - lr: 5.1036e-06 gnorm: 1.27 [23:45:06< 0:44:27] +[titan] 2025-10-05 22:19:28,765 - root - INFO - step: 38795 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:19:28,765 - root - INFO - lr: 5.1027e-06 gnorm: 1.25 [23:45:16< 0:44:16] +[titan] 2025-10-05 22:19:37,452 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:19:39,646 - root - INFO - step: 38800 loss: 1.9199 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 22:19:39,646 - root - INFO - lr: 5.1019e-06 gnorm: 1.22 [23:45:27< 0:44:05] +[titan] 2025-10-05 22:19:50,541 - root - INFO - step: 38805 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 22:19:50,542 - root - INFO - lr: 5.1010e-06 gnorm: 1.25 [23:45:38< 0:43:54] +[titan] 2025-10-05 22:20:01,404 - root - INFO - step: 38810 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6756 +[titan] 2025-10-05 22:20:01,405 - root - INFO - lr: 5.1002e-06 gnorm: 1.25 [23:45:49< 0:43:43] +[titan] 2025-10-05 22:20:12,258 - root - INFO - step: 38815 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7061 +[titan] 2025-10-05 22:20:12,258 - root - INFO - lr: 5.0993e-06 gnorm: 1.33 [23:46:00< 0:43:32] +[titan] 2025-10-05 22:20:23,109 - root - INFO - step: 38820 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:20:23,109 - root - INFO - lr: 5.0985e-06 gnorm: 1.25 [23:46:11< 0:43:21] +[titan] 2025-10-05 22:20:33,977 - root - INFO - step: 38825 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6488 +[titan] 2025-10-05 22:20:33,977 - root - INFO - lr: 5.0977e-06 gnorm: 1.28 [23:46:22< 0:43:10] +[titan] 2025-10-05 22:20:44,821 - root - INFO - step: 38830 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6682 +[titan] 2025-10-05 22:20:44,821 - root - INFO - lr: 
5.0969e-06 gnorm: 1.27 [23:46:33< 0:42:59] +[titan] 2025-10-05 22:20:55,718 - root - INFO - step: 38835 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 22:20:55,718 - root - INFO - lr: 5.0960e-06 gnorm: 1.26 [23:46:43< 0:42:48] +[titan] 2025-10-05 22:21:06,566 - root - INFO - step: 38840 loss: 1.9277 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 22:21:06,566 - root - INFO - lr: 5.0952e-06 gnorm: 1.27 [23:46:54< 0:42:36] +[titan] 2025-10-05 22:21:17,446 - root - INFO - step: 38845 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6385 +[titan] 2025-10-05 22:21:17,446 - root - INFO - lr: 5.0944e-06 gnorm: 1.24 [23:47:05< 0:42:25] +[titan] 2025-10-05 22:21:26,132 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:21:28,317 - root - INFO - step: 38850 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 22:21:28,317 - root - INFO - lr: 5.0936e-06 gnorm: 1.28 [23:47:16< 0:42:14] +[titan] 2025-10-05 22:21:39,188 - root - INFO - step: 38855 loss: 1.8571 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6436 +[titan] 2025-10-05 22:21:39,188 - root - INFO - lr: 5.0928e-06 gnorm: 1.25 [23:47:27< 0:42:03] +[titan] 2025-10-05 22:21:50,046 - root - INFO - step: 38860 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 22:21:50,047 - root - INFO - lr: 5.0920e-06 gnorm: 1.30 [23:47:38< 0:41:52] +[titan] 2025-10-05 22:22:00,909 - root - INFO - step: 38865 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:22:00,910 - root - INFO - lr: 5.0911e-06 gnorm: 1.28 [23:47:49< 0:41:41] +[titan] 2025-10-05 22:22:11,785 - root - INFO - step: 38870 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 22:22:11,786 - root - INFO - lr: 5.0903e-06 gnorm: 1.26 [23:47:59< 0:41:30] +[titan] 2025-10-05 22:22:22,628 - root - INFO - step: 38875 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7259 +[titan] 2025-10-05 22:22:22,628 - root - INFO - lr: 5.0895e-06 gnorm: 1.24 [23:48:10< 0:41:19] +[titan] 2025-10-05 22:22:33,500 - root - INFO - step: 38880 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 22:22:33,500 - root - INFO - lr: 
5.0888e-06 gnorm: 1.26 [23:48:21< 0:41:08] +[titan] 2025-10-05 22:22:44,338 - root - INFO - step: 38885 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:22:44,338 - root - INFO - lr: 5.0880e-06 gnorm: 1.28 [23:48:32< 0:40:57] +[titan] 2025-10-05 22:22:55,187 - root - INFO - step: 38890 loss: 1.9042 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6843 +[titan] 2025-10-05 22:22:55,187 - root - INFO - lr: 5.0872e-06 gnorm: 1.24 [23:48:43< 0:40:46] +[titan] 2025-10-05 22:23:06,026 - root - INFO - step: 38895 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:23:06,026 - root - INFO - lr: 5.0864e-06 gnorm: 1.26 [23:48:54< 0:40:35] +[titan] 2025-10-05 22:23:14,733 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:23:16,918 - root - INFO - step: 38900 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 22:23:16,918 - root - INFO - lr: 5.0856e-06 gnorm: 1.25 [23:49:05< 0:40:24] +[titan] 2025-10-05 22:23:27,768 - root - INFO - step: 38905 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 22:23:27,768 - root - INFO - lr: 5.0848e-06 gnorm: 1.28 [23:49:15< 0:40:13] +[titan] 2025-10-05 22:23:38,736 - root - INFO - step: 38910 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 22:23:38,736 - root - INFO - lr: 5.0841e-06 gnorm: 1.25 [23:49:26< 0:40:02] +[titan] 2025-10-05 22:23:43,291 - root - INFO - Dumping profiler traces at step 38912 +[titan] 2025-10-05 22:23:43,328 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:23:49,840 - root - INFO - step: 38915 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:23:49,840 - root - INFO - lr: 5.0833e-06 gnorm: 1.23 [23:49:38< 0:39:51] +[titan] 2025-10-05 22:24:00,685 - root - INFO - step: 38920 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 22:24:00,685 - root - INFO - lr: 5.0825e-06 gnorm: 1.24 [23:49:48< 0:39:40] +[titan] 2025-10-05 22:24:11,518 - root - INFO - step: 38925 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:24:11,518 - root - INFO - lr: 5.0818e-06 gnorm: 1.28 [23:49:59< 0:39:29] +[titan] 2025-10-05 22:24:22,383 - root - INFO - step: 38930 loss: 
1.9030 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:24:22,383 - root - INFO - lr: 5.0810e-06 gnorm: 1.22 [23:50:10< 0:39:18] +[titan] 2025-10-05 22:24:33,286 - root - INFO - step: 38935 loss: 1.9341 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 22:24:33,286 - root - INFO - lr: 5.0803e-06 gnorm: 1.25 [23:50:21< 0:39:07] +[titan] 2025-10-05 22:24:44,145 - root - INFO - step: 38940 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6642 +[titan] 2025-10-05 22:24:44,145 - root - INFO - lr: 5.0795e-06 gnorm: 1.33 [23:50:32< 0:38:56] +[titan] 2025-10-05 22:24:55,011 - root - INFO - step: 38945 loss: 1.8488 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6353 +[titan] 2025-10-05 22:24:55,011 - root - INFO - lr: 5.0788e-06 gnorm: 1.25 [23:50:43< 0:38:45] +[titan] 2025-10-05 22:25:03,688 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:25:05,861 - root - INFO - step: 38950 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 22:25:05,861 - root - INFO - lr: 5.0780e-06 gnorm: 1.26 [23:50:54< 0:38:34] +[titan] 2025-10-05 22:25:16,696 - root - INFO - step: 38955 loss: 1.8763 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6594 +[titan] 2025-10-05 22:25:16,696 - root - INFO - lr: 5.0773e-06 gnorm: 1.25 [23:51:04< 0:38:23] +[titan] 2025-10-05 22:25:27,557 - root - INFO - step: 38960 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:25:27,557 - root - INFO - lr: 5.0765e-06 gnorm: 1.25 [23:51:15< 0:38:12] +[titan] 2025-10-05 22:25:38,467 - root - INFO - step: 38965 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 22:25:38,467 - root - INFO - lr: 5.0758e-06 gnorm: 1.24 [23:51:26< 0:38:01] +[titan] 2025-10-05 22:25:49,317 - root - INFO - step: 38970 loss: 1.8769 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 22:25:49,317 - root - INFO - lr: 5.0751e-06 gnorm: 1.22 [23:51:37< 0:37:50] +[titan] 2025-10-05 22:26:00,183 - root - INFO - step: 38975 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 22:26:00,183 - root - INFO - lr: 5.0743e-06 gnorm: 1.28 [23:51:48< 0:37:39] +[titan] 2025-10-05 22:26:11,057 - root - INFO - step: 38980 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 22:26:11,057 - root - INFO - lr: 
5.0736e-06 gnorm: 1.29 [23:51:59< 0:37:28] +[titan] 2025-10-05 22:26:21,891 - root - INFO - step: 38985 loss: 1.8837 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 22:26:21,891 - root - INFO - lr: 5.0729e-06 gnorm: 1.26 [23:52:10< 0:37:17] +[titan] 2025-10-05 22:26:32,761 - root - INFO - step: 38990 loss: 1.8936 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:26:32,761 - root - INFO - lr: 5.0722e-06 gnorm: 1.25 [23:52:20< 0:37:06] +[titan] 2025-10-05 22:26:43,668 - root - INFO - step: 38995 loss: 1.8343 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2124 global_avg_mtp_loss: 1.6219 +[titan] 2025-10-05 22:26:43,668 - root - INFO - lr: 5.0715e-06 gnorm: 1.22 [23:52:31< 0:36:55] +[titan] 2025-10-05 22:26:52,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:26:54,511 - root - INFO - step: 39000 loss: 1.8692 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6534 +[titan] 2025-10-05 22:26:54,511 - root - INFO - lr: 5.0708e-06 gnorm: 1.23 [23:52:42< 0:36:44] +[titan] 2025-10-05 22:27:05,357 - root - INFO - step: 39005 loss: 1.8448 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6317 +[titan] 2025-10-05 22:27:05,357 - root - INFO - lr: 5.0701e-06 gnorm: 1.24 [23:52:53< 0:36:33] +[titan] 2025-10-05 22:27:16,214 - root - INFO - step: 39010 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 22:27:16,214 - root - INFO - lr: 5.0694e-06 gnorm: 1.27 [23:53:04< 0:36:22] +[titan] 2025-10-05 22:27:27,027 - root - INFO - step: 39015 loss: 1.8935 memory: 118.84GiB(85.28%) tps: 30,304 tflops: 420.43 mfu: 42.51% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 22:27:27,028 - root - INFO - lr: 5.0687e-06 gnorm: 1.29 [23:53:15< 0:36:11] +[titan] 2025-10-05 22:27:37,873 - root - INFO - step: 39020 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 22:27:37,873 - root - INFO - lr: 5.0680e-06 gnorm: 1.25 [23:53:26< 0:36:00] +[titan] 2025-10-05 22:27:48,725 - root - INFO - step: 39025 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 22:27:48,725 - root - INFO - lr: 5.0673e-06 gnorm: 1.23 [23:53:36< 0:35:49] +[titan] 2025-10-05 22:27:59,585 - root - INFO - step: 39030 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7320 +[titan] 2025-10-05 22:27:59,585 - root - INFO - lr: 
5.0666e-06 gnorm: 1.26 [23:53:47< 0:35:38] +[titan] 2025-10-05 22:28:10,411 - root - INFO - step: 39035 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:28:10,411 - root - INFO - lr: 5.0659e-06 gnorm: 1.29 [23:53:58< 0:35:26] +[titan] 2025-10-05 22:28:21,251 - root - INFO - step: 39040 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:28:21,251 - root - INFO - lr: 5.0652e-06 gnorm: 1.26 [23:54:09< 0:35:15] +[titan] 2025-10-05 22:28:32,077 - root - INFO - step: 39045 loss: 1.9016 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6812 +[titan] 2025-10-05 22:28:32,077 - root - INFO - lr: 5.0645e-06 gnorm: 1.24 [23:54:20< 0:35:04] +[titan] 2025-10-05 22:28:40,768 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:28:42,943 - root - INFO - step: 39050 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 22:28:42,943 - root - INFO - lr: 5.0639e-06 gnorm: 1.25 [23:54:31< 0:34:53] +[titan] 2025-10-05 22:28:53,779 - root - INFO - step: 39055 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 22:28:53,779 - root - INFO - lr: 5.0632e-06 gnorm: 1.27 [23:54:41< 0:34:42] +[titan] 2025-10-05 22:29:04,650 - root - INFO - step: 39060 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:29:04,650 - root - INFO - lr: 5.0625e-06 gnorm: 1.28 [23:54:52< 0:34:31] +[titan] 2025-10-05 22:29:15,481 - root - INFO - step: 39065 loss: 1.8892 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:29:15,481 - root - INFO - lr: 5.0619e-06 gnorm: 1.29 [23:55:03< 0:34:20] +[titan] 2025-10-05 22:29:26,319 - root - INFO - step: 39070 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7449 +[titan] 2025-10-05 22:29:26,319 - root - INFO - lr: 5.0612e-06 gnorm: 1.27 [23:55:14< 0:34:09] +[titan] 2025-10-05 22:29:37,169 - root - INFO - step: 39075 loss: 1.8711 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:29:37,169 - root - INFO - lr: 5.0606e-06 gnorm: 1.39 [23:55:25< 0:33:58] +[titan] 2025-10-05 22:29:47,983 - root - INFO - step: 39080 loss: 1.9585 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 22:29:47,983 - root - INFO - lr: 
5.0599e-06 gnorm: 1.27 [23:55:36< 0:33:47] +[titan] 2025-10-05 22:29:58,811 - root - INFO - step: 39085 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 22:29:58,811 - root - INFO - lr: 5.0593e-06 gnorm: 1.28 [23:55:47< 0:33:36] +[titan] 2025-10-05 22:30:09,630 - root - INFO - step: 39090 loss: 1.8996 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:30:09,630 - root - INFO - lr: 5.0586e-06 gnorm: 1.26 [23:55:57< 0:33:25] +[titan] 2025-10-05 22:30:20,468 - root - INFO - step: 39095 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6810 +[titan] 2025-10-05 22:30:20,469 - root - INFO - lr: 5.0580e-06 gnorm: 1.24 [23:56:08< 0:33:14] +[titan] 2025-10-05 22:30:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:30:31,293 - root - INFO - step: 39100 loss: 1.9874 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 22:30:31,293 - root - INFO - lr: 5.0573e-06 gnorm: 1.34 [23:56:19< 0:33:03] +[titan] 2025-10-05 22:30:42,362 - root - INFO - step: 39105 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,605 tflops: 410.73 mfu: 41.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:30:42,362 - root - INFO - lr: 5.0567e-06 gnorm: 1.26 [23:56:30< 0:32:52] +[titan] 2025-10-05 22:30:53,217 - root - INFO - step: 39110 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2156 global_avg_mtp_loss: 1.6453 +[titan] 2025-10-05 22:30:53,217 - root - INFO - lr: 5.0561e-06 gnorm: 1.23 [23:56:41< 0:32:41] +[titan] 2025-10-05 22:31:04,043 - root - INFO - step: 39115 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 22:31:04,043 - root - INFO - lr: 5.0554e-06 gnorm: 1.23 [23:56:52< 0:32:30] +[titan] 2025-10-05 22:31:14,877 - root - INFO - step: 39120 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 22:31:14,877 - root - INFO - lr: 5.0548e-06 gnorm: 1.28 [23:57:03< 0:32:19] +[titan] 2025-10-05 22:31:25,759 - root - INFO - step: 39125 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 22:31:25,759 - root - INFO - lr: 5.0542e-06 gnorm: 1.27 [23:57:13< 0:32:08] +[titan] 2025-10-05 22:31:36,579 - root - INFO - step: 39130 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 22:31:36,579 - root - INFO - lr: 
5.0536e-06 gnorm: 1.28 [23:57:24< 0:31:57] +[titan] 2025-10-05 22:31:47,420 - root - INFO - step: 39135 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7389 +[titan] 2025-10-05 22:31:47,420 - root - INFO - lr: 5.0530e-06 gnorm: 1.29 [23:57:35< 0:31:46] +[titan] 2025-10-05 22:31:58,260 - root - INFO - step: 39140 loss: 1.9505 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7258 +[titan] 2025-10-05 22:31:58,260 - root - INFO - lr: 5.0523e-06 gnorm: 1.27 [23:57:46< 0:31:35] +[titan] 2025-10-05 22:32:09,071 - root - INFO - step: 39145 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 22:32:09,071 - root - INFO - lr: 5.0517e-06 gnorm: 1.24 [23:57:57< 0:31:24] +[titan] 2025-10-05 22:32:17,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:32:19,885 - root - INFO - step: 39150 loss: 1.8924 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:32:19,885 - root - INFO - lr: 5.0511e-06 gnorm: 1.26 [23:58:08< 0:31:13] +[titan] 2025-10-05 22:32:30,741 - root - INFO - step: 39155 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:32:30,741 - root - INFO - lr: 5.0505e-06 gnorm: 1.26 [23:58:18< 0:31:02] +[titan] 2025-10-05 22:32:41,618 - root - INFO - step: 39160 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 22:32:41,618 - root - INFO - lr: 5.0499e-06 gnorm: 1.27 [23:58:29< 0:30:51] +[titan] 2025-10-05 22:32:52,420 - root - INFO - step: 39165 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,335 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7041 +[titan] 2025-10-05 22:32:52,421 - root - INFO - lr: 5.0493e-06 gnorm: 1.31 [23:58:40< 0:30:40] +[titan] 2025-10-05 22:33:03,241 - root - INFO - step: 39170 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6786 +[titan] 2025-10-05 22:33:03,241 - root - INFO - lr: 5.0488e-06 gnorm: 1.28 [23:58:51< 0:30:29] +[titan] 2025-10-05 22:33:14,059 - root - INFO - step: 39175 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:33:14,059 - root - INFO - lr: 5.0482e-06 gnorm: 1.25 [23:59:02< 0:30:18] +[titan] 2025-10-05 22:33:24,854 - root - INFO - step: 39180 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,356 tflops: 421.14 mfu: 42.58% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7066 +[titan] 2025-10-05 22:33:24,854 - root - INFO - lr: 
5.0476e-06 gnorm: 1.25 [23:59:13< 0:30:07] +[titan] 2025-10-05 22:33:35,698 - root - INFO - step: 39185 loss: 1.8822 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6646 +[titan] 2025-10-05 22:33:35,698 - root - INFO - lr: 5.0470e-06 gnorm: 1.24 [23:59:23< 0:29:56] +[titan] 2025-10-05 22:33:46,541 - root - INFO - step: 39190 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 22:33:46,541 - root - INFO - lr: 5.0464e-06 gnorm: 1.26 [23:59:34< 0:29:45] +[titan] 2025-10-05 22:33:57,343 - root - INFO - step: 39195 loss: 1.8734 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.87 mfu: 42.56% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6567 +[titan] 2025-10-05 22:33:57,343 - root - INFO - lr: 5.0459e-06 gnorm: 1.26 [23:59:45< 0:29:34] +[titan] 2025-10-05 22:34:05,988 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:34:08,160 - root - INFO - step: 39200 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6909 +[titan] 2025-10-05 22:34:08,161 - root - INFO - lr: 5.0453e-06 gnorm: 1.24 [23:59:56< 0:29:23] +[titan] 2025-10-05 22:34:18,971 - root - INFO - step: 39205 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,312 tflops: 420.53 mfu: 42.52% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:34:18,971 - root - INFO - lr: 5.0447e-06 gnorm: 1.27 [1 day, 0:00:07< 0:29:12] +[titan] 2025-10-05 22:34:29,800 - root - INFO - step: 39210 loss: 1.8480 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 22:34:29,800 - root - INFO - lr: 5.0442e-06 gnorm: 1.20 [1 day, 0:00:17< 0:29:01] +[titan] 2025-10-05 22:34:40,603 - root - INFO - step: 39215 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 22:34:40,603 - root - INFO - lr: 5.0436e-06 gnorm: 1.28 [1 day, 0:00:28< 0:28:50] +[titan] 2025-10-05 22:34:51,467 - root - INFO - step: 39220 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6993 +[titan] 2025-10-05 22:34:51,467 - root - INFO - lr: 5.0431e-06 gnorm: 1.25 [1 day, 0:00:39< 0:28:39] +[titan] 2025-10-05 22:35:02,300 - root - INFO - step: 39225 loss: 1.9143 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 22:35:02,300 - root - INFO - lr: 5.0425e-06 gnorm: 1.26 [1 day, 0:00:50< 0:28:28] +[titan] 2025-10-05 22:35:13,119 - root - INFO - step: 39230 loss: 1.8713 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6548 +[titan] 2025-10-05 
22:35:13,119 - root - INFO - lr: 5.0420e-06 gnorm: 1.29 [1 day, 0:01:01< 0:28:17] +[titan] 2025-10-05 22:35:23,922 - root - INFO - step: 39235 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,333 tflops: 420.83 mfu: 42.55% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6983 +[titan] 2025-10-05 22:35:23,922 - root - INFO - lr: 5.0414e-06 gnorm: 1.26 [1 day, 0:01:12< 0:28:06] +[titan] 2025-10-05 22:35:34,763 - root - INFO - step: 39240 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 22:35:34,763 - root - INFO - lr: 5.0409e-06 gnorm: 1.28 [1 day, 0:01:22< 0:27:55] +[titan] 2025-10-05 22:35:45,607 - root - INFO - step: 39245 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 22:35:45,608 - root - INFO - lr: 5.0403e-06 gnorm: 1.28 [1 day, 0:01:33< 0:27:43] +[titan] 2025-10-05 22:35:54,249 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:35:56,420 - root - INFO - step: 39250 loss: 1.9097 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 22:35:56,420 - root - INFO - lr: 5.0398e-06 gnorm: 1.22 [1 day, 0:01:44< 0:27:32] +[titan] 2025-10-05 22:36:07,280 - root - INFO - step: 39255 loss: 1.8902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6708 +[titan] 2025-10-05 22:36:07,280 - root - INFO - lr: 5.0393e-06 gnorm: 1.24 [1 day, 0:01:55< 0:27:21] +[titan] 2025-10-05 22:36:18,098 - root - INFO - step: 39260 loss: 1.9171 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:36:18,098 - root - INFO - lr: 5.0388e-06 gnorm: 1.26 [1 day, 0:02:06< 0:27:10] +[titan] 2025-10-05 22:36:28,912 - root - INFO - step: 39265 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6622 +[titan] 2025-10-05 22:36:28,912 - root - INFO - lr: 5.0382e-06 gnorm: 1.27 [1 day, 0:02:17< 0:26:59] +[titan] 2025-10-05 22:36:39,738 - root - INFO - step: 39270 loss: 1.8621 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6463 +[titan] 2025-10-05 22:36:39,739 - root - INFO - lr: 5.0377e-06 gnorm: 1.26 [1 day, 0:02:27< 0:26:48] +[titan] 2025-10-05 22:36:50,600 - root - INFO - step: 39275 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 22:36:50,600 - root - INFO - lr: 5.0372e-06 gnorm: 1.28 [1 day, 0:02:38< 0:26:37] +[titan] 2025-10-05 22:37:01,420 - root - INFO - step: 39280 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6663 +[titan] 2025-10-05 
22:37:01,421 - root - INFO - lr: 5.0367e-06 gnorm: 1.20 [1 day, 0:02:49< 0:26:26] +[titan] 2025-10-05 22:37:12,238 - root - INFO - step: 39285 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 22:37:12,238 - root - INFO - lr: 5.0362e-06 gnorm: 1.27 [1 day, 0:03:00< 0:26:15] +[titan] 2025-10-05 22:37:23,040 - root - INFO - step: 39290 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.87 mfu: 42.55% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 22:37:23,040 - root - INFO - lr: 5.0357e-06 gnorm: 1.26 [1 day, 0:03:11< 0:26:04] +[titan] 2025-10-05 22:37:33,840 - root - INFO - step: 39295 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:37:33,840 - root - INFO - lr: 5.0352e-06 gnorm: 1.36 [1 day, 0:03:22< 0:25:53] +[titan] 2025-10-05 22:37:42,487 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:37:44,699 - root - INFO - step: 39300 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6904 +[titan] 2025-10-05 22:37:44,699 - root - INFO - lr: 5.0347e-06 gnorm: 1.25 [1 day, 0:03:32< 0:25:42] +[titan] 2025-10-05 22:37:55,513 - root - INFO - step: 39305 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 22:37:55,513 - root - INFO - lr: 5.0342e-06 gnorm: 1.29 [1 day, 0:03:43< 0:25:31] +[titan] 2025-10-05 22:38:06,319 - root - INFO - step: 39310 loss: 1.8070 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2093 global_avg_mtp_loss: 1.5976 +[titan] 2025-10-05 22:38:06,319 - root - INFO - lr: 5.0337e-06 gnorm: 1.22 [1 day, 0:03:54< 0:25:20] +[titan] 2025-10-05 22:38:17,140 - root - INFO - step: 39315 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 22:38:17,140 - root - INFO - lr: 5.0332e-06 gnorm: 1.28 [1 day, 0:04:05< 0:25:09] +[titan] 2025-10-05 22:38:27,940 - root - INFO - step: 39320 loss: 1.8952 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:38:27,940 - root - INFO - lr: 5.0327e-06 gnorm: 1.24 [1 day, 0:04:16< 0:24:58] +[titan] 2025-10-05 22:38:38,794 - root - INFO - step: 39325 loss: 1.8206 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2106 global_avg_mtp_loss: 1.6101 +[titan] 2025-10-05 22:38:38,795 - root - INFO - lr: 5.0323e-06 gnorm: 1.23 [1 day, 0:04:26< 0:24:47] +[titan] 2025-10-05 22:38:49,643 - root - INFO - step: 39330 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7124 +[titan] 2025-10-05 
22:38:49,643 - root - INFO - lr: 5.0318e-06 gnorm: 1.29 [1 day, 0:04:37< 0:24:36] +[titan] 2025-10-05 22:39:00,463 - root - INFO - step: 39335 loss: 1.9117 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:39:00,464 - root - INFO - lr: 5.0313e-06 gnorm: 1.25 [1 day, 0:04:48< 0:24:25] +[titan] 2025-10-05 22:39:11,289 - root - INFO - step: 39340 loss: 1.8200 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2105 global_avg_mtp_loss: 1.6095 +[titan] 2025-10-05 22:39:11,289 - root - INFO - lr: 5.0308e-06 gnorm: 1.26 [1 day, 0:04:59< 0:24:14] +[titan] 2025-10-05 22:39:22,114 - root - INFO - step: 39345 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:39:22,114 - root - INFO - lr: 5.0304e-06 gnorm: 1.27 [1 day, 0:05:10< 0:24:03] +[titan] 2025-10-05 22:39:30,839 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:39:33,015 - root - INFO - step: 39350 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 22:39:33,015 - root - INFO - lr: 5.0299e-06 gnorm: 1.24 [1 day, 0:05:21< 0:23:52] +[titan] 2025-10-05 22:39:43,880 - root - INFO - step: 39355 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 22:39:43,880 - root - INFO - lr: 5.0294e-06 gnorm: 1.21 [1 day, 0:05:32< 0:23:41] +[titan] 2025-10-05 22:39:54,779 - root - INFO - step: 39360 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2123 global_avg_mtp_loss: 1.6258 +[titan] 2025-10-05 22:39:54,779 - root - INFO - lr: 5.0290e-06 gnorm: 1.26 [1 day, 0:05:42< 0:23:30] +[titan] 2025-10-05 22:40:05,616 - root - INFO - step: 39365 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 22:40:05,617 - root - INFO - lr: 5.0285e-06 gnorm: 1.22 [1 day, 0:05:53< 0:23:19] +[titan] 2025-10-05 22:40:16,459 - root - INFO - step: 39370 loss: 1.8828 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 22:40:16,459 - root - INFO - lr: 5.0281e-06 gnorm: 1.23 [1 day, 0:06:04< 0:23:08] +[titan] 2025-10-05 22:40:27,280 - root - INFO - step: 39375 loss: 1.9073 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 22:40:27,280 - root - INFO - lr: 5.0277e-06 gnorm: 1.28 [1 day, 0:06:15< 0:22:57] +[titan] 2025-10-05 22:40:38,119 - root - INFO - step: 39380 loss: 1.9206 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 
22:40:38,119 - root - INFO - lr: 5.0272e-06 gnorm: 1.23 [1 day, 0:06:26< 0:22:46] +[titan] 2025-10-05 22:40:49,118 - root - INFO - step: 39385 loss: 1.9186 memory: 118.84GiB(85.28%) tps: 29,794 tflops: 413.35 mfu: 41.79% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6970 +[titan] 2025-10-05 22:40:49,118 - root - INFO - lr: 5.0268e-06 gnorm: 1.25 [1 day, 0:06:37< 0:22:35] +[titan] 2025-10-05 22:40:59,990 - root - INFO - step: 39390 loss: 1.9410 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 22:40:59,990 - root - INFO - lr: 5.0263e-06 gnorm: 1.30 [1 day, 0:06:48< 0:22:24] +[titan] 2025-10-05 22:41:10,818 - root - INFO - step: 39395 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6520 +[titan] 2025-10-05 22:41:10,818 - root - INFO - lr: 5.0259e-06 gnorm: 1.23 [1 day, 0:06:58< 0:22:13] +[titan] 2025-10-05 22:41:19,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:41:21,663 - root - INFO - step: 39400 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:41:21,663 - root - INFO - lr: 5.0255e-06 gnorm: 1.24 [1 day, 0:07:09< 0:22:02] +[titan] 2025-10-05 22:41:32,499 - root - INFO - step: 39405 loss: 1.8950 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:41:32,500 - root - INFO - lr: 5.0251e-06 gnorm: 1.26 [1 day, 0:07:20< 0:21:51] +[titan] 2025-10-05 22:41:43,338 - root - INFO - step: 39410 loss: 1.9067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:41:43,339 - root - INFO - lr: 5.0246e-06 gnorm: 1.25 [1 day, 0:07:31< 0:21:40] +[titan] 2025-10-05 22:41:54,271 - root - INFO - step: 39415 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 22:41:54,271 - root - INFO - lr: 5.0242e-06 gnorm: 1.26 [1 day, 0:07:42< 0:21:29] +[titan] 2025-10-05 22:42:05,121 - root - INFO - step: 39420 loss: 1.8925 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6733 +[titan] 2025-10-05 22:42:05,121 - root - INFO - lr: 5.0238e-06 gnorm: 1.27 [1 day, 0:07:53< 0:21:18] +[titan] 2025-10-05 22:42:14,074 - root - INFO - Dumping profiler traces at step 39424 +[titan] 2025-10-05 22:42:14,111 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:42:16,301 - root - INFO - step: 39425 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 29,310 tflops: 406.64 mfu: 41.12% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 22:42:16,301 - root - INFO - lr: 5.0234e-06 gnorm: 1.27 [1 day, 0:08:04< 0:21:07] +[titan] 2025-10-05 22:42:27,154 
- root - INFO - step: 39430 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 22:42:27,155 - root - INFO - lr: 5.0230e-06 gnorm: 1.25 [1 day, 0:08:15< 0:20:56] +[titan] 2025-10-05 22:42:37,960 - root - INFO - step: 39435 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 22:42:37,960 - root - INFO - lr: 5.0226e-06 gnorm: 1.27 [1 day, 0:08:26< 0:20:45] +[titan] 2025-10-05 22:42:48,874 - root - INFO - step: 39440 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:42:48,874 - root - INFO - lr: 5.0222e-06 gnorm: 1.24 [1 day, 0:08:37< 0:20:34] +[titan] 2025-10-05 22:42:59,759 - root - INFO - step: 39445 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 22:42:59,759 - root - INFO - lr: 5.0218e-06 gnorm: 1.26 [1 day, 0:08:47< 0:20:23] +[titan] 2025-10-05 22:43:08,435 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:43:10,640 - root - INFO - step: 39450 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 22:43:10,641 - root - INFO - lr: 5.0214e-06 gnorm: 1.28 [1 day, 0:08:58< 0:20:12] +[titan] 2025-10-05 22:43:21,477 - root - INFO - step: 39455 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:43:21,478 - root - INFO - lr: 5.0210e-06 gnorm: 1.32 [1 day, 0:09:09< 0:20:01] +[titan] 2025-10-05 22:43:32,319 - root - INFO - step: 39460 loss: 1.9474 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7223 +[titan] 2025-10-05 22:43:32,319 - root - INFO - lr: 5.0206e-06 gnorm: 1.30 [1 day, 0:09:20< 0:19:50] +[titan] 2025-10-05 22:43:43,178 - root - INFO - step: 39465 loss: 1.8880 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6701 +[titan] 2025-10-05 22:43:43,178 - root - INFO - lr: 5.0203e-06 gnorm: 1.28 [1 day, 0:09:31< 0:19:39] +[titan] 2025-10-05 22:43:54,102 - root - INFO - step: 39470 loss: 1.8901 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6714 +[titan] 2025-10-05 22:43:54,102 - root - INFO - lr: 5.0199e-06 gnorm: 1.25 [1 day, 0:09:42< 0:19:27] +[titan] 2025-10-05 22:44:04,938 - root - INFO - step: 39475 loss: 1.8656 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6497 +[titan] 2025-10-05 22:44:04,938 - root - INFO - lr: 5.0195e-06 gnorm: 1.23 [1 day, 0:09:53< 0:19:16] +[titan] 2025-10-05 22:44:15,757 - root - INFO - step: 39480 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 
22:44:15,757 - root - INFO - lr: 5.0191e-06 gnorm: 1.27 [1 day, 0:10:03< 0:19:05] +[titan] 2025-10-05 22:44:26,640 - root - INFO - step: 39485 loss: 1.8523 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6375 +[titan] 2025-10-05 22:44:26,640 - root - INFO - lr: 5.0188e-06 gnorm: 1.23 [1 day, 0:10:14< 0:18:54] +[titan] 2025-10-05 22:44:37,455 - root - INFO - step: 39490 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6979 +[titan] 2025-10-05 22:44:37,455 - root - INFO - lr: 5.0184e-06 gnorm: 1.26 [1 day, 0:10:25< 0:18:43] +[titan] 2025-10-05 22:44:48,278 - root - INFO - step: 39495 loss: 1.8271 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2112 global_avg_mtp_loss: 1.6159 +[titan] 2025-10-05 22:44:48,278 - root - INFO - lr: 5.0181e-06 gnorm: 1.24 [1 day, 0:10:36< 0:18:32] +[titan] 2025-10-05 22:44:57,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:44:59,302 - root - INFO - step: 39500 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 29,726 tflops: 412.41 mfu: 41.70% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:44:59,302 - root - INFO - lr: 5.0177e-06 gnorm: 1.26 [1 day, 0:10:47< 0:18:21] +[titan] 2025-10-05 22:45:10,138 - root - INFO - step: 39505 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:45:10,138 - root - INFO - lr: 5.0173e-06 gnorm: 1.25 [1 day, 0:10:58< 0:18:10] +[titan] 2025-10-05 22:45:20,973 - root - INFO - step: 39510 loss: 1.9394 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 22:45:20,973 - root - INFO - lr: 5.0170e-06 gnorm: 1.26 [1 day, 0:11:09< 0:17:59] +[titan] 2025-10-05 22:45:31,857 - root - INFO - step: 39515 loss: 1.8345 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6215 +[titan] 2025-10-05 22:45:31,857 - root - INFO - lr: 5.0167e-06 gnorm: 1.32 [1 day, 0:11:20< 0:17:48] +[titan] 2025-10-05 22:45:42,715 - root - INFO - step: 39520 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 22:45:42,715 - root - INFO - lr: 5.0163e-06 gnorm: 1.30 [1 day, 0:11:30< 0:17:37] +[titan] 2025-10-05 22:45:53,618 - root - INFO - step: 39525 loss: 1.8642 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 22:45:53,618 - root - INFO - lr: 5.0160e-06 gnorm: 1.23 [1 day, 0:11:41< 0:17:26] +[titan] 2025-10-05 22:46:04,478 - root - INFO - step: 39530 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 
22:46:04,478 - root - INFO - lr: 5.0156e-06 gnorm: 1.28 [1 day, 0:11:52< 0:17:15] +[titan] 2025-10-05 22:46:15,353 - root - INFO - step: 39535 loss: 1.8455 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6312 +[titan] 2025-10-05 22:46:15,353 - root - INFO - lr: 5.0153e-06 gnorm: 1.27 [1 day, 0:12:03< 0:17:04] +[titan] 2025-10-05 22:46:26,197 - root - INFO - step: 39540 loss: 1.8853 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 22:46:26,197 - root - INFO - lr: 5.0150e-06 gnorm: 1.25 [1 day, 0:12:14< 0:16:53] +[titan] 2025-10-05 22:46:37,052 - root - INFO - step: 39545 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 22:46:37,052 - root - INFO - lr: 5.0147e-06 gnorm: 1.30 [1 day, 0:12:25< 0:16:42] +[titan] 2025-10-05 22:46:45,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:46:47,965 - root - INFO - step: 39550 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 22:46:47,966 - root - INFO - lr: 5.0143e-06 gnorm: 1.34 [1 day, 0:12:36< 0:16:31] +[titan] 2025-10-05 22:46:58,895 - root - INFO - step: 39555 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6838 +[titan] 2025-10-05 22:46:58,895 - root - INFO - lr: 5.0140e-06 gnorm: 1.24 [1 day, 0:12:47< 0:16:20] +[titan] 2025-10-05 22:47:09,746 - root - INFO - step: 39560 loss: 1.9366 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7128 +[titan] 2025-10-05 22:47:09,746 - root - INFO - lr: 5.0137e-06 gnorm: 1.22 [1 day, 0:12:57< 0:16:09] +[titan] 2025-10-05 22:47:20,590 - root - INFO - step: 39565 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 22:47:20,590 - root - INFO - lr: 5.0134e-06 gnorm: 1.26 [1 day, 0:13:08< 0:15:58] +[titan] 2025-10-05 22:47:31,450 - root - INFO - step: 39570 loss: 1.8471 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6330 +[titan] 2025-10-05 22:47:31,450 - root - INFO - lr: 5.0131e-06 gnorm: 1.24 [1 day, 0:13:19< 0:15:47] +[titan] 2025-10-05 22:47:42,294 - root - INFO - step: 39575 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:47:42,294 - root - INFO - lr: 5.0128e-06 gnorm: 1.26 [1 day, 0:13:30< 0:15:36] +[titan] 2025-10-05 22:47:53,185 - root - INFO - step: 39580 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 
22:47:53,186 - root - INFO - lr: 5.0125e-06 gnorm: 1.24 [1 day, 0:13:41< 0:15:25] +[titan] 2025-10-05 22:48:04,125 - root - INFO - step: 39585 loss: 1.8977 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6775 +[titan] 2025-10-05 22:48:04,125 - root - INFO - lr: 5.0122e-06 gnorm: 1.23 [1 day, 0:13:52< 0:15:14] +[titan] 2025-10-05 22:48:14,970 - root - INFO - step: 39590 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7450 +[titan] 2025-10-05 22:48:14,970 - root - INFO - lr: 5.0119e-06 gnorm: 1.27 [1 day, 0:14:03< 0:15:03] +[titan] 2025-10-05 22:48:25,818 - root - INFO - step: 39595 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 22:48:25,818 - root - INFO - lr: 5.0116e-06 gnorm: 1.25 [1 day, 0:14:13< 0:14:52] +[titan] 2025-10-05 22:48:34,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:48:36,655 - root - INFO - step: 39600 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:48:36,655 - root - INFO - lr: 5.0113e-06 gnorm: 1.26 [1 day, 0:14:24< 0:14:41] +[titan] 2025-10-05 22:48:47,505 - root - INFO - step: 39605 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:48:47,505 - root - INFO - lr: 5.0110e-06 gnorm: 1.25 [1 day, 0:14:35< 0:14:30] +[titan] 2025-10-05 22:48:58,396 - root - INFO - step: 39610 loss: 1.9266 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7045 +[titan] 2025-10-05 22:48:58,396 - root - INFO - lr: 5.0108e-06 gnorm: 1.27 [1 day, 0:14:46< 0:14:19] +[titan] 2025-10-05 22:49:09,249 - root - INFO - step: 39615 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:49:09,249 - root - INFO - lr: 5.0105e-06 gnorm: 1.28 [1 day, 0:14:57< 0:14:08] +[titan] 2025-10-05 22:49:20,110 - root - INFO - step: 39620 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 22:49:20,110 - root - INFO - lr: 5.0102e-06 gnorm: 1.26 [1 day, 0:15:08< 0:13:57] +[titan] 2025-10-05 22:49:30,958 - root - INFO - step: 39625 loss: 1.9163 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 22:49:30,958 - root - INFO - lr: 5.0100e-06 gnorm: 1.28 [1 day, 0:15:19< 0:13:46] +[titan] 2025-10-05 22:49:41,804 - root - INFO - step: 39630 loss: 1.8829 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 
22:49:41,804 - root - INFO - lr: 5.0097e-06 gnorm: 1.26 [1 day, 0:15:29< 0:13:35] +[titan] 2025-10-05 22:49:52,655 - root - INFO - step: 39635 loss: 1.8627 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6476 +[titan] 2025-10-05 22:49:52,655 - root - INFO - lr: 5.0094e-06 gnorm: 1.24 [1 day, 0:15:40< 0:13:24] +[titan] 2025-10-05 22:50:03,525 - root - INFO - step: 39640 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 22:50:03,526 - root - INFO - lr: 5.0092e-06 gnorm: 1.26 [1 day, 0:15:51< 0:13:13] +[titan] 2025-10-05 22:50:14,446 - root - INFO - step: 39645 loss: 1.8260 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2114 global_avg_mtp_loss: 1.6146 +[titan] 2025-10-05 22:50:14,446 - root - INFO - lr: 5.0089e-06 gnorm: 1.27 [1 day, 0:16:02< 0:13:02] +[titan] 2025-10-05 22:50:23,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:50:25,308 - root - INFO - step: 39650 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7039 +[titan] 2025-10-05 22:50:25,308 - root - INFO - lr: 5.0087e-06 gnorm: 1.28 [1 day, 0:16:13< 0:12:51] +[titan] 2025-10-05 22:50:36,173 - root - INFO - step: 39655 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:50:36,173 - root - INFO - lr: 5.0084e-06 gnorm: 1.26 [1 day, 0:16:24< 0:12:40] +[titan] 2025-10-05 22:50:47,037 - root - INFO - step: 39660 loss: 1.9006 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:50:47,037 - root - INFO - lr: 5.0082e-06 gnorm: 1.27 [1 day, 0:16:35< 0:12:29] +[titan] 2025-10-05 22:50:57,913 - root - INFO - step: 39665 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 22:50:57,914 - root - INFO - lr: 5.0079e-06 gnorm: 1.29 [1 day, 0:16:46< 0:12:18] +[titan] 2025-10-05 22:51:08,776 - root - INFO - step: 39670 loss: 1.8655 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 22:51:08,776 - root - INFO - lr: 5.0077e-06 gnorm: 1.30 [1 day, 0:16:56< 0:12:07] +[titan] 2025-10-05 22:51:19,670 - root - INFO - step: 39675 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:51:19,670 - root - INFO - lr: 5.0075e-06 gnorm: 1.25 [1 day, 0:17:07< 0:11:56] +[titan] 2025-10-05 22:51:30,542 - root - INFO - step: 39680 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 
22:51:30,542 - root - INFO - lr: 5.0072e-06 gnorm: 1.27 [1 day, 0:17:18< 0:11:45] +[titan] 2025-10-05 22:51:41,391 - root - INFO - step: 39685 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:51:41,391 - root - INFO - lr: 5.0070e-06 gnorm: 1.27 [1 day, 0:17:29< 0:11:34] +[titan] 2025-10-05 22:51:52,276 - root - INFO - step: 39690 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:51:52,276 - root - INFO - lr: 5.0068e-06 gnorm: 1.25 [1 day, 0:17:40< 0:11:23] +[titan] 2025-10-05 22:52:03,144 - root - INFO - step: 39695 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 22:52:03,144 - root - INFO - lr: 5.0066e-06 gnorm: 1.27 [1 day, 0:17:51< 0:11:12] +[titan] 2025-10-05 22:52:11,813 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:52:13,994 - root - INFO - step: 39700 loss: 1.9249 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 22:52:13,994 - root - INFO - lr: 5.0064e-06 gnorm: 1.23 [1 day, 0:18:02< 0:11:01] +[titan] 2025-10-05 22:52:24,851 - root - INFO - step: 39705 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6876 +[titan] 2025-10-05 22:52:24,852 - root - INFO - lr: 5.0062e-06 gnorm: 1.27 [1 day, 0:18:12< 0:10:50] +[titan] 2025-10-05 22:52:35,763 - root - INFO - step: 39710 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6727 +[titan] 2025-10-05 22:52:35,763 - root - INFO - lr: 5.0060e-06 gnorm: 1.30 [1 day, 0:18:23< 0:10:39] +[titan] 2025-10-05 22:52:46,625 - root - INFO - step: 39715 loss: 1.8269 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2119 global_avg_mtp_loss: 1.6151 +[titan] 2025-10-05 22:52:46,625 - root - INFO - lr: 5.0058e-06 gnorm: 1.25 [1 day, 0:18:34< 0:10:28] +[titan] 2025-10-05 22:52:57,517 - root - INFO - step: 39720 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 22:52:57,517 - root - INFO - lr: 5.0056e-06 gnorm: 1.26 [1 day, 0:18:45< 0:10:16] +[titan] 2025-10-05 22:53:08,394 - root - INFO - step: 39725 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 22:53:08,395 - root - INFO - lr: 5.0054e-06 gnorm: 1.27 [1 day, 0:18:56< 0:10:05] +[titan] 2025-10-05 22:53:19,248 - root - INFO - step: 39730 loss: 1.8733 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 
22:53:19,248 - root - INFO - lr: 5.0052e-06 gnorm: 1.26 [1 day, 0:19:07< 0:09:54] +[titan] 2025-10-05 22:53:30,094 - root - INFO - step: 39735 loss: 1.8701 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:53:30,094 - root - INFO - lr: 5.0050e-06 gnorm: 1.25 [1 day, 0:19:18< 0:09:43] +[titan] 2025-10-05 22:53:40,977 - root - INFO - step: 39740 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 22:53:40,977 - root - INFO - lr: 5.0048e-06 gnorm: 1.28 [1 day, 0:19:29< 0:09:32] +[titan] 2025-10-05 22:53:51,816 - root - INFO - step: 39745 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 22:53:51,816 - root - INFO - lr: 5.0046e-06 gnorm: 1.30 [1 day, 0:19:39< 0:09:21] +[titan] 2025-10-05 22:54:00,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:54:02,694 - root - INFO - step: 39750 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 22:54:02,694 - root - INFO - lr: 5.0044e-06 gnorm: 1.25 [1 day, 0:19:50< 0:09:10] +[titan] 2025-10-05 22:54:13,563 - root - INFO - step: 39755 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 22:54:13,563 - root - INFO - lr: 5.0042e-06 gnorm: 1.31 [1 day, 0:20:01< 0:08:59] +[titan] 2025-10-05 22:54:24,438 - root - INFO - step: 39760 loss: 1.8623 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6459 +[titan] 2025-10-05 22:54:24,438 - root - INFO - lr: 5.0041e-06 gnorm: 1.23 [1 day, 0:20:12< 0:08:48] +[titan] 2025-10-05 22:54:35,297 - root - INFO - step: 39765 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:54:35,297 - root - INFO - lr: 5.0039e-06 gnorm: 1.29 [1 day, 0:20:23< 0:08:37] +[titan] 2025-10-05 22:54:46,209 - root - INFO - step: 39770 loss: 1.8709 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6539 +[titan] 2025-10-05 22:54:46,209 - root - INFO - lr: 5.0037e-06 gnorm: 1.26 [1 day, 0:20:34< 0:08:26] +[titan] 2025-10-05 22:54:57,101 - root - INFO - step: 39775 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 22:54:57,101 - root - INFO - lr: 5.0036e-06 gnorm: 1.27 [1 day, 0:20:45< 0:08:15] +[titan] 2025-10-05 22:55:08,022 - root - INFO - step: 39780 loss: 1.9966 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7659 +[titan] 2025-10-05 
22:55:08,023 - root - INFO - lr: 5.0034e-06 gnorm: 1.31 [1 day, 0:20:56< 0:08:04] +[titan] 2025-10-05 22:55:18,865 - root - INFO - step: 39785 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:55:18,865 - root - INFO - lr: 5.0033e-06 gnorm: 1.29 [1 day, 0:21:06< 0:07:53] +[titan] 2025-10-05 22:55:29,721 - root - INFO - step: 39790 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:55:29,721 - root - INFO - lr: 5.0031e-06 gnorm: 1.28 [1 day, 0:21:17< 0:07:42] +[titan] 2025-10-05 22:55:40,588 - root - INFO - step: 39795 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 22:55:40,588 - root - INFO - lr: 5.0030e-06 gnorm: 1.25 [1 day, 0:21:28< 0:07:31] +[titan] 2025-10-05 22:55:49,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:55:51,443 - root - INFO - step: 39800 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:55:51,443 - root - INFO - lr: 5.0028e-06 gnorm: 1.25 [1 day, 0:21:39< 0:07:20] +[titan] 2025-10-05 22:56:02,382 - root - INFO - step: 39805 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6609 +[titan] 2025-10-05 22:56:02,382 - root - INFO - lr: 5.0027e-06 gnorm: 1.21 [1 day, 0:21:50< 0:07:09] +[titan] 2025-10-05 22:56:13,242 - root - INFO - step: 39810 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:56:13,242 - root - INFO - lr: 5.0026e-06 gnorm: 1.28 [1 day, 0:22:01< 0:06:58] +[titan] 2025-10-05 22:56:24,068 - root - INFO - step: 39815 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6926 +[titan] 2025-10-05 22:56:24,068 - root - INFO - lr: 5.0024e-06 gnorm: 1.28 [1 day, 0:22:12< 0:06:47] +[titan] 2025-10-05 22:56:34,883 - root - INFO - step: 39820 loss: 1.8589 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6448 +[titan] 2025-10-05 22:56:34,883 - root - INFO - lr: 5.0023e-06 gnorm: 1.23 [1 day, 0:22:23< 0:06:36] +[titan] 2025-10-05 22:56:45,691 - root - INFO - step: 39825 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6254 +[titan] 2025-10-05 22:56:45,691 - root - INFO - lr: 5.0022e-06 gnorm: 1.24 [1 day, 0:22:33< 0:06:25] +[titan] 2025-10-05 22:56:56,537 - root - INFO - step: 39830 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 
22:56:56,537 - root - INFO - lr: 5.0020e-06 gnorm: 1.29 [1 day, 0:22:44< 0:06:14] +[titan] 2025-10-05 22:57:07,418 - root - INFO - step: 39835 loss: 1.8289 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2110 global_avg_mtp_loss: 1.6179 +[titan] 2025-10-05 22:57:07,419 - root - INFO - lr: 5.0019e-06 gnorm: 1.25 [1 day, 0:22:55< 0:06:03] +[titan] 2025-10-05 22:57:18,260 - root - INFO - step: 39840 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6716 +[titan] 2025-10-05 22:57:18,260 - root - INFO - lr: 5.0018e-06 gnorm: 1.28 [1 day, 0:23:06< 0:05:52] +[titan] 2025-10-05 22:57:29,092 - root - INFO - step: 39845 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:57:29,092 - root - INFO - lr: 5.0017e-06 gnorm: 1.31 [1 day, 0:23:17< 0:05:41] +[titan] 2025-10-05 22:57:37,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:57:39,898 - root - INFO - step: 39850 loss: 1.8816 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 22:57:39,899 - root - INFO - lr: 5.0016e-06 gnorm: 1.24 [1 day, 0:23:28< 0:05:30] +[titan] 2025-10-05 22:57:50,741 - root - INFO - step: 39855 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 22:57:50,741 - root - INFO - lr: 5.0015e-06 gnorm: 1.32 [1 day, 0:23:38< 0:05:19] +[titan] 2025-10-05 22:58:01,598 - root - INFO - step: 39860 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 22:58:01,598 - root - INFO - lr: 5.0014e-06 gnorm: 1.33 [1 day, 0:23:49< 0:05:08] +[titan] 2025-10-05 22:58:12,433 - root - INFO - step: 39865 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6621 +[titan] 2025-10-05 22:58:12,433 - root - INFO - lr: 5.0013e-06 gnorm: 1.27 [1 day, 0:24:00< 0:04:57] +[titan] 2025-10-05 22:58:23,320 - root - INFO - step: 39870 loss: 1.8085 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2094 global_avg_mtp_loss: 1.5991 +[titan] 2025-10-05 22:58:23,320 - root - INFO - lr: 5.0012e-06 gnorm: 1.27 [1 day, 0:24:11< 0:04:46] +[titan] 2025-10-05 22:58:34,151 - root - INFO - step: 39875 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7171 +[titan] 2025-10-05 22:58:34,151 - root - INFO - lr: 5.0011e-06 gnorm: 1.29 [1 day, 0:24:22< 0:04:35] +[titan] 2025-10-05 22:58:44,982 - root - INFO - step: 39880 loss: 1.8617 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2163 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 
22:58:44,982 - root - INFO - lr: 5.0010e-06 gnorm: 1.24 [1 day, 0:24:33< 0:04:24] +[titan] 2025-10-05 22:58:55,801 - root - INFO - step: 39885 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:58:55,801 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:43< 0:04:13] +[titan] 2025-10-05 22:59:06,655 - root - INFO - step: 39890 loss: 1.8466 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 22:59:06,655 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:54< 0:04:02] +[titan] 2025-10-05 22:59:17,499 - root - INFO - step: 39895 loss: 1.9303 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 22:59:17,499 - root - INFO - lr: 5.0008e-06 gnorm: 1.27 [1 day, 0:25:05< 0:03:51] +[titan] 2025-10-05 22:59:26,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:59:28,385 - root - INFO - step: 39900 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:59:28,385 - root - INFO - lr: 5.0007e-06 gnorm: 1.29 [1 day, 0:25:16< 0:03:40] +[titan] 2025-10-05 22:59:39,223 - root - INFO - step: 39905 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6730 +[titan] 2025-10-05 22:59:39,223 - root - INFO - lr: 5.0006e-06 gnorm: 1.25 [1 day, 0:25:27< 0:03:29] +[titan] 2025-10-05 22:59:50,050 - root - INFO - step: 39910 loss: 1.9026 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6824 +[titan] 2025-10-05 22:59:50,051 - root - INFO - lr: 5.0006e-06 gnorm: 1.33 [1 day, 0:25:38< 0:03:18] +[titan] 2025-10-05 23:00:00,881 - root - INFO - step: 39915 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 23:00:00,881 - root - INFO - lr: 5.0005e-06 gnorm: 1.25 [1 day, 0:25:48< 0:03:07] +[titan] 2025-10-05 23:00:11,722 - root - INFO - step: 39920 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 23:00:11,722 - root - INFO - lr: 5.0005e-06 gnorm: 1.23 [1 day, 0:25:59< 0:02:56] +[titan] 2025-10-05 23:00:22,583 - root - INFO - step: 39925 loss: 1.8682 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 23:00:22,583 - root - INFO - lr: 5.0004e-06 gnorm: 1.24 [1 day, 0:26:10< 0:02:45] +[titan] 2025-10-05 23:00:33,459 - root - INFO - step: 39930 loss: 1.8937 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 
23:00:33,459 - root - INFO - lr: 5.0003e-06 gnorm: 1.28 [1 day, 0:26:21< 0:02:34] +[titan] 2025-10-05 23:00:44,397 - root - INFO - step: 39935 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 23:00:44,397 - root - INFO - lr: 5.0003e-06 gnorm: 1.37 [1 day, 0:26:32< 0:02:23] +[titan] 2025-10-05 23:00:46,750 - root - INFO - Dumping profiler traces at step 39936 +[titan] 2025-10-05 23:00:46,787 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 23:00:55,477 - root - INFO - step: 39940 loss: 1.9007 memory: 118.84GiB(85.28%) tps: 29,576 tflops: 410.32 mfu: 41.49% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 23:00:55,477 - root - INFO - lr: 5.0003e-06 gnorm: 1.22 [1 day, 0:26:43< 0:02:12] +[titan] 2025-10-05 23:01:06,304 - root - INFO - step: 39945 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 23:01:06,305 - root - INFO - lr: 5.0002e-06 gnorm: 1.26 [1 day, 0:26:54< 0:02:01] +[titan] 2025-10-05 23:01:14,966 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 23:01:17,145 - root - INFO - step: 39950 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6639 +[titan] 2025-10-05 23:01:17,145 - root - INFO - lr: 5.0002e-06 gnorm: 1.25 [1 day, 0:27:05< 0:01:50] +[titan] 2025-10-05 23:01:28,000 - root - INFO - step: 39955 loss: 1.8456 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2134 global_avg_mtp_loss: 1.6322 +[titan] 2025-10-05 23:01:28,000 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:16< 0:01:39] +[titan] 2025-10-05 23:01:38,823 - root - INFO - step: 39960 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 23:01:38,823 - root - INFO - lr: 5.0001e-06 gnorm: 1.26 [1 day, 0:27:26< 0:01:28] +[titan] 2025-10-05 23:01:49,702 - root - INFO - step: 39965 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6498 +[titan] 2025-10-05 23:01:49,702 - root - INFO - lr: 5.0001e-06 gnorm: 1.29 [1 day, 0:27:37< 0:01:17] +[titan] 2025-10-05 23:02:00,536 - root - INFO - step: 39970 loss: 1.8845 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 23:02:00,536 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:48< 0:01:06] +[titan] 2025-10-05 23:02:11,385 - root - INFO - step: 39975 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6632 +[titan] 2025-10-05 23:02:11,385 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:27:59< 0:00:55] +[titan] 2025-10-05 23:02:22,224 - root - INFO - step: 39980 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 
23:02:22,224 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:10< 0:00:44] +[titan] 2025-10-05 23:02:33,062 - root - INFO - step: 39985 loss: 1.8577 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6423 +[titan] 2025-10-05 23:02:33,062 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:21< 0:00:33] +[titan] 2025-10-05 23:02:43,924 - root - INFO - step: 39990 loss: 1.9469 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 23:02:43,924 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:28:32< 0:00:22] +[titan] 2025-10-05 23:02:54,801 - root - INFO - step: 39995 loss: 1.8720 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6560 +[titan] 2025-10-05 23:02:54,801 - root - INFO - lr: 5.0000e-06 gnorm: 1.27 [1 day, 0:28:42< 0:00:11] +[titan] 2025-10-05 23:03:03,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 23:03:05,651 - root - INFO - step: 40000 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6752 +[titan] 2025-10-05 23:03:05,651 - root - INFO - lr: 5.0000e-06 gnorm: 1.24 [1 day, 0:28:53< 0:00:00] +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving a full checkpoint at last step, step 40000. +[titan] 2025-10-05 23:03:23,689 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 23:03:23,689 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 18.04 seconds. 
+[titan] 2025-10-05 23:03:23,690 - root - INFO - Training completed diff --git a/logs/none_99omtdbz/attempt_0/5/stderr.log b/logs/none_99omtdbz/attempt_0/5/stderr.log new file mode 100644 index 0000000000000000000000000000000000000000..047a7ccb34905ce66e928b9537f438d22388d26f --- /dev/null +++ b/logs/none_99omtdbz/attempt_0/5/stderr.log @@ -0,0 +1,17257 @@ +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc +wandb: Currently logged in as: zaydzuhri to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured. +[titan] 2025-10-04 22:33:15,945 - root - INFO - Starting job: default job +[titan] 2025-10-04 22:33:15,945 - root - INFO - { + "activation_checkpoint": { + "mode": "none", + "selective_ac_option": "2" + }, + "activation_offload": { + "mode": "none" + }, + "checkpoint": { + "async_mode": "disabled", + "convert_to_hf_on_save": false, + "create_seed_checkpoint": false, + "enable_checkpoint": true, + "exclude_from_loading": [], + "export_dtype": "float32", + "folder": "checkpoint", + "hf_repo_base_name": "zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000", + "hf_upload_enabled": true, + "hf_upload_format": "dcp", + "interval": 5000, + "interval_type": "steps", + "keep_latest_k": 0, + "load_step": -1, + "model_weights_only": false + }, + "comm": { + "init_timeout_seconds": 6000, + "trace_buf_size": 20000, + "train_timeout_seconds": 6000 + }, + "experimental": { + "context_parallel_degree": 1, + "context_parallel_rotate_method": "allgather", + "custom_model_path": "", + "enable_async_tensor_parallel": false, + "enable_compiled_autograd": false, + "pipeline_parallel_degree": 1, + "pipeline_parallel_microbatches": null, + 
"pipeline_parallel_schedule": "1F1B", + "pipeline_parallel_schedule_csv": "", + "pipeline_parallel_split_points": [] + }, + "fault_tolerance": { + "enable": false, + "group_size": 0, + "min_replica_size": 1, + "replica_id": 0 + }, + "float8": { + "enable_fsdp_float8_all_gather": false, + "force_recompute_fp8_weight_in_bwd": false, + "precompute_float8_dynamic_scale_for_fsdp": false, + "recipe_name": null + }, + "job": { + "config_file": "flame/models/fla.toml", + "description": "default job", + "dump_folder": "exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine", + "print_args": true, + "use_for_integration_test": false + }, + "lr_scheduler": { + "decay_ratio": null, + "decay_type": "cosine", + "lr_min": 0.1, + "warmup_steps": 400 + }, + "memory_estimation": { + "disable_fake_mode": false, + "enabled": false + }, + "metrics": { + "disable_color_printing": false, + "enable_tensorboard": false, + "enable_wandb": true, + "log_freq": 5, + "save_for_all_ranks": false, + "save_tb_folder": "tb" + }, + "model": { + "config": "configs/mtp_transformer_1B.json", + "converters": [], + "name": "fla", + "print_after_conversion": false, + "tokenizer_path": "fla-hub/transformer-1.3B-100B" + }, + "optimizer": { + "early_step_in_backward": false, + "eps": 1e-15, + "implementation": "fused", + "lr": 5e-05, + "name": "AdamW" + }, + "profiling": { + "enable_memory_snapshot": false, + "enable_profiling": true, + "profile_freq": 512, + "save_memory_snapshot_folder": "memory_snapshot", + "save_traces_folder": "profile_trace" + }, + "training": { + "batch_size": 16, + "compile": true, + "context_len": 4096, + "data_dir": null, + "data_files": null, + "data_parallel_replicate_degree": 1, + "data_parallel_shard_degree": -1, + "data_probs": null, + "dataset": "/root/.cache/zaydzuhri___open_math_instruct-2-text/default", + "dataset_name": "default", + "dataset_split": "train", + "deterministic": false, + "disable_loss_parallel": false, + 
"enable_cpu_offload": false, + "fsdp_reshard_after_forward": "default", + "gc_freq": 50, + "gradient_accumulation_steps": 1, + "max_norm": 1.0, + "mixed_precision_param": "bfloat16", + "mixed_precision_reduce": "float32", + "num_workers": 32, + "persistent_workers": false, + "pin_memory": false, + "prefetch_factor": 2, + "seed": 79, + "seq_len": 4096, + "skip_nan_inf": true, + "steps": 40000, + "streaming": false, + "tensor_parallel_degree": 1, + "varlen": false + } +} +[titan] 2025-10-04 22:33:15,945 - root - INFO - [GC] Initial GC collection. 0.00 seconds. +[titan] 2025-10-04 22:33:45,201 - root - INFO - Target Hugging Face repository for this run: zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000-20251004-223345 +[titan] 2025-10-04 22:33:45,201 - root - WARNING - ENV[TORCH_NCCL_ASYNC_ERROR_HANDLING] = 1 will be overridden to 3 based on job config +[titan] 2025-10-04 22:33:45,203 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:33:45,204 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:33:45,205 - root - INFO - Peak FLOPS used for computing MFU: 9.890e+14 +[titan] 2025-10-04 22:33:45,205 - root - INFO - Building 1-D device mesh with ['dp_shard'], [8] +[titan] 2025-10-04 22:33:45,936 - root - INFO - Loading tokenizer... 
+[titan] 2025-10-04 22:33:46,091 - root - INFO - LlamaTokenizerFast(name_or_path='fla-hub/transformer-1.3B-100B', vocab_size=32000, model_max_length=10000000000, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': ''}, clean_up_tokenization_spaces=False, added_tokens_decoder={ + 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 1: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), +} +) +[titan] 2025-10-04 22:33:46,091 - root - INFO - Loading dataset /root/.cache/zaydzuhri___open_math_instruct-2-text/default:default +`trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:46,091 - datasets.load - ERROR - `trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:46,847 - root - INFO - Dataset({ + features: ['text'], + num_rows: 21972791 +}) +[titan] 2025-10-04 22:33:46,847 - root - INFO - Shuffling the dataset with seed 79 +[titan] 2025-10-04 22:33:52,959 - root - INFO - Loading model config from configs/mtp_transformer_1B.json +[titan] 2025-10-04 22:33:52,961 - root - INFO - Building dataloader... 
+[titan] 2025-10-04 22:33:52,963 - root - INFO - Building model from the config +MTPTransformerConfig { + "bos_token_id": 1, + "elementwise_affine": true, + "eos_token_id": 2, + "fuse_cross_entropy": true, + "fuse_norm": true, + "fuse_swiglu": true, + "hidden_act": "swish", + "hidden_ratio": 4, + "hidden_size": 2048, + "initializer_range": 0.006, + "intermediate_size": null, + "max_position_embeddings": 8192, + "model_type": "mtp_transformer", + "n_future_tokens": 4, + "norm_eps": 1e-06, + "num_heads": 32, + "num_hidden_layers": 32, + "num_kv_heads": null, + "pad_token_id": 2, + "qk_norm": false, + "qkv_bias": false, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "transformers_version": "4.51.3", + "use_cache": true, + "use_custom_backward": false, + "vocab_size": 32000, + "window_size": null +} + +[titan] 2025-10-04 22:33:53,093 - root - INFO -  +MTPTransformerForCausalLM( + (model): MTPTransformerModel( + (embeddings): Embedding(32000, 2048, padding_idx=2) + (layers): ModuleList( + (0-27): 28 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (extra_heads): ModuleList( + (0-3): 4 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): 
Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (norm): RMSNorm(2048, eps=1e-06) + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + (criterion): FusedLinearCrossEntropyLoss() +) + +[titan] 2025-10-04 22:33:53,121 - root - INFO - Compiling each block with torch.compile +[titan] 2025-10-04 22:33:53,121 - root - INFO - Compiling the embedding, norm, and lm_head layers with torch.compile +[titan] 2025-10-04 22:33:53,121 - root - INFO - Compiling the entire model with torch.compile +[titan] 2025-10-04 22:33:53,197 - root - INFO - Applied FSDP to the model +[titan] 2025-10-04 22:33:53,395 - root - INFO - CUDA memory usage for model: 0.84GiB(0.60%) +[titan] 2025-10-04 22:33:53,416 - root - INFO - Checkpointing active. Checkpoints will be loaded from and saved to exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/checkpoint +[titan] 2025-10-04 22:33:53,416 - root - INFO - Loading the checkpoint at step 0. +[titan] 2025-10-04 22:34:08,151 - root - INFO - [GC] GC collection for checkpoint loading. 0.65 seconds. +[titan] 2025-10-04 22:34:08,151 - root - INFO - Finished loading the checkpoint in 14.73 seconds. 
+[titan] 2025-10-04 22:34:08,152 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:34:08,153 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:34:09,415 - root - INFO - ***** Running training ***** +[titan] 2025-10-04 22:34:09,418 - root - INFO -  Training starts at step 1 +[titan] 2025-10-04 22:34:09,418 - root - INFO -  Number of tokens per sequence = 4,096 +[titan] 2025-10-04 22:34:09,418 - root - INFO -  Gradient Accumulation steps = 1 +[titan] 2025-10-04 22:34:09,418 - root - INFO -  Instantaneous batch size (per device) = 16 +[titan] 2025-10-04 22:34:09,419 - root - INFO -  Global batch size (w. parallel, distributed & accumulation) = 128 (524,288 tokens) +[titan] 2025-10-04 22:34:09,419 - root - INFO -  Total optimization steps = 40,000 (20,971,520,000 tokens) +[titan] 2025-10-04 22:34:09,419 - root - INFO -  Warmup steps = 400 (209,715,200 tokens) +[titan] 2025-10-04 22:34:09,419 - root - INFO -  Number of parameters = 1,775,372,288  +[titan] 2025-10-04 22:34:09,419 - root - INFO - Profiling active. Traces will be saved at exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace +[titan] 2025-10-04 22:34:47,724 - root - INFO - step: 1 loss: 12.0105 memory: 116.89GiB(83.88%) tps: 1,656 tflops: 22.98 mfu: 2.32% global_avg_ntp_loss: 2.1249 global_avg_mtp_loss: 9.8856 +[titan] 2025-10-04 22:34:47,724 - root - INFO - lr: 2.4938e-07 gnorm: 20.89 [ 0:00:39<18 days, 7:39:21] +[titan] 2025-10-04 22:34:47,724 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-04 22:34:53,085 - root - INFO - [GC] GC collection invoked by checkpointer. 0.19 seconds. +[titan] 2025-10-04 22:34:53,086 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 5.36 seconds. 
+[titan] 2025-10-04 22:34:53,086 - root - INFO - Synchronizing and adjusting timeout for all ProcessGroups to 1:40:00 +[titan] 2025-10-04 22:36:58,991 - root - INFO - step: 5 loss: 11.7564 memory: 118.84GiB(85.28%) tps: 1,997 tflops: 27.71 mfu: 2.80% global_avg_ntp_loss: 2.0697 global_avg_mtp_loss: 9.6867 +[titan] 2025-10-04 22:36:58,992 - root - INFO - lr: 7.4813e-07 gnorm: 19.96 [ 0:02:50<15 days, 19:35:24] +[titan] 2025-10-04 22:37:09,851 - root - INFO - step: 10 loss: 11.2335 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 1.9192 global_avg_mtp_loss: 9.3143 +[titan] 2025-10-04 22:37:09,851 - root - INFO - lr: 1.3716e-06 gnorm: 18.16 [ 0:03:01<8 days, 9:50:03] +[titan] 2025-10-04 22:37:20,642 - root - INFO - step: 15 loss: 10.8309 memory: 118.84GiB(85.28%) tps: 30,368 tflops: 421.30 mfu: 42.60% global_avg_ntp_loss: 1.7960 global_avg_mtp_loss: 9.0349 +[titan] 2025-10-04 22:37:20,642 - root - INFO - lr: 1.9950e-06 gnorm: 10.62 [ 0:03:12<5 days, 22:31:45] +[titan] 2025-10-04 22:37:31,508 - root - INFO - step: 20 loss: 10.3172 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 1.6641 global_avg_mtp_loss: 8.6531 +[titan] 2025-10-04 22:37:31,508 - root - INFO - lr: 2.6185e-06 gnorm: 8.22 [ 0:03:23<4 days, 16:55:00] +[titan] 2025-10-04 22:37:42,328 - root - INFO - step: 25 loss: 9.9294 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 1.5801 global_avg_mtp_loss: 8.3492 +[titan] 2025-10-04 22:37:42,328 - root - INFO - lr: 3.2419e-06 gnorm: 7.10 [ 0:03:34<3 days, 23:07:40] +[titan] 2025-10-04 22:37:53,161 - root - INFO - step: 30 loss: 9.5763 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 1.4997 global_avg_mtp_loss: 8.0766 +[titan] 2025-10-04 22:37:53,161 - root - INFO - lr: 3.8653e-06 gnorm: 6.23 [ 0:03:45<3 days, 11:16:20] +[titan] 2025-10-04 22:38:04,056 - root - INFO - step: 35 loss: 9.3711 memory: 118.84GiB(85.28%) tps: 
30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 1.4603 global_avg_mtp_loss: 7.9108 +[titan] 2025-10-04 22:38:04,056 - root - INFO - lr: 4.4888e-06 gnorm: 6.20 [ 0:03:55<3 days, 2:49:22] +[titan] 2025-10-04 22:38:14,933 - root - INFO - step: 40 loss: 9.0179 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 1.3853 global_avg_mtp_loss: 7.6325 +[titan] 2025-10-04 22:38:14,933 - root - INFO - lr: 5.1122e-06 gnorm: 5.60 [ 0:04:06<2 days, 20:28:48] +[titan] 2025-10-04 22:38:25,789 - root - INFO - step: 45 loss: 8.7524 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 1.3406 global_avg_mtp_loss: 7.4118 +[titan] 2025-10-04 22:38:25,789 - root - INFO - lr: 5.7357e-06 gnorm: 5.43 [ 0:04:17<2 days, 15:32:27] +[titan] 2025-10-04 22:38:34,551 - root - INFO - [GC] Peforming periodical GC collection. 0.05 seconds. +[titan] 2025-10-04 22:38:36,745 - root - INFO - step: 50 loss: 8.5439 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.94 mfu: 41.96% global_avg_ntp_loss: 1.3050 global_avg_mtp_loss: 7.2389 +[titan] 2025-10-04 22:38:36,746 - root - INFO - lr: 6.3591e-06 gnorm: 5.74 [ 0:04:28<2 days, 11:36:40] +[titan] 2025-10-04 22:38:47,618 - root - INFO - step: 55 loss: 8.3158 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 1.2609 global_avg_mtp_loss: 7.0549 +[titan] 2025-10-04 22:38:47,619 - root - INFO - lr: 6.9825e-06 gnorm: 5.52 [ 0:04:39<2 days, 8:22:43] +[titan] 2025-10-04 22:38:58,482 - root - INFO - step: 60 loss: 8.2006 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 1.2373 global_avg_mtp_loss: 6.9633 +[titan] 2025-10-04 22:38:58,482 - root - INFO - lr: 7.6060e-06 gnorm: 5.72 [ 0:04:50<2 days, 5:40:57] +[titan] 2025-10-04 22:39:09,360 - root - INFO - step: 65 loss: 8.1393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.2182 global_avg_mtp_loss: 6.9211 +[titan] 2025-10-04 22:39:09,360 
- root - INFO - lr: 8.2294e-06 gnorm: 5.66 [ 0:05:01<2 days, 3:24:12] +[titan] 2025-10-04 22:39:20,248 - root - INFO - step: 70 loss: 7.7608 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 1.1495 global_avg_mtp_loss: 6.6112 +[titan] 2025-10-04 22:39:20,248 - root - INFO - lr: 8.8529e-06 gnorm: 5.54 [ 0:05:12<2 days, 1:27:03] +[titan] 2025-10-04 22:39:31,185 - root - INFO - step: 75 loss: 7.6862 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 1.1395 global_avg_mtp_loss: 6.5467 +[titan] 2025-10-04 22:39:31,185 - root - INFO - lr: 9.4763e-06 gnorm: 6.04 [ 0:05:23<1 day, 23:45:56] +[titan] 2025-10-04 22:39:42,063 - root - INFO - step: 80 loss: 7.4352 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.0959 global_avg_mtp_loss: 6.3393 +[titan] 2025-10-04 22:39:42,063 - root - INFO - lr: 1.0100e-05 gnorm: 5.61 [ 0:05:33<1 day, 22:16:56] +[titan] 2025-10-04 22:39:52,933 - root - INFO - step: 85 loss: 7.3232 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 1.0671 global_avg_mtp_loss: 6.2561 +[titan] 2025-10-04 22:39:52,934 - root - INFO - lr: 1.0723e-05 gnorm: 5.89 [ 0:05:44<1 day, 20:58:20] +[titan] 2025-10-04 22:40:03,808 - root - INFO - step: 90 loss: 7.1910 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 1.0545 global_avg_mtp_loss: 6.1364 +[titan] 2025-10-04 22:40:03,808 - root - INFO - lr: 1.1347e-05 gnorm: 6.24 [ 0:05:55<1 day, 19:48:28] +[titan] 2025-10-04 22:40:14,668 - root - INFO - step: 95 loss: 7.0637 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 1.0179 global_avg_mtp_loss: 6.0458 +[titan] 2025-10-04 22:40:14,668 - root - INFO - lr: 1.1970e-05 gnorm: 5.80 [ 0:06:06<1 day, 18:45:51] +[titan] 2025-10-04 22:40:23,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:40:25,567 - root - INFO - step: 100 loss: 7.0183 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 1.0144 global_avg_mtp_loss: 6.0039 +[titan] 2025-10-04 22:40:25,567 - root - INFO - lr: 1.2594e-05 gnorm: 5.49 [ 0:06:17<1 day, 17:49:43] +[titan] 2025-10-04 22:40:36,554 - root - INFO - step: 105 loss: 6.7845 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.77 mfu: 41.84% global_avg_ntp_loss: 0.9684 global_avg_mtp_loss: 5.8161 +[titan] 2025-10-04 22:40:36,554 - root - INFO - lr: 1.3217e-05 gnorm: 5.66 [ 0:06:28<1 day, 16:59:29] +[titan] 2025-10-04 22:40:47,440 - root - INFO - step: 110 loss: 6.7610 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.9616 global_avg_mtp_loss: 5.7993 +[titan] 2025-10-04 22:40:47,440 - root - INFO - lr: 1.3840e-05 gnorm: 5.76 [ 0:06:39<1 day, 16:13:11] +[titan] 2025-10-04 22:40:58,316 - root - INFO - step: 115 loss: 6.7822 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.9526 global_avg_mtp_loss: 5.8296 +[titan] 2025-10-04 22:40:58,316 - root - INFO - lr: 1.4464e-05 gnorm: 5.41 [ 0:06:50<1 day, 15:30:50] +[titan] 2025-10-04 22:41:09,192 - root - INFO - step: 120 loss: 6.5921 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.9190 global_avg_mtp_loss: 5.6731 +[titan] 2025-10-04 22:41:09,193 - root - INFO - lr: 1.5087e-05 gnorm: 5.18 [ 0:07:01<1 day, 14:52:00] +[titan] 2025-10-04 22:41:20,086 - root - INFO - step: 125 loss: 6.3759 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8880 global_avg_mtp_loss: 5.4878 +[titan] 2025-10-04 22:41:20,086 - root - INFO - lr: 1.5711e-05 gnorm: 4.91 [ 0:07:11<1 day, 14:16:22] +[titan] 2025-10-04 22:41:31,181 - root - INFO - step: 130 loss: 6.3566 memory: 118.84GiB(85.28%) tps: 29,536 tflops: 409.77 mfu: 41.43% global_avg_ntp_loss: 0.8781 global_avg_mtp_loss: 5.4786 +[titan] 2025-10-04 
22:41:31,181 - root - INFO - lr: 1.6334e-05 gnorm: 4.37 [ 0:07:23<1 day, 13:44:28] +[titan] 2025-10-04 22:41:42,074 - root - INFO - step: 135 loss: 6.3044 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8713 global_avg_mtp_loss: 5.4331 +[titan] 2025-10-04 22:41:42,075 - root - INFO - lr: 1.6958e-05 gnorm: 4.29 [ 0:07:33<1 day, 13:13:56] +[titan] 2025-10-04 22:41:52,936 - root - INFO - step: 140 loss: 6.3158 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.8632 global_avg_mtp_loss: 5.4526 +[titan] 2025-10-04 22:41:52,936 - root - INFO - lr: 1.7581e-05 gnorm: 3.03 [ 0:07:44<1 day, 12:45:26] +[titan] 2025-10-04 22:42:03,814 - root - INFO - step: 145 loss: 6.2266 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.8508 global_avg_mtp_loss: 5.3758 +[titan] 2025-10-04 22:42:03,815 - root - INFO - lr: 1.8204e-05 gnorm: 3.86 [ 0:07:55<1 day, 12:18:56] +[titan] 2025-10-04 22:42:12,515 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:42:14,709 - root - INFO - step: 150 loss: 6.0872 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.8237 global_avg_mtp_loss: 5.2635 +[titan] 2025-10-04 22:42:14,710 - root - INFO - lr: 1.8828e-05 gnorm: 3.31 [ 0:08:06<1 day, 11:54:17] +[titan] 2025-10-04 22:42:25,613 - root - INFO - step: 155 loss: 6.0870 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.8286 global_avg_mtp_loss: 5.2584 +[titan] 2025-10-04 22:42:25,613 - root - INFO - lr: 1.9451e-05 gnorm: 3.04 [ 0:08:17<1 day, 11:31:15] +[titan] 2025-10-04 22:42:36,528 - root - INFO - step: 160 loss: 5.9733 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.8032 global_avg_mtp_loss: 5.1701 +[titan] 2025-10-04 22:42:36,529 - root - INFO - lr: 2.0075e-05 gnorm: 3.06 [ 0:08:28<1 day, 11:09:41] +[titan] 2025-10-04 22:42:47,448 - root - INFO - step: 165 loss: 5.8683 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.7907 global_avg_mtp_loss: 5.0776 +[titan] 2025-10-04 22:42:47,448 - root - INFO - lr: 2.0698e-05 gnorm: 3.39 [ 0:08:39<1 day, 10:49:26] +[titan] 2025-10-04 22:42:58,343 - root - INFO - step: 170 loss: 5.8536 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.7847 global_avg_mtp_loss: 5.0689 +[titan] 2025-10-04 22:42:58,343 - root - INFO - lr: 2.1322e-05 gnorm: 2.80 [ 0:08:50<1 day, 10:30:16] +[titan] 2025-10-04 22:43:09,215 - root - INFO - step: 175 loss: 5.7812 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.7716 global_avg_mtp_loss: 5.0096 +[titan] 2025-10-04 22:43:09,216 - root - INFO - lr: 2.1945e-05 gnorm: 4.02 [ 0:09:01<1 day, 10:12:06] +[titan] 2025-10-04 22:43:20,097 - root - INFO - step: 180 loss: 5.7994 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.7711 global_avg_mtp_loss: 5.0283 +[titan] 2025-10-04 
22:43:20,098 - root - INFO - lr: 2.2569e-05 gnorm: 3.36 [ 0:09:11<1 day, 9:54:58] +[titan] 2025-10-04 22:43:31,003 - root - INFO - step: 185 loss: 5.6617 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9134 +[titan] 2025-10-04 22:43:31,003 - root - INFO - lr: 2.3192e-05 gnorm: 2.73 [ 0:09:22<1 day, 9:38:50] +[titan] 2025-10-04 22:43:41,902 - root - INFO - step: 190 loss: 5.6564 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9080 +[titan] 2025-10-04 22:43:41,903 - root - INFO - lr: 2.3815e-05 gnorm: 3.17 [ 0:09:33<1 day, 9:23:31] +[titan] 2025-10-04 22:43:52,788 - root - INFO - step: 195 loss: 5.6643 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.7475 global_avg_mtp_loss: 4.9168 +[titan] 2025-10-04 22:43:52,788 - root - INFO - lr: 2.4439e-05 gnorm: 2.43 [ 0:09:44<1 day, 9:08:56] +[titan] 2025-10-04 22:44:01,483 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:44:03,675 - root - INFO - step: 200 loss: 5.6189 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.7360 global_avg_mtp_loss: 4.8830 +[titan] 2025-10-04 22:44:03,675 - root - INFO - lr: 2.5062e-05 gnorm: 3.47 [ 0:09:55<1 day, 8:55:04] +[titan] 2025-10-04 22:44:14,559 - root - INFO - step: 205 loss: 5.5215 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.7213 global_avg_mtp_loss: 4.8002 +[titan] 2025-10-04 22:44:14,559 - root - INFO - lr: 2.5686e-05 gnorm: 3.09 [ 0:10:06<1 day, 8:41:52] +[titan] 2025-10-04 22:44:25,433 - root - INFO - step: 210 loss: 5.5044 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.7198 global_avg_mtp_loss: 4.7846 +[titan] 2025-10-04 22:44:25,433 - root - INFO - lr: 2.6309e-05 gnorm: 2.66 [ 0:10:17<1 day, 8:29:15] +[titan] 2025-10-04 22:44:36,338 - root - INFO - step: 215 loss: 5.4728 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.7115 global_avg_mtp_loss: 4.7613 +[titan] 2025-10-04 22:44:36,338 - root - INFO - lr: 2.6933e-05 gnorm: 2.45 [ 0:10:28<1 day, 8:17:19] +[titan] 2025-10-04 22:44:47,225 - root - INFO - step: 220 loss: 5.3310 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.6944 global_avg_mtp_loss: 4.6366 +[titan] 2025-10-04 22:44:47,225 - root - INFO - lr: 2.7556e-05 gnorm: 2.66 [ 0:10:39<1 day, 8:05:51] +[titan] 2025-10-04 22:44:58,124 - root - INFO - step: 225 loss: 5.3739 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6934 global_avg_mtp_loss: 4.6805 +[titan] 2025-10-04 22:44:58,125 - root - INFO - lr: 2.8180e-05 gnorm: 2.95 [ 0:10:49<1 day, 7:54:56] +[titan] 2025-10-04 22:45:09,004 - root - INFO - step: 230 loss: 5.4216 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.7014 global_avg_mtp_loss: 4.7202 +[titan] 2025-10-04 22:45:09,004 - 
root - INFO - lr: 2.8803e-05 gnorm: 2.60 [ 0:11:00<1 day, 7:44:25] +[titan] 2025-10-04 22:45:19,907 - root - INFO - step: 235 loss: 5.3090 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.6909 global_avg_mtp_loss: 4.6180 +[titan] 2025-10-04 22:45:19,907 - root - INFO - lr: 2.9426e-05 gnorm: 2.68 [ 0:11:11<1 day, 7:34:25] +[titan] 2025-10-04 22:45:30,796 - root - INFO - step: 240 loss: 5.2690 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.6785 global_avg_mtp_loss: 4.5905 +[titan] 2025-10-04 22:45:30,796 - root - INFO - lr: 3.0050e-05 gnorm: 2.38 [ 0:11:22<1 day, 7:24:47] +[titan] 2025-10-04 22:45:41,709 - root - INFO - step: 245 loss: 5.1965 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.6691 global_avg_mtp_loss: 4.5274 +[titan] 2025-10-04 22:45:41,710 - root - INFO - lr: 3.0673e-05 gnorm: 2.47 [ 0:11:33<1 day, 7:15:35] +[titan] 2025-10-04 22:45:50,403 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:45:52,597 - root - INFO - step: 250 loss: 5.1858 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6654 global_avg_mtp_loss: 4.5204 +[titan] 2025-10-04 22:45:52,597 - root - INFO - lr: 3.1297e-05 gnorm: 3.00 [ 0:11:44<1 day, 7:06:42] +[titan] 2025-10-04 22:46:03,496 - root - INFO - step: 255 loss: 5.1706 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.6625 global_avg_mtp_loss: 4.5081 +[titan] 2025-10-04 22:46:03,496 - root - INFO - lr: 3.1920e-05 gnorm: 2.61 [ 0:11:55<1 day, 6:58:11] +[titan] 2025-10-04 22:46:14,369 - root - INFO - step: 260 loss: 5.1473 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.6607 global_avg_mtp_loss: 4.4865 +[titan] 2025-10-04 22:46:14,369 - root - INFO - lr: 3.2544e-05 gnorm: 2.39 [ 0:12:06<1 day, 6:49:55] +[titan] 2025-10-04 22:46:25,252 - root - INFO - step: 265 loss: 5.1300 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.6565 global_avg_mtp_loss: 4.4735 +[titan] 2025-10-04 22:46:25,253 - root - INFO - lr: 3.3167e-05 gnorm: 2.29 [ 0:12:17<1 day, 6:41:59] +[titan] 2025-10-04 22:46:36,152 - root - INFO - step: 270 loss: 5.1579 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6598 global_avg_mtp_loss: 4.4981 +[titan] 2025-10-04 22:46:36,152 - root - INFO - lr: 3.3791e-05 gnorm: 2.51 [ 0:12:27<1 day, 6:34:22] +[titan] 2025-10-04 22:46:47,010 - root - INFO - step: 275 loss: 5.0167 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.6398 global_avg_mtp_loss: 4.3769 +[titan] 2025-10-04 22:46:47,011 - root - INFO - lr: 3.4414e-05 gnorm: 2.10 [ 0:12:38<1 day, 6:26:56] +[titan] 2025-10-04 22:46:57,896 - root - INFO - step: 280 loss: 5.0898 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.6486 global_avg_mtp_loss: 4.4413 +[titan] 2025-10-04 22:46:57,896 - 
root - INFO - lr: 3.5037e-05 gnorm: 3.07 [ 0:12:49<1 day, 6:19:49] +[titan] 2025-10-04 22:47:08,770 - root - INFO - step: 285 loss: 5.1105 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.6521 global_avg_mtp_loss: 4.4584 +[titan] 2025-10-04 22:47:08,770 - root - INFO - lr: 3.5661e-05 gnorm: 2.23 [ 0:13:00<1 day, 6:12:55] +[titan] 2025-10-04 22:47:19,662 - root - INFO - step: 290 loss: 5.0807 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6456 global_avg_mtp_loss: 4.4352 +[titan] 2025-10-04 22:47:19,662 - root - INFO - lr: 3.6284e-05 gnorm: 2.82 [ 0:13:11<1 day, 6:06:18] +[titan] 2025-10-04 22:47:30,549 - root - INFO - step: 295 loss: 5.0464 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6427 global_avg_mtp_loss: 4.4037 +[titan] 2025-10-04 22:47:30,550 - root - INFO - lr: 3.6908e-05 gnorm: 2.35 [ 0:13:22<1 day, 5:59:52] +[titan] 2025-10-04 22:47:39,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:47:41,466 - root - INFO - step: 300 loss: 5.1119 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.6529 global_avg_mtp_loss: 4.4589 +[titan] 2025-10-04 22:47:41,466 - root - INFO - lr: 3.7531e-05 gnorm: 2.72 [ 0:13:33<1 day, 5:53:44] +[titan] 2025-10-04 22:47:52,331 - root - INFO - step: 305 loss: 4.9831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.6338 global_avg_mtp_loss: 4.3492 +[titan] 2025-10-04 22:47:52,331 - root - INFO - lr: 3.8155e-05 gnorm: 2.81 [ 0:13:44<1 day, 5:47:40] +[titan] 2025-10-04 22:48:03,188 - root - INFO - step: 310 loss: 4.9896 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.6364 global_avg_mtp_loss: 4.3532 +[titan] 2025-10-04 22:48:03,188 - root - INFO - lr: 3.8778e-05 gnorm: 2.39 [ 0:13:55<1 day, 5:41:47] +[titan] 2025-10-04 22:48:14,051 - root - INFO - step: 315 loss: 4.8865 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.6207 global_avg_mtp_loss: 4.2658 +[titan] 2025-10-04 22:48:14,051 - root - INFO - lr: 3.9401e-05 gnorm: 3.11 [ 0:14:05<1 day, 5:36:05] +[titan] 2025-10-04 22:48:24,948 - root - INFO - step: 320 loss: 4.9416 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.6290 global_avg_mtp_loss: 4.3126 +[titan] 2025-10-04 22:48:24,948 - root - INFO - lr: 4.0025e-05 gnorm: 2.57 [ 0:14:16<1 day, 5:30:38] +[titan] 2025-10-04 22:48:35,879 - root - INFO - step: 325 loss: 4.8914 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.6229 global_avg_mtp_loss: 4.2686 +[titan] 2025-10-04 22:48:35,879 - root - INFO - lr: 4.0648e-05 gnorm: 2.22 [ 0:14:27<1 day, 5:25:25] +[titan] 2025-10-04 22:48:46,771 - root - INFO - step: 330 loss: 4.8494 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.6146 global_avg_mtp_loss: 4.2348 +[titan] 2025-10-04 22:48:46,771 - 
root - INFO - lr: 4.1272e-05 gnorm: 2.17 [ 0:14:38<1 day, 5:20:16] +[titan] 2025-10-04 22:48:57,658 - root - INFO - step: 335 loss: 4.9431 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6274 global_avg_mtp_loss: 4.3157 +[titan] 2025-10-04 22:48:57,658 - root - INFO - lr: 4.1895e-05 gnorm: 2.41 [ 0:14:49<1 day, 5:15:15] +[titan] 2025-10-04 22:49:08,546 - root - INFO - step: 340 loss: 4.8429 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.6110 global_avg_mtp_loss: 4.2319 +[titan] 2025-10-04 22:49:08,546 - root - INFO - lr: 4.2519e-05 gnorm: 2.38 [ 0:15:00<1 day, 5:10:24] +[titan] 2025-10-04 22:49:19,437 - root - INFO - step: 345 loss: 4.7699 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.6044 global_avg_mtp_loss: 4.1656 +[titan] 2025-10-04 22:49:19,437 - root - INFO - lr: 4.3142e-05 gnorm: 2.47 [ 0:15:11<1 day, 5:05:40] +[titan] 2025-10-04 22:49:28,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:49:30,329 - root - INFO - step: 350 loss: 4.8354 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6129 global_avg_mtp_loss: 4.2225 +[titan] 2025-10-04 22:49:30,329 - root - INFO - lr: 4.3766e-05 gnorm: 2.30 [ 0:15:22<1 day, 5:01:05] +[titan] 2025-10-04 22:49:41,264 - root - INFO - step: 355 loss: 4.8409 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.6123 global_avg_mtp_loss: 4.2286 +[titan] 2025-10-04 22:49:41,264 - root - INFO - lr: 4.4389e-05 gnorm: 2.44 [ 0:15:33<1 day, 4:56:42] +[titan] 2025-10-04 22:49:52,147 - root - INFO - step: 360 loss: 4.6777 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.5902 global_avg_mtp_loss: 4.0875 +[titan] 2025-10-04 22:49:52,148 - root - INFO - lr: 4.5012e-05 gnorm: 1.96 [ 0:15:43<1 day, 4:52:20] +[titan] 2025-10-04 22:50:03,033 - root - INFO - step: 365 loss: 4.8152 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.6116 global_avg_mtp_loss: 4.2037 +[titan] 2025-10-04 22:50:03,033 - root - INFO - lr: 4.5636e-05 gnorm: 2.14 [ 0:15:54<1 day, 4:48:05] +[titan] 2025-10-04 22:50:13,908 - root - INFO - step: 370 loss: 4.7797 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.6024 global_avg_mtp_loss: 4.1773 +[titan] 2025-10-04 22:50:13,908 - root - INFO - lr: 4.6259e-05 gnorm: 2.37 [ 0:16:05<1 day, 4:43:56] +[titan] 2025-10-04 22:50:24,783 - root - INFO - step: 375 loss: 4.6716 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.5906 global_avg_mtp_loss: 4.0810 +[titan] 2025-10-04 22:50:24,783 - root - INFO - lr: 4.6883e-05 gnorm: 2.26 [ 0:16:16<1 day, 4:39:53] +[titan] 2025-10-04 22:50:35,652 - root - INFO - step: 380 loss: 4.7162 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.5950 global_avg_mtp_loss: 4.1212 +[titan] 2025-10-04 22:50:35,652 - 
root - INFO - lr: 4.7506e-05 gnorm: 2.15 [ 0:16:27<1 day, 4:35:55] +[titan] 2025-10-04 22:50:46,574 - root - INFO - step: 385 loss: 4.8016 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.6054 global_avg_mtp_loss: 4.1962 +[titan] 2025-10-04 22:50:46,574 - root - INFO - lr: 4.8130e-05 gnorm: 2.50 [ 0:16:38<1 day, 4:32:09] +[titan] 2025-10-04 22:50:57,443 - root - INFO - step: 390 loss: 4.7078 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.5929 global_avg_mtp_loss: 4.1150 +[titan] 2025-10-04 22:50:57,444 - root - INFO - lr: 4.8753e-05 gnorm: 2.00 [ 0:16:49<1 day, 4:28:23] +[titan] 2025-10-04 22:51:08,305 - root - INFO - step: 395 loss: 4.6384 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.5834 global_avg_mtp_loss: 4.0551 +[titan] 2025-10-04 22:51:08,305 - root - INFO - lr: 4.9377e-05 gnorm: 2.37 [ 0:17:00<1 day, 4:24:42] +[titan] 2025-10-04 22:51:16,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:51:19,152 - root - INFO - step: 400 loss: 4.6918 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.5928 global_avg_mtp_loss: 4.0990 +[titan] 2025-10-04 22:51:19,152 - root - INFO - lr: 5.0000e-05 gnorm: 2.36 [ 0:17:10<1 day, 4:21:04] +[titan] 2025-10-04 22:51:30,025 - root - INFO - step: 405 loss: 4.6284 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.5843 global_avg_mtp_loss: 4.0441 +[titan] 2025-10-04 22:51:30,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.37 [ 0:17:21<1 day, 4:17:34] +[titan] 2025-10-04 22:51:40,903 - root - INFO - step: 410 loss: 4.5757 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.5764 global_avg_mtp_loss: 3.9993 +[titan] 2025-10-04 22:51:40,903 - root - INFO - lr: 5.0000e-05 gnorm: 2.16 [ 0:17:32<1 day, 4:14:10] +[titan] 2025-10-04 22:51:51,757 - root - INFO - step: 415 loss: 4.6798 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.5875 global_avg_mtp_loss: 4.0923 +[titan] 2025-10-04 22:51:51,758 - root - INFO - lr: 5.0000e-05 gnorm: 2.18 [ 0:17:43<1 day, 4:10:48] +[titan] 2025-10-04 22:52:02,632 - root - INFO - step: 420 loss: 4.6984 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.5914 global_avg_mtp_loss: 4.1070 +[titan] 2025-10-04 22:52:02,632 - root - INFO - lr: 5.0000e-05 gnorm: 2.08 [ 0:17:54<1 day, 4:07:32] +[titan] 2025-10-04 22:52:13,523 - root - INFO - step: 425 loss: 4.6583 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.5870 global_avg_mtp_loss: 4.0713 +[titan] 2025-10-04 22:52:13,523 - root - INFO - lr: 5.0000e-05 gnorm: 1.97 [ 0:18:05<1 day, 4:04:23] +[titan] 2025-10-04 22:52:24,408 - root - INFO - step: 430 loss: 4.5843 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.5750 global_avg_mtp_loss: 4.0093 +[titan] 2025-10-04 22:52:24,408 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.22 [ 0:18:16<1 day, 4:01:16] +[titan] 2025-10-04 22:52:35,258 - root - INFO - step: 435 loss: 4.5321 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.5697 global_avg_mtp_loss: 3.9625 +[titan] 2025-10-04 22:52:35,258 - root - INFO - lr: 5.0000e-05 gnorm: 2.13 [ 0:18:27<1 day, 3:58:11] +[titan] 2025-10-04 22:52:46,145 - root - INFO - step: 440 loss: 4.5606 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.5730 global_avg_mtp_loss: 3.9875 +[titan] 2025-10-04 22:52:46,146 - root - INFO - lr: 5.0000e-05 gnorm: 2.40 [ 0:18:37<1 day, 3:55:13] +[titan] 2025-10-04 22:52:57,025 - root - INFO - step: 445 loss: 4.5406 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.5687 global_avg_mtp_loss: 3.9718 +[titan] 2025-10-04 22:52:57,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:18:48<1 day, 3:52:18] +[titan] 2025-10-04 22:53:05,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:53:07,904 - root - INFO - step: 450 loss: 4.5707 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.5740 global_avg_mtp_loss: 3.9967 +[titan] 2025-10-04 22:53:07,904 - root - INFO - lr: 5.0000e-05 gnorm: 2.34 [ 0:18:59<1 day, 3:49:27] +[titan] 2025-10-04 22:53:18,769 - root - INFO - step: 455 loss: 4.4743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.5620 global_avg_mtp_loss: 3.9123 +[titan] 2025-10-04 22:53:18,770 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:19:10<1 day, 3:46:38] +[titan] 2025-10-04 22:53:29,609 - root - INFO - step: 460 loss: 4.4303 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8761 +[titan] 2025-10-04 22:53:29,609 - root - INFO - lr: 5.0000e-05 gnorm: 2.25 [ 0:19:21<1 day, 3:43:50] +[titan] 2025-10-04 22:53:40,497 - root - INFO - step: 465 loss: 4.4283 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.5552 global_avg_mtp_loss: 3.8731 +[titan] 2025-10-04 22:53:40,497 - root - INFO - lr: 5.0000e-05 gnorm: 1.84 [ 0:19:32<1 day, 3:41:10] +[titan] 2025-10-04 22:53:51,344 - root - INFO - step: 470 loss: 4.4176 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8632 +[titan] 2025-10-04 22:53:51,344 - root - INFO - lr: 5.0000e-05 gnorm: 2.15 [ 0:19:43<1 day, 3:38:29] +[titan] 2025-10-04 22:54:02,202 - root - INFO - step: 475 loss: 4.4882 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.5655 global_avg_mtp_loss: 3.9227 +[titan] 2025-10-04 22:54:02,202 - root - INFO - lr: 5.0000e-05 gnorm: 1.78 [ 0:19:53<1 day, 3:35:53] +[titan] 2025-10-04 22:54:13,066 - root - INFO - step: 480 loss: 4.4600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.5572 global_avg_mtp_loss: 3.9028 +[titan] 2025-10-04 22:54:13,066 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.00 [ 0:20:04<1 day, 3:33:20] +[titan] 2025-10-04 22:54:23,913 - root - INFO - step: 485 loss: 4.3781 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.5484 global_avg_mtp_loss: 3.8297 +[titan] 2025-10-04 22:54:23,913 - root - INFO - lr: 4.9999e-05 gnorm: 1.60 [ 0:20:15<1 day, 3:30:49] +[titan] 2025-10-04 22:54:34,742 - root - INFO - step: 490 loss: 4.4068 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.5524 global_avg_mtp_loss: 3.8544 +[titan] 2025-10-04 22:54:34,742 - root - INFO - lr: 4.9999e-05 gnorm: 2.19 [ 0:20:26<1 day, 3:28:19] +[titan] 2025-10-04 22:54:45,647 - root - INFO - step: 495 loss: 4.3459 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.5461 global_avg_mtp_loss: 3.7998 +[titan] 2025-10-04 22:54:45,647 - root - INFO - lr: 4.9999e-05 gnorm: 1.79 [ 0:20:37<1 day, 3:25:57] +[titan] 2025-10-04 22:54:54,303 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:54:56,481 - root - INFO - step: 500 loss: 4.5195 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.5664 global_avg_mtp_loss: 3.9531 +[titan] 2025-10-04 22:54:56,481 - root - INFO - lr: 4.9999e-05 gnorm: 1.81 [ 0:20:48<1 day, 3:23:33] +[titan] 2025-10-04 22:55:07,316 - root - INFO - step: 505 loss: 4.3727 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.5468 global_avg_mtp_loss: 3.8259 +[titan] 2025-10-04 22:55:07,316 - root - INFO - lr: 4.9999e-05 gnorm: 1.99 [ 0:20:59<1 day, 3:21:12] +[titan] 2025-10-04 22:55:18,908 - root - INFO - step: 510 loss: 4.3913 memory: 118.84GiB(85.28%) tps: 28,268 tflops: 392.18 mfu: 39.65% global_avg_ntp_loss: 0.5477 global_avg_mtp_loss: 3.8435 +[titan] 2025-10-04 22:55:18,908 - root - INFO - lr: 4.9999e-05 gnorm: 1.64 [ 0:21:10<1 day, 3:19:52] +[titan] 2025-10-04 22:55:23,540 - root - INFO - Dumping profiler traces at step 512 +[titan] 2025-10-04 22:55:23,575 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 22:55:30,219 - root - INFO - step: 515 loss: 4.3744 memory: 118.84GiB(85.28%) tps: 28,972 tflops: 401.94 mfu: 40.64% global_avg_ntp_loss: 0.5458 global_avg_mtp_loss: 3.8286 +[titan] 2025-10-04 22:55:30,219 - root - INFO - lr: 4.9999e-05 gnorm: 1.67 [ 0:21:22<1 day, 3:18:11] +[titan] 2025-10-04 22:55:41,134 - root - INFO - step: 520 loss: 4.3427 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.5439 global_avg_mtp_loss: 3.7988 +[titan] 2025-10-04 22:55:41,135 - root - INFO - lr: 4.9999e-05 gnorm: 2.16 [ 0:21:32<1 day, 3:16:03] +[titan] 2025-10-04 22:55:52,306 - root - INFO - step: 525 loss: 4.3706 memory: 118.84GiB(85.28%) tps: 29,331 tflops: 406.92 mfu: 41.15% global_avg_ntp_loss: 0.5472 global_avg_mtp_loss: 3.8234 +[titan] 2025-10-04 22:55:52,307 - root - INFO - lr: 4.9999e-05 gnorm: 1.88 [ 0:21:44<1 day, 3:14:15] +[titan] 2025-10-04 22:56:03,131 - root - INFO 
- step: 530 loss: 4.3726 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.5471 global_avg_mtp_loss: 3.8256 +[titan] 2025-10-04 22:56:03,131 - root - INFO - lr: 4.9999e-05 gnorm: 2.18 [ 0:21:54<1 day, 3:12:04] +[titan] 2025-10-04 22:56:13,930 - root - INFO - step: 535 loss: 4.4086 memory: 118.84GiB(85.28%) tps: 30,344 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.5498 global_avg_mtp_loss: 3.8588 +[titan] 2025-10-04 22:56:13,930 - root - INFO - lr: 4.9999e-05 gnorm: 1.95 [ 0:22:05<1 day, 3:09:53] +[titan] 2025-10-04 22:56:24,765 - root - INFO - step: 540 loss: 4.4155 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.5521 global_avg_mtp_loss: 3.8634 +[titan] 2025-10-04 22:56:24,765 - root - INFO - lr: 4.9999e-05 gnorm: 2.04 [ 0:22:16<1 day, 3:07:47] +[titan] 2025-10-04 22:56:35,621 - root - INFO - step: 545 loss: 4.3565 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.5455 global_avg_mtp_loss: 3.8109 +[titan] 2025-10-04 22:56:35,621 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:22:27<1 day, 3:05:45] +[titan] 2025-10-04 22:56:44,311 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:56:46,499 - root - INFO - step: 550 loss: 4.2924 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.5365 global_avg_mtp_loss: 3.7559 +[titan] 2025-10-04 22:56:46,499 - root - INFO - lr: 4.9998e-05 gnorm: 1.96 [ 0:22:38<1 day, 3:03:46] +[titan] 2025-10-04 22:56:57,360 - root - INFO - step: 555 loss: 4.3086 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.5367 global_avg_mtp_loss: 3.7719 +[titan] 2025-10-04 22:56:57,361 - root - INFO - lr: 4.9998e-05 gnorm: 1.94 [ 0:22:49<1 day, 3:01:48] +[titan] 2025-10-04 22:57:08,185 - root - INFO - step: 560 loss: 4.2981 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.5349 global_avg_mtp_loss: 3.7631 +[titan] 2025-10-04 22:57:08,185 - root - INFO - lr: 4.9998e-05 gnorm: 1.84 [ 0:22:59<1 day, 2:59:49] +[titan] 2025-10-04 22:57:19,007 - root - INFO - step: 565 loss: 4.3383 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7969 +[titan] 2025-10-04 22:57:19,007 - root - INFO - lr: 4.9998e-05 gnorm: 1.66 [ 0:23:10<1 day, 2:57:52] +[titan] 2025-10-04 22:57:29,825 - root - INFO - step: 570 loss: 4.3634 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.5450 global_avg_mtp_loss: 3.8184 +[titan] 2025-10-04 22:57:29,825 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:23:21<1 day, 2:55:57] +[titan] 2025-10-04 22:57:40,662 - root - INFO - step: 575 loss: 4.2261 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.5285 global_avg_mtp_loss: 3.6977 +[titan] 2025-10-04 22:57:40,663 - root - INFO - lr: 4.9998e-05 gnorm: 1.67 [ 0:23:32<1 day, 2:54:04] +[titan] 2025-10-04 22:57:51,566 - root - INFO - step: 580 loss: 4.2298 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.5294 global_avg_mtp_loss: 3.7005 +[titan] 2025-10-04 22:57:51,566 - 
root - INFO - lr: 4.9998e-05 gnorm: 1.98 [ 0:23:43<1 day, 2:52:19] +[titan] 2025-10-04 22:58:02,405 - root - INFO - step: 585 loss: 4.3315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7902 +[titan] 2025-10-04 22:58:02,405 - root - INFO - lr: 4.9998e-05 gnorm: 1.72 [ 0:23:54<1 day, 2:50:30] +[titan] 2025-10-04 22:58:13,269 - root - INFO - step: 590 loss: 4.2600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.5322 global_avg_mtp_loss: 3.7278 +[titan] 2025-10-04 22:58:13,270 - root - INFO - lr: 4.9997e-05 gnorm: 1.95 [ 0:24:05<1 day, 2:48:44] +[titan] 2025-10-04 22:58:24,105 - root - INFO - step: 595 loss: 4.1808 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.5216 global_avg_mtp_loss: 3.6592 +[titan] 2025-10-04 22:58:24,105 - root - INFO - lr: 4.9997e-05 gnorm: 1.65 [ 0:24:15<1 day, 2:46:59] +[titan] 2025-10-04 22:58:32,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:58:34,964 - root - INFO - step: 600 loss: 4.1976 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.5240 global_avg_mtp_loss: 3.6736 +[titan] 2025-10-04 22:58:34,964 - root - INFO - lr: 4.9997e-05 gnorm: 1.83 [ 0:24:26<1 day, 2:45:16] +[titan] 2025-10-04 22:58:45,870 - root - INFO - step: 605 loss: 4.3159 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.5391 global_avg_mtp_loss: 3.7769 +[titan] 2025-10-04 22:58:45,870 - root - INFO - lr: 4.9997e-05 gnorm: 1.87 [ 0:24:37<1 day, 2:43:38] +[titan] 2025-10-04 22:58:56,733 - root - INFO - step: 610 loss: 4.1166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.5131 global_avg_mtp_loss: 3.6035 +[titan] 2025-10-04 22:58:56,733 - root - INFO - lr: 4.9997e-05 gnorm: 1.62 [ 0:24:48<1 day, 2:41:59] +[titan] 2025-10-04 22:59:07,585 - root - INFO - step: 615 loss: 4.2340 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.5275 global_avg_mtp_loss: 3.7065 +[titan] 2025-10-04 22:59:07,585 - root - INFO - lr: 4.9997e-05 gnorm: 1.88 [ 0:24:59<1 day, 2:40:20] +[titan] 2025-10-04 22:59:18,424 - root - INFO - step: 620 loss: 4.2004 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5249 global_avg_mtp_loss: 3.6756 +[titan] 2025-10-04 22:59:18,424 - root - INFO - lr: 4.9997e-05 gnorm: 1.91 [ 0:25:10<1 day, 2:38:42] +[titan] 2025-10-04 22:59:29,245 - root - INFO - step: 625 loss: 4.2113 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.5247 global_avg_mtp_loss: 3.6866 +[titan] 2025-10-04 22:59:29,245 - root - INFO - lr: 4.9996e-05 gnorm: 1.62 [ 0:25:21<1 day, 2:37:04] +[titan] 2025-10-04 22:59:40,085 - root - INFO - step: 630 loss: 4.1954 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.5210 global_avg_mtp_loss: 3.6745 +[titan] 2025-10-04 22:59:40,085 - 
root - INFO - lr: 4.9996e-05 gnorm: 1.68 [ 0:25:31<1 day, 2:35:29] +[titan] 2025-10-04 22:59:51,004 - root - INFO - step: 635 loss: 4.0965 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.5096 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 22:59:51,004 - root - INFO - lr: 4.9996e-05 gnorm: 1.82 [ 0:25:42<1 day, 2:34:00] +[titan] 2025-10-04 23:00:01,832 - root - INFO - step: 640 loss: 4.2067 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.5236 global_avg_mtp_loss: 3.6831 +[titan] 2025-10-04 23:00:01,832 - root - INFO - lr: 4.9996e-05 gnorm: 1.87 [ 0:25:53<1 day, 2:32:27] +[titan] 2025-10-04 23:00:12,683 - root - INFO - step: 645 loss: 4.0562 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.5030 global_avg_mtp_loss: 3.5532 +[titan] 2025-10-04 23:00:12,683 - root - INFO - lr: 4.9996e-05 gnorm: 1.73 [ 0:26:04<1 day, 2:30:56] +[titan] 2025-10-04 23:00:21,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:00:23,493 - root - INFO - step: 650 loss: 4.1298 memory: 118.84GiB(85.28%) tps: 30,314 tflops: 420.56 mfu: 42.52% global_avg_ntp_loss: 0.5128 global_avg_mtp_loss: 3.6170 +[titan] 2025-10-04 23:00:23,493 - root - INFO - lr: 4.9996e-05 gnorm: 1.75 [ 0:26:15<1 day, 2:29:24] +[titan] 2025-10-04 23:00:34,283 - root - INFO - step: 655 loss: 4.0941 memory: 118.84GiB(85.28%) tps: 30,369 tflops: 421.33 mfu: 42.60% global_avg_ntp_loss: 0.5089 global_avg_mtp_loss: 3.5852 +[titan] 2025-10-04 23:00:34,283 - root - INFO - lr: 4.9995e-05 gnorm: 1.70 [ 0:26:26<1 day, 2:27:52] +[titan] 2025-10-04 23:00:45,102 - root - INFO - step: 660 loss: 4.1313 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.5130 global_avg_mtp_loss: 3.6184 +[titan] 2025-10-04 23:00:45,102 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:36<1 day, 2:26:23] +[titan] 2025-10-04 23:00:55,946 - root - INFO - step: 665 loss: 4.1367 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5147 global_avg_mtp_loss: 3.6220 +[titan] 2025-10-04 23:00:55,946 - root - INFO - lr: 4.9995e-05 gnorm: 1.99 [ 0:26:47<1 day, 2:24:57] +[titan] 2025-10-04 23:01:06,742 - root - INFO - step: 670 loss: 4.0904 memory: 118.84GiB(85.28%) tps: 30,352 tflops: 421.09 mfu: 42.58% global_avg_ntp_loss: 0.5075 global_avg_mtp_loss: 3.5829 +[titan] 2025-10-04 23:01:06,743 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:58<1 day, 2:23:29] +[titan] 2025-10-04 23:01:17,585 - root - INFO - step: 675 loss: 4.0638 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.5042 global_avg_mtp_loss: 3.5596 +[titan] 2025-10-04 23:01:17,585 - root - INFO - lr: 4.9995e-05 gnorm: 2.15 [ 0:27:09<1 day, 2:22:05] +[titan] 2025-10-04 23:01:28,410 - root - INFO - step: 680 loss: 4.0064 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4976 global_avg_mtp_loss: 3.5088 +[titan] 2025-10-04 23:01:28,410 - 
root - INFO - lr: 4.9994e-05 gnorm: 1.81 [ 0:27:20<1 day, 2:20:41] +[titan] 2025-10-04 23:01:39,214 - root - INFO - step: 685 loss: 4.1427 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.5134 global_avg_mtp_loss: 3.6293 +[titan] 2025-10-04 23:01:39,214 - root - INFO - lr: 4.9994e-05 gnorm: 1.69 [ 0:27:30<1 day, 2:19:17] +[titan] 2025-10-04 23:01:50,056 - root - INFO - step: 690 loss: 4.0571 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.5019 global_avg_mtp_loss: 3.5553 +[titan] 2025-10-04 23:01:50,056 - root - INFO - lr: 4.9994e-05 gnorm: 1.63 [ 0:27:41<1 day, 2:17:56] +[titan] 2025-10-04 23:02:00,900 - root - INFO - step: 695 loss: 4.0380 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5011 global_avg_mtp_loss: 3.5369 +[titan] 2025-10-04 23:02:00,900 - root - INFO - lr: 4.9994e-05 gnorm: 1.77 [ 0:27:52<1 day, 2:16:36] +[titan] 2025-10-04 23:02:09,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:02:11,753 - root - INFO - step: 700 loss: 4.0879 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.5070 global_avg_mtp_loss: 3.5810 +[titan] 2025-10-04 23:02:11,754 - root - INFO - lr: 4.9994e-05 gnorm: 1.96 [ 0:28:03<1 day, 2:15:18] +[titan] 2025-10-04 23:02:22,605 - root - INFO - step: 705 loss: 4.0241 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4988 global_avg_mtp_loss: 3.5252 +[titan] 2025-10-04 23:02:22,605 - root - INFO - lr: 4.9993e-05 gnorm: 1.83 [ 0:28:14<1 day, 2:14:00] +[titan] 2025-10-04 23:02:33,405 - root - INFO - step: 710 loss: 4.0903 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.5058 global_avg_mtp_loss: 3.5844 +[titan] 2025-10-04 23:02:33,405 - root - INFO - lr: 4.9993e-05 gnorm: 1.64 [ 0:28:25<1 day, 2:12:41] +[titan] 2025-10-04 23:02:44,244 - root - INFO - step: 715 loss: 4.0535 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5015 global_avg_mtp_loss: 3.5520 +[titan] 2025-10-04 23:02:44,244 - root - INFO - lr: 4.9993e-05 gnorm: 1.50 [ 0:28:36<1 day, 2:11:25] +[titan] 2025-10-04 23:02:55,077 - root - INFO - step: 720 loss: 4.0093 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.4957 global_avg_mtp_loss: 3.5137 +[titan] 2025-10-04 23:02:55,077 - root - INFO - lr: 4.9993e-05 gnorm: 1.58 [ 0:28:46<1 day, 2:10:09] +[titan] 2025-10-04 23:03:05,902 - root - INFO - step: 725 loss: 3.9529 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4894 global_avg_mtp_loss: 3.4635 +[titan] 2025-10-04 23:03:05,902 - root - INFO - lr: 4.9992e-05 gnorm: 1.53 [ 0:28:57<1 day, 2:08:54] +[titan] 2025-10-04 23:03:16,765 - root - INFO - step: 730 loss: 3.9701 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.4916 global_avg_mtp_loss: 3.4785 +[titan] 2025-10-04 23:03:16,765 - 
root - INFO - lr: 4.9992e-05 gnorm: 1.57 [ 0:29:08<1 day, 2:07:41] +[titan] 2025-10-04 23:03:27,585 - root - INFO - step: 735 loss: 4.0191 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.4982 global_avg_mtp_loss: 3.5209 +[titan] 2025-10-04 23:03:27,585 - root - INFO - lr: 4.9992e-05 gnorm: 1.59 [ 0:29:19<1 day, 2:06:28] +[titan] 2025-10-04 23:03:38,404 - root - INFO - step: 740 loss: 3.9770 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.4912 global_avg_mtp_loss: 3.4857 +[titan] 2025-10-04 23:03:38,404 - root - INFO - lr: 4.9992e-05 gnorm: 1.61 [ 0:29:30<1 day, 2:05:15] +[titan] 2025-10-04 23:03:49,265 - root - INFO - step: 745 loss: 4.0755 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.5054 global_avg_mtp_loss: 3.5701 +[titan] 2025-10-04 23:03:49,265 - root - INFO - lr: 4.9992e-05 gnorm: 1.52 [ 0:29:41<1 day, 2:04:05] +[titan] 2025-10-04 23:03:57,893 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:04:00,081 - root - INFO - step: 750 loss: 3.9375 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.4868 global_avg_mtp_loss: 3.4508 +[titan] 2025-10-04 23:04:00,081 - root - INFO - lr: 4.9991e-05 gnorm: 1.67 [ 0:29:51<1 day, 2:02:53] +[titan] 2025-10-04 23:04:10,923 - root - INFO - step: 755 loss: 4.0060 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.4974 global_avg_mtp_loss: 3.5087 +[titan] 2025-10-04 23:04:10,923 - root - INFO - lr: 4.9991e-05 gnorm: 1.62 [ 0:30:02<1 day, 2:01:44] +[titan] 2025-10-04 23:04:21,765 - root - INFO - step: 760 loss: 3.9826 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.4928 global_avg_mtp_loss: 3.4897 +[titan] 2025-10-04 23:04:21,765 - root - INFO - lr: 4.9991e-05 gnorm: 1.57 [ 0:30:13<1 day, 2:00:35] +[titan] 2025-10-04 23:04:32,624 - root - INFO - step: 765 loss: 3.9503 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4871 global_avg_mtp_loss: 3.4633 +[titan] 2025-10-04 23:04:32,625 - root - INFO - lr: 4.9991e-05 gnorm: 1.73 [ 0:30:24<1 day, 1:59:28] +[titan] 2025-10-04 23:04:43,499 - root - INFO - step: 770 loss: 4.0928 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.5059 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 23:04:43,499 - root - INFO - lr: 4.9990e-05 gnorm: 1.68 [ 0:30:35<1 day, 1:58:23] +[titan] 2025-10-04 23:04:54,364 - root - INFO - step: 775 loss: 4.0138 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4966 global_avg_mtp_loss: 3.5172 +[titan] 2025-10-04 23:04:54,364 - root - INFO - lr: 4.9990e-05 gnorm: 1.84 [ 0:30:46<1 day, 1:57:18] +[titan] 2025-10-04 23:05:05,165 - root - INFO - step: 780 loss: 3.9609 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.4878 global_avg_mtp_loss: 3.4731 +[titan] 2025-10-04 23:05:05,165 - 
root - INFO - lr: 4.9990e-05 gnorm: 1.66 [ 0:30:56<1 day, 1:56:10] +[titan] 2025-10-04 23:05:16,001 - root - INFO - step: 785 loss: 4.0392 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.5003 global_avg_mtp_loss: 3.5389 +[titan] 2025-10-04 23:05:16,002 - root - INFO - lr: 4.9989e-05 gnorm: 1.74 [ 0:31:07<1 day, 1:55:05] +[titan] 2025-10-04 23:05:26,809 - root - INFO - step: 790 loss: 3.9123 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.4820 global_avg_mtp_loss: 3.4303 +[titan] 2025-10-04 23:05:26,809 - root - INFO - lr: 4.9989e-05 gnorm: 1.71 [ 0:31:18<1 day, 1:53:59] +[titan] 2025-10-04 23:05:37,659 - root - INFO - step: 795 loss: 3.9513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.4870 global_avg_mtp_loss: 3.4643 +[titan] 2025-10-04 23:05:37,659 - root - INFO - lr: 4.9989e-05 gnorm: 1.57 [ 0:31:29<1 day, 1:52:56] +[titan] 2025-10-04 23:05:46,349 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:05:48,524 - root - INFO - step: 800 loss: 3.8805 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4013 +[titan] 2025-10-04 23:05:48,524 - root - INFO - lr: 4.9989e-05 gnorm: 1.63 [ 0:31:40<1 day, 1:51:54] +[titan] 2025-10-04 23:05:59,423 - root - INFO - step: 805 loss: 4.0567 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.5041 global_avg_mtp_loss: 3.5527 +[titan] 2025-10-04 23:05:59,424 - root - INFO - lr: 4.9988e-05 gnorm: 1.65 [ 0:31:51<1 day, 1:50:54] +[titan] 2025-10-04 23:06:10,267 - root - INFO - step: 810 loss: 3.9384 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4865 global_avg_mtp_loss: 3.4519 +[titan] 2025-10-04 23:06:10,267 - root - INFO - lr: 4.9988e-05 gnorm: 1.62 [ 0:32:02<1 day, 1:49:53] +[titan] 2025-10-04 23:06:21,120 - root - INFO - step: 815 loss: 3.9402 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.4841 global_avg_mtp_loss: 3.4561 +[titan] 2025-10-04 23:06:21,121 - root - INFO - lr: 4.9988e-05 gnorm: 1.83 [ 0:32:12<1 day, 1:48:52] +[titan] 2025-10-04 23:06:31,962 - root - INFO - step: 820 loss: 3.8907 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.4804 global_avg_mtp_loss: 3.4102 +[titan] 2025-10-04 23:06:31,962 - root - INFO - lr: 4.9987e-05 gnorm: 1.56 [ 0:32:23<1 day, 1:47:52] +[titan] 2025-10-04 23:06:42,804 - root - INFO - step: 825 loss: 3.9391 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.4866 global_avg_mtp_loss: 3.4525 +[titan] 2025-10-04 23:06:42,804 - root - INFO - lr: 4.9987e-05 gnorm: 1.73 [ 0:32:34<1 day, 1:46:52] +[titan] 2025-10-04 23:06:53,697 - root - INFO - step: 830 loss: 3.8534 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.4757 global_avg_mtp_loss: 3.3777 +[titan] 2025-10-04 23:06:53,697 - 
root - INFO - lr: 4.9987e-05 gnorm: 1.46 [ 0:32:45<1 day, 1:45:55] +[titan] 2025-10-04 23:07:04,599 - root - INFO - step: 835 loss: 3.9680 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.4909 global_avg_mtp_loss: 3.4770 +[titan] 2025-10-04 23:07:04,599 - root - INFO - lr: 4.9987e-05 gnorm: 1.69 [ 0:32:56<1 day, 1:44:59] +[titan] 2025-10-04 23:07:15,482 - root - INFO - step: 840 loss: 3.8804 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4793 global_avg_mtp_loss: 3.4011 +[titan] 2025-10-04 23:07:15,483 - root - INFO - lr: 4.9986e-05 gnorm: 1.65 [ 0:33:07<1 day, 1:44:03] +[titan] 2025-10-04 23:07:26,345 - root - INFO - step: 845 loss: 3.9335 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.4859 global_avg_mtp_loss: 3.4476 +[titan] 2025-10-04 23:07:26,345 - root - INFO - lr: 4.9986e-05 gnorm: 1.67 [ 0:33:18<1 day, 1:43:06] +[titan] 2025-10-04 23:07:35,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:07:37,166 - root - INFO - step: 850 loss: 3.9466 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.4899 global_avg_mtp_loss: 3.4568 +[titan] 2025-10-04 23:07:37,166 - root - INFO - lr: 4.9986e-05 gnorm: 1.53 [ 0:33:28<1 day, 1:42:08] +[titan] 2025-10-04 23:07:48,038 - root - INFO - step: 855 loss: 3.8553 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3805 +[titan] 2025-10-04 23:07:48,038 - root - INFO - lr: 4.9985e-05 gnorm: 1.54 [ 0:33:39<1 day, 1:41:13] +[titan] 2025-10-04 23:07:58,950 - root - INFO - step: 860 loss: 3.9192 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.4837 global_avg_mtp_loss: 3.4355 +[titan] 2025-10-04 23:07:58,951 - root - INFO - lr: 4.9985e-05 gnorm: 1.63 [ 0:33:50<1 day, 1:40:20] +[titan] 2025-10-04 23:08:09,863 - root - INFO - step: 865 loss: 3.8398 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.4747 global_avg_mtp_loss: 3.3651 +[titan] 2025-10-04 23:08:09,863 - root - INFO - lr: 4.9985e-05 gnorm: 1.57 [ 0:34:01<1 day, 1:39:28] +[titan] 2025-10-04 23:08:20,763 - root - INFO - step: 870 loss: 3.9660 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.4876 global_avg_mtp_loss: 3.4784 +[titan] 2025-10-04 23:08:20,763 - root - INFO - lr: 4.9984e-05 gnorm: 1.70 [ 0:34:12<1 day, 1:38:36] +[titan] 2025-10-04 23:08:31,644 - root - INFO - step: 875 loss: 3.8236 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4708 global_avg_mtp_loss: 3.3528 +[titan] 2025-10-04 23:08:31,644 - root - INFO - lr: 4.9984e-05 gnorm: 1.58 [ 0:34:23<1 day, 1:37:43] +[titan] 2025-10-04 23:08:42,521 - root - INFO - step: 880 loss: 3.8393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4731 global_avg_mtp_loss: 3.3662 +[titan] 2025-10-04 23:08:42,522 - 
root - INFO - lr: 4.9984e-05 gnorm: 1.66 [ 0:34:34<1 day, 1:36:51] +[titan] 2025-10-04 23:08:53,411 - root - INFO - step: 885 loss: 3.9181 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4832 global_avg_mtp_loss: 3.4349 +[titan] 2025-10-04 23:08:53,412 - root - INFO - lr: 4.9983e-05 gnorm: 1.81 [ 0:34:45<1 day, 1:35:59] +[titan] 2025-10-04 23:09:04,287 - root - INFO - step: 890 loss: 3.8540 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4756 global_avg_mtp_loss: 3.3784 +[titan] 2025-10-04 23:09:04,287 - root - INFO - lr: 4.9983e-05 gnorm: 1.63 [ 0:34:56<1 day, 1:35:08] +[titan] 2025-10-04 23:09:15,149 - root - INFO - step: 895 loss: 3.7956 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.4646 global_avg_mtp_loss: 3.3310 +[titan] 2025-10-04 23:09:15,149 - root - INFO - lr: 4.9983e-05 gnorm: 1.59 [ 0:35:06<1 day, 1:34:16] +[titan] 2025-10-04 23:09:23,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:09:26,036 - root - INFO - step: 900 loss: 3.8814 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4023 +[titan] 2025-10-04 23:09:26,036 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:17<1 day, 1:33:26] +[titan] 2025-10-04 23:09:36,928 - root - INFO - step: 905 loss: 3.8547 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3797 +[titan] 2025-10-04 23:09:36,928 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:28<1 day, 1:32:36] +[titan] 2025-10-04 23:09:47,795 - root - INFO - step: 910 loss: 3.7503 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4614 global_avg_mtp_loss: 3.2890 +[titan] 2025-10-04 23:09:47,795 - root - INFO - lr: 4.9982e-05 gnorm: 1.63 [ 0:35:39<1 day, 1:31:46] +[titan] 2025-10-04 23:09:58,664 - root - INFO - step: 915 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3219 +[titan] 2025-10-04 23:09:58,665 - root - INFO - lr: 4.9981e-05 gnorm: 1.57 [ 0:35:50<1 day, 1:30:56] +[titan] 2025-10-04 23:10:09,537 - root - INFO - step: 920 loss: 3.8477 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.4753 global_avg_mtp_loss: 3.3723 +[titan] 2025-10-04 23:10:09,537 - root - INFO - lr: 4.9981e-05 gnorm: 1.56 [ 0:36:01<1 day, 1:30:07] +[titan] 2025-10-04 23:10:20,420 - root - INFO - step: 925 loss: 3.8141 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3439 +[titan] 2025-10-04 23:10:20,420 - root - INFO - lr: 4.9980e-05 gnorm: 1.53 [ 0:36:12<1 day, 1:29:19] +[titan] 2025-10-04 23:10:31,298 - root - INFO - step: 930 loss: 3.8185 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3482 +[titan] 2025-10-04 23:10:31,298 - 
root - INFO - lr: 4.9980e-05 gnorm: 1.56 [ 0:36:23<1 day, 1:28:31] +[titan] 2025-10-04 23:10:42,186 - root - INFO - step: 935 loss: 3.7234 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.4574 global_avg_mtp_loss: 3.2661 +[titan] 2025-10-04 23:10:42,186 - root - INFO - lr: 4.9980e-05 gnorm: 1.52 [ 0:36:33<1 day, 1:27:44] +[titan] 2025-10-04 23:10:53,053 - root - INFO - step: 940 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4666 global_avg_mtp_loss: 3.3211 +[titan] 2025-10-04 23:10:53,053 - root - INFO - lr: 4.9979e-05 gnorm: 1.69 [ 0:36:44<1 day, 1:26:56] +[titan] 2025-10-04 23:11:03,935 - root - INFO - step: 945 loss: 3.7815 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.4635 global_avg_mtp_loss: 3.3180 +[titan] 2025-10-04 23:11:03,935 - root - INFO - lr: 4.9979e-05 gnorm: 1.45 [ 0:36:55<1 day, 1:26:10] +[titan] 2025-10-04 23:11:12,603 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:11:14,787 - root - INFO - step: 950 loss: 3.8345 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4716 global_avg_mtp_loss: 3.3629 +[titan] 2025-10-04 23:11:14,787 - root - INFO - lr: 4.9979e-05 gnorm: 1.54 [ 0:37:06<1 day, 1:25:22] +[titan] 2025-10-04 23:11:25,662 - root - INFO - step: 955 loss: 3.7153 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4570 global_avg_mtp_loss: 3.2583 +[titan] 2025-10-04 23:11:25,662 - root - INFO - lr: 4.9978e-05 gnorm: 1.40 [ 0:37:17<1 day, 1:24:36] +[titan] 2025-10-04 23:11:36,506 - root - INFO - step: 960 loss: 3.7474 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4595 global_avg_mtp_loss: 3.2878 +[titan] 2025-10-04 23:11:36,506 - root - INFO - lr: 4.9978e-05 gnorm: 1.39 [ 0:37:28<1 day, 1:23:49] +[titan] 2025-10-04 23:11:47,428 - root - INFO - step: 965 loss: 3.7469 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4597 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:11:47,429 - root - INFO - lr: 4.9977e-05 gnorm: 1.60 [ 0:37:39<1 day, 1:23:05] +[titan] 2025-10-04 23:11:58,339 - root - INFO - step: 970 loss: 3.7767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.4638 global_avg_mtp_loss: 3.3129 +[titan] 2025-10-04 23:11:58,340 - root - INFO - lr: 4.9977e-05 gnorm: 1.59 [ 0:37:50<1 day, 1:22:21] +[titan] 2025-10-04 23:12:09,214 - root - INFO - step: 975 loss: 3.7198 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4568 global_avg_mtp_loss: 3.2630 +[titan] 2025-10-04 23:12:09,214 - root - INFO - lr: 4.9977e-05 gnorm: 1.44 [ 0:38:00<1 day, 1:21:36] +[titan] 2025-10-04 23:12:20,081 - root - INFO - step: 980 loss: 3.7702 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4623 global_avg_mtp_loss: 3.3079 +[titan] 2025-10-04 23:12:20,081 - 
root - INFO - lr: 4.9976e-05 gnorm: 1.42 [ 0:38:11<1 day, 1:20:52] +[titan] 2025-10-04 23:12:30,946 - root - INFO - step: 985 loss: 3.8212 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3515 +[titan] 2025-10-04 23:12:30,947 - root - INFO - lr: 4.9976e-05 gnorm: 1.39 [ 0:38:22<1 day, 1:20:07] +[titan] 2025-10-04 23:12:41,799 - root - INFO - step: 990 loss: 3.7716 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.4659 global_avg_mtp_loss: 3.3057 +[titan] 2025-10-04 23:12:41,799 - root - INFO - lr: 4.9975e-05 gnorm: 1.50 [ 0:38:33<1 day, 1:19:23] +[titan] 2025-10-04 23:12:52,700 - root - INFO - step: 995 loss: 3.8144 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3447 +[titan] 2025-10-04 23:12:52,701 - root - INFO - lr: 4.9975e-05 gnorm: 1.47 [ 0:38:44<1 day, 1:18:40] +[titan] 2025-10-04 23:13:01,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:13:03,568 - root - INFO - step: 1000 loss: 3.6411 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4473 global_avg_mtp_loss: 3.1938 +[titan] 2025-10-04 23:13:03,569 - root - INFO - lr: 4.9974e-05 gnorm: 1.70 [ 0:38:55<1 day, 1:17:57] +[titan] 2025-10-04 23:13:14,441 - root - INFO - step: 1005 loss: 3.7872 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4636 global_avg_mtp_loss: 3.3236 +[titan] 2025-10-04 23:13:14,442 - root - INFO - lr: 4.9974e-05 gnorm: 1.62 [ 0:39:06<1 day, 1:17:14] +[titan] 2025-10-04 23:13:25,308 - root - INFO - step: 1010 loss: 3.8240 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4681 global_avg_mtp_loss: 3.3560 +[titan] 2025-10-04 23:13:25,308 - root - INFO - lr: 4.9974e-05 gnorm: 1.51 [ 0:39:17<1 day, 1:16:31] +[titan] 2025-10-04 23:13:36,156 - root - INFO - step: 1015 loss: 3.7026 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.4566 global_avg_mtp_loss: 3.2461 +[titan] 2025-10-04 23:13:36,157 - root - INFO - lr: 4.9973e-05 gnorm: 1.61 [ 0:39:27<1 day, 1:15:48] +[titan] 2025-10-04 23:13:47,024 - root - INFO - step: 1020 loss: 3.8204 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4698 global_avg_mtp_loss: 3.3506 +[titan] 2025-10-04 23:13:47,025 - root - INFO - lr: 4.9973e-05 gnorm: 1.58 [ 0:39:38<1 day, 1:15:06] +[titan] 2025-10-04 23:13:55,950 - root - INFO - Dumping profiler traces at step 1024 +[titan] 2025-10-04 23:13:55,984 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 23:13:58,196 - root - INFO - step: 1025 loss: 3.7098 memory: 118.84GiB(85.28%) tps: 29,332 tflops: 406.94 mfu: 41.15% global_avg_ntp_loss: 0.4550 global_avg_mtp_loss: 3.2548 +[titan] 2025-10-04 23:13:58,196 - root - INFO - lr: 4.9972e-05 gnorm: 1.53 [ 0:39:49<1 day, 1:14:36] +[titan] 2025-10-04 23:14:09,055 - root 
- INFO - step: 1030 loss: 3.6684 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4505 global_avg_mtp_loss: 3.2179 +[titan] 2025-10-04 23:14:09,056 - root - INFO - lr: 4.9972e-05 gnorm: 1.49 [ 0:40:00<1 day, 1:13:54] +[titan] 2025-10-04 23:14:19,917 - root - INFO - step: 1035 loss: 3.7778 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4632 global_avg_mtp_loss: 3.3146 +[titan] 2025-10-04 23:14:19,917 - root - INFO - lr: 4.9971e-05 gnorm: 1.64 [ 0:40:11<1 day, 1:13:12] +[titan] 2025-10-04 23:14:30,784 - root - INFO - step: 1040 loss: 3.7600 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4596 global_avg_mtp_loss: 3.3004 +[titan] 2025-10-04 23:14:30,784 - root - INFO - lr: 4.9971e-05 gnorm: 1.73 [ 0:40:22<1 day, 1:12:31] +[titan] 2025-10-04 23:14:41,642 - root - INFO - step: 1045 loss: 3.7970 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3312 +[titan] 2025-10-04 23:14:41,642 - root - INFO - lr: 4.9970e-05 gnorm: 1.60 [ 0:40:33<1 day, 1:11:50] +[titan] 2025-10-04 23:14:50,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:14:52,527 - root - INFO - step: 1050 loss: 3.7607 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.4629 global_avg_mtp_loss: 3.2979 +[titan] 2025-10-04 23:14:52,527 - root - INFO - lr: 4.9970e-05 gnorm: 1.86 [ 0:40:44<1 day, 1:11:10] +[titan] 2025-10-04 23:15:03,398 - root - INFO - step: 1055 loss: 3.6921 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4533 global_avg_mtp_loss: 3.2388 +[titan] 2025-10-04 23:15:03,398 - root - INFO - lr: 4.9970e-05 gnorm: 1.59 [ 0:40:55<1 day, 1:10:30] +[titan] 2025-10-04 23:15:14,306 - root - INFO - step: 1060 loss: 3.7138 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.4561 global_avg_mtp_loss: 3.2577 +[titan] 2025-10-04 23:15:14,306 - root - INFO - lr: 4.9969e-05 gnorm: 1.89 [ 0:41:06<1 day, 1:09:52] +[titan] 2025-10-04 23:15:25,186 - root - INFO - step: 1065 loss: 3.7455 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4584 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:15:25,186 - root - INFO - lr: 4.9969e-05 gnorm: 1.72 [ 0:41:16<1 day, 1:09:13] +[titan] 2025-10-04 23:15:36,061 - root - INFO - step: 1070 loss: 3.6510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4474 global_avg_mtp_loss: 3.2036 +[titan] 2025-10-04 23:15:36,061 - root - INFO - lr: 4.9968e-05 gnorm: 1.70 [ 0:41:27<1 day, 1:08:34] +[titan] 2025-10-04 23:15:46,950 - root - INFO - step: 1075 loss: 3.7757 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4630 global_avg_mtp_loss: 3.3127 +[titan] 2025-10-04 23:15:46,950 - root - INFO - lr: 4.9968e-05 gnorm: 1.53 [ 0:41:38<1 day, 1:07:55] +[titan] 2025-10-04 23:15:57,821 - root - INFO - step: 1080 loss: 3.6997 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4542 global_avg_mtp_loss: 3.2455 +[titan] 2025-10-04 
23:15:57,821 - root - INFO - lr: 4.9967e-05 gnorm: 1.40 [ 0:41:49<1 day, 1:07:17] +[titan] 2025-10-04 23:16:08,691 - root - INFO - step: 1085 loss: 3.7768 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.4652 global_avg_mtp_loss: 3.3116 +[titan] 2025-10-04 23:16:08,691 - root - INFO - lr: 4.9967e-05 gnorm: 1.71 [ 0:42:00<1 day, 1:06:38] +[titan] 2025-10-04 23:16:19,625 - root - INFO - step: 1090 loss: 3.7891 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.4653 global_avg_mtp_loss: 3.3238 +[titan] 2025-10-04 23:16:19,625 - root - INFO - lr: 4.9966e-05 gnorm: 1.32 [ 0:42:11<1 day, 1:06:02] +[titan] 2025-10-04 23:16:30,524 - root - INFO - step: 1095 loss: 3.6348 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.4440 global_avg_mtp_loss: 3.1907 +[titan] 2025-10-04 23:16:30,525 - root - INFO - lr: 4.9966e-05 gnorm: 1.55 [ 0:42:22<1 day, 1:05:25] +[titan] 2025-10-04 23:16:39,226 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:16:41,421 - root - INFO - step: 1100 loss: 3.7357 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.4573 global_avg_mtp_loss: 3.2785 +[titan] 2025-10-04 23:16:41,421 - root - INFO - lr: 4.9965e-05 gnorm: 1.50 [ 0:42:33<1 day, 1:04:48] +[titan] 2025-10-04 23:16:52,335 - root - INFO - step: 1105 loss: 3.6253 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1800 +[titan] 2025-10-04 23:16:52,335 - root - INFO - lr: 4.9965e-05 gnorm: 1.52 [ 0:42:44<1 day, 1:04:12] +[titan] 2025-10-04 23:17:03,265 - root - INFO - step: 1110 loss: 3.6786 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.4500 global_avg_mtp_loss: 3.2285 +[titan] 2025-10-04 23:17:03,266 - root - INFO - lr: 4.9964e-05 gnorm: 1.41 [ 0:42:55<1 day, 1:03:37] +[titan] 2025-10-04 23:17:14,175 - root - INFO - step: 1115 loss: 3.6578 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.4465 global_avg_mtp_loss: 3.2112 +[titan] 2025-10-04 23:17:14,175 - root - INFO - lr: 4.9964e-05 gnorm: 1.35 [ 0:43:05<1 day, 1:03:02] +[titan] 2025-10-04 23:17:25,067 - root - INFO - step: 1120 loss: 3.6849 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.4511 global_avg_mtp_loss: 3.2339 +[titan] 2025-10-04 23:17:25,067 - root - INFO - lr: 4.9963e-05 gnorm: 1.51 [ 0:43:16<1 day, 1:02:26] +[titan] 2025-10-04 23:17:35,980 - root - INFO - step: 1125 loss: 3.6812 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.4516 global_avg_mtp_loss: 3.2296 +[titan] 2025-10-04 23:17:35,980 - root - INFO - lr: 4.9963e-05 gnorm: 1.53 [ 0:43:27<1 day, 1:01:51] +[titan] 2025-10-04 23:17:46,863 - root - INFO - step: 1130 loss: 3.6167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4428 global_avg_mtp_loss: 3.1739 +[titan] 2025-10-04 
23:17:46,863 - root - INFO - lr: 4.9962e-05 gnorm: 1.69 [ 0:43:38<1 day, 1:01:15] +[titan] 2025-10-04 23:17:57,754 - root - INFO - step: 1135 loss: 3.5668 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.4385 global_avg_mtp_loss: 3.1284 +[titan] 2025-10-04 23:17:57,754 - root - INFO - lr: 4.9962e-05 gnorm: 1.44 [ 0:43:49<1 day, 1:00:39] +[titan] 2025-10-04 23:18:08,676 - root - INFO - step: 1140 loss: 3.6958 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.4522 global_avg_mtp_loss: 3.2436 +[titan] 2025-10-04 23:18:08,676 - root - INFO - lr: 4.9961e-05 gnorm: 1.51 [ 0:44:00<1 day, 1:00:05] +[titan] 2025-10-04 23:18:19,548 - root - INFO - step: 1145 loss: 3.7386 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.4725 global_avg_mtp_loss: 3.2662 +[titan] 2025-10-04 23:18:19,548 - root - INFO - lr: 4.9961e-05 gnorm: 1.52 [ 0:44:11<1 day, 0:59:29] +[titan] 2025-10-04 23:18:28,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:18:30,436 - root - INFO - step: 1150 loss: 3.6554 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.4491 global_avg_mtp_loss: 3.2063 +[titan] 2025-10-04 23:18:30,436 - root - INFO - lr: 4.9960e-05 gnorm: 1.51 [ 0:44:22<1 day, 0:58:54] +[titan] 2025-10-04 23:18:41,365 - root - INFO - step: 1155 loss: 3.6986 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.4535 global_avg_mtp_loss: 3.2451 +[titan] 2025-10-04 23:18:41,365 - root - INFO - lr: 4.9960e-05 gnorm: 1.49 [ 0:44:33<1 day, 0:58:21] +[titan] 2025-10-04 23:18:52,242 - root - INFO - step: 1160 loss: 3.6068 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4415 global_avg_mtp_loss: 3.1653 +[titan] 2025-10-04 23:18:52,243 - root - INFO - lr: 4.9959e-05 gnorm: 1.49 [ 0:44:43<1 day, 0:57:46] +[titan] 2025-10-04 23:19:03,171 - root - INFO - step: 1165 loss: 3.5931 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.4398 global_avg_mtp_loss: 3.1533 +[titan] 2025-10-04 23:19:03,171 - root - INFO - lr: 4.9958e-05 gnorm: 1.54 [ 0:44:54<1 day, 0:57:13] +[titan] 2025-10-04 23:19:14,054 - root - INFO - step: 1170 loss: 3.6446 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4453 global_avg_mtp_loss: 3.1993 +[titan] 2025-10-04 23:19:14,054 - root - INFO - lr: 4.9958e-05 gnorm: 1.49 [ 0:45:05<1 day, 0:56:39] +[titan] 2025-10-04 23:19:24,934 - root - INFO - step: 1175 loss: 3.6211 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1757 +[titan] 2025-10-04 23:19:24,934 - root - INFO - lr: 4.9957e-05 gnorm: 1.48 [ 0:45:16<1 day, 0:56:05] +[titan] 2025-10-04 23:19:35,805 - root - INFO - step: 1180 loss: 3.6634 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4499 global_avg_mtp_loss: 3.2135 +[titan] 2025-10-04 
23:19:35,805 - root - INFO - lr: 4.9957e-05 gnorm: 1.55 [ 0:45:27<1 day, 0:55:31] +[titan] 2025-10-04 23:19:46,722 - root - INFO - step: 1185 loss: 3.6182 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1740 +[titan] 2025-10-04 23:19:46,722 - root - INFO - lr: 4.9956e-05 gnorm: 1.56 [ 0:45:38<1 day, 0:54:58] +[titan] 2025-10-04 23:19:57,577 - root - INFO - step: 1190 loss: 3.6307 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.4437 global_avg_mtp_loss: 3.1870 +[titan] 2025-10-04 23:19:57,578 - root - INFO - lr: 4.9956e-05 gnorm: 1.44 [ 0:45:49<1 day, 0:54:24] +[titan] 2025-10-04 23:20:08,587 - root - INFO - step: 1195 loss: 3.6947 memory: 118.84GiB(85.28%) tps: 29,765 tflops: 412.95 mfu: 41.75% global_avg_ntp_loss: 0.4519 global_avg_mtp_loss: 3.2429 +[titan] 2025-10-04 23:20:08,587 - root - INFO - lr: 4.9955e-05 gnorm: 1.42 [ 0:46:00<1 day, 0:53:55] +[titan] 2025-10-04 23:20:17,299 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:20:19,487 - root - INFO - step: 1200 loss: 3.6239 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1796 +[titan] 2025-10-04 23:20:19,487 - root - INFO - lr: 4.9955e-05 gnorm: 1.44 [ 0:46:11<1 day, 0:53:22] +[titan] 2025-10-04 23:20:30,366 - root - INFO - step: 1205 loss: 3.6270 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4452 global_avg_mtp_loss: 3.1819 +[titan] 2025-10-04 23:20:30,366 - root - INFO - lr: 4.9954e-05 gnorm: 1.60 [ 0:46:22<1 day, 0:52:49] +[titan] 2025-10-04 23:20:41,259 - root - INFO - step: 1210 loss: 3.6144 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1709 +[titan] 2025-10-04 23:20:41,259 - root - INFO - lr: 4.9953e-05 gnorm: 1.66 [ 0:46:32<1 day, 0:52:17] +[titan] 2025-10-04 23:20:52,152 - root - INFO - step: 1215 loss: 3.6886 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4528 global_avg_mtp_loss: 3.2359 +[titan] 2025-10-04 23:20:52,152 - root - INFO - lr: 4.9953e-05 gnorm: 1.48 [ 0:46:43<1 day, 0:51:44] +[titan] 2025-10-04 23:21:03,098 - root - INFO - step: 1220 loss: 3.5263 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.4324 global_avg_mtp_loss: 3.0939 +[titan] 2025-10-04 23:21:03,098 - root - INFO - lr: 4.9952e-05 gnorm: 1.62 [ 0:46:54<1 day, 0:51:14] +[titan] 2025-10-04 23:21:14,014 - root - INFO - step: 1225 loss: 3.6228 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.4426 global_avg_mtp_loss: 3.1801 +[titan] 2025-10-04 23:21:14,014 - root - INFO - lr: 4.9952e-05 gnorm: 1.53 [ 0:47:05<1 day, 0:50:43] +[titan] 2025-10-04 23:21:24,903 - root - INFO - step: 1230 loss: 3.5398 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4327 global_avg_mtp_loss: 3.1072 +[titan] 2025-10-04 
23:21:24,904 - root - INFO - lr: 4.9951e-05 gnorm: 1.39 [ 0:47:16<1 day, 0:50:11] +[titan] 2025-10-04 23:21:35,790 - root - INFO - step: 1235 loss: 3.5790 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.4389 global_avg_mtp_loss: 3.1401 +[titan] 2025-10-04 23:21:35,790 - root - INFO - lr: 4.9951e-05 gnorm: 1.42 [ 0:47:27<1 day, 0:49:39] +[titan] 2025-10-04 23:21:46,666 - root - INFO - step: 1240 loss: 3.6434 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4448 global_avg_mtp_loss: 3.1987 +[titan] 2025-10-04 23:21:46,666 - root - INFO - lr: 4.9950e-05 gnorm: 1.43 [ 0:47:38<1 day, 0:49:07] +[titan] 2025-10-04 23:21:57,577 - root - INFO - step: 1245 loss: 3.5452 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4350 global_avg_mtp_loss: 3.1102 +[titan] 2025-10-04 23:21:57,577 - root - INFO - lr: 4.9949e-05 gnorm: 1.40 [ 0:47:49<1 day, 0:48:37] +[titan] 2025-10-04 23:22:06,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:22:08,563 - root - INFO - step: 1250 loss: 3.5844 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.4369 global_avg_mtp_loss: 3.1475 +[titan] 2025-10-04 23:22:08,564 - root - INFO - lr: 4.9949e-05 gnorm: 1.48 [ 0:48:00<1 day, 0:48:08] +[titan] 2025-10-04 23:22:19,438 - root - INFO - step: 1255 loss: 3.6078 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1643 +[titan] 2025-10-04 23:22:19,438 - root - INFO - lr: 4.9948e-05 gnorm: 1.59 [ 0:48:11<1 day, 0:47:37] +[titan] 2025-10-04 23:22:30,309 - root - INFO - step: 1260 loss: 3.5536 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4346 global_avg_mtp_loss: 3.1191 +[titan] 2025-10-04 23:22:30,309 - root - INFO - lr: 4.9948e-05 gnorm: 1.57 [ 0:48:22<1 day, 0:47:05] +[titan] 2025-10-04 23:22:41,203 - root - INFO - step: 1265 loss: 3.5861 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.4376 global_avg_mtp_loss: 3.1485 +[titan] 2025-10-04 23:22:41,203 - root - INFO - lr: 4.9947e-05 gnorm: 1.47 [ 0:48:32<1 day, 0:46:35] +[titan] 2025-10-04 23:22:52,080 - root - INFO - step: 1270 loss: 3.6181 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4419 global_avg_mtp_loss: 3.1762 +[titan] 2025-10-04 23:22:52,081 - root - INFO - lr: 4.9946e-05 gnorm: 1.38 [ 0:48:43<1 day, 0:46:04] +[titan] 2025-10-04 23:23:02,961 - root - INFO - step: 1275 loss: 3.5508 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1168 +[titan] 2025-10-04 23:23:02,961 - root - INFO - lr: 4.9946e-05 gnorm: 1.48 [ 0:48:54<1 day, 0:45:33] +[titan] 2025-10-04 23:23:13,913 - root - INFO - step: 1280 loss: 3.5362 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.4318 global_avg_mtp_loss: 3.1044 +[titan] 2025-10-04 
23:23:13,913 - root - INFO - lr: 4.9945e-05 gnorm: 1.47 [ 0:49:05<1 day, 0:45:05] +[titan] 2025-10-04 23:23:24,835 - root - INFO - step: 1285 loss: 3.5593 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4354 global_avg_mtp_loss: 3.1239 +[titan] 2025-10-04 23:23:24,835 - root - INFO - lr: 4.9944e-05 gnorm: 1.48 [ 0:49:16<1 day, 0:44:36] +[titan] 2025-10-04 23:23:35,699 - root - INFO - step: 1290 loss: 3.5751 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4357 global_avg_mtp_loss: 3.1395 +[titan] 2025-10-04 23:23:35,700 - root - INFO - lr: 4.9944e-05 gnorm: 1.42 [ 0:49:27<1 day, 0:44:05] +[titan] 2025-10-04 23:23:46,610 - root - INFO - step: 1295 loss: 3.5938 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4377 global_avg_mtp_loss: 3.1562 +[titan] 2025-10-04 23:23:46,610 - root - INFO - lr: 4.9943e-05 gnorm: 1.35 [ 0:49:38<1 day, 0:43:36] +[titan] 2025-10-04 23:23:55,309 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:23:57,488 - root - INFO - step: 1300 loss: 3.5542 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4336 global_avg_mtp_loss: 3.1206 +[titan] 2025-10-04 23:23:57,489 - root - INFO - lr: 4.9943e-05 gnorm: 1.38 [ 0:49:49<1 day, 0:43:06] +[titan] 2025-10-04 23:24:08,378 - root - INFO - step: 1305 loss: 3.5644 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.4344 global_avg_mtp_loss: 3.1301 +[titan] 2025-10-04 23:24:08,379 - root - INFO - lr: 4.9942e-05 gnorm: 1.38 [ 0:50:00<1 day, 0:42:36] +[titan] 2025-10-04 23:24:19,247 - root - INFO - step: 1310 loss: 3.5464 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1124 +[titan] 2025-10-04 23:24:19,247 - root - INFO - lr: 4.9941e-05 gnorm: 1.43 [ 0:50:10<1 day, 0:42:06] +[titan] 2025-10-04 23:24:30,161 - root - INFO - step: 1315 loss: 3.5898 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.4372 global_avg_mtp_loss: 3.1527 +[titan] 2025-10-04 23:24:30,162 - root - INFO - lr: 4.9941e-05 gnorm: 1.34 [ 0:50:21<1 day, 0:41:38] +[titan] 2025-10-04 23:24:41,039 - root - INFO - step: 1320 loss: 3.6159 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4427 global_avg_mtp_loss: 3.1731 +[titan] 2025-10-04 23:24:41,039 - root - INFO - lr: 4.9940e-05 gnorm: 1.34 [ 0:50:32<1 day, 0:41:08] +[titan] 2025-10-04 23:24:51,938 - root - INFO - step: 1325 loss: 3.4618 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4229 global_avg_mtp_loss: 3.0389 +[titan] 2025-10-04 23:24:51,938 - root - INFO - lr: 4.9939e-05 gnorm: 1.36 [ 0:50:43<1 day, 0:40:40] +[titan] 2025-10-04 23:25:02,828 - root - INFO - step: 1330 loss: 3.5160 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4280 global_avg_mtp_loss: 3.0880 +[titan] 2025-10-04 
23:25:02,829 - root - INFO - lr: 4.9939e-05 gnorm: 1.38 [ 0:50:54<1 day, 0:40:11] +[titan] 2025-10-04 23:25:13,745 - root - INFO - step: 1335 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4282 global_avg_mtp_loss: 3.0763 +[titan] 2025-10-04 23:25:13,746 - root - INFO - lr: 4.9938e-05 gnorm: 1.46 [ 0:51:05<1 day, 0:39:43] +[titan] 2025-10-04 23:25:24,642 - root - INFO - step: 1340 loss: 3.5440 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4337 global_avg_mtp_loss: 3.1103 +[titan] 2025-10-04 23:25:24,642 - root - INFO - lr: 4.9937e-05 gnorm: 1.42 [ 0:51:16<1 day, 0:39:15] +[titan] 2025-10-04 23:25:35,576 - root - INFO - step: 1345 loss: 3.6036 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.4395 global_avg_mtp_loss: 3.1641 +[titan] 2025-10-04 23:25:35,576 - root - INFO - lr: 4.9937e-05 gnorm: 1.35 [ 0:51:27<1 day, 0:38:47] +[titan] 2025-10-04 23:25:44,279 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:25:46,465 - root - INFO - step: 1350 loss: 3.5202 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4296 global_avg_mtp_loss: 3.0905 +[titan] 2025-10-04 23:25:46,466 - root - INFO - lr: 4.9936e-05 gnorm: 1.31 [ 0:51:38<1 day, 0:38:19] +[titan] 2025-10-04 23:25:57,344 - root - INFO - step: 1355 loss: 3.5459 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.4309 global_avg_mtp_loss: 3.1149 +[titan] 2025-10-04 23:25:57,345 - root - INFO - lr: 4.9935e-05 gnorm: 1.30 [ 0:51:49<1 day, 0:37:51] +[titan] 2025-10-04 23:26:08,268 - root - INFO - step: 1360 loss: 3.5720 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.4351 global_avg_mtp_loss: 3.1369 +[titan] 2025-10-04 23:26:08,269 - root - INFO - lr: 4.9935e-05 gnorm: 1.39 [ 0:51:59<1 day, 0:37:24] +[titan] 2025-10-04 23:26:19,143 - root - INFO - step: 1365 loss: 3.4497 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0288 +[titan] 2025-10-04 23:26:19,143 - root - INFO - lr: 4.9934e-05 gnorm: 1.37 [ 0:52:10<1 day, 0:36:55] +[titan] 2025-10-04 23:26:30,030 - root - INFO - step: 1370 loss: 3.5847 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.4370 global_avg_mtp_loss: 3.1477 +[titan] 2025-10-04 23:26:30,030 - root - INFO - lr: 4.9933e-05 gnorm: 1.49 [ 0:52:21<1 day, 0:36:27] +[titan] 2025-10-04 23:26:40,913 - root - INFO - step: 1375 loss: 3.4970 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4269 global_avg_mtp_loss: 3.0701 +[titan] 2025-10-04 23:26:40,913 - root - INFO - lr: 4.9933e-05 gnorm: 1.38 [ 0:52:32<1 day, 0:36:00] +[titan] 2025-10-04 23:26:51,832 - root - INFO - step: 1380 loss: 3.4520 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 3.0312 +[titan] 2025-10-04 
23:26:51,832 - root - INFO - lr: 4.9932e-05 gnorm: 1.36 [ 0:52:43<1 day, 0:35:33] +[titan] 2025-10-04 23:27:02,721 - root - INFO - step: 1385 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4169 global_avg_mtp_loss: 3.0149 +[titan] 2025-10-04 23:27:02,721 - root - INFO - lr: 4.9931e-05 gnorm: 1.42 [ 0:52:54<1 day, 0:35:05] +[titan] 2025-10-04 23:27:13,641 - root - INFO - step: 1390 loss: 3.4046 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.4139 global_avg_mtp_loss: 2.9907 +[titan] 2025-10-04 23:27:13,641 - root - INFO - lr: 4.9931e-05 gnorm: 1.37 [ 0:53:05<1 day, 0:34:39] +[titan] 2025-10-04 23:27:24,527 - root - INFO - step: 1395 loss: 3.4971 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.4253 global_avg_mtp_loss: 3.0717 +[titan] 2025-10-04 23:27:24,527 - root - INFO - lr: 4.9930e-05 gnorm: 1.41 [ 0:53:16<1 day, 0:34:12] +[titan] 2025-10-04 23:27:33,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:27:35,394 - root - INFO - step: 1400 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.4290 global_avg_mtp_loss: 3.0755 +[titan] 2025-10-04 23:27:35,394 - root - INFO - lr: 4.9929e-05 gnorm: 1.40 [ 0:53:27<1 day, 0:33:44] +[titan] 2025-10-04 23:27:46,287 - root - INFO - step: 1405 loss: 3.4686 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4233 global_avg_mtp_loss: 3.0453 +[titan] 2025-10-04 23:27:46,287 - root - INFO - lr: 4.9928e-05 gnorm: 1.49 [ 0:53:37<1 day, 0:33:17] +[titan] 2025-10-04 23:27:57,198 - root - INFO - step: 1410 loss: 3.5153 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4300 global_avg_mtp_loss: 3.0853 +[titan] 2025-10-04 23:27:57,198 - root - INFO - lr: 4.9928e-05 gnorm: 1.47 [ 0:53:48<1 day, 0:32:51] +[titan] 2025-10-04 23:28:08,061 - root - INFO - step: 1415 loss: 3.4739 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4242 global_avg_mtp_loss: 3.0497 +[titan] 2025-10-04 23:28:08,061 - root - INFO - lr: 4.9927e-05 gnorm: 1.34 [ 0:53:59<1 day, 0:32:23] +[titan] 2025-10-04 23:28:18,978 - root - INFO - step: 1420 loss: 3.5053 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.4276 global_avg_mtp_loss: 3.0778 +[titan] 2025-10-04 23:28:18,978 - root - INFO - lr: 4.9926e-05 gnorm: 1.41 [ 0:54:10<1 day, 0:31:57] +[titan] 2025-10-04 23:28:29,841 - root - INFO - step: 1425 loss: 3.4083 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4170 global_avg_mtp_loss: 2.9913 +[titan] 2025-10-04 23:28:29,841 - root - INFO - lr: 4.9926e-05 gnorm: 1.42 [ 0:54:21<1 day, 0:31:30] +[titan] 2025-10-04 23:28:40,714 - root - INFO - step: 1430 loss: 3.4627 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0411 +[titan] 2025-10-04 
23:28:40,714 - root - INFO - lr: 4.9925e-05 gnorm: 1.43 [ 0:54:32<1 day, 0:31:03] +[titan] 2025-10-04 23:28:51,581 - root - INFO - step: 1435 loss: 3.4919 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0684 +[titan] 2025-10-04 23:28:51,582 - root - INFO - lr: 4.9924e-05 gnorm: 1.37 [ 0:54:43<1 day, 0:30:36] +[titan] 2025-10-04 23:29:02,457 - root - INFO - step: 1440 loss: 3.4907 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.4267 global_avg_mtp_loss: 3.0640 +[titan] 2025-10-04 23:29:02,457 - root - INFO - lr: 4.9923e-05 gnorm: 1.37 [ 0:54:54<1 day, 0:30:10] +[titan] 2025-10-04 23:29:13,408 - root - INFO - step: 1445 loss: 3.4656 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0447 +[titan] 2025-10-04 23:29:13,408 - root - INFO - lr: 4.9923e-05 gnorm: 1.40 [ 0:55:05<1 day, 0:29:45] +[titan] 2025-10-04 23:29:22,094 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:29:24,281 - root - INFO - step: 1450 loss: 3.4814 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4230 global_avg_mtp_loss: 3.0585 +[titan] 2025-10-04 23:29:24,281 - root - INFO - lr: 4.9922e-05 gnorm: 1.47 [ 0:55:15<1 day, 0:29:19] +[titan] 2025-10-04 23:29:35,145 - root - INFO - step: 1455 loss: 3.4419 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.4184 global_avg_mtp_loss: 3.0235 +[titan] 2025-10-04 23:29:35,145 - root - INFO - lr: 4.9921e-05 gnorm: 1.37 [ 0:55:26<1 day, 0:28:52] +[titan] 2025-10-04 23:29:46,030 - root - INFO - step: 1460 loss: 3.5546 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.4320 global_avg_mtp_loss: 3.1226 +[titan] 2025-10-04 23:29:46,030 - root - INFO - lr: 4.9920e-05 gnorm: 1.41 [ 0:55:37<1 day, 0:28:26] +[titan] 2025-10-04 23:29:56,926 - root - INFO - step: 1465 loss: 3.5290 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4295 global_avg_mtp_loss: 3.0995 +[titan] 2025-10-04 23:29:56,927 - root - INFO - lr: 4.9920e-05 gnorm: 1.36 [ 0:55:48<1 day, 0:28:01] +[titan] 2025-10-04 23:30:07,807 - root - INFO - step: 1470 loss: 3.4674 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0442 +[titan] 2025-10-04 23:30:07,808 - root - INFO - lr: 4.9919e-05 gnorm: 1.41 [ 0:55:59<1 day, 0:27:35] +[titan] 2025-10-04 23:30:18,704 - root - INFO - step: 1475 loss: 3.4400 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0220 +[titan] 2025-10-04 23:30:18,705 - root - INFO - lr: 4.9918e-05 gnorm: 1.36 [ 0:56:10<1 day, 0:27:10] +[titan] 2025-10-04 23:30:29,561 - root - INFO - step: 1480 loss: 3.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4228 global_avg_mtp_loss: 3.0463 +[titan] 2025-10-04 
23:30:29,562 - root - INFO - lr: 4.9917e-05 gnorm: 1.30 [ 0:56:21<1 day, 0:26:44] +[titan] 2025-10-04 23:30:40,438 - root - INFO - step: 1485 loss: 3.4861 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4263 global_avg_mtp_loss: 3.0598 +[titan] 2025-10-04 23:30:40,438 - root - INFO - lr: 4.9917e-05 gnorm: 1.35 [ 0:56:32<1 day, 0:26:18] +[titan] 2025-10-04 23:30:51,302 - root - INFO - step: 1490 loss: 3.4181 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4151 global_avg_mtp_loss: 3.0030 +[titan] 2025-10-04 23:30:51,303 - root - INFO - lr: 4.9916e-05 gnorm: 1.47 [ 0:56:43<1 day, 0:25:52] +[titan] 2025-10-04 23:31:02,175 - root - INFO - step: 1495 loss: 3.4587 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4192 global_avg_mtp_loss: 3.0394 +[titan] 2025-10-04 23:31:02,176 - root - INFO - lr: 4.9915e-05 gnorm: 1.30 [ 0:56:53<1 day, 0:25:27] +[titan] 2025-10-04 23:31:10,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:31:13,053 - root - INFO - step: 1500 loss: 3.4454 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4203 global_avg_mtp_loss: 3.0251 +[titan] 2025-10-04 23:31:13,053 - root - INFO - lr: 4.9914e-05 gnorm: 1.32 [ 0:57:04<1 day, 0:25:01] +[titan] 2025-10-04 23:31:23,959 - root - INFO - step: 1505 loss: 3.5094 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.4278 global_avg_mtp_loss: 3.0816 +[titan] 2025-10-04 23:31:23,959 - root - INFO - lr: 4.9913e-05 gnorm: 1.39 [ 0:57:15<1 day, 0:24:37] +[titan] 2025-10-04 23:31:34,816 - root - INFO - step: 1510 loss: 3.4203 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 2.9996 +[titan] 2025-10-04 23:31:34,816 - root - INFO - lr: 4.9913e-05 gnorm: 1.40 [ 0:57:26<1 day, 0:24:11] +[titan] 2025-10-04 23:31:45,697 - root - INFO - step: 1515 loss: 3.4819 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4246 global_avg_mtp_loss: 3.0574 +[titan] 2025-10-04 23:31:45,697 - root - INFO - lr: 4.9912e-05 gnorm: 1.42 [ 0:57:37<1 day, 0:23:46] +[titan] 2025-10-04 23:31:56,581 - root - INFO - step: 1520 loss: 3.4715 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0513 +[titan] 2025-10-04 23:31:56,581 - root - INFO - lr: 4.9911e-05 gnorm: 1.54 [ 0:57:48<1 day, 0:23:22] +[titan] 2025-10-04 23:32:07,443 - root - INFO - step: 1525 loss: 3.3887 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4123 global_avg_mtp_loss: 2.9763 +[titan] 2025-10-04 23:32:07,443 - root - INFO - lr: 4.9910e-05 gnorm: 1.50 [ 0:57:59<1 day, 0:22:56] +[titan] 2025-10-04 23:32:18,324 - root - INFO - step: 1530 loss: 3.4137 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4161 global_avg_mtp_loss: 2.9977 +[titan] 2025-10-04 
23:32:18,324 - root - INFO - lr: 4.9909e-05 gnorm: 1.39 [ 0:58:10<1 day, 0:22:32] +[titan] 2025-10-04 23:32:29,266 - root - INFO - step: 1535 loss: 3.4241 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.4172 global_avg_mtp_loss: 3.0069 +[titan] 2025-10-04 23:32:29,266 - root - INFO - lr: 4.9909e-05 gnorm: 1.38 [ 0:58:20<1 day, 0:22:09] +[titan] 2025-10-04 23:32:31,607 - root - INFO - Dumping profiler traces at step 1536 +[titan] 2025-10-04 23:32:31,643 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:32:40,423 - root - INFO - step: 1540 loss: 3.4722 memory: 118.84GiB(85.28%) tps: 29,370 tflops: 407.47 mfu: 41.20% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0490 +[titan] 2025-10-04 23:32:40,423 - root - INFO - lr: 4.9908e-05 gnorm: 1.48 [ 0:58:32<1 day, 0:21:51] +[titan] 2025-10-04 23:32:51,288 - root - INFO - step: 1545 loss: 3.4793 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4234 global_avg_mtp_loss: 3.0559 +[titan] 2025-10-04 23:32:51,288 - root - INFO - lr: 4.9907e-05 gnorm: 1.37 [ 0:58:42<1 day, 0:21:26] +[titan] 2025-10-04 23:32:59,970 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:33:02,152 - root - INFO - step: 1550 loss: 3.4035 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4133 global_avg_mtp_loss: 2.9902 +[titan] 2025-10-04 23:33:02,152 - root - INFO - lr: 4.9906e-05 gnorm: 1.32 [ 0:58:53<1 day, 0:21:02] +[titan] 2025-10-04 23:33:13,032 - root - INFO - step: 1555 loss: 3.4850 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4225 global_avg_mtp_loss: 3.0625 +[titan] 2025-10-04 23:33:13,033 - root - INFO - lr: 4.9905e-05 gnorm: 1.34 [ 0:59:04<1 day, 0:20:37] +[titan] 2025-10-04 23:33:23,946 - root - INFO - step: 1560 loss: 3.5272 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4289 global_avg_mtp_loss: 3.0982 +[titan] 2025-10-04 23:33:23,946 - root - INFO - lr: 4.9905e-05 gnorm: 1.37 [ 0:59:15<1 day, 0:20:14] +[titan] 2025-10-04 23:33:34,861 - root - INFO - step: 1565 loss: 3.5253 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.4294 global_avg_mtp_loss: 3.0959 +[titan] 2025-10-04 23:33:34,861 - root - INFO - lr: 4.9904e-05 gnorm: 1.37 [ 0:59:26<1 day, 0:19:51] +[titan] 2025-10-04 23:33:45,801 - root - INFO - step: 1570 loss: 3.4320 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.4173 global_avg_mtp_loss: 3.0147 +[titan] 2025-10-04 23:33:45,801 - root - INFO - lr: 4.9903e-05 gnorm: 1.35 [ 0:59:37<1 day, 0:19:28] +[titan] 2025-10-04 23:33:56,697 - root - INFO - step: 1575 loss: 3.4044 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4154 global_avg_mtp_loss: 2.9890 +[titan] 2025-10-04 23:33:56,697 - root - INFO - lr: 4.9902e-05 gnorm: 1.32 [ 0:59:48<1 day, 0:19:05] +[titan] 2025-10-04 23:34:07,560 - root - INFO - step: 1580 loss: 3.5820 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4484 global_avg_mtp_loss: 3.1336 +[titan] 2025-10-04 
23:34:07,560 - root - INFO - lr: 4.9901e-05 gnorm: 1.32 [ 0:59:59<1 day, 0:18:40] +[titan] 2025-10-04 23:34:18,478 - root - INFO - step: 1585 loss: 3.3932 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.4134 global_avg_mtp_loss: 2.9798 +[titan] 2025-10-04 23:34:18,479 - root - INFO - lr: 4.9900e-05 gnorm: 1.40 [ 1:00:10<1 day, 0:18:18] +[titan] 2025-10-04 23:34:29,342 - root - INFO - step: 1590 loss: 3.4358 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.4195 global_avg_mtp_loss: 3.0163 +[titan] 2025-10-04 23:34:29,342 - root - INFO - lr: 4.9900e-05 gnorm: 1.38 [ 1:00:21<1 day, 0:17:54] +[titan] 2025-10-04 23:34:40,218 - root - INFO - step: 1595 loss: 3.3310 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4056 global_avg_mtp_loss: 2.9254 +[titan] 2025-10-04 23:34:40,218 - root - INFO - lr: 4.9899e-05 gnorm: 1.38 [ 1:00:31<1 day, 0:17:30] +[titan] 2025-10-04 23:34:48,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:34:51,087 - root - INFO - step: 1600 loss: 3.4555 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4196 global_avg_mtp_loss: 3.0358 +[titan] 2025-10-04 23:34:51,088 - root - INFO - lr: 4.9898e-05 gnorm: 1.39 [ 1:00:42<1 day, 0:17:06] +[titan] 2025-10-04 23:35:01,992 - root - INFO - step: 1605 loss: 3.4766 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0531 +[titan] 2025-10-04 23:35:01,992 - root - INFO - lr: 4.9897e-05 gnorm: 1.33 [ 1:00:53<1 day, 0:16:43] +[titan] 2025-10-04 23:35:12,867 - root - INFO - step: 1610 loss: 3.3824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4111 global_avg_mtp_loss: 2.9713 +[titan] 2025-10-04 23:35:12,867 - root - INFO - lr: 4.9896e-05 gnorm: 1.41 [ 1:01:04<1 day, 0:16:20] +[titan] 2025-10-04 23:35:23,778 - root - INFO - step: 1615 loss: 3.4363 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.4168 global_avg_mtp_loss: 3.0195 +[titan] 2025-10-04 23:35:23,779 - root - INFO - lr: 4.9895e-05 gnorm: 1.27 [ 1:01:15<1 day, 0:15:57] +[titan] 2025-10-04 23:35:34,649 - root - INFO - step: 1620 loss: 3.3175 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.4028 global_avg_mtp_loss: 2.9147 +[titan] 2025-10-04 23:35:34,649 - root - INFO - lr: 4.9895e-05 gnorm: 1.32 [ 1:01:26<1 day, 0:15:34] +[titan] 2025-10-04 23:35:45,526 - root - INFO - step: 1625 loss: 3.3715 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4086 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:35:45,527 - root - INFO - lr: 4.9894e-05 gnorm: 1.41 [ 1:01:37<1 day, 0:15:11] +[titan] 2025-10-04 23:35:56,405 - root - INFO - step: 1630 loss: 3.3383 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4038 global_avg_mtp_loss: 2.9345 +[titan] 2025-10-04 
23:35:56,405 - root - INFO - lr: 4.9893e-05 gnorm: 1.32 [ 1:01:48<1 day, 0:14:47] +[titan] 2025-10-04 23:36:07,309 - root - INFO - step: 1635 loss: 3.4176 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.16% global_avg_ntp_loss: 0.4148 global_avg_mtp_loss: 3.0028 +[titan] 2025-10-04 23:36:07,309 - root - INFO - lr: 4.9892e-05 gnorm: 1.40 [ 1:01:58<1 day, 0:14:25] +[titan] 2025-10-04 23:36:18,303 - root - INFO - step: 1640 loss: 3.3374 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.4052 global_avg_mtp_loss: 2.9322 +[titan] 2025-10-04 23:36:18,304 - root - INFO - lr: 4.9891e-05 gnorm: 1.45 [ 1:02:09<1 day, 0:14:05] +[titan] 2025-10-04 23:36:29,175 - root - INFO - step: 1645 loss: 3.4862 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.4238 global_avg_mtp_loss: 3.0624 +[titan] 2025-10-04 23:36:29,175 - root - INFO - lr: 4.9890e-05 gnorm: 1.49 [ 1:02:20<1 day, 0:13:42] +[titan] 2025-10-04 23:36:37,867 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:36:40,054 - root - INFO - step: 1650 loss: 3.2615 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8657 +[titan] 2025-10-04 23:36:40,054 - root - INFO - lr: 4.9889e-05 gnorm: 1.45 [ 1:02:31<1 day, 0:13:19] +[titan] 2025-10-04 23:36:50,937 - root - INFO - step: 1655 loss: 3.4016 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4127 global_avg_mtp_loss: 2.9889 +[titan] 2025-10-04 23:36:50,937 - root - INFO - lr: 4.9888e-05 gnorm: 1.34 [ 1:02:42<1 day, 0:12:56] +[titan] 2025-10-04 23:37:01,815 - root - INFO - step: 1660 loss: 3.3760 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4106 global_avg_mtp_loss: 2.9654 +[titan] 2025-10-04 23:37:01,815 - root - INFO - lr: 4.9888e-05 gnorm: 1.33 [ 1:02:53<1 day, 0:12:34] +[titan] 2025-10-04 23:37:12,722 - root - INFO - step: 1665 loss: 3.3861 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.4119 global_avg_mtp_loss: 2.9742 +[titan] 2025-10-04 23:37:12,722 - root - INFO - lr: 4.9887e-05 gnorm: 1.28 [ 1:03:04<1 day, 0:12:12] +[titan] 2025-10-04 23:37:23,672 - root - INFO - step: 1670 loss: 3.3993 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.20 mfu: 41.98% global_avg_ntp_loss: 0.4125 global_avg_mtp_loss: 2.9867 +[titan] 2025-10-04 23:37:23,672 - root - INFO - lr: 4.9886e-05 gnorm: 1.29 [ 1:03:15<1 day, 0:11:51] +[titan] 2025-10-04 23:37:34,543 - root - INFO - step: 1675 loss: 3.3445 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9396 +[titan] 2025-10-04 23:37:34,543 - root - INFO - lr: 4.9885e-05 gnorm: 1.45 [ 1:03:26<1 day, 0:11:28] +[titan] 2025-10-04 23:37:45,421 - root - INFO - step: 1680 loss: 3.4052 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4135 global_avg_mtp_loss: 2.9917 +[titan] 2025-10-04 
23:37:45,421 - root - INFO - lr: 4.9884e-05 gnorm: 1.41 [ 1:03:37<1 day, 0:11:06] +[titan] 2025-10-04 23:37:56,304 - root - INFO - step: 1685 loss: 3.3465 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4066 global_avg_mtp_loss: 2.9399 +[titan] 2025-10-04 23:37:56,305 - root - INFO - lr: 4.9883e-05 gnorm: 1.35 [ 1:03:47<1 day, 0:10:44] +[titan] 2025-10-04 23:38:07,165 - root - INFO - step: 1690 loss: 3.4157 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.4162 global_avg_mtp_loss: 2.9995 +[titan] 2025-10-04 23:38:07,165 - root - INFO - lr: 4.9882e-05 gnorm: 1.31 [ 1:03:58<1 day, 0:10:21] +[titan] 2025-10-04 23:38:18,032 - root - INFO - step: 1695 loss: 3.3211 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.4037 global_avg_mtp_loss: 2.9174 +[titan] 2025-10-04 23:38:18,032 - root - INFO - lr: 4.9881e-05 gnorm: 1.27 [ 1:04:09<1 day, 0:09:58] +[titan] 2025-10-04 23:38:26,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:38:28,977 - root - INFO - step: 1700 loss: 3.4333 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0154 +[titan] 2025-10-04 23:38:28,977 - root - INFO - lr: 4.9880e-05 gnorm: 1.47 [ 1:04:20<1 day, 0:09:38] +[titan] 2025-10-04 23:38:39,826 - root - INFO - step: 1705 loss: 3.3912 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.4113 global_avg_mtp_loss: 2.9799 +[titan] 2025-10-04 23:38:39,826 - root - INFO - lr: 4.9879e-05 gnorm: 1.35 [ 1:04:31<1 day, 0:09:15] +[titan] 2025-10-04 23:38:50,670 - root - INFO - step: 1710 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4158 global_avg_mtp_loss: 3.0159 +[titan] 2025-10-04 23:38:50,670 - root - INFO - lr: 4.9878e-05 gnorm: 1.41 [ 1:04:42<1 day, 0:08:52] +[titan] 2025-10-04 23:39:01,517 - root - INFO - step: 1715 loss: 3.4588 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0386 +[titan] 2025-10-04 23:39:01,517 - root - INFO - lr: 4.9877e-05 gnorm: 1.41 [ 1:04:53<1 day, 0:08:30] +[titan] 2025-10-04 23:39:12,377 - root - INFO - step: 1720 loss: 3.3718 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.4092 global_avg_mtp_loss: 2.9625 +[titan] 2025-10-04 23:39:12,377 - root - INFO - lr: 4.9877e-05 gnorm: 1.24 [ 1:05:04<1 day, 0:08:07] +[titan] 2025-10-04 23:39:23,301 - root - INFO - step: 1725 loss: 3.3446 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.4100 global_avg_mtp_loss: 2.9346 +[titan] 2025-10-04 23:39:23,301 - root - INFO - lr: 4.9876e-05 gnorm: 1.27 [ 1:05:14<1 day, 0:07:47] +[titan] 2025-10-04 23:39:34,194 - root - INFO - step: 1730 loss: 3.4582 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0367 +[titan] 2025-10-04 
23:39:34,195 - root - INFO - lr: 4.9875e-05 gnorm: 1.32 [ 1:05:25<1 day, 0:07:25] +[titan] 2025-10-04 23:39:45,081 - root - INFO - step: 1735 loss: 3.4372 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4157 global_avg_mtp_loss: 3.0215 +[titan] 2025-10-04 23:39:45,081 - root - INFO - lr: 4.9874e-05 gnorm: 1.37 [ 1:05:36<1 day, 0:07:04] +[titan] 2025-10-04 23:39:55,972 - root - INFO - step: 1740 loss: 3.3532 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9463 +[titan] 2025-10-04 23:39:55,972 - root - INFO - lr: 4.9873e-05 gnorm: 1.36 [ 1:05:47<1 day, 0:06:42] +[titan] 2025-10-04 23:40:06,852 - root - INFO - step: 1745 loss: 3.3083 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4022 global_avg_mtp_loss: 2.9061 +[titan] 2025-10-04 23:40:06,853 - root - INFO - lr: 4.9872e-05 gnorm: 1.33 [ 1:05:58<1 day, 0:06:21] +[titan] 2025-10-04 23:40:15,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:40:17,731 - root - INFO - step: 1750 loss: 3.4480 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4191 global_avg_mtp_loss: 3.0289 +[titan] 2025-10-04 23:40:17,731 - root - INFO - lr: 4.9871e-05 gnorm: 1.35 [ 1:06:09<1 day, 0:05:59] +[titan] 2025-10-04 23:40:28,641 - root - INFO - step: 1755 loss: 3.3860 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4107 global_avg_mtp_loss: 2.9753 +[titan] 2025-10-04 23:40:28,641 - root - INFO - lr: 4.9870e-05 gnorm: 1.31 [ 1:06:20<1 day, 0:05:39] +[titan] 2025-10-04 23:40:39,515 - root - INFO - step: 1760 loss: 3.3596 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9525 +[titan] 2025-10-04 23:40:39,516 - root - INFO - lr: 4.9869e-05 gnorm: 1.44 [ 1:06:31<1 day, 0:05:17] +[titan] 2025-10-04 23:40:50,423 - root - INFO - step: 1765 loss: 3.2984 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3986 global_avg_mtp_loss: 2.8998 +[titan] 2025-10-04 23:40:50,423 - root - INFO - lr: 4.9868e-05 gnorm: 1.40 [ 1:06:42<1 day, 0:04:56] +[titan] 2025-10-04 23:41:01,295 - root - INFO - step: 1770 loss: 3.3670 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.4093 global_avg_mtp_loss: 2.9577 +[titan] 2025-10-04 23:41:01,295 - root - INFO - lr: 4.9867e-05 gnorm: 1.37 [ 1:06:52<1 day, 0:04:35] +[titan] 2025-10-04 23:41:12,156 - root - INFO - step: 1775 loss: 3.3745 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.4116 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:41:12,156 - root - INFO - lr: 4.9866e-05 gnorm: 1.36 [ 1:07:03<1 day, 0:04:13] +[titan] 2025-10-04 23:41:23,073 - root - INFO - step: 1780 loss: 3.2774 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 
23:41:23,074 - root - INFO - lr: 4.9865e-05 gnorm: 1.44 [ 1:07:14<1 day, 0:03:53] +[titan] 2025-10-04 23:41:33,936 - root - INFO - step: 1785 loss: 3.3608 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9538 +[titan] 2025-10-04 23:41:33,937 - root - INFO - lr: 4.9864e-05 gnorm: 1.39 [ 1:07:25<1 day, 0:03:32] +[titan] 2025-10-04 23:41:44,812 - root - INFO - step: 1790 loss: 3.3548 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9479 +[titan] 2025-10-04 23:41:44,812 - root - INFO - lr: 4.9863e-05 gnorm: 1.46 [ 1:07:36<1 day, 0:03:11] +[titan] 2025-10-04 23:41:55,714 - root - INFO - step: 1795 loss: 3.4000 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.4121 global_avg_mtp_loss: 2.9879 +[titan] 2025-10-04 23:41:55,715 - root - INFO - lr: 4.9862e-05 gnorm: 1.53 [ 1:07:47<1 day, 0:02:50] +[titan] 2025-10-04 23:42:04,392 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:42:06,574 - root - INFO - step: 1800 loss: 3.3948 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.4124 global_avg_mtp_loss: 2.9824 +[titan] 2025-10-04 23:42:06,574 - root - INFO - lr: 4.9861e-05 gnorm: 1.37 [ 1:07:58<1 day, 0:02:29] +[titan] 2025-10-04 23:42:17,436 - root - INFO - step: 1805 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.4017 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:42:17,437 - root - INFO - lr: 4.9860e-05 gnorm: 1.29 [ 1:08:09<1 day, 0:02:08] +[titan] 2025-10-04 23:42:28,375 - root - INFO - step: 1810 loss: 3.3561 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:42:28,375 - root - INFO - lr: 4.9859e-05 gnorm: 1.39 [ 1:08:20<1 day, 0:01:48] +[titan] 2025-10-04 23:42:39,216 - root - INFO - step: 1815 loss: 3.3053 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.3995 global_avg_mtp_loss: 2.9058 +[titan] 2025-10-04 23:42:39,217 - root - INFO - lr: 4.9858e-05 gnorm: 1.34 [ 1:08:30<1 day, 0:01:27] +[titan] 2025-10-04 23:42:50,059 - root - INFO - step: 1820 loss: 3.2854 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8887 +[titan] 2025-10-04 23:42:50,059 - root - INFO - lr: 4.9857e-05 gnorm: 1.37 [ 1:08:41<1 day, 0:01:05] +[titan] 2025-10-04 23:43:00,958 - root - INFO - step: 1825 loss: 3.3393 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4035 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 23:43:00,958 - root - INFO - lr: 4.9856e-05 gnorm: 1.37 [ 1:08:52<1 day, 0:00:45] +[titan] 2025-10-04 23:43:11,802 - root - INFO - step: 1830 loss: 3.3421 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4062 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 
23:43:11,802 - root - INFO - lr: 4.9855e-05 gnorm: 1.36 [ 1:09:03<1 day, 0:00:24] +[titan] 2025-10-04 23:43:22,644 - root - INFO - step: 1835 loss: 3.3492 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.4055 global_avg_mtp_loss: 2.9437 +[titan] 2025-10-04 23:43:22,645 - root - INFO - lr: 4.9854e-05 gnorm: 1.32 [ 1:09:14<1 day, 0:00:02] +[titan] 2025-10-04 23:43:33,561 - root - INFO - step: 1840 loss: 3.2612 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3942 global_avg_mtp_loss: 2.8670 +[titan] 2025-10-04 23:43:33,562 - root - INFO - lr: 4.9853e-05 gnorm: 1.27 [ 1:09:25<23:59:43] +[titan] 2025-10-04 23:43:44,438 - root - INFO - step: 1845 loss: 3.3605 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9537 +[titan] 2025-10-04 23:43:44,438 - root - INFO - lr: 4.9852e-05 gnorm: 1.27 [ 1:09:36<23:59:22] +[titan] 2025-10-04 23:43:53,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:43:55,315 - root - INFO - step: 1850 loss: 3.3556 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4063 global_avg_mtp_loss: 2.9493 +[titan] 2025-10-04 23:43:55,315 - root - INFO - lr: 4.9851e-05 gnorm: 1.32 [ 1:09:46<23:59:02] +[titan] 2025-10-04 23:44:06,182 - root - INFO - step: 1855 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4016 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:44:06,182 - root - INFO - lr: 4.9850e-05 gnorm: 1.40 [ 1:09:57<23:58:41] +[titan] 2025-10-04 23:44:17,099 - root - INFO - step: 1860 loss: 3.3782 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4085 global_avg_mtp_loss: 2.9697 +[titan] 2025-10-04 23:44:17,099 - root - INFO - lr: 4.9849e-05 gnorm: 1.35 [ 1:10:08<23:58:22] +[titan] 2025-10-04 23:44:28,008 - root - INFO - step: 1865 loss: 3.2855 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3997 global_avg_mtp_loss: 2.8857 +[titan] 2025-10-04 23:44:28,009 - root - INFO - lr: 4.9848e-05 gnorm: 1.35 [ 1:10:19<23:58:02] +[titan] 2025-10-04 23:44:38,889 - root - INFO - step: 1870 loss: 3.3023 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.9043 +[titan] 2025-10-04 23:44:38,889 - root - INFO - lr: 4.9847e-05 gnorm: 1.24 [ 1:10:30<23:57:42] +[titan] 2025-10-04 23:44:49,776 - root - INFO - step: 1875 loss: 3.3134 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.4008 global_avg_mtp_loss: 2.9126 +[titan] 2025-10-04 23:44:49,776 - root - INFO - lr: 4.9846e-05 gnorm: 1.32 [ 1:10:41<23:57:22] +[titan] 2025-10-04 23:45:00,642 - root - INFO - step: 1880 loss: 3.2097 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8207 +[titan] 2025-10-04 23:45:00,642 - root - INFO - lr: 4.9845e-05 
gnorm: 1.33 [ 1:10:52<23:57:02] +[titan] 2025-10-04 23:45:11,496 - root - INFO - step: 1885 loss: 3.2568 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8621 +[titan] 2025-10-04 23:45:11,497 - root - INFO - lr: 4.9844e-05 gnorm: 1.34 [ 1:11:03<23:56:41] +[titan] 2025-10-04 23:45:22,417 - root - INFO - step: 1890 loss: 3.3180 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.4019 global_avg_mtp_loss: 2.9160 +[titan] 2025-10-04 23:45:22,417 - root - INFO - lr: 4.9843e-05 gnorm: 1.39 [ 1:11:14<23:56:22] +[titan] 2025-10-04 23:45:33,318 - root - INFO - step: 1895 loss: 3.2706 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3955 global_avg_mtp_loss: 2.8752 +[titan] 2025-10-04 23:45:33,318 - root - INFO - lr: 4.9842e-05 gnorm: 1.50 [ 1:11:24<23:56:03] +[titan] 2025-10-04 23:45:41,992 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:45:44,171 - root - INFO - step: 1900 loss: 3.2793 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8827 +[titan] 2025-10-04 23:45:44,171 - root - INFO - lr: 4.9841e-05 gnorm: 1.29 [ 1:11:35<23:55:42] +[titan] 2025-10-04 23:45:55,048 - root - INFO - step: 1905 loss: 3.3144 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4029 global_avg_mtp_loss: 2.9115 +[titan] 2025-10-04 23:45:55,048 - root - INFO - lr: 4.9840e-05 gnorm: 1.32 [ 1:11:46<23:55:22] +[titan] 2025-10-04 23:46:05,920 - root - INFO - step: 1910 loss: 3.2864 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3983 global_avg_mtp_loss: 2.8881 +[titan] 2025-10-04 23:46:05,920 - root - INFO - lr: 4.9839e-05 gnorm: 1.32 [ 1:11:57<23:55:02] +[titan] 2025-10-04 23:46:16,784 - root - INFO - step: 1915 loss: 3.2475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8558 +[titan] 2025-10-04 23:46:16,785 - root - INFO - lr: 4.9837e-05 gnorm: 1.28 [ 1:12:08<23:54:42] +[titan] 2025-10-04 23:46:27,699 - root - INFO - step: 1920 loss: 3.3007 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.11% global_avg_ntp_loss: 0.3987 global_avg_mtp_loss: 2.9020 +[titan] 2025-10-04 23:46:27,700 - root - INFO - lr: 4.9836e-05 gnorm: 1.39 [ 1:12:19<23:54:23] +[titan] 2025-10-04 23:46:38,626 - root - INFO - step: 1925 loss: 3.2659 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3951 global_avg_mtp_loss: 2.8709 +[titan] 2025-10-04 23:46:38,626 - root - INFO - lr: 4.9835e-05 gnorm: 1.32 [ 1:12:30<23:54:05] +[titan] 2025-10-04 23:46:49,497 - root - INFO - step: 1930 loss: 3.2880 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8914 +[titan] 2025-10-04 23:46:49,497 - root - INFO - lr: 4.9834e-05 
gnorm: 1.31 [ 1:12:41<23:53:45] +[titan] 2025-10-04 23:47:00,373 - root - INFO - step: 1935 loss: 3.2719 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8754 +[titan] 2025-10-04 23:47:00,374 - root - INFO - lr: 4.9833e-05 gnorm: 1.33 [ 1:12:52<23:53:25] +[titan] 2025-10-04 23:47:11,263 - root - INFO - step: 1940 loss: 3.3395 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4048 global_avg_mtp_loss: 2.9347 +[titan] 2025-10-04 23:47:11,263 - root - INFO - lr: 4.9832e-05 gnorm: 1.41 [ 1:13:02<23:53:06] +[titan] 2025-10-04 23:47:22,130 - root - INFO - step: 1945 loss: 3.2947 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8976 +[titan] 2025-10-04 23:47:22,130 - root - INFO - lr: 4.9831e-05 gnorm: 1.48 [ 1:13:13<23:52:46] +[titan] 2025-10-04 23:47:30,853 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:47:33,036 - root - INFO - step: 1950 loss: 3.3613 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.4054 global_avg_mtp_loss: 2.9558 +[titan] 2025-10-04 23:47:33,037 - root - INFO - lr: 4.9830e-05 gnorm: 1.34 [ 1:13:24<23:52:27] +[titan] 2025-10-04 23:47:43,944 - root - INFO - step: 1955 loss: 3.2920 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3990 global_avg_mtp_loss: 2.8929 +[titan] 2025-10-04 23:47:43,944 - root - INFO - lr: 4.9829e-05 gnorm: 1.29 [ 1:13:35<23:52:09] +[titan] 2025-10-04 23:47:54,843 - root - INFO - step: 1960 loss: 3.2473 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3941 global_avg_mtp_loss: 2.8533 +[titan] 2025-10-04 23:47:54,843 - root - INFO - lr: 4.9828e-05 gnorm: 1.30 [ 1:13:46<23:51:50] +[titan] 2025-10-04 23:48:05,717 - root - INFO - step: 1965 loss: 3.2766 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3954 global_avg_mtp_loss: 2.8812 +[titan] 2025-10-04 23:48:05,717 - root - INFO - lr: 4.9827e-05 gnorm: 1.23 [ 1:13:57<23:51:30] +[titan] 2025-10-04 23:48:16,623 - root - INFO - step: 1970 loss: 3.2148 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3891 global_avg_mtp_loss: 2.8257 +[titan] 2025-10-04 23:48:16,623 - root - INFO - lr: 4.9825e-05 gnorm: 1.38 [ 1:14:08<23:51:12] +[titan] 2025-10-04 23:48:27,497 - root - INFO - step: 1975 loss: 3.2117 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3887 global_avg_mtp_loss: 2.8230 +[titan] 2025-10-04 23:48:27,497 - root - INFO - lr: 4.9824e-05 gnorm: 1.35 [ 1:14:19<23:50:52] +[titan] 2025-10-04 23:48:38,417 - root - INFO - step: 1980 loss: 3.3095 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.4021 global_avg_mtp_loss: 2.9075 +[titan] 2025-10-04 23:48:38,417 - root - INFO - lr: 4.9823e-05 
gnorm: 1.35 [ 1:14:30<23:50:34] +[titan] 2025-10-04 23:48:49,319 - root - INFO - step: 1985 loss: 3.2797 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 23:48:49,319 - root - INFO - lr: 4.9822e-05 gnorm: 1.26 [ 1:14:40<23:50:15] +[titan] 2025-10-04 23:49:00,192 - root - INFO - step: 1990 loss: 3.3317 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4032 global_avg_mtp_loss: 2.9285 +[titan] 2025-10-04 23:49:00,193 - root - INFO - lr: 4.9821e-05 gnorm: 1.36 [ 1:14:51<23:49:56] +[titan] 2025-10-04 23:49:11,083 - root - INFO - step: 1995 loss: 3.2394 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-04 23:49:11,084 - root - INFO - lr: 4.9820e-05 gnorm: 1.25 [ 1:15:02<23:49:37] +[titan] 2025-10-04 23:49:19,763 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:49:21,941 - root - INFO - step: 2000 loss: 3.2905 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.3991 global_avg_mtp_loss: 2.8913 +[titan] 2025-10-04 23:49:21,941 - root - INFO - lr: 4.9819e-05 gnorm: 1.41 [ 1:15:13<23:49:18] +[titan] 2025-10-04 23:49:32,868 - root - INFO - step: 2005 loss: 3.2217 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8299 +[titan] 2025-10-04 23:49:32,868 - root - INFO - lr: 4.9818e-05 gnorm: 1.41 [ 1:15:24<23:49:00] +[titan] 2025-10-04 23:49:43,749 - root - INFO - step: 2010 loss: 3.2369 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3913 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:49:43,750 - root - INFO - lr: 4.9816e-05 gnorm: 1.33 [ 1:15:35<23:48:41] +[titan] 2025-10-04 23:49:54,661 - root - INFO - step: 2015 loss: 3.2498 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3947 global_avg_mtp_loss: 2.8551 +[titan] 2025-10-04 23:49:54,661 - root - INFO - lr: 4.9815e-05 gnorm: 1.34 [ 1:15:46<23:48:22] +[titan] 2025-10-04 23:50:05,578 - root - INFO - step: 2020 loss: 3.2711 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3939 global_avg_mtp_loss: 2.8772 +[titan] 2025-10-04 23:50:05,579 - root - INFO - lr: 4.9814e-05 gnorm: 1.36 [ 1:15:57<23:48:04] +[titan] 2025-10-04 23:50:16,459 - root - INFO - step: 2025 loss: 3.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3962 global_avg_mtp_loss: 2.8751 +[titan] 2025-10-04 23:50:16,459 - root - INFO - lr: 4.9813e-05 gnorm: 1.26 [ 1:16:08<23:47:46] +[titan] 2025-10-04 23:50:27,328 - root - INFO - step: 2030 loss: 3.2606 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3932 global_avg_mtp_loss: 2.8674 +[titan] 2025-10-04 23:50:27,329 - root - INFO - lr: 4.9812e-05 
gnorm: 1.27 [ 1:16:18<23:47:27] +[titan] 2025-10-04 23:50:38,283 - root - INFO - step: 2035 loss: 3.3063 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3996 global_avg_mtp_loss: 2.9067 +[titan] 2025-10-04 23:50:38,284 - root - INFO - lr: 4.9811e-05 gnorm: 1.35 [ 1:16:29<23:47:09] +[titan] 2025-10-04 23:50:49,166 - root - INFO - step: 2040 loss: 3.1900 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3852 global_avg_mtp_loss: 2.8048 +[titan] 2025-10-04 23:50:49,166 - root - INFO - lr: 4.9810e-05 gnorm: 1.37 [ 1:16:40<23:46:51] +[titan] 2025-10-04 23:51:00,136 - root - INFO - step: 2045 loss: 3.2396 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.3910 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:00,136 - root - INFO - lr: 4.9808e-05 gnorm: 1.30 [ 1:16:51<23:46:34] +[titan] 2025-10-04 23:51:06,836 - root - INFO - Dumping profiler traces at step 2048 +[titan] 2025-10-04 23:51:06,871 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 23:51:09,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:51:11,279 - root - INFO - step: 2050 loss: 3.2428 memory: 118.84GiB(85.28%) tps: 29,407 tflops: 407.98 mfu: 41.25% global_avg_ntp_loss: 0.3943 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:11,279 - root - INFO - lr: 4.9807e-05 gnorm: 1.39 [ 1:17:02<23:46:20] +[titan] 2025-10-04 23:51:22,173 - root - INFO - step: 2055 loss: 3.3541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:51:22,173 - root - INFO - lr: 4.9806e-05 gnorm: 1.42 [ 1:17:13<23:46:02] +[titan] 2025-10-04 23:51:33,068 - root - INFO - step: 2060 loss: 3.2810 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3960 global_avg_mtp_loss: 2.8850 +[titan] 2025-10-04 23:51:33,069 - root - INFO - lr: 4.9805e-05 gnorm: 1.33 [ 1:17:24<23:45:43] +[titan] 2025-10-04 23:51:43,943 - root - INFO - step: 2065 loss: 3.2366 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:51:43,943 - root - INFO - lr: 4.9804e-05 gnorm: 1.45 [ 1:17:35<23:45:25] +[titan] 2025-10-04 23:51:54,802 - root - INFO - step: 2070 loss: 3.2400 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3911 global_avg_mtp_loss: 2.8489 +[titan] 2025-10-04 23:51:54,802 - root - INFO - lr: 4.9803e-05 gnorm: 1.37 [ 1:17:46<23:45:06] +[titan] 2025-10-04 23:52:05,671 - root - INFO - step: 2075 loss: 3.2363 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3927 global_avg_mtp_loss: 2.8436 +[titan] 2025-10-04 23:52:05,671 - root - INFO - lr: 4.9801e-05 gnorm: 1.32 [ 1:17:57<23:44:47] +[titan] 2025-10-04 23:52:16,539 - root - INFO - step: 2080 loss: 3.1819 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3837 global_avg_mtp_loss: 2.7983 +[titan] 2025-10-04 23:52:16,539 - root - INFO - lr: 4.9800e-05 
gnorm: 1.25 [ 1:18:08<23:44:29] +[titan] 2025-10-04 23:52:27,458 - root - INFO - step: 2085 loss: 3.2817 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8845 +[titan] 2025-10-04 23:52:27,458 - root - INFO - lr: 4.9799e-05 gnorm: 1.31 [ 1:18:19<23:44:11] +[titan] 2025-10-04 23:52:38,351 - root - INFO - step: 2090 loss: 3.2776 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3974 global_avg_mtp_loss: 2.8802 +[titan] 2025-10-04 23:52:38,351 - root - INFO - lr: 4.9798e-05 gnorm: 1.27 [ 1:18:29<23:43:53] +[titan] 2025-10-04 23:52:49,245 - root - INFO - step: 2095 loss: 3.2401 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8479 +[titan] 2025-10-04 23:52:49,245 - root - INFO - lr: 4.9797e-05 gnorm: 1.35 [ 1:18:40<23:43:35] +[titan] 2025-10-04 23:52:57,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:53:00,142 - root - INFO - step: 2100 loss: 3.1666 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3827 global_avg_mtp_loss: 2.7839 +[titan] 2025-10-04 23:53:00,142 - root - INFO - lr: 4.9795e-05 gnorm: 1.31 [ 1:18:51<23:43:17] +[titan] 2025-10-04 23:53:11,021 - root - INFO - step: 2105 loss: 3.1171 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3762 global_avg_mtp_loss: 2.7409 +[titan] 2025-10-04 23:53:11,021 - root - INFO - lr: 4.9794e-05 gnorm: 1.45 [ 1:19:02<23:42:59] +[titan] 2025-10-04 23:53:21,893 - root - INFO - step: 2110 loss: 3.2816 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3984 global_avg_mtp_loss: 2.8833 +[titan] 2025-10-04 23:53:21,894 - root - INFO - lr: 4.9793e-05 gnorm: 1.35 [ 1:19:13<23:42:40] +[titan] 2025-10-04 23:53:32,852 - root - INFO - step: 2115 loss: 3.2607 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8663 +[titan] 2025-10-04 23:53:32,852 - root - INFO - lr: 4.9792e-05 gnorm: 1.27 [ 1:19:24<23:42:24] +[titan] 2025-10-04 23:53:43,730 - root - INFO - step: 2120 loss: 3.2629 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8683 +[titan] 2025-10-04 23:53:43,730 - root - INFO - lr: 4.9791e-05 gnorm: 1.25 [ 1:19:35<23:42:05] +[titan] 2025-10-04 23:53:54,620 - root - INFO - step: 2125 loss: 3.0920 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3725 global_avg_mtp_loss: 2.7195 +[titan] 2025-10-04 23:53:54,620 - root - INFO - lr: 4.9789e-05 gnorm: 1.37 [ 1:19:46<23:41:47] +[titan] 2025-10-04 23:54:05,508 - root - INFO - step: 2130 loss: 3.2038 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8165 +[titan] 2025-10-04 23:54:05,508 - root - INFO - lr: 4.9788e-05 
gnorm: 1.28 [ 1:19:57<23:41:30] +[titan] 2025-10-04 23:54:16,404 - root - INFO - step: 2135 loss: 3.1616 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.3810 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-04 23:54:16,404 - root - INFO - lr: 4.9787e-05 gnorm: 1.27 [ 1:20:08<23:41:12] +[titan] 2025-10-04 23:54:27,282 - root - INFO - step: 2140 loss: 3.1455 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3792 global_avg_mtp_loss: 2.7663 +[titan] 2025-10-04 23:54:27,282 - root - INFO - lr: 4.9786e-05 gnorm: 1.36 [ 1:20:18<23:40:54] +[titan] 2025-10-04 23:54:38,216 - root - INFO - step: 2145 loss: 3.1443 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7655 +[titan] 2025-10-04 23:54:38,216 - root - INFO - lr: 4.9785e-05 gnorm: 1.24 [ 1:20:29<23:40:37] +[titan] 2025-10-04 23:54:46,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:54:49,106 - root - INFO - step: 2150 loss: 3.2432 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3921 global_avg_mtp_loss: 2.8511 +[titan] 2025-10-04 23:54:49,106 - root - INFO - lr: 4.9783e-05 gnorm: 1.23 [ 1:20:40<23:40:19] +[titan] 2025-10-04 23:54:59,985 - root - INFO - step: 2155 loss: 3.1416 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7620 +[titan] 2025-10-04 23:54:59,985 - root - INFO - lr: 4.9782e-05 gnorm: 1.23 [ 1:20:51<23:40:01] +[titan] 2025-10-04 23:55:10,860 - root - INFO - step: 2160 loss: 3.1386 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7597 +[titan] 2025-10-04 23:55:10,860 - root - INFO - lr: 4.9781e-05 gnorm: 1.27 [ 1:21:02<23:39:43] +[titan] 2025-10-04 23:55:21,730 - root - INFO - step: 2165 loss: 3.2482 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8559 +[titan] 2025-10-04 23:55:21,730 - root - INFO - lr: 4.9780e-05 gnorm: 1.29 [ 1:21:13<23:39:25] +[titan] 2025-10-04 23:55:32,617 - root - INFO - step: 2170 loss: 3.2349 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3914 global_avg_mtp_loss: 2.8435 +[titan] 2025-10-04 23:55:32,618 - root - INFO - lr: 4.9778e-05 gnorm: 1.22 [ 1:21:24<23:39:08] +[titan] 2025-10-04 23:55:43,541 - root - INFO - step: 2175 loss: 3.2325 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.3901 global_avg_mtp_loss: 2.8424 +[titan] 2025-10-04 23:55:43,542 - root - INFO - lr: 4.9777e-05 gnorm: 1.32 [ 1:21:35<23:38:51] +[titan] 2025-10-04 23:55:54,482 - root - INFO - step: 2180 loss: 3.1551 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7749 +[titan] 2025-10-04 23:55:54,482 - root - INFO - lr: 4.9776e-05 
gnorm: 1.29 [ 1:21:46<23:38:34] +[titan] 2025-10-04 23:56:05,357 - root - INFO - step: 2185 loss: 3.2187 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3882 global_avg_mtp_loss: 2.8305 +[titan] 2025-10-04 23:56:05,357 - root - INFO - lr: 4.9775e-05 gnorm: 1.37 [ 1:21:56<23:38:16] +[titan] 2025-10-04 23:56:16,252 - root - INFO - step: 2190 loss: 3.1722 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7900 +[titan] 2025-10-04 23:56:16,253 - root - INFO - lr: 4.9773e-05 gnorm: 1.44 [ 1:22:07<23:37:59] +[titan] 2025-10-04 23:56:27,132 - root - INFO - step: 2195 loss: 3.1685 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3823 global_avg_mtp_loss: 2.7862 +[titan] 2025-10-04 23:56:27,132 - root - INFO - lr: 4.9772e-05 gnorm: 1.33 [ 1:22:18<23:37:41] +[titan] 2025-10-04 23:56:35,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:56:38,038 - root - INFO - step: 2200 loss: 3.1985 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3856 global_avg_mtp_loss: 2.8129 +[titan] 2025-10-04 23:56:38,038 - root - INFO - lr: 4.9771e-05 gnorm: 1.31 [ 1:22:29<23:37:24] +[titan] 2025-10-04 23:56:48,913 - root - INFO - step: 2205 loss: 3.2059 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.8192 +[titan] 2025-10-04 23:56:48,913 - root - INFO - lr: 4.9769e-05 gnorm: 1.26 [ 1:22:40<23:37:06] +[titan] 2025-10-04 23:56:59,840 - root - INFO - step: 2210 loss: 3.1541 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7729 +[titan] 2025-10-04 23:56:59,840 - root - INFO - lr: 4.9768e-05 gnorm: 1.42 [ 1:22:51<23:36:49] +[titan] 2025-10-04 23:57:10,737 - root - INFO - step: 2215 loss: 3.2356 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3903 global_avg_mtp_loss: 2.8453 +[titan] 2025-10-04 23:57:10,737 - root - INFO - lr: 4.9767e-05 gnorm: 1.51 [ 1:23:02<23:36:32] +[titan] 2025-10-04 23:57:21,630 - root - INFO - step: 2220 loss: 3.1859 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.7992 +[titan] 2025-10-04 23:57:21,630 - root - INFO - lr: 4.9766e-05 gnorm: 1.39 [ 1:23:13<23:36:15] +[titan] 2025-10-04 23:57:32,532 - root - INFO - step: 2225 loss: 3.1779 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3842 global_avg_mtp_loss: 2.7936 +[titan] 2025-10-04 23:57:32,532 - root - INFO - lr: 4.9764e-05 gnorm: 1.24 [ 1:23:24<23:35:58] +[titan] 2025-10-04 23:57:43,450 - root - INFO - step: 2230 loss: 3.2176 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3895 global_avg_mtp_loss: 2.8282 +[titan] 2025-10-04 23:57:43,450 - root - INFO - lr: 4.9763e-05 
gnorm: 1.28 [ 1:23:35<23:35:41] +[titan] 2025-10-04 23:57:54,366 - root - INFO - step: 2235 loss: 3.2212 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3879 global_avg_mtp_loss: 2.8333 +[titan] 2025-10-04 23:57:54,366 - root - INFO - lr: 4.9762e-05 gnorm: 1.35 [ 1:23:45<23:35:24] +[titan] 2025-10-04 23:58:05,251 - root - INFO - step: 2240 loss: 3.2781 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8824 +[titan] 2025-10-04 23:58:05,252 - root - INFO - lr: 4.9760e-05 gnorm: 1.39 [ 1:23:56<23:35:07] +[titan] 2025-10-04 23:58:16,173 - root - INFO - step: 2245 loss: 3.1710 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7885 +[titan] 2025-10-04 23:58:16,174 - root - INFO - lr: 4.9759e-05 gnorm: 1.30 [ 1:24:07<23:34:50] +[titan] 2025-10-04 23:58:24,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:58:27,069 - root - INFO - step: 2250 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7772 +[titan] 2025-10-04 23:58:27,069 - root - INFO - lr: 4.9758e-05 gnorm: 1.33 [ 1:24:18<23:34:33] +[titan] 2025-10-04 23:58:37,973 - root - INFO - step: 2255 loss: 3.2917 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3969 global_avg_mtp_loss: 2.8947 +[titan] 2025-10-04 23:58:37,973 - root - INFO - lr: 4.9757e-05 gnorm: 1.34 [ 1:24:29<23:34:16] +[titan] 2025-10-04 23:58:48,849 - root - INFO - step: 2260 loss: 3.1742 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3832 global_avg_mtp_loss: 2.7911 +[titan] 2025-10-04 23:58:48,849 - root - INFO - lr: 4.9755e-05 gnorm: 1.32 [ 1:24:40<23:33:59] +[titan] 2025-10-04 23:58:59,727 - root - INFO - step: 2265 loss: 3.1716 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3830 global_avg_mtp_loss: 2.7886 +[titan] 2025-10-04 23:58:59,727 - root - INFO - lr: 4.9754e-05 gnorm: 1.31 [ 1:24:51<23:33:42] +[titan] 2025-10-04 23:59:10,618 - root - INFO - step: 2270 loss: 3.2242 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8352 +[titan] 2025-10-04 23:59:10,618 - root - INFO - lr: 4.9753e-05 gnorm: 1.31 [ 1:25:02<23:33:25] +[titan] 2025-10-04 23:59:21,547 - root - INFO - step: 2275 loss: 3.2006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3858 global_avg_mtp_loss: 2.8148 +[titan] 2025-10-04 23:59:21,547 - root - INFO - lr: 4.9751e-05 gnorm: 1.30 [ 1:25:13<23:33:08] +[titan] 2025-10-04 23:59:32,439 - root - INFO - step: 2280 loss: 3.1251 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7491 +[titan] 2025-10-04 23:59:32,439 - root - INFO - lr: 4.9750e-05 
gnorm: 1.25 [ 1:25:24<23:32:51] +[titan] 2025-10-04 23:59:43,315 - root - INFO - step: 2285 loss: 3.1971 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3843 global_avg_mtp_loss: 2.8128 +[titan] 2025-10-04 23:59:43,315 - root - INFO - lr: 4.9749e-05 gnorm: 1.24 [ 1:25:34<23:32:34] +[titan] 2025-10-04 23:59:54,211 - root - INFO - step: 2290 loss: 3.1138 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3754 global_avg_mtp_loss: 2.7384 +[titan] 2025-10-04 23:59:54,211 - root - INFO - lr: 4.9747e-05 gnorm: 1.30 [ 1:25:45<23:32:17] +[titan] 2025-10-05 00:00:05,066 - root - INFO - step: 2295 loss: 3.1381 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3802 global_avg_mtp_loss: 2.7579 +[titan] 2025-10-05 00:00:05,067 - root - INFO - lr: 4.9746e-05 gnorm: 1.34 [ 1:25:56<23:32:00] +[titan] 2025-10-05 00:00:13,756 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:00:15,940 - root - INFO - step: 2300 loss: 3.1684 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7859 +[titan] 2025-10-05 00:00:15,940 - root - INFO - lr: 4.9745e-05 gnorm: 1.25 [ 1:26:07<23:31:43] +[titan] 2025-10-05 00:00:26,871 - root - INFO - step: 2305 loss: 3.1673 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3819 global_avg_mtp_loss: 2.7853 +[titan] 2025-10-05 00:00:26,871 - root - INFO - lr: 4.9743e-05 gnorm: 1.22 [ 1:26:18<23:31:26] +[titan] 2025-10-05 00:00:37,762 - root - INFO - step: 2310 loss: 3.1531 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7719 +[titan] 2025-10-05 00:00:37,762 - root - INFO - lr: 4.9742e-05 gnorm: 1.30 [ 1:26:29<23:31:10] +[titan] 2025-10-05 00:00:48,669 - root - INFO - step: 2315 loss: 3.1583 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3795 global_avg_mtp_loss: 2.7788 +[titan] 2025-10-05 00:00:48,669 - root - INFO - lr: 4.9741e-05 gnorm: 1.22 [ 1:26:40<23:30:53] +[titan] 2025-10-05 00:00:59,522 - root - INFO - step: 2320 loss: 3.1995 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.3851 global_avg_mtp_loss: 2.8144 +[titan] 2025-10-05 00:00:59,522 - root - INFO - lr: 4.9739e-05 gnorm: 1.29 [ 1:26:51<23:30:36] +[titan] 2025-10-05 00:01:10,409 - root - INFO - step: 2325 loss: 3.1550 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3800 global_avg_mtp_loss: 2.7750 +[titan] 2025-10-05 00:01:10,409 - root - INFO - lr: 4.9738e-05 gnorm: 1.29 [ 1:27:02<23:30:19] +[titan] 2025-10-05 00:01:21,286 - root - INFO - step: 2330 loss: 3.1042 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3734 global_avg_mtp_loss: 2.7308 +[titan] 2025-10-05 00:01:21,286 - root - INFO - lr: 4.9737e-05 
gnorm: 1.25 [ 1:27:12<23:30:02] +[titan] 2025-10-05 00:01:32,170 - root - INFO - step: 2335 loss: 3.1428 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.3775 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:01:32,170 - root - INFO - lr: 4.9735e-05 gnorm: 1.24 [ 1:27:23<23:29:45] +[titan] 2025-10-05 00:01:43,255 - root - INFO - step: 2340 loss: 3.2357 memory: 118.84GiB(85.28%) tps: 29,561 tflops: 410.12 mfu: 41.47% global_avg_ntp_loss: 0.3959 global_avg_mtp_loss: 2.8398 +[titan] 2025-10-05 00:01:43,255 - root - INFO - lr: 4.9734e-05 gnorm: 1.31 [ 1:27:34<23:29:31] +[titan] 2025-10-05 00:01:54,139 - root - INFO - step: 2345 loss: 3.2594 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3938 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:01:54,139 - root - INFO - lr: 4.9732e-05 gnorm: 1.30 [ 1:27:45<23:29:15] +[titan] 2025-10-05 00:02:02,828 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:02:05,010 - root - INFO - step: 2350 loss: 3.1385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3798 global_avg_mtp_loss: 2.7587 +[titan] 2025-10-05 00:02:05,010 - root - INFO - lr: 4.9731e-05 gnorm: 1.30 [ 1:27:56<23:28:58] +[titan] 2025-10-05 00:02:15,898 - root - INFO - step: 2355 loss: 3.1702 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3826 global_avg_mtp_loss: 2.7876 +[titan] 2025-10-05 00:02:15,898 - root - INFO - lr: 4.9730e-05 gnorm: 1.32 [ 1:28:07<23:28:41] +[titan] 2025-10-05 00:02:26,769 - root - INFO - step: 2360 loss: 3.1893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8043 +[titan] 2025-10-05 00:02:26,769 - root - INFO - lr: 4.9728e-05 gnorm: 1.43 [ 1:28:18<23:28:24] +[titan] 2025-10-05 00:02:37,640 - root - INFO - step: 2365 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7481 +[titan] 2025-10-05 00:02:37,640 - root - INFO - lr: 4.9727e-05 gnorm: 1.39 [ 1:28:29<23:28:07] +[titan] 2025-10-05 00:02:48,598 - root - INFO - step: 2370 loss: 3.1988 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3869 global_avg_mtp_loss: 2.8118 +[titan] 2025-10-05 00:02:48,598 - root - INFO - lr: 4.9726e-05 gnorm: 1.28 [ 1:28:40<23:27:52] +[titan] 2025-10-05 00:02:59,464 - root - INFO - step: 2375 loss: 3.1613 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3814 global_avg_mtp_loss: 2.7799 +[titan] 2025-10-05 00:02:59,464 - root - INFO - lr: 4.9724e-05 gnorm: 1.31 [ 1:28:51<23:27:35] +[titan] 2025-10-05 00:03:10,332 - root - INFO - step: 2380 loss: 3.2049 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3860 global_avg_mtp_loss: 2.8189 +[titan] 2025-10-05 00:03:10,332 - root - INFO - lr: 4.9723e-05 
gnorm: 1.34 [ 1:29:01<23:27:18] +[titan] 2025-10-05 00:03:21,196 - root - INFO - step: 2385 loss: 3.1936 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3835 global_avg_mtp_loss: 2.8101 +[titan] 2025-10-05 00:03:21,196 - root - INFO - lr: 4.9721e-05 gnorm: 1.30 [ 1:29:12<23:27:01] +[titan] 2025-10-05 00:03:32,051 - root - INFO - step: 2390 loss: 3.2440 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3919 global_avg_mtp_loss: 2.8521 +[titan] 2025-10-05 00:03:32,051 - root - INFO - lr: 4.9720e-05 gnorm: 1.33 [ 1:29:23<23:26:44] +[titan] 2025-10-05 00:03:42,978 - root - INFO - step: 2395 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3771 global_avg_mtp_loss: 2.7475 +[titan] 2025-10-05 00:03:42,978 - root - INFO - lr: 4.9719e-05 gnorm: 1.31 [ 1:29:34<23:26:28] +[titan] 2025-10-05 00:03:51,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:03:53,851 - root - INFO - step: 2400 loss: 3.2662 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8718 +[titan] 2025-10-05 00:03:53,851 - root - INFO - lr: 4.9717e-05 gnorm: 1.40 [ 1:29:45<23:26:12] +[titan] 2025-10-05 00:04:04,749 - root - INFO - step: 2405 loss: 3.2406 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-05 00:04:04,749 - root - INFO - lr: 4.9716e-05 gnorm: 1.38 [ 1:29:56<23:25:55] +[titan] 2025-10-05 00:04:15,630 - root - INFO - step: 2410 loss: 3.1271 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7504 +[titan] 2025-10-05 00:04:15,630 - root - INFO - lr: 4.9714e-05 gnorm: 1.27 [ 1:30:07<23:25:39] +[titan] 2025-10-05 00:04:26,491 - root - INFO - step: 2415 loss: 3.1402 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3779 global_avg_mtp_loss: 2.7623 +[titan] 2025-10-05 00:04:26,491 - root - INFO - lr: 4.9713e-05 gnorm: 1.39 [ 1:30:18<23:25:22] +[titan] 2025-10-05 00:04:37,350 - root - INFO - step: 2420 loss: 3.1746 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7924 +[titan] 2025-10-05 00:04:37,350 - root - INFO - lr: 4.9711e-05 gnorm: 1.45 [ 1:30:28<23:25:05] +[titan] 2025-10-05 00:04:48,268 - root - INFO - step: 2425 loss: 3.1765 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3831 global_avg_mtp_loss: 2.7934 +[titan] 2025-10-05 00:04:48,269 - root - INFO - lr: 4.9710e-05 gnorm: 1.42 [ 1:30:39<23:24:50] +[titan] 2025-10-05 00:04:59,129 - root - INFO - step: 2430 loss: 3.2456 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8539 +[titan] 2025-10-05 00:04:59,129 - root - INFO - lr: 4.9709e-05 
gnorm: 1.29 [ 1:30:50<23:24:33] +[titan] 2025-10-05 00:05:10,040 - root - INFO - step: 2435 loss: 3.0885 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3728 global_avg_mtp_loss: 2.7157 +[titan] 2025-10-05 00:05:10,040 - root - INFO - lr: 4.9707e-05 gnorm: 1.30 [ 1:31:01<23:24:17] +[titan] 2025-10-05 00:05:20,901 - root - INFO - step: 2440 loss: 3.1883 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3839 global_avg_mtp_loss: 2.8044 +[titan] 2025-10-05 00:05:20,901 - root - INFO - lr: 4.9706e-05 gnorm: 1.29 [ 1:31:12<23:24:00] +[titan] 2025-10-05 00:05:31,767 - root - INFO - step: 2445 loss: 3.1123 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3742 global_avg_mtp_loss: 2.7381 +[titan] 2025-10-05 00:05:31,767 - root - INFO - lr: 4.9704e-05 gnorm: 1.28 [ 1:31:23<23:23:44] +[titan] 2025-10-05 00:05:40,461 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:05:42,650 - root - INFO - step: 2450 loss: 3.1786 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3845 global_avg_mtp_loss: 2.7941 +[titan] 2025-10-05 00:05:42,650 - root - INFO - lr: 4.9703e-05 gnorm: 1.27 [ 1:31:34<23:23:27] +[titan] 2025-10-05 00:05:53,573 - root - INFO - step: 2455 loss: 3.1398 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3790 global_avg_mtp_loss: 2.7608 +[titan] 2025-10-05 00:05:53,573 - root - INFO - lr: 4.9701e-05 gnorm: 1.27 [ 1:31:45<23:23:12] +[titan] 2025-10-05 00:06:04,454 - root - INFO - step: 2460 loss: 3.2308 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8350 +[titan] 2025-10-05 00:06:04,455 - root - INFO - lr: 4.9700e-05 gnorm: 2.69 [ 1:31:56<23:22:55] +[titan] 2025-10-05 00:06:15,398 - root - INFO - step: 2465 loss: 3.1213 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7452 +[titan] 2025-10-05 00:06:15,398 - root - INFO - lr: 4.9698e-05 gnorm: 1.28 [ 1:32:07<23:22:40] +[titan] 2025-10-05 00:06:26,299 - root - INFO - step: 2470 loss: 3.1059 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3749 global_avg_mtp_loss: 2.7310 +[titan] 2025-10-05 00:06:26,299 - root - INFO - lr: 4.9697e-05 gnorm: 1.29 [ 1:32:17<23:22:24] +[titan] 2025-10-05 00:06:37,192 - root - INFO - step: 2475 loss: 3.1051 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3745 global_avg_mtp_loss: 2.7306 +[titan] 2025-10-05 00:06:37,192 - root - INFO - lr: 4.9696e-05 gnorm: 1.31 [ 1:32:28<23:22:08] +[titan] 2025-10-05 00:06:48,155 - root - INFO - step: 2480 loss: 3.1093 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.69 mfu: 41.93% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7359 +[titan] 2025-10-05 00:06:48,155 - root - INFO - lr: 4.9694e-05 
gnorm: 1.32 [ 1:32:39<23:21:53] +[titan] 2025-10-05 00:06:59,038 - root - INFO - step: 2485 loss: 3.1283 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3767 global_avg_mtp_loss: 2.7516 +[titan] 2025-10-05 00:06:59,038 - root - INFO - lr: 4.9693e-05 gnorm: 1.34 [ 1:32:50<23:21:37] +[titan] 2025-10-05 00:07:09,901 - root - INFO - step: 2490 loss: 3.1376 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7607 +[titan] 2025-10-05 00:07:09,901 - root - INFO - lr: 4.9691e-05 gnorm: 1.34 [ 1:33:01<23:21:21] +[titan] 2025-10-05 00:07:20,803 - root - INFO - step: 2495 loss: 3.1543 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7736 +[titan] 2025-10-05 00:07:20,803 - root - INFO - lr: 4.9690e-05 gnorm: 1.36 [ 1:33:12<23:21:05] +[titan] 2025-10-05 00:07:29,527 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:07:31,706 - root - INFO - step: 2500 loss: 3.1575 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7787 +[titan] 2025-10-05 00:07:31,706 - root - INFO - lr: 4.9688e-05 gnorm: 1.31 [ 1:33:23<23:20:49] +[titan] 2025-10-05 00:07:42,568 - root - INFO - step: 2505 loss: 3.1325 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3760 global_avg_mtp_loss: 2.7566 +[titan] 2025-10-05 00:07:42,568 - root - INFO - lr: 4.9687e-05 gnorm: 1.22 [ 1:33:34<23:20:33] +[titan] 2025-10-05 00:07:53,496 - root - INFO - step: 2510 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.3718 global_avg_mtp_loss: 2.7142 +[titan] 2025-10-05 00:07:53,497 - root - INFO - lr: 4.9685e-05 gnorm: 1.31 [ 1:33:45<23:20:17] +[titan] 2025-10-05 00:08:04,378 - root - INFO - step: 2515 loss: 3.2003 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3862 global_avg_mtp_loss: 2.8141 +[titan] 2025-10-05 00:08:04,378 - root - INFO - lr: 4.9684e-05 gnorm: 1.43 [ 1:33:55<23:20:01] +[titan] 2025-10-05 00:08:15,255 - root - INFO - step: 2520 loss: 3.1816 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3847 global_avg_mtp_loss: 2.7969 +[titan] 2025-10-05 00:08:15,255 - root - INFO - lr: 4.9682e-05 gnorm: 1.38 [ 1:34:06<23:19:45] +[titan] 2025-10-05 00:08:26,136 - root - INFO - step: 2525 loss: 3.2579 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:08:26,136 - root - INFO - lr: 4.9681e-05 gnorm: 1.37 [ 1:34:17<23:19:29] +[titan] 2025-10-05 00:08:37,049 - root - INFO - step: 2530 loss: 3.1078 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7346 +[titan] 2025-10-05 00:08:37,049 - root - INFO - lr: 4.9679e-05 
gnorm: 1.28 [ 1:34:28<23:19:14] +[titan] 2025-10-05 00:08:48,046 - root - INFO - step: 2535 loss: 3.0953 memory: 118.84GiB(85.28%) tps: 29,797 tflops: 413.39 mfu: 41.80% global_avg_ntp_loss: 0.3719 global_avg_mtp_loss: 2.7233 +[titan] 2025-10-05 00:08:48,047 - root - INFO - lr: 4.9678e-05 gnorm: 1.25 [ 1:34:39<23:18:59] +[titan] 2025-10-05 00:08:58,919 - root - INFO - step: 2540 loss: 3.1620 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3803 global_avg_mtp_loss: 2.7817 +[titan] 2025-10-05 00:08:58,919 - root - INFO - lr: 4.9676e-05 gnorm: 1.26 [ 1:34:50<23:18:43] +[titan] 2025-10-05 00:09:09,786 - root - INFO - step: 2545 loss: 3.1667 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3815 global_avg_mtp_loss: 2.7852 +[titan] 2025-10-05 00:09:09,786 - root - INFO - lr: 4.9675e-05 gnorm: 1.40 [ 1:35:01<23:18:27] +[titan] 2025-10-05 00:09:18,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:09:20,677 - root - INFO - step: 2550 loss: 3.0790 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3704 global_avg_mtp_loss: 2.7086 +[titan] 2025-10-05 00:09:20,677 - root - INFO - lr: 4.9673e-05 gnorm: 1.34 [ 1:35:12<23:18:12] +[titan] 2025-10-05 00:09:31,556 - root - INFO - step: 2555 loss: 3.0389 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3662 global_avg_mtp_loss: 2.6727 +[titan] 2025-10-05 00:09:31,557 - root - INFO - lr: 4.9672e-05 gnorm: 1.31 [ 1:35:23<23:17:56] +[titan] 2025-10-05 00:09:42,516 - root - INFO - step: 2560 loss: 3.1285 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.3755 global_avg_mtp_loss: 2.7530 +[titan] 2025-10-05 00:09:42,516 - root - INFO - lr: 4.9670e-05 gnorm: 1.23 [ 1:35:34<23:17:41] +[titan] 2025-10-05 00:09:42,686 - root - INFO - Dumping profiler traces at step 2560 +[titan] 2025-10-05 00:09:42,722 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:09:53,687 - root - INFO - step: 2565 loss: 3.0635 memory: 118.84GiB(85.28%) tps: 29,334 tflops: 406.97 mfu: 41.15% global_avg_ntp_loss: 0.3659 global_avg_mtp_loss: 2.6976 +[titan] 2025-10-05 00:09:53,687 - root - INFO - lr: 4.9669e-05 gnorm: 1.33 [ 1:35:45<23:17:29] +[titan] 2025-10-05 00:10:04,566 - root - INFO - step: 2570 loss: 3.0420 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6772 +[titan] 2025-10-05 00:10:04,566 - root - INFO - lr: 4.9667e-05 gnorm: 1.29 [ 1:35:56<23:17:13] +[titan] 2025-10-05 00:10:15,470 - root - INFO - step: 2575 loss: 3.2085 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8213 +[titan] 2025-10-05 00:10:15,471 - root - INFO - lr: 4.9666e-05 gnorm: 1.30 [ 1:36:07<23:16:58] +[titan] 2025-10-05 00:10:26,384 - root - INFO - step: 2580 loss: 3.2105 
memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3855 global_avg_mtp_loss: 2.8250 +[titan] 2025-10-05 00:10:26,384 - root - INFO - lr: 4.9664e-05 gnorm: 1.29 [ 1:36:17<23:16:43] +[titan] 2025-10-05 00:10:37,260 - root - INFO - step: 2585 loss: 3.0856 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3698 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:10:37,261 - root - INFO - lr: 4.9663e-05 gnorm: 1.30 [ 1:36:28<23:16:27] +[titan] 2025-10-05 00:10:48,212 - root - INFO - step: 2590 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.3650 global_avg_mtp_loss: 2.6717 +[titan] 2025-10-05 00:10:48,212 - root - INFO - lr: 4.9661e-05 gnorm: 1.26 [ 1:36:39<23:16:12] +[titan] 2025-10-05 00:10:59,142 - root - INFO - step: 2595 loss: 3.1492 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.3799 global_avg_mtp_loss: 2.7693 +[titan] 2025-10-05 00:10:59,142 - root - INFO - lr: 4.9659e-05 gnorm: 1.24 [ 1:36:50<23:15:57] +[titan] 2025-10-05 00:11:07,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:11:10,032 - root - INFO - step: 2600 loss: 3.0911 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3727 global_avg_mtp_loss: 2.7185 +[titan] 2025-10-05 00:11:10,033 - root - INFO - lr: 4.9658e-05 gnorm: 1.22 [ 1:37:01<23:15:41] +[titan] 2025-10-05 00:11:20,915 - root - INFO - step: 2605 loss: 3.1578 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3808 global_avg_mtp_loss: 2.7770 +[titan] 2025-10-05 00:11:20,915 - root - INFO - lr: 4.9656e-05 gnorm: 1.26 [ 1:37:12<23:15:26] +[titan] 2025-10-05 00:11:31,815 - root - INFO - step: 2610 loss: 3.1088 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3741 global_avg_mtp_loss: 2.7348 +[titan] 2025-10-05 00:11:31,815 - root - INFO - lr: 4.9655e-05 gnorm: 1.25 [ 1:37:23<23:15:10] +[titan] 2025-10-05 00:11:42,699 - root - INFO - step: 2615 loss: 3.1165 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7413 +[titan] 2025-10-05 00:11:42,699 - root - INFO - lr: 4.9653e-05 gnorm: 1.30 [ 1:37:34<23:14:55] +[titan] 2025-10-05 00:11:53,594 - root - INFO - step: 2620 loss: 3.1397 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7617 +[titan] 2025-10-05 00:11:53,594 - root - INFO - lr: 4.9652e-05 gnorm: 1.27 [ 1:37:45<23:14:39] +[titan] 2025-10-05 00:12:04,505 - root - INFO - step: 2625 loss: 3.1215 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7463 +[titan] 2025-10-05 00:12:04,505 - root - INFO - lr: 4.9650e-05 gnorm: 1.33 [ 1:37:56<23:14:24] +[titan] 2025-10-05 00:12:15,389 - root - INFO - step: 2630 loss: 3.1525 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7737 +[titan] 2025-10-05 00:12:15,390 - root - INFO - lr: 4.9649e-05 
gnorm: 1.27 [ 1:38:06<23:14:08] +[titan] 2025-10-05 00:12:26,270 - root - INFO - step: 2635 loss: 3.1176 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3751 global_avg_mtp_loss: 2.7424 +[titan] 2025-10-05 00:12:26,271 - root - INFO - lr: 4.9647e-05 gnorm: 1.30 [ 1:38:17<23:13:53] +[titan] 2025-10-05 00:12:37,153 - root - INFO - step: 2640 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6977 +[titan] 2025-10-05 00:12:37,153 - root - INFO - lr: 4.9645e-05 gnorm: 1.28 [ 1:38:28<23:13:37] +[titan] 2025-10-05 00:12:48,055 - root - INFO - step: 2645 loss: 3.1119 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7387 +[titan] 2025-10-05 00:12:48,055 - root - INFO - lr: 4.9644e-05 gnorm: 1.30 [ 1:38:39<23:13:22] +[titan] 2025-10-05 00:12:56,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:12:58,983 - root - INFO - step: 2650 loss: 3.0548 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6872 +[titan] 2025-10-05 00:12:58,983 - root - INFO - lr: 4.9642e-05 gnorm: 1.23 [ 1:38:50<23:13:07] +[titan] 2025-10-05 00:13:09,879 - root - INFO - step: 2655 loss: 3.0496 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3671 global_avg_mtp_loss: 2.6826 +[titan] 2025-10-05 00:13:09,879 - root - INFO - lr: 4.9641e-05 gnorm: 1.28 [ 1:39:01<23:12:52] +[titan] 2025-10-05 00:13:20,805 - root - INFO - step: 2660 loss: 3.1186 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.3759 global_avg_mtp_loss: 2.7427 +[titan] 2025-10-05 00:13:20,805 - root - INFO - lr: 4.9639e-05 gnorm: 1.25 [ 1:39:12<23:12:37] +[titan] 2025-10-05 00:13:31,679 - root - INFO - step: 2665 loss: 3.0573 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3678 global_avg_mtp_loss: 2.6895 +[titan] 2025-10-05 00:13:31,680 - root - INFO - lr: 4.9637e-05 gnorm: 1.25 [ 1:39:23<23:12:21] +[titan] 2025-10-05 00:13:42,558 - root - INFO - step: 2670 loss: 3.0570 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3663 global_avg_mtp_loss: 2.6907 +[titan] 2025-10-05 00:13:42,558 - root - INFO - lr: 4.9636e-05 gnorm: 1.26 [ 1:39:34<23:12:06] +[titan] 2025-10-05 00:13:53,472 - root - INFO - step: 2675 loss: 3.1878 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8028 +[titan] 2025-10-05 00:13:53,472 - root - INFO - lr: 4.9634e-05 gnorm: 1.31 [ 1:39:45<23:11:51] +[titan] 2025-10-05 00:14:04,364 - root - INFO - step: 2680 loss: 3.1135 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3738 global_avg_mtp_loss: 2.7397 +[titan] 2025-10-05 00:14:04,365 - root - INFO - lr: 4.9633e-05 
gnorm: 1.22 [ 1:39:55<23:11:35] +[titan] 2025-10-05 00:14:15,279 - root - INFO - step: 2685 loss: 3.0010 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3606 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:14:15,279 - root - INFO - lr: 4.9631e-05 gnorm: 1.32 [ 1:40:06<23:11:20] +[titan] 2025-10-05 00:14:26,223 - root - INFO - step: 2690 loss: 3.1084 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.3737 global_avg_mtp_loss: 2.7347 +[titan] 2025-10-05 00:14:26,223 - root - INFO - lr: 4.9629e-05 gnorm: 1.28 [ 1:40:17<23:11:06] +[titan] 2025-10-05 00:14:37,114 - root - INFO - step: 2695 loss: 3.1301 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3758 global_avg_mtp_loss: 2.7543 +[titan] 2025-10-05 00:14:37,114 - root - INFO - lr: 4.9628e-05 gnorm: 1.31 [ 1:40:28<23:10:51] +[titan] 2025-10-05 00:14:45,831 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:14:48,028 - root - INFO - step: 2700 loss: 3.0874 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3690 global_avg_mtp_loss: 2.7184 +[titan] 2025-10-05 00:14:48,029 - root - INFO - lr: 4.9626e-05 gnorm: 1.38 [ 1:40:39<23:10:36] +[titan] 2025-10-05 00:14:58,931 - root - INFO - step: 2705 loss: 3.1260 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3750 global_avg_mtp_loss: 2.7509 +[titan] 2025-10-05 00:14:58,931 - root - INFO - lr: 4.9625e-05 gnorm: 1.28 [ 1:40:50<23:10:21] +[titan] 2025-10-05 00:15:09,812 - root - INFO - step: 2710 loss: 3.0477 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3643 global_avg_mtp_loss: 2.6834 +[titan] 2025-10-05 00:15:09,813 - root - INFO - lr: 4.9623e-05 gnorm: 1.29 [ 1:41:01<23:10:05] +[titan] 2025-10-05 00:15:20,681 - root - INFO - step: 2715 loss: 2.9784 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3575 global_avg_mtp_loss: 2.6209 +[titan] 2025-10-05 00:15:20,681 - root - INFO - lr: 4.9621e-05 gnorm: 1.39 [ 1:41:12<23:09:50] +[titan] 2025-10-05 00:15:31,544 - root - INFO - step: 2720 loss: 3.0989 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3712 global_avg_mtp_loss: 2.7276 +[titan] 2025-10-05 00:15:31,544 - root - INFO - lr: 4.9620e-05 gnorm: 1.28 [ 1:41:23<23:09:34] +[titan] 2025-10-05 00:15:42,481 - root - INFO - step: 2725 loss: 3.0279 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.3634 global_avg_mtp_loss: 2.6645 +[titan] 2025-10-05 00:15:42,482 - root - INFO - lr: 4.9618e-05 gnorm: 1.38 [ 1:41:34<23:09:20] +[titan] 2025-10-05 00:15:53,371 - root - INFO - step: 2730 loss: 3.0629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3682 global_avg_mtp_loss: 2.6946 +[titan] 2025-10-05 00:15:53,371 - root - INFO - lr: 4.9616e-05 
gnorm: 1.27 [ 1:41:44<23:09:04] +[titan] 2025-10-05 00:16:04,250 - root - INFO - step: 2735 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6257 +[titan] 2025-10-05 00:16:04,250 - root - INFO - lr: 4.9615e-05 gnorm: 1.32 [ 1:41:55<23:08:49] +[titan] 2025-10-05 00:16:15,152 - root - INFO - step: 2740 loss: 3.0246 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6608 +[titan] 2025-10-05 00:16:15,152 - root - INFO - lr: 4.9613e-05 gnorm: 1.29 [ 1:42:06<23:08:34] +[titan] 2025-10-05 00:16:26,041 - root - INFO - step: 2745 loss: 3.1571 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7775 +[titan] 2025-10-05 00:16:26,041 - root - INFO - lr: 4.9611e-05 gnorm: 1.28 [ 1:42:17<23:08:19] +[titan] 2025-10-05 00:16:34,730 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:16:36,918 - root - INFO - step: 2750 loss: 3.0736 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3686 global_avg_mtp_loss: 2.7050 +[titan] 2025-10-05 00:16:36,919 - root - INFO - lr: 4.9610e-05 gnorm: 1.24 [ 1:42:28<23:08:04] +[titan] 2025-10-05 00:16:47,865 - root - INFO - step: 2755 loss: 2.9899 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6309 +[titan] 2025-10-05 00:16:47,865 - root - INFO - lr: 4.9608e-05 gnorm: 1.22 [ 1:42:39<23:07:49] +[titan] 2025-10-05 00:16:58,851 - root - INFO - step: 2760 loss: 3.0390 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.3657 global_avg_mtp_loss: 2.6733 +[titan] 2025-10-05 00:16:58,851 - root - INFO - lr: 4.9606e-05 gnorm: 1.33 [ 1:42:50<23:07:36] +[titan] 2025-10-05 00:17:09,727 - root - INFO - step: 2765 loss: 3.1133 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3740 global_avg_mtp_loss: 2.7394 +[titan] 2025-10-05 00:17:09,727 - root - INFO - lr: 4.9605e-05 gnorm: 1.30 [ 1:43:01<23:07:20] +[titan] 2025-10-05 00:17:20,607 - root - INFO - step: 2770 loss: 3.0638 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3695 global_avg_mtp_loss: 2.6943 +[titan] 2025-10-05 00:17:20,607 - root - INFO - lr: 4.9603e-05 gnorm: 1.35 [ 1:43:12<23:07:05] +[titan] 2025-10-05 00:17:31,517 - root - INFO - step: 2775 loss: 3.0938 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3716 global_avg_mtp_loss: 2.7222 +[titan] 2025-10-05 00:17:31,517 - root - INFO - lr: 4.9601e-05 gnorm: 1.26 [ 1:43:23<23:06:50] +[titan] 2025-10-05 00:17:42,399 - root - INFO - step: 2780 loss: 3.0126 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6519 +[titan] 2025-10-05 00:17:42,399 - root - INFO - lr: 4.9600e-05 
gnorm: 1.30 [ 1:43:33<23:06:35] +[titan] 2025-10-05 00:17:53,331 - root - INFO - step: 2785 loss: 3.0873 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3714 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:17:53,331 - root - INFO - lr: 4.9598e-05 gnorm: 1.28 [ 1:43:44<23:06:21] +[titan] 2025-10-05 00:18:04,263 - root - INFO - step: 2790 loss: 3.0185 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.3627 global_avg_mtp_loss: 2.6559 +[titan] 2025-10-05 00:18:04,263 - root - INFO - lr: 4.9596e-05 gnorm: 1.33 [ 1:43:55<23:06:06] +[titan] 2025-10-05 00:18:15,157 - root - INFO - step: 2795 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3677 global_avg_mtp_loss: 2.6975 +[titan] 2025-10-05 00:18:15,157 - root - INFO - lr: 4.9595e-05 gnorm: 1.25 [ 1:44:06<23:05:51] +[titan] 2025-10-05 00:18:23,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:18:26,054 - root - INFO - step: 2800 loss: 3.0213 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6588 +[titan] 2025-10-05 00:18:26,054 - root - INFO - lr: 4.9593e-05 gnorm: 1.28 [ 1:44:17<23:05:37] +[titan] 2025-10-05 00:18:36,954 - root - INFO - step: 2805 loss: 3.1425 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3783 global_avg_mtp_loss: 2.7642 +[titan] 2025-10-05 00:18:36,954 - root - INFO - lr: 4.9591e-05 gnorm: 1.28 [ 1:44:28<23:05:22] +[titan] 2025-10-05 00:18:47,864 - root - INFO - step: 2810 loss: 3.0392 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.3638 global_avg_mtp_loss: 2.6754 +[titan] 2025-10-05 00:18:47,864 - root - INFO - lr: 4.9590e-05 gnorm: 1.27 [ 1:44:39<23:05:07] +[titan] 2025-10-05 00:18:58,796 - root - INFO - step: 2815 loss: 3.0728 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3684 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:18:58,797 - root - INFO - lr: 4.9588e-05 gnorm: 1.28 [ 1:44:50<23:04:53] +[titan] 2025-10-05 00:19:09,768 - root - INFO - step: 2820 loss: 3.0759 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.3697 global_avg_mtp_loss: 2.7062 +[titan] 2025-10-05 00:19:09,768 - root - INFO - lr: 4.9586e-05 gnorm: 1.28 [ 1:45:01<23:04:39] +[titan] 2025-10-05 00:19:20,659 - root - INFO - step: 2825 loss: 3.0518 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3667 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:19:20,660 - root - INFO - lr: 4.9585e-05 gnorm: 1.38 [ 1:45:12<23:04:24] +[titan] 2025-10-05 00:19:31,538 - root - INFO - step: 2830 loss: 3.1035 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7302 +[titan] 2025-10-05 00:19:31,538 - root - INFO - lr: 4.9583e-05 
gnorm: 1.34 [ 1:45:23<23:04:09] +[titan] 2025-10-05 00:19:42,419 - root - INFO - step: 2835 loss: 3.0685 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3683 global_avg_mtp_loss: 2.7002 +[titan] 2025-10-05 00:19:42,419 - root - INFO - lr: 4.9581e-05 gnorm: 1.37 [ 1:45:33<23:03:54] +[titan] 2025-10-05 00:19:53,306 - root - INFO - step: 2840 loss: 3.0223 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3652 global_avg_mtp_loss: 2.6571 +[titan] 2025-10-05 00:19:53,306 - root - INFO - lr: 4.9579e-05 gnorm: 1.32 [ 1:45:44<23:03:39] +[titan] 2025-10-05 00:20:04,219 - root - INFO - step: 2845 loss: 3.0274 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3633 global_avg_mtp_loss: 2.6641 +[titan] 2025-10-05 00:20:04,219 - root - INFO - lr: 4.9578e-05 gnorm: 1.28 [ 1:45:55<23:03:25] +[titan] 2025-10-05 00:20:12,961 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:20:15,145 - root - INFO - step: 2850 loss: 3.0430 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3661 global_avg_mtp_loss: 2.6770 +[titan] 2025-10-05 00:20:15,145 - root - INFO - lr: 4.9576e-05 gnorm: 1.26 [ 1:46:06<23:03:10] +[titan] 2025-10-05 00:20:26,027 - root - INFO - step: 2855 loss: 3.0893 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7209 +[titan] 2025-10-05 00:20:26,027 - root - INFO - lr: 4.9574e-05 gnorm: 1.27 [ 1:46:17<23:02:55] +[titan] 2025-10-05 00:20:36,904 - root - INFO - step: 2860 loss: 3.0960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3724 global_avg_mtp_loss: 2.7236 +[titan] 2025-10-05 00:20:36,904 - root - INFO - lr: 4.9573e-05 gnorm: 1.28 [ 1:46:28<23:02:40] +[titan] 2025-10-05 00:20:47,806 - root - INFO - step: 2865 loss: 3.1434 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:20:47,806 - root - INFO - lr: 4.9571e-05 gnorm: 1.30 [ 1:46:39<23:02:26] +[titan] 2025-10-05 00:20:58,761 - root - INFO - step: 2870 loss: 2.9969 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.99 mfu: 41.96% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:20:58,761 - root - INFO - lr: 4.9569e-05 gnorm: 1.30 [ 1:46:50<23:02:12] +[titan] 2025-10-05 00:21:09,643 - root - INFO - step: 2875 loss: 3.0232 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3635 global_avg_mtp_loss: 2.6597 +[titan] 2025-10-05 00:21:09,643 - root - INFO - lr: 4.9567e-05 gnorm: 1.30 [ 1:47:01<23:01:57] +[titan] 2025-10-05 00:21:20,548 - root - INFO - step: 2880 loss: 2.9737 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3570 global_avg_mtp_loss: 2.6167 +[titan] 2025-10-05 00:21:20,548 - root - INFO - lr: 4.9566e-05 
gnorm: 1.28 [ 1:47:12<23:01:42] +[titan] 2025-10-05 00:21:31,529 - root - INFO - step: 2885 loss: 3.0875 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.3720 global_avg_mtp_loss: 2.7155 +[titan] 2025-10-05 00:21:31,530 - root - INFO - lr: 4.9564e-05 gnorm: 1.25 [ 1:47:23<23:01:29] +[titan] 2025-10-05 00:21:42,407 - root - INFO - step: 2890 loss: 3.0347 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6705 +[titan] 2025-10-05 00:21:42,407 - root - INFO - lr: 4.9562e-05 gnorm: 1.38 [ 1:47:33<23:01:14] +[titan] 2025-10-05 00:21:53,280 - root - INFO - step: 2895 loss: 3.0145 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3610 global_avg_mtp_loss: 2.6535 +[titan] 2025-10-05 00:21:53,280 - root - INFO - lr: 4.9560e-05 gnorm: 1.22 [ 1:47:44<23:00:59] +[titan] 2025-10-05 00:22:02,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:22:04,199 - root - INFO - step: 2900 loss: 3.1605 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-05 00:22:04,199 - root - INFO - lr: 4.9559e-05 gnorm: 1.35 [ 1:47:55<23:00:45] +[titan] 2025-10-05 00:22:15,084 - root - INFO - step: 2905 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3703 global_avg_mtp_loss: 2.7158 +[titan] 2025-10-05 00:22:15,084 - root - INFO - lr: 4.9557e-05 gnorm: 1.29 [ 1:48:06<23:00:30] +[titan] 2025-10-05 00:22:25,962 - root - INFO - step: 2910 loss: 3.0022 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6423 +[titan] 2025-10-05 00:22:25,962 - root - INFO - lr: 4.9555e-05 gnorm: 1.31 [ 1:48:17<23:00:15] +[titan] 2025-10-05 00:22:36,871 - root - INFO - step: 2915 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3797 global_avg_mtp_loss: 2.7783 +[titan] 2025-10-05 00:22:36,871 - root - INFO - lr: 4.9553e-05 gnorm: 1.42 [ 1:48:28<23:00:01] +[titan] 2025-10-05 00:22:47,815 - root - INFO - step: 2920 loss: 3.0326 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3653 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:22:47,815 - root - INFO - lr: 4.9552e-05 gnorm: 1.30 [ 1:48:39<22:59:47] +[titan] 2025-10-05 00:22:58,703 - root - INFO - step: 2925 loss: 3.0724 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3681 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:22:58,703 - root - INFO - lr: 4.9550e-05 gnorm: 1.34 [ 1:48:50<22:59:32] +[titan] 2025-10-05 00:23:09,632 - root - INFO - step: 2930 loss: 3.0482 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6818 +[titan] 2025-10-05 00:23:09,632 - root - INFO - lr: 4.9548e-05 
gnorm: 1.23 [ 1:49:01<22:59:18] +[titan] 2025-10-05 00:23:20,517 - root - INFO - step: 2935 loss: 2.9200 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5708 +[titan] 2025-10-05 00:23:20,517 - root - INFO - lr: 4.9546e-05 gnorm: 1.28 [ 1:49:12<22:59:03] +[titan] 2025-10-05 00:23:31,391 - root - INFO - step: 2940 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6615 +[titan] 2025-10-05 00:23:31,391 - root - INFO - lr: 4.9544e-05 gnorm: 1.25 [ 1:49:22<22:58:48] +[titan] 2025-10-05 00:23:42,322 - root - INFO - step: 2945 loss: 3.1473 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7704 +[titan] 2025-10-05 00:23:42,322 - root - INFO - lr: 4.9543e-05 gnorm: 1.35 [ 1:49:33<22:58:34] +[titan] 2025-10-05 00:23:51,004 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:23:53,182 - root - INFO - step: 2950 loss: 3.0250 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6620 +[titan] 2025-10-05 00:23:53,183 - root - INFO - lr: 4.9541e-05 gnorm: 1.26 [ 1:49:44<22:58:19] +[titan] 2025-10-05 00:24:04,100 - root - INFO - step: 2955 loss: 2.9887 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3579 global_avg_mtp_loss: 2.6308 +[titan] 2025-10-05 00:24:04,100 - root - INFO - lr: 4.9539e-05 gnorm: 1.32 [ 1:49:55<22:58:05] +[titan] 2025-10-05 00:24:14,957 - root - INFO - step: 2960 loss: 2.9752 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6188 +[titan] 2025-10-05 00:24:14,957 - root - INFO - lr: 4.9537e-05 gnorm: 1.29 [ 1:50:06<22:57:50] +[titan] 2025-10-05 00:24:25,824 - root - INFO - step: 2965 loss: 3.0670 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3670 global_avg_mtp_loss: 2.7000 +[titan] 2025-10-05 00:24:25,824 - root - INFO - lr: 4.9535e-05 gnorm: 1.36 [ 1:50:17<22:57:35] +[titan] 2025-10-05 00:24:36,677 - root - INFO - step: 2970 loss: 3.0105 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3605 global_avg_mtp_loss: 2.6500 +[titan] 2025-10-05 00:24:36,677 - root - INFO - lr: 4.9534e-05 gnorm: 1.28 [ 1:50:28<22:57:20] +[titan] 2025-10-05 00:24:47,550 - root - INFO - step: 2975 loss: 3.0798 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7113 +[titan] 2025-10-05 00:24:47,550 - root - INFO - lr: 4.9532e-05 gnorm: 1.26 [ 1:50:39<22:57:06] +[titan] 2025-10-05 00:24:58,508 - root - INFO - step: 2980 loss: 3.0933 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7212 +[titan] 2025-10-05 00:24:58,508 - root - INFO - lr: 4.9530e-05 
gnorm: 1.34 [ 1:50:50<22:56:52] +[titan] 2025-10-05 00:25:09,436 - root - INFO - step: 2985 loss: 2.9918 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6318 +[titan] 2025-10-05 00:25:09,436 - root - INFO - lr: 4.9528e-05 gnorm: 1.29 [ 1:51:00<22:56:38] +[titan] 2025-10-05 00:25:20,336 - root - INFO - step: 2990 loss: 3.0864 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3699 global_avg_mtp_loss: 2.7165 +[titan] 2025-10-05 00:25:20,336 - root - INFO - lr: 4.9526e-05 gnorm: 1.30 [ 1:51:11<22:56:24] +[titan] 2025-10-05 00:25:31,210 - root - INFO - step: 2995 loss: 3.0152 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3613 global_avg_mtp_loss: 2.6538 +[titan] 2025-10-05 00:25:31,210 - root - INFO - lr: 4.9525e-05 gnorm: 1.34 [ 1:51:22<22:56:09] +[titan] 2025-10-05 00:25:39,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:25:42,077 - root - INFO - step: 3000 loss: 2.9639 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6087 +[titan] 2025-10-05 00:25:42,077 - root - INFO - lr: 4.9523e-05 gnorm: 1.20 [ 1:51:33<22:55:54] +[titan] 2025-10-05 00:25:52,956 - root - INFO - step: 3005 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6274 +[titan] 2025-10-05 00:25:52,956 - root - INFO - lr: 4.9521e-05 gnorm: 1.25 [ 1:51:44<22:55:40] +[titan] 2025-10-05 00:26:03,943 - root - INFO - step: 3010 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:26:03,943 - root - INFO - lr: 4.9519e-05 gnorm: 1.25 [ 1:51:55<22:55:26] +[titan] 2025-10-05 00:26:14,799 - root - INFO - step: 3015 loss: 2.9622 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6081 +[titan] 2025-10-05 00:26:14,799 - root - INFO - lr: 4.9517e-05 gnorm: 1.20 [ 1:52:06<22:55:12] +[titan] 2025-10-05 00:26:25,658 - root - INFO - step: 3020 loss: 3.1014 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7293 +[titan] 2025-10-05 00:26:25,658 - root - INFO - lr: 4.9515e-05 gnorm: 1.29 [ 1:52:17<22:54:57] +[titan] 2025-10-05 00:26:36,501 - root - INFO - step: 3025 loss: 3.0035 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.3588 global_avg_mtp_loss: 2.6447 +[titan] 2025-10-05 00:26:36,501 - root - INFO - lr: 4.9514e-05 gnorm: 1.22 [ 1:52:28<22:54:42] +[titan] 2025-10-05 00:26:47,370 - root - INFO - step: 3030 loss: 2.9868 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3589 global_avg_mtp_loss: 2.6279 +[titan] 2025-10-05 00:26:47,370 - root - INFO - lr: 4.9512e-05 
gnorm: 1.28 [ 1:52:38<22:54:27] +[titan] 2025-10-05 00:26:58,255 - root - INFO - step: 3035 loss: 3.0690 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.7021 +[titan] 2025-10-05 00:26:58,255 - root - INFO - lr: 4.9510e-05 gnorm: 1.29 [ 1:52:49<22:54:13] +[titan] 2025-10-05 00:27:09,176 - root - INFO - step: 3040 loss: 2.9415 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5893 +[titan] 2025-10-05 00:27:09,176 - root - INFO - lr: 4.9508e-05 gnorm: 1.23 [ 1:53:00<22:53:59] +[titan] 2025-10-05 00:27:20,081 - root - INFO - step: 3045 loss: 2.9565 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.6029 +[titan] 2025-10-05 00:27:20,081 - root - INFO - lr: 4.9506e-05 gnorm: 1.31 [ 1:53:11<22:53:45] +[titan] 2025-10-05 00:27:28,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:27:30,926 - root - INFO - step: 3050 loss: 3.0382 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.6713 +[titan] 2025-10-05 00:27:30,926 - root - INFO - lr: 4.9504e-05 gnorm: 1.32 [ 1:53:22<22:53:30] +[titan] 2025-10-05 00:27:41,788 - root - INFO - step: 3055 loss: 2.9038 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5556 +[titan] 2025-10-05 00:27:41,788 - root - INFO - lr: 4.9502e-05 gnorm: 1.27 [ 1:53:33<22:53:15] +[titan] 2025-10-05 00:27:52,674 - root - INFO - step: 3060 loss: 3.0259 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3639 global_avg_mtp_loss: 2.6619 +[titan] 2025-10-05 00:27:52,674 - root - INFO - lr: 4.9501e-05 gnorm: 1.32 [ 1:53:44<22:53:01] +[titan] 2025-10-05 00:28:03,564 - root - INFO - step: 3065 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6732 +[titan] 2025-10-05 00:28:03,564 - root - INFO - lr: 4.9499e-05 gnorm: 1.39 [ 1:53:55<22:52:46] +[titan] 2025-10-05 00:28:14,505 - root - INFO - step: 3070 loss: 2.9931 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.3595 global_avg_mtp_loss: 2.6336 +[titan] 2025-10-05 00:28:14,505 - root - INFO - lr: 4.9497e-05 gnorm: 1.46 [ 1:54:06<22:52:33] +[titan] 2025-10-05 00:28:19,016 - root - INFO - Dumping profiler traces at step 3072 +[titan] 2025-10-05 00:28:19,052 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:28:25,635 - root - INFO - step: 3075 loss: 2.9714 memory: 118.84GiB(85.28%) tps: 29,442 tflops: 408.46 mfu: 41.30% global_avg_ntp_loss: 0.3583 global_avg_mtp_loss: 2.6131 +[titan] 2025-10-05 00:28:25,635 - root - INFO - lr: 4.9495e-05 gnorm: 1.38 [ 1:54:17<22:52:21] +[titan] 2025-10-05 00:28:36,484 - root - INFO - step: 3080 loss: 3.0383 
memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3644 global_avg_mtp_loss: 2.6739 +[titan] 2025-10-05 00:28:36,484 - root - INFO - lr: 4.9493e-05 gnorm: 1.27 [ 1:54:28<22:52:07] +[titan] 2025-10-05 00:28:47,350 - root - INFO - step: 3085 loss: 3.0016 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:28:47,350 - root - INFO - lr: 4.9491e-05 gnorm: 1.28 [ 1:54:38<22:51:52] +[titan] 2025-10-05 00:28:58,198 - root - INFO - step: 3090 loss: 2.8733 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 00:28:58,198 - root - INFO - lr: 4.9489e-05 gnorm: 1.28 [ 1:54:49<22:51:37] +[titan] 2025-10-05 00:29:09,096 - root - INFO - step: 3095 loss: 3.0415 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6767 +[titan] 2025-10-05 00:29:09,096 - root - INFO - lr: 4.9487e-05 gnorm: 1.33 [ 1:55:00<22:51:23] +[titan] 2025-10-05 00:29:17,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:29:19,960 - root - INFO - step: 3100 loss: 2.9482 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.5947 +[titan] 2025-10-05 00:29:19,960 - root - INFO - lr: 4.9485e-05 gnorm: 1.33 [ 1:55:11<22:51:09] +[titan] 2025-10-05 00:29:30,867 - root - INFO - step: 3105 loss: 2.9859 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6283 +[titan] 2025-10-05 00:29:30,868 - root - INFO - lr: 4.9484e-05 gnorm: 1.27 [ 1:55:22<22:50:55] +[titan] 2025-10-05 00:29:41,783 - root - INFO - step: 3110 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6306 +[titan] 2025-10-05 00:29:41,784 - root - INFO - lr: 4.9482e-05 gnorm: 1.30 [ 1:55:33<22:50:41] +[titan] 2025-10-05 00:29:52,657 - root - INFO - step: 3115 loss: 2.9941 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6352 +[titan] 2025-10-05 00:29:52,657 - root - INFO - lr: 4.9480e-05 gnorm: 1.24 [ 1:55:44<22:50:26] +[titan] 2025-10-05 00:30:03,529 - root - INFO - step: 3120 loss: 3.0041 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3611 global_avg_mtp_loss: 2.6431 +[titan] 2025-10-05 00:30:03,529 - root - INFO - lr: 4.9478e-05 gnorm: 1.22 [ 1:55:55<22:50:12] +[titan] 2025-10-05 00:30:14,438 - root - INFO - step: 3125 loss: 2.9712 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6162 +[titan] 2025-10-05 00:30:14,438 - root - INFO - lr: 4.9476e-05 gnorm: 1.28 [ 1:56:05<22:49:58] +[titan] 2025-10-05 00:30:25,289 - root - INFO - step: 3130 loss: 2.9425 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:30:25,289 - root - INFO - lr: 4.9474e-05 
gnorm: 1.28 [ 1:56:16<22:49:43] +[titan] 2025-10-05 00:30:36,160 - root - INFO - step: 3135 loss: 3.0775 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3696 global_avg_mtp_loss: 2.7079 +[titan] 2025-10-05 00:30:36,160 - root - INFO - lr: 4.9472e-05 gnorm: 1.26 [ 1:56:27<22:49:29] +[titan] 2025-10-05 00:30:47,054 - root - INFO - step: 3140 loss: 3.0122 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6492 +[titan] 2025-10-05 00:30:47,054 - root - INFO - lr: 4.9470e-05 gnorm: 1.22 [ 1:56:38<22:49:15] +[titan] 2025-10-05 00:30:57,914 - root - INFO - step: 3145 loss: 3.0169 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6560 +[titan] 2025-10-05 00:30:57,914 - root - INFO - lr: 4.9468e-05 gnorm: 1.27 [ 1:56:49<22:49:00] +[titan] 2025-10-05 00:31:06,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:31:08,816 - root - INFO - step: 3150 loss: 2.9327 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3545 global_avg_mtp_loss: 2.5782 +[titan] 2025-10-05 00:31:08,816 - root - INFO - lr: 4.9466e-05 gnorm: 1.26 [ 1:57:00<22:48:46] +[titan] 2025-10-05 00:31:19,715 - root - INFO - step: 3155 loss: 3.0434 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3631 global_avg_mtp_loss: 2.6803 +[titan] 2025-10-05 00:31:19,715 - root - INFO - lr: 4.9464e-05 gnorm: 1.33 [ 1:57:11<22:48:32] +[titan] 2025-10-05 00:31:30,598 - root - INFO - step: 3160 loss: 2.9152 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5676 +[titan] 2025-10-05 00:31:30,598 - root - INFO - lr: 4.9462e-05 gnorm: 1.28 [ 1:57:22<22:48:18] +[titan] 2025-10-05 00:31:41,468 - root - INFO - step: 3165 loss: 3.0228 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6602 +[titan] 2025-10-05 00:31:41,469 - root - INFO - lr: 4.9460e-05 gnorm: 1.32 [ 1:57:33<22:48:04] +[titan] 2025-10-05 00:31:52,401 - root - INFO - step: 3170 loss: 2.9954 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:31:52,401 - root - INFO - lr: 4.9459e-05 gnorm: 1.39 [ 1:57:43<22:47:50] +[titan] 2025-10-05 00:32:03,274 - root - INFO - step: 3175 loss: 2.9805 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.6231 +[titan] 2025-10-05 00:32:03,274 - root - INFO - lr: 4.9457e-05 gnorm: 1.26 [ 1:57:54<22:47:36] +[titan] 2025-10-05 00:32:14,178 - root - INFO - step: 3180 loss: 3.0141 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3598 global_avg_mtp_loss: 2.6543 +[titan] 2025-10-05 00:32:14,178 - root - INFO - lr: 4.9455e-05 
gnorm: 1.31 [ 1:58:05<22:47:22] +[titan] 2025-10-05 00:32:25,055 - root - INFO - step: 3185 loss: 3.0493 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3641 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:32:25,055 - root - INFO - lr: 4.9453e-05 gnorm: 1.37 [ 1:58:16<22:47:08] +[titan] 2025-10-05 00:32:35,936 - root - INFO - step: 3190 loss: 2.9654 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6093 +[titan] 2025-10-05 00:32:35,936 - root - INFO - lr: 4.9451e-05 gnorm: 1.29 [ 1:58:27<22:46:54] +[titan] 2025-10-05 00:32:46,815 - root - INFO - step: 3195 loss: 2.9889 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3566 global_avg_mtp_loss: 2.6323 +[titan] 2025-10-05 00:32:46,815 - root - INFO - lr: 4.9449e-05 gnorm: 1.28 [ 1:58:38<22:46:40] +[titan] 2025-10-05 00:32:55,521 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:32:57,705 - root - INFO - step: 3200 loss: 2.9502 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.5953 +[titan] 2025-10-05 00:32:57,705 - root - INFO - lr: 4.9447e-05 gnorm: 1.30 [ 1:58:49<22:46:26] +[titan] 2025-10-05 00:33:08,681 - root - INFO - step: 3205 loss: 2.9709 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.22 mfu: 41.88% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6155 +[titan] 2025-10-05 00:33:08,681 - root - INFO - lr: 4.9445e-05 gnorm: 1.23 [ 1:59:00<22:46:13] +[titan] 2025-10-05 00:33:19,557 - root - INFO - step: 3210 loss: 2.9185 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5693 +[titan] 2025-10-05 00:33:19,558 - root - INFO - lr: 4.9443e-05 gnorm: 1.28 [ 1:59:11<22:45:59] +[titan] 2025-10-05 00:33:30,432 - root - INFO - step: 3215 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3514 global_avg_mtp_loss: 2.5956 +[titan] 2025-10-05 00:33:30,432 - root - INFO - lr: 4.9441e-05 gnorm: 1.39 [ 1:59:21<22:45:44] +[titan] 2025-10-05 00:33:41,300 - root - INFO - step: 3220 loss: 3.0300 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3628 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:33:41,300 - root - INFO - lr: 4.9439e-05 gnorm: 1.32 [ 1:59:32<22:45:30] +[titan] 2025-10-05 00:33:52,166 - root - INFO - step: 3225 loss: 3.0123 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6515 +[titan] 2025-10-05 00:33:52,166 - root - INFO - lr: 4.9437e-05 gnorm: 1.29 [ 1:59:43<22:45:16] +[titan] 2025-10-05 00:34:03,015 - root - INFO - step: 3230 loss: 3.0282 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3622 global_avg_mtp_loss: 2.6660 +[titan] 2025-10-05 00:34:03,015 - root - INFO - lr: 4.9435e-05 
gnorm: 1.29 [ 1:59:54<22:45:01] +[titan] 2025-10-05 00:34:13,972 - root - INFO - step: 3235 loss: 3.0440 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3646 global_avg_mtp_loss: 2.6794 +[titan] 2025-10-05 00:34:13,973 - root - INFO - lr: 4.9433e-05 gnorm: 1.27 [ 2:00:05<22:44:48] +[titan] 2025-10-05 00:34:24,817 - root - INFO - step: 3240 loss: 2.9616 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:34:24,817 - root - INFO - lr: 4.9431e-05 gnorm: 1.21 [ 2:00:16<22:44:34] +[titan] 2025-10-05 00:34:35,664 - root - INFO - step: 3245 loss: 3.0402 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6760 +[titan] 2025-10-05 00:34:35,665 - root - INFO - lr: 4.9429e-05 gnorm: 1.23 [ 2:00:27<22:44:19] +[titan] 2025-10-05 00:34:44,349 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:34:46,540 - root - INFO - step: 3250 loss: 3.0298 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3617 global_avg_mtp_loss: 2.6681 +[titan] 2025-10-05 00:34:46,540 - root - INFO - lr: 4.9427e-05 gnorm: 1.26 [ 2:00:38<22:44:05] +[titan] 2025-10-05 00:34:57,421 - root - INFO - step: 3255 loss: 2.9633 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6078 +[titan] 2025-10-05 00:34:57,421 - root - INFO - lr: 4.9425e-05 gnorm: 1.31 [ 2:00:48<22:43:51] +[titan] 2025-10-05 00:35:08,296 - root - INFO - step: 3260 loss: 2.9911 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6340 +[titan] 2025-10-05 00:35:08,296 - root - INFO - lr: 4.9423e-05 gnorm: 1.27 [ 2:00:59<22:43:37] +[titan] 2025-10-05 00:35:19,241 - root - INFO - step: 3265 loss: 2.9592 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6039 +[titan] 2025-10-05 00:35:19,242 - root - INFO - lr: 4.9421e-05 gnorm: 1.30 [ 2:01:10<22:43:24] +[titan] 2025-10-05 00:35:30,115 - root - INFO - step: 3270 loss: 2.9685 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6125 +[titan] 2025-10-05 00:35:30,115 - root - INFO - lr: 4.9419e-05 gnorm: 1.33 [ 2:01:21<22:43:10] +[titan] 2025-10-05 00:35:40,981 - root - INFO - step: 3275 loss: 3.0649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3675 global_avg_mtp_loss: 2.6974 +[titan] 2025-10-05 00:35:40,981 - root - INFO - lr: 4.9417e-05 gnorm: 1.36 [ 2:01:32<22:42:56] +[titan] 2025-10-05 00:35:51,879 - root - INFO - step: 3280 loss: 2.9994 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6401 +[titan] 2025-10-05 00:35:51,879 - root - INFO - lr: 4.9415e-05 
gnorm: 1.31 [ 2:01:43<22:42:42] +[titan] 2025-10-05 00:36:02,779 - root - INFO - step: 3285 loss: 2.9516 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3524 global_avg_mtp_loss: 2.5992 +[titan] 2025-10-05 00:36:02,779 - root - INFO - lr: 4.9413e-05 gnorm: 1.24 [ 2:01:54<22:42:28] +[titan] 2025-10-05 00:36:13,718 - root - INFO - step: 3290 loss: 3.0135 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6533 +[titan] 2025-10-05 00:36:13,718 - root - INFO - lr: 4.9411e-05 gnorm: 1.32 [ 2:02:05<22:42:15] +[titan] 2025-10-05 00:36:24,612 - root - INFO - step: 3295 loss: 2.9374 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3515 global_avg_mtp_loss: 2.5859 +[titan] 2025-10-05 00:36:24,613 - root - INFO - lr: 4.9409e-05 gnorm: 1.30 [ 2:02:16<22:42:01] +[titan] 2025-10-05 00:36:33,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:36:35,569 - root - INFO - step: 3300 loss: 3.0216 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6591 +[titan] 2025-10-05 00:36:35,570 - root - INFO - lr: 4.9407e-05 gnorm: 1.29 [ 2:02:27<22:41:48] +[titan] 2025-10-05 00:36:46,479 - root - INFO - step: 3305 loss: 2.9748 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6193 +[titan] 2025-10-05 00:36:46,480 - root - INFO - lr: 4.9405e-05 gnorm: 1.29 [ 2:02:38<22:41:34] +[titan] 2025-10-05 00:36:57,349 - root - INFO - step: 3310 loss: 2.9636 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3540 global_avg_mtp_loss: 2.6095 +[titan] 2025-10-05 00:36:57,350 - root - INFO - lr: 4.9403e-05 gnorm: 1.18 [ 2:02:48<22:41:20] +[titan] 2025-10-05 00:37:08,233 - root - INFO - step: 3315 loss: 2.9774 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6224 +[titan] 2025-10-05 00:37:08,233 - root - INFO - lr: 4.9401e-05 gnorm: 1.24 [ 2:02:59<22:41:07] +[titan] 2025-10-05 00:37:19,133 - root - INFO - step: 3320 loss: 2.9377 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:37:19,133 - root - INFO - lr: 4.9399e-05 gnorm: 1.24 [ 2:03:10<22:40:53] +[titan] 2025-10-05 00:37:29,998 - root - INFO - step: 3325 loss: 2.8934 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:37:29,999 - root - INFO - lr: 4.9397e-05 gnorm: 1.31 [ 2:03:21<22:40:39] +[titan] 2025-10-05 00:37:40,921 - root - INFO - step: 3330 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3596 global_avg_mtp_loss: 2.6407 +[titan] 2025-10-05 00:37:40,922 - root - INFO - lr: 4.9395e-05 
gnorm: 1.29 [ 2:03:32<22:40:25] +[titan] 2025-10-05 00:37:51,784 - root - INFO - step: 3335 loss: 2.9450 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5927 +[titan] 2025-10-05 00:37:51,784 - root - INFO - lr: 4.9392e-05 gnorm: 1.29 [ 2:03:43<22:40:11] +[titan] 2025-10-05 00:38:02,640 - root - INFO - step: 3340 loss: 2.9243 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3497 global_avg_mtp_loss: 2.5746 +[titan] 2025-10-05 00:38:02,640 - root - INFO - lr: 4.9390e-05 gnorm: 1.24 [ 2:03:54<22:39:57] +[titan] 2025-10-05 00:38:13,559 - root - INFO - step: 3345 loss: 2.9258 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5767 +[titan] 2025-10-05 00:38:13,559 - root - INFO - lr: 4.9388e-05 gnorm: 1.32 [ 2:04:05<22:39:44] +[titan] 2025-10-05 00:38:22,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:38:24,447 - root - INFO - step: 3350 loss: 2.9893 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3573 global_avg_mtp_loss: 2.6320 +[titan] 2025-10-05 00:38:24,447 - root - INFO - lr: 4.9386e-05 gnorm: 1.23 [ 2:04:15<22:39:30] +[titan] 2025-10-05 00:38:35,319 - root - INFO - step: 3355 loss: 2.8550 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3405 global_avg_mtp_loss: 2.5146 +[titan] 2025-10-05 00:38:35,319 - root - INFO - lr: 4.9384e-05 gnorm: 1.25 [ 2:04:26<22:39:16] +[titan] 2025-10-05 00:38:46,199 - root - INFO - step: 3360 loss: 2.8891 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:38:46,199 - root - INFO - lr: 4.9382e-05 gnorm: 1.31 [ 2:04:37<22:39:02] +[titan] 2025-10-05 00:38:57,161 - root - INFO - step: 3365 loss: 2.9521 memory: 118.84GiB(85.28%) tps: 29,893 tflops: 414.71 mfu: 41.93% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.5991 +[titan] 2025-10-05 00:38:57,161 - root - INFO - lr: 4.9380e-05 gnorm: 1.25 [ 2:04:48<22:38:49] +[titan] 2025-10-05 00:39:08,046 - root - INFO - step: 3370 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.5919 +[titan] 2025-10-05 00:39:08,046 - root - INFO - lr: 4.9378e-05 gnorm: 1.32 [ 2:04:59<22:38:36] +[titan] 2025-10-05 00:39:18,937 - root - INFO - step: 3375 loss: 2.9184 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5694 +[titan] 2025-10-05 00:39:18,937 - root - INFO - lr: 4.9376e-05 gnorm: 1.25 [ 2:05:10<22:38:22] +[titan] 2025-10-05 00:39:29,827 - root - INFO - step: 3380 loss: 2.9621 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6080 +[titan] 2025-10-05 00:39:29,827 - root - INFO - lr: 4.9374e-05 
gnorm: 1.24 [ 2:05:21<22:38:08] +[titan] 2025-10-05 00:39:40,719 - root - INFO - step: 3385 loss: 2.9011 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3465 global_avg_mtp_loss: 2.5547 +[titan] 2025-10-05 00:39:40,719 - root - INFO - lr: 4.9372e-05 gnorm: 1.22 [ 2:05:32<22:37:54] +[titan] 2025-10-05 00:39:51,594 - root - INFO - step: 3390 loss: 2.9910 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6324 +[titan] 2025-10-05 00:39:51,595 - root - INFO - lr: 4.9370e-05 gnorm: 1.24 [ 2:05:43<22:37:41] +[titan] 2025-10-05 00:40:02,576 - root - INFO - step: 3395 loss: 2.9436 memory: 118.84GiB(85.28%) tps: 29,839 tflops: 413.97 mfu: 41.86% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5907 +[titan] 2025-10-05 00:40:02,577 - root - INFO - lr: 4.9367e-05 gnorm: 1.26 [ 2:05:54<22:37:28] +[titan] 2025-10-05 00:40:11,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:40:13,489 - root - INFO - step: 3400 loss: 2.9838 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3569 global_avg_mtp_loss: 2.6269 +[titan] 2025-10-05 00:40:13,489 - root - INFO - lr: 4.9365e-05 gnorm: 1.27 [ 2:06:05<22:37:15] +[titan] 2025-10-05 00:40:24,371 - root - INFO - step: 3405 loss: 3.0515 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3666 global_avg_mtp_loss: 2.6849 +[titan] 2025-10-05 00:40:24,371 - root - INFO - lr: 4.9363e-05 gnorm: 1.23 [ 2:06:15<22:37:01] +[titan] 2025-10-05 00:40:35,244 - root - INFO - step: 3410 loss: 2.9631 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3556 global_avg_mtp_loss: 2.6074 +[titan] 2025-10-05 00:40:35,245 - root - INFO - lr: 4.9361e-05 gnorm: 1.28 [ 2:06:26<22:36:47] +[titan] 2025-10-05 00:40:46,133 - root - INFO - step: 3415 loss: 2.9578 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3547 global_avg_mtp_loss: 2.6032 +[titan] 2025-10-05 00:40:46,133 - root - INFO - lr: 4.9359e-05 gnorm: 1.23 [ 2:06:37<22:36:33] +[titan] 2025-10-05 00:40:57,009 - root - INFO - step: 3420 loss: 2.9329 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3498 global_avg_mtp_loss: 2.5832 +[titan] 2025-10-05 00:40:57,009 - root - INFO - lr: 4.9357e-05 gnorm: 1.19 [ 2:06:48<22:36:20] +[titan] 2025-10-05 00:41:07,937 - root - INFO - step: 3425 loss: 2.9564 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.6041 +[titan] 2025-10-05 00:41:07,937 - root - INFO - lr: 4.9355e-05 gnorm: 1.27 [ 2:06:59<22:36:06] +[titan] 2025-10-05 00:41:18,921 - root - INFO - step: 3430 loss: 2.9729 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.88 mfu: 41.85% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6166 +[titan] 2025-10-05 00:41:18,921 - root - INFO - lr: 4.9353e-05 
gnorm: 1.26 [ 2:07:10<22:35:54] +[titan] 2025-10-05 00:41:29,788 - root - INFO - step: 3435 loss: 2.9570 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3535 global_avg_mtp_loss: 2.6035 +[titan] 2025-10-05 00:41:29,789 - root - INFO - lr: 4.9351e-05 gnorm: 1.30 [ 2:07:21<22:35:40] +[titan] 2025-10-05 00:41:40,636 - root - INFO - step: 3440 loss: 2.9121 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.3473 global_avg_mtp_loss: 2.5649 +[titan] 2025-10-05 00:41:40,637 - root - INFO - lr: 4.9348e-05 gnorm: 1.25 [ 2:07:32<22:35:26] +[titan] 2025-10-05 00:41:51,497 - root - INFO - step: 3445 loss: 2.9720 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3548 global_avg_mtp_loss: 2.6172 +[titan] 2025-10-05 00:41:51,498 - root - INFO - lr: 4.9346e-05 gnorm: 1.24 [ 2:07:43<22:35:12] +[titan] 2025-10-05 00:42:00,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:42:02,373 - root - INFO - step: 3450 loss: 3.0025 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:42:02,373 - root - INFO - lr: 4.9344e-05 gnorm: 1.40 [ 2:07:53<22:34:58] +[titan] 2025-10-05 00:42:13,236 - root - INFO - step: 3455 loss: 2.8984 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5533 +[titan] 2025-10-05 00:42:13,236 - root - INFO - lr: 4.9342e-05 gnorm: 1.33 [ 2:08:04<22:34:44] +[titan] 2025-10-05 00:42:24,195 - root - INFO - step: 3460 loss: 2.8961 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3468 global_avg_mtp_loss: 2.5493 +[titan] 2025-10-05 00:42:24,195 - root - INFO - lr: 4.9340e-05 gnorm: 1.30 [ 2:08:15<22:34:31] +[titan] 2025-10-05 00:42:35,085 - root - INFO - step: 3465 loss: 3.0085 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3591 global_avg_mtp_loss: 2.6494 +[titan] 2025-10-05 00:42:35,085 - root - INFO - lr: 4.9338e-05 gnorm: 1.28 [ 2:08:26<22:34:18] +[titan] 2025-10-05 00:42:45,952 - root - INFO - step: 3470 loss: 2.9361 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5855 +[titan] 2025-10-05 00:42:45,952 - root - INFO - lr: 4.9336e-05 gnorm: 1.26 [ 2:08:37<22:34:04] +[titan] 2025-10-05 00:42:56,840 - root - INFO - step: 3475 loss: 2.9223 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3496 global_avg_mtp_loss: 2.5727 +[titan] 2025-10-05 00:42:56,841 - root - INFO - lr: 4.9333e-05 gnorm: 1.25 [ 2:08:48<22:33:51] +[titan] 2025-10-05 00:43:07,696 - root - INFO - step: 3480 loss: 2.9007 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5539 +[titan] 2025-10-05 00:43:07,696 - root - INFO - lr: 4.9331e-05 
gnorm: 1.30 [ 2:08:59<22:33:37] +[titan] 2025-10-05 00:43:18,563 - root - INFO - step: 3485 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5412 +[titan] 2025-10-05 00:43:18,564 - root - INFO - lr: 4.9329e-05 gnorm: 1.24 [ 2:09:10<22:33:23] +[titan] 2025-10-05 00:43:29,498 - root - INFO - step: 3490 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3565 global_avg_mtp_loss: 2.6312 +[titan] 2025-10-05 00:43:29,498 - root - INFO - lr: 4.9327e-05 gnorm: 1.34 [ 2:09:21<22:33:10] +[titan] 2025-10-05 00:43:40,371 - root - INFO - step: 3495 loss: 2.8500 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5087 +[titan] 2025-10-05 00:43:40,371 - root - INFO - lr: 4.9325e-05 gnorm: 1.24 [ 2:09:31<22:32:56] +[titan] 2025-10-05 00:43:49,059 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:43:51,242 - root - INFO - step: 3500 loss: 2.9053 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5576 +[titan] 2025-10-05 00:43:51,242 - root - INFO - lr: 4.9323e-05 gnorm: 1.26 [ 2:09:42<22:32:42] +[titan] 2025-10-05 00:44:02,120 - root - INFO - step: 3505 loss: 2.9596 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:44:02,120 - root - INFO - lr: 4.9320e-05 gnorm: 1.27 [ 2:09:53<22:32:29] +[titan] 2025-10-05 00:44:13,041 - root - INFO - step: 3510 loss: 2.9620 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3557 global_avg_mtp_loss: 2.6063 +[titan] 2025-10-05 00:44:13,042 - root - INFO - lr: 4.9318e-05 gnorm: 1.36 [ 2:10:04<22:32:16] +[titan] 2025-10-05 00:44:23,983 - root - INFO - step: 3515 loss: 2.9163 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5681 +[titan] 2025-10-05 00:44:23,983 - root - INFO - lr: 4.9316e-05 gnorm: 1.35 [ 2:10:15<22:32:03] +[titan] 2025-10-05 00:44:34,890 - root - INFO - step: 3520 loss: 2.9840 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6176 +[titan] 2025-10-05 00:44:34,890 - root - INFO - lr: 4.9314e-05 gnorm: 1.30 [ 2:10:26<22:31:49] +[titan] 2025-10-05 00:44:45,807 - root - INFO - step: 3525 loss: 2.8766 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3420 global_avg_mtp_loss: 2.5345 +[titan] 2025-10-05 00:44:45,807 - root - INFO - lr: 4.9312e-05 gnorm: 1.33 [ 2:10:37<22:31:36] +[titan] 2025-10-05 00:44:56,695 - root - INFO - step: 3530 loss: 2.8643 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5214 +[titan] 2025-10-05 00:44:56,696 - root - INFO - lr: 4.9309e-05 
gnorm: 1.31 [ 2:10:48<22:31:23] +[titan] 2025-10-05 00:45:07,556 - root - INFO - step: 3535 loss: 2.9317 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5804 +[titan] 2025-10-05 00:45:07,556 - root - INFO - lr: 4.9307e-05 gnorm: 1.24 [ 2:10:59<22:31:09] +[titan] 2025-10-05 00:45:18,462 - root - INFO - step: 3540 loss: 2.9149 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3487 global_avg_mtp_loss: 2.5661 +[titan] 2025-10-05 00:45:18,463 - root - INFO - lr: 4.9305e-05 gnorm: 1.24 [ 2:11:09<22:30:56] +[titan] 2025-10-05 00:45:29,403 - root - INFO - step: 3545 loss: 2.9166 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5665 +[titan] 2025-10-05 00:45:29,403 - root - INFO - lr: 4.9303e-05 gnorm: 1.31 [ 2:11:20<22:30:43] +[titan] 2025-10-05 00:45:38,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:45:40,270 - root - INFO - step: 3550 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5477 +[titan] 2025-10-05 00:45:40,270 - root - INFO - lr: 4.9301e-05 gnorm: 1.20 [ 2:11:31<22:30:29] +[titan] 2025-10-05 00:45:51,156 - root - INFO - step: 3555 loss: 2.8547 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3414 global_avg_mtp_loss: 2.5133 +[titan] 2025-10-05 00:45:51,156 - root - INFO - lr: 4.9298e-05 gnorm: 1.22 [ 2:11:42<22:30:16] +[titan] 2025-10-05 00:46:02,028 - root - INFO - step: 3560 loss: 2.9708 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3553 global_avg_mtp_loss: 2.6154 +[titan] 2025-10-05 00:46:02,029 - root - INFO - lr: 4.9296e-05 gnorm: 1.25 [ 2:11:53<22:30:02] +[titan] 2025-10-05 00:46:12,872 - root - INFO - step: 3565 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3463 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:46:12,873 - root - INFO - lr: 4.9294e-05 gnorm: 1.22 [ 2:12:04<22:29:48] +[titan] 2025-10-05 00:46:23,793 - root - INFO - step: 3570 loss: 2.9591 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3538 global_avg_mtp_loss: 2.6053 +[titan] 2025-10-05 00:46:23,794 - root - INFO - lr: 4.9292e-05 gnorm: 1.26 [ 2:12:15<22:29:35] +[titan] 2025-10-05 00:46:34,664 - root - INFO - step: 3575 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3519 global_avg_mtp_loss: 2.5886 +[titan] 2025-10-05 00:46:34,664 - root - INFO - lr: 4.9290e-05 gnorm: 1.30 [ 2:12:26<22:29:21] +[titan] 2025-10-05 00:46:45,547 - root - INFO - step: 3580 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3464 global_avg_mtp_loss: 2.5473 +[titan] 2025-10-05 00:46:45,548 - root - INFO - lr: 4.9287e-05 
gnorm: 1.23 [ 2:12:37<22:29:08] +[titan] 2025-10-05 00:46:54,491 - root - INFO - Dumping profiler traces at step 3584 +[titan] 2025-10-05 00:46:54,527 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:46:56,755 - root - INFO - step: 3585 loss: 2.9232 memory: 118.84GiB(85.28%) tps: 29,238 tflops: 405.64 mfu: 41.02% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5729 +[titan] 2025-10-05 00:46:56,755 - root - INFO - lr: 4.9285e-05 gnorm: 1.28 [ 2:12:48<22:28:58] +[titan] 2025-10-05 00:47:07,619 - root - INFO - step: 3590 loss: 2.9273 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3501 global_avg_mtp_loss: 2.5772 +[titan] 2025-10-05 00:47:07,619 - root - INFO - lr: 4.9283e-05 gnorm: 1.25 [ 2:12:59<22:28:44] +[titan] 2025-10-05 00:47:18,508 - root - INFO - step: 3595 loss: 2.9212 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5720 +[titan] 2025-10-05 00:47:18,509 - root - INFO - lr: 4.9281e-05 gnorm: 1.34 [ 2:13:10<22:28:31] +[titan] 2025-10-05 00:47:27,262 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:47:29,446 - root - INFO - step: 3600 loss: 2.8603 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5188 +[titan] 2025-10-05 00:47:29,447 - root - INFO - lr: 4.9278e-05 gnorm: 1.22 [ 2:13:20<22:28:18] +[titan] 2025-10-05 00:47:40,308 - root - INFO - step: 3605 loss: 2.8618 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5190 +[titan] 2025-10-05 00:47:40,309 - root - INFO - lr: 4.9276e-05 gnorm: 1.19 [ 2:13:31<22:28:04] +[titan] 2025-10-05 00:47:51,174 - root - INFO - step: 3610 loss: 2.9114 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5639 +[titan] 2025-10-05 00:47:51,174 - root - INFO - lr: 4.9274e-05 gnorm: 1.25 [ 2:13:42<22:27:51] +[titan] 2025-10-05 00:48:02,028 - root - INFO - step: 3615 loss: 2.8693 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5265 +[titan] 2025-10-05 00:48:02,028 - root - INFO - lr: 4.9272e-05 gnorm: 1.25 [ 2:13:53<22:27:37] +[titan] 2025-10-05 00:48:12,956 - root - INFO - step: 3620 loss: 2.9829 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3582 global_avg_mtp_loss: 2.6247 +[titan] 2025-10-05 00:48:12,956 - root - INFO - lr: 4.9269e-05 gnorm: 1.25 [ 2:14:04<22:27:24] +[titan] 2025-10-05 00:48:23,914 - root - INFO - step: 3625 loss: 2.9614 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.6084 +[titan] 2025-10-05 00:48:23,914 - root - INFO - lr: 4.9267e-05 gnorm: 1.19 [ 2:14:15<22:27:11] +[titan] 2025-10-05 00:48:34,821 - root - INFO - step: 3630 loss: 2.9416 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3505 global_avg_mtp_loss: 2.5911 +[titan] 2025-10-05 00:48:34,821 - root - INFO - lr: 4.9265e-05 
gnorm: 1.30 [ 2:14:26<22:26:58] +[titan] 2025-10-05 00:48:45,728 - root - INFO - step: 3635 loss: 2.8827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5377 +[titan] 2025-10-05 00:48:45,728 - root - INFO - lr: 4.9263e-05 gnorm: 1.22 [ 2:14:37<22:26:45] +[titan] 2025-10-05 00:48:56,629 - root - INFO - step: 3640 loss: 2.8474 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3379 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 00:48:56,629 - root - INFO - lr: 4.9260e-05 gnorm: 1.32 [ 2:14:48<22:26:32] +[titan] 2025-10-05 00:49:07,530 - root - INFO - step: 3645 loss: 2.9298 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5793 +[titan] 2025-10-05 00:49:07,530 - root - INFO - lr: 4.9258e-05 gnorm: 1.31 [ 2:14:59<22:26:19] +[titan] 2025-10-05 00:49:16,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:49:18,488 - root - INFO - step: 3650 loss: 3.0056 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6456 +[titan] 2025-10-05 00:49:18,488 - root - INFO - lr: 4.9256e-05 gnorm: 1.32 [ 2:15:09<22:26:06] +[titan] 2025-10-05 00:49:29,386 - root - INFO - step: 3655 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5347 +[titan] 2025-10-05 00:49:29,387 - root - INFO - lr: 4.9254e-05 gnorm: 1.23 [ 2:15:20<22:25:53] +[titan] 2025-10-05 00:49:40,255 - root - INFO - step: 3660 loss: 2.8748 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5314 +[titan] 2025-10-05 00:49:40,255 - root - INFO - lr: 4.9251e-05 gnorm: 1.26 [ 2:15:31<22:25:39] +[titan] 2025-10-05 00:49:51,122 - root - INFO - step: 3665 loss: 2.9419 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:49:51,122 - root - INFO - lr: 4.9249e-05 gnorm: 1.24 [ 2:15:42<22:25:26] +[titan] 2025-10-05 00:50:01,986 - root - INFO - step: 3670 loss: 2.8845 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5398 +[titan] 2025-10-05 00:50:01,987 - root - INFO - lr: 4.9247e-05 gnorm: 1.29 [ 2:15:53<22:25:12] +[titan] 2025-10-05 00:50:12,850 - root - INFO - step: 3675 loss: 2.8906 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5453 +[titan] 2025-10-05 00:50:12,851 - root - INFO - lr: 4.9244e-05 gnorm: 1.29 [ 2:16:04<22:24:59] +[titan] 2025-10-05 00:50:23,731 - root - INFO - step: 3680 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3632 global_avg_mtp_loss: 2.6609 +[titan] 2025-10-05 00:50:23,731 - root - INFO - lr: 4.9242e-05 
gnorm: 1.28 [ 2:16:15<22:24:45] +[titan] 2025-10-05 00:50:34,722 - root - INFO - step: 3685 loss: 2.9110 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.3489 global_avg_mtp_loss: 2.5621 +[titan] 2025-10-05 00:50:34,723 - root - INFO - lr: 4.9240e-05 gnorm: 1.25 [ 2:16:26<22:24:33] +[titan] 2025-10-05 00:50:45,616 - root - INFO - step: 3690 loss: 2.8445 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5064 +[titan] 2025-10-05 00:50:45,616 - root - INFO - lr: 4.9238e-05 gnorm: 1.23 [ 2:16:37<22:24:20] +[titan] 2025-10-05 00:50:56,496 - root - INFO - step: 3695 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3361 global_avg_mtp_loss: 2.4870 +[titan] 2025-10-05 00:50:56,496 - root - INFO - lr: 4.9235e-05 gnorm: 1.19 [ 2:16:47<22:24:07] +[titan] 2025-10-05 00:51:05,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:51:07,381 - root - INFO - step: 3700 loss: 2.8874 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5427 +[titan] 2025-10-05 00:51:07,381 - root - INFO - lr: 4.9233e-05 gnorm: 1.31 [ 2:16:58<22:23:53] +[titan] 2025-10-05 00:51:18,258 - root - INFO - step: 3705 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3441 global_avg_mtp_loss: 2.5425 +[titan] 2025-10-05 00:51:18,258 - root - INFO - lr: 4.9231e-05 gnorm: 1.36 [ 2:17:09<22:23:40] +[titan] 2025-10-05 00:51:29,175 - root - INFO - step: 3710 loss: 2.9115 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3467 global_avg_mtp_loss: 2.5648 +[titan] 2025-10-05 00:51:29,175 - root - INFO - lr: 4.9228e-05 gnorm: 1.27 [ 2:17:20<22:23:27] +[titan] 2025-10-05 00:51:40,064 - root - INFO - step: 3715 loss: 2.9140 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5671 +[titan] 2025-10-05 00:51:40,065 - root - INFO - lr: 4.9226e-05 gnorm: 1.23 [ 2:17:31<22:23:14] +[titan] 2025-10-05 00:51:50,950 - root - INFO - step: 3720 loss: 2.8644 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.5220 +[titan] 2025-10-05 00:51:50,950 - root - INFO - lr: 4.9224e-05 gnorm: 1.28 [ 2:17:42<22:23:00] +[titan] 2025-10-05 00:52:01,826 - root - INFO - step: 3725 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5677 +[titan] 2025-10-05 00:52:01,826 - root - INFO - lr: 4.9221e-05 gnorm: 1.30 [ 2:17:53<22:22:47] +[titan] 2025-10-05 00:52:12,692 - root - INFO - step: 3730 loss: 2.8843 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5392 +[titan] 2025-10-05 00:52:12,692 - root - INFO - lr: 4.9219e-05 
gnorm: 1.27 [ 2:18:04<22:22:34] +[titan] 2025-10-05 00:52:23,581 - root - INFO - step: 3735 loss: 2.8622 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3411 global_avg_mtp_loss: 2.5211 +[titan] 2025-10-05 00:52:23,581 - root - INFO - lr: 4.9217e-05 gnorm: 1.29 [ 2:18:15<22:22:20] +[titan] 2025-10-05 00:52:34,507 - root - INFO - step: 3740 loss: 2.8833 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5382 +[titan] 2025-10-05 00:52:34,507 - root - INFO - lr: 4.9214e-05 gnorm: 1.32 [ 2:18:25<22:22:08] +[titan] 2025-10-05 00:52:45,424 - root - INFO - step: 3745 loss: 2.8876 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5439 +[titan] 2025-10-05 00:52:45,424 - root - INFO - lr: 4.9212e-05 gnorm: 1.30 [ 2:18:36<22:21:55] +[titan] 2025-10-05 00:52:54,128 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:52:56,315 - root - INFO - step: 3750 loss: 2.9081 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3472 global_avg_mtp_loss: 2.5609 +[titan] 2025-10-05 00:52:56,315 - root - INFO - lr: 4.9210e-05 gnorm: 1.37 [ 2:18:47<22:21:42] +[titan] 2025-10-05 00:53:07,243 - root - INFO - step: 3755 loss: 2.8797 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 00:53:07,243 - root - INFO - lr: 4.9207e-05 gnorm: 1.27 [ 2:18:58<22:21:29] +[titan] 2025-10-05 00:53:18,154 - root - INFO - step: 3760 loss: 2.8545 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3406 global_avg_mtp_loss: 2.5139 +[titan] 2025-10-05 00:53:18,154 - root - INFO - lr: 4.9205e-05 gnorm: 1.27 [ 2:19:09<22:21:16] +[titan] 2025-10-05 00:53:29,071 - root - INFO - step: 3765 loss: 2.8350 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.4960 +[titan] 2025-10-05 00:53:29,071 - root - INFO - lr: 4.9203e-05 gnorm: 1.27 [ 2:19:20<22:21:03] +[titan] 2025-10-05 00:53:39,977 - root - INFO - step: 3770 loss: 2.8227 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3370 global_avg_mtp_loss: 2.4857 +[titan] 2025-10-05 00:53:39,978 - root - INFO - lr: 4.9200e-05 gnorm: 1.19 [ 2:19:31<22:20:50] +[titan] 2025-10-05 00:53:50,879 - root - INFO - step: 3775 loss: 2.8842 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3430 global_avg_mtp_loss: 2.5411 +[titan] 2025-10-05 00:53:50,879 - root - INFO - lr: 4.9198e-05 gnorm: 1.23 [ 2:19:42<22:20:37] +[titan] 2025-10-05 00:54:01,831 - root - INFO - step: 3780 loss: 2.9375 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5862 +[titan] 2025-10-05 00:54:01,831 - root - INFO - lr: 4.9196e-05 
gnorm: 1.20 [ 2:19:53<22:20:24] +[titan] 2025-10-05 00:54:12,711 - root - INFO - step: 3785 loss: 2.8747 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5318 +[titan] 2025-10-05 00:54:12,711 - root - INFO - lr: 4.9193e-05 gnorm: 1.23 [ 2:20:04<22:20:11] +[titan] 2025-10-05 00:54:23,577 - root - INFO - step: 3790 loss: 2.8207 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4833 +[titan] 2025-10-05 00:54:23,577 - root - INFO - lr: 4.9191e-05 gnorm: 1.27 [ 2:20:15<22:19:58] +[titan] 2025-10-05 00:54:34,480 - root - INFO - step: 3795 loss: 2.9584 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3532 global_avg_mtp_loss: 2.6052 +[titan] 2025-10-05 00:54:34,480 - root - INFO - lr: 4.9188e-05 gnorm: 1.29 [ 2:20:25<22:19:45] +[titan] 2025-10-05 00:54:43,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:54:45,353 - root - INFO - step: 3800 loss: 2.9385 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3511 global_avg_mtp_loss: 2.5874 +[titan] 2025-10-05 00:54:45,353 - root - INFO - lr: 4.9186e-05 gnorm: 1.24 [ 2:20:36<22:19:31] +[titan] 2025-10-05 00:54:56,214 - root - INFO - step: 3805 loss: 2.8516 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3400 global_avg_mtp_loss: 2.5116 +[titan] 2025-10-05 00:54:56,214 - root - INFO - lr: 4.9184e-05 gnorm: 1.32 [ 2:20:47<22:19:18] +[titan] 2025-10-05 00:55:07,134 - root - INFO - step: 3810 loss: 2.8608 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5196 +[titan] 2025-10-05 00:55:07,134 - root - INFO - lr: 4.9181e-05 gnorm: 1.27 [ 2:20:58<22:19:05] +[titan] 2025-10-05 00:55:18,019 - root - INFO - step: 3815 loss: 2.9132 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3495 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 00:55:18,019 - root - INFO - lr: 4.9179e-05 gnorm: 1.33 [ 2:21:09<22:18:52] +[titan] 2025-10-05 00:55:28,882 - root - INFO - step: 3820 loss: 2.8903 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3438 global_avg_mtp_loss: 2.5465 +[titan] 2025-10-05 00:55:28,882 - root - INFO - lr: 4.9176e-05 gnorm: 1.28 [ 2:21:20<22:18:39] +[titan] 2025-10-05 00:55:39,765 - root - INFO - step: 3825 loss: 2.8538 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3396 global_avg_mtp_loss: 2.5142 +[titan] 2025-10-05 00:55:39,765 - root - INFO - lr: 4.9174e-05 gnorm: 1.35 [ 2:21:31<22:18:26] +[titan] 2025-10-05 00:55:50,656 - root - INFO - step: 3830 loss: 2.8951 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5503 +[titan] 2025-10-05 00:55:50,656 - root - INFO - lr: 4.9172e-05 
gnorm: 1.29 [ 2:21:42<22:18:12] +[titan] 2025-10-05 00:56:01,544 - root - INFO - step: 3835 loss: 2.8701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 00:56:01,544 - root - INFO - lr: 4.9169e-05 gnorm: 1.28 [ 2:21:53<22:17:59] +[titan] 2025-10-05 00:56:12,424 - root - INFO - step: 3840 loss: 2.8980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3459 global_avg_mtp_loss: 2.5521 +[titan] 2025-10-05 00:56:12,424 - root - INFO - lr: 4.9167e-05 gnorm: 1.29 [ 2:22:03<22:17:46] +[titan] 2025-10-05 00:56:23,350 - root - INFO - step: 3845 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:56:23,350 - root - INFO - lr: 4.9164e-05 gnorm: 1.33 [ 2:22:14<22:17:33] +[titan] 2025-10-05 00:56:32,043 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:56:34,236 - root - INFO - step: 3850 loss: 2.8817 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5379 +[titan] 2025-10-05 00:56:34,237 - root - INFO - lr: 4.9162e-05 gnorm: 1.28 [ 2:22:25<22:17:20] +[titan] 2025-10-05 00:56:45,120 - root - INFO - step: 3855 loss: 2.8016 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 00:56:45,120 - root - INFO - lr: 4.9160e-05 gnorm: 1.32 [ 2:22:36<22:17:07] +[titan] 2025-10-05 00:56:56,000 - root - INFO - step: 3860 loss: 2.8851 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 00:56:56,000 - root - INFO - lr: 4.9157e-05 gnorm: 1.29 [ 2:22:47<22:16:54] +[titan] 2025-10-05 00:57:06,896 - root - INFO - step: 3865 loss: 2.8534 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3401 global_avg_mtp_loss: 2.5132 +[titan] 2025-10-05 00:57:06,896 - root - INFO - lr: 4.9155e-05 gnorm: 1.25 [ 2:22:58<22:16:41] +[titan] 2025-10-05 00:57:17,779 - root - INFO - step: 3870 loss: 2.9197 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5707 +[titan] 2025-10-05 00:57:17,779 - root - INFO - lr: 4.9152e-05 gnorm: 1.28 [ 2:23:09<22:16:28] +[titan] 2025-10-05 00:57:28,718 - root - INFO - step: 3875 loss: 2.9466 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.3534 global_avg_mtp_loss: 2.5932 +[titan] 2025-10-05 00:57:28,718 - root - INFO - lr: 4.9150e-05 gnorm: 1.21 [ 2:23:20<22:16:15] +[titan] 2025-10-05 00:57:39,599 - root - INFO - step: 3880 loss: 2.8840 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3444 global_avg_mtp_loss: 2.5396 +[titan] 2025-10-05 00:57:39,600 - root - INFO - lr: 4.9148e-05 
gnorm: 1.28 [ 2:23:31<22:16:02] +[titan] 2025-10-05 00:57:50,474 - root - INFO - step: 3885 loss: 2.9370 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3488 global_avg_mtp_loss: 2.5882 +[titan] 2025-10-05 00:57:50,474 - root - INFO - lr: 4.9145e-05 gnorm: 1.25 [ 2:23:41<22:15:49] +[titan] 2025-10-05 00:58:01,351 - root - INFO - step: 3890 loss: 2.9350 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3493 global_avg_mtp_loss: 2.5857 +[titan] 2025-10-05 00:58:01,351 - root - INFO - lr: 4.9143e-05 gnorm: 1.31 [ 2:23:52<22:15:36] +[titan] 2025-10-05 00:58:12,271 - root - INFO - step: 3895 loss: 2.9044 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5592 +[titan] 2025-10-05 00:58:12,271 - root - INFO - lr: 4.9140e-05 gnorm: 1.26 [ 2:24:03<22:15:23] +[titan] 2025-10-05 00:58:20,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:58:23,152 - root - INFO - step: 3900 loss: 2.7993 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 00:58:23,152 - root - INFO - lr: 4.9138e-05 gnorm: 1.27 [ 2:24:14<22:15:10] +[titan] 2025-10-05 00:58:34,070 - root - INFO - step: 3905 loss: 2.9356 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:58:34,070 - root - INFO - lr: 4.9135e-05 gnorm: 1.23 [ 2:24:25<22:14:57] +[titan] 2025-10-05 00:58:44,959 - root - INFO - step: 3910 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3475 global_avg_mtp_loss: 2.5678 +[titan] 2025-10-05 00:58:44,959 - root - INFO - lr: 4.9133e-05 gnorm: 1.26 [ 2:24:36<22:14:44] +[titan] 2025-10-05 00:58:55,830 - root - INFO - step: 3915 loss: 2.8401 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5019 +[titan] 2025-10-05 00:58:55,830 - root - INFO - lr: 4.9130e-05 gnorm: 1.23 [ 2:24:47<22:14:31] +[titan] 2025-10-05 00:59:06,689 - root - INFO - step: 3920 loss: 2.9547 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3597 global_avg_mtp_loss: 2.5950 +[titan] 2025-10-05 00:59:06,690 - root - INFO - lr: 4.9128e-05 gnorm: 1.24 [ 2:24:58<22:14:18] +[titan] 2025-10-05 00:59:17,583 - root - INFO - step: 3925 loss: 2.9231 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3478 global_avg_mtp_loss: 2.5753 +[titan] 2025-10-05 00:59:17,584 - root - INFO - lr: 4.9125e-05 gnorm: 1.29 [ 2:25:09<22:14:05] +[titan] 2025-10-05 00:59:28,459 - root - INFO - step: 3930 loss: 2.8642 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5227 +[titan] 2025-10-05 00:59:28,459 - root - INFO - lr: 4.9123e-05 
gnorm: 1.29 [ 2:25:19<22:13:52] +[titan] 2025-10-05 00:59:39,392 - root - INFO - step: 3935 loss: 2.8806 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 00:59:39,393 - root - INFO - lr: 4.9121e-05 gnorm: 1.31 [ 2:25:30<22:13:39] +[titan] 2025-10-05 00:59:50,302 - root - INFO - step: 3940 loss: 2.9187 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.3484 global_avg_mtp_loss: 2.5703 +[titan] 2025-10-05 00:59:50,302 - root - INFO - lr: 4.9118e-05 gnorm: 1.23 [ 2:25:41<22:13:27] +[titan] 2025-10-05 01:00:01,171 - root - INFO - step: 3945 loss: 2.8435 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:00:01,172 - root - INFO - lr: 4.9116e-05 gnorm: 1.25 [ 2:25:52<22:13:13] +[titan] 2025-10-05 01:00:09,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:00:12,051 - root - INFO - step: 3950 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5446 +[titan] 2025-10-05 01:00:12,051 - root - INFO - lr: 4.9113e-05 gnorm: 1.27 [ 2:26:03<22:13:00] +[titan] 2025-10-05 01:00:22,938 - root - INFO - step: 3955 loss: 2.8946 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5492 +[titan] 2025-10-05 01:00:22,938 - root - INFO - lr: 4.9111e-05 gnorm: 1.31 [ 2:26:14<22:12:47] +[titan] 2025-10-05 01:00:33,863 - root - INFO - step: 3960 loss: 2.9358 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5868 +[titan] 2025-10-05 01:00:33,863 - root - INFO - lr: 4.9108e-05 gnorm: 1.30 [ 2:26:25<22:12:35] +[titan] 2025-10-05 01:00:44,742 - root - INFO - step: 3965 loss: 2.8537 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3415 global_avg_mtp_loss: 2.5123 +[titan] 2025-10-05 01:00:44,743 - root - INFO - lr: 4.9106e-05 gnorm: 1.24 [ 2:26:36<22:12:22] +[titan] 2025-10-05 01:00:55,669 - root - INFO - step: 3970 loss: 2.8697 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5257 +[titan] 2025-10-05 01:00:55,670 - root - INFO - lr: 4.9103e-05 gnorm: 1.26 [ 2:26:47<22:12:09] +[titan] 2025-10-05 01:01:06,531 - root - INFO - step: 3975 loss: 2.8184 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4844 +[titan] 2025-10-05 01:01:06,531 - root - INFO - lr: 4.9101e-05 gnorm: 1.26 [ 2:26:57<22:11:56] +[titan] 2025-10-05 01:01:17,435 - root - INFO - step: 3980 loss: 2.8685 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5264 +[titan] 2025-10-05 01:01:17,435 - root - INFO - lr: 4.9098e-05 
gnorm: 1.32 [ 2:27:08<22:11:43] +[titan] 2025-10-05 01:01:28,313 - root - INFO - step: 3985 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3312 global_avg_mtp_loss: 2.4498 +[titan] 2025-10-05 01:01:28,313 - root - INFO - lr: 4.9096e-05 gnorm: 1.26 [ 2:27:19<22:11:30] +[titan] 2025-10-05 01:01:39,229 - root - INFO - step: 3990 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3337 global_avg_mtp_loss: 2.4610 +[titan] 2025-10-05 01:01:39,229 - root - INFO - lr: 4.9093e-05 gnorm: 1.30 [ 2:27:30<22:11:17] +[titan] 2025-10-05 01:01:50,092 - root - INFO - step: 3995 loss: 2.7943 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3322 global_avg_mtp_loss: 2.4621 +[titan] 2025-10-05 01:01:50,092 - root - INFO - lr: 4.9091e-05 gnorm: 1.21 [ 2:27:41<22:11:04] +[titan] 2025-10-05 01:01:58,771 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:02:00,958 - root - INFO - step: 4000 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5902 +[titan] 2025-10-05 01:02:00,958 - root - INFO - lr: 4.9088e-05 gnorm: 1.27 [ 2:27:52<22:10:51] +[titan] 2025-10-05 01:02:11,850 - root - INFO - step: 4005 loss: 2.8699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5271 +[titan] 2025-10-05 01:02:11,850 - root - INFO - lr: 4.9086e-05 gnorm: 1.29 [ 2:28:03<22:10:38] +[titan] 2025-10-05 01:02:22,761 - root - INFO - step: 4010 loss: 2.8862 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 01:02:22,761 - root - INFO - lr: 4.9083e-05 gnorm: 1.23 [ 2:28:14<22:10:26] +[titan] 2025-10-05 01:02:33,616 - root - INFO - step: 4015 loss: 2.8251 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.4858 +[titan] 2025-10-05 01:02:33,616 - root - INFO - lr: 4.9081e-05 gnorm: 1.23 [ 2:28:25<22:10:12] +[titan] 2025-10-05 01:02:44,524 - root - INFO - step: 4020 loss: 2.8756 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5328 +[titan] 2025-10-05 01:02:44,525 - root - INFO - lr: 4.9078e-05 gnorm: 1.23 [ 2:28:35<22:10:00] +[titan] 2025-10-05 01:02:55,396 - root - INFO - step: 4025 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3363 global_avg_mtp_loss: 2.4949 +[titan] 2025-10-05 01:02:55,396 - root - INFO - lr: 4.9076e-05 gnorm: 1.22 [ 2:28:46<22:09:47] +[titan] 2025-10-05 01:03:06,265 - root - INFO - step: 4030 loss: 2.8674 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5248 +[titan] 2025-10-05 01:03:06,265 - root - INFO - lr: 4.9073e-05 
gnorm: 1.24 [ 2:28:57<22:09:34] +[titan] 2025-10-05 01:03:17,168 - root - INFO - step: 4035 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:03:17,168 - root - INFO - lr: 4.9070e-05 gnorm: 1.29 [ 2:29:08<22:09:21] +[titan] 2025-10-05 01:03:28,097 - root - INFO - step: 4040 loss: 2.8057 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3338 global_avg_mtp_loss: 2.4719 +[titan] 2025-10-05 01:03:28,098 - root - INFO - lr: 4.9068e-05 gnorm: 1.23 [ 2:29:19<22:09:08] +[titan] 2025-10-05 01:03:39,019 - root - INFO - step: 4045 loss: 2.8686 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5273 +[titan] 2025-10-05 01:03:39,019 - root - INFO - lr: 4.9065e-05 gnorm: 1.33 [ 2:29:30<22:08:56] +[titan] 2025-10-05 01:03:47,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:03:49,927 - root - INFO - step: 4050 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5088 +[titan] 2025-10-05 01:03:49,927 - root - INFO - lr: 4.9063e-05 gnorm: 1.25 [ 2:29:41<22:08:43] +[titan] 2025-10-05 01:04:00,828 - root - INFO - step: 4055 loss: 2.8040 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4693 +[titan] 2025-10-05 01:04:00,828 - root - INFO - lr: 4.9060e-05 gnorm: 1.23 [ 2:29:52<22:08:30] +[titan] 2025-10-05 01:04:11,717 - root - INFO - step: 4060 loss: 2.8008 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4673 +[titan] 2025-10-05 01:04:11,717 - root - INFO - lr: 4.9058e-05 gnorm: 1.27 [ 2:30:03<22:08:17] +[titan] 2025-10-05 01:04:22,649 - root - INFO - step: 4065 loss: 2.8860 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5433 +[titan] 2025-10-05 01:04:22,649 - root - INFO - lr: 4.9055e-05 gnorm: 1.27 [ 2:30:14<22:08:05] +[titan] 2025-10-05 01:04:33,534 - root - INFO - step: 4070 loss: 2.8482 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.5092 +[titan] 2025-10-05 01:04:33,534 - root - INFO - lr: 4.9053e-05 gnorm: 1.28 [ 2:30:24<22:07:52] +[titan] 2025-10-05 01:04:44,493 - root - INFO - step: 4075 loss: 2.7243 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.3989 +[titan] 2025-10-05 01:04:44,493 - root - INFO - lr: 4.9050e-05 gnorm: 1.28 [ 2:30:35<22:07:40] +[titan] 2025-10-05 01:04:55,369 - root - INFO - step: 4080 loss: 2.9124 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5655 +[titan] 2025-10-05 01:04:55,370 - root - INFO - lr: 4.9047e-05 
gnorm: 1.24 [ 2:30:46<22:07:27] +[titan] 2025-10-05 01:05:06,228 - root - INFO - step: 4085 loss: 2.8731 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 01:05:06,228 - root - INFO - lr: 4.9045e-05 gnorm: 1.27 [ 2:30:57<22:07:14] +[titan] 2025-10-05 01:05:17,102 - root - INFO - step: 4090 loss: 2.7997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4672 +[titan] 2025-10-05 01:05:17,102 - root - INFO - lr: 4.9042e-05 gnorm: 1.28 [ 2:31:08<22:07:01] +[titan] 2025-10-05 01:05:28,059 - root - INFO - step: 4095 loss: 2.9035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5587 +[titan] 2025-10-05 01:05:28,060 - root - INFO - lr: 4.9040e-05 gnorm: 1.23 [ 2:31:19<22:06:49] +[titan] 2025-10-05 01:05:30,409 - root - INFO - Dumping profiler traces at step 4096 +[titan] 2025-10-05 01:05:30,447 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:05:37,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:05:39,213 - root - INFO - step: 4100 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 29,380 tflops: 407.60 mfu: 41.21% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4763 +[titan] 2025-10-05 01:05:39,213 - root - INFO - lr: 4.9037e-05 gnorm: 1.29 [ 2:31:30<22:06:38] +[titan] 2025-10-05 01:05:50,104 - root - INFO - step: 4105 loss: 2.8434 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5046 +[titan] 2025-10-05 01:05:50,104 - root - INFO - lr: 4.9035e-05 gnorm: 1.25 [ 2:31:41<22:06:25] +[titan] 2025-10-05 01:06:00,954 - root - INFO - step: 4110 loss: 2.8513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3410 global_avg_mtp_loss: 2.5103 +[titan] 2025-10-05 01:06:00,954 - root - INFO - lr: 4.9032e-05 gnorm: 1.30 [ 2:31:52<22:06:12] +[titan] 2025-10-05 01:06:11,792 - root - INFO - step: 4115 loss: 2.8687 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 01:06:11,793 - root - INFO - lr: 4.9029e-05 gnorm: 1.28 [ 2:32:03<22:05:59] +[titan] 2025-10-05 01:06:22,672 - root - INFO - step: 4120 loss: 2.7381 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3273 global_avg_mtp_loss: 2.4108 +[titan] 2025-10-05 01:06:22,673 - root - INFO - lr: 4.9027e-05 gnorm: 1.20 [ 2:32:14<22:05:46] +[titan] 2025-10-05 01:06:33,541 - root - INFO - step: 4125 loss: 2.8811 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 01:06:33,541 - root - INFO - lr: 4.9024e-05 gnorm: 1.27 [ 2:32:24<22:05:33] +[titan] 2025-10-05 01:06:44,458 - root - INFO - step: 4130 loss: 2.7955 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3316 global_avg_mtp_loss: 2.4639 +[titan] 2025-10-05 01:06:44,459 - root - INFO - lr: 4.9022e-05 
gnorm: 1.22 [ 2:32:35<22:05:21] +[titan] 2025-10-05 01:06:55,338 - root - INFO - step: 4135 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3357 global_avg_mtp_loss: 2.4937 +[titan] 2025-10-05 01:06:55,338 - root - INFO - lr: 4.9019e-05 gnorm: 1.26 [ 2:32:46<22:05:08] +[titan] 2025-10-05 01:07:06,209 - root - INFO - step: 4140 loss: 2.8211 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3351 global_avg_mtp_loss: 2.4860 +[titan] 2025-10-05 01:07:06,209 - root - INFO - lr: 4.9016e-05 gnorm: 1.23 [ 2:32:57<22:04:55] +[titan] 2025-10-05 01:07:17,116 - root - INFO - step: 4145 loss: 2.7757 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4442 +[titan] 2025-10-05 01:07:17,116 - root - INFO - lr: 4.9014e-05 gnorm: 1.33 [ 2:33:08<22:04:42] +[titan] 2025-10-05 01:07:25,818 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:07:28,011 - root - INFO - step: 4150 loss: 2.8404 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.5032 +[titan] 2025-10-05 01:07:28,012 - root - INFO - lr: 4.9011e-05 gnorm: 1.29 [ 2:33:19<22:04:29] +[titan] 2025-10-05 01:07:38,919 - root - INFO - step: 4155 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5380 +[titan] 2025-10-05 01:07:38,919 - root - INFO - lr: 4.9009e-05 gnorm: 1.22 [ 2:33:30<22:04:17] +[titan] 2025-10-05 01:07:49,794 - root - INFO - step: 4160 loss: 2.8305 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3386 global_avg_mtp_loss: 2.4919 +[titan] 2025-10-05 01:07:49,794 - root - INFO - lr: 4.9006e-05 gnorm: 1.23 [ 2:33:41<22:04:04] +[titan] 2025-10-05 01:08:00,715 - root - INFO - step: 4165 loss: 2.7568 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4286 +[titan] 2025-10-05 01:08:00,715 - root - INFO - lr: 4.9003e-05 gnorm: 1.22 [ 2:33:52<22:03:52] +[titan] 2025-10-05 01:08:11,575 - root - INFO - step: 4170 loss: 2.8449 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.3395 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:08:11,575 - root - INFO - lr: 4.9001e-05 gnorm: 1.22 [ 2:34:03<22:03:39] +[titan] 2025-10-05 01:08:22,448 - root - INFO - step: 4175 loss: 2.8005 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3330 global_avg_mtp_loss: 2.4675 +[titan] 2025-10-05 01:08:22,448 - root - INFO - lr: 4.8998e-05 gnorm: 1.22 [ 2:34:13<22:03:26] +[titan] 2025-10-05 01:08:33,314 - root - INFO - step: 4180 loss: 2.7794 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4468 +[titan] 2025-10-05 01:08:33,314 - root - INFO - lr: 4.8995e-05 
gnorm: 1.18 [ 2:34:24<22:03:13] +[titan] 2025-10-05 01:08:44,215 - root - INFO - step: 4185 loss: 2.8110 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3342 global_avg_mtp_loss: 2.4768 +[titan] 2025-10-05 01:08:44,215 - root - INFO - lr: 4.8993e-05 gnorm: 1.25 [ 2:34:35<22:03:00] +[titan] 2025-10-05 01:08:55,079 - root - INFO - step: 4190 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4689 +[titan] 2025-10-05 01:08:55,079 - root - INFO - lr: 4.8990e-05 gnorm: 1.20 [ 2:34:46<22:02:47] +[titan] 2025-10-05 01:09:05,968 - root - INFO - step: 4195 loss: 2.7893 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3318 global_avg_mtp_loss: 2.4575 +[titan] 2025-10-05 01:09:05,968 - root - INFO - lr: 4.8987e-05 gnorm: 1.27 [ 2:34:57<22:02:34] +[titan] 2025-10-05 01:09:14,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:09:16,867 - root - INFO - step: 4200 loss: 2.8001 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 01:09:16,867 - root - INFO - lr: 4.8985e-05 gnorm: 1.37 [ 2:35:08<22:02:22] +[titan] 2025-10-05 01:09:27,758 - root - INFO - step: 4205 loss: 2.8414 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5036 +[titan] 2025-10-05 01:09:27,758 - root - INFO - lr: 4.8982e-05 gnorm: 1.27 [ 2:35:19<22:02:09] +[titan] 2025-10-05 01:09:38,614 - root - INFO - step: 4210 loss: 2.8082 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4739 +[titan] 2025-10-05 01:09:38,614 - root - INFO - lr: 4.8980e-05 gnorm: 1.21 [ 2:35:30<22:01:56] +[titan] 2025-10-05 01:09:49,535 - root - INFO - step: 4215 loss: 2.8257 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4886 +[titan] 2025-10-05 01:09:49,535 - root - INFO - lr: 4.8977e-05 gnorm: 1.25 [ 2:35:40<22:01:44] +[titan] 2025-10-05 01:10:00,451 - root - INFO - step: 4220 loss: 2.8238 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3391 global_avg_mtp_loss: 2.4847 +[titan] 2025-10-05 01:10:00,451 - root - INFO - lr: 4.8974e-05 gnorm: 1.27 [ 2:35:51<22:01:31] +[titan] 2025-10-05 01:10:11,409 - root - INFO - step: 4225 loss: 2.7720 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4420 +[titan] 2025-10-05 01:10:11,409 - root - INFO - lr: 4.8972e-05 gnorm: 1.25 [ 2:36:02<22:01:19] +[titan] 2025-10-05 01:10:22,330 - root - INFO - step: 4230 loss: 2.8335 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3375 global_avg_mtp_loss: 2.4961 +[titan] 2025-10-05 01:10:22,330 - root - INFO - lr: 4.8969e-05 
gnorm: 1.22 [ 2:36:13<22:01:07] +[titan] 2025-10-05 01:10:33,205 - root - INFO - step: 4235 loss: 2.9402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5879 +[titan] 2025-10-05 01:10:33,205 - root - INFO - lr: 4.8966e-05 gnorm: 1.26 [ 2:36:24<22:00:54] +[titan] 2025-10-05 01:10:44,111 - root - INFO - step: 4240 loss: 2.8115 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4770 +[titan] 2025-10-05 01:10:44,111 - root - INFO - lr: 4.8964e-05 gnorm: 1.23 [ 2:36:35<22:00:41] +[titan] 2025-10-05 01:10:54,992 - root - INFO - step: 4245 loss: 2.7621 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4328 +[titan] 2025-10-05 01:10:54,993 - root - INFO - lr: 4.8961e-05 gnorm: 1.25 [ 2:36:46<22:00:28] +[titan] 2025-10-05 01:11:03,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:11:05,860 - root - INFO - step: 4250 loss: 2.7919 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:11:05,860 - root - INFO - lr: 4.8958e-05 gnorm: 1.34 [ 2:36:57<22:00:16] +[titan] 2025-10-05 01:11:16,750 - root - INFO - step: 4255 loss: 2.8769 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 01:11:16,750 - root - INFO - lr: 4.8955e-05 gnorm: 1.23 [ 2:37:08<22:00:03] +[titan] 2025-10-05 01:11:27,682 - root - INFO - step: 4260 loss: 2.8447 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5059 +[titan] 2025-10-05 01:11:27,682 - root - INFO - lr: 4.8953e-05 gnorm: 1.29 [ 2:37:19<21:59:51] +[titan] 2025-10-05 01:11:38,566 - root - INFO - step: 4265 loss: 2.8553 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3402 global_avg_mtp_loss: 2.5151 +[titan] 2025-10-05 01:11:38,566 - root - INFO - lr: 4.8950e-05 gnorm: 1.28 [ 2:37:29<21:59:38] +[titan] 2025-10-05 01:11:49,489 - root - INFO - step: 4270 loss: 2.8265 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:11:49,489 - root - INFO - lr: 4.8947e-05 gnorm: 1.23 [ 2:37:40<21:59:25] +[titan] 2025-10-05 01:12:00,379 - root - INFO - step: 4275 loss: 2.7626 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3291 global_avg_mtp_loss: 2.4335 +[titan] 2025-10-05 01:12:00,379 - root - INFO - lr: 4.8945e-05 gnorm: 1.23 [ 2:37:51<21:59:13] +[titan] 2025-10-05 01:12:11,266 - root - INFO - step: 4280 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4955 +[titan] 2025-10-05 01:12:11,266 - root - INFO - lr: 4.8942e-05 
gnorm: 1.25 [ 2:38:02<21:59:00] +[titan] 2025-10-05 01:12:22,135 - root - INFO - step: 4285 loss: 2.8353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3380 global_avg_mtp_loss: 2.4973 +[titan] 2025-10-05 01:12:22,135 - root - INFO - lr: 4.8939e-05 gnorm: 1.27 [ 2:38:13<21:58:47] +[titan] 2025-10-05 01:12:33,063 - root - INFO - step: 4290 loss: 2.7796 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4469 +[titan] 2025-10-05 01:12:33,064 - root - INFO - lr: 4.8937e-05 gnorm: 1.31 [ 2:38:24<21:58:35] +[titan] 2025-10-05 01:12:43,959 - root - INFO - step: 4295 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4492 +[titan] 2025-10-05 01:12:43,959 - root - INFO - lr: 4.8934e-05 gnorm: 1.37 [ 2:38:35<21:58:22] +[titan] 2025-10-05 01:12:52,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:12:54,832 - root - INFO - step: 4300 loss: 2.9113 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 01:12:54,832 - root - INFO - lr: 4.8931e-05 gnorm: 1.32 [ 2:38:46<21:58:10] +[titan] 2025-10-05 01:13:05,696 - root - INFO - step: 4305 loss: 2.8427 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:13:05,696 - root - INFO - lr: 4.8928e-05 gnorm: 1.29 [ 2:38:57<21:57:57] +[titan] 2025-10-05 01:13:16,559 - root - INFO - step: 4310 loss: 2.8552 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5127 +[titan] 2025-10-05 01:13:16,559 - root - INFO - lr: 4.8926e-05 gnorm: 1.25 [ 2:39:07<21:57:44] +[titan] 2025-10-05 01:13:27,434 - root - INFO - step: 4315 loss: 2.7587 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:13:27,434 - root - INFO - lr: 4.8923e-05 gnorm: 1.28 [ 2:39:18<21:57:31] +[titan] 2025-10-05 01:13:38,295 - root - INFO - step: 4320 loss: 2.8361 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3373 global_avg_mtp_loss: 2.4988 +[titan] 2025-10-05 01:13:38,295 - root - INFO - lr: 4.8920e-05 gnorm: 1.33 [ 2:39:29<21:57:18] +[titan] 2025-10-05 01:13:49,212 - root - INFO - step: 4325 loss: 2.8809 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5381 +[titan] 2025-10-05 01:13:49,212 - root - INFO - lr: 4.8918e-05 gnorm: 1.32 [ 2:39:40<21:57:06] +[titan] 2025-10-05 01:14:00,073 - root - INFO - step: 4330 loss: 2.8655 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5232 +[titan] 2025-10-05 01:14:00,073 - root - INFO - lr: 4.8915e-05 
gnorm: 1.25 [ 2:39:51<21:56:53] +[titan] 2025-10-05 01:14:10,949 - root - INFO - step: 4335 loss: 2.8077 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4742 +[titan] 2025-10-05 01:14:10,949 - root - INFO - lr: 4.8912e-05 gnorm: 1.25 [ 2:40:02<21:56:40] +[titan] 2025-10-05 01:14:21,868 - root - INFO - step: 4340 loss: 2.8223 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3346 global_avg_mtp_loss: 2.4877 +[titan] 2025-10-05 01:14:21,868 - root - INFO - lr: 4.8909e-05 gnorm: 1.21 [ 2:40:13<21:56:28] +[titan] 2025-10-05 01:14:32,754 - root - INFO - step: 4345 loss: 2.8555 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3431 global_avg_mtp_loss: 2.5124 +[titan] 2025-10-05 01:14:32,754 - root - INFO - lr: 4.8907e-05 gnorm: 1.26 [ 2:40:24<21:56:15] +[titan] 2025-10-05 01:14:41,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:14:43,631 - root - INFO - step: 4350 loss: 2.7309 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 01:14:43,631 - root - INFO - lr: 4.8904e-05 gnorm: 1.21 [ 2:40:35<21:56:03] +[titan] 2025-10-05 01:14:54,554 - root - INFO - step: 4355 loss: 2.7817 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:14:54,555 - root - INFO - lr: 4.8901e-05 gnorm: 1.31 [ 2:40:45<21:55:50] +[titan] 2025-10-05 01:15:05,471 - root - INFO - step: 4360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:15:05,471 - root - INFO - lr: 4.8898e-05 gnorm: 1.18 [ 2:40:56<21:55:38] +[titan] 2025-10-05 01:15:16,353 - root - INFO - step: 4365 loss: 2.7543 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3265 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:15:16,353 - root - INFO - lr: 4.8896e-05 gnorm: 1.34 [ 2:41:07<21:55:25] +[titan] 2025-10-05 01:15:27,221 - root - INFO - step: 4370 loss: 2.8151 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3349 global_avg_mtp_loss: 2.4802 +[titan] 2025-10-05 01:15:27,222 - root - INFO - lr: 4.8893e-05 gnorm: 1.33 [ 2:41:18<21:55:13] +[titan] 2025-10-05 01:15:38,092 - root - INFO - step: 4375 loss: 2.8402 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:15:38,092 - root - INFO - lr: 4.8890e-05 gnorm: 1.24 [ 2:41:29<21:55:00] +[titan] 2025-10-05 01:15:48,973 - root - INFO - step: 4380 loss: 2.7636 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4347 +[titan] 2025-10-05 01:15:48,973 - root - INFO - lr: 4.8887e-05 
gnorm: 1.28 [ 2:41:40<21:54:47] +[titan] 2025-10-05 01:15:59,862 - root - INFO - step: 4385 loss: 2.7822 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4512 +[titan] 2025-10-05 01:15:59,863 - root - INFO - lr: 4.8884e-05 gnorm: 1.22 [ 2:41:51<21:54:35] +[titan] 2025-10-05 01:16:10,768 - root - INFO - step: 4390 loss: 2.8774 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5334 +[titan] 2025-10-05 01:16:10,768 - root - INFO - lr: 4.8882e-05 gnorm: 1.31 [ 2:42:02<21:54:22] +[titan] 2025-10-05 01:16:21,633 - root - INFO - step: 4395 loss: 2.7736 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4446 +[titan] 2025-10-05 01:16:21,633 - root - INFO - lr: 4.8879e-05 gnorm: 1.27 [ 2:42:13<21:54:09] +[titan] 2025-10-05 01:16:30,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:16:32,515 - root - INFO - step: 4400 loss: 2.8412 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5024 +[titan] 2025-10-05 01:16:32,515 - root - INFO - lr: 4.8876e-05 gnorm: 1.24 [ 2:42:23<21:53:57] +[titan] 2025-10-05 01:16:43,378 - root - INFO - step: 4405 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4765 +[titan] 2025-10-05 01:16:43,378 - root - INFO - lr: 4.8873e-05 gnorm: 1.25 [ 2:42:34<21:53:44] +[titan] 2025-10-05 01:16:54,311 - root - INFO - step: 4410 loss: 2.7984 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3341 global_avg_mtp_loss: 2.4642 +[titan] 2025-10-05 01:16:54,312 - root - INFO - lr: 4.8871e-05 gnorm: 1.22 [ 2:42:45<21:53:32] +[titan] 2025-10-05 01:17:05,164 - root - INFO - step: 4415 loss: 2.7761 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3306 global_avg_mtp_loss: 2.4455 +[titan] 2025-10-05 01:17:05,164 - root - INFO - lr: 4.8868e-05 gnorm: 1.24 [ 2:42:56<21:53:19] +[titan] 2025-10-05 01:17:16,059 - root - INFO - step: 4420 loss: 2.8777 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5351 +[titan] 2025-10-05 01:17:16,059 - root - INFO - lr: 4.8865e-05 gnorm: 1.27 [ 2:43:07<21:53:06] +[titan] 2025-10-05 01:17:26,943 - root - INFO - step: 4425 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4104 +[titan] 2025-10-05 01:17:26,943 - root - INFO - lr: 4.8862e-05 gnorm: 1.25 [ 2:43:18<21:52:54] +[titan] 2025-10-05 01:17:37,810 - root - INFO - step: 4430 loss: 2.8315 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:17:37,810 - root - INFO - lr: 4.8859e-05 
gnorm: 1.24 [ 2:43:29<21:52:41] +[titan] 2025-10-05 01:17:48,674 - root - INFO - step: 4435 loss: 2.7874 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4564 +[titan] 2025-10-05 01:17:48,674 - root - INFO - lr: 4.8857e-05 gnorm: 1.29 [ 2:43:40<21:52:28] +[titan] 2025-10-05 01:17:59,549 - root - INFO - step: 4440 loss: 2.7652 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4365 +[titan] 2025-10-05 01:17:59,549 - root - INFO - lr: 4.8854e-05 gnorm: 1.25 [ 2:43:50<21:52:16] +[titan] 2025-10-05 01:18:10,464 - root - INFO - step: 4445 loss: 2.7634 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4344 +[titan] 2025-10-05 01:18:10,464 - root - INFO - lr: 4.8851e-05 gnorm: 1.21 [ 2:44:01<21:52:03] +[titan] 2025-10-05 01:18:19,182 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:18:21,402 - root - INFO - step: 4450 loss: 2.8198 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.3358 global_avg_mtp_loss: 2.4839 +[titan] 2025-10-05 01:18:21,402 - root - INFO - lr: 4.8848e-05 gnorm: 1.25 [ 2:44:12<21:51:51] +[titan] 2025-10-05 01:18:32,290 - root - INFO - step: 4455 loss: 2.8002 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4666 +[titan] 2025-10-05 01:18:32,290 - root - INFO - lr: 4.8845e-05 gnorm: 1.21 [ 2:44:23<21:51:39] +[titan] 2025-10-05 01:18:43,182 - root - INFO - step: 4460 loss: 2.7924 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:18:43,183 - root - INFO - lr: 4.8842e-05 gnorm: 1.17 [ 2:44:34<21:51:26] +[titan] 2025-10-05 01:18:54,107 - root - INFO - step: 4465 loss: 2.8210 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3364 global_avg_mtp_loss: 2.4846 +[titan] 2025-10-05 01:18:54,107 - root - INFO - lr: 4.8840e-05 gnorm: 1.23 [ 2:44:45<21:51:14] +[titan] 2025-10-05 01:19:04,974 - root - INFO - step: 4470 loss: 2.7913 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4596 +[titan] 2025-10-05 01:19:04,974 - root - INFO - lr: 4.8837e-05 gnorm: 1.21 [ 2:44:56<21:51:01] +[titan] 2025-10-05 01:19:15,845 - root - INFO - step: 4475 loss: 2.8258 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3360 global_avg_mtp_loss: 2.4898 +[titan] 2025-10-05 01:19:15,846 - root - INFO - lr: 4.8834e-05 gnorm: 1.28 [ 2:45:07<21:50:49] +[titan] 2025-10-05 01:19:26,715 - root - INFO - step: 4480 loss: 2.7821 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:19:26,715 - root - INFO - lr: 4.8831e-05 
gnorm: 1.29 [ 2:45:18<21:50:36] +[titan] 2025-10-05 01:19:37,611 - root - INFO - step: 4485 loss: 2.8154 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4798 +[titan] 2025-10-05 01:19:37,611 - root - INFO - lr: 4.8828e-05 gnorm: 1.28 [ 2:45:29<21:50:24] +[titan] 2025-10-05 01:19:48,473 - root - INFO - step: 4490 loss: 2.7910 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3319 global_avg_mtp_loss: 2.4591 +[titan] 2025-10-05 01:19:48,474 - root - INFO - lr: 4.8825e-05 gnorm: 1.39 [ 2:45:39<21:50:11] +[titan] 2025-10-05 01:19:59,363 - root - INFO - step: 4495 loss: 2.7586 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4304 +[titan] 2025-10-05 01:19:59,363 - root - INFO - lr: 4.8823e-05 gnorm: 1.26 [ 2:45:50<21:49:58] +[titan] 2025-10-05 01:20:08,035 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:20:10,224 - root - INFO - step: 4500 loss: 2.8484 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.5091 +[titan] 2025-10-05 01:20:10,224 - root - INFO - lr: 4.8820e-05 gnorm: 1.25 [ 2:46:01<21:49:46] +[titan] 2025-10-05 01:20:21,077 - root - INFO - step: 4505 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3816 +[titan] 2025-10-05 01:20:21,077 - root - INFO - lr: 4.8817e-05 gnorm: 1.25 [ 2:46:12<21:49:33] +[titan] 2025-10-05 01:20:31,932 - root - INFO - step: 4510 loss: 2.8270 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3376 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:20:31,932 - root - INFO - lr: 4.8814e-05 gnorm: 1.26 [ 2:46:23<21:49:20] +[titan] 2025-10-05 01:20:42,845 - root - INFO - step: 4515 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4207 +[titan] 2025-10-05 01:20:42,845 - root - INFO - lr: 4.8811e-05 gnorm: 1.20 [ 2:46:34<21:49:08] +[titan] 2025-10-05 01:20:53,800 - root - INFO - step: 4520 loss: 2.8244 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4888 +[titan] 2025-10-05 01:20:53,800 - root - INFO - lr: 4.8808e-05 gnorm: 1.37 [ 2:46:45<21:48:56] +[titan] 2025-10-05 01:21:04,708 - root - INFO - step: 4525 loss: 2.7186 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3964 +[titan] 2025-10-05 01:21:04,708 - root - INFO - lr: 4.8805e-05 gnorm: 1.27 [ 2:46:56<21:48:44] +[titan] 2025-10-05 01:21:15,602 - root - INFO - step: 4530 loss: 2.7206 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3236 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:21:15,602 - root - INFO - lr: 4.8803e-05 
gnorm: 1.23 [ 2:47:07<21:48:31] +[titan] 2025-10-05 01:21:26,498 - root - INFO - step: 4535 loss: 2.7518 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3249 global_avg_mtp_loss: 2.4269 +[titan] 2025-10-05 01:21:26,498 - root - INFO - lr: 4.8800e-05 gnorm: 1.35 [ 2:47:17<21:48:19] +[titan] 2025-10-05 01:21:37,376 - root - INFO - step: 4540 loss: 2.7814 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3301 global_avg_mtp_loss: 2.4513 +[titan] 2025-10-05 01:21:37,376 - root - INFO - lr: 4.8797e-05 gnorm: 1.23 [ 2:47:28<21:48:06] +[titan] 2025-10-05 01:21:48,331 - root - INFO - step: 4545 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4487 +[titan] 2025-10-05 01:21:48,331 - root - INFO - lr: 4.8794e-05 gnorm: 1.25 [ 2:47:39<21:47:54] +[titan] 2025-10-05 01:21:57,058 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:21:59,248 - root - INFO - step: 4550 loss: 2.8483 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 01:21:59,249 - root - INFO - lr: 4.8791e-05 gnorm: 1.26 [ 2:47:50<21:47:42] +[titan] 2025-10-05 01:22:10,102 - root - INFO - step: 4555 loss: 2.7389 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.4138 +[titan] 2025-10-05 01:22:10,103 - root - INFO - lr: 4.8788e-05 gnorm: 1.20 [ 2:48:01<21:47:29] +[titan] 2025-10-05 01:22:20,974 - root - INFO - step: 4560 loss: 2.7847 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:22:20,974 - root - INFO - lr: 4.8785e-05 gnorm: 1.21 [ 2:48:12<21:47:17] +[titan] 2025-10-05 01:22:31,853 - root - INFO - step: 4565 loss: 2.7537 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:22:31,854 - root - INFO - lr: 4.8782e-05 gnorm: 1.27 [ 2:48:23<21:47:04] +[titan] 2025-10-05 01:22:42,729 - root - INFO - step: 4570 loss: 2.6580 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 01:22:42,729 - root - INFO - lr: 4.8779e-05 gnorm: 1.26 [ 2:48:34<21:46:52] +[titan] 2025-10-05 01:22:53,792 - root - INFO - step: 4575 loss: 2.8422 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.3385 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:22:53,792 - root - INFO - lr: 4.8777e-05 gnorm: 1.26 [ 2:48:45<21:46:41] +[titan] 2025-10-05 01:23:04,721 - root - INFO - step: 4580 loss: 2.6906 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3732 +[titan] 2025-10-05 01:23:04,721 - root - INFO - lr: 4.8774e-05 
gnorm: 1.18 [ 2:48:56<21:46:28] +[titan] 2025-10-05 01:23:15,616 - root - INFO - step: 4585 loss: 2.7509 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:23:15,616 - root - INFO - lr: 4.8771e-05 gnorm: 1.23 [ 2:49:07<21:46:16] +[titan] 2025-10-05 01:23:26,529 - root - INFO - step: 4590 loss: 2.7868 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4570 +[titan] 2025-10-05 01:23:26,530 - root - INFO - lr: 4.8768e-05 gnorm: 1.24 [ 2:49:17<21:46:04] +[titan] 2025-10-05 01:23:37,394 - root - INFO - step: 4595 loss: 2.7525 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3274 global_avg_mtp_loss: 2.4251 +[titan] 2025-10-05 01:23:37,394 - root - INFO - lr: 4.8765e-05 gnorm: 1.22 [ 2:49:28<21:45:51] +[titan] 2025-10-05 01:23:46,091 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:23:48,293 - root - INFO - step: 4600 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4209 +[titan] 2025-10-05 01:23:48,294 - root - INFO - lr: 4.8762e-05 gnorm: 1.18 [ 2:49:39<21:45:39] +[titan] 2025-10-05 01:23:59,314 - root - INFO - step: 4605 loss: 2.8650 memory: 118.84GiB(85.28%) tps: 29,734 tflops: 412.52 mfu: 41.71% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.5076 +[titan] 2025-10-05 01:23:59,314 - root - INFO - lr: 4.8759e-05 gnorm: 1.23 [ 2:49:50<21:45:27] +[titan] 2025-10-05 01:24:06,011 - root - INFO - Dumping profiler traces at step 4608 +[titan] 2025-10-05 01:24:06,047 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:24:10,470 - root - INFO - step: 4610 loss: 2.7849 memory: 118.84GiB(85.28%) tps: 29,373 tflops: 407.50 mfu: 41.20% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4534 +[titan] 2025-10-05 01:24:10,471 - root - INFO - lr: 4.8756e-05 gnorm: 1.28 [ 2:50:01<21:45:17] +[titan] 2025-10-05 01:24:21,351 - root - INFO - step: 4615 loss: 2.7549 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4283 +[titan] 2025-10-05 01:24:21,351 - root - INFO - lr: 4.8753e-05 gnorm: 1.21 [ 2:50:12<21:45:05] +[titan] 2025-10-05 01:24:32,230 - root - INFO - step: 4620 loss: 2.6761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3586 +[titan] 2025-10-05 01:24:32,230 - root - INFO - lr: 4.8750e-05 gnorm: 1.22 [ 2:50:23<21:44:52] +[titan] 2025-10-05 01:24:43,126 - root - INFO - step: 4625 loss: 2.6974 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:24:43,126 - root - INFO - lr: 4.8747e-05 gnorm: 1.18 [ 2:50:34<21:44:40] +[titan] 2025-10-05 01:24:54,032 - root - INFO - step: 4630 loss: 2.8650 
memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3412 global_avg_mtp_loss: 2.5238 +[titan] 2025-10-05 01:24:54,032 - root - INFO - lr: 4.8744e-05 gnorm: 1.24 [ 2:50:45<21:44:27] +[titan] 2025-10-05 01:25:04,940 - root - INFO - step: 4635 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4155 +[titan] 2025-10-05 01:25:04,940 - root - INFO - lr: 4.8741e-05 gnorm: 1.21 [ 2:50:56<21:44:15] +[titan] 2025-10-05 01:25:15,817 - root - INFO - step: 4640 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3562 +[titan] 2025-10-05 01:25:15,817 - root - INFO - lr: 4.8739e-05 gnorm: 1.31 [ 2:51:07<21:44:03] +[titan] 2025-10-05 01:25:26,720 - root - INFO - step: 4645 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3352 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:25:26,721 - root - INFO - lr: 4.8736e-05 gnorm: 1.23 [ 2:51:18<21:43:50] +[titan] 2025-10-05 01:25:35,459 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:25:37,643 - root - INFO - step: 4650 loss: 2.6937 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3193 global_avg_mtp_loss: 2.3743 +[titan] 2025-10-05 01:25:37,643 - root - INFO - lr: 4.8733e-05 gnorm: 1.23 [ 2:51:29<21:43:38] +[titan] 2025-10-05 01:25:48,525 - root - INFO - step: 4655 loss: 2.7402 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4144 +[titan] 2025-10-05 01:25:48,525 - root - INFO - lr: 4.8730e-05 gnorm: 1.22 [ 2:51:39<21:43:26] +[titan] 2025-10-05 01:25:59,422 - root - INFO - step: 4660 loss: 2.7820 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4520 +[titan] 2025-10-05 01:25:59,422 - root - INFO - lr: 4.8727e-05 gnorm: 1.30 [ 2:51:50<21:43:14] +[titan] 2025-10-05 01:26:10,311 - root - INFO - step: 4665 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3392 global_avg_mtp_loss: 2.5074 +[titan] 2025-10-05 01:26:10,311 - root - INFO - lr: 4.8724e-05 gnorm: 1.25 [ 2:52:01<21:43:01] +[titan] 2025-10-05 01:26:21,210 - root - INFO - step: 4670 loss: 2.7305 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4066 +[titan] 2025-10-05 01:26:21,210 - root - INFO - lr: 4.8721e-05 gnorm: 1.25 [ 2:52:12<21:42:49] +[titan] 2025-10-05 01:26:32,122 - root - INFO - step: 4675 loss: 2.7530 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4264 +[titan] 2025-10-05 01:26:32,122 - root - INFO - lr: 4.8718e-05 gnorm: 1.25 [ 2:52:23<21:42:37] +[titan] 2025-10-05 01:26:43,055 - root - INFO - step: 4680 loss: 2.8067 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4732 +[titan] 2025-10-05 01:26:43,055 - root - INFO - lr: 4.8715e-05 
gnorm: 1.24 [ 2:52:34<21:42:25] +[titan] 2025-10-05 01:26:53,990 - root - INFO - step: 4685 loss: 2.6707 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 01:26:53,990 - root - INFO - lr: 4.8712e-05 gnorm: 1.36 [ 2:52:45<21:42:13] +[titan] 2025-10-05 01:27:04,906 - root - INFO - step: 4690 loss: 2.7149 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3214 global_avg_mtp_loss: 2.3935 +[titan] 2025-10-05 01:27:04,906 - root - INFO - lr: 4.8709e-05 gnorm: 1.23 [ 2:52:56<21:42:00] +[titan] 2025-10-05 01:27:15,817 - root - INFO - step: 4695 loss: 2.6965 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3761 +[titan] 2025-10-05 01:27:15,817 - root - INFO - lr: 4.8706e-05 gnorm: 1.25 [ 2:53:07<21:41:48] +[titan] 2025-10-05 01:27:24,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:27:26,735 - root - INFO - step: 4700 loss: 2.7982 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 01:27:26,735 - root - INFO - lr: 4.8703e-05 gnorm: 1.22 [ 2:53:18<21:41:36] +[titan] 2025-10-05 01:27:37,672 - root - INFO - step: 4705 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3443 global_avg_mtp_loss: 2.5494 +[titan] 2025-10-05 01:27:37,673 - root - INFO - lr: 4.8700e-05 gnorm: 1.26 [ 2:53:29<21:41:24] +[titan] 2025-10-05 01:27:48,615 - root - INFO - step: 4710 loss: 2.7471 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.3269 global_avg_mtp_loss: 2.4201 +[titan] 2025-10-05 01:27:48,615 - root - INFO - lr: 4.8697e-05 gnorm: 1.21 [ 2:53:40<21:41:12] +[titan] 2025-10-05 01:27:59,548 - root - INFO - step: 4715 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.3271 global_avg_mtp_loss: 2.4303 +[titan] 2025-10-05 01:27:59,548 - root - INFO - lr: 4.8694e-05 gnorm: 1.22 [ 2:53:50<21:41:00] +[titan] 2025-10-05 01:28:10,470 - root - INFO - step: 4720 loss: 2.8297 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3377 global_avg_mtp_loss: 2.4920 +[titan] 2025-10-05 01:28:10,471 - root - INFO - lr: 4.8691e-05 gnorm: 1.25 [ 2:54:01<21:40:48] +[titan] 2025-10-05 01:28:21,389 - root - INFO - step: 4725 loss: 2.8079 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4747 +[titan] 2025-10-05 01:28:21,389 - root - INFO - lr: 4.8688e-05 gnorm: 1.25 [ 2:54:12<21:40:36] +[titan] 2025-10-05 01:28:32,287 - root - INFO - step: 4730 loss: 2.7460 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3255 global_avg_mtp_loss: 2.4205 +[titan] 2025-10-05 01:28:32,287 - root - INFO - lr: 4.8685e-05 
gnorm: 1.27 [ 2:54:23<21:40:24] +[titan] 2025-10-05 01:28:43,251 - root - INFO - step: 4735 loss: 2.6848 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.64 mfu: 41.92% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3668 +[titan] 2025-10-05 01:28:43,252 - root - INFO - lr: 4.8682e-05 gnorm: 1.24 [ 2:54:34<21:40:12] +[titan] 2025-10-05 01:28:54,171 - root - INFO - step: 4740 loss: 2.7918 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4618 +[titan] 2025-10-05 01:28:54,171 - root - INFO - lr: 4.8679e-05 gnorm: 1.32 [ 2:54:45<21:40:00] +[titan] 2025-10-05 01:29:05,077 - root - INFO - step: 4745 loss: 2.7361 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4131 +[titan] 2025-10-05 01:29:05,077 - root - INFO - lr: 4.8676e-05 gnorm: 1.29 [ 2:54:56<21:39:47] +[titan] 2025-10-05 01:29:13,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:29:15,984 - root - INFO - step: 4750 loss: 2.7499 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:29:15,984 - root - INFO - lr: 4.8673e-05 gnorm: 1.26 [ 2:55:07<21:39:35] +[titan] 2025-10-05 01:29:26,874 - root - INFO - step: 4755 loss: 2.7721 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3285 global_avg_mtp_loss: 2.4435 +[titan] 2025-10-05 01:29:26,874 - root - INFO - lr: 4.8670e-05 gnorm: 1.19 [ 2:55:18<21:39:23] +[titan] 2025-10-05 01:29:37,761 - root - INFO - step: 4760 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3353 global_avg_mtp_loss: 2.4595 +[titan] 2025-10-05 01:29:37,761 - root - INFO - lr: 4.8667e-05 gnorm: 1.22 [ 2:55:29<21:39:11] +[titan] 2025-10-05 01:29:48,663 - root - INFO - step: 4765 loss: 2.7250 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3240 global_avg_mtp_loss: 2.4010 +[titan] 2025-10-05 01:29:48,664 - root - INFO - lr: 4.8664e-05 gnorm: 1.28 [ 2:55:40<21:38:58] +[titan] 2025-10-05 01:29:59,563 - root - INFO - step: 4770 loss: 2.7157 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3221 global_avg_mtp_loss: 2.3936 +[titan] 2025-10-05 01:29:59,563 - root - INFO - lr: 4.8661e-05 gnorm: 2.78 [ 2:55:50<21:38:46] +[titan] 2025-10-05 01:30:10,469 - root - INFO - step: 4775 loss: 2.8036 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4701 +[titan] 2025-10-05 01:30:10,469 - root - INFO - lr: 4.8658e-05 gnorm: 1.25 [ 2:56:01<21:38:34] +[titan] 2025-10-05 01:30:21,348 - root - INFO - step: 4780 loss: 2.7215 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:30:21,348 - root - INFO - lr: 4.8655e-05 
gnorm: 1.38 [ 2:56:12<21:38:21] +[titan] 2025-10-05 01:30:32,231 - root - INFO - step: 4785 loss: 2.7709 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4425 +[titan] 2025-10-05 01:30:32,231 - root - INFO - lr: 4.8652e-05 gnorm: 1.21 [ 2:56:23<21:38:09] +[titan] 2025-10-05 01:30:43,113 - root - INFO - step: 4790 loss: 2.7171 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.3934 +[titan] 2025-10-05 01:30:43,113 - root - INFO - lr: 4.8649e-05 gnorm: 1.19 [ 2:56:34<21:37:57] +[titan] 2025-10-05 01:30:54,053 - root - INFO - step: 4795 loss: 2.8155 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.4731 +[titan] 2025-10-05 01:30:54,054 - root - INFO - lr: 4.8646e-05 gnorm: 1.20 [ 2:56:45<21:37:45] +[titan] 2025-10-05 01:31:02,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:31:04,944 - root - INFO - step: 4800 loss: 2.7229 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4004 +[titan] 2025-10-05 01:31:04,944 - root - INFO - lr: 4.8643e-05 gnorm: 1.27 [ 2:56:56<21:37:33] +[titan] 2025-10-05 01:31:15,845 - root - INFO - step: 4805 loss: 2.7633 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4354 +[titan] 2025-10-05 01:31:15,845 - root - INFO - lr: 4.8639e-05 gnorm: 1.30 [ 2:57:07<21:37:20] +[titan] 2025-10-05 01:31:26,718 - root - INFO - step: 4810 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 01:31:26,718 - root - INFO - lr: 4.8636e-05 gnorm: 1.23 [ 2:57:18<21:37:08] +[titan] 2025-10-05 01:31:37,587 - root - INFO - step: 4815 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4100 +[titan] 2025-10-05 01:31:37,587 - root - INFO - lr: 4.8633e-05 gnorm: 1.25 [ 2:57:28<21:36:55] +[titan] 2025-10-05 01:31:48,487 - root - INFO - step: 4820 loss: 2.7752 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4454 +[titan] 2025-10-05 01:31:48,487 - root - INFO - lr: 4.8630e-05 gnorm: 1.24 [ 2:57:39<21:36:43] +[titan] 2025-10-05 01:31:59,366 - root - INFO - step: 4825 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3299 global_avg_mtp_loss: 2.4440 +[titan] 2025-10-05 01:31:59,366 - root - INFO - lr: 4.8627e-05 gnorm: 1.27 [ 2:57:50<21:36:31] +[titan] 2025-10-05 01:32:10,285 - root - INFO - step: 4830 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3289 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:32:10,285 - root - INFO - lr: 4.8624e-05 
gnorm: 1.25 [ 2:58:01<21:36:19] +[titan] 2025-10-05 01:32:21,158 - root - INFO - step: 4835 loss: 2.7916 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4584 +[titan] 2025-10-05 01:32:21,158 - root - INFO - lr: 4.8621e-05 gnorm: 1.23 [ 2:58:12<21:36:06] +[titan] 2025-10-05 01:32:32,019 - root - INFO - step: 4840 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3305 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:32:32,020 - root - INFO - lr: 4.8618e-05 gnorm: 1.25 [ 2:58:23<21:35:54] +[titan] 2025-10-05 01:32:42,890 - root - INFO - step: 4845 loss: 2.7622 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4341 +[titan] 2025-10-05 01:32:42,890 - root - INFO - lr: 4.8615e-05 gnorm: 1.24 [ 2:58:34<21:35:41] +[titan] 2025-10-05 01:32:51,571 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:32:53,752 - root - INFO - step: 4850 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3209 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 01:32:53,752 - root - INFO - lr: 4.8612e-05 gnorm: 1.26 [ 2:58:45<21:35:29] +[titan] 2025-10-05 01:33:04,624 - root - INFO - step: 4855 loss: 2.7888 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4588 +[titan] 2025-10-05 01:33:04,624 - root - INFO - lr: 4.8609e-05 gnorm: 1.30 [ 2:58:55<21:35:17] +[titan] 2025-10-05 01:33:15,520 - root - INFO - step: 4860 loss: 2.6936 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3751 +[titan] 2025-10-05 01:33:15,521 - root - INFO - lr: 4.8606e-05 gnorm: 1.24 [ 2:59:06<21:35:04] +[titan] 2025-10-05 01:33:26,393 - root - INFO - step: 4865 loss: 2.8919 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3439 global_avg_mtp_loss: 2.5480 +[titan] 2025-10-05 01:33:26,393 - root - INFO - lr: 4.8603e-05 gnorm: 1.25 [ 2:59:17<21:34:52] +[titan] 2025-10-05 01:33:37,259 - root - INFO - step: 4870 loss: 2.7240 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 01:33:37,259 - root - INFO - lr: 4.8599e-05 gnorm: 1.24 [ 2:59:28<21:34:40] +[titan] 2025-10-05 01:33:48,148 - root - INFO - step: 4875 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4417 +[titan] 2025-10-05 01:33:48,148 - root - INFO - lr: 4.8596e-05 gnorm: 1.26 [ 2:59:39<21:34:27] +[titan] 2025-10-05 01:33:59,034 - root - INFO - step: 4880 loss: 2.7227 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4000 +[titan] 2025-10-05 01:33:59,035 - root - INFO - lr: 4.8593e-05 
gnorm: 1.27 [ 2:59:50<21:34:15] +[titan] 2025-10-05 01:34:09,948 - root - INFO - step: 4885 loss: 2.7234 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4009 +[titan] 2025-10-05 01:34:09,948 - root - INFO - lr: 4.8590e-05 gnorm: 1.20 [ 3:00:01<21:34:03] +[titan] 2025-10-05 01:34:20,817 - root - INFO - step: 4890 loss: 2.7314 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4083 +[titan] 2025-10-05 01:34:20,818 - root - INFO - lr: 4.8587e-05 gnorm: 1.33 [ 3:00:12<21:33:51] +[titan] 2025-10-05 01:34:31,730 - root - INFO - step: 4895 loss: 2.7077 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3210 global_avg_mtp_loss: 2.3867 +[titan] 2025-10-05 01:34:31,731 - root - INFO - lr: 4.8584e-05 gnorm: 1.29 [ 3:00:23<21:33:38] +[titan] 2025-10-05 01:34:40,425 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:34:42,619 - root - INFO - step: 4900 loss: 2.7734 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3292 global_avg_mtp_loss: 2.4443 +[titan] 2025-10-05 01:34:42,620 - root - INFO - lr: 4.8581e-05 gnorm: 1.28 [ 3:00:33<21:33:26] +[titan] 2025-10-05 01:34:53,494 - root - INFO - step: 4905 loss: 2.7406 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4146 +[titan] 2025-10-05 01:34:53,495 - root - INFO - lr: 4.8578e-05 gnorm: 1.17 [ 3:00:44<21:33:14] +[titan] 2025-10-05 01:35:04,450 - root - INFO - step: 4910 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3981 +[titan] 2025-10-05 01:35:04,451 - root - INFO - lr: 4.8575e-05 gnorm: 1.20 [ 3:00:55<21:33:02] +[titan] 2025-10-05 01:35:15,335 - root - INFO - step: 4915 loss: 2.7382 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4140 +[titan] 2025-10-05 01:35:15,335 - root - INFO - lr: 4.8571e-05 gnorm: 1.28 [ 3:01:06<21:32:50] +[titan] 2025-10-05 01:35:26,233 - root - INFO - step: 4920 loss: 2.7952 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3320 global_avg_mtp_loss: 2.4631 +[titan] 2025-10-05 01:35:26,233 - root - INFO - lr: 4.8568e-05 gnorm: 1.29 [ 3:01:17<21:32:38] +[titan] 2025-10-05 01:35:37,136 - root - INFO - step: 4925 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4403 +[titan] 2025-10-05 01:35:37,136 - root - INFO - lr: 4.8565e-05 gnorm: 1.25 [ 3:01:28<21:32:26] +[titan] 2025-10-05 01:35:48,013 - root - INFO - step: 4930 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4081 +[titan] 2025-10-05 01:35:48,014 - root - INFO - lr: 4.8562e-05 
gnorm: 1.21 [ 3:01:39<21:32:13] +[titan] 2025-10-05 01:35:58,895 - root - INFO - step: 4935 loss: 2.7204 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:35:58,895 - root - INFO - lr: 4.8559e-05 gnorm: 1.20 [ 3:01:50<21:32:01] +[titan] 2025-10-05 01:36:09,806 - root - INFO - step: 4940 loss: 2.7788 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:36:09,807 - root - INFO - lr: 4.8556e-05 gnorm: 1.21 [ 3:02:01<21:31:49] +[titan] 2025-10-05 01:36:20,731 - root - INFO - step: 4945 loss: 2.7547 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3304 global_avg_mtp_loss: 2.4243 +[titan] 2025-10-05 01:36:20,732 - root - INFO - lr: 4.8553e-05 gnorm: 1.23 [ 3:02:12<21:31:37] +[titan] 2025-10-05 01:36:29,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:36:31,611 - root - INFO - step: 4950 loss: 2.6438 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3129 global_avg_mtp_loss: 2.3309 +[titan] 2025-10-05 01:36:31,611 - root - INFO - lr: 4.8549e-05 gnorm: 1.20 [ 3:02:22<21:31:25] +[titan] 2025-10-05 01:36:42,497 - root - INFO - step: 4955 loss: 2.7743 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:36:42,497 - root - INFO - lr: 4.8546e-05 gnorm: 1.29 [ 3:02:33<21:31:12] +[titan] 2025-10-05 01:36:53,369 - root - INFO - step: 4960 loss: 2.7846 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4551 +[titan] 2025-10-05 01:36:53,369 - root - INFO - lr: 4.8543e-05 gnorm: 1.25 [ 3:02:44<21:31:00] +[titan] 2025-10-05 01:37:04,267 - root - INFO - step: 4965 loss: 2.8172 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3344 global_avg_mtp_loss: 2.4828 +[titan] 2025-10-05 01:37:04,267 - root - INFO - lr: 4.8540e-05 gnorm: 1.25 [ 3:02:55<21:30:48] +[titan] 2025-10-05 01:37:15,212 - root - INFO - step: 4970 loss: 2.6436 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3310 +[titan] 2025-10-05 01:37:15,212 - root - INFO - lr: 4.8537e-05 gnorm: 1.25 [ 3:03:06<21:30:36] +[titan] 2025-10-05 01:37:26,159 - root - INFO - step: 4975 loss: 2.7551 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4289 +[titan] 2025-10-05 01:37:26,159 - root - INFO - lr: 4.8534e-05 gnorm: 1.22 [ 3:03:17<21:30:24] +[titan] 2025-10-05 01:37:37,030 - root - INFO - step: 4980 loss: 2.7052 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3218 global_avg_mtp_loss: 2.3834 +[titan] 2025-10-05 01:37:37,031 - root - INFO - lr: 4.8530e-05 
gnorm: 1.26 [ 3:03:28<21:30:12] +[titan] 2025-10-05 01:37:47,943 - root - INFO - step: 4985 loss: 2.7357 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4094 +[titan] 2025-10-05 01:37:47,944 - root - INFO - lr: 4.8527e-05 gnorm: 1.27 [ 3:03:39<21:30:00] +[titan] 2025-10-05 01:37:58,856 - root - INFO - step: 4990 loss: 2.7950 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4628 +[titan] 2025-10-05 01:37:58,857 - root - INFO - lr: 4.8524e-05 gnorm: 1.22 [ 3:03:50<21:29:48] +[titan] 2025-10-05 01:38:09,823 - root - INFO - step: 4995 loss: 2.7375 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.3261 global_avg_mtp_loss: 2.4114 +[titan] 2025-10-05 01:38:09,823 - root - INFO - lr: 4.8521e-05 gnorm: 1.18 [ 3:04:01<21:29:36] +[titan] 2025-10-05 01:38:18,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:38:20,753 - root - INFO - step: 5000 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3200 global_avg_mtp_loss: 2.3792 +[titan] 2025-10-05 01:38:20,753 - root - INFO - lr: 4.8518e-05 gnorm: 1.26 [ 3:04:12<21:29:24] +[titan] 2025-10-05 01:38:20,753 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 01:38:42,144 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 01:38:42,144 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 21.39 seconds. 
+[titan] 2025-10-05 01:40:51,998 - root - INFO - step: 5005 loss: 2.7858 memory: 118.84GiB(85.28%) tps: 2,167 tflops: 30.06 mfu: 3.04% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:40:51,998 - root - INFO - lr: 4.8515e-05 gnorm: 1.27 [ 3:06:43<21:45:33] +[titan] 2025-10-05 01:41:02,796 - root - INFO - step: 5010 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3270 global_avg_mtp_loss: 2.4291 +[titan] 2025-10-05 01:41:02,796 - root - INFO - lr: 4.8511e-05 gnorm: 1.34 [ 3:06:54<21:45:20] +[titan] 2025-10-05 01:41:13,614 - root - INFO - step: 5015 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.3283 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:41:13,614 - root - INFO - lr: 4.8508e-05 gnorm: 1.32 [ 3:07:04<21:45:06] +[titan] 2025-10-05 01:41:24,485 - root - INFO - step: 5020 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4305 +[titan] 2025-10-05 01:41:24,485 - root - INFO - lr: 4.8505e-05 gnorm: 1.31 [ 3:07:15<21:44:52] +[titan] 2025-10-05 01:41:35,321 - root - INFO - step: 5025 loss: 2.7060 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3836 +[titan] 2025-10-05 01:41:35,321 - root - INFO - lr: 4.8502e-05 gnorm: 1.27 [ 3:07:26<21:44:39] +[titan] 2025-10-05 01:41:46,205 - root - INFO - step: 5030 loss: 2.7304 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3247 global_avg_mtp_loss: 2.4057 +[titan] 2025-10-05 01:41:46,205 - root - INFO - lr: 4.8499e-05 gnorm: 1.28 [ 3:07:37<21:44:25] +[titan] 2025-10-05 01:41:57,092 - root - INFO - step: 5035 loss: 2.7485 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4237 +[titan] 2025-10-05 01:41:57,093 - root - INFO - lr: 4.8495e-05 
gnorm: 1.26 [ 3:07:48<21:44:12] +[titan] 2025-10-05 01:42:08,008 - root - INFO - step: 5040 loss: 2.7641 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4360 +[titan] 2025-10-05 01:42:08,008 - root - INFO - lr: 4.8492e-05 gnorm: 1.18 [ 3:07:59<21:43:59] +[titan] 2025-10-05 01:42:18,888 - root - INFO - step: 5045 loss: 2.6254 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3136 +[titan] 2025-10-05 01:42:18,888 - root - INFO - lr: 4.8489e-05 gnorm: 1.29 [ 3:08:10<21:43:46] +[titan] 2025-10-05 01:42:27,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:42:29,897 - root - INFO - step: 5050 loss: 2.7825 memory: 118.84GiB(85.28%) tps: 29,766 tflops: 412.96 mfu: 41.75% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4531 +[titan] 2025-10-05 01:42:29,897 - root - INFO - lr: 4.8486e-05 gnorm: 1.24 [ 3:08:21<21:43:33] +[titan] 2025-10-05 01:42:40,766 - root - INFO - step: 5055 loss: 2.7808 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4505 +[titan] 2025-10-05 01:42:40,766 - root - INFO - lr: 4.8483e-05 gnorm: 1.22 [ 3:08:32<21:43:20] +[titan] 2025-10-05 01:42:51,649 - root - INFO - step: 5060 loss: 2.6497 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3358 +[titan] 2025-10-05 01:42:51,649 - root - INFO - lr: 4.8479e-05 gnorm: 1.25 [ 3:08:43<21:43:06] +[titan] 2025-10-05 01:43:02,533 - root - INFO - step: 5065 loss: 2.7482 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 01:43:02,533 - root - INFO - lr: 4.8476e-05 gnorm: 1.21 [ 3:08:53<21:42:53] +[titan] 2025-10-05 01:43:13,418 - root - INFO - step: 5070 loss: 2.8515 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 
417.67 mfu: 42.23% global_avg_ntp_loss: 0.3494 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:43:13,418 - root - INFO - lr: 4.8473e-05 gnorm: 1.24 [ 3:09:04<21:42:40] +[titan] 2025-10-05 01:43:24,295 - root - INFO - step: 5075 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3955 +[titan] 2025-10-05 01:43:24,295 - root - INFO - lr: 4.8470e-05 gnorm: 1.23 [ 3:09:15<21:42:26] +[titan] 2025-10-05 01:43:35,165 - root - INFO - step: 5080 loss: 2.6731 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3557 +[titan] 2025-10-05 01:43:35,166 - root - INFO - lr: 4.8466e-05 gnorm: 1.24 [ 3:09:26<21:42:13] +[titan] 2025-10-05 01:43:46,043 - root - INFO - step: 5085 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 01:43:46,043 - root - INFO - lr: 4.8463e-05 gnorm: 1.24 [ 3:09:37<21:42:00] +[titan] 2025-10-05 01:43:56,916 - root - INFO - step: 5090 loss: 2.7316 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4079 +[titan] 2025-10-05 01:43:56,917 - root - INFO - lr: 4.8460e-05 gnorm: 1.35 [ 3:09:48<21:41:46] +[titan] 2025-10-05 01:44:07,778 - root - INFO - step: 5095 loss: 2.7611 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4327 +[titan] 2025-10-05 01:44:07,778 - root - INFO - lr: 4.8457e-05 gnorm: 1.27 [ 3:09:59<21:41:33] +[titan] 2025-10-05 01:44:16,486 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:44:18,671 - root - INFO - step: 5100 loss: 2.6824 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3640 +[titan] 2025-10-05 01:44:18,671 - root - INFO - lr: 4.8453e-05 gnorm: 1.28 [ 3:10:10<21:41:20] +[titan] 2025-10-05 01:44:29,534 - root - INFO - step: 5105 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.4782 +[titan] 2025-10-05 01:44:29,534 - root - INFO - lr: 4.8450e-05 gnorm: 1.26 [ 3:10:20<21:41:06] +[titan] 2025-10-05 01:44:40,413 - root - INFO - step: 5110 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.3923 +[titan] 2025-10-05 01:44:40,413 - root - INFO - lr: 4.8447e-05 gnorm: 1.23 [ 3:10:31<21:40:53] +[titan] 2025-10-05 01:44:51,299 - root - INFO - step: 5115 loss: 2.6959 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3758 +[titan] 2025-10-05 01:44:51,300 - root - INFO - lr: 4.8444e-05 gnorm: 1.26 [ 3:10:42<21:40:40] +[titan] 2025-10-05 01:45:02,275 - root - INFO - step: 5120 loss: 2.7516 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.21 mfu: 41.88% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4257 +[titan] 2025-10-05 01:45:02,275 - root - INFO - lr: 4.8440e-05 gnorm: 1.21 [ 3:10:53<21:40:27] +[titan] 2025-10-05 01:45:02,452 - root - INFO - Dumping profiler traces at step 5120 +[titan] 2025-10-05 01:45:02,492 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:45:13,379 - root - INFO - step: 5125 loss: 2.7714 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4427 +[titan] 2025-10-05 01:45:13,379 - root - INFO - lr: 4.8437e-05 gnorm: 1.24 [ 3:11:04<21:40:16] +[titan] 2025-10-05 01:45:24,262 - root - INFO - step: 5130 loss: 2.6786 
memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3612 +[titan] 2025-10-05 01:45:24,263 - root - INFO - lr: 4.8434e-05 gnorm: 1.22 [ 3:11:15<21:40:02] +[titan] 2025-10-05 01:45:35,196 - root - INFO - step: 5135 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4702 +[titan] 2025-10-05 01:45:35,196 - root - INFO - lr: 4.8431e-05 gnorm: 1.27 [ 3:11:26<21:39:49] +[titan] 2025-10-05 01:45:46,094 - root - INFO - step: 5140 loss: 2.7216 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3233 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:45:46,094 - root - INFO - lr: 4.8427e-05 gnorm: 1.26 [ 3:11:37<21:39:36] +[titan] 2025-10-05 01:45:56,991 - root - INFO - step: 5145 loss: 2.7084 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3869 +[titan] 2025-10-05 01:45:56,991 - root - INFO - lr: 4.8424e-05 gnorm: 1.23 [ 3:11:48<21:39:23] +[titan] 2025-10-05 01:46:05,684 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:46:07,870 - root - INFO - step: 5150 loss: 2.7550 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4284 +[titan] 2025-10-05 01:46:07,870 - root - INFO - lr: 4.8421e-05 gnorm: 1.28 [ 3:11:59<21:39:10] +[titan] 2025-10-05 01:46:18,768 - root - INFO - step: 5155 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3142 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 01:46:18,768 - root - INFO - lr: 4.8417e-05 gnorm: 1.20 [ 3:12:10<21:38:57] +[titan] 2025-10-05 01:46:29,716 - root - INFO - step: 5160 loss: 2.7141 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3918 +[titan] 2025-10-05 01:46:29,716 - root - INFO - lr: 4.8414e-05 gnorm: 1.22 [ 3:12:21<21:38:44] +[titan] 2025-10-05 01:46:40,611 - root - INFO - step: 5165 loss: 2.7431 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3245 global_avg_mtp_loss: 2.4185 +[titan] 2025-10-05 01:46:40,611 - root - INFO - lr: 4.8411e-05 gnorm: 1.18 [ 3:12:31<21:38:31] +[titan] 2025-10-05 01:46:51,503 - root - INFO - step: 5170 loss: 2.6610 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 01:46:51,503 - root - INFO - lr: 4.8408e-05 gnorm: 1.21 [ 3:12:42<21:38:18] +[titan] 2025-10-05 01:47:02,418 - root - INFO - step: 5175 loss: 2.7319 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4077 +[titan] 2025-10-05 01:47:02,418 - root - INFO - lr: 4.8404e-05 gnorm: 1.21 [ 3:12:53<21:38:05] +[titan] 2025-10-05 01:47:13,333 - root - INFO - step: 5180 loss: 2.7303 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:47:13,333 - root - INFO - lr: 4.8401e-05 
gnorm: 1.24 [ 3:13:04<21:37:52] +[titan] 2025-10-05 01:47:24,247 - root - INFO - step: 5185 loss: 2.6746 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 01:47:24,247 - root - INFO - lr: 4.8398e-05 gnorm: 1.22 [ 3:13:15<21:37:39] +[titan] 2025-10-05 01:47:35,216 - root - INFO - step: 5190 loss: 2.7738 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4456 +[titan] 2025-10-05 01:47:35,217 - root - INFO - lr: 4.8394e-05 gnorm: 1.31 [ 3:13:26<21:37:26] +[titan] 2025-10-05 01:47:46,124 - root - INFO - step: 5195 loss: 2.8394 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3384 global_avg_mtp_loss: 2.5009 +[titan] 2025-10-05 01:47:46,124 - root - INFO - lr: 4.8391e-05 gnorm: 1.27 [ 3:13:37<21:37:13] +[titan] 2025-10-05 01:47:54,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:47:57,027 - root - INFO - step: 5200 loss: 2.7263 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4026 +[titan] 2025-10-05 01:47:57,027 - root - INFO - lr: 4.8388e-05 gnorm: 1.24 [ 3:13:48<21:37:00] +[titan] 2025-10-05 01:48:07,915 - root - INFO - step: 5205 loss: 2.7277 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.4038 +[titan] 2025-10-05 01:48:07,915 - root - INFO - lr: 4.8384e-05 gnorm: 1.21 [ 3:13:59<21:36:47] +[titan] 2025-10-05 01:48:18,830 - root - INFO - step: 5210 loss: 2.6835 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3172 global_avg_mtp_loss: 2.3663 +[titan] 2025-10-05 01:48:18,830 - root - INFO - lr: 4.8381e-05 gnorm: 1.22 [ 3:14:10<21:36:34] +[titan] 2025-10-05 01:48:29,733 - root - INFO - step: 5215 loss: 2.6886 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3701 +[titan] 2025-10-05 01:48:29,733 - root - INFO - lr: 4.8378e-05 gnorm: 1.23 [ 3:14:21<21:36:21] +[titan] 2025-10-05 01:48:40,645 - root - INFO - step: 5220 loss: 2.7098 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 01:48:40,645 - root - INFO - lr: 4.8374e-05 gnorm: 1.25 [ 3:14:31<21:36:08] +[titan] 2025-10-05 01:48:51,536 - root - INFO - step: 5225 loss: 2.8169 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4830 +[titan] 2025-10-05 01:48:51,536 - root - INFO - lr: 4.8371e-05 gnorm: 1.24 [ 3:14:42<21:35:55] +[titan] 2025-10-05 01:49:02,433 - root - INFO - step: 5230 loss: 2.7455 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4197 +[titan] 2025-10-05 01:49:02,433 - root - INFO - lr: 4.8368e-05 
gnorm: 1.26 [ 3:14:53<21:35:42] +[titan] 2025-10-05 01:49:13,324 - root - INFO - step: 5235 loss: 2.7873 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.3324 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:49:13,325 - root - INFO - lr: 4.8364e-05 gnorm: 1.21 [ 3:15:04<21:35:29] +[titan] 2025-10-05 01:49:24,205 - root - INFO - step: 5240 loss: 2.6851 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3675 +[titan] 2025-10-05 01:49:24,206 - root - INFO - lr: 4.8361e-05 gnorm: 1.22 [ 3:15:15<21:35:16] +[titan] 2025-10-05 01:49:35,124 - root - INFO - step: 5245 loss: 2.7664 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:49:35,124 - root - INFO - lr: 4.8358e-05 gnorm: 1.24 [ 3:15:26<21:35:03] +[titan] 2025-10-05 01:49:43,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:49:45,992 - root - INFO - step: 5250 loss: 2.7297 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4047 +[titan] 2025-10-05 01:49:45,992 - root - INFO - lr: 4.8354e-05 gnorm: 1.29 [ 3:15:37<21:34:49] +[titan] 2025-10-05 01:49:56,896 - root - INFO - step: 5255 loss: 2.7151 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3928 +[titan] 2025-10-05 01:49:56,896 - root - INFO - lr: 4.8351e-05 gnorm: 1.29 [ 3:15:48<21:34:36] +[titan] 2025-10-05 01:50:07,763 - root - INFO - step: 5260 loss: 2.7886 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3308 global_avg_mtp_loss: 2.4578 +[titan] 2025-10-05 01:50:07,763 - root - INFO - lr: 4.8348e-05 gnorm: 1.36 [ 3:15:59<21:34:23] +[titan] 2025-10-05 01:50:18,645 - root - INFO - step: 5265 loss: 2.6117 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3012 +[titan] 2025-10-05 01:50:18,645 - root - INFO - lr: 4.8344e-05 gnorm: 1.24 [ 3:16:09<21:34:10] +[titan] 2025-10-05 01:50:29,515 - root - INFO - step: 5270 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4451 +[titan] 2025-10-05 01:50:29,516 - root - INFO - lr: 4.8341e-05 gnorm: 1.24 [ 3:16:20<21:33:57] +[titan] 2025-10-05 01:50:40,456 - root - INFO - step: 5275 loss: 2.7065 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.01% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3874 +[titan] 2025-10-05 01:50:40,457 - root - INFO - lr: 4.8338e-05 gnorm: 1.25 [ 3:16:31<21:33:44] +[titan] 2025-10-05 01:50:51,334 - root - INFO - step: 5280 loss: 2.7674 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4384 +[titan] 2025-10-05 01:50:51,334 - root - INFO - lr: 4.8334e-05 
gnorm: 1.25 [ 3:16:42<21:33:31] +[titan] 2025-10-05 01:51:02,214 - root - INFO - step: 5285 loss: 2.6660 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3493 +[titan] 2025-10-05 01:51:02,214 - root - INFO - lr: 4.8331e-05 gnorm: 1.20 [ 3:16:53<21:33:18] +[titan] 2025-10-05 01:51:13,075 - root - INFO - step: 5290 loss: 2.7457 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4189 +[titan] 2025-10-05 01:51:13,075 - root - INFO - lr: 4.8327e-05 gnorm: 1.25 [ 3:17:04<21:33:05] +[titan] 2025-10-05 01:51:23,938 - root - INFO - step: 5295 loss: 2.7299 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:51:23,938 - root - INFO - lr: 4.8324e-05 gnorm: 1.18 [ 3:17:15<21:32:51] +[titan] 2025-10-05 01:51:32,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:51:34,836 - root - INFO - step: 5300 loss: 2.7577 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4310 +[titan] 2025-10-05 01:51:34,836 - root - INFO - lr: 4.8321e-05 gnorm: 1.27 [ 3:17:26<21:32:38] +[titan] 2025-10-05 01:51:45,732 - root - INFO - step: 5305 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.4411 +[titan] 2025-10-05 01:51:45,732 - root - INFO - lr: 4.8317e-05 gnorm: 1.28 [ 3:17:37<21:32:25] +[titan] 2025-10-05 01:51:56,598 - root - INFO - step: 5310 loss: 2.6649 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3486 +[titan] 2025-10-05 01:51:56,598 - root - INFO - lr: 4.8314e-05 gnorm: 1.25 [ 3:17:47<21:32:12] +[titan] 2025-10-05 01:52:07,463 - root - INFO - step: 5315 loss: 2.6130 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3093 global_avg_mtp_loss: 2.3037 +[titan] 2025-10-05 01:52:07,463 - root - INFO - lr: 4.8311e-05 gnorm: 1.23 [ 3:17:58<21:31:59] +[titan] 2025-10-05 01:52:18,354 - root - INFO - step: 5320 loss: 2.7768 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4481 +[titan] 2025-10-05 01:52:18,354 - root - INFO - lr: 4.8307e-05 gnorm: 1.31 [ 3:18:09<21:31:46] +[titan] 2025-10-05 01:52:29,236 - root - INFO - step: 5325 loss: 2.7143 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3924 +[titan] 2025-10-05 01:52:29,236 - root - INFO - lr: 4.8304e-05 gnorm: 1.21 [ 3:18:20<21:31:33] +[titan] 2025-10-05 01:52:40,146 - root - INFO - step: 5330 loss: 2.7556 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4290 +[titan] 2025-10-05 01:52:40,146 - root - INFO - lr: 4.8300e-05 
gnorm: 1.27 [ 3:18:31<21:31:20] +[titan] 2025-10-05 01:52:51,044 - root - INFO - step: 5335 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4168 +[titan] 2025-10-05 01:52:51,044 - root - INFO - lr: 4.8297e-05 gnorm: 1.26 [ 3:18:42<21:31:07] +[titan] 2025-10-05 01:53:01,911 - root - INFO - step: 5340 loss: 2.7097 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3206 global_avg_mtp_loss: 2.3891 +[titan] 2025-10-05 01:53:01,911 - root - INFO - lr: 4.8294e-05 gnorm: 1.30 [ 3:18:53<21:30:54] +[titan] 2025-10-05 01:53:12,786 - root - INFO - step: 5345 loss: 2.6651 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3501 +[titan] 2025-10-05 01:53:12,787 - root - INFO - lr: 4.8290e-05 gnorm: 1.21 [ 3:19:04<21:30:41] +[titan] 2025-10-05 01:53:21,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:53:23,680 - root - INFO - step: 5350 loss: 2.7279 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3226 global_avg_mtp_loss: 2.4053 +[titan] 2025-10-05 01:53:23,680 - root - INFO - lr: 4.8287e-05 gnorm: 1.24 [ 3:19:15<21:30:28] +[titan] 2025-10-05 01:53:34,600 - root - INFO - step: 5355 loss: 2.6227 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 01:53:34,600 - root - INFO - lr: 4.8283e-05 gnorm: 1.28 [ 3:19:25<21:30:15] +[titan] 2025-10-05 01:53:45,495 - root - INFO - step: 5360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4548 +[titan] 2025-10-05 01:53:45,495 - root - INFO - lr: 4.8280e-05 gnorm: 1.23 [ 3:19:36<21:30:02] +[titan] 2025-10-05 01:53:56,371 - root - INFO - step: 5365 loss: 2.7914 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4555 +[titan] 2025-10-05 01:53:56,372 - root - INFO - lr: 4.8276e-05 gnorm: 1.23 [ 3:19:47<21:29:49] +[titan] 2025-10-05 01:54:07,246 - root - INFO - step: 5370 loss: 2.6816 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3627 +[titan] 2025-10-05 01:54:07,246 - root - INFO - lr: 4.8273e-05 gnorm: 1.23 [ 3:19:58<21:29:36] +[titan] 2025-10-05 01:54:18,130 - root - INFO - step: 5375 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3272 +[titan] 2025-10-05 01:54:18,130 - root - INFO - lr: 4.8270e-05 gnorm: 1.27 [ 3:20:09<21:29:23] +[titan] 2025-10-05 01:54:28,973 - root - INFO - step: 5380 loss: 2.7116 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3899 +[titan] 2025-10-05 01:54:28,973 - root - INFO - lr: 4.8266e-05 
gnorm: 1.23 [ 3:20:20<21:29:09] +[titan] 2025-10-05 01:54:39,864 - root - INFO - step: 5385 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3764 +[titan] 2025-10-05 01:54:39,864 - root - INFO - lr: 4.8263e-05 gnorm: 1.24 [ 3:20:31<21:28:56] +[titan] 2025-10-05 01:54:50,734 - root - INFO - step: 5390 loss: 2.7644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4366 +[titan] 2025-10-05 01:54:50,735 - root - INFO - lr: 4.8259e-05 gnorm: 1.25 [ 3:20:42<21:28:43] +[titan] 2025-10-05 01:55:01,593 - root - INFO - step: 5395 loss: 2.7603 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3272 global_avg_mtp_loss: 2.4331 +[titan] 2025-10-05 01:55:01,593 - root - INFO - lr: 4.8256e-05 gnorm: 1.21 [ 3:20:52<21:28:30] +[titan] 2025-10-05 01:55:10,273 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:55:12,472 - root - INFO - step: 5400 loss: 2.7045 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3833 +[titan] 2025-10-05 01:55:12,472 - root - INFO - lr: 4.8252e-05 gnorm: 1.20 [ 3:21:03<21:28:17] +[titan] 2025-10-05 01:55:23,346 - root - INFO - step: 5405 loss: 2.7062 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3213 global_avg_mtp_loss: 2.3849 +[titan] 2025-10-05 01:55:23,346 - root - INFO - lr: 4.8249e-05 gnorm: 1.20 [ 3:21:14<21:28:04] +[titan] 2025-10-05 01:55:34,207 - root - INFO - step: 5410 loss: 2.7345 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4118 +[titan] 2025-10-05 01:55:34,207 - root - INFO - lr: 4.8245e-05 gnorm: 1.26 [ 3:21:25<21:27:51] +[titan] 2025-10-05 01:55:45,114 - root - INFO - step: 5415 loss: 2.6787 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3618 +[titan] 2025-10-05 01:55:45,114 - root - INFO - lr: 4.8242e-05 gnorm: 1.16 [ 3:21:36<21:27:38] +[titan] 2025-10-05 01:55:55,985 - root - INFO - step: 5420 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.2994 +[titan] 2025-10-05 01:55:55,985 - root - INFO - lr: 4.8239e-05 gnorm: 1.23 [ 3:21:47<21:27:25] +[titan] 2025-10-05 01:56:06,858 - root - INFO - step: 5425 loss: 2.6262 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 01:56:06,858 - root - INFO - lr: 4.8235e-05 gnorm: 1.20 [ 3:21:58<21:27:12] +[titan] 2025-10-05 01:56:17,752 - root - INFO - step: 5430 loss: 2.6880 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3706 +[titan] 2025-10-05 01:56:17,752 - root - INFO - lr: 4.8232e-05 
gnorm: 1.21 [ 3:22:09<21:26:59] +[titan] 2025-10-05 01:56:28,647 - root - INFO - step: 5435 loss: 2.6104 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 01:56:28,647 - root - INFO - lr: 4.8228e-05 gnorm: 1.24 [ 3:22:19<21:26:46] +[titan] 2025-10-05 01:56:39,549 - root - INFO - step: 5440 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3753 +[titan] 2025-10-05 01:56:39,549 - root - INFO - lr: 4.8225e-05 gnorm: 1.24 [ 3:22:30<21:26:33] +[titan] 2025-10-05 01:56:50,425 - root - INFO - step: 5445 loss: 2.7005 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 01:56:50,426 - root - INFO - lr: 4.8221e-05 gnorm: 1.24 [ 3:22:41<21:26:20] +[titan] 2025-10-05 01:56:59,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:57:01,307 - root - INFO - step: 5450 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3211 global_avg_mtp_loss: 2.3942 +[titan] 2025-10-05 01:57:01,307 - root - INFO - lr: 4.8218e-05 gnorm: 1.22 [ 3:22:52<21:26:07] +[titan] 2025-10-05 01:57:12,168 - root - INFO - step: 5455 loss: 2.7238 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4003 +[titan] 2025-10-05 01:57:12,168 - root - INFO - lr: 4.8214e-05 gnorm: 1.25 [ 3:23:03<21:25:54] +[titan] 2025-10-05 01:57:23,004 - root - INFO - step: 5460 loss: 2.7013 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 01:57:23,004 - root - INFO - lr: 4.8211e-05 gnorm: 1.21 [ 3:23:14<21:25:41] +[titan] 2025-10-05 01:57:33,870 - root - INFO - step: 5465 loss: 2.7566 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:57:33,870 - root - INFO - lr: 4.8207e-05 gnorm: 1.28 [ 3:23:25<21:25:28] +[titan] 2025-10-05 01:57:44,735 - root - INFO - step: 5470 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 01:57:44,736 - root - INFO - lr: 4.8204e-05 gnorm: 3.95 [ 3:23:36<21:25:15] +[titan] 2025-10-05 01:57:55,597 - root - INFO - step: 5475 loss: 2.7332 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4064 +[titan] 2025-10-05 01:57:55,597 - root - INFO - lr: 4.8200e-05 gnorm: 5.60 [ 3:23:46<21:25:02] +[titan] 2025-10-05 01:58:06,457 - root - INFO - step: 5480 loss: 2.6333 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3196 +[titan] 2025-10-05 01:58:06,457 - root - INFO - lr: 4.8197e-05 
gnorm: 1.42 [ 3:23:57<21:24:49] +[titan] 2025-10-05 01:58:17,326 - root - INFO - step: 5485 loss: 2.6808 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3195 global_avg_mtp_loss: 2.3613 +[titan] 2025-10-05 01:58:17,327 - root - INFO - lr: 4.8193e-05 gnorm: 1.64 [ 3:24:08<21:24:35] +[titan] 2025-10-05 01:58:28,172 - root - INFO - step: 5490 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3789 +[titan] 2025-10-05 01:58:28,172 - root - INFO - lr: 4.8190e-05 gnorm: 1.44 [ 3:24:19<21:24:22] +[titan] 2025-10-05 01:58:39,061 - root - INFO - step: 5495 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:58:39,061 - root - INFO - lr: 4.8186e-05 gnorm: 1.37 [ 3:24:30<21:24:09] +[titan] 2025-10-05 01:58:47,779 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:58:49,967 - root - INFO - step: 5500 loss: 2.7427 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4165 +[titan] 2025-10-05 01:58:49,967 - root - INFO - lr: 4.8183e-05 gnorm: 1.30 [ 3:24:41<21:23:57] +[titan] 2025-10-05 01:59:00,823 - root - INFO - step: 5505 loss: 2.7373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4120 +[titan] 2025-10-05 01:59:00,823 - root - INFO - lr: 4.8179e-05 gnorm: 1.29 [ 3:24:52<21:23:44] +[titan] 2025-10-05 01:59:11,693 - root - INFO - step: 5510 loss: 2.6666 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3511 +[titan] 2025-10-05 01:59:11,693 - root - INFO - lr: 4.8176e-05 gnorm: 1.30 [ 3:25:03<21:23:30] +[titan] 2025-10-05 01:59:22,587 - root - INFO - step: 5515 loss: 2.7189 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.3938 +[titan] 2025-10-05 01:59:22,587 - root - INFO - lr: 4.8172e-05 gnorm: 6.71 [ 3:25:13<21:23:18] +[titan] 2025-10-05 01:59:33,471 - root - INFO - step: 5520 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3125 global_avg_mtp_loss: 2.3281 +[titan] 2025-10-05 01:59:33,471 - root - INFO - lr: 4.8169e-05 gnorm: 1.27 [ 3:25:24<21:23:05] +[titan] 2025-10-05 01:59:44,386 - root - INFO - step: 5525 loss: 2.6236 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3140 +[titan] 2025-10-05 01:59:44,386 - root - INFO - lr: 4.8165e-05 gnorm: 1.21 [ 3:25:35<21:22:52] +[titan] 2025-10-05 01:59:55,268 - root - INFO - step: 5530 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 01:59:55,268 - root - INFO - lr: 4.8162e-05 
gnorm: 1.23 [ 3:25:46<21:22:39] +[titan] 2025-10-05 02:00:06,139 - root - INFO - step: 5535 loss: 2.6010 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3104 global_avg_mtp_loss: 2.2906 +[titan] 2025-10-05 02:00:06,139 - root - INFO - lr: 4.8158e-05 gnorm: 1.28 [ 3:25:57<21:22:26] +[titan] 2025-10-05 02:00:17,012 - root - INFO - step: 5540 loss: 2.6903 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 02:00:17,013 - root - INFO - lr: 4.8155e-05 gnorm: 1.28 [ 3:26:08<21:22:13] +[titan] 2025-10-05 02:00:27,882 - root - INFO - step: 5545 loss: 2.6624 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3482 +[titan] 2025-10-05 02:00:27,882 - root - INFO - lr: 4.8151e-05 gnorm: 1.25 [ 3:26:19<21:22:00] +[titan] 2025-10-05 02:00:36,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:00:38,754 - root - INFO - step: 5550 loss: 2.6437 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3307 +[titan] 2025-10-05 02:00:38,754 - root - INFO - lr: 4.8147e-05 gnorm: 1.23 [ 3:26:30<21:21:47] +[titan] 2025-10-05 02:00:49,688 - root - INFO - step: 5555 loss: 2.6840 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:00:49,689 - root - INFO - lr: 4.8144e-05 gnorm: 1.21 [ 3:26:40<21:21:35] +[titan] 2025-10-05 02:01:00,569 - root - INFO - step: 5560 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3569 +[titan] 2025-10-05 02:01:00,569 - root - INFO - lr: 4.8140e-05 gnorm: 1.21 [ 3:26:51<21:21:22] +[titan] 2025-10-05 02:01:11,488 - root - INFO - step: 5565 loss: 2.6609 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 02:01:11,488 - root - INFO - lr: 4.8137e-05 gnorm: 1.24 [ 3:27:02<21:21:09] +[titan] 2025-10-05 02:01:22,384 - root - INFO - step: 5570 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.3978 +[titan] 2025-10-05 02:01:22,384 - root - INFO - lr: 4.8133e-05 gnorm: 1.26 [ 3:27:13<21:20:56] +[titan] 2025-10-05 02:01:33,286 - root - INFO - step: 5575 loss: 2.6770 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3597 +[titan] 2025-10-05 02:01:33,287 - root - INFO - lr: 4.8130e-05 gnorm: 1.23 [ 3:27:24<21:20:43] +[titan] 2025-10-05 02:01:44,187 - root - INFO - step: 5580 loss: 2.6684 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.3160 global_avg_mtp_loss: 2.3524 +[titan] 2025-10-05 02:01:44,187 - root - INFO - lr: 4.8126e-05 
gnorm: 1.22 [ 3:27:35<21:20:31] +[titan] 2025-10-05 02:01:55,071 - root - INFO - step: 5585 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3821 +[titan] 2025-10-05 02:01:55,071 - root - INFO - lr: 4.8123e-05 gnorm: 1.23 [ 3:27:46<21:20:18] +[titan] 2025-10-05 02:02:05,953 - root - INFO - step: 5590 loss: 2.7020 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 02:02:05,954 - root - INFO - lr: 4.8119e-05 gnorm: 1.29 [ 3:27:57<21:20:05] +[titan] 2025-10-05 02:02:16,866 - root - INFO - step: 5595 loss: 2.6621 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3458 +[titan] 2025-10-05 02:02:16,866 - root - INFO - lr: 4.8115e-05 gnorm: 1.25 [ 3:28:08<21:19:52] +[titan] 2025-10-05 02:02:25,553 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:02:27,738 - root - INFO - step: 5600 loss: 2.7026 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3818 +[titan] 2025-10-05 02:02:27,739 - root - INFO - lr: 4.8112e-05 gnorm: 1.26 [ 3:28:19<21:19:39] +[titan] 2025-10-05 02:02:38,604 - root - INFO - step: 5605 loss: 2.6192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:02:38,604 - root - INFO - lr: 4.8108e-05 gnorm: 1.25 [ 3:28:29<21:19:26] +[titan] 2025-10-05 02:02:49,527 - root - INFO - step: 5610 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3444 +[titan] 2025-10-05 02:02:49,527 - root - INFO - lr: 4.8105e-05 gnorm: 1.32 [ 3:28:40<21:19:14] +[titan] 2025-10-05 02:03:00,407 - root - INFO - step: 5615 loss: 2.6727 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3552 +[titan] 2025-10-05 02:03:00,407 - root - INFO - lr: 4.8101e-05 gnorm: 1.19 [ 3:28:51<21:19:01] +[titan] 2025-10-05 02:03:11,293 - root - INFO - step: 5620 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3098 +[titan] 2025-10-05 02:03:11,293 - root - INFO - lr: 4.8097e-05 gnorm: 1.24 [ 3:29:02<21:18:48] +[titan] 2025-10-05 02:03:22,216 - root - INFO - step: 5625 loss: 2.6235 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 02:03:22,216 - root - INFO - lr: 4.8094e-05 gnorm: 1.21 [ 3:29:13<21:18:35] +[titan] 2025-10-05 02:03:33,165 - root - INFO - step: 5630 loss: 2.7089 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3881 +[titan] 2025-10-05 02:03:33,165 - root - INFO - lr: 4.8090e-05 
gnorm: 1.27 [ 3:29:24<21:18:23] +[titan] 2025-10-05 02:03:37,684 - root - INFO - Dumping profiler traces at step 5632 +[titan] 2025-10-05 02:03:37,721 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:03:44,274 - root - INFO - step: 5635 loss: 2.6796 memory: 118.84GiB(85.28%) tps: 29,497 tflops: 409.23 mfu: 41.38% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3615 +[titan] 2025-10-05 02:03:44,274 - root - INFO - lr: 4.8087e-05 gnorm: 1.25 [ 3:29:35<21:18:12] +[titan] 2025-10-05 02:03:55,158 - root - INFO - step: 5640 loss: 2.6061 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3075 global_avg_mtp_loss: 2.2987 +[titan] 2025-10-05 02:03:55,158 - root - INFO - lr: 4.8083e-05 gnorm: 1.25 [ 3:29:46<21:17:59] +[titan] 2025-10-05 02:04:06,053 - root - INFO - step: 5645 loss: 2.7125 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3908 +[titan] 2025-10-05 02:04:06,053 - root - INFO - lr: 4.8079e-05 gnorm: 1.34 [ 3:29:57<21:17:46] +[titan] 2025-10-05 02:04:14,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:04:16,937 - root - INFO - step: 5650 loss: 2.5977 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:04:16,937 - root - INFO - lr: 4.8076e-05 gnorm: 1.27 [ 3:30:08<21:17:33] +[titan] 2025-10-05 02:04:27,853 - root - INFO - step: 5655 loss: 2.6416 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3299 +[titan] 2025-10-05 02:04:27,854 - root - INFO - lr: 4.8072e-05 gnorm: 1.30 [ 3:30:19<21:17:20] +[titan] 2025-10-05 02:04:38,772 - root - INFO - step: 5660 loss: 2.7230 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 02:04:38,772 - root - INFO - lr: 4.8069e-05 gnorm: 1.24 [ 3:30:30<21:17:08] +[titan] 2025-10-05 02:04:49,685 - root - INFO - step: 5665 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3829 +[titan] 2025-10-05 02:04:49,685 - root - INFO - lr: 4.8065e-05 gnorm: 1.26 [ 3:30:40<21:16:55] +[titan] 2025-10-05 02:05:00,577 - root - INFO - step: 5670 loss: 2.6274 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3178 +[titan] 2025-10-05 02:05:00,577 - root - INFO - lr: 4.8061e-05 gnorm: 1.25 [ 3:30:51<21:16:42] +[titan] 2025-10-05 02:05:11,454 - root - INFO - step: 5675 loss: 2.6289 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:11,454 - root - INFO - lr: 4.8058e-05 gnorm: 1.22 [ 3:31:02<21:16:30] +[titan] 2025-10-05 02:05:22,325 - root - INFO - step: 5680 loss: 2.7071 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3216 global_avg_mtp_loss: 2.3854 +[titan] 2025-10-05 02:05:22,325 - root - INFO - lr: 4.8054e-05 
gnorm: 1.24 [ 3:31:13<21:16:17] +[titan] 2025-10-05 02:05:33,190 - root - INFO - step: 5685 loss: 2.6647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3151 global_avg_mtp_loss: 2.3496 +[titan] 2025-10-05 02:05:33,190 - root - INFO - lr: 4.8051e-05 gnorm: 1.25 [ 3:31:24<21:16:04] +[titan] 2025-10-05 02:05:44,079 - root - INFO - step: 5690 loss: 2.6318 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3204 +[titan] 2025-10-05 02:05:44,079 - root - INFO - lr: 4.8047e-05 gnorm: 1.20 [ 3:31:35<21:15:51] +[titan] 2025-10-05 02:05:54,989 - root - INFO - step: 5695 loss: 2.6284 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:54,989 - root - INFO - lr: 4.8043e-05 gnorm: 1.18 [ 3:31:46<21:15:38] +[titan] 2025-10-05 02:06:03,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:06:05,856 - root - INFO - step: 5700 loss: 2.6425 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:06:05,856 - root - INFO - lr: 4.8040e-05 gnorm: 1.17 [ 3:31:57<21:15:26] +[titan] 2025-10-05 02:06:16,739 - root - INFO - step: 5705 loss: 2.6825 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3176 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:06:16,740 - root - INFO - lr: 4.8036e-05 gnorm: 1.21 [ 3:32:08<21:15:13] +[titan] 2025-10-05 02:06:27,613 - root - INFO - step: 5710 loss: 2.7487 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 02:06:27,613 - root - INFO - lr: 4.8032e-05 gnorm: 1.24 [ 3:32:18<21:15:00] +[titan] 2025-10-05 02:06:38,482 - root - INFO - step: 5715 loss: 2.6692 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3542 +[titan] 2025-10-05 02:06:38,482 - root - INFO - lr: 4.8029e-05 gnorm: 1.27 [ 3:32:29<21:14:47] +[titan] 2025-10-05 02:06:49,408 - root - INFO - step: 5720 loss: 2.6745 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3170 global_avg_mtp_loss: 2.3576 +[titan] 2025-10-05 02:06:49,408 - root - INFO - lr: 4.8025e-05 gnorm: 1.21 [ 3:32:40<21:14:34] +[titan] 2025-10-05 02:07:00,305 - root - INFO - step: 5725 loss: 2.6145 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3063 +[titan] 2025-10-05 02:07:00,305 - root - INFO - lr: 4.8021e-05 gnorm: 1.25 [ 3:32:51<21:14:22] +[titan] 2025-10-05 02:07:11,183 - root - INFO - step: 5730 loss: 2.6939 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3752 +[titan] 2025-10-05 02:07:11,183 - root - INFO - lr: 4.8018e-05 
gnorm: 1.27 [ 3:33:02<21:14:09] +[titan] 2025-10-05 02:07:22,045 - root - INFO - step: 5735 loss: 2.6083 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3002 +[titan] 2025-10-05 02:07:22,046 - root - INFO - lr: 4.8014e-05 gnorm: 1.28 [ 3:33:13<21:13:56] +[titan] 2025-10-05 02:07:32,920 - root - INFO - step: 5740 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3073 global_avg_mtp_loss: 2.2892 +[titan] 2025-10-05 02:07:32,920 - root - INFO - lr: 4.8010e-05 gnorm: 1.17 [ 3:33:24<21:13:43] +[titan] 2025-10-05 02:07:43,786 - root - INFO - step: 5745 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:07:43,786 - root - INFO - lr: 4.8007e-05 gnorm: 1.24 [ 3:33:35<21:13:30] +[titan] 2025-10-05 02:07:52,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:07:54,774 - root - INFO - step: 5750 loss: 2.6142 memory: 118.84GiB(85.28%) tps: 29,821 tflops: 413.73 mfu: 41.83% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 02:07:54,774 - root - INFO - lr: 4.8003e-05 gnorm: 1.24 [ 3:33:46<21:13:18] +[titan] 2025-10-05 02:08:05,686 - root - INFO - step: 5755 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3384 +[titan] 2025-10-05 02:08:05,686 - root - INFO - lr: 4.7999e-05 gnorm: 1.20 [ 3:33:56<21:13:06] +[titan] 2025-10-05 02:08:16,606 - root - INFO - step: 5760 loss: 2.7255 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4025 +[titan] 2025-10-05 02:08:16,606 - root - INFO - lr: 4.7996e-05 gnorm: 1.24 [ 3:34:07<21:12:53] +[titan] 2025-10-05 02:08:27,488 - root - INFO - step: 5765 loss: 2.6698 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3144 global_avg_mtp_loss: 2.3553 +[titan] 2025-10-05 02:08:27,489 - root - INFO - lr: 4.7992e-05 gnorm: 1.25 [ 3:34:18<21:12:40] +[titan] 2025-10-05 02:08:38,371 - root - INFO - step: 5770 loss: 2.7107 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 02:08:38,371 - root - INFO - lr: 4.7988e-05 gnorm: 1.26 [ 3:34:29<21:12:28] +[titan] 2025-10-05 02:08:49,291 - root - INFO - step: 5775 loss: 2.7046 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.3203 global_avg_mtp_loss: 2.3843 +[titan] 2025-10-05 02:08:49,291 - root - INFO - lr: 4.7985e-05 gnorm: 1.28 [ 3:34:40<21:12:15] +[titan] 2025-10-05 02:09:00,170 - root - INFO - step: 5780 loss: 2.7717 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4437 +[titan] 2025-10-05 02:09:00,170 - root - INFO - lr: 4.7981e-05 
gnorm: 1.66 [ 3:34:51<21:12:02] +[titan] 2025-10-05 02:09:11,065 - root - INFO - step: 5785 loss: 2.6598 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3451 +[titan] 2025-10-05 02:09:11,066 - root - INFO - lr: 4.7977e-05 gnorm: 1.28 [ 3:35:02<21:11:50] +[titan] 2025-10-05 02:09:21,936 - root - INFO - step: 5790 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3077 +[titan] 2025-10-05 02:09:21,936 - root - INFO - lr: 4.7973e-05 gnorm: 1.24 [ 3:35:13<21:11:37] +[titan] 2025-10-05 02:09:32,809 - root - INFO - step: 5795 loss: 2.6803 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3637 +[titan] 2025-10-05 02:09:32,809 - root - INFO - lr: 4.7970e-05 gnorm: 1.27 [ 3:35:24<21:11:24] +[titan] 2025-10-05 02:09:41,490 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:09:43,680 - root - INFO - step: 5800 loss: 2.6313 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3124 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:09:43,680 - root - INFO - lr: 4.7966e-05 gnorm: 1.25 [ 3:35:34<21:11:11] +[titan] 2025-10-05 02:09:54,628 - root - INFO - step: 5805 loss: 2.6182 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.3088 +[titan] 2025-10-05 02:09:54,629 - root - INFO - lr: 4.7962e-05 gnorm: 1.26 [ 3:35:45<21:10:59] +[titan] 2025-10-05 02:10:05,480 - root - INFO - step: 5810 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 02:10:05,481 - root - INFO - lr: 4.7959e-05 gnorm: 1.28 [ 3:35:56<21:10:46] +[titan] 2025-10-05 02:10:16,374 - root - INFO - step: 5815 loss: 2.6620 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3479 +[titan] 2025-10-05 02:10:16,374 - root - INFO - lr: 4.7955e-05 gnorm: 1.22 [ 3:36:07<21:10:33] +[titan] 2025-10-05 02:10:27,283 - root - INFO - step: 5820 loss: 2.6968 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3191 global_avg_mtp_loss: 2.3777 +[titan] 2025-10-05 02:10:27,283 - root - INFO - lr: 4.7951e-05 gnorm: 1.21 [ 3:36:18<21:10:21] +[titan] 2025-10-05 02:10:38,152 - root - INFO - step: 5825 loss: 2.6399 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3263 +[titan] 2025-10-05 02:10:38,152 - root - INFO - lr: 4.7947e-05 gnorm: 1.26 [ 3:36:29<21:10:08] +[titan] 2025-10-05 02:10:49,018 - root - INFO - step: 5830 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3158 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:10:49,018 - root - INFO - lr: 4.7944e-05 
gnorm: 1.27 [ 3:36:40<21:09:55] +[titan] 2025-10-05 02:10:59,943 - root - INFO - step: 5835 loss: 2.6687 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 02:10:59,943 - root - INFO - lr: 4.7940e-05 gnorm: 1.27 [ 3:36:51<21:09:43] +[titan] 2025-10-05 02:11:10,804 - root - INFO - step: 5840 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3646 +[titan] 2025-10-05 02:11:10,804 - root - INFO - lr: 4.7936e-05 gnorm: 1.23 [ 3:37:02<21:09:30] +[titan] 2025-10-05 02:11:21,663 - root - INFO - step: 5845 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:11:21,663 - root - INFO - lr: 4.7933e-05 gnorm: 1.22 [ 3:37:12<21:09:17] +[titan] 2025-10-05 02:11:30,368 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:11:32,557 - root - INFO - step: 5850 loss: 2.5946 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2879 +[titan] 2025-10-05 02:11:32,557 - root - INFO - lr: 4.7929e-05 gnorm: 1.24 [ 3:37:23<21:09:04] +[titan] 2025-10-05 02:11:43,442 - root - INFO - step: 5855 loss: 2.6553 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3404 +[titan] 2025-10-05 02:11:43,442 - root - INFO - lr: 4.7925e-05 gnorm: 1.31 [ 3:37:34<21:08:52] +[titan] 2025-10-05 02:11:54,344 - root - INFO - step: 5860 loss: 2.6942 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3769 +[titan] 2025-10-05 02:11:54,344 - root - INFO - lr: 4.7921e-05 gnorm: 1.23 [ 3:37:45<21:08:39] +[titan] 2025-10-05 02:12:05,223 - root - INFO - step: 5865 loss: 2.5612 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3011 global_avg_mtp_loss: 2.2601 +[titan] 2025-10-05 02:12:05,223 - root - INFO - lr: 4.7918e-05 gnorm: 1.19 [ 3:37:56<21:08:26] +[titan] 2025-10-05 02:12:16,102 - root - INFO - step: 5870 loss: 2.6730 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3578 +[titan] 2025-10-05 02:12:16,102 - root - INFO - lr: 4.7914e-05 gnorm: 1.22 [ 3:38:07<21:08:14] +[titan] 2025-10-05 02:12:26,998 - root - INFO - step: 5875 loss: 2.7092 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3873 +[titan] 2025-10-05 02:12:26,998 - root - INFO - lr: 4.7910e-05 gnorm: 1.27 [ 3:38:18<21:08:01] +[titan] 2025-10-05 02:12:37,886 - root - INFO - step: 5880 loss: 2.6639 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.3140 global_avg_mtp_loss: 2.3499 +[titan] 2025-10-05 02:12:37,886 - root - INFO - lr: 4.7906e-05 
gnorm: 1.23 [ 3:38:29<21:07:48] +[titan] 2025-10-05 02:12:48,782 - root - INFO - step: 5885 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3282 +[titan] 2025-10-05 02:12:48,782 - root - INFO - lr: 4.7903e-05 gnorm: 1.23 [ 3:38:40<21:07:36] +[titan] 2025-10-05 02:12:59,686 - root - INFO - step: 5890 loss: 2.6332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3225 +[titan] 2025-10-05 02:12:59,686 - root - INFO - lr: 4.7899e-05 gnorm: 1.23 [ 3:38:50<21:07:23] +[titan] 2025-10-05 02:13:10,552 - root - INFO - step: 5895 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3779 +[titan] 2025-10-05 02:13:10,553 - root - INFO - lr: 4.7895e-05 gnorm: 1.20 [ 3:39:01<21:07:10] +[titan] 2025-10-05 02:13:19,229 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:13:21,417 - root - INFO - step: 5900 loss: 2.6773 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3598 +[titan] 2025-10-05 02:13:21,418 - root - INFO - lr: 4.7891e-05 gnorm: 1.21 [ 3:39:12<21:06:58] +[titan] 2025-10-05 02:13:32,300 - root - INFO - step: 5905 loss: 2.6413 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3290 +[titan] 2025-10-05 02:13:32,300 - root - INFO - lr: 4.7888e-05 gnorm: 1.21 [ 3:39:23<21:06:45] +[titan] 2025-10-05 02:13:43,183 - root - INFO - step: 5910 loss: 2.7061 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3853 +[titan] 2025-10-05 02:13:43,184 - root - INFO - lr: 4.7884e-05 gnorm: 1.23 [ 3:39:34<21:06:32] +[titan] 2025-10-05 02:13:54,153 - root - INFO - step: 5915 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 02:13:54,153 - root - INFO - lr: 4.7880e-05 gnorm: 1.20 [ 3:39:45<21:06:20] +[titan] 2025-10-05 02:14:05,035 - root - INFO - step: 5920 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3748 +[titan] 2025-10-05 02:14:05,035 - root - INFO - lr: 4.7876e-05 gnorm: 1.23 [ 3:39:56<21:06:07] +[titan] 2025-10-05 02:14:15,930 - root - INFO - step: 5925 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 02:14:15,930 - root - INFO - lr: 4.7872e-05 gnorm: 1.31 [ 3:40:07<21:05:55] +[titan] 2025-10-05 02:14:26,810 - root - INFO - step: 5930 loss: 2.5791 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 02:14:26,810 - root - INFO - lr: 4.7869e-05 
gnorm: 1.28 [ 3:40:18<21:05:42] +[titan] 2025-10-05 02:14:37,679 - root - INFO - step: 5935 loss: 2.8206 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3526 global_avg_mtp_loss: 2.4680 +[titan] 2025-10-05 02:14:37,679 - root - INFO - lr: 4.7865e-05 gnorm: 1.21 [ 3:40:28<21:05:29] +[titan] 2025-10-05 02:14:48,570 - root - INFO - step: 5940 loss: 2.6562 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3137 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:14:48,571 - root - INFO - lr: 4.7861e-05 gnorm: 1.27 [ 3:40:39<21:05:17] +[titan] 2025-10-05 02:14:59,517 - root - INFO - step: 5945 loss: 2.6955 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3188 global_avg_mtp_loss: 2.3767 +[titan] 2025-10-05 02:14:59,517 - root - INFO - lr: 4.7857e-05 gnorm: 1.24 [ 3:40:50<21:05:05] +[titan] 2025-10-05 02:15:08,209 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:15:10,403 - root - INFO - step: 5950 loss: 2.6441 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3332 +[titan] 2025-10-05 02:15:10,403 - root - INFO - lr: 4.7853e-05 gnorm: 1.24 [ 3:41:01<21:04:52] +[titan] 2025-10-05 02:15:21,261 - root - INFO - step: 5955 loss: 2.6351 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3224 +[titan] 2025-10-05 02:15:21,261 - root - INFO - lr: 4.7850e-05 gnorm: 1.27 [ 3:41:12<21:04:39] +[titan] 2025-10-05 02:15:32,145 - root - INFO - step: 5960 loss: 2.5704 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2660 +[titan] 2025-10-05 02:15:32,146 - root - INFO - lr: 4.7846e-05 gnorm: 1.24 [ 3:41:23<21:04:27] +[titan] 2025-10-05 02:15:43,038 - root - INFO - step: 5965 loss: 2.6451 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3134 global_avg_mtp_loss: 2.3317 +[titan] 2025-10-05 02:15:43,038 - root - INFO - lr: 4.7842e-05 gnorm: 1.24 [ 3:41:34<21:04:14] +[titan] 2025-10-05 02:15:53,932 - root - INFO - step: 5970 loss: 2.6446 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:15:53,932 - root - INFO - lr: 4.7838e-05 gnorm: 1.25 [ 3:41:45<21:04:01] +[titan] 2025-10-05 02:16:04,943 - root - INFO - step: 5975 loss: 2.6984 memory: 118.84GiB(85.28%) tps: 29,760 tflops: 412.88 mfu: 41.75% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 02:16:04,943 - root - INFO - lr: 4.7834e-05 gnorm: 1.22 [ 3:41:56<21:03:49] +[titan] 2025-10-05 02:16:15,864 - root - INFO - step: 5980 loss: 2.6883 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3697 +[titan] 2025-10-05 02:16:15,864 - root - INFO - lr: 4.7831e-05 
gnorm: 1.23 [ 3:42:07<21:03:37] +[titan] 2025-10-05 02:16:26,743 - root - INFO - step: 5985 loss: 2.6999 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3205 global_avg_mtp_loss: 2.3795 +[titan] 2025-10-05 02:16:26,743 - root - INFO - lr: 4.7827e-05 gnorm: 1.25 [ 3:42:18<21:03:24] +[titan] 2025-10-05 02:16:37,616 - root - INFO - step: 5990 loss: 2.6514 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3368 +[titan] 2025-10-05 02:16:37,616 - root - INFO - lr: 4.7823e-05 gnorm: 1.24 [ 3:42:28<21:03:12] +[titan] 2025-10-05 02:16:48,504 - root - INFO - step: 5995 loss: 2.6633 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3143 global_avg_mtp_loss: 2.3490 +[titan] 2025-10-05 02:16:48,504 - root - INFO - lr: 4.7819e-05 gnorm: 1.24 [ 3:42:39<21:02:59] +[titan] 2025-10-05 02:16:57,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:16:59,424 - root - INFO - step: 6000 loss: 2.7331 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4096 +[titan] 2025-10-05 02:16:59,424 - root - INFO - lr: 4.7815e-05 gnorm: 1.20 [ 3:42:50<21:02:47] +[titan] 2025-10-05 02:17:10,295 - root - INFO - step: 6005 loss: 2.6202 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3105 +[titan] 2025-10-05 02:17:10,295 - root - INFO - lr: 4.7811e-05 gnorm: 1.18 [ 3:43:01<21:02:34] +[titan] 2025-10-05 02:17:21,201 - root - INFO - step: 6010 loss: 2.5634 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2608 +[titan] 2025-10-05 02:17:21,201 - root - INFO - lr: 4.7808e-05 gnorm: 1.22 [ 3:43:12<21:02:22] +[titan] 2025-10-05 02:17:32,082 - root - INFO - step: 6015 loss: 2.6412 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3292 +[titan] 2025-10-05 02:17:32,082 - root - INFO - lr: 4.7804e-05 gnorm: 1.19 [ 3:43:23<21:02:09] +[titan] 2025-10-05 02:17:42,964 - root - INFO - step: 6020 loss: 2.7137 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3909 +[titan] 2025-10-05 02:17:42,964 - root - INFO - lr: 4.7800e-05 gnorm: 1.23 [ 3:43:34<21:01:56] +[titan] 2025-10-05 02:17:53,873 - root - INFO - step: 6025 loss: 2.6409 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3232 +[titan] 2025-10-05 02:17:53,873 - root - INFO - lr: 4.7796e-05 gnorm: 1.20 [ 3:43:45<21:01:44] +[titan] 2025-10-05 02:18:04,793 - root - INFO - step: 6030 loss: 2.6673 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3165 global_avg_mtp_loss: 2.3508 +[titan] 2025-10-05 02:18:04,793 - root - INFO - lr: 4.7792e-05 
gnorm: 1.27 [ 3:43:56<21:01:32] +[titan] 2025-10-05 02:18:15,648 - root - INFO - step: 6035 loss: 2.5627 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2603 +[titan] 2025-10-05 02:18:15,648 - root - INFO - lr: 4.7788e-05 gnorm: 1.20 [ 3:44:06<21:01:19] +[titan] 2025-10-05 02:18:26,520 - root - INFO - step: 6040 loss: 2.6300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:18:26,520 - root - INFO - lr: 4.7784e-05 gnorm: 1.19 [ 3:44:17<21:01:06] +[titan] 2025-10-05 02:18:37,421 - root - INFO - step: 6045 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2925 +[titan] 2025-10-05 02:18:37,421 - root - INFO - lr: 4.7781e-05 gnorm: 1.23 [ 3:44:28<21:00:54] +[titan] 2025-10-05 02:18:46,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:18:48,292 - root - INFO - step: 6050 loss: 2.6234 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3122 +[titan] 2025-10-05 02:18:48,292 - root - INFO - lr: 4.7777e-05 gnorm: 1.22 [ 3:44:39<21:00:41] +[titan] 2025-10-05 02:18:59,214 - root - INFO - step: 6055 loss: 2.7909 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3399 global_avg_mtp_loss: 2.4510 +[titan] 2025-10-05 02:18:59,214 - root - INFO - lr: 4.7773e-05 gnorm: 1.28 [ 3:44:50<21:00:29] +[titan] 2025-10-05 02:19:10,081 - root - INFO - step: 6060 loss: 2.7169 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 02:19:10,081 - root - INFO - lr: 4.7769e-05 gnorm: 1.19 [ 3:45:01<21:00:16] +[titan] 2025-10-05 02:19:20,960 - root - INFO - step: 6065 loss: 2.5899 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3057 global_avg_mtp_loss: 2.2843 +[titan] 2025-10-05 02:19:20,960 - root - INFO - lr: 4.7765e-05 gnorm: 1.20 [ 3:45:12<21:00:03] +[titan] 2025-10-05 02:19:31,815 - root - INFO - step: 6070 loss: 2.5974 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2900 +[titan] 2025-10-05 02:19:31,815 - root - INFO - lr: 4.7761e-05 gnorm: 1.19 [ 3:45:23<20:59:51] +[titan] 2025-10-05 02:19:42,704 - root - INFO - step: 6075 loss: 2.5388 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2387 +[titan] 2025-10-05 02:19:42,704 - root - INFO - lr: 4.7757e-05 gnorm: 1.24 [ 3:45:33<20:59:38] +[titan] 2025-10-05 02:19:53,571 - root - INFO - step: 6080 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2553 +[titan] 2025-10-05 02:19:53,571 - root - INFO - lr: 4.7753e-05 
gnorm: 1.24 [ 3:45:44<20:59:25] +[titan] 2025-10-05 02:20:04,484 - root - INFO - step: 6085 loss: 2.6574 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 02:20:04,484 - root - INFO - lr: 4.7750e-05 gnorm: 1.25 [ 3:45:55<20:59:13] +[titan] 2025-10-05 02:20:15,352 - root - INFO - step: 6090 loss: 2.6004 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2933 +[titan] 2025-10-05 02:20:15,352 - root - INFO - lr: 4.7746e-05 gnorm: 1.29 [ 3:46:06<20:59:00] +[titan] 2025-10-05 02:20:26,230 - root - INFO - step: 6095 loss: 2.6515 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:20:26,230 - root - INFO - lr: 4.7742e-05 gnorm: 1.25 [ 3:46:17<20:58:48] +[titan] 2025-10-05 02:20:34,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:20:37,079 - root - INFO - step: 6100 loss: 2.6900 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3703 +[titan] 2025-10-05 02:20:37,079 - root - INFO - lr: 4.7738e-05 gnorm: 1.19 [ 3:46:28<20:58:35] +[titan] 2025-10-05 02:20:47,995 - root - INFO - step: 6105 loss: 2.7058 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3851 +[titan] 2025-10-05 02:20:47,995 - root - INFO - lr: 4.7734e-05 gnorm: 1.26 [ 3:46:39<20:58:23] +[titan] 2025-10-05 02:20:58,928 - root - INFO - step: 6110 loss: 2.6693 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3537 +[titan] 2025-10-05 02:20:58,928 - root - INFO - lr: 4.7730e-05 gnorm: 1.27 [ 3:46:50<20:58:10] +[titan] 2025-10-05 02:21:09,804 - root - INFO - step: 6115 loss: 2.5456 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:21:09,804 - root - INFO - lr: 4.7726e-05 gnorm: 1.13 [ 3:47:01<20:57:58] +[titan] 2025-10-05 02:21:20,686 - root - INFO - step: 6120 loss: 2.6377 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3244 +[titan] 2025-10-05 02:21:20,686 - root - INFO - lr: 4.7722e-05 gnorm: 1.17 [ 3:47:11<20:57:45] +[titan] 2025-10-05 02:21:31,544 - root - INFO - step: 6125 loss: 2.5803 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:21:31,544 - root - INFO - lr: 4.7718e-05 gnorm: 1.19 [ 3:47:22<20:57:33] +[titan] 2025-10-05 02:21:42,406 - root - INFO - step: 6130 loss: 2.6986 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3790 +[titan] 2025-10-05 02:21:42,406 - root - INFO - lr: 4.7714e-05 
gnorm: 1.30 [ 3:47:33<20:57:20] +[titan] 2025-10-05 02:21:53,244 - root - INFO - step: 6135 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:21:53,244 - root - INFO - lr: 4.7710e-05 gnorm: 1.24 [ 3:47:44<20:57:07] +[titan] 2025-10-05 02:22:04,175 - root - INFO - step: 6140 loss: 2.5814 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3040 global_avg_mtp_loss: 2.2775 +[titan] 2025-10-05 02:22:04,175 - root - INFO - lr: 4.7707e-05 gnorm: 1.23 [ 3:47:55<20:56:55] +[titan] 2025-10-05 02:22:13,114 - root - INFO - Dumping profiler traces at step 6144 +[titan] 2025-10-05 02:22:13,155 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:22:15,343 - root - INFO - step: 6145 loss: 2.6735 memory: 118.84GiB(85.28%) tps: 29,341 tflops: 407.07 mfu: 41.16% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3580 +[titan] 2025-10-05 02:22:15,343 - root - INFO - lr: 4.7703e-05 gnorm: 1.26 [ 3:48:06<20:56:44] +[titan] 2025-10-05 02:22:24,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:22:26,217 - root - INFO - step: 6150 loss: 2.6490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3135 global_avg_mtp_loss: 2.3355 +[titan] 2025-10-05 02:22:26,217 - root - INFO - lr: 4.7699e-05 gnorm: 1.24 [ 3:48:17<20:56:31] +[titan] 2025-10-05 02:22:37,096 - root - INFO - step: 6155 loss: 2.6463 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3340 +[titan] 2025-10-05 02:22:37,096 - root - INFO - lr: 4.7695e-05 gnorm: 1.18 [ 3:48:28<20:56:19] +[titan] 2025-10-05 02:22:47,962 - root - INFO - step: 6160 loss: 2.6975 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:22:47,962 - root - INFO - lr: 4.7691e-05 gnorm: 1.25 [ 3:48:39<20:56:06] +[titan] 2025-10-05 02:22:58,842 - root - INFO - step: 6165 loss: 2.6719 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 02:22:58,842 - root - INFO - lr: 4.7687e-05 gnorm: 1.27 [ 3:48:50<20:55:54] +[titan] 2025-10-05 02:23:09,781 - root - INFO - step: 6170 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3183 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:23:09,781 - root - INFO - lr: 4.7683e-05 gnorm: 1.18 [ 3:49:01<20:55:41] +[titan] 2025-10-05 02:23:20,657 - root - INFO - step: 6175 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2762 +[titan] 2025-10-05 02:23:20,657 - root - INFO - lr: 4.7679e-05 gnorm: 1.25 [ 3:49:11<20:55:29] +[titan] 2025-10-05 02:23:31,536 - root - INFO - step: 6180 loss: 2.6338 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3119 global_avg_mtp_loss: 2.3219 +[titan] 2025-10-05 02:23:31,536 - root - INFO - lr: 4.7675e-05 
gnorm: 1.21 [ 3:49:22<20:55:16] +[titan] 2025-10-05 02:23:42,416 - root - INFO - step: 6185 loss: 2.6751 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3584 +[titan] 2025-10-05 02:23:42,416 - root - INFO - lr: 4.7671e-05 gnorm: 1.23 [ 3:49:33<20:55:04] +[titan] 2025-10-05 02:23:53,282 - root - INFO - step: 6190 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:23:53,282 - root - INFO - lr: 4.7667e-05 gnorm: 1.94 [ 3:49:44<20:54:51] +[titan] 2025-10-05 02:24:04,176 - root - INFO - step: 6195 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.3001 +[titan] 2025-10-05 02:24:04,177 - root - INFO - lr: 4.7663e-05 gnorm: 1.30 [ 3:49:55<20:54:39] +[titan] 2025-10-05 02:24:12,861 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:24:15,046 - root - INFO - step: 6200 loss: 2.6013 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2936 +[titan] 2025-10-05 02:24:15,047 - root - INFO - lr: 4.7659e-05 gnorm: 1.22 [ 3:50:06<20:54:26] +[titan] 2025-10-05 02:24:25,976 - root - INFO - step: 6205 loss: 2.6406 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:24:25,976 - root - INFO - lr: 4.7655e-05 gnorm: 1.21 [ 3:50:17<20:54:14] +[titan] 2025-10-05 02:24:36,842 - root - INFO - step: 6210 loss: 2.5418 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 02:24:36,842 - root - INFO - lr: 4.7651e-05 gnorm: 1.17 [ 3:50:28<20:54:01] +[titan] 2025-10-05 02:24:47,725 - root - INFO - step: 6215 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2744 +[titan] 2025-10-05 02:24:47,725 - root - INFO - lr: 4.7647e-05 gnorm: 1.20 [ 3:50:38<20:53:49] +[titan] 2025-10-05 02:24:58,595 - root - INFO - step: 6220 loss: 2.6116 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 02:24:58,595 - root - INFO - lr: 4.7643e-05 gnorm: 1.26 [ 3:50:49<20:53:36] +[titan] 2025-10-05 02:25:09,462 - root - INFO - step: 6225 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3158 +[titan] 2025-10-05 02:25:09,462 - root - INFO - lr: 4.7639e-05 gnorm: 1.26 [ 3:51:00<20:53:24] +[titan] 2025-10-05 02:25:20,338 - root - INFO - step: 6230 loss: 2.6316 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3207 +[titan] 2025-10-05 02:25:20,338 - root - INFO - lr: 4.7635e-05 
gnorm: 1.26 [ 3:51:11<20:53:11] +[titan] 2025-10-05 02:25:31,243 - root - INFO - step: 6235 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:25:31,243 - root - INFO - lr: 4.7631e-05 gnorm: 1.24 [ 3:51:22<20:52:59] +[titan] 2025-10-05 02:25:42,123 - root - INFO - step: 6240 loss: 2.6737 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3161 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:25:42,123 - root - INFO - lr: 4.7627e-05 gnorm: 1.21 [ 3:51:33<20:52:46] +[titan] 2025-10-05 02:25:53,008 - root - INFO - step: 6245 loss: 2.6264 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3138 +[titan] 2025-10-05 02:25:53,008 - root - INFO - lr: 4.7623e-05 gnorm: 1.21 [ 3:51:44<20:52:34] +[titan] 2025-10-05 02:26:01,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:26:03,944 - root - INFO - step: 6250 loss: 2.6166 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 02:26:03,944 - root - INFO - lr: 4.7619e-05 gnorm: 1.24 [ 3:51:55<20:52:21] +[titan] 2025-10-05 02:26:14,837 - root - INFO - step: 6255 loss: 2.5876 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2818 +[titan] 2025-10-05 02:26:14,837 - root - INFO - lr: 4.7615e-05 gnorm: 1.18 [ 3:52:06<20:52:09] +[titan] 2025-10-05 02:26:25,726 - root - INFO - step: 6260 loss: 2.7070 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3866 +[titan] 2025-10-05 02:26:25,726 - root - INFO - lr: 4.7611e-05 gnorm: 1.20 [ 3:52:16<20:51:57] +[titan] 2025-10-05 02:26:36,613 - root - INFO - step: 6265 loss: 2.6830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3168 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:26:36,614 - root - INFO - lr: 4.7607e-05 gnorm: 1.22 [ 3:52:27<20:51:44] +[titan] 2025-10-05 02:26:47,499 - root - INFO - step: 6270 loss: 2.4995 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2939 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 02:26:47,499 - root - INFO - lr: 4.7603e-05 gnorm: 1.22 [ 3:52:38<20:51:32] +[titan] 2025-10-05 02:26:58,361 - root - INFO - step: 6275 loss: 2.5337 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2345 +[titan] 2025-10-05 02:26:58,361 - root - INFO - lr: 4.7599e-05 gnorm: 1.19 [ 3:52:49<20:51:19] +[titan] 2025-10-05 02:27:09,255 - root - INFO - step: 6280 loss: 2.5465 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 02:27:09,255 - root - INFO - lr: 4.7595e-05 
gnorm: 1.18 [ 3:53:00<20:51:07] +[titan] 2025-10-05 02:27:20,123 - root - INFO - step: 6285 loss: 2.6725 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3560 +[titan] 2025-10-05 02:27:20,123 - root - INFO - lr: 4.7591e-05 gnorm: 1.25 [ 3:53:11<20:50:54] +[titan] 2025-10-05 02:27:30,985 - root - INFO - step: 6290 loss: 2.6086 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.2999 +[titan] 2025-10-05 02:27:30,985 - root - INFO - lr: 4.7587e-05 gnorm: 1.20 [ 3:53:22<20:50:42] +[titan] 2025-10-05 02:27:41,851 - root - INFO - step: 6295 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:27:41,851 - root - INFO - lr: 4.7583e-05 gnorm: 1.19 [ 3:53:33<20:50:29] +[titan] 2025-10-05 02:27:50,574 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:27:52,765 - root - INFO - step: 6300 loss: 2.6057 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2983 +[titan] 2025-10-05 02:27:52,765 - root - INFO - lr: 4.7579e-05 gnorm: 1.25 [ 3:53:44<20:50:17] +[titan] 2025-10-05 02:28:03,660 - root - INFO - step: 6305 loss: 2.6038 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3067 global_avg_mtp_loss: 2.2971 +[titan] 2025-10-05 02:28:03,661 - root - INFO - lr: 4.7575e-05 gnorm: 1.34 [ 3:53:54<20:50:04] +[titan] 2025-10-05 02:28:14,554 - root - INFO - step: 6310 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3056 +[titan] 2025-10-05 02:28:14,554 - root - INFO - lr: 4.7571e-05 gnorm: 1.26 [ 3:54:05<20:49:52] +[titan] 2025-10-05 02:28:25,460 - root - INFO - step: 6315 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 02:28:25,460 - root - INFO - lr: 4.7567e-05 gnorm: 1.30 [ 3:54:16<20:49:40] +[titan] 2025-10-05 02:28:36,327 - root - INFO - step: 6320 loss: 2.6294 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3180 +[titan] 2025-10-05 02:28:36,327 - root - INFO - lr: 4.7563e-05 gnorm: 1.20 [ 3:54:27<20:49:27] +[titan] 2025-10-05 02:28:47,212 - root - INFO - step: 6325 loss: 2.5971 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:28:47,212 - root - INFO - lr: 4.7559e-05 gnorm: 1.24 [ 3:54:38<20:49:15] +[titan] 2025-10-05 02:28:58,148 - root - INFO - step: 6330 loss: 2.5947 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2896 +[titan] 2025-10-05 02:28:58,148 - root - INFO - lr: 4.7555e-05 
gnorm: 1.17 [ 3:54:49<20:49:03] +[titan] 2025-10-05 02:29:09,045 - root - INFO - step: 6335 loss: 2.6560 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3421 +[titan] 2025-10-05 02:29:09,045 - root - INFO - lr: 4.7551e-05 gnorm: 1.23 [ 3:55:00<20:48:50] +[titan] 2025-10-05 02:29:19,929 - root - INFO - step: 6340 loss: 2.5919 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2841 +[titan] 2025-10-05 02:29:19,929 - root - INFO - lr: 4.7547e-05 gnorm: 1.21 [ 3:55:11<20:48:38] +[titan] 2025-10-05 02:29:30,803 - root - INFO - step: 6345 loss: 2.6337 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3228 +[titan] 2025-10-05 02:29:30,803 - root - INFO - lr: 4.7543e-05 gnorm: 1.20 [ 3:55:22<20:48:25] +[titan] 2025-10-05 02:29:39,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:29:41,698 - root - INFO - step: 6350 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.2911 +[titan] 2025-10-05 02:29:41,698 - root - INFO - lr: 4.7539e-05 gnorm: 1.21 [ 3:55:32<20:48:13] +[titan] 2025-10-05 02:29:52,582 - root - INFO - step: 6355 loss: 2.5766 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2722 +[titan] 2025-10-05 02:29:52,582 - root - INFO - lr: 4.7535e-05 gnorm: 1.31 [ 3:55:43<20:48:00] +[titan] 2025-10-05 02:30:03,454 - root - INFO - step: 6360 loss: 2.6402 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3288 +[titan] 2025-10-05 02:30:03,454 - root - INFO - lr: 4.7531e-05 gnorm: 1.19 [ 3:55:54<20:47:48] +[titan] 2025-10-05 02:30:14,404 - root - INFO - step: 6365 loss: 2.5756 memory: 118.84GiB(85.28%) tps: 29,925 tflops: 415.16 mfu: 41.98% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2718 +[titan] 2025-10-05 02:30:14,405 - root - INFO - lr: 4.7527e-05 gnorm: 1.21 [ 3:56:05<20:47:36] +[titan] 2025-10-05 02:30:25,300 - root - INFO - step: 6370 loss: 2.6721 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3566 +[titan] 2025-10-05 02:30:25,301 - root - INFO - lr: 4.7523e-05 gnorm: 1.26 [ 3:56:16<20:47:24] +[titan] 2025-10-05 02:30:36,188 - root - INFO - step: 6375 loss: 2.6701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3548 +[titan] 2025-10-05 02:30:36,189 - root - INFO - lr: 4.7519e-05 gnorm: 1.26 [ 3:56:27<20:47:11] +[titan] 2025-10-05 02:30:47,063 - root - INFO - step: 6380 loss: 2.6577 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:30:47,063 - root - INFO - lr: 4.7514e-05 
gnorm: 1.19 [ 3:56:38<20:46:59] +[titan] 2025-10-05 02:30:57,930 - root - INFO - step: 6385 loss: 2.5739 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:30:57,930 - root - INFO - lr: 4.7510e-05 gnorm: 1.20 [ 3:56:49<20:46:46] +[titan] 2025-10-05 02:31:08,797 - root - INFO - step: 6390 loss: 2.6461 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3331 +[titan] 2025-10-05 02:31:08,797 - root - INFO - lr: 4.7506e-05 gnorm: 1.18 [ 3:57:00<20:46:34] +[titan] 2025-10-05 02:31:19,713 - root - INFO - step: 6395 loss: 2.6359 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3246 +[titan] 2025-10-05 02:31:19,713 - root - INFO - lr: 4.7502e-05 gnorm: 1.18 [ 3:57:10<20:46:21] +[titan] 2025-10-05 02:31:28,409 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:31:30,590 - root - INFO - step: 6400 loss: 2.6427 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.3304 +[titan] 2025-10-05 02:31:30,590 - root - INFO - lr: 4.7498e-05 gnorm: 1.20 [ 3:57:21<20:46:09] +[titan] 2025-10-05 02:31:41,458 - root - INFO - step: 6405 loss: 2.5702 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:31:41,458 - root - INFO - lr: 4.7494e-05 gnorm: 1.26 [ 3:57:32<20:45:57] +[titan] 2025-10-05 02:31:52,328 - root - INFO - step: 6410 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2830 +[titan] 2025-10-05 02:31:52,329 - root - INFO - lr: 4.7490e-05 gnorm: 1.28 [ 3:57:43<20:45:44] +[titan] 2025-10-05 02:32:03,197 - root - INFO - step: 6415 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:32:03,197 - root - INFO - lr: 4.7486e-05 gnorm: 1.24 [ 3:57:54<20:45:32] +[titan] 2025-10-05 02:32:14,082 - root - INFO - step: 6420 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2355 +[titan] 2025-10-05 02:32:14,082 - root - INFO - lr: 4.7482e-05 gnorm: 1.22 [ 3:58:05<20:45:19] +[titan] 2025-10-05 02:32:25,006 - root - INFO - step: 6425 loss: 2.6729 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:32:25,007 - root - INFO - lr: 4.7478e-05 gnorm: 1.28 [ 3:58:16<20:45:07] +[titan] 2025-10-05 02:32:35,882 - root - INFO - step: 6430 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2763 +[titan] 2025-10-05 02:32:35,883 - root - INFO - lr: 4.7474e-05 
gnorm: 1.22 [ 3:58:27<20:44:55] +[titan] 2025-10-05 02:32:46,767 - root - INFO - step: 6435 loss: 2.5922 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2859 +[titan] 2025-10-05 02:32:46,767 - root - INFO - lr: 4.7469e-05 gnorm: 1.22 [ 3:58:37<20:44:42] +[titan] 2025-10-05 02:32:57,635 - root - INFO - step: 6440 loss: 2.5566 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2552 +[titan] 2025-10-05 02:32:57,635 - root - INFO - lr: 4.7465e-05 gnorm: 1.19 [ 3:58:48<20:44:30] +[titan] 2025-10-05 02:33:08,509 - root - INFO - step: 6445 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3471 +[titan] 2025-10-05 02:33:08,509 - root - INFO - lr: 4.7461e-05 gnorm: 1.18 [ 3:58:59<20:44:17] +[titan] 2025-10-05 02:33:17,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:33:19,369 - root - INFO - step: 6450 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2865 +[titan] 2025-10-05 02:33:19,369 - root - INFO - lr: 4.7457e-05 gnorm: 1.22 [ 3:59:10<20:44:05] +[titan] 2025-10-05 02:33:30,229 - root - INFO - step: 6455 loss: 2.6465 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3333 +[titan] 2025-10-05 02:33:30,229 - root - INFO - lr: 4.7453e-05 gnorm: 1.20 [ 3:59:21<20:43:52] +[titan] 2025-10-05 02:33:41,125 - root - INFO - step: 6460 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2613 +[titan] 2025-10-05 02:33:41,125 - root - INFO - lr: 4.7449e-05 gnorm: 1.21 [ 3:59:32<20:43:40] +[titan] 2025-10-05 02:33:51,972 - root - INFO - step: 6465 loss: 2.6340 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.3110 global_avg_mtp_loss: 2.3230 +[titan] 2025-10-05 02:33:51,972 - root - INFO - lr: 4.7445e-05 gnorm: 1.25 [ 3:59:43<20:43:27] +[titan] 2025-10-05 02:34:02,829 - root - INFO - step: 6470 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:34:02,830 - root - INFO - lr: 4.7441e-05 gnorm: 1.22 [ 3:59:54<20:43:15] +[titan] 2025-10-05 02:34:13,713 - root - INFO - step: 6475 loss: 2.6622 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3477 +[titan] 2025-10-05 02:34:13,713 - root - INFO - lr: 4.7436e-05 gnorm: 1.22 [ 4:00:04<20:43:03] +[titan] 2025-10-05 02:34:24,581 - root - INFO - step: 6480 loss: 2.5985 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2920 +[titan] 2025-10-05 02:34:24,581 - root - INFO - lr: 4.7432e-05 
gnorm: 1.22 [ 4:00:15<20:42:50] +[titan] 2025-10-05 02:34:35,430 - root - INFO - step: 6485 loss: 2.5699 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2657 +[titan] 2025-10-05 02:34:35,430 - root - INFO - lr: 4.7428e-05 gnorm: 1.26 [ 4:00:26<20:42:38] +[titan] 2025-10-05 02:34:46,317 - root - INFO - step: 6490 loss: 2.5393 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:34:46,318 - root - INFO - lr: 4.7424e-05 gnorm: 1.22 [ 4:00:37<20:42:25] +[titan] 2025-10-05 02:34:57,192 - root - INFO - step: 6495 loss: 2.6369 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3116 global_avg_mtp_loss: 2.3253 +[titan] 2025-10-05 02:34:57,193 - root - INFO - lr: 4.7420e-05 gnorm: 1.23 [ 4:00:48<20:42:13] +[titan] 2025-10-05 02:35:05,878 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:35:08,068 - root - INFO - step: 6500 loss: 2.5435 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3003 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 02:35:08,068 - root - INFO - lr: 4.7416e-05 gnorm: 1.25 [ 4:00:59<20:42:00] +[titan] 2025-10-05 02:35:18,953 - root - INFO - step: 6505 loss: 2.6050 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2979 +[titan] 2025-10-05 02:35:18,953 - root - INFO - lr: 4.7412e-05 gnorm: 1.26 [ 4:01:10<20:41:48] +[titan] 2025-10-05 02:35:29,825 - root - INFO - step: 6510 loss: 2.5818 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2782 +[titan] 2025-10-05 02:35:29,825 - root - INFO - lr: 4.7407e-05 gnorm: 1.19 [ 4:01:21<20:41:36] +[titan] 2025-10-05 02:35:40,705 - root - INFO - step: 6515 loss: 2.5167 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2195 +[titan] 2025-10-05 02:35:40,705 - root - INFO - lr: 4.7403e-05 gnorm: 1.18 [ 4:01:31<20:41:23] +[titan] 2025-10-05 02:35:51,579 - root - INFO - step: 6520 loss: 2.6889 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3704 +[titan] 2025-10-05 02:35:51,580 - root - INFO - lr: 4.7399e-05 gnorm: 1.25 [ 4:01:42<20:41:11] +[titan] 2025-10-05 02:36:02,521 - root - INFO - step: 6525 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3046 global_avg_mtp_loss: 2.2801 +[titan] 2025-10-05 02:36:02,521 - root - INFO - lr: 4.7395e-05 gnorm: 1.21 [ 4:01:53<20:40:59] +[titan] 2025-10-05 02:36:13,407 - root - INFO - step: 6530 loss: 2.5064 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2103 +[titan] 2025-10-05 02:36:13,407 - root - INFO - lr: 4.7391e-05 
gnorm: 1.17 [ 4:02:04<20:40:47] +[titan] 2025-10-05 02:36:24,288 - root - INFO - step: 6535 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:36:24,289 - root - INFO - lr: 4.7387e-05 gnorm: 1.26 [ 4:02:15<20:40:34] +[titan] 2025-10-05 02:36:35,150 - root - INFO - step: 6540 loss: 2.6944 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 02:36:35,150 - root - INFO - lr: 4.7382e-05 gnorm: 1.22 [ 4:02:26<20:40:22] +[titan] 2025-10-05 02:36:46,030 - root - INFO - step: 6545 loss: 2.5975 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2909 +[titan] 2025-10-05 02:36:46,030 - root - INFO - lr: 4.7378e-05 gnorm: 1.25 [ 4:02:37<20:40:09] +[titan] 2025-10-05 02:36:54,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:36:56,903 - root - INFO - step: 6550 loss: 2.5802 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2749 +[titan] 2025-10-05 02:36:56,904 - root - INFO - lr: 4.7374e-05 gnorm: 1.18 [ 4:02:48<20:39:57] +[titan] 2025-10-05 02:37:07,790 - root - INFO - step: 6555 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 02:37:07,790 - root - INFO - lr: 4.7370e-05 gnorm: 1.23 [ 4:02:59<20:39:45] +[titan] 2025-10-05 02:37:18,673 - root - INFO - step: 6560 loss: 2.6310 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3210 +[titan] 2025-10-05 02:37:18,673 - root - INFO - lr: 4.7366e-05 gnorm: 1.22 [ 4:03:09<20:39:32] +[titan] 2025-10-05 02:37:29,519 - root - INFO - step: 6565 loss: 2.6348 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3242 +[titan] 2025-10-05 02:37:29,520 - root - INFO - lr: 4.7361e-05 gnorm: 1.24 [ 4:03:20<20:39:20] +[titan] 2025-10-05 02:37:40,400 - root - INFO - step: 6570 loss: 2.5419 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2417 +[titan] 2025-10-05 02:37:40,400 - root - INFO - lr: 4.7357e-05 gnorm: 1.19 [ 4:03:31<20:39:07] +[titan] 2025-10-05 02:37:51,268 - root - INFO - step: 6575 loss: 2.5865 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2812 +[titan] 2025-10-05 02:37:51,269 - root - INFO - lr: 4.7353e-05 gnorm: 1.28 [ 4:03:42<20:38:55] +[titan] 2025-10-05 02:38:02,128 - root - INFO - step: 6580 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:38:02,128 - root - INFO - lr: 4.7349e-05 
gnorm: 1.20 [ 4:03:53<20:38:43] +[titan] 2025-10-05 02:38:13,063 - root - INFO - step: 6585 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3373 +[titan] 2025-10-05 02:38:13,063 - root - INFO - lr: 4.7345e-05 gnorm: 1.24 [ 4:04:04<20:38:31] +[titan] 2025-10-05 02:38:23,973 - root - INFO - step: 6590 loss: 2.6349 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3243 +[titan] 2025-10-05 02:38:23,973 - root - INFO - lr: 4.7340e-05 gnorm: 1.19 [ 4:04:15<20:38:18] +[titan] 2025-10-05 02:38:34,826 - root - INFO - step: 6595 loss: 2.7415 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4095 +[titan] 2025-10-05 02:38:34,826 - root - INFO - lr: 4.7336e-05 gnorm: 1.21 [ 4:04:26<20:38:06] +[titan] 2025-10-05 02:38:43,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:38:45,682 - root - INFO - step: 6600 loss: 2.5758 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2716 +[titan] 2025-10-05 02:38:45,682 - root - INFO - lr: 4.7332e-05 gnorm: 1.18 [ 4:04:36<20:37:53] +[titan] 2025-10-05 02:38:56,550 - root - INFO - step: 6605 loss: 2.5294 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2310 +[titan] 2025-10-05 02:38:56,550 - root - INFO - lr: 4.7328e-05 gnorm: 1.19 [ 4:04:47<20:37:41] +[titan] 2025-10-05 02:39:07,416 - root - INFO - step: 6610 loss: 2.5451 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:39:07,416 - root - INFO - lr: 4.7324e-05 gnorm: 1.20 [ 4:04:58<20:37:29] +[titan] 2025-10-05 02:39:18,347 - root - INFO - step: 6615 loss: 2.7044 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3846 +[titan] 2025-10-05 02:39:18,347 - root - INFO - lr: 4.7319e-05 gnorm: 1.23 [ 4:05:09<20:37:17] +[titan] 2025-10-05 02:39:29,249 - root - INFO - step: 6620 loss: 2.5846 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3048 global_avg_mtp_loss: 2.2799 +[titan] 2025-10-05 02:39:29,249 - root - INFO - lr: 4.7315e-05 gnorm: 1.16 [ 4:05:20<20:37:04] +[titan] 2025-10-05 02:39:40,113 - root - INFO - step: 6625 loss: 2.6491 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3371 +[titan] 2025-10-05 02:39:40,113 - root - INFO - lr: 4.7311e-05 gnorm: 1.27 [ 4:05:31<20:36:52] +[titan] 2025-10-05 02:39:50,990 - root - INFO - step: 6630 loss: 2.5891 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:39:50,990 - root - INFO - lr: 4.7307e-05 
gnorm: 1.21 [ 4:05:42<20:36:40] +[titan] 2025-10-05 02:40:01,853 - root - INFO - step: 6635 loss: 2.6888 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3719 +[titan] 2025-10-05 02:40:01,853 - root - INFO - lr: 4.7302e-05 gnorm: 1.21 [ 4:05:53<20:36:27] +[titan] 2025-10-05 02:40:12,718 - root - INFO - step: 6640 loss: 2.5610 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2587 +[titan] 2025-10-05 02:40:12,718 - root - INFO - lr: 4.7298e-05 gnorm: 1.19 [ 4:06:03<20:36:15] +[titan] 2025-10-05 02:40:23,628 - root - INFO - step: 6645 loss: 2.5680 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2644 +[titan] 2025-10-05 02:40:23,628 - root - INFO - lr: 4.7294e-05 gnorm: 1.27 [ 4:06:14<20:36:03] +[titan] 2025-10-05 02:40:32,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:40:34,565 - root - INFO - step: 6650 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3119 +[titan] 2025-10-05 02:40:34,566 - root - INFO - lr: 4.7290e-05 gnorm: 1.21 [ 4:06:25<20:35:51] +[titan] 2025-10-05 02:40:45,524 - root - INFO - step: 6655 loss: 2.6619 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:40:45,524 - root - INFO - lr: 4.7285e-05 gnorm: 1.16 [ 4:06:36<20:35:39] +[titan] 2025-10-05 02:40:47,878 - root - INFO - Dumping profiler traces at step 6656 +[titan] 2025-10-05 02:40:47,915 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:40:56,627 - root - INFO - step: 6660 loss: 2.5796 memory: 118.84GiB(85.28%) tps: 29,513 tflops: 409.44 mfu: 41.40% global_avg_ntp_loss: 0.3032 global_avg_mtp_loss: 2.2764 +[titan] 2025-10-05 02:40:56,627 - root - INFO - lr: 4.7281e-05 gnorm: 1.18 [ 4:06:47<20:35:28] +[titan] 2025-10-05 02:41:07,500 - root - INFO - step: 6665 loss: 2.5859 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2808 +[titan] 2025-10-05 02:41:07,500 - root - INFO - lr: 4.7277e-05 gnorm: 1.21 [ 4:06:58<20:35:15] +[titan] 2025-10-05 02:41:18,462 - root - INFO - step: 6670 loss: 2.5619 memory: 118.84GiB(85.28%) tps: 29,894 tflops: 414.73 mfu: 41.93% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:41:18,462 - root - INFO - lr: 4.7273e-05 gnorm: 1.20 [ 4:07:09<20:35:03] +[titan] 2025-10-05 02:41:29,324 - root - INFO - step: 6675 loss: 2.4816 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.1887 +[titan] 2025-10-05 02:41:29,325 - root - INFO - lr: 4.7268e-05 gnorm: 1.20 [ 4:07:20<20:34:51] +[titan] 2025-10-05 02:41:40,209 - root - INFO - step: 6680 loss: 2.6410 
memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3279 +[titan] 2025-10-05 02:41:40,209 - root - INFO - lr: 4.7264e-05 gnorm: 1.26 [ 4:07:31<20:34:39] +[titan] 2025-10-05 02:41:51,110 - root - INFO - step: 6685 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3116 +[titan] 2025-10-05 02:41:51,110 - root - INFO - lr: 4.7260e-05 gnorm: 1.25 [ 4:07:42<20:34:26] +[titan] 2025-10-05 02:42:01,973 - root - INFO - step: 6690 loss: 2.6096 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3014 +[titan] 2025-10-05 02:42:01,974 - root - INFO - lr: 4.7256e-05 gnorm: 1.20 [ 4:07:53<20:34:14] +[titan] 2025-10-05 02:42:12,862 - root - INFO - step: 6695 loss: 2.5175 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2207 +[titan] 2025-10-05 02:42:12,862 - root - INFO - lr: 4.7251e-05 gnorm: 1.18 [ 4:08:04<20:34:02] +[titan] 2025-10-05 02:42:21,626 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:42:23,816 - root - INFO - step: 6700 loss: 2.6088 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3010 +[titan] 2025-10-05 02:42:23,816 - root - INFO - lr: 4.7247e-05 gnorm: 1.21 [ 4:08:15<20:33:50] +[titan] 2025-10-05 02:42:34,693 - root - INFO - step: 6705 loss: 2.6071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.2991 +[titan] 2025-10-05 02:42:34,693 - root - INFO - lr: 4.7243e-05 gnorm: 1.19 [ 4:08:25<20:33:38] +[titan] 2025-10-05 02:42:45,561 - root - INFO - step: 6710 loss: 2.5118 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2161 +[titan] 2025-10-05 02:42:45,561 - root - INFO - lr: 4.7238e-05 gnorm: 1.24 [ 4:08:36<20:33:25] +[titan] 2025-10-05 02:42:56,442 - root - INFO - step: 6715 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.2997 +[titan] 2025-10-05 02:42:56,442 - root - INFO - lr: 4.7234e-05 gnorm: 1.21 [ 4:08:47<20:33:13] +[titan] 2025-10-05 02:43:07,287 - root - INFO - step: 6720 loss: 2.5570 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2554 +[titan] 2025-10-05 02:43:07,287 - root - INFO - lr: 4.7230e-05 gnorm: 1.22 [ 4:08:58<20:33:01] +[titan] 2025-10-05 02:43:18,136 - root - INFO - step: 6725 loss: 2.5707 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.3021 global_avg_mtp_loss: 2.2686 +[titan] 2025-10-05 02:43:18,136 - root - INFO - lr: 4.7226e-05 gnorm: 1.53 [ 4:09:09<20:32:48] +[titan] 2025-10-05 02:43:29,070 - root - INFO - step: 6730 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2464 +[titan] 2025-10-05 02:43:29,070 - root - INFO - lr: 4.7221e-05 
gnorm: 1.26 [ 4:09:20<20:32:36] +[titan] 2025-10-05 02:43:39,913 - root - INFO - step: 6735 loss: 2.5430 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 02:43:39,913 - root - INFO - lr: 4.7217e-05 gnorm: 1.22 [ 4:09:31<20:32:24] +[titan] 2025-10-05 02:43:50,772 - root - INFO - step: 6740 loss: 2.5235 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2275 +[titan] 2025-10-05 02:43:50,772 - root - INFO - lr: 4.7213e-05 gnorm: 1.21 [ 4:09:41<20:32:11] +[titan] 2025-10-05 02:44:01,659 - root - INFO - step: 6745 loss: 2.6439 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3121 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:44:01,659 - root - INFO - lr: 4.7208e-05 gnorm: 1.19 [ 4:09:52<20:31:59] +[titan] 2025-10-05 02:44:10,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:44:12,526 - root - INFO - step: 6750 loss: 2.5875 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 02:44:12,526 - root - INFO - lr: 4.7204e-05 gnorm: 1.21 [ 4:10:03<20:31:47] +[titan] 2025-10-05 02:44:23,443 - root - INFO - step: 6755 loss: 2.4956 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2014 +[titan] 2025-10-05 02:44:23,443 - root - INFO - lr: 4.7200e-05 gnorm: 1.19 [ 4:10:14<20:31:35] +[titan] 2025-10-05 02:44:34,289 - root - INFO - step: 6760 loss: 2.5401 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2414 +[titan] 2025-10-05 02:44:34,289 - root - INFO - lr: 4.7196e-05 gnorm: 1.22 [ 4:10:25<20:31:22] +[titan] 2025-10-05 02:44:45,167 - root - INFO - step: 6765 loss: 2.5998 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2924 +[titan] 2025-10-05 02:44:45,167 - root - INFO - lr: 4.7191e-05 gnorm: 1.30 [ 4:10:36<20:31:10] +[titan] 2025-10-05 02:44:56,029 - root - INFO - step: 6770 loss: 2.5743 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2714 +[titan] 2025-10-05 02:44:56,029 - root - INFO - lr: 4.7187e-05 gnorm: 1.21 [ 4:10:47<20:30:58] +[titan] 2025-10-05 02:45:06,886 - root - INFO - step: 6775 loss: 2.5839 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3045 global_avg_mtp_loss: 2.2794 +[titan] 2025-10-05 02:45:06,886 - root - INFO - lr: 4.7183e-05 gnorm: 1.17 [ 4:10:58<20:30:45] +[titan] 2025-10-05 02:45:17,790 - root - INFO - step: 6780 loss: 2.5182 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2228 +[titan] 2025-10-05 02:45:17,790 - root - INFO - lr: 4.7178e-05 
gnorm: 1.24 [ 4:11:08<20:30:33] +[titan] 2025-10-05 02:45:28,696 - root - INFO - step: 6785 loss: 2.5460 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2463 +[titan] 2025-10-05 02:45:28,696 - root - INFO - lr: 4.7174e-05 gnorm: 1.20 [ 4:11:19<20:30:21] +[titan] 2025-10-05 02:45:39,548 - root - INFO - step: 6790 loss: 2.5312 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2336 +[titan] 2025-10-05 02:45:39,548 - root - INFO - lr: 4.7170e-05 gnorm: 1.16 [ 4:11:30<20:30:09] +[titan] 2025-10-05 02:45:50,426 - root - INFO - step: 6795 loss: 2.5011 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:45:50,427 - root - INFO - lr: 4.7165e-05 gnorm: 1.18 [ 4:11:41<20:29:56] +[titan] 2025-10-05 02:45:59,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:46:01,303 - root - INFO - step: 6800 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2649 +[titan] 2025-10-05 02:46:01,303 - root - INFO - lr: 4.7161e-05 gnorm: 1.23 [ 4:11:52<20:29:44] +[titan] 2025-10-05 02:46:12,162 - root - INFO - step: 6805 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:46:12,162 - root - INFO - lr: 4.7157e-05 gnorm: 1.28 [ 4:12:03<20:29:32] +[titan] 2025-10-05 02:46:23,159 - root - INFO - step: 6810 loss: 2.5521 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.40 mfu: 41.80% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2496 +[titan] 2025-10-05 02:46:23,159 - root - INFO - lr: 4.7152e-05 gnorm: 1.22 [ 4:12:14<20:29:20] +[titan] 2025-10-05 02:46:34,046 - root - INFO - step: 6815 loss: 2.6067 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.3007 +[titan] 2025-10-05 02:46:34,046 - root - INFO - lr: 4.7148e-05 gnorm: 1.17 [ 4:12:25<20:29:08] +[titan] 2025-10-05 02:46:44,908 - root - INFO - step: 6820 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2807 +[titan] 2025-10-05 02:46:44,908 - root - INFO - lr: 4.7143e-05 gnorm: 1.17 [ 4:12:36<20:28:55] +[titan] 2025-10-05 02:46:55,788 - root - INFO - step: 6825 loss: 2.5910 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2852 +[titan] 2025-10-05 02:46:55,788 - root - INFO - lr: 4.7139e-05 gnorm: 1.16 [ 4:12:46<20:28:43] +[titan] 2025-10-05 02:47:06,620 - root - INFO - step: 6830 loss: 2.5384 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 02:47:06,620 - root - INFO - lr: 4.7135e-05 
gnorm: 1.18 [ 4:12:57<20:28:31] +[titan] 2025-10-05 02:47:17,469 - root - INFO - step: 6835 loss: 2.5733 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 02:47:17,469 - root - INFO - lr: 4.7130e-05 gnorm: 1.23 [ 4:13:08<20:28:18] +[titan] 2025-10-05 02:47:28,380 - root - INFO - step: 6840 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2835 +[titan] 2025-10-05 02:47:28,380 - root - INFO - lr: 4.7126e-05 gnorm: 1.26 [ 4:13:19<20:28:06] +[titan] 2025-10-05 02:47:39,283 - root - INFO - step: 6845 loss: 2.5574 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2546 +[titan] 2025-10-05 02:47:39,283 - root - INFO - lr: 4.7122e-05 gnorm: 1.19 [ 4:13:30<20:27:54] +[titan] 2025-10-05 02:47:47,974 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:47:50,155 - root - INFO - step: 6850 loss: 2.5366 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2372 +[titan] 2025-10-05 02:47:50,155 - root - INFO - lr: 4.7117e-05 gnorm: 1.18 [ 4:13:41<20:27:42] +[titan] 2025-10-05 02:48:01,026 - root - INFO - step: 6855 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2793 +[titan] 2025-10-05 02:48:01,026 - root - INFO - lr: 4.7113e-05 gnorm: 1.17 [ 4:13:52<20:27:30] +[titan] 2025-10-05 02:48:11,906 - root - INFO - step: 6860 loss: 2.5452 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2453 +[titan] 2025-10-05 02:48:11,906 - root - INFO - lr: 4.7109e-05 gnorm: 1.21 [ 4:14:03<20:27:17] +[titan] 2025-10-05 02:48:22,764 - root - INFO - step: 6865 loss: 2.5903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2839 +[titan] 2025-10-05 02:48:22,764 - root - INFO - lr: 4.7104e-05 gnorm: 1.22 [ 4:14:13<20:27:05] +[titan] 2025-10-05 02:48:33,663 - root - INFO - step: 6870 loss: 2.5282 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2305 +[titan] 2025-10-05 02:48:33,663 - root - INFO - lr: 4.7100e-05 gnorm: 1.21 [ 4:14:24<20:26:53] +[titan] 2025-10-05 02:48:44,571 - root - INFO - step: 6875 loss: 2.5842 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2787 +[titan] 2025-10-05 02:48:44,571 - root - INFO - lr: 4.7095e-05 gnorm: 1.20 [ 4:14:35<20:26:41] +[titan] 2025-10-05 02:48:55,419 - root - INFO - step: 6880 loss: 2.5406 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2421 +[titan] 2025-10-05 02:48:55,419 - root - INFO - lr: 4.7091e-05 
gnorm: 1.23 [ 4:14:46<20:26:28] +[titan] 2025-10-05 02:49:06,283 - root - INFO - step: 6885 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2631 +[titan] 2025-10-05 02:49:06,283 - root - INFO - lr: 4.7087e-05 gnorm: 1.33 [ 4:14:57<20:26:16] +[titan] 2025-10-05 02:49:17,141 - root - INFO - step: 6890 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.3033 global_avg_mtp_loss: 2.2679 +[titan] 2025-10-05 02:49:17,141 - root - INFO - lr: 4.7082e-05 gnorm: 1.20 [ 4:15:08<20:26:04] +[titan] 2025-10-05 02:49:28,078 - root - INFO - step: 6895 loss: 2.5483 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2478 +[titan] 2025-10-05 02:49:28,078 - root - INFO - lr: 4.7078e-05 gnorm: 1.18 [ 4:15:19<20:25:52] +[titan] 2025-10-05 02:49:36,751 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:49:38,935 - root - INFO - step: 6900 loss: 2.5983 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:49:38,935 - root - INFO - lr: 4.7073e-05 gnorm: 1.24 [ 4:15:30<20:25:40] +[titan] 2025-10-05 02:49:49,829 - root - INFO - step: 6905 loss: 2.5554 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2534 +[titan] 2025-10-05 02:49:49,830 - root - INFO - lr: 4.7069e-05 gnorm: 1.19 [ 4:15:41<20:25:27] +[titan] 2025-10-05 02:50:00,703 - root - INFO - step: 6910 loss: 2.6056 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.2975 +[titan] 2025-10-05 02:50:00,703 - root - INFO - lr: 4.7065e-05 gnorm: 1.20 [ 4:15:51<20:25:15] +[titan] 2025-10-05 02:50:11,565 - root - INFO - step: 6915 loss: 2.5960 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2897 +[titan] 2025-10-05 02:50:11,566 - root - INFO - lr: 4.7060e-05 gnorm: 1.24 [ 4:16:02<20:25:03] +[titan] 2025-10-05 02:50:22,427 - root - INFO - step: 6920 loss: 2.5924 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2856 +[titan] 2025-10-05 02:50:22,427 - root - INFO - lr: 4.7056e-05 gnorm: 1.19 [ 4:16:13<20:24:51] +[titan] 2025-10-05 02:50:33,321 - root - INFO - step: 6925 loss: 2.4869 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1938 +[titan] 2025-10-05 02:50:33,321 - root - INFO - lr: 4.7051e-05 gnorm: 1.18 [ 4:16:24<20:24:39] +[titan] 2025-10-05 02:50:44,192 - root - INFO - step: 6930 loss: 2.5543 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 02:50:44,193 - root - INFO - lr: 4.7047e-05 
gnorm: 1.24 [ 4:16:35<20:24:26] +[titan] 2025-10-05 02:50:55,042 - root - INFO - step: 6935 loss: 2.5426 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2428 +[titan] 2025-10-05 02:50:55,042 - root - INFO - lr: 4.7043e-05 gnorm: 1.21 [ 4:16:46<20:24:14] +[titan] 2025-10-05 02:51:05,935 - root - INFO - step: 6940 loss: 2.6667 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3517 +[titan] 2025-10-05 02:51:05,935 - root - INFO - lr: 4.7038e-05 gnorm: 1.24 [ 4:16:57<20:24:02] +[titan] 2025-10-05 02:51:16,790 - root - INFO - step: 6945 loss: 2.6473 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3345 +[titan] 2025-10-05 02:51:16,790 - root - INFO - lr: 4.7034e-05 gnorm: 1.27 [ 4:17:07<20:23:50] +[titan] 2025-10-05 02:51:25,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:51:27,720 - root - INFO - step: 6950 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2047 +[titan] 2025-10-05 02:51:27,721 - root - INFO - lr: 4.7029e-05 gnorm: 1.25 [ 4:17:18<20:23:38] +[titan] 2025-10-05 02:51:38,573 - root - INFO - step: 6955 loss: 2.6408 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:51:38,573 - root - INFO - lr: 4.7025e-05 gnorm: 1.22 [ 4:17:29<20:23:25] +[titan] 2025-10-05 02:51:49,457 - root - INFO - step: 6960 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3017 global_avg_mtp_loss: 2.2580 +[titan] 2025-10-05 02:51:49,458 - root - INFO - lr: 4.7020e-05 gnorm: 1.20 [ 4:17:40<20:23:13] +[titan] 2025-10-05 02:52:00,296 - root - INFO - step: 6965 loss: 2.6601 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:52:00,296 - root - INFO - lr: 4.7016e-05 gnorm: 1.30 [ 4:17:51<20:23:01] +[titan] 2025-10-05 02:52:11,200 - root - INFO - step: 6970 loss: 2.5501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2484 +[titan] 2025-10-05 02:52:11,200 - root - INFO - lr: 4.7012e-05 gnorm: 1.25 [ 4:18:02<20:22:49] +[titan] 2025-10-05 02:52:22,059 - root - INFO - step: 6975 loss: 2.5650 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2627 +[titan] 2025-10-05 02:52:22,059 - root - INFO - lr: 4.7007e-05 gnorm: 1.18 [ 4:18:13<20:22:36] +[titan] 2025-10-05 02:52:32,953 - root - INFO - step: 6980 loss: 2.6856 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.3581 +[titan] 2025-10-05 02:52:32,953 - root - INFO - lr: 4.7003e-05 
gnorm: 1.24 [ 4:18:24<20:22:24] +[titan] 2025-10-05 02:52:43,790 - root - INFO - step: 6985 loss: 2.5169 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2194 +[titan] 2025-10-05 02:52:43,790 - root - INFO - lr: 4.6998e-05 gnorm: 1.33 [ 4:18:34<20:22:12] +[titan] 2025-10-05 02:52:54,642 - root - INFO - step: 6990 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2300 +[titan] 2025-10-05 02:52:54,642 - root - INFO - lr: 4.6994e-05 gnorm: 1.18 [ 4:18:45<20:22:00] +[titan] 2025-10-05 02:53:05,477 - root - INFO - step: 6995 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3774 +[titan] 2025-10-05 02:53:05,477 - root - INFO - lr: 4.6989e-05 gnorm: 1.28 [ 4:18:56<20:21:47] +[titan] 2025-10-05 02:53:14,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:53:16,323 - root - INFO - step: 7000 loss: 2.6331 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.3240 +[titan] 2025-10-05 02:53:16,323 - root - INFO - lr: 4.6985e-05 gnorm: 1.28 [ 4:19:07<20:21:35] +[titan] 2025-10-05 02:53:27,204 - root - INFO - step: 7005 loss: 2.5777 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2742 +[titan] 2025-10-05 02:53:27,204 - root - INFO - lr: 4.6980e-05 gnorm: 1.20 [ 4:19:18<20:21:23] +[titan] 2025-10-05 02:53:38,086 - root - INFO - step: 7010 loss: 2.5633 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:53:38,086 - root - INFO - lr: 4.6976e-05 gnorm: 1.21 [ 4:19:29<20:21:11] +[titan] 2025-10-05 02:53:48,973 - root - INFO - step: 7015 loss: 2.5508 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2507 +[titan] 2025-10-05 02:53:48,973 - root - INFO - lr: 4.6971e-05 gnorm: 1.17 [ 4:19:40<20:20:58] +[titan] 2025-10-05 02:53:59,845 - root - INFO - step: 7020 loss: 2.6141 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3057 +[titan] 2025-10-05 02:53:59,845 - root - INFO - lr: 4.6967e-05 gnorm: 1.23 [ 4:19:51<20:20:46] +[titan] 2025-10-05 02:54:10,698 - root - INFO - step: 7025 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2184 +[titan] 2025-10-05 02:54:10,698 - root - INFO - lr: 4.6962e-05 gnorm: 1.18 [ 4:20:01<20:20:34] +[titan] 2025-10-05 02:54:21,549 - root - INFO - step: 7030 loss: 2.5250 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2274 +[titan] 2025-10-05 02:54:21,550 - root - INFO - lr: 4.6958e-05 
gnorm: 1.18 [ 4:20:12<20:20:22] +[titan] 2025-10-05 02:54:32,510 - root - INFO - step: 7035 loss: 2.4583 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2895 global_avg_mtp_loss: 2.1687 +[titan] 2025-10-05 02:54:32,510 - root - INFO - lr: 4.6954e-05 gnorm: 1.13 [ 4:20:23<20:20:10] +[titan] 2025-10-05 02:54:43,391 - root - INFO - step: 7040 loss: 2.5911 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2860 +[titan] 2025-10-05 02:54:43,391 - root - INFO - lr: 4.6949e-05 gnorm: 1.24 [ 4:20:34<20:19:58] +[titan] 2025-10-05 02:54:54,247 - root - INFO - step: 7045 loss: 2.5161 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2197 +[titan] 2025-10-05 02:54:54,247 - root - INFO - lr: 4.6945e-05 gnorm: 1.19 [ 4:20:45<20:19:45] +[titan] 2025-10-05 02:55:02,933 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:55:05,120 - root - INFO - step: 7050 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2600 +[titan] 2025-10-05 02:55:05,120 - root - INFO - lr: 4.6940e-05 gnorm: 1.19 [ 4:20:56<20:19:33] +[titan] 2025-10-05 02:55:15,988 - root - INFO - step: 7055 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:55:15,988 - root - INFO - lr: 4.6936e-05 gnorm: 1.18 [ 4:21:07<20:19:21] +[titan] 2025-10-05 02:55:26,853 - root - INFO - step: 7060 loss: 2.6283 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3103 global_avg_mtp_loss: 2.3181 +[titan] 2025-10-05 02:55:26,853 - root - INFO - lr: 4.6931e-05 gnorm: 1.21 [ 4:21:18<20:19:09] +[titan] 2025-10-05 02:55:37,782 - root - INFO - step: 7065 loss: 2.5429 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2433 +[titan] 2025-10-05 02:55:37,782 - root - INFO - lr: 4.6927e-05 gnorm: 1.17 [ 4:21:28<20:18:57] +[titan] 2025-10-05 02:55:48,649 - root - INFO - step: 7070 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2882 +[titan] 2025-10-05 02:55:48,649 - root - INFO - lr: 4.6922e-05 gnorm: 1.22 [ 4:21:39<20:18:45] +[titan] 2025-10-05 02:55:59,510 - root - INFO - step: 7075 loss: 2.5409 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 02:55:59,510 - root - INFO - lr: 4.6918e-05 gnorm: 1.20 [ 4:21:50<20:18:32] +[titan] 2025-10-05 02:56:10,352 - root - INFO - step: 7080 loss: 2.5976 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:56:10,353 - root - INFO - lr: 4.6913e-05 
gnorm: 1.19 [ 4:22:01<20:18:20] +[titan] 2025-10-05 02:56:21,217 - root - INFO - step: 7085 loss: 2.5675 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2647 +[titan] 2025-10-05 02:56:21,217 - root - INFO - lr: 4.6909e-05 gnorm: 1.26 [ 4:22:12<20:18:08] +[titan] 2025-10-05 02:56:32,130 - root - INFO - step: 7090 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 02:56:32,130 - root - INFO - lr: 4.6904e-05 gnorm: 1.19 [ 4:22:23<20:17:56] +[titan] 2025-10-05 02:56:43,006 - root - INFO - step: 7095 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 02:56:43,006 - root - INFO - lr: 4.6899e-05 gnorm: 1.16 [ 4:22:34<20:17:44] +[titan] 2025-10-05 02:56:51,730 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:56:53,930 - root - INFO - step: 7100 loss: 2.6150 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3102 global_avg_mtp_loss: 2.3048 +[titan] 2025-10-05 02:56:53,930 - root - INFO - lr: 4.6895e-05 gnorm: 1.24 [ 4:22:45<20:17:32] +[titan] 2025-10-05 02:57:04,810 - root - INFO - step: 7105 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2524 +[titan] 2025-10-05 02:57:04,810 - root - INFO - lr: 4.6890e-05 gnorm: 1.23 [ 4:22:55<20:17:20] +[titan] 2025-10-05 02:57:15,679 - root - INFO - step: 7110 loss: 2.6249 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3152 +[titan] 2025-10-05 02:57:15,679 - root - INFO - lr: 4.6886e-05 gnorm: 1.23 [ 4:23:06<20:17:08] +[titan] 2025-10-05 02:57:26,560 - root - INFO - step: 7115 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 02:57:26,560 - root - INFO - lr: 4.6881e-05 gnorm: 1.21 [ 4:23:17<20:16:55] +[titan] 2025-10-05 02:57:37,504 - root - INFO - step: 7120 loss: 2.5642 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 02:57:37,504 - root - INFO - lr: 4.6877e-05 gnorm: 1.25 [ 4:23:28<20:16:44] +[titan] 2025-10-05 02:57:48,407 - root - INFO - step: 7125 loss: 2.5252 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2269 +[titan] 2025-10-05 02:57:48,407 - root - INFO - lr: 4.6872e-05 gnorm: 1.23 [ 4:23:39<20:16:32] +[titan] 2025-10-05 02:57:59,324 - root - INFO - step: 7130 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2910 +[titan] 2025-10-05 02:57:59,324 - root - INFO - lr: 4.6868e-05 
gnorm: 1.23 [ 4:23:50<20:16:20] +[titan] 2025-10-05 02:58:10,198 - root - INFO - step: 7135 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3080 +[titan] 2025-10-05 02:58:10,198 - root - INFO - lr: 4.6863e-05 gnorm: 1.30 [ 4:24:01<20:16:07] +[titan] 2025-10-05 02:58:21,069 - root - INFO - step: 7140 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:58:21,069 - root - INFO - lr: 4.6859e-05 gnorm: 1.21 [ 4:24:12<20:15:55] +[titan] 2025-10-05 02:58:31,936 - root - INFO - step: 7145 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2622 +[titan] 2025-10-05 02:58:31,936 - root - INFO - lr: 4.6854e-05 gnorm: 1.20 [ 4:24:23<20:15:43] +[titan] 2025-10-05 02:58:40,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:58:42,870 - root - INFO - step: 7150 loss: 2.5513 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2508 +[titan] 2025-10-05 02:58:42,871 - root - INFO - lr: 4.6850e-05 gnorm: 1.21 [ 4:24:34<20:15:31] +[titan] 2025-10-05 02:58:53,743 - root - INFO - step: 7155 loss: 2.5589 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2563 +[titan] 2025-10-05 02:58:53,743 - root - INFO - lr: 4.6845e-05 gnorm: 1.16 [ 4:24:44<20:15:19] +[titan] 2025-10-05 02:59:04,618 - root - INFO - step: 7160 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2723 +[titan] 2025-10-05 02:59:04,618 - root - INFO - lr: 4.6840e-05 gnorm: 1.21 [ 4:24:55<20:15:07] +[titan] 2025-10-05 02:59:15,628 - root - INFO - step: 7165 loss: 2.5541 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.91 mfu: 41.75% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2529 +[titan] 2025-10-05 02:59:15,629 - root - INFO - lr: 4.6836e-05 gnorm: 1.17 [ 4:25:06<20:14:55] +[titan] 2025-10-05 02:59:22,339 - root - INFO - Dumping profiler traces at step 7168 +[titan] 2025-10-05 02:59:22,377 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:59:26,736 - root - INFO - step: 7170 loss: 2.6199 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.29 mfu: 41.38% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3097 +[titan] 2025-10-05 02:59:26,736 - root - INFO - lr: 4.6831e-05 gnorm: 1.20 [ 4:25:17<20:14:44] +[titan] 2025-10-05 02:59:37,672 - root - INFO - step: 7175 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1801 +[titan] 2025-10-05 02:59:37,672 - root - INFO - lr: 4.6827e-05 gnorm: 1.15 [ 4:25:28<20:14:32] +[titan] 2025-10-05 02:59:48,576 - root - INFO - step: 7180 loss: 2.6188 
memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3101 +[titan] 2025-10-05 02:59:48,576 - root - INFO - lr: 4.6822e-05 gnorm: 1.22 [ 4:25:39<20:14:20] +[titan] 2025-10-05 02:59:59,449 - root - INFO - step: 7185 loss: 2.5330 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2333 +[titan] 2025-10-05 02:59:59,450 - root - INFO - lr: 4.6818e-05 gnorm: 1.21 [ 4:25:50<20:14:08] +[titan] 2025-10-05 03:00:10,322 - root - INFO - step: 7190 loss: 2.6028 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2970 +[titan] 2025-10-05 03:00:10,322 - root - INFO - lr: 4.6813e-05 gnorm: 1.20 [ 4:26:01<20:13:56] +[titan] 2025-10-05 03:00:21,233 - root - INFO - step: 7195 loss: 2.6073 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2996 +[titan] 2025-10-05 03:00:21,233 - root - INFO - lr: 4.6808e-05 gnorm: 1.23 [ 4:26:12<20:13:44] +[titan] 2025-10-05 03:00:29,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:00:32,108 - root - INFO - step: 7200 loss: 2.5130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:00:32,108 - root - INFO - lr: 4.6804e-05 gnorm: 1.32 [ 4:26:23<20:13:32] +[titan] 2025-10-05 03:00:43,038 - root - INFO - step: 7205 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2785 +[titan] 2025-10-05 03:00:43,038 - root - INFO - lr: 4.6799e-05 gnorm: 1.29 [ 4:26:34<20:13:20] +[titan] 2025-10-05 03:00:53,933 - root - INFO - step: 7210 loss: 2.5257 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:00:53,933 - root - INFO - lr: 4.6795e-05 gnorm: 1.20 [ 4:26:45<20:13:08] +[titan] 2025-10-05 03:01:04,827 - root - INFO - step: 7215 loss: 2.5854 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3039 global_avg_mtp_loss: 2.2815 +[titan] 2025-10-05 03:01:04,827 - root - INFO - lr: 4.6790e-05 gnorm: 1.21 [ 4:26:55<20:12:56] +[titan] 2025-10-05 03:01:15,711 - root - INFO - step: 7220 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2982 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:01:15,712 - root - INFO - lr: 4.6786e-05 gnorm: 1.19 [ 4:27:06<20:12:44] +[titan] 2025-10-05 03:01:26,615 - root - INFO - step: 7225 loss: 2.4967 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2040 +[titan] 2025-10-05 03:01:26,615 - root - INFO - lr: 4.6781e-05 gnorm: 1.24 [ 4:27:17<20:12:32] +[titan] 2025-10-05 03:01:37,539 - root - INFO - step: 7230 loss: 2.6118 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.3044 +[titan] 2025-10-05 03:01:37,539 - root - INFO - lr: 4.6776e-05 
gnorm: 1.20 [ 4:27:28<20:12:20] +[titan] 2025-10-05 03:01:48,431 - root - INFO - step: 7235 loss: 2.5240 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2979 global_avg_mtp_loss: 2.2261 +[titan] 2025-10-05 03:01:48,431 - root - INFO - lr: 4.6772e-05 gnorm: 1.18 [ 4:27:39<20:12:08] +[titan] 2025-10-05 03:01:59,313 - root - INFO - step: 7240 loss: 2.5262 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2284 +[titan] 2025-10-05 03:01:59,313 - root - INFO - lr: 4.6767e-05 gnorm: 1.17 [ 4:27:50<20:11:56] +[titan] 2025-10-05 03:02:10,185 - root - INFO - step: 7245 loss: 2.5139 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2193 +[titan] 2025-10-05 03:02:10,185 - root - INFO - lr: 4.6762e-05 gnorm: 1.26 [ 4:28:01<20:11:44] +[titan] 2025-10-05 03:02:18,881 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:02:21,067 - root - INFO - step: 7250 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:02:21,067 - root - INFO - lr: 4.6758e-05 gnorm: 1.26 [ 4:28:12<20:11:32] +[titan] 2025-10-05 03:02:31,931 - root - INFO - step: 7255 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2327 +[titan] 2025-10-05 03:02:31,931 - root - INFO - lr: 4.6753e-05 gnorm: 1.22 [ 4:28:23<20:11:20] +[titan] 2025-10-05 03:02:42,869 - root - INFO - step: 7260 loss: 2.5329 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2991 global_avg_mtp_loss: 2.2339 +[titan] 2025-10-05 03:02:42,869 - root - INFO - lr: 4.6749e-05 gnorm: 1.22 [ 4:28:34<20:11:08] +[titan] 2025-10-05 03:02:53,734 - root - INFO - step: 7265 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.2033 +[titan] 2025-10-05 03:02:53,734 - root - INFO - lr: 4.6744e-05 gnorm: 1.30 [ 4:28:44<20:10:56] +[titan] 2025-10-05 03:03:04,623 - root - INFO - step: 7270 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3079 global_avg_mtp_loss: 2.3083 +[titan] 2025-10-05 03:03:04,623 - root - INFO - lr: 4.6739e-05 gnorm: 1.34 [ 4:28:55<20:10:44] +[titan] 2025-10-05 03:03:15,505 - root - INFO - step: 7275 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3072 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 03:03:15,505 - root - INFO - lr: 4.6735e-05 gnorm: 1.22 [ 4:29:06<20:10:32] +[titan] 2025-10-05 03:03:26,372 - root - INFO - step: 7280 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2876 +[titan] 2025-10-05 03:03:26,372 - root - INFO - lr: 4.6730e-05 
gnorm: 1.19 [ 4:29:17<20:10:20] +[titan] 2025-10-05 03:03:37,274 - root - INFO - step: 7285 loss: 2.6024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2948 +[titan] 2025-10-05 03:03:37,274 - root - INFO - lr: 4.6725e-05 gnorm: 1.27 [ 4:29:28<20:10:08] +[titan] 2025-10-05 03:03:48,171 - root - INFO - step: 7290 loss: 2.5142 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2183 +[titan] 2025-10-05 03:03:48,171 - root - INFO - lr: 4.6721e-05 gnorm: 1.18 [ 4:29:39<20:09:56] +[titan] 2025-10-05 03:03:59,037 - root - INFO - step: 7295 loss: 2.5672 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3019 global_avg_mtp_loss: 2.2653 +[titan] 2025-10-05 03:03:59,037 - root - INFO - lr: 4.6716e-05 gnorm: 1.21 [ 4:29:50<20:09:43] +[titan] 2025-10-05 03:04:07,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:04:09,893 - root - INFO - step: 7300 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2362 +[titan] 2025-10-05 03:04:09,893 - root - INFO - lr: 4.6712e-05 gnorm: 1.19 [ 4:30:01<20:09:31] +[titan] 2025-10-05 03:04:20,769 - root - INFO - step: 7305 loss: 2.5190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:04:20,770 - root - INFO - lr: 4.6707e-05 gnorm: 1.20 [ 4:30:11<20:09:19] +[titan] 2025-10-05 03:04:31,636 - root - INFO - step: 7310 loss: 2.5542 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2522 +[titan] 2025-10-05 03:04:31,637 - root - INFO - lr: 4.6702e-05 gnorm: 1.16 [ 4:30:22<20:09:07] +[titan] 2025-10-05 03:04:42,538 - root - INFO - step: 7315 loss: 2.5823 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3047 global_avg_mtp_loss: 2.2776 +[titan] 2025-10-05 03:04:42,538 - root - INFO - lr: 4.6698e-05 gnorm: 1.19 [ 4:30:33<20:08:55] +[titan] 2025-10-05 03:04:53,396 - root - INFO - step: 7320 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.2988 +[titan] 2025-10-05 03:04:53,396 - root - INFO - lr: 4.6693e-05 gnorm: 1.20 [ 4:30:44<20:08:43] +[titan] 2025-10-05 03:05:04,291 - root - INFO - step: 7325 loss: 2.6131 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 03:05:04,291 - root - INFO - lr: 4.6688e-05 gnorm: 1.20 [ 4:30:55<20:08:31] +[titan] 2025-10-05 03:05:15,170 - root - INFO - step: 7330 loss: 2.5664 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2639 +[titan] 2025-10-05 03:05:15,170 - root - INFO - lr: 4.6684e-05 
gnorm: 1.19 [ 4:31:06<20:08:19] +[titan] 2025-10-05 03:05:26,057 - root - INFO - step: 7335 loss: 2.5718 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:05:26,057 - root - INFO - lr: 4.6679e-05 gnorm: 1.19 [ 4:31:17<20:08:07] +[titan] 2025-10-05 03:05:36,944 - root - INFO - step: 7340 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:05:36,945 - root - INFO - lr: 4.6674e-05 gnorm: 1.21 [ 4:31:28<20:07:55] +[titan] 2025-10-05 03:05:47,861 - root - INFO - step: 7345 loss: 2.4951 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2008 +[titan] 2025-10-05 03:05:47,861 - root - INFO - lr: 4.6670e-05 gnorm: 1.18 [ 4:31:38<20:07:43] +[titan] 2025-10-05 03:05:56,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:05:58,742 - root - INFO - step: 7350 loss: 2.6375 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3271 +[titan] 2025-10-05 03:05:58,742 - root - INFO - lr: 4.6665e-05 gnorm: 1.20 [ 4:31:49<20:07:31] +[titan] 2025-10-05 03:06:09,631 - root - INFO - step: 7355 loss: 2.5204 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2241 +[titan] 2025-10-05 03:06:09,631 - root - INFO - lr: 4.6660e-05 gnorm: 1.13 [ 4:32:00<20:07:19] +[titan] 2025-10-05 03:06:20,514 - root - INFO - step: 7360 loss: 2.5761 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2736 +[titan] 2025-10-05 03:06:20,514 - root - INFO - lr: 4.6656e-05 gnorm: 1.20 [ 4:32:11<20:07:07] +[titan] 2025-10-05 03:06:31,396 - root - INFO - step: 7365 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2678 +[titan] 2025-10-05 03:06:31,397 - root - INFO - lr: 4.6651e-05 gnorm: 1.18 [ 4:32:22<20:06:55] +[titan] 2025-10-05 03:06:42,281 - root - INFO - step: 7370 loss: 2.5449 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2457 +[titan] 2025-10-05 03:06:42,282 - root - INFO - lr: 4.6646e-05 gnorm: 1.20 [ 4:32:33<20:06:43] +[titan] 2025-10-05 03:06:53,156 - root - INFO - step: 7375 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2076 +[titan] 2025-10-05 03:06:53,156 - root - INFO - lr: 4.6642e-05 gnorm: 1.20 [ 4:32:44<20:06:31] +[titan] 2025-10-05 03:07:04,009 - root - INFO - step: 7380 loss: 2.4884 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:07:04,009 - root - INFO - lr: 4.6637e-05 
gnorm: 1.18 [ 4:32:55<20:06:19] +[titan] 2025-10-05 03:07:14,887 - root - INFO - step: 7385 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2624 +[titan] 2025-10-05 03:07:14,887 - root - INFO - lr: 4.6632e-05 gnorm: 1.29 [ 4:33:06<20:06:06] +[titan] 2025-10-05 03:07:25,781 - root - INFO - step: 7390 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:07:25,781 - root - INFO - lr: 4.6627e-05 gnorm: 1.19 [ 4:33:16<20:05:54] +[titan] 2025-10-05 03:07:36,668 - root - INFO - step: 7395 loss: 2.5215 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2253 +[titan] 2025-10-05 03:07:36,668 - root - INFO - lr: 4.6623e-05 gnorm: 1.18 [ 4:33:27<20:05:42] +[titan] 2025-10-05 03:07:45,410 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:07:47,595 - root - INFO - step: 7400 loss: 2.5552 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 03:07:47,595 - root - INFO - lr: 4.6618e-05 gnorm: 1.25 [ 4:33:38<20:05:31] +[titan] 2025-10-05 03:07:58,479 - root - INFO - step: 7405 loss: 2.5722 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2664 +[titan] 2025-10-05 03:07:58,479 - root - INFO - lr: 4.6613e-05 gnorm: 1.23 [ 4:33:49<20:05:19] +[titan] 2025-10-05 03:08:09,352 - root - INFO - step: 7410 loss: 2.6173 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 03:08:09,352 - root - INFO - lr: 4.6609e-05 gnorm: 1.26 [ 4:34:00<20:05:07] +[titan] 2025-10-05 03:08:20,245 - root - INFO - step: 7415 loss: 2.6371 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3115 global_avg_mtp_loss: 2.3256 +[titan] 2025-10-05 03:08:20,245 - root - INFO - lr: 4.6604e-05 gnorm: 1.18 [ 4:34:11<20:04:55] +[titan] 2025-10-05 03:08:31,148 - root - INFO - step: 7420 loss: 2.5121 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:08:31,148 - root - INFO - lr: 4.6599e-05 gnorm: 1.18 [ 4:34:22<20:04:43] +[titan] 2025-10-05 03:08:42,047 - root - INFO - step: 7425 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2028 +[titan] 2025-10-05 03:08:42,047 - root - INFO - lr: 4.6594e-05 gnorm: 1.17 [ 4:34:33<20:04:31] +[titan] 2025-10-05 03:08:52,923 - root - INFO - step: 7430 loss: 2.5993 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2935 +[titan] 2025-10-05 03:08:52,923 - root - INFO - lr: 4.6590e-05 
gnorm: 1.19 [ 4:34:44<20:04:19] +[titan] 2025-10-05 03:09:03,806 - root - INFO - step: 7435 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2641 +[titan] 2025-10-05 03:09:03,806 - root - INFO - lr: 4.6585e-05 gnorm: 1.23 [ 4:34:54<20:04:07] +[titan] 2025-10-05 03:09:14,682 - root - INFO - step: 7440 loss: 2.4458 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:09:14,682 - root - INFO - lr: 4.6580e-05 gnorm: 1.21 [ 4:35:05<20:03:55] +[titan] 2025-10-05 03:09:25,563 - root - INFO - step: 7445 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2988 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:09:25,563 - root - INFO - lr: 4.6576e-05 gnorm: 1.20 [ 4:35:16<20:03:43] +[titan] 2025-10-05 03:09:34,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:09:36,483 - root - INFO - step: 7450 loss: 2.4992 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2041 +[titan] 2025-10-05 03:09:36,483 - root - INFO - lr: 4.6571e-05 gnorm: 1.13 [ 4:35:27<20:03:31] +[titan] 2025-10-05 03:09:47,415 - root - INFO - step: 7455 loss: 2.5685 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:09:47,415 - root - INFO - lr: 4.6566e-05 gnorm: 1.21 [ 4:35:38<20:03:19] +[titan] 2025-10-05 03:09:58,322 - root - INFO - step: 7460 loss: 2.5530 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2521 +[titan] 2025-10-05 03:09:58,322 - root - INFO - lr: 4.6561e-05 gnorm: 1.19 [ 4:35:49<20:03:07] +[titan] 2025-10-05 03:10:09,217 - root - INFO - step: 7465 loss: 2.5984 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2930 +[titan] 2025-10-05 03:10:09,217 - root - INFO - lr: 4.6557e-05 gnorm: 1.33 [ 4:36:00<20:02:55] +[titan] 2025-10-05 03:10:20,126 - root - INFO - step: 7470 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:10:20,127 - root - INFO - lr: 4.6552e-05 gnorm: 1.25 [ 4:36:11<20:02:43] +[titan] 2025-10-05 03:10:31,009 - root - INFO - step: 7475 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3233 +[titan] 2025-10-05 03:10:31,009 - root - INFO - lr: 4.6547e-05 gnorm: 1.21 [ 4:36:22<20:02:31] +[titan] 2025-10-05 03:10:41,908 - root - INFO - step: 7480 loss: 2.6221 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3088 global_avg_mtp_loss: 2.3133 +[titan] 2025-10-05 03:10:41,908 - root - INFO - lr: 4.6542e-05 
gnorm: 1.24 [ 4:36:33<20:02:19] +[titan] 2025-10-05 03:10:52,859 - root - INFO - step: 7485 loss: 2.6267 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3150 +[titan] 2025-10-05 03:10:52,859 - root - INFO - lr: 4.6538e-05 gnorm: 1.23 [ 4:36:43<20:02:08] +[titan] 2025-10-05 03:11:03,748 - root - INFO - step: 7490 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:11:03,748 - root - INFO - lr: 4.6533e-05 gnorm: 1.16 [ 4:36:54<20:01:56] +[titan] 2025-10-05 03:11:14,653 - root - INFO - step: 7495 loss: 2.5041 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2100 +[titan] 2025-10-05 03:11:14,654 - root - INFO - lr: 4.6528e-05 gnorm: 1.17 [ 4:37:05<20:01:44] +[titan] 2025-10-05 03:11:23,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:11:25,557 - root - INFO - step: 7500 loss: 2.5279 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 03:11:25,558 - root - INFO - lr: 4.6523e-05 gnorm: 1.17 [ 4:37:16<20:01:32] +[titan] 2025-10-05 03:11:36,447 - root - INFO - step: 7505 loss: 2.5670 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:11:36,447 - root - INFO - lr: 4.6519e-05 gnorm: 1.26 [ 4:37:27<20:01:20] +[titan] 2025-10-05 03:11:47,366 - root - INFO - step: 7510 loss: 2.5107 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 03:11:47,366 - root - INFO - lr: 4.6514e-05 gnorm: 1.18 [ 4:37:38<20:01:08] +[titan] 2025-10-05 03:11:58,284 - root - INFO - step: 7515 loss: 2.6471 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3343 +[titan] 2025-10-05 03:11:58,284 - root - INFO - lr: 4.6509e-05 gnorm: 1.26 [ 4:37:49<20:00:56] +[titan] 2025-10-05 03:12:09,176 - root - INFO - step: 7520 loss: 2.5022 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:12:09,177 - root - INFO - lr: 4.6504e-05 gnorm: 1.24 [ 4:38:00<20:00:44] +[titan] 2025-10-05 03:12:20,065 - root - INFO - step: 7525 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2289 +[titan] 2025-10-05 03:12:20,065 - root - INFO - lr: 4.6499e-05 gnorm: 1.20 [ 4:38:11<20:00:32] +[titan] 2025-10-05 03:12:30,937 - root - INFO - step: 7530 loss: 2.5858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2803 +[titan] 2025-10-05 03:12:30,937 - root - INFO - lr: 4.6495e-05 
gnorm: 1.25 [ 4:38:22<20:00:20] +[titan] 2025-10-05 03:12:41,813 - root - INFO - step: 7535 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:12:41,814 - root - INFO - lr: 4.6490e-05 gnorm: 1.20 [ 4:38:32<20:00:08] +[titan] 2025-10-05 03:12:52,684 - root - INFO - step: 7540 loss: 2.5356 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:12:52,684 - root - INFO - lr: 4.6485e-05 gnorm: 1.23 [ 4:38:43<19:59:56] +[titan] 2025-10-05 03:13:03,580 - root - INFO - step: 7545 loss: 2.5425 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2420 +[titan] 2025-10-05 03:13:03,580 - root - INFO - lr: 4.6480e-05 gnorm: 1.22 [ 4:38:54<19:59:44] +[titan] 2025-10-05 03:13:12,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:13:14,425 - root - INFO - step: 7550 loss: 2.5098 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:14,425 - root - INFO - lr: 4.6476e-05 gnorm: 1.21 [ 4:39:05<19:59:32] +[titan] 2025-10-05 03:13:25,285 - root - INFO - step: 7555 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2953 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:25,285 - root - INFO - lr: 4.6471e-05 gnorm: 1.32 [ 4:39:16<19:59:20] +[titan] 2025-10-05 03:13:36,128 - root - INFO - step: 7560 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1394 +[titan] 2025-10-05 03:13:36,128 - root - INFO - lr: 4.6466e-05 gnorm: 1.23 [ 4:39:27<19:59:08] +[titan] 2025-10-05 03:13:47,004 - root - INFO - step: 7565 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2297 +[titan] 2025-10-05 03:13:47,005 - root - INFO - lr: 4.6461e-05 gnorm: 1.21 [ 4:39:38<19:58:56] +[titan] 2025-10-05 03:13:57,856 - root - INFO - step: 7570 loss: 2.4658 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:13:57,856 - root - INFO - lr: 4.6456e-05 gnorm: 1.15 [ 4:39:48<19:58:44] +[titan] 2025-10-05 03:14:08,701 - root - INFO - step: 7575 loss: 2.5486 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2477 +[titan] 2025-10-05 03:14:08,701 - root - INFO - lr: 4.6452e-05 gnorm: 1.16 [ 4:39:59<19:58:32] +[titan] 2025-10-05 03:14:19,585 - root - INFO - step: 7580 loss: 2.4950 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:14:19,586 - root - INFO - lr: 4.6447e-05 
gnorm: 1.20 [ 4:40:10<19:58:20] +[titan] 2025-10-05 03:14:30,487 - root - INFO - step: 7585 loss: 2.5519 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3008 global_avg_mtp_loss: 2.2511 +[titan] 2025-10-05 03:14:30,487 - root - INFO - lr: 4.6442e-05 gnorm: 1.18 [ 4:40:21<19:58:08] +[titan] 2025-10-05 03:14:41,356 - root - INFO - step: 7590 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2224 +[titan] 2025-10-05 03:14:41,356 - root - INFO - lr: 4.6437e-05 gnorm: 1.18 [ 4:40:32<19:57:56] +[titan] 2025-10-05 03:14:52,221 - root - INFO - step: 7595 loss: 2.5646 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 03:14:52,221 - root - INFO - lr: 4.6432e-05 gnorm: 1.16 [ 4:40:43<19:57:44] +[titan] 2025-10-05 03:15:00,910 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:15:03,087 - root - INFO - step: 7600 loss: 2.5198 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:15:03,087 - root - INFO - lr: 4.6427e-05 gnorm: 1.22 [ 4:40:54<19:57:32] +[titan] 2025-10-05 03:15:13,944 - root - INFO - step: 7605 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2354 +[titan] 2025-10-05 03:15:13,944 - root - INFO - lr: 4.6423e-05 gnorm: 1.19 [ 4:41:05<19:57:20] +[titan] 2025-10-05 03:15:24,824 - root - INFO - step: 7610 loss: 2.4376 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1514 +[titan] 2025-10-05 03:15:24,824 - root - INFO - lr: 4.6418e-05 gnorm: 1.19 [ 4:41:15<19:57:08] +[titan] 2025-10-05 03:15:35,666 - root - INFO - step: 7615 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2192 +[titan] 2025-10-05 03:15:35,666 - root - INFO - lr: 4.6413e-05 gnorm: 1.17 [ 4:41:26<19:56:55] +[titan] 2025-10-05 03:15:46,512 - root - INFO - step: 7620 loss: 2.5412 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2413 +[titan] 2025-10-05 03:15:46,512 - root - INFO - lr: 4.6408e-05 gnorm: 1.18 [ 4:41:37<19:56:43] +[titan] 2025-10-05 03:15:57,356 - root - INFO - step: 7625 loss: 2.6165 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3094 global_avg_mtp_loss: 2.3070 +[titan] 2025-10-05 03:15:57,356 - root - INFO - lr: 4.6403e-05 gnorm: 1.26 [ 4:41:48<19:56:31] +[titan] 2025-10-05 03:16:08,215 - root - INFO - step: 7630 loss: 2.5181 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:16:08,215 - root - INFO - lr: 4.6398e-05 
gnorm: 1.21 [ 4:41:59<19:56:19] +[titan] 2025-10-05 03:16:19,087 - root - INFO - step: 7635 loss: 2.4574 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1676 +[titan] 2025-10-05 03:16:19,088 - root - INFO - lr: 4.6394e-05 gnorm: 1.19 [ 4:42:10<19:56:07] +[titan] 2025-10-05 03:16:29,923 - root - INFO - step: 7640 loss: 2.4611 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1719 +[titan] 2025-10-05 03:16:29,923 - root - INFO - lr: 4.6389e-05 gnorm: 1.17 [ 4:42:21<19:55:55] +[titan] 2025-10-05 03:16:40,805 - root - INFO - step: 7645 loss: 2.5518 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2500 +[titan] 2025-10-05 03:16:40,805 - root - INFO - lr: 4.6384e-05 gnorm: 1.19 [ 4:42:31<19:55:43] +[titan] 2025-10-05 03:16:49,484 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:16:51,676 - root - INFO - step: 7650 loss: 2.5593 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2579 +[titan] 2025-10-05 03:16:51,676 - root - INFO - lr: 4.6379e-05 gnorm: 1.21 [ 4:42:42<19:55:31] +[titan] 2025-10-05 03:17:02,521 - root - INFO - step: 7655 loss: 2.5404 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 03:17:02,521 - root - INFO - lr: 4.6374e-05 gnorm: 1.24 [ 4:42:53<19:55:19] +[titan] 2025-10-05 03:17:13,367 - root - INFO - step: 7660 loss: 2.5051 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2097 +[titan] 2025-10-05 03:17:13,367 - root - INFO - lr: 4.6369e-05 gnorm: 1.23 [ 4:43:04<19:55:07] +[titan] 2025-10-05 03:17:24,235 - root - INFO - step: 7665 loss: 2.6218 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.3131 +[titan] 2025-10-05 03:17:24,235 - root - INFO - lr: 4.6364e-05 gnorm: 1.19 [ 4:43:15<19:54:55] +[titan] 2025-10-05 03:17:35,066 - root - INFO - step: 7670 loss: 2.5900 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2847 +[titan] 2025-10-05 03:17:35,066 - root - INFO - lr: 4.6360e-05 gnorm: 1.23 [ 4:43:26<19:54:43] +[titan] 2025-10-05 03:17:45,893 - root - INFO - step: 7675 loss: 2.5953 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 03:17:45,893 - root - INFO - lr: 4.6355e-05 gnorm: 1.19 [ 4:43:37<19:54:30] +[titan] 2025-10-05 03:17:56,861 - root - INFO - step: 7680 loss: 2.5148 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2185 +[titan] 2025-10-05 03:17:56,861 - root - INFO - lr: 4.6350e-05 
gnorm: 1.23 [ 4:43:47<19:54:19] +[titan] 2025-10-05 03:17:57,032 - root - INFO - Dumping profiler traces at step 7680 +[titan] 2025-10-05 03:17:57,070 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:18:07,907 - root - INFO - step: 7685 loss: 2.4389 memory: 118.84GiB(85.28%) tps: 29,665 tflops: 411.56 mfu: 41.61% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 03:18:07,907 - root - INFO - lr: 4.6345e-05 gnorm: 1.17 [ 4:43:59<19:54:08] +[titan] 2025-10-05 03:18:18,756 - root - INFO - step: 7690 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:18:18,757 - root - INFO - lr: 4.6340e-05 gnorm: 1.18 [ 4:44:09<19:53:56] +[titan] 2025-10-05 03:18:29,609 - root - INFO - step: 7695 loss: 2.5730 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:18:29,609 - root - INFO - lr: 4.6335e-05 gnorm: 1.36 [ 4:44:20<19:53:43] +[titan] 2025-10-05 03:18:38,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:18:40,472 - root - INFO - step: 7700 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2643 +[titan] 2025-10-05 03:18:40,473 - root - INFO - lr: 4.6330e-05 gnorm: 1.19 [ 4:44:31<19:53:31] +[titan] 2025-10-05 03:18:51,364 - root - INFO - step: 7705 loss: 2.5443 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 03:18:51,364 - root - INFO - lr: 4.6325e-05 gnorm: 1.19 [ 4:44:42<19:53:20] +[titan] 2025-10-05 03:19:02,224 - root - INFO - step: 7710 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2637 +[titan] 2025-10-05 03:19:02,224 - root - INFO - lr: 4.6321e-05 gnorm: 1.20 [ 4:44:53<19:53:08] +[titan] 2025-10-05 03:19:13,098 - root - INFO - step: 7715 loss: 2.5489 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2486 +[titan] 2025-10-05 03:19:13,098 - root - INFO - lr: 4.6316e-05 gnorm: 1.20 [ 4:45:04<19:52:56] +[titan] 2025-10-05 03:19:23,973 - root - INFO - step: 7720 loss: 2.4402 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1529 +[titan] 2025-10-05 03:19:23,974 - root - INFO - lr: 4.6311e-05 gnorm: 1.21 [ 4:45:15<19:52:44] +[titan] 2025-10-05 03:19:34,816 - root - INFO - step: 7725 loss: 2.5551 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:19:34,816 - root - INFO - lr: 4.6306e-05 gnorm: 1.19 [ 4:45:25<19:52:31] +[titan] 2025-10-05 03:19:45,679 - root - INFO - step: 7730 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2814 +[titan] 2025-10-05 03:19:45,679 - root - INFO - lr: 4.6301e-05 
gnorm: 1.17 [ 4:45:36<19:52:19] +[titan] 2025-10-05 03:19:56,502 - root - INFO - step: 7735 loss: 2.5206 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2970 global_avg_mtp_loss: 2.2236 +[titan] 2025-10-05 03:19:56,502 - root - INFO - lr: 4.6296e-05 gnorm: 1.24 [ 4:45:47<19:52:07] +[titan] 2025-10-05 03:20:07,337 - root - INFO - step: 7740 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2545 +[titan] 2025-10-05 03:20:07,337 - root - INFO - lr: 4.6291e-05 gnorm: 1.19 [ 4:45:58<19:51:55] +[titan] 2025-10-05 03:20:18,166 - root - INFO - step: 7745 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.79 mfu: 42.45% global_avg_ntp_loss: 0.2938 global_avg_mtp_loss: 2.1964 +[titan] 2025-10-05 03:20:18,166 - root - INFO - lr: 4.6286e-05 gnorm: 1.20 [ 4:46:09<19:51:43] +[titan] 2025-10-05 03:20:26,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:20:29,014 - root - INFO - step: 7750 loss: 2.4800 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.1876 +[titan] 2025-10-05 03:20:29,015 - root - INFO - lr: 4.6281e-05 gnorm: 1.17 [ 4:46:20<19:51:31] +[titan] 2025-10-05 03:20:39,856 - root - INFO - step: 7755 loss: 2.4850 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1929 +[titan] 2025-10-05 03:20:39,857 - root - INFO - lr: 4.6276e-05 gnorm: 1.24 [ 4:46:30<19:51:19] +[titan] 2025-10-05 03:20:50,697 - root - INFO - step: 7760 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2144 +[titan] 2025-10-05 03:20:50,697 - root - INFO - lr: 4.6271e-05 gnorm: 1.13 [ 4:46:41<19:51:07] +[titan] 2025-10-05 03:21:01,572 - root - INFO - step: 7765 loss: 2.5168 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2200 +[titan] 2025-10-05 03:21:01,573 - root - INFO - lr: 4.6267e-05 gnorm: 1.17 [ 4:46:52<19:50:55] +[titan] 2025-10-05 03:21:12,426 - root - INFO - step: 7770 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:21:12,426 - root - INFO - lr: 4.6262e-05 gnorm: 1.24 [ 4:47:03<19:50:43] +[titan] 2025-10-05 03:21:23,262 - root - INFO - step: 7775 loss: 2.5468 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2462 +[titan] 2025-10-05 03:21:23,262 - root - INFO - lr: 4.6257e-05 gnorm: 1.22 [ 4:47:14<19:50:31] +[titan] 2025-10-05 03:21:34,121 - root - INFO - step: 7780 loss: 2.5186 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2965 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:21:34,121 - root - INFO - lr: 4.6252e-05 
gnorm: 1.22 [ 4:47:25<19:50:19] +[titan] 2025-10-05 03:21:44,959 - root - INFO - step: 7785 loss: 2.5555 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2532 +[titan] 2025-10-05 03:21:44,960 - root - INFO - lr: 4.6247e-05 gnorm: 1.19 [ 4:47:36<19:50:07] +[titan] 2025-10-05 03:21:55,841 - root - INFO - step: 7790 loss: 2.5595 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2574 +[titan] 2025-10-05 03:21:55,841 - root - INFO - lr: 4.6242e-05 gnorm: 1.21 [ 4:47:46<19:49:55] +[titan] 2025-10-05 03:22:06,686 - root - INFO - step: 7795 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2407 +[titan] 2025-10-05 03:22:06,686 - root - INFO - lr: 4.6237e-05 gnorm: 1.20 [ 4:47:57<19:49:43] +[titan] 2025-10-05 03:22:15,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:22:17,501 - root - INFO - step: 7800 loss: 2.4671 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1775 +[titan] 2025-10-05 03:22:17,501 - root - INFO - lr: 4.6232e-05 gnorm: 1.31 [ 4:48:08<19:49:30] +[titan] 2025-10-05 03:22:28,367 - root - INFO - step: 7805 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2366 +[titan] 2025-10-05 03:22:28,367 - root - INFO - lr: 4.6227e-05 gnorm: 1.21 [ 4:48:19<19:49:18] +[titan] 2025-10-05 03:22:39,182 - root - INFO - step: 7810 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:22:39,182 - root - INFO - lr: 4.6222e-05 gnorm: 1.23 [ 4:48:30<19:49:06] +[titan] 2025-10-05 03:22:50,001 - root - INFO - step: 7815 loss: 2.5037 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2093 +[titan] 2025-10-05 03:22:50,001 - root - INFO - lr: 4.6217e-05 gnorm: 1.17 [ 4:48:41<19:48:54] +[titan] 2025-10-05 03:23:00,861 - root - INFO - step: 7820 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 03:23:00,861 - root - INFO - lr: 4.6212e-05 gnorm: 1.15 [ 4:48:51<19:48:42] +[titan] 2025-10-05 03:23:11,665 - root - INFO - step: 7825 loss: 2.5549 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:23:11,665 - root - INFO - lr: 4.6207e-05 gnorm: 1.18 [ 4:49:02<19:48:30] +[titan] 2025-10-05 03:23:22,463 - root - INFO - step: 7830 loss: 2.5877 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2813 +[titan] 2025-10-05 03:23:22,464 - root - INFO - lr: 4.6202e-05 
gnorm: 1.22 [ 4:49:13<19:48:18] +[titan] 2025-10-05 03:23:33,276 - root - INFO - step: 7835 loss: 2.5278 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:23:33,276 - root - INFO - lr: 4.6197e-05 gnorm: 1.28 [ 4:49:24<19:48:05] +[titan] 2025-10-05 03:23:44,101 - root - INFO - step: 7840 loss: 2.5759 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 03:23:44,101 - root - INFO - lr: 4.6192e-05 gnorm: 1.19 [ 4:49:35<19:47:53] +[titan] 2025-10-05 03:23:54,974 - root - INFO - step: 7845 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 03:23:54,975 - root - INFO - lr: 4.6187e-05 gnorm: 1.19 [ 4:49:46<19:47:41] +[titan] 2025-10-05 03:24:03,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:24:05,782 - root - INFO - step: 7850 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2873 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 03:24:05,783 - root - INFO - lr: 4.6182e-05 gnorm: 1.17 [ 4:49:56<19:47:29] +[titan] 2025-10-05 03:24:16,593 - root - INFO - step: 7855 loss: 2.4523 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1636 +[titan] 2025-10-05 03:24:16,593 - root - INFO - lr: 4.6177e-05 gnorm: 1.14 [ 4:50:07<19:47:17] +[titan] 2025-10-05 03:24:27,423 - root - INFO - step: 7860 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2509 +[titan] 2025-10-05 03:24:27,424 - root - INFO - lr: 4.6172e-05 gnorm: 1.24 [ 4:50:18<19:47:05] +[titan] 2025-10-05 03:24:38,249 - root - INFO - step: 7865 loss: 2.5375 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2394 +[titan] 2025-10-05 03:24:38,249 - root - INFO - lr: 4.6167e-05 gnorm: 1.22 [ 4:50:29<19:46:53] +[titan] 2025-10-05 03:24:49,117 - root - INFO - step: 7870 loss: 2.4208 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1365 +[titan] 2025-10-05 03:24:49,117 - root - INFO - lr: 4.6163e-05 gnorm: 1.17 [ 4:50:40<19:46:41] +[titan] 2025-10-05 03:25:00,043 - root - INFO - step: 7875 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:25:00,043 - root - INFO - lr: 4.6158e-05 gnorm: 1.19 [ 4:50:51<19:46:29] +[titan] 2025-10-05 03:25:10,889 - root - INFO - step: 7880 loss: 2.5464 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2467 +[titan] 2025-10-05 03:25:10,889 - root - INFO - lr: 4.6153e-05 
gnorm: 1.19 [ 4:51:01<19:46:17] +[titan] 2025-10-05 03:25:21,745 - root - INFO - step: 7885 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:25:21,745 - root - INFO - lr: 4.6148e-05 gnorm: 1.18 [ 4:51:12<19:46:05] +[titan] 2025-10-05 03:25:32,610 - root - INFO - step: 7890 loss: 2.5321 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2346 +[titan] 2025-10-05 03:25:32,610 - root - INFO - lr: 4.6143e-05 gnorm: 1.20 [ 4:51:23<19:45:53] +[titan] 2025-10-05 03:25:43,443 - root - INFO - step: 7895 loss: 2.5115 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:25:43,443 - root - INFO - lr: 4.6138e-05 gnorm: 1.14 [ 4:51:34<19:45:41] +[titan] 2025-10-05 03:25:52,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:25:54,248 - root - INFO - step: 7900 loss: 2.5320 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.75 mfu: 42.54% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2324 +[titan] 2025-10-05 03:25:54,248 - root - INFO - lr: 4.6133e-05 gnorm: 1.18 [ 4:51:45<19:45:29] +[titan] 2025-10-05 03:26:05,135 - root - INFO - step: 7905 loss: 2.5694 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2673 +[titan] 2025-10-05 03:26:05,135 - root - INFO - lr: 4.6128e-05 gnorm: 1.17 [ 4:51:56<19:45:17] +[titan] 2025-10-05 03:26:15,976 - root - INFO - step: 7910 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2989 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:26:15,976 - root - INFO - lr: 4.6123e-05 gnorm: 1.24 [ 4:52:07<19:45:05] +[titan] 2025-10-05 03:26:26,803 - root - INFO - step: 7915 loss: 2.5234 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2262 +[titan] 2025-10-05 03:26:26,803 - root - INFO - lr: 4.6118e-05 gnorm: 1.20 [ 4:52:17<19:44:53] +[titan] 2025-10-05 03:26:37,605 - root - INFO - step: 7920 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2145 +[titan] 2025-10-05 03:26:37,605 - root - INFO - lr: 4.6113e-05 gnorm: 1.21 [ 4:52:28<19:44:41] +[titan] 2025-10-05 03:26:48,452 - root - INFO - step: 7925 loss: 2.4185 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 03:26:48,452 - root - INFO - lr: 4.6107e-05 gnorm: 1.15 [ 4:52:39<19:44:29] +[titan] 2025-10-05 03:26:59,330 - root - INFO - step: 7930 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 03:26:59,330 - root - INFO - lr: 4.6102e-05 
gnorm: 1.26 [ 4:52:50<19:44:17] +[titan] 2025-10-05 03:27:10,155 - root - INFO - step: 7935 loss: 2.4620 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2890 global_avg_mtp_loss: 2.1731 +[titan] 2025-10-05 03:27:10,155 - root - INFO - lr: 4.6097e-05 gnorm: 1.18 [ 4:53:01<19:44:05] +[titan] 2025-10-05 03:27:20,964 - root - INFO - step: 7940 loss: 2.4808 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.61 mfu: 42.53% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:27:20,964 - root - INFO - lr: 4.6092e-05 gnorm: 1.15 [ 4:53:12<19:43:52] +[titan] 2025-10-05 03:27:31,803 - root - INFO - step: 7945 loss: 2.5084 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2142 +[titan] 2025-10-05 03:27:31,803 - root - INFO - lr: 4.6087e-05 gnorm: 1.16 [ 4:53:22<19:43:40] +[titan] 2025-10-05 03:27:40,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:27:42,583 - root - INFO - step: 7950 loss: 2.5326 memory: 118.84GiB(85.28%) tps: 30,397 tflops: 421.71 mfu: 42.64% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2359 +[titan] 2025-10-05 03:27:42,583 - root - INFO - lr: 4.6082e-05 gnorm: 1.21 [ 4:53:33<19:43:28] +[titan] 2025-10-05 03:27:53,381 - root - INFO - step: 7955 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2591 +[titan] 2025-10-05 03:27:53,382 - root - INFO - lr: 4.6077e-05 gnorm: 1.18 [ 4:53:44<19:43:16] +[titan] 2025-10-05 03:28:04,227 - root - INFO - step: 7960 loss: 2.4969 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2940 global_avg_mtp_loss: 2.2030 +[titan] 2025-10-05 03:28:04,227 - root - INFO - lr: 4.6072e-05 gnorm: 1.15 [ 4:53:55<19:43:04] +[titan] 2025-10-05 03:28:15,055 - root - INFO - step: 7965 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1237 +[titan] 2025-10-05 03:28:15,055 - root - INFO - lr: 4.6067e-05 gnorm: 1.13 [ 4:54:06<19:42:52] +[titan] 2025-10-05 03:28:25,883 - root - INFO - step: 7970 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.2034 +[titan] 2025-10-05 03:28:25,883 - root - INFO - lr: 4.6062e-05 gnorm: 1.17 [ 4:54:16<19:42:40] +[titan] 2025-10-05 03:28:36,715 - root - INFO - step: 7975 loss: 2.5491 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2491 +[titan] 2025-10-05 03:28:36,715 - root - INFO - lr: 4.6057e-05 gnorm: 1.19 [ 4:54:27<19:42:28] +[titan] 2025-10-05 03:28:47,543 - root - INFO - step: 7980 loss: 2.4817 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:28:47,543 - root - INFO - lr: 4.6052e-05 
gnorm: 1.16 [ 4:54:38<19:42:16] +[titan] 2025-10-05 03:28:58,364 - root - INFO - step: 7985 loss: 2.5422 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2429 +[titan] 2025-10-05 03:28:58,364 - root - INFO - lr: 4.6047e-05 gnorm: 1.18 [ 4:54:49<19:42:03] +[titan] 2025-10-05 03:29:09,176 - root - INFO - step: 7990 loss: 2.5558 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2558 +[titan] 2025-10-05 03:29:09,176 - root - INFO - lr: 4.6042e-05 gnorm: 1.18 [ 4:55:00<19:41:51] +[titan] 2025-10-05 03:29:19,983 - root - INFO - step: 7995 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:29:19,983 - root - INFO - lr: 4.6037e-05 gnorm: 1.16 [ 4:55:11<19:41:39] +[titan] 2025-10-05 03:29:28,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:29:30,811 - root - INFO - step: 8000 loss: 2.5669 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.3034 global_avg_mtp_loss: 2.2635 +[titan] 2025-10-05 03:29:30,811 - root - INFO - lr: 4.6032e-05 gnorm: 1.20 [ 4:55:21<19:41:27] +[titan] 2025-10-05 03:29:41,667 - root - INFO - step: 8005 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2699 +[titan] 2025-10-05 03:29:41,667 - root - INFO - lr: 4.6027e-05 gnorm: 1.25 [ 4:55:32<19:41:15] +[titan] 2025-10-05 03:29:52,487 - root - INFO - step: 8010 loss: 2.5006 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2058 +[titan] 2025-10-05 03:29:52,487 - root - INFO - lr: 4.6022e-05 gnorm: 1.26 [ 4:55:43<19:41:03] +[titan] 2025-10-05 03:30:03,339 - root - INFO - step: 8015 loss: 2.4914 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:30:03,339 - root - INFO - lr: 4.6017e-05 gnorm: 1.18 [ 4:55:54<19:40:51] +[titan] 2025-10-05 03:30:14,162 - root - INFO - step: 8020 loss: 2.4809 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:30:14,162 - root - INFO - lr: 4.6012e-05 gnorm: 1.20 [ 4:56:05<19:40:39] +[titan] 2025-10-05 03:30:25,002 - root - INFO - step: 8025 loss: 2.4991 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2042 +[titan] 2025-10-05 03:30:25,003 - root - INFO - lr: 4.6007e-05 gnorm: 1.17 [ 4:56:16<19:40:27] +[titan] 2025-10-05 03:30:35,840 - root - INFO - step: 8030 loss: 2.4390 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1512 +[titan] 2025-10-05 03:30:35,841 - root - INFO - lr: 4.6001e-05 
gnorm: 1.18 [ 4:56:26<19:40:15] +[titan] 2025-10-05 03:30:46,678 - root - INFO - step: 8035 loss: 2.5127 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:30:46,678 - root - INFO - lr: 4.5996e-05 gnorm: 1.21 [ 4:56:37<19:40:03] +[titan] 2025-10-05 03:30:57,494 - root - INFO - step: 8040 loss: 2.4745 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1848 +[titan] 2025-10-05 03:30:57,495 - root - INFO - lr: 4.5991e-05 gnorm: 1.17 [ 4:56:48<19:39:51] +[titan] 2025-10-05 03:31:08,359 - root - INFO - step: 8045 loss: 2.5034 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2082 +[titan] 2025-10-05 03:31:08,360 - root - INFO - lr: 4.5986e-05 gnorm: 1.19 [ 4:56:59<19:39:39] +[titan] 2025-10-05 03:31:17,027 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:31:19,196 - root - INFO - step: 8050 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1741 +[titan] 2025-10-05 03:31:19,196 - root - INFO - lr: 4.5981e-05 gnorm: 1.19 [ 4:57:10<19:39:27] +[titan] 2025-10-05 03:31:30,047 - root - INFO - step: 8055 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2206 +[titan] 2025-10-05 03:31:30,047 - root - INFO - lr: 4.5976e-05 gnorm: 1.16 [ 4:57:21<19:39:15] +[titan] 2025-10-05 03:31:40,901 - root - INFO - step: 8060 loss: 2.4474 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 03:31:40,902 - root - INFO - lr: 4.5971e-05 gnorm: 1.14 [ 4:57:31<19:39:03] +[titan] 2025-10-05 03:31:51,725 - root - INFO - step: 8065 loss: 2.5411 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2406 +[titan] 2025-10-05 03:31:51,725 - root - INFO - lr: 4.5966e-05 gnorm: 1.17 [ 4:57:42<19:38:51] +[titan] 2025-10-05 03:32:02,621 - root - INFO - step: 8070 loss: 2.4864 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1942 +[titan] 2025-10-05 03:32:02,621 - root - INFO - lr: 4.5961e-05 gnorm: 1.20 [ 4:57:53<19:38:39] +[titan] 2025-10-05 03:32:13,441 - root - INFO - step: 8075 loss: 2.5540 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 03:32:13,441 - root - INFO - lr: 4.5956e-05 gnorm: 1.17 [ 4:58:04<19:38:27] +[titan] 2025-10-05 03:32:24,287 - root - INFO - step: 8080 loss: 2.4398 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1536 +[titan] 2025-10-05 03:32:24,287 - root - INFO - lr: 4.5951e-05 
gnorm: 1.14 [ 4:58:15<19:38:15] +[titan] 2025-10-05 03:32:35,118 - root - INFO - step: 8085 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2356 +[titan] 2025-10-05 03:32:35,118 - root - INFO - lr: 4.5945e-05 gnorm: 1.21 [ 4:58:26<19:38:03] +[titan] 2025-10-05 03:32:45,958 - root - INFO - step: 8090 loss: 2.5225 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2257 +[titan] 2025-10-05 03:32:45,958 - root - INFO - lr: 4.5940e-05 gnorm: 1.12 [ 4:58:37<19:37:51] +[titan] 2025-10-05 03:32:56,823 - root - INFO - step: 8095 loss: 2.5506 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2506 +[titan] 2025-10-05 03:32:56,824 - root - INFO - lr: 4.5935e-05 gnorm: 1.21 [ 4:58:47<19:37:39] +[titan] 2025-10-05 03:33:05,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:33:07,719 - root - INFO - step: 8100 loss: 2.5049 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 03:33:07,719 - root - INFO - lr: 4.5930e-05 gnorm: 1.20 [ 4:58:58<19:37:27] +[titan] 2025-10-05 03:33:18,615 - root - INFO - step: 8105 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:33:18,615 - root - INFO - lr: 4.5925e-05 gnorm: 1.12 [ 4:59:09<19:37:16] +[titan] 2025-10-05 03:33:29,481 - root - INFO - step: 8110 loss: 2.4795 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 03:33:29,481 - root - INFO - lr: 4.5920e-05 gnorm: 1.16 [ 4:59:20<19:37:04] +[titan] 2025-10-05 03:33:40,332 - root - INFO - step: 8115 loss: 2.4748 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1833 +[titan] 2025-10-05 03:33:40,332 - root - INFO - lr: 4.5915e-05 gnorm: 1.18 [ 4:59:31<19:36:52] +[titan] 2025-10-05 03:33:51,164 - root - INFO - step: 8120 loss: 2.5292 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2311 +[titan] 2025-10-05 03:33:51,164 - root - INFO - lr: 4.5910e-05 gnorm: 1.19 [ 4:59:42<19:36:40] +[titan] 2025-10-05 03:34:02,020 - root - INFO - step: 8125 loss: 2.4881 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:34:02,020 - root - INFO - lr: 4.5904e-05 gnorm: 1.21 [ 4:59:53<19:36:28] +[titan] 2025-10-05 03:34:12,891 - root - INFO - step: 8130 loss: 2.5727 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:34:12,891 - root - INFO - lr: 4.5899e-05 
gnorm: 1.22 [ 5:00:03<19:36:16] +[titan] 2025-10-05 03:34:23,761 - root - INFO - step: 8135 loss: 2.4550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1663 +[titan] 2025-10-05 03:34:23,761 - root - INFO - lr: 4.5894e-05 gnorm: 1.21 [ 5:00:14<19:36:04] +[titan] 2025-10-05 03:34:34,624 - root - INFO - step: 8140 loss: 2.4669 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:34:34,624 - root - INFO - lr: 4.5889e-05 gnorm: 1.16 [ 5:00:25<19:35:52] +[titan] 2025-10-05 03:34:45,506 - root - INFO - step: 8145 loss: 2.5656 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:34:45,506 - root - INFO - lr: 4.5884e-05 gnorm: 1.18 [ 5:00:36<19:35:40] +[titan] 2025-10-05 03:34:54,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:34:56,340 - root - INFO - step: 8150 loss: 2.4846 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1926 +[titan] 2025-10-05 03:34:56,340 - root - INFO - lr: 4.5879e-05 gnorm: 1.16 [ 5:00:47<19:35:28] +[titan] 2025-10-05 03:35:07,237 - root - INFO - step: 8155 loss: 2.5131 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2164 +[titan] 2025-10-05 03:35:07,237 - root - INFO - lr: 4.5874e-05 gnorm: 1.17 [ 5:00:58<19:35:17] +[titan] 2025-10-05 03:35:18,098 - root - INFO - step: 8160 loss: 2.6082 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3005 +[titan] 2025-10-05 03:35:18,098 - root - INFO - lr: 4.5868e-05 gnorm: 1.18 [ 5:01:09<19:35:05] +[titan] 2025-10-05 03:35:28,978 - root - INFO - step: 8165 loss: 2.5372 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2397 +[titan] 2025-10-05 03:35:28,978 - root - INFO - lr: 4.5863e-05 gnorm: 1.17 [ 5:01:20<19:34:53] +[titan] 2025-10-05 03:35:39,844 - root - INFO - step: 8170 loss: 2.4152 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 03:35:39,844 - root - INFO - lr: 4.5858e-05 gnorm: 1.18 [ 5:01:30<19:34:41] +[titan] 2025-10-05 03:35:50,781 - root - INFO - step: 8175 loss: 2.5578 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3013 global_avg_mtp_loss: 2.2565 +[titan] 2025-10-05 03:35:50,781 - root - INFO - lr: 4.5853e-05 gnorm: 1.27 [ 5:01:41<19:34:29] +[titan] 2025-10-05 03:36:01,663 - root - INFO - step: 8180 loss: 2.4462 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1586 +[titan] 2025-10-05 03:36:01,663 - root - INFO - lr: 4.5848e-05 
gnorm: 1.13 [ 5:01:52<19:34:18] +[titan] 2025-10-05 03:36:12,582 - root - INFO - step: 8185 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:36:12,582 - root - INFO - lr: 4.5843e-05 gnorm: 1.20 [ 5:02:03<19:34:06] +[titan] 2025-10-05 03:36:23,548 - root - INFO - step: 8190 loss: 2.4035 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1217 +[titan] 2025-10-05 03:36:23,549 - root - INFO - lr: 4.5837e-05 gnorm: 1.16 [ 5:02:14<19:33:54] +[titan] 2025-10-05 03:36:28,080 - root - INFO - Dumping profiler traces at step 8192 +[titan] 2025-10-05 03:36:28,118 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:36:34,646 - root - INFO - step: 8195 loss: 2.4867 memory: 118.84GiB(85.28%) tps: 29,528 tflops: 409.66 mfu: 41.42% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 03:36:34,646 - root - INFO - lr: 4.5832e-05 gnorm: 1.16 [ 5:02:25<19:33:43] +[titan] 2025-10-05 03:36:43,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:36:45,533 - root - INFO - step: 8200 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2809 +[titan] 2025-10-05 03:36:45,533 - root - INFO - lr: 4.5827e-05 gnorm: 1.15 [ 5:02:36<19:33:32] +[titan] 2025-10-05 03:36:56,421 - root - INFO - step: 8205 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1517 +[titan] 2025-10-05 03:36:56,421 - root - INFO - lr: 4.5822e-05 gnorm: 1.15 [ 5:02:47<19:33:20] +[titan] 2025-10-05 03:37:07,262 - root - INFO - step: 8210 loss: 2.4422 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2866 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:37:07,262 - root - INFO - lr: 4.5817e-05 gnorm: 1.16 [ 5:02:58<19:33:08] +[titan] 2025-10-05 03:37:18,124 - root - INFO - step: 8215 loss: 2.5901 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3062 global_avg_mtp_loss: 2.2840 +[titan] 2025-10-05 03:37:18,124 - root - INFO - lr: 4.5812e-05 gnorm: 1.23 [ 5:03:09<19:32:56] +[titan] 2025-10-05 03:37:29,001 - root - INFO - step: 8220 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2582 +[titan] 2025-10-05 03:37:29,001 - root - INFO - lr: 4.5806e-05 gnorm: 1.20 [ 5:03:20<19:32:44] +[titan] 2025-10-05 03:37:39,844 - root - INFO - step: 8225 loss: 2.4659 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 03:37:39,845 - root - INFO - lr: 4.5801e-05 gnorm: 1.23 [ 5:03:30<19:32:32] +[titan] 2025-10-05 03:37:50,743 - root - INFO - step: 8230 loss: 2.5410 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 03:37:50,743 - root - INFO - lr: 4.5796e-05 
gnorm: 1.19 [ 5:03:41<19:32:21] +[titan] 2025-10-05 03:38:01,585 - root - INFO - step: 8235 loss: 2.5291 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2326 +[titan] 2025-10-05 03:38:01,585 - root - INFO - lr: 4.5791e-05 gnorm: 1.15 [ 5:03:52<19:32:09] +[titan] 2025-10-05 03:38:12,474 - root - INFO - step: 8240 loss: 2.5137 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:38:12,474 - root - INFO - lr: 4.5786e-05 gnorm: 1.17 [ 5:04:03<19:31:57] +[titan] 2025-10-05 03:38:23,335 - root - INFO - step: 8245 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:38:23,335 - root - INFO - lr: 4.5780e-05 gnorm: 1.17 [ 5:04:14<19:31:45] +[titan] 2025-10-05 03:38:32,037 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:38:34,223 - root - INFO - step: 8250 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2131 +[titan] 2025-10-05 03:38:34,223 - root - INFO - lr: 4.5775e-05 gnorm: 1.18 [ 5:04:25<19:31:33] +[titan] 2025-10-05 03:38:45,088 - root - INFO - step: 8255 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2198 +[titan] 2025-10-05 03:38:45,088 - root - INFO - lr: 4.5770e-05 gnorm: 1.20 [ 5:04:36<19:31:21] +[titan] 2025-10-05 03:38:55,962 - root - INFO - step: 8260 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1524 +[titan] 2025-10-05 03:38:55,962 - root - INFO - lr: 4.5765e-05 gnorm: 1.19 [ 5:04:47<19:31:09] +[titan] 2025-10-05 03:39:06,818 - root - INFO - step: 8265 loss: 2.6017 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2949 +[titan] 2025-10-05 03:39:06,818 - root - INFO - lr: 4.5760e-05 gnorm: 1.23 [ 5:04:57<19:30:58] +[titan] 2025-10-05 03:39:17,707 - root - INFO - step: 8270 loss: 2.4450 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1572 +[titan] 2025-10-05 03:39:17,707 - root - INFO - lr: 4.5754e-05 gnorm: 1.18 [ 5:05:08<19:30:46] +[titan] 2025-10-05 03:39:28,574 - root - INFO - step: 8275 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1559 +[titan] 2025-10-05 03:39:28,574 - root - INFO - lr: 4.5749e-05 gnorm: 1.20 [ 5:05:19<19:30:34] +[titan] 2025-10-05 03:39:39,438 - root - INFO - step: 8280 loss: 2.4782 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2902 global_avg_mtp_loss: 2.1880 +[titan] 2025-10-05 03:39:39,438 - root - INFO - lr: 4.5744e-05 
gnorm: 1.20 [ 5:05:30<19:30:22] +[titan] 2025-10-05 03:39:50,344 - root - INFO - step: 8285 loss: 2.4818 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:39:50,344 - root - INFO - lr: 4.5739e-05 gnorm: 1.16 [ 5:05:41<19:30:10] +[titan] 2025-10-05 03:40:01,252 - root - INFO - step: 8290 loss: 2.4954 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2009 +[titan] 2025-10-05 03:40:01,252 - root - INFO - lr: 4.5733e-05 gnorm: 1.16 [ 5:05:52<19:29:59] +[titan] 2025-10-05 03:40:12,143 - root - INFO - step: 8295 loss: 2.5302 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2317 +[titan] 2025-10-05 03:40:12,143 - root - INFO - lr: 4.5728e-05 gnorm: 1.18 [ 5:06:03<19:29:47] +[titan] 2025-10-05 03:40:20,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:40:23,034 - root - INFO - step: 8300 loss: 2.4874 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:40:23,034 - root - INFO - lr: 4.5723e-05 gnorm: 1.19 [ 5:06:14<19:29:35] +[titan] 2025-10-05 03:40:33,937 - root - INFO - step: 8305 loss: 2.5831 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2800 +[titan] 2025-10-05 03:40:33,938 - root - INFO - lr: 4.5718e-05 gnorm: 1.17 [ 5:06:24<19:29:24] +[titan] 2025-10-05 03:40:44,825 - root - INFO - step: 8310 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2514 +[titan] 2025-10-05 03:40:44,825 - root - INFO - lr: 4.5713e-05 gnorm: 1.17 [ 5:06:35<19:29:12] +[titan] 2025-10-05 03:40:55,729 - root - INFO - step: 8315 loss: 2.5111 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:40:55,729 - root - INFO - lr: 4.5707e-05 gnorm: 1.14 [ 5:06:46<19:29:00] +[titan] 2025-10-05 03:41:06,596 - root - INFO - step: 8320 loss: 2.5003 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2067 +[titan] 2025-10-05 03:41:06,596 - root - INFO - lr: 4.5702e-05 gnorm: 1.19 [ 5:06:57<19:28:48] +[titan] 2025-10-05 03:41:17,525 - root - INFO - step: 8325 loss: 2.4974 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 03:41:17,525 - root - INFO - lr: 4.5697e-05 gnorm: 1.26 [ 5:07:08<19:28:37] +[titan] 2025-10-05 03:41:28,416 - root - INFO - step: 8330 loss: 2.4791 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1878 +[titan] 2025-10-05 03:41:28,416 - root - INFO - lr: 4.5692e-05 
gnorm: 1.19 [ 5:07:19<19:28:25] +[titan] 2025-10-05 03:41:39,305 - root - INFO - step: 8335 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:41:39,305 - root - INFO - lr: 4.5686e-05 gnorm: 1.25 [ 5:07:30<19:28:13] +[titan] 2025-10-05 03:41:50,197 - root - INFO - step: 8340 loss: 2.4762 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:41:50,197 - root - INFO - lr: 4.5681e-05 gnorm: 1.22 [ 5:07:41<19:28:01] +[titan] 2025-10-05 03:42:01,087 - root - INFO - step: 8345 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:42:01,087 - root - INFO - lr: 4.5676e-05 gnorm: 1.33 [ 5:07:52<19:27:50] +[titan] 2025-10-05 03:42:09,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:42:11,958 - root - INFO - step: 8350 loss: 2.5178 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2229 +[titan] 2025-10-05 03:42:11,958 - root - INFO - lr: 4.5671e-05 gnorm: 1.20 [ 5:08:03<19:27:38] +[titan] 2025-10-05 03:42:22,859 - root - INFO - step: 8355 loss: 2.5012 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2065 +[titan] 2025-10-05 03:42:22,859 - root - INFO - lr: 4.5665e-05 gnorm: 1.16 [ 5:08:13<19:27:26] +[titan] 2025-10-05 03:42:33,723 - root - INFO - step: 8360 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2087 +[titan] 2025-10-05 03:42:33,724 - root - INFO - lr: 4.5660e-05 gnorm: 1.21 [ 5:08:24<19:27:14] +[titan] 2025-10-05 03:42:44,605 - root - INFO - step: 8365 loss: 2.4169 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1341 +[titan] 2025-10-05 03:42:44,605 - root - INFO - lr: 4.5655e-05 gnorm: 1.27 [ 5:08:35<19:27:03] +[titan] 2025-10-05 03:42:55,502 - root - INFO - step: 8370 loss: 2.4654 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:42:55,503 - root - INFO - lr: 4.5649e-05 gnorm: 1.13 [ 5:08:46<19:26:51] +[titan] 2025-10-05 03:43:06,377 - root - INFO - step: 8375 loss: 2.4547 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1658 +[titan] 2025-10-05 03:43:06,377 - root - INFO - lr: 4.5644e-05 gnorm: 1.15 [ 5:08:57<19:26:39] +[titan] 2025-10-05 03:43:17,279 - root - INFO - step: 8380 loss: 2.5065 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2118 +[titan] 2025-10-05 03:43:17,279 - root - INFO - lr: 4.5639e-05 
gnorm: 1.18 [ 5:09:08<19:26:27] +[titan] 2025-10-05 03:43:28,170 - root - INFO - step: 8385 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.1973 +[titan] 2025-10-05 03:43:28,171 - root - INFO - lr: 4.5634e-05 gnorm: 1.19 [ 5:09:19<19:26:16] +[titan] 2025-10-05 03:43:39,058 - root - INFO - step: 8390 loss: 2.3818 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 03:43:39,058 - root - INFO - lr: 4.5628e-05 gnorm: 1.18 [ 5:09:30<19:26:04] +[titan] 2025-10-05 03:43:49,941 - root - INFO - step: 8395 loss: 2.4979 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2044 +[titan] 2025-10-05 03:43:49,941 - root - INFO - lr: 4.5623e-05 gnorm: 1.24 [ 5:09:40<19:25:52] +[titan] 2025-10-05 03:43:58,644 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:44:00,835 - root - INFO - step: 8400 loss: 2.4609 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1710 +[titan] 2025-10-05 03:44:00,835 - root - INFO - lr: 4.5618e-05 gnorm: 1.21 [ 5:09:51<19:25:40] +[titan] 2025-10-05 03:44:11,708 - root - INFO - step: 8405 loss: 2.4714 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1804 +[titan] 2025-10-05 03:44:11,708 - root - INFO - lr: 4.5612e-05 gnorm: 1.18 [ 5:10:02<19:25:29] +[titan] 2025-10-05 03:44:22,628 - root - INFO - step: 8410 loss: 2.4894 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1963 +[titan] 2025-10-05 03:44:22,628 - root - INFO - lr: 4.5607e-05 gnorm: 1.17 [ 5:10:13<19:25:17] +[titan] 2025-10-05 03:44:33,498 - root - INFO - step: 8415 loss: 2.4601 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1709 +[titan] 2025-10-05 03:44:33,498 - root - INFO - lr: 4.5602e-05 gnorm: 1.15 [ 5:10:24<19:25:05] +[titan] 2025-10-05 03:44:44,372 - root - INFO - step: 8420 loss: 2.4695 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1796 +[titan] 2025-10-05 03:44:44,372 - root - INFO - lr: 4.5597e-05 gnorm: 1.21 [ 5:10:35<19:24:53] +[titan] 2025-10-05 03:44:55,241 - root - INFO - step: 8425 loss: 2.6043 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.2890 +[titan] 2025-10-05 03:44:55,241 - root - INFO - lr: 4.5591e-05 gnorm: 1.22 [ 5:10:46<19:24:42] +[titan] 2025-10-05 03:45:06,108 - root - INFO - step: 8430 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1610 +[titan] 2025-10-05 03:45:06,108 - root - INFO - lr: 4.5586e-05 
gnorm: 1.22 [ 5:10:57<19:24:30] +[titan] 2025-10-05 03:45:17,033 - root - INFO - step: 8435 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1622 +[titan] 2025-10-05 03:45:17,033 - root - INFO - lr: 4.5581e-05 gnorm: 1.17 [ 5:11:08<19:24:18] +[titan] 2025-10-05 03:45:27,906 - root - INFO - step: 8440 loss: 2.4384 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1521 +[titan] 2025-10-05 03:45:27,906 - root - INFO - lr: 4.5575e-05 gnorm: 1.18 [ 5:11:18<19:24:06] +[titan] 2025-10-05 03:45:38,796 - root - INFO - step: 8445 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2251 +[titan] 2025-10-05 03:45:38,797 - root - INFO - lr: 4.5570e-05 gnorm: 1.18 [ 5:11:29<19:23:55] +[titan] 2025-10-05 03:45:47,504 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:45:49,701 - root - INFO - step: 8450 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1765 +[titan] 2025-10-05 03:45:49,701 - root - INFO - lr: 4.5565e-05 gnorm: 1.15 [ 5:11:40<19:23:43] +[titan] 2025-10-05 03:46:00,576 - root - INFO - step: 8455 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1407 +[titan] 2025-10-05 03:46:00,576 - root - INFO - lr: 4.5559e-05 gnorm: 1.16 [ 5:11:51<19:23:31] +[titan] 2025-10-05 03:46:11,464 - root - INFO - step: 8460 loss: 2.4581 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1695 +[titan] 2025-10-05 03:46:11,465 - root - INFO - lr: 4.5554e-05 gnorm: 1.18 [ 5:12:02<19:23:19] +[titan] 2025-10-05 03:46:22,405 - root - INFO - step: 8465 loss: 2.4681 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2912 global_avg_mtp_loss: 2.1769 +[titan] 2025-10-05 03:46:22,406 - root - INFO - lr: 4.5549e-05 gnorm: 1.26 [ 5:12:13<19:23:08] +[titan] 2025-10-05 03:46:33,303 - root - INFO - step: 8470 loss: 2.4812 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:46:33,303 - root - INFO - lr: 4.5543e-05 gnorm: 1.18 [ 5:12:24<19:22:56] +[titan] 2025-10-05 03:46:44,215 - root - INFO - step: 8475 loss: 2.4456 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:46:44,215 - root - INFO - lr: 4.5538e-05 gnorm: 1.19 [ 5:12:35<19:22:45] +[titan] 2025-10-05 03:46:55,102 - root - INFO - step: 8480 loss: 2.5134 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2167 +[titan] 2025-10-05 03:46:55,103 - root - INFO - lr: 4.5533e-05 
gnorm: 1.22 [ 5:12:46<19:22:33] +[titan] 2025-10-05 03:47:05,998 - root - INFO - step: 8485 loss: 2.4337 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 03:47:05,998 - root - INFO - lr: 4.5527e-05 gnorm: 1.16 [ 5:12:57<19:22:21] +[titan] 2025-10-05 03:47:16,904 - root - INFO - step: 8490 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1235 +[titan] 2025-10-05 03:47:16,904 - root - INFO - lr: 4.5522e-05 gnorm: 1.17 [ 5:13:07<19:22:10] +[titan] 2025-10-05 03:47:27,782 - root - INFO - step: 8495 loss: 2.4698 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1791 +[titan] 2025-10-05 03:47:27,783 - root - INFO - lr: 4.5517e-05 gnorm: 1.17 [ 5:13:18<19:21:58] +[titan] 2025-10-05 03:47:36,458 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:47:38,638 - root - INFO - step: 8500 loss: 2.3537 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0773 +[titan] 2025-10-05 03:47:38,638 - root - INFO - lr: 4.5511e-05 gnorm: 1.20 [ 5:13:29<19:21:46] +[titan] 2025-10-05 03:47:49,538 - root - INFO - step: 8505 loss: 2.5368 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2395 +[titan] 2025-10-05 03:47:49,538 - root - INFO - lr: 4.5506e-05 gnorm: 1.16 [ 5:13:40<19:21:34] +[titan] 2025-10-05 03:48:00,412 - root - INFO - step: 8510 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.1961 +[titan] 2025-10-05 03:48:00,412 - root - INFO - lr: 4.5501e-05 gnorm: 1.19 [ 5:13:51<19:21:22] +[titan] 2025-10-05 03:48:11,277 - root - INFO - step: 8515 loss: 2.4264 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:48:11,277 - root - INFO - lr: 4.5495e-05 gnorm: 1.17 [ 5:14:02<19:21:11] +[titan] 2025-10-05 03:48:22,187 - root - INFO - step: 8520 loss: 2.4968 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2043 +[titan] 2025-10-05 03:48:22,188 - root - INFO - lr: 4.5490e-05 gnorm: 1.24 [ 5:14:13<19:20:59] +[titan] 2025-10-05 03:48:33,044 - root - INFO - step: 8525 loss: 2.5002 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2934 global_avg_mtp_loss: 2.2068 +[titan] 2025-10-05 03:48:33,044 - root - INFO - lr: 4.5485e-05 gnorm: 1.16 [ 5:14:24<19:20:47] +[titan] 2025-10-05 03:48:43,906 - root - INFO - step: 8530 loss: 2.5203 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2249 +[titan] 2025-10-05 03:48:43,906 - root - INFO - lr: 4.5479e-05 
gnorm: 1.18 [ 5:14:34<19:20:35] +[titan] 2025-10-05 03:48:54,778 - root - INFO - step: 8535 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:48:54,778 - root - INFO - lr: 4.5474e-05 gnorm: 1.23 [ 5:14:45<19:20:24] +[titan] 2025-10-05 03:49:05,664 - root - INFO - step: 8540 loss: 2.5027 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2084 +[titan] 2025-10-05 03:49:05,664 - root - INFO - lr: 4.5468e-05 gnorm: 1.19 [ 5:14:56<19:20:12] +[titan] 2025-10-05 03:49:16,537 - root - INFO - step: 8545 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2823 global_avg_mtp_loss: 2.1266 +[titan] 2025-10-05 03:49:16,537 - root - INFO - lr: 4.5463e-05 gnorm: 1.19 [ 5:15:07<19:20:00] +[titan] 2025-10-05 03:49:25,283 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:49:27,468 - root - INFO - step: 8550 loss: 2.4984 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2049 +[titan] 2025-10-05 03:49:27,468 - root - INFO - lr: 4.5458e-05 gnorm: 1.21 [ 5:15:18<19:19:49] +[titan] 2025-10-05 03:49:38,338 - root - INFO - step: 8555 loss: 2.4539 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1653 +[titan] 2025-10-05 03:49:38,338 - root - INFO - lr: 4.5452e-05 gnorm: 1.20 [ 5:15:29<19:19:37] +[titan] 2025-10-05 03:49:49,202 - root - INFO - step: 8560 loss: 2.4721 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:49:49,202 - root - INFO - lr: 4.5447e-05 gnorm: 1.17 [ 5:15:40<19:19:25] +[titan] 2025-10-05 03:50:00,074 - root - INFO - step: 8565 loss: 2.5405 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 03:50:00,074 - root - INFO - lr: 4.5442e-05 gnorm: 1.15 [ 5:15:51<19:19:13] +[titan] 2025-10-05 03:50:10,978 - root - INFO - step: 8570 loss: 2.4470 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 03:50:10,979 - root - INFO - lr: 4.5436e-05 gnorm: 1.22 [ 5:16:02<19:19:02] +[titan] 2025-10-05 03:50:21,887 - root - INFO - step: 8575 loss: 2.4633 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1735 +[titan] 2025-10-05 03:50:21,887 - root - INFO - lr: 4.5431e-05 gnorm: 1.21 [ 5:16:12<19:18:50] +[titan] 2025-10-05 03:50:32,776 - root - INFO - step: 8580 loss: 2.4711 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1810 +[titan] 2025-10-05 03:50:32,776 - root - INFO - lr: 4.5425e-05 
gnorm: 1.18 [ 5:16:23<19:18:38] +[titan] 2025-10-05 03:50:43,667 - root - INFO - step: 8585 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:50:43,667 - root - INFO - lr: 4.5420e-05 gnorm: 1.22 [ 5:16:34<19:18:27] +[titan] 2025-10-05 03:50:54,557 - root - INFO - step: 8590 loss: 2.5385 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2399 +[titan] 2025-10-05 03:50:54,558 - root - INFO - lr: 4.5415e-05 gnorm: 1.18 [ 5:16:45<19:18:15] +[titan] 2025-10-05 03:51:05,424 - root - INFO - step: 8595 loss: 2.4767 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 03:51:05,425 - root - INFO - lr: 4.5409e-05 gnorm: 1.16 [ 5:16:56<19:18:03] +[titan] 2025-10-05 03:51:14,103 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:51:16,290 - root - INFO - step: 8600 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:51:16,290 - root - INFO - lr: 4.5404e-05 gnorm: 1.14 [ 5:17:07<19:17:51] +[titan] 2025-10-05 03:51:27,250 - root - INFO - step: 8605 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2352 +[titan] 2025-10-05 03:51:27,251 - root - INFO - lr: 4.5398e-05 gnorm: 1.15 [ 5:17:18<19:17:40] +[titan] 2025-10-05 03:51:38,134 - root - INFO - step: 8610 loss: 2.4373 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1515 +[titan] 2025-10-05 03:51:38,134 - root - INFO - lr: 4.5393e-05 gnorm: 1.14 [ 5:17:29<19:17:28] +[titan] 2025-10-05 03:51:49,035 - root - INFO - step: 8615 loss: 2.5154 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2980 global_avg_mtp_loss: 2.2174 +[titan] 2025-10-05 03:51:49,036 - root - INFO - lr: 4.5388e-05 gnorm: 1.21 [ 5:17:40<19:17:17] +[titan] 2025-10-05 03:51:59,908 - root - INFO - step: 8620 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1329 +[titan] 2025-10-05 03:51:59,908 - root - INFO - lr: 4.5382e-05 gnorm: 1.19 [ 5:17:50<19:17:05] +[titan] 2025-10-05 03:52:10,800 - root - INFO - step: 8625 loss: 2.4772 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:52:10,800 - root - INFO - lr: 4.5377e-05 gnorm: 1.19 [ 5:18:01<19:16:53] +[titan] 2025-10-05 03:52:21,724 - root - INFO - step: 8630 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1254 +[titan] 2025-10-05 03:52:21,724 - root - INFO - lr: 4.5371e-05 
gnorm: 1.17 [ 5:18:12<19:16:42] +[titan] 2025-10-05 03:52:32,629 - root - INFO - step: 8635 loss: 2.4666 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 03:52:32,629 - root - INFO - lr: 4.5366e-05 gnorm: 1.18 [ 5:18:23<19:16:30] +[titan] 2025-10-05 03:52:43,516 - root - INFO - step: 8640 loss: 2.5035 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:52:43,516 - root - INFO - lr: 4.5360e-05 gnorm: 1.16 [ 5:18:34<19:16:18] +[titan] 2025-10-05 03:52:54,413 - root - INFO - step: 8645 loss: 2.4079 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1260 +[titan] 2025-10-05 03:52:54,414 - root - INFO - lr: 4.5355e-05 gnorm: 1.18 [ 5:18:45<19:16:07] +[titan] 2025-10-05 03:53:03,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:53:05,278 - root - INFO - step: 8650 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:53:05,278 - root - INFO - lr: 4.5350e-05 gnorm: 1.17 [ 5:18:56<19:15:55] +[titan] 2025-10-05 03:53:16,166 - root - INFO - step: 8655 loss: 2.4949 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2017 +[titan] 2025-10-05 03:53:16,166 - root - INFO - lr: 4.5344e-05 gnorm: 1.17 [ 5:19:07<19:15:43] +[titan] 2025-10-05 03:53:27,098 - root - INFO - step: 8660 loss: 2.4590 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1706 +[titan] 2025-10-05 03:53:27,098 - root - INFO - lr: 4.5339e-05 gnorm: 1.20 [ 5:19:18<19:15:32] +[titan] 2025-10-05 03:53:38,012 - root - INFO - step: 8665 loss: 2.5151 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2199 +[titan] 2025-10-05 03:53:38,012 - root - INFO - lr: 4.5333e-05 gnorm: 1.19 [ 5:19:29<19:15:20] +[titan] 2025-10-05 03:53:48,872 - root - INFO - step: 8670 loss: 2.4344 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 03:53:48,872 - root - INFO - lr: 4.5328e-05 gnorm: 1.15 [ 5:19:39<19:15:08] +[titan] 2025-10-05 03:53:59,744 - root - INFO - step: 8675 loss: 2.4632 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1742 +[titan] 2025-10-05 03:53:59,744 - root - INFO - lr: 4.5322e-05 gnorm: 1.17 [ 5:19:50<19:14:56] +[titan] 2025-10-05 03:54:10,610 - root - INFO - step: 8680 loss: 2.4556 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 03:54:10,611 - root - INFO - lr: 4.5317e-05 
gnorm: 1.17 [ 5:20:01<19:14:45] +[titan] 2025-10-05 03:54:21,508 - root - INFO - step: 8685 loss: 2.4742 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1837 +[titan] 2025-10-05 03:54:21,508 - root - INFO - lr: 4.5311e-05 gnorm: 1.20 [ 5:20:12<19:14:33] +[titan] 2025-10-05 03:54:32,411 - root - INFO - step: 8690 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2972 global_avg_mtp_loss: 2.2331 +[titan] 2025-10-05 03:54:32,411 - root - INFO - lr: 4.5306e-05 gnorm: 1.22 [ 5:20:23<19:14:21] +[titan] 2025-10-05 03:54:43,289 - root - INFO - step: 8695 loss: 2.4873 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1952 +[titan] 2025-10-05 03:54:43,290 - root - INFO - lr: 4.5301e-05 gnorm: 1.21 [ 5:20:34<19:14:10] +[titan] 2025-10-05 03:54:52,023 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:54:54,213 - root - INFO - step: 8700 loss: 2.4737 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1831 +[titan] 2025-10-05 03:54:54,213 - root - INFO - lr: 4.5295e-05 gnorm: 1.19 [ 5:20:45<19:13:58] +[titan] 2025-10-05 03:55:03,153 - root - INFO - Dumping profiler traces at step 8704 +[titan] 2025-10-05 03:55:03,191 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:55:05,378 - root - INFO - step: 8705 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 29,348 tflops: 407.16 mfu: 41.17% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:55:05,378 - root - INFO - lr: 4.5290e-05 gnorm: 1.17 [ 5:20:56<19:13:47] +[titan] 2025-10-05 03:55:16,259 - root - INFO - step: 8710 loss: 2.3993 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1181 +[titan] 2025-10-05 03:55:16,259 - root - INFO - lr: 4.5284e-05 gnorm: 1.16 [ 5:21:07<19:13:36] +[titan] 2025-10-05 03:55:27,179 - root - INFO - step: 8715 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1461 +[titan] 2025-10-05 03:55:27,179 - root - INFO - lr: 4.5279e-05 gnorm: 1.17 [ 5:21:18<19:13:24] +[titan] 2025-10-05 03:55:38,073 - root - INFO - step: 8720 loss: 2.3963 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 03:55:38,073 - root - INFO - lr: 4.5273e-05 gnorm: 1.24 [ 5:21:29<19:13:12] +[titan] 2025-10-05 03:55:48,962 - root - INFO - step: 8725 loss: 2.4482 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 03:55:48,962 - root - INFO - lr: 4.5268e-05 gnorm: 1.19 [ 5:21:39<19:13:01] +[titan] 2025-10-05 03:55:59,898 - root - INFO - step: 8730 loss: 2.4827 
memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:55:59,898 - root - INFO - lr: 4.5262e-05 gnorm: 1.18 [ 5:21:50<19:12:49] +[titan] 2025-10-05 03:56:10,791 - root - INFO - step: 8735 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:56:10,791 - root - INFO - lr: 4.5257e-05 gnorm: 1.13 [ 5:22:01<19:12:38] +[titan] 2025-10-05 03:56:21,690 - root - INFO - step: 8740 loss: 2.5138 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:56:21,690 - root - INFO - lr: 4.5251e-05 gnorm: 1.17 [ 5:22:12<19:12:26] +[titan] 2025-10-05 03:56:32,598 - root - INFO - step: 8745 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:56:32,598 - root - INFO - lr: 4.5246e-05 gnorm: 1.21 [ 5:22:23<19:12:14] +[titan] 2025-10-05 03:56:41,299 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:56:43,483 - root - INFO - step: 8750 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 03:56:43,483 - root - INFO - lr: 4.5240e-05 gnorm: 1.23 [ 5:22:34<19:12:03] +[titan] 2025-10-05 03:56:54,343 - root - INFO - step: 8755 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1820 +[titan] 2025-10-05 03:56:54,343 - root - INFO - lr: 4.5235e-05 gnorm: 1.20 [ 5:22:45<19:11:51] +[titan] 2025-10-05 03:57:05,209 - root - INFO - step: 8760 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:57:05,209 - root - INFO - lr: 4.5229e-05 gnorm: 1.14 [ 5:22:56<19:11:39] +[titan] 2025-10-05 03:57:16,152 - root - INFO - step: 8765 loss: 2.5128 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.44 mfu: 42.01% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:57:16,152 - root - INFO - lr: 4.5224e-05 gnorm: 1.17 [ 5:23:07<19:11:28] +[titan] 2025-10-05 03:57:27,083 - root - INFO - step: 8770 loss: 2.4066 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 03:57:27,083 - root - INFO - lr: 4.5218e-05 gnorm: 1.11 [ 5:23:18<19:11:16] +[titan] 2025-10-05 03:57:37,931 - root - INFO - step: 8775 loss: 2.4260 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 03:57:37,932 - root - INFO - lr: 4.5213e-05 gnorm: 1.17 [ 5:23:28<19:11:04] +[titan] 2025-10-05 03:57:48,805 - root - INFO - step: 8780 loss: 2.4759 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1847 +[titan] 2025-10-05 03:57:48,805 - root - INFO - lr: 4.5207e-05 
gnorm: 1.24 [ 5:23:39<19:10:53] +[titan] 2025-10-05 03:57:59,678 - root - INFO - step: 8785 loss: 2.4875 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:57:59,678 - root - INFO - lr: 4.5202e-05 gnorm: 1.16 [ 5:23:50<19:10:41] +[titan] 2025-10-05 03:58:10,559 - root - INFO - step: 8790 loss: 2.4424 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:58:10,559 - root - INFO - lr: 4.5196e-05 gnorm: 1.16 [ 5:24:01<19:10:29] +[titan] 2025-10-05 03:58:21,459 - root - INFO - step: 8795 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1293 +[titan] 2025-10-05 03:58:21,459 - root - INFO - lr: 4.5191e-05 gnorm: 1.13 [ 5:24:12<19:10:18] +[titan] 2025-10-05 03:58:30,178 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:58:32,360 - root - INFO - step: 8800 loss: 2.3926 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1119 +[titan] 2025-10-05 03:58:32,360 - root - INFO - lr: 4.5185e-05 gnorm: 1.16 [ 5:24:23<19:10:06] +[titan] 2025-10-05 03:58:43,220 - root - INFO - step: 8805 loss: 2.5057 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2110 +[titan] 2025-10-05 03:58:43,221 - root - INFO - lr: 4.5180e-05 gnorm: 1.16 [ 5:24:34<19:09:54] +[titan] 2025-10-05 03:58:54,092 - root - INFO - step: 8810 loss: 2.4643 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:58:54,092 - root - INFO - lr: 4.5174e-05 gnorm: 1.21 [ 5:24:45<19:09:42] +[titan] 2025-10-05 03:59:04,956 - root - INFO - step: 8815 loss: 2.5184 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2227 +[titan] 2025-10-05 03:59:04,956 - root - INFO - lr: 4.5169e-05 gnorm: 1.20 [ 5:24:55<19:09:31] +[titan] 2025-10-05 03:59:15,807 - root - INFO - step: 8820 loss: 2.3921 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 03:59:15,807 - root - INFO - lr: 4.5163e-05 gnorm: 1.12 [ 5:25:06<19:09:19] +[titan] 2025-10-05 03:59:26,817 - root - INFO - step: 8825 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.90 mfu: 41.75% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1453 +[titan] 2025-10-05 03:59:26,817 - root - INFO - lr: 4.5158e-05 gnorm: 1.14 [ 5:25:17<19:09:08] +[titan] 2025-10-05 03:59:37,700 - root - INFO - step: 8830 loss: 2.4161 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 03:59:37,700 - root - INFO - lr: 4.5152e-05 
gnorm: 1.17 [ 5:25:28<19:08:56] +[titan] 2025-10-05 03:59:48,610 - root - INFO - step: 8835 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:59:48,610 - root - INFO - lr: 4.5147e-05 gnorm: 1.20 [ 5:25:39<19:08:44] +[titan] 2025-10-05 03:59:59,499 - root - INFO - step: 8840 loss: 2.4555 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 03:59:59,499 - root - INFO - lr: 4.5141e-05 gnorm: 1.16 [ 5:25:50<19:08:33] +[titan] 2025-10-05 04:00:10,375 - root - INFO - step: 8845 loss: 2.5058 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2957 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 04:00:10,376 - root - INFO - lr: 4.5136e-05 gnorm: 1.15 [ 5:26:01<19:08:21] +[titan] 2025-10-05 04:00:19,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:00:21,274 - root - INFO - step: 8850 loss: 2.4134 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:00:21,274 - root - INFO - lr: 4.5130e-05 gnorm: 1.16 [ 5:26:12<19:08:09] +[titan] 2025-10-05 04:00:32,174 - root - INFO - step: 8855 loss: 2.3939 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1130 +[titan] 2025-10-05 04:00:32,174 - root - INFO - lr: 4.5124e-05 gnorm: 1.14 [ 5:26:23<19:07:58] +[titan] 2025-10-05 04:00:43,105 - root - INFO - step: 8860 loss: 2.4901 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.1965 +[titan] 2025-10-05 04:00:43,105 - root - INFO - lr: 4.5119e-05 gnorm: 1.13 [ 5:26:34<19:07:46] +[titan] 2025-10-05 04:00:53,982 - root - INFO - step: 8865 loss: 2.4318 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1455 +[titan] 2025-10-05 04:00:53,982 - root - INFO - lr: 4.5113e-05 gnorm: 1.20 [ 5:26:44<19:07:35] +[titan] 2025-10-05 04:01:04,884 - root - INFO - step: 8870 loss: 2.4552 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 04:01:04,884 - root - INFO - lr: 4.5108e-05 gnorm: 1.17 [ 5:26:55<19:07:23] +[titan] 2025-10-05 04:01:15,755 - root - INFO - step: 8875 loss: 2.4361 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1505 +[titan] 2025-10-05 04:01:15,755 - root - INFO - lr: 4.5102e-05 gnorm: 1.11 [ 5:27:06<19:07:11] +[titan] 2025-10-05 04:01:26,620 - root - INFO - step: 8880 loss: 2.4652 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 04:01:26,621 - root - INFO - lr: 4.5097e-05 
gnorm: 1.18 [ 5:27:17<19:07:00] +[titan] 2025-10-05 04:01:37,500 - root - INFO - step: 8885 loss: 2.4777 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1862 +[titan] 2025-10-05 04:01:37,500 - root - INFO - lr: 4.5091e-05 gnorm: 1.16 [ 5:27:28<19:06:48] +[titan] 2025-10-05 04:01:48,415 - root - INFO - step: 8890 loss: 2.4058 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:01:48,416 - root - INFO - lr: 4.5086e-05 gnorm: 1.17 [ 5:27:39<19:06:36] +[titan] 2025-10-05 04:01:59,279 - root - INFO - step: 8895 loss: 2.4655 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1759 +[titan] 2025-10-05 04:01:59,280 - root - INFO - lr: 4.5080e-05 gnorm: 1.19 [ 5:27:50<19:06:25] +[titan] 2025-10-05 04:02:07,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:02:10,126 - root - INFO - step: 8900 loss: 2.4494 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:02:10,126 - root - INFO - lr: 4.5074e-05 gnorm: 1.24 [ 5:28:01<19:06:13] +[titan] 2025-10-05 04:02:20,976 - root - INFO - step: 8905 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 04:02:20,976 - root - INFO - lr: 4.5069e-05 gnorm: 1.18 [ 5:28:11<19:06:01] +[titan] 2025-10-05 04:02:31,857 - root - INFO - step: 8910 loss: 2.4530 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1646 +[titan] 2025-10-05 04:02:31,857 - root - INFO - lr: 4.5063e-05 gnorm: 1.18 [ 5:28:22<19:05:49] +[titan] 2025-10-05 04:02:42,714 - root - INFO - step: 8915 loss: 2.4292 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:02:42,714 - root - INFO - lr: 4.5058e-05 gnorm: 1.18 [ 5:28:33<19:05:38] +[titan] 2025-10-05 04:02:53,586 - root - INFO - step: 8920 loss: 2.4665 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 04:02:53,586 - root - INFO - lr: 4.5052e-05 gnorm: 1.14 [ 5:28:44<19:05:26] +[titan] 2025-10-05 04:03:04,511 - root - INFO - step: 8925 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1484 +[titan] 2025-10-05 04:03:04,511 - root - INFO - lr: 4.5047e-05 gnorm: 1.20 [ 5:28:55<19:05:14] +[titan] 2025-10-05 04:03:15,417 - root - INFO - step: 8930 loss: 2.5325 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 04:03:15,417 - root - INFO - lr: 4.5041e-05 
gnorm: 1.18 [ 5:29:06<19:05:03] +[titan] 2025-10-05 04:03:26,302 - root - INFO - step: 8935 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:03:26,303 - root - INFO - lr: 4.5035e-05 gnorm: 1.21 [ 5:29:17<19:04:51] +[titan] 2025-10-05 04:03:37,172 - root - INFO - step: 8940 loss: 2.6656 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3417 +[titan] 2025-10-05 04:03:37,172 - root - INFO - lr: 4.5030e-05 gnorm: 1.16 [ 5:29:28<19:04:40] +[titan] 2025-10-05 04:03:48,057 - root - INFO - step: 8945 loss: 2.4401 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1543 +[titan] 2025-10-05 04:03:48,057 - root - INFO - lr: 4.5024e-05 gnorm: 1.12 [ 5:29:39<19:04:28] +[titan] 2025-10-05 04:03:56,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:03:58,919 - root - INFO - step: 8950 loss: 2.4061 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1253 +[titan] 2025-10-05 04:03:58,919 - root - INFO - lr: 4.5019e-05 gnorm: 1.11 [ 5:29:49<19:04:16] +[titan] 2025-10-05 04:04:09,819 - root - INFO - step: 8955 loss: 2.4957 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 04:04:09,820 - root - INFO - lr: 4.5013e-05 gnorm: 1.12 [ 5:30:00<19:04:05] +[titan] 2025-10-05 04:04:20,693 - root - INFO - step: 8960 loss: 2.4047 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1227 +[titan] 2025-10-05 04:04:20,693 - root - INFO - lr: 4.5007e-05 gnorm: 1.15 [ 5:30:11<19:03:53] +[titan] 2025-10-05 04:04:31,580 - root - INFO - step: 8965 loss: 2.4637 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1740 +[titan] 2025-10-05 04:04:31,580 - root - INFO - lr: 4.5002e-05 gnorm: 1.15 [ 5:30:22<19:03:41] +[titan] 2025-10-05 04:04:42,434 - root - INFO - step: 8970 loss: 2.4642 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 04:04:42,434 - root - INFO - lr: 4.4996e-05 gnorm: 1.19 [ 5:30:33<19:03:29] +[titan] 2025-10-05 04:04:53,298 - root - INFO - step: 8975 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 04:04:53,298 - root - INFO - lr: 4.4991e-05 gnorm: 1.20 [ 5:30:44<19:03:18] +[titan] 2025-10-05 04:05:04,159 - root - INFO - step: 8980 loss: 2.4094 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1265 +[titan] 2025-10-05 04:05:04,159 - root - INFO - lr: 4.4985e-05 
gnorm: 1.14 [ 5:30:55<19:03:06] +[titan] 2025-10-05 04:05:15,056 - root - INFO - step: 8985 loss: 2.4593 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1707 +[titan] 2025-10-05 04:05:15,057 - root - INFO - lr: 4.4979e-05 gnorm: 1.20 [ 5:31:06<19:02:54] +[titan] 2025-10-05 04:05:25,930 - root - INFO - step: 8990 loss: 2.3911 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 04:05:25,930 - root - INFO - lr: 4.4974e-05 gnorm: 1.15 [ 5:31:16<19:02:43] +[titan] 2025-10-05 04:05:36,798 - root - INFO - step: 8995 loss: 2.4428 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1566 +[titan] 2025-10-05 04:05:36,798 - root - INFO - lr: 4.4968e-05 gnorm: 1.17 [ 5:31:27<19:02:31] +[titan] 2025-10-05 04:05:45,469 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:05:47,651 - root - INFO - step: 9000 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0801 +[titan] 2025-10-05 04:05:47,652 - root - INFO - lr: 4.4962e-05 gnorm: 1.23 [ 5:31:38<19:02:19] +[titan] 2025-10-05 04:05:58,519 - root - INFO - step: 9005 loss: 2.4431 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1560 +[titan] 2025-10-05 04:05:58,519 - root - INFO - lr: 4.4957e-05 gnorm: 1.17 [ 5:31:49<19:02:08] +[titan] 2025-10-05 04:06:09,392 - root - INFO - step: 9010 loss: 2.4584 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1697 +[titan] 2025-10-05 04:06:09,392 - root - INFO - lr: 4.4951e-05 gnorm: 1.17 [ 5:32:00<19:01:56] +[titan] 2025-10-05 04:06:20,257 - root - INFO - step: 9015 loss: 2.4693 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1794 +[titan] 2025-10-05 04:06:20,257 - root - INFO - lr: 4.4946e-05 gnorm: 1.13 [ 5:32:11<19:01:44] +[titan] 2025-10-05 04:06:31,158 - root - INFO - step: 9020 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1487 +[titan] 2025-10-05 04:06:31,158 - root - INFO - lr: 4.4940e-05 gnorm: 1.15 [ 5:32:22<19:01:33] +[titan] 2025-10-05 04:06:42,018 - root - INFO - step: 9025 loss: 2.3968 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:06:42,018 - root - INFO - lr: 4.4934e-05 gnorm: 1.11 [ 5:32:33<19:01:21] +[titan] 2025-10-05 04:06:52,886 - root - INFO - step: 9030 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:06:52,886 - root - INFO - lr: 4.4929e-05 
gnorm: 1.08 [ 5:32:43<19:01:09] +[titan] 2025-10-05 04:07:03,747 - root - INFO - step: 9035 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 04:07:03,747 - root - INFO - lr: 4.4923e-05 gnorm: 1.16 [ 5:32:54<19:00:57] +[titan] 2025-10-05 04:07:14,610 - root - INFO - step: 9040 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1859 +[titan] 2025-10-05 04:07:14,610 - root - INFO - lr: 4.4917e-05 gnorm: 1.17 [ 5:33:05<19:00:46] +[titan] 2025-10-05 04:07:25,476 - root - INFO - step: 9045 loss: 2.4520 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1641 +[titan] 2025-10-05 04:07:25,476 - root - INFO - lr: 4.4912e-05 gnorm: 1.19 [ 5:33:16<19:00:34] +[titan] 2025-10-05 04:07:34,201 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:07:36,379 - root - INFO - step: 9050 loss: 2.4771 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:07:36,379 - root - INFO - lr: 4.4906e-05 gnorm: 1.19 [ 5:33:27<19:00:22] +[titan] 2025-10-05 04:07:47,258 - root - INFO - step: 9055 loss: 2.4168 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1339 +[titan] 2025-10-05 04:07:47,258 - root - INFO - lr: 4.4900e-05 gnorm: 1.14 [ 5:33:38<19:00:11] +[titan] 2025-10-05 04:07:58,123 - root - INFO - step: 9060 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:07:58,123 - root - INFO - lr: 4.4895e-05 gnorm: 1.16 [ 5:33:49<18:59:59] +[titan] 2025-10-05 04:08:09,003 - root - INFO - step: 9065 loss: 2.4858 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2911 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:08:09,003 - root - INFO - lr: 4.4889e-05 gnorm: 1.18 [ 5:33:59<18:59:47] +[titan] 2025-10-05 04:08:19,858 - root - INFO - step: 9070 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 04:08:19,858 - root - INFO - lr: 4.4883e-05 gnorm: 1.18 [ 5:34:10<18:59:36] +[titan] 2025-10-05 04:08:30,739 - root - INFO - step: 9075 loss: 2.4338 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1483 +[titan] 2025-10-05 04:08:30,739 - root - INFO - lr: 4.4878e-05 gnorm: 1.16 [ 5:34:21<18:59:24] +[titan] 2025-10-05 04:08:41,605 - root - INFO - step: 9080 loss: 2.3786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 04:08:41,605 - root - INFO - lr: 4.4872e-05 
gnorm: 1.24 [ 5:34:32<18:59:12] +[titan] 2025-10-05 04:08:52,482 - root - INFO - step: 9085 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2055 +[titan] 2025-10-05 04:08:52,482 - root - INFO - lr: 4.4866e-05 gnorm: 1.20 [ 5:34:43<18:59:01] +[titan] 2025-10-05 04:09:03,324 - root - INFO - step: 9090 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1628 +[titan] 2025-10-05 04:09:03,325 - root - INFO - lr: 4.4861e-05 gnorm: 1.24 [ 5:34:54<18:58:49] +[titan] 2025-10-05 04:09:14,169 - root - INFO - step: 9095 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1886 +[titan] 2025-10-05 04:09:14,169 - root - INFO - lr: 4.4855e-05 gnorm: 1.21 [ 5:35:05<18:58:37] +[titan] 2025-10-05 04:09:22,830 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:09:25,022 - root - INFO - step: 9100 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:09:25,022 - root - INFO - lr: 4.4849e-05 gnorm: 1.19 [ 5:35:16<18:58:25] +[titan] 2025-10-05 04:09:35,891 - root - INFO - step: 9105 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1300 +[titan] 2025-10-05 04:09:35,891 - root - INFO - lr: 4.4844e-05 gnorm: 1.18 [ 5:35:26<18:58:14] +[titan] 2025-10-05 04:09:46,754 - root - INFO - step: 9110 loss: 2.3843 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1029 +[titan] 2025-10-05 04:09:46,754 - root - INFO - lr: 4.4838e-05 gnorm: 1.28 [ 5:35:37<18:58:02] +[titan] 2025-10-05 04:09:57,624 - root - INFO - step: 9115 loss: 2.4036 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1228 +[titan] 2025-10-05 04:09:57,624 - root - INFO - lr: 4.4832e-05 gnorm: 1.19 [ 5:35:48<18:57:50] +[titan] 2025-10-05 04:10:08,470 - root - INFO - step: 9120 loss: 2.4158 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1325 +[titan] 2025-10-05 04:10:08,470 - root - INFO - lr: 4.4827e-05 gnorm: 1.14 [ 5:35:59<18:57:39] +[titan] 2025-10-05 04:10:19,323 - root - INFO - step: 9125 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 04:10:19,323 - root - INFO - lr: 4.4821e-05 gnorm: 1.19 [ 5:36:10<18:57:27] +[titan] 2025-10-05 04:10:30,178 - root - INFO - step: 9130 loss: 2.4437 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 04:10:30,178 - root - INFO - lr: 4.4815e-05 
gnorm: 1.22 [ 5:36:21<18:57:15] +[titan] 2025-10-05 04:10:41,058 - root - INFO - step: 9135 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1518 +[titan] 2025-10-05 04:10:41,058 - root - INFO - lr: 4.4809e-05 gnorm: 1.15 [ 5:36:32<18:57:04] +[titan] 2025-10-05 04:10:51,913 - root - INFO - step: 9140 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:10:51,913 - root - INFO - lr: 4.4804e-05 gnorm: 1.14 [ 5:36:42<18:56:52] +[titan] 2025-10-05 04:11:02,801 - root - INFO - step: 9145 loss: 2.4160 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1331 +[titan] 2025-10-05 04:11:02,801 - root - INFO - lr: 4.4798e-05 gnorm: 1.18 [ 5:36:53<18:56:40] +[titan] 2025-10-05 04:11:11,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:11:13,643 - root - INFO - step: 9150 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1327 +[titan] 2025-10-05 04:11:13,643 - root - INFO - lr: 4.4792e-05 gnorm: 1.15 [ 5:37:04<18:56:29] +[titan] 2025-10-05 04:11:24,500 - root - INFO - step: 9155 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1789 +[titan] 2025-10-05 04:11:24,500 - root - INFO - lr: 4.4787e-05 gnorm: 1.16 [ 5:37:15<18:56:17] +[titan] 2025-10-05 04:11:35,333 - root - INFO - step: 9160 loss: 2.4173 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1345 +[titan] 2025-10-05 04:11:35,333 - root - INFO - lr: 4.4781e-05 gnorm: 1.15 [ 5:37:26<18:56:05] +[titan] 2025-10-05 04:11:46,194 - root - INFO - step: 9165 loss: 2.4180 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 04:11:46,194 - root - INFO - lr: 4.4775e-05 gnorm: 1.13 [ 5:37:37<18:55:53] +[titan] 2025-10-05 04:11:57,056 - root - INFO - step: 9170 loss: 2.3989 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 04:11:57,057 - root - INFO - lr: 4.4769e-05 gnorm: 1.15 [ 5:37:48<18:55:42] +[titan] 2025-10-05 04:12:07,928 - root - INFO - step: 9175 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 04:12:07,928 - root - INFO - lr: 4.4764e-05 gnorm: 1.11 [ 5:37:58<18:55:30] +[titan] 2025-10-05 04:12:18,847 - root - INFO - step: 9180 loss: 2.5568 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2549 +[titan] 2025-10-05 04:12:18,847 - root - INFO - lr: 4.4758e-05 
gnorm: 1.20 [ 5:38:09<18:55:18] +[titan] 2025-10-05 04:12:29,719 - root - INFO - step: 9185 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1545 +[titan] 2025-10-05 04:12:29,719 - root - INFO - lr: 4.4752e-05 gnorm: 1.16 [ 5:38:20<18:55:07] +[titan] 2025-10-05 04:12:40,611 - root - INFO - step: 9190 loss: 2.3798 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.1027 +[titan] 2025-10-05 04:12:40,611 - root - INFO - lr: 4.4747e-05 gnorm: 1.15 [ 5:38:31<18:54:55] +[titan] 2025-10-05 04:12:51,477 - root - INFO - step: 9195 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1621 +[titan] 2025-10-05 04:12:51,477 - root - INFO - lr: 4.4741e-05 gnorm: 1.15 [ 5:38:42<18:54:44] +[titan] 2025-10-05 04:13:00,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:13:02,353 - root - INFO - step: 9200 loss: 2.4374 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1525 +[titan] 2025-10-05 04:13:02,353 - root - INFO - lr: 4.4735e-05 gnorm: 1.20 [ 5:38:53<18:54:32] +[titan] 2025-10-05 04:13:13,230 - root - INFO - step: 9205 loss: 2.4854 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1940 +[titan] 2025-10-05 04:13:13,230 - root - INFO - lr: 4.4729e-05 gnorm: 1.22 [ 5:39:04<18:54:20] +[titan] 2025-10-05 04:13:24,132 - root - INFO - step: 9210 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:13:24,132 - root - INFO - lr: 4.4724e-05 gnorm: 1.19 [ 5:39:15<18:54:09] +[titan] 2025-10-05 04:13:35,087 - root - INFO - step: 9215 loss: 2.4851 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1930 +[titan] 2025-10-05 04:13:35,087 - root - INFO - lr: 4.4718e-05 gnorm: 1.15 [ 5:39:26<18:53:57] +[titan] 2025-10-05 04:13:37,435 - root - INFO - Dumping profiler traces at step 9216 +[titan] 2025-10-05 04:13:37,472 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:13:46,201 - root - INFO - step: 9220 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 29,486 tflops: 409.07 mfu: 41.36% global_avg_ntp_loss: 0.2928 global_avg_mtp_loss: 2.2073 +[titan] 2025-10-05 04:13:46,201 - root - INFO - lr: 4.4712e-05 gnorm: 1.17 [ 5:39:37<18:53:47] +[titan] 2025-10-05 04:13:57,080 - root - INFO - step: 9225 loss: 2.3856 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1071 +[titan] 2025-10-05 04:13:57,080 - root - INFO - lr: 4.4706e-05 gnorm: 1.15 [ 5:39:48<18:53:35] +[titan] 2025-10-05 04:14:07,953 - root - INFO - step: 9230 loss: 2.4302 
memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1452 +[titan] 2025-10-05 04:14:07,953 - root - INFO - lr: 4.4701e-05 gnorm: 1.15 [ 5:39:58<18:53:23] +[titan] 2025-10-05 04:14:18,819 - root - INFO - step: 9235 loss: 2.4502 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1635 +[titan] 2025-10-05 04:14:18,819 - root - INFO - lr: 4.4695e-05 gnorm: 1.22 [ 5:40:09<18:53:12] +[titan] 2025-10-05 04:14:29,678 - root - INFO - step: 9240 loss: 2.4452 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1584 +[titan] 2025-10-05 04:14:29,678 - root - INFO - lr: 4.4689e-05 gnorm: 1.17 [ 5:40:20<18:53:00] +[titan] 2025-10-05 04:14:40,618 - root - INFO - step: 9245 loss: 2.4345 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1494 +[titan] 2025-10-05 04:14:40,618 - root - INFO - lr: 4.4683e-05 gnorm: 1.14 [ 5:40:31<18:52:48] +[titan] 2025-10-05 04:14:49,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:14:51,484 - root - INFO - step: 9250 loss: 2.5104 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 04:14:51,484 - root - INFO - lr: 4.4678e-05 gnorm: 1.18 [ 5:40:42<18:52:37] +[titan] 2025-10-05 04:15:02,363 - root - INFO - step: 9255 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1304 +[titan] 2025-10-05 04:15:02,363 - root - INFO - lr: 4.4672e-05 gnorm: 1.16 [ 5:40:53<18:52:25] +[titan] 2025-10-05 04:15:13,235 - root - INFO - step: 9260 loss: 2.4511 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:15:13,236 - root - INFO - lr: 4.4666e-05 gnorm: 1.17 [ 5:41:04<18:52:14] +[titan] 2025-10-05 04:15:24,134 - root - INFO - step: 9265 loss: 2.5208 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2247 +[titan] 2025-10-05 04:15:24,134 - root - INFO - lr: 4.4660e-05 gnorm: 1.12 [ 5:41:15<18:52:02] +[titan] 2025-10-05 04:15:35,014 - root - INFO - step: 9270 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1064 +[titan] 2025-10-05 04:15:35,015 - root - INFO - lr: 4.4655e-05 gnorm: 1.14 [ 5:41:25<18:51:50] +[titan] 2025-10-05 04:15:45,940 - root - INFO - step: 9275 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2155 +[titan] 2025-10-05 04:15:45,940 - root - INFO - lr: 4.4649e-05 gnorm: 3.57 [ 5:41:36<18:51:39] +[titan] 2025-10-05 04:15:56,816 - root - INFO - step: 9280 loss: 2.4602 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1702 +[titan] 2025-10-05 04:15:56,816 - root - INFO - lr: 4.4643e-05 
gnorm: 1.17 [ 5:41:47<18:51:27] +[titan] 2025-10-05 04:16:07,687 - root - INFO - step: 9285 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1597 +[titan] 2025-10-05 04:16:07,687 - root - INFO - lr: 4.4637e-05 gnorm: 1.16 [ 5:41:58<18:51:16] +[titan] 2025-10-05 04:16:18,550 - root - INFO - step: 9290 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1896 +[titan] 2025-10-05 04:16:18,550 - root - INFO - lr: 4.4631e-05 gnorm: 1.19 [ 5:42:09<18:51:04] +[titan] 2025-10-05 04:16:29,436 - root - INFO - step: 9295 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1040 +[titan] 2025-10-05 04:16:29,436 - root - INFO - lr: 4.4626e-05 gnorm: 1.23 [ 5:42:20<18:50:52] +[titan] 2025-10-05 04:16:38,130 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:16:40,321 - root - INFO - step: 9300 loss: 2.4653 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:16:40,321 - root - INFO - lr: 4.4620e-05 gnorm: 1.12 [ 5:42:31<18:50:41] +[titan] 2025-10-05 04:16:51,231 - root - INFO - step: 9305 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:16:51,231 - root - INFO - lr: 4.4614e-05 gnorm: 1.15 [ 5:42:42<18:50:29] +[titan] 2025-10-05 04:17:02,103 - root - INFO - step: 9310 loss: 2.4882 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1966 +[titan] 2025-10-05 04:17:02,103 - root - INFO - lr: 4.4608e-05 gnorm: 1.14 [ 5:42:53<18:50:18] +[titan] 2025-10-05 04:17:13,000 - root - INFO - step: 9315 loss: 2.4906 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1975 +[titan] 2025-10-05 04:17:13,000 - root - INFO - lr: 4.4602e-05 gnorm: 1.19 [ 5:43:03<18:50:06] +[titan] 2025-10-05 04:17:23,889 - root - INFO - step: 9320 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:17:23,889 - root - INFO - lr: 4.4597e-05 gnorm: 1.23 [ 5:43:14<18:49:55] +[titan] 2025-10-05 04:17:34,759 - root - INFO - step: 9325 loss: 2.4923 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2926 global_avg_mtp_loss: 2.1997 +[titan] 2025-10-05 04:17:34,759 - root - INFO - lr: 4.4591e-05 gnorm: 1.20 [ 5:43:25<18:49:43] +[titan] 2025-10-05 04:17:45,670 - root - INFO - step: 9330 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1813 +[titan] 2025-10-05 04:17:45,670 - root - INFO - lr: 4.4585e-05 
gnorm: 1.15 [ 5:43:36<18:49:31] +[titan] 2025-10-05 04:17:56,531 - root - INFO - step: 9335 loss: 2.5353 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:17:56,531 - root - INFO - lr: 4.4579e-05 gnorm: 1.15 [ 5:43:47<18:49:20] +[titan] 2025-10-05 04:18:07,423 - root - INFO - step: 9340 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 04:18:07,423 - root - INFO - lr: 4.4573e-05 gnorm: 1.22 [ 5:43:58<18:49:08] +[titan] 2025-10-05 04:18:18,296 - root - INFO - step: 9345 loss: 2.4834 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1917 +[titan] 2025-10-05 04:18:18,296 - root - INFO - lr: 4.4568e-05 gnorm: 1.16 [ 5:44:09<18:48:57] +[titan] 2025-10-05 04:18:27,002 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:18:29,196 - root - INFO - step: 9350 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:18:29,196 - root - INFO - lr: 4.4562e-05 gnorm: 1.12 [ 5:44:20<18:48:45] +[titan] 2025-10-05 04:18:40,056 - root - INFO - step: 9355 loss: 2.4321 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1466 +[titan] 2025-10-05 04:18:40,056 - root - INFO - lr: 4.4556e-05 gnorm: 1.12 [ 5:44:31<18:48:33] +[titan] 2025-10-05 04:18:50,968 - root - INFO - step: 9360 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2060 +[titan] 2025-10-05 04:18:50,968 - root - INFO - lr: 4.4550e-05 gnorm: 1.14 [ 5:44:41<18:48:22] +[titan] 2025-10-05 04:19:01,819 - root - INFO - step: 9365 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1401 +[titan] 2025-10-05 04:19:01,819 - root - INFO - lr: 4.4544e-05 gnorm: 1.14 [ 5:44:52<18:48:10] +[titan] 2025-10-05 04:19:12,717 - root - INFO - step: 9370 loss: 2.5021 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 04:19:12,718 - root - INFO - lr: 4.4538e-05 gnorm: 1.13 [ 5:45:03<18:47:59] +[titan] 2025-10-05 04:19:23,592 - root - INFO - step: 9375 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 04:19:23,592 - root - INFO - lr: 4.4533e-05 gnorm: 1.15 [ 5:45:14<18:47:47] +[titan] 2025-10-05 04:19:34,464 - root - INFO - step: 9380 loss: 2.4564 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1688 +[titan] 2025-10-05 04:19:34,465 - root - INFO - lr: 4.4527e-05 
gnorm: 1.21 [ 5:45:25<18:47:35] +[titan] 2025-10-05 04:19:45,394 - root - INFO - step: 9385 loss: 2.4197 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1348 +[titan] 2025-10-05 04:19:45,394 - root - INFO - lr: 4.4521e-05 gnorm: 1.16 [ 5:45:36<18:47:24] +[titan] 2025-10-05 04:19:56,282 - root - INFO - step: 9390 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:19:56,282 - root - INFO - lr: 4.4515e-05 gnorm: 1.15 [ 5:45:47<18:47:12] +[titan] 2025-10-05 04:20:07,169 - root - INFO - step: 9395 loss: 2.4327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1475 +[titan] 2025-10-05 04:20:07,169 - root - INFO - lr: 4.4509e-05 gnorm: 1.21 [ 5:45:58<18:47:01] +[titan] 2025-10-05 04:20:15,874 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:20:18,060 - root - INFO - step: 9400 loss: 2.5009 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2079 +[titan] 2025-10-05 04:20:18,061 - root - INFO - lr: 4.4503e-05 gnorm: 1.18 [ 5:46:09<18:46:49] +[titan] 2025-10-05 04:20:28,965 - root - INFO - step: 9405 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1469 +[titan] 2025-10-05 04:20:28,966 - root - INFO - lr: 4.4498e-05 gnorm: 1.14 [ 5:46:19<18:46:38] +[titan] 2025-10-05 04:20:39,882 - root - INFO - step: 9410 loss: 2.4983 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 04:20:39,882 - root - INFO - lr: 4.4492e-05 gnorm: 1.20 [ 5:46:30<18:46:26] +[titan] 2025-10-05 04:20:50,800 - root - INFO - step: 9415 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:20:50,800 - root - INFO - lr: 4.4486e-05 gnorm: 1.13 [ 5:46:41<18:46:15] +[titan] 2025-10-05 04:21:01,668 - root - INFO - step: 9420 loss: 2.3688 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0904 +[titan] 2025-10-05 04:21:01,668 - root - INFO - lr: 4.4480e-05 gnorm: 1.16 [ 5:46:52<18:46:03] +[titan] 2025-10-05 04:21:12,542 - root - INFO - step: 9425 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 04:21:12,543 - root - INFO - lr: 4.4474e-05 gnorm: 1.16 [ 5:47:03<18:45:52] +[titan] 2025-10-05 04:21:23,412 - root - INFO - step: 9430 loss: 2.4415 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1551 +[titan] 2025-10-05 04:21:23,412 - root - INFO - lr: 4.4468e-05 
gnorm: 1.20 [ 5:47:14<18:45:40] +[titan] 2025-10-05 04:21:34,322 - root - INFO - step: 9435 loss: 2.3669 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 04:21:34,322 - root - INFO - lr: 4.4462e-05 gnorm: 1.10 [ 5:47:25<18:45:28] +[titan] 2025-10-05 04:21:45,197 - root - INFO - step: 9440 loss: 2.3883 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1070 +[titan] 2025-10-05 04:21:45,197 - root - INFO - lr: 4.4457e-05 gnorm: 1.17 [ 5:47:36<18:45:17] +[titan] 2025-10-05 04:21:56,142 - root - INFO - step: 9445 loss: 2.4394 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1537 +[titan] 2025-10-05 04:21:56,142 - root - INFO - lr: 4.4451e-05 gnorm: 1.15 [ 5:47:47<18:45:05] +[titan] 2025-10-05 04:22:04,823 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:22:07,011 - root - INFO - step: 9450 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1302 +[titan] 2025-10-05 04:22:07,011 - root - INFO - lr: 4.4445e-05 gnorm: 1.11 [ 5:47:57<18:44:54] +[titan] 2025-10-05 04:22:17,891 - root - INFO - step: 9455 loss: 2.4826 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1916 +[titan] 2025-10-05 04:22:17,891 - root - INFO - lr: 4.4439e-05 gnorm: 1.14 [ 5:48:08<18:44:42] +[titan] 2025-10-05 04:22:28,768 - root - INFO - step: 9460 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 04:22:28,768 - root - INFO - lr: 4.4433e-05 gnorm: 1.12 [ 5:48:19<18:44:31] +[titan] 2025-10-05 04:22:39,662 - root - INFO - step: 9465 loss: 2.4758 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:22:39,663 - root - INFO - lr: 4.4427e-05 gnorm: 1.12 [ 5:48:30<18:44:19] +[titan] 2025-10-05 04:22:50,623 - root - INFO - step: 9470 loss: 2.4549 memory: 118.84GiB(85.28%) tps: 29,899 tflops: 414.80 mfu: 41.94% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1672 +[titan] 2025-10-05 04:22:50,623 - root - INFO - lr: 4.4421e-05 gnorm: 1.19 [ 5:48:41<18:44:08] +[titan] 2025-10-05 04:23:01,499 - root - INFO - step: 9475 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1431 +[titan] 2025-10-05 04:23:01,500 - root - INFO - lr: 4.4415e-05 gnorm: 1.12 [ 5:48:52<18:43:56] +[titan] 2025-10-05 04:23:12,360 - root - INFO - step: 9480 loss: 2.4464 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1579 +[titan] 2025-10-05 04:23:12,361 - root - INFO - lr: 4.4410e-05 
gnorm: 1.18 [ 5:49:03<18:43:45] +[titan] 2025-10-05 04:23:23,239 - root - INFO - step: 9485 loss: 2.4527 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1643 +[titan] 2025-10-05 04:23:23,239 - root - INFO - lr: 4.4404e-05 gnorm: 1.19 [ 5:49:14<18:43:33] +[titan] 2025-10-05 04:23:34,114 - root - INFO - step: 9490 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:23:34,114 - root - INFO - lr: 4.4398e-05 gnorm: 1.11 [ 5:49:25<18:43:21] +[titan] 2025-10-05 04:23:44,977 - root - INFO - step: 9495 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1819 +[titan] 2025-10-05 04:23:44,977 - root - INFO - lr: 4.4392e-05 gnorm: 1.11 [ 5:49:35<18:43:10] +[titan] 2025-10-05 04:23:53,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:23:55,915 - root - INFO - step: 9500 loss: 2.4279 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1438 +[titan] 2025-10-05 04:23:55,915 - root - INFO - lr: 4.4386e-05 gnorm: 1.12 [ 5:49:46<18:42:58] +[titan] 2025-10-05 04:24:06,759 - root - INFO - step: 9505 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:24:06,759 - root - INFO - lr: 4.4380e-05 gnorm: 1.15 [ 5:49:57<18:42:47] +[titan] 2025-10-05 04:24:17,624 - root - INFO - step: 9510 loss: 2.4001 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1187 +[titan] 2025-10-05 04:24:17,624 - root - INFO - lr: 4.4374e-05 gnorm: 1.13 [ 5:50:08<18:42:35] +[titan] 2025-10-05 04:24:28,498 - root - INFO - step: 9515 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 04:24:28,498 - root - INFO - lr: 4.4368e-05 gnorm: 1.17 [ 5:50:19<18:42:23] +[titan] 2025-10-05 04:24:39,377 - root - INFO - step: 9520 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:24:39,377 - root - INFO - lr: 4.4362e-05 gnorm: 1.13 [ 5:50:30<18:42:12] +[titan] 2025-10-05 04:24:50,308 - root - INFO - step: 9525 loss: 2.3498 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0740 +[titan] 2025-10-05 04:24:50,308 - root - INFO - lr: 4.4357e-05 gnorm: 1.18 [ 5:50:41<18:42:00] +[titan] 2025-10-05 04:25:01,216 - root - INFO - step: 9530 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1821 +[titan] 2025-10-05 04:25:01,216 - root - INFO - lr: 4.4351e-05 
gnorm: 1.23 [ 5:50:52<18:41:49] +[titan] 2025-10-05 04:25:12,092 - root - INFO - step: 9535 loss: 2.4240 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1392 +[titan] 2025-10-05 04:25:12,092 - root - INFO - lr: 4.4345e-05 gnorm: 1.17 [ 5:51:03<18:41:37] +[titan] 2025-10-05 04:25:22,993 - root - INFO - step: 9540 loss: 2.4342 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1477 +[titan] 2025-10-05 04:25:22,994 - root - INFO - lr: 4.4339e-05 gnorm: 1.18 [ 5:51:13<18:41:26] +[titan] 2025-10-05 04:25:33,873 - root - INFO - step: 9545 loss: 2.4536 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1647 +[titan] 2025-10-05 04:25:33,873 - root - INFO - lr: 4.4333e-05 gnorm: 1.18 [ 5:51:24<18:41:14] +[titan] 2025-10-05 04:25:42,543 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:25:44,728 - root - INFO - step: 9550 loss: 2.4518 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1638 +[titan] 2025-10-05 04:25:44,728 - root - INFO - lr: 4.4327e-05 gnorm: 1.19 [ 5:51:35<18:41:03] +[titan] 2025-10-05 04:25:55,649 - root - INFO - step: 9555 loss: 2.4091 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 04:25:55,650 - root - INFO - lr: 4.4321e-05 gnorm: 1.19 [ 5:51:46<18:40:51] +[titan] 2025-10-05 04:26:06,497 - root - INFO - step: 9560 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1277 +[titan] 2025-10-05 04:26:06,497 - root - INFO - lr: 4.4315e-05 gnorm: 1.14 [ 5:51:57<18:40:40] +[titan] 2025-10-05 04:26:17,403 - root - INFO - step: 9565 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1464 +[titan] 2025-10-05 04:26:17,403 - root - INFO - lr: 4.4309e-05 gnorm: 1.18 [ 5:52:08<18:40:28] +[titan] 2025-10-05 04:26:28,292 - root - INFO - step: 9570 loss: 2.4323 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 04:26:28,292 - root - INFO - lr: 4.4303e-05 gnorm: 1.17 [ 5:52:19<18:40:17] +[titan] 2025-10-05 04:26:39,137 - root - INFO - step: 9575 loss: 2.4565 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:26:39,137 - root - INFO - lr: 4.4297e-05 gnorm: 1.17 [ 5:52:30<18:40:05] +[titan] 2025-10-05 04:26:50,002 - root - INFO - step: 9580 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.2636 +[titan] 2025-10-05 04:26:50,002 - root - INFO - lr: 4.4291e-05 
gnorm: 1.16 [ 5:52:40<18:39:53] +[titan] 2025-10-05 04:27:00,916 - root - INFO - step: 9585 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 04:27:00,916 - root - INFO - lr: 4.4285e-05 gnorm: 1.16 [ 5:52:51<18:39:42] +[titan] 2025-10-05 04:27:11,772 - root - INFO - step: 9590 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2492 +[titan] 2025-10-05 04:27:11,773 - root - INFO - lr: 4.4279e-05 gnorm: 1.14 [ 5:53:02<18:39:30] +[titan] 2025-10-05 04:27:22,632 - root - INFO - step: 9595 loss: 2.4580 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1692 +[titan] 2025-10-05 04:27:22,632 - root - INFO - lr: 4.4273e-05 gnorm: 1.16 [ 5:53:13<18:39:18] +[titan] 2025-10-05 04:27:31,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:27:33,503 - root - INFO - step: 9600 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:27:33,503 - root - INFO - lr: 4.4268e-05 gnorm: 1.14 [ 5:53:24<18:39:07] +[titan] 2025-10-05 04:27:44,378 - root - INFO - step: 9605 loss: 2.4209 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1359 +[titan] 2025-10-05 04:27:44,378 - root - INFO - lr: 4.4262e-05 gnorm: 1.14 [ 5:53:35<18:38:55] +[titan] 2025-10-05 04:27:55,281 - root - INFO - step: 9610 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1495 +[titan] 2025-10-05 04:27:55,281 - root - INFO - lr: 4.4256e-05 gnorm: 1.27 [ 5:53:46<18:38:44] +[titan] 2025-10-05 04:28:06,144 - root - INFO - step: 9615 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1414 +[titan] 2025-10-05 04:28:06,145 - root - INFO - lr: 4.4250e-05 gnorm: 1.12 [ 5:53:57<18:38:32] +[titan] 2025-10-05 04:28:17,025 - root - INFO - step: 9620 loss: 2.4380 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1520 +[titan] 2025-10-05 04:28:17,025 - root - INFO - lr: 4.4244e-05 gnorm: 1.17 [ 5:54:07<18:38:21] +[titan] 2025-10-05 04:28:27,900 - root - INFO - step: 9625 loss: 2.4092 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1278 +[titan] 2025-10-05 04:28:27,901 - root - INFO - lr: 4.4238e-05 gnorm: 1.17 [ 5:54:18<18:38:09] +[titan] 2025-10-05 04:28:38,759 - root - INFO - step: 9630 loss: 2.3955 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1144 +[titan] 2025-10-05 04:28:38,759 - root - INFO - lr: 4.4232e-05 
gnorm: 1.18 [ 5:54:29<18:37:57] +[titan] 2025-10-05 04:28:49,641 - root - INFO - step: 9635 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 04:28:49,641 - root - INFO - lr: 4.4226e-05 gnorm: 1.17 [ 5:54:40<18:37:46] +[titan] 2025-10-05 04:29:00,565 - root - INFO - step: 9640 loss: 2.5391 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2382 +[titan] 2025-10-05 04:29:00,565 - root - INFO - lr: 4.4220e-05 gnorm: 1.17 [ 5:54:51<18:37:34] +[titan] 2025-10-05 04:29:11,410 - root - INFO - step: 9645 loss: 2.4192 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 04:29:11,410 - root - INFO - lr: 4.4214e-05 gnorm: 1.18 [ 5:55:02<18:37:23] +[titan] 2025-10-05 04:29:20,108 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:29:22,295 - root - INFO - step: 9650 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:29:22,295 - root - INFO - lr: 4.4208e-05 gnorm: 1.14 [ 5:55:13<18:37:11] +[titan] 2025-10-05 04:29:33,192 - root - INFO - step: 9655 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1795 +[titan] 2025-10-05 04:29:33,192 - root - INFO - lr: 4.4202e-05 gnorm: 1.18 [ 5:55:24<18:37:00] +[titan] 2025-10-05 04:29:44,075 - root - INFO - step: 9660 loss: 2.5077 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2141 +[titan] 2025-10-05 04:29:44,076 - root - INFO - lr: 4.4196e-05 gnorm: 1.19 [ 5:55:35<18:36:48] +[titan] 2025-10-05 04:29:55,012 - root - INFO - step: 9665 loss: 2.3987 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2812 global_avg_mtp_loss: 2.1174 +[titan] 2025-10-05 04:29:55,012 - root - INFO - lr: 4.4190e-05 gnorm: 1.13 [ 5:55:45<18:36:37] +[titan] 2025-10-05 04:30:05,890 - root - INFO - step: 9670 loss: 2.4206 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1355 +[titan] 2025-10-05 04:30:05,891 - root - INFO - lr: 4.4184e-05 gnorm: 1.15 [ 5:55:56<18:36:25] +[titan] 2025-10-05 04:30:16,776 - root - INFO - step: 9675 loss: 2.3409 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 04:30:16,776 - root - INFO - lr: 4.4178e-05 gnorm: 1.12 [ 5:56:07<18:36:14] +[titan] 2025-10-05 04:30:27,638 - root - INFO - step: 9680 loss: 2.4055 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1224 +[titan] 2025-10-05 04:30:27,639 - root - INFO - lr: 4.4172e-05 
gnorm: 1.11 [ 5:56:18<18:36:02] +[titan] 2025-10-05 04:30:38,514 - root - INFO - step: 9685 loss: 2.4020 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1213 +[titan] 2025-10-05 04:30:38,514 - root - INFO - lr: 4.4166e-05 gnorm: 1.10 [ 5:56:29<18:35:51] +[titan] 2025-10-05 04:30:49,397 - root - INFO - step: 9690 loss: 2.3894 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 04:30:49,397 - root - INFO - lr: 4.4160e-05 gnorm: 1.14 [ 5:56:40<18:35:39] +[titan] 2025-10-05 04:31:00,376 - root - INFO - step: 9695 loss: 2.4118 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.10 mfu: 41.87% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:31:00,376 - root - INFO - lr: 4.4154e-05 gnorm: 1.13 [ 5:56:51<18:35:28] +[titan] 2025-10-05 04:31:09,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:31:11,234 - root - INFO - step: 9700 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 04:31:11,235 - root - INFO - lr: 4.4148e-05 gnorm: 1.17 [ 5:57:02<18:35:16] +[titan] 2025-10-05 04:31:22,095 - root - INFO - step: 9705 loss: 2.4525 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1644 +[titan] 2025-10-05 04:31:22,095 - root - INFO - lr: 4.4142e-05 gnorm: 1.18 [ 5:57:13<18:35:05] +[titan] 2025-10-05 04:31:32,925 - root - INFO - step: 9710 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.77 mfu: 42.44% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:31:32,925 - root - INFO - lr: 4.4136e-05 gnorm: 1.17 [ 5:57:23<18:34:53] +[titan] 2025-10-05 04:31:43,787 - root - INFO - step: 9715 loss: 2.4891 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 04:31:43,787 - root - INFO - lr: 4.4130e-05 gnorm: 1.38 [ 5:57:34<18:34:41] +[titan] 2025-10-05 04:31:54,630 - root - INFO - step: 9720 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0983 +[titan] 2025-10-05 04:31:54,630 - root - INFO - lr: 4.4124e-05 gnorm: 1.14 [ 5:57:45<18:34:30] +[titan] 2025-10-05 04:32:05,581 - root - INFO - step: 9725 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 04:32:05,582 - root - INFO - lr: 4.4118e-05 gnorm: 1.14 [ 5:57:56<18:34:18] +[titan] 2025-10-05 04:32:12,282 - root - INFO - Dumping profiler traces at step 9728 +[titan] 2025-10-05 04:32:12,319 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:32:16,691 - root - INFO - step: 9730 loss: 2.4883 
memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.1950 +[titan] 2025-10-05 04:32:16,691 - root - INFO - lr: 4.4112e-05 gnorm: 1.25 [ 5:58:07<18:34:07] +[titan] 2025-10-05 04:32:27,533 - root - INFO - step: 9735 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:32:27,534 - root - INFO - lr: 4.4106e-05 gnorm: 1.17 [ 5:58:18<18:33:56] +[titan] 2025-10-05 04:32:38,369 - root - INFO - step: 9740 loss: 2.4600 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1703 +[titan] 2025-10-05 04:32:38,369 - root - INFO - lr: 4.4100e-05 gnorm: 1.17 [ 5:58:29<18:33:44] +[titan] 2025-10-05 04:32:49,220 - root - INFO - step: 9745 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 04:32:49,220 - root - INFO - lr: 4.4094e-05 gnorm: 1.16 [ 5:58:40<18:33:32] +[titan] 2025-10-05 04:32:57,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:33:00,142 - root - INFO - step: 9750 loss: 2.3885 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1078 +[titan] 2025-10-05 04:33:00,143 - root - INFO - lr: 4.4088e-05 gnorm: 1.14 [ 5:58:51<18:33:21] +[titan] 2025-10-05 04:33:10,995 - root - INFO - step: 9755 loss: 2.5700 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3070 global_avg_mtp_loss: 2.2630 +[titan] 2025-10-05 04:33:10,995 - root - INFO - lr: 4.4082e-05 gnorm: 1.38 [ 5:59:01<18:33:09] +[titan] 2025-10-05 04:33:21,841 - root - INFO - step: 9760 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:33:21,841 - root - INFO - lr: 4.4076e-05 gnorm: 1.10 [ 5:59:12<18:32:58] +[titan] 2025-10-05 04:33:32,699 - root - INFO - step: 9765 loss: 2.4074 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:33:32,699 - root - INFO - lr: 4.4070e-05 gnorm: 1.12 [ 5:59:23<18:32:46] +[titan] 2025-10-05 04:33:43,562 - root - INFO - step: 9770 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 04:33:43,563 - root - INFO - lr: 4.4064e-05 gnorm: 1.20 [ 5:59:34<18:32:35] +[titan] 2025-10-05 04:33:54,429 - root - INFO - step: 9775 loss: 2.3924 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1125 +[titan] 2025-10-05 04:33:54,429 - root - INFO - lr: 4.4058e-05 gnorm: 1.13 [ 5:59:45<18:32:23] +[titan] 2025-10-05 04:34:05,364 - root - INFO - step: 9780 loss: 2.4335 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1473 +[titan] 2025-10-05 04:34:05,365 - root - INFO - lr: 4.4052e-05 
gnorm: 1.19 [ 5:59:56<18:32:12] +[titan] 2025-10-05 04:34:16,251 - root - INFO - step: 9785 loss: 2.4309 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:34:16,251 - root - INFO - lr: 4.4046e-05 gnorm: 1.30 [ 6:00:07<18:32:00] +[titan] 2025-10-05 04:34:27,120 - root - INFO - step: 9790 loss: 2.4512 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2882 global_avg_mtp_loss: 2.1629 +[titan] 2025-10-05 04:34:27,120 - root - INFO - lr: 4.4039e-05 gnorm: 1.21 [ 6:00:18<18:31:48] +[titan] 2025-10-05 04:34:37,998 - root - INFO - step: 9795 loss: 2.3456 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 04:34:37,999 - root - INFO - lr: 4.4033e-05 gnorm: 1.14 [ 6:00:28<18:31:37] +[titan] 2025-10-05 04:34:46,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:34:48,882 - root - INFO - step: 9800 loss: 2.4057 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:34:48,883 - root - INFO - lr: 4.4027e-05 gnorm: 1.18 [ 6:00:39<18:31:25] +[titan] 2025-10-05 04:34:59,778 - root - INFO - step: 9805 loss: 2.5371 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2995 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:34:59,779 - root - INFO - lr: 4.4021e-05 gnorm: 1.15 [ 6:00:50<18:31:14] +[titan] 2025-10-05 04:35:10,650 - root - INFO - step: 9810 loss: 2.4142 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1309 +[titan] 2025-10-05 04:35:10,650 - root - INFO - lr: 4.4015e-05 gnorm: 1.16 [ 6:01:01<18:31:02] +[titan] 2025-10-05 04:35:21,521 - root - INFO - step: 9815 loss: 2.4068 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1250 +[titan] 2025-10-05 04:35:21,521 - root - INFO - lr: 4.4009e-05 gnorm: 1.16 [ 6:01:12<18:30:51] +[titan] 2025-10-05 04:35:32,405 - root - INFO - step: 9820 loss: 2.4191 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:35:32,405 - root - INFO - lr: 4.4003e-05 gnorm: 1.14 [ 6:01:23<18:30:39] +[titan] 2025-10-05 04:35:43,265 - root - INFO - step: 9825 loss: 2.4557 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:35:43,266 - root - INFO - lr: 4.3997e-05 gnorm: 1.11 [ 6:01:34<18:30:28] +[titan] 2025-10-05 04:35:54,144 - root - INFO - step: 9830 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 04:35:54,144 - root - INFO - lr: 4.3991e-05 
gnorm: 1.10 [ 6:01:45<18:30:16] +[titan] 2025-10-05 04:36:05,038 - root - INFO - step: 9835 loss: 2.3594 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0837 +[titan] 2025-10-05 04:36:05,038 - root - INFO - lr: 4.3985e-05 gnorm: 1.17 [ 6:01:55<18:30:05] +[titan] 2025-10-05 04:36:15,903 - root - INFO - step: 9840 loss: 2.3943 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1136 +[titan] 2025-10-05 04:36:15,903 - root - INFO - lr: 4.3979e-05 gnorm: 1.13 [ 6:02:06<18:29:53] +[titan] 2025-10-05 04:36:26,766 - root - INFO - step: 9845 loss: 2.3607 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0841 +[titan] 2025-10-05 04:36:26,766 - root - INFO - lr: 4.3973e-05 gnorm: 1.11 [ 6:02:17<18:29:41] +[titan] 2025-10-05 04:36:35,444 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:36:37,631 - root - INFO - step: 9850 loss: 2.4018 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1206 +[titan] 2025-10-05 04:36:37,631 - root - INFO - lr: 4.3967e-05 gnorm: 1.18 [ 6:02:28<18:29:30] +[titan] 2025-10-05 04:36:48,494 - root - INFO - step: 9855 loss: 2.3920 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 04:36:48,494 - root - INFO - lr: 4.3961e-05 gnorm: 1.14 [ 6:02:39<18:29:18] +[titan] 2025-10-05 04:36:59,366 - root - INFO - step: 9860 loss: 2.3928 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1132 +[titan] 2025-10-05 04:36:59,366 - root - INFO - lr: 4.3955e-05 gnorm: 1.16 [ 6:02:50<18:29:07] +[titan] 2025-10-05 04:37:10,292 - root - INFO - step: 9865 loss: 2.3430 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0689 +[titan] 2025-10-05 04:37:10,292 - root - INFO - lr: 4.3948e-05 gnorm: 1.14 [ 6:03:01<18:28:55] +[titan] 2025-10-05 04:37:21,111 - root - INFO - step: 9870 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.0953 +[titan] 2025-10-05 04:37:21,111 - root - INFO - lr: 4.3942e-05 gnorm: 1.23 [ 6:03:12<18:28:44] +[titan] 2025-10-05 04:37:31,972 - root - INFO - step: 9875 loss: 2.4673 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2893 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:37:31,972 - root - INFO - lr: 4.3936e-05 gnorm: 1.12 [ 6:03:22<18:28:32] +[titan] 2025-10-05 04:37:42,800 - root - INFO - step: 9880 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 04:37:42,800 - root - INFO - lr: 4.3930e-05 
gnorm: 1.18 [ 6:03:33<18:28:20] +[titan] 2025-10-05 04:37:53,645 - root - INFO - step: 9885 loss: 2.3888 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:37:53,645 - root - INFO - lr: 4.3924e-05 gnorm: 1.14 [ 6:03:44<18:28:09] +[titan] 2025-10-05 04:38:04,551 - root - INFO - step: 9890 loss: 2.3882 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 04:38:04,552 - root - INFO - lr: 4.3918e-05 gnorm: 1.12 [ 6:03:55<18:27:57] +[titan] 2025-10-05 04:38:15,412 - root - INFO - step: 9895 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1157 +[titan] 2025-10-05 04:38:15,412 - root - INFO - lr: 4.3912e-05 gnorm: 1.14 [ 6:04:06<18:27:46] +[titan] 2025-10-05 04:38:24,052 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:38:26,242 - root - INFO - step: 9900 loss: 2.3816 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1025 +[titan] 2025-10-05 04:38:26,242 - root - INFO - lr: 4.3906e-05 gnorm: 1.14 [ 6:04:17<18:27:34] +[titan] 2025-10-05 04:38:37,109 - root - INFO - step: 9905 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 04:38:37,109 - root - INFO - lr: 4.3900e-05 gnorm: 1.17 [ 6:04:28<18:27:22] +[titan] 2025-10-05 04:38:47,968 - root - INFO - step: 9910 loss: 2.4451 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 04:38:47,968 - root - INFO - lr: 4.3894e-05 gnorm: 1.17 [ 6:04:38<18:27:11] +[titan] 2025-10-05 04:38:58,828 - root - INFO - step: 9915 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0776 +[titan] 2025-10-05 04:38:58,828 - root - INFO - lr: 4.3887e-05 gnorm: 1.15 [ 6:04:49<18:26:59] +[titan] 2025-10-05 04:39:09,703 - root - INFO - step: 9920 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 04:39:09,703 - root - INFO - lr: 4.3881e-05 gnorm: 1.13 [ 6:05:00<18:26:48] +[titan] 2025-10-05 04:39:20,593 - root - INFO - step: 9925 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 04:39:20,593 - root - INFO - lr: 4.3875e-05 gnorm: 1.14 [ 6:05:11<18:26:36] +[titan] 2025-10-05 04:39:31,464 - root - INFO - step: 9930 loss: 2.2894 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 04:39:31,464 - root - INFO - lr: 4.3869e-05 
gnorm: 1.11 [ 6:05:22<18:26:25] +[titan] 2025-10-05 04:39:42,337 - root - INFO - step: 9935 loss: 2.3475 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 04:39:42,338 - root - INFO - lr: 4.3863e-05 gnorm: 1.10 [ 6:05:33<18:26:13] +[titan] 2025-10-05 04:39:53,224 - root - INFO - step: 9940 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0849 +[titan] 2025-10-05 04:39:53,224 - root - INFO - lr: 4.3857e-05 gnorm: 1.13 [ 6:05:44<18:26:02] +[titan] 2025-10-05 04:40:04,154 - root - INFO - step: 9945 loss: 2.3821 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1033 +[titan] 2025-10-05 04:40:04,154 - root - INFO - lr: 4.3851e-05 gnorm: 1.14 [ 6:05:55<18:25:50] +[titan] 2025-10-05 04:40:12,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:40:15,025 - root - INFO - step: 9950 loss: 2.4179 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:40:15,026 - root - INFO - lr: 4.3845e-05 gnorm: 1.11 [ 6:06:05<18:25:39] +[titan] 2025-10-05 04:40:25,938 - root - INFO - step: 9955 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1159 +[titan] 2025-10-05 04:40:25,938 - root - INFO - lr: 4.3838e-05 gnorm: 1.14 [ 6:06:16<18:25:27] +[titan] 2025-10-05 04:40:36,795 - root - INFO - step: 9960 loss: 2.3949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 04:40:36,795 - root - INFO - lr: 4.3832e-05 gnorm: 1.17 [ 6:06:27<18:25:16] +[titan] 2025-10-05 04:40:47,648 - root - INFO - step: 9965 loss: 2.4110 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:40:47,648 - root - INFO - lr: 4.3826e-05 gnorm: 1.15 [ 6:06:38<18:25:04] +[titan] 2025-10-05 04:40:58,539 - root - INFO - step: 9970 loss: 2.3944 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1127 +[titan] 2025-10-05 04:40:58,540 - root - INFO - lr: 4.3820e-05 gnorm: 1.18 [ 6:06:49<18:24:53] +[titan] 2025-10-05 04:41:09,431 - root - INFO - step: 9975 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 04:41:09,432 - root - INFO - lr: 4.3814e-05 gnorm: 1.19 [ 6:07:00<18:24:41] +[titan] 2025-10-05 04:41:20,364 - root - INFO - step: 9980 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 04:41:20,364 - root - INFO - lr: 4.3808e-05 
gnorm: 1.18 [ 6:07:11<18:24:30] +[titan] 2025-10-05 04:41:31,259 - root - INFO - step: 9985 loss: 2.4484 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:41:31,260 - root - INFO - lr: 4.3802e-05 gnorm: 1.15 [ 6:07:22<18:24:18] +[titan] 2025-10-05 04:41:42,148 - root - INFO - step: 9990 loss: 2.4717 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:41:42,148 - root - INFO - lr: 4.3795e-05 gnorm: 1.17 [ 6:07:33<18:24:07] +[titan] 2025-10-05 04:41:53,059 - root - INFO - step: 9995 loss: 2.3948 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1147 +[titan] 2025-10-05 04:41:53,059 - root - INFO - lr: 4.3789e-05 gnorm: 1.17 [ 6:07:43<18:23:56] +[titan] 2025-10-05 04:42:01,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:42:03,951 - root - INFO - step: 10000 loss: 2.4699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1790 +[titan] 2025-10-05 04:42:03,951 - root - INFO - lr: 4.3783e-05 gnorm: 1.18 [ 6:07:54<18:23:44] +[titan] 2025-10-05 04:42:03,951 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 04:42:23,124 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 04:42:23,124 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.17 seconds. 
+[titan] 2025-10-05 04:44:28,943 - root - INFO - step: 10005 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 2,260 tflops: 31.35 mfu: 3.17% global_avg_ntp_loss: 0.2840 global_avg_mtp_loss: 2.1282 +[titan] 2025-10-05 04:44:28,943 - root - INFO - lr: 4.3777e-05 gnorm: 1.15 [ 6:10:19<18:30:15] +[titan] 2025-10-05 04:44:39,758 - root - INFO - step: 10010 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0843 +[titan] 2025-10-05 04:44:39,759 - root - INFO - lr: 4.3771e-05 gnorm: 1.10 [ 6:10:30<18:30:03] +[titan] 2025-10-05 04:44:50,583 - root - INFO - step: 10015 loss: 2.4606 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1726 +[titan] 2025-10-05 04:44:50,583 - root - INFO - lr: 4.3765e-05 gnorm: 1.17 [ 6:10:41<18:29:51] +[titan] 2025-10-05 04:45:01,371 - root - INFO - step: 10020 loss: 2.3595 memory: 118.84GiB(85.28%) tps: 30,376 tflops: 421.43 mfu: 42.61% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0816 +[titan] 2025-10-05 04:45:01,371 - root - INFO - lr: 4.3758e-05 gnorm: 1.12 [ 6:10:52<18:29:39] +[titan] 2025-10-05 04:45:12,207 - root - INFO - step: 10025 loss: 2.3890 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1095 +[titan] 2025-10-05 04:45:12,207 - root - INFO - lr: 4.3752e-05 gnorm: 1.13 [ 6:11:03<18:29:27] +[titan] 2025-10-05 04:45:23,056 - root - INFO - step: 10030 loss: 2.4171 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:45:23,056 - root - INFO - lr: 4.3746e-05 gnorm: 1.14 [ 6:11:13<18:29:15] +[titan] 2025-10-05 04:45:33,878 - root - INFO - step: 10035 loss: 2.4258 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1417 +[titan] 2025-10-05 04:45:33,879 - root - INFO - lr: 
4.3740e-05 gnorm: 1.18 [ 6:11:24<18:29:03] +[titan] 2025-10-05 04:45:44,722 - root - INFO - step: 10040 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:45:44,722 - root - INFO - lr: 4.3734e-05 gnorm: 1.13 [ 6:11:35<18:28:51] +[titan] 2025-10-05 04:45:55,531 - root - INFO - step: 10045 loss: 2.3962 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:45:55,531 - root - INFO - lr: 4.3728e-05 gnorm: 1.14 [ 6:11:46<18:28:39] +[titan] 2025-10-05 04:46:04,196 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:46:06,376 - root - INFO - step: 10050 loss: 2.4217 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1372 +[titan] 2025-10-05 04:46:06,376 - root - INFO - lr: 4.3721e-05 gnorm: 1.19 [ 6:11:57<18:28:27] +[titan] 2025-10-05 04:46:17,244 - root - INFO - step: 10055 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1117 +[titan] 2025-10-05 04:46:17,244 - root - INFO - lr: 4.3715e-05 gnorm: 1.09 [ 6:12:08<18:28:15] +[titan] 2025-10-05 04:46:28,093 - root - INFO - step: 10060 loss: 2.4776 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 04:46:28,093 - root - INFO - lr: 4.3709e-05 gnorm: 1.12 [ 6:12:18<18:28:04] +[titan] 2025-10-05 04:46:38,949 - root - INFO - step: 10065 loss: 2.3571 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0814 +[titan] 2025-10-05 04:46:38,949 - root - INFO - lr: 4.3703e-05 gnorm: 1.17 [ 6:12:29<18:27:52] +[titan] 2025-10-05 04:46:49,820 - root - INFO - step: 10070 loss: 2.4101 memory: 118.84GiB(85.28%) tps: 
30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:46:49,820 - root - INFO - lr: 4.3697e-05 gnorm: 1.14 [ 6:12:40<18:27:40] +[titan] 2025-10-05 04:47:00,671 - root - INFO - step: 10075 loss: 2.4112 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:47:00,671 - root - INFO - lr: 4.3690e-05 gnorm: 1.17 [ 6:12:51<18:27:28] +[titan] 2025-10-05 04:47:11,530 - root - INFO - step: 10080 loss: 2.3867 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1065 +[titan] 2025-10-05 04:47:11,530 - root - INFO - lr: 4.3684e-05 gnorm: 1.12 [ 6:13:02<18:27:16] +[titan] 2025-10-05 04:47:22,402 - root - INFO - step: 10085 loss: 2.3591 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 04:47:22,403 - root - INFO - lr: 4.3678e-05 gnorm: 1.14 [ 6:13:13<18:27:04] +[titan] 2025-10-05 04:47:33,304 - root - INFO - step: 10090 loss: 2.3953 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1153 +[titan] 2025-10-05 04:47:33,305 - root - INFO - lr: 4.3672e-05 gnorm: 1.12 [ 6:13:24<18:26:53] +[titan] 2025-10-05 04:47:44,169 - root - INFO - step: 10095 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2874 global_avg_mtp_loss: 2.1668 +[titan] 2025-10-05 04:47:44,169 - root - INFO - lr: 4.3666e-05 gnorm: 1.20 [ 6:13:35<18:26:41] +[titan] 2025-10-05 04:47:52,901 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:47:55,091 - root - INFO - step: 10100 loss: 2.4560 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1684 +[titan] 2025-10-05 04:47:55,091 - root - INFO - lr: 4.3659e-05 gnorm: 1.18 [ 6:13:45<18:26:29] +[titan] 2025-10-05 04:48:05,969 - root - INFO - step: 10105 loss: 2.4312 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:48:05,969 - root - INFO - lr: 4.3653e-05 gnorm: 1.10 [ 6:13:56<18:26:18] +[titan] 2025-10-05 04:48:16,842 - root - INFO - step: 10110 loss: 2.3985 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1175 +[titan] 2025-10-05 04:48:16,842 - root - INFO - lr: 4.3647e-05 gnorm: 1.15 [ 6:14:07<18:26:06] +[titan] 2025-10-05 04:48:27,739 - root - INFO - step: 10115 loss: 2.4183 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1354 +[titan] 2025-10-05 04:48:27,739 - root - INFO - lr: 4.3641e-05 gnorm: 1.11 [ 6:14:18<18:25:54] +[titan] 2025-10-05 04:48:38,638 - root - INFO - step: 10120 loss: 2.3862 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 04:48:38,639 - root - INFO - lr: 4.3635e-05 gnorm: 1.15 [ 6:14:29<18:25:42] +[titan] 2025-10-05 04:48:49,495 - root - INFO - step: 10125 loss: 2.4046 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1229 +[titan] 2025-10-05 04:48:49,495 - root - INFO - lr: 4.3628e-05 gnorm: 1.13 [ 6:14:40<18:25:30] +[titan] 2025-10-05 04:49:00,374 - root - INFO - step: 10130 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 04:49:00,374 - root - INFO - lr: 
4.3622e-05 gnorm: 1.12 [ 6:14:51<18:25:19] +[titan] 2025-10-05 04:49:11,231 - root - INFO - step: 10135 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1240 +[titan] 2025-10-05 04:49:11,231 - root - INFO - lr: 4.3616e-05 gnorm: 1.11 [ 6:15:02<18:25:07] +[titan] 2025-10-05 04:49:22,073 - root - INFO - step: 10140 loss: 2.4295 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1448 +[titan] 2025-10-05 04:49:22,073 - root - INFO - lr: 4.3610e-05 gnorm: 1.19 [ 6:15:12<18:24:55] +[titan] 2025-10-05 04:49:32,953 - root - INFO - step: 10145 loss: 2.4182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:49:32,953 - root - INFO - lr: 4.3603e-05 gnorm: 1.13 [ 6:15:23<18:24:43] +[titan] 2025-10-05 04:49:41,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:49:43,809 - root - INFO - step: 10150 loss: 2.4033 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1220 +[titan] 2025-10-05 04:49:43,810 - root - INFO - lr: 4.3597e-05 gnorm: 1.18 [ 6:15:34<18:24:31] +[titan] 2025-10-05 04:49:54,722 - root - INFO - step: 10155 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1491 +[titan] 2025-10-05 04:49:54,722 - root - INFO - lr: 4.3591e-05 gnorm: 1.19 [ 6:15:45<18:24:20] +[titan] 2025-10-05 04:50:05,570 - root - INFO - step: 10160 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2925 global_avg_mtp_loss: 2.2000 +[titan] 2025-10-05 04:50:05,570 - root - INFO - lr: 4.3585e-05 gnorm: 1.18 [ 6:15:56<18:24:08] +[titan] 2025-10-05 04:50:16,417 - root - INFO - step: 10165 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 04:50:16,418 - root - INFO - lr: 4.3578e-05 gnorm: 1.15 [ 6:16:07<18:23:56] +[titan] 2025-10-05 04:50:27,286 - root - INFO - step: 10170 loss: 2.4892 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:50:27,286 - root - INFO - lr: 4.3572e-05 gnorm: 1.17 [ 6:16:18<18:23:44] +[titan] 2025-10-05 04:50:38,151 - root - INFO - step: 10175 loss: 2.4728 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1825 +[titan] 2025-10-05 04:50:38,151 - root - INFO - lr: 4.3566e-05 gnorm: 1.14 [ 6:16:29<18:23:33] +[titan] 2025-10-05 04:50:49,013 - root - INFO - step: 10180 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1066 +[titan] 2025-10-05 04:50:49,013 - root - INFO - lr: 
4.3560e-05 gnorm: 1.11 [ 6:16:39<18:23:21] +[titan] 2025-10-05 04:50:59,879 - root - INFO - step: 10185 loss: 2.3308 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0576 +[titan] 2025-10-05 04:50:59,879 - root - INFO - lr: 4.3553e-05 gnorm: 1.10 [ 6:16:50<18:23:09] +[titan] 2025-10-05 04:51:10,735 - root - INFO - step: 10190 loss: 2.4005 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1191 +[titan] 2025-10-05 04:51:10,735 - root - INFO - lr: 4.3547e-05 gnorm: 1.12 [ 6:17:01<18:22:57] +[titan] 2025-10-05 04:51:21,605 - root - INFO - step: 10195 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:51:21,605 - root - INFO - lr: 4.3541e-05 gnorm: 1.07 [ 6:17:12<18:22:45] +[titan] 2025-10-05 04:51:30,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:51:32,491 - root - INFO - step: 10200 loss: 2.4592 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1708 +[titan] 2025-10-05 04:51:32,491 - root - INFO - lr: 4.3535e-05 gnorm: 1.19 [ 6:17:23<18:22:34] +[titan] 2025-10-05 04:51:43,357 - root - INFO - step: 10205 loss: 2.3585 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0823 +[titan] 2025-10-05 04:51:43,357 - root - INFO - lr: 4.3528e-05 gnorm: 1.08 [ 6:17:34<18:22:22] +[titan] 2025-10-05 04:51:54,234 - root - INFO - step: 10210 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 04:51:54,234 - root - INFO - lr: 4.3522e-05 gnorm: 1.13 [ 6:17:45<18:22:10] +[titan] 2025-10-05 04:52:05,148 - root - INFO - step: 10215 loss: 2.4224 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1397 +[titan] 2025-10-05 04:52:05,148 - root - INFO - lr: 4.3516e-05 gnorm: 1.15 [ 6:17:56<18:21:58] +[titan] 2025-10-05 04:52:16,012 - root - INFO - step: 10220 loss: 2.3880 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:52:16,012 - root - INFO - lr: 4.3510e-05 gnorm: 1.17 [ 6:18:06<18:21:47] +[titan] 2025-10-05 04:52:26,919 - root - INFO - step: 10225 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0969 +[titan] 2025-10-05 04:52:26,919 - root - INFO - lr: 4.3503e-05 gnorm: 1.13 [ 6:18:17<18:21:35] +[titan] 2025-10-05 04:52:37,795 - root - INFO - step: 10230 loss: 2.4827 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2964 global_avg_mtp_loss: 2.1864 +[titan] 2025-10-05 04:52:37,795 - root - INFO - lr: 
4.3497e-05 gnorm: 1.23 [ 6:18:28<18:21:23] +[titan] 2025-10-05 04:52:48,650 - root - INFO - step: 10235 loss: 2.3739 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0960 +[titan] 2025-10-05 04:52:48,651 - root - INFO - lr: 4.3491e-05 gnorm: 1.14 [ 6:18:39<18:21:11] +[titan] 2025-10-05 04:52:59,594 - root - INFO - step: 10240 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0761 +[titan] 2025-10-05 04:52:59,594 - root - INFO - lr: 4.3485e-05 gnorm: 1.17 [ 6:18:50<18:21:00] +[titan] 2025-10-05 04:52:59,769 - root - INFO - Dumping profiler traces at step 10240 +[titan] 2025-10-05 04:52:59,809 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:53:10,672 - root - INFO - step: 10245 loss: 2.4638 memory: 118.84GiB(85.28%) tps: 29,580 tflops: 410.38 mfu: 41.49% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1752 +[titan] 2025-10-05 04:53:10,672 - root - INFO - lr: 4.3478e-05 gnorm: 1.18 [ 6:19:01<18:20:49] +[titan] 2025-10-05 04:53:19,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:53:21,556 - root - INFO - step: 10250 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0638 +[titan] 2025-10-05 04:53:21,556 - root - INFO - lr: 4.3472e-05 gnorm: 1.18 [ 6:19:12<18:20:37] +[titan] 2025-10-05 04:53:32,460 - root - INFO - step: 10255 loss: 2.3782 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.0997 +[titan] 2025-10-05 04:53:32,460 - root - INFO - lr: 4.3466e-05 gnorm: 1.11 [ 6:19:23<18:20:25] +[titan] 2025-10-05 04:53:43,321 - root - INFO - step: 10260 loss: 2.3383 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 04:53:43,321 - root - INFO - lr: 4.3459e-05 gnorm: 1.16 [ 6:19:34<18:20:14] +[titan] 2025-10-05 04:53:54,178 - root - INFO - step: 10265 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 04:53:54,178 - root - INFO - lr: 4.3453e-05 gnorm: 1.16 [ 6:19:45<18:20:02] +[titan] 2025-10-05 04:54:05,007 - root - INFO - step: 10270 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 04:54:05,007 - root - INFO - lr: 4.3447e-05 gnorm: 1.17 [ 6:19:55<18:19:50] +[titan] 2025-10-05 04:54:15,842 - root - INFO - step: 10275 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:54:15,843 - root - INFO - lr: 4.3440e-05 gnorm: 1.14 [ 6:20:06<18:19:38] +[titan] 2025-10-05 04:54:26,778 - root - INFO - step: 10280 loss: 2.3590 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0825 +[titan] 2025-10-05 04:54:26,778 - root - INFO - lr: 
4.3434e-05 gnorm: 1.09 [ 6:20:17<18:19:26] +[titan] 2025-10-05 04:54:37,611 - root - INFO - step: 10285 loss: 2.3467 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 04:54:37,611 - root - INFO - lr: 4.3428e-05 gnorm: 1.17 [ 6:20:28<18:19:15] +[titan] 2025-10-05 04:54:48,457 - root - INFO - step: 10290 loss: 2.3098 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 04:54:48,457 - root - INFO - lr: 4.3422e-05 gnorm: 1.13 [ 6:20:39<18:19:03] +[titan] 2025-10-05 04:54:59,307 - root - INFO - step: 10295 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 04:54:59,307 - root - INFO - lr: 4.3415e-05 gnorm: 1.19 [ 6:20:50<18:18:51] +[titan] 2025-10-05 04:55:07,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:55:10,154 - root - INFO - step: 10300 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:55:10,154 - root - INFO - lr: 4.3409e-05 gnorm: 1.11 [ 6:21:01<18:18:39] +[titan] 2025-10-05 04:55:20,995 - root - INFO - step: 10305 loss: 2.4115 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1272 +[titan] 2025-10-05 04:55:20,995 - root - INFO - lr: 4.3403e-05 gnorm: 1.16 [ 6:21:11<18:18:27] +[titan] 2025-10-05 04:55:31,895 - root - INFO - step: 10310 loss: 2.3942 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:55:31,895 - root - INFO - lr: 4.3396e-05 gnorm: 1.11 [ 6:21:22<18:18:16] +[titan] 2025-10-05 04:55:42,797 - root - INFO - step: 10315 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0890 +[titan] 2025-10-05 04:55:42,797 - root - INFO - lr: 4.3390e-05 gnorm: 1.14 [ 6:21:33<18:18:04] +[titan] 2025-10-05 04:55:53,631 - root - INFO - step: 10320 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0675 +[titan] 2025-10-05 04:55:53,631 - root - INFO - lr: 4.3384e-05 gnorm: 1.13 [ 6:21:44<18:17:52] +[titan] 2025-10-05 04:56:04,495 - root - INFO - step: 10325 loss: 2.3236 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 04:56:04,495 - root - INFO - lr: 4.3377e-05 gnorm: 1.11 [ 6:21:55<18:17:40] +[titan] 2025-10-05 04:56:15,368 - root - INFO - step: 10330 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0554 +[titan] 2025-10-05 04:56:15,368 - root - INFO - lr: 
4.3371e-05 gnorm: 1.11 [ 6:22:06<18:17:29] +[titan] 2025-10-05 04:56:26,235 - root - INFO - step: 10335 loss: 2.3812 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1021 +[titan] 2025-10-05 04:56:26,235 - root - INFO - lr: 4.3365e-05 gnorm: 1.13 [ 6:22:17<18:17:17] +[titan] 2025-10-05 04:56:37,100 - root - INFO - step: 10340 loss: 2.4139 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 04:56:37,100 - root - INFO - lr: 4.3358e-05 gnorm: 1.15 [ 6:22:27<18:17:05] +[titan] 2025-10-05 04:56:48,014 - root - INFO - step: 10345 loss: 2.3627 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0861 +[titan] 2025-10-05 04:56:48,014 - root - INFO - lr: 4.3352e-05 gnorm: 1.15 [ 6:22:38<18:16:53] +[titan] 2025-10-05 04:56:56,705 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:56:58,888 - root - INFO - step: 10350 loss: 2.3704 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0924 +[titan] 2025-10-05 04:56:58,888 - root - INFO - lr: 4.3346e-05 gnorm: 1.12 [ 6:22:49<18:16:42] +[titan] 2025-10-05 04:57:09,755 - root - INFO - step: 10355 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0403 +[titan] 2025-10-05 04:57:09,755 - root - INFO - lr: 4.3339e-05 gnorm: 1.14 [ 6:23:00<18:16:30] +[titan] 2025-10-05 04:57:20,636 - root - INFO - step: 10360 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1400 +[titan] 2025-10-05 04:57:20,637 - root - INFO - lr: 4.3333e-05 gnorm: 1.16 [ 6:23:11<18:16:18] +[titan] 2025-10-05 04:57:31,521 - root - INFO - step: 10365 loss: 2.3992 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 04:57:31,521 - root - INFO - lr: 4.3327e-05 gnorm: 1.14 [ 6:23:22<18:16:07] +[titan] 2025-10-05 04:57:42,396 - root - INFO - step: 10370 loss: 2.4732 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1823 +[titan] 2025-10-05 04:57:42,396 - root - INFO - lr: 4.3320e-05 gnorm: 1.14 [ 6:23:33<18:15:55] +[titan] 2025-10-05 04:57:53,311 - root - INFO - step: 10375 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1107 +[titan] 2025-10-05 04:57:53,311 - root - INFO - lr: 4.3314e-05 gnorm: 1.17 [ 6:23:44<18:15:43] +[titan] 2025-10-05 04:58:04,191 - root - INFO - step: 10380 loss: 2.3285 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 04:58:04,191 - root - INFO - lr: 
4.3308e-05 gnorm: 1.15 [ 6:23:55<18:15:32] +[titan] 2025-10-05 04:58:15,071 - root - INFO - step: 10385 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:58:15,072 - root - INFO - lr: 4.3301e-05 gnorm: 2.89 [ 6:24:05<18:15:20] +[titan] 2025-10-05 04:58:25,961 - root - INFO - step: 10390 loss: 2.4472 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 04:58:25,961 - root - INFO - lr: 4.3295e-05 gnorm: 1.19 [ 6:24:16<18:15:08] +[titan] 2025-10-05 04:58:36,832 - root - INFO - step: 10395 loss: 2.4116 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:58:36,832 - root - INFO - lr: 4.3289e-05 gnorm: 1.19 [ 6:24:27<18:14:56] +[titan] 2025-10-05 04:58:45,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:58:47,732 - root - INFO - step: 10400 loss: 2.3889 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:58:47,732 - root - INFO - lr: 4.3282e-05 gnorm: 1.15 [ 6:24:38<18:14:45] +[titan] 2025-10-05 04:58:58,620 - root - INFO - step: 10405 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1661 +[titan] 2025-10-05 04:58:58,620 - root - INFO - lr: 4.3276e-05 gnorm: 1.15 [ 6:24:49<18:14:33] +[titan] 2025-10-05 04:59:09,537 - root - INFO - step: 10410 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0667 +[titan] 2025-10-05 04:59:09,538 - root - INFO - lr: 4.3270e-05 gnorm: 1.09 [ 6:25:00<18:14:21] +[titan] 2025-10-05 04:59:20,430 - root - INFO - step: 10415 loss: 2.4412 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1531 +[titan] 2025-10-05 04:59:20,430 - root - INFO - lr: 4.3263e-05 gnorm: 1.11 [ 6:25:11<18:14:10] +[titan] 2025-10-05 04:59:31,331 - root - INFO - step: 10420 loss: 2.4559 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1675 +[titan] 2025-10-05 04:59:31,331 - root - INFO - lr: 4.3257e-05 gnorm: 1.18 [ 6:25:22<18:13:58] +[titan] 2025-10-05 04:59:42,198 - root - INFO - step: 10425 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0782 +[titan] 2025-10-05 04:59:42,198 - root - INFO - lr: 4.3250e-05 gnorm: 1.15 [ 6:25:33<18:13:46] +[titan] 2025-10-05 04:59:53,072 - root - INFO - step: 10430 loss: 2.3763 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0987 +[titan] 2025-10-05 04:59:53,072 - root - INFO - lr: 
4.3244e-05 gnorm: 1.14 [ 6:25:43<18:13:35] +[titan] 2025-10-05 05:00:03,938 - root - INFO - step: 10435 loss: 2.4170 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2835 global_avg_mtp_loss: 2.1335 +[titan] 2025-10-05 05:00:03,939 - root - INFO - lr: 4.3238e-05 gnorm: 1.15 [ 6:25:54<18:13:23] +[titan] 2025-10-05 05:00:14,820 - root - INFO - step: 10440 loss: 2.4296 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 05:00:14,820 - root - INFO - lr: 4.3231e-05 gnorm: 1.12 [ 6:26:05<18:13:11] +[titan] 2025-10-05 05:00:25,686 - root - INFO - step: 10445 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0958 +[titan] 2025-10-05 05:00:25,686 - root - INFO - lr: 4.3225e-05 gnorm: 1.15 [ 6:26:16<18:12:59] +[titan] 2025-10-05 05:00:34,395 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:00:36,573 - root - INFO - step: 10450 loss: 2.3225 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:00:36,574 - root - INFO - lr: 4.3219e-05 gnorm: 1.12 [ 6:26:27<18:12:48] +[titan] 2025-10-05 05:00:47,453 - root - INFO - step: 10455 loss: 2.2956 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 05:00:47,453 - root - INFO - lr: 4.3212e-05 gnorm: 1.12 [ 6:26:38<18:12:36] +[titan] 2025-10-05 05:00:58,326 - root - INFO - step: 10460 loss: 2.4231 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1381 +[titan] 2025-10-05 05:00:58,326 - root - INFO - lr: 4.3206e-05 gnorm: 1.13 [ 6:26:49<18:12:24] +[titan] 2025-10-05 05:01:09,212 - root - INFO - step: 10465 loss: 2.3984 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1166 +[titan] 2025-10-05 05:01:09,212 - root - INFO - lr: 4.3199e-05 gnorm: 1.16 [ 6:27:00<18:12:13] +[titan] 2025-10-05 05:01:20,082 - root - INFO - step: 10470 loss: 2.3857 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1068 +[titan] 2025-10-05 05:01:20,082 - root - INFO - lr: 4.3193e-05 gnorm: 1.17 [ 6:27:10<18:12:01] +[titan] 2025-10-05 05:01:31,025 - root - INFO - step: 10475 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0845 +[titan] 2025-10-05 05:01:31,025 - root - INFO - lr: 4.3187e-05 gnorm: 1.18 [ 6:27:21<18:11:49] +[titan] 2025-10-05 05:01:41,900 - root - INFO - step: 10480 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1605 +[titan] 2025-10-05 05:01:41,900 - root - INFO - lr: 
4.3180e-05 gnorm: 1.14 [ 6:27:32<18:11:38] +[titan] 2025-10-05 05:01:52,794 - root - INFO - step: 10485 loss: 2.3469 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0715 +[titan] 2025-10-05 05:01:52,794 - root - INFO - lr: 4.3174e-05 gnorm: 1.11 [ 6:27:43<18:11:26] +[titan] 2025-10-05 05:02:03,640 - root - INFO - step: 10490 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:02:03,641 - root - INFO - lr: 4.3167e-05 gnorm: 1.12 [ 6:27:54<18:11:14] +[titan] 2025-10-05 05:02:14,499 - root - INFO - step: 10495 loss: 2.4247 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2861 global_avg_mtp_loss: 2.1386 +[titan] 2025-10-05 05:02:14,499 - root - INFO - lr: 4.3161e-05 gnorm: 1.11 [ 6:28:05<18:11:03] +[titan] 2025-10-05 05:02:23,176 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:02:25,357 - root - INFO - step: 10500 loss: 2.3813 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1019 +[titan] 2025-10-05 05:02:25,357 - root - INFO - lr: 4.3155e-05 gnorm: 1.11 [ 6:28:16<18:10:51] +[titan] 2025-10-05 05:02:36,309 - root - INFO - step: 10505 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1463 +[titan] 2025-10-05 05:02:36,309 - root - INFO - lr: 4.3148e-05 gnorm: 1.31 [ 6:28:27<18:10:39] +[titan] 2025-10-05 05:02:47,169 - root - INFO - step: 10510 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0752 +[titan] 2025-10-05 05:02:47,169 - root - INFO - lr: 4.3142e-05 gnorm: 1.12 [ 6:28:38<18:10:28] +[titan] 2025-10-05 05:02:58,035 - root - INFO - step: 10515 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1262 +[titan] 2025-10-05 05:02:58,035 - root - INFO - lr: 4.3135e-05 gnorm: 1.20 [ 6:28:48<18:10:16] +[titan] 2025-10-05 05:03:08,894 - root - INFO - step: 10520 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0523 +[titan] 2025-10-05 05:03:08,894 - root - INFO - lr: 4.3129e-05 gnorm: 1.10 [ 6:28:59<18:10:04] +[titan] 2025-10-05 05:03:19,768 - root - INFO - step: 10525 loss: 2.4870 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 05:03:19,768 - root - INFO - lr: 4.3122e-05 gnorm: 1.18 [ 6:29:10<18:09:52] +[titan] 2025-10-05 05:03:30,631 - root - INFO - step: 10530 loss: 2.3951 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:03:30,631 - root - INFO - lr: 
4.3116e-05 gnorm: 1.13 [ 6:29:21<18:09:41] +[titan] 2025-10-05 05:03:41,571 - root - INFO - step: 10535 loss: 2.3677 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:03:41,571 - root - INFO - lr: 4.3110e-05 gnorm: 1.19 [ 6:29:32<18:09:29] +[titan] 2025-10-05 05:03:52,432 - root - INFO - step: 10540 loss: 2.4252 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1411 +[titan] 2025-10-05 05:03:52,432 - root - INFO - lr: 4.3103e-05 gnorm: 1.19 [ 6:29:43<18:09:17] +[titan] 2025-10-05 05:04:03,276 - root - INFO - step: 10545 loss: 2.4280 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1435 +[titan] 2025-10-05 05:04:03,277 - root - INFO - lr: 4.3097e-05 gnorm: 1.16 [ 6:29:54<18:09:06] +[titan] 2025-10-05 05:04:11,963 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:04:14,149 - root - INFO - step: 10550 loss: 2.2936 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0258 +[titan] 2025-10-05 05:04:14,149 - root - INFO - lr: 4.3090e-05 gnorm: 1.14 [ 6:30:04<18:08:54] +[titan] 2025-10-05 05:04:25,007 - root - INFO - step: 10555 loss: 2.3687 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0910 +[titan] 2025-10-05 05:04:25,007 - root - INFO - lr: 4.3084e-05 gnorm: 1.18 [ 6:30:15<18:08:42] +[titan] 2025-10-05 05:04:35,912 - root - INFO - step: 10560 loss: 2.4093 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1275 +[titan] 2025-10-05 05:04:35,912 - root - INFO - lr: 4.3077e-05 gnorm: 1.23 [ 6:30:26<18:08:30] +[titan] 2025-10-05 05:04:46,752 - root - INFO - step: 10565 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1548 +[titan] 2025-10-05 05:04:46,752 - root - INFO - lr: 4.3071e-05 gnorm: 1.10 [ 6:30:37<18:08:19] +[titan] 2025-10-05 05:04:57,630 - root - INFO - step: 10570 loss: 2.3849 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1052 +[titan] 2025-10-05 05:04:57,630 - root - INFO - lr: 4.3065e-05 gnorm: 1.13 [ 6:30:48<18:08:07] +[titan] 2025-10-05 05:05:08,469 - root - INFO - step: 10575 loss: 2.4749 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1842 +[titan] 2025-10-05 05:05:08,469 - root - INFO - lr: 4.3058e-05 gnorm: 1.19 [ 6:30:59<18:07:55] +[titan] 2025-10-05 05:05:19,334 - root - INFO - step: 10580 loss: 2.3851 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1046 +[titan] 2025-10-05 05:05:19,334 - root - INFO - lr: 
4.3052e-05 gnorm: 1.12 [ 6:31:10<18:07:43] +[titan] 2025-10-05 05:05:30,220 - root - INFO - step: 10585 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0900 +[titan] 2025-10-05 05:05:30,220 - root - INFO - lr: 4.3045e-05 gnorm: 1.17 [ 6:31:21<18:07:32] +[titan] 2025-10-05 05:05:41,134 - root - INFO - step: 10590 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0866 +[titan] 2025-10-05 05:05:41,134 - root - INFO - lr: 4.3039e-05 gnorm: 1.10 [ 6:31:31<18:07:20] +[titan] 2025-10-05 05:05:51,981 - root - INFO - step: 10595 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 05:05:51,981 - root - INFO - lr: 4.3032e-05 gnorm: 1.13 [ 6:31:42<18:07:08] +[titan] 2025-10-05 05:06:00,679 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:06:02,853 - root - INFO - step: 10600 loss: 2.4272 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1436 +[titan] 2025-10-05 05:06:02,853 - root - INFO - lr: 4.3026e-05 gnorm: 1.13 [ 6:31:53<18:06:57] +[titan] 2025-10-05 05:06:13,702 - root - INFO - step: 10605 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1154 +[titan] 2025-10-05 05:06:13,702 - root - INFO - lr: 4.3019e-05 gnorm: 1.18 [ 6:32:04<18:06:45] +[titan] 2025-10-05 05:06:24,546 - root - INFO - step: 10610 loss: 2.4439 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 05:06:24,547 - root - INFO - lr: 4.3013e-05 gnorm: 1.17 [ 6:32:15<18:06:33] +[titan] 2025-10-05 05:06:35,421 - root - INFO - step: 10615 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0903 +[titan] 2025-10-05 05:06:35,421 - root - INFO - lr: 4.3006e-05 gnorm: 1.10 [ 6:32:26<18:06:21] +[titan] 2025-10-05 05:06:46,307 - root - INFO - step: 10620 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 05:06:46,307 - root - INFO - lr: 4.3000e-05 gnorm: 1.15 [ 6:32:37<18:06:10] +[titan] 2025-10-05 05:06:57,167 - root - INFO - step: 10625 loss: 2.3874 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1077 +[titan] 2025-10-05 05:06:57,168 - root - INFO - lr: 4.2993e-05 gnorm: 1.16 [ 6:32:48<18:05:58] +[titan] 2025-10-05 05:07:08,027 - root - INFO - step: 10630 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0719 +[titan] 2025-10-05 05:07:08,027 - root - INFO - lr: 
4.2987e-05 gnorm: 1.17 [ 6:32:58<18:05:46] +[titan] 2025-10-05 05:07:18,912 - root - INFO - step: 10635 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0503 +[titan] 2025-10-05 05:07:18,912 - root - INFO - lr: 4.2981e-05 gnorm: 1.11 [ 6:33:09<18:05:35] +[titan] 2025-10-05 05:07:29,770 - root - INFO - step: 10640 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0909 +[titan] 2025-10-05 05:07:29,770 - root - INFO - lr: 4.2974e-05 gnorm: 1.11 [ 6:33:20<18:05:23] +[titan] 2025-10-05 05:07:40,642 - root - INFO - step: 10645 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1251 +[titan] 2025-10-05 05:07:40,642 - root - INFO - lr: 4.2968e-05 gnorm: 1.15 [ 6:33:31<18:05:11] +[titan] 2025-10-05 05:07:49,320 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:07:51,513 - root - INFO - step: 10650 loss: 2.3800 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 05:07:51,513 - root - INFO - lr: 4.2961e-05 gnorm: 1.13 [ 6:33:42<18:05:00] +[titan] 2025-10-05 05:08:02,386 - root - INFO - step: 10655 loss: 2.2876 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0198 +[titan] 2025-10-05 05:08:02,387 - root - INFO - lr: 4.2955e-05 gnorm: 1.11 [ 6:33:53<18:04:48] +[titan] 2025-10-05 05:08:13,251 - root - INFO - step: 10660 loss: 2.3831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 05:08:13,252 - root - INFO - lr: 4.2948e-05 gnorm: 1.14 [ 6:34:04<18:04:36] +[titan] 2025-10-05 05:08:24,145 - root - INFO - step: 10665 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 05:08:24,145 - root - INFO - lr: 4.2942e-05 gnorm: 1.11 [ 6:34:14<18:04:25] +[titan] 2025-10-05 05:08:34,996 - root - INFO - step: 10670 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 05:08:34,996 - root - INFO - lr: 4.2935e-05 gnorm: 1.10 [ 6:34:25<18:04:13] +[titan] 2025-10-05 05:08:45,876 - root - INFO - step: 10675 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1231 +[titan] 2025-10-05 05:08:45,876 - root - INFO - lr: 4.2929e-05 gnorm: 1.11 [ 6:34:36<18:04:01] +[titan] 2025-10-05 05:08:56,738 - root - INFO - step: 10680 loss: 2.4221 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1374 +[titan] 2025-10-05 05:08:56,738 - root - INFO - lr: 
4.2922e-05 gnorm: 1.12 [ 6:34:47<18:03:49] +[titan] 2025-10-05 05:09:07,575 - root - INFO - step: 10685 loss: 2.4893 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1990 +[titan] 2025-10-05 05:09:07,575 - root - INFO - lr: 4.2916e-05 gnorm: 1.14 [ 6:34:58<18:03:38] +[titan] 2025-10-05 05:09:18,438 - root - INFO - step: 10690 loss: 2.3907 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1112 +[titan] 2025-10-05 05:09:18,438 - root - INFO - lr: 4.2909e-05 gnorm: 1.15 [ 6:35:09<18:03:26] +[titan] 2025-10-05 05:09:29,320 - root - INFO - step: 10695 loss: 2.3485 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0733 +[titan] 2025-10-05 05:09:29,320 - root - INFO - lr: 4.2903e-05 gnorm: 1.12 [ 6:35:20<18:03:14] +[titan] 2025-10-05 05:09:38,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:09:40,188 - root - INFO - step: 10700 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0915 +[titan] 2025-10-05 05:09:40,188 - root - INFO - lr: 4.2896e-05 gnorm: 1.13 [ 6:35:31<18:03:03] +[titan] 2025-10-05 05:09:51,053 - root - INFO - step: 10705 loss: 2.4598 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1721 +[titan] 2025-10-05 05:09:51,054 - root - INFO - lr: 4.2890e-05 gnorm: 1.14 [ 6:35:41<18:02:51] +[titan] 2025-10-05 05:10:01,930 - root - INFO - step: 10710 loss: 2.4459 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 05:10:01,930 - root - INFO - lr: 4.2883e-05 gnorm: 1.13 [ 6:35:52<18:02:39] +[titan] 2025-10-05 05:10:12,779 - root - INFO - step: 10715 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:10:12,779 - root - INFO - lr: 4.2877e-05 gnorm: 1.10 [ 6:36:03<18:02:27] +[titan] 2025-10-05 05:10:23,641 - root - INFO - step: 10720 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0671 +[titan] 2025-10-05 05:10:23,641 - root - INFO - lr: 4.2870e-05 gnorm: 1.07 [ 6:36:14<18:02:16] +[titan] 2025-10-05 05:10:34,518 - root - INFO - step: 10725 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 05:10:34,518 - root - INFO - lr: 4.2864e-05 gnorm: 1.07 [ 6:36:25<18:02:04] +[titan] 2025-10-05 05:10:45,426 - root - INFO - step: 10730 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0982 +[titan] 2025-10-05 05:10:45,426 - root - INFO - lr: 
4.2857e-05 gnorm: 1.17 [ 6:36:36<18:01:52] +[titan] 2025-10-05 05:10:56,306 - root - INFO - step: 10735 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 05:10:56,306 - root - INFO - lr: 4.2851e-05 gnorm: 1.12 [ 6:36:47<18:01:41] +[titan] 2025-10-05 05:11:07,161 - root - INFO - step: 10740 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:11:07,161 - root - INFO - lr: 4.2844e-05 gnorm: 1.17 [ 6:36:57<18:01:29] +[titan] 2025-10-05 05:11:18,031 - root - INFO - step: 10745 loss: 2.3429 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0692 +[titan] 2025-10-05 05:11:18,031 - root - INFO - lr: 4.2837e-05 gnorm: 1.13 [ 6:37:08<18:01:17] +[titan] 2025-10-05 05:11:26,767 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:11:28,948 - root - INFO - step: 10750 loss: 2.2983 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 05:11:28,948 - root - INFO - lr: 4.2831e-05 gnorm: 1.14 [ 6:37:19<18:01:06] +[titan] 2025-10-05 05:11:33,455 - root - INFO - Dumping profiler traces at step 10752 +[titan] 2025-10-05 05:11:33,493 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:11:40,090 - root - INFO - step: 10755 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 29,410 tflops: 408.02 mfu: 41.26% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 05:11:40,090 - root - INFO - lr: 4.2824e-05 gnorm: 1.14 [ 6:37:30<18:00:55] +[titan] 2025-10-05 05:11:50,993 - root - INFO - step: 10760 loss: 2.3455 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0704 +[titan] 2025-10-05 05:11:50,993 - root - INFO - lr: 4.2818e-05 gnorm: 1.14 [ 6:37:41<18:00:43] +[titan] 2025-10-05 05:12:01,856 - root - INFO - step: 10765 loss: 2.3069 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0370 +[titan] 2025-10-05 05:12:01,857 - root - INFO - lr: 4.2811e-05 gnorm: 1.12 [ 6:37:52<18:00:32] +[titan] 2025-10-05 05:12:12,697 - root - INFO - step: 10770 loss: 2.3339 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 05:12:12,697 - root - INFO - lr: 4.2805e-05 gnorm: 1.09 [ 6:38:03<18:00:20] +[titan] 2025-10-05 05:12:23,573 - root - INFO - step: 10775 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1096 +[titan] 2025-10-05 05:12:23,573 - root - INFO - lr: 4.2798e-05 gnorm: 1.09 [ 6:38:14<18:00:08] +[titan] 2025-10-05 05:12:34,428 - root - INFO - step: 10780 loss: 
2.2969 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0279 +[titan] 2025-10-05 05:12:34,428 - root - INFO - lr: 4.2792e-05 gnorm: 1.09 [ 6:38:25<17:59:56] +[titan] 2025-10-05 05:12:45,414 - root - INFO - step: 10785 loss: 2.3471 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 05:12:45,414 - root - INFO - lr: 4.2785e-05 gnorm: 1.13 [ 6:38:36<17:59:45] +[titan] 2025-10-05 05:12:56,296 - root - INFO - step: 10790 loss: 2.3752 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0968 +[titan] 2025-10-05 05:12:56,297 - root - INFO - lr: 4.2779e-05 gnorm: 1.12 [ 6:38:47<17:59:33] +[titan] 2025-10-05 05:13:07,167 - root - INFO - step: 10795 loss: 2.3683 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:13:07,167 - root - INFO - lr: 4.2772e-05 gnorm: 1.15 [ 6:38:57<17:59:22] +[titan] 2025-10-05 05:13:15,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:13:18,033 - root - INFO - step: 10800 loss: 2.3892 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1094 +[titan] 2025-10-05 05:13:18,033 - root - INFO - lr: 4.2765e-05 gnorm: 1.12 [ 6:39:08<17:59:10] +[titan] 2025-10-05 05:13:28,909 - root - INFO - step: 10805 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0879 +[titan] 2025-10-05 05:13:28,909 - root - INFO - lr: 4.2759e-05 gnorm: 1.13 [ 6:39:19<17:58:58] +[titan] 2025-10-05 05:13:39,766 - root - INFO - step: 10810 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 05:13:39,766 - root - INFO - lr: 4.2752e-05 gnorm: 1.11 [ 6:39:30<17:58:47] +[titan] 2025-10-05 05:13:50,697 - root - INFO - step: 10815 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1334 +[titan] 2025-10-05 05:13:50,697 - root - INFO - lr: 4.2746e-05 gnorm: 1.13 [ 6:39:41<17:58:35] +[titan] 2025-10-05 05:14:01,553 - root - INFO - step: 10820 loss: 2.3463 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:14:01,554 - root - INFO - lr: 4.2739e-05 gnorm: 1.09 [ 6:39:52<17:58:24] +[titan] 2025-10-05 05:14:12,442 - root - INFO - step: 10825 loss: 2.3705 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0925 +[titan] 2025-10-05 05:14:12,442 - root - INFO - lr: 4.2733e-05 gnorm: 1.17 [ 6:40:03<17:58:12] +[titan] 2025-10-05 05:14:23,285 - root - INFO - step: 10830 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:14:23,285 - root - INFO - lr: 
4.2726e-05 gnorm: 1.13 [ 6:40:14<17:58:00] +[titan] 2025-10-05 05:14:34,165 - root - INFO - step: 10835 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 05:14:34,166 - root - INFO - lr: 4.2720e-05 gnorm: 1.16 [ 6:40:24<17:57:49] +[titan] 2025-10-05 05:14:45,051 - root - INFO - step: 10840 loss: 2.3728 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0952 +[titan] 2025-10-05 05:14:45,051 - root - INFO - lr: 4.2713e-05 gnorm: 1.13 [ 6:40:35<17:57:37] +[titan] 2025-10-05 05:14:55,878 - root - INFO - step: 10845 loss: 2.4128 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 05:14:55,878 - root - INFO - lr: 4.2706e-05 gnorm: 1.10 [ 6:40:46<17:57:25] +[titan] 2025-10-05 05:15:04,524 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:15:06,705 - root - INFO - step: 10850 loss: 2.3718 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:15:06,705 - root - INFO - lr: 4.2700e-05 gnorm: 1.12 [ 6:40:57<17:57:13] +[titan] 2025-10-05 05:15:17,575 - root - INFO - step: 10855 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0802 +[titan] 2025-10-05 05:15:17,575 - root - INFO - lr: 4.2693e-05 gnorm: 1.14 [ 6:41:08<17:57:02] +[titan] 2025-10-05 05:15:28,456 - root - INFO - step: 10860 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0820 +[titan] 2025-10-05 05:15:28,456 - root - INFO - lr: 4.2687e-05 gnorm: 1.13 [ 6:41:19<17:56:50] +[titan] 2025-10-05 05:15:39,313 - root - INFO - step: 10865 loss: 2.4256 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 05:15:39,313 - root - INFO - lr: 4.2680e-05 gnorm: 1.10 [ 6:41:30<17:56:38] +[titan] 2025-10-05 05:15:50,205 - root - INFO - step: 10870 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 05:15:50,205 - root - INFO - lr: 4.2673e-05 gnorm: 1.13 [ 6:41:41<17:56:27] +[titan] 2025-10-05 05:16:01,082 - root - INFO - step: 10875 loss: 2.3634 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:16:01,082 - root - INFO - lr: 4.2667e-05 gnorm: 1.15 [ 6:41:51<17:56:15] +[titan] 2025-10-05 05:16:11,946 - root - INFO - step: 10880 loss: 2.3075 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 05:16:11,946 - root - INFO - lr: 
4.2660e-05 gnorm: 1.14 [ 6:42:02<17:56:03] +[titan] 2025-10-05 05:16:22,841 - root - INFO - step: 10885 loss: 2.4065 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1258 +[titan] 2025-10-05 05:16:22,841 - root - INFO - lr: 4.2654e-05 gnorm: 1.21 [ 6:42:13<17:55:52] +[titan] 2025-10-05 05:16:33,734 - root - INFO - step: 10890 loss: 2.3635 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0864 +[titan] 2025-10-05 05:16:33,734 - root - INFO - lr: 4.2647e-05 gnorm: 1.10 [ 6:42:24<17:55:40] +[titan] 2025-10-05 05:16:44,609 - root - INFO - step: 10895 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1223 +[titan] 2025-10-05 05:16:44,609 - root - INFO - lr: 4.2640e-05 gnorm: 1.11 [ 6:42:35<17:55:29] +[titan] 2025-10-05 05:16:53,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:16:55,473 - root - INFO - step: 10900 loss: 2.3494 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0759 +[titan] 2025-10-05 05:16:55,473 - root - INFO - lr: 4.2634e-05 gnorm: 1.15 [ 6:42:46<17:55:17] +[titan] 2025-10-05 05:17:06,345 - root - INFO - step: 10905 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 05:17:06,345 - root - INFO - lr: 4.2627e-05 gnorm: 1.13 [ 6:42:57<17:55:05] +[titan] 2025-10-05 05:17:17,231 - root - INFO - step: 10910 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0659 +[titan] 2025-10-05 05:17:17,231 - root - INFO - lr: 4.2621e-05 gnorm: 1.17 [ 6:43:08<17:54:54] +[titan] 2025-10-05 05:17:28,109 - root - INFO - step: 10915 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0894 +[titan] 2025-10-05 05:17:28,110 - root - INFO - lr: 4.2614e-05 gnorm: 1.19 [ 6:43:18<17:54:42] +[titan] 2025-10-05 05:17:39,014 - root - INFO - step: 10920 loss: 2.3277 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:17:39,014 - root - INFO - lr: 4.2607e-05 gnorm: 1.14 [ 6:43:29<17:54:30] +[titan] 2025-10-05 05:17:49,944 - root - INFO - step: 10925 loss: 2.3202 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0487 +[titan] 2025-10-05 05:17:49,944 - root - INFO - lr: 4.2601e-05 gnorm: 1.12 [ 6:43:40<17:54:19] +[titan] 2025-10-05 05:18:00,806 - root - INFO - step: 10930 loss: 2.3343 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0611 +[titan] 2025-10-05 05:18:00,807 - root - INFO - lr: 
4.2594e-05 gnorm: 1.12 [ 6:43:51<17:54:07] +[titan] 2025-10-05 05:18:11,668 - root - INFO - step: 10935 loss: 2.4012 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1192 +[titan] 2025-10-05 05:18:11,669 - root - INFO - lr: 4.2588e-05 gnorm: 1.13 [ 6:44:02<17:53:56] +[titan] 2025-10-05 05:18:22,533 - root - INFO - step: 10940 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:18:22,533 - root - INFO - lr: 4.2581e-05 gnorm: 1.10 [ 6:44:13<17:53:44] +[titan] 2025-10-05 05:18:33,393 - root - INFO - step: 10945 loss: 2.3284 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0574 +[titan] 2025-10-05 05:18:33,393 - root - INFO - lr: 4.2574e-05 gnorm: 1.16 [ 6:44:24<17:53:32] +[titan] 2025-10-05 05:18:42,068 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:18:44,246 - root - INFO - step: 10950 loss: 2.3482 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0732 +[titan] 2025-10-05 05:18:44,246 - root - INFO - lr: 4.2568e-05 gnorm: 1.17 [ 6:44:35<17:53:20] +[titan] 2025-10-05 05:18:55,149 - root - INFO - step: 10955 loss: 2.4275 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 05:18:55,149 - root - INFO - lr: 4.2561e-05 gnorm: 1.19 [ 6:44:45<17:53:09] +[titan] 2025-10-05 05:19:06,006 - root - INFO - step: 10960 loss: 2.3559 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 05:19:06,006 - root - INFO - lr: 4.2554e-05 gnorm: 1.17 [ 6:44:56<17:52:57] +[titan] 2025-10-05 05:19:16,844 - root - INFO - step: 10965 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0690 +[titan] 2025-10-05 05:19:16,844 - root - INFO - lr: 4.2548e-05 gnorm: 1.13 [ 6:45:07<17:52:45] +[titan] 2025-10-05 05:19:27,707 - root - INFO - step: 10970 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0665 +[titan] 2025-10-05 05:19:27,707 - root - INFO - lr: 4.2541e-05 gnorm: 1.11 [ 6:45:18<17:52:34] +[titan] 2025-10-05 05:19:38,565 - root - INFO - step: 10975 loss: 2.4017 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1202 +[titan] 2025-10-05 05:19:38,565 - root - INFO - lr: 4.2535e-05 gnorm: 1.13 [ 6:45:29<17:52:22] +[titan] 2025-10-05 05:19:49,430 - root - INFO - step: 10980 loss: 2.3707 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0929 +[titan] 2025-10-05 05:19:49,430 - root - INFO - lr: 
4.2528e-05 gnorm: 1.14 [ 6:45:40<17:52:10] +[titan] 2025-10-05 05:20:00,329 - root - INFO - step: 10985 loss: 2.3910 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 05:20:00,329 - root - INFO - lr: 4.2521e-05 gnorm: 1.11 [ 6:45:51<17:51:59] +[titan] 2025-10-05 05:20:11,199 - root - INFO - step: 10990 loss: 2.2943 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 05:20:11,199 - root - INFO - lr: 4.2515e-05 gnorm: 1.15 [ 6:46:02<17:51:47] +[titan] 2025-10-05 05:20:22,060 - root - INFO - step: 10995 loss: 2.4220 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1390 +[titan] 2025-10-05 05:20:22,060 - root - INFO - lr: 4.2508e-05 gnorm: 1.17 [ 6:46:12<17:51:36] +[titan] 2025-10-05 05:20:30,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:20:32,950 - root - INFO - step: 11000 loss: 2.4329 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 05:20:32,950 - root - INFO - lr: 4.2501e-05 gnorm: 1.13 [ 6:46:23<17:51:24] +[titan] 2025-10-05 05:20:43,793 - root - INFO - step: 11005 loss: 2.3674 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0912 +[titan] 2025-10-05 05:20:43,793 - root - INFO - lr: 4.2495e-05 gnorm: 1.13 [ 6:46:34<17:51:12] +[titan] 2025-10-05 05:20:54,676 - root - INFO - step: 11010 loss: 2.3859 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.1074 +[titan] 2025-10-05 05:20:54,677 - root - INFO - lr: 4.2488e-05 gnorm: 1.23 [ 6:46:45<17:51:01] +[titan] 2025-10-05 05:21:05,537 - root - INFO - step: 11015 loss: 2.4219 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 05:21:05,537 - root - INFO - lr: 4.2481e-05 gnorm: 1.14 [ 6:46:56<17:50:49] +[titan] 2025-10-05 05:21:16,444 - root - INFO - step: 11020 loss: 2.3693 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0921 +[titan] 2025-10-05 05:21:16,444 - root - INFO - lr: 4.2475e-05 gnorm: 1.15 [ 6:47:07<17:50:37] +[titan] 2025-10-05 05:21:27,322 - root - INFO - step: 11025 loss: 2.4120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1303 +[titan] 2025-10-05 05:21:27,323 - root - INFO - lr: 4.2468e-05 gnorm: 1.14 [ 6:47:18<17:50:26] +[titan] 2025-10-05 05:21:38,201 - root - INFO - step: 11030 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2721 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:21:38,202 - root - INFO - lr: 
4.2461e-05 gnorm: 1.11 [ 6:47:29<17:50:14] +[titan] 2025-10-05 05:21:49,263 - root - INFO - step: 11035 loss: 2.3662 memory: 118.84GiB(85.28%) tps: 29,623 tflops: 410.98 mfu: 41.55% global_avg_ntp_loss: 0.2773 global_avg_mtp_loss: 2.0889 +[titan] 2025-10-05 05:21:49,264 - root - INFO - lr: 4.2455e-05 gnorm: 1.06 [ 6:47:40<17:50:03] +[titan] 2025-10-05 05:22:00,112 - root - INFO - step: 11040 loss: 2.3713 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0938 +[titan] 2025-10-05 05:22:00,112 - root - INFO - lr: 4.2448e-05 gnorm: 1.16 [ 6:47:50<17:49:51] +[titan] 2025-10-05 05:22:10,978 - root - INFO - step: 11045 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0794 +[titan] 2025-10-05 05:22:10,978 - root - INFO - lr: 4.2441e-05 gnorm: 1.12 [ 6:48:01<17:49:40] +[titan] 2025-10-05 05:22:19,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:22:21,866 - root - INFO - step: 11050 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:22:21,866 - root - INFO - lr: 4.2435e-05 gnorm: 1.18 [ 6:48:12<17:49:28] +[titan] 2025-10-05 05:22:32,725 - root - INFO - step: 11055 loss: 2.4619 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1734 +[titan] 2025-10-05 05:22:32,725 - root - INFO - lr: 4.2428e-05 gnorm: 1.17 [ 6:48:23<17:49:16] +[titan] 2025-10-05 05:22:43,603 - root - INFO - step: 11060 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 05:22:43,604 - root - INFO - lr: 4.2421e-05 gnorm: 1.18 [ 6:48:34<17:49:05] +[titan] 2025-10-05 05:22:54,557 - root - INFO - step: 11065 loss: 2.3059 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0363 +[titan] 2025-10-05 05:22:54,558 - root - INFO - lr: 4.2415e-05 gnorm: 1.11 [ 6:48:45<17:48:53] +[titan] 2025-10-05 05:23:05,447 - root - INFO - step: 11070 loss: 2.3833 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1048 +[titan] 2025-10-05 05:23:05,447 - root - INFO - lr: 4.2408e-05 gnorm: 1.15 [ 6:48:56<17:48:42] +[titan] 2025-10-05 05:23:16,319 - root - INFO - step: 11075 loss: 2.3472 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:23:16,319 - root - INFO - lr: 4.2401e-05 gnorm: 1.12 [ 6:49:07<17:48:30] +[titan] 2025-10-05 05:23:27,231 - root - INFO - step: 11080 loss: 2.3159 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0451 +[titan] 2025-10-05 05:23:27,231 - root - INFO - lr: 
4.2395e-05 gnorm: 1.15 [ 6:49:18<17:48:19] +[titan] 2025-10-05 05:23:38,120 - root - INFO - step: 11085 loss: 2.3918 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 05:23:38,120 - root - INFO - lr: 4.2388e-05 gnorm: 1.10 [ 6:49:28<17:48:07] +[titan] 2025-10-05 05:23:48,999 - root - INFO - step: 11090 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:23:48,999 - root - INFO - lr: 4.2381e-05 gnorm: 1.12 [ 6:49:39<17:47:55] +[titan] 2025-10-05 05:23:59,936 - root - INFO - step: 11095 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:23:59,936 - root - INFO - lr: 4.2375e-05 gnorm: 1.15 [ 6:49:50<17:47:44] +[titan] 2025-10-05 05:24:08,638 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:24:10,828 - root - INFO - step: 11100 loss: 2.3700 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:24:10,828 - root - INFO - lr: 4.2368e-05 gnorm: 1.16 [ 6:50:01<17:47:32] +[titan] 2025-10-05 05:24:21,716 - root - INFO - step: 11105 loss: 2.3080 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 05:24:21,716 - root - INFO - lr: 4.2361e-05 gnorm: 1.11 [ 6:50:12<17:47:21] +[titan] 2025-10-05 05:24:32,601 - root - INFO - step: 11110 loss: 2.3389 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0656 +[titan] 2025-10-05 05:24:32,602 - root - INFO - lr: 4.2354e-05 gnorm: 1.18 [ 6:50:23<17:47:09] +[titan] 2025-10-05 05:24:43,497 - root - INFO - step: 11115 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:24:43,497 - root - INFO - lr: 4.2348e-05 gnorm: 1.16 [ 6:50:34<17:46:58] +[titan] 2025-10-05 05:24:54,381 - root - INFO - step: 11120 loss: 2.3434 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0676 +[titan] 2025-10-05 05:24:54,382 - root - INFO - lr: 4.2341e-05 gnorm: 1.17 [ 6:50:45<17:46:46] +[titan] 2025-10-05 05:25:05,236 - root - INFO - step: 11125 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:25:05,236 - root - INFO - lr: 4.2334e-05 gnorm: 1.14 [ 6:50:56<17:46:34] +[titan] 2025-10-05 05:25:16,090 - root - INFO - step: 11130 loss: 2.3586 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0834 +[titan] 2025-10-05 05:25:16,091 - root - INFO - lr: 
4.2328e-05 gnorm: 1.10 [ 6:51:06<17:46:23] +[titan] 2025-10-05 05:25:26,938 - root - INFO - step: 11135 loss: 2.3923 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 05:25:26,939 - root - INFO - lr: 4.2321e-05 gnorm: 1.15 [ 6:51:17<17:46:11] +[titan] 2025-10-05 05:25:37,783 - root - INFO - step: 11140 loss: 2.3864 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 05:25:37,783 - root - INFO - lr: 4.2314e-05 gnorm: 1.15 [ 6:51:28<17:45:59] +[titan] 2025-10-05 05:25:48,642 - root - INFO - step: 11145 loss: 2.3257 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0532 +[titan] 2025-10-05 05:25:48,642 - root - INFO - lr: 4.2307e-05 gnorm: 1.12 [ 6:51:39<17:45:48] +[titan] 2025-10-05 05:25:57,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:25:59,544 - root - INFO - step: 11150 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0565 +[titan] 2025-10-05 05:25:59,544 - root - INFO - lr: 4.2301e-05 gnorm: 1.12 [ 6:51:50<17:45:36] +[titan] 2025-10-05 05:26:10,397 - root - INFO - step: 11155 loss: 2.3187 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0478 +[titan] 2025-10-05 05:26:10,397 - root - INFO - lr: 4.2294e-05 gnorm: 1.08 [ 6:52:01<17:45:24] +[titan] 2025-10-05 05:26:21,273 - root - INFO - step: 11160 loss: 2.3623 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0869 +[titan] 2025-10-05 05:26:21,273 - root - INFO - lr: 4.2287e-05 gnorm: 1.14 [ 6:52:12<17:45:13] +[titan] 2025-10-05 05:26:32,142 - root - INFO - step: 11165 loss: 2.3541 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:26:32,142 - root - INFO - lr: 4.2281e-05 gnorm: 1.13 [ 6:52:22<17:45:01] +[titan] 2025-10-05 05:26:43,035 - root - INFO - step: 11170 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 05:26:43,035 - root - INFO - lr: 4.2274e-05 gnorm: 1.13 [ 6:52:33<17:44:50] +[titan] 2025-10-05 05:26:53,989 - root - INFO - step: 11175 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.04 mfu: 41.97% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 05:26:53,989 - root - INFO - lr: 4.2267e-05 gnorm: 1.12 [ 6:52:44<17:44:38] +[titan] 2025-10-05 05:27:04,880 - root - INFO - step: 11180 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0666 +[titan] 2025-10-05 05:27:04,880 - root - INFO - lr: 
4.2260e-05 gnorm: 1.19 [ 6:52:55<17:44:27] +[titan] 2025-10-05 05:27:15,757 - root - INFO - step: 11185 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0699 +[titan] 2025-10-05 05:27:15,757 - root - INFO - lr: 4.2254e-05 gnorm: 1.15 [ 6:53:06<17:44:15] +[titan] 2025-10-05 05:27:26,622 - root - INFO - step: 11190 loss: 2.3961 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1162 +[titan] 2025-10-05 05:27:26,622 - root - INFO - lr: 4.2247e-05 gnorm: 1.10 [ 6:53:17<17:44:03] +[titan] 2025-10-05 05:27:37,484 - root - INFO - step: 11195 loss: 2.3721 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 05:27:37,484 - root - INFO - lr: 4.2240e-05 gnorm: 1.15 [ 6:53:28<17:43:52] +[titan] 2025-10-05 05:27:46,183 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:27:48,372 - root - INFO - step: 11200 loss: 2.3645 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:27:48,372 - root - INFO - lr: 4.2233e-05 gnorm: 1.17 [ 6:53:39<17:43:40] +[titan] 2025-10-05 05:27:59,307 - root - INFO - step: 11205 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:27:59,307 - root - INFO - lr: 4.2227e-05 gnorm: 1.09 [ 6:53:50<17:43:29] +[titan] 2025-10-05 05:28:10,176 - root - INFO - step: 11210 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0464 +[titan] 2025-10-05 05:28:10,176 - root - INFO - lr: 4.2220e-05 gnorm: 1.15 [ 6:54:00<17:43:17] +[titan] 2025-10-05 05:28:21,076 - root - INFO - step: 11215 loss: 2.3354 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 05:28:21,076 - root - INFO - lr: 4.2213e-05 gnorm: 1.14 [ 6:54:11<17:43:06] +[titan] 2025-10-05 05:28:31,935 - root - INFO - step: 11220 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0592 +[titan] 2025-10-05 05:28:31,935 - root - INFO - lr: 4.2206e-05 gnorm: 1.10 [ 6:54:22<17:42:54] +[titan] 2025-10-05 05:28:42,804 - root - INFO - step: 11225 loss: 2.2877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 05:28:42,805 - root - INFO - lr: 4.2200e-05 gnorm: 1.15 [ 6:54:33<17:42:42] +[titan] 2025-10-05 05:28:53,662 - root - INFO - step: 11230 loss: 2.3995 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 05:28:53,662 - root - INFO - lr: 
4.2193e-05 gnorm: 1.17 [ 6:54:44<17:42:31] +[titan] 2025-10-05 05:29:04,634 - root - INFO - step: 11235 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 05:29:04,634 - root - INFO - lr: 4.2186e-05 gnorm: 1.17 [ 6:54:55<17:42:19] +[titan] 2025-10-05 05:29:15,534 - root - INFO - step: 11240 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0842 +[titan] 2025-10-05 05:29:15,535 - root - INFO - lr: 4.2179e-05 gnorm: 1.12 [ 6:55:06<17:42:08] +[titan] 2025-10-05 05:29:26,383 - root - INFO - step: 11245 loss: 2.3641 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0871 +[titan] 2025-10-05 05:29:26,383 - root - INFO - lr: 4.2173e-05 gnorm: 1.08 [ 6:55:17<17:41:56] +[titan] 2025-10-05 05:29:35,042 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:29:37,226 - root - INFO - step: 11250 loss: 2.3893 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 05:29:37,226 - root - INFO - lr: 4.2166e-05 gnorm: 1.11 [ 6:55:28<17:41:44] +[titan] 2025-10-05 05:29:48,080 - root - INFO - step: 11255 loss: 2.3315 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0589 +[titan] 2025-10-05 05:29:48,080 - root - INFO - lr: 4.2159e-05 gnorm: 1.15 [ 6:55:38<17:41:33] +[titan] 2025-10-05 05:29:58,912 - root - INFO - step: 11260 loss: 2.3790 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1000 +[titan] 2025-10-05 05:29:58,912 - root - INFO - lr: 4.2152e-05 gnorm: 1.11 [ 6:55:49<17:41:21] +[titan] 2025-10-05 05:30:07,829 - root - INFO - Dumping profiler traces at step 11264 +[titan] 2025-10-05 05:30:07,867 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:30:10,050 - root - INFO - step: 11265 loss: 2.2811 memory: 118.84GiB(85.28%) tps: 29,420 tflops: 408.16 mfu: 41.27% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 05:30:10,051 - root - INFO - lr: 4.2146e-05 gnorm: 1.10 [ 6:56:00<17:41:10] +[titan] 2025-10-05 05:30:20,892 - root - INFO - step: 11270 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0809 +[titan] 2025-10-05 05:30:20,892 - root - INFO - lr: 4.2139e-05 gnorm: 1.12 [ 6:56:11<17:40:58] +[titan] 2025-10-05 05:30:31,735 - root - INFO - step: 11275 loss: 2.3738 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0947 +[titan] 2025-10-05 05:30:31,735 - root - INFO - lr: 4.2132e-05 gnorm: 1.10 [ 6:56:22<17:40:47] +[titan] 2025-10-05 05:30:42,574 - root - INFO - step: 11280 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 05:30:42,574 - root - INFO - lr: 4.2125e-05 gnorm: 1.10 [ 6:56:33<17:40:35] +[titan] 2025-10-05 05:30:53,426 - root - INFO - step: 11285 loss: 2.3915 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1121 +[titan] 2025-10-05 05:30:53,426 - root - INFO - lr: 4.2118e-05 gnorm: 1.14 [ 6:56:44<17:40:23] +[titan] 2025-10-05 05:31:04,306 - root - INFO - step: 11290 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1164 +[titan] 2025-10-05 05:31:04,307 - root - INFO - lr: 4.2112e-05 gnorm: 1.16 [ 6:56:55<17:40:12] +[titan] 2025-10-05 05:31:15,165 - root - INFO - step: 11295 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 05:31:15,165 - root - INFO - lr: 4.2105e-05 gnorm: 1.16 [ 6:57:05<17:40:00] +[titan] 2025-10-05 05:31:23,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:31:26,028 - root - INFO - step: 11300 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1297 +[titan] 2025-10-05 05:31:26,028 - root - INFO - lr: 4.2098e-05 gnorm: 1.16 [ 6:57:16<17:39:49] +[titan] 2025-10-05 05:31:36,890 - root - INFO - step: 11305 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 05:31:36,890 - root - INFO - lr: 4.2091e-05 gnorm: 1.19 [ 6:57:27<17:39:37] +[titan] 2025-10-05 05:31:47,751 - root - INFO - step: 11310 loss: 2.3629 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0867 +[titan] 2025-10-05 05:31:47,751 - root - INFO - lr: 4.2084e-05 gnorm: 1.13 [ 6:57:38<17:39:25] +[titan] 2025-10-05 05:31:58,646 - root - INFO - step: 11315 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0333 +[titan] 2025-10-05 05:31:58,646 - root - INFO - lr: 4.2078e-05 gnorm: 1.14 [ 6:57:49<17:39:14] +[titan] 2025-10-05 05:32:09,512 - root - INFO - step: 11320 loss: 2.4605 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 05:32:09,512 - root - INFO - lr: 4.2071e-05 gnorm: 1.15 [ 6:58:00<17:39:02] +[titan] 2025-10-05 05:32:20,392 - root - INFO - step: 11325 loss: 2.3568 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0807 +[titan] 2025-10-05 05:32:20,392 - root - INFO - lr: 4.2064e-05 gnorm: 1.12 [ 6:58:11<17:38:51] +[titan] 2025-10-05 05:32:31,290 - root - INFO - step: 11330 loss: 2.4028 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1208 +[titan] 2025-10-05 05:32:31,290 - root - INFO - lr: 
4.2057e-05 gnorm: 1.14 [ 6:58:22<17:38:39] +[titan] 2025-10-05 05:32:42,174 - root - INFO - step: 11335 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:32:42,174 - root - INFO - lr: 4.2050e-05 gnorm: 1.16 [ 6:58:32<17:38:27] +[titan] 2025-10-05 05:32:53,063 - root - INFO - step: 11340 loss: 2.3303 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0571 +[titan] 2025-10-05 05:32:53,064 - root - INFO - lr: 4.2044e-05 gnorm: 1.10 [ 6:58:43<17:38:16] +[titan] 2025-10-05 05:33:03,971 - root - INFO - step: 11345 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.1089 +[titan] 2025-10-05 05:33:03,972 - root - INFO - lr: 4.2037e-05 gnorm: 1.10 [ 6:58:54<17:38:04] +[titan] 2025-10-05 05:33:12,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:33:14,854 - root - INFO - step: 11350 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:33:14,854 - root - INFO - lr: 4.2030e-05 gnorm: 1.16 [ 6:59:05<17:37:53] +[titan] 2025-10-05 05:33:25,725 - root - INFO - step: 11355 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:33:25,725 - root - INFO - lr: 4.2023e-05 gnorm: 1.14 [ 6:59:16<17:37:41] +[titan] 2025-10-05 05:33:36,578 - root - INFO - step: 11360 loss: 2.2858 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0185 +[titan] 2025-10-05 05:33:36,578 - root - INFO - lr: 4.2016e-05 gnorm: 1.08 [ 6:59:27<17:37:30] +[titan] 2025-10-05 05:33:47,452 - root - INFO - step: 11365 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:33:47,452 - root - INFO - lr: 4.2010e-05 gnorm: 1.07 [ 6:59:38<17:37:18] +[titan] 2025-10-05 05:33:58,347 - root - INFO - step: 11370 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0475 +[titan] 2025-10-05 05:33:58,347 - root - INFO - lr: 4.2003e-05 gnorm: 1.09 [ 6:59:49<17:37:06] +[titan] 2025-10-05 05:34:09,277 - root - INFO - step: 11375 loss: 2.4178 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1340 +[titan] 2025-10-05 05:34:09,277 - root - INFO - lr: 4.1996e-05 gnorm: 1.13 [ 7:00:00<17:36:55] +[titan] 2025-10-05 05:34:20,157 - root - INFO - step: 11380 loss: 2.3349 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:34:20,157 - root - INFO - lr: 
4.1989e-05 gnorm: 1.18 [ 7:00:10<17:36:43] +[titan] 2025-10-05 05:34:31,049 - root - INFO - step: 11385 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:34:31,049 - root - INFO - lr: 4.1982e-05 gnorm: 1.10 [ 7:00:21<17:36:32] +[titan] 2025-10-05 05:34:41,929 - root - INFO - step: 11390 loss: 2.4099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1279 +[titan] 2025-10-05 05:34:41,929 - root - INFO - lr: 4.1975e-05 gnorm: 1.10 [ 7:00:32<17:36:20] +[titan] 2025-10-05 05:34:52,785 - root - INFO - step: 11395 loss: 2.3564 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:34:52,785 - root - INFO - lr: 4.1969e-05 gnorm: 1.15 [ 7:00:43<17:36:09] +[titan] 2025-10-05 05:35:01,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:35:03,690 - root - INFO - step: 11400 loss: 2.4143 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1315 +[titan] 2025-10-05 05:35:03,690 - root - INFO - lr: 4.1962e-05 gnorm: 1.14 [ 7:00:54<17:35:57] +[titan] 2025-10-05 05:35:14,535 - root - INFO - step: 11405 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 05:35:14,536 - root - INFO - lr: 4.1955e-05 gnorm: 1.17 [ 7:01:05<17:35:46] +[titan] 2025-10-05 05:35:25,412 - root - INFO - step: 11410 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0477 +[titan] 2025-10-05 05:35:25,412 - root - INFO - lr: 4.1948e-05 gnorm: 1.13 [ 7:01:16<17:35:34] +[titan] 2025-10-05 05:35:36,263 - root - INFO - step: 11415 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0351 +[titan] 2025-10-05 05:35:36,263 - root - INFO - lr: 4.1941e-05 gnorm: 1.12 [ 7:01:27<17:35:22] +[titan] 2025-10-05 05:35:47,122 - root - INFO - step: 11420 loss: 2.3875 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 05:35:47,122 - root - INFO - lr: 4.1934e-05 gnorm: 1.14 [ 7:01:37<17:35:11] +[titan] 2025-10-05 05:35:57,974 - root - INFO - step: 11425 loss: 2.3552 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0795 +[titan] 2025-10-05 05:35:57,974 - root - INFO - lr: 4.1928e-05 gnorm: 1.13 [ 7:01:48<17:34:59] +[titan] 2025-10-05 05:36:08,849 - root - INFO - step: 11430 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 05:36:08,849 - root - INFO - lr: 
4.1921e-05 gnorm: 1.17 [ 7:01:59<17:34:47] +[titan] 2025-10-05 05:36:19,695 - root - INFO - step: 11435 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:36:19,695 - root - INFO - lr: 4.1914e-05 gnorm: 1.16 [ 7:02:10<17:34:36] +[titan] 2025-10-05 05:36:30,564 - root - INFO - step: 11440 loss: 2.3449 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0705 +[titan] 2025-10-05 05:36:30,564 - root - INFO - lr: 4.1907e-05 gnorm: 1.08 [ 7:02:21<17:34:24] +[titan] 2025-10-05 05:36:41,427 - root - INFO - step: 11445 loss: 2.4403 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1539 +[titan] 2025-10-05 05:36:41,427 - root - INFO - lr: 4.1900e-05 gnorm: 1.15 [ 7:02:32<17:34:13] +[titan] 2025-10-05 05:36:50,092 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:36:52,270 - root - INFO - step: 11450 loss: 2.3496 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:36:52,270 - root - INFO - lr: 4.1893e-05 gnorm: 1.14 [ 7:02:43<17:34:01] +[titan] 2025-10-05 05:37:03,144 - root - INFO - step: 11455 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 05:37:03,145 - root - INFO - lr: 4.1886e-05 gnorm: 1.13 [ 7:02:53<17:33:49] +[titan] 2025-10-05 05:37:13,972 - root - INFO - step: 11460 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 05:37:13,973 - root - INFO - lr: 4.1880e-05 gnorm: 1.13 [ 7:03:04<17:33:38] +[titan] 2025-10-05 05:37:24,845 - root - INFO - step: 11465 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0606 +[titan] 2025-10-05 05:37:24,845 - root - INFO - lr: 4.1873e-05 gnorm: 1.16 [ 7:03:15<17:33:26] +[titan] 2025-10-05 05:37:35,703 - root - INFO - step: 11470 loss: 2.3317 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0582 +[titan] 2025-10-05 05:37:35,703 - root - INFO - lr: 4.1866e-05 gnorm: 1.10 [ 7:03:26<17:33:14] +[titan] 2025-10-05 05:37:46,570 - root - INFO - step: 11475 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 05:37:46,570 - root - INFO - lr: 4.1859e-05 gnorm: 1.18 [ 7:03:37<17:33:03] +[titan] 2025-10-05 05:37:57,446 - root - INFO - step: 11480 loss: 2.3142 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0433 +[titan] 2025-10-05 05:37:57,447 - root - INFO - lr: 
4.1852e-05 gnorm: 1.10 [ 7:03:48<17:32:51] +[titan] 2025-10-05 05:38:08,329 - root - INFO - step: 11485 loss: 2.3042 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0343 +[titan] 2025-10-05 05:38:08,329 - root - INFO - lr: 4.1845e-05 gnorm: 1.16 [ 7:03:59<17:32:40] +[titan] 2025-10-05 05:38:19,195 - root - INFO - step: 11490 loss: 2.4232 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1402 +[titan] 2025-10-05 05:38:19,195 - root - INFO - lr: 4.1838e-05 gnorm: 1.17 [ 7:04:09<17:32:28] +[titan] 2025-10-05 05:38:30,073 - root - INFO - step: 11495 loss: 2.3563 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0808 +[titan] 2025-10-05 05:38:30,073 - root - INFO - lr: 4.1831e-05 gnorm: 1.12 [ 7:04:20<17:32:17] +[titan] 2025-10-05 05:38:38,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:38:40,929 - root - INFO - step: 11500 loss: 2.3519 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0760 +[titan] 2025-10-05 05:38:40,929 - root - INFO - lr: 4.1825e-05 gnorm: 1.09 [ 7:04:31<17:32:05] +[titan] 2025-10-05 05:38:51,791 - root - INFO - step: 11505 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 05:38:51,792 - root - INFO - lr: 4.1818e-05 gnorm: 1.18 [ 7:04:42<17:31:53] +[titan] 2025-10-05 05:39:02,689 - root - INFO - step: 11510 loss: 2.3200 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0488 +[titan] 2025-10-05 05:39:02,689 - root - INFO - lr: 4.1811e-05 gnorm: 1.13 [ 7:04:53<17:31:42] +[titan] 2025-10-05 05:39:13,585 - root - INFO - step: 11515 loss: 2.4548 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1671 +[titan] 2025-10-05 05:39:13,586 - root - INFO - lr: 4.1804e-05 gnorm: 1.13 [ 7:05:04<17:31:30] +[titan] 2025-10-05 05:39:24,449 - root - INFO - step: 11520 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0252 +[titan] 2025-10-05 05:39:24,449 - root - INFO - lr: 4.1797e-05 gnorm: 1.15 [ 7:05:15<17:31:19] +[titan] 2025-10-05 05:39:35,295 - root - INFO - step: 11525 loss: 2.2866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 05:39:35,295 - root - INFO - lr: 4.1790e-05 gnorm: 1.07 [ 7:05:26<17:31:07] +[titan] 2025-10-05 05:39:46,183 - root - INFO - step: 11530 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0847 +[titan] 2025-10-05 05:39:46,183 - root - INFO - lr: 
4.1783e-05 gnorm: 1.14 [ 7:05:36<17:30:56] +[titan] 2025-10-05 05:39:57,043 - root - INFO - step: 11535 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 05:39:57,043 - root - INFO - lr: 4.1776e-05 gnorm: 1.14 [ 7:05:47<17:30:44] +[titan] 2025-10-05 05:40:07,933 - root - INFO - step: 11540 loss: 2.3581 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0821 +[titan] 2025-10-05 05:40:07,934 - root - INFO - lr: 4.1769e-05 gnorm: 1.10 [ 7:05:58<17:30:32] +[titan] 2025-10-05 05:40:18,821 - root - INFO - step: 11545 loss: 2.4229 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:40:18,821 - root - INFO - lr: 4.1763e-05 gnorm: 1.15 [ 7:06:09<17:30:21] +[titan] 2025-10-05 05:40:27,478 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:40:29,677 - root - INFO - step: 11550 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0971 +[titan] 2025-10-05 05:40:29,677 - root - INFO - lr: 4.1756e-05 gnorm: 1.16 [ 7:06:20<17:30:09] +[titan] 2025-10-05 05:40:40,531 - root - INFO - step: 11555 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0225 +[titan] 2025-10-05 05:40:40,531 - root - INFO - lr: 4.1749e-05 gnorm: 1.07 [ 7:06:31<17:29:58] +[titan] 2025-10-05 05:40:51,372 - root - INFO - step: 11560 loss: 2.3640 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 05:40:51,372 - root - INFO - lr: 4.1742e-05 gnorm: 1.13 [ 7:06:42<17:29:46] +[titan] 2025-10-05 05:41:02,211 - root - INFO - step: 11565 loss: 2.3067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0371 +[titan] 2025-10-05 05:41:02,211 - root - INFO - lr: 4.1735e-05 gnorm: 1.09 [ 7:06:52<17:29:34] +[titan] 2025-10-05 05:41:13,062 - root - INFO - step: 11570 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:41:13,062 - root - INFO - lr: 4.1728e-05 gnorm: 1.08 [ 7:07:03<17:29:23] +[titan] 2025-10-05 05:41:23,914 - root - INFO - step: 11575 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 05:41:23,914 - root - INFO - lr: 4.1721e-05 gnorm: 1.11 [ 7:07:14<17:29:11] +[titan] 2025-10-05 05:41:34,780 - root - INFO - step: 11580 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0896 +[titan] 2025-10-05 05:41:34,780 - root - INFO - lr: 
4.1714e-05 gnorm: 1.16 [ 7:07:25<17:29:00] +[titan] 2025-10-05 05:41:45,632 - root - INFO - step: 11585 loss: 2.3149 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0440 +[titan] 2025-10-05 05:41:45,632 - root - INFO - lr: 4.1707e-05 gnorm: 1.12 [ 7:07:36<17:28:48] +[titan] 2025-10-05 05:41:56,483 - root - INFO - step: 11590 loss: 2.2891 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0207 +[titan] 2025-10-05 05:41:56,483 - root - INFO - lr: 4.1700e-05 gnorm: 1.11 [ 7:07:47<17:28:36] +[titan] 2025-10-05 05:42:07,367 - root - INFO - step: 11595 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 05:42:07,367 - root - INFO - lr: 4.1693e-05 gnorm: 1.09 [ 7:07:58<17:28:25] +[titan] 2025-10-05 05:42:16,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:42:18,229 - root - INFO - step: 11600 loss: 2.3596 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0839 +[titan] 2025-10-05 05:42:18,229 - root - INFO - lr: 4.1686e-05 gnorm: 1.13 [ 7:08:08<17:28:13] +[titan] 2025-10-05 05:42:29,091 - root - INFO - step: 11605 loss: 2.3723 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0941 +[titan] 2025-10-05 05:42:29,091 - root - INFO - lr: 4.1680e-05 gnorm: 1.11 [ 7:08:19<17:28:02] +[titan] 2025-10-05 05:42:39,944 - root - INFO - step: 11610 loss: 2.3331 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0601 +[titan] 2025-10-05 05:42:39,944 - root - INFO - lr: 4.1673e-05 gnorm: 1.12 [ 7:08:30<17:27:50] +[titan] 2025-10-05 05:42:50,809 - root - INFO - step: 11615 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0661 +[titan] 2025-10-05 05:42:50,809 - root - INFO - lr: 4.1666e-05 gnorm: 1.14 [ 7:08:41<17:27:38] +[titan] 2025-10-05 05:43:01,660 - root - INFO - step: 11620 loss: 2.3817 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1022 +[titan] 2025-10-05 05:43:01,660 - root - INFO - lr: 4.1659e-05 gnorm: 1.16 [ 7:08:52<17:27:27] +[titan] 2025-10-05 05:43:12,542 - root - INFO - step: 11625 loss: 2.3129 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0422 +[titan] 2025-10-05 05:43:12,542 - root - INFO - lr: 4.1652e-05 gnorm: 1.15 [ 7:09:03<17:27:15] +[titan] 2025-10-05 05:43:23,381 - root - INFO - step: 11630 loss: 2.3032 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 05:43:23,381 - root - INFO - lr: 
4.1645e-05 gnorm: 1.17 [ 7:09:14<17:27:04] +[titan] 2025-10-05 05:43:34,203 - root - INFO - step: 11635 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0697 +[titan] 2025-10-05 05:43:34,203 - root - INFO - lr: 4.1638e-05 gnorm: 1.17 [ 7:09:24<17:26:52] +[titan] 2025-10-05 05:43:45,042 - root - INFO - step: 11640 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0979 +[titan] 2025-10-05 05:43:45,042 - root - INFO - lr: 4.1631e-05 gnorm: 1.09 [ 7:09:35<17:26:40] +[titan] 2025-10-05 05:43:55,889 - root - INFO - step: 11645 loss: 2.3366 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 05:43:55,889 - root - INFO - lr: 4.1624e-05 gnorm: 1.12 [ 7:09:46<17:26:29] +[titan] 2025-10-05 05:44:04,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:44:06,731 - root - INFO - step: 11650 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0964 +[titan] 2025-10-05 05:44:06,731 - root - INFO - lr: 4.1617e-05 gnorm: 1.13 [ 7:09:57<17:26:17] +[titan] 2025-10-05 05:44:17,623 - root - INFO - step: 11655 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0423 +[titan] 2025-10-05 05:44:17,623 - root - INFO - lr: 4.1610e-05 gnorm: 1.16 [ 7:10:08<17:26:06] +[titan] 2025-10-05 05:44:28,491 - root - INFO - step: 11660 loss: 2.3791 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 05:44:28,491 - root - INFO - lr: 4.1603e-05 gnorm: 1.14 [ 7:10:19<17:25:54] +[titan] 2025-10-05 05:44:39,349 - root - INFO - step: 11665 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0347 +[titan] 2025-10-05 05:44:39,349 - root - INFO - lr: 4.1596e-05 gnorm: 1.14 [ 7:10:30<17:25:42] +[titan] 2025-10-05 05:44:50,212 - root - INFO - step: 11670 loss: 2.2728 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0088 +[titan] 2025-10-05 05:44:50,212 - root - INFO - lr: 4.1589e-05 gnorm: 1.12 [ 7:10:40<17:25:31] +[titan] 2025-10-05 05:45:01,081 - root - INFO - step: 11675 loss: 2.3589 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:45:01,081 - root - INFO - lr: 4.1582e-05 gnorm: 1.11 [ 7:10:51<17:25:19] +[titan] 2025-10-05 05:45:11,965 - root - INFO - step: 11680 loss: 2.3297 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0575 +[titan] 2025-10-05 05:45:11,965 - root - INFO - lr: 
4.1575e-05 gnorm: 1.10 [ 7:11:02<17:25:08] +[titan] 2025-10-05 05:45:22,811 - root - INFO - step: 11685 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0746 +[titan] 2025-10-05 05:45:22,811 - root - INFO - lr: 4.1568e-05 gnorm: 1.11 [ 7:11:13<17:24:56] +[titan] 2025-10-05 05:45:33,673 - root - INFO - step: 11690 loss: 2.3753 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2787 global_avg_mtp_loss: 2.0966 +[titan] 2025-10-05 05:45:33,674 - root - INFO - lr: 4.1561e-05 gnorm: 1.10 [ 7:11:24<17:24:45] +[titan] 2025-10-05 05:45:44,536 - root - INFO - step: 11695 loss: 2.3906 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1103 +[titan] 2025-10-05 05:45:44,537 - root - INFO - lr: 4.1554e-05 gnorm: 1.11 [ 7:11:35<17:24:33] +[titan] 2025-10-05 05:45:53,219 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:45:55,410 - root - INFO - step: 11700 loss: 2.3089 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 05:45:55,410 - root - INFO - lr: 4.1547e-05 gnorm: 1.16 [ 7:11:46<17:24:21] +[titan] 2025-10-05 05:46:06,262 - root - INFO - step: 11705 loss: 2.3134 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 05:46:06,262 - root - INFO - lr: 4.1540e-05 gnorm: 1.11 [ 7:11:57<17:24:10] +[titan] 2025-10-05 05:46:17,130 - root - INFO - step: 11710 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:46:17,130 - root - INFO - lr: 4.1534e-05 gnorm: 1.07 [ 7:12:07<17:23:58] +[titan] 2025-10-05 05:46:27,969 - root - INFO - step: 11715 loss: 2.3153 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0455 +[titan] 2025-10-05 05:46:27,969 - root - INFO - lr: 4.1527e-05 gnorm: 1.10 [ 7:12:18<17:23:47] +[titan] 2025-10-05 05:46:38,818 - root - INFO - step: 11720 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1170 +[titan] 2025-10-05 05:46:38,818 - root - INFO - lr: 4.1520e-05 gnorm: 1.16 [ 7:12:29<17:23:35] +[titan] 2025-10-05 05:46:49,675 - root - INFO - step: 11725 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0144 +[titan] 2025-10-05 05:46:49,675 - root - INFO - lr: 4.1513e-05 gnorm: 1.16 [ 7:12:40<17:23:23] +[titan] 2025-10-05 05:47:00,544 - root - INFO - step: 11730 loss: 2.4145 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1312 +[titan] 2025-10-05 05:47:00,544 - root - INFO - lr: 
4.1506e-05 gnorm: 1.10 [ 7:12:51<17:23:12] +[titan] 2025-10-05 05:47:11,419 - root - INFO - step: 11735 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0736 +[titan] 2025-10-05 05:47:11,419 - root - INFO - lr: 4.1499e-05 gnorm: 1.08 [ 7:13:02<17:23:00] +[titan] 2025-10-05 05:47:22,265 - root - INFO - step: 11740 loss: 2.3154 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 05:47:22,265 - root - INFO - lr: 4.1492e-05 gnorm: 1.11 [ 7:13:13<17:22:49] +[titan] 2025-10-05 05:47:33,131 - root - INFO - step: 11745 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 05:47:33,131 - root - INFO - lr: 4.1485e-05 gnorm: 1.13 [ 7:13:23<17:22:37] +[titan] 2025-10-05 05:47:41,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:47:43,985 - root - INFO - step: 11750 loss: 2.3279 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0557 +[titan] 2025-10-05 05:47:43,985 - root - INFO - lr: 4.1478e-05 gnorm: 1.13 [ 7:13:34<17:22:26] +[titan] 2025-10-05 05:47:54,868 - root - INFO - step: 11755 loss: 2.3253 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0524 +[titan] 2025-10-05 05:47:54,869 - root - INFO - lr: 4.1471e-05 gnorm: 1.15 [ 7:13:45<17:22:14] +[titan] 2025-10-05 05:48:05,705 - root - INFO - step: 11760 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 05:48:05,705 - root - INFO - lr: 4.1464e-05 gnorm: 1.11 [ 7:13:56<17:22:02] +[titan] 2025-10-05 05:48:16,588 - root - INFO - step: 11765 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0673 +[titan] 2025-10-05 05:48:16,588 - root - INFO - lr: 4.1457e-05 gnorm: 1.08 [ 7:14:07<17:21:51] +[titan] 2025-10-05 05:48:27,456 - root - INFO - step: 11770 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:48:27,456 - root - INFO - lr: 4.1450e-05 gnorm: 1.13 [ 7:14:18<17:21:39] +[titan] 2025-10-05 05:48:38,410 - root - INFO - step: 11775 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 29,914 tflops: 415.01 mfu: 41.96% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 05:48:38,411 - root - INFO - lr: 4.1443e-05 gnorm: 1.12 [ 7:14:29<17:21:28] +[titan] 2025-10-05 05:48:40,781 - root - INFO - Dumping profiler traces at step 11776 +[titan] 2025-10-05 05:48:40,819 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:48:49,532 - root - INFO - step: 11780 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0553 +[titan] 2025-10-05 05:48:49,532 - root - INFO - lr: 4.1436e-05 gnorm: 1.10 [ 7:14:40<17:21:17] +[titan] 2025-10-05 05:49:00,425 - root - INFO - step: 11785 loss: 2.3316 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0583 +[titan] 2025-10-05 05:49:00,425 - root - INFO - lr: 4.1429e-05 gnorm: 1.11 [ 7:14:51<17:21:06] +[titan] 2025-10-05 05:49:11,300 - root - INFO - step: 11790 loss: 2.2637 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 05:49:11,301 - root - INFO - lr: 4.1422e-05 gnorm: 1.08 [ 7:15:02<17:20:54] +[titan] 2025-10-05 05:49:22,173 - root - INFO - step: 11795 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1151 +[titan] 2025-10-05 05:49:22,173 - root - INFO - lr: 4.1415e-05 gnorm: 1.13 [ 7:15:12<17:20:42] +[titan] 2025-10-05 05:49:30,865 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:49:33,049 - root - INFO - step: 11800 loss: 2.3168 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0453 +[titan] 2025-10-05 05:49:33,050 - root - INFO - lr: 4.1408e-05 gnorm: 1.14 [ 7:15:23<17:20:31] +[titan] 2025-10-05 05:49:43,908 - root - INFO - step: 11805 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:49:43,909 - root - INFO - lr: 4.1401e-05 gnorm: 1.11 [ 7:15:34<17:20:19] +[titan] 2025-10-05 05:49:54,777 - root - INFO - step: 11810 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 05:49:54,777 - root - INFO - lr: 4.1394e-05 gnorm: 1.14 [ 7:15:45<17:20:08] +[titan] 2025-10-05 05:50:05,641 - root - INFO - step: 11815 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:50:05,641 - root - INFO - lr: 4.1387e-05 gnorm: 1.10 [ 7:15:56<17:19:56] +[titan] 2025-10-05 05:50:16,549 - root - INFO - step: 11820 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 05:50:16,549 - root - INFO - lr: 4.1379e-05 gnorm: 1.14 [ 7:16:07<17:19:45] +[titan] 2025-10-05 05:50:27,410 - root - INFO - step: 11825 loss: 2.3545 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:50:27,410 - root - INFO - lr: 4.1372e-05 gnorm: 1.11 [ 7:16:18<17:19:33] +[titan] 2025-10-05 05:50:38,296 - root - INFO - step: 11830 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 05:50:38,297 - root - INFO - lr: 
4.1365e-05 gnorm: 1.17 [ 7:16:29<17:19:22] +[titan] 2025-10-05 05:50:49,183 - root - INFO - step: 11835 loss: 2.4085 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1264 +[titan] 2025-10-05 05:50:49,183 - root - INFO - lr: 4.1358e-05 gnorm: 1.12 [ 7:16:39<17:19:10] +[titan] 2025-10-05 05:51:00,086 - root - INFO - step: 11840 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 05:51:00,086 - root - INFO - lr: 4.1351e-05 gnorm: 1.11 [ 7:16:50<17:18:59] +[titan] 2025-10-05 05:51:10,957 - root - INFO - step: 11845 loss: 2.3242 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0501 +[titan] 2025-10-05 05:51:10,957 - root - INFO - lr: 4.1344e-05 gnorm: 1.08 [ 7:17:01<17:18:47] +[titan] 2025-10-05 05:51:19,706 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:51:21,898 - root - INFO - step: 11850 loss: 2.3518 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0767 +[titan] 2025-10-05 05:51:21,898 - root - INFO - lr: 4.1337e-05 gnorm: 1.12 [ 7:17:12<17:18:36] +[titan] 2025-10-05 05:51:32,790 - root - INFO - step: 11855 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 05:51:32,790 - root - INFO - lr: 4.1330e-05 gnorm: 1.12 [ 7:17:23<17:18:24] +[titan] 2025-10-05 05:51:43,664 - root - INFO - step: 11860 loss: 2.3095 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 05:51:43,664 - root - INFO - lr: 4.1323e-05 gnorm: 1.18 [ 7:17:34<17:18:13] +[titan] 2025-10-05 05:51:54,563 - root - INFO - step: 11865 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0318 +[titan] 2025-10-05 05:51:54,563 - root - INFO - lr: 4.1316e-05 gnorm: 1.09 [ 7:17:45<17:18:01] +[titan] 2025-10-05 05:52:05,455 - root - INFO - step: 11870 loss: 2.3710 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0936 +[titan] 2025-10-05 05:52:05,455 - root - INFO - lr: 4.1309e-05 gnorm: 1.11 [ 7:17:56<17:17:50] +[titan] 2025-10-05 05:52:16,379 - root - INFO - step: 11875 loss: 2.3659 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0895 +[titan] 2025-10-05 05:52:16,379 - root - INFO - lr: 4.1302e-05 gnorm: 1.15 [ 7:18:07<17:17:38] +[titan] 2025-10-05 05:52:27,265 - root - INFO - step: 11880 loss: 2.4011 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1135 +[titan] 2025-10-05 05:52:27,265 - root - INFO - lr: 
4.1295e-05 gnorm: 3.35 [ 7:18:17<17:17:27] +[titan] 2025-10-05 05:52:38,136 - root - INFO - step: 11885 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0521 +[titan] 2025-10-05 05:52:38,137 - root - INFO - lr: 4.1288e-05 gnorm: 1.14 [ 7:18:28<17:17:15] +[titan] 2025-10-05 05:52:49,001 - root - INFO - step: 11890 loss: 2.3415 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0669 +[titan] 2025-10-05 05:52:49,001 - root - INFO - lr: 4.1281e-05 gnorm: 1.11 [ 7:18:39<17:17:04] +[titan] 2025-10-05 05:52:59,880 - root - INFO - step: 11895 loss: 2.3264 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2720 global_avg_mtp_loss: 2.0545 +[titan] 2025-10-05 05:52:59,880 - root - INFO - lr: 4.1274e-05 gnorm: 1.12 [ 7:18:50<17:16:52] +[titan] 2025-10-05 05:53:08,561 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:53:10,763 - root - INFO - step: 11900 loss: 2.2583 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9932 +[titan] 2025-10-05 05:53:10,763 - root - INFO - lr: 4.1267e-05 gnorm: 1.12 [ 7:19:01<17:16:41] +[titan] 2025-10-05 05:53:21,692 - root - INFO - step: 11905 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 05:53:21,692 - root - INFO - lr: 4.1260e-05 gnorm: 1.14 [ 7:19:12<17:16:29] +[titan] 2025-10-05 05:53:32,550 - root - INFO - step: 11910 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:53:32,550 - root - INFO - lr: 4.1253e-05 gnorm: 1.07 [ 7:19:23<17:16:18] +[titan] 2025-10-05 05:53:43,445 - root - INFO - step: 11915 loss: 2.3927 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:53:43,445 - root - INFO - lr: 4.1246e-05 gnorm: 1.12 [ 7:19:34<17:16:06] +[titan] 2025-10-05 05:53:54,326 - root - INFO - step: 11920 loss: 2.4016 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:53:54,326 - root - INFO - lr: 4.1239e-05 gnorm: 1.11 [ 7:19:45<17:15:55] +[titan] 2025-10-05 05:54:05,201 - root - INFO - step: 11925 loss: 2.3896 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 05:54:05,201 - root - INFO - lr: 4.1232e-05 gnorm: 1.10 [ 7:19:55<17:15:43] +[titan] 2025-10-05 05:54:16,091 - root - INFO - step: 11930 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:54:16,091 - root - INFO - lr: 
4.1224e-05 gnorm: 1.18 [ 7:20:06<17:15:32] +[titan] 2025-10-05 05:54:27,039 - root - INFO - step: 11935 loss: 2.3186 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 05:54:27,039 - root - INFO - lr: 4.1217e-05 gnorm: 1.13 [ 7:20:17<17:15:21] +[titan] 2025-10-05 05:54:37,903 - root - INFO - step: 11940 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1239 +[titan] 2025-10-05 05:54:37,903 - root - INFO - lr: 4.1210e-05 gnorm: 1.14 [ 7:20:28<17:15:09] +[titan] 2025-10-05 05:54:48,775 - root - INFO - step: 11945 loss: 2.3374 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:54:48,775 - root - INFO - lr: 4.1203e-05 gnorm: 1.16 [ 7:20:39<17:14:57] +[titan] 2025-10-05 05:54:57,461 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:54:59,643 - root - INFO - step: 11950 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0633 +[titan] 2025-10-05 05:54:59,643 - root - INFO - lr: 4.1196e-05 gnorm: 1.10 [ 7:20:50<17:14:46] +[titan] 2025-10-05 05:55:10,528 - root - INFO - step: 11955 loss: 2.3258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 05:55:10,528 - root - INFO - lr: 4.1189e-05 gnorm: 1.08 [ 7:21:01<17:14:34] +[titan] 2025-10-05 05:55:21,455 - root - INFO - step: 11960 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:55:21,456 - root - INFO - lr: 4.1182e-05 gnorm: 1.12 [ 7:21:12<17:14:23] +[titan] 2025-10-05 05:55:32,338 - root - INFO - step: 11965 loss: 2.3022 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 05:55:32,338 - root - INFO - lr: 4.1175e-05 gnorm: 1.06 [ 7:21:23<17:14:12] +[titan] 2025-10-05 05:55:43,237 - root - INFO - step: 11970 loss: 2.3819 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 05:55:43,238 - root - INFO - lr: 4.1168e-05 gnorm: 1.11 [ 7:21:33<17:14:00] +[titan] 2025-10-05 05:55:54,122 - root - INFO - step: 11975 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0302 +[titan] 2025-10-05 05:55:54,122 - root - INFO - lr: 4.1161e-05 gnorm: 1.07 [ 7:21:44<17:13:49] +[titan] 2025-10-05 05:56:04,989 - root - INFO - step: 11980 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:56:04,989 - root - INFO - lr: 
4.1154e-05 gnorm: 1.08 [ 7:21:55<17:13:37] +[titan] 2025-10-05 05:56:15,876 - root - INFO - step: 11985 loss: 2.3487 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:56:15,876 - root - INFO - lr: 4.1147e-05 gnorm: 1.11 [ 7:22:06<17:13:26] +[titan] 2025-10-05 05:56:26,799 - root - INFO - step: 11990 loss: 2.3624 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 05:56:26,800 - root - INFO - lr: 4.1139e-05 gnorm: 1.07 [ 7:22:17<17:13:14] +[titan] 2025-10-05 05:56:37,664 - root - INFO - step: 11995 loss: 2.3352 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:56:37,664 - root - INFO - lr: 4.1132e-05 gnorm: 1.15 [ 7:22:28<17:13:03] +[titan] 2025-10-05 05:56:46,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:56:48,558 - root - INFO - step: 12000 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0434 +[titan] 2025-10-05 05:56:48,559 - root - INFO - lr: 4.1125e-05 gnorm: 1.15 [ 7:22:39<17:12:51] +[titan] 2025-10-05 05:56:59,430 - root - INFO - step: 12005 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0683 +[titan] 2025-10-05 05:56:59,430 - root - INFO - lr: 4.1118e-05 gnorm: 1.12 [ 7:22:50<17:12:40] +[titan] 2025-10-05 05:57:10,327 - root - INFO - step: 12010 loss: 2.3294 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0572 +[titan] 2025-10-05 05:57:10,327 - root - INFO - lr: 4.1111e-05 gnorm: 1.11 [ 7:23:01<17:12:28] +[titan] 2025-10-05 05:57:21,254 - root - INFO - step: 12015 loss: 2.3689 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:57:21,254 - root - INFO - lr: 4.1104e-05 gnorm: 1.08 [ 7:23:11<17:12:17] +[titan] 2025-10-05 05:57:32,120 - root - INFO - step: 12020 loss: 2.3542 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0781 +[titan] 2025-10-05 05:57:32,120 - root - INFO - lr: 4.1097e-05 gnorm: 1.08 [ 7:23:22<17:12:05] +[titan] 2025-10-05 05:57:43,004 - root - INFO - step: 12025 loss: 2.3233 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:57:43,004 - root - INFO - lr: 4.1090e-05 gnorm: 1.13 [ 7:23:33<17:11:54] +[titan] 2025-10-05 05:57:53,894 - root - INFO - step: 12030 loss: 2.3526 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:57:53,895 - root - INFO - lr: 
4.1083e-05 gnorm: 1.09 [ 7:23:44<17:11:42] +[titan] 2025-10-05 05:58:04,763 - root - INFO - step: 12035 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 05:58:04,764 - root - INFO - lr: 4.1075e-05 gnorm: 1.11 [ 7:23:55<17:11:31] +[titan] 2025-10-05 05:58:15,655 - root - INFO - step: 12040 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 05:58:15,655 - root - INFO - lr: 4.1068e-05 gnorm: 1.13 [ 7:24:06<17:11:19] +[titan] 2025-10-05 05:58:26,581 - root - INFO - step: 12045 loss: 2.2551 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9914 +[titan] 2025-10-05 05:58:26,582 - root - INFO - lr: 4.1061e-05 gnorm: 1.10 [ 7:24:17<17:11:08] +[titan] 2025-10-05 05:58:35,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:58:37,445 - root - INFO - step: 12050 loss: 2.2791 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 05:58:37,445 - root - INFO - lr: 4.1054e-05 gnorm: 1.12 [ 7:24:28<17:10:56] +[titan] 2025-10-05 05:58:48,333 - root - INFO - step: 12055 loss: 2.3027 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0322 +[titan] 2025-10-05 05:58:48,334 - root - INFO - lr: 4.1047e-05 gnorm: 1.09 [ 7:24:39<17:10:45] +[titan] 2025-10-05 05:58:59,215 - root - INFO - step: 12060 loss: 2.3599 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:58:59,215 - root - INFO - lr: 4.1040e-05 gnorm: 1.13 [ 7:24:49<17:10:33] +[titan] 2025-10-05 05:59:10,066 - root - INFO - step: 12065 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 05:59:10,066 - root - INFO - lr: 4.1033e-05 gnorm: 1.14 [ 7:25:00<17:10:22] +[titan] 2025-10-05 05:59:20,922 - root - INFO - step: 12070 loss: 2.3313 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:59:20,923 - root - INFO - lr: 4.1026e-05 gnorm: 1.12 [ 7:25:11<17:10:10] +[titan] 2025-10-05 05:59:31,844 - root - INFO - step: 12075 loss: 2.4140 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 05:59:31,844 - root - INFO - lr: 4.1018e-05 gnorm: 1.14 [ 7:25:22<17:09:59] +[titan] 2025-10-05 05:59:42,686 - root - INFO - step: 12080 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0618 +[titan] 2025-10-05 05:59:42,686 - root - INFO - lr: 
4.1011e-05 gnorm: 1.13 [ 7:25:33<17:09:47] +[titan] 2025-10-05 05:59:53,539 - root - INFO - step: 12085 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0449 +[titan] 2025-10-05 05:59:53,540 - root - INFO - lr: 4.1004e-05 gnorm: 1.11 [ 7:25:44<17:09:36] +[titan] 2025-10-05 06:00:04,392 - root - INFO - step: 12090 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0005 +[titan] 2025-10-05 06:00:04,392 - root - INFO - lr: 4.0997e-05 gnorm: 1.08 [ 7:25:55<17:09:24] +[titan] 2025-10-05 06:00:15,254 - root - INFO - step: 12095 loss: 2.3576 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0822 +[titan] 2025-10-05 06:00:15,254 - root - INFO - lr: 4.0990e-05 gnorm: 1.07 [ 7:26:05<17:09:13] +[titan] 2025-10-05 06:00:23,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:00:26,169 - root - INFO - step: 12100 loss: 2.3299 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0570 +[titan] 2025-10-05 06:00:26,169 - root - INFO - lr: 4.0983e-05 gnorm: 1.12 [ 7:26:16<17:09:01] +[titan] 2025-10-05 06:00:37,019 - root - INFO - step: 12105 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 06:00:37,019 - root - INFO - lr: 4.0976e-05 gnorm: 1.10 [ 7:26:27<17:08:50] +[titan] 2025-10-05 06:00:47,875 - root - INFO - step: 12110 loss: 2.3109 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0407 +[titan] 2025-10-05 06:00:47,875 - root - INFO - lr: 4.0968e-05 gnorm: 1.14 [ 7:26:38<17:08:38] +[titan] 2025-10-05 06:00:58,710 - root - INFO - step: 12115 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0231 +[titan] 2025-10-05 06:00:58,710 - root - INFO - lr: 4.0961e-05 gnorm: 1.09 [ 7:26:49<17:08:27] +[titan] 2025-10-05 06:01:09,539 - root - INFO - step: 12120 loss: 2.3227 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0502 +[titan] 2025-10-05 06:01:09,539 - root - INFO - lr: 4.0954e-05 gnorm: 1.11 [ 7:27:00<17:08:15] +[titan] 2025-10-05 06:01:20,374 - root - INFO - step: 12125 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 06:01:20,374 - root - INFO - lr: 4.0947e-05 gnorm: 1.07 [ 7:27:11<17:08:03] +[titan] 2025-10-05 06:01:31,270 - root - INFO - step: 12130 loss: 2.2677 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0025 +[titan] 2025-10-05 06:01:31,270 - root - INFO - lr: 
4.0940e-05 gnorm: 1.31 [ 7:27:21<17:07:52] +[titan] 2025-10-05 06:01:42,106 - root - INFO - step: 12135 loss: 2.2796 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:01:42,106 - root - INFO - lr: 4.0933e-05 gnorm: 1.13 [ 7:27:32<17:07:40] +[titan] 2025-10-05 06:01:52,949 - root - INFO - step: 12140 loss: 2.3222 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:01:52,949 - root - INFO - lr: 4.0926e-05 gnorm: 1.09 [ 7:27:43<17:07:29] +[titan] 2025-10-05 06:02:03,787 - root - INFO - step: 12145 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:02:03,787 - root - INFO - lr: 4.0918e-05 gnorm: 1.12 [ 7:27:54<17:07:17] +[titan] 2025-10-05 06:02:12,468 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:02:14,649 - root - INFO - step: 12150 loss: 2.3633 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2765 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 06:02:14,649 - root - INFO - lr: 4.0911e-05 gnorm: 1.10 [ 7:28:05<17:07:06] +[titan] 2025-10-05 06:02:25,544 - root - INFO - step: 12155 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 06:02:25,544 - root - INFO - lr: 4.0904e-05 gnorm: 1.08 [ 7:28:16<17:06:54] +[titan] 2025-10-05 06:02:36,407 - root - INFO - step: 12160 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:02:36,407 - root - INFO - lr: 4.0897e-05 gnorm: 1.12 [ 7:28:27<17:06:43] +[titan] 2025-10-05 06:02:47,265 - root - INFO - step: 12165 loss: 2.3191 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:02:47,265 - root - INFO - lr: 4.0890e-05 gnorm: 1.13 [ 7:28:37<17:06:31] +[titan] 2025-10-05 06:02:58,124 - root - INFO - step: 12170 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0270 +[titan] 2025-10-05 06:02:58,124 - root - INFO - lr: 4.0883e-05 gnorm: 1.13 [ 7:28:48<17:06:20] +[titan] 2025-10-05 06:03:08,999 - root - INFO - step: 12175 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 06:03:08,999 - root - INFO - lr: 4.0875e-05 gnorm: 1.10 [ 7:28:59<17:06:08] +[titan] 2025-10-05 06:03:19,864 - root - INFO - step: 12180 loss: 2.3860 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1055 +[titan] 2025-10-05 06:03:19,864 - root - INFO - lr: 
4.0868e-05 gnorm: 1.08 [ 7:29:10<17:05:57] +[titan] 2025-10-05 06:03:30,733 - root - INFO - step: 12185 loss: 2.2786 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 06:03:30,733 - root - INFO - lr: 4.0861e-05 gnorm: 1.09 [ 7:29:21<17:05:45] +[titan] 2025-10-05 06:03:41,601 - root - INFO - step: 12190 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 06:03:41,601 - root - INFO - lr: 4.0854e-05 gnorm: 1.13 [ 7:29:32<17:05:34] +[titan] 2025-10-05 06:03:52,503 - root - INFO - step: 12195 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9906 +[titan] 2025-10-05 06:03:52,503 - root - INFO - lr: 4.0847e-05 gnorm: 1.13 [ 7:29:43<17:05:22] +[titan] 2025-10-05 06:04:01,176 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:04:03,365 - root - INFO - step: 12200 loss: 2.3747 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0972 +[titan] 2025-10-05 06:04:03,365 - root - INFO - lr: 4.0839e-05 gnorm: 1.12 [ 7:29:54<17:05:11] +[titan] 2025-10-05 06:04:14,208 - root - INFO - step: 12205 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0049 +[titan] 2025-10-05 06:04:14,208 - root - INFO - lr: 4.0832e-05 gnorm: 1.10 [ 7:30:04<17:04:59] +[titan] 2025-10-05 06:04:25,065 - root - INFO - step: 12210 loss: 2.3060 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:04:25,065 - root - INFO - lr: 4.0825e-05 gnorm: 1.06 [ 7:30:15<17:04:47] +[titan] 2025-10-05 06:04:35,929 - root - INFO - step: 12215 loss: 2.2793 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 06:04:35,929 - root - INFO - lr: 4.0818e-05 gnorm: 1.04 [ 7:30:26<17:04:36] +[titan] 2025-10-05 06:04:46,809 - root - INFO - step: 12220 loss: 2.3271 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 06:04:46,809 - root - INFO - lr: 4.0811e-05 gnorm: 1.14 [ 7:30:37<17:04:24] +[titan] 2025-10-05 06:04:57,691 - root - INFO - step: 12225 loss: 2.2624 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9984 +[titan] 2025-10-05 06:04:57,691 - root - INFO - lr: 4.0803e-05 gnorm: 1.17 [ 7:30:48<17:04:13] +[titan] 2025-10-05 06:05:08,549 - root - INFO - step: 12230 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 06:05:08,549 - root - INFO - lr: 
4.0796e-05 gnorm: 1.09 [ 7:30:59<17:04:01] +[titan] 2025-10-05 06:05:19,441 - root - INFO - step: 12235 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0739 +[titan] 2025-10-05 06:05:19,441 - root - INFO - lr: 4.0789e-05 gnorm: 1.10 [ 7:31:10<17:03:50] +[titan] 2025-10-05 06:05:30,318 - root - INFO - step: 12240 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 06:05:30,318 - root - INFO - lr: 4.0782e-05 gnorm: 1.09 [ 7:31:21<17:03:39] +[titan] 2025-10-05 06:05:41,191 - root - INFO - step: 12245 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 06:05:41,191 - root - INFO - lr: 4.0775e-05 gnorm: 1.08 [ 7:31:31<17:03:27] +[titan] 2025-10-05 06:05:49,886 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:05:52,078 - root - INFO - step: 12250 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:05:52,079 - root - INFO - lr: 4.0767e-05 gnorm: 1.17 [ 7:31:42<17:03:16] +[titan] 2025-10-05 06:06:02,966 - root - INFO - step: 12255 loss: 2.3830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 06:06:02,966 - root - INFO - lr: 4.0760e-05 gnorm: 1.12 [ 7:31:53<17:03:04] +[titan] 2025-10-05 06:06:13,829 - root - INFO - step: 12260 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9702 +[titan] 2025-10-05 06:06:13,829 - root - INFO - lr: 4.0753e-05 gnorm: 1.10 [ 7:32:04<17:02:53] +[titan] 2025-10-05 06:06:24,716 - root - INFO - step: 12265 loss: 2.3897 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1102 +[titan] 2025-10-05 06:06:24,716 - root - INFO - lr: 4.0746e-05 gnorm: 1.13 [ 7:32:15<17:02:41] +[titan] 2025-10-05 06:06:35,605 - root - INFO - step: 12270 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0629 +[titan] 2025-10-05 06:06:35,605 - root - INFO - lr: 4.0739e-05 gnorm: 1.15 [ 7:32:26<17:02:30] +[titan] 2025-10-05 06:06:46,502 - root - INFO - step: 12275 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 06:06:46,502 - root - INFO - lr: 4.0731e-05 gnorm: 1.17 [ 7:32:37<17:02:18] +[titan] 2025-10-05 06:06:57,383 - root - INFO - step: 12280 loss: 2.3419 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0674 +[titan] 2025-10-05 06:06:57,383 - root - INFO - lr: 
4.0724e-05 gnorm: 1.16 [ 7:32:48<17:02:07] +[titan] 2025-10-05 06:07:08,352 - root - INFO - step: 12285 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.44 mfu: 41.91% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 06:07:08,353 - root - INFO - lr: 4.0717e-05 gnorm: 1.14 [ 7:32:59<17:01:55] +[titan] 2025-10-05 06:07:15,073 - root - INFO - Dumping profiler traces at step 12288 +[titan] 2025-10-05 06:07:15,112 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:07:19,500 - root - INFO - step: 12290 loss: 2.3565 memory: 118.84GiB(85.28%) tps: 29,395 tflops: 407.81 mfu: 41.23% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:07:19,500 - root - INFO - lr: 4.0710e-05 gnorm: 1.08 [ 7:33:10<17:01:45] +[titan] 2025-10-05 06:07:30,465 - root - INFO - step: 12295 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 29,886 tflops: 414.62 mfu: 41.92% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 06:07:30,465 - root - INFO - lr: 4.0702e-05 gnorm: 1.07 [ 7:33:21<17:01:33] +[titan] 2025-10-05 06:07:39,154 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:07:41,347 - root - INFO - step: 12300 loss: 2.3244 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:07:41,347 - root - INFO - lr: 4.0695e-05 gnorm: 1.16 [ 7:33:32<17:01:22] +[titan] 2025-10-05 06:07:52,196 - root - INFO - step: 12305 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 06:07:52,196 - root - INFO - lr: 4.0688e-05 gnorm: 1.09 [ 7:33:42<17:01:10] +[titan] 2025-10-05 06:08:03,050 - root - INFO - step: 12310 loss: 2.3555 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:08:03,050 - root - INFO - lr: 4.0681e-05 gnorm: 1.12 [ 7:33:53<17:00:59] +[titan] 2025-10-05 06:08:13,913 - root - INFO - step: 12315 loss: 2.3066 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0365 +[titan] 2025-10-05 06:08:13,914 - root - INFO - lr: 4.0674e-05 gnorm: 1.08 [ 7:34:04<17:00:47] +[titan] 2025-10-05 06:08:24,841 - root - INFO - step: 12320 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0745 +[titan] 2025-10-05 06:08:24,842 - root - INFO - lr: 4.0666e-05 gnorm: 1.11 [ 7:34:15<17:00:36] +[titan] 2025-10-05 06:08:35,938 - root - INFO - step: 12325 loss: 2.4352 memory: 118.84GiB(85.28%) tps: 29,531 tflops: 409.69 mfu: 41.42% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1501 +[titan] 2025-10-05 06:08:35,938 - root - INFO - lr: 4.0659e-05 gnorm: 1.15 [ 7:34:26<17:00:25] +[titan] 2025-10-05 06:08:46,800 - root - INFO - step: 12330 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:08:46,800 - root - INFO - lr: 
4.0652e-05 gnorm: 1.09 [ 7:34:37<17:00:13] +[titan] 2025-10-05 06:08:57,665 - root - INFO - step: 12335 loss: 2.3478 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 06:08:57,665 - root - INFO - lr: 4.0645e-05 gnorm: 1.09 [ 7:34:48<17:00:02] +[titan] 2025-10-05 06:09:08,538 - root - INFO - step: 12340 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0577 +[titan] 2025-10-05 06:09:08,539 - root - INFO - lr: 4.0637e-05 gnorm: 1.13 [ 7:34:59<16:59:50] +[titan] 2025-10-05 06:09:19,441 - root - INFO - step: 12345 loss: 2.3988 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1189 +[titan] 2025-10-05 06:09:19,441 - root - INFO - lr: 4.0630e-05 gnorm: 1.13 [ 7:35:10<16:59:39] +[titan] 2025-10-05 06:09:28,178 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:09:30,418 - root - INFO - step: 12350 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.17 mfu: 41.88% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0382 +[titan] 2025-10-05 06:09:30,418 - root - INFO - lr: 4.0623e-05 gnorm: 1.12 [ 7:35:21<16:59:28] +[titan] 2025-10-05 06:09:41,340 - root - INFO - step: 12355 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0087 +[titan] 2025-10-05 06:09:41,341 - root - INFO - lr: 4.0616e-05 gnorm: 1.16 [ 7:35:32<16:59:16] +[titan] 2025-10-05 06:09:52,209 - root - INFO - step: 12360 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0389 +[titan] 2025-10-05 06:09:52,209 - root - INFO - lr: 4.0608e-05 gnorm: 1.09 [ 7:35:42<16:59:05] +[titan] 2025-10-05 06:10:03,072 - root - INFO - step: 12365 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 06:10:03,073 - root - INFO - lr: 4.0601e-05 gnorm: 1.09 [ 7:35:53<16:58:53] +[titan] 2025-10-05 06:10:13,928 - root - INFO - step: 12370 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 06:10:13,928 - root - INFO - lr: 4.0594e-05 gnorm: 1.09 [ 7:36:04<16:58:42] +[titan] 2025-10-05 06:10:24,802 - root - INFO - step: 12375 loss: 2.3408 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:10:24,803 - root - INFO - lr: 4.0587e-05 gnorm: 1.10 [ 7:36:15<16:58:30] +[titan] 2025-10-05 06:10:35,777 - root - INFO - step: 12380 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 29,858 tflops: 414.24 mfu: 41.88% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:10:35,777 - root - INFO - lr: 
4.0579e-05 gnorm: 1.08 [ 7:36:26<16:58:19] +[titan] 2025-10-05 06:10:46,648 - root - INFO - step: 12385 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:10:46,649 - root - INFO - lr: 4.0572e-05 gnorm: 1.13 [ 7:36:37<16:58:08] +[titan] 2025-10-05 06:10:57,506 - root - INFO - step: 12390 loss: 2.3730 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 06:10:57,506 - root - INFO - lr: 4.0565e-05 gnorm: 1.14 [ 7:36:48<16:57:56] +[titan] 2025-10-05 06:11:08,373 - root - INFO - step: 12395 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:11:08,373 - root - INFO - lr: 4.0558e-05 gnorm: 1.06 [ 7:36:59<16:57:45] +[titan] 2025-10-05 06:11:17,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:11:19,239 - root - INFO - step: 12400 loss: 2.3820 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 06:11:19,239 - root - INFO - lr: 4.0550e-05 gnorm: 1.12 [ 7:37:09<16:57:33] +[titan] 2025-10-05 06:11:30,093 - root - INFO - step: 12405 loss: 2.3346 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0597 +[titan] 2025-10-05 06:11:30,094 - root - INFO - lr: 4.0543e-05 gnorm: 1.09 [ 7:37:20<16:57:22] +[titan] 2025-10-05 06:11:41,037 - root - INFO - step: 12410 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0232 +[titan] 2025-10-05 06:11:41,037 - root - INFO - lr: 4.0536e-05 gnorm: 1.14 [ 7:37:31<16:57:10] +[titan] 2025-10-05 06:11:51,926 - root - INFO - step: 12415 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0550 +[titan] 2025-10-05 06:11:51,926 - root - INFO - lr: 4.0528e-05 gnorm: 1.18 [ 7:37:42<16:56:59] +[titan] 2025-10-05 06:12:02,805 - root - INFO - step: 12420 loss: 2.3265 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 06:12:02,806 - root - INFO - lr: 4.0521e-05 gnorm: 1.08 [ 7:37:53<16:56:47] +[titan] 2025-10-05 06:12:13,684 - root - INFO - step: 12425 loss: 2.3185 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0481 +[titan] 2025-10-05 06:12:13,684 - root - INFO - lr: 4.0514e-05 gnorm: 1.14 [ 7:38:04<16:56:36] +[titan] 2025-10-05 06:12:24,578 - root - INFO - step: 12430 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:12:24,578 - root - INFO - lr: 
4.0507e-05 gnorm: 1.12 [ 7:38:15<16:56:25] +[titan] 2025-10-05 06:12:35,467 - root - INFO - step: 12435 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0765 +[titan] 2025-10-05 06:12:35,468 - root - INFO - lr: 4.0499e-05 gnorm: 1.10 [ 7:38:26<16:56:13] +[titan] 2025-10-05 06:12:46,337 - root - INFO - step: 12440 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:12:46,337 - root - INFO - lr: 4.0492e-05 gnorm: 1.07 [ 7:38:37<16:56:02] +[titan] 2025-10-05 06:12:57,242 - root - INFO - step: 12445 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 06:12:57,242 - root - INFO - lr: 4.0485e-05 gnorm: 1.08 [ 7:38:47<16:55:50] +[titan] 2025-10-05 06:13:05,952 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:13:08,132 - root - INFO - step: 12450 loss: 2.3232 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0520 +[titan] 2025-10-05 06:13:08,132 - root - INFO - lr: 4.0477e-05 gnorm: 1.09 [ 7:38:58<16:55:39] +[titan] 2025-10-05 06:13:19,019 - root - INFO - step: 12455 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:13:19,019 - root - INFO - lr: 4.0470e-05 gnorm: 1.08 [ 7:39:09<16:55:27] +[titan] 2025-10-05 06:13:29,895 - root - INFO - step: 12460 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:13:29,895 - root - INFO - lr: 4.0463e-05 gnorm: 1.12 [ 7:39:20<16:55:16] +[titan] 2025-10-05 06:13:40,820 - root - INFO - step: 12465 loss: 2.3135 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0425 +[titan] 2025-10-05 06:13:40,820 - root - INFO - lr: 4.0456e-05 gnorm: 1.11 [ 7:39:31<16:55:05] +[titan] 2025-10-05 06:13:51,710 - root - INFO - step: 12470 loss: 2.3792 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 06:13:51,710 - root - INFO - lr: 4.0448e-05 gnorm: 1.07 [ 7:39:42<16:54:53] +[titan] 2025-10-05 06:14:02,592 - root - INFO - step: 12475 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0962 +[titan] 2025-10-05 06:14:02,592 - root - INFO - lr: 4.0441e-05 gnorm: 1.11 [ 7:39:53<16:54:42] +[titan] 2025-10-05 06:14:13,496 - root - INFO - step: 12480 loss: 2.2332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9712 +[titan] 2025-10-05 06:14:13,497 - root - INFO - lr: 
4.0434e-05 gnorm: 1.08 [ 7:40:04<16:54:30] +[titan] 2025-10-05 06:14:24,366 - root - INFO - step: 12485 loss: 2.3235 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 06:14:24,366 - root - INFO - lr: 4.0426e-05 gnorm: 1.11 [ 7:40:15<16:54:19] +[titan] 2025-10-05 06:14:35,268 - root - INFO - step: 12490 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0610 +[titan] 2025-10-05 06:14:35,269 - root - INFO - lr: 4.0419e-05 gnorm: 1.09 [ 7:40:25<16:54:07] +[titan] 2025-10-05 06:14:46,143 - root - INFO - step: 12495 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 06:14:46,144 - root - INFO - lr: 4.0412e-05 gnorm: 1.12 [ 7:40:36<16:53:56] +[titan] 2025-10-05 06:14:54,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:14:57,025 - root - INFO - step: 12500 loss: 2.2990 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0321 +[titan] 2025-10-05 06:14:57,025 - root - INFO - lr: 4.0404e-05 gnorm: 1.12 [ 7:40:47<16:53:44] +[titan] 2025-10-05 06:15:07,897 - root - INFO - step: 12505 loss: 2.3230 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 06:15:07,897 - root - INFO - lr: 4.0397e-05 gnorm: 1.14 [ 7:40:58<16:53:33] +[titan] 2025-10-05 06:15:18,787 - root - INFO - step: 12510 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0680 +[titan] 2025-10-05 06:15:18,787 - root - INFO - lr: 4.0390e-05 gnorm: 1.11 [ 7:41:09<16:53:22] +[titan] 2025-10-05 06:15:29,657 - root - INFO - step: 12515 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:15:29,657 - root - INFO - lr: 4.0383e-05 gnorm: 1.11 [ 7:41:20<16:53:10] +[titan] 2025-10-05 06:15:40,564 - root - INFO - step: 12520 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 06:15:40,564 - root - INFO - lr: 4.0375e-05 gnorm: 1.09 [ 7:41:31<16:52:59] +[titan] 2025-10-05 06:15:51,439 - root - INFO - step: 12525 loss: 2.2600 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 06:15:51,439 - root - INFO - lr: 4.0368e-05 gnorm: 1.12 [ 7:41:42<16:52:47] +[titan] 2025-10-05 06:16:02,309 - root - INFO - step: 12530 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2748 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 06:16:02,309 - root - INFO - lr: 
4.0361e-05 gnorm: 1.12 [ 7:41:52<16:52:36] +[titan] 2025-10-05 06:16:13,212 - root - INFO - step: 12535 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:16:13,212 - root - INFO - lr: 4.0353e-05 gnorm: 1.11 [ 7:42:03<16:52:24] +[titan] 2025-10-05 06:16:24,126 - root - INFO - step: 12540 loss: 2.3391 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 06:16:24,127 - root - INFO - lr: 4.0346e-05 gnorm: 1.13 [ 7:42:14<16:52:13] +[titan] 2025-10-05 06:16:35,001 - root - INFO - step: 12545 loss: 2.3246 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 06:16:35,001 - root - INFO - lr: 4.0339e-05 gnorm: 1.12 [ 7:42:25<16:52:01] +[titan] 2025-10-05 06:16:43,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:16:45,891 - root - INFO - step: 12550 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0627 +[titan] 2025-10-05 06:16:45,891 - root - INFO - lr: 4.0331e-05 gnorm: 1.10 [ 7:42:36<16:51:50] +[titan] 2025-10-05 06:16:56,777 - root - INFO - step: 12555 loss: 2.2647 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 06:16:56,777 - root - INFO - lr: 4.0324e-05 gnorm: 1.14 [ 7:42:47<16:51:39] +[titan] 2025-10-05 06:17:07,666 - root - INFO - step: 12560 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 06:17:07,666 - root - INFO - lr: 4.0317e-05 gnorm: 1.12 [ 7:42:58<16:51:27] +[titan] 2025-10-05 06:17:18,556 - root - INFO - step: 12565 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:17:18,557 - root - INFO - lr: 4.0309e-05 gnorm: 1.11 [ 7:43:09<16:51:16] +[titan] 2025-10-05 06:17:29,439 - root - INFO - step: 12570 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 06:17:29,439 - root - INFO - lr: 4.0302e-05 gnorm: 1.11 [ 7:43:20<16:51:04] +[titan] 2025-10-05 06:17:40,372 - root - INFO - step: 12575 loss: 2.2819 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 06:17:40,372 - root - INFO - lr: 4.0295e-05 gnorm: 1.11 [ 7:43:31<16:50:53] +[titan] 2025-10-05 06:17:51,237 - root - INFO - step: 12580 loss: 2.3250 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0525 +[titan] 2025-10-05 06:17:51,238 - root - INFO - lr: 
4.0287e-05 gnorm: 1.11 [ 7:43:41<16:50:42] +[titan] 2025-10-05 06:18:02,105 - root - INFO - step: 12585 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:18:02,105 - root - INFO - lr: 4.0280e-05 gnorm: 1.09 [ 7:43:52<16:50:30] +[titan] 2025-10-05 06:18:12,984 - root - INFO - step: 12590 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.0880 +[titan] 2025-10-05 06:18:12,984 - root - INFO - lr: 4.0273e-05 gnorm: 1.17 [ 7:44:03<16:50:19] +[titan] 2025-10-05 06:18:23,839 - root - INFO - step: 12595 loss: 2.3742 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0956 +[titan] 2025-10-05 06:18:23,839 - root - INFO - lr: 4.0265e-05 gnorm: 1.11 [ 7:44:14<16:50:07] +[titan] 2025-10-05 06:18:32,532 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:18:34,727 - root - INFO - step: 12600 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9762 +[titan] 2025-10-05 06:18:34,727 - root - INFO - lr: 4.0258e-05 gnorm: 1.14 [ 7:44:25<16:49:56] +[titan] 2025-10-05 06:18:45,664 - root - INFO - step: 12605 loss: 2.3207 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0497 +[titan] 2025-10-05 06:18:45,664 - root - INFO - lr: 4.0250e-05 gnorm: 1.17 [ 7:44:36<16:49:44] +[titan] 2025-10-05 06:18:56,552 - root - INFO - step: 12610 loss: 2.3981 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 06:18:56,552 - root - INFO - lr: 4.0243e-05 gnorm: 1.13 [ 7:44:47<16:49:33] +[titan] 2025-10-05 06:19:07,421 - root - INFO - step: 12615 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 06:19:07,421 - root - INFO - lr: 4.0236e-05 gnorm: 1.09 [ 7:44:58<16:49:21] +[titan] 2025-10-05 06:19:18,306 - root - INFO - step: 12620 loss: 2.3150 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 06:19:18,306 - root - INFO - lr: 4.0228e-05 gnorm: 1.12 [ 7:45:08<16:49:10] +[titan] 2025-10-05 06:19:29,188 - root - INFO - step: 12625 loss: 2.3979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1165 +[titan] 2025-10-05 06:19:29,189 - root - INFO - lr: 4.0221e-05 gnorm: 1.12 [ 7:45:19<16:48:59] +[titan] 2025-10-05 06:19:40,105 - root - INFO - step: 12630 loss: 2.2606 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9964 +[titan] 2025-10-05 06:19:40,105 - root - INFO - lr: 
4.0214e-05 gnorm: 1.14 [ 7:45:30<16:48:47] +[titan] 2025-10-05 06:19:50,986 - root - INFO - step: 12635 loss: 2.3546 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:19:50,986 - root - INFO - lr: 4.0206e-05 gnorm: 1.11 [ 7:45:41<16:48:36] +[titan] 2025-10-05 06:20:01,908 - root - INFO - step: 12640 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:20:01,908 - root - INFO - lr: 4.0199e-05 gnorm: 1.11 [ 7:45:52<16:48:24] +[titan] 2025-10-05 06:20:12,799 - root - INFO - step: 12645 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0332 +[titan] 2025-10-05 06:20:12,799 - root - INFO - lr: 4.0192e-05 gnorm: 1.10 [ 7:46:03<16:48:13] +[titan] 2025-10-05 06:20:21,492 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:20:23,685 - root - INFO - step: 12650 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 06:20:23,685 - root - INFO - lr: 4.0184e-05 gnorm: 1.11 [ 7:46:14<16:48:02] +[titan] 2025-10-05 06:20:34,581 - root - INFO - step: 12655 loss: 2.2611 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 06:20:34,581 - root - INFO - lr: 4.0177e-05 gnorm: 1.08 [ 7:46:25<16:47:50] +[titan] 2025-10-05 06:20:45,479 - root - INFO - step: 12660 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:20:45,480 - root - INFO - lr: 4.0169e-05 gnorm: 1.09 [ 7:46:36<16:47:39] +[titan] 2025-10-05 06:20:56,352 - root - INFO - step: 12665 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:20:56,353 - root - INFO - lr: 4.0162e-05 gnorm: 1.10 [ 7:46:47<16:47:27] +[titan] 2025-10-05 06:21:07,226 - root - INFO - step: 12670 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9534 +[titan] 2025-10-05 06:21:07,226 - root - INFO - lr: 4.0155e-05 gnorm: 1.08 [ 7:46:57<16:47:16] +[titan] 2025-10-05 06:21:18,106 - root - INFO - step: 12675 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9903 +[titan] 2025-10-05 06:21:18,107 - root - INFO - lr: 4.0147e-05 gnorm: 1.16 [ 7:47:08<16:47:04] +[titan] 2025-10-05 06:21:28,978 - root - INFO - step: 12680 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 06:21:28,978 - root - INFO - lr: 
4.0140e-05 gnorm: 1.11 [ 7:47:19<16:46:53] +[titan] 2025-10-05 06:21:39,844 - root - INFO - step: 12685 loss: 2.3348 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0622 +[titan] 2025-10-05 06:21:39,844 - root - INFO - lr: 4.0133e-05 gnorm: 1.13 [ 7:47:30<16:46:41] +[titan] 2025-10-05 06:21:50,731 - root - INFO - step: 12690 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 06:21:50,731 - root - INFO - lr: 4.0125e-05 gnorm: 1.14 [ 7:47:41<16:46:30] +[titan] 2025-10-05 06:22:01,611 - root - INFO - step: 12695 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 06:22:01,612 - root - INFO - lr: 4.0118e-05 gnorm: 1.10 [ 7:47:52<16:46:19] +[titan] 2025-10-05 06:22:10,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:22:12,500 - root - INFO - step: 12700 loss: 2.3396 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0653 +[titan] 2025-10-05 06:22:12,501 - root - INFO - lr: 4.0110e-05 gnorm: 1.11 [ 7:48:03<16:46:07] +[titan] 2025-10-05 06:22:23,372 - root - INFO - step: 12705 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0153 +[titan] 2025-10-05 06:22:23,372 - root - INFO - lr: 4.0103e-05 gnorm: 1.11 [ 7:48:14<16:45:56] +[titan] 2025-10-05 06:22:34,241 - root - INFO - step: 12710 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:22:34,241 - root - INFO - lr: 4.0096e-05 gnorm: 1.10 [ 7:48:24<16:45:44] +[titan] 2025-10-05 06:22:45,141 - root - INFO - step: 12715 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0103 +[titan] 2025-10-05 06:22:45,141 - root - INFO - lr: 4.0088e-05 gnorm: 1.14 [ 7:48:35<16:45:33] +[titan] 2025-10-05 06:22:56,018 - root - INFO - step: 12720 loss: 2.2452 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 06:22:56,018 - root - INFO - lr: 4.0081e-05 gnorm: 1.10 [ 7:48:46<16:45:21] +[titan] 2025-10-05 06:23:06,904 - root - INFO - step: 12725 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0456 +[titan] 2025-10-05 06:23:06,904 - root - INFO - lr: 4.0073e-05 gnorm: 1.10 [ 7:48:57<16:45:10] +[titan] 2025-10-05 06:23:17,777 - root - INFO - step: 12730 loss: 2.3547 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0804 +[titan] 2025-10-05 06:23:17,778 - root - INFO - lr: 
4.0066e-05 gnorm: 1.09 [ 7:49:08<16:44:59] +[titan] 2025-10-05 06:23:28,700 - root - INFO - step: 12735 loss: 2.4579 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1714 +[titan] 2025-10-05 06:23:28,700 - root - INFO - lr: 4.0059e-05 gnorm: 1.12 [ 7:49:19<16:44:47] +[titan] 2025-10-05 06:23:39,577 - root - INFO - step: 12740 loss: 2.2807 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 06:23:39,577 - root - INFO - lr: 4.0051e-05 gnorm: 1.08 [ 7:49:30<16:44:36] +[titan] 2025-10-05 06:23:50,466 - root - INFO - step: 12745 loss: 2.2580 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9955 +[titan] 2025-10-05 06:23:50,467 - root - INFO - lr: 4.0044e-05 gnorm: 1.13 [ 7:49:41<16:44:24] +[titan] 2025-10-05 06:23:59,152 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:24:01,332 - root - INFO - step: 12750 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:24:01,332 - root - INFO - lr: 4.0036e-05 gnorm: 1.16 [ 7:49:51<16:44:13] +[titan] 2025-10-05 06:24:12,211 - root - INFO - step: 12755 loss: 2.3122 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 06:24:12,211 - root - INFO - lr: 4.0029e-05 gnorm: 1.10 [ 7:50:02<16:44:02] +[titan] 2025-10-05 06:24:23,070 - root - INFO - step: 12760 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 06:24:23,070 - root - INFO - lr: 4.0022e-05 gnorm: 1.11 [ 7:50:13<16:43:50] +[titan] 2025-10-05 06:24:33,960 - root - INFO - step: 12765 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0308 +[titan] 2025-10-05 06:24:33,960 - root - INFO - lr: 4.0014e-05 gnorm: 1.11 [ 7:50:24<16:43:39] +[titan] 2025-10-05 06:24:44,855 - root - INFO - step: 12770 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0202 +[titan] 2025-10-05 06:24:44,855 - root - INFO - lr: 4.0007e-05 gnorm: 1.10 [ 7:50:35<16:43:27] +[titan] 2025-10-05 06:24:55,732 - root - INFO - step: 12775 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 06:24:55,732 - root - INFO - lr: 3.9999e-05 gnorm: 1.13 [ 7:50:46<16:43:16] +[titan] 2025-10-05 06:25:06,578 - root - INFO - step: 12780 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:06,578 - root - INFO - lr: 
3.9992e-05 gnorm: 1.10 [ 7:50:57<16:43:04] +[titan] 2025-10-05 06:25:17,446 - root - INFO - step: 12785 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0703 +[titan] 2025-10-05 06:25:17,446 - root - INFO - lr: 3.9984e-05 gnorm: 1.15 [ 7:51:08<16:42:53] +[titan] 2025-10-05 06:25:28,322 - root - INFO - step: 12790 loss: 2.1995 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 06:25:28,322 - root - INFO - lr: 3.9977e-05 gnorm: 1.08 [ 7:51:18<16:42:41] +[titan] 2025-10-05 06:25:39,196 - root - INFO - step: 12795 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0818 +[titan] 2025-10-05 06:25:39,196 - root - INFO - lr: 3.9970e-05 gnorm: 1.11 [ 7:51:29<16:42:30] +[titan] 2025-10-05 06:25:48,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:25:50,190 - root - INFO - step: 12800 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:50,191 - root - INFO - lr: 3.9962e-05 gnorm: 1.12 [ 7:51:40<16:42:19] +[titan] 2025-10-05 06:25:50,367 - root - INFO - Dumping profiler traces at step 12800 +[titan] 2025-10-05 06:25:50,405 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:26:01,279 - root - INFO - step: 12805 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 29,551 tflops: 409.97 mfu: 41.45% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 06:26:01,280 - root - INFO - lr: 3.9955e-05 gnorm: 1.13 [ 7:51:51<16:42:08] +[titan] 2025-10-05 06:26:12,154 - root - INFO - step: 12810 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:26:12,154 - root - INFO - lr: 3.9947e-05 gnorm: 1.08 [ 7:52:02<16:41:56] +[titan] 2025-10-05 06:26:23,032 - root - INFO - step: 12815 loss: 2.3306 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0579 +[titan] 2025-10-05 06:26:23,033 - root - INFO - lr: 3.9940e-05 gnorm: 1.06 [ 7:52:13<16:41:45] +[titan] 2025-10-05 06:26:33,940 - root - INFO - step: 12820 loss: 2.3775 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0989 +[titan] 2025-10-05 06:26:33,940 - root - INFO - lr: 3.9932e-05 gnorm: 1.15 [ 7:52:24<16:41:34] +[titan] 2025-10-05 06:26:44,836 - root - INFO - step: 12825 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0008 +[titan] 2025-10-05 06:26:44,836 - root - INFO - lr: 3.9925e-05 gnorm: 1.05 [ 7:52:35<16:41:22] +[titan] 2025-10-05 06:26:55,799 - root - INFO - step: 12830 loss: 
2.3367 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.69 mfu: 41.93% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0630 +[titan] 2025-10-05 06:26:55,799 - root - INFO - lr: 3.9918e-05 gnorm: 1.14 [ 7:52:46<16:41:11] +[titan] 2025-10-05 06:27:06,678 - root - INFO - step: 12835 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 06:27:06,678 - root - INFO - lr: 3.9910e-05 gnorm: 1.07 [ 7:52:57<16:40:59] +[titan] 2025-10-05 06:27:17,590 - root - INFO - step: 12840 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:27:17,590 - root - INFO - lr: 3.9903e-05 gnorm: 1.10 [ 7:53:08<16:40:48] +[titan] 2025-10-05 06:27:28,495 - root - INFO - step: 12845 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0092 +[titan] 2025-10-05 06:27:28,496 - root - INFO - lr: 3.9895e-05 gnorm: 1.10 [ 7:53:19<16:40:37] +[titan] 2025-10-05 06:27:37,188 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:27:39,368 - root - INFO - step: 12850 loss: 2.2958 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0276 +[titan] 2025-10-05 06:27:39,368 - root - INFO - lr: 3.9888e-05 gnorm: 1.09 [ 7:53:30<16:40:25] +[titan] 2025-10-05 06:27:50,338 - root - INFO - step: 12855 loss: 2.2825 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0128 +[titan] 2025-10-05 06:27:50,339 - root - INFO - lr: 3.9880e-05 gnorm: 1.16 [ 7:53:40<16:40:14] +[titan] 2025-10-05 06:28:01,245 - root - INFO - step: 12860 loss: 2.3056 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:28:01,245 - root - INFO - lr: 3.9873e-05 gnorm: 1.08 [ 7:53:51<16:40:03] +[titan] 2025-10-05 06:28:12,135 - root - INFO - step: 12865 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9912 +[titan] 2025-10-05 06:28:12,135 - root - INFO - lr: 3.9865e-05 gnorm: 1.10 [ 7:54:02<16:39:51] +[titan] 2025-10-05 06:28:23,005 - root - INFO - step: 12870 loss: 2.3501 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 06:28:23,005 - root - INFO - lr: 3.9858e-05 gnorm: 1.07 [ 7:54:13<16:39:40] +[titan] 2025-10-05 06:28:33,877 - root - INFO - step: 12875 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0229 +[titan] 2025-10-05 06:28:33,877 - root - INFO - lr: 3.9850e-05 gnorm: 1.13 [ 7:54:24<16:39:28] +[titan] 2025-10-05 06:28:44,761 - root - INFO - step: 12880 loss: 2.3117 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:28:44,761 - root - INFO - lr: 
3.9843e-05 gnorm: 1.15 [ 7:54:35<16:39:17] +[titan] 2025-10-05 06:28:55,685 - root - INFO - step: 12885 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:28:55,685 - root - INFO - lr: 3.9836e-05 gnorm: 1.11 [ 7:54:46<16:39:06] +[titan] 2025-10-05 06:29:06,556 - root - INFO - step: 12890 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:29:06,556 - root - INFO - lr: 3.9828e-05 gnorm: 1.12 [ 7:54:57<16:38:54] +[titan] 2025-10-05 06:29:17,467 - root - INFO - step: 12895 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:29:17,467 - root - INFO - lr: 3.9821e-05 gnorm: 1.11 [ 7:55:08<16:38:43] +[titan] 2025-10-05 06:29:26,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:29:28,342 - root - INFO - step: 12900 loss: 2.3579 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0826 +[titan] 2025-10-05 06:29:28,342 - root - INFO - lr: 3.9813e-05 gnorm: 1.11 [ 7:55:18<16:38:31] +[titan] 2025-10-05 06:29:39,206 - root - INFO - step: 12905 loss: 2.2414 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 06:29:39,206 - root - INFO - lr: 3.9806e-05 gnorm: 1.08 [ 7:55:29<16:38:20] +[titan] 2025-10-05 06:29:50,114 - root - INFO - step: 12910 loss: 2.2702 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:29:50,114 - root - INFO - lr: 3.9798e-05 gnorm: 1.13 [ 7:55:40<16:38:09] +[titan] 2025-10-05 06:30:00,993 - root - INFO - step: 12915 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0072 +[titan] 2025-10-05 06:30:00,993 - root - INFO - lr: 3.9791e-05 gnorm: 1.07 [ 7:55:51<16:37:57] +[titan] 2025-10-05 06:30:11,897 - root - INFO - step: 12920 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0307 +[titan] 2025-10-05 06:30:11,898 - root - INFO - lr: 3.9783e-05 gnorm: 1.04 [ 7:56:02<16:37:46] +[titan] 2025-10-05 06:30:22,817 - root - INFO - step: 12925 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0581 +[titan] 2025-10-05 06:30:22,817 - root - INFO - lr: 3.9776e-05 gnorm: 1.13 [ 7:56:13<16:37:35] +[titan] 2025-10-05 06:30:33,737 - root - INFO - step: 12930 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:30:33,737 - root - INFO - lr: 
3.9768e-05 gnorm: 1.13 [ 7:56:24<16:37:23] +[titan] 2025-10-05 06:30:44,627 - root - INFO - step: 12935 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:30:44,627 - root - INFO - lr: 3.9761e-05 gnorm: 1.10 [ 7:56:35<16:37:12] +[titan] 2025-10-05 06:30:55,585 - root - INFO - step: 12940 loss: 2.3356 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 06:30:55,586 - root - INFO - lr: 3.9753e-05 gnorm: 1.11 [ 7:56:46<16:37:01] +[titan] 2025-10-05 06:31:06,518 - root - INFO - step: 12945 loss: 2.2859 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:31:06,518 - root - INFO - lr: 3.9746e-05 gnorm: 1.10 [ 7:56:57<16:36:49] +[titan] 2025-10-05 06:31:15,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:31:17,447 - root - INFO - step: 12950 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0289 +[titan] 2025-10-05 06:31:17,447 - root - INFO - lr: 3.9738e-05 gnorm: 1.11 [ 7:57:08<16:36:38] +[titan] 2025-10-05 06:31:28,381 - root - INFO - step: 12955 loss: 2.3005 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 06:31:28,382 - root - INFO - lr: 3.9731e-05 gnorm: 1.07 [ 7:57:19<16:36:27] +[titan] 2025-10-05 06:31:39,314 - root - INFO - step: 12960 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0616 +[titan] 2025-10-05 06:31:39,314 - root - INFO - lr: 3.9723e-05 gnorm: 1.13 [ 7:57:29<16:36:15] +[titan] 2025-10-05 06:31:50,231 - root - INFO - step: 12965 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 06:31:50,231 - root - INFO - lr: 3.9716e-05 gnorm: 1.14 [ 7:57:40<16:36:04] +[titan] 2025-10-05 06:32:01,132 - root - INFO - step: 12970 loss: 2.3312 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:32:01,132 - root - INFO - lr: 3.9708e-05 gnorm: 1.16 [ 7:57:51<16:35:53] +[titan] 2025-10-05 06:32:12,023 - root - INFO - step: 12975 loss: 2.2497 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 06:32:12,023 - root - INFO - lr: 3.9701e-05 gnorm: 1.13 [ 7:58:02<16:35:41] +[titan] 2025-10-05 06:32:22,921 - root - INFO - step: 12980 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0314 +[titan] 2025-10-05 06:32:22,922 - root - INFO - lr: 
3.9693e-05 gnorm: 1.08 [ 7:58:13<16:35:30] +[titan] 2025-10-05 06:32:33,791 - root - INFO - step: 12985 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 06:32:33,791 - root - INFO - lr: 3.9686e-05 gnorm: 1.06 [ 7:58:24<16:35:18] +[titan] 2025-10-05 06:32:44,706 - root - INFO - step: 12990 loss: 2.3628 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 06:32:44,706 - root - INFO - lr: 3.9678e-05 gnorm: 1.12 [ 7:58:35<16:35:07] +[titan] 2025-10-05 06:32:55,609 - root - INFO - step: 12995 loss: 2.2830 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:32:55,609 - root - INFO - lr: 3.9671e-05 gnorm: 1.13 [ 7:58:46<16:34:56] +[titan] 2025-10-05 06:33:04,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:33:06,487 - root - INFO - step: 13000 loss: 2.2887 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:33:06,487 - root - INFO - lr: 3.9663e-05 gnorm: 1.09 [ 7:58:57<16:34:44] +[titan] 2025-10-05 06:33:17,365 - root - INFO - step: 13005 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9733 +[titan] 2025-10-05 06:33:17,365 - root - INFO - lr: 3.9656e-05 gnorm: 1.09 [ 7:59:07<16:34:33] +[titan] 2025-10-05 06:33:28,255 - root - INFO - step: 13010 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0677 +[titan] 2025-10-05 06:33:28,255 - root - INFO - lr: 3.9648e-05 gnorm: 1.10 [ 7:59:18<16:34:21] +[titan] 2025-10-05 06:33:39,107 - root - INFO - step: 13015 loss: 2.3870 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 06:33:39,107 - root - INFO - lr: 3.9641e-05 gnorm: 1.14 [ 7:59:29<16:34:10] +[titan] 2025-10-05 06:33:49,999 - root - INFO - step: 13020 loss: 2.2362 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9754 +[titan] 2025-10-05 06:33:49,999 - root - INFO - lr: 3.9633e-05 gnorm: 1.04 [ 7:59:40<16:33:59] +[titan] 2025-10-05 06:34:00,906 - root - INFO - step: 13025 loss: 2.3058 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0372 +[titan] 2025-10-05 06:34:00,906 - root - INFO - lr: 3.9626e-05 gnorm: 1.10 [ 7:59:51<16:33:47] +[titan] 2025-10-05 06:34:11,756 - root - INFO - step: 13030 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9901 +[titan] 2025-10-05 06:34:11,756 - root - INFO - lr: 
3.9618e-05 gnorm: 1.10 [ 8:00:02<16:33:36] +[titan] 2025-10-05 06:34:22,620 - root - INFO - step: 13035 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:34:22,620 - root - INFO - lr: 3.9611e-05 gnorm: 1.07 [ 8:00:13<16:33:24] +[titan] 2025-10-05 06:34:33,499 - root - INFO - step: 13040 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:34:33,499 - root - INFO - lr: 3.9603e-05 gnorm: 1.10 [ 8:00:24<16:33:13] +[titan] 2025-10-05 06:34:44,365 - root - INFO - step: 13045 loss: 2.3062 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 06:34:44,365 - root - INFO - lr: 3.9596e-05 gnorm: 1.12 [ 8:00:34<16:33:01] +[titan] 2025-10-05 06:34:53,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:34:55,288 - root - INFO - step: 13050 loss: 2.2984 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:34:55,289 - root - INFO - lr: 3.9588e-05 gnorm: 1.10 [ 8:00:45<16:32:50] +[titan] 2025-10-05 06:35:06,196 - root - INFO - step: 13055 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9765 +[titan] 2025-10-05 06:35:06,196 - root - INFO - lr: 3.9581e-05 gnorm: 1.05 [ 8:00:56<16:32:39] +[titan] 2025-10-05 06:35:17,080 - root - INFO - step: 13060 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 06:35:17,080 - root - INFO - lr: 3.9573e-05 gnorm: 1.10 [ 8:01:07<16:32:27] +[titan] 2025-10-05 06:35:27,969 - root - INFO - step: 13065 loss: 2.2499 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:35:27,970 - root - INFO - lr: 3.9566e-05 gnorm: 1.09 [ 8:01:18<16:32:16] +[titan] 2025-10-05 06:35:38,866 - root - INFO - step: 13070 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0717 +[titan] 2025-10-05 06:35:38,867 - root - INFO - lr: 3.9558e-05 gnorm: 1.12 [ 8:01:29<16:32:05] +[titan] 2025-10-05 06:35:49,752 - root - INFO - step: 13075 loss: 2.3177 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0458 +[titan] 2025-10-05 06:35:49,752 - root - INFO - lr: 3.9551e-05 gnorm: 1.11 [ 8:01:40<16:31:53] +[titan] 2025-10-05 06:36:00,668 - root - INFO - step: 13080 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 06:36:00,669 - root - INFO - lr: 
3.9543e-05 gnorm: 1.07 [ 8:01:51<16:31:42] +[titan] 2025-10-05 06:36:11,580 - root - INFO - step: 13085 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 06:36:11,580 - root - INFO - lr: 3.9535e-05 gnorm: 1.11 [ 8:02:02<16:31:31] +[titan] 2025-10-05 06:36:22,465 - root - INFO - step: 13090 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:36:22,465 - root - INFO - lr: 3.9528e-05 gnorm: 1.09 [ 8:02:13<16:31:19] +[titan] 2025-10-05 06:36:33,326 - root - INFO - step: 13095 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9630 +[titan] 2025-10-05 06:36:33,326 - root - INFO - lr: 3.9520e-05 gnorm: 1.10 [ 8:02:23<16:31:08] +[titan] 2025-10-05 06:36:41,994 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:36:44,174 - root - INFO - step: 13100 loss: 2.3105 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:36:44,174 - root - INFO - lr: 3.9513e-05 gnorm: 1.14 [ 8:02:34<16:30:56] +[titan] 2025-10-05 06:36:55,075 - root - INFO - step: 13105 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:36:55,075 - root - INFO - lr: 3.9505e-05 gnorm: 1.18 [ 8:02:45<16:30:45] +[titan] 2025-10-05 06:37:05,918 - root - INFO - step: 13110 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 06:37:05,918 - root - INFO - lr: 3.9498e-05 gnorm: 1.08 [ 8:02:56<16:30:33] +[titan] 2025-10-05 06:37:16,786 - root - INFO - step: 13115 loss: 2.2582 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 06:37:16,786 - root - INFO - lr: 3.9490e-05 gnorm: 1.10 [ 8:03:07<16:30:22] +[titan] 2025-10-05 06:37:27,685 - root - INFO - step: 13120 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 06:37:27,685 - root - INFO - lr: 3.9483e-05 gnorm: 1.11 [ 8:03:18<16:30:11] +[titan] 2025-10-05 06:37:38,554 - root - INFO - step: 13125 loss: 2.3124 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 06:37:38,554 - root - INFO - lr: 3.9475e-05 gnorm: 1.10 [ 8:03:29<16:29:59] +[titan] 2025-10-05 06:37:49,418 - root - INFO - step: 13130 loss: 2.3195 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:37:49,418 - root - INFO - lr: 
3.9468e-05 gnorm: 1.09 [ 8:03:40<16:29:48] +[titan] 2025-10-05 06:38:00,337 - root - INFO - step: 13135 loss: 2.2981 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:38:00,337 - root - INFO - lr: 3.9460e-05 gnorm: 1.09 [ 8:03:50<16:29:36] +[titan] 2025-10-05 06:38:11,199 - root - INFO - step: 13140 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9924 +[titan] 2025-10-05 06:38:11,200 - root - INFO - lr: 3.9452e-05 gnorm: 1.06 [ 8:04:01<16:29:25] +[titan] 2025-10-05 06:38:22,080 - root - INFO - step: 13145 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2759 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 06:38:22,080 - root - INFO - lr: 3.9445e-05 gnorm: 1.08 [ 8:04:12<16:29:14] +[titan] 2025-10-05 06:38:30,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:38:32,990 - root - INFO - step: 13150 loss: 2.2897 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0219 +[titan] 2025-10-05 06:38:32,990 - root - INFO - lr: 3.9437e-05 gnorm: 1.12 [ 8:04:23<16:29:02] +[titan] 2025-10-05 06:38:43,859 - root - INFO - step: 13155 loss: 2.2817 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0147 +[titan] 2025-10-05 06:38:43,859 - root - INFO - lr: 3.9430e-05 gnorm: 1.08 [ 8:04:34<16:28:51] +[titan] 2025-10-05 06:38:54,735 - root - INFO - step: 13160 loss: 2.3131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0428 +[titan] 2025-10-05 06:38:54,736 - root - INFO - lr: 3.9422e-05 gnorm: 1.11 [ 8:04:45<16:28:39] +[titan] 2025-10-05 06:39:05,628 - root - INFO - step: 13165 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 06:39:05,629 - root - INFO - lr: 3.9415e-05 gnorm: 1.10 [ 8:04:56<16:28:28] +[titan] 2025-10-05 06:39:16,489 - root - INFO - step: 13170 loss: 2.3292 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:39:16,489 - root - INFO - lr: 3.9407e-05 gnorm: 1.11 [ 8:05:07<16:28:17] +[titan] 2025-10-05 06:39:27,377 - root - INFO - step: 13175 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9764 +[titan] 2025-10-05 06:39:27,377 - root - INFO - lr: 3.9399e-05 gnorm: 1.07 [ 8:05:17<16:28:05] +[titan] 2025-10-05 06:39:38,260 - root - INFO - step: 13180 loss: 2.2929 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0261 +[titan] 2025-10-05 06:39:38,260 - root - INFO - lr: 
3.9392e-05 gnorm: 1.18 [ 8:05:28<16:27:54] +[titan] 2025-10-05 06:39:49,151 - root - INFO - step: 13185 loss: 2.2880 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0212 +[titan] 2025-10-05 06:39:49,152 - root - INFO - lr: 3.9384e-05 gnorm: 1.13 [ 8:05:39<16:27:42] +[titan] 2025-10-05 06:40:00,050 - root - INFO - step: 13190 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 06:40:00,050 - root - INFO - lr: 3.9377e-05 gnorm: 1.11 [ 8:05:50<16:27:31] +[titan] 2025-10-05 06:40:10,934 - root - INFO - step: 13195 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9939 +[titan] 2025-10-05 06:40:10,934 - root - INFO - lr: 3.9369e-05 gnorm: 1.10 [ 8:06:01<16:27:20] +[titan] 2025-10-05 06:40:19,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:40:21,820 - root - INFO - step: 13200 loss: 2.2675 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0021 +[titan] 2025-10-05 06:40:21,820 - root - INFO - lr: 3.9362e-05 gnorm: 1.13 [ 8:06:12<16:27:08] +[titan] 2025-10-05 06:40:32,683 - root - INFO - step: 13205 loss: 2.3004 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:40:32,683 - root - INFO - lr: 3.9354e-05 gnorm: 1.11 [ 8:06:23<16:26:57] +[titan] 2025-10-05 06:40:43,552 - root - INFO - step: 13210 loss: 2.3321 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0596 +[titan] 2025-10-05 06:40:43,553 - root - INFO - lr: 3.9346e-05 gnorm: 1.09 [ 8:06:34<16:26:46] +[titan] 2025-10-05 06:40:54,441 - root - INFO - step: 13215 loss: 2.3746 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 06:40:54,441 - root - INFO - lr: 3.9339e-05 gnorm: 1.09 [ 8:06:45<16:26:34] +[titan] 2025-10-05 06:41:05,315 - root - INFO - step: 13220 loss: 2.3394 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0660 +[titan] 2025-10-05 06:41:05,315 - root - INFO - lr: 3.9331e-05 gnorm: 1.13 [ 8:06:55<16:26:23] +[titan] 2025-10-05 06:41:16,174 - root - INFO - step: 13225 loss: 2.2522 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 06:41:16,174 - root - INFO - lr: 3.9324e-05 gnorm: 1.10 [ 8:07:06<16:26:11] +[titan] 2025-10-05 06:41:27,031 - root - INFO - step: 13230 loss: 2.2903 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:41:27,031 - root - INFO - lr: 
3.9316e-05 gnorm: 1.10 [ 8:07:17<16:26:00] +[titan] 2025-10-05 06:41:37,890 - root - INFO - step: 13235 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0078 +[titan] 2025-10-05 06:41:37,890 - root - INFO - lr: 3.9308e-05 gnorm: 1.09 [ 8:07:28<16:25:48] +[titan] 2025-10-05 06:41:48,764 - root - INFO - step: 13240 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 06:41:48,764 - root - INFO - lr: 3.9301e-05 gnorm: 1.10 [ 8:07:39<16:25:37] +[titan] 2025-10-05 06:41:59,671 - root - INFO - step: 13245 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0046 +[titan] 2025-10-05 06:41:59,672 - root - INFO - lr: 3.9293e-05 gnorm: 1.13 [ 8:07:50<16:25:26] +[titan] 2025-10-05 06:42:08,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:42:10,557 - root - INFO - step: 13250 loss: 2.3326 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0599 +[titan] 2025-10-05 06:42:10,557 - root - INFO - lr: 3.9286e-05 gnorm: 1.14 [ 8:08:01<16:25:14] +[titan] 2025-10-05 06:42:21,421 - root - INFO - step: 13255 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 06:42:21,421 - root - INFO - lr: 3.9278e-05 gnorm: 1.14 [ 8:08:12<16:25:03] +[titan] 2025-10-05 06:42:32,317 - root - INFO - step: 13260 loss: 2.2022 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9453 +[titan] 2025-10-05 06:42:32,317 - root - INFO - lr: 3.9270e-05 gnorm: 1.07 [ 8:08:22<16:24:51] +[titan] 2025-10-05 06:42:43,197 - root - INFO - step: 13265 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 06:42:43,198 - root - INFO - lr: 3.9263e-05 gnorm: 1.11 [ 8:08:33<16:24:40] +[titan] 2025-10-05 06:42:54,090 - root - INFO - step: 13270 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 06:42:54,091 - root - INFO - lr: 3.9255e-05 gnorm: 1.10 [ 8:08:44<16:24:29] +[titan] 2025-10-05 06:43:05,001 - root - INFO - step: 13275 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 06:43:05,001 - root - INFO - lr: 3.9248e-05 gnorm: 1.10 [ 8:08:55<16:24:17] +[titan] 2025-10-05 06:43:15,880 - root - INFO - step: 13280 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:15,880 - root - INFO - lr: 
3.9240e-05 gnorm: 1.07 [ 8:09:06<16:24:06] +[titan] 2025-10-05 06:43:26,737 - root - INFO - step: 13285 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:26,737 - root - INFO - lr: 3.9232e-05 gnorm: 1.11 [ 8:09:17<16:23:55] +[titan] 2025-10-05 06:43:37,602 - root - INFO - step: 13290 loss: 2.3086 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:43:37,602 - root - INFO - lr: 3.9225e-05 gnorm: 1.10 [ 8:09:28<16:23:43] +[titan] 2025-10-05 06:43:48,473 - root - INFO - step: 13295 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 06:43:48,473 - root - INFO - lr: 3.9217e-05 gnorm: 1.11 [ 8:09:39<16:23:32] +[titan] 2025-10-05 06:43:57,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:43:59,317 - root - INFO - step: 13300 loss: 2.3797 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 06:43:59,318 - root - INFO - lr: 3.9209e-05 gnorm: 1.11 [ 8:09:49<16:23:20] +[titan] 2025-10-05 06:44:10,186 - root - INFO - step: 13305 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0141 +[titan] 2025-10-05 06:44:10,186 - root - INFO - lr: 3.9202e-05 gnorm: 1.09 [ 8:10:00<16:23:09] +[titan] 2025-10-05 06:44:21,180 - root - INFO - step: 13310 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0337 +[titan] 2025-10-05 06:44:21,180 - root - INFO - lr: 3.9194e-05 gnorm: 1.09 [ 8:10:11<16:22:58] +[titan] 2025-10-05 06:44:25,700 - root - INFO - Dumping profiler traces at step 13312 +[titan] 2025-10-05 06:44:25,736 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:44:32,265 - root - INFO - step: 13315 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 29,563 tflops: 410.13 mfu: 41.47% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 06:44:32,265 - root - INFO - lr: 3.9187e-05 gnorm: 1.04 [ 8:10:22<16:22:47] +[titan] 2025-10-05 06:44:43,144 - root - INFO - step: 13320 loss: 2.3112 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 06:44:43,144 - root - INFO - lr: 3.9179e-05 gnorm: 1.13 [ 8:10:33<16:22:35] +[titan] 2025-10-05 06:44:54,006 - root - INFO - step: 13325 loss: 2.3530 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0783 +[titan] 2025-10-05 06:44:54,006 - root - INFO - lr: 3.9171e-05 gnorm: 1.06 [ 8:10:44<16:22:24] +[titan] 2025-10-05 06:45:04,897 - root - INFO - step: 13330 loss: 
2.3671 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 06:45:04,897 - root - INFO - lr: 3.9164e-05 gnorm: 1.11 [ 8:10:55<16:22:13] +[titan] 2025-10-05 06:45:15,754 - root - INFO - step: 13335 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0644 +[titan] 2025-10-05 06:45:15,754 - root - INFO - lr: 3.9156e-05 gnorm: 1.16 [ 8:11:06<16:22:01] +[titan] 2025-10-05 06:45:26,632 - root - INFO - step: 13340 loss: 2.2623 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:45:26,632 - root - INFO - lr: 3.9148e-05 gnorm: 1.12 [ 8:11:17<16:21:50] +[titan] 2025-10-05 06:45:37,522 - root - INFO - step: 13345 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 06:45:37,522 - root - INFO - lr: 3.9141e-05 gnorm: 1.07 [ 8:11:28<16:21:38] +[titan] 2025-10-05 06:45:46,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:45:48,374 - root - INFO - step: 13350 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:45:48,374 - root - INFO - lr: 3.9133e-05 gnorm: 1.10 [ 8:11:38<16:21:27] +[titan] 2025-10-05 06:45:59,227 - root - INFO - step: 13355 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0173 +[titan] 2025-10-05 06:45:59,227 - root - INFO - lr: 3.9126e-05 gnorm: 1.11 [ 8:11:49<16:21:16] +[titan] 2025-10-05 06:46:10,100 - root - INFO - step: 13360 loss: 2.3111 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:46:10,100 - root - INFO - lr: 3.9118e-05 gnorm: 1.11 [ 8:12:00<16:21:04] +[titan] 2025-10-05 06:46:20,957 - root - INFO - step: 13365 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 06:46:20,957 - root - INFO - lr: 3.9110e-05 gnorm: 1.10 [ 8:12:11<16:20:53] +[titan] 2025-10-05 06:46:31,838 - root - INFO - step: 13370 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0254 +[titan] 2025-10-05 06:46:31,838 - root - INFO - lr: 3.9103e-05 gnorm: 1.13 [ 8:12:22<16:20:41] +[titan] 2025-10-05 06:46:42,735 - root - INFO - step: 13375 loss: 2.3437 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0696 +[titan] 2025-10-05 06:46:42,735 - root - INFO - lr: 3.9095e-05 gnorm: 1.12 [ 8:12:33<16:20:30] +[titan] 2025-10-05 06:46:53,595 - root - INFO - step: 13380 loss: 2.2952 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0281 +[titan] 2025-10-05 06:46:53,595 - root - INFO - lr: 
3.9087e-05 gnorm: 1.07 [ 8:12:44<16:20:19] +[titan] 2025-10-05 06:47:04,484 - root - INFO - step: 13385 loss: 2.3167 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0450 +[titan] 2025-10-05 06:47:04,484 - root - INFO - lr: 3.9080e-05 gnorm: 1.12 [ 8:12:55<16:20:07] +[titan] 2025-10-05 06:47:15,385 - root - INFO - step: 13390 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:47:15,385 - root - INFO - lr: 3.9072e-05 gnorm: 1.13 [ 8:13:05<16:19:56] +[titan] 2025-10-05 06:47:26,291 - root - INFO - step: 13395 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:47:26,291 - root - INFO - lr: 3.9064e-05 gnorm: 1.09 [ 8:13:16<16:19:44] +[titan] 2025-10-05 06:47:34,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:47:37,159 - root - INFO - step: 13400 loss: 2.2934 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0255 +[titan] 2025-10-05 06:47:37,159 - root - INFO - lr: 3.9057e-05 gnorm: 1.10 [ 8:13:27<16:19:33] +[titan] 2025-10-05 06:47:48,051 - root - INFO - step: 13405 loss: 2.1829 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 06:47:48,051 - root - INFO - lr: 3.9049e-05 gnorm: 1.13 [ 8:13:38<16:19:22] +[titan] 2025-10-05 06:47:58,962 - root - INFO - step: 13410 loss: 2.3403 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0664 +[titan] 2025-10-05 06:47:58,962 - root - INFO - lr: 3.9041e-05 gnorm: 1.08 [ 8:13:49<16:19:10] +[titan] 2025-10-05 06:48:09,859 - root - INFO - step: 13415 loss: 2.2971 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:48:09,859 - root - INFO - lr: 3.9034e-05 gnorm: 1.09 [ 8:14:00<16:18:59] +[titan] 2025-10-05 06:48:20,742 - root - INFO - step: 13420 loss: 2.3033 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0348 +[titan] 2025-10-05 06:48:20,742 - root - INFO - lr: 3.9026e-05 gnorm: 1.09 [ 8:14:11<16:18:48] +[titan] 2025-10-05 06:48:31,616 - root - INFO - step: 13425 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0709 +[titan] 2025-10-05 06:48:31,616 - root - INFO - lr: 3.9018e-05 gnorm: 1.11 [ 8:14:22<16:18:36] +[titan] 2025-10-05 06:48:42,471 - root - INFO - step: 13430 loss: 2.2153 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 06:48:42,471 - root - INFO - lr: 
3.9011e-05 gnorm: 1.09 [ 8:14:33<16:18:25] +[titan] 2025-10-05 06:48:53,334 - root - INFO - step: 13435 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 06:48:53,334 - root - INFO - lr: 3.9003e-05 gnorm: 1.10 [ 8:14:43<16:18:13] +[titan] 2025-10-05 06:49:04,235 - root - INFO - step: 13440 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 06:49:04,235 - root - INFO - lr: 3.8995e-05 gnorm: 1.10 [ 8:14:54<16:18:02] +[titan] 2025-10-05 06:49:15,122 - root - INFO - step: 13445 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0366 +[titan] 2025-10-05 06:49:15,122 - root - INFO - lr: 3.8988e-05 gnorm: 1.10 [ 8:15:05<16:17:51] +[titan] 2025-10-05 06:49:23,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:49:25,981 - root - INFO - step: 13450 loss: 2.2828 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0115 +[titan] 2025-10-05 06:49:25,981 - root - INFO - lr: 3.8980e-05 gnorm: 1.07 [ 8:15:16<16:17:39] +[titan] 2025-10-05 06:49:36,831 - root - INFO - step: 13455 loss: 2.2498 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9867 +[titan] 2025-10-05 06:49:36,831 - root - INFO - lr: 3.8972e-05 gnorm: 1.03 [ 8:15:27<16:17:28] +[titan] 2025-10-05 06:49:47,714 - root - INFO - step: 13460 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0682 +[titan] 2025-10-05 06:49:47,714 - root - INFO - lr: 3.8965e-05 gnorm: 1.14 [ 8:15:38<16:17:17] +[titan] 2025-10-05 06:49:58,585 - root - INFO - step: 13465 loss: 2.2324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 06:49:58,585 - root - INFO - lr: 3.8957e-05 gnorm: 1.11 [ 8:15:49<16:17:05] +[titan] 2025-10-05 06:50:09,688 - root - INFO - step: 13470 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 29,514 tflops: 409.46 mfu: 41.40% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9928 +[titan] 2025-10-05 06:50:09,688 - root - INFO - lr: 3.8949e-05 gnorm: 1.07 [ 8:16:00<16:16:54] +[titan] 2025-10-05 06:50:20,551 - root - INFO - step: 13475 loss: 2.2930 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0250 +[titan] 2025-10-05 06:50:20,551 - root - INFO - lr: 3.8942e-05 gnorm: 1.12 [ 8:16:11<16:16:43] +[titan] 2025-10-05 06:50:31,416 - root - INFO - step: 13480 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:50:31,416 - root - INFO - lr: 
3.8934e-05 gnorm: 1.09 [ 8:16:21<16:16:31] +[titan] 2025-10-05 06:50:42,269 - root - INFO - step: 13485 loss: 2.2218 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9617 +[titan] 2025-10-05 06:50:42,269 - root - INFO - lr: 3.8926e-05 gnorm: 1.10 [ 8:16:32<16:16:20] +[titan] 2025-10-05 06:50:53,127 - root - INFO - step: 13490 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 06:50:53,128 - root - INFO - lr: 3.8919e-05 gnorm: 1.07 [ 8:16:43<16:16:09] +[titan] 2025-10-05 06:51:03,982 - root - INFO - step: 13495 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:51:03,982 - root - INFO - lr: 3.8911e-05 gnorm: 1.09 [ 8:16:54<16:15:57] +[titan] 2025-10-05 06:51:12,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:51:14,857 - root - INFO - step: 13500 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 06:51:14,857 - root - INFO - lr: 3.8903e-05 gnorm: 1.09 [ 8:17:05<16:15:46] +[titan] 2025-10-05 06:51:25,746 - root - INFO - step: 13505 loss: 2.2715 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 06:51:25,746 - root - INFO - lr: 3.8896e-05 gnorm: 1.09 [ 8:17:16<16:15:34] +[titan] 2025-10-05 06:51:36,614 - root - INFO - step: 13510 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 06:51:36,614 - root - INFO - lr: 3.8888e-05 gnorm: 1.08 [ 8:17:27<16:15:23] +[titan] 2025-10-05 06:51:47,494 - root - INFO - step: 13515 loss: 2.2519 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 06:51:47,494 - root - INFO - lr: 3.8880e-05 gnorm: 1.12 [ 8:17:38<16:15:12] +[titan] 2025-10-05 06:51:58,360 - root - INFO - step: 13520 loss: 2.2323 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:51:58,360 - root - INFO - lr: 3.8872e-05 gnorm: 1.05 [ 8:17:48<16:15:00] +[titan] 2025-10-05 06:52:09,236 - root - INFO - step: 13525 loss: 2.2346 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 06:52:09,236 - root - INFO - lr: 3.8865e-05 gnorm: 1.07 [ 8:17:59<16:14:49] +[titan] 2025-10-05 06:52:20,103 - root - INFO - step: 13530 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9669 +[titan] 2025-10-05 06:52:20,103 - root - INFO - lr: 
3.8857e-05 gnorm: 1.08 [ 8:18:10<16:14:37] +[titan] 2025-10-05 06:52:30,992 - root - INFO - step: 13535 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9949 +[titan] 2025-10-05 06:52:30,992 - root - INFO - lr: 3.8849e-05 gnorm: 1.10 [ 8:18:21<16:14:26] +[titan] 2025-10-05 06:52:41,845 - root - INFO - step: 13540 loss: 2.2743 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0096 +[titan] 2025-10-05 06:52:41,846 - root - INFO - lr: 3.8842e-05 gnorm: 1.16 [ 8:18:32<16:14:15] +[titan] 2025-10-05 06:52:52,731 - root - INFO - step: 13545 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:52:52,731 - root - INFO - lr: 3.8834e-05 gnorm: 1.19 [ 8:18:43<16:14:03] +[titan] 2025-10-05 06:53:01,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:53:03,584 - root - INFO - step: 13550 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:53:03,584 - root - INFO - lr: 3.8826e-05 gnorm: 1.12 [ 8:18:54<16:13:52] +[titan] 2025-10-05 06:53:14,560 - root - INFO - step: 13555 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.18 mfu: 41.88% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:53:14,560 - root - INFO - lr: 3.8818e-05 gnorm: 1.18 [ 8:19:05<16:13:41] +[titan] 2025-10-05 06:53:25,426 - root - INFO - step: 13560 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:53:25,426 - root - INFO - lr: 3.8811e-05 gnorm: 1.10 [ 8:19:15<16:13:29] +[titan] 2025-10-05 06:53:36,319 - root - INFO - step: 13565 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0242 +[titan] 2025-10-05 06:53:36,320 - root - INFO - lr: 3.8803e-05 gnorm: 1.11 [ 8:19:26<16:13:18] +[titan] 2025-10-05 06:53:47,222 - root - INFO - step: 13570 loss: 2.2893 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:53:47,222 - root - INFO - lr: 3.8795e-05 gnorm: 1.11 [ 8:19:37<16:13:07] +[titan] 2025-10-05 06:53:58,096 - root - INFO - step: 13575 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9963 +[titan] 2025-10-05 06:53:58,096 - root - INFO - lr: 3.8788e-05 gnorm: 1.11 [ 8:19:48<16:12:55] +[titan] 2025-10-05 06:54:08,974 - root - INFO - step: 13580 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:54:08,975 - root - INFO - lr: 
3.8780e-05 gnorm: 1.11 [ 8:19:59<16:12:44] +[titan] 2025-10-05 06:54:19,877 - root - INFO - step: 13585 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0437 +[titan] 2025-10-05 06:54:19,877 - root - INFO - lr: 3.8772e-05 gnorm: 1.15 [ 8:20:10<16:12:33] +[titan] 2025-10-05 06:54:30,750 - root - INFO - step: 13590 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0181 +[titan] 2025-10-05 06:54:30,750 - root - INFO - lr: 3.8764e-05 gnorm: 1.09 [ 8:20:21<16:12:21] +[titan] 2025-10-05 06:54:41,615 - root - INFO - step: 13595 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0386 +[titan] 2025-10-05 06:54:41,615 - root - INFO - lr: 3.8757e-05 gnorm: 1.12 [ 8:20:32<16:12:10] +[titan] 2025-10-05 06:54:50,323 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:54:52,501 - root - INFO - step: 13600 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:54:52,501 - root - INFO - lr: 3.8749e-05 gnorm: 1.12 [ 8:20:43<16:11:58] +[titan] 2025-10-05 06:55:03,350 - root - INFO - step: 13605 loss: 2.2279 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 06:55:03,350 - root - INFO - lr: 3.8741e-05 gnorm: 1.09 [ 8:20:53<16:11:47] +[titan] 2025-10-05 06:55:14,228 - root - INFO - step: 13610 loss: 2.3259 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0547 +[titan] 2025-10-05 06:55:14,228 - root - INFO - lr: 3.8734e-05 gnorm: 1.14 [ 8:21:04<16:11:36] +[titan] 2025-10-05 06:55:25,123 - root - INFO - step: 13615 loss: 2.2661 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0007 +[titan] 2025-10-05 06:55:25,123 - root - INFO - lr: 3.8726e-05 gnorm: 1.11 [ 8:21:15<16:11:24] +[titan] 2025-10-05 06:55:35,976 - root - INFO - step: 13620 loss: 2.3686 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0920 +[titan] 2025-10-05 06:55:35,976 - root - INFO - lr: 3.8718e-05 gnorm: 1.15 [ 8:21:26<16:11:13] +[titan] 2025-10-05 06:55:46,835 - root - INFO - step: 13625 loss: 2.2851 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0187 +[titan] 2025-10-05 06:55:46,835 - root - INFO - lr: 3.8710e-05 gnorm: 1.07 [ 8:21:37<16:11:01] +[titan] 2025-10-05 06:55:57,740 - root - INFO - step: 13630 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0176 +[titan] 2025-10-05 06:55:57,740 - root - INFO - lr: 
3.8703e-05 gnorm: 1.08 [ 8:21:48<16:10:50] +[titan] 2025-10-05 06:56:08,602 - root - INFO - step: 13635 loss: 2.3123 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 06:56:08,602 - root - INFO - lr: 3.8695e-05 gnorm: 1.12 [ 8:21:59<16:10:39] +[titan] 2025-10-05 06:56:19,485 - root - INFO - step: 13640 loss: 2.2360 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 06:56:19,486 - root - INFO - lr: 3.8687e-05 gnorm: 1.08 [ 8:22:10<16:10:27] +[titan] 2025-10-05 06:56:30,339 - root - INFO - step: 13645 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0101 +[titan] 2025-10-05 06:56:30,339 - root - INFO - lr: 3.8679e-05 gnorm: 1.20 [ 8:22:20<16:10:16] +[titan] 2025-10-05 06:56:39,024 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:56:41,218 - root - INFO - step: 13650 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0411 +[titan] 2025-10-05 06:56:41,218 - root - INFO - lr: 3.8672e-05 gnorm: 1.10 [ 8:22:31<16:10:05] +[titan] 2025-10-05 06:56:52,067 - root - INFO - step: 13655 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 06:56:52,068 - root - INFO - lr: 3.8664e-05 gnorm: 1.09 [ 8:22:42<16:09:53] +[titan] 2025-10-05 06:57:02,942 - root - INFO - step: 13660 loss: 2.3364 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0632 +[titan] 2025-10-05 06:57:02,942 - root - INFO - lr: 3.8656e-05 gnorm: 1.13 [ 8:22:53<16:09:42] +[titan] 2025-10-05 06:57:13,852 - root - INFO - step: 13665 loss: 2.2401 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 06:57:13,852 - root - INFO - lr: 3.8648e-05 gnorm: 1.09 [ 8:23:04<16:09:30] +[titan] 2025-10-05 06:57:24,731 - root - INFO - step: 13670 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9808 +[titan] 2025-10-05 06:57:24,731 - root - INFO - lr: 3.8641e-05 gnorm: 1.12 [ 8:23:15<16:09:19] +[titan] 2025-10-05 06:57:35,601 - root - INFO - step: 13675 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 06:57:35,601 - root - INFO - lr: 3.8633e-05 gnorm: 1.12 [ 8:23:26<16:09:08] +[titan] 2025-10-05 06:57:46,492 - root - INFO - step: 13680 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9915 +[titan] 2025-10-05 06:57:46,493 - root - INFO - lr: 
3.8625e-05 gnorm: 1.09 [ 8:23:37<16:08:56] +[titan] 2025-10-05 06:57:57,361 - root - INFO - step: 13685 loss: 2.2907 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:57:57,361 - root - INFO - lr: 3.8617e-05 gnorm: 1.05 [ 8:23:47<16:08:45] +[titan] 2025-10-05 06:58:08,244 - root - INFO - step: 13690 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 06:58:08,244 - root - INFO - lr: 3.8610e-05 gnorm: 1.12 [ 8:23:58<16:08:34] +[titan] 2025-10-05 06:58:19,163 - root - INFO - step: 13695 loss: 2.2749 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0089 +[titan] 2025-10-05 06:58:19,164 - root - INFO - lr: 3.8602e-05 gnorm: 1.09 [ 8:24:09<16:08:22] +[titan] 2025-10-05 06:58:27,860 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:58:30,056 - root - INFO - step: 13700 loss: 2.3146 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 06:58:30,056 - root - INFO - lr: 3.8594e-05 gnorm: 1.10 [ 8:24:20<16:08:11] +[titan] 2025-10-05 06:58:40,938 - root - INFO - step: 13705 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 06:58:40,938 - root - INFO - lr: 3.8586e-05 gnorm: 1.07 [ 8:24:31<16:08:00] +[titan] 2025-10-05 06:58:51,816 - root - INFO - step: 13710 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 06:58:51,816 - root - INFO - lr: 3.8578e-05 gnorm: 1.10 [ 8:24:42<16:07:48] +[titan] 2025-10-05 06:59:02,700 - root - INFO - step: 13715 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 06:59:02,700 - root - INFO - lr: 3.8571e-05 gnorm: 1.12 [ 8:24:53<16:07:37] +[titan] 2025-10-05 06:59:13,554 - root - INFO - step: 13720 loss: 2.3118 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:59:13,554 - root - INFO - lr: 3.8563e-05 gnorm: 1.14 [ 8:25:04<16:07:26] +[titan] 2025-10-05 06:59:24,420 - root - INFO - step: 13725 loss: 2.2285 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9687 +[titan] 2025-10-05 06:59:24,420 - root - INFO - lr: 3.8555e-05 gnorm: 1.11 [ 8:25:14<16:07:14] +[titan] 2025-10-05 06:59:35,307 - root - INFO - step: 13730 loss: 2.2243 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 06:59:35,307 - root - INFO - lr: 
3.8547e-05 gnorm: 1.10 [ 8:25:25<16:07:03] +[titan] 2025-10-05 06:59:46,179 - root - INFO - step: 13735 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 06:59:46,179 - root - INFO - lr: 3.8540e-05 gnorm: 1.08 [ 8:25:36<16:06:51] +[titan] 2025-10-05 06:59:57,061 - root - INFO - step: 13740 loss: 2.2450 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9828 +[titan] 2025-10-05 06:59:57,061 - root - INFO - lr: 3.8532e-05 gnorm: 1.15 [ 8:25:47<16:06:40] +[titan] 2025-10-05 07:00:07,935 - root - INFO - step: 13745 loss: 2.3278 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:00:07,935 - root - INFO - lr: 3.8524e-05 gnorm: 1.10 [ 8:25:58<16:06:29] +[titan] 2025-10-05 07:00:16,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:00:18,832 - root - INFO - step: 13750 loss: 2.3084 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 07:00:18,833 - root - INFO - lr: 3.8516e-05 gnorm: 1.10 [ 8:26:09<16:06:17] +[titan] 2025-10-05 07:00:29,706 - root - INFO - step: 13755 loss: 2.3204 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0493 +[titan] 2025-10-05 07:00:29,706 - root - INFO - lr: 3.8509e-05 gnorm: 1.11 [ 8:26:20<16:06:06] +[titan] 2025-10-05 07:00:40,608 - root - INFO - step: 13760 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0305 +[titan] 2025-10-05 07:00:40,608 - root - INFO - lr: 3.8501e-05 gnorm: 1.15 [ 8:26:31<16:05:55] +[titan] 2025-10-05 07:00:51,487 - root - INFO - step: 13765 loss: 2.2771 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 07:00:51,487 - root - INFO - lr: 3.8493e-05 gnorm: 1.08 [ 8:26:42<16:05:43] +[titan] 2025-10-05 07:01:02,367 - root - INFO - step: 13770 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0107 +[titan] 2025-10-05 07:01:02,367 - root - INFO - lr: 3.8485e-05 gnorm: 1.52 [ 8:26:52<16:05:32] +[titan] 2025-10-05 07:01:13,257 - root - INFO - step: 13775 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0460 +[titan] 2025-10-05 07:01:13,257 - root - INFO - lr: 3.8477e-05 gnorm: 1.11 [ 8:27:03<16:05:21] +[titan] 2025-10-05 07:01:24,150 - root - INFO - step: 13780 loss: 2.3133 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 07:01:24,150 - root - INFO - lr: 
3.8470e-05 gnorm: 1.05 [ 8:27:14<16:05:09] +[titan] 2025-10-05 07:01:35,054 - root - INFO - step: 13785 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9600 +[titan] 2025-10-05 07:01:35,054 - root - INFO - lr: 3.8462e-05 gnorm: 1.10 [ 8:27:25<16:04:58] +[titan] 2025-10-05 07:01:45,974 - root - INFO - step: 13790 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0014 +[titan] 2025-10-05 07:01:45,974 - root - INFO - lr: 3.8454e-05 gnorm: 1.09 [ 8:27:36<16:04:47] +[titan] 2025-10-05 07:01:56,865 - root - INFO - step: 13795 loss: 2.2879 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:01:56,865 - root - INFO - lr: 3.8446e-05 gnorm: 1.08 [ 8:27:47<16:04:35] +[titan] 2025-10-05 07:02:05,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:02:07,773 - root - INFO - step: 13800 loss: 2.2846 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0177 +[titan] 2025-10-05 07:02:07,773 - root - INFO - lr: 3.8438e-05 gnorm: 1.09 [ 8:27:58<16:04:24] +[titan] 2025-10-05 07:02:18,700 - root - INFO - step: 13805 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 07:02:18,700 - root - INFO - lr: 3.8431e-05 gnorm: 1.09 [ 8:28:09<16:04:13] +[titan] 2025-10-05 07:02:29,593 - root - INFO - step: 13810 loss: 2.2868 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 07:02:29,593 - root - INFO - lr: 3.8423e-05 gnorm: 1.08 [ 8:28:20<16:04:02] +[titan] 2025-10-05 07:02:40,489 - root - INFO - step: 13815 loss: 2.3125 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 07:02:40,489 - root - INFO - lr: 3.8415e-05 gnorm: 1.08 [ 8:28:31<16:03:50] +[titan] 2025-10-05 07:02:51,396 - root - INFO - step: 13820 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1047 +[titan] 2025-10-05 07:02:51,396 - root - INFO - lr: 3.8407e-05 gnorm: 1.13 [ 8:28:41<16:03:39] +[titan] 2025-10-05 07:03:00,397 - root - INFO - Dumping profiler traces at step 13824 +[titan] 2025-10-05 07:03:00,435 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:03:02,625 - root - INFO - step: 13825 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 29,181 tflops: 404.84 mfu: 40.93% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:03:02,626 - root - INFO - lr: 3.8399e-05 gnorm: 1.09 [ 8:28:53<16:03:28] +[titan] 2025-10-05 07:03:13,525 - root - INFO - step: 13830 loss: 
2.3225 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0510 +[titan] 2025-10-05 07:03:13,526 - root - INFO - lr: 3.8392e-05 gnorm: 1.08 [ 8:29:04<16:03:17] +[titan] 2025-10-05 07:03:24,465 - root - INFO - step: 13835 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:03:24,466 - root - INFO - lr: 3.8384e-05 gnorm: 1.07 [ 8:29:15<16:03:06] +[titan] 2025-10-05 07:03:35,347 - root - INFO - step: 13840 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:03:35,348 - root - INFO - lr: 3.8376e-05 gnorm: 1.09 [ 8:29:25<16:02:54] +[titan] 2025-10-05 07:03:46,225 - root - INFO - step: 13845 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 07:03:46,225 - root - INFO - lr: 3.8368e-05 gnorm: 1.11 [ 8:29:36<16:02:43] +[titan] 2025-10-05 07:03:54,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:03:57,111 - root - INFO - step: 13850 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0113 +[titan] 2025-10-05 07:03:57,111 - root - INFO - lr: 3.8360e-05 gnorm: 1.11 [ 8:29:47<16:02:32] +[titan] 2025-10-05 07:04:08,025 - root - INFO - step: 13855 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0769 +[titan] 2025-10-05 07:04:08,025 - root - INFO - lr: 3.8353e-05 gnorm: 1.11 [ 8:29:58<16:02:20] +[titan] 2025-10-05 07:04:18,937 - root - INFO - step: 13860 loss: 2.2484 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9860 +[titan] 2025-10-05 07:04:18,937 - root - INFO - lr: 3.8345e-05 gnorm: 1.13 [ 8:30:09<16:02:09] +[titan] 2025-10-05 07:04:29,819 - root - INFO - step: 13865 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9412 +[titan] 2025-10-05 07:04:29,820 - root - INFO - lr: 3.8337e-05 gnorm: 1.13 [ 8:30:20<16:01:58] +[titan] 2025-10-05 07:04:40,706 - root - INFO - step: 13870 loss: 2.1522 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 07:04:40,706 - root - INFO - lr: 3.8329e-05 gnorm: 1.10 [ 8:30:31<16:01:46] +[titan] 2025-10-05 07:04:51,600 - root - INFO - step: 13875 loss: 2.2926 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:04:51,601 - root - INFO - lr: 3.8321e-05 gnorm: 1.13 [ 8:30:42<16:01:35] +[titan] 2025-10-05 07:05:02,483 - root - INFO - step: 13880 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 07:05:02,484 - root - INFO - lr: 
3.8313e-05 gnorm: 1.05 [ 8:30:53<16:01:24] +[titan] 2025-10-05 07:05:13,375 - root - INFO - step: 13885 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 07:05:13,375 - root - INFO - lr: 3.8306e-05 gnorm: 1.09 [ 8:31:03<16:01:12] +[titan] 2025-10-05 07:05:24,346 - root - INFO - step: 13890 loss: 2.3386 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 07:05:24,347 - root - INFO - lr: 3.8298e-05 gnorm: 1.09 [ 8:31:14<16:01:01] +[titan] 2025-10-05 07:05:35,221 - root - INFO - step: 13895 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 07:05:35,221 - root - INFO - lr: 3.8290e-05 gnorm: 1.10 [ 8:31:25<16:00:50] +[titan] 2025-10-05 07:05:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:05:46,101 - root - INFO - step: 13900 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 07:05:46,101 - root - INFO - lr: 3.8282e-05 gnorm: 1.11 [ 8:31:36<16:00:39] +[titan] 2025-10-05 07:05:56,991 - root - INFO - step: 13905 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 07:05:56,992 - root - INFO - lr: 3.8274e-05 gnorm: 1.09 [ 8:31:47<16:00:27] +[titan] 2025-10-05 07:06:07,860 - root - INFO - step: 13910 loss: 2.2822 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0150 +[titan] 2025-10-05 07:06:07,861 - root - INFO - lr: 3.8266e-05 gnorm: 1.06 [ 8:31:58<16:00:16] +[titan] 2025-10-05 07:06:18,755 - root - INFO - step: 13915 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0626 +[titan] 2025-10-05 07:06:18,755 - root - INFO - lr: 3.8259e-05 gnorm: 1.12 [ 8:32:09<16:00:04] +[titan] 2025-10-05 07:06:29,694 - root - INFO - step: 13920 loss: 2.3240 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0527 +[titan] 2025-10-05 07:06:29,694 - root - INFO - lr: 3.8251e-05 gnorm: 1.13 [ 8:32:20<15:59:53] +[titan] 2025-10-05 07:06:40,578 - root - INFO - step: 13925 loss: 2.2091 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9511 +[titan] 2025-10-05 07:06:40,578 - root - INFO - lr: 3.8243e-05 gnorm: 1.13 [ 8:32:31<15:59:42] +[titan] 2025-10-05 07:06:51,433 - root - INFO - step: 13930 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:06:51,434 - root - INFO - lr: 
3.8235e-05 gnorm: 1.12 [ 8:32:41<15:59:31] +[titan] 2025-10-05 07:07:02,325 - root - INFO - step: 13935 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0277 +[titan] 2025-10-05 07:07:02,326 - root - INFO - lr: 3.8227e-05 gnorm: 1.12 [ 8:32:52<15:59:19] +[titan] 2025-10-05 07:07:13,223 - root - INFO - step: 13940 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 07:07:13,223 - root - INFO - lr: 3.8219e-05 gnorm: 1.05 [ 8:33:03<15:59:08] +[titan] 2025-10-05 07:07:24,144 - root - INFO - step: 13945 loss: 2.2627 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 07:07:24,144 - root - INFO - lr: 3.8212e-05 gnorm: 1.07 [ 8:33:14<15:58:57] +[titan] 2025-10-05 07:07:32,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:07:35,060 - root - INFO - step: 13950 loss: 2.3247 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0535 +[titan] 2025-10-05 07:07:35,061 - root - INFO - lr: 3.8204e-05 gnorm: 1.15 [ 8:33:25<15:58:45] +[titan] 2025-10-05 07:07:45,949 - root - INFO - step: 13955 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 1.9994 +[titan] 2025-10-05 07:07:45,949 - root - INFO - lr: 3.8196e-05 gnorm: 1.12 [ 8:33:36<15:58:34] +[titan] 2025-10-05 07:07:56,827 - root - INFO - step: 13960 loss: 2.2073 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 07:07:56,827 - root - INFO - lr: 3.8188e-05 gnorm: 1.13 [ 8:33:47<15:58:23] +[titan] 2025-10-05 07:08:07,719 - root - INFO - step: 13965 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 07:08:07,720 - root - INFO - lr: 3.8180e-05 gnorm: 1.05 [ 8:33:58<15:58:11] +[titan] 2025-10-05 07:08:18,609 - root - INFO - step: 13970 loss: 2.3210 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0494 +[titan] 2025-10-05 07:08:18,609 - root - INFO - lr: 3.8172e-05 gnorm: 1.11 [ 8:34:09<15:58:00] +[titan] 2025-10-05 07:08:29,526 - root - INFO - step: 13975 loss: 2.3414 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0678 +[titan] 2025-10-05 07:08:29,526 - root - INFO - lr: 3.8164e-05 gnorm: 1.06 [ 8:34:20<15:57:49] +[titan] 2025-10-05 07:08:40,409 - root - INFO - step: 13980 loss: 2.2904 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0234 +[titan] 2025-10-05 07:08:40,409 - root - INFO - lr: 
3.8157e-05 gnorm: 1.10 [ 8:34:30<15:57:37] +[titan] 2025-10-05 07:08:51,305 - root - INFO - step: 13985 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9806 +[titan] 2025-10-05 07:08:51,305 - root - INFO - lr: 3.8149e-05 gnorm: 1.08 [ 8:34:41<15:57:26] +[titan] 2025-10-05 07:09:02,176 - root - INFO - step: 13990 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:09:02,177 - root - INFO - lr: 3.8141e-05 gnorm: 1.06 [ 8:34:52<15:57:15] +[titan] 2025-10-05 07:09:13,061 - root - INFO - step: 13995 loss: 2.2816 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0139 +[titan] 2025-10-05 07:09:13,062 - root - INFO - lr: 3.8133e-05 gnorm: 1.14 [ 8:35:03<15:57:03] +[titan] 2025-10-05 07:09:21,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:09:23,969 - root - INFO - step: 14000 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 07:09:23,969 - root - INFO - lr: 3.8125e-05 gnorm: 1.09 [ 8:35:14<15:56:52] +[titan] 2025-10-05 07:09:34,866 - root - INFO - step: 14005 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0102 +[titan] 2025-10-05 07:09:34,866 - root - INFO - lr: 3.8117e-05 gnorm: 1.06 [ 8:35:25<15:56:41] +[titan] 2025-10-05 07:09:45,752 - root - INFO - step: 14010 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0239 +[titan] 2025-10-05 07:09:45,752 - root - INFO - lr: 3.8109e-05 gnorm: 1.14 [ 8:35:36<15:56:29] +[titan] 2025-10-05 07:09:56,681 - root - INFO - step: 14015 loss: 2.2388 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9782 +[titan] 2025-10-05 07:09:56,681 - root - INFO - lr: 3.8101e-05 gnorm: 1.10 [ 8:35:47<15:56:18] +[titan] 2025-10-05 07:10:07,561 - root - INFO - step: 14020 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0100 +[titan] 2025-10-05 07:10:07,561 - root - INFO - lr: 3.8094e-05 gnorm: 1.10 [ 8:35:58<15:56:07] +[titan] 2025-10-05 07:10:18,446 - root - INFO - step: 14025 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0031 +[titan] 2025-10-05 07:10:18,446 - root - INFO - lr: 3.8086e-05 gnorm: 1.06 [ 8:36:08<15:55:56] +[titan] 2025-10-05 07:10:29,418 - root - INFO - step: 14030 loss: 2.3296 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 07:10:29,419 - root - INFO - lr: 
3.8078e-05 gnorm: 1.11 [ 8:36:19<15:55:44] +[titan] 2025-10-05 07:10:40,286 - root - INFO - step: 14035 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0286 +[titan] 2025-10-05 07:10:40,286 - root - INFO - lr: 3.8070e-05 gnorm: 1.08 [ 8:36:30<15:55:33] +[titan] 2025-10-05 07:10:51,186 - root - INFO - step: 14040 loss: 2.3219 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 07:10:51,186 - root - INFO - lr: 3.8062e-05 gnorm: 1.08 [ 8:36:41<15:55:22] +[titan] 2025-10-05 07:11:02,100 - root - INFO - step: 14045 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:11:02,100 - root - INFO - lr: 3.8054e-05 gnorm: 1.03 [ 8:36:52<15:55:10] +[titan] 2025-10-05 07:11:10,810 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:11:13,002 - root - INFO - step: 14050 loss: 2.2598 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:13,002 - root - INFO - lr: 3.8046e-05 gnorm: 1.08 [ 8:37:03<15:54:59] +[titan] 2025-10-05 07:11:23,889 - root - INFO - step: 14055 loss: 2.2829 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0164 +[titan] 2025-10-05 07:11:23,889 - root - INFO - lr: 3.8038e-05 gnorm: 1.06 [ 8:37:14<15:54:48] +[titan] 2025-10-05 07:11:34,797 - root - INFO - step: 14060 loss: 2.2612 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:34,797 - root - INFO - lr: 3.8031e-05 gnorm: 1.08 [ 8:37:25<15:54:37] +[titan] 2025-10-05 07:11:45,686 - root - INFO - step: 14065 loss: 2.2504 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:11:45,686 - root - INFO - lr: 3.8023e-05 gnorm: 1.10 [ 8:37:36<15:54:25] +[titan] 2025-10-05 07:11:56,588 - root - INFO - step: 14070 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 07:11:56,588 - root - INFO - lr: 3.8015e-05 gnorm: 1.10 [ 8:37:47<15:54:14] +[titan] 2025-10-05 07:12:07,484 - root - INFO - step: 14075 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:12:07,484 - root - INFO - lr: 3.8007e-05 gnorm: 1.05 [ 8:37:58<15:54:03] +[titan] 2025-10-05 07:12:18,379 - root - INFO - step: 14080 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 07:12:18,379 - root - INFO - lr: 
3.7999e-05 gnorm: 1.09 [ 8:38:08<15:53:51] +[titan] 2025-10-05 07:12:29,280 - root - INFO - step: 14085 loss: 2.2541 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 07:12:29,281 - root - INFO - lr: 3.7991e-05 gnorm: 1.11 [ 8:38:19<15:53:40] +[titan] 2025-10-05 07:12:40,158 - root - INFO - step: 14090 loss: 2.2892 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0223 +[titan] 2025-10-05 07:12:40,159 - root - INFO - lr: 3.7983e-05 gnorm: 1.07 [ 8:38:30<15:53:29] +[titan] 2025-10-05 07:12:51,038 - root - INFO - step: 14095 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0060 +[titan] 2025-10-05 07:12:51,038 - root - INFO - lr: 3.7975e-05 gnorm: 1.08 [ 8:38:41<15:53:17] +[titan] 2025-10-05 07:12:59,737 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:13:01,923 - root - INFO - step: 14100 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 07:13:01,924 - root - INFO - lr: 3.7967e-05 gnorm: 1.10 [ 8:38:52<15:53:06] +[titan] 2025-10-05 07:13:12,819 - root - INFO - step: 14105 loss: 2.2680 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0026 +[titan] 2025-10-05 07:13:12,819 - root - INFO - lr: 3.7959e-05 gnorm: 1.10 [ 8:39:03<15:52:55] +[titan] 2025-10-05 07:13:23,712 - root - INFO - step: 14110 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:13:23,712 - root - INFO - lr: 3.7952e-05 gnorm: 1.06 [ 8:39:14<15:52:43] +[titan] 2025-10-05 07:13:34,613 - root - INFO - step: 14115 loss: 2.3226 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0504 +[titan] 2025-10-05 07:13:34,613 - root - INFO - lr: 3.7944e-05 gnorm: 1.17 [ 8:39:25<15:52:32] +[titan] 2025-10-05 07:13:45,510 - root - INFO - step: 14120 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 07:13:45,510 - root - INFO - lr: 3.7936e-05 gnorm: 1.12 [ 8:39:36<15:52:21] +[titan] 2025-10-05 07:13:56,397 - root - INFO - step: 14125 loss: 2.2697 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0052 +[titan] 2025-10-05 07:13:56,397 - root - INFO - lr: 3.7928e-05 gnorm: 1.11 [ 8:39:46<15:52:10] +[titan] 2025-10-05 07:14:07,282 - root - INFO - step: 14130 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 07:14:07,282 - root - INFO - lr: 
3.7920e-05 gnorm: 1.09 [ 8:39:57<15:51:58] +[titan] 2025-10-05 07:14:18,161 - root - INFO - step: 14135 loss: 2.2782 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 07:14:18,162 - root - INFO - lr: 3.7912e-05 gnorm: 1.13 [ 8:40:08<15:51:47] +[titan] 2025-10-05 07:14:29,064 - root - INFO - step: 14140 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 07:14:29,064 - root - INFO - lr: 3.7904e-05 gnorm: 1.12 [ 8:40:19<15:51:36] +[titan] 2025-10-05 07:14:39,953 - root - INFO - step: 14145 loss: 2.2613 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9977 +[titan] 2025-10-05 07:14:39,953 - root - INFO - lr: 3.7896e-05 gnorm: 1.07 [ 8:40:30<15:51:24] +[titan] 2025-10-05 07:14:48,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:14:50,851 - root - INFO - step: 14150 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 07:14:50,851 - root - INFO - lr: 3.7888e-05 gnorm: 1.09 [ 8:40:41<15:51:13] +[titan] 2025-10-05 07:15:01,722 - root - INFO - step: 14155 loss: 2.3499 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0768 +[titan] 2025-10-05 07:15:01,723 - root - INFO - lr: 3.7880e-05 gnorm: 1.07 [ 8:40:52<15:51:02] +[titan] 2025-10-05 07:15:12,596 - root - INFO - step: 14160 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 07:15:12,597 - root - INFO - lr: 3.7872e-05 gnorm: 1.07 [ 8:41:03<15:50:50] +[titan] 2025-10-05 07:15:23,478 - root - INFO - step: 14165 loss: 2.2806 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 07:15:23,478 - root - INFO - lr: 3.7865e-05 gnorm: 1.09 [ 8:41:13<15:50:39] +[titan] 2025-10-05 07:15:34,374 - root - INFO - step: 14170 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:15:34,374 - root - INFO - lr: 3.7857e-05 gnorm: 1.08 [ 8:41:24<15:50:28] +[titan] 2025-10-05 07:15:45,286 - root - INFO - step: 14175 loss: 2.2571 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9933 +[titan] 2025-10-05 07:15:45,287 - root - INFO - lr: 3.7849e-05 gnorm: 1.11 [ 8:41:35<15:50:16] +[titan] 2025-10-05 07:15:56,187 - root - INFO - step: 14180 loss: 2.3045 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 07:15:56,187 - root - INFO - lr: 
3.7841e-05 gnorm: 1.13 [ 8:41:46<15:50:05] +[titan] 2025-10-05 07:16:07,077 - root - INFO - step: 14185 loss: 2.2313 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9715 +[titan] 2025-10-05 07:16:07,077 - root - INFO - lr: 3.7833e-05 gnorm: 1.08 [ 8:41:57<15:49:54] +[titan] 2025-10-05 07:16:17,954 - root - INFO - step: 14190 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9824 +[titan] 2025-10-05 07:16:17,954 - root - INFO - lr: 3.7825e-05 gnorm: 1.05 [ 8:42:08<15:49:42] +[titan] 2025-10-05 07:16:28,838 - root - INFO - step: 14195 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 07:16:28,839 - root - INFO - lr: 3.7817e-05 gnorm: 1.10 [ 8:42:19<15:49:31] +[titan] 2025-10-05 07:16:37,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:16:39,702 - root - INFO - step: 14200 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9987 +[titan] 2025-10-05 07:16:39,703 - root - INFO - lr: 3.7809e-05 gnorm: 1.10 [ 8:42:30<15:49:20] +[titan] 2025-10-05 07:16:50,596 - root - INFO - step: 14205 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 07:16:50,596 - root - INFO - lr: 3.7801e-05 gnorm: 1.05 [ 8:42:41<15:49:08] +[titan] 2025-10-05 07:17:01,477 - root - INFO - step: 14210 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:17:01,477 - root - INFO - lr: 3.7793e-05 gnorm: 1.09 [ 8:42:51<15:48:57] +[titan] 2025-10-05 07:17:12,357 - root - INFO - step: 14215 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 07:17:12,357 - root - INFO - lr: 3.7785e-05 gnorm: 1.09 [ 8:43:02<15:48:46] +[titan] 2025-10-05 07:17:23,224 - root - INFO - step: 14220 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0228 +[titan] 2025-10-05 07:17:23,224 - root - INFO - lr: 3.7777e-05 gnorm: 1.12 [ 8:43:13<15:48:34] +[titan] 2025-10-05 07:17:34,101 - root - INFO - step: 14225 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9842 +[titan] 2025-10-05 07:17:34,101 - root - INFO - lr: 3.7769e-05 gnorm: 1.11 [ 8:43:24<15:48:23] +[titan] 2025-10-05 07:17:44,966 - root - INFO - step: 14230 loss: 2.2228 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9631 +[titan] 2025-10-05 07:17:44,966 - root - INFO - lr: 
3.7761e-05 gnorm: 1.06 [ 8:43:35<15:48:12] +[titan] 2025-10-05 07:17:55,865 - root - INFO - step: 14235 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:17:55,865 - root - INFO - lr: 3.7753e-05 gnorm: 1.15 [ 8:43:46<15:48:00] +[titan] 2025-10-05 07:18:06,742 - root - INFO - step: 14240 loss: 2.2274 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9683 +[titan] 2025-10-05 07:18:06,742 - root - INFO - lr: 3.7746e-05 gnorm: 1.07 [ 8:43:57<15:47:49] +[titan] 2025-10-05 07:18:17,634 - root - INFO - step: 14245 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0317 +[titan] 2025-10-05 07:18:17,634 - root - INFO - lr: 3.7738e-05 gnorm: 1.09 [ 8:44:08<15:47:38] +[titan] 2025-10-05 07:18:26,321 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:18:28,505 - root - INFO - step: 14250 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:18:28,505 - root - INFO - lr: 3.7730e-05 gnorm: 1.10 [ 8:44:19<15:47:26] +[titan] 2025-10-05 07:18:39,411 - root - INFO - step: 14255 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 07:18:39,412 - root - INFO - lr: 3.7722e-05 gnorm: 1.06 [ 8:44:29<15:47:15] +[titan] 2025-10-05 07:18:50,297 - root - INFO - step: 14260 loss: 2.3010 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0327 +[titan] 2025-10-05 07:18:50,297 - root - INFO - lr: 3.7714e-05 gnorm: 1.07 [ 8:44:40<15:47:04] +[titan] 2025-10-05 07:19:01,201 - root - INFO - step: 14265 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0104 +[titan] 2025-10-05 07:19:01,201 - root - INFO - lr: 3.7706e-05 gnorm: 1.10 [ 8:44:51<15:46:53] +[titan] 2025-10-05 07:19:12,083 - root - INFO - step: 14270 loss: 2.2667 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0022 +[titan] 2025-10-05 07:19:12,083 - root - INFO - lr: 3.7698e-05 gnorm: 1.08 [ 8:45:02<15:46:41] +[titan] 2025-10-05 07:19:22,993 - root - INFO - step: 14275 loss: 2.1944 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9385 +[titan] 2025-10-05 07:19:22,993 - root - INFO - lr: 3.7690e-05 gnorm: 1.09 [ 8:45:13<15:46:30] +[titan] 2025-10-05 07:19:33,913 - root - INFO - step: 14280 loss: 2.2467 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9851 +[titan] 2025-10-05 07:19:33,913 - root - INFO - lr: 
3.7682e-05 gnorm: 1.07 [ 8:45:24<15:46:19] +[titan] 2025-10-05 07:19:44,768 - root - INFO - step: 14285 loss: 2.2223 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9628 +[titan] 2025-10-05 07:19:44,768 - root - INFO - lr: 3.7674e-05 gnorm: 1.09 [ 8:45:35<15:46:07] +[titan] 2025-10-05 07:19:55,630 - root - INFO - step: 14290 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 07:19:55,630 - root - INFO - lr: 3.7666e-05 gnorm: 1.10 [ 8:45:46<15:45:56] +[titan] 2025-10-05 07:20:06,491 - root - INFO - step: 14295 loss: 2.2948 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 07:20:06,491 - root - INFO - lr: 3.7658e-05 gnorm: 1.11 [ 8:45:56<15:45:45] +[titan] 2025-10-05 07:20:15,198 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:20:17,375 - root - INFO - step: 14300 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 07:20:17,376 - root - INFO - lr: 3.7650e-05 gnorm: 1.15 [ 8:46:07<15:45:33] +[titan] 2025-10-05 07:20:28,246 - root - INFO - step: 14305 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0609 +[titan] 2025-10-05 07:20:28,246 - root - INFO - lr: 3.7642e-05 gnorm: 1.12 [ 8:46:18<15:45:22] +[titan] 2025-10-05 07:20:39,160 - root - INFO - step: 14310 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0267 +[titan] 2025-10-05 07:20:39,160 - root - INFO - lr: 3.7634e-05 gnorm: 1.14 [ 8:46:29<15:45:11] +[titan] 2025-10-05 07:20:50,006 - root - INFO - step: 14315 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 07:20:50,006 - root - INFO - lr: 3.7626e-05 gnorm: 1.07 [ 8:46:40<15:44:59] +[titan] 2025-10-05 07:21:00,866 - root - INFO - step: 14320 loss: 2.2698 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 07:21:00,866 - root - INFO - lr: 3.7618e-05 gnorm: 1.09 [ 8:46:51<15:44:48] +[titan] 2025-10-05 07:21:11,703 - root - INFO - step: 14325 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:21:11,703 - root - INFO - lr: 3.7610e-05 gnorm: 1.09 [ 8:47:02<15:44:37] +[titan] 2025-10-05 07:21:22,593 - root - INFO - step: 14330 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0330 +[titan] 2025-10-05 07:21:22,594 - root - INFO - lr: 
3.7602e-05 gnorm: 1.14 [ 8:47:13<15:44:25] +[titan] 2025-10-05 07:21:33,559 - root - INFO - step: 14335 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 07:21:33,560 - root - INFO - lr: 3.7594e-05 gnorm: 1.09 [ 8:47:24<15:44:14] +[titan] 2025-10-05 07:21:35,961 - root - INFO - Dumping profiler traces at step 14336 +[titan] 2025-10-05 07:21:35,999 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:21:44,698 - root - INFO - step: 14340 loss: 2.3096 memory: 118.84GiB(85.28%) tps: 29,418 tflops: 408.13 mfu: 41.27% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 07:21:44,699 - root - INFO - lr: 3.7586e-05 gnorm: 1.13 [ 8:47:35<15:44:03] +[titan] 2025-10-05 07:21:55,565 - root - INFO - step: 14345 loss: 2.3329 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 07:21:55,565 - root - INFO - lr: 3.7578e-05 gnorm: 1.11 [ 8:47:46<15:43:52] +[titan] 2025-10-05 07:22:04,240 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:22:06,418 - root - INFO - step: 14350 loss: 2.2380 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9769 +[titan] 2025-10-05 07:22:06,419 - root - INFO - lr: 3.7570e-05 gnorm: 1.07 [ 8:47:56<15:43:41] +[titan] 2025-10-05 07:22:17,273 - root - INFO - step: 14355 loss: 2.2325 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 07:22:17,273 - root - INFO - lr: 3.7562e-05 gnorm: 1.12 [ 8:48:07<15:43:29] +[titan] 2025-10-05 07:22:28,142 - root - INFO - step: 14360 loss: 2.3425 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0700 +[titan] 2025-10-05 07:22:28,143 - root - INFO - lr: 3.7554e-05 gnorm: 1.12 [ 8:48:18<15:43:18] +[titan] 2025-10-05 07:22:39,138 - root - INFO - step: 14365 loss: 2.2707 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.44 mfu: 41.80% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:22:39,139 - root - INFO - lr: 3.7546e-05 gnorm: 1.08 [ 8:48:29<15:43:07] +[titan] 2025-10-05 07:22:50,009 - root - INFO - step: 14370 loss: 2.2987 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 07:22:50,009 - root - INFO - lr: 3.7538e-05 gnorm: 1.13 [ 8:48:40<15:42:55] +[titan] 2025-10-05 07:23:00,863 - root - INFO - step: 14375 loss: 2.2114 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 07:23:00,864 - root - INFO - lr: 3.7530e-05 gnorm: 1.09 [ 8:48:51<15:42:44] +[titan] 2025-10-05 07:23:11,714 - root - INFO - step: 14380 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:23:11,714 - root - INFO - lr: 
3.7522e-05 gnorm: 1.11 [ 8:49:02<15:42:33] +[titan] 2025-10-05 07:23:22,597 - root - INFO - step: 14385 loss: 2.3245 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0537 +[titan] 2025-10-05 07:23:22,597 - root - INFO - lr: 3.7514e-05 gnorm: 1.07 [ 8:49:13<15:42:21] +[titan] 2025-10-05 07:23:33,453 - root - INFO - step: 14390 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:23:33,453 - root - INFO - lr: 3.7506e-05 gnorm: 1.10 [ 8:49:23<15:42:10] +[titan] 2025-10-05 07:23:44,404 - root - INFO - step: 14395 loss: 2.3155 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0444 +[titan] 2025-10-05 07:23:44,404 - root - INFO - lr: 3.7498e-05 gnorm: 1.07 [ 8:49:34<15:41:59] +[titan] 2025-10-05 07:23:53,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:23:55,271 - root - INFO - step: 14400 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:23:55,271 - root - INFO - lr: 3.7490e-05 gnorm: 1.09 [ 8:49:45<15:41:48] +[titan] 2025-10-05 07:24:06,109 - root - INFO - step: 14405 loss: 2.3174 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0466 +[titan] 2025-10-05 07:24:06,109 - root - INFO - lr: 3.7482e-05 gnorm: 1.11 [ 8:49:56<15:41:36] +[titan] 2025-10-05 07:24:16,949 - root - INFO - step: 14410 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0222 +[titan] 2025-10-05 07:24:16,949 - root - INFO - lr: 3.7474e-05 gnorm: 1.22 [ 8:50:07<15:41:25] +[titan] 2025-10-05 07:24:27,813 - root - INFO - step: 14415 loss: 2.2533 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9898 +[titan] 2025-10-05 07:24:27,813 - root - INFO - lr: 3.7466e-05 gnorm: 1.06 [ 8:50:18<15:41:13] +[titan] 2025-10-05 07:24:38,740 - root - INFO - step: 14420 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9292 +[titan] 2025-10-05 07:24:38,740 - root - INFO - lr: 3.7458e-05 gnorm: 1.07 [ 8:50:29<15:41:02] +[titan] 2025-10-05 07:24:49,616 - root - INFO - step: 14425 loss: 2.2439 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 07:24:49,616 - root - INFO - lr: 3.7450e-05 gnorm: 1.09 [ 8:50:40<15:40:51] +[titan] 2025-10-05 07:25:00,495 - root - INFO - step: 14430 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0617 +[titan] 2025-10-05 07:25:00,495 - root - INFO - lr: 
3.7442e-05 gnorm: 1.10 [ 8:50:50<15:40:40] +[titan] 2025-10-05 07:25:11,357 - root - INFO - step: 14435 loss: 2.2516 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9889 +[titan] 2025-10-05 07:25:11,357 - root - INFO - lr: 3.7434e-05 gnorm: 1.10 [ 8:51:01<15:40:28] +[titan] 2025-10-05 07:25:22,214 - root - INFO - step: 14440 loss: 2.2632 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:25:22,214 - root - INFO - lr: 3.7426e-05 gnorm: 1.10 [ 8:51:12<15:40:17] +[titan] 2025-10-05 07:25:33,076 - root - INFO - step: 14445 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 07:25:33,076 - root - INFO - lr: 3.7418e-05 gnorm: 1.09 [ 8:51:23<15:40:05] +[titan] 2025-10-05 07:25:41,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:25:44,001 - root - INFO - step: 14450 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 07:25:44,001 - root - INFO - lr: 3.7410e-05 gnorm: 1.07 [ 8:51:34<15:39:54] +[titan] 2025-10-05 07:25:54,893 - root - INFO - step: 14455 loss: 2.2554 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9922 +[titan] 2025-10-05 07:25:54,893 - root - INFO - lr: 3.7402e-05 gnorm: 1.13 [ 8:51:45<15:39:43] +[titan] 2025-10-05 07:26:05,774 - root - INFO - step: 14460 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9572 +[titan] 2025-10-05 07:26:05,774 - root - INFO - lr: 3.7394e-05 gnorm: 1.08 [ 8:51:56<15:39:32] +[titan] 2025-10-05 07:26:16,651 - root - INFO - step: 14465 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 07:26:16,651 - root - INFO - lr: 3.7386e-05 gnorm: 1.11 [ 8:52:07<15:39:20] +[titan] 2025-10-05 07:26:27,521 - root - INFO - step: 14470 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 07:26:27,521 - root - INFO - lr: 3.7378e-05 gnorm: 1.08 [ 8:52:18<15:39:09] +[titan] 2025-10-05 07:26:38,394 - root - INFO - step: 14475 loss: 2.2013 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 07:26:38,395 - root - INFO - lr: 3.7370e-05 gnorm: 1.08 [ 8:52:28<15:38:58] +[titan] 2025-10-05 07:26:49,332 - root - INFO - step: 14480 loss: 2.2812 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0154 +[titan] 2025-10-05 07:26:49,332 - root - INFO - lr: 
3.7362e-05 gnorm: 1.12 [ 8:52:39<15:38:46] +[titan] 2025-10-05 07:27:00,212 - root - INFO - step: 14485 loss: 2.2411 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 07:27:00,213 - root - INFO - lr: 3.7354e-05 gnorm: 1.05 [ 8:52:50<15:38:35] +[titan] 2025-10-05 07:27:11,129 - root - INFO - step: 14490 loss: 2.2405 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9788 +[titan] 2025-10-05 07:27:11,129 - root - INFO - lr: 3.7346e-05 gnorm: 1.09 [ 8:53:01<15:38:24] +[titan] 2025-10-05 07:27:22,004 - root - INFO - step: 14495 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 07:27:22,004 - root - INFO - lr: 3.7338e-05 gnorm: 1.09 [ 8:53:12<15:38:13] +[titan] 2025-10-05 07:27:30,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:27:32,894 - root - INFO - step: 14500 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 07:27:32,894 - root - INFO - lr: 3.7330e-05 gnorm: 1.10 [ 8:53:23<15:38:01] +[titan] 2025-10-05 07:27:43,812 - root - INFO - step: 14505 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0409 +[titan] 2025-10-05 07:27:43,812 - root - INFO - lr: 3.7322e-05 gnorm: 1.06 [ 8:53:34<15:37:50] +[titan] 2025-10-05 07:27:54,688 - root - INFO - step: 14510 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9921 +[titan] 2025-10-05 07:27:54,688 - root - INFO - lr: 3.7314e-05 gnorm: 1.08 [ 8:53:45<15:37:39] +[titan] 2025-10-05 07:28:05,542 - root - INFO - step: 14515 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 07:28:05,542 - root - INFO - lr: 3.7306e-05 gnorm: 1.05 [ 8:53:56<15:37:27] +[titan] 2025-10-05 07:28:16,397 - root - INFO - step: 14520 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:28:16,398 - root - INFO - lr: 3.7298e-05 gnorm: 1.08 [ 8:54:06<15:37:16] +[titan] 2025-10-05 07:28:27,301 - root - INFO - step: 14525 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9512 +[titan] 2025-10-05 07:28:27,301 - root - INFO - lr: 3.7290e-05 gnorm: 1.05 [ 8:54:17<15:37:05] +[titan] 2025-10-05 07:28:38,153 - root - INFO - step: 14530 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0325 +[titan] 2025-10-05 07:28:38,153 - root - INFO - lr: 
3.7282e-05 gnorm: 1.12 [ 8:54:28<15:36:53] +[titan] 2025-10-05 07:28:49,042 - root - INFO - step: 14535 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 07:28:49,042 - root - INFO - lr: 3.7274e-05 gnorm: 1.10 [ 8:54:39<15:36:42] +[titan] 2025-10-05 07:28:59,909 - root - INFO - step: 14540 loss: 2.2631 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9992 +[titan] 2025-10-05 07:28:59,910 - root - INFO - lr: 3.7266e-05 gnorm: 1.09 [ 8:54:50<15:36:31] +[titan] 2025-10-05 07:29:10,771 - root - INFO - step: 14545 loss: 2.2017 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9458 +[titan] 2025-10-05 07:29:10,771 - root - INFO - lr: 3.7258e-05 gnorm: 1.10 [ 8:55:01<15:36:19] +[titan] 2025-10-05 07:29:19,437 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:29:21,621 - root - INFO - step: 14550 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 07:29:21,621 - root - INFO - lr: 3.7250e-05 gnorm: 1.09 [ 8:55:12<15:36:08] +[titan] 2025-10-05 07:29:32,531 - root - INFO - step: 14555 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0226 +[titan] 2025-10-05 07:29:32,531 - root - INFO - lr: 3.7242e-05 gnorm: 1.14 [ 8:55:23<15:35:57] +[titan] 2025-10-05 07:29:43,472 - root - INFO - step: 14560 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8975 +[titan] 2025-10-05 07:29:43,472 - root - INFO - lr: 3.7234e-05 gnorm: 1.02 [ 8:55:33<15:35:46] +[titan] 2025-10-05 07:29:54,345 - root - INFO - step: 14565 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 07:29:54,345 - root - INFO - lr: 3.7226e-05 gnorm: 1.13 [ 8:55:44<15:35:34] +[titan] 2025-10-05 07:30:05,208 - root - INFO - step: 14570 loss: 2.3031 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0344 +[titan] 2025-10-05 07:30:05,209 - root - INFO - lr: 3.7218e-05 gnorm: 1.12 [ 8:55:55<15:35:23] +[titan] 2025-10-05 07:30:16,066 - root - INFO - step: 14575 loss: 2.2367 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 07:30:16,066 - root - INFO - lr: 3.7210e-05 gnorm: 1.10 [ 8:56:06<15:35:12] +[titan] 2025-10-05 07:30:26,932 - root - INFO - step: 14580 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 07:30:26,933 - root - INFO - lr: 
3.7202e-05 gnorm: 1.07 [ 8:56:17<15:35:00] +[titan] 2025-10-05 07:30:37,811 - root - INFO - step: 14585 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9566 +[titan] 2025-10-05 07:30:37,811 - root - INFO - lr: 3.7194e-05 gnorm: 1.08 [ 8:56:28<15:34:49] +[titan] 2025-10-05 07:30:48,772 - root - INFO - step: 14590 loss: 2.3418 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0695 +[titan] 2025-10-05 07:30:48,772 - root - INFO - lr: 3.7185e-05 gnorm: 1.20 [ 8:56:39<15:34:38] +[titan] 2025-10-05 07:30:59,630 - root - INFO - step: 14595 loss: 2.2116 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 07:30:59,630 - root - INFO - lr: 3.7177e-05 gnorm: 1.09 [ 8:56:50<15:34:26] +[titan] 2025-10-05 07:31:08,302 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:31:10,494 - root - INFO - step: 14600 loss: 2.1772 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 07:31:10,494 - root - INFO - lr: 3.7169e-05 gnorm: 1.08 [ 8:57:00<15:34:15] +[titan] 2025-10-05 07:31:21,365 - root - INFO - step: 14605 loss: 2.3083 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:31:21,365 - root - INFO - lr: 3.7161e-05 gnorm: 1.09 [ 8:57:11<15:34:04] +[titan] 2025-10-05 07:31:32,251 - root - INFO - step: 14610 loss: 2.3039 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 07:31:32,251 - root - INFO - lr: 3.7153e-05 gnorm: 1.15 [ 8:57:22<15:33:53] +[titan] 2025-10-05 07:31:43,124 - root - INFO - step: 14615 loss: 2.2982 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0301 +[titan] 2025-10-05 07:31:43,125 - root - INFO - lr: 3.7145e-05 gnorm: 1.12 [ 8:57:33<15:33:41] +[titan] 2025-10-05 07:31:54,094 - root - INFO - step: 14620 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9332 +[titan] 2025-10-05 07:31:54,094 - root - INFO - lr: 3.7137e-05 gnorm: 1.09 [ 8:57:44<15:33:30] +[titan] 2025-10-05 07:32:04,989 - root - INFO - step: 14625 loss: 2.2391 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:32:04,989 - root - INFO - lr: 3.7129e-05 gnorm: 1.10 [ 8:57:55<15:33:19] +[titan] 2025-10-05 07:32:15,888 - root - INFO - step: 14630 loss: 2.3113 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 07:32:15,888 - root - INFO - lr: 
3.7121e-05 gnorm: 1.10 [ 8:58:06<15:33:08] +[titan] 2025-10-05 07:32:26,771 - root - INFO - step: 14635 loss: 2.2726 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:32:26,771 - root - INFO - lr: 3.7113e-05 gnorm: 1.12 [ 8:58:17<15:32:56] +[titan] 2025-10-05 07:32:37,649 - root - INFO - step: 14640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 07:32:37,649 - root - INFO - lr: 3.7105e-05 gnorm: 1.08 [ 8:58:28<15:32:45] +[titan] 2025-10-05 07:32:48,613 - root - INFO - step: 14645 loss: 2.1989 memory: 118.84GiB(85.28%) tps: 29,888 tflops: 414.65 mfu: 41.93% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9422 +[titan] 2025-10-05 07:32:48,613 - root - INFO - lr: 3.7097e-05 gnorm: 1.05 [ 8:58:39<15:32:34] +[titan] 2025-10-05 07:32:57,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:32:59,535 - root - INFO - step: 14650 loss: 2.3040 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0349 +[titan] 2025-10-05 07:32:59,535 - root - INFO - lr: 3.7089e-05 gnorm: 1.06 [ 8:58:50<15:32:23] +[titan] 2025-10-05 07:33:10,438 - root - INFO - step: 14655 loss: 2.2889 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0209 +[titan] 2025-10-05 07:33:10,439 - root - INFO - lr: 3.7081e-05 gnorm: 1.13 [ 8:59:00<15:32:11] +[titan] 2025-10-05 07:33:21,347 - root - INFO - step: 14660 loss: 2.2514 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:33:21,348 - root - INFO - lr: 3.7073e-05 gnorm: 1.12 [ 8:59:11<15:32:00] +[titan] 2025-10-05 07:33:32,227 - root - INFO - step: 14665 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 07:33:32,227 - root - INFO - lr: 3.7064e-05 gnorm: 1.12 [ 8:59:22<15:31:49] +[titan] 2025-10-05 07:33:43,130 - root - INFO - step: 14670 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:33:43,130 - root - INFO - lr: 3.7056e-05 gnorm: 1.13 [ 8:59:33<15:31:37] +[titan] 2025-10-05 07:33:54,090 - root - INFO - step: 14675 loss: 2.2801 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 07:33:54,091 - root - INFO - lr: 3.7048e-05 gnorm: 1.08 [ 8:59:44<15:31:26] +[titan] 2025-10-05 07:34:04,932 - root - INFO - step: 14680 loss: 2.1187 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 07:34:04,932 - root - INFO - lr: 
3.7040e-05 gnorm: 1.06 [ 8:59:55<15:31:15] +[titan] 2025-10-05 07:34:15,806 - root - INFO - step: 14685 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9938 +[titan] 2025-10-05 07:34:15,806 - root - INFO - lr: 3.7032e-05 gnorm: 1.10 [ 9:00:06<15:31:04] +[titan] 2025-10-05 07:34:26,671 - root - INFO - step: 14690 loss: 2.2095 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9509 +[titan] 2025-10-05 07:34:26,671 - root - INFO - lr: 3.7024e-05 gnorm: 1.06 [ 9:00:17<15:30:52] +[titan] 2025-10-05 07:34:37,510 - root - INFO - step: 14695 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 07:34:37,510 - root - INFO - lr: 3.7016e-05 gnorm: 1.06 [ 9:00:27<15:30:41] +[titan] 2025-10-05 07:34:46,191 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:34:48,450 - root - INFO - step: 14700 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9405 +[titan] 2025-10-05 07:34:48,450 - root - INFO - lr: 3.7008e-05 gnorm: 1.10 [ 9:00:38<15:30:30] +[titan] 2025-10-05 07:34:59,300 - root - INFO - step: 14705 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0240 +[titan] 2025-10-05 07:34:59,300 - root - INFO - lr: 3.7000e-05 gnorm: 1.14 [ 9:00:49<15:30:18] +[titan] 2025-10-05 07:35:10,167 - root - INFO - step: 14710 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0017 +[titan] 2025-10-05 07:35:10,168 - root - INFO - lr: 3.6992e-05 gnorm: 1.09 [ 9:01:00<15:30:07] +[titan] 2025-10-05 07:35:21,048 - root - INFO - step: 14715 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0027 +[titan] 2025-10-05 07:35:21,048 - root - INFO - lr: 3.6984e-05 gnorm: 1.13 [ 9:01:11<15:29:56] +[titan] 2025-10-05 07:35:31,930 - root - INFO - step: 14720 loss: 2.2273 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9675 +[titan] 2025-10-05 07:35:31,930 - root - INFO - lr: 3.6976e-05 gnorm: 1.08 [ 9:01:22<15:29:44] +[titan] 2025-10-05 07:35:42,810 - root - INFO - step: 14725 loss: 2.3179 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 07:35:42,810 - root - INFO - lr: 3.6967e-05 gnorm: 1.12 [ 9:01:33<15:29:33] +[titan] 2025-10-05 07:35:53,724 - root - INFO - step: 14730 loss: 2.2620 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9989 +[titan] 2025-10-05 07:35:53,724 - root - INFO - lr: 
3.6959e-05 gnorm: 1.11 [ 9:01:44<15:29:22] +[titan] 2025-10-05 07:36:04,629 - root - INFO - step: 14735 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 07:36:04,629 - root - INFO - lr: 3.6951e-05 gnorm: 1.06 [ 9:01:55<15:29:11] +[titan] 2025-10-05 07:36:15,522 - root - INFO - step: 14740 loss: 2.2768 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 07:36:15,523 - root - INFO - lr: 3.6943e-05 gnorm: 1.09 [ 9:02:05<15:28:59] +[titan] 2025-10-05 07:36:26,431 - root - INFO - step: 14745 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 07:36:26,431 - root - INFO - lr: 3.6935e-05 gnorm: 1.07 [ 9:02:16<15:28:48] +[titan] 2025-10-05 07:36:35,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:36:37,321 - root - INFO - step: 14750 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 07:36:37,321 - root - INFO - lr: 3.6927e-05 gnorm: 1.10 [ 9:02:27<15:28:37] +[titan] 2025-10-05 07:36:48,227 - root - INFO - step: 14755 loss: 2.2186 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:36:48,227 - root - INFO - lr: 3.6919e-05 gnorm: 1.04 [ 9:02:38<15:28:26] +[titan] 2025-10-05 07:36:59,096 - root - INFO - step: 14760 loss: 2.2696 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0036 +[titan] 2025-10-05 07:36:59,096 - root - INFO - lr: 3.6911e-05 gnorm: 1.08 [ 9:02:49<15:28:14] +[titan] 2025-10-05 07:37:09,945 - root - INFO - step: 14765 loss: 2.2510 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9893 +[titan] 2025-10-05 07:37:09,945 - root - INFO - lr: 3.6903e-05 gnorm: 1.13 [ 9:03:00<15:28:03] +[titan] 2025-10-05 07:37:20,822 - root - INFO - step: 14770 loss: 2.2169 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:37:20,822 - root - INFO - lr: 3.6894e-05 gnorm: 1.08 [ 9:03:11<15:27:52] +[titan] 2025-10-05 07:37:31,692 - root - INFO - step: 14775 loss: 2.2524 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 07:37:31,692 - root - INFO - lr: 3.6886e-05 gnorm: 1.10 [ 9:03:22<15:27:40] +[titan] 2025-10-05 07:37:42,588 - root - INFO - step: 14780 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 07:37:42,588 - root - INFO - lr: 
3.6878e-05 gnorm: 1.12 [ 9:03:33<15:27:29] +[titan] 2025-10-05 07:37:53,516 - root - INFO - step: 14785 loss: 2.1691 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9163 +[titan] 2025-10-05 07:37:53,516 - root - INFO - lr: 3.6870e-05 gnorm: 1.06 [ 9:03:43<15:27:18] +[titan] 2025-10-05 07:38:04,385 - root - INFO - step: 14790 loss: 2.1764 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 07:38:04,386 - root - INFO - lr: 3.6862e-05 gnorm: 1.05 [ 9:03:54<15:27:07] +[titan] 2025-10-05 07:38:15,271 - root - INFO - step: 14795 loss: 2.2615 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9988 +[titan] 2025-10-05 07:38:15,271 - root - INFO - lr: 3.6854e-05 gnorm: 1.11 [ 9:04:05<15:26:55] +[titan] 2025-10-05 07:38:23,946 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:38:26,148 - root - INFO - step: 14800 loss: 2.2171 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 07:38:26,148 - root - INFO - lr: 3.6846e-05 gnorm: 1.11 [ 9:04:16<15:26:44] +[titan] 2025-10-05 07:38:37,018 - root - INFO - step: 14805 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0292 +[titan] 2025-10-05 07:38:37,018 - root - INFO - lr: 3.6838e-05 gnorm: 1.12 [ 9:04:27<15:26:33] +[titan] 2025-10-05 07:38:47,933 - root - INFO - step: 14810 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 07:38:47,933 - root - INFO - lr: 3.6830e-05 gnorm: 1.11 [ 9:04:38<15:26:21] +[titan] 2025-10-05 07:38:58,873 - root - INFO - step: 14815 loss: 2.2872 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:38:58,873 - root - INFO - lr: 3.6821e-05 gnorm: 1.08 [ 9:04:49<15:26:10] +[titan] 2025-10-05 07:39:09,749 - root - INFO - step: 14820 loss: 2.2863 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:39:09,749 - root - INFO - lr: 3.6813e-05 gnorm: 1.08 [ 9:05:00<15:25:59] +[titan] 2025-10-05 07:39:20,633 - root - INFO - step: 14825 loss: 2.3248 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 07:39:20,633 - root - INFO - lr: 3.6805e-05 gnorm: 1.06 [ 9:05:11<15:25:48] +[titan] 2025-10-05 07:39:31,524 - root - INFO - step: 14830 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:39:31,525 - root - INFO - lr: 
3.6797e-05 gnorm: 1.05 [ 9:05:21<15:25:36] +[titan] 2025-10-05 07:39:42,407 - root - INFO - step: 14835 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0410 +[titan] 2025-10-05 07:39:42,408 - root - INFO - lr: 3.6789e-05 gnorm: 1.09 [ 9:05:32<15:25:25] +[titan] 2025-10-05 07:39:53,316 - root - INFO - step: 14840 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:39:53,316 - root - INFO - lr: 3.6781e-05 gnorm: 1.06 [ 9:05:43<15:25:14] +[titan] 2025-10-05 07:40:04,271 - root - INFO - step: 14845 loss: 2.2304 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.97 mfu: 41.96% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9723 +[titan] 2025-10-05 07:40:04,271 - root - INFO - lr: 3.6773e-05 gnorm: 1.12 [ 9:05:54<15:25:03] +[titan] 2025-10-05 07:40:10,991 - root - INFO - Dumping profiler traces at step 14848 +[titan] 2025-10-05 07:40:11,029 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:40:13,210 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:40:15,391 - root - INFO - step: 14850 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 29,468 tflops: 408.82 mfu: 41.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:40:15,392 - root - INFO - lr: 3.6765e-05 gnorm: 1.08 [ 9:06:05<15:24:52] +[titan] 2025-10-05 07:40:26,262 - root - INFO - step: 14855 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 07:40:26,262 - root - INFO - lr: 3.6756e-05 gnorm: 1.09 [ 9:06:16<15:24:41] +[titan] 2025-10-05 07:40:37,129 - root - INFO - step: 14860 loss: 2.2444 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 07:40:37,129 - root - INFO - lr: 3.6748e-05 gnorm: 1.08 [ 9:06:27<15:24:29] +[titan] 2025-10-05 07:40:47,995 - root - INFO - step: 14865 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0098 +[titan] 2025-10-05 07:40:47,995 - root - INFO - lr: 3.6740e-05 gnorm: 1.10 [ 9:06:38<15:24:18] +[titan] 2025-10-05 07:40:58,905 - root - INFO - step: 14870 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:40:58,905 - root - INFO - lr: 3.6732e-05 gnorm: 1.10 [ 9:06:49<15:24:07] +[titan] 2025-10-05 07:41:09,784 - root - INFO - step: 14875 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 07:41:09,784 - root - INFO - lr: 3.6724e-05 gnorm: 1.10 [ 9:07:00<15:23:55] +[titan] 2025-10-05 07:41:20,683 - root - INFO - step: 14880 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 07:41:20,683 - root - INFO - lr: 
3.6716e-05 gnorm: 1.08 [ 9:07:11<15:23:44] +[titan] 2025-10-05 07:41:31,553 - root - INFO - step: 14885 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 07:41:31,553 - root - INFO - lr: 3.6708e-05 gnorm: 1.05 [ 9:07:21<15:23:33] +[titan] 2025-10-05 07:41:42,413 - root - INFO - step: 14890 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 07:41:42,414 - root - INFO - lr: 3.6699e-05 gnorm: 1.15 [ 9:07:32<15:23:21] +[titan] 2025-10-05 07:41:53,308 - root - INFO - step: 14895 loss: 2.2418 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:41:53,308 - root - INFO - lr: 3.6691e-05 gnorm: 1.07 [ 9:07:43<15:23:10] +[titan] 2025-10-05 07:42:01,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:42:04,160 - root - INFO - step: 14900 loss: 2.2908 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0236 +[titan] 2025-10-05 07:42:04,160 - root - INFO - lr: 3.6683e-05 gnorm: 1.13 [ 9:07:54<15:22:59] +[titan] 2025-10-05 07:42:15,031 - root - INFO - step: 14905 loss: 2.3078 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0380 +[titan] 2025-10-05 07:42:15,031 - root - INFO - lr: 3.6675e-05 gnorm: 1.12 [ 9:08:05<15:22:48] +[titan] 2025-10-05 07:42:25,895 - root - INFO - step: 14910 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9881 +[titan] 2025-10-05 07:42:25,895 - root - INFO - lr: 3.6667e-05 gnorm: 1.09 [ 9:08:16<15:22:36] +[titan] 2025-10-05 07:42:36,754 - root - INFO - step: 14915 loss: 2.2480 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 07:42:36,754 - root - INFO - lr: 3.6659e-05 gnorm: 1.09 [ 9:08:27<15:22:25] +[titan] 2025-10-05 07:42:47,621 - root - INFO - step: 14920 loss: 2.4317 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 07:42:47,621 - root - INFO - lr: 3.6651e-05 gnorm: 1.11 [ 9:08:38<15:22:14] +[titan] 2025-10-05 07:42:58,504 - root - INFO - step: 14925 loss: 2.2167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9576 +[titan] 2025-10-05 07:42:58,504 - root - INFO - lr: 3.6642e-05 gnorm: 1.09 [ 9:08:48<15:22:02] +[titan] 2025-10-05 07:43:09,387 - root - INFO - step: 14930 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 07:43:09,388 - root - INFO - lr: 
3.6634e-05 gnorm: 1.08 [ 9:08:59<15:21:51] +[titan] 2025-10-05 07:43:20,273 - root - INFO - step: 14935 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:43:20,273 - root - INFO - lr: 3.6626e-05 gnorm: 1.11 [ 9:09:10<15:21:40] +[titan] 2025-10-05 07:43:31,152 - root - INFO - step: 14940 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:43:31,152 - root - INFO - lr: 3.6618e-05 gnorm: 1.09 [ 9:09:21<15:21:28] +[titan] 2025-10-05 07:43:42,038 - root - INFO - step: 14945 loss: 2.2476 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 07:43:42,039 - root - INFO - lr: 3.6610e-05 gnorm: 1.04 [ 9:09:32<15:21:17] +[titan] 2025-10-05 07:43:50,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:43:52,910 - root - INFO - step: 14950 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9350 +[titan] 2025-10-05 07:43:52,910 - root - INFO - lr: 3.6602e-05 gnorm: 1.07 [ 9:09:43<15:21:06] +[titan] 2025-10-05 07:44:03,804 - root - INFO - step: 14955 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 07:44:03,804 - root - INFO - lr: 3.6593e-05 gnorm: 1.06 [ 9:09:54<15:20:55] +[titan] 2025-10-05 07:44:14,704 - root - INFO - step: 14960 loss: 2.2966 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0290 +[titan] 2025-10-05 07:44:14,704 - root - INFO - lr: 3.6585e-05 gnorm: 1.08 [ 9:10:05<15:20:43] +[titan] 2025-10-05 07:44:25,575 - root - INFO - step: 14965 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 07:44:25,576 - root - INFO - lr: 3.6577e-05 gnorm: 1.07 [ 9:10:16<15:20:32] +[titan] 2025-10-05 07:44:36,428 - root - INFO - step: 14970 loss: 2.2508 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:44:36,428 - root - INFO - lr: 3.6569e-05 gnorm: 1.09 [ 9:10:26<15:20:21] +[titan] 2025-10-05 07:44:47,311 - root - INFO - step: 14975 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:44:47,311 - root - INFO - lr: 3.6561e-05 gnorm: 1.08 [ 9:10:37<15:20:09] +[titan] 2025-10-05 07:44:58,219 - root - INFO - step: 14980 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:44:58,219 - root - INFO - lr: 
3.6553e-05 gnorm: 1.11 [ 9:10:48<15:19:58] +[titan] 2025-10-05 07:45:09,066 - root - INFO - step: 14985 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9745 +[titan] 2025-10-05 07:45:09,066 - root - INFO - lr: 3.6544e-05 gnorm: 1.07 [ 9:10:59<15:19:47] +[titan] 2025-10-05 07:45:19,933 - root - INFO - step: 14990 loss: 2.1985 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 07:45:19,934 - root - INFO - lr: 3.6536e-05 gnorm: 1.07 [ 9:11:10<15:19:36] +[titan] 2025-10-05 07:45:30,815 - root - INFO - step: 14995 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:45:30,816 - root - INFO - lr: 3.6528e-05 gnorm: 1.05 [ 9:11:21<15:19:24] +[titan] 2025-10-05 07:45:39,479 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:45:41,661 - root - INFO - step: 15000 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 07:45:41,661 - root - INFO - lr: 3.6520e-05 gnorm: 1.11 [ 9:11:32<15:19:13] +[titan] 2025-10-05 07:45:41,661 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 07:46:00,846 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 07:46:00,846 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.18 seconds. 
+[titan] 2025-10-05 07:48:03,855 - root - INFO - step: 15005 loss: 2.1283 memory: 118.84GiB(85.28%) tps: 2,304 tflops: 31.97 mfu: 3.23% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 07:48:03,856 - root - INFO - lr: 3.6512e-05 gnorm: 1.06 [ 9:13:54<15:22:40] +[titan] 2025-10-05 07:48:14,694 - root - INFO - step: 15010 loss: 2.2315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 07:48:14,695 - root - INFO - lr: 3.6504e-05 gnorm: 1.12 [ 9:14:05<15:22:29] +[titan] 2025-10-05 07:48:25,509 - root - INFO - step: 15015 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 07:48:25,510 - root - INFO - lr: 3.6495e-05 gnorm: 1.09 [ 9:14:15<15:22:17] +[titan] 2025-10-05 07:48:36,334 - root - INFO - step: 15020 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 07:48:36,334 - root - INFO - lr: 3.6487e-05 gnorm: 1.09 [ 9:14:26<15:22:06] +[titan] 2025-10-05 07:48:47,212 - root - INFO - step: 15025 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:48:47,212 - root - INFO - lr: 3.6479e-05 gnorm: 1.07 [ 9:14:37<15:21:55] +[titan] 2025-10-05 07:48:58,068 - root - INFO - step: 15030 loss: 2.2843 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:48:58,069 - root - INFO - lr: 3.6471e-05 gnorm: 1.12 [ 9:14:48<15:21:43] +[titan] 2025-10-05 07:49:08,916 - root - INFO - step: 15035 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 07:49:08,916 - root - INFO - lr: 
3.6463e-05 gnorm: 1.06 [ 9:14:59<15:21:32] +[titan] 2025-10-05 07:49:19,759 - root - INFO - step: 15040 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9876 +[titan] 2025-10-05 07:49:19,759 - root - INFO - lr: 3.6454e-05 gnorm: 1.06 [ 9:15:10<15:21:20] +[titan] 2025-10-05 07:49:30,613 - root - INFO - step: 15045 loss: 2.2689 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 07:49:30,613 - root - INFO - lr: 3.6446e-05 gnorm: 1.09 [ 9:15:21<15:21:09] +[titan] 2025-10-05 07:49:39,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:49:41,450 - root - INFO - step: 15050 loss: 2.2266 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 07:49:41,450 - root - INFO - lr: 3.6438e-05 gnorm: 1.08 [ 9:15:31<15:20:57] +[titan] 2025-10-05 07:49:52,326 - root - INFO - step: 15055 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0056 +[titan] 2025-10-05 07:49:52,326 - root - INFO - lr: 3.6430e-05 gnorm: 1.10 [ 9:15:42<15:20:46] +[titan] 2025-10-05 07:50:03,204 - root - INFO - step: 15060 loss: 2.2848 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:50:03,204 - root - INFO - lr: 3.6422e-05 gnorm: 1.09 [ 9:15:53<15:20:35] +[titan] 2025-10-05 07:50:14,065 - root - INFO - step: 15065 loss: 2.2635 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 07:50:14,065 - root - INFO - lr: 3.6413e-05 gnorm: 1.09 [ 9:16:04<15:20:23] +[titan] 2025-10-05 07:50:24,948 - root - INFO - step: 15070 loss: 2.2568 memory: 118.84GiB(85.28%) tps: 
30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 07:50:24,948 - root - INFO - lr: 3.6405e-05 gnorm: 1.08 [ 9:16:15<15:20:12] +[titan] 2025-10-05 07:50:35,794 - root - INFO - step: 15075 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0211 +[titan] 2025-10-05 07:50:35,794 - root - INFO - lr: 3.6397e-05 gnorm: 1.09 [ 9:16:26<15:20:00] +[titan] 2025-10-05 07:50:46,648 - root - INFO - step: 15080 loss: 2.2769 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0110 +[titan] 2025-10-05 07:50:46,649 - root - INFO - lr: 3.6389e-05 gnorm: 1.07 [ 9:16:37<15:19:49] +[titan] 2025-10-05 07:50:57,498 - root - INFO - step: 15085 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 07:50:57,498 - root - INFO - lr: 3.6381e-05 gnorm: 1.07 [ 9:16:47<15:19:37] +[titan] 2025-10-05 07:51:08,384 - root - INFO - step: 15090 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:51:08,385 - root - INFO - lr: 3.6373e-05 gnorm: 1.08 [ 9:16:58<15:19:26] +[titan] 2025-10-05 07:51:19,234 - root - INFO - step: 15095 loss: 2.2363 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9753 +[titan] 2025-10-05 07:51:19,234 - root - INFO - lr: 3.6364e-05 gnorm: 1.07 [ 9:17:09<15:19:15] +[titan] 2025-10-05 07:51:27,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:51:30,098 - root - INFO - step: 15100 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:51:30,098 - root - INFO - lr: 3.6356e-05 gnorm: 1.15 [ 9:17:20<15:19:03] +[titan] 2025-10-05 07:51:40,977 - root - INFO - step: 15105 loss: 2.2586 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9945 +[titan] 2025-10-05 07:51:40,977 - root - INFO - lr: 3.6348e-05 gnorm: 1.11 [ 9:17:31<15:18:52] +[titan] 2025-10-05 07:51:51,845 - root - INFO - step: 15110 loss: 2.2404 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 07:51:51,845 - root - INFO - lr: 3.6340e-05 gnorm: 1.07 [ 9:17:42<15:18:40] +[titan] 2025-10-05 07:52:02,714 - root - INFO - step: 15115 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0275 +[titan] 2025-10-05 07:52:02,715 - root - INFO - lr: 3.6331e-05 gnorm: 1.14 [ 9:17:53<15:18:29] +[titan] 2025-10-05 07:52:13,605 - root - INFO - step: 15120 loss: 2.2957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 07:52:13,605 - root - INFO - lr: 3.6323e-05 gnorm: 1.15 [ 9:18:04<15:18:18] +[titan] 2025-10-05 07:52:24,497 - root - INFO - step: 15125 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:52:24,497 - root - INFO - lr: 3.6315e-05 gnorm: 1.03 [ 9:18:14<15:18:06] +[titan] 2025-10-05 07:52:35,368 - root - INFO - step: 15130 loss: 2.2438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:35,368 - root - INFO - lr: 
3.6307e-05 gnorm: 1.11 [ 9:18:25<15:17:55] +[titan] 2025-10-05 07:52:46,243 - root - INFO - step: 15135 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 07:52:46,243 - root - INFO - lr: 3.6299e-05 gnorm: 1.09 [ 9:18:36<15:17:43] +[titan] 2025-10-05 07:52:57,118 - root - INFO - step: 15140 loss: 2.2420 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:57,118 - root - INFO - lr: 3.6290e-05 gnorm: 1.10 [ 9:18:47<15:17:32] +[titan] 2025-10-05 07:53:07,980 - root - INFO - step: 15145 loss: 2.3012 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 07:53:07,981 - root - INFO - lr: 3.6282e-05 gnorm: 1.07 [ 9:18:58<15:17:21] +[titan] 2025-10-05 07:53:16,667 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:53:18,858 - root - INFO - step: 15150 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9690 +[titan] 2025-10-05 07:53:18,858 - root - INFO - lr: 3.6274e-05 gnorm: 1.07 [ 9:19:09<15:17:09] +[titan] 2025-10-05 07:53:29,759 - root - INFO - step: 15155 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:53:29,759 - root - INFO - lr: 3.6266e-05 gnorm: 1.07 [ 9:19:20<15:16:58] +[titan] 2025-10-05 07:53:40,639 - root - INFO - step: 15160 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:53:40,639 - root - INFO - lr: 3.6258e-05 gnorm: 1.11 [ 9:19:31<15:16:46] +[titan] 2025-10-05 07:53:51,512 - root - INFO - step: 15165 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:53:51,512 - root - INFO - lr: 3.6249e-05 gnorm: 1.10 [ 9:19:41<15:16:35] +[titan] 2025-10-05 07:54:02,384 - root - INFO - step: 15170 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0038 +[titan] 2025-10-05 07:54:02,384 - root - INFO - lr: 3.6241e-05 gnorm: 1.11 [ 9:19:52<15:16:24] +[titan] 2025-10-05 07:54:13,267 - root - INFO - step: 15175 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:54:13,267 - root - INFO - lr: 3.6233e-05 gnorm: 1.08 [ 9:20:03<15:16:12] +[titan] 2025-10-05 07:54:24,133 - root - INFO - step: 15180 loss: 2.3028 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 07:54:24,133 - root - INFO - lr: 
3.6225e-05 gnorm: 1.09 [ 9:20:14<15:16:01] +[titan] 2025-10-05 07:54:35,038 - root - INFO - step: 15185 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:54:35,038 - root - INFO - lr: 3.6216e-05 gnorm: 1.07 [ 9:20:25<15:15:49] +[titan] 2025-10-05 07:54:45,892 - root - INFO - step: 15190 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0272 +[titan] 2025-10-05 07:54:45,892 - root - INFO - lr: 3.6208e-05 gnorm: 1.08 [ 9:20:36<15:15:38] +[titan] 2025-10-05 07:54:56,749 - root - INFO - step: 15195 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9845 +[titan] 2025-10-05 07:54:56,749 - root - INFO - lr: 3.6200e-05 gnorm: 1.08 [ 9:20:47<15:15:27] +[titan] 2025-10-05 07:55:05,428 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:55:07,614 - root - INFO - step: 15200 loss: 2.2230 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 07:55:07,615 - root - INFO - lr: 3.6192e-05 gnorm: 1.08 [ 9:20:58<15:15:15] +[titan] 2025-10-05 07:55:18,475 - root - INFO - step: 15205 loss: 2.2720 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 07:55:18,475 - root - INFO - lr: 3.6184e-05 gnorm: 1.09 [ 9:21:08<15:15:04] +[titan] 2025-10-05 07:55:29,333 - root - INFO - step: 15210 loss: 2.2496 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9871 +[titan] 2025-10-05 07:55:29,333 - root - INFO - lr: 3.6175e-05 gnorm: 1.10 [ 9:21:19<15:14:52] +[titan] 2025-10-05 07:55:40,201 - root - INFO - step: 15215 loss: 2.2704 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 07:55:40,201 - root - INFO - lr: 3.6167e-05 gnorm: 1.10 [ 9:21:30<15:14:41] +[titan] 2025-10-05 07:55:51,102 - root - INFO - step: 15220 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9944 +[titan] 2025-10-05 07:55:51,103 - root - INFO - lr: 3.6159e-05 gnorm: 1.09 [ 9:21:41<15:14:30] +[titan] 2025-10-05 07:56:01,979 - root - INFO - step: 15225 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:56:01,979 - root - INFO - lr: 3.6151e-05 gnorm: 1.08 [ 9:21:52<15:14:18] +[titan] 2025-10-05 07:56:12,856 - root - INFO - step: 15230 loss: 2.3282 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0531 +[titan] 2025-10-05 07:56:12,856 - root - INFO - lr: 
3.6142e-05 gnorm: 1.08 [ 9:22:03<15:14:07] +[titan] 2025-10-05 07:56:23,718 - root - INFO - step: 15235 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0077 +[titan] 2025-10-05 07:56:23,718 - root - INFO - lr: 3.6134e-05 gnorm: 1.04 [ 9:22:14<15:13:55] +[titan] 2025-10-05 07:56:34,605 - root - INFO - step: 15240 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9751 +[titan] 2025-10-05 07:56:34,605 - root - INFO - lr: 3.6126e-05 gnorm: 1.07 [ 9:22:25<15:13:44] +[titan] 2025-10-05 07:56:45,472 - root - INFO - step: 15245 loss: 2.3360 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 07:56:45,472 - root - INFO - lr: 3.6118e-05 gnorm: 1.05 [ 9:22:35<15:13:33] +[titan] 2025-10-05 07:56:54,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:56:56,368 - root - INFO - step: 15250 loss: 2.2490 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9870 +[titan] 2025-10-05 07:56:56,368 - root - INFO - lr: 3.6109e-05 gnorm: 1.08 [ 9:22:46<15:13:21] +[titan] 2025-10-05 07:57:07,241 - root - INFO - step: 15255 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9655 +[titan] 2025-10-05 07:57:07,241 - root - INFO - lr: 3.6101e-05 gnorm: 1.06 [ 9:22:57<15:13:10] +[titan] 2025-10-05 07:57:18,136 - root - INFO - step: 15260 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:57:18,136 - root - INFO - lr: 3.6093e-05 gnorm: 1.10 [ 9:23:08<15:12:59] +[titan] 2025-10-05 07:57:29,037 - root - INFO - step: 15265 loss: 2.2358 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9744 +[titan] 2025-10-05 07:57:29,037 - root - INFO - lr: 3.6085e-05 gnorm: 1.07 [ 9:23:19<15:12:47] +[titan] 2025-10-05 07:57:39,909 - root - INFO - step: 15270 loss: 2.3087 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 07:57:39,910 - root - INFO - lr: 3.6076e-05 gnorm: 1.08 [ 9:23:30<15:12:36] +[titan] 2025-10-05 07:57:50,767 - root - INFO - step: 15275 loss: 2.2564 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 07:57:50,768 - root - INFO - lr: 3.6068e-05 gnorm: 1.08 [ 9:23:41<15:12:24] +[titan] 2025-10-05 07:58:01,662 - root - INFO - step: 15280 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9973 +[titan] 2025-10-05 07:58:01,662 - root - INFO - lr: 
3.6060e-05 gnorm: 1.20 [ 9:23:52<15:12:13] +[titan] 2025-10-05 07:58:12,584 - root - INFO - step: 15285 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:58:12,584 - root - INFO - lr: 3.6052e-05 gnorm: 1.10 [ 9:24:02<15:12:02] +[titan] 2025-10-05 07:58:23,438 - root - INFO - step: 15290 loss: 2.2206 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 07:58:23,438 - root - INFO - lr: 3.6043e-05 gnorm: 1.06 [ 9:24:13<15:11:50] +[titan] 2025-10-05 07:58:34,298 - root - INFO - step: 15295 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 07:58:34,299 - root - INFO - lr: 3.6035e-05 gnorm: 1.06 [ 9:24:24<15:11:39] +[titan] 2025-10-05 07:58:42,983 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:58:45,167 - root - INFO - step: 15300 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 07:58:45,167 - root - INFO - lr: 3.6027e-05 gnorm: 1.08 [ 9:24:35<15:11:28] +[titan] 2025-10-05 07:58:56,031 - root - INFO - step: 15305 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 07:58:56,032 - root - INFO - lr: 3.6019e-05 gnorm: 1.10 [ 9:24:46<15:11:16] +[titan] 2025-10-05 07:59:06,887 - root - INFO - step: 15310 loss: 2.2775 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 07:59:06,887 - root - INFO - lr: 3.6010e-05 gnorm: 1.09 [ 9:24:57<15:11:05] +[titan] 2025-10-05 07:59:17,809 - root - INFO - step: 15315 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9766 +[titan] 2025-10-05 07:59:17,809 - root - INFO - lr: 3.6002e-05 gnorm: 1.10 [ 9:25:08<15:10:53] +[titan] 2025-10-05 07:59:28,686 - root - INFO - step: 15320 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 07:59:28,686 - root - INFO - lr: 3.5994e-05 gnorm: 1.07 [ 9:25:19<15:10:42] +[titan] 2025-10-05 07:59:39,537 - root - INFO - step: 15325 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 07:59:39,537 - root - INFO - lr: 3.5986e-05 gnorm: 1.11 [ 9:25:29<15:10:31] +[titan] 2025-10-05 07:59:50,399 - root - INFO - step: 15330 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:59:50,400 - root - INFO - lr: 
3.5977e-05 gnorm: 1.11 [ 9:25:40<15:10:19] +[titan] 2025-10-05 08:00:01,278 - root - INFO - step: 15335 loss: 2.2792 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0137 +[titan] 2025-10-05 08:00:01,278 - root - INFO - lr: 3.5969e-05 gnorm: 1.15 [ 9:25:51<15:10:08] +[titan] 2025-10-05 08:00:12,178 - root - INFO - step: 15340 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 08:00:12,178 - root - INFO - lr: 3.5961e-05 gnorm: 1.06 [ 9:26:02<15:09:56] +[titan] 2025-10-05 08:00:23,116 - root - INFO - step: 15345 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:00:23,116 - root - INFO - lr: 3.5952e-05 gnorm: 1.09 [ 9:26:13<15:09:45] +[titan] 2025-10-05 08:00:31,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:00:33,989 - root - INFO - step: 15350 loss: 2.2871 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 08:00:33,989 - root - INFO - lr: 3.5944e-05 gnorm: 1.10 [ 9:26:24<15:09:34] +[titan] 2025-10-05 08:00:44,860 - root - INFO - step: 15355 loss: 2.1883 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 08:00:44,860 - root - INFO - lr: 3.5936e-05 gnorm: 1.10 [ 9:26:35<15:09:22] +[titan] 2025-10-05 08:00:55,829 - root - INFO - step: 15360 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9417 +[titan] 2025-10-05 08:00:55,830 - root - INFO - lr: 3.5928e-05 gnorm: 1.04 [ 9:26:46<15:09:11] +[titan] 2025-10-05 08:00:56,010 - root - INFO - Dumping profiler traces at step 15360 +[titan] 2025-10-05 08:00:56,051 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:01:06,999 - root - INFO - step: 15365 loss: 2.2781 memory: 118.84GiB(85.28%) tps: 29,338 tflops: 407.01 mfu: 41.15% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 08:01:06,999 - root - INFO - lr: 3.5919e-05 gnorm: 1.07 [ 9:26:57<15:09:00] +[titan] 2025-10-05 08:01:17,972 - root - INFO - step: 15370 loss: 2.2166 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 08:01:17,972 - root - INFO - lr: 3.5911e-05 gnorm: 1.06 [ 9:27:08<15:08:49] +[titan] 2025-10-05 08:01:28,828 - root - INFO - step: 15375 loss: 2.2257 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 08:01:28,829 - root - INFO - lr: 3.5903e-05 gnorm: 1.04 [ 9:27:19<15:08:38] +[titan] 2025-10-05 08:01:39,732 - root - INFO - step: 15380 loss: 
2.2608 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9985 +[titan] 2025-10-05 08:01:39,732 - root - INFO - lr: 3.5895e-05 gnorm: 1.14 [ 9:27:30<15:08:26] +[titan] 2025-10-05 08:01:50,600 - root - INFO - step: 15385 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9684 +[titan] 2025-10-05 08:01:50,601 - root - INFO - lr: 3.5886e-05 gnorm: 1.10 [ 9:27:40<15:08:15] +[titan] 2025-10-05 08:02:01,449 - root - INFO - step: 15390 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 08:02:01,449 - root - INFO - lr: 3.5878e-05 gnorm: 1.10 [ 9:27:51<15:08:03] +[titan] 2025-10-05 08:02:12,375 - root - INFO - step: 15395 loss: 2.2776 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 08:02:12,375 - root - INFO - lr: 3.5870e-05 gnorm: 1.10 [ 9:28:02<15:07:52] +[titan] 2025-10-05 08:02:21,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:02:23,262 - root - INFO - step: 15400 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9035 +[titan] 2025-10-05 08:02:23,262 - root - INFO - lr: 3.5861e-05 gnorm: 1.05 [ 9:28:13<15:07:41] +[titan] 2025-10-05 08:02:34,123 - root - INFO - step: 15405 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 08:02:34,123 - root - INFO - lr: 3.5853e-05 gnorm: 1.05 [ 9:28:24<15:07:29] +[titan] 2025-10-05 08:02:45,027 - root - INFO - step: 15410 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:02:45,027 - root - INFO - lr: 3.5845e-05 gnorm: 1.09 [ 9:28:35<15:07:18] +[titan] 2025-10-05 08:02:55,897 - root - INFO - step: 15415 loss: 2.2676 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:02:55,897 - root - INFO - lr: 3.5837e-05 gnorm: 1.09 [ 9:28:46<15:07:07] +[titan] 2025-10-05 08:03:06,761 - root - INFO - step: 15420 loss: 2.2135 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9565 +[titan] 2025-10-05 08:03:06,761 - root - INFO - lr: 3.5828e-05 gnorm: 1.09 [ 9:28:57<15:06:55] +[titan] 2025-10-05 08:03:17,688 - root - INFO - step: 15425 loss: 2.2445 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9820 +[titan] 2025-10-05 08:03:17,688 - root - INFO - lr: 3.5820e-05 gnorm: 1.04 [ 9:29:08<15:06:44] +[titan] 2025-10-05 08:03:28,565 - root - INFO - step: 15430 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 08:03:28,565 - root - INFO - lr: 
3.5812e-05 gnorm: 1.11 [ 9:29:18<15:06:33] +[titan] 2025-10-05 08:03:39,425 - root - INFO - step: 15435 loss: 2.2327 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:03:39,425 - root - INFO - lr: 3.5803e-05 gnorm: 1.06 [ 9:29:29<15:06:21] +[titan] 2025-10-05 08:03:50,323 - root - INFO - step: 15440 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 08:03:50,323 - root - INFO - lr: 3.5795e-05 gnorm: 1.04 [ 9:29:40<15:06:10] +[titan] 2025-10-05 08:04:01,195 - root - INFO - step: 15445 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 08:04:01,195 - root - INFO - lr: 3.5787e-05 gnorm: 1.04 [ 9:29:51<15:05:58] +[titan] 2025-10-05 08:04:09,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:04:12,045 - root - INFO - step: 15450 loss: 2.2815 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0162 +[titan] 2025-10-05 08:04:12,045 - root - INFO - lr: 3.5778e-05 gnorm: 1.12 [ 9:30:02<15:05:47] +[titan] 2025-10-05 08:04:22,960 - root - INFO - step: 15455 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 08:04:22,960 - root - INFO - lr: 3.5770e-05 gnorm: 1.11 [ 9:30:13<15:05:36] +[titan] 2025-10-05 08:04:33,818 - root - INFO - step: 15460 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9338 +[titan] 2025-10-05 08:04:33,818 - root - INFO - lr: 3.5762e-05 gnorm: 1.08 [ 9:30:24<15:05:24] +[titan] 2025-10-05 08:04:44,689 - root - INFO - step: 15465 loss: 2.1902 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:04:44,689 - root - INFO - lr: 3.5754e-05 gnorm: 1.09 [ 9:30:35<15:05:13] +[titan] 2025-10-05 08:04:55,581 - root - INFO - step: 15470 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:04:55,581 - root - INFO - lr: 3.5745e-05 gnorm: 1.07 [ 9:30:45<15:05:02] +[titan] 2025-10-05 08:05:06,501 - root - INFO - step: 15475 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0023 +[titan] 2025-10-05 08:05:06,501 - root - INFO - lr: 3.5737e-05 gnorm: 1.12 [ 9:30:56<15:04:50] +[titan] 2025-10-05 08:05:17,432 - root - INFO - step: 15480 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 08:05:17,432 - root - INFO - lr: 
3.5729e-05 gnorm: 1.10 [ 9:31:07<15:04:39] +[titan] 2025-10-05 08:05:28,358 - root - INFO - step: 15485 loss: 2.2121 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:05:28,358 - root - INFO - lr: 3.5720e-05 gnorm: 1.05 [ 9:31:18<15:04:28] +[titan] 2025-10-05 08:05:39,256 - root - INFO - step: 15490 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 08:05:39,257 - root - INFO - lr: 3.5712e-05 gnorm: 1.11 [ 9:31:29<15:04:16] +[titan] 2025-10-05 08:05:50,140 - root - INFO - step: 15495 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 08:05:50,140 - root - INFO - lr: 3.5704e-05 gnorm: 1.04 [ 9:31:40<15:04:05] +[titan] 2025-10-05 08:05:58,823 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:06:01,006 - root - INFO - step: 15500 loss: 2.1526 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 08:06:01,006 - root - INFO - lr: 3.5695e-05 gnorm: 1.06 [ 9:31:51<15:03:54] +[titan] 2025-10-05 08:06:11,916 - root - INFO - step: 15505 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 08:06:11,916 - root - INFO - lr: 3.5687e-05 gnorm: 1.09 [ 9:32:02<15:03:42] +[titan] 2025-10-05 08:06:22,849 - root - INFO - step: 15510 loss: 2.2409 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 08:06:22,849 - root - INFO - lr: 3.5679e-05 gnorm: 1.06 [ 9:32:13<15:03:31] +[titan] 2025-10-05 08:06:33,698 - root - INFO - step: 15515 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 08:06:33,698 - root - INFO - lr: 3.5670e-05 gnorm: 1.08 [ 9:32:24<15:03:20] +[titan] 2025-10-05 08:06:44,607 - root - INFO - step: 15520 loss: 2.3868 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 08:06:44,607 - root - INFO - lr: 3.5662e-05 gnorm: 2.99 [ 9:32:34<15:03:08] +[titan] 2025-10-05 08:06:55,464 - root - INFO - step: 15525 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9210 +[titan] 2025-10-05 08:06:55,464 - root - INFO - lr: 3.5654e-05 gnorm: 1.10 [ 9:32:45<15:02:57] +[titan] 2025-10-05 08:07:06,333 - root - INFO - step: 15530 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:07:06,333 - root - INFO - lr: 
3.5646e-05 gnorm: 1.03 [ 9:32:56<15:02:46] +[titan] 2025-10-05 08:07:17,232 - root - INFO - step: 15535 loss: 2.2054 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 08:07:17,232 - root - INFO - lr: 3.5637e-05 gnorm: 1.08 [ 9:33:07<15:02:34] +[titan] 2025-10-05 08:07:28,161 - root - INFO - step: 15540 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:07:28,161 - root - INFO - lr: 3.5629e-05 gnorm: 1.10 [ 9:33:18<15:02:23] +[titan] 2025-10-05 08:07:39,016 - root - INFO - step: 15545 loss: 2.2280 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 08:07:39,016 - root - INFO - lr: 3.5621e-05 gnorm: 1.07 [ 9:33:29<15:02:11] +[titan] 2025-10-05 08:07:47,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:07:49,876 - root - INFO - step: 15550 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 08:07:49,876 - root - INFO - lr: 3.5612e-05 gnorm: 1.07 [ 9:33:40<15:02:00] +[titan] 2025-10-05 08:08:00,741 - root - INFO - step: 15555 loss: 2.2855 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 08:08:00,741 - root - INFO - lr: 3.5604e-05 gnorm: 1.08 [ 9:33:51<15:01:49] +[titan] 2025-10-05 08:08:11,595 - root - INFO - step: 15560 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 08:08:11,595 - root - INFO - lr: 3.5596e-05 gnorm: 1.08 [ 9:34:01<15:01:37] +[titan] 2025-10-05 08:08:22,482 - root - INFO - step: 15565 loss: 2.2410 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:08:22,482 - root - INFO - lr: 3.5587e-05 gnorm: 1.08 [ 9:34:12<15:01:26] +[titan] 2025-10-05 08:08:33,383 - root - INFO - step: 15570 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 08:08:33,383 - root - INFO - lr: 3.5579e-05 gnorm: 1.11 [ 9:34:23<15:01:15] +[titan] 2025-10-05 08:08:44,242 - root - INFO - step: 15575 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 08:08:44,242 - root - INFO - lr: 3.5571e-05 gnorm: 1.06 [ 9:34:34<15:01:03] +[titan] 2025-10-05 08:08:55,120 - root - INFO - step: 15580 loss: 2.2133 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9541 +[titan] 2025-10-05 08:08:55,120 - root - INFO - lr: 
3.5562e-05 gnorm: 1.05 [ 9:34:45<15:00:52] +[titan] 2025-10-05 08:09:06,000 - root - INFO - step: 15585 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9699 +[titan] 2025-10-05 08:09:06,000 - root - INFO - lr: 3.5554e-05 gnorm: 1.10 [ 9:34:56<15:00:41] +[titan] 2025-10-05 08:09:16,881 - root - INFO - step: 15590 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 08:09:16,881 - root - INFO - lr: 3.5546e-05 gnorm: 1.07 [ 9:35:07<15:00:29] +[titan] 2025-10-05 08:09:27,784 - root - INFO - step: 15595 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 08:09:27,784 - root - INFO - lr: 3.5537e-05 gnorm: 1.12 [ 9:35:18<15:00:18] +[titan] 2025-10-05 08:09:36,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:09:38,690 - root - INFO - step: 15600 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 08:09:38,690 - root - INFO - lr: 3.5529e-05 gnorm: 1.09 [ 9:35:29<15:00:07] +[titan] 2025-10-05 08:09:49,593 - root - INFO - step: 15605 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 08:09:49,594 - root - INFO - lr: 3.5521e-05 gnorm: 1.08 [ 9:35:39<14:59:55] +[titan] 2025-10-05 08:10:00,479 - root - INFO - step: 15610 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 08:10:00,480 - root - INFO - lr: 3.5512e-05 gnorm: 1.08 [ 9:35:50<14:59:44] +[titan] 2025-10-05 08:10:11,374 - root - INFO - step: 15615 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9198 +[titan] 2025-10-05 08:10:11,374 - root - INFO - lr: 3.5504e-05 gnorm: 1.05 [ 9:36:01<14:59:32] +[titan] 2025-10-05 08:10:22,287 - root - INFO - step: 15620 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9771 +[titan] 2025-10-05 08:10:22,287 - root - INFO - lr: 3.5496e-05 gnorm: 1.08 [ 9:36:12<14:59:21] +[titan] 2025-10-05 08:10:33,166 - root - INFO - step: 15625 loss: 2.2767 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 08:10:33,166 - root - INFO - lr: 3.5487e-05 gnorm: 1.12 [ 9:36:23<14:59:10] +[titan] 2025-10-05 08:10:44,056 - root - INFO - step: 15630 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 08:10:44,056 - root - INFO - lr: 
3.5479e-05 gnorm: 1.07 [ 9:36:34<14:58:58] +[titan] 2025-10-05 08:10:54,998 - root - INFO - step: 15635 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 08:10:54,998 - root - INFO - lr: 3.5471e-05 gnorm: 1.07 [ 9:36:45<14:58:47] +[titan] 2025-10-05 08:11:05,867 - root - INFO - step: 15640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:11:05,867 - root - INFO - lr: 3.5462e-05 gnorm: 1.08 [ 9:36:56<14:58:36] +[titan] 2025-10-05 08:11:16,727 - root - INFO - step: 15645 loss: 2.1832 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:11:16,728 - root - INFO - lr: 3.5454e-05 gnorm: 1.08 [ 9:37:07<14:58:24] +[titan] 2025-10-05 08:11:25,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:11:27,605 - root - INFO - step: 15650 loss: 2.1882 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:11:27,605 - root - INFO - lr: 3.5445e-05 gnorm: 1.03 [ 9:37:17<14:58:13] +[titan] 2025-10-05 08:11:38,458 - root - INFO - step: 15655 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:11:38,458 - root - INFO - lr: 3.5437e-05 gnorm: 1.05 [ 9:37:28<14:58:02] +[titan] 2025-10-05 08:11:49,330 - root - INFO - step: 15660 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 08:11:49,330 - root - INFO - lr: 3.5429e-05 gnorm: 1.11 [ 9:37:39<14:57:50] +[titan] 2025-10-05 08:12:00,235 - root - INFO - step: 15665 loss: 2.1792 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9268 +[titan] 2025-10-05 08:12:00,235 - root - INFO - lr: 3.5420e-05 gnorm: 1.07 [ 9:37:50<14:57:39] +[titan] 2025-10-05 08:12:11,114 - root - INFO - step: 15670 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9779 +[titan] 2025-10-05 08:12:11,114 - root - INFO - lr: 3.5412e-05 gnorm: 1.03 [ 9:38:01<14:57:28] +[titan] 2025-10-05 08:12:21,997 - root - INFO - step: 15675 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 08:12:21,997 - root - INFO - lr: 3.5404e-05 gnorm: 1.07 [ 9:38:12<14:57:16] +[titan] 2025-10-05 08:12:32,868 - root - INFO - step: 15680 loss: 2.2075 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:12:32,868 - root - INFO - lr: 
3.5395e-05 gnorm: 1.07 [ 9:38:23<14:57:05] +[titan] 2025-10-05 08:12:43,747 - root - INFO - step: 15685 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 08:12:43,747 - root - INFO - lr: 3.5387e-05 gnorm: 1.10 [ 9:38:34<14:56:54] +[titan] 2025-10-05 08:12:54,610 - root - INFO - step: 15690 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0090 +[titan] 2025-10-05 08:12:54,610 - root - INFO - lr: 3.5379e-05 gnorm: 1.08 [ 9:38:44<14:56:42] +[titan] 2025-10-05 08:13:05,466 - root - INFO - step: 15695 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 08:13:05,466 - root - INFO - lr: 3.5370e-05 gnorm: 1.06 [ 9:38:55<14:56:31] +[titan] 2025-10-05 08:13:14,193 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:13:16,388 - root - INFO - step: 15700 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0069 +[titan] 2025-10-05 08:13:16,388 - root - INFO - lr: 3.5362e-05 gnorm: 1.10 [ 9:39:06<14:56:20] +[titan] 2025-10-05 08:13:27,274 - root - INFO - step: 15705 loss: 2.2396 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9780 +[titan] 2025-10-05 08:13:27,275 - root - INFO - lr: 3.5354e-05 gnorm: 1.10 [ 9:39:17<14:56:08] +[titan] 2025-10-05 08:13:38,135 - root - INFO - step: 15710 loss: 2.2474 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 08:13:38,136 - root - INFO - lr: 3.5345e-05 gnorm: 1.12 [ 9:39:28<14:55:57] +[titan] 2025-10-05 08:13:49,010 - root - INFO - step: 15715 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9203 +[titan] 2025-10-05 08:13:49,010 - root - INFO - lr: 3.5337e-05 gnorm: 1.10 [ 9:39:39<14:55:45] +[titan] 2025-10-05 08:13:59,874 - root - INFO - step: 15720 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9736 +[titan] 2025-10-05 08:13:59,875 - root - INFO - lr: 3.5328e-05 gnorm: 1.11 [ 9:39:50<14:55:34] +[titan] 2025-10-05 08:14:10,743 - root - INFO - step: 15725 loss: 2.2138 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9568 +[titan] 2025-10-05 08:14:10,743 - root - INFO - lr: 3.5320e-05 gnorm: 1.10 [ 9:40:01<14:55:23] +[titan] 2025-10-05 08:14:21,639 - root - INFO - step: 15730 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 08:14:21,640 - root - INFO - lr: 
3.5312e-05 gnorm: 1.09 [ 9:40:12<14:55:11] +[titan] 2025-10-05 08:14:32,539 - root - INFO - step: 15735 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 08:14:32,539 - root - INFO - lr: 3.5303e-05 gnorm: 1.05 [ 9:40:22<14:55:00] +[titan] 2025-10-05 08:14:43,413 - root - INFO - step: 15740 loss: 2.2798 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:14:43,413 - root - INFO - lr: 3.5295e-05 gnorm: 1.11 [ 9:40:33<14:54:49] +[titan] 2025-10-05 08:14:54,293 - root - INFO - step: 15745 loss: 2.2448 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9833 +[titan] 2025-10-05 08:14:54,293 - root - INFO - lr: 3.5287e-05 gnorm: 1.15 [ 9:40:44<14:54:37] +[titan] 2025-10-05 08:15:02,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:15:05,161 - root - INFO - step: 15750 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9407 +[titan] 2025-10-05 08:15:05,161 - root - INFO - lr: 3.5278e-05 gnorm: 1.07 [ 9:40:55<14:54:26] +[titan] 2025-10-05 08:15:16,026 - root - INFO - step: 15755 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9732 +[titan] 2025-10-05 08:15:16,026 - root - INFO - lr: 3.5270e-05 gnorm: 1.08 [ 9:41:06<14:54:15] +[titan] 2025-10-05 08:15:26,950 - root - INFO - step: 15760 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9790 +[titan] 2025-10-05 08:15:26,951 - root - INFO - lr: 3.5261e-05 gnorm: 1.13 [ 9:41:17<14:54:03] +[titan] 2025-10-05 08:15:37,835 - root - INFO - step: 15765 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0588 +[titan] 2025-10-05 08:15:37,835 - root - INFO - lr: 3.5253e-05 gnorm: 1.07 [ 9:41:28<14:53:52] +[titan] 2025-10-05 08:15:48,693 - root - INFO - step: 15770 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:15:48,694 - root - INFO - lr: 3.5245e-05 gnorm: 1.07 [ 9:41:39<14:53:41] +[titan] 2025-10-05 08:15:59,558 - root - INFO - step: 15775 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 08:15:59,558 - root - INFO - lr: 3.5236e-05 gnorm: 1.09 [ 9:41:49<14:53:29] +[titan] 2025-10-05 08:16:10,424 - root - INFO - step: 15780 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:16:10,424 - root - INFO - lr: 
3.5228e-05 gnorm: 1.07 [ 9:42:00<14:53:18] +[titan] 2025-10-05 08:16:21,284 - root - INFO - step: 15785 loss: 2.2235 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9639 +[titan] 2025-10-05 08:16:21,284 - root - INFO - lr: 3.5220e-05 gnorm: 1.08 [ 9:42:11<14:53:06] +[titan] 2025-10-05 08:16:32,182 - root - INFO - step: 15790 loss: 2.2629 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 08:16:32,182 - root - INFO - lr: 3.5211e-05 gnorm: 1.05 [ 9:42:22<14:52:55] +[titan] 2025-10-05 08:16:43,101 - root - INFO - step: 15795 loss: 2.1715 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9180 +[titan] 2025-10-05 08:16:43,101 - root - INFO - lr: 3.5203e-05 gnorm: 1.08 [ 9:42:33<14:52:44] +[titan] 2025-10-05 08:16:51,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:16:53,985 - root - INFO - step: 15800 loss: 2.2694 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 08:16:53,985 - root - INFO - lr: 3.5194e-05 gnorm: 1.09 [ 9:42:44<14:52:32] +[titan] 2025-10-05 08:17:04,888 - root - INFO - step: 15805 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 08:17:04,888 - root - INFO - lr: 3.5186e-05 gnorm: 1.07 [ 9:42:55<14:52:21] +[titan] 2025-10-05 08:17:15,782 - root - INFO - step: 15810 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9612 +[titan] 2025-10-05 08:17:15,782 - root - INFO - lr: 3.5178e-05 gnorm: 1.11 [ 9:43:06<14:52:10] +[titan] 2025-10-05 08:17:26,682 - root - INFO - step: 15815 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 08:17:26,683 - root - INFO - lr: 3.5169e-05 gnorm: 1.09 [ 9:43:17<14:51:59] +[titan] 2025-10-05 08:17:37,542 - root - INFO - step: 15820 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9730 +[titan] 2025-10-05 08:17:37,542 - root - INFO - lr: 3.5161e-05 gnorm: 1.11 [ 9:43:27<14:51:47] +[titan] 2025-10-05 08:17:48,471 - root - INFO - step: 15825 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 08:17:48,471 - root - INFO - lr: 3.5152e-05 gnorm: 1.07 [ 9:43:38<14:51:36] +[titan] 2025-10-05 08:17:59,372 - root - INFO - step: 15830 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0266 +[titan] 2025-10-05 08:17:59,373 - root - INFO - lr: 
3.5144e-05 gnorm: 1.07 [ 9:43:49<14:51:25] +[titan] 2025-10-05 08:18:10,255 - root - INFO - step: 15835 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9594 +[titan] 2025-10-05 08:18:10,256 - root - INFO - lr: 3.5136e-05 gnorm: 1.11 [ 9:44:00<14:51:13] +[titan] 2025-10-05 08:18:21,144 - root - INFO - step: 15840 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 08:18:21,144 - root - INFO - lr: 3.5127e-05 gnorm: 1.07 [ 9:44:11<14:51:02] +[titan] 2025-10-05 08:18:32,042 - root - INFO - step: 15845 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 08:18:32,043 - root - INFO - lr: 3.5119e-05 gnorm: 1.07 [ 9:44:22<14:50:51] +[titan] 2025-10-05 08:18:40,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:18:42,921 - root - INFO - step: 15850 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9352 +[titan] 2025-10-05 08:18:42,921 - root - INFO - lr: 3.5111e-05 gnorm: 1.08 [ 9:44:33<14:50:39] +[titan] 2025-10-05 08:18:53,795 - root - INFO - step: 15855 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 08:18:53,795 - root - INFO - lr: 3.5102e-05 gnorm: 1.15 [ 9:44:44<14:50:28] +[titan] 2025-10-05 08:19:04,726 - root - INFO - step: 15860 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0324 +[titan] 2025-10-05 08:19:04,726 - root - INFO - lr: 3.5094e-05 gnorm: 1.13 [ 9:44:55<14:50:17] +[titan] 2025-10-05 08:19:15,610 - root - INFO - step: 15865 loss: 2.2234 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 08:19:15,610 - root - INFO - lr: 3.5085e-05 gnorm: 1.07 [ 9:45:05<14:50:05] +[titan] 2025-10-05 08:19:26,577 - root - INFO - step: 15870 loss: 2.2122 memory: 118.84GiB(85.28%) tps: 29,880 tflops: 414.54 mfu: 41.92% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:19:26,577 - root - INFO - lr: 3.5077e-05 gnorm: 1.09 [ 9:45:16<14:49:54] +[titan] 2025-10-05 08:19:31,107 - root - INFO - Dumping profiler traces at step 15872 +[titan] 2025-10-05 08:19:31,146 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:19:37,696 - root - INFO - step: 15875 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 08:19:37,697 - root - INFO - lr: 3.5068e-05 gnorm: 1.05 [ 9:45:28<14:49:43] +[titan] 2025-10-05 08:19:48,571 - root - INFO - step: 15880 loss: 
2.2001 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9437 +[titan] 2025-10-05 08:19:48,571 - root - INFO - lr: 3.5060e-05 gnorm: 1.08 [ 9:45:38<14:49:32] +[titan] 2025-10-05 08:19:59,444 - root - INFO - step: 15885 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9707 +[titan] 2025-10-05 08:19:59,444 - root - INFO - lr: 3.5052e-05 gnorm: 1.06 [ 9:45:49<14:49:20] +[titan] 2025-10-05 08:20:10,353 - root - INFO - step: 15890 loss: 2.2269 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 08:20:10,354 - root - INFO - lr: 3.5043e-05 gnorm: 1.09 [ 9:46:00<14:49:09] +[titan] 2025-10-05 08:20:21,229 - root - INFO - step: 15895 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9759 +[titan] 2025-10-05 08:20:21,230 - root - INFO - lr: 3.5035e-05 gnorm: 1.07 [ 9:46:11<14:48:58] +[titan] 2025-10-05 08:20:29,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:20:32,140 - root - INFO - step: 15900 loss: 2.1957 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 08:20:32,140 - root - INFO - lr: 3.5026e-05 gnorm: 1.09 [ 9:46:22<14:48:46] +[titan] 2025-10-05 08:20:43,027 - root - INFO - step: 15905 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:20:43,027 - root - INFO - lr: 3.5018e-05 gnorm: 1.06 [ 9:46:33<14:48:35] +[titan] 2025-10-05 08:20:53,932 - root - INFO - step: 15910 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 08:20:53,932 - root - INFO - lr: 3.5010e-05 gnorm: 1.11 [ 9:46:44<14:48:24] +[titan] 2025-10-05 08:21:04,803 - root - INFO - step: 15915 loss: 2.1550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9045 +[titan] 2025-10-05 08:21:04,803 - root - INFO - lr: 3.5001e-05 gnorm: 1.05 [ 9:46:55<14:48:12] +[titan] 2025-10-05 08:21:15,707 - root - INFO - step: 15920 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 08:21:15,707 - root - INFO - lr: 3.4993e-05 gnorm: 1.08 [ 9:47:06<14:48:01] +[titan] 2025-10-05 08:21:26,598 - root - INFO - step: 15925 loss: 2.2282 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 08:21:26,599 - root - INFO - lr: 3.4984e-05 gnorm: 1.10 [ 9:47:16<14:47:50] +[titan] 2025-10-05 08:21:37,540 - root - INFO - step: 15930 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0083 +[titan] 2025-10-05 08:21:37,540 - root - INFO - lr: 
3.4976e-05 gnorm: 1.11 [ 9:47:27<14:47:39] +[titan] 2025-10-05 08:21:48,426 - root - INFO - step: 15935 loss: 2.2034 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9455 +[titan] 2025-10-05 08:21:48,426 - root - INFO - lr: 3.4968e-05 gnorm: 1.07 [ 9:47:38<14:47:27] +[titan] 2025-10-05 08:21:59,298 - root - INFO - step: 15940 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9502 +[titan] 2025-10-05 08:21:59,298 - root - INFO - lr: 3.4959e-05 gnorm: 1.07 [ 9:47:49<14:47:16] +[titan] 2025-10-05 08:22:10,199 - root - INFO - step: 15945 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9384 +[titan] 2025-10-05 08:22:10,199 - root - INFO - lr: 3.4951e-05 gnorm: 1.09 [ 9:48:00<14:47:05] +[titan] 2025-10-05 08:22:18,903 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:22:21,106 - root - INFO - step: 15950 loss: 2.2603 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9966 +[titan] 2025-10-05 08:22:21,106 - root - INFO - lr: 3.4942e-05 gnorm: 1.06 [ 9:48:11<14:46:53] +[titan] 2025-10-05 08:22:32,066 - root - INFO - step: 15955 loss: 2.1766 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 08:22:32,066 - root - INFO - lr: 3.4934e-05 gnorm: 1.07 [ 9:48:22<14:46:42] +[titan] 2025-10-05 08:22:42,935 - root - INFO - step: 15960 loss: 2.2164 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:22:42,935 - root - INFO - lr: 3.4925e-05 gnorm: 1.06 [ 9:48:33<14:46:31] +[titan] 2025-10-05 08:22:53,820 - root - INFO - step: 15965 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 08:22:53,820 - root - INFO - lr: 3.4917e-05 gnorm: 1.07 [ 9:48:44<14:46:19] +[titan] 2025-10-05 08:23:04,735 - root - INFO - step: 15970 loss: 2.2899 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0224 +[titan] 2025-10-05 08:23:04,735 - root - INFO - lr: 3.4909e-05 gnorm: 1.17 [ 9:48:55<14:46:08] +[titan] 2025-10-05 08:23:15,637 - root - INFO - step: 15975 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9298 +[titan] 2025-10-05 08:23:15,637 - root - INFO - lr: 3.4900e-05 gnorm: 1.05 [ 9:49:05<14:45:57] +[titan] 2025-10-05 08:23:26,529 - root - INFO - step: 15980 loss: 2.2468 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 08:23:26,529 - root - INFO - lr: 
3.4892e-05 gnorm: 1.08 [ 9:49:16<14:45:45] +[titan] 2025-10-05 08:23:37,517 - root - INFO - step: 15985 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.76 mfu: 41.84% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:23:37,517 - root - INFO - lr: 3.4883e-05 gnorm: 1.09 [ 9:49:27<14:45:34] +[titan] 2025-10-05 08:23:48,403 - root - INFO - step: 15990 loss: 2.2605 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 08:23:48,403 - root - INFO - lr: 3.4875e-05 gnorm: 1.12 [ 9:49:38<14:45:23] +[titan] 2025-10-05 08:23:59,284 - root - INFO - step: 15995 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:23:59,284 - root - INFO - lr: 3.4866e-05 gnorm: 1.08 [ 9:49:49<14:45:12] +[titan] 2025-10-05 08:24:07,984 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:24:10,169 - root - INFO - step: 16000 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9270 +[titan] 2025-10-05 08:24:10,169 - root - INFO - lr: 3.4858e-05 gnorm: 1.05 [ 9:50:00<14:45:00] +[titan] 2025-10-05 08:24:21,053 - root - INFO - step: 16005 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 08:24:21,053 - root - INFO - lr: 3.4850e-05 gnorm: 1.09 [ 9:50:11<14:44:49] +[titan] 2025-10-05 08:24:31,978 - root - INFO - step: 16010 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 08:24:31,979 - root - INFO - lr: 3.4841e-05 gnorm: 1.05 [ 9:50:22<14:44:38] +[titan] 2025-10-05 08:24:42,866 - root - INFO - step: 16015 loss: 2.2354 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:24:42,866 - root - INFO - lr: 3.4833e-05 gnorm: 1.04 [ 9:50:33<14:44:26] +[titan] 2025-10-05 08:24:53,773 - root - INFO - step: 16020 loss: 2.2147 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9542 +[titan] 2025-10-05 08:24:53,773 - root - INFO - lr: 3.4824e-05 gnorm: 1.08 [ 9:50:44<14:44:15] +[titan] 2025-10-05 08:25:04,656 - root - INFO - step: 16025 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 08:25:04,656 - root - INFO - lr: 3.4816e-05 gnorm: 1.08 [ 9:50:54<14:44:04] +[titan] 2025-10-05 08:25:15,527 - root - INFO - step: 16030 loss: 2.2616 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 08:25:15,527 - root - INFO - lr: 
3.4807e-05 gnorm: 1.05 [ 9:51:05<14:43:52] +[titan] 2025-10-05 08:25:26,410 - root - INFO - step: 16035 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0174 +[titan] 2025-10-05 08:25:26,411 - root - INFO - lr: 3.4799e-05 gnorm: 1.10 [ 9:51:16<14:43:41] +[titan] 2025-10-05 08:25:37,315 - root - INFO - step: 16040 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9599 +[titan] 2025-10-05 08:25:37,315 - root - INFO - lr: 3.4790e-05 gnorm: 1.09 [ 9:51:27<14:43:30] +[titan] 2025-10-05 08:25:48,166 - root - INFO - step: 16045 loss: 2.2422 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:25:48,166 - root - INFO - lr: 3.4782e-05 gnorm: 1.07 [ 9:51:38<14:43:18] +[titan] 2025-10-05 08:25:56,883 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:25:59,065 - root - INFO - step: 16050 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0259 +[titan] 2025-10-05 08:25:59,065 - root - INFO - lr: 3.4774e-05 gnorm: 1.08 [ 9:51:49<14:43:07] +[titan] 2025-10-05 08:26:09,947 - root - INFO - step: 16055 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 08:26:09,947 - root - INFO - lr: 3.4765e-05 gnorm: 1.09 [ 9:52:00<14:42:56] +[titan] 2025-10-05 08:26:20,832 - root - INFO - step: 16060 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 08:26:20,832 - root - INFO - lr: 3.4757e-05 gnorm: 1.17 [ 9:52:11<14:42:44] +[titan] 2025-10-05 08:26:31,707 - root - INFO - step: 16065 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 08:26:31,707 - root - INFO - lr: 3.4748e-05 gnorm: 1.08 [ 9:52:22<14:42:33] +[titan] 2025-10-05 08:26:42,617 - root - INFO - step: 16070 loss: 2.2299 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 08:26:42,618 - root - INFO - lr: 3.4740e-05 gnorm: 1.09 [ 9:52:32<14:42:22] +[titan] 2025-10-05 08:26:53,494 - root - INFO - step: 16075 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9418 +[titan] 2025-10-05 08:26:53,495 - root - INFO - lr: 3.4731e-05 gnorm: 1.08 [ 9:52:43<14:42:10] +[titan] 2025-10-05 08:27:04,387 - root - INFO - step: 16080 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 08:27:04,387 - root - INFO - lr: 
3.4723e-05 gnorm: 1.09 [ 9:52:54<14:41:59] +[titan] 2025-10-05 08:27:15,275 - root - INFO - step: 16085 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 08:27:15,276 - root - INFO - lr: 3.4714e-05 gnorm: 1.08 [ 9:53:05<14:41:48] +[titan] 2025-10-05 08:27:26,154 - root - INFO - step: 16090 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9521 +[titan] 2025-10-05 08:27:26,154 - root - INFO - lr: 3.4706e-05 gnorm: 1.05 [ 9:53:16<14:41:36] +[titan] 2025-10-05 08:27:37,046 - root - INFO - step: 16095 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:27:37,047 - root - INFO - lr: 3.4698e-05 gnorm: 1.07 [ 9:53:27<14:41:25] +[titan] 2025-10-05 08:27:45,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:27:47,929 - root - INFO - step: 16100 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 08:27:47,930 - root - INFO - lr: 3.4689e-05 gnorm: 1.08 [ 9:53:38<14:41:14] +[titan] 2025-10-05 08:27:58,796 - root - INFO - step: 16105 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9585 +[titan] 2025-10-05 08:27:58,796 - root - INFO - lr: 3.4681e-05 gnorm: 1.07 [ 9:53:49<14:41:02] +[titan] 2025-10-05 08:28:09,669 - root - INFO - step: 16110 loss: 2.2129 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9548 +[titan] 2025-10-05 08:28:09,669 - root - INFO - lr: 3.4672e-05 gnorm: 1.07 [ 9:53:59<14:40:51] +[titan] 2025-10-05 08:28:20,594 - root - INFO - step: 16115 loss: 2.1544 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:28:20,594 - root - INFO - lr: 3.4664e-05 gnorm: 1.05 [ 9:54:10<14:40:40] +[titan] 2025-10-05 08:28:31,485 - root - INFO - step: 16120 loss: 2.2760 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 08:28:31,485 - root - INFO - lr: 3.4655e-05 gnorm: 1.09 [ 9:54:21<14:40:29] +[titan] 2025-10-05 08:28:42,397 - root - INFO - step: 16125 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 08:28:42,398 - root - INFO - lr: 3.4647e-05 gnorm: 1.10 [ 9:54:32<14:40:17] +[titan] 2025-10-05 08:28:53,284 - root - INFO - step: 16130 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 08:28:53,284 - root - INFO - lr: 
3.4638e-05 gnorm: 1.14 [ 9:54:43<14:40:06] +[titan] 2025-10-05 08:29:04,160 - root - INFO - step: 16135 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0235 +[titan] 2025-10-05 08:29:04,161 - root - INFO - lr: 3.4630e-05 gnorm: 1.09 [ 9:54:54<14:39:55] +[titan] 2025-10-05 08:29:15,049 - root - INFO - step: 16140 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 08:29:15,050 - root - INFO - lr: 3.4621e-05 gnorm: 1.06 [ 9:55:05<14:39:43] +[titan] 2025-10-05 08:29:25,956 - root - INFO - step: 16145 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9694 +[titan] 2025-10-05 08:29:25,956 - root - INFO - lr: 3.4613e-05 gnorm: 1.10 [ 9:55:16<14:39:32] +[titan] 2025-10-05 08:29:34,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:29:36,856 - root - INFO - step: 16150 loss: 2.1905 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 08:29:36,857 - root - INFO - lr: 3.4604e-05 gnorm: 1.12 [ 9:55:27<14:39:21] +[titan] 2025-10-05 08:29:47,747 - root - INFO - step: 16155 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 08:29:47,747 - root - INFO - lr: 3.4596e-05 gnorm: 1.06 [ 9:55:38<14:39:09] +[titan] 2025-10-05 08:29:58,621 - root - INFO - step: 16160 loss: 2.2108 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9539 +[titan] 2025-10-05 08:29:58,621 - root - INFO - lr: 3.4588e-05 gnorm: 1.06 [ 9:55:48<14:38:58] +[titan] 2025-10-05 08:30:09,500 - root - INFO - step: 16165 loss: 2.2802 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:30:09,500 - root - INFO - lr: 3.4579e-05 gnorm: 1.11 [ 9:55:59<14:38:47] +[titan] 2025-10-05 08:30:20,377 - root - INFO - step: 16170 loss: 2.2485 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9864 +[titan] 2025-10-05 08:30:20,377 - root - INFO - lr: 3.4571e-05 gnorm: 1.07 [ 9:56:10<14:38:35] +[titan] 2025-10-05 08:30:31,256 - root - INFO - step: 16175 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0133 +[titan] 2025-10-05 08:30:31,256 - root - INFO - lr: 3.4562e-05 gnorm: 1.07 [ 9:56:21<14:38:24] +[titan] 2025-10-05 08:30:42,181 - root - INFO - step: 16180 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 08:30:42,181 - root - INFO - lr: 
3.4554e-05 gnorm: 1.08 [ 9:56:32<14:38:13] +[titan] 2025-10-05 08:30:53,053 - root - INFO - step: 16185 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 08:30:53,053 - root - INFO - lr: 3.4545e-05 gnorm: 1.05 [ 9:56:43<14:38:01] +[titan] 2025-10-05 08:31:03,931 - root - INFO - step: 16190 loss: 2.1765 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9234 +[titan] 2025-10-05 08:31:03,931 - root - INFO - lr: 3.4537e-05 gnorm: 1.08 [ 9:56:54<14:37:50] +[titan] 2025-10-05 08:31:14,795 - root - INFO - step: 16195 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9284 +[titan] 2025-10-05 08:31:14,796 - root - INFO - lr: 3.4528e-05 gnorm: 1.09 [ 9:57:05<14:37:39] +[titan] 2025-10-05 08:31:23,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:31:25,652 - root - INFO - step: 16200 loss: 2.3077 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0390 +[titan] 2025-10-05 08:31:25,653 - root - INFO - lr: 3.4520e-05 gnorm: 1.10 [ 9:57:15<14:37:27] +[titan] 2025-10-05 08:31:36,508 - root - INFO - step: 16205 loss: 2.2864 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0206 +[titan] 2025-10-05 08:31:36,508 - root - INFO - lr: 3.4511e-05 gnorm: 1.04 [ 9:57:26<14:37:16] +[titan] 2025-10-05 08:31:47,457 - root - INFO - step: 16210 loss: 2.2341 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 08:31:47,457 - root - INFO - lr: 3.4503e-05 gnorm: 1.09 [ 9:57:37<14:37:05] +[titan] 2025-10-05 08:31:58,346 - root - INFO - step: 16215 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 08:31:58,346 - root - INFO - lr: 3.4494e-05 gnorm: 1.08 [ 9:57:48<14:36:54] +[titan] 2025-10-05 08:32:09,203 - root - INFO - step: 16220 loss: 2.1804 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 08:32:09,204 - root - INFO - lr: 3.4486e-05 gnorm: 1.07 [ 9:57:59<14:36:42] +[titan] 2025-10-05 08:32:20,094 - root - INFO - step: 16225 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 08:32:20,094 - root - INFO - lr: 3.4477e-05 gnorm: 1.07 [ 9:58:10<14:36:31] +[titan] 2025-10-05 08:32:30,976 - root - INFO - step: 16230 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9488 +[titan] 2025-10-05 08:32:30,977 - root - INFO - lr: 
3.4469e-05 gnorm: 1.05 [ 9:58:21<14:36:20] +[titan] 2025-10-05 08:32:41,910 - root - INFO - step: 16235 loss: 2.2424 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:32:41,910 - root - INFO - lr: 3.4460e-05 gnorm: 1.06 [ 9:58:32<14:36:08] +[titan] 2025-10-05 08:32:52,835 - root - INFO - step: 16240 loss: 2.1658 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9139 +[titan] 2025-10-05 08:32:52,835 - root - INFO - lr: 3.4452e-05 gnorm: 1.04 [ 9:58:43<14:35:57] +[titan] 2025-10-05 08:33:03,725 - root - INFO - step: 16245 loss: 2.2254 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:33:03,725 - root - INFO - lr: 3.4443e-05 gnorm: 1.08 [ 9:58:54<14:35:46] +[titan] 2025-10-05 08:33:12,442 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:33:14,633 - root - INFO - step: 16250 loss: 2.2316 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 08:33:14,634 - root - INFO - lr: 3.4435e-05 gnorm: 1.10 [ 9:59:04<14:35:34] +[titan] 2025-10-05 08:33:25,534 - root - INFO - step: 16255 loss: 2.3076 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0385 +[titan] 2025-10-05 08:33:25,534 - root - INFO - lr: 3.4426e-05 gnorm: 1.10 [ 9:59:15<14:35:23] +[titan] 2025-10-05 08:33:36,432 - root - INFO - step: 16260 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 08:33:36,433 - root - INFO - lr: 3.4418e-05 gnorm: 1.13 [ 9:59:26<14:35:12] +[titan] 2025-10-05 08:33:47,313 - root - INFO - step: 16265 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9849 +[titan] 2025-10-05 08:33:47,313 - root - INFO - lr: 3.4409e-05 gnorm: 1.10 [ 9:59:37<14:35:01] +[titan] 2025-10-05 08:33:58,157 - root - INFO - step: 16270 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:33:58,157 - root - INFO - lr: 3.4401e-05 gnorm: 1.09 [ 9:59:48<14:34:49] +[titan] 2025-10-05 08:34:09,059 - root - INFO - step: 16275 loss: 2.2042 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 08:34:09,059 - root - INFO - lr: 3.4392e-05 gnorm: 1.05 [ 9:59:59<14:34:38] +[titan] 2025-10-05 08:34:19,912 - root - INFO - step: 16280 loss: 2.2416 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:34:19,912 - root - INFO - lr: 
3.4384e-05 gnorm: 1.07 [10:00:10<14:34:26] +[titan] 2025-10-05 08:34:30,777 - root - INFO - step: 16285 loss: 2.1576 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:34:30,777 - root - INFO - lr: 3.4375e-05 gnorm: 1.09 [10:00:21<14:34:15] +[titan] 2025-10-05 08:34:41,653 - root - INFO - step: 16290 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 08:34:41,653 - root - INFO - lr: 3.4367e-05 gnorm: 1.05 [10:00:31<14:34:04] +[titan] 2025-10-05 08:34:52,516 - root - INFO - step: 16295 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 08:34:52,516 - root - INFO - lr: 3.4358e-05 gnorm: 1.05 [10:00:42<14:33:52] +[titan] 2025-10-05 08:35:01,184 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:35:03,369 - root - INFO - step: 16300 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0097 +[titan] 2025-10-05 08:35:03,370 - root - INFO - lr: 3.4350e-05 gnorm: 1.13 [10:00:53<14:33:41] +[titan] 2025-10-05 08:35:14,258 - root - INFO - step: 16305 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:35:14,258 - root - INFO - lr: 3.4341e-05 gnorm: 1.10 [10:01:04<14:33:30] +[titan] 2025-10-05 08:35:25,117 - root - INFO - step: 16310 loss: 2.2039 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 08:35:25,117 - root - INFO - lr: 3.4333e-05 gnorm: 1.07 [10:01:15<14:33:18] +[titan] 2025-10-05 08:35:35,923 - root - INFO - step: 16315 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:35:35,923 - root - INFO - lr: 3.4324e-05 gnorm: 1.06 [10:01:26<14:33:07] +[titan] 2025-10-05 08:35:46,803 - root - INFO - step: 16320 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9004 +[titan] 2025-10-05 08:35:46,803 - root - INFO - lr: 3.4316e-05 gnorm: 1.06 [10:01:37<14:32:56] +[titan] 2025-10-05 08:35:57,651 - root - INFO - step: 16325 loss: 2.2716 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0076 +[titan] 2025-10-05 08:35:57,651 - root - INFO - lr: 3.4307e-05 gnorm: 1.08 [10:01:47<14:32:44] +[titan] 2025-10-05 08:36:08,474 - root - INFO - step: 16330 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8746 +[titan] 2025-10-05 08:36:08,474 - root - INFO - lr: 
3.4299e-05 gnorm: 1.05 [10:01:58<14:32:33] +[titan] 2025-10-05 08:36:19,326 - root - INFO - step: 16335 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 08:36:19,326 - root - INFO - lr: 3.4290e-05 gnorm: 1.05 [10:02:09<14:32:22] +[titan] 2025-10-05 08:36:30,202 - root - INFO - step: 16340 loss: 2.2109 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9528 +[titan] 2025-10-05 08:36:30,202 - root - INFO - lr: 3.4282e-05 gnorm: 1.09 [10:02:20<14:32:10] +[titan] 2025-10-05 08:36:41,056 - root - INFO - step: 16345 loss: 2.2287 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9688 +[titan] 2025-10-05 08:36:41,056 - root - INFO - lr: 3.4273e-05 gnorm: 1.09 [10:02:31<14:31:59] +[titan] 2025-10-05 08:36:49,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:36:51,933 - root - INFO - step: 16350 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 08:36:51,933 - root - INFO - lr: 3.4265e-05 gnorm: 1.08 [10:02:42<14:31:48] +[titan] 2025-10-05 08:37:02,815 - root - INFO - step: 16355 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0175 +[titan] 2025-10-05 08:37:02,815 - root - INFO - lr: 3.4256e-05 gnorm: 1.09 [10:02:53<14:31:36] +[titan] 2025-10-05 08:37:13,670 - root - INFO - step: 16360 loss: 2.1862 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:37:13,671 - root - INFO - lr: 3.4248e-05 gnorm: 1.04 [10:03:03<14:31:25] +[titan] 2025-10-05 08:37:24,518 - root - INFO - step: 16365 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:37:24,518 - root - INFO - lr: 3.4239e-05 gnorm: 1.12 [10:03:14<14:31:14] +[titan] 2025-10-05 08:37:35,400 - root - INFO - step: 16370 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9962 +[titan] 2025-10-05 08:37:35,401 - root - INFO - lr: 3.4231e-05 gnorm: 1.08 [10:03:25<14:31:02] +[titan] 2025-10-05 08:37:46,321 - root - INFO - step: 16375 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 08:37:46,321 - root - INFO - lr: 3.4222e-05 gnorm: 1.06 [10:03:36<14:30:51] +[titan] 2025-10-05 08:37:57,173 - root - INFO - step: 16380 loss: 2.2402 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9783 +[titan] 2025-10-05 08:37:57,173 - root - INFO - lr: 
3.4214e-05 gnorm: 1.11 [10:03:47<14:30:40] +[titan] 2025-10-05 08:38:06,135 - root - INFO - Dumping profiler traces at step 16384 +[titan] 2025-10-05 08:38:06,186 - root - INFO - Finished dumping profiler traces in 0.05 seconds +[titan] 2025-10-05 08:38:08,373 - root - INFO - step: 16385 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 29,258 tflops: 405.90 mfu: 41.04% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9247 +[titan] 2025-10-05 08:38:08,373 - root - INFO - lr: 3.4205e-05 gnorm: 1.11 [10:03:58<14:30:29] +[titan] 2025-10-05 08:38:19,239 - root - INFO - step: 16390 loss: 2.2560 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 08:38:19,239 - root - INFO - lr: 3.4197e-05 gnorm: 1.08 [10:04:09<14:30:17] +[titan] 2025-10-05 08:38:30,091 - root - INFO - step: 16395 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 08:38:30,091 - root - INFO - lr: 3.4188e-05 gnorm: 1.06 [10:04:20<14:30:06] +[titan] 2025-10-05 08:38:38,778 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:38:41,007 - root - INFO - step: 16400 loss: 2.1921 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 08:38:41,007 - root - INFO - lr: 3.4180e-05 gnorm: 1.12 [10:04:31<14:29:55] +[titan] 2025-10-05 08:38:51,898 - root - INFO - step: 16405 loss: 2.2523 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9902 +[titan] 2025-10-05 08:38:51,898 - root - INFO - lr: 3.4171e-05 gnorm: 1.10 [10:04:42<14:29:43] +[titan] 2025-10-05 08:39:02,751 - root - INFO - step: 16410 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 08:39:02,751 - root - INFO - lr: 3.4163e-05 gnorm: 1.10 [10:04:53<14:29:32] +[titan] 2025-10-05 08:39:13,601 - root - INFO - step: 16415 loss: 2.1622 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 08:39:13,601 - root - INFO - lr: 3.4154e-05 gnorm: 1.06 [10:05:03<14:29:21] +[titan] 2025-10-05 08:39:24,471 - root - INFO - step: 16420 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9579 +[titan] 2025-10-05 08:39:24,472 - root - INFO - lr: 3.4146e-05 gnorm: 1.06 [10:05:14<14:29:09] +[titan] 2025-10-05 08:39:35,332 - root - INFO - step: 16425 loss: 2.1912 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9364 +[titan] 2025-10-05 08:39:35,333 - root - INFO - lr: 3.4137e-05 gnorm: 1.06 [10:05:25<14:28:58] +[titan] 2025-10-05 08:39:46,223 - root - INFO - step: 16430 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 08:39:46,223 - root - INFO - lr: 
3.4129e-05 gnorm: 1.07 [10:05:36<14:28:47] +[titan] 2025-10-05 08:39:57,116 - root - INFO - step: 16435 loss: 2.2229 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9601 +[titan] 2025-10-05 08:39:57,116 - root - INFO - lr: 3.4120e-05 gnorm: 1.10 [10:05:47<14:28:36] +[titan] 2025-10-05 08:40:07,956 - root - INFO - step: 16440 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9241 +[titan] 2025-10-05 08:40:07,956 - root - INFO - lr: 3.4111e-05 gnorm: 1.06 [10:05:58<14:28:24] +[titan] 2025-10-05 08:40:18,791 - root - INFO - step: 16445 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:40:18,792 - root - INFO - lr: 3.4103e-05 gnorm: 1.08 [10:06:09<14:28:13] +[titan] 2025-10-05 08:40:27,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:40:29,625 - root - INFO - step: 16450 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9165 +[titan] 2025-10-05 08:40:29,626 - root - INFO - lr: 3.4094e-05 gnorm: 1.09 [10:06:19<14:28:01] +[titan] 2025-10-05 08:40:40,476 - root - INFO - step: 16455 loss: 2.1561 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 08:40:40,476 - root - INFO - lr: 3.4086e-05 gnorm: 1.05 [10:06:30<14:27:50] +[titan] 2025-10-05 08:40:51,351 - root - INFO - step: 16460 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.9013 +[titan] 2025-10-05 08:40:51,351 - root - INFO - lr: 3.4077e-05 gnorm: 1.06 [10:06:41<14:27:39] +[titan] 2025-10-05 08:41:02,252 - root - INFO - step: 16465 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:41:02,252 - root - INFO - lr: 3.4069e-05 gnorm: 1.05 [10:06:52<14:27:27] +[titan] 2025-10-05 08:41:13,112 - root - INFO - step: 16470 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9462 +[titan] 2025-10-05 08:41:13,113 - root - INFO - lr: 3.4060e-05 gnorm: 1.10 [10:07:03<14:27:16] +[titan] 2025-10-05 08:41:23,980 - root - INFO - step: 16475 loss: 2.2132 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:41:23,981 - root - INFO - lr: 3.4052e-05 gnorm: 1.05 [10:07:14<14:27:05] +[titan] 2025-10-05 08:41:34,850 - root - INFO - step: 16480 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 08:41:34,850 - root - INFO - lr: 
3.4043e-05 gnorm: 1.07 [10:07:25<14:26:53] +[titan] 2025-10-05 08:41:45,728 - root - INFO - step: 16485 loss: 2.1837 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 08:41:45,728 - root - INFO - lr: 3.4035e-05 gnorm: 1.10 [10:07:36<14:26:42] +[titan] 2025-10-05 08:41:56,603 - root - INFO - step: 16490 loss: 2.2265 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 08:41:56,603 - root - INFO - lr: 3.4026e-05 gnorm: 1.08 [10:07:46<14:26:31] +[titan] 2025-10-05 08:42:07,468 - root - INFO - step: 16495 loss: 2.2288 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 08:42:07,468 - root - INFO - lr: 3.4018e-05 gnorm: 1.10 [10:07:57<14:26:19] +[titan] 2025-10-05 08:42:16,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:42:18,373 - root - INFO - step: 16500 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9920 +[titan] 2025-10-05 08:42:18,373 - root - INFO - lr: 3.4009e-05 gnorm: 1.10 [10:08:08<14:26:08] +[titan] 2025-10-05 08:42:29,248 - root - INFO - step: 16505 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 08:42:29,248 - root - INFO - lr: 3.4000e-05 gnorm: 1.06 [10:08:19<14:25:57] +[titan] 2025-10-05 08:42:40,112 - root - INFO - step: 16510 loss: 2.1951 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9387 +[titan] 2025-10-05 08:42:40,112 - root - INFO - lr: 3.3992e-05 gnorm: 1.06 [10:08:30<14:25:46] +[titan] 2025-10-05 08:42:51,000 - root - INFO - step: 16515 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9731 +[titan] 2025-10-05 08:42:51,000 - root - INFO - lr: 3.3983e-05 gnorm: 1.06 [10:08:41<14:25:34] +[titan] 2025-10-05 08:43:01,864 - root - INFO - step: 16520 loss: 2.2392 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 1.9746 +[titan] 2025-10-05 08:43:01,864 - root - INFO - lr: 3.3975e-05 gnorm: 1.07 [10:08:52<14:25:23] +[titan] 2025-10-05 08:43:12,727 - root - INFO - step: 16525 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0203 +[titan] 2025-10-05 08:43:12,727 - root - INFO - lr: 3.3966e-05 gnorm: 1.13 [10:09:03<14:25:12] +[titan] 2025-10-05 08:43:23,632 - root - INFO - step: 16530 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 08:43:23,632 - root - INFO - lr: 
3.3958e-05 gnorm: 1.08 [10:09:13<14:25:00] +[titan] 2025-10-05 08:43:34,515 - root - INFO - step: 16535 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:43:34,515 - root - INFO - lr: 3.3949e-05 gnorm: 1.08 [10:09:24<14:24:49] +[titan] 2025-10-05 08:43:45,404 - root - INFO - step: 16540 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 08:43:45,404 - root - INFO - lr: 3.3941e-05 gnorm: 1.14 [10:09:35<14:24:38] +[titan] 2025-10-05 08:43:56,319 - root - INFO - step: 16545 loss: 2.1857 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 08:43:56,319 - root - INFO - lr: 3.3932e-05 gnorm: 1.07 [10:09:46<14:24:26] +[titan] 2025-10-05 08:44:05,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:44:07,197 - root - INFO - step: 16550 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 08:44:07,197 - root - INFO - lr: 3.3924e-05 gnorm: 1.05 [10:09:57<14:24:15] +[titan] 2025-10-05 08:44:18,066 - root - INFO - step: 16555 loss: 2.2226 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9632 +[titan] 2025-10-05 08:44:18,066 - root - INFO - lr: 3.3915e-05 gnorm: 1.09 [10:10:08<14:24:04] +[titan] 2025-10-05 08:44:28,972 - root - INFO - step: 16560 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 08:44:28,972 - root - INFO - lr: 3.3906e-05 gnorm: 1.05 [10:10:19<14:23:53] +[titan] 2025-10-05 08:44:39,817 - root - INFO - step: 16565 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9362 +[titan] 2025-10-05 08:44:39,817 - root - INFO - lr: 3.3898e-05 gnorm: 1.07 [10:10:30<14:23:41] +[titan] 2025-10-05 08:44:50,691 - root - INFO - step: 16570 loss: 2.1798 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9260 +[titan] 2025-10-05 08:44:50,691 - root - INFO - lr: 3.3889e-05 gnorm: 1.08 [10:10:40<14:23:30] +[titan] 2025-10-05 08:45:01,549 - root - INFO - step: 16575 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:45:01,549 - root - INFO - lr: 3.3881e-05 gnorm: 1.05 [10:10:51<14:23:19] +[titan] 2025-10-05 08:45:12,413 - root - INFO - step: 16580 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:45:12,413 - root - INFO - lr: 
3.3872e-05 gnorm: 1.08 [10:11:02<14:23:07] +[titan] 2025-10-05 08:45:23,289 - root - INFO - step: 16585 loss: 2.1742 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9208 +[titan] 2025-10-05 08:45:23,289 - root - INFO - lr: 3.3864e-05 gnorm: 1.07 [10:11:13<14:22:56] +[titan] 2025-10-05 08:45:34,149 - root - INFO - step: 16590 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 08:45:34,149 - root - INFO - lr: 3.3855e-05 gnorm: 1.11 [10:11:24<14:22:45] +[titan] 2025-10-05 08:45:45,091 - root - INFO - step: 16595 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 08:45:45,091 - root - INFO - lr: 3.3847e-05 gnorm: 1.06 [10:11:35<14:22:33] +[titan] 2025-10-05 08:45:53,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:45:55,993 - root - INFO - step: 16600 loss: 2.1689 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9160 +[titan] 2025-10-05 08:45:55,993 - root - INFO - lr: 3.3838e-05 gnorm: 1.04 [10:11:46<14:22:22] +[titan] 2025-10-05 08:46:06,866 - root - INFO - step: 16605 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:46:06,866 - root - INFO - lr: 3.3829e-05 gnorm: 1.04 [10:11:57<14:22:11] +[titan] 2025-10-05 08:46:17,754 - root - INFO - step: 16610 loss: 2.2141 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 08:46:17,755 - root - INFO - lr: 3.3821e-05 gnorm: 1.09 [10:12:08<14:21:59] +[titan] 2025-10-05 08:46:28,629 - root - INFO - step: 16615 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9330 +[titan] 2025-10-05 08:46:28,629 - root - INFO - lr: 3.3812e-05 gnorm: 1.09 [10:12:18<14:21:48] +[titan] 2025-10-05 08:46:39,510 - root - INFO - step: 16620 loss: 2.1330 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 08:46:39,510 - root - INFO - lr: 3.3804e-05 gnorm: 1.07 [10:12:29<14:21:37] +[titan] 2025-10-05 08:46:50,420 - root - INFO - step: 16625 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9610 +[titan] 2025-10-05 08:46:50,420 - root - INFO - lr: 3.3795e-05 gnorm: 1.09 [10:12:40<14:21:26] +[titan] 2025-10-05 08:47:01,324 - root - INFO - step: 16630 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 08:47:01,324 - root - INFO - lr: 
3.3787e-05 gnorm: 1.10 [10:12:51<14:21:14] +[titan] 2025-10-05 08:47:12,217 - root - INFO - step: 16635 loss: 2.1195 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 08:47:12,217 - root - INFO - lr: 3.3778e-05 gnorm: 1.09 [10:13:02<14:21:03] +[titan] 2025-10-05 08:47:23,110 - root - INFO - step: 16640 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:47:23,110 - root - INFO - lr: 3.3769e-05 gnorm: 1.12 [10:13:13<14:20:52] +[titan] 2025-10-05 08:47:34,010 - root - INFO - step: 16645 loss: 2.1744 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 08:47:34,010 - root - INFO - lr: 3.3761e-05 gnorm: 1.10 [10:13:24<14:20:40] +[titan] 2025-10-05 08:47:42,720 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:47:44,910 - root - INFO - step: 16650 loss: 2.1803 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 08:47:44,910 - root - INFO - lr: 3.3752e-05 gnorm: 1.11 [10:13:35<14:20:29] +[titan] 2025-10-05 08:47:55,812 - root - INFO - step: 16655 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 08:47:55,812 - root - INFO - lr: 3.3744e-05 gnorm: 1.10 [10:13:46<14:20:18] +[titan] 2025-10-05 08:48:06,738 - root - INFO - step: 16660 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0120 +[titan] 2025-10-05 08:48:06,738 - root - INFO - lr: 3.3735e-05 gnorm: 1.11 [10:13:57<14:20:07] +[titan] 2025-10-05 08:48:17,635 - root - INFO - step: 16665 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:48:17,635 - root - INFO - lr: 3.3727e-05 gnorm: 1.10 [10:14:07<14:19:55] +[titan] 2025-10-05 08:48:28,518 - root - INFO - step: 16670 loss: 2.2203 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:48:28,518 - root - INFO - lr: 3.3718e-05 gnorm: 1.10 [10:14:18<14:19:44] +[titan] 2025-10-05 08:48:39,418 - root - INFO - step: 16675 loss: 2.2253 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2593 global_avg_mtp_loss: 1.9660 +[titan] 2025-10-05 08:48:39,419 - root - INFO - lr: 3.3709e-05 gnorm: 1.14 [10:14:29<14:19:33] +[titan] 2025-10-05 08:48:50,307 - root - INFO - step: 16680 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 08:48:50,307 - root - INFO - lr: 
3.3701e-05 gnorm: 1.09 [10:14:40<14:19:22] +[titan] 2025-10-05 08:49:01,231 - root - INFO - step: 16685 loss: 2.2071 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 08:49:01,231 - root - INFO - lr: 3.3692e-05 gnorm: 1.06 [10:14:51<14:19:10] +[titan] 2025-10-05 08:49:12,142 - root - INFO - step: 16690 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9875 +[titan] 2025-10-05 08:49:12,142 - root - INFO - lr: 3.3684e-05 gnorm: 1.05 [10:15:02<14:18:59] +[titan] 2025-10-05 08:49:23,035 - root - INFO - step: 16695 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0009 +[titan] 2025-10-05 08:49:23,035 - root - INFO - lr: 3.3675e-05 gnorm: 1.04 [10:15:13<14:18:48] +[titan] 2025-10-05 08:49:31,750 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:49:33,935 - root - INFO - step: 16700 loss: 2.1213 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8753 +[titan] 2025-10-05 08:49:33,935 - root - INFO - lr: 3.3667e-05 gnorm: 1.05 [10:15:24<14:18:37] +[titan] 2025-10-05 08:49:44,821 - root - INFO - step: 16705 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 08:49:44,821 - root - INFO - lr: 3.3658e-05 gnorm: 1.04 [10:15:35<14:18:25] +[titan] 2025-10-05 08:49:55,770 - root - INFO - step: 16710 loss: 2.1830 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9279 +[titan] 2025-10-05 08:49:55,770 - root - INFO - lr: 3.3649e-05 gnorm: 1.06 [10:15:46<14:18:14] +[titan] 2025-10-05 08:50:06,646 - root - INFO - step: 16715 loss: 2.1474 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 08:50:06,646 - root - INFO - lr: 3.3641e-05 gnorm: 1.05 [10:15:56<14:18:03] +[titan] 2025-10-05 08:50:17,562 - root - INFO - step: 16720 loss: 2.2478 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9859 +[titan] 2025-10-05 08:50:17,562 - root - INFO - lr: 3.3632e-05 gnorm: 1.08 [10:16:07<14:17:51] +[titan] 2025-10-05 08:50:28,447 - root - INFO - step: 16725 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 08:50:28,447 - root - INFO - lr: 3.3624e-05 gnorm: 1.03 [10:16:18<14:17:40] +[titan] 2025-10-05 08:50:39,327 - root - INFO - step: 16730 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 08:50:39,327 - root - INFO - lr: 
3.3615e-05 gnorm: 1.07 [10:16:29<14:17:29] +[titan] 2025-10-05 08:50:50,218 - root - INFO - step: 16735 loss: 2.1919 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:50:50,218 - root - INFO - lr: 3.3606e-05 gnorm: 1.08 [10:16:40<14:17:18] +[titan] 2025-10-05 08:51:01,116 - root - INFO - step: 16740 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9342 +[titan] 2025-10-05 08:51:01,116 - root - INFO - lr: 3.3598e-05 gnorm: 1.01 [10:16:51<14:17:06] +[titan] 2025-10-05 08:51:11,988 - root - INFO - step: 16745 loss: 2.1719 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 08:51:11,988 - root - INFO - lr: 3.3589e-05 gnorm: 1.09 [10:17:02<14:16:55] +[titan] 2025-10-05 08:51:20,683 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:51:22,867 - root - INFO - step: 16750 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:51:22,867 - root - INFO - lr: 3.3581e-05 gnorm: 1.07 [10:17:13<14:16:44] +[titan] 2025-10-05 08:51:33,766 - root - INFO - step: 16755 loss: 2.1698 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:51:33,766 - root - INFO - lr: 3.3572e-05 gnorm: 1.08 [10:17:24<14:16:32] +[titan] 2025-10-05 08:51:44,647 - root - INFO - step: 16760 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 08:51:44,647 - root - INFO - lr: 3.3563e-05 gnorm: 1.07 [10:17:34<14:16:21] +[titan] 2025-10-05 08:51:55,539 - root - INFO - step: 16765 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 08:51:55,539 - root - INFO - lr: 3.3555e-05 gnorm: 1.08 [10:17:45<14:16:10] +[titan] 2025-10-05 08:52:06,452 - root - INFO - step: 16770 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9905 +[titan] 2025-10-05 08:52:06,452 - root - INFO - lr: 3.3546e-05 gnorm: 1.10 [10:17:56<14:15:59] +[titan] 2025-10-05 08:52:17,344 - root - INFO - step: 16775 loss: 2.2357 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 08:52:17,344 - root - INFO - lr: 3.3538e-05 gnorm: 1.12 [10:18:07<14:15:47] +[titan] 2025-10-05 08:52:28,243 - root - INFO - step: 16780 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 08:52:28,243 - root - INFO - lr: 
3.3529e-05 gnorm: 1.05 [10:18:18<14:15:36] +[titan] 2025-10-05 08:52:39,158 - root - INFO - step: 16785 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 08:52:39,158 - root - INFO - lr: 3.3520e-05 gnorm: 1.08 [10:18:29<14:15:25] +[titan] 2025-10-05 08:52:50,027 - root - INFO - step: 16790 loss: 2.3254 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 08:52:50,027 - root - INFO - lr: 3.3512e-05 gnorm: 1.08 [10:18:40<14:15:13] +[titan] 2025-10-05 08:53:00,972 - root - INFO - step: 16795 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8815 +[titan] 2025-10-05 08:53:00,972 - root - INFO - lr: 3.3503e-05 gnorm: 1.05 [10:18:51<14:15:02] +[titan] 2025-10-05 08:53:09,655 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:53:11,847 - root - INFO - step: 16800 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9137 +[titan] 2025-10-05 08:53:11,847 - root - INFO - lr: 3.3495e-05 gnorm: 1.04 [10:19:02<14:14:51] +[titan] 2025-10-05 08:53:22,744 - root - INFO - step: 16805 loss: 2.2778 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0130 +[titan] 2025-10-05 08:53:22,744 - root - INFO - lr: 3.3486e-05 gnorm: 1.06 [10:19:13<14:14:40] +[titan] 2025-10-05 08:53:33,623 - root - INFO - step: 16810 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 08:53:33,623 - root - INFO - lr: 3.3477e-05 gnorm: 1.10 [10:19:23<14:14:28] +[titan] 2025-10-05 08:53:44,493 - root - INFO - step: 16815 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 08:53:44,493 - root - INFO - lr: 3.3469e-05 gnorm: 1.08 [10:19:34<14:14:17] +[titan] 2025-10-05 08:53:55,405 - root - INFO - step: 16820 loss: 2.3161 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2704 global_avg_mtp_loss: 2.0457 +[titan] 2025-10-05 08:53:55,405 - root - INFO - lr: 3.3460e-05 gnorm: 1.05 [10:19:45<14:14:06] +[titan] 2025-10-05 08:54:06,325 - root - INFO - step: 16825 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:54:06,325 - root - INFO - lr: 3.3452e-05 gnorm: 1.06 [10:19:56<14:13:55] +[titan] 2025-10-05 08:54:17,199 - root - INFO - step: 16830 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 08:54:17,199 - root - INFO - lr: 
3.3443e-05 gnorm: 1.14 [10:20:07<14:13:43] +[titan] 2025-10-05 08:54:28,086 - root - INFO - step: 16835 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 08:54:28,087 - root - INFO - lr: 3.3434e-05 gnorm: 1.11 [10:20:18<14:13:32] +[titan] 2025-10-05 08:54:38,979 - root - INFO - step: 16840 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:54:38,979 - root - INFO - lr: 3.3426e-05 gnorm: 1.10 [10:20:29<14:13:21] +[titan] 2025-10-05 08:54:49,879 - root - INFO - step: 16845 loss: 2.2348 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9741 +[titan] 2025-10-05 08:54:49,879 - root - INFO - lr: 3.3417e-05 gnorm: 1.14 [10:20:40<14:13:09] +[titan] 2025-10-05 08:54:58,600 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:55:00,873 - root - INFO - step: 16850 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 08:55:00,873 - root - INFO - lr: 3.3409e-05 gnorm: 1.06 [10:20:51<14:12:58] +[titan] 2025-10-05 08:55:11,763 - root - INFO - step: 16855 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:55:11,763 - root - INFO - lr: 3.3400e-05 gnorm: 1.10 [10:21:02<14:12:47] +[titan] 2025-10-05 08:55:22,662 - root - INFO - step: 16860 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:55:22,662 - root - INFO - lr: 3.3391e-05 gnorm: 1.05 [10:21:12<14:12:36] +[titan] 2025-10-05 08:55:33,543 - root - INFO - step: 16865 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8902 +[titan] 2025-10-05 08:55:33,543 - root - INFO - lr: 3.3383e-05 gnorm: 1.08 [10:21:23<14:12:25] +[titan] 2025-10-05 08:55:44,433 - root - INFO - step: 16870 loss: 2.2119 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:55:44,433 - root - INFO - lr: 3.3374e-05 gnorm: 1.08 [10:21:34<14:12:13] +[titan] 2025-10-05 08:55:55,318 - root - INFO - step: 16875 loss: 2.2256 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:55:55,318 - root - INFO - lr: 3.3366e-05 gnorm: 1.09 [10:21:45<14:12:02] +[titan] 2025-10-05 08:56:06,283 - root - INFO - step: 16880 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:56:06,283 - root - INFO - lr: 
3.3357e-05 gnorm: 1.08 [10:21:56<14:11:51] +[titan] 2025-10-05 08:56:17,168 - root - INFO - step: 16885 loss: 2.2361 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9760 +[titan] 2025-10-05 08:56:17,168 - root - INFO - lr: 3.3348e-05 gnorm: 1.07 [10:22:07<14:11:39] +[titan] 2025-10-05 08:56:28,070 - root - INFO - step: 16890 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:56:28,070 - root - INFO - lr: 3.3340e-05 gnorm: 1.03 [10:22:18<14:11:28] +[titan] 2025-10-05 08:56:39,053 - root - INFO - step: 16895 loss: 2.2559 memory: 118.84GiB(85.28%) tps: 29,836 tflops: 413.93 mfu: 41.85% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 08:56:39,054 - root - INFO - lr: 3.3331e-05 gnorm: 1.10 [10:22:29<14:11:17] +[titan] 2025-10-05 08:56:41,416 - root - INFO - Dumping profiler traces at step 16896 +[titan] 2025-10-05 08:56:41,457 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:56:47,993 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:56:50,179 - root - INFO - step: 16900 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 29,452 tflops: 408.61 mfu: 41.32% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9188 +[titan] 2025-10-05 08:56:50,180 - root - INFO - lr: 3.3322e-05 gnorm: 1.02 [10:22:40<14:11:06] +[titan] 2025-10-05 08:57:01,083 - root - INFO - step: 16905 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9226 +[titan] 2025-10-05 08:57:01,084 - root - INFO - lr: 3.3314e-05 gnorm: 1.15 [10:22:51<14:10:55] +[titan] 2025-10-05 08:57:11,941 - root - INFO - step: 16910 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9271 +[titan] 2025-10-05 08:57:11,942 - root - INFO - lr: 3.3305e-05 gnorm: 1.04 [10:23:02<14:10:44] +[titan] 2025-10-05 08:57:22,821 - root - INFO - step: 16915 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 08:57:22,822 - root - INFO - lr: 3.3297e-05 gnorm: 1.10 [10:23:13<14:10:32] +[titan] 2025-10-05 08:57:33,708 - root - INFO - step: 16920 loss: 2.1768 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9231 +[titan] 2025-10-05 08:57:33,708 - root - INFO - lr: 3.3288e-05 gnorm: 1.07 [10:23:23<14:10:21] +[titan] 2025-10-05 08:57:44,586 - root - INFO - step: 16925 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 08:57:44,586 - root - INFO - lr: 3.3279e-05 gnorm: 1.10 [10:23:34<14:10:10] +[titan] 2025-10-05 08:57:55,466 - root - INFO - step: 16930 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 08:57:55,466 - root - INFO - lr: 
3.3271e-05 gnorm: 1.08 [10:23:45<14:09:58] +[titan] 2025-10-05 08:58:06,365 - root - INFO - step: 16935 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9721 +[titan] 2025-10-05 08:58:06,365 - root - INFO - lr: 3.3262e-05 gnorm: 1.09 [10:23:56<14:09:47] +[titan] 2025-10-05 08:58:17,240 - root - INFO - step: 16940 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 08:58:17,240 - root - INFO - lr: 3.3253e-05 gnorm: 1.07 [10:24:07<14:09:36] +[titan] 2025-10-05 08:58:28,143 - root - INFO - step: 16945 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9826 +[titan] 2025-10-05 08:58:28,143 - root - INFO - lr: 3.3245e-05 gnorm: 1.07 [10:24:18<14:09:25] +[titan] 2025-10-05 08:58:36,825 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:58:39,030 - root - INFO - step: 16950 loss: 2.2032 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:58:39,030 - root - INFO - lr: 3.3236e-05 gnorm: 1.07 [10:24:29<14:09:13] +[titan] 2025-10-05 08:58:49,927 - root - INFO - step: 16955 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 08:58:49,928 - root - INFO - lr: 3.3228e-05 gnorm: 1.13 [10:24:40<14:09:02] +[titan] 2025-10-05 08:59:00,813 - root - INFO - step: 16960 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 08:59:00,813 - root - INFO - lr: 3.3219e-05 gnorm: 1.07 [10:24:51<14:08:51] +[titan] 2025-10-05 08:59:11,725 - root - INFO - step: 16965 loss: 2.1770 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9229 +[titan] 2025-10-05 08:59:11,725 - root - INFO - lr: 3.3210e-05 gnorm: 1.09 [10:25:01<14:08:40] +[titan] 2025-10-05 08:59:22,600 - root - INFO - step: 16970 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 08:59:22,601 - root - INFO - lr: 3.3202e-05 gnorm: 1.13 [10:25:12<14:08:28] +[titan] 2025-10-05 08:59:33,459 - root - INFO - step: 16975 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9184 +[titan] 2025-10-05 08:59:33,460 - root - INFO - lr: 3.3193e-05 gnorm: 1.10 [10:25:23<14:08:17] +[titan] 2025-10-05 08:59:44,382 - root - INFO - step: 16980 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9735 +[titan] 2025-10-05 08:59:44,382 - root - INFO - lr: 
3.3184e-05 gnorm: 1.04 [10:25:34<14:08:06] +[titan] 2025-10-05 08:59:55,274 - root - INFO - step: 16985 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8945 +[titan] 2025-10-05 08:59:55,274 - root - INFO - lr: 3.3176e-05 gnorm: 1.06 [10:25:45<14:07:54] +[titan] 2025-10-05 09:00:06,182 - root - INFO - step: 16990 loss: 2.2652 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0002 +[titan] 2025-10-05 09:00:06,183 - root - INFO - lr: 3.3167e-05 gnorm: 1.09 [10:25:56<14:07:43] +[titan] 2025-10-05 09:00:17,071 - root - INFO - step: 16995 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:00:17,071 - root - INFO - lr: 3.3158e-05 gnorm: 1.08 [10:26:07<14:07:32] +[titan] 2025-10-05 09:00:25,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:00:27,948 - root - INFO - step: 17000 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:00:27,948 - root - INFO - lr: 3.3150e-05 gnorm: 1.11 [10:26:18<14:07:21] +[titan] 2025-10-05 09:00:38,826 - root - INFO - step: 17005 loss: 2.2227 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 09:00:38,826 - root - INFO - lr: 3.3141e-05 gnorm: 1.07 [10:26:29<14:07:09] +[titan] 2025-10-05 09:00:49,742 - root - INFO - step: 17010 loss: 2.2205 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:00:49,742 - root - INFO - lr: 3.3133e-05 gnorm: 1.05 [10:26:39<14:06:58] +[titan] 2025-10-05 09:01:00,622 - root - INFO - step: 17015 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9094 +[titan] 2025-10-05 09:01:00,622 - root - INFO - lr: 3.3124e-05 gnorm: 1.08 [10:26:50<14:06:47] +[titan] 2025-10-05 09:01:11,523 - root - INFO - step: 17020 loss: 2.1800 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9266 +[titan] 2025-10-05 09:01:11,523 - root - INFO - lr: 3.3115e-05 gnorm: 1.07 [10:27:01<14:06:36] +[titan] 2025-10-05 09:01:22,424 - root - INFO - step: 17025 loss: 2.2024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9461 +[titan] 2025-10-05 09:01:22,425 - root - INFO - lr: 3.3107e-05 gnorm: 1.04 [10:27:12<14:06:24] +[titan] 2025-10-05 09:01:33,324 - root - INFO - step: 17030 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 09:01:33,324 - root - INFO - lr: 
3.3098e-05 gnorm: 1.07 [10:27:23<14:06:13] +[titan] 2025-10-05 09:01:44,236 - root - INFO - step: 17035 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9083 +[titan] 2025-10-05 09:01:44,236 - root - INFO - lr: 3.3089e-05 gnorm: 1.04 [10:27:34<14:06:02] +[titan] 2025-10-05 09:01:55,136 - root - INFO - step: 17040 loss: 2.1831 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 09:01:55,136 - root - INFO - lr: 3.3081e-05 gnorm: 1.08 [10:27:45<14:05:51] +[titan] 2025-10-05 09:02:06,035 - root - INFO - step: 17045 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:02:06,035 - root - INFO - lr: 3.3072e-05 gnorm: 1.06 [10:27:56<14:05:39] +[titan] 2025-10-05 09:02:14,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:02:16,917 - root - INFO - step: 17050 loss: 2.2428 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:02:16,917 - root - INFO - lr: 3.3063e-05 gnorm: 1.04 [10:28:07<14:05:28] +[titan] 2025-10-05 09:02:27,783 - root - INFO - step: 17055 loss: 2.2213 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9627 +[titan] 2025-10-05 09:02:27,783 - root - INFO - lr: 3.3055e-05 gnorm: 1.05 [10:28:18<14:05:17] +[titan] 2025-10-05 09:02:38,654 - root - INFO - step: 17060 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8924 +[titan] 2025-10-05 09:02:38,654 - root - INFO - lr: 3.3046e-05 gnorm: 1.07 [10:28:28<14:05:05] +[titan] 2025-10-05 09:02:49,542 - root - INFO - step: 17065 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9244 +[titan] 2025-10-05 09:02:49,542 - root - INFO - lr: 3.3037e-05 gnorm: 1.10 [10:28:39<14:04:54] +[titan] 2025-10-05 09:03:00,423 - root - INFO - step: 17070 loss: 2.2506 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 09:03:00,423 - root - INFO - lr: 3.3029e-05 gnorm: 1.08 [10:28:50<14:04:43] +[titan] 2025-10-05 09:03:11,347 - root - INFO - step: 17075 loss: 2.1585 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:03:11,347 - root - INFO - lr: 3.3020e-05 gnorm: 1.09 [10:29:01<14:04:32] +[titan] 2025-10-05 09:03:22,220 - root - INFO - step: 17080 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 09:03:22,220 - root - INFO - lr: 
3.3011e-05 gnorm: 1.07 [10:29:12<14:04:20] +[titan] 2025-10-05 09:03:33,091 - root - INFO - step: 17085 loss: 2.1813 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:03:33,091 - root - INFO - lr: 3.3003e-05 gnorm: 1.12 [10:29:23<14:04:09] +[titan] 2025-10-05 09:03:43,968 - root - INFO - step: 17090 loss: 2.2621 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 1.9971 +[titan] 2025-10-05 09:03:43,968 - root - INFO - lr: 3.2994e-05 gnorm: 1.09 [10:29:34<14:03:58] +[titan] 2025-10-05 09:03:54,850 - root - INFO - step: 17095 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:03:54,850 - root - INFO - lr: 3.2986e-05 gnorm: 1.05 [10:29:45<14:03:46] +[titan] 2025-10-05 09:04:03,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:04:05,728 - root - INFO - step: 17100 loss: 2.1531 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 09:04:05,728 - root - INFO - lr: 3.2977e-05 gnorm: 1.07 [10:29:55<14:03:35] +[titan] 2025-10-05 09:04:16,647 - root - INFO - step: 17105 loss: 2.1923 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 09:04:16,647 - root - INFO - lr: 3.2968e-05 gnorm: 1.11 [10:30:06<14:03:24] +[titan] 2025-10-05 09:04:27,507 - root - INFO - step: 17110 loss: 2.1551 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9046 +[titan] 2025-10-05 09:04:27,507 - root - INFO - lr: 3.2960e-05 gnorm: 1.11 [10:30:17<14:03:13] +[titan] 2025-10-05 09:04:38,376 - root - INFO - step: 17115 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 09:04:38,376 - root - INFO - lr: 3.2951e-05 gnorm: 1.09 [10:30:28<14:03:01] +[titan] 2025-10-05 09:04:49,249 - root - INFO - step: 17120 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:04:49,249 - root - INFO - lr: 3.2942e-05 gnorm: 1.04 [10:30:39<14:02:50] +[titan] 2025-10-05 09:05:00,120 - root - INFO - step: 17125 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 09:05:00,120 - root - INFO - lr: 3.2934e-05 gnorm: 1.09 [10:30:50<14:02:39] +[titan] 2025-10-05 09:05:10,996 - root - INFO - step: 17130 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:05:10,997 - root - INFO - lr: 
3.2925e-05 gnorm: 6.19 [10:31:01<14:02:27] +[titan] 2025-10-05 09:05:21,856 - root - INFO - step: 17135 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 09:05:21,856 - root - INFO - lr: 3.2916e-05 gnorm: 1.04 [10:31:12<14:02:16] +[titan] 2025-10-05 09:05:32,760 - root - INFO - step: 17140 loss: 2.2847 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0182 +[titan] 2025-10-05 09:05:32,760 - root - INFO - lr: 3.2908e-05 gnorm: 1.13 [10:31:22<14:02:05] +[titan] 2025-10-05 09:05:43,616 - root - INFO - step: 17145 loss: 2.1628 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 09:05:43,616 - root - INFO - lr: 3.2899e-05 gnorm: 1.13 [10:31:33<14:01:54] +[titan] 2025-10-05 09:05:52,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:05:54,484 - root - INFO - step: 17150 loss: 2.2557 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 09:05:54,484 - root - INFO - lr: 3.2890e-05 gnorm: 1.04 [10:31:44<14:01:42] +[titan] 2025-10-05 09:06:05,356 - root - INFO - step: 17155 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 09:06:05,356 - root - INFO - lr: 3.2882e-05 gnorm: 1.06 [10:31:55<14:01:31] +[titan] 2025-10-05 09:06:16,249 - root - INFO - step: 17160 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:06:16,249 - root - INFO - lr: 3.2873e-05 gnorm: 1.06 [10:32:06<14:01:20] +[titan] 2025-10-05 09:06:27,125 - root - INFO - step: 17165 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:06:27,125 - root - INFO - lr: 3.2864e-05 gnorm: 1.06 [10:32:17<14:01:08] +[titan] 2025-10-05 09:06:38,025 - root - INFO - step: 17170 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:06:38,025 - root - INFO - lr: 3.2856e-05 gnorm: 1.14 [10:32:28<14:00:57] +[titan] 2025-10-05 09:06:48,880 - root - INFO - step: 17175 loss: 2.1394 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 09:06:48,880 - root - INFO - lr: 3.2847e-05 gnorm: 1.07 [10:32:39<14:00:46] +[titan] 2025-10-05 09:06:59,724 - root - INFO - step: 17180 loss: 2.1898 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 09:06:59,724 - root - INFO - lr: 
3.2838e-05 gnorm: 1.07 [10:32:49<14:00:35] +[titan] 2025-10-05 09:07:10,582 - root - INFO - step: 17185 loss: 2.1634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9111 +[titan] 2025-10-05 09:07:10,583 - root - INFO - lr: 3.2830e-05 gnorm: 1.03 [10:33:00<14:00:23] +[titan] 2025-10-05 09:07:21,443 - root - INFO - step: 17190 loss: 2.1666 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:07:21,443 - root - INFO - lr: 3.2821e-05 gnorm: 1.09 [10:33:11<14:00:12] +[titan] 2025-10-05 09:07:32,307 - root - INFO - step: 17195 loss: 2.2954 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 09:07:32,308 - root - INFO - lr: 3.2812e-05 gnorm: 1.05 [10:33:22<14:00:01] +[titan] 2025-10-05 09:07:40,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:07:43,204 - root - INFO - step: 17200 loss: 2.2434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:07:43,204 - root - INFO - lr: 3.2804e-05 gnorm: 1.02 [10:33:33<13:59:49] +[titan] 2025-10-05 09:07:54,076 - root - INFO - step: 17205 loss: 2.2300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:07:54,076 - root - INFO - lr: 3.2795e-05 gnorm: 1.07 [10:33:44<13:59:38] +[titan] 2025-10-05 09:08:04,949 - root - INFO - step: 17210 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:08:04,949 - root - INFO - lr: 3.2786e-05 gnorm: 1.14 [10:33:55<13:59:27] +[titan] 2025-10-05 09:08:15,833 - root - INFO - step: 17215 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 09:08:15,833 - root - INFO - lr: 3.2778e-05 gnorm: 1.07 [10:34:06<13:59:16] +[titan] 2025-10-05 09:08:26,702 - root - INFO - step: 17220 loss: 2.1866 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9324 +[titan] 2025-10-05 09:08:26,702 - root - INFO - lr: 3.2769e-05 gnorm: 1.12 [10:34:16<13:59:04] +[titan] 2025-10-05 09:08:37,566 - root - INFO - step: 17225 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 09:08:37,566 - root - INFO - lr: 3.2760e-05 gnorm: 1.09 [10:34:27<13:58:53] +[titan] 2025-10-05 09:08:48,419 - root - INFO - step: 17230 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 09:08:48,419 - root - INFO - lr: 
3.2752e-05 gnorm: 1.06 [10:34:38<13:58:42] +[titan] 2025-10-05 09:08:59,310 - root - INFO - step: 17235 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:08:59,310 - root - INFO - lr: 3.2743e-05 gnorm: 1.11 [10:34:49<13:58:30] +[titan] 2025-10-05 09:09:10,177 - root - INFO - step: 17240 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9319 +[titan] 2025-10-05 09:09:10,177 - root - INFO - lr: 3.2734e-05 gnorm: 1.07 [10:35:00<13:58:19] +[titan] 2025-10-05 09:09:21,054 - root - INFO - step: 17245 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:09:21,054 - root - INFO - lr: 3.2725e-05 gnorm: 1.03 [10:35:11<13:58:08] +[titan] 2025-10-05 09:09:29,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:09:31,915 - root - INFO - step: 17250 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9306 +[titan] 2025-10-05 09:09:31,915 - root - INFO - lr: 3.2717e-05 gnorm: 1.06 [10:35:22<13:57:57] +[titan] 2025-10-05 09:09:42,794 - root - INFO - step: 17255 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 09:09:42,794 - root - INFO - lr: 3.2708e-05 gnorm: 1.07 [10:35:33<13:57:45] +[titan] 2025-10-05 09:09:53,683 - root - INFO - step: 17260 loss: 2.1486 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 09:09:53,684 - root - INFO - lr: 3.2699e-05 gnorm: 1.09 [10:35:43<13:57:34] +[titan] 2025-10-05 09:10:04,613 - root - INFO - step: 17265 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 09:10:04,613 - root - INFO - lr: 3.2691e-05 gnorm: 1.10 [10:35:54<13:57:23] +[titan] 2025-10-05 09:10:15,520 - root - INFO - step: 17270 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:10:15,520 - root - INFO - lr: 3.2682e-05 gnorm: 1.07 [10:36:05<13:57:12] +[titan] 2025-10-05 09:10:26,410 - root - INFO - step: 17275 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9303 +[titan] 2025-10-05 09:10:26,410 - root - INFO - lr: 3.2673e-05 gnorm: 1.08 [10:36:16<13:57:00] +[titan] 2025-10-05 09:10:37,314 - root - INFO - step: 17280 loss: 2.3099 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 09:10:37,314 - root - INFO - lr: 
3.2665e-05 gnorm: 1.11 [10:36:27<13:56:49] +[titan] 2025-10-05 09:10:48,218 - root - INFO - step: 17285 loss: 2.2025 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 09:10:48,218 - root - INFO - lr: 3.2656e-05 gnorm: 1.04 [10:36:38<13:56:38] +[titan] 2025-10-05 09:10:59,106 - root - INFO - step: 17290 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 09:10:59,106 - root - INFO - lr: 3.2647e-05 gnorm: 1.08 [10:36:49<13:56:27] +[titan] 2025-10-05 09:11:09,991 - root - INFO - step: 17295 loss: 2.2277 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 09:11:09,991 - root - INFO - lr: 3.2639e-05 gnorm: 1.09 [10:37:00<13:56:15] +[titan] 2025-10-05 09:11:18,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:11:20,963 - root - INFO - step: 17300 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 09:11:20,963 - root - INFO - lr: 3.2630e-05 gnorm: 1.10 [10:37:11<13:56:04] +[titan] 2025-10-05 09:11:31,859 - root - INFO - step: 17305 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:11:31,859 - root - INFO - lr: 3.2621e-05 gnorm: 1.04 [10:37:22<13:55:53] +[titan] 2025-10-05 09:11:42,726 - root - INFO - step: 17310 loss: 2.2050 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 09:11:42,726 - root - INFO - lr: 3.2613e-05 gnorm: 1.08 [10:37:32<13:55:42] +[titan] 2025-10-05 09:11:53,604 - root - INFO - step: 17315 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:11:53,604 - root - INFO - lr: 3.2604e-05 gnorm: 1.06 [10:37:43<13:55:30] +[titan] 2025-10-05 09:12:04,491 - root - INFO - step: 17320 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:12:04,491 - root - INFO - lr: 3.2595e-05 gnorm: 1.08 [10:37:54<13:55:19] +[titan] 2025-10-05 09:12:15,414 - root - INFO - step: 17325 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:12:15,414 - root - INFO - lr: 3.2586e-05 gnorm: 1.03 [10:38:05<13:55:08] +[titan] 2025-10-05 09:12:26,330 - root - INFO - step: 17330 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9711 +[titan] 2025-10-05 09:12:26,330 - root - INFO - lr: 
3.2578e-05 gnorm: 1.08 [10:38:16<13:54:57] +[titan] 2025-10-05 09:12:37,205 - root - INFO - step: 17335 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9425 +[titan] 2025-10-05 09:12:37,206 - root - INFO - lr: 3.2569e-05 gnorm: 1.08 [10:38:27<13:54:45] +[titan] 2025-10-05 09:12:48,107 - root - INFO - step: 17340 loss: 2.2311 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 09:12:48,107 - root - INFO - lr: 3.2560e-05 gnorm: 1.07 [10:38:38<13:54:34] +[titan] 2025-10-05 09:12:58,971 - root - INFO - step: 17345 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:12:58,971 - root - INFO - lr: 3.2552e-05 gnorm: 1.02 [10:38:49<13:54:23] +[titan] 2025-10-05 09:13:07,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:13:09,828 - root - INFO - step: 17350 loss: 2.1864 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9313 +[titan] 2025-10-05 09:13:09,828 - root - INFO - lr: 3.2543e-05 gnorm: 1.12 [10:39:00<13:54:11] +[titan] 2025-10-05 09:13:20,766 - root - INFO - step: 17355 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:13:20,766 - root - INFO - lr: 3.2534e-05 gnorm: 1.05 [10:39:10<13:54:00] +[titan] 2025-10-05 09:13:31,647 - root - INFO - step: 17360 loss: 2.1890 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9339 +[titan] 2025-10-05 09:13:31,647 - root - INFO - lr: 3.2526e-05 gnorm: 1.06 [10:39:21<13:53:49] +[titan] 2025-10-05 09:13:42,494 - root - INFO - step: 17365 loss: 2.2669 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 09:13:42,494 - root - INFO - lr: 3.2517e-05 gnorm: 1.11 [10:39:32<13:53:38] +[titan] 2025-10-05 09:13:53,353 - root - INFO - step: 17370 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0243 +[titan] 2025-10-05 09:13:53,353 - root - INFO - lr: 3.2508e-05 gnorm: 1.16 [10:39:43<13:53:26] +[titan] 2025-10-05 09:14:04,232 - root - INFO - step: 17375 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9603 +[titan] 2025-10-05 09:14:04,232 - root - INFO - lr: 3.2500e-05 gnorm: 1.06 [10:39:54<13:53:15] +[titan] 2025-10-05 09:14:15,120 - root - INFO - step: 17380 loss: 2.2381 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 09:14:15,120 - root - INFO - lr: 
3.2491e-05 gnorm: 1.09 [10:40:05<13:53:04] +[titan] 2025-10-05 09:14:26,052 - root - INFO - step: 17385 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 09:14:26,052 - root - INFO - lr: 3.2482e-05 gnorm: 1.07 [10:40:16<13:52:53] +[titan] 2025-10-05 09:14:36,924 - root - INFO - step: 17390 loss: 2.1808 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:14:36,924 - root - INFO - lr: 3.2473e-05 gnorm: 1.07 [10:40:27<13:52:41] +[titan] 2025-10-05 09:14:47,853 - root - INFO - step: 17395 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9403 +[titan] 2025-10-05 09:14:47,853 - root - INFO - lr: 3.2465e-05 gnorm: 1.04 [10:40:38<13:52:30] +[titan] 2025-10-05 09:14:56,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:14:58,759 - root - INFO - step: 17400 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 09:14:58,759 - root - INFO - lr: 3.2456e-05 gnorm: 1.05 [10:40:48<13:52:19] +[titan] 2025-10-05 09:15:09,750 - root - INFO - step: 17405 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 29,817 tflops: 413.66 mfu: 41.83% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 09:15:09,750 - root - INFO - lr: 3.2447e-05 gnorm: 1.05 [10:40:59<13:52:08] +[titan] 2025-10-05 09:15:16,469 - root - INFO - Dumping profiler traces at step 17408 +[titan] 2025-10-05 09:15:16,507 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:15:20,898 - root - INFO - step: 17410 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 29,393 tflops: 407.78 mfu: 41.23% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:15:20,898 - root - INFO - lr: 3.2439e-05 gnorm: 1.10 [10:41:11<13:51:57] +[titan] 2025-10-05 09:15:31,784 - root - INFO - step: 17415 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 09:15:31,784 - root - INFO - lr: 3.2430e-05 gnorm: 1.11 [10:41:21<13:51:46] +[titan] 2025-10-05 09:15:42,678 - root - INFO - step: 17420 loss: 2.1926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9383 +[titan] 2025-10-05 09:15:42,678 - root - INFO - lr: 3.2421e-05 gnorm: 1.05 [10:41:32<13:51:34] +[titan] 2025-10-05 09:15:53,585 - root - INFO - step: 17425 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 09:15:53,585 - root - INFO - lr: 3.2412e-05 gnorm: 1.05 [10:41:43<13:51:23] +[titan] 2025-10-05 09:16:04,476 - root - INFO - step: 17430 loss: 
2.1543 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9031 +[titan] 2025-10-05 09:16:04,476 - root - INFO - lr: 3.2404e-05 gnorm: 1.06 [10:41:54<13:51:12] +[titan] 2025-10-05 09:16:15,351 - root - INFO - step: 17435 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 09:16:15,351 - root - INFO - lr: 3.2395e-05 gnorm: 1.09 [10:42:05<13:51:01] +[titan] 2025-10-05 09:16:26,256 - root - INFO - step: 17440 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:16:26,257 - root - INFO - lr: 3.2386e-05 gnorm: 1.08 [10:42:16<13:50:49] +[titan] 2025-10-05 09:16:37,135 - root - INFO - step: 17445 loss: 2.1787 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9254 +[titan] 2025-10-05 09:16:37,135 - root - INFO - lr: 3.2378e-05 gnorm: 1.06 [10:42:27<13:50:38] +[titan] 2025-10-05 09:16:45,826 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:16:48,014 - root - INFO - step: 17450 loss: 2.1992 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9428 +[titan] 2025-10-05 09:16:48,014 - root - INFO - lr: 3.2369e-05 gnorm: 1.03 [10:42:38<13:50:27] +[titan] 2025-10-05 09:16:58,900 - root - INFO - step: 17455 loss: 2.2831 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:16:58,900 - root - INFO - lr: 3.2360e-05 gnorm: 1.09 [10:42:49<13:50:16] +[titan] 2025-10-05 09:17:09,817 - root - INFO - step: 17460 loss: 2.2252 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:17:09,817 - root - INFO - lr: 3.2351e-05 gnorm: 1.08 [10:43:00<13:50:04] +[titan] 2025-10-05 09:17:20,746 - root - INFO - step: 17465 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:17:20,746 - root - INFO - lr: 3.2343e-05 gnorm: 1.05 [10:43:10<13:49:53] +[titan] 2025-10-05 09:17:31,624 - root - INFO - step: 17470 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 09:17:31,624 - root - INFO - lr: 3.2334e-05 gnorm: 1.07 [10:43:21<13:49:42] +[titan] 2025-10-05 09:17:42,511 - root - INFO - step: 17475 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:17:42,511 - root - INFO - lr: 3.2325e-05 gnorm: 1.07 [10:43:32<13:49:31] +[titan] 2025-10-05 09:17:53,406 - root - INFO - step: 17480 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 09:17:53,407 - root - INFO - lr: 
3.2317e-05 gnorm: 1.09 [10:43:43<13:49:19] +[titan] 2025-10-05 09:18:04,292 - root - INFO - step: 17485 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:18:04,292 - root - INFO - lr: 3.2308e-05 gnorm: 1.09 [10:43:54<13:49:08] +[titan] 2025-10-05 09:18:15,232 - root - INFO - step: 17490 loss: 2.1875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 09:18:15,232 - root - INFO - lr: 3.2299e-05 gnorm: 1.09 [10:44:05<13:48:57] +[titan] 2025-10-05 09:18:26,148 - root - INFO - step: 17495 loss: 2.1821 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9280 +[titan] 2025-10-05 09:18:26,148 - root - INFO - lr: 3.2290e-05 gnorm: 1.06 [10:44:16<13:48:46] +[titan] 2025-10-05 09:18:34,840 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:18:37,024 - root - INFO - step: 17500 loss: 2.2275 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9680 +[titan] 2025-10-05 09:18:37,024 - root - INFO - lr: 3.2282e-05 gnorm: 1.08 [10:44:27<13:48:35] +[titan] 2025-10-05 09:18:47,898 - root - INFO - step: 17505 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9235 +[titan] 2025-10-05 09:18:47,898 - root - INFO - lr: 3.2273e-05 gnorm: 1.10 [10:44:38<13:48:23] +[titan] 2025-10-05 09:18:58,787 - root - INFO - step: 17510 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 09:18:58,788 - root - INFO - lr: 3.2264e-05 gnorm: 1.07 [10:44:48<13:48:12] +[titan] 2025-10-05 09:19:09,664 - root - INFO - step: 17515 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9713 +[titan] 2025-10-05 09:19:09,664 - root - INFO - lr: 3.2256e-05 gnorm: 1.11 [10:44:59<13:48:01] +[titan] 2025-10-05 09:19:20,602 - root - INFO - step: 17520 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 09:19:20,603 - root - INFO - lr: 3.2247e-05 gnorm: 1.06 [10:45:10<13:47:50] +[titan] 2025-10-05 09:19:31,492 - root - INFO - step: 17525 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9837 +[titan] 2025-10-05 09:19:31,492 - root - INFO - lr: 3.2238e-05 gnorm: 1.06 [10:45:21<13:47:38] +[titan] 2025-10-05 09:19:42,388 - root - INFO - step: 17530 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:19:42,388 - root - INFO - lr: 
3.2229e-05 gnorm: 1.04 [10:45:32<13:47:27] +[titan] 2025-10-05 09:19:53,275 - root - INFO - step: 17535 loss: 2.1899 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9365 +[titan] 2025-10-05 09:19:53,275 - root - INFO - lr: 3.2221e-05 gnorm: 1.11 [10:45:43<13:47:16] +[titan] 2025-10-05 09:20:04,158 - root - INFO - step: 17540 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:20:04,158 - root - INFO - lr: 3.2212e-05 gnorm: 1.08 [10:45:54<13:47:05] +[titan] 2025-10-05 09:20:15,047 - root - INFO - step: 17545 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 09:20:15,047 - root - INFO - lr: 3.2203e-05 gnorm: 1.14 [10:46:05<13:46:53] +[titan] 2025-10-05 09:20:23,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:20:25,948 - root - INFO - step: 17550 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:20:25,948 - root - INFO - lr: 3.2194e-05 gnorm: 1.06 [10:46:16<13:46:42] +[titan] 2025-10-05 09:20:36,875 - root - INFO - step: 17555 loss: 2.1706 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 09:20:36,875 - root - INFO - lr: 3.2186e-05 gnorm: 1.05 [10:46:27<13:46:31] +[titan] 2025-10-05 09:20:47,778 - root - INFO - step: 17560 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:20:47,778 - root - INFO - lr: 3.2177e-05 gnorm: 1.04 [10:46:37<13:46:20] +[titan] 2025-10-05 09:20:58,670 - root - INFO - step: 17565 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 09:20:58,670 - root - INFO - lr: 3.2168e-05 gnorm: 1.11 [10:46:48<13:46:08] +[titan] 2025-10-05 09:21:09,567 - root - INFO - step: 17570 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9227 +[titan] 2025-10-05 09:21:09,567 - root - INFO - lr: 3.2160e-05 gnorm: 1.03 [10:46:59<13:45:57] +[titan] 2025-10-05 09:21:20,447 - root - INFO - step: 17575 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:21:20,447 - root - INFO - lr: 3.2151e-05 gnorm: 1.06 [10:47:10<13:45:46] +[titan] 2025-10-05 09:21:31,358 - root - INFO - step: 17580 loss: 2.1219 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 09:21:31,358 - root - INFO - lr: 
3.2142e-05 gnorm: 1.07 [10:47:21<13:45:35] +[titan] 2025-10-05 09:21:42,250 - root - INFO - step: 17585 loss: 2.2406 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 09:21:42,250 - root - INFO - lr: 3.2133e-05 gnorm: 1.10 [10:47:32<13:45:23] +[titan] 2025-10-05 09:21:53,130 - root - INFO - step: 17590 loss: 2.2175 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 09:21:53,130 - root - INFO - lr: 3.2125e-05 gnorm: 1.08 [10:47:43<13:45:12] +[titan] 2025-10-05 09:22:04,011 - root - INFO - step: 17595 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9755 +[titan] 2025-10-05 09:22:04,011 - root - INFO - lr: 3.2116e-05 gnorm: 1.05 [10:47:54<13:45:01] +[titan] 2025-10-05 09:22:12,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:22:14,893 - root - INFO - step: 17600 loss: 2.2663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0020 +[titan] 2025-10-05 09:22:14,894 - root - INFO - lr: 3.2107e-05 gnorm: 1.08 [10:48:05<13:44:50] +[titan] 2025-10-05 09:22:25,790 - root - INFO - step: 17605 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 09:22:25,790 - root - INFO - lr: 3.2098e-05 gnorm: 1.11 [10:48:15<13:44:38] +[titan] 2025-10-05 09:22:36,676 - root - INFO - step: 17610 loss: 2.2048 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9482 +[titan] 2025-10-05 09:22:36,676 - root - INFO - lr: 3.2090e-05 gnorm: 1.08 [10:48:26<13:44:27] +[titan] 2025-10-05 09:22:47,556 - root - INFO - step: 17615 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 09:22:47,556 - root - INFO - lr: 3.2081e-05 gnorm: 1.10 [10:48:37<13:44:16] +[titan] 2025-10-05 09:22:58,451 - root - INFO - step: 17620 loss: 2.1471 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:22:58,451 - root - INFO - lr: 3.2072e-05 gnorm: 1.10 [10:48:48<13:44:05] +[titan] 2025-10-05 09:23:09,330 - root - INFO - step: 17625 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 09:23:09,330 - root - INFO - lr: 3.2063e-05 gnorm: 1.04 [10:48:59<13:43:53] +[titan] 2025-10-05 09:23:20,210 - root - INFO - step: 17630 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9426 +[titan] 2025-10-05 09:23:20,210 - root - INFO - lr: 
3.2055e-05 gnorm: 1.05 [10:49:10<13:43:42] +[titan] 2025-10-05 09:23:31,084 - root - INFO - step: 17635 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 09:23:31,084 - root - INFO - lr: 3.2046e-05 gnorm: 1.06 [10:49:21<13:43:31] +[titan] 2025-10-05 09:23:41,968 - root - INFO - step: 17640 loss: 2.2575 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 09:23:41,969 - root - INFO - lr: 3.2037e-05 gnorm: 1.12 [10:49:32<13:43:20] +[titan] 2025-10-05 09:23:52,856 - root - INFO - step: 17645 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9799 +[titan] 2025-10-05 09:23:52,856 - root - INFO - lr: 3.2029e-05 gnorm: 1.10 [10:49:43<13:43:08] +[titan] 2025-10-05 09:24:01,573 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:24:03,759 - root - INFO - step: 17650 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:24:03,759 - root - INFO - lr: 3.2020e-05 gnorm: 1.14 [10:49:53<13:42:57] +[titan] 2025-10-05 09:24:14,635 - root - INFO - step: 17655 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 09:24:14,636 - root - INFO - lr: 3.2011e-05 gnorm: 1.12 [10:50:04<13:42:46] +[titan] 2025-10-05 09:24:25,539 - root - INFO - step: 17660 loss: 2.1876 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9327 +[titan] 2025-10-05 09:24:25,539 - root - INFO - lr: 3.2002e-05 gnorm: 1.06 [10:50:15<13:42:35] +[titan] 2025-10-05 09:24:36,410 - root - INFO - step: 17665 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9523 +[titan] 2025-10-05 09:24:36,410 - root - INFO - lr: 3.1994e-05 gnorm: 1.09 [10:50:26<13:42:23] +[titan] 2025-10-05 09:24:47,292 - root - INFO - step: 17670 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9587 +[titan] 2025-10-05 09:24:47,292 - root - INFO - lr: 3.1985e-05 gnorm: 1.03 [10:50:37<13:42:12] +[titan] 2025-10-05 09:24:58,185 - root - INFO - step: 17675 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 09:24:58,185 - root - INFO - lr: 3.1976e-05 gnorm: 1.17 [10:50:48<13:42:01] +[titan] 2025-10-05 09:25:09,104 - root - INFO - step: 17680 loss: 2.2810 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 09:25:09,105 - root - INFO - lr: 
3.1967e-05 gnorm: 1.11 [10:50:59<13:41:50] +[titan] 2025-10-05 09:25:19,979 - root - INFO - step: 17685 loss: 2.1693 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:25:19,979 - root - INFO - lr: 3.1959e-05 gnorm: 1.07 [10:51:10<13:41:38] +[titan] 2025-10-05 09:25:30,866 - root - INFO - step: 17690 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9351 +[titan] 2025-10-05 09:25:30,867 - root - INFO - lr: 3.1950e-05 gnorm: 1.10 [10:51:21<13:41:27] +[titan] 2025-10-05 09:25:41,737 - root - INFO - step: 17695 loss: 2.1997 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 09:25:41,737 - root - INFO - lr: 3.1941e-05 gnorm: 1.03 [10:51:31<13:41:16] +[titan] 2025-10-05 09:25:50,446 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:25:52,639 - root - INFO - step: 17700 loss: 2.1679 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 09:25:52,639 - root - INFO - lr: 3.1932e-05 gnorm: 1.08 [10:51:42<13:41:05] +[titan] 2025-10-05 09:26:03,528 - root - INFO - step: 17705 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9507 +[titan] 2025-10-05 09:26:03,528 - root - INFO - lr: 3.1924e-05 gnorm: 1.14 [10:51:53<13:40:53] +[titan] 2025-10-05 09:26:14,424 - root - INFO - step: 17710 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:26:14,424 - root - INFO - lr: 3.1915e-05 gnorm: 1.05 [10:52:04<13:40:42] +[titan] 2025-10-05 09:26:25,353 - root - INFO - step: 17715 loss: 2.1118 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 09:26:25,353 - root - INFO - lr: 3.1906e-05 gnorm: 1.09 [10:52:15<13:40:31] +[titan] 2025-10-05 09:26:36,227 - root - INFO - step: 17720 loss: 2.1460 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8958 +[titan] 2025-10-05 09:26:36,227 - root - INFO - lr: 3.1897e-05 gnorm: 1.09 [10:52:26<13:40:20] +[titan] 2025-10-05 09:26:47,086 - root - INFO - step: 17725 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:26:47,086 - root - INFO - lr: 3.1889e-05 gnorm: 1.04 [10:52:37<13:40:08] +[titan] 2025-10-05 09:26:57,951 - root - INFO - step: 17730 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 09:26:57,951 - root - INFO - lr: 
3.1880e-05 gnorm: 1.13 [10:52:48<13:39:57] +[titan] 2025-10-05 09:27:08,802 - root - INFO - step: 17735 loss: 2.2199 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 09:27:08,802 - root - INFO - lr: 3.1871e-05 gnorm: 1.04 [10:52:58<13:39:46] +[titan] 2025-10-05 09:27:19,665 - root - INFO - step: 17740 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 09:27:19,665 - root - INFO - lr: 3.1862e-05 gnorm: 1.09 [10:53:09<13:39:35] +[titan] 2025-10-05 09:27:30,612 - root - INFO - step: 17745 loss: 2.1677 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9150 +[titan] 2025-10-05 09:27:30,612 - root - INFO - lr: 3.1854e-05 gnorm: 1.09 [10:53:20<13:39:23] +[titan] 2025-10-05 09:27:39,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:27:41,462 - root - INFO - step: 17750 loss: 2.1954 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 09:27:41,462 - root - INFO - lr: 3.1845e-05 gnorm: 1.09 [10:53:31<13:39:12] +[titan] 2025-10-05 09:27:52,328 - root - INFO - step: 17755 loss: 2.1602 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9091 +[titan] 2025-10-05 09:27:52,328 - root - INFO - lr: 3.1836e-05 gnorm: 1.04 [10:53:42<13:39:01] +[titan] 2025-10-05 09:28:03,186 - root - INFO - step: 17760 loss: 2.2440 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:28:03,186 - root - INFO - lr: 3.1827e-05 gnorm: 1.08 [10:53:53<13:38:50] +[titan] 2025-10-05 09:28:14,042 - root - INFO - step: 17765 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9224 +[titan] 2025-10-05 09:28:14,043 - root - INFO - lr: 3.1818e-05 gnorm: 1.07 [10:54:04<13:38:38] +[titan] 2025-10-05 09:28:24,918 - root - INFO - step: 17770 loss: 2.1581 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9064 +[titan] 2025-10-05 09:28:24,918 - root - INFO - lr: 3.1810e-05 gnorm: 1.06 [10:54:15<13:38:27] +[titan] 2025-10-05 09:28:35,788 - root - INFO - step: 17775 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9652 +[titan] 2025-10-05 09:28:35,788 - root - INFO - lr: 3.1801e-05 gnorm: 1.15 [10:54:25<13:38:16] +[titan] 2025-10-05 09:28:46,695 - root - INFO - step: 17780 loss: 2.2055 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:28:46,696 - root - INFO - lr: 
3.1792e-05 gnorm: 1.10 [10:54:36<13:38:05] +[titan] 2025-10-05 09:28:57,567 - root - INFO - step: 17785 loss: 2.1809 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9261 +[titan] 2025-10-05 09:28:57,567 - root - INFO - lr: 3.1783e-05 gnorm: 1.05 [10:54:47<13:37:53] +[titan] 2025-10-05 09:29:08,437 - root - INFO - step: 17790 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:29:08,437 - root - INFO - lr: 3.1775e-05 gnorm: 1.09 [10:54:58<13:37:42] +[titan] 2025-10-05 09:29:19,347 - root - INFO - step: 17795 loss: 2.1437 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 09:29:19,348 - root - INFO - lr: 3.1766e-05 gnorm: 1.05 [10:55:09<13:37:31] +[titan] 2025-10-05 09:29:28,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:29:30,284 - root - INFO - step: 17800 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 09:29:30,284 - root - INFO - lr: 3.1757e-05 gnorm: 1.07 [10:55:20<13:37:20] +[titan] 2025-10-05 09:29:41,184 - root - INFO - step: 17805 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9269 +[titan] 2025-10-05 09:29:41,184 - root - INFO - lr: 3.1748e-05 gnorm: 1.07 [10:55:31<13:37:08] +[titan] 2025-10-05 09:29:52,086 - root - INFO - step: 17810 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:29:52,086 - root - INFO - lr: 3.1740e-05 gnorm: 1.06 [10:55:42<13:36:57] +[titan] 2025-10-05 09:30:03,070 - root - INFO - step: 17815 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9441 +[titan] 2025-10-05 09:30:03,070 - root - INFO - lr: 3.1731e-05 gnorm: 1.04 [10:55:53<13:36:46] +[titan] 2025-10-05 09:30:13,933 - root - INFO - step: 17820 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 09:30:13,933 - root - INFO - lr: 3.1722e-05 gnorm: 1.09 [10:56:04<13:36:35] +[titan] 2025-10-05 09:30:24,824 - root - INFO - step: 17825 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:30:24,824 - root - INFO - lr: 3.1713e-05 gnorm: 1.05 [10:56:14<13:36:24] +[titan] 2025-10-05 09:30:35,792 - root - INFO - step: 17830 loss: 2.1738 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:30:35,792 - root - INFO - lr: 
3.1705e-05 gnorm: 1.09 [10:56:25<13:36:12] +[titan] 2025-10-05 09:30:46,656 - root - INFO - step: 17835 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 09:30:46,656 - root - INFO - lr: 3.1696e-05 gnorm: 1.04 [10:56:36<13:36:01] +[titan] 2025-10-05 09:30:57,542 - root - INFO - step: 17840 loss: 2.1750 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 09:30:57,542 - root - INFO - lr: 3.1687e-05 gnorm: 1.05 [10:56:47<13:35:50] +[titan] 2025-10-05 09:31:08,403 - root - INFO - step: 17845 loss: 2.2534 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 09:31:08,403 - root - INFO - lr: 3.1678e-05 gnorm: 1.10 [10:56:58<13:35:39] +[titan] 2025-10-05 09:31:17,089 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:31:19,279 - root - INFO - step: 17850 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9894 +[titan] 2025-10-05 09:31:19,279 - root - INFO - lr: 3.1670e-05 gnorm: 1.07 [10:57:09<13:35:27] +[titan] 2025-10-05 09:31:30,156 - root - INFO - step: 17855 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9467 +[titan] 2025-10-05 09:31:30,156 - root - INFO - lr: 3.1661e-05 gnorm: 1.03 [10:57:20<13:35:16] +[titan] 2025-10-05 09:31:41,087 - root - INFO - step: 17860 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9406 +[titan] 2025-10-05 09:31:41,088 - root - INFO - lr: 3.1652e-05 gnorm: 1.07 [10:57:31<13:35:05] +[titan] 2025-10-05 09:31:51,960 - root - INFO - step: 17865 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:31:51,961 - root - INFO - lr: 3.1643e-05 gnorm: 1.08 [10:57:42<13:34:54] +[titan] 2025-10-05 09:32:02,826 - root - INFO - step: 17870 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 09:32:02,826 - root - INFO - lr: 3.1634e-05 gnorm: 1.08 [10:57:52<13:34:42] +[titan] 2025-10-05 09:32:13,722 - root - INFO - step: 17875 loss: 2.2074 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:32:13,722 - root - INFO - lr: 3.1626e-05 gnorm: 1.04 [10:58:03<13:34:31] +[titan] 2025-10-05 09:32:24,584 - root - INFO - step: 17880 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 09:32:24,584 - root - INFO - lr: 
3.1617e-05 gnorm: 1.06 [10:58:14<13:34:20] +[titan] 2025-10-05 09:32:35,482 - root - INFO - step: 17885 loss: 2.2057 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 09:32:35,482 - root - INFO - lr: 3.1608e-05 gnorm: 1.05 [10:58:25<13:34:09] +[titan] 2025-10-05 09:32:46,343 - root - INFO - step: 17890 loss: 2.2259 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9659 +[titan] 2025-10-05 09:32:46,344 - root - INFO - lr: 3.1599e-05 gnorm: 1.05 [10:58:36<13:33:57] +[titan] 2025-10-05 09:32:57,245 - root - INFO - step: 17895 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8967 +[titan] 2025-10-05 09:32:57,245 - root - INFO - lr: 3.1591e-05 gnorm: 1.07 [10:58:47<13:33:46] +[titan] 2025-10-05 09:33:05,937 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:33:08,118 - root - INFO - step: 17900 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 09:33:08,118 - root - INFO - lr: 3.1582e-05 gnorm: 1.01 [10:58:58<13:33:35] +[titan] 2025-10-05 09:33:19,021 - root - INFO - step: 17905 loss: 2.1704 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 09:33:19,022 - root - INFO - lr: 3.1573e-05 gnorm: 1.15 [10:59:09<13:33:24] +[titan] 2025-10-05 09:33:29,882 - root - INFO - step: 17910 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 09:33:29,882 - root - INFO - lr: 3.1564e-05 gnorm: 1.05 [10:59:20<13:33:12] +[titan] 2025-10-05 09:33:40,805 - root - INFO - step: 17915 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 09:33:40,805 - root - INFO - lr: 3.1555e-05 gnorm: 1.08 [10:59:30<13:33:01] +[titan] 2025-10-05 09:33:51,745 - root - INFO - step: 17920 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 09:33:51,746 - root - INFO - lr: 3.1547e-05 gnorm: 1.12 [10:59:41<13:32:50] +[titan] 2025-10-05 09:33:51,926 - root - INFO - Dumping profiler traces at step 17920 +[titan] 2025-10-05 09:33:51,966 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:34:02,867 - root - INFO - step: 17925 loss: 2.1932 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 09:34:02,867 - root - INFO - lr: 3.1538e-05 gnorm: 1.06 [10:59:53<13:32:39] +[titan] 2025-10-05 09:34:13,739 - root - INFO - step: 17930 loss: 
2.2006 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:34:13,739 - root - INFO - lr: 3.1529e-05 gnorm: 1.12 [11:00:03<13:32:28] +[titan] 2025-10-05 09:34:24,600 - root - INFO - step: 17935 loss: 2.2250 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:34:24,600 - root - INFO - lr: 3.1520e-05 gnorm: 1.06 [11:00:14<13:32:17] +[titan] 2025-10-05 09:34:35,525 - root - INFO - step: 17940 loss: 2.1726 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 09:34:35,525 - root - INFO - lr: 3.1512e-05 gnorm: 1.05 [11:00:25<13:32:05] +[titan] 2025-10-05 09:34:46,392 - root - INFO - step: 17945 loss: 2.0902 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 09:34:46,392 - root - INFO - lr: 3.1503e-05 gnorm: 1.04 [11:00:36<13:31:54] +[titan] 2025-10-05 09:34:55,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:34:57,264 - root - INFO - step: 17950 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 09:34:57,264 - root - INFO - lr: 3.1494e-05 gnorm: 1.09 [11:00:47<13:31:43] +[titan] 2025-10-05 09:35:08,110 - root - INFO - step: 17955 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 09:35:08,110 - root - INFO - lr: 3.1485e-05 gnorm: 1.05 [11:00:58<13:31:32] +[titan] 2025-10-05 09:35:18,976 - root - INFO - step: 17960 loss: 2.2219 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:35:18,976 - root - INFO - lr: 3.1476e-05 gnorm: 1.08 [11:01:09<13:31:20] +[titan] 2025-10-05 09:35:29,837 - root - INFO - step: 17965 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9589 +[titan] 2025-10-05 09:35:29,838 - root - INFO - lr: 3.1468e-05 gnorm: 1.08 [11:01:19<13:31:09] +[titan] 2025-10-05 09:35:40,783 - root - INFO - step: 17970 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.35 mfu: 42.00% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9404 +[titan] 2025-10-05 09:35:40,783 - root - INFO - lr: 3.1459e-05 gnorm: 1.07 [11:01:30<13:30:58] +[titan] 2025-10-05 09:35:51,637 - root - INFO - step: 17975 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.0326 +[titan] 2025-10-05 09:35:51,637 - root - INFO - lr: 3.1450e-05 gnorm: 1.14 [11:01:41<13:30:47] +[titan] 2025-10-05 09:36:02,514 - root - INFO - step: 17980 loss: 2.1848 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 09:36:02,514 - root - INFO - lr: 
3.1441e-05 gnorm: 1.06 [11:01:52<13:30:35] +[titan] 2025-10-05 09:36:13,381 - root - INFO - step: 17985 loss: 2.1655 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9132 +[titan] 2025-10-05 09:36:13,381 - root - INFO - lr: 3.1432e-05 gnorm: 1.07 [11:02:03<13:30:24] +[titan] 2025-10-05 09:36:24,242 - root - INFO - step: 17990 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 09:36:24,242 - root - INFO - lr: 3.1424e-05 gnorm: 1.05 [11:02:14<13:30:13] +[titan] 2025-10-05 09:36:35,118 - root - INFO - step: 17995 loss: 2.2044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9479 +[titan] 2025-10-05 09:36:35,118 - root - INFO - lr: 3.1415e-05 gnorm: 1.05 [11:02:25<13:30:02] +[titan] 2025-10-05 09:36:43,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:36:46,012 - root - INFO - step: 18000 loss: 2.1302 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 09:36:46,012 - root - INFO - lr: 3.1406e-05 gnorm: 1.05 [11:02:36<13:29:50] +[titan] 2025-10-05 09:36:56,870 - root - INFO - step: 18005 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9388 +[titan] 2025-10-05 09:36:56,871 - root - INFO - lr: 3.1397e-05 gnorm: 1.04 [11:02:47<13:29:39] +[titan] 2025-10-05 09:37:07,717 - root - INFO - step: 18010 loss: 2.2185 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9597 +[titan] 2025-10-05 09:37:07,718 - root - INFO - lr: 3.1389e-05 gnorm: 1.06 [11:02:57<13:29:28] +[titan] 2025-10-05 09:37:18,576 - root - INFO - step: 18015 loss: 2.2301 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:37:18,576 - root - INFO - lr: 3.1380e-05 gnorm: 1.13 [11:03:08<13:29:17] +[titan] 2025-10-05 09:37:29,423 - root - INFO - step: 18020 loss: 2.2014 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9439 +[titan] 2025-10-05 09:37:29,423 - root - INFO - lr: 3.1371e-05 gnorm: 1.04 [11:03:19<13:29:05] +[titan] 2025-10-05 09:37:40,354 - root - INFO - step: 18025 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:37:40,354 - root - INFO - lr: 3.1362e-05 gnorm: 1.05 [11:03:30<13:28:54] +[titan] 2025-10-05 09:37:51,204 - root - INFO - step: 18030 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9219 +[titan] 2025-10-05 09:37:51,204 - root - INFO - lr: 
3.1353e-05 gnorm: 1.05 [11:03:41<13:28:43] +[titan] 2025-10-05 09:38:02,089 - root - INFO - step: 18035 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:38:02,089 - root - INFO - lr: 3.1345e-05 gnorm: 1.09 [11:03:52<13:28:32] +[titan] 2025-10-05 09:38:12,956 - root - INFO - step: 18040 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:38:12,956 - root - INFO - lr: 3.1336e-05 gnorm: 1.11 [11:04:03<13:28:20] +[titan] 2025-10-05 09:38:23,803 - root - INFO - step: 18045 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 09:38:23,803 - root - INFO - lr: 3.1327e-05 gnorm: 1.04 [11:04:13<13:28:09] +[titan] 2025-10-05 09:38:32,481 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:38:34,663 - root - INFO - step: 18050 loss: 2.1705 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:38:34,663 - root - INFO - lr: 3.1318e-05 gnorm: 1.05 [11:04:24<13:27:58] +[titan] 2025-10-05 09:38:45,582 - root - INFO - step: 18055 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 09:38:45,582 - root - INFO - lr: 3.1309e-05 gnorm: 1.10 [11:04:35<13:27:47] +[titan] 2025-10-05 09:38:56,462 - root - INFO - step: 18060 loss: 2.1737 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 09:38:56,462 - root - INFO - lr: 3.1301e-05 gnorm: 1.10 [11:04:46<13:27:35] +[titan] 2025-10-05 09:39:07,387 - root - INFO - step: 18065 loss: 2.2727 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0073 +[titan] 2025-10-05 09:39:07,388 - root - INFO - lr: 3.1292e-05 gnorm: 1.11 [11:04:57<13:27:24] +[titan] 2025-10-05 09:39:18,267 - root - INFO - step: 18070 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9196 +[titan] 2025-10-05 09:39:18,267 - root - INFO - lr: 3.1283e-05 gnorm: 1.03 [11:05:08<13:27:13] +[titan] 2025-10-05 09:39:29,177 - root - INFO - step: 18075 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 09:39:29,177 - root - INFO - lr: 3.1274e-05 gnorm: 1.09 [11:05:19<13:27:02] +[titan] 2025-10-05 09:39:40,351 - root - INFO - step: 18080 loss: 2.1525 memory: 118.84GiB(85.28%) tps: 29,326 tflops: 406.85 mfu: 41.14% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 09:39:40,352 - root - INFO - lr: 
3.1265e-05 gnorm: 1.07 [11:05:30<13:26:51] +[titan] 2025-10-05 09:39:51,220 - root - INFO - step: 18085 loss: 2.1539 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 09:39:51,221 - root - INFO - lr: 3.1257e-05 gnorm: 1.06 [11:05:41<13:26:40] +[titan] 2025-10-05 09:40:02,072 - root - INFO - step: 18090 loss: 2.1462 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8962 +[titan] 2025-10-05 09:40:02,072 - root - INFO - lr: 3.1248e-05 gnorm: 1.09 [11:05:52<13:26:28] +[titan] 2025-10-05 09:40:12,918 - root - INFO - step: 18095 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:40:12,918 - root - INFO - lr: 3.1239e-05 gnorm: 1.10 [11:06:03<13:26:17] +[titan] 2025-10-05 09:40:21,622 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:40:23,799 - root - INFO - step: 18100 loss: 2.2201 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:40:23,799 - root - INFO - lr: 3.1230e-05 gnorm: 1.06 [11:06:13<13:26:06] +[titan] 2025-10-05 09:40:34,657 - root - INFO - step: 18105 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9333 +[titan] 2025-10-05 09:40:34,657 - root - INFO - lr: 3.1221e-05 gnorm: 1.02 [11:06:24<13:25:54] +[titan] 2025-10-05 09:40:45,595 - root - INFO - step: 18110 loss: 2.2690 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0044 +[titan] 2025-10-05 09:40:45,595 - root - INFO - lr: 3.1213e-05 gnorm: 1.11 [11:06:35<13:25:43] +[titan] 2025-10-05 09:40:56,456 - root - INFO - step: 18115 loss: 2.1375 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 09:40:56,457 - root - INFO - lr: 3.1204e-05 gnorm: 1.07 [11:06:46<13:25:32] +[titan] 2025-10-05 09:41:07,318 - root - INFO - step: 18120 loss: 2.2233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9634 +[titan] 2025-10-05 09:41:07,318 - root - INFO - lr: 3.1195e-05 gnorm: 1.08 [11:06:57<13:25:21] +[titan] 2025-10-05 09:41:18,178 - root - INFO - step: 18125 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9584 +[titan] 2025-10-05 09:41:18,178 - root - INFO - lr: 3.1186e-05 gnorm: 1.07 [11:07:08<13:25:10] +[titan] 2025-10-05 09:41:29,063 - root - INFO - step: 18130 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9142 +[titan] 2025-10-05 09:41:29,063 - root - INFO - lr: 
3.1177e-05 gnorm: 1.07 [11:07:19<13:24:58] +[titan] 2025-10-05 09:41:39,951 - root - INFO - step: 18135 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9334 +[titan] 2025-10-05 09:41:39,951 - root - INFO - lr: 3.1169e-05 gnorm: 1.06 [11:07:30<13:24:47] +[titan] 2025-10-05 09:41:50,877 - root - INFO - step: 18140 loss: 2.2241 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 09:41:50,877 - root - INFO - lr: 3.1160e-05 gnorm: 1.13 [11:07:41<13:24:36] +[titan] 2025-10-05 09:42:01,736 - root - INFO - step: 18145 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 09:42:01,736 - root - INFO - lr: 3.1151e-05 gnorm: 1.11 [11:07:51<13:24:25] +[titan] 2025-10-05 09:42:10,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:42:12,607 - root - INFO - step: 18150 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8818 +[titan] 2025-10-05 09:42:12,607 - root - INFO - lr: 3.1142e-05 gnorm: 1.06 [11:08:02<13:24:13] +[titan] 2025-10-05 09:42:23,477 - root - INFO - step: 18155 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9019 +[titan] 2025-10-05 09:42:23,477 - root - INFO - lr: 3.1133e-05 gnorm: 1.02 [11:08:13<13:24:02] +[titan] 2025-10-05 09:42:34,354 - root - INFO - step: 18160 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:42:34,354 - root - INFO - lr: 3.1125e-05 gnorm: 1.06 [11:08:24<13:23:51] +[titan] 2025-10-05 09:42:45,261 - root - INFO - step: 18165 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 09:42:45,261 - root - INFO - lr: 3.1116e-05 gnorm: 1.07 [11:08:35<13:23:40] +[titan] 2025-10-05 09:42:56,138 - root - INFO - step: 18170 loss: 2.1630 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9103 +[titan] 2025-10-05 09:42:56,138 - root - INFO - lr: 3.1107e-05 gnorm: 1.06 [11:08:46<13:23:28] +[titan] 2025-10-05 09:43:06,985 - root - INFO - step: 18175 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 09:43:06,985 - root - INFO - lr: 3.1098e-05 gnorm: 1.05 [11:08:57<13:23:17] +[titan] 2025-10-05 09:43:17,840 - root - INFO - step: 18180 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 09:43:17,840 - root - INFO - lr: 
3.1089e-05 gnorm: 1.06 [11:09:07<13:23:06] +[titan] 2025-10-05 09:43:28,708 - root - INFO - step: 18185 loss: 2.2232 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9641 +[titan] 2025-10-05 09:43:28,708 - root - INFO - lr: 3.1080e-05 gnorm: 1.07 [11:09:18<13:22:55] +[titan] 2025-10-05 09:43:39,585 - root - INFO - step: 18190 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:43:39,586 - root - INFO - lr: 3.1072e-05 gnorm: 1.04 [11:09:29<13:22:43] +[titan] 2025-10-05 09:43:50,582 - root - INFO - step: 18195 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.41 mfu: 41.80% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 09:43:50,582 - root - INFO - lr: 3.1063e-05 gnorm: 1.10 [11:09:40<13:22:32] +[titan] 2025-10-05 09:43:59,256 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:44:01,443 - root - INFO - step: 18200 loss: 2.1663 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9138 +[titan] 2025-10-05 09:44:01,443 - root - INFO - lr: 3.1054e-05 gnorm: 1.04 [11:09:51<13:22:21] +[titan] 2025-10-05 09:44:12,364 - root - INFO - step: 18205 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 09:44:12,364 - root - INFO - lr: 3.1045e-05 gnorm: 1.07 [11:10:02<13:22:10] +[titan] 2025-10-05 09:44:23,235 - root - INFO - step: 18210 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 09:44:23,235 - root - INFO - lr: 3.1036e-05 gnorm: 1.04 [11:10:13<13:21:59] +[titan] 2025-10-05 09:44:34,114 - root - INFO - step: 18215 loss: 2.1970 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 09:44:34,114 - root - INFO - lr: 3.1028e-05 gnorm: 1.08 [11:10:24<13:21:47] +[titan] 2025-10-05 09:44:45,034 - root - INFO - step: 18220 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:44:45,034 - root - INFO - lr: 3.1019e-05 gnorm: 1.02 [11:10:35<13:21:36] +[titan] 2025-10-05 09:44:55,926 - root - INFO - step: 18225 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 09:44:55,926 - root - INFO - lr: 3.1010e-05 gnorm: 1.06 [11:10:46<13:21:25] +[titan] 2025-10-05 09:45:06,773 - root - INFO - step: 18230 loss: 2.2584 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 09:45:06,773 - root - INFO - lr: 
3.1001e-05 gnorm: 1.08 [11:10:56<13:21:14] +[titan] 2025-10-05 09:45:17,639 - root - INFO - step: 18235 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:45:17,639 - root - INFO - lr: 3.0992e-05 gnorm: 1.10 [11:11:07<13:21:02] +[titan] 2025-10-05 09:45:28,516 - root - INFO - step: 18240 loss: 2.1421 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 09:45:28,516 - root - INFO - lr: 3.0984e-05 gnorm: 1.05 [11:11:18<13:20:51] +[titan] 2025-10-05 09:45:39,379 - root - INFO - step: 18245 loss: 2.1122 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8654 +[titan] 2025-10-05 09:45:39,379 - root - INFO - lr: 3.0975e-05 gnorm: 1.06 [11:11:29<13:20:40] +[titan] 2025-10-05 09:45:48,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:45:50,324 - root - INFO - step: 18250 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9443 +[titan] 2025-10-05 09:45:50,324 - root - INFO - lr: 3.0966e-05 gnorm: 1.03 [11:11:40<13:20:29] +[titan] 2025-10-05 09:46:01,186 - root - INFO - step: 18255 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9065 +[titan] 2025-10-05 09:46:01,186 - root - INFO - lr: 3.0957e-05 gnorm: 1.04 [11:11:51<13:20:18] +[titan] 2025-10-05 09:46:12,076 - root - INFO - step: 18260 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:46:12,077 - root - INFO - lr: 3.0948e-05 gnorm: 1.05 [11:12:02<13:20:06] +[titan] 2025-10-05 09:46:22,981 - root - INFO - step: 18265 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 09:46:22,981 - root - INFO - lr: 3.0939e-05 gnorm: 1.12 [11:12:13<13:19:55] +[titan] 2025-10-05 09:46:33,839 - root - INFO - step: 18270 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:46:33,840 - root - INFO - lr: 3.0931e-05 gnorm: 1.05 [11:12:23<13:19:44] +[titan] 2025-10-05 09:46:44,752 - root - INFO - step: 18275 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 09:46:44,752 - root - INFO - lr: 3.0922e-05 gnorm: 1.10 [11:12:34<13:19:33] +[titan] 2025-10-05 09:46:55,639 - root - INFO - step: 18280 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 09:46:55,639 - root - INFO - lr: 
3.0913e-05 gnorm: 1.08 [11:12:45<13:19:21] +[titan] 2025-10-05 09:47:06,489 - root - INFO - step: 18285 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 09:47:06,489 - root - INFO - lr: 3.0904e-05 gnorm: 1.06 [11:12:56<13:19:10] +[titan] 2025-10-05 09:47:17,381 - root - INFO - step: 18290 loss: 2.2060 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:47:17,381 - root - INFO - lr: 3.0895e-05 gnorm: 1.12 [11:13:07<13:18:59] +[titan] 2025-10-05 09:47:28,261 - root - INFO - step: 18295 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9274 +[titan] 2025-10-05 09:47:28,261 - root - INFO - lr: 3.0887e-05 gnorm: 1.07 [11:13:18<13:18:48] +[titan] 2025-10-05 09:47:36,941 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:47:39,132 - root - INFO - step: 18300 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 09:47:39,132 - root - INFO - lr: 3.0878e-05 gnorm: 1.07 [11:13:29<13:18:36] +[titan] 2025-10-05 09:47:50,070 - root - INFO - step: 18305 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:47:50,070 - root - INFO - lr: 3.0869e-05 gnorm: 1.04 [11:13:40<13:18:25] +[titan] 2025-10-05 09:48:00,953 - root - INFO - step: 18310 loss: 2.2365 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 09:48:00,953 - root - INFO - lr: 3.0860e-05 gnorm: 1.10 [11:13:51<13:18:14] +[titan] 2025-10-05 09:48:11,821 - root - INFO - step: 18315 loss: 2.1228 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 09:48:11,821 - root - INFO - lr: 3.0851e-05 gnorm: 1.10 [11:14:01<13:18:03] +[titan] 2025-10-05 09:48:22,702 - root - INFO - step: 18320 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:22,702 - root - INFO - lr: 3.0842e-05 gnorm: 1.10 [11:14:12<13:17:52] +[titan] 2025-10-05 09:48:33,562 - root - INFO - step: 18325 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 09:48:33,562 - root - INFO - lr: 3.0834e-05 gnorm: 1.06 [11:14:23<13:17:40] +[titan] 2025-10-05 09:48:44,442 - root - INFO - step: 18330 loss: 2.1384 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:48:44,442 - root - INFO - lr: 
3.0825e-05 gnorm: 1.07 [11:14:34<13:17:29] +[titan] 2025-10-05 09:48:55,377 - root - INFO - step: 18335 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:55,377 - root - INFO - lr: 3.0816e-05 gnorm: 1.05 [11:14:45<13:17:18] +[titan] 2025-10-05 09:49:06,255 - root - INFO - step: 18340 loss: 2.1540 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:49:06,255 - root - INFO - lr: 3.0807e-05 gnorm: 1.02 [11:14:56<13:17:07] +[titan] 2025-10-05 09:49:17,139 - root - INFO - step: 18345 loss: 2.1319 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8845 +[titan] 2025-10-05 09:49:17,139 - root - INFO - lr: 3.0798e-05 gnorm: 1.06 [11:15:07<13:16:56] +[titan] 2025-10-05 09:49:25,826 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:49:28,007 - root - INFO - step: 18350 loss: 2.2255 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9664 +[titan] 2025-10-05 09:49:28,007 - root - INFO - lr: 3.0789e-05 gnorm: 1.07 [11:15:18<13:16:44] +[titan] 2025-10-05 09:49:38,916 - root - INFO - step: 18355 loss: 2.1700 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9168 +[titan] 2025-10-05 09:49:38,916 - root - INFO - lr: 3.0781e-05 gnorm: 1.03 [11:15:29<13:16:33] +[titan] 2025-10-05 09:49:49,833 - root - INFO - step: 18360 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9456 +[titan] 2025-10-05 09:49:49,834 - root - INFO - lr: 3.0772e-05 gnorm: 1.12 [11:15:39<13:16:22] +[titan] 2025-10-05 09:50:00,730 - root - INFO - step: 18365 loss: 2.2105 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 09:50:00,730 - root - INFO - lr: 3.0763e-05 gnorm: 1.05 [11:15:50<13:16:11] +[titan] 2025-10-05 09:50:11,596 - root - INFO - step: 18370 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9432 +[titan] 2025-10-05 09:50:11,596 - root - INFO - lr: 3.0754e-05 gnorm: 1.09 [11:16:01<13:15:59] +[titan] 2025-10-05 09:50:22,481 - root - INFO - step: 18375 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8909 +[titan] 2025-10-05 09:50:22,481 - root - INFO - lr: 3.0745e-05 gnorm: 1.07 [11:16:12<13:15:48] +[titan] 2025-10-05 09:50:33,379 - root - INFO - step: 18380 loss: 2.1743 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 09:50:33,379 - root - INFO - lr: 
3.0736e-05 gnorm: 1.05 [11:16:23<13:15:37] +[titan] 2025-10-05 09:50:44,278 - root - INFO - step: 18385 loss: 2.2455 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9831 +[titan] 2025-10-05 09:50:44,278 - root - INFO - lr: 3.0728e-05 gnorm: 1.08 [11:16:34<13:15:26] +[titan] 2025-10-05 09:50:55,194 - root - INFO - step: 18390 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 09:50:55,194 - root - INFO - lr: 3.0719e-05 gnorm: 1.04 [11:16:45<13:15:15] +[titan] 2025-10-05 09:51:06,079 - root - INFO - step: 18395 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9155 +[titan] 2025-10-05 09:51:06,079 - root - INFO - lr: 3.0710e-05 gnorm: 1.07 [11:16:56<13:15:03] +[titan] 2025-10-05 09:51:14,780 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:51:16,960 - root - INFO - step: 18400 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:51:16,961 - root - INFO - lr: 3.0701e-05 gnorm: 1.03 [11:17:07<13:14:52] +[titan] 2025-10-05 09:51:27,817 - root - INFO - step: 18405 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 09:51:27,817 - root - INFO - lr: 3.0692e-05 gnorm: 1.07 [11:17:17<13:14:41] +[titan] 2025-10-05 09:51:38,681 - root - INFO - step: 18410 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 09:51:38,682 - root - INFO - lr: 3.0683e-05 gnorm: 1.06 [11:17:28<13:14:30] +[titan] 2025-10-05 09:51:49,581 - root - INFO - step: 18415 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8667 +[titan] 2025-10-05 09:51:49,582 - root - INFO - lr: 3.0675e-05 gnorm: 1.10 [11:17:39<13:14:18] +[titan] 2025-10-05 09:52:00,490 - root - INFO - step: 18420 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:52:00,490 - root - INFO - lr: 3.0666e-05 gnorm: 1.06 [11:17:50<13:14:07] +[titan] 2025-10-05 09:52:11,358 - root - INFO - step: 18425 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9183 +[titan] 2025-10-05 09:52:11,358 - root - INFO - lr: 3.0657e-05 gnorm: 1.09 [11:18:01<13:13:56] +[titan] 2025-10-05 09:52:22,299 - root - INFO - step: 18430 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 09:52:22,299 - root - INFO - lr: 
3.0648e-05 gnorm: 1.10 [11:18:12<13:13:45] +[titan] 2025-10-05 09:52:26,818 - root - INFO - Dumping profiler traces at step 18432 +[titan] 2025-10-05 09:52:26,856 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:52:33,371 - root - INFO - step: 18435 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 29,597 tflops: 410.61 mfu: 41.52% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 09:52:33,371 - root - INFO - lr: 3.0639e-05 gnorm: 1.09 [11:18:23<13:13:34] +[titan] 2025-10-05 09:52:44,226 - root - INFO - step: 18440 loss: 2.1224 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8757 +[titan] 2025-10-05 09:52:44,226 - root - INFO - lr: 3.0630e-05 gnorm: 1.05 [11:18:34<13:13:23] +[titan] 2025-10-05 09:52:55,086 - root - INFO - step: 18445 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:52:55,086 - root - INFO - lr: 3.0622e-05 gnorm: 1.16 [11:18:45<13:13:11] +[titan] 2025-10-05 09:53:03,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:53:05,965 - root - INFO - step: 18450 loss: 2.1736 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:53:05,965 - root - INFO - lr: 3.0613e-05 gnorm: 1.04 [11:18:56<13:13:00] +[titan] 2025-10-05 09:53:16,799 - root - INFO - step: 18455 loss: 2.2016 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9454 +[titan] 2025-10-05 09:53:16,799 - root - INFO - lr: 3.0604e-05 gnorm: 1.08 [11:19:06<13:12:49] +[titan] 2025-10-05 09:53:27,669 - root - INFO - step: 18460 loss: 2.1859 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9308 +[titan] 2025-10-05 09:53:27,669 - root - INFO - lr: 3.0595e-05 gnorm: 1.09 [11:19:17<13:12:38] +[titan] 2025-10-05 09:53:38,507 - root - INFO - step: 18465 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:53:38,508 - root - INFO - lr: 3.0586e-05 gnorm: 1.11 [11:19:28<13:12:26] +[titan] 2025-10-05 09:53:49,352 - root - INFO - step: 18470 loss: 2.2070 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:53:49,352 - root - INFO - lr: 3.0577e-05 gnorm: 1.15 [11:19:39<13:12:15] +[titan] 2025-10-05 09:54:00,241 - root - INFO - step: 18475 loss: 2.2443 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:54:00,242 - root - INFO - lr: 3.0569e-05 gnorm: 1.07 [11:19:50<13:12:04] +[titan] 2025-10-05 09:54:11,123 - root - INFO - step: 18480 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 09:54:11,123 - root - INFO - lr: 
3.0560e-05 gnorm: 1.02 [11:20:01<13:11:53] +[titan] 2025-10-05 09:54:21,973 - root - INFO - step: 18485 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:54:21,973 - root - INFO - lr: 3.0551e-05 gnorm: 3.61 [11:20:12<13:11:41] +[titan] 2025-10-05 09:54:32,841 - root - INFO - step: 18490 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:54:32,841 - root - INFO - lr: 3.0542e-05 gnorm: 1.08 [11:20:22<13:11:30] +[titan] 2025-10-05 09:54:43,710 - root - INFO - step: 18495 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:54:43,710 - root - INFO - lr: 3.0533e-05 gnorm: 1.03 [11:20:33<13:11:19] +[titan] 2025-10-05 09:54:52,412 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:54:54,598 - root - INFO - step: 18500 loss: 2.1801 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:54:54,598 - root - INFO - lr: 3.0524e-05 gnorm: 1.07 [11:20:44<13:11:08] +[titan] 2025-10-05 09:55:05,447 - root - INFO - step: 18505 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 09:55:05,447 - root - INFO - lr: 3.0515e-05 gnorm: 1.05 [11:20:55<13:10:56] +[titan] 2025-10-05 09:55:16,304 - root - INFO - step: 18510 loss: 2.2328 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9729 +[titan] 2025-10-05 09:55:16,304 - root - INFO - lr: 3.0507e-05 gnorm: 1.12 [11:21:06<13:10:45] +[titan] 2025-10-05 09:55:27,210 - root - INFO - step: 18515 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 09:55:27,210 - root - INFO - lr: 3.0498e-05 gnorm: 1.04 [11:21:17<13:10:34] +[titan] 2025-10-05 09:55:38,070 - root - INFO - step: 18520 loss: 2.1990 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:55:38,071 - root - INFO - lr: 3.0489e-05 gnorm: 1.06 [11:21:28<13:10:23] +[titan] 2025-10-05 09:55:48,946 - root - INFO - step: 18525 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8803 +[titan] 2025-10-05 09:55:48,946 - root - INFO - lr: 3.0480e-05 gnorm: 1.10 [11:21:39<13:10:11] +[titan] 2025-10-05 09:55:59,819 - root - INFO - step: 18530 loss: 2.1569 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:55:59,819 - root - INFO - lr: 
3.0471e-05 gnorm: 1.09 [11:21:49<13:10:00] +[titan] 2025-10-05 09:56:10,667 - root - INFO - step: 18535 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 09:56:10,668 - root - INFO - lr: 3.0462e-05 gnorm: 1.08 [11:22:00<13:09:49] +[titan] 2025-10-05 09:56:21,514 - root - INFO - step: 18540 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8789 +[titan] 2025-10-05 09:56:21,515 - root - INFO - lr: 3.0454e-05 gnorm: 1.06 [11:22:11<13:09:38] +[titan] 2025-10-05 09:56:32,416 - root - INFO - step: 18545 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 09:56:32,416 - root - INFO - lr: 3.0445e-05 gnorm: 1.09 [11:22:22<13:09:27] +[titan] 2025-10-05 09:56:41,121 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:56:43,301 - root - INFO - step: 18550 loss: 2.2123 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9545 +[titan] 2025-10-05 09:56:43,301 - root - INFO - lr: 3.0436e-05 gnorm: 1.11 [11:22:33<13:09:15] +[titan] 2025-10-05 09:56:54,209 - root - INFO - step: 18555 loss: 2.1250 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8774 +[titan] 2025-10-05 09:56:54,209 - root - INFO - lr: 3.0427e-05 gnorm: 1.05 [11:22:44<13:09:04] +[titan] 2025-10-05 09:57:05,059 - root - INFO - step: 18560 loss: 2.1067 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 09:57:05,060 - root - INFO - lr: 3.0418e-05 gnorm: 1.11 [11:22:55<13:08:53] +[titan] 2025-10-05 09:57:15,909 - root - INFO - step: 18565 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8970 +[titan] 2025-10-05 09:57:15,909 - root - INFO - lr: 3.0409e-05 gnorm: 1.06 [11:23:06<13:08:42] +[titan] 2025-10-05 09:57:26,796 - root - INFO - step: 18570 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:57:26,796 - root - INFO - lr: 3.0400e-05 gnorm: 1.05 [11:23:16<13:08:30] +[titan] 2025-10-05 09:57:37,659 - root - INFO - step: 18575 loss: 2.1669 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:57:37,659 - root - INFO - lr: 3.0392e-05 gnorm: 1.07 [11:23:27<13:08:19] +[titan] 2025-10-05 09:57:48,558 - root - INFO - step: 18580 loss: 2.1694 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:57:48,559 - root - INFO - lr: 
3.0383e-05 gnorm: 1.08 [11:23:38<13:08:08] +[titan] 2025-10-05 09:57:59,464 - root - INFO - step: 18585 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 09:57:59,464 - root - INFO - lr: 3.0374e-05 gnorm: 1.08 [11:23:49<13:07:57] +[titan] 2025-10-05 09:58:10,319 - root - INFO - step: 18590 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 09:58:10,319 - root - INFO - lr: 3.0365e-05 gnorm: 1.09 [11:24:00<13:07:46] +[titan] 2025-10-05 09:58:21,169 - root - INFO - step: 18595 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 09:58:21,169 - root - INFO - lr: 3.0356e-05 gnorm: 1.06 [11:24:11<13:07:34] +[titan] 2025-10-05 09:58:29,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:58:32,051 - root - INFO - step: 18600 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:58:32,051 - root - INFO - lr: 3.0347e-05 gnorm: 1.06 [11:24:22<13:07:23] +[titan] 2025-10-05 09:58:42,941 - root - INFO - step: 18605 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 09:58:42,941 - root - INFO - lr: 3.0339e-05 gnorm: 1.07 [11:24:33<13:07:12] +[titan] 2025-10-05 09:58:53,849 - root - INFO - step: 18610 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 09:58:53,849 - root - INFO - lr: 3.0330e-05 gnorm: 1.13 [11:24:43<13:07:01] +[titan] 2025-10-05 09:59:04,767 - root - INFO - step: 18615 loss: 2.1618 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9098 +[titan] 2025-10-05 09:59:04,767 - root - INFO - lr: 3.0321e-05 gnorm: 1.07 [11:24:54<13:06:49] +[titan] 2025-10-05 09:59:15,655 - root - INFO - step: 18620 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:59:15,655 - root - INFO - lr: 3.0312e-05 gnorm: 1.08 [11:25:05<13:06:38] +[titan] 2025-10-05 09:59:26,502 - root - INFO - step: 18625 loss: 2.1982 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 09:59:26,502 - root - INFO - lr: 3.0303e-05 gnorm: 1.06 [11:25:16<13:06:27] +[titan] 2025-10-05 09:59:37,342 - root - INFO - step: 18630 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:59:37,342 - root - INFO - lr: 
3.0294e-05 gnorm: 1.02 [11:25:27<13:06:16] +[titan] 2025-10-05 09:59:48,204 - root - INFO - step: 18635 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:59:48,205 - root - INFO - lr: 3.0285e-05 gnorm: 1.06 [11:25:38<13:06:05] +[titan] 2025-10-05 09:59:59,141 - root - INFO - step: 18640 loss: 2.1586 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:59:59,142 - root - INFO - lr: 3.0277e-05 gnorm: 1.09 [11:25:49<13:05:53] +[titan] 2025-10-05 10:00:09,999 - root - INFO - step: 18645 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:00:09,999 - root - INFO - lr: 3.0268e-05 gnorm: 1.04 [11:26:00<13:05:42] +[titan] 2025-10-05 10:00:18,686 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:00:20,868 - root - INFO - step: 18650 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 10:00:20,868 - root - INFO - lr: 3.0259e-05 gnorm: 1.06 [11:26:10<13:05:31] +[titan] 2025-10-05 10:00:31,751 - root - INFO - step: 18655 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8955 +[titan] 2025-10-05 10:00:31,751 - root - INFO - lr: 3.0250e-05 gnorm: 1.10 [11:26:21<13:05:20] +[titan] 2025-10-05 10:00:42,624 - root - INFO - step: 18660 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9236 +[titan] 2025-10-05 10:00:42,624 - root - INFO - lr: 3.0241e-05 gnorm: 1.06 [11:26:32<13:05:08] +[titan] 2025-10-05 10:00:53,508 - root - INFO - step: 18665 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 10:00:53,508 - root - INFO - lr: 3.0232e-05 gnorm: 1.07 [11:26:43<13:04:57] +[titan] 2025-10-05 10:01:04,417 - root - INFO - step: 18670 loss: 2.1073 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8611 +[titan] 2025-10-05 10:01:04,417 - root - INFO - lr: 3.0223e-05 gnorm: 1.05 [11:26:54<13:04:46] +[titan] 2025-10-05 10:01:15,322 - root - INFO - step: 18675 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9018 +[titan] 2025-10-05 10:01:15,322 - root - INFO - lr: 3.0215e-05 gnorm: 1.01 [11:27:05<13:04:35] +[titan] 2025-10-05 10:01:26,187 - root - INFO - step: 18680 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 10:01:26,188 - root - INFO - lr: 
3.0206e-05 gnorm: 1.06 [11:27:16<13:04:24] +[titan] 2025-10-05 10:01:37,075 - root - INFO - step: 18685 loss: 2.2297 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9695 +[titan] 2025-10-05 10:01:37,076 - root - INFO - lr: 3.0197e-05 gnorm: 1.09 [11:27:27<13:04:12] +[titan] 2025-10-05 10:01:47,947 - root - INFO - step: 18690 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 10:01:47,947 - root - INFO - lr: 3.0188e-05 gnorm: 1.13 [11:27:38<13:04:01] +[titan] 2025-10-05 10:01:58,853 - root - INFO - step: 18695 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 10:01:58,853 - root - INFO - lr: 3.0179e-05 gnorm: 1.11 [11:27:48<13:03:50] +[titan] 2025-10-05 10:02:07,534 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:02:09,720 - root - INFO - step: 18700 loss: 2.1760 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9218 +[titan] 2025-10-05 10:02:09,720 - root - INFO - lr: 3.0170e-05 gnorm: 1.05 [11:27:59<13:03:39] +[titan] 2025-10-05 10:02:20,646 - root - INFO - step: 18705 loss: 2.1878 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:02:20,646 - root - INFO - lr: 3.0161e-05 gnorm: 1.13 [11:28:10<13:03:28] +[titan] 2025-10-05 10:02:31,519 - root - INFO - step: 18710 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9250 +[titan] 2025-10-05 10:02:31,519 - root - INFO - lr: 3.0153e-05 gnorm: 1.03 [11:28:21<13:03:16] +[titan] 2025-10-05 10:02:42,408 - root - INFO - step: 18715 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 10:02:42,408 - root - INFO - lr: 3.0144e-05 gnorm: 1.14 [11:28:32<13:03:05] +[titan] 2025-10-05 10:02:53,291 - root - INFO - step: 18720 loss: 2.1198 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 10:02:53,292 - root - INFO - lr: 3.0135e-05 gnorm: 1.04 [11:28:43<13:02:54] +[titan] 2025-10-05 10:03:04,164 - root - INFO - step: 18725 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 10:03:04,164 - root - INFO - lr: 3.0126e-05 gnorm: 1.09 [11:28:54<13:02:43] +[titan] 2025-10-05 10:03:15,026 - root - INFO - step: 18730 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 10:03:15,026 - root - INFO - lr: 
3.0117e-05 gnorm: 1.09 [11:29:05<13:02:31] +[titan] 2025-10-05 10:03:25,889 - root - INFO - step: 18735 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:03:25,889 - root - INFO - lr: 3.0108e-05 gnorm: 1.09 [11:29:15<13:02:20] +[titan] 2025-10-05 10:03:36,788 - root - INFO - step: 18740 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 10:03:36,789 - root - INFO - lr: 3.0099e-05 gnorm: 1.05 [11:29:26<13:02:09] +[titan] 2025-10-05 10:03:47,682 - root - INFO - step: 18745 loss: 2.1174 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:03:47,683 - root - INFO - lr: 3.0090e-05 gnorm: 1.02 [11:29:37<13:01:58] +[titan] 2025-10-05 10:03:56,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:03:58,557 - root - INFO - step: 18750 loss: 2.1769 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 10:03:58,557 - root - INFO - lr: 3.0082e-05 gnorm: 1.06 [11:29:48<13:01:47] +[titan] 2025-10-05 10:04:09,464 - root - INFO - step: 18755 loss: 2.1852 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9314 +[titan] 2025-10-05 10:04:09,464 - root - INFO - lr: 3.0073e-05 gnorm: 1.08 [11:29:59<13:01:35] +[titan] 2025-10-05 10:04:20,357 - root - INFO - step: 18760 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:04:20,357 - root - INFO - lr: 3.0064e-05 gnorm: 1.07 [11:30:10<13:01:24] +[titan] 2025-10-05 10:04:31,231 - root - INFO - step: 18765 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 10:04:31,232 - root - INFO - lr: 3.0055e-05 gnorm: 1.11 [11:30:21<13:01:13] +[titan] 2025-10-05 10:04:42,135 - root - INFO - step: 18770 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 10:04:42,136 - root - INFO - lr: 3.0046e-05 gnorm: 1.10 [11:30:32<13:01:02] +[titan] 2025-10-05 10:04:53,006 - root - INFO - step: 18775 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 10:04:53,006 - root - INFO - lr: 3.0037e-05 gnorm: 1.03 [11:30:43<13:00:51] +[titan] 2025-10-05 10:05:03,904 - root - INFO - step: 18780 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 10:05:03,904 - root - INFO - lr: 
3.0028e-05 gnorm: 1.07 [11:30:53<13:00:39] +[titan] 2025-10-05 10:05:14,747 - root - INFO - step: 18785 loss: 2.1812 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 10:05:14,748 - root - INFO - lr: 3.0020e-05 gnorm: 1.09 [11:31:04<13:00:28] +[titan] 2025-10-05 10:05:25,598 - root - INFO - step: 18790 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 10:05:25,598 - root - INFO - lr: 3.0011e-05 gnorm: 1.06 [11:31:15<13:00:17] +[titan] 2025-10-05 10:05:36,466 - root - INFO - step: 18795 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 10:05:36,466 - root - INFO - lr: 3.0002e-05 gnorm: 1.04 [11:31:26<13:00:06] +[titan] 2025-10-05 10:05:45,130 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:05:47,339 - root - INFO - step: 18800 loss: 2.2290 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 10:05:47,340 - root - INFO - lr: 2.9993e-05 gnorm: 1.07 [11:31:37<12:59:54] +[titan] 2025-10-05 10:05:58,210 - root - INFO - step: 18805 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 10:05:58,210 - root - INFO - lr: 2.9984e-05 gnorm: 1.07 [11:31:48<12:59:43] +[titan] 2025-10-05 10:06:09,120 - root - INFO - step: 18810 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 10:06:09,121 - root - INFO - lr: 2.9975e-05 gnorm: 1.04 [11:31:59<12:59:32] +[titan] 2025-10-05 10:06:19,986 - root - INFO - step: 18815 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 10:06:19,986 - root - INFO - lr: 2.9966e-05 gnorm: 1.08 [11:32:10<12:59:21] +[titan] 2025-10-05 10:06:30,847 - root - INFO - step: 18820 loss: 2.1851 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 10:06:30,847 - root - INFO - lr: 2.9957e-05 gnorm: 1.05 [11:32:20<12:59:10] +[titan] 2025-10-05 10:06:41,727 - root - INFO - step: 18825 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:06:41,727 - root - INFO - lr: 2.9949e-05 gnorm: 1.07 [11:32:31<12:58:58] +[titan] 2025-10-05 10:06:52,600 - root - INFO - step: 18830 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 10:06:52,601 - root - INFO - lr: 
2.9940e-05 gnorm: 1.10 [11:32:42<12:58:47] +[titan] 2025-10-05 10:07:03,527 - root - INFO - step: 18835 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9505 +[titan] 2025-10-05 10:07:03,527 - root - INFO - lr: 2.9931e-05 gnorm: 1.07 [11:32:53<12:58:36] +[titan] 2025-10-05 10:07:14,367 - root - INFO - step: 18840 loss: 2.2003 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 10:07:14,367 - root - INFO - lr: 2.9922e-05 gnorm: 1.08 [11:33:04<12:58:25] +[titan] 2025-10-05 10:07:25,248 - root - INFO - step: 18845 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 10:07:25,248 - root - INFO - lr: 2.9913e-05 gnorm: 1.08 [11:33:15<12:58:14] +[titan] 2025-10-05 10:07:33,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:07:36,110 - root - INFO - step: 18850 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 10:07:36,110 - root - INFO - lr: 2.9904e-05 gnorm: 1.09 [11:33:26<12:58:02] +[titan] 2025-10-05 10:07:46,979 - root - INFO - step: 18855 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:07:46,979 - root - INFO - lr: 2.9895e-05 gnorm: 1.07 [11:33:37<12:57:51] +[titan] 2025-10-05 10:07:57,853 - root - INFO - step: 18860 loss: 2.1443 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:07:57,853 - root - INFO - lr: 2.9886e-05 gnorm: 1.06 [11:33:47<12:57:40] +[titan] 2025-10-05 10:08:08,767 - root - INFO - step: 18865 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 10:08:08,767 - root - INFO - lr: 2.9878e-05 gnorm: 1.05 [11:33:58<12:57:29] +[titan] 2025-10-05 10:08:19,643 - root - INFO - step: 18870 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:08:19,643 - root - INFO - lr: 2.9869e-05 gnorm: 1.09 [11:34:09<12:57:17] +[titan] 2025-10-05 10:08:30,505 - root - INFO - step: 18875 loss: 2.1432 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:08:30,505 - root - INFO - lr: 2.9860e-05 gnorm: 1.11 [11:34:20<12:57:06] +[titan] 2025-10-05 10:08:41,373 - root - INFO - step: 18880 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8639 +[titan] 2025-10-05 10:08:41,373 - root - INFO - lr: 
2.9851e-05 gnorm: 1.04 [11:34:31<12:56:55] +[titan] 2025-10-05 10:08:52,227 - root - INFO - step: 18885 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:08:52,227 - root - INFO - lr: 2.9842e-05 gnorm: 1.09 [11:34:42<12:56:44] +[titan] 2025-10-05 10:09:03,099 - root - INFO - step: 18890 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 10:09:03,099 - root - INFO - lr: 2.9833e-05 gnorm: 1.06 [11:34:53<12:56:33] +[titan] 2025-10-05 10:09:13,982 - root - INFO - step: 18895 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 10:09:13,982 - root - INFO - lr: 2.9824e-05 gnorm: 1.06 [11:35:04<12:56:21] +[titan] 2025-10-05 10:09:22,708 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:09:24,889 - root - INFO - step: 18900 loss: 2.2596 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 10:09:24,889 - root - INFO - lr: 2.9815e-05 gnorm: 1.09 [11:35:14<12:56:10] +[titan] 2025-10-05 10:09:35,723 - root - INFO - step: 18905 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9049 +[titan] 2025-10-05 10:09:35,723 - root - INFO - lr: 2.9807e-05 gnorm: 2.16 [11:35:25<12:55:59] +[titan] 2025-10-05 10:09:46,616 - root - INFO - step: 18910 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 10:09:46,616 - root - INFO - lr: 2.9798e-05 gnorm: 1.10 [11:35:36<12:55:48] +[titan] 2025-10-05 10:09:57,505 - root - INFO - step: 18915 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8990 +[titan] 2025-10-05 10:09:57,506 - root - INFO - lr: 2.9789e-05 gnorm: 1.06 [11:35:47<12:55:37] +[titan] 2025-10-05 10:10:08,408 - root - INFO - step: 18920 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 10:10:08,409 - root - INFO - lr: 2.9780e-05 gnorm: 1.11 [11:35:58<12:55:25] +[titan] 2025-10-05 10:10:19,290 - root - INFO - step: 18925 loss: 2.1401 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8910 +[titan] 2025-10-05 10:10:19,290 - root - INFO - lr: 2.9771e-05 gnorm: 1.09 [11:36:09<12:55:14] +[titan] 2025-10-05 10:10:30,188 - root - INFO - step: 18930 loss: 2.1578 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9075 +[titan] 2025-10-05 10:10:30,188 - root - INFO - lr: 
2.9762e-05 gnorm: 1.08 [11:36:20<12:55:03] +[titan] 2025-10-05 10:10:41,057 - root - INFO - step: 18935 loss: 2.1455 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:10:41,057 - root - INFO - lr: 2.9753e-05 gnorm: 1.08 [11:36:31<12:54:52] +[titan] 2025-10-05 10:10:51,943 - root - INFO - step: 18940 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 10:10:51,943 - root - INFO - lr: 2.9744e-05 gnorm: 1.09 [11:36:42<12:54:41] +[titan] 2025-10-05 10:11:00,923 - root - INFO - Dumping profiler traces at step 18944 +[titan] 2025-10-05 10:11:00,961 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:11:03,165 - root - INFO - step: 18945 loss: 2.2146 memory: 118.84GiB(85.28%) tps: 29,200 tflops: 405.11 mfu: 40.96% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 10:11:03,166 - root - INFO - lr: 2.9736e-05 gnorm: 1.05 [11:36:53<12:54:30] +[titan] 2025-10-05 10:11:11,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:11:14,040 - root - INFO - step: 18950 loss: 2.2217 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9615 +[titan] 2025-10-05 10:11:14,040 - root - INFO - lr: 2.9727e-05 gnorm: 1.10 [11:37:04<12:54:18] +[titan] 2025-10-05 10:11:24,927 - root - INFO - step: 18955 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 10:11:24,927 - root - INFO - lr: 2.9718e-05 gnorm: 1.04 [11:37:14<12:54:07] +[titan] 2025-10-05 10:11:35,836 - root - INFO - step: 18960 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:11:35,836 - root - INFO - lr: 2.9709e-05 gnorm: 1.08 [11:37:25<12:53:56] +[titan] 2025-10-05 10:11:46,725 - root - INFO - step: 18965 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9561 +[titan] 2025-10-05 10:11:46,725 - root - INFO - lr: 2.9700e-05 gnorm: 1.06 [11:37:36<12:53:45] +[titan] 2025-10-05 10:11:57,607 - root - INFO - step: 18970 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 10:11:57,608 - root - INFO - lr: 2.9691e-05 gnorm: 1.06 [11:37:47<12:53:34] +[titan] 2025-10-05 10:12:08,492 - root - INFO - step: 18975 loss: 2.1885 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9336 +[titan] 2025-10-05 10:12:08,492 - root - INFO - lr: 2.9682e-05 gnorm: 1.05 [11:37:58<12:53:22] +[titan] 2025-10-05 10:12:19,363 - root - INFO - step: 18980 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 10:12:19,364 - root - INFO - lr: 
2.9673e-05 gnorm: 1.05 [11:38:09<12:53:11] +[titan] 2025-10-05 10:12:30,251 - root - INFO - step: 18985 loss: 2.2178 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 10:12:30,252 - root - INFO - lr: 2.9664e-05 gnorm: 1.08 [11:38:20<12:53:00] +[titan] 2025-10-05 10:12:41,145 - root - INFO - step: 18990 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 10:12:41,146 - root - INFO - lr: 2.9656e-05 gnorm: 1.04 [11:38:31<12:52:49] +[titan] 2025-10-05 10:12:52,037 - root - INFO - step: 18995 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:12:52,037 - root - INFO - lr: 2.9647e-05 gnorm: 1.06 [11:38:42<12:52:38] +[titan] 2025-10-05 10:13:00,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:13:02,914 - root - INFO - step: 19000 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9377 +[titan] 2025-10-05 10:13:02,914 - root - INFO - lr: 2.9638e-05 gnorm: 1.06 [11:38:52<12:52:26] +[titan] 2025-10-05 10:13:13,797 - root - INFO - step: 19005 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 10:13:13,797 - root - INFO - lr: 2.9629e-05 gnorm: 1.09 [11:39:03<12:52:15] +[titan] 2025-10-05 10:13:24,684 - root - INFO - step: 19010 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 10:13:24,684 - root - INFO - lr: 2.9620e-05 gnorm: 1.04 [11:39:14<12:52:04] +[titan] 2025-10-05 10:13:35,565 - root - INFO - step: 19015 loss: 2.1615 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 10:13:35,565 - root - INFO - lr: 2.9611e-05 gnorm: 1.06 [11:39:25<12:51:53] +[titan] 2025-10-05 10:13:46,458 - root - INFO - step: 19020 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:13:46,458 - root - INFO - lr: 2.9602e-05 gnorm: 1.09 [11:39:36<12:51:42] +[titan] 2025-10-05 10:13:57,365 - root - INFO - step: 19025 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9259 +[titan] 2025-10-05 10:13:57,365 - root - INFO - lr: 2.9593e-05 gnorm: 1.11 [11:39:47<12:51:30] +[titan] 2025-10-05 10:14:08,243 - root - INFO - step: 19030 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 10:14:08,244 - root - INFO - lr: 
2.9585e-05 gnorm: 1.11 [11:39:58<12:51:19] +[titan] 2025-10-05 10:14:19,180 - root - INFO - step: 19035 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.68 mfu: 42.03% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 10:14:19,180 - root - INFO - lr: 2.9576e-05 gnorm: 1.08 [11:40:09<12:51:08] +[titan] 2025-10-05 10:14:30,057 - root - INFO - step: 19040 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 10:14:30,058 - root - INFO - lr: 2.9567e-05 gnorm: 1.08 [11:40:20<12:50:57] +[titan] 2025-10-05 10:14:40,934 - root - INFO - step: 19045 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:14:40,934 - root - INFO - lr: 2.9558e-05 gnorm: 1.05 [11:40:30<12:50:46] +[titan] 2025-10-05 10:14:49,618 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:14:51,803 - root - INFO - step: 19050 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 10:14:51,803 - root - INFO - lr: 2.9549e-05 gnorm: 1.10 [11:40:41<12:50:35] +[titan] 2025-10-05 10:15:02,687 - root - INFO - step: 19055 loss: 2.1320 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:15:02,687 - root - INFO - lr: 2.9540e-05 gnorm: 1.03 [11:40:52<12:50:23] +[titan] 2025-10-05 10:15:13,599 - root - INFO - step: 19060 loss: 2.1731 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:15:13,600 - root - INFO - lr: 2.9531e-05 gnorm: 1.05 [11:41:03<12:50:12] +[titan] 2025-10-05 10:15:24,470 - root - INFO - step: 19065 loss: 2.0790 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 10:15:24,470 - root - INFO - lr: 2.9522e-05 gnorm: 1.02 [11:41:14<12:50:01] +[titan] 2025-10-05 10:15:35,340 - root - INFO - step: 19070 loss: 2.1215 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 10:15:35,340 - root - INFO - lr: 2.9513e-05 gnorm: 1.03 [11:41:25<12:49:50] +[titan] 2025-10-05 10:15:46,220 - root - INFO - step: 19075 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 10:15:46,220 - root - INFO - lr: 2.9505e-05 gnorm: 1.03 [11:41:36<12:49:39] +[titan] 2025-10-05 10:15:57,087 - root - INFO - step: 19080 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 10:15:57,087 - root - INFO - lr: 
2.9496e-05 gnorm: 1.08 [11:41:47<12:49:27] +[titan] 2025-10-05 10:16:07,949 - root - INFO - step: 19085 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:16:07,949 - root - INFO - lr: 2.9487e-05 gnorm: 1.03 [11:41:58<12:49:16] +[titan] 2025-10-05 10:16:18,866 - root - INFO - step: 19090 loss: 2.1027 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:16:18,866 - root - INFO - lr: 2.9478e-05 gnorm: 1.05 [11:42:08<12:49:05] +[titan] 2025-10-05 10:16:29,722 - root - INFO - step: 19095 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 10:16:29,722 - root - INFO - lr: 2.9469e-05 gnorm: 1.07 [11:42:19<12:48:54] +[titan] 2025-10-05 10:16:38,404 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:16:40,594 - root - INFO - step: 19100 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 10:16:40,594 - root - INFO - lr: 2.9460e-05 gnorm: 1.12 [11:42:30<12:48:42] +[titan] 2025-10-05 10:16:51,467 - root - INFO - step: 19105 loss: 2.1659 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 10:16:51,467 - root - INFO - lr: 2.9451e-05 gnorm: 1.07 [11:42:41<12:48:31] +[titan] 2025-10-05 10:17:02,333 - root - INFO - step: 19110 loss: 2.1571 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:17:02,333 - root - INFO - lr: 2.9442e-05 gnorm: 1.07 [11:42:52<12:48:20] +[titan] 2025-10-05 10:17:13,254 - root - INFO - step: 19115 loss: 2.1907 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9353 +[titan] 2025-10-05 10:17:13,254 - root - INFO - lr: 2.9433e-05 gnorm: 1.05 [11:43:03<12:48:09] +[titan] 2025-10-05 10:17:24,144 - root - INFO - step: 19120 loss: 2.2215 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 10:17:24,145 - root - INFO - lr: 2.9424e-05 gnorm: 1.08 [11:43:14<12:47:58] +[titan] 2025-10-05 10:17:34,985 - root - INFO - step: 19125 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 10:17:34,985 - root - INFO - lr: 2.9416e-05 gnorm: 1.07 [11:43:25<12:47:46] +[titan] 2025-10-05 10:17:45,834 - root - INFO - step: 19130 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 10:17:45,834 - root - INFO - lr: 
2.9407e-05 gnorm: 1.04 [11:43:35<12:47:35] +[titan] 2025-10-05 10:17:56,697 - root - INFO - step: 19135 loss: 2.1835 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 10:17:56,697 - root - INFO - lr: 2.9398e-05 gnorm: 1.04 [11:43:46<12:47:24] +[titan] 2025-10-05 10:18:07,545 - root - INFO - step: 19140 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 10:18:07,545 - root - INFO - lr: 2.9389e-05 gnorm: 1.06 [11:43:57<12:47:13] +[titan] 2025-10-05 10:18:18,458 - root - INFO - step: 19145 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9199 +[titan] 2025-10-05 10:18:18,458 - root - INFO - lr: 2.9380e-05 gnorm: 1.08 [11:44:08<12:47:02] +[titan] 2025-10-05 10:18:27,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:18:29,337 - root - INFO - step: 19150 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8515 +[titan] 2025-10-05 10:18:29,338 - root - INFO - lr: 2.9371e-05 gnorm: 1.34 [11:44:19<12:46:50] +[titan] 2025-10-05 10:18:40,256 - root - INFO - step: 19155 loss: 2.1332 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8849 +[titan] 2025-10-05 10:18:40,257 - root - INFO - lr: 2.9362e-05 gnorm: 1.09 [11:44:30<12:46:39] +[titan] 2025-10-05 10:18:51,145 - root - INFO - step: 19160 loss: 2.1481 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 10:18:51,145 - root - INFO - lr: 2.9353e-05 gnorm: 1.07 [11:44:41<12:46:28] +[titan] 2025-10-05 10:19:02,037 - root - INFO - step: 19165 loss: 2.1516 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 10:19:02,037 - root - INFO - lr: 2.9344e-05 gnorm: 1.05 [11:44:52<12:46:17] +[titan] 2025-10-05 10:19:12,937 - root - INFO - step: 19170 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9133 +[titan] 2025-10-05 10:19:12,937 - root - INFO - lr: 2.9336e-05 gnorm: 1.08 [11:45:02<12:46:06] +[titan] 2025-10-05 10:19:23,875 - root - INFO - step: 19175 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 10:19:23,875 - root - INFO - lr: 2.9327e-05 gnorm: 1.04 [11:45:13<12:45:55] +[titan] 2025-10-05 10:19:34,776 - root - INFO - step: 19180 loss: 2.1428 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 10:19:34,776 - root - INFO - lr: 
2.9318e-05 gnorm: 1.07 [11:45:24<12:45:43] +[titan] 2025-10-05 10:19:45,696 - root - INFO - step: 19185 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 10:19:45,696 - root - INFO - lr: 2.9309e-05 gnorm: 1.05 [11:45:35<12:45:32] +[titan] 2025-10-05 10:19:56,559 - root - INFO - step: 19190 loss: 2.2063 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 10:19:56,559 - root - INFO - lr: 2.9300e-05 gnorm: 1.05 [11:45:46<12:45:21] +[titan] 2025-10-05 10:20:07,440 - root - INFO - step: 19195 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 10:20:07,440 - root - INFO - lr: 2.9291e-05 gnorm: 1.06 [11:45:57<12:45:10] +[titan] 2025-10-05 10:20:16,202 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:20:18,388 - root - INFO - step: 19200 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:20:18,389 - root - INFO - lr: 2.9282e-05 gnorm: 1.10 [11:46:08<12:44:59] +[titan] 2025-10-05 10:20:29,261 - root - INFO - step: 19205 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9722 +[titan] 2025-10-05 10:20:29,261 - root - INFO - lr: 2.9273e-05 gnorm: 1.05 [11:46:19<12:44:47] +[titan] 2025-10-05 10:20:40,136 - root - INFO - step: 19210 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 10:20:40,136 - root - INFO - lr: 2.9264e-05 gnorm: 1.05 [11:46:30<12:44:36] +[titan] 2025-10-05 10:20:51,016 - root - INFO - step: 19215 loss: 2.1099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8637 +[titan] 2025-10-05 10:20:51,016 - root - INFO - lr: 2.9255e-05 gnorm: 1.02 [11:46:41<12:44:25] +[titan] 2025-10-05 10:21:01,917 - root - INFO - step: 19220 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 10:21:01,918 - root - INFO - lr: 2.9247e-05 gnorm: 1.06 [11:46:51<12:44:14] +[titan] 2025-10-05 10:21:12,779 - root - INFO - step: 19225 loss: 2.1977 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9427 +[titan] 2025-10-05 10:21:12,779 - root - INFO - lr: 2.9238e-05 gnorm: 1.08 [11:47:02<12:44:03] +[titan] 2025-10-05 10:21:23,714 - root - INFO - step: 19230 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 10:21:23,714 - root - INFO - lr: 
2.9229e-05 gnorm: 1.09 [11:47:13<12:43:51] +[titan] 2025-10-05 10:21:34,597 - root - INFO - step: 19235 loss: 2.1070 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 10:21:34,597 - root - INFO - lr: 2.9220e-05 gnorm: 1.08 [11:47:24<12:43:40] +[titan] 2025-10-05 10:21:45,492 - root - INFO - step: 19240 loss: 2.0962 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 10:21:45,492 - root - INFO - lr: 2.9211e-05 gnorm: 1.04 [11:47:35<12:43:29] +[titan] 2025-10-05 10:21:56,367 - root - INFO - step: 19245 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 10:21:56,367 - root - INFO - lr: 2.9202e-05 gnorm: 1.08 [11:47:46<12:43:18] +[titan] 2025-10-05 10:22:05,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:22:07,295 - root - INFO - step: 19250 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:22:07,296 - root - INFO - lr: 2.9193e-05 gnorm: 1.04 [11:47:57<12:43:07] +[titan] 2025-10-05 10:22:18,238 - root - INFO - step: 19255 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 10:22:18,238 - root - INFO - lr: 2.9184e-05 gnorm: 1.06 [11:48:08<12:42:56] +[titan] 2025-10-05 10:22:29,120 - root - INFO - step: 19260 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 10:22:29,120 - root - INFO - lr: 2.9175e-05 gnorm: 1.10 [11:48:19<12:42:44] +[titan] 2025-10-05 10:22:40,008 - root - INFO - step: 19265 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:22:40,008 - root - INFO - lr: 2.9167e-05 gnorm: 1.08 [11:48:30<12:42:33] +[titan] 2025-10-05 10:22:50,875 - root - INFO - step: 19270 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 10:22:50,875 - root - INFO - lr: 2.9158e-05 gnorm: 1.07 [11:48:40<12:42:22] +[titan] 2025-10-05 10:23:01,737 - root - INFO - step: 19275 loss: 2.1975 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:23:01,737 - root - INFO - lr: 2.9149e-05 gnorm: 1.08 [11:48:51<12:42:11] +[titan] 2025-10-05 10:23:12,658 - root - INFO - step: 19280 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 10:23:12,658 - root - INFO - lr: 
2.9140e-05 gnorm: 1.06 [11:49:02<12:42:00] +[titan] 2025-10-05 10:23:23,595 - root - INFO - step: 19285 loss: 2.1554 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9047 +[titan] 2025-10-05 10:23:23,595 - root - INFO - lr: 2.9131e-05 gnorm: 1.11 [11:49:13<12:41:49] +[titan] 2025-10-05 10:23:34,471 - root - INFO - step: 19290 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9525 +[titan] 2025-10-05 10:23:34,471 - root - INFO - lr: 2.9122e-05 gnorm: 1.08 [11:49:24<12:41:37] +[titan] 2025-10-05 10:23:45,370 - root - INFO - step: 19295 loss: 2.2145 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 10:23:45,370 - root - INFO - lr: 2.9113e-05 gnorm: 1.11 [11:49:35<12:41:26] +[titan] 2025-10-05 10:23:54,147 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:23:56,333 - root - INFO - step: 19300 loss: 2.1524 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 10:23:56,334 - root - INFO - lr: 2.9104e-05 gnorm: 1.12 [11:49:46<12:41:15] +[titan] 2025-10-05 10:24:07,214 - root - INFO - step: 19305 loss: 2.1152 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 10:24:07,214 - root - INFO - lr: 2.9095e-05 gnorm: 1.06 [11:49:57<12:41:04] +[titan] 2025-10-05 10:24:18,150 - root - INFO - step: 19310 loss: 2.1360 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8875 +[titan] 2025-10-05 10:24:18,150 - root - INFO - lr: 2.9086e-05 gnorm: 1.11 [11:50:08<12:40:53] +[titan] 2025-10-05 10:24:29,081 - root - INFO - step: 19315 loss: 2.1682 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9158 +[titan] 2025-10-05 10:24:29,081 - root - INFO - lr: 2.9077e-05 gnorm: 1.08 [11:50:19<12:40:42] +[titan] 2025-10-05 10:24:39,944 - root - INFO - step: 19320 loss: 2.1420 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 10:24:39,945 - root - INFO - lr: 2.9069e-05 gnorm: 1.05 [11:50:29<12:40:30] +[titan] 2025-10-05 10:24:50,812 - root - INFO - step: 19325 loss: 2.1255 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 10:24:50,812 - root - INFO - lr: 2.9060e-05 gnorm: 1.04 [11:50:40<12:40:19] +[titan] 2025-10-05 10:25:01,681 - root - INFO - step: 19330 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 10:25:01,681 - root - INFO - lr: 
2.9051e-05 gnorm: 1.04 [11:50:51<12:40:08] +[titan] 2025-10-05 10:25:12,540 - root - INFO - step: 19335 loss: 2.1642 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 10:25:12,540 - root - INFO - lr: 2.9042e-05 gnorm: 1.05 [11:51:02<12:39:57] +[titan] 2025-10-05 10:25:23,440 - root - INFO - step: 19340 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 10:25:23,441 - root - INFO - lr: 2.9033e-05 gnorm: 1.08 [11:51:13<12:39:46] +[titan] 2025-10-05 10:25:34,323 - root - INFO - step: 19345 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 10:25:34,323 - root - INFO - lr: 2.9024e-05 gnorm: 1.06 [11:51:24<12:39:34] +[titan] 2025-10-05 10:25:42,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:25:45,172 - root - INFO - step: 19350 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 10:25:45,172 - root - INFO - lr: 2.9015e-05 gnorm: 1.06 [11:51:35<12:39:23] +[titan] 2025-10-05 10:25:56,041 - root - INFO - step: 19355 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 10:25:56,041 - root - INFO - lr: 2.9006e-05 gnorm: 1.05 [11:51:46<12:39:12] +[titan] 2025-10-05 10:26:06,901 - root - INFO - step: 19360 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 10:26:06,901 - root - INFO - lr: 2.8997e-05 gnorm: 1.09 [11:51:56<12:39:01] +[titan] 2025-10-05 10:26:17,768 - root - INFO - step: 19365 loss: 2.2565 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 10:26:17,768 - root - INFO - lr: 2.8988e-05 gnorm: 1.06 [11:52:07<12:38:49] +[titan] 2025-10-05 10:26:28,693 - root - INFO - step: 19370 loss: 2.1913 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 10:26:28,693 - root - INFO - lr: 2.8980e-05 gnorm: 1.07 [11:52:18<12:38:38] +[titan] 2025-10-05 10:26:39,550 - root - INFO - step: 19375 loss: 2.2098 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:26:39,550 - root - INFO - lr: 2.8971e-05 gnorm: 1.10 [11:52:29<12:38:27] +[titan] 2025-10-05 10:26:50,433 - root - INFO - step: 19380 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 10:26:50,433 - root - INFO - lr: 
2.8962e-05 gnorm: 1.07 [11:52:40<12:38:16] +[titan] 2025-10-05 10:27:01,279 - root - INFO - step: 19385 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 10:27:01,279 - root - INFO - lr: 2.8953e-05 gnorm: 1.04 [11:52:51<12:38:05] +[titan] 2025-10-05 10:27:12,140 - root - INFO - step: 19390 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 10:27:12,141 - root - INFO - lr: 2.8944e-05 gnorm: 1.10 [11:53:02<12:37:53] +[titan] 2025-10-05 10:27:23,043 - root - INFO - step: 19395 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:27:23,043 - root - INFO - lr: 2.8935e-05 gnorm: 1.07 [11:53:13<12:37:42] +[titan] 2025-10-05 10:27:31,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:27:33,895 - root - INFO - step: 19400 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:27:33,895 - root - INFO - lr: 2.8926e-05 gnorm: 1.06 [11:53:23<12:37:31] +[titan] 2025-10-05 10:27:44,768 - root - INFO - step: 19405 loss: 2.0933 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 10:27:44,768 - root - INFO - lr: 2.8917e-05 gnorm: 1.05 [11:53:34<12:37:20] +[titan] 2025-10-05 10:27:55,669 - root - INFO - step: 19410 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 10:27:55,669 - root - INFO - lr: 2.8908e-05 gnorm: 1.05 [11:53:45<12:37:09] +[titan] 2025-10-05 10:28:06,538 - root - INFO - step: 19415 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 10:28:06,538 - root - INFO - lr: 2.8899e-05 gnorm: 1.07 [11:53:56<12:36:58] +[titan] 2025-10-05 10:28:17,407 - root - INFO - step: 19420 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:28:17,407 - root - INFO - lr: 2.8890e-05 gnorm: 1.09 [11:54:07<12:36:46] +[titan] 2025-10-05 10:28:28,336 - root - INFO - step: 19425 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9504 +[titan] 2025-10-05 10:28:28,336 - root - INFO - lr: 2.8882e-05 gnorm: 1.11 [11:54:18<12:36:35] +[titan] 2025-10-05 10:28:39,218 - root - INFO - step: 19430 loss: 2.1045 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8601 +[titan] 2025-10-05 10:28:39,218 - root - INFO - lr: 
2.8873e-05 gnorm: 1.08 [11:54:29<12:36:24] +[titan] 2025-10-05 10:28:50,126 - root - INFO - step: 19435 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9318 +[titan] 2025-10-05 10:28:50,127 - root - INFO - lr: 2.8864e-05 gnorm: 1.07 [11:54:40<12:36:13] +[titan] 2025-10-05 10:29:01,033 - root - INFO - step: 19440 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9099 +[titan] 2025-10-05 10:29:01,033 - root - INFO - lr: 2.8855e-05 gnorm: 1.04 [11:54:51<12:36:02] +[titan] 2025-10-05 10:29:11,909 - root - INFO - step: 19445 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 10:29:11,909 - root - INFO - lr: 2.8846e-05 gnorm: 1.06 [11:55:01<12:35:50] +[titan] 2025-10-05 10:29:20,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:29:22,834 - root - INFO - step: 19450 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8914 +[titan] 2025-10-05 10:29:22,834 - root - INFO - lr: 2.8837e-05 gnorm: 1.04 [11:55:12<12:35:39] +[titan] 2025-10-05 10:29:33,814 - root - INFO - step: 19455 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 29,845 tflops: 414.05 mfu: 41.87% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:29:33,814 - root - INFO - lr: 2.8828e-05 gnorm: 1.06 [11:55:23<12:35:28] +[titan] 2025-10-05 10:29:36,168 - root - INFO - Dumping profiler traces at step 19456 +[titan] 2025-10-05 10:29:36,206 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:29:44,902 - root - INFO - step: 19460 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 29,553 tflops: 410.00 mfu: 41.46% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:29:44,902 - root - INFO - lr: 2.8819e-05 gnorm: 1.04 [11:55:34<12:35:17] +[titan] 2025-10-05 10:29:55,774 - root - INFO - step: 19465 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:29:55,774 - root - INFO - lr: 2.8810e-05 gnorm: 1.05 [11:55:45<12:35:06] +[titan] 2025-10-05 10:30:06,632 - root - INFO - step: 19470 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8999 +[titan] 2025-10-05 10:30:06,632 - root - INFO - lr: 2.8801e-05 gnorm: 1.06 [11:55:56<12:34:55] +[titan] 2025-10-05 10:30:17,527 - root - INFO - step: 19475 loss: 2.0697 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8296 +[titan] 2025-10-05 10:30:17,527 - root - INFO - lr: 2.8792e-05 gnorm: 1.03 [11:56:07<12:34:44] +[titan] 2025-10-05 10:30:28,427 - root - INFO - step: 19480 loss: 
2.2055 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 10:30:28,427 - root - INFO - lr: 2.8784e-05 gnorm: 1.06 [11:56:18<12:34:32] +[titan] 2025-10-05 10:30:39,303 - root - INFO - step: 19485 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 10:30:39,303 - root - INFO - lr: 2.8775e-05 gnorm: 1.05 [11:56:29<12:34:21] +[titan] 2025-10-05 10:30:50,176 - root - INFO - step: 19490 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:30:50,176 - root - INFO - lr: 2.8766e-05 gnorm: 1.09 [11:56:40<12:34:10] +[titan] 2025-10-05 10:31:01,024 - root - INFO - step: 19495 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:31:01,024 - root - INFO - lr: 2.8757e-05 gnorm: 1.06 [11:56:51<12:33:59] +[titan] 2025-10-05 10:31:09,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:31:11,884 - root - INFO - step: 19500 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9032 +[titan] 2025-10-05 10:31:11,884 - root - INFO - lr: 2.8748e-05 gnorm: 1.04 [11:57:01<12:33:48] +[titan] 2025-10-05 10:31:22,776 - root - INFO - step: 19505 loss: 2.1755 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 10:31:22,776 - root - INFO - lr: 2.8739e-05 gnorm: 1.05 [11:57:12<12:33:36] +[titan] 2025-10-05 10:31:33,671 - root - INFO - step: 19510 loss: 2.1889 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:31:33,672 - root - INFO - lr: 2.8730e-05 gnorm: 1.09 [11:57:23<12:33:25] +[titan] 2025-10-05 10:31:44,519 - root - INFO - step: 19515 loss: 2.1331 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8848 +[titan] 2025-10-05 10:31:44,519 - root - INFO - lr: 2.8721e-05 gnorm: 1.09 [11:57:34<12:33:14] +[titan] 2025-10-05 10:31:55,368 - root - INFO - step: 19520 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:31:55,368 - root - INFO - lr: 2.8712e-05 gnorm: 1.09 [11:57:45<12:33:03] +[titan] 2025-10-05 10:32:06,223 - root - INFO - step: 19525 loss: 2.1590 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 10:32:06,223 - root - INFO - lr: 2.8703e-05 gnorm: 1.04 [11:57:56<12:32:52] +[titan] 2025-10-05 10:32:17,080 - root - INFO - step: 19530 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:32:17,080 - root - INFO - lr: 
2.8694e-05 gnorm: 1.03 [11:58:07<12:32:40] +[titan] 2025-10-05 10:32:27,960 - root - INFO - step: 19535 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 10:32:27,960 - root - INFO - lr: 2.8686e-05 gnorm: 1.12 [11:58:17<12:32:29] +[titan] 2025-10-05 10:32:38,837 - root - INFO - step: 19540 loss: 2.1660 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 10:32:38,837 - root - INFO - lr: 2.8677e-05 gnorm: 1.05 [11:58:28<12:32:18] +[titan] 2025-10-05 10:32:49,713 - root - INFO - step: 19545 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 10:32:49,713 - root - INFO - lr: 2.8668e-05 gnorm: 1.05 [11:58:39<12:32:07] +[titan] 2025-10-05 10:32:58,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:33:00,607 - root - INFO - step: 19550 loss: 2.1396 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8912 +[titan] 2025-10-05 10:33:00,607 - root - INFO - lr: 2.8659e-05 gnorm: 1.07 [11:58:50<12:31:56] +[titan] 2025-10-05 10:33:11,463 - root - INFO - step: 19555 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8942 +[titan] 2025-10-05 10:33:11,463 - root - INFO - lr: 2.8650e-05 gnorm: 1.05 [11:59:01<12:31:44] +[titan] 2025-10-05 10:33:22,332 - root - INFO - step: 19560 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9357 +[titan] 2025-10-05 10:33:22,332 - root - INFO - lr: 2.8641e-05 gnorm: 1.08 [11:59:12<12:31:33] +[titan] 2025-10-05 10:33:33,247 - root - INFO - step: 19565 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 10:33:33,247 - root - INFO - lr: 2.8632e-05 gnorm: 1.12 [11:59:23<12:31:22] +[titan] 2025-10-05 10:33:44,148 - root - INFO - step: 19570 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:33:44,148 - root - INFO - lr: 2.8623e-05 gnorm: 1.14 [11:59:34<12:31:11] +[titan] 2025-10-05 10:33:55,019 - root - INFO - step: 19575 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 10:33:55,019 - root - INFO - lr: 2.8614e-05 gnorm: 1.09 [11:59:45<12:31:00] +[titan] 2025-10-05 10:34:05,890 - root - INFO - step: 19580 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8641 +[titan] 2025-10-05 10:34:05,890 - root - INFO - lr: 
2.8605e-05 gnorm: 1.05 [11:59:55<12:30:49] +[titan] 2025-10-05 10:34:16,751 - root - INFO - step: 19585 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 10:34:16,752 - root - INFO - lr: 2.8596e-05 gnorm: 1.07 [12:00:06<12:30:37] +[titan] 2025-10-05 10:34:27,618 - root - INFO - step: 19590 loss: 2.1741 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 10:34:27,619 - root - INFO - lr: 2.8588e-05 gnorm: 1.06 [12:00:17<12:30:26] +[titan] 2025-10-05 10:34:38,478 - root - INFO - step: 19595 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 10:34:38,478 - root - INFO - lr: 2.8579e-05 gnorm: 1.07 [12:00:28<12:30:15] +[titan] 2025-10-05 10:34:47,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:34:49,385 - root - INFO - step: 19600 loss: 2.1233 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:34:49,385 - root - INFO - lr: 2.8570e-05 gnorm: 1.04 [12:00:39<12:30:04] +[titan] 2025-10-05 10:35:00,251 - root - INFO - step: 19605 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9248 +[titan] 2025-10-05 10:35:00,251 - root - INFO - lr: 2.8561e-05 gnorm: 1.04 [12:00:50<12:29:53] +[titan] 2025-10-05 10:35:11,113 - root - INFO - step: 19610 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9104 +[titan] 2025-10-05 10:35:11,113 - root - INFO - lr: 2.8552e-05 gnorm: 1.09 [12:01:01<12:29:41] +[titan] 2025-10-05 10:35:21,983 - root - INFO - step: 19615 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 10:35:21,983 - root - INFO - lr: 2.8543e-05 gnorm: 1.05 [12:01:11<12:29:30] +[titan] 2025-10-05 10:35:32,879 - root - INFO - step: 19620 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 10:35:32,879 - root - INFO - lr: 2.8534e-05 gnorm: 1.11 [12:01:22<12:29:19] +[titan] 2025-10-05 10:35:43,764 - root - INFO - step: 19625 loss: 2.1033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:35:43,764 - root - INFO - lr: 2.8525e-05 gnorm: 1.05 [12:01:33<12:29:08] +[titan] 2025-10-05 10:35:54,636 - root - INFO - step: 19630 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 10:35:54,637 - root - INFO - lr: 
2.8516e-05 gnorm: 1.10 [12:01:44<12:28:57] +[titan] 2025-10-05 10:36:05,532 - root - INFO - step: 19635 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 10:36:05,532 - root - INFO - lr: 2.8507e-05 gnorm: 1.02 [12:01:55<12:28:45] +[titan] 2025-10-05 10:36:16,411 - root - INFO - step: 19640 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 10:36:16,411 - root - INFO - lr: 2.8498e-05 gnorm: 1.05 [12:02:06<12:28:34] +[titan] 2025-10-05 10:36:27,270 - root - INFO - step: 19645 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 10:36:27,270 - root - INFO - lr: 2.8489e-05 gnorm: 1.06 [12:02:17<12:28:23] +[titan] 2025-10-05 10:36:35,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:36:38,157 - root - INFO - step: 19650 loss: 2.0890 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8470 +[titan] 2025-10-05 10:36:38,157 - root - INFO - lr: 2.8481e-05 gnorm: 1.01 [12:02:28<12:28:12] +[titan] 2025-10-05 10:36:49,018 - root - INFO - step: 19655 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 10:36:49,019 - root - INFO - lr: 2.8472e-05 gnorm: 1.07 [12:02:39<12:28:01] +[titan] 2025-10-05 10:36:59,878 - root - INFO - step: 19660 loss: 2.2289 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 10:36:59,879 - root - INFO - lr: 2.8463e-05 gnorm: 1.08 [12:02:49<12:27:49] +[titan] 2025-10-05 10:37:10,783 - root - INFO - step: 19665 loss: 2.1435 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8951 +[titan] 2025-10-05 10:37:10,783 - root - INFO - lr: 2.8454e-05 gnorm: 1.08 [12:03:00<12:27:38] +[titan] 2025-10-05 10:37:21,656 - root - INFO - step: 19670 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8722 +[titan] 2025-10-05 10:37:21,657 - root - INFO - lr: 2.8445e-05 gnorm: 1.08 [12:03:11<12:27:27] +[titan] 2025-10-05 10:37:32,556 - root - INFO - step: 19675 loss: 2.2272 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9633 +[titan] 2025-10-05 10:37:32,556 - root - INFO - lr: 2.8436e-05 gnorm: 1.12 [12:03:22<12:27:16] +[titan] 2025-10-05 10:37:43,429 - root - INFO - step: 19680 loss: 2.1453 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:37:43,429 - root - INFO - lr: 
2.8427e-05 gnorm: 1.06 [12:03:33<12:27:05] +[titan] 2025-10-05 10:37:54,290 - root - INFO - step: 19685 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 10:37:54,290 - root - INFO - lr: 2.8418e-05 gnorm: 1.10 [12:03:44<12:26:54] +[titan] 2025-10-05 10:38:05,156 - root - INFO - step: 19690 loss: 2.1517 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:38:05,157 - root - INFO - lr: 2.8409e-05 gnorm: 1.07 [12:03:55<12:26:42] +[titan] 2025-10-05 10:38:16,025 - root - INFO - step: 19695 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 10:38:16,025 - root - INFO - lr: 2.8400e-05 gnorm: 1.11 [12:04:06<12:26:31] +[titan] 2025-10-05 10:38:24,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:38:26,916 - root - INFO - step: 19700 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:38:26,916 - root - INFO - lr: 2.8391e-05 gnorm: 1.06 [12:04:16<12:26:20] +[titan] 2025-10-05 10:38:37,813 - root - INFO - step: 19705 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 10:38:37,813 - root - INFO - lr: 2.8382e-05 gnorm: 1.04 [12:04:27<12:26:09] +[titan] 2025-10-05 10:38:48,686 - root - INFO - step: 19710 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8558 +[titan] 2025-10-05 10:38:48,686 - root - INFO - lr: 2.8374e-05 gnorm: 1.10 [12:04:38<12:25:58] +[titan] 2025-10-05 10:38:59,549 - root - INFO - step: 19715 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 10:38:59,549 - root - INFO - lr: 2.8365e-05 gnorm: 1.07 [12:04:49<12:25:46] +[titan] 2025-10-05 10:39:10,404 - root - INFO - step: 19720 loss: 2.2251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 10:39:10,404 - root - INFO - lr: 2.8356e-05 gnorm: 1.05 [12:05:00<12:25:35] +[titan] 2025-10-05 10:39:21,281 - root - INFO - step: 19725 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8864 +[titan] 2025-10-05 10:39:21,281 - root - INFO - lr: 2.8347e-05 gnorm: 1.06 [12:05:11<12:25:24] +[titan] 2025-10-05 10:39:32,209 - root - INFO - step: 19730 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9186 +[titan] 2025-10-05 10:39:32,209 - root - INFO - lr: 
2.8338e-05 gnorm: 1.05 [12:05:22<12:25:13] +[titan] 2025-10-05 10:39:43,074 - root - INFO - step: 19735 loss: 2.1410 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8919 +[titan] 2025-10-05 10:39:43,074 - root - INFO - lr: 2.8329e-05 gnorm: 1.09 [12:05:33<12:25:02] +[titan] 2025-10-05 10:39:53,944 - root - INFO - step: 19740 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9376 +[titan] 2025-10-05 10:39:53,944 - root - INFO - lr: 2.8320e-05 gnorm: 1.05 [12:05:43<12:24:50] +[titan] 2025-10-05 10:40:04,859 - root - INFO - step: 19745 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.52 mfu: 42.11% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:40:04,859 - root - INFO - lr: 2.8311e-05 gnorm: 1.04 [12:05:54<12:24:39] +[titan] 2025-10-05 10:40:13,560 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:40:15,748 - root - INFO - step: 19750 loss: 2.1520 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:40:15,748 - root - INFO - lr: 2.8302e-05 gnorm: 1.04 [12:06:05<12:24:28] +[titan] 2025-10-05 10:40:26,639 - root - INFO - step: 19755 loss: 2.1342 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8866 +[titan] 2025-10-05 10:40:26,639 - root - INFO - lr: 2.8293e-05 gnorm: 1.04 [12:06:16<12:24:17] +[titan] 2025-10-05 10:40:37,586 - root - INFO - step: 19760 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.28 mfu: 41.99% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:40:37,587 - root - INFO - lr: 2.8284e-05 gnorm: 1.06 [12:06:27<12:24:06] +[titan] 2025-10-05 10:40:48,488 - root - INFO - step: 19765 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8834 +[titan] 2025-10-05 10:40:48,488 - root - INFO - lr: 2.8275e-05 gnorm: 1.06 [12:06:38<12:23:55] +[titan] 2025-10-05 10:40:59,376 - root - INFO - step: 19770 loss: 2.2031 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 10:40:59,377 - root - INFO - lr: 2.8266e-05 gnorm: 1.07 [12:06:49<12:23:44] +[titan] 2025-10-05 10:41:10,261 - root - INFO - step: 19775 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 10:41:10,261 - root - INFO - lr: 2.8258e-05 gnorm: 1.05 [12:07:00<12:23:32] +[titan] 2025-10-05 10:41:21,161 - root - INFO - step: 19780 loss: 2.2202 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 10:41:21,161 - root - INFO - lr: 
2.8249e-05 gnorm: 1.08 [12:07:11<12:23:21] +[titan] 2025-10-05 10:41:32,049 - root - INFO - step: 19785 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:41:32,049 - root - INFO - lr: 2.8240e-05 gnorm: 1.07 [12:07:22<12:23:10] +[titan] 2025-10-05 10:41:42,943 - root - INFO - step: 19790 loss: 2.0669 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 10:41:42,943 - root - INFO - lr: 2.8231e-05 gnorm: 1.04 [12:07:32<12:22:59] +[titan] 2025-10-05 10:41:53,847 - root - INFO - step: 19795 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 10:41:53,847 - root - INFO - lr: 2.8222e-05 gnorm: 1.09 [12:07:43<12:22:48] +[titan] 2025-10-05 10:42:02,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:42:04,726 - root - INFO - step: 19800 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9124 +[titan] 2025-10-05 10:42:04,726 - root - INFO - lr: 2.8213e-05 gnorm: 1.07 [12:07:54<12:22:37] +[titan] 2025-10-05 10:42:15,602 - root - INFO - step: 19805 loss: 2.1292 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8809 +[titan] 2025-10-05 10:42:15,602 - root - INFO - lr: 2.8204e-05 gnorm: 1.06 [12:08:05<12:22:25] +[titan] 2025-10-05 10:42:26,476 - root - INFO - step: 19810 loss: 2.1988 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:42:26,476 - root - INFO - lr: 2.8195e-05 gnorm: 1.07 [12:08:16<12:22:14] +[titan] 2025-10-05 10:42:37,355 - root - INFO - step: 19815 loss: 2.1111 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 10:42:37,355 - root - INFO - lr: 2.8186e-05 gnorm: 1.08 [12:08:27<12:22:03] +[titan] 2025-10-05 10:42:48,237 - root - INFO - step: 19820 loss: 2.1257 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8783 +[titan] 2025-10-05 10:42:48,237 - root - INFO - lr: 2.8177e-05 gnorm: 1.07 [12:08:38<12:21:52] +[titan] 2025-10-05 10:42:59,142 - root - INFO - step: 19825 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:42:59,142 - root - INFO - lr: 2.8168e-05 gnorm: 1.06 [12:08:49<12:21:41] +[titan] 2025-10-05 10:43:09,994 - root - INFO - step: 19830 loss: 2.1713 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9190 +[titan] 2025-10-05 10:43:09,994 - root - INFO - lr: 
2.8159e-05 gnorm: 1.12 [12:08:59<12:21:29] +[titan] 2025-10-05 10:43:20,854 - root - INFO - step: 19835 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9251 +[titan] 2025-10-05 10:43:20,854 - root - INFO - lr: 2.8151e-05 gnorm: 1.09 [12:09:10<12:21:18] +[titan] 2025-10-05 10:43:31,720 - root - INFO - step: 19840 loss: 2.1270 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:43:31,720 - root - INFO - lr: 2.8142e-05 gnorm: 1.04 [12:09:21<12:21:07] +[titan] 2025-10-05 10:43:42,583 - root - INFO - step: 19845 loss: 2.1653 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9125 +[titan] 2025-10-05 10:43:42,583 - root - INFO - lr: 2.8133e-05 gnorm: 1.03 [12:09:32<12:20:56] +[titan] 2025-10-05 10:43:51,289 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:43:53,475 - root - INFO - step: 19850 loss: 2.1376 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 10:43:53,475 - root - INFO - lr: 2.8124e-05 gnorm: 1.05 [12:09:43<12:20:45] +[titan] 2025-10-05 10:44:04,341 - root - INFO - step: 19855 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 10:44:04,341 - root - INFO - lr: 2.8115e-05 gnorm: 1.09 [12:09:54<12:20:33] +[titan] 2025-10-05 10:44:15,250 - root - INFO - step: 19860 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9089 +[titan] 2025-10-05 10:44:15,250 - root - INFO - lr: 2.8106e-05 gnorm: 1.09 [12:10:05<12:20:22] +[titan] 2025-10-05 10:44:26,122 - root - INFO - step: 19865 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 10:44:26,123 - root - INFO - lr: 2.8097e-05 gnorm: 1.06 [12:10:16<12:20:11] +[titan] 2025-10-05 10:44:37,015 - root - INFO - step: 19870 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 10:44:37,015 - root - INFO - lr: 2.8088e-05 gnorm: 1.07 [12:10:26<12:20:00] +[titan] 2025-10-05 10:44:47,890 - root - INFO - step: 19875 loss: 2.1479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8987 +[titan] 2025-10-05 10:44:47,890 - root - INFO - lr: 2.8079e-05 gnorm: 1.11 [12:10:37<12:19:49] +[titan] 2025-10-05 10:44:58,757 - root - INFO - step: 19880 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 10:44:58,757 - root - INFO - lr: 
2.8070e-05 gnorm: 1.06 [12:10:48<12:19:38] +[titan] 2025-10-05 10:45:09,633 - root - INFO - step: 19885 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:45:09,633 - root - INFO - lr: 2.8061e-05 gnorm: 1.08 [12:10:59<12:19:26] +[titan] 2025-10-05 10:45:20,533 - root - INFO - step: 19890 loss: 2.1170 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:45:20,533 - root - INFO - lr: 2.8052e-05 gnorm: 1.07 [12:11:10<12:19:15] +[titan] 2025-10-05 10:45:31,359 - root - INFO - step: 19895 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:45:31,360 - root - INFO - lr: 2.8043e-05 gnorm: 1.10 [12:11:21<12:19:04] +[titan] 2025-10-05 10:45:40,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:45:42,231 - root - INFO - step: 19900 loss: 2.1514 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 10:45:42,231 - root - INFO - lr: 2.8035e-05 gnorm: 1.08 [12:11:32<12:18:53] +[titan] 2025-10-05 10:45:53,088 - root - INFO - step: 19905 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 10:45:53,088 - root - INFO - lr: 2.8026e-05 gnorm: 1.03 [12:11:43<12:18:42] +[titan] 2025-10-05 10:46:03,910 - root - INFO - step: 19910 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 10:46:03,910 - root - INFO - lr: 2.8017e-05 gnorm: 1.08 [12:11:53<12:18:30] +[titan] 2025-10-05 10:46:14,770 - root - INFO - step: 19915 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 10:46:14,771 - root - INFO - lr: 2.8008e-05 gnorm: 1.08 [12:12:04<12:18:19] +[titan] 2025-10-05 10:46:25,652 - root - INFO - step: 19920 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:46:25,652 - root - INFO - lr: 2.7999e-05 gnorm: 1.09 [12:12:15<12:18:08] +[titan] 2025-10-05 10:46:36,496 - root - INFO - step: 19925 loss: 2.2094 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 10:46:36,496 - root - INFO - lr: 2.7990e-05 gnorm: 1.06 [12:12:26<12:17:57] +[titan] 2025-10-05 10:46:47,345 - root - INFO - step: 19930 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 10:46:47,345 - root - INFO - lr: 
2.7981e-05 gnorm: 1.11 [12:12:37<12:17:46] +[titan] 2025-10-05 10:46:58,221 - root - INFO - step: 19935 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 10:46:58,221 - root - INFO - lr: 2.7972e-05 gnorm: 1.05 [12:12:48<12:17:34] +[titan] 2025-10-05 10:47:09,102 - root - INFO - step: 19940 loss: 2.1225 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 10:47:09,102 - root - INFO - lr: 2.7963e-05 gnorm: 1.05 [12:12:59<12:17:23] +[titan] 2025-10-05 10:47:19,968 - root - INFO - step: 19945 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8581 +[titan] 2025-10-05 10:47:19,968 - root - INFO - lr: 2.7954e-05 gnorm: 1.09 [12:13:09<12:17:12] +[titan] 2025-10-05 10:47:28,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:47:30,850 - root - INFO - step: 19950 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8703 +[titan] 2025-10-05 10:47:30,850 - root - INFO - lr: 2.7945e-05 gnorm: 1.07 [12:13:20<12:17:01] +[titan] 2025-10-05 10:47:41,822 - root - INFO - step: 19955 loss: 2.1253 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8777 +[titan] 2025-10-05 10:47:41,822 - root - INFO - lr: 2.7936e-05 gnorm: 1.09 [12:13:31<12:16:50] +[titan] 2025-10-05 10:47:52,686 - root - INFO - step: 19960 loss: 2.1316 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:47:52,686 - root - INFO - lr: 2.7927e-05 gnorm: 1.11 [12:13:42<12:16:39] +[titan] 2025-10-05 10:48:03,639 - root - INFO - step: 19965 loss: 2.1229 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8756 +[titan] 2025-10-05 10:48:03,639 - root - INFO - lr: 2.7919e-05 gnorm: 1.08 [12:13:53<12:16:28] +[titan] 2025-10-05 10:48:10,347 - root - INFO - Dumping profiler traces at step 19968 +[titan] 2025-10-05 10:48:10,387 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:48:14,746 - root - INFO - step: 19970 loss: 2.1632 memory: 118.84GiB(85.28%) tps: 29,504 tflops: 409.32 mfu: 41.39% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9112 +[titan] 2025-10-05 10:48:14,746 - root - INFO - lr: 2.7910e-05 gnorm: 1.01 [12:14:04<12:16:17] +[titan] 2025-10-05 10:48:25,610 - root - INFO - step: 19975 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 10:48:25,610 - root - INFO - lr: 2.7901e-05 gnorm: 1.06 [12:14:15<12:16:05] +[titan] 2025-10-05 10:48:36,506 - root - INFO - step: 19980 loss: 
2.1359 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 10:48:36,507 - root - INFO - lr: 2.7892e-05 gnorm: 1.07 [12:14:26<12:15:54] +[titan] 2025-10-05 10:48:47,491 - root - INFO - step: 19985 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,831 tflops: 413.86 mfu: 41.85% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:48:47,491 - root - INFO - lr: 2.7883e-05 gnorm: 1.06 [12:14:37<12:15:43] +[titan] 2025-10-05 10:48:58,374 - root - INFO - step: 19990 loss: 2.1671 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:48:58,374 - root - INFO - lr: 2.7874e-05 gnorm: 1.08 [12:14:48<12:15:32] +[titan] 2025-10-05 10:49:09,251 - root - INFO - step: 19995 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:49:09,251 - root - INFO - lr: 2.7865e-05 gnorm: 1.06 [12:14:59<12:15:21] +[titan] 2025-10-05 10:49:17,929 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:49:20,117 - root - INFO - step: 20000 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8613 +[titan] 2025-10-05 10:49:20,118 - root - INFO - lr: 2.7856e-05 gnorm: 1.09 [12:15:10<12:15:10] +[titan] 2025-10-05 10:49:20,118 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 10:49:39,407 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 10:49:39,408 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.29 seconds. 
+[titan] 2025-10-05 10:51:35,525 - root - INFO - step: 20005 loss: 2.1785 memory: 118.84GiB(85.28%) tps: 2,420 tflops: 33.57 mfu: 3.39% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 10:51:35,525 - root - INFO - lr: 2.7847e-05 gnorm: 1.02 [12:17:25<12:17:03] +[titan] 2025-10-05 10:51:46,302 - root - INFO - step: 20010 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9317 +[titan] 2025-10-05 10:51:46,302 - root - INFO - lr: 2.7838e-05 gnorm: 1.08 [12:17:36<12:16:52] +[titan] 2025-10-05 10:51:57,112 - root - INFO - step: 20015 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 10:51:57,113 - root - INFO - lr: 2.7829e-05 gnorm: 1.07 [12:17:47<12:16:40] +[titan] 2025-10-05 10:52:07,924 - root - INFO - step: 20020 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.47 mfu: 42.51% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8728 +[titan] 2025-10-05 10:52:07,925 - root - INFO - lr: 2.7820e-05 gnorm: 1.05 [12:17:57<12:16:29] +[titan] 2025-10-05 10:52:18,739 - root - INFO - step: 20025 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 10:52:18,739 - root - INFO - lr: 2.7811e-05 gnorm: 1.08 [12:18:08<12:16:18] +[titan] 2025-10-05 10:52:29,561 - root - INFO - step: 20030 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 10:52:29,561 - root - INFO - lr: 2.7803e-05 gnorm: 1.05 [12:18:19<12:16:06] +[titan] 2025-10-05 10:52:40,397 - root - INFO - step: 20035 loss: 2.1681 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:52:40,397 - root - INFO - lr: 
2.7794e-05 gnorm: 1.09 [12:18:30<12:15:55] +[titan] 2025-10-05 10:52:51,270 - root - INFO - step: 20040 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:52:51,270 - root - INFO - lr: 2.7785e-05 gnorm: 1.08 [12:18:41<12:15:44] +[titan] 2025-10-05 10:53:02,099 - root - INFO - step: 20045 loss: 2.1535 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 10:53:02,099 - root - INFO - lr: 2.7776e-05 gnorm: 1.06 [12:18:52<12:15:33] +[titan] 2025-10-05 10:53:10,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:53:12,953 - root - INFO - step: 20050 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:53:12,953 - root - INFO - lr: 2.7767e-05 gnorm: 1.06 [12:19:02<12:15:21] +[titan] 2025-10-05 10:53:23,781 - root - INFO - step: 20055 loss: 2.1359 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8880 +[titan] 2025-10-05 10:53:23,781 - root - INFO - lr: 2.7758e-05 gnorm: 1.07 [12:19:13<12:15:10] +[titan] 2025-10-05 10:53:34,615 - root - INFO - step: 20060 loss: 2.2260 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 10:53:34,615 - root - INFO - lr: 2.7749e-05 gnorm: 1.08 [12:19:24<12:14:59] +[titan] 2025-10-05 10:53:45,482 - root - INFO - step: 20065 loss: 2.1538 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9039 +[titan] 2025-10-05 10:53:45,482 - root - INFO - lr: 2.7740e-05 gnorm: 1.07 [12:19:35<12:14:47] +[titan] 2025-10-05 10:53:56,339 - root - INFO - step: 20070 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 
30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 10:53:56,339 - root - INFO - lr: 2.7731e-05 gnorm: 1.04 [12:19:46<12:14:36] +[titan] 2025-10-05 10:54:07,188 - root - INFO - step: 20075 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 10:54:07,188 - root - INFO - lr: 2.7722e-05 gnorm: 1.06 [12:19:57<12:14:25] +[titan] 2025-10-05 10:54:18,059 - root - INFO - step: 20080 loss: 2.1485 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:54:18,059 - root - INFO - lr: 2.7713e-05 gnorm: 1.06 [12:20:08<12:14:14] +[titan] 2025-10-05 10:54:28,894 - root - INFO - step: 20085 loss: 2.2267 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9673 +[titan] 2025-10-05 10:54:28,894 - root - INFO - lr: 2.7704e-05 gnorm: 1.85 [12:20:18<12:14:02] +[titan] 2025-10-05 10:54:39,760 - root - INFO - step: 20090 loss: 2.1383 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 10:54:39,760 - root - INFO - lr: 2.7695e-05 gnorm: 1.09 [12:20:29<12:13:51] +[titan] 2025-10-05 10:54:50,700 - root - INFO - step: 20095 loss: 2.1379 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8897 +[titan] 2025-10-05 10:54:50,700 - root - INFO - lr: 2.7687e-05 gnorm: 1.04 [12:20:40<12:13:40] +[titan] 2025-10-05 10:54:59,421 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:55:01,599 - root - INFO - step: 20100 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:55:01,599 - root - INFO - lr: 2.7678e-05 gnorm: 1.11 [12:20:51<12:13:29] +[titan] 2025-10-05 10:55:12,449 - root - INFO - step: 20105 loss: 2.1710 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 10:55:12,449 - root - INFO - lr: 2.7669e-05 gnorm: 1.03 [12:21:02<12:13:17] +[titan] 2025-10-05 10:55:23,313 - root - INFO - step: 20110 loss: 2.0931 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 10:55:23,313 - root - INFO - lr: 2.7660e-05 gnorm: 1.04 [12:21:13<12:13:06] +[titan] 2025-10-05 10:55:34,176 - root - INFO - step: 20115 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 10:55:34,176 - root - INFO - lr: 2.7651e-05 gnorm: 1.05 [12:21:24<12:12:55] +[titan] 2025-10-05 10:55:45,039 - root - INFO - step: 20120 loss: 2.1203 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 10:55:45,039 - root - INFO - lr: 2.7642e-05 gnorm: 1.06 [12:21:34<12:12:44] +[titan] 2025-10-05 10:55:55,943 - root - INFO - step: 20125 loss: 2.1150 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8697 +[titan] 2025-10-05 10:55:55,943 - root - INFO - lr: 2.7633e-05 gnorm: 1.05 [12:21:45<12:12:33] +[titan] 2025-10-05 10:56:06,800 - root - INFO - step: 20130 loss: 2.1880 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 10:56:06,800 - root - INFO - lr: 
2.7624e-05 gnorm: 1.08 [12:21:56<12:12:21] +[titan] 2025-10-05 10:56:17,695 - root - INFO - step: 20135 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8794 +[titan] 2025-10-05 10:56:17,696 - root - INFO - lr: 2.7615e-05 gnorm: 1.08 [12:22:07<12:12:10] +[titan] 2025-10-05 10:56:28,544 - root - INFO - step: 20140 loss: 2.1589 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9087 +[titan] 2025-10-05 10:56:28,544 - root - INFO - lr: 2.7606e-05 gnorm: 1.04 [12:22:18<12:11:59] +[titan] 2025-10-05 10:56:39,421 - root - INFO - step: 20145 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8560 +[titan] 2025-10-05 10:56:39,422 - root - INFO - lr: 2.7597e-05 gnorm: 1.08 [12:22:29<12:11:48] +[titan] 2025-10-05 10:56:48,102 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:56:50,277 - root - INFO - step: 20150 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:56:50,277 - root - INFO - lr: 2.7588e-05 gnorm: 1.05 [12:22:40<12:11:36] +[titan] 2025-10-05 10:57:01,155 - root - INFO - step: 20155 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 10:57:01,155 - root - INFO - lr: 2.7579e-05 gnorm: 1.09 [12:22:51<12:11:25] +[titan] 2025-10-05 10:57:12,015 - root - INFO - step: 20160 loss: 2.1842 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 10:57:12,015 - root - INFO - lr: 2.7571e-05 gnorm: 1.05 [12:23:01<12:11:14] +[titan] 2025-10-05 10:57:22,907 - root - INFO - step: 20165 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 10:57:22,907 - root - INFO - lr: 2.7562e-05 gnorm: 1.05 [12:23:12<12:11:03] +[titan] 2025-10-05 10:57:33,769 - root - INFO - step: 20170 loss: 2.1734 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9209 +[titan] 2025-10-05 10:57:33,769 - root - INFO - lr: 2.7553e-05 gnorm: 1.10 [12:23:23<12:10:51] +[titan] 2025-10-05 10:57:44,629 - root - INFO - step: 20175 loss: 2.1616 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:57:44,629 - root - INFO - lr: 2.7544e-05 gnorm: 1.10 [12:23:34<12:10:40] +[titan] 2025-10-05 10:57:55,575 - root - INFO - step: 20180 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 10:57:55,575 - root - INFO - lr: 
2.7535e-05 gnorm: 1.09 [12:23:45<12:10:29] +[titan] 2025-10-05 10:58:06,449 - root - INFO - step: 20185 loss: 2.0747 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 10:58:06,449 - root - INFO - lr: 2.7526e-05 gnorm: 1.09 [12:23:56<12:10:18] +[titan] 2025-10-05 10:58:17,339 - root - INFO - step: 20190 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 10:58:17,339 - root - INFO - lr: 2.7517e-05 gnorm: 1.11 [12:24:07<12:10:06] +[titan] 2025-10-05 10:58:28,224 - root - INFO - step: 20195 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 10:58:28,224 - root - INFO - lr: 2.7508e-05 gnorm: 1.09 [12:24:18<12:09:55] +[titan] 2025-10-05 10:58:36,915 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:58:39,105 - root - INFO - step: 20200 loss: 2.1272 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 10:58:39,105 - root - INFO - lr: 2.7499e-05 gnorm: 1.10 [12:24:29<12:09:44] +[titan] 2025-10-05 10:58:49,983 - root - INFO - step: 20205 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9320 +[titan] 2025-10-05 10:58:49,983 - root - INFO - lr: 2.7490e-05 gnorm: 1.10 [12:24:39<12:09:33] +[titan] 2025-10-05 10:59:00,935 - root - INFO - step: 20210 loss: 2.0945 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 10:59:00,935 - root - INFO - lr: 2.7481e-05 gnorm: 1.07 [12:24:50<12:09:22] +[titan] 2025-10-05 10:59:11,794 - root - INFO - step: 20215 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:59:11,794 - root - INFO - lr: 2.7472e-05 gnorm: 1.08 [12:25:01<12:09:10] +[titan] 2025-10-05 10:59:22,679 - root - INFO - step: 20220 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9899 +[titan] 2025-10-05 10:59:22,679 - root - INFO - lr: 2.7463e-05 gnorm: 1.09 [12:25:12<12:08:59] +[titan] 2025-10-05 10:59:33,536 - root - INFO - step: 20225 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 10:59:33,536 - root - INFO - lr: 2.7454e-05 gnorm: 1.10 [12:25:23<12:08:48] +[titan] 2025-10-05 10:59:44,381 - root - INFO - step: 20230 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 10:59:44,381 - root - INFO - lr: 
2.7446e-05 gnorm: 1.07 [12:25:34<12:08:37] +[titan] 2025-10-05 10:59:55,275 - root - INFO - step: 20235 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8518 +[titan] 2025-10-05 10:59:55,275 - root - INFO - lr: 2.7437e-05 gnorm: 1.10 [12:25:45<12:08:25] +[titan] 2025-10-05 11:00:06,163 - root - INFO - step: 20240 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8998 +[titan] 2025-10-05 11:00:06,164 - root - INFO - lr: 2.7428e-05 gnorm: 1.09 [12:25:56<12:08:14] +[titan] 2025-10-05 11:00:17,039 - root - INFO - step: 20245 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 11:00:17,040 - root - INFO - lr: 2.7419e-05 gnorm: 1.06 [12:26:06<12:08:03] +[titan] 2025-10-05 11:00:25,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:00:27,899 - root - INFO - step: 20250 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 11:00:27,900 - root - INFO - lr: 2.7410e-05 gnorm: 1.06 [12:26:17<12:07:52] +[titan] 2025-10-05 11:00:38,739 - root - INFO - step: 20255 loss: 2.1856 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 11:00:38,739 - root - INFO - lr: 2.7401e-05 gnorm: 1.07 [12:26:28<12:07:40] +[titan] 2025-10-05 11:00:49,595 - root - INFO - step: 20260 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 11:00:49,595 - root - INFO - lr: 2.7392e-05 gnorm: 1.05 [12:26:39<12:07:29] +[titan] 2025-10-05 11:01:00,505 - root - INFO - step: 20265 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 11:01:00,505 - root - INFO - lr: 2.7383e-05 gnorm: 1.05 [12:26:50<12:07:18] +[titan] 2025-10-05 11:01:11,382 - root - INFO - step: 20270 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8446 +[titan] 2025-10-05 11:01:11,382 - root - INFO - lr: 2.7374e-05 gnorm: 1.08 [12:27:01<12:07:07] +[titan] 2025-10-05 11:01:22,284 - root - INFO - step: 20275 loss: 2.1344 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:01:22,284 - root - INFO - lr: 2.7365e-05 gnorm: 1.10 [12:27:12<12:06:56] +[titan] 2025-10-05 11:01:33,138 - root - INFO - step: 20280 loss: 2.1211 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:01:33,138 - root - INFO - lr: 
2.7356e-05 gnorm: 1.03 [12:27:23<12:06:44] +[titan] 2025-10-05 11:01:44,002 - root - INFO - step: 20285 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:01:44,002 - root - INFO - lr: 2.7347e-05 gnorm: 1.05 [12:27:33<12:06:33] +[titan] 2025-10-05 11:01:54,890 - root - INFO - step: 20290 loss: 2.1434 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 11:01:54,890 - root - INFO - lr: 2.7338e-05 gnorm: 1.08 [12:27:44<12:06:22] +[titan] 2025-10-05 11:02:06,133 - root - INFO - step: 20295 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 29,147 tflops: 404.38 mfu: 40.89% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 11:02:06,133 - root - INFO - lr: 2.7330e-05 gnorm: 1.06 [12:27:56<12:06:11] +[titan] 2025-10-05 11:02:14,822 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:02:17,010 - root - INFO - step: 20300 loss: 2.1482 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 11:02:17,010 - root - INFO - lr: 2.7321e-05 gnorm: 1.33 [12:28:06<12:06:00] +[titan] 2025-10-05 11:02:27,926 - root - INFO - step: 20305 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 11:02:27,927 - root - INFO - lr: 2.7312e-05 gnorm: 1.05 [12:28:17<12:05:49] +[titan] 2025-10-05 11:02:38,794 - root - INFO - step: 20310 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8580 +[titan] 2025-10-05 11:02:38,794 - root - INFO - lr: 2.7303e-05 gnorm: 1.02 [12:28:28<12:05:37] +[titan] 2025-10-05 11:02:49,655 - root - INFO - step: 20315 loss: 2.1038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:02:49,655 - root - INFO - lr: 2.7294e-05 gnorm: 1.06 [12:28:39<12:05:26] +[titan] 2025-10-05 11:03:00,551 - root - INFO - step: 20320 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 11:03:00,551 - root - INFO - lr: 2.7285e-05 gnorm: 1.07 [12:28:50<12:05:15] +[titan] 2025-10-05 11:03:11,416 - root - INFO - step: 20325 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9040 +[titan] 2025-10-05 11:03:11,417 - root - INFO - lr: 2.7276e-05 gnorm: 1.04 [12:29:01<12:05:04] +[titan] 2025-10-05 11:03:22,259 - root - INFO - step: 20330 loss: 2.1001 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8555 +[titan] 2025-10-05 11:03:22,259 - root - INFO - lr: 
2.7267e-05 gnorm: 1.07 [12:29:12<12:04:52] +[titan] 2025-10-05 11:03:33,113 - root - INFO - step: 20335 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8112 +[titan] 2025-10-05 11:03:33,113 - root - INFO - lr: 2.7258e-05 gnorm: 1.06 [12:29:23<12:04:41] +[titan] 2025-10-05 11:03:44,014 - root - INFO - step: 20340 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 11:03:44,014 - root - INFO - lr: 2.7249e-05 gnorm: 1.02 [12:29:33<12:04:30] +[titan] 2025-10-05 11:03:54,889 - root - INFO - step: 20345 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9095 +[titan] 2025-10-05 11:03:54,889 - root - INFO - lr: 2.7240e-05 gnorm: 1.05 [12:29:44<12:04:19] +[titan] 2025-10-05 11:04:03,595 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:04:05,779 - root - INFO - step: 20350 loss: 2.1910 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9371 +[titan] 2025-10-05 11:04:05,779 - root - INFO - lr: 2.7231e-05 gnorm: 1.07 [12:29:55<12:04:07] +[titan] 2025-10-05 11:04:16,637 - root - INFO - step: 20355 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 11:04:16,638 - root - INFO - lr: 2.7222e-05 gnorm: 1.05 [12:30:06<12:03:56] +[titan] 2025-10-05 11:04:27,458 - root - INFO - step: 20360 loss: 2.1358 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8882 +[titan] 2025-10-05 11:04:27,458 - root - INFO - lr: 2.7214e-05 gnorm: 1.06 [12:30:17<12:03:45] +[titan] 2025-10-05 11:04:38,299 - root - INFO - step: 20365 loss: 2.1403 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 11:04:38,299 - root - INFO - lr: 2.7205e-05 gnorm: 1.10 [12:30:28<12:03:34] +[titan] 2025-10-05 11:04:49,208 - root - INFO - step: 20370 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 11:04:49,208 - root - INFO - lr: 2.7196e-05 gnorm: 1.09 [12:30:39<12:03:22] +[titan] 2025-10-05 11:05:00,089 - root - INFO - step: 20375 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:05:00,089 - root - INFO - lr: 2.7187e-05 gnorm: 1.06 [12:30:50<12:03:11] +[titan] 2025-10-05 11:05:10,946 - root - INFO - step: 20380 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:05:10,946 - root - INFO - lr: 
2.7178e-05 gnorm: 1.11 [12:31:00<12:03:00] +[titan] 2025-10-05 11:05:21,800 - root - INFO - step: 20385 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:05:21,800 - root - INFO - lr: 2.7169e-05 gnorm: 1.08 [12:31:11<12:02:49] +[titan] 2025-10-05 11:05:32,664 - root - INFO - step: 20390 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 11:05:32,664 - root - INFO - lr: 2.7160e-05 gnorm: 1.05 [12:31:22<12:02:38] +[titan] 2025-10-05 11:05:43,530 - root - INFO - step: 20395 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 11:05:43,530 - root - INFO - lr: 2.7151e-05 gnorm: 1.10 [12:31:33<12:02:26] +[titan] 2025-10-05 11:05:52,200 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:05:54,413 - root - INFO - step: 20400 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 11:05:54,413 - root - INFO - lr: 2.7142e-05 gnorm: 1.05 [12:31:44<12:02:15] +[titan] 2025-10-05 11:06:05,284 - root - INFO - step: 20405 loss: 2.1600 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 11:06:05,284 - root - INFO - lr: 2.7133e-05 gnorm: 1.08 [12:31:55<12:02:04] +[titan] 2025-10-05 11:06:16,130 - root - INFO - step: 20410 loss: 2.1684 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 11:06:16,130 - root - INFO - lr: 2.7124e-05 gnorm: 1.07 [12:32:06<12:01:53] +[titan] 2025-10-05 11:06:26,974 - root - INFO - step: 20415 loss: 2.1914 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:06:26,975 - root - INFO - lr: 2.7115e-05 gnorm: 1.09 [12:32:16<12:01:41] +[titan] 2025-10-05 11:06:37,832 - root - INFO - step: 20420 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 11:06:37,832 - root - INFO - lr: 2.7106e-05 gnorm: 1.09 [12:32:27<12:01:30] +[titan] 2025-10-05 11:06:48,689 - root - INFO - step: 20425 loss: 2.1157 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 11:06:48,689 - root - INFO - lr: 2.7098e-05 gnorm: 1.08 [12:32:38<12:01:19] +[titan] 2025-10-05 11:06:59,539 - root - INFO - step: 20430 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 11:06:59,539 - root - INFO - lr: 
2.7089e-05 gnorm: 1.05 [12:32:49<12:01:08] +[titan] 2025-10-05 11:07:10,461 - root - INFO - step: 20435 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 11:07:10,461 - root - INFO - lr: 2.7080e-05 gnorm: 1.06 [12:33:00<12:00:56] +[titan] 2025-10-05 11:07:21,318 - root - INFO - step: 20440 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:07:21,318 - root - INFO - lr: 2.7071e-05 gnorm: 1.07 [12:33:11<12:00:45] +[titan] 2025-10-05 11:07:32,168 - root - INFO - step: 20445 loss: 2.0912 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:07:32,168 - root - INFO - lr: 2.7062e-05 gnorm: 1.09 [12:33:22<12:00:34] +[titan] 2025-10-05 11:07:40,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:07:43,023 - root - INFO - step: 20450 loss: 2.1251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 11:07:43,023 - root - INFO - lr: 2.7053e-05 gnorm: 1.07 [12:33:32<12:00:23] +[titan] 2025-10-05 11:07:53,870 - root - INFO - step: 20455 loss: 2.1649 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 11:07:53,871 - root - INFO - lr: 2.7044e-05 gnorm: 1.07 [12:33:43<12:00:11] +[titan] 2025-10-05 11:08:04,763 - root - INFO - step: 20460 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 11:08:04,763 - root - INFO - lr: 2.7035e-05 gnorm: 1.03 [12:33:54<12:00:00] +[titan] 2025-10-05 11:08:15,662 - root - INFO - step: 20465 loss: 2.1274 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 11:08:15,662 - root - INFO - lr: 2.7026e-05 gnorm: 1.03 [12:34:05<11:59:49] +[titan] 2025-10-05 11:08:26,490 - root - INFO - step: 20470 loss: 2.1025 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8578 +[titan] 2025-10-05 11:08:26,490 - root - INFO - lr: 2.7017e-05 gnorm: 1.06 [12:34:16<11:59:38] +[titan] 2025-10-05 11:08:37,320 - root - INFO - step: 20475 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 11:08:37,321 - root - INFO - lr: 2.7008e-05 gnorm: 1.11 [12:34:27<11:59:26] +[titan] 2025-10-05 11:08:48,242 - root - INFO - step: 20480 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:08:48,242 - root - INFO - lr: 
2.6999e-05 gnorm: 1.04 [12:34:38<11:59:15] +[titan] 2025-10-05 11:08:48,419 - root - INFO - Dumping profiler traces at step 20480 +[titan] 2025-10-05 11:08:48,457 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:08:59,308 - root - INFO - step: 20485 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 29,611 tflops: 410.81 mfu: 41.54% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 11:08:59,308 - root - INFO - lr: 2.6990e-05 gnorm: 1.06 [12:34:49<11:59:04] +[titan] 2025-10-05 11:09:10,168 - root - INFO - step: 20490 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.8976 +[titan] 2025-10-05 11:09:10,168 - root - INFO - lr: 2.6982e-05 gnorm: 1.06 [12:35:00<11:58:53] +[titan] 2025-10-05 11:09:21,026 - root - INFO - step: 20495 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9159 +[titan] 2025-10-05 11:09:21,027 - root - INFO - lr: 2.6973e-05 gnorm: 1.10 [12:35:10<11:58:42] +[titan] 2025-10-05 11:09:29,736 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:09:31,923 - root - INFO - step: 20500 loss: 2.0830 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 11:09:31,923 - root - INFO - lr: 2.6964e-05 gnorm: 1.09 [12:35:21<11:58:31] +[titan] 2025-10-05 11:09:42,776 - root - INFO - step: 20505 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8339 +[titan] 2025-10-05 11:09:42,776 - root - INFO - lr: 2.6955e-05 gnorm: 1.10 [12:35:32<11:58:19] +[titan] 2025-10-05 11:09:53,605 - root - INFO - step: 20510 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8968 +[titan] 2025-10-05 11:09:53,605 - root - INFO - lr: 2.6946e-05 gnorm: 1.06 [12:35:43<11:58:08] +[titan] 2025-10-05 11:10:04,473 - root - INFO - step: 20515 loss: 2.1247 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8771 +[titan] 2025-10-05 11:10:04,473 - root - INFO - lr: 2.6937e-05 gnorm: 1.06 [12:35:54<11:57:57] +[titan] 2025-10-05 11:10:15,308 - root - INFO - step: 20520 loss: 2.1987 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.58 mfu: 42.43% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9434 +[titan] 2025-10-05 11:10:15,308 - root - INFO - lr: 2.6928e-05 gnorm: 1.06 [12:36:05<11:57:46] +[titan] 2025-10-05 11:10:26,169 - root - INFO - step: 20525 loss: 2.1470 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8974 +[titan] 2025-10-05 11:10:26,170 - root - INFO - lr: 2.6919e-05 gnorm: 1.04 [12:36:16<11:57:34] +[titan] 2025-10-05 11:10:37,027 - root - INFO - step: 20530 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8850 +[titan] 2025-10-05 11:10:37,027 - root - INFO - lr: 
2.6910e-05 gnorm: 1.13 [12:36:26<11:57:23] +[titan] 2025-10-05 11:10:47,875 - root - INFO - step: 20535 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 11:10:47,875 - root - INFO - lr: 2.6901e-05 gnorm: 1.03 [12:36:37<11:57:12] +[titan] 2025-10-05 11:10:58,732 - root - INFO - step: 20540 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:10:58,733 - root - INFO - lr: 2.6892e-05 gnorm: 1.06 [12:36:48<11:57:01] +[titan] 2025-10-05 11:11:09,619 - root - INFO - step: 20545 loss: 2.1707 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:11:09,619 - root - INFO - lr: 2.6883e-05 gnorm: 1.10 [12:36:59<11:56:49] +[titan] 2025-10-05 11:11:18,307 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:11:20,486 - root - INFO - step: 20550 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 11:11:20,487 - root - INFO - lr: 2.6874e-05 gnorm: 2.06 [12:37:10<11:56:38] +[titan] 2025-10-05 11:11:31,328 - root - INFO - step: 20555 loss: 2.2027 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 11:11:31,329 - root - INFO - lr: 2.6866e-05 gnorm: 1.09 [12:37:21<11:56:27] +[titan] 2025-10-05 11:11:42,212 - root - INFO - step: 20560 loss: 2.0837 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 11:11:42,212 - root - INFO - lr: 2.6857e-05 gnorm: 1.05 [12:37:32<11:56:16] +[titan] 2025-10-05 11:11:53,051 - root - INFO - step: 20565 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 11:11:53,051 - root - INFO - lr: 2.6848e-05 gnorm: 1.08 [12:37:42<11:56:04] +[titan] 2025-10-05 11:12:03,886 - root - INFO - step: 20570 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 11:12:03,887 - root - INFO - lr: 2.6839e-05 gnorm: 1.14 [12:37:53<11:55:53] +[titan] 2025-10-05 11:12:14,773 - root - INFO - step: 20575 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 11:12:14,773 - root - INFO - lr: 2.6830e-05 gnorm: 1.09 [12:38:04<11:55:42] +[titan] 2025-10-05 11:12:25,620 - root - INFO - step: 20580 loss: 2.0736 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8329 +[titan] 2025-10-05 11:12:25,620 - root - INFO - lr: 
2.6821e-05 gnorm: 1.09 [12:38:15<11:55:31] +[titan] 2025-10-05 11:12:36,467 - root - INFO - step: 20585 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 11:12:36,467 - root - INFO - lr: 2.6812e-05 gnorm: 1.05 [12:38:26<11:55:19] +[titan] 2025-10-05 11:12:47,318 - root - INFO - step: 20590 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 11:12:47,318 - root - INFO - lr: 2.6803e-05 gnorm: 1.07 [12:38:37<11:55:08] +[titan] 2025-10-05 11:12:58,203 - root - INFO - step: 20595 loss: 2.1151 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8687 +[titan] 2025-10-05 11:12:58,203 - root - INFO - lr: 2.6794e-05 gnorm: 1.07 [12:38:48<11:54:57] +[titan] 2025-10-05 11:13:06,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:13:09,064 - root - INFO - step: 20600 loss: 2.1894 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 11:13:09,064 - root - INFO - lr: 2.6785e-05 gnorm: 1.09 [12:38:58<11:54:46] +[titan] 2025-10-05 11:13:19,929 - root - INFO - step: 20605 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 11:13:19,929 - root - INFO - lr: 2.6776e-05 gnorm: 1.07 [12:39:09<11:54:34] +[titan] 2025-10-05 11:13:30,796 - root - INFO - step: 20610 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 11:13:30,796 - root - INFO - lr: 2.6767e-05 gnorm: 1.06 [12:39:20<11:54:23] +[titan] 2025-10-05 11:13:41,654 - root - INFO - step: 20615 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8398 +[titan] 2025-10-05 11:13:41,654 - root - INFO - lr: 2.6758e-05 gnorm: 1.03 [12:39:31<11:54:12] +[titan] 2025-10-05 11:13:52,508 - root - INFO - step: 20620 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 11:13:52,508 - root - INFO - lr: 2.6750e-05 gnorm: 1.06 [12:39:42<11:54:01] +[titan] 2025-10-05 11:14:03,381 - root - INFO - step: 20625 loss: 2.1197 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 11:14:03,381 - root - INFO - lr: 2.6741e-05 gnorm: 1.06 [12:39:53<11:53:50] +[titan] 2025-10-05 11:14:14,251 - root - INFO - step: 20630 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:14:14,251 - root - INFO - lr: 
2.6732e-05 gnorm: 1.06 [12:40:04<11:53:38] +[titan] 2025-10-05 11:14:25,097 - root - INFO - step: 20635 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:14:25,098 - root - INFO - lr: 2.6723e-05 gnorm: 1.08 [12:40:15<11:53:27] +[titan] 2025-10-05 11:14:35,947 - root - INFO - step: 20640 loss: 2.0980 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8548 +[titan] 2025-10-05 11:14:35,947 - root - INFO - lr: 2.6714e-05 gnorm: 1.09 [12:40:25<11:53:16] +[titan] 2025-10-05 11:14:46,798 - root - INFO - step: 20645 loss: 2.1242 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8770 +[titan] 2025-10-05 11:14:46,799 - root - INFO - lr: 2.6705e-05 gnorm: 1.09 [12:40:36<11:53:05] +[titan] 2025-10-05 11:14:55,473 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:14:57,653 - root - INFO - step: 20650 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:14:57,653 - root - INFO - lr: 2.6696e-05 gnorm: 1.08 [12:40:47<11:52:53] +[titan] 2025-10-05 11:15:08,530 - root - INFO - step: 20655 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 11:15:08,530 - root - INFO - lr: 2.6687e-05 gnorm: 1.08 [12:40:58<11:52:42] +[titan] 2025-10-05 11:15:19,423 - root - INFO - step: 20660 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 11:15:19,423 - root - INFO - lr: 2.6678e-05 gnorm: 1.15 [12:41:09<11:52:31] +[titan] 2025-10-05 11:15:30,279 - root - INFO - step: 20665 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:15:30,279 - root - INFO - lr: 2.6669e-05 gnorm: 1.06 [12:41:20<11:52:20] +[titan] 2025-10-05 11:15:41,155 - root - INFO - step: 20670 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9037 +[titan] 2025-10-05 11:15:41,156 - root - INFO - lr: 2.6660e-05 gnorm: 1.05 [12:41:31<11:52:08] +[titan] 2025-10-05 11:15:52,007 - root - INFO - step: 20675 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 11:15:52,007 - root - INFO - lr: 2.6651e-05 gnorm: 1.04 [12:41:41<11:51:57] +[titan] 2025-10-05 11:16:02,840 - root - INFO - step: 20680 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:16:02,840 - root - INFO - lr: 
2.6643e-05 gnorm: 1.03 [12:41:52<11:51:46] +[titan] 2025-10-05 11:16:13,755 - root - INFO - step: 20685 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9115 +[titan] 2025-10-05 11:16:13,756 - root - INFO - lr: 2.6634e-05 gnorm: 1.04 [12:42:03<11:51:35] +[titan] 2025-10-05 11:16:24,631 - root - INFO - step: 20690 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:16:24,632 - root - INFO - lr: 2.6625e-05 gnorm: 1.05 [12:42:14<11:51:24] +[titan] 2025-10-05 11:16:35,463 - root - INFO - step: 20695 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 11:16:35,464 - root - INFO - lr: 2.6616e-05 gnorm: 1.10 [12:42:25<11:51:12] +[titan] 2025-10-05 11:16:44,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:16:46,315 - root - INFO - step: 20700 loss: 2.1496 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:16:46,315 - root - INFO - lr: 2.6607e-05 gnorm: 1.10 [12:42:36<11:51:01] +[titan] 2025-10-05 11:16:57,157 - root - INFO - step: 20705 loss: 2.0983 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 11:16:57,157 - root - INFO - lr: 2.6598e-05 gnorm: 1.04 [12:42:47<11:50:50] +[titan] 2025-10-05 11:17:08,007 - root - INFO - step: 20710 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 11:17:08,007 - root - INFO - lr: 2.6589e-05 gnorm: 1.07 [12:42:57<11:50:39] +[titan] 2025-10-05 11:17:18,892 - root - INFO - step: 20715 loss: 2.1366 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8867 +[titan] 2025-10-05 11:17:18,892 - root - INFO - lr: 2.6580e-05 gnorm: 1.14 [12:43:08<11:50:27] +[titan] 2025-10-05 11:17:29,767 - root - INFO - step: 20720 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:17:29,768 - root - INFO - lr: 2.6571e-05 gnorm: 1.04 [12:43:19<11:50:16] +[titan] 2025-10-05 11:17:40,628 - root - INFO - step: 20725 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9005 +[titan] 2025-10-05 11:17:40,628 - root - INFO - lr: 2.6562e-05 gnorm: 1.09 [12:43:30<11:50:05] +[titan] 2025-10-05 11:17:51,474 - root - INFO - step: 20730 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:17:51,474 - root - INFO - lr: 
2.6553e-05 gnorm: 1.10 [12:43:41<11:49:54] +[titan] 2025-10-05 11:18:02,326 - root - INFO - step: 20735 loss: 2.1204 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:18:02,326 - root - INFO - lr: 2.6544e-05 gnorm: 1.06 [12:43:52<11:49:42] +[titan] 2025-10-05 11:18:13,213 - root - INFO - step: 20740 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8841 +[titan] 2025-10-05 11:18:13,213 - root - INFO - lr: 2.6536e-05 gnorm: 1.08 [12:44:03<11:49:31] +[titan] 2025-10-05 11:18:24,093 - root - INFO - step: 20745 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 11:18:24,093 - root - INFO - lr: 2.6527e-05 gnorm: 1.05 [12:44:13<11:49:20] +[titan] 2025-10-05 11:18:32,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:18:34,984 - root - INFO - step: 20750 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:18:34,984 - root - INFO - lr: 2.6518e-05 gnorm: 1.06 [12:44:24<11:49:09] +[titan] 2025-10-05 11:18:45,854 - root - INFO - step: 20755 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:18:45,854 - root - INFO - lr: 2.6509e-05 gnorm: 1.09 [12:44:35<11:48:58] +[titan] 2025-10-05 11:18:56,673 - root - INFO - step: 20760 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:18:56,674 - root - INFO - lr: 2.6500e-05 gnorm: 1.04 [12:44:46<11:48:46] +[titan] 2025-10-05 11:19:07,503 - root - INFO - step: 20765 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8648 +[titan] 2025-10-05 11:19:07,504 - root - INFO - lr: 2.6491e-05 gnorm: 1.08 [12:44:57<11:48:35] +[titan] 2025-10-05 11:19:18,411 - root - INFO - step: 20770 loss: 2.2056 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9489 +[titan] 2025-10-05 11:19:18,411 - root - INFO - lr: 2.6482e-05 gnorm: 1.12 [12:45:08<11:48:24] +[titan] 2025-10-05 11:19:29,234 - root - INFO - step: 20775 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8766 +[titan] 2025-10-05 11:19:29,234 - root - INFO - lr: 2.6473e-05 gnorm: 1.05 [12:45:19<11:48:13] +[titan] 2025-10-05 11:19:40,065 - root - INFO - step: 20780 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 11:19:40,065 - root - INFO - lr: 
2.6464e-05 gnorm: 1.08 [12:45:29<11:48:01] +[titan] 2025-10-05 11:19:50,928 - root - INFO - step: 20785 loss: 2.1284 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:19:50,928 - root - INFO - lr: 2.6455e-05 gnorm: 1.03 [12:45:40<11:47:50] +[titan] 2025-10-05 11:20:01,769 - root - INFO - step: 20790 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 11:20:01,769 - root - INFO - lr: 2.6446e-05 gnorm: 1.07 [12:45:51<11:47:39] +[titan] 2025-10-05 11:20:12,646 - root - INFO - step: 20795 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 11:20:12,646 - root - INFO - lr: 2.6437e-05 gnorm: 1.10 [12:46:02<11:47:28] +[titan] 2025-10-05 11:20:21,353 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:20:23,533 - root - INFO - step: 20800 loss: 2.0768 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 11:20:23,533 - root - INFO - lr: 2.6429e-05 gnorm: 1.06 [12:46:13<11:47:17] +[titan] 2025-10-05 11:20:34,392 - root - INFO - step: 20805 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:20:34,392 - root - INFO - lr: 2.6420e-05 gnorm: 1.09 [12:46:24<11:47:05] +[titan] 2025-10-05 11:20:45,231 - root - INFO - step: 20810 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 11:20:45,231 - root - INFO - lr: 2.6411e-05 gnorm: 1.04 [12:46:35<11:46:54] +[titan] 2025-10-05 11:20:56,074 - root - INFO - step: 20815 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:20:56,074 - root - INFO - lr: 2.6402e-05 gnorm: 1.08 [12:46:45<11:46:43] +[titan] 2025-10-05 11:21:06,980 - root - INFO - step: 20820 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:21:06,980 - root - INFO - lr: 2.6393e-05 gnorm: 1.07 [12:46:56<11:46:32] +[titan] 2025-10-05 11:21:17,884 - root - INFO - step: 20825 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 11:21:17,884 - root - INFO - lr: 2.6384e-05 gnorm: 1.08 [12:47:07<11:46:20] +[titan] 2025-10-05 11:21:28,741 - root - INFO - step: 20830 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:21:28,741 - root - INFO - lr: 
2.6375e-05 gnorm: 1.08 [12:47:18<11:46:09] +[titan] 2025-10-05 11:21:39,613 - root - INFO - step: 20835 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8881 +[titan] 2025-10-05 11:21:39,613 - root - INFO - lr: 2.6366e-05 gnorm: 1.07 [12:47:29<11:45:58] +[titan] 2025-10-05 11:21:50,471 - root - INFO - step: 20840 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 11:21:50,471 - root - INFO - lr: 2.6357e-05 gnorm: 1.03 [12:47:40<11:45:47] +[titan] 2025-10-05 11:22:01,325 - root - INFO - step: 20845 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:22:01,325 - root - INFO - lr: 2.6348e-05 gnorm: 1.07 [12:47:51<11:45:36] +[titan] 2025-10-05 11:22:10,045 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:22:12,228 - root - INFO - step: 20850 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:22:12,228 - root - INFO - lr: 2.6339e-05 gnorm: 1.01 [12:48:02<11:45:24] +[titan] 2025-10-05 11:22:23,145 - root - INFO - step: 20855 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 11:22:23,145 - root - INFO - lr: 2.6330e-05 gnorm: 1.08 [12:48:13<11:45:13] +[titan] 2025-10-05 11:22:33,976 - root - INFO - step: 20860 loss: 2.1509 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:22:33,976 - root - INFO - lr: 2.6322e-05 gnorm: 1.08 [12:48:23<11:45:02] +[titan] 2025-10-05 11:22:44,818 - root - INFO - step: 20865 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 11:22:44,819 - root - INFO - lr: 2.6313e-05 gnorm: 1.08 [12:48:34<11:44:51] +[titan] 2025-10-05 11:22:55,670 - root - INFO - step: 20870 loss: 2.1029 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 11:22:55,671 - root - INFO - lr: 2.6304e-05 gnorm: 1.04 [12:48:45<11:44:39] +[titan] 2025-10-05 11:23:06,495 - root - INFO - step: 20875 loss: 2.1668 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 11:23:06,495 - root - INFO - lr: 2.6295e-05 gnorm: 1.03 [12:48:56<11:44:28] +[titan] 2025-10-05 11:23:17,425 - root - INFO - step: 20880 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 11:23:17,426 - root - INFO - lr: 
2.6286e-05 gnorm: 1.06 [12:49:07<11:44:17] +[titan] 2025-10-05 11:23:28,304 - root - INFO - step: 20885 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:23:28,305 - root - INFO - lr: 2.6277e-05 gnorm: 1.02 [12:49:18<11:44:06] +[titan] 2025-10-05 11:23:39,146 - root - INFO - step: 20890 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 11:23:39,147 - root - INFO - lr: 2.6268e-05 gnorm: 1.04 [12:49:29<11:43:55] +[titan] 2025-10-05 11:23:50,019 - root - INFO - step: 20895 loss: 2.1373 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:23:50,019 - root - INFO - lr: 2.6259e-05 gnorm: 1.05 [12:49:39<11:43:43] +[titan] 2025-10-05 11:23:58,682 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:24:00,862 - root - INFO - step: 20900 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 11:24:00,862 - root - INFO - lr: 2.6250e-05 gnorm: 1.08 [12:49:50<11:43:32] +[titan] 2025-10-05 11:24:11,693 - root - INFO - step: 20905 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8801 +[titan] 2025-10-05 11:24:11,693 - root - INFO - lr: 2.6241e-05 gnorm: 1.09 [12:50:01<11:43:21] +[titan] 2025-10-05 11:24:22,592 - root - INFO - step: 20910 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8505 +[titan] 2025-10-05 11:24:22,592 - root - INFO - lr: 2.6232e-05 gnorm: 1.06 [12:50:12<11:43:10] +[titan] 2025-10-05 11:24:33,463 - root - INFO - step: 20915 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 11:24:33,463 - root - INFO - lr: 2.6224e-05 gnorm: 1.05 [12:50:23<11:42:58] +[titan] 2025-10-05 11:24:44,313 - root - INFO - step: 20920 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:24:44,313 - root - INFO - lr: 2.6215e-05 gnorm: 1.05 [12:50:34<11:42:47] +[titan] 2025-10-05 11:24:55,176 - root - INFO - step: 20925 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8450 +[titan] 2025-10-05 11:24:55,176 - root - INFO - lr: 2.6206e-05 gnorm: 1.05 [12:50:45<11:42:36] +[titan] 2025-10-05 11:25:06,030 - root - INFO - step: 20930 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8992 +[titan] 2025-10-05 11:25:06,030 - root - INFO - lr: 
2.6197e-05 gnorm: 1.10 [12:50:55<11:42:25] +[titan] 2025-10-05 11:25:16,898 - root - INFO - step: 20935 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8741 +[titan] 2025-10-05 11:25:16,898 - root - INFO - lr: 2.6188e-05 gnorm: 1.05 [12:51:06<11:42:14] +[titan] 2025-10-05 11:25:27,781 - root - INFO - step: 20940 loss: 2.1440 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:25:27,781 - root - INFO - lr: 2.6179e-05 gnorm: 1.04 [12:51:17<11:42:02] +[titan] 2025-10-05 11:25:38,668 - root - INFO - step: 20945 loss: 2.1635 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 11:25:38,668 - root - INFO - lr: 2.6170e-05 gnorm: 1.04 [12:51:28<11:41:51] +[titan] 2025-10-05 11:25:47,372 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:25:49,579 - root - INFO - step: 20950 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8588 +[titan] 2025-10-05 11:25:49,579 - root - INFO - lr: 2.6161e-05 gnorm: 1.02 [12:51:39<11:41:40] +[titan] 2025-10-05 11:26:00,466 - root - INFO - step: 20955 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9476 +[titan] 2025-10-05 11:26:00,466 - root - INFO - lr: 2.6152e-05 gnorm: 1.08 [12:51:50<11:41:29] +[titan] 2025-10-05 11:26:11,358 - root - INFO - step: 20960 loss: 2.1680 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 11:26:11,358 - root - INFO - lr: 2.6143e-05 gnorm: 1.07 [12:52:01<11:41:18] +[titan] 2025-10-05 11:26:22,285 - root - INFO - step: 20965 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 11:26:22,285 - root - INFO - lr: 2.6134e-05 gnorm: 1.03 [12:52:12<11:41:06] +[titan] 2025-10-05 11:26:33,153 - root - INFO - step: 20970 loss: 2.0712 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 11:26:33,153 - root - INFO - lr: 2.6126e-05 gnorm: 1.04 [12:52:23<11:40:55] +[titan] 2025-10-05 11:26:44,020 - root - INFO - step: 20975 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:26:44,021 - root - INFO - lr: 2.6117e-05 gnorm: 1.09 [12:52:33<11:40:44] +[titan] 2025-10-05 11:26:54,991 - root - INFO - step: 20980 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 11:26:54,992 - root - INFO - lr: 
2.6108e-05 gnorm: 1.07 [12:52:44<11:40:33] +[titan] 2025-10-05 11:27:05,851 - root - INFO - step: 20985 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8877 +[titan] 2025-10-05 11:27:05,851 - root - INFO - lr: 2.6099e-05 gnorm: 1.11 [12:52:55<11:40:22] +[titan] 2025-10-05 11:27:16,808 - root - INFO - step: 20990 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:27:16,809 - root - INFO - lr: 2.6090e-05 gnorm: 1.08 [12:53:06<11:40:11] +[titan] 2025-10-05 11:27:21,391 - root - INFO - Dumping profiler traces at step 20992 +[titan] 2025-10-05 11:27:21,430 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:27:27,954 - root - INFO - step: 20995 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,401 tflops: 407.90 mfu: 41.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 11:27:27,954 - root - INFO - lr: 2.6081e-05 gnorm: 1.05 [12:53:17<11:40:00] +[titan] 2025-10-05 11:27:36,640 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:27:38,817 - root - INFO - step: 21000 loss: 2.1220 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8750 +[titan] 2025-10-05 11:27:38,817 - root - INFO - lr: 2.6072e-05 gnorm: 1.05 [12:53:28<11:39:48] +[titan] 2025-10-05 11:27:49,677 - root - INFO - step: 21005 loss: 2.1703 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:27:49,677 - root - INFO - lr: 2.6063e-05 gnorm: 1.10 [12:53:39<11:39:37] +[titan] 2025-10-05 11:28:00,541 - root - INFO - step: 21010 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 11:28:00,541 - root - INFO - lr: 2.6054e-05 gnorm: 1.05 [12:53:50<11:39:26] +[titan] 2025-10-05 11:28:11,383 - root - INFO - step: 21015 loss: 2.1081 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8636 +[titan] 2025-10-05 11:28:11,384 - root - INFO - lr: 2.6045e-05 gnorm: 1.04 [12:54:01<11:39:15] +[titan] 2025-10-05 11:28:22,286 - root - INFO - step: 21020 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:28:22,286 - root - INFO - lr: 2.6036e-05 gnorm: 1.10 [12:54:12<11:39:03] +[titan] 2025-10-05 11:28:33,136 - root - INFO - step: 21025 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 11:28:33,136 - root - INFO - lr: 2.6028e-05 gnorm: 1.07 [12:54:23<11:38:52] +[titan] 2025-10-05 11:28:43,995 - root - INFO - step: 21030 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8395 +[titan] 2025-10-05 11:28:43,995 - root - INFO - lr: 
2.6019e-05 gnorm: 1.06 [12:54:33<11:38:41] +[titan] 2025-10-05 11:28:54,868 - root - INFO - step: 21035 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8811 +[titan] 2025-10-05 11:28:54,868 - root - INFO - lr: 2.6010e-05 gnorm: 1.09 [12:54:44<11:38:30] +[titan] 2025-10-05 11:29:05,770 - root - INFO - step: 21040 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 11:29:05,770 - root - INFO - lr: 2.6001e-05 gnorm: 1.06 [12:54:55<11:38:19] +[titan] 2025-10-05 11:29:16,625 - root - INFO - step: 21045 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:29:16,625 - root - INFO - lr: 2.5992e-05 gnorm: 1.04 [12:55:06<11:38:07] +[titan] 2025-10-05 11:29:25,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:29:27,546 - root - INFO - step: 21050 loss: 2.1350 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:29:27,546 - root - INFO - lr: 2.5983e-05 gnorm: 1.09 [12:55:17<11:37:56] +[titan] 2025-10-05 11:29:38,415 - root - INFO - step: 21055 loss: 2.0977 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8537 +[titan] 2025-10-05 11:29:38,415 - root - INFO - lr: 2.5974e-05 gnorm: 1.05 [12:55:28<11:37:45] +[titan] 2025-10-05 11:29:49,289 - root - INFO - step: 21060 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 11:29:49,289 - root - INFO - lr: 2.5965e-05 gnorm: 1.09 [12:55:39<11:37:34] +[titan] 2025-10-05 11:30:00,149 - root - INFO - step: 21065 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 11:30:00,149 - root - INFO - lr: 2.5956e-05 gnorm: 1.09 [12:55:50<11:37:23] +[titan] 2025-10-05 11:30:11,032 - root - INFO - step: 21070 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:30:11,032 - root - INFO - lr: 2.5947e-05 gnorm: 1.08 [12:56:00<11:37:11] +[titan] 2025-10-05 11:30:21,932 - root - INFO - step: 21075 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 11:30:21,932 - root - INFO - lr: 2.5939e-05 gnorm: 1.07 [12:56:11<11:37:00] +[titan] 2025-10-05 11:30:32,855 - root - INFO - step: 21080 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8856 +[titan] 2025-10-05 11:30:32,855 - root - INFO - lr: 
2.5930e-05 gnorm: 1.07 [12:56:22<11:36:49] +[titan] 2025-10-05 11:30:43,698 - root - INFO - step: 21085 loss: 2.1181 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:30:43,699 - root - INFO - lr: 2.5921e-05 gnorm: 1.11 [12:56:33<11:36:38] +[titan] 2025-10-05 11:30:54,563 - root - INFO - step: 21090 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 11:30:54,563 - root - INFO - lr: 2.5912e-05 gnorm: 1.03 [12:56:44<11:36:27] +[titan] 2025-10-05 11:31:05,426 - root - INFO - step: 21095 loss: 2.2239 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9636 +[titan] 2025-10-05 11:31:05,427 - root - INFO - lr: 2.5903e-05 gnorm: 1.06 [12:56:55<11:36:15] +[titan] 2025-10-05 11:31:14,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:31:16,304 - root - INFO - step: 21100 loss: 2.0959 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 11:31:16,304 - root - INFO - lr: 2.5894e-05 gnorm: 1.03 [12:57:06<11:36:04] +[titan] 2025-10-05 11:31:27,255 - root - INFO - step: 21105 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 11:31:27,256 - root - INFO - lr: 2.5885e-05 gnorm: 1.07 [12:57:17<11:35:53] +[titan] 2025-10-05 11:31:38,131 - root - INFO - step: 21110 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8621 +[titan] 2025-10-05 11:31:38,132 - root - INFO - lr: 2.5876e-05 gnorm: 1.06 [12:57:27<11:35:42] +[titan] 2025-10-05 11:31:49,004 - root - INFO - step: 21115 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:31:49,005 - root - INFO - lr: 2.5867e-05 gnorm: 1.07 [12:57:38<11:35:31] +[titan] 2025-10-05 11:31:59,893 - root - INFO - step: 21120 loss: 2.0727 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8315 +[titan] 2025-10-05 11:31:59,893 - root - INFO - lr: 2.5858e-05 gnorm: 1.07 [12:57:49<11:35:19] +[titan] 2025-10-05 11:32:10,768 - root - INFO - step: 21125 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 11:32:10,769 - root - INFO - lr: 2.5850e-05 gnorm: 1.07 [12:58:00<11:35:08] +[titan] 2025-10-05 11:32:21,633 - root - INFO - step: 21130 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8324 +[titan] 2025-10-05 11:32:21,633 - root - INFO - lr: 
2.5841e-05 gnorm: 1.05 [12:58:11<11:34:57] +[titan] 2025-10-05 11:32:32,656 - root - INFO - step: 21135 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 29,729 tflops: 412.44 mfu: 41.70% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 11:32:32,656 - root - INFO - lr: 2.5832e-05 gnorm: 1.08 [12:58:22<11:34:46] +[titan] 2025-10-05 11:32:43,550 - root - INFO - step: 21140 loss: 2.1392 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 11:32:43,550 - root - INFO - lr: 2.5823e-05 gnorm: 1.07 [12:58:33<11:34:35] +[titan] 2025-10-05 11:32:54,408 - root - INFO - step: 21145 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 11:32:54,408 - root - INFO - lr: 2.5814e-05 gnorm: 1.06 [12:58:44<11:34:24] +[titan] 2025-10-05 11:33:03,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:33:05,258 - root - INFO - step: 21150 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8836 +[titan] 2025-10-05 11:33:05,258 - root - INFO - lr: 2.5805e-05 gnorm: 1.09 [12:58:55<11:34:12] +[titan] 2025-10-05 11:33:16,124 - root - INFO - step: 21155 loss: 2.1477 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 11:33:16,124 - root - INFO - lr: 2.5796e-05 gnorm: 1.07 [12:59:05<11:34:01] +[titan] 2025-10-05 11:33:27,050 - root - INFO - step: 21160 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 11:33:27,050 - root - INFO - lr: 2.5787e-05 gnorm: 1.06 [12:59:16<11:33:50] +[titan] 2025-10-05 11:33:37,906 - root - INFO - step: 21165 loss: 2.1021 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 11:33:37,906 - root - INFO - lr: 2.5778e-05 gnorm: 1.06 [12:59:27<11:33:39] +[titan] 2025-10-05 11:33:48,805 - root - INFO - step: 21170 loss: 2.1153 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8695 +[titan] 2025-10-05 11:33:48,805 - root - INFO - lr: 2.5769e-05 gnorm: 1.10 [12:59:38<11:33:28] +[titan] 2025-10-05 11:33:59,670 - root - INFO - step: 21175 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 11:33:59,670 - root - INFO - lr: 2.5761e-05 gnorm: 1.05 [12:59:49<11:33:16] +[titan] 2025-10-05 11:34:10,542 - root - INFO - step: 21180 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8645 +[titan] 2025-10-05 11:34:10,542 - root - INFO - lr: 
2.5752e-05 gnorm: 1.07 [13:00:00<11:33:05] +[titan] 2025-10-05 11:34:21,425 - root - INFO - step: 21185 loss: 2.0963 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8530 +[titan] 2025-10-05 11:34:21,425 - root - INFO - lr: 2.5743e-05 gnorm: 1.01 [13:00:11<11:32:54] +[titan] 2025-10-05 11:34:32,352 - root - INFO - step: 21190 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:34:32,352 - root - INFO - lr: 2.5734e-05 gnorm: 1.08 [13:00:22<11:32:43] +[titan] 2025-10-05 11:34:43,216 - root - INFO - step: 21195 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 11:34:43,216 - root - INFO - lr: 2.5725e-05 gnorm: 1.04 [13:00:33<11:32:32] +[titan] 2025-10-05 11:34:51,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:34:54,111 - root - INFO - step: 21200 loss: 2.0921 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 11:34:54,111 - root - INFO - lr: 2.5716e-05 gnorm: 1.07 [13:00:43<11:32:20] +[titan] 2025-10-05 11:35:04,964 - root - INFO - step: 21205 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 11:35:04,964 - root - INFO - lr: 2.5707e-05 gnorm: 1.09 [13:00:54<11:32:09] +[titan] 2025-10-05 11:35:15,826 - root - INFO - step: 21210 loss: 2.1528 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 11:35:15,827 - root - INFO - lr: 2.5698e-05 gnorm: 1.09 [13:01:05<11:31:58] +[titan] 2025-10-05 11:35:26,686 - root - INFO - step: 21215 loss: 2.1911 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:35:26,686 - root - INFO - lr: 2.5689e-05 gnorm: 1.11 [13:01:16<11:31:47] +[titan] 2025-10-05 11:35:37,615 - root - INFO - step: 21220 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 11:35:37,615 - root - INFO - lr: 2.5680e-05 gnorm: 1.08 [13:01:27<11:31:36] +[titan] 2025-10-05 11:35:48,489 - root - INFO - step: 21225 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 11:35:48,489 - root - INFO - lr: 2.5672e-05 gnorm: 1.11 [13:01:38<11:31:24] +[titan] 2025-10-05 11:35:59,356 - root - INFO - step: 21230 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:35:59,356 - root - INFO - lr: 
2.5663e-05 gnorm: 1.08 [13:01:49<11:31:13] +[titan] 2025-10-05 11:36:10,239 - root - INFO - step: 21235 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8980 +[titan] 2025-10-05 11:36:10,239 - root - INFO - lr: 2.5654e-05 gnorm: 1.09 [13:02:00<11:31:02] +[titan] 2025-10-05 11:36:21,092 - root - INFO - step: 21240 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:36:21,092 - root - INFO - lr: 2.5645e-05 gnorm: 1.05 [13:02:10<11:30:51] +[titan] 2025-10-05 11:36:32,021 - root - INFO - step: 21245 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 11:36:32,021 - root - INFO - lr: 2.5636e-05 gnorm: 1.09 [13:02:21<11:30:40] +[titan] 2025-10-05 11:36:40,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:36:42,889 - root - INFO - step: 21250 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 11:36:42,889 - root - INFO - lr: 2.5627e-05 gnorm: 1.07 [13:02:32<11:30:28] +[titan] 2025-10-05 11:36:53,745 - root - INFO - step: 21255 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 11:36:53,746 - root - INFO - lr: 2.5618e-05 gnorm: 1.05 [13:02:43<11:30:17] +[titan] 2025-10-05 11:37:04,622 - root - INFO - step: 21260 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 11:37:04,622 - root - INFO - lr: 2.5609e-05 gnorm: 1.08 [13:02:54<11:30:06] +[titan] 2025-10-05 11:37:15,535 - root - INFO - step: 21265 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 11:37:15,536 - root - INFO - lr: 2.5600e-05 gnorm: 1.08 [13:03:05<11:29:55] +[titan] 2025-10-05 11:37:26,391 - root - INFO - step: 21270 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 11:37:26,392 - root - INFO - lr: 2.5592e-05 gnorm: 1.05 [13:03:16<11:29:44] +[titan] 2025-10-05 11:37:37,276 - root - INFO - step: 21275 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 11:37:37,276 - root - INFO - lr: 2.5583e-05 gnorm: 1.07 [13:03:27<11:29:32] +[titan] 2025-10-05 11:37:48,150 - root - INFO - step: 21280 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8442 +[titan] 2025-10-05 11:37:48,150 - root - INFO - lr: 
2.5574e-05 gnorm: 1.05 [13:03:38<11:29:21] +[titan] 2025-10-05 11:37:59,010 - root - INFO - step: 21285 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 11:37:59,010 - root - INFO - lr: 2.5565e-05 gnorm: 1.07 [13:03:48<11:29:10] +[titan] 2025-10-05 11:38:09,872 - root - INFO - step: 21290 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8545 +[titan] 2025-10-05 11:38:09,872 - root - INFO - lr: 2.5556e-05 gnorm: 1.10 [13:03:59<11:28:59] +[titan] 2025-10-05 11:38:20,741 - root - INFO - step: 21295 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 11:38:20,741 - root - INFO - lr: 2.5547e-05 gnorm: 1.12 [13:04:10<11:28:48] +[titan] 2025-10-05 11:38:29,453 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:38:31,672 - root - INFO - step: 21300 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 11:38:31,672 - root - INFO - lr: 2.5538e-05 gnorm: 1.05 [13:04:21<11:28:36] +[titan] 2025-10-05 11:38:42,540 - root - INFO - step: 21305 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:38:42,540 - root - INFO - lr: 2.5529e-05 gnorm: 1.08 [13:04:32<11:28:25] +[titan] 2025-10-05 11:38:53,411 - root - INFO - step: 21310 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 11:38:53,411 - root - INFO - lr: 2.5520e-05 gnorm: 1.07 [13:04:43<11:28:14] +[titan] 2025-10-05 11:39:04,301 - root - INFO - step: 21315 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8790 +[titan] 2025-10-05 11:39:04,301 - root - INFO - lr: 2.5511e-05 gnorm: 1.10 [13:04:54<11:28:03] +[titan] 2025-10-05 11:39:15,170 - root - INFO - step: 21320 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8772 +[titan] 2025-10-05 11:39:15,170 - root - INFO - lr: 2.5503e-05 gnorm: 1.06 [13:05:05<11:27:52] +[titan] 2025-10-05 11:39:26,035 - root - INFO - step: 21325 loss: 2.1518 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:39:26,035 - root - INFO - lr: 2.5494e-05 gnorm: 1.08 [13:05:15<11:27:40] +[titan] 2025-10-05 11:39:36,994 - root - INFO - step: 21330 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8956 +[titan] 2025-10-05 11:39:36,994 - root - INFO - lr: 
2.5485e-05 gnorm: 1.06 [13:05:26<11:27:29] +[titan] 2025-10-05 11:39:47,849 - root - INFO - step: 21335 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 11:39:47,849 - root - INFO - lr: 2.5476e-05 gnorm: 1.03 [13:05:37<11:27:18] +[titan] 2025-10-05 11:39:58,709 - root - INFO - step: 21340 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 11:39:58,709 - root - INFO - lr: 2.5467e-05 gnorm: 1.07 [13:05:48<11:27:07] +[titan] 2025-10-05 11:40:09,576 - root - INFO - step: 21345 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 11:40:09,576 - root - INFO - lr: 2.5458e-05 gnorm: 1.05 [13:05:59<11:26:56] +[titan] 2025-10-05 11:40:18,258 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:40:20,450 - root - INFO - step: 21350 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9393 +[titan] 2025-10-05 11:40:20,450 - root - INFO - lr: 2.5449e-05 gnorm: 1.09 [13:06:10<11:26:44] +[titan] 2025-10-05 11:40:31,323 - root - INFO - step: 21355 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 11:40:31,323 - root - INFO - lr: 2.5440e-05 gnorm: 1.11 [13:06:21<11:26:33] +[titan] 2025-10-05 11:40:42,303 - root - INFO - step: 21360 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 29,846 tflops: 414.06 mfu: 41.87% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:40:42,303 - root - INFO - lr: 2.5431e-05 gnorm: 1.09 [13:06:32<11:26:22] +[titan] 2025-10-05 11:40:53,190 - root - INFO - step: 21365 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 11:40:53,190 - root - INFO - lr: 2.5423e-05 gnorm: 1.04 [13:06:43<11:26:11] +[titan] 2025-10-05 11:41:04,057 - root - INFO - step: 21370 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8252 +[titan] 2025-10-05 11:41:04,057 - root - INFO - lr: 2.5414e-05 gnorm: 1.06 [13:06:53<11:26:00] +[titan] 2025-10-05 11:41:14,914 - root - INFO - step: 21375 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 11:41:14,914 - root - INFO - lr: 2.5405e-05 gnorm: 1.05 [13:07:04<11:25:49] +[titan] 2025-10-05 11:41:25,788 - root - INFO - step: 21380 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 11:41:25,788 - root - INFO - lr: 
2.5396e-05 gnorm: 1.08 [13:07:15<11:25:37] +[titan] 2025-10-05 11:41:36,680 - root - INFO - step: 21385 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8705 +[titan] 2025-10-05 11:41:36,680 - root - INFO - lr: 2.5387e-05 gnorm: 1.06 [13:07:26<11:25:26] +[titan] 2025-10-05 11:41:47,564 - root - INFO - step: 21390 loss: 2.0660 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 11:41:47,564 - root - INFO - lr: 2.5378e-05 gnorm: 1.06 [13:07:37<11:25:15] +[titan] 2025-10-05 11:41:58,477 - root - INFO - step: 21395 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:41:58,477 - root - INFO - lr: 2.5369e-05 gnorm: 1.05 [13:07:48<11:25:04] +[titan] 2025-10-05 11:42:07,158 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:42:09,346 - root - INFO - step: 21400 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 11:42:09,346 - root - INFO - lr: 2.5360e-05 gnorm: 1.06 [13:07:59<11:24:53] +[titan] 2025-10-05 11:42:20,226 - root - INFO - step: 21405 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:42:20,226 - root - INFO - lr: 2.5352e-05 gnorm: 1.09 [13:08:10<11:24:41] +[titan] 2025-10-05 11:42:31,111 - root - INFO - step: 21410 loss: 2.1240 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:42:31,112 - root - INFO - lr: 2.5343e-05 gnorm: 1.12 [13:08:20<11:24:30] +[titan] 2025-10-05 11:42:42,010 - root - INFO - step: 21415 loss: 2.0961 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8538 +[titan] 2025-10-05 11:42:42,011 - root - INFO - lr: 2.5334e-05 gnorm: 1.06 [13:08:31<11:24:19] +[titan] 2025-10-05 11:42:52,881 - root - INFO - step: 21420 loss: 2.1163 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 11:42:52,881 - root - INFO - lr: 2.5325e-05 gnorm: 1.06 [13:08:42<11:24:08] +[titan] 2025-10-05 11:43:03,753 - root - INFO - step: 21425 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8953 +[titan] 2025-10-05 11:43:03,753 - root - INFO - lr: 2.5316e-05 gnorm: 1.05 [13:08:53<11:23:57] +[titan] 2025-10-05 11:43:14,617 - root - INFO - step: 21430 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8631 +[titan] 2025-10-05 11:43:14,617 - root - INFO - lr: 
2.5307e-05 gnorm: 1.07 [13:09:04<11:23:45] +[titan] 2025-10-05 11:43:25,474 - root - INFO - step: 21435 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 11:43:25,474 - root - INFO - lr: 2.5298e-05 gnorm: 1.04 [13:09:15<11:23:34] +[titan] 2025-10-05 11:43:36,449 - root - INFO - step: 21440 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.24 mfu: 41.89% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 11:43:36,449 - root - INFO - lr: 2.5289e-05 gnorm: 1.05 [13:09:26<11:23:23] +[titan] 2025-10-05 11:43:47,314 - root - INFO - step: 21445 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:43:47,314 - root - INFO - lr: 2.5280e-05 gnorm: 1.04 [13:09:37<11:23:12] +[titan] 2025-10-05 11:43:56,017 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:43:58,209 - root - INFO - step: 21450 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 11:43:58,209 - root - INFO - lr: 2.5272e-05 gnorm: 1.06 [13:09:48<11:23:01] +[titan] 2025-10-05 11:44:09,061 - root - INFO - step: 21455 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 11:44:09,062 - root - INFO - lr: 2.5263e-05 gnorm: 1.08 [13:09:58<11:22:50] +[titan] 2025-10-05 11:44:19,965 - root - INFO - step: 21460 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9120 +[titan] 2025-10-05 11:44:19,966 - root - INFO - lr: 2.5254e-05 gnorm: 1.07 [13:10:09<11:22:38] +[titan] 2025-10-05 11:44:30,808 - root - INFO - step: 21465 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:44:30,808 - root - INFO - lr: 2.5245e-05 gnorm: 1.04 [13:10:20<11:22:27] +[titan] 2025-10-05 11:44:41,706 - root - INFO - step: 21470 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:44:41,706 - root - INFO - lr: 2.5236e-05 gnorm: 1.08 [13:10:31<11:22:16] +[titan] 2025-10-05 11:44:52,552 - root - INFO - step: 21475 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8376 +[titan] 2025-10-05 11:44:52,552 - root - INFO - lr: 2.5227e-05 gnorm: 1.04 [13:10:42<11:22:05] +[titan] 2025-10-05 11:45:03,391 - root - INFO - step: 21480 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:45:03,391 - root - INFO - lr: 
2.5218e-05 gnorm: 1.07 [13:10:53<11:21:54] +[titan] 2025-10-05 11:45:14,218 - root - INFO - step: 21485 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 11:45:14,218 - root - INFO - lr: 2.5209e-05 gnorm: 1.09 [13:11:04<11:21:42] +[titan] 2025-10-05 11:45:25,127 - root - INFO - step: 21490 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 11:45:25,127 - root - INFO - lr: 2.5201e-05 gnorm: 1.06 [13:11:14<11:21:31] +[titan] 2025-10-05 11:45:35,950 - root - INFO - step: 21495 loss: 2.1076 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 11:45:35,951 - root - INFO - lr: 2.5192e-05 gnorm: 1.05 [13:11:25<11:21:20] +[titan] 2025-10-05 11:45:44,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:45:46,815 - root - INFO - step: 21500 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9073 +[titan] 2025-10-05 11:45:46,815 - root - INFO - lr: 2.5183e-05 gnorm: 1.08 [13:11:36<11:21:09] +[titan] 2025-10-05 11:45:55,765 - root - INFO - Dumping profiler traces at step 21504 +[titan] 2025-10-05 11:45:55,805 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:45:57,977 - root - INFO - step: 21505 loss: 2.1378 memory: 118.84GiB(85.28%) tps: 29,357 tflops: 407.29 mfu: 41.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8896 +[titan] 2025-10-05 11:45:57,977 - root - INFO - lr: 2.5174e-05 gnorm: 1.10 [13:11:47<11:20:58] +[titan] 2025-10-05 11:46:08,810 - root - INFO - step: 21510 loss: 2.1100 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:46:08,810 - root - INFO - lr: 2.5165e-05 gnorm: 1.08 [13:11:58<11:20:47] +[titan] 2025-10-05 11:46:19,644 - root - INFO - step: 21515 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:46:19,645 - root - INFO - lr: 2.5156e-05 gnorm: 1.05 [13:12:09<11:20:35] +[titan] 2025-10-05 11:46:30,518 - root - INFO - step: 21520 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 11:46:30,518 - root - INFO - lr: 2.5147e-05 gnorm: 1.08 [13:12:20<11:20:24] +[titan] 2025-10-05 11:46:41,409 - root - INFO - step: 21525 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 11:46:41,409 - root - INFO - lr: 2.5138e-05 gnorm: 1.08 [13:12:31<11:20:13] +[titan] 2025-10-05 11:46:52,228 - root - INFO - step: 21530 loss: 
2.0507 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 11:46:52,228 - root - INFO - lr: 2.5130e-05 gnorm: 1.06 [13:12:42<11:20:02] +[titan] 2025-10-05 11:47:03,059 - root - INFO - step: 21535 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 11:47:03,059 - root - INFO - lr: 2.5121e-05 gnorm: 1.03 [13:12:52<11:19:50] +[titan] 2025-10-05 11:47:13,907 - root - INFO - step: 21540 loss: 2.1549 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 11:47:13,907 - root - INFO - lr: 2.5112e-05 gnorm: 1.09 [13:13:03<11:19:39] +[titan] 2025-10-05 11:47:24,716 - root - INFO - step: 21545 loss: 2.1223 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 11:47:24,716 - root - INFO - lr: 2.5103e-05 gnorm: 1.07 [13:13:14<11:19:28] +[titan] 2025-10-05 11:47:33,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:47:35,549 - root - INFO - step: 21550 loss: 2.1493 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8994 +[titan] 2025-10-05 11:47:35,549 - root - INFO - lr: 2.5094e-05 gnorm: 1.05 [13:13:25<11:19:17] +[titan] 2025-10-05 11:47:46,489 - root - INFO - step: 21555 loss: 2.0469 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 11:47:46,490 - root - INFO - lr: 2.5085e-05 gnorm: 1.04 [13:13:36<11:19:06] +[titan] 2025-10-05 11:47:57,291 - root - INFO - step: 21560 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:47:57,291 - root - INFO - lr: 2.5076e-05 gnorm: 1.08 [13:13:47<11:18:54] +[titan] 2025-10-05 11:48:08,089 - root - INFO - step: 21565 loss: 2.0826 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 11:48:08,090 - root - INFO - lr: 2.5067e-05 gnorm: 1.06 [13:13:57<11:18:43] +[titan] 2025-10-05 11:48:18,889 - root - INFO - step: 21570 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:48:18,889 - root - INFO - lr: 2.5059e-05 gnorm: 1.09 [13:14:08<11:18:32] +[titan] 2025-10-05 11:48:29,708 - root - INFO - step: 21575 loss: 2.1425 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:48:29,709 - root - INFO - lr: 2.5050e-05 gnorm: 1.06 [13:14:19<11:18:21] +[titan] 2025-10-05 11:48:40,539 - root - INFO - step: 21580 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.76 mfu: 42.44% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 11:48:40,539 - root - INFO - lr: 
2.5041e-05 gnorm: 1.11 [13:14:30<11:18:09] +[titan] 2025-10-05 11:48:51,410 - root - INFO - step: 21585 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:48:51,410 - root - INFO - lr: 2.5032e-05 gnorm: 1.06 [13:14:41<11:17:58] +[titan] 2025-10-05 11:49:02,256 - root - INFO - step: 21590 loss: 2.1780 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9253 +[titan] 2025-10-05 11:49:02,256 - root - INFO - lr: 2.5023e-05 gnorm: 1.12 [13:14:52<11:17:47] +[titan] 2025-10-05 11:49:13,089 - root - INFO - step: 21595 loss: 2.1172 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 11:49:13,089 - root - INFO - lr: 2.5014e-05 gnorm: 1.10 [13:15:02<11:17:36] +[titan] 2025-10-05 11:49:21,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:49:23,936 - root - INFO - step: 21600 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 11:49:23,936 - root - INFO - lr: 2.5005e-05 gnorm: 1.09 [13:15:13<11:17:25] +[titan] 2025-10-05 11:49:34,750 - root - INFO - step: 21605 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8838 +[titan] 2025-10-05 11:49:34,751 - root - INFO - lr: 2.4996e-05 gnorm: 1.08 [13:15:24<11:17:13] +[titan] 2025-10-05 11:49:45,562 - root - INFO - step: 21610 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8037 +[titan] 2025-10-05 11:49:45,563 - root - INFO - lr: 2.4988e-05 gnorm: 1.02 [13:15:35<11:17:02] +[titan] 2025-10-05 11:49:56,369 - root - INFO - step: 21615 loss: 2.1371 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8883 +[titan] 2025-10-05 11:49:56,370 - root - INFO - lr: 2.4979e-05 gnorm: 1.04 [13:15:46<11:16:51] +[titan] 2025-10-05 11:50:07,237 - root - INFO - step: 21620 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:50:07,237 - root - INFO - lr: 2.4970e-05 gnorm: 1.05 [13:15:57<11:16:40] +[titan] 2025-10-05 11:50:18,053 - root - INFO - step: 21625 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8344 +[titan] 2025-10-05 11:50:18,053 - root - INFO - lr: 2.4961e-05 gnorm: 1.06 [13:16:07<11:16:28] +[titan] 2025-10-05 11:50:28,850 - root - INFO - step: 21630 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.05 mfu: 42.57% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:50:28,851 - root - INFO - lr: 
2.4952e-05 gnorm: 1.04 [13:16:18<11:16:17] +[titan] 2025-10-05 11:50:39,656 - root - INFO - step: 21635 loss: 2.0898 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:50:39,656 - root - INFO - lr: 2.4943e-05 gnorm: 1.09 [13:16:29<11:16:06] +[titan] 2025-10-05 11:50:50,529 - root - INFO - step: 21640 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 11:50:50,529 - root - INFO - lr: 2.4934e-05 gnorm: 1.06 [13:16:40<11:15:55] +[titan] 2025-10-05 11:51:01,328 - root - INFO - step: 21645 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:51:01,329 - root - INFO - lr: 2.4926e-05 gnorm: 1.04 [13:16:51<11:15:43] +[titan] 2025-10-05 11:51:09,997 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:51:12,168 - root - INFO - step: 21650 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 11:51:12,168 - root - INFO - lr: 2.4917e-05 gnorm: 1.07 [13:17:01<11:15:32] +[titan] 2025-10-05 11:51:23,012 - root - INFO - step: 21655 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 11:51:23,012 - root - INFO - lr: 2.4908e-05 gnorm: 1.06 [13:17:12<11:15:21] +[titan] 2025-10-05 11:51:33,829 - root - INFO - step: 21660 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 11:51:33,830 - root - INFO - lr: 2.4899e-05 gnorm: 1.06 [13:17:23<11:15:10] +[titan] 2025-10-05 11:51:44,687 - root - INFO - step: 21665 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 11:51:44,687 - root - INFO - lr: 2.4890e-05 gnorm: 1.03 [13:17:34<11:14:59] +[titan] 2025-10-05 11:51:55,529 - root - INFO - step: 21670 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 11:51:55,529 - root - INFO - lr: 2.4881e-05 gnorm: 1.04 [13:17:45<11:14:47] +[titan] 2025-10-05 11:52:06,368 - root - INFO - step: 21675 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 11:52:06,368 - root - INFO - lr: 2.4872e-05 gnorm: 1.05 [13:17:56<11:14:36] +[titan] 2025-10-05 11:52:17,248 - root - INFO - step: 21680 loss: 2.0964 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8526 +[titan] 2025-10-05 11:52:17,248 - root - INFO - lr: 
2.4863e-05 gnorm: 1.08 [13:18:07<11:14:25] +[titan] 2025-10-05 11:52:28,077 - root - INFO - step: 21685 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 11:52:28,077 - root - INFO - lr: 2.4855e-05 gnorm: 1.04 [13:18:17<11:14:14] +[titan] 2025-10-05 11:52:38,897 - root - INFO - step: 21690 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 11:52:38,897 - root - INFO - lr: 2.4846e-05 gnorm: 1.12 [13:18:28<11:14:02] +[titan] 2025-10-05 11:52:49,731 - root - INFO - step: 21695 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:52:49,731 - root - INFO - lr: 2.4837e-05 gnorm: 1.14 [13:18:39<11:13:51] +[titan] 2025-10-05 11:52:58,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:53:00,557 - root - INFO - step: 21700 loss: 2.0942 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 11:53:00,558 - root - INFO - lr: 2.4828e-05 gnorm: 1.04 [13:18:50<11:13:40] +[titan] 2025-10-05 11:53:11,384 - root - INFO - step: 21705 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8616 +[titan] 2025-10-05 11:53:11,384 - root - INFO - lr: 2.4819e-05 gnorm: 1.01 [13:19:01<11:13:29] +[titan] 2025-10-05 11:53:22,180 - root - INFO - step: 21710 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,354 tflops: 421.11 mfu: 42.58% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 11:53:22,180 - root - INFO - lr: 2.4810e-05 gnorm: 1.08 [13:19:11<11:13:18] +[titan] 2025-10-05 11:53:33,006 - root - INFO - step: 21715 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 11:53:33,006 - root - INFO - lr: 2.4801e-05 gnorm: 1.07 [13:19:22<11:13:06] +[titan] 2025-10-05 11:53:43,863 - root - INFO - step: 21720 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8820 +[titan] 2025-10-05 11:53:43,863 - root - INFO - lr: 2.4793e-05 gnorm: 1.07 [13:19:33<11:12:55] +[titan] 2025-10-05 11:53:54,726 - root - INFO - step: 21725 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 11:53:54,726 - root - INFO - lr: 2.4784e-05 gnorm: 1.07 [13:19:44<11:12:44] +[titan] 2025-10-05 11:54:05,529 - root - INFO - step: 21730 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 11:54:05,529 - root - INFO - lr: 
2.4775e-05 gnorm: 1.10 [13:19:55<11:12:33] +[titan] 2025-10-05 11:54:16,329 - root - INFO - step: 21735 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,341 tflops: 420.94 mfu: 42.56% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8624 +[titan] 2025-10-05 11:54:16,329 - root - INFO - lr: 2.4766e-05 gnorm: 1.08 [13:20:06<11:12:21] +[titan] 2025-10-05 11:54:27,148 - root - INFO - step: 21740 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.19 mfu: 42.49% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9108 +[titan] 2025-10-05 11:54:27,149 - root - INFO - lr: 2.4757e-05 gnorm: 1.08 [13:20:16<11:12:10] +[titan] 2025-10-05 11:54:38,009 - root - INFO - step: 21745 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 11:54:38,009 - root - INFO - lr: 2.4748e-05 gnorm: 1.09 [13:20:27<11:11:59] +[titan] 2025-10-05 11:54:46,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:54:48,886 - root - INFO - step: 21750 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8871 +[titan] 2025-10-05 11:54:48,886 - root - INFO - lr: 2.4739e-05 gnorm: 1.11 [13:20:38<11:11:48] +[titan] 2025-10-05 11:54:59,687 - root - INFO - step: 21755 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 11:54:59,688 - root - INFO - lr: 2.4731e-05 gnorm: 1.03 [13:20:49<11:11:37] +[titan] 2025-10-05 11:55:10,503 - root - INFO - step: 21760 loss: 2.0855 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:55:10,504 - root - INFO - lr: 2.4722e-05 gnorm: 1.08 [13:21:00<11:11:25] +[titan] 2025-10-05 11:55:21,303 - root - INFO - step: 21765 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 11:55:21,304 - root - INFO - lr: 2.4713e-05 gnorm: 1.06 [13:21:11<11:11:14] +[titan] 2025-10-05 11:55:32,128 - root - INFO - step: 21770 loss: 2.0394 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 11:55:32,129 - root - INFO - lr: 2.4704e-05 gnorm: 1.07 [13:21:21<11:11:03] +[titan] 2025-10-05 11:55:42,948 - root - INFO - step: 21775 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 11:55:42,948 - root - INFO - lr: 2.4695e-05 gnorm: 1.13 [13:21:32<11:10:52] +[titan] 2025-10-05 11:55:53,849 - root - INFO - step: 21780 loss: 2.1107 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 11:55:53,849 - root - INFO - lr: 
2.4686e-05 gnorm: 1.06 [13:21:43<11:10:40] +[titan] 2025-10-05 11:56:04,670 - root - INFO - step: 21785 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 11:56:04,670 - root - INFO - lr: 2.4677e-05 gnorm: 1.11 [13:21:54<11:10:29] +[titan] 2025-10-05 11:56:15,465 - root - INFO - step: 21790 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,355 tflops: 421.13 mfu: 42.58% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 11:56:15,465 - root - INFO - lr: 2.4669e-05 gnorm: 1.08 [13:22:05<11:10:18] +[titan] 2025-10-05 11:56:26,269 - root - INFO - step: 21795 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 11:56:26,269 - root - INFO - lr: 2.4660e-05 gnorm: 1.04 [13:22:16<11:10:07] +[titan] 2025-10-05 11:56:34,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:56:37,050 - root - INFO - step: 21800 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 11:56:37,050 - root - INFO - lr: 2.4651e-05 gnorm: 1.03 [13:22:26<11:09:55] +[titan] 2025-10-05 11:56:47,848 - root - INFO - step: 21805 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8423 +[titan] 2025-10-05 11:56:47,849 - root - INFO - lr: 2.4642e-05 gnorm: 1.06 [13:22:37<11:09:44] +[titan] 2025-10-05 11:56:58,686 - root - INFO - step: 21810 loss: 2.0632 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 11:56:58,686 - root - INFO - lr: 2.4633e-05 gnorm: 1.08 [13:22:48<11:09:33] +[titan] 2025-10-05 11:57:09,468 - root - INFO - step: 21815 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,391 tflops: 421.63 mfu: 42.63% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 11:57:09,469 - root - INFO - lr: 2.4624e-05 gnorm: 1.04 [13:22:59<11:09:22] +[titan] 2025-10-05 11:57:20,268 - root - INFO - step: 21820 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.56% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 11:57:20,268 - root - INFO - lr: 2.4615e-05 gnorm: 1.06 [13:23:10<11:09:11] +[titan] 2025-10-05 11:57:31,069 - root - INFO - step: 21825 loss: 2.0588 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 11:57:31,069 - root - INFO - lr: 2.4607e-05 gnorm: 1.03 [13:23:20<11:08:59] +[titan] 2025-10-05 11:57:41,865 - root - INFO - step: 21830 loss: 2.1085 memory: 118.84GiB(85.28%) tps: 30,353 tflops: 421.10 mfu: 42.58% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 11:57:41,865 - root - INFO - lr: 
2.4598e-05 gnorm: 1.03 [13:23:31<11:08:48] +[titan] 2025-10-05 11:57:52,686 - root - INFO - step: 21835 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:57:52,687 - root - INFO - lr: 2.4589e-05 gnorm: 1.03 [13:23:42<11:08:37] +[titan] 2025-10-05 11:58:03,531 - root - INFO - step: 21840 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:58:03,532 - root - INFO - lr: 2.4580e-05 gnorm: 1.05 [13:23:53<11:08:26] +[titan] 2025-10-05 11:58:14,308 - root - INFO - step: 21845 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,407 tflops: 421.85 mfu: 42.65% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9173 +[titan] 2025-10-05 11:58:14,309 - root - INFO - lr: 2.4571e-05 gnorm: 1.09 [13:24:04<11:08:14] +[titan] 2025-10-05 11:58:22,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:58:25,109 - root - INFO - step: 21850 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:58:25,110 - root - INFO - lr: 2.4562e-05 gnorm: 1.08 [13:24:14<11:08:03] +[titan] 2025-10-05 11:58:35,880 - root - INFO - step: 21855 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,424 tflops: 422.09 mfu: 42.68% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 11:58:35,880 - root - INFO - lr: 2.4554e-05 gnorm: 1.08 [13:24:25<11:07:52] +[titan] 2025-10-05 11:58:46,714 - root - INFO - step: 21860 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:58:46,714 - root - INFO - lr: 2.4545e-05 gnorm: 1.03 [13:24:36<11:07:41] +[titan] 2025-10-05 11:58:57,569 - root - INFO - step: 21865 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8989 +[titan] 2025-10-05 11:58:57,569 - root - INFO - lr: 2.4536e-05 gnorm: 1.07 [13:24:47<11:07:29] +[titan] 2025-10-05 11:59:08,390 - root - INFO - step: 21870 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 11:59:08,390 - root - INFO - lr: 2.4527e-05 gnorm: 1.04 [13:24:58<11:07:18] +[titan] 2025-10-05 11:59:19,246 - root - INFO - step: 21875 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8438 +[titan] 2025-10-05 11:59:19,246 - root - INFO - lr: 2.4518e-05 gnorm: 1.06 [13:25:09<11:07:07] +[titan] 2025-10-05 11:59:30,047 - root - INFO - step: 21880 loss: 2.0852 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:59:30,047 - root - INFO - lr: 
2.4509e-05 gnorm: 1.08 [13:25:19<11:06:56] +[titan] 2025-10-05 11:59:40,863 - root - INFO - step: 21885 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 11:59:40,863 - root - INFO - lr: 2.4500e-05 gnorm: 1.05 [13:25:30<11:06:45] +[titan] 2025-10-05 11:59:51,744 - root - INFO - step: 21890 loss: 2.1740 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9204 +[titan] 2025-10-05 11:59:51,744 - root - INFO - lr: 2.4492e-05 gnorm: 1.10 [13:25:41<11:06:33] +[titan] 2025-10-05 12:00:02,569 - root - INFO - step: 21895 loss: 2.2128 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9549 +[titan] 2025-10-05 12:00:02,570 - root - INFO - lr: 2.4483e-05 gnorm: 1.10 [13:25:52<11:06:22] +[titan] 2025-10-05 12:00:11,224 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:00:13,387 - root - INFO - step: 21900 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 12:00:13,387 - root - INFO - lr: 2.4474e-05 gnorm: 1.04 [13:26:03<11:06:11] +[titan] 2025-10-05 12:00:24,246 - root - INFO - step: 21905 loss: 2.1321 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 12:00:24,246 - root - INFO - lr: 2.4465e-05 gnorm: 1.05 [13:26:14<11:06:00] +[titan] 2025-10-05 12:00:35,064 - root - INFO - step: 21910 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 12:00:35,064 - root - INFO - lr: 2.4456e-05 gnorm: 1.06 [13:26:24<11:05:48] +[titan] 2025-10-05 12:00:45,889 - root - INFO - step: 21915 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:00:45,889 - root - INFO - lr: 2.4447e-05 gnorm: 1.07 [13:26:35<11:05:37] +[titan] 2025-10-05 12:00:56,747 - root - INFO - step: 21920 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 12:00:56,748 - root - INFO - lr: 2.4439e-05 gnorm: 1.11 [13:26:46<11:05:26] +[titan] 2025-10-05 12:01:07,566 - root - INFO - step: 21925 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:01:07,567 - root - INFO - lr: 2.4430e-05 gnorm: 1.06 [13:26:57<11:05:15] +[titan] 2025-10-05 12:01:18,394 - root - INFO - step: 21930 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 12:01:18,394 - root - INFO - lr: 
2.4421e-05 gnorm: 1.08 [13:27:08<11:05:04] +[titan] 2025-10-05 12:01:29,213 - root - INFO - step: 21935 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 12:01:29,213 - root - INFO - lr: 2.4412e-05 gnorm: 1.05 [13:27:19<11:04:52] +[titan] 2025-10-05 12:01:40,068 - root - INFO - step: 21940 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 12:01:40,068 - root - INFO - lr: 2.4403e-05 gnorm: 1.06 [13:27:29<11:04:41] +[titan] 2025-10-05 12:01:50,925 - root - INFO - step: 21945 loss: 2.1040 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8614 +[titan] 2025-10-05 12:01:50,925 - root - INFO - lr: 2.4394e-05 gnorm: 1.09 [13:27:40<11:04:30] +[titan] 2025-10-05 12:01:59,596 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:02:01,768 - root - INFO - step: 21950 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:02:01,768 - root - INFO - lr: 2.4385e-05 gnorm: 1.08 [13:27:51<11:04:19] +[titan] 2025-10-05 12:02:12,595 - root - INFO - step: 21955 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8121 +[titan] 2025-10-05 12:02:12,595 - root - INFO - lr: 2.4377e-05 gnorm: 1.04 [13:28:02<11:04:08] +[titan] 2025-10-05 12:02:23,415 - root - INFO - step: 21960 loss: 2.0883 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8460 +[titan] 2025-10-05 12:02:23,415 - root - INFO - lr: 2.4368e-05 gnorm: 1.02 [13:28:13<11:03:56] +[titan] 2025-10-05 12:02:34,233 - root - INFO - step: 21965 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:02:34,234 - root - INFO - lr: 2.4359e-05 gnorm: 1.07 [13:28:24<11:03:45] +[titan] 2025-10-05 12:02:45,129 - root - INFO - step: 21970 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8679 +[titan] 2025-10-05 12:02:45,129 - root - INFO - lr: 2.4350e-05 gnorm: 1.06 [13:28:34<11:03:34] +[titan] 2025-10-05 12:02:56,069 - root - INFO - step: 21975 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8574 +[titan] 2025-10-05 12:02:56,069 - root - INFO - lr: 2.4341e-05 gnorm: 1.04 [13:28:45<11:03:23] +[titan] 2025-10-05 12:03:06,899 - root - INFO - step: 21980 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9135 +[titan] 2025-10-05 12:03:06,899 - root - INFO - lr: 
2.4332e-05 gnorm: 1.08 [13:28:56<11:03:12] +[titan] 2025-10-05 12:03:17,738 - root - INFO - step: 21985 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:03:17,738 - root - INFO - lr: 2.4324e-05 gnorm: 1.09 [13:29:07<11:03:00] +[titan] 2025-10-05 12:03:28,567 - root - INFO - step: 21990 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 12:03:28,567 - root - INFO - lr: 2.4315e-05 gnorm: 1.02 [13:29:18<11:02:49] +[titan] 2025-10-05 12:03:39,369 - root - INFO - step: 21995 loss: 2.1137 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.87 mfu: 42.55% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 12:03:39,369 - root - INFO - lr: 2.4306e-05 gnorm: 1.06 [13:29:29<11:02:38] +[titan] 2025-10-05 12:03:48,026 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:03:50,242 - root - INFO - step: 22000 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:03:50,242 - root - INFO - lr: 2.4297e-05 gnorm: 1.08 [13:29:40<11:02:27] +[titan] 2025-10-05 12:04:01,127 - root - INFO - step: 22005 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:04:01,127 - root - INFO - lr: 2.4288e-05 gnorm: 1.01 [13:29:50<11:02:16] +[titan] 2025-10-05 12:04:11,950 - root - INFO - step: 22010 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8594 +[titan] 2025-10-05 12:04:11,950 - root - INFO - lr: 2.4279e-05 gnorm: 1.06 [13:30:01<11:02:04] +[titan] 2025-10-05 12:04:22,889 - root - INFO - step: 22015 loss: 2.0810 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:04:22,889 - root - INFO - lr: 2.4271e-05 gnorm: 1.07 [13:30:12<11:01:53] +[titan] 2025-10-05 12:04:25,245 - root - INFO - Dumping profiler traces at step 22016 +[titan] 2025-10-05 12:04:25,285 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:04:33,972 - root - INFO - step: 22020 loss: 2.1387 memory: 118.84GiB(85.28%) tps: 29,566 tflops: 410.19 mfu: 41.48% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8891 +[titan] 2025-10-05 12:04:33,972 - root - INFO - lr: 2.4262e-05 gnorm: 1.03 [13:30:23<11:01:42] +[titan] 2025-10-05 12:04:44,810 - root - INFO - step: 22025 loss: 2.1465 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 12:04:44,810 - root - INFO - lr: 2.4253e-05 gnorm: 1.08 [13:30:34<11:01:31] +[titan] 2025-10-05 12:04:55,694 - root - INFO - step: 22030 loss: 
2.1289 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 12:04:55,694 - root - INFO - lr: 2.4244e-05 gnorm: 1.05 [13:30:45<11:01:20] +[titan] 2025-10-05 12:05:06,571 - root - INFO - step: 22035 loss: 2.0627 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8224 +[titan] 2025-10-05 12:05:06,571 - root - INFO - lr: 2.4235e-05 gnorm: 1.04 [13:30:56<11:01:09] +[titan] 2025-10-05 12:05:17,439 - root - INFO - step: 22040 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 12:05:17,439 - root - INFO - lr: 2.4226e-05 gnorm: 1.05 [13:31:07<11:00:58] +[titan] 2025-10-05 12:05:28,290 - root - INFO - step: 22045 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 12:05:28,290 - root - INFO - lr: 2.4218e-05 gnorm: 1.06 [13:31:18<11:00:46] +[titan] 2025-10-05 12:05:36,955 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:05:39,138 - root - INFO - step: 22050 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:05:39,138 - root - INFO - lr: 2.4209e-05 gnorm: 1.06 [13:31:28<11:00:35] +[titan] 2025-10-05 12:05:49,987 - root - INFO - step: 22055 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 12:05:49,987 - root - INFO - lr: 2.4200e-05 gnorm: 1.05 [13:31:39<11:00:24] +[titan] 2025-10-05 12:06:00,891 - root - INFO - step: 22060 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 12:06:00,891 - root - INFO - lr: 2.4191e-05 gnorm: 1.10 [13:31:50<11:00:13] +[titan] 2025-10-05 12:06:11,774 - root - INFO - step: 22065 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:06:11,775 - root - INFO - lr: 2.4182e-05 gnorm: 1.05 [13:32:01<11:00:02] +[titan] 2025-10-05 12:06:22,629 - root - INFO - step: 22070 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:06:22,629 - root - INFO - lr: 2.4173e-05 gnorm: 1.06 [13:32:12<10:59:50] +[titan] 2025-10-05 12:06:33,471 - root - INFO - step: 22075 loss: 2.0401 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8033 +[titan] 2025-10-05 12:06:33,471 - root - INFO - lr: 2.4165e-05 gnorm: 1.07 [13:32:23<10:59:39] +[titan] 2025-10-05 12:06:44,307 - root - INFO - step: 22080 loss: 2.1317 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8847 +[titan] 2025-10-05 12:06:44,307 - root - INFO - lr: 
2.4156e-05 gnorm: 1.08 [13:32:34<10:59:28] +[titan] 2025-10-05 12:06:55,150 - root - INFO - step: 22085 loss: 2.0997 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8559 +[titan] 2025-10-05 12:06:55,151 - root - INFO - lr: 2.4147e-05 gnorm: 1.06 [13:32:44<10:59:17] +[titan] 2025-10-05 12:07:06,030 - root - INFO - step: 22090 loss: 2.1094 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 12:07:06,030 - root - INFO - lr: 2.4138e-05 gnorm: 1.09 [13:32:55<10:59:06] +[titan] 2025-10-05 12:07:16,895 - root - INFO - step: 22095 loss: 2.1217 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:07:16,895 - root - INFO - lr: 2.4129e-05 gnorm: 1.14 [13:33:06<10:58:54] +[titan] 2025-10-05 12:07:25,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:07:27,824 - root - INFO - step: 22100 loss: 2.1006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 12:07:27,824 - root - INFO - lr: 2.4121e-05 gnorm: 1.06 [13:33:17<10:58:43] +[titan] 2025-10-05 12:07:38,689 - root - INFO - step: 22105 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 12:07:38,689 - root - INFO - lr: 2.4112e-05 gnorm: 1.07 [13:33:28<10:58:32] +[titan] 2025-10-05 12:07:49,564 - root - INFO - step: 22110 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 12:07:49,564 - root - INFO - lr: 2.4103e-05 gnorm: 1.12 [13:33:39<10:58:21] +[titan] 2025-10-05 12:08:00,491 - root - INFO - step: 22115 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8523 +[titan] 2025-10-05 12:08:00,491 - root - INFO - lr: 2.4094e-05 gnorm: 1.06 [13:33:50<10:58:10] +[titan] 2025-10-05 12:08:11,388 - root - INFO - step: 22120 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 12:08:11,388 - root - INFO - lr: 2.4085e-05 gnorm: 1.07 [13:34:01<10:57:59] +[titan] 2025-10-05 12:08:22,246 - root - INFO - step: 22125 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 12:08:22,246 - root - INFO - lr: 2.4076e-05 gnorm: 1.08 [13:34:12<10:57:48] +[titan] 2025-10-05 12:08:33,148 - root - INFO - step: 22130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:08:33,148 - root - INFO - lr: 
2.4068e-05 gnorm: 1.08 [13:34:22<10:57:36] +[titan] 2025-10-05 12:08:44,031 - root - INFO - step: 22135 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:08:44,031 - root - INFO - lr: 2.4059e-05 gnorm: 1.02 [13:34:33<10:57:25] +[titan] 2025-10-05 12:08:54,887 - root - INFO - step: 22140 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:08:54,888 - root - INFO - lr: 2.4050e-05 gnorm: 1.06 [13:34:44<10:57:14] +[titan] 2025-10-05 12:09:05,796 - root - INFO - step: 22145 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 12:09:05,796 - root - INFO - lr: 2.4041e-05 gnorm: 1.02 [13:34:55<10:57:03] +[titan] 2025-10-05 12:09:14,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:09:16,655 - root - INFO - step: 22150 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 12:09:16,655 - root - INFO - lr: 2.4032e-05 gnorm: 1.06 [13:35:06<10:56:52] +[titan] 2025-10-05 12:09:27,522 - root - INFO - step: 22155 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8661 +[titan] 2025-10-05 12:09:27,522 - root - INFO - lr: 2.4024e-05 gnorm: 1.02 [13:35:17<10:56:40] +[titan] 2025-10-05 12:09:38,432 - root - INFO - step: 22160 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 12:09:38,432 - root - INFO - lr: 2.4015e-05 gnorm: 1.04 [13:35:28<10:56:29] +[titan] 2025-10-05 12:09:49,302 - root - INFO - step: 22165 loss: 2.1166 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:09:49,302 - root - INFO - lr: 2.4006e-05 gnorm: 1.12 [13:35:39<10:56:18] +[titan] 2025-10-05 12:10:00,224 - root - INFO - step: 22170 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:10:00,224 - root - INFO - lr: 2.3997e-05 gnorm: 1.09 [13:35:49<10:56:07] +[titan] 2025-10-05 12:10:11,087 - root - INFO - step: 22175 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:10:11,087 - root - INFO - lr: 2.3988e-05 gnorm: 1.05 [13:36:00<10:55:56] +[titan] 2025-10-05 12:10:21,968 - root - INFO - step: 22180 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9855 +[titan] 2025-10-05 12:10:21,968 - root - INFO - lr: 
2.3979e-05 gnorm: 1.15 [13:36:11<10:55:45] +[titan] 2025-10-05 12:10:32,857 - root - INFO - step: 22185 loss: 2.1657 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9136 +[titan] 2025-10-05 12:10:32,857 - root - INFO - lr: 2.3971e-05 gnorm: 1.08 [13:36:22<10:55:34] +[titan] 2025-10-05 12:10:43,721 - root - INFO - step: 22190 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:10:43,721 - root - INFO - lr: 2.3962e-05 gnorm: 1.07 [13:36:33<10:55:22] +[titan] 2025-10-05 12:10:54,626 - root - INFO - step: 22195 loss: 2.1296 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 12:10:54,626 - root - INFO - lr: 2.3953e-05 gnorm: 1.11 [13:36:44<10:55:11] +[titan] 2025-10-05 12:11:03,352 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:11:05,537 - root - INFO - step: 22200 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 12:11:05,537 - root - INFO - lr: 2.3944e-05 gnorm: 1.09 [13:36:55<10:55:00] +[titan] 2025-10-05 12:11:16,410 - root - INFO - step: 22205 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 12:11:16,410 - root - INFO - lr: 2.3935e-05 gnorm: 1.10 [13:37:06<10:54:49] +[titan] 2025-10-05 12:11:27,277 - root - INFO - step: 22210 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 12:11:27,277 - root - INFO - lr: 2.3927e-05 gnorm: 1.10 [13:37:17<10:54:38] +[titan] 2025-10-05 12:11:38,149 - root - INFO - step: 22215 loss: 2.0858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8426 +[titan] 2025-10-05 12:11:38,149 - root - INFO - lr: 2.3918e-05 gnorm: 1.08 [13:37:27<10:54:27] +[titan] 2025-10-05 12:11:49,017 - root - INFO - step: 22220 loss: 2.1032 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 12:11:49,017 - root - INFO - lr: 2.3909e-05 gnorm: 1.08 [13:37:38<10:54:15] +[titan] 2025-10-05 12:11:59,905 - root - INFO - step: 22225 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8675 +[titan] 2025-10-05 12:11:59,905 - root - INFO - lr: 2.3900e-05 gnorm: 1.08 [13:37:49<10:54:04] +[titan] 2025-10-05 12:12:10,823 - root - INFO - step: 22230 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:12:10,823 - root - INFO - lr: 
2.3891e-05 gnorm: 1.09 [13:38:00<10:53:53] +[titan] 2025-10-05 12:12:21,651 - root - INFO - step: 22235 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 12:12:21,651 - root - INFO - lr: 2.3883e-05 gnorm: 1.08 [13:38:11<10:53:42] +[titan] 2025-10-05 12:12:32,529 - root - INFO - step: 22240 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 12:12:32,530 - root - INFO - lr: 2.3874e-05 gnorm: 1.11 [13:38:22<10:53:31] +[titan] 2025-10-05 12:12:43,387 - root - INFO - step: 22245 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 12:12:43,388 - root - INFO - lr: 2.3865e-05 gnorm: 1.06 [13:38:33<10:53:19] +[titan] 2025-10-05 12:12:52,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:12:54,236 - root - INFO - step: 22250 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 12:12:54,236 - root - INFO - lr: 2.3856e-05 gnorm: 1.09 [13:38:43<10:53:08] +[titan] 2025-10-05 12:13:05,203 - root - INFO - step: 22255 loss: 2.2062 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 12:13:05,204 - root - INFO - lr: 2.3847e-05 gnorm: 1.10 [13:38:54<10:52:57] +[titan] 2025-10-05 12:13:16,105 - root - INFO - step: 22260 loss: 2.0839 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8418 +[titan] 2025-10-05 12:13:16,105 - root - INFO - lr: 2.3838e-05 gnorm: 1.06 [13:39:05<10:52:46] +[titan] 2025-10-05 12:13:26,969 - root - INFO - step: 22265 loss: 2.1143 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 12:13:26,969 - root - INFO - lr: 2.3830e-05 gnorm: 1.09 [13:39:16<10:52:35] +[titan] 2025-10-05 12:13:37,833 - root - INFO - step: 22270 loss: 2.1822 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 12:13:37,833 - root - INFO - lr: 2.3821e-05 gnorm: 1.10 [13:39:27<10:52:24] +[titan] 2025-10-05 12:13:48,696 - root - INFO - step: 22275 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 12:13:48,696 - root - INFO - lr: 2.3812e-05 gnorm: 1.05 [13:39:38<10:52:13] +[titan] 2025-10-05 12:13:59,557 - root - INFO - step: 22280 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 12:13:59,557 - root - INFO - lr: 
2.3803e-05 gnorm: 1.06 [13:39:49<10:52:01] +[titan] 2025-10-05 12:14:10,442 - root - INFO - step: 22285 loss: 2.1340 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:14:10,442 - root - INFO - lr: 2.3794e-05 gnorm: 1.08 [13:40:00<10:51:50] +[titan] 2025-10-05 12:14:21,358 - root - INFO - step: 22290 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:14:21,358 - root - INFO - lr: 2.3786e-05 gnorm: 1.08 [13:40:11<10:51:39] +[titan] 2025-10-05 12:14:32,225 - root - INFO - step: 22295 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 12:14:32,225 - root - INFO - lr: 2.3777e-05 gnorm: 1.07 [13:40:21<10:51:28] +[titan] 2025-10-05 12:14:40,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:14:43,076 - root - INFO - step: 22300 loss: 2.0949 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 12:14:43,076 - root - INFO - lr: 2.3768e-05 gnorm: 1.08 [13:40:32<10:51:17] +[titan] 2025-10-05 12:14:53,944 - root - INFO - step: 22305 loss: 2.2081 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 12:14:53,944 - root - INFO - lr: 2.3759e-05 gnorm: 1.05 [13:40:43<10:51:06] +[titan] 2025-10-05 12:15:04,844 - root - INFO - step: 22310 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 12:15:04,844 - root - INFO - lr: 2.3750e-05 gnorm: 1.11 [13:40:54<10:50:54] +[titan] 2025-10-05 12:15:15,692 - root - INFO - step: 22315 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:15:15,693 - root - INFO - lr: 2.3742e-05 gnorm: 1.06 [13:41:05<10:50:43] +[titan] 2025-10-05 12:15:26,567 - root - INFO - step: 22320 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 12:15:26,567 - root - INFO - lr: 2.3733e-05 gnorm: 1.04 [13:41:16<10:50:32] +[titan] 2025-10-05 12:15:37,421 - root - INFO - step: 22325 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 12:15:37,422 - root - INFO - lr: 2.3724e-05 gnorm: 1.08 [13:41:27<10:50:21] +[titan] 2025-10-05 12:15:48,281 - root - INFO - step: 22330 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 12:15:48,281 - root - INFO - lr: 
2.3715e-05 gnorm: 1.08 [13:41:38<10:50:10] +[titan] 2025-10-05 12:15:59,149 - root - INFO - step: 22335 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:15:59,149 - root - INFO - lr: 2.3706e-05 gnorm: 1.04 [13:41:48<10:49:58] +[titan] 2025-10-05 12:16:10,046 - root - INFO - step: 22340 loss: 2.0616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 12:16:10,046 - root - INFO - lr: 2.3698e-05 gnorm: 1.10 [13:41:59<10:49:47] +[titan] 2025-10-05 12:16:20,913 - root - INFO - step: 22345 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 12:16:20,913 - root - INFO - lr: 2.3689e-05 gnorm: 1.09 [13:42:10<10:49:36] +[titan] 2025-10-05 12:16:29,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:16:31,773 - root - INFO - step: 22350 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:16:31,773 - root - INFO - lr: 2.3680e-05 gnorm: 1.09 [13:42:21<10:49:25] +[titan] 2025-10-05 12:16:42,676 - root - INFO - step: 22355 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8541 +[titan] 2025-10-05 12:16:42,676 - root - INFO - lr: 2.3671e-05 gnorm: 1.06 [13:42:32<10:49:14] +[titan] 2025-10-05 12:16:53,529 - root - INFO - step: 22360 loss: 2.1363 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:16:53,529 - root - INFO - lr: 2.3662e-05 gnorm: 1.08 [13:42:43<10:49:03] +[titan] 2025-10-05 12:17:04,373 - root - INFO - step: 22365 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:17:04,373 - root - INFO - lr: 2.3654e-05 gnorm: 1.08 [13:42:54<10:48:51] +[titan] 2025-10-05 12:17:15,272 - root - INFO - step: 22370 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:17:15,272 - root - INFO - lr: 2.3645e-05 gnorm: 1.06 [13:43:05<10:48:40] +[titan] 2025-10-05 12:17:26,145 - root - INFO - step: 22375 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 12:17:26,145 - root - INFO - lr: 2.3636e-05 gnorm: 1.05 [13:43:15<10:48:29] +[titan] 2025-10-05 12:17:36,995 - root - INFO - step: 22380 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 12:17:36,995 - root - INFO - lr: 
2.3627e-05 gnorm: 1.07 [13:43:26<10:48:18] +[titan] 2025-10-05 12:17:47,877 - root - INFO - step: 22385 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:17:47,877 - root - INFO - lr: 2.3619e-05 gnorm: 1.08 [13:43:37<10:48:07] +[titan] 2025-10-05 12:17:58,732 - root - INFO - step: 22390 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8286 +[titan] 2025-10-05 12:17:58,732 - root - INFO - lr: 2.3610e-05 gnorm: 1.02 [13:43:48<10:47:56] +[titan] 2025-10-05 12:18:09,619 - root - INFO - step: 22395 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8656 +[titan] 2025-10-05 12:18:09,619 - root - INFO - lr: 2.3601e-05 gnorm: 1.08 [13:43:59<10:47:44] +[titan] 2025-10-05 12:18:18,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:18:20,460 - root - INFO - step: 22400 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8304 +[titan] 2025-10-05 12:18:20,461 - root - INFO - lr: 2.3592e-05 gnorm: 1.03 [13:44:10<10:47:33] +[titan] 2025-10-05 12:18:31,320 - root - INFO - step: 22405 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:18:31,320 - root - INFO - lr: 2.3583e-05 gnorm: 1.06 [13:44:21<10:47:22] +[titan] 2025-10-05 12:18:42,176 - root - INFO - step: 22410 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 12:18:42,176 - root - INFO - lr: 2.3575e-05 gnorm: 1.06 [13:44:31<10:47:11] +[titan] 2025-10-05 12:18:53,029 - root - INFO - step: 22415 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 12:18:53,029 - root - INFO - lr: 2.3566e-05 gnorm: 1.07 [13:44:42<10:47:00] +[titan] 2025-10-05 12:19:03,920 - root - INFO - step: 22420 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 12:19:03,920 - root - INFO - lr: 2.3557e-05 gnorm: 1.04 [13:44:53<10:46:49] +[titan] 2025-10-05 12:19:14,809 - root - INFO - step: 22425 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 12:19:14,809 - root - INFO - lr: 2.3548e-05 gnorm: 1.08 [13:45:04<10:46:37] +[titan] 2025-10-05 12:19:25,666 - root - INFO - step: 22430 loss: 2.1054 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 12:19:25,666 - root - INFO - lr: 
2.3539e-05 gnorm: 1.11 [13:45:15<10:46:26] +[titan] 2025-10-05 12:19:36,537 - root - INFO - step: 22435 loss: 2.0990 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:19:36,537 - root - INFO - lr: 2.3531e-05 gnorm: 1.07 [13:45:26<10:46:15] +[titan] 2025-10-05 12:19:47,408 - root - INFO - step: 22440 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:19:47,408 - root - INFO - lr: 2.3522e-05 gnorm: 1.07 [13:45:37<10:46:04] +[titan] 2025-10-05 12:19:58,267 - root - INFO - step: 22445 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:19:58,267 - root - INFO - lr: 2.3513e-05 gnorm: 1.04 [13:45:48<10:45:53] +[titan] 2025-10-05 12:20:06,985 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:20:09,202 - root - INFO - step: 22450 loss: 2.1175 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:20:09,202 - root - INFO - lr: 2.3504e-05 gnorm: 1.08 [13:45:58<10:45:42] +[titan] 2025-10-05 12:20:20,059 - root - INFO - step: 22455 loss: 2.1341 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8869 +[titan] 2025-10-05 12:20:20,059 - root - INFO - lr: 2.3495e-05 gnorm: 1.04 [13:46:09<10:45:30] +[titan] 2025-10-05 12:20:30,913 - root - INFO - step: 22460 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:20:30,913 - root - INFO - lr: 2.3487e-05 gnorm: 1.14 [13:46:20<10:45:19] +[titan] 2025-10-05 12:20:41,788 - root - INFO - step: 22465 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8576 +[titan] 2025-10-05 12:20:41,788 - root - INFO - lr: 2.3478e-05 gnorm: 1.02 [13:46:31<10:45:08] +[titan] 2025-10-05 12:20:52,649 - root - INFO - step: 22470 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 12:20:52,649 - root - INFO - lr: 2.3469e-05 gnorm: 1.04 [13:46:42<10:44:57] +[titan] 2025-10-05 12:21:03,515 - root - INFO - step: 22475 loss: 2.0698 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8297 +[titan] 2025-10-05 12:21:03,515 - root - INFO - lr: 2.3460e-05 gnorm: 1.09 [13:46:53<10:44:46] +[titan] 2025-10-05 12:21:14,426 - root - INFO - step: 22480 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8596 +[titan] 2025-10-05 12:21:14,426 - root - INFO - lr: 
2.3452e-05 gnorm: 1.07 [13:47:04<10:44:35] +[titan] 2025-10-05 12:21:25,291 - root - INFO - step: 22485 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8221 +[titan] 2025-10-05 12:21:25,291 - root - INFO - lr: 2.3443e-05 gnorm: 1.07 [13:47:15<10:44:23] +[titan] 2025-10-05 12:21:36,157 - root - INFO - step: 22490 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:21:36,157 - root - INFO - lr: 2.3434e-05 gnorm: 1.07 [13:47:25<10:44:12] +[titan] 2025-10-05 12:21:47,031 - root - INFO - step: 22495 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8952 +[titan] 2025-10-05 12:21:47,032 - root - INFO - lr: 2.3425e-05 gnorm: 1.05 [13:47:36<10:44:01] +[titan] 2025-10-05 12:21:55,725 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:21:57,921 - root - INFO - step: 22500 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 12:21:57,921 - root - INFO - lr: 2.3416e-05 gnorm: 1.10 [13:47:47<10:43:50] +[titan] 2025-10-05 12:22:08,788 - root - INFO - step: 22505 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 12:22:08,788 - root - INFO - lr: 2.3408e-05 gnorm: 1.07 [13:47:58<10:43:39] +[titan] 2025-10-05 12:22:19,750 - root - INFO - step: 22510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 29,895 tflops: 414.75 mfu: 41.94% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:22:19,750 - root - INFO - lr: 2.3399e-05 gnorm: 1.09 [13:48:09<10:43:28] +[titan] 2025-10-05 12:22:30,622 - root - INFO - step: 22515 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:22:30,622 - root - INFO - lr: 2.3390e-05 gnorm: 1.11 [13:48:20<10:43:16] +[titan] 2025-10-05 12:22:41,466 - root - INFO - step: 22520 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 12:22:41,466 - root - INFO - lr: 2.3381e-05 gnorm: 1.06 [13:48:31<10:43:05] +[titan] 2025-10-05 12:22:52,408 - root - INFO - step: 22525 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:22:52,408 - root - INFO - lr: 2.3373e-05 gnorm: 1.06 [13:48:42<10:42:54] +[titan] 2025-10-05 12:22:59,127 - root - INFO - Dumping profiler traces at step 22528 +[titan] 2025-10-05 12:22:59,165 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:23:03,544 - root - INFO - step: 22530 loss: 
2.1241 memory: 118.84GiB(85.28%) tps: 29,427 tflops: 408.25 mfu: 41.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 12:23:03,544 - root - INFO - lr: 2.3364e-05 gnorm: 1.08 [13:48:53<10:42:43] +[titan] 2025-10-05 12:23:14,458 - root - INFO - step: 22535 loss: 2.1311 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8827 +[titan] 2025-10-05 12:23:14,458 - root - INFO - lr: 2.3355e-05 gnorm: 1.33 [13:49:04<10:42:32] +[titan] 2025-10-05 12:23:25,322 - root - INFO - step: 22540 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 12:23:25,322 - root - INFO - lr: 2.3346e-05 gnorm: 1.10 [13:49:15<10:42:21] +[titan] 2025-10-05 12:23:36,189 - root - INFO - step: 22545 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 12:23:36,190 - root - INFO - lr: 2.3338e-05 gnorm: 1.04 [13:49:25<10:42:10] +[titan] 2025-10-05 12:23:44,857 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:23:47,032 - root - INFO - step: 22550 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 12:23:47,032 - root - INFO - lr: 2.3329e-05 gnorm: 1.08 [13:49:36<10:41:59] +[titan] 2025-10-05 12:23:57,904 - root - INFO - step: 22555 loss: 2.0817 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:23:57,904 - root - INFO - lr: 2.3320e-05 gnorm: 1.06 [13:49:47<10:41:47] +[titan] 2025-10-05 12:24:08,764 - root - INFO - step: 22560 loss: 2.0564 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 12:24:08,764 - root - INFO - lr: 2.3311e-05 gnorm: 1.08 [13:49:58<10:41:36] +[titan] 2025-10-05 12:24:19,652 - root - INFO - step: 22565 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8615 +[titan] 2025-10-05 12:24:19,652 - root - INFO - lr: 2.3302e-05 gnorm: 1.10 [13:50:09<10:41:25] +[titan] 2025-10-05 12:24:30,523 - root - INFO - step: 22570 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 12:24:30,523 - root - INFO - lr: 2.3294e-05 gnorm: 1.05 [13:50:20<10:41:14] +[titan] 2025-10-05 12:24:41,397 - root - INFO - step: 22575 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 12:24:41,397 - root - INFO - lr: 2.3285e-05 gnorm: 1.06 [13:50:31<10:41:03] +[titan] 2025-10-05 12:24:52,282 - root - INFO - step: 22580 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8584 +[titan] 2025-10-05 12:24:52,283 - root - INFO - lr: 
2.3276e-05 gnorm: 1.02 [13:50:42<10:40:52] +[titan] 2025-10-05 12:25:03,150 - root - INFO - step: 22585 loss: 2.0722 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:25:03,150 - root - INFO - lr: 2.3267e-05 gnorm: 1.07 [13:50:52<10:40:40] +[titan] 2025-10-05 12:25:14,069 - root - INFO - step: 22590 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 12:25:14,069 - root - INFO - lr: 2.3259e-05 gnorm: 1.07 [13:51:03<10:40:29] +[titan] 2025-10-05 12:25:24,944 - root - INFO - step: 22595 loss: 2.0307 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 12:25:24,945 - root - INFO - lr: 2.3250e-05 gnorm: 1.06 [13:51:14<10:40:18] +[titan] 2025-10-05 12:25:33,616 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:25:35,800 - root - INFO - step: 22600 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 12:25:35,800 - root - INFO - lr: 2.3241e-05 gnorm: 1.09 [13:51:25<10:40:07] +[titan] 2025-10-05 12:25:46,666 - root - INFO - step: 22605 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 12:25:46,666 - root - INFO - lr: 2.3232e-05 gnorm: 1.08 [13:51:36<10:39:56] +[titan] 2025-10-05 12:25:57,545 - root - INFO - step: 22610 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8253 +[titan] 2025-10-05 12:25:57,545 - root - INFO - lr: 2.3224e-05 gnorm: 1.04 [13:51:47<10:39:45] +[titan] 2025-10-05 12:26:08,410 - root - INFO - step: 22615 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 12:26:08,411 - root - INFO - lr: 2.3215e-05 gnorm: 1.05 [13:51:58<10:39:33] +[titan] 2025-10-05 12:26:19,368 - root - INFO - step: 22620 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 12:26:19,368 - root - INFO - lr: 2.3206e-05 gnorm: 1.13 [13:52:09<10:39:22] +[titan] 2025-10-05 12:26:30,266 - root - INFO - step: 22625 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8493 +[titan] 2025-10-05 12:26:30,266 - root - INFO - lr: 2.3197e-05 gnorm: 1.07 [13:52:19<10:39:11] +[titan] 2025-10-05 12:26:41,175 - root - INFO - step: 22630 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 12:26:41,175 - root - INFO - lr: 
2.3189e-05 gnorm: 1.06 [13:52:30<10:39:00] +[titan] 2025-10-05 12:26:52,070 - root - INFO - step: 22635 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8261 +[titan] 2025-10-05 12:26:52,070 - root - INFO - lr: 2.3180e-05 gnorm: 1.03 [13:52:41<10:38:49] +[titan] 2025-10-05 12:27:02,956 - root - INFO - step: 22640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 12:27:02,956 - root - INFO - lr: 2.3171e-05 gnorm: 1.05 [13:52:52<10:38:38] +[titan] 2025-10-05 12:27:13,822 - root - INFO - step: 22645 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 12:27:13,822 - root - INFO - lr: 2.3162e-05 gnorm: 1.02 [13:53:03<10:38:27] +[titan] 2025-10-05 12:27:22,552 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:27:24,736 - root - INFO - step: 22650 loss: 2.0501 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 12:27:24,736 - root - INFO - lr: 2.3153e-05 gnorm: 1.10 [13:53:14<10:38:15] +[titan] 2025-10-05 12:27:35,626 - root - INFO - step: 22655 loss: 2.0835 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 12:27:35,626 - root - INFO - lr: 2.3145e-05 gnorm: 1.05 [13:53:25<10:38:04] +[titan] 2025-10-05 12:27:46,518 - root - INFO - step: 22660 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:27:46,518 - root - INFO - lr: 2.3136e-05 gnorm: 1.11 [13:53:36<10:37:53] +[titan] 2025-10-05 12:27:57,386 - root - INFO - step: 22665 loss: 2.1687 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9171 +[titan] 2025-10-05 12:27:57,386 - root - INFO - lr: 2.3127e-05 gnorm: 1.10 [13:53:47<10:37:42] +[titan] 2025-10-05 12:28:08,227 - root - INFO - step: 22670 loss: 2.0850 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8425 +[titan] 2025-10-05 12:28:08,227 - root - INFO - lr: 2.3118e-05 gnorm: 1.05 [13:53:57<10:37:31] +[titan] 2025-10-05 12:28:19,140 - root - INFO - step: 22675 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:28:19,140 - root - INFO - lr: 2.3110e-05 gnorm: 1.08 [13:54:08<10:37:20] +[titan] 2025-10-05 12:28:30,016 - root - INFO - step: 22680 loss: 2.1382 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 12:28:30,016 - root - INFO - lr: 
2.3101e-05 gnorm: 1.16 [13:54:19<10:37:09] +[titan] 2025-10-05 12:28:40,902 - root - INFO - step: 22685 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8186 +[titan] 2025-10-05 12:28:40,902 - root - INFO - lr: 2.3092e-05 gnorm: 1.08 [13:54:30<10:36:57] +[titan] 2025-10-05 12:28:51,765 - root - INFO - step: 22690 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 12:28:51,766 - root - INFO - lr: 2.3083e-05 gnorm: 1.03 [13:54:41<10:36:46] +[titan] 2025-10-05 12:29:02,626 - root - INFO - step: 22695 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:29:02,626 - root - INFO - lr: 2.3075e-05 gnorm: 1.06 [13:54:52<10:36:35] +[titan] 2025-10-05 12:29:11,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:29:13,457 - root - INFO - step: 22700 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 12:29:13,457 - root - INFO - lr: 2.3066e-05 gnorm: 1.07 [13:55:03<10:36:24] +[titan] 2025-10-05 12:29:24,373 - root - INFO - step: 22705 loss: 2.0814 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:29:24,373 - root - INFO - lr: 2.3057e-05 gnorm: 1.08 [13:55:14<10:36:13] +[titan] 2025-10-05 12:29:35,226 - root - INFO - step: 22710 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 12:29:35,226 - root - INFO - lr: 2.3048e-05 gnorm: 1.06 [13:55:24<10:36:02] +[titan] 2025-10-05 12:29:46,086 - root - INFO - step: 22715 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8506 +[titan] 2025-10-05 12:29:46,087 - root - INFO - lr: 2.3040e-05 gnorm: 1.12 [13:55:35<10:35:50] +[titan] 2025-10-05 12:29:56,956 - root - INFO - step: 22720 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 12:29:56,956 - root - INFO - lr: 2.3031e-05 gnorm: 1.05 [13:55:46<10:35:39] +[titan] 2025-10-05 12:30:07,794 - root - INFO - step: 22725 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8635 +[titan] 2025-10-05 12:30:07,794 - root - INFO - lr: 2.3022e-05 gnorm: 1.08 [13:55:57<10:35:28] +[titan] 2025-10-05 12:30:18,701 - root - INFO - step: 22730 loss: 2.0684 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:30:18,702 - root - INFO - lr: 
2.3013e-05 gnorm: 1.06 [13:56:08<10:35:17] +[titan] 2025-10-05 12:30:29,526 - root - INFO - step: 22735 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9056 +[titan] 2025-10-05 12:30:29,526 - root - INFO - lr: 2.3005e-05 gnorm: 1.08 [13:56:19<10:35:06] +[titan] 2025-10-05 12:30:40,389 - root - INFO - step: 22740 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 12:30:40,389 - root - INFO - lr: 2.2996e-05 gnorm: 1.08 [13:56:30<10:34:55] +[titan] 2025-10-05 12:30:51,240 - root - INFO - step: 22745 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 12:30:51,240 - root - INFO - lr: 2.2987e-05 gnorm: 1.07 [13:56:40<10:34:43] +[titan] 2025-10-05 12:30:59,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:31:02,123 - root - INFO - step: 22750 loss: 2.1101 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:31:02,123 - root - INFO - lr: 2.2978e-05 gnorm: 1.09 [13:56:51<10:34:32] +[titan] 2025-10-05 12:31:12,994 - root - INFO - step: 22755 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8489 +[titan] 2025-10-05 12:31:12,994 - root - INFO - lr: 2.2970e-05 gnorm: 1.07 [13:57:02<10:34:21] +[titan] 2025-10-05 12:31:23,866 - root - INFO - step: 22760 loss: 2.0378 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 12:31:23,866 - root - INFO - lr: 2.2961e-05 gnorm: 1.07 [13:57:13<10:34:10] +[titan] 2025-10-05 12:31:34,726 - root - INFO - step: 22765 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8417 +[titan] 2025-10-05 12:31:34,726 - root - INFO - lr: 2.2952e-05 gnorm: 1.06 [13:57:24<10:33:59] +[titan] 2025-10-05 12:31:45,584 - root - INFO - step: 22770 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 12:31:45,585 - root - INFO - lr: 2.2944e-05 gnorm: 1.08 [13:57:35<10:33:48] +[titan] 2025-10-05 12:31:56,424 - root - INFO - step: 22775 loss: 2.0368 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 12:31:56,424 - root - INFO - lr: 2.2935e-05 gnorm: 1.06 [13:57:46<10:33:36] +[titan] 2025-10-05 12:32:07,271 - root - INFO - step: 22780 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8625 +[titan] 2025-10-05 12:32:07,271 - root - INFO - lr: 
2.2926e-05 gnorm: 1.09 [13:57:56<10:33:25] +[titan] 2025-10-05 12:32:18,125 - root - INFO - step: 22785 loss: 2.0749 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:32:18,126 - root - INFO - lr: 2.2917e-05 gnorm: 1.06 [13:58:07<10:33:14] +[titan] 2025-10-05 12:32:29,041 - root - INFO - step: 22790 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 12:32:29,041 - root - INFO - lr: 2.2909e-05 gnorm: 1.01 [13:58:18<10:33:03] +[titan] 2025-10-05 12:32:39,901 - root - INFO - step: 22795 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8495 +[titan] 2025-10-05 12:32:39,901 - root - INFO - lr: 2.2900e-05 gnorm: 1.05 [13:58:29<10:32:52] +[titan] 2025-10-05 12:32:48,566 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:32:50,763 - root - INFO - step: 22800 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:32:50,764 - root - INFO - lr: 2.2891e-05 gnorm: 1.04 [13:58:40<10:32:41] +[titan] 2025-10-05 12:33:01,622 - root - INFO - step: 22805 loss: 2.0900 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8482 +[titan] 2025-10-05 12:33:01,622 - root - INFO - lr: 2.2882e-05 gnorm: 1.02 [13:58:51<10:32:29] +[titan] 2025-10-05 12:33:12,469 - root - INFO - step: 22810 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 12:33:12,469 - root - INFO - lr: 2.2874e-05 gnorm: 1.05 [13:59:02<10:32:18] +[titan] 2025-10-05 12:33:23,367 - root - INFO - step: 22815 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8893 +[titan] 2025-10-05 12:33:23,367 - root - INFO - lr: 2.2865e-05 gnorm: 1.08 [13:59:13<10:32:07] +[titan] 2025-10-05 12:33:34,205 - root - INFO - step: 22820 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:33:34,206 - root - INFO - lr: 2.2856e-05 gnorm: 1.08 [13:59:23<10:31:56] +[titan] 2025-10-05 12:33:45,062 - root - INFO - step: 22825 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:33:45,062 - root - INFO - lr: 2.2847e-05 gnorm: 1.06 [13:59:34<10:31:45] +[titan] 2025-10-05 12:33:55,902 - root - INFO - step: 22830 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 12:33:55,902 - root - INFO - lr: 
2.2839e-05 gnorm: 1.08 [13:59:45<10:31:34] +[titan] 2025-10-05 12:34:06,747 - root - INFO - step: 22835 loss: 2.0824 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 12:34:06,747 - root - INFO - lr: 2.2830e-05 gnorm: 1.04 [13:59:56<10:31:22] +[titan] 2025-10-05 12:34:17,586 - root - INFO - step: 22840 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8552 +[titan] 2025-10-05 12:34:17,586 - root - INFO - lr: 2.2821e-05 gnorm: 1.04 [14:00:07<10:31:11] +[titan] 2025-10-05 12:34:28,454 - root - INFO - step: 22845 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8428 +[titan] 2025-10-05 12:34:28,454 - root - INFO - lr: 2.2813e-05 gnorm: 1.11 [14:00:18<10:31:00] +[titan] 2025-10-05 12:34:37,135 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:34:39,324 - root - INFO - step: 22850 loss: 2.0362 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 12:34:39,324 - root - INFO - lr: 2.2804e-05 gnorm: 1.07 [14:00:29<10:30:49] +[titan] 2025-10-05 12:34:50,183 - root - INFO - step: 22855 loss: 2.0829 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:34:50,183 - root - INFO - lr: 2.2795e-05 gnorm: 1.04 [14:00:39<10:30:38] +[titan] 2025-10-05 12:35:01,017 - root - INFO - step: 22860 loss: 1.9834 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 12:35:01,017 - root - INFO - lr: 2.2786e-05 gnorm: 1.01 [14:00:50<10:30:27] +[titan] 2025-10-05 12:35:11,885 - root - INFO - step: 22865 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:35:11,885 - root - INFO - lr: 2.2778e-05 gnorm: 1.04 [14:01:01<10:30:15] +[titan] 2025-10-05 12:35:22,742 - root - INFO - step: 22870 loss: 2.1227 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8759 +[titan] 2025-10-05 12:35:22,742 - root - INFO - lr: 2.2769e-05 gnorm: 1.09 [14:01:12<10:30:04] +[titan] 2025-10-05 12:35:33,625 - root - INFO - step: 22875 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 12:35:33,625 - root - INFO - lr: 2.2760e-05 gnorm: 1.12 [14:01:23<10:29:53] +[titan] 2025-10-05 12:35:44,473 - root - INFO - step: 22880 loss: 2.0907 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8483 +[titan] 2025-10-05 12:35:44,473 - root - INFO - lr: 
2.2751e-05 gnorm: 1.09 [14:01:34<10:29:42] +[titan] 2025-10-05 12:35:55,316 - root - INFO - step: 22885 loss: 2.1475 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 12:35:55,316 - root - INFO - lr: 2.2743e-05 gnorm: 1.08 [14:01:45<10:29:31] +[titan] 2025-10-05 12:36:06,165 - root - INFO - step: 22890 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8494 +[titan] 2025-10-05 12:36:06,165 - root - INFO - lr: 2.2734e-05 gnorm: 1.10 [14:01:55<10:29:20] +[titan] 2025-10-05 12:36:17,010 - root - INFO - step: 22895 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:36:17,011 - root - INFO - lr: 2.2725e-05 gnorm: 1.06 [14:02:06<10:29:08] +[titan] 2025-10-05 12:36:25,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:36:27,886 - root - INFO - step: 22900 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 12:36:27,887 - root - INFO - lr: 2.2717e-05 gnorm: 1.09 [14:02:17<10:28:57] +[titan] 2025-10-05 12:36:38,741 - root - INFO - step: 22905 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8109 +[titan] 2025-10-05 12:36:38,741 - root - INFO - lr: 2.2708e-05 gnorm: 1.06 [14:02:28<10:28:46] +[titan] 2025-10-05 12:36:49,633 - root - INFO - step: 22910 loss: 2.0954 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:36:49,633 - root - INFO - lr: 2.2699e-05 gnorm: 1.13 [14:02:39<10:28:35] +[titan] 2025-10-05 12:37:00,494 - root - INFO - step: 22915 loss: 2.1261 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8788 +[titan] 2025-10-05 12:37:00,494 - root - INFO - lr: 2.2690e-05 gnorm: 1.09 [14:02:50<10:28:24] +[titan] 2025-10-05 12:37:11,342 - root - INFO - step: 22920 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8270 +[titan] 2025-10-05 12:37:11,343 - root - INFO - lr: 2.2682e-05 gnorm: 1.05 [14:03:01<10:28:13] +[titan] 2025-10-05 12:37:22,183 - root - INFO - step: 22925 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8036 +[titan] 2025-10-05 12:37:22,183 - root - INFO - lr: 2.2673e-05 gnorm: 1.04 [14:03:11<10:28:01] +[titan] 2025-10-05 12:37:33,033 - root - INFO - step: 22930 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9081 +[titan] 2025-10-05 12:37:33,033 - root - INFO - lr: 
2.2664e-05 gnorm: 1.08 [14:03:22<10:27:50] +[titan] 2025-10-05 12:37:43,902 - root - INFO - step: 22935 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:37:43,903 - root - INFO - lr: 2.2656e-05 gnorm: 1.04 [14:03:33<10:27:39] +[titan] 2025-10-05 12:37:54,792 - root - INFO - step: 22940 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 12:37:54,792 - root - INFO - lr: 2.2647e-05 gnorm: 1.09 [14:03:44<10:27:28] +[titan] 2025-10-05 12:38:05,627 - root - INFO - step: 22945 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8431 +[titan] 2025-10-05 12:38:05,628 - root - INFO - lr: 2.2638e-05 gnorm: 1.08 [14:03:55<10:27:17] +[titan] 2025-10-05 12:38:14,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:38:16,484 - root - INFO - step: 22950 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7713 +[titan] 2025-10-05 12:38:16,484 - root - INFO - lr: 2.2629e-05 gnorm: 1.08 [14:04:06<10:27:06] +[titan] 2025-10-05 12:38:27,334 - root - INFO - step: 22955 loss: 2.0812 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:38:27,334 - root - INFO - lr: 2.2621e-05 gnorm: 1.09 [14:04:17<10:26:54] +[titan] 2025-10-05 12:38:38,180 - root - INFO - step: 22960 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8473 +[titan] 2025-10-05 12:38:38,180 - root - INFO - lr: 2.2612e-05 gnorm: 1.14 [14:04:27<10:26:43] +[titan] 2025-10-05 12:38:49,045 - root - INFO - step: 22965 loss: 2.0894 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 12:38:49,045 - root - INFO - lr: 2.2603e-05 gnorm: 1.02 [14:04:38<10:26:32] +[titan] 2025-10-05 12:38:59,904 - root - INFO - step: 22970 loss: 2.0347 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7986 +[titan] 2025-10-05 12:38:59,904 - root - INFO - lr: 2.2595e-05 gnorm: 1.08 [14:04:49<10:26:21] +[titan] 2025-10-05 12:39:10,774 - root - INFO - step: 22975 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9048 +[titan] 2025-10-05 12:39:10,774 - root - INFO - lr: 2.2586e-05 gnorm: 1.09 [14:05:00<10:26:10] +[titan] 2025-10-05 12:39:21,640 - root - INFO - step: 22980 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:39:21,640 - root - INFO - lr: 
2.2577e-05 gnorm: 1.09 [14:05:11<10:25:59] +[titan] 2025-10-05 12:39:32,525 - root - INFO - step: 22985 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8780 +[titan] 2025-10-05 12:39:32,525 - root - INFO - lr: 2.2568e-05 gnorm: 1.07 [14:05:22<10:25:47] +[titan] 2025-10-05 12:39:43,368 - root - INFO - step: 22990 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 12:39:43,369 - root - INFO - lr: 2.2560e-05 gnorm: 1.08 [14:05:33<10:25:36] +[titan] 2025-10-05 12:39:54,216 - root - INFO - step: 22995 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 12:39:54,217 - root - INFO - lr: 2.2551e-05 gnorm: 1.02 [14:05:43<10:25:25] +[titan] 2025-10-05 12:40:02,883 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:40:05,066 - root - INFO - step: 23000 loss: 2.1507 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 12:40:05,066 - root - INFO - lr: 2.2542e-05 gnorm: 1.06 [14:05:54<10:25:14] +[titan] 2025-10-05 12:40:15,916 - root - INFO - step: 23005 loss: 2.1008 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 12:40:15,916 - root - INFO - lr: 2.2534e-05 gnorm: 1.09 [14:06:05<10:25:03] +[titan] 2025-10-05 12:40:26,775 - root - INFO - step: 23010 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8717 +[titan] 2025-10-05 12:40:26,775 - root - INFO - lr: 2.2525e-05 gnorm: 1.06 [14:06:16<10:24:52] +[titan] 2025-10-05 12:40:37,622 - root - INFO - step: 23015 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8876 +[titan] 2025-10-05 12:40:37,622 - root - INFO - lr: 2.2516e-05 gnorm: 1.06 [14:06:27<10:24:40] +[titan] 2025-10-05 12:40:48,479 - root - INFO - step: 23020 loss: 2.1422 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:40:48,480 - root - INFO - lr: 2.2507e-05 gnorm: 1.08 [14:06:38<10:24:29] +[titan] 2025-10-05 12:40:59,327 - root - INFO - step: 23025 loss: 2.0668 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:40:59,327 - root - INFO - lr: 2.2499e-05 gnorm: 1.05 [14:06:49<10:24:18] +[titan] 2025-10-05 12:41:10,188 - root - INFO - step: 23030 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:41:10,188 - root - INFO - lr: 
2.2490e-05 gnorm: 1.06 [14:06:59<10:24:07] +[titan] 2025-10-05 12:41:21,085 - root - INFO - step: 23035 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 12:41:21,085 - root - INFO - lr: 2.2481e-05 gnorm: 1.07 [14:07:10<10:23:56] +[titan] 2025-10-05 12:41:32,099 - root - INFO - step: 23040 loss: 2.1136 memory: 118.84GiB(85.28%) tps: 29,752 tflops: 412.76 mfu: 41.74% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:41:32,100 - root - INFO - lr: 2.2473e-05 gnorm: 1.05 [14:07:21<10:23:45] +[titan] 2025-10-05 12:41:32,279 - root - INFO - Dumping profiler traces at step 23040 +[titan] 2025-10-05 12:41:32,318 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:41:43,199 - root - INFO - step: 23045 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.58 mfu: 41.41% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 12:41:43,199 - root - INFO - lr: 2.2464e-05 gnorm: 1.07 [14:07:32<10:23:34] +[titan] 2025-10-05 12:41:51,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:41:54,062 - root - INFO - step: 23050 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:41:54,062 - root - INFO - lr: 2.2455e-05 gnorm: 1.10 [14:07:43<10:23:23] +[titan] 2025-10-05 12:42:04,939 - root - INFO - step: 23055 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 12:42:04,940 - root - INFO - lr: 2.2447e-05 gnorm: 1.10 [14:07:54<10:23:11] +[titan] 2025-10-05 12:42:15,807 - root - INFO - step: 23060 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 12:42:15,807 - root - INFO - lr: 2.2438e-05 gnorm: 1.09 [14:08:05<10:23:00] +[titan] 2025-10-05 12:42:26,648 - root - INFO - step: 23065 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 12:42:26,649 - root - INFO - lr: 2.2429e-05 gnorm: 1.06 [14:08:16<10:22:49] +[titan] 2025-10-05 12:42:37,523 - root - INFO - step: 23070 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 12:42:37,523 - root - INFO - lr: 2.2420e-05 gnorm: 1.06 [14:08:27<10:22:38] +[titan] 2025-10-05 12:42:48,380 - root - INFO - step: 23075 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 12:42:48,380 - root - INFO - lr: 2.2412e-05 gnorm: 1.08 [14:08:38<10:22:27] +[titan] 2025-10-05 12:42:59,255 - root - INFO - step: 23080 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 12:42:59,255 - root - INFO - lr: 
2.2403e-05 gnorm: 1.11 [14:08:48<10:22:16] +[titan] 2025-10-05 12:43:10,104 - root - INFO - step: 23085 loss: 2.0492 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 12:43:10,105 - root - INFO - lr: 2.2394e-05 gnorm: 1.06 [14:08:59<10:22:04] +[titan] 2025-10-05 12:43:20,963 - root - INFO - step: 23090 loss: 2.0906 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8478 +[titan] 2025-10-05 12:43:20,963 - root - INFO - lr: 2.2386e-05 gnorm: 1.07 [14:09:10<10:21:53] +[titan] 2025-10-05 12:43:31,830 - root - INFO - step: 23095 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:43:31,830 - root - INFO - lr: 2.2377e-05 gnorm: 1.06 [14:09:21<10:21:42] +[titan] 2025-10-05 12:43:40,505 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:43:42,684 - root - INFO - step: 23100 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 12:43:42,684 - root - INFO - lr: 2.2368e-05 gnorm: 1.08 [14:09:32<10:21:31] +[titan] 2025-10-05 12:43:53,521 - root - INFO - step: 23105 loss: 2.1541 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9043 +[titan] 2025-10-05 12:43:53,521 - root - INFO - lr: 2.2360e-05 gnorm: 1.12 [14:09:43<10:21:20] +[titan] 2025-10-05 12:44:04,389 - root - INFO - step: 23110 loss: 2.0636 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:44:04,389 - root - INFO - lr: 2.2351e-05 gnorm: 1.09 [14:09:54<10:21:09] +[titan] 2025-10-05 12:44:15,253 - root - INFO - step: 23115 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 12:44:15,253 - root - INFO - lr: 2.2342e-05 gnorm: 1.06 [14:10:04<10:20:57] +[titan] 2025-10-05 12:44:26,116 - root - INFO - step: 23120 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 12:44:26,116 - root - INFO - lr: 2.2334e-05 gnorm: 1.03 [14:10:15<10:20:46] +[titan] 2025-10-05 12:44:36,986 - root - INFO - step: 23125 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:44:36,986 - root - INFO - lr: 2.2325e-05 gnorm: 1.04 [14:10:26<10:20:35] +[titan] 2025-10-05 12:44:47,859 - root - INFO - step: 23130 loss: 2.1268 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 12:44:47,860 - root - INFO - lr: 
2.2316e-05 gnorm: 1.08 [14:10:37<10:20:24] +[titan] 2025-10-05 12:44:58,729 - root - INFO - step: 23135 loss: 2.1048 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 12:44:58,729 - root - INFO - lr: 2.2308e-05 gnorm: 1.10 [14:10:48<10:20:13] +[titan] 2025-10-05 12:45:09,592 - root - INFO - step: 23140 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8194 +[titan] 2025-10-05 12:45:09,592 - root - INFO - lr: 2.2299e-05 gnorm: 1.09 [14:10:59<10:20:02] +[titan] 2025-10-05 12:45:20,444 - root - INFO - step: 23145 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:45:20,444 - root - INFO - lr: 2.2290e-05 gnorm: 1.10 [14:11:10<10:19:51] +[titan] 2025-10-05 12:45:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:45:31,320 - root - INFO - step: 23150 loss: 2.0752 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:45:31,320 - root - INFO - lr: 2.2281e-05 gnorm: 1.05 [14:11:20<10:19:39] +[titan] 2025-10-05 12:45:42,180 - root - INFO - step: 23155 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:45:42,181 - root - INFO - lr: 2.2273e-05 gnorm: 1.10 [14:11:31<10:19:28] +[titan] 2025-10-05 12:45:53,058 - root - INFO - step: 23160 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 12:45:53,058 - root - INFO - lr: 2.2264e-05 gnorm: 1.06 [14:11:42<10:19:17] +[titan] 2025-10-05 12:46:03,966 - root - INFO - step: 23165 loss: 1.9940 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 12:46:03,966 - root - INFO - lr: 2.2255e-05 gnorm: 1.07 [14:11:53<10:19:06] +[titan] 2025-10-05 12:46:14,825 - root - INFO - step: 23170 loss: 2.1123 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8673 +[titan] 2025-10-05 12:46:14,825 - root - INFO - lr: 2.2247e-05 gnorm: 1.06 [14:12:04<10:18:55] +[titan] 2025-10-05 12:46:25,704 - root - INFO - step: 23175 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:46:25,705 - root - INFO - lr: 2.2238e-05 gnorm: 1.10 [14:12:15<10:18:44] +[titan] 2025-10-05 12:46:36,828 - root - INFO - step: 23180 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 29,460 tflops: 408.71 mfu: 41.33% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8561 +[titan] 2025-10-05 12:46:36,828 - root - INFO - lr: 
2.2229e-05 gnorm: 1.09 [14:12:26<10:18:33] +[titan] 2025-10-05 12:46:47,680 - root - INFO - step: 23185 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 12:46:47,680 - root - INFO - lr: 2.2221e-05 gnorm: 1.07 [14:12:37<10:18:22] +[titan] 2025-10-05 12:46:58,544 - root - INFO - step: 23190 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8817 +[titan] 2025-10-05 12:46:58,544 - root - INFO - lr: 2.2212e-05 gnorm: 1.07 [14:12:48<10:18:10] +[titan] 2025-10-05 12:47:09,423 - root - INFO - step: 23195 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 12:47:09,424 - root - INFO - lr: 2.2203e-05 gnorm: 1.07 [14:12:59<10:17:59] +[titan] 2025-10-05 12:47:18,105 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:47:20,285 - root - INFO - step: 23200 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:47:20,285 - root - INFO - lr: 2.2195e-05 gnorm: 1.05 [14:13:09<10:17:48] +[titan] 2025-10-05 12:47:31,161 - root - INFO - step: 23205 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:47:31,161 - root - INFO - lr: 2.2186e-05 gnorm: 1.07 [14:13:20<10:17:37] +[titan] 2025-10-05 12:47:42,031 - root - INFO - step: 23210 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 12:47:42,031 - root - INFO - lr: 2.2177e-05 gnorm: 1.08 [14:13:31<10:17:26] +[titan] 2025-10-05 12:47:52,877 - root - INFO - step: 23215 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8652 +[titan] 2025-10-05 12:47:52,877 - root - INFO - lr: 2.2169e-05 gnorm: 1.07 [14:13:42<10:17:15] +[titan] 2025-10-05 12:48:03,720 - root - INFO - step: 23220 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:48:03,720 - root - INFO - lr: 2.2160e-05 gnorm: 1.06 [14:13:53<10:17:03] +[titan] 2025-10-05 12:48:14,569 - root - INFO - step: 23225 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8277 +[titan] 2025-10-05 12:48:14,569 - root - INFO - lr: 2.2151e-05 gnorm: 1.07 [14:14:04<10:16:52] +[titan] 2025-10-05 12:48:25,456 - root - INFO - step: 23230 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:48:25,456 - root - INFO - lr: 
2.2143e-05 gnorm: 1.09 [14:14:15<10:16:41] +[titan] 2025-10-05 12:48:36,322 - root - INFO - step: 23235 loss: 2.0597 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 12:48:36,322 - root - INFO - lr: 2.2134e-05 gnorm: 1.05 [14:14:25<10:16:30] +[titan] 2025-10-05 12:48:47,191 - root - INFO - step: 23240 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 12:48:47,191 - root - INFO - lr: 2.2125e-05 gnorm: 1.06 [14:14:36<10:16:19] +[titan] 2025-10-05 12:48:58,072 - root - INFO - step: 23245 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8758 +[titan] 2025-10-05 12:48:58,072 - root - INFO - lr: 2.2117e-05 gnorm: 1.08 [14:14:47<10:16:08] +[titan] 2025-10-05 12:49:06,748 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:49:08,942 - root - INFO - step: 23250 loss: 2.0918 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 12:49:08,943 - root - INFO - lr: 2.2108e-05 gnorm: 1.10 [14:14:58<10:15:57] +[titan] 2025-10-05 12:49:19,822 - root - INFO - step: 23255 loss: 2.1127 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8665 +[titan] 2025-10-05 12:49:19,822 - root - INFO - lr: 2.2099e-05 gnorm: 1.05 [14:15:09<10:15:45] +[titan] 2025-10-05 12:49:30,722 - root - INFO - step: 23260 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8961 +[titan] 2025-10-05 12:49:30,723 - root - INFO - lr: 2.2091e-05 gnorm: 1.10 [14:15:20<10:15:34] +[titan] 2025-10-05 12:49:41,642 - root - INFO - step: 23265 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 12:49:41,642 - root - INFO - lr: 2.2082e-05 gnorm: 1.09 [14:15:31<10:15:23] +[titan] 2025-10-05 12:49:52,513 - root - INFO - step: 23270 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 12:49:52,513 - root - INFO - lr: 2.2073e-05 gnorm: 1.08 [14:15:42<10:15:12] +[titan] 2025-10-05 12:50:03,384 - root - INFO - step: 23275 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8161 +[titan] 2025-10-05 12:50:03,384 - root - INFO - lr: 2.2065e-05 gnorm: 1.07 [14:15:53<10:15:01] +[titan] 2025-10-05 12:50:14,264 - root - INFO - step: 23280 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 12:50:14,264 - root - INFO - lr: 
2.2056e-05 gnorm: 1.06 [14:16:03<10:14:50] +[titan] 2025-10-05 12:50:25,152 - root - INFO - step: 23285 loss: 2.1398 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 12:50:25,152 - root - INFO - lr: 2.2047e-05 gnorm: 1.05 [14:16:14<10:14:39] +[titan] 2025-10-05 12:50:36,029 - root - INFO - step: 23290 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 12:50:36,029 - root - INFO - lr: 2.2039e-05 gnorm: 1.05 [14:16:25<10:14:27] +[titan] 2025-10-05 12:50:46,933 - root - INFO - step: 23295 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:50:46,933 - root - INFO - lr: 2.2030e-05 gnorm: 1.10 [14:16:36<10:14:16] +[titan] 2025-10-05 12:50:55,612 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:50:57,805 - root - INFO - step: 23300 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:50:57,805 - root - INFO - lr: 2.2021e-05 gnorm: 1.10 [14:16:47<10:14:05] +[titan] 2025-10-05 12:51:08,673 - root - INFO - step: 23305 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:51:08,673 - root - INFO - lr: 2.2013e-05 gnorm: 1.06 [14:16:58<10:13:54] +[titan] 2025-10-05 12:51:19,553 - root - INFO - step: 23310 loss: 2.0851 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:51:19,553 - root - INFO - lr: 2.2004e-05 gnorm: 1.07 [14:17:09<10:13:43] +[titan] 2025-10-05 12:51:30,434 - root - INFO - step: 23315 loss: 2.0776 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 12:51:30,435 - root - INFO - lr: 2.1995e-05 gnorm: 1.07 [14:17:20<10:13:32] +[titan] 2025-10-05 12:51:41,297 - root - INFO - step: 23320 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:51:41,298 - root - INFO - lr: 2.1987e-05 gnorm: 1.06 [14:17:30<10:13:21] +[titan] 2025-10-05 12:51:52,171 - root - INFO - step: 23325 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8412 +[titan] 2025-10-05 12:51:52,171 - root - INFO - lr: 2.1978e-05 gnorm: 1.06 [14:17:41<10:13:09] +[titan] 2025-10-05 12:52:03,034 - root - INFO - step: 23330 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 12:52:03,034 - root - INFO - lr: 
2.1969e-05 gnorm: 1.06 [14:17:52<10:12:58] +[titan] 2025-10-05 12:52:13,904 - root - INFO - step: 23335 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:52:13,905 - root - INFO - lr: 2.1961e-05 gnorm: 1.08 [14:18:03<10:12:47] +[titan] 2025-10-05 12:52:24,765 - root - INFO - step: 23340 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 12:52:24,765 - root - INFO - lr: 2.1952e-05 gnorm: 1.05 [14:18:14<10:12:36] +[titan] 2025-10-05 12:52:35,613 - root - INFO - step: 23345 loss: 2.0713 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 12:52:35,614 - root - INFO - lr: 2.1944e-05 gnorm: 1.08 [14:18:25<10:12:25] +[titan] 2025-10-05 12:52:44,296 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:52:46,481 - root - INFO - step: 23350 loss: 2.0693 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:52:46,481 - root - INFO - lr: 2.1935e-05 gnorm: 1.06 [14:18:36<10:12:14] +[titan] 2025-10-05 12:52:57,341 - root - INFO - step: 23355 loss: 2.1206 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 12:52:57,341 - root - INFO - lr: 2.1926e-05 gnorm: 1.09 [14:18:46<10:12:03] +[titan] 2025-10-05 12:53:08,214 - root - INFO - step: 23360 loss: 2.1012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:53:08,214 - root - INFO - lr: 2.1918e-05 gnorm: 1.05 [14:18:57<10:11:51] +[titan] 2025-10-05 12:53:19,079 - root - INFO - step: 23365 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8477 +[titan] 2025-10-05 12:53:19,079 - root - INFO - lr: 2.1909e-05 gnorm: 1.11 [14:19:08<10:11:40] +[titan] 2025-10-05 12:53:29,939 - root - INFO - step: 23370 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8916 +[titan] 2025-10-05 12:53:29,939 - root - INFO - lr: 2.1900e-05 gnorm: 1.11 [14:19:19<10:11:29] +[titan] 2025-10-05 12:53:40,836 - root - INFO - step: 23375 loss: 2.0922 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 12:53:40,836 - root - INFO - lr: 2.1892e-05 gnorm: 1.13 [14:19:30<10:11:18] +[titan] 2025-10-05 12:53:51,725 - root - INFO - step: 23380 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 12:53:51,725 - root - INFO - lr: 
2.1883e-05 gnorm: 1.07 [14:19:41<10:11:07] +[titan] 2025-10-05 12:54:02,631 - root - INFO - step: 23385 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 12:54:02,631 - root - INFO - lr: 2.1874e-05 gnorm: 1.07 [14:19:52<10:10:56] +[titan] 2025-10-05 12:54:13,542 - root - INFO - step: 23390 loss: 2.0791 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:54:13,543 - root - INFO - lr: 2.1866e-05 gnorm: 1.12 [14:20:03<10:10:45] +[titan] 2025-10-05 12:54:24,401 - root - INFO - step: 23395 loss: 2.0662 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 12:54:24,401 - root - INFO - lr: 2.1857e-05 gnorm: 1.08 [14:20:14<10:10:33] +[titan] 2025-10-05 12:54:33,071 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:54:35,256 - root - INFO - step: 23400 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 12:54:35,256 - root - INFO - lr: 2.1848e-05 gnorm: 1.09 [14:20:24<10:10:22] +[titan] 2025-10-05 12:54:46,105 - root - INFO - step: 23405 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8085 +[titan] 2025-10-05 12:54:46,105 - root - INFO - lr: 2.1840e-05 gnorm: 1.03 [14:20:35<10:10:11] +[titan] 2025-10-05 12:54:56,966 - root - INFO - step: 23410 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8644 +[titan] 2025-10-05 12:54:56,966 - root - INFO - lr: 2.1831e-05 gnorm: 1.09 [14:20:46<10:10:00] +[titan] 2025-10-05 12:55:07,809 - root - INFO - step: 23415 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8747 +[titan] 2025-10-05 12:55:07,810 - root - INFO - lr: 2.1823e-05 gnorm: 1.09 [14:20:57<10:09:49] +[titan] 2025-10-05 12:55:18,648 - root - INFO - step: 23420 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:55:18,648 - root - INFO - lr: 2.1814e-05 gnorm: 1.09 [14:21:08<10:09:38] +[titan] 2025-10-05 12:55:29,531 - root - INFO - step: 23425 loss: 2.1312 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 12:55:29,531 - root - INFO - lr: 2.1805e-05 gnorm: 1.07 [14:21:19<10:09:27] +[titan] 2025-10-05 12:55:40,423 - root - INFO - step: 23430 loss: 2.0740 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:55:40,423 - root - INFO - lr: 
2.1797e-05 gnorm: 1.07 [14:21:30<10:09:15] +[titan] 2025-10-05 12:55:51,286 - root - INFO - step: 23435 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 12:55:51,286 - root - INFO - lr: 2.1788e-05 gnorm: 1.05 [14:21:40<10:09:04] +[titan] 2025-10-05 12:56:02,131 - root - INFO - step: 23440 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8791 +[titan] 2025-10-05 12:56:02,131 - root - INFO - lr: 2.1779e-05 gnorm: 1.05 [14:21:51<10:08:53] +[titan] 2025-10-05 12:56:12,982 - root - INFO - step: 23445 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 12:56:12,982 - root - INFO - lr: 2.1771e-05 gnorm: 1.07 [14:22:02<10:08:42] +[titan] 2025-10-05 12:56:21,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:56:23,837 - root - INFO - step: 23450 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 12:56:23,837 - root - INFO - lr: 2.1762e-05 gnorm: 1.03 [14:22:13<10:08:31] +[titan] 2025-10-05 12:56:34,722 - root - INFO - step: 23455 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 12:56:34,722 - root - INFO - lr: 2.1753e-05 gnorm: 1.08 [14:22:24<10:08:20] +[titan] 2025-10-05 12:56:45,579 - root - INFO - step: 23460 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 12:56:45,579 - root - INFO - lr: 2.1745e-05 gnorm: 1.05 [14:22:35<10:08:08] +[titan] 2025-10-05 12:56:56,409 - root - INFO - step: 23465 loss: 2.0982 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 12:56:56,409 - root - INFO - lr: 2.1736e-05 gnorm: 1.07 [14:22:46<10:07:57] +[titan] 2025-10-05 12:57:07,229 - root - INFO - step: 23470 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:57:07,229 - root - INFO - lr: 2.1728e-05 gnorm: 1.08 [14:22:56<10:07:46] +[titan] 2025-10-05 12:57:18,049 - root - INFO - step: 23475 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 12:57:18,049 - root - INFO - lr: 2.1719e-05 gnorm: 1.09 [14:23:07<10:07:35] +[titan] 2025-10-05 12:57:28,860 - root - INFO - step: 23480 loss: 2.0930 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:57:28,860 - root - INFO - lr: 
2.1710e-05 gnorm: 1.07 [14:23:18<10:07:24] +[titan] 2025-10-05 12:57:39,712 - root - INFO - step: 23485 loss: 2.1212 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8737 +[titan] 2025-10-05 12:57:39,712 - root - INFO - lr: 2.1702e-05 gnorm: 1.09 [14:23:29<10:07:13] +[titan] 2025-10-05 12:57:50,568 - root - INFO - step: 23490 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 12:57:50,568 - root - INFO - lr: 2.1693e-05 gnorm: 1.06 [14:23:40<10:07:01] +[titan] 2025-10-05 12:58:01,418 - root - INFO - step: 23495 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 12:58:01,418 - root - INFO - lr: 2.1684e-05 gnorm: 1.13 [14:23:51<10:06:50] +[titan] 2025-10-05 12:58:10,087 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:58:12,263 - root - INFO - step: 23500 loss: 2.0793 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:58:12,263 - root - INFO - lr: 2.1676e-05 gnorm: 1.05 [14:24:01<10:06:39] +[titan] 2025-10-05 12:58:23,119 - root - INFO - step: 23505 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 12:58:23,119 - root - INFO - lr: 2.1667e-05 gnorm: 1.09 [14:24:12<10:06:28] +[titan] 2025-10-05 12:58:33,963 - root - INFO - step: 23510 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 12:58:33,964 - root - INFO - lr: 2.1659e-05 gnorm: 1.07 [14:24:23<10:06:17] +[titan] 2025-10-05 12:58:44,834 - root - INFO - step: 23515 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 12:58:44,834 - root - INFO - lr: 2.1650e-05 gnorm: 1.06 [14:24:34<10:06:06] +[titan] 2025-10-05 12:58:55,708 - root - INFO - step: 23520 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8363 +[titan] 2025-10-05 12:58:55,708 - root - INFO - lr: 2.1641e-05 gnorm: 1.05 [14:24:45<10:05:55] +[titan] 2025-10-05 12:59:06,592 - root - INFO - step: 23525 loss: 2.0619 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 12:59:06,592 - root - INFO - lr: 2.1633e-05 gnorm: 1.06 [14:24:56<10:05:43] +[titan] 2025-10-05 12:59:17,459 - root - INFO - step: 23530 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8941 +[titan] 2025-10-05 12:59:17,459 - root - INFO - lr: 
2.1624e-05 gnorm: 1.08 [14:25:07<10:05:32] +[titan] 2025-10-05 12:59:28,332 - root - INFO - step: 23535 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:59:28,332 - root - INFO - lr: 2.1616e-05 gnorm: 1.07 [14:25:17<10:05:21] +[titan] 2025-10-05 12:59:39,189 - root - INFO - step: 23540 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 12:59:39,190 - root - INFO - lr: 2.1607e-05 gnorm: 1.06 [14:25:28<10:05:10] +[titan] 2025-10-05 12:59:50,087 - root - INFO - step: 23545 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8219 +[titan] 2025-10-05 12:59:50,087 - root - INFO - lr: 2.1598e-05 gnorm: 1.07 [14:25:39<10:04:59] +[titan] 2025-10-05 12:59:58,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:00:01,039 - root - INFO - step: 23550 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 13:00:01,039 - root - INFO - lr: 2.1590e-05 gnorm: 1.13 [14:25:50<10:04:48] +[titan] 2025-10-05 13:00:05,584 - root - INFO - Dumping profiler traces at step 23552 +[titan] 2025-10-05 13:00:05,626 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:00:12,148 - root - INFO - step: 23555 loss: 2.0620 memory: 118.84GiB(85.28%) tps: 29,498 tflops: 409.24 mfu: 41.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 13:00:12,148 - root - INFO - lr: 2.1581e-05 gnorm: 1.07 [14:26:01<10:04:37] +[titan] 2025-10-05 13:00:23,000 - root - INFO - step: 23560 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 13:00:23,000 - root - INFO - lr: 2.1572e-05 gnorm: 1.11 [14:26:12<10:04:26] +[titan] 2025-10-05 13:00:33,832 - root - INFO - step: 23565 loss: 2.1010 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.69 mfu: 42.44% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 13:00:33,833 - root - INFO - lr: 2.1564e-05 gnorm: 1.09 [14:26:23<10:04:14] +[titan] 2025-10-05 13:00:44,700 - root - INFO - step: 23570 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 13:00:44,700 - root - INFO - lr: 2.1555e-05 gnorm: 1.03 [14:26:34<10:04:03] +[titan] 2025-10-05 13:00:55,558 - root - INFO - step: 23575 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 13:00:55,558 - root - INFO - lr: 2.1547e-05 gnorm: 1.08 [14:26:45<10:03:52] +[titan] 2025-10-05 13:01:06,406 - root - INFO - step: 23580 loss: 
2.1114 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 13:01:06,406 - root - INFO - lr: 2.1538e-05 gnorm: 1.10 [14:26:56<10:03:41] +[titan] 2025-10-05 13:01:17,310 - root - INFO - step: 23585 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 13:01:17,310 - root - INFO - lr: 2.1529e-05 gnorm: 1.11 [14:27:06<10:03:30] +[titan] 2025-10-05 13:01:28,160 - root - INFO - step: 23590 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:01:28,161 - root - INFO - lr: 2.1521e-05 gnorm: 1.07 [14:27:17<10:03:19] +[titan] 2025-10-05 13:01:39,026 - root - INFO - step: 23595 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 13:01:39,026 - root - INFO - lr: 2.1512e-05 gnorm: 1.08 [14:27:28<10:03:08] +[titan] 2025-10-05 13:01:47,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:01:49,899 - root - INFO - step: 23600 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 13:01:49,899 - root - INFO - lr: 2.1504e-05 gnorm: 1.10 [14:27:39<10:02:56] +[titan] 2025-10-05 13:02:00,769 - root - INFO - step: 23605 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 13:02:00,769 - root - INFO - lr: 2.1495e-05 gnorm: 1.04 [14:27:50<10:02:45] +[titan] 2025-10-05 13:02:11,618 - root - INFO - step: 23610 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 13:02:11,618 - root - INFO - lr: 2.1486e-05 gnorm: 1.09 [14:28:01<10:02:34] +[titan] 2025-10-05 13:02:22,509 - root - INFO - step: 23615 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 13:02:22,509 - root - INFO - lr: 2.1478e-05 gnorm: 1.07 [14:28:12<10:02:23] +[titan] 2025-10-05 13:02:33,369 - root - INFO - step: 23620 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 13:02:33,369 - root - INFO - lr: 2.1469e-05 gnorm: 1.06 [14:28:22<10:02:12] +[titan] 2025-10-05 13:02:44,239 - root - INFO - step: 23625 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8846 +[titan] 2025-10-05 13:02:44,239 - root - INFO - lr: 2.1461e-05 gnorm: 1.08 [14:28:33<10:02:01] +[titan] 2025-10-05 13:02:55,117 - root - INFO - step: 23630 loss: 2.0120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:02:55,117 - root - INFO - lr: 
2.1452e-05 gnorm: 1.06 [14:28:44<10:01:50] +[titan] 2025-10-05 13:03:05,938 - root - INFO - step: 23635 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 13:03:05,938 - root - INFO - lr: 2.1443e-05 gnorm: 1.04 [14:28:55<10:01:38] +[titan] 2025-10-05 13:03:16,775 - root - INFO - step: 23640 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 13:03:16,775 - root - INFO - lr: 2.1435e-05 gnorm: 1.06 [14:29:06<10:01:27] +[titan] 2025-10-05 13:03:27,645 - root - INFO - step: 23645 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8228 +[titan] 2025-10-05 13:03:27,645 - root - INFO - lr: 2.1426e-05 gnorm: 1.08 [14:29:17<10:01:16] +[titan] 2025-10-05 13:03:36,315 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:03:38,490 - root - INFO - step: 23650 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 13:03:38,491 - root - INFO - lr: 2.1418e-05 gnorm: 1.14 [14:29:28<10:01:05] +[titan] 2025-10-05 13:03:49,367 - root - INFO - step: 23655 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 13:03:49,367 - root - INFO - lr: 2.1409e-05 gnorm: 1.07 [14:29:38<10:00:54] +[titan] 2025-10-05 13:04:00,220 - root - INFO - step: 23660 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 13:04:00,220 - root - INFO - lr: 2.1400e-05 gnorm: 1.05 [14:29:49<10:00:43] +[titan] 2025-10-05 13:04:11,080 - root - INFO - step: 23665 loss: 2.0569 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:04:11,080 - root - INFO - lr: 2.1392e-05 gnorm: 1.05 [14:30:00<10:00:32] +[titan] 2025-10-05 13:04:21,931 - root - INFO - step: 23670 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 13:04:21,931 - root - INFO - lr: 2.1383e-05 gnorm: 1.08 [14:30:11<10:00:20] +[titan] 2025-10-05 13:04:32,799 - root - INFO - step: 23675 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:04:32,799 - root - INFO - lr: 2.1375e-05 gnorm: 1.09 [14:30:22<10:00:09] +[titan] 2025-10-05 13:04:43,687 - root - INFO - step: 23680 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8405 +[titan] 2025-10-05 13:04:43,687 - root - INFO - lr: 
2.1366e-05 gnorm: 1.09 [14:30:33< 9:59:58] +[titan] 2025-10-05 13:04:54,557 - root - INFO - step: 23685 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8546 +[titan] 2025-10-05 13:04:54,557 - root - INFO - lr: 2.1358e-05 gnorm: 1.06 [14:30:44< 9:59:47] +[titan] 2025-10-05 13:05:05,423 - root - INFO - step: 23690 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 13:05:05,423 - root - INFO - lr: 2.1349e-05 gnorm: 1.11 [14:30:55< 9:59:36] +[titan] 2025-10-05 13:05:16,292 - root - INFO - step: 23695 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 13:05:16,292 - root - INFO - lr: 2.1340e-05 gnorm: 1.07 [14:31:05< 9:59:25] +[titan] 2025-10-05 13:05:24,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:05:27,152 - root - INFO - step: 23700 loss: 2.0847 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 13:05:27,152 - root - INFO - lr: 2.1332e-05 gnorm: 1.06 [14:31:16< 9:59:14] +[titan] 2025-10-05 13:05:38,037 - root - INFO - step: 23705 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 13:05:38,037 - root - INFO - lr: 2.1323e-05 gnorm: 1.07 [14:31:27< 9:59:02] +[titan] 2025-10-05 13:05:48,993 - root - INFO - step: 23710 loss: 2.0935 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8500 +[titan] 2025-10-05 13:05:48,993 - root - INFO - lr: 2.1315e-05 gnorm: 1.06 [14:31:38< 9:58:51] +[titan] 2025-10-05 13:05:59,853 - root - INFO - step: 23715 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 13:05:59,853 - root - INFO - lr: 2.1306e-05 gnorm: 1.12 [14:31:49< 9:58:40] +[titan] 2025-10-05 13:06:10,728 - root - INFO - step: 23720 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 13:06:10,729 - root - INFO - lr: 2.1297e-05 gnorm: 1.05 [14:32:00< 9:58:29] +[titan] 2025-10-05 13:06:21,603 - root - INFO - step: 23725 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8106 +[titan] 2025-10-05 13:06:21,603 - root - INFO - lr: 2.1289e-05 gnorm: 1.04 [14:32:11< 9:58:18] +[titan] 2025-10-05 13:06:32,482 - root - INFO - step: 23730 loss: 2.0312 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 13:06:32,482 - root - INFO - lr: 
2.1280e-05 gnorm: 1.09 [14:32:22< 9:58:07] +[titan] 2025-10-05 13:06:43,351 - root - INFO - step: 23735 loss: 2.0992 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 13:06:43,351 - root - INFO - lr: 2.1272e-05 gnorm: 1.09 [14:32:32< 9:57:56] +[titan] 2025-10-05 13:06:54,243 - root - INFO - step: 23740 loss: 2.0278 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 13:06:54,243 - root - INFO - lr: 2.1263e-05 gnorm: 1.08 [14:32:43< 9:57:45] +[titan] 2025-10-05 13:07:05,147 - root - INFO - step: 23745 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:07:05,147 - root - INFO - lr: 2.1255e-05 gnorm: 1.08 [14:32:54< 9:57:33] +[titan] 2025-10-05 13:07:13,826 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:07:16,019 - root - INFO - step: 23750 loss: 2.0022 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 13:07:16,019 - root - INFO - lr: 2.1246e-05 gnorm: 1.06 [14:33:05< 9:57:22] +[titan] 2025-10-05 13:07:26,891 - root - INFO - step: 23755 loss: 2.0412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 13:07:26,891 - root - INFO - lr: 2.1237e-05 gnorm: 1.10 [14:33:16< 9:57:11] +[titan] 2025-10-05 13:07:37,753 - root - INFO - step: 23760 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:07:37,753 - root - INFO - lr: 2.1229e-05 gnorm: 1.10 [14:33:27< 9:57:00] +[titan] 2025-10-05 13:07:48,618 - root - INFO - step: 23765 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 13:07:48,618 - root - INFO - lr: 2.1220e-05 gnorm: 1.07 [14:33:38< 9:56:49] +[titan] 2025-10-05 13:07:59,505 - root - INFO - step: 23770 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:07:59,505 - root - INFO - lr: 2.1212e-05 gnorm: 1.10 [14:33:49< 9:56:38] +[titan] 2025-10-05 13:08:10,407 - root - INFO - step: 23775 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 13:08:10,408 - root - INFO - lr: 2.1203e-05 gnorm: 1.07 [14:34:00< 9:56:27] +[titan] 2025-10-05 13:08:21,270 - root - INFO - step: 23780 loss: 2.0507 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 13:08:21,270 - root - INFO - lr: 
2.1195e-05 gnorm: 1.12 [14:34:10< 9:56:16] +[titan] 2025-10-05 13:08:32,141 - root - INFO - step: 23785 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:08:32,141 - root - INFO - lr: 2.1186e-05 gnorm: 1.07 [14:34:21< 9:56:04] +[titan] 2025-10-05 13:08:43,013 - root - INFO - step: 23790 loss: 2.0543 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8158 +[titan] 2025-10-05 13:08:43,013 - root - INFO - lr: 2.1177e-05 gnorm: 1.08 [14:34:32< 9:55:53] +[titan] 2025-10-05 13:08:53,898 - root - INFO - step: 23795 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 13:08:53,898 - root - INFO - lr: 2.1169e-05 gnorm: 1.08 [14:34:43< 9:55:42] +[titan] 2025-10-05 13:09:02,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:09:04,767 - root - INFO - step: 23800 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:09:04,767 - root - INFO - lr: 2.1160e-05 gnorm: 1.07 [14:34:54< 9:55:31] +[titan] 2025-10-05 13:09:15,675 - root - INFO - step: 23805 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:09:15,675 - root - INFO - lr: 2.1152e-05 gnorm: 1.07 [14:35:05< 9:55:20] +[titan] 2025-10-05 13:09:26,546 - root - INFO - step: 23810 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8689 +[titan] 2025-10-05 13:09:26,546 - root - INFO - lr: 2.1143e-05 gnorm: 1.06 [14:35:16< 9:55:09] +[titan] 2025-10-05 13:09:37,416 - root - INFO - step: 23815 loss: 2.0689 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:09:37,416 - root - INFO - lr: 2.1135e-05 gnorm: 1.04 [14:35:27< 9:54:58] +[titan] 2025-10-05 13:09:48,302 - root - INFO - step: 23820 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 13:09:48,302 - root - INFO - lr: 2.1126e-05 gnorm: 1.05 [14:35:37< 9:54:46] +[titan] 2025-10-05 13:09:59,200 - root - INFO - step: 23825 loss: 2.1145 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8692 +[titan] 2025-10-05 13:09:59,200 - root - INFO - lr: 2.1118e-05 gnorm: 1.10 [14:35:48< 9:54:35] +[titan] 2025-10-05 13:10:10,087 - root - INFO - step: 23830 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:10:10,087 - root - INFO - lr: 
2.1109e-05 gnorm: 1.07 [14:35:59< 9:54:24] +[titan] 2025-10-05 13:10:20,968 - root - INFO - step: 23835 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8802 +[titan] 2025-10-05 13:10:20,968 - root - INFO - lr: 2.1100e-05 gnorm: 1.11 [14:36:10< 9:54:13] +[titan] 2025-10-05 13:10:31,877 - root - INFO - step: 23840 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 13:10:31,877 - root - INFO - lr: 2.1092e-05 gnorm: 1.07 [14:36:21< 9:54:02] +[titan] 2025-10-05 13:10:42,750 - root - INFO - step: 23845 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8826 +[titan] 2025-10-05 13:10:42,751 - root - INFO - lr: 2.1083e-05 gnorm: 1.08 [14:36:32< 9:53:51] +[titan] 2025-10-05 13:10:51,447 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:10:53,641 - root - INFO - step: 23850 loss: 2.0254 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 13:10:53,642 - root - INFO - lr: 2.1075e-05 gnorm: 1.07 [14:36:43< 9:53:40] +[titan] 2025-10-05 13:11:04,523 - root - INFO - step: 23855 loss: 2.0986 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 13:11:04,523 - root - INFO - lr: 2.1066e-05 gnorm: 1.09 [14:36:54< 9:53:29] +[titan] 2025-10-05 13:11:15,407 - root - INFO - step: 23860 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 13:11:15,407 - root - INFO - lr: 2.1058e-05 gnorm: 1.07 [14:37:05< 9:53:17] +[titan] 2025-10-05 13:11:26,299 - root - INFO - step: 23865 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8171 +[titan] 2025-10-05 13:11:26,299 - root - INFO - lr: 2.1049e-05 gnorm: 1.08 [14:37:15< 9:53:06] +[titan] 2025-10-05 13:11:37,198 - root - INFO - step: 23870 loss: 2.1119 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:11:37,198 - root - INFO - lr: 2.1041e-05 gnorm: 1.10 [14:37:26< 9:52:55] +[titan] 2025-10-05 13:11:48,068 - root - INFO - step: 23875 loss: 2.0789 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 13:11:48,068 - root - INFO - lr: 2.1032e-05 gnorm: 1.03 [14:37:37< 9:52:44] +[titan] 2025-10-05 13:11:58,937 - root - INFO - step: 23880 loss: 2.1572 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9068 +[titan] 2025-10-05 13:11:58,937 - root - INFO - lr: 
2.1023e-05 gnorm: 1.10 [14:37:48< 9:52:33] +[titan] 2025-10-05 13:12:09,818 - root - INFO - step: 23885 loss: 2.1050 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:12:09,818 - root - INFO - lr: 2.1015e-05 gnorm: 1.07 [14:37:59< 9:52:22] +[titan] 2025-10-05 13:12:20,691 - root - INFO - step: 23890 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 13:12:20,691 - root - INFO - lr: 2.1006e-05 gnorm: 1.04 [14:38:10< 9:52:11] +[titan] 2025-10-05 13:12:31,575 - root - INFO - step: 23895 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 13:12:31,575 - root - INFO - lr: 2.0998e-05 gnorm: 1.07 [14:38:21< 9:52:00] +[titan] 2025-10-05 13:12:40,266 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:12:42,455 - root - INFO - step: 23900 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 13:12:42,455 - root - INFO - lr: 2.0989e-05 gnorm: 1.07 [14:38:32< 9:51:48] +[titan] 2025-10-05 13:12:53,357 - root - INFO - step: 23905 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 13:12:53,357 - root - INFO - lr: 2.0981e-05 gnorm: 1.10 [14:38:42< 9:51:37] +[titan] 2025-10-05 13:13:04,239 - root - INFO - step: 23910 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8487 +[titan] 2025-10-05 13:13:04,239 - root - INFO - lr: 2.0972e-05 gnorm: 1.07 [14:38:53< 9:51:26] +[titan] 2025-10-05 13:13:15,113 - root - INFO - step: 23915 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 13:13:15,113 - root - INFO - lr: 2.0964e-05 gnorm: 1.10 [14:39:04< 9:51:15] +[titan] 2025-10-05 13:13:25,979 - root - INFO - step: 23920 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 13:13:25,980 - root - INFO - lr: 2.0955e-05 gnorm: 1.05 [14:39:15< 9:51:04] +[titan] 2025-10-05 13:13:36,839 - root - INFO - step: 23925 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8255 +[titan] 2025-10-05 13:13:36,839 - root - INFO - lr: 2.0947e-05 gnorm: 1.08 [14:39:26< 9:50:53] +[titan] 2025-10-05 13:13:47,718 - root - INFO - step: 23930 loss: 2.0539 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 13:13:47,718 - root - INFO - lr: 
2.0938e-05 gnorm: 1.07 [14:39:37< 9:50:42] +[titan] 2025-10-05 13:13:58,659 - root - INFO - step: 23935 loss: 2.1295 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 13:13:58,659 - root - INFO - lr: 2.0929e-05 gnorm: 1.09 [14:39:48< 9:50:31] +[titan] 2025-10-05 13:14:09,537 - root - INFO - step: 23940 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7883 +[titan] 2025-10-05 13:14:09,537 - root - INFO - lr: 2.0921e-05 gnorm: 1.06 [14:39:59< 9:50:19] +[titan] 2025-10-05 13:14:20,423 - root - INFO - step: 23945 loss: 2.0391 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8022 +[titan] 2025-10-05 13:14:20,423 - root - INFO - lr: 2.0912e-05 gnorm: 1.08 [14:40:10< 9:50:08] +[titan] 2025-10-05 13:14:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:14:31,294 - root - INFO - step: 23950 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8110 +[titan] 2025-10-05 13:14:31,294 - root - INFO - lr: 2.0904e-05 gnorm: 1.02 [14:40:20< 9:49:57] +[titan] 2025-10-05 13:14:42,149 - root - INFO - step: 23955 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:14:42,149 - root - INFO - lr: 2.0895e-05 gnorm: 1.11 [14:40:31< 9:49:46] +[titan] 2025-10-05 13:14:53,021 - root - INFO - step: 23960 loss: 2.0544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 13:14:53,021 - root - INFO - lr: 2.0887e-05 gnorm: 1.07 [14:40:42< 9:49:35] +[titan] 2025-10-05 13:15:03,924 - root - INFO - step: 23965 loss: 2.0186 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 13:15:03,925 - root - INFO - lr: 2.0878e-05 gnorm: 1.08 [14:40:53< 9:49:24] +[titan] 2025-10-05 13:15:14,778 - root - INFO - step: 23970 loss: 2.0244 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 13:15:14,778 - root - INFO - lr: 2.0870e-05 gnorm: 1.10 [14:41:04< 9:49:13] +[titan] 2025-10-05 13:15:25,658 - root - INFO - step: 23975 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:15:25,658 - root - INFO - lr: 2.0861e-05 gnorm: 1.05 [14:41:15< 9:49:02] +[titan] 2025-10-05 13:15:36,526 - root - INFO - step: 23980 loss: 2.1043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 13:15:36,526 - root - INFO - lr: 
2.0853e-05 gnorm: 1.11 [14:41:26< 9:48:50] +[titan] 2025-10-05 13:15:47,390 - root - INFO - step: 23985 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 13:15:47,390 - root - INFO - lr: 2.0844e-05 gnorm: 1.10 [14:41:36< 9:48:39] +[titan] 2025-10-05 13:15:58,289 - root - INFO - step: 23990 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 13:15:58,289 - root - INFO - lr: 2.0836e-05 gnorm: 1.06 [14:41:47< 9:48:28] +[titan] 2025-10-05 13:16:09,157 - root - INFO - step: 23995 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8039 +[titan] 2025-10-05 13:16:09,157 - root - INFO - lr: 2.0827e-05 gnorm: 1.11 [14:41:58< 9:48:17] +[titan] 2025-10-05 13:16:17,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:16:20,053 - root - INFO - step: 24000 loss: 2.0037 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:16:20,054 - root - INFO - lr: 2.0819e-05 gnorm: 1.08 [14:42:09< 9:48:06] +[titan] 2025-10-05 13:16:30,898 - root - INFO - step: 24005 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 13:16:30,899 - root - INFO - lr: 2.0810e-05 gnorm: 1.07 [14:42:20< 9:47:55] +[titan] 2025-10-05 13:16:41,756 - root - INFO - step: 24010 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 13:16:41,757 - root - INFO - lr: 2.0802e-05 gnorm: 1.05 [14:42:31< 9:47:44] +[titan] 2025-10-05 13:16:52,618 - root - INFO - step: 24015 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8347 +[titan] 2025-10-05 13:16:52,618 - root - INFO - lr: 2.0793e-05 gnorm: 1.12 [14:42:42< 9:47:33] +[titan] 2025-10-05 13:17:03,489 - root - INFO - step: 24020 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:17:03,489 - root - INFO - lr: 2.0785e-05 gnorm: 1.10 [14:42:53< 9:47:21] +[titan] 2025-10-05 13:17:14,356 - root - INFO - step: 24025 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 13:17:14,356 - root - INFO - lr: 2.0776e-05 gnorm: 1.08 [14:43:03< 9:47:10] +[titan] 2025-10-05 13:17:25,293 - root - INFO - step: 24030 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 13:17:25,293 - root - INFO - lr: 
2.0767e-05 gnorm: 1.14 [14:43:14< 9:46:59] +[titan] 2025-10-05 13:17:36,153 - root - INFO - step: 24035 loss: 2.0553 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8156 +[titan] 2025-10-05 13:17:36,153 - root - INFO - lr: 2.0759e-05 gnorm: 1.07 [14:43:25< 9:46:48] +[titan] 2025-10-05 13:17:47,022 - root - INFO - step: 24040 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 13:17:47,022 - root - INFO - lr: 2.0750e-05 gnorm: 1.08 [14:43:36< 9:46:37] +[titan] 2025-10-05 13:17:57,898 - root - INFO - step: 24045 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 13:17:57,899 - root - INFO - lr: 2.0742e-05 gnorm: 1.07 [14:43:47< 9:46:26] +[titan] 2025-10-05 13:18:06,588 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:18:08,771 - root - INFO - step: 24050 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:18:08,771 - root - INFO - lr: 2.0733e-05 gnorm: 1.05 [14:43:58< 9:46:15] +[titan] 2025-10-05 13:18:19,609 - root - INFO - step: 24055 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 13:18:19,609 - root - INFO - lr: 2.0725e-05 gnorm: 1.10 [14:44:09< 9:46:03] +[titan] 2025-10-05 13:18:30,457 - root - INFO - step: 24060 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 13:18:30,457 - root - INFO - lr: 2.0716e-05 gnorm: 1.12 [14:44:20< 9:45:52] +[titan] 2025-10-05 13:18:39,419 - root - INFO - Dumping profiler traces at step 24064 +[titan] 2025-10-05 13:18:39,454 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 13:18:41,660 - root - INFO - step: 24065 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,250 tflops: 405.80 mfu: 41.03% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7997 +[titan] 2025-10-05 13:18:41,660 - root - INFO - lr: 2.0708e-05 gnorm: 1.05 [14:44:31< 9:45:41] +[titan] 2025-10-05 13:18:52,499 - root - INFO - step: 24070 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 13:18:52,499 - root - INFO - lr: 2.0699e-05 gnorm: 1.05 [14:44:42< 9:45:30] +[titan] 2025-10-05 13:19:03,398 - root - INFO - step: 24075 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:19:03,398 - root - INFO - lr: 2.0691e-05 gnorm: 1.08 [14:44:52< 9:45:19] +[titan] 2025-10-05 13:19:14,221 - root - INFO - step: 24080 loss: 
2.0782 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 13:19:14,221 - root - INFO - lr: 2.0682e-05 gnorm: 1.08 [14:45:03< 9:45:08] +[titan] 2025-10-05 13:19:25,059 - root - INFO - step: 24085 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 13:19:25,059 - root - INFO - lr: 2.0674e-05 gnorm: 1.05 [14:45:14< 9:44:57] +[titan] 2025-10-05 13:19:35,885 - root - INFO - step: 24090 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 13:19:35,885 - root - INFO - lr: 2.0665e-05 gnorm: 1.08 [14:45:25< 9:44:46] +[titan] 2025-10-05 13:19:46,755 - root - INFO - step: 24095 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 13:19:46,755 - root - INFO - lr: 2.0657e-05 gnorm: 1.09 [14:45:36< 9:44:35] +[titan] 2025-10-05 13:19:55,428 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:19:57,605 - root - INFO - step: 24100 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 13:19:57,605 - root - INFO - lr: 2.0648e-05 gnorm: 1.05 [14:45:47< 9:44:23] +[titan] 2025-10-05 13:20:08,458 - root - INFO - step: 24105 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 13:20:08,458 - root - INFO - lr: 2.0640e-05 gnorm: 1.11 [14:45:58< 9:44:12] +[titan] 2025-10-05 13:20:19,304 - root - INFO - step: 24110 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:20:19,305 - root - INFO - lr: 2.0631e-05 gnorm: 1.04 [14:46:08< 9:44:01] +[titan] 2025-10-05 13:20:30,155 - root - INFO - step: 24115 loss: 2.0297 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 13:20:30,155 - root - INFO - lr: 2.0623e-05 gnorm: 1.07 [14:46:19< 9:43:50] +[titan] 2025-10-05 13:20:41,004 - root - INFO - step: 24120 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:20:41,005 - root - INFO - lr: 2.0614e-05 gnorm: 1.07 [14:46:30< 9:43:39] +[titan] 2025-10-05 13:20:51,867 - root - INFO - step: 24125 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8402 +[titan] 2025-10-05 13:20:51,867 - root - INFO - lr: 2.0606e-05 gnorm: 1.12 [14:46:41< 9:43:28] +[titan] 2025-10-05 13:21:02,698 - root - INFO - step: 24130 loss: 2.0869 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 13:21:02,699 - root - INFO - lr: 
2.0597e-05 gnorm: 1.06 [14:46:52< 9:43:17] +[titan] 2025-10-05 13:21:13,527 - root - INFO - step: 24135 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 13:21:13,528 - root - INFO - lr: 2.0589e-05 gnorm: 1.10 [14:47:03< 9:43:05] +[titan] 2025-10-05 13:21:24,355 - root - INFO - step: 24140 loss: 2.0475 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8102 +[titan] 2025-10-05 13:21:24,355 - root - INFO - lr: 2.0580e-05 gnorm: 1.07 [14:47:13< 9:42:54] +[titan] 2025-10-05 13:21:35,208 - root - INFO - step: 24145 loss: 2.1059 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:21:35,208 - root - INFO - lr: 2.0572e-05 gnorm: 1.10 [14:47:24< 9:42:43] +[titan] 2025-10-05 13:21:43,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:21:46,037 - root - INFO - step: 24150 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 13:21:46,037 - root - INFO - lr: 2.0563e-05 gnorm: 1.05 [14:47:35< 9:42:32] +[titan] 2025-10-05 13:21:56,862 - root - INFO - step: 24155 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8062 +[titan] 2025-10-05 13:21:56,862 - root - INFO - lr: 2.0555e-05 gnorm: 1.05 [14:47:46< 9:42:21] +[titan] 2025-10-05 13:22:07,697 - root - INFO - step: 24160 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:22:07,697 - root - INFO - lr: 2.0546e-05 gnorm: 1.07 [14:47:57< 9:42:10] +[titan] 2025-10-05 13:22:18,551 - root - INFO - step: 24165 loss: 2.0865 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 13:22:18,551 - root - INFO - lr: 2.0538e-05 gnorm: 1.09 [14:48:08< 9:41:58] +[titan] 2025-10-05 13:22:29,396 - root - INFO - step: 24170 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:22:29,396 - root - INFO - lr: 2.0529e-05 gnorm: 1.08 [14:48:18< 9:41:47] +[titan] 2025-10-05 13:22:40,227 - root - INFO - step: 24175 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8696 +[titan] 2025-10-05 13:22:40,227 - root - INFO - lr: 2.0521e-05 gnorm: 1.09 [14:48:29< 9:41:36] +[titan] 2025-10-05 13:22:51,092 - root - INFO - step: 24180 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 13:22:51,092 - root - INFO - lr: 
2.0512e-05 gnorm: 1.09 [14:48:40< 9:41:25] +[titan] 2025-10-05 13:23:01,953 - root - INFO - step: 24185 loss: 1.9953 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 13:23:01,953 - root - INFO - lr: 2.0504e-05 gnorm: 1.07 [14:48:51< 9:41:14] +[titan] 2025-10-05 13:23:12,844 - root - INFO - step: 24190 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 13:23:12,844 - root - INFO - lr: 2.0496e-05 gnorm: 1.15 [14:49:02< 9:41:03] +[titan] 2025-10-05 13:23:23,695 - root - INFO - step: 24195 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 13:23:23,695 - root - INFO - lr: 2.0487e-05 gnorm: 1.07 [14:49:13< 9:40:52] +[titan] 2025-10-05 13:23:32,375 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:23:34,564 - root - INFO - step: 24200 loss: 2.0236 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:23:34,564 - root - INFO - lr: 2.0479e-05 gnorm: 1.07 [14:49:24< 9:40:41] +[titan] 2025-10-05 13:23:45,424 - root - INFO - step: 24205 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 13:23:45,424 - root - INFO - lr: 2.0470e-05 gnorm: 1.07 [14:49:35< 9:40:29] +[titan] 2025-10-05 13:23:56,267 - root - INFO - step: 24210 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 13:23:56,267 - root - INFO - lr: 2.0462e-05 gnorm: 1.03 [14:49:45< 9:40:18] +[titan] 2025-10-05 13:24:07,115 - root - INFO - step: 24215 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8655 +[titan] 2025-10-05 13:24:07,115 - root - INFO - lr: 2.0453e-05 gnorm: 1.12 [14:49:56< 9:40:07] +[titan] 2025-10-05 13:24:17,952 - root - INFO - step: 24220 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 13:24:17,952 - root - INFO - lr: 2.0445e-05 gnorm: 1.13 [14:50:07< 9:39:56] +[titan] 2025-10-05 13:24:28,825 - root - INFO - step: 24225 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8509 +[titan] 2025-10-05 13:24:28,825 - root - INFO - lr: 2.0436e-05 gnorm: 1.06 [14:50:18< 9:39:45] +[titan] 2025-10-05 13:24:39,649 - root - INFO - step: 24230 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 13:24:39,649 - root - INFO - lr: 
2.0428e-05 gnorm: 1.10 [14:50:29< 9:39:34] +[titan] 2025-10-05 13:24:50,487 - root - INFO - step: 24235 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 13:24:50,487 - root - INFO - lr: 2.0419e-05 gnorm: 1.07 [14:50:40< 9:39:23] +[titan] 2025-10-05 13:25:01,334 - root - INFO - step: 24240 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:25:01,335 - root - INFO - lr: 2.0411e-05 gnorm: 1.02 [14:50:50< 9:39:11] +[titan] 2025-10-05 13:25:12,172 - root - INFO - step: 24245 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 13:25:12,172 - root - INFO - lr: 2.0402e-05 gnorm: 1.07 [14:51:01< 9:39:00] +[titan] 2025-10-05 13:25:20,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:25:23,024 - root - INFO - step: 24250 loss: 2.1386 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:25:23,025 - root - INFO - lr: 2.0394e-05 gnorm: 1.10 [14:51:12< 9:38:49] +[titan] 2025-10-05 13:25:33,889 - root - INFO - step: 24255 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:25:33,889 - root - INFO - lr: 2.0385e-05 gnorm: 1.11 [14:51:23< 9:38:38] +[titan] 2025-10-05 13:25:44,730 - root - INFO - step: 24260 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 13:25:44,730 - root - INFO - lr: 2.0377e-05 gnorm: 1.07 [14:51:34< 9:38:27] +[titan] 2025-10-05 13:25:55,582 - root - INFO - step: 24265 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 13:25:55,582 - root - INFO - lr: 2.0368e-05 gnorm: 1.07 [14:51:45< 9:38:16] +[titan] 2025-10-05 13:26:06,446 - root - INFO - step: 24270 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:26:06,447 - root - INFO - lr: 2.0360e-05 gnorm: 1.08 [14:51:56< 9:38:05] +[titan] 2025-10-05 13:26:17,296 - root - INFO - step: 24275 loss: 2.0367 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8000 +[titan] 2025-10-05 13:26:17,296 - root - INFO - lr: 2.0352e-05 gnorm: 1.08 [14:52:06< 9:37:53] +[titan] 2025-10-05 13:26:28,151 - root - INFO - step: 24280 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 13:26:28,151 - root - INFO - lr: 
2.0343e-05 gnorm: 1.09 [14:52:17< 9:37:42] +[titan] 2025-10-05 13:26:39,050 - root - INFO - step: 24285 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 13:26:39,051 - root - INFO - lr: 2.0335e-05 gnorm: 1.10 [14:52:28< 9:37:31] +[titan] 2025-10-05 13:26:49,902 - root - INFO - step: 24290 loss: 2.0746 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:26:49,902 - root - INFO - lr: 2.0326e-05 gnorm: 1.07 [14:52:39< 9:37:20] +[titan] 2025-10-05 13:27:00,733 - root - INFO - step: 24295 loss: 2.1061 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 13:27:00,734 - root - INFO - lr: 2.0318e-05 gnorm: 1.11 [14:52:50< 9:37:09] +[titan] 2025-10-05 13:27:09,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:27:11,587 - root - INFO - step: 24300 loss: 2.0702 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 13:27:11,587 - root - INFO - lr: 2.0309e-05 gnorm: 1.10 [14:53:01< 9:36:58] +[titan] 2025-10-05 13:27:22,433 - root - INFO - step: 24305 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 13:27:22,433 - root - INFO - lr: 2.0301e-05 gnorm: 1.05 [14:53:12< 9:36:47] +[titan] 2025-10-05 13:27:33,270 - root - INFO - step: 24310 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 13:27:33,270 - root - INFO - lr: 2.0292e-05 gnorm: 1.06 [14:53:22< 9:36:35] +[titan] 2025-10-05 13:27:44,105 - root - INFO - step: 24315 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 13:27:44,105 - root - INFO - lr: 2.0284e-05 gnorm: 1.07 [14:53:33< 9:36:24] +[titan] 2025-10-05 13:27:54,981 - root - INFO - step: 24320 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 13:27:54,981 - root - INFO - lr: 2.0275e-05 gnorm: 1.13 [14:53:44< 9:36:13] +[titan] 2025-10-05 13:28:05,837 - root - INFO - step: 24325 loss: 2.1113 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:28:05,838 - root - INFO - lr: 2.0267e-05 gnorm: 1.14 [14:53:55< 9:36:02] +[titan] 2025-10-05 13:28:16,705 - root - INFO - step: 24330 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 13:28:16,705 - root - INFO - lr: 
2.0258e-05 gnorm: 1.05 [14:54:06< 9:35:51] +[titan] 2025-10-05 13:28:27,566 - root - INFO - step: 24335 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8683 +[titan] 2025-10-05 13:28:27,566 - root - INFO - lr: 2.0250e-05 gnorm: 1.15 [14:54:17< 9:35:40] +[titan] 2025-10-05 13:28:38,418 - root - INFO - step: 24340 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:28:38,418 - root - INFO - lr: 2.0242e-05 gnorm: 1.08 [14:54:27< 9:35:29] +[titan] 2025-10-05 13:28:49,296 - root - INFO - step: 24345 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 13:28:49,296 - root - INFO - lr: 2.0233e-05 gnorm: 1.14 [14:54:38< 9:35:18] +[titan] 2025-10-05 13:28:58,013 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:29:00,192 - root - INFO - step: 24350 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:29:00,192 - root - INFO - lr: 2.0225e-05 gnorm: 1.18 [14:54:49< 9:35:06] +[titan] 2025-10-05 13:29:11,072 - root - INFO - step: 24355 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 13:29:11,072 - root - INFO - lr: 2.0216e-05 gnorm: 1.09 [14:55:00< 9:34:55] +[titan] 2025-10-05 13:29:21,925 - root - INFO - step: 24360 loss: 2.1089 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 13:29:21,925 - root - INFO - lr: 2.0208e-05 gnorm: 1.07 [14:55:11< 9:34:44] +[titan] 2025-10-05 13:29:32,780 - root - INFO - step: 24365 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:29:32,781 - root - INFO - lr: 2.0199e-05 gnorm: 1.10 [14:55:22< 9:34:33] +[titan] 2025-10-05 13:29:43,663 - root - INFO - step: 24370 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:29:43,663 - root - INFO - lr: 2.0191e-05 gnorm: 1.10 [14:55:33< 9:34:22] +[titan] 2025-10-05 13:29:54,539 - root - INFO - step: 24375 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:29:54,540 - root - INFO - lr: 2.0182e-05 gnorm: 1.12 [14:55:44< 9:34:11] +[titan] 2025-10-05 13:30:05,417 - root - INFO - step: 24380 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 13:30:05,417 - root - INFO - lr: 
2.0174e-05 gnorm: 1.12 [14:55:54< 9:34:00] +[titan] 2025-10-05 13:30:16,350 - root - INFO - step: 24385 loss: 2.1282 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 13:30:16,350 - root - INFO - lr: 2.0166e-05 gnorm: 1.05 [14:56:05< 9:33:49] +[titan] 2025-10-05 13:30:27,217 - root - INFO - step: 24390 loss: 2.0751 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:30:27,217 - root - INFO - lr: 2.0157e-05 gnorm: 1.12 [14:56:16< 9:33:38] +[titan] 2025-10-05 13:30:38,065 - root - INFO - step: 24395 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:30:38,065 - root - INFO - lr: 2.0149e-05 gnorm: 1.08 [14:56:27< 9:33:26] +[titan] 2025-10-05 13:30:46,747 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:30:48,926 - root - INFO - step: 24400 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 13:30:48,926 - root - INFO - lr: 2.0140e-05 gnorm: 1.09 [14:56:38< 9:33:15] +[titan] 2025-10-05 13:30:59,781 - root - INFO - step: 24405 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8166 +[titan] 2025-10-05 13:30:59,781 - root - INFO - lr: 2.0132e-05 gnorm: 1.07 [14:56:49< 9:33:04] +[titan] 2025-10-05 13:31:10,656 - root - INFO - step: 24410 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 13:31:10,656 - root - INFO - lr: 2.0123e-05 gnorm: 1.11 [14:57:00< 9:32:53] +[titan] 2025-10-05 13:31:21,555 - root - INFO - step: 24415 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8268 +[titan] 2025-10-05 13:31:21,555 - root - INFO - lr: 2.0115e-05 gnorm: 1.09 [14:57:11< 9:32:42] +[titan] 2025-10-05 13:31:32,426 - root - INFO - step: 24420 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 13:31:32,426 - root - INFO - lr: 2.0107e-05 gnorm: 1.07 [14:57:21< 9:32:31] +[titan] 2025-10-05 13:31:43,323 - root - INFO - step: 24425 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 13:31:43,323 - root - INFO - lr: 2.0098e-05 gnorm: 1.31 [14:57:32< 9:32:20] +[titan] 2025-10-05 13:31:54,203 - root - INFO - step: 24430 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 13:31:54,204 - root - INFO - lr: 
2.0090e-05 gnorm: 1.05 [14:57:43< 9:32:09] +[titan] 2025-10-05 13:32:05,075 - root - INFO - step: 24435 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 13:32:05,075 - root - INFO - lr: 2.0081e-05 gnorm: 1.07 [14:57:54< 9:31:57] +[titan] 2025-10-05 13:32:15,980 - root - INFO - step: 24440 loss: 2.1665 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 13:32:15,980 - root - INFO - lr: 2.0073e-05 gnorm: 1.09 [14:58:05< 9:31:46] +[titan] 2025-10-05 13:32:26,906 - root - INFO - step: 24445 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 13:32:26,906 - root - INFO - lr: 2.0064e-05 gnorm: 1.08 [14:58:16< 9:31:35] +[titan] 2025-10-05 13:32:35,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:32:37,766 - root - INFO - step: 24450 loss: 2.0220 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 13:32:37,766 - root - INFO - lr: 2.0056e-05 gnorm: 1.06 [14:58:27< 9:31:24] +[titan] 2025-10-05 13:32:48,638 - root - INFO - step: 24455 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 13:32:48,638 - root - INFO - lr: 2.0048e-05 gnorm: 1.07 [14:58:38< 9:31:13] +[titan] 2025-10-05 13:32:59,507 - root - INFO - step: 24460 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:32:59,508 - root - INFO - lr: 2.0039e-05 gnorm: 1.07 [14:58:49< 9:31:02] +[titan] 2025-10-05 13:33:10,393 - root - INFO - step: 24465 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:33:10,393 - root - INFO - lr: 2.0031e-05 gnorm: 1.05 [14:58:59< 9:30:51] +[titan] 2025-10-05 13:33:21,345 - root - INFO - step: 24470 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8103 +[titan] 2025-10-05 13:33:21,345 - root - INFO - lr: 2.0022e-05 gnorm: 1.06 [14:59:10< 9:30:40] +[titan] 2025-10-05 13:33:32,228 - root - INFO - step: 24475 loss: 2.0788 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:33:32,228 - root - INFO - lr: 2.0014e-05 gnorm: 1.09 [14:59:21< 9:30:29] +[titan] 2025-10-05 13:33:43,179 - root - INFO - step: 24480 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8709 +[titan] 2025-10-05 13:33:43,179 - root - INFO - lr: 
2.0006e-05 gnorm: 1.10 [14:59:32< 9:30:18] +[titan] 2025-10-05 13:33:54,062 - root - INFO - step: 24485 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 13:33:54,062 - root - INFO - lr: 1.9997e-05 gnorm: 1.07 [14:59:43< 9:30:06] +[titan] 2025-10-05 13:34:04,940 - root - INFO - step: 24490 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 13:34:04,940 - root - INFO - lr: 1.9989e-05 gnorm: 1.06 [14:59:54< 9:29:55] +[titan] 2025-10-05 13:34:15,844 - root - INFO - step: 24495 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8069 +[titan] 2025-10-05 13:34:15,844 - root - INFO - lr: 1.9980e-05 gnorm: 1.09 [15:00:05< 9:29:44] +[titan] 2025-10-05 13:34:24,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:34:26,706 - root - INFO - step: 24500 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 13:34:26,706 - root - INFO - lr: 1.9972e-05 gnorm: 1.11 [15:00:16< 9:29:33] +[titan] 2025-10-05 13:34:37,585 - root - INFO - step: 24505 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 13:34:37,585 - root - INFO - lr: 1.9963e-05 gnorm: 1.08 [15:00:27< 9:29:22] +[titan] 2025-10-05 13:34:48,499 - root - INFO - step: 24510 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 13:34:48,500 - root - INFO - lr: 1.9955e-05 gnorm: 1.12 [15:00:38< 9:29:11] +[titan] 2025-10-05 13:34:59,379 - root - INFO - step: 24515 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 13:34:59,379 - root - INFO - lr: 1.9947e-05 gnorm: 1.09 [15:00:48< 9:29:00] +[titan] 2025-10-05 13:35:10,244 - root - INFO - step: 24520 loss: 2.0374 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8014 +[titan] 2025-10-05 13:35:10,245 - root - INFO - lr: 1.9938e-05 gnorm: 1.03 [15:00:59< 9:28:49] +[titan] 2025-10-05 13:35:21,112 - root - INFO - step: 24525 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 13:35:21,113 - root - INFO - lr: 1.9930e-05 gnorm: 1.06 [15:01:10< 9:28:37] +[titan] 2025-10-05 13:35:31,956 - root - INFO - step: 24530 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8681 +[titan] 2025-10-05 13:35:31,957 - root - INFO - lr: 
1.9921e-05 gnorm: 1.08 [15:01:21< 9:28:26] +[titan] 2025-10-05 13:35:42,842 - root - INFO - step: 24535 loss: 2.0794 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8375 +[titan] 2025-10-05 13:35:42,842 - root - INFO - lr: 1.9913e-05 gnorm: 1.10 [15:01:32< 9:28:15] +[titan] 2025-10-05 13:35:53,706 - root - INFO - step: 24540 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 13:35:53,706 - root - INFO - lr: 1.9905e-05 gnorm: 1.11 [15:01:43< 9:28:04] +[titan] 2025-10-05 13:36:04,625 - root - INFO - step: 24545 loss: 2.1385 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:36:04,625 - root - INFO - lr: 1.9896e-05 gnorm: 1.07 [15:01:54< 9:27:53] +[titan] 2025-10-05 13:36:13,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:36:15,477 - root - INFO - step: 24550 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 13:36:15,477 - root - INFO - lr: 1.9888e-05 gnorm: 1.08 [15:02:05< 9:27:42] +[titan] 2025-10-05 13:36:26,344 - root - INFO - step: 24555 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 13:36:26,344 - root - INFO - lr: 1.9879e-05 gnorm: 1.07 [15:02:15< 9:27:31] +[titan] 2025-10-05 13:36:37,204 - root - INFO - step: 24560 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 13:36:37,204 - root - INFO - lr: 1.9871e-05 gnorm: 1.07 [15:02:26< 9:27:20] +[titan] 2025-10-05 13:36:48,082 - root - INFO - step: 24565 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 13:36:48,082 - root - INFO - lr: 1.9863e-05 gnorm: 1.05 [15:02:37< 9:27:09] +[titan] 2025-10-05 13:36:58,948 - root - INFO - step: 24570 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 13:36:58,948 - root - INFO - lr: 1.9854e-05 gnorm: 1.07 [15:02:48< 9:26:57] +[titan] 2025-10-05 13:37:09,939 - root - INFO - step: 24575 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 29,815 tflops: 413.64 mfu: 41.82% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 13:37:09,939 - root - INFO - lr: 1.9846e-05 gnorm: 1.08 [15:02:59< 9:26:46] +[titan] 2025-10-05 13:37:12,299 - root - INFO - Dumping profiler traces at step 24576 +[titan] 2025-10-05 13:37:12,335 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:37:21,087 - root - INFO - step: 24580 loss: 
2.0679 memory: 118.84GiB(85.28%) tps: 29,394 tflops: 407.79 mfu: 41.23% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:37:21,087 - root - INFO - lr: 1.9837e-05 gnorm: 1.10 [15:03:10< 9:26:35] +[titan] 2025-10-05 13:37:31,945 - root - INFO - step: 24585 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:37:31,945 - root - INFO - lr: 1.9829e-05 gnorm: 1.04 [15:03:21< 9:26:24] +[titan] 2025-10-05 13:37:42,812 - root - INFO - step: 24590 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8024 +[titan] 2025-10-05 13:37:42,812 - root - INFO - lr: 1.9821e-05 gnorm: 1.05 [15:03:32< 9:26:13] +[titan] 2025-10-05 13:37:53,676 - root - INFO - step: 24595 loss: 2.0523 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 13:37:53,677 - root - INFO - lr: 1.9812e-05 gnorm: 1.07 [15:03:43< 9:26:02] +[titan] 2025-10-05 13:38:02,369 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:38:04,554 - root - INFO - step: 24600 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8521 +[titan] 2025-10-05 13:38:04,554 - root - INFO - lr: 1.9804e-05 gnorm: 1.09 [15:03:54< 9:25:51] +[titan] 2025-10-05 13:38:15,471 - root - INFO - step: 24605 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 13:38:15,471 - root - INFO - lr: 1.9796e-05 gnorm: 1.07 [15:04:05< 9:25:40] +[titan] 2025-10-05 13:38:26,377 - root - INFO - step: 24610 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8870 +[titan] 2025-10-05 13:38:26,377 - root - INFO - lr: 1.9787e-05 gnorm: 1.12 [15:04:15< 9:25:29] +[titan] 2025-10-05 13:38:37,243 - root - INFO - step: 24615 loss: 2.0786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8367 +[titan] 2025-10-05 13:38:37,243 - root - INFO - lr: 1.9779e-05 gnorm: 1.09 [15:04:26< 9:25:18] +[titan] 2025-10-05 13:38:48,119 - root - INFO - step: 24620 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 13:38:48,119 - root - INFO - lr: 1.9770e-05 gnorm: 1.07 [15:04:37< 9:25:07] +[titan] 2025-10-05 13:38:58,977 - root - INFO - step: 24625 loss: 2.0721 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8321 +[titan] 2025-10-05 13:38:58,977 - root - INFO - lr: 1.9762e-05 gnorm: 1.11 [15:04:48< 9:24:55] +[titan] 2025-10-05 13:39:09,830 - root - INFO - step: 24630 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8930 +[titan] 2025-10-05 13:39:09,830 - root - INFO - lr: 
1.9754e-05 gnorm: 1.13 [15:04:59< 9:24:44] +[titan] 2025-10-05 13:39:20,732 - root - INFO - step: 24635 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 13:39:20,733 - root - INFO - lr: 1.9745e-05 gnorm: 1.10 [15:05:10< 9:24:33] +[titan] 2025-10-05 13:39:31,629 - root - INFO - step: 24640 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 13:39:31,629 - root - INFO - lr: 1.9737e-05 gnorm: 1.08 [15:05:21< 9:24:22] +[titan] 2025-10-05 13:39:42,484 - root - INFO - step: 24645 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 13:39:42,484 - root - INFO - lr: 1.9728e-05 gnorm: 1.05 [15:05:32< 9:24:11] +[titan] 2025-10-05 13:39:51,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:39:53,346 - root - INFO - step: 24650 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 13:39:53,346 - root - INFO - lr: 1.9720e-05 gnorm: 1.06 [15:05:42< 9:24:00] +[titan] 2025-10-05 13:40:04,203 - root - INFO - step: 24655 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 13:40:04,203 - root - INFO - lr: 1.9712e-05 gnorm: 1.12 [15:05:53< 9:23:49] +[titan] 2025-10-05 13:40:15,072 - root - INFO - step: 24660 loss: 2.0882 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 13:40:15,073 - root - INFO - lr: 1.9703e-05 gnorm: 1.10 [15:06:04< 9:23:38] +[titan] 2025-10-05 13:40:25,992 - root - INFO - step: 24665 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:40:25,992 - root - INFO - lr: 1.9695e-05 gnorm: 1.06 [15:06:15< 9:23:26] +[titan] 2025-10-05 13:40:36,894 - root - INFO - step: 24670 loss: 2.0856 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 13:40:36,894 - root - INFO - lr: 1.9687e-05 gnorm: 1.12 [15:06:26< 9:23:15] +[titan] 2025-10-05 13:40:47,766 - root - INFO - step: 24675 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 13:40:47,766 - root - INFO - lr: 1.9678e-05 gnorm: 1.09 [15:06:37< 9:23:04] +[titan] 2025-10-05 13:40:58,618 - root - INFO - step: 24680 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8134 +[titan] 2025-10-05 13:40:58,618 - root - INFO - lr: 
1.9670e-05 gnorm: 1.07 [15:06:48< 9:22:53] +[titan] 2025-10-05 13:41:09,490 - root - INFO - step: 24685 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 13:41:09,490 - root - INFO - lr: 1.9662e-05 gnorm: 1.13 [15:06:59< 9:22:42] +[titan] 2025-10-05 13:41:20,418 - root - INFO - step: 24690 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 13:41:20,418 - root - INFO - lr: 1.9653e-05 gnorm: 1.06 [15:07:09< 9:22:31] +[titan] 2025-10-05 13:41:31,285 - root - INFO - step: 24695 loss: 2.0651 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:41:31,285 - root - INFO - lr: 1.9645e-05 gnorm: 1.08 [15:07:20< 9:22:20] +[titan] 2025-10-05 13:41:39,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:41:42,156 - root - INFO - step: 24700 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 13:41:42,156 - root - INFO - lr: 1.9636e-05 gnorm: 1.11 [15:07:31< 9:22:09] +[titan] 2025-10-05 13:41:53,063 - root - INFO - step: 24705 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 13:41:53,063 - root - INFO - lr: 1.9628e-05 gnorm: 1.08 [15:07:42< 9:21:58] +[titan] 2025-10-05 13:42:03,922 - root - INFO - step: 24710 loss: 2.0804 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 13:42:03,922 - root - INFO - lr: 1.9620e-05 gnorm: 1.06 [15:07:53< 9:21:46] +[titan] 2025-10-05 13:42:14,790 - root - INFO - step: 24715 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8274 +[titan] 2025-10-05 13:42:14,790 - root - INFO - lr: 1.9611e-05 gnorm: 1.09 [15:08:04< 9:21:35] +[titan] 2025-10-05 13:42:25,702 - root - INFO - step: 24720 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 13:42:25,702 - root - INFO - lr: 1.9603e-05 gnorm: 1.11 [15:08:15< 9:21:24] +[titan] 2025-10-05 13:42:36,573 - root - INFO - step: 24725 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 13:42:36,573 - root - INFO - lr: 1.9595e-05 gnorm: 1.08 [15:08:26< 9:21:13] +[titan] 2025-10-05 13:42:47,423 - root - INFO - step: 24730 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 13:42:47,424 - root - INFO - lr: 
1.9586e-05 gnorm: 1.12 [15:08:36< 9:21:02] +[titan] 2025-10-05 13:42:58,321 - root - INFO - step: 24735 loss: 2.1290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 13:42:58,321 - root - INFO - lr: 1.9578e-05 gnorm: 1.08 [15:08:47< 9:20:51] +[titan] 2025-10-05 13:43:09,170 - root - INFO - step: 24740 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:43:09,171 - root - INFO - lr: 1.9570e-05 gnorm: 1.12 [15:08:58< 9:20:40] +[titan] 2025-10-05 13:43:20,002 - root - INFO - step: 24745 loss: 2.0612 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8216 +[titan] 2025-10-05 13:43:20,002 - root - INFO - lr: 1.9561e-05 gnorm: 1.11 [15:09:09< 9:20:29] +[titan] 2025-10-05 13:43:28,699 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:43:30,874 - root - INFO - step: 24750 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 13:43:30,874 - root - INFO - lr: 1.9553e-05 gnorm: 1.08 [15:09:20< 9:20:18] +[titan] 2025-10-05 13:43:41,719 - root - INFO - step: 24755 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 13:43:41,719 - root - INFO - lr: 1.9545e-05 gnorm: 1.11 [15:09:31< 9:20:06] +[titan] 2025-10-05 13:43:52,574 - root - INFO - step: 24760 loss: 2.0568 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 13:43:52,574 - root - INFO - lr: 1.9536e-05 gnorm: 1.07 [15:09:42< 9:19:55] +[titan] 2025-10-05 13:44:03,465 - root - INFO - step: 24765 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 13:44:03,465 - root - INFO - lr: 1.9528e-05 gnorm: 1.07 [15:09:52< 9:19:44] +[titan] 2025-10-05 13:44:14,316 - root - INFO - step: 24770 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 13:44:14,316 - root - INFO - lr: 1.9519e-05 gnorm: 1.05 [15:10:03< 9:19:33] +[titan] 2025-10-05 13:44:25,153 - root - INFO - step: 24775 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8456 +[titan] 2025-10-05 13:44:25,154 - root - INFO - lr: 1.9511e-05 gnorm: 1.07 [15:10:14< 9:19:22] +[titan] 2025-10-05 13:44:36,002 - root - INFO - step: 24780 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 13:44:36,003 - root - INFO - lr: 
1.9503e-05 gnorm: 1.08 [15:10:25< 9:19:11] +[titan] 2025-10-05 13:44:46,858 - root - INFO - step: 24785 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 13:44:46,858 - root - INFO - lr: 1.9494e-05 gnorm: 1.07 [15:10:36< 9:19:00] +[titan] 2025-10-05 13:44:57,702 - root - INFO - step: 24790 loss: 2.0838 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 13:44:57,702 - root - INFO - lr: 1.9486e-05 gnorm: 1.08 [15:10:47< 9:18:49] +[titan] 2025-10-05 13:45:08,535 - root - INFO - step: 24795 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 13:45:08,535 - root - INFO - lr: 1.9478e-05 gnorm: 1.06 [15:10:58< 9:18:37] +[titan] 2025-10-05 13:45:17,247 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:45:19,421 - root - INFO - step: 24800 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8718 +[titan] 2025-10-05 13:45:19,421 - root - INFO - lr: 1.9469e-05 gnorm: 1.08 [15:11:08< 9:18:26] +[titan] 2025-10-05 13:45:30,265 - root - INFO - step: 24805 loss: 2.0238 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 13:45:30,265 - root - INFO - lr: 1.9461e-05 gnorm: 1.07 [15:11:19< 9:18:15] +[titan] 2025-10-05 13:45:41,104 - root - INFO - step: 24810 loss: 2.0540 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8154 +[titan] 2025-10-05 13:45:41,104 - root - INFO - lr: 1.9453e-05 gnorm: 1.07 [15:11:30< 9:18:04] +[titan] 2025-10-05 13:45:51,953 - root - INFO - step: 24815 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8778 +[titan] 2025-10-05 13:45:51,953 - root - INFO - lr: 1.9444e-05 gnorm: 1.11 [15:11:41< 9:17:53] +[titan] 2025-10-05 13:46:02,816 - root - INFO - step: 24820 loss: 2.1004 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 13:46:02,816 - root - INFO - lr: 1.9436e-05 gnorm: 1.07 [15:11:52< 9:17:42] +[titan] 2025-10-05 13:46:13,676 - root - INFO - step: 24825 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 13:46:13,676 - root - INFO - lr: 1.9428e-05 gnorm: 1.10 [15:12:03< 9:17:31] +[titan] 2025-10-05 13:46:24,572 - root - INFO - step: 24830 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 13:46:24,572 - root - INFO - lr: 
1.9419e-05 gnorm: 1.10 [15:12:14< 9:17:20] +[titan] 2025-10-05 13:46:35,432 - root - INFO - step: 24835 loss: 2.1026 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:46:35,432 - root - INFO - lr: 1.9411e-05 gnorm: 1.07 [15:12:24< 9:17:08] +[titan] 2025-10-05 13:46:46,286 - root - INFO - step: 24840 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:46:46,287 - root - INFO - lr: 1.9403e-05 gnorm: 1.10 [15:12:35< 9:16:57] +[titan] 2025-10-05 13:46:57,123 - root - INFO - step: 24845 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8212 +[titan] 2025-10-05 13:46:57,123 - root - INFO - lr: 1.9394e-05 gnorm: 1.07 [15:12:46< 9:16:46] +[titan] 2025-10-05 13:47:05,788 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:47:07,960 - root - INFO - step: 24850 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 13:47:07,960 - root - INFO - lr: 1.9386e-05 gnorm: 1.13 [15:12:57< 9:16:35] +[titan] 2025-10-05 13:47:18,794 - root - INFO - step: 24855 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 13:47:18,794 - root - INFO - lr: 1.9378e-05 gnorm: 1.07 [15:13:08< 9:16:24] +[titan] 2025-10-05 13:47:29,672 - root - INFO - step: 24860 loss: 2.1559 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 13:47:29,672 - root - INFO - lr: 1.9369e-05 gnorm: 1.08 [15:13:19< 9:16:13] +[titan] 2025-10-05 13:47:40,555 - root - INFO - step: 24865 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 13:47:40,556 - root - INFO - lr: 1.9361e-05 gnorm: 1.14 [15:13:30< 9:16:02] +[titan] 2025-10-05 13:47:51,413 - root - INFO - step: 24870 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 13:47:51,413 - root - INFO - lr: 1.9353e-05 gnorm: 1.07 [15:13:40< 9:15:51] +[titan] 2025-10-05 13:48:02,253 - root - INFO - step: 24875 loss: 2.0532 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 13:48:02,253 - root - INFO - lr: 1.9345e-05 gnorm: 1.10 [15:13:51< 9:15:39] +[titan] 2025-10-05 13:48:13,099 - root - INFO - step: 24880 loss: 2.0338 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 13:48:13,099 - root - INFO - lr: 
1.9336e-05 gnorm: 1.08 [15:14:02< 9:15:28] +[titan] 2025-10-05 13:48:23,933 - root - INFO - step: 24885 loss: 2.0834 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 13:48:23,933 - root - INFO - lr: 1.9328e-05 gnorm: 1.08 [15:14:13< 9:15:17] +[titan] 2025-10-05 13:48:34,822 - root - INFO - step: 24890 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7910 +[titan] 2025-10-05 13:48:34,822 - root - INFO - lr: 1.9320e-05 gnorm: 1.05 [15:14:24< 9:15:06] +[titan] 2025-10-05 13:48:45,673 - root - INFO - step: 24895 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:48:45,673 - root - INFO - lr: 1.9311e-05 gnorm: 1.13 [15:14:35< 9:14:55] +[titan] 2025-10-05 13:48:54,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:48:56,513 - root - INFO - step: 24900 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 13:48:56,513 - root - INFO - lr: 1.9303e-05 gnorm: 1.08 [15:14:46< 9:14:44] +[titan] 2025-10-05 13:49:07,354 - root - INFO - step: 24905 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 13:49:07,354 - root - INFO - lr: 1.9295e-05 gnorm: 1.09 [15:14:56< 9:14:33] +[titan] 2025-10-05 13:49:18,206 - root - INFO - step: 24910 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 13:49:18,206 - root - INFO - lr: 1.9286e-05 gnorm: 1.06 [15:15:07< 9:14:22] +[titan] 2025-10-05 13:49:29,079 - root - INFO - step: 24915 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 13:49:29,080 - root - INFO - lr: 1.9278e-05 gnorm: 1.07 [15:15:18< 9:14:10] +[titan] 2025-10-05 13:49:39,928 - root - INFO - step: 24920 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8887 +[titan] 2025-10-05 13:49:39,928 - root - INFO - lr: 1.9270e-05 gnorm: 1.10 [15:15:29< 9:13:59] +[titan] 2025-10-05 13:49:50,803 - root - INFO - step: 24925 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7714 +[titan] 2025-10-05 13:49:50,804 - root - INFO - lr: 1.9261e-05 gnorm: 1.05 [15:15:40< 9:13:48] +[titan] 2025-10-05 13:50:01,632 - root - INFO - step: 24930 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7977 +[titan] 2025-10-05 13:50:01,632 - root - INFO - lr: 
1.9253e-05 gnorm: 1.12 [15:15:51< 9:13:37] +[titan] 2025-10-05 13:50:12,484 - root - INFO - step: 24935 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 13:50:12,484 - root - INFO - lr: 1.9245e-05 gnorm: 1.08 [15:16:01< 9:13:26] +[titan] 2025-10-05 13:50:23,352 - root - INFO - step: 24940 loss: 2.0643 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 13:50:23,352 - root - INFO - lr: 1.9236e-05 gnorm: 1.08 [15:16:12< 9:13:15] +[titan] 2025-10-05 13:50:34,241 - root - INFO - step: 24945 loss: 2.0637 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:50:34,241 - root - INFO - lr: 1.9228e-05 gnorm: 1.09 [15:16:23< 9:13:04] +[titan] 2025-10-05 13:50:42,919 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:50:45,094 - root - INFO - step: 24950 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:50:45,094 - root - INFO - lr: 1.9220e-05 gnorm: 1.09 [15:16:34< 9:12:53] +[titan] 2025-10-05 13:50:55,957 - root - INFO - step: 24955 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 13:50:55,957 - root - INFO - lr: 1.9212e-05 gnorm: 1.10 [15:16:45< 9:12:41] +[titan] 2025-10-05 13:51:06,846 - root - INFO - step: 24960 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:51:06,846 - root - INFO - lr: 1.9203e-05 gnorm: 1.18 [15:16:56< 9:12:30] +[titan] 2025-10-05 13:51:17,738 - root - INFO - step: 24965 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:51:17,738 - root - INFO - lr: 1.9195e-05 gnorm: 1.09 [15:17:07< 9:12:19] +[titan] 2025-10-05 13:51:28,604 - root - INFO - step: 24970 loss: 2.1023 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:51:28,604 - root - INFO - lr: 1.9187e-05 gnorm: 1.11 [15:17:18< 9:12:08] +[titan] 2025-10-05 13:51:39,453 - root - INFO - step: 24975 loss: 2.0306 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 13:51:39,453 - root - INFO - lr: 1.9178e-05 gnorm: 1.12 [15:17:28< 9:11:57] +[titan] 2025-10-05 13:51:50,305 - root - INFO - step: 24980 loss: 2.0966 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8535 +[titan] 2025-10-05 13:51:50,305 - root - INFO - lr: 
1.9170e-05 gnorm: 1.08 [15:17:39< 9:11:46] +[titan] 2025-10-05 13:52:01,147 - root - INFO - step: 24985 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7712 +[titan] 2025-10-05 13:52:01,147 - root - INFO - lr: 1.9162e-05 gnorm: 1.09 [15:17:50< 9:11:35] +[titan] 2025-10-05 13:52:12,002 - root - INFO - step: 24990 loss: 2.0567 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 13:52:12,002 - root - INFO - lr: 1.9154e-05 gnorm: 1.09 [15:18:01< 9:11:24] +[titan] 2025-10-05 13:52:22,852 - root - INFO - step: 24995 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:52:22,852 - root - INFO - lr: 1.9145e-05 gnorm: 1.08 [15:18:12< 9:11:13] +[titan] 2025-10-05 13:52:31,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:52:33,740 - root - INFO - step: 25000 loss: 2.0319 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7959 +[titan] 2025-10-05 13:52:33,740 - root - INFO - lr: 1.9137e-05 gnorm: 1.07 [15:18:23< 9:11:01] +[titan] 2025-10-05 13:52:33,740 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 13:52:51,425 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 13:52:51,425 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.68 seconds. 
+[titan] 2025-10-05 13:54:51,998 - root - INFO - step: 25005 loss: 2.0275 memory: 118.84GiB(85.28%) tps: 2,370 tflops: 32.88 mfu: 3.32% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7926 +[titan] 2025-10-05 13:54:51,999 - root - INFO - lr: 1.9129e-05 gnorm: 1.11 [15:20:41< 9:12:07] +[titan] 2025-10-05 13:55:02,804 - root - INFO - step: 25010 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8047 +[titan] 2025-10-05 13:55:02,804 - root - INFO - lr: 1.9120e-05 gnorm: 1.11 [15:20:52< 9:11:56] +[titan] 2025-10-05 13:55:13,603 - root - INFO - step: 25015 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:55:13,603 - root - INFO - lr: 1.9112e-05 gnorm: 1.08 [15:21:03< 9:11:44] +[titan] 2025-10-05 13:55:24,411 - root - INFO - step: 25020 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 13:55:24,411 - root - INFO - lr: 1.9104e-05 gnorm: 1.12 [15:21:13< 9:11:33] +[titan] 2025-10-05 13:55:35,262 - root - INFO - step: 25025 loss: 2.0508 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:55:35,263 - root - INFO - lr: 1.9096e-05 gnorm: 1.09 [15:21:24< 9:11:22] +[titan] 2025-10-05 13:55:46,139 - root - INFO - step: 25030 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 13:55:46,139 - root - INFO - lr: 1.9087e-05 gnorm: 1.12 [15:21:35< 9:11:11] +[titan] 2025-10-05 13:55:56,971 - root - INFO - step: 25035 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 13:55:56,971 - root - INFO - lr: 
1.9079e-05 gnorm: 1.06 [15:21:46< 9:11:00] +[titan] 2025-10-05 13:56:07,833 - root - INFO - step: 25040 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7890 +[titan] 2025-10-05 13:56:07,833 - root - INFO - lr: 1.9071e-05 gnorm: 1.09 [15:21:57< 9:10:49] +[titan] 2025-10-05 13:56:18,697 - root - INFO - step: 25045 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 13:56:18,697 - root - INFO - lr: 1.9062e-05 gnorm: 1.07 [15:22:08< 9:10:37] +[titan] 2025-10-05 13:56:27,381 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:56:29,566 - root - INFO - step: 25050 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 13:56:29,566 - root - INFO - lr: 1.9054e-05 gnorm: 1.09 [15:22:19< 9:10:26] +[titan] 2025-10-05 13:56:40,477 - root - INFO - step: 25055 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 13:56:40,478 - root - INFO - lr: 1.9046e-05 gnorm: 1.10 [15:22:29< 9:10:15] +[titan] 2025-10-05 13:56:51,355 - root - INFO - step: 25060 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 13:56:51,355 - root - INFO - lr: 1.9038e-05 gnorm: 1.09 [15:22:40< 9:10:04] +[titan] 2025-10-05 13:57:02,218 - root - INFO - step: 25065 loss: 2.1039 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:57:02,218 - root - INFO - lr: 1.9029e-05 gnorm: 1.13 [15:22:51< 9:09:53] +[titan] 2025-10-05 13:57:13,100 - root - INFO - step: 25070 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 
30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 13:57:13,100 - root - INFO - lr: 1.9021e-05 gnorm: 1.08 [15:23:02< 9:09:42] +[titan] 2025-10-05 13:57:23,991 - root - INFO - step: 25075 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 13:57:23,991 - root - INFO - lr: 1.9013e-05 gnorm: 1.07 [15:23:13< 9:09:30] +[titan] 2025-10-05 13:57:34,864 - root - INFO - step: 25080 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 13:57:34,864 - root - INFO - lr: 1.9005e-05 gnorm: 1.05 [15:23:24< 9:09:19] +[titan] 2025-10-05 13:57:45,884 - root - INFO - step: 25085 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 29,737 tflops: 412.55 mfu: 41.71% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8452 +[titan] 2025-10-05 13:57:45,884 - root - INFO - lr: 1.8996e-05 gnorm: 1.11 [15:23:35< 9:09:08] +[titan] 2025-10-05 13:57:52,584 - root - INFO - Dumping profiler traces at step 25088 +[titan] 2025-10-05 13:57:52,627 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:57:56,994 - root - INFO - step: 25090 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 13:57:56,994 - root - INFO - lr: 1.8988e-05 gnorm: 1.10 [15:23:46< 9:08:57] +[titan] 2025-10-05 13:58:07,853 - root - INFO - step: 25095 loss: 2.0873 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 13:58:07,853 - root - INFO - lr: 1.8980e-05 gnorm: 1.09 [15:23:57< 9:08:46] +[titan] 2025-10-05 13:58:16,516 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:58:18,699 - root - INFO - step: 25100 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7915 +[titan] 2025-10-05 13:58:18,700 - root - INFO - lr: 1.8972e-05 gnorm: 1.07 [15:24:08< 9:08:35] +[titan] 2025-10-05 13:58:29,551 - root - INFO - step: 25105 loss: 2.0232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 13:58:29,551 - root - INFO - lr: 1.8963e-05 gnorm: 1.09 [15:24:19< 9:08:24] +[titan] 2025-10-05 13:58:40,400 - root - INFO - step: 25110 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 13:58:40,400 - root - INFO - lr: 1.8955e-05 gnorm: 1.11 [15:24:29< 9:08:13] +[titan] 2025-10-05 13:58:51,352 - root - INFO - step: 25115 loss: 2.0288 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 13:58:51,352 - root - INFO - lr: 1.8947e-05 gnorm: 1.09 [15:24:40< 9:08:02] +[titan] 2025-10-05 13:59:02,234 - root - INFO - step: 25120 loss: 2.0905 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8475 +[titan] 2025-10-05 13:59:02,234 - root - INFO - lr: 1.8939e-05 gnorm: 1.09 [15:24:51< 9:07:50] +[titan] 2025-10-05 13:59:13,119 - root - INFO - step: 25125 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8215 +[titan] 2025-10-05 13:59:13,120 - root - INFO - lr: 1.8930e-05 gnorm: 1.07 [15:25:02< 9:07:39] +[titan] 2025-10-05 13:59:23,995 - root - INFO - step: 25130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 13:59:23,996 - root - INFO - lr: 
1.8922e-05 gnorm: 1.07 [15:25:13< 9:07:28] +[titan] 2025-10-05 13:59:34,878 - root - INFO - step: 25135 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8860 +[titan] 2025-10-05 13:59:34,879 - root - INFO - lr: 1.8914e-05 gnorm: 1.12 [15:25:24< 9:07:17] +[titan] 2025-10-05 13:59:45,774 - root - INFO - step: 25140 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 13:59:45,775 - root - INFO - lr: 1.8905e-05 gnorm: 1.07 [15:25:35< 9:07:06] +[titan] 2025-10-05 13:59:56,648 - root - INFO - step: 25145 loss: 2.0630 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:59:56,648 - root - INFO - lr: 1.8897e-05 gnorm: 1.08 [15:25:46< 9:06:55] +[titan] 2025-10-05 14:00:05,337 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:00:07,518 - root - INFO - step: 25150 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8682 +[titan] 2025-10-05 14:00:07,519 - root - INFO - lr: 1.8889e-05 gnorm: 1.15 [15:25:57< 9:06:44] +[titan] 2025-10-05 14:00:18,376 - root - INFO - step: 25155 loss: 2.0122 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 14:00:18,377 - root - INFO - lr: 1.8881e-05 gnorm: 1.04 [15:26:07< 9:06:32] +[titan] 2025-10-05 14:00:29,255 - root - INFO - step: 25160 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 14:00:29,255 - root - INFO - lr: 1.8873e-05 gnorm: 1.08 [15:26:18< 9:06:21] +[titan] 2025-10-05 14:00:40,131 - root - INFO - step: 25165 loss: 2.0645 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 14:00:40,131 - root - INFO - lr: 1.8864e-05 gnorm: 1.09 [15:26:29< 9:06:10] +[titan] 2025-10-05 14:00:51,071 - root - INFO - step: 25170 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8358 +[titan] 2025-10-05 14:00:51,071 - root - INFO - lr: 1.8856e-05 gnorm: 1.06 [15:26:40< 9:05:59] +[titan] 2025-10-05 14:01:01,932 - root - INFO - step: 25175 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 14:01:01,932 - root - INFO - lr: 1.8848e-05 gnorm: 1.09 [15:26:51< 9:05:48] +[titan] 2025-10-05 14:01:12,823 - root - INFO - step: 25180 loss: 2.0514 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8133 +[titan] 2025-10-05 14:01:12,824 - root - INFO - lr: 
1.8840e-05 gnorm: 1.08 [15:27:02< 9:05:37] +[titan] 2025-10-05 14:01:23,713 - root - INFO - step: 25185 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8457 +[titan] 2025-10-05 14:01:23,713 - root - INFO - lr: 1.8831e-05 gnorm: 1.04 [15:27:13< 9:05:26] +[titan] 2025-10-05 14:01:34,565 - root - INFO - step: 25190 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8554 +[titan] 2025-10-05 14:01:34,565 - root - INFO - lr: 1.8823e-05 gnorm: 1.08 [15:27:24< 9:05:14] +[titan] 2025-10-05 14:01:45,489 - root - INFO - step: 25195 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 14:01:45,489 - root - INFO - lr: 1.8815e-05 gnorm: 1.10 [15:27:34< 9:05:03] +[titan] 2025-10-05 14:01:54,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:01:56,348 - root - INFO - step: 25200 loss: 2.1289 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8821 +[titan] 2025-10-05 14:01:56,348 - root - INFO - lr: 1.8807e-05 gnorm: 1.13 [15:27:45< 9:04:52] +[titan] 2025-10-05 14:02:07,198 - root - INFO - step: 25205 loss: 2.0344 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:02:07,198 - root - INFO - lr: 1.8798e-05 gnorm: 1.06 [15:27:56< 9:04:41] +[titan] 2025-10-05 14:02:18,072 - root - INFO - step: 25210 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 14:02:18,073 - root - INFO - lr: 1.8790e-05 gnorm: 1.09 [15:28:07< 9:04:30] +[titan] 2025-10-05 14:02:28,950 - root - INFO - step: 25215 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:02:28,951 - root - INFO - lr: 1.8782e-05 gnorm: 1.11 [15:28:18< 9:04:19] +[titan] 2025-10-05 14:02:39,828 - root - INFO - step: 25220 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 14:02:39,828 - root - INFO - lr: 1.8774e-05 gnorm: 1.10 [15:28:29< 9:04:08] +[titan] 2025-10-05 14:02:50,798 - root - INFO - step: 25225 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 14:02:50,798 - root - INFO - lr: 1.8765e-05 gnorm: 1.10 [15:28:40< 9:03:56] +[titan] 2025-10-05 14:03:01,706 - root - INFO - step: 25230 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 14:03:01,706 - root - INFO - lr: 
1.8757e-05 gnorm: 1.07 [15:28:51< 9:03:45] +[titan] 2025-10-05 14:03:12,597 - root - INFO - step: 25235 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 14:03:12,598 - root - INFO - lr: 1.8749e-05 gnorm: 1.08 [15:29:02< 9:03:34] +[titan] 2025-10-05 14:03:23,476 - root - INFO - step: 25240 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:03:23,477 - root - INFO - lr: 1.8741e-05 gnorm: 1.05 [15:29:12< 9:03:23] +[titan] 2025-10-05 14:03:34,394 - root - INFO - step: 25245 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 14:03:34,395 - root - INFO - lr: 1.8733e-05 gnorm: 1.06 [15:29:23< 9:03:12] +[titan] 2025-10-05 14:03:43,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:03:45,291 - root - INFO - step: 25250 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7908 +[titan] 2025-10-05 14:03:45,292 - root - INFO - lr: 1.8724e-05 gnorm: 1.08 [15:29:34< 9:03:01] +[titan] 2025-10-05 14:03:56,215 - root - INFO - step: 25255 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8068 +[titan] 2025-10-05 14:03:56,215 - root - INFO - lr: 1.8716e-05 gnorm: 1.07 [15:29:45< 9:02:50] +[titan] 2025-10-05 14:04:07,069 - root - INFO - step: 25260 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7296 +[titan] 2025-10-05 14:04:07,070 - root - INFO - lr: 1.8708e-05 gnorm: 1.09 [15:29:56< 9:02:39] +[titan] 2025-10-05 14:04:17,929 - root - INFO - step: 25265 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 14:04:17,929 - root - INFO - lr: 1.8700e-05 gnorm: 1.05 [15:30:07< 9:02:27] +[titan] 2025-10-05 14:04:28,778 - root - INFO - step: 25270 loss: 2.0659 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8256 +[titan] 2025-10-05 14:04:28,778 - root - INFO - lr: 1.8692e-05 gnorm: 1.05 [15:30:18< 9:02:16] +[titan] 2025-10-05 14:04:39,663 - root - INFO - step: 25275 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:04:39,663 - root - INFO - lr: 1.8683e-05 gnorm: 1.10 [15:30:29< 9:02:05] +[titan] 2025-10-05 14:04:50,619 - root - INFO - step: 25280 loss: 2.0423 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 14:04:50,620 - root - INFO - lr: 
1.8675e-05 gnorm: 1.10 [15:30:40< 9:01:54] +[titan] 2025-10-05 14:05:01,490 - root - INFO - step: 25285 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 14:05:01,490 - root - INFO - lr: 1.8667e-05 gnorm: 1.07 [15:30:50< 9:01:43] +[titan] 2025-10-05 14:05:12,363 - root - INFO - step: 25290 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 14:05:12,363 - root - INFO - lr: 1.8659e-05 gnorm: 1.08 [15:31:01< 9:01:32] +[titan] 2025-10-05 14:05:23,239 - root - INFO - step: 25295 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 14:05:23,239 - root - INFO - lr: 1.8650e-05 gnorm: 1.12 [15:31:12< 9:01:21] +[titan] 2025-10-05 14:05:31,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:05:34,099 - root - INFO - step: 25300 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:05:34,099 - root - INFO - lr: 1.8642e-05 gnorm: 1.10 [15:31:23< 9:01:09] +[titan] 2025-10-05 14:05:44,978 - root - INFO - step: 25305 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 14:05:44,978 - root - INFO - lr: 1.8634e-05 gnorm: 1.07 [15:31:34< 9:00:58] +[titan] 2025-10-05 14:05:55,924 - root - INFO - step: 25310 loss: 2.0792 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 41.99% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8356 +[titan] 2025-10-05 14:05:55,924 - root - INFO - lr: 1.8626e-05 gnorm: 1.11 [15:31:45< 9:00:47] +[titan] 2025-10-05 14:06:06,777 - root - INFO - step: 25315 loss: 2.0737 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8330 +[titan] 2025-10-05 14:06:06,777 - root - INFO - lr: 1.8618e-05 gnorm: 1.08 [15:31:56< 9:00:36] +[titan] 2025-10-05 14:06:17,654 - root - INFO - step: 25320 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 14:06:17,654 - root - INFO - lr: 1.8609e-05 gnorm: 1.06 [15:32:07< 9:00:25] +[titan] 2025-10-05 14:06:28,537 - root - INFO - step: 25325 loss: 2.1056 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 14:06:28,537 - root - INFO - lr: 1.8601e-05 gnorm: 1.08 [15:32:18< 9:00:14] +[titan] 2025-10-05 14:06:39,411 - root - INFO - step: 25330 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:06:39,411 - root - INFO - lr: 
1.8593e-05 gnorm: 1.11 [15:32:28< 9:00:03] +[titan] 2025-10-05 14:06:50,340 - root - INFO - step: 25335 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:06:50,341 - root - INFO - lr: 1.8585e-05 gnorm: 1.10 [15:32:39< 8:59:51] +[titan] 2025-10-05 14:07:01,212 - root - INFO - step: 25340 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 14:07:01,212 - root - INFO - lr: 1.8577e-05 gnorm: 1.08 [15:32:50< 8:59:40] +[titan] 2025-10-05 14:07:12,114 - root - INFO - step: 25345 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 14:07:12,114 - root - INFO - lr: 1.8568e-05 gnorm: 1.06 [15:33:01< 8:59:29] +[titan] 2025-10-05 14:07:20,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:07:22,994 - root - INFO - step: 25350 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 14:07:22,994 - root - INFO - lr: 1.8560e-05 gnorm: 1.06 [15:33:12< 8:59:18] +[titan] 2025-10-05 14:07:33,878 - root - INFO - step: 25355 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8658 +[titan] 2025-10-05 14:07:33,878 - root - INFO - lr: 1.8552e-05 gnorm: 1.11 [15:33:23< 8:59:07] +[titan] 2025-10-05 14:07:44,774 - root - INFO - step: 25360 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 14:07:44,774 - root - INFO - lr: 1.8544e-05 gnorm: 1.08 [15:33:34< 8:58:56] +[titan] 2025-10-05 14:07:55,691 - root - INFO - step: 25365 loss: 2.0709 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:07:55,691 - root - INFO - lr: 1.8536e-05 gnorm: 1.08 [15:33:45< 8:58:45] +[titan] 2025-10-05 14:08:06,573 - root - INFO - step: 25370 loss: 2.0036 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 14:08:06,574 - root - INFO - lr: 1.8528e-05 gnorm: 1.08 [15:33:56< 8:58:34] +[titan] 2025-10-05 14:08:17,490 - root - INFO - step: 25375 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 14:08:17,490 - root - INFO - lr: 1.8519e-05 gnorm: 1.13 [15:34:06< 8:58:22] +[titan] 2025-10-05 14:08:28,356 - root - INFO - step: 25380 loss: 2.1491 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 14:08:28,357 - root - INFO - lr: 
1.8511e-05 gnorm: 1.09 [15:34:17< 8:58:11] +[titan] 2025-10-05 14:08:39,210 - root - INFO - step: 25385 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:08:39,210 - root - INFO - lr: 1.8503e-05 gnorm: 1.09 [15:34:28< 8:58:00] +[titan] 2025-10-05 14:08:50,100 - root - INFO - step: 25390 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 14:08:50,100 - root - INFO - lr: 1.8495e-05 gnorm: 1.11 [15:34:39< 8:57:49] +[titan] 2025-10-05 14:09:00,958 - root - INFO - step: 25395 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 14:09:00,959 - root - INFO - lr: 1.8487e-05 gnorm: 1.09 [15:34:50< 8:57:38] +[titan] 2025-10-05 14:09:09,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:09:11,824 - root - INFO - step: 25400 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:09:11,824 - root - INFO - lr: 1.8478e-05 gnorm: 1.09 [15:35:01< 8:57:27] +[titan] 2025-10-05 14:09:22,722 - root - INFO - step: 25405 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 14:09:22,722 - root - INFO - lr: 1.8470e-05 gnorm: 1.06 [15:35:12< 8:57:16] +[titan] 2025-10-05 14:09:33,582 - root - INFO - step: 25410 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 14:09:33,583 - root - INFO - lr: 1.8462e-05 gnorm: 1.07 [15:35:23< 8:57:04] +[titan] 2025-10-05 14:09:44,445 - root - INFO - step: 25415 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 14:09:44,445 - root - INFO - lr: 1.8454e-05 gnorm: 1.07 [15:35:33< 8:56:53] +[titan] 2025-10-05 14:09:55,342 - root - INFO - step: 25420 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8650 +[titan] 2025-10-05 14:09:55,342 - root - INFO - lr: 1.8446e-05 gnorm: 1.08 [15:35:44< 8:56:42] +[titan] 2025-10-05 14:10:06,229 - root - INFO - step: 25425 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 14:10:06,229 - root - INFO - lr: 1.8438e-05 gnorm: 1.09 [15:35:55< 8:56:31] +[titan] 2025-10-05 14:10:17,110 - root - INFO - step: 25430 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 14:10:17,110 - root - INFO - lr: 
1.8429e-05 gnorm: 1.09 [15:36:06< 8:56:20] +[titan] 2025-10-05 14:10:28,014 - root - INFO - step: 25435 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 14:10:28,014 - root - INFO - lr: 1.8421e-05 gnorm: 1.05 [15:36:17< 8:56:09] +[titan] 2025-10-05 14:10:38,939 - root - INFO - step: 25440 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8011 +[titan] 2025-10-05 14:10:38,939 - root - INFO - lr: 1.8413e-05 gnorm: 1.10 [15:36:28< 8:55:58] +[titan] 2025-10-05 14:10:49,824 - root - INFO - step: 25445 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 14:10:49,825 - root - INFO - lr: 1.8405e-05 gnorm: 1.08 [15:36:39< 8:55:47] +[titan] 2025-10-05 14:10:58,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:11:00,730 - root - INFO - step: 25450 loss: 2.0470 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 14:11:00,730 - root - INFO - lr: 1.8397e-05 gnorm: 1.07 [15:36:50< 8:55:35] +[titan] 2025-10-05 14:11:11,607 - root - INFO - step: 25455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 14:11:11,607 - root - INFO - lr: 1.8389e-05 gnorm: 1.07 [15:37:01< 8:55:24] +[titan] 2025-10-05 14:11:22,482 - root - INFO - step: 25460 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 14:11:22,482 - root - INFO - lr: 1.8380e-05 gnorm: 1.10 [15:37:11< 8:55:13] +[titan] 2025-10-05 14:11:33,348 - root - INFO - step: 25465 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:11:33,348 - root - INFO - lr: 1.8372e-05 gnorm: 1.09 [15:37:22< 8:55:02] +[titan] 2025-10-05 14:11:44,248 - root - INFO - step: 25470 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 14:11:44,248 - root - INFO - lr: 1.8364e-05 gnorm: 1.09 [15:37:33< 8:54:51] +[titan] 2025-10-05 14:11:55,157 - root - INFO - step: 25475 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 14:11:55,157 - root - INFO - lr: 1.8356e-05 gnorm: 1.09 [15:37:44< 8:54:40] +[titan] 2025-10-05 14:12:06,026 - root - INFO - step: 25480 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 14:12:06,026 - root - INFO - lr: 
1.8348e-05 gnorm: 1.07 [15:37:55< 8:54:29] +[titan] 2025-10-05 14:12:16,908 - root - INFO - step: 25485 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:12:16,909 - root - INFO - lr: 1.8340e-05 gnorm: 1.10 [15:38:06< 8:54:17] +[titan] 2025-10-05 14:12:27,776 - root - INFO - step: 25490 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7677 +[titan] 2025-10-05 14:12:27,776 - root - INFO - lr: 1.8332e-05 gnorm: 1.09 [15:38:17< 8:54:06] +[titan] 2025-10-05 14:12:38,651 - root - INFO - step: 25495 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 14:12:38,651 - root - INFO - lr: 1.8323e-05 gnorm: 1.08 [15:38:28< 8:53:55] +[titan] 2025-10-05 14:12:47,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:12:49,537 - root - INFO - step: 25500 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7501 +[titan] 2025-10-05 14:12:49,537 - root - INFO - lr: 1.8315e-05 gnorm: 1.13 [15:38:38< 8:53:44] +[titan] 2025-10-05 14:13:00,470 - root - INFO - step: 25505 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:13:00,470 - root - INFO - lr: 1.8307e-05 gnorm: 1.08 [15:38:49< 8:53:33] +[titan] 2025-10-05 14:13:11,338 - root - INFO - step: 25510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 14:13:11,338 - root - INFO - lr: 1.8299e-05 gnorm: 1.11 [15:39:00< 8:53:22] +[titan] 2025-10-05 14:13:22,196 - root - INFO - step: 25515 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 14:13:22,197 - root - INFO - lr: 1.8291e-05 gnorm: 1.17 [15:39:11< 8:53:11] +[titan] 2025-10-05 14:13:33,046 - root - INFO - step: 25520 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 14:13:33,047 - root - INFO - lr: 1.8283e-05 gnorm: 1.07 [15:39:22< 8:52:59] +[titan] 2025-10-05 14:13:43,917 - root - INFO - step: 25525 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 14:13:43,917 - root - INFO - lr: 1.8275e-05 gnorm: 1.12 [15:39:33< 8:52:48] +[titan] 2025-10-05 14:13:54,888 - root - INFO - step: 25530 loss: 2.1016 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 14:13:54,888 - root - INFO - lr: 
1.8266e-05 gnorm: 1.14 [15:39:44< 8:52:37] +[titan] 2025-10-05 14:14:05,796 - root - INFO - step: 25535 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:14:05,796 - root - INFO - lr: 1.8258e-05 gnorm: 1.11 [15:39:55< 8:52:26] +[titan] 2025-10-05 14:14:16,658 - root - INFO - step: 25540 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 14:14:16,658 - root - INFO - lr: 1.8250e-05 gnorm: 1.12 [15:40:06< 8:52:15] +[titan] 2025-10-05 14:14:27,520 - root - INFO - step: 25545 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 14:14:27,521 - root - INFO - lr: 1.8242e-05 gnorm: 1.08 [15:40:16< 8:52:04] +[titan] 2025-10-05 14:14:36,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:14:38,398 - root - INFO - step: 25550 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 14:14:38,398 - root - INFO - lr: 1.8234e-05 gnorm: 1.07 [15:40:27< 8:51:53] +[titan] 2025-10-05 14:14:49,271 - root - INFO - step: 25555 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:14:49,271 - root - INFO - lr: 1.8226e-05 gnorm: 1.10 [15:40:38< 8:51:42] +[titan] 2025-10-05 14:15:00,189 - root - INFO - step: 25560 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:15:00,189 - root - INFO - lr: 1.8218e-05 gnorm: 1.05 [15:40:49< 8:51:30] +[titan] 2025-10-05 14:15:11,120 - root - INFO - step: 25565 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 14:15:11,121 - root - INFO - lr: 1.8209e-05 gnorm: 1.07 [15:41:00< 8:51:19] +[titan] 2025-10-05 14:15:21,997 - root - INFO - step: 25570 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 14:15:21,997 - root - INFO - lr: 1.8201e-05 gnorm: 1.56 [15:41:11< 8:51:08] +[titan] 2025-10-05 14:15:32,888 - root - INFO - step: 25575 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 14:15:32,888 - root - INFO - lr: 1.8193e-05 gnorm: 1.07 [15:41:22< 8:50:57] +[titan] 2025-10-05 14:15:43,769 - root - INFO - step: 25580 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8710 +[titan] 2025-10-05 14:15:43,769 - root - INFO - lr: 
1.8185e-05 gnorm: 1.07 [15:41:33< 8:50:46] +[titan] 2025-10-05 14:15:54,652 - root - INFO - step: 25585 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 14:15:54,652 - root - INFO - lr: 1.8177e-05 gnorm: 1.05 [15:41:44< 8:50:35] +[titan] 2025-10-05 14:16:05,536 - root - INFO - step: 25590 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 14:16:05,536 - root - INFO - lr: 1.8169e-05 gnorm: 1.07 [15:41:54< 8:50:24] +[titan] 2025-10-05 14:16:16,420 - root - INFO - step: 25595 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:16:16,420 - root - INFO - lr: 1.8161e-05 gnorm: 1.09 [15:42:05< 8:50:13] +[titan] 2025-10-05 14:16:25,234 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:16:27,418 - root - INFO - step: 25600 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 29,795 tflops: 413.36 mfu: 41.80% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 14:16:27,418 - root - INFO - lr: 1.8153e-05 gnorm: 1.10 [15:42:16< 8:50:01] +[titan] 2025-10-05 14:16:27,596 - root - INFO - Dumping profiler traces at step 25600 +[titan] 2025-10-05 14:16:27,633 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:16:38,481 - root - INFO - step: 25605 loss: 2.0476 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:16:38,481 - root - INFO - lr: 1.8144e-05 gnorm: 1.11 [15:42:27< 8:49:50] +[titan] 2025-10-05 14:16:49,316 - root - INFO - step: 25610 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8107 +[titan] 2025-10-05 14:16:49,316 - root - INFO - lr: 1.8136e-05 gnorm: 1.06 [15:42:38< 8:49:39] +[titan] 2025-10-05 14:17:00,171 - root - INFO - step: 25615 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 14:17:00,172 - root - INFO - lr: 1.8128e-05 gnorm: 1.06 [15:42:49< 8:49:28] +[titan] 2025-10-05 14:17:11,028 - root - INFO - step: 25620 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 14:17:11,028 - root - INFO - lr: 1.8120e-05 gnorm: 1.08 [15:43:00< 8:49:17] +[titan] 2025-10-05 14:17:21,893 - root - INFO - step: 25625 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 14:17:21,894 - root - INFO - lr: 1.8112e-05 gnorm: 1.08 [15:43:11< 8:49:06] +[titan] 2025-10-05 14:17:32,791 - root - INFO - step: 25630 loss: 
2.0937 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8510 +[titan] 2025-10-05 14:17:32,791 - root - INFO - lr: 1.8104e-05 gnorm: 1.17 [15:43:22< 8:48:55] +[titan] 2025-10-05 14:17:43,645 - root - INFO - step: 25635 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 14:17:43,645 - root - INFO - lr: 1.8096e-05 gnorm: 1.09 [15:43:33< 8:48:44] +[titan] 2025-10-05 14:17:54,490 - root - INFO - step: 25640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:17:54,490 - root - INFO - lr: 1.8088e-05 gnorm: 1.07 [15:43:43< 8:48:32] +[titan] 2025-10-05 14:18:05,362 - root - INFO - step: 25645 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:18:05,362 - root - INFO - lr: 1.8080e-05 gnorm: 1.09 [15:43:54< 8:48:21] +[titan] 2025-10-05 14:18:14,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:18:16,215 - root - INFO - step: 25650 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 14:18:16,215 - root - INFO - lr: 1.8071e-05 gnorm: 1.09 [15:44:05< 8:48:10] +[titan] 2025-10-05 14:18:27,067 - root - INFO - step: 25655 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 14:18:27,068 - root - INFO - lr: 1.8063e-05 gnorm: 1.05 [15:44:16< 8:47:59] +[titan] 2025-10-05 14:18:37,921 - root - INFO - step: 25660 loss: 2.0284 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7927 +[titan] 2025-10-05 14:18:37,921 - root - INFO - lr: 1.8055e-05 gnorm: 1.09 [15:44:27< 8:47:48] +[titan] 2025-10-05 14:18:48,835 - root - INFO - step: 25665 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 14:18:48,835 - root - INFO - lr: 1.8047e-05 gnorm: 1.08 [15:44:38< 8:47:37] +[titan] 2025-10-05 14:18:59,735 - root - INFO - step: 25670 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:18:59,736 - root - INFO - lr: 1.8039e-05 gnorm: 1.11 [15:44:49< 8:47:26] +[titan] 2025-10-05 14:19:10,621 - root - INFO - step: 25675 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 14:19:10,621 - root - INFO - lr: 1.8031e-05 gnorm: 1.12 [15:45:00< 8:47:14] +[titan] 2025-10-05 14:19:21,506 - root - INFO - step: 25680 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8389 +[titan] 2025-10-05 14:19:21,506 - root - INFO - lr: 
1.8023e-05 gnorm: 1.07 [15:45:10< 8:47:03] +[titan] 2025-10-05 14:19:32,375 - root - INFO - step: 25685 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 14:19:32,375 - root - INFO - lr: 1.8015e-05 gnorm: 1.07 [15:45:21< 8:46:52] +[titan] 2025-10-05 14:19:43,253 - root - INFO - step: 25690 loss: 1.9973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7661 +[titan] 2025-10-05 14:19:43,254 - root - INFO - lr: 1.8007e-05 gnorm: 1.09 [15:45:32< 8:46:41] +[titan] 2025-10-05 14:19:54,175 - root - INFO - step: 25695 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7719 +[titan] 2025-10-05 14:19:54,175 - root - INFO - lr: 1.7999e-05 gnorm: 1.09 [15:45:43< 8:46:30] +[titan] 2025-10-05 14:20:02,862 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:20:05,037 - root - INFO - step: 25700 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 14:20:05,037 - root - INFO - lr: 1.7991e-05 gnorm: 1.10 [15:45:54< 8:46:19] +[titan] 2025-10-05 14:20:15,889 - root - INFO - step: 25705 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9084 +[titan] 2025-10-05 14:20:15,889 - root - INFO - lr: 1.7982e-05 gnorm: 1.09 [15:46:05< 8:46:08] +[titan] 2025-10-05 14:20:26,754 - root - INFO - step: 25710 loss: 2.0748 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 14:20:26,754 - root - INFO - lr: 1.7974e-05 gnorm: 1.08 [15:46:16< 8:45:57] +[titan] 2025-10-05 14:20:37,621 - root - INFO - step: 25715 loss: 2.0337 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7984 +[titan] 2025-10-05 14:20:37,621 - root - INFO - lr: 1.7966e-05 gnorm: 1.06 [15:46:27< 8:45:45] +[titan] 2025-10-05 14:20:48,501 - root - INFO - step: 25720 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 14:20:48,501 - root - INFO - lr: 1.7958e-05 gnorm: 1.07 [15:46:37< 8:45:34] +[titan] 2025-10-05 14:20:59,442 - root - INFO - step: 25725 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8266 +[titan] 2025-10-05 14:20:59,442 - root - INFO - lr: 1.7950e-05 gnorm: 1.11 [15:46:48< 8:45:23] +[titan] 2025-10-05 14:21:10,316 - root - INFO - step: 25730 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8362 +[titan] 2025-10-05 14:21:10,316 - root - INFO - lr: 
1.7942e-05 gnorm: 1.10 [15:46:59< 8:45:12] +[titan] 2025-10-05 14:21:21,179 - root - INFO - step: 25735 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 14:21:21,179 - root - INFO - lr: 1.7934e-05 gnorm: 1.11 [15:47:10< 8:45:01] +[titan] 2025-10-05 14:21:32,060 - root - INFO - step: 25740 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 14:21:32,060 - root - INFO - lr: 1.7926e-05 gnorm: 1.05 [15:47:21< 8:44:50] +[titan] 2025-10-05 14:21:42,940 - root - INFO - step: 25745 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 14:21:42,940 - root - INFO - lr: 1.7918e-05 gnorm: 1.13 [15:47:32< 8:44:39] +[titan] 2025-10-05 14:21:51,620 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:21:53,800 - root - INFO - step: 25750 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 14:21:53,800 - root - INFO - lr: 1.7910e-05 gnorm: 1.09 [15:47:43< 8:44:28] +[titan] 2025-10-05 14:22:04,676 - root - INFO - step: 25755 loss: 2.0272 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:22:04,676 - root - INFO - lr: 1.7902e-05 gnorm: 1.10 [15:47:54< 8:44:16] +[titan] 2025-10-05 14:22:15,594 - root - INFO - step: 25760 loss: 2.0342 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7988 +[titan] 2025-10-05 14:22:15,594 - root - INFO - lr: 1.7894e-05 gnorm: 1.07 [15:48:05< 8:44:05] +[titan] 2025-10-05 14:22:26,449 - root - INFO - step: 25765 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 14:22:26,449 - root - INFO - lr: 1.7885e-05 gnorm: 1.09 [15:48:15< 8:43:54] +[titan] 2025-10-05 14:22:37,310 - root - INFO - step: 25770 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 14:22:37,310 - root - INFO - lr: 1.7877e-05 gnorm: 1.05 [15:48:26< 8:43:43] +[titan] 2025-10-05 14:22:48,182 - root - INFO - step: 25775 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 14:22:48,182 - root - INFO - lr: 1.7869e-05 gnorm: 1.11 [15:48:37< 8:43:32] +[titan] 2025-10-05 14:22:59,049 - root - INFO - step: 25780 loss: 2.0127 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 14:22:59,049 - root - INFO - lr: 
1.7861e-05 gnorm: 1.06 [15:48:48< 8:43:21] +[titan] 2025-10-05 14:23:09,928 - root - INFO - step: 25785 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 14:23:09,928 - root - INFO - lr: 1.7853e-05 gnorm: 1.04 [15:48:59< 8:43:10] +[titan] 2025-10-05 14:23:20,861 - root - INFO - step: 25790 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 14:23:20,861 - root - INFO - lr: 1.7845e-05 gnorm: 1.11 [15:49:10< 8:42:58] +[titan] 2025-10-05 14:23:31,734 - root - INFO - step: 25795 loss: 2.0316 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 14:23:31,734 - root - INFO - lr: 1.7837e-05 gnorm: 1.08 [15:49:21< 8:42:47] +[titan] 2025-10-05 14:23:40,435 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:23:42,620 - root - INFO - step: 25800 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.8738 +[titan] 2025-10-05 14:23:42,620 - root - INFO - lr: 1.7829e-05 gnorm: 2.05 [15:49:32< 8:42:36] +[titan] 2025-10-05 14:23:53,479 - root - INFO - step: 25805 loss: 2.0499 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8114 +[titan] 2025-10-05 14:23:53,479 - root - INFO - lr: 1.7821e-05 gnorm: 1.10 [15:49:42< 8:42:25] +[titan] 2025-10-05 14:24:04,354 - root - INFO - step: 25810 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8199 +[titan] 2025-10-05 14:24:04,354 - root - INFO - lr: 1.7813e-05 gnorm: 1.10 [15:49:53< 8:42:14] +[titan] 2025-10-05 14:24:15,228 - root - INFO - step: 25815 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:24:15,228 - root - INFO - lr: 1.7805e-05 gnorm: 1.07 [15:50:04< 8:42:03] +[titan] 2025-10-05 14:24:26,126 - root - INFO - step: 25820 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:24:26,126 - root - INFO - lr: 1.7797e-05 gnorm: 1.11 [15:50:15< 8:41:52] +[titan] 2025-10-05 14:24:37,054 - root - INFO - step: 25825 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 14:24:37,055 - root - INFO - lr: 1.7789e-05 gnorm: 1.09 [15:50:26< 8:41:41] +[titan] 2025-10-05 14:24:47,925 - root - INFO - step: 25830 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 14:24:47,925 - root - INFO - lr: 
1.7781e-05 gnorm: 1.08 [15:50:37< 8:41:29] +[titan] 2025-10-05 14:24:58,795 - root - INFO - step: 25835 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 14:24:58,795 - root - INFO - lr: 1.7773e-05 gnorm: 1.15 [15:50:48< 8:41:18] +[titan] 2025-10-05 14:25:09,680 - root - INFO - step: 25840 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 14:25:09,680 - root - INFO - lr: 1.7765e-05 gnorm: 1.04 [15:50:59< 8:41:07] +[titan] 2025-10-05 14:25:20,542 - root - INFO - step: 25845 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7882 +[titan] 2025-10-05 14:25:20,543 - root - INFO - lr: 1.7757e-05 gnorm: 1.08 [15:51:09< 8:40:56] +[titan] 2025-10-05 14:25:29,241 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:25:31,434 - root - INFO - step: 25850 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8073 +[titan] 2025-10-05 14:25:31,434 - root - INFO - lr: 1.7749e-05 gnorm: 1.08 [15:51:20< 8:40:45] +[titan] 2025-10-05 14:25:42,355 - root - INFO - step: 25855 loss: 2.0565 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:25:42,356 - root - INFO - lr: 1.7740e-05 gnorm: 1.09 [15:51:31< 8:40:34] +[titan] 2025-10-05 14:25:53,227 - root - INFO - step: 25860 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 14:25:53,227 - root - INFO - lr: 1.7732e-05 gnorm: 1.11 [15:51:42< 8:40:23] +[titan] 2025-10-05 14:26:04,104 - root - INFO - step: 25865 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8184 +[titan] 2025-10-05 14:26:04,105 - root - INFO - lr: 1.7724e-05 gnorm: 1.11 [15:51:53< 8:40:12] +[titan] 2025-10-05 14:26:15,028 - root - INFO - step: 25870 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 14:26:15,028 - root - INFO - lr: 1.7716e-05 gnorm: 1.04 [15:52:04< 8:40:00] +[titan] 2025-10-05 14:26:25,939 - root - INFO - step: 25875 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8127 +[titan] 2025-10-05 14:26:25,939 - root - INFO - lr: 1.7708e-05 gnorm: 1.08 [15:52:15< 8:39:49] +[titan] 2025-10-05 14:26:36,815 - root - INFO - step: 25880 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:26:36,815 - root - INFO - lr: 
1.7700e-05 gnorm: 1.07 [15:52:26< 8:39:38] +[titan] 2025-10-05 14:26:47,749 - root - INFO - step: 25885 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8378 +[titan] 2025-10-05 14:26:47,749 - root - INFO - lr: 1.7692e-05 gnorm: 1.10 [15:52:37< 8:39:27] +[titan] 2025-10-05 14:26:58,622 - root - INFO - step: 25890 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:26:58,622 - root - INFO - lr: 1.7684e-05 gnorm: 1.07 [15:52:48< 8:39:16] +[titan] 2025-10-05 14:27:09,541 - root - INFO - step: 25895 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7826 +[titan] 2025-10-05 14:27:09,541 - root - INFO - lr: 1.7676e-05 gnorm: 1.10 [15:52:58< 8:39:05] +[titan] 2025-10-05 14:27:18,236 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:27:20,420 - root - INFO - step: 25900 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 14:27:20,420 - root - INFO - lr: 1.7668e-05 gnorm: 1.08 [15:53:09< 8:38:54] +[titan] 2025-10-05 14:27:31,298 - root - INFO - step: 25905 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 14:27:31,299 - root - INFO - lr: 1.7660e-05 gnorm: 1.08 [15:53:20< 8:38:43] +[titan] 2025-10-05 14:27:42,163 - root - INFO - step: 25910 loss: 2.0892 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 14:27:42,163 - root - INFO - lr: 1.7652e-05 gnorm: 1.12 [15:53:31< 8:38:31] +[titan] 2025-10-05 14:27:53,040 - root - INFO - step: 25915 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 14:27:53,041 - root - INFO - lr: 1.7644e-05 gnorm: 1.09 [15:53:42< 8:38:20] +[titan] 2025-10-05 14:28:03,938 - root - INFO - step: 25920 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 14:28:03,938 - root - INFO - lr: 1.7636e-05 gnorm: 1.05 [15:53:53< 8:38:09] +[titan] 2025-10-05 14:28:14,994 - root - INFO - step: 25925 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.18 mfu: 41.58% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8390 +[titan] 2025-10-05 14:28:14,995 - root - INFO - lr: 1.7628e-05 gnorm: 1.11 [15:54:04< 8:37:58] +[titan] 2025-10-05 14:28:25,864 - root - INFO - step: 25930 loss: 2.0995 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 14:28:25,864 - root - INFO - lr: 
1.7620e-05 gnorm: 1.09 [15:54:15< 8:37:47] +[titan] 2025-10-05 14:28:36,720 - root - INFO - step: 25935 loss: 2.0585 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 14:28:36,720 - root - INFO - lr: 1.7612e-05 gnorm: 1.12 [15:54:26< 8:37:36] +[titan] 2025-10-05 14:28:47,595 - root - INFO - step: 25940 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 14:28:47,596 - root - INFO - lr: 1.7604e-05 gnorm: 1.13 [15:54:37< 8:37:25] +[titan] 2025-10-05 14:28:58,468 - root - INFO - step: 25945 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7629 +[titan] 2025-10-05 14:28:58,469 - root - INFO - lr: 1.7596e-05 gnorm: 1.11 [15:54:47< 8:37:14] +[titan] 2025-10-05 14:29:07,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:29:09,436 - root - INFO - step: 25950 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 29,879 tflops: 414.52 mfu: 41.91% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:29:09,436 - root - INFO - lr: 1.7588e-05 gnorm: 1.14 [15:54:58< 8:37:03] +[titan] 2025-10-05 14:29:20,286 - root - INFO - step: 25955 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 14:29:20,286 - root - INFO - lr: 1.7580e-05 gnorm: 1.08 [15:55:09< 8:36:51] +[titan] 2025-10-05 14:29:31,140 - root - INFO - step: 25960 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:29:31,140 - root - INFO - lr: 1.7572e-05 gnorm: 1.08 [15:55:20< 8:36:40] +[titan] 2025-10-05 14:29:42,013 - root - INFO - step: 25965 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8359 +[titan] 2025-10-05 14:29:42,013 - root - INFO - lr: 1.7564e-05 gnorm: 1.10 [15:55:31< 8:36:29] +[titan] 2025-10-05 14:29:52,914 - root - INFO - step: 25970 loss: 2.1034 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 14:29:52,914 - root - INFO - lr: 1.7556e-05 gnorm: 1.06 [15:55:42< 8:36:18] +[titan] 2025-10-05 14:30:03,792 - root - INFO - step: 25975 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 14:30:03,792 - root - INFO - lr: 1.7548e-05 gnorm: 1.08 [15:55:53< 8:36:07] +[titan] 2025-10-05 14:30:14,715 - root - INFO - step: 25980 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8170 +[titan] 2025-10-05 14:30:14,715 - root - INFO - lr: 
1.7540e-05 gnorm: 1.11 [15:56:04< 8:35:56] +[titan] 2025-10-05 14:30:25,638 - root - INFO - step: 25985 loss: 2.0484 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:30:25,638 - root - INFO - lr: 1.7532e-05 gnorm: 1.07 [15:56:15< 8:35:45] +[titan] 2025-10-05 14:30:36,501 - root - INFO - step: 25990 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:30:36,501 - root - INFO - lr: 1.7524e-05 gnorm: 1.10 [15:56:25< 8:35:34] +[titan] 2025-10-05 14:30:47,379 - root - INFO - step: 25995 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7746 +[titan] 2025-10-05 14:30:47,379 - root - INFO - lr: 1.7516e-05 gnorm: 1.07 [15:56:36< 8:35:22] +[titan] 2025-10-05 14:30:56,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:30:58,259 - root - INFO - step: 26000 loss: 2.0535 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8148 +[titan] 2025-10-05 14:30:58,259 - root - INFO - lr: 1.7508e-05 gnorm: 1.14 [15:56:47< 8:35:11] +[titan] 2025-10-05 14:31:09,132 - root - INFO - step: 26005 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 14:31:09,132 - root - INFO - lr: 1.7500e-05 gnorm: 1.09 [15:56:58< 8:35:00] +[titan] 2025-10-05 14:31:20,058 - root - INFO - step: 26010 loss: 2.0243 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 14:31:20,058 - root - INFO - lr: 1.7492e-05 gnorm: 1.12 [15:57:09< 8:34:49] +[titan] 2025-10-05 14:31:30,973 - root - INFO - step: 26015 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:31:30,974 - root - INFO - lr: 1.7484e-05 gnorm: 1.10 [15:57:20< 8:34:38] +[titan] 2025-10-05 14:31:41,835 - root - INFO - step: 26020 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8519 +[titan] 2025-10-05 14:31:41,835 - root - INFO - lr: 1.7476e-05 gnorm: 1.16 [15:57:31< 8:34:27] +[titan] 2025-10-05 14:31:52,692 - root - INFO - step: 26025 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 14:31:52,692 - root - INFO - lr: 1.7468e-05 gnorm: 1.07 [15:57:42< 8:34:16] +[titan] 2025-10-05 14:32:03,561 - root - INFO - step: 26030 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 14:32:03,561 - root - INFO - lr: 
1.7460e-05 gnorm: 1.09 [15:57:52< 8:34:05] +[titan] 2025-10-05 14:32:14,505 - root - INFO - step: 26035 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:32:14,505 - root - INFO - lr: 1.7452e-05 gnorm: 1.10 [15:58:03< 8:33:53] +[titan] 2025-10-05 14:32:25,388 - root - INFO - step: 26040 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:32:25,388 - root - INFO - lr: 1.7444e-05 gnorm: 1.09 [15:58:14< 8:33:42] +[titan] 2025-10-05 14:32:36,316 - root - INFO - step: 26045 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8342 +[titan] 2025-10-05 14:32:36,316 - root - INFO - lr: 1.7436e-05 gnorm: 1.10 [15:58:25< 8:33:31] +[titan] 2025-10-05 14:32:45,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:32:47,196 - root - INFO - step: 26050 loss: 2.0388 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:32:47,196 - root - INFO - lr: 1.7428e-05 gnorm: 1.08 [15:58:36< 8:33:20] +[titan] 2025-10-05 14:32:58,069 - root - INFO - step: 26055 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8131 +[titan] 2025-10-05 14:32:58,069 - root - INFO - lr: 1.7420e-05 gnorm: 1.06 [15:58:47< 8:33:09] +[titan] 2025-10-05 14:33:08,972 - root - INFO - step: 26060 loss: 2.0150 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 14:33:08,972 - root - INFO - lr: 1.7412e-05 gnorm: 1.10 [15:58:58< 8:32:58] +[titan] 2025-10-05 14:33:19,953 - root - INFO - step: 26065 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:33:19,953 - root - INFO - lr: 1.7404e-05 gnorm: 1.12 [15:59:09< 8:32:47] +[titan] 2025-10-05 14:33:30,852 - root - INFO - step: 26070 loss: 2.0795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 14:33:30,852 - root - INFO - lr: 1.7396e-05 gnorm: 1.11 [15:59:20< 8:32:36] +[titan] 2025-10-05 14:33:41,755 - root - INFO - step: 26075 loss: 2.0764 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 14:33:41,755 - root - INFO - lr: 1.7388e-05 gnorm: 1.11 [15:59:31< 8:32:25] +[titan] 2025-10-05 14:33:52,678 - root - INFO - step: 26080 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 14:33:52,678 - root - INFO - lr: 
1.7380e-05 gnorm: 1.08 [15:59:42< 8:32:13] +[titan] 2025-10-05 14:34:03,540 - root - INFO - step: 26085 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:34:03,540 - root - INFO - lr: 1.7372e-05 gnorm: 1.14 [15:59:52< 8:32:02] +[titan] 2025-10-05 14:34:14,468 - root - INFO - step: 26090 loss: 2.0497 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8118 +[titan] 2025-10-05 14:34:14,468 - root - INFO - lr: 1.7364e-05 gnorm: 1.11 [16:00:03< 8:31:51] +[titan] 2025-10-05 14:34:25,355 - root - INFO - step: 26095 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7776 +[titan] 2025-10-05 14:34:25,355 - root - INFO - lr: 1.7356e-05 gnorm: 1.07 [16:00:14< 8:31:40] +[titan] 2025-10-05 14:34:34,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:34:36,218 - root - INFO - step: 26100 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 14:34:36,218 - root - INFO - lr: 1.7348e-05 gnorm: 1.06 [16:00:25< 8:31:29] +[titan] 2025-10-05 14:34:47,106 - root - INFO - step: 26105 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 14:34:47,107 - root - INFO - lr: 1.7340e-05 gnorm: 1.06 [16:00:36< 8:31:18] +[titan] 2025-10-05 14:34:58,140 - root - INFO - step: 26110 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,699 tflops: 412.03 mfu: 41.66% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:34:58,140 - root - INFO - lr: 1.7332e-05 gnorm: 1.12 [16:00:47< 8:31:07] +[titan] 2025-10-05 14:35:02,666 - root - INFO - Dumping profiler traces at step 26112 +[titan] 2025-10-05 14:35:02,705 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:35:09,255 - root - INFO - step: 26115 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 29,482 tflops: 409.02 mfu: 41.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 14:35:09,255 - root - INFO - lr: 1.7324e-05 gnorm: 1.07 [16:00:58< 8:30:56] +[titan] 2025-10-05 14:35:20,180 - root - INFO - step: 26120 loss: 1.9396 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 14:35:20,180 - root - INFO - lr: 1.7316e-05 gnorm: 1.06 [16:01:09< 8:30:45] +[titan] 2025-10-05 14:35:31,056 - root - INFO - step: 26125 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 14:35:31,056 - root - INFO - lr: 1.7309e-05 gnorm: 1.12 [16:01:20< 8:30:34] +[titan] 2025-10-05 14:35:41,959 - root - INFO - step: 26130 loss: 
2.0350 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:35:41,959 - root - INFO - lr: 1.7301e-05 gnorm: 1.07 [16:01:31< 8:30:23] +[titan] 2025-10-05 14:35:52,846 - root - INFO - step: 26135 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:35:52,847 - root - INFO - lr: 1.7293e-05 gnorm: 1.10 [16:01:42< 8:30:11] +[titan] 2025-10-05 14:36:03,715 - root - INFO - step: 26140 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7686 +[titan] 2025-10-05 14:36:03,715 - root - INFO - lr: 1.7285e-05 gnorm: 1.07 [16:01:53< 8:30:00] +[titan] 2025-10-05 14:36:14,674 - root - INFO - step: 26145 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7630 +[titan] 2025-10-05 14:36:14,674 - root - INFO - lr: 1.7277e-05 gnorm: 1.05 [16:02:04< 8:29:49] +[titan] 2025-10-05 14:36:23,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:36:25,541 - root - INFO - step: 26150 loss: 2.1124 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 14:36:25,541 - root - INFO - lr: 1.7269e-05 gnorm: 1.13 [16:02:14< 8:29:38] +[titan] 2025-10-05 14:36:36,415 - root - INFO - step: 26155 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:36:36,415 - root - INFO - lr: 1.7261e-05 gnorm: 1.12 [16:02:25< 8:29:27] +[titan] 2025-10-05 14:36:47,302 - root - INFO - step: 26160 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 14:36:47,302 - root - INFO - lr: 1.7253e-05 gnorm: 1.08 [16:02:36< 8:29:16] +[titan] 2025-10-05 14:36:58,194 - root - INFO - step: 26165 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 14:36:58,194 - root - INFO - lr: 1.7245e-05 gnorm: 1.08 [16:02:47< 8:29:05] +[titan] 2025-10-05 14:37:09,092 - root - INFO - step: 26170 loss: 2.1112 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8651 +[titan] 2025-10-05 14:37:09,092 - root - INFO - lr: 1.7237e-05 gnorm: 1.11 [16:02:58< 8:28:54] +[titan] 2025-10-05 14:37:20,018 - root - INFO - step: 26175 loss: 2.0516 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 14:37:20,018 - root - INFO - lr: 1.7229e-05 gnorm: 1.08 [16:03:09< 8:28:42] +[titan] 2025-10-05 14:37:30,897 - root - INFO - step: 26180 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:37:30,897 - root - INFO - lr: 
1.7221e-05 gnorm: 1.09 [16:03:20< 8:28:31] +[titan] 2025-10-05 14:37:41,783 - root - INFO - step: 26185 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 14:37:41,783 - root - INFO - lr: 1.7213e-05 gnorm: 1.08 [16:03:31< 8:28:20] +[titan] 2025-10-05 14:37:52,662 - root - INFO - step: 26190 loss: 1.9604 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7331 +[titan] 2025-10-05 14:37:52,662 - root - INFO - lr: 1.7205e-05 gnorm: 1.08 [16:03:42< 8:28:09] +[titan] 2025-10-05 14:38:03,547 - root - INFO - step: 26195 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 14:38:03,548 - root - INFO - lr: 1.7197e-05 gnorm: 1.08 [16:03:52< 8:27:58] +[titan] 2025-10-05 14:38:12,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:38:14,439 - root - INFO - step: 26200 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8117 +[titan] 2025-10-05 14:38:14,440 - root - INFO - lr: 1.7189e-05 gnorm: 1.06 [16:04:03< 8:27:47] +[titan] 2025-10-05 14:38:25,405 - root - INFO - step: 26205 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:38:25,405 - root - INFO - lr: 1.7181e-05 gnorm: 1.10 [16:04:14< 8:27:36] +[titan] 2025-10-05 14:38:36,296 - root - INFO - step: 26210 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:38:36,296 - root - INFO - lr: 1.7173e-05 gnorm: 1.08 [16:04:25< 8:27:25] +[titan] 2025-10-05 14:38:47,187 - root - INFO - step: 26215 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 14:38:47,187 - root - INFO - lr: 1.7166e-05 gnorm: 1.13 [16:04:36< 8:27:14] +[titan] 2025-10-05 14:38:58,082 - root - INFO - step: 26220 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:38:58,082 - root - INFO - lr: 1.7158e-05 gnorm: 1.07 [16:04:47< 8:27:02] +[titan] 2025-10-05 14:39:08,974 - root - INFO - step: 26225 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7872 +[titan] 2025-10-05 14:39:08,975 - root - INFO - lr: 1.7150e-05 gnorm: 1.06 [16:04:58< 8:26:51] +[titan] 2025-10-05 14:39:19,875 - root - INFO - step: 26230 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 14:39:19,875 - root - INFO - lr: 
1.7142e-05 gnorm: 1.07 [16:05:09< 8:26:40] +[titan] 2025-10-05 14:39:30,758 - root - INFO - step: 26235 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 14:39:30,759 - root - INFO - lr: 1.7134e-05 gnorm: 1.07 [16:05:20< 8:26:29] +[titan] 2025-10-05 14:39:41,666 - root - INFO - step: 26240 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7444 +[titan] 2025-10-05 14:39:41,666 - root - INFO - lr: 1.7126e-05 gnorm: 1.05 [16:05:31< 8:26:18] +[titan] 2025-10-05 14:39:52,544 - root - INFO - step: 26245 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 14:39:52,544 - root - INFO - lr: 1.7118e-05 gnorm: 1.08 [16:05:41< 8:26:07] +[titan] 2025-10-05 14:40:01,233 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:40:03,416 - root - INFO - step: 26250 loss: 2.0445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:40:03,416 - root - INFO - lr: 1.7110e-05 gnorm: 1.05 [16:05:52< 8:25:56] +[titan] 2025-10-05 14:40:14,284 - root - INFO - step: 26255 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9243 +[titan] 2025-10-05 14:40:14,284 - root - INFO - lr: 1.7102e-05 gnorm: 1.15 [16:06:03< 8:25:45] +[titan] 2025-10-05 14:40:25,201 - root - INFO - step: 26260 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 14:40:25,201 - root - INFO - lr: 1.7094e-05 gnorm: 1.31 [16:06:14< 8:25:33] +[titan] 2025-10-05 14:40:36,067 - root - INFO - step: 26265 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 14:40:36,067 - root - INFO - lr: 1.7086e-05 gnorm: 1.07 [16:06:25< 8:25:22] +[titan] 2025-10-05 14:40:46,986 - root - INFO - step: 26270 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 14:40:46,986 - root - INFO - lr: 1.7078e-05 gnorm: 1.12 [16:06:36< 8:25:11] +[titan] 2025-10-05 14:40:57,856 - root - INFO - step: 26275 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 14:40:57,856 - root - INFO - lr: 1.7071e-05 gnorm: 1.05 [16:06:47< 8:25:00] +[titan] 2025-10-05 14:41:08,711 - root - INFO - step: 26280 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8421 +[titan] 2025-10-05 14:41:08,711 - root - INFO - lr: 
1.7063e-05 gnorm: 1.08 [16:06:58< 8:24:49] +[titan] 2025-10-05 14:41:19,613 - root - INFO - step: 26285 loss: 2.0172 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 14:41:19,613 - root - INFO - lr: 1.7055e-05 gnorm: 1.10 [16:07:08< 8:24:38] +[titan] 2025-10-05 14:41:30,475 - root - INFO - step: 26290 loss: 2.0509 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 14:41:30,475 - root - INFO - lr: 1.7047e-05 gnorm: 1.10 [16:07:19< 8:24:27] +[titan] 2025-10-05 14:41:41,346 - root - INFO - step: 26295 loss: 2.0334 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7973 +[titan] 2025-10-05 14:41:41,347 - root - INFO - lr: 1.7039e-05 gnorm: 1.04 [16:07:30< 8:24:16] +[titan] 2025-10-05 14:41:50,045 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:41:52,244 - root - INFO - step: 26300 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 14:41:52,245 - root - INFO - lr: 1.7031e-05 gnorm: 1.10 [16:07:41< 8:24:05] +[titan] 2025-10-05 14:42:03,172 - root - INFO - step: 26305 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 14:42:03,172 - root - INFO - lr: 1.7023e-05 gnorm: 1.10 [16:07:52< 8:23:53] +[titan] 2025-10-05 14:42:14,032 - root - INFO - step: 26310 loss: 2.0276 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:42:14,032 - root - INFO - lr: 1.7015e-05 gnorm: 1.10 [16:08:03< 8:23:42] +[titan] 2025-10-05 14:42:24,897 - root - INFO - step: 26315 loss: 2.0611 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:42:24,897 - root - INFO - lr: 1.7007e-05 gnorm: 1.05 [16:08:14< 8:23:31] +[titan] 2025-10-05 14:42:35,737 - root - INFO - step: 26320 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 14:42:35,737 - root - INFO - lr: 1.6999e-05 gnorm: 1.08 [16:08:25< 8:23:20] +[titan] 2025-10-05 14:42:46,593 - root - INFO - step: 26325 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8364 +[titan] 2025-10-05 14:42:46,593 - root - INFO - lr: 1.6992e-05 gnorm: 1.11 [16:08:35< 8:23:09] +[titan] 2025-10-05 14:42:57,467 - root - INFO - step: 26330 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 14:42:57,467 - root - INFO - lr: 
1.6984e-05 gnorm: 1.05 [16:08:46< 8:22:58] +[titan] 2025-10-05 14:43:08,377 - root - INFO - step: 26335 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7862 +[titan] 2025-10-05 14:43:08,377 - root - INFO - lr: 1.6976e-05 gnorm: 1.10 [16:08:57< 8:22:47] +[titan] 2025-10-05 14:43:19,276 - root - INFO - step: 26340 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:43:19,276 - root - INFO - lr: 1.6968e-05 gnorm: 1.09 [16:09:08< 8:22:36] +[titan] 2025-10-05 14:43:30,150 - root - INFO - step: 26345 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 14:43:30,150 - root - INFO - lr: 1.6960e-05 gnorm: 1.09 [16:09:19< 8:22:24] +[titan] 2025-10-05 14:43:38,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:43:41,032 - root - INFO - step: 26350 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 14:43:41,033 - root - INFO - lr: 1.6952e-05 gnorm: 1.12 [16:09:30< 8:22:13] +[titan] 2025-10-05 14:43:51,910 - root - INFO - step: 26355 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 14:43:51,911 - root - INFO - lr: 1.6944e-05 gnorm: 1.10 [16:09:41< 8:22:02] +[titan] 2025-10-05 14:44:02,775 - root - INFO - step: 26360 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 14:44:02,775 - root - INFO - lr: 1.6936e-05 gnorm: 1.11 [16:09:52< 8:21:51] +[titan] 2025-10-05 14:44:13,677 - root - INFO - step: 26365 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 14:44:13,678 - root - INFO - lr: 1.6928e-05 gnorm: 1.09 [16:10:03< 8:21:40] +[titan] 2025-10-05 14:44:24,544 - root - INFO - step: 26370 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 14:44:24,544 - root - INFO - lr: 1.6921e-05 gnorm: 1.08 [16:10:13< 8:21:29] +[titan] 2025-10-05 14:44:35,405 - root - INFO - step: 26375 loss: 2.0563 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8183 +[titan] 2025-10-05 14:44:35,405 - root - INFO - lr: 1.6913e-05 gnorm: 1.09 [16:10:24< 8:21:18] +[titan] 2025-10-05 14:44:46,277 - root - INFO - step: 26380 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 14:44:46,277 - root - INFO - lr: 
1.6905e-05 gnorm: 1.10 [16:10:35< 8:21:07] +[titan] 2025-10-05 14:44:57,156 - root - INFO - step: 26385 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 14:44:57,156 - root - INFO - lr: 1.6897e-05 gnorm: 1.09 [16:10:46< 8:20:55] +[titan] 2025-10-05 14:45:07,991 - root - INFO - step: 26390 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:45:07,991 - root - INFO - lr: 1.6889e-05 gnorm: 1.09 [16:10:57< 8:20:44] +[titan] 2025-10-05 14:45:18,850 - root - INFO - step: 26395 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:45:18,850 - root - INFO - lr: 1.6881e-05 gnorm: 1.09 [16:11:08< 8:20:33] +[titan] 2025-10-05 14:45:27,579 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:45:29,758 - root - INFO - step: 26400 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 14:45:29,758 - root - INFO - lr: 1.6873e-05 gnorm: 1.09 [16:11:19< 8:20:22] +[titan] 2025-10-05 14:45:40,627 - root - INFO - step: 26405 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 14:45:40,628 - root - INFO - lr: 1.6865e-05 gnorm: 1.09 [16:11:30< 8:20:11] +[titan] 2025-10-05 14:45:51,472 - root - INFO - step: 26410 loss: 2.0493 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 14:45:51,472 - root - INFO - lr: 1.6858e-05 gnorm: 1.09 [16:11:40< 8:20:00] +[titan] 2025-10-05 14:46:02,329 - root - INFO - step: 26415 loss: 2.0718 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8312 +[titan] 2025-10-05 14:46:02,329 - root - INFO - lr: 1.6850e-05 gnorm: 1.09 [16:11:51< 8:19:49] +[titan] 2025-10-05 14:46:13,208 - root - INFO - step: 26420 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 14:46:13,208 - root - INFO - lr: 1.6842e-05 gnorm: 1.09 [16:12:02< 8:19:38] +[titan] 2025-10-05 14:46:24,077 - root - INFO - step: 26425 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:46:24,077 - root - INFO - lr: 1.6834e-05 gnorm: 1.09 [16:12:13< 8:19:26] +[titan] 2025-10-05 14:46:34,964 - root - INFO - step: 26430 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8192 +[titan] 2025-10-05 14:46:34,964 - root - INFO - lr: 
1.6826e-05 gnorm: 1.08 [16:12:24< 8:19:15] +[titan] 2025-10-05 14:46:45,809 - root - INFO - step: 26435 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 14:46:45,810 - root - INFO - lr: 1.6818e-05 gnorm: 1.08 [16:12:35< 8:19:04] +[titan] 2025-10-05 14:46:56,653 - root - INFO - step: 26440 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 14:46:56,654 - root - INFO - lr: 1.6810e-05 gnorm: 1.07 [16:12:46< 8:18:53] +[titan] 2025-10-05 14:47:07,510 - root - INFO - step: 26445 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 14:47:07,511 - root - INFO - lr: 1.6803e-05 gnorm: 1.09 [16:12:56< 8:18:42] +[titan] 2025-10-05 14:47:16,212 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:47:18,390 - root - INFO - step: 26450 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:47:18,390 - root - INFO - lr: 1.6795e-05 gnorm: 1.07 [16:13:07< 8:18:31] +[titan] 2025-10-05 14:47:29,255 - root - INFO - step: 26455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:47:29,255 - root - INFO - lr: 1.6787e-05 gnorm: 1.10 [16:13:18< 8:18:20] +[titan] 2025-10-05 14:47:40,123 - root - INFO - step: 26460 loss: 2.0742 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 14:47:40,123 - root - INFO - lr: 1.6779e-05 gnorm: 1.14 [16:13:29< 8:18:09] +[titan] 2025-10-05 14:47:51,023 - root - INFO - step: 26465 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 14:47:51,023 - root - INFO - lr: 1.6771e-05 gnorm: 1.10 [16:13:40< 8:17:57] +[titan] 2025-10-05 14:48:01,888 - root - INFO - step: 26470 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 14:48:01,888 - root - INFO - lr: 1.6763e-05 gnorm: 1.05 [16:13:51< 8:17:46] +[titan] 2025-10-05 14:48:12,750 - root - INFO - step: 26475 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:48:12,750 - root - INFO - lr: 1.6756e-05 gnorm: 1.10 [16:14:02< 8:17:35] +[titan] 2025-10-05 14:48:23,596 - root - INFO - step: 26480 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 14:48:23,597 - root - INFO - lr: 
1.6748e-05 gnorm: 1.05 [16:14:12< 8:17:24] +[titan] 2025-10-05 14:48:34,475 - root - INFO - step: 26485 loss: 2.0429 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:48:34,475 - root - INFO - lr: 1.6740e-05 gnorm: 1.11 [16:14:23< 8:17:13] +[titan] 2025-10-05 14:48:45,347 - root - INFO - step: 26490 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 14:48:45,348 - root - INFO - lr: 1.6732e-05 gnorm: 1.11 [16:14:34< 8:17:02] +[titan] 2025-10-05 14:48:56,251 - root - INFO - step: 26495 loss: 2.1088 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 14:48:56,252 - root - INFO - lr: 1.6724e-05 gnorm: 1.15 [16:14:45< 8:16:51] +[titan] 2025-10-05 14:49:04,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:49:07,097 - root - INFO - step: 26500 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8508 +[titan] 2025-10-05 14:49:07,097 - root - INFO - lr: 1.6716e-05 gnorm: 1.14 [16:14:56< 8:16:40] +[titan] 2025-10-05 14:49:17,975 - root - INFO - step: 26505 loss: 2.0105 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 14:49:17,976 - root - INFO - lr: 1.6709e-05 gnorm: 1.12 [16:15:07< 8:16:28] +[titan] 2025-10-05 14:49:28,870 - root - INFO - step: 26510 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7377 +[titan] 2025-10-05 14:49:28,870 - root - INFO - lr: 1.6701e-05 gnorm: 1.05 [16:15:18< 8:16:17] +[titan] 2025-10-05 14:49:39,744 - root - INFO - step: 26515 loss: 2.0774 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 14:49:39,744 - root - INFO - lr: 1.6693e-05 gnorm: 1.14 [16:15:29< 8:16:06] +[titan] 2025-10-05 14:49:50,606 - root - INFO - step: 26520 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:49:50,606 - root - INFO - lr: 1.6685e-05 gnorm: 1.11 [16:15:39< 8:15:55] +[titan] 2025-10-05 14:50:01,497 - root - INFO - step: 26525 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:50:01,497 - root - INFO - lr: 1.6677e-05 gnorm: 1.07 [16:15:50< 8:15:44] +[titan] 2025-10-05 14:50:12,351 - root - INFO - step: 26530 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:50:12,351 - root - INFO - lr: 
1.6669e-05 gnorm: 1.11 [16:16:01< 8:15:33] +[titan] 2025-10-05 14:50:23,197 - root - INFO - step: 26535 loss: 2.0146 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 14:50:23,197 - root - INFO - lr: 1.6662e-05 gnorm: 1.28 [16:16:12< 8:15:22] +[titan] 2025-10-05 14:50:34,070 - root - INFO - step: 26540 loss: 2.0363 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 14:50:34,070 - root - INFO - lr: 1.6654e-05 gnorm: 1.09 [16:16:23< 8:15:11] +[titan] 2025-10-05 14:50:44,935 - root - INFO - step: 26545 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 14:50:44,935 - root - INFO - lr: 1.6646e-05 gnorm: 1.07 [16:16:34< 8:14:59] +[titan] 2025-10-05 14:50:53,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:50:55,778 - root - INFO - step: 26550 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 14:50:55,778 - root - INFO - lr: 1.6638e-05 gnorm: 1.07 [16:16:45< 8:14:48] +[titan] 2025-10-05 14:51:06,624 - root - INFO - step: 26555 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 14:51:06,625 - root - INFO - lr: 1.6630e-05 gnorm: 1.11 [16:16:55< 8:14:37] +[titan] 2025-10-05 14:51:17,534 - root - INFO - step: 26560 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 14:51:17,534 - root - INFO - lr: 1.6622e-05 gnorm: 1.12 [16:17:06< 8:14:26] +[titan] 2025-10-05 14:51:28,410 - root - INFO - step: 26565 loss: 2.1178 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 14:51:28,410 - root - INFO - lr: 1.6615e-05 gnorm: 1.09 [16:17:17< 8:14:15] +[titan] 2025-10-05 14:51:39,262 - root - INFO - step: 26570 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 14:51:39,262 - root - INFO - lr: 1.6607e-05 gnorm: 1.11 [16:17:28< 8:14:04] +[titan] 2025-10-05 14:51:50,113 - root - INFO - step: 26575 loss: 2.1052 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 14:51:50,113 - root - INFO - lr: 1.6599e-05 gnorm: 1.15 [16:17:39< 8:13:53] +[titan] 2025-10-05 14:52:00,978 - root - INFO - step: 26580 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:52:00,978 - root - INFO - lr: 
1.6591e-05 gnorm: 1.09 [16:17:50< 8:13:42] +[titan] 2025-10-05 14:52:11,826 - root - INFO - step: 26585 loss: 2.0519 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 14:52:11,826 - root - INFO - lr: 1.6583e-05 gnorm: 1.14 [16:18:01< 8:13:30] +[titan] 2025-10-05 14:52:22,714 - root - INFO - step: 26590 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 14:52:22,714 - root - INFO - lr: 1.6576e-05 gnorm: 1.09 [16:18:12< 8:13:19] +[titan] 2025-10-05 14:52:33,578 - root - INFO - step: 26595 loss: 2.0442 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 14:52:33,578 - root - INFO - lr: 1.6568e-05 gnorm: 1.08 [16:18:22< 8:13:08] +[titan] 2025-10-05 14:52:42,211 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:52:44,378 - root - INFO - step: 26600 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,340 tflops: 420.92 mfu: 42.56% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:52:44,379 - root - INFO - lr: 1.6560e-05 gnorm: 1.10 [16:18:33< 8:12:57] +[titan] 2025-10-05 14:52:55,225 - root - INFO - step: 26605 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 14:52:55,225 - root - INFO - lr: 1.6552e-05 gnorm: 1.10 [16:18:44< 8:12:46] +[titan] 2025-10-05 14:53:06,069 - root - INFO - step: 26610 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:53:06,070 - root - INFO - lr: 1.6544e-05 gnorm: 1.07 [16:18:55< 8:12:35] +[titan] 2025-10-05 14:53:16,896 - root - INFO - step: 26615 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 14:53:16,896 - root - INFO - lr: 1.6537e-05 gnorm: 1.08 [16:19:06< 8:12:24] +[titan] 2025-10-05 14:53:27,734 - root - INFO - step: 26620 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7725 +[titan] 2025-10-05 14:53:27,734 - root - INFO - lr: 1.6529e-05 gnorm: 1.15 [16:19:17< 8:12:13] +[titan] 2025-10-05 14:53:36,738 - root - INFO - Dumping profiler traces at step 26624 +[titan] 2025-10-05 14:53:36,774 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:53:38,970 - root - INFO - step: 26625 loss: 2.0899 memory: 118.84GiB(85.28%) tps: 29,164 tflops: 404.60 mfu: 40.91% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8466 +[titan] 2025-10-05 14:53:38,971 - root - INFO - lr: 1.6521e-05 gnorm: 1.13 [16:19:28< 8:12:02] +[titan] 2025-10-05 14:53:49,827 - root - INFO - step: 26630 loss: 
2.0019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 14:53:49,827 - root - INFO - lr: 1.6513e-05 gnorm: 1.09 [16:19:39< 8:11:50] +[titan] 2025-10-05 14:54:00,657 - root - INFO - step: 26635 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 14:54:00,657 - root - INFO - lr: 1.6505e-05 gnorm: 1.12 [16:19:50< 8:11:39] +[titan] 2025-10-05 14:54:11,514 - root - INFO - step: 26640 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8209 +[titan] 2025-10-05 14:54:11,514 - root - INFO - lr: 1.6498e-05 gnorm: 1.10 [16:20:00< 8:11:28] +[titan] 2025-10-05 14:54:22,378 - root - INFO - step: 26645 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 14:54:22,378 - root - INFO - lr: 1.6490e-05 gnorm: 1.06 [16:20:11< 8:11:17] +[titan] 2025-10-05 14:54:31,074 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:54:33,259 - root - INFO - step: 26650 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 14:54:33,259 - root - INFO - lr: 1.6482e-05 gnorm: 1.12 [16:20:22< 8:11:06] +[titan] 2025-10-05 14:54:44,181 - root - INFO - step: 26655 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 14:54:44,181 - root - INFO - lr: 1.6474e-05 gnorm: 1.10 [16:20:33< 8:10:55] +[titan] 2025-10-05 14:54:55,045 - root - INFO - step: 26660 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:54:55,045 - root - INFO - lr: 1.6467e-05 gnorm: 1.09 [16:20:44< 8:10:44] +[titan] 2025-10-05 14:55:05,921 - root - INFO - step: 26665 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8484 +[titan] 2025-10-05 14:55:05,921 - root - INFO - lr: 1.6459e-05 gnorm: 1.12 [16:20:55< 8:10:33] +[titan] 2025-10-05 14:55:16,810 - root - INFO - step: 26670 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 14:55:16,810 - root - INFO - lr: 1.6451e-05 gnorm: 1.08 [16:21:06< 8:10:22] +[titan] 2025-10-05 14:55:27,678 - root - INFO - step: 26675 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 14:55:27,679 - root - INFO - lr: 1.6443e-05 gnorm: 1.09 [16:21:17< 8:10:10] +[titan] 2025-10-05 14:55:38,537 - root - INFO - step: 26680 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 14:55:38,537 - root - INFO - lr: 
1.6435e-05 gnorm: 1.10 [16:21:27< 8:09:59] +[titan] 2025-10-05 14:55:49,438 - root - INFO - step: 26685 loss: 2.0107 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 14:55:49,439 - root - INFO - lr: 1.6428e-05 gnorm: 1.11 [16:21:38< 8:09:48] +[titan] 2025-10-05 14:56:00,304 - root - INFO - step: 26690 loss: 2.0743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:56:00,304 - root - INFO - lr: 1.6420e-05 gnorm: 1.11 [16:21:49< 8:09:37] +[titan] 2025-10-05 14:56:11,149 - root - INFO - step: 26695 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8035 +[titan] 2025-10-05 14:56:11,149 - root - INFO - lr: 1.6412e-05 gnorm: 1.10 [16:22:00< 8:09:26] +[titan] 2025-10-05 14:56:19,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:56:22,000 - root - INFO - step: 26700 loss: 2.0496 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8111 +[titan] 2025-10-05 14:56:22,000 - root - INFO - lr: 1.6404e-05 gnorm: 1.07 [16:22:11< 8:09:15] +[titan] 2025-10-05 14:56:32,858 - root - INFO - step: 26705 loss: 1.9909 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 14:56:32,858 - root - INFO - lr: 1.6397e-05 gnorm: 1.07 [16:22:22< 8:09:04] +[titan] 2025-10-05 14:56:43,728 - root - INFO - step: 26710 loss: 2.1246 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:56:43,729 - root - INFO - lr: 1.6389e-05 gnorm: 1.12 [16:22:33< 8:08:53] +[titan] 2025-10-05 14:56:54,594 - root - INFO - step: 26715 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 14:56:54,594 - root - INFO - lr: 1.6381e-05 gnorm: 1.08 [16:22:43< 8:08:41] +[titan] 2025-10-05 14:57:05,497 - root - INFO - step: 26720 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 14:57:05,497 - root - INFO - lr: 1.6373e-05 gnorm: 1.08 [16:22:54< 8:08:30] +[titan] 2025-10-05 14:57:16,361 - root - INFO - step: 26725 loss: 2.0885 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 14:57:16,361 - root - INFO - lr: 1.6366e-05 gnorm: 1.07 [16:23:05< 8:08:19] +[titan] 2025-10-05 14:57:27,210 - root - INFO - step: 26730 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 14:57:27,211 - root - INFO - lr: 
1.6358e-05 gnorm: 1.10 [16:23:16< 8:08:08] +[titan] 2025-10-05 14:57:38,049 - root - INFO - step: 26735 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 14:57:38,050 - root - INFO - lr: 1.6350e-05 gnorm: 1.11 [16:23:27< 8:07:57] +[titan] 2025-10-05 14:57:48,918 - root - INFO - step: 26740 loss: 2.0984 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8556 +[titan] 2025-10-05 14:57:48,918 - root - INFO - lr: 1.6342e-05 gnorm: 1.15 [16:23:38< 8:07:46] +[titan] 2025-10-05 14:57:59,773 - root - INFO - step: 26745 loss: 2.0328 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 14:57:59,773 - root - INFO - lr: 1.6335e-05 gnorm: 1.10 [16:23:49< 8:07:35] +[titan] 2025-10-05 14:58:08,499 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:58:10,688 - root - INFO - step: 26750 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 14:58:10,689 - root - INFO - lr: 1.6327e-05 gnorm: 1.10 [16:24:00< 8:07:24] +[titan] 2025-10-05 14:58:21,558 - root - INFO - step: 26755 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 14:58:21,559 - root - INFO - lr: 1.6319e-05 gnorm: 1.10 [16:24:10< 8:07:13] +[titan] 2025-10-05 14:58:32,424 - root - INFO - step: 26760 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:58:32,425 - root - INFO - lr: 1.6311e-05 gnorm: 1.08 [16:24:21< 8:07:01] +[titan] 2025-10-05 14:58:43,310 - root - INFO - step: 26765 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:58:43,310 - root - INFO - lr: 1.6304e-05 gnorm: 1.07 [16:24:32< 8:06:50] +[titan] 2025-10-05 14:58:54,204 - root - INFO - step: 26770 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7566 +[titan] 2025-10-05 14:58:54,204 - root - INFO - lr: 1.6296e-05 gnorm: 1.08 [16:24:43< 8:06:39] +[titan] 2025-10-05 14:59:05,077 - root - INFO - step: 26775 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:59:05,077 - root - INFO - lr: 1.6288e-05 gnorm: 1.09 [16:24:54< 8:06:28] +[titan] 2025-10-05 14:59:15,970 - root - INFO - step: 26780 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:59:15,970 - root - INFO - lr: 
1.6280e-05 gnorm: 1.13 [16:25:05< 8:06:17] +[titan] 2025-10-05 14:59:26,894 - root - INFO - step: 26785 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 14:59:26,894 - root - INFO - lr: 1.6273e-05 gnorm: 1.11 [16:25:16< 8:06:06] +[titan] 2025-10-05 14:59:37,753 - root - INFO - step: 26790 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 14:59:37,753 - root - INFO - lr: 1.6265e-05 gnorm: 1.03 [16:25:27< 8:05:55] +[titan] 2025-10-05 14:59:48,629 - root - INFO - step: 26795 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:59:48,629 - root - INFO - lr: 1.6257e-05 gnorm: 1.08 [16:25:37< 8:05:44] +[titan] 2025-10-05 14:59:57,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:59:59,514 - root - INFO - step: 26800 loss: 1.9889 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:59:59,514 - root - INFO - lr: 1.6249e-05 gnorm: 1.12 [16:25:48< 8:05:33] +[titan] 2025-10-05 15:00:10,404 - root - INFO - step: 26805 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 15:00:10,405 - root - INFO - lr: 1.6242e-05 gnorm: 1.09 [16:25:59< 8:05:21] +[titan] 2025-10-05 15:00:21,298 - root - INFO - step: 26810 loss: 2.0441 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8079 +[titan] 2025-10-05 15:00:21,298 - root - INFO - lr: 1.6234e-05 gnorm: 1.09 [16:26:10< 8:05:10] +[titan] 2025-10-05 15:00:32,228 - root - INFO - step: 26815 loss: 2.0782 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 15:00:32,228 - root - INFO - lr: 1.6226e-05 gnorm: 1.15 [16:26:21< 8:04:59] +[titan] 2025-10-05 15:00:43,121 - root - INFO - step: 26820 loss: 2.0556 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 15:00:43,121 - root - INFO - lr: 1.6219e-05 gnorm: 1.10 [16:26:32< 8:04:48] +[titan] 2025-10-05 15:00:54,008 - root - INFO - step: 26825 loss: 2.0473 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8094 +[titan] 2025-10-05 15:00:54,008 - root - INFO - lr: 1.6211e-05 gnorm: 1.16 [16:26:43< 8:04:37] +[titan] 2025-10-05 15:01:04,889 - root - INFO - step: 26830 loss: 2.0024 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 15:01:04,889 - root - INFO - lr: 
1.6203e-05 gnorm: 1.09 [16:26:54< 8:04:26] +[titan] 2025-10-05 15:01:15,765 - root - INFO - step: 26835 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 15:01:15,765 - root - INFO - lr: 1.6195e-05 gnorm: 1.07 [16:27:05< 8:04:15] +[titan] 2025-10-05 15:01:26,630 - root - INFO - step: 26840 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8050 +[titan] 2025-10-05 15:01:26,631 - root - INFO - lr: 1.6188e-05 gnorm: 1.11 [16:27:15< 8:04:04] +[titan] 2025-10-05 15:01:37,602 - root - INFO - step: 26845 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:01:37,602 - root - INFO - lr: 1.6180e-05 gnorm: 1.12 [16:27:26< 8:03:53] +[titan] 2025-10-05 15:01:46,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:01:48,489 - root - INFO - step: 26850 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 15:01:48,490 - root - INFO - lr: 1.6172e-05 gnorm: 1.08 [16:27:37< 8:03:41] +[titan] 2025-10-05 15:01:59,381 - root - INFO - step: 26855 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 15:01:59,382 - root - INFO - lr: 1.6165e-05 gnorm: 1.07 [16:27:48< 8:03:30] +[titan] 2025-10-05 15:02:10,248 - root - INFO - step: 26860 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:02:10,248 - root - INFO - lr: 1.6157e-05 gnorm: 1.10 [16:27:59< 8:03:19] +[titan] 2025-10-05 15:02:21,138 - root - INFO - step: 26865 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 15:02:21,139 - root - INFO - lr: 1.6149e-05 gnorm: 1.12 [16:28:10< 8:03:08] +[titan] 2025-10-05 15:02:32,019 - root - INFO - step: 26870 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 15:02:32,019 - root - INFO - lr: 1.6141e-05 gnorm: 1.07 [16:28:21< 8:02:57] +[titan] 2025-10-05 15:02:42,942 - root - INFO - step: 26875 loss: 2.0517 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 15:02:42,942 - root - INFO - lr: 1.6134e-05 gnorm: 1.12 [16:28:32< 8:02:46] +[titan] 2025-10-05 15:02:53,877 - root - INFO - step: 26880 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 15:02:53,877 - root - INFO - lr: 
1.6126e-05 gnorm: 1.13 [16:28:43< 8:02:35] +[titan] 2025-10-05 15:03:04,754 - root - INFO - step: 26885 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 15:03:04,754 - root - INFO - lr: 1.6118e-05 gnorm: 1.16 [16:28:54< 8:02:24] +[titan] 2025-10-05 15:03:15,633 - root - INFO - step: 26890 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:03:15,633 - root - INFO - lr: 1.6111e-05 gnorm: 1.11 [16:29:04< 8:02:13] +[titan] 2025-10-05 15:03:26,500 - root - INFO - step: 26895 loss: 2.0231 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 15:03:26,500 - root - INFO - lr: 1.6103e-05 gnorm: 1.12 [16:29:15< 8:02:02] +[titan] 2025-10-05 15:03:35,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:03:37,367 - root - INFO - step: 26900 loss: 2.0325 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 15:03:37,368 - root - INFO - lr: 1.6095e-05 gnorm: 1.11 [16:29:26< 8:01:50] +[titan] 2025-10-05 15:03:48,288 - root - INFO - step: 26905 loss: 2.0322 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7960 +[titan] 2025-10-05 15:03:48,289 - root - INFO - lr: 1.6088e-05 gnorm: 1.12 [16:29:37< 8:01:39] +[titan] 2025-10-05 15:03:59,203 - root - INFO - step: 26910 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 15:03:59,203 - root - INFO - lr: 1.6080e-05 gnorm: 1.17 [16:29:48< 8:01:28] +[titan] 2025-10-05 15:04:10,072 - root - INFO - step: 26915 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 15:04:10,072 - root - INFO - lr: 1.6072e-05 gnorm: 1.08 [16:29:59< 8:01:17] +[titan] 2025-10-05 15:04:20,948 - root - INFO - step: 26920 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8313 +[titan] 2025-10-05 15:04:20,948 - root - INFO - lr: 1.6065e-05 gnorm: 1.11 [16:30:10< 8:01:06] +[titan] 2025-10-05 15:04:31,818 - root - INFO - step: 26925 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:04:31,818 - root - INFO - lr: 1.6057e-05 gnorm: 1.10 [16:30:21< 8:00:55] +[titan] 2025-10-05 15:04:42,737 - root - INFO - step: 26930 loss: 1.9755 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 15:04:42,737 - root - INFO - lr: 
1.6049e-05 gnorm: 1.07 [16:30:32< 8:00:44] +[titan] 2025-10-05 15:04:53,614 - root - INFO - step: 26935 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 15:04:53,614 - root - INFO - lr: 1.6041e-05 gnorm: 1.10 [16:30:42< 8:00:33] +[titan] 2025-10-05 15:05:04,493 - root - INFO - step: 26940 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 15:05:04,493 - root - INFO - lr: 1.6034e-05 gnorm: 1.16 [16:30:53< 8:00:22] +[titan] 2025-10-05 15:05:15,413 - root - INFO - step: 26945 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 15:05:15,413 - root - INFO - lr: 1.6026e-05 gnorm: 1.09 [16:31:04< 8:00:10] +[titan] 2025-10-05 15:05:24,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:05:26,301 - root - INFO - step: 26950 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:05:26,302 - root - INFO - lr: 1.6018e-05 gnorm: 1.13 [16:31:15< 7:59:59] +[titan] 2025-10-05 15:05:37,170 - root - INFO - step: 26955 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:05:37,170 - root - INFO - lr: 1.6011e-05 gnorm: 1.07 [16:31:26< 7:59:48] +[titan] 2025-10-05 15:05:48,097 - root - INFO - step: 26960 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 15:05:48,097 - root - INFO - lr: 1.6003e-05 gnorm: 1.11 [16:31:37< 7:59:37] +[titan] 2025-10-05 15:05:58,956 - root - INFO - step: 26965 loss: 2.0670 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 15:05:58,956 - root - INFO - lr: 1.5995e-05 gnorm: 1.13 [16:31:48< 7:59:26] +[titan] 2025-10-05 15:06:09,830 - root - INFO - step: 26970 loss: 1.9712 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 15:06:09,830 - root - INFO - lr: 1.5988e-05 gnorm: 1.09 [16:31:59< 7:59:15] +[titan] 2025-10-05 15:06:20,738 - root - INFO - step: 26975 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 15:06:20,738 - root - INFO - lr: 1.5980e-05 gnorm: 1.14 [16:32:10< 7:59:04] +[titan] 2025-10-05 15:06:31,607 - root - INFO - step: 26980 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 15:06:31,607 - root - INFO - lr: 
1.5972e-05 gnorm: 1.13 [16:32:20< 7:58:53] +[titan] 2025-10-05 15:06:42,557 - root - INFO - step: 26985 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 29,926 tflops: 415.18 mfu: 41.98% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8708 +[titan] 2025-10-05 15:06:42,557 - root - INFO - lr: 1.5965e-05 gnorm: 1.13 [16:32:31< 7:58:42] +[titan] 2025-10-05 15:06:53,430 - root - INFO - step: 26990 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:06:53,431 - root - INFO - lr: 1.5957e-05 gnorm: 1.11 [16:32:42< 7:58:31] +[titan] 2025-10-05 15:07:04,312 - root - INFO - step: 26995 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 15:07:04,312 - root - INFO - lr: 1.5949e-05 gnorm: 1.11 [16:32:53< 7:58:19] +[titan] 2025-10-05 15:07:13,022 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:07:15,207 - root - INFO - step: 27000 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 15:07:15,207 - root - INFO - lr: 1.5942e-05 gnorm: 1.13 [16:33:04< 7:58:08] +[titan] 2025-10-05 15:07:26,138 - root - INFO - step: 27005 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:07:26,138 - root - INFO - lr: 1.5934e-05 gnorm: 1.14 [16:33:15< 7:57:57] +[titan] 2025-10-05 15:07:37,028 - root - INFO - step: 27010 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 15:07:37,028 - root - INFO - lr: 1.5926e-05 gnorm: 1.14 [16:33:26< 7:57:46] +[titan] 2025-10-05 15:07:47,970 - root - INFO - step: 27015 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 15:07:47,971 - root - INFO - lr: 1.5919e-05 gnorm: 1.12 [16:33:37< 7:57:35] +[titan] 2025-10-05 15:07:58,854 - root - INFO - step: 27020 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7853 +[titan] 2025-10-05 15:07:58,855 - root - INFO - lr: 1.5911e-05 gnorm: 1.15 [16:33:48< 7:57:24] +[titan] 2025-10-05 15:08:09,736 - root - INFO - step: 27025 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:08:09,736 - root - INFO - lr: 1.5903e-05 gnorm: 1.12 [16:33:59< 7:57:13] +[titan] 2025-10-05 15:08:20,606 - root - INFO - step: 27030 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 15:08:20,606 - root - INFO - lr: 
1.5896e-05 gnorm: 1.08 [16:34:09< 7:57:02] +[titan] 2025-10-05 15:08:31,489 - root - INFO - step: 27035 loss: 1.9763 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 15:08:31,490 - root - INFO - lr: 1.5888e-05 gnorm: 1.09 [16:34:20< 7:56:51] +[titan] 2025-10-05 15:08:42,436 - root - INFO - step: 27040 loss: 2.0880 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 15:08:42,436 - root - INFO - lr: 1.5881e-05 gnorm: 1.12 [16:34:31< 7:56:40] +[titan] 2025-10-05 15:08:53,408 - root - INFO - step: 27045 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 15:08:53,409 - root - INFO - lr: 1.5873e-05 gnorm: 1.16 [16:34:42< 7:56:29] +[titan] 2025-10-05 15:09:02,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:09:04,301 - root - INFO - step: 27050 loss: 2.0295 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7940 +[titan] 2025-10-05 15:09:04,301 - root - INFO - lr: 1.5865e-05 gnorm: 1.11 [16:34:53< 7:56:17] +[titan] 2025-10-05 15:09:15,194 - root - INFO - step: 27055 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 15:09:15,194 - root - INFO - lr: 1.5858e-05 gnorm: 1.10 [16:35:04< 7:56:06] +[titan] 2025-10-05 15:09:26,100 - root - INFO - step: 27060 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8091 +[titan] 2025-10-05 15:09:26,100 - root - INFO - lr: 1.5850e-05 gnorm: 1.12 [16:35:15< 7:55:55] +[titan] 2025-10-05 15:09:36,976 - root - INFO - step: 27065 loss: 1.9733 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7439 +[titan] 2025-10-05 15:09:36,976 - root - INFO - lr: 1.5842e-05 gnorm: 1.09 [16:35:26< 7:55:44] +[titan] 2025-10-05 15:09:47,942 - root - INFO - step: 27070 loss: 2.0633 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 15:09:47,942 - root - INFO - lr: 1.5835e-05 gnorm: 1.09 [16:35:37< 7:55:33] +[titan] 2025-10-05 15:09:58,812 - root - INFO - step: 27075 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 15:09:58,812 - root - INFO - lr: 1.5827e-05 gnorm: 1.08 [16:35:48< 7:55:22] +[titan] 2025-10-05 15:10:09,685 - root - INFO - step: 27080 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 15:10:09,685 - root - INFO - lr: 
1.5819e-05 gnorm: 1.09 [16:35:58< 7:55:11] +[titan] 2025-10-05 15:10:20,555 - root - INFO - step: 27085 loss: 2.0147 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 15:10:20,556 - root - INFO - lr: 1.5812e-05 gnorm: 1.07 [16:36:09< 7:55:00] +[titan] 2025-10-05 15:10:31,449 - root - INFO - step: 27090 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 15:10:31,449 - root - INFO - lr: 1.5804e-05 gnorm: 1.06 [16:36:20< 7:54:49] +[titan] 2025-10-05 15:10:42,317 - root - INFO - step: 27095 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:10:42,317 - root - INFO - lr: 1.5797e-05 gnorm: 1.08 [16:36:31< 7:54:37] +[titan] 2025-10-05 15:10:51,064 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:10:53,251 - root - INFO - step: 27100 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:10:53,252 - root - INFO - lr: 1.5789e-05 gnorm: 1.15 [16:36:42< 7:54:26] +[titan] 2025-10-05 15:11:04,174 - root - INFO - step: 27105 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 15:11:04,174 - root - INFO - lr: 1.5781e-05 gnorm: 1.12 [16:36:53< 7:54:15] +[titan] 2025-10-05 15:11:15,060 - root - INFO - step: 27110 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:11:15,060 - root - INFO - lr: 1.5774e-05 gnorm: 1.15 [16:37:04< 7:54:04] +[titan] 2025-10-05 15:11:25,971 - root - INFO - step: 27115 loss: 2.0649 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:11:25,971 - root - INFO - lr: 1.5766e-05 gnorm: 1.12 [16:37:15< 7:53:53] +[titan] 2025-10-05 15:11:36,858 - root - INFO - step: 27120 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 15:11:36,859 - root - INFO - lr: 1.5759e-05 gnorm: 1.09 [16:37:26< 7:53:42] +[titan] 2025-10-05 15:11:47,776 - root - INFO - step: 27125 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7663 +[titan] 2025-10-05 15:11:47,777 - root - INFO - lr: 1.5751e-05 gnorm: 1.08 [16:37:37< 7:53:31] +[titan] 2025-10-05 15:11:58,636 - root - INFO - step: 27130 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:11:58,636 - root - INFO - lr: 
1.5743e-05 gnorm: 1.13 [16:37:47< 7:53:20] +[titan] 2025-10-05 15:12:09,626 - root - INFO - step: 27135 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 29,816 tflops: 413.65 mfu: 41.83% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 15:12:09,626 - root - INFO - lr: 1.5736e-05 gnorm: 1.11 [16:37:58< 7:53:09] +[titan] 2025-10-05 15:12:11,989 - root - INFO - Dumping profiler traces at step 27136 +[titan] 2025-10-05 15:12:12,028 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:12:20,793 - root - INFO - step: 27140 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 29,344 tflops: 407.10 mfu: 41.16% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 15:12:20,794 - root - INFO - lr: 1.5728e-05 gnorm: 1.11 [16:38:10< 7:52:58] +[titan] 2025-10-05 15:12:31,651 - root - INFO - step: 27145 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 15:12:31,651 - root - INFO - lr: 1.5720e-05 gnorm: 1.09 [16:38:20< 7:52:47] +[titan] 2025-10-05 15:12:40,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:12:42,534 - root - INFO - step: 27150 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7607 +[titan] 2025-10-05 15:12:42,534 - root - INFO - lr: 1.5713e-05 gnorm: 1.06 [16:38:31< 7:52:36] +[titan] 2025-10-05 15:12:53,435 - root - INFO - step: 27155 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:12:53,435 - root - INFO - lr: 1.5705e-05 gnorm: 1.08 [16:38:42< 7:52:24] +[titan] 2025-10-05 15:13:04,284 - root - INFO - step: 27160 loss: 2.0466 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8071 +[titan] 2025-10-05 15:13:04,284 - root - INFO - lr: 1.5698e-05 gnorm: 1.11 [16:38:53< 7:52:13] +[titan] 2025-10-05 15:13:15,182 - root - INFO - step: 27165 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 15:13:15,182 - root - INFO - lr: 1.5690e-05 gnorm: 1.08 [16:39:04< 7:52:02] +[titan] 2025-10-05 15:13:26,046 - root - INFO - step: 27170 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 15:13:26,046 - root - INFO - lr: 1.5682e-05 gnorm: 1.08 [16:39:15< 7:51:51] +[titan] 2025-10-05 15:13:36,900 - root - INFO - step: 27175 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7416 +[titan] 2025-10-05 15:13:36,900 - root - INFO - lr: 1.5675e-05 gnorm: 1.10 [16:39:26< 7:51:40] +[titan] 2025-10-05 15:13:47,796 - root - INFO - step: 27180 loss: 2.1244 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8768 +[titan] 2025-10-05 15:13:47,796 - root - INFO - lr: 
1.5667e-05 gnorm: 1.09 [16:39:37< 7:51:29] +[titan] 2025-10-05 15:13:58,664 - root - INFO - step: 27185 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:13:58,664 - root - INFO - lr: 1.5660e-05 gnorm: 1.10 [16:39:47< 7:51:18] +[titan] 2025-10-05 15:14:09,527 - root - INFO - step: 27190 loss: 2.0164 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:14:09,527 - root - INFO - lr: 1.5652e-05 gnorm: 1.09 [16:39:58< 7:51:07] +[titan] 2025-10-05 15:14:20,387 - root - INFO - step: 27195 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:14:20,387 - root - INFO - lr: 1.5645e-05 gnorm: 1.07 [16:40:09< 7:50:56] +[titan] 2025-10-05 15:14:29,129 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:14:31,314 - root - INFO - step: 27200 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 15:14:31,314 - root - INFO - lr: 1.5637e-05 gnorm: 1.10 [16:40:20< 7:50:44] +[titan] 2025-10-05 15:14:42,185 - root - INFO - step: 27205 loss: 2.0377 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:14:42,186 - root - INFO - lr: 1.5629e-05 gnorm: 1.08 [16:40:31< 7:50:33] +[titan] 2025-10-05 15:14:53,065 - root - INFO - step: 27210 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 15:14:53,066 - root - INFO - lr: 1.5622e-05 gnorm: 1.12 [16:40:42< 7:50:22] +[titan] 2025-10-05 15:15:03,941 - root - INFO - step: 27215 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 15:15:03,941 - root - INFO - lr: 1.5614e-05 gnorm: 1.11 [16:40:53< 7:50:11] +[titan] 2025-10-05 15:15:14,801 - root - INFO - step: 27220 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 15:15:14,801 - root - INFO - lr: 1.5607e-05 gnorm: 1.10 [16:41:04< 7:50:00] +[titan] 2025-10-05 15:15:25,653 - root - INFO - step: 27225 loss: 1.9878 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:15:25,653 - root - INFO - lr: 1.5599e-05 gnorm: 1.09 [16:41:14< 7:49:49] +[titan] 2025-10-05 15:15:36,551 - root - INFO - step: 27230 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8280 +[titan] 2025-10-05 15:15:36,552 - root - INFO - lr: 
1.5591e-05 gnorm: 1.13 [16:41:25< 7:49:38] +[titan] 2025-10-05 15:15:47,426 - root - INFO - step: 27235 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 15:15:47,426 - root - INFO - lr: 1.5584e-05 gnorm: 1.10 [16:41:36< 7:49:27] +[titan] 2025-10-05 15:15:58,353 - root - INFO - step: 27240 loss: 2.0437 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.07% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 15:15:58,354 - root - INFO - lr: 1.5576e-05 gnorm: 1.08 [16:41:47< 7:49:16] +[titan] 2025-10-05 15:16:09,201 - root - INFO - step: 27245 loss: 2.0207 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 15:16:09,202 - root - INFO - lr: 1.5569e-05 gnorm: 1.11 [16:41:58< 7:49:05] +[titan] 2025-10-05 15:16:17,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:16:20,072 - root - INFO - step: 27250 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7443 +[titan] 2025-10-05 15:16:20,072 - root - INFO - lr: 1.5561e-05 gnorm: 1.08 [16:42:09< 7:48:53] +[titan] 2025-10-05 15:16:30,931 - root - INFO - step: 27255 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:16:30,931 - root - INFO - lr: 1.5554e-05 gnorm: 1.12 [16:42:20< 7:48:42] +[titan] 2025-10-05 15:16:41,802 - root - INFO - step: 27260 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 15:16:41,803 - root - INFO - lr: 1.5546e-05 gnorm: 1.15 [16:42:31< 7:48:31] +[titan] 2025-10-05 15:16:52,730 - root - INFO - step: 27265 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.07% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 15:16:52,730 - root - INFO - lr: 1.5539e-05 gnorm: 1.08 [16:42:42< 7:48:20] +[titan] 2025-10-05 15:17:03,595 - root - INFO - step: 27270 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 15:17:03,596 - root - INFO - lr: 1.5531e-05 gnorm: 1.08 [16:42:52< 7:48:09] +[titan] 2025-10-05 15:17:14,444 - root - INFO - step: 27275 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 15:17:14,444 - root - INFO - lr: 1.5523e-05 gnorm: 1.11 [16:43:03< 7:47:58] +[titan] 2025-10-05 15:17:25,317 - root - INFO - step: 27280 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 15:17:25,317 - root - INFO - lr: 
1.5516e-05 gnorm: 1.11 [16:43:14< 7:47:47] +[titan] 2025-10-05 15:17:36,180 - root - INFO - step: 27285 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 15:17:36,180 - root - INFO - lr: 1.5508e-05 gnorm: 1.10 [16:43:25< 7:47:36] +[titan] 2025-10-05 15:17:47,037 - root - INFO - step: 27290 loss: 2.0421 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 15:17:47,037 - root - INFO - lr: 1.5501e-05 gnorm: 1.10 [16:43:36< 7:47:25] +[titan] 2025-10-05 15:17:57,971 - root - INFO - step: 27295 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:17:57,971 - root - INFO - lr: 1.5493e-05 gnorm: 1.10 [16:43:47< 7:47:13] +[titan] 2025-10-05 15:18:06,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:18:08,853 - root - INFO - step: 27300 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 15:18:08,853 - root - INFO - lr: 1.5486e-05 gnorm: 1.09 [16:43:58< 7:47:02] +[titan] 2025-10-05 15:18:19,735 - root - INFO - step: 27305 loss: 2.0092 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 15:18:19,735 - root - INFO - lr: 1.5478e-05 gnorm: 1.11 [16:44:09< 7:46:51] +[titan] 2025-10-05 15:18:30,612 - root - INFO - step: 27310 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 15:18:30,612 - root - INFO - lr: 1.5471e-05 gnorm: 1.10 [16:44:19< 7:46:40] +[titan] 2025-10-05 15:18:41,512 - root - INFO - step: 27315 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 15:18:41,512 - root - INFO - lr: 1.5463e-05 gnorm: 1.10 [16:44:30< 7:46:29] +[titan] 2025-10-05 15:18:52,399 - root - INFO - step: 27320 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 15:18:52,399 - root - INFO - lr: 1.5455e-05 gnorm: 1.12 [16:44:41< 7:46:18] +[titan] 2025-10-05 15:19:03,317 - root - INFO - step: 27325 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 15:19:03,317 - root - INFO - lr: 1.5448e-05 gnorm: 1.13 [16:44:52< 7:46:07] +[titan] 2025-10-05 15:19:14,195 - root - INFO - step: 27330 loss: 2.1168 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 15:19:14,195 - root - INFO - lr: 
1.5440e-05 gnorm: 1.12 [16:45:03< 7:45:56] +[titan] 2025-10-05 15:19:25,053 - root - INFO - step: 27335 loss: 2.0622 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8236 +[titan] 2025-10-05 15:19:25,053 - root - INFO - lr: 1.5433e-05 gnorm: 1.14 [16:45:14< 7:45:45] +[titan] 2025-10-05 15:19:35,914 - root - INFO - step: 27340 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:19:35,914 - root - INFO - lr: 1.5425e-05 gnorm: 1.09 [16:45:25< 7:45:34] +[titan] 2025-10-05 15:19:46,795 - root - INFO - step: 27345 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8129 +[titan] 2025-10-05 15:19:46,796 - root - INFO - lr: 1.5418e-05 gnorm: 1.14 [16:45:36< 7:45:22] +[titan] 2025-10-05 15:19:55,519 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:19:57,711 - root - INFO - step: 27350 loss: 2.0646 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:19:57,711 - root - INFO - lr: 1.5410e-05 gnorm: 1.12 [16:45:46< 7:45:11] +[titan] 2025-10-05 15:20:08,574 - root - INFO - step: 27355 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:20:08,574 - root - INFO - lr: 1.5403e-05 gnorm: 1.10 [16:45:57< 7:45:00] +[titan] 2025-10-05 15:20:19,517 - root - INFO - step: 27360 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8070 +[titan] 2025-10-05 15:20:19,518 - root - INFO - lr: 1.5395e-05 gnorm: 1.10 [16:46:08< 7:44:49] +[titan] 2025-10-05 15:20:30,400 - root - INFO - step: 27365 loss: 2.0266 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 15:20:30,401 - root - INFO - lr: 1.5388e-05 gnorm: 1.10 [16:46:19< 7:44:38] +[titan] 2025-10-05 15:20:41,257 - root - INFO - step: 27370 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 15:20:41,257 - root - INFO - lr: 1.5380e-05 gnorm: 1.09 [16:46:30< 7:44:27] +[titan] 2025-10-05 15:20:52,147 - root - INFO - step: 27375 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8366 +[titan] 2025-10-05 15:20:52,147 - root - INFO - lr: 1.5373e-05 gnorm: 1.09 [16:46:41< 7:44:16] +[titan] 2025-10-05 15:21:03,049 - root - INFO - step: 27380 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 15:21:03,049 - root - INFO - lr: 
1.5365e-05 gnorm: 1.10 [16:46:52< 7:44:05] +[titan] 2025-10-05 15:21:13,927 - root - INFO - step: 27385 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 15:21:13,927 - root - INFO - lr: 1.5358e-05 gnorm: 1.10 [16:47:03< 7:43:54] +[titan] 2025-10-05 15:21:24,850 - root - INFO - step: 27390 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:21:24,850 - root - INFO - lr: 1.5350e-05 gnorm: 1.13 [16:47:14< 7:43:43] +[titan] 2025-10-05 15:21:35,739 - root - INFO - step: 27395 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 15:21:35,740 - root - INFO - lr: 1.5343e-05 gnorm: 1.13 [16:47:25< 7:43:31] +[titan] 2025-10-05 15:21:44,436 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:21:46,630 - root - INFO - step: 27400 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:21:46,630 - root - INFO - lr: 1.5335e-05 gnorm: 1.08 [16:47:35< 7:43:20] +[titan] 2025-10-05 15:21:57,526 - root - INFO - step: 27405 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7972 +[titan] 2025-10-05 15:21:57,526 - root - INFO - lr: 1.5328e-05 gnorm: 1.12 [16:47:46< 7:43:09] +[titan] 2025-10-05 15:22:08,415 - root - INFO - step: 27410 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7583 +[titan] 2025-10-05 15:22:08,415 - root - INFO - lr: 1.5320e-05 gnorm: 1.07 [16:47:57< 7:42:58] +[titan] 2025-10-05 15:22:19,304 - root - INFO - step: 27415 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 15:22:19,305 - root - INFO - lr: 1.5313e-05 gnorm: 1.12 [16:48:08< 7:42:47] +[titan] 2025-10-05 15:22:30,195 - root - INFO - step: 27420 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8052 +[titan] 2025-10-05 15:22:30,196 - root - INFO - lr: 1.5305e-05 gnorm: 1.11 [16:48:19< 7:42:36] +[titan] 2025-10-05 15:22:41,161 - root - INFO - step: 27425 loss: 2.0339 memory: 118.84GiB(85.28%) tps: 29,884 tflops: 414.60 mfu: 41.92% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 15:22:41,162 - root - INFO - lr: 1.5298e-05 gnorm: 1.12 [16:48:30< 7:42:25] +[titan] 2025-10-05 15:22:52,043 - root - INFO - step: 27430 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 15:22:52,043 - root - INFO - lr: 
1.5290e-05 gnorm: 1.10 [16:48:41< 7:42:14] +[titan] 2025-10-05 15:23:02,939 - root - INFO - step: 27435 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 15:23:02,939 - root - INFO - lr: 1.5283e-05 gnorm: 1.12 [16:48:52< 7:42:03] +[titan] 2025-10-05 15:23:13,837 - root - INFO - step: 27440 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 15:23:13,837 - root - INFO - lr: 1.5275e-05 gnorm: 1.12 [16:49:03< 7:41:52] +[titan] 2025-10-05 15:23:24,732 - root - INFO - step: 27445 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 15:23:24,733 - root - INFO - lr: 1.5268e-05 gnorm: 1.10 [16:49:14< 7:41:41] +[titan] 2025-10-05 15:23:33,429 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:23:35,615 - root - INFO - step: 27450 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8394 +[titan] 2025-10-05 15:23:35,615 - root - INFO - lr: 1.5260e-05 gnorm: 1.12 [16:49:24< 7:41:29] +[titan] 2025-10-05 15:23:46,565 - root - INFO - step: 27455 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.19 mfu: 41.98% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 15:23:46,565 - root - INFO - lr: 1.5253e-05 gnorm: 1.14 [16:49:35< 7:41:18] +[titan] 2025-10-05 15:23:57,475 - root - INFO - step: 27460 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 15:23:57,475 - root - INFO - lr: 1.5245e-05 gnorm: 1.11 [16:49:46< 7:41:07] +[titan] 2025-10-05 15:24:08,327 - root - INFO - step: 27465 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 15:24:08,327 - root - INFO - lr: 1.5238e-05 gnorm: 1.11 [16:49:57< 7:40:56] +[titan] 2025-10-05 15:24:19,201 - root - INFO - step: 27470 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:24:19,201 - root - INFO - lr: 1.5230e-05 gnorm: 1.08 [16:50:08< 7:40:45] +[titan] 2025-10-05 15:24:30,080 - root - INFO - step: 27475 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:24:30,080 - root - INFO - lr: 1.5223e-05 gnorm: 1.09 [16:50:19< 7:40:34] +[titan] 2025-10-05 15:24:40,967 - root - INFO - step: 27480 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8213 +[titan] 2025-10-05 15:24:40,967 - root - INFO - lr: 
1.5215e-05 gnorm: 1.10 [16:50:30< 7:40:23] +[titan] 2025-10-05 15:24:51,863 - root - INFO - step: 27485 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 15:24:51,863 - root - INFO - lr: 1.5208e-05 gnorm: 1.11 [16:50:41< 7:40:12] +[titan] 2025-10-05 15:25:02,777 - root - INFO - step: 27490 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8587 +[titan] 2025-10-05 15:25:02,778 - root - INFO - lr: 1.5200e-05 gnorm: 1.16 [16:50:52< 7:40:01] +[titan] 2025-10-05 15:25:13,681 - root - INFO - step: 27495 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:25:13,681 - root - INFO - lr: 1.5193e-05 gnorm: 1.12 [16:51:02< 7:39:50] +[titan] 2025-10-05 15:25:22,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:25:24,569 - root - INFO - step: 27500 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 15:25:24,570 - root - INFO - lr: 1.5185e-05 gnorm: 1.09 [16:51:13< 7:39:39] +[titan] 2025-10-05 15:25:35,452 - root - INFO - step: 27505 loss: 2.0528 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:25:35,452 - root - INFO - lr: 1.5178e-05 gnorm: 1.14 [16:51:24< 7:39:27] +[titan] 2025-10-05 15:25:46,344 - root - INFO - step: 27510 loss: 2.2224 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 15:25:46,344 - root - INFO - lr: 1.5170e-05 gnorm: 7.42 [16:51:35< 7:39:16] +[titan] 2025-10-05 15:25:57,223 - root - INFO - step: 27515 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7946 +[titan] 2025-10-05 15:25:57,223 - root - INFO - lr: 1.5163e-05 gnorm: 1.13 [16:51:46< 7:39:05] +[titan] 2025-10-05 15:26:08,195 - root - INFO - step: 27520 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 15:26:08,195 - root - INFO - lr: 1.5155e-05 gnorm: 1.07 [16:51:57< 7:38:54] +[titan] 2025-10-05 15:26:19,076 - root - INFO - step: 27525 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 15:26:19,076 - root - INFO - lr: 1.5148e-05 gnorm: 1.13 [16:52:08< 7:38:43] +[titan] 2025-10-05 15:26:29,960 - root - INFO - step: 27530 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7786 +[titan] 2025-10-05 15:26:29,960 - root - INFO - lr: 
1.5141e-05 gnorm: 1.17 [16:52:19< 7:38:32] +[titan] 2025-10-05 15:26:40,847 - root - INFO - step: 27535 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 15:26:40,847 - root - INFO - lr: 1.5133e-05 gnorm: 1.09 [16:52:30< 7:38:21] +[titan] 2025-10-05 15:26:51,718 - root - INFO - step: 27540 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 15:26:51,718 - root - INFO - lr: 1.5126e-05 gnorm: 1.13 [16:52:40< 7:38:10] +[titan] 2025-10-05 15:27:02,650 - root - INFO - step: 27545 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 15:27:02,650 - root - INFO - lr: 1.5118e-05 gnorm: 1.12 [16:52:51< 7:37:59] +[titan] 2025-10-05 15:27:11,363 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:27:13,551 - root - INFO - step: 27550 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 15:27:13,551 - root - INFO - lr: 1.5111e-05 gnorm: 1.14 [16:53:02< 7:37:48] +[titan] 2025-10-05 15:27:24,430 - root - INFO - step: 27555 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:27:24,430 - root - INFO - lr: 1.5103e-05 gnorm: 1.10 [16:53:13< 7:37:36] +[titan] 2025-10-05 15:27:35,290 - root - INFO - step: 27560 loss: 2.0098 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7771 +[titan] 2025-10-05 15:27:35,290 - root - INFO - lr: 1.5096e-05 gnorm: 1.11 [16:53:24< 7:37:25] +[titan] 2025-10-05 15:27:46,162 - root - INFO - step: 27565 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 15:27:46,162 - root - INFO - lr: 1.5088e-05 gnorm: 1.14 [16:53:35< 7:37:14] +[titan] 2025-10-05 15:27:57,049 - root - INFO - step: 27570 loss: 2.0327 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7970 +[titan] 2025-10-05 15:27:57,049 - root - INFO - lr: 1.5081e-05 gnorm: 1.09 [16:53:46< 7:37:03] +[titan] 2025-10-05 15:28:07,972 - root - INFO - step: 27575 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8349 +[titan] 2025-10-05 15:28:07,972 - root - INFO - lr: 1.5074e-05 gnorm: 1.15 [16:53:57< 7:36:52] +[titan] 2025-10-05 15:28:18,854 - root - INFO - step: 27580 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 15:28:18,855 - root - INFO - lr: 
1.5066e-05 gnorm: 1.16 [16:54:08< 7:36:41] +[titan] 2025-10-05 15:28:29,772 - root - INFO - step: 27585 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8547 +[titan] 2025-10-05 15:28:29,772 - root - INFO - lr: 1.5059e-05 gnorm: 1.13 [16:54:19< 7:36:30] +[titan] 2025-10-05 15:28:40,678 - root - INFO - step: 27590 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 15:28:40,678 - root - INFO - lr: 1.5051e-05 gnorm: 1.09 [16:54:29< 7:36:19] +[titan] 2025-10-05 15:28:51,534 - root - INFO - step: 27595 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8331 +[titan] 2025-10-05 15:28:51,534 - root - INFO - lr: 1.5044e-05 gnorm: 1.10 [16:54:40< 7:36:08] +[titan] 2025-10-05 15:29:00,205 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:29:02,428 - root - INFO - step: 27600 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7298 +[titan] 2025-10-05 15:29:02,428 - root - INFO - lr: 1.5036e-05 gnorm: 1.12 [16:54:51< 7:35:57] +[titan] 2025-10-05 15:29:13,305 - root - INFO - step: 27605 loss: 2.0663 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:29:13,305 - root - INFO - lr: 1.5029e-05 gnorm: 1.09 [16:55:02< 7:35:46] +[titan] 2025-10-05 15:29:24,170 - root - INFO - step: 27610 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 15:29:24,170 - root - INFO - lr: 1.5021e-05 gnorm: 1.06 [16:55:13< 7:35:34] +[titan] 2025-10-05 15:29:35,069 - root - INFO - step: 27615 loss: 1.9817 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 15:29:35,070 - root - INFO - lr: 1.5014e-05 gnorm: 1.13 [16:55:24< 7:35:23] +[titan] 2025-10-05 15:29:45,908 - root - INFO - step: 27620 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 15:29:45,908 - root - INFO - lr: 1.5007e-05 gnorm: 1.14 [16:55:35< 7:35:12] +[titan] 2025-10-05 15:29:56,752 - root - INFO - step: 27625 loss: 2.0304 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7949 +[titan] 2025-10-05 15:29:56,752 - root - INFO - lr: 1.4999e-05 gnorm: 1.14 [16:55:46< 7:35:01] +[titan] 2025-10-05 15:30:07,651 - root - INFO - step: 27630 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 15:30:07,652 - root - INFO - lr: 
1.4992e-05 gnorm: 1.10 [16:55:56< 7:34:50] +[titan] 2025-10-05 15:30:18,514 - root - INFO - step: 27635 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8163 +[titan] 2025-10-05 15:30:18,514 - root - INFO - lr: 1.4984e-05 gnorm: 1.11 [16:56:07< 7:34:39] +[titan] 2025-10-05 15:30:29,382 - root - INFO - step: 27640 loss: 2.0889 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 15:30:29,382 - root - INFO - lr: 1.4977e-05 gnorm: 1.09 [16:56:18< 7:34:28] +[titan] 2025-10-05 15:30:40,353 - root - INFO - step: 27645 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7813 +[titan] 2025-10-05 15:30:40,354 - root - INFO - lr: 1.4970e-05 gnorm: 1.12 [16:56:29< 7:34:17] +[titan] 2025-10-05 15:30:47,057 - root - INFO - Dumping profiler traces at step 27648 +[titan] 2025-10-05 15:30:47,095 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:30:49,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:30:51,455 - root - INFO - step: 27650 loss: 2.0385 memory: 118.84GiB(85.28%) tps: 29,516 tflops: 409.49 mfu: 41.40% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:30:51,456 - root - INFO - lr: 1.4962e-05 gnorm: 1.14 [16:56:40< 7:34:06] +[titan] 2025-10-05 15:31:02,322 - root - INFO - step: 27655 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 15:31:02,323 - root - INFO - lr: 1.4955e-05 gnorm: 1.07 [16:56:51< 7:33:55] +[titan] 2025-10-05 15:31:13,179 - root - INFO - step: 27660 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:31:13,179 - root - INFO - lr: 1.4947e-05 gnorm: 1.10 [16:57:02< 7:33:44] +[titan] 2025-10-05 15:31:24,033 - root - INFO - step: 27665 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 15:31:24,033 - root - INFO - lr: 1.4940e-05 gnorm: 1.09 [16:57:13< 7:33:32] +[titan] 2025-10-05 15:31:34,896 - root - INFO - step: 27670 loss: 2.0761 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 15:31:34,896 - root - INFO - lr: 1.4933e-05 gnorm: 1.11 [16:57:24< 7:33:21] +[titan] 2025-10-05 15:31:45,721 - root - INFO - step: 27675 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 15:31:45,721 - root - INFO - lr: 1.4925e-05 gnorm: 1.11 [16:57:34< 7:33:10] +[titan] 2025-10-05 15:31:56,620 - root - INFO - step: 27680 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7791 +[titan] 2025-10-05 15:31:56,620 - root - INFO - lr: 
1.4918e-05 gnorm: 1.10 [16:57:45< 7:32:59] +[titan] 2025-10-05 15:32:07,525 - root - INFO - step: 27685 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 15:32:07,525 - root - INFO - lr: 1.4910e-05 gnorm: 1.10 [16:57:56< 7:32:48] +[titan] 2025-10-05 15:32:18,367 - root - INFO - step: 27690 loss: 2.0707 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 15:32:18,367 - root - INFO - lr: 1.4903e-05 gnorm: 1.09 [16:58:07< 7:32:37] +[titan] 2025-10-05 15:32:29,238 - root - INFO - step: 27695 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 15:32:29,238 - root - INFO - lr: 1.4896e-05 gnorm: 1.13 [16:58:18< 7:32:26] +[titan] 2025-10-05 15:32:37,923 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:32:40,104 - root - INFO - step: 27700 loss: 2.0988 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 15:32:40,104 - root - INFO - lr: 1.4888e-05 gnorm: 1.12 [16:58:29< 7:32:15] +[titan] 2025-10-05 15:32:50,965 - root - INFO - step: 27705 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 15:32:50,965 - root - INFO - lr: 1.4881e-05 gnorm: 1.13 [16:58:40< 7:32:04] +[titan] 2025-10-05 15:33:01,872 - root - INFO - step: 27710 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 15:33:01,872 - root - INFO - lr: 1.4873e-05 gnorm: 1.13 [16:58:51< 7:31:53] +[titan] 2025-10-05 15:33:12,825 - root - INFO - step: 27715 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8034 +[titan] 2025-10-05 15:33:12,825 - root - INFO - lr: 1.4866e-05 gnorm: 1.12 [16:59:02< 7:31:41] +[titan] 2025-10-05 15:33:23,696 - root - INFO - step: 27720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 15:33:23,696 - root - INFO - lr: 1.4859e-05 gnorm: 1.13 [16:59:12< 7:31:30] +[titan] 2025-10-05 15:33:34,571 - root - INFO - step: 27725 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:33:34,571 - root - INFO - lr: 1.4851e-05 gnorm: 1.13 [16:59:23< 7:31:19] +[titan] 2025-10-05 15:33:45,446 - root - INFO - step: 27730 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 15:33:45,446 - root - INFO - lr: 
1.4844e-05 gnorm: 1.07 [16:59:34< 7:31:08] +[titan] 2025-10-05 15:33:56,288 - root - INFO - step: 27735 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7851 +[titan] 2025-10-05 15:33:56,288 - root - INFO - lr: 1.4836e-05 gnorm: 1.13 [16:59:45< 7:30:57] +[titan] 2025-10-05 15:34:07,199 - root - INFO - step: 27740 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 15:34:07,199 - root - INFO - lr: 1.4829e-05 gnorm: 1.11 [16:59:56< 7:30:46] +[titan] 2025-10-05 15:34:18,088 - root - INFO - step: 27745 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:34:18,088 - root - INFO - lr: 1.4822e-05 gnorm: 1.14 [17:00:07< 7:30:35] +[titan] 2025-10-05 15:34:26,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:34:28,919 - root - INFO - step: 27750 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7660 +[titan] 2025-10-05 15:34:28,919 - root - INFO - lr: 1.4814e-05 gnorm: 1.10 [17:00:18< 7:30:24] +[titan] 2025-10-05 15:34:39,788 - root - INFO - step: 27755 loss: 2.0085 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:34:39,788 - root - INFO - lr: 1.4807e-05 gnorm: 1.13 [17:00:29< 7:30:13] +[titan] 2025-10-05 15:34:50,649 - root - INFO - step: 27760 loss: 2.0229 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7889 +[titan] 2025-10-05 15:34:50,649 - root - INFO - lr: 1.4800e-05 gnorm: 1.08 [17:00:39< 7:30:02] +[titan] 2025-10-05 15:35:01,518 - root - INFO - step: 27765 loss: 2.0372 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:35:01,518 - root - INFO - lr: 1.4792e-05 gnorm: 1.09 [17:00:50< 7:29:50] +[titan] 2025-10-05 15:35:12,435 - root - INFO - step: 27770 loss: 2.0491 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 15:35:12,435 - root - INFO - lr: 1.4785e-05 gnorm: 1.11 [17:01:01< 7:29:39] +[titan] 2025-10-05 15:35:23,343 - root - INFO - step: 27775 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:35:23,344 - root - INFO - lr: 1.4777e-05 gnorm: 1.10 [17:01:12< 7:29:28] +[titan] 2025-10-05 15:35:34,184 - root - INFO - step: 27780 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 15:35:34,184 - root - INFO - lr: 
1.4770e-05 gnorm: 1.09 [17:01:23< 7:29:17] +[titan] 2025-10-05 15:35:45,050 - root - INFO - step: 27785 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 15:35:45,050 - root - INFO - lr: 1.4763e-05 gnorm: 1.08 [17:01:34< 7:29:06] +[titan] 2025-10-05 15:35:55,912 - root - INFO - step: 27790 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:35:55,912 - root - INFO - lr: 1.4755e-05 gnorm: 1.10 [17:01:45< 7:28:55] +[titan] 2025-10-05 15:36:06,779 - root - INFO - step: 27795 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7727 +[titan] 2025-10-05 15:36:06,780 - root - INFO - lr: 1.4748e-05 gnorm: 1.09 [17:01:56< 7:28:44] +[titan] 2025-10-05 15:36:15,503 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:36:17,685 - root - INFO - step: 27800 loss: 2.0545 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8152 +[titan] 2025-10-05 15:36:17,685 - root - INFO - lr: 1.4741e-05 gnorm: 1.11 [17:02:06< 7:28:33] +[titan] 2025-10-05 15:36:28,568 - root - INFO - step: 27805 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 15:36:28,568 - root - INFO - lr: 1.4733e-05 gnorm: 1.09 [17:02:17< 7:28:22] +[titan] 2025-10-05 15:36:39,435 - root - INFO - step: 27810 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 15:36:39,435 - root - INFO - lr: 1.4726e-05 gnorm: 1.12 [17:02:28< 7:28:11] +[titan] 2025-10-05 15:36:50,300 - root - INFO - step: 27815 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:36:50,300 - root - INFO - lr: 1.4719e-05 gnorm: 1.08 [17:02:39< 7:27:59] +[titan] 2025-10-05 15:37:01,162 - root - INFO - step: 27820 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 15:37:01,162 - root - INFO - lr: 1.4711e-05 gnorm: 1.10 [17:02:50< 7:27:48] +[titan] 2025-10-05 15:37:12,068 - root - INFO - step: 27825 loss: 2.0443 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8065 +[titan] 2025-10-05 15:37:12,068 - root - INFO - lr: 1.4704e-05 gnorm: 1.12 [17:03:01< 7:27:37] +[titan] 2025-10-05 15:37:22,924 - root - INFO - step: 27830 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:37:22,924 - root - INFO - lr: 
1.4697e-05 gnorm: 1.12 [17:03:12< 7:27:26] +[titan] 2025-10-05 15:37:33,784 - root - INFO - step: 27835 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 15:37:33,784 - root - INFO - lr: 1.4689e-05 gnorm: 1.09 [17:03:23< 7:27:15] +[titan] 2025-10-05 15:37:44,693 - root - INFO - step: 27840 loss: 1.9926 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 15:37:44,694 - root - INFO - lr: 1.4682e-05 gnorm: 1.08 [17:03:33< 7:27:04] +[titan] 2025-10-05 15:37:55,562 - root - INFO - step: 27845 loss: 1.9782 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 15:37:55,562 - root - INFO - lr: 1.4675e-05 gnorm: 1.11 [17:03:44< 7:26:53] +[titan] 2025-10-05 15:38:04,219 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:38:06,403 - root - INFO - step: 27850 loss: 1.9362 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 15:38:06,403 - root - INFO - lr: 1.4667e-05 gnorm: 1.10 [17:03:55< 7:26:42] +[titan] 2025-10-05 15:38:17,311 - root - INFO - step: 27855 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 15:38:17,311 - root - INFO - lr: 1.4660e-05 gnorm: 1.11 [17:04:06< 7:26:31] +[titan] 2025-10-05 15:38:28,219 - root - INFO - step: 27860 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 15:38:28,219 - root - INFO - lr: 1.4653e-05 gnorm: 1.12 [17:04:17< 7:26:20] +[titan] 2025-10-05 15:38:39,092 - root - INFO - step: 27865 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 15:38:39,092 - root - INFO - lr: 1.4645e-05 gnorm: 1.10 [17:04:28< 7:26:08] +[titan] 2025-10-05 15:38:49,999 - root - INFO - step: 27870 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 15:38:49,999 - root - INFO - lr: 1.4638e-05 gnorm: 1.15 [17:04:39< 7:25:57] +[titan] 2025-10-05 15:39:00,879 - root - INFO - step: 27875 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:39:00,879 - root - INFO - lr: 1.4631e-05 gnorm: 1.10 [17:04:50< 7:25:46] +[titan] 2025-10-05 15:39:11,796 - root - INFO - step: 27880 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 15:39:11,796 - root - INFO - lr: 
1.4623e-05 gnorm: 1.09 [17:05:01< 7:25:35] +[titan] 2025-10-05 15:39:22,647 - root - INFO - step: 27885 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8326 +[titan] 2025-10-05 15:39:22,647 - root - INFO - lr: 1.4616e-05 gnorm: 1.15 [17:05:11< 7:25:24] +[titan] 2025-10-05 15:39:33,473 - root - INFO - step: 27890 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 15:39:33,473 - root - INFO - lr: 1.4609e-05 gnorm: 1.11 [17:05:22< 7:25:13] +[titan] 2025-10-05 15:39:44,338 - root - INFO - step: 27895 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 15:39:44,338 - root - INFO - lr: 1.4601e-05 gnorm: 1.12 [17:05:33< 7:25:02] +[titan] 2025-10-05 15:39:53,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:39:55,213 - root - INFO - step: 27900 loss: 2.0345 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 15:39:55,213 - root - INFO - lr: 1.4594e-05 gnorm: 1.12 [17:05:44< 7:24:51] +[titan] 2025-10-05 15:40:06,088 - root - INFO - step: 27905 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:40:06,088 - root - INFO - lr: 1.4587e-05 gnorm: 1.11 [17:05:55< 7:24:40] +[titan] 2025-10-05 15:40:16,993 - root - INFO - step: 27910 loss: 2.0536 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8146 +[titan] 2025-10-05 15:40:16,993 - root - INFO - lr: 1.4579e-05 gnorm: 1.11 [17:06:06< 7:24:29] +[titan] 2025-10-05 15:40:27,815 - root - INFO - step: 27915 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 15:40:27,815 - root - INFO - lr: 1.4572e-05 gnorm: 1.11 [17:06:17< 7:24:18] +[titan] 2025-10-05 15:40:38,643 - root - INFO - step: 27920 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8155 +[titan] 2025-10-05 15:40:38,643 - root - INFO - lr: 1.4565e-05 gnorm: 1.07 [17:06:27< 7:24:06] +[titan] 2025-10-05 15:40:49,515 - root - INFO - step: 27925 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7489 +[titan] 2025-10-05 15:40:49,515 - root - INFO - lr: 1.4558e-05 gnorm: 1.15 [17:06:38< 7:23:55] +[titan] 2025-10-05 15:41:00,387 - root - INFO - step: 27930 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 15:41:00,388 - root - INFO - lr: 
1.4550e-05 gnorm: 1.08 [17:06:49< 7:23:44] +[titan] 2025-10-05 15:41:11,313 - root - INFO - step: 27935 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 15:41:11,313 - root - INFO - lr: 1.4543e-05 gnorm: 1.11 [17:07:00< 7:23:33] +[titan] 2025-10-05 15:41:22,241 - root - INFO - step: 27940 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:41:22,241 - root - INFO - lr: 1.4536e-05 gnorm: 1.12 [17:07:11< 7:23:22] +[titan] 2025-10-05 15:41:33,099 - root - INFO - step: 27945 loss: 2.0587 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 15:41:33,100 - root - INFO - lr: 1.4528e-05 gnorm: 1.10 [17:07:22< 7:23:11] +[titan] 2025-10-05 15:41:41,784 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:41:43,967 - root - INFO - step: 27950 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:41:43,968 - root - INFO - lr: 1.4521e-05 gnorm: 1.11 [17:07:33< 7:23:00] +[titan] 2025-10-05 15:41:54,868 - root - INFO - step: 27955 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 15:41:54,868 - root - INFO - lr: 1.4514e-05 gnorm: 1.12 [17:07:44< 7:22:49] +[titan] 2025-10-05 15:42:05,736 - root - INFO - step: 27960 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:05,736 - root - INFO - lr: 1.4507e-05 gnorm: 1.10 [17:07:54< 7:22:38] +[titan] 2025-10-05 15:42:16,692 - root - INFO - step: 27965 loss: 1.9991 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:42:16,692 - root - INFO - lr: 1.4499e-05 gnorm: 1.12 [17:08:05< 7:22:27] +[titan] 2025-10-05 15:42:27,570 - root - INFO - step: 27970 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 15:42:27,570 - root - INFO - lr: 1.4492e-05 gnorm: 1.12 [17:08:16< 7:22:16] +[titan] 2025-10-05 15:42:38,440 - root - INFO - step: 27975 loss: 2.0135 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:38,440 - root - INFO - lr: 1.4485e-05 gnorm: 1.13 [17:08:27< 7:22:04] +[titan] 2025-10-05 15:42:49,330 - root - INFO - step: 27980 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 15:42:49,330 - root - INFO - lr: 
1.4477e-05 gnorm: 1.12 [17:08:38< 7:21:53] +[titan] 2025-10-05 15:43:00,202 - root - INFO - step: 27985 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 15:43:00,202 - root - INFO - lr: 1.4470e-05 gnorm: 1.10 [17:08:49< 7:21:42] +[titan] 2025-10-05 15:43:11,083 - root - INFO - step: 27990 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:43:11,083 - root - INFO - lr: 1.4463e-05 gnorm: 1.11 [17:09:00< 7:21:31] +[titan] 2025-10-05 15:43:21,971 - root - INFO - step: 27995 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 15:43:21,972 - root - INFO - lr: 1.4456e-05 gnorm: 1.10 [17:09:11< 7:21:20] +[titan] 2025-10-05 15:43:30,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:43:32,877 - root - INFO - step: 28000 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 15:43:32,878 - root - INFO - lr: 1.4448e-05 gnorm: 1.08 [17:09:22< 7:21:09] +[titan] 2025-10-05 15:43:43,741 - root - INFO - step: 28005 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:43:43,741 - root - INFO - lr: 1.4441e-05 gnorm: 1.13 [17:09:32< 7:20:58] +[titan] 2025-10-05 15:43:54,582 - root - INFO - step: 28010 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:43:54,582 - root - INFO - lr: 1.4434e-05 gnorm: 1.11 [17:09:43< 7:20:47] +[titan] 2025-10-05 15:44:05,429 - root - INFO - step: 28015 loss: 2.0300 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 15:44:05,429 - root - INFO - lr: 1.4426e-05 gnorm: 1.11 [17:09:54< 7:20:36] +[titan] 2025-10-05 15:44:16,331 - root - INFO - step: 28020 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 15:44:16,331 - root - INFO - lr: 1.4419e-05 gnorm: 1.09 [17:10:05< 7:20:25] +[titan] 2025-10-05 15:44:27,186 - root - INFO - step: 28025 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 15:44:27,186 - root - INFO - lr: 1.4412e-05 gnorm: 1.08 [17:10:16< 7:20:13] +[titan] 2025-10-05 15:44:38,082 - root - INFO - step: 28030 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 15:44:38,082 - root - INFO - lr: 
1.4405e-05 gnorm: 1.17 [17:10:27< 7:20:02] +[titan] 2025-10-05 15:44:48,943 - root - INFO - step: 28035 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 15:44:48,943 - root - INFO - lr: 1.4397e-05 gnorm: 1.12 [17:10:38< 7:19:51] +[titan] 2025-10-05 15:44:59,808 - root - INFO - step: 28040 loss: 2.0729 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8327 +[titan] 2025-10-05 15:44:59,808 - root - INFO - lr: 1.4390e-05 gnorm: 1.14 [17:10:49< 7:19:40] +[titan] 2025-10-05 15:45:10,680 - root - INFO - step: 28045 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 15:45:10,681 - root - INFO - lr: 1.4383e-05 gnorm: 1.11 [17:10:59< 7:19:29] +[titan] 2025-10-05 15:45:19,401 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:45:21,586 - root - INFO - step: 28050 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7741 +[titan] 2025-10-05 15:45:21,586 - root - INFO - lr: 1.4376e-05 gnorm: 1.06 [17:11:10< 7:19:18] +[titan] 2025-10-05 15:45:32,449 - root - INFO - step: 28055 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:45:32,449 - root - INFO - lr: 1.4368e-05 gnorm: 1.10 [17:11:21< 7:19:07] +[titan] 2025-10-05 15:45:43,299 - root - INFO - step: 28060 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:45:43,299 - root - INFO - lr: 1.4361e-05 gnorm: 1.14 [17:11:32< 7:18:56] +[titan] 2025-10-05 15:45:54,193 - root - INFO - step: 28065 loss: 2.0655 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 15:45:54,193 - root - INFO - lr: 1.4354e-05 gnorm: 1.10 [17:11:43< 7:18:45] +[titan] 2025-10-05 15:46:05,045 - root - INFO - step: 28070 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 15:46:05,045 - root - INFO - lr: 1.4347e-05 gnorm: 1.08 [17:11:54< 7:18:34] +[titan] 2025-10-05 15:46:15,889 - root - INFO - step: 28075 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 15:46:15,889 - root - INFO - lr: 1.4339e-05 gnorm: 1.09 [17:12:05< 7:18:23] +[titan] 2025-10-05 15:46:26,781 - root - INFO - step: 28080 loss: 1.9684 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 15:46:26,781 - root - INFO - lr: 
1.4332e-05 gnorm: 1.10 [17:12:15< 7:18:11] +[titan] 2025-10-05 15:46:37,625 - root - INFO - step: 28085 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 15:46:37,625 - root - INFO - lr: 1.4325e-05 gnorm: 1.08 [17:12:26< 7:18:00] +[titan] 2025-10-05 15:46:48,485 - root - INFO - step: 28090 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:46:48,486 - root - INFO - lr: 1.4318e-05 gnorm: 1.09 [17:12:37< 7:17:49] +[titan] 2025-10-05 15:46:59,393 - root - INFO - step: 28095 loss: 1.9937 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 15:46:59,393 - root - INFO - lr: 1.4311e-05 gnorm: 1.12 [17:12:48< 7:17:38] +[titan] 2025-10-05 15:47:08,070 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:47:10,253 - root - INFO - step: 28100 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:47:10,253 - root - INFO - lr: 1.4303e-05 gnorm: 1.13 [17:12:59< 7:17:27] +[titan] 2025-10-05 15:47:21,183 - root - INFO - step: 28105 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 15:47:21,183 - root - INFO - lr: 1.4296e-05 gnorm: 1.14 [17:13:10< 7:17:16] +[titan] 2025-10-05 15:47:32,061 - root - INFO - step: 28110 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 15:47:32,061 - root - INFO - lr: 1.4289e-05 gnorm: 1.07 [17:13:21< 7:17:05] +[titan] 2025-10-05 15:47:42,930 - root - INFO - step: 28115 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:47:42,930 - root - INFO - lr: 1.4282e-05 gnorm: 1.11 [17:13:32< 7:16:54] +[titan] 2025-10-05 15:47:53,817 - root - INFO - step: 28120 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7942 +[titan] 2025-10-05 15:47:53,817 - root - INFO - lr: 1.4274e-05 gnorm: 1.09 [17:13:43< 7:16:43] +[titan] 2025-10-05 15:48:04,758 - root - INFO - step: 28125 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 15:48:04,758 - root - INFO - lr: 1.4267e-05 gnorm: 1.14 [17:13:53< 7:16:32] +[titan] 2025-10-05 15:48:15,645 - root - INFO - step: 28130 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 15:48:15,645 - root - INFO - lr: 
1.4260e-05 gnorm: 1.09 [17:14:04< 7:16:21] +[titan] 2025-10-05 15:48:26,569 - root - INFO - step: 28135 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 15:48:26,569 - root - INFO - lr: 1.4253e-05 gnorm: 1.15 [17:14:15< 7:16:09] +[titan] 2025-10-05 15:48:37,430 - root - INFO - step: 28140 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 15:48:37,430 - root - INFO - lr: 1.4246e-05 gnorm: 1.13 [17:14:26< 7:15:58] +[titan] 2025-10-05 15:48:48,297 - root - INFO - step: 28145 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7912 +[titan] 2025-10-05 15:48:48,297 - root - INFO - lr: 1.4238e-05 gnorm: 1.13 [17:14:37< 7:15:47] +[titan] 2025-10-05 15:48:56,997 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:48:59,187 - root - INFO - step: 28150 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 15:48:59,187 - root - INFO - lr: 1.4231e-05 gnorm: 1.13 [17:14:48< 7:15:36] +[titan] 2025-10-05 15:49:10,034 - root - INFO - step: 28155 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 15:49:10,035 - root - INFO - lr: 1.4224e-05 gnorm: 1.11 [17:14:59< 7:15:25] +[titan] 2025-10-05 15:49:21,051 - root - INFO - step: 28160 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 29,745 tflops: 412.66 mfu: 41.73% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 15:49:21,051 - root - INFO - lr: 1.4217e-05 gnorm: 1.09 [17:15:10< 7:15:14] +[titan] 2025-10-05 15:49:21,237 - root - INFO - Dumping profiler traces at step 28160 +[titan] 2025-10-05 15:49:21,281 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:49:32,111 - root - INFO - step: 28165 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 29,630 tflops: 411.07 mfu: 41.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 15:49:32,111 - root - INFO - lr: 1.4210e-05 gnorm: 1.11 [17:15:21< 7:15:03] +[titan] 2025-10-05 15:49:42,959 - root - INFO - step: 28170 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:49:42,959 - root - INFO - lr: 1.4202e-05 gnorm: 1.08 [17:15:32< 7:14:52] +[titan] 2025-10-05 15:49:53,795 - root - INFO - step: 28175 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:49:53,795 - root - INFO - lr: 1.4195e-05 gnorm: 1.14 [17:15:43< 7:14:41] +[titan] 2025-10-05 15:50:04,659 - root - INFO - step: 28180 loss: 
2.0355 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7994 +[titan] 2025-10-05 15:50:04,659 - root - INFO - lr: 1.4188e-05 gnorm: 1.10 [17:15:53< 7:14:30] +[titan] 2025-10-05 15:50:15,529 - root - INFO - step: 28185 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 15:50:15,529 - root - INFO - lr: 1.4181e-05 gnorm: 1.09 [17:16:04< 7:14:19] +[titan] 2025-10-05 15:50:26,481 - root - INFO - step: 28190 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 15:50:26,482 - root - INFO - lr: 1.4174e-05 gnorm: 1.11 [17:16:15< 7:14:08] +[titan] 2025-10-05 15:50:37,355 - root - INFO - step: 28195 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 15:50:37,355 - root - INFO - lr: 1.4166e-05 gnorm: 1.06 [17:16:26< 7:13:56] +[titan] 2025-10-05 15:50:46,040 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:50:48,223 - root - INFO - step: 28200 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:50:48,223 - root - INFO - lr: 1.4159e-05 gnorm: 1.12 [17:16:37< 7:13:45] +[titan] 2025-10-05 15:50:59,085 - root - INFO - step: 28205 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 15:50:59,085 - root - INFO - lr: 1.4152e-05 gnorm: 1.12 [17:16:48< 7:13:34] +[titan] 2025-10-05 15:51:09,953 - root - INFO - step: 28210 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 15:51:09,953 - root - INFO - lr: 1.4145e-05 gnorm: 1.14 [17:16:59< 7:13:23] +[titan] 2025-10-05 15:51:20,822 - root - INFO - step: 28215 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:51:20,822 - root - INFO - lr: 1.4138e-05 gnorm: 1.10 [17:17:10< 7:13:12] +[titan] 2025-10-05 15:51:31,728 - root - INFO - step: 28220 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 15:51:31,728 - root - INFO - lr: 1.4130e-05 gnorm: 1.18 [17:17:20< 7:13:01] +[titan] 2025-10-05 15:51:42,649 - root - INFO - step: 28225 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 15:51:42,650 - root - INFO - lr: 1.4123e-05 gnorm: 1.10 [17:17:31< 7:12:50] +[titan] 2025-10-05 15:51:53,522 - root - INFO - step: 28230 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 15:51:53,522 - root - INFO - lr: 
1.4116e-05 gnorm: 1.09 [17:17:42< 7:12:39] +[titan] 2025-10-05 15:52:04,406 - root - INFO - step: 28235 loss: 2.0389 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:52:04,406 - root - INFO - lr: 1.4109e-05 gnorm: 1.11 [17:17:53< 7:12:28] +[titan] 2025-10-05 15:52:15,280 - root - INFO - step: 28240 loss: 1.9948 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 15:52:15,281 - root - INFO - lr: 1.4102e-05 gnorm: 1.11 [17:18:04< 7:12:17] +[titan] 2025-10-05 15:52:26,225 - root - INFO - step: 28245 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 15:52:26,225 - root - INFO - lr: 1.4095e-05 gnorm: 1.12 [17:18:15< 7:12:06] +[titan] 2025-10-05 15:52:34,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:52:37,105 - root - INFO - step: 28250 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 15:52:37,105 - root - INFO - lr: 1.4087e-05 gnorm: 1.08 [17:18:26< 7:11:55] +[titan] 2025-10-05 15:52:48,023 - root - INFO - step: 28255 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 15:52:48,023 - root - INFO - lr: 1.4080e-05 gnorm: 1.10 [17:18:37< 7:11:43] +[titan] 2025-10-05 15:52:58,912 - root - INFO - step: 28260 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 15:52:58,913 - root - INFO - lr: 1.4073e-05 gnorm: 1.10 [17:18:48< 7:11:32] +[titan] 2025-10-05 15:53:09,803 - root - INFO - step: 28265 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 15:53:09,803 - root - INFO - lr: 1.4066e-05 gnorm: 1.11 [17:18:58< 7:11:21] +[titan] 2025-10-05 15:53:20,692 - root - INFO - step: 28270 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 15:53:20,692 - root - INFO - lr: 1.4059e-05 gnorm: 1.34 [17:19:09< 7:11:10] +[titan] 2025-10-05 15:53:31,636 - root - INFO - step: 28275 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 15:53:31,637 - root - INFO - lr: 1.4052e-05 gnorm: 1.11 [17:19:20< 7:10:59] +[titan] 2025-10-05 15:53:42,507 - root - INFO - step: 28280 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7702 +[titan] 2025-10-05 15:53:42,508 - root - INFO - lr: 
1.4044e-05 gnorm: 1.09 [17:19:31< 7:10:48] +[titan] 2025-10-05 15:53:53,408 - root - INFO - step: 28285 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:53:53,408 - root - INFO - lr: 1.4037e-05 gnorm: 1.12 [17:19:42< 7:10:37] +[titan] 2025-10-05 15:54:04,269 - root - INFO - step: 28290 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:54:04,269 - root - INFO - lr: 1.4030e-05 gnorm: 1.11 [17:19:53< 7:10:26] +[titan] 2025-10-05 15:54:15,140 - root - INFO - step: 28295 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:54:15,140 - root - INFO - lr: 1.4023e-05 gnorm: 1.09 [17:20:04< 7:10:15] +[titan] 2025-10-05 15:54:23,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:54:26,111 - root - INFO - step: 28300 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7860 +[titan] 2025-10-05 15:54:26,111 - root - INFO - lr: 1.4016e-05 gnorm: 1.07 [17:20:15< 7:10:04] +[titan] 2025-10-05 15:54:36,983 - root - INFO - step: 28305 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 15:54:36,984 - root - INFO - lr: 1.4009e-05 gnorm: 1.12 [17:20:26< 7:09:53] +[titan] 2025-10-05 15:54:47,858 - root - INFO - step: 28310 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:54:47,858 - root - INFO - lr: 1.4002e-05 gnorm: 1.07 [17:20:37< 7:09:42] +[titan] 2025-10-05 15:54:58,731 - root - INFO - step: 28315 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 15:54:58,732 - root - INFO - lr: 1.3994e-05 gnorm: 1.09 [17:20:47< 7:09:30] +[titan] 2025-10-05 15:55:09,654 - root - INFO - step: 28320 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 15:55:09,655 - root - INFO - lr: 1.3987e-05 gnorm: 1.46 [17:20:58< 7:09:19] +[titan] 2025-10-05 15:55:20,530 - root - INFO - step: 28325 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 15:55:20,530 - root - INFO - lr: 1.3980e-05 gnorm: 1.12 [17:21:09< 7:09:08] +[titan] 2025-10-05 15:55:31,457 - root - INFO - step: 28330 loss: 1.9576 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7300 +[titan] 2025-10-05 15:55:31,457 - root - INFO - lr: 
1.3973e-05 gnorm: 1.10 [17:21:20< 7:08:57] +[titan] 2025-10-05 15:55:42,330 - root - INFO - step: 28335 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 15:55:42,330 - root - INFO - lr: 1.3966e-05 gnorm: 1.12 [17:21:31< 7:08:46] +[titan] 2025-10-05 15:55:53,200 - root - INFO - step: 28340 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:55:53,200 - root - INFO - lr: 1.3959e-05 gnorm: 1.15 [17:21:42< 7:08:35] +[titan] 2025-10-05 15:56:04,083 - root - INFO - step: 28345 loss: 2.0214 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 15:56:04,083 - root - INFO - lr: 1.3952e-05 gnorm: 1.17 [17:21:53< 7:08:24] +[titan] 2025-10-05 15:56:12,818 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:56:15,006 - root - INFO - step: 28350 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 15:56:15,006 - root - INFO - lr: 1.3944e-05 gnorm: 1.14 [17:22:04< 7:08:13] +[titan] 2025-10-05 15:56:25,936 - root - INFO - step: 28355 loss: 1.9838 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 15:56:25,936 - root - INFO - lr: 1.3937e-05 gnorm: 1.10 [17:22:15< 7:08:02] +[titan] 2025-10-05 15:56:36,882 - root - INFO - step: 28360 loss: 2.0896 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 15:56:36,882 - root - INFO - lr: 1.3930e-05 gnorm: 1.15 [17:22:26< 7:07:51] +[titan] 2025-10-05 15:56:47,760 - root - INFO - step: 28365 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 15:56:47,760 - root - INFO - lr: 1.3923e-05 gnorm: 1.11 [17:22:36< 7:07:40] +[titan] 2025-10-05 15:56:58,635 - root - INFO - step: 28370 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 15:56:58,635 - root - INFO - lr: 1.3916e-05 gnorm: 1.09 [17:22:47< 7:07:29] +[titan] 2025-10-05 15:57:09,503 - root - INFO - step: 28375 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:57:09,503 - root - INFO - lr: 1.3909e-05 gnorm: 1.05 [17:22:58< 7:07:17] +[titan] 2025-10-05 15:57:20,365 - root - INFO - step: 28380 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7154 +[titan] 2025-10-05 15:57:20,365 - root - INFO - lr: 
1.3902e-05 gnorm: 1.13 [17:23:09< 7:07:06] +[titan] 2025-10-05 15:57:31,331 - root - INFO - step: 28385 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 29,881 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:57:31,331 - root - INFO - lr: 1.3895e-05 gnorm: 1.12 [17:23:20< 7:06:55] +[titan] 2025-10-05 15:57:42,204 - root - INFO - step: 28390 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 15:57:42,205 - root - INFO - lr: 1.3888e-05 gnorm: 1.13 [17:23:31< 7:06:44] +[titan] 2025-10-05 15:57:53,067 - root - INFO - step: 28395 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:57:53,067 - root - INFO - lr: 1.3880e-05 gnorm: 1.09 [17:23:42< 7:06:33] +[titan] 2025-10-05 15:58:01,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:58:03,923 - root - INFO - step: 28400 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8317 +[titan] 2025-10-05 15:58:03,923 - root - INFO - lr: 1.3873e-05 gnorm: 1.11 [17:23:53< 7:06:22] +[titan] 2025-10-05 15:58:14,796 - root - INFO - step: 28405 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 15:58:14,796 - root - INFO - lr: 1.3866e-05 gnorm: 1.09 [17:24:03< 7:06:11] +[titan] 2025-10-05 15:58:25,653 - root - INFO - step: 28410 loss: 1.9984 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7669 +[titan] 2025-10-05 15:58:25,653 - root - INFO - lr: 1.3859e-05 gnorm: 1.10 [17:24:14< 7:06:00] +[titan] 2025-10-05 15:58:36,589 - root - INFO - step: 28415 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 15:58:36,589 - root - INFO - lr: 1.3852e-05 gnorm: 1.13 [17:24:25< 7:05:49] +[titan] 2025-10-05 15:58:47,471 - root - INFO - step: 28420 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7957 +[titan] 2025-10-05 15:58:47,471 - root - INFO - lr: 1.3845e-05 gnorm: 1.10 [17:24:36< 7:05:38] +[titan] 2025-10-05 15:58:58,364 - root - INFO - step: 28425 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 15:58:58,364 - root - INFO - lr: 1.3838e-05 gnorm: 1.14 [17:24:47< 7:05:27] +[titan] 2025-10-05 15:59:09,235 - root - INFO - step: 28430 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 15:59:09,235 - root - INFO - lr: 
1.3831e-05 gnorm: 1.13 [17:24:58< 7:05:16] +[titan] 2025-10-05 15:59:20,106 - root - INFO - step: 28435 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 15:59:20,106 - root - INFO - lr: 1.3824e-05 gnorm: 1.13 [17:25:09< 7:05:04] +[titan] 2025-10-05 15:59:31,054 - root - INFO - step: 28440 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7996 +[titan] 2025-10-05 15:59:31,054 - root - INFO - lr: 1.3817e-05 gnorm: 1.10 [17:25:20< 7:04:53] +[titan] 2025-10-05 15:59:41,947 - root - INFO - step: 28445 loss: 2.0638 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 15:59:41,947 - root - INFO - lr: 1.3810e-05 gnorm: 1.16 [17:25:31< 7:04:42] +[titan] 2025-10-05 15:59:50,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:59:52,821 - root - INFO - step: 28450 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 15:59:52,821 - root - INFO - lr: 1.3802e-05 gnorm: 1.11 [17:25:42< 7:04:31] +[titan] 2025-10-05 16:00:03,693 - root - INFO - step: 28455 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 16:00:03,693 - root - INFO - lr: 1.3795e-05 gnorm: 1.09 [17:25:52< 7:04:20] +[titan] 2025-10-05 16:00:14,540 - root - INFO - step: 28460 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 16:00:14,540 - root - INFO - lr: 1.3788e-05 gnorm: 1.09 [17:26:03< 7:04:09] +[titan] 2025-10-05 16:00:25,402 - root - INFO - step: 28465 loss: 2.0314 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 16:00:25,402 - root - INFO - lr: 1.3781e-05 gnorm: 1.12 [17:26:14< 7:03:58] +[titan] 2025-10-05 16:00:36,344 - root - INFO - step: 28470 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:00:36,344 - root - INFO - lr: 1.3774e-05 gnorm: 1.11 [17:26:25< 7:03:47] +[titan] 2025-10-05 16:00:47,202 - root - INFO - step: 28475 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:00:47,202 - root - INFO - lr: 1.3767e-05 gnorm: 1.13 [17:26:36< 7:03:36] +[titan] 2025-10-05 16:00:58,120 - root - INFO - step: 28480 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 16:00:58,120 - root - INFO - lr: 
1.3760e-05 gnorm: 1.14 [17:26:47< 7:03:25] +[titan] 2025-10-05 16:01:09,012 - root - INFO - step: 28485 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 16:01:09,012 - root - INFO - lr: 1.3753e-05 gnorm: 1.14 [17:26:58< 7:03:14] +[titan] 2025-10-05 16:01:19,903 - root - INFO - step: 28490 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 16:01:19,903 - root - INFO - lr: 1.3746e-05 gnorm: 1.12 [17:27:09< 7:03:03] +[titan] 2025-10-05 16:01:30,819 - root - INFO - step: 28495 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 16:01:30,819 - root - INFO - lr: 1.3739e-05 gnorm: 1.18 [17:27:19< 7:02:51] +[titan] 2025-10-05 16:01:39,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:01:41,746 - root - INFO - step: 28500 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:01:41,746 - root - INFO - lr: 1.3732e-05 gnorm: 1.13 [17:27:30< 7:02:40] +[titan] 2025-10-05 16:01:52,631 - root - INFO - step: 28505 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7896 +[titan] 2025-10-05 16:01:52,632 - root - INFO - lr: 1.3725e-05 gnorm: 1.13 [17:27:41< 7:02:29] +[titan] 2025-10-05 16:02:03,551 - root - INFO - step: 28510 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:02:03,551 - root - INFO - lr: 1.3718e-05 gnorm: 1.12 [17:27:52< 7:02:18] +[titan] 2025-10-05 16:02:14,435 - root - INFO - step: 28515 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 16:02:14,435 - root - INFO - lr: 1.3711e-05 gnorm: 1.16 [17:28:03< 7:02:07] +[titan] 2025-10-05 16:02:25,309 - root - INFO - step: 28520 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 16:02:25,310 - root - INFO - lr: 1.3704e-05 gnorm: 1.09 [17:28:14< 7:01:56] +[titan] 2025-10-05 16:02:36,209 - root - INFO - step: 28525 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 16:02:36,209 - root - INFO - lr: 1.3696e-05 gnorm: 1.09 [17:28:25< 7:01:45] +[titan] 2025-10-05 16:02:47,089 - root - INFO - step: 28530 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 16:02:47,089 - root - INFO - lr: 
1.3689e-05 gnorm: 1.11 [17:28:36< 7:01:34] +[titan] 2025-10-05 16:02:57,982 - root - INFO - step: 28535 loss: 2.0168 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 16:02:57,982 - root - INFO - lr: 1.3682e-05 gnorm: 1.14 [17:28:47< 7:01:23] +[titan] 2025-10-05 16:03:08,840 - root - INFO - step: 28540 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 16:03:08,840 - root - INFO - lr: 1.3675e-05 gnorm: 1.13 [17:28:58< 7:01:12] +[titan] 2025-10-05 16:03:19,756 - root - INFO - step: 28545 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 16:03:19,756 - root - INFO - lr: 1.3668e-05 gnorm: 1.15 [17:29:08< 7:01:01] +[titan] 2025-10-05 16:03:28,457 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:03:30,650 - root - INFO - step: 28550 loss: 1.9538 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:03:30,650 - root - INFO - lr: 1.3661e-05 gnorm: 1.08 [17:29:19< 7:00:50] +[titan] 2025-10-05 16:03:41,553 - root - INFO - step: 28555 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 16:03:41,553 - root - INFO - lr: 1.3654e-05 gnorm: 1.11 [17:29:30< 7:00:39] +[titan] 2025-10-05 16:03:52,429 - root - INFO - step: 28560 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 16:03:52,429 - root - INFO - lr: 1.3647e-05 gnorm: 1.07 [17:29:41< 7:00:27] +[titan] 2025-10-05 16:04:03,288 - root - INFO - step: 28565 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 16:04:03,288 - root - INFO - lr: 1.3640e-05 gnorm: 1.10 [17:29:52< 7:00:16] +[titan] 2025-10-05 16:04:14,124 - root - INFO - step: 28570 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:04:14,124 - root - INFO - lr: 1.3633e-05 gnorm: 1.11 [17:30:03< 7:00:05] +[titan] 2025-10-05 16:04:25,006 - root - INFO - step: 28575 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 16:04:25,006 - root - INFO - lr: 1.3626e-05 gnorm: 1.95 [17:30:14< 6:59:54] +[titan] 2025-10-05 16:04:35,875 - root - INFO - step: 28580 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:04:35,875 - root - INFO - lr: 
1.3619e-05 gnorm: 1.09 [17:30:25< 6:59:43] +[titan] 2025-10-05 16:04:46,735 - root - INFO - step: 28585 loss: 1.9918 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 16:04:46,735 - root - INFO - lr: 1.3612e-05 gnorm: 1.12 [17:30:35< 6:59:32] +[titan] 2025-10-05 16:04:57,585 - root - INFO - step: 28590 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 16:04:57,585 - root - INFO - lr: 1.3605e-05 gnorm: 1.06 [17:30:46< 6:59:21] +[titan] 2025-10-05 16:05:08,445 - root - INFO - step: 28595 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 16:05:08,445 - root - INFO - lr: 1.3598e-05 gnorm: 1.15 [17:30:57< 6:59:10] +[titan] 2025-10-05 16:05:17,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:05:19,314 - root - INFO - step: 28600 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:05:19,314 - root - INFO - lr: 1.3591e-05 gnorm: 1.10 [17:31:08< 6:58:59] +[titan] 2025-10-05 16:05:30,213 - root - INFO - step: 28605 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 16:05:30,213 - root - INFO - lr: 1.3584e-05 gnorm: 1.11 [17:31:19< 6:58:48] +[titan] 2025-10-05 16:05:41,137 - root - INFO - step: 28610 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 16:05:41,137 - root - INFO - lr: 1.3577e-05 gnorm: 1.11 [17:31:30< 6:58:37] +[titan] 2025-10-05 16:05:52,006 - root - INFO - step: 28615 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:05:52,007 - root - INFO - lr: 1.3570e-05 gnorm: 1.10 [17:31:41< 6:58:25] +[titan] 2025-10-05 16:06:02,858 - root - INFO - step: 28620 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 16:06:02,859 - root - INFO - lr: 1.3563e-05 gnorm: 1.12 [17:31:52< 6:58:14] +[titan] 2025-10-05 16:06:13,712 - root - INFO - step: 28625 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 16:06:13,712 - root - INFO - lr: 1.3556e-05 gnorm: 1.10 [17:32:02< 6:58:03] +[titan] 2025-10-05 16:06:24,582 - root - INFO - step: 28630 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 16:06:24,582 - root - INFO - lr: 
1.3549e-05 gnorm: 1.11 [17:32:13< 6:57:52] +[titan] 2025-10-05 16:06:35,472 - root - INFO - step: 28635 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7192 +[titan] 2025-10-05 16:06:35,472 - root - INFO - lr: 1.3542e-05 gnorm: 1.12 [17:32:24< 6:57:41] +[titan] 2025-10-05 16:06:46,399 - root - INFO - step: 28640 loss: 2.0089 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7766 +[titan] 2025-10-05 16:06:46,399 - root - INFO - lr: 1.3535e-05 gnorm: 1.13 [17:32:35< 6:57:30] +[titan] 2025-10-05 16:06:57,266 - root - INFO - step: 28645 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 16:06:57,266 - root - INFO - lr: 1.3528e-05 gnorm: 1.16 [17:32:46< 6:57:19] +[titan] 2025-10-05 16:07:05,934 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:07:08,125 - root - INFO - step: 28650 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 16:07:08,125 - root - INFO - lr: 1.3521e-05 gnorm: 1.08 [17:32:57< 6:57:08] +[titan] 2025-10-05 16:07:18,989 - root - INFO - step: 28655 loss: 1.9921 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 16:07:18,989 - root - INFO - lr: 1.3514e-05 gnorm: 1.13 [17:33:08< 6:56:57] +[titan] 2025-10-05 16:07:29,863 - root - INFO - step: 28660 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:07:29,863 - root - INFO - lr: 1.3507e-05 gnorm: 1.11 [17:33:19< 6:56:46] +[titan] 2025-10-05 16:07:40,776 - root - INFO - step: 28665 loss: 2.0607 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8160 +[titan] 2025-10-05 16:07:40,776 - root - INFO - lr: 1.3500e-05 gnorm: 1.24 [17:33:29< 6:56:35] +[titan] 2025-10-05 16:07:51,762 - root - INFO - step: 28670 loss: 2.0573 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.81 mfu: 41.84% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 16:07:51,762 - root - INFO - lr: 1.3493e-05 gnorm: 1.20 [17:33:40< 6:56:24] +[titan] 2025-10-05 16:07:56,294 - root - INFO - Dumping profiler traces at step 28672 +[titan] 2025-10-05 16:07:56,333 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:08:02,863 - root - INFO - step: 28675 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,520 tflops: 409.54 mfu: 41.41% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 16:08:02,863 - root - INFO - lr: 1.3486e-05 gnorm: 1.09 [17:33:52< 6:56:13] +[titan] 2025-10-05 16:08:13,727 - root - INFO - step: 28680 loss: 
1.9747 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 16:08:13,728 - root - INFO - lr: 1.3479e-05 gnorm: 1.10 [17:34:02< 6:56:01] +[titan] 2025-10-05 16:08:24,599 - root - INFO - step: 28685 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7697 +[titan] 2025-10-05 16:08:24,599 - root - INFO - lr: 1.3472e-05 gnorm: 1.11 [17:34:13< 6:55:50] +[titan] 2025-10-05 16:08:35,472 - root - INFO - step: 28690 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 16:08:35,472 - root - INFO - lr: 1.3465e-05 gnorm: 1.12 [17:34:24< 6:55:39] +[titan] 2025-10-05 16:08:46,365 - root - INFO - step: 28695 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:08:46,366 - root - INFO - lr: 1.3458e-05 gnorm: 1.10 [17:34:35< 6:55:28] +[titan] 2025-10-05 16:08:55,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:08:57,207 - root - INFO - step: 28700 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 16:08:57,207 - root - INFO - lr: 1.3451e-05 gnorm: 1.16 [17:34:46< 6:55:17] +[titan] 2025-10-05 16:09:08,084 - root - INFO - step: 28705 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 16:09:08,084 - root - INFO - lr: 1.3444e-05 gnorm: 1.11 [17:34:57< 6:55:06] +[titan] 2025-10-05 16:09:18,920 - root - INFO - step: 28710 loss: 1.8967 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 16:09:18,920 - root - INFO - lr: 1.3437e-05 gnorm: 1.12 [17:35:08< 6:54:55] +[titan] 2025-10-05 16:09:29,743 - root - INFO - step: 28715 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 16:09:29,743 - root - INFO - lr: 1.3430e-05 gnorm: 1.24 [17:35:18< 6:54:44] +[titan] 2025-10-05 16:09:40,610 - root - INFO - step: 28720 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:09:40,610 - root - INFO - lr: 1.3423e-05 gnorm: 1.10 [17:35:29< 6:54:33] +[titan] 2025-10-05 16:09:51,475 - root - INFO - step: 28725 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 16:09:51,475 - root - INFO - lr: 1.3416e-05 gnorm: 1.07 [17:35:40< 6:54:22] +[titan] 2025-10-05 16:10:02,333 - root - INFO - step: 28730 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:10:02,333 - root - INFO - lr: 
1.3409e-05 gnorm: 1.13 [17:35:51< 6:54:11] +[titan] 2025-10-05 16:10:13,264 - root - INFO - step: 28735 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:10:13,265 - root - INFO - lr: 1.3402e-05 gnorm: 1.14 [17:36:02< 6:54:00] +[titan] 2025-10-05 16:10:24,137 - root - INFO - step: 28740 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:10:24,137 - root - INFO - lr: 1.3395e-05 gnorm: 1.07 [17:36:13< 6:53:48] +[titan] 2025-10-05 16:10:34,996 - root - INFO - step: 28745 loss: 2.0343 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7979 +[titan] 2025-10-05 16:10:34,996 - root - INFO - lr: 1.3389e-05 gnorm: 1.14 [17:36:24< 6:53:37] +[titan] 2025-10-05 16:10:43,931 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:10:46,124 - root - INFO - step: 28750 loss: 2.0411 memory: 118.84GiB(85.28%) tps: 29,446 tflops: 408.52 mfu: 41.31% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 16:10:46,125 - root - INFO - lr: 1.3382e-05 gnorm: 1.10 [17:36:35< 6:53:26] +[titan] 2025-10-05 16:10:56,975 - root - INFO - step: 28755 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7186 +[titan] 2025-10-05 16:10:56,975 - root - INFO - lr: 1.3375e-05 gnorm: 1.11 [17:36:46< 6:53:15] +[titan] 2025-10-05 16:11:07,804 - root - INFO - step: 28760 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 16:11:07,804 - root - INFO - lr: 1.3368e-05 gnorm: 1.13 [17:36:56< 6:53:04] +[titan] 2025-10-05 16:11:18,644 - root - INFO - step: 28765 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 16:11:18,644 - root - INFO - lr: 1.3361e-05 gnorm: 1.14 [17:37:07< 6:52:53] +[titan] 2025-10-05 16:11:29,465 - root - INFO - step: 28770 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 16:11:29,465 - root - INFO - lr: 1.3354e-05 gnorm: 1.11 [17:37:18< 6:52:42] +[titan] 2025-10-05 16:11:40,342 - root - INFO - step: 28775 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 16:11:40,342 - root - INFO - lr: 1.3347e-05 gnorm: 1.10 [17:37:29< 6:52:31] +[titan] 2025-10-05 16:11:51,163 - root - INFO - step: 28780 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 16:11:51,163 - root - INFO - lr: 
1.3340e-05 gnorm: 1.10 [17:37:40< 6:52:20] +[titan] 2025-10-05 16:12:01,972 - root - INFO - step: 28785 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,318 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:12:01,972 - root - INFO - lr: 1.3333e-05 gnorm: 1.09 [17:37:51< 6:52:09] +[titan] 2025-10-05 16:12:12,796 - root - INFO - step: 28790 loss: 2.0542 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 16:12:12,796 - root - INFO - lr: 1.3326e-05 gnorm: 1.14 [17:38:01< 6:51:58] +[titan] 2025-10-05 16:12:23,627 - root - INFO - step: 28795 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 16:12:23,627 - root - INFO - lr: 1.3319e-05 gnorm: 5.74 [17:38:12< 6:51:46] +[titan] 2025-10-05 16:12:32,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:12:34,510 - root - INFO - step: 28800 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 16:12:34,510 - root - INFO - lr: 1.3312e-05 gnorm: 1.12 [17:38:23< 6:51:35] +[titan] 2025-10-05 16:12:45,424 - root - INFO - step: 28805 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 16:12:45,424 - root - INFO - lr: 1.3305e-05 gnorm: 1.12 [17:38:34< 6:51:24] +[titan] 2025-10-05 16:12:56,285 - root - INFO - step: 28810 loss: 1.9337 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 16:12:56,285 - root - INFO - lr: 1.3298e-05 gnorm: 1.11 [17:38:45< 6:51:13] +[titan] 2025-10-05 16:13:07,115 - root - INFO - step: 28815 loss: 2.0821 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8406 +[titan] 2025-10-05 16:13:07,115 - root - INFO - lr: 1.3291e-05 gnorm: 1.14 [17:38:56< 6:51:02] +[titan] 2025-10-05 16:13:17,934 - root - INFO - step: 28820 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 16:13:17,934 - root - INFO - lr: 1.3284e-05 gnorm: 1.14 [17:39:07< 6:50:51] +[titan] 2025-10-05 16:13:28,784 - root - INFO - step: 28825 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7000 +[titan] 2025-10-05 16:13:28,784 - root - INFO - lr: 1.3278e-05 gnorm: 1.09 [17:39:17< 6:50:40] +[titan] 2025-10-05 16:13:39,674 - root - INFO - step: 28830 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 16:13:39,675 - root - INFO - lr: 
1.3271e-05 gnorm: 1.13 [17:39:28< 6:50:29] +[titan] 2025-10-05 16:13:50,584 - root - INFO - step: 28835 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 16:13:50,584 - root - INFO - lr: 1.3264e-05 gnorm: 1.14 [17:39:39< 6:50:18] +[titan] 2025-10-05 16:14:01,435 - root - INFO - step: 28840 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 16:14:01,435 - root - INFO - lr: 1.3257e-05 gnorm: 1.11 [17:39:50< 6:50:07] +[titan] 2025-10-05 16:14:12,264 - root - INFO - step: 28845 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 16:14:12,264 - root - INFO - lr: 1.3250e-05 gnorm: 1.14 [17:40:01< 6:49:56] +[titan] 2025-10-05 16:14:20,892 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:14:23,077 - root - INFO - step: 28850 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,305 tflops: 420.44 mfu: 42.51% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 16:14:23,077 - root - INFO - lr: 1.3243e-05 gnorm: 1.13 [17:40:12< 6:49:44] +[titan] 2025-10-05 16:14:33,903 - root - INFO - step: 28855 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 16:14:33,903 - root - INFO - lr: 1.3236e-05 gnorm: 1.10 [17:40:23< 6:49:33] +[titan] 2025-10-05 16:14:44,758 - root - INFO - step: 28860 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 16:14:44,758 - root - INFO - lr: 1.3229e-05 gnorm: 1.18 [17:40:33< 6:49:22] +[titan] 2025-10-05 16:14:55,624 - root - INFO - step: 28865 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 16:14:55,624 - root - INFO - lr: 1.3222e-05 gnorm: 1.13 [17:40:44< 6:49:11] +[titan] 2025-10-05 16:15:06,429 - root - INFO - step: 28870 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,328 tflops: 420.76 mfu: 42.54% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 16:15:06,429 - root - INFO - lr: 1.3215e-05 gnorm: 1.12 [17:40:55< 6:49:00] +[titan] 2025-10-05 16:15:17,255 - root - INFO - step: 28875 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 16:15:17,255 - root - INFO - lr: 1.3209e-05 gnorm: 1.14 [17:41:06< 6:48:49] +[titan] 2025-10-05 16:15:28,083 - root - INFO - step: 28880 loss: 2.0444 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8077 +[titan] 2025-10-05 16:15:28,084 - root - INFO - lr: 
1.3202e-05 gnorm: 1.11 [17:41:17< 6:48:38] +[titan] 2025-10-05 16:15:38,890 - root - INFO - step: 28885 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 16:15:38,890 - root - INFO - lr: 1.3195e-05 gnorm: 1.12 [17:41:28< 6:48:27] +[titan] 2025-10-05 16:15:49,744 - root - INFO - step: 28890 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7735 +[titan] 2025-10-05 16:15:49,744 - root - INFO - lr: 1.3188e-05 gnorm: 1.17 [17:41:38< 6:48:16] +[titan] 2025-10-05 16:16:00,598 - root - INFO - step: 28895 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 16:16:00,598 - root - INFO - lr: 1.3181e-05 gnorm: 1.11 [17:41:49< 6:48:05] +[titan] 2025-10-05 16:16:09,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:16:11,420 - root - INFO - step: 28900 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 16:16:11,421 - root - INFO - lr: 1.3174e-05 gnorm: 1.14 [17:42:00< 6:47:53] +[titan] 2025-10-05 16:16:22,244 - root - INFO - step: 28905 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:16:22,245 - root - INFO - lr: 1.3167e-05 gnorm: 1.10 [17:42:11< 6:47:42] +[titan] 2025-10-05 16:16:33,059 - root - INFO - step: 28910 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 16:16:33,059 - root - INFO - lr: 1.3160e-05 gnorm: 1.14 [17:42:22< 6:47:31] +[titan] 2025-10-05 16:16:43,886 - root - INFO - step: 28915 loss: 1.9331 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7085 +[titan] 2025-10-05 16:16:43,887 - root - INFO - lr: 1.3153e-05 gnorm: 1.09 [17:42:33< 6:47:20] +[titan] 2025-10-05 16:16:54,738 - root - INFO - step: 28920 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 16:16:54,738 - root - INFO - lr: 1.3147e-05 gnorm: 1.09 [17:42:43< 6:47:09] +[titan] 2025-10-05 16:17:05,576 - root - INFO - step: 28925 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 16:17:05,576 - root - INFO - lr: 1.3140e-05 gnorm: 1.13 [17:42:54< 6:46:58] +[titan] 2025-10-05 16:17:16,436 - root - INFO - step: 28930 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 16:17:16,436 - root - INFO - lr: 
1.3133e-05 gnorm: 1.12 [17:43:05< 6:46:47] +[titan] 2025-10-05 16:17:27,262 - root - INFO - step: 28935 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 16:17:27,263 - root - INFO - lr: 1.3126e-05 gnorm: 1.13 [17:43:16< 6:46:36] +[titan] 2025-10-05 16:17:38,068 - root - INFO - step: 28940 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 16:17:38,069 - root - INFO - lr: 1.3119e-05 gnorm: 1.12 [17:43:27< 6:46:25] +[titan] 2025-10-05 16:17:48,901 - root - INFO - step: 28945 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 16:17:48,902 - root - INFO - lr: 1.3112e-05 gnorm: 1.11 [17:43:38< 6:46:14] +[titan] 2025-10-05 16:17:57,530 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:17:59,707 - root - INFO - step: 28950 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 16:17:59,708 - root - INFO - lr: 1.3105e-05 gnorm: 1.13 [17:43:48< 6:46:02] +[titan] 2025-10-05 16:18:10,530 - root - INFO - step: 28955 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 16:18:10,531 - root - INFO - lr: 1.3099e-05 gnorm: 1.13 [17:43:59< 6:45:51] +[titan] 2025-10-05 16:18:21,362 - root - INFO - step: 28960 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7806 +[titan] 2025-10-05 16:18:21,362 - root - INFO - lr: 1.3092e-05 gnorm: 1.11 [17:44:10< 6:45:40] +[titan] 2025-10-05 16:18:32,177 - root - INFO - step: 28965 loss: 2.0315 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 16:18:32,178 - root - INFO - lr: 1.3085e-05 gnorm: 1.15 [17:44:21< 6:45:29] +[titan] 2025-10-05 16:18:43,001 - root - INFO - step: 28970 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7932 +[titan] 2025-10-05 16:18:43,002 - root - INFO - lr: 1.3078e-05 gnorm: 1.12 [17:44:32< 6:45:18] +[titan] 2025-10-05 16:18:53,823 - root - INFO - step: 28975 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7847 +[titan] 2025-10-05 16:18:53,823 - root - INFO - lr: 1.3071e-05 gnorm: 1.15 [17:44:42< 6:45:07] +[titan] 2025-10-05 16:19:04,658 - root - INFO - step: 28980 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7964 +[titan] 2025-10-05 16:19:04,658 - root - INFO - lr: 
1.3064e-05 gnorm: 1.09 [17:44:53< 6:44:56] +[titan] 2025-10-05 16:19:15,441 - root - INFO - step: 28985 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,389 tflops: 421.61 mfu: 42.63% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 16:19:15,441 - root - INFO - lr: 1.3057e-05 gnorm: 1.14 [17:45:04< 6:44:45] +[titan] 2025-10-05 16:19:26,267 - root - INFO - step: 28990 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:19:26,267 - root - INFO - lr: 1.3051e-05 gnorm: 1.12 [17:45:15< 6:44:34] +[titan] 2025-10-05 16:19:37,046 - root - INFO - step: 28995 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,400 tflops: 421.76 mfu: 42.65% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 16:19:37,046 - root - INFO - lr: 1.3044e-05 gnorm: 1.09 [17:45:26< 6:44:23] +[titan] 2025-10-05 16:19:45,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:19:47,884 - root - INFO - step: 29000 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 16:19:47,884 - root - INFO - lr: 1.3037e-05 gnorm: 1.10 [17:45:37< 6:44:11] +[titan] 2025-10-05 16:19:58,682 - root - INFO - step: 29005 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 16:19:58,682 - root - INFO - lr: 1.3030e-05 gnorm: 1.14 [17:45:47< 6:44:00] +[titan] 2025-10-05 16:20:09,482 - root - INFO - step: 29010 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 16:20:09,482 - root - INFO - lr: 1.3023e-05 gnorm: 1.08 [17:45:58< 6:43:49] +[titan] 2025-10-05 16:20:20,322 - root - INFO - step: 29015 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 16:20:20,323 - root - INFO - lr: 1.3016e-05 gnorm: 1.11 [17:46:09< 6:43:38] +[titan] 2025-10-05 16:20:31,122 - root - INFO - step: 29020 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 16:20:31,122 - root - INFO - lr: 1.3010e-05 gnorm: 1.14 [17:46:20< 6:43:27] +[titan] 2025-10-05 16:20:42,001 - root - INFO - step: 29025 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:20:42,001 - root - INFO - lr: 1.3003e-05 gnorm: 1.11 [17:46:31< 6:43:16] +[titan] 2025-10-05 16:20:52,862 - root - INFO - step: 29030 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 16:20:52,862 - root - INFO - lr: 
1.2996e-05 gnorm: 1.17 [17:46:41< 6:43:05] +[titan] 2025-10-05 16:21:03,692 - root - INFO - step: 29035 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8113 +[titan] 2025-10-05 16:21:03,692 - root - INFO - lr: 1.2989e-05 gnorm: 1.19 [17:46:52< 6:42:54] +[titan] 2025-10-05 16:21:14,546 - root - INFO - step: 29040 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 16:21:14,546 - root - INFO - lr: 1.2982e-05 gnorm: 1.13 [17:47:03< 6:42:43] +[titan] 2025-10-05 16:21:25,382 - root - INFO - step: 29045 loss: 2.0710 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 16:21:25,382 - root - INFO - lr: 1.2975e-05 gnorm: 1.11 [17:47:14< 6:42:32] +[titan] 2025-10-05 16:21:34,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:21:36,183 - root - INFO - step: 29050 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7903 +[titan] 2025-10-05 16:21:36,183 - root - INFO - lr: 1.2969e-05 gnorm: 1.15 [17:47:25< 6:42:21] +[titan] 2025-10-05 16:21:47,040 - root - INFO - step: 29055 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 16:21:47,040 - root - INFO - lr: 1.2962e-05 gnorm: 1.13 [17:47:36< 6:42:09] +[titan] 2025-10-05 16:21:57,945 - root - INFO - step: 29060 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 16:21:57,945 - root - INFO - lr: 1.2955e-05 gnorm: 1.09 [17:47:47< 6:41:58] +[titan] 2025-10-05 16:22:08,763 - root - INFO - step: 29065 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:22:08,763 - root - INFO - lr: 1.2948e-05 gnorm: 1.10 [17:47:57< 6:41:47] +[titan] 2025-10-05 16:22:19,602 - root - INFO - step: 29070 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 16:22:19,602 - root - INFO - lr: 1.2941e-05 gnorm: 1.12 [17:48:08< 6:41:36] +[titan] 2025-10-05 16:22:30,424 - root - INFO - step: 29075 loss: 1.9436 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.11 mfu: 42.48% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 16:22:30,424 - root - INFO - lr: 1.2935e-05 gnorm: 1.08 [17:48:19< 6:41:25] +[titan] 2025-10-05 16:22:41,221 - root - INFO - step: 29080 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.04 mfu: 42.57% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 16:22:41,221 - root - INFO - lr: 
1.2928e-05 gnorm: 1.12 [17:48:30< 6:41:14] +[titan] 2025-10-05 16:22:52,143 - root - INFO - step: 29085 loss: 2.0455 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 16:22:52,143 - root - INFO - lr: 1.2921e-05 gnorm: 1.13 [17:48:41< 6:41:03] +[titan] 2025-10-05 16:23:02,963 - root - INFO - step: 29090 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:23:02,964 - root - INFO - lr: 1.2914e-05 gnorm: 1.15 [17:48:52< 6:40:52] +[titan] 2025-10-05 16:23:13,781 - root - INFO - step: 29095 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7612 +[titan] 2025-10-05 16:23:13,781 - root - INFO - lr: 1.2907e-05 gnorm: 1.12 [17:49:02< 6:40:41] +[titan] 2025-10-05 16:23:22,389 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:23:24,561 - root - INFO - step: 29100 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 16:23:24,562 - root - INFO - lr: 1.2901e-05 gnorm: 1.15 [17:49:13< 6:40:30] +[titan] 2025-10-05 16:23:35,362 - root - INFO - step: 29105 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:23:35,363 - root - INFO - lr: 1.2894e-05 gnorm: 1.15 [17:49:24< 6:40:18] +[titan] 2025-10-05 16:23:46,147 - root - INFO - step: 29110 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,385 tflops: 421.54 mfu: 42.62% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 16:23:46,147 - root - INFO - lr: 1.2887e-05 gnorm: 1.13 [17:49:35< 6:40:07] +[titan] 2025-10-05 16:23:56,986 - root - INFO - step: 29115 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 16:23:56,986 - root - INFO - lr: 1.2880e-05 gnorm: 1.11 [17:49:46< 6:39:56] +[titan] 2025-10-05 16:24:07,804 - root - INFO - step: 29120 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 16:24:07,804 - root - INFO - lr: 1.2873e-05 gnorm: 1.09 [17:49:56< 6:39:45] +[titan] 2025-10-05 16:24:18,657 - root - INFO - step: 29125 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 16:24:18,657 - root - INFO - lr: 1.2867e-05 gnorm: 1.08 [17:50:07< 6:39:34] +[titan] 2025-10-05 16:24:29,461 - root - INFO - step: 29130 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 16:24:29,461 - root - INFO - lr: 
1.2860e-05 gnorm: 1.11 [17:50:18< 6:39:23] +[titan] 2025-10-05 16:24:40,248 - root - INFO - step: 29135 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,379 tflops: 421.46 mfu: 42.61% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:24:40,248 - root - INFO - lr: 1.2853e-05 gnorm: 1.23 [17:50:29< 6:39:12] +[titan] 2025-10-05 16:24:51,066 - root - INFO - step: 29140 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 16:24:51,066 - root - INFO - lr: 1.2846e-05 gnorm: 1.11 [17:50:40< 6:39:01] +[titan] 2025-10-05 16:25:01,882 - root - INFO - step: 29145 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.33 mfu: 42.50% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 16:25:01,882 - root - INFO - lr: 1.2840e-05 gnorm: 1.14 [17:50:50< 6:38:50] +[titan] 2025-10-05 16:25:10,529 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:25:12,729 - root - INFO - step: 29150 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:12,730 - root - INFO - lr: 1.2833e-05 gnorm: 1.16 [17:51:01< 6:38:39] +[titan] 2025-10-05 16:25:23,552 - root - INFO - step: 29155 loss: 1.9771 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:25:23,552 - root - INFO - lr: 1.2826e-05 gnorm: 1.11 [17:51:12< 6:38:27] +[titan] 2025-10-05 16:25:34,364 - root - INFO - step: 29160 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:34,364 - root - INFO - lr: 1.2819e-05 gnorm: 1.13 [17:51:23< 6:38:16] +[titan] 2025-10-05 16:25:45,141 - root - INFO - step: 29165 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:25:45,141 - root - INFO - lr: 1.2813e-05 gnorm: 1.10 [17:51:34< 6:38:05] +[titan] 2025-10-05 16:25:55,942 - root - INFO - step: 29170 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 16:25:55,943 - root - INFO - lr: 1.2806e-05 gnorm: 1.12 [17:51:45< 6:37:54] +[titan] 2025-10-05 16:26:06,754 - root - INFO - step: 29175 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:26:06,754 - root - INFO - lr: 1.2799e-05 gnorm: 1.13 [17:51:55< 6:37:43] +[titan] 2025-10-05 16:26:17,565 - root - INFO - step: 29180 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,310 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 16:26:17,566 - root - INFO - lr: 
1.2792e-05 gnorm: 1.11 [17:52:06< 6:37:32] +[titan] 2025-10-05 16:26:26,492 - root - INFO - Dumping profiler traces at step 29184 +[titan] 2025-10-05 16:26:26,532 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:26:28,703 - root - INFO - step: 29185 loss: 2.0239 memory: 118.84GiB(85.28%) tps: 29,423 tflops: 408.20 mfu: 41.27% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:26:28,703 - root - INFO - lr: 1.2786e-05 gnorm: 1.13 [17:52:17< 6:37:21] +[titan] 2025-10-05 16:26:39,480 - root - INFO - step: 29190 loss: 2.0459 memory: 118.84GiB(85.28%) tps: 30,405 tflops: 421.82 mfu: 42.65% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 16:26:39,480 - root - INFO - lr: 1.2779e-05 gnorm: 1.08 [17:52:28< 6:37:10] +[titan] 2025-10-05 16:26:50,281 - root - INFO - step: 29195 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 16:26:50,282 - root - INFO - lr: 1.2772e-05 gnorm: 1.11 [17:52:39< 6:36:59] +[titan] 2025-10-05 16:26:58,914 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:27:01,083 - root - INFO - step: 29200 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:27:01,084 - root - INFO - lr: 1.2765e-05 gnorm: 1.10 [17:52:50< 6:36:48] +[titan] 2025-10-05 16:27:11,900 - root - INFO - step: 29205 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,295 tflops: 420.29 mfu: 42.50% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:27:11,900 - root - INFO - lr: 1.2759e-05 gnorm: 1.11 [17:53:01< 6:36:37] +[titan] 2025-10-05 16:27:22,704 - root - INFO - step: 29210 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:27:22,704 - root - INFO - lr: 1.2752e-05 gnorm: 1.13 [17:53:11< 6:36:25] +[titan] 2025-10-05 16:27:33,520 - root - INFO - step: 29215 loss: 1.9806 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 16:27:33,520 - root - INFO - lr: 1.2745e-05 gnorm: 1.13 [17:53:22< 6:36:14] +[titan] 2025-10-05 16:27:44,344 - root - INFO - step: 29220 loss: 2.0330 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:27:44,344 - root - INFO - lr: 1.2738e-05 gnorm: 1.11 [17:53:33< 6:36:03] +[titan] 2025-10-05 16:27:55,246 - root - INFO - step: 29225 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 16:27:55,246 - root - INFO - lr: 1.2732e-05 gnorm: 1.14 [17:53:44< 6:35:52] +[titan] 2025-10-05 16:28:06,063 - root - INFO - step: 29230 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 16:28:06,064 - root - INFO - lr: 
1.2725e-05 gnorm: 1.10 [17:53:55< 6:35:41] +[titan] 2025-10-05 16:28:16,881 - root - INFO - step: 29235 loss: 1.9977 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7668 +[titan] 2025-10-05 16:28:16,882 - root - INFO - lr: 1.2718e-05 gnorm: 1.12 [17:54:05< 6:35:30] +[titan] 2025-10-05 16:28:27,741 - root - INFO - step: 29240 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 16:28:27,741 - root - INFO - lr: 1.2711e-05 gnorm: 1.12 [17:54:16< 6:35:19] +[titan] 2025-10-05 16:28:38,608 - root - INFO - step: 29245 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 16:28:38,608 - root - INFO - lr: 1.2705e-05 gnorm: 1.14 [17:54:27< 6:35:08] +[titan] 2025-10-05 16:28:47,296 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:28:49,481 - root - INFO - step: 29250 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7535 +[titan] 2025-10-05 16:28:49,482 - root - INFO - lr: 1.2698e-05 gnorm: 1.12 [17:54:38< 6:34:57] +[titan] 2025-10-05 16:29:00,345 - root - INFO - step: 29255 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 16:29:00,345 - root - INFO - lr: 1.2691e-05 gnorm: 1.13 [17:54:49< 6:34:46] +[titan] 2025-10-05 16:29:11,181 - root - INFO - step: 29260 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 16:29:11,181 - root - INFO - lr: 1.2684e-05 gnorm: 1.10 [17:55:00< 6:34:35] +[titan] 2025-10-05 16:29:22,010 - root - INFO - step: 29265 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 16:29:22,010 - root - INFO - lr: 1.2678e-05 gnorm: 1.10 [17:55:11< 6:34:23] +[titan] 2025-10-05 16:29:32,844 - root - INFO - step: 29270 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8340 +[titan] 2025-10-05 16:29:32,845 - root - INFO - lr: 1.2671e-05 gnorm: 1.14 [17:55:21< 6:34:12] +[titan] 2025-10-05 16:29:43,662 - root - INFO - step: 29275 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 16:29:43,662 - root - INFO - lr: 1.2664e-05 gnorm: 1.10 [17:55:32< 6:34:01] +[titan] 2025-10-05 16:29:54,552 - root - INFO - step: 29280 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 16:29:54,553 - root - INFO - lr: 
1.2658e-05 gnorm: 1.15 [17:55:43< 6:33:50] +[titan] 2025-10-05 16:30:05,442 - root - INFO - step: 29285 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:30:05,442 - root - INFO - lr: 1.2651e-05 gnorm: 1.15 [17:55:54< 6:33:39] +[titan] 2025-10-05 16:30:16,285 - root - INFO - step: 29290 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:30:16,285 - root - INFO - lr: 1.2644e-05 gnorm: 1.13 [17:56:05< 6:33:28] +[titan] 2025-10-05 16:30:27,122 - root - INFO - step: 29295 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 16:30:27,122 - root - INFO - lr: 1.2638e-05 gnorm: 1.16 [17:56:16< 6:33:17] +[titan] 2025-10-05 16:30:35,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:30:37,974 - root - INFO - step: 29300 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:30:37,975 - root - INFO - lr: 1.2631e-05 gnorm: 1.13 [17:56:27< 6:33:06] +[titan] 2025-10-05 16:30:48,835 - root - INFO - step: 29305 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 16:30:48,835 - root - INFO - lr: 1.2624e-05 gnorm: 1.12 [17:56:37< 6:32:55] +[titan] 2025-10-05 16:30:59,735 - root - INFO - step: 29310 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 16:30:59,735 - root - INFO - lr: 1.2617e-05 gnorm: 1.16 [17:56:48< 6:32:44] +[titan] 2025-10-05 16:31:10,585 - root - INFO - step: 29315 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 16:31:10,585 - root - INFO - lr: 1.2611e-05 gnorm: 1.11 [17:56:59< 6:32:33] +[titan] 2025-10-05 16:31:21,451 - root - INFO - step: 29320 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:31:21,452 - root - INFO - lr: 1.2604e-05 gnorm: 1.14 [17:57:10< 6:32:22] +[titan] 2025-10-05 16:31:32,282 - root - INFO - step: 29325 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 16:31:32,283 - root - INFO - lr: 1.2597e-05 gnorm: 1.08 [17:57:21< 6:32:11] +[titan] 2025-10-05 16:31:43,142 - root - INFO - step: 29330 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7937 +[titan] 2025-10-05 16:31:43,143 - root - INFO - lr: 
1.2591e-05 gnorm: 1.15 [17:57:32< 6:31:59] +[titan] 2025-10-05 16:31:54,012 - root - INFO - step: 29335 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 16:31:54,012 - root - INFO - lr: 1.2584e-05 gnorm: 1.12 [17:57:43< 6:31:48] +[titan] 2025-10-05 16:32:04,880 - root - INFO - step: 29340 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 16:32:04,880 - root - INFO - lr: 1.2577e-05 gnorm: 1.18 [17:57:53< 6:31:37] +[titan] 2025-10-05 16:32:15,774 - root - INFO - step: 29345 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 16:32:15,774 - root - INFO - lr: 1.2571e-05 gnorm: 1.14 [17:58:04< 6:31:26] +[titan] 2025-10-05 16:32:24,447 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:32:26,631 - root - INFO - step: 29350 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 16:32:26,632 - root - INFO - lr: 1.2564e-05 gnorm: 1.11 [17:58:15< 6:31:15] +[titan] 2025-10-05 16:32:37,480 - root - INFO - step: 29355 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:32:37,480 - root - INFO - lr: 1.2557e-05 gnorm: 1.10 [17:58:26< 6:31:04] +[titan] 2025-10-05 16:32:48,323 - root - INFO - step: 29360 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:32:48,323 - root - INFO - lr: 1.2551e-05 gnorm: 1.14 [17:58:37< 6:30:53] +[titan] 2025-10-05 16:32:59,199 - root - INFO - step: 29365 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 16:32:59,199 - root - INFO - lr: 1.2544e-05 gnorm: 1.13 [17:58:48< 6:30:42] +[titan] 2025-10-05 16:33:10,048 - root - INFO - step: 29370 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 16:33:10,048 - root - INFO - lr: 1.2537e-05 gnorm: 1.12 [17:58:59< 6:30:31] +[titan] 2025-10-05 16:33:20,934 - root - INFO - step: 29375 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 16:33:20,934 - root - INFO - lr: 1.2531e-05 gnorm: 1.15 [17:59:10< 6:30:20] +[titan] 2025-10-05 16:33:31,794 - root - INFO - step: 29380 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:33:31,794 - root - INFO - lr: 
1.2524e-05 gnorm: 1.11 [17:59:20< 6:30:09] +[titan] 2025-10-05 16:33:42,652 - root - INFO - step: 29385 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7647 +[titan] 2025-10-05 16:33:42,652 - root - INFO - lr: 1.2517e-05 gnorm: 1.13 [17:59:31< 6:29:58] +[titan] 2025-10-05 16:33:53,484 - root - INFO - step: 29390 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:33:53,484 - root - INFO - lr: 1.2511e-05 gnorm: 1.15 [17:59:42< 6:29:46] +[titan] 2025-10-05 16:34:04,355 - root - INFO - step: 29395 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 16:34:04,355 - root - INFO - lr: 1.2504e-05 gnorm: 1.11 [17:59:53< 6:29:35] +[titan] 2025-10-05 16:34:13,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:34:15,217 - root - INFO - step: 29400 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 16:34:15,218 - root - INFO - lr: 1.2497e-05 gnorm: 1.12 [18:00:04< 6:29:24] +[titan] 2025-10-05 16:34:26,084 - root - INFO - step: 29405 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 16:34:26,084 - root - INFO - lr: 1.2491e-05 gnorm: 1.13 [18:00:15< 6:29:13] +[titan] 2025-10-05 16:34:36,985 - root - INFO - step: 29410 loss: 1.9746 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:34:36,985 - root - INFO - lr: 1.2484e-05 gnorm: 1.14 [18:00:26< 6:29:02] +[titan] 2025-10-05 16:34:47,862 - root - INFO - step: 29415 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 16:34:47,862 - root - INFO - lr: 1.2477e-05 gnorm: 1.14 [18:00:36< 6:28:51] +[titan] 2025-10-05 16:34:58,716 - root - INFO - step: 29420 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:34:58,716 - root - INFO - lr: 1.2471e-05 gnorm: 1.10 [18:00:47< 6:28:40] +[titan] 2025-10-05 16:35:09,613 - root - INFO - step: 29425 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 16:35:09,613 - root - INFO - lr: 1.2464e-05 gnorm: 1.13 [18:00:58< 6:28:29] +[titan] 2025-10-05 16:35:20,487 - root - INFO - step: 29430 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7675 +[titan] 2025-10-05 16:35:20,488 - root - INFO - lr: 
1.2457e-05 gnorm: 1.12 [18:01:09< 6:28:18] +[titan] 2025-10-05 16:35:31,364 - root - INFO - step: 29435 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:35:31,364 - root - INFO - lr: 1.2451e-05 gnorm: 1.13 [18:01:20< 6:28:07] +[titan] 2025-10-05 16:35:42,266 - root - INFO - step: 29440 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 16:35:42,266 - root - INFO - lr: 1.2444e-05 gnorm: 1.13 [18:01:31< 6:27:56] +[titan] 2025-10-05 16:35:53,139 - root - INFO - step: 29445 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 16:35:53,139 - root - INFO - lr: 1.2438e-05 gnorm: 1.10 [18:01:42< 6:27:45] +[titan] 2025-10-05 16:36:01,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:36:04,036 - root - INFO - step: 29450 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:36:04,036 - root - INFO - lr: 1.2431e-05 gnorm: 1.10 [18:01:53< 6:27:34] +[titan] 2025-10-05 16:36:14,913 - root - INFO - step: 29455 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 16:36:14,913 - root - INFO - lr: 1.2424e-05 gnorm: 1.13 [18:02:03< 6:27:23] +[titan] 2025-10-05 16:36:25,795 - root - INFO - step: 29460 loss: 2.0213 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7869 +[titan] 2025-10-05 16:36:25,795 - root - INFO - lr: 1.2418e-05 gnorm: 1.13 [18:02:14< 6:27:11] +[titan] 2025-10-05 16:36:36,668 - root - INFO - step: 29465 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 16:36:36,668 - root - INFO - lr: 1.2411e-05 gnorm: 1.14 [18:02:25< 6:27:00] +[titan] 2025-10-05 16:36:47,594 - root - INFO - step: 29470 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 16:36:47,594 - root - INFO - lr: 1.2404e-05 gnorm: 1.17 [18:02:36< 6:26:49] +[titan] 2025-10-05 16:36:58,488 - root - INFO - step: 29475 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8272 +[titan] 2025-10-05 16:36:58,488 - root - INFO - lr: 1.2398e-05 gnorm: 1.14 [18:02:47< 6:26:38] +[titan] 2025-10-05 16:37:09,396 - root - INFO - step: 29480 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 16:37:09,396 - root - INFO - lr: 
1.2391e-05 gnorm: 1.10 [18:02:58< 6:26:27] +[titan] 2025-10-05 16:37:20,276 - root - INFO - step: 29485 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7523 +[titan] 2025-10-05 16:37:20,276 - root - INFO - lr: 1.2385e-05 gnorm: 1.14 [18:03:09< 6:26:16] +[titan] 2025-10-05 16:37:31,149 - root - INFO - step: 29490 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:37:31,149 - root - INFO - lr: 1.2378e-05 gnorm: 1.18 [18:03:20< 6:26:05] +[titan] 2025-10-05 16:37:42,032 - root - INFO - step: 29495 loss: 1.9702 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:37:42,032 - root - INFO - lr: 1.2371e-05 gnorm: 1.12 [18:03:31< 6:25:54] +[titan] 2025-10-05 16:37:50,726 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:37:52,909 - root - INFO - step: 29500 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 16:37:52,910 - root - INFO - lr: 1.2365e-05 gnorm: 1.18 [18:03:41< 6:25:43] +[titan] 2025-10-05 16:38:03,862 - root - INFO - step: 29505 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7779 +[titan] 2025-10-05 16:38:03,862 - root - INFO - lr: 1.2358e-05 gnorm: 1.08 [18:03:52< 6:25:32] +[titan] 2025-10-05 16:38:14,737 - root - INFO - step: 29510 loss: 2.0280 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 16:38:14,738 - root - INFO - lr: 1.2352e-05 gnorm: 1.12 [18:04:03< 6:25:21] +[titan] 2025-10-05 16:38:25,629 - root - INFO - step: 29515 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 16:38:25,629 - root - INFO - lr: 1.2345e-05 gnorm: 1.10 [18:04:14< 6:25:10] +[titan] 2025-10-05 16:38:36,496 - root - INFO - step: 29520 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 16:38:36,497 - root - INFO - lr: 1.2338e-05 gnorm: 1.14 [18:04:25< 6:24:59] +[titan] 2025-10-05 16:38:47,375 - root - INFO - step: 29525 loss: 2.0360 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 16:38:47,375 - root - INFO - lr: 1.2332e-05 gnorm: 1.12 [18:04:36< 6:24:48] +[titan] 2025-10-05 16:38:58,269 - root - INFO - step: 29530 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 16:38:58,270 - root - INFO - lr: 
1.2325e-05 gnorm: 1.14 [18:04:47< 6:24:37] +[titan] 2025-10-05 16:39:09,198 - root - INFO - step: 29535 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:39:09,198 - root - INFO - lr: 1.2319e-05 gnorm: 1.14 [18:04:58< 6:24:25] +[titan] 2025-10-05 16:39:20,067 - root - INFO - step: 29540 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7690 +[titan] 2025-10-05 16:39:20,067 - root - INFO - lr: 1.2312e-05 gnorm: 1.11 [18:05:09< 6:24:14] +[titan] 2025-10-05 16:39:30,927 - root - INFO - step: 29545 loss: 1.9548 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:39:30,927 - root - INFO - lr: 1.2305e-05 gnorm: 1.08 [18:05:20< 6:24:03] +[titan] 2025-10-05 16:39:39,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:39:41,783 - root - INFO - step: 29550 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 16:39:41,783 - root - INFO - lr: 1.2299e-05 gnorm: 1.16 [18:05:30< 6:23:52] +[titan] 2025-10-05 16:39:52,647 - root - INFO - step: 29555 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:39:52,647 - root - INFO - lr: 1.2292e-05 gnorm: 1.11 [18:05:41< 6:23:41] +[titan] 2025-10-05 16:40:03,511 - root - INFO - step: 29560 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 16:40:03,511 - root - INFO - lr: 1.2286e-05 gnorm: 1.11 [18:05:52< 6:23:30] +[titan] 2025-10-05 16:40:14,393 - root - INFO - step: 29565 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 16:40:14,393 - root - INFO - lr: 1.2279e-05 gnorm: 1.09 [18:06:03< 6:23:19] +[titan] 2025-10-05 16:40:25,289 - root - INFO - step: 29570 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 16:40:25,289 - root - INFO - lr: 1.2273e-05 gnorm: 1.15 [18:06:14< 6:23:08] +[titan] 2025-10-05 16:40:36,151 - root - INFO - step: 29575 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 16:40:36,151 - root - INFO - lr: 1.2266e-05 gnorm: 1.12 [18:06:25< 6:22:57] +[titan] 2025-10-05 16:40:47,014 - root - INFO - step: 29580 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:40:47,014 - root - INFO - lr: 
1.2259e-05 gnorm: 1.15 [18:06:36< 6:22:46] +[titan] 2025-10-05 16:40:57,884 - root - INFO - step: 29585 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7835 +[titan] 2025-10-05 16:40:57,884 - root - INFO - lr: 1.2253e-05 gnorm: 1.13 [18:06:46< 6:22:35] +[titan] 2025-10-05 16:41:08,765 - root - INFO - step: 29590 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 16:41:08,765 - root - INFO - lr: 1.2246e-05 gnorm: 1.12 [18:06:57< 6:22:24] +[titan] 2025-10-05 16:41:19,628 - root - INFO - step: 29595 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:41:19,628 - root - INFO - lr: 1.2240e-05 gnorm: 1.14 [18:07:08< 6:22:13] +[titan] 2025-10-05 16:41:28,344 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:41:30,522 - root - INFO - step: 29600 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 16:41:30,522 - root - INFO - lr: 1.2233e-05 gnorm: 1.11 [18:07:19< 6:22:02] +[titan] 2025-10-05 16:41:41,388 - root - INFO - step: 29605 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 16:41:41,389 - root - INFO - lr: 1.2227e-05 gnorm: 1.11 [18:07:30< 6:21:50] +[titan] 2025-10-05 16:41:52,245 - root - INFO - step: 29610 loss: 1.9448 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 16:41:52,245 - root - INFO - lr: 1.2220e-05 gnorm: 1.09 [18:07:41< 6:21:39] +[titan] 2025-10-05 16:42:03,126 - root - INFO - step: 29615 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8316 +[titan] 2025-10-05 16:42:03,126 - root - INFO - lr: 1.2214e-05 gnorm: 1.15 [18:07:52< 6:21:28] +[titan] 2025-10-05 16:42:13,989 - root - INFO - step: 29620 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7570 +[titan] 2025-10-05 16:42:13,989 - root - INFO - lr: 1.2207e-05 gnorm: 1.13 [18:08:03< 6:21:17] +[titan] 2025-10-05 16:42:24,845 - root - INFO - step: 29625 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 16:42:24,845 - root - INFO - lr: 1.2200e-05 gnorm: 1.11 [18:08:13< 6:21:06] +[titan] 2025-10-05 16:42:35,740 - root - INFO - step: 29630 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 16:42:35,740 - root - INFO - lr: 
1.2194e-05 gnorm: 1.16 [18:08:24< 6:20:55] +[titan] 2025-10-05 16:42:46,609 - root - INFO - step: 29635 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 16:42:46,609 - root - INFO - lr: 1.2187e-05 gnorm: 1.13 [18:08:35< 6:20:44] +[titan] 2025-10-05 16:42:57,451 - root - INFO - step: 29640 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:42:57,451 - root - INFO - lr: 1.2181e-05 gnorm: 1.11 [18:08:46< 6:20:33] +[titan] 2025-10-05 16:43:08,337 - root - INFO - step: 29645 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 16:43:08,337 - root - INFO - lr: 1.2174e-05 gnorm: 1.10 [18:08:57< 6:20:22] +[titan] 2025-10-05 16:43:17,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:43:19,192 - root - INFO - step: 29650 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7561 +[titan] 2025-10-05 16:43:19,192 - root - INFO - lr: 1.2168e-05 gnorm: 1.14 [18:09:08< 6:20:11] +[titan] 2025-10-05 16:43:30,040 - root - INFO - step: 29655 loss: 1.9877 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 16:43:30,040 - root - INFO - lr: 1.2161e-05 gnorm: 1.13 [18:09:19< 6:20:00] +[titan] 2025-10-05 16:43:40,896 - root - INFO - step: 29660 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:43:40,896 - root - INFO - lr: 1.2155e-05 gnorm: 1.16 [18:09:29< 6:19:49] +[titan] 2025-10-05 16:43:51,775 - root - INFO - step: 29665 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 16:43:51,776 - root - INFO - lr: 1.2148e-05 gnorm: 1.12 [18:09:40< 6:19:38] +[titan] 2025-10-05 16:44:02,651 - root - INFO - step: 29670 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7764 +[titan] 2025-10-05 16:44:02,651 - root - INFO - lr: 1.2142e-05 gnorm: 1.12 [18:09:51< 6:19:26] +[titan] 2025-10-05 16:44:13,541 - root - INFO - step: 29675 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 16:44:13,542 - root - INFO - lr: 1.2135e-05 gnorm: 1.12 [18:10:02< 6:19:15] +[titan] 2025-10-05 16:44:24,406 - root - INFO - step: 29680 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:44:24,406 - root - INFO - lr: 
1.2129e-05 gnorm: 1.10 [18:10:13< 6:19:04] +[titan] 2025-10-05 16:44:35,270 - root - INFO - step: 29685 loss: 2.0294 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 16:44:35,270 - root - INFO - lr: 1.2122e-05 gnorm: 1.14 [18:10:24< 6:18:53] +[titan] 2025-10-05 16:44:46,146 - root - INFO - step: 29690 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:44:46,146 - root - INFO - lr: 1.2116e-05 gnorm: 1.14 [18:10:35< 6:18:42] +[titan] 2025-10-05 16:44:57,137 - root - INFO - step: 29695 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:44:57,138 - root - INFO - lr: 1.2109e-05 gnorm: 1.16 [18:10:46< 6:18:31] +[titan] 2025-10-05 16:44:59,488 - root - INFO - Dumping profiler traces at step 29696 +[titan] 2025-10-05 16:44:59,524 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:45:06,052 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:45:08,245 - root - INFO - step: 29700 loss: 2.0615 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.29 mfu: 41.38% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8226 +[titan] 2025-10-05 16:45:08,245 - root - INFO - lr: 1.2103e-05 gnorm: 1.15 [18:10:57< 6:18:20] +[titan] 2025-10-05 16:45:19,144 - root - INFO - step: 29705 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 16:45:19,144 - root - INFO - lr: 1.2096e-05 gnorm: 1.11 [18:11:08< 6:18:09] +[titan] 2025-10-05 16:45:30,018 - root - INFO - step: 29710 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 16:45:30,019 - root - INFO - lr: 1.2090e-05 gnorm: 1.15 [18:11:19< 6:17:58] +[titan] 2025-10-05 16:45:40,886 - root - INFO - step: 29715 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:45:40,886 - root - INFO - lr: 1.2083e-05 gnorm: 1.09 [18:11:29< 6:17:47] +[titan] 2025-10-05 16:45:51,774 - root - INFO - step: 29720 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8207 +[titan] 2025-10-05 16:45:51,775 - root - INFO - lr: 1.2077e-05 gnorm: 1.13 [18:11:40< 6:17:36] +[titan] 2025-10-05 16:46:02,667 - root - INFO - step: 29725 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 16:46:02,667 - root - INFO - lr: 1.2070e-05 gnorm: 1.11 [18:11:51< 6:17:25] +[titan] 2025-10-05 16:46:13,605 - root - INFO - step: 29730 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 16:46:13,605 - root - INFO - lr: 
1.2064e-05 gnorm: 1.10 [18:12:02< 6:17:14] +[titan] 2025-10-05 16:46:24,504 - root - INFO - step: 29735 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 16:46:24,505 - root - INFO - lr: 1.2057e-05 gnorm: 1.14 [18:12:13< 6:17:03] +[titan] 2025-10-05 16:46:35,396 - root - INFO - step: 29740 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 16:46:35,397 - root - INFO - lr: 1.2051e-05 gnorm: 1.16 [18:12:24< 6:16:52] +[titan] 2025-10-05 16:46:46,263 - root - INFO - step: 29745 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:46:46,263 - root - INFO - lr: 1.2044e-05 gnorm: 1.14 [18:12:35< 6:16:41] +[titan] 2025-10-05 16:46:54,956 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:46:57,142 - root - INFO - step: 29750 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:46:57,142 - root - INFO - lr: 1.2038e-05 gnorm: 1.14 [18:12:46< 6:16:30] +[titan] 2025-10-05 16:47:08,011 - root - INFO - step: 29755 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 16:47:08,011 - root - INFO - lr: 1.2031e-05 gnorm: 1.14 [18:12:57< 6:16:18] +[titan] 2025-10-05 16:47:18,928 - root - INFO - step: 29760 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 16:47:18,929 - root - INFO - lr: 1.2025e-05 gnorm: 1.15 [18:13:07< 6:16:07] +[titan] 2025-10-05 16:47:29,805 - root - INFO - step: 29765 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 16:47:29,806 - root - INFO - lr: 1.2018e-05 gnorm: 1.11 [18:13:18< 6:15:56] +[titan] 2025-10-05 16:47:40,695 - root - INFO - step: 29770 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 16:47:40,695 - root - INFO - lr: 1.2012e-05 gnorm: 1.12 [18:13:29< 6:15:45] +[titan] 2025-10-05 16:47:51,568 - root - INFO - step: 29775 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 16:47:51,568 - root - INFO - lr: 1.2005e-05 gnorm: 1.13 [18:13:40< 6:15:34] +[titan] 2025-10-05 16:48:02,434 - root - INFO - step: 29780 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 16:48:02,434 - root - INFO - lr: 
1.1999e-05 gnorm: 1.13 [18:13:51< 6:15:23] +[titan] 2025-10-05 16:48:13,326 - root - INFO - step: 29785 loss: 2.0923 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 16:48:13,326 - root - INFO - lr: 1.1992e-05 gnorm: 1.17 [18:14:02< 6:15:12] +[titan] 2025-10-05 16:48:24,246 - root - INFO - step: 29790 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 16:48:24,246 - root - INFO - lr: 1.1986e-05 gnorm: 1.21 [18:14:13< 6:15:01] +[titan] 2025-10-05 16:48:35,115 - root - INFO - step: 29795 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7438 +[titan] 2025-10-05 16:48:35,115 - root - INFO - lr: 1.1979e-05 gnorm: 1.16 [18:14:24< 6:14:50] +[titan] 2025-10-05 16:48:43,808 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:48:45,984 - root - INFO - step: 29800 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:48:45,984 - root - INFO - lr: 1.1973e-05 gnorm: 1.17 [18:14:35< 6:14:39] +[titan] 2025-10-05 16:48:56,850 - root - INFO - step: 29805 loss: 2.0467 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 16:48:56,850 - root - INFO - lr: 1.1966e-05 gnorm: 1.13 [18:14:45< 6:14:28] +[titan] 2025-10-05 16:49:07,720 - root - INFO - step: 29810 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 16:49:07,720 - root - INFO - lr: 1.1960e-05 gnorm: 1.14 [18:14:56< 6:14:17] +[titan] 2025-10-05 16:49:18,594 - root - INFO - step: 29815 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 16:49:18,594 - root - INFO - lr: 1.1954e-05 gnorm: 1.11 [18:15:07< 6:14:06] +[titan] 2025-10-05 16:49:29,475 - root - INFO - step: 29820 loss: 2.0086 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7751 +[titan] 2025-10-05 16:49:29,475 - root - INFO - lr: 1.1947e-05 gnorm: 1.16 [18:15:18< 6:13:55] +[titan] 2025-10-05 16:49:40,387 - root - INFO - step: 29825 loss: 1.9867 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7563 +[titan] 2025-10-05 16:49:40,388 - root - INFO - lr: 1.1941e-05 gnorm: 1.10 [18:15:29< 6:13:44] +[titan] 2025-10-05 16:49:51,279 - root - INFO - step: 29830 loss: 1.9675 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 16:49:51,279 - root - INFO - lr: 
1.1934e-05 gnorm: 1.09 [18:15:40< 6:13:32] +[titan] 2025-10-05 16:50:02,138 - root - INFO - step: 29835 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7622 +[titan] 2025-10-05 16:50:02,138 - root - INFO - lr: 1.1928e-05 gnorm: 1.12 [18:15:51< 6:13:21] +[titan] 2025-10-05 16:50:13,006 - root - INFO - step: 29840 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 16:50:13,006 - root - INFO - lr: 1.1921e-05 gnorm: 1.13 [18:16:02< 6:13:10] +[titan] 2025-10-05 16:50:23,932 - root - INFO - step: 29845 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6997 +[titan] 2025-10-05 16:50:23,933 - root - INFO - lr: 1.1915e-05 gnorm: 1.10 [18:16:12< 6:12:59] +[titan] 2025-10-05 16:50:32,610 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:50:34,782 - root - INFO - step: 29850 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8188 +[titan] 2025-10-05 16:50:34,782 - root - INFO - lr: 1.1908e-05 gnorm: 1.18 [18:16:23< 6:12:48] +[titan] 2025-10-05 16:50:45,679 - root - INFO - step: 29855 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7933 +[titan] 2025-10-05 16:50:45,680 - root - INFO - lr: 1.1902e-05 gnorm: 1.17 [18:16:34< 6:12:37] +[titan] 2025-10-05 16:50:56,541 - root - INFO - step: 29860 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:50:56,541 - root - INFO - lr: 1.1896e-05 gnorm: 1.11 [18:16:45< 6:12:26] +[titan] 2025-10-05 16:51:07,402 - root - INFO - step: 29865 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 16:51:07,402 - root - INFO - lr: 1.1889e-05 gnorm: 1.18 [18:16:56< 6:12:15] +[titan] 2025-10-05 16:51:18,320 - root - INFO - step: 29870 loss: 1.9395 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 16:51:18,320 - root - INFO - lr: 1.1883e-05 gnorm: 1.13 [18:17:07< 6:12:04] +[titan] 2025-10-05 16:51:29,178 - root - INFO - step: 29875 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 16:51:29,178 - root - INFO - lr: 1.1876e-05 gnorm: 1.13 [18:17:18< 6:11:53] +[titan] 2025-10-05 16:51:40,033 - root - INFO - step: 29880 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 16:51:40,033 - root - INFO - lr: 
1.1870e-05 gnorm: 1.12 [18:17:29< 6:11:42] +[titan] 2025-10-05 16:51:50,881 - root - INFO - step: 29885 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 16:51:50,881 - root - INFO - lr: 1.1863e-05 gnorm: 1.10 [18:17:39< 6:11:31] +[titan] 2025-10-05 16:52:01,762 - root - INFO - step: 29890 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 16:52:01,762 - root - INFO - lr: 1.1857e-05 gnorm: 1.15 [18:17:50< 6:11:20] +[titan] 2025-10-05 16:52:12,608 - root - INFO - step: 29895 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 16:52:12,608 - root - INFO - lr: 1.1851e-05 gnorm: 1.13 [18:18:01< 6:11:09] +[titan] 2025-10-05 16:52:21,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:52:23,480 - root - INFO - step: 29900 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 16:52:23,481 - root - INFO - lr: 1.1844e-05 gnorm: 1.13 [18:18:12< 6:10:58] +[titan] 2025-10-05 16:52:34,301 - root - INFO - step: 29905 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 16:52:34,301 - root - INFO - lr: 1.1838e-05 gnorm: 1.15 [18:18:23< 6:10:46] +[titan] 2025-10-05 16:52:45,148 - root - INFO - step: 29910 loss: 1.9512 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 16:52:45,148 - root - INFO - lr: 1.1831e-05 gnorm: 1.11 [18:18:34< 6:10:35] +[titan] 2025-10-05 16:52:55,998 - root - INFO - step: 29915 loss: 2.0610 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8211 +[titan] 2025-10-05 16:52:55,998 - root - INFO - lr: 1.1825e-05 gnorm: 1.13 [18:18:45< 6:10:24] +[titan] 2025-10-05 16:53:06,867 - root - INFO - step: 29920 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 16:53:06,867 - root - INFO - lr: 1.1819e-05 gnorm: 1.12 [18:18:55< 6:10:13] +[titan] 2025-10-05 16:53:17,736 - root - INFO - step: 29925 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 16:53:17,736 - root - INFO - lr: 1.1812e-05 gnorm: 1.12 [18:19:06< 6:10:02] +[titan] 2025-10-05 16:53:28,570 - root - INFO - step: 29930 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7831 +[titan] 2025-10-05 16:53:28,570 - root - INFO - lr: 
1.1806e-05 gnorm: 1.12 [18:19:17< 6:09:51] +[titan] 2025-10-05 16:53:39,418 - root - INFO - step: 29935 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 16:53:39,418 - root - INFO - lr: 1.1799e-05 gnorm: 1.25 [18:19:28< 6:09:40] +[titan] 2025-10-05 16:53:50,272 - root - INFO - step: 29940 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 16:53:50,272 - root - INFO - lr: 1.1793e-05 gnorm: 1.12 [18:19:39< 6:09:29] +[titan] 2025-10-05 16:54:01,117 - root - INFO - step: 29945 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:54:01,117 - root - INFO - lr: 1.1787e-05 gnorm: 1.14 [18:19:50< 6:09:18] +[titan] 2025-10-05 16:54:09,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:54:12,029 - root - INFO - step: 29950 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 16:54:12,029 - root - INFO - lr: 1.1780e-05 gnorm: 1.18 [18:20:01< 6:09:07] +[titan] 2025-10-05 16:54:22,840 - root - INFO - step: 29955 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 16:54:22,840 - root - INFO - lr: 1.1774e-05 gnorm: 1.14 [18:20:11< 6:08:56] +[titan] 2025-10-05 16:54:33,694 - root - INFO - step: 29960 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 16:54:33,694 - root - INFO - lr: 1.1767e-05 gnorm: 1.14 [18:20:22< 6:08:45] +[titan] 2025-10-05 16:54:44,540 - root - INFO - step: 29965 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 16:54:44,540 - root - INFO - lr: 1.1761e-05 gnorm: 1.14 [18:20:33< 6:08:34] +[titan] 2025-10-05 16:54:55,380 - root - INFO - step: 29970 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 16:54:55,380 - root - INFO - lr: 1.1755e-05 gnorm: 1.13 [18:20:44< 6:08:22] +[titan] 2025-10-05 16:55:06,200 - root - INFO - step: 29975 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7975 +[titan] 2025-10-05 16:55:06,200 - root - INFO - lr: 1.1748e-05 gnorm: 1.16 [18:20:55< 6:08:11] +[titan] 2025-10-05 16:55:17,035 - root - INFO - step: 29980 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 16:55:17,035 - root - INFO - lr: 
1.1742e-05 gnorm: 1.16 [18:21:06< 6:08:00] +[titan] 2025-10-05 16:55:27,861 - root - INFO - step: 29985 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 16:55:27,861 - root - INFO - lr: 1.1736e-05 gnorm: 1.11 [18:21:16< 6:07:49] +[titan] 2025-10-05 16:55:38,685 - root - INFO - step: 29990 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 16:55:38,685 - root - INFO - lr: 1.1729e-05 gnorm: 1.08 [18:21:27< 6:07:38] +[titan] 2025-10-05 16:55:49,531 - root - INFO - step: 29995 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 16:55:49,531 - root - INFO - lr: 1.1723e-05 gnorm: 1.11 [18:21:38< 6:07:27] +[titan] 2025-10-05 16:55:58,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 16:56:00,346 - root - INFO - step: 30000 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 16:56:00,346 - root - INFO - lr: 1.1716e-05 gnorm: 1.14 [18:21:49< 6:07:16] +[titan] 2025-10-05 16:56:00,346 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 16:56:17,595 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 16:56:17,595 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.25 seconds. 
+[titan] 2025-10-05 16:58:26,179 - root - INFO - step: 30005 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 2,247 tflops: 31.17 mfu: 3.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 16:58:26,179 - root - INFO - lr: 1.1710e-05 gnorm: 1.15 [18:24:15< 6:07:50] +[titan] 2025-10-05 16:58:36,943 - root - INFO - step: 30010 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,443 tflops: 422.35 mfu: 42.70% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 16:58:36,943 - root - INFO - lr: 1.1704e-05 gnorm: 1.13 [18:24:25< 6:07:39] +[titan] 2025-10-05 16:58:47,757 - root - INFO - step: 30015 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7859 +[titan] 2025-10-05 16:58:47,757 - root - INFO - lr: 1.1697e-05 gnorm: 1.19 [18:24:36< 6:07:28] +[titan] 2025-10-05 16:58:58,551 - root - INFO - step: 30020 loss: 2.0398 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.19 mfu: 42.59% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 16:58:58,551 - root - INFO - lr: 1.1691e-05 gnorm: 1.16 [18:24:47< 6:07:16] +[titan] 2025-10-05 16:59:09,338 - root - INFO - step: 30025 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,378 tflops: 421.45 mfu: 42.61% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7804 +[titan] 2025-10-05 16:59:09,338 - root - INFO - lr: 1.1685e-05 gnorm: 1.17 [18:24:58< 6:07:05] +[titan] 2025-10-05 16:59:20,123 - root - INFO - step: 30030 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,384 tflops: 421.53 mfu: 42.62% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 16:59:20,123 - root - INFO - lr: 1.1678e-05 gnorm: 1.14 [18:25:09< 6:06:54] +[titan] 2025-10-05 16:59:30,956 - root - INFO - step: 30035 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 16:59:30,956 - root - INFO - lr: 
1.1672e-05 gnorm: 1.17 [18:25:19< 6:06:43] +[titan] 2025-10-05 16:59:41,784 - root - INFO - step: 30040 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7750 +[titan] 2025-10-05 16:59:41,784 - root - INFO - lr: 1.1666e-05 gnorm: 1.10 [18:25:30< 6:06:32] +[titan] 2025-10-05 16:59:52,578 - root - INFO - step: 30045 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.18 mfu: 42.59% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 16:59:52,578 - root - INFO - lr: 1.1659e-05 gnorm: 1.20 [18:25:41< 6:06:21] +[titan] 2025-10-05 17:00:01,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 17:00:03,430 - root - INFO - step: 30050 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:00:03,431 - root - INFO - lr: 1.1653e-05 gnorm: 1.13 [18:25:52< 6:06:10] +[titan] 2025-10-05 17:00:14,272 - root - INFO - step: 30055 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:00:14,273 - root - INFO - lr: 1.1647e-05 gnorm: 1.14 [18:26:03< 6:05:59] +[titan] 2025-10-05 17:00:25,096 - root - INFO - step: 30060 loss: 2.0424 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 17:00:25,096 - root - INFO - lr: 1.1640e-05 gnorm: 1.13 [18:26:14< 6:05:48] +[titan] 2025-10-05 17:00:35,911 - root - INFO - step: 30065 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.37 mfu: 42.50% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 17:00:35,911 - root - INFO - lr: 1.1634e-05 gnorm: 1.13 [18:26:24< 6:05:36] +[titan] 2025-10-05 17:00:46,749 - root - INFO - step: 30070 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 
30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:00:46,749 - root - INFO - lr: 1.1628e-05 gnorm: 1.12 [18:26:35< 6:05:25] +[titan] 2025-10-05 17:00:57,558 - root - INFO - step: 30075 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 17:00:57,558 - root - INFO - lr: 1.1621e-05 gnorm: 1.11 [18:26:46< 6:05:14] +[titan] 2025-10-05 17:01:08,392 - root - INFO - step: 30080 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7726 +[titan] 2025-10-05 17:01:08,392 - root - INFO - lr: 1.1615e-05 gnorm: 1.15 [18:26:57< 6:05:03] +[titan] 2025-10-05 17:01:19,229 - root - INFO - step: 30085 loss: 2.0397 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 17:01:19,229 - root - INFO - lr: 1.1609e-05 gnorm: 1.15 [18:27:08< 6:04:52] +[titan] 2025-10-05 17:01:30,104 - root - INFO - step: 30090 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:01:30,104 - root - INFO - lr: 1.1602e-05 gnorm: 1.11 [18:27:19< 6:04:41] +[titan] 2025-10-05 17:01:40,932 - root - INFO - step: 30095 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 17:01:40,932 - root - INFO - lr: 1.1596e-05 gnorm: 1.14 [18:27:29< 6:04:30] +[titan] 2025-10-05 17:01:49,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:01:51,740 - root - INFO - step: 30100 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:01:51,740 - root - INFO - lr: 1.1590e-05 gnorm: 1.12 [18:27:40< 6:04:19] +[titan] 2025-10-05 17:02:02,591 - root - INFO - step: 30105 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7463 +[titan] 2025-10-05 17:02:02,591 - root - INFO - lr: 1.1583e-05 gnorm: 1.13 [18:27:51< 6:04:08] +[titan] 2025-10-05 17:02:13,423 - root - INFO - step: 30110 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 17:02:13,423 - root - INFO - lr: 1.1577e-05 gnorm: 1.16 [18:28:02< 6:03:56] +[titan] 2025-10-05 17:02:24,227 - root - INFO - step: 30115 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.78 mfu: 42.55% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 17:02:24,227 - root - INFO - lr: 1.1571e-05 gnorm: 1.12 [18:28:13< 6:03:45] +[titan] 2025-10-05 17:02:35,077 - root - INFO - step: 30120 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 17:02:35,077 - root - INFO - lr: 1.1565e-05 gnorm: 1.14 [18:28:24< 6:03:34] +[titan] 2025-10-05 17:02:45,895 - root - INFO - step: 30125 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 17:02:45,895 - root - INFO - lr: 1.1558e-05 gnorm: 1.13 [18:28:34< 6:03:23] +[titan] 2025-10-05 17:02:56,710 - root - INFO - step: 30130 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 17:02:56,710 - root - INFO - lr: 
1.1552e-05 gnorm: 1.13 [18:28:45< 6:03:12] +[titan] 2025-10-05 17:03:07,565 - root - INFO - step: 30135 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8142 +[titan] 2025-10-05 17:03:07,565 - root - INFO - lr: 1.1546e-05 gnorm: 1.11 [18:28:56< 6:03:01] +[titan] 2025-10-05 17:03:18,382 - root - INFO - step: 30140 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 17:03:18,382 - root - INFO - lr: 1.1539e-05 gnorm: 1.21 [18:29:07< 6:02:50] +[titan] 2025-10-05 17:03:29,277 - root - INFO - step: 30145 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 17:03:29,277 - root - INFO - lr: 1.1533e-05 gnorm: 1.14 [18:29:18< 6:02:39] +[titan] 2025-10-05 17:03:37,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:03:40,104 - root - INFO - step: 30150 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 17:03:40,104 - root - INFO - lr: 1.1527e-05 gnorm: 1.13 [18:29:29< 6:02:28] +[titan] 2025-10-05 17:03:50,940 - root - INFO - step: 30155 loss: 2.0613 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:03:50,940 - root - INFO - lr: 1.1521e-05 gnorm: 1.15 [18:29:39< 6:02:17] +[titan] 2025-10-05 17:04:01,762 - root - INFO - step: 30160 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 17:04:01,762 - root - INFO - lr: 1.1514e-05 gnorm: 1.14 [18:29:50< 6:02:05] +[titan] 2025-10-05 17:04:12,567 - root - INFO - step: 30165 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.75 mfu: 42.54% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 17:04:12,567 - root - INFO - lr: 1.1508e-05 gnorm: 1.12 [18:30:01< 6:01:54] +[titan] 2025-10-05 17:04:23,420 - root - INFO - step: 30170 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:04:23,420 - root - INFO - lr: 1.1502e-05 gnorm: 1.12 [18:30:12< 6:01:43] +[titan] 2025-10-05 17:04:34,282 - root - INFO - step: 30175 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 17:04:34,282 - root - INFO - lr: 1.1495e-05 gnorm: 1.12 [18:30:23< 6:01:32] +[titan] 2025-10-05 17:04:45,111 - root - INFO - step: 30180 loss: 1.9784 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:04:45,111 - root - INFO - lr: 
1.1489e-05 gnorm: 1.16 [18:30:34< 6:01:21] +[titan] 2025-10-05 17:04:55,961 - root - INFO - step: 30185 loss: 2.0025 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:04:55,962 - root - INFO - lr: 1.1483e-05 gnorm: 1.13 [18:30:44< 6:01:10] +[titan] 2025-10-05 17:05:06,781 - root - INFO - step: 30190 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.18 mfu: 42.48% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 17:05:06,781 - root - INFO - lr: 1.1477e-05 gnorm: 1.16 [18:30:55< 6:00:59] +[titan] 2025-10-05 17:05:17,581 - root - INFO - step: 30195 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:05:17,581 - root - INFO - lr: 1.1470e-05 gnorm: 1.16 [18:31:06< 6:00:48] +[titan] 2025-10-05 17:05:26,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:05:28,480 - root - INFO - step: 30200 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 17:05:28,480 - root - INFO - lr: 1.1464e-05 gnorm: 1.13 [18:31:17< 6:00:37] +[titan] 2025-10-05 17:05:39,462 - root - INFO - step: 30205 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 29,840 tflops: 413.98 mfu: 41.86% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 17:05:39,462 - root - INFO - lr: 1.1458e-05 gnorm: 1.11 [18:31:28< 6:00:26] +[titan] 2025-10-05 17:05:46,167 - root - INFO - Dumping profiler traces at step 30208 +[titan] 2025-10-05 17:05:46,206 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:05:50,671 - root - INFO - step: 30210 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 29,233 tflops: 405.56 mfu: 41.01% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7633 +[titan] 2025-10-05 17:05:50,672 - root - INFO - lr: 1.1452e-05 gnorm: 1.14 [18:31:39< 6:00:15] +[titan] 2025-10-05 17:06:01,511 - root - INFO - step: 30215 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 17:06:01,511 - root - INFO - lr: 1.1445e-05 gnorm: 1.17 [18:31:50< 6:00:03] +[titan] 2025-10-05 17:06:12,360 - root - INFO - step: 30220 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7378 +[titan] 2025-10-05 17:06:12,360 - root - INFO - lr: 1.1439e-05 gnorm: 1.11 [18:32:01< 5:59:52] +[titan] 2025-10-05 17:06:23,184 - root - INFO - step: 30225 loss: 2.0049 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 17:06:23,184 - root - INFO - lr: 1.1433e-05 gnorm: 1.13 [18:32:12< 5:59:41] +[titan] 2025-10-05 17:06:34,073 - root - INFO - step: 30230 loss: 
1.9745 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 17:06:34,073 - root - INFO - lr: 1.1427e-05 gnorm: 1.15 [18:32:23< 5:59:30] +[titan] 2025-10-05 17:06:44,900 - root - INFO - step: 30235 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7525 +[titan] 2025-10-05 17:06:44,900 - root - INFO - lr: 1.1420e-05 gnorm: 1.11 [18:32:33< 5:59:19] +[titan] 2025-10-05 17:06:55,740 - root - INFO - step: 30240 loss: 1.9188 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 17:06:55,740 - root - INFO - lr: 1.1414e-05 gnorm: 1.16 [18:32:44< 5:59:08] +[titan] 2025-10-05 17:07:06,541 - root - INFO - step: 30245 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:07:06,541 - root - INFO - lr: 1.1408e-05 gnorm: 1.13 [18:32:55< 5:58:57] +[titan] 2025-10-05 17:07:15,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:07:17,391 - root - INFO - step: 30250 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 17:07:17,392 - root - INFO - lr: 1.1402e-05 gnorm: 1.17 [18:33:06< 5:58:46] +[titan] 2025-10-05 17:07:28,241 - root - INFO - step: 30255 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 17:07:28,241 - root - INFO - lr: 1.1395e-05 gnorm: 1.18 [18:33:17< 5:58:35] +[titan] 2025-10-05 17:07:39,102 - root - INFO - step: 30260 loss: 2.0013 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 17:07:39,103 - root - INFO - lr: 1.1389e-05 gnorm: 1.12 [18:33:28< 5:58:23] +[titan] 2025-10-05 17:07:49,999 - root - INFO - step: 30265 loss: 1.9338 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 17:07:50,000 - root - INFO - lr: 1.1383e-05 gnorm: 1.16 [18:33:39< 5:58:12] +[titan] 2025-10-05 17:08:00,848 - root - INFO - step: 30270 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 17:08:00,848 - root - INFO - lr: 1.1377e-05 gnorm: 1.17 [18:33:49< 5:58:01] +[titan] 2025-10-05 17:08:11,692 - root - INFO - step: 30275 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 17:08:11,692 - root - INFO - lr: 1.1370e-05 gnorm: 1.17 [18:34:00< 5:57:50] +[titan] 2025-10-05 17:08:22,552 - root - INFO - step: 30280 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:08:22,552 - root - INFO - lr: 
1.1364e-05 gnorm: 1.18 [18:34:11< 5:57:39] +[titan] 2025-10-05 17:08:33,450 - root - INFO - step: 30285 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 17:08:33,450 - root - INFO - lr: 1.1358e-05 gnorm: 1.11 [18:34:22< 5:57:28] +[titan] 2025-10-05 17:08:44,280 - root - INFO - step: 30290 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 17:08:44,280 - root - INFO - lr: 1.1352e-05 gnorm: 1.13 [18:34:33< 5:57:17] +[titan] 2025-10-05 17:08:55,139 - root - INFO - step: 30295 loss: 2.0245 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 17:08:55,139 - root - INFO - lr: 1.1346e-05 gnorm: 1.13 [18:34:44< 5:57:06] +[titan] 2025-10-05 17:09:03,787 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:09:05,964 - root - INFO - step: 30300 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 17:09:05,964 - root - INFO - lr: 1.1339e-05 gnorm: 1.17 [18:34:54< 5:56:55] +[titan] 2025-10-05 17:09:16,818 - root - INFO - step: 30305 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 17:09:16,818 - root - INFO - lr: 1.1333e-05 gnorm: 1.16 [18:35:05< 5:56:44] +[titan] 2025-10-05 17:09:27,662 - root - INFO - step: 30310 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 17:09:27,662 - root - INFO - lr: 1.1327e-05 gnorm: 1.15 [18:35:16< 5:56:33] +[titan] 2025-10-05 17:09:38,520 - root - INFO - step: 30315 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 17:09:38,520 - root - INFO - lr: 1.1321e-05 gnorm: 1.14 [18:35:27< 5:56:21] +[titan] 2025-10-05 17:09:49,395 - root - INFO - step: 30320 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 17:09:49,395 - root - INFO - lr: 1.1315e-05 gnorm: 1.14 [18:35:38< 5:56:10] +[titan] 2025-10-05 17:10:00,277 - root - INFO - step: 30325 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 17:10:00,277 - root - INFO - lr: 1.1308e-05 gnorm: 1.15 [18:35:49< 5:55:59] +[titan] 2025-10-05 17:10:11,173 - root - INFO - step: 30330 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:10:11,174 - root - INFO - lr: 
1.1302e-05 gnorm: 1.15 [18:36:00< 5:55:48] +[titan] 2025-10-05 17:10:22,000 - root - INFO - step: 30335 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 17:10:22,000 - root - INFO - lr: 1.1296e-05 gnorm: 1.18 [18:36:11< 5:55:37] +[titan] 2025-10-05 17:10:32,877 - root - INFO - step: 30340 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 17:10:32,877 - root - INFO - lr: 1.1290e-05 gnorm: 1.13 [18:36:21< 5:55:26] +[titan] 2025-10-05 17:10:43,769 - root - INFO - step: 30345 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:10:43,769 - root - INFO - lr: 1.1284e-05 gnorm: 1.15 [18:36:32< 5:55:15] +[titan] 2025-10-05 17:10:52,407 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:10:54,603 - root - INFO - step: 30350 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 17:10:54,603 - root - INFO - lr: 1.1277e-05 gnorm: 1.15 [18:36:43< 5:55:04] +[titan] 2025-10-05 17:11:05,438 - root - INFO - step: 30355 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:11:05,438 - root - INFO - lr: 1.1271e-05 gnorm: 1.14 [18:36:54< 5:54:53] +[titan] 2025-10-05 17:11:16,300 - root - INFO - step: 30360 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 17:11:16,300 - root - INFO - lr: 1.1265e-05 gnorm: 1.11 [18:37:05< 5:54:42] +[titan] 2025-10-05 17:11:27,159 - root - INFO - step: 30365 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:11:27,159 - root - INFO - lr: 1.1259e-05 gnorm: 1.11 [18:37:16< 5:54:30] +[titan] 2025-10-05 17:11:38,071 - root - INFO - step: 30370 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 17:11:38,071 - root - INFO - lr: 1.1253e-05 gnorm: 1.18 [18:37:27< 5:54:19] +[titan] 2025-10-05 17:11:48,937 - root - INFO - step: 30375 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:11:48,937 - root - INFO - lr: 1.1247e-05 gnorm: 1.15 [18:37:37< 5:54:08] +[titan] 2025-10-05 17:11:59,780 - root - INFO - step: 30380 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 17:11:59,780 - root - INFO - lr: 
1.1240e-05 gnorm: 1.13 [18:37:48< 5:53:57] +[titan] 2025-10-05 17:12:10,619 - root - INFO - step: 30385 loss: 1.9947 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 17:12:10,620 - root - INFO - lr: 1.1234e-05 gnorm: 1.15 [18:37:59< 5:53:46] +[titan] 2025-10-05 17:12:21,479 - root - INFO - step: 30390 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 17:12:21,479 - root - INFO - lr: 1.1228e-05 gnorm: 1.11 [18:38:10< 5:53:35] +[titan] 2025-10-05 17:12:32,330 - root - INFO - step: 30395 loss: 1.9584 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:12:32,330 - root - INFO - lr: 1.1222e-05 gnorm: 1.12 [18:38:21< 5:53:24] +[titan] 2025-10-05 17:12:41,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:12:43,230 - root - INFO - step: 30400 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 17:12:43,230 - root - INFO - lr: 1.1216e-05 gnorm: 1.16 [18:38:32< 5:53:13] +[titan] 2025-10-05 17:12:54,073 - root - INFO - step: 30405 loss: 1.9890 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 17:12:54,073 - root - INFO - lr: 1.1210e-05 gnorm: 1.19 [18:38:43< 5:53:02] +[titan] 2025-10-05 17:13:04,941 - root - INFO - step: 30410 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 17:13:04,942 - root - INFO - lr: 1.1203e-05 gnorm: 1.16 [18:38:53< 5:52:51] +[titan] 2025-10-05 17:13:15,791 - root - INFO - step: 30415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 17:13:15,791 - root - INFO - lr: 1.1197e-05 gnorm: 1.18 [18:39:04< 5:52:40] +[titan] 2025-10-05 17:13:26,642 - root - INFO - step: 30420 loss: 2.0087 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 17:13:26,642 - root - INFO - lr: 1.1191e-05 gnorm: 1.13 [18:39:15< 5:52:28] +[titan] 2025-10-05 17:13:37,590 - root - INFO - step: 30425 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.26 mfu: 41.99% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 17:13:37,590 - root - INFO - lr: 1.1185e-05 gnorm: 1.13 [18:39:26< 5:52:17] +[titan] 2025-10-05 17:13:48,481 - root - INFO - step: 30430 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7083 +[titan] 2025-10-05 17:13:48,481 - root - INFO - lr: 
1.1179e-05 gnorm: 1.22 [18:39:37< 5:52:06] +[titan] 2025-10-05 17:13:59,341 - root - INFO - step: 30435 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 17:13:59,341 - root - INFO - lr: 1.1173e-05 gnorm: 1.10 [18:39:48< 5:51:55] +[titan] 2025-10-05 17:14:10,199 - root - INFO - step: 30440 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 17:14:10,199 - root - INFO - lr: 1.1166e-05 gnorm: 1.15 [18:39:59< 5:51:44] +[titan] 2025-10-05 17:14:21,050 - root - INFO - step: 30445 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:14:21,050 - root - INFO - lr: 1.1160e-05 gnorm: 1.17 [18:40:10< 5:51:33] +[titan] 2025-10-05 17:14:29,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:14:31,915 - root - INFO - step: 30450 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:14:31,915 - root - INFO - lr: 1.1154e-05 gnorm: 1.13 [18:40:20< 5:51:22] +[titan] 2025-10-05 17:14:42,853 - root - INFO - step: 30455 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 17:14:42,853 - root - INFO - lr: 1.1148e-05 gnorm: 1.15 [18:40:31< 5:51:11] +[titan] 2025-10-05 17:14:53,689 - root - INFO - step: 30460 loss: 1.9279 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:14:53,689 - root - INFO - lr: 1.1142e-05 gnorm: 1.16 [18:40:42< 5:51:00] +[titan] 2025-10-05 17:15:04,539 - root - INFO - step: 30465 loss: 1.9730 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7446 +[titan] 2025-10-05 17:15:04,539 - root - INFO - lr: 1.1136e-05 gnorm: 1.13 [18:40:53< 5:50:49] +[titan] 2025-10-05 17:15:15,418 - root - INFO - step: 30470 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 17:15:15,418 - root - INFO - lr: 1.1130e-05 gnorm: 1.20 [18:41:04< 5:50:38] +[titan] 2025-10-05 17:15:26,296 - root - INFO - step: 30475 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 17:15:26,297 - root - INFO - lr: 1.1124e-05 gnorm: 1.13 [18:41:15< 5:50:26] +[titan] 2025-10-05 17:15:37,128 - root - INFO - step: 30480 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 17:15:37,129 - root - INFO - lr: 
1.1117e-05 gnorm: 1.16 [18:41:26< 5:50:15] +[titan] 2025-10-05 17:15:48,020 - root - INFO - step: 30485 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 17:15:48,020 - root - INFO - lr: 1.1111e-05 gnorm: 1.16 [18:41:37< 5:50:04] +[titan] 2025-10-05 17:15:58,881 - root - INFO - step: 30490 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 17:15:58,881 - root - INFO - lr: 1.1105e-05 gnorm: 1.13 [18:41:47< 5:49:53] +[titan] 2025-10-05 17:16:09,738 - root - INFO - step: 30495 loss: 2.0163 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7828 +[titan] 2025-10-05 17:16:09,738 - root - INFO - lr: 1.1099e-05 gnorm: 1.13 [18:41:58< 5:49:42] +[titan] 2025-10-05 17:16:18,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:16:20,594 - root - INFO - step: 30500 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:16:20,594 - root - INFO - lr: 1.1093e-05 gnorm: 1.15 [18:42:09< 5:49:31] +[titan] 2025-10-05 17:16:31,472 - root - INFO - step: 30505 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7582 +[titan] 2025-10-05 17:16:31,472 - root - INFO - lr: 1.1087e-05 gnorm: 1.19 [18:42:20< 5:49:20] +[titan] 2025-10-05 17:16:42,399 - root - INFO - step: 30510 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:16:42,399 - root - INFO - lr: 1.1081e-05 gnorm: 1.14 [18:42:31< 5:49:09] +[titan] 2025-10-05 17:16:53,259 - root - INFO - step: 30515 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 17:16:53,259 - root - INFO - lr: 1.1075e-05 gnorm: 1.15 [18:42:42< 5:48:58] +[titan] 2025-10-05 17:17:04,140 - root - INFO - step: 30520 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 17:17:04,140 - root - INFO - lr: 1.1069e-05 gnorm: 1.13 [18:42:53< 5:48:47] +[titan] 2025-10-05 17:17:14,989 - root - INFO - step: 30525 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 17:17:14,989 - root - INFO - lr: 1.1063e-05 gnorm: 1.36 [18:43:03< 5:48:36] +[titan] 2025-10-05 17:17:25,901 - root - INFO - step: 30530 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:17:25,902 - root - INFO - lr: 
1.1056e-05 gnorm: 1.14 [18:43:14< 5:48:24] +[titan] 2025-10-05 17:17:36,768 - root - INFO - step: 30535 loss: 2.0575 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8191 +[titan] 2025-10-05 17:17:36,768 - root - INFO - lr: 1.1050e-05 gnorm: 1.17 [18:43:25< 5:48:13] +[titan] 2025-10-05 17:17:47,700 - root - INFO - step: 30540 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 17:17:47,700 - root - INFO - lr: 1.1044e-05 gnorm: 1.12 [18:43:36< 5:48:02] +[titan] 2025-10-05 17:17:58,569 - root - INFO - step: 30545 loss: 1.9982 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 17:17:58,569 - root - INFO - lr: 1.1038e-05 gnorm: 1.13 [18:43:47< 5:47:51] +[titan] 2025-10-05 17:18:07,246 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:18:09,461 - root - INFO - step: 30550 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7963 +[titan] 2025-10-05 17:18:09,461 - root - INFO - lr: 1.1032e-05 gnorm: 1.15 [18:43:58< 5:47:40] +[titan] 2025-10-05 17:18:20,334 - root - INFO - step: 30555 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 17:18:20,335 - root - INFO - lr: 1.1026e-05 gnorm: 1.13 [18:44:09< 5:47:29] +[titan] 2025-10-05 17:18:31,222 - root - INFO - step: 30560 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:18:31,222 - root - INFO - lr: 1.1020e-05 gnorm: 1.16 [18:44:20< 5:47:18] +[titan] 2025-10-05 17:18:42,115 - root - INFO - step: 30565 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 17:18:42,116 - root - INFO - lr: 1.1014e-05 gnorm: 1.18 [18:44:31< 5:47:07] +[titan] 2025-10-05 17:18:52,976 - root - INFO - step: 30570 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7939 +[titan] 2025-10-05 17:18:52,977 - root - INFO - lr: 1.1008e-05 gnorm: 1.15 [18:44:41< 5:46:56] +[titan] 2025-10-05 17:19:03,822 - root - INFO - step: 30575 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 17:19:03,822 - root - INFO - lr: 1.1002e-05 gnorm: 1.13 [18:44:52< 5:46:45] +[titan] 2025-10-05 17:19:14,680 - root - INFO - step: 30580 loss: 1.9714 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 17:19:14,680 - root - INFO - lr: 
1.0996e-05 gnorm: 1.15 [18:45:03< 5:46:34] +[titan] 2025-10-05 17:19:25,560 - root - INFO - step: 30585 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 17:19:25,560 - root - INFO - lr: 1.0990e-05 gnorm: 1.12 [18:45:14< 5:46:23] +[titan] 2025-10-05 17:19:36,432 - root - INFO - step: 30590 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7819 +[titan] 2025-10-05 17:19:36,432 - root - INFO - lr: 1.0984e-05 gnorm: 1.18 [18:45:25< 5:46:11] +[titan] 2025-10-05 17:19:47,343 - root - INFO - step: 30595 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 17:19:47,343 - root - INFO - lr: 1.0977e-05 gnorm: 1.12 [18:45:36< 5:46:00] +[titan] 2025-10-05 17:19:56,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:19:58,231 - root - INFO - step: 30600 loss: 2.0557 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 17:19:58,231 - root - INFO - lr: 1.0971e-05 gnorm: 1.17 [18:45:47< 5:45:49] +[titan] 2025-10-05 17:20:09,100 - root - INFO - step: 30605 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:20:09,100 - root - INFO - lr: 1.0965e-05 gnorm: 1.15 [18:45:58< 5:45:38] +[titan] 2025-10-05 17:20:19,957 - root - INFO - step: 30610 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 17:20:19,957 - root - INFO - lr: 1.0959e-05 gnorm: 1.11 [18:46:08< 5:45:27] +[titan] 2025-10-05 17:20:30,886 - root - INFO - step: 30615 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 17:20:30,886 - root - INFO - lr: 1.0953e-05 gnorm: 1.14 [18:46:19< 5:45:16] +[titan] 2025-10-05 17:20:41,762 - root - INFO - step: 30620 loss: 1.9612 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:20:41,763 - root - INFO - lr: 1.0947e-05 gnorm: 1.19 [18:46:30< 5:45:05] +[titan] 2025-10-05 17:20:52,672 - root - INFO - step: 30625 loss: 1.9688 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.13% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7412 +[titan] 2025-10-05 17:20:52,672 - root - INFO - lr: 1.0941e-05 gnorm: 1.14 [18:46:41< 5:44:54] +[titan] 2025-10-05 17:21:03,551 - root - INFO - step: 30630 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 17:21:03,551 - root - INFO - lr: 
1.0935e-05 gnorm: 1.13 [18:46:52< 5:44:43] +[titan] 2025-10-05 17:21:14,413 - root - INFO - step: 30635 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 17:21:14,413 - root - INFO - lr: 1.0929e-05 gnorm: 1.13 [18:47:03< 5:44:32] +[titan] 2025-10-05 17:21:25,276 - root - INFO - step: 30640 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 17:21:25,276 - root - INFO - lr: 1.0923e-05 gnorm: 1.18 [18:47:14< 5:44:21] +[titan] 2025-10-05 17:21:36,129 - root - INFO - step: 30645 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 17:21:36,129 - root - INFO - lr: 1.0917e-05 gnorm: 1.13 [18:47:25< 5:44:10] +[titan] 2025-10-05 17:21:44,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:21:47,049 - root - INFO - step: 30650 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:21:47,049 - root - INFO - lr: 1.0911e-05 gnorm: 1.12 [18:47:36< 5:43:58] +[titan] 2025-10-05 17:21:57,919 - root - INFO - step: 30655 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 17:21:57,919 - root - INFO - lr: 1.0905e-05 gnorm: 1.17 [18:47:46< 5:43:47] +[titan] 2025-10-05 17:22:08,772 - root - INFO - step: 30660 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:22:08,772 - root - INFO - lr: 1.0899e-05 gnorm: 1.14 [18:47:57< 5:43:36] +[titan] 2025-10-05 17:22:19,639 - root - INFO - step: 30665 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7649 +[titan] 2025-10-05 17:22:19,639 - root - INFO - lr: 1.0893e-05 gnorm: 1.17 [18:48:08< 5:43:25] +[titan] 2025-10-05 17:22:30,511 - root - INFO - step: 30670 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 17:22:30,511 - root - INFO - lr: 1.0887e-05 gnorm: 1.15 [18:48:19< 5:43:14] +[titan] 2025-10-05 17:22:41,385 - root - INFO - step: 30675 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 17:22:41,385 - root - INFO - lr: 1.0881e-05 gnorm: 1.13 [18:48:30< 5:43:03] +[titan] 2025-10-05 17:22:52,312 - root - INFO - step: 30680 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:22:52,312 - root - INFO - lr: 
1.0875e-05 gnorm: 1.15 [18:48:41< 5:42:52] +[titan] 2025-10-05 17:23:03,165 - root - INFO - step: 30685 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 17:23:03,165 - root - INFO - lr: 1.0869e-05 gnorm: 1.13 [18:48:52< 5:42:41] +[titan] 2025-10-05 17:23:14,020 - root - INFO - step: 30690 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7825 +[titan] 2025-10-05 17:23:14,020 - root - INFO - lr: 1.0863e-05 gnorm: 1.14 [18:49:02< 5:42:30] +[titan] 2025-10-05 17:23:24,876 - root - INFO - step: 30695 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:23:24,876 - root - INFO - lr: 1.0857e-05 gnorm: 1.15 [18:49:13< 5:42:19] +[titan] 2025-10-05 17:23:33,557 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:23:35,744 - root - INFO - step: 30700 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:23:35,744 - root - INFO - lr: 1.0851e-05 gnorm: 1.12 [18:49:24< 5:42:08] +[titan] 2025-10-05 17:23:46,630 - root - INFO - step: 30705 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 17:23:46,630 - root - INFO - lr: 1.0845e-05 gnorm: 1.14 [18:49:35< 5:41:56] +[titan] 2025-10-05 17:23:57,506 - root - INFO - step: 30710 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:23:57,506 - root - INFO - lr: 1.0839e-05 gnorm: 1.17 [18:49:46< 5:41:45] +[titan] 2025-10-05 17:24:08,364 - root - INFO - step: 30715 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:24:08,364 - root - INFO - lr: 1.0833e-05 gnorm: 1.13 [18:49:57< 5:41:34] +[titan] 2025-10-05 17:24:19,332 - root - INFO - step: 30720 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 29,876 tflops: 414.48 mfu: 41.91% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7148 +[titan] 2025-10-05 17:24:19,332 - root - INFO - lr: 1.0827e-05 gnorm: 1.11 [18:50:08< 5:41:23] +[titan] 2025-10-05 17:24:19,514 - root - INFO - Dumping profiler traces at step 30720 +[titan] 2025-10-05 17:24:19,554 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:24:30,456 - root - INFO - step: 30725 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 29,458 tflops: 408.69 mfu: 41.32% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 17:24:30,456 - root - INFO - lr: 1.0821e-05 gnorm: 1.13 [18:50:19< 5:41:12] +[titan] 2025-10-05 17:24:41,338 - root - INFO - step: 30730 loss: 
1.9783 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 17:24:41,338 - root - INFO - lr: 1.0815e-05 gnorm: 1.11 [18:50:30< 5:41:01] +[titan] 2025-10-05 17:24:52,229 - root - INFO - step: 30735 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7376 +[titan] 2025-10-05 17:24:52,229 - root - INFO - lr: 1.0809e-05 gnorm: 1.15 [18:50:41< 5:40:50] +[titan] 2025-10-05 17:25:03,105 - root - INFO - step: 30740 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7807 +[titan] 2025-10-05 17:25:03,105 - root - INFO - lr: 1.0803e-05 gnorm: 1.23 [18:50:52< 5:40:39] +[titan] 2025-10-05 17:25:13,996 - root - INFO - step: 30745 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:25:13,996 - root - INFO - lr: 1.0797e-05 gnorm: 1.16 [18:51:02< 5:40:28] +[titan] 2025-10-05 17:25:22,693 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:25:24,892 - root - INFO - step: 30750 loss: 2.0403 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 17:25:24,892 - root - INFO - lr: 1.0791e-05 gnorm: 1.21 [18:51:13< 5:40:17] +[titan] 2025-10-05 17:25:35,755 - root - INFO - step: 30755 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 17:25:35,755 - root - INFO - lr: 1.0785e-05 gnorm: 1.15 [18:51:24< 5:40:06] +[titan] 2025-10-05 17:25:46,627 - root - INFO - step: 30760 loss: 1.9424 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:25:46,627 - root - INFO - lr: 1.0779e-05 gnorm: 1.14 [18:51:35< 5:39:55] +[titan] 2025-10-05 17:25:57,513 - root - INFO - step: 30765 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8128 +[titan] 2025-10-05 17:25:57,513 - root - INFO - lr: 1.0773e-05 gnorm: 1.16 [18:51:46< 5:39:44] +[titan] 2025-10-05 17:26:08,369 - root - INFO - step: 30770 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:26:08,369 - root - INFO - lr: 1.0767e-05 gnorm: 1.15 [18:51:57< 5:39:32] +[titan] 2025-10-05 17:26:19,291 - root - INFO - step: 30775 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 17:26:19,291 - root - INFO - lr: 1.0761e-05 gnorm: 1.16 [18:52:08< 5:39:21] +[titan] 2025-10-05 17:26:30,180 - root - INFO - step: 30780 loss: 1.9939 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7628 +[titan] 2025-10-05 17:26:30,180 - root - INFO - lr: 
1.0755e-05 gnorm: 1.14 [18:52:19< 5:39:10] +[titan] 2025-10-05 17:26:41,064 - root - INFO - step: 30785 loss: 2.0227 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 17:26:41,065 - root - INFO - lr: 1.0749e-05 gnorm: 1.14 [18:52:30< 5:38:59] +[titan] 2025-10-05 17:26:51,961 - root - INFO - step: 30790 loss: 1.9654 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:26:51,961 - root - INFO - lr: 1.0743e-05 gnorm: 1.11 [18:52:40< 5:38:48] +[titan] 2025-10-05 17:27:02,841 - root - INFO - step: 30795 loss: 2.0724 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 17:27:02,841 - root - INFO - lr: 1.0737e-05 gnorm: 1.16 [18:52:51< 5:38:37] +[titan] 2025-10-05 17:27:11,522 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:27:13,697 - root - INFO - step: 30800 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 17:27:13,697 - root - INFO - lr: 1.0731e-05 gnorm: 1.14 [18:53:02< 5:38:26] +[titan] 2025-10-05 17:27:24,566 - root - INFO - step: 30805 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 17:27:24,566 - root - INFO - lr: 1.0725e-05 gnorm: 1.15 [18:53:13< 5:38:15] +[titan] 2025-10-05 17:27:35,469 - root - INFO - step: 30810 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 17:27:35,469 - root - INFO - lr: 1.0719e-05 gnorm: 1.14 [18:53:24< 5:38:04] +[titan] 2025-10-05 17:27:46,340 - root - INFO - step: 30815 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 17:27:46,340 - root - INFO - lr: 1.0713e-05 gnorm: 1.16 [18:53:35< 5:37:53] +[titan] 2025-10-05 17:27:57,238 - root - INFO - step: 30820 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 17:27:57,239 - root - INFO - lr: 1.0707e-05 gnorm: 1.12 [18:53:46< 5:37:42] +[titan] 2025-10-05 17:28:08,105 - root - INFO - step: 30825 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 17:28:08,105 - root - INFO - lr: 1.0702e-05 gnorm: 1.12 [18:53:57< 5:37:31] +[titan] 2025-10-05 17:28:18,970 - root - INFO - step: 30830 loss: 1.8472 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6336 +[titan] 2025-10-05 17:28:18,971 - root - INFO - lr: 
1.0696e-05 gnorm: 1.15 [18:54:07< 5:37:20] +[titan] 2025-10-05 17:28:29,843 - root - INFO - step: 30835 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:28:29,844 - root - INFO - lr: 1.0690e-05 gnorm: 1.13 [18:54:18< 5:37:08] +[titan] 2025-10-05 17:28:40,744 - root - INFO - step: 30840 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 17:28:40,744 - root - INFO - lr: 1.0684e-05 gnorm: 1.13 [18:54:29< 5:36:57] +[titan] 2025-10-05 17:28:51,648 - root - INFO - step: 30845 loss: 1.9017 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6818 +[titan] 2025-10-05 17:28:51,648 - root - INFO - lr: 1.0678e-05 gnorm: 1.14 [18:54:40< 5:36:46] +[titan] 2025-10-05 17:29:00,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:29:02,544 - root - INFO - step: 30850 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 17:29:02,544 - root - INFO - lr: 1.0672e-05 gnorm: 1.15 [18:54:51< 5:36:35] +[titan] 2025-10-05 17:29:13,430 - root - INFO - step: 30855 loss: 1.9892 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:13,431 - root - INFO - lr: 1.0666e-05 gnorm: 1.16 [18:55:02< 5:36:24] +[titan] 2025-10-05 17:29:24,310 - root - INFO - step: 30860 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 17:29:24,310 - root - INFO - lr: 1.0660e-05 gnorm: 1.12 [18:55:13< 5:36:13] +[titan] 2025-10-05 17:29:35,178 - root - INFO - step: 30865 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:35,178 - root - INFO - lr: 1.0654e-05 gnorm: 1.16 [18:55:24< 5:36:02] +[titan] 2025-10-05 17:29:46,070 - root - INFO - step: 30870 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 17:29:46,070 - root - INFO - lr: 1.0648e-05 gnorm: 1.13 [18:55:35< 5:35:51] +[titan] 2025-10-05 17:29:56,949 - root - INFO - step: 30875 loss: 1.9562 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7292 +[titan] 2025-10-05 17:29:56,949 - root - INFO - lr: 1.0642e-05 gnorm: 1.14 [18:55:45< 5:35:40] +[titan] 2025-10-05 17:30:07,804 - root - INFO - step: 30880 loss: 2.0097 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 17:30:07,804 - root - INFO - lr: 
1.0636e-05 gnorm: 1.15 [18:55:56< 5:35:29] +[titan] 2025-10-05 17:30:18,658 - root - INFO - step: 30885 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 17:30:18,658 - root - INFO - lr: 1.0630e-05 gnorm: 1.17 [18:56:07< 5:35:18] +[titan] 2025-10-05 17:30:29,536 - root - INFO - step: 30890 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 17:30:29,536 - root - INFO - lr: 1.0625e-05 gnorm: 1.16 [18:56:18< 5:35:07] +[titan] 2025-10-05 17:30:40,429 - root - INFO - step: 30895 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7591 +[titan] 2025-10-05 17:30:40,429 - root - INFO - lr: 1.0619e-05 gnorm: 1.14 [18:56:29< 5:34:55] +[titan] 2025-10-05 17:30:49,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:30:51,303 - root - INFO - step: 30900 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 17:30:51,303 - root - INFO - lr: 1.0613e-05 gnorm: 1.15 [18:56:40< 5:34:44] +[titan] 2025-10-05 17:31:02,242 - root - INFO - step: 30905 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:31:02,242 - root - INFO - lr: 1.0607e-05 gnorm: 1.14 [18:56:51< 5:34:33] +[titan] 2025-10-05 17:31:13,130 - root - INFO - step: 30910 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 17:31:13,130 - root - INFO - lr: 1.0601e-05 gnorm: 1.25 [18:57:02< 5:34:22] +[titan] 2025-10-05 17:31:24,016 - root - INFO - step: 30915 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7966 +[titan] 2025-10-05 17:31:24,017 - root - INFO - lr: 1.0595e-05 gnorm: 1.13 [18:57:12< 5:34:11] +[titan] 2025-10-05 17:31:34,902 - root - INFO - step: 30920 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 17:31:34,903 - root - INFO - lr: 1.0589e-05 gnorm: 1.11 [18:57:23< 5:34:00] +[titan] 2025-10-05 17:31:45,757 - root - INFO - step: 30925 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:31:45,758 - root - INFO - lr: 1.0583e-05 gnorm: 1.14 [18:57:34< 5:33:49] +[titan] 2025-10-05 17:31:56,639 - root - INFO - step: 30930 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:31:56,639 - root - INFO - lr: 
1.0577e-05 gnorm: 1.15 [18:57:45< 5:33:38] +[titan] 2025-10-05 17:32:07,510 - root - INFO - step: 30935 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 17:32:07,510 - root - INFO - lr: 1.0572e-05 gnorm: 1.14 [18:57:56< 5:33:27] +[titan] 2025-10-05 17:32:18,361 - root - INFO - step: 30940 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 17:32:18,361 - root - INFO - lr: 1.0566e-05 gnorm: 1.16 [18:58:07< 5:33:16] +[titan] 2025-10-05 17:32:29,229 - root - INFO - step: 30945 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:32:29,229 - root - INFO - lr: 1.0560e-05 gnorm: 1.17 [18:58:18< 5:33:05] +[titan] 2025-10-05 17:32:37,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:32:40,069 - root - INFO - step: 30950 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:32:40,069 - root - INFO - lr: 1.0554e-05 gnorm: 1.13 [18:58:29< 5:32:54] +[titan] 2025-10-05 17:32:50,918 - root - INFO - step: 30955 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 17:32:50,918 - root - INFO - lr: 1.0548e-05 gnorm: 1.15 [18:58:39< 5:32:42] +[titan] 2025-10-05 17:33:01,839 - root - INFO - step: 30960 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6694 +[titan] 2025-10-05 17:33:01,839 - root - INFO - lr: 1.0542e-05 gnorm: 1.11 [18:58:50< 5:32:31] +[titan] 2025-10-05 17:33:12,698 - root - INFO - step: 30965 loss: 1.9487 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 17:33:12,698 - root - INFO - lr: 1.0536e-05 gnorm: 1.13 [18:59:01< 5:32:20] +[titan] 2025-10-05 17:33:23,587 - root - INFO - step: 30970 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7005 +[titan] 2025-10-05 17:33:23,587 - root - INFO - lr: 1.0530e-05 gnorm: 1.17 [18:59:12< 5:32:09] +[titan] 2025-10-05 17:33:34,467 - root - INFO - step: 30975 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 17:33:34,467 - root - INFO - lr: 1.0525e-05 gnorm: 1.19 [18:59:23< 5:31:58] +[titan] 2025-10-05 17:33:45,329 - root - INFO - step: 30980 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 17:33:45,329 - root - INFO - lr: 
1.0519e-05 gnorm: 1.21 [18:59:34< 5:31:47] +[titan] 2025-10-05 17:33:56,227 - root - INFO - step: 30985 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7673 +[titan] 2025-10-05 17:33:56,227 - root - INFO - lr: 1.0513e-05 gnorm: 1.14 [18:59:45< 5:31:36] +[titan] 2025-10-05 17:34:07,068 - root - INFO - step: 30990 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 17:34:07,068 - root - INFO - lr: 1.0507e-05 gnorm: 1.14 [18:59:56< 5:31:25] +[titan] 2025-10-05 17:34:17,920 - root - INFO - step: 30995 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 17:34:17,920 - root - INFO - lr: 1.0501e-05 gnorm: 1.13 [19:00:06< 5:31:14] +[titan] 2025-10-05 17:34:26,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:34:28,785 - root - INFO - step: 31000 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:34:28,785 - root - INFO - lr: 1.0495e-05 gnorm: 1.14 [19:00:17< 5:31:03] +[titan] 2025-10-05 17:34:39,677 - root - INFO - step: 31005 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 17:34:39,677 - root - INFO - lr: 1.0490e-05 gnorm: 1.12 [19:00:28< 5:30:52] +[titan] 2025-10-05 17:34:50,557 - root - INFO - step: 31010 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 17:34:50,557 - root - INFO - lr: 1.0484e-05 gnorm: 1.13 [19:00:39< 5:30:41] +[titan] 2025-10-05 17:35:01,441 - root - INFO - step: 31015 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 17:35:01,441 - root - INFO - lr: 1.0478e-05 gnorm: 1.11 [19:00:50< 5:30:29] +[titan] 2025-10-05 17:35:12,298 - root - INFO - step: 31020 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 17:35:12,298 - root - INFO - lr: 1.0472e-05 gnorm: 1.13 [19:01:01< 5:30:18] +[titan] 2025-10-05 17:35:23,148 - root - INFO - step: 31025 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 17:35:23,149 - root - INFO - lr: 1.0466e-05 gnorm: 1.19 [19:01:12< 5:30:07] +[titan] 2025-10-05 17:35:34,041 - root - INFO - step: 31030 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:35:34,042 - root - INFO - lr: 
1.0460e-05 gnorm: 1.14 [19:01:22< 5:29:56] +[titan] 2025-10-05 17:35:44,917 - root - INFO - step: 31035 loss: 2.0130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 17:35:44,917 - root - INFO - lr: 1.0455e-05 gnorm: 1.15 [19:01:33< 5:29:45] +[titan] 2025-10-05 17:35:55,789 - root - INFO - step: 31040 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 17:35:55,789 - root - INFO - lr: 1.0449e-05 gnorm: 1.14 [19:01:44< 5:29:34] +[titan] 2025-10-05 17:36:06,662 - root - INFO - step: 31045 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 17:36:06,663 - root - INFO - lr: 1.0443e-05 gnorm: 1.12 [19:01:55< 5:29:23] +[titan] 2025-10-05 17:36:15,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:36:17,541 - root - INFO - step: 31050 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 17:36:17,541 - root - INFO - lr: 1.0437e-05 gnorm: 1.15 [19:02:06< 5:29:12] +[titan] 2025-10-05 17:36:28,426 - root - INFO - step: 31055 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:36:28,426 - root - INFO - lr: 1.0431e-05 gnorm: 1.15 [19:02:17< 5:29:01] +[titan] 2025-10-05 17:36:39,289 - root - INFO - step: 31060 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 17:36:39,290 - root - INFO - lr: 1.0425e-05 gnorm: 1.14 [19:02:28< 5:28:50] +[titan] 2025-10-05 17:36:50,187 - root - INFO - step: 31065 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 17:36:50,187 - root - INFO - lr: 1.0420e-05 gnorm: 1.16 [19:02:39< 5:28:39] +[titan] 2025-10-05 17:37:01,103 - root - INFO - step: 31070 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 17:37:01,103 - root - INFO - lr: 1.0414e-05 gnorm: 1.19 [19:02:50< 5:28:28] +[titan] 2025-10-05 17:37:11,969 - root - INFO - step: 31075 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:37:11,969 - root - INFO - lr: 1.0408e-05 gnorm: 1.16 [19:03:00< 5:28:17] +[titan] 2025-10-05 17:37:22,843 - root - INFO - step: 31080 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 17:37:22,843 - root - INFO - lr: 
1.0402e-05 gnorm: 1.15 [19:03:11< 5:28:05] +[titan] 2025-10-05 17:37:33,710 - root - INFO - step: 31085 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:37:33,710 - root - INFO - lr: 1.0396e-05 gnorm: 1.14 [19:03:22< 5:27:54] +[titan] 2025-10-05 17:37:44,589 - root - INFO - step: 31090 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 17:37:44,589 - root - INFO - lr: 1.0391e-05 gnorm: 1.15 [19:03:33< 5:27:43] +[titan] 2025-10-05 17:37:55,476 - root - INFO - step: 31095 loss: 1.9001 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6797 +[titan] 2025-10-05 17:37:55,476 - root - INFO - lr: 1.0385e-05 gnorm: 1.14 [19:03:44< 5:27:32] +[titan] 2025-10-05 17:38:04,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:38:06,373 - root - INFO - step: 31100 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:38:06,373 - root - INFO - lr: 1.0379e-05 gnorm: 1.18 [19:03:55< 5:27:21] +[titan] 2025-10-05 17:38:17,276 - root - INFO - step: 31105 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:38:17,277 - root - INFO - lr: 1.0373e-05 gnorm: 1.12 [19:04:06< 5:27:10] +[titan] 2025-10-05 17:38:28,149 - root - INFO - step: 31110 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 17:38:28,149 - root - INFO - lr: 1.0367e-05 gnorm: 1.11 [19:04:17< 5:26:59] +[titan] 2025-10-05 17:38:39,025 - root - INFO - step: 31115 loss: 1.9815 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 17:38:39,025 - root - INFO - lr: 1.0362e-05 gnorm: 1.15 [19:04:27< 5:26:48] +[titan] 2025-10-05 17:38:49,892 - root - INFO - step: 31120 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:38:49,892 - root - INFO - lr: 1.0356e-05 gnorm: 1.13 [19:04:38< 5:26:37] +[titan] 2025-10-05 17:39:00,809 - root - INFO - step: 31125 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 17:39:00,809 - root - INFO - lr: 1.0350e-05 gnorm: 1.12 [19:04:49< 5:26:26] +[titan] 2025-10-05 17:39:11,704 - root - INFO - step: 31130 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 17:39:11,705 - root - INFO - lr: 
1.0344e-05 gnorm: 1.13 [19:05:00< 5:26:15] +[titan] 2025-10-05 17:39:22,594 - root - INFO - step: 31135 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:39:22,595 - root - INFO - lr: 1.0339e-05 gnorm: 1.19 [19:05:11< 5:26:04] +[titan] 2025-10-05 17:39:33,466 - root - INFO - step: 31140 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7578 +[titan] 2025-10-05 17:39:33,467 - root - INFO - lr: 1.0333e-05 gnorm: 1.15 [19:05:22< 5:25:53] +[titan] 2025-10-05 17:39:44,337 - root - INFO - step: 31145 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 17:39:44,337 - root - INFO - lr: 1.0327e-05 gnorm: 1.16 [19:05:33< 5:25:41] +[titan] 2025-10-05 17:39:53,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:39:55,199 - root - INFO - step: 31150 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7763 +[titan] 2025-10-05 17:39:55,199 - root - INFO - lr: 1.0321e-05 gnorm: 1.14 [19:05:44< 5:25:30] +[titan] 2025-10-05 17:40:06,057 - root - INFO - step: 31155 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 17:40:06,057 - root - INFO - lr: 1.0315e-05 gnorm: 1.17 [19:05:54< 5:25:19] +[titan] 2025-10-05 17:40:16,910 - root - INFO - step: 31160 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 17:40:16,910 - root - INFO - lr: 1.0310e-05 gnorm: 1.10 [19:06:05< 5:25:08] +[titan] 2025-10-05 17:40:27,753 - root - INFO - step: 31165 loss: 1.8951 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6755 +[titan] 2025-10-05 17:40:27,753 - root - INFO - lr: 1.0304e-05 gnorm: 1.16 [19:06:16< 5:24:57] +[titan] 2025-10-05 17:40:38,617 - root - INFO - step: 31170 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 17:40:38,617 - root - INFO - lr: 1.0298e-05 gnorm: 1.17 [19:06:27< 5:24:46] +[titan] 2025-10-05 17:40:49,491 - root - INFO - step: 31175 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7707 +[titan] 2025-10-05 17:40:49,491 - root - INFO - lr: 1.0292e-05 gnorm: 1.18 [19:06:38< 5:24:35] +[titan] 2025-10-05 17:41:00,364 - root - INFO - step: 31180 loss: 2.0114 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 17:41:00,365 - root - INFO - lr: 
1.0287e-05 gnorm: 1.12 [19:06:49< 5:24:24] +[titan] 2025-10-05 17:41:11,255 - root - INFO - step: 31185 loss: 2.0026 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 17:41:11,255 - root - INFO - lr: 1.0281e-05 gnorm: 1.19 [19:07:00< 5:24:13] +[titan] 2025-10-05 17:41:22,116 - root - INFO - step: 31190 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:41:22,116 - root - INFO - lr: 1.0275e-05 gnorm: 1.10 [19:07:11< 5:24:02] +[titan] 2025-10-05 17:41:32,999 - root - INFO - step: 31195 loss: 1.9088 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 17:41:32,999 - root - INFO - lr: 1.0269e-05 gnorm: 1.13 [19:07:21< 5:23:51] +[titan] 2025-10-05 17:41:41,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:41:43,892 - root - INFO - step: 31200 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:41:43,893 - root - INFO - lr: 1.0264e-05 gnorm: 1.13 [19:07:32< 5:23:40] +[titan] 2025-10-05 17:41:54,767 - root - INFO - step: 31205 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 17:41:54,767 - root - INFO - lr: 1.0258e-05 gnorm: 1.13 [19:07:43< 5:23:28] +[titan] 2025-10-05 17:42:05,616 - root - INFO - step: 31210 loss: 1.9827 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 17:42:05,617 - root - INFO - lr: 1.0252e-05 gnorm: 1.12 [19:07:54< 5:23:17] +[titan] 2025-10-05 17:42:16,473 - root - INFO - step: 31215 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:42:16,473 - root - INFO - lr: 1.0247e-05 gnorm: 1.17 [19:08:05< 5:23:06] +[titan] 2025-10-05 17:42:27,363 - root - INFO - step: 31220 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:42:27,363 - root - INFO - lr: 1.0241e-05 gnorm: 1.16 [19:08:16< 5:22:55] +[titan] 2025-10-05 17:42:38,236 - root - INFO - step: 31225 loss: 1.8762 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 17:42:38,237 - root - INFO - lr: 1.0235e-05 gnorm: 1.18 [19:08:27< 5:22:44] +[titan] 2025-10-05 17:42:49,232 - root - INFO - step: 31230 loss: 2.0595 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.45 mfu: 41.80% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 17:42:49,232 - root - INFO - lr: 
1.0229e-05 gnorm: 1.23 [19:08:38< 5:22:33] +[titan] 2025-10-05 17:42:53,762 - root - INFO - Dumping profiler traces at step 31232 +[titan] 2025-10-05 17:42:53,801 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:43:00,360 - root - INFO - step: 31235 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 29,447 tflops: 408.54 mfu: 41.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 17:43:00,360 - root - INFO - lr: 1.0224e-05 gnorm: 1.10 [19:08:49< 5:22:22] +[titan] 2025-10-05 17:43:11,236 - root - INFO - step: 31240 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 17:43:11,236 - root - INFO - lr: 1.0218e-05 gnorm: 1.12 [19:09:00< 5:22:11] +[titan] 2025-10-05 17:43:22,106 - root - INFO - step: 31245 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 17:43:22,106 - root - INFO - lr: 1.0212e-05 gnorm: 1.14 [19:09:11< 5:22:00] +[titan] 2025-10-05 17:43:30,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:43:32,976 - root - INFO - step: 31250 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:43:32,976 - root - INFO - lr: 1.0207e-05 gnorm: 1.15 [19:09:21< 5:21:49] +[titan] 2025-10-05 17:43:43,850 - root - INFO - step: 31255 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:43:43,850 - root - INFO - lr: 1.0201e-05 gnorm: 1.17 [19:09:32< 5:21:38] +[titan] 2025-10-05 17:43:54,726 - root - INFO - step: 31260 loss: 2.0422 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:43:54,727 - root - INFO - lr: 1.0195e-05 gnorm: 1.16 [19:09:43< 5:21:27] +[titan] 2025-10-05 17:44:05,648 - root - INFO - step: 31265 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:44:05,649 - root - INFO - lr: 1.0189e-05 gnorm: 1.18 [19:09:54< 5:21:16] +[titan] 2025-10-05 17:44:16,492 - root - INFO - step: 31270 loss: 1.9624 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 17:44:16,493 - root - INFO - lr: 1.0184e-05 gnorm: 1.11 [19:10:05< 5:21:05] +[titan] 2025-10-05 17:44:27,352 - root - INFO - step: 31275 loss: 1.9671 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 17:44:27,352 - root - INFO - lr: 1.0178e-05 gnorm: 1.17 [19:10:16< 5:20:53] +[titan] 2025-10-05 17:44:38,191 - root - INFO - step: 31280 loss: 1.9559 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:44:38,191 - root - INFO - lr: 
1.0172e-05 gnorm: 1.11 [19:10:27< 5:20:42] +[titan] 2025-10-05 17:44:49,058 - root - INFO - step: 31285 loss: 2.0070 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 17:44:49,058 - root - INFO - lr: 1.0167e-05 gnorm: 1.16 [19:10:37< 5:20:31] +[titan] 2025-10-05 17:44:59,922 - root - INFO - step: 31290 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 17:44:59,922 - root - INFO - lr: 1.0161e-05 gnorm: 1.16 [19:10:48< 5:20:20] +[titan] 2025-10-05 17:45:10,831 - root - INFO - step: 31295 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 17:45:10,831 - root - INFO - lr: 1.0155e-05 gnorm: 1.14 [19:10:59< 5:20:09] +[titan] 2025-10-05 17:45:19,500 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:45:21,690 - root - INFO - step: 31300 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:45:21,690 - root - INFO - lr: 1.0150e-05 gnorm: 1.13 [19:11:10< 5:19:58] +[titan] 2025-10-05 17:45:32,589 - root - INFO - step: 31305 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 17:45:32,590 - root - INFO - lr: 1.0144e-05 gnorm: 1.14 [19:11:21< 5:19:47] +[titan] 2025-10-05 17:45:43,458 - root - INFO - step: 31310 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 17:45:43,458 - root - INFO - lr: 1.0138e-05 gnorm: 1.16 [19:11:32< 5:19:36] +[titan] 2025-10-05 17:45:54,309 - root - INFO - step: 31315 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7610 +[titan] 2025-10-05 17:45:54,309 - root - INFO - lr: 1.0133e-05 gnorm: 1.15 [19:11:43< 5:19:25] +[titan] 2025-10-05 17:46:05,142 - root - INFO - step: 31320 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 17:46:05,142 - root - INFO - lr: 1.0127e-05 gnorm: 1.11 [19:11:54< 5:19:14] +[titan] 2025-10-05 17:46:16,012 - root - INFO - step: 31325 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 17:46:16,012 - root - INFO - lr: 1.0121e-05 gnorm: 1.22 [19:12:04< 5:19:03] +[titan] 2025-10-05 17:46:26,886 - root - INFO - step: 31330 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 17:46:26,886 - root - INFO - lr: 
1.0116e-05 gnorm: 1.16 [19:12:15< 5:18:52] +[titan] 2025-10-05 17:46:37,770 - root - INFO - step: 31335 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 17:46:37,770 - root - INFO - lr: 1.0110e-05 gnorm: 1.14 [19:12:26< 5:18:40] +[titan] 2025-10-05 17:46:48,608 - root - INFO - step: 31340 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 17:46:48,608 - root - INFO - lr: 1.0104e-05 gnorm: 1.12 [19:12:37< 5:18:29] +[titan] 2025-10-05 17:46:59,446 - root - INFO - step: 31345 loss: 1.9908 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 17:46:59,446 - root - INFO - lr: 1.0099e-05 gnorm: 1.14 [19:12:48< 5:18:18] +[titan] 2025-10-05 17:47:08,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:47:10,307 - root - INFO - step: 31350 loss: 2.0078 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7753 +[titan] 2025-10-05 17:47:10,307 - root - INFO - lr: 1.0093e-05 gnorm: 1.18 [19:12:59< 5:18:07] +[titan] 2025-10-05 17:47:21,149 - root - INFO - step: 31355 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 17:47:21,149 - root - INFO - lr: 1.0087e-05 gnorm: 1.14 [19:13:10< 5:17:56] +[titan] 2025-10-05 17:47:32,020 - root - INFO - step: 31360 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 17:47:32,020 - root - INFO - lr: 1.0082e-05 gnorm: 1.16 [19:13:20< 5:17:45] +[titan] 2025-10-05 17:47:42,860 - root - INFO - step: 31365 loss: 2.0383 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 17:47:42,861 - root - INFO - lr: 1.0076e-05 gnorm: 1.15 [19:13:31< 5:17:34] +[titan] 2025-10-05 17:47:53,707 - root - INFO - step: 31370 loss: 2.0511 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 17:47:53,708 - root - INFO - lr: 1.0070e-05 gnorm: 1.16 [19:13:42< 5:17:23] +[titan] 2025-10-05 17:48:04,561 - root - INFO - step: 31375 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7875 +[titan] 2025-10-05 17:48:04,561 - root - INFO - lr: 1.0065e-05 gnorm: 1.20 [19:13:53< 5:17:12] +[titan] 2025-10-05 17:48:15,405 - root - INFO - step: 31380 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:48:15,405 - root - INFO - lr: 
1.0059e-05 gnorm: 1.15 [19:14:04< 5:17:01] +[titan] 2025-10-05 17:48:26,264 - root - INFO - step: 31385 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 17:48:26,264 - root - INFO - lr: 1.0053e-05 gnorm: 1.15 [19:14:15< 5:16:50] +[titan] 2025-10-05 17:48:37,141 - root - INFO - step: 31390 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 17:48:37,141 - root - INFO - lr: 1.0048e-05 gnorm: 1.19 [19:14:26< 5:16:39] +[titan] 2025-10-05 17:48:47,988 - root - INFO - step: 31395 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 17:48:47,988 - root - INFO - lr: 1.0042e-05 gnorm: 1.17 [19:14:36< 5:16:27] +[titan] 2025-10-05 17:48:56,653 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:48:58,841 - root - INFO - step: 31400 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 17:48:58,841 - root - INFO - lr: 1.0036e-05 gnorm: 1.19 [19:14:47< 5:16:16] +[titan] 2025-10-05 17:49:09,687 - root - INFO - step: 31405 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:49:09,687 - root - INFO - lr: 1.0031e-05 gnorm: 1.19 [19:14:58< 5:16:05] +[titan] 2025-10-05 17:49:20,527 - root - INFO - step: 31410 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 17:49:20,527 - root - INFO - lr: 1.0025e-05 gnorm: 1.12 [19:15:09< 5:15:54] +[titan] 2025-10-05 17:49:31,368 - root - INFO - step: 31415 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:49:31,368 - root - INFO - lr: 1.0020e-05 gnorm: 1.12 [19:15:20< 5:15:43] +[titan] 2025-10-05 17:49:42,213 - root - INFO - step: 31420 loss: 1.9250 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7033 +[titan] 2025-10-05 17:49:42,213 - root - INFO - lr: 1.0014e-05 gnorm: 1.15 [19:15:31< 5:15:32] +[titan] 2025-10-05 17:49:53,106 - root - INFO - step: 31425 loss: 1.9352 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 17:49:53,106 - root - INFO - lr: 1.0008e-05 gnorm: 1.16 [19:15:42< 5:15:21] +[titan] 2025-10-05 17:50:03,949 - root - INFO - step: 31430 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:50:03,949 - root - INFO - lr: 
1.0003e-05 gnorm: 1.17 [19:15:52< 5:15:10] +[titan] 2025-10-05 17:50:14,831 - root - INFO - step: 31435 loss: 1.9571 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 17:50:14,832 - root - INFO - lr: 9.9971e-06 gnorm: 1.15 [19:16:03< 5:14:59] +[titan] 2025-10-05 17:50:25,694 - root - INFO - step: 31440 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 17:50:25,694 - root - INFO - lr: 9.9915e-06 gnorm: 1.18 [19:16:14< 5:14:48] +[titan] 2025-10-05 17:50:36,538 - root - INFO - step: 31445 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 17:50:36,538 - root - INFO - lr: 9.9859e-06 gnorm: 2.11 [19:16:25< 5:14:37] +[titan] 2025-10-05 17:50:45,222 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:50:47,411 - root - INFO - step: 31450 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:50:47,411 - root - INFO - lr: 9.9803e-06 gnorm: 1.16 [19:16:36< 5:14:26] +[titan] 2025-10-05 17:50:58,315 - root - INFO - step: 31455 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:50:58,315 - root - INFO - lr: 9.9747e-06 gnorm: 1.15 [19:16:47< 5:14:15] +[titan] 2025-10-05 17:51:09,156 - root - INFO - step: 31460 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7552 +[titan] 2025-10-05 17:51:09,156 - root - INFO - lr: 9.9691e-06 gnorm: 1.20 [19:16:58< 5:14:03] +[titan] 2025-10-05 17:51:20,027 - root - INFO - step: 31465 loss: 2.0529 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 17:51:20,027 - root - INFO - lr: 9.9635e-06 gnorm: 1.21 [19:17:08< 5:13:52] +[titan] 2025-10-05 17:51:30,891 - root - INFO - step: 31470 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 17:51:30,891 - root - INFO - lr: 9.9579e-06 gnorm: 1.16 [19:17:19< 5:13:41] +[titan] 2025-10-05 17:51:41,738 - root - INFO - step: 31475 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 17:51:41,739 - root - INFO - lr: 9.9524e-06 gnorm: 1.16 [19:17:30< 5:13:30] +[titan] 2025-10-05 17:51:52,590 - root - INFO - step: 31480 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 17:51:52,590 - root - INFO - lr: 
9.9468e-06 gnorm: 1.15 [19:17:41< 5:13:19] +[titan] 2025-10-05 17:52:03,461 - root - INFO - step: 31485 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 17:52:03,461 - root - INFO - lr: 9.9412e-06 gnorm: 1.15 [19:17:52< 5:13:08] +[titan] 2025-10-05 17:52:14,350 - root - INFO - step: 31490 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 17:52:14,350 - root - INFO - lr: 9.9356e-06 gnorm: 1.18 [19:18:03< 5:12:57] +[titan] 2025-10-05 17:52:25,212 - root - INFO - step: 31495 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 17:52:25,212 - root - INFO - lr: 9.9300e-06 gnorm: 1.13 [19:18:14< 5:12:46] +[titan] 2025-10-05 17:52:33,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:52:36,054 - root - INFO - step: 31500 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 17:52:36,054 - root - INFO - lr: 9.9245e-06 gnorm: 1.18 [19:18:24< 5:12:35] +[titan] 2025-10-05 17:52:46,921 - root - INFO - step: 31505 loss: 1.9036 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 17:52:46,921 - root - INFO - lr: 9.9189e-06 gnorm: 1.12 [19:18:35< 5:12:24] +[titan] 2025-10-05 17:52:57,775 - root - INFO - step: 31510 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:52:57,775 - root - INFO - lr: 9.9133e-06 gnorm: 1.14 [19:18:46< 5:12:13] +[titan] 2025-10-05 17:53:08,630 - root - INFO - step: 31515 loss: 1.8954 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6762 +[titan] 2025-10-05 17:53:08,630 - root - INFO - lr: 9.9078e-06 gnorm: 1.14 [19:18:57< 5:12:02] +[titan] 2025-10-05 17:53:19,545 - root - INFO - step: 31520 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 17:53:19,545 - root - INFO - lr: 9.9022e-06 gnorm: 1.13 [19:19:08< 5:11:51] +[titan] 2025-10-05 17:53:30,414 - root - INFO - step: 31525 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:53:30,414 - root - INFO - lr: 9.8966e-06 gnorm: 1.16 [19:19:19< 5:11:39] +[titan] 2025-10-05 17:53:41,302 - root - INFO - step: 31530 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 17:53:41,302 - root - INFO - lr: 
9.8911e-06 gnorm: 1.16 [19:19:30< 5:11:28] +[titan] 2025-10-05 17:53:52,151 - root - INFO - step: 31535 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:53:52,152 - root - INFO - lr: 9.8855e-06 gnorm: 1.18 [19:19:41< 5:11:17] +[titan] 2025-10-05 17:54:03,009 - root - INFO - step: 31540 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 17:54:03,009 - root - INFO - lr: 9.8800e-06 gnorm: 1.19 [19:19:51< 5:11:06] +[titan] 2025-10-05 17:54:13,869 - root - INFO - step: 31545 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6744 +[titan] 2025-10-05 17:54:13,869 - root - INFO - lr: 9.8744e-06 gnorm: 1.15 [19:20:02< 5:10:55] +[titan] 2025-10-05 17:54:22,536 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:54:24,751 - root - INFO - step: 31550 loss: 2.0225 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 17:54:24,751 - root - INFO - lr: 9.8689e-06 gnorm: 1.23 [19:20:13< 5:10:44] +[titan] 2025-10-05 17:54:35,610 - root - INFO - step: 31555 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 17:54:35,610 - root - INFO - lr: 9.8633e-06 gnorm: 1.15 [19:20:24< 5:10:33] +[titan] 2025-10-05 17:54:46,473 - root - INFO - step: 31560 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 17:54:46,473 - root - INFO - lr: 9.8578e-06 gnorm: 1.13 [19:20:35< 5:10:22] +[titan] 2025-10-05 17:54:57,341 - root - INFO - step: 31565 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:54:57,341 - root - INFO - lr: 9.8523e-06 gnorm: 1.14 [19:20:46< 5:10:11] +[titan] 2025-10-05 17:55:08,193 - root - INFO - step: 31570 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 17:55:08,193 - root - INFO - lr: 9.8467e-06 gnorm: 1.13 [19:20:57< 5:10:00] +[titan] 2025-10-05 17:55:19,059 - root - INFO - step: 31575 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 17:55:19,059 - root - INFO - lr: 9.8412e-06 gnorm: 1.16 [19:21:07< 5:09:49] +[titan] 2025-10-05 17:55:29,929 - root - INFO - step: 31580 loss: 2.0143 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7811 +[titan] 2025-10-05 17:55:29,929 - root - INFO - lr: 
9.8357e-06 gnorm: 1.16 [19:21:18< 5:09:38] +[titan] 2025-10-05 17:55:40,810 - root - INFO - step: 31585 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 17:55:40,810 - root - INFO - lr: 9.8301e-06 gnorm: 1.18 [19:21:29< 5:09:27] +[titan] 2025-10-05 17:55:51,674 - root - INFO - step: 31590 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 17:55:51,674 - root - INFO - lr: 9.8246e-06 gnorm: 1.14 [19:21:40< 5:09:15] +[titan] 2025-10-05 17:56:02,575 - root - INFO - step: 31595 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 17:56:02,575 - root - INFO - lr: 9.8191e-06 gnorm: 1.17 [19:21:51< 5:09:04] +[titan] 2025-10-05 17:56:11,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:56:13,445 - root - INFO - step: 31600 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 17:56:13,445 - root - INFO - lr: 9.8136e-06 gnorm: 1.13 [19:22:02< 5:08:53] +[titan] 2025-10-05 17:56:24,318 - root - INFO - step: 31605 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:56:24,318 - root - INFO - lr: 9.8081e-06 gnorm: 1.14 [19:22:13< 5:08:42] +[titan] 2025-10-05 17:56:35,171 - root - INFO - step: 31610 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 17:56:35,172 - root - INFO - lr: 9.8025e-06 gnorm: 1.19 [19:22:24< 5:08:31] +[titan] 2025-10-05 17:56:46,086 - root - INFO - step: 31615 loss: 2.0603 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:56:46,087 - root - INFO - lr: 9.7970e-06 gnorm: 1.20 [19:22:34< 5:08:20] +[titan] 2025-10-05 17:56:56,970 - root - INFO - step: 31620 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 17:56:56,970 - root - INFO - lr: 9.7915e-06 gnorm: 1.15 [19:22:45< 5:08:09] +[titan] 2025-10-05 17:57:07,861 - root - INFO - step: 31625 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:57:07,862 - root - INFO - lr: 9.7860e-06 gnorm: 1.16 [19:22:56< 5:07:58] +[titan] 2025-10-05 17:57:18,731 - root - INFO - step: 31630 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 17:57:18,731 - root - INFO - lr: 
9.7805e-06 gnorm: 1.15 [19:23:07< 5:07:47] +[titan] 2025-10-05 17:57:29,588 - root - INFO - step: 31635 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7639 +[titan] 2025-10-05 17:57:29,588 - root - INFO - lr: 9.7750e-06 gnorm: 1.13 [19:23:18< 5:07:36] +[titan] 2025-10-05 17:57:40,435 - root - INFO - step: 31640 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7360 +[titan] 2025-10-05 17:57:40,435 - root - INFO - lr: 9.7695e-06 gnorm: 1.15 [19:23:29< 5:07:25] +[titan] 2025-10-05 17:57:51,307 - root - INFO - step: 31645 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:57:51,307 - root - INFO - lr: 9.7640e-06 gnorm: 1.15 [19:23:40< 5:07:14] +[titan] 2025-10-05 17:58:00,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:58:02,211 - root - INFO - step: 31650 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 17:58:02,211 - root - INFO - lr: 9.7585e-06 gnorm: 1.14 [19:23:51< 5:07:03] +[titan] 2025-10-05 17:58:13,078 - root - INFO - step: 31655 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:58:13,078 - root - INFO - lr: 9.7531e-06 gnorm: 1.13 [19:24:01< 5:06:51] +[titan] 2025-10-05 17:58:23,965 - root - INFO - step: 31660 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:58:23,966 - root - INFO - lr: 9.7476e-06 gnorm: 1.16 [19:24:12< 5:06:40] +[titan] 2025-10-05 17:58:34,856 - root - INFO - step: 31665 loss: 1.9619 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 17:58:34,857 - root - INFO - lr: 9.7421e-06 gnorm: 1.15 [19:24:23< 5:06:29] +[titan] 2025-10-05 17:58:45,708 - root - INFO - step: 31670 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7203 +[titan] 2025-10-05 17:58:45,708 - root - INFO - lr: 9.7366e-06 gnorm: 1.17 [19:24:34< 5:06:18] +[titan] 2025-10-05 17:58:56,585 - root - INFO - step: 31675 loss: 1.9360 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7121 +[titan] 2025-10-05 17:58:56,586 - root - INFO - lr: 9.7311e-06 gnorm: 1.19 [19:24:45< 5:06:07] +[titan] 2025-10-05 17:59:07,478 - root - INFO - step: 31680 loss: 2.0159 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 17:59:07,478 - root - INFO - lr: 
9.7257e-06 gnorm: 1.17 [19:24:56< 5:05:56] +[titan] 2025-10-05 17:59:18,333 - root - INFO - step: 31685 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 17:59:18,333 - root - INFO - lr: 9.7202e-06 gnorm: 1.14 [19:25:07< 5:05:45] +[titan] 2025-10-05 17:59:29,202 - root - INFO - step: 31690 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 17:59:29,202 - root - INFO - lr: 9.7147e-06 gnorm: 1.15 [19:25:18< 5:05:34] +[titan] 2025-10-05 17:59:40,087 - root - INFO - step: 31695 loss: 2.0267 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 17:59:40,087 - root - INFO - lr: 9.7093e-06 gnorm: 1.18 [19:25:28< 5:05:23] +[titan] 2025-10-05 17:59:48,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:59:50,987 - root - INFO - step: 31700 loss: 2.0901 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 17:59:50,987 - root - INFO - lr: 9.7038e-06 gnorm: 1.21 [19:25:39< 5:05:12] +[titan] 2025-10-05 18:00:01,873 - root - INFO - step: 31705 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 18:00:01,873 - root - INFO - lr: 9.6983e-06 gnorm: 1.15 [19:25:50< 5:05:01] +[titan] 2025-10-05 18:00:12,786 - root - INFO - step: 31710 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 18:00:12,786 - root - INFO - lr: 9.6929e-06 gnorm: 1.19 [19:26:01< 5:04:50] +[titan] 2025-10-05 18:00:23,689 - root - INFO - step: 31715 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 18:00:23,689 - root - INFO - lr: 9.6874e-06 gnorm: 1.20 [19:26:12< 5:04:39] +[titan] 2025-10-05 18:00:34,603 - root - INFO - step: 31720 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:00:34,603 - root - INFO - lr: 9.6820e-06 gnorm: 1.17 [19:26:23< 5:04:28] +[titan] 2025-10-05 18:00:45,500 - root - INFO - step: 31725 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 18:00:45,500 - root - INFO - lr: 9.6765e-06 gnorm: 1.17 [19:26:34< 5:04:17] +[titan] 2025-10-05 18:00:56,416 - root - INFO - step: 31730 loss: 1.8776 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6601 +[titan] 2025-10-05 18:00:56,416 - root - INFO - lr: 
9.6711e-06 gnorm: 1.13 [19:26:45< 5:04:05] +[titan] 2025-10-05 18:01:07,298 - root - INFO - step: 31735 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:01:07,298 - root - INFO - lr: 9.6656e-06 gnorm: 1.14 [19:26:56< 5:03:54] +[titan] 2025-10-05 18:01:18,193 - root - INFO - step: 31740 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 18:01:18,193 - root - INFO - lr: 9.6602e-06 gnorm: 1.19 [19:27:07< 5:03:43] +[titan] 2025-10-05 18:01:27,251 - root - INFO - Dumping profiler traces at step 31744 +[titan] 2025-10-05 18:01:27,289 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:01:29,483 - root - INFO - step: 31745 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 29,024 tflops: 402.66 mfu: 40.71% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:01:29,484 - root - INFO - lr: 9.6548e-06 gnorm: 1.14 [19:27:18< 5:03:32] +[titan] 2025-10-05 18:01:38,164 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:01:40,345 - root - INFO - step: 31750 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 18:01:40,345 - root - INFO - lr: 9.6493e-06 gnorm: 1.15 [19:27:29< 5:03:21] +[titan] 2025-10-05 18:01:51,212 - root - INFO - step: 31755 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 18:01:51,212 - root - INFO - lr: 9.6439e-06 gnorm: 1.16 [19:27:40< 5:03:10] +[titan] 2025-10-05 18:02:02,067 - root - INFO - step: 31760 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 18:02:02,067 - root - INFO - lr: 9.6385e-06 gnorm: 1.20 [19:27:50< 5:02:59] +[titan] 2025-10-05 18:02:12,939 - root - INFO - step: 31765 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 18:02:12,939 - root - INFO - lr: 9.6330e-06 gnorm: 1.12 [19:28:01< 5:02:48] +[titan] 2025-10-05 18:02:24,117 - root - INFO - step: 31770 loss: 1.9667 memory: 118.84GiB(85.28%) tps: 29,315 tflops: 406.70 mfu: 41.12% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 18:02:24,118 - root - INFO - lr: 9.6276e-06 gnorm: 1.16 [19:28:12< 5:02:37] +[titan] 2025-10-05 18:02:35,008 - root - INFO - step: 31775 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 18:02:35,009 - root - INFO - lr: 9.6222e-06 gnorm: 1.21 [19:28:23< 5:02:26] +[titan] 2025-10-05 18:02:45,869 - root - INFO - step: 31780 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:02:45,869 - root - INFO - lr: 
9.6168e-06 gnorm: 1.16 [19:28:34< 5:02:15] +[titan] 2025-10-05 18:02:56,730 - root - INFO - step: 31785 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:02:56,730 - root - INFO - lr: 9.6114e-06 gnorm: 1.17 [19:28:45< 5:02:04] +[titan] 2025-10-05 18:03:07,583 - root - INFO - step: 31790 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 18:03:07,583 - root - INFO - lr: 9.6059e-06 gnorm: 1.14 [19:28:56< 5:01:53] +[titan] 2025-10-05 18:03:18,426 - root - INFO - step: 31795 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 18:03:18,426 - root - INFO - lr: 9.6005e-06 gnorm: 1.12 [19:29:07< 5:01:42] +[titan] 2025-10-05 18:03:27,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:03:29,339 - root - INFO - step: 31800 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 18:03:29,339 - root - INFO - lr: 9.5951e-06 gnorm: 1.13 [19:29:18< 5:01:31] +[titan] 2025-10-05 18:03:40,190 - root - INFO - step: 31805 loss: 1.9797 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:03:40,190 - root - INFO - lr: 9.5897e-06 gnorm: 1.15 [19:29:29< 5:01:20] +[titan] 2025-10-05 18:03:51,090 - root - INFO - step: 31810 loss: 2.0140 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 18:03:51,090 - root - INFO - lr: 9.5843e-06 gnorm: 1.18 [19:29:39< 5:01:08] +[titan] 2025-10-05 18:04:01,948 - root - INFO - step: 31815 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 18:04:01,948 - root - INFO - lr: 9.5789e-06 gnorm: 1.12 [19:29:50< 5:00:57] +[titan] 2025-10-05 18:04:12,805 - root - INFO - step: 31820 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 18:04:12,805 - root - INFO - lr: 9.5735e-06 gnorm: 1.15 [19:30:01< 5:00:46] +[titan] 2025-10-05 18:04:23,715 - root - INFO - step: 31825 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7202 +[titan] 2025-10-05 18:04:23,715 - root - INFO - lr: 9.5681e-06 gnorm: 1.12 [19:30:12< 5:00:35] +[titan] 2025-10-05 18:04:34,585 - root - INFO - step: 31830 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 18:04:34,585 - root - INFO - lr: 
9.5628e-06 gnorm: 1.16 [19:30:23< 5:00:24] +[titan] 2025-10-05 18:04:45,454 - root - INFO - step: 31835 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:04:45,454 - root - INFO - lr: 9.5574e-06 gnorm: 1.18 [19:30:34< 5:00:13] +[titan] 2025-10-05 18:04:56,357 - root - INFO - step: 31840 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 18:04:56,357 - root - INFO - lr: 9.5520e-06 gnorm: 1.16 [19:30:45< 5:00:02] +[titan] 2025-10-05 18:05:07,225 - root - INFO - step: 31845 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 18:05:07,226 - root - INFO - lr: 9.5466e-06 gnorm: 1.18 [19:30:56< 4:59:51] +[titan] 2025-10-05 18:05:15,912 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:05:18,103 - root - INFO - step: 31850 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 18:05:18,103 - root - INFO - lr: 9.5412e-06 gnorm: 1.16 [19:31:06< 4:59:40] +[titan] 2025-10-05 18:05:29,031 - root - INFO - step: 31855 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7969 +[titan] 2025-10-05 18:05:29,031 - root - INFO - lr: 9.5359e-06 gnorm: 1.17 [19:31:17< 4:59:29] +[titan] 2025-10-05 18:05:39,898 - root - INFO - step: 31860 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7375 +[titan] 2025-10-05 18:05:39,898 - root - INFO - lr: 9.5305e-06 gnorm: 1.17 [19:31:28< 4:59:18] +[titan] 2025-10-05 18:05:50,764 - root - INFO - step: 31865 loss: 1.9005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6802 +[titan] 2025-10-05 18:05:50,764 - root - INFO - lr: 9.5251e-06 gnorm: 1.14 [19:31:39< 4:59:07] +[titan] 2025-10-05 18:06:01,663 - root - INFO - step: 31870 loss: 1.9427 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:06:01,664 - root - INFO - lr: 9.5197e-06 gnorm: 1.17 [19:31:50< 4:58:56] +[titan] 2025-10-05 18:06:12,506 - root - INFO - step: 31875 loss: 2.0201 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 18:06:12,507 - root - INFO - lr: 9.5144e-06 gnorm: 1.20 [19:32:01< 4:58:45] +[titan] 2025-10-05 18:06:23,358 - root - INFO - step: 31880 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 18:06:23,358 - root - INFO - lr: 
9.5090e-06 gnorm: 1.12 [19:32:12< 4:58:33] +[titan] 2025-10-05 18:06:34,258 - root - INFO - step: 31885 loss: 1.8475 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 18:06:34,259 - root - INFO - lr: 9.5037e-06 gnorm: 1.13 [19:32:23< 4:58:22] +[titan] 2025-10-05 18:06:45,106 - root - INFO - step: 31890 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 18:06:45,106 - root - INFO - lr: 9.4983e-06 gnorm: 1.19 [19:32:33< 4:58:11] +[titan] 2025-10-05 18:06:55,965 - root - INFO - step: 31895 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 18:06:55,965 - root - INFO - lr: 9.4930e-06 gnorm: 1.16 [19:32:44< 4:58:00] +[titan] 2025-10-05 18:07:04,625 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:07:06,799 - root - INFO - step: 31900 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:07:06,799 - root - INFO - lr: 9.4876e-06 gnorm: 1.18 [19:32:55< 4:57:49] +[titan] 2025-10-05 18:07:17,698 - root - INFO - step: 31905 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 18:07:17,698 - root - INFO - lr: 9.4823e-06 gnorm: 1.20 [19:33:06< 4:57:38] +[titan] 2025-10-05 18:07:28,596 - root - INFO - step: 31910 loss: 1.9594 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 18:07:28,597 - root - INFO - lr: 9.4769e-06 gnorm: 1.14 [19:33:17< 4:57:27] +[titan] 2025-10-05 18:07:39,465 - root - INFO - step: 31915 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 18:07:39,466 - root - INFO - lr: 9.4716e-06 gnorm: 1.15 [19:33:28< 4:57:16] +[titan] 2025-10-05 18:07:50,320 - root - INFO - step: 31920 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:07:50,320 - root - INFO - lr: 9.4662e-06 gnorm: 1.18 [19:33:39< 4:57:05] +[titan] 2025-10-05 18:08:01,166 - root - INFO - step: 31925 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:08:01,166 - root - INFO - lr: 9.4609e-06 gnorm: 1.19 [19:33:50< 4:56:54] +[titan] 2025-10-05 18:08:12,045 - root - INFO - step: 31930 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 18:08:12,045 - root - INFO - lr: 
9.4556e-06 gnorm: 1.11 [19:34:00< 4:56:43] +[titan] 2025-10-05 18:08:22,957 - root - INFO - step: 31935 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7321 +[titan] 2025-10-05 18:08:22,957 - root - INFO - lr: 9.4502e-06 gnorm: 1.14 [19:34:11< 4:56:32] +[titan] 2025-10-05 18:08:33,867 - root - INFO - step: 31940 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7987 +[titan] 2025-10-05 18:08:33,867 - root - INFO - lr: 9.4449e-06 gnorm: 1.15 [19:34:22< 4:56:21] +[titan] 2025-10-05 18:08:44,766 - root - INFO - step: 31945 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 18:08:44,766 - root - INFO - lr: 9.4396e-06 gnorm: 1.16 [19:34:33< 4:56:10] +[titan] 2025-10-05 18:08:53,449 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:08:55,628 - root - INFO - step: 31950 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 18:08:55,628 - root - INFO - lr: 9.4343e-06 gnorm: 1.18 [19:34:44< 4:55:59] +[titan] 2025-10-05 18:09:06,503 - root - INFO - step: 31955 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 18:09:06,503 - root - INFO - lr: 9.4289e-06 gnorm: 1.12 [19:34:55< 4:55:47] +[titan] 2025-10-05 18:09:17,363 - root - INFO - step: 31960 loss: 2.0329 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 18:09:17,363 - root - INFO - lr: 9.4236e-06 gnorm: 1.18 [19:35:06< 4:55:36] +[titan] 2025-10-05 18:09:28,265 - root - INFO - step: 31965 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 18:09:28,265 - root - INFO - lr: 9.4183e-06 gnorm: 1.18 [19:35:17< 4:55:25] +[titan] 2025-10-05 18:09:39,153 - root - INFO - step: 31970 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 18:09:39,153 - root - INFO - lr: 9.4130e-06 gnorm: 1.15 [19:35:28< 4:55:14] +[titan] 2025-10-05 18:09:50,010 - root - INFO - step: 31975 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 18:09:50,010 - root - INFO - lr: 9.4077e-06 gnorm: 1.18 [19:35:38< 4:55:03] +[titan] 2025-10-05 18:10:00,880 - root - INFO - step: 31980 loss: 1.9569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 18:10:00,880 - root - INFO - lr: 
9.4024e-06 gnorm: 1.14 [19:35:49< 4:54:52] +[titan] 2025-10-05 18:10:11,742 - root - INFO - step: 31985 loss: 1.9260 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 18:10:11,742 - root - INFO - lr: 9.3971e-06 gnorm: 1.13 [19:36:00< 4:54:41] +[titan] 2025-10-05 18:10:22,613 - root - INFO - step: 31990 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 18:10:22,613 - root - INFO - lr: 9.3918e-06 gnorm: 1.16 [19:36:11< 4:54:30] +[titan] 2025-10-05 18:10:33,523 - root - INFO - step: 31995 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 18:10:33,523 - root - INFO - lr: 9.3865e-06 gnorm: 1.14 [19:36:22< 4:54:19] +[titan] 2025-10-05 18:10:42,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:10:44,426 - root - INFO - step: 32000 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 18:10:44,426 - root - INFO - lr: 9.3812e-06 gnorm: 1.14 [19:36:33< 4:54:08] +[titan] 2025-10-05 18:10:55,314 - root - INFO - step: 32005 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 18:10:55,315 - root - INFO - lr: 9.3759e-06 gnorm: 1.17 [19:36:44< 4:53:57] +[titan] 2025-10-05 18:11:06,182 - root - INFO - step: 32010 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:11:06,182 - root - INFO - lr: 9.3706e-06 gnorm: 1.16 [19:36:55< 4:53:46] +[titan] 2025-10-05 18:11:17,044 - root - INFO - step: 32015 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 18:11:17,044 - root - INFO - lr: 9.3653e-06 gnorm: 1.16 [19:37:05< 4:53:35] +[titan] 2025-10-05 18:11:27,919 - root - INFO - step: 32020 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 18:11:27,919 - root - INFO - lr: 9.3601e-06 gnorm: 1.21 [19:37:16< 4:53:24] +[titan] 2025-10-05 18:11:38,842 - root - INFO - step: 32025 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:11:38,842 - root - INFO - lr: 9.3548e-06 gnorm: 1.19 [19:37:27< 4:53:12] +[titan] 2025-10-05 18:11:49,758 - root - INFO - step: 32030 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7644 +[titan] 2025-10-05 18:11:49,758 - root - INFO - lr: 
9.3495e-06 gnorm: 1.19 [19:37:38< 4:53:01] +[titan] 2025-10-05 18:12:00,638 - root - INFO - step: 32035 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:12:00,638 - root - INFO - lr: 9.3442e-06 gnorm: 1.14 [19:37:49< 4:52:50] +[titan] 2025-10-05 18:12:11,546 - root - INFO - step: 32040 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:12:11,547 - root - INFO - lr: 9.3390e-06 gnorm: 1.15 [19:38:00< 4:52:39] +[titan] 2025-10-05 18:12:22,450 - root - INFO - step: 32045 loss: 1.8868 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 18:12:22,450 - root - INFO - lr: 9.3337e-06 gnorm: 1.14 [19:38:11< 4:52:28] +[titan] 2025-10-05 18:12:31,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:12:33,403 - root - INFO - step: 32050 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 18:12:33,403 - root - INFO - lr: 9.3284e-06 gnorm: 1.15 [19:38:22< 4:52:17] +[titan] 2025-10-05 18:12:44,298 - root - INFO - step: 32055 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 18:12:44,298 - root - INFO - lr: 9.3232e-06 gnorm: 1.15 [19:38:33< 4:52:06] +[titan] 2025-10-05 18:12:55,164 - root - INFO - step: 32060 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:12:55,164 - root - INFO - lr: 9.3179e-06 gnorm: 1.16 [19:38:44< 4:51:55] +[titan] 2025-10-05 18:13:06,043 - root - INFO - step: 32065 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7655 +[titan] 2025-10-05 18:13:06,043 - root - INFO - lr: 9.3127e-06 gnorm: 1.15 [19:38:54< 4:51:44] +[titan] 2025-10-05 18:13:16,898 - root - INFO - step: 32070 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 18:13:16,898 - root - INFO - lr: 9.3074e-06 gnorm: 1.18 [19:39:05< 4:51:33] +[titan] 2025-10-05 18:13:27,792 - root - INFO - step: 32075 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 18:13:27,793 - root - INFO - lr: 9.3022e-06 gnorm: 1.19 [19:39:16< 4:51:22] +[titan] 2025-10-05 18:13:38,698 - root - INFO - step: 32080 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 18:13:38,698 - root - INFO - lr: 
9.2969e-06 gnorm: 1.19 [19:39:27< 4:51:11] +[titan] 2025-10-05 18:13:49,556 - root - INFO - step: 32085 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 18:13:49,557 - root - INFO - lr: 9.2917e-06 gnorm: 1.17 [19:39:38< 4:51:00] +[titan] 2025-10-05 18:14:00,441 - root - INFO - step: 32090 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 18:14:00,441 - root - INFO - lr: 9.2864e-06 gnorm: 1.16 [19:39:49< 4:50:49] +[titan] 2025-10-05 18:14:11,340 - root - INFO - step: 32095 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 18:14:11,340 - root - INFO - lr: 9.2812e-06 gnorm: 1.14 [19:40:00< 4:50:38] +[titan] 2025-10-05 18:14:20,021 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:14:22,203 - root - INFO - step: 32100 loss: 1.9882 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 18:14:22,203 - root - INFO - lr: 9.2759e-06 gnorm: 1.14 [19:40:11< 4:50:27] +[titan] 2025-10-05 18:14:33,146 - root - INFO - step: 32105 loss: 2.0008 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:14:33,146 - root - INFO - lr: 9.2707e-06 gnorm: 1.18 [19:40:21< 4:50:15] +[titan] 2025-10-05 18:14:44,011 - root - INFO - step: 32110 loss: 1.9522 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:14:44,011 - root - INFO - lr: 9.2655e-06 gnorm: 1.14 [19:40:32< 4:50:04] +[titan] 2025-10-05 18:14:54,863 - root - INFO - step: 32115 loss: 1.9586 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:14:54,864 - root - INFO - lr: 9.2603e-06 gnorm: 1.15 [19:40:43< 4:49:53] +[titan] 2025-10-05 18:15:05,716 - root - INFO - step: 32120 loss: 1.9321 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 18:15:05,716 - root - INFO - lr: 9.2550e-06 gnorm: 1.13 [19:40:54< 4:49:42] +[titan] 2025-10-05 18:15:16,556 - root - INFO - step: 32125 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:15:16,556 - root - INFO - lr: 9.2498e-06 gnorm: 1.15 [19:41:05< 4:49:31] +[titan] 2025-10-05 18:15:27,432 - root - INFO - step: 32130 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 18:15:27,433 - root - INFO - lr: 
9.2446e-06 gnorm: 1.20 [19:41:16< 4:49:20] +[titan] 2025-10-05 18:15:38,339 - root - INFO - step: 32135 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7023 +[titan] 2025-10-05 18:15:38,339 - root - INFO - lr: 9.2394e-06 gnorm: 1.17 [19:41:27< 4:49:09] +[titan] 2025-10-05 18:15:49,183 - root - INFO - step: 32140 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 18:15:49,184 - root - INFO - lr: 9.2342e-06 gnorm: 1.19 [19:41:38< 4:48:58] +[titan] 2025-10-05 18:16:00,016 - root - INFO - step: 32145 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:16:00,017 - root - INFO - lr: 9.2290e-06 gnorm: 1.19 [19:41:48< 4:48:47] +[titan] 2025-10-05 18:16:08,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:16:10,859 - root - INFO - step: 32150 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7520 +[titan] 2025-10-05 18:16:10,859 - root - INFO - lr: 9.2237e-06 gnorm: 1.17 [19:41:59< 4:48:36] +[titan] 2025-10-05 18:16:21,712 - root - INFO - step: 32155 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 18:16:21,712 - root - INFO - lr: 9.2185e-06 gnorm: 1.14 [19:42:10< 4:48:25] +[titan] 2025-10-05 18:16:32,617 - root - INFO - step: 32160 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 18:16:32,618 - root - INFO - lr: 9.2133e-06 gnorm: 1.19 [19:42:21< 4:48:14] +[titan] 2025-10-05 18:16:43,525 - root - INFO - step: 32165 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 18:16:43,525 - root - INFO - lr: 9.2081e-06 gnorm: 1.20 [19:42:32< 4:48:03] +[titan] 2025-10-05 18:16:54,419 - root - INFO - step: 32170 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 18:16:54,419 - root - INFO - lr: 9.2029e-06 gnorm: 1.15 [19:42:43< 4:47:52] +[titan] 2025-10-05 18:17:05,289 - root - INFO - step: 32175 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 18:17:05,289 - root - INFO - lr: 9.1978e-06 gnorm: 1.15 [19:42:54< 4:47:40] +[titan] 2025-10-05 18:17:16,163 - root - INFO - step: 32180 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 18:17:16,164 - root - INFO - lr: 
9.1926e-06 gnorm: 1.17 [19:43:04< 4:47:29] +[titan] 2025-10-05 18:17:27,035 - root - INFO - step: 32185 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7293 +[titan] 2025-10-05 18:17:27,035 - root - INFO - lr: 9.1874e-06 gnorm: 1.11 [19:43:15< 4:47:18] +[titan] 2025-10-05 18:17:37,986 - root - INFO - step: 32190 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:17:37,987 - root - INFO - lr: 9.1822e-06 gnorm: 1.20 [19:43:26< 4:47:07] +[titan] 2025-10-05 18:17:48,863 - root - INFO - step: 32195 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 18:17:48,863 - root - INFO - lr: 9.1770e-06 gnorm: 1.14 [19:43:37< 4:46:56] +[titan] 2025-10-05 18:17:57,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:17:59,752 - root - INFO - step: 32200 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 18:17:59,752 - root - INFO - lr: 9.1718e-06 gnorm: 1.14 [19:43:48< 4:46:45] +[titan] 2025-10-05 18:18:10,633 - root - INFO - step: 32205 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 18:18:10,633 - root - INFO - lr: 9.1667e-06 gnorm: 1.16 [19:43:59< 4:46:34] +[titan] 2025-10-05 18:18:21,504 - root - INFO - step: 32210 loss: 1.9549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 18:18:21,505 - root - INFO - lr: 9.1615e-06 gnorm: 1.14 [19:44:10< 4:46:23] +[titan] 2025-10-05 18:18:32,363 - root - INFO - step: 32215 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 18:18:32,363 - root - INFO - lr: 9.1563e-06 gnorm: 1.15 [19:44:21< 4:46:12] +[titan] 2025-10-05 18:18:43,287 - root - INFO - step: 32220 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 18:18:43,287 - root - INFO - lr: 9.1512e-06 gnorm: 1.16 [19:44:32< 4:46:01] +[titan] 2025-10-05 18:18:54,183 - root - INFO - step: 32225 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 18:18:54,183 - root - INFO - lr: 9.1460e-06 gnorm: 1.14 [19:44:43< 4:45:50] +[titan] 2025-10-05 18:19:05,065 - root - INFO - step: 32230 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:19:05,066 - root - INFO - lr: 
9.1408e-06 gnorm: 1.13 [19:44:53< 4:45:39] +[titan] 2025-10-05 18:19:15,931 - root - INFO - step: 32235 loss: 1.9942 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:19:15,932 - root - INFO - lr: 9.1357e-06 gnorm: 1.19 [19:45:04< 4:45:28] +[titan] 2025-10-05 18:19:26,783 - root - INFO - step: 32240 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 18:19:26,783 - root - INFO - lr: 9.1305e-06 gnorm: 1.16 [19:45:15< 4:45:17] +[titan] 2025-10-05 18:19:37,703 - root - INFO - step: 32245 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:19:37,703 - root - INFO - lr: 9.1254e-06 gnorm: 1.16 [19:45:26< 4:45:06] +[titan] 2025-10-05 18:19:46,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:19:48,556 - root - INFO - step: 32250 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 18:19:48,556 - root - INFO - lr: 9.1202e-06 gnorm: 1.15 [19:45:37< 4:44:55] +[titan] 2025-10-05 18:19:59,512 - root - INFO - step: 32255 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:19:59,512 - root - INFO - lr: 9.1151e-06 gnorm: 1.19 [19:45:48< 4:44:43] +[titan] 2025-10-05 18:20:01,864 - root - INFO - Dumping profiler traces at step 32256 +[titan] 2025-10-05 18:20:01,901 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:20:10,612 - root - INFO - step: 32260 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.57 mfu: 41.41% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 18:20:10,612 - root - INFO - lr: 9.1099e-06 gnorm: 1.15 [19:45:59< 4:44:32] +[titan] 2025-10-05 18:20:21,505 - root - INFO - step: 32265 loss: 1.9661 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 18:20:21,505 - root - INFO - lr: 9.1048e-06 gnorm: 1.18 [19:46:10< 4:44:21] +[titan] 2025-10-05 18:20:32,390 - root - INFO - step: 32270 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 18:20:32,390 - root - INFO - lr: 9.0996e-06 gnorm: 1.19 [19:46:21< 4:44:10] +[titan] 2025-10-05 18:20:43,343 - root - INFO - step: 32275 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 18:20:43,343 - root - INFO - lr: 9.0945e-06 gnorm: 1.17 [19:46:32< 4:43:59] +[titan] 2025-10-05 18:20:54,195 - root - INFO - step: 32280 loss: 
1.9368 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 18:20:54,195 - root - INFO - lr: 9.0894e-06 gnorm: 1.13 [19:46:43< 4:43:48] +[titan] 2025-10-05 18:21:05,056 - root - INFO - step: 32285 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:21:05,056 - root - INFO - lr: 9.0842e-06 gnorm: 1.14 [19:46:53< 4:43:37] +[titan] 2025-10-05 18:21:15,905 - root - INFO - step: 32290 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 18:21:15,905 - root - INFO - lr: 9.0791e-06 gnorm: 1.14 [19:47:04< 4:43:26] +[titan] 2025-10-05 18:21:26,822 - root - INFO - step: 32295 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:21:26,822 - root - INFO - lr: 9.0740e-06 gnorm: 1.15 [19:47:15< 4:43:15] +[titan] 2025-10-05 18:21:35,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:21:37,747 - root - INFO - step: 32300 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:21:37,747 - root - INFO - lr: 9.0689e-06 gnorm: 1.15 [19:47:26< 4:43:04] +[titan] 2025-10-05 18:21:48,651 - root - INFO - step: 32305 loss: 1.9420 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:21:48,651 - root - INFO - lr: 9.0638e-06 gnorm: 1.13 [19:47:37< 4:42:53] +[titan] 2025-10-05 18:21:59,526 - root - INFO - step: 32310 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:21:59,526 - root - INFO - lr: 9.0586e-06 gnorm: 1.20 [19:47:48< 4:42:42] +[titan] 2025-10-05 18:22:10,410 - root - INFO - step: 32315 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 18:22:10,410 - root - INFO - lr: 9.0535e-06 gnorm: 1.16 [19:47:59< 4:42:31] +[titan] 2025-10-05 18:22:21,310 - root - INFO - step: 32320 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6910 +[titan] 2025-10-05 18:22:21,310 - root - INFO - lr: 9.0484e-06 gnorm: 1.16 [19:48:10< 4:42:20] +[titan] 2025-10-05 18:22:32,228 - root - INFO - step: 32325 loss: 1.9625 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 18:22:32,229 - root - INFO - lr: 9.0433e-06 gnorm: 1.17 [19:48:21< 4:42:09] +[titan] 2025-10-05 18:22:43,163 - root - INFO - step: 32330 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 18:22:43,164 - root - INFO - lr: 
9.0382e-06 gnorm: 1.16 [19:48:31< 4:41:58] +[titan] 2025-10-05 18:22:54,059 - root - INFO - step: 32335 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:22:54,059 - root - INFO - lr: 9.0331e-06 gnorm: 1.18 [19:48:42< 4:41:47] +[titan] 2025-10-05 18:23:04,937 - root - INFO - step: 32340 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 18:23:04,938 - root - INFO - lr: 9.0280e-06 gnorm: 1.22 [19:48:53< 4:41:35] +[titan] 2025-10-05 18:23:15,809 - root - INFO - step: 32345 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:23:15,809 - root - INFO - lr: 9.0229e-06 gnorm: 1.18 [19:49:04< 4:41:24] +[titan] 2025-10-05 18:23:24,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:23:26,693 - root - INFO - step: 32350 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 18:23:26,693 - root - INFO - lr: 9.0178e-06 gnorm: 1.24 [19:49:15< 4:41:13] +[titan] 2025-10-05 18:23:37,572 - root - INFO - step: 32355 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:23:37,573 - root - INFO - lr: 9.0127e-06 gnorm: 1.15 [19:49:26< 4:41:02] +[titan] 2025-10-05 18:23:48,530 - root - INFO - step: 32360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 18:23:48,530 - root - INFO - lr: 9.0077e-06 gnorm: 1.15 [19:49:37< 4:40:51] +[titan] 2025-10-05 18:23:59,408 - root - INFO - step: 32365 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 18:23:59,408 - root - INFO - lr: 9.0026e-06 gnorm: 1.20 [19:49:48< 4:40:40] +[titan] 2025-10-05 18:24:10,292 - root - INFO - step: 32370 loss: 1.9796 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 18:24:10,292 - root - INFO - lr: 8.9975e-06 gnorm: 1.14 [19:49:59< 4:40:29] +[titan] 2025-10-05 18:24:21,173 - root - INFO - step: 32375 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:24:21,173 - root - INFO - lr: 8.9924e-06 gnorm: 1.18 [19:50:09< 4:40:18] +[titan] 2025-10-05 18:24:32,033 - root - INFO - step: 32380 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7551 +[titan] 2025-10-05 18:24:32,033 - root - INFO - lr: 
8.9873e-06 gnorm: 1.17 [19:50:20< 4:40:07] +[titan] 2025-10-05 18:24:42,932 - root - INFO - step: 32385 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 18:24:42,932 - root - INFO - lr: 8.9823e-06 gnorm: 1.17 [19:50:31< 4:39:56] +[titan] 2025-10-05 18:24:53,801 - root - INFO - step: 32390 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 18:24:53,801 - root - INFO - lr: 8.9772e-06 gnorm: 1.16 [19:50:42< 4:39:45] +[titan] 2025-10-05 18:25:04,696 - root - INFO - step: 32395 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 18:25:04,696 - root - INFO - lr: 8.9721e-06 gnorm: 1.18 [19:50:53< 4:39:34] +[titan] 2025-10-05 18:25:13,384 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:25:15,566 - root - INFO - step: 32400 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:25:15,566 - root - INFO - lr: 8.9671e-06 gnorm: 1.16 [19:51:04< 4:39:23] +[titan] 2025-10-05 18:25:26,448 - root - INFO - step: 32405 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 18:25:26,449 - root - INFO - lr: 8.9620e-06 gnorm: 1.13 [19:51:15< 4:39:12] +[titan] 2025-10-05 18:25:37,323 - root - INFO - step: 32410 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 18:25:37,323 - root - INFO - lr: 8.9570e-06 gnorm: 1.15 [19:51:26< 4:39:01] +[titan] 2025-10-05 18:25:48,505 - root - INFO - step: 32415 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 29,304 tflops: 406.54 mfu: 41.11% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 18:25:48,505 - root - INFO - lr: 8.9519e-06 gnorm: 1.17 [19:51:37< 4:38:50] +[titan] 2025-10-05 18:25:59,396 - root - INFO - step: 32420 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 18:25:59,396 - root - INFO - lr: 8.9469e-06 gnorm: 1.17 [19:51:48< 4:38:39] +[titan] 2025-10-05 18:26:10,310 - root - INFO - step: 32425 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 18:26:10,310 - root - INFO - lr: 8.9418e-06 gnorm: 1.15 [19:51:59< 4:38:28] +[titan] 2025-10-05 18:26:21,195 - root - INFO - step: 32430 loss: 1.9222 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 18:26:21,195 - root - INFO - lr: 
8.9368e-06 gnorm: 1.14 [19:52:10< 4:38:16] +[titan] 2025-10-05 18:26:32,089 - root - INFO - step: 32435 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:26:32,089 - root - INFO - lr: 8.9317e-06 gnorm: 1.17 [19:52:20< 4:38:05] +[titan] 2025-10-05 18:26:42,997 - root - INFO - step: 32440 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:26:42,997 - root - INFO - lr: 8.9267e-06 gnorm: 1.18 [19:52:31< 4:37:54] +[titan] 2025-10-05 18:26:53,888 - root - INFO - step: 32445 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 18:26:53,888 - root - INFO - lr: 8.9217e-06 gnorm: 1.18 [19:52:42< 4:37:43] +[titan] 2025-10-05 18:27:02,624 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:27:04,807 - root - INFO - step: 32450 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 18:27:04,807 - root - INFO - lr: 8.9166e-06 gnorm: 1.18 [19:52:53< 4:37:32] +[titan] 2025-10-05 18:27:15,706 - root - INFO - step: 32455 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7589 +[titan] 2025-10-05 18:27:15,706 - root - INFO - lr: 8.9116e-06 gnorm: 1.15 [19:53:04< 4:37:21] +[titan] 2025-10-05 18:27:26,608 - root - INFO - step: 32460 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 18:27:26,608 - root - INFO - lr: 8.9066e-06 gnorm: 1.14 [19:53:15< 4:37:10] +[titan] 2025-10-05 18:27:37,484 - root - INFO - step: 32465 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 18:27:37,485 - root - INFO - lr: 8.9015e-06 gnorm: 1.11 [19:53:26< 4:36:59] +[titan] 2025-10-05 18:27:48,368 - root - INFO - step: 32470 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:27:48,368 - root - INFO - lr: 8.8965e-06 gnorm: 1.13 [19:53:37< 4:36:48] +[titan] 2025-10-05 18:27:59,231 - root - INFO - step: 32475 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 18:27:59,231 - root - INFO - lr: 8.8915e-06 gnorm: 1.15 [19:53:48< 4:36:37] +[titan] 2025-10-05 18:28:10,108 - root - INFO - step: 32480 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7775 +[titan] 2025-10-05 18:28:10,108 - root - INFO - lr: 
8.8865e-06 gnorm: 1.18 [19:53:58< 4:36:26] +[titan] 2025-10-05 18:28:20,988 - root - INFO - step: 32485 loss: 1.9823 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 18:28:20,988 - root - INFO - lr: 8.8815e-06 gnorm: 1.15 [19:54:09< 4:36:15] +[titan] 2025-10-05 18:28:31,851 - root - INFO - step: 32490 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 18:28:31,851 - root - INFO - lr: 8.8765e-06 gnorm: 1.16 [19:54:20< 4:36:04] +[titan] 2025-10-05 18:28:42,715 - root - INFO - step: 32495 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:28:42,716 - root - INFO - lr: 8.8715e-06 gnorm: 1.16 [19:54:31< 4:35:53] +[titan] 2025-10-05 18:28:51,431 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:28:53,617 - root - INFO - step: 32500 loss: 1.9959 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 18:28:53,617 - root - INFO - lr: 8.8665e-06 gnorm: 1.21 [19:54:42< 4:35:42] +[titan] 2025-10-05 18:29:04,488 - root - INFO - step: 32505 loss: 1.9052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6852 +[titan] 2025-10-05 18:29:04,489 - root - INFO - lr: 8.8615e-06 gnorm: 1.15 [19:54:53< 4:35:31] +[titan] 2025-10-05 18:29:15,355 - root - INFO - step: 32510 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 18:29:15,355 - root - INFO - lr: 8.8565e-06 gnorm: 1.20 [19:55:04< 4:35:19] +[titan] 2025-10-05 18:29:26,197 - root - INFO - step: 32515 loss: 1.9015 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 18:29:26,198 - root - INFO - lr: 8.8515e-06 gnorm: 1.16 [19:55:14< 4:35:08] +[titan] 2025-10-05 18:29:37,043 - root - INFO - step: 32520 loss: 1.9322 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:29:37,043 - root - INFO - lr: 8.8465e-06 gnorm: 1.14 [19:55:25< 4:34:57] +[titan] 2025-10-05 18:29:47,915 - root - INFO - step: 32525 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:29:47,915 - root - INFO - lr: 8.8415e-06 gnorm: 1.17 [19:55:36< 4:34:46] +[titan] 2025-10-05 18:29:58,786 - root - INFO - step: 32530 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:29:58,786 - root - INFO - lr: 
8.8365e-06 gnorm: 1.14 [19:55:47< 4:34:35] +[titan] 2025-10-05 18:30:09,635 - root - INFO - step: 32535 loss: 1.9367 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 18:30:09,635 - root - INFO - lr: 8.8315e-06 gnorm: 1.18 [19:55:58< 4:34:24] +[titan] 2025-10-05 18:30:20,517 - root - INFO - step: 32540 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:30:20,517 - root - INFO - lr: 8.8265e-06 gnorm: 1.22 [19:56:09< 4:34:13] +[titan] 2025-10-05 18:30:31,388 - root - INFO - step: 32545 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 18:30:31,388 - root - INFO - lr: 8.8216e-06 gnorm: 1.18 [19:56:20< 4:34:02] +[titan] 2025-10-05 18:30:40,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:30:42,279 - root - INFO - step: 32550 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 18:30:42,279 - root - INFO - lr: 8.8166e-06 gnorm: 1.22 [19:56:31< 4:33:51] +[titan] 2025-10-05 18:30:53,167 - root - INFO - step: 32555 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7816 +[titan] 2025-10-05 18:30:53,168 - root - INFO - lr: 8.8116e-06 gnorm: 1.16 [19:56:41< 4:33:40] +[titan] 2025-10-05 18:31:04,037 - root - INFO - step: 32560 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7457 +[titan] 2025-10-05 18:31:04,037 - root - INFO - lr: 8.8066e-06 gnorm: 1.15 [19:56:52< 4:33:29] +[titan] 2025-10-05 18:31:14,905 - root - INFO - step: 32565 loss: 2.0104 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 18:31:14,905 - root - INFO - lr: 8.8017e-06 gnorm: 1.18 [19:57:03< 4:33:18] +[titan] 2025-10-05 18:31:25,750 - root - INFO - step: 32570 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:31:25,750 - root - INFO - lr: 8.7967e-06 gnorm: 1.18 [19:57:14< 4:33:07] +[titan] 2025-10-05 18:31:36,615 - root - INFO - step: 32575 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 18:31:36,615 - root - INFO - lr: 8.7917e-06 gnorm: 1.21 [19:57:25< 4:32:56] +[titan] 2025-10-05 18:31:47,505 - root - INFO - step: 32580 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 18:31:47,505 - root - INFO - lr: 
8.7868e-06 gnorm: 1.18 [19:57:36< 4:32:45] +[titan] 2025-10-05 18:31:58,405 - root - INFO - step: 32585 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 18:31:58,406 - root - INFO - lr: 8.7818e-06 gnorm: 1.15 [19:57:47< 4:32:34] +[titan] 2025-10-05 18:32:09,289 - root - INFO - step: 32590 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 18:32:09,289 - root - INFO - lr: 8.7769e-06 gnorm: 1.17 [19:57:58< 4:32:22] +[titan] 2025-10-05 18:32:20,143 - root - INFO - step: 32595 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 18:32:20,143 - root - INFO - lr: 8.7719e-06 gnorm: 1.15 [19:58:08< 4:32:11] +[titan] 2025-10-05 18:32:28,834 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:32:31,015 - root - INFO - step: 32600 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:32:31,015 - root - INFO - lr: 8.7670e-06 gnorm: 1.15 [19:58:19< 4:32:00] +[titan] 2025-10-05 18:32:41,866 - root - INFO - step: 32605 loss: 1.9357 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 18:32:41,866 - root - INFO - lr: 8.7621e-06 gnorm: 1.17 [19:58:30< 4:31:49] +[titan] 2025-10-05 18:32:52,744 - root - INFO - step: 32610 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 18:32:52,744 - root - INFO - lr: 8.7571e-06 gnorm: 1.17 [19:58:41< 4:31:38] +[titan] 2025-10-05 18:33:03,626 - root - INFO - step: 32615 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:33:03,626 - root - INFO - lr: 8.7522e-06 gnorm: 1.16 [19:58:52< 4:31:27] +[titan] 2025-10-05 18:33:14,510 - root - INFO - step: 32620 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 18:33:14,510 - root - INFO - lr: 8.7472e-06 gnorm: 1.19 [19:59:03< 4:31:16] +[titan] 2025-10-05 18:33:25,381 - root - INFO - step: 32625 loss: 1.9774 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 18:33:25,381 - root - INFO - lr: 8.7423e-06 gnorm: 1.16 [19:59:14< 4:31:05] +[titan] 2025-10-05 18:33:36,243 - root - INFO - step: 32630 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 18:33:36,243 - root - INFO - lr: 
8.7374e-06 gnorm: 1.17 [19:59:25< 4:30:54] +[titan] 2025-10-05 18:33:47,116 - root - INFO - step: 32635 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:33:47,116 - root - INFO - lr: 8.7325e-06 gnorm: 1.15 [19:59:35< 4:30:43] +[titan] 2025-10-05 18:33:58,057 - root - INFO - step: 32640 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:33:58,058 - root - INFO - lr: 8.7275e-06 gnorm: 1.20 [19:59:46< 4:30:32] +[titan] 2025-10-05 18:34:08,946 - root - INFO - step: 32645 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:34:08,946 - root - INFO - lr: 8.7226e-06 gnorm: 1.17 [19:59:57< 4:30:21] +[titan] 2025-10-05 18:34:17,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:34:19,814 - root - INFO - step: 32650 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7704 +[titan] 2025-10-05 18:34:19,814 - root - INFO - lr: 8.7177e-06 gnorm: 1.14 [20:00:08< 4:30:10] +[titan] 2025-10-05 18:34:30,684 - root - INFO - step: 32655 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 18:34:30,684 - root - INFO - lr: 8.7128e-06 gnorm: 1.21 [20:00:19< 4:29:59] +[titan] 2025-10-05 18:34:41,540 - root - INFO - step: 32660 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 18:34:41,540 - root - INFO - lr: 8.7079e-06 gnorm: 1.18 [20:00:30< 4:29:48] +[titan] 2025-10-05 18:34:52,419 - root - INFO - step: 32665 loss: 1.9116 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 18:34:52,419 - root - INFO - lr: 8.7030e-06 gnorm: 1.19 [20:00:41< 4:29:37] +[titan] 2025-10-05 18:35:03,284 - root - INFO - step: 32670 loss: 1.9841 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 18:35:03,284 - root - INFO - lr: 8.6981e-06 gnorm: 1.23 [20:00:52< 4:29:25] +[titan] 2025-10-05 18:35:14,164 - root - INFO - step: 32675 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:35:14,164 - root - INFO - lr: 8.6932e-06 gnorm: 1.14 [20:01:02< 4:29:14] +[titan] 2025-10-05 18:35:25,030 - root - INFO - step: 32680 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 18:35:25,030 - root - INFO - lr: 
8.6883e-06 gnorm: 1.17 [20:01:13< 4:29:03] +[titan] 2025-10-05 18:35:35,901 - root - INFO - step: 32685 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 18:35:35,902 - root - INFO - lr: 8.6834e-06 gnorm: 1.19 [20:01:24< 4:28:52] +[titan] 2025-10-05 18:35:46,769 - root - INFO - step: 32690 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 18:35:46,769 - root - INFO - lr: 8.6785e-06 gnorm: 1.17 [20:01:35< 4:28:41] +[titan] 2025-10-05 18:35:57,643 - root - INFO - step: 32695 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 18:35:57,644 - root - INFO - lr: 8.6736e-06 gnorm: 1.18 [20:01:46< 4:28:30] +[titan] 2025-10-05 18:36:06,324 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:36:08,504 - root - INFO - step: 32700 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 18:36:08,504 - root - INFO - lr: 8.6687e-06 gnorm: 1.16 [20:01:57< 4:28:19] +[titan] 2025-10-05 18:36:19,368 - root - INFO - step: 32705 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6516 +[titan] 2025-10-05 18:36:19,368 - root - INFO - lr: 8.6638e-06 gnorm: 1.12 [20:02:08< 4:28:08] +[titan] 2025-10-05 18:36:30,228 - root - INFO - step: 32710 loss: 1.9004 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:36:30,228 - root - INFO - lr: 8.6590e-06 gnorm: 1.17 [20:02:19< 4:27:57] +[titan] 2025-10-05 18:36:41,098 - root - INFO - step: 32715 loss: 1.9595 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7326 +[titan] 2025-10-05 18:36:41,098 - root - INFO - lr: 8.6541e-06 gnorm: 1.21 [20:02:29< 4:27:46] +[titan] 2025-10-05 18:36:51,987 - root - INFO - step: 32720 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 18:36:51,987 - root - INFO - lr: 8.6492e-06 gnorm: 1.15 [20:02:40< 4:27:35] +[titan] 2025-10-05 18:37:02,853 - root - INFO - step: 32725 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7097 +[titan] 2025-10-05 18:37:02,853 - root - INFO - lr: 8.6443e-06 gnorm: 1.18 [20:02:51< 4:27:24] +[titan] 2025-10-05 18:37:13,725 - root - INFO - step: 32730 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 18:37:13,725 - root - INFO - lr: 
8.6395e-06 gnorm: 1.18 [20:03:02< 4:27:13] +[titan] 2025-10-05 18:37:24,602 - root - INFO - step: 32735 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 18:37:24,603 - root - INFO - lr: 8.6346e-06 gnorm: 1.18 [20:03:13< 4:27:02] +[titan] 2025-10-05 18:37:35,486 - root - INFO - step: 32740 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 18:37:35,486 - root - INFO - lr: 8.6297e-06 gnorm: 1.15 [20:03:24< 4:26:51] +[titan] 2025-10-05 18:37:46,374 - root - INFO - step: 32745 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 18:37:46,374 - root - INFO - lr: 8.6249e-06 gnorm: 1.18 [20:03:35< 4:26:40] +[titan] 2025-10-05 18:37:55,082 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:37:57,268 - root - INFO - step: 32750 loss: 1.9951 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 18:37:57,268 - root - INFO - lr: 8.6200e-06 gnorm: 1.16 [20:03:46< 4:26:28] +[titan] 2025-10-05 18:38:08,165 - root - INFO - step: 32755 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 18:38:08,165 - root - INFO - lr: 8.6152e-06 gnorm: 1.15 [20:03:56< 4:26:17] +[titan] 2025-10-05 18:38:19,048 - root - INFO - step: 32760 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 18:38:19,048 - root - INFO - lr: 8.6103e-06 gnorm: 1.17 [20:04:07< 4:26:06] +[titan] 2025-10-05 18:38:30,021 - root - INFO - step: 32765 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 29,864 tflops: 414.31 mfu: 41.89% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 18:38:30,021 - root - INFO - lr: 8.6055e-06 gnorm: 1.19 [20:04:18< 4:25:55] +[titan] 2025-10-05 18:38:36,736 - root - INFO - Dumping profiler traces at step 32768 +[titan] 2025-10-05 18:38:36,775 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:38:41,140 - root - INFO - step: 32770 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 18:38:41,140 - root - INFO - lr: 8.6006e-06 gnorm: 1.18 [20:04:29< 4:25:44] +[titan] 2025-10-05 18:38:52,028 - root - INFO - step: 32775 loss: 1.8866 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 18:38:52,028 - root - INFO - lr: 8.5958e-06 gnorm: 1.16 [20:04:40< 4:25:33] +[titan] 2025-10-05 18:39:02,921 - root - INFO - step: 32780 loss: 
1.9463 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 18:39:02,921 - root - INFO - lr: 8.5909e-06 gnorm: 1.17 [20:04:51< 4:25:22] +[titan] 2025-10-05 18:39:13,797 - root - INFO - step: 32785 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 18:39:13,797 - root - INFO - lr: 8.5861e-06 gnorm: 1.19 [20:05:02< 4:25:11] +[titan] 2025-10-05 18:39:24,687 - root - INFO - step: 32790 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 18:39:24,688 - root - INFO - lr: 8.5813e-06 gnorm: 1.18 [20:05:13< 4:25:00] +[titan] 2025-10-05 18:39:35,548 - root - INFO - step: 32795 loss: 1.9151 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 18:39:35,548 - root - INFO - lr: 8.5764e-06 gnorm: 1.17 [20:05:24< 4:24:49] +[titan] 2025-10-05 18:39:44,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:39:46,421 - root - INFO - step: 32800 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:39:46,421 - root - INFO - lr: 8.5716e-06 gnorm: 1.14 [20:05:35< 4:24:38] +[titan] 2025-10-05 18:39:57,301 - root - INFO - step: 32805 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 18:39:57,301 - root - INFO - lr: 8.5668e-06 gnorm: 1.20 [20:05:46< 4:24:27] +[titan] 2025-10-05 18:40:08,174 - root - INFO - step: 32810 loss: 1.8700 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6536 +[titan] 2025-10-05 18:40:08,175 - root - INFO - lr: 8.5620e-06 gnorm: 1.15 [20:05:56< 4:24:16] +[titan] 2025-10-05 18:40:19,054 - root - INFO - step: 32815 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 18:40:19,054 - root - INFO - lr: 8.5572e-06 gnorm: 1.17 [20:06:07< 4:24:05] +[titan] 2025-10-05 18:40:29,922 - root - INFO - step: 32820 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:40:29,922 - root - INFO - lr: 8.5523e-06 gnorm: 1.20 [20:06:18< 4:23:54] +[titan] 2025-10-05 18:40:40,814 - root - INFO - step: 32825 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7226 +[titan] 2025-10-05 18:40:40,814 - root - INFO - lr: 8.5475e-06 gnorm: 1.14 [20:06:29< 4:23:43] +[titan] 2025-10-05 18:40:51,678 - root - INFO - step: 32830 loss: 1.9398 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:40:51,678 - root - INFO - lr: 
8.5427e-06 gnorm: 1.23 [20:06:40< 4:23:32] +[titan] 2025-10-05 18:41:02,570 - root - INFO - step: 32835 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:41:02,570 - root - INFO - lr: 8.5379e-06 gnorm: 1.15 [20:06:51< 4:23:21] +[titan] 2025-10-05 18:41:13,446 - root - INFO - step: 32840 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 18:41:13,447 - root - INFO - lr: 8.5331e-06 gnorm: 1.21 [20:07:02< 4:23:09] +[titan] 2025-10-05 18:41:24,359 - root - INFO - step: 32845 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:41:24,359 - root - INFO - lr: 8.5283e-06 gnorm: 1.16 [20:07:13< 4:22:58] +[titan] 2025-10-05 18:41:33,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:41:35,240 - root - INFO - step: 32850 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:41:35,240 - root - INFO - lr: 8.5235e-06 gnorm: 1.14 [20:07:24< 4:22:47] +[titan] 2025-10-05 18:41:46,124 - root - INFO - step: 32855 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 18:41:46,125 - root - INFO - lr: 8.5187e-06 gnorm: 1.17 [20:07:34< 4:22:36] +[titan] 2025-10-05 18:41:56,993 - root - INFO - step: 32860 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:41:56,993 - root - INFO - lr: 8.5139e-06 gnorm: 1.22 [20:07:45< 4:22:25] +[titan] 2025-10-05 18:42:07,859 - root - INFO - step: 32865 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 18:42:07,859 - root - INFO - lr: 8.5091e-06 gnorm: 1.20 [20:07:56< 4:22:14] +[titan] 2025-10-05 18:42:18,752 - root - INFO - step: 32870 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 18:42:18,752 - root - INFO - lr: 8.5044e-06 gnorm: 1.13 [20:08:07< 4:22:03] +[titan] 2025-10-05 18:42:29,644 - root - INFO - step: 32875 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 18:42:29,644 - root - INFO - lr: 8.4996e-06 gnorm: 1.19 [20:08:18< 4:21:52] +[titan] 2025-10-05 18:42:40,538 - root - INFO - step: 32880 loss: 1.9506 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 18:42:40,539 - root - INFO - lr: 
8.4948e-06 gnorm: 1.15 [20:08:29< 4:21:41] +[titan] 2025-10-05 18:42:51,405 - root - INFO - step: 32885 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:42:51,405 - root - INFO - lr: 8.4900e-06 gnorm: 1.14 [20:08:40< 4:21:30] +[titan] 2025-10-05 18:43:02,281 - root - INFO - step: 32890 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 18:43:02,281 - root - INFO - lr: 8.4853e-06 gnorm: 1.17 [20:08:51< 4:21:19] +[titan] 2025-10-05 18:43:13,144 - root - INFO - step: 32895 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7334 +[titan] 2025-10-05 18:43:13,144 - root - INFO - lr: 8.4805e-06 gnorm: 1.22 [20:09:01< 4:21:08] +[titan] 2025-10-05 18:43:21,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:43:24,029 - root - INFO - step: 32900 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:43:24,030 - root - INFO - lr: 8.4757e-06 gnorm: 1.16 [20:09:12< 4:20:57] +[titan] 2025-10-05 18:43:34,912 - root - INFO - step: 32905 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:43:34,912 - root - INFO - lr: 8.4710e-06 gnorm: 1.22 [20:09:23< 4:20:46] +[titan] 2025-10-05 18:43:45,784 - root - INFO - step: 32910 loss: 1.9113 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6908 +[titan] 2025-10-05 18:43:45,784 - root - INFO - lr: 8.4662e-06 gnorm: 1.19 [20:09:34< 4:20:35] +[titan] 2025-10-05 18:43:56,656 - root - INFO - step: 32915 loss: 1.9080 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6877 +[titan] 2025-10-05 18:43:56,657 - root - INFO - lr: 8.4614e-06 gnorm: 1.15 [20:09:45< 4:20:24] +[titan] 2025-10-05 18:44:07,519 - root - INFO - step: 32920 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 18:44:07,520 - root - INFO - lr: 8.4567e-06 gnorm: 1.14 [20:09:56< 4:20:13] +[titan] 2025-10-05 18:44:18,364 - root - INFO - step: 32925 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 18:44:18,364 - root - INFO - lr: 8.4519e-06 gnorm: 1.16 [20:10:07< 4:20:01] +[titan] 2025-10-05 18:44:29,222 - root - INFO - step: 32930 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:44:29,222 - root - INFO - lr: 
8.4472e-06 gnorm: 1.19 [20:10:17< 4:19:50] +[titan] 2025-10-05 18:44:40,084 - root - INFO - step: 32935 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 18:44:40,084 - root - INFO - lr: 8.4424e-06 gnorm: 1.20 [20:10:28< 4:19:39] +[titan] 2025-10-05 18:44:50,961 - root - INFO - step: 32940 loss: 2.0407 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 18:44:50,961 - root - INFO - lr: 8.4377e-06 gnorm: 1.21 [20:10:39< 4:19:28] +[titan] 2025-10-05 18:45:01,827 - root - INFO - step: 32945 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:45:01,827 - root - INFO - lr: 8.4330e-06 gnorm: 1.18 [20:10:50< 4:19:17] +[titan] 2025-10-05 18:45:10,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:45:12,694 - root - INFO - step: 32950 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:45:12,695 - root - INFO - lr: 8.4282e-06 gnorm: 1.17 [20:11:01< 4:19:06] +[titan] 2025-10-05 18:45:23,553 - root - INFO - step: 32955 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 18:45:23,553 - root - INFO - lr: 8.4235e-06 gnorm: 1.19 [20:11:12< 4:18:55] +[titan] 2025-10-05 18:45:34,381 - root - INFO - step: 32960 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 18:45:34,381 - root - INFO - lr: 8.4187e-06 gnorm: 1.16 [20:11:23< 4:18:44] +[titan] 2025-10-05 18:45:45,261 - root - INFO - step: 32965 loss: 2.0361 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 18:45:45,262 - root - INFO - lr: 8.4140e-06 gnorm: 1.21 [20:11:34< 4:18:33] +[titan] 2025-10-05 18:45:56,104 - root - INFO - step: 32970 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 18:45:56,105 - root - INFO - lr: 8.4093e-06 gnorm: 1.16 [20:11:44< 4:18:22] +[titan] 2025-10-05 18:46:06,947 - root - INFO - step: 32975 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7958 +[titan] 2025-10-05 18:46:06,948 - root - INFO - lr: 8.4046e-06 gnorm: 1.24 [20:11:55< 4:18:11] +[titan] 2025-10-05 18:46:17,797 - root - INFO - step: 32980 loss: 1.9700 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7415 +[titan] 2025-10-05 18:46:17,797 - root - INFO - lr: 
8.3999e-06 gnorm: 1.16 [20:12:06< 4:18:00] +[titan] 2025-10-05 18:46:28,662 - root - INFO - step: 32985 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 18:46:28,662 - root - INFO - lr: 8.3951e-06 gnorm: 1.18 [20:12:17< 4:17:49] +[titan] 2025-10-05 18:46:39,537 - root - INFO - step: 32990 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 18:46:39,537 - root - INFO - lr: 8.3904e-06 gnorm: 1.22 [20:12:28< 4:17:38] +[titan] 2025-10-05 18:46:50,425 - root - INFO - step: 32995 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:46:50,425 - root - INFO - lr: 8.3857e-06 gnorm: 1.15 [20:12:39< 4:17:27] +[titan] 2025-10-05 18:46:59,142 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:47:01,344 - root - INFO - step: 33000 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 18:47:01,344 - root - INFO - lr: 8.3810e-06 gnorm: 1.17 [20:12:50< 4:17:16] +[titan] 2025-10-05 18:47:12,220 - root - INFO - step: 33005 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:47:12,220 - root - INFO - lr: 8.3763e-06 gnorm: 1.17 [20:13:00< 4:17:05] +[titan] 2025-10-05 18:47:23,105 - root - INFO - step: 33010 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 18:47:23,105 - root - INFO - lr: 8.3716e-06 gnorm: 1.16 [20:13:11< 4:16:53] +[titan] 2025-10-05 18:47:33,991 - root - INFO - step: 33015 loss: 1.9630 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 18:47:33,991 - root - INFO - lr: 8.3669e-06 gnorm: 1.21 [20:13:22< 4:16:42] +[titan] 2025-10-05 18:47:44,854 - root - INFO - step: 33020 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:47:44,854 - root - INFO - lr: 8.3622e-06 gnorm: 1.20 [20:13:33< 4:16:31] +[titan] 2025-10-05 18:47:55,728 - root - INFO - step: 33025 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:47:55,728 - root - INFO - lr: 8.3575e-06 gnorm: 1.16 [20:13:44< 4:16:20] +[titan] 2025-10-05 18:48:06,621 - root - INFO - step: 33030 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 18:48:06,622 - root - INFO - lr: 
8.3528e-06 gnorm: 1.15 [20:13:55< 4:16:09] +[titan] 2025-10-05 18:48:17,519 - root - INFO - step: 33035 loss: 2.0726 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 18:48:17,519 - root - INFO - lr: 8.3481e-06 gnorm: 1.22 [20:14:06< 4:15:58] +[titan] 2025-10-05 18:48:28,405 - root - INFO - step: 33040 loss: 1.9946 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 18:48:28,405 - root - INFO - lr: 8.3435e-06 gnorm: 1.19 [20:14:17< 4:15:47] +[titan] 2025-10-05 18:48:39,282 - root - INFO - step: 33045 loss: 1.9543 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 18:48:39,282 - root - INFO - lr: 8.3388e-06 gnorm: 1.16 [20:14:28< 4:15:36] +[titan] 2025-10-05 18:48:47,979 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:48:50,167 - root - INFO - step: 33050 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:48:50,167 - root - INFO - lr: 8.3341e-06 gnorm: 1.22 [20:14:38< 4:15:25] +[titan] 2025-10-05 18:49:01,041 - root - INFO - step: 33055 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7799 +[titan] 2025-10-05 18:49:01,041 - root - INFO - lr: 8.3294e-06 gnorm: 1.21 [20:14:49< 4:15:14] +[titan] 2025-10-05 18:49:11,966 - root - INFO - step: 33060 loss: 1.9156 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6929 +[titan] 2025-10-05 18:49:11,967 - root - INFO - lr: 8.3248e-06 gnorm: 1.15 [20:15:00< 4:15:03] +[titan] 2025-10-05 18:49:22,851 - root - INFO - step: 33065 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 18:49:22,851 - root - INFO - lr: 8.3201e-06 gnorm: 1.22 [20:15:11< 4:14:52] +[titan] 2025-10-05 18:49:33,699 - root - INFO - step: 33070 loss: 1.9488 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 18:49:33,699 - root - INFO - lr: 8.3154e-06 gnorm: 1.17 [20:15:22< 4:14:41] +[titan] 2025-10-05 18:49:44,562 - root - INFO - step: 33075 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 18:49:44,562 - root - INFO - lr: 8.3108e-06 gnorm: 1.18 [20:15:33< 4:14:30] +[titan] 2025-10-05 18:49:55,434 - root - INFO - step: 33080 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6975 +[titan] 2025-10-05 18:49:55,434 - root - INFO - lr: 
8.3061e-06 gnorm: 1.15 [20:15:44< 4:14:19] +[titan] 2025-10-05 18:50:06,341 - root - INFO - step: 33085 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 18:50:06,341 - root - INFO - lr: 8.3015e-06 gnorm: 1.19 [20:15:55< 4:14:08] +[titan] 2025-10-05 18:50:17,204 - root - INFO - step: 33090 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 18:50:17,204 - root - INFO - lr: 8.2968e-06 gnorm: 1.17 [20:16:05< 4:13:57] +[titan] 2025-10-05 18:50:28,085 - root - INFO - step: 33095 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 18:50:28,085 - root - INFO - lr: 8.2922e-06 gnorm: 1.19 [20:16:16< 4:13:46] +[titan] 2025-10-05 18:50:36,771 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:50:38,970 - root - INFO - step: 33100 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 18:50:38,970 - root - INFO - lr: 8.2875e-06 gnorm: 1.16 [20:16:27< 4:13:34] +[titan] 2025-10-05 18:50:49,853 - root - INFO - step: 33105 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:50:49,853 - root - INFO - lr: 8.2829e-06 gnorm: 1.18 [20:16:38< 4:13:23] +[titan] 2025-10-05 18:51:00,737 - root - INFO - step: 33110 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 18:51:00,737 - root - INFO - lr: 8.2782e-06 gnorm: 1.20 [20:16:49< 4:13:12] +[titan] 2025-10-05 18:51:11,650 - root - INFO - step: 33115 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7236 +[titan] 2025-10-05 18:51:11,650 - root - INFO - lr: 8.2736e-06 gnorm: 1.18 [20:17:00< 4:13:01] +[titan] 2025-10-05 18:51:22,517 - root - INFO - step: 33120 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 18:51:22,518 - root - INFO - lr: 8.2690e-06 gnorm: 1.15 [20:17:11< 4:12:50] +[titan] 2025-10-05 18:51:33,423 - root - INFO - step: 33125 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 18:51:33,423 - root - INFO - lr: 8.2643e-06 gnorm: 1.16 [20:17:22< 4:12:39] +[titan] 2025-10-05 18:51:44,314 - root - INFO - step: 33130 loss: 1.9891 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:51:44,314 - root - INFO - lr: 
8.2597e-06 gnorm: 1.16 [20:17:33< 4:12:28] +[titan] 2025-10-05 18:51:55,207 - root - INFO - step: 33135 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 18:51:55,207 - root - INFO - lr: 8.2551e-06 gnorm: 1.19 [20:17:43< 4:12:17] +[titan] 2025-10-05 18:52:06,124 - root - INFO - step: 33140 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 18:52:06,124 - root - INFO - lr: 8.2504e-06 gnorm: 1.15 [20:17:54< 4:12:06] +[titan] 2025-10-05 18:52:17,015 - root - INFO - step: 33145 loss: 1.8716 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6545 +[titan] 2025-10-05 18:52:17,015 - root - INFO - lr: 8.2458e-06 gnorm: 1.17 [20:18:05< 4:11:55] +[titan] 2025-10-05 18:52:25,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:52:27,896 - root - INFO - step: 33150 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 18:52:27,896 - root - INFO - lr: 8.2412e-06 gnorm: 1.28 [20:18:16< 4:11:44] +[titan] 2025-10-05 18:52:38,755 - root - INFO - step: 33155 loss: 1.9340 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 18:52:38,755 - root - INFO - lr: 8.2366e-06 gnorm: 1.17 [20:18:27< 4:11:33] +[titan] 2025-10-05 18:52:49,651 - root - INFO - step: 33160 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6592 +[titan] 2025-10-05 18:52:49,651 - root - INFO - lr: 8.2320e-06 gnorm: 1.14 [20:18:38< 4:11:22] +[titan] 2025-10-05 18:53:00,524 - root - INFO - step: 33165 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7065 +[titan] 2025-10-05 18:53:00,525 - root - INFO - lr: 8.2274e-06 gnorm: 1.16 [20:18:49< 4:11:11] +[titan] 2025-10-05 18:53:11,434 - root - INFO - step: 33170 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:53:11,435 - root - INFO - lr: 8.2228e-06 gnorm: 1.19 [20:19:00< 4:11:00] +[titan] 2025-10-05 18:53:22,306 - root - INFO - step: 33175 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:53:22,306 - root - INFO - lr: 8.2182e-06 gnorm: 1.21 [20:19:11< 4:10:49] +[titan] 2025-10-05 18:53:33,152 - root - INFO - step: 33180 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 18:53:33,152 - root - INFO - lr: 
8.2136e-06 gnorm: 1.19 [20:19:21< 4:10:38] +[titan] 2025-10-05 18:53:44,004 - root - INFO - step: 33185 loss: 1.9935 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 18:53:44,004 - root - INFO - lr: 8.2090e-06 gnorm: 1.16 [20:19:32< 4:10:27] +[titan] 2025-10-05 18:53:54,872 - root - INFO - step: 33190 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:53:54,872 - root - INFO - lr: 8.2044e-06 gnorm: 1.21 [20:19:43< 4:10:15] +[titan] 2025-10-05 18:54:05,750 - root - INFO - step: 33195 loss: 2.0158 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 18:54:05,750 - root - INFO - lr: 8.1998e-06 gnorm: 1.18 [20:19:54< 4:10:04] +[titan] 2025-10-05 18:54:14,509 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:54:16,691 - root - INFO - step: 33200 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 18:54:16,691 - root - INFO - lr: 8.1952e-06 gnorm: 1.21 [20:20:05< 4:09:53] +[titan] 2025-10-05 18:54:27,562 - root - INFO - step: 33205 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7204 +[titan] 2025-10-05 18:54:27,562 - root - INFO - lr: 8.1906e-06 gnorm: 1.18 [20:20:16< 4:09:42] +[titan] 2025-10-05 18:54:38,424 - root - INFO - step: 33210 loss: 1.9533 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 18:54:38,424 - root - INFO - lr: 8.1861e-06 gnorm: 1.19 [20:20:27< 4:09:31] +[titan] 2025-10-05 18:54:49,288 - root - INFO - step: 33215 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 18:54:49,288 - root - INFO - lr: 8.1815e-06 gnorm: 1.22 [20:20:38< 4:09:20] +[titan] 2025-10-05 18:55:00,163 - root - INFO - step: 33220 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:55:00,163 - root - INFO - lr: 8.1769e-06 gnorm: 1.21 [20:20:48< 4:09:09] +[titan] 2025-10-05 18:55:11,094 - root - INFO - step: 33225 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7865 +[titan] 2025-10-05 18:55:11,094 - root - INFO - lr: 8.1723e-06 gnorm: 1.22 [20:20:59< 4:08:58] +[titan] 2025-10-05 18:55:21,957 - root - INFO - step: 33230 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 18:55:21,957 - root - INFO - lr: 
8.1678e-06 gnorm: 1.18 [20:21:10< 4:08:47] +[titan] 2025-10-05 18:55:32,818 - root - INFO - step: 33235 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 18:55:32,818 - root - INFO - lr: 8.1632e-06 gnorm: 1.16 [20:21:21< 4:08:36] +[titan] 2025-10-05 18:55:43,665 - root - INFO - step: 33240 loss: 2.0182 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 18:55:43,665 - root - INFO - lr: 8.1586e-06 gnorm: 1.18 [20:21:32< 4:08:25] +[titan] 2025-10-05 18:55:54,510 - root - INFO - step: 33245 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 18:55:54,510 - root - INFO - lr: 8.1541e-06 gnorm: 1.19 [20:21:43< 4:08:14] +[titan] 2025-10-05 18:56:03,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:56:05,369 - root - INFO - step: 33250 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 18:56:05,369 - root - INFO - lr: 8.1495e-06 gnorm: 1.22 [20:21:54< 4:08:03] +[titan] 2025-10-05 18:56:16,302 - root - INFO - step: 33255 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 18:56:16,302 - root - INFO - lr: 8.1450e-06 gnorm: 1.21 [20:22:05< 4:07:52] +[titan] 2025-10-05 18:56:27,156 - root - INFO - step: 33260 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 18:56:27,156 - root - INFO - lr: 8.1404e-06 gnorm: 1.17 [20:22:15< 4:07:41] +[titan] 2025-10-05 18:56:37,991 - root - INFO - step: 33265 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 18:56:37,991 - root - INFO - lr: 8.1359e-06 gnorm: 1.18 [20:22:26< 4:07:30] +[titan] 2025-10-05 18:56:48,867 - root - INFO - step: 33270 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 18:56:48,867 - root - INFO - lr: 8.1313e-06 gnorm: 1.14 [20:22:37< 4:07:19] +[titan] 2025-10-05 18:56:59,716 - root - INFO - step: 33275 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 18:56:59,716 - root - INFO - lr: 8.1268e-06 gnorm: 1.16 [20:22:48< 4:07:08] +[titan] 2025-10-05 18:57:10,748 - root - INFO - step: 33280 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 29,703 tflops: 412.09 mfu: 41.67% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:57:10,749 - root - INFO - lr: 
8.1223e-06 gnorm: 1.20 [20:22:59< 4:06:57] +[titan] 2025-10-05 18:57:10,936 - root - INFO - Dumping profiler traces at step 33280 +[titan] 2025-10-05 18:57:10,975 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:57:21,865 - root - INFO - step: 33285 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 29,477 tflops: 408.95 mfu: 41.35% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:57:21,865 - root - INFO - lr: 8.1177e-06 gnorm: 1.18 [20:23:10< 4:06:46] +[titan] 2025-10-05 18:57:32,746 - root - INFO - step: 33290 loss: 1.9692 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 18:57:32,747 - root - INFO - lr: 8.1132e-06 gnorm: 1.19 [20:23:21< 4:06:34] +[titan] 2025-10-05 18:57:43,611 - root - INFO - step: 33295 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 18:57:43,611 - root - INFO - lr: 8.1087e-06 gnorm: 1.19 [20:23:32< 4:06:23] +[titan] 2025-10-05 18:57:52,270 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:57:54,457 - root - INFO - step: 33300 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 18:57:54,457 - root - INFO - lr: 8.1041e-06 gnorm: 1.22 [20:23:43< 4:06:12] +[titan] 2025-10-05 18:58:05,339 - root - INFO - step: 33305 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:58:05,339 - root - INFO - lr: 8.0996e-06 gnorm: 1.21 [20:23:54< 4:06:01] +[titan] 2025-10-05 18:58:16,262 - root - INFO - step: 33310 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 18:58:16,263 - root - INFO - lr: 8.0951e-06 gnorm: 1.24 [20:24:04< 4:05:50] +[titan] 2025-10-05 18:58:27,116 - root - INFO - step: 33315 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 18:58:27,116 - root - INFO - lr: 8.0906e-06 gnorm: 1.20 [20:24:15< 4:05:39] +[titan] 2025-10-05 18:58:38,011 - root - INFO - step: 33320 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 18:58:38,011 - root - INFO - lr: 8.0861e-06 gnorm: 1.18 [20:24:26< 4:05:28] +[titan] 2025-10-05 18:58:48,874 - root - INFO - step: 33325 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 18:58:48,874 - root - INFO - lr: 8.0816e-06 gnorm: 1.17 [20:24:37< 4:05:17] +[titan] 2025-10-05 18:58:59,752 - root - INFO - step: 33330 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 18:58:59,752 - root - INFO - lr: 
8.0771e-06 gnorm: 1.18 [20:24:48< 4:05:06] +[titan] 2025-10-05 18:59:10,641 - root - INFO - step: 33335 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 18:59:10,641 - root - INFO - lr: 8.0725e-06 gnorm: 1.20 [20:24:59< 4:04:55] +[titan] 2025-10-05 18:59:21,564 - root - INFO - step: 33340 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 18:59:21,564 - root - INFO - lr: 8.0680e-06 gnorm: 1.15 [20:25:10< 4:04:44] +[titan] 2025-10-05 18:59:32,450 - root - INFO - step: 33345 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:59:32,450 - root - INFO - lr: 8.0636e-06 gnorm: 1.18 [20:25:21< 4:04:33] +[titan] 2025-10-05 18:59:41,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:59:43,356 - root - INFO - step: 33350 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:59:43,356 - root - INFO - lr: 8.0591e-06 gnorm: 1.16 [20:25:32< 4:04:22] +[titan] 2025-10-05 18:59:54,223 - root - INFO - step: 33355 loss: 1.9358 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7114 +[titan] 2025-10-05 18:59:54,223 - root - INFO - lr: 8.0546e-06 gnorm: 1.18 [20:25:42< 4:04:11] +[titan] 2025-10-05 19:00:05,102 - root - INFO - step: 33360 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 19:00:05,102 - root - INFO - lr: 8.0501e-06 gnorm: 1.17 [20:25:53< 4:04:00] +[titan] 2025-10-05 19:00:16,037 - root - INFO - step: 33365 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:00:16,037 - root - INFO - lr: 8.0456e-06 gnorm: 1.20 [20:26:04< 4:03:49] +[titan] 2025-10-05 19:00:26,915 - root - INFO - step: 33370 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:00:26,916 - root - INFO - lr: 8.0411e-06 gnorm: 1.20 [20:26:15< 4:03:38] +[titan] 2025-10-05 19:00:37,762 - root - INFO - step: 33375 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:00:37,763 - root - INFO - lr: 8.0366e-06 gnorm: 1.21 [20:26:26< 4:03:27] +[titan] 2025-10-05 19:00:48,614 - root - INFO - step: 33380 loss: 1.9232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:00:48,614 - root - INFO - lr: 
8.0322e-06 gnorm: 1.18 [20:26:37< 4:03:15] +[titan] 2025-10-05 19:00:59,483 - root - INFO - step: 33385 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:00:59,483 - root - INFO - lr: 8.0277e-06 gnorm: 1.19 [20:26:48< 4:03:04] +[titan] 2025-10-05 19:01:10,340 - root - INFO - step: 33390 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 19:01:10,340 - root - INFO - lr: 8.0232e-06 gnorm: 1.19 [20:26:59< 4:02:53] +[titan] 2025-10-05 19:01:21,250 - root - INFO - step: 33395 loss: 1.9470 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 19:01:21,250 - root - INFO - lr: 8.0187e-06 gnorm: 1.17 [20:27:09< 4:02:42] +[titan] 2025-10-05 19:01:29,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:01:32,102 - root - INFO - step: 33400 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 19:01:32,102 - root - INFO - lr: 8.0143e-06 gnorm: 1.17 [20:27:20< 4:02:31] +[titan] 2025-10-05 19:01:42,960 - root - INFO - step: 33405 loss: 1.8686 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 19:01:42,960 - root - INFO - lr: 8.0098e-06 gnorm: 1.18 [20:27:31< 4:02:20] +[titan] 2025-10-05 19:01:53,819 - root - INFO - step: 33410 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 19:01:53,819 - root - INFO - lr: 8.0054e-06 gnorm: 1.19 [20:27:42< 4:02:09] +[titan] 2025-10-05 19:02:04,734 - root - INFO - step: 33415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 19:02:04,734 - root - INFO - lr: 8.0009e-06 gnorm: 1.18 [20:27:53< 4:01:58] +[titan] 2025-10-05 19:02:15,660 - root - INFO - step: 33420 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 19:02:15,661 - root - INFO - lr: 7.9965e-06 gnorm: 1.20 [20:28:04< 4:01:47] +[titan] 2025-10-05 19:02:26,561 - root - INFO - step: 33425 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 19:02:26,561 - root - INFO - lr: 7.9920e-06 gnorm: 1.15 [20:28:15< 4:01:36] +[titan] 2025-10-05 19:02:37,445 - root - INFO - step: 33430 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 19:02:37,445 - root - INFO - lr: 
7.9876e-06 gnorm: 1.18 [20:28:26< 4:01:25] +[titan] 2025-10-05 19:02:48,327 - root - INFO - step: 33435 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:02:48,327 - root - INFO - lr: 7.9831e-06 gnorm: 1.16 [20:28:37< 4:01:14] +[titan] 2025-10-05 19:02:59,208 - root - INFO - step: 33440 loss: 1.9304 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 19:02:59,209 - root - INFO - lr: 7.9787e-06 gnorm: 1.20 [20:28:47< 4:01:03] +[titan] 2025-10-05 19:03:10,117 - root - INFO - step: 33445 loss: 2.0526 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8150 +[titan] 2025-10-05 19:03:10,117 - root - INFO - lr: 7.9742e-06 gnorm: 1.21 [20:28:58< 4:00:52] +[titan] 2025-10-05 19:03:18,858 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:03:21,042 - root - INFO - step: 33450 loss: 1.9353 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:03:21,042 - root - INFO - lr: 7.9698e-06 gnorm: 1.16 [20:29:09< 4:00:41] +[titan] 2025-10-05 19:03:31,901 - root - INFO - step: 33455 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:03:31,901 - root - INFO - lr: 7.9654e-06 gnorm: 1.18 [20:29:20< 4:00:30] +[titan] 2025-10-05 19:03:42,767 - root - INFO - step: 33460 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:03:42,767 - root - INFO - lr: 7.9610e-06 gnorm: 1.19 [20:29:31< 4:00:19] +[titan] 2025-10-05 19:03:53,626 - root - INFO - step: 33465 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:03:53,626 - root - INFO - lr: 7.9565e-06 gnorm: 1.20 [20:29:42< 4:00:08] +[titan] 2025-10-05 19:04:04,499 - root - INFO - step: 33470 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:04:04,499 - root - INFO - lr: 7.9521e-06 gnorm: 1.22 [20:29:53< 3:59:57] +[titan] 2025-10-05 19:04:15,390 - root - INFO - step: 33475 loss: 1.9236 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:04:15,390 - root - INFO - lr: 7.9477e-06 gnorm: 1.15 [20:30:04< 3:59:46] +[titan] 2025-10-05 19:04:26,338 - root - INFO - step: 33480 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 19:04:26,338 - root - INFO - lr: 
7.9433e-06 gnorm: 1.18 [20:30:15< 3:59:34] +[titan] 2025-10-05 19:04:37,222 - root - INFO - step: 33485 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 19:04:37,222 - root - INFO - lr: 7.9389e-06 gnorm: 1.12 [20:30:25< 3:59:23] +[titan] 2025-10-05 19:04:48,095 - root - INFO - step: 33490 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 19:04:48,095 - root - INFO - lr: 7.9345e-06 gnorm: 1.17 [20:30:36< 3:59:12] +[titan] 2025-10-05 19:04:58,969 - root - INFO - step: 33495 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 19:04:58,969 - root - INFO - lr: 7.9301e-06 gnorm: 1.19 [20:30:47< 3:59:01] +[titan] 2025-10-05 19:05:07,649 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:05:09,844 - root - INFO - step: 33500 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 19:05:09,844 - root - INFO - lr: 7.9256e-06 gnorm: 1.16 [20:30:58< 3:58:50] +[titan] 2025-10-05 19:05:20,803 - root - INFO - step: 33505 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7103 +[titan] 2025-10-05 19:05:20,803 - root - INFO - lr: 7.9212e-06 gnorm: 1.17 [20:31:09< 3:58:39] +[titan] 2025-10-05 19:05:31,705 - root - INFO - step: 33510 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 19:05:31,706 - root - INFO - lr: 7.9169e-06 gnorm: 1.18 [20:31:20< 3:58:28] +[titan] 2025-10-05 19:05:42,585 - root - INFO - step: 33515 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 19:05:42,585 - root - INFO - lr: 7.9125e-06 gnorm: 1.16 [20:31:31< 3:58:17] +[titan] 2025-10-05 19:05:53,459 - root - INFO - step: 33520 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 19:05:53,459 - root - INFO - lr: 7.9081e-06 gnorm: 1.20 [20:31:42< 3:58:06] +[titan] 2025-10-05 19:06:04,332 - root - INFO - step: 33525 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6778 +[titan] 2025-10-05 19:06:04,332 - root - INFO - lr: 7.9037e-06 gnorm: 1.12 [20:31:53< 3:57:55] +[titan] 2025-10-05 19:06:15,198 - root - INFO - step: 33530 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 19:06:15,198 - root - INFO - lr: 
7.8993e-06 gnorm: 1.17 [20:32:03< 3:57:44] +[titan] 2025-10-05 19:06:26,152 - root - INFO - step: 33535 loss: 1.9859 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 19:06:26,152 - root - INFO - lr: 7.8949e-06 gnorm: 1.24 [20:32:14< 3:57:33] +[titan] 2025-10-05 19:06:37,024 - root - INFO - step: 33540 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7744 +[titan] 2025-10-05 19:06:37,024 - root - INFO - lr: 7.8905e-06 gnorm: 1.22 [20:32:25< 3:57:22] +[titan] 2025-10-05 19:06:47,931 - root - INFO - step: 33545 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 19:06:47,931 - root - INFO - lr: 7.8862e-06 gnorm: 1.24 [20:32:36< 3:57:11] +[titan] 2025-10-05 19:06:56,620 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:06:58,805 - root - INFO - step: 33550 loss: 1.9223 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:06:58,805 - root - INFO - lr: 7.8818e-06 gnorm: 1.17 [20:32:47< 3:57:00] +[titan] 2025-10-05 19:07:09,652 - root - INFO - step: 33555 loss: 1.9140 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:07:09,653 - root - INFO - lr: 7.8774e-06 gnorm: 1.21 [20:32:58< 3:56:49] +[titan] 2025-10-05 19:07:20,562 - root - INFO - step: 33560 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:07:20,563 - root - INFO - lr: 7.8731e-06 gnorm: 1.23 [20:33:09< 3:56:38] +[titan] 2025-10-05 19:07:31,425 - root - INFO - step: 33565 loss: 1.8946 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 19:07:31,425 - root - INFO - lr: 7.8687e-06 gnorm: 1.19 [20:33:20< 3:56:27] +[titan] 2025-10-05 19:07:42,303 - root - INFO - step: 33570 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:07:42,303 - root - INFO - lr: 7.8643e-06 gnorm: 1.20 [20:33:31< 3:56:16] +[titan] 2025-10-05 19:07:53,210 - root - INFO - step: 33575 loss: 1.9262 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:07:53,211 - root - INFO - lr: 7.8600e-06 gnorm: 1.18 [20:33:41< 3:56:05] +[titan] 2025-10-05 19:08:04,072 - root - INFO - step: 33580 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 19:08:04,072 - root - INFO - lr: 
7.8556e-06 gnorm: 1.18 [20:33:52< 3:55:53] +[titan] 2025-10-05 19:08:14,947 - root - INFO - step: 33585 loss: 1.8953 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 19:08:14,947 - root - INFO - lr: 7.8513e-06 gnorm: 1.14 [20:34:03< 3:55:42] +[titan] 2025-10-05 19:08:25,883 - root - INFO - step: 33590 loss: 1.9998 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 19:08:25,883 - root - INFO - lr: 7.8469e-06 gnorm: 1.19 [20:34:14< 3:55:31] +[titan] 2025-10-05 19:08:36,748 - root - INFO - step: 33595 loss: 1.8788 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6618 +[titan] 2025-10-05 19:08:36,748 - root - INFO - lr: 7.8426e-06 gnorm: 1.17 [20:34:25< 3:55:20] +[titan] 2025-10-05 19:08:45,430 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:08:47,610 - root - INFO - step: 33600 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7176 +[titan] 2025-10-05 19:08:47,610 - root - INFO - lr: 7.8382e-06 gnorm: 1.20 [20:34:36< 3:55:09] +[titan] 2025-10-05 19:08:58,491 - root - INFO - step: 33605 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7542 +[titan] 2025-10-05 19:08:58,491 - root - INFO - lr: 7.8339e-06 gnorm: 1.19 [20:34:47< 3:54:58] +[titan] 2025-10-05 19:09:09,347 - root - INFO - step: 33610 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 19:09:09,347 - root - INFO - lr: 7.8296e-06 gnorm: 1.17 [20:34:58< 3:54:47] +[titan] 2025-10-05 19:09:20,217 - root - INFO - step: 33615 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 19:09:20,217 - root - INFO - lr: 7.8252e-06 gnorm: 1.18 [20:35:08< 3:54:36] +[titan] 2025-10-05 19:09:31,144 - root - INFO - step: 33620 loss: 1.9273 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 19:09:31,144 - root - INFO - lr: 7.8209e-06 gnorm: 1.16 [20:35:19< 3:54:25] +[titan] 2025-10-05 19:09:41,985 - root - INFO - step: 33625 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 19:09:41,985 - root - INFO - lr: 7.8166e-06 gnorm: 1.18 [20:35:30< 3:54:14] +[titan] 2025-10-05 19:09:52,855 - root - INFO - step: 33630 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7601 +[titan] 2025-10-05 19:09:52,855 - root - INFO - lr: 
7.8123e-06 gnorm: 1.21 [20:35:41< 3:54:03] +[titan] 2025-10-05 19:10:03,725 - root - INFO - step: 33635 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:10:03,725 - root - INFO - lr: 7.8080e-06 gnorm: 1.19 [20:35:52< 3:53:52] +[titan] 2025-10-05 19:10:14,597 - root - INFO - step: 33640 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6948 +[titan] 2025-10-05 19:10:14,597 - root - INFO - lr: 7.8036e-06 gnorm: 1.18 [20:36:03< 3:53:41] +[titan] 2025-10-05 19:10:25,501 - root - INFO - step: 33645 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 19:10:25,501 - root - INFO - lr: 7.7993e-06 gnorm: 1.17 [20:36:14< 3:53:30] +[titan] 2025-10-05 19:10:34,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:10:36,352 - root - INFO - step: 33650 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:10:36,353 - root - INFO - lr: 7.7950e-06 gnorm: 1.18 [20:36:25< 3:53:19] +[titan] 2025-10-05 19:10:47,197 - root - INFO - step: 33655 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 19:10:47,198 - root - INFO - lr: 7.7907e-06 gnorm: 1.18 [20:36:35< 3:53:08] +[titan] 2025-10-05 19:10:58,037 - root - INFO - step: 33660 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:10:58,037 - root - INFO - lr: 7.7864e-06 gnorm: 1.20 [20:36:46< 3:52:57] +[titan] 2025-10-05 19:11:08,895 - root - INFO - step: 33665 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 19:11:08,896 - root - INFO - lr: 7.7821e-06 gnorm: 1.18 [20:36:57< 3:52:46] +[titan] 2025-10-05 19:11:19,804 - root - INFO - step: 33670 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:11:19,805 - root - INFO - lr: 7.7778e-06 gnorm: 1.13 [20:37:08< 3:52:35] +[titan] 2025-10-05 19:11:30,707 - root - INFO - step: 33675 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:11:30,707 - root - INFO - lr: 7.7735e-06 gnorm: 1.19 [20:37:19< 3:52:23] +[titan] 2025-10-05 19:11:41,571 - root - INFO - step: 33680 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 19:11:41,571 - root - INFO - lr: 
7.7692e-06 gnorm: 1.15 [20:37:30< 3:52:12] +[titan] 2025-10-05 19:11:52,439 - root - INFO - step: 33685 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7395 +[titan] 2025-10-05 19:11:52,439 - root - INFO - lr: 7.7649e-06 gnorm: 1.17 [20:37:41< 3:52:01] +[titan] 2025-10-05 19:12:03,278 - root - INFO - step: 33690 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 19:12:03,278 - root - INFO - lr: 7.7606e-06 gnorm: 1.21 [20:37:51< 3:51:50] +[titan] 2025-10-05 19:12:14,126 - root - INFO - step: 33695 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:12:14,126 - root - INFO - lr: 7.7564e-06 gnorm: 1.23 [20:38:02< 3:51:39] +[titan] 2025-10-05 19:12:22,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:12:25,032 - root - INFO - step: 33700 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 19:12:25,032 - root - INFO - lr: 7.7521e-06 gnorm: 1.18 [20:38:13< 3:51:28] +[titan] 2025-10-05 19:12:35,912 - root - INFO - step: 33705 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 19:12:35,913 - root - INFO - lr: 7.7478e-06 gnorm: 1.21 [20:38:24< 3:51:17] +[titan] 2025-10-05 19:12:46,776 - root - INFO - step: 33710 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 19:12:46,776 - root - INFO - lr: 7.7435e-06 gnorm: 1.19 [20:38:35< 3:51:06] +[titan] 2025-10-05 19:12:57,642 - root - INFO - step: 33715 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 19:12:57,642 - root - INFO - lr: 7.7393e-06 gnorm: 1.17 [20:38:46< 3:50:55] +[titan] 2025-10-05 19:13:08,509 - root - INFO - step: 33720 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6902 +[titan] 2025-10-05 19:13:08,509 - root - INFO - lr: 7.7350e-06 gnorm: 1.19 [20:38:57< 3:50:44] +[titan] 2025-10-05 19:13:19,370 - root - INFO - step: 33725 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:13:19,370 - root - INFO - lr: 7.7307e-06 gnorm: 1.18 [20:39:08< 3:50:33] +[titan] 2025-10-05 19:13:30,375 - root - INFO - step: 33730 loss: 1.9645 memory: 118.84GiB(85.28%) tps: 29,776 tflops: 413.09 mfu: 41.77% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 19:13:30,375 - root - INFO - lr: 
7.7265e-06 gnorm: 1.18 [20:39:19< 3:50:22] +[titan] 2025-10-05 19:13:41,281 - root - INFO - step: 33735 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 19:13:41,281 - root - INFO - lr: 7.7222e-06 gnorm: 1.18 [20:39:29< 3:50:11] +[titan] 2025-10-05 19:13:52,137 - root - INFO - step: 33740 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 19:13:52,137 - root - INFO - lr: 7.7180e-06 gnorm: 1.20 [20:39:40< 3:50:00] +[titan] 2025-10-05 19:14:03,003 - root - INFO - step: 33745 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 19:14:03,003 - root - INFO - lr: 7.7137e-06 gnorm: 1.17 [20:39:51< 3:49:49] +[titan] 2025-10-05 19:14:11,685 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:14:13,871 - root - INFO - step: 33750 loss: 2.0153 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7820 +[titan] 2025-10-05 19:14:13,871 - root - INFO - lr: 7.7095e-06 gnorm: 1.23 [20:40:02< 3:49:38] +[titan] 2025-10-05 19:14:24,752 - root - INFO - step: 33755 loss: 1.8533 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2139 global_avg_mtp_loss: 1.6394 +[titan] 2025-10-05 19:14:24,752 - root - INFO - lr: 7.7052e-06 gnorm: 1.18 [20:40:13< 3:49:27] +[titan] 2025-10-05 19:14:35,653 - root - INFO - step: 33760 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7664 +[titan] 2025-10-05 19:14:35,654 - root - INFO - lr: 7.7010e-06 gnorm: 1.20 [20:40:24< 3:49:16] +[titan] 2025-10-05 19:14:46,559 - root - INFO - step: 33765 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 19:14:46,559 - root - INFO - lr: 7.6967e-06 gnorm: 1.17 [20:40:35< 3:49:05] +[titan] 2025-10-05 19:14:57,429 - root - INFO - step: 33770 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6631 +[titan] 2025-10-05 19:14:57,429 - root - INFO - lr: 7.6925e-06 gnorm: 1.19 [20:40:46< 3:48:54] +[titan] 2025-10-05 19:15:08,283 - root - INFO - step: 33775 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 19:15:08,283 - root - INFO - lr: 7.6883e-06 gnorm: 1.20 [20:40:56< 3:48:43] +[titan] 2025-10-05 19:15:19,145 - root - INFO - step: 33780 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:15:19,145 - root - INFO - lr: 
7.6841e-06 gnorm: 1.21 [20:41:07< 3:48:31] +[titan] 2025-10-05 19:15:30,024 - root - INFO - step: 33785 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6677 +[titan] 2025-10-05 19:15:30,024 - root - INFO - lr: 7.6798e-06 gnorm: 1.18 [20:41:18< 3:48:20] +[titan] 2025-10-05 19:15:40,968 - root - INFO - step: 33790 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:15:40,968 - root - INFO - lr: 7.6756e-06 gnorm: 1.18 [20:41:29< 3:48:09] +[titan] 2025-10-05 19:15:45,549 - root - INFO - Dumping profiler traces at step 33792 +[titan] 2025-10-05 19:15:45,589 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:15:52,106 - root - INFO - step: 33795 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 29,421 tflops: 408.17 mfu: 41.27% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 19:15:52,106 - root - INFO - lr: 7.6714e-06 gnorm: 1.19 [20:41:40< 3:47:58] +[titan] 2025-10-05 19:16:00,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:16:03,008 - root - INFO - step: 33800 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:16:03,008 - root - INFO - lr: 7.6672e-06 gnorm: 1.20 [20:41:51< 3:47:47] +[titan] 2025-10-05 19:16:13,885 - root - INFO - step: 33805 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 19:16:13,885 - root - INFO - lr: 7.6630e-06 gnorm: 1.17 [20:42:02< 3:47:36] +[titan] 2025-10-05 19:16:24,767 - root - INFO - step: 33810 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 19:16:24,767 - root - INFO - lr: 7.6587e-06 gnorm: 1.15 [20:42:13< 3:47:25] +[titan] 2025-10-05 19:16:35,714 - root - INFO - step: 33815 loss: 2.0005 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 19:16:35,714 - root - INFO - lr: 7.6545e-06 gnorm: 1.20 [20:42:24< 3:47:14] +[titan] 2025-10-05 19:16:46,588 - root - INFO - step: 33820 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 19:16:46,588 - root - INFO - lr: 7.6503e-06 gnorm: 1.18 [20:42:35< 3:47:03] +[titan] 2025-10-05 19:16:57,467 - root - INFO - step: 33825 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6954 +[titan] 2025-10-05 19:16:57,467 - root - INFO - lr: 7.6461e-06 gnorm: 1.16 [20:42:46< 3:46:52] +[titan] 2025-10-05 19:17:08,370 - root - INFO - step: 33830 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 19:17:08,371 - root - INFO - lr: 
7.6419e-06 gnorm: 1.15 [20:42:57< 3:46:41] +[titan] 2025-10-05 19:17:19,239 - root - INFO - step: 33835 loss: 1.9118 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:17:19,239 - root - INFO - lr: 7.6377e-06 gnorm: 1.19 [20:43:07< 3:46:30] +[titan] 2025-10-05 19:17:30,168 - root - INFO - step: 33840 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7619 +[titan] 2025-10-05 19:17:30,168 - root - INFO - lr: 7.6335e-06 gnorm: 1.20 [20:43:18< 3:46:19] +[titan] 2025-10-05 19:17:41,047 - root - INFO - step: 33845 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:17:41,047 - root - INFO - lr: 7.6294e-06 gnorm: 1.17 [20:43:29< 3:46:08] +[titan] 2025-10-05 19:17:49,727 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:17:51,911 - root - INFO - step: 33850 loss: 1.9924 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7615 +[titan] 2025-10-05 19:17:51,911 - root - INFO - lr: 7.6252e-06 gnorm: 1.20 [20:43:40< 3:45:57] +[titan] 2025-10-05 19:18:02,789 - root - INFO - step: 33855 loss: 1.9320 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 19:18:02,789 - root - INFO - lr: 7.6210e-06 gnorm: 1.18 [20:43:51< 3:45:46] +[titan] 2025-10-05 19:18:13,634 - root - INFO - step: 33860 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 19:18:13,634 - root - INFO - lr: 7.6168e-06 gnorm: 1.20 [20:44:02< 3:45:35] +[titan] 2025-10-05 19:18:24,528 - root - INFO - step: 33865 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:18:24,528 - root - INFO - lr: 7.6126e-06 gnorm: 1.21 [20:44:13< 3:45:24] +[titan] 2025-10-05 19:18:35,439 - root - INFO - step: 33870 loss: 1.8718 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6559 +[titan] 2025-10-05 19:18:35,440 - root - INFO - lr: 7.6085e-06 gnorm: 1.16 [20:44:24< 3:45:13] +[titan] 2025-10-05 19:18:46,300 - root - INFO - step: 33875 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:18:46,300 - root - INFO - lr: 7.6043e-06 gnorm: 1.18 [20:44:34< 3:45:02] +[titan] 2025-10-05 19:18:57,171 - root - INFO - step: 33880 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 19:18:57,171 - root - INFO - lr: 
7.6001e-06 gnorm: 1.18 [20:44:45< 3:44:51] +[titan] 2025-10-05 19:19:08,027 - root - INFO - step: 33885 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 19:19:08,028 - root - INFO - lr: 7.5960e-06 gnorm: 1.18 [20:44:56< 3:44:40] +[titan] 2025-10-05 19:19:18,884 - root - INFO - step: 33890 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 19:19:18,884 - root - INFO - lr: 7.5918e-06 gnorm: 1.15 [20:45:07< 3:44:28] +[titan] 2025-10-05 19:19:29,764 - root - INFO - step: 33895 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6963 +[titan] 2025-10-05 19:19:29,765 - root - INFO - lr: 7.5877e-06 gnorm: 1.14 [20:45:18< 3:44:17] +[titan] 2025-10-05 19:19:38,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:19:40,700 - root - INFO - step: 33900 loss: 1.9418 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:19:40,700 - root - INFO - lr: 7.5835e-06 gnorm: 1.15 [20:45:29< 3:44:06] +[titan] 2025-10-05 19:19:51,575 - root - INFO - step: 33905 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 19:19:51,575 - root - INFO - lr: 7.5793e-06 gnorm: 1.18 [20:45:40< 3:43:55] +[titan] 2025-10-05 19:20:02,438 - root - INFO - step: 33910 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 19:20:02,438 - root - INFO - lr: 7.5752e-06 gnorm: 1.17 [20:45:51< 3:43:44] +[titan] 2025-10-05 19:20:13,310 - root - INFO - step: 33915 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 19:20:13,310 - root - INFO - lr: 7.5711e-06 gnorm: 1.21 [20:46:01< 3:43:33] +[titan] 2025-10-05 19:20:24,174 - root - INFO - step: 33920 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 19:20:24,174 - root - INFO - lr: 7.5669e-06 gnorm: 1.18 [20:46:12< 3:43:22] +[titan] 2025-10-05 19:20:35,419 - root - INFO - step: 33925 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 29,141 tflops: 404.28 mfu: 40.88% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 19:20:35,420 - root - INFO - lr: 7.5628e-06 gnorm: 1.19 [20:46:24< 3:43:11] +[titan] 2025-10-05 19:20:46,283 - root - INFO - step: 33930 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 19:20:46,283 - root - INFO - lr: 
7.5586e-06 gnorm: 1.17 [20:46:34< 3:43:00] +[titan] 2025-10-05 19:20:57,167 - root - INFO - step: 33935 loss: 1.9676 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 19:20:57,167 - root - INFO - lr: 7.5545e-06 gnorm: 1.23 [20:46:45< 3:42:49] +[titan] 2025-10-05 19:21:08,017 - root - INFO - step: 33940 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:21:08,017 - root - INFO - lr: 7.5504e-06 gnorm: 1.19 [20:46:56< 3:42:38] +[titan] 2025-10-05 19:21:18,883 - root - INFO - step: 33945 loss: 1.9536 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 19:21:18,883 - root - INFO - lr: 7.5463e-06 gnorm: 1.17 [20:47:07< 3:42:27] +[titan] 2025-10-05 19:21:27,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:21:29,744 - root - INFO - step: 33950 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:21:29,745 - root - INFO - lr: 7.5421e-06 gnorm: 1.24 [20:47:18< 3:42:16] +[titan] 2025-10-05 19:21:40,693 - root - INFO - step: 33955 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:21:40,694 - root - INFO - lr: 7.5380e-06 gnorm: 1.21 [20:47:29< 3:42:05] +[titan] 2025-10-05 19:21:51,598 - root - INFO - step: 33960 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 19:21:51,598 - root - INFO - lr: 7.5339e-06 gnorm: 1.22 [20:47:40< 3:41:54] +[titan] 2025-10-05 19:22:02,484 - root - INFO - step: 33965 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 19:22:02,484 - root - INFO - lr: 7.5298e-06 gnorm: 1.17 [20:47:51< 3:41:43] +[titan] 2025-10-05 19:22:13,362 - root - INFO - step: 33970 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 19:22:13,362 - root - INFO - lr: 7.5257e-06 gnorm: 1.15 [20:48:02< 3:41:32] +[titan] 2025-10-05 19:22:24,229 - root - INFO - step: 33975 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:22:24,229 - root - INFO - lr: 7.5216e-06 gnorm: 1.21 [20:48:12< 3:41:21] +[titan] 2025-10-05 19:22:35,177 - root - INFO - step: 33980 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:22:35,178 - root - INFO - lr: 
7.5175e-06 gnorm: 1.26 [20:48:23< 3:41:10] +[titan] 2025-10-05 19:22:46,038 - root - INFO - step: 33985 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:22:46,038 - root - INFO - lr: 7.5134e-06 gnorm: 1.22 [20:48:34< 3:40:59] +[titan] 2025-10-05 19:22:56,932 - root - INFO - step: 33990 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 19:22:56,932 - root - INFO - lr: 7.5093e-06 gnorm: 1.16 [20:48:45< 3:40:48] +[titan] 2025-10-05 19:23:07,777 - root - INFO - step: 33995 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:23:07,778 - root - INFO - lr: 7.5052e-06 gnorm: 1.21 [20:48:56< 3:40:37] +[titan] 2025-10-05 19:23:16,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:23:18,638 - root - INFO - step: 34000 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:23:18,638 - root - INFO - lr: 7.5011e-06 gnorm: 1.17 [20:49:07< 3:40:25] +[titan] 2025-10-05 19:23:29,501 - root - INFO - step: 34005 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:23:29,501 - root - INFO - lr: 7.4970e-06 gnorm: 1.18 [20:49:18< 3:40:14] +[titan] 2025-10-05 19:23:40,426 - root - INFO - step: 34010 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 19:23:40,426 - root - INFO - lr: 7.4929e-06 gnorm: 1.19 [20:49:29< 3:40:03] +[titan] 2025-10-05 19:23:51,314 - root - INFO - step: 34015 loss: 1.9884 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:23:51,315 - root - INFO - lr: 7.4888e-06 gnorm: 1.21 [20:49:39< 3:39:52] +[titan] 2025-10-05 19:24:02,171 - root - INFO - step: 34020 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 19:24:02,171 - root - INFO - lr: 7.4847e-06 gnorm: 1.18 [20:49:50< 3:39:41] +[titan] 2025-10-05 19:24:13,068 - root - INFO - step: 34025 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 19:24:13,068 - root - INFO - lr: 7.4807e-06 gnorm: 1.26 [20:50:01< 3:39:30] +[titan] 2025-10-05 19:24:23,950 - root - INFO - step: 34030 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 19:24:23,951 - root - INFO - lr: 
7.4766e-06 gnorm: 1.18 [20:50:12< 3:39:19] +[titan] 2025-10-05 19:24:34,827 - root - INFO - step: 34035 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6696 +[titan] 2025-10-05 19:24:34,827 - root - INFO - lr: 7.4725e-06 gnorm: 1.20 [20:50:23< 3:39:08] +[titan] 2025-10-05 19:24:45,778 - root - INFO - step: 34040 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 19:24:45,778 - root - INFO - lr: 7.4685e-06 gnorm: 1.19 [20:50:34< 3:38:57] +[titan] 2025-10-05 19:24:56,664 - root - INFO - step: 34045 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 19:24:56,664 - root - INFO - lr: 7.4644e-06 gnorm: 1.20 [20:50:45< 3:38:46] +[titan] 2025-10-05 19:25:05,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:25:07,528 - root - INFO - step: 34050 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:25:07,528 - root - INFO - lr: 7.4603e-06 gnorm: 1.21 [20:50:56< 3:38:35] +[titan] 2025-10-05 19:25:18,416 - root - INFO - step: 34055 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6971 +[titan] 2025-10-05 19:25:18,416 - root - INFO - lr: 7.4563e-06 gnorm: 1.17 [20:51:07< 3:38:24] +[titan] 2025-10-05 19:25:29,290 - root - INFO - step: 34060 loss: 1.9560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 19:25:29,291 - root - INFO - lr: 7.4522e-06 gnorm: 1.17 [20:51:17< 3:38:13] +[titan] 2025-10-05 19:25:40,227 - root - INFO - step: 34065 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6962 +[titan] 2025-10-05 19:25:40,227 - root - INFO - lr: 7.4482e-06 gnorm: 1.16 [20:51:28< 3:38:02] +[titan] 2025-10-05 19:25:51,094 - root - INFO - step: 34070 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 19:25:51,094 - root - INFO - lr: 7.4441e-06 gnorm: 1.17 [20:51:39< 3:37:51] +[titan] 2025-10-05 19:26:01,990 - root - INFO - step: 34075 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 19:26:01,990 - root - INFO - lr: 7.4401e-06 gnorm: 1.19 [20:51:50< 3:37:40] +[titan] 2025-10-05 19:26:12,871 - root - INFO - step: 34080 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6921 +[titan] 2025-10-05 19:26:12,871 - root - INFO - lr: 
7.4361e-06 gnorm: 1.18 [20:52:01< 3:37:29] +[titan] 2025-10-05 19:26:23,746 - root - INFO - step: 34085 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7530 +[titan] 2025-10-05 19:26:23,746 - root - INFO - lr: 7.4320e-06 gnorm: 1.19 [20:52:12< 3:37:18] +[titan] 2025-10-05 19:26:34,615 - root - INFO - step: 34090 loss: 1.9192 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 19:26:34,615 - root - INFO - lr: 7.4280e-06 gnorm: 1.17 [20:52:23< 3:37:07] +[titan] 2025-10-05 19:26:45,574 - root - INFO - step: 34095 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 19:26:45,575 - root - INFO - lr: 7.4239e-06 gnorm: 1.24 [20:52:34< 3:36:56] +[titan] 2025-10-05 19:26:54,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:26:56,447 - root - INFO - step: 34100 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:26:56,447 - root - INFO - lr: 7.4199e-06 gnorm: 1.21 [20:52:45< 3:36:45] +[titan] 2025-10-05 19:27:07,327 - root - INFO - step: 34105 loss: 1.8752 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 19:27:07,327 - root - INFO - lr: 7.4159e-06 gnorm: 1.17 [20:52:55< 3:36:34] +[titan] 2025-10-05 19:27:18,206 - root - INFO - step: 34110 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 19:27:18,206 - root - INFO - lr: 7.4119e-06 gnorm: 1.25 [20:53:06< 3:36:23] +[titan] 2025-10-05 19:27:29,088 - root - INFO - step: 34115 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 19:27:29,088 - root - INFO - lr: 7.4079e-06 gnorm: 1.18 [20:53:17< 3:36:11] +[titan] 2025-10-05 19:27:40,016 - root - INFO - step: 34120 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 19:27:40,017 - root - INFO - lr: 7.4038e-06 gnorm: 1.18 [20:53:28< 3:36:00] +[titan] 2025-10-05 19:27:50,909 - root - INFO - step: 34125 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 19:27:50,909 - root - INFO - lr: 7.3998e-06 gnorm: 1.20 [20:53:39< 3:35:49] +[titan] 2025-10-05 19:28:01,811 - root - INFO - step: 34130 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:28:01,811 - root - INFO - lr: 
7.3958e-06 gnorm: 1.15 [20:53:50< 3:35:38] +[titan] 2025-10-05 19:28:12,709 - root - INFO - step: 34135 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 19:28:12,709 - root - INFO - lr: 7.3918e-06 gnorm: 1.17 [20:54:01< 3:35:27] +[titan] 2025-10-05 19:28:23,581 - root - INFO - step: 34140 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:28:23,582 - root - INFO - lr: 7.3878e-06 gnorm: 1.23 [20:54:12< 3:35:16] +[titan] 2025-10-05 19:28:34,458 - root - INFO - step: 34145 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 19:28:34,458 - root - INFO - lr: 7.3838e-06 gnorm: 1.18 [20:54:23< 3:35:05] +[titan] 2025-10-05 19:28:43,203 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:28:45,388 - root - INFO - step: 34150 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 19:28:45,388 - root - INFO - lr: 7.3798e-06 gnorm: 1.19 [20:54:34< 3:34:54] +[titan] 2025-10-05 19:28:56,262 - root - INFO - step: 34155 loss: 1.9387 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:28:56,262 - root - INFO - lr: 7.3758e-06 gnorm: 1.16 [20:54:44< 3:34:43] +[titan] 2025-10-05 19:29:07,168 - root - INFO - step: 34160 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 19:29:07,168 - root - INFO - lr: 7.3718e-06 gnorm: 1.18 [20:54:55< 3:34:32] +[titan] 2025-10-05 19:29:18,057 - root - INFO - step: 34165 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 19:29:18,057 - root - INFO - lr: 7.3678e-06 gnorm: 1.19 [20:55:06< 3:34:21] +[titan] 2025-10-05 19:29:28,930 - root - INFO - step: 34170 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 19:29:28,930 - root - INFO - lr: 7.3639e-06 gnorm: 1.18 [20:55:17< 3:34:10] +[titan] 2025-10-05 19:29:39,843 - root - INFO - step: 34175 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 19:29:39,844 - root - INFO - lr: 7.3599e-06 gnorm: 1.25 [20:55:28< 3:33:59] +[titan] 2025-10-05 19:29:50,737 - root - INFO - step: 34180 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 19:29:50,737 - root - INFO - lr: 
7.3559e-06 gnorm: 1.26 [20:55:39< 3:33:48] +[titan] 2025-10-05 19:30:01,652 - root - INFO - step: 34185 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 19:30:01,652 - root - INFO - lr: 7.3519e-06 gnorm: 1.25 [20:55:50< 3:33:37] +[titan] 2025-10-05 19:30:12,505 - root - INFO - step: 34190 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 19:30:12,506 - root - INFO - lr: 7.3480e-06 gnorm: 1.20 [20:56:01< 3:33:26] +[titan] 2025-10-05 19:30:23,389 - root - INFO - step: 34195 loss: 1.9339 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 19:30:23,390 - root - INFO - lr: 7.3440e-06 gnorm: 1.19 [20:56:12< 3:33:15] +[titan] 2025-10-05 19:30:32,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:30:34,246 - root - INFO - step: 34200 loss: 1.9408 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 19:30:34,246 - root - INFO - lr: 7.3400e-06 gnorm: 1.19 [20:56:22< 3:33:04] +[titan] 2025-10-05 19:30:45,157 - root - INFO - step: 34205 loss: 1.9115 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 19:30:45,157 - root - INFO - lr: 7.3361e-06 gnorm: 1.18 [20:56:33< 3:32:53] +[titan] 2025-10-05 19:30:56,027 - root - INFO - step: 34210 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 19:30:56,027 - root - INFO - lr: 7.3321e-06 gnorm: 1.19 [20:56:44< 3:32:42] +[titan] 2025-10-05 19:31:06,908 - root - INFO - step: 34215 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 19:31:06,908 - root - INFO - lr: 7.3281e-06 gnorm: 1.17 [20:56:55< 3:32:31] +[titan] 2025-10-05 19:31:17,775 - root - INFO - step: 34220 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7272 +[titan] 2025-10-05 19:31:17,776 - root - INFO - lr: 7.3242e-06 gnorm: 1.20 [20:57:06< 3:32:20] +[titan] 2025-10-05 19:31:28,639 - root - INFO - step: 34225 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 19:31:28,639 - root - INFO - lr: 7.3202e-06 gnorm: 1.19 [20:57:17< 3:32:09] +[titan] 2025-10-05 19:31:39,529 - root - INFO - step: 34230 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:31:39,529 - root - INFO - lr: 
7.3163e-06 gnorm: 1.20 [20:57:28< 3:31:57] +[titan] 2025-10-05 19:31:50,429 - root - INFO - step: 34235 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 19:31:50,429 - root - INFO - lr: 7.3124e-06 gnorm: 1.19 [20:57:39< 3:31:46] +[titan] 2025-10-05 19:32:01,297 - root - INFO - step: 34240 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6972 +[titan] 2025-10-05 19:32:01,297 - root - INFO - lr: 7.3084e-06 gnorm: 1.22 [20:57:49< 3:31:35] +[titan] 2025-10-05 19:32:12,194 - root - INFO - step: 34245 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 19:32:12,194 - root - INFO - lr: 7.3045e-06 gnorm: 1.21 [20:58:00< 3:31:24] +[titan] 2025-10-05 19:32:20,872 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:32:23,059 - root - INFO - step: 34250 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 19:32:23,059 - root - INFO - lr: 7.3006e-06 gnorm: 1.18 [20:58:11< 3:31:13] +[titan] 2025-10-05 19:32:33,942 - root - INFO - step: 34255 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:32:33,942 - root - INFO - lr: 7.2966e-06 gnorm: 1.16 [20:58:22< 3:31:02] +[titan] 2025-10-05 19:32:44,861 - root - INFO - step: 34260 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7106 +[titan] 2025-10-05 19:32:44,861 - root - INFO - lr: 7.2927e-06 gnorm: 1.18 [20:58:33< 3:30:51] +[titan] 2025-10-05 19:32:55,734 - root - INFO - step: 34265 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 19:32:55,734 - root - INFO - lr: 7.2888e-06 gnorm: 1.16 [20:58:44< 3:30:40] +[titan] 2025-10-05 19:33:06,617 - root - INFO - step: 34270 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 19:33:06,617 - root - INFO - lr: 7.2849e-06 gnorm: 1.22 [20:58:55< 3:30:29] +[titan] 2025-10-05 19:33:17,521 - root - INFO - step: 34275 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 19:33:17,521 - root - INFO - lr: 7.2809e-06 gnorm: 1.19 [20:59:06< 3:30:18] +[titan] 2025-10-05 19:33:28,449 - root - INFO - step: 34280 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 19:33:28,449 - root - INFO - lr: 
7.2770e-06 gnorm: 1.23 [20:59:17< 3:30:07] +[titan] 2025-10-05 19:33:39,327 - root - INFO - step: 34285 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:33:39,328 - root - INFO - lr: 7.2731e-06 gnorm: 1.17 [20:59:27< 3:29:56] +[titan] 2025-10-05 19:33:50,236 - root - INFO - step: 34290 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:33:50,236 - root - INFO - lr: 7.2692e-06 gnorm: 1.23 [20:59:38< 3:29:45] +[titan] 2025-10-05 19:34:01,108 - root - INFO - step: 34295 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 19:34:01,108 - root - INFO - lr: 7.2653e-06 gnorm: 1.17 [20:59:49< 3:29:34] +[titan] 2025-10-05 19:34:09,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:34:11,978 - root - INFO - step: 34300 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 19:34:11,978 - root - INFO - lr: 7.2614e-06 gnorm: 1.19 [21:00:00< 3:29:23] +[titan] 2025-10-05 19:34:20,927 - root - INFO - Dumping profiler traces at step 34304 +[titan] 2025-10-05 19:34:20,967 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:34:23,158 - root - INFO - step: 34305 loss: 1.8387 memory: 118.84GiB(85.28%) tps: 29,312 tflops: 406.65 mfu: 41.12% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6259 +[titan] 2025-10-05 19:34:23,158 - root - INFO - lr: 7.2575e-06 gnorm: 1.17 [21:00:11< 3:29:12] +[titan] 2025-10-05 19:34:34,056 - root - INFO - step: 34310 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 19:34:34,057 - root - INFO - lr: 7.2536e-06 gnorm: 1.17 [21:00:22< 3:29:01] +[titan] 2025-10-05 19:34:44,938 - root - INFO - step: 34315 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 19:34:44,938 - root - INFO - lr: 7.2497e-06 gnorm: 1.18 [21:00:33< 3:28:50] +[titan] 2025-10-05 19:34:55,805 - root - INFO - step: 34320 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:34:55,805 - root - INFO - lr: 7.2458e-06 gnorm: 1.22 [21:00:44< 3:28:39] +[titan] 2025-10-05 19:35:06,664 - root - INFO - step: 34325 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 19:35:06,664 - root - INFO - lr: 7.2419e-06 gnorm: 1.19 [21:00:55< 3:28:28] +[titan] 2025-10-05 19:35:17,530 - root - INFO - step: 34330 loss: 
1.9235 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 19:35:17,530 - root - INFO - lr: 7.2381e-06 gnorm: 1.18 [21:01:06< 3:28:17] +[titan] 2025-10-05 19:35:28,388 - root - INFO - step: 34335 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 19:35:28,388 - root - INFO - lr: 7.2342e-06 gnorm: 1.27 [21:01:17< 3:28:06] +[titan] 2025-10-05 19:35:39,210 - root - INFO - step: 34340 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 19:35:39,210 - root - INFO - lr: 7.2303e-06 gnorm: 1.23 [21:01:27< 3:27:55] +[titan] 2025-10-05 19:35:50,071 - root - INFO - step: 34345 loss: 1.9981 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7671 +[titan] 2025-10-05 19:35:50,072 - root - INFO - lr: 7.2264e-06 gnorm: 1.20 [21:01:38< 3:27:44] +[titan] 2025-10-05 19:35:58,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:36:00,937 - root - INFO - step: 34350 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:00,937 - root - INFO - lr: 7.2226e-06 gnorm: 1.19 [21:01:49< 3:27:32] +[titan] 2025-10-05 19:36:11,779 - root - INFO - step: 34355 loss: 1.9721 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:36:11,779 - root - INFO - lr: 7.2187e-06 gnorm: 1.22 [21:02:00< 3:27:21] +[titan] 2025-10-05 19:36:22,618 - root - INFO - step: 34360 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 19:36:22,618 - root - INFO - lr: 7.2148e-06 gnorm: 1.19 [21:02:11< 3:27:10] +[titan] 2025-10-05 19:36:33,472 - root - INFO - step: 34365 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 19:36:33,472 - root - INFO - lr: 7.2110e-06 gnorm: 1.16 [21:02:22< 3:26:59] +[titan] 2025-10-05 19:36:44,328 - root - INFO - step: 34370 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:44,329 - root - INFO - lr: 7.2071e-06 gnorm: 1.17 [21:02:32< 3:26:48] +[titan] 2025-10-05 19:36:55,235 - root - INFO - step: 34375 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 19:36:55,235 - root - INFO - lr: 7.2033e-06 gnorm: 1.14 [21:02:43< 3:26:37] +[titan] 2025-10-05 19:37:06,084 - root - INFO - step: 34380 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:37:06,084 - root - INFO - lr: 
7.1994e-06 gnorm: 1.17 [21:02:54< 3:26:26] +[titan] 2025-10-05 19:37:16,961 - root - INFO - step: 34385 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7555 +[titan] 2025-10-05 19:37:16,961 - root - INFO - lr: 7.1956e-06 gnorm: 1.22 [21:03:05< 3:26:15] +[titan] 2025-10-05 19:37:27,815 - root - INFO - step: 34390 loss: 2.0305 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 19:37:27,815 - root - INFO - lr: 7.1917e-06 gnorm: 1.20 [21:03:16< 3:26:04] +[titan] 2025-10-05 19:37:38,670 - root - INFO - step: 34395 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 19:37:38,670 - root - INFO - lr: 7.1879e-06 gnorm: 1.19 [21:03:27< 3:25:53] +[titan] 2025-10-05 19:37:47,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:37:49,563 - root - INFO - step: 34400 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:37:49,563 - root - INFO - lr: 7.1840e-06 gnorm: 1.23 [21:03:38< 3:25:42] +[titan] 2025-10-05 19:38:00,476 - root - INFO - step: 34405 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:38:00,476 - root - INFO - lr: 7.1802e-06 gnorm: 1.21 [21:03:49< 3:25:31] +[titan] 2025-10-05 19:38:11,337 - root - INFO - step: 34410 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7067 +[titan] 2025-10-05 19:38:11,338 - root - INFO - lr: 7.1764e-06 gnorm: 1.16 [21:03:59< 3:25:20] +[titan] 2025-10-05 19:38:22,210 - root - INFO - step: 34415 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 19:38:22,210 - root - INFO - lr: 7.1726e-06 gnorm: 1.20 [21:04:10< 3:25:09] +[titan] 2025-10-05 19:38:33,092 - root - INFO - step: 34420 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:38:33,092 - root - INFO - lr: 7.1687e-06 gnorm: 1.23 [21:04:21< 3:24:58] +[titan] 2025-10-05 19:38:43,954 - root - INFO - step: 34425 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 19:38:43,954 - root - INFO - lr: 7.1649e-06 gnorm: 1.19 [21:04:32< 3:24:47] +[titan] 2025-10-05 19:38:54,847 - root - INFO - step: 34430 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 19:38:54,847 - root - INFO - lr: 
7.1611e-06 gnorm: 1.22 [21:04:43< 3:24:36] +[titan] 2025-10-05 19:39:05,711 - root - INFO - step: 34435 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:39:05,711 - root - INFO - lr: 7.1573e-06 gnorm: 1.18 [21:04:54< 3:24:25] +[titan] 2025-10-05 19:39:16,607 - root - INFO - step: 34440 loss: 1.9084 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 19:39:16,607 - root - INFO - lr: 7.1535e-06 gnorm: 1.15 [21:05:05< 3:24:14] +[titan] 2025-10-05 19:39:27,468 - root - INFO - step: 34445 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 19:39:27,469 - root - INFO - lr: 7.1497e-06 gnorm: 1.21 [21:05:16< 3:24:03] +[titan] 2025-10-05 19:39:36,133 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:39:38,309 - root - INFO - step: 34450 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 19:39:38,309 - root - INFO - lr: 7.1458e-06 gnorm: 1.19 [21:05:26< 3:23:52] +[titan] 2025-10-05 19:39:49,168 - root - INFO - step: 34455 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7614 +[titan] 2025-10-05 19:39:49,168 - root - INFO - lr: 7.1420e-06 gnorm: 1.22 [21:05:37< 3:23:41] +[titan] 2025-10-05 19:39:59,988 - root - INFO - step: 34460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:39:59,988 - root - INFO - lr: 7.1382e-06 gnorm: 1.18 [21:05:48< 3:23:29] +[titan] 2025-10-05 19:40:10,837 - root - INFO - step: 34465 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:40:10,837 - root - INFO - lr: 7.1345e-06 gnorm: 1.20 [21:05:59< 3:23:18] +[titan] 2025-10-05 19:40:21,711 - root - INFO - step: 34470 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 19:40:21,711 - root - INFO - lr: 7.1307e-06 gnorm: 1.24 [21:06:10< 3:23:07] +[titan] 2025-10-05 19:40:32,577 - root - INFO - step: 34475 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 19:40:32,577 - root - INFO - lr: 7.1269e-06 gnorm: 1.26 [21:06:21< 3:22:56] +[titan] 2025-10-05 19:40:43,432 - root - INFO - step: 34480 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6998 +[titan] 2025-10-05 19:40:43,432 - root - INFO - lr: 
7.1231e-06 gnorm: 1.19 [21:06:32< 3:22:45] +[titan] 2025-10-05 19:40:54,326 - root - INFO - step: 34485 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:40:54,326 - root - INFO - lr: 7.1193e-06 gnorm: 1.19 [21:06:42< 3:22:34] +[titan] 2025-10-05 19:41:05,190 - root - INFO - step: 34490 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 19:41:05,190 - root - INFO - lr: 7.1155e-06 gnorm: 1.19 [21:06:53< 3:22:23] +[titan] 2025-10-05 19:41:16,016 - root - INFO - step: 34495 loss: 1.9452 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7197 +[titan] 2025-10-05 19:41:16,016 - root - INFO - lr: 7.1117e-06 gnorm: 1.28 [21:07:04< 3:22:12] +[titan] 2025-10-05 19:41:24,679 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:41:26,864 - root - INFO - step: 34500 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 19:41:26,864 - root - INFO - lr: 7.1080e-06 gnorm: 1.20 [21:07:15< 3:22:01] +[titan] 2025-10-05 19:41:37,746 - root - INFO - step: 34505 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:41:37,746 - root - INFO - lr: 7.1042e-06 gnorm: 1.20 [21:07:26< 3:21:50] +[titan] 2025-10-05 19:41:48,592 - root - INFO - step: 34510 loss: 1.9716 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 19:41:48,592 - root - INFO - lr: 7.1004e-06 gnorm: 1.23 [21:07:37< 3:21:39] +[titan] 2025-10-05 19:41:59,472 - root - INFO - step: 34515 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 19:41:59,472 - root - INFO - lr: 7.0967e-06 gnorm: 1.19 [21:07:48< 3:21:28] +[titan] 2025-10-05 19:42:10,331 - root - INFO - step: 34520 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 19:42:10,331 - root - INFO - lr: 7.0929e-06 gnorm: 1.21 [21:07:58< 3:21:17] +[titan] 2025-10-05 19:42:21,195 - root - INFO - step: 34525 loss: 1.8598 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6441 +[titan] 2025-10-05 19:42:21,195 - root - INFO - lr: 7.0892e-06 gnorm: 1.20 [21:08:09< 3:21:06] +[titan] 2025-10-05 19:42:32,043 - root - INFO - step: 34530 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 19:42:32,043 - root - INFO - lr: 
7.0854e-06 gnorm: 1.18 [21:08:20< 3:20:55] +[titan] 2025-10-05 19:42:42,933 - root - INFO - step: 34535 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:42:42,933 - root - INFO - lr: 7.0816e-06 gnorm: 1.17 [21:08:31< 3:20:44] +[titan] 2025-10-05 19:42:53,805 - root - INFO - step: 34540 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 19:42:53,805 - root - INFO - lr: 7.0779e-06 gnorm: 1.20 [21:08:42< 3:20:33] +[titan] 2025-10-05 19:43:04,676 - root - INFO - step: 34545 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:43:04,676 - root - INFO - lr: 7.0742e-06 gnorm: 1.19 [21:08:53< 3:20:22] +[titan] 2025-10-05 19:43:13,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:43:15,579 - root - INFO - step: 34550 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7242 +[titan] 2025-10-05 19:43:15,579 - root - INFO - lr: 7.0704e-06 gnorm: 1.21 [21:09:04< 3:20:11] +[titan] 2025-10-05 19:43:26,490 - root - INFO - step: 34555 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:43:26,490 - root - INFO - lr: 7.0667e-06 gnorm: 1.18 [21:09:15< 3:20:00] +[titan] 2025-10-05 19:43:37,391 - root - INFO - step: 34560 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8232 +[titan] 2025-10-05 19:43:37,391 - root - INFO - lr: 7.0629e-06 gnorm: 4.37 [21:09:26< 3:19:49] +[titan] 2025-10-05 19:43:48,315 - root - INFO - step: 34565 loss: 1.9033 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 19:43:48,315 - root - INFO - lr: 7.0592e-06 gnorm: 1.24 [21:09:36< 3:19:38] +[titan] 2025-10-05 19:43:59,252 - root - INFO - step: 34570 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 19:43:59,252 - root - INFO - lr: 7.0555e-06 gnorm: 1.17 [21:09:47< 3:19:27] +[titan] 2025-10-05 19:44:10,131 - root - INFO - step: 34575 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 19:44:10,131 - root - INFO - lr: 7.0518e-06 gnorm: 1.21 [21:09:58< 3:19:15] +[titan] 2025-10-05 19:44:20,965 - root - INFO - step: 34580 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7717 +[titan] 2025-10-05 19:44:20,965 - root - INFO - lr: 
7.0480e-06 gnorm: 1.26 [21:10:09< 3:19:04] +[titan] 2025-10-05 19:44:31,829 - root - INFO - step: 34585 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 19:44:31,829 - root - INFO - lr: 7.0443e-06 gnorm: 1.19 [21:10:20< 3:18:53] +[titan] 2025-10-05 19:44:42,679 - root - INFO - step: 34590 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.7230 +[titan] 2025-10-05 19:44:42,680 - root - INFO - lr: 7.0406e-06 gnorm: 2.68 [21:10:31< 3:18:42] +[titan] 2025-10-05 19:44:53,560 - root - INFO - step: 34595 loss: 1.8805 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 19:44:53,561 - root - INFO - lr: 7.0369e-06 gnorm: 1.23 [21:10:42< 3:18:31] +[titan] 2025-10-05 19:45:02,273 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:45:04,448 - root - INFO - step: 34600 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 19:45:04,448 - root - INFO - lr: 7.0332e-06 gnorm: 1.18 [21:10:53< 3:18:20] +[titan] 2025-10-05 19:45:15,326 - root - INFO - step: 34605 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:45:15,326 - root - INFO - lr: 7.0295e-06 gnorm: 1.19 [21:11:03< 3:18:09] +[titan] 2025-10-05 19:45:26,191 - root - INFO - step: 34610 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:45:26,191 - root - INFO - lr: 7.0258e-06 gnorm: 1.20 [21:11:14< 3:17:58] +[titan] 2025-10-05 19:45:37,037 - root - INFO - step: 34615 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:45:37,037 - root - INFO - lr: 7.0221e-06 gnorm: 1.17 [21:11:25< 3:17:47] +[titan] 2025-10-05 19:45:47,905 - root - INFO - step: 34620 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:45:47,905 - root - INFO - lr: 7.0184e-06 gnorm: 1.23 [21:11:36< 3:17:36] +[titan] 2025-10-05 19:45:58,807 - root - INFO - step: 34625 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 19:45:58,807 - root - INFO - lr: 7.0147e-06 gnorm: 1.25 [21:11:47< 3:17:25] +[titan] 2025-10-05 19:46:09,704 - root - INFO - step: 34630 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 19:46:09,704 - root - INFO - lr: 
7.0110e-06 gnorm: 1.24 [21:11:58< 3:17:14] +[titan] 2025-10-05 19:46:20,566 - root - INFO - step: 34635 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 19:46:20,566 - root - INFO - lr: 7.0073e-06 gnorm: 1.25 [21:12:09< 3:17:03] +[titan] 2025-10-05 19:46:31,407 - root - INFO - step: 34640 loss: 1.9051 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 19:46:31,407 - root - INFO - lr: 7.0036e-06 gnorm: 1.18 [21:12:20< 3:16:52] +[titan] 2025-10-05 19:46:42,249 - root - INFO - step: 34645 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 19:46:42,249 - root - INFO - lr: 6.9999e-06 gnorm: 1.17 [21:12:30< 3:16:41] +[titan] 2025-10-05 19:46:50,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:46:53,097 - root - INFO - step: 34650 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7558 +[titan] 2025-10-05 19:46:53,097 - root - INFO - lr: 6.9963e-06 gnorm: 1.18 [21:12:41< 3:16:30] +[titan] 2025-10-05 19:47:03,992 - root - INFO - step: 34655 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 19:47:03,992 - root - INFO - lr: 6.9926e-06 gnorm: 1.23 [21:12:52< 3:16:19] +[titan] 2025-10-05 19:47:14,867 - root - INFO - step: 34660 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 19:47:14,867 - root - INFO - lr: 6.9889e-06 gnorm: 1.29 [21:13:03< 3:16:08] +[titan] 2025-10-05 19:47:25,759 - root - INFO - step: 34665 loss: 1.9370 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 19:47:25,759 - root - INFO - lr: 6.9853e-06 gnorm: 1.19 [21:13:14< 3:15:57] +[titan] 2025-10-05 19:47:36,638 - root - INFO - step: 34670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 19:47:36,639 - root - INFO - lr: 6.9816e-06 gnorm: 1.16 [21:13:25< 3:15:46] +[titan] 2025-10-05 19:47:47,526 - root - INFO - step: 34675 loss: 1.9202 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:47:47,526 - root - INFO - lr: 6.9779e-06 gnorm: 1.19 [21:13:36< 3:15:35] +[titan] 2025-10-05 19:47:58,418 - root - INFO - step: 34680 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 19:47:58,418 - root - INFO - lr: 
6.9743e-06 gnorm: 1.20 [21:13:47< 3:15:24] +[titan] 2025-10-05 19:48:09,256 - root - INFO - step: 34685 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6999 +[titan] 2025-10-05 19:48:09,256 - root - INFO - lr: 6.9706e-06 gnorm: 1.21 [21:13:57< 3:15:13] +[titan] 2025-10-05 19:48:20,111 - root - INFO - step: 34690 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 19:48:20,111 - root - INFO - lr: 6.9670e-06 gnorm: 1.22 [21:14:08< 3:15:02] +[titan] 2025-10-05 19:48:31,007 - root - INFO - step: 34695 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 19:48:31,007 - root - INFO - lr: 6.9633e-06 gnorm: 1.22 [21:14:19< 3:14:50] +[titan] 2025-10-05 19:48:39,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:48:41,855 - root - INFO - step: 34700 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 19:48:41,855 - root - INFO - lr: 6.9597e-06 gnorm: 1.18 [21:14:30< 3:14:39] +[titan] 2025-10-05 19:48:52,713 - root - INFO - step: 34705 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7370 +[titan] 2025-10-05 19:48:52,713 - root - INFO - lr: 6.9560e-06 gnorm: 1.17 [21:14:41< 3:14:28] +[titan] 2025-10-05 19:49:03,608 - root - INFO - step: 34710 loss: 1.9120 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6899 +[titan] 2025-10-05 19:49:03,608 - root - INFO - lr: 6.9524e-06 gnorm: 1.17 [21:14:52< 3:14:17] +[titan] 2025-10-05 19:49:14,465 - root - INFO - step: 34715 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:49:14,465 - root - INFO - lr: 6.9488e-06 gnorm: 1.22 [21:15:03< 3:14:06] +[titan] 2025-10-05 19:49:25,305 - root - INFO - step: 34720 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:49:25,305 - root - INFO - lr: 6.9451e-06 gnorm: 1.21 [21:15:13< 3:13:55] +[titan] 2025-10-05 19:49:36,189 - root - INFO - step: 34725 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:49:36,189 - root - INFO - lr: 6.9415e-06 gnorm: 1.18 [21:15:24< 3:13:44] +[titan] 2025-10-05 19:49:47,060 - root - INFO - step: 34730 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 19:49:47,060 - root - INFO - lr: 
6.9379e-06 gnorm: 1.21 [21:15:35< 3:13:33] +[titan] 2025-10-05 19:49:57,949 - root - INFO - step: 34735 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 19:49:57,949 - root - INFO - lr: 6.9343e-06 gnorm: 1.21 [21:15:46< 3:13:22] +[titan] 2025-10-05 19:50:08,803 - root - INFO - step: 34740 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 19:50:08,803 - root - INFO - lr: 6.9306e-06 gnorm: 1.24 [21:15:57< 3:13:11] +[titan] 2025-10-05 19:50:19,673 - root - INFO - step: 34745 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 19:50:19,673 - root - INFO - lr: 6.9270e-06 gnorm: 1.26 [21:16:08< 3:13:00] +[titan] 2025-10-05 19:50:28,354 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:50:30,535 - root - INFO - step: 34750 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7233 +[titan] 2025-10-05 19:50:30,535 - root - INFO - lr: 6.9234e-06 gnorm: 1.24 [21:16:19< 3:12:49] +[titan] 2025-10-05 19:50:41,406 - root - INFO - step: 34755 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:50:41,406 - root - INFO - lr: 6.9198e-06 gnorm: 1.19 [21:16:29< 3:12:38] +[titan] 2025-10-05 19:50:52,304 - root - INFO - step: 34760 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 19:50:52,304 - root - INFO - lr: 6.9162e-06 gnorm: 1.19 [21:16:40< 3:12:27] +[titan] 2025-10-05 19:51:03,222 - root - INFO - step: 34765 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 19:51:03,223 - root - INFO - lr: 6.9126e-06 gnorm: 1.23 [21:16:51< 3:12:16] +[titan] 2025-10-05 19:51:14,086 - root - INFO - step: 34770 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:51:14,086 - root - INFO - lr: 6.9090e-06 gnorm: 1.16 [21:17:02< 3:12:05] +[titan] 2025-10-05 19:51:24,963 - root - INFO - step: 34775 loss: 1.9641 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 19:51:24,963 - root - INFO - lr: 6.9054e-06 gnorm: 1.22 [21:17:13< 3:11:54] +[titan] 2025-10-05 19:51:35,828 - root - INFO - step: 34780 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 19:51:35,828 - root - INFO - lr: 
6.9018e-06 gnorm: 1.21 [21:17:24< 3:11:43] +[titan] 2025-10-05 19:51:46,685 - root - INFO - step: 34785 loss: 1.9053 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 19:51:46,685 - root - INFO - lr: 6.8982e-06 gnorm: 1.20 [21:17:35< 3:11:32] +[titan] 2025-10-05 19:51:57,587 - root - INFO - step: 34790 loss: 1.9201 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 19:51:57,587 - root - INFO - lr: 6.8946e-06 gnorm: 1.18 [21:17:46< 3:11:21] +[titan] 2025-10-05 19:52:08,485 - root - INFO - step: 34795 loss: 1.9967 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 19:52:08,485 - root - INFO - lr: 6.8910e-06 gnorm: 1.22 [21:17:57< 3:11:10] +[titan] 2025-10-05 19:52:17,185 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:52:19,378 - root - INFO - step: 34800 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 19:52:19,378 - root - INFO - lr: 6.8875e-06 gnorm: 1.20 [21:18:07< 3:10:59] +[titan] 2025-10-05 19:52:30,261 - root - INFO - step: 34805 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 19:52:30,261 - root - INFO - lr: 6.8839e-06 gnorm: 1.19 [21:18:18< 3:10:48] +[titan] 2025-10-05 19:52:41,146 - root - INFO - step: 34810 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 19:52:41,146 - root - INFO - lr: 6.8803e-06 gnorm: 1.20 [21:18:29< 3:10:37] +[titan] 2025-10-05 19:52:52,095 - root - INFO - step: 34815 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7667 +[titan] 2025-10-05 19:52:52,095 - root - INFO - lr: 6.8767e-06 gnorm: 1.23 [21:18:40< 3:10:26] +[titan] 2025-10-05 19:52:54,460 - root - INFO - Dumping profiler traces at step 34816 +[titan] 2025-10-05 19:52:54,497 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:53:03,216 - root - INFO - step: 34820 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 29,466 tflops: 408.80 mfu: 41.33% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:53:03,216 - root - INFO - lr: 6.8732e-06 gnorm: 1.20 [21:18:51< 3:10:15] +[titan] 2025-10-05 19:53:14,080 - root - INFO - step: 34825 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7156 +[titan] 2025-10-05 19:53:14,080 - root - INFO - lr: 6.8696e-06 gnorm: 1.17 [21:19:02< 3:10:03] +[titan] 2025-10-05 19:53:24,945 - root - INFO - step: 34830 loss: 
1.8794 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 19:53:24,945 - root - INFO - lr: 6.8661e-06 gnorm: 1.15 [21:19:13< 3:09:52] +[titan] 2025-10-05 19:53:35,780 - root - INFO - step: 34835 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 19:53:35,780 - root - INFO - lr: 6.8625e-06 gnorm: 1.16 [21:19:24< 3:09:41] +[titan] 2025-10-05 19:53:46,625 - root - INFO - step: 34840 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7678 +[titan] 2025-10-05 19:53:46,626 - root - INFO - lr: 6.8589e-06 gnorm: 1.21 [21:19:35< 3:09:30] +[titan] 2025-10-05 19:53:57,479 - root - INFO - step: 34845 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:53:57,479 - root - INFO - lr: 6.8554e-06 gnorm: 1.18 [21:19:46< 3:09:19] +[titan] 2025-10-05 19:54:06,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:54:08,348 - root - INFO - step: 34850 loss: 2.0208 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 19:54:08,348 - root - INFO - lr: 6.8518e-06 gnorm: 1.22 [21:19:56< 3:09:08] +[titan] 2025-10-05 19:54:19,236 - root - INFO - step: 34855 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 19:54:19,236 - root - INFO - lr: 6.8483e-06 gnorm: 1.19 [21:20:07< 3:08:57] +[titan] 2025-10-05 19:54:30,115 - root - INFO - step: 34860 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 19:54:30,115 - root - INFO - lr: 6.8448e-06 gnorm: 1.17 [21:20:18< 3:08:46] +[titan] 2025-10-05 19:54:40,989 - root - INFO - step: 34865 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 19:54:40,989 - root - INFO - lr: 6.8412e-06 gnorm: 1.22 [21:20:29< 3:08:35] +[titan] 2025-10-05 19:54:51,840 - root - INFO - step: 34870 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:54:51,840 - root - INFO - lr: 6.8377e-06 gnorm: 1.23 [21:20:40< 3:08:24] +[titan] 2025-10-05 19:55:02,739 - root - INFO - step: 34875 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:55:02,739 - root - INFO - lr: 6.8342e-06 gnorm: 1.18 [21:20:51< 3:08:13] +[titan] 2025-10-05 19:55:13,616 - root - INFO - step: 34880 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:55:13,616 - root - INFO - lr: 
6.8306e-06 gnorm: 1.19 [21:21:02< 3:08:02] +[titan] 2025-10-05 19:55:24,502 - root - INFO - step: 34885 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7556 +[titan] 2025-10-05 19:55:24,502 - root - INFO - lr: 6.8271e-06 gnorm: 1.20 [21:21:13< 3:07:51] +[titan] 2025-10-05 19:55:35,390 - root - INFO - step: 34890 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 19:55:35,390 - root - INFO - lr: 6.8236e-06 gnorm: 1.20 [21:21:23< 3:07:40] +[titan] 2025-10-05 19:55:46,234 - root - INFO - step: 34895 loss: 1.9281 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 19:55:46,234 - root - INFO - lr: 6.8201e-06 gnorm: 1.21 [21:21:34< 3:07:29] +[titan] 2025-10-05 19:55:54,939 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:55:57,124 - root - INFO - step: 34900 loss: 1.9752 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 19:55:57,124 - root - INFO - lr: 6.8166e-06 gnorm: 1.22 [21:21:45< 3:07:18] +[titan] 2025-10-05 19:56:07,979 - root - INFO - step: 34905 loss: 1.8773 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6604 +[titan] 2025-10-05 19:56:07,979 - root - INFO - lr: 6.8130e-06 gnorm: 1.27 [21:21:56< 3:07:07] +[titan] 2025-10-05 19:56:18,858 - root - INFO - step: 34910 loss: 1.9375 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:56:18,858 - root - INFO - lr: 6.8095e-06 gnorm: 1.28 [21:22:07< 3:06:56] +[titan] 2025-10-05 19:56:29,723 - root - INFO - step: 34915 loss: 1.9603 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:56:29,723 - root - INFO - lr: 6.8060e-06 gnorm: 1.20 [21:22:18< 3:06:45] +[titan] 2025-10-05 19:56:40,632 - root - INFO - step: 34920 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:56:40,632 - root - INFO - lr: 6.8025e-06 gnorm: 1.20 [21:22:29< 3:06:34] +[titan] 2025-10-05 19:56:51,542 - root - INFO - step: 34925 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 19:56:51,542 - root - INFO - lr: 6.7990e-06 gnorm: 1.19 [21:22:40< 3:06:23] +[titan] 2025-10-05 19:57:02,433 - root - INFO - step: 34930 loss: 1.8978 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6780 +[titan] 2025-10-05 19:57:02,433 - root - INFO - lr: 
6.7955e-06 gnorm: 1.19 [21:22:51< 3:06:12] +[titan] 2025-10-05 19:57:13,339 - root - INFO - step: 34935 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 19:57:13,339 - root - INFO - lr: 6.7920e-06 gnorm: 1.19 [21:23:01< 3:06:01] +[titan] 2025-10-05 19:57:24,225 - root - INFO - step: 34940 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 19:57:24,225 - root - INFO - lr: 6.7886e-06 gnorm: 1.21 [21:23:12< 3:05:50] +[titan] 2025-10-05 19:57:35,111 - root - INFO - step: 34945 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:57:35,112 - root - INFO - lr: 6.7851e-06 gnorm: 1.20 [21:23:23< 3:05:39] +[titan] 2025-10-05 19:57:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:57:46,109 - root - INFO - step: 34950 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 29,796 tflops: 413.37 mfu: 41.80% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:57:46,109 - root - INFO - lr: 6.7816e-06 gnorm: 1.21 [21:23:34< 3:05:28] +[titan] 2025-10-05 19:57:56,976 - root - INFO - step: 34955 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 19:57:56,976 - root - INFO - lr: 6.7781e-06 gnorm: 1.21 [21:23:45< 3:05:16] +[titan] 2025-10-05 19:58:07,860 - root - INFO - step: 34960 loss: 1.8843 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 19:58:07,860 - root - INFO - lr: 6.7746e-06 gnorm: 1.18 [21:23:56< 3:05:05] +[titan] 2025-10-05 19:58:18,737 - root - INFO - step: 34965 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 19:58:18,737 - root - INFO - lr: 6.7712e-06 gnorm: 1.21 [21:24:07< 3:04:54] +[titan] 2025-10-05 19:58:29,592 - root - INFO - step: 34970 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 19:58:29,592 - root - INFO - lr: 6.7677e-06 gnorm: 2.00 [21:24:18< 3:04:43] +[titan] 2025-10-05 19:58:40,452 - root - INFO - step: 34975 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 19:58:40,453 - root - INFO - lr: 6.7642e-06 gnorm: 1.24 [21:24:29< 3:04:32] +[titan] 2025-10-05 19:58:51,317 - root - INFO - step: 34980 loss: 1.8424 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6294 +[titan] 2025-10-05 19:58:51,317 - root - INFO - lr: 
6.7608e-06 gnorm: 1.20 [21:24:39< 3:04:21] +[titan] 2025-10-05 19:59:02,209 - root - INFO - step: 34985 loss: 2.0210 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 19:59:02,209 - root - INFO - lr: 6.7573e-06 gnorm: 1.25 [21:24:50< 3:04:10] +[titan] 2025-10-05 19:59:13,085 - root - INFO - step: 34990 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 19:59:13,085 - root - INFO - lr: 6.7538e-06 gnorm: 1.21 [21:25:01< 3:03:59] +[titan] 2025-10-05 19:59:23,963 - root - INFO - step: 34995 loss: 1.9729 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7447 +[titan] 2025-10-05 19:59:23,964 - root - INFO - lr: 6.7504e-06 gnorm: 1.20 [21:25:12< 3:03:48] +[titan] 2025-10-05 19:59:32,669 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 19:59:34,852 - root - INFO - step: 35000 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 19:59:34,852 - root - INFO - lr: 6.7469e-06 gnorm: 1.20 [21:25:23< 3:03:37] +[titan] 2025-10-05 19:59:34,852 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 19:59:52,585 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 19:59:52,585 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.73 seconds. 
+[titan] 2025-10-05 20:02:00,815 - root - INFO - step: 35005 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 2,245 tflops: 31.15 mfu: 3.15% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 20:02:00,815 - root - INFO - lr: 6.7435e-06 gnorm: 1.17 [21:27:49< 3:03:45] +[titan] 2025-10-05 20:02:11,608 - root - INFO - step: 35010 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,362 tflops: 421.22 mfu: 42.59% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 20:02:11,608 - root - INFO - lr: 6.7401e-06 gnorm: 1.25 [21:28:00< 3:03:34] +[titan] 2025-10-05 20:02:22,413 - root - INFO - step: 35015 loss: 1.8869 memory: 118.84GiB(85.28%) tps: 30,329 tflops: 420.77 mfu: 42.55% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 20:02:22,413 - root - INFO - lr: 6.7366e-06 gnorm: 1.21 [21:28:10< 3:03:23] +[titan] 2025-10-05 20:02:33,281 - root - INFO - step: 35020 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 20:02:33,281 - root - INFO - lr: 6.7332e-06 gnorm: 1.19 [21:28:21< 3:03:12] +[titan] 2025-10-05 20:02:44,100 - root - INFO - step: 35025 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:02:44,100 - root - INFO - lr: 6.7297e-06 gnorm: 1.17 [21:28:32< 3:03:01] +[titan] 2025-10-05 20:02:54,948 - root - INFO - step: 35030 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 20:02:54,948 - root - INFO - lr: 6.7263e-06 gnorm: 1.23 [21:28:43< 3:02:50] +[titan] 2025-10-05 20:03:05,780 - root - INFO - step: 35035 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:03:05,780 - root - INFO - lr: 
6.7229e-06 gnorm: 1.24 [21:28:54< 3:02:39] +[titan] 2025-10-05 20:03:16,638 - root - INFO - step: 35040 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 20:03:16,638 - root - INFO - lr: 6.7195e-06 gnorm: 1.23 [21:29:05< 3:02:28] +[titan] 2025-10-05 20:03:27,560 - root - INFO - step: 35045 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 20:03:27,560 - root - INFO - lr: 6.7160e-06 gnorm: 1.19 [21:29:16< 3:02:17] +[titan] 2025-10-05 20:03:36,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 20:03:38,413 - root - INFO - step: 35050 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7064 +[titan] 2025-10-05 20:03:38,413 - root - INFO - lr: 6.7126e-06 gnorm: 1.23 [21:29:26< 3:02:06] +[titan] 2025-10-05 20:03:49,265 - root - INFO - step: 35055 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 20:03:49,265 - root - INFO - lr: 6.7092e-06 gnorm: 1.18 [21:29:37< 3:01:55] +[titan] 2025-10-05 20:04:00,143 - root - INFO - step: 35060 loss: 1.9047 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6842 +[titan] 2025-10-05 20:04:00,143 - root - INFO - lr: 6.7058e-06 gnorm: 1.22 [21:29:48< 3:01:44] +[titan] 2025-10-05 20:04:11,001 - root - INFO - step: 35065 loss: 1.8697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 20:04:11,001 - root - INFO - lr: 6.7024e-06 gnorm: 1.21 [21:29:59< 3:01:33] +[titan] 2025-10-05 20:04:21,863 - root - INFO - step: 35070 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 
30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:04:21,863 - root - INFO - lr: 6.6990e-06 gnorm: 1.24 [21:30:10< 3:01:22] +[titan] 2025-10-05 20:04:32,799 - root - INFO - step: 35075 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 20:04:32,799 - root - INFO - lr: 6.6956e-06 gnorm: 1.21 [21:30:21< 3:01:10] +[titan] 2025-10-05 20:04:43,675 - root - INFO - step: 35080 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 20:04:43,675 - root - INFO - lr: 6.6922e-06 gnorm: 1.15 [21:30:32< 3:00:59] +[titan] 2025-10-05 20:04:54,541 - root - INFO - step: 35085 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 20:04:54,542 - root - INFO - lr: 6.6888e-06 gnorm: 1.19 [21:30:43< 3:00:48] +[titan] 2025-10-05 20:05:05,402 - root - INFO - step: 35090 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:05:05,402 - root - INFO - lr: 6.6854e-06 gnorm: 1.18 [21:30:53< 3:00:37] +[titan] 2025-10-05 20:05:16,263 - root - INFO - step: 35095 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:05:16,263 - root - INFO - lr: 6.6820e-06 gnorm: 1.22 [21:31:04< 3:00:26] +[titan] 2025-10-05 20:05:24,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:05:27,145 - root - INFO - step: 35100 loss: 1.9245 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7014 +[titan] 2025-10-05 20:05:27,145 - root - INFO - lr: 6.6786e-06 gnorm: 1.23 [21:31:15< 3:00:15] +[titan] 2025-10-05 20:05:38,035 - root - INFO - step: 35105 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 20:05:38,035 - root - INFO - lr: 6.6753e-06 gnorm: 1.17 [21:31:26< 3:00:04] +[titan] 2025-10-05 20:05:48,877 - root - INFO - step: 35110 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7243 +[titan] 2025-10-05 20:05:48,877 - root - INFO - lr: 6.6719e-06 gnorm: 1.15 [21:31:37< 2:59:53] +[titan] 2025-10-05 20:05:59,749 - root - INFO - step: 35115 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:05:59,749 - root - INFO - lr: 6.6685e-06 gnorm: 1.20 [21:31:48< 2:59:42] +[titan] 2025-10-05 20:06:10,605 - root - INFO - step: 35120 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:06:10,606 - root - INFO - lr: 6.6651e-06 gnorm: 1.17 [21:31:59< 2:59:31] +[titan] 2025-10-05 20:06:21,451 - root - INFO - step: 35125 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:06:21,451 - root - INFO - lr: 6.6618e-06 gnorm: 1.20 [21:32:10< 2:59:20] +[titan] 2025-10-05 20:06:32,365 - root - INFO - step: 35130 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:06:32,365 - root - INFO - lr: 
6.6584e-06 gnorm: 1.23 [21:32:20< 2:59:09] +[titan] 2025-10-05 20:06:43,231 - root - INFO - step: 35135 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7050 +[titan] 2025-10-05 20:06:43,232 - root - INFO - lr: 6.6550e-06 gnorm: 1.18 [21:32:31< 2:58:58] +[titan] 2025-10-05 20:06:54,140 - root - INFO - step: 35140 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 20:06:54,140 - root - INFO - lr: 6.6517e-06 gnorm: 1.23 [21:32:42< 2:58:47] +[titan] 2025-10-05 20:07:05,022 - root - INFO - step: 35145 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 20:07:05,022 - root - INFO - lr: 6.6483e-06 gnorm: 1.23 [21:32:53< 2:58:36] +[titan] 2025-10-05 20:07:13,698 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:07:15,932 - root - INFO - step: 35150 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6718 +[titan] 2025-10-05 20:07:15,932 - root - INFO - lr: 6.6450e-06 gnorm: 1.21 [21:33:04< 2:58:25] +[titan] 2025-10-05 20:07:26,828 - root - INFO - step: 35155 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:07:26,828 - root - INFO - lr: 6.6416e-06 gnorm: 1.16 [21:33:15< 2:58:14] +[titan] 2025-10-05 20:07:37,740 - root - INFO - step: 35160 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 20:07:37,740 - root - INFO - lr: 6.6383e-06 gnorm: 1.17 [21:33:26< 2:58:03] +[titan] 2025-10-05 20:07:48,623 - root - INFO - step: 35165 loss: 1.9332 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 20:07:48,623 - root - INFO - lr: 6.6349e-06 gnorm: 1.21 [21:33:37< 2:57:51] +[titan] 2025-10-05 20:07:59,524 - root - INFO - step: 35170 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 20:07:59,524 - root - INFO - lr: 6.6316e-06 gnorm: 1.20 [21:33:48< 2:57:40] +[titan] 2025-10-05 20:08:10,396 - root - INFO - step: 35175 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:08:10,396 - root - INFO - lr: 6.6283e-06 gnorm: 1.19 [21:33:58< 2:57:29] +[titan] 2025-10-05 20:08:21,269 - root - INFO - step: 35180 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:08:21,270 - root - INFO - lr: 
6.6249e-06 gnorm: 1.19 [21:34:09< 2:57:18] +[titan] 2025-10-05 20:08:32,174 - root - INFO - step: 35185 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 20:08:32,175 - root - INFO - lr: 6.6216e-06 gnorm: 1.21 [21:34:20< 2:57:07] +[titan] 2025-10-05 20:08:43,054 - root - INFO - step: 35190 loss: 1.9950 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7626 +[titan] 2025-10-05 20:08:43,054 - root - INFO - lr: 6.6183e-06 gnorm: 1.21 [21:34:31< 2:56:56] +[titan] 2025-10-05 20:08:53,935 - root - INFO - step: 35195 loss: 1.9405 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7155 +[titan] 2025-10-05 20:08:53,935 - root - INFO - lr: 6.6150e-06 gnorm: 1.23 [21:34:42< 2:56:45] +[titan] 2025-10-05 20:09:02,614 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:09:04,794 - root - INFO - step: 35200 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 20:09:04,794 - root - INFO - lr: 6.6116e-06 gnorm: 1.17 [21:34:53< 2:56:34] +[titan] 2025-10-05 20:09:15,694 - root - INFO - step: 35205 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:09:15,695 - root - INFO - lr: 6.6083e-06 gnorm: 1.21 [21:35:04< 2:56:23] +[titan] 2025-10-05 20:09:26,591 - root - INFO - step: 35210 loss: 1.9224 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:09:26,591 - root - INFO - lr: 6.6050e-06 gnorm: 1.21 [21:35:15< 2:56:12] +[titan] 2025-10-05 20:09:37,512 - root - INFO - step: 35215 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 20:09:37,512 - root - INFO - lr: 6.6017e-06 gnorm: 1.22 [21:35:26< 2:56:01] +[titan] 2025-10-05 20:09:48,396 - root - INFO - step: 35220 loss: 1.9286 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7057 +[titan] 2025-10-05 20:09:48,396 - root - INFO - lr: 6.5984e-06 gnorm: 1.23 [21:35:36< 2:55:50] +[titan] 2025-10-05 20:09:59,291 - root - INFO - step: 35225 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 20:09:59,291 - root - INFO - lr: 6.5951e-06 gnorm: 1.22 [21:35:47< 2:55:39] +[titan] 2025-10-05 20:10:10,147 - root - INFO - step: 35230 loss: 1.9319 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 20:10:10,147 - root - INFO - lr: 
6.5918e-06 gnorm: 1.26 [21:35:58< 2:55:28] +[titan] 2025-10-05 20:10:21,054 - root - INFO - step: 35235 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 20:10:21,055 - root - INFO - lr: 6.5885e-06 gnorm: 1.18 [21:36:09< 2:55:17] +[titan] 2025-10-05 20:10:31,940 - root - INFO - step: 35240 loss: 1.8612 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:10:31,940 - root - INFO - lr: 6.5852e-06 gnorm: 1.16 [21:36:20< 2:55:06] +[titan] 2025-10-05 20:10:42,806 - root - INFO - step: 35245 loss: 2.0002 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 20:10:42,807 - root - INFO - lr: 6.5819e-06 gnorm: 1.22 [21:36:31< 2:54:55] +[titan] 2025-10-05 20:10:51,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:10:53,701 - root - INFO - step: 35250 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 20:10:53,701 - root - INFO - lr: 6.5786e-06 gnorm: 1.21 [21:36:42< 2:54:43] +[titan] 2025-10-05 20:11:04,581 - root - INFO - step: 35255 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 20:11:04,581 - root - INFO - lr: 6.5754e-06 gnorm: 1.20 [21:36:53< 2:54:32] +[titan] 2025-10-05 20:11:15,487 - root - INFO - step: 35260 loss: 1.9259 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:11:15,487 - root - INFO - lr: 6.5721e-06 gnorm: 1.23 [21:37:04< 2:54:21] +[titan] 2025-10-05 20:11:26,398 - root - INFO - step: 35265 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:11:26,398 - root - INFO - lr: 6.5688e-06 gnorm: 1.23 [21:37:14< 2:54:10] +[titan] 2025-10-05 20:11:37,313 - root - INFO - step: 35270 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:11:37,314 - root - INFO - lr: 6.5655e-06 gnorm: 1.21 [21:37:25< 2:53:59] +[titan] 2025-10-05 20:11:48,214 - root - INFO - step: 35275 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 20:11:48,214 - root - INFO - lr: 6.5623e-06 gnorm: 1.24 [21:37:36< 2:53:48] +[titan] 2025-10-05 20:11:59,075 - root - INFO - step: 35280 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 20:11:59,075 - root - INFO - lr: 
6.5590e-06 gnorm: 1.20 [21:37:47< 2:53:37] +[titan] 2025-10-05 20:12:09,938 - root - INFO - step: 35285 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 20:12:09,939 - root - INFO - lr: 6.5557e-06 gnorm: 1.21 [21:37:58< 2:53:26] +[titan] 2025-10-05 20:12:20,821 - root - INFO - step: 35290 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 20:12:20,821 - root - INFO - lr: 6.5525e-06 gnorm: 1.18 [21:38:09< 2:53:15] +[titan] 2025-10-05 20:12:31,713 - root - INFO - step: 35295 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 20:12:31,713 - root - INFO - lr: 6.5492e-06 gnorm: 1.23 [21:38:20< 2:53:04] +[titan] 2025-10-05 20:12:40,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:12:42,667 - root - INFO - step: 35300 loss: 1.9229 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:12:42,667 - root - INFO - lr: 6.5460e-06 gnorm: 1.23 [21:38:31< 2:52:53] +[titan] 2025-10-05 20:12:53,570 - root - INFO - step: 35305 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 20:12:53,570 - root - INFO - lr: 6.5427e-06 gnorm: 1.21 [21:38:42< 2:52:42] +[titan] 2025-10-05 20:13:04,452 - root - INFO - step: 35310 loss: 1.9317 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7088 +[titan] 2025-10-05 20:13:04,452 - root - INFO - lr: 6.5395e-06 gnorm: 1.22 [21:38:52< 2:52:31] +[titan] 2025-10-05 20:13:15,334 - root - INFO - step: 35315 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 20:13:15,335 - root - INFO - lr: 6.5362e-06 gnorm: 1.20 [21:39:03< 2:52:20] +[titan] 2025-10-05 20:13:26,220 - root - INFO - step: 35320 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 20:13:26,221 - root - INFO - lr: 6.5330e-06 gnorm: 1.19 [21:39:14< 2:52:09] +[titan] 2025-10-05 20:13:37,204 - root - INFO - step: 35325 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 29,834 tflops: 413.90 mfu: 41.85% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 20:13:37,204 - root - INFO - lr: 6.5297e-06 gnorm: 1.20 [21:39:25< 2:51:58] +[titan] 2025-10-05 20:13:43,900 - root - INFO - Dumping profiler traces at step 35328 +[titan] 2025-10-05 20:13:43,937 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:13:48,348 - root - INFO - step: 35330 loss: 
1.9326 memory: 118.84GiB(85.28%) tps: 29,406 tflops: 407.96 mfu: 41.25% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:13:48,348 - root - INFO - lr: 6.5265e-06 gnorm: 1.23 [21:39:36< 2:51:47] +[titan] 2025-10-05 20:13:59,245 - root - INFO - step: 35335 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7238 +[titan] 2025-10-05 20:13:59,246 - root - INFO - lr: 6.5233e-06 gnorm: 1.23 [21:39:47< 2:51:36] +[titan] 2025-10-05 20:14:10,148 - root - INFO - step: 35340 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:14:10,149 - root - INFO - lr: 6.5201e-06 gnorm: 1.24 [21:39:58< 2:51:25] +[titan] 2025-10-05 20:14:21,047 - root - INFO - step: 35345 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7363 +[titan] 2025-10-05 20:14:21,047 - root - INFO - lr: 6.5168e-06 gnorm: 1.20 [21:40:09< 2:51:14] +[titan] 2025-10-05 20:14:29,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:14:31,931 - root - INFO - step: 35350 loss: 1.9071 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:14:31,931 - root - INFO - lr: 6.5136e-06 gnorm: 1.22 [21:40:20< 2:51:02] +[titan] 2025-10-05 20:14:42,833 - root - INFO - step: 35355 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6814 +[titan] 2025-10-05 20:14:42,833 - root - INFO - lr: 6.5104e-06 gnorm: 1.18 [21:40:31< 2:50:51] +[titan] 2025-10-05 20:14:53,713 - root - INFO - step: 35360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 20:14:53,714 - root - INFO - lr: 6.5072e-06 gnorm: 1.22 [21:40:42< 2:50:40] +[titan] 2025-10-05 20:15:04,622 - root - INFO - step: 35365 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:15:04,622 - root - INFO - lr: 6.5040e-06 gnorm: 1.20 [21:40:53< 2:50:29] +[titan] 2025-10-05 20:15:15,532 - root - INFO - step: 35370 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:15:15,532 - root - INFO - lr: 6.5008e-06 gnorm: 1.21 [21:41:04< 2:50:18] +[titan] 2025-10-05 20:15:26,422 - root - INFO - step: 35375 loss: 1.9139 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 20:15:26,422 - root - INFO - lr: 6.4976e-06 gnorm: 1.20 [21:41:14< 2:50:07] +[titan] 2025-10-05 20:15:37,640 - root - INFO - step: 35380 loss: 1.9110 memory: 118.84GiB(85.28%) tps: 29,212 tflops: 405.27 mfu: 40.98% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:15:37,640 - root - INFO - lr: 
6.4944e-06 gnorm: 1.19 [21:41:26< 2:49:56] +[titan] 2025-10-05 20:15:48,502 - root - INFO - step: 35385 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6841 +[titan] 2025-10-05 20:15:48,502 - root - INFO - lr: 6.4912e-06 gnorm: 1.22 [21:41:37< 2:49:45] +[titan] 2025-10-05 20:15:59,387 - root - INFO - step: 35390 loss: 1.9078 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 20:15:59,387 - root - INFO - lr: 6.4880e-06 gnorm: 1.27 [21:41:47< 2:49:34] +[titan] 2025-10-05 20:16:10,282 - root - INFO - step: 35395 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7479 +[titan] 2025-10-05 20:16:10,282 - root - INFO - lr: 6.4848e-06 gnorm: 1.21 [21:41:58< 2:49:23] +[titan] 2025-10-05 20:16:18,973 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:16:21,149 - root - INFO - step: 35400 loss: 1.8914 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6724 +[titan] 2025-10-05 20:16:21,149 - root - INFO - lr: 6.4816e-06 gnorm: 1.20 [21:42:09< 2:49:12] +[titan] 2025-10-05 20:16:32,037 - root - INFO - step: 35405 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7194 +[titan] 2025-10-05 20:16:32,038 - root - INFO - lr: 6.4784e-06 gnorm: 1.23 [21:42:20< 2:49:01] +[titan] 2025-10-05 20:16:42,971 - root - INFO - step: 35410 loss: 1.9290 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:16:42,971 - root - INFO - lr: 6.4752e-06 gnorm: 1.20 [21:42:31< 2:48:50] +[titan] 2025-10-05 20:16:53,840 - root - INFO - step: 35415 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:16:53,840 - root - INFO - lr: 6.4721e-06 gnorm: 1.17 [21:42:42< 2:48:39] +[titan] 2025-10-05 20:17:04,705 - root - INFO - step: 35420 loss: 1.9333 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:17:04,705 - root - INFO - lr: 6.4689e-06 gnorm: 1.23 [21:42:53< 2:48:28] +[titan] 2025-10-05 20:17:15,612 - root - INFO - step: 35425 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 20:17:15,612 - root - INFO - lr: 6.4657e-06 gnorm: 1.20 [21:43:04< 2:48:17] +[titan] 2025-10-05 20:17:26,485 - root - INFO - step: 35430 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:17:26,485 - root - INFO - lr: 
6.4625e-06 gnorm: 1.18 [21:43:15< 2:48:06] +[titan] 2025-10-05 20:17:37,354 - root - INFO - step: 35435 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:17:37,354 - root - INFO - lr: 6.4594e-06 gnorm: 1.23 [21:43:25< 2:47:55] +[titan] 2025-10-05 20:17:48,271 - root - INFO - step: 35440 loss: 1.9162 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:17:48,271 - root - INFO - lr: 6.4562e-06 gnorm: 1.18 [21:43:36< 2:47:44] +[titan] 2025-10-05 20:17:59,161 - root - INFO - step: 35445 loss: 1.9393 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:17:59,161 - root - INFO - lr: 6.4531e-06 gnorm: 1.21 [21:43:47< 2:47:32] +[titan] 2025-10-05 20:18:07,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:18:10,014 - root - INFO - step: 35450 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:18:10,014 - root - INFO - lr: 6.4499e-06 gnorm: 1.20 [21:43:58< 2:47:21] +[titan] 2025-10-05 20:18:20,865 - root - INFO - step: 35455 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 20:18:20,865 - root - INFO - lr: 6.4468e-06 gnorm: 1.25 [21:44:09< 2:47:10] +[titan] 2025-10-05 20:18:31,752 - root - INFO - step: 35460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.6977 +[titan] 2025-10-05 20:18:31,752 - root - INFO - lr: 6.4436e-06 gnorm: 1.27 [21:44:20< 2:46:59] +[titan] 2025-10-05 20:18:42,672 - root - INFO - step: 35465 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:18:42,672 - root - INFO - lr: 6.4405e-06 gnorm: 1.21 [21:44:31< 2:46:48] +[titan] 2025-10-05 20:18:53,523 - root - INFO - step: 35470 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 20:18:53,523 - root - INFO - lr: 6.4373e-06 gnorm: 1.22 [21:44:42< 2:46:37] +[titan] 2025-10-05 20:19:04,397 - root - INFO - step: 35475 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 20:19:04,397 - root - INFO - lr: 6.4342e-06 gnorm: 1.20 [21:44:52< 2:46:26] +[titan] 2025-10-05 20:19:15,272 - root - INFO - step: 35480 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:19:15,272 - root - INFO - lr: 
6.4311e-06 gnorm: 1.15 [21:45:03< 2:46:15] +[titan] 2025-10-05 20:19:26,134 - root - INFO - step: 35485 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6964 +[titan] 2025-10-05 20:19:26,134 - root - INFO - lr: 6.4279e-06 gnorm: 1.22 [21:45:14< 2:46:04] +[titan] 2025-10-05 20:19:37,003 - root - INFO - step: 35490 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:19:37,003 - root - INFO - lr: 6.4248e-06 gnorm: 1.22 [21:45:25< 2:45:53] +[titan] 2025-10-05 20:19:48,060 - root - INFO - step: 35495 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.18 mfu: 41.57% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:19:48,060 - root - INFO - lr: 6.4217e-06 gnorm: 1.17 [21:45:36< 2:45:42] +[titan] 2025-10-05 20:19:56,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:19:58,930 - root - INFO - step: 35500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 20:19:58,930 - root - INFO - lr: 6.4186e-06 gnorm: 1.23 [21:45:47< 2:45:31] +[titan] 2025-10-05 20:20:09,779 - root - INFO - step: 35505 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:20:09,779 - root - INFO - lr: 6.4154e-06 gnorm: 1.19 [21:45:58< 2:45:20] +[titan] 2025-10-05 20:20:20,670 - root - INFO - step: 35510 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:20:20,671 - root - INFO - lr: 6.4123e-06 gnorm: 1.22 [21:46:09< 2:45:09] +[titan] 2025-10-05 20:20:31,543 - root - INFO - step: 35515 loss: 1.8943 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 20:20:31,543 - root - INFO - lr: 6.4092e-06 gnorm: 1.24 [21:46:20< 2:44:58] +[titan] 2025-10-05 20:20:42,433 - root - INFO - step: 35520 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:20:42,433 - root - INFO - lr: 6.4061e-06 gnorm: 1.20 [21:46:30< 2:44:47] +[titan] 2025-10-05 20:20:53,334 - root - INFO - step: 35525 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7142 +[titan] 2025-10-05 20:20:53,334 - root - INFO - lr: 6.4030e-06 gnorm: 1.24 [21:46:41< 2:44:36] +[titan] 2025-10-05 20:21:04,211 - root - INFO - step: 35530 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7544 +[titan] 2025-10-05 20:21:04,211 - root - INFO - lr: 
6.3999e-06 gnorm: 1.20 [21:46:52< 2:44:25] +[titan] 2025-10-05 20:21:15,077 - root - INFO - step: 35535 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 20:21:15,077 - root - INFO - lr: 6.3968e-06 gnorm: 1.22 [21:47:03< 2:44:13] +[titan] 2025-10-05 20:21:25,947 - root - INFO - step: 35540 loss: 2.0043 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 20:21:25,947 - root - INFO - lr: 6.3937e-06 gnorm: 1.23 [21:47:14< 2:44:02] +[titan] 2025-10-05 20:21:36,813 - root - INFO - step: 35545 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:21:36,813 - root - INFO - lr: 6.3906e-06 gnorm: 1.26 [21:47:25< 2:43:51] +[titan] 2025-10-05 20:21:45,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:21:47,733 - root - INFO - step: 35550 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 20:21:47,733 - root - INFO - lr: 6.3875e-06 gnorm: 1.29 [21:47:36< 2:43:40] +[titan] 2025-10-05 20:21:58,624 - root - INFO - step: 35555 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6918 +[titan] 2025-10-05 20:21:58,624 - root - INFO - lr: 6.3845e-06 gnorm: 1.20 [21:47:47< 2:43:29] +[titan] 2025-10-05 20:22:09,503 - root - INFO - step: 35560 loss: 1.8840 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 20:22:09,503 - root - INFO - lr: 6.3814e-06 gnorm: 1.18 [21:47:58< 2:43:18] +[titan] 2025-10-05 20:22:20,399 - root - INFO - step: 35565 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7435 +[titan] 2025-10-05 20:22:20,399 - root - INFO - lr: 6.3783e-06 gnorm: 1.22 [21:48:08< 2:43:07] +[titan] 2025-10-05 20:22:31,288 - root - INFO - step: 35570 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 20:22:31,288 - root - INFO - lr: 6.3752e-06 gnorm: 1.21 [21:48:19< 2:42:56] +[titan] 2025-10-05 20:22:42,161 - root - INFO - step: 35575 loss: 1.9928 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 20:22:42,162 - root - INFO - lr: 6.3722e-06 gnorm: 1.31 [21:48:30< 2:42:45] +[titan] 2025-10-05 20:22:53,096 - root - INFO - step: 35580 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 20:22:53,096 - root - INFO - lr: 
6.3691e-06 gnorm: 1.25 [21:48:41< 2:42:34] +[titan] 2025-10-05 20:23:04,005 - root - INFO - step: 35585 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 20:23:04,005 - root - INFO - lr: 6.3660e-06 gnorm: 1.22 [21:48:52< 2:42:23] +[titan] 2025-10-05 20:23:14,874 - root - INFO - step: 35590 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6686 +[titan] 2025-10-05 20:23:14,874 - root - INFO - lr: 6.3630e-06 gnorm: 1.15 [21:49:03< 2:42:12] +[titan] 2025-10-05 20:23:25,760 - root - INFO - step: 35595 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 20:23:25,760 - root - INFO - lr: 6.3599e-06 gnorm: 1.26 [21:49:14< 2:42:01] +[titan] 2025-10-05 20:23:34,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:23:36,637 - root - INFO - step: 35600 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:23:36,637 - root - INFO - lr: 6.3568e-06 gnorm: 1.19 [21:49:25< 2:41:50] +[titan] 2025-10-05 20:23:47,564 - root - INFO - step: 35605 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 20:23:47,564 - root - INFO - lr: 6.3538e-06 gnorm: 1.18 [21:49:36< 2:41:39] +[titan] 2025-10-05 20:23:58,425 - root - INFO - step: 35610 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 20:23:58,425 - root - INFO - lr: 6.3508e-06 gnorm: 1.20 [21:49:46< 2:41:28] +[titan] 2025-10-05 20:24:09,278 - root - INFO - step: 35615 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 20:24:09,279 - root - INFO - lr: 6.3477e-06 gnorm: 1.21 [21:49:57< 2:41:17] +[titan] 2025-10-05 20:24:20,176 - root - INFO - step: 35620 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 20:24:20,176 - root - INFO - lr: 6.3447e-06 gnorm: 1.21 [21:50:08< 2:41:06] +[titan] 2025-10-05 20:24:31,048 - root - INFO - step: 35625 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7146 +[titan] 2025-10-05 20:24:31,049 - root - INFO - lr: 6.3416e-06 gnorm: 1.18 [21:50:19< 2:40:55] +[titan] 2025-10-05 20:24:41,914 - root - INFO - step: 35630 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 20:24:41,914 - root - INFO - lr: 
6.3386e-06 gnorm: 1.22 [21:50:30< 2:40:43] +[titan] 2025-10-05 20:24:52,829 - root - INFO - step: 35635 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:24:52,829 - root - INFO - lr: 6.3356e-06 gnorm: 1.18 [21:50:41< 2:40:32] +[titan] 2025-10-05 20:25:03,707 - root - INFO - step: 35640 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:25:03,707 - root - INFO - lr: 6.3325e-06 gnorm: 1.23 [21:50:52< 2:40:21] +[titan] 2025-10-05 20:25:14,555 - root - INFO - step: 35645 loss: 1.8684 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 20:25:14,556 - root - INFO - lr: 6.3295e-06 gnorm: 1.19 [21:51:03< 2:40:10] +[titan] 2025-10-05 20:25:23,268 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:25:25,451 - root - INFO - step: 35650 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7187 +[titan] 2025-10-05 20:25:25,451 - root - INFO - lr: 6.3265e-06 gnorm: 1.20 [21:51:13< 2:39:59] +[titan] 2025-10-05 20:25:36,291 - root - INFO - step: 35655 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 20:25:36,291 - root - INFO - lr: 6.3235e-06 gnorm: 1.22 [21:51:24< 2:39:48] +[titan] 2025-10-05 20:25:47,198 - root - INFO - step: 35660 loss: 1.9669 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 20:25:47,199 - root - INFO - lr: 6.3205e-06 gnorm: 1.19 [21:51:35< 2:39:37] +[titan] 2025-10-05 20:25:58,057 - root - INFO - step: 35665 loss: 1.9343 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 20:25:58,057 - root - INFO - lr: 6.3174e-06 gnorm: 1.22 [21:51:46< 2:39:26] +[titan] 2025-10-05 20:26:08,933 - root - INFO - step: 35670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:26:08,934 - root - INFO - lr: 6.3144e-06 gnorm: 1.20 [21:51:57< 2:39:15] +[titan] 2025-10-05 20:26:19,799 - root - INFO - step: 35675 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 20:26:19,799 - root - INFO - lr: 6.3114e-06 gnorm: 1.25 [21:52:08< 2:39:04] +[titan] 2025-10-05 20:26:30,670 - root - INFO - step: 35680 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 20:26:30,670 - root - INFO - lr: 
6.3084e-06 gnorm: 1.22 [21:52:19< 2:38:53] +[titan] 2025-10-05 20:26:41,581 - root - INFO - step: 35685 loss: 2.0069 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 20:26:41,581 - root - INFO - lr: 6.3054e-06 gnorm: 1.29 [21:52:30< 2:38:42] +[titan] 2025-10-05 20:26:52,517 - root - INFO - step: 35690 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:26:52,517 - root - INFO - lr: 6.3024e-06 gnorm: 1.23 [21:52:41< 2:38:31] +[titan] 2025-10-05 20:27:03,395 - root - INFO - step: 35695 loss: 1.9599 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7330 +[titan] 2025-10-05 20:27:03,396 - root - INFO - lr: 6.2995e-06 gnorm: 1.23 [21:52:51< 2:38:20] +[titan] 2025-10-05 20:27:12,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:27:14,268 - root - INFO - step: 35700 loss: 1.9472 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 20:27:14,268 - root - INFO - lr: 6.2965e-06 gnorm: 1.23 [21:53:02< 2:38:09] +[titan] 2025-10-05 20:27:25,130 - root - INFO - step: 35705 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:27:25,130 - root - INFO - lr: 6.2935e-06 gnorm: 1.19 [21:53:13< 2:37:58] +[titan] 2025-10-05 20:27:35,993 - root - INFO - step: 35710 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 20:27:35,993 - root - INFO - lr: 6.2905e-06 gnorm: 1.28 [21:53:24< 2:37:47] +[titan] 2025-10-05 20:27:46,902 - root - INFO - step: 35715 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.7053 +[titan] 2025-10-05 20:27:46,902 - root - INFO - lr: 6.2875e-06 gnorm: 1.23 [21:53:35< 2:37:36] +[titan] 2025-10-05 20:27:57,813 - root - INFO - step: 35720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 20:27:57,813 - root - INFO - lr: 6.2846e-06 gnorm: 1.21 [21:53:46< 2:37:25] +[titan] 2025-10-05 20:28:08,685 - root - INFO - step: 35725 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 20:28:08,685 - root - INFO - lr: 6.2816e-06 gnorm: 1.21 [21:53:57< 2:37:13] +[titan] 2025-10-05 20:28:19,553 - root - INFO - step: 35730 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 20:28:19,553 - root - INFO - lr: 
6.2786e-06 gnorm: 1.19 [21:54:08< 2:37:02] +[titan] 2025-10-05 20:28:30,452 - root - INFO - step: 35735 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 20:28:30,452 - root - INFO - lr: 6.2756e-06 gnorm: 1.25 [21:54:18< 2:36:51] +[titan] 2025-10-05 20:28:41,341 - root - INFO - step: 35740 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 20:28:41,341 - root - INFO - lr: 6.2727e-06 gnorm: 1.26 [21:54:29< 2:36:40] +[titan] 2025-10-05 20:28:52,320 - root - INFO - step: 35745 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.09 mfu: 41.87% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6496 +[titan] 2025-10-05 20:28:52,320 - root - INFO - lr: 6.2697e-06 gnorm: 1.19 [21:54:40< 2:36:29] +[titan] 2025-10-05 20:29:01,029 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:29:03,215 - root - INFO - step: 35750 loss: 1.8998 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 20:29:03,215 - root - INFO - lr: 6.2668e-06 gnorm: 1.23 [21:54:51< 2:36:18] +[titan] 2025-10-05 20:29:14,102 - root - INFO - step: 35755 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 20:29:14,102 - root - INFO - lr: 6.2638e-06 gnorm: 1.25 [21:55:02< 2:36:07] +[titan] 2025-10-05 20:29:24,977 - root - INFO - step: 35760 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 20:29:24,977 - root - INFO - lr: 6.2609e-06 gnorm: 1.19 [21:55:13< 2:35:56] +[titan] 2025-10-05 20:29:35,865 - root - INFO - step: 35765 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 20:29:35,865 - root - INFO - lr: 6.2579e-06 gnorm: 1.20 [21:55:24< 2:35:45] +[titan] 2025-10-05 20:29:46,743 - root - INFO - step: 35770 loss: 1.9516 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 20:29:46,743 - root - INFO - lr: 6.2550e-06 gnorm: 1.22 [21:55:35< 2:35:34] +[titan] 2025-10-05 20:29:57,662 - root - INFO - step: 35775 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 20:29:57,662 - root - INFO - lr: 6.2521e-06 gnorm: 1.24 [21:55:46< 2:35:23] +[titan] 2025-10-05 20:30:08,549 - root - INFO - step: 35780 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 20:30:08,549 - root - INFO - lr: 
6.2491e-06 gnorm: 1.24 [21:55:57< 2:35:12] +[titan] 2025-10-05 20:30:19,428 - root - INFO - step: 35785 loss: 2.0119 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 20:30:19,428 - root - INFO - lr: 6.2462e-06 gnorm: 1.22 [21:56:07< 2:35:01] +[titan] 2025-10-05 20:30:30,298 - root - INFO - step: 35790 loss: 1.8995 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 20:30:30,298 - root - INFO - lr: 6.2433e-06 gnorm: 1.20 [21:56:18< 2:34:50] +[titan] 2025-10-05 20:30:41,166 - root - INFO - step: 35795 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 20:30:41,166 - root - INFO - lr: 6.2403e-06 gnorm: 1.22 [21:56:29< 2:34:39] +[titan] 2025-10-05 20:30:49,931 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:30:52,145 - root - INFO - step: 35800 loss: 1.8719 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.08 mfu: 41.87% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6562 +[titan] 2025-10-05 20:30:52,146 - root - INFO - lr: 6.2374e-06 gnorm: 1.18 [21:56:40< 2:34:28] +[titan] 2025-10-05 20:31:03,002 - root - INFO - step: 35805 loss: 1.8418 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6290 +[titan] 2025-10-05 20:31:03,002 - root - INFO - lr: 6.2345e-06 gnorm: 1.17 [21:56:51< 2:34:17] +[titan] 2025-10-05 20:31:13,902 - root - INFO - step: 35810 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 20:31:13,902 - root - INFO - lr: 6.2316e-06 gnorm: 1.20 [21:57:02< 2:34:06] +[titan] 2025-10-05 20:31:24,762 - root - INFO - step: 35815 loss: 1.8766 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:31:24,763 - root - INFO - lr: 6.2287e-06 gnorm: 1.21 [21:57:13< 2:33:55] +[titan] 2025-10-05 20:31:35,622 - root - INFO - step: 35820 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 20:31:35,623 - root - INFO - lr: 6.2258e-06 gnorm: 1.20 [21:57:24< 2:33:44] +[titan] 2025-10-05 20:31:46,463 - root - INFO - step: 35825 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 20:31:46,464 - root - INFO - lr: 6.2229e-06 gnorm: 1.23 [21:57:34< 2:33:32] +[titan] 2025-10-05 20:31:57,412 - root - INFO - step: 35830 loss: 1.8980 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 20:31:57,412 - root - INFO - lr: 
6.2200e-06 gnorm: 1.20 [21:57:45< 2:33:21] +[titan] 2025-10-05 20:32:08,273 - root - INFO - step: 35835 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6616 +[titan] 2025-10-05 20:32:08,273 - root - INFO - lr: 6.2171e-06 gnorm: 1.17 [21:57:56< 2:33:10] +[titan] 2025-10-05 20:32:19,226 - root - INFO - step: 35840 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 20:32:19,227 - root - INFO - lr: 6.2142e-06 gnorm: 1.24 [21:58:07< 2:32:59] +[titan] 2025-10-05 20:32:19,429 - root - INFO - Dumping profiler traces at step 35840 +[titan] 2025-10-05 20:32:19,474 - root - INFO - Finished dumping profiler traces in 0.05 seconds +[titan] 2025-10-05 20:32:30,380 - root - INFO - step: 35845 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,379 tflops: 407.58 mfu: 41.21% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 20:32:30,381 - root - INFO - lr: 6.2113e-06 gnorm: 1.21 [21:58:18< 2:32:48] +[titan] 2025-10-05 20:32:39,073 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:32:41,258 - root - INFO - step: 35850 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:32:41,258 - root - INFO - lr: 6.2084e-06 gnorm: 1.18 [21:58:29< 2:32:37] +[titan] 2025-10-05 20:32:52,159 - root - INFO - step: 35855 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 20:32:52,159 - root - INFO - lr: 6.2055e-06 gnorm: 1.21 [21:58:40< 2:32:26] +[titan] 2025-10-05 20:33:03,040 - root - INFO - step: 35860 loss: 1.9254 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:33:03,041 - root - INFO - lr: 6.2026e-06 gnorm: 1.22 [21:58:51< 2:32:15] +[titan] 2025-10-05 20:33:13,902 - root - INFO - step: 35865 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7075 +[titan] 2025-10-05 20:33:13,902 - root - INFO - lr: 6.1998e-06 gnorm: 1.24 [21:59:02< 2:32:04] +[titan] 2025-10-05 20:33:24,764 - root - INFO - step: 35870 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 20:33:24,764 - root - INFO - lr: 6.1969e-06 gnorm: 1.24 [21:59:13< 2:31:53] +[titan] 2025-10-05 20:33:35,665 - root - INFO - step: 35875 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:33:35,666 - root - INFO - lr: 6.1940e-06 gnorm: 1.20 [21:59:24< 2:31:42] +[titan] 2025-10-05 20:33:46,542 - root - INFO - step: 35880 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:33:46,542 - root - INFO - lr: 
6.1911e-06 gnorm: 1.22 [21:59:35< 2:31:31] +[titan] 2025-10-05 20:33:57,456 - root - INFO - step: 35885 loss: 1.9215 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 20:33:57,456 - root - INFO - lr: 6.1883e-06 gnorm: 1.21 [21:59:45< 2:31:20] +[titan] 2025-10-05 20:34:08,320 - root - INFO - step: 35890 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 20:34:08,321 - root - INFO - lr: 6.1854e-06 gnorm: 1.23 [21:59:56< 2:31:09] +[titan] 2025-10-05 20:34:19,183 - root - INFO - step: 35895 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 20:34:19,183 - root - INFO - lr: 6.1826e-06 gnorm: 1.20 [22:00:07< 2:30:58] +[titan] 2025-10-05 20:34:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:34:30,044 - root - INFO - step: 35900 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 20:34:30,044 - root - INFO - lr: 6.1797e-06 gnorm: 1.24 [22:00:18< 2:30:47] +[titan] 2025-10-05 20:34:40,962 - root - INFO - step: 35905 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 20:34:40,962 - root - INFO - lr: 6.1769e-06 gnorm: 1.21 [22:00:29< 2:30:36] +[titan] 2025-10-05 20:34:51,814 - root - INFO - step: 35910 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 20:34:51,814 - root - INFO - lr: 6.1740e-06 gnorm: 1.20 [22:00:40< 2:30:25] +[titan] 2025-10-05 20:35:02,707 - root - INFO - step: 35915 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7636 +[titan] 2025-10-05 20:35:02,708 - root - INFO - lr: 6.1712e-06 gnorm: 1.25 [22:00:51< 2:30:14] +[titan] 2025-10-05 20:35:13,561 - root - INFO - step: 35920 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 20:35:13,561 - root - INFO - lr: 6.1683e-06 gnorm: 1.21 [22:01:02< 2:30:03] +[titan] 2025-10-05 20:35:24,429 - root - INFO - step: 35925 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 20:35:24,429 - root - INFO - lr: 6.1655e-06 gnorm: 1.21 [22:01:12< 2:29:51] +[titan] 2025-10-05 20:35:35,298 - root - INFO - step: 35930 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7410 +[titan] 2025-10-05 20:35:35,298 - root - INFO - lr: 
6.1627e-06 gnorm: 1.20 [22:01:23< 2:29:40] +[titan] 2025-10-05 20:35:46,161 - root - INFO - step: 35935 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6568 +[titan] 2025-10-05 20:35:46,161 - root - INFO - lr: 6.1598e-06 gnorm: 1.22 [22:01:34< 2:29:29] +[titan] 2025-10-05 20:35:57,095 - root - INFO - step: 35940 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7140 +[titan] 2025-10-05 20:35:57,096 - root - INFO - lr: 6.1570e-06 gnorm: 1.21 [22:01:45< 2:29:18] +[titan] 2025-10-05 20:36:07,977 - root - INFO - step: 35945 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 20:36:07,977 - root - INFO - lr: 6.1542e-06 gnorm: 1.23 [22:01:56< 2:29:07] +[titan] 2025-10-05 20:36:16,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:36:18,851 - root - INFO - step: 35950 loss: 1.8140 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6041 +[titan] 2025-10-05 20:36:18,851 - root - INFO - lr: 6.1514e-06 gnorm: 1.21 [22:02:07< 2:28:56] +[titan] 2025-10-05 20:36:29,728 - root - INFO - step: 35955 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:36:29,728 - root - INFO - lr: 6.1485e-06 gnorm: 1.18 [22:02:18< 2:28:45] +[titan] 2025-10-05 20:36:40,603 - root - INFO - step: 35960 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 20:36:40,603 - root - INFO - lr: 6.1457e-06 gnorm: 1.20 [22:02:29< 2:28:34] +[titan] 2025-10-05 20:36:51,466 - root - INFO - step: 35965 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7135 +[titan] 2025-10-05 20:36:51,466 - root - INFO - lr: 6.1429e-06 gnorm: 1.21 [22:02:39< 2:28:23] +[titan] 2025-10-05 20:37:02,418 - root - INFO - step: 35970 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 20:37:02,418 - root - INFO - lr: 6.1401e-06 gnorm: 1.17 [22:02:50< 2:28:12] +[titan] 2025-10-05 20:37:13,277 - root - INFO - step: 35975 loss: 1.9766 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:37:13,277 - root - INFO - lr: 6.1373e-06 gnorm: 1.23 [22:03:01< 2:28:01] +[titan] 2025-10-05 20:37:24,151 - root - INFO - step: 35980 loss: 1.9461 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:37:24,151 - root - INFO - lr: 
6.1345e-06 gnorm: 1.17 [22:03:12< 2:27:50] +[titan] 2025-10-05 20:37:34,997 - root - INFO - step: 35985 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6927 +[titan] 2025-10-05 20:37:34,997 - root - INFO - lr: 6.1317e-06 gnorm: 1.19 [22:03:23< 2:27:39] +[titan] 2025-10-05 20:37:45,857 - root - INFO - step: 35990 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:37:45,857 - root - INFO - lr: 6.1289e-06 gnorm: 1.21 [22:03:34< 2:27:28] +[titan] 2025-10-05 20:37:56,761 - root - INFO - step: 35995 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 20:37:56,761 - root - INFO - lr: 6.1261e-06 gnorm: 1.22 [22:03:45< 2:27:17] +[titan] 2025-10-05 20:38:05,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:38:07,602 - root - INFO - step: 36000 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 20:38:07,602 - root - INFO - lr: 6.1233e-06 gnorm: 1.24 [22:03:56< 2:27:06] +[titan] 2025-10-05 20:38:18,465 - root - INFO - step: 36005 loss: 1.8959 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 20:38:18,465 - root - INFO - lr: 6.1206e-06 gnorm: 1.22 [22:04:06< 2:26:55] +[titan] 2025-10-05 20:38:29,352 - root - INFO - step: 36010 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:38:29,352 - root - INFO - lr: 6.1178e-06 gnorm: 1.19 [22:04:17< 2:26:44] +[titan] 2025-10-05 20:38:40,197 - root - INFO - step: 36015 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 20:38:40,197 - root - INFO - lr: 6.1150e-06 gnorm: 1.22 [22:04:28< 2:26:33] +[titan] 2025-10-05 20:38:51,058 - root - INFO - step: 36020 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 20:38:51,058 - root - INFO - lr: 6.1122e-06 gnorm: 1.21 [22:04:39< 2:26:22] +[titan] 2025-10-05 20:39:01,952 - root - INFO - step: 36025 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6606 +[titan] 2025-10-05 20:39:01,952 - root - INFO - lr: 6.1095e-06 gnorm: 1.20 [22:04:50< 2:26:10] +[titan] 2025-10-05 20:39:12,835 - root - INFO - step: 36030 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 20:39:12,835 - root - INFO - lr: 
6.1067e-06 gnorm: 1.25 [22:05:01< 2:25:59] +[titan] 2025-10-05 20:39:23,710 - root - INFO - step: 36035 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 20:39:23,711 - root - INFO - lr: 6.1039e-06 gnorm: 1.23 [22:05:12< 2:25:48] +[titan] 2025-10-05 20:39:34,575 - root - INFO - step: 36040 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 20:39:34,575 - root - INFO - lr: 6.1012e-06 gnorm: 1.23 [22:05:23< 2:25:37] +[titan] 2025-10-05 20:39:45,433 - root - INFO - step: 36045 loss: 1.8945 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6757 +[titan] 2025-10-05 20:39:45,434 - root - INFO - lr: 6.0984e-06 gnorm: 1.20 [22:05:33< 2:25:26] +[titan] 2025-10-05 20:39:54,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:39:56,290 - root - INFO - step: 36050 loss: 1.9349 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7112 +[titan] 2025-10-05 20:39:56,290 - root - INFO - lr: 6.0957e-06 gnorm: 1.20 [22:05:44< 2:25:15] +[titan] 2025-10-05 20:40:07,175 - root - INFO - step: 36055 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 20:40:07,175 - root - INFO - lr: 6.0929e-06 gnorm: 1.26 [22:05:55< 2:25:04] +[titan] 2025-10-05 20:40:18,044 - root - INFO - step: 36060 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:40:18,044 - root - INFO - lr: 6.0902e-06 gnorm: 1.22 [22:06:06< 2:24:53] +[titan] 2025-10-05 20:40:28,916 - root - INFO - step: 36065 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7253 +[titan] 2025-10-05 20:40:28,916 - root - INFO - lr: 6.0874e-06 gnorm: 1.21 [22:06:17< 2:24:42] +[titan] 2025-10-05 20:40:39,778 - root - INFO - step: 36070 loss: 1.8531 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6374 +[titan] 2025-10-05 20:40:39,778 - root - INFO - lr: 6.0847e-06 gnorm: 1.21 [22:06:28< 2:24:31] +[titan] 2025-10-05 20:40:50,621 - root - INFO - step: 36075 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 20:40:50,621 - root - INFO - lr: 6.0820e-06 gnorm: 1.21 [22:06:39< 2:24:20] +[titan] 2025-10-05 20:41:01,488 - root - INFO - step: 36080 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 20:41:01,489 - root - INFO - lr: 
6.0792e-06 gnorm: 1.24 [22:06:49< 2:24:09] +[titan] 2025-10-05 20:41:12,335 - root - INFO - step: 36085 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 20:41:12,335 - root - INFO - lr: 6.0765e-06 gnorm: 1.21 [22:07:00< 2:23:58] +[titan] 2025-10-05 20:41:23,167 - root - INFO - step: 36090 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 20:41:23,168 - root - INFO - lr: 6.0738e-06 gnorm: 1.24 [22:07:11< 2:23:47] +[titan] 2025-10-05 20:41:34,043 - root - INFO - step: 36095 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:41:34,044 - root - INFO - lr: 6.0710e-06 gnorm: 1.26 [22:07:22< 2:23:36] +[titan] 2025-10-05 20:41:42,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:41:44,911 - root - INFO - step: 36100 loss: 1.9238 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7003 +[titan] 2025-10-05 20:41:44,911 - root - INFO - lr: 6.0683e-06 gnorm: 1.23 [22:07:33< 2:23:25] +[titan] 2025-10-05 20:41:55,794 - root - INFO - step: 36105 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 20:41:55,794 - root - INFO - lr: 6.0656e-06 gnorm: 1.18 [22:07:44< 2:23:14] +[titan] 2025-10-05 20:42:06,656 - root - INFO - step: 36110 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 20:42:06,657 - root - INFO - lr: 6.0629e-06 gnorm: 1.22 [22:07:55< 2:23:03] +[titan] 2025-10-05 20:42:17,515 - root - INFO - step: 36115 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 20:42:17,515 - root - INFO - lr: 6.0602e-06 gnorm: 1.22 [22:08:05< 2:22:52] +[titan] 2025-10-05 20:42:28,350 - root - INFO - step: 36120 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 20:42:28,350 - root - INFO - lr: 6.0575e-06 gnorm: 1.23 [22:08:16< 2:22:41] +[titan] 2025-10-05 20:42:39,197 - root - INFO - step: 36125 loss: 1.8516 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 20:42:39,197 - root - INFO - lr: 6.0548e-06 gnorm: 1.24 [22:08:27< 2:22:29] +[titan] 2025-10-05 20:42:50,082 - root - INFO - step: 36130 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 20:42:50,083 - root - INFO - lr: 
6.0521e-06 gnorm: 1.22 [22:08:38< 2:22:18] +[titan] 2025-10-05 20:43:00,986 - root - INFO - step: 36135 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:43:00,987 - root - INFO - lr: 6.0494e-06 gnorm: 1.22 [22:08:49< 2:22:07] +[titan] 2025-10-05 20:43:11,851 - root - INFO - step: 36140 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 20:43:11,851 - root - INFO - lr: 6.0467e-06 gnorm: 1.89 [22:09:00< 2:21:56] +[titan] 2025-10-05 20:43:22,694 - root - INFO - step: 36145 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 20:43:22,694 - root - INFO - lr: 6.0440e-06 gnorm: 1.18 [22:09:11< 2:21:45] +[titan] 2025-10-05 20:43:31,365 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:43:33,546 - root - INFO - step: 36150 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:43:33,547 - root - INFO - lr: 6.0413e-06 gnorm: 1.25 [22:09:22< 2:21:34] +[titan] 2025-10-05 20:43:44,389 - root - INFO - step: 36155 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 20:43:44,389 - root - INFO - lr: 6.0386e-06 gnorm: 1.20 [22:09:32< 2:21:23] +[titan] 2025-10-05 20:43:55,248 - root - INFO - step: 36160 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 20:43:55,248 - root - INFO - lr: 6.0360e-06 gnorm: 1.26 [22:09:43< 2:21:12] +[titan] 2025-10-05 20:44:06,157 - root - INFO - step: 36165 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:06,157 - root - INFO - lr: 6.0333e-06 gnorm: 1.22 [22:09:54< 2:21:01] +[titan] 2025-10-05 20:44:17,014 - root - INFO - step: 36170 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 20:44:17,014 - root - INFO - lr: 6.0306e-06 gnorm: 1.24 [22:10:05< 2:20:50] +[titan] 2025-10-05 20:44:27,854 - root - INFO - step: 36175 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:27,855 - root - INFO - lr: 6.0279e-06 gnorm: 1.23 [22:10:16< 2:20:39] +[titan] 2025-10-05 20:44:38,720 - root - INFO - step: 36180 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:44:38,720 - root - INFO - lr: 
6.0253e-06 gnorm: 1.23 [22:10:27< 2:20:28] +[titan] 2025-10-05 20:44:49,571 - root - INFO - step: 36185 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:44:49,571 - root - INFO - lr: 6.0226e-06 gnorm: 1.24 [22:10:38< 2:20:17] +[titan] 2025-10-05 20:45:00,410 - root - INFO - step: 36190 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 20:45:00,411 - root - INFO - lr: 6.0200e-06 gnorm: 1.29 [22:10:48< 2:20:06] +[titan] 2025-10-05 20:45:11,321 - root - INFO - step: 36195 loss: 1.8986 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6794 +[titan] 2025-10-05 20:45:11,321 - root - INFO - lr: 6.0173e-06 gnorm: 1.22 [22:10:59< 2:19:55] +[titan] 2025-10-05 20:45:19,981 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:45:22,165 - root - INFO - step: 36200 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 20:45:22,166 - root - INFO - lr: 6.0146e-06 gnorm: 1.25 [22:11:10< 2:19:44] +[titan] 2025-10-05 20:45:33,012 - root - INFO - step: 36205 loss: 1.8677 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6514 +[titan] 2025-10-05 20:45:33,012 - root - INFO - lr: 6.0120e-06 gnorm: 1.21 [22:11:21< 2:19:33] +[titan] 2025-10-05 20:45:43,868 - root - INFO - step: 36210 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 20:45:43,868 - root - INFO - lr: 6.0094e-06 gnorm: 1.23 [22:11:32< 2:19:22] +[titan] 2025-10-05 20:45:54,736 - root - INFO - step: 36215 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6484 +[titan] 2025-10-05 20:45:54,737 - root - INFO - lr: 6.0067e-06 gnorm: 1.18 [22:11:43< 2:19:11] +[titan] 2025-10-05 20:46:05,631 - root - INFO - step: 36220 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:46:05,631 - root - INFO - lr: 6.0041e-06 gnorm: 1.19 [22:11:54< 2:19:00] +[titan] 2025-10-05 20:46:16,518 - root - INFO - step: 36225 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 20:46:16,518 - root - INFO - lr: 6.0014e-06 gnorm: 1.22 [22:12:04< 2:18:48] +[titan] 2025-10-05 20:46:27,370 - root - INFO - step: 36230 loss: 1.9836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:46:27,370 - root - INFO - lr: 
5.9988e-06 gnorm: 1.19 [22:12:15< 2:18:37] +[titan] 2025-10-05 20:46:38,232 - root - INFO - step: 36235 loss: 1.8873 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 20:46:38,233 - root - INFO - lr: 5.9962e-06 gnorm: 1.22 [22:12:26< 2:18:26] +[titan] 2025-10-05 20:46:49,088 - root - INFO - step: 36240 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:46:49,088 - root - INFO - lr: 5.9936e-06 gnorm: 1.21 [22:12:37< 2:18:15] +[titan] 2025-10-05 20:46:59,957 - root - INFO - step: 36245 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6792 +[titan] 2025-10-05 20:46:59,958 - root - INFO - lr: 5.9909e-06 gnorm: 1.22 [22:12:48< 2:18:04] +[titan] 2025-10-05 20:47:08,665 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:47:10,858 - root - INFO - step: 36250 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:47:10,858 - root - INFO - lr: 5.9883e-06 gnorm: 1.19 [22:12:59< 2:17:53] +[titan] 2025-10-05 20:47:21,702 - root - INFO - step: 36255 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 20:47:21,702 - root - INFO - lr: 5.9857e-06 gnorm: 1.26 [22:13:10< 2:17:42] +[titan] 2025-10-05 20:47:32,596 - root - INFO - step: 36260 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 20:47:32,596 - root - INFO - lr: 5.9831e-06 gnorm: 1.22 [22:13:21< 2:17:31] +[titan] 2025-10-05 20:47:43,478 - root - INFO - step: 36265 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 20:47:43,478 - root - INFO - lr: 5.9805e-06 gnorm: 1.28 [22:13:31< 2:17:20] +[titan] 2025-10-05 20:47:54,366 - root - INFO - step: 36270 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:47:54,366 - root - INFO - lr: 5.9779e-06 gnorm: 1.22 [22:13:42< 2:17:09] +[titan] 2025-10-05 20:48:05,288 - root - INFO - step: 36275 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6837 +[titan] 2025-10-05 20:48:05,288 - root - INFO - lr: 5.9753e-06 gnorm: 1.22 [22:13:53< 2:16:58] +[titan] 2025-10-05 20:48:16,197 - root - INFO - step: 36280 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 20:48:16,197 - root - INFO - lr: 
5.9727e-06 gnorm: 1.22 [22:14:04< 2:16:47] +[titan] 2025-10-05 20:48:27,074 - root - INFO - step: 36285 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 20:48:27,075 - root - INFO - lr: 5.9701e-06 gnorm: 1.23 [22:14:15< 2:16:36] +[titan] 2025-10-05 20:48:37,962 - root - INFO - step: 36290 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 20:48:37,962 - root - INFO - lr: 5.9675e-06 gnorm: 1.26 [22:14:26< 2:16:25] +[titan] 2025-10-05 20:48:48,831 - root - INFO - step: 36295 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 20:48:48,832 - root - INFO - lr: 5.9649e-06 gnorm: 1.22 [22:14:37< 2:16:14] +[titan] 2025-10-05 20:48:57,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:48:59,685 - root - INFO - step: 36300 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6884 +[titan] 2025-10-05 20:48:59,686 - root - INFO - lr: 5.9623e-06 gnorm: 1.23 [22:14:48< 2:16:03] +[titan] 2025-10-05 20:49:10,530 - root - INFO - step: 36305 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:49:10,530 - root - INFO - lr: 5.9597e-06 gnorm: 1.21 [22:14:58< 2:15:52] +[titan] 2025-10-05 20:49:21,373 - root - INFO - step: 36310 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7418 +[titan] 2025-10-05 20:49:21,373 - root - INFO - lr: 5.9572e-06 gnorm: 1.26 [22:15:09< 2:15:41] +[titan] 2025-10-05 20:49:32,211 - root - INFO - step: 36315 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 20:49:32,211 - root - INFO - lr: 5.9546e-06 gnorm: 1.21 [22:15:20< 2:15:30] +[titan] 2025-10-05 20:49:43,047 - root - INFO - step: 36320 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7327 +[titan] 2025-10-05 20:49:43,048 - root - INFO - lr: 5.9520e-06 gnorm: 1.23 [22:15:31< 2:15:19] +[titan] 2025-10-05 20:49:53,930 - root - INFO - step: 36325 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 20:49:53,930 - root - INFO - lr: 5.9495e-06 gnorm: 1.21 [22:15:42< 2:15:08] +[titan] 2025-10-05 20:50:04,790 - root - INFO - step: 36330 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 20:50:04,790 - root - INFO - lr: 
5.9469e-06 gnorm: 1.22 [22:15:53< 2:14:56] +[titan] 2025-10-05 20:50:15,657 - root - INFO - step: 36335 loss: 1.9258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7028 +[titan] 2025-10-05 20:50:15,657 - root - INFO - lr: 5.9443e-06 gnorm: 1.28 [22:16:04< 2:14:45] +[titan] 2025-10-05 20:50:26,516 - root - INFO - step: 36340 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 20:50:26,516 - root - INFO - lr: 5.9418e-06 gnorm: 1.22 [22:16:14< 2:14:34] +[titan] 2025-10-05 20:50:37,351 - root - INFO - step: 36345 loss: 1.8859 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 20:50:37,351 - root - INFO - lr: 5.9392e-06 gnorm: 1.21 [22:16:25< 2:14:23] +[titan] 2025-10-05 20:50:46,107 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:50:48,289 - root - INFO - step: 36350 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:50:48,289 - root - INFO - lr: 5.9367e-06 gnorm: 1.26 [22:16:36< 2:14:12] +[titan] 2025-10-05 20:50:52,804 - root - INFO - Dumping profiler traces at step 36352 +[titan] 2025-10-05 20:50:52,843 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:50:59,390 - root - INFO - step: 36355 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 29,519 tflops: 409.53 mfu: 41.41% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6886 +[titan] 2025-10-05 20:50:59,390 - root - INFO - lr: 5.9341e-06 gnorm: 1.21 [22:16:47< 2:14:01] +[titan] 2025-10-05 20:51:10,256 - root - INFO - step: 36360 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6670 +[titan] 2025-10-05 20:51:10,257 - root - INFO - lr: 5.9316e-06 gnorm: 1.20 [22:16:58< 2:13:50] +[titan] 2025-10-05 20:51:21,108 - root - INFO - step: 36365 loss: 1.9715 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7430 +[titan] 2025-10-05 20:51:21,108 - root - INFO - lr: 5.9290e-06 gnorm: 1.25 [22:17:09< 2:13:39] +[titan] 2025-10-05 20:51:31,957 - root - INFO - step: 36370 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 20:51:31,957 - root - INFO - lr: 5.9265e-06 gnorm: 1.22 [22:17:20< 2:13:28] +[titan] 2025-10-05 20:51:42,813 - root - INFO - step: 36375 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:51:42,813 - root - INFO - lr: 5.9240e-06 gnorm: 1.22 [22:17:31< 2:13:17] +[titan] 2025-10-05 20:51:53,656 - root - INFO - step: 36380 loss: 
1.9104 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 20:51:53,656 - root - INFO - lr: 5.9214e-06 gnorm: 1.27 [22:17:42< 2:13:06] +[titan] 2025-10-05 20:52:04,533 - root - INFO - step: 36385 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7274 +[titan] 2025-10-05 20:52:04,533 - root - INFO - lr: 5.9189e-06 gnorm: 1.22 [22:17:52< 2:12:55] +[titan] 2025-10-05 20:52:15,414 - root - INFO - step: 36390 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7019 +[titan] 2025-10-05 20:52:15,414 - root - INFO - lr: 5.9164e-06 gnorm: 1.23 [22:18:03< 2:12:44] +[titan] 2025-10-05 20:52:26,295 - root - INFO - step: 36395 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 20:52:26,295 - root - INFO - lr: 5.9139e-06 gnorm: 1.21 [22:18:14< 2:12:33] +[titan] 2025-10-05 20:52:34,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:52:37,152 - root - INFO - step: 36400 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 20:52:37,153 - root - INFO - lr: 5.9114e-06 gnorm: 1.22 [22:18:25< 2:12:22] +[titan] 2025-10-05 20:52:48,028 - root - INFO - step: 36405 loss: 1.9539 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:52:48,028 - root - INFO - lr: 5.9088e-06 gnorm: 1.20 [22:18:36< 2:12:11] +[titan] 2025-10-05 20:52:58,901 - root - INFO - step: 36410 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:52:58,901 - root - INFO - lr: 5.9063e-06 gnorm: 1.21 [22:18:47< 2:12:00] +[titan] 2025-10-05 20:53:10,114 - root - INFO - step: 36415 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 29,223 tflops: 405.42 mfu: 40.99% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6627 +[titan] 2025-10-05 20:53:10,115 - root - INFO - lr: 5.9038e-06 gnorm: 1.21 [22:18:58< 2:11:49] +[titan] 2025-10-05 20:53:21,005 - root - INFO - step: 36420 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 20:53:21,005 - root - INFO - lr: 5.9013e-06 gnorm: 1.27 [22:19:09< 2:11:38] +[titan] 2025-10-05 20:53:31,873 - root - INFO - step: 36425 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 20:53:31,874 - root - INFO - lr: 5.8988e-06 gnorm: 1.24 [22:19:20< 2:11:27] +[titan] 2025-10-05 20:53:42,745 - root - INFO - step: 36430 loss: 1.8831 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6658 +[titan] 2025-10-05 20:53:42,745 - root - INFO - lr: 
5.8963e-06 gnorm: 1.28 [22:19:31< 2:11:16] +[titan] 2025-10-05 20:53:53,613 - root - INFO - step: 36435 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 20:53:53,613 - root - INFO - lr: 5.8938e-06 gnorm: 1.23 [22:19:42< 2:11:05] +[titan] 2025-10-05 20:54:04,481 - root - INFO - step: 36440 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:54:04,481 - root - INFO - lr: 5.8914e-06 gnorm: 1.22 [22:19:52< 2:10:53] +[titan] 2025-10-05 20:54:15,378 - root - INFO - step: 36445 loss: 1.9147 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:54:15,378 - root - INFO - lr: 5.8889e-06 gnorm: 1.24 [22:20:03< 2:10:42] +[titan] 2025-10-05 20:54:24,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:54:26,273 - root - INFO - step: 36450 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 20:54:26,273 - root - INFO - lr: 5.8864e-06 gnorm: 1.25 [22:20:14< 2:10:31] +[titan] 2025-10-05 20:54:37,147 - root - INFO - step: 36455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 20:54:37,147 - root - INFO - lr: 5.8839e-06 gnorm: 1.20 [22:20:25< 2:10:20] +[titan] 2025-10-05 20:54:48,029 - root - INFO - step: 36460 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 20:54:48,029 - root - INFO - lr: 5.8814e-06 gnorm: 1.21 [22:20:36< 2:10:09] +[titan] 2025-10-05 20:54:58,890 - root - INFO - step: 36465 loss: 1.9169 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 20:54:58,890 - root - INFO - lr: 5.8790e-06 gnorm: 1.22 [22:20:47< 2:09:58] +[titan] 2025-10-05 20:55:09,763 - root - INFO - step: 36470 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6935 +[titan] 2025-10-05 20:55:09,764 - root - INFO - lr: 5.8765e-06 gnorm: 1.24 [22:20:58< 2:09:47] +[titan] 2025-10-05 20:55:20,621 - root - INFO - step: 36475 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 20:55:20,622 - root - INFO - lr: 5.8740e-06 gnorm: 1.24 [22:21:09< 2:09:36] +[titan] 2025-10-05 20:55:31,491 - root - INFO - step: 36480 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:55:31,491 - root - INFO - lr: 
5.8716e-06 gnorm: 1.25 [22:21:19< 2:09:25] +[titan] 2025-10-05 20:55:42,366 - root - INFO - step: 36485 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 20:55:42,367 - root - INFO - lr: 5.8691e-06 gnorm: 1.22 [22:21:30< 2:09:14] +[titan] 2025-10-05 20:55:53,240 - root - INFO - step: 36490 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:55:53,241 - root - INFO - lr: 5.8667e-06 gnorm: 1.19 [22:21:41< 2:09:03] +[titan] 2025-10-05 20:56:04,092 - root - INFO - step: 36495 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 20:56:04,092 - root - INFO - lr: 5.8642e-06 gnorm: 1.28 [22:21:52< 2:08:52] +[titan] 2025-10-05 20:56:12,794 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:56:14,974 - root - INFO - step: 36500 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:56:14,974 - root - INFO - lr: 5.8618e-06 gnorm: 1.22 [22:22:03< 2:08:41] +[titan] 2025-10-05 20:56:25,858 - root - INFO - step: 36505 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:56:25,858 - root - INFO - lr: 5.8593e-06 gnorm: 1.21 [22:22:14< 2:08:30] +[titan] 2025-10-05 20:56:36,711 - root - INFO - step: 36510 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 20:56:36,712 - root - INFO - lr: 5.8569e-06 gnorm: 1.26 [22:22:25< 2:08:19] +[titan] 2025-10-05 20:56:47,594 - root - INFO - step: 36515 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:56:47,594 - root - INFO - lr: 5.8544e-06 gnorm: 1.24 [22:22:36< 2:08:08] +[titan] 2025-10-05 20:56:58,464 - root - INFO - step: 36520 loss: 1.8908 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6705 +[titan] 2025-10-05 20:56:58,465 - root - INFO - lr: 5.8520e-06 gnorm: 1.23 [22:22:46< 2:07:57] +[titan] 2025-10-05 20:57:09,332 - root - INFO - step: 36525 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:57:09,332 - root - INFO - lr: 5.8496e-06 gnorm: 1.21 [22:22:57< 2:07:46] +[titan] 2025-10-05 20:57:20,232 - root - INFO - step: 36530 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 20:57:20,232 - root - INFO - lr: 
5.8471e-06 gnorm: 1.21 [22:23:08< 2:07:35] +[titan] 2025-10-05 20:57:31,124 - root - INFO - step: 36535 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 20:57:31,124 - root - INFO - lr: 5.8447e-06 gnorm: 1.23 [22:23:19< 2:07:24] +[titan] 2025-10-05 20:57:42,014 - root - INFO - step: 36540 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 20:57:42,014 - root - INFO - lr: 5.8423e-06 gnorm: 1.25 [22:23:30< 2:07:13] +[titan] 2025-10-05 20:57:52,927 - root - INFO - step: 36545 loss: 1.9727 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 20:57:52,928 - root - INFO - lr: 5.8399e-06 gnorm: 1.24 [22:23:41< 2:07:02] +[titan] 2025-10-05 20:58:01,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:58:03,825 - root - INFO - step: 36550 loss: 1.9288 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7049 +[titan] 2025-10-05 20:58:03,825 - root - INFO - lr: 5.8375e-06 gnorm: 1.24 [22:23:52< 2:06:50] +[titan] 2025-10-05 20:58:14,740 - root - INFO - step: 36555 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 20:58:14,741 - root - INFO - lr: 5.8351e-06 gnorm: 1.26 [22:24:03< 2:06:39] +[titan] 2025-10-05 20:58:25,614 - root - INFO - step: 36560 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 20:58:25,614 - root - INFO - lr: 5.8326e-06 gnorm: 1.18 [22:24:14< 2:06:28] +[titan] 2025-10-05 20:58:36,506 - root - INFO - step: 36565 loss: 1.8964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:58:36,506 - root - INFO - lr: 5.8302e-06 gnorm: 1.20 [22:24:24< 2:06:17] +[titan] 2025-10-05 20:58:47,390 - root - INFO - step: 36570 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 20:58:47,390 - root - INFO - lr: 5.8278e-06 gnorm: 1.25 [22:24:35< 2:06:06] +[titan] 2025-10-05 20:58:58,289 - root - INFO - step: 36575 loss: 1.9029 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:58:58,289 - root - INFO - lr: 5.8254e-06 gnorm: 1.20 [22:24:46< 2:05:55] +[titan] 2025-10-05 20:59:09,190 - root - INFO - step: 36580 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7074 +[titan] 2025-10-05 20:59:09,190 - root - INFO - lr: 
5.8231e-06 gnorm: 1.24 [22:24:57< 2:05:44] +[titan] 2025-10-05 20:59:20,103 - root - INFO - step: 36585 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7440 +[titan] 2025-10-05 20:59:20,103 - root - INFO - lr: 5.8207e-06 gnorm: 1.22 [22:25:08< 2:05:33] +[titan] 2025-10-05 20:59:30,980 - root - INFO - step: 36590 loss: 1.9441 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:59:30,980 - root - INFO - lr: 5.8183e-06 gnorm: 1.22 [22:25:19< 2:05:22] +[titan] 2025-10-05 20:59:41,845 - root - INFO - step: 36595 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6830 +[titan] 2025-10-05 20:59:41,845 - root - INFO - lr: 5.8159e-06 gnorm: 1.20 [22:25:30< 2:05:11] +[titan] 2025-10-05 20:59:50,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:59:52,732 - root - INFO - step: 36600 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:59:52,732 - root - INFO - lr: 5.8135e-06 gnorm: 1.20 [22:25:41< 2:05:00] +[titan] 2025-10-05 21:00:03,618 - root - INFO - step: 36605 loss: 1.8614 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6465 +[titan] 2025-10-05 21:00:03,618 - root - INFO - lr: 5.8111e-06 gnorm: 1.22 [22:25:52< 2:04:49] +[titan] 2025-10-05 21:00:14,529 - root - INFO - step: 36610 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 21:00:14,529 - root - INFO - lr: 5.8088e-06 gnorm: 1.24 [22:26:02< 2:04:38] +[titan] 2025-10-05 21:00:25,449 - root - INFO - step: 36615 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:00:25,450 - root - INFO - lr: 5.8064e-06 gnorm: 1.23 [22:26:13< 2:04:27] +[titan] 2025-10-05 21:00:36,361 - root - INFO - step: 36620 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6464 +[titan] 2025-10-05 21:00:36,361 - root - INFO - lr: 5.8040e-06 gnorm: 1.24 [22:26:24< 2:04:16] +[titan] 2025-10-05 21:00:47,259 - root - INFO - step: 36625 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7482 +[titan] 2025-10-05 21:00:47,259 - root - INFO - lr: 5.8017e-06 gnorm: 1.24 [22:26:35< 2:04:05] +[titan] 2025-10-05 21:00:58,160 - root - INFO - step: 36630 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:00:58,160 - root - INFO - lr: 
5.7993e-06 gnorm: 1.21 [22:26:46< 2:03:54] +[titan] 2025-10-05 21:01:09,053 - root - INFO - step: 36635 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:01:09,053 - root - INFO - lr: 5.7969e-06 gnorm: 1.26 [22:26:57< 2:03:43] +[titan] 2025-10-05 21:01:19,958 - root - INFO - step: 36640 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 21:01:19,958 - root - INFO - lr: 5.7946e-06 gnorm: 1.24 [22:27:08< 2:03:32] +[titan] 2025-10-05 21:01:30,865 - root - INFO - step: 36645 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:01:30,865 - root - INFO - lr: 5.7922e-06 gnorm: 1.22 [22:27:19< 2:03:21] +[titan] 2025-10-05 21:01:39,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:01:41,769 - root - INFO - step: 36650 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:01:41,769 - root - INFO - lr: 5.7899e-06 gnorm: 1.25 [22:27:30< 2:03:10] +[titan] 2025-10-05 21:01:52,656 - root - INFO - step: 36655 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6666 +[titan] 2025-10-05 21:01:52,656 - root - INFO - lr: 5.7876e-06 gnorm: 1.26 [22:27:41< 2:02:59] +[titan] 2025-10-05 21:02:03,549 - root - INFO - step: 36660 loss: 1.9170 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 21:02:03,549 - root - INFO - lr: 5.7852e-06 gnorm: 1.24 [22:27:51< 2:02:48] +[titan] 2025-10-05 21:02:14,436 - root - INFO - step: 36665 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6585 +[titan] 2025-10-05 21:02:14,436 - root - INFO - lr: 5.7829e-06 gnorm: 1.20 [22:28:02< 2:02:36] +[titan] 2025-10-05 21:02:25,324 - root - INFO - step: 36670 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 21:02:25,324 - root - INFO - lr: 5.7806e-06 gnorm: 1.25 [22:28:13< 2:02:25] +[titan] 2025-10-05 21:02:36,230 - root - INFO - step: 36675 loss: 1.8517 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6371 +[titan] 2025-10-05 21:02:36,230 - root - INFO - lr: 5.7782e-06 gnorm: 1.21 [22:28:24< 2:02:14] +[titan] 2025-10-05 21:02:47,119 - root - INFO - step: 36680 loss: 1.8308 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6192 +[titan] 2025-10-05 21:02:47,119 - root - INFO - lr: 
5.7759e-06 gnorm: 1.21 [22:28:35< 2:02:03] +[titan] 2025-10-05 21:02:58,028 - root - INFO - step: 36685 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:02:58,028 - root - INFO - lr: 5.7736e-06 gnorm: 1.27 [22:28:46< 2:01:52] +[titan] 2025-10-05 21:03:08,899 - root - INFO - step: 36690 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 21:03:08,899 - root - INFO - lr: 5.7713e-06 gnorm: 1.23 [22:28:57< 2:01:41] +[titan] 2025-10-05 21:03:19,806 - root - INFO - step: 36695 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:03:19,806 - root - INFO - lr: 5.7689e-06 gnorm: 1.23 [22:29:08< 2:01:30] +[titan] 2025-10-05 21:03:28,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:03:30,710 - root - INFO - step: 36700 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 21:03:30,710 - root - INFO - lr: 5.7666e-06 gnorm: 1.27 [22:29:19< 2:01:19] +[titan] 2025-10-05 21:03:41,623 - root - INFO - step: 36705 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 21:03:41,624 - root - INFO - lr: 5.7643e-06 gnorm: 1.24 [22:29:30< 2:01:08] +[titan] 2025-10-05 21:03:52,525 - root - INFO - step: 36710 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:03:52,525 - root - INFO - lr: 5.7620e-06 gnorm: 1.26 [22:29:40< 2:00:57] +[titan] 2025-10-05 21:04:03,447 - root - INFO - step: 36715 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:04:03,447 - root - INFO - lr: 5.7597e-06 gnorm: 1.26 [22:29:51< 2:00:46] +[titan] 2025-10-05 21:04:14,324 - root - INFO - step: 36720 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:04:14,324 - root - INFO - lr: 5.7574e-06 gnorm: 1.20 [22:30:02< 2:00:35] +[titan] 2025-10-05 21:04:25,273 - root - INFO - step: 36725 loss: 1.9301 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:04:25,273 - root - INFO - lr: 5.7551e-06 gnorm: 1.23 [22:30:13< 2:00:24] +[titan] 2025-10-05 21:04:36,157 - root - INFO - step: 36730 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:04:36,157 - root - INFO - lr: 
5.7528e-06 gnorm: 1.24 [22:30:24< 2:00:13] +[titan] 2025-10-05 21:04:47,035 - root - INFO - step: 36735 loss: 1.9023 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 21:04:47,035 - root - INFO - lr: 5.7505e-06 gnorm: 1.26 [22:30:35< 2:00:02] +[titan] 2025-10-05 21:04:57,939 - root - INFO - step: 36740 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 21:04:57,939 - root - INFO - lr: 5.7483e-06 gnorm: 1.21 [22:30:46< 1:59:51] +[titan] 2025-10-05 21:05:08,831 - root - INFO - step: 36745 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 21:05:08,831 - root - INFO - lr: 5.7460e-06 gnorm: 1.25 [22:30:57< 1:59:40] +[titan] 2025-10-05 21:05:17,519 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:05:19,701 - root - INFO - step: 36750 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:05:19,702 - root - INFO - lr: 5.7437e-06 gnorm: 1.22 [22:31:08< 1:59:29] +[titan] 2025-10-05 21:05:30,640 - root - INFO - step: 36755 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 21:05:30,640 - root - INFO - lr: 5.7414e-06 gnorm: 1.23 [22:31:19< 1:59:18] +[titan] 2025-10-05 21:05:41,514 - root - INFO - step: 36760 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:05:41,514 - root - INFO - lr: 5.7392e-06 gnorm: 1.21 [22:31:29< 1:59:07] +[titan] 2025-10-05 21:05:52,376 - root - INFO - step: 36765 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 21:05:52,376 - root - INFO - lr: 5.7369e-06 gnorm: 1.26 [22:31:40< 1:58:56] +[titan] 2025-10-05 21:06:03,266 - root - INFO - step: 36770 loss: 1.8668 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 21:06:03,266 - root - INFO - lr: 5.7346e-06 gnorm: 1.22 [22:31:51< 1:58:45] +[titan] 2025-10-05 21:06:14,143 - root - INFO - step: 36775 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 21:06:14,143 - root - INFO - lr: 5.7324e-06 gnorm: 1.23 [22:32:02< 1:58:34] +[titan] 2025-10-05 21:06:25,098 - root - INFO - step: 36780 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 21:06:25,098 - root - INFO - lr: 
5.7301e-06 gnorm: 1.22 [22:32:13< 1:58:23] +[titan] 2025-10-05 21:06:35,961 - root - INFO - step: 36785 loss: 1.8486 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6355 +[titan] 2025-10-05 21:06:35,961 - root - INFO - lr: 5.7279e-06 gnorm: 1.26 [22:32:24< 1:58:11] +[titan] 2025-10-05 21:06:46,824 - root - INFO - step: 36790 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 21:06:46,825 - root - INFO - lr: 5.7256e-06 gnorm: 1.26 [22:32:35< 1:58:00] +[titan] 2025-10-05 21:06:57,688 - root - INFO - step: 36795 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7115 +[titan] 2025-10-05 21:06:57,688 - root - INFO - lr: 5.7234e-06 gnorm: 1.23 [22:32:46< 1:57:49] +[titan] 2025-10-05 21:07:06,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:07:08,540 - root - INFO - step: 36800 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 21:07:08,541 - root - INFO - lr: 5.7211e-06 gnorm: 1.23 [22:32:56< 1:57:38] +[titan] 2025-10-05 21:07:19,425 - root - INFO - step: 36805 loss: 1.9493 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:07:19,425 - root - INFO - lr: 5.7189e-06 gnorm: 1.24 [22:33:07< 1:57:27] +[titan] 2025-10-05 21:07:30,382 - root - INFO - step: 36810 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:07:30,383 - root - INFO - lr: 5.7166e-06 gnorm: 1.23 [22:33:18< 1:57:16] +[titan] 2025-10-05 21:07:41,263 - root - INFO - step: 36815 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 21:07:41,263 - root - INFO - lr: 5.7144e-06 gnorm: 1.24 [22:33:29< 1:57:05] +[titan] 2025-10-05 21:07:52,120 - root - INFO - step: 36820 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 21:07:52,120 - root - INFO - lr: 5.7122e-06 gnorm: 1.21 [22:33:40< 1:56:54] +[titan] 2025-10-05 21:08:02,998 - root - INFO - step: 36825 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6712 +[titan] 2025-10-05 21:08:02,999 - root - INFO - lr: 5.7100e-06 gnorm: 1.24 [22:33:51< 1:56:43] +[titan] 2025-10-05 21:08:13,877 - root - INFO - step: 36830 loss: 1.9915 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 21:08:13,878 - root - INFO - lr: 
5.7077e-06 gnorm: 1.31 [22:34:02< 1:56:32] +[titan] 2025-10-05 21:08:25,107 - root - INFO - step: 36835 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 29,180 tflops: 404.83 mfu: 40.93% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6650 +[titan] 2025-10-05 21:08:25,108 - root - INFO - lr: 5.7055e-06 gnorm: 1.20 [22:34:13< 1:56:21] +[titan] 2025-10-05 21:08:35,977 - root - INFO - step: 36840 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 21:08:35,977 - root - INFO - lr: 5.7033e-06 gnorm: 1.24 [22:34:24< 1:56:10] +[titan] 2025-10-05 21:08:46,865 - root - INFO - step: 36845 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 21:08:46,865 - root - INFO - lr: 5.7011e-06 gnorm: 1.24 [22:34:35< 1:55:59] +[titan] 2025-10-05 21:08:55,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:08:57,718 - root - INFO - step: 36850 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 21:08:57,718 - root - INFO - lr: 5.6989e-06 gnorm: 1.26 [22:34:46< 1:55:48] +[titan] 2025-10-05 21:09:08,595 - root - INFO - step: 36855 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7308 +[titan] 2025-10-05 21:09:08,595 - root - INFO - lr: 5.6967e-06 gnorm: 1.24 [22:34:56< 1:55:37] +[titan] 2025-10-05 21:09:19,469 - root - INFO - step: 36860 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:09:19,470 - root - INFO - lr: 5.6945e-06 gnorm: 1.27 [22:35:07< 1:55:26] +[titan] 2025-10-05 21:09:28,564 - root - INFO - Dumping profiler traces at step 36864 +[titan] 2025-10-05 21:09:28,600 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:09:30,837 - root - INFO - step: 36865 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 28,827 tflops: 399.93 mfu: 40.44% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 21:09:30,837 - root - INFO - lr: 5.6923e-06 gnorm: 1.23 [22:35:19< 1:55:15] +[titan] 2025-10-05 21:09:41,699 - root - INFO - step: 36870 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 21:09:41,699 - root - INFO - lr: 5.6901e-06 gnorm: 1.24 [22:35:30< 1:55:04] +[titan] 2025-10-05 21:09:52,574 - root - INFO - step: 36875 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 21:09:52,574 - root - INFO - lr: 5.6879e-06 gnorm: 1.24 [22:35:40< 1:54:53] +[titan] 2025-10-05 21:10:03,422 - root - INFO - step: 36880 loss: 
1.9042 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:10:03,422 - root - INFO - lr: 5.6857e-06 gnorm: 1.21 [22:35:51< 1:54:42] +[titan] 2025-10-05 21:10:14,288 - root - INFO - step: 36885 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 21:10:14,288 - root - INFO - lr: 5.6835e-06 gnorm: 1.23 [22:36:02< 1:54:31] +[titan] 2025-10-05 21:10:25,157 - root - INFO - step: 36890 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 21:10:25,157 - root - INFO - lr: 5.6813e-06 gnorm: 1.24 [22:36:13< 1:54:20] +[titan] 2025-10-05 21:10:36,097 - root - INFO - step: 36895 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 21:10:36,097 - root - INFO - lr: 5.6792e-06 gnorm: 1.24 [22:36:24< 1:54:09] +[titan] 2025-10-05 21:10:44,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:10:46,986 - root - INFO - step: 36900 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 21:10:46,987 - root - INFO - lr: 5.6770e-06 gnorm: 1.28 [22:36:35< 1:53:58] +[titan] 2025-10-05 21:10:57,833 - root - INFO - step: 36905 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 21:10:57,833 - root - INFO - lr: 5.6748e-06 gnorm: 1.23 [22:36:46< 1:53:47] +[titan] 2025-10-05 21:11:08,682 - root - INFO - step: 36910 loss: 1.8557 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6407 +[titan] 2025-10-05 21:11:08,682 - root - INFO - lr: 5.6726e-06 gnorm: 1.19 [22:36:57< 1:53:36] +[titan] 2025-10-05 21:11:19,531 - root - INFO - step: 36915 loss: 1.8896 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:11:19,532 - root - INFO - lr: 5.6705e-06 gnorm: 1.18 [22:37:07< 1:53:24] +[titan] 2025-10-05 21:11:30,448 - root - INFO - step: 36920 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:11:30,449 - root - INFO - lr: 5.6683e-06 gnorm: 1.24 [22:37:18< 1:53:13] +[titan] 2025-10-05 21:11:41,323 - root - INFO - step: 36925 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 21:11:41,323 - root - INFO - lr: 5.6662e-06 gnorm: 1.26 [22:37:29< 1:53:02] +[titan] 2025-10-05 21:11:52,243 - root - INFO - step: 36930 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6917 +[titan] 2025-10-05 21:11:52,243 - root - INFO - lr: 
5.6640e-06 gnorm: 1.28 [22:37:40< 1:52:51] +[titan] 2025-10-05 21:12:03,124 - root - INFO - step: 36935 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6806 +[titan] 2025-10-05 21:12:03,124 - root - INFO - lr: 5.6619e-06 gnorm: 1.20 [22:37:51< 1:52:40] +[titan] 2025-10-05 21:12:14,002 - root - INFO - step: 36940 loss: 1.9158 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6952 +[titan] 2025-10-05 21:12:14,002 - root - INFO - lr: 5.6597e-06 gnorm: 1.26 [22:38:02< 1:52:29] +[titan] 2025-10-05 21:12:24,869 - root - INFO - step: 36945 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 21:12:24,870 - root - INFO - lr: 5.6576e-06 gnorm: 1.21 [22:38:13< 1:52:18] +[titan] 2025-10-05 21:12:33,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:12:35,810 - root - INFO - step: 36950 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 21:12:35,810 - root - INFO - lr: 5.6554e-06 gnorm: 1.24 [22:38:24< 1:52:07] +[titan] 2025-10-05 21:12:46,684 - root - INFO - step: 36955 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7035 +[titan] 2025-10-05 21:12:46,685 - root - INFO - lr: 5.6533e-06 gnorm: 1.21 [22:38:35< 1:51:56] +[titan] 2025-10-05 21:12:57,552 - root - INFO - step: 36960 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 21:12:57,552 - root - INFO - lr: 5.6512e-06 gnorm: 1.24 [22:38:45< 1:51:45] +[titan] 2025-10-05 21:13:08,463 - root - INFO - step: 36965 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7952 +[titan] 2025-10-05 21:13:08,463 - root - INFO - lr: 5.6490e-06 gnorm: 1.24 [22:38:56< 1:51:34] +[titan] 2025-10-05 21:13:19,335 - root - INFO - step: 36970 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:19,336 - root - INFO - lr: 5.6469e-06 gnorm: 1.21 [22:39:07< 1:51:23] +[titan] 2025-10-05 21:13:30,256 - root - INFO - step: 36975 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:30,256 - root - INFO - lr: 5.6448e-06 gnorm: 1.28 [22:39:18< 1:51:12] +[titan] 2025-10-05 21:13:41,127 - root - INFO - step: 36980 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:13:41,128 - root - INFO - lr: 
5.6427e-06 gnorm: 1.23 [22:39:29< 1:51:01] +[titan] 2025-10-05 21:13:51,994 - root - INFO - step: 36985 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:13:51,995 - root - INFO - lr: 5.6405e-06 gnorm: 1.29 [22:39:40< 1:50:50] +[titan] 2025-10-05 21:14:02,859 - root - INFO - step: 36990 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7044 +[titan] 2025-10-05 21:14:02,859 - root - INFO - lr: 5.6384e-06 gnorm: 1.24 [22:39:51< 1:50:39] +[titan] 2025-10-05 21:14:13,749 - root - INFO - step: 36995 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6987 +[titan] 2025-10-05 21:14:13,749 - root - INFO - lr: 5.6363e-06 gnorm: 1.22 [22:40:02< 1:50:28] +[titan] 2025-10-05 21:14:22,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:14:24,631 - root - INFO - step: 37000 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6388 +[titan] 2025-10-05 21:14:24,631 - root - INFO - lr: 5.6342e-06 gnorm: 1.20 [22:40:13< 1:50:17] +[titan] 2025-10-05 21:14:35,570 - root - INFO - step: 37005 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 21:14:35,570 - root - INFO - lr: 5.6321e-06 gnorm: 1.26 [22:40:23< 1:50:06] +[titan] 2025-10-05 21:14:46,450 - root - INFO - step: 37010 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 21:14:46,450 - root - INFO - lr: 5.6300e-06 gnorm: 1.19 [22:40:34< 1:49:55] +[titan] 2025-10-05 21:14:57,328 - root - INFO - step: 37015 loss: 1.9312 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7076 +[titan] 2025-10-05 21:14:57,329 - root - INFO - lr: 5.6279e-06 gnorm: 1.27 [22:40:45< 1:49:44] +[titan] 2025-10-05 21:15:08,191 - root - INFO - step: 37020 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 21:15:08,191 - root - INFO - lr: 5.6258e-06 gnorm: 1.28 [22:40:56< 1:49:33] +[titan] 2025-10-05 21:15:19,080 - root - INFO - step: 37025 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:15:19,080 - root - INFO - lr: 5.6237e-06 gnorm: 1.20 [22:41:07< 1:49:22] +[titan] 2025-10-05 21:15:29,953 - root - INFO - step: 37030 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 21:15:29,953 - root - INFO - lr: 
5.6216e-06 gnorm: 1.21 [22:41:18< 1:49:11] +[titan] 2025-10-05 21:15:40,885 - root - INFO - step: 37035 loss: 1.8738 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6571 +[titan] 2025-10-05 21:15:40,885 - root - INFO - lr: 5.6196e-06 gnorm: 1.21 [22:41:29< 1:48:59] +[titan] 2025-10-05 21:15:51,738 - root - INFO - step: 37040 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6703 +[titan] 2025-10-05 21:15:51,738 - root - INFO - lr: 5.6175e-06 gnorm: 1.25 [22:41:40< 1:48:48] +[titan] 2025-10-05 21:16:02,623 - root - INFO - step: 37045 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:16:02,624 - root - INFO - lr: 5.6154e-06 gnorm: 1.21 [22:41:50< 1:48:37] +[titan] 2025-10-05 21:16:11,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:16:13,511 - root - INFO - step: 37050 loss: 1.9092 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 21:16:13,511 - root - INFO - lr: 5.6133e-06 gnorm: 1.23 [22:42:01< 1:48:26] +[titan] 2025-10-05 21:16:24,393 - root - INFO - step: 37055 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 21:16:24,393 - root - INFO - lr: 5.6113e-06 gnorm: 1.24 [22:42:12< 1:48:15] +[titan] 2025-10-05 21:16:35,329 - root - INFO - step: 37060 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:16:35,329 - root - INFO - lr: 5.6092e-06 gnorm: 1.25 [22:42:23< 1:48:04] +[titan] 2025-10-05 21:16:46,199 - root - INFO - step: 37065 loss: 1.9535 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:16:46,199 - root - INFO - lr: 5.6071e-06 gnorm: 1.27 [22:42:34< 1:47:53] +[titan] 2025-10-05 21:16:57,064 - root - INFO - step: 37070 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 21:16:57,064 - root - INFO - lr: 5.6051e-06 gnorm: 1.24 [22:42:45< 1:47:42] +[titan] 2025-10-05 21:17:07,940 - root - INFO - step: 37075 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7181 +[titan] 2025-10-05 21:17:07,940 - root - INFO - lr: 5.6030e-06 gnorm: 1.23 [22:42:56< 1:47:31] +[titan] 2025-10-05 21:17:18,806 - root - INFO - step: 37080 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 21:17:18,807 - root - INFO - lr: 
5.6010e-06 gnorm: 1.20 [22:43:07< 1:47:20] +[titan] 2025-10-05 21:17:29,692 - root - INFO - step: 37085 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 21:17:29,692 - root - INFO - lr: 5.5989e-06 gnorm: 1.21 [22:43:18< 1:47:09] +[titan] 2025-10-05 21:17:40,647 - root - INFO - step: 37090 loss: 1.9429 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:17:40,648 - root - INFO - lr: 5.5969e-06 gnorm: 1.27 [22:43:29< 1:46:58] +[titan] 2025-10-05 21:17:51,517 - root - INFO - step: 37095 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 21:17:51,517 - root - INFO - lr: 5.5949e-06 gnorm: 1.27 [22:43:39< 1:46:47] +[titan] 2025-10-05 21:18:00,217 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:18:02,399 - root - INFO - step: 37100 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6612 +[titan] 2025-10-05 21:18:02,399 - root - INFO - lr: 5.5928e-06 gnorm: 1.26 [22:43:50< 1:46:36] +[titan] 2025-10-05 21:18:13,285 - root - INFO - step: 37105 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6690 +[titan] 2025-10-05 21:18:13,286 - root - INFO - lr: 5.5908e-06 gnorm: 1.20 [22:44:01< 1:46:25] +[titan] 2025-10-05 21:18:24,145 - root - INFO - step: 37110 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 21:18:24,145 - root - INFO - lr: 5.5888e-06 gnorm: 1.25 [22:44:12< 1:46:14] +[titan] 2025-10-05 21:18:35,081 - root - INFO - step: 37115 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 21:18:35,081 - root - INFO - lr: 5.5867e-06 gnorm: 1.20 [22:44:23< 1:46:03] +[titan] 2025-10-05 21:18:45,955 - root - INFO - step: 37120 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:18:45,955 - root - INFO - lr: 5.5847e-06 gnorm: 1.24 [22:44:34< 1:45:52] +[titan] 2025-10-05 21:18:56,847 - root - INFO - step: 37125 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7220 +[titan] 2025-10-05 21:18:56,848 - root - INFO - lr: 5.5827e-06 gnorm: 1.27 [22:44:45< 1:45:41] +[titan] 2025-10-05 21:19:07,708 - root - INFO - step: 37130 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 21:19:07,709 - root - INFO - lr: 
5.5807e-06 gnorm: 1.20 [22:44:56< 1:45:30] +[titan] 2025-10-05 21:19:18,570 - root - INFO - step: 37135 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 21:19:18,570 - root - INFO - lr: 5.5787e-06 gnorm: 1.23 [22:45:06< 1:45:19] +[titan] 2025-10-05 21:19:29,432 - root - INFO - step: 37140 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 21:19:29,432 - root - INFO - lr: 5.5766e-06 gnorm: 1.25 [22:45:17< 1:45:08] +[titan] 2025-10-05 21:19:40,328 - root - INFO - step: 37145 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 21:19:40,328 - root - INFO - lr: 5.5746e-06 gnorm: 1.28 [22:45:28< 1:44:57] +[titan] 2025-10-05 21:19:48,999 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:19:51,178 - root - INFO - step: 37150 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 21:19:51,178 - root - INFO - lr: 5.5726e-06 gnorm: 1.28 [22:45:39< 1:44:46] +[titan] 2025-10-05 21:20:02,074 - root - INFO - step: 37155 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:20:02,074 - root - INFO - lr: 5.5706e-06 gnorm: 1.22 [22:45:50< 1:44:35] +[titan] 2025-10-05 21:20:12,936 - root - INFO - step: 37160 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 21:20:12,936 - root - INFO - lr: 5.5686e-06 gnorm: 1.25 [22:46:01< 1:44:23] +[titan] 2025-10-05 21:20:23,793 - root - INFO - step: 37165 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 21:20:23,793 - root - INFO - lr: 5.5666e-06 gnorm: 1.26 [22:46:12< 1:44:12] +[titan] 2025-10-05 21:20:34,674 - root - INFO - step: 37170 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:20:34,674 - root - INFO - lr: 5.5647e-06 gnorm: 1.21 [22:46:23< 1:44:01] +[titan] 2025-10-05 21:20:45,596 - root - INFO - step: 37175 loss: 1.9773 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 21:20:45,596 - root - INFO - lr: 5.5627e-06 gnorm: 1.27 [22:46:33< 1:43:50] +[titan] 2025-10-05 21:20:56,483 - root - INFO - step: 37180 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:20:56,484 - root - INFO - lr: 
5.5607e-06 gnorm: 1.26 [22:46:44< 1:43:39] +[titan] 2025-10-05 21:21:07,391 - root - INFO - step: 37185 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:21:07,392 - root - INFO - lr: 5.5587e-06 gnorm: 1.24 [22:46:55< 1:43:28] +[titan] 2025-10-05 21:21:18,272 - root - INFO - step: 37190 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 21:21:18,272 - root - INFO - lr: 5.5567e-06 gnorm: 1.22 [22:47:06< 1:43:17] +[titan] 2025-10-05 21:21:29,155 - root - INFO - step: 37195 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:21:29,155 - root - INFO - lr: 5.5548e-06 gnorm: 1.25 [22:47:17< 1:43:06] +[titan] 2025-10-05 21:21:37,896 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:21:40,076 - root - INFO - step: 37200 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:21:40,076 - root - INFO - lr: 5.5528e-06 gnorm: 1.22 [22:47:28< 1:42:55] +[titan] 2025-10-05 21:21:50,943 - root - INFO - step: 37205 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 21:21:50,944 - root - INFO - lr: 5.5508e-06 gnorm: 1.21 [22:47:39< 1:42:44] +[titan] 2025-10-05 21:22:01,837 - root - INFO - step: 37210 loss: 1.9065 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 21:22:01,837 - root - INFO - lr: 5.5489e-06 gnorm: 1.21 [22:47:50< 1:42:33] +[titan] 2025-10-05 21:22:12,716 - root - INFO - step: 37215 loss: 1.8559 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6413 +[titan] 2025-10-05 21:22:12,716 - root - INFO - lr: 5.5469e-06 gnorm: 1.23 [22:48:01< 1:42:22] +[titan] 2025-10-05 21:22:23,615 - root - INFO - step: 37220 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 21:22:23,615 - root - INFO - lr: 5.5450e-06 gnorm: 1.24 [22:48:11< 1:42:11] +[titan] 2025-10-05 21:22:34,482 - root - INFO - step: 37225 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 21:22:34,483 - root - INFO - lr: 5.5430e-06 gnorm: 1.26 [22:48:22< 1:42:00] +[titan] 2025-10-05 21:22:45,400 - root - INFO - step: 37230 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 21:22:45,400 - root - INFO - lr: 
5.5411e-06 gnorm: 1.23 [22:48:33< 1:41:49] +[titan] 2025-10-05 21:22:56,271 - root - INFO - step: 37235 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 21:22:56,272 - root - INFO - lr: 5.5391e-06 gnorm: 1.23 [22:48:44< 1:41:38] +[titan] 2025-10-05 21:23:07,114 - root - INFO - step: 37240 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:23:07,114 - root - INFO - lr: 5.5372e-06 gnorm: 1.23 [22:48:55< 1:41:27] +[titan] 2025-10-05 21:23:17,969 - root - INFO - step: 37245 loss: 1.8827 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 21:23:17,970 - root - INFO - lr: 5.5352e-06 gnorm: 1.23 [22:49:06< 1:41:16] +[titan] 2025-10-05 21:23:26,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:23:28,858 - root - INFO - step: 37250 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6816 +[titan] 2025-10-05 21:23:28,858 - root - INFO - lr: 5.5333e-06 gnorm: 1.21 [22:49:17< 1:41:05] +[titan] 2025-10-05 21:23:39,774 - root - INFO - step: 37255 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 21:23:39,774 - root - INFO - lr: 5.5314e-06 gnorm: 1.21 [22:49:28< 1:40:54] +[titan] 2025-10-05 21:23:50,632 - root - INFO - step: 37260 loss: 1.8928 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 21:23:50,632 - root - INFO - lr: 5.5295e-06 gnorm: 1.20 [22:49:38< 1:40:43] +[titan] 2025-10-05 21:24:01,494 - root - INFO - step: 37265 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 21:24:01,495 - root - INFO - lr: 5.5275e-06 gnorm: 1.21 [22:49:49< 1:40:32] +[titan] 2025-10-05 21:24:12,333 - root - INFO - step: 37270 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 21:24:12,333 - root - INFO - lr: 5.5256e-06 gnorm: 1.24 [22:50:00< 1:40:21] +[titan] 2025-10-05 21:24:23,189 - root - INFO - step: 37275 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 21:24:23,189 - root - INFO - lr: 5.5237e-06 gnorm: 1.22 [22:50:11< 1:40:10] +[titan] 2025-10-05 21:24:34,040 - root - INFO - step: 37280 loss: 1.8747 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 21:24:34,041 - root - INFO - lr: 
5.5218e-06 gnorm: 1.19 [22:50:22< 1:39:59] +[titan] 2025-10-05 21:24:44,965 - root - INFO - step: 37285 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6500 +[titan] 2025-10-05 21:24:44,965 - root - INFO - lr: 5.5199e-06 gnorm: 1.23 [22:50:33< 1:39:48] +[titan] 2025-10-05 21:24:55,829 - root - INFO - step: 37290 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6803 +[titan] 2025-10-05 21:24:55,829 - root - INFO - lr: 5.5180e-06 gnorm: 1.24 [22:50:44< 1:39:36] +[titan] 2025-10-05 21:25:06,686 - root - INFO - step: 37295 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 21:25:06,686 - root - INFO - lr: 5.5161e-06 gnorm: 1.23 [22:50:55< 1:39:25] +[titan] 2025-10-05 21:25:15,356 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:25:17,530 - root - INFO - step: 37300 loss: 1.9230 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 21:25:17,530 - root - INFO - lr: 5.5142e-06 gnorm: 1.29 [22:51:05< 1:39:14] +[titan] 2025-10-05 21:25:28,378 - root - INFO - step: 37305 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 21:25:28,378 - root - INFO - lr: 5.5123e-06 gnorm: 1.28 [22:51:16< 1:39:03] +[titan] 2025-10-05 21:25:39,211 - root - INFO - step: 37310 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 21:25:39,211 - root - INFO - lr: 5.5104e-06 gnorm: 1.28 [22:51:27< 1:38:52] +[titan] 2025-10-05 21:25:50,153 - root - INFO - step: 37315 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 21:25:50,153 - root - INFO - lr: 5.5085e-06 gnorm: 1.24 [22:51:38< 1:38:41] +[titan] 2025-10-05 21:26:01,007 - root - INFO - step: 37320 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 21:26:01,007 - root - INFO - lr: 5.5066e-06 gnorm: 1.22 [22:51:49< 1:38:30] +[titan] 2025-10-05 21:26:11,849 - root - INFO - step: 37325 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7401 +[titan] 2025-10-05 21:26:11,849 - root - INFO - lr: 5.5047e-06 gnorm: 1.28 [22:52:00< 1:38:19] +[titan] 2025-10-05 21:26:22,692 - root - INFO - step: 37330 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 21:26:22,692 - root - INFO - lr: 
5.5028e-06 gnorm: 1.20 [22:52:11< 1:38:08] +[titan] 2025-10-05 21:26:33,566 - root - INFO - step: 37335 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 21:26:33,566 - root - INFO - lr: 5.5010e-06 gnorm: 1.21 [22:52:21< 1:37:57] +[titan] 2025-10-05 21:26:44,447 - root - INFO - step: 37340 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 21:26:44,447 - root - INFO - lr: 5.4991e-06 gnorm: 1.30 [22:52:32< 1:37:46] +[titan] 2025-10-05 21:26:55,353 - root - INFO - step: 37345 loss: 1.8670 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6522 +[titan] 2025-10-05 21:26:55,354 - root - INFO - lr: 5.4972e-06 gnorm: 1.19 [22:52:43< 1:37:35] +[titan] 2025-10-05 21:27:04,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:27:06,210 - root - INFO - step: 37350 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 21:27:06,210 - root - INFO - lr: 5.4954e-06 gnorm: 1.23 [22:52:54< 1:37:24] +[titan] 2025-10-05 21:27:17,097 - root - INFO - step: 37355 loss: 1.8844 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 21:27:17,097 - root - INFO - lr: 5.4935e-06 gnorm: 1.22 [22:53:05< 1:37:13] +[titan] 2025-10-05 21:27:27,968 - root - INFO - step: 37360 loss: 1.8981 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:27:27,968 - root - INFO - lr: 5.4917e-06 gnorm: 1.24 [22:53:16< 1:37:02] +[titan] 2025-10-05 21:27:38,788 - root - INFO - step: 37365 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 21:27:38,788 - root - INFO - lr: 5.4898e-06 gnorm: 1.22 [22:53:27< 1:36:51] +[titan] 2025-10-05 21:27:49,689 - root - INFO - step: 37370 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 21:27:49,689 - root - INFO - lr: 5.4880e-06 gnorm: 1.26 [22:53:38< 1:36:40] +[titan] 2025-10-05 21:28:00,629 - root - INFO - step: 37375 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 21:28:00,630 - root - INFO - lr: 5.4861e-06 gnorm: 1.28 [22:53:48< 1:36:29] +[titan] 2025-10-05 21:28:02,992 - root - INFO - Dumping profiler traces at step 37376 +[titan] 2025-10-05 21:28:03,029 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:28:11,751 - root - INFO - step: 37380 loss: 
1.9803 memory: 118.84GiB(85.28%) tps: 29,465 tflops: 408.78 mfu: 41.33% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 21:28:11,751 - root - INFO - lr: 5.4843e-06 gnorm: 1.26 [22:54:00< 1:36:18] +[titan] 2025-10-05 21:28:22,605 - root - INFO - step: 37385 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 21:28:22,605 - root - INFO - lr: 5.4824e-06 gnorm: 1.23 [22:54:10< 1:36:07] +[titan] 2025-10-05 21:28:33,443 - root - INFO - step: 37390 loss: 1.8450 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6318 +[titan] 2025-10-05 21:28:33,443 - root - INFO - lr: 5.4806e-06 gnorm: 1.24 [22:54:21< 1:35:56] +[titan] 2025-10-05 21:28:44,325 - root - INFO - step: 37395 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:28:44,326 - root - INFO - lr: 5.4788e-06 gnorm: 1.22 [22:54:32< 1:35:45] +[titan] 2025-10-05 21:28:52,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:28:55,171 - root - INFO - step: 37400 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:28:55,171 - root - INFO - lr: 5.4769e-06 gnorm: 1.21 [22:54:43< 1:35:34] +[titan] 2025-10-05 21:29:06,005 - root - INFO - step: 37405 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 21:29:06,006 - root - INFO - lr: 5.4751e-06 gnorm: 1.21 [22:54:54< 1:35:23] +[titan] 2025-10-05 21:29:16,874 - root - INFO - step: 37410 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 21:29:16,874 - root - INFO - lr: 5.4733e-06 gnorm: 1.21 [22:55:05< 1:35:12] +[titan] 2025-10-05 21:29:27,686 - root - INFO - step: 37415 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 21:29:27,686 - root - INFO - lr: 5.4715e-06 gnorm: 1.19 [22:55:16< 1:35:01] +[titan] 2025-10-05 21:29:38,526 - root - INFO - step: 37420 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:29:38,526 - root - INFO - lr: 5.4696e-06 gnorm: 1.22 [22:55:26< 1:34:49] +[titan] 2025-10-05 21:29:49,408 - root - INFO - step: 37425 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 21:29:49,408 - root - INFO - lr: 5.4678e-06 gnorm: 1.25 [22:55:37< 1:34:38] +[titan] 2025-10-05 21:30:00,250 - root - INFO - step: 37430 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 21:30:00,250 - root - INFO - lr: 
5.4660e-06 gnorm: 1.23 [22:55:48< 1:34:27] +[titan] 2025-10-05 21:30:11,084 - root - INFO - step: 37435 loss: 1.9022 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:30:11,084 - root - INFO - lr: 5.4642e-06 gnorm: 1.27 [22:55:59< 1:34:16] +[titan] 2025-10-05 21:30:21,909 - root - INFO - step: 37440 loss: 1.9502 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:30:21,909 - root - INFO - lr: 5.4624e-06 gnorm: 1.24 [22:56:10< 1:34:05] +[titan] 2025-10-05 21:30:32,791 - root - INFO - step: 37445 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7483 +[titan] 2025-10-05 21:30:32,792 - root - INFO - lr: 5.4606e-06 gnorm: 1.30 [22:56:21< 1:33:54] +[titan] 2025-10-05 21:30:41,432 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:30:43,606 - root - INFO - step: 37450 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 21:30:43,606 - root - INFO - lr: 5.4588e-06 gnorm: 1.25 [22:56:31< 1:33:43] +[titan] 2025-10-05 21:30:54,447 - root - INFO - step: 37455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 21:30:54,447 - root - INFO - lr: 5.4570e-06 gnorm: 1.27 [22:56:42< 1:33:32] +[titan] 2025-10-05 21:31:05,288 - root - INFO - step: 37460 loss: 1.8916 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:31:05,288 - root - INFO - lr: 5.4552e-06 gnorm: 1.22 [22:56:53< 1:33:21] +[titan] 2025-10-05 21:31:16,146 - root - INFO - step: 37465 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 21:31:16,146 - root - INFO - lr: 5.4535e-06 gnorm: 1.26 [22:57:04< 1:33:10] +[titan] 2025-10-05 21:31:26,988 - root - INFO - step: 37470 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 21:31:26,988 - root - INFO - lr: 5.4517e-06 gnorm: 1.26 [22:57:15< 1:32:59] +[titan] 2025-10-05 21:31:37,863 - root - INFO - step: 37475 loss: 1.8457 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2133 global_avg_mtp_loss: 1.6324 +[titan] 2025-10-05 21:31:37,863 - root - INFO - lr: 5.4499e-06 gnorm: 1.20 [22:57:26< 1:32:48] +[titan] 2025-10-05 21:31:48,716 - root - INFO - step: 37480 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6511 +[titan] 2025-10-05 21:31:48,716 - root - INFO - lr: 
5.4481e-06 gnorm: 1.22 [22:57:37< 1:32:37] +[titan] 2025-10-05 21:31:59,576 - root - INFO - step: 37485 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6883 +[titan] 2025-10-05 21:31:59,577 - root - INFO - lr: 5.4463e-06 gnorm: 1.26 [22:57:47< 1:32:26] +[titan] 2025-10-05 21:32:10,434 - root - INFO - step: 37490 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 21:32:10,434 - root - INFO - lr: 5.4446e-06 gnorm: 1.24 [22:57:58< 1:32:15] +[titan] 2025-10-05 21:32:21,290 - root - INFO - step: 37495 loss: 1.9993 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 21:32:21,290 - root - INFO - lr: 5.4428e-06 gnorm: 1.24 [22:58:09< 1:32:04] +[titan] 2025-10-05 21:32:29,977 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:32:32,151 - root - INFO - step: 37500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 21:32:32,151 - root - INFO - lr: 5.4411e-06 gnorm: 1.29 [22:58:20< 1:31:53] +[titan] 2025-10-05 21:32:43,013 - root - INFO - step: 37505 loss: 1.8923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:32:43,014 - root - INFO - lr: 5.4393e-06 gnorm: 1.21 [22:58:31< 1:31:42] +[titan] 2025-10-05 21:32:53,853 - root - INFO - step: 37510 loss: 1.9490 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7237 +[titan] 2025-10-05 21:32:53,854 - root - INFO - lr: 5.4375e-06 gnorm: 1.21 [22:58:42< 1:31:31] +[titan] 2025-10-05 21:33:04,724 - root - INFO - step: 37515 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 21:33:04,724 - root - INFO - lr: 5.4358e-06 gnorm: 1.24 [22:58:53< 1:31:20] +[titan] 2025-10-05 21:33:15,605 - root - INFO - step: 37520 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:33:15,605 - root - INFO - lr: 5.4341e-06 gnorm: 1.22 [22:59:03< 1:31:09] +[titan] 2025-10-05 21:33:26,465 - root - INFO - step: 37525 loss: 1.8732 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6563 +[titan] 2025-10-05 21:33:26,465 - root - INFO - lr: 5.4323e-06 gnorm: 1.23 [22:59:14< 1:30:58] +[titan] 2025-10-05 21:33:37,315 - root - INFO - step: 37530 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 21:33:37,315 - root - INFO - lr: 
5.4306e-06 gnorm: 1.23 [22:59:25< 1:30:47] +[titan] 2025-10-05 21:33:48,179 - root - INFO - step: 37535 loss: 1.8524 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6376 +[titan] 2025-10-05 21:33:48,179 - root - INFO - lr: 5.4288e-06 gnorm: 1.25 [22:59:36< 1:30:36] +[titan] 2025-10-05 21:33:59,032 - root - INFO - step: 37540 loss: 1.8890 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 21:33:59,032 - root - INFO - lr: 5.4271e-06 gnorm: 1.22 [22:59:47< 1:30:25] +[titan] 2025-10-05 21:34:09,894 - root - INFO - step: 37545 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:34:09,894 - root - INFO - lr: 5.4254e-06 gnorm: 1.24 [22:59:58< 1:30:14] +[titan] 2025-10-05 21:34:18,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:34:20,750 - root - INFO - step: 37550 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6861 +[titan] 2025-10-05 21:34:20,750 - root - INFO - lr: 5.4236e-06 gnorm: 1.24 [23:00:09< 1:30:02] +[titan] 2025-10-05 21:34:31,630 - root - INFO - step: 37555 loss: 1.9520 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 21:34:31,630 - root - INFO - lr: 5.4219e-06 gnorm: 1.21 [23:00:19< 1:29:51] +[titan] 2025-10-05 21:34:42,476 - root - INFO - step: 37560 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7291 +[titan] 2025-10-05 21:34:42,476 - root - INFO - lr: 5.4202e-06 gnorm: 1.23 [23:00:30< 1:29:40] +[titan] 2025-10-05 21:34:53,333 - root - INFO - step: 37565 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 21:34:53,333 - root - INFO - lr: 5.4185e-06 gnorm: 1.26 [23:00:41< 1:29:29] +[titan] 2025-10-05 21:35:04,184 - root - INFO - step: 37570 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 21:35:04,184 - root - INFO - lr: 5.4168e-06 gnorm: 1.30 [23:00:52< 1:29:18] +[titan] 2025-10-05 21:35:15,037 - root - INFO - step: 37575 loss: 1.8778 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6614 +[titan] 2025-10-05 21:35:15,037 - root - INFO - lr: 5.4151e-06 gnorm: 1.21 [23:01:03< 1:29:07] +[titan] 2025-10-05 21:35:25,912 - root - INFO - step: 37580 loss: 1.8864 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6681 +[titan] 2025-10-05 21:35:25,913 - root - INFO - lr: 
5.4134e-06 gnorm: 1.23 [23:01:14< 1:28:56] +[titan] 2025-10-05 21:35:36,806 - root - INFO - step: 37585 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 21:35:36,807 - root - INFO - lr: 5.4117e-06 gnorm: 1.25 [23:01:25< 1:28:45] +[titan] 2025-10-05 21:35:47,715 - root - INFO - step: 37590 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 21:35:47,716 - root - INFO - lr: 5.4100e-06 gnorm: 1.22 [23:01:36< 1:28:34] +[titan] 2025-10-05 21:35:58,598 - root - INFO - step: 37595 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:35:58,598 - root - INFO - lr: 5.4083e-06 gnorm: 1.20 [23:01:46< 1:28:23] +[titan] 2025-10-05 21:36:07,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:36:09,461 - root - INFO - step: 37600 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7104 +[titan] 2025-10-05 21:36:09,461 - root - INFO - lr: 5.4066e-06 gnorm: 1.22 [23:01:57< 1:28:12] +[titan] 2025-10-05 21:36:20,345 - root - INFO - step: 37605 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 21:36:20,345 - root - INFO - lr: 5.4049e-06 gnorm: 1.28 [23:02:08< 1:28:01] +[titan] 2025-10-05 21:36:31,206 - root - INFO - step: 37610 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 21:36:31,206 - root - INFO - lr: 5.4032e-06 gnorm: 1.20 [23:02:19< 1:27:50] +[titan] 2025-10-05 21:36:42,084 - root - INFO - step: 37615 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:36:42,084 - root - INFO - lr: 5.4015e-06 gnorm: 1.28 [23:02:30< 1:27:39] +[titan] 2025-10-05 21:36:52,956 - root - INFO - step: 37620 loss: 2.0281 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 21:36:52,956 - root - INFO - lr: 5.3999e-06 gnorm: 1.25 [23:02:41< 1:27:28] +[titan] 2025-10-05 21:37:03,800 - root - INFO - step: 37625 loss: 1.8956 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6758 +[titan] 2025-10-05 21:37:03,800 - root - INFO - lr: 5.3982e-06 gnorm: 1.23 [23:02:52< 1:27:17] +[titan] 2025-10-05 21:37:14,649 - root - INFO - step: 37630 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 21:37:14,649 - root - INFO - lr: 
5.3965e-06 gnorm: 1.33 [23:03:02< 1:27:06] +[titan] 2025-10-05 21:37:25,497 - root - INFO - step: 37635 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 21:37:25,497 - root - INFO - lr: 5.3948e-06 gnorm: 1.24 [23:03:13< 1:26:55] +[titan] 2025-10-05 21:37:36,353 - root - INFO - step: 37640 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 21:37:36,353 - root - INFO - lr: 5.3932e-06 gnorm: 1.22 [23:03:24< 1:26:44] +[titan] 2025-10-05 21:37:47,208 - root - INFO - step: 37645 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7232 +[titan] 2025-10-05 21:37:47,208 - root - INFO - lr: 5.3915e-06 gnorm: 1.27 [23:03:35< 1:26:33] +[titan] 2025-10-05 21:37:55,906 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:37:58,091 - root - INFO - step: 37650 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6691 +[titan] 2025-10-05 21:37:58,091 - root - INFO - lr: 5.3899e-06 gnorm: 1.23 [23:03:46< 1:26:22] +[titan] 2025-10-05 21:38:08,977 - root - INFO - step: 37655 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 21:38:08,977 - root - INFO - lr: 5.3882e-06 gnorm: 1.28 [23:03:57< 1:26:11] +[titan] 2025-10-05 21:38:19,857 - root - INFO - step: 37660 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 21:38:19,857 - root - INFO - lr: 5.3866e-06 gnorm: 1.29 [23:04:08< 1:26:00] +[titan] 2025-10-05 21:38:30,712 - root - INFO - step: 37665 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6720 +[titan] 2025-10-05 21:38:30,712 - root - INFO - lr: 5.3849e-06 gnorm: 1.23 [23:04:19< 1:25:49] +[titan] 2025-10-05 21:38:41,564 - root - INFO - step: 37670 loss: 1.8372 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6256 +[titan] 2025-10-05 21:38:41,565 - root - INFO - lr: 5.3833e-06 gnorm: 1.21 [23:04:29< 1:25:38] +[titan] 2025-10-05 21:38:52,429 - root - INFO - step: 37675 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 21:38:52,429 - root - INFO - lr: 5.3816e-06 gnorm: 1.26 [23:04:40< 1:25:27] +[titan] 2025-10-05 21:39:03,314 - root - INFO - step: 37680 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 21:39:03,314 - root - INFO - lr: 
5.3800e-06 gnorm: 1.28 [23:04:51< 1:25:16] +[titan] 2025-10-05 21:39:14,212 - root - INFO - step: 37685 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 21:39:14,212 - root - INFO - lr: 5.3784e-06 gnorm: 1.22 [23:05:02< 1:25:05] +[titan] 2025-10-05 21:39:25,089 - root - INFO - step: 37690 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 21:39:25,089 - root - INFO - lr: 5.3767e-06 gnorm: 1.24 [23:05:13< 1:24:53] +[titan] 2025-10-05 21:39:35,965 - root - INFO - step: 37695 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 21:39:35,965 - root - INFO - lr: 5.3751e-06 gnorm: 1.26 [23:05:24< 1:24:42] +[titan] 2025-10-05 21:39:44,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:39:46,832 - root - INFO - step: 37700 loss: 1.8803 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:39:46,832 - root - INFO - lr: 5.3735e-06 gnorm: 1.24 [23:05:35< 1:24:31] +[titan] 2025-10-05 21:39:57,708 - root - INFO - step: 37705 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:39:57,708 - root - INFO - lr: 5.3719e-06 gnorm: 1.24 [23:05:46< 1:24:20] +[titan] 2025-10-05 21:40:08,584 - root - INFO - step: 37710 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6486 +[titan] 2025-10-05 21:40:08,584 - root - INFO - lr: 5.3703e-06 gnorm: 1.23 [23:05:56< 1:24:09] +[titan] 2025-10-05 21:40:19,491 - root - INFO - step: 37715 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 21:40:19,491 - root - INFO - lr: 5.3687e-06 gnorm: 1.24 [23:06:07< 1:23:58] +[titan] 2025-10-05 21:40:30,374 - root - INFO - step: 37720 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 21:40:30,375 - root - INFO - lr: 5.3671e-06 gnorm: 1.21 [23:06:18< 1:23:47] +[titan] 2025-10-05 21:40:41,250 - root - INFO - step: 37725 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 21:40:41,250 - root - INFO - lr: 5.3654e-06 gnorm: 1.21 [23:06:29< 1:23:36] +[titan] 2025-10-05 21:40:52,074 - root - INFO - step: 37730 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 21:40:52,075 - root - INFO - lr: 
5.3638e-06 gnorm: 1.23 [23:06:40< 1:23:25] +[titan] 2025-10-05 21:41:02,927 - root - INFO - step: 37735 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 21:41:02,928 - root - INFO - lr: 5.3622e-06 gnorm: 1.24 [23:06:51< 1:23:14] +[titan] 2025-10-05 21:41:13,783 - root - INFO - step: 37740 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 21:41:13,783 - root - INFO - lr: 5.3607e-06 gnorm: 1.24 [23:07:02< 1:23:03] +[titan] 2025-10-05 21:41:24,647 - root - INFO - step: 37745 loss: 1.8905 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 21:41:24,647 - root - INFO - lr: 5.3591e-06 gnorm: 1.24 [23:07:12< 1:22:52] +[titan] 2025-10-05 21:41:33,361 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:41:35,546 - root - INFO - step: 37750 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 21:41:35,546 - root - INFO - lr: 5.3575e-06 gnorm: 1.24 [23:07:23< 1:22:41] +[titan] 2025-10-05 21:41:46,407 - root - INFO - step: 37755 loss: 1.8127 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6029 +[titan] 2025-10-05 21:41:46,407 - root - INFO - lr: 5.3559e-06 gnorm: 1.23 [23:07:34< 1:22:30] +[titan] 2025-10-05 21:41:57,261 - root - INFO - step: 37760 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 21:41:57,261 - root - INFO - lr: 5.3543e-06 gnorm: 1.27 [23:07:45< 1:22:19] +[titan] 2025-10-05 21:42:08,104 - root - INFO - step: 37765 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6996 +[titan] 2025-10-05 21:42:08,104 - root - INFO - lr: 5.3527e-06 gnorm: 1.22 [23:07:56< 1:22:08] +[titan] 2025-10-05 21:42:18,953 - root - INFO - step: 37770 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:42:18,954 - root - INFO - lr: 5.3512e-06 gnorm: 1.28 [23:08:07< 1:21:57] +[titan] 2025-10-05 21:42:29,811 - root - INFO - step: 37775 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 21:42:29,811 - root - INFO - lr: 5.3496e-06 gnorm: 1.30 [23:08:18< 1:21:46] +[titan] 2025-10-05 21:42:40,701 - root - INFO - step: 37780 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 21:42:40,701 - root - INFO - lr: 
5.3480e-06 gnorm: 1.24 [23:08:29< 1:21:35] +[titan] 2025-10-05 21:42:51,568 - root - INFO - step: 37785 loss: 1.8503 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 21:42:51,568 - root - INFO - lr: 5.3465e-06 gnorm: 1.25 [23:08:39< 1:21:24] +[titan] 2025-10-05 21:43:02,441 - root - INFO - step: 37790 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 21:43:02,441 - root - INFO - lr: 5.3449e-06 gnorm: 1.29 [23:08:50< 1:21:13] +[titan] 2025-10-05 21:43:13,297 - root - INFO - step: 37795 loss: 1.9468 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7219 +[titan] 2025-10-05 21:43:13,297 - root - INFO - lr: 5.3434e-06 gnorm: 1.25 [23:09:01< 1:21:02] +[titan] 2025-10-05 21:43:21,968 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:43:24,171 - root - INFO - step: 37800 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 21:43:24,171 - root - INFO - lr: 5.3418e-06 gnorm: 1.23 [23:09:12< 1:20:51] +[titan] 2025-10-05 21:43:35,037 - root - INFO - step: 37805 loss: 1.9248 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 21:43:35,037 - root - INFO - lr: 5.3403e-06 gnorm: 1.25 [23:09:23< 1:20:40] +[titan] 2025-10-05 21:43:45,919 - root - INFO - step: 37810 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:43:45,919 - root - INFO - lr: 5.3387e-06 gnorm: 1.21 [23:09:34< 1:20:29] +[titan] 2025-10-05 21:43:56,805 - root - INFO - step: 37815 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 21:43:56,805 - root - INFO - lr: 5.3372e-06 gnorm: 1.27 [23:09:45< 1:20:18] +[titan] 2025-10-05 21:44:07,687 - root - INFO - step: 37820 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 21:44:07,687 - root - INFO - lr: 5.3356e-06 gnorm: 1.30 [23:09:55< 1:20:07] +[titan] 2025-10-05 21:44:18,545 - root - INFO - step: 37825 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:44:18,545 - root - INFO - lr: 5.3341e-06 gnorm: 1.27 [23:10:06< 1:19:56] +[titan] 2025-10-05 21:44:29,413 - root - INFO - step: 37830 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 21:44:29,413 - root - INFO - lr: 
5.3326e-06 gnorm: 1.21 [23:10:17< 1:19:44] +[titan] 2025-10-05 21:44:40,283 - root - INFO - step: 37835 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 21:44:40,283 - root - INFO - lr: 5.3310e-06 gnorm: 1.22 [23:10:28< 1:19:33] +[titan] 2025-10-05 21:44:51,148 - root - INFO - step: 37840 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 21:44:51,149 - root - INFO - lr: 5.3295e-06 gnorm: 1.21 [23:10:39< 1:19:22] +[titan] 2025-10-05 21:45:02,046 - root - INFO - step: 37845 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:45:02,047 - root - INFO - lr: 5.3280e-06 gnorm: 1.25 [23:10:50< 1:19:11] +[titan] 2025-10-05 21:45:10,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:45:12,905 - root - INFO - step: 37850 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:45:12,905 - root - INFO - lr: 5.3265e-06 gnorm: 1.23 [23:11:01< 1:19:00] +[titan] 2025-10-05 21:45:23,773 - root - INFO - step: 37855 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6640 +[titan] 2025-10-05 21:45:23,773 - root - INFO - lr: 5.3250e-06 gnorm: 1.22 [23:11:12< 1:18:49] +[titan] 2025-10-05 21:45:34,638 - root - INFO - step: 37860 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7081 +[titan] 2025-10-05 21:45:34,638 - root - INFO - lr: 5.3235e-06 gnorm: 1.24 [23:11:22< 1:18:38] +[titan] 2025-10-05 21:45:45,491 - root - INFO - step: 37865 loss: 1.9514 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7254 +[titan] 2025-10-05 21:45:45,491 - root - INFO - lr: 5.3220e-06 gnorm: 1.24 [23:11:33< 1:18:27] +[titan] 2025-10-05 21:45:56,352 - root - INFO - step: 37870 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6506 +[titan] 2025-10-05 21:45:56,353 - root - INFO - lr: 5.3205e-06 gnorm: 1.21 [23:11:44< 1:18:16] +[titan] 2025-10-05 21:46:07,270 - root - INFO - step: 37875 loss: 1.9195 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 21:46:07,271 - root - INFO - lr: 5.3190e-06 gnorm: 1.24 [23:11:55< 1:18:05] +[titan] 2025-10-05 21:46:18,130 - root - INFO - step: 37880 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 21:46:18,131 - root - INFO - lr: 
5.3175e-06 gnorm: 1.26 [23:12:06< 1:17:54] +[titan] 2025-10-05 21:46:29,081 - root - INFO - step: 37885 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:46:29,082 - root - INFO - lr: 5.3160e-06 gnorm: 1.22 [23:12:17< 1:17:43] +[titan] 2025-10-05 21:46:35,788 - root - INFO - Dumping profiler traces at step 37888 +[titan] 2025-10-05 21:46:35,827 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:46:40,200 - root - INFO - step: 37890 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 29,473 tflops: 408.89 mfu: 41.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 21:46:40,200 - root - INFO - lr: 5.3145e-06 gnorm: 1.28 [23:12:28< 1:17:32] +[titan] 2025-10-05 21:46:51,073 - root - INFO - step: 37895 loss: 1.9689 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 21:46:51,073 - root - INFO - lr: 5.3130e-06 gnorm: 1.23 [23:12:39< 1:17:21] +[titan] 2025-10-05 21:46:59,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:47:01,983 - root - INFO - step: 37900 loss: 1.9609 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:47:01,983 - root - INFO - lr: 5.3115e-06 gnorm: 1.24 [23:12:50< 1:17:10] +[titan] 2025-10-05 21:47:12,859 - root - INFO - step: 37905 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7007 +[titan] 2025-10-05 21:47:12,859 - root - INFO - lr: 5.3100e-06 gnorm: 1.27 [23:13:01< 1:16:59] +[titan] 2025-10-05 21:47:23,757 - root - INFO - step: 37910 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 21:47:23,757 - root - INFO - lr: 5.3086e-06 gnorm: 1.26 [23:13:12< 1:16:48] +[titan] 2025-10-05 21:47:34,635 - root - INFO - step: 37915 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6659 +[titan] 2025-10-05 21:47:34,635 - root - INFO - lr: 5.3071e-06 gnorm: 1.24 [23:13:22< 1:16:37] +[titan] 2025-10-05 21:47:45,522 - root - INFO - step: 37920 loss: 1.8835 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:47:45,522 - root - INFO - lr: 5.3056e-06 gnorm: 1.20 [23:13:33< 1:16:26] +[titan] 2025-10-05 21:47:56,386 - root - INFO - step: 37925 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 21:47:56,386 - root - INFO - lr: 5.3042e-06 gnorm: 1.28 [23:13:44< 1:16:15] +[titan] 2025-10-05 21:48:07,399 - root - INFO - step: 37930 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,753 tflops: 412.78 mfu: 41.74% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 21:48:07,400 - root - INFO - lr: 
5.3027e-06 gnorm: 1.23 [23:13:55< 1:16:04] +[titan] 2025-10-05 21:48:18,249 - root - INFO - step: 37935 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 21:48:18,249 - root - INFO - lr: 5.3012e-06 gnorm: 1.24 [23:14:06< 1:15:53] +[titan] 2025-10-05 21:48:29,154 - root - INFO - step: 37940 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 21:48:29,154 - root - INFO - lr: 5.2998e-06 gnorm: 1.26 [23:14:17< 1:15:42] +[titan] 2025-10-05 21:48:40,024 - root - INFO - step: 37945 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6992 +[titan] 2025-10-05 21:48:40,024 - root - INFO - lr: 5.2983e-06 gnorm: 1.29 [23:14:28< 1:15:31] +[titan] 2025-10-05 21:48:48,689 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:48:50,876 - root - INFO - step: 37950 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:48:50,876 - root - INFO - lr: 5.2969e-06 gnorm: 1.28 [23:14:39< 1:15:20] +[titan] 2025-10-05 21:49:01,777 - root - INFO - step: 37955 loss: 1.9146 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:49:01,777 - root - INFO - lr: 5.2954e-06 gnorm: 1.23 [23:14:50< 1:15:09] +[titan] 2025-10-05 21:49:12,633 - root - INFO - step: 37960 loss: 1.9032 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:49:12,633 - root - INFO - lr: 5.2940e-06 gnorm: 1.25 [23:15:00< 1:14:58] +[titan] 2025-10-05 21:49:23,498 - root - INFO - step: 37965 loss: 1.8874 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 21:49:23,498 - root - INFO - lr: 5.2926e-06 gnorm: 1.21 [23:15:11< 1:14:47] +[titan] 2025-10-05 21:49:34,372 - root - INFO - step: 37970 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 21:49:34,372 - root - INFO - lr: 5.2911e-06 gnorm: 1.25 [23:15:22< 1:14:36] +[titan] 2025-10-05 21:49:45,244 - root - INFO - step: 37975 loss: 1.9350 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 21:49:45,244 - root - INFO - lr: 5.2897e-06 gnorm: 1.25 [23:15:33< 1:14:25] +[titan] 2025-10-05 21:49:56,122 - root - INFO - step: 37980 loss: 2.0219 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7886 +[titan] 2025-10-05 21:49:56,122 - root - INFO - lr: 
5.2883e-06 gnorm: 1.31 [23:15:44< 1:14:14] +[titan] 2025-10-05 21:50:07,019 - root - INFO - step: 37985 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:50:07,019 - root - INFO - lr: 5.2869e-06 gnorm: 1.24 [23:15:55< 1:14:02] +[titan] 2025-10-05 21:50:17,884 - root - INFO - step: 37990 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 21:50:17,884 - root - INFO - lr: 5.2854e-06 gnorm: 1.22 [23:16:06< 1:13:51] +[titan] 2025-10-05 21:50:28,745 - root - INFO - step: 37995 loss: 1.8863 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 21:50:28,745 - root - INFO - lr: 5.2840e-06 gnorm: 1.21 [23:16:17< 1:13:40] +[titan] 2025-10-05 21:50:37,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:50:39,603 - root - INFO - step: 38000 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:50:39,603 - root - INFO - lr: 5.2826e-06 gnorm: 1.24 [23:16:27< 1:13:29] +[titan] 2025-10-05 21:50:50,499 - root - INFO - step: 38005 loss: 1.9446 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:50:50,499 - root - INFO - lr: 5.2812e-06 gnorm: 1.24 [23:16:38< 1:13:18] +[titan] 2025-10-05 21:51:01,361 - root - INFO - step: 38010 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 21:51:01,361 - root - INFO - lr: 5.2798e-06 gnorm: 1.25 [23:16:49< 1:13:07] +[titan] 2025-10-05 21:51:12,250 - root - INFO - step: 38015 loss: 1.9035 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 21:51:12,250 - root - INFO - lr: 5.2784e-06 gnorm: 1.23 [23:17:00< 1:12:56] +[titan] 2025-10-05 21:51:23,111 - root - INFO - step: 38020 loss: 1.9570 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 21:51:23,111 - root - INFO - lr: 5.2770e-06 gnorm: 1.26 [23:17:11< 1:12:45] +[titan] 2025-10-05 21:51:33,966 - root - INFO - step: 38025 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:51:33,966 - root - INFO - lr: 5.2756e-06 gnorm: 1.24 [23:17:22< 1:12:34] +[titan] 2025-10-05 21:51:44,841 - root - INFO - step: 38030 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6303 +[titan] 2025-10-05 21:51:44,841 - root - INFO - lr: 
5.2742e-06 gnorm: 1.22 [23:17:33< 1:12:23] +[titan] 2025-10-05 21:51:55,747 - root - INFO - step: 38035 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 21:51:55,747 - root - INFO - lr: 5.2728e-06 gnorm: 1.23 [23:17:44< 1:12:12] +[titan] 2025-10-05 21:52:06,666 - root - INFO - step: 38040 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:52:06,666 - root - INFO - lr: 5.2714e-06 gnorm: 1.25 [23:17:54< 1:12:01] +[titan] 2025-10-05 21:52:17,555 - root - INFO - step: 38045 loss: 1.8640 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6482 +[titan] 2025-10-05 21:52:17,555 - root - INFO - lr: 5.2701e-06 gnorm: 1.25 [23:18:05< 1:11:50] +[titan] 2025-10-05 21:52:26,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:52:28,442 - root - INFO - step: 38050 loss: 1.8572 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6424 +[titan] 2025-10-05 21:52:28,442 - root - INFO - lr: 5.2687e-06 gnorm: 1.21 [23:18:16< 1:11:39] +[titan] 2025-10-05 21:52:39,324 - root - INFO - step: 38055 loss: 1.9652 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 21:52:39,325 - root - INFO - lr: 5.2673e-06 gnorm: 1.23 [23:18:27< 1:11:28] +[titan] 2025-10-05 21:52:50,189 - root - INFO - step: 38060 loss: 1.9568 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 21:52:50,189 - root - INFO - lr: 5.2659e-06 gnorm: 1.26 [23:18:38< 1:11:17] +[titan] 2025-10-05 21:53:01,061 - root - INFO - step: 38065 loss: 1.8871 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 21:53:01,061 - root - INFO - lr: 5.2646e-06 gnorm: 1.22 [23:18:49< 1:11:06] +[titan] 2025-10-05 21:53:12,017 - root - INFO - step: 38070 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 21:53:12,018 - root - INFO - lr: 5.2632e-06 gnorm: 1.21 [23:19:00< 1:10:55] +[titan] 2025-10-05 21:53:22,903 - root - INFO - step: 38075 loss: 1.8578 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6438 +[titan] 2025-10-05 21:53:22,903 - root - INFO - lr: 5.2619e-06 gnorm: 1.21 [23:19:11< 1:10:44] +[titan] 2025-10-05 21:53:33,778 - root - INFO - step: 38080 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:53:33,779 - root - INFO - lr: 
5.2605e-06 gnorm: 1.25 [23:19:22< 1:10:33] +[titan] 2025-10-05 21:53:44,628 - root - INFO - step: 38085 loss: 1.9527 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:53:44,628 - root - INFO - lr: 5.2591e-06 gnorm: 1.23 [23:19:32< 1:10:22] +[titan] 2025-10-05 21:53:55,480 - root - INFO - step: 38090 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6835 +[titan] 2025-10-05 21:53:55,480 - root - INFO - lr: 5.2578e-06 gnorm: 1.21 [23:19:43< 1:10:11] +[titan] 2025-10-05 21:54:06,381 - root - INFO - step: 38095 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:54:06,381 - root - INFO - lr: 5.2565e-06 gnorm: 1.32 [23:19:54< 1:10:00] +[titan] 2025-10-05 21:54:15,086 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:54:17,264 - root - INFO - step: 38100 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:54:17,264 - root - INFO - lr: 5.2551e-06 gnorm: 1.21 [23:20:05< 1:09:49] +[titan] 2025-10-05 21:54:28,122 - root - INFO - step: 38105 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:54:28,123 - root - INFO - lr: 5.2538e-06 gnorm: 1.25 [23:20:16< 1:09:38] +[titan] 2025-10-05 21:54:38,982 - root - INFO - step: 38110 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:54:38,982 - root - INFO - lr: 5.2524e-06 gnorm: 1.30 [23:20:27< 1:09:27] +[titan] 2025-10-05 21:54:49,840 - root - INFO - step: 38115 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 21:54:49,840 - root - INFO - lr: 5.2511e-06 gnorm: 1.27 [23:20:38< 1:09:16] +[titan] 2025-10-05 21:55:00,693 - root - INFO - step: 38120 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:55:00,694 - root - INFO - lr: 5.2498e-06 gnorm: 1.22 [23:20:48< 1:09:05] +[titan] 2025-10-05 21:55:11,613 - root - INFO - step: 38125 loss: 1.8922 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:55:11,613 - root - INFO - lr: 5.2485e-06 gnorm: 1.22 [23:20:59< 1:08:54] +[titan] 2025-10-05 21:55:22,478 - root - INFO - step: 38130 loss: 1.8761 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6600 +[titan] 2025-10-05 21:55:22,478 - root - INFO - lr: 
5.2471e-06 gnorm: 1.23 [23:21:10< 1:08:43] +[titan] 2025-10-05 21:55:33,363 - root - INFO - step: 38135 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7568 +[titan] 2025-10-05 21:55:33,364 - root - INFO - lr: 5.2458e-06 gnorm: 1.25 [23:21:21< 1:08:32] +[titan] 2025-10-05 21:55:44,229 - root - INFO - step: 38140 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 21:55:44,229 - root - INFO - lr: 5.2445e-06 gnorm: 1.25 [23:21:32< 1:08:20] +[titan] 2025-10-05 21:55:55,104 - root - INFO - step: 38145 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6732 +[titan] 2025-10-05 21:55:55,104 - root - INFO - lr: 5.2432e-06 gnorm: 1.23 [23:21:43< 1:08:09] +[titan] 2025-10-05 21:56:03,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:56:05,959 - root - INFO - step: 38150 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6637 +[titan] 2025-10-05 21:56:05,960 - root - INFO - lr: 5.2419e-06 gnorm: 1.28 [23:21:54< 1:07:58] +[titan] 2025-10-05 21:56:16,858 - root - INFO - step: 38155 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6893 +[titan] 2025-10-05 21:56:16,858 - root - INFO - lr: 5.2406e-06 gnorm: 1.23 [23:22:05< 1:07:47] +[titan] 2025-10-05 21:56:27,718 - root - INFO - step: 38160 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 21:56:27,719 - root - INFO - lr: 5.2393e-06 gnorm: 1.25 [23:22:15< 1:07:36] +[titan] 2025-10-05 21:56:38,596 - root - INFO - step: 38165 loss: 1.8754 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6582 +[titan] 2025-10-05 21:56:38,597 - root - INFO - lr: 5.2380e-06 gnorm: 1.20 [23:22:26< 1:07:25] +[titan] 2025-10-05 21:56:49,479 - root - INFO - step: 38170 loss: 1.9310 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 21:56:49,480 - root - INFO - lr: 5.2367e-06 gnorm: 1.22 [23:22:37< 1:07:14] +[titan] 2025-10-05 21:57:00,354 - root - INFO - step: 38175 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:57:00,354 - root - INFO - lr: 5.2354e-06 gnorm: 1.27 [23:22:48< 1:07:03] +[titan] 2025-10-05 21:57:11,246 - root - INFO - step: 38180 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 21:57:11,247 - root - INFO - lr: 
5.2341e-06 gnorm: 1.25 [23:22:59< 1:06:52] +[titan] 2025-10-05 21:57:22,096 - root - INFO - step: 38185 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7581 +[titan] 2025-10-05 21:57:22,096 - root - INFO - lr: 5.2328e-06 gnorm: 1.27 [23:23:10< 1:06:41] +[titan] 2025-10-05 21:57:32,943 - root - INFO - step: 38190 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:57:32,943 - root - INFO - lr: 5.2316e-06 gnorm: 1.26 [23:23:21< 1:06:30] +[titan] 2025-10-05 21:57:43,812 - root - INFO - step: 38195 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 21:57:43,812 - root - INFO - lr: 5.2303e-06 gnorm: 1.23 [23:23:32< 1:06:19] +[titan] 2025-10-05 21:57:52,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:57:54,668 - root - INFO - step: 38200 loss: 1.9598 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7333 +[titan] 2025-10-05 21:57:54,668 - root - INFO - lr: 5.2290e-06 gnorm: 1.24 [23:23:42< 1:06:08] +[titan] 2025-10-05 21:58:05,542 - root - INFO - step: 38205 loss: 1.8481 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 21:58:05,543 - root - INFO - lr: 5.2277e-06 gnorm: 1.26 [23:23:53< 1:05:57] +[titan] 2025-10-05 21:58:16,438 - root - INFO - step: 38210 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7477 +[titan] 2025-10-05 21:58:16,438 - root - INFO - lr: 5.2265e-06 gnorm: 1.28 [23:24:04< 1:05:46] +[titan] 2025-10-05 21:58:27,285 - root - INFO - step: 38215 loss: 1.9355 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 21:58:27,285 - root - INFO - lr: 5.2252e-06 gnorm: 1.22 [23:24:15< 1:05:35] +[titan] 2025-10-05 21:58:38,133 - root - INFO - step: 38220 loss: 1.8546 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6405 +[titan] 2025-10-05 21:58:38,133 - root - INFO - lr: 5.2240e-06 gnorm: 1.23 [23:24:26< 1:05:24] +[titan] 2025-10-05 21:58:48,997 - root - INFO - step: 38225 loss: 1.8842 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6664 +[titan] 2025-10-05 21:58:48,997 - root - INFO - lr: 5.2227e-06 gnorm: 1.21 [23:24:37< 1:05:13] +[titan] 2025-10-05 21:58:59,888 - root - INFO - step: 38230 loss: 1.9848 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 21:58:59,888 - root - INFO - lr: 
5.2214e-06 gnorm: 1.24 [23:24:48< 1:05:02] +[titan] 2025-10-05 21:59:10,888 - root - INFO - step: 38235 loss: 1.8777 memory: 118.84GiB(85.28%) tps: 29,791 tflops: 413.31 mfu: 41.79% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 21:59:10,888 - root - INFO - lr: 5.2202e-06 gnorm: 1.21 [23:24:59< 1:04:51] +[titan] 2025-10-05 21:59:21,732 - root - INFO - step: 38240 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6791 +[titan] 2025-10-05 21:59:21,732 - root - INFO - lr: 5.2190e-06 gnorm: 1.22 [23:25:09< 1:04:40] +[titan] 2025-10-05 21:59:32,592 - root - INFO - step: 38245 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 21:59:32,592 - root - INFO - lr: 5.2177e-06 gnorm: 1.26 [23:25:20< 1:04:29] +[titan] 2025-10-05 21:59:41,259 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:59:43,442 - root - INFO - step: 38250 loss: 1.8699 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6533 +[titan] 2025-10-05 21:59:43,442 - root - INFO - lr: 5.2165e-06 gnorm: 1.23 [23:25:31< 1:04:18] +[titan] 2025-10-05 21:59:54,302 - root - INFO - step: 38255 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:59:54,303 - root - INFO - lr: 5.2152e-06 gnorm: 1.28 [23:25:42< 1:04:07] +[titan] 2025-10-05 22:00:05,203 - root - INFO - step: 38260 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 22:00:05,203 - root - INFO - lr: 5.2140e-06 gnorm: 1.26 [23:25:53< 1:03:56] +[titan] 2025-10-05 22:00:16,075 - root - INFO - step: 38265 loss: 1.8744 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6580 +[titan] 2025-10-05 22:00:16,076 - root - INFO - lr: 5.2128e-06 gnorm: 1.25 [23:26:04< 1:03:45] +[titan] 2025-10-05 22:00:26,953 - root - INFO - step: 38270 loss: 1.9090 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:00:26,953 - root - INFO - lr: 5.2116e-06 gnorm: 1.26 [23:26:15< 1:03:34] +[titan] 2025-10-05 22:00:37,822 - root - INFO - step: 38275 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6897 +[titan] 2025-10-05 22:00:37,823 - root - INFO - lr: 5.2103e-06 gnorm: 1.27 [23:26:26< 1:03:23] +[titan] 2025-10-05 22:00:48,688 - root - INFO - step: 38280 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 22:00:48,688 - root - INFO - lr: 
5.2091e-06 gnorm: 1.25 [23:26:36< 1:03:12] +[titan] 2025-10-05 22:00:59,558 - root - INFO - step: 38285 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 22:00:59,559 - root - INFO - lr: 5.2079e-06 gnorm: 1.24 [23:26:47< 1:03:01] +[titan] 2025-10-05 22:01:10,423 - root - INFO - step: 38290 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 22:01:10,423 - root - INFO - lr: 5.2067e-06 gnorm: 1.23 [23:26:58< 1:02:50] +[titan] 2025-10-05 22:01:21,364 - root - INFO - step: 38295 loss: 1.9718 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 22:01:21,364 - root - INFO - lr: 5.2055e-06 gnorm: 1.27 [23:27:09< 1:02:39] +[titan] 2025-10-05 22:01:30,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:01:32,234 - root - INFO - step: 38300 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7052 +[titan] 2025-10-05 22:01:32,234 - root - INFO - lr: 5.2043e-06 gnorm: 1.22 [23:27:20< 1:02:28] +[titan] 2025-10-05 22:01:43,099 - root - INFO - step: 38305 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 22:01:43,099 - root - INFO - lr: 5.2031e-06 gnorm: 1.24 [23:27:31< 1:02:16] +[titan] 2025-10-05 22:01:53,973 - root - INFO - step: 38310 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 22:01:53,973 - root - INFO - lr: 5.2019e-06 gnorm: 1.25 [23:27:42< 1:02:05] +[titan] 2025-10-05 22:02:04,844 - root - INFO - step: 38315 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 22:02:04,845 - root - INFO - lr: 5.2007e-06 gnorm: 1.28 [23:27:53< 1:01:54] +[titan] 2025-10-05 22:02:15,752 - root - INFO - step: 38320 loss: 1.9010 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6811 +[titan] 2025-10-05 22:02:15,752 - root - INFO - lr: 5.1995e-06 gnorm: 1.24 [23:28:04< 1:01:43] +[titan] 2025-10-05 22:02:26,644 - root - INFO - step: 38325 loss: 1.8521 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6383 +[titan] 2025-10-05 22:02:26,645 - root - INFO - lr: 5.1983e-06 gnorm: 1.27 [23:28:14< 1:01:32] +[titan] 2025-10-05 22:02:37,525 - root - INFO - step: 38330 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:02:37,525 - root - INFO - lr: 
5.1972e-06 gnorm: 1.27 [23:28:25< 1:01:21] +[titan] 2025-10-05 22:02:48,403 - root - INFO - step: 38335 loss: 1.8947 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6753 +[titan] 2025-10-05 22:02:48,403 - root - INFO - lr: 5.1960e-06 gnorm: 1.31 [23:28:36< 1:01:10] +[titan] 2025-10-05 22:02:59,271 - root - INFO - step: 38340 loss: 1.8646 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 22:02:59,271 - root - INFO - lr: 5.1948e-06 gnorm: 1.22 [23:28:47< 1:00:59] +[titan] 2025-10-05 22:03:10,127 - root - INFO - step: 38345 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 22:03:10,127 - root - INFO - lr: 5.1936e-06 gnorm: 1.28 [23:28:58< 1:00:48] +[titan] 2025-10-05 22:03:18,835 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:03:21,025 - root - INFO - step: 38350 loss: 1.8758 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6588 +[titan] 2025-10-05 22:03:21,025 - root - INFO - lr: 5.1925e-06 gnorm: 1.22 [23:29:09< 1:00:37] +[titan] 2025-10-05 22:03:31,925 - root - INFO - step: 38355 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6874 +[titan] 2025-10-05 22:03:31,925 - root - INFO - lr: 5.1913e-06 gnorm: 1.20 [23:29:20< 1:00:26] +[titan] 2025-10-05 22:03:42,780 - root - INFO - step: 38360 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 22:03:42,780 - root - INFO - lr: 5.1902e-06 gnorm: 1.24 [23:29:31< 1:00:15] +[titan] 2025-10-05 22:03:53,638 - root - INFO - step: 38365 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 22:03:53,638 - root - INFO - lr: 5.1890e-06 gnorm: 1.25 [23:29:41< 1:00:04] +[titan] 2025-10-05 22:04:04,503 - root - INFO - step: 38370 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 22:04:04,503 - root - INFO - lr: 5.1878e-06 gnorm: 1.23 [23:29:52< 0:59:53] +[titan] 2025-10-05 22:04:15,408 - root - INFO - step: 38375 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 22:04:15,409 - root - INFO - lr: 5.1867e-06 gnorm: 1.24 [23:30:03< 0:59:42] +[titan] 2025-10-05 22:04:26,282 - root - INFO - step: 38380 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 22:04:26,282 - root - INFO - lr: 
5.1856e-06 gnorm: 1.23 [23:30:14< 0:59:31] +[titan] 2025-10-05 22:04:37,152 - root - INFO - step: 38385 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6840 +[titan] 2025-10-05 22:04:37,153 - root - INFO - lr: 5.1844e-06 gnorm: 1.26 [23:30:25< 0:59:20] +[titan] 2025-10-05 22:04:48,030 - root - INFO - step: 38390 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:04:48,030 - root - INFO - lr: 5.1833e-06 gnorm: 1.26 [23:30:36< 0:59:09] +[titan] 2025-10-05 22:04:58,887 - root - INFO - step: 38395 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:04:58,887 - root - INFO - lr: 5.1821e-06 gnorm: 1.24 [23:30:47< 0:58:58] +[titan] 2025-10-05 22:05:07,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:05:09,829 - root - INFO - step: 38400 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 22:05:09,829 - root - INFO - lr: 5.1810e-06 gnorm: 1.25 [23:30:58< 0:58:47] +[titan] 2025-10-05 22:05:10,022 - root - INFO - Dumping profiler traces at step 38400 +[titan] 2025-10-05 22:05:10,065 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:05:20,979 - root - INFO - step: 38405 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 29,389 tflops: 407.73 mfu: 41.23% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 22:05:20,979 - root - INFO - lr: 5.1799e-06 gnorm: 1.21 [23:31:09< 0:58:36] +[titan] 2025-10-05 22:05:31,845 - root - INFO - step: 38410 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6711 +[titan] 2025-10-05 22:05:31,845 - root - INFO - lr: 5.1788e-06 gnorm: 1.25 [23:31:20< 0:58:25] +[titan] 2025-10-05 22:05:42,706 - root - INFO - step: 38415 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6821 +[titan] 2025-10-05 22:05:42,706 - root - INFO - lr: 5.1776e-06 gnorm: 1.25 [23:31:30< 0:58:14] +[titan] 2025-10-05 22:05:53,598 - root - INFO - step: 38420 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 22:05:53,598 - root - INFO - lr: 5.1765e-06 gnorm: 1.24 [23:31:41< 0:58:03] +[titan] 2025-10-05 22:06:04,473 - root - INFO - step: 38425 loss: 1.8931 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:06:04,473 - root - INFO - lr: 5.1754e-06 gnorm: 1.24 [23:31:52< 0:57:52] +[titan] 2025-10-05 22:06:15,341 - root - INFO - step: 38430 loss: 
1.9097 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:06:15,342 - root - INFO - lr: 5.1743e-06 gnorm: 1.28 [23:32:03< 0:57:41] +[titan] 2025-10-05 22:06:26,263 - root - INFO - step: 38435 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6607 +[titan] 2025-10-05 22:06:26,263 - root - INFO - lr: 5.1732e-06 gnorm: 1.21 [23:32:14< 0:57:30] +[titan] 2025-10-05 22:06:37,131 - root - INFO - step: 38440 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7207 +[titan] 2025-10-05 22:06:37,132 - root - INFO - lr: 5.1721e-06 gnorm: 1.24 [23:32:25< 0:57:19] +[titan] 2025-10-05 22:06:48,006 - root - INFO - step: 38445 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6765 +[titan] 2025-10-05 22:06:48,006 - root - INFO - lr: 5.1710e-06 gnorm: 1.26 [23:32:36< 0:57:08] +[titan] 2025-10-05 22:06:56,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:06:58,881 - root - INFO - step: 38450 loss: 1.9214 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 22:06:58,881 - root - INFO - lr: 5.1699e-06 gnorm: 1.25 [23:32:47< 0:56:57] +[titan] 2025-10-05 22:07:09,781 - root - INFO - step: 38455 loss: 1.9440 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:07:09,781 - root - INFO - lr: 5.1688e-06 gnorm: 1.24 [23:32:58< 0:56:46] +[titan] 2025-10-05 22:07:20,663 - root - INFO - step: 38460 loss: 1.8888 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6704 +[titan] 2025-10-05 22:07:20,663 - root - INFO - lr: 5.1677e-06 gnorm: 1.25 [23:33:08< 0:56:35] +[titan] 2025-10-05 22:07:31,515 - root - INFO - step: 38465 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:07:31,515 - root - INFO - lr: 5.1666e-06 gnorm: 1.27 [23:33:19< 0:56:24] +[titan] 2025-10-05 22:07:42,351 - root - INFO - step: 38470 loss: 1.8510 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2144 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 22:07:42,351 - root - INFO - lr: 5.1655e-06 gnorm: 1.22 [23:33:30< 0:56:13] +[titan] 2025-10-05 22:07:53,204 - root - INFO - step: 38475 loss: 1.9409 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7151 +[titan] 2025-10-05 22:07:53,204 - root - INFO - lr: 5.1645e-06 gnorm: 1.27 [23:33:41< 0:56:01] +[titan] 2025-10-05 22:08:04,067 - root - INFO - step: 38480 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 22:08:04,067 - root - INFO - lr: 
5.1634e-06 gnorm: 1.21 [23:33:52< 0:55:50] +[titan] 2025-10-05 22:08:14,965 - root - INFO - step: 38485 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:08:14,965 - root - INFO - lr: 5.1623e-06 gnorm: 1.27 [23:34:03< 0:55:39] +[titan] 2025-10-05 22:08:25,908 - root - INFO - step: 38490 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.44 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:08:25,908 - root - INFO - lr: 5.1612e-06 gnorm: 1.26 [23:34:14< 0:55:28] +[titan] 2025-10-05 22:08:36,784 - root - INFO - step: 38495 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6699 +[titan] 2025-10-05 22:08:36,784 - root - INFO - lr: 5.1602e-06 gnorm: 1.32 [23:34:25< 0:55:17] +[titan] 2025-10-05 22:08:45,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:08:47,650 - root - INFO - step: 38500 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 22:08:47,650 - root - INFO - lr: 5.1591e-06 gnorm: 1.33 [23:34:35< 0:55:06] +[titan] 2025-10-05 22:08:58,527 - root - INFO - step: 38505 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 22:08:58,527 - root - INFO - lr: 5.1581e-06 gnorm: 1.28 [23:34:46< 0:54:55] +[titan] 2025-10-05 22:09:09,391 - root - INFO - step: 38510 loss: 1.9323 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:09:09,391 - root - INFO - lr: 5.1570e-06 gnorm: 1.28 [23:34:57< 0:54:44] +[titan] 2025-10-05 22:09:20,365 - root - INFO - step: 38515 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 29,861 tflops: 414.28 mfu: 41.89% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:09:20,365 - root - INFO - lr: 5.1560e-06 gnorm: 1.27 [23:35:08< 0:54:33] +[titan] 2025-10-05 22:09:31,218 - root - INFO - step: 38520 loss: 1.9315 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:09:31,219 - root - INFO - lr: 5.1549e-06 gnorm: 1.25 [23:35:19< 0:54:22] +[titan] 2025-10-05 22:09:42,070 - root - INFO - step: 38525 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6845 +[titan] 2025-10-05 22:09:42,071 - root - INFO - lr: 5.1539e-06 gnorm: 1.23 [23:35:30< 0:54:11] +[titan] 2025-10-05 22:09:52,922 - root - INFO - step: 38530 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 22:09:52,922 - root - INFO - lr: 
5.1528e-06 gnorm: 1.26 [23:35:41< 0:54:00] +[titan] 2025-10-05 22:10:03,769 - root - INFO - step: 38535 loss: 1.9228 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:10:03,769 - root - INFO - lr: 5.1518e-06 gnorm: 1.25 [23:35:52< 0:53:49] +[titan] 2025-10-05 22:10:14,645 - root - INFO - step: 38540 loss: 1.9149 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6940 +[titan] 2025-10-05 22:10:14,645 - root - INFO - lr: 5.1508e-06 gnorm: 1.24 [23:36:02< 0:53:38] +[titan] 2025-10-05 22:10:25,531 - root - INFO - step: 38545 loss: 1.8971 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 22:10:25,531 - root - INFO - lr: 5.1497e-06 gnorm: 1.21 [23:36:13< 0:53:27] +[titan] 2025-10-05 22:10:34,232 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:10:36,410 - root - INFO - step: 38550 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 22:10:36,411 - root - INFO - lr: 5.1487e-06 gnorm: 1.26 [23:36:24< 0:53:16] +[titan] 2025-10-05 22:10:47,265 - root - INFO - step: 38555 loss: 1.9055 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 22:10:47,265 - root - INFO - lr: 5.1477e-06 gnorm: 1.25 [23:36:35< 0:53:05] +[titan] 2025-10-05 22:10:58,113 - root - INFO - step: 38560 loss: 1.8963 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 22:10:58,113 - root - INFO - lr: 5.1467e-06 gnorm: 1.28 [23:36:46< 0:52:54] +[titan] 2025-10-05 22:11:08,954 - root - INFO - step: 38565 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:11:08,954 - root - INFO - lr: 5.1456e-06 gnorm: 1.26 [23:36:57< 0:52:43] +[titan] 2025-10-05 22:11:19,804 - root - INFO - step: 38570 loss: 1.9003 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 22:11:19,805 - root - INFO - lr: 5.1446e-06 gnorm: 1.22 [23:37:08< 0:52:32] +[titan] 2025-10-05 22:11:30,699 - root - INFO - step: 38575 loss: 1.8708 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6535 +[titan] 2025-10-05 22:11:30,699 - root - INFO - lr: 5.1436e-06 gnorm: 1.25 [23:37:18< 0:52:21] +[titan] 2025-10-05 22:11:41,605 - root - INFO - step: 38580 loss: 1.9498 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 22:11:41,605 - root - INFO - lr: 
5.1426e-06 gnorm: 1.26 [23:37:29< 0:52:10] +[titan] 2025-10-05 22:11:52,476 - root - INFO - step: 38585 loss: 1.8659 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2149 global_avg_mtp_loss: 1.6510 +[titan] 2025-10-05 22:11:52,476 - root - INFO - lr: 5.1416e-06 gnorm: 1.27 [23:37:40< 0:51:59] +[titan] 2025-10-05 22:12:03,366 - root - INFO - step: 38590 loss: 1.8820 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6644 +[titan] 2025-10-05 22:12:03,366 - root - INFO - lr: 5.1406e-06 gnorm: 1.30 [23:37:51< 0:51:48] +[titan] 2025-10-05 22:12:14,240 - root - INFO - step: 38595 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 22:12:14,241 - root - INFO - lr: 5.1396e-06 gnorm: 1.20 [23:38:02< 0:51:37] +[titan] 2025-10-05 22:12:22,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:12:25,159 - root - INFO - step: 38600 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6314 +[titan] 2025-10-05 22:12:25,159 - root - INFO - lr: 5.1386e-06 gnorm: 1.21 [23:38:13< 0:51:26] +[titan] 2025-10-05 22:12:36,019 - root - INFO - step: 38605 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 22:12:36,019 - root - INFO - lr: 5.1376e-06 gnorm: 1.24 [23:38:24< 0:51:15] +[titan] 2025-10-05 22:12:46,891 - root - INFO - step: 38610 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 22:12:46,891 - root - INFO - lr: 5.1367e-06 gnorm: 1.19 [23:38:35< 0:51:04] +[titan] 2025-10-05 22:12:57,808 - root - INFO - step: 38615 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 22:12:57,808 - root - INFO - lr: 5.1357e-06 gnorm: 1.29 [23:38:46< 0:50:53] +[titan] 2025-10-05 22:13:08,674 - root - INFO - step: 38620 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 22:13:08,675 - root - INFO - lr: 5.1347e-06 gnorm: 1.29 [23:38:56< 0:50:42] +[titan] 2025-10-05 22:13:19,537 - root - INFO - step: 38625 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 22:13:19,537 - root - INFO - lr: 5.1337e-06 gnorm: 1.26 [23:39:07< 0:50:31] +[titan] 2025-10-05 22:13:30,453 - root - INFO - step: 38630 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6688 +[titan] 2025-10-05 22:13:30,453 - root - INFO - lr: 
5.1328e-06 gnorm: 1.24 [23:39:18< 0:50:20] +[titan] 2025-10-05 22:13:41,303 - root - INFO - step: 38635 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 22:13:41,303 - root - INFO - lr: 5.1318e-06 gnorm: 1.27 [23:39:29< 0:50:09] +[titan] 2025-10-05 22:13:52,138 - root - INFO - step: 38640 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 22:13:52,139 - root - INFO - lr: 5.1308e-06 gnorm: 1.25 [23:39:40< 0:49:58] +[titan] 2025-10-05 22:14:03,026 - root - INFO - step: 38645 loss: 1.8958 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6769 +[titan] 2025-10-05 22:14:03,026 - root - INFO - lr: 5.1299e-06 gnorm: 1.24 [23:39:51< 0:49:47] +[titan] 2025-10-05 22:14:11,665 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:14:13,845 - root - INFO - step: 38650 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6629 +[titan] 2025-10-05 22:14:13,845 - root - INFO - lr: 5.1289e-06 gnorm: 1.27 [23:40:02< 0:49:36] +[titan] 2025-10-05 22:14:24,687 - root - INFO - step: 38655 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 22:14:24,687 - root - INFO - lr: 5.1280e-06 gnorm: 1.32 [23:40:12< 0:49:24] +[titan] 2025-10-05 22:14:35,527 - root - INFO - step: 38660 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6957 +[titan] 2025-10-05 22:14:35,527 - root - INFO - lr: 5.1270e-06 gnorm: 1.28 [23:40:23< 0:49:13] +[titan] 2025-10-05 22:14:46,388 - root - INFO - step: 38665 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 22:14:46,388 - root - INFO - lr: 5.1261e-06 gnorm: 1.24 [23:40:34< 0:49:02] +[titan] 2025-10-05 22:14:57,230 - root - INFO - step: 38670 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 22:14:57,231 - root - INFO - lr: 5.1251e-06 gnorm: 1.25 [23:40:45< 0:48:51] +[titan] 2025-10-05 22:15:08,076 - root - INFO - step: 38675 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 22:15:08,076 - root - INFO - lr: 5.1242e-06 gnorm: 1.23 [23:40:56< 0:48:40] +[titan] 2025-10-05 22:15:18,905 - root - INFO - step: 38680 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 22:15:18,905 - root - INFO - lr: 
5.1233e-06 gnorm: 1.24 [23:41:07< 0:48:29] +[titan] 2025-10-05 22:15:29,770 - root - INFO - step: 38685 loss: 1.8560 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6416 +[titan] 2025-10-05 22:15:29,770 - root - INFO - lr: 5.1223e-06 gnorm: 1.26 [23:41:17< 0:48:18] +[titan] 2025-10-05 22:15:40,605 - root - INFO - step: 38690 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 22:15:40,606 - root - INFO - lr: 5.1214e-06 gnorm: 1.26 [23:41:28< 0:48:07] +[titan] 2025-10-05 22:15:51,445 - root - INFO - step: 38695 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 22:15:51,445 - root - INFO - lr: 5.1205e-06 gnorm: 1.26 [23:41:39< 0:47:56] +[titan] 2025-10-05 22:16:00,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:16:02,303 - root - INFO - step: 38700 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6625 +[titan] 2025-10-05 22:16:02,303 - root - INFO - lr: 5.1195e-06 gnorm: 1.22 [23:41:50< 0:47:45] +[titan] 2025-10-05 22:16:13,157 - root - INFO - step: 38705 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 22:16:13,157 - root - INFO - lr: 5.1186e-06 gnorm: 1.24 [23:42:01< 0:47:34] +[titan] 2025-10-05 22:16:24,067 - root - INFO - step: 38710 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 22:16:24,067 - root - INFO - lr: 5.1177e-06 gnorm: 1.23 [23:42:12< 0:47:23] +[titan] 2025-10-05 22:16:34,977 - root - INFO - step: 38715 loss: 1.9159 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:16:34,977 - root - INFO - lr: 5.1168e-06 gnorm: 1.27 [23:42:23< 0:47:12] +[titan] 2025-10-05 22:16:45,845 - root - INFO - step: 38720 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 22:16:45,845 - root - INFO - lr: 5.1159e-06 gnorm: 1.26 [23:42:34< 0:47:01] +[titan] 2025-10-05 22:16:56,703 - root - INFO - step: 38725 loss: 1.8703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 22:16:56,703 - root - INFO - lr: 5.1150e-06 gnorm: 1.22 [23:42:44< 0:46:50] +[titan] 2025-10-05 22:17:07,552 - root - INFO - step: 38730 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6731 +[titan] 2025-10-05 22:17:07,553 - root - INFO - lr: 
5.1141e-06 gnorm: 1.23 [23:42:55< 0:46:39] +[titan] 2025-10-05 22:17:18,393 - root - INFO - step: 38735 loss: 1.9710 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 22:17:18,394 - root - INFO - lr: 5.1132e-06 gnorm: 1.32 [23:43:06< 0:46:28] +[titan] 2025-10-05 22:17:29,314 - root - INFO - step: 38740 loss: 1.8794 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 22:17:29,314 - root - INFO - lr: 5.1123e-06 gnorm: 1.24 [23:43:17< 0:46:17] +[titan] 2025-10-05 22:17:40,164 - root - INFO - step: 38745 loss: 1.8962 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6772 +[titan] 2025-10-05 22:17:40,164 - root - INFO - lr: 5.1114e-06 gnorm: 1.27 [23:43:28< 0:46:06] +[titan] 2025-10-05 22:17:48,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:17:51,020 - root - INFO - step: 38750 loss: 1.8652 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6492 +[titan] 2025-10-05 22:17:51,020 - root - INFO - lr: 5.1105e-06 gnorm: 1.30 [23:43:39< 0:45:55] +[titan] 2025-10-05 22:18:01,867 - root - INFO - step: 38755 loss: 1.8715 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6553 +[titan] 2025-10-05 22:18:01,867 - root - INFO - lr: 5.1097e-06 gnorm: 1.24 [23:43:50< 0:45:44] +[titan] 2025-10-05 22:18:12,725 - root - INFO - step: 38760 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 22:18:12,725 - root - INFO - lr: 5.1088e-06 gnorm: 1.25 [23:44:00< 0:45:33] +[titan] 2025-10-05 22:18:23,576 - root - INFO - step: 38765 loss: 1.9134 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 22:18:23,576 - root - INFO - lr: 5.1079e-06 gnorm: 1.24 [23:44:11< 0:45:22] +[titan] 2025-10-05 22:18:34,466 - root - INFO - step: 38770 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 22:18:34,466 - root - INFO - lr: 5.1070e-06 gnorm: 1.21 [23:44:22< 0:45:11] +[titan] 2025-10-05 22:18:45,359 - root - INFO - step: 38775 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 22:18:45,359 - root - INFO - lr: 5.1062e-06 gnorm: 1.22 [23:44:33< 0:45:00] +[titan] 2025-10-05 22:18:56,225 - root - INFO - step: 38780 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 22:18:56,225 - root - INFO - lr: 
5.1053e-06 gnorm: 1.23 [23:44:44< 0:44:49] +[titan] 2025-10-05 22:19:07,063 - root - INFO - step: 38785 loss: 1.8911 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 22:19:07,063 - root - INFO - lr: 5.1044e-06 gnorm: 1.28 [23:44:55< 0:44:38] +[titan] 2025-10-05 22:19:17,908 - root - INFO - step: 38790 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 22:19:17,908 - root - INFO - lr: 5.1036e-06 gnorm: 1.27 [23:45:06< 0:44:27] +[titan] 2025-10-05 22:19:28,765 - root - INFO - step: 38795 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:19:28,765 - root - INFO - lr: 5.1027e-06 gnorm: 1.25 [23:45:16< 0:44:16] +[titan] 2025-10-05 22:19:37,452 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:19:39,646 - root - INFO - step: 38800 loss: 1.9199 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 22:19:39,646 - root - INFO - lr: 5.1019e-06 gnorm: 1.22 [23:45:27< 0:44:05] +[titan] 2025-10-05 22:19:50,541 - root - INFO - step: 38805 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 22:19:50,542 - root - INFO - lr: 5.1010e-06 gnorm: 1.25 [23:45:38< 0:43:54] +[titan] 2025-10-05 22:20:01,404 - root - INFO - step: 38810 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6756 +[titan] 2025-10-05 22:20:01,405 - root - INFO - lr: 5.1002e-06 gnorm: 1.25 [23:45:49< 0:43:43] +[titan] 2025-10-05 22:20:12,258 - root - INFO - step: 38815 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7061 +[titan] 2025-10-05 22:20:12,258 - root - INFO - lr: 5.0993e-06 gnorm: 1.33 [23:46:00< 0:43:32] +[titan] 2025-10-05 22:20:23,109 - root - INFO - step: 38820 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:20:23,109 - root - INFO - lr: 5.0985e-06 gnorm: 1.25 [23:46:11< 0:43:21] +[titan] 2025-10-05 22:20:33,977 - root - INFO - step: 38825 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6488 +[titan] 2025-10-05 22:20:33,977 - root - INFO - lr: 5.0977e-06 gnorm: 1.28 [23:46:22< 0:43:10] +[titan] 2025-10-05 22:20:44,821 - root - INFO - step: 38830 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6682 +[titan] 2025-10-05 22:20:44,821 - root - INFO - lr: 
5.0969e-06 gnorm: 1.27 [23:46:33< 0:42:59] +[titan] 2025-10-05 22:20:55,718 - root - INFO - step: 38835 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 22:20:55,718 - root - INFO - lr: 5.0960e-06 gnorm: 1.26 [23:46:43< 0:42:48] +[titan] 2025-10-05 22:21:06,566 - root - INFO - step: 38840 loss: 1.9277 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 22:21:06,566 - root - INFO - lr: 5.0952e-06 gnorm: 1.27 [23:46:54< 0:42:36] +[titan] 2025-10-05 22:21:17,446 - root - INFO - step: 38845 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6385 +[titan] 2025-10-05 22:21:17,446 - root - INFO - lr: 5.0944e-06 gnorm: 1.24 [23:47:05< 0:42:25] +[titan] 2025-10-05 22:21:26,133 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:21:28,317 - root - INFO - step: 38850 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 22:21:28,317 - root - INFO - lr: 5.0936e-06 gnorm: 1.28 [23:47:16< 0:42:14] +[titan] 2025-10-05 22:21:39,188 - root - INFO - step: 38855 loss: 1.8571 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6436 +[titan] 2025-10-05 22:21:39,188 - root - INFO - lr: 5.0928e-06 gnorm: 1.25 [23:47:27< 0:42:03] +[titan] 2025-10-05 22:21:50,046 - root - INFO - step: 38860 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 22:21:50,047 - root - INFO - lr: 5.0920e-06 gnorm: 1.30 [23:47:38< 0:41:52] +[titan] 2025-10-05 22:22:00,909 - root - INFO - step: 38865 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:22:00,909 - root - INFO - lr: 5.0911e-06 gnorm: 1.28 [23:47:49< 0:41:41] +[titan] 2025-10-05 22:22:11,785 - root - INFO - step: 38870 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 22:22:11,786 - root - INFO - lr: 5.0903e-06 gnorm: 1.26 [23:47:59< 0:41:30] +[titan] 2025-10-05 22:22:22,628 - root - INFO - step: 38875 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7259 +[titan] 2025-10-05 22:22:22,628 - root - INFO - lr: 5.0895e-06 gnorm: 1.24 [23:48:10< 0:41:19] +[titan] 2025-10-05 22:22:33,500 - root - INFO - step: 38880 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 22:22:33,500 - root - INFO - lr: 
5.0888e-06 gnorm: 1.26 [23:48:21< 0:41:08] +[titan] 2025-10-05 22:22:44,338 - root - INFO - step: 38885 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:22:44,338 - root - INFO - lr: 5.0880e-06 gnorm: 1.28 [23:48:32< 0:40:57] +[titan] 2025-10-05 22:22:55,187 - root - INFO - step: 38890 loss: 1.9042 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6843 +[titan] 2025-10-05 22:22:55,187 - root - INFO - lr: 5.0872e-06 gnorm: 1.24 [23:48:43< 0:40:46] +[titan] 2025-10-05 22:23:06,026 - root - INFO - step: 38895 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:23:06,026 - root - INFO - lr: 5.0864e-06 gnorm: 1.26 [23:48:54< 0:40:35] +[titan] 2025-10-05 22:23:14,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:23:16,918 - root - INFO - step: 38900 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 22:23:16,918 - root - INFO - lr: 5.0856e-06 gnorm: 1.25 [23:49:05< 0:40:24] +[titan] 2025-10-05 22:23:27,768 - root - INFO - step: 38905 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 22:23:27,768 - root - INFO - lr: 5.0848e-06 gnorm: 1.28 [23:49:15< 0:40:13] +[titan] 2025-10-05 22:23:38,736 - root - INFO - step: 38910 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 22:23:38,736 - root - INFO - lr: 5.0841e-06 gnorm: 1.25 [23:49:26< 0:40:02] +[titan] 2025-10-05 22:23:43,284 - root - INFO - Dumping profiler traces at step 38912 +[titan] 2025-10-05 22:23:43,322 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:23:49,840 - root - INFO - step: 38915 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:23:49,840 - root - INFO - lr: 5.0833e-06 gnorm: 1.23 [23:49:38< 0:39:51] +[titan] 2025-10-05 22:24:00,685 - root - INFO - step: 38920 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 22:24:00,685 - root - INFO - lr: 5.0825e-06 gnorm: 1.24 [23:49:48< 0:39:40] +[titan] 2025-10-05 22:24:11,518 - root - INFO - step: 38925 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:24:11,518 - root - INFO - lr: 5.0818e-06 gnorm: 1.28 [23:49:59< 0:39:29] +[titan] 2025-10-05 22:24:22,383 - root - INFO - step: 38930 loss: 
1.9030 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:24:22,383 - root - INFO - lr: 5.0810e-06 gnorm: 1.22 [23:50:10< 0:39:18] +[titan] 2025-10-05 22:24:33,285 - root - INFO - step: 38935 loss: 1.9341 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 22:24:33,286 - root - INFO - lr: 5.0803e-06 gnorm: 1.25 [23:50:21< 0:39:07] +[titan] 2025-10-05 22:24:44,145 - root - INFO - step: 38940 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6642 +[titan] 2025-10-05 22:24:44,145 - root - INFO - lr: 5.0795e-06 gnorm: 1.33 [23:50:32< 0:38:56] +[titan] 2025-10-05 22:24:55,011 - root - INFO - step: 38945 loss: 1.8488 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6353 +[titan] 2025-10-05 22:24:55,011 - root - INFO - lr: 5.0788e-06 gnorm: 1.25 [23:50:43< 0:38:45] +[titan] 2025-10-05 22:25:03,688 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:25:05,861 - root - INFO - step: 38950 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 22:25:05,861 - root - INFO - lr: 5.0780e-06 gnorm: 1.26 [23:50:54< 0:38:34] +[titan] 2025-10-05 22:25:16,696 - root - INFO - step: 38955 loss: 1.8763 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6594 +[titan] 2025-10-05 22:25:16,696 - root - INFO - lr: 5.0773e-06 gnorm: 1.25 [23:51:04< 0:38:23] +[titan] 2025-10-05 22:25:27,557 - root - INFO - step: 38960 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:25:27,557 - root - INFO - lr: 5.0765e-06 gnorm: 1.25 [23:51:15< 0:38:12] +[titan] 2025-10-05 22:25:38,467 - root - INFO - step: 38965 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 22:25:38,467 - root - INFO - lr: 5.0758e-06 gnorm: 1.24 [23:51:26< 0:38:01] +[titan] 2025-10-05 22:25:49,317 - root - INFO - step: 38970 loss: 1.8769 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 22:25:49,317 - root - INFO - lr: 5.0751e-06 gnorm: 1.22 [23:51:37< 0:37:50] +[titan] 2025-10-05 22:26:00,183 - root - INFO - step: 38975 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 22:26:00,183 - root - INFO - lr: 5.0743e-06 gnorm: 1.28 [23:51:48< 0:37:39] +[titan] 2025-10-05 22:26:11,057 - root - INFO - step: 38980 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 22:26:11,057 - root - INFO - lr: 
5.0736e-06 gnorm: 1.29 [23:51:59< 0:37:28] +[titan] 2025-10-05 22:26:21,891 - root - INFO - step: 38985 loss: 1.8837 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 22:26:21,891 - root - INFO - lr: 5.0729e-06 gnorm: 1.26 [23:52:10< 0:37:17] +[titan] 2025-10-05 22:26:32,761 - root - INFO - step: 38990 loss: 1.8936 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:26:32,761 - root - INFO - lr: 5.0722e-06 gnorm: 1.25 [23:52:20< 0:37:06] +[titan] 2025-10-05 22:26:43,668 - root - INFO - step: 38995 loss: 1.8343 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2124 global_avg_mtp_loss: 1.6219 +[titan] 2025-10-05 22:26:43,668 - root - INFO - lr: 5.0715e-06 gnorm: 1.22 [23:52:31< 0:36:55] +[titan] 2025-10-05 22:26:52,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:26:54,511 - root - INFO - step: 39000 loss: 1.8692 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6534 +[titan] 2025-10-05 22:26:54,511 - root - INFO - lr: 5.0708e-06 gnorm: 1.23 [23:52:42< 0:36:44] +[titan] 2025-10-05 22:27:05,357 - root - INFO - step: 39005 loss: 1.8448 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6317 +[titan] 2025-10-05 22:27:05,357 - root - INFO - lr: 5.0701e-06 gnorm: 1.24 [23:52:53< 0:36:33] +[titan] 2025-10-05 22:27:16,214 - root - INFO - step: 39010 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 22:27:16,214 - root - INFO - lr: 5.0694e-06 gnorm: 1.27 [23:53:04< 0:36:22] +[titan] 2025-10-05 22:27:27,027 - root - INFO - step: 39015 loss: 1.8935 memory: 118.84GiB(85.28%) tps: 30,305 tflops: 420.43 mfu: 42.51% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 22:27:27,028 - root - INFO - lr: 5.0687e-06 gnorm: 1.29 [23:53:15< 0:36:11] +[titan] 2025-10-05 22:27:37,873 - root - INFO - step: 39020 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 22:27:37,873 - root - INFO - lr: 5.0680e-06 gnorm: 1.25 [23:53:26< 0:36:00] +[titan] 2025-10-05 22:27:48,725 - root - INFO - step: 39025 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 22:27:48,725 - root - INFO - lr: 5.0673e-06 gnorm: 1.23 [23:53:36< 0:35:49] +[titan] 2025-10-05 22:27:59,585 - root - INFO - step: 39030 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7320 +[titan] 2025-10-05 22:27:59,585 - root - INFO - lr: 
5.0666e-06 gnorm: 1.26 [23:53:47< 0:35:38] +[titan] 2025-10-05 22:28:10,411 - root - INFO - step: 39035 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:28:10,411 - root - INFO - lr: 5.0659e-06 gnorm: 1.29 [23:53:58< 0:35:26] +[titan] 2025-10-05 22:28:21,251 - root - INFO - step: 39040 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:28:21,251 - root - INFO - lr: 5.0652e-06 gnorm: 1.26 [23:54:09< 0:35:15] +[titan] 2025-10-05 22:28:32,077 - root - INFO - step: 39045 loss: 1.9016 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6812 +[titan] 2025-10-05 22:28:32,077 - root - INFO - lr: 5.0645e-06 gnorm: 1.24 [23:54:20< 0:35:04] +[titan] 2025-10-05 22:28:40,768 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:28:42,943 - root - INFO - step: 39050 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 22:28:42,943 - root - INFO - lr: 5.0639e-06 gnorm: 1.25 [23:54:31< 0:34:53] +[titan] 2025-10-05 22:28:53,779 - root - INFO - step: 39055 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 22:28:53,779 - root - INFO - lr: 5.0632e-06 gnorm: 1.27 [23:54:41< 0:34:42] +[titan] 2025-10-05 22:29:04,650 - root - INFO - step: 39060 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:29:04,650 - root - INFO - lr: 5.0625e-06 gnorm: 1.28 [23:54:52< 0:34:31] +[titan] 2025-10-05 22:29:15,481 - root - INFO - step: 39065 loss: 1.8892 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:29:15,481 - root - INFO - lr: 5.0619e-06 gnorm: 1.29 [23:55:03< 0:34:20] +[titan] 2025-10-05 22:29:26,319 - root - INFO - step: 39070 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7449 +[titan] 2025-10-05 22:29:26,319 - root - INFO - lr: 5.0612e-06 gnorm: 1.27 [23:55:14< 0:34:09] +[titan] 2025-10-05 22:29:37,169 - root - INFO - step: 39075 loss: 1.8711 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:29:37,169 - root - INFO - lr: 5.0606e-06 gnorm: 1.39 [23:55:25< 0:33:58] +[titan] 2025-10-05 22:29:47,983 - root - INFO - step: 39080 loss: 1.9585 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 22:29:47,983 - root - INFO - lr: 
5.0599e-06 gnorm: 1.27 [23:55:36< 0:33:47] +[titan] 2025-10-05 22:29:58,811 - root - INFO - step: 39085 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 22:29:58,811 - root - INFO - lr: 5.0593e-06 gnorm: 1.28 [23:55:46< 0:33:36] +[titan] 2025-10-05 22:30:09,630 - root - INFO - step: 39090 loss: 1.8996 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:30:09,630 - root - INFO - lr: 5.0586e-06 gnorm: 1.26 [23:55:57< 0:33:25] +[titan] 2025-10-05 22:30:20,468 - root - INFO - step: 39095 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6810 +[titan] 2025-10-05 22:30:20,469 - root - INFO - lr: 5.0580e-06 gnorm: 1.24 [23:56:08< 0:33:14] +[titan] 2025-10-05 22:30:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:30:31,293 - root - INFO - step: 39100 loss: 1.9874 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 22:30:31,293 - root - INFO - lr: 5.0573e-06 gnorm: 1.34 [23:56:19< 0:33:03] +[titan] 2025-10-05 22:30:42,362 - root - INFO - step: 39105 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,605 tflops: 410.73 mfu: 41.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:30:42,362 - root - INFO - lr: 5.0567e-06 gnorm: 1.26 [23:56:30< 0:32:52] +[titan] 2025-10-05 22:30:53,217 - root - INFO - step: 39110 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2156 global_avg_mtp_loss: 1.6453 +[titan] 2025-10-05 22:30:53,217 - root - INFO - lr: 5.0561e-06 gnorm: 1.23 [23:56:41< 0:32:41] +[titan] 2025-10-05 22:31:04,043 - root - INFO - step: 39115 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 22:31:04,043 - root - INFO - lr: 5.0554e-06 gnorm: 1.23 [23:56:52< 0:32:30] +[titan] 2025-10-05 22:31:14,877 - root - INFO - step: 39120 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 22:31:14,877 - root - INFO - lr: 5.0548e-06 gnorm: 1.28 [23:57:03< 0:32:19] +[titan] 2025-10-05 22:31:25,759 - root - INFO - step: 39125 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 22:31:25,759 - root - INFO - lr: 5.0542e-06 gnorm: 1.27 [23:57:13< 0:32:08] +[titan] 2025-10-05 22:31:36,579 - root - INFO - step: 39130 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 22:31:36,579 - root - INFO - lr: 
5.0536e-06 gnorm: 1.28 [23:57:24< 0:31:57] +[titan] 2025-10-05 22:31:47,420 - root - INFO - step: 39135 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7389 +[titan] 2025-10-05 22:31:47,420 - root - INFO - lr: 5.0530e-06 gnorm: 1.29 [23:57:35< 0:31:46] +[titan] 2025-10-05 22:31:58,260 - root - INFO - step: 39140 loss: 1.9505 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7258 +[titan] 2025-10-05 22:31:58,260 - root - INFO - lr: 5.0523e-06 gnorm: 1.27 [23:57:46< 0:31:35] +[titan] 2025-10-05 22:32:09,071 - root - INFO - step: 39145 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 22:32:09,071 - root - INFO - lr: 5.0517e-06 gnorm: 1.24 [23:57:57< 0:31:24] +[titan] 2025-10-05 22:32:17,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:32:19,885 - root - INFO - step: 39150 loss: 1.8924 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:32:19,885 - root - INFO - lr: 5.0511e-06 gnorm: 1.26 [23:58:08< 0:31:13] +[titan] 2025-10-05 22:32:30,741 - root - INFO - step: 39155 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:32:30,741 - root - INFO - lr: 5.0505e-06 gnorm: 1.26 [23:58:18< 0:31:02] +[titan] 2025-10-05 22:32:41,618 - root - INFO - step: 39160 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 22:32:41,618 - root - INFO - lr: 5.0499e-06 gnorm: 1.27 [23:58:29< 0:30:51] +[titan] 2025-10-05 22:32:52,420 - root - INFO - step: 39165 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,335 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7041 +[titan] 2025-10-05 22:32:52,421 - root - INFO - lr: 5.0493e-06 gnorm: 1.31 [23:58:40< 0:30:40] +[titan] 2025-10-05 22:33:03,241 - root - INFO - step: 39170 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6786 +[titan] 2025-10-05 22:33:03,241 - root - INFO - lr: 5.0488e-06 gnorm: 1.28 [23:58:51< 0:30:29] +[titan] 2025-10-05 22:33:14,059 - root - INFO - step: 39175 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:33:14,059 - root - INFO - lr: 5.0482e-06 gnorm: 1.25 [23:59:02< 0:30:18] +[titan] 2025-10-05 22:33:24,854 - root - INFO - step: 39180 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,356 tflops: 421.14 mfu: 42.58% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7066 +[titan] 2025-10-05 22:33:24,854 - root - INFO - lr: 
5.0476e-06 gnorm: 1.25 [23:59:13< 0:30:07] +[titan] 2025-10-05 22:33:35,698 - root - INFO - step: 39185 loss: 1.8822 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6646 +[titan] 2025-10-05 22:33:35,698 - root - INFO - lr: 5.0470e-06 gnorm: 1.24 [23:59:23< 0:29:56] +[titan] 2025-10-05 22:33:46,541 - root - INFO - step: 39190 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 22:33:46,541 - root - INFO - lr: 5.0464e-06 gnorm: 1.26 [23:59:34< 0:29:45] +[titan] 2025-10-05 22:33:57,343 - root - INFO - step: 39195 loss: 1.8734 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.87 mfu: 42.56% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6567 +[titan] 2025-10-05 22:33:57,343 - root - INFO - lr: 5.0459e-06 gnorm: 1.26 [23:59:45< 0:29:34] +[titan] 2025-10-05 22:34:05,988 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:34:08,160 - root - INFO - step: 39200 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6909 +[titan] 2025-10-05 22:34:08,161 - root - INFO - lr: 5.0453e-06 gnorm: 1.24 [23:59:56< 0:29:23] +[titan] 2025-10-05 22:34:18,971 - root - INFO - step: 39205 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,312 tflops: 420.53 mfu: 42.52% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:34:18,971 - root - INFO - lr: 5.0447e-06 gnorm: 1.27 [1 day, 0:00:07< 0:29:12] +[titan] 2025-10-05 22:34:29,800 - root - INFO - step: 39210 loss: 1.8480 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 22:34:29,800 - root - INFO - lr: 5.0442e-06 gnorm: 1.20 [1 day, 0:00:17< 0:29:01] +[titan] 2025-10-05 22:34:40,603 - root - INFO - step: 39215 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 22:34:40,603 - root - INFO - lr: 5.0436e-06 gnorm: 1.28 [1 day, 0:00:28< 0:28:50] +[titan] 2025-10-05 22:34:51,467 - root - INFO - step: 39220 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6993 +[titan] 2025-10-05 22:34:51,467 - root - INFO - lr: 5.0431e-06 gnorm: 1.25 [1 day, 0:00:39< 0:28:39] +[titan] 2025-10-05 22:35:02,300 - root - INFO - step: 39225 loss: 1.9143 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 22:35:02,300 - root - INFO - lr: 5.0425e-06 gnorm: 1.26 [1 day, 0:00:50< 0:28:28] +[titan] 2025-10-05 22:35:13,119 - root - INFO - step: 39230 loss: 1.8713 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6548 +[titan] 2025-10-05 
22:35:13,119 - root - INFO - lr: 5.0420e-06 gnorm: 1.29 [1 day, 0:01:01< 0:28:17] +[titan] 2025-10-05 22:35:23,922 - root - INFO - step: 39235 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.83 mfu: 42.55% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6983 +[titan] 2025-10-05 22:35:23,922 - root - INFO - lr: 5.0414e-06 gnorm: 1.26 [1 day, 0:01:12< 0:28:06] +[titan] 2025-10-05 22:35:34,763 - root - INFO - step: 39240 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 22:35:34,763 - root - INFO - lr: 5.0409e-06 gnorm: 1.28 [1 day, 0:01:22< 0:27:55] +[titan] 2025-10-05 22:35:45,607 - root - INFO - step: 39245 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 22:35:45,608 - root - INFO - lr: 5.0403e-06 gnorm: 1.28 [1 day, 0:01:33< 0:27:43] +[titan] 2025-10-05 22:35:54,249 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:35:56,420 - root - INFO - step: 39250 loss: 1.9097 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 22:35:56,420 - root - INFO - lr: 5.0398e-06 gnorm: 1.22 [1 day, 0:01:44< 0:27:32] +[titan] 2025-10-05 22:36:07,280 - root - INFO - step: 39255 loss: 1.8902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6708 +[titan] 2025-10-05 22:36:07,280 - root - INFO - lr: 5.0393e-06 gnorm: 1.24 [1 day, 0:01:55< 0:27:21] +[titan] 2025-10-05 22:36:18,098 - root - INFO - step: 39260 loss: 1.9171 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:36:18,098 - root - INFO - lr: 5.0388e-06 gnorm: 1.26 [1 day, 0:02:06< 0:27:10] +[titan] 2025-10-05 22:36:28,912 - root - INFO - step: 39265 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6622 +[titan] 2025-10-05 22:36:28,912 - root - INFO - lr: 5.0382e-06 gnorm: 1.27 [1 day, 0:02:17< 0:26:59] +[titan] 2025-10-05 22:36:39,738 - root - INFO - step: 39270 loss: 1.8621 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6463 +[titan] 2025-10-05 22:36:39,739 - root - INFO - lr: 5.0377e-06 gnorm: 1.26 [1 day, 0:02:27< 0:26:48] +[titan] 2025-10-05 22:36:50,600 - root - INFO - step: 39275 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 22:36:50,600 - root - INFO - lr: 5.0372e-06 gnorm: 1.28 [1 day, 0:02:38< 0:26:37] +[titan] 2025-10-05 22:37:01,420 - root - INFO - step: 39280 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6663 +[titan] 2025-10-05 
22:37:01,421 - root - INFO - lr: 5.0367e-06 gnorm: 1.20 [1 day, 0:02:49< 0:26:26] +[titan] 2025-10-05 22:37:12,238 - root - INFO - step: 39285 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 22:37:12,238 - root - INFO - lr: 5.0362e-06 gnorm: 1.27 [1 day, 0:03:00< 0:26:15] +[titan] 2025-10-05 22:37:23,040 - root - INFO - step: 39290 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 22:37:23,040 - root - INFO - lr: 5.0357e-06 gnorm: 1.26 [1 day, 0:03:11< 0:26:04] +[titan] 2025-10-05 22:37:33,840 - root - INFO - step: 39295 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:37:33,840 - root - INFO - lr: 5.0352e-06 gnorm: 1.36 [1 day, 0:03:22< 0:25:53] +[titan] 2025-10-05 22:37:42,487 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:37:44,699 - root - INFO - step: 39300 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6904 +[titan] 2025-10-05 22:37:44,699 - root - INFO - lr: 5.0347e-06 gnorm: 1.25 [1 day, 0:03:32< 0:25:42] +[titan] 2025-10-05 22:37:55,513 - root - INFO - step: 39305 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 22:37:55,513 - root - INFO - lr: 5.0342e-06 gnorm: 1.29 [1 day, 0:03:43< 0:25:31] +[titan] 2025-10-05 22:38:06,319 - root - INFO - step: 39310 loss: 1.8070 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2093 global_avg_mtp_loss: 1.5976 +[titan] 2025-10-05 22:38:06,319 - root - INFO - lr: 5.0337e-06 gnorm: 1.22 [1 day, 0:03:54< 0:25:20] +[titan] 2025-10-05 22:38:17,140 - root - INFO - step: 39315 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 22:38:17,140 - root - INFO - lr: 5.0332e-06 gnorm: 1.28 [1 day, 0:04:05< 0:25:09] +[titan] 2025-10-05 22:38:27,940 - root - INFO - step: 39320 loss: 1.8952 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:38:27,940 - root - INFO - lr: 5.0327e-06 gnorm: 1.24 [1 day, 0:04:16< 0:24:58] +[titan] 2025-10-05 22:38:38,794 - root - INFO - step: 39325 loss: 1.8206 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2106 global_avg_mtp_loss: 1.6101 +[titan] 2025-10-05 22:38:38,795 - root - INFO - lr: 5.0323e-06 gnorm: 1.23 [1 day, 0:04:26< 0:24:47] +[titan] 2025-10-05 22:38:49,643 - root - INFO - step: 39330 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7124 +[titan] 2025-10-05 
22:38:49,643 - root - INFO - lr: 5.0318e-06 gnorm: 1.29 [1 day, 0:04:37< 0:24:36] +[titan] 2025-10-05 22:39:00,463 - root - INFO - step: 39335 loss: 1.9117 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:39:00,464 - root - INFO - lr: 5.0313e-06 gnorm: 1.25 [1 day, 0:04:48< 0:24:25] +[titan] 2025-10-05 22:39:11,289 - root - INFO - step: 39340 loss: 1.8200 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2105 global_avg_mtp_loss: 1.6095 +[titan] 2025-10-05 22:39:11,289 - root - INFO - lr: 5.0308e-06 gnorm: 1.26 [1 day, 0:04:59< 0:24:14] +[titan] 2025-10-05 22:39:22,114 - root - INFO - step: 39345 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:39:22,114 - root - INFO - lr: 5.0304e-06 gnorm: 1.27 [1 day, 0:05:10< 0:24:03] +[titan] 2025-10-05 22:39:30,839 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:39:33,015 - root - INFO - step: 39350 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 22:39:33,015 - root - INFO - lr: 5.0299e-06 gnorm: 1.24 [1 day, 0:05:21< 0:23:52] +[titan] 2025-10-05 22:39:43,880 - root - INFO - step: 39355 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 22:39:43,880 - root - INFO - lr: 5.0294e-06 gnorm: 1.21 [1 day, 0:05:32< 0:23:41] +[titan] 2025-10-05 22:39:54,779 - root - INFO - step: 39360 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2123 global_avg_mtp_loss: 1.6258 +[titan] 2025-10-05 22:39:54,779 - root - INFO - lr: 5.0290e-06 gnorm: 1.26 [1 day, 0:05:42< 0:23:30] +[titan] 2025-10-05 22:40:05,616 - root - INFO - step: 39365 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.42% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 22:40:05,617 - root - INFO - lr: 5.0285e-06 gnorm: 1.22 [1 day, 0:05:53< 0:23:19] +[titan] 2025-10-05 22:40:16,459 - root - INFO - step: 39370 loss: 1.8828 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 22:40:16,459 - root - INFO - lr: 5.0281e-06 gnorm: 1.23 [1 day, 0:06:04< 0:23:08] +[titan] 2025-10-05 22:40:27,280 - root - INFO - step: 39375 loss: 1.9073 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 22:40:27,280 - root - INFO - lr: 5.0277e-06 gnorm: 1.28 [1 day, 0:06:15< 0:22:57] +[titan] 2025-10-05 22:40:38,119 - root - INFO - step: 39380 loss: 1.9206 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 
22:40:38,119 - root - INFO - lr: 5.0272e-06 gnorm: 1.23 [1 day, 0:06:26< 0:22:46] +[titan] 2025-10-05 22:40:49,118 - root - INFO - step: 39385 loss: 1.9186 memory: 118.84GiB(85.28%) tps: 29,794 tflops: 413.35 mfu: 41.79% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6970 +[titan] 2025-10-05 22:40:49,118 - root - INFO - lr: 5.0268e-06 gnorm: 1.25 [1 day, 0:06:37< 0:22:35] +[titan] 2025-10-05 22:40:59,990 - root - INFO - step: 39390 loss: 1.9410 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 22:40:59,990 - root - INFO - lr: 5.0263e-06 gnorm: 1.30 [1 day, 0:06:48< 0:22:24] +[titan] 2025-10-05 22:41:10,818 - root - INFO - step: 39395 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6520 +[titan] 2025-10-05 22:41:10,818 - root - INFO - lr: 5.0259e-06 gnorm: 1.23 [1 day, 0:06:58< 0:22:13] +[titan] 2025-10-05 22:41:19,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:41:21,663 - root - INFO - step: 39400 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:41:21,663 - root - INFO - lr: 5.0255e-06 gnorm: 1.24 [1 day, 0:07:09< 0:22:02] +[titan] 2025-10-05 22:41:32,499 - root - INFO - step: 39405 loss: 1.8950 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:41:32,500 - root - INFO - lr: 5.0251e-06 gnorm: 1.26 [1 day, 0:07:20< 0:21:51] +[titan] 2025-10-05 22:41:43,338 - root - INFO - step: 39410 loss: 1.9067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:41:43,339 - root - INFO - lr: 5.0246e-06 gnorm: 1.25 [1 day, 0:07:31< 0:21:40] +[titan] 2025-10-05 22:41:54,271 - root - INFO - step: 39415 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 22:41:54,271 - root - INFO - lr: 5.0242e-06 gnorm: 1.26 [1 day, 0:07:42< 0:21:29] +[titan] 2025-10-05 22:42:05,121 - root - INFO - step: 39420 loss: 1.8925 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6733 +[titan] 2025-10-05 22:42:05,121 - root - INFO - lr: 5.0238e-06 gnorm: 1.27 [1 day, 0:07:53< 0:21:18] +[titan] 2025-10-05 22:42:14,065 - root - INFO - Dumping profiler traces at step 39424 +[titan] 2025-10-05 22:42:14,104 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:42:16,301 - root - INFO - step: 39425 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 29,310 tflops: 406.63 mfu: 41.12% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 22:42:16,301 - root - INFO - lr: 5.0234e-06 gnorm: 1.27 [1 day, 0:08:04< 0:21:07] +[titan] 2025-10-05 22:42:27,154 
- root - INFO - step: 39430 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 22:42:27,155 - root - INFO - lr: 5.0230e-06 gnorm: 1.25 [1 day, 0:08:15< 0:20:56] +[titan] 2025-10-05 22:42:37,960 - root - INFO - step: 39435 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 22:42:37,960 - root - INFO - lr: 5.0226e-06 gnorm: 1.27 [1 day, 0:08:26< 0:20:45] +[titan] 2025-10-05 22:42:48,874 - root - INFO - step: 39440 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:42:48,874 - root - INFO - lr: 5.0222e-06 gnorm: 1.24 [1 day, 0:08:37< 0:20:34] +[titan] 2025-10-05 22:42:59,759 - root - INFO - step: 39445 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 22:42:59,759 - root - INFO - lr: 5.0218e-06 gnorm: 1.26 [1 day, 0:08:47< 0:20:23] +[titan] 2025-10-05 22:43:08,434 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:43:10,640 - root - INFO - step: 39450 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 22:43:10,641 - root - INFO - lr: 5.0214e-06 gnorm: 1.28 [1 day, 0:08:58< 0:20:12] +[titan] 2025-10-05 22:43:21,477 - root - INFO - step: 39455 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:43:21,478 - root - INFO - lr: 5.0210e-06 gnorm: 1.32 [1 day, 0:09:09< 0:20:01] +[titan] 2025-10-05 22:43:32,319 - root - INFO - step: 39460 loss: 1.9474 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7223 +[titan] 2025-10-05 22:43:32,319 - root - INFO - lr: 5.0206e-06 gnorm: 1.30 [1 day, 0:09:20< 0:19:50] +[titan] 2025-10-05 22:43:43,178 - root - INFO - step: 39465 loss: 1.8880 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6701 +[titan] 2025-10-05 22:43:43,178 - root - INFO - lr: 5.0203e-06 gnorm: 1.28 [1 day, 0:09:31< 0:19:39] +[titan] 2025-10-05 22:43:54,102 - root - INFO - step: 39470 loss: 1.8901 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6714 +[titan] 2025-10-05 22:43:54,102 - root - INFO - lr: 5.0199e-06 gnorm: 1.25 [1 day, 0:09:42< 0:19:27] +[titan] 2025-10-05 22:44:04,938 - root - INFO - step: 39475 loss: 1.8656 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6497 +[titan] 2025-10-05 22:44:04,938 - root - INFO - lr: 5.0195e-06 gnorm: 1.23 [1 day, 0:09:53< 0:19:16] +[titan] 2025-10-05 22:44:15,757 - root - INFO - step: 39480 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 
22:44:15,757 - root - INFO - lr: 5.0191e-06 gnorm: 1.27 [1 day, 0:10:03< 0:19:05] +[titan] 2025-10-05 22:44:26,640 - root - INFO - step: 39485 loss: 1.8523 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6375 +[titan] 2025-10-05 22:44:26,640 - root - INFO - lr: 5.0188e-06 gnorm: 1.23 [1 day, 0:10:14< 0:18:54] +[titan] 2025-10-05 22:44:37,455 - root - INFO - step: 39490 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6979 +[titan] 2025-10-05 22:44:37,455 - root - INFO - lr: 5.0184e-06 gnorm: 1.26 [1 day, 0:10:25< 0:18:43] +[titan] 2025-10-05 22:44:48,278 - root - INFO - step: 39495 loss: 1.8271 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2112 global_avg_mtp_loss: 1.6159 +[titan] 2025-10-05 22:44:48,278 - root - INFO - lr: 5.0181e-06 gnorm: 1.24 [1 day, 0:10:36< 0:18:32] +[titan] 2025-10-05 22:44:57,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:44:59,302 - root - INFO - step: 39500 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 29,726 tflops: 412.41 mfu: 41.70% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:44:59,302 - root - INFO - lr: 5.0177e-06 gnorm: 1.26 [1 day, 0:10:47< 0:18:21] +[titan] 2025-10-05 22:45:10,138 - root - INFO - step: 39505 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:45:10,138 - root - INFO - lr: 5.0173e-06 gnorm: 1.25 [1 day, 0:10:58< 0:18:10] +[titan] 2025-10-05 22:45:20,973 - root - INFO - step: 39510 loss: 1.9394 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 22:45:20,973 - root - INFO - lr: 5.0170e-06 gnorm: 1.26 [1 day, 0:11:09< 0:17:59] +[titan] 2025-10-05 22:45:31,857 - root - INFO - step: 39515 loss: 1.8345 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6215 +[titan] 2025-10-05 22:45:31,857 - root - INFO - lr: 5.0167e-06 gnorm: 1.32 [1 day, 0:11:20< 0:17:48] +[titan] 2025-10-05 22:45:42,715 - root - INFO - step: 39520 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 22:45:42,715 - root - INFO - lr: 5.0163e-06 gnorm: 1.30 [1 day, 0:11:30< 0:17:37] +[titan] 2025-10-05 22:45:53,618 - root - INFO - step: 39525 loss: 1.8642 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 22:45:53,618 - root - INFO - lr: 5.0160e-06 gnorm: 1.23 [1 day, 0:11:41< 0:17:26] +[titan] 2025-10-05 22:46:04,478 - root - INFO - step: 39530 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 
22:46:04,478 - root - INFO - lr: 5.0156e-06 gnorm: 1.28 [1 day, 0:11:52< 0:17:15] +[titan] 2025-10-05 22:46:15,353 - root - INFO - step: 39535 loss: 1.8455 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6312 +[titan] 2025-10-05 22:46:15,354 - root - INFO - lr: 5.0153e-06 gnorm: 1.27 [1 day, 0:12:03< 0:17:04] +[titan] 2025-10-05 22:46:26,197 - root - INFO - step: 39540 loss: 1.8853 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 22:46:26,197 - root - INFO - lr: 5.0150e-06 gnorm: 1.25 [1 day, 0:12:14< 0:16:53] +[titan] 2025-10-05 22:46:37,052 - root - INFO - step: 39545 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 22:46:37,052 - root - INFO - lr: 5.0147e-06 gnorm: 1.30 [1 day, 0:12:25< 0:16:42] +[titan] 2025-10-05 22:46:45,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:46:47,965 - root - INFO - step: 39550 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 22:46:47,966 - root - INFO - lr: 5.0143e-06 gnorm: 1.34 [1 day, 0:12:36< 0:16:31] +[titan] 2025-10-05 22:46:58,895 - root - INFO - step: 39555 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6838 +[titan] 2025-10-05 22:46:58,895 - root - INFO - lr: 5.0140e-06 gnorm: 1.24 [1 day, 0:12:47< 0:16:20] +[titan] 2025-10-05 22:47:09,746 - root - INFO - step: 39560 loss: 1.9366 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7128 +[titan] 2025-10-05 22:47:09,746 - root - INFO - lr: 5.0137e-06 gnorm: 1.22 [1 day, 0:12:57< 0:16:09] +[titan] 2025-10-05 22:47:20,590 - root - INFO - step: 39565 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 22:47:20,590 - root - INFO - lr: 5.0134e-06 gnorm: 1.26 [1 day, 0:13:08< 0:15:58] +[titan] 2025-10-05 22:47:31,450 - root - INFO - step: 39570 loss: 1.8471 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6330 +[titan] 2025-10-05 22:47:31,450 - root - INFO - lr: 5.0131e-06 gnorm: 1.24 [1 day, 0:13:19< 0:15:47] +[titan] 2025-10-05 22:47:42,294 - root - INFO - step: 39575 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:47:42,294 - root - INFO - lr: 5.0128e-06 gnorm: 1.26 [1 day, 0:13:30< 0:15:36] +[titan] 2025-10-05 22:47:53,185 - root - INFO - step: 39580 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 
22:47:53,186 - root - INFO - lr: 5.0125e-06 gnorm: 1.24 [1 day, 0:13:41< 0:15:25] +[titan] 2025-10-05 22:48:04,125 - root - INFO - step: 39585 loss: 1.8977 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6775 +[titan] 2025-10-05 22:48:04,125 - root - INFO - lr: 5.0122e-06 gnorm: 1.23 [1 day, 0:13:52< 0:15:14] +[titan] 2025-10-05 22:48:14,970 - root - INFO - step: 39590 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7450 +[titan] 2025-10-05 22:48:14,970 - root - INFO - lr: 5.0119e-06 gnorm: 1.27 [1 day, 0:14:03< 0:15:03] +[titan] 2025-10-05 22:48:25,818 - root - INFO - step: 39595 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 22:48:25,818 - root - INFO - lr: 5.0116e-06 gnorm: 1.25 [1 day, 0:14:13< 0:14:52] +[titan] 2025-10-05 22:48:34,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:48:36,655 - root - INFO - step: 39600 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:48:36,655 - root - INFO - lr: 5.0113e-06 gnorm: 1.26 [1 day, 0:14:24< 0:14:41] +[titan] 2025-10-05 22:48:47,505 - root - INFO - step: 39605 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:48:47,505 - root - INFO - lr: 5.0110e-06 gnorm: 1.25 [1 day, 0:14:35< 0:14:30] +[titan] 2025-10-05 22:48:58,396 - root - INFO - step: 39610 loss: 1.9266 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7045 +[titan] 2025-10-05 22:48:58,396 - root - INFO - lr: 5.0108e-06 gnorm: 1.27 [1 day, 0:14:46< 0:14:19] +[titan] 2025-10-05 22:49:09,249 - root - INFO - step: 39615 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:49:09,249 - root - INFO - lr: 5.0105e-06 gnorm: 1.28 [1 day, 0:14:57< 0:14:08] +[titan] 2025-10-05 22:49:20,110 - root - INFO - step: 39620 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 22:49:20,111 - root - INFO - lr: 5.0102e-06 gnorm: 1.26 [1 day, 0:15:08< 0:13:57] +[titan] 2025-10-05 22:49:30,958 - root - INFO - step: 39625 loss: 1.9163 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 22:49:30,958 - root - INFO - lr: 5.0100e-06 gnorm: 1.28 [1 day, 0:15:19< 0:13:46] +[titan] 2025-10-05 22:49:41,804 - root - INFO - step: 39630 loss: 1.8829 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 
22:49:41,804 - root - INFO - lr: 5.0097e-06 gnorm: 1.26 [1 day, 0:15:29< 0:13:35] +[titan] 2025-10-05 22:49:52,655 - root - INFO - step: 39635 loss: 1.8627 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6476 +[titan] 2025-10-05 22:49:52,655 - root - INFO - lr: 5.0094e-06 gnorm: 1.24 [1 day, 0:15:40< 0:13:24] +[titan] 2025-10-05 22:50:03,525 - root - INFO - step: 39640 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 22:50:03,526 - root - INFO - lr: 5.0092e-06 gnorm: 1.26 [1 day, 0:15:51< 0:13:13] +[titan] 2025-10-05 22:50:14,446 - root - INFO - step: 39645 loss: 1.8260 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2114 global_avg_mtp_loss: 1.6146 +[titan] 2025-10-05 22:50:14,446 - root - INFO - lr: 5.0089e-06 gnorm: 1.27 [1 day, 0:16:02< 0:13:02] +[titan] 2025-10-05 22:50:23,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:50:25,308 - root - INFO - step: 39650 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7039 +[titan] 2025-10-05 22:50:25,308 - root - INFO - lr: 5.0087e-06 gnorm: 1.28 [1 day, 0:16:13< 0:12:51] +[titan] 2025-10-05 22:50:36,173 - root - INFO - step: 39655 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:50:36,173 - root - INFO - lr: 5.0084e-06 gnorm: 1.26 [1 day, 0:16:24< 0:12:40] +[titan] 2025-10-05 22:50:47,037 - root - INFO - step: 39660 loss: 1.9006 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:50:47,037 - root - INFO - lr: 5.0082e-06 gnorm: 1.27 [1 day, 0:16:35< 0:12:29] +[titan] 2025-10-05 22:50:57,913 - root - INFO - step: 39665 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 22:50:57,913 - root - INFO - lr: 5.0079e-06 gnorm: 1.29 [1 day, 0:16:46< 0:12:18] +[titan] 2025-10-05 22:51:08,776 - root - INFO - step: 39670 loss: 1.8655 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 22:51:08,776 - root - INFO - lr: 5.0077e-06 gnorm: 1.30 [1 day, 0:16:56< 0:12:07] +[titan] 2025-10-05 22:51:19,670 - root - INFO - step: 39675 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:51:19,670 - root - INFO - lr: 5.0075e-06 gnorm: 1.25 [1 day, 0:17:07< 0:11:56] +[titan] 2025-10-05 22:51:30,542 - root - INFO - step: 39680 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 
22:51:30,542 - root - INFO - lr: 5.0072e-06 gnorm: 1.27 [1 day, 0:17:18< 0:11:45] +[titan] 2025-10-05 22:51:41,391 - root - INFO - step: 39685 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:51:41,391 - root - INFO - lr: 5.0070e-06 gnorm: 1.27 [1 day, 0:17:29< 0:11:34] +[titan] 2025-10-05 22:51:52,276 - root - INFO - step: 39690 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:51:52,276 - root - INFO - lr: 5.0068e-06 gnorm: 1.25 [1 day, 0:17:40< 0:11:23] +[titan] 2025-10-05 22:52:03,144 - root - INFO - step: 39695 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 22:52:03,144 - root - INFO - lr: 5.0066e-06 gnorm: 1.27 [1 day, 0:17:51< 0:11:12] +[titan] 2025-10-05 22:52:11,813 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:52:13,994 - root - INFO - step: 39700 loss: 1.9249 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 22:52:13,994 - root - INFO - lr: 5.0064e-06 gnorm: 1.23 [1 day, 0:18:02< 0:11:01] +[titan] 2025-10-05 22:52:24,851 - root - INFO - step: 39705 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6876 +[titan] 2025-10-05 22:52:24,852 - root - INFO - lr: 5.0062e-06 gnorm: 1.27 [1 day, 0:18:12< 0:10:50] +[titan] 2025-10-05 22:52:35,763 - root - INFO - step: 39710 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6727 +[titan] 2025-10-05 22:52:35,763 - root - INFO - lr: 5.0060e-06 gnorm: 1.30 [1 day, 0:18:23< 0:10:39] +[titan] 2025-10-05 22:52:46,625 - root - INFO - step: 39715 loss: 1.8269 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2119 global_avg_mtp_loss: 1.6151 +[titan] 2025-10-05 22:52:46,625 - root - INFO - lr: 5.0058e-06 gnorm: 1.25 [1 day, 0:18:34< 0:10:28] +[titan] 2025-10-05 22:52:57,517 - root - INFO - step: 39720 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 22:52:57,517 - root - INFO - lr: 5.0056e-06 gnorm: 1.26 [1 day, 0:18:45< 0:10:16] +[titan] 2025-10-05 22:53:08,394 - root - INFO - step: 39725 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 22:53:08,395 - root - INFO - lr: 5.0054e-06 gnorm: 1.27 [1 day, 0:18:56< 0:10:05] +[titan] 2025-10-05 22:53:19,248 - root - INFO - step: 39730 loss: 1.8733 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 
22:53:19,248 - root - INFO - lr: 5.0052e-06 gnorm: 1.26 [1 day, 0:19:07< 0:09:54] +[titan] 2025-10-05 22:53:30,094 - root - INFO - step: 39735 loss: 1.8701 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:53:30,094 - root - INFO - lr: 5.0050e-06 gnorm: 1.25 [1 day, 0:19:18< 0:09:43] +[titan] 2025-10-05 22:53:40,977 - root - INFO - step: 39740 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 22:53:40,977 - root - INFO - lr: 5.0048e-06 gnorm: 1.28 [1 day, 0:19:29< 0:09:32] +[titan] 2025-10-05 22:53:51,816 - root - INFO - step: 39745 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 22:53:51,816 - root - INFO - lr: 5.0046e-06 gnorm: 1.30 [1 day, 0:19:39< 0:09:21] +[titan] 2025-10-05 22:54:00,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:54:02,694 - root - INFO - step: 39750 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 22:54:02,694 - root - INFO - lr: 5.0044e-06 gnorm: 1.25 [1 day, 0:19:50< 0:09:10] +[titan] 2025-10-05 22:54:13,563 - root - INFO - step: 39755 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 22:54:13,563 - root - INFO - lr: 5.0042e-06 gnorm: 1.31 [1 day, 0:20:01< 0:08:59] +[titan] 2025-10-05 22:54:24,438 - root - INFO - step: 39760 loss: 1.8623 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6459 +[titan] 2025-10-05 22:54:24,438 - root - INFO - lr: 5.0041e-06 gnorm: 1.23 [1 day, 0:20:12< 0:08:48] +[titan] 2025-10-05 22:54:35,297 - root - INFO - step: 39765 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:54:35,297 - root - INFO - lr: 5.0039e-06 gnorm: 1.29 [1 day, 0:20:23< 0:08:37] +[titan] 2025-10-05 22:54:46,209 - root - INFO - step: 39770 loss: 1.8709 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6539 +[titan] 2025-10-05 22:54:46,209 - root - INFO - lr: 5.0037e-06 gnorm: 1.26 [1 day, 0:20:34< 0:08:26] +[titan] 2025-10-05 22:54:57,101 - root - INFO - step: 39775 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 22:54:57,101 - root - INFO - lr: 5.0036e-06 gnorm: 1.27 [1 day, 0:20:45< 0:08:15] +[titan] 2025-10-05 22:55:08,022 - root - INFO - step: 39780 loss: 1.9966 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7659 +[titan] 2025-10-05 
22:55:08,023 - root - INFO - lr: 5.0034e-06 gnorm: 1.31 [1 day, 0:20:56< 0:08:04] +[titan] 2025-10-05 22:55:18,865 - root - INFO - step: 39785 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:55:18,865 - root - INFO - lr: 5.0033e-06 gnorm: 1.29 [1 day, 0:21:06< 0:07:53] +[titan] 2025-10-05 22:55:29,721 - root - INFO - step: 39790 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:55:29,721 - root - INFO - lr: 5.0031e-06 gnorm: 1.28 [1 day, 0:21:17< 0:07:42] +[titan] 2025-10-05 22:55:40,588 - root - INFO - step: 39795 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 22:55:40,588 - root - INFO - lr: 5.0030e-06 gnorm: 1.25 [1 day, 0:21:28< 0:07:31] +[titan] 2025-10-05 22:55:49,265 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:55:51,443 - root - INFO - step: 39800 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:55:51,443 - root - INFO - lr: 5.0028e-06 gnorm: 1.25 [1 day, 0:21:39< 0:07:20] +[titan] 2025-10-05 22:56:02,382 - root - INFO - step: 39805 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6609 +[titan] 2025-10-05 22:56:02,382 - root - INFO - lr: 5.0027e-06 gnorm: 1.21 [1 day, 0:21:50< 0:07:09] +[titan] 2025-10-05 22:56:13,242 - root - INFO - step: 39810 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:56:13,242 - root - INFO - lr: 5.0026e-06 gnorm: 1.28 [1 day, 0:22:01< 0:06:58] +[titan] 2025-10-05 22:56:24,068 - root - INFO - step: 39815 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6926 +[titan] 2025-10-05 22:56:24,068 - root - INFO - lr: 5.0024e-06 gnorm: 1.28 [1 day, 0:22:12< 0:06:47] +[titan] 2025-10-05 22:56:34,883 - root - INFO - step: 39820 loss: 1.8589 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6448 +[titan] 2025-10-05 22:56:34,883 - root - INFO - lr: 5.0023e-06 gnorm: 1.23 [1 day, 0:22:22< 0:06:36] +[titan] 2025-10-05 22:56:45,691 - root - INFO - step: 39825 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6254 +[titan] 2025-10-05 22:56:45,691 - root - INFO - lr: 5.0022e-06 gnorm: 1.24 [1 day, 0:22:33< 0:06:25] +[titan] 2025-10-05 22:56:56,537 - root - INFO - step: 39830 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 
22:56:56,537 - root - INFO - lr: 5.0020e-06 gnorm: 1.29 [1 day, 0:22:44< 0:06:14] +[titan] 2025-10-05 22:57:07,418 - root - INFO - step: 39835 loss: 1.8289 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2110 global_avg_mtp_loss: 1.6179 +[titan] 2025-10-05 22:57:07,419 - root - INFO - lr: 5.0019e-06 gnorm: 1.25 [1 day, 0:22:55< 0:06:03] +[titan] 2025-10-05 22:57:18,260 - root - INFO - step: 39840 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6716 +[titan] 2025-10-05 22:57:18,260 - root - INFO - lr: 5.0018e-06 gnorm: 1.28 [1 day, 0:23:06< 0:05:52] +[titan] 2025-10-05 22:57:29,092 - root - INFO - step: 39845 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:57:29,092 - root - INFO - lr: 5.0017e-06 gnorm: 1.31 [1 day, 0:23:17< 0:05:41] +[titan] 2025-10-05 22:57:37,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:57:39,898 - root - INFO - step: 39850 loss: 1.8816 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.69 mfu: 42.54% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 22:57:39,899 - root - INFO - lr: 5.0016e-06 gnorm: 1.24 [1 day, 0:23:28< 0:05:30] +[titan] 2025-10-05 22:57:50,741 - root - INFO - step: 39855 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 22:57:50,741 - root - INFO - lr: 5.0015e-06 gnorm: 1.32 [1 day, 0:23:38< 0:05:19] +[titan] 2025-10-05 22:58:01,598 - root - INFO - step: 39860 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 22:58:01,598 - root - INFO - lr: 5.0014e-06 gnorm: 1.33 [1 day, 0:23:49< 0:05:08] +[titan] 2025-10-05 22:58:12,433 - root - INFO - step: 39865 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6621 +[titan] 2025-10-05 22:58:12,433 - root - INFO - lr: 5.0013e-06 gnorm: 1.27 [1 day, 0:24:00< 0:04:57] +[titan] 2025-10-05 22:58:23,320 - root - INFO - step: 39870 loss: 1.8085 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2094 global_avg_mtp_loss: 1.5991 +[titan] 2025-10-05 22:58:23,320 - root - INFO - lr: 5.0012e-06 gnorm: 1.27 [1 day, 0:24:11< 0:04:46] +[titan] 2025-10-05 22:58:34,151 - root - INFO - step: 39875 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7171 +[titan] 2025-10-05 22:58:34,151 - root - INFO - lr: 5.0011e-06 gnorm: 1.29 [1 day, 0:24:22< 0:04:35] +[titan] 2025-10-05 22:58:44,982 - root - INFO - step: 39880 loss: 1.8617 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2163 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 
22:58:44,982 - root - INFO - lr: 5.0010e-06 gnorm: 1.24 [1 day, 0:24:33< 0:04:24] +[titan] 2025-10-05 22:58:55,801 - root - INFO - step: 39885 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:58:55,801 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:43< 0:04:13] +[titan] 2025-10-05 22:59:06,655 - root - INFO - step: 39890 loss: 1.8466 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 22:59:06,655 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:54< 0:04:02] +[titan] 2025-10-05 22:59:17,499 - root - INFO - step: 39895 loss: 1.9303 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 22:59:17,499 - root - INFO - lr: 5.0008e-06 gnorm: 1.27 [1 day, 0:25:05< 0:03:51] +[titan] 2025-10-05 22:59:26,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:59:28,385 - root - INFO - step: 39900 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:59:28,385 - root - INFO - lr: 5.0007e-06 gnorm: 1.29 [1 day, 0:25:16< 0:03:40] +[titan] 2025-10-05 22:59:39,223 - root - INFO - step: 39905 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6730 +[titan] 2025-10-05 22:59:39,223 - root - INFO - lr: 5.0006e-06 gnorm: 1.25 [1 day, 0:25:27< 0:03:29] +[titan] 2025-10-05 22:59:50,050 - root - INFO - step: 39910 loss: 1.9026 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6824 +[titan] 2025-10-05 22:59:50,051 - root - INFO - lr: 5.0006e-06 gnorm: 1.33 [1 day, 0:25:38< 0:03:18] +[titan] 2025-10-05 23:00:00,881 - root - INFO - step: 39915 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 23:00:00,881 - root - INFO - lr: 5.0005e-06 gnorm: 1.25 [1 day, 0:25:48< 0:03:07] +[titan] 2025-10-05 23:00:11,722 - root - INFO - step: 39920 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 23:00:11,722 - root - INFO - lr: 5.0005e-06 gnorm: 1.23 [1 day, 0:25:59< 0:02:56] +[titan] 2025-10-05 23:00:22,583 - root - INFO - step: 39925 loss: 1.8682 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 23:00:22,583 - root - INFO - lr: 5.0004e-06 gnorm: 1.24 [1 day, 0:26:10< 0:02:45] +[titan] 2025-10-05 23:00:33,459 - root - INFO - step: 39930 loss: 1.8937 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 
23:00:33,459 - root - INFO - lr: 5.0003e-06 gnorm: 1.28 [1 day, 0:26:21< 0:02:34] +[titan] 2025-10-05 23:00:44,397 - root - INFO - step: 39935 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 23:00:44,397 - root - INFO - lr: 5.0003e-06 gnorm: 1.37 [1 day, 0:26:32< 0:02:23] +[titan] 2025-10-05 23:00:46,759 - root - INFO - Dumping profiler traces at step 39936 +[titan] 2025-10-05 23:00:46,798 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 23:00:55,477 - root - INFO - step: 39940 loss: 1.9007 memory: 118.84GiB(85.28%) tps: 29,576 tflops: 410.32 mfu: 41.49% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 23:00:55,477 - root - INFO - lr: 5.0003e-06 gnorm: 1.22 [1 day, 0:26:43< 0:02:12] +[titan] 2025-10-05 23:01:06,304 - root - INFO - step: 39945 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 23:01:06,305 - root - INFO - lr: 5.0002e-06 gnorm: 1.26 [1 day, 0:26:54< 0:02:01] +[titan] 2025-10-05 23:01:14,966 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 23:01:17,145 - root - INFO - step: 39950 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6639 +[titan] 2025-10-05 23:01:17,145 - root - INFO - lr: 5.0002e-06 gnorm: 1.25 [1 day, 0:27:05< 0:01:50] +[titan] 2025-10-05 23:01:28,000 - root - INFO - step: 39955 loss: 1.8456 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2134 global_avg_mtp_loss: 1.6322 +[titan] 2025-10-05 23:01:28,000 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:16< 0:01:39] +[titan] 2025-10-05 23:01:38,823 - root - INFO - step: 39960 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 23:01:38,823 - root - INFO - lr: 5.0001e-06 gnorm: 1.26 [1 day, 0:27:26< 0:01:28] +[titan] 2025-10-05 23:01:49,702 - root - INFO - step: 39965 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6498 +[titan] 2025-10-05 23:01:49,702 - root - INFO - lr: 5.0001e-06 gnorm: 1.29 [1 day, 0:27:37< 0:01:17] +[titan] 2025-10-05 23:02:00,536 - root - INFO - step: 39970 loss: 1.8845 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 23:02:00,536 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:48< 0:01:06] +[titan] 2025-10-05 23:02:11,385 - root - INFO - step: 39975 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6632 +[titan] 2025-10-05 23:02:11,385 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:27:59< 0:00:55] +[titan] 2025-10-05 23:02:22,224 - root - INFO - step: 39980 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 
23:02:22,224 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:10< 0:00:44] +[titan] 2025-10-05 23:02:33,062 - root - INFO - step: 39985 loss: 1.8577 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6423 +[titan] 2025-10-05 23:02:33,062 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:21< 0:00:33] +[titan] 2025-10-05 23:02:43,924 - root - INFO - step: 39990 loss: 1.9469 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 23:02:43,924 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:28:32< 0:00:22] +[titan] 2025-10-05 23:02:54,801 - root - INFO - step: 39995 loss: 1.8720 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6560 +[titan] 2025-10-05 23:02:54,801 - root - INFO - lr: 5.0000e-06 gnorm: 1.27 [1 day, 0:28:42< 0:00:11] +[titan] 2025-10-05 23:03:03,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 23:03:05,651 - root - INFO - step: 40000 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6752 +[titan] 2025-10-05 23:03:05,651 - root - INFO - lr: 5.0000e-06 gnorm: 1.24 [1 day, 0:28:53< 0:00:00] +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving a full checkpoint at last step, step 40000. +[titan] 2025-10-05 23:03:23,679 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 23:03:23,679 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 18.03 seconds. 
+[titan] 2025-10-05 23:03:23,679 - root - INFO - Training completed diff --git a/logs/none_99omtdbz/attempt_0/6/stderr.log b/logs/none_99omtdbz/attempt_0/6/stderr.log new file mode 100644 index 0000000000000000000000000000000000000000..626e9ac576e2ba2e22065d56ea3b44c01114659f --- /dev/null +++ b/logs/none_99omtdbz/attempt_0/6/stderr.log @@ -0,0 +1,17257 @@ +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc +wandb: Currently logged in as: zaydzuhri to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured. +[titan] 2025-10-04 22:33:17,549 - root - INFO - Starting job: default job +[titan] 2025-10-04 22:33:17,549 - root - INFO - { + "activation_checkpoint": { + "mode": "none", + "selective_ac_option": "2" + }, + "activation_offload": { + "mode": "none" + }, + "checkpoint": { + "async_mode": "disabled", + "convert_to_hf_on_save": false, + "create_seed_checkpoint": false, + "enable_checkpoint": true, + "exclude_from_loading": [], + "export_dtype": "float32", + "folder": "checkpoint", + "hf_repo_base_name": "zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000", + "hf_upload_enabled": true, + "hf_upload_format": "dcp", + "interval": 5000, + "interval_type": "steps", + "keep_latest_k": 0, + "load_step": -1, + "model_weights_only": false + }, + "comm": { + "init_timeout_seconds": 6000, + "trace_buf_size": 20000, + "train_timeout_seconds": 6000 + }, + "experimental": { + "context_parallel_degree": 1, + "context_parallel_rotate_method": "allgather", + "custom_model_path": "", + "enable_async_tensor_parallel": false, + "enable_compiled_autograd": false, + "pipeline_parallel_degree": 1, + "pipeline_parallel_microbatches": null, + 
"pipeline_parallel_schedule": "1F1B", + "pipeline_parallel_schedule_csv": "", + "pipeline_parallel_split_points": [] + }, + "fault_tolerance": { + "enable": false, + "group_size": 0, + "min_replica_size": 1, + "replica_id": 0 + }, + "float8": { + "enable_fsdp_float8_all_gather": false, + "force_recompute_fp8_weight_in_bwd": false, + "precompute_float8_dynamic_scale_for_fsdp": false, + "recipe_name": null + }, + "job": { + "config_file": "flame/models/fla.toml", + "description": "default job", + "dump_folder": "exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine", + "print_args": true, + "use_for_integration_test": false + }, + "lr_scheduler": { + "decay_ratio": null, + "decay_type": "cosine", + "lr_min": 0.1, + "warmup_steps": 400 + }, + "memory_estimation": { + "disable_fake_mode": false, + "enabled": false + }, + "metrics": { + "disable_color_printing": false, + "enable_tensorboard": false, + "enable_wandb": true, + "log_freq": 5, + "save_for_all_ranks": false, + "save_tb_folder": "tb" + }, + "model": { + "config": "configs/mtp_transformer_1B.json", + "converters": [], + "name": "fla", + "print_after_conversion": false, + "tokenizer_path": "fla-hub/transformer-1.3B-100B" + }, + "optimizer": { + "early_step_in_backward": false, + "eps": 1e-15, + "implementation": "fused", + "lr": 5e-05, + "name": "AdamW" + }, + "profiling": { + "enable_memory_snapshot": false, + "enable_profiling": true, + "profile_freq": 512, + "save_memory_snapshot_folder": "memory_snapshot", + "save_traces_folder": "profile_trace" + }, + "training": { + "batch_size": 16, + "compile": true, + "context_len": 4096, + "data_dir": null, + "data_files": null, + "data_parallel_replicate_degree": 1, + "data_parallel_shard_degree": -1, + "data_probs": null, + "dataset": "/root/.cache/zaydzuhri___open_math_instruct-2-text/default", + "dataset_name": "default", + "dataset_split": "train", + "deterministic": false, + "disable_loss_parallel": false, + 
"enable_cpu_offload": false, + "fsdp_reshard_after_forward": "default", + "gc_freq": 50, + "gradient_accumulation_steps": 1, + "max_norm": 1.0, + "mixed_precision_param": "bfloat16", + "mixed_precision_reduce": "float32", + "num_workers": 32, + "persistent_workers": false, + "pin_memory": false, + "prefetch_factor": 2, + "seed": 79, + "seq_len": 4096, + "skip_nan_inf": true, + "steps": 40000, + "streaming": false, + "tensor_parallel_degree": 1, + "varlen": false + } +} +[titan] 2025-10-04 22:33:17,549 - root - INFO - [GC] Initial GC collection. 0.00 seconds. +[titan] 2025-10-04 22:33:45,204 - root - INFO - Target Hugging Face repository for this run: zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000-20251004-223345 +[titan] 2025-10-04 22:33:45,204 - root - WARNING - ENV[TORCH_NCCL_ASYNC_ERROR_HANDLING] = 1 will be overridden to 3 based on job config +[titan] 2025-10-04 22:33:45,206 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:33:45,208 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:33:45,208 - root - INFO - Peak FLOPS used for computing MFU: 9.890e+14 +[titan] 2025-10-04 22:33:45,208 - root - INFO - Building 1-D device mesh with ['dp_shard'], [8] +[titan] 2025-10-04 22:33:45,936 - root - INFO - Loading tokenizer... 
+[titan] 2025-10-04 22:33:46,095 - root - INFO - LlamaTokenizerFast(name_or_path='fla-hub/transformer-1.3B-100B', vocab_size=32000, model_max_length=10000000000, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': ''}, clean_up_tokenization_spaces=False, added_tokens_decoder={ + 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 1: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), +} +) +[titan] 2025-10-04 22:33:46,095 - root - INFO - Loading dataset /root/.cache/zaydzuhri___open_math_instruct-2-text/default:default +`trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:46,095 - datasets.load - ERROR - `trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:46,888 - root - INFO - Dataset({ + features: ['text'], + num_rows: 21972791 +}) +[titan] 2025-10-04 22:33:46,889 - root - INFO - Shuffling the dataset with seed 79 +[titan] 2025-10-04 22:33:53,021 - root - INFO - Loading model config from configs/mtp_transformer_1B.json +[titan] 2025-10-04 22:33:53,023 - root - INFO - Building dataloader... 
+[titan] 2025-10-04 22:33:53,025 - root - INFO - Building model from the config +MTPTransformerConfig { + "bos_token_id": 1, + "elementwise_affine": true, + "eos_token_id": 2, + "fuse_cross_entropy": true, + "fuse_norm": true, + "fuse_swiglu": true, + "hidden_act": "swish", + "hidden_ratio": 4, + "hidden_size": 2048, + "initializer_range": 0.006, + "intermediate_size": null, + "max_position_embeddings": 8192, + "model_type": "mtp_transformer", + "n_future_tokens": 4, + "norm_eps": 1e-06, + "num_heads": 32, + "num_hidden_layers": 32, + "num_kv_heads": null, + "pad_token_id": 2, + "qk_norm": false, + "qkv_bias": false, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "transformers_version": "4.51.3", + "use_cache": true, + "use_custom_backward": false, + "vocab_size": 32000, + "window_size": null +} + +[titan] 2025-10-04 22:33:53,154 - root - INFO -  +MTPTransformerForCausalLM( + (model): MTPTransformerModel( + (embeddings): Embedding(32000, 2048, padding_idx=2) + (layers): ModuleList( + (0-27): 28 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (extra_heads): ModuleList( + (0-3): 4 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): 
Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (norm): RMSNorm(2048, eps=1e-06) + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + (criterion): FusedLinearCrossEntropyLoss() +) + +[titan] 2025-10-04 22:33:53,181 - root - INFO - Compiling each block with torch.compile +[titan] 2025-10-04 22:33:53,181 - root - INFO - Compiling the embedding, norm, and lm_head layers with torch.compile +[titan] 2025-10-04 22:33:53,182 - root - INFO - Compiling the entire model with torch.compile +[titan] 2025-10-04 22:33:53,255 - root - INFO - Applied FSDP to the model +[titan] 2025-10-04 22:33:53,438 - root - INFO - CUDA memory usage for model: 0.84GiB(0.60%) +[titan] 2025-10-04 22:33:53,458 - root - INFO - Checkpointing active. Checkpoints will be loaded from and saved to exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/checkpoint +[titan] 2025-10-04 22:33:53,458 - root - INFO - Loading the checkpoint at step 0. +[titan] 2025-10-04 22:34:08,152 - root - INFO - [GC] GC collection for checkpoint loading. 0.65 seconds. +[titan] 2025-10-04 22:34:08,152 - root - INFO - Finished loading the checkpoint in 14.69 seconds. 
+[titan] 2025-10-04 22:34:08,153 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:34:08,154 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:34:09,717 - root - INFO - ***** Running training ***** +[titan] 2025-10-04 22:34:09,718 - root - INFO -  Training starts at step 1 +[titan] 2025-10-04 22:34:09,723 - root - INFO -  Number of tokens per sequence = 4,096 +[titan] 2025-10-04 22:34:09,723 - root - INFO -  Gradient Accumulation steps = 1 +[titan] 2025-10-04 22:34:09,723 - root - INFO -  Instantaneous batch size (per device) = 16 +[titan] 2025-10-04 22:34:09,724 - root - INFO -  Global batch size (w. parallel, distributed & accumulation) = 128 (524,288 tokens) +[titan] 2025-10-04 22:34:09,725 - root - INFO -  Total optimization steps = 40,000 (20,971,520,000 tokens) +[titan] 2025-10-04 22:34:09,732 - root - INFO -  Warmup steps = 400 (209,715,200 tokens) +[titan] 2025-10-04 22:34:09,733 - root - INFO -  Number of parameters = 1,775,372,288  +[titan] 2025-10-04 22:34:09,733 - root - INFO - Profiling active. Traces will be saved at exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace +[titan] 2025-10-04 22:34:47,724 - root - INFO - step: 1 loss: 12.0105 memory: 116.89GiB(83.88%) tps: 1,656 tflops: 22.98 mfu: 2.32% global_avg_ntp_loss: 2.1249 global_avg_mtp_loss: 9.8856 +[titan] 2025-10-04 22:34:47,724 - root - INFO - lr: 2.4938e-07 gnorm: 20.89 [ 0:00:39<18 days, 7:38:54] +[titan] 2025-10-04 22:34:47,724 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-04 22:34:53,086 - root - INFO - [GC] GC collection invoked by checkpointer. 0.19 seconds. +[titan] 2025-10-04 22:34:53,086 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 5.36 seconds. 
+[titan] 2025-10-04 22:34:53,087 - root - INFO - Synchronizing and adjusting timeout for all ProcessGroups to 1:40:00 +[titan] 2025-10-04 22:36:58,991 - root - INFO - step: 5 loss: 11.7564 memory: 118.84GiB(85.28%) tps: 1,997 tflops: 27.71 mfu: 2.80% global_avg_ntp_loss: 2.0697 global_avg_mtp_loss: 9.6867 +[titan] 2025-10-04 22:36:58,992 - root - INFO - lr: 7.4813e-07 gnorm: 19.96 [ 0:02:50<15 days, 19:35:19] +[titan] 2025-10-04 22:37:09,851 - root - INFO - step: 10 loss: 11.2335 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 1.9192 global_avg_mtp_loss: 9.3143 +[titan] 2025-10-04 22:37:09,851 - root - INFO - lr: 1.3716e-06 gnorm: 18.16 [ 0:03:01<8 days, 9:50:00] +[titan] 2025-10-04 22:37:20,642 - root - INFO - step: 15 loss: 10.8309 memory: 118.84GiB(85.28%) tps: 30,368 tflops: 421.30 mfu: 42.60% global_avg_ntp_loss: 1.7960 global_avg_mtp_loss: 9.0349 +[titan] 2025-10-04 22:37:20,642 - root - INFO - lr: 1.9950e-06 gnorm: 10.62 [ 0:03:12<5 days, 22:31:43] +[titan] 2025-10-04 22:37:31,508 - root - INFO - step: 20 loss: 10.3172 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 1.6641 global_avg_mtp_loss: 8.6531 +[titan] 2025-10-04 22:37:31,508 - root - INFO - lr: 2.6185e-06 gnorm: 8.22 [ 0:03:23<4 days, 16:54:58] +[titan] 2025-10-04 22:37:42,328 - root - INFO - step: 25 loss: 9.9294 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 1.5801 global_avg_mtp_loss: 8.3492 +[titan] 2025-10-04 22:37:42,328 - root - INFO - lr: 3.2419e-06 gnorm: 7.10 [ 0:03:34<3 days, 23:07:39] +[titan] 2025-10-04 22:37:53,161 - root - INFO - step: 30 loss: 9.5763 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 1.4997 global_avg_mtp_loss: 8.0766 +[titan] 2025-10-04 22:37:53,161 - root - INFO - lr: 3.8653e-06 gnorm: 6.23 [ 0:03:45<3 days, 11:16:19] +[titan] 2025-10-04 22:38:04,056 - root - INFO - step: 35 loss: 9.3711 memory: 118.84GiB(85.28%) tps: 
30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 1.4603 global_avg_mtp_loss: 7.9108 +[titan] 2025-10-04 22:38:04,056 - root - INFO - lr: 4.4888e-06 gnorm: 6.20 [ 0:03:55<3 days, 2:49:21] +[titan] 2025-10-04 22:38:14,933 - root - INFO - step: 40 loss: 9.0179 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 1.3853 global_avg_mtp_loss: 7.6325 +[titan] 2025-10-04 22:38:14,933 - root - INFO - lr: 5.1122e-06 gnorm: 5.60 [ 0:04:06<2 days, 20:28:47] +[titan] 2025-10-04 22:38:25,789 - root - INFO - step: 45 loss: 8.7524 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 1.3406 global_avg_mtp_loss: 7.4118 +[titan] 2025-10-04 22:38:25,789 - root - INFO - lr: 5.7357e-06 gnorm: 5.43 [ 0:04:17<2 days, 15:32:26] +[titan] 2025-10-04 22:38:34,543 - root - INFO - [GC] Peforming periodical GC collection. 0.04 seconds. +[titan] 2025-10-04 22:38:36,745 - root - INFO - step: 50 loss: 8.5439 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.94 mfu: 41.96% global_avg_ntp_loss: 1.3050 global_avg_mtp_loss: 7.2389 +[titan] 2025-10-04 22:38:36,746 - root - INFO - lr: 6.3591e-06 gnorm: 5.74 [ 0:04:28<2 days, 11:36:39] +[titan] 2025-10-04 22:38:47,618 - root - INFO - step: 55 loss: 8.3158 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 1.2609 global_avg_mtp_loss: 7.0549 +[titan] 2025-10-04 22:38:47,619 - root - INFO - lr: 6.9825e-06 gnorm: 5.52 [ 0:04:39<2 days, 8:22:42] +[titan] 2025-10-04 22:38:58,482 - root - INFO - step: 60 loss: 8.2006 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 1.2373 global_avg_mtp_loss: 6.9633 +[titan] 2025-10-04 22:38:58,482 - root - INFO - lr: 7.6060e-06 gnorm: 5.72 [ 0:04:50<2 days, 5:40:57] +[titan] 2025-10-04 22:39:09,360 - root - INFO - step: 65 loss: 8.1393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 1.2182 global_avg_mtp_loss: 6.9211 +[titan] 2025-10-04 22:39:09,360 
- root - INFO - lr: 8.2294e-06 gnorm: 5.66 [ 0:05:01<2 days, 3:24:11] +[titan] 2025-10-04 22:39:20,248 - root - INFO - step: 70 loss: 7.7608 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 1.1495 global_avg_mtp_loss: 6.6112 +[titan] 2025-10-04 22:39:20,248 - root - INFO - lr: 8.8529e-06 gnorm: 5.54 [ 0:05:12<2 days, 1:27:02] +[titan] 2025-10-04 22:39:31,185 - root - INFO - step: 75 loss: 7.6862 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 1.1395 global_avg_mtp_loss: 6.5467 +[titan] 2025-10-04 22:39:31,185 - root - INFO - lr: 9.4763e-06 gnorm: 6.04 [ 0:05:23<1 day, 23:45:55] +[titan] 2025-10-04 22:39:42,063 - root - INFO - step: 80 loss: 7.4352 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.0959 global_avg_mtp_loss: 6.3393 +[titan] 2025-10-04 22:39:42,063 - root - INFO - lr: 1.0100e-05 gnorm: 5.61 [ 0:05:33<1 day, 22:16:56] +[titan] 2025-10-04 22:39:52,933 - root - INFO - step: 85 loss: 7.3232 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 1.0671 global_avg_mtp_loss: 6.2561 +[titan] 2025-10-04 22:39:52,934 - root - INFO - lr: 1.0723e-05 gnorm: 5.89 [ 0:05:44<1 day, 20:58:19] +[titan] 2025-10-04 22:40:03,808 - root - INFO - step: 90 loss: 7.1910 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 1.0545 global_avg_mtp_loss: 6.1364 +[titan] 2025-10-04 22:40:03,808 - root - INFO - lr: 1.1347e-05 gnorm: 6.24 [ 0:05:55<1 day, 19:48:28] +[titan] 2025-10-04 22:40:14,668 - root - INFO - step: 95 loss: 7.0637 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 1.0179 global_avg_mtp_loss: 6.0458 +[titan] 2025-10-04 22:40:14,668 - root - INFO - lr: 1.1970e-05 gnorm: 5.80 [ 0:06:06<1 day, 18:45:50] +[titan] 2025-10-04 22:40:23,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:40:25,567 - root - INFO - step: 100 loss: 7.0183 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 1.0144 global_avg_mtp_loss: 6.0039 +[titan] 2025-10-04 22:40:25,567 - root - INFO - lr: 1.2594e-05 gnorm: 5.49 [ 0:06:17<1 day, 17:49:43] +[titan] 2025-10-04 22:40:36,554 - root - INFO - step: 105 loss: 6.7845 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.77 mfu: 41.84% global_avg_ntp_loss: 0.9684 global_avg_mtp_loss: 5.8161 +[titan] 2025-10-04 22:40:36,554 - root - INFO - lr: 1.3217e-05 gnorm: 5.66 [ 0:06:28<1 day, 16:59:29] +[titan] 2025-10-04 22:40:47,440 - root - INFO - step: 110 loss: 6.7610 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.9616 global_avg_mtp_loss: 5.7993 +[titan] 2025-10-04 22:40:47,440 - root - INFO - lr: 1.3840e-05 gnorm: 5.76 [ 0:06:39<1 day, 16:13:11] +[titan] 2025-10-04 22:40:58,316 - root - INFO - step: 115 loss: 6.7822 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.9526 global_avg_mtp_loss: 5.8296 +[titan] 2025-10-04 22:40:58,316 - root - INFO - lr: 1.4464e-05 gnorm: 5.41 [ 0:06:50<1 day, 15:30:50] +[titan] 2025-10-04 22:41:09,192 - root - INFO - step: 120 loss: 6.5921 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.9190 global_avg_mtp_loss: 5.6731 +[titan] 2025-10-04 22:41:09,193 - root - INFO - lr: 1.5087e-05 gnorm: 5.18 [ 0:07:01<1 day, 14:52:00] +[titan] 2025-10-04 22:41:20,086 - root - INFO - step: 125 loss: 6.3759 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8880 global_avg_mtp_loss: 5.4878 +[titan] 2025-10-04 22:41:20,086 - root - INFO - lr: 1.5711e-05 gnorm: 4.91 [ 0:07:11<1 day, 14:16:22] +[titan] 2025-10-04 22:41:31,181 - root - INFO - step: 130 loss: 6.3566 memory: 118.84GiB(85.28%) tps: 29,536 tflops: 409.77 mfu: 41.43% global_avg_ntp_loss: 0.8781 global_avg_mtp_loss: 5.4786 +[titan] 2025-10-04 
22:41:31,181 - root - INFO - lr: 1.6334e-05 gnorm: 4.37 [ 0:07:23<1 day, 13:44:28] +[titan] 2025-10-04 22:41:42,074 - root - INFO - step: 135 loss: 6.3044 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.8713 global_avg_mtp_loss: 5.4331 +[titan] 2025-10-04 22:41:42,075 - root - INFO - lr: 1.6958e-05 gnorm: 4.29 [ 0:07:33<1 day, 13:13:56] +[titan] 2025-10-04 22:41:52,936 - root - INFO - step: 140 loss: 6.3158 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.8632 global_avg_mtp_loss: 5.4526 +[titan] 2025-10-04 22:41:52,936 - root - INFO - lr: 1.7581e-05 gnorm: 3.03 [ 0:07:44<1 day, 12:45:25] +[titan] 2025-10-04 22:42:03,814 - root - INFO - step: 145 loss: 6.2266 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.8508 global_avg_mtp_loss: 5.3758 +[titan] 2025-10-04 22:42:03,815 - root - INFO - lr: 1.8204e-05 gnorm: 3.86 [ 0:07:55<1 day, 12:18:56] +[titan] 2025-10-04 22:42:12,515 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:42:14,709 - root - INFO - step: 150 loss: 6.0872 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.8237 global_avg_mtp_loss: 5.2635 +[titan] 2025-10-04 22:42:14,710 - root - INFO - lr: 1.8828e-05 gnorm: 3.31 [ 0:08:06<1 day, 11:54:17] +[titan] 2025-10-04 22:42:25,613 - root - INFO - step: 155 loss: 6.0870 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.8286 global_avg_mtp_loss: 5.2584 +[titan] 2025-10-04 22:42:25,613 - root - INFO - lr: 1.9451e-05 gnorm: 3.04 [ 0:08:17<1 day, 11:31:14] +[titan] 2025-10-04 22:42:36,528 - root - INFO - step: 160 loss: 5.9733 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.8032 global_avg_mtp_loss: 5.1701 +[titan] 2025-10-04 22:42:36,529 - root - INFO - lr: 2.0075e-05 gnorm: 3.06 [ 0:08:28<1 day, 11:09:40] +[titan] 2025-10-04 22:42:47,448 - root - INFO - step: 165 loss: 5.8683 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.7907 global_avg_mtp_loss: 5.0776 +[titan] 2025-10-04 22:42:47,448 - root - INFO - lr: 2.0698e-05 gnorm: 3.39 [ 0:08:39<1 day, 10:49:25] +[titan] 2025-10-04 22:42:58,343 - root - INFO - step: 170 loss: 5.8536 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.7847 global_avg_mtp_loss: 5.0689 +[titan] 2025-10-04 22:42:58,343 - root - INFO - lr: 2.1322e-05 gnorm: 2.80 [ 0:08:50<1 day, 10:30:15] +[titan] 2025-10-04 22:43:09,215 - root - INFO - step: 175 loss: 5.7812 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.7716 global_avg_mtp_loss: 5.0096 +[titan] 2025-10-04 22:43:09,216 - root - INFO - lr: 2.1945e-05 gnorm: 4.02 [ 0:09:01<1 day, 10:12:05] +[titan] 2025-10-04 22:43:20,097 - root - INFO - step: 180 loss: 5.7994 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.7711 global_avg_mtp_loss: 5.0283 +[titan] 2025-10-04 
22:43:20,098 - root - INFO - lr: 2.2569e-05 gnorm: 3.36 [ 0:09:11<1 day, 9:54:57] +[titan] 2025-10-04 22:43:31,003 - root - INFO - step: 185 loss: 5.6617 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9134 +[titan] 2025-10-04 22:43:31,003 - root - INFO - lr: 2.3192e-05 gnorm: 2.73 [ 0:09:22<1 day, 9:38:50] +[titan] 2025-10-04 22:43:41,902 - root - INFO - step: 190 loss: 5.6564 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9080 +[titan] 2025-10-04 22:43:41,903 - root - INFO - lr: 2.3815e-05 gnorm: 3.17 [ 0:09:33<1 day, 9:23:31] +[titan] 2025-10-04 22:43:52,788 - root - INFO - step: 195 loss: 5.6643 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.7475 global_avg_mtp_loss: 4.9168 +[titan] 2025-10-04 22:43:52,788 - root - INFO - lr: 2.4439e-05 gnorm: 2.43 [ 0:09:44<1 day, 9:08:56] +[titan] 2025-10-04 22:44:01,483 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:44:03,675 - root - INFO - step: 200 loss: 5.6189 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.7360 global_avg_mtp_loss: 4.8830 +[titan] 2025-10-04 22:44:03,675 - root - INFO - lr: 2.5062e-05 gnorm: 3.47 [ 0:09:55<1 day, 8:55:04] +[titan] 2025-10-04 22:44:14,559 - root - INFO - step: 205 loss: 5.5215 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.7213 global_avg_mtp_loss: 4.8002 +[titan] 2025-10-04 22:44:14,559 - root - INFO - lr: 2.5686e-05 gnorm: 3.09 [ 0:10:06<1 day, 8:41:52] +[titan] 2025-10-04 22:44:25,433 - root - INFO - step: 210 loss: 5.5044 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.7198 global_avg_mtp_loss: 4.7846 +[titan] 2025-10-04 22:44:25,433 - root - INFO - lr: 2.6309e-05 gnorm: 2.66 [ 0:10:17<1 day, 8:29:15] +[titan] 2025-10-04 22:44:36,338 - root - INFO - step: 215 loss: 5.4728 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.7115 global_avg_mtp_loss: 4.7613 +[titan] 2025-10-04 22:44:36,338 - root - INFO - lr: 2.6933e-05 gnorm: 2.45 [ 0:10:28<1 day, 8:17:19] +[titan] 2025-10-04 22:44:47,225 - root - INFO - step: 220 loss: 5.3310 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.6944 global_avg_mtp_loss: 4.6366 +[titan] 2025-10-04 22:44:47,225 - root - INFO - lr: 2.7556e-05 gnorm: 2.66 [ 0:10:39<1 day, 8:05:51] +[titan] 2025-10-04 22:44:58,124 - root - INFO - step: 225 loss: 5.3739 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6934 global_avg_mtp_loss: 4.6805 +[titan] 2025-10-04 22:44:58,125 - root - INFO - lr: 2.8180e-05 gnorm: 2.95 [ 0:10:49<1 day, 7:54:56] +[titan] 2025-10-04 22:45:09,004 - root - INFO - step: 230 loss: 5.4216 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.7014 global_avg_mtp_loss: 4.7202 +[titan] 2025-10-04 22:45:09,004 - 
root - INFO - lr: 2.8803e-05 gnorm: 2.60 [ 0:11:00<1 day, 7:44:25] +[titan] 2025-10-04 22:45:19,907 - root - INFO - step: 235 loss: 5.3090 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.6909 global_avg_mtp_loss: 4.6180 +[titan] 2025-10-04 22:45:19,907 - root - INFO - lr: 2.9426e-05 gnorm: 2.68 [ 0:11:11<1 day, 7:34:25] +[titan] 2025-10-04 22:45:30,796 - root - INFO - step: 240 loss: 5.2690 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.6785 global_avg_mtp_loss: 4.5905 +[titan] 2025-10-04 22:45:30,796 - root - INFO - lr: 3.0050e-05 gnorm: 2.38 [ 0:11:22<1 day, 7:24:46] +[titan] 2025-10-04 22:45:41,709 - root - INFO - step: 245 loss: 5.1965 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.6691 global_avg_mtp_loss: 4.5274 +[titan] 2025-10-04 22:45:41,710 - root - INFO - lr: 3.0673e-05 gnorm: 2.47 [ 0:11:33<1 day, 7:15:35] +[titan] 2025-10-04 22:45:50,403 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:45:52,597 - root - INFO - step: 250 loss: 5.1858 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6654 global_avg_mtp_loss: 4.5204 +[titan] 2025-10-04 22:45:52,597 - root - INFO - lr: 3.1297e-05 gnorm: 3.00 [ 0:11:44<1 day, 7:06:42] +[titan] 2025-10-04 22:46:03,496 - root - INFO - step: 255 loss: 5.1706 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.6625 global_avg_mtp_loss: 4.5081 +[titan] 2025-10-04 22:46:03,496 - root - INFO - lr: 3.1920e-05 gnorm: 2.61 [ 0:11:55<1 day, 6:58:11] +[titan] 2025-10-04 22:46:14,369 - root - INFO - step: 260 loss: 5.1473 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.6607 global_avg_mtp_loss: 4.4865 +[titan] 2025-10-04 22:46:14,369 - root - INFO - lr: 3.2544e-05 gnorm: 2.39 [ 0:12:06<1 day, 6:49:54] +[titan] 2025-10-04 22:46:25,252 - root - INFO - step: 265 loss: 5.1300 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.6565 global_avg_mtp_loss: 4.4735 +[titan] 2025-10-04 22:46:25,253 - root - INFO - lr: 3.3167e-05 gnorm: 2.29 [ 0:12:17<1 day, 6:41:58] +[titan] 2025-10-04 22:46:36,152 - root - INFO - step: 270 loss: 5.1579 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6598 global_avg_mtp_loss: 4.4981 +[titan] 2025-10-04 22:46:36,152 - root - INFO - lr: 3.3791e-05 gnorm: 2.51 [ 0:12:27<1 day, 6:34:22] +[titan] 2025-10-04 22:46:47,010 - root - INFO - step: 275 loss: 5.0167 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.6398 global_avg_mtp_loss: 4.3769 +[titan] 2025-10-04 22:46:47,011 - root - INFO - lr: 3.4414e-05 gnorm: 2.10 [ 0:12:38<1 day, 6:26:56] +[titan] 2025-10-04 22:46:57,896 - root - INFO - step: 280 loss: 5.0898 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.6486 global_avg_mtp_loss: 4.4413 +[titan] 2025-10-04 22:46:57,896 - 
root - INFO - lr: 3.5037e-05 gnorm: 3.07 [ 0:12:49<1 day, 6:19:49] +[titan] 2025-10-04 22:47:08,770 - root - INFO - step: 285 loss: 5.1105 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.6521 global_avg_mtp_loss: 4.4584 +[titan] 2025-10-04 22:47:08,770 - root - INFO - lr: 3.5661e-05 gnorm: 2.23 [ 0:13:00<1 day, 6:12:55] +[titan] 2025-10-04 22:47:19,662 - root - INFO - step: 290 loss: 5.0807 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6456 global_avg_mtp_loss: 4.4352 +[titan] 2025-10-04 22:47:19,662 - root - INFO - lr: 3.6284e-05 gnorm: 2.82 [ 0:13:11<1 day, 6:06:17] +[titan] 2025-10-04 22:47:30,549 - root - INFO - step: 295 loss: 5.0464 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6427 global_avg_mtp_loss: 4.4037 +[titan] 2025-10-04 22:47:30,550 - root - INFO - lr: 3.6908e-05 gnorm: 2.35 [ 0:13:22<1 day, 5:59:52] +[titan] 2025-10-04 22:47:39,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:47:41,466 - root - INFO - step: 300 loss: 5.1119 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.6529 global_avg_mtp_loss: 4.4589 +[titan] 2025-10-04 22:47:41,466 - root - INFO - lr: 3.7531e-05 gnorm: 2.72 [ 0:13:33<1 day, 5:53:44] +[titan] 2025-10-04 22:47:52,331 - root - INFO - step: 305 loss: 4.9831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.6338 global_avg_mtp_loss: 4.3492 +[titan] 2025-10-04 22:47:52,331 - root - INFO - lr: 3.8155e-05 gnorm: 2.81 [ 0:13:44<1 day, 5:47:40] +[titan] 2025-10-04 22:48:03,188 - root - INFO - step: 310 loss: 4.9896 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.6364 global_avg_mtp_loss: 4.3532 +[titan] 2025-10-04 22:48:03,188 - root - INFO - lr: 3.8778e-05 gnorm: 2.39 [ 0:13:55<1 day, 5:41:47] +[titan] 2025-10-04 22:48:14,051 - root - INFO - step: 315 loss: 4.8865 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.6207 global_avg_mtp_loss: 4.2658 +[titan] 2025-10-04 22:48:14,051 - root - INFO - lr: 3.9401e-05 gnorm: 3.11 [ 0:14:05<1 day, 5:36:05] +[titan] 2025-10-04 22:48:24,948 - root - INFO - step: 320 loss: 4.9416 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.6290 global_avg_mtp_loss: 4.3126 +[titan] 2025-10-04 22:48:24,948 - root - INFO - lr: 4.0025e-05 gnorm: 2.57 [ 0:14:16<1 day, 5:30:38] +[titan] 2025-10-04 22:48:35,879 - root - INFO - step: 325 loss: 4.8914 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.6229 global_avg_mtp_loss: 4.2686 +[titan] 2025-10-04 22:48:35,879 - root - INFO - lr: 4.0648e-05 gnorm: 2.22 [ 0:14:27<1 day, 5:25:25] +[titan] 2025-10-04 22:48:46,771 - root - INFO - step: 330 loss: 4.8494 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.6146 global_avg_mtp_loss: 4.2348 +[titan] 2025-10-04 22:48:46,771 - 
root - INFO - lr: 4.1272e-05 gnorm: 2.17 [ 0:14:38<1 day, 5:20:16] +[titan] 2025-10-04 22:48:57,658 - root - INFO - step: 335 loss: 4.9431 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6274 global_avg_mtp_loss: 4.3157 +[titan] 2025-10-04 22:48:57,658 - root - INFO - lr: 4.1895e-05 gnorm: 2.41 [ 0:14:49<1 day, 5:15:15] +[titan] 2025-10-04 22:49:08,546 - root - INFO - step: 340 loss: 4.8429 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6110 global_avg_mtp_loss: 4.2319 +[titan] 2025-10-04 22:49:08,546 - root - INFO - lr: 4.2519e-05 gnorm: 2.38 [ 0:15:00<1 day, 5:10:24] +[titan] 2025-10-04 22:49:19,437 - root - INFO - step: 345 loss: 4.7699 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.6044 global_avg_mtp_loss: 4.1656 +[titan] 2025-10-04 22:49:19,437 - root - INFO - lr: 4.3142e-05 gnorm: 2.47 [ 0:15:11<1 day, 5:05:40] +[titan] 2025-10-04 22:49:28,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:49:30,329 - root - INFO - step: 350 loss: 4.8354 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6129 global_avg_mtp_loss: 4.2225 +[titan] 2025-10-04 22:49:30,329 - root - INFO - lr: 4.3766e-05 gnorm: 2.30 [ 0:15:22<1 day, 5:01:05] +[titan] 2025-10-04 22:49:41,264 - root - INFO - step: 355 loss: 4.8409 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.6123 global_avg_mtp_loss: 4.2286 +[titan] 2025-10-04 22:49:41,264 - root - INFO - lr: 4.4389e-05 gnorm: 2.44 [ 0:15:33<1 day, 4:56:41] +[titan] 2025-10-04 22:49:52,147 - root - INFO - step: 360 loss: 4.6777 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.5902 global_avg_mtp_loss: 4.0875 +[titan] 2025-10-04 22:49:52,148 - root - INFO - lr: 4.5012e-05 gnorm: 1.96 [ 0:15:43<1 day, 4:52:20] +[titan] 2025-10-04 22:50:03,033 - root - INFO - step: 365 loss: 4.8152 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.6116 global_avg_mtp_loss: 4.2037 +[titan] 2025-10-04 22:50:03,033 - root - INFO - lr: 4.5636e-05 gnorm: 2.14 [ 0:15:54<1 day, 4:48:05] +[titan] 2025-10-04 22:50:13,908 - root - INFO - step: 370 loss: 4.7797 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.6024 global_avg_mtp_loss: 4.1773 +[titan] 2025-10-04 22:50:13,908 - root - INFO - lr: 4.6259e-05 gnorm: 2.37 [ 0:16:05<1 day, 4:43:56] +[titan] 2025-10-04 22:50:24,783 - root - INFO - step: 375 loss: 4.6716 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.5906 global_avg_mtp_loss: 4.0810 +[titan] 2025-10-04 22:50:24,783 - root - INFO - lr: 4.6883e-05 gnorm: 2.26 [ 0:16:16<1 day, 4:39:53] +[titan] 2025-10-04 22:50:35,652 - root - INFO - step: 380 loss: 4.7162 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.5950 global_avg_mtp_loss: 4.1212 +[titan] 2025-10-04 22:50:35,652 - 
root - INFO - lr: 4.7506e-05 gnorm: 2.15 [ 0:16:27<1 day, 4:35:55] +[titan] 2025-10-04 22:50:46,574 - root - INFO - step: 385 loss: 4.8016 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.6054 global_avg_mtp_loss: 4.1962 +[titan] 2025-10-04 22:50:46,574 - root - INFO - lr: 4.8130e-05 gnorm: 2.50 [ 0:16:38<1 day, 4:32:09] +[titan] 2025-10-04 22:50:57,443 - root - INFO - step: 390 loss: 4.7078 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.5929 global_avg_mtp_loss: 4.1150 +[titan] 2025-10-04 22:50:57,444 - root - INFO - lr: 4.8753e-05 gnorm: 2.00 [ 0:16:49<1 day, 4:28:23] +[titan] 2025-10-04 22:51:08,305 - root - INFO - step: 395 loss: 4.6384 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.5834 global_avg_mtp_loss: 4.0551 +[titan] 2025-10-04 22:51:08,305 - root - INFO - lr: 4.9377e-05 gnorm: 2.37 [ 0:17:00<1 day, 4:24:42] +[titan] 2025-10-04 22:51:16,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:51:19,152 - root - INFO - step: 400 loss: 4.6918 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.5928 global_avg_mtp_loss: 4.0990 +[titan] 2025-10-04 22:51:19,152 - root - INFO - lr: 5.0000e-05 gnorm: 2.36 [ 0:17:10<1 day, 4:21:04] +[titan] 2025-10-04 22:51:30,025 - root - INFO - step: 405 loss: 4.6284 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.5843 global_avg_mtp_loss: 4.0441 +[titan] 2025-10-04 22:51:30,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.37 [ 0:17:21<1 day, 4:17:34] +[titan] 2025-10-04 22:51:40,903 - root - INFO - step: 410 loss: 4.5757 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.5764 global_avg_mtp_loss: 3.9993 +[titan] 2025-10-04 22:51:40,903 - root - INFO - lr: 5.0000e-05 gnorm: 2.16 [ 0:17:32<1 day, 4:14:10] +[titan] 2025-10-04 22:51:51,757 - root - INFO - step: 415 loss: 4.6798 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.5875 global_avg_mtp_loss: 4.0923 +[titan] 2025-10-04 22:51:51,757 - root - INFO - lr: 5.0000e-05 gnorm: 2.18 [ 0:17:43<1 day, 4:10:48] +[titan] 2025-10-04 22:52:02,632 - root - INFO - step: 420 loss: 4.6984 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.5914 global_avg_mtp_loss: 4.1070 +[titan] 2025-10-04 22:52:02,632 - root - INFO - lr: 5.0000e-05 gnorm: 2.08 [ 0:17:54<1 day, 4:07:32] +[titan] 2025-10-04 22:52:13,523 - root - INFO - step: 425 loss: 4.6583 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.5870 global_avg_mtp_loss: 4.0713 +[titan] 2025-10-04 22:52:13,523 - root - INFO - lr: 5.0000e-05 gnorm: 1.97 [ 0:18:05<1 day, 4:04:22] +[titan] 2025-10-04 22:52:24,408 - root - INFO - step: 430 loss: 4.5843 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.5750 global_avg_mtp_loss: 4.0093 +[titan] 2025-10-04 22:52:24,408 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.22 [ 0:18:16<1 day, 4:01:16] +[titan] 2025-10-04 22:52:35,258 - root - INFO - step: 435 loss: 4.5321 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.5697 global_avg_mtp_loss: 3.9625 +[titan] 2025-10-04 22:52:35,258 - root - INFO - lr: 5.0000e-05 gnorm: 2.13 [ 0:18:27<1 day, 3:58:11] +[titan] 2025-10-04 22:52:46,145 - root - INFO - step: 440 loss: 4.5606 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.5730 global_avg_mtp_loss: 3.9875 +[titan] 2025-10-04 22:52:46,146 - root - INFO - lr: 5.0000e-05 gnorm: 2.40 [ 0:18:37<1 day, 3:55:13] +[titan] 2025-10-04 22:52:57,025 - root - INFO - step: 445 loss: 4.5406 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.5687 global_avg_mtp_loss: 3.9718 +[titan] 2025-10-04 22:52:57,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:18:48<1 day, 3:52:18] +[titan] 2025-10-04 22:53:05,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:53:07,904 - root - INFO - step: 450 loss: 4.5707 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.5740 global_avg_mtp_loss: 3.9967 +[titan] 2025-10-04 22:53:07,904 - root - INFO - lr: 5.0000e-05 gnorm: 2.34 [ 0:18:59<1 day, 3:49:27] +[titan] 2025-10-04 22:53:18,769 - root - INFO - step: 455 loss: 4.4743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.5620 global_avg_mtp_loss: 3.9123 +[titan] 2025-10-04 22:53:18,770 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:19:10<1 day, 3:46:38] +[titan] 2025-10-04 22:53:29,609 - root - INFO - step: 460 loss: 4.4303 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8761 +[titan] 2025-10-04 22:53:29,609 - root - INFO - lr: 5.0000e-05 gnorm: 2.25 [ 0:19:21<1 day, 3:43:50] +[titan] 2025-10-04 22:53:40,497 - root - INFO - step: 465 loss: 4.4283 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.5552 global_avg_mtp_loss: 3.8731 +[titan] 2025-10-04 22:53:40,497 - root - INFO - lr: 5.0000e-05 gnorm: 1.84 [ 0:19:32<1 day, 3:41:10] +[titan] 2025-10-04 22:53:51,344 - root - INFO - step: 470 loss: 4.4176 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8632 +[titan] 2025-10-04 22:53:51,344 - root - INFO - lr: 5.0000e-05 gnorm: 2.15 [ 0:19:43<1 day, 3:38:29] +[titan] 2025-10-04 22:54:02,202 - root - INFO - step: 475 loss: 4.4882 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.5655 global_avg_mtp_loss: 3.9227 +[titan] 2025-10-04 22:54:02,202 - root - INFO - lr: 5.0000e-05 gnorm: 1.78 [ 0:19:53<1 day, 3:35:53] +[titan] 2025-10-04 22:54:13,066 - root - INFO - step: 480 loss: 4.4600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.5572 global_avg_mtp_loss: 3.9028 +[titan] 2025-10-04 22:54:13,066 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.00 [ 0:20:04<1 day, 3:33:20] +[titan] 2025-10-04 22:54:23,913 - root - INFO - step: 485 loss: 4.3781 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.5484 global_avg_mtp_loss: 3.8297 +[titan] 2025-10-04 22:54:23,913 - root - INFO - lr: 4.9999e-05 gnorm: 1.60 [ 0:20:15<1 day, 3:30:49] +[titan] 2025-10-04 22:54:34,742 - root - INFO - step: 490 loss: 4.4068 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.5524 global_avg_mtp_loss: 3.8544 +[titan] 2025-10-04 22:54:34,742 - root - INFO - lr: 4.9999e-05 gnorm: 2.19 [ 0:20:26<1 day, 3:28:19] +[titan] 2025-10-04 22:54:45,647 - root - INFO - step: 495 loss: 4.3459 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.5461 global_avg_mtp_loss: 3.7998 +[titan] 2025-10-04 22:54:45,647 - root - INFO - lr: 4.9999e-05 gnorm: 1.79 [ 0:20:37<1 day, 3:25:57] +[titan] 2025-10-04 22:54:54,303 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:54:56,481 - root - INFO - step: 500 loss: 4.5195 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.5664 global_avg_mtp_loss: 3.9531 +[titan] 2025-10-04 22:54:56,481 - root - INFO - lr: 4.9999e-05 gnorm: 1.81 [ 0:20:48<1 day, 3:23:33] +[titan] 2025-10-04 22:55:07,316 - root - INFO - step: 505 loss: 4.3727 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.5468 global_avg_mtp_loss: 3.8259 +[titan] 2025-10-04 22:55:07,316 - root - INFO - lr: 4.9999e-05 gnorm: 1.99 [ 0:20:59<1 day, 3:21:12] +[titan] 2025-10-04 22:55:18,908 - root - INFO - step: 510 loss: 4.3913 memory: 118.84GiB(85.28%) tps: 28,269 tflops: 392.18 mfu: 39.65% global_avg_ntp_loss: 0.5477 global_avg_mtp_loss: 3.8435 +[titan] 2025-10-04 22:55:18,908 - root - INFO - lr: 4.9999e-05 gnorm: 1.64 [ 0:21:10<1 day, 3:19:52] +[titan] 2025-10-04 22:55:23,569 - root - INFO - Dumping profiler traces at step 512 +[titan] 2025-10-04 22:55:23,604 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 22:55:30,219 - root - INFO - step: 515 loss: 4.3744 memory: 118.84GiB(85.28%) tps: 28,972 tflops: 401.94 mfu: 40.64% global_avg_ntp_loss: 0.5458 global_avg_mtp_loss: 3.8286 +[titan] 2025-10-04 22:55:30,219 - root - INFO - lr: 4.9999e-05 gnorm: 1.67 [ 0:21:22<1 day, 3:18:11] +[titan] 2025-10-04 22:55:41,134 - root - INFO - step: 520 loss: 4.3427 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.5439 global_avg_mtp_loss: 3.7988 +[titan] 2025-10-04 22:55:41,135 - root - INFO - lr: 4.9999e-05 gnorm: 2.16 [ 0:21:32<1 day, 3:16:03] +[titan] 2025-10-04 22:55:52,306 - root - INFO - step: 525 loss: 4.3706 memory: 118.84GiB(85.28%) tps: 29,331 tflops: 406.93 mfu: 41.15% global_avg_ntp_loss: 0.5472 global_avg_mtp_loss: 3.8234 +[titan] 2025-10-04 22:55:52,307 - root - INFO - lr: 4.9999e-05 gnorm: 1.88 [ 0:21:44<1 day, 3:14:15] +[titan] 2025-10-04 22:56:03,131 - root - INFO 
- step: 530 loss: 4.3726 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.5471 global_avg_mtp_loss: 3.8256 +[titan] 2025-10-04 22:56:03,131 - root - INFO - lr: 4.9999e-05 gnorm: 2.18 [ 0:21:54<1 day, 3:12:04] +[titan] 2025-10-04 22:56:13,930 - root - INFO - step: 535 loss: 4.4086 memory: 118.84GiB(85.28%) tps: 30,344 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.5498 global_avg_mtp_loss: 3.8588 +[titan] 2025-10-04 22:56:13,930 - root - INFO - lr: 4.9999e-05 gnorm: 1.95 [ 0:22:05<1 day, 3:09:53] +[titan] 2025-10-04 22:56:24,765 - root - INFO - step: 540 loss: 4.4155 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.5521 global_avg_mtp_loss: 3.8634 +[titan] 2025-10-04 22:56:24,765 - root - INFO - lr: 4.9999e-05 gnorm: 2.04 [ 0:22:16<1 day, 3:07:47] +[titan] 2025-10-04 22:56:35,621 - root - INFO - step: 545 loss: 4.3565 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.5455 global_avg_mtp_loss: 3.8109 +[titan] 2025-10-04 22:56:35,621 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:22:27<1 day, 3:05:45] +[titan] 2025-10-04 22:56:44,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:56:46,499 - root - INFO - step: 550 loss: 4.2924 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.5365 global_avg_mtp_loss: 3.7559 +[titan] 2025-10-04 22:56:46,499 - root - INFO - lr: 4.9998e-05 gnorm: 1.96 [ 0:22:38<1 day, 3:03:46] +[titan] 2025-10-04 22:56:57,360 - root - INFO - step: 555 loss: 4.3086 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.5367 global_avg_mtp_loss: 3.7719 +[titan] 2025-10-04 22:56:57,361 - root - INFO - lr: 4.9998e-05 gnorm: 1.94 [ 0:22:49<1 day, 3:01:48] +[titan] 2025-10-04 22:57:08,185 - root - INFO - step: 560 loss: 4.2981 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.5349 global_avg_mtp_loss: 3.7631 +[titan] 2025-10-04 22:57:08,185 - root - INFO - lr: 4.9998e-05 gnorm: 1.84 [ 0:22:59<1 day, 2:59:49] +[titan] 2025-10-04 22:57:19,007 - root - INFO - step: 565 loss: 4.3383 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7969 +[titan] 2025-10-04 22:57:19,007 - root - INFO - lr: 4.9998e-05 gnorm: 1.66 [ 0:23:10<1 day, 2:57:52] +[titan] 2025-10-04 22:57:29,825 - root - INFO - step: 570 loss: 4.3634 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.5450 global_avg_mtp_loss: 3.8184 +[titan] 2025-10-04 22:57:29,825 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:23:21<1 day, 2:55:57] +[titan] 2025-10-04 22:57:40,662 - root - INFO - step: 575 loss: 4.2261 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.5285 global_avg_mtp_loss: 3.6977 +[titan] 2025-10-04 22:57:40,663 - root - INFO - lr: 4.9998e-05 gnorm: 1.67 [ 0:23:32<1 day, 2:54:04] +[titan] 2025-10-04 22:57:51,566 - root - INFO - step: 580 loss: 4.2298 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.5294 global_avg_mtp_loss: 3.7005 +[titan] 2025-10-04 22:57:51,566 - 
root - INFO - lr: 4.9998e-05 gnorm: 1.98 [ 0:23:43<1 day, 2:52:18] +[titan] 2025-10-04 22:58:02,405 - root - INFO - step: 585 loss: 4.3315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7902 +[titan] 2025-10-04 22:58:02,405 - root - INFO - lr: 4.9998e-05 gnorm: 1.72 [ 0:23:54<1 day, 2:50:30] +[titan] 2025-10-04 22:58:13,269 - root - INFO - step: 590 loss: 4.2600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.5322 global_avg_mtp_loss: 3.7278 +[titan] 2025-10-04 22:58:13,270 - root - INFO - lr: 4.9997e-05 gnorm: 1.95 [ 0:24:05<1 day, 2:48:44] +[titan] 2025-10-04 22:58:24,105 - root - INFO - step: 595 loss: 4.1808 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.5216 global_avg_mtp_loss: 3.6592 +[titan] 2025-10-04 22:58:24,105 - root - INFO - lr: 4.9997e-05 gnorm: 1.65 [ 0:24:15<1 day, 2:46:59] +[titan] 2025-10-04 22:58:32,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:58:34,964 - root - INFO - step: 600 loss: 4.1976 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.5240 global_avg_mtp_loss: 3.6736 +[titan] 2025-10-04 22:58:34,964 - root - INFO - lr: 4.9997e-05 gnorm: 1.83 [ 0:24:26<1 day, 2:45:16] +[titan] 2025-10-04 22:58:45,870 - root - INFO - step: 605 loss: 4.3159 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.5391 global_avg_mtp_loss: 3.7769 +[titan] 2025-10-04 22:58:45,870 - root - INFO - lr: 4.9997e-05 gnorm: 1.87 [ 0:24:37<1 day, 2:43:38] +[titan] 2025-10-04 22:58:56,733 - root - INFO - step: 610 loss: 4.1166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.5131 global_avg_mtp_loss: 3.6035 +[titan] 2025-10-04 22:58:56,733 - root - INFO - lr: 4.9997e-05 gnorm: 1.62 [ 0:24:48<1 day, 2:41:59] +[titan] 2025-10-04 22:59:07,585 - root - INFO - step: 615 loss: 4.2340 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.5275 global_avg_mtp_loss: 3.7065 +[titan] 2025-10-04 22:59:07,585 - root - INFO - lr: 4.9997e-05 gnorm: 1.88 [ 0:24:59<1 day, 2:40:20] +[titan] 2025-10-04 22:59:18,424 - root - INFO - step: 620 loss: 4.2004 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5249 global_avg_mtp_loss: 3.6756 +[titan] 2025-10-04 22:59:18,424 - root - INFO - lr: 4.9997e-05 gnorm: 1.91 [ 0:25:10<1 day, 2:38:42] +[titan] 2025-10-04 22:59:29,245 - root - INFO - step: 625 loss: 4.2113 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.5247 global_avg_mtp_loss: 3.6866 +[titan] 2025-10-04 22:59:29,245 - root - INFO - lr: 4.9996e-05 gnorm: 1.62 [ 0:25:21<1 day, 2:37:04] +[titan] 2025-10-04 22:59:40,085 - root - INFO - step: 630 loss: 4.1954 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.5210 global_avg_mtp_loss: 3.6745 +[titan] 2025-10-04 22:59:40,085 - 
root - INFO - lr: 4.9996e-05 gnorm: 1.68 [ 0:25:31<1 day, 2:35:29] +[titan] 2025-10-04 22:59:51,004 - root - INFO - step: 635 loss: 4.0965 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.5096 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 22:59:51,004 - root - INFO - lr: 4.9996e-05 gnorm: 1.82 [ 0:25:42<1 day, 2:34:00] +[titan] 2025-10-04 23:00:01,832 - root - INFO - step: 640 loss: 4.2067 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.5236 global_avg_mtp_loss: 3.6831 +[titan] 2025-10-04 23:00:01,832 - root - INFO - lr: 4.9996e-05 gnorm: 1.87 [ 0:25:53<1 day, 2:32:27] +[titan] 2025-10-04 23:00:12,683 - root - INFO - step: 645 loss: 4.0562 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.5030 global_avg_mtp_loss: 3.5532 +[titan] 2025-10-04 23:00:12,683 - root - INFO - lr: 4.9996e-05 gnorm: 1.73 [ 0:26:04<1 day, 2:30:56] +[titan] 2025-10-04 23:00:21,312 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:00:23,493 - root - INFO - step: 650 loss: 4.1298 memory: 118.84GiB(85.28%) tps: 30,314 tflops: 420.56 mfu: 42.52% global_avg_ntp_loss: 0.5128 global_avg_mtp_loss: 3.6170 +[titan] 2025-10-04 23:00:23,493 - root - INFO - lr: 4.9996e-05 gnorm: 1.75 [ 0:26:15<1 day, 2:29:24] +[titan] 2025-10-04 23:00:34,283 - root - INFO - step: 655 loss: 4.0941 memory: 118.84GiB(85.28%) tps: 30,369 tflops: 421.33 mfu: 42.60% global_avg_ntp_loss: 0.5089 global_avg_mtp_loss: 3.5852 +[titan] 2025-10-04 23:00:34,283 - root - INFO - lr: 4.9995e-05 gnorm: 1.70 [ 0:26:26<1 day, 2:27:52] +[titan] 2025-10-04 23:00:45,102 - root - INFO - step: 660 loss: 4.1313 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.5130 global_avg_mtp_loss: 3.6184 +[titan] 2025-10-04 23:00:45,102 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:36<1 day, 2:26:23] +[titan] 2025-10-04 23:00:55,946 - root - INFO - step: 665 loss: 4.1367 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5147 global_avg_mtp_loss: 3.6220 +[titan] 2025-10-04 23:00:55,946 - root - INFO - lr: 4.9995e-05 gnorm: 1.99 [ 0:26:47<1 day, 2:24:57] +[titan] 2025-10-04 23:01:06,742 - root - INFO - step: 670 loss: 4.0904 memory: 118.84GiB(85.28%) tps: 30,352 tflops: 421.09 mfu: 42.58% global_avg_ntp_loss: 0.5075 global_avg_mtp_loss: 3.5829 +[titan] 2025-10-04 23:01:06,743 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:58<1 day, 2:23:29] +[titan] 2025-10-04 23:01:17,585 - root - INFO - step: 675 loss: 4.0638 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.5042 global_avg_mtp_loss: 3.5596 +[titan] 2025-10-04 23:01:17,585 - root - INFO - lr: 4.9995e-05 gnorm: 2.15 [ 0:27:09<1 day, 2:22:05] +[titan] 2025-10-04 23:01:28,410 - root - INFO - step: 680 loss: 4.0064 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4976 global_avg_mtp_loss: 3.5088 +[titan] 2025-10-04 23:01:28,410 - 
root - INFO - lr: 4.9994e-05 gnorm: 1.81 [ 0:27:20<1 day, 2:20:41] +[titan] 2025-10-04 23:01:39,214 - root - INFO - step: 685 loss: 4.1427 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.5134 global_avg_mtp_loss: 3.6293 +[titan] 2025-10-04 23:01:39,214 - root - INFO - lr: 4.9994e-05 gnorm: 1.69 [ 0:27:30<1 day, 2:19:17] +[titan] 2025-10-04 23:01:50,056 - root - INFO - step: 690 loss: 4.0571 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.5019 global_avg_mtp_loss: 3.5553 +[titan] 2025-10-04 23:01:50,056 - root - INFO - lr: 4.9994e-05 gnorm: 1.63 [ 0:27:41<1 day, 2:17:56] +[titan] 2025-10-04 23:02:00,900 - root - INFO - step: 695 loss: 4.0380 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5011 global_avg_mtp_loss: 3.5369 +[titan] 2025-10-04 23:02:00,900 - root - INFO - lr: 4.9994e-05 gnorm: 1.77 [ 0:27:52<1 day, 2:16:36] +[titan] 2025-10-04 23:02:09,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:02:11,753 - root - INFO - step: 700 loss: 4.0879 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.5070 global_avg_mtp_loss: 3.5810 +[titan] 2025-10-04 23:02:11,754 - root - INFO - lr: 4.9994e-05 gnorm: 1.96 [ 0:28:03<1 day, 2:15:18] +[titan] 2025-10-04 23:02:22,605 - root - INFO - step: 705 loss: 4.0241 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4988 global_avg_mtp_loss: 3.5252 +[titan] 2025-10-04 23:02:22,605 - root - INFO - lr: 4.9993e-05 gnorm: 1.83 [ 0:28:14<1 day, 2:14:00] +[titan] 2025-10-04 23:02:33,405 - root - INFO - step: 710 loss: 4.0903 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.5058 global_avg_mtp_loss: 3.5844 +[titan] 2025-10-04 23:02:33,405 - root - INFO - lr: 4.9993e-05 gnorm: 1.64 [ 0:28:25<1 day, 2:12:41] +[titan] 2025-10-04 23:02:44,244 - root - INFO - step: 715 loss: 4.0535 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5015 global_avg_mtp_loss: 3.5520 +[titan] 2025-10-04 23:02:44,244 - root - INFO - lr: 4.9993e-05 gnorm: 1.50 [ 0:28:36<1 day, 2:11:24] +[titan] 2025-10-04 23:02:55,077 - root - INFO - step: 720 loss: 4.0093 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.4957 global_avg_mtp_loss: 3.5137 +[titan] 2025-10-04 23:02:55,077 - root - INFO - lr: 4.9993e-05 gnorm: 1.58 [ 0:28:46<1 day, 2:10:09] +[titan] 2025-10-04 23:03:05,902 - root - INFO - step: 725 loss: 3.9529 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4894 global_avg_mtp_loss: 3.4635 +[titan] 2025-10-04 23:03:05,902 - root - INFO - lr: 4.9992e-05 gnorm: 1.53 [ 0:28:57<1 day, 2:08:54] +[titan] 2025-10-04 23:03:16,765 - root - INFO - step: 730 loss: 3.9701 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.4916 global_avg_mtp_loss: 3.4785 +[titan] 2025-10-04 23:03:16,765 - 
root - INFO - lr: 4.9992e-05 gnorm: 1.57 [ 0:29:08<1 day, 2:07:41] +[titan] 2025-10-04 23:03:27,585 - root - INFO - step: 735 loss: 4.0191 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.4982 global_avg_mtp_loss: 3.5209 +[titan] 2025-10-04 23:03:27,585 - root - INFO - lr: 4.9992e-05 gnorm: 1.59 [ 0:29:19<1 day, 2:06:27] +[titan] 2025-10-04 23:03:38,404 - root - INFO - step: 740 loss: 3.9770 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.4912 global_avg_mtp_loss: 3.4857 +[titan] 2025-10-04 23:03:38,404 - root - INFO - lr: 4.9992e-05 gnorm: 1.61 [ 0:29:30<1 day, 2:05:14] +[titan] 2025-10-04 23:03:49,265 - root - INFO - step: 745 loss: 4.0755 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.5054 global_avg_mtp_loss: 3.5701 +[titan] 2025-10-04 23:03:49,265 - root - INFO - lr: 4.9992e-05 gnorm: 1.52 [ 0:29:41<1 day, 2:04:05] +[titan] 2025-10-04 23:03:57,894 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:04:00,081 - root - INFO - step: 750 loss: 3.9375 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.4868 global_avg_mtp_loss: 3.4508 +[titan] 2025-10-04 23:04:00,081 - root - INFO - lr: 4.9991e-05 gnorm: 1.67 [ 0:29:51<1 day, 2:02:53] +[titan] 2025-10-04 23:04:10,923 - root - INFO - step: 755 loss: 4.0060 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.4974 global_avg_mtp_loss: 3.5087 +[titan] 2025-10-04 23:04:10,923 - root - INFO - lr: 4.9991e-05 gnorm: 1.62 [ 0:30:02<1 day, 2:01:44] +[titan] 2025-10-04 23:04:21,765 - root - INFO - step: 760 loss: 3.9826 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.4928 global_avg_mtp_loss: 3.4897 +[titan] 2025-10-04 23:04:21,765 - root - INFO - lr: 4.9991e-05 gnorm: 1.57 [ 0:30:13<1 day, 2:00:35] +[titan] 2025-10-04 23:04:32,624 - root - INFO - step: 765 loss: 3.9503 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4871 global_avg_mtp_loss: 3.4633 +[titan] 2025-10-04 23:04:32,625 - root - INFO - lr: 4.9991e-05 gnorm: 1.73 [ 0:30:24<1 day, 1:59:28] +[titan] 2025-10-04 23:04:43,499 - root - INFO - step: 770 loss: 4.0928 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.5059 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 23:04:43,499 - root - INFO - lr: 4.9990e-05 gnorm: 1.68 [ 0:30:35<1 day, 1:58:23] +[titan] 2025-10-04 23:04:54,364 - root - INFO - step: 775 loss: 4.0138 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4966 global_avg_mtp_loss: 3.5172 +[titan] 2025-10-04 23:04:54,364 - root - INFO - lr: 4.9990e-05 gnorm: 1.84 [ 0:30:46<1 day, 1:57:18] +[titan] 2025-10-04 23:05:05,165 - root - INFO - step: 780 loss: 3.9609 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.4878 global_avg_mtp_loss: 3.4731 +[titan] 2025-10-04 23:05:05,165 - 
root - INFO - lr: 4.9990e-05 gnorm: 1.66 [ 0:30:56<1 day, 1:56:10] +[titan] 2025-10-04 23:05:16,001 - root - INFO - step: 785 loss: 4.0392 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.5003 global_avg_mtp_loss: 3.5389 +[titan] 2025-10-04 23:05:16,002 - root - INFO - lr: 4.9989e-05 gnorm: 1.74 [ 0:31:07<1 day, 1:55:05] +[titan] 2025-10-04 23:05:26,809 - root - INFO - step: 790 loss: 3.9123 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.4820 global_avg_mtp_loss: 3.4303 +[titan] 2025-10-04 23:05:26,809 - root - INFO - lr: 4.9989e-05 gnorm: 1.71 [ 0:31:18<1 day, 1:53:59] +[titan] 2025-10-04 23:05:37,659 - root - INFO - step: 795 loss: 3.9513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.4870 global_avg_mtp_loss: 3.4643 +[titan] 2025-10-04 23:05:37,659 - root - INFO - lr: 4.9989e-05 gnorm: 1.57 [ 0:31:29<1 day, 1:52:55] +[titan] 2025-10-04 23:05:46,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:05:48,524 - root - INFO - step: 800 loss: 3.8805 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4013 +[titan] 2025-10-04 23:05:48,524 - root - INFO - lr: 4.9989e-05 gnorm: 1.63 [ 0:31:40<1 day, 1:51:54] +[titan] 2025-10-04 23:05:59,423 - root - INFO - step: 805 loss: 4.0567 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.5041 global_avg_mtp_loss: 3.5527 +[titan] 2025-10-04 23:05:59,424 - root - INFO - lr: 4.9988e-05 gnorm: 1.65 [ 0:31:51<1 day, 1:50:54] +[titan] 2025-10-04 23:06:10,267 - root - INFO - step: 810 loss: 3.9384 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4865 global_avg_mtp_loss: 3.4519 +[titan] 2025-10-04 23:06:10,267 - root - INFO - lr: 4.9988e-05 gnorm: 1.62 [ 0:32:02<1 day, 1:49:53] +[titan] 2025-10-04 23:06:21,120 - root - INFO - step: 815 loss: 3.9402 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.4841 global_avg_mtp_loss: 3.4561 +[titan] 2025-10-04 23:06:21,120 - root - INFO - lr: 4.9988e-05 gnorm: 1.83 [ 0:32:12<1 day, 1:48:52] +[titan] 2025-10-04 23:06:31,962 - root - INFO - step: 820 loss: 3.8907 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.4804 global_avg_mtp_loss: 3.4102 +[titan] 2025-10-04 23:06:31,962 - root - INFO - lr: 4.9987e-05 gnorm: 1.56 [ 0:32:23<1 day, 1:47:52] +[titan] 2025-10-04 23:06:42,804 - root - INFO - step: 825 loss: 3.9391 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.4866 global_avg_mtp_loss: 3.4525 +[titan] 2025-10-04 23:06:42,804 - root - INFO - lr: 4.9987e-05 gnorm: 1.73 [ 0:32:34<1 day, 1:46:52] +[titan] 2025-10-04 23:06:53,697 - root - INFO - step: 830 loss: 3.8534 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.4757 global_avg_mtp_loss: 3.3777 +[titan] 2025-10-04 23:06:53,697 - 
root - INFO - lr: 4.9987e-05 gnorm: 1.46 [ 0:32:45<1 day, 1:45:55] +[titan] 2025-10-04 23:07:04,599 - root - INFO - step: 835 loss: 3.9680 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.4909 global_avg_mtp_loss: 3.4770 +[titan] 2025-10-04 23:07:04,599 - root - INFO - lr: 4.9987e-05 gnorm: 1.69 [ 0:32:56<1 day, 1:44:59] +[titan] 2025-10-04 23:07:15,482 - root - INFO - step: 840 loss: 3.8804 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4793 global_avg_mtp_loss: 3.4011 +[titan] 2025-10-04 23:07:15,483 - root - INFO - lr: 4.9986e-05 gnorm: 1.65 [ 0:33:07<1 day, 1:44:03] +[titan] 2025-10-04 23:07:26,345 - root - INFO - step: 845 loss: 3.9335 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.4859 global_avg_mtp_loss: 3.4476 +[titan] 2025-10-04 23:07:26,345 - root - INFO - lr: 4.9986e-05 gnorm: 1.67 [ 0:33:18<1 day, 1:43:06] +[titan] 2025-10-04 23:07:35,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:07:37,166 - root - INFO - step: 850 loss: 3.9466 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.4899 global_avg_mtp_loss: 3.4568 +[titan] 2025-10-04 23:07:37,166 - root - INFO - lr: 4.9986e-05 gnorm: 1.53 [ 0:33:28<1 day, 1:42:08] +[titan] 2025-10-04 23:07:48,038 - root - INFO - step: 855 loss: 3.8553 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3805 +[titan] 2025-10-04 23:07:48,038 - root - INFO - lr: 4.9985e-05 gnorm: 1.54 [ 0:33:39<1 day, 1:41:13] +[titan] 2025-10-04 23:07:58,950 - root - INFO - step: 860 loss: 3.9192 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.4837 global_avg_mtp_loss: 3.4355 +[titan] 2025-10-04 23:07:58,951 - root - INFO - lr: 4.9985e-05 gnorm: 1.63 [ 0:33:50<1 day, 1:40:20] +[titan] 2025-10-04 23:08:09,863 - root - INFO - step: 865 loss: 3.8398 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.4747 global_avg_mtp_loss: 3.3651 +[titan] 2025-10-04 23:08:09,863 - root - INFO - lr: 4.9985e-05 gnorm: 1.57 [ 0:34:01<1 day, 1:39:28] +[titan] 2025-10-04 23:08:20,763 - root - INFO - step: 870 loss: 3.9660 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.4876 global_avg_mtp_loss: 3.4784 +[titan] 2025-10-04 23:08:20,763 - root - INFO - lr: 4.9984e-05 gnorm: 1.70 [ 0:34:12<1 day, 1:38:36] +[titan] 2025-10-04 23:08:31,644 - root - INFO - step: 875 loss: 3.8236 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4708 global_avg_mtp_loss: 3.3528 +[titan] 2025-10-04 23:08:31,644 - root - INFO - lr: 4.9984e-05 gnorm: 1.58 [ 0:34:23<1 day, 1:37:43] +[titan] 2025-10-04 23:08:42,521 - root - INFO - step: 880 loss: 3.8393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4731 global_avg_mtp_loss: 3.3662 +[titan] 2025-10-04 23:08:42,522 - 
root - INFO - lr: 4.9984e-05 gnorm: 1.66 [ 0:34:34<1 day, 1:36:51] +[titan] 2025-10-04 23:08:53,411 - root - INFO - step: 885 loss: 3.9181 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4832 global_avg_mtp_loss: 3.4349 +[titan] 2025-10-04 23:08:53,412 - root - INFO - lr: 4.9983e-05 gnorm: 1.81 [ 0:34:45<1 day, 1:35:59] +[titan] 2025-10-04 23:09:04,287 - root - INFO - step: 890 loss: 3.8540 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4756 global_avg_mtp_loss: 3.3784 +[titan] 2025-10-04 23:09:04,287 - root - INFO - lr: 4.9983e-05 gnorm: 1.63 [ 0:34:56<1 day, 1:35:08] +[titan] 2025-10-04 23:09:15,149 - root - INFO - step: 895 loss: 3.7956 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4646 global_avg_mtp_loss: 3.3310 +[titan] 2025-10-04 23:09:15,149 - root - INFO - lr: 4.9983e-05 gnorm: 1.59 [ 0:35:06<1 day, 1:34:16] +[titan] 2025-10-04 23:09:23,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:09:26,036 - root - INFO - step: 900 loss: 3.8814 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4023 +[titan] 2025-10-04 23:09:26,036 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:17<1 day, 1:33:26] +[titan] 2025-10-04 23:09:36,928 - root - INFO - step: 905 loss: 3.8547 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3797 +[titan] 2025-10-04 23:09:36,928 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:28<1 day, 1:32:36] +[titan] 2025-10-04 23:09:47,795 - root - INFO - step: 910 loss: 3.7503 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4614 global_avg_mtp_loss: 3.2890 +[titan] 2025-10-04 23:09:47,795 - root - INFO - lr: 4.9982e-05 gnorm: 1.63 [ 0:35:39<1 day, 1:31:46] +[titan] 2025-10-04 23:09:58,664 - root - INFO - step: 915 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3219 +[titan] 2025-10-04 23:09:58,665 - root - INFO - lr: 4.9981e-05 gnorm: 1.57 [ 0:35:50<1 day, 1:30:56] +[titan] 2025-10-04 23:10:09,537 - root - INFO - step: 920 loss: 3.8477 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.4753 global_avg_mtp_loss: 3.3723 +[titan] 2025-10-04 23:10:09,537 - root - INFO - lr: 4.9981e-05 gnorm: 1.56 [ 0:36:01<1 day, 1:30:07] +[titan] 2025-10-04 23:10:20,420 - root - INFO - step: 925 loss: 3.8141 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3439 +[titan] 2025-10-04 23:10:20,420 - root - INFO - lr: 4.9980e-05 gnorm: 1.53 [ 0:36:12<1 day, 1:29:19] +[titan] 2025-10-04 23:10:31,298 - root - INFO - step: 930 loss: 3.8185 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3482 +[titan] 2025-10-04 23:10:31,298 - 
root - INFO - lr: 4.9980e-05 gnorm: 1.56 [ 0:36:23<1 day, 1:28:31] +[titan] 2025-10-04 23:10:42,186 - root - INFO - step: 935 loss: 3.7234 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.4574 global_avg_mtp_loss: 3.2661 +[titan] 2025-10-04 23:10:42,186 - root - INFO - lr: 4.9980e-05 gnorm: 1.52 [ 0:36:33<1 day, 1:27:44] +[titan] 2025-10-04 23:10:53,053 - root - INFO - step: 940 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4666 global_avg_mtp_loss: 3.3211 +[titan] 2025-10-04 23:10:53,053 - root - INFO - lr: 4.9979e-05 gnorm: 1.69 [ 0:36:44<1 day, 1:26:56] +[titan] 2025-10-04 23:11:03,935 - root - INFO - step: 945 loss: 3.7815 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.4635 global_avg_mtp_loss: 3.3180 +[titan] 2025-10-04 23:11:03,935 - root - INFO - lr: 4.9979e-05 gnorm: 1.45 [ 0:36:55<1 day, 1:26:09] +[titan] 2025-10-04 23:11:12,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:11:14,787 - root - INFO - step: 950 loss: 3.8345 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4716 global_avg_mtp_loss: 3.3629 +[titan] 2025-10-04 23:11:14,787 - root - INFO - lr: 4.9979e-05 gnorm: 1.54 [ 0:37:06<1 day, 1:25:22] +[titan] 2025-10-04 23:11:25,662 - root - INFO - step: 955 loss: 3.7153 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4570 global_avg_mtp_loss: 3.2583 +[titan] 2025-10-04 23:11:25,662 - root - INFO - lr: 4.9978e-05 gnorm: 1.40 [ 0:37:17<1 day, 1:24:36] +[titan] 2025-10-04 23:11:36,506 - root - INFO - step: 960 loss: 3.7474 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4595 global_avg_mtp_loss: 3.2878 +[titan] 2025-10-04 23:11:36,506 - root - INFO - lr: 4.9978e-05 gnorm: 1.39 [ 0:37:28<1 day, 1:23:49] +[titan] 2025-10-04 23:11:47,428 - root - INFO - step: 965 loss: 3.7469 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4597 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:11:47,429 - root - INFO - lr: 4.9977e-05 gnorm: 1.60 [ 0:37:39<1 day, 1:23:05] +[titan] 2025-10-04 23:11:58,339 - root - INFO - step: 970 loss: 3.7767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.4638 global_avg_mtp_loss: 3.3129 +[titan] 2025-10-04 23:11:58,340 - root - INFO - lr: 4.9977e-05 gnorm: 1.59 [ 0:37:50<1 day, 1:22:21] +[titan] 2025-10-04 23:12:09,214 - root - INFO - step: 975 loss: 3.7198 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4568 global_avg_mtp_loss: 3.2630 +[titan] 2025-10-04 23:12:09,214 - root - INFO - lr: 4.9977e-05 gnorm: 1.44 [ 0:38:00<1 day, 1:21:36] +[titan] 2025-10-04 23:12:20,081 - root - INFO - step: 980 loss: 3.7702 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4623 global_avg_mtp_loss: 3.3079 +[titan] 2025-10-04 23:12:20,081 - 
root - INFO - lr: 4.9976e-05 gnorm: 1.42 [ 0:38:11<1 day, 1:20:52] +[titan] 2025-10-04 23:12:30,946 - root - INFO - step: 985 loss: 3.8212 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3515 +[titan] 2025-10-04 23:12:30,947 - root - INFO - lr: 4.9976e-05 gnorm: 1.39 [ 0:38:22<1 day, 1:20:07] +[titan] 2025-10-04 23:12:41,799 - root - INFO - step: 990 loss: 3.7716 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.4659 global_avg_mtp_loss: 3.3057 +[titan] 2025-10-04 23:12:41,799 - root - INFO - lr: 4.9975e-05 gnorm: 1.50 [ 0:38:33<1 day, 1:19:23] +[titan] 2025-10-04 23:12:52,700 - root - INFO - step: 995 loss: 3.8144 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3447 +[titan] 2025-10-04 23:12:52,701 - root - INFO - lr: 4.9975e-05 gnorm: 1.47 [ 0:38:44<1 day, 1:18:40] +[titan] 2025-10-04 23:13:01,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:13:03,568 - root - INFO - step: 1000 loss: 3.6411 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4473 global_avg_mtp_loss: 3.1938 +[titan] 2025-10-04 23:13:03,569 - root - INFO - lr: 4.9974e-05 gnorm: 1.70 [ 0:38:55<1 day, 1:17:57] +[titan] 2025-10-04 23:13:14,441 - root - INFO - step: 1005 loss: 3.7872 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4636 global_avg_mtp_loss: 3.3236 +[titan] 2025-10-04 23:13:14,442 - root - INFO - lr: 4.9974e-05 gnorm: 1.62 [ 0:39:06<1 day, 1:17:14] +[titan] 2025-10-04 23:13:25,308 - root - INFO - step: 1010 loss: 3.8240 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4681 global_avg_mtp_loss: 3.3560 +[titan] 2025-10-04 23:13:25,308 - root - INFO - lr: 4.9974e-05 gnorm: 1.51 [ 0:39:17<1 day, 1:16:31] +[titan] 2025-10-04 23:13:36,156 - root - INFO - step: 1015 loss: 3.7026 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.4566 global_avg_mtp_loss: 3.2461 +[titan] 2025-10-04 23:13:36,157 - root - INFO - lr: 4.9973e-05 gnorm: 1.61 [ 0:39:27<1 day, 1:15:48] +[titan] 2025-10-04 23:13:47,024 - root - INFO - step: 1020 loss: 3.8204 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4698 global_avg_mtp_loss: 3.3506 +[titan] 2025-10-04 23:13:47,025 - root - INFO - lr: 4.9973e-05 gnorm: 1.58 [ 0:39:38<1 day, 1:15:06] +[titan] 2025-10-04 23:13:55,951 - root - INFO - Dumping profiler traces at step 1024 +[titan] 2025-10-04 23:13:55,986 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 23:13:58,196 - root - INFO - step: 1025 loss: 3.7098 memory: 118.84GiB(85.28%) tps: 29,332 tflops: 406.94 mfu: 41.15% global_avg_ntp_loss: 0.4550 global_avg_mtp_loss: 3.2548 +[titan] 2025-10-04 23:13:58,196 - root - INFO - lr: 4.9972e-05 gnorm: 1.53 [ 0:39:49<1 day, 1:14:35] +[titan] 2025-10-04 23:14:09,055 - root 
- INFO - step: 1030 loss: 3.6684 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4505 global_avg_mtp_loss: 3.2179 +[titan] 2025-10-04 23:14:09,056 - root - INFO - lr: 4.9972e-05 gnorm: 1.49 [ 0:40:00<1 day, 1:13:54] +[titan] 2025-10-04 23:14:19,917 - root - INFO - step: 1035 loss: 3.7778 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.4632 global_avg_mtp_loss: 3.3146 +[titan] 2025-10-04 23:14:19,917 - root - INFO - lr: 4.9971e-05 gnorm: 1.64 [ 0:40:11<1 day, 1:13:12] +[titan] 2025-10-04 23:14:30,784 - root - INFO - step: 1040 loss: 3.7600 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.4596 global_avg_mtp_loss: 3.3004 +[titan] 2025-10-04 23:14:30,784 - root - INFO - lr: 4.9971e-05 gnorm: 1.73 [ 0:40:22<1 day, 1:12:31] +[titan] 2025-10-04 23:14:41,642 - root - INFO - step: 1045 loss: 3.7970 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3312 +[titan] 2025-10-04 23:14:41,642 - root - INFO - lr: 4.9970e-05 gnorm: 1.60 [ 0:40:33<1 day, 1:11:50] +[titan] 2025-10-04 23:14:50,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:14:52,527 - root - INFO - step: 1050 loss: 3.7607 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.4629 global_avg_mtp_loss: 3.2979 +[titan] 2025-10-04 23:14:52,527 - root - INFO - lr: 4.9970e-05 gnorm: 1.86 [ 0:40:44<1 day, 1:11:10] +[titan] 2025-10-04 23:15:03,398 - root - INFO - step: 1055 loss: 3.6921 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4533 global_avg_mtp_loss: 3.2388 +[titan] 2025-10-04 23:15:03,398 - root - INFO - lr: 4.9970e-05 gnorm: 1.59 [ 0:40:55<1 day, 1:10:30] +[titan] 2025-10-04 23:15:14,306 - root - INFO - step: 1060 loss: 3.7138 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.4561 global_avg_mtp_loss: 3.2577 +[titan] 2025-10-04 23:15:14,306 - root - INFO - lr: 4.9969e-05 gnorm: 1.89 [ 0:41:06<1 day, 1:09:52] +[titan] 2025-10-04 23:15:25,186 - root - INFO - step: 1065 loss: 3.7455 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.4584 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:15:25,186 - root - INFO - lr: 4.9969e-05 gnorm: 1.72 [ 0:41:16<1 day, 1:09:13] +[titan] 2025-10-04 23:15:36,061 - root - INFO - step: 1070 loss: 3.6510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4474 global_avg_mtp_loss: 3.2036 +[titan] 2025-10-04 23:15:36,061 - root - INFO - lr: 4.9968e-05 gnorm: 1.70 [ 0:41:27<1 day, 1:08:34] +[titan] 2025-10-04 23:15:46,950 - root - INFO - step: 1075 loss: 3.7757 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4630 global_avg_mtp_loss: 3.3127 +[titan] 2025-10-04 23:15:46,950 - root - INFO - lr: 4.9968e-05 gnorm: 1.53 [ 0:41:38<1 day, 1:07:55] +[titan] 2025-10-04 23:15:57,821 - root - INFO - step: 1080 loss: 3.6997 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4542 global_avg_mtp_loss: 3.2455 +[titan] 2025-10-04 
23:15:57,821 - root - INFO - lr: 4.9967e-05 gnorm: 1.40 [ 0:41:49<1 day, 1:07:17] +[titan] 2025-10-04 23:16:08,691 - root - INFO - step: 1085 loss: 3.7768 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.4652 global_avg_mtp_loss: 3.3116 +[titan] 2025-10-04 23:16:08,691 - root - INFO - lr: 4.9967e-05 gnorm: 1.71 [ 0:42:00<1 day, 1:06:38] +[titan] 2025-10-04 23:16:19,625 - root - INFO - step: 1090 loss: 3.7891 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.4653 global_avg_mtp_loss: 3.3238 +[titan] 2025-10-04 23:16:19,625 - root - INFO - lr: 4.9966e-05 gnorm: 1.32 [ 0:42:11<1 day, 1:06:02] +[titan] 2025-10-04 23:16:30,524 - root - INFO - step: 1095 loss: 3.6348 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.4440 global_avg_mtp_loss: 3.1907 +[titan] 2025-10-04 23:16:30,525 - root - INFO - lr: 4.9966e-05 gnorm: 1.55 [ 0:42:22<1 day, 1:05:25] +[titan] 2025-10-04 23:16:39,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:16:41,421 - root - INFO - step: 1100 loss: 3.7357 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.4573 global_avg_mtp_loss: 3.2785 +[titan] 2025-10-04 23:16:41,421 - root - INFO - lr: 4.9965e-05 gnorm: 1.50 [ 0:42:33<1 day, 1:04:48] +[titan] 2025-10-04 23:16:52,335 - root - INFO - step: 1105 loss: 3.6253 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1800 +[titan] 2025-10-04 23:16:52,335 - root - INFO - lr: 4.9965e-05 gnorm: 1.52 [ 0:42:44<1 day, 1:04:12] +[titan] 2025-10-04 23:17:03,265 - root - INFO - step: 1110 loss: 3.6786 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.06% global_avg_ntp_loss: 0.4500 global_avg_mtp_loss: 3.2285 +[titan] 2025-10-04 23:17:03,266 - root - INFO - lr: 4.9964e-05 gnorm: 1.41 [ 0:42:55<1 day, 1:03:37] +[titan] 2025-10-04 23:17:14,175 - root - INFO - step: 1115 loss: 3.6578 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.4465 global_avg_mtp_loss: 3.2112 +[titan] 2025-10-04 23:17:14,175 - root - INFO - lr: 4.9964e-05 gnorm: 1.35 [ 0:43:05<1 day, 1:03:02] +[titan] 2025-10-04 23:17:25,067 - root - INFO - step: 1120 loss: 3.6849 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.4511 global_avg_mtp_loss: 3.2339 +[titan] 2025-10-04 23:17:25,067 - root - INFO - lr: 4.9963e-05 gnorm: 1.51 [ 0:43:16<1 day, 1:02:26] +[titan] 2025-10-04 23:17:35,980 - root - INFO - step: 1125 loss: 3.6812 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.4516 global_avg_mtp_loss: 3.2296 +[titan] 2025-10-04 23:17:35,980 - root - INFO - lr: 4.9963e-05 gnorm: 1.53 [ 0:43:27<1 day, 1:01:50] +[titan] 2025-10-04 23:17:46,863 - root - INFO - step: 1130 loss: 3.6167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4428 global_avg_mtp_loss: 3.1739 +[titan] 2025-10-04 
23:17:46,863 - root - INFO - lr: 4.9962e-05 gnorm: 1.69 [ 0:43:38<1 day, 1:01:15] +[titan] 2025-10-04 23:17:57,754 - root - INFO - step: 1135 loss: 3.5668 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.4385 global_avg_mtp_loss: 3.1284 +[titan] 2025-10-04 23:17:57,754 - root - INFO - lr: 4.9962e-05 gnorm: 1.44 [ 0:43:49<1 day, 1:00:39] +[titan] 2025-10-04 23:18:08,676 - root - INFO - step: 1140 loss: 3.6958 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.4522 global_avg_mtp_loss: 3.2436 +[titan] 2025-10-04 23:18:08,676 - root - INFO - lr: 4.9961e-05 gnorm: 1.51 [ 0:44:00<1 day, 1:00:05] +[titan] 2025-10-04 23:18:19,548 - root - INFO - step: 1145 loss: 3.7386 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.4725 global_avg_mtp_loss: 3.2662 +[titan] 2025-10-04 23:18:19,548 - root - INFO - lr: 4.9961e-05 gnorm: 1.52 [ 0:44:11<1 day, 0:59:29] +[titan] 2025-10-04 23:18:28,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:18:30,436 - root - INFO - step: 1150 loss: 3.6554 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.4491 global_avg_mtp_loss: 3.2063 +[titan] 2025-10-04 23:18:30,436 - root - INFO - lr: 4.9960e-05 gnorm: 1.51 [ 0:44:22<1 day, 0:58:54] +[titan] 2025-10-04 23:18:41,365 - root - INFO - step: 1155 loss: 3.6986 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.4535 global_avg_mtp_loss: 3.2451 +[titan] 2025-10-04 23:18:41,365 - root - INFO - lr: 4.9960e-05 gnorm: 1.49 [ 0:44:33<1 day, 0:58:21] +[titan] 2025-10-04 23:18:52,242 - root - INFO - step: 1160 loss: 3.6068 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4415 global_avg_mtp_loss: 3.1653 +[titan] 2025-10-04 23:18:52,243 - root - INFO - lr: 4.9959e-05 gnorm: 1.49 [ 0:44:43<1 day, 0:57:46] +[titan] 2025-10-04 23:19:03,171 - root - INFO - step: 1165 loss: 3.5931 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.4398 global_avg_mtp_loss: 3.1533 +[titan] 2025-10-04 23:19:03,171 - root - INFO - lr: 4.9958e-05 gnorm: 1.54 [ 0:44:54<1 day, 0:57:13] +[titan] 2025-10-04 23:19:14,054 - root - INFO - step: 1170 loss: 3.6446 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4453 global_avg_mtp_loss: 3.1993 +[titan] 2025-10-04 23:19:14,054 - root - INFO - lr: 4.9958e-05 gnorm: 1.49 [ 0:45:05<1 day, 0:56:39] +[titan] 2025-10-04 23:19:24,934 - root - INFO - step: 1175 loss: 3.6211 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1757 +[titan] 2025-10-04 23:19:24,934 - root - INFO - lr: 4.9957e-05 gnorm: 1.48 [ 0:45:16<1 day, 0:56:05] +[titan] 2025-10-04 23:19:35,805 - root - INFO - step: 1180 loss: 3.6634 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4499 global_avg_mtp_loss: 3.2135 +[titan] 2025-10-04 
23:19:35,805 - root - INFO - lr: 4.9957e-05 gnorm: 1.55 [ 0:45:27<1 day, 0:55:31] +[titan] 2025-10-04 23:19:46,722 - root - INFO - step: 1185 loss: 3.6182 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1740 +[titan] 2025-10-04 23:19:46,722 - root - INFO - lr: 4.9956e-05 gnorm: 1.56 [ 0:45:38<1 day, 0:54:58] +[titan] 2025-10-04 23:19:57,577 - root - INFO - step: 1190 loss: 3.6307 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.4437 global_avg_mtp_loss: 3.1870 +[titan] 2025-10-04 23:19:57,578 - root - INFO - lr: 4.9956e-05 gnorm: 1.44 [ 0:45:49<1 day, 0:54:24] +[titan] 2025-10-04 23:20:08,587 - root - INFO - step: 1195 loss: 3.6947 memory: 118.84GiB(85.28%) tps: 29,765 tflops: 412.95 mfu: 41.75% global_avg_ntp_loss: 0.4519 global_avg_mtp_loss: 3.2429 +[titan] 2025-10-04 23:20:08,587 - root - INFO - lr: 4.9955e-05 gnorm: 1.42 [ 0:46:00<1 day, 0:53:55] +[titan] 2025-10-04 23:20:17,298 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:20:19,487 - root - INFO - step: 1200 loss: 3.6239 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1796 +[titan] 2025-10-04 23:20:19,487 - root - INFO - lr: 4.9955e-05 gnorm: 1.44 [ 0:46:11<1 day, 0:53:22] +[titan] 2025-10-04 23:20:30,366 - root - INFO - step: 1205 loss: 3.6270 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4452 global_avg_mtp_loss: 3.1819 +[titan] 2025-10-04 23:20:30,366 - root - INFO - lr: 4.9954e-05 gnorm: 1.60 [ 0:46:22<1 day, 0:52:49] +[titan] 2025-10-04 23:20:41,259 - root - INFO - step: 1210 loss: 3.6144 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1709 +[titan] 2025-10-04 23:20:41,259 - root - INFO - lr: 4.9953e-05 gnorm: 1.66 [ 0:46:32<1 day, 0:52:17] +[titan] 2025-10-04 23:20:52,152 - root - INFO - step: 1215 loss: 3.6886 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4528 global_avg_mtp_loss: 3.2359 +[titan] 2025-10-04 23:20:52,152 - root - INFO - lr: 4.9953e-05 gnorm: 1.48 [ 0:46:43<1 day, 0:51:44] +[titan] 2025-10-04 23:21:03,098 - root - INFO - step: 1220 loss: 3.5263 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.4324 global_avg_mtp_loss: 3.0939 +[titan] 2025-10-04 23:21:03,098 - root - INFO - lr: 4.9952e-05 gnorm: 1.62 [ 0:46:54<1 day, 0:51:14] +[titan] 2025-10-04 23:21:14,014 - root - INFO - step: 1225 loss: 3.6228 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.4426 global_avg_mtp_loss: 3.1801 +[titan] 2025-10-04 23:21:14,014 - root - INFO - lr: 4.9952e-05 gnorm: 1.53 [ 0:47:05<1 day, 0:50:43] +[titan] 2025-10-04 23:21:24,904 - root - INFO - step: 1230 loss: 3.5398 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.4327 global_avg_mtp_loss: 3.1072 +[titan] 2025-10-04 
23:21:24,904 - root - INFO - lr: 4.9951e-05 gnorm: 1.39 [ 0:47:16<1 day, 0:50:11] +[titan] 2025-10-04 23:21:35,790 - root - INFO - step: 1235 loss: 3.5790 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.4389 global_avg_mtp_loss: 3.1401 +[titan] 2025-10-04 23:21:35,790 - root - INFO - lr: 4.9951e-05 gnorm: 1.42 [ 0:47:27<1 day, 0:49:39] +[titan] 2025-10-04 23:21:46,666 - root - INFO - step: 1240 loss: 3.6434 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4448 global_avg_mtp_loss: 3.1987 +[titan] 2025-10-04 23:21:46,666 - root - INFO - lr: 4.9950e-05 gnorm: 1.43 [ 0:47:38<1 day, 0:49:07] +[titan] 2025-10-04 23:21:57,577 - root - INFO - step: 1245 loss: 3.5452 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4350 global_avg_mtp_loss: 3.1102 +[titan] 2025-10-04 23:21:57,577 - root - INFO - lr: 4.9949e-05 gnorm: 1.40 [ 0:47:49<1 day, 0:48:37] +[titan] 2025-10-04 23:22:06,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:22:08,563 - root - INFO - step: 1250 loss: 3.5844 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.4369 global_avg_mtp_loss: 3.1475 +[titan] 2025-10-04 23:22:08,564 - root - INFO - lr: 4.9949e-05 gnorm: 1.48 [ 0:48:00<1 day, 0:48:08] +[titan] 2025-10-04 23:22:19,438 - root - INFO - step: 1255 loss: 3.6078 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1643 +[titan] 2025-10-04 23:22:19,438 - root - INFO - lr: 4.9948e-05 gnorm: 1.59 [ 0:48:11<1 day, 0:47:37] +[titan] 2025-10-04 23:22:30,309 - root - INFO - step: 1260 loss: 3.5536 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4346 global_avg_mtp_loss: 3.1191 +[titan] 2025-10-04 23:22:30,309 - root - INFO - lr: 4.9948e-05 gnorm: 1.57 [ 0:48:22<1 day, 0:47:05] +[titan] 2025-10-04 23:22:41,203 - root - INFO - step: 1265 loss: 3.5861 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.4376 global_avg_mtp_loss: 3.1485 +[titan] 2025-10-04 23:22:41,203 - root - INFO - lr: 4.9947e-05 gnorm: 1.47 [ 0:48:32<1 day, 0:46:35] +[titan] 2025-10-04 23:22:52,080 - root - INFO - step: 1270 loss: 3.6181 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4419 global_avg_mtp_loss: 3.1762 +[titan] 2025-10-04 23:22:52,081 - root - INFO - lr: 4.9946e-05 gnorm: 1.38 [ 0:48:43<1 day, 0:46:04] +[titan] 2025-10-04 23:23:02,961 - root - INFO - step: 1275 loss: 3.5508 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1168 +[titan] 2025-10-04 23:23:02,961 - root - INFO - lr: 4.9946e-05 gnorm: 1.48 [ 0:48:54<1 day, 0:45:33] +[titan] 2025-10-04 23:23:13,913 - root - INFO - step: 1280 loss: 3.5362 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.4318 global_avg_mtp_loss: 3.1044 +[titan] 2025-10-04 
23:23:13,913 - root - INFO - lr: 4.9945e-05 gnorm: 1.47 [ 0:49:05<1 day, 0:45:05] +[titan] 2025-10-04 23:23:24,835 - root - INFO - step: 1285 loss: 3.5593 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4354 global_avg_mtp_loss: 3.1239 +[titan] 2025-10-04 23:23:24,835 - root - INFO - lr: 4.9944e-05 gnorm: 1.48 [ 0:49:16<1 day, 0:44:36] +[titan] 2025-10-04 23:23:35,699 - root - INFO - step: 1290 loss: 3.5751 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4357 global_avg_mtp_loss: 3.1395 +[titan] 2025-10-04 23:23:35,700 - root - INFO - lr: 4.9944e-05 gnorm: 1.42 [ 0:49:27<1 day, 0:44:05] +[titan] 2025-10-04 23:23:46,610 - root - INFO - step: 1295 loss: 3.5938 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4377 global_avg_mtp_loss: 3.1562 +[titan] 2025-10-04 23:23:46,610 - root - INFO - lr: 4.9943e-05 gnorm: 1.35 [ 0:49:38<1 day, 0:43:36] +[titan] 2025-10-04 23:23:55,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:23:57,488 - root - INFO - step: 1300 loss: 3.5542 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4336 global_avg_mtp_loss: 3.1206 +[titan] 2025-10-04 23:23:57,489 - root - INFO - lr: 4.9943e-05 gnorm: 1.38 [ 0:49:49<1 day, 0:43:06] +[titan] 2025-10-04 23:24:08,378 - root - INFO - step: 1305 loss: 3.5644 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4344 global_avg_mtp_loss: 3.1301 +[titan] 2025-10-04 23:24:08,379 - root - INFO - lr: 4.9942e-05 gnorm: 1.38 [ 0:50:00<1 day, 0:42:36] +[titan] 2025-10-04 23:24:19,247 - root - INFO - step: 1310 loss: 3.5464 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1124 +[titan] 2025-10-04 23:24:19,247 - root - INFO - lr: 4.9941e-05 gnorm: 1.43 [ 0:50:10<1 day, 0:42:06] +[titan] 2025-10-04 23:24:30,161 - root - INFO - step: 1315 loss: 3.5898 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.4372 global_avg_mtp_loss: 3.1527 +[titan] 2025-10-04 23:24:30,162 - root - INFO - lr: 4.9941e-05 gnorm: 1.34 [ 0:50:21<1 day, 0:41:38] +[titan] 2025-10-04 23:24:41,039 - root - INFO - step: 1320 loss: 3.6159 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4427 global_avg_mtp_loss: 3.1731 +[titan] 2025-10-04 23:24:41,039 - root - INFO - lr: 4.9940e-05 gnorm: 1.34 [ 0:50:32<1 day, 0:41:08] +[titan] 2025-10-04 23:24:51,938 - root - INFO - step: 1325 loss: 3.4618 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4229 global_avg_mtp_loss: 3.0389 +[titan] 2025-10-04 23:24:51,938 - root - INFO - lr: 4.9939e-05 gnorm: 1.36 [ 0:50:43<1 day, 0:40:40] +[titan] 2025-10-04 23:25:02,828 - root - INFO - step: 1330 loss: 3.5160 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4280 global_avg_mtp_loss: 3.0880 +[titan] 2025-10-04 
23:25:02,829 - root - INFO - lr: 4.9939e-05 gnorm: 1.38 [ 0:50:54<1 day, 0:40:11] +[titan] 2025-10-04 23:25:13,745 - root - INFO - step: 1335 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4282 global_avg_mtp_loss: 3.0763 +[titan] 2025-10-04 23:25:13,746 - root - INFO - lr: 4.9938e-05 gnorm: 1.46 [ 0:51:05<1 day, 0:39:43] +[titan] 2025-10-04 23:25:24,642 - root - INFO - step: 1340 loss: 3.5440 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4337 global_avg_mtp_loss: 3.1103 +[titan] 2025-10-04 23:25:24,642 - root - INFO - lr: 4.9937e-05 gnorm: 1.42 [ 0:51:16<1 day, 0:39:15] +[titan] 2025-10-04 23:25:35,576 - root - INFO - step: 1345 loss: 3.6036 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.4395 global_avg_mtp_loss: 3.1641 +[titan] 2025-10-04 23:25:35,576 - root - INFO - lr: 4.9937e-05 gnorm: 1.35 [ 0:51:27<1 day, 0:38:47] +[titan] 2025-10-04 23:25:44,278 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:25:46,465 - root - INFO - step: 1350 loss: 3.5202 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4296 global_avg_mtp_loss: 3.0905 +[titan] 2025-10-04 23:25:46,466 - root - INFO - lr: 4.9936e-05 gnorm: 1.31 [ 0:51:38<1 day, 0:38:19] +[titan] 2025-10-04 23:25:57,344 - root - INFO - step: 1355 loss: 3.5459 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.4309 global_avg_mtp_loss: 3.1149 +[titan] 2025-10-04 23:25:57,345 - root - INFO - lr: 4.9935e-05 gnorm: 1.30 [ 0:51:49<1 day, 0:37:51] +[titan] 2025-10-04 23:26:08,268 - root - INFO - step: 1360 loss: 3.5720 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.4351 global_avg_mtp_loss: 3.1369 +[titan] 2025-10-04 23:26:08,269 - root - INFO - lr: 4.9935e-05 gnorm: 1.39 [ 0:51:59<1 day, 0:37:24] +[titan] 2025-10-04 23:26:19,143 - root - INFO - step: 1365 loss: 3.4497 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0288 +[titan] 2025-10-04 23:26:19,143 - root - INFO - lr: 4.9934e-05 gnorm: 1.37 [ 0:52:10<1 day, 0:36:55] +[titan] 2025-10-04 23:26:30,030 - root - INFO - step: 1370 loss: 3.5847 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.4370 global_avg_mtp_loss: 3.1477 +[titan] 2025-10-04 23:26:30,030 - root - INFO - lr: 4.9933e-05 gnorm: 1.49 [ 0:52:21<1 day, 0:36:27] +[titan] 2025-10-04 23:26:40,913 - root - INFO - step: 1375 loss: 3.4970 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4269 global_avg_mtp_loss: 3.0701 +[titan] 2025-10-04 23:26:40,913 - root - INFO - lr: 4.9933e-05 gnorm: 1.38 [ 0:52:32<1 day, 0:36:00] +[titan] 2025-10-04 23:26:51,832 - root - INFO - step: 1380 loss: 3.4520 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 3.0312 +[titan] 2025-10-04 
23:26:51,832 - root - INFO - lr: 4.9932e-05 gnorm: 1.36 [ 0:52:43<1 day, 0:35:33] +[titan] 2025-10-04 23:27:02,721 - root - INFO - step: 1385 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4169 global_avg_mtp_loss: 3.0149 +[titan] 2025-10-04 23:27:02,721 - root - INFO - lr: 4.9931e-05 gnorm: 1.42 [ 0:52:54<1 day, 0:35:05] +[titan] 2025-10-04 23:27:13,641 - root - INFO - step: 1390 loss: 3.4046 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.4139 global_avg_mtp_loss: 2.9907 +[titan] 2025-10-04 23:27:13,641 - root - INFO - lr: 4.9931e-05 gnorm: 1.37 [ 0:53:05<1 day, 0:34:39] +[titan] 2025-10-04 23:27:24,527 - root - INFO - step: 1395 loss: 3.4971 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.4253 global_avg_mtp_loss: 3.0717 +[titan] 2025-10-04 23:27:24,527 - root - INFO - lr: 4.9930e-05 gnorm: 1.41 [ 0:53:16<1 day, 0:34:12] +[titan] 2025-10-04 23:27:33,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:27:35,394 - root - INFO - step: 1400 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.4290 global_avg_mtp_loss: 3.0755 +[titan] 2025-10-04 23:27:35,394 - root - INFO - lr: 4.9929e-05 gnorm: 1.40 [ 0:53:27<1 day, 0:33:44] +[titan] 2025-10-04 23:27:46,287 - root - INFO - step: 1405 loss: 3.4686 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4233 global_avg_mtp_loss: 3.0453 +[titan] 2025-10-04 23:27:46,287 - root - INFO - lr: 4.9928e-05 gnorm: 1.49 [ 0:53:37<1 day, 0:33:17] +[titan] 2025-10-04 23:27:57,198 - root - INFO - step: 1410 loss: 3.5153 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4300 global_avg_mtp_loss: 3.0853 +[titan] 2025-10-04 23:27:57,198 - root - INFO - lr: 4.9928e-05 gnorm: 1.47 [ 0:53:48<1 day, 0:32:51] +[titan] 2025-10-04 23:28:08,061 - root - INFO - step: 1415 loss: 3.4739 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4242 global_avg_mtp_loss: 3.0497 +[titan] 2025-10-04 23:28:08,061 - root - INFO - lr: 4.9927e-05 gnorm: 1.34 [ 0:53:59<1 day, 0:32:23] +[titan] 2025-10-04 23:28:18,978 - root - INFO - step: 1420 loss: 3.5053 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.4276 global_avg_mtp_loss: 3.0778 +[titan] 2025-10-04 23:28:18,978 - root - INFO - lr: 4.9926e-05 gnorm: 1.41 [ 0:54:10<1 day, 0:31:57] +[titan] 2025-10-04 23:28:29,841 - root - INFO - step: 1425 loss: 3.4083 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4170 global_avg_mtp_loss: 2.9913 +[titan] 2025-10-04 23:28:29,841 - root - INFO - lr: 4.9926e-05 gnorm: 1.42 [ 0:54:21<1 day, 0:31:30] +[titan] 2025-10-04 23:28:40,714 - root - INFO - step: 1430 loss: 3.4627 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0411 +[titan] 2025-10-04 
23:28:40,714 - root - INFO - lr: 4.9925e-05 gnorm: 1.43 [ 0:54:32<1 day, 0:31:03] +[titan] 2025-10-04 23:28:51,581 - root - INFO - step: 1435 loss: 3.4919 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0684 +[titan] 2025-10-04 23:28:51,582 - root - INFO - lr: 4.9924e-05 gnorm: 1.37 [ 0:54:43<1 day, 0:30:36] +[titan] 2025-10-04 23:29:02,457 - root - INFO - step: 1440 loss: 3.4907 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.4267 global_avg_mtp_loss: 3.0640 +[titan] 2025-10-04 23:29:02,457 - root - INFO - lr: 4.9923e-05 gnorm: 1.37 [ 0:54:54<1 day, 0:30:10] +[titan] 2025-10-04 23:29:13,408 - root - INFO - step: 1445 loss: 3.4656 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0447 +[titan] 2025-10-04 23:29:13,408 - root - INFO - lr: 4.9923e-05 gnorm: 1.40 [ 0:55:05<1 day, 0:29:45] +[titan] 2025-10-04 23:29:22,093 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:29:24,281 - root - INFO - step: 1450 loss: 3.4814 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4230 global_avg_mtp_loss: 3.0585 +[titan] 2025-10-04 23:29:24,281 - root - INFO - lr: 4.9922e-05 gnorm: 1.47 [ 0:55:15<1 day, 0:29:19] +[titan] 2025-10-04 23:29:35,145 - root - INFO - step: 1455 loss: 3.4419 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.4184 global_avg_mtp_loss: 3.0235 +[titan] 2025-10-04 23:29:35,145 - root - INFO - lr: 4.9921e-05 gnorm: 1.37 [ 0:55:26<1 day, 0:28:52] +[titan] 2025-10-04 23:29:46,030 - root - INFO - step: 1460 loss: 3.5546 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.4320 global_avg_mtp_loss: 3.1226 +[titan] 2025-10-04 23:29:46,030 - root - INFO - lr: 4.9920e-05 gnorm: 1.41 [ 0:55:37<1 day, 0:28:26] +[titan] 2025-10-04 23:29:56,926 - root - INFO - step: 1465 loss: 3.5290 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4295 global_avg_mtp_loss: 3.0995 +[titan] 2025-10-04 23:29:56,927 - root - INFO - lr: 4.9920e-05 gnorm: 1.36 [ 0:55:48<1 day, 0:28:01] +[titan] 2025-10-04 23:30:07,807 - root - INFO - step: 1470 loss: 3.4674 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0442 +[titan] 2025-10-04 23:30:07,808 - root - INFO - lr: 4.9919e-05 gnorm: 1.41 [ 0:55:59<1 day, 0:27:35] +[titan] 2025-10-04 23:30:18,704 - root - INFO - step: 1475 loss: 3.4400 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0220 +[titan] 2025-10-04 23:30:18,705 - root - INFO - lr: 4.9918e-05 gnorm: 1.36 [ 0:56:10<1 day, 0:27:10] +[titan] 2025-10-04 23:30:29,561 - root - INFO - step: 1480 loss: 3.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4228 global_avg_mtp_loss: 3.0463 +[titan] 2025-10-04 
23:30:29,562 - root - INFO - lr: 4.9917e-05 gnorm: 1.30 [ 0:56:21<1 day, 0:26:44] +[titan] 2025-10-04 23:30:40,438 - root - INFO - step: 1485 loss: 3.4861 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4263 global_avg_mtp_loss: 3.0598 +[titan] 2025-10-04 23:30:40,438 - root - INFO - lr: 4.9917e-05 gnorm: 1.35 [ 0:56:32<1 day, 0:26:18] +[titan] 2025-10-04 23:30:51,302 - root - INFO - step: 1490 loss: 3.4181 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4151 global_avg_mtp_loss: 3.0030 +[titan] 2025-10-04 23:30:51,303 - root - INFO - lr: 4.9916e-05 gnorm: 1.47 [ 0:56:43<1 day, 0:25:52] +[titan] 2025-10-04 23:31:02,175 - root - INFO - step: 1495 loss: 3.4587 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4192 global_avg_mtp_loss: 3.0394 +[titan] 2025-10-04 23:31:02,176 - root - INFO - lr: 4.9915e-05 gnorm: 1.30 [ 0:56:53<1 day, 0:25:27] +[titan] 2025-10-04 23:31:10,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:31:13,053 - root - INFO - step: 1500 loss: 3.4454 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4203 global_avg_mtp_loss: 3.0251 +[titan] 2025-10-04 23:31:13,053 - root - INFO - lr: 4.9914e-05 gnorm: 1.32 [ 0:57:04<1 day, 0:25:01] +[titan] 2025-10-04 23:31:23,959 - root - INFO - step: 1505 loss: 3.5094 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.4278 global_avg_mtp_loss: 3.0816 +[titan] 2025-10-04 23:31:23,959 - root - INFO - lr: 4.9913e-05 gnorm: 1.39 [ 0:57:15<1 day, 0:24:37] +[titan] 2025-10-04 23:31:34,816 - root - INFO - step: 1510 loss: 3.4203 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 2.9996 +[titan] 2025-10-04 23:31:34,816 - root - INFO - lr: 4.9913e-05 gnorm: 1.40 [ 0:57:26<1 day, 0:24:11] +[titan] 2025-10-04 23:31:45,697 - root - INFO - step: 1515 loss: 3.4819 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4246 global_avg_mtp_loss: 3.0574 +[titan] 2025-10-04 23:31:45,697 - root - INFO - lr: 4.9912e-05 gnorm: 1.42 [ 0:57:37<1 day, 0:23:46] +[titan] 2025-10-04 23:31:56,581 - root - INFO - step: 1520 loss: 3.4715 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0513 +[titan] 2025-10-04 23:31:56,581 - root - INFO - lr: 4.9911e-05 gnorm: 1.54 [ 0:57:48<1 day, 0:23:22] +[titan] 2025-10-04 23:32:07,443 - root - INFO - step: 1525 loss: 3.3887 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4123 global_avg_mtp_loss: 2.9763 +[titan] 2025-10-04 23:32:07,443 - root - INFO - lr: 4.9910e-05 gnorm: 1.50 [ 0:57:59<1 day, 0:22:56] +[titan] 2025-10-04 23:32:18,324 - root - INFO - step: 1530 loss: 3.4137 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4161 global_avg_mtp_loss: 2.9977 +[titan] 2025-10-04 
23:32:18,324 - root - INFO - lr: 4.9909e-05 gnorm: 1.39 [ 0:58:10<1 day, 0:22:32] +[titan] 2025-10-04 23:32:29,266 - root - INFO - step: 1535 loss: 3.4241 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.4172 global_avg_mtp_loss: 3.0069 +[titan] 2025-10-04 23:32:29,266 - root - INFO - lr: 4.9909e-05 gnorm: 1.38 [ 0:58:20<1 day, 0:22:09] +[titan] 2025-10-04 23:32:31,611 - root - INFO - Dumping profiler traces at step 1536 +[titan] 2025-10-04 23:32:31,652 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:32:40,423 - root - INFO - step: 1540 loss: 3.4722 memory: 118.84GiB(85.28%) tps: 29,370 tflops: 407.47 mfu: 41.20% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0490 +[titan] 2025-10-04 23:32:40,423 - root - INFO - lr: 4.9908e-05 gnorm: 1.48 [ 0:58:32<1 day, 0:21:51] +[titan] 2025-10-04 23:32:51,288 - root - INFO - step: 1545 loss: 3.4793 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4234 global_avg_mtp_loss: 3.0559 +[titan] 2025-10-04 23:32:51,288 - root - INFO - lr: 4.9907e-05 gnorm: 1.37 [ 0:58:42<1 day, 0:21:26] +[titan] 2025-10-04 23:32:59,971 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:33:02,152 - root - INFO - step: 1550 loss: 3.4035 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4133 global_avg_mtp_loss: 2.9902 +[titan] 2025-10-04 23:33:02,152 - root - INFO - lr: 4.9906e-05 gnorm: 1.32 [ 0:58:53<1 day, 0:21:02] +[titan] 2025-10-04 23:33:13,032 - root - INFO - step: 1555 loss: 3.4850 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.4225 global_avg_mtp_loss: 3.0625 +[titan] 2025-10-04 23:33:13,032 - root - INFO - lr: 4.9905e-05 gnorm: 1.34 [ 0:59:04<1 day, 0:20:37] +[titan] 2025-10-04 23:33:23,946 - root - INFO - step: 1560 loss: 3.5272 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4289 global_avg_mtp_loss: 3.0982 +[titan] 2025-10-04 23:33:23,946 - root - INFO - lr: 4.9905e-05 gnorm: 1.37 [ 0:59:15<1 day, 0:20:14] +[titan] 2025-10-04 23:33:34,861 - root - INFO - step: 1565 loss: 3.5253 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.4294 global_avg_mtp_loss: 3.0959 +[titan] 2025-10-04 23:33:34,861 - root - INFO - lr: 4.9904e-05 gnorm: 1.37 [ 0:59:26<1 day, 0:19:51] +[titan] 2025-10-04 23:33:45,801 - root - INFO - step: 1570 loss: 3.4320 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.4173 global_avg_mtp_loss: 3.0147 +[titan] 2025-10-04 23:33:45,801 - root - INFO - lr: 4.9903e-05 gnorm: 1.35 [ 0:59:37<1 day, 0:19:28] +[titan] 2025-10-04 23:33:56,697 - root - INFO - step: 1575 loss: 3.4044 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4154 global_avg_mtp_loss: 2.9890 +[titan] 2025-10-04 23:33:56,697 - root - INFO - lr: 4.9902e-05 gnorm: 1.32 [ 0:59:48<1 day, 0:19:05] +[titan] 2025-10-04 23:34:07,560 - root - INFO - step: 1580 loss: 3.5820 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4484 global_avg_mtp_loss: 3.1336 +[titan] 2025-10-04 
23:34:07,560 - root - INFO - lr: 4.9901e-05 gnorm: 1.32 [ 0:59:59<1 day, 0:18:40] +[titan] 2025-10-04 23:34:18,478 - root - INFO - step: 1585 loss: 3.3932 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.4134 global_avg_mtp_loss: 2.9798 +[titan] 2025-10-04 23:34:18,479 - root - INFO - lr: 4.9900e-05 gnorm: 1.40 [ 1:00:10<1 day, 0:18:18] +[titan] 2025-10-04 23:34:29,342 - root - INFO - step: 1590 loss: 3.4358 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.4195 global_avg_mtp_loss: 3.0163 +[titan] 2025-10-04 23:34:29,342 - root - INFO - lr: 4.9900e-05 gnorm: 1.38 [ 1:00:21<1 day, 0:17:54] +[titan] 2025-10-04 23:34:40,218 - root - INFO - step: 1595 loss: 3.3310 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4056 global_avg_mtp_loss: 2.9254 +[titan] 2025-10-04 23:34:40,218 - root - INFO - lr: 4.9899e-05 gnorm: 1.38 [ 1:00:31<1 day, 0:17:30] +[titan] 2025-10-04 23:34:48,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:34:51,087 - root - INFO - step: 1600 loss: 3.4555 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4196 global_avg_mtp_loss: 3.0358 +[titan] 2025-10-04 23:34:51,088 - root - INFO - lr: 4.9898e-05 gnorm: 1.39 [ 1:00:42<1 day, 0:17:06] +[titan] 2025-10-04 23:35:01,992 - root - INFO - step: 1605 loss: 3.4766 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0531 +[titan] 2025-10-04 23:35:01,992 - root - INFO - lr: 4.9897e-05 gnorm: 1.33 [ 1:00:53<1 day, 0:16:43] +[titan] 2025-10-04 23:35:12,867 - root - INFO - step: 1610 loss: 3.3824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4111 global_avg_mtp_loss: 2.9713 +[titan] 2025-10-04 23:35:12,867 - root - INFO - lr: 4.9896e-05 gnorm: 1.41 [ 1:01:04<1 day, 0:16:20] +[titan] 2025-10-04 23:35:23,778 - root - INFO - step: 1615 loss: 3.4363 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.4168 global_avg_mtp_loss: 3.0195 +[titan] 2025-10-04 23:35:23,779 - root - INFO - lr: 4.9895e-05 gnorm: 1.27 [ 1:01:15<1 day, 0:15:57] +[titan] 2025-10-04 23:35:34,649 - root - INFO - step: 1620 loss: 3.3175 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.4028 global_avg_mtp_loss: 2.9147 +[titan] 2025-10-04 23:35:34,649 - root - INFO - lr: 4.9895e-05 gnorm: 1.32 [ 1:01:26<1 day, 0:15:34] +[titan] 2025-10-04 23:35:45,526 - root - INFO - step: 1625 loss: 3.3715 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4086 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:35:45,527 - root - INFO - lr: 4.9894e-05 gnorm: 1.41 [ 1:01:37<1 day, 0:15:11] +[titan] 2025-10-04 23:35:56,405 - root - INFO - step: 1630 loss: 3.3383 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4038 global_avg_mtp_loss: 2.9345 +[titan] 2025-10-04 
23:35:56,405 - root - INFO - lr: 4.9893e-05 gnorm: 1.32 [ 1:01:48<1 day, 0:14:47] +[titan] 2025-10-04 23:36:07,309 - root - INFO - step: 1635 loss: 3.4176 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4148 global_avg_mtp_loss: 3.0028 +[titan] 2025-10-04 23:36:07,309 - root - INFO - lr: 4.9892e-05 gnorm: 1.40 [ 1:01:58<1 day, 0:14:25] +[titan] 2025-10-04 23:36:18,303 - root - INFO - step: 1640 loss: 3.3374 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.4052 global_avg_mtp_loss: 2.9322 +[titan] 2025-10-04 23:36:18,304 - root - INFO - lr: 4.9891e-05 gnorm: 1.45 [ 1:02:09<1 day, 0:14:05] +[titan] 2025-10-04 23:36:29,175 - root - INFO - step: 1645 loss: 3.4862 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.4238 global_avg_mtp_loss: 3.0624 +[titan] 2025-10-04 23:36:29,175 - root - INFO - lr: 4.9890e-05 gnorm: 1.49 [ 1:02:20<1 day, 0:13:42] +[titan] 2025-10-04 23:36:37,867 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:36:40,054 - root - INFO - step: 1650 loss: 3.2615 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8657 +[titan] 2025-10-04 23:36:40,054 - root - INFO - lr: 4.9889e-05 gnorm: 1.45 [ 1:02:31<1 day, 0:13:19] +[titan] 2025-10-04 23:36:50,937 - root - INFO - step: 1655 loss: 3.4016 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4127 global_avg_mtp_loss: 2.9889 +[titan] 2025-10-04 23:36:50,937 - root - INFO - lr: 4.9888e-05 gnorm: 1.34 [ 1:02:42<1 day, 0:12:56] +[titan] 2025-10-04 23:37:01,815 - root - INFO - step: 1660 loss: 3.3760 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4106 global_avg_mtp_loss: 2.9654 +[titan] 2025-10-04 23:37:01,815 - root - INFO - lr: 4.9888e-05 gnorm: 1.33 [ 1:02:53<1 day, 0:12:34] +[titan] 2025-10-04 23:37:12,722 - root - INFO - step: 1665 loss: 3.3861 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.4119 global_avg_mtp_loss: 2.9742 +[titan] 2025-10-04 23:37:12,722 - root - INFO - lr: 4.9887e-05 gnorm: 1.28 [ 1:03:04<1 day, 0:12:12] +[titan] 2025-10-04 23:37:23,672 - root - INFO - step: 1670 loss: 3.3993 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.20 mfu: 41.98% global_avg_ntp_loss: 0.4125 global_avg_mtp_loss: 2.9867 +[titan] 2025-10-04 23:37:23,672 - root - INFO - lr: 4.9886e-05 gnorm: 1.29 [ 1:03:15<1 day, 0:11:51] +[titan] 2025-10-04 23:37:34,543 - root - INFO - step: 1675 loss: 3.3445 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9396 +[titan] 2025-10-04 23:37:34,543 - root - INFO - lr: 4.9885e-05 gnorm: 1.45 [ 1:03:26<1 day, 0:11:28] +[titan] 2025-10-04 23:37:45,421 - root - INFO - step: 1680 loss: 3.4052 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4135 global_avg_mtp_loss: 2.9917 +[titan] 2025-10-04 
23:37:45,421 - root - INFO - lr: 4.9884e-05 gnorm: 1.41 [ 1:03:37<1 day, 0:11:06] +[titan] 2025-10-04 23:37:56,304 - root - INFO - step: 1685 loss: 3.3465 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4066 global_avg_mtp_loss: 2.9399 +[titan] 2025-10-04 23:37:56,305 - root - INFO - lr: 4.9883e-05 gnorm: 1.35 [ 1:03:47<1 day, 0:10:44] +[titan] 2025-10-04 23:38:07,165 - root - INFO - step: 1690 loss: 3.4157 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.4162 global_avg_mtp_loss: 2.9995 +[titan] 2025-10-04 23:38:07,165 - root - INFO - lr: 4.9882e-05 gnorm: 1.31 [ 1:03:58<1 day, 0:10:21] +[titan] 2025-10-04 23:38:18,032 - root - INFO - step: 1695 loss: 3.3211 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.4037 global_avg_mtp_loss: 2.9174 +[titan] 2025-10-04 23:38:18,032 - root - INFO - lr: 4.9881e-05 gnorm: 1.27 [ 1:04:09<1 day, 0:09:58] +[titan] 2025-10-04 23:38:26,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:38:28,977 - root - INFO - step: 1700 loss: 3.4333 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0154 +[titan] 2025-10-04 23:38:28,977 - root - INFO - lr: 4.9880e-05 gnorm: 1.47 [ 1:04:20<1 day, 0:09:38] +[titan] 2025-10-04 23:38:39,826 - root - INFO - step: 1705 loss: 3.3912 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.4113 global_avg_mtp_loss: 2.9799 +[titan] 2025-10-04 23:38:39,826 - root - INFO - lr: 4.9879e-05 gnorm: 1.35 [ 1:04:31<1 day, 0:09:15] +[titan] 2025-10-04 23:38:50,670 - root - INFO - step: 1710 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4158 global_avg_mtp_loss: 3.0159 +[titan] 2025-10-04 23:38:50,670 - root - INFO - lr: 4.9878e-05 gnorm: 1.41 [ 1:04:42<1 day, 0:08:52] +[titan] 2025-10-04 23:39:01,517 - root - INFO - step: 1715 loss: 3.4588 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0386 +[titan] 2025-10-04 23:39:01,517 - root - INFO - lr: 4.9877e-05 gnorm: 1.41 [ 1:04:53<1 day, 0:08:30] +[titan] 2025-10-04 23:39:12,377 - root - INFO - step: 1720 loss: 3.3718 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.4092 global_avg_mtp_loss: 2.9625 +[titan] 2025-10-04 23:39:12,377 - root - INFO - lr: 4.9877e-05 gnorm: 1.24 [ 1:05:04<1 day, 0:08:07] +[titan] 2025-10-04 23:39:23,301 - root - INFO - step: 1725 loss: 3.3446 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.4100 global_avg_mtp_loss: 2.9346 +[titan] 2025-10-04 23:39:23,301 - root - INFO - lr: 4.9876e-05 gnorm: 1.27 [ 1:05:14<1 day, 0:07:47] +[titan] 2025-10-04 23:39:34,194 - root - INFO - step: 1730 loss: 3.4582 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0367 +[titan] 2025-10-04 
23:39:34,195 - root - INFO - lr: 4.9875e-05 gnorm: 1.32 [ 1:05:25<1 day, 0:07:25] +[titan] 2025-10-04 23:39:45,081 - root - INFO - step: 1735 loss: 3.4372 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4157 global_avg_mtp_loss: 3.0215 +[titan] 2025-10-04 23:39:45,081 - root - INFO - lr: 4.9874e-05 gnorm: 1.37 [ 1:05:36<1 day, 0:07:04] +[titan] 2025-10-04 23:39:55,972 - root - INFO - step: 1740 loss: 3.3532 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9463 +[titan] 2025-10-04 23:39:55,972 - root - INFO - lr: 4.9873e-05 gnorm: 1.36 [ 1:05:47<1 day, 0:06:42] +[titan] 2025-10-04 23:40:06,852 - root - INFO - step: 1745 loss: 3.3083 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4022 global_avg_mtp_loss: 2.9061 +[titan] 2025-10-04 23:40:06,853 - root - INFO - lr: 4.9872e-05 gnorm: 1.33 [ 1:05:58<1 day, 0:06:21] +[titan] 2025-10-04 23:40:15,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:40:17,731 - root - INFO - step: 1750 loss: 3.4480 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4191 global_avg_mtp_loss: 3.0289 +[titan] 2025-10-04 23:40:17,731 - root - INFO - lr: 4.9871e-05 gnorm: 1.35 [ 1:06:09<1 day, 0:05:59] +[titan] 2025-10-04 23:40:28,641 - root - INFO - step: 1755 loss: 3.3860 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4107 global_avg_mtp_loss: 2.9753 +[titan] 2025-10-04 23:40:28,641 - root - INFO - lr: 4.9870e-05 gnorm: 1.31 [ 1:06:20<1 day, 0:05:39] +[titan] 2025-10-04 23:40:39,515 - root - INFO - step: 1760 loss: 3.3596 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9525 +[titan] 2025-10-04 23:40:39,516 - root - INFO - lr: 4.9869e-05 gnorm: 1.44 [ 1:06:31<1 day, 0:05:17] +[titan] 2025-10-04 23:40:50,423 - root - INFO - step: 1765 loss: 3.2984 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3986 global_avg_mtp_loss: 2.8998 +[titan] 2025-10-04 23:40:50,423 - root - INFO - lr: 4.9868e-05 gnorm: 1.40 [ 1:06:42<1 day, 0:04:56] +[titan] 2025-10-04 23:41:01,295 - root - INFO - step: 1770 loss: 3.3670 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.4093 global_avg_mtp_loss: 2.9577 +[titan] 2025-10-04 23:41:01,295 - root - INFO - lr: 4.9867e-05 gnorm: 1.37 [ 1:06:52<1 day, 0:04:35] +[titan] 2025-10-04 23:41:12,156 - root - INFO - step: 1775 loss: 3.3745 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.4116 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:41:12,156 - root - INFO - lr: 4.9866e-05 gnorm: 1.36 [ 1:07:03<1 day, 0:04:13] +[titan] 2025-10-04 23:41:23,073 - root - INFO - step: 1780 loss: 3.2774 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 
23:41:23,074 - root - INFO - lr: 4.9865e-05 gnorm: 1.44 [ 1:07:14<1 day, 0:03:53] +[titan] 2025-10-04 23:41:33,936 - root - INFO - step: 1785 loss: 3.3608 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9538 +[titan] 2025-10-04 23:41:33,936 - root - INFO - lr: 4.9864e-05 gnorm: 1.39 [ 1:07:25<1 day, 0:03:32] +[titan] 2025-10-04 23:41:44,811 - root - INFO - step: 1790 loss: 3.3548 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9479 +[titan] 2025-10-04 23:41:44,812 - root - INFO - lr: 4.9863e-05 gnorm: 1.46 [ 1:07:36<1 day, 0:03:11] +[titan] 2025-10-04 23:41:55,714 - root - INFO - step: 1795 loss: 3.4000 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.4121 global_avg_mtp_loss: 2.9879 +[titan] 2025-10-04 23:41:55,715 - root - INFO - lr: 4.9862e-05 gnorm: 1.53 [ 1:07:47<1 day, 0:02:50] +[titan] 2025-10-04 23:42:04,388 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:42:06,574 - root - INFO - step: 1800 loss: 3.3948 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.4124 global_avg_mtp_loss: 2.9824 +[titan] 2025-10-04 23:42:06,574 - root - INFO - lr: 4.9861e-05 gnorm: 1.37 [ 1:07:58<1 day, 0:02:29] +[titan] 2025-10-04 23:42:17,436 - root - INFO - step: 1805 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.4017 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:42:17,437 - root - INFO - lr: 4.9860e-05 gnorm: 1.29 [ 1:08:09<1 day, 0:02:08] +[titan] 2025-10-04 23:42:28,375 - root - INFO - step: 1810 loss: 3.3561 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:42:28,375 - root - INFO - lr: 4.9859e-05 gnorm: 1.39 [ 1:08:20<1 day, 0:01:48] +[titan] 2025-10-04 23:42:39,216 - root - INFO - step: 1815 loss: 3.3053 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.3995 global_avg_mtp_loss: 2.9058 +[titan] 2025-10-04 23:42:39,217 - root - INFO - lr: 4.9858e-05 gnorm: 1.34 [ 1:08:30<1 day, 0:01:27] +[titan] 2025-10-04 23:42:50,059 - root - INFO - step: 1820 loss: 3.2854 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8887 +[titan] 2025-10-04 23:42:50,059 - root - INFO - lr: 4.9857e-05 gnorm: 1.37 [ 1:08:41<1 day, 0:01:05] +[titan] 2025-10-04 23:43:00,958 - root - INFO - step: 1825 loss: 3.3393 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4035 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 23:43:00,958 - root - INFO - lr: 4.9856e-05 gnorm: 1.37 [ 1:08:52<1 day, 0:00:45] +[titan] 2025-10-04 23:43:11,802 - root - INFO - step: 1830 loss: 3.3421 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4062 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 
23:43:11,802 - root - INFO - lr: 4.9855e-05 gnorm: 1.36 [ 1:09:03<1 day, 0:00:24] +[titan] 2025-10-04 23:43:22,644 - root - INFO - step: 1835 loss: 3.3492 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.4055 global_avg_mtp_loss: 2.9437 +[titan] 2025-10-04 23:43:22,645 - root - INFO - lr: 4.9854e-05 gnorm: 1.32 [ 1:09:14<1 day, 0:00:02] +[titan] 2025-10-04 23:43:33,561 - root - INFO - step: 1840 loss: 3.2612 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3942 global_avg_mtp_loss: 2.8670 +[titan] 2025-10-04 23:43:33,562 - root - INFO - lr: 4.9853e-05 gnorm: 1.27 [ 1:09:25<23:59:43] +[titan] 2025-10-04 23:43:44,438 - root - INFO - step: 1845 loss: 3.3605 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9537 +[titan] 2025-10-04 23:43:44,438 - root - INFO - lr: 4.9852e-05 gnorm: 1.27 [ 1:09:36<23:59:22] +[titan] 2025-10-04 23:43:53,131 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:43:55,315 - root - INFO - step: 1850 loss: 3.3556 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4063 global_avg_mtp_loss: 2.9493 +[titan] 2025-10-04 23:43:55,315 - root - INFO - lr: 4.9851e-05 gnorm: 1.32 [ 1:09:46<23:59:02] +[titan] 2025-10-04 23:44:06,182 - root - INFO - step: 1855 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4016 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:44:06,182 - root - INFO - lr: 4.9850e-05 gnorm: 1.40 [ 1:09:57<23:58:41] +[titan] 2025-10-04 23:44:17,099 - root - INFO - step: 1860 loss: 3.3782 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4085 global_avg_mtp_loss: 2.9697 +[titan] 2025-10-04 23:44:17,099 - root - INFO - lr: 4.9849e-05 gnorm: 1.35 [ 1:10:08<23:58:22] +[titan] 2025-10-04 23:44:28,008 - root - INFO - step: 1865 loss: 3.2855 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3997 global_avg_mtp_loss: 2.8857 +[titan] 2025-10-04 23:44:28,009 - root - INFO - lr: 4.9848e-05 gnorm: 1.35 [ 1:10:19<23:58:02] +[titan] 2025-10-04 23:44:38,889 - root - INFO - step: 1870 loss: 3.3023 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.9043 +[titan] 2025-10-04 23:44:38,889 - root - INFO - lr: 4.9847e-05 gnorm: 1.24 [ 1:10:30<23:57:42] +[titan] 2025-10-04 23:44:49,776 - root - INFO - step: 1875 loss: 3.3134 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.4008 global_avg_mtp_loss: 2.9126 +[titan] 2025-10-04 23:44:49,776 - root - INFO - lr: 4.9846e-05 gnorm: 1.32 [ 1:10:41<23:57:22] +[titan] 2025-10-04 23:45:00,642 - root - INFO - step: 1880 loss: 3.2097 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8207 +[titan] 2025-10-04 23:45:00,642 - root - INFO - lr: 4.9845e-05 
gnorm: 1.33 [ 1:10:52<23:57:02] +[titan] 2025-10-04 23:45:11,496 - root - INFO - step: 1885 loss: 3.2568 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8621 +[titan] 2025-10-04 23:45:11,497 - root - INFO - lr: 4.9844e-05 gnorm: 1.34 [ 1:11:03<23:56:41] +[titan] 2025-10-04 23:45:22,417 - root - INFO - step: 1890 loss: 3.3180 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.4019 global_avg_mtp_loss: 2.9160 +[titan] 2025-10-04 23:45:22,417 - root - INFO - lr: 4.9843e-05 gnorm: 1.39 [ 1:11:14<23:56:22] +[titan] 2025-10-04 23:45:33,318 - root - INFO - step: 1895 loss: 3.2706 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3955 global_avg_mtp_loss: 2.8752 +[titan] 2025-10-04 23:45:33,318 - root - INFO - lr: 4.9842e-05 gnorm: 1.50 [ 1:11:24<23:56:03] +[titan] 2025-10-04 23:45:41,992 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:45:44,171 - root - INFO - step: 1900 loss: 3.2793 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8827 +[titan] 2025-10-04 23:45:44,171 - root - INFO - lr: 4.9841e-05 gnorm: 1.29 [ 1:11:35<23:55:42] +[titan] 2025-10-04 23:45:55,048 - root - INFO - step: 1905 loss: 3.3144 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4029 global_avg_mtp_loss: 2.9115 +[titan] 2025-10-04 23:45:55,048 - root - INFO - lr: 4.9840e-05 gnorm: 1.32 [ 1:11:46<23:55:22] +[titan] 2025-10-04 23:46:05,920 - root - INFO - step: 1910 loss: 3.2864 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3983 global_avg_mtp_loss: 2.8881 +[titan] 2025-10-04 23:46:05,920 - root - INFO - lr: 4.9839e-05 gnorm: 1.32 [ 1:11:57<23:55:02] +[titan] 2025-10-04 23:46:16,784 - root - INFO - step: 1915 loss: 3.2475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8558 +[titan] 2025-10-04 23:46:16,785 - root - INFO - lr: 4.9837e-05 gnorm: 1.28 [ 1:12:08<23:54:42] +[titan] 2025-10-04 23:46:27,699 - root - INFO - step: 1920 loss: 3.3007 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.11% global_avg_ntp_loss: 0.3987 global_avg_mtp_loss: 2.9020 +[titan] 2025-10-04 23:46:27,700 - root - INFO - lr: 4.9836e-05 gnorm: 1.39 [ 1:12:19<23:54:23] +[titan] 2025-10-04 23:46:38,626 - root - INFO - step: 1925 loss: 3.2659 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3951 global_avg_mtp_loss: 2.8709 +[titan] 2025-10-04 23:46:38,626 - root - INFO - lr: 4.9835e-05 gnorm: 1.32 [ 1:12:30<23:54:05] +[titan] 2025-10-04 23:46:49,497 - root - INFO - step: 1930 loss: 3.2880 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8914 +[titan] 2025-10-04 23:46:49,497 - root - INFO - lr: 4.9834e-05 
gnorm: 1.31 [ 1:12:41<23:53:45] +[titan] 2025-10-04 23:47:00,373 - root - INFO - step: 1935 loss: 3.2719 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8754 +[titan] 2025-10-04 23:47:00,374 - root - INFO - lr: 4.9833e-05 gnorm: 1.33 [ 1:12:52<23:53:25] +[titan] 2025-10-04 23:47:11,263 - root - INFO - step: 1940 loss: 3.3395 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4048 global_avg_mtp_loss: 2.9347 +[titan] 2025-10-04 23:47:11,263 - root - INFO - lr: 4.9832e-05 gnorm: 1.41 [ 1:13:02<23:53:06] +[titan] 2025-10-04 23:47:22,130 - root - INFO - step: 1945 loss: 3.2947 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8976 +[titan] 2025-10-04 23:47:22,130 - root - INFO - lr: 4.9831e-05 gnorm: 1.48 [ 1:13:13<23:52:46] +[titan] 2025-10-04 23:47:30,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:47:33,036 - root - INFO - step: 1950 loss: 3.3613 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.4054 global_avg_mtp_loss: 2.9558 +[titan] 2025-10-04 23:47:33,037 - root - INFO - lr: 4.9830e-05 gnorm: 1.34 [ 1:13:24<23:52:27] +[titan] 2025-10-04 23:47:43,944 - root - INFO - step: 1955 loss: 3.2920 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3990 global_avg_mtp_loss: 2.8929 +[titan] 2025-10-04 23:47:43,944 - root - INFO - lr: 4.9829e-05 gnorm: 1.29 [ 1:13:35<23:52:09] +[titan] 2025-10-04 23:47:54,843 - root - INFO - step: 1960 loss: 3.2473 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3941 global_avg_mtp_loss: 2.8533 +[titan] 2025-10-04 23:47:54,843 - root - INFO - lr: 4.9828e-05 gnorm: 1.30 [ 1:13:46<23:51:50] +[titan] 2025-10-04 23:48:05,717 - root - INFO - step: 1965 loss: 3.2766 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3954 global_avg_mtp_loss: 2.8812 +[titan] 2025-10-04 23:48:05,717 - root - INFO - lr: 4.9827e-05 gnorm: 1.23 [ 1:13:57<23:51:30] +[titan] 2025-10-04 23:48:16,623 - root - INFO - step: 1970 loss: 3.2148 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3891 global_avg_mtp_loss: 2.8257 +[titan] 2025-10-04 23:48:16,623 - root - INFO - lr: 4.9825e-05 gnorm: 1.38 [ 1:14:08<23:51:12] +[titan] 2025-10-04 23:48:27,497 - root - INFO - step: 1975 loss: 3.2117 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3887 global_avg_mtp_loss: 2.8230 +[titan] 2025-10-04 23:48:27,497 - root - INFO - lr: 4.9824e-05 gnorm: 1.35 [ 1:14:19<23:50:52] +[titan] 2025-10-04 23:48:38,417 - root - INFO - step: 1980 loss: 3.3095 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.4021 global_avg_mtp_loss: 2.9075 +[titan] 2025-10-04 23:48:38,417 - root - INFO - lr: 4.9823e-05 
gnorm: 1.35 [ 1:14:30<23:50:34] +[titan] 2025-10-04 23:48:49,319 - root - INFO - step: 1985 loss: 3.2797 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 23:48:49,320 - root - INFO - lr: 4.9822e-05 gnorm: 1.26 [ 1:14:40<23:50:15] +[titan] 2025-10-04 23:49:00,192 - root - INFO - step: 1990 loss: 3.3317 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4032 global_avg_mtp_loss: 2.9285 +[titan] 2025-10-04 23:49:00,193 - root - INFO - lr: 4.9821e-05 gnorm: 1.36 [ 1:14:51<23:49:56] +[titan] 2025-10-04 23:49:11,083 - root - INFO - step: 1995 loss: 3.2394 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-04 23:49:11,084 - root - INFO - lr: 4.9820e-05 gnorm: 1.25 [ 1:15:02<23:49:37] +[titan] 2025-10-04 23:49:19,763 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:49:21,941 - root - INFO - step: 2000 loss: 3.2905 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.3991 global_avg_mtp_loss: 2.8913 +[titan] 2025-10-04 23:49:21,941 - root - INFO - lr: 4.9819e-05 gnorm: 1.41 [ 1:15:13<23:49:18] +[titan] 2025-10-04 23:49:32,868 - root - INFO - step: 2005 loss: 3.2217 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8299 +[titan] 2025-10-04 23:49:32,868 - root - INFO - lr: 4.9818e-05 gnorm: 1.41 [ 1:15:24<23:49:00] +[titan] 2025-10-04 23:49:43,749 - root - INFO - step: 2010 loss: 3.2369 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3913 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:49:43,750 - root - INFO - lr: 4.9816e-05 gnorm: 1.33 [ 1:15:35<23:48:41] +[titan] 2025-10-04 23:49:54,661 - root - INFO - step: 2015 loss: 3.2498 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3947 global_avg_mtp_loss: 2.8551 +[titan] 2025-10-04 23:49:54,661 - root - INFO - lr: 4.9815e-05 gnorm: 1.34 [ 1:15:46<23:48:22] +[titan] 2025-10-04 23:50:05,578 - root - INFO - step: 2020 loss: 3.2711 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3939 global_avg_mtp_loss: 2.8772 +[titan] 2025-10-04 23:50:05,579 - root - INFO - lr: 4.9814e-05 gnorm: 1.36 [ 1:15:57<23:48:04] +[titan] 2025-10-04 23:50:16,459 - root - INFO - step: 2025 loss: 3.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3962 global_avg_mtp_loss: 2.8751 +[titan] 2025-10-04 23:50:16,459 - root - INFO - lr: 4.9813e-05 gnorm: 1.26 [ 1:16:08<23:47:46] +[titan] 2025-10-04 23:50:27,328 - root - INFO - step: 2030 loss: 3.2606 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3932 global_avg_mtp_loss: 2.8674 +[titan] 2025-10-04 23:50:27,329 - root - INFO - lr: 4.9812e-05 
gnorm: 1.27 [ 1:16:18<23:47:27] +[titan] 2025-10-04 23:50:38,283 - root - INFO - step: 2035 loss: 3.3063 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3996 global_avg_mtp_loss: 2.9067 +[titan] 2025-10-04 23:50:38,284 - root - INFO - lr: 4.9811e-05 gnorm: 1.35 [ 1:16:29<23:47:09] +[titan] 2025-10-04 23:50:49,166 - root - INFO - step: 2040 loss: 3.1900 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3852 global_avg_mtp_loss: 2.8048 +[titan] 2025-10-04 23:50:49,166 - root - INFO - lr: 4.9810e-05 gnorm: 1.37 [ 1:16:40<23:46:51] +[titan] 2025-10-04 23:51:00,136 - root - INFO - step: 2045 loss: 3.2396 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.3910 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:00,136 - root - INFO - lr: 4.9808e-05 gnorm: 1.30 [ 1:16:51<23:46:34] +[titan] 2025-10-04 23:51:06,837 - root - INFO - Dumping profiler traces at step 2048 +[titan] 2025-10-04 23:51:06,872 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 23:51:09,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:51:11,279 - root - INFO - step: 2050 loss: 3.2428 memory: 118.84GiB(85.28%) tps: 29,407 tflops: 407.98 mfu: 41.25% global_avg_ntp_loss: 0.3943 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:11,280 - root - INFO - lr: 4.9807e-05 gnorm: 1.39 [ 1:17:02<23:46:20] +[titan] 2025-10-04 23:51:22,173 - root - INFO - step: 2055 loss: 3.3541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:51:22,173 - root - INFO - lr: 4.9806e-05 gnorm: 1.42 [ 1:17:13<23:46:02] +[titan] 2025-10-04 23:51:33,068 - root - INFO - step: 2060 loss: 3.2810 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3960 global_avg_mtp_loss: 2.8850 +[titan] 2025-10-04 23:51:33,069 - root - INFO - lr: 4.9805e-05 gnorm: 1.33 [ 1:17:24<23:45:43] +[titan] 2025-10-04 23:51:43,943 - root - INFO - step: 2065 loss: 3.2366 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:51:43,943 - root - INFO - lr: 4.9804e-05 gnorm: 1.45 [ 1:17:35<23:45:25] +[titan] 2025-10-04 23:51:54,802 - root - INFO - step: 2070 loss: 3.2400 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3911 global_avg_mtp_loss: 2.8489 +[titan] 2025-10-04 23:51:54,802 - root - INFO - lr: 4.9803e-05 gnorm: 1.37 [ 1:17:46<23:45:06] +[titan] 2025-10-04 23:52:05,671 - root - INFO - step: 2075 loss: 3.2363 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3927 global_avg_mtp_loss: 2.8436 +[titan] 2025-10-04 23:52:05,671 - root - INFO - lr: 4.9801e-05 gnorm: 1.32 [ 1:17:57<23:44:47] +[titan] 2025-10-04 23:52:16,539 - root - INFO - step: 2080 loss: 3.1819 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3837 global_avg_mtp_loss: 2.7983 +[titan] 2025-10-04 23:52:16,539 - root - INFO - lr: 4.9800e-05 
gnorm: 1.25 [ 1:18:08<23:44:29] +[titan] 2025-10-04 23:52:27,458 - root - INFO - step: 2085 loss: 3.2817 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8845 +[titan] 2025-10-04 23:52:27,458 - root - INFO - lr: 4.9799e-05 gnorm: 1.31 [ 1:18:19<23:44:11] +[titan] 2025-10-04 23:52:38,351 - root - INFO - step: 2090 loss: 3.2776 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3974 global_avg_mtp_loss: 2.8802 +[titan] 2025-10-04 23:52:38,351 - root - INFO - lr: 4.9798e-05 gnorm: 1.27 [ 1:18:29<23:43:53] +[titan] 2025-10-04 23:52:49,245 - root - INFO - step: 2095 loss: 3.2401 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8479 +[titan] 2025-10-04 23:52:49,245 - root - INFO - lr: 4.9797e-05 gnorm: 1.35 [ 1:18:40<23:43:35] +[titan] 2025-10-04 23:52:57,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:53:00,142 - root - INFO - step: 2100 loss: 3.1666 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3827 global_avg_mtp_loss: 2.7839 +[titan] 2025-10-04 23:53:00,142 - root - INFO - lr: 4.9795e-05 gnorm: 1.31 [ 1:18:51<23:43:17] +[titan] 2025-10-04 23:53:11,021 - root - INFO - step: 2105 loss: 3.1171 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3762 global_avg_mtp_loss: 2.7409 +[titan] 2025-10-04 23:53:11,021 - root - INFO - lr: 4.9794e-05 gnorm: 1.45 [ 1:19:02<23:42:59] +[titan] 2025-10-04 23:53:21,893 - root - INFO - step: 2110 loss: 3.2816 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3984 global_avg_mtp_loss: 2.8833 +[titan] 2025-10-04 23:53:21,894 - root - INFO - lr: 4.9793e-05 gnorm: 1.35 [ 1:19:13<23:42:40] +[titan] 2025-10-04 23:53:32,852 - root - INFO - step: 2115 loss: 3.2607 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8663 +[titan] 2025-10-04 23:53:32,852 - root - INFO - lr: 4.9792e-05 gnorm: 1.27 [ 1:19:24<23:42:24] +[titan] 2025-10-04 23:53:43,730 - root - INFO - step: 2120 loss: 3.2629 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8683 +[titan] 2025-10-04 23:53:43,730 - root - INFO - lr: 4.9791e-05 gnorm: 1.25 [ 1:19:35<23:42:05] +[titan] 2025-10-04 23:53:54,620 - root - INFO - step: 2125 loss: 3.0920 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3725 global_avg_mtp_loss: 2.7195 +[titan] 2025-10-04 23:53:54,620 - root - INFO - lr: 4.9789e-05 gnorm: 1.37 [ 1:19:46<23:41:47] +[titan] 2025-10-04 23:54:05,508 - root - INFO - step: 2130 loss: 3.2038 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8165 +[titan] 2025-10-04 23:54:05,508 - root - INFO - lr: 4.9788e-05 
gnorm: 1.28 [ 1:19:57<23:41:30] +[titan] 2025-10-04 23:54:16,404 - root - INFO - step: 2135 loss: 3.1616 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.3810 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-04 23:54:16,404 - root - INFO - lr: 4.9787e-05 gnorm: 1.27 [ 1:20:08<23:41:12] +[titan] 2025-10-04 23:54:27,282 - root - INFO - step: 2140 loss: 3.1455 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3792 global_avg_mtp_loss: 2.7663 +[titan] 2025-10-04 23:54:27,282 - root - INFO - lr: 4.9786e-05 gnorm: 1.36 [ 1:20:18<23:40:54] +[titan] 2025-10-04 23:54:38,216 - root - INFO - step: 2145 loss: 3.1443 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7655 +[titan] 2025-10-04 23:54:38,216 - root - INFO - lr: 4.9785e-05 gnorm: 1.24 [ 1:20:29<23:40:37] +[titan] 2025-10-04 23:54:46,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:54:49,106 - root - INFO - step: 2150 loss: 3.2432 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3921 global_avg_mtp_loss: 2.8511 +[titan] 2025-10-04 23:54:49,106 - root - INFO - lr: 4.9783e-05 gnorm: 1.23 [ 1:20:40<23:40:19] +[titan] 2025-10-04 23:54:59,985 - root - INFO - step: 2155 loss: 3.1416 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7620 +[titan] 2025-10-04 23:54:59,985 - root - INFO - lr: 4.9782e-05 gnorm: 1.23 [ 1:20:51<23:40:01] +[titan] 2025-10-04 23:55:10,860 - root - INFO - step: 2160 loss: 3.1386 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7597 +[titan] 2025-10-04 23:55:10,860 - root - INFO - lr: 4.9781e-05 gnorm: 1.27 [ 1:21:02<23:39:43] +[titan] 2025-10-04 23:55:21,730 - root - INFO - step: 2165 loss: 3.2482 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8559 +[titan] 2025-10-04 23:55:21,730 - root - INFO - lr: 4.9780e-05 gnorm: 1.29 [ 1:21:13<23:39:25] +[titan] 2025-10-04 23:55:32,617 - root - INFO - step: 2170 loss: 3.2349 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3914 global_avg_mtp_loss: 2.8435 +[titan] 2025-10-04 23:55:32,617 - root - INFO - lr: 4.9778e-05 gnorm: 1.22 [ 1:21:24<23:39:08] +[titan] 2025-10-04 23:55:43,541 - root - INFO - step: 2175 loss: 3.2325 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.3901 global_avg_mtp_loss: 2.8424 +[titan] 2025-10-04 23:55:43,542 - root - INFO - lr: 4.9777e-05 gnorm: 1.32 [ 1:21:35<23:38:51] +[titan] 2025-10-04 23:55:54,482 - root - INFO - step: 2180 loss: 3.1551 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7749 +[titan] 2025-10-04 23:55:54,482 - root - INFO - lr: 4.9776e-05 
gnorm: 1.29 [ 1:21:46<23:38:34] +[titan] 2025-10-04 23:56:05,357 - root - INFO - step: 2185 loss: 3.2187 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3882 global_avg_mtp_loss: 2.8305 +[titan] 2025-10-04 23:56:05,357 - root - INFO - lr: 4.9775e-05 gnorm: 1.37 [ 1:21:56<23:38:16] +[titan] 2025-10-04 23:56:16,252 - root - INFO - step: 2190 loss: 3.1722 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7900 +[titan] 2025-10-04 23:56:16,253 - root - INFO - lr: 4.9773e-05 gnorm: 1.44 [ 1:22:07<23:37:59] +[titan] 2025-10-04 23:56:27,132 - root - INFO - step: 2195 loss: 3.1685 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3823 global_avg_mtp_loss: 2.7862 +[titan] 2025-10-04 23:56:27,132 - root - INFO - lr: 4.9772e-05 gnorm: 1.33 [ 1:22:18<23:37:41] +[titan] 2025-10-04 23:56:35,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:56:38,038 - root - INFO - step: 2200 loss: 3.1985 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3856 global_avg_mtp_loss: 2.8129 +[titan] 2025-10-04 23:56:38,038 - root - INFO - lr: 4.9771e-05 gnorm: 1.31 [ 1:22:29<23:37:24] +[titan] 2025-10-04 23:56:48,912 - root - INFO - step: 2205 loss: 3.2059 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.8192 +[titan] 2025-10-04 23:56:48,913 - root - INFO - lr: 4.9769e-05 gnorm: 1.26 [ 1:22:40<23:37:06] +[titan] 2025-10-04 23:56:59,839 - root - INFO - step: 2210 loss: 3.1541 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7729 +[titan] 2025-10-04 23:56:59,840 - root - INFO - lr: 4.9768e-05 gnorm: 1.42 [ 1:22:51<23:36:49] +[titan] 2025-10-04 23:57:10,737 - root - INFO - step: 2215 loss: 3.2356 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3903 global_avg_mtp_loss: 2.8453 +[titan] 2025-10-04 23:57:10,737 - root - INFO - lr: 4.9767e-05 gnorm: 1.51 [ 1:23:02<23:36:32] +[titan] 2025-10-04 23:57:21,630 - root - INFO - step: 2220 loss: 3.1859 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.7992 +[titan] 2025-10-04 23:57:21,630 - root - INFO - lr: 4.9766e-05 gnorm: 1.39 [ 1:23:13<23:36:15] +[titan] 2025-10-04 23:57:32,532 - root - INFO - step: 2225 loss: 3.1779 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3842 global_avg_mtp_loss: 2.7936 +[titan] 2025-10-04 23:57:32,532 - root - INFO - lr: 4.9764e-05 gnorm: 1.24 [ 1:23:24<23:35:58] +[titan] 2025-10-04 23:57:43,450 - root - INFO - step: 2230 loss: 3.2176 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3895 global_avg_mtp_loss: 2.8282 +[titan] 2025-10-04 23:57:43,450 - root - INFO - lr: 4.9763e-05 
gnorm: 1.28 [ 1:23:35<23:35:41] +[titan] 2025-10-04 23:57:54,366 - root - INFO - step: 2235 loss: 3.2212 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3879 global_avg_mtp_loss: 2.8333 +[titan] 2025-10-04 23:57:54,366 - root - INFO - lr: 4.9762e-05 gnorm: 1.35 [ 1:23:45<23:35:24] +[titan] 2025-10-04 23:58:05,251 - root - INFO - step: 2240 loss: 3.2781 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8824 +[titan] 2025-10-04 23:58:05,252 - root - INFO - lr: 4.9760e-05 gnorm: 1.39 [ 1:23:56<23:35:07] +[titan] 2025-10-04 23:58:16,173 - root - INFO - step: 2245 loss: 3.1710 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7885 +[titan] 2025-10-04 23:58:16,174 - root - INFO - lr: 4.9759e-05 gnorm: 1.30 [ 1:24:07<23:34:50] +[titan] 2025-10-04 23:58:24,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:58:27,069 - root - INFO - step: 2250 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7772 +[titan] 2025-10-04 23:58:27,069 - root - INFO - lr: 4.9758e-05 gnorm: 1.33 [ 1:24:18<23:34:33] +[titan] 2025-10-04 23:58:37,973 - root - INFO - step: 2255 loss: 3.2917 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3969 global_avg_mtp_loss: 2.8947 +[titan] 2025-10-04 23:58:37,973 - root - INFO - lr: 4.9757e-05 gnorm: 1.34 [ 1:24:29<23:34:16] +[titan] 2025-10-04 23:58:48,849 - root - INFO - step: 2260 loss: 3.1742 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3832 global_avg_mtp_loss: 2.7911 +[titan] 2025-10-04 23:58:48,849 - root - INFO - lr: 4.9755e-05 gnorm: 1.32 [ 1:24:40<23:33:59] +[titan] 2025-10-04 23:58:59,727 - root - INFO - step: 2265 loss: 3.1716 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3830 global_avg_mtp_loss: 2.7886 +[titan] 2025-10-04 23:58:59,727 - root - INFO - lr: 4.9754e-05 gnorm: 1.31 [ 1:24:51<23:33:42] +[titan] 2025-10-04 23:59:10,618 - root - INFO - step: 2270 loss: 3.2242 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8352 +[titan] 2025-10-04 23:59:10,618 - root - INFO - lr: 4.9753e-05 gnorm: 1.31 [ 1:25:02<23:33:25] +[titan] 2025-10-04 23:59:21,547 - root - INFO - step: 2275 loss: 3.2006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3858 global_avg_mtp_loss: 2.8148 +[titan] 2025-10-04 23:59:21,547 - root - INFO - lr: 4.9751e-05 gnorm: 1.30 [ 1:25:13<23:33:08] +[titan] 2025-10-04 23:59:32,439 - root - INFO - step: 2280 loss: 3.1251 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7491 +[titan] 2025-10-04 23:59:32,439 - root - INFO - lr: 4.9750e-05 
gnorm: 1.25 [ 1:25:24<23:32:51] +[titan] 2025-10-04 23:59:43,315 - root - INFO - step: 2285 loss: 3.1971 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3843 global_avg_mtp_loss: 2.8128 +[titan] 2025-10-04 23:59:43,315 - root - INFO - lr: 4.9749e-05 gnorm: 1.24 [ 1:25:34<23:32:34] +[titan] 2025-10-04 23:59:54,211 - root - INFO - step: 2290 loss: 3.1138 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3754 global_avg_mtp_loss: 2.7384 +[titan] 2025-10-04 23:59:54,211 - root - INFO - lr: 4.9747e-05 gnorm: 1.30 [ 1:25:45<23:32:17] +[titan] 2025-10-05 00:00:05,066 - root - INFO - step: 2295 loss: 3.1381 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3802 global_avg_mtp_loss: 2.7579 +[titan] 2025-10-05 00:00:05,067 - root - INFO - lr: 4.9746e-05 gnorm: 1.34 [ 1:25:56<23:32:00] +[titan] 2025-10-05 00:00:13,756 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:00:15,940 - root - INFO - step: 2300 loss: 3.1684 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7859 +[titan] 2025-10-05 00:00:15,940 - root - INFO - lr: 4.9745e-05 gnorm: 1.25 [ 1:26:07<23:31:43] +[titan] 2025-10-05 00:00:26,871 - root - INFO - step: 2305 loss: 3.1673 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3819 global_avg_mtp_loss: 2.7853 +[titan] 2025-10-05 00:00:26,871 - root - INFO - lr: 4.9743e-05 gnorm: 1.22 [ 1:26:18<23:31:26] +[titan] 2025-10-05 00:00:37,762 - root - INFO - step: 2310 loss: 3.1531 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7719 +[titan] 2025-10-05 00:00:37,762 - root - INFO - lr: 4.9742e-05 gnorm: 1.30 [ 1:26:29<23:31:10] +[titan] 2025-10-05 00:00:48,669 - root - INFO - step: 2315 loss: 3.1583 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3795 global_avg_mtp_loss: 2.7788 +[titan] 2025-10-05 00:00:48,669 - root - INFO - lr: 4.9741e-05 gnorm: 1.22 [ 1:26:40<23:30:53] +[titan] 2025-10-05 00:00:59,522 - root - INFO - step: 2320 loss: 3.1995 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.3851 global_avg_mtp_loss: 2.8144 +[titan] 2025-10-05 00:00:59,522 - root - INFO - lr: 4.9739e-05 gnorm: 1.29 [ 1:26:51<23:30:36] +[titan] 2025-10-05 00:01:10,409 - root - INFO - step: 2325 loss: 3.1550 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3800 global_avg_mtp_loss: 2.7750 +[titan] 2025-10-05 00:01:10,409 - root - INFO - lr: 4.9738e-05 gnorm: 1.29 [ 1:27:02<23:30:19] +[titan] 2025-10-05 00:01:21,286 - root - INFO - step: 2330 loss: 3.1042 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3734 global_avg_mtp_loss: 2.7308 +[titan] 2025-10-05 00:01:21,286 - root - INFO - lr: 4.9737e-05 
gnorm: 1.25 [ 1:27:12<23:30:02] +[titan] 2025-10-05 00:01:32,170 - root - INFO - step: 2335 loss: 3.1428 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3775 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:01:32,170 - root - INFO - lr: 4.9735e-05 gnorm: 1.24 [ 1:27:23<23:29:45] +[titan] 2025-10-05 00:01:43,255 - root - INFO - step: 2340 loss: 3.2357 memory: 118.84GiB(85.28%) tps: 29,561 tflops: 410.11 mfu: 41.47% global_avg_ntp_loss: 0.3959 global_avg_mtp_loss: 2.8398 +[titan] 2025-10-05 00:01:43,255 - root - INFO - lr: 4.9734e-05 gnorm: 1.31 [ 1:27:34<23:29:31] +[titan] 2025-10-05 00:01:54,139 - root - INFO - step: 2345 loss: 3.2594 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3938 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:01:54,139 - root - INFO - lr: 4.9732e-05 gnorm: 1.30 [ 1:27:45<23:29:15] +[titan] 2025-10-05 00:02:02,829 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:02:05,010 - root - INFO - step: 2350 loss: 3.1385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3798 global_avg_mtp_loss: 2.7587 +[titan] 2025-10-05 00:02:05,010 - root - INFO - lr: 4.9731e-05 gnorm: 1.30 [ 1:27:56<23:28:58] +[titan] 2025-10-05 00:02:15,898 - root - INFO - step: 2355 loss: 3.1702 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3826 global_avg_mtp_loss: 2.7876 +[titan] 2025-10-05 00:02:15,898 - root - INFO - lr: 4.9730e-05 gnorm: 1.32 [ 1:28:07<23:28:41] +[titan] 2025-10-05 00:02:26,769 - root - INFO - step: 2360 loss: 3.1893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8043 +[titan] 2025-10-05 00:02:26,769 - root - INFO - lr: 4.9728e-05 gnorm: 1.43 [ 1:28:18<23:28:24] +[titan] 2025-10-05 00:02:37,640 - root - INFO - step: 2365 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7481 +[titan] 2025-10-05 00:02:37,640 - root - INFO - lr: 4.9727e-05 gnorm: 1.39 [ 1:28:29<23:28:07] +[titan] 2025-10-05 00:02:48,598 - root - INFO - step: 2370 loss: 3.1988 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3869 global_avg_mtp_loss: 2.8118 +[titan] 2025-10-05 00:02:48,598 - root - INFO - lr: 4.9726e-05 gnorm: 1.28 [ 1:28:40<23:27:52] +[titan] 2025-10-05 00:02:59,464 - root - INFO - step: 2375 loss: 3.1613 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3814 global_avg_mtp_loss: 2.7799 +[titan] 2025-10-05 00:02:59,464 - root - INFO - lr: 4.9724e-05 gnorm: 1.31 [ 1:28:51<23:27:35] +[titan] 2025-10-05 00:03:10,332 - root - INFO - step: 2380 loss: 3.2049 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3860 global_avg_mtp_loss: 2.8189 +[titan] 2025-10-05 00:03:10,332 - root - INFO - lr: 4.9723e-05 
gnorm: 1.34 [ 1:29:01<23:27:18] +[titan] 2025-10-05 00:03:21,196 - root - INFO - step: 2385 loss: 3.1936 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3835 global_avg_mtp_loss: 2.8101 +[titan] 2025-10-05 00:03:21,196 - root - INFO - lr: 4.9721e-05 gnorm: 1.30 [ 1:29:12<23:27:01] +[titan] 2025-10-05 00:03:32,051 - root - INFO - step: 2390 loss: 3.2440 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3919 global_avg_mtp_loss: 2.8521 +[titan] 2025-10-05 00:03:32,051 - root - INFO - lr: 4.9720e-05 gnorm: 1.33 [ 1:29:23<23:26:44] +[titan] 2025-10-05 00:03:42,978 - root - INFO - step: 2395 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3771 global_avg_mtp_loss: 2.7475 +[titan] 2025-10-05 00:03:42,978 - root - INFO - lr: 4.9719e-05 gnorm: 1.31 [ 1:29:34<23:26:28] +[titan] 2025-10-05 00:03:51,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:03:53,851 - root - INFO - step: 2400 loss: 3.2662 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8718 +[titan] 2025-10-05 00:03:53,851 - root - INFO - lr: 4.9717e-05 gnorm: 1.40 [ 1:29:45<23:26:12] +[titan] 2025-10-05 00:04:04,749 - root - INFO - step: 2405 loss: 3.2406 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-05 00:04:04,749 - root - INFO - lr: 4.9716e-05 gnorm: 1.38 [ 1:29:56<23:25:55] +[titan] 2025-10-05 00:04:15,630 - root - INFO - step: 2410 loss: 3.1271 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7504 +[titan] 2025-10-05 00:04:15,630 - root - INFO - lr: 4.9714e-05 gnorm: 1.27 [ 1:30:07<23:25:39] +[titan] 2025-10-05 00:04:26,491 - root - INFO - step: 2415 loss: 3.1402 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3779 global_avg_mtp_loss: 2.7623 +[titan] 2025-10-05 00:04:26,491 - root - INFO - lr: 4.9713e-05 gnorm: 1.39 [ 1:30:18<23:25:22] +[titan] 2025-10-05 00:04:37,350 - root - INFO - step: 2420 loss: 3.1746 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7924 +[titan] 2025-10-05 00:04:37,350 - root - INFO - lr: 4.9711e-05 gnorm: 1.45 [ 1:30:28<23:25:05] +[titan] 2025-10-05 00:04:48,268 - root - INFO - step: 2425 loss: 3.1765 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3831 global_avg_mtp_loss: 2.7934 +[titan] 2025-10-05 00:04:48,269 - root - INFO - lr: 4.9710e-05 gnorm: 1.42 [ 1:30:39<23:24:50] +[titan] 2025-10-05 00:04:59,129 - root - INFO - step: 2430 loss: 3.2456 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8539 +[titan] 2025-10-05 00:04:59,129 - root - INFO - lr: 4.9709e-05 
gnorm: 1.29 [ 1:30:50<23:24:33] +[titan] 2025-10-05 00:05:10,040 - root - INFO - step: 2435 loss: 3.0885 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3728 global_avg_mtp_loss: 2.7157 +[titan] 2025-10-05 00:05:10,040 - root - INFO - lr: 4.9707e-05 gnorm: 1.30 [ 1:31:01<23:24:17] +[titan] 2025-10-05 00:05:20,901 - root - INFO - step: 2440 loss: 3.1883 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3839 global_avg_mtp_loss: 2.8044 +[titan] 2025-10-05 00:05:20,901 - root - INFO - lr: 4.9706e-05 gnorm: 1.29 [ 1:31:12<23:24:00] +[titan] 2025-10-05 00:05:31,767 - root - INFO - step: 2445 loss: 3.1123 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3742 global_avg_mtp_loss: 2.7381 +[titan] 2025-10-05 00:05:31,767 - root - INFO - lr: 4.9704e-05 gnorm: 1.28 [ 1:31:23<23:23:44] +[titan] 2025-10-05 00:05:40,461 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:05:42,650 - root - INFO - step: 2450 loss: 3.1786 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3845 global_avg_mtp_loss: 2.7941 +[titan] 2025-10-05 00:05:42,650 - root - INFO - lr: 4.9703e-05 gnorm: 1.27 [ 1:31:34<23:23:27] +[titan] 2025-10-05 00:05:53,573 - root - INFO - step: 2455 loss: 3.1398 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3790 global_avg_mtp_loss: 2.7608 +[titan] 2025-10-05 00:05:53,573 - root - INFO - lr: 4.9701e-05 gnorm: 1.27 [ 1:31:45<23:23:12] +[titan] 2025-10-05 00:06:04,454 - root - INFO - step: 2460 loss: 3.2308 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8350 +[titan] 2025-10-05 00:06:04,455 - root - INFO - lr: 4.9700e-05 gnorm: 2.69 [ 1:31:56<23:22:55] +[titan] 2025-10-05 00:06:15,398 - root - INFO - step: 2465 loss: 3.1213 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7452 +[titan] 2025-10-05 00:06:15,398 - root - INFO - lr: 4.9698e-05 gnorm: 1.28 [ 1:32:07<23:22:40] +[titan] 2025-10-05 00:06:26,299 - root - INFO - step: 2470 loss: 3.1059 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3749 global_avg_mtp_loss: 2.7310 +[titan] 2025-10-05 00:06:26,299 - root - INFO - lr: 4.9697e-05 gnorm: 1.29 [ 1:32:17<23:22:24] +[titan] 2025-10-05 00:06:37,192 - root - INFO - step: 2475 loss: 3.1051 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3745 global_avg_mtp_loss: 2.7306 +[titan] 2025-10-05 00:06:37,192 - root - INFO - lr: 4.9696e-05 gnorm: 1.31 [ 1:32:28<23:22:08] +[titan] 2025-10-05 00:06:48,155 - root - INFO - step: 2480 loss: 3.1093 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.69 mfu: 41.93% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7359 +[titan] 2025-10-05 00:06:48,155 - root - INFO - lr: 4.9694e-05 
gnorm: 1.32 [ 1:32:39<23:21:53] +[titan] 2025-10-05 00:06:59,038 - root - INFO - step: 2485 loss: 3.1283 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3767 global_avg_mtp_loss: 2.7516 +[titan] 2025-10-05 00:06:59,038 - root - INFO - lr: 4.9693e-05 gnorm: 1.34 [ 1:32:50<23:21:37] +[titan] 2025-10-05 00:07:09,901 - root - INFO - step: 2490 loss: 3.1376 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7607 +[titan] 2025-10-05 00:07:09,901 - root - INFO - lr: 4.9691e-05 gnorm: 1.34 [ 1:33:01<23:21:21] +[titan] 2025-10-05 00:07:20,803 - root - INFO - step: 2495 loss: 3.1543 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7736 +[titan] 2025-10-05 00:07:20,803 - root - INFO - lr: 4.9690e-05 gnorm: 1.36 [ 1:33:12<23:21:05] +[titan] 2025-10-05 00:07:29,527 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:07:31,706 - root - INFO - step: 2500 loss: 3.1575 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7787 +[titan] 2025-10-05 00:07:31,706 - root - INFO - lr: 4.9688e-05 gnorm: 1.31 [ 1:33:23<23:20:49] +[titan] 2025-10-05 00:07:42,568 - root - INFO - step: 2505 loss: 3.1325 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3760 global_avg_mtp_loss: 2.7566 +[titan] 2025-10-05 00:07:42,568 - root - INFO - lr: 4.9687e-05 gnorm: 1.22 [ 1:33:34<23:20:33] +[titan] 2025-10-05 00:07:53,496 - root - INFO - step: 2510 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3718 global_avg_mtp_loss: 2.7142 +[titan] 2025-10-05 00:07:53,497 - root - INFO - lr: 4.9685e-05 gnorm: 1.31 [ 1:33:45<23:20:17] +[titan] 2025-10-05 00:08:04,378 - root - INFO - step: 2515 loss: 3.2003 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3862 global_avg_mtp_loss: 2.8141 +[titan] 2025-10-05 00:08:04,378 - root - INFO - lr: 4.9684e-05 gnorm: 1.43 [ 1:33:55<23:20:01] +[titan] 2025-10-05 00:08:15,255 - root - INFO - step: 2520 loss: 3.1816 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3847 global_avg_mtp_loss: 2.7969 +[titan] 2025-10-05 00:08:15,255 - root - INFO - lr: 4.9682e-05 gnorm: 1.38 [ 1:34:06<23:19:45] +[titan] 2025-10-05 00:08:26,136 - root - INFO - step: 2525 loss: 3.2579 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:08:26,136 - root - INFO - lr: 4.9681e-05 gnorm: 1.37 [ 1:34:17<23:19:29] +[titan] 2025-10-05 00:08:37,049 - root - INFO - step: 2530 loss: 3.1078 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7346 +[titan] 2025-10-05 00:08:37,049 - root - INFO - lr: 4.9679e-05 
gnorm: 1.28 [ 1:34:28<23:19:14] +[titan] 2025-10-05 00:08:48,047 - root - INFO - step: 2535 loss: 3.0953 memory: 118.84GiB(85.28%) tps: 29,797 tflops: 413.39 mfu: 41.80% global_avg_ntp_loss: 0.3719 global_avg_mtp_loss: 2.7233 +[titan] 2025-10-05 00:08:48,047 - root - INFO - lr: 4.9678e-05 gnorm: 1.25 [ 1:34:39<23:18:59] +[titan] 2025-10-05 00:08:58,919 - root - INFO - step: 2540 loss: 3.1620 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3803 global_avg_mtp_loss: 2.7817 +[titan] 2025-10-05 00:08:58,919 - root - INFO - lr: 4.9676e-05 gnorm: 1.26 [ 1:34:50<23:18:43] +[titan] 2025-10-05 00:09:09,786 - root - INFO - step: 2545 loss: 3.1667 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3815 global_avg_mtp_loss: 2.7852 +[titan] 2025-10-05 00:09:09,786 - root - INFO - lr: 4.9675e-05 gnorm: 1.40 [ 1:35:01<23:18:27] +[titan] 2025-10-05 00:09:18,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:09:20,677 - root - INFO - step: 2550 loss: 3.0790 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3704 global_avg_mtp_loss: 2.7086 +[titan] 2025-10-05 00:09:20,677 - root - INFO - lr: 4.9673e-05 gnorm: 1.34 [ 1:35:12<23:18:12] +[titan] 2025-10-05 00:09:31,556 - root - INFO - step: 2555 loss: 3.0389 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3662 global_avg_mtp_loss: 2.6727 +[titan] 2025-10-05 00:09:31,557 - root - INFO - lr: 4.9672e-05 gnorm: 1.31 [ 1:35:23<23:17:56] +[titan] 2025-10-05 00:09:42,516 - root - INFO - step: 2560 loss: 3.1285 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.3755 global_avg_mtp_loss: 2.7530 +[titan] 2025-10-05 00:09:42,516 - root - INFO - lr: 4.9670e-05 gnorm: 1.23 [ 1:35:34<23:17:41] +[titan] 2025-10-05 00:09:42,689 - root - INFO - Dumping profiler traces at step 2560 +[titan] 2025-10-05 00:09:42,725 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:09:53,687 - root - INFO - step: 2565 loss: 3.0635 memory: 118.84GiB(85.28%) tps: 29,334 tflops: 406.97 mfu: 41.15% global_avg_ntp_loss: 0.3659 global_avg_mtp_loss: 2.6976 +[titan] 2025-10-05 00:09:53,687 - root - INFO - lr: 4.9669e-05 gnorm: 1.33 [ 1:35:45<23:17:29] +[titan] 2025-10-05 00:10:04,566 - root - INFO - step: 2570 loss: 3.0420 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6772 +[titan] 2025-10-05 00:10:04,566 - root - INFO - lr: 4.9667e-05 gnorm: 1.29 [ 1:35:56<23:17:13] +[titan] 2025-10-05 00:10:15,470 - root - INFO - step: 2575 loss: 3.2085 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8213 +[titan] 2025-10-05 00:10:15,471 - root - INFO - lr: 4.9666e-05 gnorm: 1.30 [ 1:36:07<23:16:58] +[titan] 2025-10-05 00:10:26,384 - root - INFO - step: 2580 loss: 3.2105 
memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3855 global_avg_mtp_loss: 2.8250 +[titan] 2025-10-05 00:10:26,384 - root - INFO - lr: 4.9664e-05 gnorm: 1.29 [ 1:36:17<23:16:43] +[titan] 2025-10-05 00:10:37,260 - root - INFO - step: 2585 loss: 3.0856 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3698 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:10:37,261 - root - INFO - lr: 4.9663e-05 gnorm: 1.30 [ 1:36:28<23:16:27] +[titan] 2025-10-05 00:10:48,212 - root - INFO - step: 2590 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.3650 global_avg_mtp_loss: 2.6717 +[titan] 2025-10-05 00:10:48,212 - root - INFO - lr: 4.9661e-05 gnorm: 1.26 [ 1:36:39<23:16:12] +[titan] 2025-10-05 00:10:59,142 - root - INFO - step: 2595 loss: 3.1492 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.3799 global_avg_mtp_loss: 2.7693 +[titan] 2025-10-05 00:10:59,142 - root - INFO - lr: 4.9659e-05 gnorm: 1.24 [ 1:36:50<23:15:57] +[titan] 2025-10-05 00:11:07,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:11:10,032 - root - INFO - step: 2600 loss: 3.0911 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3727 global_avg_mtp_loss: 2.7185 +[titan] 2025-10-05 00:11:10,033 - root - INFO - lr: 4.9658e-05 gnorm: 1.22 [ 1:37:01<23:15:41] +[titan] 2025-10-05 00:11:20,915 - root - INFO - step: 2605 loss: 3.1578 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3808 global_avg_mtp_loss: 2.7770 +[titan] 2025-10-05 00:11:20,915 - root - INFO - lr: 4.9656e-05 gnorm: 1.26 [ 1:37:12<23:15:26] +[titan] 2025-10-05 00:11:31,815 - root - INFO - step: 2610 loss: 3.1088 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3741 global_avg_mtp_loss: 2.7348 +[titan] 2025-10-05 00:11:31,815 - root - INFO - lr: 4.9655e-05 gnorm: 1.25 [ 1:37:23<23:15:10] +[titan] 2025-10-05 00:11:42,699 - root - INFO - step: 2615 loss: 3.1165 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7413 +[titan] 2025-10-05 00:11:42,699 - root - INFO - lr: 4.9653e-05 gnorm: 1.30 [ 1:37:34<23:14:55] +[titan] 2025-10-05 00:11:53,594 - root - INFO - step: 2620 loss: 3.1397 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7617 +[titan] 2025-10-05 00:11:53,594 - root - INFO - lr: 4.9652e-05 gnorm: 1.27 [ 1:37:45<23:14:39] +[titan] 2025-10-05 00:12:04,505 - root - INFO - step: 2625 loss: 3.1215 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7463 +[titan] 2025-10-05 00:12:04,505 - root - INFO - lr: 4.9650e-05 gnorm: 1.33 [ 1:37:56<23:14:24] +[titan] 2025-10-05 00:12:15,389 - root - INFO - step: 2630 loss: 3.1525 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7737 +[titan] 2025-10-05 00:12:15,390 - root - INFO - lr: 4.9649e-05 
gnorm: 1.27 [ 1:38:06<23:14:08] +[titan] 2025-10-05 00:12:26,270 - root - INFO - step: 2635 loss: 3.1176 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3751 global_avg_mtp_loss: 2.7424 +[titan] 2025-10-05 00:12:26,271 - root - INFO - lr: 4.9647e-05 gnorm: 1.30 [ 1:38:17<23:13:53] +[titan] 2025-10-05 00:12:37,153 - root - INFO - step: 2640 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6977 +[titan] 2025-10-05 00:12:37,153 - root - INFO - lr: 4.9645e-05 gnorm: 1.28 [ 1:38:28<23:13:37] +[titan] 2025-10-05 00:12:48,055 - root - INFO - step: 2645 loss: 3.1119 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7387 +[titan] 2025-10-05 00:12:48,055 - root - INFO - lr: 4.9644e-05 gnorm: 1.30 [ 1:38:39<23:13:22] +[titan] 2025-10-05 00:12:56,795 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:12:58,983 - root - INFO - step: 2650 loss: 3.0548 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6872 +[titan] 2025-10-05 00:12:58,983 - root - INFO - lr: 4.9642e-05 gnorm: 1.23 [ 1:38:50<23:13:07] +[titan] 2025-10-05 00:13:09,879 - root - INFO - step: 2655 loss: 3.0496 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3671 global_avg_mtp_loss: 2.6826 +[titan] 2025-10-05 00:13:09,879 - root - INFO - lr: 4.9641e-05 gnorm: 1.28 [ 1:39:01<23:12:52] +[titan] 2025-10-05 00:13:20,805 - root - INFO - step: 2660 loss: 3.1186 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.3759 global_avg_mtp_loss: 2.7427 +[titan] 2025-10-05 00:13:20,805 - root - INFO - lr: 4.9639e-05 gnorm: 1.25 [ 1:39:12<23:12:37] +[titan] 2025-10-05 00:13:31,679 - root - INFO - step: 2665 loss: 3.0573 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3678 global_avg_mtp_loss: 2.6895 +[titan] 2025-10-05 00:13:31,680 - root - INFO - lr: 4.9637e-05 gnorm: 1.25 [ 1:39:23<23:12:21] +[titan] 2025-10-05 00:13:42,558 - root - INFO - step: 2670 loss: 3.0570 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3663 global_avg_mtp_loss: 2.6907 +[titan] 2025-10-05 00:13:42,558 - root - INFO - lr: 4.9636e-05 gnorm: 1.26 [ 1:39:34<23:12:06] +[titan] 2025-10-05 00:13:53,472 - root - INFO - step: 2675 loss: 3.1878 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8028 +[titan] 2025-10-05 00:13:53,472 - root - INFO - lr: 4.9634e-05 gnorm: 1.31 [ 1:39:45<23:11:51] +[titan] 2025-10-05 00:14:04,364 - root - INFO - step: 2680 loss: 3.1135 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3738 global_avg_mtp_loss: 2.7397 +[titan] 2025-10-05 00:14:04,365 - root - INFO - lr: 4.9633e-05 
gnorm: 1.22 [ 1:39:55<23:11:35] +[titan] 2025-10-05 00:14:15,279 - root - INFO - step: 2685 loss: 3.0010 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3606 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:14:15,279 - root - INFO - lr: 4.9631e-05 gnorm: 1.32 [ 1:40:06<23:11:20] +[titan] 2025-10-05 00:14:26,223 - root - INFO - step: 2690 loss: 3.1084 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.3737 global_avg_mtp_loss: 2.7347 +[titan] 2025-10-05 00:14:26,223 - root - INFO - lr: 4.9629e-05 gnorm: 1.28 [ 1:40:17<23:11:06] +[titan] 2025-10-05 00:14:37,114 - root - INFO - step: 2695 loss: 3.1301 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3758 global_avg_mtp_loss: 2.7543 +[titan] 2025-10-05 00:14:37,114 - root - INFO - lr: 4.9628e-05 gnorm: 1.31 [ 1:40:28<23:10:51] +[titan] 2025-10-05 00:14:45,831 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:14:48,028 - root - INFO - step: 2700 loss: 3.0874 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3690 global_avg_mtp_loss: 2.7184 +[titan] 2025-10-05 00:14:48,029 - root - INFO - lr: 4.9626e-05 gnorm: 1.38 [ 1:40:39<23:10:36] +[titan] 2025-10-05 00:14:58,931 - root - INFO - step: 2705 loss: 3.1260 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.3750 global_avg_mtp_loss: 2.7509 +[titan] 2025-10-05 00:14:58,931 - root - INFO - lr: 4.9625e-05 gnorm: 1.28 [ 1:40:50<23:10:21] +[titan] 2025-10-05 00:15:09,812 - root - INFO - step: 2710 loss: 3.0477 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3643 global_avg_mtp_loss: 2.6834 +[titan] 2025-10-05 00:15:09,812 - root - INFO - lr: 4.9623e-05 gnorm: 1.29 [ 1:41:01<23:10:05] +[titan] 2025-10-05 00:15:20,681 - root - INFO - step: 2715 loss: 2.9784 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3575 global_avg_mtp_loss: 2.6209 +[titan] 2025-10-05 00:15:20,681 - root - INFO - lr: 4.9621e-05 gnorm: 1.39 [ 1:41:12<23:09:50] +[titan] 2025-10-05 00:15:31,544 - root - INFO - step: 2720 loss: 3.0989 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3712 global_avg_mtp_loss: 2.7276 +[titan] 2025-10-05 00:15:31,544 - root - INFO - lr: 4.9620e-05 gnorm: 1.28 [ 1:41:23<23:09:34] +[titan] 2025-10-05 00:15:42,481 - root - INFO - step: 2725 loss: 3.0279 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3634 global_avg_mtp_loss: 2.6645 +[titan] 2025-10-05 00:15:42,482 - root - INFO - lr: 4.9618e-05 gnorm: 1.38 [ 1:41:34<23:09:20] +[titan] 2025-10-05 00:15:53,371 - root - INFO - step: 2730 loss: 3.0629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3682 global_avg_mtp_loss: 2.6946 +[titan] 2025-10-05 00:15:53,371 - root - INFO - lr: 4.9616e-05 
gnorm: 1.27 [ 1:41:44<23:09:04] +[titan] 2025-10-05 00:16:04,250 - root - INFO - step: 2735 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6257 +[titan] 2025-10-05 00:16:04,250 - root - INFO - lr: 4.9615e-05 gnorm: 1.32 [ 1:41:55<23:08:49] +[titan] 2025-10-05 00:16:15,152 - root - INFO - step: 2740 loss: 3.0246 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6608 +[titan] 2025-10-05 00:16:15,152 - root - INFO - lr: 4.9613e-05 gnorm: 1.29 [ 1:42:06<23:08:34] +[titan] 2025-10-05 00:16:26,041 - root - INFO - step: 2745 loss: 3.1571 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7775 +[titan] 2025-10-05 00:16:26,041 - root - INFO - lr: 4.9611e-05 gnorm: 1.28 [ 1:42:17<23:08:19] +[titan] 2025-10-05 00:16:34,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:16:36,918 - root - INFO - step: 2750 loss: 3.0736 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3686 global_avg_mtp_loss: 2.7050 +[titan] 2025-10-05 00:16:36,919 - root - INFO - lr: 4.9610e-05 gnorm: 1.24 [ 1:42:28<23:08:04] +[titan] 2025-10-05 00:16:47,865 - root - INFO - step: 2755 loss: 2.9899 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6309 +[titan] 2025-10-05 00:16:47,865 - root - INFO - lr: 4.9608e-05 gnorm: 1.22 [ 1:42:39<23:07:49] +[titan] 2025-10-05 00:16:58,851 - root - INFO - step: 2760 loss: 3.0390 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.3657 global_avg_mtp_loss: 2.6733 +[titan] 2025-10-05 00:16:58,851 - root - INFO - lr: 4.9606e-05 gnorm: 1.33 [ 1:42:50<23:07:36] +[titan] 2025-10-05 00:17:09,727 - root - INFO - step: 2765 loss: 3.1133 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3740 global_avg_mtp_loss: 2.7394 +[titan] 2025-10-05 00:17:09,727 - root - INFO - lr: 4.9605e-05 gnorm: 1.30 [ 1:43:01<23:07:20] +[titan] 2025-10-05 00:17:20,607 - root - INFO - step: 2770 loss: 3.0638 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3695 global_avg_mtp_loss: 2.6943 +[titan] 2025-10-05 00:17:20,608 - root - INFO - lr: 4.9603e-05 gnorm: 1.35 [ 1:43:12<23:07:05] +[titan] 2025-10-05 00:17:31,517 - root - INFO - step: 2775 loss: 3.0938 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3716 global_avg_mtp_loss: 2.7222 +[titan] 2025-10-05 00:17:31,517 - root - INFO - lr: 4.9601e-05 gnorm: 1.26 [ 1:43:23<23:06:50] +[titan] 2025-10-05 00:17:42,399 - root - INFO - step: 2780 loss: 3.0126 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6519 +[titan] 2025-10-05 00:17:42,399 - root - INFO - lr: 4.9600e-05 
gnorm: 1.30 [ 1:43:33<23:06:35] +[titan] 2025-10-05 00:17:53,331 - root - INFO - step: 2785 loss: 3.0873 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3714 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:17:53,331 - root - INFO - lr: 4.9598e-05 gnorm: 1.28 [ 1:43:44<23:06:21] +[titan] 2025-10-05 00:18:04,263 - root - INFO - step: 2790 loss: 3.0185 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.3627 global_avg_mtp_loss: 2.6559 +[titan] 2025-10-05 00:18:04,263 - root - INFO - lr: 4.9596e-05 gnorm: 1.33 [ 1:43:55<23:06:06] +[titan] 2025-10-05 00:18:15,157 - root - INFO - step: 2795 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3677 global_avg_mtp_loss: 2.6975 +[titan] 2025-10-05 00:18:15,157 - root - INFO - lr: 4.9595e-05 gnorm: 1.25 [ 1:44:06<23:05:51] +[titan] 2025-10-05 00:18:23,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:18:26,054 - root - INFO - step: 2800 loss: 3.0213 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6588 +[titan] 2025-10-05 00:18:26,054 - root - INFO - lr: 4.9593e-05 gnorm: 1.28 [ 1:44:17<23:05:37] +[titan] 2025-10-05 00:18:36,954 - root - INFO - step: 2805 loss: 3.1425 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3783 global_avg_mtp_loss: 2.7642 +[titan] 2025-10-05 00:18:36,954 - root - INFO - lr: 4.9591e-05 gnorm: 1.28 [ 1:44:28<23:05:22] +[titan] 2025-10-05 00:18:47,864 - root - INFO - step: 2810 loss: 3.0392 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.3638 global_avg_mtp_loss: 2.6754 +[titan] 2025-10-05 00:18:47,864 - root - INFO - lr: 4.9590e-05 gnorm: 1.27 [ 1:44:39<23:05:07] +[titan] 2025-10-05 00:18:58,796 - root - INFO - step: 2815 loss: 3.0728 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3684 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:18:58,796 - root - INFO - lr: 4.9588e-05 gnorm: 1.28 [ 1:44:50<23:04:53] +[titan] 2025-10-05 00:19:09,768 - root - INFO - step: 2820 loss: 3.0759 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.3697 global_avg_mtp_loss: 2.7062 +[titan] 2025-10-05 00:19:09,768 - root - INFO - lr: 4.9586e-05 gnorm: 1.28 [ 1:45:01<23:04:39] +[titan] 2025-10-05 00:19:20,659 - root - INFO - step: 2825 loss: 3.0518 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3667 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:19:20,660 - root - INFO - lr: 4.9585e-05 gnorm: 1.38 [ 1:45:12<23:04:24] +[titan] 2025-10-05 00:19:31,538 - root - INFO - step: 2830 loss: 3.1035 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7302 +[titan] 2025-10-05 00:19:31,538 - root - INFO - lr: 4.9583e-05 
gnorm: 1.34 [ 1:45:23<23:04:09] +[titan] 2025-10-05 00:19:42,419 - root - INFO - step: 2835 loss: 3.0685 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3683 global_avg_mtp_loss: 2.7002 +[titan] 2025-10-05 00:19:42,419 - root - INFO - lr: 4.9581e-05 gnorm: 1.37 [ 1:45:33<23:03:54] +[titan] 2025-10-05 00:19:53,306 - root - INFO - step: 2840 loss: 3.0223 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3652 global_avg_mtp_loss: 2.6571 +[titan] 2025-10-05 00:19:53,306 - root - INFO - lr: 4.9579e-05 gnorm: 1.32 [ 1:45:44<23:03:39] +[titan] 2025-10-05 00:20:04,219 - root - INFO - step: 2845 loss: 3.0274 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3633 global_avg_mtp_loss: 2.6641 +[titan] 2025-10-05 00:20:04,219 - root - INFO - lr: 4.9578e-05 gnorm: 1.28 [ 1:45:55<23:03:24] +[titan] 2025-10-05 00:20:12,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:20:15,145 - root - INFO - step: 2850 loss: 3.0430 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3661 global_avg_mtp_loss: 2.6770 +[titan] 2025-10-05 00:20:15,145 - root - INFO - lr: 4.9576e-05 gnorm: 1.26 [ 1:46:06<23:03:10] +[titan] 2025-10-05 00:20:26,027 - root - INFO - step: 2855 loss: 3.0893 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7209 +[titan] 2025-10-05 00:20:26,027 - root - INFO - lr: 4.9574e-05 gnorm: 1.27 [ 1:46:17<23:02:55] +[titan] 2025-10-05 00:20:36,904 - root - INFO - step: 2860 loss: 3.0960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3724 global_avg_mtp_loss: 2.7236 +[titan] 2025-10-05 00:20:36,904 - root - INFO - lr: 4.9573e-05 gnorm: 1.28 [ 1:46:28<23:02:40] +[titan] 2025-10-05 00:20:47,806 - root - INFO - step: 2865 loss: 3.1434 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:20:47,806 - root - INFO - lr: 4.9571e-05 gnorm: 1.30 [ 1:46:39<23:02:26] +[titan] 2025-10-05 00:20:58,761 - root - INFO - step: 2870 loss: 2.9969 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:20:58,761 - root - INFO - lr: 4.9569e-05 gnorm: 1.30 [ 1:46:50<23:02:12] +[titan] 2025-10-05 00:21:09,643 - root - INFO - step: 2875 loss: 3.0232 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3635 global_avg_mtp_loss: 2.6597 +[titan] 2025-10-05 00:21:09,643 - root - INFO - lr: 4.9567e-05 gnorm: 1.30 [ 1:47:01<23:01:57] +[titan] 2025-10-05 00:21:20,548 - root - INFO - step: 2880 loss: 2.9737 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3570 global_avg_mtp_loss: 2.6167 +[titan] 2025-10-05 00:21:20,548 - root - INFO - lr: 4.9566e-05 
gnorm: 1.28 [ 1:47:12<23:01:42] +[titan] 2025-10-05 00:21:31,529 - root - INFO - step: 2885 loss: 3.0875 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.3720 global_avg_mtp_loss: 2.7155 +[titan] 2025-10-05 00:21:31,529 - root - INFO - lr: 4.9564e-05 gnorm: 1.25 [ 1:47:23<23:01:29] +[titan] 2025-10-05 00:21:42,407 - root - INFO - step: 2890 loss: 3.0347 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6705 +[titan] 2025-10-05 00:21:42,407 - root - INFO - lr: 4.9562e-05 gnorm: 1.38 [ 1:47:33<23:01:14] +[titan] 2025-10-05 00:21:53,280 - root - INFO - step: 2895 loss: 3.0145 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3610 global_avg_mtp_loss: 2.6535 +[titan] 2025-10-05 00:21:53,280 - root - INFO - lr: 4.9560e-05 gnorm: 1.22 [ 1:47:44<23:00:59] +[titan] 2025-10-05 00:22:02,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:22:04,199 - root - INFO - step: 2900 loss: 3.1605 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-05 00:22:04,199 - root - INFO - lr: 4.9559e-05 gnorm: 1.35 [ 1:47:55<23:00:45] +[titan] 2025-10-05 00:22:15,084 - root - INFO - step: 2905 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3703 global_avg_mtp_loss: 2.7158 +[titan] 2025-10-05 00:22:15,084 - root - INFO - lr: 4.9557e-05 gnorm: 1.29 [ 1:48:06<23:00:30] +[titan] 2025-10-05 00:22:25,962 - root - INFO - step: 2910 loss: 3.0022 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6423 +[titan] 2025-10-05 00:22:25,962 - root - INFO - lr: 4.9555e-05 gnorm: 1.31 [ 1:48:17<23:00:15] +[titan] 2025-10-05 00:22:36,871 - root - INFO - step: 2915 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3797 global_avg_mtp_loss: 2.7783 +[titan] 2025-10-05 00:22:36,871 - root - INFO - lr: 4.9553e-05 gnorm: 1.42 [ 1:48:28<23:00:01] +[titan] 2025-10-05 00:22:47,815 - root - INFO - step: 2920 loss: 3.0326 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3653 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:22:47,815 - root - INFO - lr: 4.9552e-05 gnorm: 1.30 [ 1:48:39<22:59:47] +[titan] 2025-10-05 00:22:58,703 - root - INFO - step: 2925 loss: 3.0724 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3681 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:22:58,703 - root - INFO - lr: 4.9550e-05 gnorm: 1.34 [ 1:48:50<22:59:32] +[titan] 2025-10-05 00:23:09,632 - root - INFO - step: 2930 loss: 3.0482 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6818 +[titan] 2025-10-05 00:23:09,632 - root - INFO - lr: 4.9548e-05 
gnorm: 1.23 [ 1:49:01<22:59:18] +[titan] 2025-10-05 00:23:20,517 - root - INFO - step: 2935 loss: 2.9200 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5708 +[titan] 2025-10-05 00:23:20,517 - root - INFO - lr: 4.9546e-05 gnorm: 1.28 [ 1:49:12<22:59:03] +[titan] 2025-10-05 00:23:31,391 - root - INFO - step: 2940 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6615 +[titan] 2025-10-05 00:23:31,391 - root - INFO - lr: 4.9544e-05 gnorm: 1.25 [ 1:49:22<22:58:48] +[titan] 2025-10-05 00:23:42,322 - root - INFO - step: 2945 loss: 3.1473 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7704 +[titan] 2025-10-05 00:23:42,322 - root - INFO - lr: 4.9543e-05 gnorm: 1.35 [ 1:49:33<22:58:34] +[titan] 2025-10-05 00:23:51,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:23:53,182 - root - INFO - step: 2950 loss: 3.0250 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6620 +[titan] 2025-10-05 00:23:53,183 - root - INFO - lr: 4.9541e-05 gnorm: 1.26 [ 1:49:44<22:58:19] +[titan] 2025-10-05 00:24:04,100 - root - INFO - step: 2955 loss: 2.9887 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3579 global_avg_mtp_loss: 2.6308 +[titan] 2025-10-05 00:24:04,100 - root - INFO - lr: 4.9539e-05 gnorm: 1.32 [ 1:49:55<22:58:05] +[titan] 2025-10-05 00:24:14,957 - root - INFO - step: 2960 loss: 2.9752 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6188 +[titan] 2025-10-05 00:24:14,957 - root - INFO - lr: 4.9537e-05 gnorm: 1.29 [ 1:50:06<22:57:50] +[titan] 2025-10-05 00:24:25,824 - root - INFO - step: 2965 loss: 3.0670 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3670 global_avg_mtp_loss: 2.7000 +[titan] 2025-10-05 00:24:25,824 - root - INFO - lr: 4.9535e-05 gnorm: 1.36 [ 1:50:17<22:57:35] +[titan] 2025-10-05 00:24:36,677 - root - INFO - step: 2970 loss: 3.0105 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3605 global_avg_mtp_loss: 2.6500 +[titan] 2025-10-05 00:24:36,677 - root - INFO - lr: 4.9534e-05 gnorm: 1.28 [ 1:50:28<22:57:20] +[titan] 2025-10-05 00:24:47,550 - root - INFO - step: 2975 loss: 3.0798 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7113 +[titan] 2025-10-05 00:24:47,550 - root - INFO - lr: 4.9532e-05 gnorm: 1.26 [ 1:50:39<22:57:06] +[titan] 2025-10-05 00:24:58,508 - root - INFO - step: 2980 loss: 3.0933 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7212 +[titan] 2025-10-05 00:24:58,508 - root - INFO - lr: 4.9530e-05 
gnorm: 1.34 [ 1:50:50<22:56:52] +[titan] 2025-10-05 00:25:09,436 - root - INFO - step: 2985 loss: 2.9918 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6318 +[titan] 2025-10-05 00:25:09,436 - root - INFO - lr: 4.9528e-05 gnorm: 1.29 [ 1:51:00<22:56:38] +[titan] 2025-10-05 00:25:20,336 - root - INFO - step: 2990 loss: 3.0864 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3699 global_avg_mtp_loss: 2.7165 +[titan] 2025-10-05 00:25:20,336 - root - INFO - lr: 4.9526e-05 gnorm: 1.30 [ 1:51:11<22:56:24] +[titan] 2025-10-05 00:25:31,210 - root - INFO - step: 2995 loss: 3.0152 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3613 global_avg_mtp_loss: 2.6538 +[titan] 2025-10-05 00:25:31,210 - root - INFO - lr: 4.9525e-05 gnorm: 1.34 [ 1:51:22<22:56:09] +[titan] 2025-10-05 00:25:39,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:25:42,077 - root - INFO - step: 3000 loss: 2.9639 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6087 +[titan] 2025-10-05 00:25:42,077 - root - INFO - lr: 4.9523e-05 gnorm: 1.20 [ 1:51:33<22:55:54] +[titan] 2025-10-05 00:25:52,956 - root - INFO - step: 3005 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6274 +[titan] 2025-10-05 00:25:52,956 - root - INFO - lr: 4.9521e-05 gnorm: 1.25 [ 1:51:44<22:55:40] +[titan] 2025-10-05 00:26:03,943 - root - INFO - step: 3010 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:26:03,943 - root - INFO - lr: 4.9519e-05 gnorm: 1.25 [ 1:51:55<22:55:26] +[titan] 2025-10-05 00:26:14,799 - root - INFO - step: 3015 loss: 2.9622 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6081 +[titan] 2025-10-05 00:26:14,799 - root - INFO - lr: 4.9517e-05 gnorm: 1.20 [ 1:52:06<22:55:12] +[titan] 2025-10-05 00:26:25,658 - root - INFO - step: 3020 loss: 3.1014 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7293 +[titan] 2025-10-05 00:26:25,658 - root - INFO - lr: 4.9515e-05 gnorm: 1.29 [ 1:52:17<22:54:57] +[titan] 2025-10-05 00:26:36,501 - root - INFO - step: 3025 loss: 3.0035 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.3588 global_avg_mtp_loss: 2.6447 +[titan] 2025-10-05 00:26:36,501 - root - INFO - lr: 4.9514e-05 gnorm: 1.22 [ 1:52:28<22:54:42] +[titan] 2025-10-05 00:26:47,370 - root - INFO - step: 3030 loss: 2.9868 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3589 global_avg_mtp_loss: 2.6279 +[titan] 2025-10-05 00:26:47,370 - root - INFO - lr: 4.9512e-05 
gnorm: 1.28 [ 1:52:38<22:54:27] +[titan] 2025-10-05 00:26:58,255 - root - INFO - step: 3035 loss: 3.0690 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.7021 +[titan] 2025-10-05 00:26:58,255 - root - INFO - lr: 4.9510e-05 gnorm: 1.29 [ 1:52:49<22:54:13] +[titan] 2025-10-05 00:27:09,176 - root - INFO - step: 3040 loss: 2.9415 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5893 +[titan] 2025-10-05 00:27:09,176 - root - INFO - lr: 4.9508e-05 gnorm: 1.23 [ 1:53:00<22:53:59] +[titan] 2025-10-05 00:27:20,081 - root - INFO - step: 3045 loss: 2.9565 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.6029 +[titan] 2025-10-05 00:27:20,081 - root - INFO - lr: 4.9506e-05 gnorm: 1.31 [ 1:53:11<22:53:45] +[titan] 2025-10-05 00:27:28,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:27:30,926 - root - INFO - step: 3050 loss: 3.0382 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.6713 +[titan] 2025-10-05 00:27:30,926 - root - INFO - lr: 4.9504e-05 gnorm: 1.32 [ 1:53:22<22:53:30] +[titan] 2025-10-05 00:27:41,788 - root - INFO - step: 3055 loss: 2.9038 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5556 +[titan] 2025-10-05 00:27:41,788 - root - INFO - lr: 4.9502e-05 gnorm: 1.27 [ 1:53:33<22:53:15] +[titan] 2025-10-05 00:27:52,674 - root - INFO - step: 3060 loss: 3.0259 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3639 global_avg_mtp_loss: 2.6619 +[titan] 2025-10-05 00:27:52,674 - root - INFO - lr: 4.9501e-05 gnorm: 1.32 [ 1:53:44<22:53:01] +[titan] 2025-10-05 00:28:03,564 - root - INFO - step: 3065 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6732 +[titan] 2025-10-05 00:28:03,564 - root - INFO - lr: 4.9499e-05 gnorm: 1.39 [ 1:53:55<22:52:46] +[titan] 2025-10-05 00:28:14,505 - root - INFO - step: 3070 loss: 2.9931 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.3595 global_avg_mtp_loss: 2.6336 +[titan] 2025-10-05 00:28:14,505 - root - INFO - lr: 4.9497e-05 gnorm: 1.46 [ 1:54:06<22:52:33] +[titan] 2025-10-05 00:28:19,011 - root - INFO - Dumping profiler traces at step 3072 +[titan] 2025-10-05 00:28:19,046 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:28:25,635 - root - INFO - step: 3075 loss: 2.9714 memory: 118.84GiB(85.28%) tps: 29,442 tflops: 408.46 mfu: 41.30% global_avg_ntp_loss: 0.3583 global_avg_mtp_loss: 2.6131 +[titan] 2025-10-05 00:28:25,635 - root - INFO - lr: 4.9495e-05 gnorm: 1.38 [ 1:54:17<22:52:21] +[titan] 2025-10-05 00:28:36,484 - root - INFO - step: 3080 loss: 3.0383 
memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3644 global_avg_mtp_loss: 2.6739 +[titan] 2025-10-05 00:28:36,484 - root - INFO - lr: 4.9493e-05 gnorm: 1.27 [ 1:54:28<22:52:07] +[titan] 2025-10-05 00:28:47,350 - root - INFO - step: 3085 loss: 3.0016 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:28:47,350 - root - INFO - lr: 4.9491e-05 gnorm: 1.28 [ 1:54:38<22:51:52] +[titan] 2025-10-05 00:28:58,198 - root - INFO - step: 3090 loss: 2.8733 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 00:28:58,198 - root - INFO - lr: 4.9489e-05 gnorm: 1.28 [ 1:54:49<22:51:37] +[titan] 2025-10-05 00:29:09,096 - root - INFO - step: 3095 loss: 3.0415 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6767 +[titan] 2025-10-05 00:29:09,096 - root - INFO - lr: 4.9487e-05 gnorm: 1.33 [ 1:55:00<22:51:23] +[titan] 2025-10-05 00:29:17,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:29:19,960 - root - INFO - step: 3100 loss: 2.9482 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.5947 +[titan] 2025-10-05 00:29:19,960 - root - INFO - lr: 4.9485e-05 gnorm: 1.33 [ 1:55:11<22:51:09] +[titan] 2025-10-05 00:29:30,867 - root - INFO - step: 3105 loss: 2.9859 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6283 +[titan] 2025-10-05 00:29:30,868 - root - INFO - lr: 4.9484e-05 gnorm: 1.27 [ 1:55:22<22:50:55] +[titan] 2025-10-05 00:29:41,783 - root - INFO - step: 3110 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6306 +[titan] 2025-10-05 00:29:41,784 - root - INFO - lr: 4.9482e-05 gnorm: 1.30 [ 1:55:33<22:50:41] +[titan] 2025-10-05 00:29:52,657 - root - INFO - step: 3115 loss: 2.9941 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6352 +[titan] 2025-10-05 00:29:52,657 - root - INFO - lr: 4.9480e-05 gnorm: 1.24 [ 1:55:44<22:50:26] +[titan] 2025-10-05 00:30:03,529 - root - INFO - step: 3120 loss: 3.0041 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3611 global_avg_mtp_loss: 2.6431 +[titan] 2025-10-05 00:30:03,529 - root - INFO - lr: 4.9478e-05 gnorm: 1.22 [ 1:55:55<22:50:12] +[titan] 2025-10-05 00:30:14,438 - root - INFO - step: 3125 loss: 2.9712 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6162 +[titan] 2025-10-05 00:30:14,438 - root - INFO - lr: 4.9476e-05 gnorm: 1.28 [ 1:56:05<22:49:58] +[titan] 2025-10-05 00:30:25,289 - root - INFO - step: 3130 loss: 2.9425 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:30:25,289 - root - INFO - lr: 4.9474e-05 
gnorm: 1.28 [ 1:56:16<22:49:43] +[titan] 2025-10-05 00:30:36,160 - root - INFO - step: 3135 loss: 3.0775 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3696 global_avg_mtp_loss: 2.7079 +[titan] 2025-10-05 00:30:36,160 - root - INFO - lr: 4.9472e-05 gnorm: 1.26 [ 1:56:27<22:49:29] +[titan] 2025-10-05 00:30:47,054 - root - INFO - step: 3140 loss: 3.0122 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6492 +[titan] 2025-10-05 00:30:47,054 - root - INFO - lr: 4.9470e-05 gnorm: 1.22 [ 1:56:38<22:49:15] +[titan] 2025-10-05 00:30:57,914 - root - INFO - step: 3145 loss: 3.0169 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6560 +[titan] 2025-10-05 00:30:57,914 - root - INFO - lr: 4.9468e-05 gnorm: 1.27 [ 1:56:49<22:49:00] +[titan] 2025-10-05 00:31:06,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:31:08,816 - root - INFO - step: 3150 loss: 2.9327 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3545 global_avg_mtp_loss: 2.5782 +[titan] 2025-10-05 00:31:08,816 - root - INFO - lr: 4.9466e-05 gnorm: 1.26 [ 1:57:00<22:48:46] +[titan] 2025-10-05 00:31:19,715 - root - INFO - step: 3155 loss: 3.0434 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3631 global_avg_mtp_loss: 2.6803 +[titan] 2025-10-05 00:31:19,715 - root - INFO - lr: 4.9464e-05 gnorm: 1.33 [ 1:57:11<22:48:32] +[titan] 2025-10-05 00:31:30,598 - root - INFO - step: 3160 loss: 2.9152 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5676 +[titan] 2025-10-05 00:31:30,598 - root - INFO - lr: 4.9462e-05 gnorm: 1.28 [ 1:57:22<22:48:18] +[titan] 2025-10-05 00:31:41,468 - root - INFO - step: 3165 loss: 3.0228 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6602 +[titan] 2025-10-05 00:31:41,469 - root - INFO - lr: 4.9460e-05 gnorm: 1.32 [ 1:57:33<22:48:04] +[titan] 2025-10-05 00:31:52,401 - root - INFO - step: 3170 loss: 2.9954 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:31:52,401 - root - INFO - lr: 4.9459e-05 gnorm: 1.39 [ 1:57:43<22:47:50] +[titan] 2025-10-05 00:32:03,274 - root - INFO - step: 3175 loss: 2.9805 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.6231 +[titan] 2025-10-05 00:32:03,274 - root - INFO - lr: 4.9457e-05 gnorm: 1.26 [ 1:57:54<22:47:36] +[titan] 2025-10-05 00:32:14,178 - root - INFO - step: 3180 loss: 3.0141 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3598 global_avg_mtp_loss: 2.6543 +[titan] 2025-10-05 00:32:14,178 - root - INFO - lr: 4.9455e-05 
gnorm: 1.31 [ 1:58:05<22:47:22] +[titan] 2025-10-05 00:32:25,055 - root - INFO - step: 3185 loss: 3.0493 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3641 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:32:25,055 - root - INFO - lr: 4.9453e-05 gnorm: 1.37 [ 1:58:16<22:47:08] +[titan] 2025-10-05 00:32:35,936 - root - INFO - step: 3190 loss: 2.9654 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6093 +[titan] 2025-10-05 00:32:35,936 - root - INFO - lr: 4.9451e-05 gnorm: 1.29 [ 1:58:27<22:46:54] +[titan] 2025-10-05 00:32:46,815 - root - INFO - step: 3195 loss: 2.9889 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3566 global_avg_mtp_loss: 2.6323 +[titan] 2025-10-05 00:32:46,815 - root - INFO - lr: 4.9449e-05 gnorm: 1.28 [ 1:58:38<22:46:40] +[titan] 2025-10-05 00:32:55,521 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:32:57,705 - root - INFO - step: 3200 loss: 2.9502 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.5953 +[titan] 2025-10-05 00:32:57,705 - root - INFO - lr: 4.9447e-05 gnorm: 1.30 [ 1:58:49<22:46:26] +[titan] 2025-10-05 00:33:08,681 - root - INFO - step: 3205 loss: 2.9709 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.22 mfu: 41.88% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6155 +[titan] 2025-10-05 00:33:08,681 - root - INFO - lr: 4.9445e-05 gnorm: 1.23 [ 1:59:00<22:46:13] +[titan] 2025-10-05 00:33:19,557 - root - INFO - step: 3210 loss: 2.9185 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5693 +[titan] 2025-10-05 00:33:19,558 - root - INFO - lr: 4.9443e-05 gnorm: 1.28 [ 1:59:11<22:45:59] +[titan] 2025-10-05 00:33:30,432 - root - INFO - step: 3215 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3514 global_avg_mtp_loss: 2.5956 +[titan] 2025-10-05 00:33:30,432 - root - INFO - lr: 4.9441e-05 gnorm: 1.39 [ 1:59:21<22:45:44] +[titan] 2025-10-05 00:33:41,300 - root - INFO - step: 3220 loss: 3.0300 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3628 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:33:41,300 - root - INFO - lr: 4.9439e-05 gnorm: 1.32 [ 1:59:32<22:45:30] +[titan] 2025-10-05 00:33:52,166 - root - INFO - step: 3225 loss: 3.0123 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6515 +[titan] 2025-10-05 00:33:52,166 - root - INFO - lr: 4.9437e-05 gnorm: 1.29 [ 1:59:43<22:45:16] +[titan] 2025-10-05 00:34:03,015 - root - INFO - step: 3230 loss: 3.0282 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3622 global_avg_mtp_loss: 2.6660 +[titan] 2025-10-05 00:34:03,015 - root - INFO - lr: 4.9435e-05 
gnorm: 1.29 [ 1:59:54<22:45:01] +[titan] 2025-10-05 00:34:13,972 - root - INFO - step: 3235 loss: 3.0440 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3646 global_avg_mtp_loss: 2.6794 +[titan] 2025-10-05 00:34:13,973 - root - INFO - lr: 4.9433e-05 gnorm: 1.27 [ 2:00:05<22:44:48] +[titan] 2025-10-05 00:34:24,817 - root - INFO - step: 3240 loss: 2.9616 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:34:24,817 - root - INFO - lr: 4.9431e-05 gnorm: 1.21 [ 2:00:16<22:44:34] +[titan] 2025-10-05 00:34:35,664 - root - INFO - step: 3245 loss: 3.0402 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6760 +[titan] 2025-10-05 00:34:35,664 - root - INFO - lr: 4.9429e-05 gnorm: 1.23 [ 2:00:27<22:44:19] +[titan] 2025-10-05 00:34:44,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:34:46,540 - root - INFO - step: 3250 loss: 3.0298 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3617 global_avg_mtp_loss: 2.6681 +[titan] 2025-10-05 00:34:46,540 - root - INFO - lr: 4.9427e-05 gnorm: 1.26 [ 2:00:38<22:44:05] +[titan] 2025-10-05 00:34:57,421 - root - INFO - step: 3255 loss: 2.9633 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6078 +[titan] 2025-10-05 00:34:57,421 - root - INFO - lr: 4.9425e-05 gnorm: 1.31 [ 2:00:48<22:43:51] +[titan] 2025-10-05 00:35:08,296 - root - INFO - step: 3260 loss: 2.9911 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6340 +[titan] 2025-10-05 00:35:08,296 - root - INFO - lr: 4.9423e-05 gnorm: 1.27 [ 2:00:59<22:43:37] +[titan] 2025-10-05 00:35:19,241 - root - INFO - step: 3265 loss: 2.9592 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6039 +[titan] 2025-10-05 00:35:19,242 - root - INFO - lr: 4.9421e-05 gnorm: 1.30 [ 2:01:10<22:43:24] +[titan] 2025-10-05 00:35:30,115 - root - INFO - step: 3270 loss: 2.9685 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6125 +[titan] 2025-10-05 00:35:30,115 - root - INFO - lr: 4.9419e-05 gnorm: 1.33 [ 2:01:21<22:43:10] +[titan] 2025-10-05 00:35:40,981 - root - INFO - step: 3275 loss: 3.0649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3675 global_avg_mtp_loss: 2.6974 +[titan] 2025-10-05 00:35:40,981 - root - INFO - lr: 4.9417e-05 gnorm: 1.36 [ 2:01:32<22:42:56] +[titan] 2025-10-05 00:35:51,879 - root - INFO - step: 3280 loss: 2.9994 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6401 +[titan] 2025-10-05 00:35:51,879 - root - INFO - lr: 4.9415e-05 
gnorm: 1.31 [ 2:01:43<22:42:42] +[titan] 2025-10-05 00:36:02,779 - root - INFO - step: 3285 loss: 2.9516 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3524 global_avg_mtp_loss: 2.5992 +[titan] 2025-10-05 00:36:02,779 - root - INFO - lr: 4.9413e-05 gnorm: 1.24 [ 2:01:54<22:42:28] +[titan] 2025-10-05 00:36:13,718 - root - INFO - step: 3290 loss: 3.0135 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6533 +[titan] 2025-10-05 00:36:13,718 - root - INFO - lr: 4.9411e-05 gnorm: 1.32 [ 2:02:05<22:42:15] +[titan] 2025-10-05 00:36:24,612 - root - INFO - step: 3295 loss: 2.9374 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3515 global_avg_mtp_loss: 2.5859 +[titan] 2025-10-05 00:36:24,613 - root - INFO - lr: 4.9409e-05 gnorm: 1.30 [ 2:02:16<22:42:01] +[titan] 2025-10-05 00:36:33,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:36:35,569 - root - INFO - step: 3300 loss: 3.0216 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6591 +[titan] 2025-10-05 00:36:35,570 - root - INFO - lr: 4.9407e-05 gnorm: 1.29 [ 2:02:27<22:41:48] +[titan] 2025-10-05 00:36:46,479 - root - INFO - step: 3305 loss: 2.9748 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6193 +[titan] 2025-10-05 00:36:46,480 - root - INFO - lr: 4.9405e-05 gnorm: 1.29 [ 2:02:38<22:41:34] +[titan] 2025-10-05 00:36:57,349 - root - INFO - step: 3310 loss: 2.9636 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3540 global_avg_mtp_loss: 2.6095 +[titan] 2025-10-05 00:36:57,350 - root - INFO - lr: 4.9403e-05 gnorm: 1.18 [ 2:02:48<22:41:20] +[titan] 2025-10-05 00:37:08,233 - root - INFO - step: 3315 loss: 2.9774 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6224 +[titan] 2025-10-05 00:37:08,233 - root - INFO - lr: 4.9401e-05 gnorm: 1.24 [ 2:02:59<22:41:07] +[titan] 2025-10-05 00:37:19,133 - root - INFO - step: 3320 loss: 2.9377 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:37:19,133 - root - INFO - lr: 4.9399e-05 gnorm: 1.24 [ 2:03:10<22:40:53] +[titan] 2025-10-05 00:37:29,998 - root - INFO - step: 3325 loss: 2.8934 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:37:29,999 - root - INFO - lr: 4.9397e-05 gnorm: 1.31 [ 2:03:21<22:40:39] +[titan] 2025-10-05 00:37:40,921 - root - INFO - step: 3330 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3596 global_avg_mtp_loss: 2.6407 +[titan] 2025-10-05 00:37:40,922 - root - INFO - lr: 4.9395e-05 
gnorm: 1.29 [ 2:03:32<22:40:25] +[titan] 2025-10-05 00:37:51,784 - root - INFO - step: 3335 loss: 2.9450 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5927 +[titan] 2025-10-05 00:37:51,784 - root - INFO - lr: 4.9392e-05 gnorm: 1.29 [ 2:03:43<22:40:11] +[titan] 2025-10-05 00:38:02,640 - root - INFO - step: 3340 loss: 2.9243 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3497 global_avg_mtp_loss: 2.5746 +[titan] 2025-10-05 00:38:02,640 - root - INFO - lr: 4.9390e-05 gnorm: 1.24 [ 2:03:54<22:39:57] +[titan] 2025-10-05 00:38:13,559 - root - INFO - step: 3345 loss: 2.9258 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5767 +[titan] 2025-10-05 00:38:13,559 - root - INFO - lr: 4.9388e-05 gnorm: 1.32 [ 2:04:05<22:39:44] +[titan] 2025-10-05 00:38:22,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:38:24,447 - root - INFO - step: 3350 loss: 2.9893 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3573 global_avg_mtp_loss: 2.6320 +[titan] 2025-10-05 00:38:24,447 - root - INFO - lr: 4.9386e-05 gnorm: 1.23 [ 2:04:15<22:39:30] +[titan] 2025-10-05 00:38:35,319 - root - INFO - step: 3355 loss: 2.8550 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3405 global_avg_mtp_loss: 2.5146 +[titan] 2025-10-05 00:38:35,319 - root - INFO - lr: 4.9384e-05 gnorm: 1.25 [ 2:04:26<22:39:16] +[titan] 2025-10-05 00:38:46,199 - root - INFO - step: 3360 loss: 2.8891 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:38:46,199 - root - INFO - lr: 4.9382e-05 gnorm: 1.31 [ 2:04:37<22:39:02] +[titan] 2025-10-05 00:38:57,161 - root - INFO - step: 3365 loss: 2.9521 memory: 118.84GiB(85.28%) tps: 29,893 tflops: 414.71 mfu: 41.93% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.5991 +[titan] 2025-10-05 00:38:57,161 - root - INFO - lr: 4.9380e-05 gnorm: 1.25 [ 2:04:48<22:38:49] +[titan] 2025-10-05 00:39:08,046 - root - INFO - step: 3370 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.5919 +[titan] 2025-10-05 00:39:08,046 - root - INFO - lr: 4.9378e-05 gnorm: 1.32 [ 2:04:59<22:38:36] +[titan] 2025-10-05 00:39:18,937 - root - INFO - step: 3375 loss: 2.9184 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5694 +[titan] 2025-10-05 00:39:18,937 - root - INFO - lr: 4.9376e-05 gnorm: 1.25 [ 2:05:10<22:38:22] +[titan] 2025-10-05 00:39:29,827 - root - INFO - step: 3380 loss: 2.9621 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6080 +[titan] 2025-10-05 00:39:29,827 - root - INFO - lr: 4.9374e-05 
gnorm: 1.24 [ 2:05:21<22:38:08] +[titan] 2025-10-05 00:39:40,719 - root - INFO - step: 3385 loss: 2.9011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3465 global_avg_mtp_loss: 2.5547 +[titan] 2025-10-05 00:39:40,719 - root - INFO - lr: 4.9372e-05 gnorm: 1.22 [ 2:05:32<22:37:54] +[titan] 2025-10-05 00:39:51,594 - root - INFO - step: 3390 loss: 2.9910 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6324 +[titan] 2025-10-05 00:39:51,595 - root - INFO - lr: 4.9370e-05 gnorm: 1.24 [ 2:05:43<22:37:41] +[titan] 2025-10-05 00:40:02,576 - root - INFO - step: 3395 loss: 2.9436 memory: 118.84GiB(85.28%) tps: 29,839 tflops: 413.97 mfu: 41.86% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5907 +[titan] 2025-10-05 00:40:02,577 - root - INFO - lr: 4.9367e-05 gnorm: 1.26 [ 2:05:54<22:37:28] +[titan] 2025-10-05 00:40:11,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:40:13,489 - root - INFO - step: 3400 loss: 2.9838 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3569 global_avg_mtp_loss: 2.6269 +[titan] 2025-10-05 00:40:13,489 - root - INFO - lr: 4.9365e-05 gnorm: 1.27 [ 2:06:05<22:37:15] +[titan] 2025-10-05 00:40:24,371 - root - INFO - step: 3405 loss: 3.0515 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3666 global_avg_mtp_loss: 2.6849 +[titan] 2025-10-05 00:40:24,371 - root - INFO - lr: 4.9363e-05 gnorm: 1.23 [ 2:06:15<22:37:01] +[titan] 2025-10-05 00:40:35,244 - root - INFO - step: 3410 loss: 2.9631 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3556 global_avg_mtp_loss: 2.6074 +[titan] 2025-10-05 00:40:35,244 - root - INFO - lr: 4.9361e-05 gnorm: 1.28 [ 2:06:26<22:36:47] +[titan] 2025-10-05 00:40:46,133 - root - INFO - step: 3415 loss: 2.9578 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3547 global_avg_mtp_loss: 2.6032 +[titan] 2025-10-05 00:40:46,133 - root - INFO - lr: 4.9359e-05 gnorm: 1.23 [ 2:06:37<22:36:33] +[titan] 2025-10-05 00:40:57,009 - root - INFO - step: 3420 loss: 2.9329 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3498 global_avg_mtp_loss: 2.5832 +[titan] 2025-10-05 00:40:57,009 - root - INFO - lr: 4.9357e-05 gnorm: 1.19 [ 2:06:48<22:36:20] +[titan] 2025-10-05 00:41:07,937 - root - INFO - step: 3425 loss: 2.9564 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.6041 +[titan] 2025-10-05 00:41:07,937 - root - INFO - lr: 4.9355e-05 gnorm: 1.27 [ 2:06:59<22:36:06] +[titan] 2025-10-05 00:41:18,921 - root - INFO - step: 3430 loss: 2.9729 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6166 +[titan] 2025-10-05 00:41:18,921 - root - INFO - lr: 4.9353e-05 
gnorm: 1.26 [ 2:07:10<22:35:54] +[titan] 2025-10-05 00:41:29,788 - root - INFO - step: 3435 loss: 2.9570 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3535 global_avg_mtp_loss: 2.6035 +[titan] 2025-10-05 00:41:29,789 - root - INFO - lr: 4.9351e-05 gnorm: 1.30 [ 2:07:21<22:35:40] +[titan] 2025-10-05 00:41:40,636 - root - INFO - step: 3440 loss: 2.9121 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.3473 global_avg_mtp_loss: 2.5649 +[titan] 2025-10-05 00:41:40,637 - root - INFO - lr: 4.9348e-05 gnorm: 1.25 [ 2:07:32<22:35:26] +[titan] 2025-10-05 00:41:51,497 - root - INFO - step: 3445 loss: 2.9720 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3548 global_avg_mtp_loss: 2.6172 +[titan] 2025-10-05 00:41:51,498 - root - INFO - lr: 4.9346e-05 gnorm: 1.24 [ 2:07:43<22:35:12] +[titan] 2025-10-05 00:42:00,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:42:02,373 - root - INFO - step: 3450 loss: 3.0025 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:42:02,373 - root - INFO - lr: 4.9344e-05 gnorm: 1.40 [ 2:07:53<22:34:58] +[titan] 2025-10-05 00:42:13,236 - root - INFO - step: 3455 loss: 2.8984 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5533 +[titan] 2025-10-05 00:42:13,236 - root - INFO - lr: 4.9342e-05 gnorm: 1.33 [ 2:08:04<22:34:44] +[titan] 2025-10-05 00:42:24,195 - root - INFO - step: 3460 loss: 2.8961 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3468 global_avg_mtp_loss: 2.5493 +[titan] 2025-10-05 00:42:24,195 - root - INFO - lr: 4.9340e-05 gnorm: 1.30 [ 2:08:15<22:34:31] +[titan] 2025-10-05 00:42:35,085 - root - INFO - step: 3465 loss: 3.0085 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3591 global_avg_mtp_loss: 2.6494 +[titan] 2025-10-05 00:42:35,085 - root - INFO - lr: 4.9338e-05 gnorm: 1.28 [ 2:08:26<22:34:18] +[titan] 2025-10-05 00:42:45,952 - root - INFO - step: 3470 loss: 2.9361 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5855 +[titan] 2025-10-05 00:42:45,952 - root - INFO - lr: 4.9336e-05 gnorm: 1.26 [ 2:08:37<22:34:04] +[titan] 2025-10-05 00:42:56,840 - root - INFO - step: 3475 loss: 2.9223 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3496 global_avg_mtp_loss: 2.5727 +[titan] 2025-10-05 00:42:56,841 - root - INFO - lr: 4.9333e-05 gnorm: 1.25 [ 2:08:48<22:33:51] +[titan] 2025-10-05 00:43:07,696 - root - INFO - step: 3480 loss: 2.9007 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5539 +[titan] 2025-10-05 00:43:07,696 - root - INFO - lr: 4.9331e-05 
gnorm: 1.30 [ 2:08:59<22:33:37] +[titan] 2025-10-05 00:43:18,563 - root - INFO - step: 3485 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5412 +[titan] 2025-10-05 00:43:18,564 - root - INFO - lr: 4.9329e-05 gnorm: 1.24 [ 2:09:10<22:33:23] +[titan] 2025-10-05 00:43:29,498 - root - INFO - step: 3490 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3565 global_avg_mtp_loss: 2.6312 +[titan] 2025-10-05 00:43:29,498 - root - INFO - lr: 4.9327e-05 gnorm: 1.34 [ 2:09:21<22:33:10] +[titan] 2025-10-05 00:43:40,371 - root - INFO - step: 3495 loss: 2.8500 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5087 +[titan] 2025-10-05 00:43:40,371 - root - INFO - lr: 4.9325e-05 gnorm: 1.24 [ 2:09:31<22:32:56] +[titan] 2025-10-05 00:43:49,059 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:43:51,242 - root - INFO - step: 3500 loss: 2.9053 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5576 +[titan] 2025-10-05 00:43:51,242 - root - INFO - lr: 4.9323e-05 gnorm: 1.26 [ 2:09:42<22:32:42] +[titan] 2025-10-05 00:44:02,120 - root - INFO - step: 3505 loss: 2.9596 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:44:02,120 - root - INFO - lr: 4.9320e-05 gnorm: 1.27 [ 2:09:53<22:32:29] +[titan] 2025-10-05 00:44:13,041 - root - INFO - step: 3510 loss: 2.9620 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3557 global_avg_mtp_loss: 2.6063 +[titan] 2025-10-05 00:44:13,042 - root - INFO - lr: 4.9318e-05 gnorm: 1.36 [ 2:10:04<22:32:16] +[titan] 2025-10-05 00:44:23,983 - root - INFO - step: 3515 loss: 2.9163 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5681 +[titan] 2025-10-05 00:44:23,984 - root - INFO - lr: 4.9316e-05 gnorm: 1.35 [ 2:10:15<22:32:03] +[titan] 2025-10-05 00:44:34,890 - root - INFO - step: 3520 loss: 2.9840 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6176 +[titan] 2025-10-05 00:44:34,890 - root - INFO - lr: 4.9314e-05 gnorm: 1.30 [ 2:10:26<22:31:49] +[titan] 2025-10-05 00:44:45,807 - root - INFO - step: 3525 loss: 2.8766 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3420 global_avg_mtp_loss: 2.5345 +[titan] 2025-10-05 00:44:45,807 - root - INFO - lr: 4.9312e-05 gnorm: 1.33 [ 2:10:37<22:31:36] +[titan] 2025-10-05 00:44:56,695 - root - INFO - step: 3530 loss: 2.8643 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5214 +[titan] 2025-10-05 00:44:56,696 - root - INFO - lr: 4.9309e-05 
gnorm: 1.31 [ 2:10:48<22:31:23] +[titan] 2025-10-05 00:45:07,556 - root - INFO - step: 3535 loss: 2.9317 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5804 +[titan] 2025-10-05 00:45:07,556 - root - INFO - lr: 4.9307e-05 gnorm: 1.24 [ 2:10:59<22:31:09] +[titan] 2025-10-05 00:45:18,462 - root - INFO - step: 3540 loss: 2.9149 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3487 global_avg_mtp_loss: 2.5661 +[titan] 2025-10-05 00:45:18,463 - root - INFO - lr: 4.9305e-05 gnorm: 1.24 [ 2:11:09<22:30:56] +[titan] 2025-10-05 00:45:29,403 - root - INFO - step: 3545 loss: 2.9166 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5665 +[titan] 2025-10-05 00:45:29,403 - root - INFO - lr: 4.9303e-05 gnorm: 1.31 [ 2:11:20<22:30:43] +[titan] 2025-10-05 00:45:38,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:45:40,270 - root - INFO - step: 3550 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5477 +[titan] 2025-10-05 00:45:40,270 - root - INFO - lr: 4.9301e-05 gnorm: 1.20 [ 2:11:31<22:30:29] +[titan] 2025-10-05 00:45:51,156 - root - INFO - step: 3555 loss: 2.8547 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3414 global_avg_mtp_loss: 2.5133 +[titan] 2025-10-05 00:45:51,156 - root - INFO - lr: 4.9298e-05 gnorm: 1.22 [ 2:11:42<22:30:16] +[titan] 2025-10-05 00:46:02,028 - root - INFO - step: 3560 loss: 2.9708 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3553 global_avg_mtp_loss: 2.6154 +[titan] 2025-10-05 00:46:02,029 - root - INFO - lr: 4.9296e-05 gnorm: 1.25 [ 2:11:53<22:30:02] +[titan] 2025-10-05 00:46:12,872 - root - INFO - step: 3565 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3463 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:46:12,873 - root - INFO - lr: 4.9294e-05 gnorm: 1.22 [ 2:12:04<22:29:48] +[titan] 2025-10-05 00:46:23,793 - root - INFO - step: 3570 loss: 2.9591 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3538 global_avg_mtp_loss: 2.6053 +[titan] 2025-10-05 00:46:23,794 - root - INFO - lr: 4.9292e-05 gnorm: 1.26 [ 2:12:15<22:29:35] +[titan] 2025-10-05 00:46:34,664 - root - INFO - step: 3575 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3519 global_avg_mtp_loss: 2.5886 +[titan] 2025-10-05 00:46:34,664 - root - INFO - lr: 4.9290e-05 gnorm: 1.30 [ 2:12:26<22:29:21] +[titan] 2025-10-05 00:46:45,547 - root - INFO - step: 3580 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3464 global_avg_mtp_loss: 2.5473 +[titan] 2025-10-05 00:46:45,548 - root - INFO - lr: 4.9287e-05 
gnorm: 1.23 [ 2:12:37<22:29:08] +[titan] 2025-10-05 00:46:54,491 - root - INFO - Dumping profiler traces at step 3584 +[titan] 2025-10-05 00:46:54,530 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:46:56,755 - root - INFO - step: 3585 loss: 2.9232 memory: 118.84GiB(85.28%) tps: 29,238 tflops: 405.64 mfu: 41.02% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5729 +[titan] 2025-10-05 00:46:56,755 - root - INFO - lr: 4.9285e-05 gnorm: 1.28 [ 2:12:48<22:28:58] +[titan] 2025-10-05 00:47:07,619 - root - INFO - step: 3590 loss: 2.9273 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3501 global_avg_mtp_loss: 2.5772 +[titan] 2025-10-05 00:47:07,619 - root - INFO - lr: 4.9283e-05 gnorm: 1.25 [ 2:12:59<22:28:44] +[titan] 2025-10-05 00:47:18,509 - root - INFO - step: 3595 loss: 2.9212 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5720 +[titan] 2025-10-05 00:47:18,509 - root - INFO - lr: 4.9281e-05 gnorm: 1.34 [ 2:13:10<22:28:31] +[titan] 2025-10-05 00:47:27,262 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:47:29,446 - root - INFO - step: 3600 loss: 2.8603 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5188 +[titan] 2025-10-05 00:47:29,447 - root - INFO - lr: 4.9278e-05 gnorm: 1.22 [ 2:13:20<22:28:18] +[titan] 2025-10-05 00:47:40,308 - root - INFO - step: 3605 loss: 2.8618 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5190 +[titan] 2025-10-05 00:47:40,309 - root - INFO - lr: 4.9276e-05 gnorm: 1.19 [ 2:13:31<22:28:04] +[titan] 2025-10-05 00:47:51,174 - root - INFO - step: 3610 loss: 2.9114 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5639 +[titan] 2025-10-05 00:47:51,174 - root - INFO - lr: 4.9274e-05 gnorm: 1.25 [ 2:13:42<22:27:51] +[titan] 2025-10-05 00:48:02,028 - root - INFO - step: 3615 loss: 2.8693 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5265 +[titan] 2025-10-05 00:48:02,028 - root - INFO - lr: 4.9272e-05 gnorm: 1.25 [ 2:13:53<22:27:37] +[titan] 2025-10-05 00:48:12,956 - root - INFO - step: 3620 loss: 2.9829 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3582 global_avg_mtp_loss: 2.6247 +[titan] 2025-10-05 00:48:12,956 - root - INFO - lr: 4.9269e-05 gnorm: 1.25 [ 2:14:04<22:27:24] +[titan] 2025-10-05 00:48:23,914 - root - INFO - step: 3625 loss: 2.9614 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.6084 +[titan] 2025-10-05 00:48:23,914 - root - INFO - lr: 4.9267e-05 gnorm: 1.19 [ 2:14:15<22:27:11] +[titan] 2025-10-05 00:48:34,821 - root - INFO - step: 3630 loss: 2.9416 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3505 global_avg_mtp_loss: 2.5911 +[titan] 2025-10-05 00:48:34,821 - root - INFO - lr: 4.9265e-05 
gnorm: 1.30 [ 2:14:26<22:26:58] +[titan] 2025-10-05 00:48:45,728 - root - INFO - step: 3635 loss: 2.8827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5377 +[titan] 2025-10-05 00:48:45,728 - root - INFO - lr: 4.9263e-05 gnorm: 1.22 [ 2:14:37<22:26:45] +[titan] 2025-10-05 00:48:56,629 - root - INFO - step: 3640 loss: 2.8474 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3379 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 00:48:56,629 - root - INFO - lr: 4.9260e-05 gnorm: 1.32 [ 2:14:48<22:26:32] +[titan] 2025-10-05 00:49:07,530 - root - INFO - step: 3645 loss: 2.9298 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5793 +[titan] 2025-10-05 00:49:07,530 - root - INFO - lr: 4.9258e-05 gnorm: 1.31 [ 2:14:59<22:26:19] +[titan] 2025-10-05 00:49:16,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:49:18,488 - root - INFO - step: 3650 loss: 3.0056 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6456 +[titan] 2025-10-05 00:49:18,488 - root - INFO - lr: 4.9256e-05 gnorm: 1.32 [ 2:15:09<22:26:06] +[titan] 2025-10-05 00:49:29,386 - root - INFO - step: 3655 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5347 +[titan] 2025-10-05 00:49:29,387 - root - INFO - lr: 4.9254e-05 gnorm: 1.23 [ 2:15:20<22:25:53] +[titan] 2025-10-05 00:49:40,255 - root - INFO - step: 3660 loss: 2.8748 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5314 +[titan] 2025-10-05 00:49:40,255 - root - INFO - lr: 4.9251e-05 gnorm: 1.26 [ 2:15:31<22:25:39] +[titan] 2025-10-05 00:49:51,122 - root - INFO - step: 3665 loss: 2.9419 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:49:51,122 - root - INFO - lr: 4.9249e-05 gnorm: 1.24 [ 2:15:42<22:25:26] +[titan] 2025-10-05 00:50:01,986 - root - INFO - step: 3670 loss: 2.8845 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5398 +[titan] 2025-10-05 00:50:01,987 - root - INFO - lr: 4.9247e-05 gnorm: 1.29 [ 2:15:53<22:25:12] +[titan] 2025-10-05 00:50:12,850 - root - INFO - step: 3675 loss: 2.8906 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5453 +[titan] 2025-10-05 00:50:12,851 - root - INFO - lr: 4.9244e-05 gnorm: 1.29 [ 2:16:04<22:24:59] +[titan] 2025-10-05 00:50:23,731 - root - INFO - step: 3680 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3632 global_avg_mtp_loss: 2.6609 +[titan] 2025-10-05 00:50:23,731 - root - INFO - lr: 4.9242e-05 
gnorm: 1.28 [ 2:16:15<22:24:45] +[titan] 2025-10-05 00:50:34,722 - root - INFO - step: 3685 loss: 2.9110 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.3489 global_avg_mtp_loss: 2.5621 +[titan] 2025-10-05 00:50:34,723 - root - INFO - lr: 4.9240e-05 gnorm: 1.25 [ 2:16:26<22:24:33] +[titan] 2025-10-05 00:50:45,616 - root - INFO - step: 3690 loss: 2.8445 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5064 +[titan] 2025-10-05 00:50:45,616 - root - INFO - lr: 4.9238e-05 gnorm: 1.23 [ 2:16:37<22:24:20] +[titan] 2025-10-05 00:50:56,496 - root - INFO - step: 3695 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3361 global_avg_mtp_loss: 2.4870 +[titan] 2025-10-05 00:50:56,496 - root - INFO - lr: 4.9235e-05 gnorm: 1.19 [ 2:16:47<22:24:07] +[titan] 2025-10-05 00:51:05,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:51:07,381 - root - INFO - step: 3700 loss: 2.8874 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5427 +[titan] 2025-10-05 00:51:07,381 - root - INFO - lr: 4.9233e-05 gnorm: 1.31 [ 2:16:58<22:23:53] +[titan] 2025-10-05 00:51:18,258 - root - INFO - step: 3705 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3441 global_avg_mtp_loss: 2.5425 +[titan] 2025-10-05 00:51:18,258 - root - INFO - lr: 4.9231e-05 gnorm: 1.36 [ 2:17:09<22:23:40] +[titan] 2025-10-05 00:51:29,175 - root - INFO - step: 3710 loss: 2.9115 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3467 global_avg_mtp_loss: 2.5648 +[titan] 2025-10-05 00:51:29,175 - root - INFO - lr: 4.9228e-05 gnorm: 1.27 [ 2:17:20<22:23:27] +[titan] 2025-10-05 00:51:40,064 - root - INFO - step: 3715 loss: 2.9140 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5671 +[titan] 2025-10-05 00:51:40,064 - root - INFO - lr: 4.9226e-05 gnorm: 1.23 [ 2:17:31<22:23:14] +[titan] 2025-10-05 00:51:50,950 - root - INFO - step: 3720 loss: 2.8644 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.5220 +[titan] 2025-10-05 00:51:50,950 - root - INFO - lr: 4.9224e-05 gnorm: 1.28 [ 2:17:42<22:23:00] +[titan] 2025-10-05 00:52:01,826 - root - INFO - step: 3725 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5677 +[titan] 2025-10-05 00:52:01,826 - root - INFO - lr: 4.9221e-05 gnorm: 1.30 [ 2:17:53<22:22:47] +[titan] 2025-10-05 00:52:12,692 - root - INFO - step: 3730 loss: 2.8843 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5392 +[titan] 2025-10-05 00:52:12,692 - root - INFO - lr: 4.9219e-05 
gnorm: 1.27 [ 2:18:04<22:22:34] +[titan] 2025-10-05 00:52:23,581 - root - INFO - step: 3735 loss: 2.8622 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3411 global_avg_mtp_loss: 2.5211 +[titan] 2025-10-05 00:52:23,581 - root - INFO - lr: 4.9217e-05 gnorm: 1.29 [ 2:18:15<22:22:20] +[titan] 2025-10-05 00:52:34,507 - root - INFO - step: 3740 loss: 2.8833 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5382 +[titan] 2025-10-05 00:52:34,507 - root - INFO - lr: 4.9214e-05 gnorm: 1.32 [ 2:18:25<22:22:08] +[titan] 2025-10-05 00:52:45,424 - root - INFO - step: 3745 loss: 2.8876 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5439 +[titan] 2025-10-05 00:52:45,424 - root - INFO - lr: 4.9212e-05 gnorm: 1.30 [ 2:18:36<22:21:55] +[titan] 2025-10-05 00:52:54,123 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:52:56,315 - root - INFO - step: 3750 loss: 2.9081 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3472 global_avg_mtp_loss: 2.5609 +[titan] 2025-10-05 00:52:56,315 - root - INFO - lr: 4.9210e-05 gnorm: 1.37 [ 2:18:47<22:21:42] +[titan] 2025-10-05 00:53:07,243 - root - INFO - step: 3755 loss: 2.8797 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 00:53:07,243 - root - INFO - lr: 4.9207e-05 gnorm: 1.27 [ 2:18:58<22:21:29] +[titan] 2025-10-05 00:53:18,154 - root - INFO - step: 3760 loss: 2.8545 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3406 global_avg_mtp_loss: 2.5139 +[titan] 2025-10-05 00:53:18,154 - root - INFO - lr: 4.9205e-05 gnorm: 1.27 [ 2:19:09<22:21:16] +[titan] 2025-10-05 00:53:29,071 - root - INFO - step: 3765 loss: 2.8350 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.4960 +[titan] 2025-10-05 00:53:29,071 - root - INFO - lr: 4.9203e-05 gnorm: 1.27 [ 2:19:20<22:21:03] +[titan] 2025-10-05 00:53:39,977 - root - INFO - step: 3770 loss: 2.8227 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3370 global_avg_mtp_loss: 2.4857 +[titan] 2025-10-05 00:53:39,978 - root - INFO - lr: 4.9200e-05 gnorm: 1.19 [ 2:19:31<22:20:50] +[titan] 2025-10-05 00:53:50,879 - root - INFO - step: 3775 loss: 2.8842 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3430 global_avg_mtp_loss: 2.5411 +[titan] 2025-10-05 00:53:50,879 - root - INFO - lr: 4.9198e-05 gnorm: 1.23 [ 2:19:42<22:20:37] +[titan] 2025-10-05 00:54:01,831 - root - INFO - step: 3780 loss: 2.9375 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5862 +[titan] 2025-10-05 00:54:01,831 - root - INFO - lr: 4.9196e-05 
gnorm: 1.20 [ 2:19:53<22:20:24] +[titan] 2025-10-05 00:54:12,711 - root - INFO - step: 3785 loss: 2.8747 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5318 +[titan] 2025-10-05 00:54:12,711 - root - INFO - lr: 4.9193e-05 gnorm: 1.23 [ 2:20:04<22:20:11] +[titan] 2025-10-05 00:54:23,577 - root - INFO - step: 3790 loss: 2.8207 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4833 +[titan] 2025-10-05 00:54:23,577 - root - INFO - lr: 4.9191e-05 gnorm: 1.27 [ 2:20:15<22:19:58] +[titan] 2025-10-05 00:54:34,480 - root - INFO - step: 3795 loss: 2.9584 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3532 global_avg_mtp_loss: 2.6052 +[titan] 2025-10-05 00:54:34,480 - root - INFO - lr: 4.9188e-05 gnorm: 1.29 [ 2:20:25<22:19:45] +[titan] 2025-10-05 00:54:43,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:54:45,353 - root - INFO - step: 3800 loss: 2.9385 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3511 global_avg_mtp_loss: 2.5874 +[titan] 2025-10-05 00:54:45,353 - root - INFO - lr: 4.9186e-05 gnorm: 1.24 [ 2:20:36<22:19:31] +[titan] 2025-10-05 00:54:56,214 - root - INFO - step: 3805 loss: 2.8516 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3400 global_avg_mtp_loss: 2.5116 +[titan] 2025-10-05 00:54:56,214 - root - INFO - lr: 4.9184e-05 gnorm: 1.32 [ 2:20:47<22:19:18] +[titan] 2025-10-05 00:55:07,134 - root - INFO - step: 3810 loss: 2.8608 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5196 +[titan] 2025-10-05 00:55:07,134 - root - INFO - lr: 4.9181e-05 gnorm: 1.27 [ 2:20:58<22:19:05] +[titan] 2025-10-05 00:55:18,019 - root - INFO - step: 3815 loss: 2.9132 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3495 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 00:55:18,019 - root - INFO - lr: 4.9179e-05 gnorm: 1.33 [ 2:21:09<22:18:52] +[titan] 2025-10-05 00:55:28,882 - root - INFO - step: 3820 loss: 2.8903 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3438 global_avg_mtp_loss: 2.5465 +[titan] 2025-10-05 00:55:28,882 - root - INFO - lr: 4.9176e-05 gnorm: 1.28 [ 2:21:20<22:18:39] +[titan] 2025-10-05 00:55:39,765 - root - INFO - step: 3825 loss: 2.8538 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3396 global_avg_mtp_loss: 2.5142 +[titan] 2025-10-05 00:55:39,765 - root - INFO - lr: 4.9174e-05 gnorm: 1.35 [ 2:21:31<22:18:26] +[titan] 2025-10-05 00:55:50,656 - root - INFO - step: 3830 loss: 2.8951 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5503 +[titan] 2025-10-05 00:55:50,656 - root - INFO - lr: 4.9172e-05 
gnorm: 1.29 [ 2:21:42<22:18:12] +[titan] 2025-10-05 00:56:01,544 - root - INFO - step: 3835 loss: 2.8701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 00:56:01,544 - root - INFO - lr: 4.9169e-05 gnorm: 1.28 [ 2:21:53<22:17:59] +[titan] 2025-10-05 00:56:12,424 - root - INFO - step: 3840 loss: 2.8980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3459 global_avg_mtp_loss: 2.5521 +[titan] 2025-10-05 00:56:12,424 - root - INFO - lr: 4.9167e-05 gnorm: 1.29 [ 2:22:03<22:17:46] +[titan] 2025-10-05 00:56:23,350 - root - INFO - step: 3845 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:56:23,350 - root - INFO - lr: 4.9164e-05 gnorm: 1.33 [ 2:22:14<22:17:33] +[titan] 2025-10-05 00:56:32,044 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:56:34,236 - root - INFO - step: 3850 loss: 2.8817 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5379 +[titan] 2025-10-05 00:56:34,237 - root - INFO - lr: 4.9162e-05 gnorm: 1.28 [ 2:22:25<22:17:20] +[titan] 2025-10-05 00:56:45,120 - root - INFO - step: 3855 loss: 2.8016 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 00:56:45,120 - root - INFO - lr: 4.9160e-05 gnorm: 1.32 [ 2:22:36<22:17:07] +[titan] 2025-10-05 00:56:56,000 - root - INFO - step: 3860 loss: 2.8851 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 00:56:56,000 - root - INFO - lr: 4.9157e-05 gnorm: 1.29 [ 2:22:47<22:16:54] +[titan] 2025-10-05 00:57:06,896 - root - INFO - step: 3865 loss: 2.8534 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3401 global_avg_mtp_loss: 2.5132 +[titan] 2025-10-05 00:57:06,896 - root - INFO - lr: 4.9155e-05 gnorm: 1.25 [ 2:22:58<22:16:41] +[titan] 2025-10-05 00:57:17,779 - root - INFO - step: 3870 loss: 2.9197 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5707 +[titan] 2025-10-05 00:57:17,779 - root - INFO - lr: 4.9152e-05 gnorm: 1.28 [ 2:23:09<22:16:28] +[titan] 2025-10-05 00:57:28,718 - root - INFO - step: 3875 loss: 2.9466 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.3534 global_avg_mtp_loss: 2.5932 +[titan] 2025-10-05 00:57:28,718 - root - INFO - lr: 4.9150e-05 gnorm: 1.21 [ 2:23:20<22:16:15] +[titan] 2025-10-05 00:57:39,599 - root - INFO - step: 3880 loss: 2.8840 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3444 global_avg_mtp_loss: 2.5396 +[titan] 2025-10-05 00:57:39,600 - root - INFO - lr: 4.9148e-05 
gnorm: 1.28 [ 2:23:31<22:16:02] +[titan] 2025-10-05 00:57:50,474 - root - INFO - step: 3885 loss: 2.9370 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3488 global_avg_mtp_loss: 2.5882 +[titan] 2025-10-05 00:57:50,474 - root - INFO - lr: 4.9145e-05 gnorm: 1.25 [ 2:23:41<22:15:49] +[titan] 2025-10-05 00:58:01,351 - root - INFO - step: 3890 loss: 2.9350 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3493 global_avg_mtp_loss: 2.5857 +[titan] 2025-10-05 00:58:01,351 - root - INFO - lr: 4.9143e-05 gnorm: 1.31 [ 2:23:52<22:15:36] +[titan] 2025-10-05 00:58:12,271 - root - INFO - step: 3895 loss: 2.9044 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5592 +[titan] 2025-10-05 00:58:12,271 - root - INFO - lr: 4.9140e-05 gnorm: 1.26 [ 2:24:03<22:15:23] +[titan] 2025-10-05 00:58:20,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:58:23,152 - root - INFO - step: 3900 loss: 2.7993 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 00:58:23,152 - root - INFO - lr: 4.9138e-05 gnorm: 1.27 [ 2:24:14<22:15:10] +[titan] 2025-10-05 00:58:34,070 - root - INFO - step: 3905 loss: 2.9356 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:58:34,070 - root - INFO - lr: 4.9135e-05 gnorm: 1.23 [ 2:24:25<22:14:57] +[titan] 2025-10-05 00:58:44,959 - root - INFO - step: 3910 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3475 global_avg_mtp_loss: 2.5678 +[titan] 2025-10-05 00:58:44,959 - root - INFO - lr: 4.9133e-05 gnorm: 1.26 [ 2:24:36<22:14:44] +[titan] 2025-10-05 00:58:55,830 - root - INFO - step: 3915 loss: 2.8401 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5019 +[titan] 2025-10-05 00:58:55,830 - root - INFO - lr: 4.9130e-05 gnorm: 1.23 [ 2:24:47<22:14:31] +[titan] 2025-10-05 00:59:06,689 - root - INFO - step: 3920 loss: 2.9547 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3597 global_avg_mtp_loss: 2.5950 +[titan] 2025-10-05 00:59:06,690 - root - INFO - lr: 4.9128e-05 gnorm: 1.24 [ 2:24:58<22:14:18] +[titan] 2025-10-05 00:59:17,583 - root - INFO - step: 3925 loss: 2.9231 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3478 global_avg_mtp_loss: 2.5753 +[titan] 2025-10-05 00:59:17,584 - root - INFO - lr: 4.9125e-05 gnorm: 1.29 [ 2:25:09<22:14:05] +[titan] 2025-10-05 00:59:28,459 - root - INFO - step: 3930 loss: 2.8642 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5227 +[titan] 2025-10-05 00:59:28,459 - root - INFO - lr: 4.9123e-05 
gnorm: 1.29 [ 2:25:19<22:13:52] +[titan] 2025-10-05 00:59:39,392 - root - INFO - step: 3935 loss: 2.8806 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 00:59:39,393 - root - INFO - lr: 4.9121e-05 gnorm: 1.31 [ 2:25:30<22:13:39] +[titan] 2025-10-05 00:59:50,302 - root - INFO - step: 3940 loss: 2.9187 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.3484 global_avg_mtp_loss: 2.5703 +[titan] 2025-10-05 00:59:50,302 - root - INFO - lr: 4.9118e-05 gnorm: 1.23 [ 2:25:41<22:13:27] +[titan] 2025-10-05 01:00:01,171 - root - INFO - step: 3945 loss: 2.8435 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:00:01,172 - root - INFO - lr: 4.9116e-05 gnorm: 1.25 [ 2:25:52<22:13:13] +[titan] 2025-10-05 01:00:09,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:00:12,051 - root - INFO - step: 3950 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5446 +[titan] 2025-10-05 01:00:12,051 - root - INFO - lr: 4.9113e-05 gnorm: 1.27 [ 2:26:03<22:13:00] +[titan] 2025-10-05 01:00:22,938 - root - INFO - step: 3955 loss: 2.8946 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5492 +[titan] 2025-10-05 01:00:22,938 - root - INFO - lr: 4.9111e-05 gnorm: 1.31 [ 2:26:14<22:12:47] +[titan] 2025-10-05 01:00:33,863 - root - INFO - step: 3960 loss: 2.9358 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5868 +[titan] 2025-10-05 01:00:33,863 - root - INFO - lr: 4.9108e-05 gnorm: 1.30 [ 2:26:25<22:12:35] +[titan] 2025-10-05 01:00:44,742 - root - INFO - step: 3965 loss: 2.8537 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3415 global_avg_mtp_loss: 2.5123 +[titan] 2025-10-05 01:00:44,743 - root - INFO - lr: 4.9106e-05 gnorm: 1.24 [ 2:26:36<22:12:22] +[titan] 2025-10-05 01:00:55,669 - root - INFO - step: 3970 loss: 2.8697 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5257 +[titan] 2025-10-05 01:00:55,670 - root - INFO - lr: 4.9103e-05 gnorm: 1.26 [ 2:26:47<22:12:09] +[titan] 2025-10-05 01:01:06,531 - root - INFO - step: 3975 loss: 2.8184 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4844 +[titan] 2025-10-05 01:01:06,531 - root - INFO - lr: 4.9101e-05 gnorm: 1.26 [ 2:26:57<22:11:56] +[titan] 2025-10-05 01:01:17,435 - root - INFO - step: 3980 loss: 2.8685 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5264 +[titan] 2025-10-05 01:01:17,435 - root - INFO - lr: 4.9098e-05 
gnorm: 1.32 [ 2:27:08<22:11:43] +[titan] 2025-10-05 01:01:28,313 - root - INFO - step: 3985 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3312 global_avg_mtp_loss: 2.4498 +[titan] 2025-10-05 01:01:28,313 - root - INFO - lr: 4.9096e-05 gnorm: 1.26 [ 2:27:19<22:11:30] +[titan] 2025-10-05 01:01:39,229 - root - INFO - step: 3990 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3337 global_avg_mtp_loss: 2.4610 +[titan] 2025-10-05 01:01:39,229 - root - INFO - lr: 4.9093e-05 gnorm: 1.30 [ 2:27:30<22:11:17] +[titan] 2025-10-05 01:01:50,092 - root - INFO - step: 3995 loss: 2.7943 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3322 global_avg_mtp_loss: 2.4621 +[titan] 2025-10-05 01:01:50,092 - root - INFO - lr: 4.9091e-05 gnorm: 1.21 [ 2:27:41<22:11:04] +[titan] 2025-10-05 01:01:58,771 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:02:00,958 - root - INFO - step: 4000 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5902 +[titan] 2025-10-05 01:02:00,958 - root - INFO - lr: 4.9088e-05 gnorm: 1.27 [ 2:27:52<22:10:51] +[titan] 2025-10-05 01:02:11,850 - root - INFO - step: 4005 loss: 2.8699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5271 +[titan] 2025-10-05 01:02:11,850 - root - INFO - lr: 4.9086e-05 gnorm: 1.29 [ 2:28:03<22:10:38] +[titan] 2025-10-05 01:02:22,761 - root - INFO - step: 4010 loss: 2.8862 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 01:02:22,761 - root - INFO - lr: 4.9083e-05 gnorm: 1.23 [ 2:28:14<22:10:26] +[titan] 2025-10-05 01:02:33,616 - root - INFO - step: 4015 loss: 2.8251 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.4858 +[titan] 2025-10-05 01:02:33,616 - root - INFO - lr: 4.9081e-05 gnorm: 1.23 [ 2:28:25<22:10:12] +[titan] 2025-10-05 01:02:44,524 - root - INFO - step: 4020 loss: 2.8756 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5328 +[titan] 2025-10-05 01:02:44,525 - root - INFO - lr: 4.9078e-05 gnorm: 1.23 [ 2:28:35<22:10:00] +[titan] 2025-10-05 01:02:55,396 - root - INFO - step: 4025 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3363 global_avg_mtp_loss: 2.4949 +[titan] 2025-10-05 01:02:55,396 - root - INFO - lr: 4.9076e-05 gnorm: 1.22 [ 2:28:46<22:09:47] +[titan] 2025-10-05 01:03:06,265 - root - INFO - step: 4030 loss: 2.8674 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5248 +[titan] 2025-10-05 01:03:06,265 - root - INFO - lr: 4.9073e-05 
gnorm: 1.24 [ 2:28:57<22:09:34] +[titan] 2025-10-05 01:03:17,168 - root - INFO - step: 4035 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:03:17,168 - root - INFO - lr: 4.9070e-05 gnorm: 1.29 [ 2:29:08<22:09:21] +[titan] 2025-10-05 01:03:28,097 - root - INFO - step: 4040 loss: 2.8057 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3338 global_avg_mtp_loss: 2.4719 +[titan] 2025-10-05 01:03:28,098 - root - INFO - lr: 4.9068e-05 gnorm: 1.23 [ 2:29:19<22:09:08] +[titan] 2025-10-05 01:03:39,019 - root - INFO - step: 4045 loss: 2.8686 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5273 +[titan] 2025-10-05 01:03:39,019 - root - INFO - lr: 4.9065e-05 gnorm: 1.33 [ 2:29:30<22:08:56] +[titan] 2025-10-05 01:03:47,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:03:49,927 - root - INFO - step: 4050 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5088 +[titan] 2025-10-05 01:03:49,927 - root - INFO - lr: 4.9063e-05 gnorm: 1.25 [ 2:29:41<22:08:43] +[titan] 2025-10-05 01:04:00,828 - root - INFO - step: 4055 loss: 2.8040 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4693 +[titan] 2025-10-05 01:04:00,828 - root - INFO - lr: 4.9060e-05 gnorm: 1.23 [ 2:29:52<22:08:30] +[titan] 2025-10-05 01:04:11,717 - root - INFO - step: 4060 loss: 2.8008 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4673 +[titan] 2025-10-05 01:04:11,717 - root - INFO - lr: 4.9058e-05 gnorm: 1.27 [ 2:30:03<22:08:17] +[titan] 2025-10-05 01:04:22,649 - root - INFO - step: 4065 loss: 2.8860 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5433 +[titan] 2025-10-05 01:04:22,649 - root - INFO - lr: 4.9055e-05 gnorm: 1.27 [ 2:30:14<22:08:05] +[titan] 2025-10-05 01:04:33,534 - root - INFO - step: 4070 loss: 2.8482 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.5092 +[titan] 2025-10-05 01:04:33,534 - root - INFO - lr: 4.9053e-05 gnorm: 1.28 [ 2:30:24<22:07:52] +[titan] 2025-10-05 01:04:44,493 - root - INFO - step: 4075 loss: 2.7243 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.3989 +[titan] 2025-10-05 01:04:44,493 - root - INFO - lr: 4.9050e-05 gnorm: 1.28 [ 2:30:35<22:07:40] +[titan] 2025-10-05 01:04:55,369 - root - INFO - step: 4080 loss: 2.9124 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5655 +[titan] 2025-10-05 01:04:55,370 - root - INFO - lr: 4.9047e-05 
gnorm: 1.24 [ 2:30:46<22:07:27] +[titan] 2025-10-05 01:05:06,228 - root - INFO - step: 4085 loss: 2.8731 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 01:05:06,228 - root - INFO - lr: 4.9045e-05 gnorm: 1.27 [ 2:30:57<22:07:14] +[titan] 2025-10-05 01:05:17,102 - root - INFO - step: 4090 loss: 2.7997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4672 +[titan] 2025-10-05 01:05:17,102 - root - INFO - lr: 4.9042e-05 gnorm: 1.28 [ 2:31:08<22:07:01] +[titan] 2025-10-05 01:05:28,060 - root - INFO - step: 4095 loss: 2.9035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5587 +[titan] 2025-10-05 01:05:28,060 - root - INFO - lr: 4.9040e-05 gnorm: 1.23 [ 2:31:19<22:06:49] +[titan] 2025-10-05 01:05:30,410 - root - INFO - Dumping profiler traces at step 4096 +[titan] 2025-10-05 01:05:30,448 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:05:37,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:05:39,213 - root - INFO - step: 4100 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 29,380 tflops: 407.60 mfu: 41.21% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4763 +[titan] 2025-10-05 01:05:39,213 - root - INFO - lr: 4.9037e-05 gnorm: 1.29 [ 2:31:30<22:06:38] +[titan] 2025-10-05 01:05:50,104 - root - INFO - step: 4105 loss: 2.8434 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5046 +[titan] 2025-10-05 01:05:50,104 - root - INFO - lr: 4.9035e-05 gnorm: 1.25 [ 2:31:41<22:06:25] +[titan] 2025-10-05 01:06:00,954 - root - INFO - step: 4110 loss: 2.8513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3410 global_avg_mtp_loss: 2.5103 +[titan] 2025-10-05 01:06:00,954 - root - INFO - lr: 4.9032e-05 gnorm: 1.30 [ 2:31:52<22:06:12] +[titan] 2025-10-05 01:06:11,792 - root - INFO - step: 4115 loss: 2.8687 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 01:06:11,793 - root - INFO - lr: 4.9029e-05 gnorm: 1.28 [ 2:32:03<22:05:59] +[titan] 2025-10-05 01:06:22,672 - root - INFO - step: 4120 loss: 2.7381 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3273 global_avg_mtp_loss: 2.4108 +[titan] 2025-10-05 01:06:22,673 - root - INFO - lr: 4.9027e-05 gnorm: 1.20 [ 2:32:14<22:05:46] +[titan] 2025-10-05 01:06:33,541 - root - INFO - step: 4125 loss: 2.8811 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 01:06:33,541 - root - INFO - lr: 4.9024e-05 gnorm: 1.27 [ 2:32:24<22:05:33] +[titan] 2025-10-05 01:06:44,458 - root - INFO - step: 4130 loss: 2.7955 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3316 global_avg_mtp_loss: 2.4639 +[titan] 2025-10-05 01:06:44,459 - root - INFO - lr: 4.9022e-05 
gnorm: 1.22 [ 2:32:35<22:05:21] +[titan] 2025-10-05 01:06:55,338 - root - INFO - step: 4135 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3357 global_avg_mtp_loss: 2.4937 +[titan] 2025-10-05 01:06:55,338 - root - INFO - lr: 4.9019e-05 gnorm: 1.26 [ 2:32:46<22:05:08] +[titan] 2025-10-05 01:07:06,209 - root - INFO - step: 4140 loss: 2.8211 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3351 global_avg_mtp_loss: 2.4860 +[titan] 2025-10-05 01:07:06,209 - root - INFO - lr: 4.9016e-05 gnorm: 1.23 [ 2:32:57<22:04:55] +[titan] 2025-10-05 01:07:17,116 - root - INFO - step: 4145 loss: 2.7757 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4442 +[titan] 2025-10-05 01:07:17,116 - root - INFO - lr: 4.9014e-05 gnorm: 1.33 [ 2:33:08<22:04:42] +[titan] 2025-10-05 01:07:25,818 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:07:28,011 - root - INFO - step: 4150 loss: 2.8404 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.5032 +[titan] 2025-10-05 01:07:28,012 - root - INFO - lr: 4.9011e-05 gnorm: 1.29 [ 2:33:19<22:04:29] +[titan] 2025-10-05 01:07:38,919 - root - INFO - step: 4155 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5380 +[titan] 2025-10-05 01:07:38,919 - root - INFO - lr: 4.9009e-05 gnorm: 1.22 [ 2:33:30<22:04:17] +[titan] 2025-10-05 01:07:49,794 - root - INFO - step: 4160 loss: 2.8305 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3386 global_avg_mtp_loss: 2.4919 +[titan] 2025-10-05 01:07:49,794 - root - INFO - lr: 4.9006e-05 gnorm: 1.23 [ 2:33:41<22:04:04] +[titan] 2025-10-05 01:08:00,715 - root - INFO - step: 4165 loss: 2.7568 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4286 +[titan] 2025-10-05 01:08:00,715 - root - INFO - lr: 4.9003e-05 gnorm: 1.22 [ 2:33:52<22:03:51] +[titan] 2025-10-05 01:08:11,575 - root - INFO - step: 4170 loss: 2.8449 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.3395 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:08:11,575 - root - INFO - lr: 4.9001e-05 gnorm: 1.22 [ 2:34:03<22:03:38] +[titan] 2025-10-05 01:08:22,448 - root - INFO - step: 4175 loss: 2.8005 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3330 global_avg_mtp_loss: 2.4675 +[titan] 2025-10-05 01:08:22,448 - root - INFO - lr: 4.8998e-05 gnorm: 1.22 [ 2:34:13<22:03:26] +[titan] 2025-10-05 01:08:33,314 - root - INFO - step: 4180 loss: 2.7794 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4468 +[titan] 2025-10-05 01:08:33,314 - root - INFO - lr: 4.8995e-05 
gnorm: 1.18 [ 2:34:24<22:03:13] +[titan] 2025-10-05 01:08:44,215 - root - INFO - step: 4185 loss: 2.8110 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3342 global_avg_mtp_loss: 2.4768 +[titan] 2025-10-05 01:08:44,215 - root - INFO - lr: 4.8993e-05 gnorm: 1.25 [ 2:34:35<22:03:00] +[titan] 2025-10-05 01:08:55,079 - root - INFO - step: 4190 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4689 +[titan] 2025-10-05 01:08:55,079 - root - INFO - lr: 4.8990e-05 gnorm: 1.20 [ 2:34:46<22:02:47] +[titan] 2025-10-05 01:09:05,968 - root - INFO - step: 4195 loss: 2.7893 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3318 global_avg_mtp_loss: 2.4575 +[titan] 2025-10-05 01:09:05,968 - root - INFO - lr: 4.8987e-05 gnorm: 1.27 [ 2:34:57<22:02:34] +[titan] 2025-10-05 01:09:14,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:09:16,867 - root - INFO - step: 4200 loss: 2.8001 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 01:09:16,867 - root - INFO - lr: 4.8985e-05 gnorm: 1.37 [ 2:35:08<22:02:22] +[titan] 2025-10-05 01:09:27,758 - root - INFO - step: 4205 loss: 2.8414 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5036 +[titan] 2025-10-05 01:09:27,758 - root - INFO - lr: 4.8982e-05 gnorm: 1.27 [ 2:35:19<22:02:09] +[titan] 2025-10-05 01:09:38,614 - root - INFO - step: 4210 loss: 2.8082 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4739 +[titan] 2025-10-05 01:09:38,614 - root - INFO - lr: 4.8980e-05 gnorm: 1.21 [ 2:35:30<22:01:56] +[titan] 2025-10-05 01:09:49,535 - root - INFO - step: 4215 loss: 2.8257 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4886 +[titan] 2025-10-05 01:09:49,535 - root - INFO - lr: 4.8977e-05 gnorm: 1.25 [ 2:35:40<22:01:44] +[titan] 2025-10-05 01:10:00,451 - root - INFO - step: 4220 loss: 2.8238 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3391 global_avg_mtp_loss: 2.4847 +[titan] 2025-10-05 01:10:00,451 - root - INFO - lr: 4.8974e-05 gnorm: 1.27 [ 2:35:51<22:01:31] +[titan] 2025-10-05 01:10:11,409 - root - INFO - step: 4225 loss: 2.7720 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4420 +[titan] 2025-10-05 01:10:11,409 - root - INFO - lr: 4.8972e-05 gnorm: 1.25 [ 2:36:02<22:01:19] +[titan] 2025-10-05 01:10:22,330 - root - INFO - step: 4230 loss: 2.8335 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3375 global_avg_mtp_loss: 2.4961 +[titan] 2025-10-05 01:10:22,330 - root - INFO - lr: 4.8969e-05 
gnorm: 1.22 [ 2:36:13<22:01:07] +[titan] 2025-10-05 01:10:33,205 - root - INFO - step: 4235 loss: 2.9402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5879 +[titan] 2025-10-05 01:10:33,205 - root - INFO - lr: 4.8966e-05 gnorm: 1.26 [ 2:36:24<22:00:54] +[titan] 2025-10-05 01:10:44,111 - root - INFO - step: 4240 loss: 2.8115 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4770 +[titan] 2025-10-05 01:10:44,111 - root - INFO - lr: 4.8964e-05 gnorm: 1.23 [ 2:36:35<22:00:41] +[titan] 2025-10-05 01:10:54,992 - root - INFO - step: 4245 loss: 2.7621 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4328 +[titan] 2025-10-05 01:10:54,993 - root - INFO - lr: 4.8961e-05 gnorm: 1.25 [ 2:36:46<22:00:28] +[titan] 2025-10-05 01:11:03,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:11:05,860 - root - INFO - step: 4250 loss: 2.7919 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:11:05,860 - root - INFO - lr: 4.8958e-05 gnorm: 1.34 [ 2:36:57<22:00:16] +[titan] 2025-10-05 01:11:16,750 - root - INFO - step: 4255 loss: 2.8769 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 01:11:16,750 - root - INFO - lr: 4.8955e-05 gnorm: 1.23 [ 2:37:08<22:00:03] +[titan] 2025-10-05 01:11:27,682 - root - INFO - step: 4260 loss: 2.8447 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5059 +[titan] 2025-10-05 01:11:27,682 - root - INFO - lr: 4.8953e-05 gnorm: 1.29 [ 2:37:19<21:59:51] +[titan] 2025-10-05 01:11:38,566 - root - INFO - step: 4265 loss: 2.8553 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3402 global_avg_mtp_loss: 2.5151 +[titan] 2025-10-05 01:11:38,566 - root - INFO - lr: 4.8950e-05 gnorm: 1.28 [ 2:37:29<21:59:38] +[titan] 2025-10-05 01:11:49,489 - root - INFO - step: 4270 loss: 2.8265 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:11:49,489 - root - INFO - lr: 4.8947e-05 gnorm: 1.23 [ 2:37:40<21:59:25] +[titan] 2025-10-05 01:12:00,379 - root - INFO - step: 4275 loss: 2.7626 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3291 global_avg_mtp_loss: 2.4335 +[titan] 2025-10-05 01:12:00,379 - root - INFO - lr: 4.8945e-05 gnorm: 1.23 [ 2:37:51<21:59:13] +[titan] 2025-10-05 01:12:11,266 - root - INFO - step: 4280 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4955 +[titan] 2025-10-05 01:12:11,266 - root - INFO - lr: 4.8942e-05 
gnorm: 1.25 [ 2:38:02<21:59:00] +[titan] 2025-10-05 01:12:22,135 - root - INFO - step: 4285 loss: 2.8353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3380 global_avg_mtp_loss: 2.4973 +[titan] 2025-10-05 01:12:22,135 - root - INFO - lr: 4.8939e-05 gnorm: 1.27 [ 2:38:13<21:58:47] +[titan] 2025-10-05 01:12:33,063 - root - INFO - step: 4290 loss: 2.7796 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4469 +[titan] 2025-10-05 01:12:33,063 - root - INFO - lr: 4.8937e-05 gnorm: 1.31 [ 2:38:24<21:58:35] +[titan] 2025-10-05 01:12:43,959 - root - INFO - step: 4295 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4492 +[titan] 2025-10-05 01:12:43,959 - root - INFO - lr: 4.8934e-05 gnorm: 1.37 [ 2:38:35<21:58:22] +[titan] 2025-10-05 01:12:52,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:12:54,832 - root - INFO - step: 4300 loss: 2.9113 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 01:12:54,832 - root - INFO - lr: 4.8931e-05 gnorm: 1.32 [ 2:38:46<21:58:10] +[titan] 2025-10-05 01:13:05,696 - root - INFO - step: 4305 loss: 2.8427 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:13:05,696 - root - INFO - lr: 4.8928e-05 gnorm: 1.29 [ 2:38:57<21:57:57] +[titan] 2025-10-05 01:13:16,559 - root - INFO - step: 4310 loss: 2.8552 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5127 +[titan] 2025-10-05 01:13:16,559 - root - INFO - lr: 4.8926e-05 gnorm: 1.25 [ 2:39:07<21:57:44] +[titan] 2025-10-05 01:13:27,434 - root - INFO - step: 4315 loss: 2.7587 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:13:27,434 - root - INFO - lr: 4.8923e-05 gnorm: 1.28 [ 2:39:18<21:57:31] +[titan] 2025-10-05 01:13:38,295 - root - INFO - step: 4320 loss: 2.8361 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3373 global_avg_mtp_loss: 2.4988 +[titan] 2025-10-05 01:13:38,295 - root - INFO - lr: 4.8920e-05 gnorm: 1.33 [ 2:39:29<21:57:18] +[titan] 2025-10-05 01:13:49,212 - root - INFO - step: 4325 loss: 2.8809 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5381 +[titan] 2025-10-05 01:13:49,212 - root - INFO - lr: 4.8918e-05 gnorm: 1.32 [ 2:39:40<21:57:06] +[titan] 2025-10-05 01:14:00,073 - root - INFO - step: 4330 loss: 2.8655 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5232 +[titan] 2025-10-05 01:14:00,073 - root - INFO - lr: 4.8915e-05 
gnorm: 1.25 [ 2:39:51<21:56:53] +[titan] 2025-10-05 01:14:10,949 - root - INFO - step: 4335 loss: 2.8077 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4742 +[titan] 2025-10-05 01:14:10,949 - root - INFO - lr: 4.8912e-05 gnorm: 1.25 [ 2:40:02<21:56:40] +[titan] 2025-10-05 01:14:21,868 - root - INFO - step: 4340 loss: 2.8223 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3346 global_avg_mtp_loss: 2.4877 +[titan] 2025-10-05 01:14:21,868 - root - INFO - lr: 4.8909e-05 gnorm: 1.21 [ 2:40:13<21:56:28] +[titan] 2025-10-05 01:14:32,754 - root - INFO - step: 4345 loss: 2.8555 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3431 global_avg_mtp_loss: 2.5124 +[titan] 2025-10-05 01:14:32,754 - root - INFO - lr: 4.8907e-05 gnorm: 1.26 [ 2:40:24<21:56:15] +[titan] 2025-10-05 01:14:41,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:14:43,631 - root - INFO - step: 4350 loss: 2.7309 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 01:14:43,631 - root - INFO - lr: 4.8904e-05 gnorm: 1.21 [ 2:40:35<21:56:03] +[titan] 2025-10-05 01:14:54,554 - root - INFO - step: 4355 loss: 2.7817 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:14:54,555 - root - INFO - lr: 4.8901e-05 gnorm: 1.31 [ 2:40:45<21:55:50] +[titan] 2025-10-05 01:15:05,471 - root - INFO - step: 4360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:15:05,471 - root - INFO - lr: 4.8898e-05 gnorm: 1.18 [ 2:40:56<21:55:38] +[titan] 2025-10-05 01:15:16,353 - root - INFO - step: 4365 loss: 2.7543 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3265 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:15:16,353 - root - INFO - lr: 4.8896e-05 gnorm: 1.34 [ 2:41:07<21:55:25] +[titan] 2025-10-05 01:15:27,221 - root - INFO - step: 4370 loss: 2.8151 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3349 global_avg_mtp_loss: 2.4802 +[titan] 2025-10-05 01:15:27,222 - root - INFO - lr: 4.8893e-05 gnorm: 1.33 [ 2:41:18<21:55:13] +[titan] 2025-10-05 01:15:38,092 - root - INFO - step: 4375 loss: 2.8402 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:15:38,092 - root - INFO - lr: 4.8890e-05 gnorm: 1.24 [ 2:41:29<21:55:00] +[titan] 2025-10-05 01:15:48,973 - root - INFO - step: 4380 loss: 2.7636 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4347 +[titan] 2025-10-05 01:15:48,973 - root - INFO - lr: 4.8887e-05 
gnorm: 1.28 [ 2:41:40<21:54:47] +[titan] 2025-10-05 01:15:59,862 - root - INFO - step: 4385 loss: 2.7822 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4512 +[titan] 2025-10-05 01:15:59,863 - root - INFO - lr: 4.8884e-05 gnorm: 1.22 [ 2:41:51<21:54:35] +[titan] 2025-10-05 01:16:10,768 - root - INFO - step: 4390 loss: 2.8774 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5334 +[titan] 2025-10-05 01:16:10,768 - root - INFO - lr: 4.8882e-05 gnorm: 1.31 [ 2:42:02<21:54:22] +[titan] 2025-10-05 01:16:21,633 - root - INFO - step: 4395 loss: 2.7736 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4446 +[titan] 2025-10-05 01:16:21,633 - root - INFO - lr: 4.8879e-05 gnorm: 1.27 [ 2:42:13<21:54:09] +[titan] 2025-10-05 01:16:30,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:16:32,515 - root - INFO - step: 4400 loss: 2.8412 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5024 +[titan] 2025-10-05 01:16:32,515 - root - INFO - lr: 4.8876e-05 gnorm: 1.24 [ 2:42:23<21:53:57] +[titan] 2025-10-05 01:16:43,378 - root - INFO - step: 4405 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4765 +[titan] 2025-10-05 01:16:43,378 - root - INFO - lr: 4.8873e-05 gnorm: 1.25 [ 2:42:34<21:53:44] +[titan] 2025-10-05 01:16:54,311 - root - INFO - step: 4410 loss: 2.7984 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3341 global_avg_mtp_loss: 2.4642 +[titan] 2025-10-05 01:16:54,312 - root - INFO - lr: 4.8871e-05 gnorm: 1.22 [ 2:42:45<21:53:32] +[titan] 2025-10-05 01:17:05,164 - root - INFO - step: 4415 loss: 2.7761 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3306 global_avg_mtp_loss: 2.4455 +[titan] 2025-10-05 01:17:05,164 - root - INFO - lr: 4.8868e-05 gnorm: 1.24 [ 2:42:56<21:53:19] +[titan] 2025-10-05 01:17:16,059 - root - INFO - step: 4420 loss: 2.8777 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5351 +[titan] 2025-10-05 01:17:16,059 - root - INFO - lr: 4.8865e-05 gnorm: 1.27 [ 2:43:07<21:53:06] +[titan] 2025-10-05 01:17:26,943 - root - INFO - step: 4425 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4104 +[titan] 2025-10-05 01:17:26,943 - root - INFO - lr: 4.8862e-05 gnorm: 1.25 [ 2:43:18<21:52:54] +[titan] 2025-10-05 01:17:37,810 - root - INFO - step: 4430 loss: 2.8315 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:17:37,810 - root - INFO - lr: 4.8859e-05 
gnorm: 1.24 [ 2:43:29<21:52:41] +[titan] 2025-10-05 01:17:48,674 - root - INFO - step: 4435 loss: 2.7874 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4564 +[titan] 2025-10-05 01:17:48,674 - root - INFO - lr: 4.8857e-05 gnorm: 1.29 [ 2:43:40<21:52:28] +[titan] 2025-10-05 01:17:59,549 - root - INFO - step: 4440 loss: 2.7652 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4365 +[titan] 2025-10-05 01:17:59,549 - root - INFO - lr: 4.8854e-05 gnorm: 1.25 [ 2:43:50<21:52:16] +[titan] 2025-10-05 01:18:10,464 - root - INFO - step: 4445 loss: 2.7634 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4344 +[titan] 2025-10-05 01:18:10,464 - root - INFO - lr: 4.8851e-05 gnorm: 1.21 [ 2:44:01<21:52:03] +[titan] 2025-10-05 01:18:19,181 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:18:21,402 - root - INFO - step: 4450 loss: 2.8198 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.3358 global_avg_mtp_loss: 2.4839 +[titan] 2025-10-05 01:18:21,402 - root - INFO - lr: 4.8848e-05 gnorm: 1.25 [ 2:44:12<21:51:51] +[titan] 2025-10-05 01:18:32,290 - root - INFO - step: 4455 loss: 2.8002 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4666 +[titan] 2025-10-05 01:18:32,290 - root - INFO - lr: 4.8845e-05 gnorm: 1.21 [ 2:44:23<21:51:39] +[titan] 2025-10-05 01:18:43,182 - root - INFO - step: 4460 loss: 2.7924 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:18:43,183 - root - INFO - lr: 4.8842e-05 gnorm: 1.17 [ 2:44:34<21:51:26] +[titan] 2025-10-05 01:18:54,107 - root - INFO - step: 4465 loss: 2.8210 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3364 global_avg_mtp_loss: 2.4846 +[titan] 2025-10-05 01:18:54,107 - root - INFO - lr: 4.8840e-05 gnorm: 1.23 [ 2:44:45<21:51:14] +[titan] 2025-10-05 01:19:04,974 - root - INFO - step: 4470 loss: 2.7913 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4596 +[titan] 2025-10-05 01:19:04,974 - root - INFO - lr: 4.8837e-05 gnorm: 1.21 [ 2:44:56<21:51:01] +[titan] 2025-10-05 01:19:15,845 - root - INFO - step: 4475 loss: 2.8258 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3360 global_avg_mtp_loss: 2.4898 +[titan] 2025-10-05 01:19:15,846 - root - INFO - lr: 4.8834e-05 gnorm: 1.28 [ 2:45:07<21:50:49] +[titan] 2025-10-05 01:19:26,715 - root - INFO - step: 4480 loss: 2.7821 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:19:26,715 - root - INFO - lr: 4.8831e-05 
gnorm: 1.29 [ 2:45:18<21:50:36] +[titan] 2025-10-05 01:19:37,611 - root - INFO - step: 4485 loss: 2.8154 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4798 +[titan] 2025-10-05 01:19:37,611 - root - INFO - lr: 4.8828e-05 gnorm: 1.28 [ 2:45:29<21:50:24] +[titan] 2025-10-05 01:19:48,473 - root - INFO - step: 4490 loss: 2.7910 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3319 global_avg_mtp_loss: 2.4591 +[titan] 2025-10-05 01:19:48,474 - root - INFO - lr: 4.8825e-05 gnorm: 1.39 [ 2:45:39<21:50:11] +[titan] 2025-10-05 01:19:59,363 - root - INFO - step: 4495 loss: 2.7586 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4304 +[titan] 2025-10-05 01:19:59,363 - root - INFO - lr: 4.8823e-05 gnorm: 1.26 [ 2:45:50<21:49:58] +[titan] 2025-10-05 01:20:08,037 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:20:10,224 - root - INFO - step: 4500 loss: 2.8484 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.5091 +[titan] 2025-10-05 01:20:10,224 - root - INFO - lr: 4.8820e-05 gnorm: 1.25 [ 2:46:01<21:49:46] +[titan] 2025-10-05 01:20:21,077 - root - INFO - step: 4505 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3816 +[titan] 2025-10-05 01:20:21,077 - root - INFO - lr: 4.8817e-05 gnorm: 1.25 [ 2:46:12<21:49:33] +[titan] 2025-10-05 01:20:31,932 - root - INFO - step: 4510 loss: 2.8270 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3376 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:20:31,932 - root - INFO - lr: 4.8814e-05 gnorm: 1.26 [ 2:46:23<21:49:20] +[titan] 2025-10-05 01:20:42,845 - root - INFO - step: 4515 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4207 +[titan] 2025-10-05 01:20:42,845 - root - INFO - lr: 4.8811e-05 gnorm: 1.20 [ 2:46:34<21:49:08] +[titan] 2025-10-05 01:20:53,800 - root - INFO - step: 4520 loss: 2.8244 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4888 +[titan] 2025-10-05 01:20:53,800 - root - INFO - lr: 4.8808e-05 gnorm: 1.37 [ 2:46:45<21:48:56] +[titan] 2025-10-05 01:21:04,708 - root - INFO - step: 4525 loss: 2.7186 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3964 +[titan] 2025-10-05 01:21:04,708 - root - INFO - lr: 4.8805e-05 gnorm: 1.27 [ 2:46:56<21:48:44] +[titan] 2025-10-05 01:21:15,602 - root - INFO - step: 4530 loss: 2.7206 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3236 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:21:15,602 - root - INFO - lr: 4.8803e-05 
gnorm: 1.23 [ 2:47:07<21:48:31] +[titan] 2025-10-05 01:21:26,498 - root - INFO - step: 4535 loss: 2.7518 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3249 global_avg_mtp_loss: 2.4269 +[titan] 2025-10-05 01:21:26,498 - root - INFO - lr: 4.8800e-05 gnorm: 1.35 [ 2:47:17<21:48:19] +[titan] 2025-10-05 01:21:37,376 - root - INFO - step: 4540 loss: 2.7814 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3301 global_avg_mtp_loss: 2.4513 +[titan] 2025-10-05 01:21:37,376 - root - INFO - lr: 4.8797e-05 gnorm: 1.23 [ 2:47:28<21:48:06] +[titan] 2025-10-05 01:21:48,331 - root - INFO - step: 4545 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4487 +[titan] 2025-10-05 01:21:48,331 - root - INFO - lr: 4.8794e-05 gnorm: 1.25 [ 2:47:39<21:47:54] +[titan] 2025-10-05 01:21:57,058 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:21:59,248 - root - INFO - step: 4550 loss: 2.8483 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 01:21:59,249 - root - INFO - lr: 4.8791e-05 gnorm: 1.26 [ 2:47:50<21:47:42] +[titan] 2025-10-05 01:22:10,102 - root - INFO - step: 4555 loss: 2.7389 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.4138 +[titan] 2025-10-05 01:22:10,103 - root - INFO - lr: 4.8788e-05 gnorm: 1.20 [ 2:48:01<21:47:29] +[titan] 2025-10-05 01:22:20,974 - root - INFO - step: 4560 loss: 2.7847 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:22:20,974 - root - INFO - lr: 4.8785e-05 gnorm: 1.21 [ 2:48:12<21:47:17] +[titan] 2025-10-05 01:22:31,853 - root - INFO - step: 4565 loss: 2.7537 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:22:31,854 - root - INFO - lr: 4.8782e-05 gnorm: 1.27 [ 2:48:23<21:47:04] +[titan] 2025-10-05 01:22:42,729 - root - INFO - step: 4570 loss: 2.6580 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 01:22:42,729 - root - INFO - lr: 4.8779e-05 gnorm: 1.26 [ 2:48:34<21:46:52] +[titan] 2025-10-05 01:22:53,792 - root - INFO - step: 4575 loss: 2.8422 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.3385 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:22:53,792 - root - INFO - lr: 4.8777e-05 gnorm: 1.26 [ 2:48:45<21:46:41] +[titan] 2025-10-05 01:23:04,721 - root - INFO - step: 4580 loss: 2.6906 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3732 +[titan] 2025-10-05 01:23:04,721 - root - INFO - lr: 4.8774e-05 
gnorm: 1.18 [ 2:48:56<21:46:28] +[titan] 2025-10-05 01:23:15,616 - root - INFO - step: 4585 loss: 2.7509 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:23:15,616 - root - INFO - lr: 4.8771e-05 gnorm: 1.23 [ 2:49:07<21:46:16] +[titan] 2025-10-05 01:23:26,529 - root - INFO - step: 4590 loss: 2.7868 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4570 +[titan] 2025-10-05 01:23:26,530 - root - INFO - lr: 4.8768e-05 gnorm: 1.24 [ 2:49:17<21:46:04] +[titan] 2025-10-05 01:23:37,394 - root - INFO - step: 4595 loss: 2.7525 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3274 global_avg_mtp_loss: 2.4251 +[titan] 2025-10-05 01:23:37,394 - root - INFO - lr: 4.8765e-05 gnorm: 1.22 [ 2:49:28<21:45:51] +[titan] 2025-10-05 01:23:46,092 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:23:48,293 - root - INFO - step: 4600 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4209 +[titan] 2025-10-05 01:23:48,294 - root - INFO - lr: 4.8762e-05 gnorm: 1.18 [ 2:49:39<21:45:39] +[titan] 2025-10-05 01:23:59,314 - root - INFO - step: 4605 loss: 2.8650 memory: 118.84GiB(85.28%) tps: 29,734 tflops: 412.51 mfu: 41.71% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.5076 +[titan] 2025-10-05 01:23:59,314 - root - INFO - lr: 4.8759e-05 gnorm: 1.23 [ 2:49:50<21:45:27] +[titan] 2025-10-05 01:24:06,015 - root - INFO - Dumping profiler traces at step 4608 +[titan] 2025-10-05 01:24:06,055 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:24:10,470 - root - INFO - step: 4610 loss: 2.7849 memory: 118.84GiB(85.28%) tps: 29,373 tflops: 407.50 mfu: 41.20% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4534 +[titan] 2025-10-05 01:24:10,471 - root - INFO - lr: 4.8756e-05 gnorm: 1.28 [ 2:50:01<21:45:17] +[titan] 2025-10-05 01:24:21,351 - root - INFO - step: 4615 loss: 2.7549 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4283 +[titan] 2025-10-05 01:24:21,351 - root - INFO - lr: 4.8753e-05 gnorm: 1.21 [ 2:50:12<21:45:05] +[titan] 2025-10-05 01:24:32,230 - root - INFO - step: 4620 loss: 2.6761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3586 +[titan] 2025-10-05 01:24:32,230 - root - INFO - lr: 4.8750e-05 gnorm: 1.22 [ 2:50:23<21:44:52] +[titan] 2025-10-05 01:24:43,126 - root - INFO - step: 4625 loss: 2.6974 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:24:43,126 - root - INFO - lr: 4.8747e-05 gnorm: 1.18 [ 2:50:34<21:44:40] +[titan] 2025-10-05 01:24:54,032 - root - INFO - step: 4630 loss: 2.8650 
memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3412 global_avg_mtp_loss: 2.5238 +[titan] 2025-10-05 01:24:54,032 - root - INFO - lr: 4.8744e-05 gnorm: 1.24 [ 2:50:45<21:44:27] +[titan] 2025-10-05 01:25:04,940 - root - INFO - step: 4635 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4155 +[titan] 2025-10-05 01:25:04,940 - root - INFO - lr: 4.8741e-05 gnorm: 1.21 [ 2:50:56<21:44:15] +[titan] 2025-10-05 01:25:15,817 - root - INFO - step: 4640 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3562 +[titan] 2025-10-05 01:25:15,817 - root - INFO - lr: 4.8739e-05 gnorm: 1.31 [ 2:51:07<21:44:03] +[titan] 2025-10-05 01:25:26,720 - root - INFO - step: 4645 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3352 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:25:26,721 - root - INFO - lr: 4.8736e-05 gnorm: 1.23 [ 2:51:18<21:43:50] +[titan] 2025-10-05 01:25:35,459 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:25:37,643 - root - INFO - step: 4650 loss: 2.6937 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3193 global_avg_mtp_loss: 2.3743 +[titan] 2025-10-05 01:25:37,643 - root - INFO - lr: 4.8733e-05 gnorm: 1.23 [ 2:51:29<21:43:38] +[titan] 2025-10-05 01:25:48,525 - root - INFO - step: 4655 loss: 2.7402 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4144 +[titan] 2025-10-05 01:25:48,525 - root - INFO - lr: 4.8730e-05 gnorm: 1.22 [ 2:51:39<21:43:26] +[titan] 2025-10-05 01:25:59,422 - root - INFO - step: 4660 loss: 2.7820 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4520 +[titan] 2025-10-05 01:25:59,422 - root - INFO - lr: 4.8727e-05 gnorm: 1.30 [ 2:51:50<21:43:14] +[titan] 2025-10-05 01:26:10,311 - root - INFO - step: 4665 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3392 global_avg_mtp_loss: 2.5074 +[titan] 2025-10-05 01:26:10,311 - root - INFO - lr: 4.8724e-05 gnorm: 1.25 [ 2:52:01<21:43:01] +[titan] 2025-10-05 01:26:21,210 - root - INFO - step: 4670 loss: 2.7305 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4066 +[titan] 2025-10-05 01:26:21,210 - root - INFO - lr: 4.8721e-05 gnorm: 1.25 [ 2:52:12<21:42:49] +[titan] 2025-10-05 01:26:32,122 - root - INFO - step: 4675 loss: 2.7530 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4264 +[titan] 2025-10-05 01:26:32,122 - root - INFO - lr: 4.8718e-05 gnorm: 1.25 [ 2:52:23<21:42:37] +[titan] 2025-10-05 01:26:43,055 - root - INFO - step: 4680 loss: 2.8067 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4732 +[titan] 2025-10-05 01:26:43,055 - root - INFO - lr: 4.8715e-05 
gnorm: 1.24 [ 2:52:34<21:42:25] +[titan] 2025-10-05 01:26:53,990 - root - INFO - step: 4685 loss: 2.6707 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 01:26:53,990 - root - INFO - lr: 4.8712e-05 gnorm: 1.36 [ 2:52:45<21:42:13] +[titan] 2025-10-05 01:27:04,906 - root - INFO - step: 4690 loss: 2.7149 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3214 global_avg_mtp_loss: 2.3935 +[titan] 2025-10-05 01:27:04,906 - root - INFO - lr: 4.8709e-05 gnorm: 1.23 [ 2:52:56<21:42:00] +[titan] 2025-10-05 01:27:15,817 - root - INFO - step: 4695 loss: 2.6965 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3761 +[titan] 2025-10-05 01:27:15,817 - root - INFO - lr: 4.8706e-05 gnorm: 1.25 [ 2:53:07<21:41:48] +[titan] 2025-10-05 01:27:24,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:27:26,735 - root - INFO - step: 4700 loss: 2.7982 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 01:27:26,735 - root - INFO - lr: 4.8703e-05 gnorm: 1.22 [ 2:53:18<21:41:36] +[titan] 2025-10-05 01:27:37,672 - root - INFO - step: 4705 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3443 global_avg_mtp_loss: 2.5494 +[titan] 2025-10-05 01:27:37,673 - root - INFO - lr: 4.8700e-05 gnorm: 1.26 [ 2:53:29<21:41:24] +[titan] 2025-10-05 01:27:48,615 - root - INFO - step: 4710 loss: 2.7471 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.3269 global_avg_mtp_loss: 2.4201 +[titan] 2025-10-05 01:27:48,615 - root - INFO - lr: 4.8697e-05 gnorm: 1.21 [ 2:53:40<21:41:12] +[titan] 2025-10-05 01:27:59,548 - root - INFO - step: 4715 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.3271 global_avg_mtp_loss: 2.4303 +[titan] 2025-10-05 01:27:59,548 - root - INFO - lr: 4.8694e-05 gnorm: 1.22 [ 2:53:50<21:41:00] +[titan] 2025-10-05 01:28:10,470 - root - INFO - step: 4720 loss: 2.8297 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3377 global_avg_mtp_loss: 2.4920 +[titan] 2025-10-05 01:28:10,471 - root - INFO - lr: 4.8691e-05 gnorm: 1.25 [ 2:54:01<21:40:48] +[titan] 2025-10-05 01:28:21,389 - root - INFO - step: 4725 loss: 2.8079 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4747 +[titan] 2025-10-05 01:28:21,389 - root - INFO - lr: 4.8688e-05 gnorm: 1.25 [ 2:54:12<21:40:36] +[titan] 2025-10-05 01:28:32,287 - root - INFO - step: 4730 loss: 2.7460 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3255 global_avg_mtp_loss: 2.4205 +[titan] 2025-10-05 01:28:32,287 - root - INFO - lr: 4.8685e-05 
gnorm: 1.27 [ 2:54:23<21:40:24] +[titan] 2025-10-05 01:28:43,251 - root - INFO - step: 4735 loss: 2.6848 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3668 +[titan] 2025-10-05 01:28:43,252 - root - INFO - lr: 4.8682e-05 gnorm: 1.24 [ 2:54:34<21:40:12] +[titan] 2025-10-05 01:28:54,171 - root - INFO - step: 4740 loss: 2.7918 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4618 +[titan] 2025-10-05 01:28:54,171 - root - INFO - lr: 4.8679e-05 gnorm: 1.32 [ 2:54:45<21:40:00] +[titan] 2025-10-05 01:29:05,077 - root - INFO - step: 4745 loss: 2.7361 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4131 +[titan] 2025-10-05 01:29:05,077 - root - INFO - lr: 4.8676e-05 gnorm: 1.29 [ 2:54:56<21:39:47] +[titan] 2025-10-05 01:29:13,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:29:15,984 - root - INFO - step: 4750 loss: 2.7499 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:29:15,984 - root - INFO - lr: 4.8673e-05 gnorm: 1.26 [ 2:55:07<21:39:35] +[titan] 2025-10-05 01:29:26,874 - root - INFO - step: 4755 loss: 2.7721 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3285 global_avg_mtp_loss: 2.4435 +[titan] 2025-10-05 01:29:26,874 - root - INFO - lr: 4.8670e-05 gnorm: 1.19 [ 2:55:18<21:39:23] +[titan] 2025-10-05 01:29:37,761 - root - INFO - step: 4760 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3353 global_avg_mtp_loss: 2.4595 +[titan] 2025-10-05 01:29:37,761 - root - INFO - lr: 4.8667e-05 gnorm: 1.22 [ 2:55:29<21:39:11] +[titan] 2025-10-05 01:29:48,663 - root - INFO - step: 4765 loss: 2.7250 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3240 global_avg_mtp_loss: 2.4010 +[titan] 2025-10-05 01:29:48,664 - root - INFO - lr: 4.8664e-05 gnorm: 1.28 [ 2:55:40<21:38:58] +[titan] 2025-10-05 01:29:59,563 - root - INFO - step: 4770 loss: 2.7157 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3221 global_avg_mtp_loss: 2.3936 +[titan] 2025-10-05 01:29:59,563 - root - INFO - lr: 4.8661e-05 gnorm: 2.78 [ 2:55:50<21:38:46] +[titan] 2025-10-05 01:30:10,469 - root - INFO - step: 4775 loss: 2.8036 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4701 +[titan] 2025-10-05 01:30:10,469 - root - INFO - lr: 4.8658e-05 gnorm: 1.25 [ 2:56:01<21:38:34] +[titan] 2025-10-05 01:30:21,348 - root - INFO - step: 4780 loss: 2.7215 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:30:21,348 - root - INFO - lr: 4.8655e-05 
gnorm: 1.38 [ 2:56:12<21:38:21] +[titan] 2025-10-05 01:30:32,231 - root - INFO - step: 4785 loss: 2.7709 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4425 +[titan] 2025-10-05 01:30:32,231 - root - INFO - lr: 4.8652e-05 gnorm: 1.21 [ 2:56:23<21:38:09] +[titan] 2025-10-05 01:30:43,113 - root - INFO - step: 4790 loss: 2.7171 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.3934 +[titan] 2025-10-05 01:30:43,113 - root - INFO - lr: 4.8649e-05 gnorm: 1.19 [ 2:56:34<21:37:57] +[titan] 2025-10-05 01:30:54,053 - root - INFO - step: 4795 loss: 2.8155 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.4731 +[titan] 2025-10-05 01:30:54,054 - root - INFO - lr: 4.8646e-05 gnorm: 1.20 [ 2:56:45<21:37:45] +[titan] 2025-10-05 01:31:02,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:31:04,944 - root - INFO - step: 4800 loss: 2.7229 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4004 +[titan] 2025-10-05 01:31:04,944 - root - INFO - lr: 4.8643e-05 gnorm: 1.27 [ 2:56:56<21:37:33] +[titan] 2025-10-05 01:31:15,845 - root - INFO - step: 4805 loss: 2.7633 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4354 +[titan] 2025-10-05 01:31:15,845 - root - INFO - lr: 4.8639e-05 gnorm: 1.30 [ 2:57:07<21:37:20] +[titan] 2025-10-05 01:31:26,718 - root - INFO - step: 4810 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 01:31:26,718 - root - INFO - lr: 4.8636e-05 gnorm: 1.23 [ 2:57:18<21:37:08] +[titan] 2025-10-05 01:31:37,587 - root - INFO - step: 4815 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4100 +[titan] 2025-10-05 01:31:37,587 - root - INFO - lr: 4.8633e-05 gnorm: 1.25 [ 2:57:28<21:36:55] +[titan] 2025-10-05 01:31:48,487 - root - INFO - step: 4820 loss: 2.7752 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4454 +[titan] 2025-10-05 01:31:48,487 - root - INFO - lr: 4.8630e-05 gnorm: 1.24 [ 2:57:39<21:36:43] +[titan] 2025-10-05 01:31:59,366 - root - INFO - step: 4825 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3299 global_avg_mtp_loss: 2.4440 +[titan] 2025-10-05 01:31:59,366 - root - INFO - lr: 4.8627e-05 gnorm: 1.27 [ 2:57:50<21:36:31] +[titan] 2025-10-05 01:32:10,285 - root - INFO - step: 4830 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3289 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:32:10,285 - root - INFO - lr: 4.8624e-05 
gnorm: 1.25 [ 2:58:01<21:36:19] +[titan] 2025-10-05 01:32:21,158 - root - INFO - step: 4835 loss: 2.7916 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4584 +[titan] 2025-10-05 01:32:21,158 - root - INFO - lr: 4.8621e-05 gnorm: 1.23 [ 2:58:12<21:36:06] +[titan] 2025-10-05 01:32:32,019 - root - INFO - step: 4840 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3305 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:32:32,020 - root - INFO - lr: 4.8618e-05 gnorm: 1.25 [ 2:58:23<21:35:54] +[titan] 2025-10-05 01:32:42,890 - root - INFO - step: 4845 loss: 2.7622 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4341 +[titan] 2025-10-05 01:32:42,890 - root - INFO - lr: 4.8615e-05 gnorm: 1.24 [ 2:58:34<21:35:41] +[titan] 2025-10-05 01:32:51,570 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:32:53,752 - root - INFO - step: 4850 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3209 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 01:32:53,752 - root - INFO - lr: 4.8612e-05 gnorm: 1.26 [ 2:58:45<21:35:29] +[titan] 2025-10-05 01:33:04,624 - root - INFO - step: 4855 loss: 2.7888 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4588 +[titan] 2025-10-05 01:33:04,624 - root - INFO - lr: 4.8609e-05 gnorm: 1.30 [ 2:58:55<21:35:17] +[titan] 2025-10-05 01:33:15,520 - root - INFO - step: 4860 loss: 2.6936 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3751 +[titan] 2025-10-05 01:33:15,521 - root - INFO - lr: 4.8606e-05 gnorm: 1.24 [ 2:59:06<21:35:04] +[titan] 2025-10-05 01:33:26,393 - root - INFO - step: 4865 loss: 2.8919 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3439 global_avg_mtp_loss: 2.5480 +[titan] 2025-10-05 01:33:26,393 - root - INFO - lr: 4.8603e-05 gnorm: 1.25 [ 2:59:17<21:34:52] +[titan] 2025-10-05 01:33:37,259 - root - INFO - step: 4870 loss: 2.7240 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 01:33:37,259 - root - INFO - lr: 4.8599e-05 gnorm: 1.24 [ 2:59:28<21:34:40] +[titan] 2025-10-05 01:33:48,148 - root - INFO - step: 4875 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4417 +[titan] 2025-10-05 01:33:48,148 - root - INFO - lr: 4.8596e-05 gnorm: 1.26 [ 2:59:39<21:34:27] +[titan] 2025-10-05 01:33:59,034 - root - INFO - step: 4880 loss: 2.7227 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4000 +[titan] 2025-10-05 01:33:59,035 - root - INFO - lr: 4.8593e-05 
gnorm: 1.27 [ 2:59:50<21:34:15] +[titan] 2025-10-05 01:34:09,948 - root - INFO - step: 4885 loss: 2.7234 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4009 +[titan] 2025-10-05 01:34:09,948 - root - INFO - lr: 4.8590e-05 gnorm: 1.20 [ 3:00:01<21:34:03] +[titan] 2025-10-05 01:34:20,817 - root - INFO - step: 4890 loss: 2.7314 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4083 +[titan] 2025-10-05 01:34:20,818 - root - INFO - lr: 4.8587e-05 gnorm: 1.33 [ 3:00:12<21:33:51] +[titan] 2025-10-05 01:34:31,730 - root - INFO - step: 4895 loss: 2.7077 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3210 global_avg_mtp_loss: 2.3867 +[titan] 2025-10-05 01:34:31,731 - root - INFO - lr: 4.8584e-05 gnorm: 1.29 [ 3:00:23<21:33:38] +[titan] 2025-10-05 01:34:40,425 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:34:42,620 - root - INFO - step: 4900 loss: 2.7734 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3292 global_avg_mtp_loss: 2.4443 +[titan] 2025-10-05 01:34:42,620 - root - INFO - lr: 4.8581e-05 gnorm: 1.28 [ 3:00:33<21:33:26] +[titan] 2025-10-05 01:34:53,494 - root - INFO - step: 4905 loss: 2.7406 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4146 +[titan] 2025-10-05 01:34:53,495 - root - INFO - lr: 4.8578e-05 gnorm: 1.17 [ 3:00:44<21:33:14] +[titan] 2025-10-05 01:35:04,450 - root - INFO - step: 4910 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3981 +[titan] 2025-10-05 01:35:04,451 - root - INFO - lr: 4.8575e-05 gnorm: 1.20 [ 3:00:55<21:33:02] +[titan] 2025-10-05 01:35:15,335 - root - INFO - step: 4915 loss: 2.7382 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4140 +[titan] 2025-10-05 01:35:15,335 - root - INFO - lr: 4.8571e-05 gnorm: 1.28 [ 3:01:06<21:32:50] +[titan] 2025-10-05 01:35:26,233 - root - INFO - step: 4920 loss: 2.7952 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3320 global_avg_mtp_loss: 2.4631 +[titan] 2025-10-05 01:35:26,233 - root - INFO - lr: 4.8568e-05 gnorm: 1.29 [ 3:01:17<21:32:38] +[titan] 2025-10-05 01:35:37,136 - root - INFO - step: 4925 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4403 +[titan] 2025-10-05 01:35:37,136 - root - INFO - lr: 4.8565e-05 gnorm: 1.25 [ 3:01:28<21:32:26] +[titan] 2025-10-05 01:35:48,013 - root - INFO - step: 4930 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4081 +[titan] 2025-10-05 01:35:48,014 - root - INFO - lr: 4.8562e-05 
gnorm: 1.21 [ 3:01:39<21:32:13] +[titan] 2025-10-05 01:35:58,895 - root - INFO - step: 4935 loss: 2.7204 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:35:58,895 - root - INFO - lr: 4.8559e-05 gnorm: 1.20 [ 3:01:50<21:32:01] +[titan] 2025-10-05 01:36:09,806 - root - INFO - step: 4940 loss: 2.7788 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:36:09,806 - root - INFO - lr: 4.8556e-05 gnorm: 1.21 [ 3:02:01<21:31:49] +[titan] 2025-10-05 01:36:20,731 - root - INFO - step: 4945 loss: 2.7547 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3304 global_avg_mtp_loss: 2.4243 +[titan] 2025-10-05 01:36:20,732 - root - INFO - lr: 4.8553e-05 gnorm: 1.23 [ 3:02:12<21:31:37] +[titan] 2025-10-05 01:36:29,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:36:31,611 - root - INFO - step: 4950 loss: 2.6438 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3129 global_avg_mtp_loss: 2.3309 +[titan] 2025-10-05 01:36:31,611 - root - INFO - lr: 4.8549e-05 gnorm: 1.20 [ 3:02:22<21:31:25] +[titan] 2025-10-05 01:36:42,497 - root - INFO - step: 4955 loss: 2.7743 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:36:42,497 - root - INFO - lr: 4.8546e-05 gnorm: 1.29 [ 3:02:33<21:31:12] +[titan] 2025-10-05 01:36:53,369 - root - INFO - step: 4960 loss: 2.7846 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4551 +[titan] 2025-10-05 01:36:53,369 - root - INFO - lr: 4.8543e-05 gnorm: 1.25 [ 3:02:44<21:31:00] +[titan] 2025-10-05 01:37:04,267 - root - INFO - step: 4965 loss: 2.8172 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3344 global_avg_mtp_loss: 2.4828 +[titan] 2025-10-05 01:37:04,267 - root - INFO - lr: 4.8540e-05 gnorm: 1.25 [ 3:02:55<21:30:48] +[titan] 2025-10-05 01:37:15,212 - root - INFO - step: 4970 loss: 2.6436 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3310 +[titan] 2025-10-05 01:37:15,212 - root - INFO - lr: 4.8537e-05 gnorm: 1.25 [ 3:03:06<21:30:36] +[titan] 2025-10-05 01:37:26,159 - root - INFO - step: 4975 loss: 2.7551 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4289 +[titan] 2025-10-05 01:37:26,159 - root - INFO - lr: 4.8534e-05 gnorm: 1.22 [ 3:03:17<21:30:24] +[titan] 2025-10-05 01:37:37,030 - root - INFO - step: 4980 loss: 2.7052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3218 global_avg_mtp_loss: 2.3834 +[titan] 2025-10-05 01:37:37,031 - root - INFO - lr: 4.8530e-05 
gnorm: 1.26 [ 3:03:28<21:30:12] +[titan] 2025-10-05 01:37:47,943 - root - INFO - step: 4985 loss: 2.7357 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4094 +[titan] 2025-10-05 01:37:47,944 - root - INFO - lr: 4.8527e-05 gnorm: 1.27 [ 3:03:39<21:30:00] +[titan] 2025-10-05 01:37:58,856 - root - INFO - step: 4990 loss: 2.7950 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4628 +[titan] 2025-10-05 01:37:58,857 - root - INFO - lr: 4.8524e-05 gnorm: 1.22 [ 3:03:50<21:29:48] +[titan] 2025-10-05 01:38:09,823 - root - INFO - step: 4995 loss: 2.7375 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.3261 global_avg_mtp_loss: 2.4114 +[titan] 2025-10-05 01:38:09,823 - root - INFO - lr: 4.8521e-05 gnorm: 1.18 [ 3:04:01<21:29:36] +[titan] 2025-10-05 01:38:18,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:38:20,753 - root - INFO - step: 5000 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3200 global_avg_mtp_loss: 2.3792 +[titan] 2025-10-05 01:38:20,753 - root - INFO - lr: 4.8518e-05 gnorm: 1.26 [ 3:04:12<21:29:24] +[titan] 2025-10-05 01:38:20,753 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 01:38:42,136 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 01:38:42,136 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 21.38 seconds. 
+[titan] 2025-10-05 01:40:51,998 - root - INFO - step: 5005 loss: 2.7858 memory: 118.84GiB(85.28%) tps: 2,167 tflops: 30.06 mfu: 3.04% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:40:51,998 - root - INFO - lr: 4.8515e-05 gnorm: 1.27 [ 3:06:43<21:45:33] +[titan] 2025-10-05 01:41:02,796 - root - INFO - step: 5010 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3270 global_avg_mtp_loss: 2.4291 +[titan] 2025-10-05 01:41:02,796 - root - INFO - lr: 4.8511e-05 gnorm: 1.34 [ 3:06:54<21:45:20] +[titan] 2025-10-05 01:41:13,614 - root - INFO - step: 5015 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.3283 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:41:13,614 - root - INFO - lr: 4.8508e-05 gnorm: 1.32 [ 3:07:04<21:45:06] +[titan] 2025-10-05 01:41:24,485 - root - INFO - step: 5020 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4305 +[titan] 2025-10-05 01:41:24,485 - root - INFO - lr: 4.8505e-05 gnorm: 1.31 [ 3:07:15<21:44:52] +[titan] 2025-10-05 01:41:35,321 - root - INFO - step: 5025 loss: 2.7060 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3836 +[titan] 2025-10-05 01:41:35,321 - root - INFO - lr: 4.8502e-05 gnorm: 1.27 [ 3:07:26<21:44:39] +[titan] 2025-10-05 01:41:46,205 - root - INFO - step: 5030 loss: 2.7304 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3247 global_avg_mtp_loss: 2.4057 +[titan] 2025-10-05 01:41:46,205 - root - INFO - lr: 4.8499e-05 gnorm: 1.28 [ 3:07:37<21:44:25] +[titan] 2025-10-05 01:41:57,092 - root - INFO - step: 5035 loss: 2.7485 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4237 +[titan] 2025-10-05 01:41:57,093 - root - INFO - lr: 4.8495e-05 
gnorm: 1.26 [ 3:07:48<21:44:12] +[titan] 2025-10-05 01:42:08,008 - root - INFO - step: 5040 loss: 2.7641 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4360 +[titan] 2025-10-05 01:42:08,008 - root - INFO - lr: 4.8492e-05 gnorm: 1.18 [ 3:07:59<21:43:59] +[titan] 2025-10-05 01:42:18,888 - root - INFO - step: 5045 loss: 2.6254 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3136 +[titan] 2025-10-05 01:42:18,888 - root - INFO - lr: 4.8489e-05 gnorm: 1.29 [ 3:08:10<21:43:46] +[titan] 2025-10-05 01:42:27,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:42:29,897 - root - INFO - step: 5050 loss: 2.7825 memory: 118.84GiB(85.28%) tps: 29,766 tflops: 412.96 mfu: 41.76% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4531 +[titan] 2025-10-05 01:42:29,897 - root - INFO - lr: 4.8486e-05 gnorm: 1.24 [ 3:08:21<21:43:33] +[titan] 2025-10-05 01:42:40,766 - root - INFO - step: 5055 loss: 2.7808 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4505 +[titan] 2025-10-05 01:42:40,766 - root - INFO - lr: 4.8483e-05 gnorm: 1.22 [ 3:08:32<21:43:20] +[titan] 2025-10-05 01:42:51,649 - root - INFO - step: 5060 loss: 2.6497 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3358 +[titan] 2025-10-05 01:42:51,649 - root - INFO - lr: 4.8479e-05 gnorm: 1.25 [ 3:08:43<21:43:06] +[titan] 2025-10-05 01:43:02,533 - root - INFO - step: 5065 loss: 2.7482 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 01:43:02,533 - root - INFO - lr: 4.8476e-05 gnorm: 1.21 [ 3:08:53<21:42:53] +[titan] 2025-10-05 01:43:13,418 - root - INFO - step: 5070 loss: 2.8515 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 
417.67 mfu: 42.23% global_avg_ntp_loss: 0.3494 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:43:13,418 - root - INFO - lr: 4.8473e-05 gnorm: 1.24 [ 3:09:04<21:42:40] +[titan] 2025-10-05 01:43:24,295 - root - INFO - step: 5075 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3955 +[titan] 2025-10-05 01:43:24,295 - root - INFO - lr: 4.8470e-05 gnorm: 1.23 [ 3:09:15<21:42:26] +[titan] 2025-10-05 01:43:35,165 - root - INFO - step: 5080 loss: 2.6731 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3557 +[titan] 2025-10-05 01:43:35,166 - root - INFO - lr: 4.8466e-05 gnorm: 1.24 [ 3:09:26<21:42:13] +[titan] 2025-10-05 01:43:46,043 - root - INFO - step: 5085 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 01:43:46,043 - root - INFO - lr: 4.8463e-05 gnorm: 1.24 [ 3:09:37<21:42:00] +[titan] 2025-10-05 01:43:56,916 - root - INFO - step: 5090 loss: 2.7316 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4079 +[titan] 2025-10-05 01:43:56,917 - root - INFO - lr: 4.8460e-05 gnorm: 1.35 [ 3:09:48<21:41:46] +[titan] 2025-10-05 01:44:07,778 - root - INFO - step: 5095 loss: 2.7611 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4327 +[titan] 2025-10-05 01:44:07,778 - root - INFO - lr: 4.8457e-05 gnorm: 1.27 [ 3:09:59<21:41:33] +[titan] 2025-10-05 01:44:16,486 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:44:18,671 - root - INFO - step: 5100 loss: 2.6824 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3640 +[titan] 2025-10-05 01:44:18,671 - root - INFO - lr: 4.8453e-05 gnorm: 1.28 [ 3:10:10<21:41:20] +[titan] 2025-10-05 01:44:29,534 - root - INFO - step: 5105 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.4782 +[titan] 2025-10-05 01:44:29,534 - root - INFO - lr: 4.8450e-05 gnorm: 1.26 [ 3:10:20<21:41:06] +[titan] 2025-10-05 01:44:40,413 - root - INFO - step: 5110 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.3923 +[titan] 2025-10-05 01:44:40,413 - root - INFO - lr: 4.8447e-05 gnorm: 1.23 [ 3:10:31<21:40:53] +[titan] 2025-10-05 01:44:51,299 - root - INFO - step: 5115 loss: 2.6959 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3758 +[titan] 2025-10-05 01:44:51,300 - root - INFO - lr: 4.8444e-05 gnorm: 1.26 [ 3:10:42<21:40:40] +[titan] 2025-10-05 01:45:02,275 - root - INFO - step: 5120 loss: 2.7516 memory: 118.84GiB(85.28%) tps: 29,856 tflops: 414.21 mfu: 41.88% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4257 +[titan] 2025-10-05 01:45:02,275 - root - INFO - lr: 4.8440e-05 gnorm: 1.21 [ 3:10:53<21:40:27] +[titan] 2025-10-05 01:45:02,444 - root - INFO - Dumping profiler traces at step 5120 +[titan] 2025-10-05 01:45:02,481 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:45:13,379 - root - INFO - step: 5125 loss: 2.7714 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4427 +[titan] 2025-10-05 01:45:13,379 - root - INFO - lr: 4.8437e-05 gnorm: 1.24 [ 3:11:04<21:40:16] +[titan] 2025-10-05 01:45:24,262 - root - INFO - step: 5130 loss: 2.6786 
memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3612 +[titan] 2025-10-05 01:45:24,263 - root - INFO - lr: 4.8434e-05 gnorm: 1.22 [ 3:11:15<21:40:02] +[titan] 2025-10-05 01:45:35,196 - root - INFO - step: 5135 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4702 +[titan] 2025-10-05 01:45:35,196 - root - INFO - lr: 4.8431e-05 gnorm: 1.27 [ 3:11:26<21:39:49] +[titan] 2025-10-05 01:45:46,094 - root - INFO - step: 5140 loss: 2.7216 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3233 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:45:46,094 - root - INFO - lr: 4.8427e-05 gnorm: 1.26 [ 3:11:37<21:39:36] +[titan] 2025-10-05 01:45:56,991 - root - INFO - step: 5145 loss: 2.7084 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3869 +[titan] 2025-10-05 01:45:56,991 - root - INFO - lr: 4.8424e-05 gnorm: 1.23 [ 3:11:48<21:39:23] +[titan] 2025-10-05 01:46:05,684 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:46:07,870 - root - INFO - step: 5150 loss: 2.7550 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4284 +[titan] 2025-10-05 01:46:07,870 - root - INFO - lr: 4.8421e-05 gnorm: 1.28 [ 3:11:59<21:39:10] +[titan] 2025-10-05 01:46:18,768 - root - INFO - step: 5155 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3142 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 01:46:18,768 - root - INFO - lr: 4.8417e-05 gnorm: 1.20 [ 3:12:10<21:38:57] +[titan] 2025-10-05 01:46:29,716 - root - INFO - step: 5160 loss: 2.7141 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3918 +[titan] 2025-10-05 01:46:29,716 - root - INFO - lr: 4.8414e-05 gnorm: 1.22 [ 3:12:21<21:38:44] +[titan] 2025-10-05 01:46:40,611 - root - INFO - step: 5165 loss: 2.7431 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3245 global_avg_mtp_loss: 2.4185 +[titan] 2025-10-05 01:46:40,611 - root - INFO - lr: 4.8411e-05 gnorm: 1.18 [ 3:12:31<21:38:31] +[titan] 2025-10-05 01:46:51,503 - root - INFO - step: 5170 loss: 2.6610 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 01:46:51,503 - root - INFO - lr: 4.8408e-05 gnorm: 1.21 [ 3:12:42<21:38:18] +[titan] 2025-10-05 01:47:02,418 - root - INFO - step: 5175 loss: 2.7319 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4077 +[titan] 2025-10-05 01:47:02,418 - root - INFO - lr: 4.8404e-05 gnorm: 1.21 [ 3:12:53<21:38:05] +[titan] 2025-10-05 01:47:13,333 - root - INFO - step: 5180 loss: 2.7303 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:47:13,333 - root - INFO - lr: 4.8401e-05 
gnorm: 1.24 [ 3:13:04<21:37:52] +[titan] 2025-10-05 01:47:24,247 - root - INFO - step: 5185 loss: 2.6746 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 01:47:24,247 - root - INFO - lr: 4.8398e-05 gnorm: 1.22 [ 3:13:15<21:37:39] +[titan] 2025-10-05 01:47:35,216 - root - INFO - step: 5190 loss: 2.7738 memory: 118.84GiB(85.28%) tps: 29,874 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4456 +[titan] 2025-10-05 01:47:35,216 - root - INFO - lr: 4.8394e-05 gnorm: 1.31 [ 3:13:26<21:37:26] +[titan] 2025-10-05 01:47:46,124 - root - INFO - step: 5195 loss: 2.8394 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3384 global_avg_mtp_loss: 2.5009 +[titan] 2025-10-05 01:47:46,124 - root - INFO - lr: 4.8391e-05 gnorm: 1.27 [ 3:13:37<21:37:13] +[titan] 2025-10-05 01:47:54,839 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:47:57,027 - root - INFO - step: 5200 loss: 2.7263 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4026 +[titan] 2025-10-05 01:47:57,027 - root - INFO - lr: 4.8388e-05 gnorm: 1.24 [ 3:13:48<21:37:00] +[titan] 2025-10-05 01:48:07,915 - root - INFO - step: 5205 loss: 2.7277 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.4038 +[titan] 2025-10-05 01:48:07,915 - root - INFO - lr: 4.8384e-05 gnorm: 1.21 [ 3:13:59<21:36:47] +[titan] 2025-10-05 01:48:18,830 - root - INFO - step: 5210 loss: 2.6835 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3172 global_avg_mtp_loss: 2.3663 +[titan] 2025-10-05 01:48:18,830 - root - INFO - lr: 4.8381e-05 gnorm: 1.22 [ 3:14:10<21:36:34] +[titan] 2025-10-05 01:48:29,733 - root - INFO - step: 5215 loss: 2.6886 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3701 +[titan] 2025-10-05 01:48:29,733 - root - INFO - lr: 4.8378e-05 gnorm: 1.23 [ 3:14:21<21:36:21] +[titan] 2025-10-05 01:48:40,645 - root - INFO - step: 5220 loss: 2.7098 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 01:48:40,645 - root - INFO - lr: 4.8374e-05 gnorm: 1.25 [ 3:14:31<21:36:08] +[titan] 2025-10-05 01:48:51,536 - root - INFO - step: 5225 loss: 2.8169 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4830 +[titan] 2025-10-05 01:48:51,536 - root - INFO - lr: 4.8371e-05 gnorm: 1.24 [ 3:14:42<21:35:55] +[titan] 2025-10-05 01:49:02,433 - root - INFO - step: 5230 loss: 2.7455 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4197 +[titan] 2025-10-05 01:49:02,433 - root - INFO - lr: 4.8368e-05 
gnorm: 1.26 [ 3:14:53<21:35:42] +[titan] 2025-10-05 01:49:13,324 - root - INFO - step: 5235 loss: 2.7873 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.3324 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:49:13,325 - root - INFO - lr: 4.8364e-05 gnorm: 1.21 [ 3:15:04<21:35:29] +[titan] 2025-10-05 01:49:24,205 - root - INFO - step: 5240 loss: 2.6851 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3675 +[titan] 2025-10-05 01:49:24,206 - root - INFO - lr: 4.8361e-05 gnorm: 1.22 [ 3:15:15<21:35:16] +[titan] 2025-10-05 01:49:35,124 - root - INFO - step: 5245 loss: 2.7664 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:49:35,124 - root - INFO - lr: 4.8358e-05 gnorm: 1.24 [ 3:15:26<21:35:03] +[titan] 2025-10-05 01:49:43,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:49:45,992 - root - INFO - step: 5250 loss: 2.7297 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4047 +[titan] 2025-10-05 01:49:45,992 - root - INFO - lr: 4.8354e-05 gnorm: 1.29 [ 3:15:37<21:34:49] +[titan] 2025-10-05 01:49:56,896 - root - INFO - step: 5255 loss: 2.7151 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3928 +[titan] 2025-10-05 01:49:56,896 - root - INFO - lr: 4.8351e-05 gnorm: 1.29 [ 3:15:48<21:34:36] +[titan] 2025-10-05 01:50:07,763 - root - INFO - step: 5260 loss: 2.7886 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3308 global_avg_mtp_loss: 2.4578 +[titan] 2025-10-05 01:50:07,763 - root - INFO - lr: 4.8348e-05 gnorm: 1.36 [ 3:15:59<21:34:23] +[titan] 2025-10-05 01:50:18,645 - root - INFO - step: 5265 loss: 2.6117 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3012 +[titan] 2025-10-05 01:50:18,645 - root - INFO - lr: 4.8344e-05 gnorm: 1.24 [ 3:16:09<21:34:10] +[titan] 2025-10-05 01:50:29,515 - root - INFO - step: 5270 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4451 +[titan] 2025-10-05 01:50:29,516 - root - INFO - lr: 4.8341e-05 gnorm: 1.24 [ 3:16:20<21:33:57] +[titan] 2025-10-05 01:50:40,456 - root - INFO - step: 5275 loss: 2.7065 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.01% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3874 +[titan] 2025-10-05 01:50:40,457 - root - INFO - lr: 4.8338e-05 gnorm: 1.25 [ 3:16:31<21:33:44] +[titan] 2025-10-05 01:50:51,334 - root - INFO - step: 5280 loss: 2.7674 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4384 +[titan] 2025-10-05 01:50:51,334 - root - INFO - lr: 4.8334e-05 
gnorm: 1.25 [ 3:16:42<21:33:31] +[titan] 2025-10-05 01:51:02,214 - root - INFO - step: 5285 loss: 2.6660 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3493 +[titan] 2025-10-05 01:51:02,214 - root - INFO - lr: 4.8331e-05 gnorm: 1.20 [ 3:16:53<21:33:18] +[titan] 2025-10-05 01:51:13,075 - root - INFO - step: 5290 loss: 2.7457 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4189 +[titan] 2025-10-05 01:51:13,075 - root - INFO - lr: 4.8327e-05 gnorm: 1.25 [ 3:17:04<21:33:05] +[titan] 2025-10-05 01:51:23,938 - root - INFO - step: 5295 loss: 2.7299 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:51:23,938 - root - INFO - lr: 4.8324e-05 gnorm: 1.18 [ 3:17:15<21:32:51] +[titan] 2025-10-05 01:51:32,658 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:51:34,836 - root - INFO - step: 5300 loss: 2.7577 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4310 +[titan] 2025-10-05 01:51:34,836 - root - INFO - lr: 4.8321e-05 gnorm: 1.27 [ 3:17:26<21:32:38] +[titan] 2025-10-05 01:51:45,732 - root - INFO - step: 5305 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.4411 +[titan] 2025-10-05 01:51:45,732 - root - INFO - lr: 4.8317e-05 gnorm: 1.28 [ 3:17:37<21:32:25] +[titan] 2025-10-05 01:51:56,598 - root - INFO - step: 5310 loss: 2.6649 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3486 +[titan] 2025-10-05 01:51:56,598 - root - INFO - lr: 4.8314e-05 gnorm: 1.25 [ 3:17:47<21:32:12] +[titan] 2025-10-05 01:52:07,463 - root - INFO - step: 5315 loss: 2.6130 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3093 global_avg_mtp_loss: 2.3037 +[titan] 2025-10-05 01:52:07,463 - root - INFO - lr: 4.8311e-05 gnorm: 1.23 [ 3:17:58<21:31:59] +[titan] 2025-10-05 01:52:18,354 - root - INFO - step: 5320 loss: 2.7768 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4481 +[titan] 2025-10-05 01:52:18,354 - root - INFO - lr: 4.8307e-05 gnorm: 1.31 [ 3:18:09<21:31:46] +[titan] 2025-10-05 01:52:29,236 - root - INFO - step: 5325 loss: 2.7143 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3924 +[titan] 2025-10-05 01:52:29,236 - root - INFO - lr: 4.8304e-05 gnorm: 1.21 [ 3:18:20<21:31:33] +[titan] 2025-10-05 01:52:40,146 - root - INFO - step: 5330 loss: 2.7556 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4290 +[titan] 2025-10-05 01:52:40,146 - root - INFO - lr: 4.8300e-05 
gnorm: 1.27 [ 3:18:31<21:31:20] +[titan] 2025-10-05 01:52:51,044 - root - INFO - step: 5335 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4168 +[titan] 2025-10-05 01:52:51,044 - root - INFO - lr: 4.8297e-05 gnorm: 1.26 [ 3:18:42<21:31:07] +[titan] 2025-10-05 01:53:01,911 - root - INFO - step: 5340 loss: 2.7097 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3206 global_avg_mtp_loss: 2.3891 +[titan] 2025-10-05 01:53:01,911 - root - INFO - lr: 4.8294e-05 gnorm: 1.30 [ 3:18:53<21:30:54] +[titan] 2025-10-05 01:53:12,786 - root - INFO - step: 5345 loss: 2.6651 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3501 +[titan] 2025-10-05 01:53:12,787 - root - INFO - lr: 4.8290e-05 gnorm: 1.21 [ 3:19:04<21:30:41] +[titan] 2025-10-05 01:53:21,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:53:23,680 - root - INFO - step: 5350 loss: 2.7279 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3226 global_avg_mtp_loss: 2.4053 +[titan] 2025-10-05 01:53:23,680 - root - INFO - lr: 4.8287e-05 gnorm: 1.24 [ 3:19:15<21:30:28] +[titan] 2025-10-05 01:53:34,600 - root - INFO - step: 5355 loss: 2.6227 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 01:53:34,600 - root - INFO - lr: 4.8283e-05 gnorm: 1.28 [ 3:19:25<21:30:15] +[titan] 2025-10-05 01:53:45,495 - root - INFO - step: 5360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4548 +[titan] 2025-10-05 01:53:45,495 - root - INFO - lr: 4.8280e-05 gnorm: 1.23 [ 3:19:36<21:30:02] +[titan] 2025-10-05 01:53:56,371 - root - INFO - step: 5365 loss: 2.7914 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4555 +[titan] 2025-10-05 01:53:56,372 - root - INFO - lr: 4.8276e-05 gnorm: 1.23 [ 3:19:47<21:29:49] +[titan] 2025-10-05 01:54:07,246 - root - INFO - step: 5370 loss: 2.6816 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3627 +[titan] 2025-10-05 01:54:07,246 - root - INFO - lr: 4.8273e-05 gnorm: 1.23 [ 3:19:58<21:29:36] +[titan] 2025-10-05 01:54:18,130 - root - INFO - step: 5375 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3272 +[titan] 2025-10-05 01:54:18,130 - root - INFO - lr: 4.8270e-05 gnorm: 1.27 [ 3:20:09<21:29:23] +[titan] 2025-10-05 01:54:28,973 - root - INFO - step: 5380 loss: 2.7116 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3899 +[titan] 2025-10-05 01:54:28,973 - root - INFO - lr: 4.8266e-05 
gnorm: 1.23 [ 3:20:20<21:29:09] +[titan] 2025-10-05 01:54:39,864 - root - INFO - step: 5385 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3764 +[titan] 2025-10-05 01:54:39,864 - root - INFO - lr: 4.8263e-05 gnorm: 1.24 [ 3:20:31<21:28:56] +[titan] 2025-10-05 01:54:50,734 - root - INFO - step: 5390 loss: 2.7644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4366 +[titan] 2025-10-05 01:54:50,735 - root - INFO - lr: 4.8259e-05 gnorm: 1.25 [ 3:20:42<21:28:43] +[titan] 2025-10-05 01:55:01,593 - root - INFO - step: 5395 loss: 2.7603 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3272 global_avg_mtp_loss: 2.4331 +[titan] 2025-10-05 01:55:01,593 - root - INFO - lr: 4.8256e-05 gnorm: 1.21 [ 3:20:52<21:28:30] +[titan] 2025-10-05 01:55:10,273 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:55:12,472 - root - INFO - step: 5400 loss: 2.7045 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3833 +[titan] 2025-10-05 01:55:12,472 - root - INFO - lr: 4.8252e-05 gnorm: 1.20 [ 3:21:03<21:28:17] +[titan] 2025-10-05 01:55:23,346 - root - INFO - step: 5405 loss: 2.7062 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3213 global_avg_mtp_loss: 2.3849 +[titan] 2025-10-05 01:55:23,346 - root - INFO - lr: 4.8249e-05 gnorm: 1.20 [ 3:21:14<21:28:04] +[titan] 2025-10-05 01:55:34,207 - root - INFO - step: 5410 loss: 2.7345 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4118 +[titan] 2025-10-05 01:55:34,207 - root - INFO - lr: 4.8245e-05 gnorm: 1.26 [ 3:21:25<21:27:51] +[titan] 2025-10-05 01:55:45,114 - root - INFO - step: 5415 loss: 2.6787 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3618 +[titan] 2025-10-05 01:55:45,114 - root - INFO - lr: 4.8242e-05 gnorm: 1.16 [ 3:21:36<21:27:38] +[titan] 2025-10-05 01:55:55,985 - root - INFO - step: 5420 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.2994 +[titan] 2025-10-05 01:55:55,985 - root - INFO - lr: 4.8239e-05 gnorm: 1.23 [ 3:21:47<21:27:25] +[titan] 2025-10-05 01:56:06,858 - root - INFO - step: 5425 loss: 2.6262 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 01:56:06,858 - root - INFO - lr: 4.8235e-05 gnorm: 1.20 [ 3:21:58<21:27:12] +[titan] 2025-10-05 01:56:17,752 - root - INFO - step: 5430 loss: 2.6880 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3706 +[titan] 2025-10-05 01:56:17,752 - root - INFO - lr: 4.8232e-05 
gnorm: 1.21 [ 3:22:09<21:26:59] +[titan] 2025-10-05 01:56:28,647 - root - INFO - step: 5435 loss: 2.6104 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 01:56:28,647 - root - INFO - lr: 4.8228e-05 gnorm: 1.24 [ 3:22:19<21:26:46] +[titan] 2025-10-05 01:56:39,549 - root - INFO - step: 5440 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3753 +[titan] 2025-10-05 01:56:39,549 - root - INFO - lr: 4.8225e-05 gnorm: 1.24 [ 3:22:30<21:26:33] +[titan] 2025-10-05 01:56:50,425 - root - INFO - step: 5445 loss: 2.7005 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 01:56:50,426 - root - INFO - lr: 4.8221e-05 gnorm: 1.24 [ 3:22:41<21:26:20] +[titan] 2025-10-05 01:56:59,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:57:01,307 - root - INFO - step: 5450 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3211 global_avg_mtp_loss: 2.3942 +[titan] 2025-10-05 01:57:01,307 - root - INFO - lr: 4.8218e-05 gnorm: 1.22 [ 3:22:52<21:26:07] +[titan] 2025-10-05 01:57:12,168 - root - INFO - step: 5455 loss: 2.7238 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4003 +[titan] 2025-10-05 01:57:12,168 - root - INFO - lr: 4.8214e-05 gnorm: 1.25 [ 3:23:03<21:25:54] +[titan] 2025-10-05 01:57:23,004 - root - INFO - step: 5460 loss: 2.7013 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 01:57:23,004 - root - INFO - lr: 4.8211e-05 gnorm: 1.21 [ 3:23:14<21:25:41] +[titan] 2025-10-05 01:57:33,870 - root - INFO - step: 5465 loss: 2.7566 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:57:33,870 - root - INFO - lr: 4.8207e-05 gnorm: 1.28 [ 3:23:25<21:25:28] +[titan] 2025-10-05 01:57:44,735 - root - INFO - step: 5470 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 01:57:44,736 - root - INFO - lr: 4.8204e-05 gnorm: 3.95 [ 3:23:36<21:25:15] +[titan] 2025-10-05 01:57:55,597 - root - INFO - step: 5475 loss: 2.7332 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4064 +[titan] 2025-10-05 01:57:55,598 - root - INFO - lr: 4.8200e-05 gnorm: 5.60 [ 3:23:46<21:25:02] +[titan] 2025-10-05 01:58:06,457 - root - INFO - step: 5480 loss: 2.6333 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3196 +[titan] 2025-10-05 01:58:06,457 - root - INFO - lr: 4.8197e-05 
gnorm: 1.42 [ 3:23:57<21:24:49] +[titan] 2025-10-05 01:58:17,326 - root - INFO - step: 5485 loss: 2.6808 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3195 global_avg_mtp_loss: 2.3613 +[titan] 2025-10-05 01:58:17,327 - root - INFO - lr: 4.8193e-05 gnorm: 1.64 [ 3:24:08<21:24:35] +[titan] 2025-10-05 01:58:28,172 - root - INFO - step: 5490 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3789 +[titan] 2025-10-05 01:58:28,172 - root - INFO - lr: 4.8190e-05 gnorm: 1.44 [ 3:24:19<21:24:22] +[titan] 2025-10-05 01:58:39,061 - root - INFO - step: 5495 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:58:39,061 - root - INFO - lr: 4.8186e-05 gnorm: 1.37 [ 3:24:30<21:24:09] +[titan] 2025-10-05 01:58:47,780 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:58:49,967 - root - INFO - step: 5500 loss: 2.7427 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4165 +[titan] 2025-10-05 01:58:49,967 - root - INFO - lr: 4.8183e-05 gnorm: 1.30 [ 3:24:41<21:23:57] +[titan] 2025-10-05 01:59:00,823 - root - INFO - step: 5505 loss: 2.7373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4120 +[titan] 2025-10-05 01:59:00,823 - root - INFO - lr: 4.8179e-05 gnorm: 1.29 [ 3:24:52<21:23:44] +[titan] 2025-10-05 01:59:11,693 - root - INFO - step: 5510 loss: 2.6666 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3511 +[titan] 2025-10-05 01:59:11,693 - root - INFO - lr: 4.8176e-05 gnorm: 1.30 [ 3:25:03<21:23:30] +[titan] 2025-10-05 01:59:22,587 - root - INFO - step: 5515 loss: 2.7189 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.3938 +[titan] 2025-10-05 01:59:22,587 - root - INFO - lr: 4.8172e-05 gnorm: 6.71 [ 3:25:13<21:23:18] +[titan] 2025-10-05 01:59:33,471 - root - INFO - step: 5520 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3125 global_avg_mtp_loss: 2.3281 +[titan] 2025-10-05 01:59:33,471 - root - INFO - lr: 4.8169e-05 gnorm: 1.27 [ 3:25:24<21:23:05] +[titan] 2025-10-05 01:59:44,386 - root - INFO - step: 5525 loss: 2.6236 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3140 +[titan] 2025-10-05 01:59:44,386 - root - INFO - lr: 4.8165e-05 gnorm: 1.21 [ 3:25:35<21:22:52] +[titan] 2025-10-05 01:59:55,268 - root - INFO - step: 5530 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 01:59:55,268 - root - INFO - lr: 4.8162e-05 
gnorm: 1.23 [ 3:25:46<21:22:39] +[titan] 2025-10-05 02:00:06,138 - root - INFO - step: 5535 loss: 2.6010 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3104 global_avg_mtp_loss: 2.2906 +[titan] 2025-10-05 02:00:06,139 - root - INFO - lr: 4.8158e-05 gnorm: 1.28 [ 3:25:57<21:22:26] +[titan] 2025-10-05 02:00:17,012 - root - INFO - step: 5540 loss: 2.6903 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 02:00:17,013 - root - INFO - lr: 4.8155e-05 gnorm: 1.28 [ 3:26:08<21:22:13] +[titan] 2025-10-05 02:00:27,882 - root - INFO - step: 5545 loss: 2.6624 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3482 +[titan] 2025-10-05 02:00:27,882 - root - INFO - lr: 4.8151e-05 gnorm: 1.25 [ 3:26:19<21:22:00] +[titan] 2025-10-05 02:00:36,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:00:38,754 - root - INFO - step: 5550 loss: 2.6437 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3307 +[titan] 2025-10-05 02:00:38,754 - root - INFO - lr: 4.8147e-05 gnorm: 1.23 [ 3:26:30<21:21:47] +[titan] 2025-10-05 02:00:49,688 - root - INFO - step: 5555 loss: 2.6840 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:00:49,689 - root - INFO - lr: 4.8144e-05 gnorm: 1.21 [ 3:26:40<21:21:35] +[titan] 2025-10-05 02:01:00,569 - root - INFO - step: 5560 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3569 +[titan] 2025-10-05 02:01:00,569 - root - INFO - lr: 4.8140e-05 gnorm: 1.21 [ 3:26:51<21:21:22] +[titan] 2025-10-05 02:01:11,488 - root - INFO - step: 5565 loss: 2.6609 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 02:01:11,488 - root - INFO - lr: 4.8137e-05 gnorm: 1.24 [ 3:27:02<21:21:09] +[titan] 2025-10-05 02:01:22,384 - root - INFO - step: 5570 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.3978 +[titan] 2025-10-05 02:01:22,384 - root - INFO - lr: 4.8133e-05 gnorm: 1.26 [ 3:27:13<21:20:56] +[titan] 2025-10-05 02:01:33,286 - root - INFO - step: 5575 loss: 2.6770 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3597 +[titan] 2025-10-05 02:01:33,287 - root - INFO - lr: 4.8130e-05 gnorm: 1.23 [ 3:27:24<21:20:43] +[titan] 2025-10-05 02:01:44,187 - root - INFO - step: 5580 loss: 2.6684 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.3160 global_avg_mtp_loss: 2.3524 +[titan] 2025-10-05 02:01:44,187 - root - INFO - lr: 4.8126e-05 
gnorm: 1.22 [ 3:27:35<21:20:31] +[titan] 2025-10-05 02:01:55,071 - root - INFO - step: 5585 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3821 +[titan] 2025-10-05 02:01:55,072 - root - INFO - lr: 4.8123e-05 gnorm: 1.23 [ 3:27:46<21:20:18] +[titan] 2025-10-05 02:02:05,953 - root - INFO - step: 5590 loss: 2.7020 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 02:02:05,954 - root - INFO - lr: 4.8119e-05 gnorm: 1.29 [ 3:27:57<21:20:05] +[titan] 2025-10-05 02:02:16,866 - root - INFO - step: 5595 loss: 2.6621 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3458 +[titan] 2025-10-05 02:02:16,866 - root - INFO - lr: 4.8115e-05 gnorm: 1.25 [ 3:28:08<21:19:52] +[titan] 2025-10-05 02:02:25,553 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:02:27,738 - root - INFO - step: 5600 loss: 2.7026 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3818 +[titan] 2025-10-05 02:02:27,739 - root - INFO - lr: 4.8112e-05 gnorm: 1.26 [ 3:28:19<21:19:39] +[titan] 2025-10-05 02:02:38,604 - root - INFO - step: 5605 loss: 2.6192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:02:38,604 - root - INFO - lr: 4.8108e-05 gnorm: 1.25 [ 3:28:29<21:19:26] +[titan] 2025-10-05 02:02:49,527 - root - INFO - step: 5610 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3444 +[titan] 2025-10-05 02:02:49,527 - root - INFO - lr: 4.8105e-05 gnorm: 1.32 [ 3:28:40<21:19:14] +[titan] 2025-10-05 02:03:00,407 - root - INFO - step: 5615 loss: 2.6727 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3552 +[titan] 2025-10-05 02:03:00,407 - root - INFO - lr: 4.8101e-05 gnorm: 1.19 [ 3:28:51<21:19:01] +[titan] 2025-10-05 02:03:11,293 - root - INFO - step: 5620 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3098 +[titan] 2025-10-05 02:03:11,293 - root - INFO - lr: 4.8097e-05 gnorm: 1.24 [ 3:29:02<21:18:48] +[titan] 2025-10-05 02:03:22,216 - root - INFO - step: 5625 loss: 2.6235 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 02:03:22,216 - root - INFO - lr: 4.8094e-05 gnorm: 1.21 [ 3:29:13<21:18:35] +[titan] 2025-10-05 02:03:33,165 - root - INFO - step: 5630 loss: 2.7089 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3881 +[titan] 2025-10-05 02:03:33,165 - root - INFO - lr: 4.8090e-05 
gnorm: 1.27 [ 3:29:24<21:18:23] +[titan] 2025-10-05 02:03:37,689 - root - INFO - Dumping profiler traces at step 5632 +[titan] 2025-10-05 02:03:37,726 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:03:44,274 - root - INFO - step: 5635 loss: 2.6796 memory: 118.84GiB(85.28%) tps: 29,497 tflops: 409.23 mfu: 41.38% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3615 +[titan] 2025-10-05 02:03:44,274 - root - INFO - lr: 4.8087e-05 gnorm: 1.25 [ 3:29:35<21:18:12] +[titan] 2025-10-05 02:03:55,158 - root - INFO - step: 5640 loss: 2.6061 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3075 global_avg_mtp_loss: 2.2987 +[titan] 2025-10-05 02:03:55,158 - root - INFO - lr: 4.8083e-05 gnorm: 1.25 [ 3:29:46<21:17:59] +[titan] 2025-10-05 02:04:06,053 - root - INFO - step: 5645 loss: 2.7125 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3908 +[titan] 2025-10-05 02:04:06,053 - root - INFO - lr: 4.8079e-05 gnorm: 1.34 [ 3:29:57<21:17:46] +[titan] 2025-10-05 02:04:14,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:04:16,937 - root - INFO - step: 5650 loss: 2.5977 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:04:16,937 - root - INFO - lr: 4.8076e-05 gnorm: 1.27 [ 3:30:08<21:17:33] +[titan] 2025-10-05 02:04:27,853 - root - INFO - step: 5655 loss: 2.6416 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3299 +[titan] 2025-10-05 02:04:27,854 - root - INFO - lr: 4.8072e-05 gnorm: 1.30 [ 3:30:19<21:17:20] +[titan] 2025-10-05 02:04:38,772 - root - INFO - step: 5660 loss: 2.7230 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 02:04:38,772 - root - INFO - lr: 4.8069e-05 gnorm: 1.24 [ 3:30:30<21:17:08] +[titan] 2025-10-05 02:04:49,685 - root - INFO - step: 5665 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3829 +[titan] 2025-10-05 02:04:49,685 - root - INFO - lr: 4.8065e-05 gnorm: 1.26 [ 3:30:40<21:16:55] +[titan] 2025-10-05 02:05:00,577 - root - INFO - step: 5670 loss: 2.6274 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3178 +[titan] 2025-10-05 02:05:00,577 - root - INFO - lr: 4.8061e-05 gnorm: 1.25 [ 3:30:51<21:16:42] +[titan] 2025-10-05 02:05:11,454 - root - INFO - step: 5675 loss: 2.6289 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:11,454 - root - INFO - lr: 4.8058e-05 gnorm: 1.22 [ 3:31:02<21:16:30] +[titan] 2025-10-05 02:05:22,325 - root - INFO - step: 5680 loss: 2.7071 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3216 global_avg_mtp_loss: 2.3854 +[titan] 2025-10-05 02:05:22,325 - root - INFO - lr: 4.8054e-05 
gnorm: 1.24 [ 3:31:13<21:16:17] +[titan] 2025-10-05 02:05:33,190 - root - INFO - step: 5685 loss: 2.6647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3151 global_avg_mtp_loss: 2.3496 +[titan] 2025-10-05 02:05:33,190 - root - INFO - lr: 4.8051e-05 gnorm: 1.25 [ 3:31:24<21:16:04] +[titan] 2025-10-05 02:05:44,079 - root - INFO - step: 5690 loss: 2.6318 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3204 +[titan] 2025-10-05 02:05:44,079 - root - INFO - lr: 4.8047e-05 gnorm: 1.20 [ 3:31:35<21:15:51] +[titan] 2025-10-05 02:05:54,989 - root - INFO - step: 5695 loss: 2.6284 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:54,989 - root - INFO - lr: 4.8043e-05 gnorm: 1.18 [ 3:31:46<21:15:38] +[titan] 2025-10-05 02:06:03,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:06:05,856 - root - INFO - step: 5700 loss: 2.6425 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:06:05,856 - root - INFO - lr: 4.8040e-05 gnorm: 1.17 [ 3:31:57<21:15:25] +[titan] 2025-10-05 02:06:16,739 - root - INFO - step: 5705 loss: 2.6825 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3176 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:06:16,740 - root - INFO - lr: 4.8036e-05 gnorm: 1.21 [ 3:32:08<21:15:13] +[titan] 2025-10-05 02:06:27,613 - root - INFO - step: 5710 loss: 2.7487 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 02:06:27,613 - root - INFO - lr: 4.8032e-05 gnorm: 1.24 [ 3:32:18<21:15:00] +[titan] 2025-10-05 02:06:38,482 - root - INFO - step: 5715 loss: 2.6692 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3542 +[titan] 2025-10-05 02:06:38,482 - root - INFO - lr: 4.8029e-05 gnorm: 1.27 [ 3:32:29<21:14:47] +[titan] 2025-10-05 02:06:49,408 - root - INFO - step: 5720 loss: 2.6745 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3170 global_avg_mtp_loss: 2.3576 +[titan] 2025-10-05 02:06:49,408 - root - INFO - lr: 4.8025e-05 gnorm: 1.21 [ 3:32:40<21:14:34] +[titan] 2025-10-05 02:07:00,305 - root - INFO - step: 5725 loss: 2.6145 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3063 +[titan] 2025-10-05 02:07:00,305 - root - INFO - lr: 4.8021e-05 gnorm: 1.25 [ 3:32:51<21:14:22] +[titan] 2025-10-05 02:07:11,183 - root - INFO - step: 5730 loss: 2.6939 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3752 +[titan] 2025-10-05 02:07:11,183 - root - INFO - lr: 4.8018e-05 
gnorm: 1.27 [ 3:33:02<21:14:09] +[titan] 2025-10-05 02:07:22,045 - root - INFO - step: 5735 loss: 2.6083 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3002 +[titan] 2025-10-05 02:07:22,046 - root - INFO - lr: 4.8014e-05 gnorm: 1.28 [ 3:33:13<21:13:56] +[titan] 2025-10-05 02:07:32,920 - root - INFO - step: 5740 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3073 global_avg_mtp_loss: 2.2892 +[titan] 2025-10-05 02:07:32,920 - root - INFO - lr: 4.8010e-05 gnorm: 1.17 [ 3:33:24<21:13:43] +[titan] 2025-10-05 02:07:43,786 - root - INFO - step: 5745 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:07:43,786 - root - INFO - lr: 4.8007e-05 gnorm: 1.24 [ 3:33:35<21:13:30] +[titan] 2025-10-05 02:07:52,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:07:54,774 - root - INFO - step: 5750 loss: 2.6142 memory: 118.84GiB(85.28%) tps: 29,821 tflops: 413.73 mfu: 41.83% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 02:07:54,774 - root - INFO - lr: 4.8003e-05 gnorm: 1.24 [ 3:33:46<21:13:18] +[titan] 2025-10-05 02:08:05,686 - root - INFO - step: 5755 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3384 +[titan] 2025-10-05 02:08:05,686 - root - INFO - lr: 4.7999e-05 gnorm: 1.20 [ 3:33:56<21:13:06] +[titan] 2025-10-05 02:08:16,606 - root - INFO - step: 5760 loss: 2.7255 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4025 +[titan] 2025-10-05 02:08:16,606 - root - INFO - lr: 4.7996e-05 gnorm: 1.24 [ 3:34:07<21:12:53] +[titan] 2025-10-05 02:08:27,489 - root - INFO - step: 5765 loss: 2.6698 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3144 global_avg_mtp_loss: 2.3553 +[titan] 2025-10-05 02:08:27,489 - root - INFO - lr: 4.7992e-05 gnorm: 1.25 [ 3:34:18<21:12:40] +[titan] 2025-10-05 02:08:38,371 - root - INFO - step: 5770 loss: 2.7107 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 02:08:38,371 - root - INFO - lr: 4.7988e-05 gnorm: 1.26 [ 3:34:29<21:12:28] +[titan] 2025-10-05 02:08:49,290 - root - INFO - step: 5775 loss: 2.7046 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3203 global_avg_mtp_loss: 2.3843 +[titan] 2025-10-05 02:08:49,291 - root - INFO - lr: 4.7985e-05 gnorm: 1.28 [ 3:34:40<21:12:15] +[titan] 2025-10-05 02:09:00,170 - root - INFO - step: 5780 loss: 2.7717 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4437 +[titan] 2025-10-05 02:09:00,170 - root - INFO - lr: 4.7981e-05 
gnorm: 1.66 [ 3:34:51<21:12:02] +[titan] 2025-10-05 02:09:11,065 - root - INFO - step: 5785 loss: 2.6598 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3451 +[titan] 2025-10-05 02:09:11,066 - root - INFO - lr: 4.7977e-05 gnorm: 1.28 [ 3:35:02<21:11:50] +[titan] 2025-10-05 02:09:21,936 - root - INFO - step: 5790 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3077 +[titan] 2025-10-05 02:09:21,936 - root - INFO - lr: 4.7973e-05 gnorm: 1.24 [ 3:35:13<21:11:37] +[titan] 2025-10-05 02:09:32,809 - root - INFO - step: 5795 loss: 2.6803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3637 +[titan] 2025-10-05 02:09:32,809 - root - INFO - lr: 4.7970e-05 gnorm: 1.27 [ 3:35:24<21:11:24] +[titan] 2025-10-05 02:09:41,490 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:09:43,680 - root - INFO - step: 5800 loss: 2.6313 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3124 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:09:43,680 - root - INFO - lr: 4.7966e-05 gnorm: 1.25 [ 3:35:34<21:11:11] +[titan] 2025-10-05 02:09:54,628 - root - INFO - step: 5805 loss: 2.6182 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.3088 +[titan] 2025-10-05 02:09:54,629 - root - INFO - lr: 4.7962e-05 gnorm: 1.26 [ 3:35:45<21:10:59] +[titan] 2025-10-05 02:10:05,480 - root - INFO - step: 5810 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 02:10:05,481 - root - INFO - lr: 4.7959e-05 gnorm: 1.28 [ 3:35:56<21:10:46] +[titan] 2025-10-05 02:10:16,374 - root - INFO - step: 5815 loss: 2.6620 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3479 +[titan] 2025-10-05 02:10:16,374 - root - INFO - lr: 4.7955e-05 gnorm: 1.22 [ 3:36:07<21:10:33] +[titan] 2025-10-05 02:10:27,283 - root - INFO - step: 5820 loss: 2.6968 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3191 global_avg_mtp_loss: 2.3777 +[titan] 2025-10-05 02:10:27,283 - root - INFO - lr: 4.7951e-05 gnorm: 1.21 [ 3:36:18<21:10:21] +[titan] 2025-10-05 02:10:38,152 - root - INFO - step: 5825 loss: 2.6399 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3263 +[titan] 2025-10-05 02:10:38,152 - root - INFO - lr: 4.7947e-05 gnorm: 1.26 [ 3:36:29<21:10:08] +[titan] 2025-10-05 02:10:49,018 - root - INFO - step: 5830 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3158 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:10:49,018 - root - INFO - lr: 4.7944e-05 
gnorm: 1.27 [ 3:36:40<21:09:55] +[titan] 2025-10-05 02:10:59,943 - root - INFO - step: 5835 loss: 2.6687 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 02:10:59,943 - root - INFO - lr: 4.7940e-05 gnorm: 1.27 [ 3:36:51<21:09:43] +[titan] 2025-10-05 02:11:10,804 - root - INFO - step: 5840 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3646 +[titan] 2025-10-05 02:11:10,804 - root - INFO - lr: 4.7936e-05 gnorm: 1.23 [ 3:37:02<21:09:30] +[titan] 2025-10-05 02:11:21,663 - root - INFO - step: 5845 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:11:21,663 - root - INFO - lr: 4.7933e-05 gnorm: 1.22 [ 3:37:12<21:09:17] +[titan] 2025-10-05 02:11:30,368 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:11:32,557 - root - INFO - step: 5850 loss: 2.5946 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2879 +[titan] 2025-10-05 02:11:32,557 - root - INFO - lr: 4.7929e-05 gnorm: 1.24 [ 3:37:23<21:09:04] +[titan] 2025-10-05 02:11:43,442 - root - INFO - step: 5855 loss: 2.6553 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3404 +[titan] 2025-10-05 02:11:43,442 - root - INFO - lr: 4.7925e-05 gnorm: 1.31 [ 3:37:34<21:08:52] +[titan] 2025-10-05 02:11:54,344 - root - INFO - step: 5860 loss: 2.6942 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3769 +[titan] 2025-10-05 02:11:54,344 - root - INFO - lr: 4.7921e-05 gnorm: 1.23 [ 3:37:45<21:08:39] +[titan] 2025-10-05 02:12:05,223 - root - INFO - step: 5865 loss: 2.5612 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3011 global_avg_mtp_loss: 2.2601 +[titan] 2025-10-05 02:12:05,223 - root - INFO - lr: 4.7918e-05 gnorm: 1.19 [ 3:37:56<21:08:26] +[titan] 2025-10-05 02:12:16,102 - root - INFO - step: 5870 loss: 2.6730 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3578 +[titan] 2025-10-05 02:12:16,102 - root - INFO - lr: 4.7914e-05 gnorm: 1.22 [ 3:38:07<21:08:14] +[titan] 2025-10-05 02:12:26,998 - root - INFO - step: 5875 loss: 2.7092 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3873 +[titan] 2025-10-05 02:12:26,998 - root - INFO - lr: 4.7910e-05 gnorm: 1.27 [ 3:38:18<21:08:01] +[titan] 2025-10-05 02:12:37,886 - root - INFO - step: 5880 loss: 2.6639 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3140 global_avg_mtp_loss: 2.3499 +[titan] 2025-10-05 02:12:37,886 - root - INFO - lr: 4.7906e-05 
gnorm: 1.23 [ 3:38:29<21:07:48] +[titan] 2025-10-05 02:12:48,782 - root - INFO - step: 5885 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3282 +[titan] 2025-10-05 02:12:48,782 - root - INFO - lr: 4.7903e-05 gnorm: 1.23 [ 3:38:40<21:07:36] +[titan] 2025-10-05 02:12:59,686 - root - INFO - step: 5890 loss: 2.6332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3225 +[titan] 2025-10-05 02:12:59,686 - root - INFO - lr: 4.7899e-05 gnorm: 1.23 [ 3:38:50<21:07:23] +[titan] 2025-10-05 02:13:10,552 - root - INFO - step: 5895 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3779 +[titan] 2025-10-05 02:13:10,552 - root - INFO - lr: 4.7895e-05 gnorm: 1.20 [ 3:39:01<21:07:10] +[titan] 2025-10-05 02:13:19,229 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:13:21,417 - root - INFO - step: 5900 loss: 2.6773 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3598 +[titan] 2025-10-05 02:13:21,418 - root - INFO - lr: 4.7891e-05 gnorm: 1.21 [ 3:39:12<21:06:58] +[titan] 2025-10-05 02:13:32,300 - root - INFO - step: 5905 loss: 2.6413 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3290 +[titan] 2025-10-05 02:13:32,300 - root - INFO - lr: 4.7888e-05 gnorm: 1.21 [ 3:39:23<21:06:45] +[titan] 2025-10-05 02:13:43,183 - root - INFO - step: 5910 loss: 2.7061 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3853 +[titan] 2025-10-05 02:13:43,184 - root - INFO - lr: 4.7884e-05 gnorm: 1.23 [ 3:39:34<21:06:32] +[titan] 2025-10-05 02:13:54,153 - root - INFO - step: 5915 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 02:13:54,153 - root - INFO - lr: 4.7880e-05 gnorm: 1.20 [ 3:39:45<21:06:20] +[titan] 2025-10-05 02:14:05,035 - root - INFO - step: 5920 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3748 +[titan] 2025-10-05 02:14:05,035 - root - INFO - lr: 4.7876e-05 gnorm: 1.23 [ 3:39:56<21:06:07] +[titan] 2025-10-05 02:14:15,930 - root - INFO - step: 5925 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 02:14:15,930 - root - INFO - lr: 4.7872e-05 gnorm: 1.31 [ 3:40:07<21:05:55] +[titan] 2025-10-05 02:14:26,810 - root - INFO - step: 5930 loss: 2.5791 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 02:14:26,810 - root - INFO - lr: 4.7869e-05 
gnorm: 1.28 [ 3:40:18<21:05:42] +[titan] 2025-10-05 02:14:37,679 - root - INFO - step: 5935 loss: 2.8206 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3526 global_avg_mtp_loss: 2.4680 +[titan] 2025-10-05 02:14:37,679 - root - INFO - lr: 4.7865e-05 gnorm: 1.21 [ 3:40:28<21:05:29] +[titan] 2025-10-05 02:14:48,570 - root - INFO - step: 5940 loss: 2.6562 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3137 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:14:48,571 - root - INFO - lr: 4.7861e-05 gnorm: 1.27 [ 3:40:39<21:05:17] +[titan] 2025-10-05 02:14:59,517 - root - INFO - step: 5945 loss: 2.6955 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3188 global_avg_mtp_loss: 2.3767 +[titan] 2025-10-05 02:14:59,518 - root - INFO - lr: 4.7857e-05 gnorm: 1.24 [ 3:40:50<21:05:05] +[titan] 2025-10-05 02:15:08,209 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:15:10,403 - root - INFO - step: 5950 loss: 2.6441 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3332 +[titan] 2025-10-05 02:15:10,403 - root - INFO - lr: 4.7853e-05 gnorm: 1.24 [ 3:41:01<21:04:52] +[titan] 2025-10-05 02:15:21,261 - root - INFO - step: 5955 loss: 2.6351 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3224 +[titan] 2025-10-05 02:15:21,261 - root - INFO - lr: 4.7850e-05 gnorm: 1.27 [ 3:41:12<21:04:39] +[titan] 2025-10-05 02:15:32,145 - root - INFO - step: 5960 loss: 2.5704 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2660 +[titan] 2025-10-05 02:15:32,146 - root - INFO - lr: 4.7846e-05 gnorm: 1.24 [ 3:41:23<21:04:27] +[titan] 2025-10-05 02:15:43,038 - root - INFO - step: 5965 loss: 2.6451 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.3134 global_avg_mtp_loss: 2.3317 +[titan] 2025-10-05 02:15:43,038 - root - INFO - lr: 4.7842e-05 gnorm: 1.24 [ 3:41:34<21:04:14] +[titan] 2025-10-05 02:15:53,932 - root - INFO - step: 5970 loss: 2.6446 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:15:53,932 - root - INFO - lr: 4.7838e-05 gnorm: 1.25 [ 3:41:45<21:04:01] +[titan] 2025-10-05 02:16:04,943 - root - INFO - step: 5975 loss: 2.6984 memory: 118.84GiB(85.28%) tps: 29,760 tflops: 412.88 mfu: 41.75% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 02:16:04,943 - root - INFO - lr: 4.7834e-05 gnorm: 1.22 [ 3:41:56<21:03:49] +[titan] 2025-10-05 02:16:15,864 - root - INFO - step: 5980 loss: 2.6883 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3697 +[titan] 2025-10-05 02:16:15,864 - root - INFO - lr: 4.7831e-05 
gnorm: 1.23 [ 3:42:07<21:03:37] +[titan] 2025-10-05 02:16:26,743 - root - INFO - step: 5985 loss: 2.6999 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3205 global_avg_mtp_loss: 2.3795 +[titan] 2025-10-05 02:16:26,743 - root - INFO - lr: 4.7827e-05 gnorm: 1.25 [ 3:42:18<21:03:24] +[titan] 2025-10-05 02:16:37,616 - root - INFO - step: 5990 loss: 2.6514 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3368 +[titan] 2025-10-05 02:16:37,616 - root - INFO - lr: 4.7823e-05 gnorm: 1.24 [ 3:42:28<21:03:12] +[titan] 2025-10-05 02:16:48,504 - root - INFO - step: 5995 loss: 2.6633 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3143 global_avg_mtp_loss: 2.3490 +[titan] 2025-10-05 02:16:48,504 - root - INFO - lr: 4.7819e-05 gnorm: 1.24 [ 3:42:39<21:02:59] +[titan] 2025-10-05 02:16:57,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:16:59,424 - root - INFO - step: 6000 loss: 2.7331 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4096 +[titan] 2025-10-05 02:16:59,424 - root - INFO - lr: 4.7815e-05 gnorm: 1.20 [ 3:42:50<21:02:47] +[titan] 2025-10-05 02:17:10,295 - root - INFO - step: 6005 loss: 2.6202 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3105 +[titan] 2025-10-05 02:17:10,295 - root - INFO - lr: 4.7811e-05 gnorm: 1.18 [ 3:43:01<21:02:34] +[titan] 2025-10-05 02:17:21,201 - root - INFO - step: 6010 loss: 2.5634 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2608 +[titan] 2025-10-05 02:17:21,201 - root - INFO - lr: 4.7808e-05 gnorm: 1.22 [ 3:43:12<21:02:22] +[titan] 2025-10-05 02:17:32,082 - root - INFO - step: 6015 loss: 2.6412 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3292 +[titan] 2025-10-05 02:17:32,082 - root - INFO - lr: 4.7804e-05 gnorm: 1.19 [ 3:43:23<21:02:09] +[titan] 2025-10-05 02:17:42,964 - root - INFO - step: 6020 loss: 2.7137 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3909 +[titan] 2025-10-05 02:17:42,964 - root - INFO - lr: 4.7800e-05 gnorm: 1.23 [ 3:43:34<21:01:56] +[titan] 2025-10-05 02:17:53,873 - root - INFO - step: 6025 loss: 2.6409 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3232 +[titan] 2025-10-05 02:17:53,873 - root - INFO - lr: 4.7796e-05 gnorm: 1.20 [ 3:43:45<21:01:44] +[titan] 2025-10-05 02:18:04,793 - root - INFO - step: 6030 loss: 2.6673 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3165 global_avg_mtp_loss: 2.3508 +[titan] 2025-10-05 02:18:04,793 - root - INFO - lr: 4.7792e-05 
gnorm: 1.27 [ 3:43:56<21:01:31] +[titan] 2025-10-05 02:18:15,648 - root - INFO - step: 6035 loss: 2.5627 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2603 +[titan] 2025-10-05 02:18:15,648 - root - INFO - lr: 4.7788e-05 gnorm: 1.20 [ 3:44:06<21:01:19] +[titan] 2025-10-05 02:18:26,520 - root - INFO - step: 6040 loss: 2.6300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:18:26,520 - root - INFO - lr: 4.7784e-05 gnorm: 1.19 [ 3:44:17<21:01:06] +[titan] 2025-10-05 02:18:37,421 - root - INFO - step: 6045 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2925 +[titan] 2025-10-05 02:18:37,421 - root - INFO - lr: 4.7781e-05 gnorm: 1.23 [ 3:44:28<21:00:54] +[titan] 2025-10-05 02:18:46,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:18:48,292 - root - INFO - step: 6050 loss: 2.6234 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3122 +[titan] 2025-10-05 02:18:48,292 - root - INFO - lr: 4.7777e-05 gnorm: 1.22 [ 3:44:39<21:00:41] +[titan] 2025-10-05 02:18:59,214 - root - INFO - step: 6055 loss: 2.7909 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3399 global_avg_mtp_loss: 2.4510 +[titan] 2025-10-05 02:18:59,214 - root - INFO - lr: 4.7773e-05 gnorm: 1.28 [ 3:44:50<21:00:29] +[titan] 2025-10-05 02:19:10,081 - root - INFO - step: 6060 loss: 2.7169 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 02:19:10,081 - root - INFO - lr: 4.7769e-05 gnorm: 1.19 [ 3:45:01<21:00:16] +[titan] 2025-10-05 02:19:20,960 - root - INFO - step: 6065 loss: 2.5899 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3057 global_avg_mtp_loss: 2.2843 +[titan] 2025-10-05 02:19:20,960 - root - INFO - lr: 4.7765e-05 gnorm: 1.20 [ 3:45:12<21:00:03] +[titan] 2025-10-05 02:19:31,815 - root - INFO - step: 6070 loss: 2.5974 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2900 +[titan] 2025-10-05 02:19:31,815 - root - INFO - lr: 4.7761e-05 gnorm: 1.19 [ 3:45:23<20:59:51] +[titan] 2025-10-05 02:19:42,704 - root - INFO - step: 6075 loss: 2.5388 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2387 +[titan] 2025-10-05 02:19:42,704 - root - INFO - lr: 4.7757e-05 gnorm: 1.24 [ 3:45:33<20:59:38] +[titan] 2025-10-05 02:19:53,571 - root - INFO - step: 6080 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2553 +[titan] 2025-10-05 02:19:53,571 - root - INFO - lr: 4.7753e-05 
gnorm: 1.24 [ 3:45:44<20:59:25] +[titan] 2025-10-05 02:20:04,484 - root - INFO - step: 6085 loss: 2.6574 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 02:20:04,484 - root - INFO - lr: 4.7750e-05 gnorm: 1.25 [ 3:45:55<20:59:13] +[titan] 2025-10-05 02:20:15,352 - root - INFO - step: 6090 loss: 2.6004 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2933 +[titan] 2025-10-05 02:20:15,352 - root - INFO - lr: 4.7746e-05 gnorm: 1.29 [ 3:46:06<20:59:00] +[titan] 2025-10-05 02:20:26,230 - root - INFO - step: 6095 loss: 2.6515 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:20:26,230 - root - INFO - lr: 4.7742e-05 gnorm: 1.25 [ 3:46:17<20:58:48] +[titan] 2025-10-05 02:20:34,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:20:37,079 - root - INFO - step: 6100 loss: 2.6900 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3703 +[titan] 2025-10-05 02:20:37,079 - root - INFO - lr: 4.7738e-05 gnorm: 1.19 [ 3:46:28<20:58:35] +[titan] 2025-10-05 02:20:47,995 - root - INFO - step: 6105 loss: 2.7058 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3851 +[titan] 2025-10-05 02:20:47,995 - root - INFO - lr: 4.7734e-05 gnorm: 1.26 [ 3:46:39<20:58:23] +[titan] 2025-10-05 02:20:58,928 - root - INFO - step: 6110 loss: 2.6693 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3537 +[titan] 2025-10-05 02:20:58,928 - root - INFO - lr: 4.7730e-05 gnorm: 1.27 [ 3:46:50<20:58:10] +[titan] 2025-10-05 02:21:09,804 - root - INFO - step: 6115 loss: 2.5456 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:21:09,804 - root - INFO - lr: 4.7726e-05 gnorm: 1.13 [ 3:47:01<20:57:58] +[titan] 2025-10-05 02:21:20,686 - root - INFO - step: 6120 loss: 2.6377 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3244 +[titan] 2025-10-05 02:21:20,686 - root - INFO - lr: 4.7722e-05 gnorm: 1.17 [ 3:47:11<20:57:45] +[titan] 2025-10-05 02:21:31,544 - root - INFO - step: 6125 loss: 2.5803 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:21:31,544 - root - INFO - lr: 4.7718e-05 gnorm: 1.19 [ 3:47:22<20:57:33] +[titan] 2025-10-05 02:21:42,406 - root - INFO - step: 6130 loss: 2.6986 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3790 +[titan] 2025-10-05 02:21:42,406 - root - INFO - lr: 4.7714e-05 
gnorm: 1.30 [ 3:47:33<20:57:20] +[titan] 2025-10-05 02:21:53,244 - root - INFO - step: 6135 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:21:53,244 - root - INFO - lr: 4.7710e-05 gnorm: 1.24 [ 3:47:44<20:57:07] +[titan] 2025-10-05 02:22:04,175 - root - INFO - step: 6140 loss: 2.5814 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3040 global_avg_mtp_loss: 2.2775 +[titan] 2025-10-05 02:22:04,175 - root - INFO - lr: 4.7707e-05 gnorm: 1.23 [ 3:47:55<20:56:55] +[titan] 2025-10-05 02:22:13,101 - root - INFO - Dumping profiler traces at step 6144 +[titan] 2025-10-05 02:22:13,138 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:22:15,343 - root - INFO - step: 6145 loss: 2.6735 memory: 118.84GiB(85.28%) tps: 29,341 tflops: 407.07 mfu: 41.16% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3580 +[titan] 2025-10-05 02:22:15,343 - root - INFO - lr: 4.7703e-05 gnorm: 1.26 [ 3:48:06<20:56:44] +[titan] 2025-10-05 02:22:24,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:22:26,217 - root - INFO - step: 6150 loss: 2.6490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3135 global_avg_mtp_loss: 2.3355 +[titan] 2025-10-05 02:22:26,217 - root - INFO - lr: 4.7699e-05 gnorm: 1.24 [ 3:48:17<20:56:31] +[titan] 2025-10-05 02:22:37,096 - root - INFO - step: 6155 loss: 2.6463 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3340 +[titan] 2025-10-05 02:22:37,096 - root - INFO - lr: 4.7695e-05 gnorm: 1.18 [ 3:48:28<20:56:19] +[titan] 2025-10-05 02:22:47,962 - root - INFO - step: 6160 loss: 2.6975 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:22:47,962 - root - INFO - lr: 4.7691e-05 gnorm: 1.25 [ 3:48:39<20:56:06] +[titan] 2025-10-05 02:22:58,842 - root - INFO - step: 6165 loss: 2.6719 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 02:22:58,842 - root - INFO - lr: 4.7687e-05 gnorm: 1.27 [ 3:48:50<20:55:54] +[titan] 2025-10-05 02:23:09,781 - root - INFO - step: 6170 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3183 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:23:09,781 - root - INFO - lr: 4.7683e-05 gnorm: 1.18 [ 3:49:01<20:55:41] +[titan] 2025-10-05 02:23:20,657 - root - INFO - step: 6175 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2762 +[titan] 2025-10-05 02:23:20,657 - root - INFO - lr: 4.7679e-05 gnorm: 1.25 [ 3:49:11<20:55:29] +[titan] 2025-10-05 02:23:31,536 - root - INFO - step: 6180 loss: 2.6338 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3119 global_avg_mtp_loss: 2.3219 +[titan] 2025-10-05 02:23:31,536 - root - INFO - lr: 4.7675e-05 
gnorm: 1.21 [ 3:49:22<20:55:16] +[titan] 2025-10-05 02:23:42,416 - root - INFO - step: 6185 loss: 2.6751 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3584 +[titan] 2025-10-05 02:23:42,416 - root - INFO - lr: 4.7671e-05 gnorm: 1.23 [ 3:49:33<20:55:04] +[titan] 2025-10-05 02:23:53,282 - root - INFO - step: 6190 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:23:53,282 - root - INFO - lr: 4.7667e-05 gnorm: 1.94 [ 3:49:44<20:54:51] +[titan] 2025-10-05 02:24:04,176 - root - INFO - step: 6195 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.3001 +[titan] 2025-10-05 02:24:04,177 - root - INFO - lr: 4.7663e-05 gnorm: 1.30 [ 3:49:55<20:54:39] +[titan] 2025-10-05 02:24:12,861 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:24:15,046 - root - INFO - step: 6200 loss: 2.6013 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2936 +[titan] 2025-10-05 02:24:15,047 - root - INFO - lr: 4.7659e-05 gnorm: 1.22 [ 3:50:06<20:54:26] +[titan] 2025-10-05 02:24:25,976 - root - INFO - step: 6205 loss: 2.6406 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:24:25,976 - root - INFO - lr: 4.7655e-05 gnorm: 1.21 [ 3:50:17<20:54:14] +[titan] 2025-10-05 02:24:36,842 - root - INFO - step: 6210 loss: 2.5418 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 02:24:36,842 - root - INFO - lr: 4.7651e-05 gnorm: 1.17 [ 3:50:28<20:54:01] +[titan] 2025-10-05 02:24:47,725 - root - INFO - step: 6215 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2744 +[titan] 2025-10-05 02:24:47,725 - root - INFO - lr: 4.7647e-05 gnorm: 1.20 [ 3:50:38<20:53:49] +[titan] 2025-10-05 02:24:58,595 - root - INFO - step: 6220 loss: 2.6116 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 02:24:58,595 - root - INFO - lr: 4.7643e-05 gnorm: 1.26 [ 3:50:49<20:53:36] +[titan] 2025-10-05 02:25:09,462 - root - INFO - step: 6225 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3158 +[titan] 2025-10-05 02:25:09,462 - root - INFO - lr: 4.7639e-05 gnorm: 1.26 [ 3:51:00<20:53:24] +[titan] 2025-10-05 02:25:20,338 - root - INFO - step: 6230 loss: 2.6316 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3207 +[titan] 2025-10-05 02:25:20,338 - root - INFO - lr: 4.7635e-05 
gnorm: 1.26 [ 3:51:11<20:53:11] +[titan] 2025-10-05 02:25:31,243 - root - INFO - step: 6235 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:25:31,243 - root - INFO - lr: 4.7631e-05 gnorm: 1.24 [ 3:51:22<20:52:59] +[titan] 2025-10-05 02:25:42,123 - root - INFO - step: 6240 loss: 2.6737 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3161 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:25:42,123 - root - INFO - lr: 4.7627e-05 gnorm: 1.21 [ 3:51:33<20:52:46] +[titan] 2025-10-05 02:25:53,008 - root - INFO - step: 6245 loss: 2.6264 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3138 +[titan] 2025-10-05 02:25:53,008 - root - INFO - lr: 4.7623e-05 gnorm: 1.21 [ 3:51:44<20:52:34] +[titan] 2025-10-05 02:26:01,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:26:03,944 - root - INFO - step: 6250 loss: 2.6166 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 02:26:03,944 - root - INFO - lr: 4.7619e-05 gnorm: 1.24 [ 3:51:55<20:52:21] +[titan] 2025-10-05 02:26:14,837 - root - INFO - step: 6255 loss: 2.5876 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2818 +[titan] 2025-10-05 02:26:14,837 - root - INFO - lr: 4.7615e-05 gnorm: 1.18 [ 3:52:06<20:52:09] +[titan] 2025-10-05 02:26:25,726 - root - INFO - step: 6260 loss: 2.7070 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3866 +[titan] 2025-10-05 02:26:25,726 - root - INFO - lr: 4.7611e-05 gnorm: 1.20 [ 3:52:16<20:51:57] +[titan] 2025-10-05 02:26:36,613 - root - INFO - step: 6265 loss: 2.6830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3168 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:26:36,614 - root - INFO - lr: 4.7607e-05 gnorm: 1.22 [ 3:52:27<20:51:44] +[titan] 2025-10-05 02:26:47,499 - root - INFO - step: 6270 loss: 2.4995 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2939 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 02:26:47,500 - root - INFO - lr: 4.7603e-05 gnorm: 1.22 [ 3:52:38<20:51:32] +[titan] 2025-10-05 02:26:58,361 - root - INFO - step: 6275 loss: 2.5337 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2345 +[titan] 2025-10-05 02:26:58,361 - root - INFO - lr: 4.7599e-05 gnorm: 1.19 [ 3:52:49<20:51:19] +[titan] 2025-10-05 02:27:09,255 - root - INFO - step: 6280 loss: 2.5465 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 02:27:09,255 - root - INFO - lr: 4.7595e-05 
gnorm: 1.18 [ 3:53:00<20:51:07] +[titan] 2025-10-05 02:27:20,123 - root - INFO - step: 6285 loss: 2.6725 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3560 +[titan] 2025-10-05 02:27:20,123 - root - INFO - lr: 4.7591e-05 gnorm: 1.25 [ 3:53:11<20:50:54] +[titan] 2025-10-05 02:27:30,985 - root - INFO - step: 6290 loss: 2.6086 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.2999 +[titan] 2025-10-05 02:27:30,986 - root - INFO - lr: 4.7587e-05 gnorm: 1.20 [ 3:53:22<20:50:42] +[titan] 2025-10-05 02:27:41,851 - root - INFO - step: 6295 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:27:41,851 - root - INFO - lr: 4.7583e-05 gnorm: 1.19 [ 3:53:33<20:50:29] +[titan] 2025-10-05 02:27:50,574 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:27:52,765 - root - INFO - step: 6300 loss: 2.6057 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2983 +[titan] 2025-10-05 02:27:52,765 - root - INFO - lr: 4.7579e-05 gnorm: 1.25 [ 3:53:44<20:50:17] +[titan] 2025-10-05 02:28:03,660 - root - INFO - step: 6305 loss: 2.6038 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3067 global_avg_mtp_loss: 2.2971 +[titan] 2025-10-05 02:28:03,661 - root - INFO - lr: 4.7575e-05 gnorm: 1.34 [ 3:53:54<20:50:04] +[titan] 2025-10-05 02:28:14,554 - root - INFO - step: 6310 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3056 +[titan] 2025-10-05 02:28:14,554 - root - INFO - lr: 4.7571e-05 gnorm: 1.26 [ 3:54:05<20:49:52] +[titan] 2025-10-05 02:28:25,460 - root - INFO - step: 6315 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 02:28:25,460 - root - INFO - lr: 4.7567e-05 gnorm: 1.30 [ 3:54:16<20:49:40] +[titan] 2025-10-05 02:28:36,327 - root - INFO - step: 6320 loss: 2.6294 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3180 +[titan] 2025-10-05 02:28:36,327 - root - INFO - lr: 4.7563e-05 gnorm: 1.20 [ 3:54:27<20:49:27] +[titan] 2025-10-05 02:28:47,212 - root - INFO - step: 6325 loss: 2.5971 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:28:47,212 - root - INFO - lr: 4.7559e-05 gnorm: 1.24 [ 3:54:38<20:49:15] +[titan] 2025-10-05 02:28:58,148 - root - INFO - step: 6330 loss: 2.5947 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2896 +[titan] 2025-10-05 02:28:58,148 - root - INFO - lr: 4.7555e-05 
gnorm: 1.17 [ 3:54:49<20:49:03] +[titan] 2025-10-05 02:29:09,045 - root - INFO - step: 6335 loss: 2.6560 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3421 +[titan] 2025-10-05 02:29:09,045 - root - INFO - lr: 4.7551e-05 gnorm: 1.23 [ 3:55:00<20:48:50] +[titan] 2025-10-05 02:29:19,929 - root - INFO - step: 6340 loss: 2.5919 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2841 +[titan] 2025-10-05 02:29:19,929 - root - INFO - lr: 4.7547e-05 gnorm: 1.21 [ 3:55:11<20:48:38] +[titan] 2025-10-05 02:29:30,803 - root - INFO - step: 6345 loss: 2.6337 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3228 +[titan] 2025-10-05 02:29:30,803 - root - INFO - lr: 4.7543e-05 gnorm: 1.20 [ 3:55:22<20:48:25] +[titan] 2025-10-05 02:29:39,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:29:41,698 - root - INFO - step: 6350 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.2911 +[titan] 2025-10-05 02:29:41,698 - root - INFO - lr: 4.7539e-05 gnorm: 1.21 [ 3:55:32<20:48:13] +[titan] 2025-10-05 02:29:52,582 - root - INFO - step: 6355 loss: 2.5766 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2722 +[titan] 2025-10-05 02:29:52,582 - root - INFO - lr: 4.7535e-05 gnorm: 1.31 [ 3:55:43<20:48:00] +[titan] 2025-10-05 02:30:03,454 - root - INFO - step: 6360 loss: 2.6402 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3288 +[titan] 2025-10-05 02:30:03,454 - root - INFO - lr: 4.7531e-05 gnorm: 1.19 [ 3:55:54<20:47:48] +[titan] 2025-10-05 02:30:14,404 - root - INFO - step: 6365 loss: 2.5756 memory: 118.84GiB(85.28%) tps: 29,925 tflops: 415.16 mfu: 41.98% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2718 +[titan] 2025-10-05 02:30:14,405 - root - INFO - lr: 4.7527e-05 gnorm: 1.21 [ 3:56:05<20:47:36] +[titan] 2025-10-05 02:30:25,300 - root - INFO - step: 6370 loss: 2.6721 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3566 +[titan] 2025-10-05 02:30:25,301 - root - INFO - lr: 4.7523e-05 gnorm: 1.26 [ 3:56:16<20:47:24] +[titan] 2025-10-05 02:30:36,188 - root - INFO - step: 6375 loss: 2.6701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3548 +[titan] 2025-10-05 02:30:36,189 - root - INFO - lr: 4.7519e-05 gnorm: 1.26 [ 3:56:27<20:47:11] +[titan] 2025-10-05 02:30:47,063 - root - INFO - step: 6380 loss: 2.6577 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:30:47,063 - root - INFO - lr: 4.7514e-05 
gnorm: 1.19 [ 3:56:38<20:46:59] +[titan] 2025-10-05 02:30:57,930 - root - INFO - step: 6385 loss: 2.5739 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:30:57,930 - root - INFO - lr: 4.7510e-05 gnorm: 1.20 [ 3:56:49<20:46:46] +[titan] 2025-10-05 02:31:08,797 - root - INFO - step: 6390 loss: 2.6461 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3331 +[titan] 2025-10-05 02:31:08,797 - root - INFO - lr: 4.7506e-05 gnorm: 1.18 [ 3:57:00<20:46:34] +[titan] 2025-10-05 02:31:19,713 - root - INFO - step: 6395 loss: 2.6359 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3246 +[titan] 2025-10-05 02:31:19,713 - root - INFO - lr: 4.7502e-05 gnorm: 1.18 [ 3:57:10<20:46:21] +[titan] 2025-10-05 02:31:28,409 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:31:30,590 - root - INFO - step: 6400 loss: 2.6427 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.3304 +[titan] 2025-10-05 02:31:30,590 - root - INFO - lr: 4.7498e-05 gnorm: 1.20 [ 3:57:21<20:46:09] +[titan] 2025-10-05 02:31:41,458 - root - INFO - step: 6405 loss: 2.5702 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:31:41,458 - root - INFO - lr: 4.7494e-05 gnorm: 1.26 [ 3:57:32<20:45:57] +[titan] 2025-10-05 02:31:52,328 - root - INFO - step: 6410 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2830 +[titan] 2025-10-05 02:31:52,329 - root - INFO - lr: 4.7490e-05 gnorm: 1.28 [ 3:57:43<20:45:44] +[titan] 2025-10-05 02:32:03,197 - root - INFO - step: 6415 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:32:03,197 - root - INFO - lr: 4.7486e-05 gnorm: 1.24 [ 3:57:54<20:45:32] +[titan] 2025-10-05 02:32:14,082 - root - INFO - step: 6420 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2355 +[titan] 2025-10-05 02:32:14,082 - root - INFO - lr: 4.7482e-05 gnorm: 1.22 [ 3:58:05<20:45:19] +[titan] 2025-10-05 02:32:25,006 - root - INFO - step: 6425 loss: 2.6729 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:32:25,007 - root - INFO - lr: 4.7478e-05 gnorm: 1.28 [ 3:58:16<20:45:07] +[titan] 2025-10-05 02:32:35,882 - root - INFO - step: 6430 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2763 +[titan] 2025-10-05 02:32:35,883 - root - INFO - lr: 4.7474e-05 
gnorm: 1.22 [ 3:58:27<20:44:55] +[titan] 2025-10-05 02:32:46,767 - root - INFO - step: 6435 loss: 2.5922 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2859 +[titan] 2025-10-05 02:32:46,767 - root - INFO - lr: 4.7469e-05 gnorm: 1.22 [ 3:58:37<20:44:42] +[titan] 2025-10-05 02:32:57,635 - root - INFO - step: 6440 loss: 2.5566 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2552 +[titan] 2025-10-05 02:32:57,635 - root - INFO - lr: 4.7465e-05 gnorm: 1.19 [ 3:58:48<20:44:30] +[titan] 2025-10-05 02:33:08,509 - root - INFO - step: 6445 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3471 +[titan] 2025-10-05 02:33:08,509 - root - INFO - lr: 4.7461e-05 gnorm: 1.18 [ 3:58:59<20:44:17] +[titan] 2025-10-05 02:33:17,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:33:19,369 - root - INFO - step: 6450 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2865 +[titan] 2025-10-05 02:33:19,369 - root - INFO - lr: 4.7457e-05 gnorm: 1.22 [ 3:59:10<20:44:05] +[titan] 2025-10-05 02:33:30,229 - root - INFO - step: 6455 loss: 2.6465 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3333 +[titan] 2025-10-05 02:33:30,229 - root - INFO - lr: 4.7453e-05 gnorm: 1.20 [ 3:59:21<20:43:52] +[titan] 2025-10-05 02:33:41,125 - root - INFO - step: 6460 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2613 +[titan] 2025-10-05 02:33:41,125 - root - INFO - lr: 4.7449e-05 gnorm: 1.21 [ 3:59:32<20:43:40] +[titan] 2025-10-05 02:33:51,972 - root - INFO - step: 6465 loss: 2.6340 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.3110 global_avg_mtp_loss: 2.3230 +[titan] 2025-10-05 02:33:51,972 - root - INFO - lr: 4.7445e-05 gnorm: 1.25 [ 3:59:43<20:43:27] +[titan] 2025-10-05 02:34:02,829 - root - INFO - step: 6470 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:34:02,830 - root - INFO - lr: 4.7441e-05 gnorm: 1.22 [ 3:59:54<20:43:15] +[titan] 2025-10-05 02:34:13,713 - root - INFO - step: 6475 loss: 2.6622 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3477 +[titan] 2025-10-05 02:34:13,713 - root - INFO - lr: 4.7436e-05 gnorm: 1.22 [ 4:00:04<20:43:03] +[titan] 2025-10-05 02:34:24,581 - root - INFO - step: 6480 loss: 2.5985 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2920 +[titan] 2025-10-05 02:34:24,581 - root - INFO - lr: 4.7432e-05 
gnorm: 1.22 [ 4:00:15<20:42:50] +[titan] 2025-10-05 02:34:35,430 - root - INFO - step: 6485 loss: 2.5699 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2657 +[titan] 2025-10-05 02:34:35,430 - root - INFO - lr: 4.7428e-05 gnorm: 1.26 [ 4:00:26<20:42:38] +[titan] 2025-10-05 02:34:46,317 - root - INFO - step: 6490 loss: 2.5393 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:34:46,318 - root - INFO - lr: 4.7424e-05 gnorm: 1.22 [ 4:00:37<20:42:25] +[titan] 2025-10-05 02:34:57,192 - root - INFO - step: 6495 loss: 2.6369 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3116 global_avg_mtp_loss: 2.3253 +[titan] 2025-10-05 02:34:57,193 - root - INFO - lr: 4.7420e-05 gnorm: 1.23 [ 4:00:48<20:42:13] +[titan] 2025-10-05 02:35:05,878 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:35:08,068 - root - INFO - step: 6500 loss: 2.5435 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3003 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 02:35:08,068 - root - INFO - lr: 4.7416e-05 gnorm: 1.25 [ 4:00:59<20:42:00] +[titan] 2025-10-05 02:35:18,953 - root - INFO - step: 6505 loss: 2.6050 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2979 +[titan] 2025-10-05 02:35:18,953 - root - INFO - lr: 4.7412e-05 gnorm: 1.26 [ 4:01:10<20:41:48] +[titan] 2025-10-05 02:35:29,825 - root - INFO - step: 6510 loss: 2.5818 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2782 +[titan] 2025-10-05 02:35:29,825 - root - INFO - lr: 4.7407e-05 gnorm: 1.19 [ 4:01:21<20:41:36] +[titan] 2025-10-05 02:35:40,705 - root - INFO - step: 6515 loss: 2.5167 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2195 +[titan] 2025-10-05 02:35:40,705 - root - INFO - lr: 4.7403e-05 gnorm: 1.18 [ 4:01:31<20:41:23] +[titan] 2025-10-05 02:35:51,579 - root - INFO - step: 6520 loss: 2.6889 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3704 +[titan] 2025-10-05 02:35:51,579 - root - INFO - lr: 4.7399e-05 gnorm: 1.25 [ 4:01:42<20:41:11] +[titan] 2025-10-05 02:36:02,521 - root - INFO - step: 6525 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3046 global_avg_mtp_loss: 2.2801 +[titan] 2025-10-05 02:36:02,521 - root - INFO - lr: 4.7395e-05 gnorm: 1.21 [ 4:01:53<20:40:59] +[titan] 2025-10-05 02:36:13,407 - root - INFO - step: 6530 loss: 2.5064 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2103 +[titan] 2025-10-05 02:36:13,407 - root - INFO - lr: 4.7391e-05 
gnorm: 1.17 [ 4:02:04<20:40:47] +[titan] 2025-10-05 02:36:24,288 - root - INFO - step: 6535 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:36:24,289 - root - INFO - lr: 4.7387e-05 gnorm: 1.26 [ 4:02:15<20:40:34] +[titan] 2025-10-05 02:36:35,150 - root - INFO - step: 6540 loss: 2.6944 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 02:36:35,150 - root - INFO - lr: 4.7382e-05 gnorm: 1.22 [ 4:02:26<20:40:22] +[titan] 2025-10-05 02:36:46,030 - root - INFO - step: 6545 loss: 2.5975 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2909 +[titan] 2025-10-05 02:36:46,030 - root - INFO - lr: 4.7378e-05 gnorm: 1.25 [ 4:02:37<20:40:09] +[titan] 2025-10-05 02:36:54,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:36:56,903 - root - INFO - step: 6550 loss: 2.5802 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2749 +[titan] 2025-10-05 02:36:56,904 - root - INFO - lr: 4.7374e-05 gnorm: 1.18 [ 4:02:48<20:39:57] +[titan] 2025-10-05 02:37:07,790 - root - INFO - step: 6555 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 02:37:07,790 - root - INFO - lr: 4.7370e-05 gnorm: 1.23 [ 4:02:59<20:39:45] +[titan] 2025-10-05 02:37:18,673 - root - INFO - step: 6560 loss: 2.6310 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3210 +[titan] 2025-10-05 02:37:18,673 - root - INFO - lr: 4.7366e-05 gnorm: 1.22 [ 4:03:09<20:39:32] +[titan] 2025-10-05 02:37:29,519 - root - INFO - step: 6565 loss: 2.6348 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3242 +[titan] 2025-10-05 02:37:29,520 - root - INFO - lr: 4.7361e-05 gnorm: 1.24 [ 4:03:20<20:39:20] +[titan] 2025-10-05 02:37:40,400 - root - INFO - step: 6570 loss: 2.5419 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2417 +[titan] 2025-10-05 02:37:40,400 - root - INFO - lr: 4.7357e-05 gnorm: 1.19 [ 4:03:31<20:39:07] +[titan] 2025-10-05 02:37:51,268 - root - INFO - step: 6575 loss: 2.5865 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2812 +[titan] 2025-10-05 02:37:51,269 - root - INFO - lr: 4.7353e-05 gnorm: 1.28 [ 4:03:42<20:38:55] +[titan] 2025-10-05 02:38:02,128 - root - INFO - step: 6580 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:38:02,128 - root - INFO - lr: 4.7349e-05 
gnorm: 1.20 [ 4:03:53<20:38:43] +[titan] 2025-10-05 02:38:13,063 - root - INFO - step: 6585 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3373 +[titan] 2025-10-05 02:38:13,063 - root - INFO - lr: 4.7345e-05 gnorm: 1.24 [ 4:04:04<20:38:31] +[titan] 2025-10-05 02:38:23,973 - root - INFO - step: 6590 loss: 2.6349 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3243 +[titan] 2025-10-05 02:38:23,973 - root - INFO - lr: 4.7340e-05 gnorm: 1.19 [ 4:04:15<20:38:18] +[titan] 2025-10-05 02:38:34,826 - root - INFO - step: 6595 loss: 2.7415 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4095 +[titan] 2025-10-05 02:38:34,826 - root - INFO - lr: 4.7336e-05 gnorm: 1.21 [ 4:04:26<20:38:06] +[titan] 2025-10-05 02:38:43,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:38:45,682 - root - INFO - step: 6600 loss: 2.5758 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2716 +[titan] 2025-10-05 02:38:45,682 - root - INFO - lr: 4.7332e-05 gnorm: 1.18 [ 4:04:36<20:37:53] +[titan] 2025-10-05 02:38:56,550 - root - INFO - step: 6605 loss: 2.5294 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2310 +[titan] 2025-10-05 02:38:56,550 - root - INFO - lr: 4.7328e-05 gnorm: 1.19 [ 4:04:47<20:37:41] +[titan] 2025-10-05 02:39:07,416 - root - INFO - step: 6610 loss: 2.5451 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:39:07,416 - root - INFO - lr: 4.7324e-05 gnorm: 1.20 [ 4:04:58<20:37:29] +[titan] 2025-10-05 02:39:18,347 - root - INFO - step: 6615 loss: 2.7044 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3846 +[titan] 2025-10-05 02:39:18,347 - root - INFO - lr: 4.7319e-05 gnorm: 1.23 [ 4:05:09<20:37:17] +[titan] 2025-10-05 02:39:29,249 - root - INFO - step: 6620 loss: 2.5846 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3048 global_avg_mtp_loss: 2.2799 +[titan] 2025-10-05 02:39:29,249 - root - INFO - lr: 4.7315e-05 gnorm: 1.16 [ 4:05:20<20:37:04] +[titan] 2025-10-05 02:39:40,113 - root - INFO - step: 6625 loss: 2.6491 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3371 +[titan] 2025-10-05 02:39:40,113 - root - INFO - lr: 4.7311e-05 gnorm: 1.27 [ 4:05:31<20:36:52] +[titan] 2025-10-05 02:39:50,990 - root - INFO - step: 6630 loss: 2.5891 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:39:50,990 - root - INFO - lr: 4.7307e-05 
gnorm: 1.21 [ 4:05:42<20:36:40] +[titan] 2025-10-05 02:40:01,853 - root - INFO - step: 6635 loss: 2.6888 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3719 +[titan] 2025-10-05 02:40:01,853 - root - INFO - lr: 4.7302e-05 gnorm: 1.21 [ 4:05:53<20:36:27] +[titan] 2025-10-05 02:40:12,718 - root - INFO - step: 6640 loss: 2.5610 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2587 +[titan] 2025-10-05 02:40:12,718 - root - INFO - lr: 4.7298e-05 gnorm: 1.19 [ 4:06:03<20:36:15] +[titan] 2025-10-05 02:40:23,628 - root - INFO - step: 6645 loss: 2.5680 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2644 +[titan] 2025-10-05 02:40:23,628 - root - INFO - lr: 4.7294e-05 gnorm: 1.27 [ 4:06:14<20:36:03] +[titan] 2025-10-05 02:40:32,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:40:34,565 - root - INFO - step: 6650 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3119 +[titan] 2025-10-05 02:40:34,566 - root - INFO - lr: 4.7290e-05 gnorm: 1.21 [ 4:06:25<20:35:51] +[titan] 2025-10-05 02:40:45,524 - root - INFO - step: 6655 loss: 2.6619 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:40:45,524 - root - INFO - lr: 4.7285e-05 gnorm: 1.16 [ 4:06:36<20:35:39] +[titan] 2025-10-05 02:40:47,886 - root - INFO - Dumping profiler traces at step 6656 +[titan] 2025-10-05 02:40:47,925 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:40:56,627 - root - INFO - step: 6660 loss: 2.5796 memory: 118.84GiB(85.28%) tps: 29,513 tflops: 409.44 mfu: 41.40% global_avg_ntp_loss: 0.3032 global_avg_mtp_loss: 2.2764 +[titan] 2025-10-05 02:40:56,627 - root - INFO - lr: 4.7281e-05 gnorm: 1.18 [ 4:06:47<20:35:28] +[titan] 2025-10-05 02:41:07,500 - root - INFO - step: 6665 loss: 2.5859 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2808 +[titan] 2025-10-05 02:41:07,500 - root - INFO - lr: 4.7277e-05 gnorm: 1.21 [ 4:06:58<20:35:15] +[titan] 2025-10-05 02:41:18,462 - root - INFO - step: 6670 loss: 2.5619 memory: 118.84GiB(85.28%) tps: 29,894 tflops: 414.73 mfu: 41.93% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:41:18,462 - root - INFO - lr: 4.7273e-05 gnorm: 1.20 [ 4:07:09<20:35:03] +[titan] 2025-10-05 02:41:29,324 - root - INFO - step: 6675 loss: 2.4816 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.1887 +[titan] 2025-10-05 02:41:29,325 - root - INFO - lr: 4.7268e-05 gnorm: 1.20 [ 4:07:20<20:34:51] +[titan] 2025-10-05 02:41:40,209 - root - INFO - step: 6680 loss: 2.6410 
memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3279 +[titan] 2025-10-05 02:41:40,210 - root - INFO - lr: 4.7264e-05 gnorm: 1.26 [ 4:07:31<20:34:39] +[titan] 2025-10-05 02:41:51,110 - root - INFO - step: 6685 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3116 +[titan] 2025-10-05 02:41:51,110 - root - INFO - lr: 4.7260e-05 gnorm: 1.25 [ 4:07:42<20:34:26] +[titan] 2025-10-05 02:42:01,973 - root - INFO - step: 6690 loss: 2.6096 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3014 +[titan] 2025-10-05 02:42:01,973 - root - INFO - lr: 4.7256e-05 gnorm: 1.20 [ 4:07:53<20:34:14] +[titan] 2025-10-05 02:42:12,862 - root - INFO - step: 6695 loss: 2.5175 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2207 +[titan] 2025-10-05 02:42:12,862 - root - INFO - lr: 4.7251e-05 gnorm: 1.18 [ 4:08:04<20:34:02] +[titan] 2025-10-05 02:42:21,626 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:42:23,816 - root - INFO - step: 6700 loss: 2.6088 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3010 +[titan] 2025-10-05 02:42:23,816 - root - INFO - lr: 4.7247e-05 gnorm: 1.21 [ 4:08:15<20:33:50] +[titan] 2025-10-05 02:42:34,693 - root - INFO - step: 6705 loss: 2.6071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.2991 +[titan] 2025-10-05 02:42:34,693 - root - INFO - lr: 4.7243e-05 gnorm: 1.19 [ 4:08:25<20:33:38] +[titan] 2025-10-05 02:42:45,561 - root - INFO - step: 6710 loss: 2.5118 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2161 +[titan] 2025-10-05 02:42:45,561 - root - INFO - lr: 4.7238e-05 gnorm: 1.24 [ 4:08:36<20:33:25] +[titan] 2025-10-05 02:42:56,442 - root - INFO - step: 6715 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.2997 +[titan] 2025-10-05 02:42:56,442 - root - INFO - lr: 4.7234e-05 gnorm: 1.21 [ 4:08:47<20:33:13] +[titan] 2025-10-05 02:43:07,287 - root - INFO - step: 6720 loss: 2.5570 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2554 +[titan] 2025-10-05 02:43:07,287 - root - INFO - lr: 4.7230e-05 gnorm: 1.22 [ 4:08:58<20:33:01] +[titan] 2025-10-05 02:43:18,136 - root - INFO - step: 6725 loss: 2.5707 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3021 global_avg_mtp_loss: 2.2686 +[titan] 2025-10-05 02:43:18,136 - root - INFO - lr: 4.7226e-05 gnorm: 1.53 [ 4:09:09<20:32:48] +[titan] 2025-10-05 02:43:29,070 - root - INFO - step: 6730 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2464 +[titan] 2025-10-05 02:43:29,070 - root - INFO - lr: 4.7221e-05 
gnorm: 1.26 [ 4:09:20<20:32:36] +[titan] 2025-10-05 02:43:39,913 - root - INFO - step: 6735 loss: 2.5430 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 02:43:39,913 - root - INFO - lr: 4.7217e-05 gnorm: 1.22 [ 4:09:31<20:32:24] +[titan] 2025-10-05 02:43:50,772 - root - INFO - step: 6740 loss: 2.5235 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2275 +[titan] 2025-10-05 02:43:50,772 - root - INFO - lr: 4.7213e-05 gnorm: 1.21 [ 4:09:41<20:32:11] +[titan] 2025-10-05 02:44:01,659 - root - INFO - step: 6745 loss: 2.6439 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3121 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:44:01,660 - root - INFO - lr: 4.7208e-05 gnorm: 1.19 [ 4:09:52<20:31:59] +[titan] 2025-10-05 02:44:10,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:44:12,526 - root - INFO - step: 6750 loss: 2.5875 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 02:44:12,526 - root - INFO - lr: 4.7204e-05 gnorm: 1.21 [ 4:10:03<20:31:47] +[titan] 2025-10-05 02:44:23,443 - root - INFO - step: 6755 loss: 2.4956 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2014 +[titan] 2025-10-05 02:44:23,443 - root - INFO - lr: 4.7200e-05 gnorm: 1.19 [ 4:10:14<20:31:35] +[titan] 2025-10-05 02:44:34,289 - root - INFO - step: 6760 loss: 2.5401 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2414 +[titan] 2025-10-05 02:44:34,289 - root - INFO - lr: 4.7196e-05 gnorm: 1.22 [ 4:10:25<20:31:22] +[titan] 2025-10-05 02:44:45,167 - root - INFO - step: 6765 loss: 2.5998 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2924 +[titan] 2025-10-05 02:44:45,167 - root - INFO - lr: 4.7191e-05 gnorm: 1.30 [ 4:10:36<20:31:10] +[titan] 2025-10-05 02:44:56,029 - root - INFO - step: 6770 loss: 2.5743 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2714 +[titan] 2025-10-05 02:44:56,029 - root - INFO - lr: 4.7187e-05 gnorm: 1.21 [ 4:10:47<20:30:58] +[titan] 2025-10-05 02:45:06,886 - root - INFO - step: 6775 loss: 2.5839 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3045 global_avg_mtp_loss: 2.2794 +[titan] 2025-10-05 02:45:06,886 - root - INFO - lr: 4.7183e-05 gnorm: 1.17 [ 4:10:58<20:30:45] +[titan] 2025-10-05 02:45:17,790 - root - INFO - step: 6780 loss: 2.5182 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2228 +[titan] 2025-10-05 02:45:17,790 - root - INFO - lr: 4.7178e-05 
gnorm: 1.24 [ 4:11:08<20:30:33] +[titan] 2025-10-05 02:45:28,696 - root - INFO - step: 6785 loss: 2.5460 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2463 +[titan] 2025-10-05 02:45:28,696 - root - INFO - lr: 4.7174e-05 gnorm: 1.20 [ 4:11:19<20:30:21] +[titan] 2025-10-05 02:45:39,548 - root - INFO - step: 6790 loss: 2.5312 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2336 +[titan] 2025-10-05 02:45:39,548 - root - INFO - lr: 4.7170e-05 gnorm: 1.16 [ 4:11:30<20:30:09] +[titan] 2025-10-05 02:45:50,426 - root - INFO - step: 6795 loss: 2.5011 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:45:50,427 - root - INFO - lr: 4.7165e-05 gnorm: 1.18 [ 4:11:41<20:29:56] +[titan] 2025-10-05 02:45:59,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:46:01,303 - root - INFO - step: 6800 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2649 +[titan] 2025-10-05 02:46:01,303 - root - INFO - lr: 4.7161e-05 gnorm: 1.23 [ 4:11:52<20:29:44] +[titan] 2025-10-05 02:46:12,162 - root - INFO - step: 6805 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:46:12,162 - root - INFO - lr: 4.7157e-05 gnorm: 1.28 [ 4:12:03<20:29:32] +[titan] 2025-10-05 02:46:23,159 - root - INFO - step: 6810 loss: 2.5521 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.40 mfu: 41.80% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2496 +[titan] 2025-10-05 02:46:23,159 - root - INFO - lr: 4.7152e-05 gnorm: 1.22 [ 4:12:14<20:29:20] +[titan] 2025-10-05 02:46:34,046 - root - INFO - step: 6815 loss: 2.6067 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.3007 +[titan] 2025-10-05 02:46:34,046 - root - INFO - lr: 4.7148e-05 gnorm: 1.17 [ 4:12:25<20:29:08] +[titan] 2025-10-05 02:46:44,908 - root - INFO - step: 6820 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2807 +[titan] 2025-10-05 02:46:44,908 - root - INFO - lr: 4.7143e-05 gnorm: 1.17 [ 4:12:36<20:28:55] +[titan] 2025-10-05 02:46:55,788 - root - INFO - step: 6825 loss: 2.5910 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2852 +[titan] 2025-10-05 02:46:55,788 - root - INFO - lr: 4.7139e-05 gnorm: 1.16 [ 4:12:46<20:28:43] +[titan] 2025-10-05 02:47:06,620 - root - INFO - step: 6830 loss: 2.5384 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 02:47:06,620 - root - INFO - lr: 4.7135e-05 
gnorm: 1.18 [ 4:12:57<20:28:31] +[titan] 2025-10-05 02:47:17,469 - root - INFO - step: 6835 loss: 2.5733 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 02:47:17,469 - root - INFO - lr: 4.7130e-05 gnorm: 1.23 [ 4:13:08<20:28:18] +[titan] 2025-10-05 02:47:28,380 - root - INFO - step: 6840 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2835 +[titan] 2025-10-05 02:47:28,380 - root - INFO - lr: 4.7126e-05 gnorm: 1.26 [ 4:13:19<20:28:06] +[titan] 2025-10-05 02:47:39,283 - root - INFO - step: 6845 loss: 2.5574 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2546 +[titan] 2025-10-05 02:47:39,283 - root - INFO - lr: 4.7122e-05 gnorm: 1.19 [ 4:13:30<20:27:54] +[titan] 2025-10-05 02:47:47,974 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:47:50,155 - root - INFO - step: 6850 loss: 2.5366 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2372 +[titan] 2025-10-05 02:47:50,155 - root - INFO - lr: 4.7117e-05 gnorm: 1.18 [ 4:13:41<20:27:42] +[titan] 2025-10-05 02:48:01,026 - root - INFO - step: 6855 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2793 +[titan] 2025-10-05 02:48:01,026 - root - INFO - lr: 4.7113e-05 gnorm: 1.17 [ 4:13:52<20:27:30] +[titan] 2025-10-05 02:48:11,906 - root - INFO - step: 6860 loss: 2.5452 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2453 +[titan] 2025-10-05 02:48:11,906 - root - INFO - lr: 4.7109e-05 gnorm: 1.21 [ 4:14:03<20:27:17] +[titan] 2025-10-05 02:48:22,764 - root - INFO - step: 6865 loss: 2.5903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2839 +[titan] 2025-10-05 02:48:22,764 - root - INFO - lr: 4.7104e-05 gnorm: 1.22 [ 4:14:13<20:27:05] +[titan] 2025-10-05 02:48:33,663 - root - INFO - step: 6870 loss: 2.5282 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2305 +[titan] 2025-10-05 02:48:33,663 - root - INFO - lr: 4.7100e-05 gnorm: 1.21 [ 4:14:24<20:26:53] +[titan] 2025-10-05 02:48:44,571 - root - INFO - step: 6875 loss: 2.5842 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2787 +[titan] 2025-10-05 02:48:44,571 - root - INFO - lr: 4.7095e-05 gnorm: 1.20 [ 4:14:35<20:26:41] +[titan] 2025-10-05 02:48:55,419 - root - INFO - step: 6880 loss: 2.5406 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2421 +[titan] 2025-10-05 02:48:55,419 - root - INFO - lr: 4.7091e-05 
gnorm: 1.23 [ 4:14:46<20:26:28] +[titan] 2025-10-05 02:49:06,283 - root - INFO - step: 6885 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2631 +[titan] 2025-10-05 02:49:06,283 - root - INFO - lr: 4.7087e-05 gnorm: 1.33 [ 4:14:57<20:26:16] +[titan] 2025-10-05 02:49:17,141 - root - INFO - step: 6890 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3033 global_avg_mtp_loss: 2.2679 +[titan] 2025-10-05 02:49:17,141 - root - INFO - lr: 4.7082e-05 gnorm: 1.20 [ 4:15:08<20:26:04] +[titan] 2025-10-05 02:49:28,078 - root - INFO - step: 6895 loss: 2.5483 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2478 +[titan] 2025-10-05 02:49:28,078 - root - INFO - lr: 4.7078e-05 gnorm: 1.18 [ 4:15:19<20:25:52] +[titan] 2025-10-05 02:49:36,751 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:49:38,935 - root - INFO - step: 6900 loss: 2.5983 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:49:38,935 - root - INFO - lr: 4.7073e-05 gnorm: 1.24 [ 4:15:30<20:25:40] +[titan] 2025-10-05 02:49:49,829 - root - INFO - step: 6905 loss: 2.5554 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2534 +[titan] 2025-10-05 02:49:49,830 - root - INFO - lr: 4.7069e-05 gnorm: 1.19 [ 4:15:41<20:25:27] +[titan] 2025-10-05 02:50:00,703 - root - INFO - step: 6910 loss: 2.6056 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.2975 +[titan] 2025-10-05 02:50:00,703 - root - INFO - lr: 4.7065e-05 gnorm: 1.20 [ 4:15:51<20:25:15] +[titan] 2025-10-05 02:50:11,565 - root - INFO - step: 6915 loss: 2.5960 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2897 +[titan] 2025-10-05 02:50:11,566 - root - INFO - lr: 4.7060e-05 gnorm: 1.24 [ 4:16:02<20:25:03] +[titan] 2025-10-05 02:50:22,427 - root - INFO - step: 6920 loss: 2.5924 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2856 +[titan] 2025-10-05 02:50:22,427 - root - INFO - lr: 4.7056e-05 gnorm: 1.19 [ 4:16:13<20:24:51] +[titan] 2025-10-05 02:50:33,321 - root - INFO - step: 6925 loss: 2.4869 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1938 +[titan] 2025-10-05 02:50:33,321 - root - INFO - lr: 4.7051e-05 gnorm: 1.18 [ 4:16:24<20:24:39] +[titan] 2025-10-05 02:50:44,192 - root - INFO - step: 6930 loss: 2.5543 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 02:50:44,193 - root - INFO - lr: 4.7047e-05 
gnorm: 1.24 [ 4:16:35<20:24:26] +[titan] 2025-10-05 02:50:55,042 - root - INFO - step: 6935 loss: 2.5426 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2428 +[titan] 2025-10-05 02:50:55,042 - root - INFO - lr: 4.7043e-05 gnorm: 1.21 [ 4:16:46<20:24:14] +[titan] 2025-10-05 02:51:05,935 - root - INFO - step: 6940 loss: 2.6667 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3517 +[titan] 2025-10-05 02:51:05,935 - root - INFO - lr: 4.7038e-05 gnorm: 1.24 [ 4:16:57<20:24:02] +[titan] 2025-10-05 02:51:16,790 - root - INFO - step: 6945 loss: 2.6473 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3345 +[titan] 2025-10-05 02:51:16,790 - root - INFO - lr: 4.7034e-05 gnorm: 1.27 [ 4:17:07<20:23:50] +[titan] 2025-10-05 02:51:25,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:51:27,720 - root - INFO - step: 6950 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2047 +[titan] 2025-10-05 02:51:27,721 - root - INFO - lr: 4.7029e-05 gnorm: 1.25 [ 4:17:18<20:23:38] +[titan] 2025-10-05 02:51:38,573 - root - INFO - step: 6955 loss: 2.6408 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:51:38,573 - root - INFO - lr: 4.7025e-05 gnorm: 1.22 [ 4:17:29<20:23:25] +[titan] 2025-10-05 02:51:49,457 - root - INFO - step: 6960 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3017 global_avg_mtp_loss: 2.2580 +[titan] 2025-10-05 02:51:49,458 - root - INFO - lr: 4.7020e-05 gnorm: 1.20 [ 4:17:40<20:23:13] +[titan] 2025-10-05 02:52:00,296 - root - INFO - step: 6965 loss: 2.6601 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:52:00,296 - root - INFO - lr: 4.7016e-05 gnorm: 1.30 [ 4:17:51<20:23:01] +[titan] 2025-10-05 02:52:11,200 - root - INFO - step: 6970 loss: 2.5501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2484 +[titan] 2025-10-05 02:52:11,200 - root - INFO - lr: 4.7012e-05 gnorm: 1.25 [ 4:18:02<20:22:49] +[titan] 2025-10-05 02:52:22,059 - root - INFO - step: 6975 loss: 2.5650 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2627 +[titan] 2025-10-05 02:52:22,059 - root - INFO - lr: 4.7007e-05 gnorm: 1.18 [ 4:18:13<20:22:36] +[titan] 2025-10-05 02:52:32,953 - root - INFO - step: 6980 loss: 2.6856 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.3581 +[titan] 2025-10-05 02:52:32,953 - root - INFO - lr: 4.7003e-05 
gnorm: 1.24 [ 4:18:24<20:22:24] +[titan] 2025-10-05 02:52:43,790 - root - INFO - step: 6985 loss: 2.5169 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2194 +[titan] 2025-10-05 02:52:43,790 - root - INFO - lr: 4.6998e-05 gnorm: 1.33 [ 4:18:34<20:22:12] +[titan] 2025-10-05 02:52:54,642 - root - INFO - step: 6990 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2300 +[titan] 2025-10-05 02:52:54,642 - root - INFO - lr: 4.6994e-05 gnorm: 1.18 [ 4:18:45<20:22:00] +[titan] 2025-10-05 02:53:05,477 - root - INFO - step: 6995 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3774 +[titan] 2025-10-05 02:53:05,477 - root - INFO - lr: 4.6989e-05 gnorm: 1.28 [ 4:18:56<20:21:47] +[titan] 2025-10-05 02:53:14,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:53:16,323 - root - INFO - step: 7000 loss: 2.6331 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.3240 +[titan] 2025-10-05 02:53:16,323 - root - INFO - lr: 4.6985e-05 gnorm: 1.28 [ 4:19:07<20:21:35] +[titan] 2025-10-05 02:53:27,204 - root - INFO - step: 7005 loss: 2.5777 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2742 +[titan] 2025-10-05 02:53:27,204 - root - INFO - lr: 4.6980e-05 gnorm: 1.20 [ 4:19:18<20:21:23] +[titan] 2025-10-05 02:53:38,086 - root - INFO - step: 7010 loss: 2.5633 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:53:38,086 - root - INFO - lr: 4.6976e-05 gnorm: 1.21 [ 4:19:29<20:21:10] +[titan] 2025-10-05 02:53:48,973 - root - INFO - step: 7015 loss: 2.5508 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2507 +[titan] 2025-10-05 02:53:48,973 - root - INFO - lr: 4.6971e-05 gnorm: 1.17 [ 4:19:40<20:20:58] +[titan] 2025-10-05 02:53:59,845 - root - INFO - step: 7020 loss: 2.6141 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3057 +[titan] 2025-10-05 02:53:59,845 - root - INFO - lr: 4.6967e-05 gnorm: 1.23 [ 4:19:51<20:20:46] +[titan] 2025-10-05 02:54:10,698 - root - INFO - step: 7025 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2184 +[titan] 2025-10-05 02:54:10,698 - root - INFO - lr: 4.6962e-05 gnorm: 1.18 [ 4:20:01<20:20:34] +[titan] 2025-10-05 02:54:21,549 - root - INFO - step: 7030 loss: 2.5250 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2274 +[titan] 2025-10-05 02:54:21,549 - root - INFO - lr: 4.6958e-05 
gnorm: 1.18 [ 4:20:12<20:20:22] +[titan] 2025-10-05 02:54:32,510 - root - INFO - step: 7035 loss: 2.4583 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2895 global_avg_mtp_loss: 2.1687 +[titan] 2025-10-05 02:54:32,510 - root - INFO - lr: 4.6954e-05 gnorm: 1.13 [ 4:20:23<20:20:10] +[titan] 2025-10-05 02:54:43,391 - root - INFO - step: 7040 loss: 2.5911 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2860 +[titan] 2025-10-05 02:54:43,391 - root - INFO - lr: 4.6949e-05 gnorm: 1.24 [ 4:20:34<20:19:58] +[titan] 2025-10-05 02:54:54,247 - root - INFO - step: 7045 loss: 2.5161 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2197 +[titan] 2025-10-05 02:54:54,247 - root - INFO - lr: 4.6945e-05 gnorm: 1.19 [ 4:20:45<20:19:45] +[titan] 2025-10-05 02:55:02,933 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:55:05,120 - root - INFO - step: 7050 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2600 +[titan] 2025-10-05 02:55:05,120 - root - INFO - lr: 4.6940e-05 gnorm: 1.19 [ 4:20:56<20:19:33] +[titan] 2025-10-05 02:55:15,988 - root - INFO - step: 7055 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:55:15,988 - root - INFO - lr: 4.6936e-05 gnorm: 1.18 [ 4:21:07<20:19:21] +[titan] 2025-10-05 02:55:26,853 - root - INFO - step: 7060 loss: 2.6283 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3103 global_avg_mtp_loss: 2.3181 +[titan] 2025-10-05 02:55:26,853 - root - INFO - lr: 4.6931e-05 gnorm: 1.21 [ 4:21:18<20:19:09] +[titan] 2025-10-05 02:55:37,782 - root - INFO - step: 7065 loss: 2.5429 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2433 +[titan] 2025-10-05 02:55:37,782 - root - INFO - lr: 4.6927e-05 gnorm: 1.17 [ 4:21:28<20:18:57] +[titan] 2025-10-05 02:55:48,649 - root - INFO - step: 7070 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2882 +[titan] 2025-10-05 02:55:48,649 - root - INFO - lr: 4.6922e-05 gnorm: 1.22 [ 4:21:39<20:18:45] +[titan] 2025-10-05 02:55:59,510 - root - INFO - step: 7075 loss: 2.5409 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 02:55:59,510 - root - INFO - lr: 4.6918e-05 gnorm: 1.20 [ 4:21:50<20:18:32] +[titan] 2025-10-05 02:56:10,352 - root - INFO - step: 7080 loss: 2.5976 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:56:10,353 - root - INFO - lr: 4.6913e-05 
gnorm: 1.19 [ 4:22:01<20:18:20] +[titan] 2025-10-05 02:56:21,217 - root - INFO - step: 7085 loss: 2.5675 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2647 +[titan] 2025-10-05 02:56:21,217 - root - INFO - lr: 4.6909e-05 gnorm: 1.26 [ 4:22:12<20:18:08] +[titan] 2025-10-05 02:56:32,130 - root - INFO - step: 7090 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 02:56:32,130 - root - INFO - lr: 4.6904e-05 gnorm: 1.19 [ 4:22:23<20:17:56] +[titan] 2025-10-05 02:56:43,006 - root - INFO - step: 7095 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 02:56:43,006 - root - INFO - lr: 4.6899e-05 gnorm: 1.16 [ 4:22:34<20:17:44] +[titan] 2025-10-05 02:56:51,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:56:53,930 - root - INFO - step: 7100 loss: 2.6150 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3102 global_avg_mtp_loss: 2.3048 +[titan] 2025-10-05 02:56:53,930 - root - INFO - lr: 4.6895e-05 gnorm: 1.24 [ 4:22:45<20:17:32] +[titan] 2025-10-05 02:57:04,810 - root - INFO - step: 7105 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2524 +[titan] 2025-10-05 02:57:04,810 - root - INFO - lr: 4.6890e-05 gnorm: 1.23 [ 4:22:55<20:17:20] +[titan] 2025-10-05 02:57:15,679 - root - INFO - step: 7110 loss: 2.6249 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3152 +[titan] 2025-10-05 02:57:15,679 - root - INFO - lr: 4.6886e-05 gnorm: 1.23 [ 4:23:06<20:17:08] +[titan] 2025-10-05 02:57:26,560 - root - INFO - step: 7115 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 02:57:26,560 - root - INFO - lr: 4.6881e-05 gnorm: 1.21 [ 4:23:17<20:16:55] +[titan] 2025-10-05 02:57:37,504 - root - INFO - step: 7120 loss: 2.5642 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 02:57:37,504 - root - INFO - lr: 4.6877e-05 gnorm: 1.25 [ 4:23:28<20:16:44] +[titan] 2025-10-05 02:57:48,407 - root - INFO - step: 7125 loss: 2.5252 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2269 +[titan] 2025-10-05 02:57:48,408 - root - INFO - lr: 4.6872e-05 gnorm: 1.23 [ 4:23:39<20:16:32] +[titan] 2025-10-05 02:57:59,324 - root - INFO - step: 7130 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2910 +[titan] 2025-10-05 02:57:59,324 - root - INFO - lr: 4.6868e-05 
gnorm: 1.23 [ 4:23:50<20:16:20] +[titan] 2025-10-05 02:58:10,198 - root - INFO - step: 7135 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3080 +[titan] 2025-10-05 02:58:10,198 - root - INFO - lr: 4.6863e-05 gnorm: 1.30 [ 4:24:01<20:16:07] +[titan] 2025-10-05 02:58:21,069 - root - INFO - step: 7140 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:58:21,069 - root - INFO - lr: 4.6859e-05 gnorm: 1.21 [ 4:24:12<20:15:55] +[titan] 2025-10-05 02:58:31,936 - root - INFO - step: 7145 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2622 +[titan] 2025-10-05 02:58:31,936 - root - INFO - lr: 4.6854e-05 gnorm: 1.20 [ 4:24:23<20:15:43] +[titan] 2025-10-05 02:58:40,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:58:42,870 - root - INFO - step: 7150 loss: 2.5513 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2508 +[titan] 2025-10-05 02:58:42,871 - root - INFO - lr: 4.6850e-05 gnorm: 1.21 [ 4:24:34<20:15:31] +[titan] 2025-10-05 02:58:53,743 - root - INFO - step: 7155 loss: 2.5589 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2563 +[titan] 2025-10-05 02:58:53,743 - root - INFO - lr: 4.6845e-05 gnorm: 1.16 [ 4:24:44<20:15:19] +[titan] 2025-10-05 02:59:04,618 - root - INFO - step: 7160 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2723 +[titan] 2025-10-05 02:59:04,618 - root - INFO - lr: 4.6840e-05 gnorm: 1.21 [ 4:24:55<20:15:07] +[titan] 2025-10-05 02:59:15,628 - root - INFO - step: 7165 loss: 2.5541 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.91 mfu: 41.75% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2529 +[titan] 2025-10-05 02:59:15,629 - root - INFO - lr: 4.6836e-05 gnorm: 1.17 [ 4:25:06<20:14:55] +[titan] 2025-10-05 02:59:22,343 - root - INFO - Dumping profiler traces at step 7168 +[titan] 2025-10-05 02:59:22,378 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:59:26,736 - root - INFO - step: 7170 loss: 2.6199 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.30 mfu: 41.38% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3097 +[titan] 2025-10-05 02:59:26,736 - root - INFO - lr: 4.6831e-05 gnorm: 1.20 [ 4:25:17<20:14:44] +[titan] 2025-10-05 02:59:37,672 - root - INFO - step: 7175 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1801 +[titan] 2025-10-05 02:59:37,672 - root - INFO - lr: 4.6827e-05 gnorm: 1.15 [ 4:25:28<20:14:32] +[titan] 2025-10-05 02:59:48,576 - root - INFO - step: 7180 loss: 2.6188 
memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3101 +[titan] 2025-10-05 02:59:48,576 - root - INFO - lr: 4.6822e-05 gnorm: 1.22 [ 4:25:39<20:14:20] +[titan] 2025-10-05 02:59:59,449 - root - INFO - step: 7185 loss: 2.5330 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2333 +[titan] 2025-10-05 02:59:59,450 - root - INFO - lr: 4.6818e-05 gnorm: 1.21 [ 4:25:50<20:14:08] +[titan] 2025-10-05 03:00:10,322 - root - INFO - step: 7190 loss: 2.6028 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2970 +[titan] 2025-10-05 03:00:10,322 - root - INFO - lr: 4.6813e-05 gnorm: 1.20 [ 4:26:01<20:13:56] +[titan] 2025-10-05 03:00:21,233 - root - INFO - step: 7195 loss: 2.6073 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2996 +[titan] 2025-10-05 03:00:21,233 - root - INFO - lr: 4.6808e-05 gnorm: 1.23 [ 4:26:12<20:13:44] +[titan] 2025-10-05 03:00:29,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:00:32,108 - root - INFO - step: 7200 loss: 2.5130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:00:32,108 - root - INFO - lr: 4.6804e-05 gnorm: 1.32 [ 4:26:23<20:13:32] +[titan] 2025-10-05 03:00:43,038 - root - INFO - step: 7205 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2785 +[titan] 2025-10-05 03:00:43,038 - root - INFO - lr: 4.6799e-05 gnorm: 1.29 [ 4:26:34<20:13:20] +[titan] 2025-10-05 03:00:53,933 - root - INFO - step: 7210 loss: 2.5257 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:00:53,933 - root - INFO - lr: 4.6795e-05 gnorm: 1.20 [ 4:26:45<20:13:08] +[titan] 2025-10-05 03:01:04,827 - root - INFO - step: 7215 loss: 2.5854 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3039 global_avg_mtp_loss: 2.2815 +[titan] 2025-10-05 03:01:04,827 - root - INFO - lr: 4.6790e-05 gnorm: 1.21 [ 4:26:55<20:12:56] +[titan] 2025-10-05 03:01:15,711 - root - INFO - step: 7220 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2982 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:01:15,712 - root - INFO - lr: 4.6786e-05 gnorm: 1.19 [ 4:27:06<20:12:44] +[titan] 2025-10-05 03:01:26,615 - root - INFO - step: 7225 loss: 2.4967 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2040 +[titan] 2025-10-05 03:01:26,615 - root - INFO - lr: 4.6781e-05 gnorm: 1.24 [ 4:27:17<20:12:32] +[titan] 2025-10-05 03:01:37,539 - root - INFO - step: 7230 loss: 2.6118 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.3044 +[titan] 2025-10-05 03:01:37,539 - root - INFO - lr: 4.6776e-05 
gnorm: 1.20 [ 4:27:28<20:12:20] +[titan] 2025-10-05 03:01:48,431 - root - INFO - step: 7235 loss: 2.5240 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2979 global_avg_mtp_loss: 2.2261 +[titan] 2025-10-05 03:01:48,431 - root - INFO - lr: 4.6772e-05 gnorm: 1.18 [ 4:27:39<20:12:08] +[titan] 2025-10-05 03:01:59,313 - root - INFO - step: 7240 loss: 2.5262 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2284 +[titan] 2025-10-05 03:01:59,313 - root - INFO - lr: 4.6767e-05 gnorm: 1.17 [ 4:27:50<20:11:56] +[titan] 2025-10-05 03:02:10,185 - root - INFO - step: 7245 loss: 2.5139 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2193 +[titan] 2025-10-05 03:02:10,185 - root - INFO - lr: 4.6762e-05 gnorm: 1.26 [ 4:28:01<20:11:44] +[titan] 2025-10-05 03:02:18,882 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:02:21,067 - root - INFO - step: 7250 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:02:21,067 - root - INFO - lr: 4.6758e-05 gnorm: 1.26 [ 4:28:12<20:11:32] +[titan] 2025-10-05 03:02:31,931 - root - INFO - step: 7255 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2327 +[titan] 2025-10-05 03:02:31,931 - root - INFO - lr: 4.6753e-05 gnorm: 1.22 [ 4:28:23<20:11:20] +[titan] 2025-10-05 03:02:42,869 - root - INFO - step: 7260 loss: 2.5329 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2991 global_avg_mtp_loss: 2.2339 +[titan] 2025-10-05 03:02:42,869 - root - INFO - lr: 4.6749e-05 gnorm: 1.22 [ 4:28:34<20:11:08] +[titan] 2025-10-05 03:02:53,734 - root - INFO - step: 7265 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.2033 +[titan] 2025-10-05 03:02:53,734 - root - INFO - lr: 4.6744e-05 gnorm: 1.30 [ 4:28:44<20:10:56] +[titan] 2025-10-05 03:03:04,623 - root - INFO - step: 7270 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3079 global_avg_mtp_loss: 2.3083 +[titan] 2025-10-05 03:03:04,623 - root - INFO - lr: 4.6739e-05 gnorm: 1.34 [ 4:28:55<20:10:44] +[titan] 2025-10-05 03:03:15,505 - root - INFO - step: 7275 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3072 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 03:03:15,505 - root - INFO - lr: 4.6735e-05 gnorm: 1.22 [ 4:29:06<20:10:32] +[titan] 2025-10-05 03:03:26,372 - root - INFO - step: 7280 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2876 +[titan] 2025-10-05 03:03:26,372 - root - INFO - lr: 4.6730e-05 
gnorm: 1.19 [ 4:29:17<20:10:20] +[titan] 2025-10-05 03:03:37,274 - root - INFO - step: 7285 loss: 2.6024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2948 +[titan] 2025-10-05 03:03:37,274 - root - INFO - lr: 4.6725e-05 gnorm: 1.27 [ 4:29:28<20:10:08] +[titan] 2025-10-05 03:03:48,171 - root - INFO - step: 7290 loss: 2.5142 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2183 +[titan] 2025-10-05 03:03:48,171 - root - INFO - lr: 4.6721e-05 gnorm: 1.18 [ 4:29:39<20:09:56] +[titan] 2025-10-05 03:03:59,037 - root - INFO - step: 7295 loss: 2.5672 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3019 global_avg_mtp_loss: 2.2653 +[titan] 2025-10-05 03:03:59,037 - root - INFO - lr: 4.6716e-05 gnorm: 1.21 [ 4:29:50<20:09:43] +[titan] 2025-10-05 03:04:07,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:04:09,893 - root - INFO - step: 7300 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2362 +[titan] 2025-10-05 03:04:09,893 - root - INFO - lr: 4.6712e-05 gnorm: 1.19 [ 4:30:01<20:09:31] +[titan] 2025-10-05 03:04:20,770 - root - INFO - step: 7305 loss: 2.5190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:04:20,770 - root - INFO - lr: 4.6707e-05 gnorm: 1.20 [ 4:30:11<20:09:19] +[titan] 2025-10-05 03:04:31,636 - root - INFO - step: 7310 loss: 2.5542 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2522 +[titan] 2025-10-05 03:04:31,637 - root - INFO - lr: 4.6702e-05 gnorm: 1.16 [ 4:30:22<20:09:07] +[titan] 2025-10-05 03:04:42,538 - root - INFO - step: 7315 loss: 2.5823 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3047 global_avg_mtp_loss: 2.2776 +[titan] 2025-10-05 03:04:42,538 - root - INFO - lr: 4.6698e-05 gnorm: 1.19 [ 4:30:33<20:08:55] +[titan] 2025-10-05 03:04:53,396 - root - INFO - step: 7320 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.2988 +[titan] 2025-10-05 03:04:53,396 - root - INFO - lr: 4.6693e-05 gnorm: 1.20 [ 4:30:44<20:08:43] +[titan] 2025-10-05 03:05:04,291 - root - INFO - step: 7325 loss: 2.6131 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 03:05:04,291 - root - INFO - lr: 4.6688e-05 gnorm: 1.20 [ 4:30:55<20:08:31] +[titan] 2025-10-05 03:05:15,170 - root - INFO - step: 7330 loss: 2.5664 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2639 +[titan] 2025-10-05 03:05:15,170 - root - INFO - lr: 4.6684e-05 
gnorm: 1.19 [ 4:31:06<20:08:19] +[titan] 2025-10-05 03:05:26,057 - root - INFO - step: 7335 loss: 2.5718 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:05:26,057 - root - INFO - lr: 4.6679e-05 gnorm: 1.19 [ 4:31:17<20:08:07] +[titan] 2025-10-05 03:05:36,944 - root - INFO - step: 7340 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:05:36,945 - root - INFO - lr: 4.6674e-05 gnorm: 1.21 [ 4:31:28<20:07:55] +[titan] 2025-10-05 03:05:47,861 - root - INFO - step: 7345 loss: 2.4951 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2008 +[titan] 2025-10-05 03:05:47,861 - root - INFO - lr: 4.6670e-05 gnorm: 1.18 [ 4:31:38<20:07:43] +[titan] 2025-10-05 03:05:56,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:05:58,742 - root - INFO - step: 7350 loss: 2.6375 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3271 +[titan] 2025-10-05 03:05:58,742 - root - INFO - lr: 4.6665e-05 gnorm: 1.20 [ 4:31:49<20:07:31] +[titan] 2025-10-05 03:06:09,631 - root - INFO - step: 7355 loss: 2.5204 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2241 +[titan] 2025-10-05 03:06:09,632 - root - INFO - lr: 4.6660e-05 gnorm: 1.13 [ 4:32:00<20:07:19] +[titan] 2025-10-05 03:06:20,514 - root - INFO - step: 7360 loss: 2.5761 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2736 +[titan] 2025-10-05 03:06:20,514 - root - INFO - lr: 4.6656e-05 gnorm: 1.20 [ 4:32:11<20:07:07] +[titan] 2025-10-05 03:06:31,396 - root - INFO - step: 7365 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2678 +[titan] 2025-10-05 03:06:31,397 - root - INFO - lr: 4.6651e-05 gnorm: 1.18 [ 4:32:22<20:06:55] +[titan] 2025-10-05 03:06:42,281 - root - INFO - step: 7370 loss: 2.5449 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2457 +[titan] 2025-10-05 03:06:42,282 - root - INFO - lr: 4.6646e-05 gnorm: 1.20 [ 4:32:33<20:06:43] +[titan] 2025-10-05 03:06:53,156 - root - INFO - step: 7375 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2076 +[titan] 2025-10-05 03:06:53,156 - root - INFO - lr: 4.6642e-05 gnorm: 1.20 [ 4:32:44<20:06:31] +[titan] 2025-10-05 03:07:04,009 - root - INFO - step: 7380 loss: 2.4884 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:07:04,009 - root - INFO - lr: 4.6637e-05 
gnorm: 1.18 [ 4:32:55<20:06:19] +[titan] 2025-10-05 03:07:14,887 - root - INFO - step: 7385 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2624 +[titan] 2025-10-05 03:07:14,887 - root - INFO - lr: 4.6632e-05 gnorm: 1.29 [ 4:33:06<20:06:06] +[titan] 2025-10-05 03:07:25,781 - root - INFO - step: 7390 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:07:25,781 - root - INFO - lr: 4.6627e-05 gnorm: 1.19 [ 4:33:16<20:05:54] +[titan] 2025-10-05 03:07:36,668 - root - INFO - step: 7395 loss: 2.5215 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2253 +[titan] 2025-10-05 03:07:36,668 - root - INFO - lr: 4.6623e-05 gnorm: 1.18 [ 4:33:27<20:05:42] +[titan] 2025-10-05 03:07:45,410 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:07:47,595 - root - INFO - step: 7400 loss: 2.5552 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 03:07:47,595 - root - INFO - lr: 4.6618e-05 gnorm: 1.25 [ 4:33:38<20:05:31] +[titan] 2025-10-05 03:07:58,479 - root - INFO - step: 7405 loss: 2.5722 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2664 +[titan] 2025-10-05 03:07:58,479 - root - INFO - lr: 4.6613e-05 gnorm: 1.23 [ 4:33:49<20:05:19] +[titan] 2025-10-05 03:08:09,352 - root - INFO - step: 7410 loss: 2.6173 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 03:08:09,352 - root - INFO - lr: 4.6609e-05 gnorm: 1.26 [ 4:34:00<20:05:07] +[titan] 2025-10-05 03:08:20,245 - root - INFO - step: 7415 loss: 2.6371 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.3115 global_avg_mtp_loss: 2.3256 +[titan] 2025-10-05 03:08:20,245 - root - INFO - lr: 4.6604e-05 gnorm: 1.18 [ 4:34:11<20:04:55] +[titan] 2025-10-05 03:08:31,148 - root - INFO - step: 7420 loss: 2.5121 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:08:31,148 - root - INFO - lr: 4.6599e-05 gnorm: 1.18 [ 4:34:22<20:04:43] +[titan] 2025-10-05 03:08:42,047 - root - INFO - step: 7425 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2028 +[titan] 2025-10-05 03:08:42,047 - root - INFO - lr: 4.6594e-05 gnorm: 1.17 [ 4:34:33<20:04:31] +[titan] 2025-10-05 03:08:52,923 - root - INFO - step: 7430 loss: 2.5993 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2935 +[titan] 2025-10-05 03:08:52,923 - root - INFO - lr: 4.6590e-05 
gnorm: 1.19 [ 4:34:44<20:04:19] +[titan] 2025-10-05 03:09:03,806 - root - INFO - step: 7435 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2641 +[titan] 2025-10-05 03:09:03,806 - root - INFO - lr: 4.6585e-05 gnorm: 1.23 [ 4:34:54<20:04:07] +[titan] 2025-10-05 03:09:14,682 - root - INFO - step: 7440 loss: 2.4458 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:09:14,682 - root - INFO - lr: 4.6580e-05 gnorm: 1.21 [ 4:35:05<20:03:55] +[titan] 2025-10-05 03:09:25,563 - root - INFO - step: 7445 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2988 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:09:25,563 - root - INFO - lr: 4.6576e-05 gnorm: 1.20 [ 4:35:16<20:03:43] +[titan] 2025-10-05 03:09:34,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:09:36,483 - root - INFO - step: 7450 loss: 2.4992 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2041 +[titan] 2025-10-05 03:09:36,483 - root - INFO - lr: 4.6571e-05 gnorm: 1.13 [ 4:35:27<20:03:31] +[titan] 2025-10-05 03:09:47,415 - root - INFO - step: 7455 loss: 2.5685 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:09:47,415 - root - INFO - lr: 4.6566e-05 gnorm: 1.21 [ 4:35:38<20:03:19] +[titan] 2025-10-05 03:09:58,322 - root - INFO - step: 7460 loss: 2.5530 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.15% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2521 +[titan] 2025-10-05 03:09:58,322 - root - INFO - lr: 4.6561e-05 gnorm: 1.19 [ 4:35:49<20:03:07] +[titan] 2025-10-05 03:10:09,217 - root - INFO - step: 7465 loss: 2.5984 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2930 +[titan] 2025-10-05 03:10:09,217 - root - INFO - lr: 4.6557e-05 gnorm: 1.33 [ 4:36:00<20:02:55] +[titan] 2025-10-05 03:10:20,126 - root - INFO - step: 7470 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:10:20,127 - root - INFO - lr: 4.6552e-05 gnorm: 1.25 [ 4:36:11<20:02:43] +[titan] 2025-10-05 03:10:31,009 - root - INFO - step: 7475 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3233 +[titan] 2025-10-05 03:10:31,009 - root - INFO - lr: 4.6547e-05 gnorm: 1.21 [ 4:36:22<20:02:31] +[titan] 2025-10-05 03:10:41,908 - root - INFO - step: 7480 loss: 2.6221 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3088 global_avg_mtp_loss: 2.3133 +[titan] 2025-10-05 03:10:41,908 - root - INFO - lr: 4.6542e-05 
gnorm: 1.24 [ 4:36:33<20:02:19] +[titan] 2025-10-05 03:10:52,859 - root - INFO - step: 7485 loss: 2.6267 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.13 mfu: 41.97% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3150 +[titan] 2025-10-05 03:10:52,859 - root - INFO - lr: 4.6538e-05 gnorm: 1.23 [ 4:36:43<20:02:08] +[titan] 2025-10-05 03:11:03,748 - root - INFO - step: 7490 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:11:03,748 - root - INFO - lr: 4.6533e-05 gnorm: 1.16 [ 4:36:54<20:01:56] +[titan] 2025-10-05 03:11:14,653 - root - INFO - step: 7495 loss: 2.5041 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2100 +[titan] 2025-10-05 03:11:14,654 - root - INFO - lr: 4.6528e-05 gnorm: 1.17 [ 4:37:05<20:01:44] +[titan] 2025-10-05 03:11:23,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:11:25,557 - root - INFO - step: 7500 loss: 2.5279 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 03:11:25,558 - root - INFO - lr: 4.6523e-05 gnorm: 1.17 [ 4:37:16<20:01:32] +[titan] 2025-10-05 03:11:36,447 - root - INFO - step: 7505 loss: 2.5670 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:11:36,447 - root - INFO - lr: 4.6519e-05 gnorm: 1.26 [ 4:37:27<20:01:20] +[titan] 2025-10-05 03:11:47,366 - root - INFO - step: 7510 loss: 2.5107 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 03:11:47,366 - root - INFO - lr: 4.6514e-05 gnorm: 1.18 [ 4:37:38<20:01:08] +[titan] 2025-10-05 03:11:58,284 - root - INFO - step: 7515 loss: 2.6471 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3343 +[titan] 2025-10-05 03:11:58,284 - root - INFO - lr: 4.6509e-05 gnorm: 1.26 [ 4:37:49<20:00:56] +[titan] 2025-10-05 03:12:09,176 - root - INFO - step: 7520 loss: 2.5022 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:12:09,177 - root - INFO - lr: 4.6504e-05 gnorm: 1.24 [ 4:38:00<20:00:44] +[titan] 2025-10-05 03:12:20,065 - root - INFO - step: 7525 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2289 +[titan] 2025-10-05 03:12:20,065 - root - INFO - lr: 4.6499e-05 gnorm: 1.20 [ 4:38:11<20:00:32] +[titan] 2025-10-05 03:12:30,937 - root - INFO - step: 7530 loss: 2.5858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2803 +[titan] 2025-10-05 03:12:30,937 - root - INFO - lr: 4.6495e-05 
gnorm: 1.25 [ 4:38:22<20:00:20] +[titan] 2025-10-05 03:12:41,813 - root - INFO - step: 7535 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:12:41,814 - root - INFO - lr: 4.6490e-05 gnorm: 1.20 [ 4:38:32<20:00:08] +[titan] 2025-10-05 03:12:52,684 - root - INFO - step: 7540 loss: 2.5356 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:12:52,684 - root - INFO - lr: 4.6485e-05 gnorm: 1.23 [ 4:38:43<19:59:56] +[titan] 2025-10-05 03:13:03,580 - root - INFO - step: 7545 loss: 2.5425 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2420 +[titan] 2025-10-05 03:13:03,580 - root - INFO - lr: 4.6480e-05 gnorm: 1.22 [ 4:38:54<19:59:44] +[titan] 2025-10-05 03:13:12,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:13:14,425 - root - INFO - step: 7550 loss: 2.5098 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:14,425 - root - INFO - lr: 4.6476e-05 gnorm: 1.21 [ 4:39:05<19:59:32] +[titan] 2025-10-05 03:13:25,285 - root - INFO - step: 7555 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2953 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:25,285 - root - INFO - lr: 4.6471e-05 gnorm: 1.32 [ 4:39:16<19:59:20] +[titan] 2025-10-05 03:13:36,128 - root - INFO - step: 7560 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1394 +[titan] 2025-10-05 03:13:36,128 - root - INFO - lr: 4.6466e-05 gnorm: 1.23 [ 4:39:27<19:59:08] +[titan] 2025-10-05 03:13:47,004 - root - INFO - step: 7565 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2297 +[titan] 2025-10-05 03:13:47,005 - root - INFO - lr: 4.6461e-05 gnorm: 1.21 [ 4:39:38<19:58:56] +[titan] 2025-10-05 03:13:57,856 - root - INFO - step: 7570 loss: 2.4658 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:13:57,856 - root - INFO - lr: 4.6456e-05 gnorm: 1.15 [ 4:39:48<19:58:44] +[titan] 2025-10-05 03:14:08,701 - root - INFO - step: 7575 loss: 2.5486 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2477 +[titan] 2025-10-05 03:14:08,701 - root - INFO - lr: 4.6452e-05 gnorm: 1.16 [ 4:39:59<19:58:32] +[titan] 2025-10-05 03:14:19,585 - root - INFO - step: 7580 loss: 2.4950 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:14:19,586 - root - INFO - lr: 4.6447e-05 
gnorm: 1.20 [ 4:40:10<19:58:20] +[titan] 2025-10-05 03:14:30,487 - root - INFO - step: 7585 loss: 2.5519 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3008 global_avg_mtp_loss: 2.2511 +[titan] 2025-10-05 03:14:30,487 - root - INFO - lr: 4.6442e-05 gnorm: 1.18 [ 4:40:21<19:58:08] +[titan] 2025-10-05 03:14:41,356 - root - INFO - step: 7590 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2224 +[titan] 2025-10-05 03:14:41,356 - root - INFO - lr: 4.6437e-05 gnorm: 1.18 [ 4:40:32<19:57:56] +[titan] 2025-10-05 03:14:52,221 - root - INFO - step: 7595 loss: 2.5646 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 03:14:52,221 - root - INFO - lr: 4.6432e-05 gnorm: 1.16 [ 4:40:43<19:57:44] +[titan] 2025-10-05 03:15:00,910 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:15:03,087 - root - INFO - step: 7600 loss: 2.5198 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:15:03,087 - root - INFO - lr: 4.6427e-05 gnorm: 1.22 [ 4:40:54<19:57:32] +[titan] 2025-10-05 03:15:13,944 - root - INFO - step: 7605 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2354 +[titan] 2025-10-05 03:15:13,944 - root - INFO - lr: 4.6423e-05 gnorm: 1.19 [ 4:41:05<19:57:20] +[titan] 2025-10-05 03:15:24,824 - root - INFO - step: 7610 loss: 2.4376 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1514 +[titan] 2025-10-05 03:15:24,824 - root - INFO - lr: 4.6418e-05 gnorm: 1.19 [ 4:41:15<19:57:08] +[titan] 2025-10-05 03:15:35,666 - root - INFO - step: 7615 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2192 +[titan] 2025-10-05 03:15:35,666 - root - INFO - lr: 4.6413e-05 gnorm: 1.17 [ 4:41:26<19:56:55] +[titan] 2025-10-05 03:15:46,512 - root - INFO - step: 7620 loss: 2.5412 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2413 +[titan] 2025-10-05 03:15:46,512 - root - INFO - lr: 4.6408e-05 gnorm: 1.18 [ 4:41:37<19:56:43] +[titan] 2025-10-05 03:15:57,356 - root - INFO - step: 7625 loss: 2.6165 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3094 global_avg_mtp_loss: 2.3070 +[titan] 2025-10-05 03:15:57,356 - root - INFO - lr: 4.6403e-05 gnorm: 1.26 [ 4:41:48<19:56:31] +[titan] 2025-10-05 03:16:08,215 - root - INFO - step: 7630 loss: 2.5181 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:16:08,215 - root - INFO - lr: 4.6398e-05 
gnorm: 1.21 [ 4:41:59<19:56:19] +[titan] 2025-10-05 03:16:19,088 - root - INFO - step: 7635 loss: 2.4574 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1676 +[titan] 2025-10-05 03:16:19,088 - root - INFO - lr: 4.6394e-05 gnorm: 1.19 [ 4:42:10<19:56:07] +[titan] 2025-10-05 03:16:29,923 - root - INFO - step: 7640 loss: 2.4611 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1719 +[titan] 2025-10-05 03:16:29,923 - root - INFO - lr: 4.6389e-05 gnorm: 1.17 [ 4:42:21<19:55:55] +[titan] 2025-10-05 03:16:40,805 - root - INFO - step: 7645 loss: 2.5518 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2500 +[titan] 2025-10-05 03:16:40,805 - root - INFO - lr: 4.6384e-05 gnorm: 1.19 [ 4:42:31<19:55:43] +[titan] 2025-10-05 03:16:49,484 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:16:51,676 - root - INFO - step: 7650 loss: 2.5593 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2579 +[titan] 2025-10-05 03:16:51,677 - root - INFO - lr: 4.6379e-05 gnorm: 1.21 [ 4:42:42<19:55:31] +[titan] 2025-10-05 03:17:02,521 - root - INFO - step: 7655 loss: 2.5404 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 03:17:02,521 - root - INFO - lr: 4.6374e-05 gnorm: 1.24 [ 4:42:53<19:55:19] +[titan] 2025-10-05 03:17:13,367 - root - INFO - step: 7660 loss: 2.5051 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2097 +[titan] 2025-10-05 03:17:13,367 - root - INFO - lr: 4.6369e-05 gnorm: 1.23 [ 4:43:04<19:55:07] +[titan] 2025-10-05 03:17:24,235 - root - INFO - step: 7665 loss: 2.6218 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.3131 +[titan] 2025-10-05 03:17:24,235 - root - INFO - lr: 4.6364e-05 gnorm: 1.19 [ 4:43:15<19:54:55] +[titan] 2025-10-05 03:17:35,066 - root - INFO - step: 7670 loss: 2.5900 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2847 +[titan] 2025-10-05 03:17:35,066 - root - INFO - lr: 4.6360e-05 gnorm: 1.23 [ 4:43:26<19:54:43] +[titan] 2025-10-05 03:17:45,893 - root - INFO - step: 7675 loss: 2.5953 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 03:17:45,893 - root - INFO - lr: 4.6355e-05 gnorm: 1.19 [ 4:43:36<19:54:30] +[titan] 2025-10-05 03:17:56,861 - root - INFO - step: 7680 loss: 2.5148 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2185 +[titan] 2025-10-05 03:17:56,861 - root - INFO - lr: 4.6350e-05 
gnorm: 1.23 [ 4:43:47<19:54:19] +[titan] 2025-10-05 03:17:57,042 - root - INFO - Dumping profiler traces at step 7680 +[titan] 2025-10-05 03:17:57,083 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:18:07,907 - root - INFO - step: 7685 loss: 2.4389 memory: 118.84GiB(85.28%) tps: 29,665 tflops: 411.56 mfu: 41.61% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 03:18:07,907 - root - INFO - lr: 4.6345e-05 gnorm: 1.17 [ 4:43:59<19:54:08] +[titan] 2025-10-05 03:18:18,756 - root - INFO - step: 7690 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:18:18,757 - root - INFO - lr: 4.6340e-05 gnorm: 1.18 [ 4:44:09<19:53:56] +[titan] 2025-10-05 03:18:29,609 - root - INFO - step: 7695 loss: 2.5730 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:18:29,609 - root - INFO - lr: 4.6335e-05 gnorm: 1.36 [ 4:44:20<19:53:43] +[titan] 2025-10-05 03:18:38,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:18:40,472 - root - INFO - step: 7700 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2643 +[titan] 2025-10-05 03:18:40,473 - root - INFO - lr: 4.6330e-05 gnorm: 1.19 [ 4:44:31<19:53:31] +[titan] 2025-10-05 03:18:51,364 - root - INFO - step: 7705 loss: 2.5443 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 03:18:51,364 - root - INFO - lr: 4.6325e-05 gnorm: 1.19 [ 4:44:42<19:53:20] +[titan] 2025-10-05 03:19:02,224 - root - INFO - step: 7710 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2637 +[titan] 2025-10-05 03:19:02,225 - root - INFO - lr: 4.6321e-05 gnorm: 1.20 [ 4:44:53<19:53:07] +[titan] 2025-10-05 03:19:13,098 - root - INFO - step: 7715 loss: 2.5489 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2486 +[titan] 2025-10-05 03:19:13,098 - root - INFO - lr: 4.6316e-05 gnorm: 1.20 [ 4:45:04<19:52:56] +[titan] 2025-10-05 03:19:23,973 - root - INFO - step: 7720 loss: 2.4402 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1529 +[titan] 2025-10-05 03:19:23,974 - root - INFO - lr: 4.6311e-05 gnorm: 1.21 [ 4:45:15<19:52:44] +[titan] 2025-10-05 03:19:34,816 - root - INFO - step: 7725 loss: 2.5551 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:19:34,816 - root - INFO - lr: 4.6306e-05 gnorm: 1.19 [ 4:45:25<19:52:31] +[titan] 2025-10-05 03:19:45,679 - root - INFO - step: 7730 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2814 +[titan] 2025-10-05 03:19:45,679 - root - INFO - lr: 4.6301e-05 
gnorm: 1.17 [ 4:45:36<19:52:19] +[titan] 2025-10-05 03:19:56,502 - root - INFO - step: 7735 loss: 2.5206 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2970 global_avg_mtp_loss: 2.2236 +[titan] 2025-10-05 03:19:56,502 - root - INFO - lr: 4.6296e-05 gnorm: 1.24 [ 4:45:47<19:52:07] +[titan] 2025-10-05 03:20:07,337 - root - INFO - step: 7740 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2545 +[titan] 2025-10-05 03:20:07,337 - root - INFO - lr: 4.6291e-05 gnorm: 1.19 [ 4:45:58<19:51:55] +[titan] 2025-10-05 03:20:18,166 - root - INFO - step: 7745 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.79 mfu: 42.45% global_avg_ntp_loss: 0.2938 global_avg_mtp_loss: 2.1964 +[titan] 2025-10-05 03:20:18,166 - root - INFO - lr: 4.6286e-05 gnorm: 1.20 [ 4:46:09<19:51:43] +[titan] 2025-10-05 03:20:26,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:20:29,014 - root - INFO - step: 7750 loss: 2.4800 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.1876 +[titan] 2025-10-05 03:20:29,015 - root - INFO - lr: 4.6281e-05 gnorm: 1.17 [ 4:46:20<19:51:31] +[titan] 2025-10-05 03:20:39,856 - root - INFO - step: 7755 loss: 2.4850 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1929 +[titan] 2025-10-05 03:20:39,857 - root - INFO - lr: 4.6276e-05 gnorm: 1.24 [ 4:46:30<19:51:19] +[titan] 2025-10-05 03:20:50,697 - root - INFO - step: 7760 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2144 +[titan] 2025-10-05 03:20:50,697 - root - INFO - lr: 4.6271e-05 gnorm: 1.13 [ 4:46:41<19:51:07] +[titan] 2025-10-05 03:21:01,573 - root - INFO - step: 7765 loss: 2.5168 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2200 +[titan] 2025-10-05 03:21:01,573 - root - INFO - lr: 4.6267e-05 gnorm: 1.17 [ 4:46:52<19:50:55] +[titan] 2025-10-05 03:21:12,426 - root - INFO - step: 7770 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:21:12,426 - root - INFO - lr: 4.6262e-05 gnorm: 1.24 [ 4:47:03<19:50:43] +[titan] 2025-10-05 03:21:23,262 - root - INFO - step: 7775 loss: 2.5468 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2462 +[titan] 2025-10-05 03:21:23,262 - root - INFO - lr: 4.6257e-05 gnorm: 1.22 [ 4:47:14<19:50:31] +[titan] 2025-10-05 03:21:34,121 - root - INFO - step: 7780 loss: 2.5186 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2965 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:21:34,121 - root - INFO - lr: 4.6252e-05 
gnorm: 1.22 [ 4:47:25<19:50:19] +[titan] 2025-10-05 03:21:44,959 - root - INFO - step: 7785 loss: 2.5555 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2532 +[titan] 2025-10-05 03:21:44,960 - root - INFO - lr: 4.6247e-05 gnorm: 1.19 [ 4:47:36<19:50:07] +[titan] 2025-10-05 03:21:55,841 - root - INFO - step: 7790 loss: 2.5595 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2574 +[titan] 2025-10-05 03:21:55,841 - root - INFO - lr: 4.6242e-05 gnorm: 1.21 [ 4:47:46<19:49:55] +[titan] 2025-10-05 03:22:06,686 - root - INFO - step: 7795 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2407 +[titan] 2025-10-05 03:22:06,687 - root - INFO - lr: 4.6237e-05 gnorm: 1.20 [ 4:47:57<19:49:43] +[titan] 2025-10-05 03:22:15,327 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:22:17,501 - root - INFO - step: 7800 loss: 2.4671 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1775 +[titan] 2025-10-05 03:22:17,501 - root - INFO - lr: 4.6232e-05 gnorm: 1.31 [ 4:48:08<19:49:30] +[titan] 2025-10-05 03:22:28,367 - root - INFO - step: 7805 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2366 +[titan] 2025-10-05 03:22:28,367 - root - INFO - lr: 4.6227e-05 gnorm: 1.21 [ 4:48:19<19:49:18] +[titan] 2025-10-05 03:22:39,182 - root - INFO - step: 7810 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:22:39,182 - root - INFO - lr: 4.6222e-05 gnorm: 1.23 [ 4:48:30<19:49:06] +[titan] 2025-10-05 03:22:50,001 - root - INFO - step: 7815 loss: 2.5037 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2093 +[titan] 2025-10-05 03:22:50,001 - root - INFO - lr: 4.6217e-05 gnorm: 1.17 [ 4:48:41<19:48:54] +[titan] 2025-10-05 03:23:00,861 - root - INFO - step: 7820 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 03:23:00,861 - root - INFO - lr: 4.6212e-05 gnorm: 1.15 [ 4:48:51<19:48:42] +[titan] 2025-10-05 03:23:11,665 - root - INFO - step: 7825 loss: 2.5549 memory: 118.84GiB(85.28%) tps: 30,332 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:23:11,665 - root - INFO - lr: 4.6207e-05 gnorm: 1.18 [ 4:49:02<19:48:30] +[titan] 2025-10-05 03:23:22,463 - root - INFO - step: 7830 loss: 2.5877 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2813 +[titan] 2025-10-05 03:23:22,464 - root - INFO - lr: 4.6202e-05 
gnorm: 1.22 [ 4:49:13<19:48:18] +[titan] 2025-10-05 03:23:33,276 - root - INFO - step: 7835 loss: 2.5278 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:23:33,276 - root - INFO - lr: 4.6197e-05 gnorm: 1.28 [ 4:49:24<19:48:05] +[titan] 2025-10-05 03:23:44,101 - root - INFO - step: 7840 loss: 2.5759 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 03:23:44,101 - root - INFO - lr: 4.6192e-05 gnorm: 1.19 [ 4:49:35<19:47:53] +[titan] 2025-10-05 03:23:54,974 - root - INFO - step: 7845 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 03:23:54,975 - root - INFO - lr: 4.6187e-05 gnorm: 1.19 [ 4:49:46<19:47:41] +[titan] 2025-10-05 03:24:03,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:24:05,782 - root - INFO - step: 7850 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2873 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 03:24:05,783 - root - INFO - lr: 4.6182e-05 gnorm: 1.17 [ 4:49:56<19:47:29] +[titan] 2025-10-05 03:24:16,593 - root - INFO - step: 7855 loss: 2.4523 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1636 +[titan] 2025-10-05 03:24:16,593 - root - INFO - lr: 4.6177e-05 gnorm: 1.14 [ 4:50:07<19:47:17] +[titan] 2025-10-05 03:24:27,423 - root - INFO - step: 7860 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2509 +[titan] 2025-10-05 03:24:27,424 - root - INFO - lr: 4.6172e-05 gnorm: 1.24 [ 4:50:18<19:47:05] +[titan] 2025-10-05 03:24:38,249 - root - INFO - step: 7865 loss: 2.5375 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2394 +[titan] 2025-10-05 03:24:38,249 - root - INFO - lr: 4.6167e-05 gnorm: 1.22 [ 4:50:29<19:46:53] +[titan] 2025-10-05 03:24:49,117 - root - INFO - step: 7870 loss: 2.4208 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1365 +[titan] 2025-10-05 03:24:49,117 - root - INFO - lr: 4.6163e-05 gnorm: 1.17 [ 4:50:40<19:46:41] +[titan] 2025-10-05 03:25:00,043 - root - INFO - step: 7875 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:25:00,043 - root - INFO - lr: 4.6158e-05 gnorm: 1.19 [ 4:50:51<19:46:29] +[titan] 2025-10-05 03:25:10,889 - root - INFO - step: 7880 loss: 2.5464 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2467 +[titan] 2025-10-05 03:25:10,889 - root - INFO - lr: 4.6153e-05 
gnorm: 1.19 [ 4:51:01<19:46:17] +[titan] 2025-10-05 03:25:21,745 - root - INFO - step: 7885 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:25:21,745 - root - INFO - lr: 4.6148e-05 gnorm: 1.18 [ 4:51:12<19:46:05] +[titan] 2025-10-05 03:25:32,610 - root - INFO - step: 7890 loss: 2.5321 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2346 +[titan] 2025-10-05 03:25:32,610 - root - INFO - lr: 4.6143e-05 gnorm: 1.20 [ 4:51:23<19:45:53] +[titan] 2025-10-05 03:25:43,443 - root - INFO - step: 7895 loss: 2.5115 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:25:43,443 - root - INFO - lr: 4.6138e-05 gnorm: 1.14 [ 4:51:34<19:45:41] +[titan] 2025-10-05 03:25:52,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:25:54,248 - root - INFO - step: 7900 loss: 2.5320 memory: 118.84GiB(85.28%) tps: 30,328 tflops: 420.75 mfu: 42.54% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2324 +[titan] 2025-10-05 03:25:54,248 - root - INFO - lr: 4.6133e-05 gnorm: 1.18 [ 4:51:45<19:45:29] +[titan] 2025-10-05 03:26:05,135 - root - INFO - step: 7905 loss: 2.5694 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2673 +[titan] 2025-10-05 03:26:05,135 - root - INFO - lr: 4.6128e-05 gnorm: 1.17 [ 4:51:56<19:45:17] +[titan] 2025-10-05 03:26:15,976 - root - INFO - step: 7910 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2989 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:26:15,976 - root - INFO - lr: 4.6123e-05 gnorm: 1.24 [ 4:52:07<19:45:05] +[titan] 2025-10-05 03:26:26,803 - root - INFO - step: 7915 loss: 2.5234 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2262 +[titan] 2025-10-05 03:26:26,803 - root - INFO - lr: 4.6118e-05 gnorm: 1.20 [ 4:52:17<19:44:53] +[titan] 2025-10-05 03:26:37,605 - root - INFO - step: 7920 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2145 +[titan] 2025-10-05 03:26:37,605 - root - INFO - lr: 4.6113e-05 gnorm: 1.21 [ 4:52:28<19:44:41] +[titan] 2025-10-05 03:26:48,452 - root - INFO - step: 7925 loss: 2.4185 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 03:26:48,452 - root - INFO - lr: 4.6107e-05 gnorm: 1.15 [ 4:52:39<19:44:29] +[titan] 2025-10-05 03:26:59,330 - root - INFO - step: 7930 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 03:26:59,330 - root - INFO - lr: 4.6102e-05 
gnorm: 1.26 [ 4:52:50<19:44:17] +[titan] 2025-10-05 03:27:10,155 - root - INFO - step: 7935 loss: 2.4620 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2890 global_avg_mtp_loss: 2.1731 +[titan] 2025-10-05 03:27:10,155 - root - INFO - lr: 4.6097e-05 gnorm: 1.18 [ 4:53:01<19:44:05] +[titan] 2025-10-05 03:27:20,964 - root - INFO - step: 7940 loss: 2.4808 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.61 mfu: 42.53% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:27:20,964 - root - INFO - lr: 4.6092e-05 gnorm: 1.15 [ 4:53:12<19:43:52] +[titan] 2025-10-05 03:27:31,803 - root - INFO - step: 7945 loss: 2.5084 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2142 +[titan] 2025-10-05 03:27:31,803 - root - INFO - lr: 4.6087e-05 gnorm: 1.16 [ 4:53:22<19:43:40] +[titan] 2025-10-05 03:27:40,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:27:42,583 - root - INFO - step: 7950 loss: 2.5326 memory: 118.84GiB(85.28%) tps: 30,397 tflops: 421.71 mfu: 42.64% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2359 +[titan] 2025-10-05 03:27:42,583 - root - INFO - lr: 4.6082e-05 gnorm: 1.21 [ 4:53:33<19:43:28] +[titan] 2025-10-05 03:27:53,381 - root - INFO - step: 7955 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2591 +[titan] 2025-10-05 03:27:53,382 - root - INFO - lr: 4.6077e-05 gnorm: 1.18 [ 4:53:44<19:43:16] +[titan] 2025-10-05 03:28:04,227 - root - INFO - step: 7960 loss: 2.4969 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2940 global_avg_mtp_loss: 2.2030 +[titan] 2025-10-05 03:28:04,227 - root - INFO - lr: 4.6072e-05 gnorm: 1.15 [ 4:53:55<19:43:04] +[titan] 2025-10-05 03:28:15,055 - root - INFO - step: 7965 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1237 +[titan] 2025-10-05 03:28:15,055 - root - INFO - lr: 4.6067e-05 gnorm: 1.13 [ 4:54:06<19:42:52] +[titan] 2025-10-05 03:28:25,883 - root - INFO - step: 7970 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.2034 +[titan] 2025-10-05 03:28:25,883 - root - INFO - lr: 4.6062e-05 gnorm: 1.17 [ 4:54:16<19:42:40] +[titan] 2025-10-05 03:28:36,715 - root - INFO - step: 7975 loss: 2.5491 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2491 +[titan] 2025-10-05 03:28:36,715 - root - INFO - lr: 4.6057e-05 gnorm: 1.19 [ 4:54:27<19:42:28] +[titan] 2025-10-05 03:28:47,543 - root - INFO - step: 7980 loss: 2.4817 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:28:47,543 - root - INFO - lr: 4.6052e-05 
gnorm: 1.16 [ 4:54:38<19:42:16] +[titan] 2025-10-05 03:28:58,364 - root - INFO - step: 7985 loss: 2.5422 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2429 +[titan] 2025-10-05 03:28:58,364 - root - INFO - lr: 4.6047e-05 gnorm: 1.18 [ 4:54:49<19:42:03] +[titan] 2025-10-05 03:29:09,176 - root - INFO - step: 7990 loss: 2.5558 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2558 +[titan] 2025-10-05 03:29:09,176 - root - INFO - lr: 4.6042e-05 gnorm: 1.18 [ 4:55:00<19:41:51] +[titan] 2025-10-05 03:29:19,983 - root - INFO - step: 7995 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.68 mfu: 42.54% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:29:19,983 - root - INFO - lr: 4.6037e-05 gnorm: 1.16 [ 4:55:11<19:41:39] +[titan] 2025-10-05 03:29:28,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:29:30,811 - root - INFO - step: 8000 loss: 2.5669 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.3034 global_avg_mtp_loss: 2.2635 +[titan] 2025-10-05 03:29:30,811 - root - INFO - lr: 4.6032e-05 gnorm: 1.20 [ 4:55:21<19:41:27] +[titan] 2025-10-05 03:29:41,667 - root - INFO - step: 8005 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2699 +[titan] 2025-10-05 03:29:41,667 - root - INFO - lr: 4.6027e-05 gnorm: 1.25 [ 4:55:32<19:41:15] +[titan] 2025-10-05 03:29:52,487 - root - INFO - step: 8010 loss: 2.5006 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2058 +[titan] 2025-10-05 03:29:52,487 - root - INFO - lr: 4.6022e-05 gnorm: 1.26 [ 4:55:43<19:41:03] +[titan] 2025-10-05 03:30:03,339 - root - INFO - step: 8015 loss: 2.4914 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:30:03,339 - root - INFO - lr: 4.6017e-05 gnorm: 1.18 [ 4:55:54<19:40:51] +[titan] 2025-10-05 03:30:14,162 - root - INFO - step: 8020 loss: 2.4809 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:30:14,162 - root - INFO - lr: 4.6012e-05 gnorm: 1.20 [ 4:56:05<19:40:39] +[titan] 2025-10-05 03:30:25,002 - root - INFO - step: 8025 loss: 2.4991 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2042 +[titan] 2025-10-05 03:30:25,003 - root - INFO - lr: 4.6007e-05 gnorm: 1.17 [ 4:56:16<19:40:27] +[titan] 2025-10-05 03:30:35,840 - root - INFO - step: 8030 loss: 2.4390 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1512 +[titan] 2025-10-05 03:30:35,841 - root - INFO - lr: 4.6001e-05 
gnorm: 1.18 [ 4:56:26<19:40:15] +[titan] 2025-10-05 03:30:46,678 - root - INFO - step: 8035 loss: 2.5127 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:30:46,678 - root - INFO - lr: 4.5996e-05 gnorm: 1.21 [ 4:56:37<19:40:03] +[titan] 2025-10-05 03:30:57,494 - root - INFO - step: 8040 loss: 2.4745 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1848 +[titan] 2025-10-05 03:30:57,495 - root - INFO - lr: 4.5991e-05 gnorm: 1.17 [ 4:56:48<19:39:51] +[titan] 2025-10-05 03:31:08,359 - root - INFO - step: 8045 loss: 2.5034 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2082 +[titan] 2025-10-05 03:31:08,360 - root - INFO - lr: 4.5986e-05 gnorm: 1.19 [ 4:56:59<19:39:39] +[titan] 2025-10-05 03:31:17,027 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:31:19,196 - root - INFO - step: 8050 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1741 +[titan] 2025-10-05 03:31:19,196 - root - INFO - lr: 4.5981e-05 gnorm: 1.19 [ 4:57:10<19:39:27] +[titan] 2025-10-05 03:31:30,047 - root - INFO - step: 8055 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2206 +[titan] 2025-10-05 03:31:30,047 - root - INFO - lr: 4.5976e-05 gnorm: 1.16 [ 4:57:21<19:39:15] +[titan] 2025-10-05 03:31:40,901 - root - INFO - step: 8060 loss: 2.4474 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 03:31:40,901 - root - INFO - lr: 4.5971e-05 gnorm: 1.14 [ 4:57:31<19:39:03] +[titan] 2025-10-05 03:31:51,725 - root - INFO - step: 8065 loss: 2.5411 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2406 +[titan] 2025-10-05 03:31:51,725 - root - INFO - lr: 4.5966e-05 gnorm: 1.17 [ 4:57:42<19:38:51] +[titan] 2025-10-05 03:32:02,621 - root - INFO - step: 8070 loss: 2.4864 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1942 +[titan] 2025-10-05 03:32:02,621 - root - INFO - lr: 4.5961e-05 gnorm: 1.20 [ 4:57:53<19:38:39] +[titan] 2025-10-05 03:32:13,441 - root - INFO - step: 8075 loss: 2.5540 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 03:32:13,441 - root - INFO - lr: 4.5956e-05 gnorm: 1.17 [ 4:58:04<19:38:27] +[titan] 2025-10-05 03:32:24,287 - root - INFO - step: 8080 loss: 2.4398 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1536 +[titan] 2025-10-05 03:32:24,287 - root - INFO - lr: 4.5951e-05 
gnorm: 1.14 [ 4:58:15<19:38:15] +[titan] 2025-10-05 03:32:35,118 - root - INFO - step: 8085 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2356 +[titan] 2025-10-05 03:32:35,118 - root - INFO - lr: 4.5945e-05 gnorm: 1.21 [ 4:58:26<19:38:03] +[titan] 2025-10-05 03:32:45,958 - root - INFO - step: 8090 loss: 2.5225 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2257 +[titan] 2025-10-05 03:32:45,959 - root - INFO - lr: 4.5940e-05 gnorm: 1.12 [ 4:58:37<19:37:51] +[titan] 2025-10-05 03:32:56,823 - root - INFO - step: 8095 loss: 2.5506 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2506 +[titan] 2025-10-05 03:32:56,824 - root - INFO - lr: 4.5935e-05 gnorm: 1.21 [ 4:58:47<19:37:39] +[titan] 2025-10-05 03:33:05,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:33:07,719 - root - INFO - step: 8100 loss: 2.5049 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 03:33:07,719 - root - INFO - lr: 4.5930e-05 gnorm: 1.20 [ 4:58:58<19:37:27] +[titan] 2025-10-05 03:33:18,615 - root - INFO - step: 8105 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:33:18,615 - root - INFO - lr: 4.5925e-05 gnorm: 1.12 [ 4:59:09<19:37:16] +[titan] 2025-10-05 03:33:29,481 - root - INFO - step: 8110 loss: 2.4795 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 03:33:29,481 - root - INFO - lr: 4.5920e-05 gnorm: 1.16 [ 4:59:20<19:37:04] +[titan] 2025-10-05 03:33:40,332 - root - INFO - step: 8115 loss: 2.4748 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1833 +[titan] 2025-10-05 03:33:40,332 - root - INFO - lr: 4.5915e-05 gnorm: 1.18 [ 4:59:31<19:36:52] +[titan] 2025-10-05 03:33:51,164 - root - INFO - step: 8120 loss: 2.5292 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2311 +[titan] 2025-10-05 03:33:51,164 - root - INFO - lr: 4.5910e-05 gnorm: 1.19 [ 4:59:42<19:36:40] +[titan] 2025-10-05 03:34:02,020 - root - INFO - step: 8125 loss: 2.4881 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:34:02,020 - root - INFO - lr: 4.5904e-05 gnorm: 1.21 [ 4:59:53<19:36:28] +[titan] 2025-10-05 03:34:12,891 - root - INFO - step: 8130 loss: 2.5727 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:34:12,891 - root - INFO - lr: 4.5899e-05 
gnorm: 1.22 [ 5:00:03<19:36:16] +[titan] 2025-10-05 03:34:23,761 - root - INFO - step: 8135 loss: 2.4550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1663 +[titan] 2025-10-05 03:34:23,761 - root - INFO - lr: 4.5894e-05 gnorm: 1.21 [ 5:00:14<19:36:04] +[titan] 2025-10-05 03:34:34,624 - root - INFO - step: 8140 loss: 2.4669 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:34:34,624 - root - INFO - lr: 4.5889e-05 gnorm: 1.16 [ 5:00:25<19:35:52] +[titan] 2025-10-05 03:34:45,506 - root - INFO - step: 8145 loss: 2.5656 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:34:45,506 - root - INFO - lr: 4.5884e-05 gnorm: 1.18 [ 5:00:36<19:35:40] +[titan] 2025-10-05 03:34:54,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:34:56,340 - root - INFO - step: 8150 loss: 2.4846 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1926 +[titan] 2025-10-05 03:34:56,340 - root - INFO - lr: 4.5879e-05 gnorm: 1.16 [ 5:00:47<19:35:28] +[titan] 2025-10-05 03:35:07,237 - root - INFO - step: 8155 loss: 2.5131 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2164 +[titan] 2025-10-05 03:35:07,237 - root - INFO - lr: 4.5874e-05 gnorm: 1.17 [ 5:00:58<19:35:17] +[titan] 2025-10-05 03:35:18,098 - root - INFO - step: 8160 loss: 2.6082 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3005 +[titan] 2025-10-05 03:35:18,098 - root - INFO - lr: 4.5868e-05 gnorm: 1.18 [ 5:01:09<19:35:05] +[titan] 2025-10-05 03:35:28,978 - root - INFO - step: 8165 loss: 2.5372 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2397 +[titan] 2025-10-05 03:35:28,978 - root - INFO - lr: 4.5863e-05 gnorm: 1.17 [ 5:01:20<19:34:53] +[titan] 2025-10-05 03:35:39,844 - root - INFO - step: 8170 loss: 2.4152 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 03:35:39,844 - root - INFO - lr: 4.5858e-05 gnorm: 1.18 [ 5:01:30<19:34:41] +[titan] 2025-10-05 03:35:50,781 - root - INFO - step: 8175 loss: 2.5578 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3013 global_avg_mtp_loss: 2.2565 +[titan] 2025-10-05 03:35:50,781 - root - INFO - lr: 4.5853e-05 gnorm: 1.27 [ 5:01:41<19:34:29] +[titan] 2025-10-05 03:36:01,663 - root - INFO - step: 8180 loss: 2.4462 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1586 +[titan] 2025-10-05 03:36:01,663 - root - INFO - lr: 4.5848e-05 
gnorm: 1.13 [ 5:01:52<19:34:18] +[titan] 2025-10-05 03:36:12,582 - root - INFO - step: 8185 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:36:12,582 - root - INFO - lr: 4.5843e-05 gnorm: 1.20 [ 5:02:03<19:34:06] +[titan] 2025-10-05 03:36:23,548 - root - INFO - step: 8190 loss: 2.4035 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1217 +[titan] 2025-10-05 03:36:23,549 - root - INFO - lr: 4.5837e-05 gnorm: 1.16 [ 5:02:14<19:33:54] +[titan] 2025-10-05 03:36:28,084 - root - INFO - Dumping profiler traces at step 8192 +[titan] 2025-10-05 03:36:28,120 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:36:34,646 - root - INFO - step: 8195 loss: 2.4867 memory: 118.84GiB(85.28%) tps: 29,528 tflops: 409.66 mfu: 41.42% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 03:36:34,646 - root - INFO - lr: 4.5832e-05 gnorm: 1.16 [ 5:02:25<19:33:43] +[titan] 2025-10-05 03:36:43,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:36:45,533 - root - INFO - step: 8200 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2809 +[titan] 2025-10-05 03:36:45,533 - root - INFO - lr: 4.5827e-05 gnorm: 1.15 [ 5:02:36<19:33:32] +[titan] 2025-10-05 03:36:56,421 - root - INFO - step: 8205 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1517 +[titan] 2025-10-05 03:36:56,421 - root - INFO - lr: 4.5822e-05 gnorm: 1.15 [ 5:02:47<19:33:20] +[titan] 2025-10-05 03:37:07,262 - root - INFO - step: 8210 loss: 2.4422 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2866 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:37:07,262 - root - INFO - lr: 4.5817e-05 gnorm: 1.16 [ 5:02:58<19:33:08] +[titan] 2025-10-05 03:37:18,124 - root - INFO - step: 8215 loss: 2.5901 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3062 global_avg_mtp_loss: 2.2840 +[titan] 2025-10-05 03:37:18,124 - root - INFO - lr: 4.5812e-05 gnorm: 1.23 [ 5:03:09<19:32:56] +[titan] 2025-10-05 03:37:29,001 - root - INFO - step: 8220 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2582 +[titan] 2025-10-05 03:37:29,001 - root - INFO - lr: 4.5806e-05 gnorm: 1.20 [ 5:03:20<19:32:44] +[titan] 2025-10-05 03:37:39,844 - root - INFO - step: 8225 loss: 2.4659 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 03:37:39,845 - root - INFO - lr: 4.5801e-05 gnorm: 1.23 [ 5:03:30<19:32:32] +[titan] 2025-10-05 03:37:50,743 - root - INFO - step: 8230 loss: 2.5410 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 03:37:50,743 - root - INFO - lr: 4.5796e-05 
gnorm: 1.19 [ 5:03:41<19:32:20] +[titan] 2025-10-05 03:38:01,585 - root - INFO - step: 8235 loss: 2.5291 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2326 +[titan] 2025-10-05 03:38:01,585 - root - INFO - lr: 4.5791e-05 gnorm: 1.15 [ 5:03:52<19:32:09] +[titan] 2025-10-05 03:38:12,474 - root - INFO - step: 8240 loss: 2.5137 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:38:12,474 - root - INFO - lr: 4.5786e-05 gnorm: 1.17 [ 5:04:03<19:31:57] +[titan] 2025-10-05 03:38:23,335 - root - INFO - step: 8245 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:38:23,335 - root - INFO - lr: 4.5780e-05 gnorm: 1.17 [ 5:04:14<19:31:45] +[titan] 2025-10-05 03:38:32,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:38:34,223 - root - INFO - step: 8250 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2131 +[titan] 2025-10-05 03:38:34,223 - root - INFO - lr: 4.5775e-05 gnorm: 1.18 [ 5:04:25<19:31:33] +[titan] 2025-10-05 03:38:45,088 - root - INFO - step: 8255 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2198 +[titan] 2025-10-05 03:38:45,088 - root - INFO - lr: 4.5770e-05 gnorm: 1.20 [ 5:04:36<19:31:21] +[titan] 2025-10-05 03:38:55,962 - root - INFO - step: 8260 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1524 +[titan] 2025-10-05 03:38:55,962 - root - INFO - lr: 4.5765e-05 gnorm: 1.19 [ 5:04:47<19:31:09] +[titan] 2025-10-05 03:39:06,818 - root - INFO - step: 8265 loss: 2.6017 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2949 +[titan] 2025-10-05 03:39:06,818 - root - INFO - lr: 4.5760e-05 gnorm: 1.23 [ 5:04:57<19:30:58] +[titan] 2025-10-05 03:39:17,707 - root - INFO - step: 8270 loss: 2.4450 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1572 +[titan] 2025-10-05 03:39:17,707 - root - INFO - lr: 4.5754e-05 gnorm: 1.18 [ 5:05:08<19:30:46] +[titan] 2025-10-05 03:39:28,574 - root - INFO - step: 8275 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1559 +[titan] 2025-10-05 03:39:28,574 - root - INFO - lr: 4.5749e-05 gnorm: 1.20 [ 5:05:19<19:30:34] +[titan] 2025-10-05 03:39:39,438 - root - INFO - step: 8280 loss: 2.4782 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2902 global_avg_mtp_loss: 2.1880 +[titan] 2025-10-05 03:39:39,438 - root - INFO - lr: 4.5744e-05 
gnorm: 1.20 [ 5:05:30<19:30:22] +[titan] 2025-10-05 03:39:50,344 - root - INFO - step: 8285 loss: 2.4818 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:39:50,344 - root - INFO - lr: 4.5739e-05 gnorm: 1.16 [ 5:05:41<19:30:10] +[titan] 2025-10-05 03:40:01,252 - root - INFO - step: 8290 loss: 2.4954 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2009 +[titan] 2025-10-05 03:40:01,252 - root - INFO - lr: 4.5733e-05 gnorm: 1.16 [ 5:05:52<19:29:59] +[titan] 2025-10-05 03:40:12,143 - root - INFO - step: 8295 loss: 2.5302 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2317 +[titan] 2025-10-05 03:40:12,143 - root - INFO - lr: 4.5728e-05 gnorm: 1.18 [ 5:06:03<19:29:47] +[titan] 2025-10-05 03:40:20,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:40:23,034 - root - INFO - step: 8300 loss: 2.4874 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:40:23,034 - root - INFO - lr: 4.5723e-05 gnorm: 1.19 [ 5:06:14<19:29:35] +[titan] 2025-10-05 03:40:33,937 - root - INFO - step: 8305 loss: 2.5831 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2800 +[titan] 2025-10-05 03:40:33,938 - root - INFO - lr: 4.5718e-05 gnorm: 1.17 [ 5:06:24<19:29:23] +[titan] 2025-10-05 03:40:44,825 - root - INFO - step: 8310 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2514 +[titan] 2025-10-05 03:40:44,825 - root - INFO - lr: 4.5713e-05 gnorm: 1.17 [ 5:06:35<19:29:12] +[titan] 2025-10-05 03:40:55,729 - root - INFO - step: 8315 loss: 2.5111 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:40:55,729 - root - INFO - lr: 4.5707e-05 gnorm: 1.14 [ 5:06:46<19:29:00] +[titan] 2025-10-05 03:41:06,596 - root - INFO - step: 8320 loss: 2.5003 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2067 +[titan] 2025-10-05 03:41:06,596 - root - INFO - lr: 4.5702e-05 gnorm: 1.19 [ 5:06:57<19:28:48] +[titan] 2025-10-05 03:41:17,525 - root - INFO - step: 8325 loss: 2.4974 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 03:41:17,525 - root - INFO - lr: 4.5697e-05 gnorm: 1.26 [ 5:07:08<19:28:37] +[titan] 2025-10-05 03:41:28,416 - root - INFO - step: 8330 loss: 2.4791 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1878 +[titan] 2025-10-05 03:41:28,416 - root - INFO - lr: 4.5692e-05 
gnorm: 1.19 [ 5:07:19<19:28:25] +[titan] 2025-10-05 03:41:39,305 - root - INFO - step: 8335 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:41:39,305 - root - INFO - lr: 4.5686e-05 gnorm: 1.25 [ 5:07:30<19:28:13] +[titan] 2025-10-05 03:41:50,197 - root - INFO - step: 8340 loss: 2.4762 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:41:50,197 - root - INFO - lr: 4.5681e-05 gnorm: 1.22 [ 5:07:41<19:28:01] +[titan] 2025-10-05 03:42:01,087 - root - INFO - step: 8345 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:42:01,087 - root - INFO - lr: 4.5676e-05 gnorm: 1.33 [ 5:07:52<19:27:50] +[titan] 2025-10-05 03:42:09,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:42:11,958 - root - INFO - step: 8350 loss: 2.5178 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2229 +[titan] 2025-10-05 03:42:11,958 - root - INFO - lr: 4.5671e-05 gnorm: 1.20 [ 5:08:02<19:27:38] +[titan] 2025-10-05 03:42:22,859 - root - INFO - step: 8355 loss: 2.5012 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2065 +[titan] 2025-10-05 03:42:22,859 - root - INFO - lr: 4.5665e-05 gnorm: 1.16 [ 5:08:13<19:27:26] +[titan] 2025-10-05 03:42:33,724 - root - INFO - step: 8360 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2087 +[titan] 2025-10-05 03:42:33,724 - root - INFO - lr: 4.5660e-05 gnorm: 1.21 [ 5:08:24<19:27:14] +[titan] 2025-10-05 03:42:44,605 - root - INFO - step: 8365 loss: 2.4169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1341 +[titan] 2025-10-05 03:42:44,605 - root - INFO - lr: 4.5655e-05 gnorm: 1.27 [ 5:08:35<19:27:02] +[titan] 2025-10-05 03:42:55,502 - root - INFO - step: 8370 loss: 2.4654 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:42:55,503 - root - INFO - lr: 4.5649e-05 gnorm: 1.13 [ 5:08:46<19:26:51] +[titan] 2025-10-05 03:43:06,377 - root - INFO - step: 8375 loss: 2.4547 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1658 +[titan] 2025-10-05 03:43:06,377 - root - INFO - lr: 4.5644e-05 gnorm: 1.15 [ 5:08:57<19:26:39] +[titan] 2025-10-05 03:43:17,279 - root - INFO - step: 8380 loss: 2.5065 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2118 +[titan] 2025-10-05 03:43:17,279 - root - INFO - lr: 4.5639e-05 
gnorm: 1.18 [ 5:09:08<19:26:27] +[titan] 2025-10-05 03:43:28,170 - root - INFO - step: 8385 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.1973 +[titan] 2025-10-05 03:43:28,171 - root - INFO - lr: 4.5634e-05 gnorm: 1.19 [ 5:09:19<19:26:16] +[titan] 2025-10-05 03:43:39,058 - root - INFO - step: 8390 loss: 2.3818 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 03:43:39,058 - root - INFO - lr: 4.5628e-05 gnorm: 1.18 [ 5:09:30<19:26:04] +[titan] 2025-10-05 03:43:49,941 - root - INFO - step: 8395 loss: 2.4979 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2044 +[titan] 2025-10-05 03:43:49,941 - root - INFO - lr: 4.5623e-05 gnorm: 1.24 [ 5:09:40<19:25:52] +[titan] 2025-10-05 03:43:58,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:44:00,835 - root - INFO - step: 8400 loss: 2.4609 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1710 +[titan] 2025-10-05 03:44:00,835 - root - INFO - lr: 4.5618e-05 gnorm: 1.21 [ 5:09:51<19:25:40] +[titan] 2025-10-05 03:44:11,708 - root - INFO - step: 8405 loss: 2.4714 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1804 +[titan] 2025-10-05 03:44:11,708 - root - INFO - lr: 4.5612e-05 gnorm: 1.18 [ 5:10:02<19:25:29] +[titan] 2025-10-05 03:44:22,628 - root - INFO - step: 8410 loss: 2.4894 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1963 +[titan] 2025-10-05 03:44:22,628 - root - INFO - lr: 4.5607e-05 gnorm: 1.17 [ 5:10:13<19:25:17] +[titan] 2025-10-05 03:44:33,498 - root - INFO - step: 8415 loss: 2.4601 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1709 +[titan] 2025-10-05 03:44:33,498 - root - INFO - lr: 4.5602e-05 gnorm: 1.15 [ 5:10:24<19:25:05] +[titan] 2025-10-05 03:44:44,372 - root - INFO - step: 8420 loss: 2.4695 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1796 +[titan] 2025-10-05 03:44:44,372 - root - INFO - lr: 4.5597e-05 gnorm: 1.21 [ 5:10:35<19:24:53] +[titan] 2025-10-05 03:44:55,241 - root - INFO - step: 8425 loss: 2.6043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.2890 +[titan] 2025-10-05 03:44:55,241 - root - INFO - lr: 4.5591e-05 gnorm: 1.22 [ 5:10:46<19:24:42] +[titan] 2025-10-05 03:45:06,108 - root - INFO - step: 8430 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1610 +[titan] 2025-10-05 03:45:06,108 - root - INFO - lr: 4.5586e-05 
gnorm: 1.22 [ 5:10:57<19:24:30] +[titan] 2025-10-05 03:45:17,033 - root - INFO - step: 8435 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1622 +[titan] 2025-10-05 03:45:17,033 - root - INFO - lr: 4.5581e-05 gnorm: 1.17 [ 5:11:08<19:24:18] +[titan] 2025-10-05 03:45:27,906 - root - INFO - step: 8440 loss: 2.4384 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1521 +[titan] 2025-10-05 03:45:27,906 - root - INFO - lr: 4.5575e-05 gnorm: 1.18 [ 5:11:18<19:24:06] +[titan] 2025-10-05 03:45:38,796 - root - INFO - step: 8445 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2251 +[titan] 2025-10-05 03:45:38,796 - root - INFO - lr: 4.5570e-05 gnorm: 1.18 [ 5:11:29<19:23:55] +[titan] 2025-10-05 03:45:47,504 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:45:49,701 - root - INFO - step: 8450 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1765 +[titan] 2025-10-05 03:45:49,701 - root - INFO - lr: 4.5565e-05 gnorm: 1.15 [ 5:11:40<19:23:43] +[titan] 2025-10-05 03:46:00,576 - root - INFO - step: 8455 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1407 +[titan] 2025-10-05 03:46:00,576 - root - INFO - lr: 4.5559e-05 gnorm: 1.16 [ 5:11:51<19:23:31] +[titan] 2025-10-05 03:46:11,464 - root - INFO - step: 8460 loss: 2.4581 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1695 +[titan] 2025-10-05 03:46:11,465 - root - INFO - lr: 4.5554e-05 gnorm: 1.18 [ 5:12:02<19:23:19] +[titan] 2025-10-05 03:46:22,406 - root - INFO - step: 8465 loss: 2.4681 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2912 global_avg_mtp_loss: 2.1769 +[titan] 2025-10-05 03:46:22,406 - root - INFO - lr: 4.5549e-05 gnorm: 1.26 [ 5:12:13<19:23:08] +[titan] 2025-10-05 03:46:33,303 - root - INFO - step: 8470 loss: 2.4812 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:46:33,303 - root - INFO - lr: 4.5543e-05 gnorm: 1.18 [ 5:12:24<19:22:56] +[titan] 2025-10-05 03:46:44,215 - root - INFO - step: 8475 loss: 2.4456 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:46:44,215 - root - INFO - lr: 4.5538e-05 gnorm: 1.19 [ 5:12:35<19:22:45] +[titan] 2025-10-05 03:46:55,102 - root - INFO - step: 8480 loss: 2.5134 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2167 +[titan] 2025-10-05 03:46:55,103 - root - INFO - lr: 4.5533e-05 
gnorm: 1.22 [ 5:12:46<19:22:33] +[titan] 2025-10-05 03:47:05,998 - root - INFO - step: 8485 loss: 2.4337 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 03:47:05,998 - root - INFO - lr: 4.5527e-05 gnorm: 1.16 [ 5:12:57<19:22:21] +[titan] 2025-10-05 03:47:16,904 - root - INFO - step: 8490 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1235 +[titan] 2025-10-05 03:47:16,904 - root - INFO - lr: 4.5522e-05 gnorm: 1.17 [ 5:13:07<19:22:10] +[titan] 2025-10-05 03:47:27,782 - root - INFO - step: 8495 loss: 2.4698 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1791 +[titan] 2025-10-05 03:47:27,783 - root - INFO - lr: 4.5517e-05 gnorm: 1.17 [ 5:13:18<19:21:58] +[titan] 2025-10-05 03:47:36,459 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:47:38,638 - root - INFO - step: 8500 loss: 2.3537 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0773 +[titan] 2025-10-05 03:47:38,638 - root - INFO - lr: 4.5511e-05 gnorm: 1.20 [ 5:13:29<19:21:46] +[titan] 2025-10-05 03:47:49,538 - root - INFO - step: 8505 loss: 2.5368 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2395 +[titan] 2025-10-05 03:47:49,538 - root - INFO - lr: 4.5506e-05 gnorm: 1.16 [ 5:13:40<19:21:34] +[titan] 2025-10-05 03:48:00,412 - root - INFO - step: 8510 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.1961 +[titan] 2025-10-05 03:48:00,412 - root - INFO - lr: 4.5501e-05 gnorm: 1.19 [ 5:13:51<19:21:22] +[titan] 2025-10-05 03:48:11,277 - root - INFO - step: 8515 loss: 2.4264 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:48:11,277 - root - INFO - lr: 4.5495e-05 gnorm: 1.17 [ 5:14:02<19:21:11] +[titan] 2025-10-05 03:48:22,187 - root - INFO - step: 8520 loss: 2.4968 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2043 +[titan] 2025-10-05 03:48:22,188 - root - INFO - lr: 4.5490e-05 gnorm: 1.24 [ 5:14:13<19:20:59] +[titan] 2025-10-05 03:48:33,044 - root - INFO - step: 8525 loss: 2.5002 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2934 global_avg_mtp_loss: 2.2068 +[titan] 2025-10-05 03:48:33,044 - root - INFO - lr: 4.5485e-05 gnorm: 1.16 [ 5:14:24<19:20:47] +[titan] 2025-10-05 03:48:43,906 - root - INFO - step: 8530 loss: 2.5203 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2249 +[titan] 2025-10-05 03:48:43,906 - root - INFO - lr: 4.5479e-05 
gnorm: 1.18 [ 5:14:34<19:20:35] +[titan] 2025-10-05 03:48:54,778 - root - INFO - step: 8535 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:48:54,779 - root - INFO - lr: 4.5474e-05 gnorm: 1.23 [ 5:14:45<19:20:24] +[titan] 2025-10-05 03:49:05,664 - root - INFO - step: 8540 loss: 2.5027 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2084 +[titan] 2025-10-05 03:49:05,664 - root - INFO - lr: 4.5468e-05 gnorm: 1.19 [ 5:14:56<19:20:12] +[titan] 2025-10-05 03:49:16,537 - root - INFO - step: 8545 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2823 global_avg_mtp_loss: 2.1266 +[titan] 2025-10-05 03:49:16,537 - root - INFO - lr: 4.5463e-05 gnorm: 1.19 [ 5:15:07<19:20:00] +[titan] 2025-10-05 03:49:25,283 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:49:27,468 - root - INFO - step: 8550 loss: 2.4984 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2049 +[titan] 2025-10-05 03:49:27,468 - root - INFO - lr: 4.5458e-05 gnorm: 1.21 [ 5:15:18<19:19:49] +[titan] 2025-10-05 03:49:38,338 - root - INFO - step: 8555 loss: 2.4539 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1653 +[titan] 2025-10-05 03:49:38,338 - root - INFO - lr: 4.5452e-05 gnorm: 1.20 [ 5:15:29<19:19:37] +[titan] 2025-10-05 03:49:49,202 - root - INFO - step: 8560 loss: 2.4721 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:49:49,202 - root - INFO - lr: 4.5447e-05 gnorm: 1.17 [ 5:15:40<19:19:25] +[titan] 2025-10-05 03:50:00,074 - root - INFO - step: 8565 loss: 2.5405 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 03:50:00,074 - root - INFO - lr: 4.5442e-05 gnorm: 1.15 [ 5:15:51<19:19:13] +[titan] 2025-10-05 03:50:10,978 - root - INFO - step: 8570 loss: 2.4470 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 03:50:10,979 - root - INFO - lr: 4.5436e-05 gnorm: 1.22 [ 5:16:01<19:19:02] +[titan] 2025-10-05 03:50:21,887 - root - INFO - step: 8575 loss: 2.4633 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1735 +[titan] 2025-10-05 03:50:21,887 - root - INFO - lr: 4.5431e-05 gnorm: 1.21 [ 5:16:12<19:18:50] +[titan] 2025-10-05 03:50:32,776 - root - INFO - step: 8580 loss: 2.4711 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1810 +[titan] 2025-10-05 03:50:32,776 - root - INFO - lr: 4.5425e-05 
gnorm: 1.18 [ 5:16:23<19:18:38] +[titan] 2025-10-05 03:50:43,667 - root - INFO - step: 8585 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:50:43,667 - root - INFO - lr: 4.5420e-05 gnorm: 1.22 [ 5:16:34<19:18:27] +[titan] 2025-10-05 03:50:54,557 - root - INFO - step: 8590 loss: 2.5385 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2399 +[titan] 2025-10-05 03:50:54,558 - root - INFO - lr: 4.5415e-05 gnorm: 1.18 [ 5:16:45<19:18:15] +[titan] 2025-10-05 03:51:05,424 - root - INFO - step: 8595 loss: 2.4767 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 03:51:05,425 - root - INFO - lr: 4.5409e-05 gnorm: 1.16 [ 5:16:56<19:18:03] +[titan] 2025-10-05 03:51:14,103 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:51:16,290 - root - INFO - step: 8600 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:51:16,290 - root - INFO - lr: 4.5404e-05 gnorm: 1.14 [ 5:17:07<19:17:51] +[titan] 2025-10-05 03:51:27,250 - root - INFO - step: 8605 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2352 +[titan] 2025-10-05 03:51:27,251 - root - INFO - lr: 4.5398e-05 gnorm: 1.15 [ 5:17:18<19:17:40] +[titan] 2025-10-05 03:51:38,134 - root - INFO - step: 8610 loss: 2.4373 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1515 +[titan] 2025-10-05 03:51:38,134 - root - INFO - lr: 4.5393e-05 gnorm: 1.14 [ 5:17:29<19:17:28] +[titan] 2025-10-05 03:51:49,035 - root - INFO - step: 8615 loss: 2.5154 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2980 global_avg_mtp_loss: 2.2174 +[titan] 2025-10-05 03:51:49,036 - root - INFO - lr: 4.5388e-05 gnorm: 1.21 [ 5:17:40<19:17:16] +[titan] 2025-10-05 03:51:59,908 - root - INFO - step: 8620 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1329 +[titan] 2025-10-05 03:51:59,908 - root - INFO - lr: 4.5382e-05 gnorm: 1.19 [ 5:17:50<19:17:05] +[titan] 2025-10-05 03:52:10,800 - root - INFO - step: 8625 loss: 2.4772 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:52:10,800 - root - INFO - lr: 4.5377e-05 gnorm: 1.19 [ 5:18:01<19:16:53] +[titan] 2025-10-05 03:52:21,724 - root - INFO - step: 8630 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1254 +[titan] 2025-10-05 03:52:21,724 - root - INFO - lr: 4.5371e-05 
gnorm: 1.17 [ 5:18:12<19:16:41] +[titan] 2025-10-05 03:52:32,629 - root - INFO - step: 8635 loss: 2.4666 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 03:52:32,630 - root - INFO - lr: 4.5366e-05 gnorm: 1.18 [ 5:18:23<19:16:30] +[titan] 2025-10-05 03:52:43,516 - root - INFO - step: 8640 loss: 2.5035 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:52:43,516 - root - INFO - lr: 4.5360e-05 gnorm: 1.16 [ 5:18:34<19:16:18] +[titan] 2025-10-05 03:52:54,413 - root - INFO - step: 8645 loss: 2.4079 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1260 +[titan] 2025-10-05 03:52:54,414 - root - INFO - lr: 4.5355e-05 gnorm: 1.18 [ 5:18:45<19:16:06] +[titan] 2025-10-05 03:53:03,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:53:05,278 - root - INFO - step: 8650 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:53:05,278 - root - INFO - lr: 4.5350e-05 gnorm: 1.17 [ 5:18:56<19:15:55] +[titan] 2025-10-05 03:53:16,166 - root - INFO - step: 8655 loss: 2.4949 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2017 +[titan] 2025-10-05 03:53:16,166 - root - INFO - lr: 4.5344e-05 gnorm: 1.17 [ 5:19:07<19:15:43] +[titan] 2025-10-05 03:53:27,098 - root - INFO - step: 8660 loss: 2.4590 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1706 +[titan] 2025-10-05 03:53:27,098 - root - INFO - lr: 4.5339e-05 gnorm: 1.20 [ 5:19:18<19:15:31] +[titan] 2025-10-05 03:53:38,012 - root - INFO - step: 8665 loss: 2.5151 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2199 +[titan] 2025-10-05 03:53:38,012 - root - INFO - lr: 4.5333e-05 gnorm: 1.19 [ 5:19:29<19:15:20] +[titan] 2025-10-05 03:53:48,872 - root - INFO - step: 8670 loss: 2.4344 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 03:53:48,872 - root - INFO - lr: 4.5328e-05 gnorm: 1.15 [ 5:19:39<19:15:08] +[titan] 2025-10-05 03:53:59,744 - root - INFO - step: 8675 loss: 2.4632 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1742 +[titan] 2025-10-05 03:53:59,744 - root - INFO - lr: 4.5322e-05 gnorm: 1.17 [ 5:19:50<19:14:56] +[titan] 2025-10-05 03:54:10,610 - root - INFO - step: 8680 loss: 2.4556 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 03:54:10,611 - root - INFO - lr: 4.5317e-05 
gnorm: 1.17 [ 5:20:01<19:14:45] +[titan] 2025-10-05 03:54:21,508 - root - INFO - step: 8685 loss: 2.4742 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1837 +[titan] 2025-10-05 03:54:21,508 - root - INFO - lr: 4.5311e-05 gnorm: 1.20 [ 5:20:12<19:14:33] +[titan] 2025-10-05 03:54:32,411 - root - INFO - step: 8690 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2972 global_avg_mtp_loss: 2.2331 +[titan] 2025-10-05 03:54:32,411 - root - INFO - lr: 4.5306e-05 gnorm: 1.22 [ 5:20:23<19:14:21] +[titan] 2025-10-05 03:54:43,289 - root - INFO - step: 8695 loss: 2.4873 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1952 +[titan] 2025-10-05 03:54:43,290 - root - INFO - lr: 4.5301e-05 gnorm: 1.21 [ 5:20:34<19:14:10] +[titan] 2025-10-05 03:54:52,024 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:54:54,213 - root - INFO - step: 8700 loss: 2.4737 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1831 +[titan] 2025-10-05 03:54:54,213 - root - INFO - lr: 4.5295e-05 gnorm: 1.19 [ 5:20:45<19:13:58] +[titan] 2025-10-05 03:55:03,155 - root - INFO - Dumping profiler traces at step 8704 +[titan] 2025-10-05 03:55:03,194 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:55:05,378 - root - INFO - step: 8705 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 29,348 tflops: 407.16 mfu: 41.17% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:55:05,378 - root - INFO - lr: 4.5290e-05 gnorm: 1.17 [ 5:20:56<19:13:47] +[titan] 2025-10-05 03:55:16,259 - root - INFO - step: 8710 loss: 2.3993 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1181 +[titan] 2025-10-05 03:55:16,259 - root - INFO - lr: 4.5284e-05 gnorm: 1.16 [ 5:21:07<19:13:36] +[titan] 2025-10-05 03:55:27,179 - root - INFO - step: 8715 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1461 +[titan] 2025-10-05 03:55:27,179 - root - INFO - lr: 4.5279e-05 gnorm: 1.17 [ 5:21:18<19:13:24] +[titan] 2025-10-05 03:55:38,073 - root - INFO - step: 8720 loss: 2.3963 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 03:55:38,073 - root - INFO - lr: 4.5273e-05 gnorm: 1.24 [ 5:21:29<19:13:12] +[titan] 2025-10-05 03:55:48,962 - root - INFO - step: 8725 loss: 2.4482 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 03:55:48,962 - root - INFO - lr: 4.5268e-05 gnorm: 1.19 [ 5:21:39<19:13:01] +[titan] 2025-10-05 03:55:59,898 - root - INFO - step: 8730 loss: 2.4827 
memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:55:59,898 - root - INFO - lr: 4.5262e-05 gnorm: 1.18 [ 5:21:50<19:12:49] +[titan] 2025-10-05 03:56:10,791 - root - INFO - step: 8735 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:56:10,791 - root - INFO - lr: 4.5257e-05 gnorm: 1.13 [ 5:22:01<19:12:38] +[titan] 2025-10-05 03:56:21,690 - root - INFO - step: 8740 loss: 2.5138 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:56:21,690 - root - INFO - lr: 4.5251e-05 gnorm: 1.17 [ 5:22:12<19:12:26] +[titan] 2025-10-05 03:56:32,598 - root - INFO - step: 8745 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:56:32,598 - root - INFO - lr: 4.5246e-05 gnorm: 1.21 [ 5:22:23<19:12:14] +[titan] 2025-10-05 03:56:41,299 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:56:43,483 - root - INFO - step: 8750 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 03:56:43,483 - root - INFO - lr: 4.5240e-05 gnorm: 1.23 [ 5:22:34<19:12:03] +[titan] 2025-10-05 03:56:54,343 - root - INFO - step: 8755 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1820 +[titan] 2025-10-05 03:56:54,343 - root - INFO - lr: 4.5235e-05 gnorm: 1.20 [ 5:22:45<19:11:51] +[titan] 2025-10-05 03:57:05,209 - root - INFO - step: 8760 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:57:05,210 - root - INFO - lr: 4.5229e-05 gnorm: 1.14 [ 5:22:56<19:11:39] +[titan] 2025-10-05 03:57:16,152 - root - INFO - step: 8765 loss: 2.5128 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:57:16,153 - root - INFO - lr: 4.5224e-05 gnorm: 1.17 [ 5:23:07<19:11:28] +[titan] 2025-10-05 03:57:27,083 - root - INFO - step: 8770 loss: 2.4066 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 03:57:27,084 - root - INFO - lr: 4.5218e-05 gnorm: 1.11 [ 5:23:18<19:11:16] +[titan] 2025-10-05 03:57:37,931 - root - INFO - step: 8775 loss: 2.4260 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 03:57:37,932 - root - INFO - lr: 4.5213e-05 gnorm: 1.17 [ 5:23:28<19:11:04] +[titan] 2025-10-05 03:57:48,805 - root - INFO - step: 8780 loss: 2.4759 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1847 +[titan] 2025-10-05 03:57:48,805 - root - INFO - lr: 4.5207e-05 
gnorm: 1.24 [ 5:23:39<19:10:53] +[titan] 2025-10-05 03:57:59,678 - root - INFO - step: 8785 loss: 2.4875 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:57:59,678 - root - INFO - lr: 4.5202e-05 gnorm: 1.16 [ 5:23:50<19:10:41] +[titan] 2025-10-05 03:58:10,559 - root - INFO - step: 8790 loss: 2.4424 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:58:10,559 - root - INFO - lr: 4.5196e-05 gnorm: 1.16 [ 5:24:01<19:10:29] +[titan] 2025-10-05 03:58:21,459 - root - INFO - step: 8795 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1293 +[titan] 2025-10-05 03:58:21,459 - root - INFO - lr: 4.5191e-05 gnorm: 1.13 [ 5:24:12<19:10:18] +[titan] 2025-10-05 03:58:30,178 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:58:32,360 - root - INFO - step: 8800 loss: 2.3926 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1119 +[titan] 2025-10-05 03:58:32,360 - root - INFO - lr: 4.5185e-05 gnorm: 1.16 [ 5:24:23<19:10:06] +[titan] 2025-10-05 03:58:43,220 - root - INFO - step: 8805 loss: 2.5057 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2110 +[titan] 2025-10-05 03:58:43,221 - root - INFO - lr: 4.5180e-05 gnorm: 1.16 [ 5:24:34<19:09:54] +[titan] 2025-10-05 03:58:54,092 - root - INFO - step: 8810 loss: 2.4643 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:58:54,092 - root - INFO - lr: 4.5174e-05 gnorm: 1.21 [ 5:24:45<19:09:42] +[titan] 2025-10-05 03:59:04,956 - root - INFO - step: 8815 loss: 2.5184 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2227 +[titan] 2025-10-05 03:59:04,956 - root - INFO - lr: 4.5169e-05 gnorm: 1.20 [ 5:24:55<19:09:31] +[titan] 2025-10-05 03:59:15,807 - root - INFO - step: 8820 loss: 2.3921 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 03:59:15,807 - root - INFO - lr: 4.5163e-05 gnorm: 1.12 [ 5:25:06<19:09:19] +[titan] 2025-10-05 03:59:26,817 - root - INFO - step: 8825 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.90 mfu: 41.75% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1453 +[titan] 2025-10-05 03:59:26,817 - root - INFO - lr: 4.5158e-05 gnorm: 1.14 [ 5:25:17<19:09:08] +[titan] 2025-10-05 03:59:37,700 - root - INFO - step: 8830 loss: 2.4161 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 03:59:37,700 - root - INFO - lr: 4.5152e-05 
gnorm: 1.17 [ 5:25:28<19:08:56] +[titan] 2025-10-05 03:59:48,610 - root - INFO - step: 8835 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:59:48,610 - root - INFO - lr: 4.5147e-05 gnorm: 1.20 [ 5:25:39<19:08:44] +[titan] 2025-10-05 03:59:59,499 - root - INFO - step: 8840 loss: 2.4555 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 03:59:59,499 - root - INFO - lr: 4.5141e-05 gnorm: 1.16 [ 5:25:50<19:08:33] +[titan] 2025-10-05 04:00:10,376 - root - INFO - step: 8845 loss: 2.5058 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2957 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 04:00:10,376 - root - INFO - lr: 4.5136e-05 gnorm: 1.15 [ 5:26:01<19:08:21] +[titan] 2025-10-05 04:00:19,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:00:21,274 - root - INFO - step: 8850 loss: 2.4134 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:00:21,274 - root - INFO - lr: 4.5130e-05 gnorm: 1.16 [ 5:26:12<19:08:09] +[titan] 2025-10-05 04:00:32,174 - root - INFO - step: 8855 loss: 2.3939 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1130 +[titan] 2025-10-05 04:00:32,174 - root - INFO - lr: 4.5124e-05 gnorm: 1.14 [ 5:26:23<19:07:58] +[titan] 2025-10-05 04:00:43,105 - root - INFO - step: 8860 loss: 2.4901 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.1965 +[titan] 2025-10-05 04:00:43,105 - root - INFO - lr: 4.5119e-05 gnorm: 1.13 [ 5:26:34<19:07:46] +[titan] 2025-10-05 04:00:53,982 - root - INFO - step: 8865 loss: 2.4318 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1455 +[titan] 2025-10-05 04:00:53,982 - root - INFO - lr: 4.5113e-05 gnorm: 1.20 [ 5:26:44<19:07:35] +[titan] 2025-10-05 04:01:04,884 - root - INFO - step: 8870 loss: 2.4552 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 04:01:04,884 - root - INFO - lr: 4.5108e-05 gnorm: 1.17 [ 5:26:55<19:07:23] +[titan] 2025-10-05 04:01:15,755 - root - INFO - step: 8875 loss: 2.4361 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1505 +[titan] 2025-10-05 04:01:15,755 - root - INFO - lr: 4.5102e-05 gnorm: 1.11 [ 5:27:06<19:07:11] +[titan] 2025-10-05 04:01:26,620 - root - INFO - step: 8880 loss: 2.4652 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 04:01:26,621 - root - INFO - lr: 4.5097e-05 
gnorm: 1.18 [ 5:27:17<19:07:00] +[titan] 2025-10-05 04:01:37,500 - root - INFO - step: 8885 loss: 2.4777 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1862 +[titan] 2025-10-05 04:01:37,500 - root - INFO - lr: 4.5091e-05 gnorm: 1.16 [ 5:27:28<19:06:48] +[titan] 2025-10-05 04:01:48,415 - root - INFO - step: 8890 loss: 2.4058 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:01:48,416 - root - INFO - lr: 4.5086e-05 gnorm: 1.17 [ 5:27:39<19:06:36] +[titan] 2025-10-05 04:01:59,279 - root - INFO - step: 8895 loss: 2.4655 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1759 +[titan] 2025-10-05 04:01:59,280 - root - INFO - lr: 4.5080e-05 gnorm: 1.19 [ 5:27:50<19:06:25] +[titan] 2025-10-05 04:02:07,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:02:10,126 - root - INFO - step: 8900 loss: 2.4494 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:02:10,126 - root - INFO - lr: 4.5074e-05 gnorm: 1.24 [ 5:28:01<19:06:13] +[titan] 2025-10-05 04:02:20,976 - root - INFO - step: 8905 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 04:02:20,976 - root - INFO - lr: 4.5069e-05 gnorm: 1.18 [ 5:28:11<19:06:01] +[titan] 2025-10-05 04:02:31,857 - root - INFO - step: 8910 loss: 2.4530 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1646 +[titan] 2025-10-05 04:02:31,857 - root - INFO - lr: 4.5063e-05 gnorm: 1.18 [ 5:28:22<19:05:49] +[titan] 2025-10-05 04:02:42,714 - root - INFO - step: 8915 loss: 2.4292 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:02:42,714 - root - INFO - lr: 4.5058e-05 gnorm: 1.18 [ 5:28:33<19:05:38] +[titan] 2025-10-05 04:02:53,586 - root - INFO - step: 8920 loss: 2.4665 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 04:02:53,586 - root - INFO - lr: 4.5052e-05 gnorm: 1.14 [ 5:28:44<19:05:26] +[titan] 2025-10-05 04:03:04,511 - root - INFO - step: 8925 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1484 +[titan] 2025-10-05 04:03:04,511 - root - INFO - lr: 4.5047e-05 gnorm: 1.20 [ 5:28:55<19:05:14] +[titan] 2025-10-05 04:03:15,417 - root - INFO - step: 8930 loss: 2.5325 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 04:03:15,417 - root - INFO - lr: 4.5041e-05 
gnorm: 1.18 [ 5:29:06<19:05:03] +[titan] 2025-10-05 04:03:26,302 - root - INFO - step: 8935 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:03:26,303 - root - INFO - lr: 4.5035e-05 gnorm: 1.21 [ 5:29:17<19:04:51] +[titan] 2025-10-05 04:03:37,172 - root - INFO - step: 8940 loss: 2.6656 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3417 +[titan] 2025-10-05 04:03:37,172 - root - INFO - lr: 4.5030e-05 gnorm: 1.16 [ 5:29:28<19:04:39] +[titan] 2025-10-05 04:03:48,057 - root - INFO - step: 8945 loss: 2.4401 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1543 +[titan] 2025-10-05 04:03:48,057 - root - INFO - lr: 4.5024e-05 gnorm: 1.12 [ 5:29:39<19:04:28] +[titan] 2025-10-05 04:03:56,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:03:58,919 - root - INFO - step: 8950 loss: 2.4061 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1253 +[titan] 2025-10-05 04:03:58,919 - root - INFO - lr: 4.5019e-05 gnorm: 1.11 [ 5:29:49<19:04:16] +[titan] 2025-10-05 04:04:09,819 - root - INFO - step: 8955 loss: 2.4957 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 04:04:09,820 - root - INFO - lr: 4.5013e-05 gnorm: 1.12 [ 5:30:00<19:04:04] +[titan] 2025-10-05 04:04:20,693 - root - INFO - step: 8960 loss: 2.4047 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1227 +[titan] 2025-10-05 04:04:20,693 - root - INFO - lr: 4.5007e-05 gnorm: 1.15 [ 5:30:11<19:03:53] +[titan] 2025-10-05 04:04:31,580 - root - INFO - step: 8965 loss: 2.4637 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1740 +[titan] 2025-10-05 04:04:31,580 - root - INFO - lr: 4.5002e-05 gnorm: 1.15 [ 5:30:22<19:03:41] +[titan] 2025-10-05 04:04:42,434 - root - INFO - step: 8970 loss: 2.4642 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 04:04:42,434 - root - INFO - lr: 4.4996e-05 gnorm: 1.19 [ 5:30:33<19:03:29] +[titan] 2025-10-05 04:04:53,298 - root - INFO - step: 8975 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 04:04:53,298 - root - INFO - lr: 4.4991e-05 gnorm: 1.20 [ 5:30:44<19:03:18] +[titan] 2025-10-05 04:05:04,159 - root - INFO - step: 8980 loss: 2.4094 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1265 +[titan] 2025-10-05 04:05:04,159 - root - INFO - lr: 4.4985e-05 
gnorm: 1.14 [ 5:30:55<19:03:06] +[titan] 2025-10-05 04:05:15,056 - root - INFO - step: 8985 loss: 2.4593 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1707 +[titan] 2025-10-05 04:05:15,057 - root - INFO - lr: 4.4979e-05 gnorm: 1.20 [ 5:31:06<19:02:54] +[titan] 2025-10-05 04:05:25,930 - root - INFO - step: 8990 loss: 2.3911 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 04:05:25,930 - root - INFO - lr: 4.4974e-05 gnorm: 1.15 [ 5:31:16<19:02:43] +[titan] 2025-10-05 04:05:36,797 - root - INFO - step: 8995 loss: 2.4428 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1566 +[titan] 2025-10-05 04:05:36,798 - root - INFO - lr: 4.4968e-05 gnorm: 1.17 [ 5:31:27<19:02:31] +[titan] 2025-10-05 04:05:45,469 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:05:47,651 - root - INFO - step: 9000 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0801 +[titan] 2025-10-05 04:05:47,652 - root - INFO - lr: 4.4962e-05 gnorm: 1.23 [ 5:31:38<19:02:19] +[titan] 2025-10-05 04:05:58,519 - root - INFO - step: 9005 loss: 2.4431 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1560 +[titan] 2025-10-05 04:05:58,519 - root - INFO - lr: 4.4957e-05 gnorm: 1.17 [ 5:31:49<19:02:08] +[titan] 2025-10-05 04:06:09,392 - root - INFO - step: 9010 loss: 2.4584 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1697 +[titan] 2025-10-05 04:06:09,392 - root - INFO - lr: 4.4951e-05 gnorm: 1.17 [ 5:32:00<19:01:56] +[titan] 2025-10-05 04:06:20,257 - root - INFO - step: 9015 loss: 2.4693 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1794 +[titan] 2025-10-05 04:06:20,257 - root - INFO - lr: 4.4946e-05 gnorm: 1.13 [ 5:32:11<19:01:44] +[titan] 2025-10-05 04:06:31,158 - root - INFO - step: 9020 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1487 +[titan] 2025-10-05 04:06:31,158 - root - INFO - lr: 4.4940e-05 gnorm: 1.15 [ 5:32:22<19:01:33] +[titan] 2025-10-05 04:06:42,018 - root - INFO - step: 9025 loss: 2.3968 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:06:42,018 - root - INFO - lr: 4.4934e-05 gnorm: 1.11 [ 5:32:32<19:01:21] +[titan] 2025-10-05 04:06:52,886 - root - INFO - step: 9030 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:06:52,886 - root - INFO - lr: 4.4929e-05 
gnorm: 1.08 [ 5:32:43<19:01:09] +[titan] 2025-10-05 04:07:03,747 - root - INFO - step: 9035 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 04:07:03,747 - root - INFO - lr: 4.4923e-05 gnorm: 1.16 [ 5:32:54<19:00:57] +[titan] 2025-10-05 04:07:14,610 - root - INFO - step: 9040 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1859 +[titan] 2025-10-05 04:07:14,610 - root - INFO - lr: 4.4917e-05 gnorm: 1.17 [ 5:33:05<19:00:46] +[titan] 2025-10-05 04:07:25,476 - root - INFO - step: 9045 loss: 2.4520 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1641 +[titan] 2025-10-05 04:07:25,476 - root - INFO - lr: 4.4912e-05 gnorm: 1.19 [ 5:33:16<19:00:34] +[titan] 2025-10-05 04:07:34,201 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:07:36,379 - root - INFO - step: 9050 loss: 2.4771 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:07:36,379 - root - INFO - lr: 4.4906e-05 gnorm: 1.19 [ 5:33:27<19:00:22] +[titan] 2025-10-05 04:07:47,258 - root - INFO - step: 9055 loss: 2.4168 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1339 +[titan] 2025-10-05 04:07:47,258 - root - INFO - lr: 4.4900e-05 gnorm: 1.14 [ 5:33:38<19:00:11] +[titan] 2025-10-05 04:07:58,123 - root - INFO - step: 9060 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:07:58,123 - root - INFO - lr: 4.4895e-05 gnorm: 1.16 [ 5:33:49<18:59:59] +[titan] 2025-10-05 04:08:09,003 - root - INFO - step: 9065 loss: 2.4858 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2911 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:08:09,003 - root - INFO - lr: 4.4889e-05 gnorm: 1.18 [ 5:33:59<18:59:47] +[titan] 2025-10-05 04:08:19,858 - root - INFO - step: 9070 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 04:08:19,858 - root - INFO - lr: 4.4883e-05 gnorm: 1.18 [ 5:34:10<18:59:36] +[titan] 2025-10-05 04:08:30,739 - root - INFO - step: 9075 loss: 2.4338 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1483 +[titan] 2025-10-05 04:08:30,739 - root - INFO - lr: 4.4878e-05 gnorm: 1.16 [ 5:34:21<18:59:24] +[titan] 2025-10-05 04:08:41,605 - root - INFO - step: 9080 loss: 2.3786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 04:08:41,605 - root - INFO - lr: 4.4872e-05 
gnorm: 1.24 [ 5:34:32<18:59:12] +[titan] 2025-10-05 04:08:52,482 - root - INFO - step: 9085 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2055 +[titan] 2025-10-05 04:08:52,482 - root - INFO - lr: 4.4866e-05 gnorm: 1.20 [ 5:34:43<18:59:01] +[titan] 2025-10-05 04:09:03,324 - root - INFO - step: 9090 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1628 +[titan] 2025-10-05 04:09:03,325 - root - INFO - lr: 4.4861e-05 gnorm: 1.24 [ 5:34:54<18:58:49] +[titan] 2025-10-05 04:09:14,169 - root - INFO - step: 9095 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1886 +[titan] 2025-10-05 04:09:14,169 - root - INFO - lr: 4.4855e-05 gnorm: 1.21 [ 5:35:05<18:58:37] +[titan] 2025-10-05 04:09:22,830 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:09:25,022 - root - INFO - step: 9100 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:09:25,022 - root - INFO - lr: 4.4849e-05 gnorm: 1.19 [ 5:35:15<18:58:25] +[titan] 2025-10-05 04:09:35,891 - root - INFO - step: 9105 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1300 +[titan] 2025-10-05 04:09:35,891 - root - INFO - lr: 4.4844e-05 gnorm: 1.18 [ 5:35:26<18:58:14] +[titan] 2025-10-05 04:09:46,754 - root - INFO - step: 9110 loss: 2.3843 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1029 +[titan] 2025-10-05 04:09:46,754 - root - INFO - lr: 4.4838e-05 gnorm: 1.28 [ 5:35:37<18:58:02] +[titan] 2025-10-05 04:09:57,624 - root - INFO - step: 9115 loss: 2.4036 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1228 +[titan] 2025-10-05 04:09:57,624 - root - INFO - lr: 4.4832e-05 gnorm: 1.19 [ 5:35:48<18:57:50] +[titan] 2025-10-05 04:10:08,470 - root - INFO - step: 9120 loss: 2.4158 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1325 +[titan] 2025-10-05 04:10:08,470 - root - INFO - lr: 4.4827e-05 gnorm: 1.14 [ 5:35:59<18:57:39] +[titan] 2025-10-05 04:10:19,323 - root - INFO - step: 9125 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 04:10:19,323 - root - INFO - lr: 4.4821e-05 gnorm: 1.19 [ 5:36:10<18:57:27] +[titan] 2025-10-05 04:10:30,178 - root - INFO - step: 9130 loss: 2.4437 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 04:10:30,178 - root - INFO - lr: 4.4815e-05 
gnorm: 1.22 [ 5:36:21<18:57:15] +[titan] 2025-10-05 04:10:41,058 - root - INFO - step: 9135 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1518 +[titan] 2025-10-05 04:10:41,058 - root - INFO - lr: 4.4809e-05 gnorm: 1.15 [ 5:36:32<18:57:04] +[titan] 2025-10-05 04:10:51,913 - root - INFO - step: 9140 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:10:51,913 - root - INFO - lr: 4.4804e-05 gnorm: 1.14 [ 5:36:42<18:56:52] +[titan] 2025-10-05 04:11:02,801 - root - INFO - step: 9145 loss: 2.4160 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1331 +[titan] 2025-10-05 04:11:02,801 - root - INFO - lr: 4.4798e-05 gnorm: 1.18 [ 5:36:53<18:56:40] +[titan] 2025-10-05 04:11:11,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:11:13,643 - root - INFO - step: 9150 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1327 +[titan] 2025-10-05 04:11:13,643 - root - INFO - lr: 4.4792e-05 gnorm: 1.15 [ 5:37:04<18:56:28] +[titan] 2025-10-05 04:11:24,500 - root - INFO - step: 9155 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1789 +[titan] 2025-10-05 04:11:24,500 - root - INFO - lr: 4.4787e-05 gnorm: 1.16 [ 5:37:15<18:56:17] +[titan] 2025-10-05 04:11:35,333 - root - INFO - step: 9160 loss: 2.4173 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1345 +[titan] 2025-10-05 04:11:35,333 - root - INFO - lr: 4.4781e-05 gnorm: 1.15 [ 5:37:26<18:56:05] +[titan] 2025-10-05 04:11:46,194 - root - INFO - step: 9165 loss: 2.4180 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 04:11:46,194 - root - INFO - lr: 4.4775e-05 gnorm: 1.13 [ 5:37:37<18:55:53] +[titan] 2025-10-05 04:11:57,056 - root - INFO - step: 9170 loss: 2.3989 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 04:11:57,057 - root - INFO - lr: 4.4769e-05 gnorm: 1.15 [ 5:37:48<18:55:42] +[titan] 2025-10-05 04:12:07,928 - root - INFO - step: 9175 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 04:12:07,928 - root - INFO - lr: 4.4764e-05 gnorm: 1.11 [ 5:37:58<18:55:30] +[titan] 2025-10-05 04:12:18,847 - root - INFO - step: 9180 loss: 2.5568 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2549 +[titan] 2025-10-05 04:12:18,847 - root - INFO - lr: 4.4758e-05 
gnorm: 1.20 [ 5:38:09<18:55:18] +[titan] 2025-10-05 04:12:29,719 - root - INFO - step: 9185 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1545 +[titan] 2025-10-05 04:12:29,719 - root - INFO - lr: 4.4752e-05 gnorm: 1.16 [ 5:38:20<18:55:07] +[titan] 2025-10-05 04:12:40,611 - root - INFO - step: 9190 loss: 2.3798 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.1027 +[titan] 2025-10-05 04:12:40,611 - root - INFO - lr: 4.4747e-05 gnorm: 1.15 [ 5:38:31<18:54:55] +[titan] 2025-10-05 04:12:51,477 - root - INFO - step: 9195 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1621 +[titan] 2025-10-05 04:12:51,477 - root - INFO - lr: 4.4741e-05 gnorm: 1.15 [ 5:38:42<18:54:44] +[titan] 2025-10-05 04:13:00,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:13:02,353 - root - INFO - step: 9200 loss: 2.4374 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1525 +[titan] 2025-10-05 04:13:02,353 - root - INFO - lr: 4.4735e-05 gnorm: 1.20 [ 5:38:53<18:54:32] +[titan] 2025-10-05 04:13:13,230 - root - INFO - step: 9205 loss: 2.4854 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1940 +[titan] 2025-10-05 04:13:13,230 - root - INFO - lr: 4.4729e-05 gnorm: 1.22 [ 5:39:04<18:54:20] +[titan] 2025-10-05 04:13:24,132 - root - INFO - step: 9210 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:13:24,132 - root - INFO - lr: 4.4724e-05 gnorm: 1.19 [ 5:39:15<18:54:09] +[titan] 2025-10-05 04:13:35,087 - root - INFO - step: 9215 loss: 2.4851 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1930 +[titan] 2025-10-05 04:13:35,087 - root - INFO - lr: 4.4718e-05 gnorm: 1.15 [ 5:39:26<18:53:57] +[titan] 2025-10-05 04:13:37,437 - root - INFO - Dumping profiler traces at step 9216 +[titan] 2025-10-05 04:13:37,474 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:13:46,201 - root - INFO - step: 9220 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 29,486 tflops: 409.07 mfu: 41.36% global_avg_ntp_loss: 0.2928 global_avg_mtp_loss: 2.2073 +[titan] 2025-10-05 04:13:46,201 - root - INFO - lr: 4.4712e-05 gnorm: 1.17 [ 5:39:37<18:53:47] +[titan] 2025-10-05 04:13:57,080 - root - INFO - step: 9225 loss: 2.3856 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1071 +[titan] 2025-10-05 04:13:57,080 - root - INFO - lr: 4.4706e-05 gnorm: 1.15 [ 5:39:48<18:53:35] +[titan] 2025-10-05 04:14:07,953 - root - INFO - step: 9230 loss: 2.4302 
memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1452 +[titan] 2025-10-05 04:14:07,953 - root - INFO - lr: 4.4701e-05 gnorm: 1.15 [ 5:39:58<18:53:23] +[titan] 2025-10-05 04:14:18,819 - root - INFO - step: 9235 loss: 2.4502 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1635 +[titan] 2025-10-05 04:14:18,819 - root - INFO - lr: 4.4695e-05 gnorm: 1.22 [ 5:40:09<18:53:12] +[titan] 2025-10-05 04:14:29,678 - root - INFO - step: 9240 loss: 2.4452 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1584 +[titan] 2025-10-05 04:14:29,678 - root - INFO - lr: 4.4689e-05 gnorm: 1.17 [ 5:40:20<18:53:00] +[titan] 2025-10-05 04:14:40,618 - root - INFO - step: 9245 loss: 2.4345 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1494 +[titan] 2025-10-05 04:14:40,618 - root - INFO - lr: 4.4683e-05 gnorm: 1.14 [ 5:40:31<18:52:48] +[titan] 2025-10-05 04:14:49,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:14:51,484 - root - INFO - step: 9250 loss: 2.5104 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 04:14:51,484 - root - INFO - lr: 4.4678e-05 gnorm: 1.18 [ 5:40:42<18:52:37] +[titan] 2025-10-05 04:15:02,363 - root - INFO - step: 9255 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1304 +[titan] 2025-10-05 04:15:02,363 - root - INFO - lr: 4.4672e-05 gnorm: 1.16 [ 5:40:53<18:52:25] +[titan] 2025-10-05 04:15:13,235 - root - INFO - step: 9260 loss: 2.4511 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:15:13,236 - root - INFO - lr: 4.4666e-05 gnorm: 1.17 [ 5:41:04<18:52:14] +[titan] 2025-10-05 04:15:24,134 - root - INFO - step: 9265 loss: 2.5208 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2247 +[titan] 2025-10-05 04:15:24,134 - root - INFO - lr: 4.4660e-05 gnorm: 1.12 [ 5:41:15<18:52:02] +[titan] 2025-10-05 04:15:35,014 - root - INFO - step: 9270 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1064 +[titan] 2025-10-05 04:15:35,014 - root - INFO - lr: 4.4655e-05 gnorm: 1.14 [ 5:41:25<18:51:50] +[titan] 2025-10-05 04:15:45,940 - root - INFO - step: 9275 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2155 +[titan] 2025-10-05 04:15:45,940 - root - INFO - lr: 4.4649e-05 gnorm: 3.57 [ 5:41:36<18:51:39] +[titan] 2025-10-05 04:15:56,816 - root - INFO - step: 9280 loss: 2.4602 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1702 +[titan] 2025-10-05 04:15:56,816 - root - INFO - lr: 4.4643e-05 
gnorm: 1.17 [ 5:41:47<18:51:27] +[titan] 2025-10-05 04:16:07,687 - root - INFO - step: 9285 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1597 +[titan] 2025-10-05 04:16:07,687 - root - INFO - lr: 4.4637e-05 gnorm: 1.16 [ 5:41:58<18:51:16] +[titan] 2025-10-05 04:16:18,550 - root - INFO - step: 9290 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1896 +[titan] 2025-10-05 04:16:18,550 - root - INFO - lr: 4.4631e-05 gnorm: 1.19 [ 5:42:09<18:51:04] +[titan] 2025-10-05 04:16:29,436 - root - INFO - step: 9295 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1040 +[titan] 2025-10-05 04:16:29,436 - root - INFO - lr: 4.4626e-05 gnorm: 1.23 [ 5:42:20<18:50:52] +[titan] 2025-10-05 04:16:38,131 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:16:40,321 - root - INFO - step: 9300 loss: 2.4653 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:16:40,321 - root - INFO - lr: 4.4620e-05 gnorm: 1.12 [ 5:42:31<18:50:41] +[titan] 2025-10-05 04:16:51,231 - root - INFO - step: 9305 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:16:51,232 - root - INFO - lr: 4.4614e-05 gnorm: 1.15 [ 5:42:42<18:50:29] +[titan] 2025-10-05 04:17:02,103 - root - INFO - step: 9310 loss: 2.4882 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1966 +[titan] 2025-10-05 04:17:02,103 - root - INFO - lr: 4.4608e-05 gnorm: 1.14 [ 5:42:53<18:50:18] +[titan] 2025-10-05 04:17:13,000 - root - INFO - step: 9315 loss: 2.4906 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1975 +[titan] 2025-10-05 04:17:13,000 - root - INFO - lr: 4.4602e-05 gnorm: 1.19 [ 5:43:03<18:50:06] +[titan] 2025-10-05 04:17:23,889 - root - INFO - step: 9320 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:17:23,890 - root - INFO - lr: 4.4597e-05 gnorm: 1.23 [ 5:43:14<18:49:55] +[titan] 2025-10-05 04:17:34,759 - root - INFO - step: 9325 loss: 2.4923 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2926 global_avg_mtp_loss: 2.1997 +[titan] 2025-10-05 04:17:34,759 - root - INFO - lr: 4.4591e-05 gnorm: 1.20 [ 5:43:25<18:49:43] +[titan] 2025-10-05 04:17:45,670 - root - INFO - step: 9330 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1813 +[titan] 2025-10-05 04:17:45,670 - root - INFO - lr: 4.4585e-05 
gnorm: 1.15 [ 5:43:36<18:49:31] +[titan] 2025-10-05 04:17:56,531 - root - INFO - step: 9335 loss: 2.5353 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:17:56,531 - root - INFO - lr: 4.4579e-05 gnorm: 1.15 [ 5:43:47<18:49:20] +[titan] 2025-10-05 04:18:07,423 - root - INFO - step: 9340 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 04:18:07,423 - root - INFO - lr: 4.4573e-05 gnorm: 1.22 [ 5:43:58<18:49:08] +[titan] 2025-10-05 04:18:18,296 - root - INFO - step: 9345 loss: 2.4834 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1917 +[titan] 2025-10-05 04:18:18,296 - root - INFO - lr: 4.4568e-05 gnorm: 1.16 [ 5:44:09<18:48:57] +[titan] 2025-10-05 04:18:27,005 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:18:29,196 - root - INFO - step: 9350 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:18:29,196 - root - INFO - lr: 4.4562e-05 gnorm: 1.12 [ 5:44:20<18:48:45] +[titan] 2025-10-05 04:18:40,056 - root - INFO - step: 9355 loss: 2.4321 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1466 +[titan] 2025-10-05 04:18:40,056 - root - INFO - lr: 4.4556e-05 gnorm: 1.12 [ 5:44:31<18:48:33] +[titan] 2025-10-05 04:18:50,968 - root - INFO - step: 9360 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2060 +[titan] 2025-10-05 04:18:50,968 - root - INFO - lr: 4.4550e-05 gnorm: 1.14 [ 5:44:41<18:48:22] +[titan] 2025-10-05 04:19:01,819 - root - INFO - step: 9365 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1401 +[titan] 2025-10-05 04:19:01,819 - root - INFO - lr: 4.4544e-05 gnorm: 1.14 [ 5:44:52<18:48:10] +[titan] 2025-10-05 04:19:12,717 - root - INFO - step: 9370 loss: 2.5021 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 04:19:12,718 - root - INFO - lr: 4.4538e-05 gnorm: 1.13 [ 5:45:03<18:47:59] +[titan] 2025-10-05 04:19:23,592 - root - INFO - step: 9375 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 04:19:23,592 - root - INFO - lr: 4.4533e-05 gnorm: 1.15 [ 5:45:14<18:47:47] +[titan] 2025-10-05 04:19:34,464 - root - INFO - step: 9380 loss: 2.4564 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1688 +[titan] 2025-10-05 04:19:34,465 - root - INFO - lr: 4.4527e-05 
gnorm: 1.21 [ 5:45:25<18:47:35] +[titan] 2025-10-05 04:19:45,394 - root - INFO - step: 9385 loss: 2.4197 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1348 +[titan] 2025-10-05 04:19:45,394 - root - INFO - lr: 4.4521e-05 gnorm: 1.16 [ 5:45:36<18:47:24] +[titan] 2025-10-05 04:19:56,282 - root - INFO - step: 9390 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:19:56,282 - root - INFO - lr: 4.4515e-05 gnorm: 1.15 [ 5:45:47<18:47:12] +[titan] 2025-10-05 04:20:07,169 - root - INFO - step: 9395 loss: 2.4327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1475 +[titan] 2025-10-05 04:20:07,169 - root - INFO - lr: 4.4509e-05 gnorm: 1.21 [ 5:45:58<18:47:01] +[titan] 2025-10-05 04:20:15,874 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:20:18,060 - root - INFO - step: 9400 loss: 2.5009 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2079 +[titan] 2025-10-05 04:20:18,061 - root - INFO - lr: 4.4503e-05 gnorm: 1.18 [ 5:46:09<18:46:49] +[titan] 2025-10-05 04:20:28,965 - root - INFO - step: 9405 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1469 +[titan] 2025-10-05 04:20:28,966 - root - INFO - lr: 4.4498e-05 gnorm: 1.14 [ 5:46:19<18:46:38] +[titan] 2025-10-05 04:20:39,882 - root - INFO - step: 9410 loss: 2.4983 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 04:20:39,882 - root - INFO - lr: 4.4492e-05 gnorm: 1.20 [ 5:46:30<18:46:26] +[titan] 2025-10-05 04:20:50,800 - root - INFO - step: 9415 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:20:50,800 - root - INFO - lr: 4.4486e-05 gnorm: 1.13 [ 5:46:41<18:46:15] +[titan] 2025-10-05 04:21:01,668 - root - INFO - step: 9420 loss: 2.3688 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0904 +[titan] 2025-10-05 04:21:01,668 - root - INFO - lr: 4.4480e-05 gnorm: 1.16 [ 5:46:52<18:46:03] +[titan] 2025-10-05 04:21:12,542 - root - INFO - step: 9425 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 04:21:12,543 - root - INFO - lr: 4.4474e-05 gnorm: 1.16 [ 5:47:03<18:45:52] +[titan] 2025-10-05 04:21:23,412 - root - INFO - step: 9430 loss: 2.4415 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1551 +[titan] 2025-10-05 04:21:23,412 - root - INFO - lr: 4.4468e-05 
gnorm: 1.20 [ 5:47:14<18:45:40] +[titan] 2025-10-05 04:21:34,322 - root - INFO - step: 9435 loss: 2.3669 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 04:21:34,322 - root - INFO - lr: 4.4462e-05 gnorm: 1.10 [ 5:47:25<18:45:28] +[titan] 2025-10-05 04:21:45,197 - root - INFO - step: 9440 loss: 2.3883 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1070 +[titan] 2025-10-05 04:21:45,197 - root - INFO - lr: 4.4457e-05 gnorm: 1.17 [ 5:47:36<18:45:17] +[titan] 2025-10-05 04:21:56,142 - root - INFO - step: 9445 loss: 2.4394 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1537 +[titan] 2025-10-05 04:21:56,142 - root - INFO - lr: 4.4451e-05 gnorm: 1.15 [ 5:47:47<18:45:05] +[titan] 2025-10-05 04:22:04,824 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:22:07,011 - root - INFO - step: 9450 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1302 +[titan] 2025-10-05 04:22:07,011 - root - INFO - lr: 4.4445e-05 gnorm: 1.11 [ 5:47:57<18:44:54] +[titan] 2025-10-05 04:22:17,891 - root - INFO - step: 9455 loss: 2.4826 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1916 +[titan] 2025-10-05 04:22:17,891 - root - INFO - lr: 4.4439e-05 gnorm: 1.14 [ 5:48:08<18:44:42] +[titan] 2025-10-05 04:22:28,768 - root - INFO - step: 9460 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 04:22:28,768 - root - INFO - lr: 4.4433e-05 gnorm: 1.12 [ 5:48:19<18:44:31] +[titan] 2025-10-05 04:22:39,662 - root - INFO - step: 9465 loss: 2.4758 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:22:39,663 - root - INFO - lr: 4.4427e-05 gnorm: 1.12 [ 5:48:30<18:44:19] +[titan] 2025-10-05 04:22:50,623 - root - INFO - step: 9470 loss: 2.4549 memory: 118.84GiB(85.28%) tps: 29,899 tflops: 414.80 mfu: 41.94% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1672 +[titan] 2025-10-05 04:22:50,623 - root - INFO - lr: 4.4421e-05 gnorm: 1.19 [ 5:48:41<18:44:08] +[titan] 2025-10-05 04:23:01,499 - root - INFO - step: 9475 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1431 +[titan] 2025-10-05 04:23:01,500 - root - INFO - lr: 4.4415e-05 gnorm: 1.12 [ 5:48:52<18:43:56] +[titan] 2025-10-05 04:23:12,360 - root - INFO - step: 9480 loss: 2.4464 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1579 +[titan] 2025-10-05 04:23:12,361 - root - INFO - lr: 4.4410e-05 
gnorm: 1.18 [ 5:49:03<18:43:45] +[titan] 2025-10-05 04:23:23,239 - root - INFO - step: 9485 loss: 2.4527 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1643 +[titan] 2025-10-05 04:23:23,239 - root - INFO - lr: 4.4404e-05 gnorm: 1.19 [ 5:49:14<18:43:33] +[titan] 2025-10-05 04:23:34,114 - root - INFO - step: 9490 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:23:34,114 - root - INFO - lr: 4.4398e-05 gnorm: 1.11 [ 5:49:25<18:43:21] +[titan] 2025-10-05 04:23:44,977 - root - INFO - step: 9495 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1819 +[titan] 2025-10-05 04:23:44,977 - root - INFO - lr: 4.4392e-05 gnorm: 1.11 [ 5:49:35<18:43:10] +[titan] 2025-10-05 04:23:53,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:23:55,915 - root - INFO - step: 9500 loss: 2.4279 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1438 +[titan] 2025-10-05 04:23:55,915 - root - INFO - lr: 4.4386e-05 gnorm: 1.12 [ 5:49:46<18:42:58] +[titan] 2025-10-05 04:24:06,759 - root - INFO - step: 9505 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:24:06,759 - root - INFO - lr: 4.4380e-05 gnorm: 1.15 [ 5:49:57<18:42:47] +[titan] 2025-10-05 04:24:17,624 - root - INFO - step: 9510 loss: 2.4001 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1187 +[titan] 2025-10-05 04:24:17,624 - root - INFO - lr: 4.4374e-05 gnorm: 1.13 [ 5:50:08<18:42:35] +[titan] 2025-10-05 04:24:28,498 - root - INFO - step: 9515 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 04:24:28,498 - root - INFO - lr: 4.4368e-05 gnorm: 1.17 [ 5:50:19<18:42:23] +[titan] 2025-10-05 04:24:39,377 - root - INFO - step: 9520 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:24:39,377 - root - INFO - lr: 4.4362e-05 gnorm: 1.13 [ 5:50:30<18:42:12] +[titan] 2025-10-05 04:24:50,308 - root - INFO - step: 9525 loss: 2.3498 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0740 +[titan] 2025-10-05 04:24:50,308 - root - INFO - lr: 4.4357e-05 gnorm: 1.18 [ 5:50:41<18:42:00] +[titan] 2025-10-05 04:25:01,216 - root - INFO - step: 9530 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1821 +[titan] 2025-10-05 04:25:01,216 - root - INFO - lr: 4.4351e-05 
gnorm: 1.23 [ 5:50:52<18:41:49] +[titan] 2025-10-05 04:25:12,092 - root - INFO - step: 9535 loss: 2.4240 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1392 +[titan] 2025-10-05 04:25:12,092 - root - INFO - lr: 4.4345e-05 gnorm: 1.17 [ 5:51:03<18:41:37] +[titan] 2025-10-05 04:25:22,993 - root - INFO - step: 9540 loss: 2.4342 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1477 +[titan] 2025-10-05 04:25:22,994 - root - INFO - lr: 4.4339e-05 gnorm: 1.18 [ 5:51:13<18:41:26] +[titan] 2025-10-05 04:25:33,873 - root - INFO - step: 9545 loss: 2.4536 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1647 +[titan] 2025-10-05 04:25:33,873 - root - INFO - lr: 4.4333e-05 gnorm: 1.18 [ 5:51:24<18:41:14] +[titan] 2025-10-05 04:25:42,543 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:25:44,728 - root - INFO - step: 9550 loss: 2.4518 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1638 +[titan] 2025-10-05 04:25:44,728 - root - INFO - lr: 4.4327e-05 gnorm: 1.19 [ 5:51:35<18:41:03] +[titan] 2025-10-05 04:25:55,649 - root - INFO - step: 9555 loss: 2.4091 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 04:25:55,650 - root - INFO - lr: 4.4321e-05 gnorm: 1.19 [ 5:51:46<18:40:51] +[titan] 2025-10-05 04:26:06,497 - root - INFO - step: 9560 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1277 +[titan] 2025-10-05 04:26:06,497 - root - INFO - lr: 4.4315e-05 gnorm: 1.14 [ 5:51:57<18:40:39] +[titan] 2025-10-05 04:26:17,403 - root - INFO - step: 9565 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1464 +[titan] 2025-10-05 04:26:17,403 - root - INFO - lr: 4.4309e-05 gnorm: 1.18 [ 5:52:08<18:40:28] +[titan] 2025-10-05 04:26:28,292 - root - INFO - step: 9570 loss: 2.4323 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 04:26:28,292 - root - INFO - lr: 4.4303e-05 gnorm: 1.17 [ 5:52:19<18:40:16] +[titan] 2025-10-05 04:26:39,137 - root - INFO - step: 9575 loss: 2.4565 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:26:39,137 - root - INFO - lr: 4.4297e-05 gnorm: 1.17 [ 5:52:30<18:40:05] +[titan] 2025-10-05 04:26:50,002 - root - INFO - step: 9580 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.2636 +[titan] 2025-10-05 04:26:50,002 - root - INFO - lr: 4.4291e-05 
gnorm: 1.16 [ 5:52:40<18:39:53] +[titan] 2025-10-05 04:27:00,916 - root - INFO - step: 9585 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 04:27:00,916 - root - INFO - lr: 4.4285e-05 gnorm: 1.16 [ 5:52:51<18:39:42] +[titan] 2025-10-05 04:27:11,772 - root - INFO - step: 9590 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2492 +[titan] 2025-10-05 04:27:11,773 - root - INFO - lr: 4.4279e-05 gnorm: 1.14 [ 5:53:02<18:39:30] +[titan] 2025-10-05 04:27:22,632 - root - INFO - step: 9595 loss: 2.4580 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1692 +[titan] 2025-10-05 04:27:22,632 - root - INFO - lr: 4.4273e-05 gnorm: 1.16 [ 5:53:13<18:39:18] +[titan] 2025-10-05 04:27:31,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:27:33,503 - root - INFO - step: 9600 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:27:33,503 - root - INFO - lr: 4.4268e-05 gnorm: 1.14 [ 5:53:24<18:39:07] +[titan] 2025-10-05 04:27:44,378 - root - INFO - step: 9605 loss: 2.4209 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1359 +[titan] 2025-10-05 04:27:44,378 - root - INFO - lr: 4.4262e-05 gnorm: 1.14 [ 5:53:35<18:38:55] +[titan] 2025-10-05 04:27:55,281 - root - INFO - step: 9610 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1495 +[titan] 2025-10-05 04:27:55,281 - root - INFO - lr: 4.4256e-05 gnorm: 1.27 [ 5:53:46<18:38:44] +[titan] 2025-10-05 04:28:06,144 - root - INFO - step: 9615 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1414 +[titan] 2025-10-05 04:28:06,145 - root - INFO - lr: 4.4250e-05 gnorm: 1.12 [ 5:53:57<18:38:32] +[titan] 2025-10-05 04:28:17,025 - root - INFO - step: 9620 loss: 2.4380 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1520 +[titan] 2025-10-05 04:28:17,025 - root - INFO - lr: 4.4244e-05 gnorm: 1.17 [ 5:54:07<18:38:21] +[titan] 2025-10-05 04:28:27,900 - root - INFO - step: 9625 loss: 2.4092 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1278 +[titan] 2025-10-05 04:28:27,900 - root - INFO - lr: 4.4238e-05 gnorm: 1.17 [ 5:54:18<18:38:09] +[titan] 2025-10-05 04:28:38,759 - root - INFO - step: 9630 loss: 2.3955 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1144 +[titan] 2025-10-05 04:28:38,759 - root - INFO - lr: 4.4232e-05 
gnorm: 1.18 [ 5:54:29<18:37:57] +[titan] 2025-10-05 04:28:49,641 - root - INFO - step: 9635 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 04:28:49,641 - root - INFO - lr: 4.4226e-05 gnorm: 1.17 [ 5:54:40<18:37:46] +[titan] 2025-10-05 04:29:00,565 - root - INFO - step: 9640 loss: 2.5391 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2382 +[titan] 2025-10-05 04:29:00,565 - root - INFO - lr: 4.4220e-05 gnorm: 1.17 [ 5:54:51<18:37:34] +[titan] 2025-10-05 04:29:11,410 - root - INFO - step: 9645 loss: 2.4192 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 04:29:11,410 - root - INFO - lr: 4.4214e-05 gnorm: 1.18 [ 5:55:02<18:37:23] +[titan] 2025-10-05 04:29:20,109 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:29:22,295 - root - INFO - step: 9650 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:29:22,295 - root - INFO - lr: 4.4208e-05 gnorm: 1.14 [ 5:55:13<18:37:11] +[titan] 2025-10-05 04:29:33,192 - root - INFO - step: 9655 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1795 +[titan] 2025-10-05 04:29:33,192 - root - INFO - lr: 4.4202e-05 gnorm: 1.18 [ 5:55:24<18:37:00] +[titan] 2025-10-05 04:29:44,075 - root - INFO - step: 9660 loss: 2.5077 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2141 +[titan] 2025-10-05 04:29:44,076 - root - INFO - lr: 4.4196e-05 gnorm: 1.19 [ 5:55:34<18:36:48] +[titan] 2025-10-05 04:29:55,012 - root - INFO - step: 9665 loss: 2.3987 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2812 global_avg_mtp_loss: 2.1174 +[titan] 2025-10-05 04:29:55,012 - root - INFO - lr: 4.4190e-05 gnorm: 1.13 [ 5:55:45<18:36:37] +[titan] 2025-10-05 04:30:05,890 - root - INFO - step: 9670 loss: 2.4206 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1355 +[titan] 2025-10-05 04:30:05,891 - root - INFO - lr: 4.4184e-05 gnorm: 1.15 [ 5:55:56<18:36:25] +[titan] 2025-10-05 04:30:16,776 - root - INFO - step: 9675 loss: 2.3409 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 04:30:16,776 - root - INFO - lr: 4.4178e-05 gnorm: 1.12 [ 5:56:07<18:36:14] +[titan] 2025-10-05 04:30:27,638 - root - INFO - step: 9680 loss: 2.4055 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1224 +[titan] 2025-10-05 04:30:27,639 - root - INFO - lr: 4.4172e-05 
gnorm: 1.11 [ 5:56:18<18:36:02] +[titan] 2025-10-05 04:30:38,514 - root - INFO - step: 9685 loss: 2.4020 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1213 +[titan] 2025-10-05 04:30:38,514 - root - INFO - lr: 4.4166e-05 gnorm: 1.10 [ 5:56:29<18:35:51] +[titan] 2025-10-05 04:30:49,397 - root - INFO - step: 9690 loss: 2.3894 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 04:30:49,397 - root - INFO - lr: 4.4160e-05 gnorm: 1.14 [ 5:56:40<18:35:39] +[titan] 2025-10-05 04:31:00,376 - root - INFO - step: 9695 loss: 2.4118 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.10 mfu: 41.87% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:31:00,376 - root - INFO - lr: 4.4154e-05 gnorm: 1.13 [ 5:56:51<18:35:28] +[titan] 2025-10-05 04:31:09,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:31:11,234 - root - INFO - step: 9700 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 04:31:11,235 - root - INFO - lr: 4.4148e-05 gnorm: 1.17 [ 5:57:02<18:35:16] +[titan] 2025-10-05 04:31:22,095 - root - INFO - step: 9705 loss: 2.4525 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1644 +[titan] 2025-10-05 04:31:22,095 - root - INFO - lr: 4.4142e-05 gnorm: 1.18 [ 5:57:13<18:35:04] +[titan] 2025-10-05 04:31:32,925 - root - INFO - step: 9710 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:31:32,925 - root - INFO - lr: 4.4136e-05 gnorm: 1.17 [ 5:57:23<18:34:53] +[titan] 2025-10-05 04:31:43,787 - root - INFO - step: 9715 loss: 2.4891 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 04:31:43,787 - root - INFO - lr: 4.4130e-05 gnorm: 1.38 [ 5:57:34<18:34:41] +[titan] 2025-10-05 04:31:54,630 - root - INFO - step: 9720 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0983 +[titan] 2025-10-05 04:31:54,630 - root - INFO - lr: 4.4124e-05 gnorm: 1.14 [ 5:57:45<18:34:30] +[titan] 2025-10-05 04:32:05,581 - root - INFO - step: 9725 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 04:32:05,582 - root - INFO - lr: 4.4118e-05 gnorm: 1.14 [ 5:57:56<18:34:18] +[titan] 2025-10-05 04:32:12,292 - root - INFO - Dumping profiler traces at step 9728 +[titan] 2025-10-05 04:32:12,330 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:32:16,691 - root - INFO - step: 9730 loss: 2.4883 
memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.1950 +[titan] 2025-10-05 04:32:16,691 - root - INFO - lr: 4.4112e-05 gnorm: 1.25 [ 5:58:07<18:34:07] +[titan] 2025-10-05 04:32:27,533 - root - INFO - step: 9735 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:32:27,534 - root - INFO - lr: 4.4106e-05 gnorm: 1.17 [ 5:58:18<18:33:56] +[titan] 2025-10-05 04:32:38,369 - root - INFO - step: 9740 loss: 2.4600 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1703 +[titan] 2025-10-05 04:32:38,369 - root - INFO - lr: 4.4100e-05 gnorm: 1.17 [ 5:58:29<18:33:44] +[titan] 2025-10-05 04:32:49,220 - root - INFO - step: 9745 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 04:32:49,220 - root - INFO - lr: 4.4094e-05 gnorm: 1.16 [ 5:58:40<18:33:32] +[titan] 2025-10-05 04:32:57,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:33:00,142 - root - INFO - step: 9750 loss: 2.3885 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1078 +[titan] 2025-10-05 04:33:00,143 - root - INFO - lr: 4.4088e-05 gnorm: 1.14 [ 5:58:51<18:33:21] +[titan] 2025-10-05 04:33:10,995 - root - INFO - step: 9755 loss: 2.5700 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3070 global_avg_mtp_loss: 2.2630 +[titan] 2025-10-05 04:33:10,995 - root - INFO - lr: 4.4082e-05 gnorm: 1.38 [ 5:59:01<18:33:09] +[titan] 2025-10-05 04:33:21,841 - root - INFO - step: 9760 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:33:21,841 - root - INFO - lr: 4.4076e-05 gnorm: 1.10 [ 5:59:12<18:32:58] +[titan] 2025-10-05 04:33:32,699 - root - INFO - step: 9765 loss: 2.4074 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:33:32,699 - root - INFO - lr: 4.4070e-05 gnorm: 1.12 [ 5:59:23<18:32:46] +[titan] 2025-10-05 04:33:43,562 - root - INFO - step: 9770 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 04:33:43,563 - root - INFO - lr: 4.4064e-05 gnorm: 1.20 [ 5:59:34<18:32:34] +[titan] 2025-10-05 04:33:54,429 - root - INFO - step: 9775 loss: 2.3924 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1125 +[titan] 2025-10-05 04:33:54,429 - root - INFO - lr: 4.4058e-05 gnorm: 1.13 [ 5:59:45<18:32:23] +[titan] 2025-10-05 04:34:05,364 - root - INFO - step: 9780 loss: 2.4335 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1473 +[titan] 2025-10-05 04:34:05,365 - root - INFO - lr: 4.4052e-05 
gnorm: 1.19 [ 5:59:56<18:32:12] +[titan] 2025-10-05 04:34:16,251 - root - INFO - step: 9785 loss: 2.4309 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:34:16,251 - root - INFO - lr: 4.4046e-05 gnorm: 1.30 [ 6:00:07<18:32:00] +[titan] 2025-10-05 04:34:27,120 - root - INFO - step: 9790 loss: 2.4512 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2882 global_avg_mtp_loss: 2.1629 +[titan] 2025-10-05 04:34:27,120 - root - INFO - lr: 4.4039e-05 gnorm: 1.21 [ 6:00:18<18:31:48] +[titan] 2025-10-05 04:34:37,999 - root - INFO - step: 9795 loss: 2.3456 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 04:34:37,999 - root - INFO - lr: 4.4033e-05 gnorm: 1.14 [ 6:00:28<18:31:37] +[titan] 2025-10-05 04:34:46,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:34:48,883 - root - INFO - step: 9800 loss: 2.4057 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:34:48,883 - root - INFO - lr: 4.4027e-05 gnorm: 1.18 [ 6:00:39<18:31:25] +[titan] 2025-10-05 04:34:59,779 - root - INFO - step: 9805 loss: 2.5371 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2995 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:34:59,779 - root - INFO - lr: 4.4021e-05 gnorm: 1.15 [ 6:00:50<18:31:14] +[titan] 2025-10-05 04:35:10,650 - root - INFO - step: 9810 loss: 2.4142 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1309 +[titan] 2025-10-05 04:35:10,650 - root - INFO - lr: 4.4015e-05 gnorm: 1.16 [ 6:01:01<18:31:02] +[titan] 2025-10-05 04:35:21,521 - root - INFO - step: 9815 loss: 2.4068 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1250 +[titan] 2025-10-05 04:35:21,521 - root - INFO - lr: 4.4009e-05 gnorm: 1.16 [ 6:01:12<18:30:51] +[titan] 2025-10-05 04:35:32,405 - root - INFO - step: 9820 loss: 2.4191 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:35:32,405 - root - INFO - lr: 4.4003e-05 gnorm: 1.14 [ 6:01:23<18:30:39] +[titan] 2025-10-05 04:35:43,265 - root - INFO - step: 9825 loss: 2.4557 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:35:43,266 - root - INFO - lr: 4.3997e-05 gnorm: 1.11 [ 6:01:34<18:30:28] +[titan] 2025-10-05 04:35:54,144 - root - INFO - step: 9830 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 04:35:54,144 - root - INFO - lr: 4.3991e-05 
gnorm: 1.10 [ 6:01:45<18:30:16] +[titan] 2025-10-05 04:36:05,038 - root - INFO - step: 9835 loss: 2.3594 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0837 +[titan] 2025-10-05 04:36:05,038 - root - INFO - lr: 4.3985e-05 gnorm: 1.17 [ 6:01:55<18:30:05] +[titan] 2025-10-05 04:36:15,903 - root - INFO - step: 9840 loss: 2.3943 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1136 +[titan] 2025-10-05 04:36:15,903 - root - INFO - lr: 4.3979e-05 gnorm: 1.13 [ 6:02:06<18:29:53] +[titan] 2025-10-05 04:36:26,766 - root - INFO - step: 9845 loss: 2.3607 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0841 +[titan] 2025-10-05 04:36:26,766 - root - INFO - lr: 4.3973e-05 gnorm: 1.11 [ 6:02:17<18:29:41] +[titan] 2025-10-05 04:36:35,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:36:37,631 - root - INFO - step: 9850 loss: 2.4018 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1206 +[titan] 2025-10-05 04:36:37,631 - root - INFO - lr: 4.3967e-05 gnorm: 1.18 [ 6:02:28<18:29:30] +[titan] 2025-10-05 04:36:48,494 - root - INFO - step: 9855 loss: 2.3920 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 04:36:48,494 - root - INFO - lr: 4.3961e-05 gnorm: 1.14 [ 6:02:39<18:29:18] +[titan] 2025-10-05 04:36:59,366 - root - INFO - step: 9860 loss: 2.3928 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1132 +[titan] 2025-10-05 04:36:59,366 - root - INFO - lr: 4.3955e-05 gnorm: 1.16 [ 6:02:50<18:29:07] +[titan] 2025-10-05 04:37:10,292 - root - INFO - step: 9865 loss: 2.3430 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0689 +[titan] 2025-10-05 04:37:10,292 - root - INFO - lr: 4.3948e-05 gnorm: 1.14 [ 6:03:01<18:28:55] +[titan] 2025-10-05 04:37:21,111 - root - INFO - step: 9870 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.0953 +[titan] 2025-10-05 04:37:21,111 - root - INFO - lr: 4.3942e-05 gnorm: 1.23 [ 6:03:12<18:28:44] +[titan] 2025-10-05 04:37:31,972 - root - INFO - step: 9875 loss: 2.4673 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2893 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:37:31,972 - root - INFO - lr: 4.3936e-05 gnorm: 1.12 [ 6:03:22<18:28:32] +[titan] 2025-10-05 04:37:42,800 - root - INFO - step: 9880 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 04:37:42,800 - root - INFO - lr: 4.3930e-05 
gnorm: 1.18 [ 6:03:33<18:28:20] +[titan] 2025-10-05 04:37:53,645 - root - INFO - step: 9885 loss: 2.3888 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:37:53,645 - root - INFO - lr: 4.3924e-05 gnorm: 1.14 [ 6:03:44<18:28:09] +[titan] 2025-10-05 04:38:04,551 - root - INFO - step: 9890 loss: 2.3882 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 04:38:04,552 - root - INFO - lr: 4.3918e-05 gnorm: 1.12 [ 6:03:55<18:27:57] +[titan] 2025-10-05 04:38:15,412 - root - INFO - step: 9895 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1157 +[titan] 2025-10-05 04:38:15,412 - root - INFO - lr: 4.3912e-05 gnorm: 1.14 [ 6:04:06<18:27:46] +[titan] 2025-10-05 04:38:24,056 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:38:26,242 - root - INFO - step: 9900 loss: 2.3816 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1025 +[titan] 2025-10-05 04:38:26,242 - root - INFO - lr: 4.3906e-05 gnorm: 1.14 [ 6:04:17<18:27:34] +[titan] 2025-10-05 04:38:37,109 - root - INFO - step: 9905 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 04:38:37,109 - root - INFO - lr: 4.3900e-05 gnorm: 1.17 [ 6:04:28<18:27:22] +[titan] 2025-10-05 04:38:47,968 - root - INFO - step: 9910 loss: 2.4451 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 04:38:47,968 - root - INFO - lr: 4.3894e-05 gnorm: 1.17 [ 6:04:38<18:27:11] +[titan] 2025-10-05 04:38:58,828 - root - INFO - step: 9915 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0776 +[titan] 2025-10-05 04:38:58,828 - root - INFO - lr: 4.3887e-05 gnorm: 1.15 [ 6:04:49<18:26:59] +[titan] 2025-10-05 04:39:09,703 - root - INFO - step: 9920 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 04:39:09,703 - root - INFO - lr: 4.3881e-05 gnorm: 1.13 [ 6:05:00<18:26:48] +[titan] 2025-10-05 04:39:20,593 - root - INFO - step: 9925 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 04:39:20,593 - root - INFO - lr: 4.3875e-05 gnorm: 1.14 [ 6:05:11<18:26:36] +[titan] 2025-10-05 04:39:31,464 - root - INFO - step: 9930 loss: 2.2894 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 04:39:31,464 - root - INFO - lr: 4.3869e-05 
gnorm: 1.11 [ 6:05:22<18:26:25] +[titan] 2025-10-05 04:39:42,337 - root - INFO - step: 9935 loss: 2.3475 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 04:39:42,338 - root - INFO - lr: 4.3863e-05 gnorm: 1.10 [ 6:05:33<18:26:13] +[titan] 2025-10-05 04:39:53,224 - root - INFO - step: 9940 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0849 +[titan] 2025-10-05 04:39:53,224 - root - INFO - lr: 4.3857e-05 gnorm: 1.13 [ 6:05:44<18:26:02] +[titan] 2025-10-05 04:40:04,154 - root - INFO - step: 9945 loss: 2.3821 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1033 +[titan] 2025-10-05 04:40:04,154 - root - INFO - lr: 4.3851e-05 gnorm: 1.14 [ 6:05:55<18:25:50] +[titan] 2025-10-05 04:40:12,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:40:15,025 - root - INFO - step: 9950 loss: 2.4179 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:40:15,026 - root - INFO - lr: 4.3845e-05 gnorm: 1.11 [ 6:06:05<18:25:39] +[titan] 2025-10-05 04:40:25,938 - root - INFO - step: 9955 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1159 +[titan] 2025-10-05 04:40:25,938 - root - INFO - lr: 4.3838e-05 gnorm: 1.14 [ 6:06:16<18:25:27] +[titan] 2025-10-05 04:40:36,795 - root - INFO - step: 9960 loss: 2.3949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 04:40:36,795 - root - INFO - lr: 4.3832e-05 gnorm: 1.17 [ 6:06:27<18:25:16] +[titan] 2025-10-05 04:40:47,648 - root - INFO - step: 9965 loss: 2.4110 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:40:47,648 - root - INFO - lr: 4.3826e-05 gnorm: 1.15 [ 6:06:38<18:25:04] +[titan] 2025-10-05 04:40:58,540 - root - INFO - step: 9970 loss: 2.3944 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1127 +[titan] 2025-10-05 04:40:58,540 - root - INFO - lr: 4.3820e-05 gnorm: 1.18 [ 6:06:49<18:24:53] +[titan] 2025-10-05 04:41:09,431 - root - INFO - step: 9975 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 04:41:09,432 - root - INFO - lr: 4.3814e-05 gnorm: 1.19 [ 6:07:00<18:24:41] +[titan] 2025-10-05 04:41:20,364 - root - INFO - step: 9980 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 04:41:20,364 - root - INFO - lr: 4.3808e-05 
gnorm: 1.18 [ 6:07:11<18:24:30] +[titan] 2025-10-05 04:41:31,259 - root - INFO - step: 9985 loss: 2.4484 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:41:31,260 - root - INFO - lr: 4.3802e-05 gnorm: 1.15 [ 6:07:22<18:24:18] +[titan] 2025-10-05 04:41:42,148 - root - INFO - step: 9990 loss: 2.4717 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:41:42,148 - root - INFO - lr: 4.3795e-05 gnorm: 1.17 [ 6:07:33<18:24:07] +[titan] 2025-10-05 04:41:53,059 - root - INFO - step: 9995 loss: 2.3948 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1147 +[titan] 2025-10-05 04:41:53,059 - root - INFO - lr: 4.3789e-05 gnorm: 1.17 [ 6:07:43<18:23:55] +[titan] 2025-10-05 04:42:01,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:42:03,951 - root - INFO - step: 10000 loss: 2.4699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1790 +[titan] 2025-10-05 04:42:03,951 - root - INFO - lr: 4.3783e-05 gnorm: 1.18 [ 6:07:54<18:23:44] +[titan] 2025-10-05 04:42:03,951 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 04:42:23,104 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 04:42:23,105 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.15 seconds. 
+[titan] 2025-10-05 04:44:28,943 - root - INFO - step: 10005 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 2,260 tflops: 31.35 mfu: 3.17% global_avg_ntp_loss: 0.2840 global_avg_mtp_loss: 2.1282 +[titan] 2025-10-05 04:44:28,943 - root - INFO - lr: 4.3777e-05 gnorm: 1.15 [ 6:10:19<18:30:15] +[titan] 2025-10-05 04:44:39,759 - root - INFO - step: 10010 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0843 +[titan] 2025-10-05 04:44:39,759 - root - INFO - lr: 4.3771e-05 gnorm: 1.10 [ 6:10:30<18:30:03] +[titan] 2025-10-05 04:44:50,583 - root - INFO - step: 10015 loss: 2.4606 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1726 +[titan] 2025-10-05 04:44:50,583 - root - INFO - lr: 4.3765e-05 gnorm: 1.17 [ 6:10:41<18:29:51] +[titan] 2025-10-05 04:45:01,371 - root - INFO - step: 10020 loss: 2.3595 memory: 118.84GiB(85.28%) tps: 30,377 tflops: 421.43 mfu: 42.61% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0816 +[titan] 2025-10-05 04:45:01,371 - root - INFO - lr: 4.3758e-05 gnorm: 1.12 [ 6:10:52<18:29:39] +[titan] 2025-10-05 04:45:12,207 - root - INFO - step: 10025 loss: 2.3890 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1095 +[titan] 2025-10-05 04:45:12,207 - root - INFO - lr: 4.3752e-05 gnorm: 1.13 [ 6:11:03<18:29:27] +[titan] 2025-10-05 04:45:23,056 - root - INFO - step: 10030 loss: 2.4171 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:45:23,056 - root - INFO - lr: 4.3746e-05 gnorm: 1.14 [ 6:11:13<18:29:15] +[titan] 2025-10-05 04:45:33,878 - root - INFO - step: 10035 loss: 2.4258 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1417 +[titan] 2025-10-05 04:45:33,879 - root - INFO - lr: 
4.3740e-05 gnorm: 1.18 [ 6:11:24<18:29:03] +[titan] 2025-10-05 04:45:44,722 - root - INFO - step: 10040 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:45:44,722 - root - INFO - lr: 4.3734e-05 gnorm: 1.13 [ 6:11:35<18:28:51] +[titan] 2025-10-05 04:45:55,531 - root - INFO - step: 10045 loss: 2.3962 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:45:55,531 - root - INFO - lr: 4.3728e-05 gnorm: 1.14 [ 6:11:46<18:28:39] +[titan] 2025-10-05 04:46:04,196 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:46:06,376 - root - INFO - step: 10050 loss: 2.4217 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1372 +[titan] 2025-10-05 04:46:06,376 - root - INFO - lr: 4.3721e-05 gnorm: 1.19 [ 6:11:57<18:28:27] +[titan] 2025-10-05 04:46:17,244 - root - INFO - step: 10055 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1117 +[titan] 2025-10-05 04:46:17,244 - root - INFO - lr: 4.3715e-05 gnorm: 1.09 [ 6:12:08<18:28:15] +[titan] 2025-10-05 04:46:28,093 - root - INFO - step: 10060 loss: 2.4776 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 04:46:28,093 - root - INFO - lr: 4.3709e-05 gnorm: 1.12 [ 6:12:18<18:28:03] +[titan] 2025-10-05 04:46:38,949 - root - INFO - step: 10065 loss: 2.3571 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0814 +[titan] 2025-10-05 04:46:38,949 - root - INFO - lr: 4.3703e-05 gnorm: 1.17 [ 6:12:29<18:27:52] +[titan] 2025-10-05 04:46:49,820 - root - INFO - step: 10070 loss: 2.4101 memory: 118.84GiB(85.28%) tps: 
30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:46:49,820 - root - INFO - lr: 4.3697e-05 gnorm: 1.14 [ 6:12:40<18:27:40] +[titan] 2025-10-05 04:47:00,671 - root - INFO - step: 10075 loss: 2.4112 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:47:00,671 - root - INFO - lr: 4.3690e-05 gnorm: 1.17 [ 6:12:51<18:27:28] +[titan] 2025-10-05 04:47:11,530 - root - INFO - step: 10080 loss: 2.3867 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1065 +[titan] 2025-10-05 04:47:11,530 - root - INFO - lr: 4.3684e-05 gnorm: 1.12 [ 6:13:02<18:27:16] +[titan] 2025-10-05 04:47:22,402 - root - INFO - step: 10085 loss: 2.3591 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 04:47:22,403 - root - INFO - lr: 4.3678e-05 gnorm: 1.14 [ 6:13:13<18:27:04] +[titan] 2025-10-05 04:47:33,304 - root - INFO - step: 10090 loss: 2.3953 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1153 +[titan] 2025-10-05 04:47:33,305 - root - INFO - lr: 4.3672e-05 gnorm: 1.12 [ 6:13:24<18:26:53] +[titan] 2025-10-05 04:47:44,169 - root - INFO - step: 10095 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2874 global_avg_mtp_loss: 2.1668 +[titan] 2025-10-05 04:47:44,169 - root - INFO - lr: 4.3666e-05 gnorm: 1.20 [ 6:13:35<18:26:41] +[titan] 2025-10-05 04:47:52,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:47:55,091 - root - INFO - step: 10100 loss: 2.4560 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1684 +[titan] 2025-10-05 04:47:55,091 - root - INFO - lr: 4.3659e-05 gnorm: 1.18 [ 6:13:45<18:26:29] +[titan] 2025-10-05 04:48:05,969 - root - INFO - step: 10105 loss: 2.4312 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:48:05,969 - root - INFO - lr: 4.3653e-05 gnorm: 1.10 [ 6:13:56<18:26:17] +[titan] 2025-10-05 04:48:16,842 - root - INFO - step: 10110 loss: 2.3985 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1175 +[titan] 2025-10-05 04:48:16,842 - root - INFO - lr: 4.3647e-05 gnorm: 1.15 [ 6:14:07<18:26:06] +[titan] 2025-10-05 04:48:27,739 - root - INFO - step: 10115 loss: 2.4183 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1354 +[titan] 2025-10-05 04:48:27,739 - root - INFO - lr: 4.3641e-05 gnorm: 1.11 [ 6:14:18<18:25:54] +[titan] 2025-10-05 04:48:38,638 - root - INFO - step: 10120 loss: 2.3862 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 04:48:38,639 - root - INFO - lr: 4.3635e-05 gnorm: 1.15 [ 6:14:29<18:25:42] +[titan] 2025-10-05 04:48:49,495 - root - INFO - step: 10125 loss: 2.4046 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1229 +[titan] 2025-10-05 04:48:49,495 - root - INFO - lr: 4.3628e-05 gnorm: 1.13 [ 6:14:40<18:25:30] +[titan] 2025-10-05 04:49:00,374 - root - INFO - step: 10130 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 04:49:00,374 - root - INFO - lr: 
4.3622e-05 gnorm: 1.12 [ 6:14:51<18:25:19] +[titan] 2025-10-05 04:49:11,231 - root - INFO - step: 10135 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1240 +[titan] 2025-10-05 04:49:11,231 - root - INFO - lr: 4.3616e-05 gnorm: 1.11 [ 6:15:02<18:25:07] +[titan] 2025-10-05 04:49:22,073 - root - INFO - step: 10140 loss: 2.4295 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1448 +[titan] 2025-10-05 04:49:22,073 - root - INFO - lr: 4.3610e-05 gnorm: 1.19 [ 6:15:12<18:24:55] +[titan] 2025-10-05 04:49:32,953 - root - INFO - step: 10145 loss: 2.4182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:49:32,953 - root - INFO - lr: 4.3603e-05 gnorm: 1.13 [ 6:15:23<18:24:43] +[titan] 2025-10-05 04:49:41,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:49:43,809 - root - INFO - step: 10150 loss: 2.4033 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1220 +[titan] 2025-10-05 04:49:43,810 - root - INFO - lr: 4.3597e-05 gnorm: 1.18 [ 6:15:34<18:24:31] +[titan] 2025-10-05 04:49:54,722 - root - INFO - step: 10155 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1491 +[titan] 2025-10-05 04:49:54,722 - root - INFO - lr: 4.3591e-05 gnorm: 1.19 [ 6:15:45<18:24:20] +[titan] 2025-10-05 04:50:05,570 - root - INFO - step: 10160 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2925 global_avg_mtp_loss: 2.2000 +[titan] 2025-10-05 04:50:05,570 - root - INFO - lr: 4.3585e-05 gnorm: 1.18 [ 6:15:56<18:24:08] +[titan] 2025-10-05 04:50:16,417 - root - INFO - step: 10165 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 04:50:16,418 - root - INFO - lr: 4.3578e-05 gnorm: 1.15 [ 6:16:07<18:23:56] +[titan] 2025-10-05 04:50:27,286 - root - INFO - step: 10170 loss: 2.4892 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:50:27,286 - root - INFO - lr: 4.3572e-05 gnorm: 1.17 [ 6:16:18<18:23:44] +[titan] 2025-10-05 04:50:38,151 - root - INFO - step: 10175 loss: 2.4728 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1825 +[titan] 2025-10-05 04:50:38,151 - root - INFO - lr: 4.3566e-05 gnorm: 1.14 [ 6:16:29<18:23:33] +[titan] 2025-10-05 04:50:49,013 - root - INFO - step: 10180 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1066 +[titan] 2025-10-05 04:50:49,013 - root - INFO - lr: 
4.3560e-05 gnorm: 1.11 [ 6:16:39<18:23:21] +[titan] 2025-10-05 04:50:59,879 - root - INFO - step: 10185 loss: 2.3308 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0576 +[titan] 2025-10-05 04:50:59,879 - root - INFO - lr: 4.3553e-05 gnorm: 1.10 [ 6:16:50<18:23:09] +[titan] 2025-10-05 04:51:10,735 - root - INFO - step: 10190 loss: 2.4005 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1191 +[titan] 2025-10-05 04:51:10,735 - root - INFO - lr: 4.3547e-05 gnorm: 1.12 [ 6:17:01<18:22:57] +[titan] 2025-10-05 04:51:21,605 - root - INFO - step: 10195 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:51:21,605 - root - INFO - lr: 4.3541e-05 gnorm: 1.07 [ 6:17:12<18:22:45] +[titan] 2025-10-05 04:51:30,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:51:32,491 - root - INFO - step: 10200 loss: 2.4592 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1708 +[titan] 2025-10-05 04:51:32,491 - root - INFO - lr: 4.3535e-05 gnorm: 1.19 [ 6:17:23<18:22:34] +[titan] 2025-10-05 04:51:43,357 - root - INFO - step: 10205 loss: 2.3585 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0823 +[titan] 2025-10-05 04:51:43,357 - root - INFO - lr: 4.3528e-05 gnorm: 1.08 [ 6:17:34<18:22:22] +[titan] 2025-10-05 04:51:54,234 - root - INFO - step: 10210 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 04:51:54,234 - root - INFO - lr: 4.3522e-05 gnorm: 1.13 [ 6:17:45<18:22:10] +[titan] 2025-10-05 04:52:05,148 - root - INFO - step: 10215 loss: 2.4224 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1397 +[titan] 2025-10-05 04:52:05,148 - root - INFO - lr: 4.3516e-05 gnorm: 1.15 [ 6:17:56<18:21:58] +[titan] 2025-10-05 04:52:16,011 - root - INFO - step: 10220 loss: 2.3880 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:52:16,012 - root - INFO - lr: 4.3510e-05 gnorm: 1.17 [ 6:18:06<18:21:47] +[titan] 2025-10-05 04:52:26,919 - root - INFO - step: 10225 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0969 +[titan] 2025-10-05 04:52:26,919 - root - INFO - lr: 4.3503e-05 gnorm: 1.13 [ 6:18:17<18:21:35] +[titan] 2025-10-05 04:52:37,795 - root - INFO - step: 10230 loss: 2.4827 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2964 global_avg_mtp_loss: 2.1864 +[titan] 2025-10-05 04:52:37,795 - root - INFO - lr: 
4.3497e-05 gnorm: 1.23 [ 6:18:28<18:21:23] +[titan] 2025-10-05 04:52:48,651 - root - INFO - step: 10235 loss: 2.3739 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0960 +[titan] 2025-10-05 04:52:48,651 - root - INFO - lr: 4.3491e-05 gnorm: 1.14 [ 6:18:39<18:21:11] +[titan] 2025-10-05 04:52:59,594 - root - INFO - step: 10240 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.43 mfu: 42.00% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0761 +[titan] 2025-10-05 04:52:59,594 - root - INFO - lr: 4.3485e-05 gnorm: 1.17 [ 6:18:50<18:21:00] +[titan] 2025-10-05 04:52:59,769 - root - INFO - Dumping profiler traces at step 10240 +[titan] 2025-10-05 04:52:59,809 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:53:10,672 - root - INFO - step: 10245 loss: 2.4638 memory: 118.84GiB(85.28%) tps: 29,580 tflops: 410.38 mfu: 41.49% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1752 +[titan] 2025-10-05 04:53:10,672 - root - INFO - lr: 4.3478e-05 gnorm: 1.18 [ 6:19:01<18:20:49] +[titan] 2025-10-05 04:53:19,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:53:21,556 - root - INFO - step: 10250 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0638 +[titan] 2025-10-05 04:53:21,556 - root - INFO - lr: 4.3472e-05 gnorm: 1.18 [ 6:19:12<18:20:37] +[titan] 2025-10-05 04:53:32,460 - root - INFO - step: 10255 loss: 2.3782 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.0997 +[titan] 2025-10-05 04:53:32,460 - root - INFO - lr: 4.3466e-05 gnorm: 1.11 [ 6:19:23<18:20:25] +[titan] 2025-10-05 04:53:43,321 - root - INFO - step: 10260 loss: 2.3383 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 04:53:43,321 - root - INFO - lr: 4.3459e-05 gnorm: 1.16 [ 6:19:34<18:20:14] +[titan] 2025-10-05 04:53:54,178 - root - INFO - step: 10265 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 04:53:54,178 - root - INFO - lr: 4.3453e-05 gnorm: 1.16 [ 6:19:45<18:20:02] +[titan] 2025-10-05 04:54:05,007 - root - INFO - step: 10270 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 04:54:05,007 - root - INFO - lr: 4.3447e-05 gnorm: 1.17 [ 6:19:55<18:19:50] +[titan] 2025-10-05 04:54:15,842 - root - INFO - step: 10275 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:54:15,843 - root - INFO - lr: 4.3440e-05 gnorm: 1.14 [ 6:20:06<18:19:38] +[titan] 2025-10-05 04:54:26,778 - root - INFO - step: 10280 loss: 2.3590 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0825 +[titan] 2025-10-05 04:54:26,778 - root - INFO - lr: 
4.3434e-05 gnorm: 1.09 [ 6:20:17<18:19:26] +[titan] 2025-10-05 04:54:37,611 - root - INFO - step: 10285 loss: 2.3467 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 04:54:37,611 - root - INFO - lr: 4.3428e-05 gnorm: 1.17 [ 6:20:28<18:19:15] +[titan] 2025-10-05 04:54:48,457 - root - INFO - step: 10290 loss: 2.3098 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 04:54:48,457 - root - INFO - lr: 4.3422e-05 gnorm: 1.13 [ 6:20:39<18:19:03] +[titan] 2025-10-05 04:54:59,307 - root - INFO - step: 10295 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 04:54:59,307 - root - INFO - lr: 4.3415e-05 gnorm: 1.19 [ 6:20:50<18:18:51] +[titan] 2025-10-05 04:55:07,970 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:55:10,154 - root - INFO - step: 10300 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:55:10,154 - root - INFO - lr: 4.3409e-05 gnorm: 1.11 [ 6:21:01<18:18:39] +[titan] 2025-10-05 04:55:20,995 - root - INFO - step: 10305 loss: 2.4115 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1272 +[titan] 2025-10-05 04:55:20,995 - root - INFO - lr: 4.3403e-05 gnorm: 1.16 [ 6:21:11<18:18:27] +[titan] 2025-10-05 04:55:31,895 - root - INFO - step: 10310 loss: 2.3942 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:55:31,895 - root - INFO - lr: 4.3396e-05 gnorm: 1.11 [ 6:21:22<18:18:16] +[titan] 2025-10-05 04:55:42,797 - root - INFO - step: 10315 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0890 +[titan] 2025-10-05 04:55:42,797 - root - INFO - lr: 4.3390e-05 gnorm: 1.14 [ 6:21:33<18:18:04] +[titan] 2025-10-05 04:55:53,631 - root - INFO - step: 10320 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0675 +[titan] 2025-10-05 04:55:53,631 - root - INFO - lr: 4.3384e-05 gnorm: 1.13 [ 6:21:44<18:17:52] +[titan] 2025-10-05 04:56:04,495 - root - INFO - step: 10325 loss: 2.3236 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 04:56:04,495 - root - INFO - lr: 4.3377e-05 gnorm: 1.11 [ 6:21:55<18:17:40] +[titan] 2025-10-05 04:56:15,368 - root - INFO - step: 10330 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0554 +[titan] 2025-10-05 04:56:15,368 - root - INFO - lr: 
4.3371e-05 gnorm: 1.11 [ 6:22:06<18:17:29] +[titan] 2025-10-05 04:56:26,235 - root - INFO - step: 10335 loss: 2.3812 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1021 +[titan] 2025-10-05 04:56:26,235 - root - INFO - lr: 4.3365e-05 gnorm: 1.13 [ 6:22:17<18:17:17] +[titan] 2025-10-05 04:56:37,100 - root - INFO - step: 10340 loss: 2.4139 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 04:56:37,100 - root - INFO - lr: 4.3358e-05 gnorm: 1.15 [ 6:22:27<18:17:05] +[titan] 2025-10-05 04:56:48,014 - root - INFO - step: 10345 loss: 2.3627 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0861 +[titan] 2025-10-05 04:56:48,014 - root - INFO - lr: 4.3352e-05 gnorm: 1.15 [ 6:22:38<18:16:53] +[titan] 2025-10-05 04:56:56,705 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:56:58,888 - root - INFO - step: 10350 loss: 2.3704 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0924 +[titan] 2025-10-05 04:56:58,888 - root - INFO - lr: 4.3346e-05 gnorm: 1.12 [ 6:22:49<18:16:42] +[titan] 2025-10-05 04:57:09,755 - root - INFO - step: 10355 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0403 +[titan] 2025-10-05 04:57:09,755 - root - INFO - lr: 4.3339e-05 gnorm: 1.14 [ 6:23:00<18:16:30] +[titan] 2025-10-05 04:57:20,636 - root - INFO - step: 10360 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1400 +[titan] 2025-10-05 04:57:20,637 - root - INFO - lr: 4.3333e-05 gnorm: 1.16 [ 6:23:11<18:16:18] +[titan] 2025-10-05 04:57:31,521 - root - INFO - step: 10365 loss: 2.3992 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 04:57:31,521 - root - INFO - lr: 4.3327e-05 gnorm: 1.14 [ 6:23:22<18:16:07] +[titan] 2025-10-05 04:57:42,396 - root - INFO - step: 10370 loss: 2.4732 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1823 +[titan] 2025-10-05 04:57:42,396 - root - INFO - lr: 4.3320e-05 gnorm: 1.14 [ 6:23:33<18:15:55] +[titan] 2025-10-05 04:57:53,311 - root - INFO - step: 10375 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1107 +[titan] 2025-10-05 04:57:53,311 - root - INFO - lr: 4.3314e-05 gnorm: 1.17 [ 6:23:44<18:15:43] +[titan] 2025-10-05 04:58:04,191 - root - INFO - step: 10380 loss: 2.3285 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 04:58:04,191 - root - INFO - lr: 
4.3308e-05 gnorm: 1.15 [ 6:23:55<18:15:31] +[titan] 2025-10-05 04:58:15,071 - root - INFO - step: 10385 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:58:15,072 - root - INFO - lr: 4.3301e-05 gnorm: 2.89 [ 6:24:05<18:15:20] +[titan] 2025-10-05 04:58:25,961 - root - INFO - step: 10390 loss: 2.4472 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 04:58:25,961 - root - INFO - lr: 4.3295e-05 gnorm: 1.19 [ 6:24:16<18:15:08] +[titan] 2025-10-05 04:58:36,832 - root - INFO - step: 10395 loss: 2.4116 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:58:36,832 - root - INFO - lr: 4.3289e-05 gnorm: 1.19 [ 6:24:27<18:14:56] +[titan] 2025-10-05 04:58:45,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:58:47,732 - root - INFO - step: 10400 loss: 2.3889 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:58:47,733 - root - INFO - lr: 4.3282e-05 gnorm: 1.15 [ 6:24:38<18:14:45] +[titan] 2025-10-05 04:58:58,620 - root - INFO - step: 10405 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1661 +[titan] 2025-10-05 04:58:58,620 - root - INFO - lr: 4.3276e-05 gnorm: 1.15 [ 6:24:49<18:14:33] +[titan] 2025-10-05 04:59:09,537 - root - INFO - step: 10410 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0667 +[titan] 2025-10-05 04:59:09,538 - root - INFO - lr: 4.3270e-05 gnorm: 1.09 [ 6:25:00<18:14:21] +[titan] 2025-10-05 04:59:20,430 - root - INFO - step: 10415 loss: 2.4412 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1531 +[titan] 2025-10-05 04:59:20,430 - root - INFO - lr: 4.3263e-05 gnorm: 1.11 [ 6:25:11<18:14:10] +[titan] 2025-10-05 04:59:31,331 - root - INFO - step: 10420 loss: 2.4559 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1675 +[titan] 2025-10-05 04:59:31,331 - root - INFO - lr: 4.3257e-05 gnorm: 1.18 [ 6:25:22<18:13:58] +[titan] 2025-10-05 04:59:42,198 - root - INFO - step: 10425 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0782 +[titan] 2025-10-05 04:59:42,198 - root - INFO - lr: 4.3250e-05 gnorm: 1.15 [ 6:25:33<18:13:46] +[titan] 2025-10-05 04:59:53,072 - root - INFO - step: 10430 loss: 2.3763 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0987 +[titan] 2025-10-05 04:59:53,072 - root - INFO - lr: 
4.3244e-05 gnorm: 1.14 [ 6:25:43<18:13:35] +[titan] 2025-10-05 05:00:03,938 - root - INFO - step: 10435 loss: 2.4170 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2835 global_avg_mtp_loss: 2.1335 +[titan] 2025-10-05 05:00:03,939 - root - INFO - lr: 4.3238e-05 gnorm: 1.15 [ 6:25:54<18:13:23] +[titan] 2025-10-05 05:00:14,820 - root - INFO - step: 10440 loss: 2.4296 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 05:00:14,820 - root - INFO - lr: 4.3231e-05 gnorm: 1.12 [ 6:26:05<18:13:11] +[titan] 2025-10-05 05:00:25,686 - root - INFO - step: 10445 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0958 +[titan] 2025-10-05 05:00:25,686 - root - INFO - lr: 4.3225e-05 gnorm: 1.15 [ 6:26:16<18:12:59] +[titan] 2025-10-05 05:00:34,395 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:00:36,573 - root - INFO - step: 10450 loss: 2.3225 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:00:36,574 - root - INFO - lr: 4.3219e-05 gnorm: 1.12 [ 6:26:27<18:12:48] +[titan] 2025-10-05 05:00:47,453 - root - INFO - step: 10455 loss: 2.2956 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 05:00:47,453 - root - INFO - lr: 4.3212e-05 gnorm: 1.12 [ 6:26:38<18:12:36] +[titan] 2025-10-05 05:00:58,326 - root - INFO - step: 10460 loss: 2.4231 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1381 +[titan] 2025-10-05 05:00:58,326 - root - INFO - lr: 4.3206e-05 gnorm: 1.13 [ 6:26:49<18:12:24] +[titan] 2025-10-05 05:01:09,212 - root - INFO - step: 10465 loss: 2.3984 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1166 +[titan] 2025-10-05 05:01:09,212 - root - INFO - lr: 4.3199e-05 gnorm: 1.16 [ 6:27:00<18:12:13] +[titan] 2025-10-05 05:01:20,082 - root - INFO - step: 10470 loss: 2.3857 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1068 +[titan] 2025-10-05 05:01:20,082 - root - INFO - lr: 4.3193e-05 gnorm: 1.17 [ 6:27:10<18:12:01] +[titan] 2025-10-05 05:01:31,025 - root - INFO - step: 10475 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0845 +[titan] 2025-10-05 05:01:31,025 - root - INFO - lr: 4.3187e-05 gnorm: 1.18 [ 6:27:21<18:11:49] +[titan] 2025-10-05 05:01:41,900 - root - INFO - step: 10480 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1605 +[titan] 2025-10-05 05:01:41,900 - root - INFO - lr: 
4.3180e-05 gnorm: 1.14 [ 6:27:32<18:11:38] +[titan] 2025-10-05 05:01:52,794 - root - INFO - step: 10485 loss: 2.3469 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0715 +[titan] 2025-10-05 05:01:52,794 - root - INFO - lr: 4.3174e-05 gnorm: 1.11 [ 6:27:43<18:11:26] +[titan] 2025-10-05 05:02:03,640 - root - INFO - step: 10490 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:02:03,641 - root - INFO - lr: 4.3167e-05 gnorm: 1.12 [ 6:27:54<18:11:14] +[titan] 2025-10-05 05:02:14,499 - root - INFO - step: 10495 loss: 2.4247 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2861 global_avg_mtp_loss: 2.1386 +[titan] 2025-10-05 05:02:14,499 - root - INFO - lr: 4.3161e-05 gnorm: 1.11 [ 6:28:05<18:11:02] +[titan] 2025-10-05 05:02:23,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:02:25,357 - root - INFO - step: 10500 loss: 2.3813 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1019 +[titan] 2025-10-05 05:02:25,357 - root - INFO - lr: 4.3155e-05 gnorm: 1.11 [ 6:28:16<18:10:51] +[titan] 2025-10-05 05:02:36,309 - root - INFO - step: 10505 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.09 mfu: 41.97% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1463 +[titan] 2025-10-05 05:02:36,309 - root - INFO - lr: 4.3148e-05 gnorm: 1.31 [ 6:28:27<18:10:39] +[titan] 2025-10-05 05:02:47,169 - root - INFO - step: 10510 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0752 +[titan] 2025-10-05 05:02:47,169 - root - INFO - lr: 4.3142e-05 gnorm: 1.12 [ 6:28:38<18:10:27] +[titan] 2025-10-05 05:02:58,035 - root - INFO - step: 10515 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1262 +[titan] 2025-10-05 05:02:58,035 - root - INFO - lr: 4.3135e-05 gnorm: 1.20 [ 6:28:48<18:10:16] +[titan] 2025-10-05 05:03:08,894 - root - INFO - step: 10520 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0523 +[titan] 2025-10-05 05:03:08,894 - root - INFO - lr: 4.3129e-05 gnorm: 1.10 [ 6:28:59<18:10:04] +[titan] 2025-10-05 05:03:19,768 - root - INFO - step: 10525 loss: 2.4870 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 05:03:19,769 - root - INFO - lr: 4.3122e-05 gnorm: 1.18 [ 6:29:10<18:09:52] +[titan] 2025-10-05 05:03:30,631 - root - INFO - step: 10530 loss: 2.3951 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:03:30,632 - root - INFO - lr: 
4.3116e-05 gnorm: 1.13 [ 6:29:21<18:09:41] +[titan] 2025-10-05 05:03:41,571 - root - INFO - step: 10535 loss: 2.3677 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:03:41,571 - root - INFO - lr: 4.3110e-05 gnorm: 1.19 [ 6:29:32<18:09:29] +[titan] 2025-10-05 05:03:52,432 - root - INFO - step: 10540 loss: 2.4252 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1411 +[titan] 2025-10-05 05:03:52,432 - root - INFO - lr: 4.3103e-05 gnorm: 1.19 [ 6:29:43<18:09:17] +[titan] 2025-10-05 05:04:03,276 - root - INFO - step: 10545 loss: 2.4280 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1435 +[titan] 2025-10-05 05:04:03,277 - root - INFO - lr: 4.3097e-05 gnorm: 1.16 [ 6:29:54<18:09:05] +[titan] 2025-10-05 05:04:11,963 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:04:14,149 - root - INFO - step: 10550 loss: 2.2936 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0258 +[titan] 2025-10-05 05:04:14,149 - root - INFO - lr: 4.3090e-05 gnorm: 1.14 [ 6:30:04<18:08:54] +[titan] 2025-10-05 05:04:25,007 - root - INFO - step: 10555 loss: 2.3687 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0910 +[titan] 2025-10-05 05:04:25,007 - root - INFO - lr: 4.3084e-05 gnorm: 1.18 [ 6:30:15<18:08:42] +[titan] 2025-10-05 05:04:35,912 - root - INFO - step: 10560 loss: 2.4093 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1275 +[titan] 2025-10-05 05:04:35,912 - root - INFO - lr: 4.3077e-05 gnorm: 1.23 [ 6:30:26<18:08:30] +[titan] 2025-10-05 05:04:46,752 - root - INFO - step: 10565 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1548 +[titan] 2025-10-05 05:04:46,752 - root - INFO - lr: 4.3071e-05 gnorm: 1.10 [ 6:30:37<18:08:19] +[titan] 2025-10-05 05:04:57,630 - root - INFO - step: 10570 loss: 2.3849 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1052 +[titan] 2025-10-05 05:04:57,630 - root - INFO - lr: 4.3065e-05 gnorm: 1.13 [ 6:30:48<18:08:07] +[titan] 2025-10-05 05:05:08,469 - root - INFO - step: 10575 loss: 2.4749 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1842 +[titan] 2025-10-05 05:05:08,469 - root - INFO - lr: 4.3058e-05 gnorm: 1.19 [ 6:30:59<18:07:55] +[titan] 2025-10-05 05:05:19,334 - root - INFO - step: 10580 loss: 2.3851 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1046 +[titan] 2025-10-05 05:05:19,335 - root - INFO - lr: 
4.3052e-05 gnorm: 1.12 [ 6:31:10<18:07:43] +[titan] 2025-10-05 05:05:30,220 - root - INFO - step: 10585 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0900 +[titan] 2025-10-05 05:05:30,220 - root - INFO - lr: 4.3045e-05 gnorm: 1.17 [ 6:31:21<18:07:32] +[titan] 2025-10-05 05:05:41,134 - root - INFO - step: 10590 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0866 +[titan] 2025-10-05 05:05:41,134 - root - INFO - lr: 4.3039e-05 gnorm: 1.10 [ 6:31:31<18:07:20] +[titan] 2025-10-05 05:05:51,981 - root - INFO - step: 10595 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 05:05:51,981 - root - INFO - lr: 4.3032e-05 gnorm: 1.13 [ 6:31:42<18:07:08] +[titan] 2025-10-05 05:06:00,680 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:06:02,853 - root - INFO - step: 10600 loss: 2.4272 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1436 +[titan] 2025-10-05 05:06:02,853 - root - INFO - lr: 4.3026e-05 gnorm: 1.13 [ 6:31:53<18:06:57] +[titan] 2025-10-05 05:06:13,702 - root - INFO - step: 10605 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1154 +[titan] 2025-10-05 05:06:13,702 - root - INFO - lr: 4.3019e-05 gnorm: 1.18 [ 6:32:04<18:06:45] +[titan] 2025-10-05 05:06:24,546 - root - INFO - step: 10610 loss: 2.4439 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 05:06:24,547 - root - INFO - lr: 4.3013e-05 gnorm: 1.17 [ 6:32:15<18:06:33] +[titan] 2025-10-05 05:06:35,421 - root - INFO - step: 10615 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0903 +[titan] 2025-10-05 05:06:35,421 - root - INFO - lr: 4.3006e-05 gnorm: 1.10 [ 6:32:26<18:06:21] +[titan] 2025-10-05 05:06:46,307 - root - INFO - step: 10620 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 05:06:46,307 - root - INFO - lr: 4.3000e-05 gnorm: 1.15 [ 6:32:37<18:06:10] +[titan] 2025-10-05 05:06:57,167 - root - INFO - step: 10625 loss: 2.3874 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1077 +[titan] 2025-10-05 05:06:57,168 - root - INFO - lr: 4.2993e-05 gnorm: 1.16 [ 6:32:47<18:05:58] +[titan] 2025-10-05 05:07:08,027 - root - INFO - step: 10630 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0719 +[titan] 2025-10-05 05:07:08,027 - root - INFO - lr: 
4.2987e-05 gnorm: 1.17 [ 6:32:58<18:05:46] +[titan] 2025-10-05 05:07:18,912 - root - INFO - step: 10635 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0503 +[titan] 2025-10-05 05:07:18,912 - root - INFO - lr: 4.2981e-05 gnorm: 1.11 [ 6:33:09<18:05:35] +[titan] 2025-10-05 05:07:29,770 - root - INFO - step: 10640 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0909 +[titan] 2025-10-05 05:07:29,770 - root - INFO - lr: 4.2974e-05 gnorm: 1.11 [ 6:33:20<18:05:23] +[titan] 2025-10-05 05:07:40,642 - root - INFO - step: 10645 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1251 +[titan] 2025-10-05 05:07:40,642 - root - INFO - lr: 4.2968e-05 gnorm: 1.15 [ 6:33:31<18:05:11] +[titan] 2025-10-05 05:07:49,320 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:07:51,513 - root - INFO - step: 10650 loss: 2.3800 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 05:07:51,514 - root - INFO - lr: 4.2961e-05 gnorm: 1.13 [ 6:33:42<18:05:00] +[titan] 2025-10-05 05:08:02,386 - root - INFO - step: 10655 loss: 2.2876 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0198 +[titan] 2025-10-05 05:08:02,387 - root - INFO - lr: 4.2955e-05 gnorm: 1.11 [ 6:33:53<18:04:48] +[titan] 2025-10-05 05:08:13,251 - root - INFO - step: 10660 loss: 2.3831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 05:08:13,252 - root - INFO - lr: 4.2948e-05 gnorm: 1.14 [ 6:34:04<18:04:36] +[titan] 2025-10-05 05:08:24,145 - root - INFO - step: 10665 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 05:08:24,145 - root - INFO - lr: 4.2942e-05 gnorm: 1.11 [ 6:34:14<18:04:25] +[titan] 2025-10-05 05:08:34,996 - root - INFO - step: 10670 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 05:08:34,996 - root - INFO - lr: 4.2935e-05 gnorm: 1.10 [ 6:34:25<18:04:13] +[titan] 2025-10-05 05:08:45,876 - root - INFO - step: 10675 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1231 +[titan] 2025-10-05 05:08:45,876 - root - INFO - lr: 4.2929e-05 gnorm: 1.11 [ 6:34:36<18:04:01] +[titan] 2025-10-05 05:08:56,738 - root - INFO - step: 10680 loss: 2.4221 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1374 +[titan] 2025-10-05 05:08:56,738 - root - INFO - lr: 
4.2922e-05 gnorm: 1.12 [ 6:34:47<18:03:49] +[titan] 2025-10-05 05:09:07,575 - root - INFO - step: 10685 loss: 2.4893 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1990 +[titan] 2025-10-05 05:09:07,575 - root - INFO - lr: 4.2916e-05 gnorm: 1.14 [ 6:34:58<18:03:38] +[titan] 2025-10-05 05:09:18,438 - root - INFO - step: 10690 loss: 2.3907 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1112 +[titan] 2025-10-05 05:09:18,438 - root - INFO - lr: 4.2909e-05 gnorm: 1.15 [ 6:35:09<18:03:26] +[titan] 2025-10-05 05:09:29,320 - root - INFO - step: 10695 loss: 2.3485 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0733 +[titan] 2025-10-05 05:09:29,320 - root - INFO - lr: 4.2903e-05 gnorm: 1.12 [ 6:35:20<18:03:14] +[titan] 2025-10-05 05:09:38,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:09:40,188 - root - INFO - step: 10700 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0915 +[titan] 2025-10-05 05:09:40,188 - root - INFO - lr: 4.2896e-05 gnorm: 1.13 [ 6:35:31<18:03:03] +[titan] 2025-10-05 05:09:51,053 - root - INFO - step: 10705 loss: 2.4598 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1721 +[titan] 2025-10-05 05:09:51,054 - root - INFO - lr: 4.2890e-05 gnorm: 1.14 [ 6:35:41<18:02:51] +[titan] 2025-10-05 05:10:01,930 - root - INFO - step: 10710 loss: 2.4459 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 05:10:01,930 - root - INFO - lr: 4.2883e-05 gnorm: 1.13 [ 6:35:52<18:02:39] +[titan] 2025-10-05 05:10:12,779 - root - INFO - step: 10715 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:10:12,779 - root - INFO - lr: 4.2877e-05 gnorm: 1.10 [ 6:36:03<18:02:27] +[titan] 2025-10-05 05:10:23,641 - root - INFO - step: 10720 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0671 +[titan] 2025-10-05 05:10:23,641 - root - INFO - lr: 4.2870e-05 gnorm: 1.07 [ 6:36:14<18:02:16] +[titan] 2025-10-05 05:10:34,518 - root - INFO - step: 10725 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 05:10:34,518 - root - INFO - lr: 4.2864e-05 gnorm: 1.07 [ 6:36:25<18:02:04] +[titan] 2025-10-05 05:10:45,426 - root - INFO - step: 10730 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0982 +[titan] 2025-10-05 05:10:45,426 - root - INFO - lr: 
4.2857e-05 gnorm: 1.17 [ 6:36:36<18:01:52] +[titan] 2025-10-05 05:10:56,306 - root - INFO - step: 10735 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 05:10:56,306 - root - INFO - lr: 4.2851e-05 gnorm: 1.12 [ 6:36:47<18:01:41] +[titan] 2025-10-05 05:11:07,161 - root - INFO - step: 10740 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:11:07,161 - root - INFO - lr: 4.2844e-05 gnorm: 1.17 [ 6:36:57<18:01:29] +[titan] 2025-10-05 05:11:18,031 - root - INFO - step: 10745 loss: 2.3429 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0692 +[titan] 2025-10-05 05:11:18,031 - root - INFO - lr: 4.2837e-05 gnorm: 1.13 [ 6:37:08<18:01:17] +[titan] 2025-10-05 05:11:26,767 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:11:28,948 - root - INFO - step: 10750 loss: 2.2983 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 05:11:28,948 - root - INFO - lr: 4.2831e-05 gnorm: 1.14 [ 6:37:19<18:01:06] +[titan] 2025-10-05 05:11:33,457 - root - INFO - Dumping profiler traces at step 10752 +[titan] 2025-10-05 05:11:33,496 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:11:40,090 - root - INFO - step: 10755 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 29,411 tflops: 408.03 mfu: 41.26% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 05:11:40,090 - root - INFO - lr: 4.2824e-05 gnorm: 1.14 [ 6:37:30<18:00:55] +[titan] 2025-10-05 05:11:50,993 - root - INFO - step: 10760 loss: 2.3455 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0704 +[titan] 2025-10-05 05:11:50,993 - root - INFO - lr: 4.2818e-05 gnorm: 1.14 [ 6:37:41<18:00:43] +[titan] 2025-10-05 05:12:01,856 - root - INFO - step: 10765 loss: 2.3069 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0370 +[titan] 2025-10-05 05:12:01,857 - root - INFO - lr: 4.2811e-05 gnorm: 1.12 [ 6:37:52<18:00:32] +[titan] 2025-10-05 05:12:12,697 - root - INFO - step: 10770 loss: 2.3339 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 05:12:12,697 - root - INFO - lr: 4.2805e-05 gnorm: 1.09 [ 6:38:03<18:00:20] +[titan] 2025-10-05 05:12:23,573 - root - INFO - step: 10775 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1096 +[titan] 2025-10-05 05:12:23,573 - root - INFO - lr: 4.2798e-05 gnorm: 1.09 [ 6:38:14<18:00:08] +[titan] 2025-10-05 05:12:34,428 - root - INFO - step: 10780 loss: 
2.2969 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0279 +[titan] 2025-10-05 05:12:34,428 - root - INFO - lr: 4.2792e-05 gnorm: 1.09 [ 6:38:25<17:59:56] +[titan] 2025-10-05 05:12:45,414 - root - INFO - step: 10785 loss: 2.3471 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 05:12:45,414 - root - INFO - lr: 4.2785e-05 gnorm: 1.13 [ 6:38:36<17:59:45] +[titan] 2025-10-05 05:12:56,296 - root - INFO - step: 10790 loss: 2.3752 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0968 +[titan] 2025-10-05 05:12:56,297 - root - INFO - lr: 4.2779e-05 gnorm: 1.12 [ 6:38:47<17:59:33] +[titan] 2025-10-05 05:13:07,167 - root - INFO - step: 10795 loss: 2.3683 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:13:07,167 - root - INFO - lr: 4.2772e-05 gnorm: 1.15 [ 6:38:57<17:59:22] +[titan] 2025-10-05 05:13:15,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:13:18,033 - root - INFO - step: 10800 loss: 2.3892 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1094 +[titan] 2025-10-05 05:13:18,033 - root - INFO - lr: 4.2765e-05 gnorm: 1.12 [ 6:39:08<17:59:10] +[titan] 2025-10-05 05:13:28,909 - root - INFO - step: 10805 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0879 +[titan] 2025-10-05 05:13:28,909 - root - INFO - lr: 4.2759e-05 gnorm: 1.13 [ 6:39:19<17:58:58] +[titan] 2025-10-05 05:13:39,766 - root - INFO - step: 10810 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 05:13:39,766 - root - INFO - lr: 4.2752e-05 gnorm: 1.11 [ 6:39:30<17:58:47] +[titan] 2025-10-05 05:13:50,697 - root - INFO - step: 10815 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1334 +[titan] 2025-10-05 05:13:50,697 - root - INFO - lr: 4.2746e-05 gnorm: 1.13 [ 6:39:41<17:58:35] +[titan] 2025-10-05 05:14:01,553 - root - INFO - step: 10820 loss: 2.3463 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:14:01,554 - root - INFO - lr: 4.2739e-05 gnorm: 1.09 [ 6:39:52<17:58:23] +[titan] 2025-10-05 05:14:12,442 - root - INFO - step: 10825 loss: 2.3705 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0925 +[titan] 2025-10-05 05:14:12,442 - root - INFO - lr: 4.2733e-05 gnorm: 1.17 [ 6:40:03<17:58:12] +[titan] 2025-10-05 05:14:23,285 - root - INFO - step: 10830 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:14:23,285 - root - INFO - lr: 
4.2726e-05 gnorm: 1.13 [ 6:40:14<17:58:00] +[titan] 2025-10-05 05:14:34,165 - root - INFO - step: 10835 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 05:14:34,166 - root - INFO - lr: 4.2720e-05 gnorm: 1.16 [ 6:40:24<17:57:48] +[titan] 2025-10-05 05:14:45,051 - root - INFO - step: 10840 loss: 2.3728 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0952 +[titan] 2025-10-05 05:14:45,052 - root - INFO - lr: 4.2713e-05 gnorm: 1.13 [ 6:40:35<17:57:37] +[titan] 2025-10-05 05:14:55,878 - root - INFO - step: 10845 loss: 2.4128 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 05:14:55,878 - root - INFO - lr: 4.2706e-05 gnorm: 1.10 [ 6:40:46<17:57:25] +[titan] 2025-10-05 05:15:04,525 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:15:06,705 - root - INFO - step: 10850 loss: 2.3718 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:15:06,705 - root - INFO - lr: 4.2700e-05 gnorm: 1.12 [ 6:40:57<17:57:13] +[titan] 2025-10-05 05:15:17,575 - root - INFO - step: 10855 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0802 +[titan] 2025-10-05 05:15:17,575 - root - INFO - lr: 4.2693e-05 gnorm: 1.14 [ 6:41:08<17:57:02] +[titan] 2025-10-05 05:15:28,456 - root - INFO - step: 10860 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0820 +[titan] 2025-10-05 05:15:28,456 - root - INFO - lr: 4.2687e-05 gnorm: 1.13 [ 6:41:19<17:56:50] +[titan] 2025-10-05 05:15:39,313 - root - INFO - step: 10865 loss: 2.4256 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 05:15:39,313 - root - INFO - lr: 4.2680e-05 gnorm: 1.10 [ 6:41:30<17:56:38] +[titan] 2025-10-05 05:15:50,205 - root - INFO - step: 10870 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 05:15:50,205 - root - INFO - lr: 4.2673e-05 gnorm: 1.13 [ 6:41:41<17:56:27] +[titan] 2025-10-05 05:16:01,082 - root - INFO - step: 10875 loss: 2.3634 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:16:01,082 - root - INFO - lr: 4.2667e-05 gnorm: 1.15 [ 6:41:51<17:56:15] +[titan] 2025-10-05 05:16:11,946 - root - INFO - step: 10880 loss: 2.3075 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 05:16:11,946 - root - INFO - lr: 
4.2660e-05 gnorm: 1.14 [ 6:42:02<17:56:03] +[titan] 2025-10-05 05:16:22,841 - root - INFO - step: 10885 loss: 2.4065 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1258 +[titan] 2025-10-05 05:16:22,841 - root - INFO - lr: 4.2654e-05 gnorm: 1.21 [ 6:42:13<17:55:52] +[titan] 2025-10-05 05:16:33,734 - root - INFO - step: 10890 loss: 2.3635 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0864 +[titan] 2025-10-05 05:16:33,734 - root - INFO - lr: 4.2647e-05 gnorm: 1.10 [ 6:42:24<17:55:40] +[titan] 2025-10-05 05:16:44,609 - root - INFO - step: 10895 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1223 +[titan] 2025-10-05 05:16:44,609 - root - INFO - lr: 4.2640e-05 gnorm: 1.11 [ 6:42:35<17:55:28] +[titan] 2025-10-05 05:16:53,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:16:55,473 - root - INFO - step: 10900 loss: 2.3494 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0759 +[titan] 2025-10-05 05:16:55,473 - root - INFO - lr: 4.2634e-05 gnorm: 1.15 [ 6:42:46<17:55:17] +[titan] 2025-10-05 05:17:06,345 - root - INFO - step: 10905 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 05:17:06,345 - root - INFO - lr: 4.2627e-05 gnorm: 1.13 [ 6:42:57<17:55:05] +[titan] 2025-10-05 05:17:17,231 - root - INFO - step: 10910 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0659 +[titan] 2025-10-05 05:17:17,231 - root - INFO - lr: 4.2621e-05 gnorm: 1.17 [ 6:43:08<17:54:54] +[titan] 2025-10-05 05:17:28,109 - root - INFO - step: 10915 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0894 +[titan] 2025-10-05 05:17:28,110 - root - INFO - lr: 4.2614e-05 gnorm: 1.19 [ 6:43:18<17:54:42] +[titan] 2025-10-05 05:17:39,014 - root - INFO - step: 10920 loss: 2.3277 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:17:39,014 - root - INFO - lr: 4.2607e-05 gnorm: 1.14 [ 6:43:29<17:54:30] +[titan] 2025-10-05 05:17:49,944 - root - INFO - step: 10925 loss: 2.3202 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0487 +[titan] 2025-10-05 05:17:49,944 - root - INFO - lr: 4.2601e-05 gnorm: 1.12 [ 6:43:40<17:54:19] +[titan] 2025-10-05 05:18:00,806 - root - INFO - step: 10930 loss: 2.3343 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0611 +[titan] 2025-10-05 05:18:00,807 - root - INFO - lr: 
4.2594e-05 gnorm: 1.12 [ 6:43:51<17:54:07] +[titan] 2025-10-05 05:18:11,668 - root - INFO - step: 10935 loss: 2.4012 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1192 +[titan] 2025-10-05 05:18:11,669 - root - INFO - lr: 4.2588e-05 gnorm: 1.13 [ 6:44:02<17:53:55] +[titan] 2025-10-05 05:18:22,533 - root - INFO - step: 10940 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:18:22,533 - root - INFO - lr: 4.2581e-05 gnorm: 1.10 [ 6:44:13<17:53:44] +[titan] 2025-10-05 05:18:33,393 - root - INFO - step: 10945 loss: 2.3284 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0574 +[titan] 2025-10-05 05:18:33,393 - root - INFO - lr: 4.2574e-05 gnorm: 1.16 [ 6:44:24<17:53:32] +[titan] 2025-10-05 05:18:42,067 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:18:44,246 - root - INFO - step: 10950 loss: 2.3482 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0732 +[titan] 2025-10-05 05:18:44,246 - root - INFO - lr: 4.2568e-05 gnorm: 1.17 [ 6:44:35<17:53:20] +[titan] 2025-10-05 05:18:55,149 - root - INFO - step: 10955 loss: 2.4275 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 05:18:55,149 - root - INFO - lr: 4.2561e-05 gnorm: 1.19 [ 6:44:45<17:53:09] +[titan] 2025-10-05 05:19:06,006 - root - INFO - step: 10960 loss: 2.3559 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 05:19:06,006 - root - INFO - lr: 4.2554e-05 gnorm: 1.17 [ 6:44:56<17:52:57] +[titan] 2025-10-05 05:19:16,844 - root - INFO - step: 10965 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0690 +[titan] 2025-10-05 05:19:16,844 - root - INFO - lr: 4.2548e-05 gnorm: 1.13 [ 6:45:07<17:52:45] +[titan] 2025-10-05 05:19:27,707 - root - INFO - step: 10970 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0665 +[titan] 2025-10-05 05:19:27,707 - root - INFO - lr: 4.2541e-05 gnorm: 1.11 [ 6:45:18<17:52:34] +[titan] 2025-10-05 05:19:38,565 - root - INFO - step: 10975 loss: 2.4017 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1202 +[titan] 2025-10-05 05:19:38,565 - root - INFO - lr: 4.2535e-05 gnorm: 1.13 [ 6:45:29<17:52:22] +[titan] 2025-10-05 05:19:49,430 - root - INFO - step: 10980 loss: 2.3707 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0929 +[titan] 2025-10-05 05:19:49,430 - root - INFO - lr: 
4.2528e-05 gnorm: 1.14 [ 6:45:40<17:52:10] +[titan] 2025-10-05 05:20:00,329 - root - INFO - step: 10985 loss: 2.3910 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 05:20:00,329 - root - INFO - lr: 4.2521e-05 gnorm: 1.11 [ 6:45:51<17:51:59] +[titan] 2025-10-05 05:20:11,199 - root - INFO - step: 10990 loss: 2.2943 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 05:20:11,199 - root - INFO - lr: 4.2515e-05 gnorm: 1.15 [ 6:46:01<17:51:47] +[titan] 2025-10-05 05:20:22,060 - root - INFO - step: 10995 loss: 2.4220 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1390 +[titan] 2025-10-05 05:20:22,060 - root - INFO - lr: 4.2508e-05 gnorm: 1.17 [ 6:46:12<17:51:36] +[titan] 2025-10-05 05:20:30,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:20:32,950 - root - INFO - step: 11000 loss: 2.4329 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 05:20:32,950 - root - INFO - lr: 4.2501e-05 gnorm: 1.13 [ 6:46:23<17:51:24] +[titan] 2025-10-05 05:20:43,793 - root - INFO - step: 11005 loss: 2.3674 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0912 +[titan] 2025-10-05 05:20:43,793 - root - INFO - lr: 4.2495e-05 gnorm: 1.13 [ 6:46:34<17:51:12] +[titan] 2025-10-05 05:20:54,676 - root - INFO - step: 11010 loss: 2.3859 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.1074 +[titan] 2025-10-05 05:20:54,677 - root - INFO - lr: 4.2488e-05 gnorm: 1.23 [ 6:46:45<17:51:01] +[titan] 2025-10-05 05:21:05,537 - root - INFO - step: 11015 loss: 2.4219 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 05:21:05,537 - root - INFO - lr: 4.2481e-05 gnorm: 1.14 [ 6:46:56<17:50:49] +[titan] 2025-10-05 05:21:16,444 - root - INFO - step: 11020 loss: 2.3693 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0921 +[titan] 2025-10-05 05:21:16,445 - root - INFO - lr: 4.2475e-05 gnorm: 1.15 [ 6:47:07<17:50:37] +[titan] 2025-10-05 05:21:27,322 - root - INFO - step: 11025 loss: 2.4120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1303 +[titan] 2025-10-05 05:21:27,323 - root - INFO - lr: 4.2468e-05 gnorm: 1.14 [ 6:47:18<17:50:26] +[titan] 2025-10-05 05:21:38,201 - root - INFO - step: 11030 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2721 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:21:38,202 - root - INFO - lr: 
4.2461e-05 gnorm: 1.11 [ 6:47:28<17:50:14] +[titan] 2025-10-05 05:21:49,263 - root - INFO - step: 11035 loss: 2.3662 memory: 118.84GiB(85.28%) tps: 29,623 tflops: 410.98 mfu: 41.55% global_avg_ntp_loss: 0.2773 global_avg_mtp_loss: 2.0889 +[titan] 2025-10-05 05:21:49,264 - root - INFO - lr: 4.2455e-05 gnorm: 1.06 [ 6:47:40<17:50:03] +[titan] 2025-10-05 05:22:00,112 - root - INFO - step: 11040 loss: 2.3713 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0938 +[titan] 2025-10-05 05:22:00,112 - root - INFO - lr: 4.2448e-05 gnorm: 1.16 [ 6:47:50<17:49:51] +[titan] 2025-10-05 05:22:10,978 - root - INFO - step: 11045 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0794 +[titan] 2025-10-05 05:22:10,978 - root - INFO - lr: 4.2441e-05 gnorm: 1.12 [ 6:48:01<17:49:40] +[titan] 2025-10-05 05:22:19,675 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:22:21,866 - root - INFO - step: 11050 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:22:21,866 - root - INFO - lr: 4.2435e-05 gnorm: 1.18 [ 6:48:12<17:49:28] +[titan] 2025-10-05 05:22:32,725 - root - INFO - step: 11055 loss: 2.4619 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1734 +[titan] 2025-10-05 05:22:32,725 - root - INFO - lr: 4.2428e-05 gnorm: 1.17 [ 6:48:23<17:49:16] +[titan] 2025-10-05 05:22:43,603 - root - INFO - step: 11060 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 05:22:43,604 - root - INFO - lr: 4.2421e-05 gnorm: 1.18 [ 6:48:34<17:49:05] +[titan] 2025-10-05 05:22:54,557 - root - INFO - step: 11065 loss: 2.3059 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0363 +[titan] 2025-10-05 05:22:54,558 - root - INFO - lr: 4.2415e-05 gnorm: 1.11 [ 6:48:45<17:48:53] +[titan] 2025-10-05 05:23:05,447 - root - INFO - step: 11070 loss: 2.3833 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1048 +[titan] 2025-10-05 05:23:05,447 - root - INFO - lr: 4.2408e-05 gnorm: 1.15 [ 6:48:56<17:48:42] +[titan] 2025-10-05 05:23:16,319 - root - INFO - step: 11075 loss: 2.3472 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:23:16,319 - root - INFO - lr: 4.2401e-05 gnorm: 1.12 [ 6:49:07<17:48:30] +[titan] 2025-10-05 05:23:27,231 - root - INFO - step: 11080 loss: 2.3159 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0451 +[titan] 2025-10-05 05:23:27,231 - root - INFO - lr: 
4.2395e-05 gnorm: 1.15 [ 6:49:18<17:48:19] +[titan] 2025-10-05 05:23:38,120 - root - INFO - step: 11085 loss: 2.3918 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 05:23:38,120 - root - INFO - lr: 4.2388e-05 gnorm: 1.10 [ 6:49:28<17:48:07] +[titan] 2025-10-05 05:23:48,999 - root - INFO - step: 11090 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:23:48,999 - root - INFO - lr: 4.2381e-05 gnorm: 1.12 [ 6:49:39<17:47:55] +[titan] 2025-10-05 05:23:59,936 - root - INFO - step: 11095 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:23:59,936 - root - INFO - lr: 4.2375e-05 gnorm: 1.15 [ 6:49:50<17:47:44] +[titan] 2025-10-05 05:24:08,638 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:24:10,828 - root - INFO - step: 11100 loss: 2.3700 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:24:10,828 - root - INFO - lr: 4.2368e-05 gnorm: 1.16 [ 6:50:01<17:47:32] +[titan] 2025-10-05 05:24:21,716 - root - INFO - step: 11105 loss: 2.3080 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 05:24:21,716 - root - INFO - lr: 4.2361e-05 gnorm: 1.11 [ 6:50:12<17:47:21] +[titan] 2025-10-05 05:24:32,602 - root - INFO - step: 11110 loss: 2.3389 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0656 +[titan] 2025-10-05 05:24:32,602 - root - INFO - lr: 4.2354e-05 gnorm: 1.18 [ 6:50:23<17:47:09] +[titan] 2025-10-05 05:24:43,497 - root - INFO - step: 11115 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:24:43,498 - root - INFO - lr: 4.2348e-05 gnorm: 1.16 [ 6:50:34<17:46:58] +[titan] 2025-10-05 05:24:54,382 - root - INFO - step: 11120 loss: 2.3434 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0676 +[titan] 2025-10-05 05:24:54,382 - root - INFO - lr: 4.2341e-05 gnorm: 1.17 [ 6:50:45<17:46:46] +[titan] 2025-10-05 05:25:05,236 - root - INFO - step: 11125 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:25:05,236 - root - INFO - lr: 4.2334e-05 gnorm: 1.14 [ 6:50:56<17:46:34] +[titan] 2025-10-05 05:25:16,090 - root - INFO - step: 11130 loss: 2.3586 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0834 +[titan] 2025-10-05 05:25:16,091 - root - INFO - lr: 
4.2328e-05 gnorm: 1.10 [ 6:51:06<17:46:23] +[titan] 2025-10-05 05:25:26,938 - root - INFO - step: 11135 loss: 2.3923 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 05:25:26,939 - root - INFO - lr: 4.2321e-05 gnorm: 1.15 [ 6:51:17<17:46:11] +[titan] 2025-10-05 05:25:37,783 - root - INFO - step: 11140 loss: 2.3864 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 05:25:37,783 - root - INFO - lr: 4.2314e-05 gnorm: 1.15 [ 6:51:28<17:45:59] +[titan] 2025-10-05 05:25:48,642 - root - INFO - step: 11145 loss: 2.3257 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0532 +[titan] 2025-10-05 05:25:48,642 - root - INFO - lr: 4.2307e-05 gnorm: 1.12 [ 6:51:39<17:45:48] +[titan] 2025-10-05 05:25:57,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:25:59,544 - root - INFO - step: 11150 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0565 +[titan] 2025-10-05 05:25:59,544 - root - INFO - lr: 4.2301e-05 gnorm: 1.12 [ 6:51:50<17:45:36] +[titan] 2025-10-05 05:26:10,397 - root - INFO - step: 11155 loss: 2.3187 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0478 +[titan] 2025-10-05 05:26:10,397 - root - INFO - lr: 4.2294e-05 gnorm: 1.08 [ 6:52:01<17:45:24] +[titan] 2025-10-05 05:26:21,273 - root - INFO - step: 11160 loss: 2.3623 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0869 +[titan] 2025-10-05 05:26:21,273 - root - INFO - lr: 4.2287e-05 gnorm: 1.14 [ 6:52:12<17:45:13] +[titan] 2025-10-05 05:26:32,142 - root - INFO - step: 11165 loss: 2.3541 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:26:32,142 - root - INFO - lr: 4.2281e-05 gnorm: 1.13 [ 6:52:22<17:45:01] +[titan] 2025-10-05 05:26:43,035 - root - INFO - step: 11170 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 05:26:43,035 - root - INFO - lr: 4.2274e-05 gnorm: 1.13 [ 6:52:33<17:44:50] +[titan] 2025-10-05 05:26:53,989 - root - INFO - step: 11175 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.04 mfu: 41.97% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 05:26:53,989 - root - INFO - lr: 4.2267e-05 gnorm: 1.12 [ 6:52:44<17:44:38] +[titan] 2025-10-05 05:27:04,880 - root - INFO - step: 11180 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0666 +[titan] 2025-10-05 05:27:04,880 - root - INFO - lr: 
4.2260e-05 gnorm: 1.19 [ 6:52:55<17:44:27] +[titan] 2025-10-05 05:27:15,757 - root - INFO - step: 11185 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0699 +[titan] 2025-10-05 05:27:15,757 - root - INFO - lr: 4.2254e-05 gnorm: 1.15 [ 6:53:06<17:44:15] +[titan] 2025-10-05 05:27:26,622 - root - INFO - step: 11190 loss: 2.3961 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1162 +[titan] 2025-10-05 05:27:26,622 - root - INFO - lr: 4.2247e-05 gnorm: 1.10 [ 6:53:17<17:44:03] +[titan] 2025-10-05 05:27:37,484 - root - INFO - step: 11195 loss: 2.3721 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 05:27:37,484 - root - INFO - lr: 4.2240e-05 gnorm: 1.15 [ 6:53:28<17:43:52] +[titan] 2025-10-05 05:27:46,182 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:27:48,372 - root - INFO - step: 11200 loss: 2.3645 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:27:48,372 - root - INFO - lr: 4.2233e-05 gnorm: 1.17 [ 6:53:39<17:43:40] +[titan] 2025-10-05 05:27:59,307 - root - INFO - step: 11205 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:27:59,307 - root - INFO - lr: 4.2227e-05 gnorm: 1.09 [ 6:53:50<17:43:29] +[titan] 2025-10-05 05:28:10,176 - root - INFO - step: 11210 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0464 +[titan] 2025-10-05 05:28:10,176 - root - INFO - lr: 4.2220e-05 gnorm: 1.15 [ 6:54:00<17:43:17] +[titan] 2025-10-05 05:28:21,076 - root - INFO - step: 11215 loss: 2.3354 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 05:28:21,076 - root - INFO - lr: 4.2213e-05 gnorm: 1.14 [ 6:54:11<17:43:06] +[titan] 2025-10-05 05:28:31,935 - root - INFO - step: 11220 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0592 +[titan] 2025-10-05 05:28:31,935 - root - INFO - lr: 4.2206e-05 gnorm: 1.10 [ 6:54:22<17:42:54] +[titan] 2025-10-05 05:28:42,804 - root - INFO - step: 11225 loss: 2.2877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 05:28:42,805 - root - INFO - lr: 4.2200e-05 gnorm: 1.15 [ 6:54:33<17:42:42] +[titan] 2025-10-05 05:28:53,662 - root - INFO - step: 11230 loss: 2.3995 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 05:28:53,662 - root - INFO - lr: 
4.2193e-05 gnorm: 1.17 [ 6:54:44<17:42:31] +[titan] 2025-10-05 05:29:04,634 - root - INFO - step: 11235 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 05:29:04,634 - root - INFO - lr: 4.2186e-05 gnorm: 1.17 [ 6:54:55<17:42:19] +[titan] 2025-10-05 05:29:15,534 - root - INFO - step: 11240 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0842 +[titan] 2025-10-05 05:29:15,535 - root - INFO - lr: 4.2179e-05 gnorm: 1.12 [ 6:55:06<17:42:08] +[titan] 2025-10-05 05:29:26,383 - root - INFO - step: 11245 loss: 2.3641 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0871 +[titan] 2025-10-05 05:29:26,383 - root - INFO - lr: 4.2173e-05 gnorm: 1.08 [ 6:55:17<17:41:56] +[titan] 2025-10-05 05:29:35,041 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:29:37,225 - root - INFO - step: 11250 loss: 2.3893 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 05:29:37,226 - root - INFO - lr: 4.2166e-05 gnorm: 1.11 [ 6:55:27<17:41:44] +[titan] 2025-10-05 05:29:48,080 - root - INFO - step: 11255 loss: 2.3315 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0589 +[titan] 2025-10-05 05:29:48,080 - root - INFO - lr: 4.2159e-05 gnorm: 1.15 [ 6:55:38<17:41:33] +[titan] 2025-10-05 05:29:58,912 - root - INFO - step: 11260 loss: 2.3790 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1000 +[titan] 2025-10-05 05:29:58,912 - root - INFO - lr: 4.2152e-05 gnorm: 1.11 [ 6:55:49<17:41:21] +[titan] 2025-10-05 05:30:07,826 - root - INFO - Dumping profiler traces at step 11264 +[titan] 2025-10-05 05:30:07,864 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:30:10,050 - root - INFO - step: 11265 loss: 2.2811 memory: 118.84GiB(85.28%) tps: 29,420 tflops: 408.16 mfu: 41.27% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 05:30:10,051 - root - INFO - lr: 4.2146e-05 gnorm: 1.10 [ 6:56:00<17:41:10] +[titan] 2025-10-05 05:30:20,892 - root - INFO - step: 11270 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0809 +[titan] 2025-10-05 05:30:20,892 - root - INFO - lr: 4.2139e-05 gnorm: 1.12 [ 6:56:11<17:40:58] +[titan] 2025-10-05 05:30:31,735 - root - INFO - step: 11275 loss: 2.3738 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0947 +[titan] 2025-10-05 05:30:31,735 - root - INFO - lr: 4.2132e-05 gnorm: 1.10 [ 6:56:22<17:40:47] +[titan] 2025-10-05 05:30:42,574 - root - INFO - step: 11280 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 05:30:42,574 - root - INFO - lr: 4.2125e-05 gnorm: 1.10 [ 6:56:33<17:40:35] +[titan] 2025-10-05 05:30:53,425 - root - INFO - step: 11285 loss: 2.3915 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1121 +[titan] 2025-10-05 05:30:53,426 - root - INFO - lr: 4.2118e-05 gnorm: 1.14 [ 6:56:44<17:40:23] +[titan] 2025-10-05 05:31:04,306 - root - INFO - step: 11290 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1164 +[titan] 2025-10-05 05:31:04,307 - root - INFO - lr: 4.2112e-05 gnorm: 1.16 [ 6:56:55<17:40:12] +[titan] 2025-10-05 05:31:15,165 - root - INFO - step: 11295 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 05:31:15,165 - root - INFO - lr: 4.2105e-05 gnorm: 1.16 [ 6:57:05<17:40:00] +[titan] 2025-10-05 05:31:23,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:31:26,028 - root - INFO - step: 11300 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1297 +[titan] 2025-10-05 05:31:26,028 - root - INFO - lr: 4.2098e-05 gnorm: 1.16 [ 6:57:16<17:39:49] +[titan] 2025-10-05 05:31:36,890 - root - INFO - step: 11305 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 05:31:36,890 - root - INFO - lr: 4.2091e-05 gnorm: 1.19 [ 6:57:27<17:39:37] +[titan] 2025-10-05 05:31:47,751 - root - INFO - step: 11310 loss: 2.3629 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0867 +[titan] 2025-10-05 05:31:47,751 - root - INFO - lr: 4.2084e-05 gnorm: 1.13 [ 6:57:38<17:39:25] +[titan] 2025-10-05 05:31:58,646 - root - INFO - step: 11315 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0333 +[titan] 2025-10-05 05:31:58,646 - root - INFO - lr: 4.2078e-05 gnorm: 1.14 [ 6:57:49<17:39:14] +[titan] 2025-10-05 05:32:09,512 - root - INFO - step: 11320 loss: 2.4605 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 05:32:09,512 - root - INFO - lr: 4.2071e-05 gnorm: 1.15 [ 6:58:00<17:39:02] +[titan] 2025-10-05 05:32:20,392 - root - INFO - step: 11325 loss: 2.3568 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0807 +[titan] 2025-10-05 05:32:20,392 - root - INFO - lr: 4.2064e-05 gnorm: 1.12 [ 6:58:11<17:38:51] +[titan] 2025-10-05 05:32:31,290 - root - INFO - step: 11330 loss: 2.4028 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1208 +[titan] 2025-10-05 05:32:31,290 - root - INFO - lr: 
4.2057e-05 gnorm: 1.14 [ 6:58:22<17:38:39] +[titan] 2025-10-05 05:32:42,174 - root - INFO - step: 11335 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:32:42,174 - root - INFO - lr: 4.2050e-05 gnorm: 1.16 [ 6:58:32<17:38:27] +[titan] 2025-10-05 05:32:53,063 - root - INFO - step: 11340 loss: 2.3303 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0571 +[titan] 2025-10-05 05:32:53,064 - root - INFO - lr: 4.2044e-05 gnorm: 1.10 [ 6:58:43<17:38:16] +[titan] 2025-10-05 05:33:03,971 - root - INFO - step: 11345 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.1089 +[titan] 2025-10-05 05:33:03,972 - root - INFO - lr: 4.2037e-05 gnorm: 1.10 [ 6:58:54<17:38:04] +[titan] 2025-10-05 05:33:12,662 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:33:14,854 - root - INFO - step: 11350 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:33:14,855 - root - INFO - lr: 4.2030e-05 gnorm: 1.16 [ 6:59:05<17:37:53] +[titan] 2025-10-05 05:33:25,725 - root - INFO - step: 11355 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:33:25,725 - root - INFO - lr: 4.2023e-05 gnorm: 1.14 [ 6:59:16<17:37:41] +[titan] 2025-10-05 05:33:36,578 - root - INFO - step: 11360 loss: 2.2858 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0185 +[titan] 2025-10-05 05:33:36,578 - root - INFO - lr: 4.2016e-05 gnorm: 1.08 [ 6:59:27<17:37:30] +[titan] 2025-10-05 05:33:47,452 - root - INFO - step: 11365 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:33:47,452 - root - INFO - lr: 4.2010e-05 gnorm: 1.07 [ 6:59:38<17:37:18] +[titan] 2025-10-05 05:33:58,347 - root - INFO - step: 11370 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0475 +[titan] 2025-10-05 05:33:58,347 - root - INFO - lr: 4.2003e-05 gnorm: 1.09 [ 6:59:49<17:37:06] +[titan] 2025-10-05 05:34:09,277 - root - INFO - step: 11375 loss: 2.4178 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1340 +[titan] 2025-10-05 05:34:09,277 - root - INFO - lr: 4.1996e-05 gnorm: 1.13 [ 7:00:00<17:36:55] +[titan] 2025-10-05 05:34:20,157 - root - INFO - step: 11380 loss: 2.3349 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:34:20,157 - root - INFO - lr: 
4.1989e-05 gnorm: 1.18 [ 7:00:10<17:36:43] +[titan] 2025-10-05 05:34:31,049 - root - INFO - step: 11385 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:34:31,049 - root - INFO - lr: 4.1982e-05 gnorm: 1.10 [ 7:00:21<17:36:32] +[titan] 2025-10-05 05:34:41,929 - root - INFO - step: 11390 loss: 2.4099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1279 +[titan] 2025-10-05 05:34:41,929 - root - INFO - lr: 4.1975e-05 gnorm: 1.10 [ 7:00:32<17:36:20] +[titan] 2025-10-05 05:34:52,785 - root - INFO - step: 11395 loss: 2.3564 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:34:52,785 - root - INFO - lr: 4.1969e-05 gnorm: 1.15 [ 7:00:43<17:36:09] +[titan] 2025-10-05 05:35:01,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:35:03,690 - root - INFO - step: 11400 loss: 2.4143 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1315 +[titan] 2025-10-05 05:35:03,690 - root - INFO - lr: 4.1962e-05 gnorm: 1.14 [ 7:00:54<17:35:57] +[titan] 2025-10-05 05:35:14,535 - root - INFO - step: 11405 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 05:35:14,536 - root - INFO - lr: 4.1955e-05 gnorm: 1.17 [ 7:01:05<17:35:45] +[titan] 2025-10-05 05:35:25,412 - root - INFO - step: 11410 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0477 +[titan] 2025-10-05 05:35:25,412 - root - INFO - lr: 4.1948e-05 gnorm: 1.13 [ 7:01:16<17:35:34] +[titan] 2025-10-05 05:35:36,263 - root - INFO - step: 11415 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0351 +[titan] 2025-10-05 05:35:36,263 - root - INFO - lr: 4.1941e-05 gnorm: 1.12 [ 7:01:27<17:35:22] +[titan] 2025-10-05 05:35:47,122 - root - INFO - step: 11420 loss: 2.3875 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 05:35:47,122 - root - INFO - lr: 4.1934e-05 gnorm: 1.14 [ 7:01:37<17:35:11] +[titan] 2025-10-05 05:35:57,974 - root - INFO - step: 11425 loss: 2.3552 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0795 +[titan] 2025-10-05 05:35:57,974 - root - INFO - lr: 4.1928e-05 gnorm: 1.13 [ 7:01:48<17:34:59] +[titan] 2025-10-05 05:36:08,849 - root - INFO - step: 11430 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 05:36:08,849 - root - INFO - lr: 
4.1921e-05 gnorm: 1.17 [ 7:01:59<17:34:47] +[titan] 2025-10-05 05:36:19,695 - root - INFO - step: 11435 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:36:19,695 - root - INFO - lr: 4.1914e-05 gnorm: 1.16 [ 7:02:10<17:34:36] +[titan] 2025-10-05 05:36:30,564 - root - INFO - step: 11440 loss: 2.3449 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0705 +[titan] 2025-10-05 05:36:30,564 - root - INFO - lr: 4.1907e-05 gnorm: 1.08 [ 7:02:21<17:34:24] +[titan] 2025-10-05 05:36:41,427 - root - INFO - step: 11445 loss: 2.4403 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1539 +[titan] 2025-10-05 05:36:41,427 - root - INFO - lr: 4.1900e-05 gnorm: 1.15 [ 7:02:32<17:34:13] +[titan] 2025-10-05 05:36:50,091 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:36:52,270 - root - INFO - step: 11450 loss: 2.3496 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:36:52,270 - root - INFO - lr: 4.1893e-05 gnorm: 1.14 [ 7:02:43<17:34:01] +[titan] 2025-10-05 05:37:03,144 - root - INFO - step: 11455 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 05:37:03,145 - root - INFO - lr: 4.1886e-05 gnorm: 1.13 [ 7:02:53<17:33:49] +[titan] 2025-10-05 05:37:13,972 - root - INFO - step: 11460 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 05:37:13,973 - root - INFO - lr: 4.1880e-05 gnorm: 1.13 [ 7:03:04<17:33:38] +[titan] 2025-10-05 05:37:24,845 - root - INFO - step: 11465 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0606 +[titan] 2025-10-05 05:37:24,845 - root - INFO - lr: 4.1873e-05 gnorm: 1.16 [ 7:03:15<17:33:26] +[titan] 2025-10-05 05:37:35,703 - root - INFO - step: 11470 loss: 2.3317 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0582 +[titan] 2025-10-05 05:37:35,703 - root - INFO - lr: 4.1866e-05 gnorm: 1.10 [ 7:03:26<17:33:14] +[titan] 2025-10-05 05:37:46,570 - root - INFO - step: 11475 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 05:37:46,570 - root - INFO - lr: 4.1859e-05 gnorm: 1.18 [ 7:03:37<17:33:03] +[titan] 2025-10-05 05:37:57,446 - root - INFO - step: 11480 loss: 2.3142 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0433 +[titan] 2025-10-05 05:37:57,447 - root - INFO - lr: 
4.1852e-05 gnorm: 1.10 [ 7:03:48<17:32:51] +[titan] 2025-10-05 05:38:08,329 - root - INFO - step: 11485 loss: 2.3042 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0343 +[titan] 2025-10-05 05:38:08,329 - root - INFO - lr: 4.1845e-05 gnorm: 1.16 [ 7:03:59<17:32:40] +[titan] 2025-10-05 05:38:19,195 - root - INFO - step: 11490 loss: 2.4232 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1402 +[titan] 2025-10-05 05:38:19,195 - root - INFO - lr: 4.1838e-05 gnorm: 1.17 [ 7:04:09<17:32:28] +[titan] 2025-10-05 05:38:30,073 - root - INFO - step: 11495 loss: 2.3563 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0808 +[titan] 2025-10-05 05:38:30,073 - root - INFO - lr: 4.1831e-05 gnorm: 1.12 [ 7:04:20<17:32:17] +[titan] 2025-10-05 05:38:38,740 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:38:40,929 - root - INFO - step: 11500 loss: 2.3519 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0760 +[titan] 2025-10-05 05:38:40,929 - root - INFO - lr: 4.1825e-05 gnorm: 1.09 [ 7:04:31<17:32:05] +[titan] 2025-10-05 05:38:51,791 - root - INFO - step: 11505 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 05:38:51,792 - root - INFO - lr: 4.1818e-05 gnorm: 1.18 [ 7:04:42<17:31:53] +[titan] 2025-10-05 05:39:02,689 - root - INFO - step: 11510 loss: 2.3200 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0488 +[titan] 2025-10-05 05:39:02,689 - root - INFO - lr: 4.1811e-05 gnorm: 1.13 [ 7:04:53<17:31:42] +[titan] 2025-10-05 05:39:13,585 - root - INFO - step: 11515 loss: 2.4548 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1671 +[titan] 2025-10-05 05:39:13,586 - root - INFO - lr: 4.1804e-05 gnorm: 1.13 [ 7:05:04<17:31:30] +[titan] 2025-10-05 05:39:24,449 - root - INFO - step: 11520 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0252 +[titan] 2025-10-05 05:39:24,449 - root - INFO - lr: 4.1797e-05 gnorm: 1.15 [ 7:05:15<17:31:19] +[titan] 2025-10-05 05:39:35,295 - root - INFO - step: 11525 loss: 2.2866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 05:39:35,295 - root - INFO - lr: 4.1790e-05 gnorm: 1.07 [ 7:05:26<17:31:07] +[titan] 2025-10-05 05:39:46,183 - root - INFO - step: 11530 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0847 +[titan] 2025-10-05 05:39:46,183 - root - INFO - lr: 
4.1783e-05 gnorm: 1.14 [ 7:05:36<17:30:56] +[titan] 2025-10-05 05:39:57,043 - root - INFO - step: 11535 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 05:39:57,043 - root - INFO - lr: 4.1776e-05 gnorm: 1.14 [ 7:05:47<17:30:44] +[titan] 2025-10-05 05:40:07,934 - root - INFO - step: 11540 loss: 2.3581 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0821 +[titan] 2025-10-05 05:40:07,934 - root - INFO - lr: 4.1769e-05 gnorm: 1.10 [ 7:05:58<17:30:32] +[titan] 2025-10-05 05:40:18,821 - root - INFO - step: 11545 loss: 2.4229 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:40:18,821 - root - INFO - lr: 4.1763e-05 gnorm: 1.15 [ 7:06:09<17:30:21] +[titan] 2025-10-05 05:40:27,478 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:40:29,677 - root - INFO - step: 11550 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0971 +[titan] 2025-10-05 05:40:29,677 - root - INFO - lr: 4.1756e-05 gnorm: 1.16 [ 7:06:20<17:30:09] +[titan] 2025-10-05 05:40:40,531 - root - INFO - step: 11555 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0225 +[titan] 2025-10-05 05:40:40,531 - root - INFO - lr: 4.1749e-05 gnorm: 1.07 [ 7:06:31<17:29:58] +[titan] 2025-10-05 05:40:51,372 - root - INFO - step: 11560 loss: 2.3640 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 05:40:51,372 - root - INFO - lr: 4.1742e-05 gnorm: 1.13 [ 7:06:42<17:29:46] +[titan] 2025-10-05 05:41:02,211 - root - INFO - step: 11565 loss: 2.3067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0371 +[titan] 2025-10-05 05:41:02,211 - root - INFO - lr: 4.1735e-05 gnorm: 1.09 [ 7:06:52<17:29:34] +[titan] 2025-10-05 05:41:13,062 - root - INFO - step: 11570 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:41:13,062 - root - INFO - lr: 4.1728e-05 gnorm: 1.08 [ 7:07:03<17:29:23] +[titan] 2025-10-05 05:41:23,914 - root - INFO - step: 11575 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 05:41:23,914 - root - INFO - lr: 4.1721e-05 gnorm: 1.11 [ 7:07:14<17:29:11] +[titan] 2025-10-05 05:41:34,780 - root - INFO - step: 11580 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0896 +[titan] 2025-10-05 05:41:34,780 - root - INFO - lr: 
4.1714e-05 gnorm: 1.16 [ 7:07:25<17:29:00] +[titan] 2025-10-05 05:41:45,632 - root - INFO - step: 11585 loss: 2.3149 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0440 +[titan] 2025-10-05 05:41:45,632 - root - INFO - lr: 4.1707e-05 gnorm: 1.12 [ 7:07:36<17:28:48] +[titan] 2025-10-05 05:41:56,483 - root - INFO - step: 11590 loss: 2.2891 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0207 +[titan] 2025-10-05 05:41:56,483 - root - INFO - lr: 4.1700e-05 gnorm: 1.11 [ 7:07:47<17:28:36] +[titan] 2025-10-05 05:42:07,367 - root - INFO - step: 11595 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 05:42:07,367 - root - INFO - lr: 4.1693e-05 gnorm: 1.09 [ 7:07:58<17:28:25] +[titan] 2025-10-05 05:42:16,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:42:18,229 - root - INFO - step: 11600 loss: 2.3596 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0839 +[titan] 2025-10-05 05:42:18,229 - root - INFO - lr: 4.1686e-05 gnorm: 1.13 [ 7:08:08<17:28:13] +[titan] 2025-10-05 05:42:29,091 - root - INFO - step: 11605 loss: 2.3723 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0941 +[titan] 2025-10-05 05:42:29,091 - root - INFO - lr: 4.1680e-05 gnorm: 1.11 [ 7:08:19<17:28:02] +[titan] 2025-10-05 05:42:39,944 - root - INFO - step: 11610 loss: 2.3331 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0601 +[titan] 2025-10-05 05:42:39,944 - root - INFO - lr: 4.1673e-05 gnorm: 1.12 [ 7:08:30<17:27:50] +[titan] 2025-10-05 05:42:50,809 - root - INFO - step: 11615 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0661 +[titan] 2025-10-05 05:42:50,809 - root - INFO - lr: 4.1666e-05 gnorm: 1.14 [ 7:08:41<17:27:38] +[titan] 2025-10-05 05:43:01,660 - root - INFO - step: 11620 loss: 2.3817 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1022 +[titan] 2025-10-05 05:43:01,660 - root - INFO - lr: 4.1659e-05 gnorm: 1.16 [ 7:08:52<17:27:27] +[titan] 2025-10-05 05:43:12,542 - root - INFO - step: 11625 loss: 2.3129 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0422 +[titan] 2025-10-05 05:43:12,542 - root - INFO - lr: 4.1652e-05 gnorm: 1.15 [ 7:09:03<17:27:15] +[titan] 2025-10-05 05:43:23,381 - root - INFO - step: 11630 loss: 2.3032 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 05:43:23,381 - root - INFO - lr: 
4.1645e-05 gnorm: 1.17 [ 7:09:14<17:27:04] +[titan] 2025-10-05 05:43:34,203 - root - INFO - step: 11635 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0697 +[titan] 2025-10-05 05:43:34,203 - root - INFO - lr: 4.1638e-05 gnorm: 1.17 [ 7:09:24<17:26:52] +[titan] 2025-10-05 05:43:45,042 - root - INFO - step: 11640 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0979 +[titan] 2025-10-05 05:43:45,042 - root - INFO - lr: 4.1631e-05 gnorm: 1.09 [ 7:09:35<17:26:40] +[titan] 2025-10-05 05:43:55,889 - root - INFO - step: 11645 loss: 2.3366 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 05:43:55,889 - root - INFO - lr: 4.1624e-05 gnorm: 1.12 [ 7:09:46<17:26:29] +[titan] 2025-10-05 05:44:04,549 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:44:06,731 - root - INFO - step: 11650 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0964 +[titan] 2025-10-05 05:44:06,731 - root - INFO - lr: 4.1617e-05 gnorm: 1.13 [ 7:09:57<17:26:17] +[titan] 2025-10-05 05:44:17,623 - root - INFO - step: 11655 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0423 +[titan] 2025-10-05 05:44:17,623 - root - INFO - lr: 4.1610e-05 gnorm: 1.16 [ 7:10:08<17:26:05] +[titan] 2025-10-05 05:44:28,491 - root - INFO - step: 11660 loss: 2.3791 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 05:44:28,491 - root - INFO - lr: 4.1603e-05 gnorm: 1.14 [ 7:10:19<17:25:54] +[titan] 2025-10-05 05:44:39,349 - root - INFO - step: 11665 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0347 +[titan] 2025-10-05 05:44:39,349 - root - INFO - lr: 4.1596e-05 gnorm: 1.14 [ 7:10:30<17:25:42] +[titan] 2025-10-05 05:44:50,212 - root - INFO - step: 11670 loss: 2.2728 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0088 +[titan] 2025-10-05 05:44:50,212 - root - INFO - lr: 4.1589e-05 gnorm: 1.12 [ 7:10:40<17:25:31] +[titan] 2025-10-05 05:45:01,081 - root - INFO - step: 11675 loss: 2.3589 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:45:01,081 - root - INFO - lr: 4.1582e-05 gnorm: 1.11 [ 7:10:51<17:25:19] +[titan] 2025-10-05 05:45:11,965 - root - INFO - step: 11680 loss: 2.3297 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0575 +[titan] 2025-10-05 05:45:11,965 - root - INFO - lr: 
4.1575e-05 gnorm: 1.10 [ 7:11:02<17:25:08] +[titan] 2025-10-05 05:45:22,811 - root - INFO - step: 11685 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0746 +[titan] 2025-10-05 05:45:22,811 - root - INFO - lr: 4.1568e-05 gnorm: 1.11 [ 7:11:13<17:24:56] +[titan] 2025-10-05 05:45:33,673 - root - INFO - step: 11690 loss: 2.3753 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2787 global_avg_mtp_loss: 2.0966 +[titan] 2025-10-05 05:45:33,674 - root - INFO - lr: 4.1561e-05 gnorm: 1.10 [ 7:11:24<17:24:44] +[titan] 2025-10-05 05:45:44,536 - root - INFO - step: 11695 loss: 2.3906 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1103 +[titan] 2025-10-05 05:45:44,537 - root - INFO - lr: 4.1554e-05 gnorm: 1.11 [ 7:11:35<17:24:33] +[titan] 2025-10-05 05:45:53,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:45:55,410 - root - INFO - step: 11700 loss: 2.3089 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 05:45:55,411 - root - INFO - lr: 4.1547e-05 gnorm: 1.16 [ 7:11:46<17:24:21] +[titan] 2025-10-05 05:46:06,262 - root - INFO - step: 11705 loss: 2.3134 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 05:46:06,262 - root - INFO - lr: 4.1540e-05 gnorm: 1.11 [ 7:11:56<17:24:10] +[titan] 2025-10-05 05:46:17,130 - root - INFO - step: 11710 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:46:17,130 - root - INFO - lr: 4.1534e-05 gnorm: 1.07 [ 7:12:07<17:23:58] +[titan] 2025-10-05 05:46:27,969 - root - INFO - step: 11715 loss: 2.3153 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0455 +[titan] 2025-10-05 05:46:27,969 - root - INFO - lr: 4.1527e-05 gnorm: 1.10 [ 7:12:18<17:23:47] +[titan] 2025-10-05 05:46:38,818 - root - INFO - step: 11720 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1170 +[titan] 2025-10-05 05:46:38,818 - root - INFO - lr: 4.1520e-05 gnorm: 1.16 [ 7:12:29<17:23:35] +[titan] 2025-10-05 05:46:49,675 - root - INFO - step: 11725 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0144 +[titan] 2025-10-05 05:46:49,675 - root - INFO - lr: 4.1513e-05 gnorm: 1.16 [ 7:12:40<17:23:23] +[titan] 2025-10-05 05:47:00,544 - root - INFO - step: 11730 loss: 2.4145 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1312 +[titan] 2025-10-05 05:47:00,544 - root - INFO - lr: 
4.1506e-05 gnorm: 1.10 [ 7:12:51<17:23:12] +[titan] 2025-10-05 05:47:11,419 - root - INFO - step: 11735 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0736 +[titan] 2025-10-05 05:47:11,419 - root - INFO - lr: 4.1499e-05 gnorm: 1.08 [ 7:13:02<17:23:00] +[titan] 2025-10-05 05:47:22,265 - root - INFO - step: 11740 loss: 2.3154 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 05:47:22,265 - root - INFO - lr: 4.1492e-05 gnorm: 1.11 [ 7:13:12<17:22:49] +[titan] 2025-10-05 05:47:33,131 - root - INFO - step: 11745 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 05:47:33,131 - root - INFO - lr: 4.1485e-05 gnorm: 1.13 [ 7:13:23<17:22:37] +[titan] 2025-10-05 05:47:41,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:47:43,985 - root - INFO - step: 11750 loss: 2.3279 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0557 +[titan] 2025-10-05 05:47:43,985 - root - INFO - lr: 4.1478e-05 gnorm: 1.13 [ 7:13:34<17:22:25] +[titan] 2025-10-05 05:47:54,868 - root - INFO - step: 11755 loss: 2.3253 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0524 +[titan] 2025-10-05 05:47:54,869 - root - INFO - lr: 4.1471e-05 gnorm: 1.15 [ 7:13:45<17:22:14] +[titan] 2025-10-05 05:48:05,705 - root - INFO - step: 11760 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 05:48:05,705 - root - INFO - lr: 4.1464e-05 gnorm: 1.11 [ 7:13:56<17:22:02] +[titan] 2025-10-05 05:48:16,588 - root - INFO - step: 11765 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0673 +[titan] 2025-10-05 05:48:16,588 - root - INFO - lr: 4.1457e-05 gnorm: 1.08 [ 7:14:07<17:21:51] +[titan] 2025-10-05 05:48:27,456 - root - INFO - step: 11770 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:48:27,456 - root - INFO - lr: 4.1450e-05 gnorm: 1.13 [ 7:14:18<17:21:39] +[titan] 2025-10-05 05:48:38,410 - root - INFO - step: 11775 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 29,914 tflops: 415.01 mfu: 41.96% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 05:48:38,411 - root - INFO - lr: 4.1443e-05 gnorm: 1.12 [ 7:14:29<17:21:28] +[titan] 2025-10-05 05:48:40,781 - root - INFO - Dumping profiler traces at step 11776 +[titan] 2025-10-05 05:48:40,818 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:48:49,532 - root - INFO - step: 11780 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0553 +[titan] 2025-10-05 05:48:49,532 - root - INFO - lr: 4.1436e-05 gnorm: 1.10 [ 7:14:40<17:21:17] +[titan] 2025-10-05 05:49:00,425 - root - INFO - step: 11785 loss: 2.3316 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0583 +[titan] 2025-10-05 05:49:00,425 - root - INFO - lr: 4.1429e-05 gnorm: 1.11 [ 7:14:51<17:21:05] +[titan] 2025-10-05 05:49:11,301 - root - INFO - step: 11790 loss: 2.2637 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 05:49:11,301 - root - INFO - lr: 4.1422e-05 gnorm: 1.08 [ 7:15:02<17:20:54] +[titan] 2025-10-05 05:49:22,173 - root - INFO - step: 11795 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1151 +[titan] 2025-10-05 05:49:22,173 - root - INFO - lr: 4.1415e-05 gnorm: 1.13 [ 7:15:12<17:20:42] +[titan] 2025-10-05 05:49:30,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:49:33,049 - root - INFO - step: 11800 loss: 2.3168 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0453 +[titan] 2025-10-05 05:49:33,050 - root - INFO - lr: 4.1408e-05 gnorm: 1.14 [ 7:15:23<17:20:31] +[titan] 2025-10-05 05:49:43,908 - root - INFO - step: 11805 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:49:43,909 - root - INFO - lr: 4.1401e-05 gnorm: 1.11 [ 7:15:34<17:20:19] +[titan] 2025-10-05 05:49:54,777 - root - INFO - step: 11810 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 05:49:54,777 - root - INFO - lr: 4.1394e-05 gnorm: 1.14 [ 7:15:45<17:20:08] +[titan] 2025-10-05 05:50:05,641 - root - INFO - step: 11815 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:50:05,641 - root - INFO - lr: 4.1387e-05 gnorm: 1.10 [ 7:15:56<17:19:56] +[titan] 2025-10-05 05:50:16,549 - root - INFO - step: 11820 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 05:50:16,549 - root - INFO - lr: 4.1379e-05 gnorm: 1.14 [ 7:16:07<17:19:45] +[titan] 2025-10-05 05:50:27,410 - root - INFO - step: 11825 loss: 2.3545 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:50:27,410 - root - INFO - lr: 4.1372e-05 gnorm: 1.11 [ 7:16:18<17:19:33] +[titan] 2025-10-05 05:50:38,296 - root - INFO - step: 11830 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 05:50:38,296 - root - INFO - lr: 
4.1365e-05 gnorm: 1.17 [ 7:16:29<17:19:22] +[titan] 2025-10-05 05:50:49,183 - root - INFO - step: 11835 loss: 2.4085 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1264 +[titan] 2025-10-05 05:50:49,183 - root - INFO - lr: 4.1358e-05 gnorm: 1.12 [ 7:16:39<17:19:10] +[titan] 2025-10-05 05:51:00,086 - root - INFO - step: 11840 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 05:51:00,086 - root - INFO - lr: 4.1351e-05 gnorm: 1.11 [ 7:16:50<17:18:59] +[titan] 2025-10-05 05:51:10,957 - root - INFO - step: 11845 loss: 2.3242 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0501 +[titan] 2025-10-05 05:51:10,957 - root - INFO - lr: 4.1344e-05 gnorm: 1.08 [ 7:17:01<17:18:47] +[titan] 2025-10-05 05:51:19,706 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:51:21,898 - root - INFO - step: 11850 loss: 2.3518 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0767 +[titan] 2025-10-05 05:51:21,898 - root - INFO - lr: 4.1337e-05 gnorm: 1.12 [ 7:17:12<17:18:36] +[titan] 2025-10-05 05:51:32,790 - root - INFO - step: 11855 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 05:51:32,790 - root - INFO - lr: 4.1330e-05 gnorm: 1.12 [ 7:17:23<17:18:24] +[titan] 2025-10-05 05:51:43,664 - root - INFO - step: 11860 loss: 2.3095 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 05:51:43,664 - root - INFO - lr: 4.1323e-05 gnorm: 1.18 [ 7:17:34<17:18:13] +[titan] 2025-10-05 05:51:54,563 - root - INFO - step: 11865 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0318 +[titan] 2025-10-05 05:51:54,563 - root - INFO - lr: 4.1316e-05 gnorm: 1.09 [ 7:17:45<17:18:01] +[titan] 2025-10-05 05:52:05,455 - root - INFO - step: 11870 loss: 2.3710 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0936 +[titan] 2025-10-05 05:52:05,455 - root - INFO - lr: 4.1309e-05 gnorm: 1.11 [ 7:17:56<17:17:50] +[titan] 2025-10-05 05:52:16,379 - root - INFO - step: 11875 loss: 2.3659 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0895 +[titan] 2025-10-05 05:52:16,379 - root - INFO - lr: 4.1302e-05 gnorm: 1.15 [ 7:18:07<17:17:38] +[titan] 2025-10-05 05:52:27,265 - root - INFO - step: 11880 loss: 2.4011 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1135 +[titan] 2025-10-05 05:52:27,265 - root - INFO - lr: 
4.1295e-05 gnorm: 3.35 [ 7:18:17<17:17:27] +[titan] 2025-10-05 05:52:38,136 - root - INFO - step: 11885 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0521 +[titan] 2025-10-05 05:52:38,137 - root - INFO - lr: 4.1288e-05 gnorm: 1.14 [ 7:18:28<17:17:15] +[titan] 2025-10-05 05:52:49,001 - root - INFO - step: 11890 loss: 2.3415 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0669 +[titan] 2025-10-05 05:52:49,001 - root - INFO - lr: 4.1281e-05 gnorm: 1.11 [ 7:18:39<17:17:04] +[titan] 2025-10-05 05:52:59,880 - root - INFO - step: 11895 loss: 2.3264 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2720 global_avg_mtp_loss: 2.0545 +[titan] 2025-10-05 05:52:59,880 - root - INFO - lr: 4.1274e-05 gnorm: 1.12 [ 7:18:50<17:16:52] +[titan] 2025-10-05 05:53:08,562 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:53:10,763 - root - INFO - step: 11900 loss: 2.2583 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9932 +[titan] 2025-10-05 05:53:10,763 - root - INFO - lr: 4.1267e-05 gnorm: 1.12 [ 7:19:01<17:16:41] +[titan] 2025-10-05 05:53:21,692 - root - INFO - step: 11905 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 05:53:21,692 - root - INFO - lr: 4.1260e-05 gnorm: 1.14 [ 7:19:12<17:16:29] +[titan] 2025-10-05 05:53:32,550 - root - INFO - step: 11910 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:53:32,550 - root - INFO - lr: 4.1253e-05 gnorm: 1.07 [ 7:19:23<17:16:18] +[titan] 2025-10-05 05:53:43,445 - root - INFO - step: 11915 loss: 2.3927 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:53:43,445 - root - INFO - lr: 4.1246e-05 gnorm: 1.12 [ 7:19:34<17:16:06] +[titan] 2025-10-05 05:53:54,326 - root - INFO - step: 11920 loss: 2.4016 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:53:54,326 - root - INFO - lr: 4.1239e-05 gnorm: 1.11 [ 7:19:45<17:15:55] +[titan] 2025-10-05 05:54:05,201 - root - INFO - step: 11925 loss: 2.3896 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 05:54:05,201 - root - INFO - lr: 4.1232e-05 gnorm: 1.10 [ 7:19:55<17:15:43] +[titan] 2025-10-05 05:54:16,091 - root - INFO - step: 11930 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:54:16,091 - root - INFO - lr: 
4.1224e-05 gnorm: 1.18 [ 7:20:06<17:15:32] +[titan] 2025-10-05 05:54:27,039 - root - INFO - step: 11935 loss: 2.3186 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 05:54:27,039 - root - INFO - lr: 4.1217e-05 gnorm: 1.13 [ 7:20:17<17:15:20] +[titan] 2025-10-05 05:54:37,903 - root - INFO - step: 11940 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1239 +[titan] 2025-10-05 05:54:37,903 - root - INFO - lr: 4.1210e-05 gnorm: 1.14 [ 7:20:28<17:15:09] +[titan] 2025-10-05 05:54:48,775 - root - INFO - step: 11945 loss: 2.3374 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:54:48,775 - root - INFO - lr: 4.1203e-05 gnorm: 1.16 [ 7:20:39<17:14:57] +[titan] 2025-10-05 05:54:57,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:54:59,643 - root - INFO - step: 11950 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0633 +[titan] 2025-10-05 05:54:59,643 - root - INFO - lr: 4.1196e-05 gnorm: 1.10 [ 7:20:50<17:14:46] +[titan] 2025-10-05 05:55:10,528 - root - INFO - step: 11955 loss: 2.3258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 05:55:10,528 - root - INFO - lr: 4.1189e-05 gnorm: 1.08 [ 7:21:01<17:14:34] +[titan] 2025-10-05 05:55:21,455 - root - INFO - step: 11960 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:55:21,456 - root - INFO - lr: 4.1182e-05 gnorm: 1.12 [ 7:21:12<17:14:23] +[titan] 2025-10-05 05:55:32,338 - root - INFO - step: 11965 loss: 2.3022 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 05:55:32,338 - root - INFO - lr: 4.1175e-05 gnorm: 1.06 [ 7:21:23<17:14:11] +[titan] 2025-10-05 05:55:43,237 - root - INFO - step: 11970 loss: 2.3819 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 05:55:43,238 - root - INFO - lr: 4.1168e-05 gnorm: 1.11 [ 7:21:33<17:14:00] +[titan] 2025-10-05 05:55:54,122 - root - INFO - step: 11975 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0302 +[titan] 2025-10-05 05:55:54,122 - root - INFO - lr: 4.1161e-05 gnorm: 1.07 [ 7:21:44<17:13:49] +[titan] 2025-10-05 05:56:04,989 - root - INFO - step: 11980 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:56:04,989 - root - INFO - lr: 
4.1154e-05 gnorm: 1.08 [ 7:21:55<17:13:37] +[titan] 2025-10-05 05:56:15,876 - root - INFO - step: 11985 loss: 2.3487 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:56:15,876 - root - INFO - lr: 4.1147e-05 gnorm: 1.11 [ 7:22:06<17:13:26] +[titan] 2025-10-05 05:56:26,799 - root - INFO - step: 11990 loss: 2.3624 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 05:56:26,799 - root - INFO - lr: 4.1139e-05 gnorm: 1.07 [ 7:22:17<17:13:14] +[titan] 2025-10-05 05:56:37,664 - root - INFO - step: 11995 loss: 2.3352 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:56:37,664 - root - INFO - lr: 4.1132e-05 gnorm: 1.15 [ 7:22:28<17:13:03] +[titan] 2025-10-05 05:56:46,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:56:48,559 - root - INFO - step: 12000 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0434 +[titan] 2025-10-05 05:56:48,559 - root - INFO - lr: 4.1125e-05 gnorm: 1.15 [ 7:22:39<17:12:51] +[titan] 2025-10-05 05:56:59,430 - root - INFO - step: 12005 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0683 +[titan] 2025-10-05 05:56:59,431 - root - INFO - lr: 4.1118e-05 gnorm: 1.12 [ 7:22:50<17:12:40] +[titan] 2025-10-05 05:57:10,327 - root - INFO - step: 12010 loss: 2.3294 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0572 +[titan] 2025-10-05 05:57:10,327 - root - INFO - lr: 4.1111e-05 gnorm: 1.11 [ 7:23:01<17:12:28] +[titan] 2025-10-05 05:57:21,254 - root - INFO - step: 12015 loss: 2.3689 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:57:21,254 - root - INFO - lr: 4.1104e-05 gnorm: 1.08 [ 7:23:11<17:12:17] +[titan] 2025-10-05 05:57:32,120 - root - INFO - step: 12020 loss: 2.3542 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0781 +[titan] 2025-10-05 05:57:32,120 - root - INFO - lr: 4.1097e-05 gnorm: 1.08 [ 7:23:22<17:12:05] +[titan] 2025-10-05 05:57:43,004 - root - INFO - step: 12025 loss: 2.3233 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:57:43,004 - root - INFO - lr: 4.1090e-05 gnorm: 1.13 [ 7:23:33<17:11:54] +[titan] 2025-10-05 05:57:53,894 - root - INFO - step: 12030 loss: 2.3526 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:57:53,895 - root - INFO - lr: 
4.1083e-05 gnorm: 1.09 [ 7:23:44<17:11:42] +[titan] 2025-10-05 05:58:04,763 - root - INFO - step: 12035 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 05:58:04,764 - root - INFO - lr: 4.1075e-05 gnorm: 1.11 [ 7:23:55<17:11:31] +[titan] 2025-10-05 05:58:15,655 - root - INFO - step: 12040 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 05:58:15,656 - root - INFO - lr: 4.1068e-05 gnorm: 1.13 [ 7:24:06<17:11:19] +[titan] 2025-10-05 05:58:26,581 - root - INFO - step: 12045 loss: 2.2551 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9914 +[titan] 2025-10-05 05:58:26,582 - root - INFO - lr: 4.1061e-05 gnorm: 1.10 [ 7:24:17<17:11:08] +[titan] 2025-10-05 05:58:35,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:58:37,445 - root - INFO - step: 12050 loss: 2.2791 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 05:58:37,445 - root - INFO - lr: 4.1054e-05 gnorm: 1.12 [ 7:24:28<17:10:56] +[titan] 2025-10-05 05:58:48,333 - root - INFO - step: 12055 loss: 2.3027 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0322 +[titan] 2025-10-05 05:58:48,334 - root - INFO - lr: 4.1047e-05 gnorm: 1.09 [ 7:24:39<17:10:45] +[titan] 2025-10-05 05:58:59,215 - root - INFO - step: 12060 loss: 2.3599 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:58:59,215 - root - INFO - lr: 4.1040e-05 gnorm: 1.13 [ 7:24:49<17:10:33] +[titan] 2025-10-05 05:59:10,066 - root - INFO - step: 12065 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 05:59:10,066 - root - INFO - lr: 4.1033e-05 gnorm: 1.14 [ 7:25:00<17:10:22] +[titan] 2025-10-05 05:59:20,922 - root - INFO - step: 12070 loss: 2.3313 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:59:20,923 - root - INFO - lr: 4.1026e-05 gnorm: 1.12 [ 7:25:11<17:10:10] +[titan] 2025-10-05 05:59:31,844 - root - INFO - step: 12075 loss: 2.4140 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 05:59:31,844 - root - INFO - lr: 4.1018e-05 gnorm: 1.14 [ 7:25:22<17:09:59] +[titan] 2025-10-05 05:59:42,686 - root - INFO - step: 12080 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0618 +[titan] 2025-10-05 05:59:42,686 - root - INFO - lr: 
4.1011e-05 gnorm: 1.13 [ 7:25:33<17:09:47] +[titan] 2025-10-05 05:59:53,539 - root - INFO - step: 12085 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0449 +[titan] 2025-10-05 05:59:53,540 - root - INFO - lr: 4.1004e-05 gnorm: 1.11 [ 7:25:44<17:09:36] +[titan] 2025-10-05 06:00:04,392 - root - INFO - step: 12090 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0005 +[titan] 2025-10-05 06:00:04,392 - root - INFO - lr: 4.0997e-05 gnorm: 1.08 [ 7:25:55<17:09:24] +[titan] 2025-10-05 06:00:15,254 - root - INFO - step: 12095 loss: 2.3576 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0822 +[titan] 2025-10-05 06:00:15,254 - root - INFO - lr: 4.0990e-05 gnorm: 1.07 [ 7:26:05<17:09:13] +[titan] 2025-10-05 06:00:23,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:00:26,169 - root - INFO - step: 12100 loss: 2.3299 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0570 +[titan] 2025-10-05 06:00:26,169 - root - INFO - lr: 4.0983e-05 gnorm: 1.12 [ 7:26:16<17:09:01] +[titan] 2025-10-05 06:00:37,019 - root - INFO - step: 12105 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 06:00:37,019 - root - INFO - lr: 4.0976e-05 gnorm: 1.10 [ 7:26:27<17:08:50] +[titan] 2025-10-05 06:00:47,875 - root - INFO - step: 12110 loss: 2.3109 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0407 +[titan] 2025-10-05 06:00:47,875 - root - INFO - lr: 4.0968e-05 gnorm: 1.14 [ 7:26:38<17:08:38] +[titan] 2025-10-05 06:00:58,710 - root - INFO - step: 12115 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0231 +[titan] 2025-10-05 06:00:58,710 - root - INFO - lr: 4.0961e-05 gnorm: 1.09 [ 7:26:49<17:08:26] +[titan] 2025-10-05 06:01:09,539 - root - INFO - step: 12120 loss: 2.3227 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0502 +[titan] 2025-10-05 06:01:09,539 - root - INFO - lr: 4.0954e-05 gnorm: 1.11 [ 7:27:00<17:08:15] +[titan] 2025-10-05 06:01:20,374 - root - INFO - step: 12125 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 06:01:20,374 - root - INFO - lr: 4.0947e-05 gnorm: 1.07 [ 7:27:11<17:08:03] +[titan] 2025-10-05 06:01:31,270 - root - INFO - step: 12130 loss: 2.2677 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0025 +[titan] 2025-10-05 06:01:31,270 - root - INFO - lr: 
4.0940e-05 gnorm: 1.31 [ 7:27:21<17:07:52] +[titan] 2025-10-05 06:01:42,106 - root - INFO - step: 12135 loss: 2.2796 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:01:42,106 - root - INFO - lr: 4.0933e-05 gnorm: 1.13 [ 7:27:32<17:07:40] +[titan] 2025-10-05 06:01:52,949 - root - INFO - step: 12140 loss: 2.3222 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:01:52,949 - root - INFO - lr: 4.0926e-05 gnorm: 1.09 [ 7:27:43<17:07:29] +[titan] 2025-10-05 06:02:03,787 - root - INFO - step: 12145 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:02:03,787 - root - INFO - lr: 4.0918e-05 gnorm: 1.12 [ 7:27:54<17:07:17] +[titan] 2025-10-05 06:02:12,468 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:02:14,649 - root - INFO - step: 12150 loss: 2.3633 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2765 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 06:02:14,649 - root - INFO - lr: 4.0911e-05 gnorm: 1.10 [ 7:28:05<17:07:06] +[titan] 2025-10-05 06:02:25,544 - root - INFO - step: 12155 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 06:02:25,544 - root - INFO - lr: 4.0904e-05 gnorm: 1.08 [ 7:28:16<17:06:54] +[titan] 2025-10-05 06:02:36,407 - root - INFO - step: 12160 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:02:36,407 - root - INFO - lr: 4.0897e-05 gnorm: 1.12 [ 7:28:27<17:06:43] +[titan] 2025-10-05 06:02:47,265 - root - INFO - step: 12165 loss: 2.3191 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:02:47,265 - root - INFO - lr: 4.0890e-05 gnorm: 1.13 [ 7:28:37<17:06:31] +[titan] 2025-10-05 06:02:58,124 - root - INFO - step: 12170 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0270 +[titan] 2025-10-05 06:02:58,124 - root - INFO - lr: 4.0883e-05 gnorm: 1.13 [ 7:28:48<17:06:19] +[titan] 2025-10-05 06:03:08,999 - root - INFO - step: 12175 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 06:03:08,999 - root - INFO - lr: 4.0875e-05 gnorm: 1.10 [ 7:28:59<17:06:08] +[titan] 2025-10-05 06:03:19,864 - root - INFO - step: 12180 loss: 2.3860 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1055 +[titan] 2025-10-05 06:03:19,864 - root - INFO - lr: 
4.0868e-05 gnorm: 1.08 [ 7:29:10<17:05:56] +[titan] 2025-10-05 06:03:30,733 - root - INFO - step: 12185 loss: 2.2786 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 06:03:30,733 - root - INFO - lr: 4.0861e-05 gnorm: 1.09 [ 7:29:21<17:05:45] +[titan] 2025-10-05 06:03:41,601 - root - INFO - step: 12190 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 06:03:41,601 - root - INFO - lr: 4.0854e-05 gnorm: 1.13 [ 7:29:32<17:05:33] +[titan] 2025-10-05 06:03:52,503 - root - INFO - step: 12195 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9906 +[titan] 2025-10-05 06:03:52,503 - root - INFO - lr: 4.0847e-05 gnorm: 1.13 [ 7:29:43<17:05:22] +[titan] 2025-10-05 06:04:01,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:04:03,365 - root - INFO - step: 12200 loss: 2.3747 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0972 +[titan] 2025-10-05 06:04:03,365 - root - INFO - lr: 4.0839e-05 gnorm: 1.12 [ 7:29:54<17:05:11] +[titan] 2025-10-05 06:04:14,208 - root - INFO - step: 12205 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0049 +[titan] 2025-10-05 06:04:14,208 - root - INFO - lr: 4.0832e-05 gnorm: 1.10 [ 7:30:04<17:04:59] +[titan] 2025-10-05 06:04:25,065 - root - INFO - step: 12210 loss: 2.3060 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:04:25,065 - root - INFO - lr: 4.0825e-05 gnorm: 1.06 [ 7:30:15<17:04:47] +[titan] 2025-10-05 06:04:35,929 - root - INFO - step: 12215 loss: 2.2793 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 06:04:35,929 - root - INFO - lr: 4.0818e-05 gnorm: 1.04 [ 7:30:26<17:04:36] +[titan] 2025-10-05 06:04:46,809 - root - INFO - step: 12220 loss: 2.3271 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 06:04:46,809 - root - INFO - lr: 4.0811e-05 gnorm: 1.14 [ 7:30:37<17:04:24] +[titan] 2025-10-05 06:04:57,691 - root - INFO - step: 12225 loss: 2.2624 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9984 +[titan] 2025-10-05 06:04:57,691 - root - INFO - lr: 4.0803e-05 gnorm: 1.17 [ 7:30:48<17:04:13] +[titan] 2025-10-05 06:05:08,549 - root - INFO - step: 12230 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 06:05:08,549 - root - INFO - lr: 
4.0796e-05 gnorm: 1.09 [ 7:30:59<17:04:01] +[titan] 2025-10-05 06:05:19,441 - root - INFO - step: 12235 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0739 +[titan] 2025-10-05 06:05:19,441 - root - INFO - lr: 4.0789e-05 gnorm: 1.10 [ 7:31:10<17:03:50] +[titan] 2025-10-05 06:05:30,318 - root - INFO - step: 12240 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 06:05:30,318 - root - INFO - lr: 4.0782e-05 gnorm: 1.09 [ 7:31:20<17:03:38] +[titan] 2025-10-05 06:05:41,191 - root - INFO - step: 12245 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 06:05:41,191 - root - INFO - lr: 4.0775e-05 gnorm: 1.08 [ 7:31:31<17:03:27] +[titan] 2025-10-05 06:05:49,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:05:52,078 - root - INFO - step: 12250 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:05:52,079 - root - INFO - lr: 4.0767e-05 gnorm: 1.17 [ 7:31:42<17:03:16] +[titan] 2025-10-05 06:06:02,966 - root - INFO - step: 12255 loss: 2.3830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 06:06:02,966 - root - INFO - lr: 4.0760e-05 gnorm: 1.12 [ 7:31:53<17:03:04] +[titan] 2025-10-05 06:06:13,829 - root - INFO - step: 12260 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9702 +[titan] 2025-10-05 06:06:13,829 - root - INFO - lr: 4.0753e-05 gnorm: 1.10 [ 7:32:04<17:02:53] +[titan] 2025-10-05 06:06:24,716 - root - INFO - step: 12265 loss: 2.3897 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1102 +[titan] 2025-10-05 06:06:24,716 - root - INFO - lr: 4.0746e-05 gnorm: 1.13 [ 7:32:15<17:02:41] +[titan] 2025-10-05 06:06:35,605 - root - INFO - step: 12270 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0629 +[titan] 2025-10-05 06:06:35,605 - root - INFO - lr: 4.0739e-05 gnorm: 1.15 [ 7:32:26<17:02:30] +[titan] 2025-10-05 06:06:46,502 - root - INFO - step: 12275 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 06:06:46,502 - root - INFO - lr: 4.0731e-05 gnorm: 1.17 [ 7:32:37<17:02:18] +[titan] 2025-10-05 06:06:57,383 - root - INFO - step: 12280 loss: 2.3419 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0674 +[titan] 2025-10-05 06:06:57,383 - root - INFO - lr: 
4.0724e-05 gnorm: 1.16 [ 7:32:48<17:02:07] +[titan] 2025-10-05 06:07:08,352 - root - INFO - step: 12285 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.44 mfu: 41.91% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 06:07:08,353 - root - INFO - lr: 4.0717e-05 gnorm: 1.14 [ 7:32:59<17:01:55] +[titan] 2025-10-05 06:07:15,077 - root - INFO - Dumping profiler traces at step 12288 +[titan] 2025-10-05 06:07:15,117 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:07:19,500 - root - INFO - step: 12290 loss: 2.3565 memory: 118.84GiB(85.28%) tps: 29,395 tflops: 407.81 mfu: 41.23% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:07:19,500 - root - INFO - lr: 4.0710e-05 gnorm: 1.08 [ 7:33:10<17:01:45] +[titan] 2025-10-05 06:07:30,465 - root - INFO - step: 12295 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 29,886 tflops: 414.62 mfu: 41.92% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 06:07:30,465 - root - INFO - lr: 4.0702e-05 gnorm: 1.07 [ 7:33:21<17:01:33] +[titan] 2025-10-05 06:07:39,154 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:07:41,347 - root - INFO - step: 12300 loss: 2.3244 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:07:41,347 - root - INFO - lr: 4.0695e-05 gnorm: 1.16 [ 7:33:32<17:01:22] +[titan] 2025-10-05 06:07:52,196 - root - INFO - step: 12305 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 06:07:52,196 - root - INFO - lr: 4.0688e-05 gnorm: 1.09 [ 7:33:42<17:01:10] +[titan] 2025-10-05 06:08:03,050 - root - INFO - step: 12310 loss: 2.3555 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:08:03,050 - root - INFO - lr: 4.0681e-05 gnorm: 1.12 [ 7:33:53<17:00:59] +[titan] 2025-10-05 06:08:13,913 - root - INFO - step: 12315 loss: 2.3066 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0365 +[titan] 2025-10-05 06:08:13,914 - root - INFO - lr: 4.0674e-05 gnorm: 1.08 [ 7:34:04<17:00:47] +[titan] 2025-10-05 06:08:24,841 - root - INFO - step: 12320 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0745 +[titan] 2025-10-05 06:08:24,842 - root - INFO - lr: 4.0666e-05 gnorm: 1.11 [ 7:34:15<17:00:36] +[titan] 2025-10-05 06:08:35,938 - root - INFO - step: 12325 loss: 2.4352 memory: 118.84GiB(85.28%) tps: 29,531 tflops: 409.69 mfu: 41.42% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1501 +[titan] 2025-10-05 06:08:35,938 - root - INFO - lr: 4.0659e-05 gnorm: 1.15 [ 7:34:26<17:00:25] +[titan] 2025-10-05 06:08:46,800 - root - INFO - step: 12330 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:08:46,800 - root - INFO - lr: 
4.0652e-05 gnorm: 1.09 [ 7:34:37<17:00:13] +[titan] 2025-10-05 06:08:57,665 - root - INFO - step: 12335 loss: 2.3478 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 06:08:57,665 - root - INFO - lr: 4.0645e-05 gnorm: 1.09 [ 7:34:48<17:00:02] +[titan] 2025-10-05 06:09:08,538 - root - INFO - step: 12340 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0577 +[titan] 2025-10-05 06:09:08,539 - root - INFO - lr: 4.0637e-05 gnorm: 1.13 [ 7:34:59<16:59:50] +[titan] 2025-10-05 06:09:19,441 - root - INFO - step: 12345 loss: 2.3988 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1189 +[titan] 2025-10-05 06:09:19,441 - root - INFO - lr: 4.0630e-05 gnorm: 1.13 [ 7:35:10<16:59:39] +[titan] 2025-10-05 06:09:28,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:09:30,418 - root - INFO - step: 12350 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.17 mfu: 41.88% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0382 +[titan] 2025-10-05 06:09:30,418 - root - INFO - lr: 4.0623e-05 gnorm: 1.12 [ 7:35:21<16:59:28] +[titan] 2025-10-05 06:09:41,340 - root - INFO - step: 12355 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0087 +[titan] 2025-10-05 06:09:41,340 - root - INFO - lr: 4.0616e-05 gnorm: 1.16 [ 7:35:32<16:59:16] +[titan] 2025-10-05 06:09:52,209 - root - INFO - step: 12360 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0389 +[titan] 2025-10-05 06:09:52,209 - root - INFO - lr: 4.0608e-05 gnorm: 1.09 [ 7:35:42<16:59:05] +[titan] 2025-10-05 06:10:03,072 - root - INFO - step: 12365 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 06:10:03,073 - root - INFO - lr: 4.0601e-05 gnorm: 1.09 [ 7:35:53<16:58:53] +[titan] 2025-10-05 06:10:13,928 - root - INFO - step: 12370 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 06:10:13,928 - root - INFO - lr: 4.0594e-05 gnorm: 1.09 [ 7:36:04<16:58:42] +[titan] 2025-10-05 06:10:24,802 - root - INFO - step: 12375 loss: 2.3408 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:10:24,803 - root - INFO - lr: 4.0587e-05 gnorm: 1.10 [ 7:36:15<16:58:30] +[titan] 2025-10-05 06:10:35,777 - root - INFO - step: 12380 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 29,858 tflops: 414.24 mfu: 41.88% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:10:35,777 - root - INFO - lr: 
4.0579e-05 gnorm: 1.08 [ 7:36:26<16:58:19] +[titan] 2025-10-05 06:10:46,648 - root - INFO - step: 12385 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:10:46,649 - root - INFO - lr: 4.0572e-05 gnorm: 1.13 [ 7:36:37<16:58:08] +[titan] 2025-10-05 06:10:57,506 - root - INFO - step: 12390 loss: 2.3730 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 06:10:57,506 - root - INFO - lr: 4.0565e-05 gnorm: 1.14 [ 7:36:48<16:57:56] +[titan] 2025-10-05 06:11:08,373 - root - INFO - step: 12395 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:11:08,373 - root - INFO - lr: 4.0558e-05 gnorm: 1.06 [ 7:36:59<16:57:45] +[titan] 2025-10-05 06:11:17,061 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:11:19,239 - root - INFO - step: 12400 loss: 2.3820 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 06:11:19,240 - root - INFO - lr: 4.0550e-05 gnorm: 1.12 [ 7:37:09<16:57:33] +[titan] 2025-10-05 06:11:30,093 - root - INFO - step: 12405 loss: 2.3346 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0597 +[titan] 2025-10-05 06:11:30,094 - root - INFO - lr: 4.0543e-05 gnorm: 1.09 [ 7:37:20<16:57:22] +[titan] 2025-10-05 06:11:41,037 - root - INFO - step: 12410 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0232 +[titan] 2025-10-05 06:11:41,037 - root - INFO - lr: 4.0536e-05 gnorm: 1.14 [ 7:37:31<16:57:10] +[titan] 2025-10-05 06:11:51,926 - root - INFO - step: 12415 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0550 +[titan] 2025-10-05 06:11:51,926 - root - INFO - lr: 4.0528e-05 gnorm: 1.18 [ 7:37:42<16:56:59] +[titan] 2025-10-05 06:12:02,805 - root - INFO - step: 12420 loss: 2.3265 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 06:12:02,806 - root - INFO - lr: 4.0521e-05 gnorm: 1.08 [ 7:37:53<16:56:47] +[titan] 2025-10-05 06:12:13,684 - root - INFO - step: 12425 loss: 2.3185 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0481 +[titan] 2025-10-05 06:12:13,684 - root - INFO - lr: 4.0514e-05 gnorm: 1.14 [ 7:38:04<16:56:36] +[titan] 2025-10-05 06:12:24,578 - root - INFO - step: 12430 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:12:24,578 - root - INFO - lr: 
4.0507e-05 gnorm: 1.12 [ 7:38:15<16:56:25] +[titan] 2025-10-05 06:12:35,467 - root - INFO - step: 12435 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0765 +[titan] 2025-10-05 06:12:35,468 - root - INFO - lr: 4.0499e-05 gnorm: 1.10 [ 7:38:26<16:56:13] +[titan] 2025-10-05 06:12:46,337 - root - INFO - step: 12440 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:12:46,337 - root - INFO - lr: 4.0492e-05 gnorm: 1.07 [ 7:38:36<16:56:02] +[titan] 2025-10-05 06:12:57,242 - root - INFO - step: 12445 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 06:12:57,242 - root - INFO - lr: 4.0485e-05 gnorm: 1.08 [ 7:38:47<16:55:50] +[titan] 2025-10-05 06:13:05,952 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:13:08,132 - root - INFO - step: 12450 loss: 2.3232 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0520 +[titan] 2025-10-05 06:13:08,132 - root - INFO - lr: 4.0477e-05 gnorm: 1.09 [ 7:38:58<16:55:39] +[titan] 2025-10-05 06:13:19,019 - root - INFO - step: 12455 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:13:19,019 - root - INFO - lr: 4.0470e-05 gnorm: 1.08 [ 7:39:09<16:55:27] +[titan] 2025-10-05 06:13:29,895 - root - INFO - step: 12460 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:13:29,895 - root - INFO - lr: 4.0463e-05 gnorm: 1.12 [ 7:39:20<16:55:16] +[titan] 2025-10-05 06:13:40,820 - root - INFO - step: 12465 loss: 2.3135 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0425 +[titan] 2025-10-05 06:13:40,820 - root - INFO - lr: 4.0456e-05 gnorm: 1.11 [ 7:39:31<16:55:04] +[titan] 2025-10-05 06:13:51,710 - root - INFO - step: 12470 loss: 2.3792 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 06:13:51,710 - root - INFO - lr: 4.0448e-05 gnorm: 1.07 [ 7:39:42<16:54:53] +[titan] 2025-10-05 06:14:02,592 - root - INFO - step: 12475 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0962 +[titan] 2025-10-05 06:14:02,592 - root - INFO - lr: 4.0441e-05 gnorm: 1.11 [ 7:39:53<16:54:42] +[titan] 2025-10-05 06:14:13,496 - root - INFO - step: 12480 loss: 2.2332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9712 +[titan] 2025-10-05 06:14:13,497 - root - INFO - lr: 
4.0434e-05 gnorm: 1.08 [ 7:40:04<16:54:30] +[titan] 2025-10-05 06:14:24,366 - root - INFO - step: 12485 loss: 2.3235 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 06:14:24,366 - root - INFO - lr: 4.0426e-05 gnorm: 1.11 [ 7:40:15<16:54:19] +[titan] 2025-10-05 06:14:35,268 - root - INFO - step: 12490 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0610 +[titan] 2025-10-05 06:14:35,269 - root - INFO - lr: 4.0419e-05 gnorm: 1.09 [ 7:40:25<16:54:07] +[titan] 2025-10-05 06:14:46,143 - root - INFO - step: 12495 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 06:14:46,144 - root - INFO - lr: 4.0412e-05 gnorm: 1.12 [ 7:40:36<16:53:56] +[titan] 2025-10-05 06:14:54,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:14:57,025 - root - INFO - step: 12500 loss: 2.2990 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0321 +[titan] 2025-10-05 06:14:57,025 - root - INFO - lr: 4.0404e-05 gnorm: 1.12 [ 7:40:47<16:53:44] +[titan] 2025-10-05 06:15:07,897 - root - INFO - step: 12505 loss: 2.3230 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 06:15:07,897 - root - INFO - lr: 4.0397e-05 gnorm: 1.14 [ 7:40:58<16:53:33] +[titan] 2025-10-05 06:15:18,787 - root - INFO - step: 12510 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0680 +[titan] 2025-10-05 06:15:18,787 - root - INFO - lr: 4.0390e-05 gnorm: 1.11 [ 7:41:09<16:53:21] +[titan] 2025-10-05 06:15:29,657 - root - INFO - step: 12515 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:15:29,657 - root - INFO - lr: 4.0383e-05 gnorm: 1.11 [ 7:41:20<16:53:10] +[titan] 2025-10-05 06:15:40,564 - root - INFO - step: 12520 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 06:15:40,564 - root - INFO - lr: 4.0375e-05 gnorm: 1.09 [ 7:41:31<16:52:59] +[titan] 2025-10-05 06:15:51,439 - root - INFO - step: 12525 loss: 2.2600 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 06:15:51,439 - root - INFO - lr: 4.0368e-05 gnorm: 1.12 [ 7:41:42<16:52:47] +[titan] 2025-10-05 06:16:02,309 - root - INFO - step: 12530 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2748 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 06:16:02,309 - root - INFO - lr: 
4.0361e-05 gnorm: 1.12 [ 7:41:52<16:52:36] +[titan] 2025-10-05 06:16:13,212 - root - INFO - step: 12535 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:16:13,212 - root - INFO - lr: 4.0353e-05 gnorm: 1.11 [ 7:42:03<16:52:24] +[titan] 2025-10-05 06:16:24,126 - root - INFO - step: 12540 loss: 2.3391 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 06:16:24,127 - root - INFO - lr: 4.0346e-05 gnorm: 1.13 [ 7:42:14<16:52:13] +[titan] 2025-10-05 06:16:35,001 - root - INFO - step: 12545 loss: 2.3246 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 06:16:35,001 - root - INFO - lr: 4.0339e-05 gnorm: 1.12 [ 7:42:25<16:52:01] +[titan] 2025-10-05 06:16:43,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:16:45,891 - root - INFO - step: 12550 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0627 +[titan] 2025-10-05 06:16:45,891 - root - INFO - lr: 4.0331e-05 gnorm: 1.10 [ 7:42:36<16:51:50] +[titan] 2025-10-05 06:16:56,777 - root - INFO - step: 12555 loss: 2.2647 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 06:16:56,777 - root - INFO - lr: 4.0324e-05 gnorm: 1.14 [ 7:42:47<16:51:39] +[titan] 2025-10-05 06:17:07,666 - root - INFO - step: 12560 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 06:17:07,666 - root - INFO - lr: 4.0317e-05 gnorm: 1.12 [ 7:42:58<16:51:27] +[titan] 2025-10-05 06:17:18,556 - root - INFO - step: 12565 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:17:18,557 - root - INFO - lr: 4.0309e-05 gnorm: 1.11 [ 7:43:09<16:51:16] +[titan] 2025-10-05 06:17:29,439 - root - INFO - step: 12570 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 06:17:29,439 - root - INFO - lr: 4.0302e-05 gnorm: 1.11 [ 7:43:20<16:51:04] +[titan] 2025-10-05 06:17:40,372 - root - INFO - step: 12575 loss: 2.2819 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 06:17:40,372 - root - INFO - lr: 4.0295e-05 gnorm: 1.11 [ 7:43:31<16:50:53] +[titan] 2025-10-05 06:17:51,237 - root - INFO - step: 12580 loss: 2.3250 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0525 +[titan] 2025-10-05 06:17:51,238 - root - INFO - lr: 
4.0287e-05 gnorm: 1.11 [ 7:43:41<16:50:41] +[titan] 2025-10-05 06:18:02,105 - root - INFO - step: 12585 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:18:02,105 - root - INFO - lr: 4.0280e-05 gnorm: 1.09 [ 7:43:52<16:50:30] +[titan] 2025-10-05 06:18:12,984 - root - INFO - step: 12590 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.0880 +[titan] 2025-10-05 06:18:12,984 - root - INFO - lr: 4.0273e-05 gnorm: 1.17 [ 7:44:03<16:50:19] +[titan] 2025-10-05 06:18:23,839 - root - INFO - step: 12595 loss: 2.3742 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0956 +[titan] 2025-10-05 06:18:23,839 - root - INFO - lr: 4.0265e-05 gnorm: 1.11 [ 7:44:14<16:50:07] +[titan] 2025-10-05 06:18:32,532 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:18:34,727 - root - INFO - step: 12600 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9762 +[titan] 2025-10-05 06:18:34,727 - root - INFO - lr: 4.0258e-05 gnorm: 1.14 [ 7:44:25<16:49:56] +[titan] 2025-10-05 06:18:45,664 - root - INFO - step: 12605 loss: 2.3207 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0497 +[titan] 2025-10-05 06:18:45,664 - root - INFO - lr: 4.0250e-05 gnorm: 1.17 [ 7:44:36<16:49:44] +[titan] 2025-10-05 06:18:56,552 - root - INFO - step: 12610 loss: 2.3981 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 06:18:56,552 - root - INFO - lr: 4.0243e-05 gnorm: 1.13 [ 7:44:47<16:49:33] +[titan] 2025-10-05 06:19:07,421 - root - INFO - step: 12615 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 06:19:07,421 - root - INFO - lr: 4.0236e-05 gnorm: 1.09 [ 7:44:58<16:49:21] +[titan] 2025-10-05 06:19:18,306 - root - INFO - step: 12620 loss: 2.3150 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 06:19:18,306 - root - INFO - lr: 4.0228e-05 gnorm: 1.12 [ 7:45:08<16:49:10] +[titan] 2025-10-05 06:19:29,188 - root - INFO - step: 12625 loss: 2.3979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1165 +[titan] 2025-10-05 06:19:29,189 - root - INFO - lr: 4.0221e-05 gnorm: 1.12 [ 7:45:19<16:48:59] +[titan] 2025-10-05 06:19:40,105 - root - INFO - step: 12630 loss: 2.2606 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9964 +[titan] 2025-10-05 06:19:40,105 - root - INFO - lr: 
4.0214e-05 gnorm: 1.14 [ 7:45:30<16:48:47] +[titan] 2025-10-05 06:19:50,986 - root - INFO - step: 12635 loss: 2.3546 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:19:50,986 - root - INFO - lr: 4.0206e-05 gnorm: 1.11 [ 7:45:41<16:48:36] +[titan] 2025-10-05 06:20:01,908 - root - INFO - step: 12640 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:20:01,908 - root - INFO - lr: 4.0199e-05 gnorm: 1.11 [ 7:45:52<16:48:24] +[titan] 2025-10-05 06:20:12,799 - root - INFO - step: 12645 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0332 +[titan] 2025-10-05 06:20:12,799 - root - INFO - lr: 4.0192e-05 gnorm: 1.10 [ 7:46:03<16:48:13] +[titan] 2025-10-05 06:20:21,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:20:23,685 - root - INFO - step: 12650 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 06:20:23,685 - root - INFO - lr: 4.0184e-05 gnorm: 1.11 [ 7:46:14<16:48:02] +[titan] 2025-10-05 06:20:34,581 - root - INFO - step: 12655 loss: 2.2611 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 06:20:34,581 - root - INFO - lr: 4.0177e-05 gnorm: 1.08 [ 7:46:25<16:47:50] +[titan] 2025-10-05 06:20:45,479 - root - INFO - step: 12660 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:20:45,480 - root - INFO - lr: 4.0169e-05 gnorm: 1.09 [ 7:46:36<16:47:39] +[titan] 2025-10-05 06:20:56,352 - root - INFO - step: 12665 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:20:56,352 - root - INFO - lr: 4.0162e-05 gnorm: 1.10 [ 7:46:46<16:47:27] +[titan] 2025-10-05 06:21:07,226 - root - INFO - step: 12670 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9534 +[titan] 2025-10-05 06:21:07,226 - root - INFO - lr: 4.0155e-05 gnorm: 1.08 [ 7:46:57<16:47:16] +[titan] 2025-10-05 06:21:18,106 - root - INFO - step: 12675 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9903 +[titan] 2025-10-05 06:21:18,106 - root - INFO - lr: 4.0147e-05 gnorm: 1.16 [ 7:47:08<16:47:04] +[titan] 2025-10-05 06:21:28,978 - root - INFO - step: 12680 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 06:21:28,978 - root - INFO - lr: 
4.0140e-05 gnorm: 1.11 [ 7:47:19<16:46:53] +[titan] 2025-10-05 06:21:39,844 - root - INFO - step: 12685 loss: 2.3348 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0622 +[titan] 2025-10-05 06:21:39,844 - root - INFO - lr: 4.0133e-05 gnorm: 1.13 [ 7:47:30<16:46:41] +[titan] 2025-10-05 06:21:50,731 - root - INFO - step: 12690 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 06:21:50,731 - root - INFO - lr: 4.0125e-05 gnorm: 1.14 [ 7:47:41<16:46:30] +[titan] 2025-10-05 06:22:01,611 - root - INFO - step: 12695 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 06:22:01,612 - root - INFO - lr: 4.0118e-05 gnorm: 1.10 [ 7:47:52<16:46:19] +[titan] 2025-10-05 06:22:10,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:22:12,500 - root - INFO - step: 12700 loss: 2.3396 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0653 +[titan] 2025-10-05 06:22:12,501 - root - INFO - lr: 4.0110e-05 gnorm: 1.11 [ 7:48:03<16:46:07] +[titan] 2025-10-05 06:22:23,372 - root - INFO - step: 12705 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0153 +[titan] 2025-10-05 06:22:23,372 - root - INFO - lr: 4.0103e-05 gnorm: 1.11 [ 7:48:14<16:45:56] +[titan] 2025-10-05 06:22:34,241 - root - INFO - step: 12710 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:22:34,241 - root - INFO - lr: 4.0096e-05 gnorm: 1.10 [ 7:48:24<16:45:44] +[titan] 2025-10-05 06:22:45,141 - root - INFO - step: 12715 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0103 +[titan] 2025-10-05 06:22:45,141 - root - INFO - lr: 4.0088e-05 gnorm: 1.14 [ 7:48:35<16:45:33] +[titan] 2025-10-05 06:22:56,018 - root - INFO - step: 12720 loss: 2.2452 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 06:22:56,018 - root - INFO - lr: 4.0081e-05 gnorm: 1.10 [ 7:48:46<16:45:21] +[titan] 2025-10-05 06:23:06,904 - root - INFO - step: 12725 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0456 +[titan] 2025-10-05 06:23:06,904 - root - INFO - lr: 4.0073e-05 gnorm: 1.10 [ 7:48:57<16:45:10] +[titan] 2025-10-05 06:23:17,777 - root - INFO - step: 12730 loss: 2.3547 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0804 +[titan] 2025-10-05 06:23:17,778 - root - INFO - lr: 
4.0066e-05 gnorm: 1.09 [ 7:49:08<16:44:59] +[titan] 2025-10-05 06:23:28,700 - root - INFO - step: 12735 loss: 2.4579 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1714 +[titan] 2025-10-05 06:23:28,700 - root - INFO - lr: 4.0059e-05 gnorm: 1.12 [ 7:49:19<16:44:47] +[titan] 2025-10-05 06:23:39,577 - root - INFO - step: 12740 loss: 2.2807 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 06:23:39,577 - root - INFO - lr: 4.0051e-05 gnorm: 1.08 [ 7:49:30<16:44:36] +[titan] 2025-10-05 06:23:50,466 - root - INFO - step: 12745 loss: 2.2580 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9955 +[titan] 2025-10-05 06:23:50,467 - root - INFO - lr: 4.0044e-05 gnorm: 1.13 [ 7:49:41<16:44:24] +[titan] 2025-10-05 06:23:59,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:24:01,332 - root - INFO - step: 12750 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:24:01,332 - root - INFO - lr: 4.0036e-05 gnorm: 1.16 [ 7:49:51<16:44:13] +[titan] 2025-10-05 06:24:12,211 - root - INFO - step: 12755 loss: 2.3122 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 06:24:12,211 - root - INFO - lr: 4.0029e-05 gnorm: 1.10 [ 7:50:02<16:44:01] +[titan] 2025-10-05 06:24:23,070 - root - INFO - step: 12760 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 06:24:23,070 - root - INFO - lr: 4.0022e-05 gnorm: 1.11 [ 7:50:13<16:43:50] +[titan] 2025-10-05 06:24:33,960 - root - INFO - step: 12765 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0308 +[titan] 2025-10-05 06:24:33,960 - root - INFO - lr: 4.0014e-05 gnorm: 1.11 [ 7:50:24<16:43:39] +[titan] 2025-10-05 06:24:44,855 - root - INFO - step: 12770 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0202 +[titan] 2025-10-05 06:24:44,855 - root - INFO - lr: 4.0007e-05 gnorm: 1.10 [ 7:50:35<16:43:27] +[titan] 2025-10-05 06:24:55,732 - root - INFO - step: 12775 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 06:24:55,732 - root - INFO - lr: 3.9999e-05 gnorm: 1.13 [ 7:50:46<16:43:16] +[titan] 2025-10-05 06:25:06,578 - root - INFO - step: 12780 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:06,578 - root - INFO - lr: 
3.9992e-05 gnorm: 1.10 [ 7:50:57<16:43:04] +[titan] 2025-10-05 06:25:17,446 - root - INFO - step: 12785 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0703 +[titan] 2025-10-05 06:25:17,446 - root - INFO - lr: 3.9984e-05 gnorm: 1.15 [ 7:51:08<16:42:53] +[titan] 2025-10-05 06:25:28,322 - root - INFO - step: 12790 loss: 2.1995 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 06:25:28,322 - root - INFO - lr: 3.9977e-05 gnorm: 1.08 [ 7:51:18<16:42:41] +[titan] 2025-10-05 06:25:39,196 - root - INFO - step: 12795 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0818 +[titan] 2025-10-05 06:25:39,196 - root - INFO - lr: 3.9970e-05 gnorm: 1.11 [ 7:51:29<16:42:30] +[titan] 2025-10-05 06:25:48,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:25:50,190 - root - INFO - step: 12800 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:50,191 - root - INFO - lr: 3.9962e-05 gnorm: 1.12 [ 7:51:40<16:42:19] +[titan] 2025-10-05 06:25:50,371 - root - INFO - Dumping profiler traces at step 12800 +[titan] 2025-10-05 06:25:50,410 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:26:01,279 - root - INFO - step: 12805 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 29,551 tflops: 409.98 mfu: 41.45% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 06:26:01,280 - root - INFO - lr: 3.9955e-05 gnorm: 1.13 [ 7:51:51<16:42:08] +[titan] 2025-10-05 06:26:12,154 - root - INFO - step: 12810 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:26:12,154 - root - INFO - lr: 3.9947e-05 gnorm: 1.08 [ 7:52:02<16:41:56] +[titan] 2025-10-05 06:26:23,032 - root - INFO - step: 12815 loss: 2.3306 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0579 +[titan] 2025-10-05 06:26:23,033 - root - INFO - lr: 3.9940e-05 gnorm: 1.06 [ 7:52:13<16:41:45] +[titan] 2025-10-05 06:26:33,940 - root - INFO - step: 12820 loss: 2.3775 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0989 +[titan] 2025-10-05 06:26:33,941 - root - INFO - lr: 3.9932e-05 gnorm: 1.15 [ 7:52:24<16:41:34] +[titan] 2025-10-05 06:26:44,836 - root - INFO - step: 12825 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0008 +[titan] 2025-10-05 06:26:44,836 - root - INFO - lr: 3.9925e-05 gnorm: 1.05 [ 7:52:35<16:41:22] +[titan] 2025-10-05 06:26:55,799 - root - INFO - step: 12830 loss: 
2.3367 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0630 +[titan] 2025-10-05 06:26:55,799 - root - INFO - lr: 3.9918e-05 gnorm: 1.14 [ 7:52:46<16:41:11] +[titan] 2025-10-05 06:27:06,678 - root - INFO - step: 12835 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 06:27:06,678 - root - INFO - lr: 3.9910e-05 gnorm: 1.07 [ 7:52:57<16:40:59] +[titan] 2025-10-05 06:27:17,590 - root - INFO - step: 12840 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:27:17,590 - root - INFO - lr: 3.9903e-05 gnorm: 1.10 [ 7:53:08<16:40:48] +[titan] 2025-10-05 06:27:28,495 - root - INFO - step: 12845 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0092 +[titan] 2025-10-05 06:27:28,496 - root - INFO - lr: 3.9895e-05 gnorm: 1.10 [ 7:53:19<16:40:37] +[titan] 2025-10-05 06:27:37,188 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:27:39,368 - root - INFO - step: 12850 loss: 2.2958 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0276 +[titan] 2025-10-05 06:27:39,368 - root - INFO - lr: 3.9888e-05 gnorm: 1.09 [ 7:53:29<16:40:25] +[titan] 2025-10-05 06:27:50,338 - root - INFO - step: 12855 loss: 2.2825 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0128 +[titan] 2025-10-05 06:27:50,339 - root - INFO - lr: 3.9880e-05 gnorm: 1.16 [ 7:53:40<16:40:14] +[titan] 2025-10-05 06:28:01,245 - root - INFO - step: 12860 loss: 2.3056 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:28:01,245 - root - INFO - lr: 3.9873e-05 gnorm: 1.08 [ 7:53:51<16:40:03] +[titan] 2025-10-05 06:28:12,135 - root - INFO - step: 12865 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9912 +[titan] 2025-10-05 06:28:12,135 - root - INFO - lr: 3.9865e-05 gnorm: 1.10 [ 7:54:02<16:39:51] +[titan] 2025-10-05 06:28:23,005 - root - INFO - step: 12870 loss: 2.3501 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 06:28:23,005 - root - INFO - lr: 3.9858e-05 gnorm: 1.07 [ 7:54:13<16:39:40] +[titan] 2025-10-05 06:28:33,877 - root - INFO - step: 12875 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0229 +[titan] 2025-10-05 06:28:33,877 - root - INFO - lr: 3.9850e-05 gnorm: 1.13 [ 7:54:24<16:39:28] +[titan] 2025-10-05 06:28:44,761 - root - INFO - step: 12880 loss: 2.3117 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:28:44,761 - root - INFO - lr: 
3.9843e-05 gnorm: 1.15 [ 7:54:35<16:39:17] +[titan] 2025-10-05 06:28:55,685 - root - INFO - step: 12885 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:28:55,685 - root - INFO - lr: 3.9836e-05 gnorm: 1.11 [ 7:54:46<16:39:06] +[titan] 2025-10-05 06:29:06,556 - root - INFO - step: 12890 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:29:06,556 - root - INFO - lr: 3.9828e-05 gnorm: 1.12 [ 7:54:57<16:38:54] +[titan] 2025-10-05 06:29:17,466 - root - INFO - step: 12895 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:29:17,467 - root - INFO - lr: 3.9821e-05 gnorm: 1.11 [ 7:55:08<16:38:43] +[titan] 2025-10-05 06:29:26,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:29:28,342 - root - INFO - step: 12900 loss: 2.3579 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0826 +[titan] 2025-10-05 06:29:28,342 - root - INFO - lr: 3.9813e-05 gnorm: 1.11 [ 7:55:18<16:38:31] +[titan] 2025-10-05 06:29:39,206 - root - INFO - step: 12905 loss: 2.2414 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 06:29:39,206 - root - INFO - lr: 3.9806e-05 gnorm: 1.08 [ 7:55:29<16:38:20] +[titan] 2025-10-05 06:29:50,114 - root - INFO - step: 12910 loss: 2.2702 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:29:50,114 - root - INFO - lr: 3.9798e-05 gnorm: 1.13 [ 7:55:40<16:38:09] +[titan] 2025-10-05 06:30:00,993 - root - INFO - step: 12915 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0072 +[titan] 2025-10-05 06:30:00,993 - root - INFO - lr: 3.9791e-05 gnorm: 1.07 [ 7:55:51<16:37:57] +[titan] 2025-10-05 06:30:11,897 - root - INFO - step: 12920 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0307 +[titan] 2025-10-05 06:30:11,898 - root - INFO - lr: 3.9783e-05 gnorm: 1.04 [ 7:56:02<16:37:46] +[titan] 2025-10-05 06:30:22,817 - root - INFO - step: 12925 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0581 +[titan] 2025-10-05 06:30:22,817 - root - INFO - lr: 3.9776e-05 gnorm: 1.13 [ 7:56:13<16:37:34] +[titan] 2025-10-05 06:30:33,737 - root - INFO - step: 12930 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:30:33,737 - root - INFO - lr: 
3.9768e-05 gnorm: 1.13 [ 7:56:24<16:37:23] +[titan] 2025-10-05 06:30:44,627 - root - INFO - step: 12935 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:30:44,627 - root - INFO - lr: 3.9761e-05 gnorm: 1.10 [ 7:56:35<16:37:12] +[titan] 2025-10-05 06:30:55,585 - root - INFO - step: 12940 loss: 2.3356 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 06:30:55,585 - root - INFO - lr: 3.9753e-05 gnorm: 1.11 [ 7:56:46<16:37:00] +[titan] 2025-10-05 06:31:06,518 - root - INFO - step: 12945 loss: 2.2859 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:31:06,518 - root - INFO - lr: 3.9746e-05 gnorm: 1.10 [ 7:56:57<16:36:49] +[titan] 2025-10-05 06:31:15,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:31:17,447 - root - INFO - step: 12950 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0289 +[titan] 2025-10-05 06:31:17,447 - root - INFO - lr: 3.9738e-05 gnorm: 1.11 [ 7:57:08<16:36:38] +[titan] 2025-10-05 06:31:28,381 - root - INFO - step: 12955 loss: 2.3005 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 06:31:28,382 - root - INFO - lr: 3.9731e-05 gnorm: 1.07 [ 7:57:18<16:36:27] +[titan] 2025-10-05 06:31:39,314 - root - INFO - step: 12960 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0616 +[titan] 2025-10-05 06:31:39,314 - root - INFO - lr: 3.9723e-05 gnorm: 1.13 [ 7:57:29<16:36:15] +[titan] 2025-10-05 06:31:50,231 - root - INFO - step: 12965 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 06:31:50,231 - root - INFO - lr: 3.9716e-05 gnorm: 1.14 [ 7:57:40<16:36:04] +[titan] 2025-10-05 06:32:01,132 - root - INFO - step: 12970 loss: 2.3312 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:32:01,132 - root - INFO - lr: 3.9708e-05 gnorm: 1.16 [ 7:57:51<16:35:53] +[titan] 2025-10-05 06:32:12,023 - root - INFO - step: 12975 loss: 2.2497 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 06:32:12,023 - root - INFO - lr: 3.9701e-05 gnorm: 1.13 [ 7:58:02<16:35:41] +[titan] 2025-10-05 06:32:22,921 - root - INFO - step: 12980 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0314 +[titan] 2025-10-05 06:32:22,922 - root - INFO - lr: 
3.9693e-05 gnorm: 1.08 [ 7:58:13<16:35:30] +[titan] 2025-10-05 06:32:33,791 - root - INFO - step: 12985 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 06:32:33,791 - root - INFO - lr: 3.9686e-05 gnorm: 1.06 [ 7:58:24<16:35:18] +[titan] 2025-10-05 06:32:44,706 - root - INFO - step: 12990 loss: 2.3628 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 06:32:44,706 - root - INFO - lr: 3.9678e-05 gnorm: 1.12 [ 7:58:35<16:35:07] +[titan] 2025-10-05 06:32:55,609 - root - INFO - step: 12995 loss: 2.2830 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:32:55,609 - root - INFO - lr: 3.9671e-05 gnorm: 1.13 [ 7:58:46<16:34:56] +[titan] 2025-10-05 06:33:04,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:33:06,487 - root - INFO - step: 13000 loss: 2.2887 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:33:06,487 - root - INFO - lr: 3.9663e-05 gnorm: 1.09 [ 7:58:57<16:34:44] +[titan] 2025-10-05 06:33:17,365 - root - INFO - step: 13005 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9733 +[titan] 2025-10-05 06:33:17,365 - root - INFO - lr: 3.9656e-05 gnorm: 1.09 [ 7:59:07<16:34:33] +[titan] 2025-10-05 06:33:28,255 - root - INFO - step: 13010 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0677 +[titan] 2025-10-05 06:33:28,255 - root - INFO - lr: 3.9648e-05 gnorm: 1.10 [ 7:59:18<16:34:21] +[titan] 2025-10-05 06:33:39,107 - root - INFO - step: 13015 loss: 2.3870 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 06:33:39,107 - root - INFO - lr: 3.9641e-05 gnorm: 1.14 [ 7:59:29<16:34:10] +[titan] 2025-10-05 06:33:49,999 - root - INFO - step: 13020 loss: 2.2362 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9754 +[titan] 2025-10-05 06:33:49,999 - root - INFO - lr: 3.9633e-05 gnorm: 1.04 [ 7:59:40<16:33:59] +[titan] 2025-10-05 06:34:00,906 - root - INFO - step: 13025 loss: 2.3058 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0372 +[titan] 2025-10-05 06:34:00,906 - root - INFO - lr: 3.9626e-05 gnorm: 1.10 [ 7:59:51<16:33:47] +[titan] 2025-10-05 06:34:11,756 - root - INFO - step: 13030 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9901 +[titan] 2025-10-05 06:34:11,756 - root - INFO - lr: 
3.9618e-05 gnorm: 1.10 [ 8:00:02<16:33:36] +[titan] 2025-10-05 06:34:22,620 - root - INFO - step: 13035 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:34:22,620 - root - INFO - lr: 3.9611e-05 gnorm: 1.07 [ 8:00:13<16:33:24] +[titan] 2025-10-05 06:34:33,499 - root - INFO - step: 13040 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:34:33,499 - root - INFO - lr: 3.9603e-05 gnorm: 1.10 [ 8:00:24<16:33:13] +[titan] 2025-10-05 06:34:44,365 - root - INFO - step: 13045 loss: 2.3062 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 06:34:44,365 - root - INFO - lr: 3.9596e-05 gnorm: 1.12 [ 8:00:34<16:33:01] +[titan] 2025-10-05 06:34:53,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:34:55,288 - root - INFO - step: 13050 loss: 2.2984 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:34:55,289 - root - INFO - lr: 3.9588e-05 gnorm: 1.10 [ 8:00:45<16:32:50] +[titan] 2025-10-05 06:35:06,196 - root - INFO - step: 13055 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9765 +[titan] 2025-10-05 06:35:06,196 - root - INFO - lr: 3.9581e-05 gnorm: 1.05 [ 8:00:56<16:32:39] +[titan] 2025-10-05 06:35:17,080 - root - INFO - step: 13060 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 06:35:17,080 - root - INFO - lr: 3.9573e-05 gnorm: 1.10 [ 8:01:07<16:32:27] +[titan] 2025-10-05 06:35:27,969 - root - INFO - step: 13065 loss: 2.2499 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:35:27,970 - root - INFO - lr: 3.9566e-05 gnorm: 1.09 [ 8:01:18<16:32:16] +[titan] 2025-10-05 06:35:38,866 - root - INFO - step: 13070 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0717 +[titan] 2025-10-05 06:35:38,867 - root - INFO - lr: 3.9558e-05 gnorm: 1.12 [ 8:01:29<16:32:05] +[titan] 2025-10-05 06:35:49,752 - root - INFO - step: 13075 loss: 2.3177 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0458 +[titan] 2025-10-05 06:35:49,752 - root - INFO - lr: 3.9551e-05 gnorm: 1.11 [ 8:01:40<16:31:53] +[titan] 2025-10-05 06:36:00,668 - root - INFO - step: 13080 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 06:36:00,669 - root - INFO - lr: 
3.9543e-05 gnorm: 1.07 [ 8:01:51<16:31:42] +[titan] 2025-10-05 06:36:11,580 - root - INFO - step: 13085 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 06:36:11,581 - root - INFO - lr: 3.9535e-05 gnorm: 1.11 [ 8:02:02<16:31:31] +[titan] 2025-10-05 06:36:22,465 - root - INFO - step: 13090 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:36:22,465 - root - INFO - lr: 3.9528e-05 gnorm: 1.09 [ 8:02:13<16:31:19] +[titan] 2025-10-05 06:36:33,326 - root - INFO - step: 13095 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9630 +[titan] 2025-10-05 06:36:33,326 - root - INFO - lr: 3.9520e-05 gnorm: 1.10 [ 8:02:23<16:31:08] +[titan] 2025-10-05 06:36:41,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:36:44,174 - root - INFO - step: 13100 loss: 2.3105 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:36:44,174 - root - INFO - lr: 3.9513e-05 gnorm: 1.14 [ 8:02:34<16:30:56] +[titan] 2025-10-05 06:36:55,075 - root - INFO - step: 13105 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:36:55,075 - root - INFO - lr: 3.9505e-05 gnorm: 1.18 [ 8:02:45<16:30:45] +[titan] 2025-10-05 06:37:05,918 - root - INFO - step: 13110 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 06:37:05,918 - root - INFO - lr: 3.9498e-05 gnorm: 1.08 [ 8:02:56<16:30:33] +[titan] 2025-10-05 06:37:16,786 - root - INFO - step: 13115 loss: 2.2582 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 06:37:16,786 - root - INFO - lr: 3.9490e-05 gnorm: 1.10 [ 8:03:07<16:30:22] +[titan] 2025-10-05 06:37:27,685 - root - INFO - step: 13120 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 06:37:27,685 - root - INFO - lr: 3.9483e-05 gnorm: 1.11 [ 8:03:18<16:30:11] +[titan] 2025-10-05 06:37:38,554 - root - INFO - step: 13125 loss: 2.3124 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 06:37:38,554 - root - INFO - lr: 3.9475e-05 gnorm: 1.10 [ 8:03:29<16:29:59] +[titan] 2025-10-05 06:37:49,418 - root - INFO - step: 13130 loss: 2.3195 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:37:49,418 - root - INFO - lr: 
3.9468e-05 gnorm: 1.09 [ 8:03:40<16:29:48] +[titan] 2025-10-05 06:38:00,337 - root - INFO - step: 13135 loss: 2.2981 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:38:00,337 - root - INFO - lr: 3.9460e-05 gnorm: 1.09 [ 8:03:50<16:29:36] +[titan] 2025-10-05 06:38:11,199 - root - INFO - step: 13140 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9924 +[titan] 2025-10-05 06:38:11,200 - root - INFO - lr: 3.9452e-05 gnorm: 1.06 [ 8:04:01<16:29:25] +[titan] 2025-10-05 06:38:22,080 - root - INFO - step: 13145 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2759 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 06:38:22,080 - root - INFO - lr: 3.9445e-05 gnorm: 1.08 [ 8:04:12<16:29:14] +[titan] 2025-10-05 06:38:30,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:38:32,990 - root - INFO - step: 13150 loss: 2.2897 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0219 +[titan] 2025-10-05 06:38:32,990 - root - INFO - lr: 3.9437e-05 gnorm: 1.12 [ 8:04:23<16:29:02] +[titan] 2025-10-05 06:38:43,859 - root - INFO - step: 13155 loss: 2.2817 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0147 +[titan] 2025-10-05 06:38:43,859 - root - INFO - lr: 3.9430e-05 gnorm: 1.08 [ 8:04:34<16:28:51] +[titan] 2025-10-05 06:38:54,735 - root - INFO - step: 13160 loss: 2.3131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0428 +[titan] 2025-10-05 06:38:54,736 - root - INFO - lr: 3.9422e-05 gnorm: 1.11 [ 8:04:45<16:28:39] +[titan] 2025-10-05 06:39:05,628 - root - INFO - step: 13165 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 06:39:05,629 - root - INFO - lr: 3.9415e-05 gnorm: 1.10 [ 8:04:56<16:28:28] +[titan] 2025-10-05 06:39:16,489 - root - INFO - step: 13170 loss: 2.3292 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:39:16,489 - root - INFO - lr: 3.9407e-05 gnorm: 1.11 [ 8:05:07<16:28:17] +[titan] 2025-10-05 06:39:27,377 - root - INFO - step: 13175 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9764 +[titan] 2025-10-05 06:39:27,377 - root - INFO - lr: 3.9399e-05 gnorm: 1.07 [ 8:05:17<16:28:05] +[titan] 2025-10-05 06:39:38,260 - root - INFO - step: 13180 loss: 2.2929 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0261 +[titan] 2025-10-05 06:39:38,260 - root - INFO - lr: 
3.9392e-05 gnorm: 1.18 [ 8:05:28<16:27:54] +[titan] 2025-10-05 06:39:49,151 - root - INFO - step: 13185 loss: 2.2880 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0212 +[titan] 2025-10-05 06:39:49,152 - root - INFO - lr: 3.9384e-05 gnorm: 1.13 [ 8:05:39<16:27:42] +[titan] 2025-10-05 06:40:00,050 - root - INFO - step: 13190 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 06:40:00,050 - root - INFO - lr: 3.9377e-05 gnorm: 1.11 [ 8:05:50<16:27:31] +[titan] 2025-10-05 06:40:10,934 - root - INFO - step: 13195 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9939 +[titan] 2025-10-05 06:40:10,934 - root - INFO - lr: 3.9369e-05 gnorm: 1.10 [ 8:06:01<16:27:20] +[titan] 2025-10-05 06:40:19,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:40:21,819 - root - INFO - step: 13200 loss: 2.2675 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0021 +[titan] 2025-10-05 06:40:21,820 - root - INFO - lr: 3.9362e-05 gnorm: 1.13 [ 8:06:12<16:27:08] +[titan] 2025-10-05 06:40:32,683 - root - INFO - step: 13205 loss: 2.3004 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:40:32,683 - root - INFO - lr: 3.9354e-05 gnorm: 1.11 [ 8:06:23<16:26:57] +[titan] 2025-10-05 06:40:43,552 - root - INFO - step: 13210 loss: 2.3321 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0596 +[titan] 2025-10-05 06:40:43,552 - root - INFO - lr: 3.9346e-05 gnorm: 1.09 [ 8:06:34<16:26:45] +[titan] 2025-10-05 06:40:54,441 - root - INFO - step: 13215 loss: 2.3746 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 06:40:54,441 - root - INFO - lr: 3.9339e-05 gnorm: 1.09 [ 8:06:45<16:26:34] +[titan] 2025-10-05 06:41:05,315 - root - INFO - step: 13220 loss: 2.3394 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0660 +[titan] 2025-10-05 06:41:05,315 - root - INFO - lr: 3.9331e-05 gnorm: 1.13 [ 8:06:55<16:26:23] +[titan] 2025-10-05 06:41:16,174 - root - INFO - step: 13225 loss: 2.2522 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 06:41:16,174 - root - INFO - lr: 3.9324e-05 gnorm: 1.10 [ 8:07:06<16:26:11] +[titan] 2025-10-05 06:41:27,031 - root - INFO - step: 13230 loss: 2.2903 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:41:27,031 - root - INFO - lr: 
3.9316e-05 gnorm: 1.10 [ 8:07:17<16:26:00] +[titan] 2025-10-05 06:41:37,890 - root - INFO - step: 13235 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0078 +[titan] 2025-10-05 06:41:37,890 - root - INFO - lr: 3.9308e-05 gnorm: 1.09 [ 8:07:28<16:25:48] +[titan] 2025-10-05 06:41:48,764 - root - INFO - step: 13240 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 06:41:48,764 - root - INFO - lr: 3.9301e-05 gnorm: 1.10 [ 8:07:39<16:25:37] +[titan] 2025-10-05 06:41:59,671 - root - INFO - step: 13245 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0046 +[titan] 2025-10-05 06:41:59,672 - root - INFO - lr: 3.9293e-05 gnorm: 1.13 [ 8:07:50<16:25:26] +[titan] 2025-10-05 06:42:08,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:42:10,557 - root - INFO - step: 13250 loss: 2.3326 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0599 +[titan] 2025-10-05 06:42:10,557 - root - INFO - lr: 3.9286e-05 gnorm: 1.14 [ 8:08:01<16:25:14] +[titan] 2025-10-05 06:42:21,421 - root - INFO - step: 13255 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 06:42:21,421 - root - INFO - lr: 3.9278e-05 gnorm: 1.14 [ 8:08:11<16:25:03] +[titan] 2025-10-05 06:42:32,317 - root - INFO - step: 13260 loss: 2.2022 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9453 +[titan] 2025-10-05 06:42:32,317 - root - INFO - lr: 3.9270e-05 gnorm: 1.07 [ 8:08:22<16:24:51] +[titan] 2025-10-05 06:42:43,197 - root - INFO - step: 13265 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 06:42:43,198 - root - INFO - lr: 3.9263e-05 gnorm: 1.11 [ 8:08:33<16:24:40] +[titan] 2025-10-05 06:42:54,090 - root - INFO - step: 13270 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 06:42:54,091 - root - INFO - lr: 3.9255e-05 gnorm: 1.10 [ 8:08:44<16:24:29] +[titan] 2025-10-05 06:43:05,001 - root - INFO - step: 13275 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 06:43:05,001 - root - INFO - lr: 3.9248e-05 gnorm: 1.10 [ 8:08:55<16:24:17] +[titan] 2025-10-05 06:43:15,880 - root - INFO - step: 13280 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:15,880 - root - INFO - lr: 
3.9240e-05 gnorm: 1.07 [ 8:09:06<16:24:06] +[titan] 2025-10-05 06:43:26,737 - root - INFO - step: 13285 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:26,737 - root - INFO - lr: 3.9232e-05 gnorm: 1.11 [ 8:09:17<16:23:55] +[titan] 2025-10-05 06:43:37,602 - root - INFO - step: 13290 loss: 2.3086 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:43:37,602 - root - INFO - lr: 3.9225e-05 gnorm: 1.10 [ 8:09:28<16:23:43] +[titan] 2025-10-05 06:43:48,473 - root - INFO - step: 13295 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 06:43:48,473 - root - INFO - lr: 3.9217e-05 gnorm: 1.11 [ 8:09:39<16:23:32] +[titan] 2025-10-05 06:43:57,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:43:59,317 - root - INFO - step: 13300 loss: 2.3797 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 06:43:59,318 - root - INFO - lr: 3.9209e-05 gnorm: 1.11 [ 8:09:49<16:23:20] +[titan] 2025-10-05 06:44:10,186 - root - INFO - step: 13305 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0141 +[titan] 2025-10-05 06:44:10,186 - root - INFO - lr: 3.9202e-05 gnorm: 1.09 [ 8:10:00<16:23:09] +[titan] 2025-10-05 06:44:21,180 - root - INFO - step: 13310 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0337 +[titan] 2025-10-05 06:44:21,180 - root - INFO - lr: 3.9194e-05 gnorm: 1.09 [ 8:10:11<16:22:58] +[titan] 2025-10-05 06:44:25,698 - root - INFO - Dumping profiler traces at step 13312 +[titan] 2025-10-05 06:44:25,737 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:44:32,265 - root - INFO - step: 13315 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 29,562 tflops: 410.13 mfu: 41.47% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 06:44:32,265 - root - INFO - lr: 3.9187e-05 gnorm: 1.04 [ 8:10:22<16:22:47] +[titan] 2025-10-05 06:44:43,144 - root - INFO - step: 13320 loss: 2.3112 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 06:44:43,144 - root - INFO - lr: 3.9179e-05 gnorm: 1.13 [ 8:10:33<16:22:35] +[titan] 2025-10-05 06:44:54,006 - root - INFO - step: 13325 loss: 2.3530 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0783 +[titan] 2025-10-05 06:44:54,006 - root - INFO - lr: 3.9171e-05 gnorm: 1.06 [ 8:10:44<16:22:24] +[titan] 2025-10-05 06:45:04,897 - root - INFO - step: 13330 loss: 
2.3671 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 06:45:04,897 - root - INFO - lr: 3.9164e-05 gnorm: 1.11 [ 8:10:55<16:22:13] +[titan] 2025-10-05 06:45:15,754 - root - INFO - step: 13335 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0644 +[titan] 2025-10-05 06:45:15,754 - root - INFO - lr: 3.9156e-05 gnorm: 1.16 [ 8:11:06<16:22:01] +[titan] 2025-10-05 06:45:26,632 - root - INFO - step: 13340 loss: 2.2623 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:45:26,632 - root - INFO - lr: 3.9148e-05 gnorm: 1.12 [ 8:11:17<16:21:50] +[titan] 2025-10-05 06:45:37,522 - root - INFO - step: 13345 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 06:45:37,522 - root - INFO - lr: 3.9141e-05 gnorm: 1.07 [ 8:11:28<16:21:38] +[titan] 2025-10-05 06:45:46,192 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:45:48,374 - root - INFO - step: 13350 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:45:48,374 - root - INFO - lr: 3.9133e-05 gnorm: 1.10 [ 8:11:38<16:21:27] +[titan] 2025-10-05 06:45:59,227 - root - INFO - step: 13355 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0173 +[titan] 2025-10-05 06:45:59,227 - root - INFO - lr: 3.9126e-05 gnorm: 1.11 [ 8:11:49<16:21:15] +[titan] 2025-10-05 06:46:10,100 - root - INFO - step: 13360 loss: 2.3111 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:46:10,100 - root - INFO - lr: 3.9118e-05 gnorm: 1.11 [ 8:12:00<16:21:04] +[titan] 2025-10-05 06:46:20,957 - root - INFO - step: 13365 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 06:46:20,958 - root - INFO - lr: 3.9110e-05 gnorm: 1.10 [ 8:12:11<16:20:53] +[titan] 2025-10-05 06:46:31,838 - root - INFO - step: 13370 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0254 +[titan] 2025-10-05 06:46:31,838 - root - INFO - lr: 3.9103e-05 gnorm: 1.13 [ 8:12:22<16:20:41] +[titan] 2025-10-05 06:46:42,735 - root - INFO - step: 13375 loss: 2.3437 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0696 +[titan] 2025-10-05 06:46:42,735 - root - INFO - lr: 3.9095e-05 gnorm: 1.12 [ 8:12:33<16:20:30] +[titan] 2025-10-05 06:46:53,595 - root - INFO - step: 13380 loss: 2.2952 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0281 +[titan] 2025-10-05 06:46:53,595 - root - INFO - lr: 
3.9087e-05 gnorm: 1.07 [ 8:12:44<16:20:18] +[titan] 2025-10-05 06:47:04,484 - root - INFO - step: 13385 loss: 2.3167 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0450 +[titan] 2025-10-05 06:47:04,484 - root - INFO - lr: 3.9080e-05 gnorm: 1.12 [ 8:12:55<16:20:07] +[titan] 2025-10-05 06:47:15,385 - root - INFO - step: 13390 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:47:15,385 - root - INFO - lr: 3.9072e-05 gnorm: 1.13 [ 8:13:05<16:19:56] +[titan] 2025-10-05 06:47:26,291 - root - INFO - step: 13395 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:47:26,291 - root - INFO - lr: 3.9064e-05 gnorm: 1.09 [ 8:13:16<16:19:44] +[titan] 2025-10-05 06:47:34,979 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:47:37,159 - root - INFO - step: 13400 loss: 2.2934 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0255 +[titan] 2025-10-05 06:47:37,159 - root - INFO - lr: 3.9057e-05 gnorm: 1.10 [ 8:13:27<16:19:33] +[titan] 2025-10-05 06:47:48,051 - root - INFO - step: 13405 loss: 2.1829 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 06:47:48,051 - root - INFO - lr: 3.9049e-05 gnorm: 1.13 [ 8:13:38<16:19:22] +[titan] 2025-10-05 06:47:58,962 - root - INFO - step: 13410 loss: 2.3403 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0664 +[titan] 2025-10-05 06:47:58,962 - root - INFO - lr: 3.9041e-05 gnorm: 1.08 [ 8:13:49<16:19:10] +[titan] 2025-10-05 06:48:09,859 - root - INFO - step: 13415 loss: 2.2971 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:48:09,859 - root - INFO - lr: 3.9034e-05 gnorm: 1.09 [ 8:14:00<16:18:59] +[titan] 2025-10-05 06:48:20,742 - root - INFO - step: 13420 loss: 2.3033 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0348 +[titan] 2025-10-05 06:48:20,742 - root - INFO - lr: 3.9026e-05 gnorm: 1.09 [ 8:14:11<16:18:48] +[titan] 2025-10-05 06:48:31,616 - root - INFO - step: 13425 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0709 +[titan] 2025-10-05 06:48:31,616 - root - INFO - lr: 3.9018e-05 gnorm: 1.11 [ 8:14:22<16:18:36] +[titan] 2025-10-05 06:48:42,471 - root - INFO - step: 13430 loss: 2.2153 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 06:48:42,471 - root - INFO - lr: 
3.9011e-05 gnorm: 1.09 [ 8:14:33<16:18:25] +[titan] 2025-10-05 06:48:53,334 - root - INFO - step: 13435 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 06:48:53,334 - root - INFO - lr: 3.9003e-05 gnorm: 1.10 [ 8:14:43<16:18:13] +[titan] 2025-10-05 06:49:04,235 - root - INFO - step: 13440 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 06:49:04,235 - root - INFO - lr: 3.8995e-05 gnorm: 1.10 [ 8:14:54<16:18:02] +[titan] 2025-10-05 06:49:15,122 - root - INFO - step: 13445 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0366 +[titan] 2025-10-05 06:49:15,122 - root - INFO - lr: 3.8988e-05 gnorm: 1.10 [ 8:15:05<16:17:51] +[titan] 2025-10-05 06:49:23,790 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:49:25,981 - root - INFO - step: 13450 loss: 2.2828 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2713 [37mglobal_avg_mtp_loss: 2.0115 +[titan] 2025-10-05 06:49:25,981 - root - INFO - lr: 3.8980e-05 gnorm: 1.07 [ 8:15:16<16:17:39] +[titan] 2025-10-05 06:49:36,831 - root - INFO - step: 13455 loss: 2.2498 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9867 +[titan] 2025-10-05 06:49:36,831 - root - INFO - lr: 3.8972e-05 gnorm: 1.03 [ 8:15:27<16:17:28] +[titan] 2025-10-05 06:49:47,714 - root - INFO - step: 13460 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0682 +[titan] 2025-10-05 06:49:47,714 - root - INFO - lr: 3.8965e-05 gnorm: 1.14 [ 8:15:38<16:17:16] +[titan] 2025-10-05 06:49:58,585 - root - INFO - step: 13465 loss: 2.2324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 06:49:58,585 - root - INFO - lr: 3.8957e-05 gnorm: 1.11 [ 8:15:49<16:17:05] +[titan] 2025-10-05 06:50:09,688 - root - INFO - step: 13470 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 29,514 tflops: 409.47 mfu: 41.40% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9928 +[titan] 2025-10-05 06:50:09,688 - root - INFO - lr: 3.8949e-05 gnorm: 1.07 [ 8:16:00<16:16:54] +[titan] 2025-10-05 06:50:20,551 - root - INFO - step: 13475 loss: 2.2930 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0250 +[titan] 2025-10-05 06:50:20,551 - root - INFO - lr: 3.8942e-05 gnorm: 1.12 [ 8:16:11<16:16:43] +[titan] 2025-10-05 06:50:31,416 - root - INFO - step: 13480 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:50:31,416 - root - INFO - lr: 
3.8934e-05 gnorm: 1.09 [ 8:16:21<16:16:31] +[titan] 2025-10-05 06:50:42,269 - root - INFO - step: 13485 loss: 2.2218 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9617 +[titan] 2025-10-05 06:50:42,269 - root - INFO - lr: 3.8926e-05 gnorm: 1.10 [ 8:16:32<16:16:20] +[titan] 2025-10-05 06:50:53,127 - root - INFO - step: 13490 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 06:50:53,128 - root - INFO - lr: 3.8919e-05 gnorm: 1.07 [ 8:16:43<16:16:08] +[titan] 2025-10-05 06:51:03,982 - root - INFO - step: 13495 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:51:03,982 - root - INFO - lr: 3.8911e-05 gnorm: 1.09 [ 8:16:54<16:15:57] +[titan] 2025-10-05 06:51:12,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:51:14,857 - root - INFO - step: 13500 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 06:51:14,857 - root - INFO - lr: 3.8903e-05 gnorm: 1.09 [ 8:17:05<16:15:46] +[titan] 2025-10-05 06:51:25,746 - root - INFO - step: 13505 loss: 2.2715 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 06:51:25,746 - root - INFO - lr: 3.8896e-05 gnorm: 1.09 [ 8:17:16<16:15:34] +[titan] 2025-10-05 06:51:36,614 - root - INFO - step: 13510 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 06:51:36,614 - root - INFO - lr: 3.8888e-05 gnorm: 1.08 [ 8:17:27<16:15:23] +[titan] 2025-10-05 06:51:47,494 - root - INFO - step: 13515 loss: 2.2519 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 06:51:47,494 - root - INFO - lr: 3.8880e-05 gnorm: 1.12 [ 8:17:38<16:15:12] +[titan] 2025-10-05 06:51:58,360 - root - INFO - step: 13520 loss: 2.2323 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:51:58,360 - root - INFO - lr: 3.8872e-05 gnorm: 1.05 [ 8:17:48<16:15:00] +[titan] 2025-10-05 06:52:09,236 - root - INFO - step: 13525 loss: 2.2346 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 06:52:09,236 - root - INFO - lr: 3.8865e-05 gnorm: 1.07 [ 8:17:59<16:14:49] +[titan] 2025-10-05 06:52:20,103 - root - INFO - step: 13530 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9669 +[titan] 2025-10-05 06:52:20,103 - root - INFO - lr: 
3.8857e-05 gnorm: 1.08 [ 8:18:10<16:14:37] +[titan] 2025-10-05 06:52:30,992 - root - INFO - step: 13535 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9949 +[titan] 2025-10-05 06:52:30,992 - root - INFO - lr: 3.8849e-05 gnorm: 1.10 [ 8:18:21<16:14:26] +[titan] 2025-10-05 06:52:41,845 - root - INFO - step: 13540 loss: 2.2743 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0096 +[titan] 2025-10-05 06:52:41,846 - root - INFO - lr: 3.8842e-05 gnorm: 1.16 [ 8:18:32<16:14:15] +[titan] 2025-10-05 06:52:52,731 - root - INFO - step: 13545 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:52:52,731 - root - INFO - lr: 3.8834e-05 gnorm: 1.19 [ 8:18:43<16:14:03] +[titan] 2025-10-05 06:53:01,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:53:03,584 - root - INFO - step: 13550 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:53:03,584 - root - INFO - lr: 3.8826e-05 gnorm: 1.12 [ 8:18:54<16:13:52] +[titan] 2025-10-05 06:53:14,560 - root - INFO - step: 13555 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.18 mfu: 41.88% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:53:14,560 - root - INFO - lr: 3.8818e-05 gnorm: 1.18 [ 8:19:05<16:13:41] +[titan] 2025-10-05 06:53:25,426 - root - INFO - step: 13560 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:53:25,426 - root - INFO - lr: 3.8811e-05 gnorm: 1.10 [ 8:19:15<16:13:29] +[titan] 2025-10-05 06:53:36,319 - root - INFO - step: 13565 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0242 +[titan] 2025-10-05 06:53:36,320 - root - INFO - lr: 3.8803e-05 gnorm: 1.11 [ 8:19:26<16:13:18] +[titan] 2025-10-05 06:53:47,222 - root - INFO - step: 13570 loss: 2.2893 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:53:47,222 - root - INFO - lr: 3.8795e-05 gnorm: 1.11 [ 8:19:37<16:13:07] +[titan] 2025-10-05 06:53:58,096 - root - INFO - step: 13575 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9963 +[titan] 2025-10-05 06:53:58,096 - root - INFO - lr: 3.8788e-05 gnorm: 1.11 [ 8:19:48<16:12:55] +[titan] 2025-10-05 06:54:08,974 - root - INFO - step: 13580 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:54:08,975 - root - INFO - lr: 
3.8780e-05 gnorm: 1.11 [ 8:19:59<16:12:44] +[titan] 2025-10-05 06:54:19,877 - root - INFO - step: 13585 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0437 +[titan] 2025-10-05 06:54:19,877 - root - INFO - lr: 3.8772e-05 gnorm: 1.15 [ 8:20:10<16:12:32] +[titan] 2025-10-05 06:54:30,750 - root - INFO - step: 13590 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0181 +[titan] 2025-10-05 06:54:30,750 - root - INFO - lr: 3.8764e-05 gnorm: 1.09 [ 8:20:21<16:12:21] +[titan] 2025-10-05 06:54:41,615 - root - INFO - step: 13595 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0386 +[titan] 2025-10-05 06:54:41,615 - root - INFO - lr: 3.8757e-05 gnorm: 1.12 [ 8:20:32<16:12:10] +[titan] 2025-10-05 06:54:50,323 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:54:52,501 - root - INFO - step: 13600 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:54:52,501 - root - INFO - lr: 3.8749e-05 gnorm: 1.12 [ 8:20:43<16:11:58] +[titan] 2025-10-05 06:55:03,350 - root - INFO - step: 13605 loss: 2.2279 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 06:55:03,350 - root - INFO - lr: 3.8741e-05 gnorm: 1.09 [ 8:20:53<16:11:47] +[titan] 2025-10-05 06:55:14,228 - root - INFO - step: 13610 loss: 2.3259 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0547 +[titan] 2025-10-05 06:55:14,228 - root - INFO - lr: 3.8734e-05 gnorm: 1.14 [ 8:21:04<16:11:36] +[titan] 2025-10-05 06:55:25,123 - root - INFO - step: 13615 loss: 2.2661 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0007 +[titan] 2025-10-05 06:55:25,123 - root - INFO - lr: 3.8726e-05 gnorm: 1.11 [ 8:21:15<16:11:24] +[titan] 2025-10-05 06:55:35,976 - root - INFO - step: 13620 loss: 2.3686 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0920 +[titan] 2025-10-05 06:55:35,976 - root - INFO - lr: 3.8718e-05 gnorm: 1.15 [ 8:21:26<16:11:13] +[titan] 2025-10-05 06:55:46,835 - root - INFO - step: 13625 loss: 2.2851 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0187 +[titan] 2025-10-05 06:55:46,835 - root - INFO - lr: 3.8710e-05 gnorm: 1.07 [ 8:21:37<16:11:01] +[titan] 2025-10-05 06:55:57,740 - root - INFO - step: 13630 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0176 +[titan] 2025-10-05 06:55:57,740 - root - INFO - lr: 
3.8703e-05 gnorm: 1.08 [ 8:21:48<16:10:50] +[titan] 2025-10-05 06:56:08,602 - root - INFO - step: 13635 loss: 2.3123 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 06:56:08,602 - root - INFO - lr: 3.8695e-05 gnorm: 1.12 [ 8:21:59<16:10:39] +[titan] 2025-10-05 06:56:19,485 - root - INFO - step: 13640 loss: 2.2360 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 06:56:19,486 - root - INFO - lr: 3.8687e-05 gnorm: 1.08 [ 8:22:10<16:10:27] +[titan] 2025-10-05 06:56:30,339 - root - INFO - step: 13645 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0101 +[titan] 2025-10-05 06:56:30,339 - root - INFO - lr: 3.8679e-05 gnorm: 1.20 [ 8:22:20<16:10:16] +[titan] 2025-10-05 06:56:39,025 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:56:41,218 - root - INFO - step: 13650 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0411 +[titan] 2025-10-05 06:56:41,218 - root - INFO - lr: 3.8672e-05 gnorm: 1.10 [ 8:22:31<16:10:05] +[titan] 2025-10-05 06:56:52,067 - root - INFO - step: 13655 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 06:56:52,068 - root - INFO - lr: 3.8664e-05 gnorm: 1.09 [ 8:22:42<16:09:53] +[titan] 2025-10-05 06:57:02,942 - root - INFO - step: 13660 loss: 2.3364 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0632 +[titan] 2025-10-05 06:57:02,942 - root - INFO - lr: 3.8656e-05 gnorm: 1.13 [ 8:22:53<16:09:42] +[titan] 2025-10-05 06:57:13,852 - root - INFO - step: 13665 loss: 2.2401 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 06:57:13,852 - root - INFO - lr: 3.8648e-05 gnorm: 1.09 [ 8:23:04<16:09:30] +[titan] 2025-10-05 06:57:24,731 - root - INFO - step: 13670 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9808 +[titan] 2025-10-05 06:57:24,731 - root - INFO - lr: 3.8641e-05 gnorm: 1.12 [ 8:23:15<16:09:19] +[titan] 2025-10-05 06:57:35,601 - root - INFO - step: 13675 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 06:57:35,601 - root - INFO - lr: 3.8633e-05 gnorm: 1.12 [ 8:23:26<16:09:08] +[titan] 2025-10-05 06:57:46,492 - root - INFO - step: 13680 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9915 +[titan] 2025-10-05 06:57:46,493 - root - INFO - lr: 
3.8625e-05 gnorm: 1.09 [ 8:23:37<16:08:56] +[titan] 2025-10-05 06:57:57,361 - root - INFO - step: 13685 loss: 2.2907 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:57:57,361 - root - INFO - lr: 3.8617e-05 gnorm: 1.05 [ 8:23:47<16:08:45] +[titan] 2025-10-05 06:58:08,244 - root - INFO - step: 13690 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 06:58:08,244 - root - INFO - lr: 3.8610e-05 gnorm: 1.12 [ 8:23:58<16:08:34] +[titan] 2025-10-05 06:58:19,163 - root - INFO - step: 13695 loss: 2.2749 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0089 +[titan] 2025-10-05 06:58:19,164 - root - INFO - lr: 3.8602e-05 gnorm: 1.09 [ 8:24:09<16:08:22] +[titan] 2025-10-05 06:58:27,860 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:58:30,056 - root - INFO - step: 13700 loss: 2.3146 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 06:58:30,056 - root - INFO - lr: 3.8594e-05 gnorm: 1.10 [ 8:24:20<16:08:11] +[titan] 2025-10-05 06:58:40,938 - root - INFO - step: 13705 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 06:58:40,938 - root - INFO - lr: 3.8586e-05 gnorm: 1.07 [ 8:24:31<16:08:00] +[titan] 2025-10-05 06:58:51,816 - root - INFO - step: 13710 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 06:58:51,816 - root - INFO - lr: 3.8578e-05 gnorm: 1.10 [ 8:24:42<16:07:48] +[titan] 2025-10-05 06:59:02,700 - root - INFO - step: 13715 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 06:59:02,700 - root - INFO - lr: 3.8571e-05 gnorm: 1.12 [ 8:24:53<16:07:37] +[titan] 2025-10-05 06:59:13,554 - root - INFO - step: 13720 loss: 2.3118 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:59:13,554 - root - INFO - lr: 3.8563e-05 gnorm: 1.14 [ 8:25:04<16:07:26] +[titan] 2025-10-05 06:59:24,420 - root - INFO - step: 13725 loss: 2.2285 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9687 +[titan] 2025-10-05 06:59:24,420 - root - INFO - lr: 3.8555e-05 gnorm: 1.11 [ 8:25:14<16:07:14] +[titan] 2025-10-05 06:59:35,307 - root - INFO - step: 13730 loss: 2.2243 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 06:59:35,307 - root - INFO - lr: 
3.8547e-05 gnorm: 1.10 [ 8:25:25<16:07:03] +[titan] 2025-10-05 06:59:46,179 - root - INFO - step: 13735 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 06:59:46,179 - root - INFO - lr: 3.8540e-05 gnorm: 1.08 [ 8:25:36<16:06:51] +[titan] 2025-10-05 06:59:57,061 - root - INFO - step: 13740 loss: 2.2450 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9828 +[titan] 2025-10-05 06:59:57,061 - root - INFO - lr: 3.8532e-05 gnorm: 1.15 [ 8:25:47<16:06:40] +[titan] 2025-10-05 07:00:07,935 - root - INFO - step: 13745 loss: 2.3278 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:00:07,935 - root - INFO - lr: 3.8524e-05 gnorm: 1.10 [ 8:25:58<16:06:29] +[titan] 2025-10-05 07:00:16,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:00:18,832 - root - INFO - step: 13750 loss: 2.3084 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 07:00:18,833 - root - INFO - lr: 3.8516e-05 gnorm: 1.10 [ 8:26:09<16:06:17] +[titan] 2025-10-05 07:00:29,706 - root - INFO - step: 13755 loss: 2.3204 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0493 +[titan] 2025-10-05 07:00:29,706 - root - INFO - lr: 3.8509e-05 gnorm: 1.11 [ 8:26:20<16:06:06] +[titan] 2025-10-05 07:00:40,608 - root - INFO - step: 13760 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0305 +[titan] 2025-10-05 07:00:40,608 - root - INFO - lr: 3.8501e-05 gnorm: 1.15 [ 8:26:31<16:05:55] +[titan] 2025-10-05 07:00:51,487 - root - INFO - step: 13765 loss: 2.2771 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 07:00:51,487 - root - INFO - lr: 3.8493e-05 gnorm: 1.08 [ 8:26:42<16:05:43] +[titan] 2025-10-05 07:01:02,367 - root - INFO - step: 13770 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0107 +[titan] 2025-10-05 07:01:02,367 - root - INFO - lr: 3.8485e-05 gnorm: 1.52 [ 8:26:52<16:05:32] +[titan] 2025-10-05 07:01:13,257 - root - INFO - step: 13775 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0460 +[titan] 2025-10-05 07:01:13,257 - root - INFO - lr: 3.8477e-05 gnorm: 1.11 [ 8:27:03<16:05:21] +[titan] 2025-10-05 07:01:24,150 - root - INFO - step: 13780 loss: 2.3133 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 07:01:24,150 - root - INFO - lr: 
3.8470e-05 gnorm: 1.05 [ 8:27:14<16:05:09] +[titan] 2025-10-05 07:01:35,054 - root - INFO - step: 13785 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9600 +[titan] 2025-10-05 07:01:35,054 - root - INFO - lr: 3.8462e-05 gnorm: 1.10 [ 8:27:25<16:04:58] +[titan] 2025-10-05 07:01:45,974 - root - INFO - step: 13790 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0014 +[titan] 2025-10-05 07:01:45,974 - root - INFO - lr: 3.8454e-05 gnorm: 1.09 [ 8:27:36<16:04:47] +[titan] 2025-10-05 07:01:56,865 - root - INFO - step: 13795 loss: 2.2879 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:01:56,865 - root - INFO - lr: 3.8446e-05 gnorm: 1.08 [ 8:27:47<16:04:35] +[titan] 2025-10-05 07:02:05,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:02:07,773 - root - INFO - step: 13800 loss: 2.2846 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0177 +[titan] 2025-10-05 07:02:07,773 - root - INFO - lr: 3.8438e-05 gnorm: 1.09 [ 8:27:58<16:04:24] +[titan] 2025-10-05 07:02:18,700 - root - INFO - step: 13805 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 07:02:18,700 - root - INFO - lr: 3.8431e-05 gnorm: 1.09 [ 8:28:09<16:04:13] +[titan] 2025-10-05 07:02:29,593 - root - INFO - step: 13810 loss: 2.2868 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 07:02:29,593 - root - INFO - lr: 3.8423e-05 gnorm: 1.08 [ 8:28:20<16:04:02] +[titan] 2025-10-05 07:02:40,489 - root - INFO - step: 13815 loss: 2.3125 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 07:02:40,489 - root - INFO - lr: 3.8415e-05 gnorm: 1.08 [ 8:28:31<16:03:50] +[titan] 2025-10-05 07:02:51,396 - root - INFO - step: 13820 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1047 +[titan] 2025-10-05 07:02:51,396 - root - INFO - lr: 3.8407e-05 gnorm: 1.13 [ 8:28:41<16:03:39] +[titan] 2025-10-05 07:03:00,403 - root - INFO - Dumping profiler traces at step 13824 +[titan] 2025-10-05 07:03:00,440 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:03:02,625 - root - INFO - step: 13825 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 29,181 tflops: 404.84 mfu: 40.93% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:03:02,626 - root - INFO - lr: 3.8399e-05 gnorm: 1.09 [ 8:28:53<16:03:28] +[titan] 2025-10-05 07:03:13,525 - root - INFO - step: 13830 loss: 
2.3225 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0510 +[titan] 2025-10-05 07:03:13,526 - root - INFO - lr: 3.8392e-05 gnorm: 1.08 [ 8:29:04<16:03:17] +[titan] 2025-10-05 07:03:24,466 - root - INFO - step: 13835 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:03:24,466 - root - INFO - lr: 3.8384e-05 gnorm: 1.07 [ 8:29:14<16:03:06] +[titan] 2025-10-05 07:03:35,347 - root - INFO - step: 13840 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:03:35,347 - root - INFO - lr: 3.8376e-05 gnorm: 1.09 [ 8:29:25<16:02:54] +[titan] 2025-10-05 07:03:46,225 - root - INFO - step: 13845 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 07:03:46,225 - root - INFO - lr: 3.8368e-05 gnorm: 1.11 [ 8:29:36<16:02:43] +[titan] 2025-10-05 07:03:54,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:03:57,111 - root - INFO - step: 13850 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0113 +[titan] 2025-10-05 07:03:57,111 - root - INFO - lr: 3.8360e-05 gnorm: 1.11 [ 8:29:47<16:02:32] +[titan] 2025-10-05 07:04:08,025 - root - INFO - step: 13855 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0769 +[titan] 2025-10-05 07:04:08,025 - root - INFO - lr: 3.8353e-05 gnorm: 1.11 [ 8:29:58<16:02:20] +[titan] 2025-10-05 07:04:18,937 - root - INFO - step: 13860 loss: 2.2484 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9860 +[titan] 2025-10-05 07:04:18,937 - root - INFO - lr: 3.8345e-05 gnorm: 1.13 [ 8:30:09<16:02:09] +[titan] 2025-10-05 07:04:29,819 - root - INFO - step: 13865 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9412 +[titan] 2025-10-05 07:04:29,820 - root - INFO - lr: 3.8337e-05 gnorm: 1.13 [ 8:30:20<16:01:58] +[titan] 2025-10-05 07:04:40,706 - root - INFO - step: 13870 loss: 2.1522 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 07:04:40,706 - root - INFO - lr: 3.8329e-05 gnorm: 1.10 [ 8:30:31<16:01:46] +[titan] 2025-10-05 07:04:51,600 - root - INFO - step: 13875 loss: 2.2926 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:04:51,601 - root - INFO - lr: 3.8321e-05 gnorm: 1.13 [ 8:30:42<16:01:35] +[titan] 2025-10-05 07:05:02,483 - root - INFO - step: 13880 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 07:05:02,484 - root - INFO - lr: 
3.8313e-05 gnorm: 1.05 [ 8:30:53<16:01:24] +[titan] 2025-10-05 07:05:13,375 - root - INFO - step: 13885 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 07:05:13,375 - root - INFO - lr: 3.8306e-05 gnorm: 1.09 [ 8:31:03<16:01:12] +[titan] 2025-10-05 07:05:24,346 - root - INFO - step: 13890 loss: 2.3386 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 07:05:24,346 - root - INFO - lr: 3.8298e-05 gnorm: 1.09 [ 8:31:14<16:01:01] +[titan] 2025-10-05 07:05:35,221 - root - INFO - step: 13895 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 07:05:35,221 - root - INFO - lr: 3.8290e-05 gnorm: 1.10 [ 8:31:25<16:00:50] +[titan] 2025-10-05 07:05:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:05:46,101 - root - INFO - step: 13900 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 07:05:46,101 - root - INFO - lr: 3.8282e-05 gnorm: 1.11 [ 8:31:36<16:00:38] +[titan] 2025-10-05 07:05:56,991 - root - INFO - step: 13905 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 07:05:56,991 - root - INFO - lr: 3.8274e-05 gnorm: 1.09 [ 8:31:47<16:00:27] +[titan] 2025-10-05 07:06:07,860 - root - INFO - step: 13910 loss: 2.2822 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0150 +[titan] 2025-10-05 07:06:07,861 - root - INFO - lr: 3.8266e-05 gnorm: 1.06 [ 8:31:58<16:00:16] +[titan] 2025-10-05 07:06:18,755 - root - INFO - step: 13915 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0626 +[titan] 2025-10-05 07:06:18,755 - root - INFO - lr: 3.8259e-05 gnorm: 1.12 [ 8:32:09<16:00:04] +[titan] 2025-10-05 07:06:29,694 - root - INFO - step: 13920 loss: 2.3240 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0527 +[titan] 2025-10-05 07:06:29,694 - root - INFO - lr: 3.8251e-05 gnorm: 1.13 [ 8:32:20<15:59:53] +[titan] 2025-10-05 07:06:40,578 - root - INFO - step: 13925 loss: 2.2091 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9511 +[titan] 2025-10-05 07:06:40,578 - root - INFO - lr: 3.8243e-05 gnorm: 1.13 [ 8:32:31<15:59:42] +[titan] 2025-10-05 07:06:51,433 - root - INFO - step: 13930 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:06:51,434 - root - INFO - lr: 
3.8235e-05 gnorm: 1.12 [ 8:32:41<15:59:30] +[titan] 2025-10-05 07:07:02,325 - root - INFO - step: 13935 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0277 +[titan] 2025-10-05 07:07:02,326 - root - INFO - lr: 3.8227e-05 gnorm: 1.12 [ 8:32:52<15:59:19] +[titan] 2025-10-05 07:07:13,223 - root - INFO - step: 13940 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 07:07:13,223 - root - INFO - lr: 3.8219e-05 gnorm: 1.05 [ 8:33:03<15:59:08] +[titan] 2025-10-05 07:07:24,144 - root - INFO - step: 13945 loss: 2.2627 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 07:07:24,144 - root - INFO - lr: 3.8212e-05 gnorm: 1.07 [ 8:33:14<15:58:57] +[titan] 2025-10-05 07:07:32,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:07:35,060 - root - INFO - step: 13950 loss: 2.3247 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0535 +[titan] 2025-10-05 07:07:35,061 - root - INFO - lr: 3.8204e-05 gnorm: 1.15 [ 8:33:25<15:58:45] +[titan] 2025-10-05 07:07:45,949 - root - INFO - step: 13955 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 1.9994 +[titan] 2025-10-05 07:07:45,949 - root - INFO - lr: 3.8196e-05 gnorm: 1.12 [ 8:33:36<15:58:34] +[titan] 2025-10-05 07:07:56,827 - root - INFO - step: 13960 loss: 2.2073 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 07:07:56,827 - root - INFO - lr: 3.8188e-05 gnorm: 1.13 [ 8:33:47<15:58:23] +[titan] 2025-10-05 07:08:07,719 - root - INFO - step: 13965 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 07:08:07,720 - root - INFO - lr: 3.8180e-05 gnorm: 1.05 [ 8:33:58<15:58:11] +[titan] 2025-10-05 07:08:18,609 - root - INFO - step: 13970 loss: 2.3210 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0494 +[titan] 2025-10-05 07:08:18,609 - root - INFO - lr: 3.8172e-05 gnorm: 1.11 [ 8:34:09<15:58:00] +[titan] 2025-10-05 07:08:29,526 - root - INFO - step: 13975 loss: 2.3414 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0678 +[titan] 2025-10-05 07:08:29,526 - root - INFO - lr: 3.8164e-05 gnorm: 1.06 [ 8:34:20<15:57:49] +[titan] 2025-10-05 07:08:40,409 - root - INFO - step: 13980 loss: 2.2904 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0234 +[titan] 2025-10-05 07:08:40,409 - root - INFO - lr: 
3.8157e-05 gnorm: 1.10 [ 8:34:30<15:57:37] +[titan] 2025-10-05 07:08:51,305 - root - INFO - step: 13985 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9806 +[titan] 2025-10-05 07:08:51,305 - root - INFO - lr: 3.8149e-05 gnorm: 1.08 [ 8:34:41<15:57:26] +[titan] 2025-10-05 07:09:02,176 - root - INFO - step: 13990 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:09:02,177 - root - INFO - lr: 3.8141e-05 gnorm: 1.06 [ 8:34:52<15:57:15] +[titan] 2025-10-05 07:09:13,061 - root - INFO - step: 13995 loss: 2.2816 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0139 +[titan] 2025-10-05 07:09:13,062 - root - INFO - lr: 3.8133e-05 gnorm: 1.14 [ 8:35:03<15:57:03] +[titan] 2025-10-05 07:09:21,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:09:23,969 - root - INFO - step: 14000 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 07:09:23,969 - root - INFO - lr: 3.8125e-05 gnorm: 1.09 [ 8:35:14<15:56:52] +[titan] 2025-10-05 07:09:34,866 - root - INFO - step: 14005 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0102 +[titan] 2025-10-05 07:09:34,866 - root - INFO - lr: 3.8117e-05 gnorm: 1.06 [ 8:35:25<15:56:41] +[titan] 2025-10-05 07:09:45,752 - root - INFO - step: 14010 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0239 +[titan] 2025-10-05 07:09:45,752 - root - INFO - lr: 3.8109e-05 gnorm: 1.14 [ 8:35:36<15:56:29] +[titan] 2025-10-05 07:09:56,681 - root - INFO - step: 14015 loss: 2.2388 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9782 +[titan] 2025-10-05 07:09:56,681 - root - INFO - lr: 3.8101e-05 gnorm: 1.10 [ 8:35:47<15:56:18] +[titan] 2025-10-05 07:10:07,561 - root - INFO - step: 14020 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0100 +[titan] 2025-10-05 07:10:07,561 - root - INFO - lr: 3.8094e-05 gnorm: 1.10 [ 8:35:58<15:56:07] +[titan] 2025-10-05 07:10:18,446 - root - INFO - step: 14025 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0031 +[titan] 2025-10-05 07:10:18,446 - root - INFO - lr: 3.8086e-05 gnorm: 1.06 [ 8:36:08<15:55:56] +[titan] 2025-10-05 07:10:29,418 - root - INFO - step: 14030 loss: 2.3296 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 07:10:29,419 - root - INFO - lr: 
3.8078e-05 gnorm: 1.11 [ 8:36:19<15:55:44] +[titan] 2025-10-05 07:10:40,286 - root - INFO - step: 14035 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0286 +[titan] 2025-10-05 07:10:40,286 - root - INFO - lr: 3.8070e-05 gnorm: 1.08 [ 8:36:30<15:55:33] +[titan] 2025-10-05 07:10:51,186 - root - INFO - step: 14040 loss: 2.3219 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 07:10:51,186 - root - INFO - lr: 3.8062e-05 gnorm: 1.08 [ 8:36:41<15:55:22] +[titan] 2025-10-05 07:11:02,100 - root - INFO - step: 14045 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:11:02,100 - root - INFO - lr: 3.8054e-05 gnorm: 1.03 [ 8:36:52<15:55:10] +[titan] 2025-10-05 07:11:10,810 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:11:13,002 - root - INFO - step: 14050 loss: 2.2598 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:13,002 - root - INFO - lr: 3.8046e-05 gnorm: 1.08 [ 8:37:03<15:54:59] +[titan] 2025-10-05 07:11:23,889 - root - INFO - step: 14055 loss: 2.2829 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0164 +[titan] 2025-10-05 07:11:23,889 - root - INFO - lr: 3.8038e-05 gnorm: 1.06 [ 8:37:14<15:54:48] +[titan] 2025-10-05 07:11:34,797 - root - INFO - step: 14060 loss: 2.2612 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:34,797 - root - INFO - lr: 3.8031e-05 gnorm: 1.08 [ 8:37:25<15:54:37] +[titan] 2025-10-05 07:11:45,686 - root - INFO - step: 14065 loss: 2.2504 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:11:45,686 - root - INFO - lr: 3.8023e-05 gnorm: 1.10 [ 8:37:36<15:54:25] +[titan] 2025-10-05 07:11:56,588 - root - INFO - step: 14070 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 07:11:56,588 - root - INFO - lr: 3.8015e-05 gnorm: 1.10 [ 8:37:47<15:54:14] +[titan] 2025-10-05 07:12:07,484 - root - INFO - step: 14075 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:12:07,484 - root - INFO - lr: 3.8007e-05 gnorm: 1.05 [ 8:37:57<15:54:03] +[titan] 2025-10-05 07:12:18,379 - root - INFO - step: 14080 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 07:12:18,379 - root - INFO - lr: 
3.7999e-05 gnorm: 1.09 [ 8:38:08<15:53:51] +[titan] 2025-10-05 07:12:29,281 - root - INFO - step: 14085 loss: 2.2541 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 07:12:29,281 - root - INFO - lr: 3.7991e-05 gnorm: 1.11 [ 8:38:19<15:53:40] +[titan] 2025-10-05 07:12:40,158 - root - INFO - step: 14090 loss: 2.2892 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0223 +[titan] 2025-10-05 07:12:40,159 - root - INFO - lr: 3.7983e-05 gnorm: 1.07 [ 8:38:30<15:53:29] +[titan] 2025-10-05 07:12:51,038 - root - INFO - step: 14095 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0060 +[titan] 2025-10-05 07:12:51,038 - root - INFO - lr: 3.7975e-05 gnorm: 1.08 [ 8:38:41<15:53:17] +[titan] 2025-10-05 07:12:59,737 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:13:01,923 - root - INFO - step: 14100 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 07:13:01,924 - root - INFO - lr: 3.7967e-05 gnorm: 1.10 [ 8:38:52<15:53:06] +[titan] 2025-10-05 07:13:12,819 - root - INFO - step: 14105 loss: 2.2680 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0026 +[titan] 2025-10-05 07:13:12,819 - root - INFO - lr: 3.7959e-05 gnorm: 1.10 [ 8:39:03<15:52:55] +[titan] 2025-10-05 07:13:23,712 - root - INFO - step: 14110 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:13:23,712 - root - INFO - lr: 3.7952e-05 gnorm: 1.06 [ 8:39:14<15:52:43] +[titan] 2025-10-05 07:13:34,613 - root - INFO - step: 14115 loss: 2.3226 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0504 +[titan] 2025-10-05 07:13:34,613 - root - INFO - lr: 3.7944e-05 gnorm: 1.17 [ 8:39:25<15:52:32] +[titan] 2025-10-05 07:13:45,510 - root - INFO - step: 14120 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 07:13:45,510 - root - INFO - lr: 3.7936e-05 gnorm: 1.12 [ 8:39:36<15:52:21] +[titan] 2025-10-05 07:13:56,397 - root - INFO - step: 14125 loss: 2.2697 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0052 +[titan] 2025-10-05 07:13:56,397 - root - INFO - lr: 3.7928e-05 gnorm: 1.11 [ 8:39:46<15:52:09] +[titan] 2025-10-05 07:14:07,282 - root - INFO - step: 14130 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 07:14:07,282 - root - INFO - lr: 
3.7920e-05 gnorm: 1.09 [ 8:39:57<15:51:58] +[titan] 2025-10-05 07:14:18,161 - root - INFO - step: 14135 loss: 2.2782 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 07:14:18,162 - root - INFO - lr: 3.7912e-05 gnorm: 1.13 [ 8:40:08<15:51:47] +[titan] 2025-10-05 07:14:29,064 - root - INFO - step: 14140 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 07:14:29,064 - root - INFO - lr: 3.7904e-05 gnorm: 1.12 [ 8:40:19<15:51:36] +[titan] 2025-10-05 07:14:39,953 - root - INFO - step: 14145 loss: 2.2613 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9977 +[titan] 2025-10-05 07:14:39,953 - root - INFO - lr: 3.7896e-05 gnorm: 1.07 [ 8:40:30<15:51:24] +[titan] 2025-10-05 07:14:48,667 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:14:50,851 - root - INFO - step: 14150 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 07:14:50,851 - root - INFO - lr: 3.7888e-05 gnorm: 1.09 [ 8:40:41<15:51:13] +[titan] 2025-10-05 07:15:01,723 - root - INFO - step: 14155 loss: 2.3499 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0768 +[titan] 2025-10-05 07:15:01,723 - root - INFO - lr: 3.7880e-05 gnorm: 1.07 [ 8:40:52<15:51:02] +[titan] 2025-10-05 07:15:12,596 - root - INFO - step: 14160 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 07:15:12,597 - root - INFO - lr: 3.7872e-05 gnorm: 1.07 [ 8:41:03<15:50:50] +[titan] 2025-10-05 07:15:23,478 - root - INFO - step: 14165 loss: 2.2806 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 07:15:23,478 - root - INFO - lr: 3.7865e-05 gnorm: 1.09 [ 8:41:13<15:50:39] +[titan] 2025-10-05 07:15:34,374 - root - INFO - step: 14170 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:15:34,374 - root - INFO - lr: 3.7857e-05 gnorm: 1.08 [ 8:41:24<15:50:28] +[titan] 2025-10-05 07:15:45,286 - root - INFO - step: 14175 loss: 2.2571 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9933 +[titan] 2025-10-05 07:15:45,287 - root - INFO - lr: 3.7849e-05 gnorm: 1.11 [ 8:41:35<15:50:16] +[titan] 2025-10-05 07:15:56,187 - root - INFO - step: 14180 loss: 2.3045 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 07:15:56,187 - root - INFO - lr: 
3.7841e-05 gnorm: 1.13 [ 8:41:46<15:50:05] +[titan] 2025-10-05 07:16:07,077 - root - INFO - step: 14185 loss: 2.2313 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9715 +[titan] 2025-10-05 07:16:07,077 - root - INFO - lr: 3.7833e-05 gnorm: 1.08 [ 8:41:57<15:49:54] +[titan] 2025-10-05 07:16:17,954 - root - INFO - step: 14190 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9824 +[titan] 2025-10-05 07:16:17,954 - root - INFO - lr: 3.7825e-05 gnorm: 1.05 [ 8:42:08<15:49:42] +[titan] 2025-10-05 07:16:28,838 - root - INFO - step: 14195 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 07:16:28,839 - root - INFO - lr: 3.7817e-05 gnorm: 1.10 [ 8:42:19<15:49:31] +[titan] 2025-10-05 07:16:37,518 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:16:39,702 - root - INFO - step: 14200 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9987 +[titan] 2025-10-05 07:16:39,703 - root - INFO - lr: 3.7809e-05 gnorm: 1.10 [ 8:42:30<15:49:20] +[titan] 2025-10-05 07:16:50,596 - root - INFO - step: 14205 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 07:16:50,596 - root - INFO - lr: 3.7801e-05 gnorm: 1.05 [ 8:42:41<15:49:08] +[titan] 2025-10-05 07:17:01,477 - root - INFO - step: 14210 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:17:01,477 - root - INFO - lr: 3.7793e-05 gnorm: 1.09 [ 8:42:51<15:48:57] +[titan] 2025-10-05 07:17:12,357 - root - INFO - step: 14215 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 07:17:12,357 - root - INFO - lr: 3.7785e-05 gnorm: 1.09 [ 8:43:02<15:48:46] +[titan] 2025-10-05 07:17:23,224 - root - INFO - step: 14220 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0228 +[titan] 2025-10-05 07:17:23,224 - root - INFO - lr: 3.7777e-05 gnorm: 1.12 [ 8:43:13<15:48:34] +[titan] 2025-10-05 07:17:34,101 - root - INFO - step: 14225 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9842 +[titan] 2025-10-05 07:17:34,101 - root - INFO - lr: 3.7769e-05 gnorm: 1.11 [ 8:43:24<15:48:23] +[titan] 2025-10-05 07:17:44,966 - root - INFO - step: 14230 loss: 2.2228 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9631 +[titan] 2025-10-05 07:17:44,966 - root - INFO - lr: 
3.7761e-05 gnorm: 1.06 [ 8:43:35<15:48:12] +[titan] 2025-10-05 07:17:55,865 - root - INFO - step: 14235 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:17:55,865 - root - INFO - lr: 3.7753e-05 gnorm: 1.15 [ 8:43:46<15:48:00] +[titan] 2025-10-05 07:18:06,742 - root - INFO - step: 14240 loss: 2.2274 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9683 +[titan] 2025-10-05 07:18:06,742 - root - INFO - lr: 3.7746e-05 gnorm: 1.07 [ 8:43:57<15:47:49] +[titan] 2025-10-05 07:18:17,634 - root - INFO - step: 14245 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0317 +[titan] 2025-10-05 07:18:17,634 - root - INFO - lr: 3.7738e-05 gnorm: 1.09 [ 8:44:08<15:47:38] +[titan] 2025-10-05 07:18:26,321 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:18:28,505 - root - INFO - step: 14250 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:18:28,505 - root - INFO - lr: 3.7730e-05 gnorm: 1.10 [ 8:44:18<15:47:26] +[titan] 2025-10-05 07:18:39,411 - root - INFO - step: 14255 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 07:18:39,412 - root - INFO - lr: 3.7722e-05 gnorm: 1.06 [ 8:44:29<15:47:15] +[titan] 2025-10-05 07:18:50,297 - root - INFO - step: 14260 loss: 2.3010 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0327 +[titan] 2025-10-05 07:18:50,297 - root - INFO - lr: 3.7714e-05 gnorm: 1.07 [ 8:44:40<15:47:04] +[titan] 2025-10-05 07:19:01,201 - root - INFO - step: 14265 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0104 +[titan] 2025-10-05 07:19:01,201 - root - INFO - lr: 3.7706e-05 gnorm: 1.10 [ 8:44:51<15:46:53] +[titan] 2025-10-05 07:19:12,083 - root - INFO - step: 14270 loss: 2.2667 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0022 +[titan] 2025-10-05 07:19:12,084 - root - INFO - lr: 3.7698e-05 gnorm: 1.08 [ 8:45:02<15:46:41] +[titan] 2025-10-05 07:19:22,993 - root - INFO - step: 14275 loss: 2.1944 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9385 +[titan] 2025-10-05 07:19:22,993 - root - INFO - lr: 3.7690e-05 gnorm: 1.09 [ 8:45:13<15:46:30] +[titan] 2025-10-05 07:19:33,913 - root - INFO - step: 14280 loss: 2.2467 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9851 +[titan] 2025-10-05 07:19:33,913 - root - INFO - lr: 
3.7682e-05 gnorm: 1.07 [ 8:45:24<15:46:19] +[titan] 2025-10-05 07:19:44,768 - root - INFO - step: 14285 loss: 2.2223 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9628 +[titan] 2025-10-05 07:19:44,768 - root - INFO - lr: 3.7674e-05 gnorm: 1.09 [ 8:45:35<15:46:07] +[titan] 2025-10-05 07:19:55,630 - root - INFO - step: 14290 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 07:19:55,630 - root - INFO - lr: 3.7666e-05 gnorm: 1.10 [ 8:45:46<15:45:56] +[titan] 2025-10-05 07:20:06,491 - root - INFO - step: 14295 loss: 2.2948 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 07:20:06,491 - root - INFO - lr: 3.7658e-05 gnorm: 1.11 [ 8:45:56<15:45:45] +[titan] 2025-10-05 07:20:15,199 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:20:17,375 - root - INFO - step: 14300 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 07:20:17,376 - root - INFO - lr: 3.7650e-05 gnorm: 1.15 [ 8:46:07<15:45:33] +[titan] 2025-10-05 07:20:28,246 - root - INFO - step: 14305 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0609 +[titan] 2025-10-05 07:20:28,246 - root - INFO - lr: 3.7642e-05 gnorm: 1.12 [ 8:46:18<15:45:22] +[titan] 2025-10-05 07:20:39,160 - root - INFO - step: 14310 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0267 +[titan] 2025-10-05 07:20:39,160 - root - INFO - lr: 3.7634e-05 gnorm: 1.14 [ 8:46:29<15:45:11] +[titan] 2025-10-05 07:20:50,006 - root - INFO - step: 14315 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 07:20:50,006 - root - INFO - lr: 3.7626e-05 gnorm: 1.07 [ 8:46:40<15:44:59] +[titan] 2025-10-05 07:21:00,866 - root - INFO - step: 14320 loss: 2.2698 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 07:21:00,866 - root - INFO - lr: 3.7618e-05 gnorm: 1.09 [ 8:46:51<15:44:48] +[titan] 2025-10-05 07:21:11,703 - root - INFO - step: 14325 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:21:11,703 - root - INFO - lr: 3.7610e-05 gnorm: 1.09 [ 8:47:02<15:44:37] +[titan] 2025-10-05 07:21:22,593 - root - INFO - step: 14330 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0330 +[titan] 2025-10-05 07:21:22,594 - root - INFO - lr: 
3.7602e-05 gnorm: 1.14 [ 8:47:13<15:44:25] +[titan] 2025-10-05 07:21:33,559 - root - INFO - step: 14335 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 07:21:33,559 - root - INFO - lr: 3.7594e-05 gnorm: 1.09 [ 8:47:24<15:44:14] +[titan] 2025-10-05 07:21:35,949 - root - INFO - Dumping profiler traces at step 14336 +[titan] 2025-10-05 07:21:35,987 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:21:44,698 - root - INFO - step: 14340 loss: 2.3096 memory: 118.84GiB(85.28%) tps: 29,418 tflops: 408.13 mfu: 41.27% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 07:21:44,699 - root - INFO - lr: 3.7586e-05 gnorm: 1.13 [ 8:47:35<15:44:03] +[titan] 2025-10-05 07:21:55,565 - root - INFO - step: 14345 loss: 2.3329 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 07:21:55,565 - root - INFO - lr: 3.7578e-05 gnorm: 1.11 [ 8:47:46<15:43:52] +[titan] 2025-10-05 07:22:04,240 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:22:06,418 - root - INFO - step: 14350 loss: 2.2380 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9769 +[titan] 2025-10-05 07:22:06,419 - root - INFO - lr: 3.7570e-05 gnorm: 1.07 [ 8:47:56<15:43:41] +[titan] 2025-10-05 07:22:17,273 - root - INFO - step: 14355 loss: 2.2325 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 07:22:17,273 - root - INFO - lr: 3.7562e-05 gnorm: 1.12 [ 8:48:07<15:43:29] +[titan] 2025-10-05 07:22:28,142 - root - INFO - step: 14360 loss: 2.3425 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0700 +[titan] 2025-10-05 07:22:28,143 - root - INFO - lr: 3.7554e-05 gnorm: 1.12 [ 8:48:18<15:43:18] +[titan] 2025-10-05 07:22:39,138 - root - INFO - step: 14365 loss: 2.2707 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.44 mfu: 41.80% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:22:39,139 - root - INFO - lr: 3.7546e-05 gnorm: 1.08 [ 8:48:29<15:43:07] +[titan] 2025-10-05 07:22:50,009 - root - INFO - step: 14370 loss: 2.2987 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 07:22:50,009 - root - INFO - lr: 3.7538e-05 gnorm: 1.13 [ 8:48:40<15:42:55] +[titan] 2025-10-05 07:23:00,863 - root - INFO - step: 14375 loss: 2.2114 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 07:23:00,864 - root - INFO - lr: 3.7530e-05 gnorm: 1.09 [ 8:48:51<15:42:44] +[titan] 2025-10-05 07:23:11,714 - root - INFO - step: 14380 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:23:11,714 - root - INFO - lr: 
3.7522e-05 gnorm: 1.11 [ 8:49:02<15:42:33] +[titan] 2025-10-05 07:23:22,597 - root - INFO - step: 14385 loss: 2.3245 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0537 +[titan] 2025-10-05 07:23:22,597 - root - INFO - lr: 3.7514e-05 gnorm: 1.07 [ 8:49:13<15:42:21] +[titan] 2025-10-05 07:23:33,453 - root - INFO - step: 14390 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:23:33,453 - root - INFO - lr: 3.7506e-05 gnorm: 1.10 [ 8:49:23<15:42:10] +[titan] 2025-10-05 07:23:44,404 - root - INFO - step: 14395 loss: 2.3155 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0444 +[titan] 2025-10-05 07:23:44,404 - root - INFO - lr: 3.7498e-05 gnorm: 1.07 [ 8:49:34<15:41:59] +[titan] 2025-10-05 07:23:53,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:23:55,271 - root - INFO - step: 14400 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:23:55,271 - root - INFO - lr: 3.7490e-05 gnorm: 1.09 [ 8:49:45<15:41:47] +[titan] 2025-10-05 07:24:06,109 - root - INFO - step: 14405 loss: 2.3174 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0466 +[titan] 2025-10-05 07:24:06,109 - root - INFO - lr: 3.7482e-05 gnorm: 1.11 [ 8:49:56<15:41:36] +[titan] 2025-10-05 07:24:16,949 - root - INFO - step: 14410 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0222 +[titan] 2025-10-05 07:24:16,949 - root - INFO - lr: 3.7474e-05 gnorm: 1.22 [ 8:50:07<15:41:25] +[titan] 2025-10-05 07:24:27,813 - root - INFO - step: 14415 loss: 2.2533 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9898 +[titan] 2025-10-05 07:24:27,813 - root - INFO - lr: 3.7466e-05 gnorm: 1.06 [ 8:50:18<15:41:13] +[titan] 2025-10-05 07:24:38,740 - root - INFO - step: 14420 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9292 +[titan] 2025-10-05 07:24:38,740 - root - INFO - lr: 3.7458e-05 gnorm: 1.07 [ 8:50:29<15:41:02] +[titan] 2025-10-05 07:24:49,616 - root - INFO - step: 14425 loss: 2.2439 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 07:24:49,616 - root - INFO - lr: 3.7450e-05 gnorm: 1.09 [ 8:50:40<15:40:51] +[titan] 2025-10-05 07:25:00,495 - root - INFO - step: 14430 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0617 +[titan] 2025-10-05 07:25:00,495 - root - INFO - lr: 
3.7442e-05 gnorm: 1.10 [ 8:50:50<15:40:39] +[titan] 2025-10-05 07:25:11,357 - root - INFO - step: 14435 loss: 2.2516 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9889 +[titan] 2025-10-05 07:25:11,357 - root - INFO - lr: 3.7434e-05 gnorm: 1.10 [ 8:51:01<15:40:28] +[titan] 2025-10-05 07:25:22,214 - root - INFO - step: 14440 loss: 2.2632 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:25:22,214 - root - INFO - lr: 3.7426e-05 gnorm: 1.10 [ 8:51:12<15:40:17] +[titan] 2025-10-05 07:25:33,076 - root - INFO - step: 14445 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 07:25:33,076 - root - INFO - lr: 3.7418e-05 gnorm: 1.09 [ 8:51:23<15:40:05] +[titan] 2025-10-05 07:25:41,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:25:44,001 - root - INFO - step: 14450 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 07:25:44,001 - root - INFO - lr: 3.7410e-05 gnorm: 1.07 [ 8:51:34<15:39:54] +[titan] 2025-10-05 07:25:54,893 - root - INFO - step: 14455 loss: 2.2554 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9922 +[titan] 2025-10-05 07:25:54,893 - root - INFO - lr: 3.7402e-05 gnorm: 1.13 [ 8:51:45<15:39:43] +[titan] 2025-10-05 07:26:05,774 - root - INFO - step: 14460 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9572 +[titan] 2025-10-05 07:26:05,775 - root - INFO - lr: 3.7394e-05 gnorm: 1.08 [ 8:51:56<15:39:32] +[titan] 2025-10-05 07:26:16,651 - root - INFO - step: 14465 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 07:26:16,651 - root - INFO - lr: 3.7386e-05 gnorm: 1.11 [ 8:52:07<15:39:20] +[titan] 2025-10-05 07:26:27,521 - root - INFO - step: 14470 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 07:26:27,521 - root - INFO - lr: 3.7378e-05 gnorm: 1.08 [ 8:52:17<15:39:09] +[titan] 2025-10-05 07:26:38,394 - root - INFO - step: 14475 loss: 2.2013 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 07:26:38,395 - root - INFO - lr: 3.7370e-05 gnorm: 1.08 [ 8:52:28<15:38:58] +[titan] 2025-10-05 07:26:49,332 - root - INFO - step: 14480 loss: 2.2812 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0154 +[titan] 2025-10-05 07:26:49,332 - root - INFO - lr: 
3.7362e-05 gnorm: 1.12 [ 8:52:39<15:38:46] +[titan] 2025-10-05 07:27:00,212 - root - INFO - step: 14485 loss: 2.2411 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 07:27:00,213 - root - INFO - lr: 3.7354e-05 gnorm: 1.05 [ 8:52:50<15:38:35] +[titan] 2025-10-05 07:27:11,129 - root - INFO - step: 14490 loss: 2.2405 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9788 +[titan] 2025-10-05 07:27:11,129 - root - INFO - lr: 3.7346e-05 gnorm: 1.09 [ 8:53:01<15:38:24] +[titan] 2025-10-05 07:27:22,004 - root - INFO - step: 14495 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 07:27:22,004 - root - INFO - lr: 3.7338e-05 gnorm: 1.09 [ 8:53:12<15:38:13] +[titan] 2025-10-05 07:27:30,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:27:32,894 - root - INFO - step: 14500 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 07:27:32,894 - root - INFO - lr: 3.7330e-05 gnorm: 1.10 [ 8:53:23<15:38:01] +[titan] 2025-10-05 07:27:43,812 - root - INFO - step: 14505 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0409 +[titan] 2025-10-05 07:27:43,812 - root - INFO - lr: 3.7322e-05 gnorm: 1.06 [ 8:53:34<15:37:50] +[titan] 2025-10-05 07:27:54,688 - root - INFO - step: 14510 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9921 +[titan] 2025-10-05 07:27:54,688 - root - INFO - lr: 3.7314e-05 gnorm: 1.08 [ 8:53:45<15:37:39] +[titan] 2025-10-05 07:28:05,542 - root - INFO - step: 14515 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 07:28:05,542 - root - INFO - lr: 3.7306e-05 gnorm: 1.05 [ 8:53:55<15:37:27] +[titan] 2025-10-05 07:28:16,397 - root - INFO - step: 14520 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:28:16,398 - root - INFO - lr: 3.7298e-05 gnorm: 1.08 [ 8:54:06<15:37:16] +[titan] 2025-10-05 07:28:27,301 - root - INFO - step: 14525 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9512 +[titan] 2025-10-05 07:28:27,301 - root - INFO - lr: 3.7290e-05 gnorm: 1.05 [ 8:54:17<15:37:05] +[titan] 2025-10-05 07:28:38,153 - root - INFO - step: 14530 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0325 +[titan] 2025-10-05 07:28:38,153 - root - INFO - lr: 
3.7282e-05 gnorm: 1.12 [ 8:54:28<15:36:53] +[titan] 2025-10-05 07:28:49,042 - root - INFO - step: 14535 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 07:28:49,042 - root - INFO - lr: 3.7274e-05 gnorm: 1.10 [ 8:54:39<15:36:42] +[titan] 2025-10-05 07:28:59,909 - root - INFO - step: 14540 loss: 2.2631 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9992 +[titan] 2025-10-05 07:28:59,910 - root - INFO - lr: 3.7266e-05 gnorm: 1.09 [ 8:54:50<15:36:31] +[titan] 2025-10-05 07:29:10,771 - root - INFO - step: 14545 loss: 2.2017 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9458 +[titan] 2025-10-05 07:29:10,771 - root - INFO - lr: 3.7258e-05 gnorm: 1.10 [ 8:55:01<15:36:19] +[titan] 2025-10-05 07:29:19,438 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:29:21,621 - root - INFO - step: 14550 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 07:29:21,621 - root - INFO - lr: 3.7250e-05 gnorm: 1.09 [ 8:55:12<15:36:08] +[titan] 2025-10-05 07:29:32,531 - root - INFO - step: 14555 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0226 +[titan] 2025-10-05 07:29:32,531 - root - INFO - lr: 3.7242e-05 gnorm: 1.14 [ 8:55:22<15:35:57] +[titan] 2025-10-05 07:29:43,472 - root - INFO - step: 14560 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.01% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8975 +[titan] 2025-10-05 07:29:43,472 - root - INFO - lr: 3.7234e-05 gnorm: 1.02 [ 8:55:33<15:35:46] +[titan] 2025-10-05 07:29:54,345 - root - INFO - step: 14565 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 07:29:54,346 - root - INFO - lr: 3.7226e-05 gnorm: 1.13 [ 8:55:44<15:35:34] +[titan] 2025-10-05 07:30:05,208 - root - INFO - step: 14570 loss: 2.3031 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0344 +[titan] 2025-10-05 07:30:05,209 - root - INFO - lr: 3.7218e-05 gnorm: 1.12 [ 8:55:55<15:35:23] +[titan] 2025-10-05 07:30:16,066 - root - INFO - step: 14575 loss: 2.2367 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 07:30:16,066 - root - INFO - lr: 3.7210e-05 gnorm: 1.10 [ 8:56:06<15:35:12] +[titan] 2025-10-05 07:30:26,932 - root - INFO - step: 14580 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 07:30:26,933 - root - INFO - lr: 
3.7202e-05 gnorm: 1.07 [ 8:56:17<15:35:00] +[titan] 2025-10-05 07:30:37,811 - root - INFO - step: 14585 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9566 +[titan] 2025-10-05 07:30:37,812 - root - INFO - lr: 3.7194e-05 gnorm: 1.08 [ 8:56:28<15:34:49] +[titan] 2025-10-05 07:30:48,772 - root - INFO - step: 14590 loss: 2.3418 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0695 +[titan] 2025-10-05 07:30:48,772 - root - INFO - lr: 3.7185e-05 gnorm: 1.20 [ 8:56:39<15:34:38] +[titan] 2025-10-05 07:30:59,630 - root - INFO - step: 14595 loss: 2.2116 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 07:30:59,630 - root - INFO - lr: 3.7177e-05 gnorm: 1.09 [ 8:56:50<15:34:26] +[titan] 2025-10-05 07:31:08,301 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:31:10,494 - root - INFO - step: 14600 loss: 2.1772 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 07:31:10,494 - root - INFO - lr: 3.7169e-05 gnorm: 1.08 [ 8:57:00<15:34:15] +[titan] 2025-10-05 07:31:21,365 - root - INFO - step: 14605 loss: 2.3083 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:31:21,365 - root - INFO - lr: 3.7161e-05 gnorm: 1.09 [ 8:57:11<15:34:04] +[titan] 2025-10-05 07:31:32,251 - root - INFO - step: 14610 loss: 2.3039 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 07:31:32,251 - root - INFO - lr: 3.7153e-05 gnorm: 1.15 [ 8:57:22<15:33:53] +[titan] 2025-10-05 07:31:43,124 - root - INFO - step: 14615 loss: 2.2982 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0301 +[titan] 2025-10-05 07:31:43,125 - root - INFO - lr: 3.7145e-05 gnorm: 1.12 [ 8:57:33<15:33:41] +[titan] 2025-10-05 07:31:54,094 - root - INFO - step: 14620 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9332 +[titan] 2025-10-05 07:31:54,094 - root - INFO - lr: 3.7137e-05 gnorm: 1.09 [ 8:57:44<15:33:30] +[titan] 2025-10-05 07:32:04,989 - root - INFO - step: 14625 loss: 2.2391 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:32:04,989 - root - INFO - lr: 3.7129e-05 gnorm: 1.10 [ 8:57:55<15:33:19] +[titan] 2025-10-05 07:32:15,888 - root - INFO - step: 14630 loss: 2.3113 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 07:32:15,888 - root - INFO - lr: 
3.7121e-05 gnorm: 1.10 [ 8:58:06<15:33:07] +[titan] 2025-10-05 07:32:26,771 - root - INFO - step: 14635 loss: 2.2726 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:32:26,771 - root - INFO - lr: 3.7113e-05 gnorm: 1.12 [ 8:58:17<15:32:56] +[titan] 2025-10-05 07:32:37,649 - root - INFO - step: 14640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 07:32:37,649 - root - INFO - lr: 3.7105e-05 gnorm: 1.08 [ 8:58:28<15:32:45] +[titan] 2025-10-05 07:32:48,613 - root - INFO - step: 14645 loss: 2.1989 memory: 118.84GiB(85.28%) tps: 29,888 tflops: 414.65 mfu: 41.93% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9422 +[titan] 2025-10-05 07:32:48,613 - root - INFO - lr: 3.7097e-05 gnorm: 1.05 [ 8:58:39<15:32:34] +[titan] 2025-10-05 07:32:57,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:32:59,535 - root - INFO - step: 14650 loss: 2.3040 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0349 +[titan] 2025-10-05 07:32:59,535 - root - INFO - lr: 3.7089e-05 gnorm: 1.06 [ 8:58:49<15:32:23] +[titan] 2025-10-05 07:33:10,438 - root - INFO - step: 14655 loss: 2.2889 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0209 +[titan] 2025-10-05 07:33:10,439 - root - INFO - lr: 3.7081e-05 gnorm: 1.13 [ 8:59:00<15:32:11] +[titan] 2025-10-05 07:33:21,347 - root - INFO - step: 14660 loss: 2.2514 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:33:21,348 - root - INFO - lr: 3.7073e-05 gnorm: 1.12 [ 8:59:11<15:32:00] +[titan] 2025-10-05 07:33:32,227 - root - INFO - step: 14665 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 07:33:32,227 - root - INFO - lr: 3.7064e-05 gnorm: 1.12 [ 8:59:22<15:31:49] +[titan] 2025-10-05 07:33:43,130 - root - INFO - step: 14670 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:33:43,130 - root - INFO - lr: 3.7056e-05 gnorm: 1.13 [ 8:59:33<15:31:37] +[titan] 2025-10-05 07:33:54,090 - root - INFO - step: 14675 loss: 2.2801 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 07:33:54,091 - root - INFO - lr: 3.7048e-05 gnorm: 1.08 [ 8:59:44<15:31:26] +[titan] 2025-10-05 07:34:04,932 - root - INFO - step: 14680 loss: 2.1187 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 07:34:04,932 - root - INFO - lr: 
3.7040e-05 gnorm: 1.06 [ 8:59:55<15:31:15] +[titan] 2025-10-05 07:34:15,806 - root - INFO - step: 14685 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9938 +[titan] 2025-10-05 07:34:15,806 - root - INFO - lr: 3.7032e-05 gnorm: 1.10 [ 9:00:06<15:31:04] +[titan] 2025-10-05 07:34:26,671 - root - INFO - step: 14690 loss: 2.2095 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9509 +[titan] 2025-10-05 07:34:26,671 - root - INFO - lr: 3.7024e-05 gnorm: 1.06 [ 9:00:17<15:30:52] +[titan] 2025-10-05 07:34:37,510 - root - INFO - step: 14695 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 07:34:37,510 - root - INFO - lr: 3.7016e-05 gnorm: 1.06 [ 9:00:27<15:30:41] +[titan] 2025-10-05 07:34:46,191 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:34:48,450 - root - INFO - step: 14700 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9405 +[titan] 2025-10-05 07:34:48,450 - root - INFO - lr: 3.7008e-05 gnorm: 1.10 [ 9:00:38<15:30:30] +[titan] 2025-10-05 07:34:59,300 - root - INFO - step: 14705 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0240 +[titan] 2025-10-05 07:34:59,300 - root - INFO - lr: 3.7000e-05 gnorm: 1.14 [ 9:00:49<15:30:18] +[titan] 2025-10-05 07:35:10,167 - root - INFO - step: 14710 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0017 +[titan] 2025-10-05 07:35:10,168 - root - INFO - lr: 3.6992e-05 gnorm: 1.09 [ 9:01:00<15:30:07] +[titan] 2025-10-05 07:35:21,048 - root - INFO - step: 14715 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0027 +[titan] 2025-10-05 07:35:21,048 - root - INFO - lr: 3.6984e-05 gnorm: 1.13 [ 9:01:11<15:29:56] +[titan] 2025-10-05 07:35:31,930 - root - INFO - step: 14720 loss: 2.2273 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9675 +[titan] 2025-10-05 07:35:31,930 - root - INFO - lr: 3.6976e-05 gnorm: 1.08 [ 9:01:22<15:29:44] +[titan] 2025-10-05 07:35:42,810 - root - INFO - step: 14725 loss: 2.3179 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 07:35:42,810 - root - INFO - lr: 3.6967e-05 gnorm: 1.12 [ 9:01:33<15:29:33] +[titan] 2025-10-05 07:35:53,724 - root - INFO - step: 14730 loss: 2.2620 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9989 +[titan] 2025-10-05 07:35:53,724 - root - INFO - lr: 
3.6959e-05 gnorm: 1.11 [ 9:01:44<15:29:22] +[titan] 2025-10-05 07:36:04,629 - root - INFO - step: 14735 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 07:36:04,629 - root - INFO - lr: 3.6951e-05 gnorm: 1.06 [ 9:01:55<15:29:11] +[titan] 2025-10-05 07:36:15,522 - root - INFO - step: 14740 loss: 2.2768 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 07:36:15,522 - root - INFO - lr: 3.6943e-05 gnorm: 1.09 [ 9:02:05<15:28:59] +[titan] 2025-10-05 07:36:26,431 - root - INFO - step: 14745 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 07:36:26,431 - root - INFO - lr: 3.6935e-05 gnorm: 1.07 [ 9:02:16<15:28:48] +[titan] 2025-10-05 07:36:35,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:36:37,321 - root - INFO - step: 14750 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 07:36:37,321 - root - INFO - lr: 3.6927e-05 gnorm: 1.10 [ 9:02:27<15:28:37] +[titan] 2025-10-05 07:36:48,227 - root - INFO - step: 14755 loss: 2.2186 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:36:48,227 - root - INFO - lr: 3.6919e-05 gnorm: 1.04 [ 9:02:38<15:28:26] +[titan] 2025-10-05 07:36:59,096 - root - INFO - step: 14760 loss: 2.2696 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0036 +[titan] 2025-10-05 07:36:59,096 - root - INFO - lr: 3.6911e-05 gnorm: 1.08 [ 9:02:49<15:28:14] +[titan] 2025-10-05 07:37:09,945 - root - INFO - step: 14765 loss: 2.2510 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9893 +[titan] 2025-10-05 07:37:09,945 - root - INFO - lr: 3.6903e-05 gnorm: 1.13 [ 9:03:00<15:28:03] +[titan] 2025-10-05 07:37:20,822 - root - INFO - step: 14770 loss: 2.2169 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:37:20,822 - root - INFO - lr: 3.6894e-05 gnorm: 1.08 [ 9:03:11<15:27:52] +[titan] 2025-10-05 07:37:31,692 - root - INFO - step: 14775 loss: 2.2524 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 07:37:31,692 - root - INFO - lr: 3.6886e-05 gnorm: 1.10 [ 9:03:22<15:27:40] +[titan] 2025-10-05 07:37:42,588 - root - INFO - step: 14780 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 07:37:42,588 - root - INFO - lr: 
3.6878e-05 gnorm: 1.12 [ 9:03:33<15:27:29] +[titan] 2025-10-05 07:37:53,516 - root - INFO - step: 14785 loss: 2.1691 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9163 +[titan] 2025-10-05 07:37:53,516 - root - INFO - lr: 3.6870e-05 gnorm: 1.06 [ 9:03:43<15:27:18] +[titan] 2025-10-05 07:38:04,385 - root - INFO - step: 14790 loss: 2.1764 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 07:38:04,386 - root - INFO - lr: 3.6862e-05 gnorm: 1.05 [ 9:03:54<15:27:07] +[titan] 2025-10-05 07:38:15,271 - root - INFO - step: 14795 loss: 2.2615 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9988 +[titan] 2025-10-05 07:38:15,271 - root - INFO - lr: 3.6854e-05 gnorm: 1.11 [ 9:04:05<15:26:55] +[titan] 2025-10-05 07:38:23,946 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:38:26,148 - root - INFO - step: 14800 loss: 2.2171 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 07:38:26,148 - root - INFO - lr: 3.6846e-05 gnorm: 1.11 [ 9:04:16<15:26:44] +[titan] 2025-10-05 07:38:37,018 - root - INFO - step: 14805 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0292 +[titan] 2025-10-05 07:38:37,018 - root - INFO - lr: 3.6838e-05 gnorm: 1.12 [ 9:04:27<15:26:33] +[titan] 2025-10-05 07:38:47,933 - root - INFO - step: 14810 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 07:38:47,933 - root - INFO - lr: 3.6830e-05 gnorm: 1.11 [ 9:04:38<15:26:21] +[titan] 2025-10-05 07:38:58,873 - root - INFO - step: 14815 loss: 2.2872 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:38:58,873 - root - INFO - lr: 3.6821e-05 gnorm: 1.08 [ 9:04:49<15:26:10] +[titan] 2025-10-05 07:39:09,749 - root - INFO - step: 14820 loss: 2.2863 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:39:09,749 - root - INFO - lr: 3.6813e-05 gnorm: 1.08 [ 9:05:00<15:25:59] +[titan] 2025-10-05 07:39:20,633 - root - INFO - step: 14825 loss: 2.3248 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 07:39:20,633 - root - INFO - lr: 3.6805e-05 gnorm: 1.06 [ 9:05:11<15:25:48] +[titan] 2025-10-05 07:39:31,524 - root - INFO - step: 14830 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:39:31,525 - root - INFO - lr: 
3.6797e-05 gnorm: 1.05 [ 9:05:21<15:25:36] +[titan] 2025-10-05 07:39:42,407 - root - INFO - step: 14835 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0410 +[titan] 2025-10-05 07:39:42,408 - root - INFO - lr: 3.6789e-05 gnorm: 1.09 [ 9:05:32<15:25:25] +[titan] 2025-10-05 07:39:53,316 - root - INFO - step: 14840 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:39:53,316 - root - INFO - lr: 3.6781e-05 gnorm: 1.06 [ 9:05:43<15:25:14] +[titan] 2025-10-05 07:40:04,271 - root - INFO - step: 14845 loss: 2.2304 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.97 mfu: 41.96% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9723 +[titan] 2025-10-05 07:40:04,271 - root - INFO - lr: 3.6773e-05 gnorm: 1.12 [ 9:05:54<15:25:03] +[titan] 2025-10-05 07:40:10,991 - root - INFO - Dumping profiler traces at step 14848 +[titan] 2025-10-05 07:40:11,030 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:40:13,210 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:40:15,391 - root - INFO - step: 14850 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 29,468 tflops: 408.82 mfu: 41.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:40:15,392 - root - INFO - lr: 3.6765e-05 gnorm: 1.08 [ 9:06:05<15:24:52] +[titan] 2025-10-05 07:40:26,262 - root - INFO - step: 14855 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 07:40:26,262 - root - INFO - lr: 3.6756e-05 gnorm: 1.09 [ 9:06:16<15:24:40] +[titan] 2025-10-05 07:40:37,129 - root - INFO - step: 14860 loss: 2.2444 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 07:40:37,129 - root - INFO - lr: 3.6748e-05 gnorm: 1.08 [ 9:06:27<15:24:29] +[titan] 2025-10-05 07:40:47,995 - root - INFO - step: 14865 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0098 +[titan] 2025-10-05 07:40:47,995 - root - INFO - lr: 3.6740e-05 gnorm: 1.10 [ 9:06:38<15:24:18] +[titan] 2025-10-05 07:40:58,905 - root - INFO - step: 14870 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:40:58,905 - root - INFO - lr: 3.6732e-05 gnorm: 1.10 [ 9:06:49<15:24:07] +[titan] 2025-10-05 07:41:09,784 - root - INFO - step: 14875 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 07:41:09,784 - root - INFO - lr: 3.6724e-05 gnorm: 1.10 [ 9:07:00<15:23:55] +[titan] 2025-10-05 07:41:20,683 - root - INFO - step: 14880 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 07:41:20,683 - root - INFO - lr: 
3.6716e-05 gnorm: 1.08 [ 9:07:11<15:23:44] +[titan] 2025-10-05 07:41:31,553 - root - INFO - step: 14885 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 07:41:31,553 - root - INFO - lr: 3.6708e-05 gnorm: 1.05 [ 9:07:21<15:23:33] +[titan] 2025-10-05 07:41:42,413 - root - INFO - step: 14890 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 07:41:42,414 - root - INFO - lr: 3.6699e-05 gnorm: 1.15 [ 9:07:32<15:23:21] +[titan] 2025-10-05 07:41:53,308 - root - INFO - step: 14895 loss: 2.2418 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:41:53,308 - root - INFO - lr: 3.6691e-05 gnorm: 1.07 [ 9:07:43<15:23:10] +[titan] 2025-10-05 07:42:01,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:42:04,160 - root - INFO - step: 14900 loss: 2.2908 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0236 +[titan] 2025-10-05 07:42:04,160 - root - INFO - lr: 3.6683e-05 gnorm: 1.13 [ 9:07:54<15:22:59] +[titan] 2025-10-05 07:42:15,031 - root - INFO - step: 14905 loss: 2.3078 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0380 +[titan] 2025-10-05 07:42:15,031 - root - INFO - lr: 3.6675e-05 gnorm: 1.12 [ 9:08:05<15:22:48] +[titan] 2025-10-05 07:42:25,895 - root - INFO - step: 14910 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9881 +[titan] 2025-10-05 07:42:25,895 - root - INFO - lr: 3.6667e-05 gnorm: 1.09 [ 9:08:16<15:22:36] +[titan] 2025-10-05 07:42:36,754 - root - INFO - step: 14915 loss: 2.2480 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 07:42:36,754 - root - INFO - lr: 3.6659e-05 gnorm: 1.09 [ 9:08:27<15:22:25] +[titan] 2025-10-05 07:42:47,621 - root - INFO - step: 14920 loss: 2.4317 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 07:42:47,621 - root - INFO - lr: 3.6651e-05 gnorm: 1.11 [ 9:08:38<15:22:14] +[titan] 2025-10-05 07:42:58,504 - root - INFO - step: 14925 loss: 2.2167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9576 +[titan] 2025-10-05 07:42:58,504 - root - INFO - lr: 3.6642e-05 gnorm: 1.09 [ 9:08:48<15:22:02] +[titan] 2025-10-05 07:43:09,387 - root - INFO - step: 14930 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 07:43:09,388 - root - INFO - lr: 
3.6634e-05 gnorm: 1.08 [ 9:08:59<15:21:51] +[titan] 2025-10-05 07:43:20,273 - root - INFO - step: 14935 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:43:20,273 - root - INFO - lr: 3.6626e-05 gnorm: 1.11 [ 9:09:10<15:21:40] +[titan] 2025-10-05 07:43:31,152 - root - INFO - step: 14940 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:43:31,152 - root - INFO - lr: 3.6618e-05 gnorm: 1.09 [ 9:09:21<15:21:28] +[titan] 2025-10-05 07:43:42,038 - root - INFO - step: 14945 loss: 2.2476 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 07:43:42,038 - root - INFO - lr: 3.6610e-05 gnorm: 1.04 [ 9:09:32<15:21:17] +[titan] 2025-10-05 07:43:50,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:43:52,910 - root - INFO - step: 14950 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9350 +[titan] 2025-10-05 07:43:52,910 - root - INFO - lr: 3.6602e-05 gnorm: 1.07 [ 9:09:43<15:21:06] +[titan] 2025-10-05 07:44:03,804 - root - INFO - step: 14955 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 07:44:03,804 - root - INFO - lr: 3.6593e-05 gnorm: 1.06 [ 9:09:54<15:20:55] +[titan] 2025-10-05 07:44:14,704 - root - INFO - step: 14960 loss: 2.2966 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0290 +[titan] 2025-10-05 07:44:14,704 - root - INFO - lr: 3.6585e-05 gnorm: 1.08 [ 9:10:05<15:20:43] +[titan] 2025-10-05 07:44:25,575 - root - INFO - step: 14965 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 07:44:25,576 - root - INFO - lr: 3.6577e-05 gnorm: 1.07 [ 9:10:15<15:20:32] +[titan] 2025-10-05 07:44:36,428 - root - INFO - step: 14970 loss: 2.2508 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:44:36,428 - root - INFO - lr: 3.6569e-05 gnorm: 1.09 [ 9:10:26<15:20:21] +[titan] 2025-10-05 07:44:47,311 - root - INFO - step: 14975 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:44:47,311 - root - INFO - lr: 3.6561e-05 gnorm: 1.08 [ 9:10:37<15:20:09] +[titan] 2025-10-05 07:44:58,219 - root - INFO - step: 14980 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:44:58,219 - root - INFO - lr: 
3.6553e-05 gnorm: 1.11 [ 9:10:48<15:19:58] +[titan] 2025-10-05 07:45:09,066 - root - INFO - step: 14985 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9745 +[titan] 2025-10-05 07:45:09,066 - root - INFO - lr: 3.6544e-05 gnorm: 1.07 [ 9:10:59<15:19:47] +[titan] 2025-10-05 07:45:19,933 - root - INFO - step: 14990 loss: 2.1985 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 07:45:19,934 - root - INFO - lr: 3.6536e-05 gnorm: 1.07 [ 9:11:10<15:19:36] +[titan] 2025-10-05 07:45:30,815 - root - INFO - step: 14995 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:45:30,816 - root - INFO - lr: 3.6528e-05 gnorm: 1.05 [ 9:11:21<15:19:24] +[titan] 2025-10-05 07:45:39,479 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:45:41,661 - root - INFO - step: 15000 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 07:45:41,661 - root - INFO - lr: 3.6520e-05 gnorm: 1.11 [ 9:11:32<15:19:13] +[titan] 2025-10-05 07:45:41,661 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 07:46:00,847 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 07:46:00,848 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.19 seconds. 
+[titan] 2025-10-05 07:48:03,855 - root - INFO - step: 15005 loss: 2.1283 memory: 118.84GiB(85.28%) tps: 2,304 tflops: 31.97 mfu: 3.23% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 07:48:03,856 - root - INFO - lr: 3.6512e-05 gnorm: 1.06 [ 9:13:54<15:22:40] +[titan] 2025-10-05 07:48:14,694 - root - INFO - step: 15010 loss: 2.2315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 07:48:14,695 - root - INFO - lr: 3.6504e-05 gnorm: 1.12 [ 9:14:05<15:22:29] +[titan] 2025-10-05 07:48:25,509 - root - INFO - step: 15015 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 07:48:25,510 - root - INFO - lr: 3.6495e-05 gnorm: 1.09 [ 9:14:15<15:22:17] +[titan] 2025-10-05 07:48:36,334 - root - INFO - step: 15020 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 07:48:36,334 - root - INFO - lr: 3.6487e-05 gnorm: 1.09 [ 9:14:26<15:22:06] +[titan] 2025-10-05 07:48:47,212 - root - INFO - step: 15025 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:48:47,212 - root - INFO - lr: 3.6479e-05 gnorm: 1.07 [ 9:14:37<15:21:55] +[titan] 2025-10-05 07:48:58,068 - root - INFO - step: 15030 loss: 2.2843 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:48:58,069 - root - INFO - lr: 3.6471e-05 gnorm: 1.12 [ 9:14:48<15:21:43] +[titan] 2025-10-05 07:49:08,916 - root - INFO - step: 15035 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 07:49:08,916 - root - INFO - lr: 
3.6463e-05 gnorm: 1.06 [ 9:14:59<15:21:32] +[titan] 2025-10-05 07:49:19,759 - root - INFO - step: 15040 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9876 +[titan] 2025-10-05 07:49:19,759 - root - INFO - lr: 3.6454e-05 gnorm: 1.06 [ 9:15:10<15:21:20] +[titan] 2025-10-05 07:49:30,613 - root - INFO - step: 15045 loss: 2.2689 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 07:49:30,613 - root - INFO - lr: 3.6446e-05 gnorm: 1.09 [ 9:15:21<15:21:09] +[titan] 2025-10-05 07:49:39,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:49:41,450 - root - INFO - step: 15050 loss: 2.2266 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 07:49:41,450 - root - INFO - lr: 3.6438e-05 gnorm: 1.08 [ 9:15:31<15:20:57] +[titan] 2025-10-05 07:49:52,326 - root - INFO - step: 15055 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0056 +[titan] 2025-10-05 07:49:52,326 - root - INFO - lr: 3.6430e-05 gnorm: 1.10 [ 9:15:42<15:20:46] +[titan] 2025-10-05 07:50:03,204 - root - INFO - step: 15060 loss: 2.2848 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:50:03,204 - root - INFO - lr: 3.6422e-05 gnorm: 1.09 [ 9:15:53<15:20:34] +[titan] 2025-10-05 07:50:14,065 - root - INFO - step: 15065 loss: 2.2635 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 07:50:14,065 - root - INFO - lr: 3.6413e-05 gnorm: 1.09 [ 9:16:04<15:20:23] +[titan] 2025-10-05 07:50:24,948 - root - INFO - step: 15070 loss: 2.2568 memory: 118.84GiB(85.28%) tps: 
30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 07:50:24,948 - root - INFO - lr: 3.6405e-05 gnorm: 1.08 [ 9:16:15<15:20:12] +[titan] 2025-10-05 07:50:35,794 - root - INFO - step: 15075 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0211 +[titan] 2025-10-05 07:50:35,794 - root - INFO - lr: 3.6397e-05 gnorm: 1.09 [ 9:16:26<15:20:00] +[titan] 2025-10-05 07:50:46,648 - root - INFO - step: 15080 loss: 2.2769 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0110 +[titan] 2025-10-05 07:50:46,649 - root - INFO - lr: 3.6389e-05 gnorm: 1.07 [ 9:16:37<15:19:49] +[titan] 2025-10-05 07:50:57,498 - root - INFO - step: 15085 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 07:50:57,498 - root - INFO - lr: 3.6381e-05 gnorm: 1.07 [ 9:16:47<15:19:37] +[titan] 2025-10-05 07:51:08,384 - root - INFO - step: 15090 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:51:08,385 - root - INFO - lr: 3.6373e-05 gnorm: 1.08 [ 9:16:58<15:19:26] +[titan] 2025-10-05 07:51:19,234 - root - INFO - step: 15095 loss: 2.2363 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9753 +[titan] 2025-10-05 07:51:19,234 - root - INFO - lr: 3.6364e-05 gnorm: 1.07 [ 9:17:09<15:19:15] +[titan] 2025-10-05 07:51:27,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:51:30,098 - root - INFO - step: 15100 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:51:30,098 - root - INFO - lr: 3.6356e-05 gnorm: 1.15 [ 9:17:20<15:19:03] +[titan] 2025-10-05 07:51:40,976 - root - INFO - step: 15105 loss: 2.2586 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9945 +[titan] 2025-10-05 07:51:40,977 - root - INFO - lr: 3.6348e-05 gnorm: 1.11 [ 9:17:31<15:18:52] +[titan] 2025-10-05 07:51:51,845 - root - INFO - step: 15110 loss: 2.2404 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 07:51:51,845 - root - INFO - lr: 3.6340e-05 gnorm: 1.07 [ 9:17:42<15:18:40] +[titan] 2025-10-05 07:52:02,715 - root - INFO - step: 15115 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0275 +[titan] 2025-10-05 07:52:02,715 - root - INFO - lr: 3.6331e-05 gnorm: 1.14 [ 9:17:53<15:18:29] +[titan] 2025-10-05 07:52:13,605 - root - INFO - step: 15120 loss: 2.2957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 07:52:13,605 - root - INFO - lr: 3.6323e-05 gnorm: 1.15 [ 9:18:04<15:18:18] +[titan] 2025-10-05 07:52:24,497 - root - INFO - step: 15125 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:52:24,498 - root - INFO - lr: 3.6315e-05 gnorm: 1.03 [ 9:18:14<15:18:06] +[titan] 2025-10-05 07:52:35,368 - root - INFO - step: 15130 loss: 2.2438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:35,368 - root - INFO - lr: 
3.6307e-05 gnorm: 1.11 [ 9:18:25<15:17:55] +[titan] 2025-10-05 07:52:46,243 - root - INFO - step: 15135 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 07:52:46,243 - root - INFO - lr: 3.6299e-05 gnorm: 1.09 [ 9:18:36<15:17:43] +[titan] 2025-10-05 07:52:57,118 - root - INFO - step: 15140 loss: 2.2420 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:57,118 - root - INFO - lr: 3.6290e-05 gnorm: 1.10 [ 9:18:47<15:17:32] +[titan] 2025-10-05 07:53:07,980 - root - INFO - step: 15145 loss: 2.3012 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 07:53:07,981 - root - INFO - lr: 3.6282e-05 gnorm: 1.07 [ 9:18:58<15:17:21] +[titan] 2025-10-05 07:53:16,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:53:18,858 - root - INFO - step: 15150 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9690 +[titan] 2025-10-05 07:53:18,858 - root - INFO - lr: 3.6274e-05 gnorm: 1.07 [ 9:19:09<15:17:09] +[titan] 2025-10-05 07:53:29,759 - root - INFO - step: 15155 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:53:29,759 - root - INFO - lr: 3.6266e-05 gnorm: 1.07 [ 9:19:20<15:16:58] +[titan] 2025-10-05 07:53:40,639 - root - INFO - step: 15160 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:53:40,639 - root - INFO - lr: 3.6258e-05 gnorm: 1.11 [ 9:19:31<15:16:46] +[titan] 2025-10-05 07:53:51,512 - root - INFO - step: 15165 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:53:51,512 - root - INFO - lr: 3.6249e-05 gnorm: 1.10 [ 9:19:41<15:16:35] +[titan] 2025-10-05 07:54:02,384 - root - INFO - step: 15170 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0038 +[titan] 2025-10-05 07:54:02,384 - root - INFO - lr: 3.6241e-05 gnorm: 1.11 [ 9:19:52<15:16:24] +[titan] 2025-10-05 07:54:13,267 - root - INFO - step: 15175 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:54:13,267 - root - INFO - lr: 3.6233e-05 gnorm: 1.08 [ 9:20:03<15:16:12] +[titan] 2025-10-05 07:54:24,133 - root - INFO - step: 15180 loss: 2.3028 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 07:54:24,133 - root - INFO - lr: 
3.6225e-05 gnorm: 1.09 [ 9:20:14<15:16:01] +[titan] 2025-10-05 07:54:35,038 - root - INFO - step: 15185 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:54:35,038 - root - INFO - lr: 3.6216e-05 gnorm: 1.07 [ 9:20:25<15:15:49] +[titan] 2025-10-05 07:54:45,892 - root - INFO - step: 15190 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0272 +[titan] 2025-10-05 07:54:45,892 - root - INFO - lr: 3.6208e-05 gnorm: 1.08 [ 9:20:36<15:15:38] +[titan] 2025-10-05 07:54:56,749 - root - INFO - step: 15195 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9845 +[titan] 2025-10-05 07:54:56,749 - root - INFO - lr: 3.6200e-05 gnorm: 1.08 [ 9:20:47<15:15:27] +[titan] 2025-10-05 07:55:05,428 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:55:07,614 - root - INFO - step: 15200 loss: 2.2230 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 07:55:07,615 - root - INFO - lr: 3.6192e-05 gnorm: 1.08 [ 9:20:58<15:15:15] +[titan] 2025-10-05 07:55:18,475 - root - INFO - step: 15205 loss: 2.2720 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 07:55:18,475 - root - INFO - lr: 3.6184e-05 gnorm: 1.09 [ 9:21:08<15:15:04] +[titan] 2025-10-05 07:55:29,333 - root - INFO - step: 15210 loss: 2.2496 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9871 +[titan] 2025-10-05 07:55:29,333 - root - INFO - lr: 3.6175e-05 gnorm: 1.10 [ 9:21:19<15:14:52] +[titan] 2025-10-05 07:55:40,201 - root - INFO - step: 15215 loss: 2.2704 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 07:55:40,201 - root - INFO - lr: 3.6167e-05 gnorm: 1.10 [ 9:21:30<15:14:41] +[titan] 2025-10-05 07:55:51,102 - root - INFO - step: 15220 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9944 +[titan] 2025-10-05 07:55:51,103 - root - INFO - lr: 3.6159e-05 gnorm: 1.09 [ 9:21:41<15:14:30] +[titan] 2025-10-05 07:56:01,979 - root - INFO - step: 15225 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:56:01,979 - root - INFO - lr: 3.6151e-05 gnorm: 1.08 [ 9:21:52<15:14:18] +[titan] 2025-10-05 07:56:12,856 - root - INFO - step: 15230 loss: 2.3282 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0531 +[titan] 2025-10-05 07:56:12,856 - root - INFO - lr: 
3.6142e-05 gnorm: 1.08 [ 9:22:03<15:14:07] +[titan] 2025-10-05 07:56:23,718 - root - INFO - step: 15235 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0077 +[titan] 2025-10-05 07:56:23,718 - root - INFO - lr: 3.6134e-05 gnorm: 1.04 [ 9:22:14<15:13:55] +[titan] 2025-10-05 07:56:34,605 - root - INFO - step: 15240 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9751 +[titan] 2025-10-05 07:56:34,605 - root - INFO - lr: 3.6126e-05 gnorm: 1.07 [ 9:22:24<15:13:44] +[titan] 2025-10-05 07:56:45,472 - root - INFO - step: 15245 loss: 2.3360 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 07:56:45,472 - root - INFO - lr: 3.6118e-05 gnorm: 1.05 [ 9:22:35<15:13:33] +[titan] 2025-10-05 07:56:54,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:56:56,368 - root - INFO - step: 15250 loss: 2.2490 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9870 +[titan] 2025-10-05 07:56:56,368 - root - INFO - lr: 3.6109e-05 gnorm: 1.08 [ 9:22:46<15:13:21] +[titan] 2025-10-05 07:57:07,241 - root - INFO - step: 15255 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9655 +[titan] 2025-10-05 07:57:07,241 - root - INFO - lr: 3.6101e-05 gnorm: 1.06 [ 9:22:57<15:13:10] +[titan] 2025-10-05 07:57:18,136 - root - INFO - step: 15260 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:57:18,136 - root - INFO - lr: 3.6093e-05 gnorm: 1.10 [ 9:23:08<15:12:59] +[titan] 2025-10-05 07:57:29,037 - root - INFO - step: 15265 loss: 2.2358 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9744 +[titan] 2025-10-05 07:57:29,037 - root - INFO - lr: 3.6085e-05 gnorm: 1.07 [ 9:23:19<15:12:47] +[titan] 2025-10-05 07:57:39,909 - root - INFO - step: 15270 loss: 2.3087 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 07:57:39,910 - root - INFO - lr: 3.6076e-05 gnorm: 1.08 [ 9:23:30<15:12:36] +[titan] 2025-10-05 07:57:50,767 - root - INFO - step: 15275 loss: 2.2564 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 07:57:50,768 - root - INFO - lr: 3.6068e-05 gnorm: 1.08 [ 9:23:41<15:12:24] +[titan] 2025-10-05 07:58:01,662 - root - INFO - step: 15280 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9973 +[titan] 2025-10-05 07:58:01,662 - root - INFO - lr: 
3.6060e-05 gnorm: 1.20 [ 9:23:52<15:12:13] +[titan] 2025-10-05 07:58:12,584 - root - INFO - step: 15285 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:58:12,584 - root - INFO - lr: 3.6052e-05 gnorm: 1.10 [ 9:24:02<15:12:02] +[titan] 2025-10-05 07:58:23,438 - root - INFO - step: 15290 loss: 2.2206 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 07:58:23,438 - root - INFO - lr: 3.6043e-05 gnorm: 1.06 [ 9:24:13<15:11:50] +[titan] 2025-10-05 07:58:34,298 - root - INFO - step: 15295 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 07:58:34,299 - root - INFO - lr: 3.6035e-05 gnorm: 1.06 [ 9:24:24<15:11:39] +[titan] 2025-10-05 07:58:42,983 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:58:45,167 - root - INFO - step: 15300 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 07:58:45,167 - root - INFO - lr: 3.6027e-05 gnorm: 1.08 [ 9:24:35<15:11:27] +[titan] 2025-10-05 07:58:56,031 - root - INFO - step: 15305 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 07:58:56,031 - root - INFO - lr: 3.6019e-05 gnorm: 1.10 [ 9:24:46<15:11:16] +[titan] 2025-10-05 07:59:06,887 - root - INFO - step: 15310 loss: 2.2775 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 07:59:06,887 - root - INFO - lr: 3.6010e-05 gnorm: 1.09 [ 9:24:57<15:11:05] +[titan] 2025-10-05 07:59:17,809 - root - INFO - step: 15315 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9766 +[titan] 2025-10-05 07:59:17,809 - root - INFO - lr: 3.6002e-05 gnorm: 1.10 [ 9:25:08<15:10:53] +[titan] 2025-10-05 07:59:28,686 - root - INFO - step: 15320 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 07:59:28,686 - root - INFO - lr: 3.5994e-05 gnorm: 1.07 [ 9:25:19<15:10:42] +[titan] 2025-10-05 07:59:39,537 - root - INFO - step: 15325 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 07:59:39,537 - root - INFO - lr: 3.5986e-05 gnorm: 1.11 [ 9:25:29<15:10:31] +[titan] 2025-10-05 07:59:50,399 - root - INFO - step: 15330 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:59:50,400 - root - INFO - lr: 
3.5977e-05 gnorm: 1.11 [ 9:25:40<15:10:19] +[titan] 2025-10-05 08:00:01,278 - root - INFO - step: 15335 loss: 2.2792 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0137 +[titan] 2025-10-05 08:00:01,278 - root - INFO - lr: 3.5969e-05 gnorm: 1.15 [ 9:25:51<15:10:08] +[titan] 2025-10-05 08:00:12,178 - root - INFO - step: 15340 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 08:00:12,178 - root - INFO - lr: 3.5961e-05 gnorm: 1.06 [ 9:26:02<15:09:56] +[titan] 2025-10-05 08:00:23,116 - root - INFO - step: 15345 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:00:23,116 - root - INFO - lr: 3.5952e-05 gnorm: 1.09 [ 9:26:13<15:09:45] +[titan] 2025-10-05 08:00:31,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:00:33,989 - root - INFO - step: 15350 loss: 2.2871 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 08:00:33,989 - root - INFO - lr: 3.5944e-05 gnorm: 1.10 [ 9:26:24<15:09:34] +[titan] 2025-10-05 08:00:44,860 - root - INFO - step: 15355 loss: 2.1883 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 08:00:44,860 - root - INFO - lr: 3.5936e-05 gnorm: 1.10 [ 9:26:35<15:09:22] +[titan] 2025-10-05 08:00:55,829 - root - INFO - step: 15360 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9417 +[titan] 2025-10-05 08:00:55,830 - root - INFO - lr: 3.5928e-05 gnorm: 1.04 [ 9:26:46<15:09:11] +[titan] 2025-10-05 08:00:56,010 - root - INFO - Dumping profiler traces at step 15360 +[titan] 2025-10-05 08:00:56,051 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:01:06,999 - root - INFO - step: 15365 loss: 2.2781 memory: 118.84GiB(85.28%) tps: 29,338 tflops: 407.01 mfu: 41.15% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 08:01:06,999 - root - INFO - lr: 3.5919e-05 gnorm: 1.07 [ 9:26:57<15:09:00] +[titan] 2025-10-05 08:01:17,972 - root - INFO - step: 15370 loss: 2.2166 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 08:01:17,972 - root - INFO - lr: 3.5911e-05 gnorm: 1.06 [ 9:27:08<15:08:49] +[titan] 2025-10-05 08:01:28,828 - root - INFO - step: 15375 loss: 2.2257 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 08:01:28,829 - root - INFO - lr: 3.5903e-05 gnorm: 1.04 [ 9:27:19<15:08:38] +[titan] 2025-10-05 08:01:39,732 - root - INFO - step: 15380 loss: 
2.2608 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9985 +[titan] 2025-10-05 08:01:39,732 - root - INFO - lr: 3.5895e-05 gnorm: 1.14 [ 9:27:30<15:08:26] +[titan] 2025-10-05 08:01:50,600 - root - INFO - step: 15385 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9684 +[titan] 2025-10-05 08:01:50,601 - root - INFO - lr: 3.5886e-05 gnorm: 1.10 [ 9:27:40<15:08:15] +[titan] 2025-10-05 08:02:01,449 - root - INFO - step: 15390 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 08:02:01,449 - root - INFO - lr: 3.5878e-05 gnorm: 1.10 [ 9:27:51<15:08:03] +[titan] 2025-10-05 08:02:12,375 - root - INFO - step: 15395 loss: 2.2776 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 08:02:12,375 - root - INFO - lr: 3.5870e-05 gnorm: 1.10 [ 9:28:02<15:07:52] +[titan] 2025-10-05 08:02:21,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:02:23,262 - root - INFO - step: 15400 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9035 +[titan] 2025-10-05 08:02:23,262 - root - INFO - lr: 3.5861e-05 gnorm: 1.05 [ 9:28:13<15:07:41] +[titan] 2025-10-05 08:02:34,123 - root - INFO - step: 15405 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 08:02:34,123 - root - INFO - lr: 3.5853e-05 gnorm: 1.05 [ 9:28:24<15:07:29] +[titan] 2025-10-05 08:02:45,027 - root - INFO - step: 15410 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:02:45,027 - root - INFO - lr: 3.5845e-05 gnorm: 1.09 [ 9:28:35<15:07:18] +[titan] 2025-10-05 08:02:55,897 - root - INFO - step: 15415 loss: 2.2676 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:02:55,897 - root - INFO - lr: 3.5837e-05 gnorm: 1.09 [ 9:28:46<15:07:07] +[titan] 2025-10-05 08:03:06,761 - root - INFO - step: 15420 loss: 2.2135 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9565 +[titan] 2025-10-05 08:03:06,761 - root - INFO - lr: 3.5828e-05 gnorm: 1.09 [ 9:28:57<15:06:55] +[titan] 2025-10-05 08:03:17,688 - root - INFO - step: 15425 loss: 2.2445 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9820 +[titan] 2025-10-05 08:03:17,688 - root - INFO - lr: 3.5820e-05 gnorm: 1.04 [ 9:29:08<15:06:44] +[titan] 2025-10-05 08:03:28,565 - root - INFO - step: 15430 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 08:03:28,565 - root - INFO - lr: 
3.5812e-05 gnorm: 1.11 [ 9:29:18<15:06:33] +[titan] 2025-10-05 08:03:39,425 - root - INFO - step: 15435 loss: 2.2327 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:03:39,425 - root - INFO - lr: 3.5803e-05 gnorm: 1.06 [ 9:29:29<15:06:21] +[titan] 2025-10-05 08:03:50,323 - root - INFO - step: 15440 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 08:03:50,323 - root - INFO - lr: 3.5795e-05 gnorm: 1.04 [ 9:29:40<15:06:10] +[titan] 2025-10-05 08:04:01,195 - root - INFO - step: 15445 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 08:04:01,195 - root - INFO - lr: 3.5787e-05 gnorm: 1.04 [ 9:29:51<15:05:58] +[titan] 2025-10-05 08:04:09,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:04:12,045 - root - INFO - step: 15450 loss: 2.2815 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0162 +[titan] 2025-10-05 08:04:12,045 - root - INFO - lr: 3.5778e-05 gnorm: 1.12 [ 9:30:02<15:05:47] +[titan] 2025-10-05 08:04:22,960 - root - INFO - step: 15455 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 08:04:22,960 - root - INFO - lr: 3.5770e-05 gnorm: 1.11 [ 9:30:13<15:05:36] +[titan] 2025-10-05 08:04:33,818 - root - INFO - step: 15460 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9338 +[titan] 2025-10-05 08:04:33,818 - root - INFO - lr: 3.5762e-05 gnorm: 1.08 [ 9:30:24<15:05:24] +[titan] 2025-10-05 08:04:44,689 - root - INFO - step: 15465 loss: 2.1902 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:04:44,689 - root - INFO - lr: 3.5754e-05 gnorm: 1.09 [ 9:30:35<15:05:13] +[titan] 2025-10-05 08:04:55,581 - root - INFO - step: 15470 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:04:55,581 - root - INFO - lr: 3.5745e-05 gnorm: 1.07 [ 9:30:45<15:05:02] +[titan] 2025-10-05 08:05:06,501 - root - INFO - step: 15475 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0023 +[titan] 2025-10-05 08:05:06,501 - root - INFO - lr: 3.5737e-05 gnorm: 1.12 [ 9:30:56<15:04:50] +[titan] 2025-10-05 08:05:17,432 - root - INFO - step: 15480 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 08:05:17,432 - root - INFO - lr: 
3.5729e-05 gnorm: 1.10 [ 9:31:07<15:04:39] +[titan] 2025-10-05 08:05:28,358 - root - INFO - step: 15485 loss: 2.2121 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:05:28,358 - root - INFO - lr: 3.5720e-05 gnorm: 1.05 [ 9:31:18<15:04:28] +[titan] 2025-10-05 08:05:39,256 - root - INFO - step: 15490 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 08:05:39,257 - root - INFO - lr: 3.5712e-05 gnorm: 1.11 [ 9:31:29<15:04:16] +[titan] 2025-10-05 08:05:50,140 - root - INFO - step: 15495 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 08:05:50,140 - root - INFO - lr: 3.5704e-05 gnorm: 1.04 [ 9:31:40<15:04:05] +[titan] 2025-10-05 08:05:58,823 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:06:01,006 - root - INFO - step: 15500 loss: 2.1526 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 08:06:01,006 - root - INFO - lr: 3.5695e-05 gnorm: 1.06 [ 9:31:51<15:03:54] +[titan] 2025-10-05 08:06:11,916 - root - INFO - step: 15505 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 08:06:11,916 - root - INFO - lr: 3.5687e-05 gnorm: 1.09 [ 9:32:02<15:03:42] +[titan] 2025-10-05 08:06:22,849 - root - INFO - step: 15510 loss: 2.2409 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 08:06:22,849 - root - INFO - lr: 3.5679e-05 gnorm: 1.06 [ 9:32:13<15:03:31] +[titan] 2025-10-05 08:06:33,698 - root - INFO - step: 15515 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 08:06:33,698 - root - INFO - lr: 3.5670e-05 gnorm: 1.08 [ 9:32:24<15:03:20] +[titan] 2025-10-05 08:06:44,607 - root - INFO - step: 15520 loss: 2.3868 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 08:06:44,607 - root - INFO - lr: 3.5662e-05 gnorm: 2.99 [ 9:32:34<15:03:08] +[titan] 2025-10-05 08:06:55,464 - root - INFO - step: 15525 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9210 +[titan] 2025-10-05 08:06:55,464 - root - INFO - lr: 3.5654e-05 gnorm: 1.10 [ 9:32:45<15:02:57] +[titan] 2025-10-05 08:07:06,333 - root - INFO - step: 15530 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:07:06,333 - root - INFO - lr: 
3.5646e-05 gnorm: 1.03 [ 9:32:56<15:02:45] +[titan] 2025-10-05 08:07:17,232 - root - INFO - step: 15535 loss: 2.2054 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 08:07:17,232 - root - INFO - lr: 3.5637e-05 gnorm: 1.08 [ 9:33:07<15:02:34] +[titan] 2025-10-05 08:07:28,161 - root - INFO - step: 15540 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:07:28,161 - root - INFO - lr: 3.5629e-05 gnorm: 1.10 [ 9:33:18<15:02:23] +[titan] 2025-10-05 08:07:39,016 - root - INFO - step: 15545 loss: 2.2280 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 08:07:39,016 - root - INFO - lr: 3.5621e-05 gnorm: 1.07 [ 9:33:29<15:02:11] +[titan] 2025-10-05 08:07:47,694 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:07:49,876 - root - INFO - step: 15550 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 08:07:49,876 - root - INFO - lr: 3.5612e-05 gnorm: 1.07 [ 9:33:40<15:02:00] +[titan] 2025-10-05 08:08:00,741 - root - INFO - step: 15555 loss: 2.2855 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 08:08:00,741 - root - INFO - lr: 3.5604e-05 gnorm: 1.08 [ 9:33:51<15:01:49] +[titan] 2025-10-05 08:08:11,595 - root - INFO - step: 15560 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 08:08:11,595 - root - INFO - lr: 3.5596e-05 gnorm: 1.08 [ 9:34:01<15:01:37] +[titan] 2025-10-05 08:08:22,482 - root - INFO - step: 15565 loss: 2.2410 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:08:22,482 - root - INFO - lr: 3.5587e-05 gnorm: 1.08 [ 9:34:12<15:01:26] +[titan] 2025-10-05 08:08:33,383 - root - INFO - step: 15570 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 08:08:33,383 - root - INFO - lr: 3.5579e-05 gnorm: 1.11 [ 9:34:23<15:01:15] +[titan] 2025-10-05 08:08:44,242 - root - INFO - step: 15575 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 08:08:44,242 - root - INFO - lr: 3.5571e-05 gnorm: 1.06 [ 9:34:34<15:01:03] +[titan] 2025-10-05 08:08:55,120 - root - INFO - step: 15580 loss: 2.2133 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9541 +[titan] 2025-10-05 08:08:55,120 - root - INFO - lr: 
3.5562e-05 gnorm: 1.05 [ 9:34:45<15:00:52] +[titan] 2025-10-05 08:09:06,000 - root - INFO - step: 15585 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9699 +[titan] 2025-10-05 08:09:06,000 - root - INFO - lr: 3.5554e-05 gnorm: 1.10 [ 9:34:56<15:00:40] +[titan] 2025-10-05 08:09:16,881 - root - INFO - step: 15590 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 08:09:16,881 - root - INFO - lr: 3.5546e-05 gnorm: 1.07 [ 9:35:07<15:00:29] +[titan] 2025-10-05 08:09:27,784 - root - INFO - step: 15595 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 08:09:27,784 - root - INFO - lr: 3.5537e-05 gnorm: 1.12 [ 9:35:18<15:00:18] +[titan] 2025-10-05 08:09:36,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:09:38,690 - root - INFO - step: 15600 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 08:09:38,690 - root - INFO - lr: 3.5529e-05 gnorm: 1.09 [ 9:35:29<15:00:06] +[titan] 2025-10-05 08:09:49,593 - root - INFO - step: 15605 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 08:09:49,594 - root - INFO - lr: 3.5521e-05 gnorm: 1.08 [ 9:35:39<14:59:55] +[titan] 2025-10-05 08:10:00,479 - root - INFO - step: 15610 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 08:10:00,480 - root - INFO - lr: 3.5512e-05 gnorm: 1.08 [ 9:35:50<14:59:44] +[titan] 2025-10-05 08:10:11,374 - root - INFO - step: 15615 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9198 +[titan] 2025-10-05 08:10:11,374 - root - INFO - lr: 3.5504e-05 gnorm: 1.05 [ 9:36:01<14:59:32] +[titan] 2025-10-05 08:10:22,287 - root - INFO - step: 15620 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9771 +[titan] 2025-10-05 08:10:22,287 - root - INFO - lr: 3.5496e-05 gnorm: 1.08 [ 9:36:12<14:59:21] +[titan] 2025-10-05 08:10:33,166 - root - INFO - step: 15625 loss: 2.2767 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 08:10:33,167 - root - INFO - lr: 3.5487e-05 gnorm: 1.12 [ 9:36:23<14:59:10] +[titan] 2025-10-05 08:10:44,056 - root - INFO - step: 15630 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 08:10:44,056 - root - INFO - lr: 
3.5479e-05 gnorm: 1.07 [ 9:36:34<14:58:58] +[titan] 2025-10-05 08:10:54,998 - root - INFO - step: 15635 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 08:10:54,998 - root - INFO - lr: 3.5471e-05 gnorm: 1.07 [ 9:36:45<14:58:47] +[titan] 2025-10-05 08:11:05,867 - root - INFO - step: 15640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:11:05,867 - root - INFO - lr: 3.5462e-05 gnorm: 1.08 [ 9:36:56<14:58:36] +[titan] 2025-10-05 08:11:16,727 - root - INFO - step: 15645 loss: 2.1832 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:11:16,727 - root - INFO - lr: 3.5454e-05 gnorm: 1.08 [ 9:37:07<14:58:24] +[titan] 2025-10-05 08:11:25,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:11:27,605 - root - INFO - step: 15650 loss: 2.1882 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:11:27,605 - root - INFO - lr: 3.5445e-05 gnorm: 1.03 [ 9:37:17<14:58:13] +[titan] 2025-10-05 08:11:38,458 - root - INFO - step: 15655 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:11:38,458 - root - INFO - lr: 3.5437e-05 gnorm: 1.05 [ 9:37:28<14:58:02] +[titan] 2025-10-05 08:11:49,330 - root - INFO - step: 15660 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 08:11:49,330 - root - INFO - lr: 3.5429e-05 gnorm: 1.11 [ 9:37:39<14:57:50] +[titan] 2025-10-05 08:12:00,235 - root - INFO - step: 15665 loss: 2.1792 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9268 +[titan] 2025-10-05 08:12:00,235 - root - INFO - lr: 3.5420e-05 gnorm: 1.07 [ 9:37:50<14:57:39] +[titan] 2025-10-05 08:12:11,114 - root - INFO - step: 15670 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9779 +[titan] 2025-10-05 08:12:11,114 - root - INFO - lr: 3.5412e-05 gnorm: 1.03 [ 9:38:01<14:57:28] +[titan] 2025-10-05 08:12:21,997 - root - INFO - step: 15675 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 08:12:21,997 - root - INFO - lr: 3.5404e-05 gnorm: 1.07 [ 9:38:12<14:57:16] +[titan] 2025-10-05 08:12:32,868 - root - INFO - step: 15680 loss: 2.2075 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:12:32,868 - root - INFO - lr: 
3.5395e-05 gnorm: 1.07 [ 9:38:23<14:57:05] +[titan] 2025-10-05 08:12:43,747 - root - INFO - step: 15685 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 08:12:43,747 - root - INFO - lr: 3.5387e-05 gnorm: 1.10 [ 9:38:34<14:56:54] +[titan] 2025-10-05 08:12:54,610 - root - INFO - step: 15690 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0090 +[titan] 2025-10-05 08:12:54,610 - root - INFO - lr: 3.5379e-05 gnorm: 1.08 [ 9:38:44<14:56:42] +[titan] 2025-10-05 08:13:05,466 - root - INFO - step: 15695 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 08:13:05,466 - root - INFO - lr: 3.5370e-05 gnorm: 1.06 [ 9:38:55<14:56:31] +[titan] 2025-10-05 08:13:14,194 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:13:16,388 - root - INFO - step: 15700 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0069 +[titan] 2025-10-05 08:13:16,388 - root - INFO - lr: 3.5362e-05 gnorm: 1.10 [ 9:39:06<14:56:19] +[titan] 2025-10-05 08:13:27,274 - root - INFO - step: 15705 loss: 2.2396 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9780 +[titan] 2025-10-05 08:13:27,275 - root - INFO - lr: 3.5354e-05 gnorm: 1.10 [ 9:39:17<14:56:08] +[titan] 2025-10-05 08:13:38,136 - root - INFO - step: 15710 loss: 2.2474 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 08:13:38,136 - root - INFO - lr: 3.5345e-05 gnorm: 1.12 [ 9:39:28<14:55:57] +[titan] 2025-10-05 08:13:49,010 - root - INFO - step: 15715 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9203 +[titan] 2025-10-05 08:13:49,010 - root - INFO - lr: 3.5337e-05 gnorm: 1.10 [ 9:39:39<14:55:45] +[titan] 2025-10-05 08:13:59,875 - root - INFO - step: 15720 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9736 +[titan] 2025-10-05 08:13:59,875 - root - INFO - lr: 3.5328e-05 gnorm: 1.11 [ 9:39:50<14:55:34] +[titan] 2025-10-05 08:14:10,743 - root - INFO - step: 15725 loss: 2.2138 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9568 +[titan] 2025-10-05 08:14:10,743 - root - INFO - lr: 3.5320e-05 gnorm: 1.10 [ 9:40:01<14:55:23] +[titan] 2025-10-05 08:14:21,639 - root - INFO - step: 15730 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 08:14:21,640 - root - INFO - lr: 
3.5312e-05 gnorm: 1.09 [ 9:40:11<14:55:11] +[titan] 2025-10-05 08:14:32,539 - root - INFO - step: 15735 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 08:14:32,539 - root - INFO - lr: 3.5303e-05 gnorm: 1.05 [ 9:40:22<14:55:00] +[titan] 2025-10-05 08:14:43,413 - root - INFO - step: 15740 loss: 2.2798 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:14:43,414 - root - INFO - lr: 3.5295e-05 gnorm: 1.11 [ 9:40:33<14:54:49] +[titan] 2025-10-05 08:14:54,293 - root - INFO - step: 15745 loss: 2.2448 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9833 +[titan] 2025-10-05 08:14:54,293 - root - INFO - lr: 3.5287e-05 gnorm: 1.15 [ 9:40:44<14:54:37] +[titan] 2025-10-05 08:15:02,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:15:05,161 - root - INFO - step: 15750 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9407 +[titan] 2025-10-05 08:15:05,161 - root - INFO - lr: 3.5278e-05 gnorm: 1.07 [ 9:40:55<14:54:26] +[titan] 2025-10-05 08:15:16,026 - root - INFO - step: 15755 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9732 +[titan] 2025-10-05 08:15:16,026 - root - INFO - lr: 3.5270e-05 gnorm: 1.08 [ 9:41:06<14:54:15] +[titan] 2025-10-05 08:15:26,950 - root - INFO - step: 15760 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9790 +[titan] 2025-10-05 08:15:26,951 - root - INFO - lr: 3.5261e-05 gnorm: 1.13 [ 9:41:17<14:54:03] +[titan] 2025-10-05 08:15:37,835 - root - INFO - step: 15765 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0588 +[titan] 2025-10-05 08:15:37,835 - root - INFO - lr: 3.5253e-05 gnorm: 1.07 [ 9:41:28<14:53:52] +[titan] 2025-10-05 08:15:48,693 - root - INFO - step: 15770 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:15:48,694 - root - INFO - lr: 3.5245e-05 gnorm: 1.07 [ 9:41:39<14:53:41] +[titan] 2025-10-05 08:15:59,558 - root - INFO - step: 15775 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 08:15:59,558 - root - INFO - lr: 3.5236e-05 gnorm: 1.09 [ 9:41:49<14:53:29] +[titan] 2025-10-05 08:16:10,424 - root - INFO - step: 15780 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:16:10,424 - root - INFO - lr: 
3.5228e-05 gnorm: 1.07 [ 9:42:00<14:53:18] +[titan] 2025-10-05 08:16:21,284 - root - INFO - step: 15785 loss: 2.2235 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9639 +[titan] 2025-10-05 08:16:21,284 - root - INFO - lr: 3.5220e-05 gnorm: 1.08 [ 9:42:11<14:53:06] +[titan] 2025-10-05 08:16:32,182 - root - INFO - step: 15790 loss: 2.2629 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 08:16:32,182 - root - INFO - lr: 3.5211e-05 gnorm: 1.05 [ 9:42:22<14:52:55] +[titan] 2025-10-05 08:16:43,101 - root - INFO - step: 15795 loss: 2.1715 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9180 +[titan] 2025-10-05 08:16:43,101 - root - INFO - lr: 3.5203e-05 gnorm: 1.08 [ 9:42:33<14:52:44] +[titan] 2025-10-05 08:16:51,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:16:53,985 - root - INFO - step: 15800 loss: 2.2694 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 08:16:53,985 - root - INFO - lr: 3.5194e-05 gnorm: 1.09 [ 9:42:44<14:52:32] +[titan] 2025-10-05 08:17:04,888 - root - INFO - step: 15805 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 08:17:04,888 - root - INFO - lr: 3.5186e-05 gnorm: 1.07 [ 9:42:55<14:52:21] +[titan] 2025-10-05 08:17:15,782 - root - INFO - step: 15810 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9612 +[titan] 2025-10-05 08:17:15,782 - root - INFO - lr: 3.5178e-05 gnorm: 1.11 [ 9:43:06<14:52:10] +[titan] 2025-10-05 08:17:26,682 - root - INFO - step: 15815 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 08:17:26,682 - root - INFO - lr: 3.5169e-05 gnorm: 1.09 [ 9:43:17<14:51:58] +[titan] 2025-10-05 08:17:37,542 - root - INFO - step: 15820 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9730 +[titan] 2025-10-05 08:17:37,542 - root - INFO - lr: 3.5161e-05 gnorm: 1.11 [ 9:43:27<14:51:47] +[titan] 2025-10-05 08:17:48,471 - root - INFO - step: 15825 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 08:17:48,471 - root - INFO - lr: 3.5152e-05 gnorm: 1.07 [ 9:43:38<14:51:36] +[titan] 2025-10-05 08:17:59,372 - root - INFO - step: 15830 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0266 +[titan] 2025-10-05 08:17:59,373 - root - INFO - lr: 
3.5144e-05 gnorm: 1.07 [ 9:43:49<14:51:25] +[titan] 2025-10-05 08:18:10,255 - root - INFO - step: 15835 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9594 +[titan] 2025-10-05 08:18:10,256 - root - INFO - lr: 3.5136e-05 gnorm: 1.11 [ 9:44:00<14:51:13] +[titan] 2025-10-05 08:18:21,144 - root - INFO - step: 15840 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 08:18:21,144 - root - INFO - lr: 3.5127e-05 gnorm: 1.07 [ 9:44:11<14:51:02] +[titan] 2025-10-05 08:18:32,042 - root - INFO - step: 15845 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 08:18:32,043 - root - INFO - lr: 3.5119e-05 gnorm: 1.07 [ 9:44:22<14:50:51] +[titan] 2025-10-05 08:18:40,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:18:42,921 - root - INFO - step: 15850 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9352 +[titan] 2025-10-05 08:18:42,921 - root - INFO - lr: 3.5111e-05 gnorm: 1.08 [ 9:44:33<14:50:39] +[titan] 2025-10-05 08:18:53,795 - root - INFO - step: 15855 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 08:18:53,795 - root - INFO - lr: 3.5102e-05 gnorm: 1.15 [ 9:44:44<14:50:28] +[titan] 2025-10-05 08:19:04,726 - root - INFO - step: 15860 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0324 +[titan] 2025-10-05 08:19:04,726 - root - INFO - lr: 3.5094e-05 gnorm: 1.13 [ 9:44:55<14:50:17] +[titan] 2025-10-05 08:19:15,610 - root - INFO - step: 15865 loss: 2.2234 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 08:19:15,610 - root - INFO - lr: 3.5085e-05 gnorm: 1.07 [ 9:45:05<14:50:05] +[titan] 2025-10-05 08:19:26,577 - root - INFO - step: 15870 loss: 2.2122 memory: 118.84GiB(85.28%) tps: 29,880 tflops: 414.54 mfu: 41.91% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:19:26,577 - root - INFO - lr: 3.5077e-05 gnorm: 1.09 [ 9:45:16<14:49:54] +[titan] 2025-10-05 08:19:31,105 - root - INFO - Dumping profiler traces at step 15872 +[titan] 2025-10-05 08:19:31,144 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:19:37,696 - root - INFO - step: 15875 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 08:19:37,697 - root - INFO - lr: 3.5068e-05 gnorm: 1.05 [ 9:45:28<14:49:43] +[titan] 2025-10-05 08:19:48,571 - root - INFO - step: 15880 loss: 
2.2001 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9437 +[titan] 2025-10-05 08:19:48,571 - root - INFO - lr: 3.5060e-05 gnorm: 1.08 [ 9:45:38<14:49:32] +[titan] 2025-10-05 08:19:59,444 - root - INFO - step: 15885 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9707 +[titan] 2025-10-05 08:19:59,444 - root - INFO - lr: 3.5052e-05 gnorm: 1.06 [ 9:45:49<14:49:20] +[titan] 2025-10-05 08:20:10,353 - root - INFO - step: 15890 loss: 2.2269 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 08:20:10,354 - root - INFO - lr: 3.5043e-05 gnorm: 1.09 [ 9:46:00<14:49:09] +[titan] 2025-10-05 08:20:21,229 - root - INFO - step: 15895 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9759 +[titan] 2025-10-05 08:20:21,230 - root - INFO - lr: 3.5035e-05 gnorm: 1.07 [ 9:46:11<14:48:58] +[titan] 2025-10-05 08:20:29,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:20:32,140 - root - INFO - step: 15900 loss: 2.1957 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 08:20:32,140 - root - INFO - lr: 3.5026e-05 gnorm: 1.09 [ 9:46:22<14:48:46] +[titan] 2025-10-05 08:20:43,027 - root - INFO - step: 15905 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:20:43,027 - root - INFO - lr: 3.5018e-05 gnorm: 1.06 [ 9:46:33<14:48:35] +[titan] 2025-10-05 08:20:53,932 - root - INFO - step: 15910 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 08:20:53,932 - root - INFO - lr: 3.5010e-05 gnorm: 1.11 [ 9:46:44<14:48:24] +[titan] 2025-10-05 08:21:04,803 - root - INFO - step: 15915 loss: 2.1550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9045 +[titan] 2025-10-05 08:21:04,803 - root - INFO - lr: 3.5001e-05 gnorm: 1.05 [ 9:46:55<14:48:12] +[titan] 2025-10-05 08:21:15,707 - root - INFO - step: 15920 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 08:21:15,707 - root - INFO - lr: 3.4993e-05 gnorm: 1.08 [ 9:47:06<14:48:01] +[titan] 2025-10-05 08:21:26,598 - root - INFO - step: 15925 loss: 2.2282 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 08:21:26,599 - root - INFO - lr: 3.4984e-05 gnorm: 1.10 [ 9:47:16<14:47:50] +[titan] 2025-10-05 08:21:37,540 - root - INFO - step: 15930 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0083 +[titan] 2025-10-05 08:21:37,540 - root - INFO - lr: 
3.4976e-05 gnorm: 1.11 [ 9:47:27<14:47:39] +[titan] 2025-10-05 08:21:48,426 - root - INFO - step: 15935 loss: 2.2034 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9455 +[titan] 2025-10-05 08:21:48,426 - root - INFO - lr: 3.4968e-05 gnorm: 1.07 [ 9:47:38<14:47:27] +[titan] 2025-10-05 08:21:59,298 - root - INFO - step: 15940 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9502 +[titan] 2025-10-05 08:21:59,298 - root - INFO - lr: 3.4959e-05 gnorm: 1.07 [ 9:47:49<14:47:16] +[titan] 2025-10-05 08:22:10,199 - root - INFO - step: 15945 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9384 +[titan] 2025-10-05 08:22:10,199 - root - INFO - lr: 3.4951e-05 gnorm: 1.09 [ 9:48:00<14:47:05] +[titan] 2025-10-05 08:22:18,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:22:21,106 - root - INFO - step: 15950 loss: 2.2603 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9966 +[titan] 2025-10-05 08:22:21,106 - root - INFO - lr: 3.4942e-05 gnorm: 1.06 [ 9:48:11<14:46:53] +[titan] 2025-10-05 08:22:32,066 - root - INFO - step: 15955 loss: 2.1766 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 08:22:32,066 - root - INFO - lr: 3.4934e-05 gnorm: 1.07 [ 9:48:22<14:46:42] +[titan] 2025-10-05 08:22:42,935 - root - INFO - step: 15960 loss: 2.2164 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:22:42,935 - root - INFO - lr: 3.4925e-05 gnorm: 1.06 [ 9:48:33<14:46:31] +[titan] 2025-10-05 08:22:53,820 - root - INFO - step: 15965 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 08:22:53,820 - root - INFO - lr: 3.4917e-05 gnorm: 1.07 [ 9:48:44<14:46:19] +[titan] 2025-10-05 08:23:04,735 - root - INFO - step: 15970 loss: 2.2899 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0224 +[titan] 2025-10-05 08:23:04,735 - root - INFO - lr: 3.4909e-05 gnorm: 1.17 [ 9:48:55<14:46:08] +[titan] 2025-10-05 08:23:15,637 - root - INFO - step: 15975 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9298 +[titan] 2025-10-05 08:23:15,637 - root - INFO - lr: 3.4900e-05 gnorm: 1.05 [ 9:49:05<14:45:57] +[titan] 2025-10-05 08:23:26,529 - root - INFO - step: 15980 loss: 2.2468 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 08:23:26,529 - root - INFO - lr: 
3.4892e-05 gnorm: 1.08 [ 9:49:16<14:45:45] +[titan] 2025-10-05 08:23:37,517 - root - INFO - step: 15985 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.76 mfu: 41.84% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:23:37,517 - root - INFO - lr: 3.4883e-05 gnorm: 1.09 [ 9:49:27<14:45:34] +[titan] 2025-10-05 08:23:48,403 - root - INFO - step: 15990 loss: 2.2605 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 08:23:48,403 - root - INFO - lr: 3.4875e-05 gnorm: 1.12 [ 9:49:38<14:45:23] +[titan] 2025-10-05 08:23:59,284 - root - INFO - step: 15995 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:23:59,284 - root - INFO - lr: 3.4866e-05 gnorm: 1.08 [ 9:49:49<14:45:12] +[titan] 2025-10-05 08:24:07,984 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:24:10,169 - root - INFO - step: 16000 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9270 +[titan] 2025-10-05 08:24:10,169 - root - INFO - lr: 3.4858e-05 gnorm: 1.05 [ 9:50:00<14:45:00] +[titan] 2025-10-05 08:24:21,053 - root - INFO - step: 16005 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 08:24:21,053 - root - INFO - lr: 3.4850e-05 gnorm: 1.09 [ 9:50:11<14:44:49] +[titan] 2025-10-05 08:24:31,978 - root - INFO - step: 16010 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 08:24:31,979 - root - INFO - lr: 3.4841e-05 gnorm: 1.05 [ 9:50:22<14:44:38] +[titan] 2025-10-05 08:24:42,866 - root - INFO - step: 16015 loss: 2.2354 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:24:42,866 - root - INFO - lr: 3.4833e-05 gnorm: 1.04 [ 9:50:33<14:44:26] +[titan] 2025-10-05 08:24:53,773 - root - INFO - step: 16020 loss: 2.2147 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9542 +[titan] 2025-10-05 08:24:53,773 - root - INFO - lr: 3.4824e-05 gnorm: 1.08 [ 9:50:44<14:44:15] +[titan] 2025-10-05 08:25:04,656 - root - INFO - step: 16025 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 08:25:04,656 - root - INFO - lr: 3.4816e-05 gnorm: 1.08 [ 9:50:54<14:44:04] +[titan] 2025-10-05 08:25:15,527 - root - INFO - step: 16030 loss: 2.2616 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 08:25:15,527 - root - INFO - lr: 
3.4807e-05 gnorm: 1.05 [ 9:51:05<14:43:52] +[titan] 2025-10-05 08:25:26,410 - root - INFO - step: 16035 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0174 +[titan] 2025-10-05 08:25:26,411 - root - INFO - lr: 3.4799e-05 gnorm: 1.10 [ 9:51:16<14:43:41] +[titan] 2025-10-05 08:25:37,315 - root - INFO - step: 16040 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9599 +[titan] 2025-10-05 08:25:37,315 - root - INFO - lr: 3.4790e-05 gnorm: 1.09 [ 9:51:27<14:43:30] +[titan] 2025-10-05 08:25:48,166 - root - INFO - step: 16045 loss: 2.2422 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:25:48,166 - root - INFO - lr: 3.4782e-05 gnorm: 1.07 [ 9:51:38<14:43:18] +[titan] 2025-10-05 08:25:56,884 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:25:59,065 - root - INFO - step: 16050 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0259 +[titan] 2025-10-05 08:25:59,065 - root - INFO - lr: 3.4774e-05 gnorm: 1.08 [ 9:51:49<14:43:07] +[titan] 2025-10-05 08:26:09,947 - root - INFO - step: 16055 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 08:26:09,947 - root - INFO - lr: 3.4765e-05 gnorm: 1.09 [ 9:52:00<14:42:56] +[titan] 2025-10-05 08:26:20,832 - root - INFO - step: 16060 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 08:26:20,832 - root - INFO - lr: 3.4757e-05 gnorm: 1.17 [ 9:52:11<14:42:44] +[titan] 2025-10-05 08:26:31,707 - root - INFO - step: 16065 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 08:26:31,707 - root - INFO - lr: 3.4748e-05 gnorm: 1.08 [ 9:52:22<14:42:33] +[titan] 2025-10-05 08:26:42,617 - root - INFO - step: 16070 loss: 2.2299 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 08:26:42,618 - root - INFO - lr: 3.4740e-05 gnorm: 1.09 [ 9:52:32<14:42:22] +[titan] 2025-10-05 08:26:53,494 - root - INFO - step: 16075 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9418 +[titan] 2025-10-05 08:26:53,494 - root - INFO - lr: 3.4731e-05 gnorm: 1.08 [ 9:52:43<14:42:10] +[titan] 2025-10-05 08:27:04,387 - root - INFO - step: 16080 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 08:27:04,387 - root - INFO - lr: 
3.4723e-05 gnorm: 1.09 [ 9:52:54<14:41:59] +[titan] 2025-10-05 08:27:15,275 - root - INFO - step: 16085 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 08:27:15,276 - root - INFO - lr: 3.4714e-05 gnorm: 1.08 [ 9:53:05<14:41:48] +[titan] 2025-10-05 08:27:26,154 - root - INFO - step: 16090 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9521 +[titan] 2025-10-05 08:27:26,154 - root - INFO - lr: 3.4706e-05 gnorm: 1.05 [ 9:53:16<14:41:36] +[titan] 2025-10-05 08:27:37,046 - root - INFO - step: 16095 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:27:37,047 - root - INFO - lr: 3.4698e-05 gnorm: 1.07 [ 9:53:27<14:41:25] +[titan] 2025-10-05 08:27:45,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:27:47,929 - root - INFO - step: 16100 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 08:27:47,930 - root - INFO - lr: 3.4689e-05 gnorm: 1.08 [ 9:53:38<14:41:14] +[titan] 2025-10-05 08:27:58,796 - root - INFO - step: 16105 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9585 +[titan] 2025-10-05 08:27:58,796 - root - INFO - lr: 3.4681e-05 gnorm: 1.07 [ 9:53:49<14:41:02] +[titan] 2025-10-05 08:28:09,669 - root - INFO - step: 16110 loss: 2.2129 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9548 +[titan] 2025-10-05 08:28:09,669 - root - INFO - lr: 3.4672e-05 gnorm: 1.07 [ 9:53:59<14:40:51] +[titan] 2025-10-05 08:28:20,594 - root - INFO - step: 16115 loss: 2.1544 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:28:20,594 - root - INFO - lr: 3.4664e-05 gnorm: 1.05 [ 9:54:10<14:40:40] +[titan] 2025-10-05 08:28:31,485 - root - INFO - step: 16120 loss: 2.2760 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 08:28:31,485 - root - INFO - lr: 3.4655e-05 gnorm: 1.09 [ 9:54:21<14:40:29] +[titan] 2025-10-05 08:28:42,397 - root - INFO - step: 16125 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 08:28:42,398 - root - INFO - lr: 3.4647e-05 gnorm: 1.10 [ 9:54:32<14:40:17] +[titan] 2025-10-05 08:28:53,284 - root - INFO - step: 16130 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 08:28:53,284 - root - INFO - lr: 
3.4638e-05 gnorm: 1.14 [ 9:54:43<14:40:06] +[titan] 2025-10-05 08:29:04,160 - root - INFO - step: 16135 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0235 +[titan] 2025-10-05 08:29:04,161 - root - INFO - lr: 3.4630e-05 gnorm: 1.09 [ 9:54:54<14:39:55] +[titan] 2025-10-05 08:29:15,049 - root - INFO - step: 16140 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 08:29:15,049 - root - INFO - lr: 3.4621e-05 gnorm: 1.06 [ 9:55:05<14:39:43] +[titan] 2025-10-05 08:29:25,956 - root - INFO - step: 16145 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9694 +[titan] 2025-10-05 08:29:25,956 - root - INFO - lr: 3.4613e-05 gnorm: 1.10 [ 9:55:16<14:39:32] +[titan] 2025-10-05 08:29:34,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:29:36,856 - root - INFO - step: 16150 loss: 2.1905 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 08:29:36,857 - root - INFO - lr: 3.4604e-05 gnorm: 1.12 [ 9:55:27<14:39:21] +[titan] 2025-10-05 08:29:47,747 - root - INFO - step: 16155 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 08:29:47,748 - root - INFO - lr: 3.4596e-05 gnorm: 1.06 [ 9:55:38<14:39:09] +[titan] 2025-10-05 08:29:58,621 - root - INFO - step: 16160 loss: 2.2108 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9539 +[titan] 2025-10-05 08:29:58,621 - root - INFO - lr: 3.4588e-05 gnorm: 1.06 [ 9:55:48<14:38:58] +[titan] 2025-10-05 08:30:09,500 - root - INFO - step: 16165 loss: 2.2802 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:30:09,500 - root - INFO - lr: 3.4579e-05 gnorm: 1.11 [ 9:55:59<14:38:47] +[titan] 2025-10-05 08:30:20,377 - root - INFO - step: 16170 loss: 2.2485 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9864 +[titan] 2025-10-05 08:30:20,377 - root - INFO - lr: 3.4571e-05 gnorm: 1.07 [ 9:56:10<14:38:35] +[titan] 2025-10-05 08:30:31,256 - root - INFO - step: 16175 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0133 +[titan] 2025-10-05 08:30:31,256 - root - INFO - lr: 3.4562e-05 gnorm: 1.07 [ 9:56:21<14:38:24] +[titan] 2025-10-05 08:30:42,181 - root - INFO - step: 16180 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 08:30:42,181 - root - INFO - lr: 
3.4554e-05 gnorm: 1.08 [ 9:56:32<14:38:13] +[titan] 2025-10-05 08:30:53,053 - root - INFO - step: 16185 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 08:30:53,053 - root - INFO - lr: 3.4545e-05 gnorm: 1.05 [ 9:56:43<14:38:01] +[titan] 2025-10-05 08:31:03,931 - root - INFO - step: 16190 loss: 2.1765 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9234 +[titan] 2025-10-05 08:31:03,931 - root - INFO - lr: 3.4537e-05 gnorm: 1.08 [ 9:56:54<14:37:50] +[titan] 2025-10-05 08:31:14,795 - root - INFO - step: 16195 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9284 +[titan] 2025-10-05 08:31:14,796 - root - INFO - lr: 3.4528e-05 gnorm: 1.09 [ 9:57:05<14:37:39] +[titan] 2025-10-05 08:31:23,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:31:25,652 - root - INFO - step: 16200 loss: 2.3077 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0390 +[titan] 2025-10-05 08:31:25,653 - root - INFO - lr: 3.4520e-05 gnorm: 1.10 [ 9:57:15<14:37:27] +[titan] 2025-10-05 08:31:36,508 - root - INFO - step: 16205 loss: 2.2864 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0206 +[titan] 2025-10-05 08:31:36,508 - root - INFO - lr: 3.4511e-05 gnorm: 1.04 [ 9:57:26<14:37:16] +[titan] 2025-10-05 08:31:47,457 - root - INFO - step: 16210 loss: 2.2341 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 08:31:47,457 - root - INFO - lr: 3.4503e-05 gnorm: 1.09 [ 9:57:37<14:37:05] +[titan] 2025-10-05 08:31:58,346 - root - INFO - step: 16215 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 08:31:58,346 - root - INFO - lr: 3.4494e-05 gnorm: 1.08 [ 9:57:48<14:36:53] +[titan] 2025-10-05 08:32:09,203 - root - INFO - step: 16220 loss: 2.1804 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 08:32:09,204 - root - INFO - lr: 3.4486e-05 gnorm: 1.07 [ 9:57:59<14:36:42] +[titan] 2025-10-05 08:32:20,094 - root - INFO - step: 16225 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 08:32:20,094 - root - INFO - lr: 3.4477e-05 gnorm: 1.07 [ 9:58:10<14:36:31] +[titan] 2025-10-05 08:32:30,976 - root - INFO - step: 16230 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9488 +[titan] 2025-10-05 08:32:30,977 - root - INFO - lr: 
3.4469e-05 gnorm: 1.05 [ 9:58:21<14:36:19] +[titan] 2025-10-05 08:32:41,910 - root - INFO - step: 16235 loss: 2.2424 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:32:41,911 - root - INFO - lr: 3.4460e-05 gnorm: 1.06 [ 9:58:32<14:36:08] +[titan] 2025-10-05 08:32:52,835 - root - INFO - step: 16240 loss: 2.1658 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9139 +[titan] 2025-10-05 08:32:52,835 - root - INFO - lr: 3.4452e-05 gnorm: 1.04 [ 9:58:43<14:35:57] +[titan] 2025-10-05 08:33:03,725 - root - INFO - step: 16245 loss: 2.2254 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:33:03,725 - root - INFO - lr: 3.4443e-05 gnorm: 1.08 [ 9:58:54<14:35:46] +[titan] 2025-10-05 08:33:12,441 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:33:14,633 - root - INFO - step: 16250 loss: 2.2316 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 08:33:14,634 - root - INFO - lr: 3.4435e-05 gnorm: 1.10 [ 9:59:04<14:35:34] +[titan] 2025-10-05 08:33:25,534 - root - INFO - step: 16255 loss: 2.3076 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0385 +[titan] 2025-10-05 08:33:25,534 - root - INFO - lr: 3.4426e-05 gnorm: 1.10 [ 9:59:15<14:35:23] +[titan] 2025-10-05 08:33:36,432 - root - INFO - step: 16260 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 08:33:36,433 - root - INFO - lr: 3.4418e-05 gnorm: 1.13 [ 9:59:26<14:35:12] +[titan] 2025-10-05 08:33:47,313 - root - INFO - step: 16265 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9849 +[titan] 2025-10-05 08:33:47,313 - root - INFO - lr: 3.4409e-05 gnorm: 1.10 [ 9:59:37<14:35:00] +[titan] 2025-10-05 08:33:58,157 - root - INFO - step: 16270 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:33:58,157 - root - INFO - lr: 3.4401e-05 gnorm: 1.09 [ 9:59:48<14:34:49] +[titan] 2025-10-05 08:34:09,059 - root - INFO - step: 16275 loss: 2.2042 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 08:34:09,060 - root - INFO - lr: 3.4392e-05 gnorm: 1.05 [ 9:59:59<14:34:38] +[titan] 2025-10-05 08:34:19,912 - root - INFO - step: 16280 loss: 2.2416 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:34:19,912 - root - INFO - lr: 
3.4384e-05 gnorm: 1.07 [10:00:10<14:34:26] +[titan] 2025-10-05 08:34:30,777 - root - INFO - step: 16285 loss: 2.1576 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:34:30,777 - root - INFO - lr: 3.4375e-05 gnorm: 1.09 [10:00:21<14:34:15] +[titan] 2025-10-05 08:34:41,653 - root - INFO - step: 16290 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 08:34:41,653 - root - INFO - lr: 3.4367e-05 gnorm: 1.05 [10:00:31<14:34:04] +[titan] 2025-10-05 08:34:52,516 - root - INFO - step: 16295 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 08:34:52,516 - root - INFO - lr: 3.4358e-05 gnorm: 1.05 [10:00:42<14:33:52] +[titan] 2025-10-05 08:35:01,184 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:35:03,369 - root - INFO - step: 16300 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0097 +[titan] 2025-10-05 08:35:03,369 - root - INFO - lr: 3.4350e-05 gnorm: 1.13 [10:00:53<14:33:41] +[titan] 2025-10-05 08:35:14,258 - root - INFO - step: 16305 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:35:14,258 - root - INFO - lr: 3.4341e-05 gnorm: 1.10 [10:01:04<14:33:30] +[titan] 2025-10-05 08:35:25,117 - root - INFO - step: 16310 loss: 2.2039 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 08:35:25,117 - root - INFO - lr: 3.4333e-05 gnorm: 1.07 [10:01:15<14:33:18] +[titan] 2025-10-05 08:35:35,923 - root - INFO - step: 16315 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:35:35,923 - root - INFO - lr: 3.4324e-05 gnorm: 1.06 [10:01:26<14:33:07] +[titan] 2025-10-05 08:35:46,803 - root - INFO - step: 16320 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9004 +[titan] 2025-10-05 08:35:46,803 - root - INFO - lr: 3.4316e-05 gnorm: 1.06 [10:01:37<14:32:56] +[titan] 2025-10-05 08:35:57,651 - root - INFO - step: 16325 loss: 2.2716 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0076 +[titan] 2025-10-05 08:35:57,651 - root - INFO - lr: 3.4307e-05 gnorm: 1.08 [10:01:47<14:32:44] +[titan] 2025-10-05 08:36:08,474 - root - INFO - step: 16330 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8746 +[titan] 2025-10-05 08:36:08,474 - root - INFO - lr: 
3.4299e-05 gnorm: 1.05 [10:01:58<14:32:33] +[titan] 2025-10-05 08:36:19,326 - root - INFO - step: 16335 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 08:36:19,326 - root - INFO - lr: 3.4290e-05 gnorm: 1.05 [10:02:09<14:32:22] +[titan] 2025-10-05 08:36:30,202 - root - INFO - step: 16340 loss: 2.2109 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9528 +[titan] 2025-10-05 08:36:30,202 - root - INFO - lr: 3.4282e-05 gnorm: 1.09 [10:02:20<14:32:10] +[titan] 2025-10-05 08:36:41,056 - root - INFO - step: 16345 loss: 2.2287 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9688 +[titan] 2025-10-05 08:36:41,056 - root - INFO - lr: 3.4273e-05 gnorm: 1.09 [10:02:31<14:31:59] +[titan] 2025-10-05 08:36:49,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:36:51,933 - root - INFO - step: 16350 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 08:36:51,933 - root - INFO - lr: 3.4265e-05 gnorm: 1.08 [10:02:42<14:31:48] +[titan] 2025-10-05 08:37:02,815 - root - INFO - step: 16355 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0175 +[titan] 2025-10-05 08:37:02,815 - root - INFO - lr: 3.4256e-05 gnorm: 1.09 [10:02:53<14:31:36] +[titan] 2025-10-05 08:37:13,670 - root - INFO - step: 16360 loss: 2.1862 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:37:13,671 - root - INFO - lr: 3.4248e-05 gnorm: 1.04 [10:03:03<14:31:25] +[titan] 2025-10-05 08:37:24,518 - root - INFO - step: 16365 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:37:24,518 - root - INFO - lr: 3.4239e-05 gnorm: 1.12 [10:03:14<14:31:14] +[titan] 2025-10-05 08:37:35,400 - root - INFO - step: 16370 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9962 +[titan] 2025-10-05 08:37:35,401 - root - INFO - lr: 3.4231e-05 gnorm: 1.08 [10:03:25<14:31:02] +[titan] 2025-10-05 08:37:46,321 - root - INFO - step: 16375 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 08:37:46,321 - root - INFO - lr: 3.4222e-05 gnorm: 1.06 [10:03:36<14:30:51] +[titan] 2025-10-05 08:37:57,173 - root - INFO - step: 16380 loss: 2.2402 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9783 +[titan] 2025-10-05 08:37:57,173 - root - INFO - lr: 
3.4214e-05 gnorm: 1.11 [10:03:47<14:30:40] +[titan] 2025-10-05 08:38:06,127 - root - INFO - Dumping profiler traces at step 16384 +[titan] 2025-10-05 08:38:06,166 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:38:08,373 - root - INFO - step: 16385 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 29,257 tflops: 405.90 mfu: 41.04% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9247 +[titan] 2025-10-05 08:38:08,373 - root - INFO - lr: 3.4205e-05 gnorm: 1.11 [10:03:58<14:30:29] +[titan] 2025-10-05 08:38:19,239 - root - INFO - step: 16390 loss: 2.2560 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 08:38:19,239 - root - INFO - lr: 3.4197e-05 gnorm: 1.08 [10:04:09<14:30:17] +[titan] 2025-10-05 08:38:30,091 - root - INFO - step: 16395 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 08:38:30,091 - root - INFO - lr: 3.4188e-05 gnorm: 1.06 [10:04:20<14:30:06] +[titan] 2025-10-05 08:38:38,779 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:38:41,007 - root - INFO - step: 16400 loss: 2.1921 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 08:38:41,008 - root - INFO - lr: 3.4180e-05 gnorm: 1.12 [10:04:31<14:29:55] +[titan] 2025-10-05 08:38:51,898 - root - INFO - step: 16405 loss: 2.2523 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9902 +[titan] 2025-10-05 08:38:51,898 - root - INFO - lr: 3.4171e-05 gnorm: 1.10 [10:04:42<14:29:43] +[titan] 2025-10-05 08:39:02,751 - root - INFO - step: 16410 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 08:39:02,751 - root - INFO - lr: 3.4163e-05 gnorm: 1.10 [10:04:53<14:29:32] +[titan] 2025-10-05 08:39:13,601 - root - INFO - step: 16415 loss: 2.1622 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 08:39:13,601 - root - INFO - lr: 3.4154e-05 gnorm: 1.06 [10:05:03<14:29:21] +[titan] 2025-10-05 08:39:24,471 - root - INFO - step: 16420 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9579 +[titan] 2025-10-05 08:39:24,471 - root - INFO - lr: 3.4146e-05 gnorm: 1.06 [10:05:14<14:29:09] +[titan] 2025-10-05 08:39:35,332 - root - INFO - step: 16425 loss: 2.1912 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9364 +[titan] 2025-10-05 08:39:35,333 - root - INFO - lr: 3.4137e-05 gnorm: 1.06 [10:05:25<14:28:58] +[titan] 2025-10-05 08:39:46,223 - root - INFO - step: 16430 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 08:39:46,223 - root - INFO - lr: 
3.4129e-05 gnorm: 1.07 [10:05:36<14:28:47] +[titan] 2025-10-05 08:39:57,116 - root - INFO - step: 16435 loss: 2.2229 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9601 +[titan] 2025-10-05 08:39:57,116 - root - INFO - lr: 3.4120e-05 gnorm: 1.10 [10:05:47<14:28:35] +[titan] 2025-10-05 08:40:07,956 - root - INFO - step: 16440 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9241 +[titan] 2025-10-05 08:40:07,956 - root - INFO - lr: 3.4111e-05 gnorm: 1.06 [10:05:58<14:28:24] +[titan] 2025-10-05 08:40:18,791 - root - INFO - step: 16445 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:40:18,792 - root - INFO - lr: 3.4103e-05 gnorm: 1.08 [10:06:09<14:28:13] +[titan] 2025-10-05 08:40:27,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:40:29,625 - root - INFO - step: 16450 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9165 +[titan] 2025-10-05 08:40:29,626 - root - INFO - lr: 3.4094e-05 gnorm: 1.09 [10:06:19<14:28:01] +[titan] 2025-10-05 08:40:40,476 - root - INFO - step: 16455 loss: 2.1561 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 08:40:40,476 - root - INFO - lr: 3.4086e-05 gnorm: 1.05 [10:06:30<14:27:50] +[titan] 2025-10-05 08:40:51,351 - root - INFO - step: 16460 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.9013 +[titan] 2025-10-05 08:40:51,351 - root - INFO - lr: 3.4077e-05 gnorm: 1.06 [10:06:41<14:27:39] +[titan] 2025-10-05 08:41:02,252 - root - INFO - step: 16465 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:41:02,252 - root - INFO - lr: 3.4069e-05 gnorm: 1.05 [10:06:52<14:27:27] +[titan] 2025-10-05 08:41:13,112 - root - INFO - step: 16470 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9462 +[titan] 2025-10-05 08:41:13,113 - root - INFO - lr: 3.4060e-05 gnorm: 1.10 [10:07:03<14:27:16] +[titan] 2025-10-05 08:41:23,980 - root - INFO - step: 16475 loss: 2.2132 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:41:23,981 - root - INFO - lr: 3.4052e-05 gnorm: 1.05 [10:07:14<14:27:05] +[titan] 2025-10-05 08:41:34,850 - root - INFO - step: 16480 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 08:41:34,850 - root - INFO - lr: 
3.4043e-05 gnorm: 1.07 [10:07:25<14:26:53] +[titan] 2025-10-05 08:41:45,728 - root - INFO - step: 16485 loss: 2.1837 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 08:41:45,728 - root - INFO - lr: 3.4035e-05 gnorm: 1.10 [10:07:35<14:26:42] +[titan] 2025-10-05 08:41:56,603 - root - INFO - step: 16490 loss: 2.2265 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 08:41:56,603 - root - INFO - lr: 3.4026e-05 gnorm: 1.08 [10:07:46<14:26:31] +[titan] 2025-10-05 08:42:07,468 - root - INFO - step: 16495 loss: 2.2288 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 08:42:07,468 - root - INFO - lr: 3.4018e-05 gnorm: 1.10 [10:07:57<14:26:19] +[titan] 2025-10-05 08:42:16,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:42:18,373 - root - INFO - step: 16500 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9920 +[titan] 2025-10-05 08:42:18,373 - root - INFO - lr: 3.4009e-05 gnorm: 1.10 [10:08:08<14:26:08] +[titan] 2025-10-05 08:42:29,248 - root - INFO - step: 16505 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 08:42:29,248 - root - INFO - lr: 3.4000e-05 gnorm: 1.06 [10:08:19<14:25:57] +[titan] 2025-10-05 08:42:40,112 - root - INFO - step: 16510 loss: 2.1951 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9387 +[titan] 2025-10-05 08:42:40,112 - root - INFO - lr: 3.3992e-05 gnorm: 1.06 [10:08:30<14:25:46] +[titan] 2025-10-05 08:42:51,000 - root - INFO - step: 16515 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9731 +[titan] 2025-10-05 08:42:51,000 - root - INFO - lr: 3.3983e-05 gnorm: 1.06 [10:08:41<14:25:34] +[titan] 2025-10-05 08:43:01,864 - root - INFO - step: 16520 loss: 2.2392 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 1.9746 +[titan] 2025-10-05 08:43:01,864 - root - INFO - lr: 3.3975e-05 gnorm: 1.07 [10:08:52<14:25:23] +[titan] 2025-10-05 08:43:12,727 - root - INFO - step: 16525 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0203 +[titan] 2025-10-05 08:43:12,727 - root - INFO - lr: 3.3966e-05 gnorm: 1.13 [10:09:02<14:25:12] +[titan] 2025-10-05 08:43:23,632 - root - INFO - step: 16530 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 08:43:23,632 - root - INFO - lr: 
3.3958e-05 gnorm: 1.08 [10:09:13<14:25:00] +[titan] 2025-10-05 08:43:34,515 - root - INFO - step: 16535 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:43:34,515 - root - INFO - lr: 3.3949e-05 gnorm: 1.08 [10:09:24<14:24:49] +[titan] 2025-10-05 08:43:45,404 - root - INFO - step: 16540 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 08:43:45,404 - root - INFO - lr: 3.3941e-05 gnorm: 1.14 [10:09:35<14:24:38] +[titan] 2025-10-05 08:43:56,319 - root - INFO - step: 16545 loss: 2.1857 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 08:43:56,320 - root - INFO - lr: 3.3932e-05 gnorm: 1.07 [10:09:46<14:24:26] +[titan] 2025-10-05 08:44:05,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:44:07,197 - root - INFO - step: 16550 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 08:44:07,197 - root - INFO - lr: 3.3924e-05 gnorm: 1.05 [10:09:57<14:24:15] +[titan] 2025-10-05 08:44:18,066 - root - INFO - step: 16555 loss: 2.2226 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9632 +[titan] 2025-10-05 08:44:18,066 - root - INFO - lr: 3.3915e-05 gnorm: 1.09 [10:10:08<14:24:04] +[titan] 2025-10-05 08:44:28,972 - root - INFO - step: 16560 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 08:44:28,972 - root - INFO - lr: 3.3906e-05 gnorm: 1.05 [10:10:19<14:23:53] +[titan] 2025-10-05 08:44:39,817 - root - INFO - step: 16565 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9362 +[titan] 2025-10-05 08:44:39,817 - root - INFO - lr: 3.3898e-05 gnorm: 1.07 [10:10:30<14:23:41] +[titan] 2025-10-05 08:44:50,691 - root - INFO - step: 16570 loss: 2.1798 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9260 +[titan] 2025-10-05 08:44:50,691 - root - INFO - lr: 3.3889e-05 gnorm: 1.08 [10:10:40<14:23:30] +[titan] 2025-10-05 08:45:01,549 - root - INFO - step: 16575 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:45:01,549 - root - INFO - lr: 3.3881e-05 gnorm: 1.05 [10:10:51<14:23:19] +[titan] 2025-10-05 08:45:12,413 - root - INFO - step: 16580 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:45:12,413 - root - INFO - lr: 
3.3872e-05 gnorm: 1.08 [10:11:02<14:23:07] +[titan] 2025-10-05 08:45:23,289 - root - INFO - step: 16585 loss: 2.1742 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9208 +[titan] 2025-10-05 08:45:23,289 - root - INFO - lr: 3.3864e-05 gnorm: 1.07 [10:11:13<14:22:56] +[titan] 2025-10-05 08:45:34,149 - root - INFO - step: 16590 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 08:45:34,149 - root - INFO - lr: 3.3855e-05 gnorm: 1.11 [10:11:24<14:22:45] +[titan] 2025-10-05 08:45:45,091 - root - INFO - step: 16595 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 08:45:45,091 - root - INFO - lr: 3.3847e-05 gnorm: 1.06 [10:11:35<14:22:33] +[titan] 2025-10-05 08:45:53,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:45:55,993 - root - INFO - step: 16600 loss: 2.1689 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9160 +[titan] 2025-10-05 08:45:55,993 - root - INFO - lr: 3.3838e-05 gnorm: 1.04 [10:11:46<14:22:22] +[titan] 2025-10-05 08:46:06,866 - root - INFO - step: 16605 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:46:06,866 - root - INFO - lr: 3.3829e-05 gnorm: 1.04 [10:11:57<14:22:11] +[titan] 2025-10-05 08:46:17,754 - root - INFO - step: 16610 loss: 2.2141 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 08:46:17,755 - root - INFO - lr: 3.3821e-05 gnorm: 1.09 [10:12:08<14:21:59] +[titan] 2025-10-05 08:46:28,629 - root - INFO - step: 16615 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9330 +[titan] 2025-10-05 08:46:28,629 - root - INFO - lr: 3.3812e-05 gnorm: 1.09 [10:12:18<14:21:48] +[titan] 2025-10-05 08:46:39,510 - root - INFO - step: 16620 loss: 2.1330 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 08:46:39,510 - root - INFO - lr: 3.3804e-05 gnorm: 1.07 [10:12:29<14:21:37] +[titan] 2025-10-05 08:46:50,420 - root - INFO - step: 16625 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9610 +[titan] 2025-10-05 08:46:50,420 - root - INFO - lr: 3.3795e-05 gnorm: 1.09 [10:12:40<14:21:26] +[titan] 2025-10-05 08:47:01,324 - root - INFO - step: 16630 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 08:47:01,324 - root - INFO - lr: 
3.3787e-05 gnorm: 1.10 [10:12:51<14:21:14] +[titan] 2025-10-05 08:47:12,217 - root - INFO - step: 16635 loss: 2.1195 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 08:47:12,217 - root - INFO - lr: 3.3778e-05 gnorm: 1.09 [10:13:02<14:21:03] +[titan] 2025-10-05 08:47:23,110 - root - INFO - step: 16640 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:47:23,110 - root - INFO - lr: 3.3769e-05 gnorm: 1.12 [10:13:13<14:20:52] +[titan] 2025-10-05 08:47:34,010 - root - INFO - step: 16645 loss: 2.1744 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 08:47:34,010 - root - INFO - lr: 3.3761e-05 gnorm: 1.10 [10:13:24<14:20:40] +[titan] 2025-10-05 08:47:42,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:47:44,910 - root - INFO - step: 16650 loss: 2.1803 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 08:47:44,910 - root - INFO - lr: 3.3752e-05 gnorm: 1.11 [10:13:35<14:20:29] +[titan] 2025-10-05 08:47:55,812 - root - INFO - step: 16655 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 08:47:55,812 - root - INFO - lr: 3.3744e-05 gnorm: 1.10 [10:13:46<14:20:18] +[titan] 2025-10-05 08:48:06,738 - root - INFO - step: 16660 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0120 +[titan] 2025-10-05 08:48:06,738 - root - INFO - lr: 3.3735e-05 gnorm: 1.11 [10:13:56<14:20:07] +[titan] 2025-10-05 08:48:17,635 - root - INFO - step: 16665 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:48:17,635 - root - INFO - lr: 3.3727e-05 gnorm: 1.10 [10:14:07<14:19:55] +[titan] 2025-10-05 08:48:28,518 - root - INFO - step: 16670 loss: 2.2203 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:48:28,518 - root - INFO - lr: 3.3718e-05 gnorm: 1.10 [10:14:18<14:19:44] +[titan] 2025-10-05 08:48:39,418 - root - INFO - step: 16675 loss: 2.2253 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2593 global_avg_mtp_loss: 1.9660 +[titan] 2025-10-05 08:48:39,419 - root - INFO - lr: 3.3709e-05 gnorm: 1.14 [10:14:29<14:19:33] +[titan] 2025-10-05 08:48:50,307 - root - INFO - step: 16680 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 08:48:50,307 - root - INFO - lr: 
3.3701e-05 gnorm: 1.09 [10:14:40<14:19:22] +[titan] 2025-10-05 08:49:01,231 - root - INFO - step: 16685 loss: 2.2071 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 08:49:01,231 - root - INFO - lr: 3.3692e-05 gnorm: 1.06 [10:14:51<14:19:10] +[titan] 2025-10-05 08:49:12,142 - root - INFO - step: 16690 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9875 +[titan] 2025-10-05 08:49:12,142 - root - INFO - lr: 3.3684e-05 gnorm: 1.05 [10:15:02<14:18:59] +[titan] 2025-10-05 08:49:23,035 - root - INFO - step: 16695 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0009 +[titan] 2025-10-05 08:49:23,036 - root - INFO - lr: 3.3675e-05 gnorm: 1.04 [10:15:13<14:18:48] +[titan] 2025-10-05 08:49:31,750 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:49:33,935 - root - INFO - step: 16700 loss: 2.1213 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8753 +[titan] 2025-10-05 08:49:33,935 - root - INFO - lr: 3.3667e-05 gnorm: 1.05 [10:15:24<14:18:36] +[titan] 2025-10-05 08:49:44,821 - root - INFO - step: 16705 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 08:49:44,821 - root - INFO - lr: 3.3658e-05 gnorm: 1.04 [10:15:35<14:18:25] +[titan] 2025-10-05 08:49:55,770 - root - INFO - step: 16710 loss: 2.1830 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9279 +[titan] 2025-10-05 08:49:55,770 - root - INFO - lr: 3.3649e-05 gnorm: 1.06 [10:15:46<14:18:14] +[titan] 2025-10-05 08:50:06,646 - root - INFO - step: 16715 loss: 2.1474 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 08:50:06,646 - root - INFO - lr: 3.3641e-05 gnorm: 1.05 [10:15:56<14:18:03] +[titan] 2025-10-05 08:50:17,562 - root - INFO - step: 16720 loss: 2.2478 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9859 +[titan] 2025-10-05 08:50:17,562 - root - INFO - lr: 3.3632e-05 gnorm: 1.08 [10:16:07<14:17:51] +[titan] 2025-10-05 08:50:28,447 - root - INFO - step: 16725 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 08:50:28,447 - root - INFO - lr: 3.3624e-05 gnorm: 1.03 [10:16:18<14:17:40] +[titan] 2025-10-05 08:50:39,327 - root - INFO - step: 16730 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 08:50:39,327 - root - INFO - lr: 
3.3615e-05 gnorm: 1.07 [10:16:29<14:17:29] +[titan] 2025-10-05 08:50:50,218 - root - INFO - step: 16735 loss: 2.1919 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:50:50,218 - root - INFO - lr: 3.3606e-05 gnorm: 1.08 [10:16:40<14:17:18] +[titan] 2025-10-05 08:51:01,116 - root - INFO - step: 16740 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9342 +[titan] 2025-10-05 08:51:01,116 - root - INFO - lr: 3.3598e-05 gnorm: 1.01 [10:16:51<14:17:06] +[titan] 2025-10-05 08:51:11,988 - root - INFO - step: 16745 loss: 2.1719 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 08:51:11,988 - root - INFO - lr: 3.3589e-05 gnorm: 1.09 [10:17:02<14:16:55] +[titan] 2025-10-05 08:51:20,683 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:51:22,867 - root - INFO - step: 16750 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:51:22,867 - root - INFO - lr: 3.3581e-05 gnorm: 1.07 [10:17:13<14:16:44] +[titan] 2025-10-05 08:51:33,766 - root - INFO - step: 16755 loss: 2.1698 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:51:33,766 - root - INFO - lr: 3.3572e-05 gnorm: 1.08 [10:17:24<14:16:32] +[titan] 2025-10-05 08:51:44,647 - root - INFO - step: 16760 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 08:51:44,647 - root - INFO - lr: 3.3563e-05 gnorm: 1.07 [10:17:34<14:16:21] +[titan] 2025-10-05 08:51:55,539 - root - INFO - step: 16765 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 08:51:55,539 - root - INFO - lr: 3.3555e-05 gnorm: 1.08 [10:17:45<14:16:10] +[titan] 2025-10-05 08:52:06,452 - root - INFO - step: 16770 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9905 +[titan] 2025-10-05 08:52:06,452 - root - INFO - lr: 3.3546e-05 gnorm: 1.10 [10:17:56<14:15:59] +[titan] 2025-10-05 08:52:17,344 - root - INFO - step: 16775 loss: 2.2357 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 08:52:17,344 - root - INFO - lr: 3.3538e-05 gnorm: 1.12 [10:18:07<14:15:47] +[titan] 2025-10-05 08:52:28,243 - root - INFO - step: 16780 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 08:52:28,243 - root - INFO - lr: 
3.3529e-05 gnorm: 1.05 [10:18:18<14:15:36] +[titan] 2025-10-05 08:52:39,158 - root - INFO - step: 16785 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 08:52:39,158 - root - INFO - lr: 3.3520e-05 gnorm: 1.08 [10:18:29<14:15:25] +[titan] 2025-10-05 08:52:50,027 - root - INFO - step: 16790 loss: 2.3254 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 08:52:50,027 - root - INFO - lr: 3.3512e-05 gnorm: 1.08 [10:18:40<14:15:13] +[titan] 2025-10-05 08:53:00,972 - root - INFO - step: 16795 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8815 +[titan] 2025-10-05 08:53:00,972 - root - INFO - lr: 3.3503e-05 gnorm: 1.05 [10:18:51<14:15:02] +[titan] 2025-10-05 08:53:09,655 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:53:11,847 - root - INFO - step: 16800 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9137 +[titan] 2025-10-05 08:53:11,847 - root - INFO - lr: 3.3495e-05 gnorm: 1.04 [10:19:02<14:14:51] +[titan] 2025-10-05 08:53:22,743 - root - INFO - step: 16805 loss: 2.2778 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0130 +[titan] 2025-10-05 08:53:22,744 - root - INFO - lr: 3.3486e-05 gnorm: 1.06 [10:19:12<14:14:40] +[titan] 2025-10-05 08:53:33,623 - root - INFO - step: 16810 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 08:53:33,623 - root - INFO - lr: 3.3477e-05 gnorm: 1.10 [10:19:23<14:14:28] +[titan] 2025-10-05 08:53:44,493 - root - INFO - step: 16815 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 08:53:44,493 - root - INFO - lr: 3.3469e-05 gnorm: 1.08 [10:19:34<14:14:17] +[titan] 2025-10-05 08:53:55,405 - root - INFO - step: 16820 loss: 2.3161 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2704 global_avg_mtp_loss: 2.0457 +[titan] 2025-10-05 08:53:55,405 - root - INFO - lr: 3.3460e-05 gnorm: 1.05 [10:19:45<14:14:06] +[titan] 2025-10-05 08:54:06,325 - root - INFO - step: 16825 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:54:06,325 - root - INFO - lr: 3.3452e-05 gnorm: 1.06 [10:19:56<14:13:55] +[titan] 2025-10-05 08:54:17,199 - root - INFO - step: 16830 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 08:54:17,199 - root - INFO - lr: 
3.3443e-05 gnorm: 1.14 [10:20:07<14:13:43] +[titan] 2025-10-05 08:54:28,086 - root - INFO - step: 16835 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 08:54:28,087 - root - INFO - lr: 3.3434e-05 gnorm: 1.11 [10:20:18<14:13:32] +[titan] 2025-10-05 08:54:38,979 - root - INFO - step: 16840 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:54:38,979 - root - INFO - lr: 3.3426e-05 gnorm: 1.10 [10:20:29<14:13:21] +[titan] 2025-10-05 08:54:49,879 - root - INFO - step: 16845 loss: 2.2348 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9741 +[titan] 2025-10-05 08:54:49,879 - root - INFO - lr: 3.3417e-05 gnorm: 1.14 [10:20:40<14:13:09] +[titan] 2025-10-05 08:54:58,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:55:00,873 - root - INFO - step: 16850 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 08:55:00,873 - root - INFO - lr: 3.3409e-05 gnorm: 1.06 [10:20:51<14:12:58] +[titan] 2025-10-05 08:55:11,763 - root - INFO - step: 16855 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:55:11,763 - root - INFO - lr: 3.3400e-05 gnorm: 1.10 [10:21:01<14:12:47] +[titan] 2025-10-05 08:55:22,662 - root - INFO - step: 16860 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:55:22,662 - root - INFO - lr: 3.3391e-05 gnorm: 1.05 [10:21:12<14:12:36] +[titan] 2025-10-05 08:55:33,543 - root - INFO - step: 16865 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8902 +[titan] 2025-10-05 08:55:33,543 - root - INFO - lr: 3.3383e-05 gnorm: 1.08 [10:21:23<14:12:24] +[titan] 2025-10-05 08:55:44,433 - root - INFO - step: 16870 loss: 2.2119 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:55:44,433 - root - INFO - lr: 3.3374e-05 gnorm: 1.08 [10:21:34<14:12:13] +[titan] 2025-10-05 08:55:55,318 - root - INFO - step: 16875 loss: 2.2256 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:55:55,318 - root - INFO - lr: 3.3366e-05 gnorm: 1.09 [10:21:45<14:12:02] +[titan] 2025-10-05 08:56:06,283 - root - INFO - step: 16880 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:56:06,283 - root - INFO - lr: 
3.3357e-05 gnorm: 1.08 [10:21:56<14:11:51] +[titan] 2025-10-05 08:56:17,168 - root - INFO - step: 16885 loss: 2.2361 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9760 +[titan] 2025-10-05 08:56:17,168 - root - INFO - lr: 3.3348e-05 gnorm: 1.07 [10:22:07<14:11:39] +[titan] 2025-10-05 08:56:28,070 - root - INFO - step: 16890 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:56:28,070 - root - INFO - lr: 3.3340e-05 gnorm: 1.03 [10:22:18<14:11:28] +[titan] 2025-10-05 08:56:39,053 - root - INFO - step: 16895 loss: 2.2559 memory: 118.84GiB(85.28%) tps: 29,836 tflops: 413.93 mfu: 41.85% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 08:56:39,054 - root - INFO - lr: 3.3331e-05 gnorm: 1.10 [10:22:29<14:11:17] +[titan] 2025-10-05 08:56:41,418 - root - INFO - Dumping profiler traces at step 16896 +[titan] 2025-10-05 08:56:41,458 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:56:47,993 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:56:50,179 - root - INFO - step: 16900 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 29,452 tflops: 408.61 mfu: 41.32% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9188 +[titan] 2025-10-05 08:56:50,180 - root - INFO - lr: 3.3322e-05 gnorm: 1.02 [10:22:40<14:11:06] +[titan] 2025-10-05 08:57:01,083 - root - INFO - step: 16905 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9226 +[titan] 2025-10-05 08:57:01,084 - root - INFO - lr: 3.3314e-05 gnorm: 1.15 [10:22:51<14:10:55] +[titan] 2025-10-05 08:57:11,941 - root - INFO - step: 16910 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9271 +[titan] 2025-10-05 08:57:11,942 - root - INFO - lr: 3.3305e-05 gnorm: 1.04 [10:23:02<14:10:44] +[titan] 2025-10-05 08:57:22,821 - root - INFO - step: 16915 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 08:57:22,822 - root - INFO - lr: 3.3297e-05 gnorm: 1.10 [10:23:13<14:10:32] +[titan] 2025-10-05 08:57:33,708 - root - INFO - step: 16920 loss: 2.1768 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9231 +[titan] 2025-10-05 08:57:33,708 - root - INFO - lr: 3.3288e-05 gnorm: 1.07 [10:23:23<14:10:21] +[titan] 2025-10-05 08:57:44,586 - root - INFO - step: 16925 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 08:57:44,586 - root - INFO - lr: 3.3279e-05 gnorm: 1.10 [10:23:34<14:10:10] +[titan] 2025-10-05 08:57:55,466 - root - INFO - step: 16930 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 08:57:55,466 - root - INFO - lr: 
3.3271e-05 gnorm: 1.08 [10:23:45<14:09:58] +[titan] 2025-10-05 08:58:06,365 - root - INFO - step: 16935 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9721 +[titan] 2025-10-05 08:58:06,365 - root - INFO - lr: 3.3262e-05 gnorm: 1.09 [10:23:56<14:09:47] +[titan] 2025-10-05 08:58:17,240 - root - INFO - step: 16940 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 08:58:17,240 - root - INFO - lr: 3.3253e-05 gnorm: 1.07 [10:24:07<14:09:36] +[titan] 2025-10-05 08:58:28,143 - root - INFO - step: 16945 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9826 +[titan] 2025-10-05 08:58:28,143 - root - INFO - lr: 3.3245e-05 gnorm: 1.07 [10:24:18<14:09:25] +[titan] 2025-10-05 08:58:36,825 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:58:39,030 - root - INFO - step: 16950 loss: 2.2032 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:58:39,030 - root - INFO - lr: 3.3236e-05 gnorm: 1.07 [10:24:29<14:09:13] +[titan] 2025-10-05 08:58:49,927 - root - INFO - step: 16955 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 08:58:49,928 - root - INFO - lr: 3.3228e-05 gnorm: 1.13 [10:24:40<14:09:02] +[titan] 2025-10-05 08:59:00,813 - root - INFO - step: 16960 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 08:59:00,813 - root - INFO - lr: 3.3219e-05 gnorm: 1.07 [10:24:51<14:08:51] +[titan] 2025-10-05 08:59:11,725 - root - INFO - step: 16965 loss: 2.1770 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9229 +[titan] 2025-10-05 08:59:11,725 - root - INFO - lr: 3.3210e-05 gnorm: 1.09 [10:25:01<14:08:39] +[titan] 2025-10-05 08:59:22,600 - root - INFO - step: 16970 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 08:59:22,601 - root - INFO - lr: 3.3202e-05 gnorm: 1.13 [10:25:12<14:08:28] +[titan] 2025-10-05 08:59:33,459 - root - INFO - step: 16975 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9184 +[titan] 2025-10-05 08:59:33,460 - root - INFO - lr: 3.3193e-05 gnorm: 1.10 [10:25:23<14:08:17] +[titan] 2025-10-05 08:59:44,382 - root - INFO - step: 16980 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9735 +[titan] 2025-10-05 08:59:44,382 - root - INFO - lr: 
3.3184e-05 gnorm: 1.04 [10:25:34<14:08:06] +[titan] 2025-10-05 08:59:55,274 - root - INFO - step: 16985 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8945 +[titan] 2025-10-05 08:59:55,274 - root - INFO - lr: 3.3176e-05 gnorm: 1.06 [10:25:45<14:07:54] +[titan] 2025-10-05 09:00:06,182 - root - INFO - step: 16990 loss: 2.2652 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0002 +[titan] 2025-10-05 09:00:06,183 - root - INFO - lr: 3.3167e-05 gnorm: 1.09 [10:25:56<14:07:43] +[titan] 2025-10-05 09:00:17,071 - root - INFO - step: 16995 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:00:17,071 - root - INFO - lr: 3.3158e-05 gnorm: 1.08 [10:26:07<14:07:32] +[titan] 2025-10-05 09:00:25,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:00:27,948 - root - INFO - step: 17000 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:00:27,948 - root - INFO - lr: 3.3150e-05 gnorm: 1.11 [10:26:18<14:07:21] +[titan] 2025-10-05 09:00:38,826 - root - INFO - step: 17005 loss: 2.2227 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 09:00:38,826 - root - INFO - lr: 3.3141e-05 gnorm: 1.07 [10:26:29<14:07:09] +[titan] 2025-10-05 09:00:49,742 - root - INFO - step: 17010 loss: 2.2205 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:00:49,742 - root - INFO - lr: 3.3133e-05 gnorm: 1.05 [10:26:39<14:06:58] +[titan] 2025-10-05 09:01:00,622 - root - INFO - step: 17015 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9094 +[titan] 2025-10-05 09:01:00,623 - root - INFO - lr: 3.3124e-05 gnorm: 1.08 [10:26:50<14:06:47] +[titan] 2025-10-05 09:01:11,523 - root - INFO - step: 17020 loss: 2.1800 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9266 +[titan] 2025-10-05 09:01:11,523 - root - INFO - lr: 3.3115e-05 gnorm: 1.07 [10:27:01<14:06:35] +[titan] 2025-10-05 09:01:22,424 - root - INFO - step: 17025 loss: 2.2024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9461 +[titan] 2025-10-05 09:01:22,425 - root - INFO - lr: 3.3107e-05 gnorm: 1.04 [10:27:12<14:06:24] +[titan] 2025-10-05 09:01:33,324 - root - INFO - step: 17030 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 09:01:33,324 - root - INFO - lr: 
3.3098e-05 gnorm: 1.07 [10:27:23<14:06:13] +[titan] 2025-10-05 09:01:44,236 - root - INFO - step: 17035 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9083 +[titan] 2025-10-05 09:01:44,236 - root - INFO - lr: 3.3089e-05 gnorm: 1.04 [10:27:34<14:06:02] +[titan] 2025-10-05 09:01:55,136 - root - INFO - step: 17040 loss: 2.1831 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 09:01:55,136 - root - INFO - lr: 3.3081e-05 gnorm: 1.08 [10:27:45<14:05:50] +[titan] 2025-10-05 09:02:06,035 - root - INFO - step: 17045 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:02:06,035 - root - INFO - lr: 3.3072e-05 gnorm: 1.06 [10:27:56<14:05:39] +[titan] 2025-10-05 09:02:14,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:02:16,917 - root - INFO - step: 17050 loss: 2.2428 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:02:16,917 - root - INFO - lr: 3.3063e-05 gnorm: 1.04 [10:28:07<14:05:28] +[titan] 2025-10-05 09:02:27,783 - root - INFO - step: 17055 loss: 2.2213 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9627 +[titan] 2025-10-05 09:02:27,783 - root - INFO - lr: 3.3055e-05 gnorm: 1.05 [10:28:17<14:05:17] +[titan] 2025-10-05 09:02:38,654 - root - INFO - step: 17060 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8924 +[titan] 2025-10-05 09:02:38,654 - root - INFO - lr: 3.3046e-05 gnorm: 1.07 [10:28:28<14:05:05] +[titan] 2025-10-05 09:02:49,542 - root - INFO - step: 17065 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9244 +[titan] 2025-10-05 09:02:49,542 - root - INFO - lr: 3.3037e-05 gnorm: 1.10 [10:28:39<14:04:54] +[titan] 2025-10-05 09:03:00,423 - root - INFO - step: 17070 loss: 2.2506 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 09:03:00,423 - root - INFO - lr: 3.3029e-05 gnorm: 1.08 [10:28:50<14:04:43] +[titan] 2025-10-05 09:03:11,347 - root - INFO - step: 17075 loss: 2.1585 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:03:11,347 - root - INFO - lr: 3.3020e-05 gnorm: 1.09 [10:29:01<14:04:32] +[titan] 2025-10-05 09:03:22,220 - root - INFO - step: 17080 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 09:03:22,221 - root - INFO - lr: 
3.3011e-05 gnorm: 1.07 [10:29:12<14:04:20] +[titan] 2025-10-05 09:03:33,091 - root - INFO - step: 17085 loss: 2.1813 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:03:33,091 - root - INFO - lr: 3.3003e-05 gnorm: 1.12 [10:29:23<14:04:09] +[titan] 2025-10-05 09:03:43,968 - root - INFO - step: 17090 loss: 2.2621 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 1.9971 +[titan] 2025-10-05 09:03:43,968 - root - INFO - lr: 3.2994e-05 gnorm: 1.09 [10:29:34<14:03:58] +[titan] 2025-10-05 09:03:54,850 - root - INFO - step: 17095 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:03:54,850 - root - INFO - lr: 3.2986e-05 gnorm: 1.05 [10:29:45<14:03:46] +[titan] 2025-10-05 09:04:03,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:04:05,728 - root - INFO - step: 17100 loss: 2.1531 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 09:04:05,728 - root - INFO - lr: 3.2977e-05 gnorm: 1.07 [10:29:55<14:03:35] +[titan] 2025-10-05 09:04:16,647 - root - INFO - step: 17105 loss: 2.1923 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 09:04:16,647 - root - INFO - lr: 3.2968e-05 gnorm: 1.11 [10:30:06<14:03:24] +[titan] 2025-10-05 09:04:27,507 - root - INFO - step: 17110 loss: 2.1551 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9046 +[titan] 2025-10-05 09:04:27,507 - root - INFO - lr: 3.2960e-05 gnorm: 1.11 [10:30:17<14:03:13] +[titan] 2025-10-05 09:04:38,376 - root - INFO - step: 17115 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 09:04:38,376 - root - INFO - lr: 3.2951e-05 gnorm: 1.09 [10:30:28<14:03:01] +[titan] 2025-10-05 09:04:49,249 - root - INFO - step: 17120 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:04:49,249 - root - INFO - lr: 3.2942e-05 gnorm: 1.04 [10:30:39<14:02:50] +[titan] 2025-10-05 09:05:00,120 - root - INFO - step: 17125 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 09:05:00,120 - root - INFO - lr: 3.2934e-05 gnorm: 1.09 [10:30:50<14:02:39] +[titan] 2025-10-05 09:05:10,996 - root - INFO - step: 17130 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:05:10,997 - root - INFO - lr: 
3.2925e-05 gnorm: 6.19 [10:31:01<14:02:27] +[titan] 2025-10-05 09:05:21,855 - root - INFO - step: 17135 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 09:05:21,856 - root - INFO - lr: 3.2916e-05 gnorm: 1.04 [10:31:12<14:02:16] +[titan] 2025-10-05 09:05:32,760 - root - INFO - step: 17140 loss: 2.2847 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0182 +[titan] 2025-10-05 09:05:32,760 - root - INFO - lr: 3.2908e-05 gnorm: 1.13 [10:31:22<14:02:05] +[titan] 2025-10-05 09:05:43,616 - root - INFO - step: 17145 loss: 2.1628 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 09:05:43,616 - root - INFO - lr: 3.2899e-05 gnorm: 1.13 [10:31:33<14:01:54] +[titan] 2025-10-05 09:05:52,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:05:54,484 - root - INFO - step: 17150 loss: 2.2557 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 09:05:54,484 - root - INFO - lr: 3.2890e-05 gnorm: 1.04 [10:31:44<14:01:42] +[titan] 2025-10-05 09:06:05,356 - root - INFO - step: 17155 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 09:06:05,356 - root - INFO - lr: 3.2882e-05 gnorm: 1.06 [10:31:55<14:01:31] +[titan] 2025-10-05 09:06:16,249 - root - INFO - step: 17160 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:06:16,249 - root - INFO - lr: 3.2873e-05 gnorm: 1.06 [10:32:06<14:01:20] +[titan] 2025-10-05 09:06:27,125 - root - INFO - step: 17165 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:06:27,125 - root - INFO - lr: 3.2864e-05 gnorm: 1.06 [10:32:17<14:01:08] +[titan] 2025-10-05 09:06:38,025 - root - INFO - step: 17170 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:06:38,025 - root - INFO - lr: 3.2856e-05 gnorm: 1.14 [10:32:28<14:00:57] +[titan] 2025-10-05 09:06:48,880 - root - INFO - step: 17175 loss: 2.1394 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 09:06:48,880 - root - INFO - lr: 3.2847e-05 gnorm: 1.07 [10:32:39<14:00:46] +[titan] 2025-10-05 09:06:59,724 - root - INFO - step: 17180 loss: 2.1898 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 09:06:59,724 - root - INFO - lr: 
3.2838e-05 gnorm: 1.07 [10:32:49<14:00:35] +[titan] 2025-10-05 09:07:10,582 - root - INFO - step: 17185 loss: 2.1634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9111 +[titan] 2025-10-05 09:07:10,583 - root - INFO - lr: 3.2830e-05 gnorm: 1.03 [10:33:00<14:00:23] +[titan] 2025-10-05 09:07:21,443 - root - INFO - step: 17190 loss: 2.1666 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:07:21,443 - root - INFO - lr: 3.2821e-05 gnorm: 1.09 [10:33:11<14:00:12] +[titan] 2025-10-05 09:07:32,307 - root - INFO - step: 17195 loss: 2.2954 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 09:07:32,308 - root - INFO - lr: 3.2812e-05 gnorm: 1.05 [10:33:22<14:00:01] +[titan] 2025-10-05 09:07:40,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:07:43,204 - root - INFO - step: 17200 loss: 2.2434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:07:43,204 - root - INFO - lr: 3.2804e-05 gnorm: 1.02 [10:33:33<13:59:49] +[titan] 2025-10-05 09:07:54,076 - root - INFO - step: 17205 loss: 2.2300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:07:54,076 - root - INFO - lr: 3.2795e-05 gnorm: 1.07 [10:33:44<13:59:38] +[titan] 2025-10-05 09:08:04,949 - root - INFO - step: 17210 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:08:04,949 - root - INFO - lr: 3.2786e-05 gnorm: 1.14 [10:33:55<13:59:27] +[titan] 2025-10-05 09:08:15,833 - root - INFO - step: 17215 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 09:08:15,833 - root - INFO - lr: 3.2778e-05 gnorm: 1.07 [10:34:06<13:59:16] +[titan] 2025-10-05 09:08:26,702 - root - INFO - step: 17220 loss: 2.1866 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9324 +[titan] 2025-10-05 09:08:26,702 - root - INFO - lr: 3.2769e-05 gnorm: 1.12 [10:34:16<13:59:04] +[titan] 2025-10-05 09:08:37,566 - root - INFO - step: 17225 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 09:08:37,566 - root - INFO - lr: 3.2760e-05 gnorm: 1.09 [10:34:27<13:58:53] +[titan] 2025-10-05 09:08:48,419 - root - INFO - step: 17230 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 09:08:48,419 - root - INFO - lr: 
3.2752e-05 gnorm: 1.06 [10:34:38<13:58:42] +[titan] 2025-10-05 09:08:59,310 - root - INFO - step: 17235 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:08:59,310 - root - INFO - lr: 3.2743e-05 gnorm: 1.11 [10:34:49<13:58:30] +[titan] 2025-10-05 09:09:10,177 - root - INFO - step: 17240 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9319 +[titan] 2025-10-05 09:09:10,177 - root - INFO - lr: 3.2734e-05 gnorm: 1.07 [10:35:00<13:58:19] +[titan] 2025-10-05 09:09:21,054 - root - INFO - step: 17245 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:09:21,054 - root - INFO - lr: 3.2725e-05 gnorm: 1.03 [10:35:11<13:58:08] +[titan] 2025-10-05 09:09:29,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:09:31,915 - root - INFO - step: 17250 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9306 +[titan] 2025-10-05 09:09:31,915 - root - INFO - lr: 3.2717e-05 gnorm: 1.06 [10:35:22<13:57:56] +[titan] 2025-10-05 09:09:42,794 - root - INFO - step: 17255 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 09:09:42,794 - root - INFO - lr: 3.2708e-05 gnorm: 1.07 [10:35:32<13:57:45] +[titan] 2025-10-05 09:09:53,683 - root - INFO - step: 17260 loss: 2.1486 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 09:09:53,683 - root - INFO - lr: 3.2699e-05 gnorm: 1.09 [10:35:43<13:57:34] +[titan] 2025-10-05 09:10:04,613 - root - INFO - step: 17265 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 09:10:04,613 - root - INFO - lr: 3.2691e-05 gnorm: 1.10 [10:35:54<13:57:23] +[titan] 2025-10-05 09:10:15,520 - root - INFO - step: 17270 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:10:15,520 - root - INFO - lr: 3.2682e-05 gnorm: 1.07 [10:36:05<13:57:12] +[titan] 2025-10-05 09:10:26,410 - root - INFO - step: 17275 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9303 +[titan] 2025-10-05 09:10:26,410 - root - INFO - lr: 3.2673e-05 gnorm: 1.08 [10:36:16<13:57:00] +[titan] 2025-10-05 09:10:37,314 - root - INFO - step: 17280 loss: 2.3099 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 09:10:37,314 - root - INFO - lr: 
3.2665e-05 gnorm: 1.11 [10:36:27<13:56:49] +[titan] 2025-10-05 09:10:48,218 - root - INFO - step: 17285 loss: 2.2025 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 09:10:48,218 - root - INFO - lr: 3.2656e-05 gnorm: 1.04 [10:36:38<13:56:38] +[titan] 2025-10-05 09:10:59,106 - root - INFO - step: 17290 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 09:10:59,106 - root - INFO - lr: 3.2647e-05 gnorm: 1.08 [10:36:49<13:56:26] +[titan] 2025-10-05 09:11:09,991 - root - INFO - step: 17295 loss: 2.2277 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 09:11:09,991 - root - INFO - lr: 3.2639e-05 gnorm: 1.09 [10:37:00<13:56:15] +[titan] 2025-10-05 09:11:18,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:11:20,963 - root - INFO - step: 17300 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 09:11:20,963 - root - INFO - lr: 3.2630e-05 gnorm: 1.10 [10:37:11<13:56:04] +[titan] 2025-10-05 09:11:31,859 - root - INFO - step: 17305 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:11:31,859 - root - INFO - lr: 3.2621e-05 gnorm: 1.04 [10:37:22<13:55:53] +[titan] 2025-10-05 09:11:42,726 - root - INFO - step: 17310 loss: 2.2050 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 09:11:42,726 - root - INFO - lr: 3.2613e-05 gnorm: 1.08 [10:37:32<13:55:42] +[titan] 2025-10-05 09:11:53,604 - root - INFO - step: 17315 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:11:53,604 - root - INFO - lr: 3.2604e-05 gnorm: 1.06 [10:37:43<13:55:30] +[titan] 2025-10-05 09:12:04,491 - root - INFO - step: 17320 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:12:04,491 - root - INFO - lr: 3.2595e-05 gnorm: 1.08 [10:37:54<13:55:19] +[titan] 2025-10-05 09:12:15,414 - root - INFO - step: 17325 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:12:15,414 - root - INFO - lr: 3.2586e-05 gnorm: 1.03 [10:38:05<13:55:08] +[titan] 2025-10-05 09:12:26,330 - root - INFO - step: 17330 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9711 +[titan] 2025-10-05 09:12:26,330 - root - INFO - lr: 
3.2578e-05 gnorm: 1.08 [10:38:16<13:54:57] +[titan] 2025-10-05 09:12:37,205 - root - INFO - step: 17335 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9425 +[titan] 2025-10-05 09:12:37,206 - root - INFO - lr: 3.2569e-05 gnorm: 1.08 [10:38:27<13:54:45] +[titan] 2025-10-05 09:12:48,107 - root - INFO - step: 17340 loss: 2.2311 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 09:12:48,107 - root - INFO - lr: 3.2560e-05 gnorm: 1.07 [10:38:38<13:54:34] +[titan] 2025-10-05 09:12:58,971 - root - INFO - step: 17345 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:12:58,971 - root - INFO - lr: 3.2552e-05 gnorm: 1.02 [10:38:49<13:54:23] +[titan] 2025-10-05 09:13:07,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:13:09,828 - root - INFO - step: 17350 loss: 2.1864 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9313 +[titan] 2025-10-05 09:13:09,828 - root - INFO - lr: 3.2543e-05 gnorm: 1.12 [10:39:00<13:54:11] +[titan] 2025-10-05 09:13:20,766 - root - INFO - step: 17355 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:13:20,766 - root - INFO - lr: 3.2534e-05 gnorm: 1.05 [10:39:10<13:54:00] +[titan] 2025-10-05 09:13:31,647 - root - INFO - step: 17360 loss: 2.1890 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9339 +[titan] 2025-10-05 09:13:31,647 - root - INFO - lr: 3.2526e-05 gnorm: 1.06 [10:39:21<13:53:49] +[titan] 2025-10-05 09:13:42,494 - root - INFO - step: 17365 loss: 2.2669 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 09:13:42,494 - root - INFO - lr: 3.2517e-05 gnorm: 1.11 [10:39:32<13:53:38] +[titan] 2025-10-05 09:13:53,353 - root - INFO - step: 17370 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0243 +[titan] 2025-10-05 09:13:53,353 - root - INFO - lr: 3.2508e-05 gnorm: 1.16 [10:39:43<13:53:26] +[titan] 2025-10-05 09:14:04,232 - root - INFO - step: 17375 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9603 +[titan] 2025-10-05 09:14:04,232 - root - INFO - lr: 3.2500e-05 gnorm: 1.06 [10:39:54<13:53:15] +[titan] 2025-10-05 09:14:15,120 - root - INFO - step: 17380 loss: 2.2381 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 09:14:15,120 - root - INFO - lr: 
3.2491e-05 gnorm: 1.09 [10:40:05<13:53:04] +[titan] 2025-10-05 09:14:26,052 - root - INFO - step: 17385 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 09:14:26,052 - root - INFO - lr: 3.2482e-05 gnorm: 1.07 [10:40:16<13:52:53] +[titan] 2025-10-05 09:14:36,924 - root - INFO - step: 17390 loss: 2.1808 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:14:36,924 - root - INFO - lr: 3.2473e-05 gnorm: 1.07 [10:40:27<13:52:41] +[titan] 2025-10-05 09:14:47,853 - root - INFO - step: 17395 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9403 +[titan] 2025-10-05 09:14:47,853 - root - INFO - lr: 3.2465e-05 gnorm: 1.04 [10:40:38<13:52:30] +[titan] 2025-10-05 09:14:56,565 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:14:58,759 - root - INFO - step: 17400 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 09:14:58,759 - root - INFO - lr: 3.2456e-05 gnorm: 1.05 [10:40:48<13:52:19] +[titan] 2025-10-05 09:15:09,749 - root - INFO - step: 17405 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 29,817 tflops: 413.66 mfu: 41.83% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 09:15:09,750 - root - INFO - lr: 3.2447e-05 gnorm: 1.05 [10:40:59<13:52:08] +[titan] 2025-10-05 09:15:16,475 - root - INFO - Dumping profiler traces at step 17408 +[titan] 2025-10-05 09:15:16,515 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:15:20,898 - root - INFO - step: 17410 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 29,392 tflops: 407.77 mfu: 41.23% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:15:20,899 - root - INFO - lr: 3.2439e-05 gnorm: 1.10 [10:41:11<13:51:57] +[titan] 2025-10-05 09:15:31,784 - root - INFO - step: 17415 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 09:15:31,784 - root - INFO - lr: 3.2430e-05 gnorm: 1.11 [10:41:21<13:51:46] +[titan] 2025-10-05 09:15:42,678 - root - INFO - step: 17420 loss: 2.1926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9383 +[titan] 2025-10-05 09:15:42,678 - root - INFO - lr: 3.2421e-05 gnorm: 1.05 [10:41:32<13:51:34] +[titan] 2025-10-05 09:15:53,585 - root - INFO - step: 17425 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 09:15:53,585 - root - INFO - lr: 3.2412e-05 gnorm: 1.05 [10:41:43<13:51:23] +[titan] 2025-10-05 09:16:04,476 - root - INFO - step: 17430 loss: 
2.1543 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9031 +[titan] 2025-10-05 09:16:04,476 - root - INFO - lr: 3.2404e-05 gnorm: 1.06 [10:41:54<13:51:12] +[titan] 2025-10-05 09:16:15,351 - root - INFO - step: 17435 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 09:16:15,351 - root - INFO - lr: 3.2395e-05 gnorm: 1.09 [10:42:05<13:51:01] +[titan] 2025-10-05 09:16:26,256 - root - INFO - step: 17440 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:16:26,257 - root - INFO - lr: 3.2386e-05 gnorm: 1.08 [10:42:16<13:50:49] +[titan] 2025-10-05 09:16:37,135 - root - INFO - step: 17445 loss: 2.1787 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9254 +[titan] 2025-10-05 09:16:37,135 - root - INFO - lr: 3.2378e-05 gnorm: 1.06 [10:42:27<13:50:38] +[titan] 2025-10-05 09:16:45,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:16:48,014 - root - INFO - step: 17450 loss: 2.1992 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9428 +[titan] 2025-10-05 09:16:48,014 - root - INFO - lr: 3.2369e-05 gnorm: 1.03 [10:42:38<13:50:27] +[titan] 2025-10-05 09:16:58,900 - root - INFO - step: 17455 loss: 2.2831 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:16:58,900 - root - INFO - lr: 3.2360e-05 gnorm: 1.09 [10:42:49<13:50:16] +[titan] 2025-10-05 09:17:09,817 - root - INFO - step: 17460 loss: 2.2252 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:17:09,817 - root - INFO - lr: 3.2351e-05 gnorm: 1.08 [10:42:59<13:50:04] +[titan] 2025-10-05 09:17:20,746 - root - INFO - step: 17465 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:17:20,746 - root - INFO - lr: 3.2343e-05 gnorm: 1.05 [10:43:10<13:49:53] +[titan] 2025-10-05 09:17:31,624 - root - INFO - step: 17470 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 09:17:31,624 - root - INFO - lr: 3.2334e-05 gnorm: 1.07 [10:43:21<13:49:42] +[titan] 2025-10-05 09:17:42,511 - root - INFO - step: 17475 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:17:42,511 - root - INFO - lr: 3.2325e-05 gnorm: 1.07 [10:43:32<13:49:31] +[titan] 2025-10-05 09:17:53,406 - root - INFO - step: 17480 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 09:17:53,407 - root - INFO - lr: 
3.2317e-05 gnorm: 1.09 [10:43:43<13:49:19] +[titan] 2025-10-05 09:18:04,291 - root - INFO - step: 17485 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:18:04,292 - root - INFO - lr: 3.2308e-05 gnorm: 1.09 [10:43:54<13:49:08] +[titan] 2025-10-05 09:18:15,232 - root - INFO - step: 17490 loss: 2.1875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 09:18:15,232 - root - INFO - lr: 3.2299e-05 gnorm: 1.09 [10:44:05<13:48:57] +[titan] 2025-10-05 09:18:26,148 - root - INFO - step: 17495 loss: 2.1821 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9280 +[titan] 2025-10-05 09:18:26,148 - root - INFO - lr: 3.2290e-05 gnorm: 1.06 [10:44:16<13:48:46] +[titan] 2025-10-05 09:18:34,840 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:18:37,024 - root - INFO - step: 17500 loss: 2.2275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9680 +[titan] 2025-10-05 09:18:37,024 - root - INFO - lr: 3.2282e-05 gnorm: 1.08 [10:44:27<13:48:34] +[titan] 2025-10-05 09:18:47,898 - root - INFO - step: 17505 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9235 +[titan] 2025-10-05 09:18:47,898 - root - INFO - lr: 3.2273e-05 gnorm: 1.10 [10:44:38<13:48:23] +[titan] 2025-10-05 09:18:58,787 - root - INFO - step: 17510 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 09:18:58,787 - root - INFO - lr: 3.2264e-05 gnorm: 1.07 [10:44:48<13:48:12] +[titan] 2025-10-05 09:19:09,664 - root - INFO - step: 17515 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9713 +[titan] 2025-10-05 09:19:09,664 - root - INFO - lr: 3.2256e-05 gnorm: 1.11 [10:44:59<13:48:01] +[titan] 2025-10-05 09:19:20,602 - root - INFO - step: 17520 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 09:19:20,603 - root - INFO - lr: 3.2247e-05 gnorm: 1.06 [10:45:10<13:47:49] +[titan] 2025-10-05 09:19:31,492 - root - INFO - step: 17525 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9837 +[titan] 2025-10-05 09:19:31,492 - root - INFO - lr: 3.2238e-05 gnorm: 1.06 [10:45:21<13:47:38] +[titan] 2025-10-05 09:19:42,388 - root - INFO - step: 17530 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:19:42,388 - root - INFO - lr: 
3.2229e-05 gnorm: 1.04 [10:45:32<13:47:27] +[titan] 2025-10-05 09:19:53,275 - root - INFO - step: 17535 loss: 2.1899 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9365 +[titan] 2025-10-05 09:19:53,275 - root - INFO - lr: 3.2221e-05 gnorm: 1.11 [10:45:43<13:47:16] +[titan] 2025-10-05 09:20:04,158 - root - INFO - step: 17540 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:20:04,158 - root - INFO - lr: 3.2212e-05 gnorm: 1.08 [10:45:54<13:47:04] +[titan] 2025-10-05 09:20:15,047 - root - INFO - step: 17545 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 09:20:15,047 - root - INFO - lr: 3.2203e-05 gnorm: 1.14 [10:46:05<13:46:53] +[titan] 2025-10-05 09:20:23,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:20:25,948 - root - INFO - step: 17550 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:20:25,948 - root - INFO - lr: 3.2194e-05 gnorm: 1.06 [10:46:16<13:46:42] +[titan] 2025-10-05 09:20:36,875 - root - INFO - step: 17555 loss: 2.1706 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 09:20:36,875 - root - INFO - lr: 3.2186e-05 gnorm: 1.05 [10:46:27<13:46:31] +[titan] 2025-10-05 09:20:47,778 - root - INFO - step: 17560 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:20:47,778 - root - INFO - lr: 3.2177e-05 gnorm: 1.04 [10:46:37<13:46:20] +[titan] 2025-10-05 09:20:58,670 - root - INFO - step: 17565 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 09:20:58,670 - root - INFO - lr: 3.2168e-05 gnorm: 1.11 [10:46:48<13:46:08] +[titan] 2025-10-05 09:21:09,567 - root - INFO - step: 17570 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9227 +[titan] 2025-10-05 09:21:09,567 - root - INFO - lr: 3.2160e-05 gnorm: 1.03 [10:46:59<13:45:57] +[titan] 2025-10-05 09:21:20,447 - root - INFO - step: 17575 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:21:20,447 - root - INFO - lr: 3.2151e-05 gnorm: 1.06 [10:47:10<13:45:46] +[titan] 2025-10-05 09:21:31,358 - root - INFO - step: 17580 loss: 2.1219 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 09:21:31,358 - root - INFO - lr: 
3.2142e-05 gnorm: 1.07 [10:47:21<13:45:35] +[titan] 2025-10-05 09:21:42,250 - root - INFO - step: 17585 loss: 2.2406 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 09:21:42,250 - root - INFO - lr: 3.2133e-05 gnorm: 1.10 [10:47:32<13:45:23] +[titan] 2025-10-05 09:21:53,130 - root - INFO - step: 17590 loss: 2.2175 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 09:21:53,130 - root - INFO - lr: 3.2125e-05 gnorm: 1.08 [10:47:43<13:45:12] +[titan] 2025-10-05 09:22:04,011 - root - INFO - step: 17595 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9755 +[titan] 2025-10-05 09:22:04,011 - root - INFO - lr: 3.2116e-05 gnorm: 1.05 [10:47:54<13:45:01] +[titan] 2025-10-05 09:22:12,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:22:14,893 - root - INFO - step: 17600 loss: 2.2663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0020 +[titan] 2025-10-05 09:22:14,894 - root - INFO - lr: 3.2107e-05 gnorm: 1.08 [10:48:05<13:44:50] +[titan] 2025-10-05 09:22:25,790 - root - INFO - step: 17605 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 09:22:25,790 - root - INFO - lr: 3.2098e-05 gnorm: 1.11 [10:48:15<13:44:38] +[titan] 2025-10-05 09:22:36,676 - root - INFO - step: 17610 loss: 2.2048 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9482 +[titan] 2025-10-05 09:22:36,676 - root - INFO - lr: 3.2090e-05 gnorm: 1.08 [10:48:26<13:44:27] +[titan] 2025-10-05 09:22:47,556 - root - INFO - step: 17615 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 09:22:47,556 - root - INFO - lr: 3.2081e-05 gnorm: 1.10 [10:48:37<13:44:16] +[titan] 2025-10-05 09:22:58,451 - root - INFO - step: 17620 loss: 2.1471 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:22:58,451 - root - INFO - lr: 3.2072e-05 gnorm: 1.10 [10:48:48<13:44:05] +[titan] 2025-10-05 09:23:09,330 - root - INFO - step: 17625 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 09:23:09,330 - root - INFO - lr: 3.2063e-05 gnorm: 1.04 [10:48:59<13:43:53] +[titan] 2025-10-05 09:23:20,210 - root - INFO - step: 17630 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9426 +[titan] 2025-10-05 09:23:20,210 - root - INFO - lr: 
3.2055e-05 gnorm: 1.05 [10:49:10<13:43:42] +[titan] 2025-10-05 09:23:31,084 - root - INFO - step: 17635 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 09:23:31,084 - root - INFO - lr: 3.2046e-05 gnorm: 1.06 [10:49:21<13:43:31] +[titan] 2025-10-05 09:23:41,968 - root - INFO - step: 17640 loss: 2.2575 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 09:23:41,969 - root - INFO - lr: 3.2037e-05 gnorm: 1.12 [10:49:32<13:43:20] +[titan] 2025-10-05 09:23:52,856 - root - INFO - step: 17645 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9799 +[titan] 2025-10-05 09:23:52,856 - root - INFO - lr: 3.2029e-05 gnorm: 1.10 [10:49:43<13:43:08] +[titan] 2025-10-05 09:24:01,573 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:24:03,759 - root - INFO - step: 17650 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:24:03,759 - root - INFO - lr: 3.2020e-05 gnorm: 1.14 [10:49:53<13:42:57] +[titan] 2025-10-05 09:24:14,635 - root - INFO - step: 17655 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 09:24:14,636 - root - INFO - lr: 3.2011e-05 gnorm: 1.12 [10:50:04<13:42:46] +[titan] 2025-10-05 09:24:25,539 - root - INFO - step: 17660 loss: 2.1876 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9327 +[titan] 2025-10-05 09:24:25,539 - root - INFO - lr: 3.2002e-05 gnorm: 1.06 [10:50:15<13:42:35] +[titan] 2025-10-05 09:24:36,410 - root - INFO - step: 17665 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9523 +[titan] 2025-10-05 09:24:36,410 - root - INFO - lr: 3.1994e-05 gnorm: 1.09 [10:50:26<13:42:23] +[titan] 2025-10-05 09:24:47,292 - root - INFO - step: 17670 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9587 +[titan] 2025-10-05 09:24:47,292 - root - INFO - lr: 3.1985e-05 gnorm: 1.03 [10:50:37<13:42:12] +[titan] 2025-10-05 09:24:58,185 - root - INFO - step: 17675 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 09:24:58,185 - root - INFO - lr: 3.1976e-05 gnorm: 1.17 [10:50:48<13:42:01] +[titan] 2025-10-05 09:25:09,105 - root - INFO - step: 17680 loss: 2.2810 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 09:25:09,105 - root - INFO - lr: 
3.1967e-05 gnorm: 1.11 [10:50:59<13:41:50] +[titan] 2025-10-05 09:25:19,979 - root - INFO - step: 17685 loss: 2.1693 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:25:19,980 - root - INFO - lr: 3.1959e-05 gnorm: 1.07 [10:51:10<13:41:38] +[titan] 2025-10-05 09:25:30,867 - root - INFO - step: 17690 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9351 +[titan] 2025-10-05 09:25:30,867 - root - INFO - lr: 3.1950e-05 gnorm: 1.10 [10:51:21<13:41:27] +[titan] 2025-10-05 09:25:41,737 - root - INFO - step: 17695 loss: 2.1997 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 09:25:41,737 - root - INFO - lr: 3.1941e-05 gnorm: 1.03 [10:51:31<13:41:16] +[titan] 2025-10-05 09:25:50,446 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:25:52,639 - root - INFO - step: 17700 loss: 2.1679 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 09:25:52,639 - root - INFO - lr: 3.1932e-05 gnorm: 1.08 [10:51:42<13:41:05] +[titan] 2025-10-05 09:26:03,528 - root - INFO - step: 17705 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9507 +[titan] 2025-10-05 09:26:03,528 - root - INFO - lr: 3.1924e-05 gnorm: 1.14 [10:51:53<13:40:53] +[titan] 2025-10-05 09:26:14,424 - root - INFO - step: 17710 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:26:14,424 - root - INFO - lr: 3.1915e-05 gnorm: 1.05 [10:52:04<13:40:42] +[titan] 2025-10-05 09:26:25,353 - root - INFO - step: 17715 loss: 2.1118 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 09:26:25,353 - root - INFO - lr: 3.1906e-05 gnorm: 1.09 [10:52:15<13:40:31] +[titan] 2025-10-05 09:26:36,227 - root - INFO - step: 17720 loss: 2.1460 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8958 +[titan] 2025-10-05 09:26:36,227 - root - INFO - lr: 3.1897e-05 gnorm: 1.09 [10:52:26<13:40:20] +[titan] 2025-10-05 09:26:47,086 - root - INFO - step: 17725 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:26:47,086 - root - INFO - lr: 3.1889e-05 gnorm: 1.04 [10:52:37<13:40:08] +[titan] 2025-10-05 09:26:57,951 - root - INFO - step: 17730 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 09:26:57,951 - root - INFO - lr: 
3.1880e-05 gnorm: 1.13 [10:52:48<13:39:57] +[titan] 2025-10-05 09:27:08,802 - root - INFO - step: 17735 loss: 2.2199 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 09:27:08,802 - root - INFO - lr: 3.1871e-05 gnorm: 1.04 [10:52:58<13:39:46] +[titan] 2025-10-05 09:27:19,665 - root - INFO - step: 17740 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 09:27:19,665 - root - INFO - lr: 3.1862e-05 gnorm: 1.09 [10:53:09<13:39:35] +[titan] 2025-10-05 09:27:30,612 - root - INFO - step: 17745 loss: 2.1677 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9150 +[titan] 2025-10-05 09:27:30,612 - root - INFO - lr: 3.1854e-05 gnorm: 1.09 [10:53:20<13:39:23] +[titan] 2025-10-05 09:27:39,278 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:27:41,462 - root - INFO - step: 17750 loss: 2.1954 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 09:27:41,462 - root - INFO - lr: 3.1845e-05 gnorm: 1.09 [10:53:31<13:39:12] +[titan] 2025-10-05 09:27:52,328 - root - INFO - step: 17755 loss: 2.1602 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9091 +[titan] 2025-10-05 09:27:52,328 - root - INFO - lr: 3.1836e-05 gnorm: 1.04 [10:53:42<13:39:01] +[titan] 2025-10-05 09:28:03,186 - root - INFO - step: 17760 loss: 2.2440 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:28:03,186 - root - INFO - lr: 3.1827e-05 gnorm: 1.08 [10:53:53<13:38:50] +[titan] 2025-10-05 09:28:14,043 - root - INFO - step: 17765 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9224 +[titan] 2025-10-05 09:28:14,043 - root - INFO - lr: 3.1818e-05 gnorm: 1.07 [10:54:04<13:38:38] +[titan] 2025-10-05 09:28:24,918 - root - INFO - step: 17770 loss: 2.1581 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9064 +[titan] 2025-10-05 09:28:24,918 - root - INFO - lr: 3.1810e-05 gnorm: 1.06 [10:54:15<13:38:27] +[titan] 2025-10-05 09:28:35,788 - root - INFO - step: 17775 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9652 +[titan] 2025-10-05 09:28:35,788 - root - INFO - lr: 3.1801e-05 gnorm: 1.15 [10:54:25<13:38:16] +[titan] 2025-10-05 09:28:46,696 - root - INFO - step: 17780 loss: 2.2055 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:28:46,696 - root - INFO - lr: 
3.1792e-05 gnorm: 1.10 [10:54:36<13:38:05] +[titan] 2025-10-05 09:28:57,567 - root - INFO - step: 17785 loss: 2.1809 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9261 +[titan] 2025-10-05 09:28:57,567 - root - INFO - lr: 3.1783e-05 gnorm: 1.05 [10:54:47<13:37:53] +[titan] 2025-10-05 09:29:08,437 - root - INFO - step: 17790 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:29:08,437 - root - INFO - lr: 3.1775e-05 gnorm: 1.09 [10:54:58<13:37:42] +[titan] 2025-10-05 09:29:19,347 - root - INFO - step: 17795 loss: 2.1437 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 09:29:19,348 - root - INFO - lr: 3.1766e-05 gnorm: 1.05 [10:55:09<13:37:31] +[titan] 2025-10-05 09:29:28,050 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:29:30,284 - root - INFO - step: 17800 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 09:29:30,284 - root - INFO - lr: 3.1757e-05 gnorm: 1.07 [10:55:20<13:37:20] +[titan] 2025-10-05 09:29:41,184 - root - INFO - step: 17805 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9269 +[titan] 2025-10-05 09:29:41,184 - root - INFO - lr: 3.1748e-05 gnorm: 1.07 [10:55:31<13:37:08] +[titan] 2025-10-05 09:29:52,086 - root - INFO - step: 17810 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:29:52,086 - root - INFO - lr: 3.1740e-05 gnorm: 1.06 [10:55:42<13:36:57] +[titan] 2025-10-05 09:30:03,070 - root - INFO - step: 17815 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9441 +[titan] 2025-10-05 09:30:03,070 - root - INFO - lr: 3.1731e-05 gnorm: 1.04 [10:55:53<13:36:46] +[titan] 2025-10-05 09:30:13,933 - root - INFO - step: 17820 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 09:30:13,933 - root - INFO - lr: 3.1722e-05 gnorm: 1.09 [10:56:04<13:36:35] +[titan] 2025-10-05 09:30:24,824 - root - INFO - step: 17825 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:30:24,824 - root - INFO - lr: 3.1713e-05 gnorm: 1.05 [10:56:14<13:36:24] +[titan] 2025-10-05 09:30:35,792 - root - INFO - step: 17830 loss: 2.1738 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:30:35,792 - root - INFO - lr: 
3.1705e-05 gnorm: 1.09 [10:56:25<13:36:12] +[titan] 2025-10-05 09:30:46,656 - root - INFO - step: 17835 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 09:30:46,656 - root - INFO - lr: 3.1696e-05 gnorm: 1.04 [10:56:36<13:36:01] +[titan] 2025-10-05 09:30:57,542 - root - INFO - step: 17840 loss: 2.1750 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 09:30:57,542 - root - INFO - lr: 3.1687e-05 gnorm: 1.05 [10:56:47<13:35:50] +[titan] 2025-10-05 09:31:08,403 - root - INFO - step: 17845 loss: 2.2534 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 09:31:08,403 - root - INFO - lr: 3.1678e-05 gnorm: 1.10 [10:56:58<13:35:39] +[titan] 2025-10-05 09:31:17,089 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:31:19,279 - root - INFO - step: 17850 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9894 +[titan] 2025-10-05 09:31:19,279 - root - INFO - lr: 3.1670e-05 gnorm: 1.07 [10:57:09<13:35:27] +[titan] 2025-10-05 09:31:30,156 - root - INFO - step: 17855 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9467 +[titan] 2025-10-05 09:31:30,156 - root - INFO - lr: 3.1661e-05 gnorm: 1.03 [10:57:20<13:35:16] +[titan] 2025-10-05 09:31:41,087 - root - INFO - step: 17860 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9406 +[titan] 2025-10-05 09:31:41,088 - root - INFO - lr: 3.1652e-05 gnorm: 1.07 [10:57:31<13:35:05] +[titan] 2025-10-05 09:31:51,960 - root - INFO - step: 17865 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:31:51,962 - root - INFO - lr: 3.1643e-05 gnorm: 1.08 [10:57:42<13:34:54] +[titan] 2025-10-05 09:32:02,826 - root - INFO - step: 17870 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 09:32:02,826 - root - INFO - lr: 3.1634e-05 gnorm: 1.08 [10:57:52<13:34:42] +[titan] 2025-10-05 09:32:13,722 - root - INFO - step: 17875 loss: 2.2074 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:32:13,722 - root - INFO - lr: 3.1626e-05 gnorm: 1.04 [10:58:03<13:34:31] +[titan] 2025-10-05 09:32:24,584 - root - INFO - step: 17880 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 09:32:24,584 - root - INFO - lr: 
3.1617e-05 gnorm: 1.06 [10:58:14<13:34:20] +[titan] 2025-10-05 09:32:35,482 - root - INFO - step: 17885 loss: 2.2057 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 09:32:35,482 - root - INFO - lr: 3.1608e-05 gnorm: 1.05 [10:58:25<13:34:09] +[titan] 2025-10-05 09:32:46,343 - root - INFO - step: 17890 loss: 2.2259 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9659 +[titan] 2025-10-05 09:32:46,344 - root - INFO - lr: 3.1599e-05 gnorm: 1.05 [10:58:36<13:33:57] +[titan] 2025-10-05 09:32:57,245 - root - INFO - step: 17895 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8967 +[titan] 2025-10-05 09:32:57,245 - root - INFO - lr: 3.1591e-05 gnorm: 1.07 [10:58:47<13:33:46] +[titan] 2025-10-05 09:33:05,937 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:33:08,118 - root - INFO - step: 17900 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 09:33:08,118 - root - INFO - lr: 3.1582e-05 gnorm: 1.01 [10:58:58<13:33:35] +[titan] 2025-10-05 09:33:19,021 - root - INFO - step: 17905 loss: 2.1704 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 09:33:19,022 - root - INFO - lr: 3.1573e-05 gnorm: 1.15 [10:59:09<13:33:24] +[titan] 2025-10-05 09:33:29,882 - root - INFO - step: 17910 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 09:33:29,882 - root - INFO - lr: 3.1564e-05 gnorm: 1.05 [10:59:20<13:33:12] +[titan] 2025-10-05 09:33:40,805 - root - INFO - step: 17915 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 09:33:40,805 - root - INFO - lr: 3.1555e-05 gnorm: 1.08 [10:59:30<13:33:01] +[titan] 2025-10-05 09:33:51,745 - root - INFO - step: 17920 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 09:33:51,746 - root - INFO - lr: 3.1547e-05 gnorm: 1.12 [10:59:41<13:32:50] +[titan] 2025-10-05 09:33:51,926 - root - INFO - Dumping profiler traces at step 17920 +[titan] 2025-10-05 09:33:51,967 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:34:02,867 - root - INFO - step: 17925 loss: 2.1932 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 09:34:02,867 - root - INFO - lr: 3.1538e-05 gnorm: 1.06 [10:59:52<13:32:39] +[titan] 2025-10-05 09:34:13,739 - root - INFO - step: 17930 loss: 
2.2006 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:34:13,739 - root - INFO - lr: 3.1529e-05 gnorm: 1.12 [11:00:03<13:32:28] +[titan] 2025-10-05 09:34:24,600 - root - INFO - step: 17935 loss: 2.2250 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:34:24,600 - root - INFO - lr: 3.1520e-05 gnorm: 1.06 [11:00:14<13:32:17] +[titan] 2025-10-05 09:34:35,525 - root - INFO - step: 17940 loss: 2.1726 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 09:34:35,525 - root - INFO - lr: 3.1512e-05 gnorm: 1.05 [11:00:25<13:32:05] +[titan] 2025-10-05 09:34:46,392 - root - INFO - step: 17945 loss: 2.0902 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 09:34:46,392 - root - INFO - lr: 3.1503e-05 gnorm: 1.04 [11:00:36<13:31:54] +[titan] 2025-10-05 09:34:55,074 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:34:57,263 - root - INFO - step: 17950 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 09:34:57,264 - root - INFO - lr: 3.1494e-05 gnorm: 1.09 [11:00:47<13:31:43] +[titan] 2025-10-05 09:35:08,110 - root - INFO - step: 17955 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 09:35:08,110 - root - INFO - lr: 3.1485e-05 gnorm: 1.05 [11:00:58<13:31:32] +[titan] 2025-10-05 09:35:18,976 - root - INFO - step: 17960 loss: 2.2219 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:35:18,976 - root - INFO - lr: 3.1476e-05 gnorm: 1.08 [11:01:09<13:31:20] +[titan] 2025-10-05 09:35:29,837 - root - INFO - step: 17965 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9589 +[titan] 2025-10-05 09:35:29,838 - root - INFO - lr: 3.1468e-05 gnorm: 1.08 [11:01:19<13:31:09] +[titan] 2025-10-05 09:35:40,783 - root - INFO - step: 17970 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.35 mfu: 42.00% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9404 +[titan] 2025-10-05 09:35:40,783 - root - INFO - lr: 3.1459e-05 gnorm: 1.07 [11:01:30<13:30:58] +[titan] 2025-10-05 09:35:51,637 - root - INFO - step: 17975 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.0326 +[titan] 2025-10-05 09:35:51,637 - root - INFO - lr: 3.1450e-05 gnorm: 1.14 [11:01:41<13:30:47] +[titan] 2025-10-05 09:36:02,514 - root - INFO - step: 17980 loss: 2.1848 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 09:36:02,514 - root - INFO - lr: 
3.1441e-05 gnorm: 1.06 [11:01:52<13:30:35] +[titan] 2025-10-05 09:36:13,381 - root - INFO - step: 17985 loss: 2.1655 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9132 +[titan] 2025-10-05 09:36:13,381 - root - INFO - lr: 3.1432e-05 gnorm: 1.07 [11:02:03<13:30:24] +[titan] 2025-10-05 09:36:24,242 - root - INFO - step: 17990 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 09:36:24,242 - root - INFO - lr: 3.1424e-05 gnorm: 1.05 [11:02:14<13:30:13] +[titan] 2025-10-05 09:36:35,118 - root - INFO - step: 17995 loss: 2.2044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9479 +[titan] 2025-10-05 09:36:35,118 - root - INFO - lr: 3.1415e-05 gnorm: 1.05 [11:02:25<13:30:02] +[titan] 2025-10-05 09:36:43,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:36:46,012 - root - INFO - step: 18000 loss: 2.1302 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 09:36:46,012 - root - INFO - lr: 3.1406e-05 gnorm: 1.05 [11:02:36<13:29:50] +[titan] 2025-10-05 09:36:56,870 - root - INFO - step: 18005 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9388 +[titan] 2025-10-05 09:36:56,871 - root - INFO - lr: 3.1397e-05 gnorm: 1.04 [11:02:46<13:29:39] +[titan] 2025-10-05 09:37:07,717 - root - INFO - step: 18010 loss: 2.2185 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9597 +[titan] 2025-10-05 09:37:07,718 - root - INFO - lr: 3.1389e-05 gnorm: 1.06 [11:02:57<13:29:28] +[titan] 2025-10-05 09:37:18,576 - root - INFO - step: 18015 loss: 2.2301 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:37:18,576 - root - INFO - lr: 3.1380e-05 gnorm: 1.13 [11:03:08<13:29:17] +[titan] 2025-10-05 09:37:29,423 - root - INFO - step: 18020 loss: 2.2014 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9439 +[titan] 2025-10-05 09:37:29,423 - root - INFO - lr: 3.1371e-05 gnorm: 1.04 [11:03:19<13:29:05] +[titan] 2025-10-05 09:37:40,354 - root - INFO - step: 18025 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:37:40,354 - root - INFO - lr: 3.1362e-05 gnorm: 1.05 [11:03:30<13:28:54] +[titan] 2025-10-05 09:37:51,204 - root - INFO - step: 18030 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9219 +[titan] 2025-10-05 09:37:51,204 - root - INFO - lr: 
3.1353e-05 gnorm: 1.05 [11:03:41<13:28:43] +[titan] 2025-10-05 09:38:02,089 - root - INFO - step: 18035 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:38:02,089 - root - INFO - lr: 3.1345e-05 gnorm: 1.09 [11:03:52<13:28:32] +[titan] 2025-10-05 09:38:12,956 - root - INFO - step: 18040 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:38:12,956 - root - INFO - lr: 3.1336e-05 gnorm: 1.11 [11:04:03<13:28:20] +[titan] 2025-10-05 09:38:23,803 - root - INFO - step: 18045 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 09:38:23,803 - root - INFO - lr: 3.1327e-05 gnorm: 1.04 [11:04:13<13:28:09] +[titan] 2025-10-05 09:38:32,482 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:38:34,663 - root - INFO - step: 18050 loss: 2.1705 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:38:34,663 - root - INFO - lr: 3.1318e-05 gnorm: 1.05 [11:04:24<13:27:58] +[titan] 2025-10-05 09:38:45,582 - root - INFO - step: 18055 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 09:38:45,582 - root - INFO - lr: 3.1309e-05 gnorm: 1.10 [11:04:35<13:27:47] +[titan] 2025-10-05 09:38:56,461 - root - INFO - step: 18060 loss: 2.1737 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 09:38:56,462 - root - INFO - lr: 3.1301e-05 gnorm: 1.10 [11:04:46<13:27:35] +[titan] 2025-10-05 09:39:07,387 - root - INFO - step: 18065 loss: 2.2727 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0073 +[titan] 2025-10-05 09:39:07,388 - root - INFO - lr: 3.1292e-05 gnorm: 1.11 [11:04:57<13:27:24] +[titan] 2025-10-05 09:39:18,267 - root - INFO - step: 18070 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9196 +[titan] 2025-10-05 09:39:18,267 - root - INFO - lr: 3.1283e-05 gnorm: 1.03 [11:05:08<13:27:13] +[titan] 2025-10-05 09:39:29,177 - root - INFO - step: 18075 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 09:39:29,177 - root - INFO - lr: 3.1274e-05 gnorm: 1.09 [11:05:19<13:27:02] +[titan] 2025-10-05 09:39:40,351 - root - INFO - step: 18080 loss: 2.1525 memory: 118.84GiB(85.28%) tps: 29,326 tflops: 406.85 mfu: 41.14% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 09:39:40,352 - root - INFO - lr: 
3.1265e-05 gnorm: 1.07 [11:05:30<13:26:51] +[titan] 2025-10-05 09:39:51,220 - root - INFO - step: 18085 loss: 2.1539 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 09:39:51,221 - root - INFO - lr: 3.1257e-05 gnorm: 1.06 [11:05:41<13:26:40] +[titan] 2025-10-05 09:40:02,072 - root - INFO - step: 18090 loss: 2.1462 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8962 +[titan] 2025-10-05 09:40:02,072 - root - INFO - lr: 3.1248e-05 gnorm: 1.09 [11:05:52<13:26:28] +[titan] 2025-10-05 09:40:12,918 - root - INFO - step: 18095 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:40:12,918 - root - INFO - lr: 3.1239e-05 gnorm: 1.10 [11:06:03<13:26:17] +[titan] 2025-10-05 09:40:21,622 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:40:23,799 - root - INFO - step: 18100 loss: 2.2201 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:40:23,799 - root - INFO - lr: 3.1230e-05 gnorm: 1.06 [11:06:13<13:26:06] +[titan] 2025-10-05 09:40:34,657 - root - INFO - step: 18105 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9333 +[titan] 2025-10-05 09:40:34,657 - root - INFO - lr: 3.1221e-05 gnorm: 1.02 [11:06:24<13:25:54] +[titan] 2025-10-05 09:40:45,595 - root - INFO - step: 18110 loss: 2.2690 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0044 +[titan] 2025-10-05 09:40:45,595 - root - INFO - lr: 3.1213e-05 gnorm: 1.11 [11:06:35<13:25:43] +[titan] 2025-10-05 09:40:56,456 - root - INFO - step: 18115 loss: 2.1375 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 09:40:56,457 - root - INFO - lr: 3.1204e-05 gnorm: 1.07 [11:06:46<13:25:32] +[titan] 2025-10-05 09:41:07,318 - root - INFO - step: 18120 loss: 2.2233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9634 +[titan] 2025-10-05 09:41:07,318 - root - INFO - lr: 3.1195e-05 gnorm: 1.08 [11:06:57<13:25:21] +[titan] 2025-10-05 09:41:18,178 - root - INFO - step: 18125 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9584 +[titan] 2025-10-05 09:41:18,178 - root - INFO - lr: 3.1186e-05 gnorm: 1.07 [11:07:08<13:25:10] +[titan] 2025-10-05 09:41:29,063 - root - INFO - step: 18130 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9142 +[titan] 2025-10-05 09:41:29,063 - root - INFO - lr: 
3.1177e-05 gnorm: 1.07 [11:07:19<13:24:58] +[titan] 2025-10-05 09:41:39,951 - root - INFO - step: 18135 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9334 +[titan] 2025-10-05 09:41:39,951 - root - INFO - lr: 3.1169e-05 gnorm: 1.06 [11:07:30<13:24:47] +[titan] 2025-10-05 09:41:50,877 - root - INFO - step: 18140 loss: 2.2241 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 09:41:50,877 - root - INFO - lr: 3.1160e-05 gnorm: 1.13 [11:07:40<13:24:36] +[titan] 2025-10-05 09:42:01,736 - root - INFO - step: 18145 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 09:42:01,736 - root - INFO - lr: 3.1151e-05 gnorm: 1.11 [11:07:51<13:24:25] +[titan] 2025-10-05 09:42:10,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:42:12,607 - root - INFO - step: 18150 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8818 +[titan] 2025-10-05 09:42:12,608 - root - INFO - lr: 3.1142e-05 gnorm: 1.06 [11:08:02<13:24:13] +[titan] 2025-10-05 09:42:23,477 - root - INFO - step: 18155 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9019 +[titan] 2025-10-05 09:42:23,477 - root - INFO - lr: 3.1133e-05 gnorm: 1.02 [11:08:13<13:24:02] +[titan] 2025-10-05 09:42:34,354 - root - INFO - step: 18160 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:42:34,354 - root - INFO - lr: 3.1125e-05 gnorm: 1.06 [11:08:24<13:23:51] +[titan] 2025-10-05 09:42:45,261 - root - INFO - step: 18165 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 09:42:45,261 - root - INFO - lr: 3.1116e-05 gnorm: 1.07 [11:08:35<13:23:40] +[titan] 2025-10-05 09:42:56,138 - root - INFO - step: 18170 loss: 2.1630 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9103 +[titan] 2025-10-05 09:42:56,138 - root - INFO - lr: 3.1107e-05 gnorm: 1.06 [11:08:46<13:23:28] +[titan] 2025-10-05 09:43:06,985 - root - INFO - step: 18175 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 09:43:06,985 - root - INFO - lr: 3.1098e-05 gnorm: 1.05 [11:08:57<13:23:17] +[titan] 2025-10-05 09:43:17,840 - root - INFO - step: 18180 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 09:43:17,840 - root - INFO - lr: 
3.1089e-05 gnorm: 1.06 [11:09:07<13:23:06] +[titan] 2025-10-05 09:43:28,708 - root - INFO - step: 18185 loss: 2.2232 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9641 +[titan] 2025-10-05 09:43:28,708 - root - INFO - lr: 3.1080e-05 gnorm: 1.07 [11:09:18<13:22:55] +[titan] 2025-10-05 09:43:39,585 - root - INFO - step: 18190 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:43:39,586 - root - INFO - lr: 3.1072e-05 gnorm: 1.04 [11:09:29<13:22:43] +[titan] 2025-10-05 09:43:50,582 - root - INFO - step: 18195 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 29,799 tflops: 413.41 mfu: 41.80% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 09:43:50,582 - root - INFO - lr: 3.1063e-05 gnorm: 1.10 [11:09:40<13:22:32] +[titan] 2025-10-05 09:43:59,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:44:01,443 - root - INFO - step: 18200 loss: 2.1663 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9138 +[titan] 2025-10-05 09:44:01,443 - root - INFO - lr: 3.1054e-05 gnorm: 1.04 [11:09:51<13:22:21] +[titan] 2025-10-05 09:44:12,364 - root - INFO - step: 18205 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 09:44:12,364 - root - INFO - lr: 3.1045e-05 gnorm: 1.07 [11:10:02<13:22:10] +[titan] 2025-10-05 09:44:23,235 - root - INFO - step: 18210 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 09:44:23,235 - root - INFO - lr: 3.1036e-05 gnorm: 1.04 [11:10:13<13:21:59] +[titan] 2025-10-05 09:44:34,114 - root - INFO - step: 18215 loss: 2.1970 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 09:44:34,114 - root - INFO - lr: 3.1028e-05 gnorm: 1.08 [11:10:24<13:21:47] +[titan] 2025-10-05 09:44:45,034 - root - INFO - step: 18220 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:44:45,034 - root - INFO - lr: 3.1019e-05 gnorm: 1.02 [11:10:35<13:21:36] +[titan] 2025-10-05 09:44:55,926 - root - INFO - step: 18225 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 09:44:55,926 - root - INFO - lr: 3.1010e-05 gnorm: 1.06 [11:10:46<13:21:25] +[titan] 2025-10-05 09:45:06,773 - root - INFO - step: 18230 loss: 2.2584 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 09:45:06,773 - root - INFO - lr: 
3.1001e-05 gnorm: 1.08 [11:10:56<13:21:14] +[titan] 2025-10-05 09:45:17,639 - root - INFO - step: 18235 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:45:17,639 - root - INFO - lr: 3.0992e-05 gnorm: 1.10 [11:11:07<13:21:02] +[titan] 2025-10-05 09:45:28,516 - root - INFO - step: 18240 loss: 2.1421 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 09:45:28,516 - root - INFO - lr: 3.0984e-05 gnorm: 1.05 [11:11:18<13:20:51] +[titan] 2025-10-05 09:45:39,379 - root - INFO - step: 18245 loss: 2.1122 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8654 +[titan] 2025-10-05 09:45:39,379 - root - INFO - lr: 3.0975e-05 gnorm: 1.06 [11:11:29<13:20:40] +[titan] 2025-10-05 09:45:48,140 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:45:50,324 - root - INFO - step: 18250 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.39 mfu: 42.00% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9443 +[titan] 2025-10-05 09:45:50,324 - root - INFO - lr: 3.0966e-05 gnorm: 1.03 [11:11:40<13:20:29] +[titan] 2025-10-05 09:46:01,186 - root - INFO - step: 18255 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9065 +[titan] 2025-10-05 09:46:01,186 - root - INFO - lr: 3.0957e-05 gnorm: 1.04 [11:11:51<13:20:18] +[titan] 2025-10-05 09:46:12,076 - root - INFO - step: 18260 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:46:12,077 - root - INFO - lr: 3.0948e-05 gnorm: 1.05 [11:12:02<13:20:06] +[titan] 2025-10-05 09:46:22,981 - root - INFO - step: 18265 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 09:46:22,981 - root - INFO - lr: 3.0939e-05 gnorm: 1.12 [11:12:13<13:19:55] +[titan] 2025-10-05 09:46:33,839 - root - INFO - step: 18270 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:46:33,840 - root - INFO - lr: 3.0931e-05 gnorm: 1.05 [11:12:23<13:19:44] +[titan] 2025-10-05 09:46:44,752 - root - INFO - step: 18275 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 09:46:44,752 - root - INFO - lr: 3.0922e-05 gnorm: 1.10 [11:12:34<13:19:33] +[titan] 2025-10-05 09:46:55,639 - root - INFO - step: 18280 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 09:46:55,639 - root - INFO - lr: 
3.0913e-05 gnorm: 1.08 [11:12:45<13:19:21] +[titan] 2025-10-05 09:47:06,489 - root - INFO - step: 18285 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 09:47:06,489 - root - INFO - lr: 3.0904e-05 gnorm: 1.06 [11:12:56<13:19:10] +[titan] 2025-10-05 09:47:17,381 - root - INFO - step: 18290 loss: 2.2060 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:47:17,381 - root - INFO - lr: 3.0895e-05 gnorm: 1.12 [11:13:07<13:18:59] +[titan] 2025-10-05 09:47:28,261 - root - INFO - step: 18295 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9274 +[titan] 2025-10-05 09:47:28,261 - root - INFO - lr: 3.0887e-05 gnorm: 1.07 [11:13:18<13:18:48] +[titan] 2025-10-05 09:47:36,942 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:47:39,132 - root - INFO - step: 18300 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 09:47:39,132 - root - INFO - lr: 3.0878e-05 gnorm: 1.07 [11:13:29<13:18:36] +[titan] 2025-10-05 09:47:50,070 - root - INFO - step: 18305 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:47:50,070 - root - INFO - lr: 3.0869e-05 gnorm: 1.04 [11:13:40<13:18:25] +[titan] 2025-10-05 09:48:00,953 - root - INFO - step: 18310 loss: 2.2365 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 09:48:00,953 - root - INFO - lr: 3.0860e-05 gnorm: 1.10 [11:13:51<13:18:14] +[titan] 2025-10-05 09:48:11,821 - root - INFO - step: 18315 loss: 2.1228 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 09:48:11,821 - root - INFO - lr: 3.0851e-05 gnorm: 1.10 [11:14:01<13:18:03] +[titan] 2025-10-05 09:48:22,702 - root - INFO - step: 18320 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:22,703 - root - INFO - lr: 3.0842e-05 gnorm: 1.10 [11:14:12<13:17:52] +[titan] 2025-10-05 09:48:33,562 - root - INFO - step: 18325 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 09:48:33,562 - root - INFO - lr: 3.0834e-05 gnorm: 1.06 [11:14:23<13:17:40] +[titan] 2025-10-05 09:48:44,442 - root - INFO - step: 18330 loss: 2.1384 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:48:44,442 - root - INFO - lr: 
3.0825e-05 gnorm: 1.07 [11:14:34<13:17:29] +[titan] 2025-10-05 09:48:55,377 - root - INFO - step: 18335 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:55,377 - root - INFO - lr: 3.0816e-05 gnorm: 1.05 [11:14:45<13:17:18] +[titan] 2025-10-05 09:49:06,255 - root - INFO - step: 18340 loss: 2.1540 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:49:06,255 - root - INFO - lr: 3.0807e-05 gnorm: 1.02 [11:14:56<13:17:07] +[titan] 2025-10-05 09:49:17,139 - root - INFO - step: 18345 loss: 2.1319 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8845 +[titan] 2025-10-05 09:49:17,139 - root - INFO - lr: 3.0798e-05 gnorm: 1.06 [11:15:07<13:16:55] +[titan] 2025-10-05 09:49:25,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:49:28,007 - root - INFO - step: 18350 loss: 2.2255 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9664 +[titan] 2025-10-05 09:49:28,007 - root - INFO - lr: 3.0789e-05 gnorm: 1.07 [11:15:18<13:16:44] +[titan] 2025-10-05 09:49:38,916 - root - INFO - step: 18355 loss: 2.1700 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9168 +[titan] 2025-10-05 09:49:38,917 - root - INFO - lr: 3.0781e-05 gnorm: 1.03 [11:15:29<13:16:33] +[titan] 2025-10-05 09:49:49,833 - root - INFO - step: 18360 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9456 +[titan] 2025-10-05 09:49:49,834 - root - INFO - lr: 3.0772e-05 gnorm: 1.12 [11:15:39<13:16:22] +[titan] 2025-10-05 09:50:00,730 - root - INFO - step: 18365 loss: 2.2105 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 09:50:00,730 - root - INFO - lr: 3.0763e-05 gnorm: 1.05 [11:15:50<13:16:11] +[titan] 2025-10-05 09:50:11,596 - root - INFO - step: 18370 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9432 +[titan] 2025-10-05 09:50:11,596 - root - INFO - lr: 3.0754e-05 gnorm: 1.09 [11:16:01<13:15:59] +[titan] 2025-10-05 09:50:22,481 - root - INFO - step: 18375 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8909 +[titan] 2025-10-05 09:50:22,481 - root - INFO - lr: 3.0745e-05 gnorm: 1.07 [11:16:12<13:15:48] +[titan] 2025-10-05 09:50:33,379 - root - INFO - step: 18380 loss: 2.1743 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 09:50:33,379 - root - INFO - lr: 
3.0736e-05 gnorm: 1.05 [11:16:23<13:15:37] +[titan] 2025-10-05 09:50:44,278 - root - INFO - step: 18385 loss: 2.2455 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9831 +[titan] 2025-10-05 09:50:44,278 - root - INFO - lr: 3.0728e-05 gnorm: 1.08 [11:16:34<13:15:26] +[titan] 2025-10-05 09:50:55,194 - root - INFO - step: 18390 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 09:50:55,194 - root - INFO - lr: 3.0719e-05 gnorm: 1.04 [11:16:45<13:15:15] +[titan] 2025-10-05 09:51:06,079 - root - INFO - step: 18395 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9155 +[titan] 2025-10-05 09:51:06,080 - root - INFO - lr: 3.0710e-05 gnorm: 1.07 [11:16:56<13:15:03] +[titan] 2025-10-05 09:51:14,780 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:51:16,960 - root - INFO - step: 18400 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:51:16,961 - root - INFO - lr: 3.0701e-05 gnorm: 1.03 [11:17:07<13:14:52] +[titan] 2025-10-05 09:51:27,817 - root - INFO - step: 18405 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 09:51:27,817 - root - INFO - lr: 3.0692e-05 gnorm: 1.07 [11:17:17<13:14:41] +[titan] 2025-10-05 09:51:38,681 - root - INFO - step: 18410 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 09:51:38,682 - root - INFO - lr: 3.0683e-05 gnorm: 1.06 [11:17:28<13:14:30] +[titan] 2025-10-05 09:51:49,581 - root - INFO - step: 18415 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8667 +[titan] 2025-10-05 09:51:49,582 - root - INFO - lr: 3.0675e-05 gnorm: 1.10 [11:17:39<13:14:18] +[titan] 2025-10-05 09:52:00,490 - root - INFO - step: 18420 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:52:00,490 - root - INFO - lr: 3.0666e-05 gnorm: 1.06 [11:17:50<13:14:07] +[titan] 2025-10-05 09:52:11,358 - root - INFO - step: 18425 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9183 +[titan] 2025-10-05 09:52:11,358 - root - INFO - lr: 3.0657e-05 gnorm: 1.09 [11:18:01<13:13:56] +[titan] 2025-10-05 09:52:22,299 - root - INFO - step: 18430 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 09:52:22,299 - root - INFO - lr: 
3.0648e-05 gnorm: 1.10 [11:18:12<13:13:45] +[titan] 2025-10-05 09:52:26,823 - root - INFO - Dumping profiler traces at step 18432 +[titan] 2025-10-05 09:52:26,862 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:52:33,371 - root - INFO - step: 18435 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 29,597 tflops: 410.61 mfu: 41.52% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 09:52:33,371 - root - INFO - lr: 3.0639e-05 gnorm: 1.09 [11:18:23<13:13:34] +[titan] 2025-10-05 09:52:44,226 - root - INFO - step: 18440 loss: 2.1224 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8757 +[titan] 2025-10-05 09:52:44,226 - root - INFO - lr: 3.0630e-05 gnorm: 1.05 [11:18:34<13:13:23] +[titan] 2025-10-05 09:52:55,086 - root - INFO - step: 18445 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:52:55,086 - root - INFO - lr: 3.0622e-05 gnorm: 1.16 [11:18:45<13:13:11] +[titan] 2025-10-05 09:53:03,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:53:05,965 - root - INFO - step: 18450 loss: 2.1736 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:53:05,965 - root - INFO - lr: 3.0613e-05 gnorm: 1.04 [11:18:56<13:13:00] +[titan] 2025-10-05 09:53:16,799 - root - INFO - step: 18455 loss: 2.2016 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9454 +[titan] 2025-10-05 09:53:16,799 - root - INFO - lr: 3.0604e-05 gnorm: 1.08 [11:19:06<13:12:49] +[titan] 2025-10-05 09:53:27,669 - root - INFO - step: 18460 loss: 2.1859 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9308 +[titan] 2025-10-05 09:53:27,669 - root - INFO - lr: 3.0595e-05 gnorm: 1.09 [11:19:17<13:12:38] +[titan] 2025-10-05 09:53:38,507 - root - INFO - step: 18465 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:53:38,508 - root - INFO - lr: 3.0586e-05 gnorm: 1.11 [11:19:28<13:12:26] +[titan] 2025-10-05 09:53:49,352 - root - INFO - step: 18470 loss: 2.2070 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:53:49,352 - root - INFO - lr: 3.0577e-05 gnorm: 1.15 [11:19:39<13:12:15] +[titan] 2025-10-05 09:54:00,241 - root - INFO - step: 18475 loss: 2.2443 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:54:00,242 - root - INFO - lr: 3.0569e-05 gnorm: 1.07 [11:19:50<13:12:04] +[titan] 2025-10-05 09:54:11,123 - root - INFO - step: 18480 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 09:54:11,123 - root - INFO - lr: 
3.0560e-05 gnorm: 1.02 [11:20:01<13:11:53] +[titan] 2025-10-05 09:54:21,973 - root - INFO - step: 18485 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:54:21,973 - root - INFO - lr: 3.0551e-05 gnorm: 3.61 [11:20:12<13:11:41] +[titan] 2025-10-05 09:54:32,841 - root - INFO - step: 18490 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:54:32,841 - root - INFO - lr: 3.0542e-05 gnorm: 1.08 [11:20:22<13:11:30] +[titan] 2025-10-05 09:54:43,710 - root - INFO - step: 18495 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:54:43,710 - root - INFO - lr: 3.0533e-05 gnorm: 1.03 [11:20:33<13:11:19] +[titan] 2025-10-05 09:54:52,412 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:54:54,598 - root - INFO - step: 18500 loss: 2.1801 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:54:54,598 - root - INFO - lr: 3.0524e-05 gnorm: 1.07 [11:20:44<13:11:08] +[titan] 2025-10-05 09:55:05,447 - root - INFO - step: 18505 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 09:55:05,447 - root - INFO - lr: 3.0515e-05 gnorm: 1.05 [11:20:55<13:10:56] +[titan] 2025-10-05 09:55:16,304 - root - INFO - step: 18510 loss: 2.2328 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9729 +[titan] 2025-10-05 09:55:16,304 - root - INFO - lr: 3.0507e-05 gnorm: 1.12 [11:21:06<13:10:45] +[titan] 2025-10-05 09:55:27,210 - root - INFO - step: 18515 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 09:55:27,211 - root - INFO - lr: 3.0498e-05 gnorm: 1.04 [11:21:17<13:10:34] +[titan] 2025-10-05 09:55:38,070 - root - INFO - step: 18520 loss: 2.1990 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:55:38,071 - root - INFO - lr: 3.0489e-05 gnorm: 1.06 [11:21:28<13:10:23] +[titan] 2025-10-05 09:55:48,946 - root - INFO - step: 18525 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8803 +[titan] 2025-10-05 09:55:48,946 - root - INFO - lr: 3.0480e-05 gnorm: 1.10 [11:21:39<13:10:11] +[titan] 2025-10-05 09:55:59,819 - root - INFO - step: 18530 loss: 2.1569 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:55:59,819 - root - INFO - lr: 
3.0471e-05 gnorm: 1.09 [11:21:49<13:10:00] +[titan] 2025-10-05 09:56:10,667 - root - INFO - step: 18535 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 09:56:10,668 - root - INFO - lr: 3.0462e-05 gnorm: 1.08 [11:22:00<13:09:49] +[titan] 2025-10-05 09:56:21,514 - root - INFO - step: 18540 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8789 +[titan] 2025-10-05 09:56:21,515 - root - INFO - lr: 3.0454e-05 gnorm: 1.06 [11:22:11<13:09:38] +[titan] 2025-10-05 09:56:32,416 - root - INFO - step: 18545 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 09:56:32,416 - root - INFO - lr: 3.0445e-05 gnorm: 1.09 [11:22:22<13:09:27] +[titan] 2025-10-05 09:56:41,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:56:43,301 - root - INFO - step: 18550 loss: 2.2123 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9545 +[titan] 2025-10-05 09:56:43,301 - root - INFO - lr: 3.0436e-05 gnorm: 1.11 [11:22:33<13:09:15] +[titan] 2025-10-05 09:56:54,209 - root - INFO - step: 18555 loss: 2.1250 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8774 +[titan] 2025-10-05 09:56:54,209 - root - INFO - lr: 3.0427e-05 gnorm: 1.05 [11:22:44<13:09:04] +[titan] 2025-10-05 09:57:05,059 - root - INFO - step: 18560 loss: 2.1067 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 09:57:05,060 - root - INFO - lr: 3.0418e-05 gnorm: 1.11 [11:22:55<13:08:53] +[titan] 2025-10-05 09:57:15,909 - root - INFO - step: 18565 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8970 +[titan] 2025-10-05 09:57:15,909 - root - INFO - lr: 3.0409e-05 gnorm: 1.06 [11:23:05<13:08:42] +[titan] 2025-10-05 09:57:26,796 - root - INFO - step: 18570 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:57:26,797 - root - INFO - lr: 3.0400e-05 gnorm: 1.05 [11:23:16<13:08:30] +[titan] 2025-10-05 09:57:37,659 - root - INFO - step: 18575 loss: 2.1669 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:57:37,659 - root - INFO - lr: 3.0392e-05 gnorm: 1.07 [11:23:27<13:08:19] +[titan] 2025-10-05 09:57:48,558 - root - INFO - step: 18580 loss: 2.1694 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:57:48,559 - root - INFO - lr: 
3.0383e-05 gnorm: 1.08 [11:23:38<13:08:08] +[titan] 2025-10-05 09:57:59,464 - root - INFO - step: 18585 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 09:57:59,464 - root - INFO - lr: 3.0374e-05 gnorm: 1.08 [11:23:49<13:07:57] +[titan] 2025-10-05 09:58:10,319 - root - INFO - step: 18590 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 09:58:10,319 - root - INFO - lr: 3.0365e-05 gnorm: 1.09 [11:24:00<13:07:45] +[titan] 2025-10-05 09:58:21,169 - root - INFO - step: 18595 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 09:58:21,169 - root - INFO - lr: 3.0356e-05 gnorm: 1.06 [11:24:11<13:07:34] +[titan] 2025-10-05 09:58:29,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:58:32,051 - root - INFO - step: 18600 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:58:32,051 - root - INFO - lr: 3.0347e-05 gnorm: 1.06 [11:24:22<13:07:23] +[titan] 2025-10-05 09:58:42,941 - root - INFO - step: 18605 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 09:58:42,941 - root - INFO - lr: 3.0339e-05 gnorm: 1.07 [11:24:33<13:07:12] +[titan] 2025-10-05 09:58:53,849 - root - INFO - step: 18610 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 09:58:53,849 - root - INFO - lr: 3.0330e-05 gnorm: 1.13 [11:24:43<13:07:01] +[titan] 2025-10-05 09:59:04,767 - root - INFO - step: 18615 loss: 2.1618 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9098 +[titan] 2025-10-05 09:59:04,767 - root - INFO - lr: 3.0321e-05 gnorm: 1.07 [11:24:54<13:06:49] +[titan] 2025-10-05 09:59:15,655 - root - INFO - step: 18620 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:59:15,655 - root - INFO - lr: 3.0312e-05 gnorm: 1.08 [11:25:05<13:06:38] +[titan] 2025-10-05 09:59:26,502 - root - INFO - step: 18625 loss: 2.1982 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 09:59:26,502 - root - INFO - lr: 3.0303e-05 gnorm: 1.06 [11:25:16<13:06:27] +[titan] 2025-10-05 09:59:37,342 - root - INFO - step: 18630 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:59:37,342 - root - INFO - lr: 
3.0294e-05 gnorm: 1.02 [11:25:27<13:06:16] +[titan] 2025-10-05 09:59:48,204 - root - INFO - step: 18635 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:59:48,205 - root - INFO - lr: 3.0285e-05 gnorm: 1.06 [11:25:38<13:06:04] +[titan] 2025-10-05 09:59:59,141 - root - INFO - step: 18640 loss: 2.1586 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:59:59,142 - root - INFO - lr: 3.0277e-05 gnorm: 1.09 [11:25:49<13:05:53] +[titan] 2025-10-05 10:00:09,999 - root - INFO - step: 18645 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:00:09,999 - root - INFO - lr: 3.0268e-05 gnorm: 1.04 [11:26:00<13:05:42] +[titan] 2025-10-05 10:00:18,686 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:00:20,868 - root - INFO - step: 18650 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 10:00:20,868 - root - INFO - lr: 3.0259e-05 gnorm: 1.06 [11:26:10<13:05:31] +[titan] 2025-10-05 10:00:31,751 - root - INFO - step: 18655 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8955 +[titan] 2025-10-05 10:00:31,751 - root - INFO - lr: 3.0250e-05 gnorm: 1.10 [11:26:21<13:05:20] +[titan] 2025-10-05 10:00:42,624 - root - INFO - step: 18660 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9236 +[titan] 2025-10-05 10:00:42,624 - root - INFO - lr: 3.0241e-05 gnorm: 1.06 [11:26:32<13:05:08] +[titan] 2025-10-05 10:00:53,508 - root - INFO - step: 18665 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 10:00:53,508 - root - INFO - lr: 3.0232e-05 gnorm: 1.07 [11:26:43<13:04:57] +[titan] 2025-10-05 10:01:04,417 - root - INFO - step: 18670 loss: 2.1073 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8611 +[titan] 2025-10-05 10:01:04,417 - root - INFO - lr: 3.0223e-05 gnorm: 1.05 [11:26:54<13:04:46] +[titan] 2025-10-05 10:01:15,322 - root - INFO - step: 18675 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9018 +[titan] 2025-10-05 10:01:15,322 - root - INFO - lr: 3.0215e-05 gnorm: 1.01 [11:27:05<13:04:35] +[titan] 2025-10-05 10:01:26,187 - root - INFO - step: 18680 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 10:01:26,188 - root - INFO - lr: 
3.0206e-05 gnorm: 1.06 [11:27:16<13:04:24] +[titan] 2025-10-05 10:01:37,075 - root - INFO - step: 18685 loss: 2.2297 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9695 +[titan] 2025-10-05 10:01:37,076 - root - INFO - lr: 3.0197e-05 gnorm: 1.09 [11:27:27<13:04:12] +[titan] 2025-10-05 10:01:47,947 - root - INFO - step: 18690 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 10:01:47,947 - root - INFO - lr: 3.0188e-05 gnorm: 1.13 [11:27:38<13:04:01] +[titan] 2025-10-05 10:01:58,853 - root - INFO - step: 18695 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 10:01:58,853 - root - INFO - lr: 3.0179e-05 gnorm: 1.11 [11:27:48<13:03:50] +[titan] 2025-10-05 10:02:07,534 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:02:09,720 - root - INFO - step: 18700 loss: 2.1760 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9218 +[titan] 2025-10-05 10:02:09,720 - root - INFO - lr: 3.0170e-05 gnorm: 1.05 [11:27:59<13:03:39] +[titan] 2025-10-05 10:02:20,646 - root - INFO - step: 18705 loss: 2.1878 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:02:20,646 - root - INFO - lr: 3.0161e-05 gnorm: 1.13 [11:28:10<13:03:28] +[titan] 2025-10-05 10:02:31,519 - root - INFO - step: 18710 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9250 +[titan] 2025-10-05 10:02:31,519 - root - INFO - lr: 3.0153e-05 gnorm: 1.03 [11:28:21<13:03:16] +[titan] 2025-10-05 10:02:42,408 - root - INFO - step: 18715 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 10:02:42,408 - root - INFO - lr: 3.0144e-05 gnorm: 1.14 [11:28:32<13:03:05] +[titan] 2025-10-05 10:02:53,291 - root - INFO - step: 18720 loss: 2.1198 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 10:02:53,292 - root - INFO - lr: 3.0135e-05 gnorm: 1.04 [11:28:43<13:02:54] +[titan] 2025-10-05 10:03:04,164 - root - INFO - step: 18725 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 10:03:04,164 - root - INFO - lr: 3.0126e-05 gnorm: 1.09 [11:28:54<13:02:43] +[titan] 2025-10-05 10:03:15,026 - root - INFO - step: 18730 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 10:03:15,026 - root - INFO - lr: 
3.0117e-05 gnorm: 1.09 [11:29:05<13:02:31] +[titan] 2025-10-05 10:03:25,889 - root - INFO - step: 18735 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:03:25,889 - root - INFO - lr: 3.0108e-05 gnorm: 1.09 [11:29:15<13:02:20] +[titan] 2025-10-05 10:03:36,788 - root - INFO - step: 18740 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 10:03:36,789 - root - INFO - lr: 3.0099e-05 gnorm: 1.05 [11:29:26<13:02:09] +[titan] 2025-10-05 10:03:47,682 - root - INFO - step: 18745 loss: 2.1174 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:03:47,683 - root - INFO - lr: 3.0090e-05 gnorm: 1.02 [11:29:37<13:01:58] +[titan] 2025-10-05 10:03:56,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:03:58,556 - root - INFO - step: 18750 loss: 2.1769 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 10:03:58,557 - root - INFO - lr: 3.0082e-05 gnorm: 1.06 [11:29:48<13:01:47] +[titan] 2025-10-05 10:04:09,464 - root - INFO - step: 18755 loss: 2.1852 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9314 +[titan] 2025-10-05 10:04:09,464 - root - INFO - lr: 3.0073e-05 gnorm: 1.08 [11:29:59<13:01:35] +[titan] 2025-10-05 10:04:20,357 - root - INFO - step: 18760 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:04:20,357 - root - INFO - lr: 3.0064e-05 gnorm: 1.07 [11:30:10<13:01:24] +[titan] 2025-10-05 10:04:31,231 - root - INFO - step: 18765 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 10:04:31,232 - root - INFO - lr: 3.0055e-05 gnorm: 1.11 [11:30:21<13:01:13] +[titan] 2025-10-05 10:04:42,135 - root - INFO - step: 18770 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 10:04:42,136 - root - INFO - lr: 3.0046e-05 gnorm: 1.10 [11:30:32<13:01:02] +[titan] 2025-10-05 10:04:53,006 - root - INFO - step: 18775 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 10:04:53,006 - root - INFO - lr: 3.0037e-05 gnorm: 1.03 [11:30:43<13:00:51] +[titan] 2025-10-05 10:05:03,904 - root - INFO - step: 18780 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 10:05:03,904 - root - INFO - lr: 
3.0028e-05 gnorm: 1.07 [11:30:53<13:00:39] +[titan] 2025-10-05 10:05:14,747 - root - INFO - step: 18785 loss: 2.1812 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 10:05:14,748 - root - INFO - lr: 3.0020e-05 gnorm: 1.09 [11:31:04<13:00:28] +[titan] 2025-10-05 10:05:25,598 - root - INFO - step: 18790 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 10:05:25,598 - root - INFO - lr: 3.0011e-05 gnorm: 1.06 [11:31:15<13:00:17] +[titan] 2025-10-05 10:05:36,466 - root - INFO - step: 18795 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 10:05:36,466 - root - INFO - lr: 3.0002e-05 gnorm: 1.04 [11:31:26<13:00:06] +[titan] 2025-10-05 10:05:45,130 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:05:47,339 - root - INFO - step: 18800 loss: 2.2290 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 10:05:47,340 - root - INFO - lr: 2.9993e-05 gnorm: 1.07 [11:31:37<12:59:54] +[titan] 2025-10-05 10:05:58,210 - root - INFO - step: 18805 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 10:05:58,210 - root - INFO - lr: 2.9984e-05 gnorm: 1.07 [11:31:48<12:59:43] +[titan] 2025-10-05 10:06:09,120 - root - INFO - step: 18810 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 10:06:09,121 - root - INFO - lr: 2.9975e-05 gnorm: 1.04 [11:31:59<12:59:32] +[titan] 2025-10-05 10:06:19,986 - root - INFO - step: 18815 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 10:06:19,986 - root - INFO - lr: 2.9966e-05 gnorm: 1.08 [11:32:10<12:59:21] +[titan] 2025-10-05 10:06:30,847 - root - INFO - step: 18820 loss: 2.1851 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 10:06:30,847 - root - INFO - lr: 2.9957e-05 gnorm: 1.05 [11:32:20<12:59:10] +[titan] 2025-10-05 10:06:41,727 - root - INFO - step: 18825 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:06:41,727 - root - INFO - lr: 2.9949e-05 gnorm: 1.07 [11:32:31<12:58:58] +[titan] 2025-10-05 10:06:52,600 - root - INFO - step: 18830 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 10:06:52,601 - root - INFO - lr: 
2.9940e-05 gnorm: 1.10 [11:32:42<12:58:47] +[titan] 2025-10-05 10:07:03,527 - root - INFO - step: 18835 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9505 +[titan] 2025-10-05 10:07:03,527 - root - INFO - lr: 2.9931e-05 gnorm: 1.07 [11:32:53<12:58:36] +[titan] 2025-10-05 10:07:14,367 - root - INFO - step: 18840 loss: 2.2003 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 10:07:14,367 - root - INFO - lr: 2.9922e-05 gnorm: 1.08 [11:33:04<12:58:25] +[titan] 2025-10-05 10:07:25,248 - root - INFO - step: 18845 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 10:07:25,248 - root - INFO - lr: 2.9913e-05 gnorm: 1.08 [11:33:15<12:58:14] +[titan] 2025-10-05 10:07:33,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:07:36,110 - root - INFO - step: 18850 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 10:07:36,110 - root - INFO - lr: 2.9904e-05 gnorm: 1.09 [11:33:26<12:58:02] +[titan] 2025-10-05 10:07:46,979 - root - INFO - step: 18855 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:07:46,979 - root - INFO - lr: 2.9895e-05 gnorm: 1.07 [11:33:37<12:57:51] +[titan] 2025-10-05 10:07:57,853 - root - INFO - step: 18860 loss: 2.1443 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:07:57,853 - root - INFO - lr: 2.9886e-05 gnorm: 1.06 [11:33:47<12:57:40] +[titan] 2025-10-05 10:08:08,767 - root - INFO - step: 18865 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 10:08:08,767 - root - INFO - lr: 2.9878e-05 gnorm: 1.05 [11:33:58<12:57:29] +[titan] 2025-10-05 10:08:19,643 - root - INFO - step: 18870 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:08:19,643 - root - INFO - lr: 2.9869e-05 gnorm: 1.09 [11:34:09<12:57:17] +[titan] 2025-10-05 10:08:30,505 - root - INFO - step: 18875 loss: 2.1432 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:08:30,505 - root - INFO - lr: 2.9860e-05 gnorm: 1.11 [11:34:20<12:57:06] +[titan] 2025-10-05 10:08:41,373 - root - INFO - step: 18880 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8639 +[titan] 2025-10-05 10:08:41,373 - root - INFO - lr: 
2.9851e-05 gnorm: 1.04 [11:34:31<12:56:55] +[titan] 2025-10-05 10:08:52,227 - root - INFO - step: 18885 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:08:52,227 - root - INFO - lr: 2.9842e-05 gnorm: 1.09 [11:34:42<12:56:44] +[titan] 2025-10-05 10:09:03,099 - root - INFO - step: 18890 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 10:09:03,099 - root - INFO - lr: 2.9833e-05 gnorm: 1.06 [11:34:53<12:56:33] +[titan] 2025-10-05 10:09:13,982 - root - INFO - step: 18895 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 10:09:13,982 - root - INFO - lr: 2.9824e-05 gnorm: 1.06 [11:35:04<12:56:21] +[titan] 2025-10-05 10:09:22,708 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:09:24,889 - root - INFO - step: 18900 loss: 2.2596 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 10:09:24,889 - root - INFO - lr: 2.9815e-05 gnorm: 1.09 [11:35:14<12:56:10] +[titan] 2025-10-05 10:09:35,723 - root - INFO - step: 18905 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9049 +[titan] 2025-10-05 10:09:35,723 - root - INFO - lr: 2.9807e-05 gnorm: 2.16 [11:35:25<12:55:59] +[titan] 2025-10-05 10:09:46,616 - root - INFO - step: 18910 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 10:09:46,616 - root - INFO - lr: 2.9798e-05 gnorm: 1.10 [11:35:36<12:55:48] +[titan] 2025-10-05 10:09:57,505 - root - INFO - step: 18915 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8990 +[titan] 2025-10-05 10:09:57,506 - root - INFO - lr: 2.9789e-05 gnorm: 1.06 [11:35:47<12:55:36] +[titan] 2025-10-05 10:10:08,408 - root - INFO - step: 18920 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 10:10:08,409 - root - INFO - lr: 2.9780e-05 gnorm: 1.11 [11:35:58<12:55:25] +[titan] 2025-10-05 10:10:19,290 - root - INFO - step: 18925 loss: 2.1401 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8910 +[titan] 2025-10-05 10:10:19,290 - root - INFO - lr: 2.9771e-05 gnorm: 1.09 [11:36:09<12:55:14] +[titan] 2025-10-05 10:10:30,188 - root - INFO - step: 18930 loss: 2.1578 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9075 +[titan] 2025-10-05 10:10:30,188 - root - INFO - lr: 
2.9762e-05 gnorm: 1.08 [11:36:20<12:55:03] +[titan] 2025-10-05 10:10:41,057 - root - INFO - step: 18935 loss: 2.1455 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:10:41,057 - root - INFO - lr: 2.9753e-05 gnorm: 1.08 [11:36:31<12:54:52] +[titan] 2025-10-05 10:10:51,943 - root - INFO - step: 18940 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 10:10:51,943 - root - INFO - lr: 2.9744e-05 gnorm: 1.09 [11:36:41<12:54:40] +[titan] 2025-10-05 10:11:00,933 - root - INFO - Dumping profiler traces at step 18944 +[titan] 2025-10-05 10:11:00,972 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:11:03,165 - root - INFO - step: 18945 loss: 2.2146 memory: 118.84GiB(85.28%) tps: 29,200 tflops: 405.11 mfu: 40.96% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 10:11:03,166 - root - INFO - lr: 2.9736e-05 gnorm: 1.05 [11:36:53<12:54:30] +[titan] 2025-10-05 10:11:11,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:11:14,040 - root - INFO - step: 18950 loss: 2.2217 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9615 +[titan] 2025-10-05 10:11:14,040 - root - INFO - lr: 2.9727e-05 gnorm: 1.10 [11:37:04<12:54:18] +[titan] 2025-10-05 10:11:24,927 - root - INFO - step: 18955 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 10:11:24,927 - root - INFO - lr: 2.9718e-05 gnorm: 1.04 [11:37:14<12:54:07] +[titan] 2025-10-05 10:11:35,836 - root - INFO - step: 18960 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:11:35,836 - root - INFO - lr: 2.9709e-05 gnorm: 1.08 [11:37:25<12:53:56] +[titan] 2025-10-05 10:11:46,725 - root - INFO - step: 18965 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9561 +[titan] 2025-10-05 10:11:46,725 - root - INFO - lr: 2.9700e-05 gnorm: 1.06 [11:37:36<12:53:45] +[titan] 2025-10-05 10:11:57,607 - root - INFO - step: 18970 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 10:11:57,608 - root - INFO - lr: 2.9691e-05 gnorm: 1.06 [11:37:47<12:53:34] +[titan] 2025-10-05 10:12:08,492 - root - INFO - step: 18975 loss: 2.1885 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9336 +[titan] 2025-10-05 10:12:08,492 - root - INFO - lr: 2.9682e-05 gnorm: 1.05 [11:37:58<12:53:22] +[titan] 2025-10-05 10:12:19,363 - root - INFO - step: 18980 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 10:12:19,364 - root - INFO - lr: 
2.9673e-05 gnorm: 1.05 [11:38:09<12:53:11] +[titan] 2025-10-05 10:12:30,251 - root - INFO - step: 18985 loss: 2.2178 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 10:12:30,252 - root - INFO - lr: 2.9664e-05 gnorm: 1.08 [11:38:20<12:53:00] +[titan] 2025-10-05 10:12:41,145 - root - INFO - step: 18990 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 10:12:41,146 - root - INFO - lr: 2.9656e-05 gnorm: 1.04 [11:38:31<12:52:49] +[titan] 2025-10-05 10:12:52,037 - root - INFO - step: 18995 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:12:52,037 - root - INFO - lr: 2.9647e-05 gnorm: 1.06 [11:38:42<12:52:38] +[titan] 2025-10-05 10:13:00,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:13:02,914 - root - INFO - step: 19000 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9377 +[titan] 2025-10-05 10:13:02,914 - root - INFO - lr: 2.9638e-05 gnorm: 1.06 [11:38:52<12:52:26] +[titan] 2025-10-05 10:13:13,797 - root - INFO - step: 19005 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 10:13:13,797 - root - INFO - lr: 2.9629e-05 gnorm: 1.09 [11:39:03<12:52:15] +[titan] 2025-10-05 10:13:24,684 - root - INFO - step: 19010 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 10:13:24,684 - root - INFO - lr: 2.9620e-05 gnorm: 1.04 [11:39:14<12:52:04] +[titan] 2025-10-05 10:13:35,565 - root - INFO - step: 19015 loss: 2.1615 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 10:13:35,565 - root - INFO - lr: 2.9611e-05 gnorm: 1.06 [11:39:25<12:51:53] +[titan] 2025-10-05 10:13:46,458 - root - INFO - step: 19020 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:13:46,458 - root - INFO - lr: 2.9602e-05 gnorm: 1.09 [11:39:36<12:51:42] +[titan] 2025-10-05 10:13:57,365 - root - INFO - step: 19025 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9259 +[titan] 2025-10-05 10:13:57,365 - root - INFO - lr: 2.9593e-05 gnorm: 1.11 [11:39:47<12:51:30] +[titan] 2025-10-05 10:14:08,243 - root - INFO - step: 19030 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 10:14:08,244 - root - INFO - lr: 
2.9585e-05 gnorm: 1.11 [11:39:58<12:51:19] +[titan] 2025-10-05 10:14:19,180 - root - INFO - step: 19035 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.68 mfu: 42.03% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 10:14:19,180 - root - INFO - lr: 2.9576e-05 gnorm: 1.08 [11:40:09<12:51:08] +[titan] 2025-10-05 10:14:30,057 - root - INFO - step: 19040 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 10:14:30,058 - root - INFO - lr: 2.9567e-05 gnorm: 1.08 [11:40:20<12:50:57] +[titan] 2025-10-05 10:14:40,934 - root - INFO - step: 19045 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:14:40,934 - root - INFO - lr: 2.9558e-05 gnorm: 1.05 [11:40:30<12:50:46] +[titan] 2025-10-05 10:14:49,618 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:14:51,803 - root - INFO - step: 19050 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 10:14:51,803 - root - INFO - lr: 2.9549e-05 gnorm: 1.10 [11:40:41<12:50:34] +[titan] 2025-10-05 10:15:02,687 - root - INFO - step: 19055 loss: 2.1320 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:15:02,687 - root - INFO - lr: 2.9540e-05 gnorm: 1.03 [11:40:52<12:50:23] +[titan] 2025-10-05 10:15:13,599 - root - INFO - step: 19060 loss: 2.1731 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:15:13,600 - root - INFO - lr: 2.9531e-05 gnorm: 1.05 [11:41:03<12:50:12] +[titan] 2025-10-05 10:15:24,470 - root - INFO - step: 19065 loss: 2.0790 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 10:15:24,470 - root - INFO - lr: 2.9522e-05 gnorm: 1.02 [11:41:14<12:50:01] +[titan] 2025-10-05 10:15:35,340 - root - INFO - step: 19070 loss: 2.1215 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 10:15:35,340 - root - INFO - lr: 2.9513e-05 gnorm: 1.03 [11:41:25<12:49:50] +[titan] 2025-10-05 10:15:46,220 - root - INFO - step: 19075 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 10:15:46,221 - root - INFO - lr: 2.9505e-05 gnorm: 1.03 [11:41:36<12:49:38] +[titan] 2025-10-05 10:15:57,087 - root - INFO - step: 19080 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 10:15:57,087 - root - INFO - lr: 
2.9496e-05 gnorm: 1.08 [11:41:47<12:49:27] +[titan] 2025-10-05 10:16:07,949 - root - INFO - step: 19085 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:16:07,949 - root - INFO - lr: 2.9487e-05 gnorm: 1.03 [11:41:57<12:49:16] +[titan] 2025-10-05 10:16:18,866 - root - INFO - step: 19090 loss: 2.1027 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:16:18,866 - root - INFO - lr: 2.9478e-05 gnorm: 1.05 [11:42:08<12:49:05] +[titan] 2025-10-05 10:16:29,722 - root - INFO - step: 19095 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 10:16:29,722 - root - INFO - lr: 2.9469e-05 gnorm: 1.07 [11:42:19<12:48:54] +[titan] 2025-10-05 10:16:38,404 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:16:40,594 - root - INFO - step: 19100 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 10:16:40,594 - root - INFO - lr: 2.9460e-05 gnorm: 1.12 [11:42:30<12:48:42] +[titan] 2025-10-05 10:16:51,467 - root - INFO - step: 19105 loss: 2.1659 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 10:16:51,467 - root - INFO - lr: 2.9451e-05 gnorm: 1.07 [11:42:41<12:48:31] +[titan] 2025-10-05 10:17:02,333 - root - INFO - step: 19110 loss: 2.1571 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:17:02,333 - root - INFO - lr: 2.9442e-05 gnorm: 1.07 [11:42:52<12:48:20] +[titan] 2025-10-05 10:17:13,254 - root - INFO - step: 19115 loss: 2.1907 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9353 +[titan] 2025-10-05 10:17:13,254 - root - INFO - lr: 2.9433e-05 gnorm: 1.05 [11:43:03<12:48:09] +[titan] 2025-10-05 10:17:24,144 - root - INFO - step: 19120 loss: 2.2215 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 10:17:24,145 - root - INFO - lr: 2.9424e-05 gnorm: 1.08 [11:43:14<12:47:58] +[titan] 2025-10-05 10:17:34,985 - root - INFO - step: 19125 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 10:17:34,985 - root - INFO - lr: 2.9416e-05 gnorm: 1.07 [11:43:25<12:47:46] +[titan] 2025-10-05 10:17:45,834 - root - INFO - step: 19130 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 10:17:45,834 - root - INFO - lr: 
2.9407e-05 gnorm: 1.04 [11:43:35<12:47:35] +[titan] 2025-10-05 10:17:56,697 - root - INFO - step: 19135 loss: 2.1835 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 10:17:56,697 - root - INFO - lr: 2.9398e-05 gnorm: 1.04 [11:43:46<12:47:24] +[titan] 2025-10-05 10:18:07,545 - root - INFO - step: 19140 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 10:18:07,545 - root - INFO - lr: 2.9389e-05 gnorm: 1.06 [11:43:57<12:47:13] +[titan] 2025-10-05 10:18:18,458 - root - INFO - step: 19145 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9199 +[titan] 2025-10-05 10:18:18,458 - root - INFO - lr: 2.9380e-05 gnorm: 1.08 [11:44:08<12:47:02] +[titan] 2025-10-05 10:18:27,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:18:29,337 - root - INFO - step: 19150 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8515 +[titan] 2025-10-05 10:18:29,338 - root - INFO - lr: 2.9371e-05 gnorm: 1.34 [11:44:19<12:46:50] +[titan] 2025-10-05 10:18:40,256 - root - INFO - step: 19155 loss: 2.1332 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8849 +[titan] 2025-10-05 10:18:40,256 - root - INFO - lr: 2.9362e-05 gnorm: 1.09 [11:44:30<12:46:39] +[titan] 2025-10-05 10:18:51,145 - root - INFO - step: 19160 loss: 2.1481 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 10:18:51,145 - root - INFO - lr: 2.9353e-05 gnorm: 1.07 [11:44:41<12:46:28] +[titan] 2025-10-05 10:19:02,037 - root - INFO - step: 19165 loss: 2.1516 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 10:19:02,037 - root - INFO - lr: 2.9344e-05 gnorm: 1.05 [11:44:52<12:46:17] +[titan] 2025-10-05 10:19:12,937 - root - INFO - step: 19170 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9133 +[titan] 2025-10-05 10:19:12,937 - root - INFO - lr: 2.9336e-05 gnorm: 1.08 [11:45:02<12:46:06] +[titan] 2025-10-05 10:19:23,875 - root - INFO - step: 19175 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 10:19:23,875 - root - INFO - lr: 2.9327e-05 gnorm: 1.04 [11:45:13<12:45:54] +[titan] 2025-10-05 10:19:34,776 - root - INFO - step: 19180 loss: 2.1428 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 10:19:34,776 - root - INFO - lr: 
2.9318e-05 gnorm: 1.07 [11:45:24<12:45:43] +[titan] 2025-10-05 10:19:45,696 - root - INFO - step: 19185 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 10:19:45,696 - root - INFO - lr: 2.9309e-05 gnorm: 1.05 [11:45:35<12:45:32] +[titan] 2025-10-05 10:19:56,559 - root - INFO - step: 19190 loss: 2.2063 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 10:19:56,559 - root - INFO - lr: 2.9300e-05 gnorm: 1.05 [11:45:46<12:45:21] +[titan] 2025-10-05 10:20:07,440 - root - INFO - step: 19195 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 10:20:07,440 - root - INFO - lr: 2.9291e-05 gnorm: 1.06 [11:45:57<12:45:10] +[titan] 2025-10-05 10:20:16,202 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:20:18,388 - root - INFO - step: 19200 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:20:18,389 - root - INFO - lr: 2.9282e-05 gnorm: 1.10 [11:46:08<12:44:59] +[titan] 2025-10-05 10:20:29,261 - root - INFO - step: 19205 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9722 +[titan] 2025-10-05 10:20:29,261 - root - INFO - lr: 2.9273e-05 gnorm: 1.05 [11:46:19<12:44:47] +[titan] 2025-10-05 10:20:40,136 - root - INFO - step: 19210 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 10:20:40,136 - root - INFO - lr: 2.9264e-05 gnorm: 1.05 [11:46:30<12:44:36] +[titan] 2025-10-05 10:20:51,016 - root - INFO - step: 19215 loss: 2.1099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8637 +[titan] 2025-10-05 10:20:51,016 - root - INFO - lr: 2.9255e-05 gnorm: 1.02 [11:46:41<12:44:25] +[titan] 2025-10-05 10:21:01,917 - root - INFO - step: 19220 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 10:21:01,918 - root - INFO - lr: 2.9247e-05 gnorm: 1.06 [11:46:51<12:44:14] +[titan] 2025-10-05 10:21:12,779 - root - INFO - step: 19225 loss: 2.1977 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9427 +[titan] 2025-10-05 10:21:12,780 - root - INFO - lr: 2.9238e-05 gnorm: 1.08 [11:47:02<12:44:03] +[titan] 2025-10-05 10:21:23,714 - root - INFO - step: 19230 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 10:21:23,714 - root - INFO - lr: 
2.9229e-05 gnorm: 1.09 [11:47:13<12:43:51] +[titan] 2025-10-05 10:21:34,597 - root - INFO - step: 19235 loss: 2.1070 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 10:21:34,597 - root - INFO - lr: 2.9220e-05 gnorm: 1.08 [11:47:24<12:43:40] +[titan] 2025-10-05 10:21:45,492 - root - INFO - step: 19240 loss: 2.0962 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 10:21:45,492 - root - INFO - lr: 2.9211e-05 gnorm: 1.04 [11:47:35<12:43:29] +[titan] 2025-10-05 10:21:56,367 - root - INFO - step: 19245 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 10:21:56,367 - root - INFO - lr: 2.9202e-05 gnorm: 1.08 [11:47:46<12:43:18] +[titan] 2025-10-05 10:22:05,109 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:22:07,295 - root - INFO - step: 19250 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:22:07,296 - root - INFO - lr: 2.9193e-05 gnorm: 1.04 [11:47:57<12:43:07] +[titan] 2025-10-05 10:22:18,238 - root - INFO - step: 19255 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 10:22:18,238 - root - INFO - lr: 2.9184e-05 gnorm: 1.06 [11:48:08<12:42:56] +[titan] 2025-10-05 10:22:29,120 - root - INFO - step: 19260 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 10:22:29,120 - root - INFO - lr: 2.9175e-05 gnorm: 1.10 [11:48:19<12:42:44] +[titan] 2025-10-05 10:22:40,008 - root - INFO - step: 19265 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:22:40,008 - root - INFO - lr: 2.9167e-05 gnorm: 1.08 [11:48:30<12:42:33] +[titan] 2025-10-05 10:22:50,875 - root - INFO - step: 19270 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 10:22:50,875 - root - INFO - lr: 2.9158e-05 gnorm: 1.07 [11:48:40<12:42:22] +[titan] 2025-10-05 10:23:01,737 - root - INFO - step: 19275 loss: 2.1975 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:23:01,737 - root - INFO - lr: 2.9149e-05 gnorm: 1.08 [11:48:51<12:42:11] +[titan] 2025-10-05 10:23:12,658 - root - INFO - step: 19280 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 10:23:12,658 - root - INFO - lr: 
2.9140e-05 gnorm: 1.06 [11:49:02<12:42:00] +[titan] 2025-10-05 10:23:23,594 - root - INFO - step: 19285 loss: 2.1554 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9047 +[titan] 2025-10-05 10:23:23,595 - root - INFO - lr: 2.9131e-05 gnorm: 1.11 [11:49:13<12:41:48] +[titan] 2025-10-05 10:23:34,471 - root - INFO - step: 19290 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9525 +[titan] 2025-10-05 10:23:34,471 - root - INFO - lr: 2.9122e-05 gnorm: 1.08 [11:49:24<12:41:37] +[titan] 2025-10-05 10:23:45,370 - root - INFO - step: 19295 loss: 2.2145 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 10:23:45,370 - root - INFO - lr: 2.9113e-05 gnorm: 1.11 [11:49:35<12:41:26] +[titan] 2025-10-05 10:23:54,147 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:23:56,333 - root - INFO - step: 19300 loss: 2.1524 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 10:23:56,333 - root - INFO - lr: 2.9104e-05 gnorm: 1.12 [11:49:46<12:41:15] +[titan] 2025-10-05 10:24:07,214 - root - INFO - step: 19305 loss: 2.1152 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 10:24:07,214 - root - INFO - lr: 2.9095e-05 gnorm: 1.06 [11:49:57<12:41:04] +[titan] 2025-10-05 10:24:18,150 - root - INFO - step: 19310 loss: 2.1360 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8875 +[titan] 2025-10-05 10:24:18,150 - root - INFO - lr: 2.9086e-05 gnorm: 1.11 [11:50:08<12:40:53] +[titan] 2025-10-05 10:24:29,081 - root - INFO - step: 19315 loss: 2.1682 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9158 +[titan] 2025-10-05 10:24:29,081 - root - INFO - lr: 2.9077e-05 gnorm: 1.08 [11:50:19<12:40:42] +[titan] 2025-10-05 10:24:39,944 - root - INFO - step: 19320 loss: 2.1420 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 10:24:39,945 - root - INFO - lr: 2.9069e-05 gnorm: 1.05 [11:50:29<12:40:30] +[titan] 2025-10-05 10:24:50,812 - root - INFO - step: 19325 loss: 2.1255 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 10:24:50,812 - root - INFO - lr: 2.9060e-05 gnorm: 1.04 [11:50:40<12:40:19] +[titan] 2025-10-05 10:25:01,681 - root - INFO - step: 19330 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 10:25:01,681 - root - INFO - lr: 
2.9051e-05 gnorm: 1.04 [11:50:51<12:40:08] +[titan] 2025-10-05 10:25:12,540 - root - INFO - step: 19335 loss: 2.1642 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 10:25:12,540 - root - INFO - lr: 2.9042e-05 gnorm: 1.05 [11:51:02<12:39:57] +[titan] 2025-10-05 10:25:23,440 - root - INFO - step: 19340 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 10:25:23,441 - root - INFO - lr: 2.9033e-05 gnorm: 1.08 [11:51:13<12:39:46] +[titan] 2025-10-05 10:25:34,323 - root - INFO - step: 19345 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 10:25:34,323 - root - INFO - lr: 2.9024e-05 gnorm: 1.06 [11:51:24<12:39:34] +[titan] 2025-10-05 10:25:42,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:25:45,172 - root - INFO - step: 19350 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 10:25:45,173 - root - INFO - lr: 2.9015e-05 gnorm: 1.06 [11:51:35<12:39:23] +[titan] 2025-10-05 10:25:56,041 - root - INFO - step: 19355 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 10:25:56,041 - root - INFO - lr: 2.9006e-05 gnorm: 1.05 [11:51:46<12:39:12] +[titan] 2025-10-05 10:26:06,901 - root - INFO - step: 19360 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 10:26:06,901 - root - INFO - lr: 2.8997e-05 gnorm: 1.09 [11:51:56<12:39:01] +[titan] 2025-10-05 10:26:17,768 - root - INFO - step: 19365 loss: 2.2565 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 10:26:17,768 - root - INFO - lr: 2.8988e-05 gnorm: 1.06 [11:52:07<12:38:49] +[titan] 2025-10-05 10:26:28,693 - root - INFO - step: 19370 loss: 2.1913 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 10:26:28,693 - root - INFO - lr: 2.8980e-05 gnorm: 1.07 [11:52:18<12:38:38] +[titan] 2025-10-05 10:26:39,550 - root - INFO - step: 19375 loss: 2.2098 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:26:39,550 - root - INFO - lr: 2.8971e-05 gnorm: 1.10 [11:52:29<12:38:27] +[titan] 2025-10-05 10:26:50,433 - root - INFO - step: 19380 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 10:26:50,433 - root - INFO - lr: 
2.8962e-05 gnorm: 1.07 [11:52:40<12:38:16] +[titan] 2025-10-05 10:27:01,279 - root - INFO - step: 19385 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 10:27:01,279 - root - INFO - lr: 2.8953e-05 gnorm: 1.04 [11:52:51<12:38:05] +[titan] 2025-10-05 10:27:12,140 - root - INFO - step: 19390 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 10:27:12,141 - root - INFO - lr: 2.8944e-05 gnorm: 1.10 [11:53:02<12:37:53] +[titan] 2025-10-05 10:27:23,043 - root - INFO - step: 19395 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:27:23,043 - root - INFO - lr: 2.8935e-05 gnorm: 1.07 [11:53:13<12:37:42] +[titan] 2025-10-05 10:27:31,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:27:33,895 - root - INFO - step: 19400 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:27:33,895 - root - INFO - lr: 2.8926e-05 gnorm: 1.06 [11:53:23<12:37:31] +[titan] 2025-10-05 10:27:44,768 - root - INFO - step: 19405 loss: 2.0933 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 10:27:44,768 - root - INFO - lr: 2.8917e-05 gnorm: 1.05 [11:53:34<12:37:20] +[titan] 2025-10-05 10:27:55,669 - root - INFO - step: 19410 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 10:27:55,669 - root - INFO - lr: 2.8908e-05 gnorm: 1.05 [11:53:45<12:37:09] +[titan] 2025-10-05 10:28:06,538 - root - INFO - step: 19415 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 10:28:06,538 - root - INFO - lr: 2.8899e-05 gnorm: 1.07 [11:53:56<12:36:57] +[titan] 2025-10-05 10:28:17,407 - root - INFO - step: 19420 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:28:17,407 - root - INFO - lr: 2.8890e-05 gnorm: 1.09 [11:54:07<12:36:46] +[titan] 2025-10-05 10:28:28,336 - root - INFO - step: 19425 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9504 +[titan] 2025-10-05 10:28:28,336 - root - INFO - lr: 2.8882e-05 gnorm: 1.11 [11:54:18<12:36:35] +[titan] 2025-10-05 10:28:39,218 - root - INFO - step: 19430 loss: 2.1045 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8601 +[titan] 2025-10-05 10:28:39,218 - root - INFO - lr: 
2.8873e-05 gnorm: 1.08 [11:54:29<12:36:24] +[titan] 2025-10-05 10:28:50,126 - root - INFO - step: 19435 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9318 +[titan] 2025-10-05 10:28:50,127 - root - INFO - lr: 2.8864e-05 gnorm: 1.07 [11:54:40<12:36:13] +[titan] 2025-10-05 10:29:01,033 - root - INFO - step: 19440 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9099 +[titan] 2025-10-05 10:29:01,033 - root - INFO - lr: 2.8855e-05 gnorm: 1.04 [11:54:51<12:36:02] +[titan] 2025-10-05 10:29:11,909 - root - INFO - step: 19445 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 10:29:11,909 - root - INFO - lr: 2.8846e-05 gnorm: 1.06 [11:55:01<12:35:50] +[titan] 2025-10-05 10:29:20,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:29:22,834 - root - INFO - step: 19450 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8914 +[titan] 2025-10-05 10:29:22,834 - root - INFO - lr: 2.8837e-05 gnorm: 1.04 [11:55:12<12:35:39] +[titan] 2025-10-05 10:29:33,814 - root - INFO - step: 19455 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 29,845 tflops: 414.05 mfu: 41.87% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:29:33,814 - root - INFO - lr: 2.8828e-05 gnorm: 1.06 [11:55:23<12:35:28] +[titan] 2025-10-05 10:29:36,176 - root - INFO - Dumping profiler traces at step 19456 +[titan] 2025-10-05 10:29:36,216 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:29:44,902 - root - INFO - step: 19460 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 29,553 tflops: 410.01 mfu: 41.46% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:29:44,902 - root - INFO - lr: 2.8819e-05 gnorm: 1.04 [11:55:34<12:35:17] +[titan] 2025-10-05 10:29:55,774 - root - INFO - step: 19465 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:29:55,774 - root - INFO - lr: 2.8810e-05 gnorm: 1.05 [11:55:45<12:35:06] +[titan] 2025-10-05 10:30:06,632 - root - INFO - step: 19470 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8999 +[titan] 2025-10-05 10:30:06,632 - root - INFO - lr: 2.8801e-05 gnorm: 1.06 [11:55:56<12:34:55] +[titan] 2025-10-05 10:30:17,527 - root - INFO - step: 19475 loss: 2.0697 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8296 +[titan] 2025-10-05 10:30:17,527 - root - INFO - lr: 2.8792e-05 gnorm: 1.03 [11:56:07<12:34:44] +[titan] 2025-10-05 10:30:28,427 - root - INFO - step: 19480 loss: 
2.2055 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 10:30:28,427 - root - INFO - lr: 2.8784e-05 gnorm: 1.06 [11:56:18<12:34:32] +[titan] 2025-10-05 10:30:39,303 - root - INFO - step: 19485 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 10:30:39,303 - root - INFO - lr: 2.8775e-05 gnorm: 1.05 [11:56:29<12:34:21] +[titan] 2025-10-05 10:30:50,176 - root - INFO - step: 19490 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:30:50,176 - root - INFO - lr: 2.8766e-05 gnorm: 1.09 [11:56:40<12:34:10] +[titan] 2025-10-05 10:31:01,024 - root - INFO - step: 19495 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:31:01,024 - root - INFO - lr: 2.8757e-05 gnorm: 1.06 [11:56:51<12:33:59] +[titan] 2025-10-05 10:31:09,701 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:31:11,884 - root - INFO - step: 19500 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9032 +[titan] 2025-10-05 10:31:11,884 - root - INFO - lr: 2.8748e-05 gnorm: 1.04 [11:57:01<12:33:48] +[titan] 2025-10-05 10:31:22,776 - root - INFO - step: 19505 loss: 2.1755 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 10:31:22,776 - root - INFO - lr: 2.8739e-05 gnorm: 1.05 [11:57:12<12:33:36] +[titan] 2025-10-05 10:31:33,671 - root - INFO - step: 19510 loss: 2.1889 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:31:33,672 - root - INFO - lr: 2.8730e-05 gnorm: 1.09 [11:57:23<12:33:25] +[titan] 2025-10-05 10:31:44,519 - root - INFO - step: 19515 loss: 2.1331 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8848 +[titan] 2025-10-05 10:31:44,519 - root - INFO - lr: 2.8721e-05 gnorm: 1.09 [11:57:34<12:33:14] +[titan] 2025-10-05 10:31:55,368 - root - INFO - step: 19520 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:31:55,368 - root - INFO - lr: 2.8712e-05 gnorm: 1.09 [11:57:45<12:33:03] +[titan] 2025-10-05 10:32:06,223 - root - INFO - step: 19525 loss: 2.1590 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 10:32:06,223 - root - INFO - lr: 2.8703e-05 gnorm: 1.04 [11:57:56<12:32:52] +[titan] 2025-10-05 10:32:17,080 - root - INFO - step: 19530 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:32:17,081 - root - INFO - lr: 
2.8694e-05 gnorm: 1.03 [11:58:07<12:32:40] +[titan] 2025-10-05 10:32:27,960 - root - INFO - step: 19535 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 10:32:27,960 - root - INFO - lr: 2.8686e-05 gnorm: 1.12 [11:58:17<12:32:29] +[titan] 2025-10-05 10:32:38,837 - root - INFO - step: 19540 loss: 2.1660 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 10:32:38,837 - root - INFO - lr: 2.8677e-05 gnorm: 1.05 [11:58:28<12:32:18] +[titan] 2025-10-05 10:32:49,713 - root - INFO - step: 19545 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 10:32:49,713 - root - INFO - lr: 2.8668e-05 gnorm: 1.05 [11:58:39<12:32:07] +[titan] 2025-10-05 10:32:58,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:33:00,607 - root - INFO - step: 19550 loss: 2.1396 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8912 +[titan] 2025-10-05 10:33:00,607 - root - INFO - lr: 2.8659e-05 gnorm: 1.07 [11:58:50<12:31:56] +[titan] 2025-10-05 10:33:11,463 - root - INFO - step: 19555 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8942 +[titan] 2025-10-05 10:33:11,463 - root - INFO - lr: 2.8650e-05 gnorm: 1.05 [11:59:01<12:31:44] +[titan] 2025-10-05 10:33:22,332 - root - INFO - step: 19560 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9357 +[titan] 2025-10-05 10:33:22,332 - root - INFO - lr: 2.8641e-05 gnorm: 1.08 [11:59:12<12:31:33] +[titan] 2025-10-05 10:33:33,247 - root - INFO - step: 19565 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 10:33:33,247 - root - INFO - lr: 2.8632e-05 gnorm: 1.12 [11:59:23<12:31:22] +[titan] 2025-10-05 10:33:44,148 - root - INFO - step: 19570 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:33:44,148 - root - INFO - lr: 2.8623e-05 gnorm: 1.14 [11:59:34<12:31:11] +[titan] 2025-10-05 10:33:55,019 - root - INFO - step: 19575 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 10:33:55,019 - root - INFO - lr: 2.8614e-05 gnorm: 1.09 [11:59:44<12:31:00] +[titan] 2025-10-05 10:34:05,890 - root - INFO - step: 19580 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8641 +[titan] 2025-10-05 10:34:05,890 - root - INFO - lr: 
2.8605e-05 gnorm: 1.05 [11:59:55<12:30:49] +[titan] 2025-10-05 10:34:16,751 - root - INFO - step: 19585 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 10:34:16,752 - root - INFO - lr: 2.8596e-05 gnorm: 1.07 [12:00:06<12:30:37] +[titan] 2025-10-05 10:34:27,618 - root - INFO - step: 19590 loss: 2.1741 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 10:34:27,619 - root - INFO - lr: 2.8588e-05 gnorm: 1.06 [12:00:17<12:30:26] +[titan] 2025-10-05 10:34:38,478 - root - INFO - step: 19595 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 10:34:38,478 - root - INFO - lr: 2.8579e-05 gnorm: 1.07 [12:00:28<12:30:15] +[titan] 2025-10-05 10:34:47,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:34:49,385 - root - INFO - step: 19600 loss: 2.1233 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:34:49,385 - root - INFO - lr: 2.8570e-05 gnorm: 1.04 [12:00:39<12:30:04] +[titan] 2025-10-05 10:35:00,251 - root - INFO - step: 19605 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9248 +[titan] 2025-10-05 10:35:00,251 - root - INFO - lr: 2.8561e-05 gnorm: 1.04 [12:00:50<12:29:53] +[titan] 2025-10-05 10:35:11,113 - root - INFO - step: 19610 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9104 +[titan] 2025-10-05 10:35:11,113 - root - INFO - lr: 2.8552e-05 gnorm: 1.09 [12:01:01<12:29:41] +[titan] 2025-10-05 10:35:21,983 - root - INFO - step: 19615 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 10:35:21,983 - root - INFO - lr: 2.8543e-05 gnorm: 1.05 [12:01:11<12:29:30] +[titan] 2025-10-05 10:35:32,879 - root - INFO - step: 19620 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 10:35:32,879 - root - INFO - lr: 2.8534e-05 gnorm: 1.11 [12:01:22<12:29:19] +[titan] 2025-10-05 10:35:43,764 - root - INFO - step: 19625 loss: 2.1033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:35:43,764 - root - INFO - lr: 2.8525e-05 gnorm: 1.05 [12:01:33<12:29:08] +[titan] 2025-10-05 10:35:54,636 - root - INFO - step: 19630 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 10:35:54,637 - root - INFO - lr: 
2.8516e-05 gnorm: 1.10 [12:01:44<12:28:57] +[titan] 2025-10-05 10:36:05,532 - root - INFO - step: 19635 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 10:36:05,532 - root - INFO - lr: 2.8507e-05 gnorm: 1.02 [12:01:55<12:28:45] +[titan] 2025-10-05 10:36:16,411 - root - INFO - step: 19640 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 10:36:16,411 - root - INFO - lr: 2.8498e-05 gnorm: 1.05 [12:02:06<12:28:34] +[titan] 2025-10-05 10:36:27,270 - root - INFO - step: 19645 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 10:36:27,270 - root - INFO - lr: 2.8489e-05 gnorm: 1.06 [12:02:17<12:28:23] +[titan] 2025-10-05 10:36:35,979 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:36:38,157 - root - INFO - step: 19650 loss: 2.0890 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8470 +[titan] 2025-10-05 10:36:38,157 - root - INFO - lr: 2.8481e-05 gnorm: 1.01 [12:02:28<12:28:12] +[titan] 2025-10-05 10:36:49,018 - root - INFO - step: 19655 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 10:36:49,019 - root - INFO - lr: 2.8472e-05 gnorm: 1.07 [12:02:38<12:28:01] +[titan] 2025-10-05 10:36:59,878 - root - INFO - step: 19660 loss: 2.2289 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 10:36:59,879 - root - INFO - lr: 2.8463e-05 gnorm: 1.08 [12:02:49<12:27:49] +[titan] 2025-10-05 10:37:10,783 - root - INFO - step: 19665 loss: 2.1435 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8951 +[titan] 2025-10-05 10:37:10,783 - root - INFO - lr: 2.8454e-05 gnorm: 1.08 [12:03:00<12:27:38] +[titan] 2025-10-05 10:37:21,656 - root - INFO - step: 19670 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8722 +[titan] 2025-10-05 10:37:21,657 - root - INFO - lr: 2.8445e-05 gnorm: 1.08 [12:03:11<12:27:27] +[titan] 2025-10-05 10:37:32,556 - root - INFO - step: 19675 loss: 2.2272 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9633 +[titan] 2025-10-05 10:37:32,556 - root - INFO - lr: 2.8436e-05 gnorm: 1.12 [12:03:22<12:27:16] +[titan] 2025-10-05 10:37:43,429 - root - INFO - step: 19680 loss: 2.1453 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:37:43,429 - root - INFO - lr: 
2.8427e-05 gnorm: 1.06 [12:03:33<12:27:05] +[titan] 2025-10-05 10:37:54,290 - root - INFO - step: 19685 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 10:37:54,290 - root - INFO - lr: 2.8418e-05 gnorm: 1.10 [12:03:44<12:26:54] +[titan] 2025-10-05 10:38:05,157 - root - INFO - step: 19690 loss: 2.1517 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:38:05,157 - root - INFO - lr: 2.8409e-05 gnorm: 1.07 [12:03:55<12:26:42] +[titan] 2025-10-05 10:38:16,025 - root - INFO - step: 19695 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 10:38:16,025 - root - INFO - lr: 2.8400e-05 gnorm: 1.11 [12:04:05<12:26:31] +[titan] 2025-10-05 10:38:24,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:38:26,916 - root - INFO - step: 19700 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:38:26,916 - root - INFO - lr: 2.8391e-05 gnorm: 1.06 [12:04:16<12:26:20] +[titan] 2025-10-05 10:38:37,813 - root - INFO - step: 19705 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 10:38:37,813 - root - INFO - lr: 2.8382e-05 gnorm: 1.04 [12:04:27<12:26:09] +[titan] 2025-10-05 10:38:48,686 - root - INFO - step: 19710 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8558 +[titan] 2025-10-05 10:38:48,686 - root - INFO - lr: 2.8374e-05 gnorm: 1.10 [12:04:38<12:25:58] +[titan] 2025-10-05 10:38:59,549 - root - INFO - step: 19715 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 10:38:59,549 - root - INFO - lr: 2.8365e-05 gnorm: 1.07 [12:04:49<12:25:46] +[titan] 2025-10-05 10:39:10,404 - root - INFO - step: 19720 loss: 2.2251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 10:39:10,404 - root - INFO - lr: 2.8356e-05 gnorm: 1.05 [12:05:00<12:25:35] +[titan] 2025-10-05 10:39:21,281 - root - INFO - step: 19725 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8864 +[titan] 2025-10-05 10:39:21,281 - root - INFO - lr: 2.8347e-05 gnorm: 1.06 [12:05:11<12:25:24] +[titan] 2025-10-05 10:39:32,209 - root - INFO - step: 19730 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9186 +[titan] 2025-10-05 10:39:32,209 - root - INFO - lr: 
2.8338e-05 gnorm: 1.05 [12:05:22<12:25:13] +[titan] 2025-10-05 10:39:43,074 - root - INFO - step: 19735 loss: 2.1410 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8919 +[titan] 2025-10-05 10:39:43,074 - root - INFO - lr: 2.8329e-05 gnorm: 1.09 [12:05:33<12:25:02] +[titan] 2025-10-05 10:39:53,944 - root - INFO - step: 19740 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9376 +[titan] 2025-10-05 10:39:53,944 - root - INFO - lr: 2.8320e-05 gnorm: 1.05 [12:05:43<12:24:50] +[titan] 2025-10-05 10:40:04,859 - root - INFO - step: 19745 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:40:04,859 - root - INFO - lr: 2.8311e-05 gnorm: 1.04 [12:05:54<12:24:39] +[titan] 2025-10-05 10:40:13,560 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:40:15,748 - root - INFO - step: 19750 loss: 2.1520 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:40:15,748 - root - INFO - lr: 2.8302e-05 gnorm: 1.04 [12:06:05<12:24:28] +[titan] 2025-10-05 10:40:26,639 - root - INFO - step: 19755 loss: 2.1342 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8866 +[titan] 2025-10-05 10:40:26,639 - root - INFO - lr: 2.8293e-05 gnorm: 1.04 [12:06:16<12:24:17] +[titan] 2025-10-05 10:40:37,586 - root - INFO - step: 19760 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.28 mfu: 41.99% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:40:37,587 - root - INFO - lr: 2.8284e-05 gnorm: 1.06 [12:06:27<12:24:06] +[titan] 2025-10-05 10:40:48,488 - root - INFO - step: 19765 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8834 +[titan] 2025-10-05 10:40:48,489 - root - INFO - lr: 2.8275e-05 gnorm: 1.06 [12:06:38<12:23:55] +[titan] 2025-10-05 10:40:59,376 - root - INFO - step: 19770 loss: 2.2031 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 10:40:59,377 - root - INFO - lr: 2.8266e-05 gnorm: 1.07 [12:06:49<12:23:44] +[titan] 2025-10-05 10:41:10,261 - root - INFO - step: 19775 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 10:41:10,261 - root - INFO - lr: 2.8258e-05 gnorm: 1.05 [12:07:00<12:23:32] +[titan] 2025-10-05 10:41:21,161 - root - INFO - step: 19780 loss: 2.2202 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 10:41:21,161 - root - INFO - lr: 
2.8249e-05 gnorm: 1.08 [12:07:11<12:23:21] +[titan] 2025-10-05 10:41:32,049 - root - INFO - step: 19785 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:41:32,049 - root - INFO - lr: 2.8240e-05 gnorm: 1.07 [12:07:22<12:23:10] +[titan] 2025-10-05 10:41:42,943 - root - INFO - step: 19790 loss: 2.0669 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 10:41:42,943 - root - INFO - lr: 2.8231e-05 gnorm: 1.04 [12:07:32<12:22:59] +[titan] 2025-10-05 10:41:53,847 - root - INFO - step: 19795 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 10:41:53,847 - root - INFO - lr: 2.8222e-05 gnorm: 1.09 [12:07:43<12:22:48] +[titan] 2025-10-05 10:42:02,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:42:04,726 - root - INFO - step: 19800 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9124 +[titan] 2025-10-05 10:42:04,726 - root - INFO - lr: 2.8213e-05 gnorm: 1.07 [12:07:54<12:22:36] +[titan] 2025-10-05 10:42:15,602 - root - INFO - step: 19805 loss: 2.1292 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8809 +[titan] 2025-10-05 10:42:15,602 - root - INFO - lr: 2.8204e-05 gnorm: 1.06 [12:08:05<12:22:25] +[titan] 2025-10-05 10:42:26,476 - root - INFO - step: 19810 loss: 2.1988 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:42:26,476 - root - INFO - lr: 2.8195e-05 gnorm: 1.07 [12:08:16<12:22:14] +[titan] 2025-10-05 10:42:37,355 - root - INFO - step: 19815 loss: 2.1111 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 10:42:37,355 - root - INFO - lr: 2.8186e-05 gnorm: 1.08 [12:08:27<12:22:03] +[titan] 2025-10-05 10:42:48,237 - root - INFO - step: 19820 loss: 2.1257 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8783 +[titan] 2025-10-05 10:42:48,237 - root - INFO - lr: 2.8177e-05 gnorm: 1.07 [12:08:38<12:21:52] +[titan] 2025-10-05 10:42:59,141 - root - INFO - step: 19825 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:42:59,142 - root - INFO - lr: 2.8168e-05 gnorm: 1.06 [12:08:49<12:21:41] +[titan] 2025-10-05 10:43:09,994 - root - INFO - step: 19830 loss: 2.1713 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9190 +[titan] 2025-10-05 10:43:09,994 - root - INFO - lr: 
2.8159e-05 gnorm: 1.12 [12:08:59<12:21:29] +[titan] 2025-10-05 10:43:20,854 - root - INFO - step: 19835 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9251 +[titan] 2025-10-05 10:43:20,854 - root - INFO - lr: 2.8151e-05 gnorm: 1.09 [12:09:10<12:21:18] +[titan] 2025-10-05 10:43:31,720 - root - INFO - step: 19840 loss: 2.1270 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:43:31,720 - root - INFO - lr: 2.8142e-05 gnorm: 1.04 [12:09:21<12:21:07] +[titan] 2025-10-05 10:43:42,583 - root - INFO - step: 19845 loss: 2.1653 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9125 +[titan] 2025-10-05 10:43:42,583 - root - INFO - lr: 2.8133e-05 gnorm: 1.03 [12:09:32<12:20:56] +[titan] 2025-10-05 10:43:51,290 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:43:53,475 - root - INFO - step: 19850 loss: 2.1376 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 10:43:53,475 - root - INFO - lr: 2.8124e-05 gnorm: 1.05 [12:09:43<12:20:45] +[titan] 2025-10-05 10:44:04,341 - root - INFO - step: 19855 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 10:44:04,341 - root - INFO - lr: 2.8115e-05 gnorm: 1.09 [12:09:54<12:20:33] +[titan] 2025-10-05 10:44:15,250 - root - INFO - step: 19860 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9089 +[titan] 2025-10-05 10:44:15,250 - root - INFO - lr: 2.8106e-05 gnorm: 1.09 [12:10:05<12:20:22] +[titan] 2025-10-05 10:44:26,122 - root - INFO - step: 19865 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 10:44:26,123 - root - INFO - lr: 2.8097e-05 gnorm: 1.06 [12:10:16<12:20:11] +[titan] 2025-10-05 10:44:37,015 - root - INFO - step: 19870 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 10:44:37,015 - root - INFO - lr: 2.8088e-05 gnorm: 1.07 [12:10:26<12:20:00] +[titan] 2025-10-05 10:44:47,890 - root - INFO - step: 19875 loss: 2.1479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8987 +[titan] 2025-10-05 10:44:47,890 - root - INFO - lr: 2.8079e-05 gnorm: 1.11 [12:10:37<12:19:49] +[titan] 2025-10-05 10:44:58,757 - root - INFO - step: 19880 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 10:44:58,757 - root - INFO - lr: 
2.8070e-05 gnorm: 1.06 [12:10:48<12:19:38] +[titan] 2025-10-05 10:45:09,633 - root - INFO - step: 19885 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:45:09,633 - root - INFO - lr: 2.8061e-05 gnorm: 1.08 [12:10:59<12:19:26] +[titan] 2025-10-05 10:45:20,533 - root - INFO - step: 19890 loss: 2.1170 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:45:20,533 - root - INFO - lr: 2.8052e-05 gnorm: 1.07 [12:11:10<12:19:15] +[titan] 2025-10-05 10:45:31,359 - root - INFO - step: 19895 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:45:31,360 - root - INFO - lr: 2.8043e-05 gnorm: 1.10 [12:11:21<12:19:04] +[titan] 2025-10-05 10:45:40,050 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:45:42,231 - root - INFO - step: 19900 loss: 2.1514 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 10:45:42,231 - root - INFO - lr: 2.8035e-05 gnorm: 1.08 [12:11:32<12:18:53] +[titan] 2025-10-05 10:45:53,088 - root - INFO - step: 19905 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 10:45:53,088 - root - INFO - lr: 2.8026e-05 gnorm: 1.03 [12:11:43<12:18:42] +[titan] 2025-10-05 10:46:03,910 - root - INFO - step: 19910 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 10:46:03,910 - root - INFO - lr: 2.8017e-05 gnorm: 1.08 [12:11:53<12:18:30] +[titan] 2025-10-05 10:46:14,770 - root - INFO - step: 19915 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 10:46:14,771 - root - INFO - lr: 2.8008e-05 gnorm: 1.08 [12:12:04<12:18:19] +[titan] 2025-10-05 10:46:25,652 - root - INFO - step: 19920 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:46:25,652 - root - INFO - lr: 2.7999e-05 gnorm: 1.09 [12:12:15<12:18:08] +[titan] 2025-10-05 10:46:36,496 - root - INFO - step: 19925 loss: 2.2094 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 10:46:36,497 - root - INFO - lr: 2.7990e-05 gnorm: 1.06 [12:12:26<12:17:57] +[titan] 2025-10-05 10:46:47,345 - root - INFO - step: 19930 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 10:46:47,345 - root - INFO - lr: 
2.7981e-05 gnorm: 1.11 [12:12:37<12:17:46] +[titan] 2025-10-05 10:46:58,221 - root - INFO - step: 19935 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 10:46:58,221 - root - INFO - lr: 2.7972e-05 gnorm: 1.05 [12:12:48<12:17:34] +[titan] 2025-10-05 10:47:09,102 - root - INFO - step: 19940 loss: 2.1225 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 10:47:09,102 - root - INFO - lr: 2.7963e-05 gnorm: 1.05 [12:12:59<12:17:23] +[titan] 2025-10-05 10:47:19,968 - root - INFO - step: 19945 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8581 +[titan] 2025-10-05 10:47:19,968 - root - INFO - lr: 2.7954e-05 gnorm: 1.09 [12:13:09<12:17:12] +[titan] 2025-10-05 10:47:28,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:47:30,850 - root - INFO - step: 19950 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8703 +[titan] 2025-10-05 10:47:30,850 - root - INFO - lr: 2.7945e-05 gnorm: 1.07 [12:13:20<12:17:01] +[titan] 2025-10-05 10:47:41,822 - root - INFO - step: 19955 loss: 2.1253 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8777 +[titan] 2025-10-05 10:47:41,822 - root - INFO - lr: 2.7936e-05 gnorm: 1.09 [12:13:31<12:16:50] +[titan] 2025-10-05 10:47:52,686 - root - INFO - step: 19960 loss: 2.1316 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:47:52,686 - root - INFO - lr: 2.7927e-05 gnorm: 1.11 [12:13:42<12:16:39] +[titan] 2025-10-05 10:48:03,639 - root - INFO - step: 19965 loss: 2.1229 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8756 +[titan] 2025-10-05 10:48:03,639 - root - INFO - lr: 2.7919e-05 gnorm: 1.08 [12:13:53<12:16:27] +[titan] 2025-10-05 10:48:10,352 - root - INFO - Dumping profiler traces at step 19968 +[titan] 2025-10-05 10:48:10,388 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:48:14,746 - root - INFO - step: 19970 loss: 2.1632 memory: 118.84GiB(85.28%) tps: 29,504 tflops: 409.32 mfu: 41.39% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9112 +[titan] 2025-10-05 10:48:14,746 - root - INFO - lr: 2.7910e-05 gnorm: 1.01 [12:14:04<12:16:17] +[titan] 2025-10-05 10:48:25,610 - root - INFO - step: 19975 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 10:48:25,610 - root - INFO - lr: 2.7901e-05 gnorm: 1.06 [12:14:15<12:16:05] +[titan] 2025-10-05 10:48:36,506 - root - INFO - step: 19980 loss: 
2.1359 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 10:48:36,507 - root - INFO - lr: 2.7892e-05 gnorm: 1.07 [12:14:26<12:15:54] +[titan] 2025-10-05 10:48:47,491 - root - INFO - step: 19985 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,831 tflops: 413.87 mfu: 41.85% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:48:47,491 - root - INFO - lr: 2.7883e-05 gnorm: 1.06 [12:14:37<12:15:43] +[titan] 2025-10-05 10:48:58,374 - root - INFO - step: 19990 loss: 2.1671 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:48:58,374 - root - INFO - lr: 2.7874e-05 gnorm: 1.08 [12:14:48<12:15:32] +[titan] 2025-10-05 10:49:09,251 - root - INFO - step: 19995 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:49:09,251 - root - INFO - lr: 2.7865e-05 gnorm: 1.06 [12:14:59<12:15:21] +[titan] 2025-10-05 10:49:17,928 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:49:20,117 - root - INFO - step: 20000 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8613 +[titan] 2025-10-05 10:49:20,118 - root - INFO - lr: 2.7856e-05 gnorm: 1.09 [12:15:10<12:15:10] +[titan] 2025-10-05 10:49:20,118 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 10:49:39,406 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 10:49:39,406 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.29 seconds. 
+[titan] 2025-10-05 10:51:35,525 - root - INFO - step: 20005 loss: 2.1785 memory: 118.84GiB(85.28%) tps: 2,420 tflops: 33.57 mfu: 3.39% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 10:51:35,525 - root - INFO - lr: 2.7847e-05 gnorm: 1.02 [12:17:25<12:17:03] +[titan] 2025-10-05 10:51:46,302 - root - INFO - step: 20010 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9317 +[titan] 2025-10-05 10:51:46,302 - root - INFO - lr: 2.7838e-05 gnorm: 1.08 [12:17:36<12:16:52] +[titan] 2025-10-05 10:51:57,112 - root - INFO - step: 20015 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 10:51:57,112 - root - INFO - lr: 2.7829e-05 gnorm: 1.07 [12:17:47<12:16:40] +[titan] 2025-10-05 10:52:07,924 - root - INFO - step: 20020 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,307 tflops: 420.47 mfu: 42.51% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8728 +[titan] 2025-10-05 10:52:07,925 - root - INFO - lr: 2.7820e-05 gnorm: 1.05 [12:17:57<12:16:29] +[titan] 2025-10-05 10:52:18,739 - root - INFO - step: 20025 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 10:52:18,739 - root - INFO - lr: 2.7811e-05 gnorm: 1.08 [12:18:08<12:16:18] +[titan] 2025-10-05 10:52:29,561 - root - INFO - step: 20030 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 10:52:29,562 - root - INFO - lr: 2.7803e-05 gnorm: 1.05 [12:18:19<12:16:06] +[titan] 2025-10-05 10:52:40,397 - root - INFO - step: 20035 loss: 2.1681 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:52:40,397 - root - INFO - lr: 
2.7794e-05 gnorm: 1.09 [12:18:30<12:15:55] +[titan] 2025-10-05 10:52:51,270 - root - INFO - step: 20040 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:52:51,270 - root - INFO - lr: 2.7785e-05 gnorm: 1.08 [12:18:41<12:15:44] +[titan] 2025-10-05 10:53:02,099 - root - INFO - step: 20045 loss: 2.1535 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 10:53:02,099 - root - INFO - lr: 2.7776e-05 gnorm: 1.06 [12:18:52<12:15:32] +[titan] 2025-10-05 10:53:10,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:53:12,953 - root - INFO - step: 20050 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:53:12,953 - root - INFO - lr: 2.7767e-05 gnorm: 1.06 [12:19:02<12:15:21] +[titan] 2025-10-05 10:53:23,781 - root - INFO - step: 20055 loss: 2.1359 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8880 +[titan] 2025-10-05 10:53:23,781 - root - INFO - lr: 2.7758e-05 gnorm: 1.07 [12:19:13<12:15:10] +[titan] 2025-10-05 10:53:34,615 - root - INFO - step: 20060 loss: 2.2260 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 10:53:34,615 - root - INFO - lr: 2.7749e-05 gnorm: 1.08 [12:19:24<12:14:59] +[titan] 2025-10-05 10:53:45,482 - root - INFO - step: 20065 loss: 2.1538 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9039 +[titan] 2025-10-05 10:53:45,482 - root - INFO - lr: 2.7740e-05 gnorm: 1.07 [12:19:35<12:14:47] +[titan] 2025-10-05 10:53:56,339 - root - INFO - step: 20070 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 
30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 10:53:56,339 - root - INFO - lr: 2.7731e-05 gnorm: 1.04 [12:19:46<12:14:36] +[titan] 2025-10-05 10:54:07,188 - root - INFO - step: 20075 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 10:54:07,188 - root - INFO - lr: 2.7722e-05 gnorm: 1.06 [12:19:57<12:14:25] +[titan] 2025-10-05 10:54:18,059 - root - INFO - step: 20080 loss: 2.1485 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:54:18,059 - root - INFO - lr: 2.7713e-05 gnorm: 1.06 [12:20:07<12:14:14] +[titan] 2025-10-05 10:54:28,894 - root - INFO - step: 20085 loss: 2.2267 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9673 +[titan] 2025-10-05 10:54:28,894 - root - INFO - lr: 2.7704e-05 gnorm: 1.85 [12:20:18<12:14:02] +[titan] 2025-10-05 10:54:39,760 - root - INFO - step: 20090 loss: 2.1383 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 10:54:39,760 - root - INFO - lr: 2.7695e-05 gnorm: 1.09 [12:20:29<12:13:51] +[titan] 2025-10-05 10:54:50,700 - root - INFO - step: 20095 loss: 2.1379 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8897 +[titan] 2025-10-05 10:54:50,700 - root - INFO - lr: 2.7687e-05 gnorm: 1.04 [12:20:40<12:13:40] +[titan] 2025-10-05 10:54:59,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:55:01,599 - root - INFO - step: 20100 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:55:01,599 - root - INFO - lr: 2.7678e-05 gnorm: 1.11 [12:20:51<12:13:29] +[titan] 2025-10-05 10:55:12,449 - root - INFO - step: 20105 loss: 2.1710 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 10:55:12,449 - root - INFO - lr: 2.7669e-05 gnorm: 1.03 [12:21:02<12:13:17] +[titan] 2025-10-05 10:55:23,313 - root - INFO - step: 20110 loss: 2.0931 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 10:55:23,313 - root - INFO - lr: 2.7660e-05 gnorm: 1.04 [12:21:13<12:13:06] +[titan] 2025-10-05 10:55:34,176 - root - INFO - step: 20115 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 10:55:34,176 - root - INFO - lr: 2.7651e-05 gnorm: 1.05 [12:21:24<12:12:55] +[titan] 2025-10-05 10:55:45,039 - root - INFO - step: 20120 loss: 2.1203 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 10:55:45,039 - root - INFO - lr: 2.7642e-05 gnorm: 1.06 [12:21:34<12:12:44] +[titan] 2025-10-05 10:55:55,943 - root - INFO - step: 20125 loss: 2.1150 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8697 +[titan] 2025-10-05 10:55:55,943 - root - INFO - lr: 2.7633e-05 gnorm: 1.05 [12:21:45<12:12:32] +[titan] 2025-10-05 10:56:06,800 - root - INFO - step: 20130 loss: 2.1880 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 10:56:06,800 - root - INFO - lr: 
2.7624e-05 gnorm: 1.08 [12:21:56<12:12:21] +[titan] 2025-10-05 10:56:17,695 - root - INFO - step: 20135 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8794 +[titan] 2025-10-05 10:56:17,696 - root - INFO - lr: 2.7615e-05 gnorm: 1.08 [12:22:07<12:12:10] +[titan] 2025-10-05 10:56:28,544 - root - INFO - step: 20140 loss: 2.1589 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9087 +[titan] 2025-10-05 10:56:28,544 - root - INFO - lr: 2.7606e-05 gnorm: 1.04 [12:22:18<12:11:59] +[titan] 2025-10-05 10:56:39,421 - root - INFO - step: 20145 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8560 +[titan] 2025-10-05 10:56:39,422 - root - INFO - lr: 2.7597e-05 gnorm: 1.08 [12:22:29<12:11:48] +[titan] 2025-10-05 10:56:48,102 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:56:50,277 - root - INFO - step: 20150 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:56:50,277 - root - INFO - lr: 2.7588e-05 gnorm: 1.05 [12:22:40<12:11:36] +[titan] 2025-10-05 10:57:01,154 - root - INFO - step: 20155 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 10:57:01,155 - root - INFO - lr: 2.7579e-05 gnorm: 1.09 [12:22:51<12:11:25] +[titan] 2025-10-05 10:57:12,015 - root - INFO - step: 20160 loss: 2.1842 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 10:57:12,015 - root - INFO - lr: 2.7571e-05 gnorm: 1.05 [12:23:01<12:11:14] +[titan] 2025-10-05 10:57:22,907 - root - INFO - step: 20165 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 10:57:22,907 - root - INFO - lr: 2.7562e-05 gnorm: 1.05 [12:23:12<12:11:03] +[titan] 2025-10-05 10:57:33,769 - root - INFO - step: 20170 loss: 2.1734 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9209 +[titan] 2025-10-05 10:57:33,769 - root - INFO - lr: 2.7553e-05 gnorm: 1.10 [12:23:23<12:10:51] +[titan] 2025-10-05 10:57:44,629 - root - INFO - step: 20175 loss: 2.1616 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:57:44,629 - root - INFO - lr: 2.7544e-05 gnorm: 1.10 [12:23:34<12:10:40] +[titan] 2025-10-05 10:57:55,575 - root - INFO - step: 20180 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 10:57:55,575 - root - INFO - lr: 
2.7535e-05 gnorm: 1.09 [12:23:45<12:10:29] +[titan] 2025-10-05 10:58:06,449 - root - INFO - step: 20185 loss: 2.0747 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 10:58:06,449 - root - INFO - lr: 2.7526e-05 gnorm: 1.09 [12:23:56<12:10:18] +[titan] 2025-10-05 10:58:17,339 - root - INFO - step: 20190 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 10:58:17,339 - root - INFO - lr: 2.7517e-05 gnorm: 1.11 [12:24:07<12:10:06] +[titan] 2025-10-05 10:58:28,224 - root - INFO - step: 20195 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 10:58:28,224 - root - INFO - lr: 2.7508e-05 gnorm: 1.09 [12:24:18<12:09:55] +[titan] 2025-10-05 10:58:36,913 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:58:39,105 - root - INFO - step: 20200 loss: 2.1272 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 10:58:39,105 - root - INFO - lr: 2.7499e-05 gnorm: 1.10 [12:24:29<12:09:44] +[titan] 2025-10-05 10:58:49,983 - root - INFO - step: 20205 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9320 +[titan] 2025-10-05 10:58:49,983 - root - INFO - lr: 2.7490e-05 gnorm: 1.10 [12:24:39<12:09:33] +[titan] 2025-10-05 10:59:00,935 - root - INFO - step: 20210 loss: 2.0945 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 10:59:00,935 - root - INFO - lr: 2.7481e-05 gnorm: 1.07 [12:24:50<12:09:22] +[titan] 2025-10-05 10:59:11,794 - root - INFO - step: 20215 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:59:11,794 - root - INFO - lr: 2.7472e-05 gnorm: 1.08 [12:25:01<12:09:10] +[titan] 2025-10-05 10:59:22,679 - root - INFO - step: 20220 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9899 +[titan] 2025-10-05 10:59:22,679 - root - INFO - lr: 2.7463e-05 gnorm: 1.09 [12:25:12<12:08:59] +[titan] 2025-10-05 10:59:33,536 - root - INFO - step: 20225 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 10:59:33,536 - root - INFO - lr: 2.7454e-05 gnorm: 1.10 [12:25:23<12:08:48] +[titan] 2025-10-05 10:59:44,381 - root - INFO - step: 20230 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 10:59:44,381 - root - INFO - lr: 
2.7446e-05 gnorm: 1.07 [12:25:34<12:08:37] +[titan] 2025-10-05 10:59:55,275 - root - INFO - step: 20235 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8518 +[titan] 2025-10-05 10:59:55,275 - root - INFO - lr: 2.7437e-05 gnorm: 1.10 [12:25:45<12:08:25] +[titan] 2025-10-05 11:00:06,163 - root - INFO - step: 20240 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8998 +[titan] 2025-10-05 11:00:06,164 - root - INFO - lr: 2.7428e-05 gnorm: 1.09 [12:25:56<12:08:14] +[titan] 2025-10-05 11:00:17,040 - root - INFO - step: 20245 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 11:00:17,040 - root - INFO - lr: 2.7419e-05 gnorm: 1.06 [12:26:06<12:08:03] +[titan] 2025-10-05 11:00:25,720 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:00:27,899 - root - INFO - step: 20250 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 11:00:27,900 - root - INFO - lr: 2.7410e-05 gnorm: 1.06 [12:26:17<12:07:52] +[titan] 2025-10-05 11:00:38,739 - root - INFO - step: 20255 loss: 2.1856 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 11:00:38,739 - root - INFO - lr: 2.7401e-05 gnorm: 1.07 [12:26:28<12:07:40] +[titan] 2025-10-05 11:00:49,595 - root - INFO - step: 20260 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 11:00:49,595 - root - INFO - lr: 2.7392e-05 gnorm: 1.05 [12:26:39<12:07:29] +[titan] 2025-10-05 11:01:00,505 - root - INFO - step: 20265 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 11:01:00,505 - root - INFO - lr: 2.7383e-05 gnorm: 1.05 [12:26:50<12:07:18] +[titan] 2025-10-05 11:01:11,382 - root - INFO - step: 20270 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8446 +[titan] 2025-10-05 11:01:11,382 - root - INFO - lr: 2.7374e-05 gnorm: 1.08 [12:27:01<12:07:07] +[titan] 2025-10-05 11:01:22,284 - root - INFO - step: 20275 loss: 2.1344 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:01:22,284 - root - INFO - lr: 2.7365e-05 gnorm: 1.10 [12:27:12<12:06:56] +[titan] 2025-10-05 11:01:33,138 - root - INFO - step: 20280 loss: 2.1211 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:01:33,138 - root - INFO - lr: 
2.7356e-05 gnorm: 1.03 [12:27:23<12:06:44] +[titan] 2025-10-05 11:01:44,002 - root - INFO - step: 20285 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:01:44,002 - root - INFO - lr: 2.7347e-05 gnorm: 1.05 [12:27:33<12:06:33] +[titan] 2025-10-05 11:01:54,890 - root - INFO - step: 20290 loss: 2.1434 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 11:01:54,890 - root - INFO - lr: 2.7338e-05 gnorm: 1.08 [12:27:44<12:06:22] +[titan] 2025-10-05 11:02:06,133 - root - INFO - step: 20295 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 29,148 tflops: 404.38 mfu: 40.89% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 11:02:06,133 - root - INFO - lr: 2.7330e-05 gnorm: 1.06 [12:27:56<12:06:11] +[titan] 2025-10-05 11:02:14,822 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:02:17,010 - root - INFO - step: 20300 loss: 2.1482 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 11:02:17,010 - root - INFO - lr: 2.7321e-05 gnorm: 1.33 [12:28:06<12:06:00] +[titan] 2025-10-05 11:02:27,926 - root - INFO - step: 20305 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 11:02:27,927 - root - INFO - lr: 2.7312e-05 gnorm: 1.05 [12:28:17<12:05:49] +[titan] 2025-10-05 11:02:38,794 - root - INFO - step: 20310 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8580 +[titan] 2025-10-05 11:02:38,794 - root - INFO - lr: 2.7303e-05 gnorm: 1.02 [12:28:28<12:05:37] +[titan] 2025-10-05 11:02:49,655 - root - INFO - step: 20315 loss: 2.1038 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:02:49,655 - root - INFO - lr: 2.7294e-05 gnorm: 1.06 [12:28:39<12:05:26] +[titan] 2025-10-05 11:03:00,551 - root - INFO - step: 20320 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 11:03:00,551 - root - INFO - lr: 2.7285e-05 gnorm: 1.07 [12:28:50<12:05:15] +[titan] 2025-10-05 11:03:11,416 - root - INFO - step: 20325 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9040 +[titan] 2025-10-05 11:03:11,417 - root - INFO - lr: 2.7276e-05 gnorm: 1.04 [12:29:01<12:05:04] +[titan] 2025-10-05 11:03:22,259 - root - INFO - step: 20330 loss: 2.1001 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8555 +[titan] 2025-10-05 11:03:22,259 - root - INFO - lr: 
2.7267e-05 gnorm: 1.07 [12:29:12<12:04:52] +[titan] 2025-10-05 11:03:33,113 - root - INFO - step: 20335 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8112 +[titan] 2025-10-05 11:03:33,113 - root - INFO - lr: 2.7258e-05 gnorm: 1.06 [12:29:23<12:04:41] +[titan] 2025-10-05 11:03:44,014 - root - INFO - step: 20340 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 11:03:44,014 - root - INFO - lr: 2.7249e-05 gnorm: 1.02 [12:29:33<12:04:30] +[titan] 2025-10-05 11:03:54,889 - root - INFO - step: 20345 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9095 +[titan] 2025-10-05 11:03:54,889 - root - INFO - lr: 2.7240e-05 gnorm: 1.05 [12:29:44<12:04:19] +[titan] 2025-10-05 11:04:03,596 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:04:05,779 - root - INFO - step: 20350 loss: 2.1910 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9371 +[titan] 2025-10-05 11:04:05,779 - root - INFO - lr: 2.7231e-05 gnorm: 1.07 [12:29:55<12:04:07] +[titan] 2025-10-05 11:04:16,637 - root - INFO - step: 20355 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 11:04:16,638 - root - INFO - lr: 2.7222e-05 gnorm: 1.05 [12:30:06<12:03:56] +[titan] 2025-10-05 11:04:27,458 - root - INFO - step: 20360 loss: 2.1358 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8882 +[titan] 2025-10-05 11:04:27,458 - root - INFO - lr: 2.7214e-05 gnorm: 1.06 [12:30:17<12:03:45] +[titan] 2025-10-05 11:04:38,299 - root - INFO - step: 20365 loss: 2.1403 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 11:04:38,299 - root - INFO - lr: 2.7205e-05 gnorm: 1.10 [12:30:28<12:03:34] +[titan] 2025-10-05 11:04:49,208 - root - INFO - step: 20370 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 11:04:49,208 - root - INFO - lr: 2.7196e-05 gnorm: 1.09 [12:30:39<12:03:22] +[titan] 2025-10-05 11:05:00,089 - root - INFO - step: 20375 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:05:00,089 - root - INFO - lr: 2.7187e-05 gnorm: 1.06 [12:30:49<12:03:11] +[titan] 2025-10-05 11:05:10,946 - root - INFO - step: 20380 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:05:10,946 - root - INFO - lr: 
2.7178e-05 gnorm: 1.11 [12:31:00<12:03:00] +[titan] 2025-10-05 11:05:21,800 - root - INFO - step: 20385 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:05:21,800 - root - INFO - lr: 2.7169e-05 gnorm: 1.08 [12:31:11<12:02:49] +[titan] 2025-10-05 11:05:32,664 - root - INFO - step: 20390 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 11:05:32,664 - root - INFO - lr: 2.7160e-05 gnorm: 1.05 [12:31:22<12:02:37] +[titan] 2025-10-05 11:05:43,530 - root - INFO - step: 20395 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 11:05:43,531 - root - INFO - lr: 2.7151e-05 gnorm: 1.10 [12:31:33<12:02:26] +[titan] 2025-10-05 11:05:52,200 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:05:54,413 - root - INFO - step: 20400 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 11:05:54,413 - root - INFO - lr: 2.7142e-05 gnorm: 1.05 [12:31:44<12:02:15] +[titan] 2025-10-05 11:06:05,284 - root - INFO - step: 20405 loss: 2.1600 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 11:06:05,284 - root - INFO - lr: 2.7133e-05 gnorm: 1.08 [12:31:55<12:02:04] +[titan] 2025-10-05 11:06:16,130 - root - INFO - step: 20410 loss: 2.1684 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 11:06:16,130 - root - INFO - lr: 2.7124e-05 gnorm: 1.07 [12:32:06<12:01:53] +[titan] 2025-10-05 11:06:26,974 - root - INFO - step: 20415 loss: 2.1914 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:06:26,975 - root - INFO - lr: 2.7115e-05 gnorm: 1.09 [12:32:16<12:01:41] +[titan] 2025-10-05 11:06:37,832 - root - INFO - step: 20420 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 11:06:37,832 - root - INFO - lr: 2.7106e-05 gnorm: 1.09 [12:32:27<12:01:30] +[titan] 2025-10-05 11:06:48,689 - root - INFO - step: 20425 loss: 2.1157 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 11:06:48,689 - root - INFO - lr: 2.7098e-05 gnorm: 1.08 [12:32:38<12:01:19] +[titan] 2025-10-05 11:06:59,539 - root - INFO - step: 20430 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 11:06:59,539 - root - INFO - lr: 
2.7089e-05 gnorm: 1.05 [12:32:49<12:01:08] +[titan] 2025-10-05 11:07:10,461 - root - INFO - step: 20435 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 11:07:10,461 - root - INFO - lr: 2.7080e-05 gnorm: 1.06 [12:33:00<12:00:56] +[titan] 2025-10-05 11:07:21,318 - root - INFO - step: 20440 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:07:21,318 - root - INFO - lr: 2.7071e-05 gnorm: 1.07 [12:33:11<12:00:45] +[titan] 2025-10-05 11:07:32,168 - root - INFO - step: 20445 loss: 2.0912 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:07:32,168 - root - INFO - lr: 2.7062e-05 gnorm: 1.09 [12:33:22<12:00:34] +[titan] 2025-10-05 11:07:40,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:07:43,023 - root - INFO - step: 20450 loss: 2.1251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 11:07:43,023 - root - INFO - lr: 2.7053e-05 gnorm: 1.07 [12:33:32<12:00:23] +[titan] 2025-10-05 11:07:53,870 - root - INFO - step: 20455 loss: 2.1649 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 11:07:53,871 - root - INFO - lr: 2.7044e-05 gnorm: 1.07 [12:33:43<12:00:11] +[titan] 2025-10-05 11:08:04,763 - root - INFO - step: 20460 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 11:08:04,763 - root - INFO - lr: 2.7035e-05 gnorm: 1.03 [12:33:54<12:00:00] +[titan] 2025-10-05 11:08:15,662 - root - INFO - step: 20465 loss: 2.1274 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 11:08:15,662 - root - INFO - lr: 2.7026e-05 gnorm: 1.03 [12:34:05<11:59:49] +[titan] 2025-10-05 11:08:26,490 - root - INFO - step: 20470 loss: 2.1025 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8578 +[titan] 2025-10-05 11:08:26,490 - root - INFO - lr: 2.7017e-05 gnorm: 1.06 [12:34:16<11:59:38] +[titan] 2025-10-05 11:08:37,320 - root - INFO - step: 20475 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 11:08:37,321 - root - INFO - lr: 2.7008e-05 gnorm: 1.11 [12:34:27<11:59:26] +[titan] 2025-10-05 11:08:48,242 - root - INFO - step: 20480 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:08:48,242 - root - INFO - lr: 
2.6999e-05 gnorm: 1.04 [12:34:38<11:59:15] +[titan] 2025-10-05 11:08:48,431 - root - INFO - Dumping profiler traces at step 20480 +[titan] 2025-10-05 11:08:48,471 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:08:59,308 - root - INFO - step: 20485 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 29,611 tflops: 410.81 mfu: 41.54% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 11:08:59,308 - root - INFO - lr: 2.6990e-05 gnorm: 1.06 [12:34:49<11:59:04] +[titan] 2025-10-05 11:09:10,168 - root - INFO - step: 20490 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.8976 +[titan] 2025-10-05 11:09:10,168 - root - INFO - lr: 2.6982e-05 gnorm: 1.06 [12:35:00<11:58:53] +[titan] 2025-10-05 11:09:21,026 - root - INFO - step: 20495 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9159 +[titan] 2025-10-05 11:09:21,027 - root - INFO - lr: 2.6973e-05 gnorm: 1.10 [12:35:10<11:58:42] +[titan] 2025-10-05 11:09:29,736 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:09:31,923 - root - INFO - step: 20500 loss: 2.0830 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 11:09:31,923 - root - INFO - lr: 2.6964e-05 gnorm: 1.09 [12:35:21<11:58:30] +[titan] 2025-10-05 11:09:42,776 - root - INFO - step: 20505 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8339 +[titan] 2025-10-05 11:09:42,776 - root - INFO - lr: 2.6955e-05 gnorm: 1.10 [12:35:32<11:58:19] +[titan] 2025-10-05 11:09:53,605 - root - INFO - step: 20510 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8968 +[titan] 2025-10-05 11:09:53,605 - root - INFO - lr: 2.6946e-05 gnorm: 1.06 [12:35:43<11:58:08] +[titan] 2025-10-05 11:10:04,473 - root - INFO - step: 20515 loss: 2.1247 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8771 +[titan] 2025-10-05 11:10:04,473 - root - INFO - lr: 2.6937e-05 gnorm: 1.06 [12:35:54<11:57:57] +[titan] 2025-10-05 11:10:15,308 - root - INFO - step: 20520 loss: 2.1987 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9434 +[titan] 2025-10-05 11:10:15,308 - root - INFO - lr: 2.6928e-05 gnorm: 1.06 [12:36:05<11:57:45] +[titan] 2025-10-05 11:10:26,169 - root - INFO - step: 20525 loss: 2.1470 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8974 +[titan] 2025-10-05 11:10:26,170 - root - INFO - lr: 2.6919e-05 gnorm: 1.04 [12:36:16<11:57:34] +[titan] 2025-10-05 11:10:37,027 - root - INFO - step: 20530 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8850 +[titan] 2025-10-05 11:10:37,027 - root - INFO - lr: 
2.6910e-05 gnorm: 1.13 [12:36:26<11:57:23] +[titan] 2025-10-05 11:10:47,875 - root - INFO - step: 20535 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 11:10:47,875 - root - INFO - lr: 2.6901e-05 gnorm: 1.03 [12:36:37<11:57:12] +[titan] 2025-10-05 11:10:58,732 - root - INFO - step: 20540 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:10:58,733 - root - INFO - lr: 2.6892e-05 gnorm: 1.06 [12:36:48<11:57:01] +[titan] 2025-10-05 11:11:09,619 - root - INFO - step: 20545 loss: 2.1707 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:11:09,619 - root - INFO - lr: 2.6883e-05 gnorm: 1.10 [12:36:59<11:56:49] +[titan] 2025-10-05 11:11:18,306 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:11:20,486 - root - INFO - step: 20550 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 11:11:20,487 - root - INFO - lr: 2.6874e-05 gnorm: 2.06 [12:37:10<11:56:38] +[titan] 2025-10-05 11:11:31,328 - root - INFO - step: 20555 loss: 2.2027 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 11:11:31,329 - root - INFO - lr: 2.6866e-05 gnorm: 1.09 [12:37:21<11:56:27] +[titan] 2025-10-05 11:11:42,212 - root - INFO - step: 20560 loss: 2.0837 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 11:11:42,212 - root - INFO - lr: 2.6857e-05 gnorm: 1.05 [12:37:32<11:56:16] +[titan] 2025-10-05 11:11:53,051 - root - INFO - step: 20565 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 11:11:53,051 - root - INFO - lr: 2.6848e-05 gnorm: 1.08 [12:37:42<11:56:04] +[titan] 2025-10-05 11:12:03,886 - root - INFO - step: 20570 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 11:12:03,887 - root - INFO - lr: 2.6839e-05 gnorm: 1.14 [12:37:53<11:55:53] +[titan] 2025-10-05 11:12:14,773 - root - INFO - step: 20575 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 11:12:14,773 - root - INFO - lr: 2.6830e-05 gnorm: 1.09 [12:38:04<11:55:42] +[titan] 2025-10-05 11:12:25,620 - root - INFO - step: 20580 loss: 2.0736 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8329 +[titan] 2025-10-05 11:12:25,620 - root - INFO - lr: 
2.6821e-05 gnorm: 1.09 [12:38:15<11:55:31] +[titan] 2025-10-05 11:12:36,467 - root - INFO - step: 20585 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 11:12:36,467 - root - INFO - lr: 2.6812e-05 gnorm: 1.05 [12:38:26<11:55:19] +[titan] 2025-10-05 11:12:47,318 - root - INFO - step: 20590 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 11:12:47,318 - root - INFO - lr: 2.6803e-05 gnorm: 1.07 [12:38:37<11:55:08] +[titan] 2025-10-05 11:12:58,203 - root - INFO - step: 20595 loss: 2.1151 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8687 +[titan] 2025-10-05 11:12:58,203 - root - INFO - lr: 2.6794e-05 gnorm: 1.07 [12:38:48<11:54:57] +[titan] 2025-10-05 11:13:06,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:13:09,064 - root - INFO - step: 20600 loss: 2.1894 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 11:13:09,064 - root - INFO - lr: 2.6785e-05 gnorm: 1.09 [12:38:58<11:54:46] +[titan] 2025-10-05 11:13:19,929 - root - INFO - step: 20605 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 11:13:19,929 - root - INFO - lr: 2.6776e-05 gnorm: 1.07 [12:39:09<11:54:34] +[titan] 2025-10-05 11:13:30,796 - root - INFO - step: 20610 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 11:13:30,796 - root - INFO - lr: 2.6767e-05 gnorm: 1.06 [12:39:20<11:54:23] +[titan] 2025-10-05 11:13:41,654 - root - INFO - step: 20615 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8398 +[titan] 2025-10-05 11:13:41,654 - root - INFO - lr: 2.6758e-05 gnorm: 1.03 [12:39:31<11:54:12] +[titan] 2025-10-05 11:13:52,508 - root - INFO - step: 20620 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 11:13:52,508 - root - INFO - lr: 2.6750e-05 gnorm: 1.06 [12:39:42<11:54:01] +[titan] 2025-10-05 11:14:03,381 - root - INFO - step: 20625 loss: 2.1197 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 11:14:03,381 - root - INFO - lr: 2.6741e-05 gnorm: 1.06 [12:39:53<11:53:50] +[titan] 2025-10-05 11:14:14,251 - root - INFO - step: 20630 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:14:14,251 - root - INFO - lr: 
2.6732e-05 gnorm: 1.06 [12:40:04<11:53:38] +[titan] 2025-10-05 11:14:25,097 - root - INFO - step: 20635 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:14:25,098 - root - INFO - lr: 2.6723e-05 gnorm: 1.08 [12:40:14<11:53:27] +[titan] 2025-10-05 11:14:35,947 - root - INFO - step: 20640 loss: 2.0980 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8548 +[titan] 2025-10-05 11:14:35,947 - root - INFO - lr: 2.6714e-05 gnorm: 1.09 [12:40:25<11:53:16] +[titan] 2025-10-05 11:14:46,798 - root - INFO - step: 20645 loss: 2.1242 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8770 +[titan] 2025-10-05 11:14:46,799 - root - INFO - lr: 2.6705e-05 gnorm: 1.09 [12:40:36<11:53:05] +[titan] 2025-10-05 11:14:55,473 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:14:57,653 - root - INFO - step: 20650 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:14:57,653 - root - INFO - lr: 2.6696e-05 gnorm: 1.08 [12:40:47<11:52:53] +[titan] 2025-10-05 11:15:08,530 - root - INFO - step: 20655 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 11:15:08,530 - root - INFO - lr: 2.6687e-05 gnorm: 1.08 [12:40:58<11:52:42] +[titan] 2025-10-05 11:15:19,423 - root - INFO - step: 20660 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 11:15:19,423 - root - INFO - lr: 2.6678e-05 gnorm: 1.15 [12:41:09<11:52:31] +[titan] 2025-10-05 11:15:30,279 - root - INFO - step: 20665 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:15:30,279 - root - INFO - lr: 2.6669e-05 gnorm: 1.06 [12:41:20<11:52:20] +[titan] 2025-10-05 11:15:41,155 - root - INFO - step: 20670 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9037 +[titan] 2025-10-05 11:15:41,156 - root - INFO - lr: 2.6660e-05 gnorm: 1.05 [12:41:31<11:52:08] +[titan] 2025-10-05 11:15:52,007 - root - INFO - step: 20675 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 11:15:52,007 - root - INFO - lr: 2.6651e-05 gnorm: 1.04 [12:41:41<11:51:57] +[titan] 2025-10-05 11:16:02,840 - root - INFO - step: 20680 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:16:02,840 - root - INFO - lr: 
2.6643e-05 gnorm: 1.03 [12:41:52<11:51:46] +[titan] 2025-10-05 11:16:13,755 - root - INFO - step: 20685 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9115 +[titan] 2025-10-05 11:16:13,756 - root - INFO - lr: 2.6634e-05 gnorm: 1.04 [12:42:03<11:51:35] +[titan] 2025-10-05 11:16:24,631 - root - INFO - step: 20690 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:16:24,632 - root - INFO - lr: 2.6625e-05 gnorm: 1.05 [12:42:14<11:51:24] +[titan] 2025-10-05 11:16:35,463 - root - INFO - step: 20695 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 11:16:35,464 - root - INFO - lr: 2.6616e-05 gnorm: 1.10 [12:42:25<11:51:12] +[titan] 2025-10-05 11:16:44,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:16:46,315 - root - INFO - step: 20700 loss: 2.1496 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:16:46,316 - root - INFO - lr: 2.6607e-05 gnorm: 1.10 [12:42:36<11:51:01] +[titan] 2025-10-05 11:16:57,157 - root - INFO - step: 20705 loss: 2.0983 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 11:16:57,157 - root - INFO - lr: 2.6598e-05 gnorm: 1.04 [12:42:47<11:50:50] +[titan] 2025-10-05 11:17:08,007 - root - INFO - step: 20710 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 11:17:08,007 - root - INFO - lr: 2.6589e-05 gnorm: 1.07 [12:42:57<11:50:39] +[titan] 2025-10-05 11:17:18,892 - root - INFO - step: 20715 loss: 2.1366 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8867 +[titan] 2025-10-05 11:17:18,892 - root - INFO - lr: 2.6580e-05 gnorm: 1.14 [12:43:08<11:50:27] +[titan] 2025-10-05 11:17:29,767 - root - INFO - step: 20720 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:17:29,768 - root - INFO - lr: 2.6571e-05 gnorm: 1.04 [12:43:19<11:50:16] +[titan] 2025-10-05 11:17:40,628 - root - INFO - step: 20725 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9005 +[titan] 2025-10-05 11:17:40,628 - root - INFO - lr: 2.6562e-05 gnorm: 1.09 [12:43:30<11:50:05] +[titan] 2025-10-05 11:17:51,474 - root - INFO - step: 20730 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:17:51,474 - root - INFO - lr: 
2.6553e-05 gnorm: 1.10 [12:43:41<11:49:54] +[titan] 2025-10-05 11:18:02,326 - root - INFO - step: 20735 loss: 2.1204 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:18:02,326 - root - INFO - lr: 2.6544e-05 gnorm: 1.06 [12:43:52<11:49:42] +[titan] 2025-10-05 11:18:13,213 - root - INFO - step: 20740 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8841 +[titan] 2025-10-05 11:18:13,213 - root - INFO - lr: 2.6536e-05 gnorm: 1.08 [12:44:03<11:49:31] +[titan] 2025-10-05 11:18:24,093 - root - INFO - step: 20745 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 11:18:24,093 - root - INFO - lr: 2.6527e-05 gnorm: 1.05 [12:44:13<11:49:20] +[titan] 2025-10-05 11:18:32,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:18:34,984 - root - INFO - step: 20750 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:18:34,984 - root - INFO - lr: 2.6518e-05 gnorm: 1.06 [12:44:24<11:49:09] +[titan] 2025-10-05 11:18:45,854 - root - INFO - step: 20755 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:18:45,854 - root - INFO - lr: 2.6509e-05 gnorm: 1.09 [12:44:35<11:48:58] +[titan] 2025-10-05 11:18:56,673 - root - INFO - step: 20760 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:18:56,674 - root - INFO - lr: 2.6500e-05 gnorm: 1.04 [12:44:46<11:48:46] +[titan] 2025-10-05 11:19:07,503 - root - INFO - step: 20765 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8648 +[titan] 2025-10-05 11:19:07,504 - root - INFO - lr: 2.6491e-05 gnorm: 1.08 [12:44:57<11:48:35] +[titan] 2025-10-05 11:19:18,411 - root - INFO - step: 20770 loss: 2.2056 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9489 +[titan] 2025-10-05 11:19:18,411 - root - INFO - lr: 2.6482e-05 gnorm: 1.12 [12:45:08<11:48:24] +[titan] 2025-10-05 11:19:29,234 - root - INFO - step: 20775 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8766 +[titan] 2025-10-05 11:19:29,234 - root - INFO - lr: 2.6473e-05 gnorm: 1.05 [12:45:19<11:48:13] +[titan] 2025-10-05 11:19:40,065 - root - INFO - step: 20780 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 11:19:40,065 - root - INFO - lr: 
2.6464e-05 gnorm: 1.08 [12:45:29<11:48:01] +[titan] 2025-10-05 11:19:50,928 - root - INFO - step: 20785 loss: 2.1284 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:19:50,928 - root - INFO - lr: 2.6455e-05 gnorm: 1.03 [12:45:40<11:47:50] +[titan] 2025-10-05 11:20:01,769 - root - INFO - step: 20790 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 11:20:01,769 - root - INFO - lr: 2.6446e-05 gnorm: 1.07 [12:45:51<11:47:39] +[titan] 2025-10-05 11:20:12,646 - root - INFO - step: 20795 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 11:20:12,646 - root - INFO - lr: 2.6437e-05 gnorm: 1.10 [12:46:02<11:47:28] +[titan] 2025-10-05 11:20:21,353 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:20:23,533 - root - INFO - step: 20800 loss: 2.0768 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 11:20:23,533 - root - INFO - lr: 2.6429e-05 gnorm: 1.06 [12:46:13<11:47:16] +[titan] 2025-10-05 11:20:34,392 - root - INFO - step: 20805 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:20:34,392 - root - INFO - lr: 2.6420e-05 gnorm: 1.09 [12:46:24<11:47:05] +[titan] 2025-10-05 11:20:45,231 - root - INFO - step: 20810 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 11:20:45,231 - root - INFO - lr: 2.6411e-05 gnorm: 1.04 [12:46:35<11:46:54] +[titan] 2025-10-05 11:20:56,074 - root - INFO - step: 20815 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:20:56,074 - root - INFO - lr: 2.6402e-05 gnorm: 1.08 [12:46:45<11:46:43] +[titan] 2025-10-05 11:21:06,980 - root - INFO - step: 20820 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:21:06,980 - root - INFO - lr: 2.6393e-05 gnorm: 1.07 [12:46:56<11:46:32] +[titan] 2025-10-05 11:21:17,884 - root - INFO - step: 20825 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 11:21:17,884 - root - INFO - lr: 2.6384e-05 gnorm: 1.08 [12:47:07<11:46:20] +[titan] 2025-10-05 11:21:28,741 - root - INFO - step: 20830 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:21:28,741 - root - INFO - lr: 
2.6375e-05 gnorm: 1.08 [12:47:18<11:46:09] +[titan] 2025-10-05 11:21:39,613 - root - INFO - step: 20835 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8881 +[titan] 2025-10-05 11:21:39,613 - root - INFO - lr: 2.6366e-05 gnorm: 1.07 [12:47:29<11:45:58] +[titan] 2025-10-05 11:21:50,471 - root - INFO - step: 20840 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 11:21:50,471 - root - INFO - lr: 2.6357e-05 gnorm: 1.03 [12:47:40<11:45:47] +[titan] 2025-10-05 11:22:01,325 - root - INFO - step: 20845 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:22:01,325 - root - INFO - lr: 2.6348e-05 gnorm: 1.07 [12:47:51<11:45:35] +[titan] 2025-10-05 11:22:10,042 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:22:12,228 - root - INFO - step: 20850 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:22:12,228 - root - INFO - lr: 2.6339e-05 gnorm: 1.01 [12:48:02<11:45:24] +[titan] 2025-10-05 11:22:23,145 - root - INFO - step: 20855 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 11:22:23,145 - root - INFO - lr: 2.6330e-05 gnorm: 1.08 [12:48:12<11:45:13] +[titan] 2025-10-05 11:22:33,976 - root - INFO - step: 20860 loss: 2.1509 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:22:33,976 - root - INFO - lr: 2.6322e-05 gnorm: 1.08 [12:48:23<11:45:02] +[titan] 2025-10-05 11:22:44,818 - root - INFO - step: 20865 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 11:22:44,819 - root - INFO - lr: 2.6313e-05 gnorm: 1.08 [12:48:34<11:44:51] +[titan] 2025-10-05 11:22:55,670 - root - INFO - step: 20870 loss: 2.1029 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 11:22:55,670 - root - INFO - lr: 2.6304e-05 gnorm: 1.04 [12:48:45<11:44:39] +[titan] 2025-10-05 11:23:06,495 - root - INFO - step: 20875 loss: 2.1668 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 11:23:06,495 - root - INFO - lr: 2.6295e-05 gnorm: 1.03 [12:48:56<11:44:28] +[titan] 2025-10-05 11:23:17,425 - root - INFO - step: 20880 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 11:23:17,426 - root - INFO - lr: 
2.6286e-05 gnorm: 1.06 [12:49:07<11:44:17] +[titan] 2025-10-05 11:23:28,304 - root - INFO - step: 20885 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:23:28,305 - root - INFO - lr: 2.6277e-05 gnorm: 1.02 [12:49:18<11:44:06] +[titan] 2025-10-05 11:23:39,146 - root - INFO - step: 20890 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 11:23:39,147 - root - INFO - lr: 2.6268e-05 gnorm: 1.04 [12:49:28<11:43:55] +[titan] 2025-10-05 11:23:50,019 - root - INFO - step: 20895 loss: 2.1373 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:23:50,019 - root - INFO - lr: 2.6259e-05 gnorm: 1.05 [12:49:39<11:43:43] +[titan] 2025-10-05 11:23:58,682 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:24:00,862 - root - INFO - step: 20900 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 11:24:00,862 - root - INFO - lr: 2.6250e-05 gnorm: 1.08 [12:49:50<11:43:32] +[titan] 2025-10-05 11:24:11,693 - root - INFO - step: 20905 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8801 +[titan] 2025-10-05 11:24:11,693 - root - INFO - lr: 2.6241e-05 gnorm: 1.09 [12:50:01<11:43:21] +[titan] 2025-10-05 11:24:22,592 - root - INFO - step: 20910 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8505 +[titan] 2025-10-05 11:24:22,592 - root - INFO - lr: 2.6232e-05 gnorm: 1.06 [12:50:12<11:43:10] +[titan] 2025-10-05 11:24:33,463 - root - INFO - step: 20915 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 11:24:33,463 - root - INFO - lr: 2.6224e-05 gnorm: 1.05 [12:50:23<11:42:58] +[titan] 2025-10-05 11:24:44,313 - root - INFO - step: 20920 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:24:44,314 - root - INFO - lr: 2.6215e-05 gnorm: 1.05 [12:50:34<11:42:47] +[titan] 2025-10-05 11:24:55,176 - root - INFO - step: 20925 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8450 +[titan] 2025-10-05 11:24:55,176 - root - INFO - lr: 2.6206e-05 gnorm: 1.05 [12:50:45<11:42:36] +[titan] 2025-10-05 11:25:06,030 - root - INFO - step: 20930 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8992 +[titan] 2025-10-05 11:25:06,030 - root - INFO - lr: 
2.6197e-05 gnorm: 1.10 [12:50:55<11:42:25] +[titan] 2025-10-05 11:25:16,898 - root - INFO - step: 20935 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8741 +[titan] 2025-10-05 11:25:16,898 - root - INFO - lr: 2.6188e-05 gnorm: 1.05 [12:51:06<11:42:14] +[titan] 2025-10-05 11:25:27,781 - root - INFO - step: 20940 loss: 2.1440 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:25:27,781 - root - INFO - lr: 2.6179e-05 gnorm: 1.04 [12:51:17<11:42:02] +[titan] 2025-10-05 11:25:38,668 - root - INFO - step: 20945 loss: 2.1635 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 11:25:38,668 - root - INFO - lr: 2.6170e-05 gnorm: 1.04 [12:51:28<11:41:51] +[titan] 2025-10-05 11:25:47,372 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:25:49,579 - root - INFO - step: 20950 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8588 +[titan] 2025-10-05 11:25:49,579 - root - INFO - lr: 2.6161e-05 gnorm: 1.02 [12:51:39<11:41:40] +[titan] 2025-10-05 11:26:00,466 - root - INFO - step: 20955 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9476 +[titan] 2025-10-05 11:26:00,466 - root - INFO - lr: 2.6152e-05 gnorm: 1.08 [12:51:50<11:41:29] +[titan] 2025-10-05 11:26:11,358 - root - INFO - step: 20960 loss: 2.1680 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 11:26:11,358 - root - INFO - lr: 2.6143e-05 gnorm: 1.07 [12:52:01<11:41:18] +[titan] 2025-10-05 11:26:22,285 - root - INFO - step: 20965 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 11:26:22,285 - root - INFO - lr: 2.6134e-05 gnorm: 1.03 [12:52:12<11:41:06] +[titan] 2025-10-05 11:26:33,153 - root - INFO - step: 20970 loss: 2.0712 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 11:26:33,153 - root - INFO - lr: 2.6126e-05 gnorm: 1.04 [12:52:22<11:40:55] +[titan] 2025-10-05 11:26:44,020 - root - INFO - step: 20975 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:26:44,021 - root - INFO - lr: 2.6117e-05 gnorm: 1.09 [12:52:33<11:40:44] +[titan] 2025-10-05 11:26:54,991 - root - INFO - step: 20980 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 11:26:54,992 - root - INFO - lr: 
2.6108e-05 gnorm: 1.07 [12:52:44<11:40:33] +[titan] 2025-10-05 11:27:05,851 - root - INFO - step: 20985 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8877 +[titan] 2025-10-05 11:27:05,851 - root - INFO - lr: 2.6099e-05 gnorm: 1.11 [12:52:55<11:40:22] +[titan] 2025-10-05 11:27:16,808 - root - INFO - step: 20990 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:27:16,809 - root - INFO - lr: 2.6090e-05 gnorm: 1.08 [12:53:06<11:40:10] +[titan] 2025-10-05 11:27:21,382 - root - INFO - Dumping profiler traces at step 20992 +[titan] 2025-10-05 11:27:21,421 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:27:27,954 - root - INFO - step: 20995 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,401 tflops: 407.90 mfu: 41.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 11:27:27,954 - root - INFO - lr: 2.6081e-05 gnorm: 1.05 [12:53:17<11:40:00] +[titan] 2025-10-05 11:27:36,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:27:38,817 - root - INFO - step: 21000 loss: 2.1220 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8750 +[titan] 2025-10-05 11:27:38,817 - root - INFO - lr: 2.6072e-05 gnorm: 1.05 [12:53:28<11:39:48] +[titan] 2025-10-05 11:27:49,677 - root - INFO - step: 21005 loss: 2.1703 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:27:49,677 - root - INFO - lr: 2.6063e-05 gnorm: 1.10 [12:53:39<11:39:37] +[titan] 2025-10-05 11:28:00,541 - root - INFO - step: 21010 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 11:28:00,541 - root - INFO - lr: 2.6054e-05 gnorm: 1.05 [12:53:50<11:39:26] +[titan] 2025-10-05 11:28:11,383 - root - INFO - step: 21015 loss: 2.1081 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8636 +[titan] 2025-10-05 11:28:11,384 - root - INFO - lr: 2.6045e-05 gnorm: 1.04 [12:54:01<11:39:15] +[titan] 2025-10-05 11:28:22,286 - root - INFO - step: 21020 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:28:22,286 - root - INFO - lr: 2.6036e-05 gnorm: 1.10 [12:54:12<11:39:03] +[titan] 2025-10-05 11:28:33,136 - root - INFO - step: 21025 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 11:28:33,136 - root - INFO - lr: 2.6028e-05 gnorm: 1.07 [12:54:22<11:38:52] +[titan] 2025-10-05 11:28:43,995 - root - INFO - step: 21030 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8395 +[titan] 2025-10-05 11:28:43,995 - root - INFO - lr: 
2.6019e-05 gnorm: 1.06 [12:54:33<11:38:41] +[titan] 2025-10-05 11:28:54,868 - root - INFO - step: 21035 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8811 +[titan] 2025-10-05 11:28:54,868 - root - INFO - lr: 2.6010e-05 gnorm: 1.09 [12:54:44<11:38:30] +[titan] 2025-10-05 11:29:05,770 - root - INFO - step: 21040 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 11:29:05,770 - root - INFO - lr: 2.6001e-05 gnorm: 1.06 [12:54:55<11:38:19] +[titan] 2025-10-05 11:29:16,625 - root - INFO - step: 21045 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:29:16,625 - root - INFO - lr: 2.5992e-05 gnorm: 1.04 [12:55:06<11:38:07] +[titan] 2025-10-05 11:29:25,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:29:27,546 - root - INFO - step: 21050 loss: 2.1350 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:29:27,546 - root - INFO - lr: 2.5983e-05 gnorm: 1.09 [12:55:17<11:37:56] +[titan] 2025-10-05 11:29:38,415 - root - INFO - step: 21055 loss: 2.0977 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8537 +[titan] 2025-10-05 11:29:38,415 - root - INFO - lr: 2.5974e-05 gnorm: 1.05 [12:55:28<11:37:45] +[titan] 2025-10-05 11:29:49,289 - root - INFO - step: 21060 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 11:29:49,289 - root - INFO - lr: 2.5965e-05 gnorm: 1.09 [12:55:39<11:37:34] +[titan] 2025-10-05 11:30:00,149 - root - INFO - step: 21065 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 11:30:00,149 - root - INFO - lr: 2.5956e-05 gnorm: 1.09 [12:55:49<11:37:23] +[titan] 2025-10-05 11:30:11,032 - root - INFO - step: 21070 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:30:11,032 - root - INFO - lr: 2.5947e-05 gnorm: 1.08 [12:56:00<11:37:11] +[titan] 2025-10-05 11:30:21,932 - root - INFO - step: 21075 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 11:30:21,932 - root - INFO - lr: 2.5939e-05 gnorm: 1.07 [12:56:11<11:37:00] +[titan] 2025-10-05 11:30:32,855 - root - INFO - step: 21080 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8856 +[titan] 2025-10-05 11:30:32,855 - root - INFO - lr: 
2.5930e-05 gnorm: 1.07 [12:56:22<11:36:49] +[titan] 2025-10-05 11:30:43,698 - root - INFO - step: 21085 loss: 2.1181 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:30:43,699 - root - INFO - lr: 2.5921e-05 gnorm: 1.11 [12:56:33<11:36:38] +[titan] 2025-10-05 11:30:54,563 - root - INFO - step: 21090 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 11:30:54,563 - root - INFO - lr: 2.5912e-05 gnorm: 1.03 [12:56:44<11:36:27] +[titan] 2025-10-05 11:31:05,426 - root - INFO - step: 21095 loss: 2.2239 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9636 +[titan] 2025-10-05 11:31:05,427 - root - INFO - lr: 2.5903e-05 gnorm: 1.06 [12:56:55<11:36:15] +[titan] 2025-10-05 11:31:14,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:31:16,304 - root - INFO - step: 21100 loss: 2.0959 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 11:31:16,304 - root - INFO - lr: 2.5894e-05 gnorm: 1.03 [12:57:06<11:36:04] +[titan] 2025-10-05 11:31:27,255 - root - INFO - step: 21105 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 11:31:27,256 - root - INFO - lr: 2.5885e-05 gnorm: 1.07 [12:57:17<11:35:53] +[titan] 2025-10-05 11:31:38,131 - root - INFO - step: 21110 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8621 +[titan] 2025-10-05 11:31:38,132 - root - INFO - lr: 2.5876e-05 gnorm: 1.06 [12:57:27<11:35:42] +[titan] 2025-10-05 11:31:49,004 - root - INFO - step: 21115 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:31:49,004 - root - INFO - lr: 2.5867e-05 gnorm: 1.07 [12:57:38<11:35:31] +[titan] 2025-10-05 11:31:59,893 - root - INFO - step: 21120 loss: 2.0727 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8315 +[titan] 2025-10-05 11:31:59,893 - root - INFO - lr: 2.5858e-05 gnorm: 1.07 [12:57:49<11:35:19] +[titan] 2025-10-05 11:32:10,768 - root - INFO - step: 21125 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 11:32:10,769 - root - INFO - lr: 2.5850e-05 gnorm: 1.07 [12:58:00<11:35:08] +[titan] 2025-10-05 11:32:21,633 - root - INFO - step: 21130 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8324 +[titan] 2025-10-05 11:32:21,633 - root - INFO - lr: 
2.5841e-05 gnorm: 1.05 [12:58:11<11:34:57] +[titan] 2025-10-05 11:32:32,656 - root - INFO - step: 21135 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 29,729 tflops: 412.44 mfu: 41.70% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 11:32:32,656 - root - INFO - lr: 2.5832e-05 gnorm: 1.08 [12:58:22<11:34:46] +[titan] 2025-10-05 11:32:43,550 - root - INFO - step: 21140 loss: 2.1392 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 11:32:43,550 - root - INFO - lr: 2.5823e-05 gnorm: 1.07 [12:58:33<11:34:35] +[titan] 2025-10-05 11:32:54,408 - root - INFO - step: 21145 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 11:32:54,408 - root - INFO - lr: 2.5814e-05 gnorm: 1.06 [12:58:44<11:34:24] +[titan] 2025-10-05 11:33:03,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:33:05,258 - root - INFO - step: 21150 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8836 +[titan] 2025-10-05 11:33:05,258 - root - INFO - lr: 2.5805e-05 gnorm: 1.09 [12:58:55<11:34:12] +[titan] 2025-10-05 11:33:16,124 - root - INFO - step: 21155 loss: 2.1477 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 11:33:16,124 - root - INFO - lr: 2.5796e-05 gnorm: 1.07 [12:59:05<11:34:01] +[titan] 2025-10-05 11:33:27,050 - root - INFO - step: 21160 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 11:33:27,050 - root - INFO - lr: 2.5787e-05 gnorm: 1.06 [12:59:16<11:33:50] +[titan] 2025-10-05 11:33:37,906 - root - INFO - step: 21165 loss: 2.1021 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 11:33:37,906 - root - INFO - lr: 2.5778e-05 gnorm: 1.06 [12:59:27<11:33:39] +[titan] 2025-10-05 11:33:48,805 - root - INFO - step: 21170 loss: 2.1153 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8695 +[titan] 2025-10-05 11:33:48,805 - root - INFO - lr: 2.5769e-05 gnorm: 1.10 [12:59:38<11:33:28] +[titan] 2025-10-05 11:33:59,670 - root - INFO - step: 21175 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 11:33:59,670 - root - INFO - lr: 2.5761e-05 gnorm: 1.05 [12:59:49<11:33:16] +[titan] 2025-10-05 11:34:10,542 - root - INFO - step: 21180 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8645 +[titan] 2025-10-05 11:34:10,542 - root - INFO - lr: 
2.5752e-05 gnorm: 1.07 [13:00:00<11:33:05] +[titan] 2025-10-05 11:34:21,425 - root - INFO - step: 21185 loss: 2.0963 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8530 +[titan] 2025-10-05 11:34:21,425 - root - INFO - lr: 2.5743e-05 gnorm: 1.01 [13:00:11<11:32:54] +[titan] 2025-10-05 11:34:32,352 - root - INFO - step: 21190 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:34:32,352 - root - INFO - lr: 2.5734e-05 gnorm: 1.08 [13:00:22<11:32:43] +[titan] 2025-10-05 11:34:43,216 - root - INFO - step: 21195 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 11:34:43,216 - root - INFO - lr: 2.5725e-05 gnorm: 1.04 [13:00:33<11:32:32] +[titan] 2025-10-05 11:34:51,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:34:54,111 - root - INFO - step: 21200 loss: 2.0921 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 11:34:54,111 - root - INFO - lr: 2.5716e-05 gnorm: 1.07 [13:00:43<11:32:20] +[titan] 2025-10-05 11:35:04,964 - root - INFO - step: 21205 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 11:35:04,964 - root - INFO - lr: 2.5707e-05 gnorm: 1.09 [13:00:54<11:32:09] +[titan] 2025-10-05 11:35:15,826 - root - INFO - step: 21210 loss: 2.1528 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 11:35:15,827 - root - INFO - lr: 2.5698e-05 gnorm: 1.09 [13:01:05<11:31:58] +[titan] 2025-10-05 11:35:26,686 - root - INFO - step: 21215 loss: 2.1911 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:35:26,686 - root - INFO - lr: 2.5689e-05 gnorm: 1.11 [13:01:16<11:31:47] +[titan] 2025-10-05 11:35:37,615 - root - INFO - step: 21220 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 11:35:37,615 - root - INFO - lr: 2.5680e-05 gnorm: 1.08 [13:01:27<11:31:36] +[titan] 2025-10-05 11:35:48,489 - root - INFO - step: 21225 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 11:35:48,489 - root - INFO - lr: 2.5672e-05 gnorm: 1.11 [13:01:38<11:31:24] +[titan] 2025-10-05 11:35:59,356 - root - INFO - step: 21230 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:35:59,356 - root - INFO - lr: 
2.5663e-05 gnorm: 1.08 [13:01:49<11:31:13] +[titan] 2025-10-05 11:36:10,239 - root - INFO - step: 21235 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8980 +[titan] 2025-10-05 11:36:10,239 - root - INFO - lr: 2.5654e-05 gnorm: 1.09 [13:02:00<11:31:02] +[titan] 2025-10-05 11:36:21,092 - root - INFO - step: 21240 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:36:21,092 - root - INFO - lr: 2.5645e-05 gnorm: 1.05 [13:02:10<11:30:51] +[titan] 2025-10-05 11:36:32,020 - root - INFO - step: 21245 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 11:36:32,021 - root - INFO - lr: 2.5636e-05 gnorm: 1.09 [13:02:21<11:30:40] +[titan] 2025-10-05 11:36:40,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:36:42,889 - root - INFO - step: 21250 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 11:36:42,889 - root - INFO - lr: 2.5627e-05 gnorm: 1.07 [13:02:32<11:30:28] +[titan] 2025-10-05 11:36:53,745 - root - INFO - step: 21255 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 11:36:53,746 - root - INFO - lr: 2.5618e-05 gnorm: 1.05 [13:02:43<11:30:17] +[titan] 2025-10-05 11:37:04,622 - root - INFO - step: 21260 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 11:37:04,622 - root - INFO - lr: 2.5609e-05 gnorm: 1.08 [13:02:54<11:30:06] +[titan] 2025-10-05 11:37:15,535 - root - INFO - step: 21265 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 11:37:15,536 - root - INFO - lr: 2.5600e-05 gnorm: 1.08 [13:03:05<11:29:55] +[titan] 2025-10-05 11:37:26,391 - root - INFO - step: 21270 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 11:37:26,392 - root - INFO - lr: 2.5592e-05 gnorm: 1.05 [13:03:16<11:29:44] +[titan] 2025-10-05 11:37:37,276 - root - INFO - step: 21275 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 11:37:37,276 - root - INFO - lr: 2.5583e-05 gnorm: 1.07 [13:03:27<11:29:32] +[titan] 2025-10-05 11:37:48,150 - root - INFO - step: 21280 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8442 +[titan] 2025-10-05 11:37:48,150 - root - INFO - lr: 
2.5574e-05 gnorm: 1.05 [13:03:37<11:29:21] +[titan] 2025-10-05 11:37:59,010 - root - INFO - step: 21285 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 11:37:59,010 - root - INFO - lr: 2.5565e-05 gnorm: 1.07 [13:03:48<11:29:10] +[titan] 2025-10-05 11:38:09,872 - root - INFO - step: 21290 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8545 +[titan] 2025-10-05 11:38:09,872 - root - INFO - lr: 2.5556e-05 gnorm: 1.10 [13:03:59<11:28:59] +[titan] 2025-10-05 11:38:20,741 - root - INFO - step: 21295 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 11:38:20,741 - root - INFO - lr: 2.5547e-05 gnorm: 1.12 [13:04:10<11:28:48] +[titan] 2025-10-05 11:38:29,453 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:38:31,672 - root - INFO - step: 21300 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 11:38:31,672 - root - INFO - lr: 2.5538e-05 gnorm: 1.05 [13:04:21<11:28:36] +[titan] 2025-10-05 11:38:42,540 - root - INFO - step: 21305 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:38:42,540 - root - INFO - lr: 2.5529e-05 gnorm: 1.08 [13:04:32<11:28:25] +[titan] 2025-10-05 11:38:53,411 - root - INFO - step: 21310 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 11:38:53,411 - root - INFO - lr: 2.5520e-05 gnorm: 1.07 [13:04:43<11:28:14] +[titan] 2025-10-05 11:39:04,301 - root - INFO - step: 21315 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8790 +[titan] 2025-10-05 11:39:04,301 - root - INFO - lr: 2.5511e-05 gnorm: 1.10 [13:04:54<11:28:03] +[titan] 2025-10-05 11:39:15,170 - root - INFO - step: 21320 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8772 +[titan] 2025-10-05 11:39:15,170 - root - INFO - lr: 2.5503e-05 gnorm: 1.06 [13:05:04<11:27:52] +[titan] 2025-10-05 11:39:26,035 - root - INFO - step: 21325 loss: 2.1518 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:39:26,035 - root - INFO - lr: 2.5494e-05 gnorm: 1.08 [13:05:15<11:27:40] +[titan] 2025-10-05 11:39:36,994 - root - INFO - step: 21330 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8956 +[titan] 2025-10-05 11:39:36,994 - root - INFO - lr: 
2.5485e-05 gnorm: 1.06 [13:05:26<11:27:29] +[titan] 2025-10-05 11:39:47,849 - root - INFO - step: 21335 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 11:39:47,849 - root - INFO - lr: 2.5476e-05 gnorm: 1.03 [13:05:37<11:27:18] +[titan] 2025-10-05 11:39:58,709 - root - INFO - step: 21340 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 11:39:58,709 - root - INFO - lr: 2.5467e-05 gnorm: 1.07 [13:05:48<11:27:07] +[titan] 2025-10-05 11:40:09,576 - root - INFO - step: 21345 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 11:40:09,576 - root - INFO - lr: 2.5458e-05 gnorm: 1.05 [13:05:59<11:26:56] +[titan] 2025-10-05 11:40:18,258 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:40:20,450 - root - INFO - step: 21350 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9393 +[titan] 2025-10-05 11:40:20,450 - root - INFO - lr: 2.5449e-05 gnorm: 1.09 [13:06:10<11:26:44] +[titan] 2025-10-05 11:40:31,323 - root - INFO - step: 21355 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 11:40:31,323 - root - INFO - lr: 2.5440e-05 gnorm: 1.11 [13:06:21<11:26:33] +[titan] 2025-10-05 11:40:42,303 - root - INFO - step: 21360 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 29,846 tflops: 414.06 mfu: 41.87% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:40:42,303 - root - INFO - lr: 2.5431e-05 gnorm: 1.09 [13:06:32<11:26:22] +[titan] 2025-10-05 11:40:53,190 - root - INFO - step: 21365 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 11:40:53,190 - root - INFO - lr: 2.5423e-05 gnorm: 1.04 [13:06:42<11:26:11] +[titan] 2025-10-05 11:41:04,057 - root - INFO - step: 21370 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8252 +[titan] 2025-10-05 11:41:04,057 - root - INFO - lr: 2.5414e-05 gnorm: 1.06 [13:06:53<11:26:00] +[titan] 2025-10-05 11:41:14,914 - root - INFO - step: 21375 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 11:41:14,914 - root - INFO - lr: 2.5405e-05 gnorm: 1.05 [13:07:04<11:25:49] +[titan] 2025-10-05 11:41:25,788 - root - INFO - step: 21380 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 11:41:25,788 - root - INFO - lr: 
2.5396e-05 gnorm: 1.08 [13:07:15<11:25:37] +[titan] 2025-10-05 11:41:36,680 - root - INFO - step: 21385 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8705 +[titan] 2025-10-05 11:41:36,680 - root - INFO - lr: 2.5387e-05 gnorm: 1.06 [13:07:26<11:25:26] +[titan] 2025-10-05 11:41:47,564 - root - INFO - step: 21390 loss: 2.0660 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 11:41:47,564 - root - INFO - lr: 2.5378e-05 gnorm: 1.06 [13:07:37<11:25:15] +[titan] 2025-10-05 11:41:58,477 - root - INFO - step: 21395 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:41:58,477 - root - INFO - lr: 2.5369e-05 gnorm: 1.05 [13:07:48<11:25:04] +[titan] 2025-10-05 11:42:07,157 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:42:09,346 - root - INFO - step: 21400 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 11:42:09,346 - root - INFO - lr: 2.5360e-05 gnorm: 1.06 [13:07:59<11:24:53] +[titan] 2025-10-05 11:42:20,225 - root - INFO - step: 21405 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:42:20,226 - root - INFO - lr: 2.5352e-05 gnorm: 1.09 [13:08:10<11:24:41] +[titan] 2025-10-05 11:42:31,111 - root - INFO - step: 21410 loss: 2.1240 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:42:31,112 - root - INFO - lr: 2.5343e-05 gnorm: 1.12 [13:08:20<11:24:30] +[titan] 2025-10-05 11:42:42,010 - root - INFO - step: 21415 loss: 2.0961 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8538 +[titan] 2025-10-05 11:42:42,011 - root - INFO - lr: 2.5334e-05 gnorm: 1.06 [13:08:31<11:24:19] +[titan] 2025-10-05 11:42:52,881 - root - INFO - step: 21420 loss: 2.1163 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 11:42:52,881 - root - INFO - lr: 2.5325e-05 gnorm: 1.06 [13:08:42<11:24:08] +[titan] 2025-10-05 11:43:03,753 - root - INFO - step: 21425 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8953 +[titan] 2025-10-05 11:43:03,753 - root - INFO - lr: 2.5316e-05 gnorm: 1.05 [13:08:53<11:23:57] +[titan] 2025-10-05 11:43:14,617 - root - INFO - step: 21430 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8631 +[titan] 2025-10-05 11:43:14,617 - root - INFO - lr: 
2.5307e-05 gnorm: 1.07 [13:09:04<11:23:45] +[titan] 2025-10-05 11:43:25,474 - root - INFO - step: 21435 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 11:43:25,474 - root - INFO - lr: 2.5298e-05 gnorm: 1.04 [13:09:15<11:23:34] +[titan] 2025-10-05 11:43:36,449 - root - INFO - step: 21440 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.24 mfu: 41.89% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 11:43:36,449 - root - INFO - lr: 2.5289e-05 gnorm: 1.05 [13:09:26<11:23:23] +[titan] 2025-10-05 11:43:47,314 - root - INFO - step: 21445 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:43:47,314 - root - INFO - lr: 2.5280e-05 gnorm: 1.04 [13:09:37<11:23:12] +[titan] 2025-10-05 11:43:56,017 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:43:58,209 - root - INFO - step: 21450 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 11:43:58,209 - root - INFO - lr: 2.5272e-05 gnorm: 1.06 [13:09:48<11:23:01] +[titan] 2025-10-05 11:44:09,061 - root - INFO - step: 21455 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 11:44:09,062 - root - INFO - lr: 2.5263e-05 gnorm: 1.08 [13:09:58<11:22:50] +[titan] 2025-10-05 11:44:19,965 - root - INFO - step: 21460 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9120 +[titan] 2025-10-05 11:44:19,966 - root - INFO - lr: 2.5254e-05 gnorm: 1.07 [13:10:09<11:22:38] +[titan] 2025-10-05 11:44:30,808 - root - INFO - step: 21465 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:44:30,808 - root - INFO - lr: 2.5245e-05 gnorm: 1.04 [13:10:20<11:22:27] +[titan] 2025-10-05 11:44:41,706 - root - INFO - step: 21470 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:44:41,706 - root - INFO - lr: 2.5236e-05 gnorm: 1.08 [13:10:31<11:22:16] +[titan] 2025-10-05 11:44:52,552 - root - INFO - step: 21475 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8376 +[titan] 2025-10-05 11:44:52,552 - root - INFO - lr: 2.5227e-05 gnorm: 1.04 [13:10:42<11:22:05] +[titan] 2025-10-05 11:45:03,391 - root - INFO - step: 21480 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:45:03,391 - root - INFO - lr: 
2.5218e-05 gnorm: 1.07 [13:10:53<11:21:54] +[titan] 2025-10-05 11:45:14,218 - root - INFO - step: 21485 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 11:45:14,218 - root - INFO - lr: 2.5209e-05 gnorm: 1.09 [13:11:04<11:21:42] +[titan] 2025-10-05 11:45:25,127 - root - INFO - step: 21490 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 11:45:25,127 - root - INFO - lr: 2.5201e-05 gnorm: 1.06 [13:11:14<11:21:31] +[titan] 2025-10-05 11:45:35,950 - root - INFO - step: 21495 loss: 2.1076 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 11:45:35,951 - root - INFO - lr: 2.5192e-05 gnorm: 1.05 [13:11:25<11:21:20] +[titan] 2025-10-05 11:45:44,647 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:45:46,815 - root - INFO - step: 21500 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9073 +[titan] 2025-10-05 11:45:46,815 - root - INFO - lr: 2.5183e-05 gnorm: 1.08 [13:11:36<11:21:09] +[titan] 2025-10-05 11:45:55,753 - root - INFO - Dumping profiler traces at step 21504 +[titan] 2025-10-05 11:45:55,789 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:45:57,977 - root - INFO - step: 21505 loss: 2.1378 memory: 118.84GiB(85.28%) tps: 29,357 tflops: 407.29 mfu: 41.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8896 +[titan] 2025-10-05 11:45:57,978 - root - INFO - lr: 2.5174e-05 gnorm: 1.10 [13:11:47<11:20:58] +[titan] 2025-10-05 11:46:08,810 - root - INFO - step: 21510 loss: 2.1100 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:46:08,810 - root - INFO - lr: 2.5165e-05 gnorm: 1.08 [13:11:58<11:20:46] +[titan] 2025-10-05 11:46:19,644 - root - INFO - step: 21515 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:46:19,645 - root - INFO - lr: 2.5156e-05 gnorm: 1.05 [13:12:09<11:20:35] +[titan] 2025-10-05 11:46:30,518 - root - INFO - step: 21520 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 11:46:30,518 - root - INFO - lr: 2.5147e-05 gnorm: 1.08 [13:12:20<11:20:24] +[titan] 2025-10-05 11:46:41,409 - root - INFO - step: 21525 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 11:46:41,409 - root - INFO - lr: 2.5138e-05 gnorm: 1.08 [13:12:31<11:20:13] +[titan] 2025-10-05 11:46:52,228 - root - INFO - step: 21530 loss: 
2.0507 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 11:46:52,228 - root - INFO - lr: 2.5130e-05 gnorm: 1.06 [13:12:42<11:20:02] +[titan] 2025-10-05 11:47:03,059 - root - INFO - step: 21535 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 11:47:03,059 - root - INFO - lr: 2.5121e-05 gnorm: 1.03 [13:12:52<11:19:50] +[titan] 2025-10-05 11:47:13,907 - root - INFO - step: 21540 loss: 2.1549 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 11:47:13,907 - root - INFO - lr: 2.5112e-05 gnorm: 1.09 [13:13:03<11:19:39] +[titan] 2025-10-05 11:47:24,716 - root - INFO - step: 21545 loss: 2.1223 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 11:47:24,716 - root - INFO - lr: 2.5103e-05 gnorm: 1.07 [13:13:14<11:19:28] +[titan] 2025-10-05 11:47:33,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:47:35,549 - root - INFO - step: 21550 loss: 2.1493 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8994 +[titan] 2025-10-05 11:47:35,549 - root - INFO - lr: 2.5094e-05 gnorm: 1.05 [13:13:25<11:19:17] +[titan] 2025-10-05 11:47:46,489 - root - INFO - step: 21555 loss: 2.0469 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 11:47:46,490 - root - INFO - lr: 2.5085e-05 gnorm: 1.04 [13:13:36<11:19:06] +[titan] 2025-10-05 11:47:57,291 - root - INFO - step: 21560 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:47:57,291 - root - INFO - lr: 2.5076e-05 gnorm: 1.08 [13:13:47<11:18:54] +[titan] 2025-10-05 11:48:08,089 - root - INFO - step: 21565 loss: 2.0826 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 11:48:08,090 - root - INFO - lr: 2.5067e-05 gnorm: 1.06 [13:13:57<11:18:43] +[titan] 2025-10-05 11:48:18,889 - root - INFO - step: 21570 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:48:18,889 - root - INFO - lr: 2.5059e-05 gnorm: 1.09 [13:14:08<11:18:32] +[titan] 2025-10-05 11:48:29,708 - root - INFO - step: 21575 loss: 2.1425 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:48:29,709 - root - INFO - lr: 2.5050e-05 gnorm: 1.06 [13:14:19<11:18:21] +[titan] 2025-10-05 11:48:40,539 - root - INFO - step: 21580 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.76 mfu: 42.44% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 11:48:40,539 - root - INFO - lr: 
2.5041e-05 gnorm: 1.11 [13:14:30<11:18:09] +[titan] 2025-10-05 11:48:51,410 - root - INFO - step: 21585 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:48:51,410 - root - INFO - lr: 2.5032e-05 gnorm: 1.06 [13:14:41<11:17:58] +[titan] 2025-10-05 11:49:02,256 - root - INFO - step: 21590 loss: 2.1780 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9253 +[titan] 2025-10-05 11:49:02,256 - root - INFO - lr: 2.5023e-05 gnorm: 1.12 [13:14:52<11:17:47] +[titan] 2025-10-05 11:49:13,089 - root - INFO - step: 21595 loss: 2.1172 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 11:49:13,089 - root - INFO - lr: 2.5014e-05 gnorm: 1.10 [13:15:02<11:17:36] +[titan] 2025-10-05 11:49:21,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:49:23,936 - root - INFO - step: 21600 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 11:49:23,936 - root - INFO - lr: 2.5005e-05 gnorm: 1.09 [13:15:13<11:17:25] +[titan] 2025-10-05 11:49:34,750 - root - INFO - step: 21605 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8838 +[titan] 2025-10-05 11:49:34,751 - root - INFO - lr: 2.4996e-05 gnorm: 1.08 [13:15:24<11:17:13] +[titan] 2025-10-05 11:49:45,562 - root - INFO - step: 21610 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8037 +[titan] 2025-10-05 11:49:45,563 - root - INFO - lr: 2.4988e-05 gnorm: 1.02 [13:15:35<11:17:02] +[titan] 2025-10-05 11:49:56,369 - root - INFO - step: 21615 loss: 2.1371 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8883 +[titan] 2025-10-05 11:49:56,370 - root - INFO - lr: 2.4979e-05 gnorm: 1.04 [13:15:46<11:16:51] +[titan] 2025-10-05 11:50:07,237 - root - INFO - step: 21620 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:50:07,237 - root - INFO - lr: 2.4970e-05 gnorm: 1.05 [13:15:57<11:16:40] +[titan] 2025-10-05 11:50:18,053 - root - INFO - step: 21625 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8344 +[titan] 2025-10-05 11:50:18,053 - root - INFO - lr: 2.4961e-05 gnorm: 1.06 [13:16:07<11:16:28] +[titan] 2025-10-05 11:50:28,850 - root - INFO - step: 21630 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.05 mfu: 42.57% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:50:28,851 - root - INFO - lr: 
2.4952e-05 gnorm: 1.04 [13:16:18<11:16:17] +[titan] 2025-10-05 11:50:39,656 - root - INFO - step: 21635 loss: 2.0898 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:50:39,656 - root - INFO - lr: 2.4943e-05 gnorm: 1.09 [13:16:29<11:16:06] +[titan] 2025-10-05 11:50:50,529 - root - INFO - step: 21640 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 11:50:50,529 - root - INFO - lr: 2.4934e-05 gnorm: 1.06 [13:16:40<11:15:55] +[titan] 2025-10-05 11:51:01,328 - root - INFO - step: 21645 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:51:01,329 - root - INFO - lr: 2.4926e-05 gnorm: 1.04 [13:16:51<11:15:43] +[titan] 2025-10-05 11:51:09,997 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:51:12,168 - root - INFO - step: 21650 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 11:51:12,168 - root - INFO - lr: 2.4917e-05 gnorm: 1.07 [13:17:01<11:15:32] +[titan] 2025-10-05 11:51:23,012 - root - INFO - step: 21655 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 11:51:23,012 - root - INFO - lr: 2.4908e-05 gnorm: 1.06 [13:17:12<11:15:21] +[titan] 2025-10-05 11:51:33,829 - root - INFO - step: 21660 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 11:51:33,830 - root - INFO - lr: 2.4899e-05 gnorm: 1.06 [13:17:23<11:15:10] +[titan] 2025-10-05 11:51:44,687 - root - INFO - step: 21665 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 11:51:44,687 - root - INFO - lr: 2.4890e-05 gnorm: 1.03 [13:17:34<11:14:59] +[titan] 2025-10-05 11:51:55,529 - root - INFO - step: 21670 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 11:51:55,529 - root - INFO - lr: 2.4881e-05 gnorm: 1.04 [13:17:45<11:14:47] +[titan] 2025-10-05 11:52:06,368 - root - INFO - step: 21675 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 11:52:06,368 - root - INFO - lr: 2.4872e-05 gnorm: 1.05 [13:17:56<11:14:36] +[titan] 2025-10-05 11:52:17,248 - root - INFO - step: 21680 loss: 2.0964 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8526 +[titan] 2025-10-05 11:52:17,248 - root - INFO - lr: 
2.4863e-05 gnorm: 1.08 [13:18:07<11:14:25] +[titan] 2025-10-05 11:52:28,077 - root - INFO - step: 21685 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 11:52:28,077 - root - INFO - lr: 2.4855e-05 gnorm: 1.04 [13:18:17<11:14:14] +[titan] 2025-10-05 11:52:38,897 - root - INFO - step: 21690 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 11:52:38,897 - root - INFO - lr: 2.4846e-05 gnorm: 1.12 [13:18:28<11:14:02] +[titan] 2025-10-05 11:52:49,731 - root - INFO - step: 21695 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:52:49,731 - root - INFO - lr: 2.4837e-05 gnorm: 1.14 [13:18:39<11:13:51] +[titan] 2025-10-05 11:52:58,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:53:00,557 - root - INFO - step: 21700 loss: 2.0942 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 11:53:00,558 - root - INFO - lr: 2.4828e-05 gnorm: 1.04 [13:18:50<11:13:40] +[titan] 2025-10-05 11:53:11,384 - root - INFO - step: 21705 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8616 +[titan] 2025-10-05 11:53:11,384 - root - INFO - lr: 2.4819e-05 gnorm: 1.01 [13:19:01<11:13:29] +[titan] 2025-10-05 11:53:22,180 - root - INFO - step: 21710 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,354 tflops: 421.11 mfu: 42.58% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 11:53:22,180 - root - INFO - lr: 2.4810e-05 gnorm: 1.08 [13:19:11<11:13:18] +[titan] 2025-10-05 11:53:33,006 - root - INFO - step: 21715 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 11:53:33,006 - root - INFO - lr: 2.4801e-05 gnorm: 1.07 [13:19:22<11:13:06] +[titan] 2025-10-05 11:53:43,863 - root - INFO - step: 21720 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8820 +[titan] 2025-10-05 11:53:43,863 - root - INFO - lr: 2.4793e-05 gnorm: 1.07 [13:19:33<11:12:55] +[titan] 2025-10-05 11:53:54,726 - root - INFO - step: 21725 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 11:53:54,726 - root - INFO - lr: 2.4784e-05 gnorm: 1.07 [13:19:44<11:12:44] +[titan] 2025-10-05 11:54:05,529 - root - INFO - step: 21730 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 11:54:05,529 - root - INFO - lr: 
2.4775e-05 gnorm: 1.10 [13:19:55<11:12:33] +[titan] 2025-10-05 11:54:16,329 - root - INFO - step: 21735 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,341 tflops: 420.94 mfu: 42.56% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8624 +[titan] 2025-10-05 11:54:16,329 - root - INFO - lr: 2.4766e-05 gnorm: 1.08 [13:20:06<11:12:21] +[titan] 2025-10-05 11:54:27,148 - root - INFO - step: 21740 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.19 mfu: 42.49% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9108 +[titan] 2025-10-05 11:54:27,149 - root - INFO - lr: 2.4757e-05 gnorm: 1.08 [13:20:16<11:12:10] +[titan] 2025-10-05 11:54:38,009 - root - INFO - step: 21745 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 11:54:38,009 - root - INFO - lr: 2.4748e-05 gnorm: 1.09 [13:20:27<11:11:59] +[titan] 2025-10-05 11:54:46,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:54:48,886 - root - INFO - step: 21750 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8871 +[titan] 2025-10-05 11:54:48,886 - root - INFO - lr: 2.4739e-05 gnorm: 1.11 [13:20:38<11:11:48] +[titan] 2025-10-05 11:54:59,687 - root - INFO - step: 21755 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 11:54:59,688 - root - INFO - lr: 2.4731e-05 gnorm: 1.03 [13:20:49<11:11:37] +[titan] 2025-10-05 11:55:10,503 - root - INFO - step: 21760 loss: 2.0855 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:55:10,504 - root - INFO - lr: 2.4722e-05 gnorm: 1.08 [13:21:00<11:11:25] +[titan] 2025-10-05 11:55:21,303 - root - INFO - step: 21765 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 11:55:21,303 - root - INFO - lr: 2.4713e-05 gnorm: 1.06 [13:21:11<11:11:14] +[titan] 2025-10-05 11:55:32,128 - root - INFO - step: 21770 loss: 2.0394 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 11:55:32,129 - root - INFO - lr: 2.4704e-05 gnorm: 1.07 [13:21:21<11:11:03] +[titan] 2025-10-05 11:55:42,948 - root - INFO - step: 21775 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 11:55:42,948 - root - INFO - lr: 2.4695e-05 gnorm: 1.13 [13:21:32<11:10:52] +[titan] 2025-10-05 11:55:53,849 - root - INFO - step: 21780 loss: 2.1107 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 11:55:53,849 - root - INFO - lr: 
2.4686e-05 gnorm: 1.06 [13:21:43<11:10:40] +[titan] 2025-10-05 11:56:04,670 - root - INFO - step: 21785 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 11:56:04,670 - root - INFO - lr: 2.4677e-05 gnorm: 1.11 [13:21:54<11:10:29] +[titan] 2025-10-05 11:56:15,465 - root - INFO - step: 21790 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,355 tflops: 421.13 mfu: 42.58% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 11:56:15,465 - root - INFO - lr: 2.4669e-05 gnorm: 1.08 [13:22:05<11:10:18] +[titan] 2025-10-05 11:56:26,269 - root - INFO - step: 21795 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 11:56:26,269 - root - INFO - lr: 2.4660e-05 gnorm: 1.04 [13:22:16<11:10:07] +[titan] 2025-10-05 11:56:34,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:56:37,050 - root - INFO - step: 21800 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 11:56:37,050 - root - INFO - lr: 2.4651e-05 gnorm: 1.03 [13:22:26<11:09:55] +[titan] 2025-10-05 11:56:47,848 - root - INFO - step: 21805 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8423 +[titan] 2025-10-05 11:56:47,849 - root - INFO - lr: 2.4642e-05 gnorm: 1.06 [13:22:37<11:09:44] +[titan] 2025-10-05 11:56:58,686 - root - INFO - step: 21810 loss: 2.0632 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 11:56:58,686 - root - INFO - lr: 2.4633e-05 gnorm: 1.08 [13:22:48<11:09:33] +[titan] 2025-10-05 11:57:09,468 - root - INFO - step: 21815 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,391 tflops: 421.63 mfu: 42.63% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 11:57:09,469 - root - INFO - lr: 2.4624e-05 gnorm: 1.04 [13:22:59<11:09:22] +[titan] 2025-10-05 11:57:20,268 - root - INFO - step: 21820 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.56% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 11:57:20,268 - root - INFO - lr: 2.4615e-05 gnorm: 1.06 [13:23:10<11:09:10] +[titan] 2025-10-05 11:57:31,069 - root - INFO - step: 21825 loss: 2.0588 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 11:57:31,069 - root - INFO - lr: 2.4607e-05 gnorm: 1.03 [13:23:20<11:08:59] +[titan] 2025-10-05 11:57:41,865 - root - INFO - step: 21830 loss: 2.1085 memory: 118.84GiB(85.28%) tps: 30,353 tflops: 421.10 mfu: 42.58% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 11:57:41,865 - root - INFO - lr: 
2.4598e-05 gnorm: 1.03 [13:23:31<11:08:48] +[titan] 2025-10-05 11:57:52,686 - root - INFO - step: 21835 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:57:52,687 - root - INFO - lr: 2.4589e-05 gnorm: 1.03 [13:23:42<11:08:37] +[titan] 2025-10-05 11:58:03,531 - root - INFO - step: 21840 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:58:03,532 - root - INFO - lr: 2.4580e-05 gnorm: 1.05 [13:23:53<11:08:26] +[titan] 2025-10-05 11:58:14,308 - root - INFO - step: 21845 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,407 tflops: 421.85 mfu: 42.65% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9173 +[titan] 2025-10-05 11:58:14,309 - root - INFO - lr: 2.4571e-05 gnorm: 1.09 [13:24:04<11:08:14] +[titan] 2025-10-05 11:58:22,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:58:25,109 - root - INFO - step: 21850 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:58:25,110 - root - INFO - lr: 2.4562e-05 gnorm: 1.08 [13:24:14<11:08:03] +[titan] 2025-10-05 11:58:35,880 - root - INFO - step: 21855 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,424 tflops: 422.09 mfu: 42.68% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 11:58:35,880 - root - INFO - lr: 2.4554e-05 gnorm: 1.08 [13:24:25<11:07:52] +[titan] 2025-10-05 11:58:46,714 - root - INFO - step: 21860 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:58:46,714 - root - INFO - lr: 2.4545e-05 gnorm: 1.03 [13:24:36<11:07:41] +[titan] 2025-10-05 11:58:57,569 - root - INFO - step: 21865 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8989 +[titan] 2025-10-05 11:58:57,569 - root - INFO - lr: 2.4536e-05 gnorm: 1.07 [13:24:47<11:07:29] +[titan] 2025-10-05 11:59:08,390 - root - INFO - step: 21870 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 11:59:08,390 - root - INFO - lr: 2.4527e-05 gnorm: 1.04 [13:24:58<11:07:18] +[titan] 2025-10-05 11:59:19,246 - root - INFO - step: 21875 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8438 +[titan] 2025-10-05 11:59:19,246 - root - INFO - lr: 2.4518e-05 gnorm: 1.06 [13:25:09<11:07:07] +[titan] 2025-10-05 11:59:30,047 - root - INFO - step: 21880 loss: 2.0852 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:59:30,047 - root - INFO - lr: 
2.4509e-05 gnorm: 1.08 [13:25:19<11:06:56] +[titan] 2025-10-05 11:59:40,863 - root - INFO - step: 21885 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 11:59:40,863 - root - INFO - lr: 2.4500e-05 gnorm: 1.05 [13:25:30<11:06:44] +[titan] 2025-10-05 11:59:51,744 - root - INFO - step: 21890 loss: 2.1740 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9204 +[titan] 2025-10-05 11:59:51,744 - root - INFO - lr: 2.4492e-05 gnorm: 1.10 [13:25:41<11:06:33] +[titan] 2025-10-05 12:00:02,569 - root - INFO - step: 21895 loss: 2.2128 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9549 +[titan] 2025-10-05 12:00:02,570 - root - INFO - lr: 2.4483e-05 gnorm: 1.10 [13:25:52<11:06:22] +[titan] 2025-10-05 12:00:11,224 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:00:13,387 - root - INFO - step: 21900 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 12:00:13,387 - root - INFO - lr: 2.4474e-05 gnorm: 1.04 [13:26:03<11:06:11] +[titan] 2025-10-05 12:00:24,246 - root - INFO - step: 21905 loss: 2.1321 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 12:00:24,246 - root - INFO - lr: 2.4465e-05 gnorm: 1.05 [13:26:14<11:06:00] +[titan] 2025-10-05 12:00:35,064 - root - INFO - step: 21910 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 12:00:35,064 - root - INFO - lr: 2.4456e-05 gnorm: 1.06 [13:26:24<11:05:48] +[titan] 2025-10-05 12:00:45,889 - root - INFO - step: 21915 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:00:45,889 - root - INFO - lr: 2.4447e-05 gnorm: 1.07 [13:26:35<11:05:37] +[titan] 2025-10-05 12:00:56,747 - root - INFO - step: 21920 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 12:00:56,748 - root - INFO - lr: 2.4439e-05 gnorm: 1.11 [13:26:46<11:05:26] +[titan] 2025-10-05 12:01:07,566 - root - INFO - step: 21925 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:01:07,567 - root - INFO - lr: 2.4430e-05 gnorm: 1.06 [13:26:57<11:05:15] +[titan] 2025-10-05 12:01:18,394 - root - INFO - step: 21930 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 12:01:18,394 - root - INFO - lr: 
2.4421e-05 gnorm: 1.08 [13:27:08<11:05:04] +[titan] 2025-10-05 12:01:29,213 - root - INFO - step: 21935 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 12:01:29,213 - root - INFO - lr: 2.4412e-05 gnorm: 1.05 [13:27:18<11:04:52] +[titan] 2025-10-05 12:01:40,068 - root - INFO - step: 21940 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 12:01:40,068 - root - INFO - lr: 2.4403e-05 gnorm: 1.06 [13:27:29<11:04:41] +[titan] 2025-10-05 12:01:50,925 - root - INFO - step: 21945 loss: 2.1040 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8614 +[titan] 2025-10-05 12:01:50,925 - root - INFO - lr: 2.4394e-05 gnorm: 1.09 [13:27:40<11:04:30] +[titan] 2025-10-05 12:01:59,596 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:02:01,768 - root - INFO - step: 21950 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:02:01,768 - root - INFO - lr: 2.4385e-05 gnorm: 1.08 [13:27:51<11:04:19] +[titan] 2025-10-05 12:02:12,595 - root - INFO - step: 21955 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8121 +[titan] 2025-10-05 12:02:12,595 - root - INFO - lr: 2.4377e-05 gnorm: 1.04 [13:28:02<11:04:08] +[titan] 2025-10-05 12:02:23,415 - root - INFO - step: 21960 loss: 2.0883 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8460 +[titan] 2025-10-05 12:02:23,415 - root - INFO - lr: 2.4368e-05 gnorm: 1.02 [13:28:13<11:03:56] +[titan] 2025-10-05 12:02:34,233 - root - INFO - step: 21965 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:02:34,234 - root - INFO - lr: 2.4359e-05 gnorm: 1.07 [13:28:23<11:03:45] +[titan] 2025-10-05 12:02:45,129 - root - INFO - step: 21970 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8679 +[titan] 2025-10-05 12:02:45,129 - root - INFO - lr: 2.4350e-05 gnorm: 1.06 [13:28:34<11:03:34] +[titan] 2025-10-05 12:02:56,069 - root - INFO - step: 21975 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8574 +[titan] 2025-10-05 12:02:56,069 - root - INFO - lr: 2.4341e-05 gnorm: 1.04 [13:28:45<11:03:23] +[titan] 2025-10-05 12:03:06,899 - root - INFO - step: 21980 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9135 +[titan] 2025-10-05 12:03:06,899 - root - INFO - lr: 
2.4332e-05 gnorm: 1.08 [13:28:56<11:03:12] +[titan] 2025-10-05 12:03:17,738 - root - INFO - step: 21985 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:03:17,738 - root - INFO - lr: 2.4324e-05 gnorm: 1.09 [13:29:07<11:03:00] +[titan] 2025-10-05 12:03:28,567 - root - INFO - step: 21990 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 12:03:28,567 - root - INFO - lr: 2.4315e-05 gnorm: 1.02 [13:29:18<11:02:49] +[titan] 2025-10-05 12:03:39,369 - root - INFO - step: 21995 loss: 2.1137 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 12:03:39,369 - root - INFO - lr: 2.4306e-05 gnorm: 1.06 [13:29:29<11:02:38] +[titan] 2025-10-05 12:03:48,026 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:03:50,242 - root - INFO - step: 22000 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:03:50,242 - root - INFO - lr: 2.4297e-05 gnorm: 1.08 [13:29:39<11:02:27] +[titan] 2025-10-05 12:04:01,127 - root - INFO - step: 22005 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:04:01,127 - root - INFO - lr: 2.4288e-05 gnorm: 1.01 [13:29:50<11:02:16] +[titan] 2025-10-05 12:04:11,950 - root - INFO - step: 22010 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8594 +[titan] 2025-10-05 12:04:11,950 - root - INFO - lr: 2.4279e-05 gnorm: 1.06 [13:30:01<11:02:04] +[titan] 2025-10-05 12:04:22,889 - root - INFO - step: 22015 loss: 2.0810 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:04:22,889 - root - INFO - lr: 2.4271e-05 gnorm: 1.07 [13:30:12<11:01:53] +[titan] 2025-10-05 12:04:25,246 - root - INFO - Dumping profiler traces at step 22016 +[titan] 2025-10-05 12:04:25,285 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:04:33,972 - root - INFO - step: 22020 loss: 2.1387 memory: 118.84GiB(85.28%) tps: 29,566 tflops: 410.19 mfu: 41.47% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8891 +[titan] 2025-10-05 12:04:33,972 - root - INFO - lr: 2.4262e-05 gnorm: 1.03 [13:30:23<11:01:42] +[titan] 2025-10-05 12:04:44,810 - root - INFO - step: 22025 loss: 2.1465 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 12:04:44,810 - root - INFO - lr: 2.4253e-05 gnorm: 1.08 [13:30:34<11:01:31] +[titan] 2025-10-05 12:04:55,694 - root - INFO - step: 22030 loss: 
2.1289 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 12:04:55,694 - root - INFO - lr: 2.4244e-05 gnorm: 1.05 [13:30:45<11:01:20] +[titan] 2025-10-05 12:05:06,571 - root - INFO - step: 22035 loss: 2.0627 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8224 +[titan] 2025-10-05 12:05:06,571 - root - INFO - lr: 2.4235e-05 gnorm: 1.04 [13:30:56<11:01:09] +[titan] 2025-10-05 12:05:17,439 - root - INFO - step: 22040 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 12:05:17,439 - root - INFO - lr: 2.4226e-05 gnorm: 1.05 [13:31:07<11:00:58] +[titan] 2025-10-05 12:05:28,290 - root - INFO - step: 22045 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 12:05:28,290 - root - INFO - lr: 2.4218e-05 gnorm: 1.06 [13:31:18<11:00:46] +[titan] 2025-10-05 12:05:36,955 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:05:39,138 - root - INFO - step: 22050 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:05:39,138 - root - INFO - lr: 2.4209e-05 gnorm: 1.06 [13:31:28<11:00:35] +[titan] 2025-10-05 12:05:49,987 - root - INFO - step: 22055 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 12:05:49,987 - root - INFO - lr: 2.4200e-05 gnorm: 1.05 [13:31:39<11:00:24] +[titan] 2025-10-05 12:06:00,891 - root - INFO - step: 22060 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 12:06:00,891 - root - INFO - lr: 2.4191e-05 gnorm: 1.10 [13:31:50<11:00:13] +[titan] 2025-10-05 12:06:11,774 - root - INFO - step: 22065 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:06:11,775 - root - INFO - lr: 2.4182e-05 gnorm: 1.05 [13:32:01<11:00:02] +[titan] 2025-10-05 12:06:22,629 - root - INFO - step: 22070 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:06:22,629 - root - INFO - lr: 2.4173e-05 gnorm: 1.06 [13:32:12<10:59:50] +[titan] 2025-10-05 12:06:33,471 - root - INFO - step: 22075 loss: 2.0401 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8033 +[titan] 2025-10-05 12:06:33,471 - root - INFO - lr: 2.4165e-05 gnorm: 1.07 [13:32:23<10:59:39] +[titan] 2025-10-05 12:06:44,307 - root - INFO - step: 22080 loss: 2.1317 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8847 +[titan] 2025-10-05 12:06:44,307 - root - INFO - lr: 
2.4156e-05 gnorm: 1.08 [13:32:34<10:59:28] +[titan] 2025-10-05 12:06:55,150 - root - INFO - step: 22085 loss: 2.0997 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8559 +[titan] 2025-10-05 12:06:55,151 - root - INFO - lr: 2.4147e-05 gnorm: 1.06 [13:32:44<10:59:17] +[titan] 2025-10-05 12:07:06,030 - root - INFO - step: 22090 loss: 2.1094 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 12:07:06,030 - root - INFO - lr: 2.4138e-05 gnorm: 1.09 [13:32:55<10:59:06] +[titan] 2025-10-05 12:07:16,895 - root - INFO - step: 22095 loss: 2.1217 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:07:16,895 - root - INFO - lr: 2.4129e-05 gnorm: 1.14 [13:33:06<10:58:54] +[titan] 2025-10-05 12:07:25,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:07:27,824 - root - INFO - step: 22100 loss: 2.1006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 12:07:27,824 - root - INFO - lr: 2.4121e-05 gnorm: 1.06 [13:33:17<10:58:43] +[titan] 2025-10-05 12:07:38,689 - root - INFO - step: 22105 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 12:07:38,689 - root - INFO - lr: 2.4112e-05 gnorm: 1.07 [13:33:28<10:58:32] +[titan] 2025-10-05 12:07:49,564 - root - INFO - step: 22110 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 12:07:49,564 - root - INFO - lr: 2.4103e-05 gnorm: 1.12 [13:33:39<10:58:21] +[titan] 2025-10-05 12:08:00,491 - root - INFO - step: 22115 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8523 +[titan] 2025-10-05 12:08:00,491 - root - INFO - lr: 2.4094e-05 gnorm: 1.06 [13:33:50<10:58:10] +[titan] 2025-10-05 12:08:11,388 - root - INFO - step: 22120 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 12:08:11,388 - root - INFO - lr: 2.4085e-05 gnorm: 1.07 [13:34:01<10:57:59] +[titan] 2025-10-05 12:08:22,246 - root - INFO - step: 22125 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 12:08:22,246 - root - INFO - lr: 2.4076e-05 gnorm: 1.08 [13:34:11<10:57:47] +[titan] 2025-10-05 12:08:33,148 - root - INFO - step: 22130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:08:33,148 - root - INFO - lr: 
2.4068e-05 gnorm: 1.08 [13:34:22<10:57:36] +[titan] 2025-10-05 12:08:44,031 - root - INFO - step: 22135 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:08:44,031 - root - INFO - lr: 2.4059e-05 gnorm: 1.02 [13:34:33<10:57:25] +[titan] 2025-10-05 12:08:54,887 - root - INFO - step: 22140 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:08:54,888 - root - INFO - lr: 2.4050e-05 gnorm: 1.06 [13:34:44<10:57:14] +[titan] 2025-10-05 12:09:05,796 - root - INFO - step: 22145 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 12:09:05,796 - root - INFO - lr: 2.4041e-05 gnorm: 1.02 [13:34:55<10:57:03] +[titan] 2025-10-05 12:09:14,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:09:16,655 - root - INFO - step: 22150 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 12:09:16,655 - root - INFO - lr: 2.4032e-05 gnorm: 1.06 [13:35:06<10:56:52] +[titan] 2025-10-05 12:09:27,522 - root - INFO - step: 22155 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8661 +[titan] 2025-10-05 12:09:27,522 - root - INFO - lr: 2.4024e-05 gnorm: 1.02 [13:35:17<10:56:40] +[titan] 2025-10-05 12:09:38,432 - root - INFO - step: 22160 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 12:09:38,432 - root - INFO - lr: 2.4015e-05 gnorm: 1.04 [13:35:28<10:56:29] +[titan] 2025-10-05 12:09:49,301 - root - INFO - step: 22165 loss: 2.1166 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:09:49,302 - root - INFO - lr: 2.4006e-05 gnorm: 1.12 [13:35:39<10:56:18] +[titan] 2025-10-05 12:10:00,224 - root - INFO - step: 22170 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:10:00,224 - root - INFO - lr: 2.3997e-05 gnorm: 1.09 [13:35:49<10:56:07] +[titan] 2025-10-05 12:10:11,087 - root - INFO - step: 22175 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:10:11,087 - root - INFO - lr: 2.3988e-05 gnorm: 1.05 [13:36:00<10:55:56] +[titan] 2025-10-05 12:10:21,968 - root - INFO - step: 22180 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9855 +[titan] 2025-10-05 12:10:21,968 - root - INFO - lr: 
2.3979e-05 gnorm: 1.15 [13:36:11<10:55:45] +[titan] 2025-10-05 12:10:32,857 - root - INFO - step: 22185 loss: 2.1657 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9136 +[titan] 2025-10-05 12:10:32,857 - root - INFO - lr: 2.3971e-05 gnorm: 1.08 [13:36:22<10:55:33] +[titan] 2025-10-05 12:10:43,721 - root - INFO - step: 22190 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:10:43,721 - root - INFO - lr: 2.3962e-05 gnorm: 1.07 [13:36:33<10:55:22] +[titan] 2025-10-05 12:10:54,626 - root - INFO - step: 22195 loss: 2.1296 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 12:10:54,626 - root - INFO - lr: 2.3953e-05 gnorm: 1.11 [13:36:44<10:55:11] +[titan] 2025-10-05 12:11:03,352 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:11:05,537 - root - INFO - step: 22200 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 12:11:05,537 - root - INFO - lr: 2.3944e-05 gnorm: 1.09 [13:36:55<10:55:00] +[titan] 2025-10-05 12:11:16,410 - root - INFO - step: 22205 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 12:11:16,410 - root - INFO - lr: 2.3935e-05 gnorm: 1.10 [13:37:06<10:54:49] +[titan] 2025-10-05 12:11:27,277 - root - INFO - step: 22210 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 12:11:27,277 - root - INFO - lr: 2.3927e-05 gnorm: 1.10 [13:37:17<10:54:38] +[titan] 2025-10-05 12:11:38,149 - root - INFO - step: 22215 loss: 2.0858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8426 +[titan] 2025-10-05 12:11:38,149 - root - INFO - lr: 2.3918e-05 gnorm: 1.08 [13:37:27<10:54:27] +[titan] 2025-10-05 12:11:49,017 - root - INFO - step: 22220 loss: 2.1032 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 12:11:49,017 - root - INFO - lr: 2.3909e-05 gnorm: 1.08 [13:37:38<10:54:15] +[titan] 2025-10-05 12:11:59,905 - root - INFO - step: 22225 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8675 +[titan] 2025-10-05 12:11:59,905 - root - INFO - lr: 2.3900e-05 gnorm: 1.08 [13:37:49<10:54:04] +[titan] 2025-10-05 12:12:10,823 - root - INFO - step: 22230 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:12:10,823 - root - INFO - lr: 
2.3891e-05 gnorm: 1.09 [13:38:00<10:53:53] +[titan] 2025-10-05 12:12:21,651 - root - INFO - step: 22235 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 12:12:21,651 - root - INFO - lr: 2.3883e-05 gnorm: 1.08 [13:38:11<10:53:42] +[titan] 2025-10-05 12:12:32,529 - root - INFO - step: 22240 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 12:12:32,530 - root - INFO - lr: 2.3874e-05 gnorm: 1.11 [13:38:22<10:53:31] +[titan] 2025-10-05 12:12:43,387 - root - INFO - step: 22245 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 12:12:43,388 - root - INFO - lr: 2.3865e-05 gnorm: 1.06 [13:38:33<10:53:19] +[titan] 2025-10-05 12:12:52,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:12:54,236 - root - INFO - step: 22250 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 12:12:54,236 - root - INFO - lr: 2.3856e-05 gnorm: 1.09 [13:38:43<10:53:08] +[titan] 2025-10-05 12:13:05,203 - root - INFO - step: 22255 loss: 2.2062 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 12:13:05,204 - root - INFO - lr: 2.3847e-05 gnorm: 1.10 [13:38:54<10:52:57] +[titan] 2025-10-05 12:13:16,105 - root - INFO - step: 22260 loss: 2.0839 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8418 +[titan] 2025-10-05 12:13:16,105 - root - INFO - lr: 2.3838e-05 gnorm: 1.06 [13:39:05<10:52:46] +[titan] 2025-10-05 12:13:26,969 - root - INFO - step: 22265 loss: 2.1143 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 12:13:26,969 - root - INFO - lr: 2.3830e-05 gnorm: 1.09 [13:39:16<10:52:35] +[titan] 2025-10-05 12:13:37,833 - root - INFO - step: 22270 loss: 2.1822 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 12:13:37,833 - root - INFO - lr: 2.3821e-05 gnorm: 1.10 [13:39:27<10:52:24] +[titan] 2025-10-05 12:13:48,696 - root - INFO - step: 22275 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 12:13:48,696 - root - INFO - lr: 2.3812e-05 gnorm: 1.05 [13:39:38<10:52:12] +[titan] 2025-10-05 12:13:59,557 - root - INFO - step: 22280 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 12:13:59,557 - root - INFO - lr: 
2.3803e-05 gnorm: 1.06 [13:39:49<10:52:01] +[titan] 2025-10-05 12:14:10,442 - root - INFO - step: 22285 loss: 2.1340 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:14:10,442 - root - INFO - lr: 2.3794e-05 gnorm: 1.08 [13:40:00<10:51:50] +[titan] 2025-10-05 12:14:21,358 - root - INFO - step: 22290 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:14:21,358 - root - INFO - lr: 2.3786e-05 gnorm: 1.08 [13:40:11<10:51:39] +[titan] 2025-10-05 12:14:32,225 - root - INFO - step: 22295 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 12:14:32,225 - root - INFO - lr: 2.3777e-05 gnorm: 1.07 [13:40:21<10:51:28] +[titan] 2025-10-05 12:14:40,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:14:43,076 - root - INFO - step: 22300 loss: 2.0949 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 12:14:43,076 - root - INFO - lr: 2.3768e-05 gnorm: 1.08 [13:40:32<10:51:17] +[titan] 2025-10-05 12:14:53,944 - root - INFO - step: 22305 loss: 2.2081 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 12:14:53,944 - root - INFO - lr: 2.3759e-05 gnorm: 1.05 [13:40:43<10:51:05] +[titan] 2025-10-05 12:15:04,844 - root - INFO - step: 22310 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 12:15:04,844 - root - INFO - lr: 2.3750e-05 gnorm: 1.11 [13:40:54<10:50:54] +[titan] 2025-10-05 12:15:15,693 - root - INFO - step: 22315 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:15:15,693 - root - INFO - lr: 2.3742e-05 gnorm: 1.06 [13:41:05<10:50:43] +[titan] 2025-10-05 12:15:26,567 - root - INFO - step: 22320 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 12:15:26,567 - root - INFO - lr: 2.3733e-05 gnorm: 1.04 [13:41:16<10:50:32] +[titan] 2025-10-05 12:15:37,421 - root - INFO - step: 22325 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 12:15:37,422 - root - INFO - lr: 2.3724e-05 gnorm: 1.08 [13:41:27<10:50:21] +[titan] 2025-10-05 12:15:48,281 - root - INFO - step: 22330 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 12:15:48,281 - root - INFO - lr: 
2.3715e-05 gnorm: 1.08 [13:41:37<10:50:10] +[titan] 2025-10-05 12:15:59,149 - root - INFO - step: 22335 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:15:59,149 - root - INFO - lr: 2.3706e-05 gnorm: 1.04 [13:41:48<10:49:58] +[titan] 2025-10-05 12:16:10,046 - root - INFO - step: 22340 loss: 2.0616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 12:16:10,046 - root - INFO - lr: 2.3698e-05 gnorm: 1.10 [13:41:59<10:49:47] +[titan] 2025-10-05 12:16:20,913 - root - INFO - step: 22345 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 12:16:20,913 - root - INFO - lr: 2.3689e-05 gnorm: 1.09 [13:42:10<10:49:36] +[titan] 2025-10-05 12:16:29,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:16:31,773 - root - INFO - step: 22350 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:16:31,773 - root - INFO - lr: 2.3680e-05 gnorm: 1.09 [13:42:21<10:49:25] +[titan] 2025-10-05 12:16:42,676 - root - INFO - step: 22355 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8541 +[titan] 2025-10-05 12:16:42,676 - root - INFO - lr: 2.3671e-05 gnorm: 1.06 [13:42:32<10:49:14] +[titan] 2025-10-05 12:16:53,529 - root - INFO - step: 22360 loss: 2.1363 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:16:53,529 - root - INFO - lr: 2.3662e-05 gnorm: 1.08 [13:42:43<10:49:03] +[titan] 2025-10-05 12:17:04,373 - root - INFO - step: 22365 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:17:04,373 - root - INFO - lr: 2.3654e-05 gnorm: 1.08 [13:42:54<10:48:51] +[titan] 2025-10-05 12:17:15,272 - root - INFO - step: 22370 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:17:15,272 - root - INFO - lr: 2.3645e-05 gnorm: 1.06 [13:43:04<10:48:40] +[titan] 2025-10-05 12:17:26,145 - root - INFO - step: 22375 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 12:17:26,145 - root - INFO - lr: 2.3636e-05 gnorm: 1.05 [13:43:15<10:48:29] +[titan] 2025-10-05 12:17:36,995 - root - INFO - step: 22380 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 12:17:36,995 - root - INFO - lr: 
2.3627e-05 gnorm: 1.07 [13:43:26<10:48:18] +[titan] 2025-10-05 12:17:47,877 - root - INFO - step: 22385 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:17:47,877 - root - INFO - lr: 2.3619e-05 gnorm: 1.08 [13:43:37<10:48:07] +[titan] 2025-10-05 12:17:58,732 - root - INFO - step: 22390 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8286 +[titan] 2025-10-05 12:17:58,732 - root - INFO - lr: 2.3610e-05 gnorm: 1.02 [13:43:48<10:47:56] +[titan] 2025-10-05 12:18:09,619 - root - INFO - step: 22395 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8656 +[titan] 2025-10-05 12:18:09,619 - root - INFO - lr: 2.3601e-05 gnorm: 1.08 [13:43:59<10:47:44] +[titan] 2025-10-05 12:18:18,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:18:20,460 - root - INFO - step: 22400 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8304 +[titan] 2025-10-05 12:18:20,461 - root - INFO - lr: 2.3592e-05 gnorm: 1.03 [13:44:10<10:47:33] +[titan] 2025-10-05 12:18:31,320 - root - INFO - step: 22405 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:18:31,320 - root - INFO - lr: 2.3583e-05 gnorm: 1.06 [13:44:21<10:47:22] +[titan] 2025-10-05 12:18:42,176 - root - INFO - step: 22410 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 12:18:42,176 - root - INFO - lr: 2.3575e-05 gnorm: 1.06 [13:44:31<10:47:11] +[titan] 2025-10-05 12:18:53,029 - root - INFO - step: 22415 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 12:18:53,029 - root - INFO - lr: 2.3566e-05 gnorm: 1.07 [13:44:42<10:47:00] +[titan] 2025-10-05 12:19:03,920 - root - INFO - step: 22420 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 12:19:03,920 - root - INFO - lr: 2.3557e-05 gnorm: 1.04 [13:44:53<10:46:49] +[titan] 2025-10-05 12:19:14,809 - root - INFO - step: 22425 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 12:19:14,809 - root - INFO - lr: 2.3548e-05 gnorm: 1.08 [13:45:04<10:46:37] +[titan] 2025-10-05 12:19:25,666 - root - INFO - step: 22430 loss: 2.1054 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 12:19:25,666 - root - INFO - lr: 
2.3539e-05 gnorm: 1.11 [13:45:15<10:46:26] +[titan] 2025-10-05 12:19:36,537 - root - INFO - step: 22435 loss: 2.0990 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:19:36,537 - root - INFO - lr: 2.3531e-05 gnorm: 1.07 [13:45:26<10:46:15] +[titan] 2025-10-05 12:19:47,408 - root - INFO - step: 22440 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:19:47,408 - root - INFO - lr: 2.3522e-05 gnorm: 1.07 [13:45:37<10:46:04] +[titan] 2025-10-05 12:19:58,267 - root - INFO - step: 22445 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:19:58,267 - root - INFO - lr: 2.3513e-05 gnorm: 1.04 [13:45:47<10:45:53] +[titan] 2025-10-05 12:20:06,985 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:20:09,202 - root - INFO - step: 22450 loss: 2.1175 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:20:09,202 - root - INFO - lr: 2.3504e-05 gnorm: 1.08 [13:45:58<10:45:42] +[titan] 2025-10-05 12:20:20,059 - root - INFO - step: 22455 loss: 2.1341 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8869 +[titan] 2025-10-05 12:20:20,059 - root - INFO - lr: 2.3495e-05 gnorm: 1.04 [13:46:09<10:45:30] +[titan] 2025-10-05 12:20:30,913 - root - INFO - step: 22460 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:20:30,913 - root - INFO - lr: 2.3487e-05 gnorm: 1.14 [13:46:20<10:45:19] +[titan] 2025-10-05 12:20:41,788 - root - INFO - step: 22465 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8576 +[titan] 2025-10-05 12:20:41,788 - root - INFO - lr: 2.3478e-05 gnorm: 1.02 [13:46:31<10:45:08] +[titan] 2025-10-05 12:20:52,649 - root - INFO - step: 22470 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 12:20:52,649 - root - INFO - lr: 2.3469e-05 gnorm: 1.04 [13:46:42<10:44:57] +[titan] 2025-10-05 12:21:03,515 - root - INFO - step: 22475 loss: 2.0698 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8297 +[titan] 2025-10-05 12:21:03,515 - root - INFO - lr: 2.3460e-05 gnorm: 1.09 [13:46:53<10:44:46] +[titan] 2025-10-05 12:21:14,426 - root - INFO - step: 22480 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8596 +[titan] 2025-10-05 12:21:14,426 - root - INFO - lr: 
2.3452e-05 gnorm: 1.07 [13:47:04<10:44:35] +[titan] 2025-10-05 12:21:25,291 - root - INFO - step: 22485 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8221 +[titan] 2025-10-05 12:21:25,291 - root - INFO - lr: 2.3443e-05 gnorm: 1.07 [13:47:14<10:44:23] +[titan] 2025-10-05 12:21:36,157 - root - INFO - step: 22490 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:21:36,157 - root - INFO - lr: 2.3434e-05 gnorm: 1.07 [13:47:25<10:44:12] +[titan] 2025-10-05 12:21:47,031 - root - INFO - step: 22495 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8952 +[titan] 2025-10-05 12:21:47,032 - root - INFO - lr: 2.3425e-05 gnorm: 1.05 [13:47:36<10:44:01] +[titan] 2025-10-05 12:21:55,725 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:21:57,921 - root - INFO - step: 22500 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 12:21:57,921 - root - INFO - lr: 2.3416e-05 gnorm: 1.10 [13:47:47<10:43:50] +[titan] 2025-10-05 12:22:08,788 - root - INFO - step: 22505 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 12:22:08,789 - root - INFO - lr: 2.3408e-05 gnorm: 1.07 [13:47:58<10:43:39] +[titan] 2025-10-05 12:22:19,750 - root - INFO - step: 22510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 29,895 tflops: 414.75 mfu: 41.94% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:22:19,750 - root - INFO - lr: 2.3399e-05 gnorm: 1.09 [13:48:09<10:43:28] +[titan] 2025-10-05 12:22:30,622 - root - INFO - step: 22515 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:22:30,622 - root - INFO - lr: 2.3390e-05 gnorm: 1.11 [13:48:20<10:43:16] +[titan] 2025-10-05 12:22:41,466 - root - INFO - step: 22520 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 12:22:41,466 - root - INFO - lr: 2.3381e-05 gnorm: 1.06 [13:48:31<10:43:05] +[titan] 2025-10-05 12:22:52,408 - root - INFO - step: 22525 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:22:52,408 - root - INFO - lr: 2.3373e-05 gnorm: 1.06 [13:48:42<10:42:54] +[titan] 2025-10-05 12:22:59,140 - root - INFO - Dumping profiler traces at step 22528 +[titan] 2025-10-05 12:22:59,177 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:23:03,544 - root - INFO - step: 22530 loss: 
2.1241 memory: 118.84GiB(85.28%) tps: 29,426 tflops: 408.25 mfu: 41.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 12:23:03,544 - root - INFO - lr: 2.3364e-05 gnorm: 1.08 [13:48:53<10:42:43] +[titan] 2025-10-05 12:23:14,458 - root - INFO - step: 22535 loss: 2.1311 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8827 +[titan] 2025-10-05 12:23:14,458 - root - INFO - lr: 2.3355e-05 gnorm: 1.33 [13:49:04<10:42:32] +[titan] 2025-10-05 12:23:25,322 - root - INFO - step: 22540 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 12:23:25,322 - root - INFO - lr: 2.3346e-05 gnorm: 1.10 [13:49:15<10:42:21] +[titan] 2025-10-05 12:23:36,189 - root - INFO - step: 22545 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 12:23:36,189 - root - INFO - lr: 2.3338e-05 gnorm: 1.04 [13:49:25<10:42:10] +[titan] 2025-10-05 12:23:44,857 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:23:47,032 - root - INFO - step: 22550 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 12:23:47,032 - root - INFO - lr: 2.3329e-05 gnorm: 1.08 [13:49:36<10:41:59] +[titan] 2025-10-05 12:23:57,904 - root - INFO - step: 22555 loss: 2.0817 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:23:57,904 - root - INFO - lr: 2.3320e-05 gnorm: 1.06 [13:49:47<10:41:47] +[titan] 2025-10-05 12:24:08,764 - root - INFO - step: 22560 loss: 2.0564 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 12:24:08,764 - root - INFO - lr: 2.3311e-05 gnorm: 1.08 [13:49:58<10:41:36] +[titan] 2025-10-05 12:24:19,652 - root - INFO - step: 22565 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8615 +[titan] 2025-10-05 12:24:19,652 - root - INFO - lr: 2.3302e-05 gnorm: 1.10 [13:50:09<10:41:25] +[titan] 2025-10-05 12:24:30,523 - root - INFO - step: 22570 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 12:24:30,523 - root - INFO - lr: 2.3294e-05 gnorm: 1.05 [13:50:20<10:41:14] +[titan] 2025-10-05 12:24:41,397 - root - INFO - step: 22575 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 12:24:41,397 - root - INFO - lr: 2.3285e-05 gnorm: 1.06 [13:50:31<10:41:03] +[titan] 2025-10-05 12:24:52,282 - root - INFO - step: 22580 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8584 +[titan] 2025-10-05 12:24:52,283 - root - INFO - lr: 
2.3276e-05 gnorm: 1.02 [13:50:41<10:40:52] +[titan] 2025-10-05 12:25:03,150 - root - INFO - step: 22585 loss: 2.0722 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:25:03,150 - root - INFO - lr: 2.3267e-05 gnorm: 1.07 [13:50:52<10:40:40] +[titan] 2025-10-05 12:25:14,069 - root - INFO - step: 22590 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 12:25:14,069 - root - INFO - lr: 2.3259e-05 gnorm: 1.07 [13:51:03<10:40:29] +[titan] 2025-10-05 12:25:24,944 - root - INFO - step: 22595 loss: 2.0307 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 12:25:24,945 - root - INFO - lr: 2.3250e-05 gnorm: 1.06 [13:51:14<10:40:18] +[titan] 2025-10-05 12:25:33,616 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:25:35,800 - root - INFO - step: 22600 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 12:25:35,800 - root - INFO - lr: 2.3241e-05 gnorm: 1.09 [13:51:25<10:40:07] +[titan] 2025-10-05 12:25:46,666 - root - INFO - step: 22605 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 12:25:46,666 - root - INFO - lr: 2.3232e-05 gnorm: 1.08 [13:51:36<10:39:56] +[titan] 2025-10-05 12:25:57,545 - root - INFO - step: 22610 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8253 +[titan] 2025-10-05 12:25:57,545 - root - INFO - lr: 2.3224e-05 gnorm: 1.04 [13:51:47<10:39:45] +[titan] 2025-10-05 12:26:08,410 - root - INFO - step: 22615 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 12:26:08,411 - root - INFO - lr: 2.3215e-05 gnorm: 1.05 [13:51:58<10:39:33] +[titan] 2025-10-05 12:26:19,368 - root - INFO - step: 22620 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 12:26:19,368 - root - INFO - lr: 2.3206e-05 gnorm: 1.13 [13:52:09<10:39:22] +[titan] 2025-10-05 12:26:30,266 - root - INFO - step: 22625 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8493 +[titan] 2025-10-05 12:26:30,266 - root - INFO - lr: 2.3197e-05 gnorm: 1.07 [13:52:19<10:39:11] +[titan] 2025-10-05 12:26:41,175 - root - INFO - step: 22630 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 12:26:41,175 - root - INFO - lr: 
2.3189e-05 gnorm: 1.06 [13:52:30<10:39:00] +[titan] 2025-10-05 12:26:52,070 - root - INFO - step: 22635 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8261 +[titan] 2025-10-05 12:26:52,070 - root - INFO - lr: 2.3180e-05 gnorm: 1.03 [13:52:41<10:38:49] +[titan] 2025-10-05 12:27:02,956 - root - INFO - step: 22640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 12:27:02,956 - root - INFO - lr: 2.3171e-05 gnorm: 1.05 [13:52:52<10:38:38] +[titan] 2025-10-05 12:27:13,822 - root - INFO - step: 22645 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 12:27:13,822 - root - INFO - lr: 2.3162e-05 gnorm: 1.02 [13:53:03<10:38:27] +[titan] 2025-10-05 12:27:22,552 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:27:24,736 - root - INFO - step: 22650 loss: 2.0501 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 12:27:24,736 - root - INFO - lr: 2.3153e-05 gnorm: 1.10 [13:53:14<10:38:15] +[titan] 2025-10-05 12:27:35,626 - root - INFO - step: 22655 loss: 2.0835 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 12:27:35,626 - root - INFO - lr: 2.3145e-05 gnorm: 1.05 [13:53:25<10:38:04] +[titan] 2025-10-05 12:27:46,518 - root - INFO - step: 22660 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:27:46,518 - root - INFO - lr: 2.3136e-05 gnorm: 1.11 [13:53:36<10:37:53] +[titan] 2025-10-05 12:27:57,386 - root - INFO - step: 22665 loss: 2.1687 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9171 +[titan] 2025-10-05 12:27:57,386 - root - INFO - lr: 2.3127e-05 gnorm: 1.10 [13:53:47<10:37:42] +[titan] 2025-10-05 12:28:08,227 - root - INFO - step: 22670 loss: 2.0850 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8425 +[titan] 2025-10-05 12:28:08,228 - root - INFO - lr: 2.3118e-05 gnorm: 1.05 [13:53:57<10:37:31] +[titan] 2025-10-05 12:28:19,140 - root - INFO - step: 22675 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:28:19,140 - root - INFO - lr: 2.3110e-05 gnorm: 1.08 [13:54:08<10:37:20] +[titan] 2025-10-05 12:28:30,016 - root - INFO - step: 22680 loss: 2.1382 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 12:28:30,016 - root - INFO - lr: 
2.3101e-05 gnorm: 1.16 [13:54:19<10:37:09] +[titan] 2025-10-05 12:28:40,902 - root - INFO - step: 22685 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8186 +[titan] 2025-10-05 12:28:40,902 - root - INFO - lr: 2.3092e-05 gnorm: 1.08 [13:54:30<10:36:57] +[titan] 2025-10-05 12:28:51,765 - root - INFO - step: 22690 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 12:28:51,766 - root - INFO - lr: 2.3083e-05 gnorm: 1.03 [13:54:41<10:36:46] +[titan] 2025-10-05 12:29:02,626 - root - INFO - step: 22695 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:29:02,626 - root - INFO - lr: 2.3075e-05 gnorm: 1.06 [13:54:52<10:36:35] +[titan] 2025-10-05 12:29:11,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:29:13,457 - root - INFO - step: 22700 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 12:29:13,457 - root - INFO - lr: 2.3066e-05 gnorm: 1.07 [13:55:03<10:36:24] +[titan] 2025-10-05 12:29:24,373 - root - INFO - step: 22705 loss: 2.0814 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:29:24,373 - root - INFO - lr: 2.3057e-05 gnorm: 1.08 [13:55:14<10:36:13] +[titan] 2025-10-05 12:29:35,226 - root - INFO - step: 22710 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 12:29:35,226 - root - INFO - lr: 2.3048e-05 gnorm: 1.06 [13:55:24<10:36:02] +[titan] 2025-10-05 12:29:46,086 - root - INFO - step: 22715 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8506 +[titan] 2025-10-05 12:29:46,087 - root - INFO - lr: 2.3040e-05 gnorm: 1.12 [13:55:35<10:35:50] +[titan] 2025-10-05 12:29:56,956 - root - INFO - step: 22720 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 12:29:56,956 - root - INFO - lr: 2.3031e-05 gnorm: 1.05 [13:55:46<10:35:39] +[titan] 2025-10-05 12:30:07,794 - root - INFO - step: 22725 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8635 +[titan] 2025-10-05 12:30:07,794 - root - INFO - lr: 2.3022e-05 gnorm: 1.08 [13:55:57<10:35:28] +[titan] 2025-10-05 12:30:18,701 - root - INFO - step: 22730 loss: 2.0684 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:30:18,702 - root - INFO - lr: 
2.3013e-05 gnorm: 1.06 [13:56:08<10:35:17] +[titan] 2025-10-05 12:30:29,526 - root - INFO - step: 22735 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9056 +[titan] 2025-10-05 12:30:29,526 - root - INFO - lr: 2.3005e-05 gnorm: 1.08 [13:56:19<10:35:06] +[titan] 2025-10-05 12:30:40,389 - root - INFO - step: 22740 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 12:30:40,389 - root - INFO - lr: 2.2996e-05 gnorm: 1.08 [13:56:30<10:34:55] +[titan] 2025-10-05 12:30:51,240 - root - INFO - step: 22745 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 12:30:51,240 - root - INFO - lr: 2.2987e-05 gnorm: 1.07 [13:56:40<10:34:43] +[titan] 2025-10-05 12:30:59,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:31:02,123 - root - INFO - step: 22750 loss: 2.1101 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:31:02,123 - root - INFO - lr: 2.2978e-05 gnorm: 1.09 [13:56:51<10:34:32] +[titan] 2025-10-05 12:31:12,994 - root - INFO - step: 22755 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8489 +[titan] 2025-10-05 12:31:12,994 - root - INFO - lr: 2.2970e-05 gnorm: 1.07 [13:57:02<10:34:21] +[titan] 2025-10-05 12:31:23,866 - root - INFO - step: 22760 loss: 2.0378 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 12:31:23,866 - root - INFO - lr: 2.2961e-05 gnorm: 1.07 [13:57:13<10:34:10] +[titan] 2025-10-05 12:31:34,726 - root - INFO - step: 22765 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8417 +[titan] 2025-10-05 12:31:34,726 - root - INFO - lr: 2.2952e-05 gnorm: 1.06 [13:57:24<10:33:59] +[titan] 2025-10-05 12:31:45,584 - root - INFO - step: 22770 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 12:31:45,585 - root - INFO - lr: 2.2944e-05 gnorm: 1.08 [13:57:35<10:33:48] +[titan] 2025-10-05 12:31:56,424 - root - INFO - step: 22775 loss: 2.0368 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 12:31:56,424 - root - INFO - lr: 2.2935e-05 gnorm: 1.06 [13:57:46<10:33:36] +[titan] 2025-10-05 12:32:07,271 - root - INFO - step: 22780 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8625 +[titan] 2025-10-05 12:32:07,271 - root - INFO - lr: 
2.2926e-05 gnorm: 1.09 [13:57:56<10:33:25] +[titan] 2025-10-05 12:32:18,125 - root - INFO - step: 22785 loss: 2.0749 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:32:18,126 - root - INFO - lr: 2.2917e-05 gnorm: 1.06 [13:58:07<10:33:14] +[titan] 2025-10-05 12:32:29,041 - root - INFO - step: 22790 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 12:32:29,041 - root - INFO - lr: 2.2909e-05 gnorm: 1.01 [13:58:18<10:33:03] +[titan] 2025-10-05 12:32:39,901 - root - INFO - step: 22795 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8495 +[titan] 2025-10-05 12:32:39,901 - root - INFO - lr: 2.2900e-05 gnorm: 1.05 [13:58:29<10:32:52] +[titan] 2025-10-05 12:32:48,566 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:32:50,763 - root - INFO - step: 22800 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:32:50,764 - root - INFO - lr: 2.2891e-05 gnorm: 1.04 [13:58:40<10:32:41] +[titan] 2025-10-05 12:33:01,622 - root - INFO - step: 22805 loss: 2.0900 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8482 +[titan] 2025-10-05 12:33:01,622 - root - INFO - lr: 2.2882e-05 gnorm: 1.02 [13:58:51<10:32:29] +[titan] 2025-10-05 12:33:12,469 - root - INFO - step: 22810 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 12:33:12,469 - root - INFO - lr: 2.2874e-05 gnorm: 1.05 [13:59:02<10:32:18] +[titan] 2025-10-05 12:33:23,367 - root - INFO - step: 22815 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8893 +[titan] 2025-10-05 12:33:23,367 - root - INFO - lr: 2.2865e-05 gnorm: 1.08 [13:59:13<10:32:07] +[titan] 2025-10-05 12:33:34,205 - root - INFO - step: 22820 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:33:34,206 - root - INFO - lr: 2.2856e-05 gnorm: 1.08 [13:59:23<10:31:56] +[titan] 2025-10-05 12:33:45,062 - root - INFO - step: 22825 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:33:45,062 - root - INFO - lr: 2.2847e-05 gnorm: 1.06 [13:59:34<10:31:45] +[titan] 2025-10-05 12:33:55,902 - root - INFO - step: 22830 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 12:33:55,902 - root - INFO - lr: 
2.2839e-05 gnorm: 1.08 [13:59:45<10:31:34] +[titan] 2025-10-05 12:34:06,747 - root - INFO - step: 22835 loss: 2.0824 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 12:34:06,747 - root - INFO - lr: 2.2830e-05 gnorm: 1.04 [13:59:56<10:31:22] +[titan] 2025-10-05 12:34:17,586 - root - INFO - step: 22840 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8552 +[titan] 2025-10-05 12:34:17,586 - root - INFO - lr: 2.2821e-05 gnorm: 1.04 [14:00:07<10:31:11] +[titan] 2025-10-05 12:34:28,454 - root - INFO - step: 22845 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8428 +[titan] 2025-10-05 12:34:28,454 - root - INFO - lr: 2.2813e-05 gnorm: 1.11 [14:00:18<10:31:00] +[titan] 2025-10-05 12:34:37,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:34:39,324 - root - INFO - step: 22850 loss: 2.0362 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 12:34:39,324 - root - INFO - lr: 2.2804e-05 gnorm: 1.07 [14:00:28<10:30:49] +[titan] 2025-10-05 12:34:50,183 - root - INFO - step: 22855 loss: 2.0829 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:34:50,183 - root - INFO - lr: 2.2795e-05 gnorm: 1.04 [14:00:39<10:30:38] +[titan] 2025-10-05 12:35:01,017 - root - INFO - step: 22860 loss: 1.9834 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 12:35:01,017 - root - INFO - lr: 2.2786e-05 gnorm: 1.01 [14:00:50<10:30:26] +[titan] 2025-10-05 12:35:11,885 - root - INFO - step: 22865 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:35:11,885 - root - INFO - lr: 2.2778e-05 gnorm: 1.04 [14:01:01<10:30:15] +[titan] 2025-10-05 12:35:22,742 - root - INFO - step: 22870 loss: 2.1227 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8759 +[titan] 2025-10-05 12:35:22,742 - root - INFO - lr: 2.2769e-05 gnorm: 1.09 [14:01:12<10:30:04] +[titan] 2025-10-05 12:35:33,625 - root - INFO - step: 22875 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 12:35:33,625 - root - INFO - lr: 2.2760e-05 gnorm: 1.12 [14:01:23<10:29:53] +[titan] 2025-10-05 12:35:44,473 - root - INFO - step: 22880 loss: 2.0907 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8483 +[titan] 2025-10-05 12:35:44,473 - root - INFO - lr: 
2.2751e-05 gnorm: 1.09 [14:01:34<10:29:42] +[titan] 2025-10-05 12:35:55,316 - root - INFO - step: 22885 loss: 2.1475 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 12:35:55,316 - root - INFO - lr: 2.2743e-05 gnorm: 1.08 [14:01:44<10:29:31] +[titan] 2025-10-05 12:36:06,165 - root - INFO - step: 22890 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8494 +[titan] 2025-10-05 12:36:06,165 - root - INFO - lr: 2.2734e-05 gnorm: 1.10 [14:01:55<10:29:19] +[titan] 2025-10-05 12:36:17,010 - root - INFO - step: 22895 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:36:17,011 - root - INFO - lr: 2.2725e-05 gnorm: 1.06 [14:02:06<10:29:08] +[titan] 2025-10-05 12:36:25,704 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:36:27,886 - root - INFO - step: 22900 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 12:36:27,887 - root - INFO - lr: 2.2717e-05 gnorm: 1.09 [14:02:17<10:28:57] +[titan] 2025-10-05 12:36:38,741 - root - INFO - step: 22905 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8109 +[titan] 2025-10-05 12:36:38,741 - root - INFO - lr: 2.2708e-05 gnorm: 1.06 [14:02:28<10:28:46] +[titan] 2025-10-05 12:36:49,633 - root - INFO - step: 22910 loss: 2.0954 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:36:49,633 - root - INFO - lr: 2.2699e-05 gnorm: 1.13 [14:02:39<10:28:35] +[titan] 2025-10-05 12:37:00,494 - root - INFO - step: 22915 loss: 2.1261 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8788 +[titan] 2025-10-05 12:37:00,494 - root - INFO - lr: 2.2690e-05 gnorm: 1.09 [14:02:50<10:28:24] +[titan] 2025-10-05 12:37:11,342 - root - INFO - step: 22920 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8270 +[titan] 2025-10-05 12:37:11,343 - root - INFO - lr: 2.2682e-05 gnorm: 1.05 [14:03:01<10:28:12] +[titan] 2025-10-05 12:37:22,183 - root - INFO - step: 22925 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8036 +[titan] 2025-10-05 12:37:22,183 - root - INFO - lr: 2.2673e-05 gnorm: 1.04 [14:03:11<10:28:01] +[titan] 2025-10-05 12:37:33,033 - root - INFO - step: 22930 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9081 +[titan] 2025-10-05 12:37:33,033 - root - INFO - lr: 
2.2664e-05 gnorm: 1.08 [14:03:22<10:27:50] +[titan] 2025-10-05 12:37:43,902 - root - INFO - step: 22935 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:37:43,902 - root - INFO - lr: 2.2656e-05 gnorm: 1.04 [14:03:33<10:27:39] +[titan] 2025-10-05 12:37:54,792 - root - INFO - step: 22940 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 12:37:54,792 - root - INFO - lr: 2.2647e-05 gnorm: 1.09 [14:03:44<10:27:28] +[titan] 2025-10-05 12:38:05,627 - root - INFO - step: 22945 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8431 +[titan] 2025-10-05 12:38:05,628 - root - INFO - lr: 2.2638e-05 gnorm: 1.08 [14:03:55<10:27:17] +[titan] 2025-10-05 12:38:14,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:38:16,484 - root - INFO - step: 22950 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7713 +[titan] 2025-10-05 12:38:16,484 - root - INFO - lr: 2.2629e-05 gnorm: 1.08 [14:04:06<10:27:05] +[titan] 2025-10-05 12:38:27,334 - root - INFO - step: 22955 loss: 2.0812 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:38:27,334 - root - INFO - lr: 2.2621e-05 gnorm: 1.09 [14:04:16<10:26:54] +[titan] 2025-10-05 12:38:38,180 - root - INFO - step: 22960 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8473 +[titan] 2025-10-05 12:38:38,180 - root - INFO - lr: 2.2612e-05 gnorm: 1.14 [14:04:27<10:26:43] +[titan] 2025-10-05 12:38:49,045 - root - INFO - step: 22965 loss: 2.0894 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 12:38:49,045 - root - INFO - lr: 2.2603e-05 gnorm: 1.02 [14:04:38<10:26:32] +[titan] 2025-10-05 12:38:59,904 - root - INFO - step: 22970 loss: 2.0347 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7986 +[titan] 2025-10-05 12:38:59,904 - root - INFO - lr: 2.2595e-05 gnorm: 1.08 [14:04:49<10:26:21] +[titan] 2025-10-05 12:39:10,774 - root - INFO - step: 22975 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9048 +[titan] 2025-10-05 12:39:10,774 - root - INFO - lr: 2.2586e-05 gnorm: 1.09 [14:05:00<10:26:10] +[titan] 2025-10-05 12:39:21,640 - root - INFO - step: 22980 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:39:21,640 - root - INFO - lr: 
2.2577e-05 gnorm: 1.09 [14:05:11<10:25:59] +[titan] 2025-10-05 12:39:32,525 - root - INFO - step: 22985 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8780 +[titan] 2025-10-05 12:39:32,525 - root - INFO - lr: 2.2568e-05 gnorm: 1.07 [14:05:22<10:25:47] +[titan] 2025-10-05 12:39:43,368 - root - INFO - step: 22990 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 12:39:43,369 - root - INFO - lr: 2.2560e-05 gnorm: 1.08 [14:05:33<10:25:36] +[titan] 2025-10-05 12:39:54,216 - root - INFO - step: 22995 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 12:39:54,217 - root - INFO - lr: 2.2551e-05 gnorm: 1.02 [14:05:43<10:25:25] +[titan] 2025-10-05 12:40:02,883 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:40:05,066 - root - INFO - step: 23000 loss: 2.1507 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 12:40:05,066 - root - INFO - lr: 2.2542e-05 gnorm: 1.06 [14:05:54<10:25:14] +[titan] 2025-10-05 12:40:15,916 - root - INFO - step: 23005 loss: 2.1008 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 12:40:15,916 - root - INFO - lr: 2.2534e-05 gnorm: 1.09 [14:06:05<10:25:03] +[titan] 2025-10-05 12:40:26,775 - root - INFO - step: 23010 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8717 +[titan] 2025-10-05 12:40:26,775 - root - INFO - lr: 2.2525e-05 gnorm: 1.06 [14:06:16<10:24:52] +[titan] 2025-10-05 12:40:37,622 - root - INFO - step: 23015 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8876 +[titan] 2025-10-05 12:40:37,622 - root - INFO - lr: 2.2516e-05 gnorm: 1.06 [14:06:27<10:24:40] +[titan] 2025-10-05 12:40:48,479 - root - INFO - step: 23020 loss: 2.1422 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:40:48,479 - root - INFO - lr: 2.2507e-05 gnorm: 1.08 [14:06:38<10:24:29] +[titan] 2025-10-05 12:40:59,327 - root - INFO - step: 23025 loss: 2.0668 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:40:59,327 - root - INFO - lr: 2.2499e-05 gnorm: 1.05 [14:06:48<10:24:18] +[titan] 2025-10-05 12:41:10,188 - root - INFO - step: 23030 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:41:10,188 - root - INFO - lr: 
2.2490e-05 gnorm: 1.06 [14:06:59<10:24:07] +[titan] 2025-10-05 12:41:21,085 - root - INFO - step: 23035 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 12:41:21,085 - root - INFO - lr: 2.2481e-05 gnorm: 1.07 [14:07:10<10:23:56] +[titan] 2025-10-05 12:41:32,099 - root - INFO - step: 23040 loss: 2.1136 memory: 118.84GiB(85.28%) tps: 29,752 tflops: 412.76 mfu: 41.74% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:41:32,100 - root - INFO - lr: 2.2473e-05 gnorm: 1.05 [14:07:21<10:23:45] +[titan] 2025-10-05 12:41:32,280 - root - INFO - Dumping profiler traces at step 23040 +[titan] 2025-10-05 12:41:32,316 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:41:43,199 - root - INFO - step: 23045 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.58 mfu: 41.41% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 12:41:43,199 - root - INFO - lr: 2.2464e-05 gnorm: 1.07 [14:07:32<10:23:34] +[titan] 2025-10-05 12:41:51,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:41:54,062 - root - INFO - step: 23050 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:41:54,062 - root - INFO - lr: 2.2455e-05 gnorm: 1.10 [14:07:43<10:23:23] +[titan] 2025-10-05 12:42:04,939 - root - INFO - step: 23055 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 12:42:04,940 - root - INFO - lr: 2.2447e-05 gnorm: 1.10 [14:07:54<10:23:11] +[titan] 2025-10-05 12:42:15,807 - root - INFO - step: 23060 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 12:42:15,807 - root - INFO - lr: 2.2438e-05 gnorm: 1.09 [14:08:05<10:23:00] +[titan] 2025-10-05 12:42:26,648 - root - INFO - step: 23065 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 12:42:26,648 - root - INFO - lr: 2.2429e-05 gnorm: 1.06 [14:08:16<10:22:49] +[titan] 2025-10-05 12:42:37,523 - root - INFO - step: 23070 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 12:42:37,523 - root - INFO - lr: 2.2420e-05 gnorm: 1.06 [14:08:27<10:22:38] +[titan] 2025-10-05 12:42:48,380 - root - INFO - step: 23075 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 12:42:48,380 - root - INFO - lr: 2.2412e-05 gnorm: 1.08 [14:08:38<10:22:27] +[titan] 2025-10-05 12:42:59,255 - root - INFO - step: 23080 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 12:42:59,255 - root - INFO - lr: 
2.2403e-05 gnorm: 1.11 [14:08:48<10:22:16] +[titan] 2025-10-05 12:43:10,104 - root - INFO - step: 23085 loss: 2.0492 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 12:43:10,104 - root - INFO - lr: 2.2394e-05 gnorm: 1.06 [14:08:59<10:22:04] +[titan] 2025-10-05 12:43:20,963 - root - INFO - step: 23090 loss: 2.0906 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8478 +[titan] 2025-10-05 12:43:20,963 - root - INFO - lr: 2.2386e-05 gnorm: 1.07 [14:09:10<10:21:53] +[titan] 2025-10-05 12:43:31,830 - root - INFO - step: 23095 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:43:31,830 - root - INFO - lr: 2.2377e-05 gnorm: 1.06 [14:09:21<10:21:42] +[titan] 2025-10-05 12:43:40,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:43:42,684 - root - INFO - step: 23100 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 12:43:42,684 - root - INFO - lr: 2.2368e-05 gnorm: 1.08 [14:09:32<10:21:31] +[titan] 2025-10-05 12:43:53,521 - root - INFO - step: 23105 loss: 2.1541 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9043 +[titan] 2025-10-05 12:43:53,521 - root - INFO - lr: 2.2360e-05 gnorm: 1.12 [14:09:43<10:21:20] +[titan] 2025-10-05 12:44:04,389 - root - INFO - step: 23110 loss: 2.0636 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:44:04,389 - root - INFO - lr: 2.2351e-05 gnorm: 1.09 [14:09:54<10:21:09] +[titan] 2025-10-05 12:44:15,253 - root - INFO - step: 23115 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 12:44:15,253 - root - INFO - lr: 2.2342e-05 gnorm: 1.06 [14:10:04<10:20:57] +[titan] 2025-10-05 12:44:26,116 - root - INFO - step: 23120 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 12:44:26,116 - root - INFO - lr: 2.2334e-05 gnorm: 1.03 [14:10:15<10:20:46] +[titan] 2025-10-05 12:44:36,986 - root - INFO - step: 23125 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:44:36,986 - root - INFO - lr: 2.2325e-05 gnorm: 1.04 [14:10:26<10:20:35] +[titan] 2025-10-05 12:44:47,859 - root - INFO - step: 23130 loss: 2.1268 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 12:44:47,860 - root - INFO - lr: 
2.2316e-05 gnorm: 1.08 [14:10:37<10:20:24] +[titan] 2025-10-05 12:44:58,729 - root - INFO - step: 23135 loss: 2.1048 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 12:44:58,729 - root - INFO - lr: 2.2308e-05 gnorm: 1.10 [14:10:48<10:20:13] +[titan] 2025-10-05 12:45:09,592 - root - INFO - step: 23140 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8194 +[titan] 2025-10-05 12:45:09,592 - root - INFO - lr: 2.2299e-05 gnorm: 1.09 [14:10:59<10:20:02] +[titan] 2025-10-05 12:45:20,444 - root - INFO - step: 23145 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:45:20,444 - root - INFO - lr: 2.2290e-05 gnorm: 1.10 [14:11:10<10:19:51] +[titan] 2025-10-05 12:45:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:45:31,320 - root - INFO - step: 23150 loss: 2.0752 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:45:31,320 - root - INFO - lr: 2.2281e-05 gnorm: 1.05 [14:11:20<10:19:39] +[titan] 2025-10-05 12:45:42,180 - root - INFO - step: 23155 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:45:42,181 - root - INFO - lr: 2.2273e-05 gnorm: 1.10 [14:11:31<10:19:28] +[titan] 2025-10-05 12:45:53,058 - root - INFO - step: 23160 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 12:45:53,058 - root - INFO - lr: 2.2264e-05 gnorm: 1.06 [14:11:42<10:19:17] +[titan] 2025-10-05 12:46:03,966 - root - INFO - step: 23165 loss: 1.9940 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 12:46:03,966 - root - INFO - lr: 2.2255e-05 gnorm: 1.07 [14:11:53<10:19:06] +[titan] 2025-10-05 12:46:14,825 - root - INFO - step: 23170 loss: 2.1123 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8673 +[titan] 2025-10-05 12:46:14,825 - root - INFO - lr: 2.2247e-05 gnorm: 1.06 [14:12:04<10:18:55] +[titan] 2025-10-05 12:46:25,704 - root - INFO - step: 23175 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:46:25,705 - root - INFO - lr: 2.2238e-05 gnorm: 1.10 [14:12:15<10:18:44] +[titan] 2025-10-05 12:46:36,828 - root - INFO - step: 23180 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 29,460 tflops: 408.71 mfu: 41.33% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8561 +[titan] 2025-10-05 12:46:36,828 - root - INFO - lr: 
2.2229e-05 gnorm: 1.09 [14:12:26<10:18:33] +[titan] 2025-10-05 12:46:47,680 - root - INFO - step: 23185 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 12:46:47,680 - root - INFO - lr: 2.2221e-05 gnorm: 1.07 [14:12:37<10:18:22] +[titan] 2025-10-05 12:46:58,544 - root - INFO - step: 23190 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8817 +[titan] 2025-10-05 12:46:58,544 - root - INFO - lr: 2.2212e-05 gnorm: 1.07 [14:12:48<10:18:10] +[titan] 2025-10-05 12:47:09,423 - root - INFO - step: 23195 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 12:47:09,424 - root - INFO - lr: 2.2203e-05 gnorm: 1.07 [14:12:59<10:17:59] +[titan] 2025-10-05 12:47:18,105 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:47:20,285 - root - INFO - step: 23200 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:47:20,285 - root - INFO - lr: 2.2195e-05 gnorm: 1.05 [14:13:09<10:17:48] +[titan] 2025-10-05 12:47:31,161 - root - INFO - step: 23205 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:47:31,161 - root - INFO - lr: 2.2186e-05 gnorm: 1.07 [14:13:20<10:17:37] +[titan] 2025-10-05 12:47:42,031 - root - INFO - step: 23210 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 12:47:42,031 - root - INFO - lr: 2.2177e-05 gnorm: 1.08 [14:13:31<10:17:26] +[titan] 2025-10-05 12:47:52,877 - root - INFO - step: 23215 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8652 +[titan] 2025-10-05 12:47:52,877 - root - INFO - lr: 2.2169e-05 gnorm: 1.07 [14:13:42<10:17:15] +[titan] 2025-10-05 12:48:03,720 - root - INFO - step: 23220 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:48:03,720 - root - INFO - lr: 2.2160e-05 gnorm: 1.06 [14:13:53<10:17:03] +[titan] 2025-10-05 12:48:14,569 - root - INFO - step: 23225 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8277 +[titan] 2025-10-05 12:48:14,569 - root - INFO - lr: 2.2151e-05 gnorm: 1.07 [14:14:04<10:16:52] +[titan] 2025-10-05 12:48:25,456 - root - INFO - step: 23230 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:48:25,456 - root - INFO - lr: 
2.2143e-05 gnorm: 1.09 [14:14:15<10:16:41] +[titan] 2025-10-05 12:48:36,322 - root - INFO - step: 23235 loss: 2.0597 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 12:48:36,322 - root - INFO - lr: 2.2134e-05 gnorm: 1.05 [14:14:25<10:16:30] +[titan] 2025-10-05 12:48:47,191 - root - INFO - step: 23240 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 12:48:47,191 - root - INFO - lr: 2.2125e-05 gnorm: 1.06 [14:14:36<10:16:19] +[titan] 2025-10-05 12:48:58,072 - root - INFO - step: 23245 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8758 +[titan] 2025-10-05 12:48:58,072 - root - INFO - lr: 2.2117e-05 gnorm: 1.08 [14:14:47<10:16:08] +[titan] 2025-10-05 12:49:06,749 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:49:08,942 - root - INFO - step: 23250 loss: 2.0918 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 12:49:08,943 - root - INFO - lr: 2.2108e-05 gnorm: 1.10 [14:14:58<10:15:57] +[titan] 2025-10-05 12:49:19,822 - root - INFO - step: 23255 loss: 2.1127 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8665 +[titan] 2025-10-05 12:49:19,822 - root - INFO - lr: 2.2099e-05 gnorm: 1.05 [14:15:09<10:15:45] +[titan] 2025-10-05 12:49:30,722 - root - INFO - step: 23260 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8961 +[titan] 2025-10-05 12:49:30,723 - root - INFO - lr: 2.2091e-05 gnorm: 1.10 [14:15:20<10:15:34] +[titan] 2025-10-05 12:49:41,642 - root - INFO - step: 23265 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 12:49:41,642 - root - INFO - lr: 2.2082e-05 gnorm: 1.09 [14:15:31<10:15:23] +[titan] 2025-10-05 12:49:52,513 - root - INFO - step: 23270 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 12:49:52,513 - root - INFO - lr: 2.2073e-05 gnorm: 1.08 [14:15:42<10:15:12] +[titan] 2025-10-05 12:50:03,384 - root - INFO - step: 23275 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8161 +[titan] 2025-10-05 12:50:03,384 - root - INFO - lr: 2.2065e-05 gnorm: 1.07 [14:15:53<10:15:01] +[titan] 2025-10-05 12:50:14,264 - root - INFO - step: 23280 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 12:50:14,264 - root - INFO - lr: 
2.2056e-05 gnorm: 1.06 [14:16:03<10:14:50] +[titan] 2025-10-05 12:50:25,152 - root - INFO - step: 23285 loss: 2.1398 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 12:50:25,152 - root - INFO - lr: 2.2047e-05 gnorm: 1.05 [14:16:14<10:14:39] +[titan] 2025-10-05 12:50:36,029 - root - INFO - step: 23290 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 12:50:36,029 - root - INFO - lr: 2.2039e-05 gnorm: 1.05 [14:16:25<10:14:27] +[titan] 2025-10-05 12:50:46,933 - root - INFO - step: 23295 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:50:46,933 - root - INFO - lr: 2.2030e-05 gnorm: 1.10 [14:16:36<10:14:16] +[titan] 2025-10-05 12:50:55,612 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:50:57,805 - root - INFO - step: 23300 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:50:57,805 - root - INFO - lr: 2.2021e-05 gnorm: 1.10 [14:16:47<10:14:05] +[titan] 2025-10-05 12:51:08,673 - root - INFO - step: 23305 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:51:08,673 - root - INFO - lr: 2.2013e-05 gnorm: 1.06 [14:16:58<10:13:54] +[titan] 2025-10-05 12:51:19,553 - root - INFO - step: 23310 loss: 2.0851 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:51:19,553 - root - INFO - lr: 2.2004e-05 gnorm: 1.07 [14:17:09<10:13:43] +[titan] 2025-10-05 12:51:30,434 - root - INFO - step: 23315 loss: 2.0776 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 12:51:30,435 - root - INFO - lr: 2.1995e-05 gnorm: 1.07 [14:17:20<10:13:32] +[titan] 2025-10-05 12:51:41,297 - root - INFO - step: 23320 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:51:41,298 - root - INFO - lr: 2.1987e-05 gnorm: 1.06 [14:17:30<10:13:21] +[titan] 2025-10-05 12:51:52,171 - root - INFO - step: 23325 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8412 +[titan] 2025-10-05 12:51:52,172 - root - INFO - lr: 2.1978e-05 gnorm: 1.06 [14:17:41<10:13:09] +[titan] 2025-10-05 12:52:03,034 - root - INFO - step: 23330 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 12:52:03,034 - root - INFO - lr: 
2.1969e-05 gnorm: 1.06 [14:17:52<10:12:58] +[titan] 2025-10-05 12:52:13,904 - root - INFO - step: 23335 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:52:13,905 - root - INFO - lr: 2.1961e-05 gnorm: 1.08 [14:18:03<10:12:47] +[titan] 2025-10-05 12:52:24,765 - root - INFO - step: 23340 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 12:52:24,765 - root - INFO - lr: 2.1952e-05 gnorm: 1.05 [14:18:14<10:12:36] +[titan] 2025-10-05 12:52:35,613 - root - INFO - step: 23345 loss: 2.0713 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 12:52:35,613 - root - INFO - lr: 2.1944e-05 gnorm: 1.08 [14:18:25<10:12:25] +[titan] 2025-10-05 12:52:44,297 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:52:46,481 - root - INFO - step: 23350 loss: 2.0693 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:52:46,481 - root - INFO - lr: 2.1935e-05 gnorm: 1.06 [14:18:36<10:12:14] +[titan] 2025-10-05 12:52:57,341 - root - INFO - step: 23355 loss: 2.1206 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 12:52:57,342 - root - INFO - lr: 2.1926e-05 gnorm: 1.09 [14:18:46<10:12:03] +[titan] 2025-10-05 12:53:08,214 - root - INFO - step: 23360 loss: 2.1012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:53:08,214 - root - INFO - lr: 2.1918e-05 gnorm: 1.05 [14:18:57<10:11:51] +[titan] 2025-10-05 12:53:19,079 - root - INFO - step: 23365 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8477 +[titan] 2025-10-05 12:53:19,079 - root - INFO - lr: 2.1909e-05 gnorm: 1.11 [14:19:08<10:11:40] +[titan] 2025-10-05 12:53:29,939 - root - INFO - step: 23370 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8916 +[titan] 2025-10-05 12:53:29,939 - root - INFO - lr: 2.1900e-05 gnorm: 1.11 [14:19:19<10:11:29] +[titan] 2025-10-05 12:53:40,836 - root - INFO - step: 23375 loss: 2.0922 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 12:53:40,836 - root - INFO - lr: 2.1892e-05 gnorm: 1.13 [14:19:30<10:11:18] +[titan] 2025-10-05 12:53:51,725 - root - INFO - step: 23380 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 12:53:51,725 - root - INFO - lr: 
2.1883e-05 gnorm: 1.07 [14:19:41<10:11:07] +[titan] 2025-10-05 12:54:02,631 - root - INFO - step: 23385 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 12:54:02,631 - root - INFO - lr: 2.1874e-05 gnorm: 1.07 [14:19:52<10:10:56] +[titan] 2025-10-05 12:54:13,542 - root - INFO - step: 23390 loss: 2.0791 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:54:13,543 - root - INFO - lr: 2.1866e-05 gnorm: 1.12 [14:20:03<10:10:45] +[titan] 2025-10-05 12:54:24,401 - root - INFO - step: 23395 loss: 2.0662 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 12:54:24,401 - root - INFO - lr: 2.1857e-05 gnorm: 1.08 [14:20:14<10:10:33] +[titan] 2025-10-05 12:54:33,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:54:35,256 - root - INFO - step: 23400 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 12:54:35,256 - root - INFO - lr: 2.1848e-05 gnorm: 1.09 [14:20:24<10:10:22] +[titan] 2025-10-05 12:54:46,105 - root - INFO - step: 23405 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8085 +[titan] 2025-10-05 12:54:46,105 - root - INFO - lr: 2.1840e-05 gnorm: 1.03 [14:20:35<10:10:11] +[titan] 2025-10-05 12:54:56,966 - root - INFO - step: 23410 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8644 +[titan] 2025-10-05 12:54:56,966 - root - INFO - lr: 2.1831e-05 gnorm: 1.09 [14:20:46<10:10:00] +[titan] 2025-10-05 12:55:07,809 - root - INFO - step: 23415 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8747 +[titan] 2025-10-05 12:55:07,810 - root - INFO - lr: 2.1823e-05 gnorm: 1.09 [14:20:57<10:09:49] +[titan] 2025-10-05 12:55:18,648 - root - INFO - step: 23420 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:55:18,648 - root - INFO - lr: 2.1814e-05 gnorm: 1.09 [14:21:08<10:09:38] +[titan] 2025-10-05 12:55:29,531 - root - INFO - step: 23425 loss: 2.1312 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 12:55:29,531 - root - INFO - lr: 2.1805e-05 gnorm: 1.07 [14:21:19<10:09:26] +[titan] 2025-10-05 12:55:40,423 - root - INFO - step: 23430 loss: 2.0740 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:55:40,423 - root - INFO - lr: 
2.1797e-05 gnorm: 1.07 [14:21:30<10:09:15] +[titan] 2025-10-05 12:55:51,286 - root - INFO - step: 23435 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 12:55:51,286 - root - INFO - lr: 2.1788e-05 gnorm: 1.05 [14:21:40<10:09:04] +[titan] 2025-10-05 12:56:02,131 - root - INFO - step: 23440 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8791 +[titan] 2025-10-05 12:56:02,131 - root - INFO - lr: 2.1779e-05 gnorm: 1.05 [14:21:51<10:08:53] +[titan] 2025-10-05 12:56:12,982 - root - INFO - step: 23445 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 12:56:12,982 - root - INFO - lr: 2.1771e-05 gnorm: 1.07 [14:22:02<10:08:42] +[titan] 2025-10-05 12:56:21,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:56:23,837 - root - INFO - step: 23450 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 12:56:23,837 - root - INFO - lr: 2.1762e-05 gnorm: 1.03 [14:22:13<10:08:31] +[titan] 2025-10-05 12:56:34,722 - root - INFO - step: 23455 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 12:56:34,722 - root - INFO - lr: 2.1753e-05 gnorm: 1.08 [14:22:24<10:08:20] +[titan] 2025-10-05 12:56:45,579 - root - INFO - step: 23460 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 12:56:45,579 - root - INFO - lr: 2.1745e-05 gnorm: 1.05 [14:22:35<10:08:08] +[titan] 2025-10-05 12:56:56,409 - root - INFO - step: 23465 loss: 2.0982 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 12:56:56,409 - root - INFO - lr: 2.1736e-05 gnorm: 1.07 [14:22:46<10:07:57] +[titan] 2025-10-05 12:57:07,229 - root - INFO - step: 23470 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:57:07,229 - root - INFO - lr: 2.1728e-05 gnorm: 1.08 [14:22:56<10:07:46] +[titan] 2025-10-05 12:57:18,049 - root - INFO - step: 23475 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 12:57:18,049 - root - INFO - lr: 2.1719e-05 gnorm: 1.09 [14:23:07<10:07:35] +[titan] 2025-10-05 12:57:28,860 - root - INFO - step: 23480 loss: 2.0930 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:57:28,860 - root - INFO - lr: 
2.1710e-05 gnorm: 1.07 [14:23:18<10:07:24] +[titan] 2025-10-05 12:57:39,712 - root - INFO - step: 23485 loss: 2.1212 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8737 +[titan] 2025-10-05 12:57:39,712 - root - INFO - lr: 2.1702e-05 gnorm: 1.09 [14:23:29<10:07:13] +[titan] 2025-10-05 12:57:50,568 - root - INFO - step: 23490 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 12:57:50,568 - root - INFO - lr: 2.1693e-05 gnorm: 1.06 [14:23:40<10:07:01] +[titan] 2025-10-05 12:58:01,418 - root - INFO - step: 23495 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 12:58:01,418 - root - INFO - lr: 2.1684e-05 gnorm: 1.13 [14:23:51<10:06:50] +[titan] 2025-10-05 12:58:10,087 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:58:12,263 - root - INFO - step: 23500 loss: 2.0793 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:58:12,263 - root - INFO - lr: 2.1676e-05 gnorm: 1.05 [14:24:01<10:06:39] +[titan] 2025-10-05 12:58:23,119 - root - INFO - step: 23505 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 12:58:23,119 - root - INFO - lr: 2.1667e-05 gnorm: 1.09 [14:24:12<10:06:28] +[titan] 2025-10-05 12:58:33,963 - root - INFO - step: 23510 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 12:58:33,964 - root - INFO - lr: 2.1659e-05 gnorm: 1.07 [14:24:23<10:06:17] +[titan] 2025-10-05 12:58:44,834 - root - INFO - step: 23515 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 12:58:44,834 - root - INFO - lr: 2.1650e-05 gnorm: 1.06 [14:24:34<10:06:06] +[titan] 2025-10-05 12:58:55,708 - root - INFO - step: 23520 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8363 +[titan] 2025-10-05 12:58:55,708 - root - INFO - lr: 2.1641e-05 gnorm: 1.05 [14:24:45<10:05:55] +[titan] 2025-10-05 12:59:06,592 - root - INFO - step: 23525 loss: 2.0619 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 12:59:06,592 - root - INFO - lr: 2.1633e-05 gnorm: 1.06 [14:24:56<10:05:43] +[titan] 2025-10-05 12:59:17,459 - root - INFO - step: 23530 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8941 +[titan] 2025-10-05 12:59:17,459 - root - INFO - lr: 
2.1624e-05 gnorm: 1.08 [14:25:07<10:05:32] +[titan] 2025-10-05 12:59:28,332 - root - INFO - step: 23535 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:59:28,332 - root - INFO - lr: 2.1616e-05 gnorm: 1.07 [14:25:17<10:05:21] +[titan] 2025-10-05 12:59:39,189 - root - INFO - step: 23540 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 12:59:39,190 - root - INFO - lr: 2.1607e-05 gnorm: 1.06 [14:25:28<10:05:10] +[titan] 2025-10-05 12:59:50,087 - root - INFO - step: 23545 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8219 +[titan] 2025-10-05 12:59:50,087 - root - INFO - lr: 2.1598e-05 gnorm: 1.07 [14:25:39<10:04:59] +[titan] 2025-10-05 12:59:58,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:00:01,039 - root - INFO - step: 23550 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 13:00:01,039 - root - INFO - lr: 2.1590e-05 gnorm: 1.13 [14:25:50<10:04:48] +[titan] 2025-10-05 13:00:05,590 - root - INFO - Dumping profiler traces at step 23552 +[titan] 2025-10-05 13:00:05,635 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:00:12,148 - root - INFO - step: 23555 loss: 2.0620 memory: 118.84GiB(85.28%) tps: 29,498 tflops: 409.24 mfu: 41.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 13:00:12,148 - root - INFO - lr: 2.1581e-05 gnorm: 1.07 [14:26:01<10:04:37] +[titan] 2025-10-05 13:00:23,000 - root - INFO - step: 23560 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 13:00:23,000 - root - INFO - lr: 2.1572e-05 gnorm: 1.11 [14:26:12<10:04:26] +[titan] 2025-10-05 13:00:33,832 - root - INFO - step: 23565 loss: 2.1010 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.69 mfu: 42.44% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 13:00:33,833 - root - INFO - lr: 2.1564e-05 gnorm: 1.09 [14:26:23<10:04:14] +[titan] 2025-10-05 13:00:44,700 - root - INFO - step: 23570 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 13:00:44,700 - root - INFO - lr: 2.1555e-05 gnorm: 1.03 [14:26:34<10:04:03] +[titan] 2025-10-05 13:00:55,558 - root - INFO - step: 23575 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 13:00:55,558 - root - INFO - lr: 2.1547e-05 gnorm: 1.08 [14:26:45<10:03:52] +[titan] 2025-10-05 13:01:06,406 - root - INFO - step: 23580 loss: 
2.1114 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 13:01:06,406 - root - INFO - lr: 2.1538e-05 gnorm: 1.10 [14:26:56<10:03:41] +[titan] 2025-10-05 13:01:17,310 - root - INFO - step: 23585 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 13:01:17,310 - root - INFO - lr: 2.1529e-05 gnorm: 1.11 [14:27:06<10:03:30] +[titan] 2025-10-05 13:01:28,160 - root - INFO - step: 23590 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:01:28,161 - root - INFO - lr: 2.1521e-05 gnorm: 1.07 [14:27:17<10:03:19] +[titan] 2025-10-05 13:01:39,026 - root - INFO - step: 23595 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 13:01:39,026 - root - INFO - lr: 2.1512e-05 gnorm: 1.08 [14:27:28<10:03:08] +[titan] 2025-10-05 13:01:47,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:01:49,899 - root - INFO - step: 23600 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 13:01:49,899 - root - INFO - lr: 2.1504e-05 gnorm: 1.10 [14:27:39<10:02:56] +[titan] 2025-10-05 13:02:00,769 - root - INFO - step: 23605 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 13:02:00,769 - root - INFO - lr: 2.1495e-05 gnorm: 1.04 [14:27:50<10:02:45] +[titan] 2025-10-05 13:02:11,618 - root - INFO - step: 23610 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 13:02:11,618 - root - INFO - lr: 2.1486e-05 gnorm: 1.09 [14:28:01<10:02:34] +[titan] 2025-10-05 13:02:22,509 - root - INFO - step: 23615 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 13:02:22,509 - root - INFO - lr: 2.1478e-05 gnorm: 1.07 [14:28:12<10:02:23] +[titan] 2025-10-05 13:02:33,369 - root - INFO - step: 23620 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 13:02:33,369 - root - INFO - lr: 2.1469e-05 gnorm: 1.06 [14:28:22<10:02:12] +[titan] 2025-10-05 13:02:44,239 - root - INFO - step: 23625 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8846 +[titan] 2025-10-05 13:02:44,239 - root - INFO - lr: 2.1461e-05 gnorm: 1.08 [14:28:33<10:02:01] +[titan] 2025-10-05 13:02:55,117 - root - INFO - step: 23630 loss: 2.0120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:02:55,117 - root - INFO - lr: 
2.1452e-05 gnorm: 1.06 [14:28:44<10:01:50] +[titan] 2025-10-05 13:03:05,938 - root - INFO - step: 23635 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 13:03:05,938 - root - INFO - lr: 2.1443e-05 gnorm: 1.04 [14:28:55<10:01:38] +[titan] 2025-10-05 13:03:16,775 - root - INFO - step: 23640 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 13:03:16,775 - root - INFO - lr: 2.1435e-05 gnorm: 1.06 [14:29:06<10:01:27] +[titan] 2025-10-05 13:03:27,645 - root - INFO - step: 23645 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8228 +[titan] 2025-10-05 13:03:27,645 - root - INFO - lr: 2.1426e-05 gnorm: 1.08 [14:29:17<10:01:16] +[titan] 2025-10-05 13:03:36,315 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:03:38,490 - root - INFO - step: 23650 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 13:03:38,491 - root - INFO - lr: 2.1418e-05 gnorm: 1.14 [14:29:28<10:01:05] +[titan] 2025-10-05 13:03:49,367 - root - INFO - step: 23655 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 13:03:49,367 - root - INFO - lr: 2.1409e-05 gnorm: 1.07 [14:29:38<10:00:54] +[titan] 2025-10-05 13:04:00,220 - root - INFO - step: 23660 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 13:04:00,220 - root - INFO - lr: 2.1400e-05 gnorm: 1.05 [14:29:49<10:00:43] +[titan] 2025-10-05 13:04:11,080 - root - INFO - step: 23665 loss: 2.0569 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:04:11,080 - root - INFO - lr: 2.1392e-05 gnorm: 1.05 [14:30:00<10:00:32] +[titan] 2025-10-05 13:04:21,931 - root - INFO - step: 23670 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 13:04:21,931 - root - INFO - lr: 2.1383e-05 gnorm: 1.08 [14:30:11<10:00:20] +[titan] 2025-10-05 13:04:32,799 - root - INFO - step: 23675 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:04:32,799 - root - INFO - lr: 2.1375e-05 gnorm: 1.09 [14:30:22<10:00:09] +[titan] 2025-10-05 13:04:43,687 - root - INFO - step: 23680 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8405 +[titan] 2025-10-05 13:04:43,687 - root - INFO - lr: 
2.1366e-05 gnorm: 1.09 [14:30:33< 9:59:58] +[titan] 2025-10-05 13:04:54,557 - root - INFO - step: 23685 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8546 +[titan] 2025-10-05 13:04:54,557 - root - INFO - lr: 2.1358e-05 gnorm: 1.06 [14:30:44< 9:59:47] +[titan] 2025-10-05 13:05:05,423 - root - INFO - step: 23690 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 13:05:05,423 - root - INFO - lr: 2.1349e-05 gnorm: 1.11 [14:30:55< 9:59:36] +[titan] 2025-10-05 13:05:16,292 - root - INFO - step: 23695 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 13:05:16,292 - root - INFO - lr: 2.1340e-05 gnorm: 1.07 [14:31:05< 9:59:25] +[titan] 2025-10-05 13:05:24,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:05:27,152 - root - INFO - step: 23700 loss: 2.0847 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 13:05:27,152 - root - INFO - lr: 2.1332e-05 gnorm: 1.06 [14:31:16< 9:59:14] +[titan] 2025-10-05 13:05:38,037 - root - INFO - step: 23705 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 13:05:38,037 - root - INFO - lr: 2.1323e-05 gnorm: 1.07 [14:31:27< 9:59:02] +[titan] 2025-10-05 13:05:48,993 - root - INFO - step: 23710 loss: 2.0935 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8500 +[titan] 2025-10-05 13:05:48,993 - root - INFO - lr: 2.1315e-05 gnorm: 1.06 [14:31:38< 9:58:51] +[titan] 2025-10-05 13:05:59,853 - root - INFO - step: 23715 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 13:05:59,853 - root - INFO - lr: 2.1306e-05 gnorm: 1.12 [14:31:49< 9:58:40] +[titan] 2025-10-05 13:06:10,728 - root - INFO - step: 23720 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 13:06:10,729 - root - INFO - lr: 2.1297e-05 gnorm: 1.05 [14:32:00< 9:58:29] +[titan] 2025-10-05 13:06:21,603 - root - INFO - step: 23725 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8106 +[titan] 2025-10-05 13:06:21,603 - root - INFO - lr: 2.1289e-05 gnorm: 1.04 [14:32:11< 9:58:18] +[titan] 2025-10-05 13:06:32,482 - root - INFO - step: 23730 loss: 2.0312 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 13:06:32,482 - root - INFO - lr: 
2.1280e-05 gnorm: 1.09 [14:32:22< 9:58:07] +[titan] 2025-10-05 13:06:43,351 - root - INFO - step: 23735 loss: 2.0992 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 13:06:43,351 - root - INFO - lr: 2.1272e-05 gnorm: 1.09 [14:32:32< 9:57:56] +[titan] 2025-10-05 13:06:54,243 - root - INFO - step: 23740 loss: 2.0278 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 13:06:54,243 - root - INFO - lr: 2.1263e-05 gnorm: 1.08 [14:32:43< 9:57:45] +[titan] 2025-10-05 13:07:05,147 - root - INFO - step: 23745 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:07:05,147 - root - INFO - lr: 2.1255e-05 gnorm: 1.08 [14:32:54< 9:57:33] +[titan] 2025-10-05 13:07:13,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:07:16,019 - root - INFO - step: 23750 loss: 2.0022 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 13:07:16,019 - root - INFO - lr: 2.1246e-05 gnorm: 1.06 [14:33:05< 9:57:22] +[titan] 2025-10-05 13:07:26,891 - root - INFO - step: 23755 loss: 2.0412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 13:07:26,891 - root - INFO - lr: 2.1237e-05 gnorm: 1.10 [14:33:16< 9:57:11] +[titan] 2025-10-05 13:07:37,753 - root - INFO - step: 23760 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:07:37,753 - root - INFO - lr: 2.1229e-05 gnorm: 1.10 [14:33:27< 9:57:00] +[titan] 2025-10-05 13:07:48,618 - root - INFO - step: 23765 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 13:07:48,618 - root - INFO - lr: 2.1220e-05 gnorm: 1.07 [14:33:38< 9:56:49] +[titan] 2025-10-05 13:07:59,505 - root - INFO - step: 23770 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:07:59,505 - root - INFO - lr: 2.1212e-05 gnorm: 1.10 [14:33:49< 9:56:38] +[titan] 2025-10-05 13:08:10,407 - root - INFO - step: 23775 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 13:08:10,408 - root - INFO - lr: 2.1203e-05 gnorm: 1.07 [14:33:59< 9:56:27] +[titan] 2025-10-05 13:08:21,270 - root - INFO - step: 23780 loss: 2.0507 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 13:08:21,271 - root - INFO - lr: 
2.1195e-05 gnorm: 1.12 [14:34:10< 9:56:15] +[titan] 2025-10-05 13:08:32,141 - root - INFO - step: 23785 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:08:32,141 - root - INFO - lr: 2.1186e-05 gnorm: 1.07 [14:34:21< 9:56:04] +[titan] 2025-10-05 13:08:43,013 - root - INFO - step: 23790 loss: 2.0543 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8158 +[titan] 2025-10-05 13:08:43,013 - root - INFO - lr: 2.1177e-05 gnorm: 1.08 [14:34:32< 9:55:53] +[titan] 2025-10-05 13:08:53,898 - root - INFO - step: 23795 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 13:08:53,898 - root - INFO - lr: 2.1169e-05 gnorm: 1.08 [14:34:43< 9:55:42] +[titan] 2025-10-05 13:09:02,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:09:04,767 - root - INFO - step: 23800 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:09:04,767 - root - INFO - lr: 2.1160e-05 gnorm: 1.07 [14:34:54< 9:55:31] +[titan] 2025-10-05 13:09:15,675 - root - INFO - step: 23805 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:09:15,675 - root - INFO - lr: 2.1152e-05 gnorm: 1.07 [14:35:05< 9:55:20] +[titan] 2025-10-05 13:09:26,546 - root - INFO - step: 23810 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8689 +[titan] 2025-10-05 13:09:26,546 - root - INFO - lr: 2.1143e-05 gnorm: 1.06 [14:35:16< 9:55:09] +[titan] 2025-10-05 13:09:37,416 - root - INFO - step: 23815 loss: 2.0689 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:09:37,416 - root - INFO - lr: 2.1135e-05 gnorm: 1.04 [14:35:26< 9:54:58] +[titan] 2025-10-05 13:09:48,302 - root - INFO - step: 23820 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 13:09:48,302 - root - INFO - lr: 2.1126e-05 gnorm: 1.05 [14:35:37< 9:54:46] +[titan] 2025-10-05 13:09:59,200 - root - INFO - step: 23825 loss: 2.1145 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8692 +[titan] 2025-10-05 13:09:59,200 - root - INFO - lr: 2.1118e-05 gnorm: 1.10 [14:35:48< 9:54:35] +[titan] 2025-10-05 13:10:10,087 - root - INFO - step: 23830 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:10:10,087 - root - INFO - lr: 
2.1109e-05 gnorm: 1.07 [14:35:59< 9:54:24] +[titan] 2025-10-05 13:10:20,968 - root - INFO - step: 23835 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8802 +[titan] 2025-10-05 13:10:20,968 - root - INFO - lr: 2.1100e-05 gnorm: 1.11 [14:36:10< 9:54:13] +[titan] 2025-10-05 13:10:31,877 - root - INFO - step: 23840 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 13:10:31,877 - root - INFO - lr: 2.1092e-05 gnorm: 1.07 [14:36:21< 9:54:02] +[titan] 2025-10-05 13:10:42,750 - root - INFO - step: 23845 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8826 +[titan] 2025-10-05 13:10:42,751 - root - INFO - lr: 2.1083e-05 gnorm: 1.08 [14:36:32< 9:53:51] +[titan] 2025-10-05 13:10:51,448 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:10:53,641 - root - INFO - step: 23850 loss: 2.0254 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 13:10:53,642 - root - INFO - lr: 2.1075e-05 gnorm: 1.07 [14:36:43< 9:53:40] +[titan] 2025-10-05 13:11:04,523 - root - INFO - step: 23855 loss: 2.0986 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 13:11:04,523 - root - INFO - lr: 2.1066e-05 gnorm: 1.09 [14:36:54< 9:53:29] +[titan] 2025-10-05 13:11:15,407 - root - INFO - step: 23860 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 13:11:15,407 - root - INFO - lr: 2.1058e-05 gnorm: 1.07 [14:37:04< 9:53:17] +[titan] 2025-10-05 13:11:26,299 - root - INFO - step: 23865 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8171 +[titan] 2025-10-05 13:11:26,299 - root - INFO - lr: 2.1049e-05 gnorm: 1.08 [14:37:15< 9:53:06] +[titan] 2025-10-05 13:11:37,198 - root - INFO - step: 23870 loss: 2.1119 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:11:37,198 - root - INFO - lr: 2.1041e-05 gnorm: 1.10 [14:37:26< 9:52:55] +[titan] 2025-10-05 13:11:48,068 - root - INFO - step: 23875 loss: 2.0789 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 13:11:48,068 - root - INFO - lr: 2.1032e-05 gnorm: 1.03 [14:37:37< 9:52:44] +[titan] 2025-10-05 13:11:58,937 - root - INFO - step: 23880 loss: 2.1572 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9068 +[titan] 2025-10-05 13:11:58,937 - root - INFO - lr: 
2.1023e-05 gnorm: 1.10 [14:37:48< 9:52:33] +[titan] 2025-10-05 13:12:09,818 - root - INFO - step: 23885 loss: 2.1050 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:12:09,818 - root - INFO - lr: 2.1015e-05 gnorm: 1.07 [14:37:59< 9:52:22] +[titan] 2025-10-05 13:12:20,691 - root - INFO - step: 23890 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 13:12:20,691 - root - INFO - lr: 2.1006e-05 gnorm: 1.04 [14:38:10< 9:52:11] +[titan] 2025-10-05 13:12:31,575 - root - INFO - step: 23895 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 13:12:31,575 - root - INFO - lr: 2.0998e-05 gnorm: 1.07 [14:38:21< 9:52:00] +[titan] 2025-10-05 13:12:40,266 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:12:42,455 - root - INFO - step: 23900 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 13:12:42,455 - root - INFO - lr: 2.0989e-05 gnorm: 1.07 [14:38:32< 9:51:48] +[titan] 2025-10-05 13:12:53,357 - root - INFO - step: 23905 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 13:12:53,358 - root - INFO - lr: 2.0981e-05 gnorm: 1.10 [14:38:42< 9:51:37] +[titan] 2025-10-05 13:13:04,239 - root - INFO - step: 23910 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8487 +[titan] 2025-10-05 13:13:04,239 - root - INFO - lr: 2.0972e-05 gnorm: 1.07 [14:38:53< 9:51:26] +[titan] 2025-10-05 13:13:15,113 - root - INFO - step: 23915 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 13:13:15,113 - root - INFO - lr: 2.0964e-05 gnorm: 1.10 [14:39:04< 9:51:15] +[titan] 2025-10-05 13:13:25,979 - root - INFO - step: 23920 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 13:13:25,980 - root - INFO - lr: 2.0955e-05 gnorm: 1.05 [14:39:15< 9:51:04] +[titan] 2025-10-05 13:13:36,839 - root - INFO - step: 23925 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8255 +[titan] 2025-10-05 13:13:36,839 - root - INFO - lr: 2.0947e-05 gnorm: 1.08 [14:39:26< 9:50:53] +[titan] 2025-10-05 13:13:47,718 - root - INFO - step: 23930 loss: 2.0539 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 13:13:47,718 - root - INFO - lr: 
2.0938e-05 gnorm: 1.07 [14:39:37< 9:50:42] +[titan] 2025-10-05 13:13:58,659 - root - INFO - step: 23935 loss: 2.1295 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 13:13:58,659 - root - INFO - lr: 2.0929e-05 gnorm: 1.09 [14:39:48< 9:50:31] +[titan] 2025-10-05 13:14:09,537 - root - INFO - step: 23940 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7883 +[titan] 2025-10-05 13:14:09,537 - root - INFO - lr: 2.0921e-05 gnorm: 1.06 [14:39:59< 9:50:19] +[titan] 2025-10-05 13:14:20,423 - root - INFO - step: 23945 loss: 2.0391 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8022 +[titan] 2025-10-05 13:14:20,423 - root - INFO - lr: 2.0912e-05 gnorm: 1.08 [14:40:09< 9:50:08] +[titan] 2025-10-05 13:14:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:14:31,294 - root - INFO - step: 23950 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8110 +[titan] 2025-10-05 13:14:31,294 - root - INFO - lr: 2.0904e-05 gnorm: 1.02 [14:40:20< 9:49:57] +[titan] 2025-10-05 13:14:42,149 - root - INFO - step: 23955 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:14:42,149 - root - INFO - lr: 2.0895e-05 gnorm: 1.11 [14:40:31< 9:49:46] +[titan] 2025-10-05 13:14:53,021 - root - INFO - step: 23960 loss: 2.0544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 13:14:53,021 - root - INFO - lr: 2.0887e-05 gnorm: 1.07 [14:40:42< 9:49:35] +[titan] 2025-10-05 13:15:03,924 - root - INFO - step: 23965 loss: 2.0186 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 13:15:03,925 - root - INFO - lr: 2.0878e-05 gnorm: 1.08 [14:40:53< 9:49:24] +[titan] 2025-10-05 13:15:14,778 - root - INFO - step: 23970 loss: 2.0244 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 13:15:14,778 - root - INFO - lr: 2.0870e-05 gnorm: 1.10 [14:41:04< 9:49:13] +[titan] 2025-10-05 13:15:25,658 - root - INFO - step: 23975 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:15:25,658 - root - INFO - lr: 2.0861e-05 gnorm: 1.05 [14:41:15< 9:49:02] +[titan] 2025-10-05 13:15:36,526 - root - INFO - step: 23980 loss: 2.1043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 13:15:36,526 - root - INFO - lr: 
2.0853e-05 gnorm: 1.11 [14:41:26< 9:48:50] +[titan] 2025-10-05 13:15:47,390 - root - INFO - step: 23985 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 13:15:47,390 - root - INFO - lr: 2.0844e-05 gnorm: 1.10 [14:41:36< 9:48:39] +[titan] 2025-10-05 13:15:58,289 - root - INFO - step: 23990 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 13:15:58,289 - root - INFO - lr: 2.0836e-05 gnorm: 1.06 [14:41:47< 9:48:28] +[titan] 2025-10-05 13:16:09,157 - root - INFO - step: 23995 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8039 +[titan] 2025-10-05 13:16:09,157 - root - INFO - lr: 2.0827e-05 gnorm: 1.11 [14:41:58< 9:48:17] +[titan] 2025-10-05 13:16:17,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:16:20,053 - root - INFO - step: 24000 loss: 2.0037 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:16:20,053 - root - INFO - lr: 2.0819e-05 gnorm: 1.08 [14:42:09< 9:48:06] +[titan] 2025-10-05 13:16:30,898 - root - INFO - step: 24005 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 13:16:30,899 - root - INFO - lr: 2.0810e-05 gnorm: 1.07 [14:42:20< 9:47:55] +[titan] 2025-10-05 13:16:41,756 - root - INFO - step: 24010 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 13:16:41,757 - root - INFO - lr: 2.0802e-05 gnorm: 1.05 [14:42:31< 9:47:44] +[titan] 2025-10-05 13:16:52,618 - root - INFO - step: 24015 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8347 +[titan] 2025-10-05 13:16:52,618 - root - INFO - lr: 2.0793e-05 gnorm: 1.12 [14:42:42< 9:47:32] +[titan] 2025-10-05 13:17:03,489 - root - INFO - step: 24020 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:17:03,489 - root - INFO - lr: 2.0785e-05 gnorm: 1.10 [14:42:53< 9:47:21] +[titan] 2025-10-05 13:17:14,356 - root - INFO - step: 24025 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 13:17:14,356 - root - INFO - lr: 2.0776e-05 gnorm: 1.08 [14:43:03< 9:47:10] +[titan] 2025-10-05 13:17:25,293 - root - INFO - step: 24030 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 13:17:25,293 - root - INFO - lr: 
2.0767e-05 gnorm: 1.14 [14:43:14< 9:46:59] +[titan] 2025-10-05 13:17:36,153 - root - INFO - step: 24035 loss: 2.0553 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8156 +[titan] 2025-10-05 13:17:36,153 - root - INFO - lr: 2.0759e-05 gnorm: 1.07 [14:43:25< 9:46:48] +[titan] 2025-10-05 13:17:47,022 - root - INFO - step: 24040 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 13:17:47,022 - root - INFO - lr: 2.0750e-05 gnorm: 1.08 [14:43:36< 9:46:37] +[titan] 2025-10-05 13:17:57,898 - root - INFO - step: 24045 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 13:17:57,899 - root - INFO - lr: 2.0742e-05 gnorm: 1.07 [14:43:47< 9:46:26] +[titan] 2025-10-05 13:18:06,588 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:18:08,771 - root - INFO - step: 24050 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:18:08,771 - root - INFO - lr: 2.0733e-05 gnorm: 1.05 [14:43:58< 9:46:15] +[titan] 2025-10-05 13:18:19,609 - root - INFO - step: 24055 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 13:18:19,609 - root - INFO - lr: 2.0725e-05 gnorm: 1.10 [14:44:09< 9:46:03] +[titan] 2025-10-05 13:18:30,457 - root - INFO - step: 24060 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 13:18:30,457 - root - INFO - lr: 2.0716e-05 gnorm: 1.12 [14:44:20< 9:45:52] +[titan] 2025-10-05 13:18:39,436 - root - INFO - Dumping profiler traces at step 24064 +[titan] 2025-10-05 13:18:39,479 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:18:41,660 - root - INFO - step: 24065 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,250 tflops: 405.80 mfu: 41.03% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7997 +[titan] 2025-10-05 13:18:41,661 - root - INFO - lr: 2.0708e-05 gnorm: 1.05 [14:44:31< 9:45:41] +[titan] 2025-10-05 13:18:52,499 - root - INFO - step: 24070 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 13:18:52,499 - root - INFO - lr: 2.0699e-05 gnorm: 1.05 [14:44:42< 9:45:30] +[titan] 2025-10-05 13:19:03,398 - root - INFO - step: 24075 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:19:03,398 - root - INFO - lr: 2.0691e-05 gnorm: 1.08 [14:44:52< 9:45:19] +[titan] 2025-10-05 13:19:14,221 - root - INFO - step: 24080 loss: 
2.0782 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 13:19:14,221 - root - INFO - lr: 2.0682e-05 gnorm: 1.08 [14:45:03< 9:45:08] +[titan] 2025-10-05 13:19:25,059 - root - INFO - step: 24085 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 13:19:25,059 - root - INFO - lr: 2.0674e-05 gnorm: 1.05 [14:45:14< 9:44:57] +[titan] 2025-10-05 13:19:35,885 - root - INFO - step: 24090 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 13:19:35,885 - root - INFO - lr: 2.0665e-05 gnorm: 1.08 [14:45:25< 9:44:46] +[titan] 2025-10-05 13:19:46,755 - root - INFO - step: 24095 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 13:19:46,755 - root - INFO - lr: 2.0657e-05 gnorm: 1.09 [14:45:36< 9:44:35] +[titan] 2025-10-05 13:19:55,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:19:57,605 - root - INFO - step: 24100 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 13:19:57,605 - root - INFO - lr: 2.0648e-05 gnorm: 1.05 [14:45:47< 9:44:23] +[titan] 2025-10-05 13:20:08,458 - root - INFO - step: 24105 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 13:20:08,458 - root - INFO - lr: 2.0640e-05 gnorm: 1.11 [14:45:58< 9:44:12] +[titan] 2025-10-05 13:20:19,304 - root - INFO - step: 24110 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:20:19,305 - root - INFO - lr: 2.0631e-05 gnorm: 1.04 [14:46:08< 9:44:01] +[titan] 2025-10-05 13:20:30,155 - root - INFO - step: 24115 loss: 2.0297 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 13:20:30,155 - root - INFO - lr: 2.0623e-05 gnorm: 1.07 [14:46:19< 9:43:50] +[titan] 2025-10-05 13:20:41,004 - root - INFO - step: 24120 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:20:41,005 - root - INFO - lr: 2.0614e-05 gnorm: 1.07 [14:46:30< 9:43:39] +[titan] 2025-10-05 13:20:51,867 - root - INFO - step: 24125 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8402 +[titan] 2025-10-05 13:20:51,867 - root - INFO - lr: 2.0606e-05 gnorm: 1.12 [14:46:41< 9:43:28] +[titan] 2025-10-05 13:21:02,698 - root - INFO - step: 24130 loss: 2.0869 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 13:21:02,699 - root - INFO - lr: 
2.0597e-05 gnorm: 1.06 [14:46:52< 9:43:17] +[titan] 2025-10-05 13:21:13,527 - root - INFO - step: 24135 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 13:21:13,527 - root - INFO - lr: 2.0589e-05 gnorm: 1.10 [14:47:03< 9:43:05] +[titan] 2025-10-05 13:21:24,355 - root - INFO - step: 24140 loss: 2.0475 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8102 +[titan] 2025-10-05 13:21:24,355 - root - INFO - lr: 2.0580e-05 gnorm: 1.07 [14:47:13< 9:42:54] +[titan] 2025-10-05 13:21:35,208 - root - INFO - step: 24145 loss: 2.1059 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:21:35,208 - root - INFO - lr: 2.0572e-05 gnorm: 1.10 [14:47:24< 9:42:43] +[titan] 2025-10-05 13:21:43,854 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:21:46,037 - root - INFO - step: 24150 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 13:21:46,037 - root - INFO - lr: 2.0563e-05 gnorm: 1.05 [14:47:35< 9:42:32] +[titan] 2025-10-05 13:21:56,862 - root - INFO - step: 24155 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8062 +[titan] 2025-10-05 13:21:56,862 - root - INFO - lr: 2.0555e-05 gnorm: 1.05 [14:47:46< 9:42:21] +[titan] 2025-10-05 13:22:07,697 - root - INFO - step: 24160 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:22:07,697 - root - INFO - lr: 2.0546e-05 gnorm: 1.07 [14:47:57< 9:42:10] +[titan] 2025-10-05 13:22:18,551 - root - INFO - step: 24165 loss: 2.0865 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 13:22:18,552 - root - INFO - lr: 2.0538e-05 gnorm: 1.09 [14:48:08< 9:41:58] +[titan] 2025-10-05 13:22:29,396 - root - INFO - step: 24170 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:22:29,396 - root - INFO - lr: 2.0529e-05 gnorm: 1.08 [14:48:18< 9:41:47] +[titan] 2025-10-05 13:22:40,227 - root - INFO - step: 24175 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8696 +[titan] 2025-10-05 13:22:40,227 - root - INFO - lr: 2.0521e-05 gnorm: 1.09 [14:48:29< 9:41:36] +[titan] 2025-10-05 13:22:51,092 - root - INFO - step: 24180 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 13:22:51,092 - root - INFO - lr: 
2.0512e-05 gnorm: 1.09 [14:48:40< 9:41:25] +[titan] 2025-10-05 13:23:01,953 - root - INFO - step: 24185 loss: 1.9953 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 13:23:01,953 - root - INFO - lr: 2.0504e-05 gnorm: 1.07 [14:48:51< 9:41:14] +[titan] 2025-10-05 13:23:12,844 - root - INFO - step: 24190 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 13:23:12,844 - root - INFO - lr: 2.0496e-05 gnorm: 1.15 [14:49:02< 9:41:03] +[titan] 2025-10-05 13:23:23,695 - root - INFO - step: 24195 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 13:23:23,695 - root - INFO - lr: 2.0487e-05 gnorm: 1.07 [14:49:13< 9:40:52] +[titan] 2025-10-05 13:23:32,375 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:23:34,564 - root - INFO - step: 24200 loss: 2.0236 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:23:34,564 - root - INFO - lr: 2.0479e-05 gnorm: 1.07 [14:49:24< 9:40:41] +[titan] 2025-10-05 13:23:45,424 - root - INFO - step: 24205 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 13:23:45,425 - root - INFO - lr: 2.0470e-05 gnorm: 1.07 [14:49:34< 9:40:29] +[titan] 2025-10-05 13:23:56,267 - root - INFO - step: 24210 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 13:23:56,267 - root - INFO - lr: 2.0462e-05 gnorm: 1.03 [14:49:45< 9:40:18] +[titan] 2025-10-05 13:24:07,115 - root - INFO - step: 24215 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8655 +[titan] 2025-10-05 13:24:07,115 - root - INFO - lr: 2.0453e-05 gnorm: 1.12 [14:49:56< 9:40:07] +[titan] 2025-10-05 13:24:17,952 - root - INFO - step: 24220 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 13:24:17,952 - root - INFO - lr: 2.0445e-05 gnorm: 1.13 [14:50:07< 9:39:56] +[titan] 2025-10-05 13:24:28,825 - root - INFO - step: 24225 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8509 +[titan] 2025-10-05 13:24:28,826 - root - INFO - lr: 2.0436e-05 gnorm: 1.06 [14:50:18< 9:39:45] +[titan] 2025-10-05 13:24:39,649 - root - INFO - step: 24230 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 13:24:39,649 - root - INFO - lr: 
2.0428e-05 gnorm: 1.10 [14:50:29< 9:39:34] +[titan] 2025-10-05 13:24:50,487 - root - INFO - step: 24235 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 13:24:50,487 - root - INFO - lr: 2.0419e-05 gnorm: 1.07 [14:50:40< 9:39:23] +[titan] 2025-10-05 13:25:01,334 - root - INFO - step: 24240 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:25:01,335 - root - INFO - lr: 2.0411e-05 gnorm: 1.02 [14:50:50< 9:39:11] +[titan] 2025-10-05 13:25:12,172 - root - INFO - step: 24245 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 13:25:12,172 - root - INFO - lr: 2.0402e-05 gnorm: 1.07 [14:51:01< 9:39:00] +[titan] 2025-10-05 13:25:20,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:25:23,024 - root - INFO - step: 24250 loss: 2.1386 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:25:23,025 - root - INFO - lr: 2.0394e-05 gnorm: 1.10 [14:51:12< 9:38:49] +[titan] 2025-10-05 13:25:33,889 - root - INFO - step: 24255 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:25:33,889 - root - INFO - lr: 2.0385e-05 gnorm: 1.11 [14:51:23< 9:38:38] +[titan] 2025-10-05 13:25:44,730 - root - INFO - step: 24260 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 13:25:44,730 - root - INFO - lr: 2.0377e-05 gnorm: 1.07 [14:51:34< 9:38:27] +[titan] 2025-10-05 13:25:55,582 - root - INFO - step: 24265 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 13:25:55,582 - root - INFO - lr: 2.0368e-05 gnorm: 1.07 [14:51:45< 9:38:16] +[titan] 2025-10-05 13:26:06,446 - root - INFO - step: 24270 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:26:06,447 - root - INFO - lr: 2.0360e-05 gnorm: 1.08 [14:51:55< 9:38:05] +[titan] 2025-10-05 13:26:17,296 - root - INFO - step: 24275 loss: 2.0367 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8000 +[titan] 2025-10-05 13:26:17,296 - root - INFO - lr: 2.0352e-05 gnorm: 1.08 [14:52:06< 9:37:53] +[titan] 2025-10-05 13:26:28,151 - root - INFO - step: 24280 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 13:26:28,151 - root - INFO - lr: 
2.0343e-05 gnorm: 1.09 [14:52:17< 9:37:42] +[titan] 2025-10-05 13:26:39,051 - root - INFO - step: 24285 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 13:26:39,051 - root - INFO - lr: 2.0335e-05 gnorm: 1.10 [14:52:28< 9:37:31] +[titan] 2025-10-05 13:26:49,902 - root - INFO - step: 24290 loss: 2.0746 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:26:49,902 - root - INFO - lr: 2.0326e-05 gnorm: 1.07 [14:52:39< 9:37:20] +[titan] 2025-10-05 13:27:00,733 - root - INFO - step: 24295 loss: 2.1061 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 13:27:00,734 - root - INFO - lr: 2.0318e-05 gnorm: 1.11 [14:52:50< 9:37:09] +[titan] 2025-10-05 13:27:09,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:27:11,587 - root - INFO - step: 24300 loss: 2.0702 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 13:27:11,587 - root - INFO - lr: 2.0309e-05 gnorm: 1.10 [14:53:01< 9:36:58] +[titan] 2025-10-05 13:27:22,433 - root - INFO - step: 24305 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 13:27:22,433 - root - INFO - lr: 2.0301e-05 gnorm: 1.05 [14:53:11< 9:36:47] +[titan] 2025-10-05 13:27:33,270 - root - INFO - step: 24310 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 13:27:33,270 - root - INFO - lr: 2.0292e-05 gnorm: 1.06 [14:53:22< 9:36:35] +[titan] 2025-10-05 13:27:44,105 - root - INFO - step: 24315 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 13:27:44,105 - root - INFO - lr: 2.0284e-05 gnorm: 1.07 [14:53:33< 9:36:24] +[titan] 2025-10-05 13:27:54,981 - root - INFO - step: 24320 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 13:27:54,981 - root - INFO - lr: 2.0275e-05 gnorm: 1.13 [14:53:44< 9:36:13] +[titan] 2025-10-05 13:28:05,837 - root - INFO - step: 24325 loss: 2.1113 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:28:05,838 - root - INFO - lr: 2.0267e-05 gnorm: 1.14 [14:53:55< 9:36:02] +[titan] 2025-10-05 13:28:16,705 - root - INFO - step: 24330 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 13:28:16,705 - root - INFO - lr: 
2.0258e-05 gnorm: 1.05 [14:54:06< 9:35:51] +[titan] 2025-10-05 13:28:27,566 - root - INFO - step: 24335 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8683 +[titan] 2025-10-05 13:28:27,566 - root - INFO - lr: 2.0250e-05 gnorm: 1.15 [14:54:17< 9:35:40] +[titan] 2025-10-05 13:28:38,418 - root - INFO - step: 24340 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:28:38,418 - root - INFO - lr: 2.0242e-05 gnorm: 1.08 [14:54:27< 9:35:29] +[titan] 2025-10-05 13:28:49,296 - root - INFO - step: 24345 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 13:28:49,296 - root - INFO - lr: 2.0233e-05 gnorm: 1.14 [14:54:38< 9:35:18] +[titan] 2025-10-05 13:28:58,013 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:29:00,192 - root - INFO - step: 24350 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:29:00,192 - root - INFO - lr: 2.0225e-05 gnorm: 1.18 [14:54:49< 9:35:06] +[titan] 2025-10-05 13:29:11,072 - root - INFO - step: 24355 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 13:29:11,072 - root - INFO - lr: 2.0216e-05 gnorm: 1.09 [14:55:00< 9:34:55] +[titan] 2025-10-05 13:29:21,925 - root - INFO - step: 24360 loss: 2.1089 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 13:29:21,925 - root - INFO - lr: 2.0208e-05 gnorm: 1.07 [14:55:11< 9:34:44] +[titan] 2025-10-05 13:29:32,780 - root - INFO - step: 24365 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:29:32,781 - root - INFO - lr: 2.0199e-05 gnorm: 1.10 [14:55:22< 9:34:33] +[titan] 2025-10-05 13:29:43,663 - root - INFO - step: 24370 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:29:43,663 - root - INFO - lr: 2.0191e-05 gnorm: 1.10 [14:55:33< 9:34:22] +[titan] 2025-10-05 13:29:54,539 - root - INFO - step: 24375 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:29:54,540 - root - INFO - lr: 2.0182e-05 gnorm: 1.12 [14:55:44< 9:34:11] +[titan] 2025-10-05 13:30:05,417 - root - INFO - step: 24380 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 13:30:05,417 - root - INFO - lr: 
2.0174e-05 gnorm: 1.12 [14:55:54< 9:34:00] +[titan] 2025-10-05 13:30:16,350 - root - INFO - step: 24385 loss: 2.1282 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 13:30:16,350 - root - INFO - lr: 2.0166e-05 gnorm: 1.05 [14:56:05< 9:33:49] +[titan] 2025-10-05 13:30:27,217 - root - INFO - step: 24390 loss: 2.0751 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:30:27,217 - root - INFO - lr: 2.0157e-05 gnorm: 1.12 [14:56:16< 9:33:37] +[titan] 2025-10-05 13:30:38,065 - root - INFO - step: 24395 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:30:38,065 - root - INFO - lr: 2.0149e-05 gnorm: 1.08 [14:56:27< 9:33:26] +[titan] 2025-10-05 13:30:46,746 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:30:48,926 - root - INFO - step: 24400 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 13:30:48,926 - root - INFO - lr: 2.0140e-05 gnorm: 1.09 [14:56:38< 9:33:15] +[titan] 2025-10-05 13:30:59,781 - root - INFO - step: 24405 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8166 +[titan] 2025-10-05 13:30:59,781 - root - INFO - lr: 2.0132e-05 gnorm: 1.07 [14:56:49< 9:33:04] +[titan] 2025-10-05 13:31:10,656 - root - INFO - step: 24410 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 13:31:10,656 - root - INFO - lr: 2.0123e-05 gnorm: 1.11 [14:57:00< 9:32:53] +[titan] 2025-10-05 13:31:21,555 - root - INFO - step: 24415 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8268 +[titan] 2025-10-05 13:31:21,555 - root - INFO - lr: 2.0115e-05 gnorm: 1.09 [14:57:11< 9:32:42] +[titan] 2025-10-05 13:31:32,426 - root - INFO - step: 24420 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 13:31:32,426 - root - INFO - lr: 2.0107e-05 gnorm: 1.07 [14:57:21< 9:32:31] +[titan] 2025-10-05 13:31:43,323 - root - INFO - step: 24425 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 13:31:43,323 - root - INFO - lr: 2.0098e-05 gnorm: 1.31 [14:57:32< 9:32:20] +[titan] 2025-10-05 13:31:54,203 - root - INFO - step: 24430 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 13:31:54,204 - root - INFO - lr: 
2.0090e-05 gnorm: 1.05 [14:57:43< 9:32:09] +[titan] 2025-10-05 13:32:05,075 - root - INFO - step: 24435 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 13:32:05,075 - root - INFO - lr: 2.0081e-05 gnorm: 1.07 [14:57:54< 9:31:57] +[titan] 2025-10-05 13:32:15,980 - root - INFO - step: 24440 loss: 2.1665 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 13:32:15,980 - root - INFO - lr: 2.0073e-05 gnorm: 1.09 [14:58:05< 9:31:46] +[titan] 2025-10-05 13:32:26,906 - root - INFO - step: 24445 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 13:32:26,906 - root - INFO - lr: 2.0064e-05 gnorm: 1.08 [14:58:16< 9:31:35] +[titan] 2025-10-05 13:32:35,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:32:37,766 - root - INFO - step: 24450 loss: 2.0220 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 13:32:37,766 - root - INFO - lr: 2.0056e-05 gnorm: 1.06 [14:58:27< 9:31:24] +[titan] 2025-10-05 13:32:48,638 - root - INFO - step: 24455 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 13:32:48,638 - root - INFO - lr: 2.0048e-05 gnorm: 1.07 [14:58:38< 9:31:13] +[titan] 2025-10-05 13:32:59,507 - root - INFO - step: 24460 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:32:59,507 - root - INFO - lr: 2.0039e-05 gnorm: 1.07 [14:58:49< 9:31:02] +[titan] 2025-10-05 13:33:10,393 - root - INFO - step: 24465 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:33:10,393 - root - INFO - lr: 2.0031e-05 gnorm: 1.05 [14:58:59< 9:30:51] +[titan] 2025-10-05 13:33:21,345 - root - INFO - step: 24470 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8103 +[titan] 2025-10-05 13:33:21,345 - root - INFO - lr: 2.0022e-05 gnorm: 1.06 [14:59:10< 9:30:40] +[titan] 2025-10-05 13:33:32,228 - root - INFO - step: 24475 loss: 2.0788 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:33:32,228 - root - INFO - lr: 2.0014e-05 gnorm: 1.09 [14:59:21< 9:30:29] +[titan] 2025-10-05 13:33:43,179 - root - INFO - step: 24480 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8709 +[titan] 2025-10-05 13:33:43,180 - root - INFO - lr: 
2.0006e-05 gnorm: 1.10 [14:59:32< 9:30:17] +[titan] 2025-10-05 13:33:54,062 - root - INFO - step: 24485 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 13:33:54,062 - root - INFO - lr: 1.9997e-05 gnorm: 1.07 [14:59:43< 9:30:06] +[titan] 2025-10-05 13:34:04,940 - root - INFO - step: 24490 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 13:34:04,940 - root - INFO - lr: 1.9989e-05 gnorm: 1.06 [14:59:54< 9:29:55] +[titan] 2025-10-05 13:34:15,844 - root - INFO - step: 24495 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8069 +[titan] 2025-10-05 13:34:15,844 - root - INFO - lr: 1.9980e-05 gnorm: 1.09 [15:00:05< 9:29:44] +[titan] 2025-10-05 13:34:24,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:34:26,706 - root - INFO - step: 24500 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 13:34:26,706 - root - INFO - lr: 1.9972e-05 gnorm: 1.11 [15:00:16< 9:29:33] +[titan] 2025-10-05 13:34:37,585 - root - INFO - step: 24505 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 13:34:37,585 - root - INFO - lr: 1.9963e-05 gnorm: 1.08 [15:00:27< 9:29:22] +[titan] 2025-10-05 13:34:48,499 - root - INFO - step: 24510 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 13:34:48,500 - root - INFO - lr: 1.9955e-05 gnorm: 1.12 [15:00:38< 9:29:11] +[titan] 2025-10-05 13:34:59,379 - root - INFO - step: 24515 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 13:34:59,379 - root - INFO - lr: 1.9947e-05 gnorm: 1.09 [15:00:48< 9:29:00] +[titan] 2025-10-05 13:35:10,244 - root - INFO - step: 24520 loss: 2.0374 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8014 +[titan] 2025-10-05 13:35:10,245 - root - INFO - lr: 1.9938e-05 gnorm: 1.03 [15:00:59< 9:28:49] +[titan] 2025-10-05 13:35:21,112 - root - INFO - step: 24525 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 13:35:21,113 - root - INFO - lr: 1.9930e-05 gnorm: 1.06 [15:01:10< 9:28:37] +[titan] 2025-10-05 13:35:31,956 - root - INFO - step: 24530 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8681 +[titan] 2025-10-05 13:35:31,957 - root - INFO - lr: 
1.9921e-05 gnorm: 1.08 [15:01:21< 9:28:26] +[titan] 2025-10-05 13:35:42,842 - root - INFO - step: 24535 loss: 2.0794 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8375 +[titan] 2025-10-05 13:35:42,842 - root - INFO - lr: 1.9913e-05 gnorm: 1.10 [15:01:32< 9:28:15] +[titan] 2025-10-05 13:35:53,706 - root - INFO - step: 24540 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 13:35:53,706 - root - INFO - lr: 1.9905e-05 gnorm: 1.11 [15:01:43< 9:28:04] +[titan] 2025-10-05 13:36:04,625 - root - INFO - step: 24545 loss: 2.1385 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:36:04,625 - root - INFO - lr: 1.9896e-05 gnorm: 1.07 [15:01:54< 9:27:53] +[titan] 2025-10-05 13:36:13,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:36:15,477 - root - INFO - step: 24550 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 13:36:15,477 - root - INFO - lr: 1.9888e-05 gnorm: 1.08 [15:02:04< 9:27:42] +[titan] 2025-10-05 13:36:26,344 - root - INFO - step: 24555 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 13:36:26,344 - root - INFO - lr: 1.9879e-05 gnorm: 1.07 [15:02:15< 9:27:31] +[titan] 2025-10-05 13:36:37,204 - root - INFO - step: 24560 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 13:36:37,204 - root - INFO - lr: 1.9871e-05 gnorm: 1.07 [15:02:26< 9:27:20] +[titan] 2025-10-05 13:36:48,082 - root - INFO - step: 24565 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 13:36:48,082 - root - INFO - lr: 1.9863e-05 gnorm: 1.05 [15:02:37< 9:27:08] +[titan] 2025-10-05 13:36:58,948 - root - INFO - step: 24570 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 13:36:58,948 - root - INFO - lr: 1.9854e-05 gnorm: 1.07 [15:02:48< 9:26:57] +[titan] 2025-10-05 13:37:09,939 - root - INFO - step: 24575 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 29,815 tflops: 413.64 mfu: 41.82% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 13:37:09,939 - root - INFO - lr: 1.9846e-05 gnorm: 1.08 [15:02:59< 9:26:46] +[titan] 2025-10-05 13:37:12,312 - root - INFO - Dumping profiler traces at step 24576 +[titan] 2025-10-05 13:37:12,353 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:37:21,087 - root - INFO - step: 24580 loss: 
2.0679 memory: 118.84GiB(85.28%) tps: 29,394 tflops: 407.79 mfu: 41.23% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:37:21,087 - root - INFO - lr: 1.9837e-05 gnorm: 1.10 [15:03:10< 9:26:35] +[titan] 2025-10-05 13:37:31,945 - root - INFO - step: 24585 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:37:31,945 - root - INFO - lr: 1.9829e-05 gnorm: 1.04 [15:03:21< 9:26:24] +[titan] 2025-10-05 13:37:42,812 - root - INFO - step: 24590 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8024 +[titan] 2025-10-05 13:37:42,812 - root - INFO - lr: 1.9821e-05 gnorm: 1.05 [15:03:32< 9:26:13] +[titan] 2025-10-05 13:37:53,676 - root - INFO - step: 24595 loss: 2.0523 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 13:37:53,677 - root - INFO - lr: 1.9812e-05 gnorm: 1.07 [15:03:43< 9:26:02] +[titan] 2025-10-05 13:38:02,369 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:38:04,554 - root - INFO - step: 24600 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8521 +[titan] 2025-10-05 13:38:04,554 - root - INFO - lr: 1.9804e-05 gnorm: 1.09 [15:03:54< 9:25:51] +[titan] 2025-10-05 13:38:15,471 - root - INFO - step: 24605 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 13:38:15,471 - root - INFO - lr: 1.9796e-05 gnorm: 1.07 [15:04:04< 9:25:40] +[titan] 2025-10-05 13:38:26,377 - root - INFO - step: 24610 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8870 +[titan] 2025-10-05 13:38:26,377 - root - INFO - lr: 1.9787e-05 gnorm: 1.12 [15:04:15< 9:25:29] +[titan] 2025-10-05 13:38:37,243 - root - INFO - step: 24615 loss: 2.0786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8367 +[titan] 2025-10-05 13:38:37,243 - root - INFO - lr: 1.9779e-05 gnorm: 1.09 [15:04:26< 9:25:18] +[titan] 2025-10-05 13:38:48,119 - root - INFO - step: 24620 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 13:38:48,119 - root - INFO - lr: 1.9770e-05 gnorm: 1.07 [15:04:37< 9:25:06] +[titan] 2025-10-05 13:38:58,977 - root - INFO - step: 24625 loss: 2.0721 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8321 +[titan] 2025-10-05 13:38:58,977 - root - INFO - lr: 1.9762e-05 gnorm: 1.11 [15:04:48< 9:24:55] +[titan] 2025-10-05 13:39:09,830 - root - INFO - step: 24630 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8930 +[titan] 2025-10-05 13:39:09,830 - root - INFO - lr: 
1.9754e-05 gnorm: 1.13 [15:04:59< 9:24:44] +[titan] 2025-10-05 13:39:20,732 - root - INFO - step: 24635 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 13:39:20,733 - root - INFO - lr: 1.9745e-05 gnorm: 1.10 [15:05:10< 9:24:33] +[titan] 2025-10-05 13:39:31,629 - root - INFO - step: 24640 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 13:39:31,629 - root - INFO - lr: 1.9737e-05 gnorm: 1.08 [15:05:21< 9:24:22] +[titan] 2025-10-05 13:39:42,484 - root - INFO - step: 24645 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 13:39:42,484 - root - INFO - lr: 1.9728e-05 gnorm: 1.05 [15:05:31< 9:24:11] +[titan] 2025-10-05 13:39:51,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:39:53,346 - root - INFO - step: 24650 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 13:39:53,346 - root - INFO - lr: 1.9720e-05 gnorm: 1.06 [15:05:42< 9:24:00] +[titan] 2025-10-05 13:40:04,203 - root - INFO - step: 24655 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 13:40:04,203 - root - INFO - lr: 1.9712e-05 gnorm: 1.12 [15:05:53< 9:23:49] +[titan] 2025-10-05 13:40:15,073 - root - INFO - step: 24660 loss: 2.0882 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 13:40:15,073 - root - INFO - lr: 1.9703e-05 gnorm: 1.10 [15:06:04< 9:23:38] +[titan] 2025-10-05 13:40:25,992 - root - INFO - step: 24665 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:40:25,992 - root - INFO - lr: 1.9695e-05 gnorm: 1.06 [15:06:15< 9:23:26] +[titan] 2025-10-05 13:40:36,894 - root - INFO - step: 24670 loss: 2.0856 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 13:40:36,894 - root - INFO - lr: 1.9687e-05 gnorm: 1.12 [15:06:26< 9:23:15] +[titan] 2025-10-05 13:40:47,766 - root - INFO - step: 24675 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 13:40:47,766 - root - INFO - lr: 1.9678e-05 gnorm: 1.09 [15:06:37< 9:23:04] +[titan] 2025-10-05 13:40:58,618 - root - INFO - step: 24680 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8134 +[titan] 2025-10-05 13:40:58,618 - root - INFO - lr: 
1.9670e-05 gnorm: 1.07 [15:06:48< 9:22:53] +[titan] 2025-10-05 13:41:09,490 - root - INFO - step: 24685 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 13:41:09,490 - root - INFO - lr: 1.9662e-05 gnorm: 1.13 [15:06:58< 9:22:42] +[titan] 2025-10-05 13:41:20,418 - root - INFO - step: 24690 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 13:41:20,418 - root - INFO - lr: 1.9653e-05 gnorm: 1.06 [15:07:09< 9:22:31] +[titan] 2025-10-05 13:41:31,285 - root - INFO - step: 24695 loss: 2.0651 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:41:31,285 - root - INFO - lr: 1.9645e-05 gnorm: 1.08 [15:07:20< 9:22:20] +[titan] 2025-10-05 13:41:39,977 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:41:42,156 - root - INFO - step: 24700 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 13:41:42,156 - root - INFO - lr: 1.9636e-05 gnorm: 1.11 [15:07:31< 9:22:09] +[titan] 2025-10-05 13:41:53,063 - root - INFO - step: 24705 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 13:41:53,063 - root - INFO - lr: 1.9628e-05 gnorm: 1.08 [15:07:42< 9:21:58] +[titan] 2025-10-05 13:42:03,922 - root - INFO - step: 24710 loss: 2.0804 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 13:42:03,922 - root - INFO - lr: 1.9620e-05 gnorm: 1.06 [15:07:53< 9:21:46] +[titan] 2025-10-05 13:42:14,790 - root - INFO - step: 24715 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8274 +[titan] 2025-10-05 13:42:14,790 - root - INFO - lr: 1.9611e-05 gnorm: 1.09 [15:08:04< 9:21:35] +[titan] 2025-10-05 13:42:25,702 - root - INFO - step: 24720 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 13:42:25,702 - root - INFO - lr: 1.9603e-05 gnorm: 1.11 [15:08:15< 9:21:24] +[titan] 2025-10-05 13:42:36,573 - root - INFO - step: 24725 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 13:42:36,573 - root - INFO - lr: 1.9595e-05 gnorm: 1.08 [15:08:26< 9:21:13] +[titan] 2025-10-05 13:42:47,423 - root - INFO - step: 24730 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 13:42:47,424 - root - INFO - lr: 
1.9586e-05 gnorm: 1.12 [15:08:36< 9:21:02] +[titan] 2025-10-05 13:42:58,321 - root - INFO - step: 24735 loss: 2.1290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 13:42:58,321 - root - INFO - lr: 1.9578e-05 gnorm: 1.08 [15:08:47< 9:20:51] +[titan] 2025-10-05 13:43:09,170 - root - INFO - step: 24740 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:43:09,171 - root - INFO - lr: 1.9570e-05 gnorm: 1.12 [15:08:58< 9:20:40] +[titan] 2025-10-05 13:43:20,002 - root - INFO - step: 24745 loss: 2.0612 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8216 +[titan] 2025-10-05 13:43:20,002 - root - INFO - lr: 1.9561e-05 gnorm: 1.11 [15:09:09< 9:20:29] +[titan] 2025-10-05 13:43:28,699 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:43:30,874 - root - INFO - step: 24750 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 13:43:30,874 - root - INFO - lr: 1.9553e-05 gnorm: 1.08 [15:09:20< 9:20:17] +[titan] 2025-10-05 13:43:41,719 - root - INFO - step: 24755 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 13:43:41,719 - root - INFO - lr: 1.9545e-05 gnorm: 1.11 [15:09:31< 9:20:06] +[titan] 2025-10-05 13:43:52,574 - root - INFO - step: 24760 loss: 2.0568 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 13:43:52,574 - root - INFO - lr: 1.9536e-05 gnorm: 1.07 [15:09:42< 9:19:55] +[titan] 2025-10-05 13:44:03,465 - root - INFO - step: 24765 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 13:44:03,465 - root - INFO - lr: 1.9528e-05 gnorm: 1.07 [15:09:52< 9:19:44] +[titan] 2025-10-05 13:44:14,316 - root - INFO - step: 24770 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 13:44:14,316 - root - INFO - lr: 1.9519e-05 gnorm: 1.05 [15:10:03< 9:19:33] +[titan] 2025-10-05 13:44:25,153 - root - INFO - step: 24775 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8456 +[titan] 2025-10-05 13:44:25,154 - root - INFO - lr: 1.9511e-05 gnorm: 1.07 [15:10:14< 9:19:22] +[titan] 2025-10-05 13:44:36,002 - root - INFO - step: 24780 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 13:44:36,003 - root - INFO - lr: 
1.9503e-05 gnorm: 1.08 [15:10:25< 9:19:11] +[titan] 2025-10-05 13:44:46,858 - root - INFO - step: 24785 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 13:44:46,858 - root - INFO - lr: 1.9494e-05 gnorm: 1.07 [15:10:36< 9:19:00] +[titan] 2025-10-05 13:44:57,702 - root - INFO - step: 24790 loss: 2.0838 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 13:44:57,702 - root - INFO - lr: 1.9486e-05 gnorm: 1.08 [15:10:47< 9:18:48] +[titan] 2025-10-05 13:45:08,535 - root - INFO - step: 24795 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 13:45:08,535 - root - INFO - lr: 1.9478e-05 gnorm: 1.06 [15:10:58< 9:18:37] +[titan] 2025-10-05 13:45:17,247 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:45:19,421 - root - INFO - step: 24800 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8718 +[titan] 2025-10-05 13:45:19,421 - root - INFO - lr: 1.9469e-05 gnorm: 1.08 [15:11:08< 9:18:26] +[titan] 2025-10-05 13:45:30,265 - root - INFO - step: 24805 loss: 2.0238 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 13:45:30,265 - root - INFO - lr: 1.9461e-05 gnorm: 1.07 [15:11:19< 9:18:15] +[titan] 2025-10-05 13:45:41,104 - root - INFO - step: 24810 loss: 2.0540 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8154 +[titan] 2025-10-05 13:45:41,104 - root - INFO - lr: 1.9453e-05 gnorm: 1.07 [15:11:30< 9:18:04] +[titan] 2025-10-05 13:45:51,953 - root - INFO - step: 24815 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8778 +[titan] 2025-10-05 13:45:51,953 - root - INFO - lr: 1.9444e-05 gnorm: 1.11 [15:11:41< 9:17:53] +[titan] 2025-10-05 13:46:02,816 - root - INFO - step: 24820 loss: 2.1004 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 13:46:02,816 - root - INFO - lr: 1.9436e-05 gnorm: 1.07 [15:11:52< 9:17:42] +[titan] 2025-10-05 13:46:13,676 - root - INFO - step: 24825 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 13:46:13,676 - root - INFO - lr: 1.9428e-05 gnorm: 1.10 [15:12:03< 9:17:31] +[titan] 2025-10-05 13:46:24,572 - root - INFO - step: 24830 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 13:46:24,572 - root - INFO - lr: 
1.9419e-05 gnorm: 1.10 [15:12:14< 9:17:20] +[titan] 2025-10-05 13:46:35,432 - root - INFO - step: 24835 loss: 2.1026 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:46:35,432 - root - INFO - lr: 1.9411e-05 gnorm: 1.07 [15:12:24< 9:17:08] +[titan] 2025-10-05 13:46:46,286 - root - INFO - step: 24840 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:46:46,287 - root - INFO - lr: 1.9403e-05 gnorm: 1.10 [15:12:35< 9:16:57] +[titan] 2025-10-05 13:46:57,123 - root - INFO - step: 24845 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8212 +[titan] 2025-10-05 13:46:57,123 - root - INFO - lr: 1.9394e-05 gnorm: 1.07 [15:12:46< 9:16:46] +[titan] 2025-10-05 13:47:05,788 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:47:07,960 - root - INFO - step: 24850 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 13:47:07,960 - root - INFO - lr: 1.9386e-05 gnorm: 1.13 [15:12:57< 9:16:35] +[titan] 2025-10-05 13:47:18,794 - root - INFO - step: 24855 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 13:47:18,794 - root - INFO - lr: 1.9378e-05 gnorm: 1.07 [15:13:08< 9:16:24] +[titan] 2025-10-05 13:47:29,672 - root - INFO - step: 24860 loss: 2.1559 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 13:47:29,672 - root - INFO - lr: 1.9369e-05 gnorm: 1.08 [15:13:19< 9:16:13] +[titan] 2025-10-05 13:47:40,555 - root - INFO - step: 24865 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 13:47:40,556 - root - INFO - lr: 1.9361e-05 gnorm: 1.14 [15:13:30< 9:16:02] +[titan] 2025-10-05 13:47:51,413 - root - INFO - step: 24870 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 13:47:51,413 - root - INFO - lr: 1.9353e-05 gnorm: 1.07 [15:13:40< 9:15:51] +[titan] 2025-10-05 13:48:02,253 - root - INFO - step: 24875 loss: 2.0532 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 13:48:02,253 - root - INFO - lr: 1.9345e-05 gnorm: 1.10 [15:13:51< 9:15:39] +[titan] 2025-10-05 13:48:13,099 - root - INFO - step: 24880 loss: 2.0338 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 13:48:13,099 - root - INFO - lr: 
1.9336e-05 gnorm: 1.08 [15:14:02< 9:15:28] +[titan] 2025-10-05 13:48:23,933 - root - INFO - step: 24885 loss: 2.0834 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 13:48:23,933 - root - INFO - lr: 1.9328e-05 gnorm: 1.08 [15:14:13< 9:15:17] +[titan] 2025-10-05 13:48:34,822 - root - INFO - step: 24890 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7910 +[titan] 2025-10-05 13:48:34,822 - root - INFO - lr: 1.9320e-05 gnorm: 1.05 [15:14:24< 9:15:06] +[titan] 2025-10-05 13:48:45,673 - root - INFO - step: 24895 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:48:45,673 - root - INFO - lr: 1.9311e-05 gnorm: 1.13 [15:14:35< 9:14:55] +[titan] 2025-10-05 13:48:54,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:48:56,513 - root - INFO - step: 24900 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 13:48:56,513 - root - INFO - lr: 1.9303e-05 gnorm: 1.08 [15:14:45< 9:14:44] +[titan] 2025-10-05 13:49:07,354 - root - INFO - step: 24905 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 13:49:07,354 - root - INFO - lr: 1.9295e-05 gnorm: 1.09 [15:14:56< 9:14:33] +[titan] 2025-10-05 13:49:18,206 - root - INFO - step: 24910 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 13:49:18,206 - root - INFO - lr: 1.9286e-05 gnorm: 1.06 [15:15:07< 9:14:22] +[titan] 2025-10-05 13:49:29,079 - root - INFO - step: 24915 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 13:49:29,080 - root - INFO - lr: 1.9278e-05 gnorm: 1.07 [15:15:18< 9:14:10] +[titan] 2025-10-05 13:49:39,928 - root - INFO - step: 24920 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8887 +[titan] 2025-10-05 13:49:39,928 - root - INFO - lr: 1.9270e-05 gnorm: 1.10 [15:15:29< 9:13:59] +[titan] 2025-10-05 13:49:50,803 - root - INFO - step: 24925 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7714 +[titan] 2025-10-05 13:49:50,804 - root - INFO - lr: 1.9261e-05 gnorm: 1.05 [15:15:40< 9:13:48] +[titan] 2025-10-05 13:50:01,632 - root - INFO - step: 24930 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7977 +[titan] 2025-10-05 13:50:01,632 - root - INFO - lr: 
1.9253e-05 gnorm: 1.12 [15:15:51< 9:13:37] +[titan] 2025-10-05 13:50:12,484 - root - INFO - step: 24935 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 13:50:12,484 - root - INFO - lr: 1.9245e-05 gnorm: 1.08 [15:16:01< 9:13:26] +[titan] 2025-10-05 13:50:23,352 - root - INFO - step: 24940 loss: 2.0643 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 13:50:23,352 - root - INFO - lr: 1.9236e-05 gnorm: 1.08 [15:16:12< 9:13:15] +[titan] 2025-10-05 13:50:34,241 - root - INFO - step: 24945 loss: 2.0637 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:50:34,241 - root - INFO - lr: 1.9228e-05 gnorm: 1.09 [15:16:23< 9:13:04] +[titan] 2025-10-05 13:50:42,919 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:50:45,094 - root - INFO - step: 24950 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:50:45,094 - root - INFO - lr: 1.9220e-05 gnorm: 1.09 [15:16:34< 9:12:53] +[titan] 2025-10-05 13:50:55,957 - root - INFO - step: 24955 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 13:50:55,957 - root - INFO - lr: 1.9212e-05 gnorm: 1.10 [15:16:45< 9:12:41] +[titan] 2025-10-05 13:51:06,846 - root - INFO - step: 24960 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:51:06,846 - root - INFO - lr: 1.9203e-05 gnorm: 1.18 [15:16:56< 9:12:30] +[titan] 2025-10-05 13:51:17,738 - root - INFO - step: 24965 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:51:17,738 - root - INFO - lr: 1.9195e-05 gnorm: 1.09 [15:17:07< 9:12:19] +[titan] 2025-10-05 13:51:28,604 - root - INFO - step: 24970 loss: 2.1023 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:51:28,604 - root - INFO - lr: 1.9187e-05 gnorm: 1.11 [15:17:18< 9:12:08] +[titan] 2025-10-05 13:51:39,453 - root - INFO - step: 24975 loss: 2.0306 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 13:51:39,453 - root - INFO - lr: 1.9178e-05 gnorm: 1.12 [15:17:28< 9:11:57] +[titan] 2025-10-05 13:51:50,305 - root - INFO - step: 24980 loss: 2.0966 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8535 +[titan] 2025-10-05 13:51:50,305 - root - INFO - lr: 
1.9170e-05 gnorm: 1.08 [15:17:39< 9:11:46] +[titan] 2025-10-05 13:52:01,147 - root - INFO - step: 24985 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7712 +[titan] 2025-10-05 13:52:01,147 - root - INFO - lr: 1.9162e-05 gnorm: 1.09 [15:17:50< 9:11:35] +[titan] 2025-10-05 13:52:12,002 - root - INFO - step: 24990 loss: 2.0567 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 13:52:12,002 - root - INFO - lr: 1.9154e-05 gnorm: 1.09 [15:18:01< 9:11:24] +[titan] 2025-10-05 13:52:22,852 - root - INFO - step: 24995 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:52:22,852 - root - INFO - lr: 1.9145e-05 gnorm: 1.08 [15:18:12< 9:11:13] +[titan] 2025-10-05 13:52:31,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:52:33,740 - root - INFO - step: 25000 loss: 2.0319 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7959 +[titan] 2025-10-05 13:52:33,740 - root - INFO - lr: 1.9137e-05 gnorm: 1.07 [15:18:23< 9:11:01] +[titan] 2025-10-05 13:52:33,740 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 13:52:51,437 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 13:52:51,437 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.70 seconds. 
+[titan] 2025-10-05 13:54:51,998 - root - INFO - step: 25005 loss: 2.0275 memory: 118.84GiB(85.28%) tps: 2,370 tflops: 32.88 mfu: 3.32% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7926 +[titan] 2025-10-05 13:54:51,999 - root - INFO - lr: 1.9129e-05 gnorm: 1.11 [15:20:41< 9:12:07] +[titan] 2025-10-05 13:55:02,804 - root - INFO - step: 25010 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8047 +[titan] 2025-10-05 13:55:02,804 - root - INFO - lr: 1.9120e-05 gnorm: 1.11 [15:20:52< 9:11:56] +[titan] 2025-10-05 13:55:13,603 - root - INFO - step: 25015 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:55:13,603 - root - INFO - lr: 1.9112e-05 gnorm: 1.08 [15:21:03< 9:11:44] +[titan] 2025-10-05 13:55:24,411 - root - INFO - step: 25020 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 13:55:24,411 - root - INFO - lr: 1.9104e-05 gnorm: 1.12 [15:21:13< 9:11:33] +[titan] 2025-10-05 13:55:35,262 - root - INFO - step: 25025 loss: 2.0508 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:55:35,263 - root - INFO - lr: 1.9096e-05 gnorm: 1.09 [15:21:24< 9:11:22] +[titan] 2025-10-05 13:55:46,139 - root - INFO - step: 25030 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 13:55:46,139 - root - INFO - lr: 1.9087e-05 gnorm: 1.12 [15:21:35< 9:11:11] +[titan] 2025-10-05 13:55:56,971 - root - INFO - step: 25035 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 13:55:56,971 - root - INFO - lr: 
1.9079e-05 gnorm: 1.06 [15:21:46< 9:11:00] +[titan] 2025-10-05 13:56:07,833 - root - INFO - step: 25040 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7890 +[titan] 2025-10-05 13:56:07,833 - root - INFO - lr: 1.9071e-05 gnorm: 1.09 [15:21:57< 9:10:48] +[titan] 2025-10-05 13:56:18,697 - root - INFO - step: 25045 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 13:56:18,697 - root - INFO - lr: 1.9062e-05 gnorm: 1.07 [15:22:08< 9:10:37] +[titan] 2025-10-05 13:56:27,381 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:56:29,566 - root - INFO - step: 25050 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 13:56:29,566 - root - INFO - lr: 1.9054e-05 gnorm: 1.09 [15:22:19< 9:10:26] +[titan] 2025-10-05 13:56:40,477 - root - INFO - step: 25055 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 13:56:40,478 - root - INFO - lr: 1.9046e-05 gnorm: 1.10 [15:22:29< 9:10:15] +[titan] 2025-10-05 13:56:51,355 - root - INFO - step: 25060 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 13:56:51,355 - root - INFO - lr: 1.9038e-05 gnorm: 1.09 [15:22:40< 9:10:04] +[titan] 2025-10-05 13:57:02,218 - root - INFO - step: 25065 loss: 2.1039 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:57:02,218 - root - INFO - lr: 1.9029e-05 gnorm: 1.13 [15:22:51< 9:09:53] +[titan] 2025-10-05 13:57:13,100 - root - INFO - step: 25070 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 
30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 13:57:13,100 - root - INFO - lr: 1.9021e-05 gnorm: 1.08 [15:23:02< 9:09:42] +[titan] 2025-10-05 13:57:23,991 - root - INFO - step: 25075 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 13:57:23,991 - root - INFO - lr: 1.9013e-05 gnorm: 1.07 [15:23:13< 9:09:30] +[titan] 2025-10-05 13:57:34,864 - root - INFO - step: 25080 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 13:57:34,864 - root - INFO - lr: 1.9005e-05 gnorm: 1.05 [15:23:24< 9:09:19] +[titan] 2025-10-05 13:57:45,884 - root - INFO - step: 25085 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 29,737 tflops: 412.55 mfu: 41.71% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8452 +[titan] 2025-10-05 13:57:45,884 - root - INFO - lr: 1.8996e-05 gnorm: 1.11 [15:23:35< 9:09:08] +[titan] 2025-10-05 13:57:52,579 - root - INFO - Dumping profiler traces at step 25088 +[titan] 2025-10-05 13:57:52,620 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:57:56,994 - root - INFO - step: 25090 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 13:57:56,994 - root - INFO - lr: 1.8988e-05 gnorm: 1.10 [15:23:46< 9:08:57] +[titan] 2025-10-05 13:58:07,853 - root - INFO - step: 25095 loss: 2.0873 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 13:58:07,853 - root - INFO - lr: 1.8980e-05 gnorm: 1.09 [15:23:57< 9:08:46] +[titan] 2025-10-05 13:58:16,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:58:18,699 - root - INFO - step: 25100 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7915 +[titan] 2025-10-05 13:58:18,700 - root - INFO - lr: 1.8972e-05 gnorm: 1.07 [15:24:08< 9:08:35] +[titan] 2025-10-05 13:58:29,551 - root - INFO - step: 25105 loss: 2.0232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 13:58:29,551 - root - INFO - lr: 1.8963e-05 gnorm: 1.09 [15:24:19< 9:08:24] +[titan] 2025-10-05 13:58:40,400 - root - INFO - step: 25110 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 13:58:40,400 - root - INFO - lr: 1.8955e-05 gnorm: 1.11 [15:24:29< 9:08:13] +[titan] 2025-10-05 13:58:51,352 - root - INFO - step: 25115 loss: 2.0288 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 13:58:51,352 - root - INFO - lr: 1.8947e-05 gnorm: 1.09 [15:24:40< 9:08:02] +[titan] 2025-10-05 13:59:02,234 - root - INFO - step: 25120 loss: 2.0905 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8475 +[titan] 2025-10-05 13:59:02,234 - root - INFO - lr: 1.8939e-05 gnorm: 1.09 [15:24:51< 9:07:50] +[titan] 2025-10-05 13:59:13,119 - root - INFO - step: 25125 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8215 +[titan] 2025-10-05 13:59:13,120 - root - INFO - lr: 1.8930e-05 gnorm: 1.07 [15:25:02< 9:07:39] +[titan] 2025-10-05 13:59:23,995 - root - INFO - step: 25130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 13:59:23,996 - root - INFO - lr: 
1.8922e-05 gnorm: 1.07 [15:25:13< 9:07:28] +[titan] 2025-10-05 13:59:34,878 - root - INFO - step: 25135 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8860 +[titan] 2025-10-05 13:59:34,879 - root - INFO - lr: 1.8914e-05 gnorm: 1.12 [15:25:24< 9:07:17] +[titan] 2025-10-05 13:59:45,774 - root - INFO - step: 25140 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 13:59:45,775 - root - INFO - lr: 1.8905e-05 gnorm: 1.07 [15:25:35< 9:07:06] +[titan] 2025-10-05 13:59:56,648 - root - INFO - step: 25145 loss: 2.0630 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:59:56,648 - root - INFO - lr: 1.8897e-05 gnorm: 1.08 [15:25:46< 9:06:55] +[titan] 2025-10-05 14:00:05,337 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:00:07,518 - root - INFO - step: 25150 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8682 +[titan] 2025-10-05 14:00:07,519 - root - INFO - lr: 1.8889e-05 gnorm: 1.15 [15:25:56< 9:06:44] +[titan] 2025-10-05 14:00:18,376 - root - INFO - step: 25155 loss: 2.0122 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 14:00:18,376 - root - INFO - lr: 1.8881e-05 gnorm: 1.04 [15:26:07< 9:06:32] +[titan] 2025-10-05 14:00:29,255 - root - INFO - step: 25160 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 14:00:29,255 - root - INFO - lr: 1.8873e-05 gnorm: 1.08 [15:26:18< 9:06:21] +[titan] 2025-10-05 14:00:40,131 - root - INFO - step: 25165 loss: 2.0645 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 14:00:40,131 - root - INFO - lr: 1.8864e-05 gnorm: 1.09 [15:26:29< 9:06:10] +[titan] 2025-10-05 14:00:51,071 - root - INFO - step: 25170 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8358 +[titan] 2025-10-05 14:00:51,071 - root - INFO - lr: 1.8856e-05 gnorm: 1.06 [15:26:40< 9:05:59] +[titan] 2025-10-05 14:01:01,932 - root - INFO - step: 25175 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 14:01:01,932 - root - INFO - lr: 1.8848e-05 gnorm: 1.09 [15:26:51< 9:05:48] +[titan] 2025-10-05 14:01:12,823 - root - INFO - step: 25180 loss: 2.0514 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8133 +[titan] 2025-10-05 14:01:12,824 - root - INFO - lr: 
1.8840e-05 gnorm: 1.08 [15:27:02< 9:05:37] +[titan] 2025-10-05 14:01:23,713 - root - INFO - step: 25185 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8457 +[titan] 2025-10-05 14:01:23,713 - root - INFO - lr: 1.8831e-05 gnorm: 1.04 [15:27:13< 9:05:26] +[titan] 2025-10-05 14:01:34,565 - root - INFO - step: 25190 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8554 +[titan] 2025-10-05 14:01:34,565 - root - INFO - lr: 1.8823e-05 gnorm: 1.08 [15:27:24< 9:05:14] +[titan] 2025-10-05 14:01:45,489 - root - INFO - step: 25195 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 14:01:45,489 - root - INFO - lr: 1.8815e-05 gnorm: 1.10 [15:27:34< 9:05:03] +[titan] 2025-10-05 14:01:54,171 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:01:56,348 - root - INFO - step: 25200 loss: 2.1289 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8821 +[titan] 2025-10-05 14:01:56,348 - root - INFO - lr: 1.8807e-05 gnorm: 1.13 [15:27:45< 9:04:52] +[titan] 2025-10-05 14:02:07,198 - root - INFO - step: 25205 loss: 2.0344 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:02:07,198 - root - INFO - lr: 1.8798e-05 gnorm: 1.06 [15:27:56< 9:04:41] +[titan] 2025-10-05 14:02:18,072 - root - INFO - step: 25210 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 14:02:18,072 - root - INFO - lr: 1.8790e-05 gnorm: 1.09 [15:28:07< 9:04:30] +[titan] 2025-10-05 14:02:28,950 - root - INFO - step: 25215 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:02:28,951 - root - INFO - lr: 1.8782e-05 gnorm: 1.11 [15:28:18< 9:04:19] +[titan] 2025-10-05 14:02:39,828 - root - INFO - step: 25220 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 14:02:39,828 - root - INFO - lr: 1.8774e-05 gnorm: 1.10 [15:28:29< 9:04:08] +[titan] 2025-10-05 14:02:50,798 - root - INFO - step: 25225 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 14:02:50,798 - root - INFO - lr: 1.8765e-05 gnorm: 1.10 [15:28:40< 9:03:56] +[titan] 2025-10-05 14:03:01,706 - root - INFO - step: 25230 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 14:03:01,706 - root - INFO - lr: 
1.8757e-05 gnorm: 1.07 [15:28:51< 9:03:45] +[titan] 2025-10-05 14:03:12,597 - root - INFO - step: 25235 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 14:03:12,598 - root - INFO - lr: 1.8749e-05 gnorm: 1.08 [15:29:02< 9:03:34] +[titan] 2025-10-05 14:03:23,476 - root - INFO - step: 25240 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:03:23,477 - root - INFO - lr: 1.8741e-05 gnorm: 1.05 [15:29:12< 9:03:23] +[titan] 2025-10-05 14:03:34,394 - root - INFO - step: 25245 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 14:03:34,395 - root - INFO - lr: 1.8733e-05 gnorm: 1.06 [15:29:23< 9:03:12] +[titan] 2025-10-05 14:03:43,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:03:45,291 - root - INFO - step: 25250 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7908 +[titan] 2025-10-05 14:03:45,292 - root - INFO - lr: 1.8724e-05 gnorm: 1.08 [15:29:34< 9:03:01] +[titan] 2025-10-05 14:03:56,215 - root - INFO - step: 25255 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8068 +[titan] 2025-10-05 14:03:56,215 - root - INFO - lr: 1.8716e-05 gnorm: 1.07 [15:29:45< 9:02:50] +[titan] 2025-10-05 14:04:07,069 - root - INFO - step: 25260 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7296 +[titan] 2025-10-05 14:04:07,070 - root - INFO - lr: 1.8708e-05 gnorm: 1.09 [15:29:56< 9:02:39] +[titan] 2025-10-05 14:04:17,929 - root - INFO - step: 25265 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 14:04:17,929 - root - INFO - lr: 1.8700e-05 gnorm: 1.05 [15:30:07< 9:02:27] +[titan] 2025-10-05 14:04:28,778 - root - INFO - step: 25270 loss: 2.0659 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8256 +[titan] 2025-10-05 14:04:28,778 - root - INFO - lr: 1.8692e-05 gnorm: 1.05 [15:30:18< 9:02:16] +[titan] 2025-10-05 14:04:39,663 - root - INFO - step: 25275 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:04:39,663 - root - INFO - lr: 1.8683e-05 gnorm: 1.10 [15:30:29< 9:02:05] +[titan] 2025-10-05 14:04:50,619 - root - INFO - step: 25280 loss: 2.0423 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 14:04:50,620 - root - INFO - lr: 
1.8675e-05 gnorm: 1.10 [15:30:40< 9:01:54] +[titan] 2025-10-05 14:05:01,490 - root - INFO - step: 25285 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 14:05:01,490 - root - INFO - lr: 1.8667e-05 gnorm: 1.07 [15:30:50< 9:01:43] +[titan] 2025-10-05 14:05:12,363 - root - INFO - step: 25290 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 14:05:12,363 - root - INFO - lr: 1.8659e-05 gnorm: 1.08 [15:31:01< 9:01:32] +[titan] 2025-10-05 14:05:23,239 - root - INFO - step: 25295 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 14:05:23,239 - root - INFO - lr: 1.8650e-05 gnorm: 1.12 [15:31:12< 9:01:21] +[titan] 2025-10-05 14:05:31,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:05:34,099 - root - INFO - step: 25300 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:05:34,100 - root - INFO - lr: 1.8642e-05 gnorm: 1.10 [15:31:23< 9:01:09] +[titan] 2025-10-05 14:05:44,978 - root - INFO - step: 25305 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 14:05:44,978 - root - INFO - lr: 1.8634e-05 gnorm: 1.07 [15:31:34< 9:00:58] +[titan] 2025-10-05 14:05:55,924 - root - INFO - step: 25310 loss: 2.0792 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 41.99% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8356 +[titan] 2025-10-05 14:05:55,924 - root - INFO - lr: 1.8626e-05 gnorm: 1.11 [15:31:45< 9:00:47] +[titan] 2025-10-05 14:06:06,777 - root - INFO - step: 25315 loss: 2.0737 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8330 +[titan] 2025-10-05 14:06:06,777 - root - INFO - lr: 1.8618e-05 gnorm: 1.08 [15:31:56< 9:00:36] +[titan] 2025-10-05 14:06:17,654 - root - INFO - step: 25320 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 14:06:17,654 - root - INFO - lr: 1.8609e-05 gnorm: 1.06 [15:32:07< 9:00:25] +[titan] 2025-10-05 14:06:28,537 - root - INFO - step: 25325 loss: 2.1056 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 14:06:28,537 - root - INFO - lr: 1.8601e-05 gnorm: 1.08 [15:32:17< 9:00:14] +[titan] 2025-10-05 14:06:39,411 - root - INFO - step: 25330 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:06:39,411 - root - INFO - lr: 
1.8593e-05 gnorm: 1.11 [15:32:28< 9:00:03] +[titan] 2025-10-05 14:06:50,340 - root - INFO - step: 25335 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:06:50,341 - root - INFO - lr: 1.8585e-05 gnorm: 1.10 [15:32:39< 8:59:51] +[titan] 2025-10-05 14:07:01,212 - root - INFO - step: 25340 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 14:07:01,212 - root - INFO - lr: 1.8577e-05 gnorm: 1.08 [15:32:50< 8:59:40] +[titan] 2025-10-05 14:07:12,114 - root - INFO - step: 25345 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 14:07:12,114 - root - INFO - lr: 1.8568e-05 gnorm: 1.06 [15:33:01< 8:59:29] +[titan] 2025-10-05 14:07:20,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:07:22,994 - root - INFO - step: 25350 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 14:07:22,994 - root - INFO - lr: 1.8560e-05 gnorm: 1.06 [15:33:12< 8:59:18] +[titan] 2025-10-05 14:07:33,878 - root - INFO - step: 25355 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8658 +[titan] 2025-10-05 14:07:33,878 - root - INFO - lr: 1.8552e-05 gnorm: 1.11 [15:33:23< 8:59:07] +[titan] 2025-10-05 14:07:44,774 - root - INFO - step: 25360 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 14:07:44,774 - root - INFO - lr: 1.8544e-05 gnorm: 1.08 [15:33:34< 8:58:56] +[titan] 2025-10-05 14:07:55,691 - root - INFO - step: 25365 loss: 2.0709 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:07:55,691 - root - INFO - lr: 1.8536e-05 gnorm: 1.08 [15:33:45< 8:58:45] +[titan] 2025-10-05 14:08:06,574 - root - INFO - step: 25370 loss: 2.0036 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 14:08:06,574 - root - INFO - lr: 1.8528e-05 gnorm: 1.08 [15:33:55< 8:58:34] +[titan] 2025-10-05 14:08:17,490 - root - INFO - step: 25375 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 14:08:17,490 - root - INFO - lr: 1.8519e-05 gnorm: 1.13 [15:34:06< 8:58:22] +[titan] 2025-10-05 14:08:28,356 - root - INFO - step: 25380 loss: 2.1491 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 14:08:28,357 - root - INFO - lr: 
1.8511e-05 gnorm: 1.09 [15:34:17< 8:58:11] +[titan] 2025-10-05 14:08:39,210 - root - INFO - step: 25385 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:08:39,210 - root - INFO - lr: 1.8503e-05 gnorm: 1.09 [15:34:28< 8:58:00] +[titan] 2025-10-05 14:08:50,100 - root - INFO - step: 25390 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 14:08:50,100 - root - INFO - lr: 1.8495e-05 gnorm: 1.11 [15:34:39< 8:57:49] +[titan] 2025-10-05 14:09:00,958 - root - INFO - step: 25395 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 14:09:00,959 - root - INFO - lr: 1.8487e-05 gnorm: 1.09 [15:34:50< 8:57:38] +[titan] 2025-10-05 14:09:09,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:09:11,824 - root - INFO - step: 25400 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:09:11,824 - root - INFO - lr: 1.8478e-05 gnorm: 1.09 [15:35:01< 8:57:27] +[titan] 2025-10-05 14:09:22,722 - root - INFO - step: 25405 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 14:09:22,722 - root - INFO - lr: 1.8470e-05 gnorm: 1.06 [15:35:12< 8:57:16] +[titan] 2025-10-05 14:09:33,582 - root - INFO - step: 25410 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 14:09:33,583 - root - INFO - lr: 1.8462e-05 gnorm: 1.07 [15:35:23< 8:57:04] +[titan] 2025-10-05 14:09:44,445 - root - INFO - step: 25415 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 14:09:44,445 - root - INFO - lr: 1.8454e-05 gnorm: 1.07 [15:35:33< 8:56:53] +[titan] 2025-10-05 14:09:55,342 - root - INFO - step: 25420 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8650 +[titan] 2025-10-05 14:09:55,342 - root - INFO - lr: 1.8446e-05 gnorm: 1.08 [15:35:44< 8:56:42] +[titan] 2025-10-05 14:10:06,229 - root - INFO - step: 25425 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 14:10:06,229 - root - INFO - lr: 1.8438e-05 gnorm: 1.09 [15:35:55< 8:56:31] +[titan] 2025-10-05 14:10:17,110 - root - INFO - step: 25430 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 14:10:17,110 - root - INFO - lr: 
1.8429e-05 gnorm: 1.09 [15:36:06< 8:56:20] +[titan] 2025-10-05 14:10:28,014 - root - INFO - step: 25435 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 14:10:28,014 - root - INFO - lr: 1.8421e-05 gnorm: 1.05 [15:36:17< 8:56:09] +[titan] 2025-10-05 14:10:38,939 - root - INFO - step: 25440 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8011 +[titan] 2025-10-05 14:10:38,939 - root - INFO - lr: 1.8413e-05 gnorm: 1.10 [15:36:28< 8:55:58] +[titan] 2025-10-05 14:10:49,824 - root - INFO - step: 25445 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 14:10:49,825 - root - INFO - lr: 1.8405e-05 gnorm: 1.08 [15:36:39< 8:55:46] +[titan] 2025-10-05 14:10:58,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:11:00,730 - root - INFO - step: 25450 loss: 2.0470 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 14:11:00,730 - root - INFO - lr: 1.8397e-05 gnorm: 1.07 [15:36:50< 8:55:35] +[titan] 2025-10-05 14:11:11,607 - root - INFO - step: 25455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 14:11:11,607 - root - INFO - lr: 1.8389e-05 gnorm: 1.07 [15:37:01< 8:55:24] +[titan] 2025-10-05 14:11:22,482 - root - INFO - step: 25460 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 14:11:22,482 - root - INFO - lr: 1.8380e-05 gnorm: 1.10 [15:37:11< 8:55:13] +[titan] 2025-10-05 14:11:33,348 - root - INFO - step: 25465 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:11:33,348 - root - INFO - lr: 1.8372e-05 gnorm: 1.09 [15:37:22< 8:55:02] +[titan] 2025-10-05 14:11:44,248 - root - INFO - step: 25470 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 14:11:44,248 - root - INFO - lr: 1.8364e-05 gnorm: 1.09 [15:37:33< 8:54:51] +[titan] 2025-10-05 14:11:55,157 - root - INFO - step: 25475 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 14:11:55,157 - root - INFO - lr: 1.8356e-05 gnorm: 1.09 [15:37:44< 8:54:40] +[titan] 2025-10-05 14:12:06,026 - root - INFO - step: 25480 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 14:12:06,026 - root - INFO - lr: 
1.8348e-05 gnorm: 1.07 [15:37:55< 8:54:29] +[titan] 2025-10-05 14:12:16,908 - root - INFO - step: 25485 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:12:16,909 - root - INFO - lr: 1.8340e-05 gnorm: 1.10 [15:38:06< 8:54:17] +[titan] 2025-10-05 14:12:27,776 - root - INFO - step: 25490 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7677 +[titan] 2025-10-05 14:12:27,776 - root - INFO - lr: 1.8332e-05 gnorm: 1.09 [15:38:17< 8:54:06] +[titan] 2025-10-05 14:12:38,651 - root - INFO - step: 25495 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 14:12:38,652 - root - INFO - lr: 1.8323e-05 gnorm: 1.08 [15:38:28< 8:53:55] +[titan] 2025-10-05 14:12:47,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:12:49,537 - root - INFO - step: 25500 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7501 +[titan] 2025-10-05 14:12:49,537 - root - INFO - lr: 1.8315e-05 gnorm: 1.13 [15:38:38< 8:53:44] +[titan] 2025-10-05 14:13:00,470 - root - INFO - step: 25505 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:13:00,470 - root - INFO - lr: 1.8307e-05 gnorm: 1.08 [15:38:49< 8:53:33] +[titan] 2025-10-05 14:13:11,338 - root - INFO - step: 25510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 14:13:11,338 - root - INFO - lr: 1.8299e-05 gnorm: 1.11 [15:39:00< 8:53:22] +[titan] 2025-10-05 14:13:22,196 - root - INFO - step: 25515 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 14:13:22,197 - root - INFO - lr: 1.8291e-05 gnorm: 1.17 [15:39:11< 8:53:11] +[titan] 2025-10-05 14:13:33,046 - root - INFO - step: 25520 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 14:13:33,047 - root - INFO - lr: 1.8283e-05 gnorm: 1.07 [15:39:22< 8:52:59] +[titan] 2025-10-05 14:13:43,917 - root - INFO - step: 25525 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 14:13:43,917 - root - INFO - lr: 1.8275e-05 gnorm: 1.12 [15:39:33< 8:52:48] +[titan] 2025-10-05 14:13:54,888 - root - INFO - step: 25530 loss: 2.1016 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 14:13:54,888 - root - INFO - lr: 
1.8266e-05 gnorm: 1.14 [15:39:44< 8:52:37] +[titan] 2025-10-05 14:14:05,796 - root - INFO - step: 25535 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:14:05,796 - root - INFO - lr: 1.8258e-05 gnorm: 1.11 [15:39:55< 8:52:26] +[titan] 2025-10-05 14:14:16,658 - root - INFO - step: 25540 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 14:14:16,658 - root - INFO - lr: 1.8250e-05 gnorm: 1.12 [15:40:06< 8:52:15] +[titan] 2025-10-05 14:14:27,520 - root - INFO - step: 25545 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 14:14:27,521 - root - INFO - lr: 1.8242e-05 gnorm: 1.08 [15:40:16< 8:52:04] +[titan] 2025-10-05 14:14:36,209 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:14:38,398 - root - INFO - step: 25550 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 14:14:38,399 - root - INFO - lr: 1.8234e-05 gnorm: 1.07 [15:40:27< 8:51:53] +[titan] 2025-10-05 14:14:49,271 - root - INFO - step: 25555 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:14:49,271 - root - INFO - lr: 1.8226e-05 gnorm: 1.10 [15:40:38< 8:51:42] +[titan] 2025-10-05 14:15:00,189 - root - INFO - step: 25560 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:15:00,189 - root - INFO - lr: 1.8218e-05 gnorm: 1.05 [15:40:49< 8:51:30] +[titan] 2025-10-05 14:15:11,120 - root - INFO - step: 25565 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 14:15:11,121 - root - INFO - lr: 1.8209e-05 gnorm: 1.07 [15:41:00< 8:51:19] +[titan] 2025-10-05 14:15:21,997 - root - INFO - step: 25570 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 14:15:21,997 - root - INFO - lr: 1.8201e-05 gnorm: 1.56 [15:41:11< 8:51:08] +[titan] 2025-10-05 14:15:32,888 - root - INFO - step: 25575 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 14:15:32,888 - root - INFO - lr: 1.8193e-05 gnorm: 1.07 [15:41:22< 8:50:57] +[titan] 2025-10-05 14:15:43,769 - root - INFO - step: 25580 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8710 +[titan] 2025-10-05 14:15:43,769 - root - INFO - lr: 
1.8185e-05 gnorm: 1.07 [15:41:33< 8:50:46] +[titan] 2025-10-05 14:15:54,652 - root - INFO - step: 25585 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 14:15:54,652 - root - INFO - lr: 1.8177e-05 gnorm: 1.05 [15:41:44< 8:50:35] +[titan] 2025-10-05 14:16:05,536 - root - INFO - step: 25590 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 14:16:05,536 - root - INFO - lr: 1.8169e-05 gnorm: 1.07 [15:41:54< 8:50:24] +[titan] 2025-10-05 14:16:16,420 - root - INFO - step: 25595 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:16:16,420 - root - INFO - lr: 1.8161e-05 gnorm: 1.09 [15:42:05< 8:50:13] +[titan] 2025-10-05 14:16:25,234 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:16:27,418 - root - INFO - step: 25600 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 29,795 tflops: 413.36 mfu: 41.80% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 14:16:27,418 - root - INFO - lr: 1.8153e-05 gnorm: 1.10 [15:42:16< 8:50:01] +[titan] 2025-10-05 14:16:27,598 - root - INFO - Dumping profiler traces at step 25600 +[titan] 2025-10-05 14:16:27,634 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:16:38,481 - root - INFO - step: 25605 loss: 2.0476 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:16:38,481 - root - INFO - lr: 1.8144e-05 gnorm: 1.11 [15:42:27< 8:49:50] +[titan] 2025-10-05 14:16:49,316 - root - INFO - step: 25610 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8107 +[titan] 2025-10-05 14:16:49,316 - root - INFO - lr: 1.8136e-05 gnorm: 1.06 [15:42:38< 8:49:39] +[titan] 2025-10-05 14:17:00,171 - root - INFO - step: 25615 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 14:17:00,172 - root - INFO - lr: 1.8128e-05 gnorm: 1.06 [15:42:49< 8:49:28] +[titan] 2025-10-05 14:17:11,028 - root - INFO - step: 25620 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 14:17:11,028 - root - INFO - lr: 1.8120e-05 gnorm: 1.08 [15:43:00< 8:49:17] +[titan] 2025-10-05 14:17:21,893 - root - INFO - step: 25625 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 14:17:21,894 - root - INFO - lr: 1.8112e-05 gnorm: 1.08 [15:43:11< 8:49:06] +[titan] 2025-10-05 14:17:32,791 - root - INFO - step: 25630 loss: 
2.0937 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8510 +[titan] 2025-10-05 14:17:32,791 - root - INFO - lr: 1.8104e-05 gnorm: 1.17 [15:43:22< 8:48:55] +[titan] 2025-10-05 14:17:43,645 - root - INFO - step: 25635 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 14:17:43,645 - root - INFO - lr: 1.8096e-05 gnorm: 1.09 [15:43:33< 8:48:44] +[titan] 2025-10-05 14:17:54,490 - root - INFO - step: 25640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:17:54,490 - root - INFO - lr: 1.8088e-05 gnorm: 1.07 [15:43:43< 8:48:32] +[titan] 2025-10-05 14:18:05,362 - root - INFO - step: 25645 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:18:05,362 - root - INFO - lr: 1.8080e-05 gnorm: 1.09 [15:43:54< 8:48:21] +[titan] 2025-10-05 14:18:14,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:18:16,215 - root - INFO - step: 25650 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 14:18:16,216 - root - INFO - lr: 1.8071e-05 gnorm: 1.09 [15:44:05< 8:48:10] +[titan] 2025-10-05 14:18:27,067 - root - INFO - step: 25655 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 14:18:27,068 - root - INFO - lr: 1.8063e-05 gnorm: 1.05 [15:44:16< 8:47:59] +[titan] 2025-10-05 14:18:37,921 - root - INFO - step: 25660 loss: 2.0284 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7927 +[titan] 2025-10-05 14:18:37,921 - root - INFO - lr: 1.8055e-05 gnorm: 1.09 [15:44:27< 8:47:48] +[titan] 2025-10-05 14:18:48,835 - root - INFO - step: 25665 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 14:18:48,835 - root - INFO - lr: 1.8047e-05 gnorm: 1.08 [15:44:38< 8:47:37] +[titan] 2025-10-05 14:18:59,735 - root - INFO - step: 25670 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:18:59,736 - root - INFO - lr: 1.8039e-05 gnorm: 1.11 [15:44:49< 8:47:26] +[titan] 2025-10-05 14:19:10,621 - root - INFO - step: 25675 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 14:19:10,621 - root - INFO - lr: 1.8031e-05 gnorm: 1.12 [15:45:00< 8:47:14] +[titan] 2025-10-05 14:19:21,506 - root - INFO - step: 25680 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8389 +[titan] 2025-10-05 14:19:21,507 - root - INFO - lr: 
1.8023e-05 gnorm: 1.07 [15:45:10< 8:47:03] +[titan] 2025-10-05 14:19:32,375 - root - INFO - step: 25685 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 14:19:32,375 - root - INFO - lr: 1.8015e-05 gnorm: 1.07 [15:45:21< 8:46:52] +[titan] 2025-10-05 14:19:43,253 - root - INFO - step: 25690 loss: 1.9973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7661 +[titan] 2025-10-05 14:19:43,254 - root - INFO - lr: 1.8007e-05 gnorm: 1.09 [15:45:32< 8:46:41] +[titan] 2025-10-05 14:19:54,175 - root - INFO - step: 25695 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7719 +[titan] 2025-10-05 14:19:54,175 - root - INFO - lr: 1.7999e-05 gnorm: 1.09 [15:45:43< 8:46:30] +[titan] 2025-10-05 14:20:02,862 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:20:05,037 - root - INFO - step: 25700 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 14:20:05,037 - root - INFO - lr: 1.7991e-05 gnorm: 1.10 [15:45:54< 8:46:19] +[titan] 2025-10-05 14:20:15,889 - root - INFO - step: 25705 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9084 +[titan] 2025-10-05 14:20:15,889 - root - INFO - lr: 1.7982e-05 gnorm: 1.09 [15:46:05< 8:46:08] +[titan] 2025-10-05 14:20:26,754 - root - INFO - step: 25710 loss: 2.0748 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 14:20:26,754 - root - INFO - lr: 1.7974e-05 gnorm: 1.08 [15:46:16< 8:45:57] +[titan] 2025-10-05 14:20:37,621 - root - INFO - step: 25715 loss: 2.0337 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7984 +[titan] 2025-10-05 14:20:37,621 - root - INFO - lr: 1.7966e-05 gnorm: 1.06 [15:46:27< 8:45:45] +[titan] 2025-10-05 14:20:48,501 - root - INFO - step: 25720 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 14:20:48,501 - root - INFO - lr: 1.7958e-05 gnorm: 1.07 [15:46:37< 8:45:34] +[titan] 2025-10-05 14:20:59,442 - root - INFO - step: 25725 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8266 +[titan] 2025-10-05 14:20:59,442 - root - INFO - lr: 1.7950e-05 gnorm: 1.11 [15:46:48< 8:45:23] +[titan] 2025-10-05 14:21:10,316 - root - INFO - step: 25730 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8362 +[titan] 2025-10-05 14:21:10,316 - root - INFO - lr: 
1.7942e-05 gnorm: 1.10 [15:46:59< 8:45:12] +[titan] 2025-10-05 14:21:21,179 - root - INFO - step: 25735 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 14:21:21,179 - root - INFO - lr: 1.7934e-05 gnorm: 1.11 [15:47:10< 8:45:01] +[titan] 2025-10-05 14:21:32,060 - root - INFO - step: 25740 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 14:21:32,060 - root - INFO - lr: 1.7926e-05 gnorm: 1.05 [15:47:21< 8:44:50] +[titan] 2025-10-05 14:21:42,940 - root - INFO - step: 25745 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 14:21:42,940 - root - INFO - lr: 1.7918e-05 gnorm: 1.13 [15:47:32< 8:44:39] +[titan] 2025-10-05 14:21:51,620 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:21:53,800 - root - INFO - step: 25750 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 14:21:53,800 - root - INFO - lr: 1.7910e-05 gnorm: 1.09 [15:47:43< 8:44:27] +[titan] 2025-10-05 14:22:04,676 - root - INFO - step: 25755 loss: 2.0272 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:22:04,676 - root - INFO - lr: 1.7902e-05 gnorm: 1.10 [15:47:54< 8:44:16] +[titan] 2025-10-05 14:22:15,594 - root - INFO - step: 25760 loss: 2.0342 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7988 +[titan] 2025-10-05 14:22:15,594 - root - INFO - lr: 1.7894e-05 gnorm: 1.07 [15:48:04< 8:44:05] +[titan] 2025-10-05 14:22:26,449 - root - INFO - step: 25765 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 14:22:26,449 - root - INFO - lr: 1.7885e-05 gnorm: 1.09 [15:48:15< 8:43:54] +[titan] 2025-10-05 14:22:37,310 - root - INFO - step: 25770 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 14:22:37,310 - root - INFO - lr: 1.7877e-05 gnorm: 1.05 [15:48:26< 8:43:43] +[titan] 2025-10-05 14:22:48,182 - root - INFO - step: 25775 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 14:22:48,182 - root - INFO - lr: 1.7869e-05 gnorm: 1.11 [15:48:37< 8:43:32] +[titan] 2025-10-05 14:22:59,049 - root - INFO - step: 25780 loss: 2.0127 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 14:22:59,049 - root - INFO - lr: 
1.7861e-05 gnorm: 1.06 [15:48:48< 8:43:21] +[titan] 2025-10-05 14:23:09,928 - root - INFO - step: 25785 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 14:23:09,928 - root - INFO - lr: 1.7853e-05 gnorm: 1.04 [15:48:59< 8:43:10] +[titan] 2025-10-05 14:23:20,861 - root - INFO - step: 25790 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 14:23:20,861 - root - INFO - lr: 1.7845e-05 gnorm: 1.11 [15:49:10< 8:42:58] +[titan] 2025-10-05 14:23:31,734 - root - INFO - step: 25795 loss: 2.0316 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 14:23:31,734 - root - INFO - lr: 1.7837e-05 gnorm: 1.08 [15:49:21< 8:42:47] +[titan] 2025-10-05 14:23:40,435 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:23:42,620 - root - INFO - step: 25800 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.8738 +[titan] 2025-10-05 14:23:42,620 - root - INFO - lr: 1.7829e-05 gnorm: 2.05 [15:49:32< 8:42:36] +[titan] 2025-10-05 14:23:53,479 - root - INFO - step: 25805 loss: 2.0499 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8114 +[titan] 2025-10-05 14:23:53,479 - root - INFO - lr: 1.7821e-05 gnorm: 1.10 [15:49:42< 8:42:25] +[titan] 2025-10-05 14:24:04,354 - root - INFO - step: 25810 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8199 +[titan] 2025-10-05 14:24:04,354 - root - INFO - lr: 1.7813e-05 gnorm: 1.10 [15:49:53< 8:42:14] +[titan] 2025-10-05 14:24:15,228 - root - INFO - step: 25815 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:24:15,228 - root - INFO - lr: 1.7805e-05 gnorm: 1.07 [15:50:04< 8:42:03] +[titan] 2025-10-05 14:24:26,126 - root - INFO - step: 25820 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:24:26,126 - root - INFO - lr: 1.7797e-05 gnorm: 1.11 [15:50:15< 8:41:52] +[titan] 2025-10-05 14:24:37,054 - root - INFO - step: 25825 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 14:24:37,055 - root - INFO - lr: 1.7789e-05 gnorm: 1.09 [15:50:26< 8:41:41] +[titan] 2025-10-05 14:24:47,925 - root - INFO - step: 25830 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 14:24:47,925 - root - INFO - lr: 
1.7781e-05 gnorm: 1.08 [15:50:37< 8:41:29] +[titan] 2025-10-05 14:24:58,795 - root - INFO - step: 25835 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 14:24:58,795 - root - INFO - lr: 1.7773e-05 gnorm: 1.15 [15:50:48< 8:41:18] +[titan] 2025-10-05 14:25:09,680 - root - INFO - step: 25840 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 14:25:09,680 - root - INFO - lr: 1.7765e-05 gnorm: 1.04 [15:50:59< 8:41:07] +[titan] 2025-10-05 14:25:20,542 - root - INFO - step: 25845 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7882 +[titan] 2025-10-05 14:25:20,543 - root - INFO - lr: 1.7757e-05 gnorm: 1.08 [15:51:09< 8:40:56] +[titan] 2025-10-05 14:25:29,239 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:25:31,434 - root - INFO - step: 25850 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8073 +[titan] 2025-10-05 14:25:31,434 - root - INFO - lr: 1.7749e-05 gnorm: 1.08 [15:51:20< 8:40:45] +[titan] 2025-10-05 14:25:42,355 - root - INFO - step: 25855 loss: 2.0565 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:25:42,356 - root - INFO - lr: 1.7740e-05 gnorm: 1.09 [15:51:31< 8:40:34] +[titan] 2025-10-05 14:25:53,227 - root - INFO - step: 25860 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 14:25:53,227 - root - INFO - lr: 1.7732e-05 gnorm: 1.11 [15:51:42< 8:40:23] +[titan] 2025-10-05 14:26:04,104 - root - INFO - step: 25865 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8184 +[titan] 2025-10-05 14:26:04,105 - root - INFO - lr: 1.7724e-05 gnorm: 1.11 [15:51:53< 8:40:12] +[titan] 2025-10-05 14:26:15,028 - root - INFO - step: 25870 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 14:26:15,028 - root - INFO - lr: 1.7716e-05 gnorm: 1.04 [15:52:04< 8:40:00] +[titan] 2025-10-05 14:26:25,939 - root - INFO - step: 25875 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8127 +[titan] 2025-10-05 14:26:25,939 - root - INFO - lr: 1.7708e-05 gnorm: 1.08 [15:52:15< 8:39:49] +[titan] 2025-10-05 14:26:36,815 - root - INFO - step: 25880 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:26:36,815 - root - INFO - lr: 
1.7700e-05 gnorm: 1.07 [15:52:26< 8:39:38] +[titan] 2025-10-05 14:26:47,749 - root - INFO - step: 25885 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8378 +[titan] 2025-10-05 14:26:47,749 - root - INFO - lr: 1.7692e-05 gnorm: 1.10 [15:52:37< 8:39:27] +[titan] 2025-10-05 14:26:58,622 - root - INFO - step: 25890 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:26:58,622 - root - INFO - lr: 1.7684e-05 gnorm: 1.07 [15:52:47< 8:39:16] +[titan] 2025-10-05 14:27:09,541 - root - INFO - step: 25895 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7826 +[titan] 2025-10-05 14:27:09,541 - root - INFO - lr: 1.7676e-05 gnorm: 1.10 [15:52:58< 8:39:05] +[titan] 2025-10-05 14:27:18,236 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:27:20,420 - root - INFO - step: 25900 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 14:27:20,420 - root - INFO - lr: 1.7668e-05 gnorm: 1.08 [15:53:09< 8:38:54] +[titan] 2025-10-05 14:27:31,298 - root - INFO - step: 25905 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 14:27:31,299 - root - INFO - lr: 1.7660e-05 gnorm: 1.08 [15:53:20< 8:38:43] +[titan] 2025-10-05 14:27:42,163 - root - INFO - step: 25910 loss: 2.0892 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 14:27:42,163 - root - INFO - lr: 1.7652e-05 gnorm: 1.12 [15:53:31< 8:38:31] +[titan] 2025-10-05 14:27:53,040 - root - INFO - step: 25915 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 14:27:53,041 - root - INFO - lr: 1.7644e-05 gnorm: 1.09 [15:53:42< 8:38:20] +[titan] 2025-10-05 14:28:03,938 - root - INFO - step: 25920 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 14:28:03,938 - root - INFO - lr: 1.7636e-05 gnorm: 1.05 [15:53:53< 8:38:09] +[titan] 2025-10-05 14:28:14,994 - root - INFO - step: 25925 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.19 mfu: 41.58% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8390 +[titan] 2025-10-05 14:28:14,995 - root - INFO - lr: 1.7628e-05 gnorm: 1.11 [15:54:04< 8:37:58] +[titan] 2025-10-05 14:28:25,864 - root - INFO - step: 25930 loss: 2.0995 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 14:28:25,864 - root - INFO - lr: 
1.7620e-05 gnorm: 1.09 [15:54:15< 8:37:47] +[titan] 2025-10-05 14:28:36,720 - root - INFO - step: 25935 loss: 2.0585 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 14:28:36,720 - root - INFO - lr: 1.7612e-05 gnorm: 1.12 [15:54:26< 8:37:36] +[titan] 2025-10-05 14:28:47,595 - root - INFO - step: 25940 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 14:28:47,596 - root - INFO - lr: 1.7604e-05 gnorm: 1.13 [15:54:36< 8:37:25] +[titan] 2025-10-05 14:28:58,468 - root - INFO - step: 25945 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7629 +[titan] 2025-10-05 14:28:58,469 - root - INFO - lr: 1.7596e-05 gnorm: 1.11 [15:54:47< 8:37:14] +[titan] 2025-10-05 14:29:07,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:29:09,436 - root - INFO - step: 25950 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 29,879 tflops: 414.52 mfu: 41.91% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:29:09,436 - root - INFO - lr: 1.7588e-05 gnorm: 1.14 [15:54:58< 8:37:03] +[titan] 2025-10-05 14:29:20,286 - root - INFO - step: 25955 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 14:29:20,286 - root - INFO - lr: 1.7580e-05 gnorm: 1.08 [15:55:09< 8:36:51] +[titan] 2025-10-05 14:29:31,140 - root - INFO - step: 25960 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:29:31,140 - root - INFO - lr: 1.7572e-05 gnorm: 1.08 [15:55:20< 8:36:40] +[titan] 2025-10-05 14:29:42,013 - root - INFO - step: 25965 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8359 +[titan] 2025-10-05 14:29:42,013 - root - INFO - lr: 1.7564e-05 gnorm: 1.10 [15:55:31< 8:36:29] +[titan] 2025-10-05 14:29:52,914 - root - INFO - step: 25970 loss: 2.1034 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 14:29:52,914 - root - INFO - lr: 1.7556e-05 gnorm: 1.06 [15:55:42< 8:36:18] +[titan] 2025-10-05 14:30:03,792 - root - INFO - step: 25975 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 14:30:03,792 - root - INFO - lr: 1.7548e-05 gnorm: 1.08 [15:55:53< 8:36:07] +[titan] 2025-10-05 14:30:14,715 - root - INFO - step: 25980 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8170 +[titan] 2025-10-05 14:30:14,715 - root - INFO - lr: 
1.7540e-05 gnorm: 1.11 [15:56:04< 8:35:56] +[titan] 2025-10-05 14:30:25,638 - root - INFO - step: 25985 loss: 2.0484 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:30:25,638 - root - INFO - lr: 1.7532e-05 gnorm: 1.07 [15:56:15< 8:35:45] +[titan] 2025-10-05 14:30:36,501 - root - INFO - step: 25990 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:30:36,501 - root - INFO - lr: 1.7524e-05 gnorm: 1.10 [15:56:25< 8:35:34] +[titan] 2025-10-05 14:30:47,379 - root - INFO - step: 25995 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7746 +[titan] 2025-10-05 14:30:47,379 - root - INFO - lr: 1.7516e-05 gnorm: 1.07 [15:56:36< 8:35:22] +[titan] 2025-10-05 14:30:56,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:30:58,259 - root - INFO - step: 26000 loss: 2.0535 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8148 +[titan] 2025-10-05 14:30:58,259 - root - INFO - lr: 1.7508e-05 gnorm: 1.14 [15:56:47< 8:35:11] +[titan] 2025-10-05 14:31:09,132 - root - INFO - step: 26005 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 14:31:09,132 - root - INFO - lr: 1.7500e-05 gnorm: 1.09 [15:56:58< 8:35:00] +[titan] 2025-10-05 14:31:20,058 - root - INFO - step: 26010 loss: 2.0243 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 14:31:20,058 - root - INFO - lr: 1.7492e-05 gnorm: 1.12 [15:57:09< 8:34:49] +[titan] 2025-10-05 14:31:30,973 - root - INFO - step: 26015 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:31:30,974 - root - INFO - lr: 1.7484e-05 gnorm: 1.10 [15:57:20< 8:34:38] +[titan] 2025-10-05 14:31:41,835 - root - INFO - step: 26020 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8519 +[titan] 2025-10-05 14:31:41,835 - root - INFO - lr: 1.7476e-05 gnorm: 1.16 [15:57:31< 8:34:27] +[titan] 2025-10-05 14:31:52,692 - root - INFO - step: 26025 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 14:31:52,692 - root - INFO - lr: 1.7468e-05 gnorm: 1.07 [15:57:42< 8:34:16] +[titan] 2025-10-05 14:32:03,561 - root - INFO - step: 26030 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 14:32:03,561 - root - INFO - lr: 
1.7460e-05 gnorm: 1.09 [15:57:52< 8:34:05] +[titan] 2025-10-05 14:32:14,505 - root - INFO - step: 26035 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:32:14,505 - root - INFO - lr: 1.7452e-05 gnorm: 1.10 [15:58:03< 8:33:53] +[titan] 2025-10-05 14:32:25,388 - root - INFO - step: 26040 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:32:25,388 - root - INFO - lr: 1.7444e-05 gnorm: 1.09 [15:58:14< 8:33:42] +[titan] 2025-10-05 14:32:36,316 - root - INFO - step: 26045 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8342 +[titan] 2025-10-05 14:32:36,316 - root - INFO - lr: 1.7436e-05 gnorm: 1.10 [15:58:25< 8:33:31] +[titan] 2025-10-05 14:32:45,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:32:47,196 - root - INFO - step: 26050 loss: 2.0388 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:32:47,196 - root - INFO - lr: 1.7428e-05 gnorm: 1.08 [15:58:36< 8:33:20] +[titan] 2025-10-05 14:32:58,069 - root - INFO - step: 26055 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8131 +[titan] 2025-10-05 14:32:58,069 - root - INFO - lr: 1.7420e-05 gnorm: 1.06 [15:58:47< 8:33:09] +[titan] 2025-10-05 14:33:08,972 - root - INFO - step: 26060 loss: 2.0150 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 14:33:08,972 - root - INFO - lr: 1.7412e-05 gnorm: 1.10 [15:58:58< 8:32:58] +[titan] 2025-10-05 14:33:19,953 - root - INFO - step: 26065 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:33:19,953 - root - INFO - lr: 1.7404e-05 gnorm: 1.12 [15:59:09< 8:32:47] +[titan] 2025-10-05 14:33:30,852 - root - INFO - step: 26070 loss: 2.0795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 14:33:30,852 - root - INFO - lr: 1.7396e-05 gnorm: 1.11 [15:59:20< 8:32:36] +[titan] 2025-10-05 14:33:41,755 - root - INFO - step: 26075 loss: 2.0764 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 14:33:41,755 - root - INFO - lr: 1.7388e-05 gnorm: 1.11 [15:59:31< 8:32:25] +[titan] 2025-10-05 14:33:52,678 - root - INFO - step: 26080 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 14:33:52,678 - root - INFO - lr: 
1.7380e-05 gnorm: 1.08 [15:59:42< 8:32:13] +[titan] 2025-10-05 14:34:03,540 - root - INFO - step: 26085 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:34:03,540 - root - INFO - lr: 1.7372e-05 gnorm: 1.14 [15:59:52< 8:32:02] +[titan] 2025-10-05 14:34:14,468 - root - INFO - step: 26090 loss: 2.0497 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8118 +[titan] 2025-10-05 14:34:14,468 - root - INFO - lr: 1.7364e-05 gnorm: 1.11 [16:00:03< 8:31:51] +[titan] 2025-10-05 14:34:25,355 - root - INFO - step: 26095 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7776 +[titan] 2025-10-05 14:34:25,355 - root - INFO - lr: 1.7356e-05 gnorm: 1.07 [16:00:14< 8:31:40] +[titan] 2025-10-05 14:34:34,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:34:36,218 - root - INFO - step: 26100 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 14:34:36,218 - root - INFO - lr: 1.7348e-05 gnorm: 1.06 [16:00:25< 8:31:29] +[titan] 2025-10-05 14:34:47,106 - root - INFO - step: 26105 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 14:34:47,107 - root - INFO - lr: 1.7340e-05 gnorm: 1.06 [16:00:36< 8:31:18] +[titan] 2025-10-05 14:34:58,140 - root - INFO - step: 26110 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,699 tflops: 412.03 mfu: 41.66% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:34:58,140 - root - INFO - lr: 1.7332e-05 gnorm: 1.12 [16:00:47< 8:31:07] +[titan] 2025-10-05 14:35:02,666 - root - INFO - Dumping profiler traces at step 26112 +[titan] 2025-10-05 14:35:02,704 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:35:09,255 - root - INFO - step: 26115 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 29,482 tflops: 409.02 mfu: 41.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 14:35:09,255 - root - INFO - lr: 1.7324e-05 gnorm: 1.07 [16:00:58< 8:30:56] +[titan] 2025-10-05 14:35:20,180 - root - INFO - step: 26120 loss: 1.9396 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 14:35:20,180 - root - INFO - lr: 1.7316e-05 gnorm: 1.06 [16:01:09< 8:30:45] +[titan] 2025-10-05 14:35:31,056 - root - INFO - step: 26125 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 14:35:31,056 - root - INFO - lr: 1.7309e-05 gnorm: 1.12 [16:01:20< 8:30:34] +[titan] 2025-10-05 14:35:41,959 - root - INFO - step: 26130 loss: 
2.0350 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:35:41,959 - root - INFO - lr: 1.7301e-05 gnorm: 1.07 [16:01:31< 8:30:22] +[titan] 2025-10-05 14:35:52,846 - root - INFO - step: 26135 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:35:52,846 - root - INFO - lr: 1.7293e-05 gnorm: 1.10 [16:01:42< 8:30:11] +[titan] 2025-10-05 14:36:03,715 - root - INFO - step: 26140 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7686 +[titan] 2025-10-05 14:36:03,715 - root - INFO - lr: 1.7285e-05 gnorm: 1.07 [16:01:53< 8:30:00] +[titan] 2025-10-05 14:36:14,674 - root - INFO - step: 26145 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7630 +[titan] 2025-10-05 14:36:14,674 - root - INFO - lr: 1.7277e-05 gnorm: 1.05 [16:02:04< 8:29:49] +[titan] 2025-10-05 14:36:23,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:36:25,541 - root - INFO - step: 26150 loss: 2.1124 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 14:36:25,541 - root - INFO - lr: 1.7269e-05 gnorm: 1.13 [16:02:14< 8:29:38] +[titan] 2025-10-05 14:36:36,415 - root - INFO - step: 26155 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:36:36,415 - root - INFO - lr: 1.7261e-05 gnorm: 1.12 [16:02:25< 8:29:27] +[titan] 2025-10-05 14:36:47,302 - root - INFO - step: 26160 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 14:36:47,302 - root - INFO - lr: 1.7253e-05 gnorm: 1.08 [16:02:36< 8:29:16] +[titan] 2025-10-05 14:36:58,194 - root - INFO - step: 26165 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 14:36:58,194 - root - INFO - lr: 1.7245e-05 gnorm: 1.08 [16:02:47< 8:29:05] +[titan] 2025-10-05 14:37:09,092 - root - INFO - step: 26170 loss: 2.1112 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8651 +[titan] 2025-10-05 14:37:09,092 - root - INFO - lr: 1.7237e-05 gnorm: 1.11 [16:02:58< 8:28:54] +[titan] 2025-10-05 14:37:20,018 - root - INFO - step: 26175 loss: 2.0516 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 14:37:20,018 - root - INFO - lr: 1.7229e-05 gnorm: 1.08 [16:03:09< 8:28:42] +[titan] 2025-10-05 14:37:30,897 - root - INFO - step: 26180 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:37:30,897 - root - INFO - lr: 
1.7221e-05 gnorm: 1.09 [16:03:20< 8:28:31] +[titan] 2025-10-05 14:37:41,783 - root - INFO - step: 26185 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 14:37:41,783 - root - INFO - lr: 1.7213e-05 gnorm: 1.08 [16:03:31< 8:28:20] +[titan] 2025-10-05 14:37:52,662 - root - INFO - step: 26190 loss: 1.9604 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7331 +[titan] 2025-10-05 14:37:52,662 - root - INFO - lr: 1.7205e-05 gnorm: 1.08 [16:03:42< 8:28:09] +[titan] 2025-10-05 14:38:03,547 - root - INFO - step: 26195 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 14:38:03,548 - root - INFO - lr: 1.7197e-05 gnorm: 1.08 [16:03:52< 8:27:58] +[titan] 2025-10-05 14:38:12,253 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:38:14,439 - root - INFO - step: 26200 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8117 +[titan] 2025-10-05 14:38:14,440 - root - INFO - lr: 1.7189e-05 gnorm: 1.06 [16:04:03< 8:27:47] +[titan] 2025-10-05 14:38:25,405 - root - INFO - step: 26205 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:38:25,405 - root - INFO - lr: 1.7181e-05 gnorm: 1.10 [16:04:14< 8:27:36] +[titan] 2025-10-05 14:38:36,296 - root - INFO - step: 26210 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:38:36,296 - root - INFO - lr: 1.7173e-05 gnorm: 1.08 [16:04:25< 8:27:25] +[titan] 2025-10-05 14:38:47,187 - root - INFO - step: 26215 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 14:38:47,187 - root - INFO - lr: 1.7166e-05 gnorm: 1.13 [16:04:36< 8:27:14] +[titan] 2025-10-05 14:38:58,082 - root - INFO - step: 26220 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:38:58,082 - root - INFO - lr: 1.7158e-05 gnorm: 1.07 [16:04:47< 8:27:02] +[titan] 2025-10-05 14:39:08,974 - root - INFO - step: 26225 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7872 +[titan] 2025-10-05 14:39:08,975 - root - INFO - lr: 1.7150e-05 gnorm: 1.06 [16:04:58< 8:26:51] +[titan] 2025-10-05 14:39:19,875 - root - INFO - step: 26230 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 14:39:19,875 - root - INFO - lr: 
1.7142e-05 gnorm: 1.07 [16:05:09< 8:26:40] +[titan] 2025-10-05 14:39:30,758 - root - INFO - step: 26235 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 14:39:30,759 - root - INFO - lr: 1.7134e-05 gnorm: 1.07 [16:05:20< 8:26:29] +[titan] 2025-10-05 14:39:41,666 - root - INFO - step: 26240 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7444 +[titan] 2025-10-05 14:39:41,666 - root - INFO - lr: 1.7126e-05 gnorm: 1.05 [16:05:31< 8:26:18] +[titan] 2025-10-05 14:39:52,544 - root - INFO - step: 26245 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 14:39:52,544 - root - INFO - lr: 1.7118e-05 gnorm: 1.08 [16:05:41< 8:26:07] +[titan] 2025-10-05 14:40:01,233 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:40:03,416 - root - INFO - step: 26250 loss: 2.0445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:40:03,416 - root - INFO - lr: 1.7110e-05 gnorm: 1.05 [16:05:52< 8:25:56] +[titan] 2025-10-05 14:40:14,284 - root - INFO - step: 26255 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9243 +[titan] 2025-10-05 14:40:14,284 - root - INFO - lr: 1.7102e-05 gnorm: 1.15 [16:06:03< 8:25:45] +[titan] 2025-10-05 14:40:25,201 - root - INFO - step: 26260 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 14:40:25,201 - root - INFO - lr: 1.7094e-05 gnorm: 1.31 [16:06:14< 8:25:33] +[titan] 2025-10-05 14:40:36,067 - root - INFO - step: 26265 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 14:40:36,067 - root - INFO - lr: 1.7086e-05 gnorm: 1.07 [16:06:25< 8:25:22] +[titan] 2025-10-05 14:40:46,986 - root - INFO - step: 26270 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 14:40:46,986 - root - INFO - lr: 1.7078e-05 gnorm: 1.12 [16:06:36< 8:25:11] +[titan] 2025-10-05 14:40:57,856 - root - INFO - step: 26275 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 14:40:57,856 - root - INFO - lr: 1.7071e-05 gnorm: 1.05 [16:06:47< 8:25:00] +[titan] 2025-10-05 14:41:08,711 - root - INFO - step: 26280 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8421 +[titan] 2025-10-05 14:41:08,711 - root - INFO - lr: 
1.7063e-05 gnorm: 1.08 [16:06:58< 8:24:49] +[titan] 2025-10-05 14:41:19,613 - root - INFO - step: 26285 loss: 2.0172 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 14:41:19,613 - root - INFO - lr: 1.7055e-05 gnorm: 1.10 [16:07:08< 8:24:38] +[titan] 2025-10-05 14:41:30,475 - root - INFO - step: 26290 loss: 2.0509 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 14:41:30,475 - root - INFO - lr: 1.7047e-05 gnorm: 1.10 [16:07:19< 8:24:27] +[titan] 2025-10-05 14:41:41,346 - root - INFO - step: 26295 loss: 2.0334 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7973 +[titan] 2025-10-05 14:41:41,347 - root - INFO - lr: 1.7039e-05 gnorm: 1.04 [16:07:30< 8:24:16] +[titan] 2025-10-05 14:41:50,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:41:52,244 - root - INFO - step: 26300 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 14:41:52,245 - root - INFO - lr: 1.7031e-05 gnorm: 1.10 [16:07:41< 8:24:05] +[titan] 2025-10-05 14:42:03,172 - root - INFO - step: 26305 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 14:42:03,172 - root - INFO - lr: 1.7023e-05 gnorm: 1.10 [16:07:52< 8:23:53] +[titan] 2025-10-05 14:42:14,032 - root - INFO - step: 26310 loss: 2.0276 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:42:14,032 - root - INFO - lr: 1.7015e-05 gnorm: 1.10 [16:08:03< 8:23:42] +[titan] 2025-10-05 14:42:24,897 - root - INFO - step: 26315 loss: 2.0611 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:42:24,897 - root - INFO - lr: 1.7007e-05 gnorm: 1.05 [16:08:14< 8:23:31] +[titan] 2025-10-05 14:42:35,737 - root - INFO - step: 26320 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 14:42:35,737 - root - INFO - lr: 1.6999e-05 gnorm: 1.08 [16:08:25< 8:23:20] +[titan] 2025-10-05 14:42:46,593 - root - INFO - step: 26325 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8364 +[titan] 2025-10-05 14:42:46,593 - root - INFO - lr: 1.6992e-05 gnorm: 1.11 [16:08:35< 8:23:09] +[titan] 2025-10-05 14:42:57,467 - root - INFO - step: 26330 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 14:42:57,467 - root - INFO - lr: 
1.6984e-05 gnorm: 1.05 [16:08:46< 8:22:58] +[titan] 2025-10-05 14:43:08,377 - root - INFO - step: 26335 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7862 +[titan] 2025-10-05 14:43:08,377 - root - INFO - lr: 1.6976e-05 gnorm: 1.10 [16:08:57< 8:22:47] +[titan] 2025-10-05 14:43:19,276 - root - INFO - step: 26340 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:43:19,276 - root - INFO - lr: 1.6968e-05 gnorm: 1.09 [16:09:08< 8:22:36] +[titan] 2025-10-05 14:43:30,150 - root - INFO - step: 26345 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 14:43:30,150 - root - INFO - lr: 1.6960e-05 gnorm: 1.09 [16:09:19< 8:22:24] +[titan] 2025-10-05 14:43:38,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:43:41,032 - root - INFO - step: 26350 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 14:43:41,033 - root - INFO - lr: 1.6952e-05 gnorm: 1.12 [16:09:30< 8:22:13] +[titan] 2025-10-05 14:43:51,910 - root - INFO - step: 26355 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 14:43:51,911 - root - INFO - lr: 1.6944e-05 gnorm: 1.10 [16:09:41< 8:22:02] +[titan] 2025-10-05 14:44:02,775 - root - INFO - step: 26360 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 14:44:02,775 - root - INFO - lr: 1.6936e-05 gnorm: 1.11 [16:09:52< 8:21:51] +[titan] 2025-10-05 14:44:13,677 - root - INFO - step: 26365 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 14:44:13,678 - root - INFO - lr: 1.6928e-05 gnorm: 1.09 [16:10:03< 8:21:40] +[titan] 2025-10-05 14:44:24,544 - root - INFO - step: 26370 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 14:44:24,544 - root - INFO - lr: 1.6921e-05 gnorm: 1.08 [16:10:13< 8:21:29] +[titan] 2025-10-05 14:44:35,405 - root - INFO - step: 26375 loss: 2.0563 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8183 +[titan] 2025-10-05 14:44:35,405 - root - INFO - lr: 1.6913e-05 gnorm: 1.09 [16:10:24< 8:21:18] +[titan] 2025-10-05 14:44:46,277 - root - INFO - step: 26380 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 14:44:46,277 - root - INFO - lr: 
1.6905e-05 gnorm: 1.10 [16:10:35< 8:21:07] +[titan] 2025-10-05 14:44:57,156 - root - INFO - step: 26385 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 14:44:57,156 - root - INFO - lr: 1.6897e-05 gnorm: 1.09 [16:10:46< 8:20:55] +[titan] 2025-10-05 14:45:07,991 - root - INFO - step: 26390 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:45:07,991 - root - INFO - lr: 1.6889e-05 gnorm: 1.09 [16:10:57< 8:20:44] +[titan] 2025-10-05 14:45:18,850 - root - INFO - step: 26395 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:45:18,850 - root - INFO - lr: 1.6881e-05 gnorm: 1.09 [16:11:08< 8:20:33] +[titan] 2025-10-05 14:45:27,579 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:45:29,758 - root - INFO - step: 26400 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 14:45:29,758 - root - INFO - lr: 1.6873e-05 gnorm: 1.09 [16:11:19< 8:20:22] +[titan] 2025-10-05 14:45:40,628 - root - INFO - step: 26405 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 14:45:40,628 - root - INFO - lr: 1.6865e-05 gnorm: 1.09 [16:11:29< 8:20:11] +[titan] 2025-10-05 14:45:51,472 - root - INFO - step: 26410 loss: 2.0493 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 14:45:51,472 - root - INFO - lr: 1.6858e-05 gnorm: 1.09 [16:11:40< 8:20:00] +[titan] 2025-10-05 14:46:02,329 - root - INFO - step: 26415 loss: 2.0718 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8312 +[titan] 2025-10-05 14:46:02,329 - root - INFO - lr: 1.6850e-05 gnorm: 1.09 [16:11:51< 8:19:49] +[titan] 2025-10-05 14:46:13,208 - root - INFO - step: 26420 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 14:46:13,208 - root - INFO - lr: 1.6842e-05 gnorm: 1.09 [16:12:02< 8:19:38] +[titan] 2025-10-05 14:46:24,077 - root - INFO - step: 26425 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:46:24,077 - root - INFO - lr: 1.6834e-05 gnorm: 1.09 [16:12:13< 8:19:26] +[titan] 2025-10-05 14:46:34,964 - root - INFO - step: 26430 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8192 +[titan] 2025-10-05 14:46:34,964 - root - INFO - lr: 
1.6826e-05 gnorm: 1.08 [16:12:24< 8:19:15] +[titan] 2025-10-05 14:46:45,809 - root - INFO - step: 26435 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 14:46:45,810 - root - INFO - lr: 1.6818e-05 gnorm: 1.08 [16:12:35< 8:19:04] +[titan] 2025-10-05 14:46:56,653 - root - INFO - step: 26440 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 14:46:56,653 - root - INFO - lr: 1.6810e-05 gnorm: 1.07 [16:12:45< 8:18:53] +[titan] 2025-10-05 14:47:07,510 - root - INFO - step: 26445 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 14:47:07,511 - root - INFO - lr: 1.6803e-05 gnorm: 1.09 [16:12:56< 8:18:42] +[titan] 2025-10-05 14:47:16,212 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:47:18,390 - root - INFO - step: 26450 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:47:18,390 - root - INFO - lr: 1.6795e-05 gnorm: 1.07 [16:13:07< 8:18:31] +[titan] 2025-10-05 14:47:29,255 - root - INFO - step: 26455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:47:29,255 - root - INFO - lr: 1.6787e-05 gnorm: 1.10 [16:13:18< 8:18:20] +[titan] 2025-10-05 14:47:40,123 - root - INFO - step: 26460 loss: 2.0742 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 14:47:40,123 - root - INFO - lr: 1.6779e-05 gnorm: 1.14 [16:13:29< 8:18:09] +[titan] 2025-10-05 14:47:51,023 - root - INFO - step: 26465 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 14:47:51,023 - root - INFO - lr: 1.6771e-05 gnorm: 1.10 [16:13:40< 8:17:57] +[titan] 2025-10-05 14:48:01,888 - root - INFO - step: 26470 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 14:48:01,888 - root - INFO - lr: 1.6763e-05 gnorm: 1.05 [16:13:51< 8:17:46] +[titan] 2025-10-05 14:48:12,750 - root - INFO - step: 26475 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:48:12,750 - root - INFO - lr: 1.6756e-05 gnorm: 1.10 [16:14:02< 8:17:35] +[titan] 2025-10-05 14:48:23,596 - root - INFO - step: 26480 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 14:48:23,597 - root - INFO - lr: 
1.6748e-05 gnorm: 1.05 [16:14:12< 8:17:24] +[titan] 2025-10-05 14:48:34,475 - root - INFO - step: 26485 loss: 2.0429 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:48:34,475 - root - INFO - lr: 1.6740e-05 gnorm: 1.11 [16:14:23< 8:17:13] +[titan] 2025-10-05 14:48:45,347 - root - INFO - step: 26490 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 14:48:45,348 - root - INFO - lr: 1.6732e-05 gnorm: 1.11 [16:14:34< 8:17:02] +[titan] 2025-10-05 14:48:56,251 - root - INFO - step: 26495 loss: 2.1088 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 14:48:56,252 - root - INFO - lr: 1.6724e-05 gnorm: 1.15 [16:14:45< 8:16:51] +[titan] 2025-10-05 14:49:04,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:49:07,097 - root - INFO - step: 26500 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8508 +[titan] 2025-10-05 14:49:07,097 - root - INFO - lr: 1.6716e-05 gnorm: 1.14 [16:14:56< 8:16:40] +[titan] 2025-10-05 14:49:17,975 - root - INFO - step: 26505 loss: 2.0105 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 14:49:17,976 - root - INFO - lr: 1.6709e-05 gnorm: 1.12 [16:15:07< 8:16:28] +[titan] 2025-10-05 14:49:28,870 - root - INFO - step: 26510 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7377 +[titan] 2025-10-05 14:49:28,870 - root - INFO - lr: 1.6701e-05 gnorm: 1.05 [16:15:18< 8:16:17] +[titan] 2025-10-05 14:49:39,744 - root - INFO - step: 26515 loss: 2.0774 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 14:49:39,744 - root - INFO - lr: 1.6693e-05 gnorm: 1.14 [16:15:29< 8:16:06] +[titan] 2025-10-05 14:49:50,606 - root - INFO - step: 26520 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:49:50,606 - root - INFO - lr: 1.6685e-05 gnorm: 1.11 [16:15:39< 8:15:55] +[titan] 2025-10-05 14:50:01,497 - root - INFO - step: 26525 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:50:01,497 - root - INFO - lr: 1.6677e-05 gnorm: 1.07 [16:15:50< 8:15:44] +[titan] 2025-10-05 14:50:12,351 - root - INFO - step: 26530 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:50:12,351 - root - INFO - lr: 
1.6669e-05 gnorm: 1.11 [16:16:01< 8:15:33] +[titan] 2025-10-05 14:50:23,197 - root - INFO - step: 26535 loss: 2.0146 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 14:50:23,197 - root - INFO - lr: 1.6662e-05 gnorm: 1.28 [16:16:12< 8:15:22] +[titan] 2025-10-05 14:50:34,070 - root - INFO - step: 26540 loss: 2.0363 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 14:50:34,070 - root - INFO - lr: 1.6654e-05 gnorm: 1.09 [16:16:23< 8:15:11] +[titan] 2025-10-05 14:50:44,935 - root - INFO - step: 26545 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 14:50:44,935 - root - INFO - lr: 1.6646e-05 gnorm: 1.07 [16:16:34< 8:14:59] +[titan] 2025-10-05 14:50:53,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:50:55,778 - root - INFO - step: 26550 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 14:50:55,778 - root - INFO - lr: 1.6638e-05 gnorm: 1.07 [16:16:45< 8:14:48] +[titan] 2025-10-05 14:51:06,624 - root - INFO - step: 26555 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 14:51:06,625 - root - INFO - lr: 1.6630e-05 gnorm: 1.11 [16:16:55< 8:14:37] +[titan] 2025-10-05 14:51:17,534 - root - INFO - step: 26560 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 14:51:17,534 - root - INFO - lr: 1.6622e-05 gnorm: 1.12 [16:17:06< 8:14:26] +[titan] 2025-10-05 14:51:28,410 - root - INFO - step: 26565 loss: 2.1178 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 14:51:28,410 - root - INFO - lr: 1.6615e-05 gnorm: 1.09 [16:17:17< 8:14:15] +[titan] 2025-10-05 14:51:39,262 - root - INFO - step: 26570 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 14:51:39,262 - root - INFO - lr: 1.6607e-05 gnorm: 1.11 [16:17:28< 8:14:04] +[titan] 2025-10-05 14:51:50,113 - root - INFO - step: 26575 loss: 2.1052 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 14:51:50,113 - root - INFO - lr: 1.6599e-05 gnorm: 1.15 [16:17:39< 8:13:53] +[titan] 2025-10-05 14:52:00,978 - root - INFO - step: 26580 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:52:00,978 - root - INFO - lr: 
1.6591e-05 gnorm: 1.09 [16:17:50< 8:13:42] +[titan] 2025-10-05 14:52:11,826 - root - INFO - step: 26585 loss: 2.0519 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 14:52:11,826 - root - INFO - lr: 1.6583e-05 gnorm: 1.14 [16:18:01< 8:13:30] +[titan] 2025-10-05 14:52:22,714 - root - INFO - step: 26590 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 14:52:22,714 - root - INFO - lr: 1.6576e-05 gnorm: 1.09 [16:18:12< 8:13:19] +[titan] 2025-10-05 14:52:33,578 - root - INFO - step: 26595 loss: 2.0442 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 14:52:33,578 - root - INFO - lr: 1.6568e-05 gnorm: 1.08 [16:18:22< 8:13:08] +[titan] 2025-10-05 14:52:42,211 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:52:44,379 - root - INFO - step: 26600 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,340 tflops: 420.92 mfu: 42.56% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:52:44,379 - root - INFO - lr: 1.6560e-05 gnorm: 1.10 [16:18:33< 8:12:57] +[titan] 2025-10-05 14:52:55,225 - root - INFO - step: 26605 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 14:52:55,225 - root - INFO - lr: 1.6552e-05 gnorm: 1.10 [16:18:44< 8:12:46] +[titan] 2025-10-05 14:53:06,069 - root - INFO - step: 26610 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:53:06,070 - root - INFO - lr: 1.6544e-05 gnorm: 1.07 [16:18:55< 8:12:35] +[titan] 2025-10-05 14:53:16,896 - root - INFO - step: 26615 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 14:53:16,896 - root - INFO - lr: 1.6537e-05 gnorm: 1.08 [16:19:06< 8:12:24] +[titan] 2025-10-05 14:53:27,734 - root - INFO - step: 26620 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7725 +[titan] 2025-10-05 14:53:27,734 - root - INFO - lr: 1.6529e-05 gnorm: 1.15 [16:19:17< 8:12:13] +[titan] 2025-10-05 14:53:36,731 - root - INFO - Dumping profiler traces at step 26624 +[titan] 2025-10-05 14:53:36,771 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:53:38,970 - root - INFO - step: 26625 loss: 2.0899 memory: 118.84GiB(85.28%) tps: 29,164 tflops: 404.60 mfu: 40.91% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8466 +[titan] 2025-10-05 14:53:38,971 - root - INFO - lr: 1.6521e-05 gnorm: 1.13 [16:19:28< 8:12:02] +[titan] 2025-10-05 14:53:49,827 - root - INFO - step: 26630 loss: 
2.0019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 14:53:49,827 - root - INFO - lr: 1.6513e-05 gnorm: 1.09 [16:19:39< 8:11:50] +[titan] 2025-10-05 14:54:00,657 - root - INFO - step: 26635 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 14:54:00,657 - root - INFO - lr: 1.6505e-05 gnorm: 1.12 [16:19:49< 8:11:39] +[titan] 2025-10-05 14:54:11,514 - root - INFO - step: 26640 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8209 +[titan] 2025-10-05 14:54:11,514 - root - INFO - lr: 1.6498e-05 gnorm: 1.10 [16:20:00< 8:11:28] +[titan] 2025-10-05 14:54:22,378 - root - INFO - step: 26645 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 14:54:22,378 - root - INFO - lr: 1.6490e-05 gnorm: 1.06 [16:20:11< 8:11:17] +[titan] 2025-10-05 14:54:31,074 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:54:33,259 - root - INFO - step: 26650 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 14:54:33,259 - root - INFO - lr: 1.6482e-05 gnorm: 1.12 [16:20:22< 8:11:06] +[titan] 2025-10-05 14:54:44,181 - root - INFO - step: 26655 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 14:54:44,181 - root - INFO - lr: 1.6474e-05 gnorm: 1.10 [16:20:33< 8:10:55] +[titan] 2025-10-05 14:54:55,045 - root - INFO - step: 26660 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:54:55,046 - root - INFO - lr: 1.6467e-05 gnorm: 1.09 [16:20:44< 8:10:44] +[titan] 2025-10-05 14:55:05,921 - root - INFO - step: 26665 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8484 +[titan] 2025-10-05 14:55:05,921 - root - INFO - lr: 1.6459e-05 gnorm: 1.12 [16:20:55< 8:10:33] +[titan] 2025-10-05 14:55:16,810 - root - INFO - step: 26670 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 14:55:16,810 - root - INFO - lr: 1.6451e-05 gnorm: 1.08 [16:21:06< 8:10:22] +[titan] 2025-10-05 14:55:27,678 - root - INFO - step: 26675 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 14:55:27,679 - root - INFO - lr: 1.6443e-05 gnorm: 1.09 [16:21:16< 8:10:10] +[titan] 2025-10-05 14:55:38,537 - root - INFO - step: 26680 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 14:55:38,537 - root - INFO - lr: 
1.6435e-05 gnorm: 1.10 [16:21:27< 8:09:59] +[titan] 2025-10-05 14:55:49,438 - root - INFO - step: 26685 loss: 2.0107 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 14:55:49,439 - root - INFO - lr: 1.6428e-05 gnorm: 1.11 [16:21:38< 8:09:48] +[titan] 2025-10-05 14:56:00,304 - root - INFO - step: 26690 loss: 2.0743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:56:00,304 - root - INFO - lr: 1.6420e-05 gnorm: 1.11 [16:21:49< 8:09:37] +[titan] 2025-10-05 14:56:11,149 - root - INFO - step: 26695 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8035 +[titan] 2025-10-05 14:56:11,149 - root - INFO - lr: 1.6412e-05 gnorm: 1.10 [16:22:00< 8:09:26] +[titan] 2025-10-05 14:56:19,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:56:22,000 - root - INFO - step: 26700 loss: 2.0496 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8111 +[titan] 2025-10-05 14:56:22,000 - root - INFO - lr: 1.6404e-05 gnorm: 1.07 [16:22:11< 8:09:15] +[titan] 2025-10-05 14:56:32,858 - root - INFO - step: 26705 loss: 1.9909 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 14:56:32,858 - root - INFO - lr: 1.6397e-05 gnorm: 1.07 [16:22:22< 8:09:04] +[titan] 2025-10-05 14:56:43,728 - root - INFO - step: 26710 loss: 2.1246 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:56:43,729 - root - INFO - lr: 1.6389e-05 gnorm: 1.12 [16:22:33< 8:08:53] +[titan] 2025-10-05 14:56:54,594 - root - INFO - step: 26715 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 14:56:54,594 - root - INFO - lr: 1.6381e-05 gnorm: 1.08 [16:22:43< 8:08:41] +[titan] 2025-10-05 14:57:05,497 - root - INFO - step: 26720 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 14:57:05,497 - root - INFO - lr: 1.6373e-05 gnorm: 1.08 [16:22:54< 8:08:30] +[titan] 2025-10-05 14:57:16,361 - root - INFO - step: 26725 loss: 2.0885 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 14:57:16,361 - root - INFO - lr: 1.6366e-05 gnorm: 1.07 [16:23:05< 8:08:19] +[titan] 2025-10-05 14:57:27,210 - root - INFO - step: 26730 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 14:57:27,211 - root - INFO - lr: 
1.6358e-05 gnorm: 1.10 [16:23:16< 8:08:08] +[titan] 2025-10-05 14:57:38,049 - root - INFO - step: 26735 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 14:57:38,050 - root - INFO - lr: 1.6350e-05 gnorm: 1.11 [16:23:27< 8:07:57] +[titan] 2025-10-05 14:57:48,918 - root - INFO - step: 26740 loss: 2.0984 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8556 +[titan] 2025-10-05 14:57:48,918 - root - INFO - lr: 1.6342e-05 gnorm: 1.15 [16:23:38< 8:07:46] +[titan] 2025-10-05 14:57:59,773 - root - INFO - step: 26745 loss: 2.0328 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 14:57:59,773 - root - INFO - lr: 1.6335e-05 gnorm: 1.10 [16:23:49< 8:07:35] +[titan] 2025-10-05 14:58:08,498 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:58:10,688 - root - INFO - step: 26750 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 14:58:10,689 - root - INFO - lr: 1.6327e-05 gnorm: 1.10 [16:23:59< 8:07:24] +[titan] 2025-10-05 14:58:21,558 - root - INFO - step: 26755 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 14:58:21,559 - root - INFO - lr: 1.6319e-05 gnorm: 1.10 [16:24:10< 8:07:12] +[titan] 2025-10-05 14:58:32,424 - root - INFO - step: 26760 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:58:32,425 - root - INFO - lr: 1.6311e-05 gnorm: 1.08 [16:24:21< 8:07:01] +[titan] 2025-10-05 14:58:43,310 - root - INFO - step: 26765 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:58:43,310 - root - INFO - lr: 1.6304e-05 gnorm: 1.07 [16:24:32< 8:06:50] +[titan] 2025-10-05 14:58:54,204 - root - INFO - step: 26770 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7566 +[titan] 2025-10-05 14:58:54,204 - root - INFO - lr: 1.6296e-05 gnorm: 1.08 [16:24:43< 8:06:39] +[titan] 2025-10-05 14:59:05,077 - root - INFO - step: 26775 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:59:05,077 - root - INFO - lr: 1.6288e-05 gnorm: 1.09 [16:24:54< 8:06:28] +[titan] 2025-10-05 14:59:15,970 - root - INFO - step: 26780 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:59:15,970 - root - INFO - lr: 
1.6280e-05 gnorm: 1.13 [16:25:05< 8:06:17] +[titan] 2025-10-05 14:59:26,894 - root - INFO - step: 26785 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 14:59:26,894 - root - INFO - lr: 1.6273e-05 gnorm: 1.11 [16:25:16< 8:06:06] +[titan] 2025-10-05 14:59:37,753 - root - INFO - step: 26790 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 14:59:37,753 - root - INFO - lr: 1.6265e-05 gnorm: 1.03 [16:25:27< 8:05:55] +[titan] 2025-10-05 14:59:48,629 - root - INFO - step: 26795 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:59:48,629 - root - INFO - lr: 1.6257e-05 gnorm: 1.08 [16:25:37< 8:05:44] +[titan] 2025-10-05 14:59:57,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:59:59,514 - root - INFO - step: 26800 loss: 1.9889 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:59:59,514 - root - INFO - lr: 1.6249e-05 gnorm: 1.12 [16:25:48< 8:05:32] +[titan] 2025-10-05 15:00:10,404 - root - INFO - step: 26805 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 15:00:10,405 - root - INFO - lr: 1.6242e-05 gnorm: 1.09 [16:25:59< 8:05:21] +[titan] 2025-10-05 15:00:21,298 - root - INFO - step: 26810 loss: 2.0441 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8079 +[titan] 2025-10-05 15:00:21,298 - root - INFO - lr: 1.6234e-05 gnorm: 1.09 [16:26:10< 8:05:10] +[titan] 2025-10-05 15:00:32,228 - root - INFO - step: 26815 loss: 2.0782 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 15:00:32,228 - root - INFO - lr: 1.6226e-05 gnorm: 1.15 [16:26:21< 8:04:59] +[titan] 2025-10-05 15:00:43,121 - root - INFO - step: 26820 loss: 2.0556 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 15:00:43,121 - root - INFO - lr: 1.6219e-05 gnorm: 1.10 [16:26:32< 8:04:48] +[titan] 2025-10-05 15:00:54,008 - root - INFO - step: 26825 loss: 2.0473 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8094 +[titan] 2025-10-05 15:00:54,008 - root - INFO - lr: 1.6211e-05 gnorm: 1.16 [16:26:43< 8:04:37] +[titan] 2025-10-05 15:01:04,889 - root - INFO - step: 26830 loss: 2.0024 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 15:01:04,889 - root - INFO - lr: 
1.6203e-05 gnorm: 1.09 [16:26:54< 8:04:26] +[titan] 2025-10-05 15:01:15,765 - root - INFO - step: 26835 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 15:01:15,765 - root - INFO - lr: 1.6195e-05 gnorm: 1.07 [16:27:05< 8:04:15] +[titan] 2025-10-05 15:01:26,630 - root - INFO - step: 26840 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8050 +[titan] 2025-10-05 15:01:26,631 - root - INFO - lr: 1.6188e-05 gnorm: 1.11 [16:27:15< 8:04:04] +[titan] 2025-10-05 15:01:37,602 - root - INFO - step: 26845 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:01:37,602 - root - INFO - lr: 1.6180e-05 gnorm: 1.12 [16:27:26< 8:03:53] +[titan] 2025-10-05 15:01:46,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:01:48,489 - root - INFO - step: 26850 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 15:01:48,490 - root - INFO - lr: 1.6172e-05 gnorm: 1.08 [16:27:37< 8:03:41] +[titan] 2025-10-05 15:01:59,381 - root - INFO - step: 26855 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 15:01:59,382 - root - INFO - lr: 1.6165e-05 gnorm: 1.07 [16:27:48< 8:03:30] +[titan] 2025-10-05 15:02:10,248 - root - INFO - step: 26860 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:02:10,248 - root - INFO - lr: 1.6157e-05 gnorm: 1.10 [16:27:59< 8:03:19] +[titan] 2025-10-05 15:02:21,138 - root - INFO - step: 26865 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 15:02:21,139 - root - INFO - lr: 1.6149e-05 gnorm: 1.12 [16:28:10< 8:03:08] +[titan] 2025-10-05 15:02:32,019 - root - INFO - step: 26870 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 15:02:32,019 - root - INFO - lr: 1.6141e-05 gnorm: 1.07 [16:28:21< 8:02:57] +[titan] 2025-10-05 15:02:42,942 - root - INFO - step: 26875 loss: 2.0517 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 15:02:42,942 - root - INFO - lr: 1.6134e-05 gnorm: 1.12 [16:28:32< 8:02:46] +[titan] 2025-10-05 15:02:53,877 - root - INFO - step: 26880 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 15:02:53,877 - root - INFO - lr: 
1.6126e-05 gnorm: 1.13 [16:28:43< 8:02:35] +[titan] 2025-10-05 15:03:04,754 - root - INFO - step: 26885 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 15:03:04,754 - root - INFO - lr: 1.6118e-05 gnorm: 1.16 [16:28:54< 8:02:24] +[titan] 2025-10-05 15:03:15,633 - root - INFO - step: 26890 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:03:15,633 - root - INFO - lr: 1.6111e-05 gnorm: 1.11 [16:29:04< 8:02:13] +[titan] 2025-10-05 15:03:26,500 - root - INFO - step: 26895 loss: 2.0231 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 15:03:26,500 - root - INFO - lr: 1.6103e-05 gnorm: 1.12 [16:29:15< 8:02:02] +[titan] 2025-10-05 15:03:35,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:03:37,367 - root - INFO - step: 26900 loss: 2.0325 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 15:03:37,368 - root - INFO - lr: 1.6095e-05 gnorm: 1.11 [16:29:26< 8:01:50] +[titan] 2025-10-05 15:03:48,289 - root - INFO - step: 26905 loss: 2.0322 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7960 +[titan] 2025-10-05 15:03:48,289 - root - INFO - lr: 1.6088e-05 gnorm: 1.12 [16:29:37< 8:01:39] +[titan] 2025-10-05 15:03:59,203 - root - INFO - step: 26910 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 15:03:59,203 - root - INFO - lr: 1.6080e-05 gnorm: 1.17 [16:29:48< 8:01:28] +[titan] 2025-10-05 15:04:10,072 - root - INFO - step: 26915 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 15:04:10,072 - root - INFO - lr: 1.6072e-05 gnorm: 1.08 [16:29:59< 8:01:17] +[titan] 2025-10-05 15:04:20,947 - root - INFO - step: 26920 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8313 +[titan] 2025-10-05 15:04:20,948 - root - INFO - lr: 1.6065e-05 gnorm: 1.11 [16:30:10< 8:01:06] +[titan] 2025-10-05 15:04:31,818 - root - INFO - step: 26925 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:04:31,818 - root - INFO - lr: 1.6057e-05 gnorm: 1.10 [16:30:21< 8:00:55] +[titan] 2025-10-05 15:04:42,737 - root - INFO - step: 26930 loss: 1.9755 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 15:04:42,737 - root - INFO - lr: 
1.6049e-05 gnorm: 1.07 [16:30:32< 8:00:44] +[titan] 2025-10-05 15:04:53,614 - root - INFO - step: 26935 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 15:04:53,614 - root - INFO - lr: 1.6041e-05 gnorm: 1.10 [16:30:42< 8:00:33] +[titan] 2025-10-05 15:05:04,493 - root - INFO - step: 26940 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 15:05:04,493 - root - INFO - lr: 1.6034e-05 gnorm: 1.16 [16:30:53< 8:00:22] +[titan] 2025-10-05 15:05:15,413 - root - INFO - step: 26945 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 15:05:15,413 - root - INFO - lr: 1.6026e-05 gnorm: 1.09 [16:31:04< 8:00:10] +[titan] 2025-10-05 15:05:24,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:05:26,301 - root - INFO - step: 26950 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:05:26,302 - root - INFO - lr: 1.6018e-05 gnorm: 1.13 [16:31:15< 7:59:59] +[titan] 2025-10-05 15:05:37,170 - root - INFO - step: 26955 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:05:37,170 - root - INFO - lr: 1.6011e-05 gnorm: 1.07 [16:31:26< 7:59:48] +[titan] 2025-10-05 15:05:48,097 - root - INFO - step: 26960 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 15:05:48,097 - root - INFO - lr: 1.6003e-05 gnorm: 1.11 [16:31:37< 7:59:37] +[titan] 2025-10-05 15:05:58,956 - root - INFO - step: 26965 loss: 2.0670 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 15:05:58,956 - root - INFO - lr: 1.5995e-05 gnorm: 1.13 [16:31:48< 7:59:26] +[titan] 2025-10-05 15:06:09,830 - root - INFO - step: 26970 loss: 1.9712 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 15:06:09,830 - root - INFO - lr: 1.5988e-05 gnorm: 1.09 [16:31:59< 7:59:15] +[titan] 2025-10-05 15:06:20,738 - root - INFO - step: 26975 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 15:06:20,738 - root - INFO - lr: 1.5980e-05 gnorm: 1.14 [16:32:10< 7:59:04] +[titan] 2025-10-05 15:06:31,607 - root - INFO - step: 26980 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 15:06:31,607 - root - INFO - lr: 
1.5972e-05 gnorm: 1.13 [16:32:20< 7:58:53] +[titan] 2025-10-05 15:06:42,557 - root - INFO - step: 26985 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 29,926 tflops: 415.18 mfu: 41.98% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8708 +[titan] 2025-10-05 15:06:42,557 - root - INFO - lr: 1.5965e-05 gnorm: 1.13 [16:32:31< 7:58:42] +[titan] 2025-10-05 15:06:53,430 - root - INFO - step: 26990 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:06:53,431 - root - INFO - lr: 1.5957e-05 gnorm: 1.11 [16:32:42< 7:58:31] +[titan] 2025-10-05 15:07:04,312 - root - INFO - step: 26995 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 15:07:04,312 - root - INFO - lr: 1.5949e-05 gnorm: 1.11 [16:32:53< 7:58:19] +[titan] 2025-10-05 15:07:13,022 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:07:15,207 - root - INFO - step: 27000 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 15:07:15,207 - root - INFO - lr: 1.5942e-05 gnorm: 1.13 [16:33:04< 7:58:08] +[titan] 2025-10-05 15:07:26,138 - root - INFO - step: 27005 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:07:26,138 - root - INFO - lr: 1.5934e-05 gnorm: 1.14 [16:33:15< 7:57:57] +[titan] 2025-10-05 15:07:37,028 - root - INFO - step: 27010 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 15:07:37,028 - root - INFO - lr: 1.5926e-05 gnorm: 1.14 [16:33:26< 7:57:46] +[titan] 2025-10-05 15:07:47,970 - root - INFO - step: 27015 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 15:07:47,971 - root - INFO - lr: 1.5919e-05 gnorm: 1.12 [16:33:37< 7:57:35] +[titan] 2025-10-05 15:07:58,854 - root - INFO - step: 27020 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7853 +[titan] 2025-10-05 15:07:58,855 - root - INFO - lr: 1.5911e-05 gnorm: 1.15 [16:33:48< 7:57:24] +[titan] 2025-10-05 15:08:09,736 - root - INFO - step: 27025 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:08:09,736 - root - INFO - lr: 1.5903e-05 gnorm: 1.12 [16:33:59< 7:57:13] +[titan] 2025-10-05 15:08:20,606 - root - INFO - step: 27030 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 15:08:20,606 - root - INFO - lr: 
1.5896e-05 gnorm: 1.08 [16:34:09< 7:57:02] +[titan] 2025-10-05 15:08:31,489 - root - INFO - step: 27035 loss: 1.9763 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 15:08:31,490 - root - INFO - lr: 1.5888e-05 gnorm: 1.09 [16:34:20< 7:56:51] +[titan] 2025-10-05 15:08:42,436 - root - INFO - step: 27040 loss: 2.0880 memory: 118.84GiB(85.28%) tps: 29,936 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 15:08:42,436 - root - INFO - lr: 1.5881e-05 gnorm: 1.12 [16:34:31< 7:56:40] +[titan] 2025-10-05 15:08:53,408 - root - INFO - step: 27045 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 15:08:53,409 - root - INFO - lr: 1.5873e-05 gnorm: 1.16 [16:34:42< 7:56:28] +[titan] 2025-10-05 15:09:02,118 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:09:04,301 - root - INFO - step: 27050 loss: 2.0295 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7940 +[titan] 2025-10-05 15:09:04,301 - root - INFO - lr: 1.5865e-05 gnorm: 1.11 [16:34:53< 7:56:17] +[titan] 2025-10-05 15:09:15,193 - root - INFO - step: 27055 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 15:09:15,194 - root - INFO - lr: 1.5858e-05 gnorm: 1.10 [16:35:04< 7:56:06] +[titan] 2025-10-05 15:09:26,100 - root - INFO - step: 27060 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8091 +[titan] 2025-10-05 15:09:26,100 - root - INFO - lr: 1.5850e-05 gnorm: 1.12 [16:35:15< 7:55:55] +[titan] 2025-10-05 15:09:36,976 - root - INFO - step: 27065 loss: 1.9733 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7439 +[titan] 2025-10-05 15:09:36,976 - root - INFO - lr: 1.5842e-05 gnorm: 1.09 [16:35:26< 7:55:44] +[titan] 2025-10-05 15:09:47,942 - root - INFO - step: 27070 loss: 2.0633 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.57 mfu: 41.92% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 15:09:47,942 - root - INFO - lr: 1.5835e-05 gnorm: 1.09 [16:35:37< 7:55:33] +[titan] 2025-10-05 15:09:58,812 - root - INFO - step: 27075 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 15:09:58,812 - root - INFO - lr: 1.5827e-05 gnorm: 1.08 [16:35:48< 7:55:22] +[titan] 2025-10-05 15:10:09,685 - root - INFO - step: 27080 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 15:10:09,685 - root - INFO - lr: 
1.5819e-05 gnorm: 1.09 [16:35:58< 7:55:11] +[titan] 2025-10-05 15:10:20,555 - root - INFO - step: 27085 loss: 2.0147 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 15:10:20,556 - root - INFO - lr: 1.5812e-05 gnorm: 1.07 [16:36:09< 7:55:00] +[titan] 2025-10-05 15:10:31,449 - root - INFO - step: 27090 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 15:10:31,449 - root - INFO - lr: 1.5804e-05 gnorm: 1.06 [16:36:20< 7:54:49] +[titan] 2025-10-05 15:10:42,317 - root - INFO - step: 27095 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:10:42,317 - root - INFO - lr: 1.5797e-05 gnorm: 1.08 [16:36:31< 7:54:37] +[titan] 2025-10-05 15:10:51,064 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:10:53,251 - root - INFO - step: 27100 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:10:53,252 - root - INFO - lr: 1.5789e-05 gnorm: 1.15 [16:36:42< 7:54:26] +[titan] 2025-10-05 15:11:04,173 - root - INFO - step: 27105 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 15:11:04,174 - root - INFO - lr: 1.5781e-05 gnorm: 1.12 [16:36:53< 7:54:15] +[titan] 2025-10-05 15:11:15,060 - root - INFO - step: 27110 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:11:15,060 - root - INFO - lr: 1.5774e-05 gnorm: 1.15 [16:37:04< 7:54:04] +[titan] 2025-10-05 15:11:25,971 - root - INFO - step: 27115 loss: 2.0649 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:11:25,971 - root - INFO - lr: 1.5766e-05 gnorm: 1.12 [16:37:15< 7:53:53] +[titan] 2025-10-05 15:11:36,858 - root - INFO - step: 27120 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 15:11:36,859 - root - INFO - lr: 1.5759e-05 gnorm: 1.09 [16:37:26< 7:53:42] +[titan] 2025-10-05 15:11:47,776 - root - INFO - step: 27125 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7663 +[titan] 2025-10-05 15:11:47,777 - root - INFO - lr: 1.5751e-05 gnorm: 1.08 [16:37:37< 7:53:31] +[titan] 2025-10-05 15:11:58,636 - root - INFO - step: 27130 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:11:58,636 - root - INFO - lr: 
1.5743e-05 gnorm: 1.13 [16:37:47< 7:53:20] +[titan] 2025-10-05 15:12:09,626 - root - INFO - step: 27135 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 29,816 tflops: 413.65 mfu: 41.83% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 15:12:09,626 - root - INFO - lr: 1.5736e-05 gnorm: 1.11 [16:37:58< 7:53:09] +[titan] 2025-10-05 15:12:11,995 - root - INFO - Dumping profiler traces at step 27136 +[titan] 2025-10-05 15:12:12,030 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:12:20,793 - root - INFO - step: 27140 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 29,344 tflops: 407.10 mfu: 41.16% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 15:12:20,794 - root - INFO - lr: 1.5728e-05 gnorm: 1.11 [16:38:10< 7:52:58] +[titan] 2025-10-05 15:12:31,651 - root - INFO - step: 27145 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 15:12:31,651 - root - INFO - lr: 1.5720e-05 gnorm: 1.09 [16:38:20< 7:52:47] +[titan] 2025-10-05 15:12:40,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:12:42,534 - root - INFO - step: 27150 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7607 +[titan] 2025-10-05 15:12:42,534 - root - INFO - lr: 1.5713e-05 gnorm: 1.06 [16:38:31< 7:52:36] +[titan] 2025-10-05 15:12:53,435 - root - INFO - step: 27155 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:12:53,435 - root - INFO - lr: 1.5705e-05 gnorm: 1.08 [16:38:42< 7:52:24] +[titan] 2025-10-05 15:13:04,284 - root - INFO - step: 27160 loss: 2.0466 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8071 +[titan] 2025-10-05 15:13:04,284 - root - INFO - lr: 1.5698e-05 gnorm: 1.11 [16:38:53< 7:52:13] +[titan] 2025-10-05 15:13:15,182 - root - INFO - step: 27165 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 15:13:15,182 - root - INFO - lr: 1.5690e-05 gnorm: 1.08 [16:39:04< 7:52:02] +[titan] 2025-10-05 15:13:26,046 - root - INFO - step: 27170 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 15:13:26,046 - root - INFO - lr: 1.5682e-05 gnorm: 1.08 [16:39:15< 7:51:51] +[titan] 2025-10-05 15:13:36,900 - root - INFO - step: 27175 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7416 +[titan] 2025-10-05 15:13:36,900 - root - INFO - lr: 1.5675e-05 gnorm: 1.10 [16:39:26< 7:51:40] +[titan] 2025-10-05 15:13:47,796 - root - INFO - step: 27180 loss: 2.1244 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8768 +[titan] 2025-10-05 15:13:47,796 - root - INFO - lr: 
1.5667e-05 gnorm: 1.09 [16:39:37< 7:51:29] +[titan] 2025-10-05 15:13:58,664 - root - INFO - step: 27185 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:13:58,664 - root - INFO - lr: 1.5660e-05 gnorm: 1.10 [16:39:47< 7:51:18] +[titan] 2025-10-05 15:14:09,527 - root - INFO - step: 27190 loss: 2.0164 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:14:09,527 - root - INFO - lr: 1.5652e-05 gnorm: 1.09 [16:39:58< 7:51:07] +[titan] 2025-10-05 15:14:20,387 - root - INFO - step: 27195 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:14:20,387 - root - INFO - lr: 1.5645e-05 gnorm: 1.07 [16:40:09< 7:50:56] +[titan] 2025-10-05 15:14:29,129 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:14:31,314 - root - INFO - step: 27200 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 15:14:31,314 - root - INFO - lr: 1.5637e-05 gnorm: 1.10 [16:40:20< 7:50:44][39m +[titan] 2025-10-05 15:14:42,185 - root - INFO - step: 27205 loss: 2.0377 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:14:42,186 - root - INFO - lr: 1.5629e-05 gnorm: 1.08 [16:40:31< 7:50:33] +[titan] 2025-10-05 15:14:53,065 - root - INFO - step: 27210 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 15:14:53,065 - root - INFO - lr: 1.5622e-05 gnorm: 1.12 [16:40:42< 7:50:22] +[titan] 2025-10-05 15:15:03,941 - root - INFO - step: 27215 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 15:15:03,941 - root - INFO - lr: 1.5614e-05 gnorm: 1.11 [16:40:53< 7:50:11] +[titan] 2025-10-05 15:15:14,801 - root - INFO - step: 27220 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 15:15:14,801 - root - INFO - lr: 1.5607e-05 gnorm: 1.10 [16:41:04< 7:50:00] +[titan] 2025-10-05 15:15:25,653 - root - INFO - step: 27225 loss: 1.9878 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:15:25,653 - root - INFO - lr: 1.5599e-05 gnorm: 1.09 [16:41:14< 7:49:49] +[titan] 2025-10-05 15:15:36,551 - root - INFO - step: 27230 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8280 +[titan] 2025-10-05 15:15:36,551 - root - INFO - lr: 
1.5591e-05 gnorm: 1.13 [16:41:25< 7:49:38] +[titan] 2025-10-05 15:15:47,426 - root - INFO - step: 27235 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 15:15:47,426 - root - INFO - lr: 1.5584e-05 gnorm: 1.10 [16:41:36< 7:49:27] +[titan] 2025-10-05 15:15:58,353 - root - INFO - step: 27240 loss: 2.0437 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 15:15:58,354 - root - INFO - lr: 1.5576e-05 gnorm: 1.08 [16:41:47< 7:49:16] +[titan] 2025-10-05 15:16:09,201 - root - INFO - step: 27245 loss: 2.0207 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 15:16:09,202 - root - INFO - lr: 1.5569e-05 gnorm: 1.11 [16:41:58< 7:49:05] +[titan] 2025-10-05 15:16:17,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:16:20,072 - root - INFO - step: 27250 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7443 +[titan] 2025-10-05 15:16:20,072 - root - INFO - lr: 1.5561e-05 gnorm: 1.08 [16:42:09< 7:48:53] +[titan] 2025-10-05 15:16:30,931 - root - INFO - step: 27255 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:16:30,931 - root - INFO - lr: 1.5554e-05 gnorm: 1.12 [16:42:20< 7:48:42] +[titan] 2025-10-05 15:16:41,802 - root - INFO - step: 27260 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 15:16:41,803 - root - INFO - lr: 1.5546e-05 gnorm: 1.15 [16:42:31< 7:48:31] +[titan] 2025-10-05 15:16:52,730 - root - INFO - step: 27265 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 15:16:52,730 - root - INFO - lr: 1.5539e-05 gnorm: 1.08 [16:42:41< 7:48:20] +[titan] 2025-10-05 15:17:03,595 - root - INFO - step: 27270 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 15:17:03,596 - root - INFO - lr: 1.5531e-05 gnorm: 1.08 [16:42:52< 7:48:09] +[titan] 2025-10-05 15:17:14,444 - root - INFO - step: 27275 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 15:17:14,444 - root - INFO - lr: 1.5523e-05 gnorm: 1.11 [16:43:03< 7:47:58] +[titan] 2025-10-05 15:17:25,317 - root - INFO - step: 27280 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 15:17:25,317 - root - INFO - lr: 
1.5516e-05 gnorm: 1.11 [16:43:14< 7:47:47] +[titan] 2025-10-05 15:17:36,180 - root - INFO - step: 27285 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 15:17:36,180 - root - INFO - lr: 1.5508e-05 gnorm: 1.10 [16:43:25< 7:47:36] +[titan] 2025-10-05 15:17:47,037 - root - INFO - step: 27290 loss: 2.0421 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 15:17:47,037 - root - INFO - lr: 1.5501e-05 gnorm: 1.10 [16:43:36< 7:47:25] +[titan] 2025-10-05 15:17:57,971 - root - INFO - step: 27295 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:17:57,971 - root - INFO - lr: 1.5493e-05 gnorm: 1.10 [16:43:47< 7:47:13] +[titan] 2025-10-05 15:18:06,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:18:08,853 - root - INFO - step: 27300 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 15:18:08,854 - root - INFO - lr: 1.5486e-05 gnorm: 1.09 [16:43:58< 7:47:02] +[titan] 2025-10-05 15:18:19,735 - root - INFO - step: 27305 loss: 2.0092 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 15:18:19,735 - root - INFO - lr: 1.5478e-05 gnorm: 1.11 [16:44:08< 7:46:51] +[titan] 2025-10-05 15:18:30,612 - root - INFO - step: 27310 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 15:18:30,612 - root - INFO - lr: 1.5471e-05 gnorm: 1.10 [16:44:19< 7:46:40] +[titan] 2025-10-05 15:18:41,512 - root - INFO - step: 27315 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 15:18:41,512 - root - INFO - lr: 1.5463e-05 gnorm: 1.10 [16:44:30< 7:46:29] +[titan] 2025-10-05 15:18:52,399 - root - INFO - step: 27320 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 15:18:52,399 - root - INFO - lr: 1.5455e-05 gnorm: 1.12 [16:44:41< 7:46:18] +[titan] 2025-10-05 15:19:03,317 - root - INFO - step: 27325 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 15:19:03,317 - root - INFO - lr: 1.5448e-05 gnorm: 1.13 [16:44:52< 7:46:07] +[titan] 2025-10-05 15:19:14,195 - root - INFO - step: 27330 loss: 2.1168 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 15:19:14,195 - root - INFO - lr: 
1.5440e-05 gnorm: 1.12 [16:45:03< 7:45:56] +[titan] 2025-10-05 15:19:25,053 - root - INFO - step: 27335 loss: 2.0622 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8236 +[titan] 2025-10-05 15:19:25,053 - root - INFO - lr: 1.5433e-05 gnorm: 1.14 [16:45:14< 7:45:45] +[titan] 2025-10-05 15:19:35,914 - root - INFO - step: 27340 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:19:35,914 - root - INFO - lr: 1.5425e-05 gnorm: 1.09 [16:45:25< 7:45:34] +[titan] 2025-10-05 15:19:46,795 - root - INFO - step: 27345 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8129 +[titan] 2025-10-05 15:19:46,796 - root - INFO - lr: 1.5418e-05 gnorm: 1.14 [16:45:36< 7:45:22] +[titan] 2025-10-05 15:19:55,519 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:19:57,711 - root - INFO - step: 27350 loss: 2.0646 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:19:57,711 - root - INFO - lr: 1.5410e-05 gnorm: 1.12 [16:45:46< 7:45:11] +[titan] 2025-10-05 15:20:08,574 - root - INFO - step: 27355 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:20:08,574 - root - INFO - lr: 1.5403e-05 gnorm: 1.10 [16:45:57< 7:45:00] +[titan] 2025-10-05 15:20:19,517 - root - INFO - step: 27360 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8070 +[titan] 2025-10-05 15:20:19,518 - root - INFO - lr: 1.5395e-05 gnorm: 1.10 [16:46:08< 7:44:49] +[titan] 2025-10-05 15:20:30,400 - root - INFO - step: 27365 loss: 2.0266 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 15:20:30,401 - root - INFO - lr: 1.5388e-05 gnorm: 1.10 [16:46:19< 7:44:38] +[titan] 2025-10-05 15:20:41,257 - root - INFO - step: 27370 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 15:20:41,257 - root - INFO - lr: 1.5380e-05 gnorm: 1.09 [16:46:30< 7:44:27] +[titan] 2025-10-05 15:20:52,147 - root - INFO - step: 27375 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8366 +[titan] 2025-10-05 15:20:52,147 - root - INFO - lr: 1.5373e-05 gnorm: 1.09 [16:46:41< 7:44:16] +[titan] 2025-10-05 15:21:03,049 - root - INFO - step: 27380 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 15:21:03,049 - root - INFO - lr: 
1.5365e-05 gnorm: 1.10 [16:46:52< 7:44:05] +[titan] 2025-10-05 15:21:13,927 - root - INFO - step: 27385 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 15:21:13,927 - root - INFO - lr: 1.5358e-05 gnorm: 1.10 [16:47:03< 7:43:54] +[titan] 2025-10-05 15:21:24,850 - root - INFO - step: 27390 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:21:24,850 - root - INFO - lr: 1.5350e-05 gnorm: 1.13 [16:47:14< 7:43:43] +[titan] 2025-10-05 15:21:35,739 - root - INFO - step: 27395 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 15:21:35,739 - root - INFO - lr: 1.5343e-05 gnorm: 1.13 [16:47:24< 7:43:31] +[titan] 2025-10-05 15:21:44,437 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:21:46,630 - root - INFO - step: 27400 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:21:46,630 - root - INFO - lr: 1.5335e-05 gnorm: 1.08 [16:47:35< 7:43:20] +[titan] 2025-10-05 15:21:57,526 - root - INFO - step: 27405 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7972 +[titan] 2025-10-05 15:21:57,526 - root - INFO - lr: 1.5328e-05 gnorm: 1.12 [16:47:46< 7:43:09] +[titan] 2025-10-05 15:22:08,415 - root - INFO - step: 27410 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7583 +[titan] 2025-10-05 15:22:08,415 - root - INFO - lr: 1.5320e-05 gnorm: 1.07 [16:47:57< 7:42:58] +[titan] 2025-10-05 15:22:19,305 - root - INFO - step: 27415 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 15:22:19,305 - root - INFO - lr: 1.5313e-05 gnorm: 1.12 [16:48:08< 7:42:47] +[titan] 2025-10-05 15:22:30,195 - root - INFO - step: 27420 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8052 +[titan] 2025-10-05 15:22:30,196 - root - INFO - lr: 1.5305e-05 gnorm: 1.11 [16:48:19< 7:42:36] +[titan] 2025-10-05 15:22:41,161 - root - INFO - step: 27425 loss: 2.0339 memory: 118.84GiB(85.28%) tps: 29,884 tflops: 414.60 mfu: 41.92% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 15:22:41,162 - root - INFO - lr: 1.5298e-05 gnorm: 1.12 [16:48:30< 7:42:25] +[titan] 2025-10-05 15:22:52,043 - root - INFO - step: 27430 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 15:22:52,043 - root - INFO - lr: 
1.5290e-05 gnorm: 1.10 [16:48:41< 7:42:14] +[titan] 2025-10-05 15:23:02,939 - root - INFO - step: 27435 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 15:23:02,939 - root - INFO - lr: 1.5283e-05 gnorm: 1.12 [16:48:52< 7:42:03] +[titan] 2025-10-05 15:23:13,837 - root - INFO - step: 27440 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 15:23:13,837 - root - INFO - lr: 1.5275e-05 gnorm: 1.12 [16:49:03< 7:41:52] +[titan] 2025-10-05 15:23:24,732 - root - INFO - step: 27445 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 15:23:24,733 - root - INFO - lr: 1.5268e-05 gnorm: 1.10 [16:49:13< 7:41:41] +[titan] 2025-10-05 15:23:33,429 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:23:35,615 - root - INFO - step: 27450 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8394 +[titan] 2025-10-05 15:23:35,615 - root - INFO - lr: 1.5260e-05 gnorm: 1.12 [16:49:24< 7:41:29] +[titan] 2025-10-05 15:23:46,565 - root - INFO - step: 27455 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.19 mfu: 41.98% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 15:23:46,565 - root - INFO - lr: 1.5253e-05 gnorm: 1.14 [16:49:35< 7:41:18] +[titan] 2025-10-05 15:23:57,475 - root - INFO - step: 27460 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 15:23:57,475 - root - INFO - lr: 1.5245e-05 gnorm: 1.11 [16:49:46< 7:41:07] +[titan] 2025-10-05 15:24:08,327 - root - INFO - step: 27465 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 15:24:08,327 - root - INFO - lr: 1.5238e-05 gnorm: 1.11 [16:49:57< 7:40:56] +[titan] 2025-10-05 15:24:19,201 - root - INFO - step: 27470 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:24:19,201 - root - INFO - lr: 1.5230e-05 gnorm: 1.08 [16:50:08< 7:40:45] +[titan] 2025-10-05 15:24:30,080 - root - INFO - step: 27475 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:24:30,080 - root - INFO - lr: 1.5223e-05 gnorm: 1.09 [16:50:19< 7:40:34] +[titan] 2025-10-05 15:24:40,967 - root - INFO - step: 27480 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8213 +[titan] 2025-10-05 15:24:40,967 - root - INFO - lr: 
1.5215e-05 gnorm: 1.10 [16:50:30< 7:40:23] +[titan] 2025-10-05 15:24:51,863 - root - INFO - step: 27485 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 15:24:51,863 - root - INFO - lr: 1.5208e-05 gnorm: 1.11 [16:50:41< 7:40:12] +[titan] 2025-10-05 15:25:02,777 - root - INFO - step: 27490 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8587 +[titan] 2025-10-05 15:25:02,778 - root - INFO - lr: 1.5200e-05 gnorm: 1.16 [16:50:52< 7:40:01] +[titan] 2025-10-05 15:25:13,681 - root - INFO - step: 27495 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:25:13,681 - root - INFO - lr: 1.5193e-05 gnorm: 1.12 [16:51:02< 7:39:50] +[titan] 2025-10-05 15:25:22,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:25:24,569 - root - INFO - step: 27500 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 15:25:24,570 - root - INFO - lr: 1.5185e-05 gnorm: 1.09 [16:51:13< 7:39:38] +[titan] 2025-10-05 15:25:35,452 - root - INFO - step: 27505 loss: 2.0528 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:25:35,452 - root - INFO - lr: 1.5178e-05 gnorm: 1.14 [16:51:24< 7:39:27] +[titan] 2025-10-05 15:25:46,344 - root - INFO - step: 27510 loss: 2.2224 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 15:25:46,344 - root - INFO - lr: 1.5170e-05 gnorm: 7.42 [16:51:35< 7:39:16] +[titan] 2025-10-05 15:25:57,223 - root - INFO - step: 27515 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7946 +[titan] 2025-10-05 15:25:57,223 - root - INFO - lr: 1.5163e-05 gnorm: 1.13 [16:51:46< 7:39:05] +[titan] 2025-10-05 15:26:08,195 - root - INFO - step: 27520 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 15:26:08,195 - root - INFO - lr: 1.5155e-05 gnorm: 1.07 [16:51:57< 7:38:54] +[titan] 2025-10-05 15:26:19,076 - root - INFO - step: 27525 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 15:26:19,076 - root - INFO - lr: 1.5148e-05 gnorm: 1.13 [16:52:08< 7:38:43] +[titan] 2025-10-05 15:26:29,960 - root - INFO - step: 27530 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7786 +[titan] 2025-10-05 15:26:29,960 - root - INFO - lr: 
1.5141e-05 gnorm: 1.17 [16:52:19< 7:38:32] +[titan] 2025-10-05 15:26:40,847 - root - INFO - step: 27535 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 15:26:40,847 - root - INFO - lr: 1.5133e-05 gnorm: 1.09 [16:52:30< 7:38:21] +[titan] 2025-10-05 15:26:51,718 - root - INFO - step: 27540 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 15:26:51,718 - root - INFO - lr: 1.5126e-05 gnorm: 1.13 [16:52:40< 7:38:10] +[titan] 2025-10-05 15:27:02,650 - root - INFO - step: 27545 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 15:27:02,650 - root - INFO - lr: 1.5118e-05 gnorm: 1.12 [16:52:51< 7:37:59] +[titan] 2025-10-05 15:27:11,363 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:27:13,551 - root - INFO - step: 27550 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 15:27:13,551 - root - INFO - lr: 1.5111e-05 gnorm: 1.14 [16:53:02< 7:37:48] +[titan] 2025-10-05 15:27:24,430 - root - INFO - step: 27555 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:27:24,430 - root - INFO - lr: 1.5103e-05 gnorm: 1.10 [16:53:13< 7:37:36] +[titan] 2025-10-05 15:27:35,290 - root - INFO - step: 27560 loss: 2.0098 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7771 +[titan] 2025-10-05 15:27:35,290 - root - INFO - lr: 1.5096e-05 gnorm: 1.11 [16:53:24< 7:37:25] +[titan] 2025-10-05 15:27:46,162 - root - INFO - step: 27565 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 15:27:46,162 - root - INFO - lr: 1.5088e-05 gnorm: 1.14 [16:53:35< 7:37:14] +[titan] 2025-10-05 15:27:57,049 - root - INFO - step: 27570 loss: 2.0327 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7970 +[titan] 2025-10-05 15:27:57,049 - root - INFO - lr: 1.5081e-05 gnorm: 1.09 [16:53:46< 7:37:03] +[titan] 2025-10-05 15:28:07,972 - root - INFO - step: 27575 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8349 +[titan] 2025-10-05 15:28:07,972 - root - INFO - lr: 1.5074e-05 gnorm: 1.15 [16:53:57< 7:36:52] +[titan] 2025-10-05 15:28:18,854 - root - INFO - step: 27580 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 15:28:18,855 - root - INFO - lr: 
1.5066e-05 gnorm: 1.16 [16:54:08< 7:36:41] +[titan] 2025-10-05 15:28:29,772 - root - INFO - step: 27585 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8547 +[titan] 2025-10-05 15:28:29,772 - root - INFO - lr: 1.5059e-05 gnorm: 1.13 [16:54:18< 7:36:30] +[titan] 2025-10-05 15:28:40,678 - root - INFO - step: 27590 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 15:28:40,678 - root - INFO - lr: 1.5051e-05 gnorm: 1.09 [16:54:29< 7:36:19] +[titan] 2025-10-05 15:28:51,534 - root - INFO - step: 27595 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8331 +[titan] 2025-10-05 15:28:51,534 - root - INFO - lr: 1.5044e-05 gnorm: 1.10 [16:54:40< 7:36:08] +[titan] 2025-10-05 15:29:00,205 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:29:02,428 - root - INFO - step: 27600 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7298 +[titan] 2025-10-05 15:29:02,428 - root - INFO - lr: 1.5036e-05 gnorm: 1.12 [16:54:51< 7:35:57] +[titan] 2025-10-05 15:29:13,305 - root - INFO - step: 27605 loss: 2.0663 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:29:13,305 - root - INFO - lr: 1.5029e-05 gnorm: 1.09 [16:55:02< 7:35:46] +[titan] 2025-10-05 15:29:24,170 - root - INFO - step: 27610 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 15:29:24,170 - root - INFO - lr: 1.5021e-05 gnorm: 1.06 [16:55:13< 7:35:34] +[titan] 2025-10-05 15:29:35,069 - root - INFO - step: 27615 loss: 1.9817 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 15:29:35,070 - root - INFO - lr: 1.5014e-05 gnorm: 1.13 [16:55:24< 7:35:23] +[titan] 2025-10-05 15:29:45,908 - root - INFO - step: 27620 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 15:29:45,908 - root - INFO - lr: 1.5007e-05 gnorm: 1.14 [16:55:35< 7:35:12] +[titan] 2025-10-05 15:29:56,752 - root - INFO - step: 27625 loss: 2.0304 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7949 +[titan] 2025-10-05 15:29:56,752 - root - INFO - lr: 1.4999e-05 gnorm: 1.14 [16:55:45< 7:35:01] +[titan] 2025-10-05 15:30:07,651 - root - INFO - step: 27630 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 15:30:07,651 - root - INFO - lr: 
1.4992e-05 gnorm: 1.10 [16:55:56< 7:34:50] +[titan] 2025-10-05 15:30:18,514 - root - INFO - step: 27635 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8163 +[titan] 2025-10-05 15:30:18,514 - root - INFO - lr: 1.4984e-05 gnorm: 1.11 [16:56:07< 7:34:39] +[titan] 2025-10-05 15:30:29,382 - root - INFO - step: 27640 loss: 2.0889 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 15:30:29,382 - root - INFO - lr: 1.4977e-05 gnorm: 1.09 [16:56:18< 7:34:28] +[titan] 2025-10-05 15:30:40,353 - root - INFO - step: 27645 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7813 +[titan] 2025-10-05 15:30:40,354 - root - INFO - lr: 1.4970e-05 gnorm: 1.12 [16:56:29< 7:34:17] +[titan] 2025-10-05 15:30:47,047 - root - INFO - Dumping profiler traces at step 27648 +[titan] 2025-10-05 15:30:47,084 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:30:49,279 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:30:51,455 - root - INFO - step: 27650 loss: 2.0385 memory: 118.84GiB(85.28%) tps: 29,516 tflops: 409.49 mfu: 41.40% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:30:51,456 - root - INFO - lr: 1.4962e-05 gnorm: 1.14 [16:56:40< 7:34:06] +[titan] 2025-10-05 15:31:02,322 - root - INFO - step: 27655 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 15:31:02,323 - root - INFO - lr: 1.4955e-05 gnorm: 1.07 [16:56:51< 7:33:55] +[titan] 2025-10-05 15:31:13,179 - root - INFO - step: 27660 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:31:13,179 - root - INFO - lr: 1.4947e-05 gnorm: 1.10 [16:57:02< 7:33:44] +[titan] 2025-10-05 15:31:24,033 - root - INFO - step: 27665 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 15:31:24,033 - root - INFO - lr: 1.4940e-05 gnorm: 1.09 [16:57:13< 7:33:32] +[titan] 2025-10-05 15:31:34,896 - root - INFO - step: 27670 loss: 2.0761 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 15:31:34,896 - root - INFO - lr: 1.4933e-05 gnorm: 1.11 [16:57:24< 7:33:21] +[titan] 2025-10-05 15:31:45,721 - root - INFO - step: 27675 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 15:31:45,721 - root - INFO - lr: 1.4925e-05 gnorm: 1.11 [16:57:34< 7:33:10] +[titan] 2025-10-05 15:31:56,620 - root - INFO - step: 27680 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7791 +[titan] 2025-10-05 15:31:56,620 - root - INFO - lr: 
1.4918e-05 gnorm: 1.10 [16:57:45< 7:32:59] +[titan] 2025-10-05 15:32:07,525 - root - INFO - step: 27685 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 15:32:07,525 - root - INFO - lr: 1.4910e-05 gnorm: 1.10 [16:57:56< 7:32:48] +[titan] 2025-10-05 15:32:18,367 - root - INFO - step: 27690 loss: 2.0707 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 15:32:18,367 - root - INFO - lr: 1.4903e-05 gnorm: 1.09 [16:58:07< 7:32:37] +[titan] 2025-10-05 15:32:29,238 - root - INFO - step: 27695 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 15:32:29,238 - root - INFO - lr: 1.4896e-05 gnorm: 1.13 [16:58:18< 7:32:26] +[titan] 2025-10-05 15:32:37,923 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:32:40,104 - root - INFO - step: 27700 loss: 2.0988 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 15:32:40,104 - root - INFO - lr: 1.4888e-05 gnorm: 1.12 [16:58:29< 7:32:15] +[titan] 2025-10-05 15:32:50,965 - root - INFO - step: 27705 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 15:32:50,965 - root - INFO - lr: 1.4881e-05 gnorm: 1.13 [16:58:40< 7:32:04] +[titan] 2025-10-05 15:33:01,872 - root - INFO - step: 27710 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 15:33:01,872 - root - INFO - lr: 1.4873e-05 gnorm: 1.13 [16:58:51< 7:31:52] +[titan] 2025-10-05 15:33:12,825 - root - INFO - step: 27715 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8034 +[titan] 2025-10-05 15:33:12,825 - root - INFO - lr: 1.4866e-05 gnorm: 1.12 [16:59:02< 7:31:41] +[titan] 2025-10-05 15:33:23,696 - root - INFO - step: 27720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 15:33:23,696 - root - INFO - lr: 1.4859e-05 gnorm: 1.13 [16:59:12< 7:31:30] +[titan] 2025-10-05 15:33:34,571 - root - INFO - step: 27725 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:33:34,571 - root - INFO - lr: 1.4851e-05 gnorm: 1.13 [16:59:23< 7:31:19] +[titan] 2025-10-05 15:33:45,446 - root - INFO - step: 27730 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 15:33:45,446 - root - INFO - lr: 
1.4844e-05 gnorm: 1.07 [16:59:34< 7:31:08] +[titan] 2025-10-05 15:33:56,288 - root - INFO - step: 27735 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7851 +[titan] 2025-10-05 15:33:56,288 - root - INFO - lr: 1.4836e-05 gnorm: 1.13 [16:59:45< 7:30:57] +[titan] 2025-10-05 15:34:07,199 - root - INFO - step: 27740 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 15:34:07,199 - root - INFO - lr: 1.4829e-05 gnorm: 1.11 [16:59:56< 7:30:46] +[titan] 2025-10-05 15:34:18,088 - root - INFO - step: 27745 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:34:18,088 - root - INFO - lr: 1.4822e-05 gnorm: 1.14 [17:00:07< 7:30:35] +[titan] 2025-10-05 15:34:26,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:34:28,919 - root - INFO - step: 27750 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7660 +[titan] 2025-10-05 15:34:28,919 - root - INFO - lr: 1.4814e-05 gnorm: 1.10 [17:00:18< 7:30:24] +[titan] 2025-10-05 15:34:39,788 - root - INFO - step: 27755 loss: 2.0085 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:34:39,788 - root - INFO - lr: 1.4807e-05 gnorm: 1.13 [17:00:28< 7:30:13] +[titan] 2025-10-05 15:34:50,649 - root - INFO - step: 27760 loss: 2.0229 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7889 +[titan] 2025-10-05 15:34:50,649 - root - INFO - lr: 1.4800e-05 gnorm: 1.08 [17:00:39< 7:30:02] +[titan] 2025-10-05 15:35:01,518 - root - INFO - step: 27765 loss: 2.0372 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:35:01,518 - root - INFO - lr: 1.4792e-05 gnorm: 1.09 [17:00:50< 7:29:50] +[titan] 2025-10-05 15:35:12,435 - root - INFO - step: 27770 loss: 2.0491 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 15:35:12,435 - root - INFO - lr: 1.4785e-05 gnorm: 1.11 [17:01:01< 7:29:39] +[titan] 2025-10-05 15:35:23,343 - root - INFO - step: 27775 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:35:23,344 - root - INFO - lr: 1.4777e-05 gnorm: 1.10 [17:01:12< 7:29:28] +[titan] 2025-10-05 15:35:34,184 - root - INFO - step: 27780 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 15:35:34,184 - root - INFO - lr: 
1.4770e-05 gnorm: 1.09 [17:01:23< 7:29:17] +[titan] 2025-10-05 15:35:45,050 - root - INFO - step: 27785 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 15:35:45,050 - root - INFO - lr: 1.4763e-05 gnorm: 1.08 [17:01:34< 7:29:06] +[titan] 2025-10-05 15:35:55,912 - root - INFO - step: 27790 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:35:55,912 - root - INFO - lr: 1.4755e-05 gnorm: 1.10 [17:01:45< 7:28:55] +[titan] 2025-10-05 15:36:06,779 - root - INFO - step: 27795 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7727 +[titan] 2025-10-05 15:36:06,780 - root - INFO - lr: 1.4748e-05 gnorm: 1.09 [17:01:55< 7:28:44] +[titan] 2025-10-05 15:36:15,503 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:36:17,685 - root - INFO - step: 27800 loss: 2.0545 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8152 +[titan] 2025-10-05 15:36:17,685 - root - INFO - lr: 1.4741e-05 gnorm: 1.11 [17:02:06< 7:28:33] +[titan] 2025-10-05 15:36:28,568 - root - INFO - step: 27805 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 15:36:28,568 - root - INFO - lr: 1.4733e-05 gnorm: 1.09 [17:02:17< 7:28:22] +[titan] 2025-10-05 15:36:39,435 - root - INFO - step: 27810 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 15:36:39,435 - root - INFO - lr: 1.4726e-05 gnorm: 1.12 [17:02:28< 7:28:11] +[titan] 2025-10-05 15:36:50,300 - root - INFO - step: 27815 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:36:50,300 - root - INFO - lr: 1.4719e-05 gnorm: 1.08 [17:02:39< 7:27:59] +[titan] 2025-10-05 15:37:01,162 - root - INFO - step: 27820 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 15:37:01,162 - root - INFO - lr: 1.4711e-05 gnorm: 1.10 [17:02:50< 7:27:48] +[titan] 2025-10-05 15:37:12,068 - root - INFO - step: 27825 loss: 2.0443 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8065 +[titan] 2025-10-05 15:37:12,068 - root - INFO - lr: 1.4704e-05 gnorm: 1.12 [17:03:01< 7:27:37] +[titan] 2025-10-05 15:37:22,924 - root - INFO - step: 27830 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:37:22,924 - root - INFO - lr: 
1.4697e-05 gnorm: 1.12 [17:03:12< 7:27:26] +[titan] 2025-10-05 15:37:33,784 - root - INFO - step: 27835 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 15:37:33,784 - root - INFO - lr: 1.4689e-05 gnorm: 1.09 [17:03:22< 7:27:15] +[titan] 2025-10-05 15:37:44,693 - root - INFO - step: 27840 loss: 1.9926 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 15:37:44,694 - root - INFO - lr: 1.4682e-05 gnorm: 1.08 [17:03:33< 7:27:04] +[titan] 2025-10-05 15:37:55,562 - root - INFO - step: 27845 loss: 1.9782 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 15:37:55,562 - root - INFO - lr: 1.4675e-05 gnorm: 1.11 [17:03:44< 7:26:53] +[titan] 2025-10-05 15:38:04,219 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:38:06,403 - root - INFO - step: 27850 loss: 1.9362 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 15:38:06,403 - root - INFO - lr: 1.4667e-05 gnorm: 1.10 [17:03:55< 7:26:42] +[titan] 2025-10-05 15:38:17,311 - root - INFO - step: 27855 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 15:38:17,311 - root - INFO - lr: 1.4660e-05 gnorm: 1.11 [17:04:06< 7:26:31] +[titan] 2025-10-05 15:38:28,219 - root - INFO - step: 27860 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 15:38:28,219 - root - INFO - lr: 1.4653e-05 gnorm: 1.12 [17:04:17< 7:26:20] +[titan] 2025-10-05 15:38:39,092 - root - INFO - step: 27865 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 15:38:39,092 - root - INFO - lr: 1.4645e-05 gnorm: 1.10 [17:04:28< 7:26:08] +[titan] 2025-10-05 15:38:49,999 - root - INFO - step: 27870 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 15:38:49,999 - root - INFO - lr: 1.4638e-05 gnorm: 1.15 [17:04:39< 7:25:57] +[titan] 2025-10-05 15:39:00,879 - root - INFO - step: 27875 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:39:00,879 - root - INFO - lr: 1.4631e-05 gnorm: 1.10 [17:04:50< 7:25:46] +[titan] 2025-10-05 15:39:11,796 - root - INFO - step: 27880 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 15:39:11,796 - root - INFO - lr: 
1.4623e-05 gnorm: 1.09 [17:05:00< 7:25:35] +[titan] 2025-10-05 15:39:22,647 - root - INFO - step: 27885 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8326 +[titan] 2025-10-05 15:39:22,647 - root - INFO - lr: 1.4616e-05 gnorm: 1.15 [17:05:11< 7:25:24] +[titan] 2025-10-05 15:39:33,473 - root - INFO - step: 27890 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 15:39:33,473 - root - INFO - lr: 1.4609e-05 gnorm: 1.11 [17:05:22< 7:25:13] +[titan] 2025-10-05 15:39:44,338 - root - INFO - step: 27895 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 15:39:44,338 - root - INFO - lr: 1.4601e-05 gnorm: 1.12 [17:05:33< 7:25:02] +[titan] 2025-10-05 15:39:53,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:39:55,213 - root - INFO - step: 27900 loss: 2.0345 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 15:39:55,214 - root - INFO - lr: 1.4594e-05 gnorm: 1.12 [17:05:44< 7:24:51] +[titan] 2025-10-05 15:40:06,088 - root - INFO - step: 27905 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:40:06,088 - root - INFO - lr: 1.4587e-05 gnorm: 1.11 [17:05:55< 7:24:40] +[titan] 2025-10-05 15:40:16,993 - root - INFO - step: 27910 loss: 2.0536 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8146 +[titan] 2025-10-05 15:40:16,993 - root - INFO - lr: 1.4579e-05 gnorm: 1.11 [17:06:06< 7:24:29] +[titan] 2025-10-05 15:40:27,815 - root - INFO - step: 27915 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 15:40:27,815 - root - INFO - lr: 1.4572e-05 gnorm: 1.11 [17:06:17< 7:24:17] +[titan] 2025-10-05 15:40:38,643 - root - INFO - step: 27920 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8155 +[titan] 2025-10-05 15:40:38,643 - root - INFO - lr: 1.4565e-05 gnorm: 1.07 [17:06:27< 7:24:06] +[titan] 2025-10-05 15:40:49,515 - root - INFO - step: 27925 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7489 +[titan] 2025-10-05 15:40:49,515 - root - INFO - lr: 1.4558e-05 gnorm: 1.15 [17:06:38< 7:23:55] +[titan] 2025-10-05 15:41:00,387 - root - INFO - step: 27930 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 15:41:00,387 - root - INFO - lr: 
1.4550e-05 gnorm: 1.08 [17:06:49< 7:23:44] +[titan] 2025-10-05 15:41:11,313 - root - INFO - step: 27935 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 15:41:11,313 - root - INFO - lr: 1.4543e-05 gnorm: 1.11 [17:07:00< 7:23:33] +[titan] 2025-10-05 15:41:22,241 - root - INFO - step: 27940 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:41:22,241 - root - INFO - lr: 1.4536e-05 gnorm: 1.12 [17:07:11< 7:23:22] +[titan] 2025-10-05 15:41:33,099 - root - INFO - step: 27945 loss: 2.0587 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 15:41:33,100 - root - INFO - lr: 1.4528e-05 gnorm: 1.10 [17:07:22< 7:23:11] +[titan] 2025-10-05 15:41:41,785 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:41:43,968 - root - INFO - step: 27950 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:41:43,968 - root - INFO - lr: 1.4521e-05 gnorm: 1.11 [17:07:33< 7:23:00] +[titan] 2025-10-05 15:41:54,868 - root - INFO - step: 27955 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 15:41:54,868 - root - INFO - lr: 1.4514e-05 gnorm: 1.12 [17:07:44< 7:22:49] +[titan] 2025-10-05 15:42:05,736 - root - INFO - step: 27960 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:05,736 - root - INFO - lr: 1.4507e-05 gnorm: 1.10 [17:07:54< 7:22:38] +[titan] 2025-10-05 15:42:16,692 - root - INFO - step: 27965 loss: 1.9991 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:42:16,692 - root - INFO - lr: 1.4499e-05 gnorm: 1.12 [17:08:05< 7:22:27] +[titan] 2025-10-05 15:42:27,570 - root - INFO - step: 27970 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 15:42:27,570 - root - INFO - lr: 1.4492e-05 gnorm: 1.12 [17:08:16< 7:22:15] +[titan] 2025-10-05 15:42:38,440 - root - INFO - step: 27975 loss: 2.0135 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:38,440 - root - INFO - lr: 1.4485e-05 gnorm: 1.13 [17:08:27< 7:22:04] +[titan] 2025-10-05 15:42:49,330 - root - INFO - step: 27980 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 15:42:49,330 - root - INFO - lr: 
1.4477e-05 gnorm: 1.12 [17:08:38< 7:21:53] +[titan] 2025-10-05 15:43:00,202 - root - INFO - step: 27985 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 15:43:00,202 - root - INFO - lr: 1.4470e-05 gnorm: 1.10 [17:08:49< 7:21:42] +[titan] 2025-10-05 15:43:11,083 - root - INFO - step: 27990 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:43:11,083 - root - INFO - lr: 1.4463e-05 gnorm: 1.11 [17:09:00< 7:21:31] +[titan] 2025-10-05 15:43:21,971 - root - INFO - step: 27995 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 15:43:21,972 - root - INFO - lr: 1.4456e-05 gnorm: 1.10 [17:09:11< 7:21:20] +[titan] 2025-10-05 15:43:30,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:43:32,877 - root - INFO - step: 28000 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 15:43:32,878 - root - INFO - lr: 1.4448e-05 gnorm: 1.08 [17:09:22< 7:21:09] +[titan] 2025-10-05 15:43:43,741 - root - INFO - step: 28005 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:43:43,741 - root - INFO - lr: 1.4441e-05 gnorm: 1.13 [17:09:32< 7:20:58] +[titan] 2025-10-05 15:43:54,582 - root - INFO - step: 28010 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:43:54,582 - root - INFO - lr: 1.4434e-05 gnorm: 1.11 [17:09:43< 7:20:47] +[titan] 2025-10-05 15:44:05,429 - root - INFO - step: 28015 loss: 2.0300 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 15:44:05,429 - root - INFO - lr: 1.4426e-05 gnorm: 1.11 [17:09:54< 7:20:36] +[titan] 2025-10-05 15:44:16,331 - root - INFO - step: 28020 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 15:44:16,331 - root - INFO - lr: 1.4419e-05 gnorm: 1.09 [17:10:05< 7:20:25] +[titan] 2025-10-05 15:44:27,186 - root - INFO - step: 28025 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 15:44:27,186 - root - INFO - lr: 1.4412e-05 gnorm: 1.08 [17:10:16< 7:20:13] +[titan] 2025-10-05 15:44:38,082 - root - INFO - step: 28030 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 15:44:38,082 - root - INFO - lr: 
1.4405e-05 gnorm: 1.17 [17:10:27< 7:20:02] +[titan] 2025-10-05 15:44:48,943 - root - INFO - step: 28035 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 15:44:48,943 - root - INFO - lr: 1.4397e-05 gnorm: 1.12 [17:10:38< 7:19:51] +[titan] 2025-10-05 15:44:59,808 - root - INFO - step: 28040 loss: 2.0729 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8327 +[titan] 2025-10-05 15:44:59,808 - root - INFO - lr: 1.4390e-05 gnorm: 1.14 [17:10:48< 7:19:40] +[titan] 2025-10-05 15:45:10,680 - root - INFO - step: 28045 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 15:45:10,681 - root - INFO - lr: 1.4383e-05 gnorm: 1.11 [17:10:59< 7:19:29] +[titan] 2025-10-05 15:45:19,402 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:45:21,586 - root - INFO - step: 28050 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7741 +[titan] 2025-10-05 15:45:21,586 - root - INFO - lr: 1.4376e-05 gnorm: 1.06 [17:11:10< 7:19:18] +[titan] 2025-10-05 15:45:32,449 - root - INFO - step: 28055 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:45:32,449 - root - INFO - lr: 1.4368e-05 gnorm: 1.10 [17:11:21< 7:19:07] +[titan] 2025-10-05 15:45:43,299 - root - INFO - step: 28060 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:45:43,299 - root - INFO - lr: 1.4361e-05 gnorm: 1.14 [17:11:32< 7:18:56] +[titan] 2025-10-05 15:45:54,193 - root - INFO - step: 28065 loss: 2.0655 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 15:45:54,193 - root - INFO - lr: 1.4354e-05 gnorm: 1.10 [17:11:43< 7:18:45] +[titan] 2025-10-05 15:46:05,045 - root - INFO - step: 28070 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 15:46:05,045 - root - INFO - lr: 1.4347e-05 gnorm: 1.08 [17:11:54< 7:18:34] +[titan] 2025-10-05 15:46:15,889 - root - INFO - step: 28075 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 15:46:15,889 - root - INFO - lr: 1.4339e-05 gnorm: 1.09 [17:12:05< 7:18:22] +[titan] 2025-10-05 15:46:26,781 - root - INFO - step: 28080 loss: 1.9684 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 15:46:26,781 - root - INFO - lr: 
1.4332e-05 gnorm: 1.10 [17:12:15< 7:18:11] +[titan] 2025-10-05 15:46:37,625 - root - INFO - step: 28085 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 15:46:37,625 - root - INFO - lr: 1.4325e-05 gnorm: 1.08 [17:12:26< 7:18:00] +[titan] 2025-10-05 15:46:48,485 - root - INFO - step: 28090 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:46:48,486 - root - INFO - lr: 1.4318e-05 gnorm: 1.09 [17:12:37< 7:17:49] +[titan] 2025-10-05 15:46:59,393 - root - INFO - step: 28095 loss: 1.9937 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 15:46:59,393 - root - INFO - lr: 1.4311e-05 gnorm: 1.12 [17:12:48< 7:17:38] +[titan] 2025-10-05 15:47:08,069 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:47:10,253 - root - INFO - step: 28100 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:47:10,253 - root - INFO - lr: 1.4303e-05 gnorm: 1.13 [17:12:59< 7:17:27] +[titan] 2025-10-05 15:47:21,183 - root - INFO - step: 28105 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 15:47:21,183 - root - INFO - lr: 1.4296e-05 gnorm: 1.14 [17:13:10< 7:17:16] +[titan] 2025-10-05 15:47:32,061 - root - INFO - step: 28110 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 15:47:32,061 - root - INFO - lr: 1.4289e-05 gnorm: 1.07 [17:13:21< 7:17:05] +[titan] 2025-10-05 15:47:42,930 - root - INFO - step: 28115 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:47:42,930 - root - INFO - lr: 1.4282e-05 gnorm: 1.11 [17:13:32< 7:16:54] +[titan] 2025-10-05 15:47:53,817 - root - INFO - step: 28120 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7942 +[titan] 2025-10-05 15:47:53,817 - root - INFO - lr: 1.4274e-05 gnorm: 1.09 [17:13:42< 7:16:43] +[titan] 2025-10-05 15:48:04,758 - root - INFO - step: 28125 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 15:48:04,758 - root - INFO - lr: 1.4267e-05 gnorm: 1.14 [17:13:53< 7:16:32] +[titan] 2025-10-05 15:48:15,645 - root - INFO - step: 28130 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 15:48:15,645 - root - INFO - lr: 
1.4260e-05 gnorm: 1.09 [17:14:04< 7:16:21] +[titan] 2025-10-05 15:48:26,569 - root - INFO - step: 28135 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 15:48:26,569 - root - INFO - lr: 1.4253e-05 gnorm: 1.15 [17:14:15< 7:16:09] +[titan] 2025-10-05 15:48:37,430 - root - INFO - step: 28140 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 15:48:37,430 - root - INFO - lr: 1.4246e-05 gnorm: 1.13 [17:14:26< 7:15:58] +[titan] 2025-10-05 15:48:48,297 - root - INFO - step: 28145 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7912 +[titan] 2025-10-05 15:48:48,297 - root - INFO - lr: 1.4238e-05 gnorm: 1.13 [17:14:37< 7:15:47] +[titan] 2025-10-05 15:48:56,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:48:59,187 - root - INFO - step: 28150 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 15:48:59,187 - root - INFO - lr: 1.4231e-05 gnorm: 1.13 [17:14:48< 7:15:36] +[titan] 2025-10-05 15:49:10,034 - root - INFO - step: 28155 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 15:49:10,035 - root - INFO - lr: 1.4224e-05 gnorm: 1.11 [17:14:59< 7:15:25] +[titan] 2025-10-05 15:49:21,051 - root - INFO - step: 28160 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 29,745 tflops: 412.66 mfu: 41.73% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 15:49:21,051 - root - INFO - lr: 1.4217e-05 gnorm: 1.09 [17:15:10< 7:15:14] +[titan] 2025-10-05 15:49:21,239 - root - INFO - Dumping profiler traces at step 28160 +[titan] 2025-10-05 15:49:21,277 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:49:32,111 - root - INFO - step: 28165 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 29,630 tflops: 411.07 mfu: 41.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 15:49:32,111 - root - INFO - lr: 1.4210e-05 gnorm: 1.11 [17:15:21< 7:15:03] +[titan] 2025-10-05 15:49:42,959 - root - INFO - step: 28170 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:49:42,959 - root - INFO - lr: 1.4202e-05 gnorm: 1.08 [17:15:32< 7:14:52] +[titan] 2025-10-05 15:49:53,795 - root - INFO - step: 28175 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:49:53,795 - root - INFO - lr: 1.4195e-05 gnorm: 1.14 [17:15:42< 7:14:41] +[titan] 2025-10-05 15:50:04,659 - root - INFO - step: 28180 loss: 
2.0355 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7994 +[titan] 2025-10-05 15:50:04,659 - root - INFO - lr: 1.4188e-05 gnorm: 1.10 [17:15:53< 7:14:30] +[titan] 2025-10-05 15:50:15,529 - root - INFO - step: 28185 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 15:50:15,529 - root - INFO - lr: 1.4181e-05 gnorm: 1.09 [17:16:04< 7:14:19] +[titan] 2025-10-05 15:50:26,481 - root - INFO - step: 28190 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 15:50:26,482 - root - INFO - lr: 1.4174e-05 gnorm: 1.11 [17:16:15< 7:14:08] +[titan] 2025-10-05 15:50:37,355 - root - INFO - step: 28195 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 15:50:37,355 - root - INFO - lr: 1.4166e-05 gnorm: 1.06 [17:16:26< 7:13:56] +[titan] 2025-10-05 15:50:46,040 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:50:48,223 - root - INFO - step: 28200 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:50:48,223 - root - INFO - lr: 1.4159e-05 gnorm: 1.12 [17:16:37< 7:13:45] +[titan] 2025-10-05 15:50:59,085 - root - INFO - step: 28205 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 15:50:59,085 - root - INFO - lr: 1.4152e-05 gnorm: 1.12 [17:16:48< 7:13:34] +[titan] 2025-10-05 15:51:09,953 - root - INFO - step: 28210 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 15:51:09,953 - root - INFO - lr: 1.4145e-05 gnorm: 1.14 [17:16:59< 7:13:23] +[titan] 2025-10-05 15:51:20,822 - root - INFO - step: 28215 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:51:20,822 - root - INFO - lr: 1.4138e-05 gnorm: 1.10 [17:17:09< 7:13:12] +[titan] 2025-10-05 15:51:31,728 - root - INFO - step: 28220 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 15:51:31,728 - root - INFO - lr: 1.4130e-05 gnorm: 1.18 [17:17:20< 7:13:01] +[titan] 2025-10-05 15:51:42,649 - root - INFO - step: 28225 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 15:51:42,650 - root - INFO - lr: 1.4123e-05 gnorm: 1.10 [17:17:31< 7:12:50] +[titan] 2025-10-05 15:51:53,522 - root - INFO - step: 28230 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 15:51:53,522 - root - INFO - lr: 
1.4116e-05 gnorm: 1.09 [17:17:42< 7:12:39] +[titan] 2025-10-05 15:52:04,406 - root - INFO - step: 28235 loss: 2.0389 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:52:04,406 - root - INFO - lr: 1.4109e-05 gnorm: 1.11 [17:17:53< 7:12:28] +[titan] 2025-10-05 15:52:15,280 - root - INFO - step: 28240 loss: 1.9948 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 15:52:15,281 - root - INFO - lr: 1.4102e-05 gnorm: 1.11 [17:18:04< 7:12:17] +[titan] 2025-10-05 15:52:26,225 - root - INFO - step: 28245 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 15:52:26,225 - root - INFO - lr: 1.4095e-05 gnorm: 1.12 [17:18:15< 7:12:06] +[titan] 2025-10-05 15:52:34,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:52:37,105 - root - INFO - step: 28250 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 15:52:37,105 - root - INFO - lr: 1.4087e-05 gnorm: 1.08 [17:18:26< 7:11:54] +[titan] 2025-10-05 15:52:48,023 - root - INFO - step: 28255 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 15:52:48,023 - root - INFO - lr: 1.4080e-05 gnorm: 1.10 [17:18:37< 7:11:43] +[titan] 2025-10-05 15:52:58,913 - root - INFO - step: 28260 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 15:52:58,913 - root - INFO - lr: 1.4073e-05 gnorm: 1.10 [17:18:48< 7:11:32] +[titan] 2025-10-05 15:53:09,803 - root - INFO - step: 28265 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 15:53:09,803 - root - INFO - lr: 1.4066e-05 gnorm: 1.11 [17:18:58< 7:11:21] +[titan] 2025-10-05 15:53:20,692 - root - INFO - step: 28270 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 15:53:20,692 - root - INFO - lr: 1.4059e-05 gnorm: 1.34 [17:19:09< 7:11:10] +[titan] 2025-10-05 15:53:31,636 - root - INFO - step: 28275 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 15:53:31,637 - root - INFO - lr: 1.4052e-05 gnorm: 1.11 [17:19:20< 7:10:59] +[titan] 2025-10-05 15:53:42,507 - root - INFO - step: 28280 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7702 +[titan] 2025-10-05 15:53:42,508 - root - INFO - lr: 
1.4044e-05 gnorm: 1.09 [17:19:31< 7:10:48] +[titan] 2025-10-05 15:53:53,408 - root - INFO - step: 28285 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:53:53,408 - root - INFO - lr: 1.4037e-05 gnorm: 1.12 [17:19:42< 7:10:37] +[titan] 2025-10-05 15:54:04,269 - root - INFO - step: 28290 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:54:04,269 - root - INFO - lr: 1.4030e-05 gnorm: 1.11 [17:19:53< 7:10:26] +[titan] 2025-10-05 15:54:15,140 - root - INFO - step: 28295 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:54:15,140 - root - INFO - lr: 1.4023e-05 gnorm: 1.09 [17:20:04< 7:10:15] +[titan] 2025-10-05 15:54:23,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:54:26,111 - root - INFO - step: 28300 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7860 +[titan] 2025-10-05 15:54:26,111 - root - INFO - lr: 1.4016e-05 gnorm: 1.07 [17:20:15< 7:10:04] +[titan] 2025-10-05 15:54:36,983 - root - INFO - step: 28305 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 15:54:36,984 - root - INFO - lr: 1.4009e-05 gnorm: 1.12 [17:20:26< 7:09:53] +[titan] 2025-10-05 15:54:47,858 - root - INFO - step: 28310 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:54:47,858 - root - INFO - lr: 1.4002e-05 gnorm: 1.07 [17:20:37< 7:09:42] +[titan] 2025-10-05 15:54:58,731 - root - INFO - step: 28315 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 15:54:58,732 - root - INFO - lr: 1.3994e-05 gnorm: 1.09 [17:20:47< 7:09:30] +[titan] 2025-10-05 15:55:09,654 - root - INFO - step: 28320 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 15:55:09,655 - root - INFO - lr: 1.3987e-05 gnorm: 1.46 [17:20:58< 7:09:19] +[titan] 2025-10-05 15:55:20,530 - root - INFO - step: 28325 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 15:55:20,530 - root - INFO - lr: 1.3980e-05 gnorm: 1.12 [17:21:09< 7:09:08] +[titan] 2025-10-05 15:55:31,457 - root - INFO - step: 28330 loss: 1.9576 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7300 +[titan] 2025-10-05 15:55:31,457 - root - INFO - lr: 
1.3973e-05 gnorm: 1.10 [17:21:20< 7:08:57] +[titan] 2025-10-05 15:55:42,330 - root - INFO - step: 28335 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 15:55:42,330 - root - INFO - lr: 1.3966e-05 gnorm: 1.12 [17:21:31< 7:08:46] +[titan] 2025-10-05 15:55:53,200 - root - INFO - step: 28340 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:55:53,200 - root - INFO - lr: 1.3959e-05 gnorm: 1.15 [17:21:42< 7:08:35] +[titan] 2025-10-05 15:56:04,083 - root - INFO - step: 28345 loss: 2.0214 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 15:56:04,083 - root - INFO - lr: 1.3952e-05 gnorm: 1.17 [17:21:53< 7:08:24] +[titan] 2025-10-05 15:56:12,817 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:56:15,006 - root - INFO - step: 28350 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 15:56:15,006 - root - INFO - lr: 1.3944e-05 gnorm: 1.14 [17:22:04< 7:08:13] +[titan] 2025-10-05 15:56:25,936 - root - INFO - step: 28355 loss: 1.9838 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 15:56:25,936 - root - INFO - lr: 1.3937e-05 gnorm: 1.10 [17:22:15< 7:08:02] +[titan] 2025-10-05 15:56:36,882 - root - INFO - step: 28360 loss: 2.0896 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 15:56:36,882 - root - INFO - lr: 1.3930e-05 gnorm: 1.15 [17:22:26< 7:07:51] +[titan] 2025-10-05 15:56:47,760 - root - INFO - step: 28365 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 15:56:47,760 - root - INFO - lr: 1.3923e-05 gnorm: 1.11 [17:22:36< 7:07:40] +[titan] 2025-10-05 15:56:58,635 - root - INFO - step: 28370 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 15:56:58,635 - root - INFO - lr: 1.3916e-05 gnorm: 1.09 [17:22:47< 7:07:29] +[titan] 2025-10-05 15:57:09,503 - root - INFO - step: 28375 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:57:09,503 - root - INFO - lr: 1.3909e-05 gnorm: 1.05 [17:22:58< 7:07:17] +[titan] 2025-10-05 15:57:20,365 - root - INFO - step: 28380 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7154 +[titan] 2025-10-05 15:57:20,365 - root - INFO - lr: 
1.3902e-05 gnorm: 1.13 [17:23:09< 7:07:06] +[titan] 2025-10-05 15:57:31,331 - root - INFO - step: 28385 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 29,881 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:57:31,331 - root - INFO - lr: 1.3895e-05 gnorm: 1.12 [17:23:20< 7:06:55] +[titan] 2025-10-05 15:57:42,204 - root - INFO - step: 28390 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 15:57:42,205 - root - INFO - lr: 1.3888e-05 gnorm: 1.13 [17:23:31< 7:06:44] +[titan] 2025-10-05 15:57:53,067 - root - INFO - step: 28395 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:57:53,067 - root - INFO - lr: 1.3880e-05 gnorm: 1.09 [17:23:42< 7:06:33] +[titan] 2025-10-05 15:58:01,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:58:03,923 - root - INFO - step: 28400 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8317 +[titan] 2025-10-05 15:58:03,923 - root - INFO - lr: 1.3873e-05 gnorm: 1.11 [17:23:53< 7:06:22] +[titan] 2025-10-05 15:58:14,796 - root - INFO - step: 28405 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 15:58:14,796 - root - INFO - lr: 1.3866e-05 gnorm: 1.09 [17:24:03< 7:06:11] +[titan] 2025-10-05 15:58:25,653 - root - INFO - step: 28410 loss: 1.9984 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7669 +[titan] 2025-10-05 15:58:25,653 - root - INFO - lr: 1.3859e-05 gnorm: 1.10 [17:24:14< 7:06:00] +[titan] 2025-10-05 15:58:36,589 - root - INFO - step: 28415 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 15:58:36,589 - root - INFO - lr: 1.3852e-05 gnorm: 1.13 [17:24:25< 7:05:49] +[titan] 2025-10-05 15:58:47,471 - root - INFO - step: 28420 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7957 +[titan] 2025-10-05 15:58:47,471 - root - INFO - lr: 1.3845e-05 gnorm: 1.10 [17:24:36< 7:05:38] +[titan] 2025-10-05 15:58:58,364 - root - INFO - step: 28425 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 15:58:58,364 - root - INFO - lr: 1.3838e-05 gnorm: 1.14 [17:24:47< 7:05:27] +[titan] 2025-10-05 15:59:09,235 - root - INFO - step: 28430 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 15:59:09,235 - root - INFO - lr: 
1.3831e-05 gnorm: 1.13 [17:24:58< 7:05:16] +[titan] 2025-10-05 15:59:20,106 - root - INFO - step: 28435 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 15:59:20,106 - root - INFO - lr: 1.3824e-05 gnorm: 1.13 [17:25:09< 7:05:04] +[titan] 2025-10-05 15:59:31,054 - root - INFO - step: 28440 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7996 +[titan] 2025-10-05 15:59:31,054 - root - INFO - lr: 1.3817e-05 gnorm: 1.10 [17:25:20< 7:04:53] +[titan] 2025-10-05 15:59:41,947 - root - INFO - step: 28445 loss: 2.0638 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 15:59:41,947 - root - INFO - lr: 1.3810e-05 gnorm: 1.16 [17:25:31< 7:04:42] +[titan] 2025-10-05 15:59:50,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:59:52,821 - root - INFO - step: 28450 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 15:59:52,821 - root - INFO - lr: 1.3802e-05 gnorm: 1.11 [17:25:41< 7:04:31] +[titan] 2025-10-05 16:00:03,693 - root - INFO - step: 28455 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 16:00:03,693 - root - INFO - lr: 1.3795e-05 gnorm: 1.09 [17:25:52< 7:04:20] +[titan] 2025-10-05 16:00:14,540 - root - INFO - step: 28460 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 16:00:14,540 - root - INFO - lr: 1.3788e-05 gnorm: 1.09 [17:26:03< 7:04:09] +[titan] 2025-10-05 16:00:25,402 - root - INFO - step: 28465 loss: 2.0314 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 16:00:25,402 - root - INFO - lr: 1.3781e-05 gnorm: 1.12 [17:26:14< 7:03:58] +[titan] 2025-10-05 16:00:36,344 - root - INFO - step: 28470 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:00:36,344 - root - INFO - lr: 1.3774e-05 gnorm: 1.11 [17:26:25< 7:03:47] +[titan] 2025-10-05 16:00:47,202 - root - INFO - step: 28475 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:00:47,202 - root - INFO - lr: 1.3767e-05 gnorm: 1.13 [17:26:36< 7:03:36] +[titan] 2025-10-05 16:00:58,120 - root - INFO - step: 28480 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 16:00:58,120 - root - INFO - lr: 
1.3760e-05 gnorm: 1.14 [17:26:47< 7:03:25] +[titan] 2025-10-05 16:01:09,012 - root - INFO - step: 28485 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 16:01:09,012 - root - INFO - lr: 1.3753e-05 gnorm: 1.14 [17:26:58< 7:03:14] +[titan] 2025-10-05 16:01:19,903 - root - INFO - step: 28490 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 16:01:19,903 - root - INFO - lr: 1.3746e-05 gnorm: 1.12 [17:27:09< 7:03:03] +[titan] 2025-10-05 16:01:30,819 - root - INFO - step: 28495 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 16:01:30,819 - root - INFO - lr: 1.3739e-05 gnorm: 1.18 [17:27:19< 7:02:51] +[titan] 2025-10-05 16:01:39,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:01:41,746 - root - INFO - step: 28500 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:01:41,746 - root - INFO - lr: 1.3732e-05 gnorm: 1.13 [17:27:30< 7:02:40] +[titan] 2025-10-05 16:01:52,631 - root - INFO - step: 28505 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7896 +[titan] 2025-10-05 16:01:52,631 - root - INFO - lr: 1.3725e-05 gnorm: 1.13 [17:27:41< 7:02:29] +[titan] 2025-10-05 16:02:03,551 - root - INFO - step: 28510 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:02:03,551 - root - INFO - lr: 1.3718e-05 gnorm: 1.12 [17:27:52< 7:02:18] +[titan] 2025-10-05 16:02:14,435 - root - INFO - step: 28515 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 16:02:14,435 - root - INFO - lr: 1.3711e-05 gnorm: 1.16 [17:28:03< 7:02:07] +[titan] 2025-10-05 16:02:25,309 - root - INFO - step: 28520 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 16:02:25,310 - root - INFO - lr: 1.3704e-05 gnorm: 1.09 [17:28:14< 7:01:56] +[titan] 2025-10-05 16:02:36,209 - root - INFO - step: 28525 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 16:02:36,209 - root - INFO - lr: 1.3696e-05 gnorm: 1.09 [17:28:25< 7:01:45] +[titan] 2025-10-05 16:02:47,089 - root - INFO - step: 28530 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 16:02:47,089 - root - INFO - lr: 
1.3689e-05 gnorm: 1.11 [17:28:36< 7:01:34] +[titan] 2025-10-05 16:02:57,982 - root - INFO - step: 28535 loss: 2.0168 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 16:02:57,982 - root - INFO - lr: 1.3682e-05 gnorm: 1.14 [17:28:47< 7:01:23] +[titan] 2025-10-05 16:03:08,840 - root - INFO - step: 28540 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 16:03:08,840 - root - INFO - lr: 1.3675e-05 gnorm: 1.13 [17:28:57< 7:01:12] +[titan] 2025-10-05 16:03:19,756 - root - INFO - step: 28545 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 16:03:19,756 - root - INFO - lr: 1.3668e-05 gnorm: 1.15 [17:29:08< 7:01:01] +[titan] 2025-10-05 16:03:28,457 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:03:30,650 - root - INFO - step: 28550 loss: 1.9538 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:03:30,650 - root - INFO - lr: 1.3661e-05 gnorm: 1.08 [17:29:19< 7:00:50] +[titan] 2025-10-05 16:03:41,553 - root - INFO - step: 28555 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 16:03:41,553 - root - INFO - lr: 1.3654e-05 gnorm: 1.11 [17:29:30< 7:00:38] +[titan] 2025-10-05 16:03:52,429 - root - INFO - step: 28560 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 16:03:52,429 - root - INFO - lr: 1.3647e-05 gnorm: 1.07 [17:29:41< 7:00:27] +[titan] 2025-10-05 16:04:03,288 - root - INFO - step: 28565 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 16:04:03,288 - root - INFO - lr: 1.3640e-05 gnorm: 1.10 [17:29:52< 7:00:16] +[titan] 2025-10-05 16:04:14,124 - root - INFO - step: 28570 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:04:14,124 - root - INFO - lr: 1.3633e-05 gnorm: 1.11 [17:30:03< 7:00:05] +[titan] 2025-10-05 16:04:25,006 - root - INFO - step: 28575 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 16:04:25,006 - root - INFO - lr: 1.3626e-05 gnorm: 1.95 [17:30:14< 6:59:54] +[titan] 2025-10-05 16:04:35,875 - root - INFO - step: 28580 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:04:35,876 - root - INFO - lr: 
1.3619e-05 gnorm: 1.09 [17:30:24< 6:59:43] +[titan] 2025-10-05 16:04:46,735 - root - INFO - step: 28585 loss: 1.9918 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 16:04:46,735 - root - INFO - lr: 1.3612e-05 gnorm: 1.12 [17:30:35< 6:59:32] +[titan] 2025-10-05 16:04:57,585 - root - INFO - step: 28590 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 16:04:57,585 - root - INFO - lr: 1.3605e-05 gnorm: 1.06 [17:30:46< 6:59:21] +[titan] 2025-10-05 16:05:08,445 - root - INFO - step: 28595 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 16:05:08,445 - root - INFO - lr: 1.3598e-05 gnorm: 1.15 [17:30:57< 6:59:10] +[titan] 2025-10-05 16:05:17,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:05:19,314 - root - INFO - step: 28600 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:05:19,314 - root - INFO - lr: 1.3591e-05 gnorm: 1.10 [17:31:08< 6:58:59] +[titan] 2025-10-05 16:05:30,213 - root - INFO - step: 28605 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 16:05:30,213 - root - INFO - lr: 1.3584e-05 gnorm: 1.11 [17:31:19< 6:58:48] +[titan] 2025-10-05 16:05:41,137 - root - INFO - step: 28610 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 16:05:41,137 - root - INFO - lr: 1.3577e-05 gnorm: 1.11 [17:31:30< 6:58:37] +[titan] 2025-10-05 16:05:52,006 - root - INFO - step: 28615 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:05:52,007 - root - INFO - lr: 1.3570e-05 gnorm: 1.10 [17:31:41< 6:58:25] +[titan] 2025-10-05 16:06:02,858 - root - INFO - step: 28620 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 16:06:02,859 - root - INFO - lr: 1.3563e-05 gnorm: 1.12 [17:31:51< 6:58:14] +[titan] 2025-10-05 16:06:13,712 - root - INFO - step: 28625 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 16:06:13,712 - root - INFO - lr: 1.3556e-05 gnorm: 1.10 [17:32:02< 6:58:03] +[titan] 2025-10-05 16:06:24,582 - root - INFO - step: 28630 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 16:06:24,582 - root - INFO - lr: 
1.3549e-05 gnorm: 1.11 [17:32:13< 6:57:52] +[titan] 2025-10-05 16:06:35,472 - root - INFO - step: 28635 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7192 +[titan] 2025-10-05 16:06:35,472 - root - INFO - lr: 1.3542e-05 gnorm: 1.12 [17:32:24< 6:57:41] +[titan] 2025-10-05 16:06:46,399 - root - INFO - step: 28640 loss: 2.0089 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7766 +[titan] 2025-10-05 16:06:46,399 - root - INFO - lr: 1.3535e-05 gnorm: 1.13 [17:32:35< 6:57:30] +[titan] 2025-10-05 16:06:57,266 - root - INFO - step: 28645 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 16:06:57,266 - root - INFO - lr: 1.3528e-05 gnorm: 1.16 [17:32:46< 6:57:19] +[titan] 2025-10-05 16:07:05,934 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:07:08,125 - root - INFO - step: 28650 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 16:07:08,125 - root - INFO - lr: 1.3521e-05 gnorm: 1.08 [17:32:57< 6:57:08] +[titan] 2025-10-05 16:07:18,989 - root - INFO - step: 28655 loss: 1.9921 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 16:07:18,989 - root - INFO - lr: 1.3514e-05 gnorm: 1.13 [17:33:08< 6:56:57] +[titan] 2025-10-05 16:07:29,863 - root - INFO - step: 28660 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:07:29,863 - root - INFO - lr: 1.3507e-05 gnorm: 1.11 [17:33:18< 6:56:46] +[titan] 2025-10-05 16:07:40,776 - root - INFO - step: 28665 loss: 2.0607 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8160 +[titan] 2025-10-05 16:07:40,776 - root - INFO - lr: 1.3500e-05 gnorm: 1.24 [17:33:29< 6:56:35] +[titan] 2025-10-05 16:07:51,762 - root - INFO - step: 28670 loss: 2.0573 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.81 mfu: 41.84% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 16:07:51,762 - root - INFO - lr: 1.3493e-05 gnorm: 1.20 [17:33:40< 6:56:24] +[titan] 2025-10-05 16:07:56,295 - root - INFO - Dumping profiler traces at step 28672 +[titan] 2025-10-05 16:07:56,335 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:08:02,863 - root - INFO - step: 28675 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,520 tflops: 409.54 mfu: 41.41% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 16:08:02,863 - root - INFO - lr: 1.3486e-05 gnorm: 1.09 [17:33:51< 6:56:13] +[titan] 2025-10-05 16:08:13,728 - root - INFO - step: 28680 loss: 
1.9747 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 16:08:13,728 - root - INFO - lr: 1.3479e-05 gnorm: 1.10 [17:34:02< 6:56:01] +[titan] 2025-10-05 16:08:24,599 - root - INFO - step: 28685 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7697 +[titan] 2025-10-05 16:08:24,599 - root - INFO - lr: 1.3472e-05 gnorm: 1.11 [17:34:13< 6:55:50] +[titan] 2025-10-05 16:08:35,472 - root - INFO - step: 28690 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 16:08:35,472 - root - INFO - lr: 1.3465e-05 gnorm: 1.12 [17:34:24< 6:55:39] +[titan] 2025-10-05 16:08:46,365 - root - INFO - step: 28695 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:08:46,366 - root - INFO - lr: 1.3458e-05 gnorm: 1.10 [17:34:35< 6:55:28] +[titan] 2025-10-05 16:08:55,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:08:57,207 - root - INFO - step: 28700 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 16:08:57,207 - root - INFO - lr: 1.3451e-05 gnorm: 1.16 [17:34:46< 6:55:17] +[titan] 2025-10-05 16:09:08,084 - root - INFO - step: 28705 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 16:09:08,084 - root - INFO - lr: 1.3444e-05 gnorm: 1.11 [17:34:57< 6:55:06] +[titan] 2025-10-05 16:09:18,920 - root - INFO - step: 28710 loss: 1.8967 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 16:09:18,920 - root - INFO - lr: 1.3437e-05 gnorm: 1.12 [17:35:08< 6:54:55] +[titan] 2025-10-05 16:09:29,743 - root - INFO - step: 28715 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 16:09:29,743 - root - INFO - lr: 1.3430e-05 gnorm: 1.24 [17:35:18< 6:54:44] +[titan] 2025-10-05 16:09:40,610 - root - INFO - step: 28720 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:09:40,610 - root - INFO - lr: 1.3423e-05 gnorm: 1.10 [17:35:29< 6:54:33] +[titan] 2025-10-05 16:09:51,475 - root - INFO - step: 28725 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 16:09:51,475 - root - INFO - lr: 1.3416e-05 gnorm: 1.07 [17:35:40< 6:54:22] +[titan] 2025-10-05 16:10:02,333 - root - INFO - step: 28730 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:10:02,333 - root - INFO - lr: 
1.3409e-05 gnorm: 1.13 [17:35:51< 6:54:11] +[titan] 2025-10-05 16:10:13,264 - root - INFO - step: 28735 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:10:13,265 - root - INFO - lr: 1.3402e-05 gnorm: 1.14 [17:36:02< 6:53:59] +[titan] 2025-10-05 16:10:24,137 - root - INFO - step: 28740 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:10:24,137 - root - INFO - lr: 1.3395e-05 gnorm: 1.07 [17:36:13< 6:53:48] +[titan] 2025-10-05 16:10:34,996 - root - INFO - step: 28745 loss: 2.0343 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7979 +[titan] 2025-10-05 16:10:34,996 - root - INFO - lr: 1.3389e-05 gnorm: 1.14 [17:36:24< 6:53:37] +[titan] 2025-10-05 16:10:43,931 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:10:46,124 - root - INFO - step: 28750 loss: 2.0411 memory: 118.84GiB(85.28%) tps: 29,446 tflops: 408.52 mfu: 41.31% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 16:10:46,125 - root - INFO - lr: 1.3382e-05 gnorm: 1.10 [17:36:35< 6:53:26] +[titan] 2025-10-05 16:10:56,975 - root - INFO - step: 28755 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7186 +[titan] 2025-10-05 16:10:56,975 - root - INFO - lr: 1.3375e-05 gnorm: 1.11 [17:36:46< 6:53:15] +[titan] 2025-10-05 16:11:07,804 - root - INFO - step: 28760 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 16:11:07,804 - root - INFO - lr: 1.3368e-05 gnorm: 1.13 [17:36:56< 6:53:04] +[titan] 2025-10-05 16:11:18,644 - root - INFO - step: 28765 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 16:11:18,644 - root - INFO - lr: 1.3361e-05 gnorm: 1.14 [17:37:07< 6:52:53] +[titan] 2025-10-05 16:11:29,465 - root - INFO - step: 28770 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 16:11:29,465 - root - INFO - lr: 1.3354e-05 gnorm: 1.11 [17:37:18< 6:52:42] +[titan] 2025-10-05 16:11:40,342 - root - INFO - step: 28775 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 16:11:40,342 - root - INFO - lr: 1.3347e-05 gnorm: 1.10 [17:37:29< 6:52:31] +[titan] 2025-10-05 16:11:51,163 - root - INFO - step: 28780 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 16:11:51,163 - root - INFO - lr: 
1.3340e-05 gnorm: 1.10 [17:37:40< 6:52:20] +[titan] 2025-10-05 16:12:01,972 - root - INFO - step: 28785 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:12:01,972 - root - INFO - lr: 1.3333e-05 gnorm: 1.09 [17:37:51< 6:52:09] +[titan] 2025-10-05 16:12:12,796 - root - INFO - step: 28790 loss: 2.0542 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 16:12:12,796 - root - INFO - lr: 1.3326e-05 gnorm: 1.14 [17:38:01< 6:51:58] +[titan] 2025-10-05 16:12:23,627 - root - INFO - step: 28795 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 16:12:23,627 - root - INFO - lr: 1.3319e-05 gnorm: 5.74 [17:38:12< 6:51:46] +[titan] 2025-10-05 16:12:32,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:12:34,510 - root - INFO - step: 28800 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 16:12:34,510 - root - INFO - lr: 1.3312e-05 gnorm: 1.12 [17:38:23< 6:51:35] +[titan] 2025-10-05 16:12:45,424 - root - INFO - step: 28805 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 16:12:45,424 - root - INFO - lr: 1.3305e-05 gnorm: 1.12 [17:38:34< 6:51:24] +[titan] 2025-10-05 16:12:56,285 - root - INFO - step: 28810 loss: 1.9337 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 16:12:56,285 - root - INFO - lr: 1.3298e-05 gnorm: 1.11 [17:38:45< 6:51:13] +[titan] 2025-10-05 16:13:07,115 - root - INFO - step: 28815 loss: 2.0821 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8406 +[titan] 2025-10-05 16:13:07,115 - root - INFO - lr: 1.3291e-05 gnorm: 1.14 [17:38:56< 6:51:02] +[titan] 2025-10-05 16:13:17,934 - root - INFO - step: 28820 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 16:13:17,934 - root - INFO - lr: 1.3284e-05 gnorm: 1.14 [17:39:07< 6:50:51] +[titan] 2025-10-05 16:13:28,784 - root - INFO - step: 28825 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7000 +[titan] 2025-10-05 16:13:28,784 - root - INFO - lr: 1.3278e-05 gnorm: 1.09 [17:39:17< 6:50:40] +[titan] 2025-10-05 16:13:39,674 - root - INFO - step: 28830 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 16:13:39,675 - root - INFO - lr: 
1.3271e-05 gnorm: 1.13 [17:39:28< 6:50:29] +[titan] 2025-10-05 16:13:50,584 - root - INFO - step: 28835 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 16:13:50,584 - root - INFO - lr: 1.3264e-05 gnorm: 1.14 [17:39:39< 6:50:18] +[titan] 2025-10-05 16:14:01,435 - root - INFO - step: 28840 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 16:14:01,435 - root - INFO - lr: 1.3257e-05 gnorm: 1.11 [17:39:50< 6:50:07] +[titan] 2025-10-05 16:14:12,264 - root - INFO - step: 28845 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 16:14:12,265 - root - INFO - lr: 1.3250e-05 gnorm: 1.14 [17:40:01< 6:49:56] +[titan] 2025-10-05 16:14:20,891 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:14:23,077 - root - INFO - step: 28850 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,305 tflops: 420.44 mfu: 42.51% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 16:14:23,077 - root - INFO - lr: 1.3243e-05 gnorm: 1.13 [17:40:12< 6:49:44] +[titan] 2025-10-05 16:14:33,903 - root - INFO - step: 28855 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 16:14:33,903 - root - INFO - lr: 1.3236e-05 gnorm: 1.10 [17:40:23< 6:49:33] +[titan] 2025-10-05 16:14:44,758 - root - INFO - step: 28860 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 16:14:44,758 - root - INFO - lr: 1.3229e-05 gnorm: 1.18 [17:40:33< 6:49:22] +[titan] 2025-10-05 16:14:55,624 - root - INFO - step: 28865 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 16:14:55,624 - root - INFO - lr: 1.3222e-05 gnorm: 1.13 [17:40:44< 6:49:11] +[titan] 2025-10-05 16:15:06,429 - root - INFO - step: 28870 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,328 tflops: 420.76 mfu: 42.54% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 16:15:06,429 - root - INFO - lr: 1.3215e-05 gnorm: 1.12 [17:40:55< 6:49:00] +[titan] 2025-10-05 16:15:17,255 - root - INFO - step: 28875 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 16:15:17,255 - root - INFO - lr: 1.3209e-05 gnorm: 1.14 [17:41:06< 6:48:49] +[titan] 2025-10-05 16:15:28,083 - root - INFO - step: 28880 loss: 2.0444 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8077 +[titan] 2025-10-05 16:15:28,084 - root - INFO - lr: 
1.3202e-05 gnorm: 1.11 [17:41:17< 6:48:38] +[titan] 2025-10-05 16:15:38,890 - root - INFO - step: 28885 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 16:15:38,890 - root - INFO - lr: 1.3195e-05 gnorm: 1.12 [17:41:27< 6:48:27] +[titan] 2025-10-05 16:15:49,744 - root - INFO - step: 28890 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7735 +[titan] 2025-10-05 16:15:49,744 - root - INFO - lr: 1.3188e-05 gnorm: 1.17 [17:41:38< 6:48:16] +[titan] 2025-10-05 16:16:00,598 - root - INFO - step: 28895 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 16:16:00,598 - root - INFO - lr: 1.3181e-05 gnorm: 1.11 [17:41:49< 6:48:05] +[titan] 2025-10-05 16:16:09,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:16:11,420 - root - INFO - step: 28900 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 16:16:11,421 - root - INFO - lr: 1.3174e-05 gnorm: 1.14 [17:42:00< 6:47:53] +[titan] 2025-10-05 16:16:22,244 - root - INFO - step: 28905 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:16:22,245 - root - INFO - lr: 1.3167e-05 gnorm: 1.10 [17:42:11< 6:47:42] +[titan] 2025-10-05 16:16:33,059 - root - INFO - step: 28910 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 16:16:33,059 - root - INFO - lr: 1.3160e-05 gnorm: 1.14 [17:42:22< 6:47:31] +[titan] 2025-10-05 16:16:43,886 - root - INFO - step: 28915 loss: 1.9331 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7085 +[titan] 2025-10-05 16:16:43,887 - root - INFO - lr: 1.3153e-05 gnorm: 1.09 [17:42:32< 6:47:20] +[titan] 2025-10-05 16:16:54,738 - root - INFO - step: 28920 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 16:16:54,738 - root - INFO - lr: 1.3147e-05 gnorm: 1.09 [17:42:43< 6:47:09] +[titan] 2025-10-05 16:17:05,576 - root - INFO - step: 28925 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 16:17:05,576 - root - INFO - lr: 1.3140e-05 gnorm: 1.13 [17:42:54< 6:46:58] +[titan] 2025-10-05 16:17:16,436 - root - INFO - step: 28930 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 16:17:16,436 - root - INFO - lr: 
1.3133e-05 gnorm: 1.12 [17:43:05< 6:46:47] +[titan] 2025-10-05 16:17:27,262 - root - INFO - step: 28935 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 16:17:27,263 - root - INFO - lr: 1.3126e-05 gnorm: 1.13 [17:43:16< 6:46:36] +[titan] 2025-10-05 16:17:38,068 - root - INFO - step: 28940 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 16:17:38,069 - root - INFO - lr: 1.3119e-05 gnorm: 1.12 [17:43:27< 6:46:25] +[titan] 2025-10-05 16:17:48,901 - root - INFO - step: 28945 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 16:17:48,902 - root - INFO - lr: 1.3112e-05 gnorm: 1.11 [17:43:37< 6:46:14] +[titan] 2025-10-05 16:17:57,529 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:17:59,707 - root - INFO - step: 28950 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 16:17:59,708 - root - INFO - lr: 1.3105e-05 gnorm: 1.13 [17:43:48< 6:46:02] +[titan] 2025-10-05 16:18:10,530 - root - INFO - step: 28955 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 16:18:10,531 - root - INFO - lr: 1.3099e-05 gnorm: 1.13 [17:43:59< 6:45:51] +[titan] 2025-10-05 16:18:21,362 - root - INFO - step: 28960 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7806 +[titan] 2025-10-05 16:18:21,362 - root - INFO - lr: 1.3092e-05 gnorm: 1.11 [17:44:10< 6:45:40] +[titan] 2025-10-05 16:18:32,177 - root - INFO - step: 28965 loss: 2.0315 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 16:18:32,177 - root - INFO - lr: 1.3085e-05 gnorm: 1.15 [17:44:21< 6:45:29] +[titan] 2025-10-05 16:18:43,001 - root - INFO - step: 28970 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7932 +[titan] 2025-10-05 16:18:43,002 - root - INFO - lr: 1.3078e-05 gnorm: 1.12 [17:44:32< 6:45:18] +[titan] 2025-10-05 16:18:53,823 - root - INFO - step: 28975 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7847 +[titan] 2025-10-05 16:18:53,823 - root - INFO - lr: 1.3071e-05 gnorm: 1.15 [17:44:42< 6:45:07] +[titan] 2025-10-05 16:19:04,658 - root - INFO - step: 28980 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7964 +[titan] 2025-10-05 16:19:04,658 - root - INFO - lr: 
1.3064e-05 gnorm: 1.09 [17:44:53< 6:44:56] +[titan] 2025-10-05 16:19:15,441 - root - INFO - step: 28985 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,389 tflops: 421.61 mfu: 42.63% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 16:19:15,441 - root - INFO - lr: 1.3057e-05 gnorm: 1.14 [17:45:04< 6:44:45] +[titan] 2025-10-05 16:19:26,267 - root - INFO - step: 28990 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:19:26,267 - root - INFO - lr: 1.3051e-05 gnorm: 1.12 [17:45:15< 6:44:34] +[titan] 2025-10-05 16:19:37,046 - root - INFO - step: 28995 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,400 tflops: 421.76 mfu: 42.65% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 16:19:37,046 - root - INFO - lr: 1.3044e-05 gnorm: 1.09 [17:45:26< 6:44:23] +[titan] 2025-10-05 16:19:45,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:19:47,884 - root - INFO - step: 29000 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 16:19:47,884 - root - INFO - lr: 1.3037e-05 gnorm: 1.10 [17:45:36< 6:44:11] +[titan] 2025-10-05 16:19:58,682 - root - INFO - step: 29005 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 16:19:58,682 - root - INFO - lr: 1.3030e-05 gnorm: 1.14 [17:45:47< 6:44:00] +[titan] 2025-10-05 16:20:09,482 - root - INFO - step: 29010 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 16:20:09,482 - root - INFO - lr: 1.3023e-05 gnorm: 1.08 [17:45:58< 6:43:49] +[titan] 2025-10-05 16:20:20,322 - root - INFO - step: 29015 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 16:20:20,323 - root - INFO - lr: 1.3016e-05 gnorm: 1.11 [17:46:09< 6:43:38] +[titan] 2025-10-05 16:20:31,122 - root - INFO - step: 29020 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 16:20:31,122 - root - INFO - lr: 1.3010e-05 gnorm: 1.14 [17:46:20< 6:43:27] +[titan] 2025-10-05 16:20:42,001 - root - INFO - step: 29025 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:20:42,001 - root - INFO - lr: 1.3003e-05 gnorm: 1.11 [17:46:31< 6:43:16] +[titan] 2025-10-05 16:20:52,862 - root - INFO - step: 29030 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 16:20:52,862 - root - INFO - lr: 
1.2996e-05 gnorm: 1.17 [17:46:41< 6:43:05] +[titan] 2025-10-05 16:21:03,692 - root - INFO - step: 29035 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8113 +[titan] 2025-10-05 16:21:03,692 - root - INFO - lr: 1.2989e-05 gnorm: 1.19 [17:46:52< 6:42:54] +[titan] 2025-10-05 16:21:14,546 - root - INFO - step: 29040 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 16:21:14,546 - root - INFO - lr: 1.2982e-05 gnorm: 1.13 [17:47:03< 6:42:43] +[titan] 2025-10-05 16:21:25,382 - root - INFO - step: 29045 loss: 2.0710 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 16:21:25,382 - root - INFO - lr: 1.2975e-05 gnorm: 1.11 [17:47:14< 6:42:32] +[titan] 2025-10-05 16:21:34,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:21:36,183 - root - INFO - step: 29050 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7903 +[titan] 2025-10-05 16:21:36,183 - root - INFO - lr: 1.2969e-05 gnorm: 1.15 [17:47:25< 6:42:20] +[titan] 2025-10-05 16:21:47,040 - root - INFO - step: 29055 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 16:21:47,040 - root - INFO - lr: 1.2962e-05 gnorm: 1.13 [17:47:36< 6:42:09] +[titan] 2025-10-05 16:21:57,945 - root - INFO - step: 29060 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 16:21:57,945 - root - INFO - lr: 1.2955e-05 gnorm: 1.09 [17:47:47< 6:41:58] +[titan] 2025-10-05 16:22:08,763 - root - INFO - step: 29065 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:22:08,763 - root - INFO - lr: 1.2948e-05 gnorm: 1.10 [17:47:57< 6:41:47] +[titan] 2025-10-05 16:22:19,602 - root - INFO - step: 29070 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 16:22:19,603 - root - INFO - lr: 1.2941e-05 gnorm: 1.12 [17:48:08< 6:41:36] +[titan] 2025-10-05 16:22:30,424 - root - INFO - step: 29075 loss: 1.9436 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.11 mfu: 42.48% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 16:22:30,424 - root - INFO - lr: 1.2935e-05 gnorm: 1.08 [17:48:19< 6:41:25] +[titan] 2025-10-05 16:22:41,221 - root - INFO - step: 29080 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.05 mfu: 42.57% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 16:22:41,221 - root - INFO - lr: 
1.2928e-05 gnorm: 1.12 [17:48:30< 6:41:14] +[titan] 2025-10-05 16:22:52,143 - root - INFO - step: 29085 loss: 2.0455 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 16:22:52,143 - root - INFO - lr: 1.2921e-05 gnorm: 1.13 [17:48:41< 6:41:03] +[titan] 2025-10-05 16:23:02,963 - root - INFO - step: 29090 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:23:02,964 - root - INFO - lr: 1.2914e-05 gnorm: 1.15 [17:48:52< 6:40:52] +[titan] 2025-10-05 16:23:13,781 - root - INFO - step: 29095 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7612 +[titan] 2025-10-05 16:23:13,781 - root - INFO - lr: 1.2907e-05 gnorm: 1.12 [17:49:02< 6:40:41] +[titan] 2025-10-05 16:23:22,389 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:23:24,561 - root - INFO - step: 29100 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 16:23:24,562 - root - INFO - lr: 1.2901e-05 gnorm: 1.15 [17:49:13< 6:40:30] +[titan] 2025-10-05 16:23:35,362 - root - INFO - step: 29105 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:23:35,363 - root - INFO - lr: 1.2894e-05 gnorm: 1.15 [17:49:24< 6:40:18] +[titan] 2025-10-05 16:23:46,147 - root - INFO - step: 29110 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,385 tflops: 421.54 mfu: 42.62% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 16:23:46,147 - root - INFO - lr: 1.2887e-05 gnorm: 1.13 [17:49:35< 6:40:07] +[titan] 2025-10-05 16:23:56,986 - root - INFO - step: 29115 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 16:23:56,986 - root - INFO - lr: 1.2880e-05 gnorm: 1.11 [17:49:46< 6:39:56] +[titan] 2025-10-05 16:24:07,804 - root - INFO - step: 29120 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 16:24:07,804 - root - INFO - lr: 1.2873e-05 gnorm: 1.09 [17:49:56< 6:39:45] +[titan] 2025-10-05 16:24:18,657 - root - INFO - step: 29125 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 16:24:18,657 - root - INFO - lr: 1.2867e-05 gnorm: 1.08 [17:50:07< 6:39:34] +[titan] 2025-10-05 16:24:29,461 - root - INFO - step: 29130 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 16:24:29,461 - root - INFO - lr: 
1.2860e-05 gnorm: 1.11 [17:50:18< 6:39:23] +[titan] 2025-10-05 16:24:40,248 - root - INFO - step: 29135 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,379 tflops: 421.46 mfu: 42.61% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:24:40,248 - root - INFO - lr: 1.2853e-05 gnorm: 1.23 [17:50:29< 6:39:12] +[titan] 2025-10-05 16:24:51,066 - root - INFO - step: 29140 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 16:24:51,066 - root - INFO - lr: 1.2846e-05 gnorm: 1.11 [17:50:40< 6:39:01] +[titan] 2025-10-05 16:25:01,882 - root - INFO - step: 29145 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.33 mfu: 42.50% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 16:25:01,882 - root - INFO - lr: 1.2840e-05 gnorm: 1.14 [17:50:50< 6:38:50] +[titan] 2025-10-05 16:25:10,529 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:25:12,729 - root - INFO - step: 29150 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:12,730 - root - INFO - lr: 1.2833e-05 gnorm: 1.16 [17:51:01< 6:38:39] +[titan] 2025-10-05 16:25:23,552 - root - INFO - step: 29155 loss: 1.9771 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:25:23,552 - root - INFO - lr: 1.2826e-05 gnorm: 1.11 [17:51:12< 6:38:27] +[titan] 2025-10-05 16:25:34,364 - root - INFO - step: 29160 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:34,364 - root - INFO - lr: 1.2819e-05 gnorm: 1.13 [17:51:23< 6:38:16] +[titan] 2025-10-05 16:25:45,141 - root - INFO - step: 29165 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:25:45,141 - root - INFO - lr: 1.2813e-05 gnorm: 1.10 [17:51:34< 6:38:05] +[titan] 2025-10-05 16:25:55,942 - root - INFO - step: 29170 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 16:25:55,943 - root - INFO - lr: 1.2806e-05 gnorm: 1.12 [17:51:45< 6:37:54] +[titan] 2025-10-05 16:26:06,754 - root - INFO - step: 29175 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:26:06,754 - root - INFO - lr: 1.2799e-05 gnorm: 1.13 [17:51:55< 6:37:43] +[titan] 2025-10-05 16:26:17,565 - root - INFO - step: 29180 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,310 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 16:26:17,566 - root - INFO - lr: 
1.2792e-05 gnorm: 1.11 [17:52:06< 6:37:32] +[titan] 2025-10-05 16:26:26,492 - root - INFO - Dumping profiler traces at step 29184 +[titan] 2025-10-05 16:26:26,532 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:26:28,703 - root - INFO - step: 29185 loss: 2.0239 memory: 118.84GiB(85.28%) tps: 29,423 tflops: 408.20 mfu: 41.27% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:26:28,703 - root - INFO - lr: 1.2786e-05 gnorm: 1.13 [17:52:17< 6:37:21] +[titan] 2025-10-05 16:26:39,480 - root - INFO - step: 29190 loss: 2.0459 memory: 118.84GiB(85.28%) tps: 30,405 tflops: 421.82 mfu: 42.65% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 16:26:39,480 - root - INFO - lr: 1.2779e-05 gnorm: 1.08 [17:52:28< 6:37:10] +[titan] 2025-10-05 16:26:50,281 - root - INFO - step: 29195 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 16:26:50,282 - root - INFO - lr: 1.2772e-05 gnorm: 1.11 [17:52:39< 6:36:59] +[titan] 2025-10-05 16:26:58,915 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:27:01,083 - root - INFO - step: 29200 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:27:01,084 - root - INFO - lr: 1.2765e-05 gnorm: 1.10 [17:52:50< 6:36:48] +[titan] 2025-10-05 16:27:11,900 - root - INFO - step: 29205 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,295 tflops: 420.29 mfu: 42.50% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:27:11,900 - root - INFO - lr: 1.2759e-05 gnorm: 1.11 [17:53:00< 6:36:37] +[titan] 2025-10-05 16:27:22,704 - root - INFO - step: 29210 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:27:22,704 - root - INFO - lr: 1.2752e-05 gnorm: 1.13 [17:53:11< 6:36:25] +[titan] 2025-10-05 16:27:33,520 - root - INFO - step: 29215 loss: 1.9806 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 16:27:33,520 - root - INFO - lr: 1.2745e-05 gnorm: 1.13 [17:53:22< 6:36:14] +[titan] 2025-10-05 16:27:44,343 - root - INFO - step: 29220 loss: 2.0330 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:27:44,344 - root - INFO - lr: 1.2738e-05 gnorm: 1.11 [17:53:33< 6:36:03] +[titan] 2025-10-05 16:27:55,246 - root - INFO - step: 29225 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 16:27:55,246 - root - INFO - lr: 1.2732e-05 gnorm: 1.14 [17:53:44< 6:35:52] +[titan] 2025-10-05 16:28:06,063 - root - INFO - step: 29230 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 16:28:06,064 - root - INFO - lr: 
1.2725e-05 gnorm: 1.10 [17:53:55< 6:35:41] +[titan] 2025-10-05 16:28:16,881 - root - INFO - step: 29235 loss: 1.9977 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7668 +[titan] 2025-10-05 16:28:16,882 - root - INFO - lr: 1.2718e-05 gnorm: 1.12 [17:54:05< 6:35:30] +[titan] 2025-10-05 16:28:27,741 - root - INFO - step: 29240 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 16:28:27,741 - root - INFO - lr: 1.2711e-05 gnorm: 1.12 [17:54:16< 6:35:19] +[titan] 2025-10-05 16:28:38,608 - root - INFO - step: 29245 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 16:28:38,608 - root - INFO - lr: 1.2705e-05 gnorm: 1.14 [17:54:27< 6:35:08] +[titan] 2025-10-05 16:28:47,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:28:49,481 - root - INFO - step: 29250 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7535 +[titan] 2025-10-05 16:28:49,482 - root - INFO - lr: 1.2698e-05 gnorm: 1.12 [17:54:38< 6:34:57] +[titan] 2025-10-05 16:29:00,345 - root - INFO - step: 29255 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 16:29:00,345 - root - INFO - lr: 1.2691e-05 gnorm: 1.13 [17:54:49< 6:34:46] +[titan] 2025-10-05 16:29:11,181 - root - INFO - step: 29260 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 16:29:11,181 - root - INFO - lr: 1.2684e-05 gnorm: 1.10 [17:55:00< 6:34:35] +[titan] 2025-10-05 16:29:22,010 - root - INFO - step: 29265 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 16:29:22,010 - root - INFO - lr: 1.2678e-05 gnorm: 1.10 [17:55:11< 6:34:23] +[titan] 2025-10-05 16:29:32,844 - root - INFO - step: 29270 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8340 +[titan] 2025-10-05 16:29:32,845 - root - INFO - lr: 1.2671e-05 gnorm: 1.14 [17:55:21< 6:34:12] +[titan] 2025-10-05 16:29:43,662 - root - INFO - step: 29275 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 16:29:43,662 - root - INFO - lr: 1.2664e-05 gnorm: 1.10 [17:55:32< 6:34:01] +[titan] 2025-10-05 16:29:54,552 - root - INFO - step: 29280 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 16:29:54,553 - root - INFO - lr: 
1.2658e-05 gnorm: 1.15 [17:55:43< 6:33:50] +[titan] 2025-10-05 16:30:05,442 - root - INFO - step: 29285 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:30:05,442 - root - INFO - lr: 1.2651e-05 gnorm: 1.15 [17:55:54< 6:33:39] +[titan] 2025-10-05 16:30:16,285 - root - INFO - step: 29290 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:30:16,285 - root - INFO - lr: 1.2644e-05 gnorm: 1.13 [17:56:05< 6:33:28] +[titan] 2025-10-05 16:30:27,122 - root - INFO - step: 29295 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 16:30:27,122 - root - INFO - lr: 1.2638e-05 gnorm: 1.16 [17:56:16< 6:33:17] +[titan] 2025-10-05 16:30:35,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:30:37,974 - root - INFO - step: 29300 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:30:37,974 - root - INFO - lr: 1.2631e-05 gnorm: 1.13 [17:56:27< 6:33:06] +[titan] 2025-10-05 16:30:48,835 - root - INFO - step: 29305 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 16:30:48,835 - root - INFO - lr: 1.2624e-05 gnorm: 1.12 [17:56:37< 6:32:55] +[titan] 2025-10-05 16:30:59,735 - root - INFO - step: 29310 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 16:30:59,736 - root - INFO - lr: 1.2617e-05 gnorm: 1.16 [17:56:48< 6:32:44] +[titan] 2025-10-05 16:31:10,585 - root - INFO - step: 29315 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 16:31:10,585 - root - INFO - lr: 1.2611e-05 gnorm: 1.11 [17:56:59< 6:32:33] +[titan] 2025-10-05 16:31:21,451 - root - INFO - step: 29320 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:31:21,452 - root - INFO - lr: 1.2604e-05 gnorm: 1.14 [17:57:10< 6:32:22] +[titan] 2025-10-05 16:31:32,282 - root - INFO - step: 29325 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 16:31:32,283 - root - INFO - lr: 1.2597e-05 gnorm: 1.08 [17:57:21< 6:32:10] +[titan] 2025-10-05 16:31:43,142 - root - INFO - step: 29330 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7937 +[titan] 2025-10-05 16:31:43,143 - root - INFO - lr: 
1.2591e-05 gnorm: 1.15 [17:57:32< 6:31:59] +[titan] 2025-10-05 16:31:54,012 - root - INFO - step: 29335 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 16:31:54,012 - root - INFO - lr: 1.2584e-05 gnorm: 1.12 [17:57:43< 6:31:48] +[titan] 2025-10-05 16:32:04,880 - root - INFO - step: 29340 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 16:32:04,880 - root - INFO - lr: 1.2577e-05 gnorm: 1.18 [17:57:53< 6:31:37] +[titan] 2025-10-05 16:32:15,774 - root - INFO - step: 29345 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 16:32:15,774 - root - INFO - lr: 1.2571e-05 gnorm: 1.14 [17:58:04< 6:31:26] +[titan] 2025-10-05 16:32:24,447 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:32:26,631 - root - INFO - step: 29350 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 16:32:26,632 - root - INFO - lr: 1.2564e-05 gnorm: 1.11 [17:58:15< 6:31:15] +[titan] 2025-10-05 16:32:37,480 - root - INFO - step: 29355 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:32:37,480 - root - INFO - lr: 1.2557e-05 gnorm: 1.10 [17:58:26< 6:31:04] +[titan] 2025-10-05 16:32:48,323 - root - INFO - step: 29360 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:32:48,323 - root - INFO - lr: 1.2551e-05 gnorm: 1.14 [17:58:37< 6:30:53] +[titan] 2025-10-05 16:32:59,199 - root - INFO - step: 29365 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 16:32:59,199 - root - INFO - lr: 1.2544e-05 gnorm: 1.13 [17:58:48< 6:30:42] +[titan] 2025-10-05 16:33:10,048 - root - INFO - step: 29370 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 16:33:10,048 - root - INFO - lr: 1.2537e-05 gnorm: 1.12 [17:58:59< 6:30:31] +[titan] 2025-10-05 16:33:20,934 - root - INFO - step: 29375 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 16:33:20,934 - root - INFO - lr: 1.2531e-05 gnorm: 1.15 [17:59:09< 6:30:20] +[titan] 2025-10-05 16:33:31,794 - root - INFO - step: 29380 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:33:31,794 - root - INFO - lr: 
1.2524e-05 gnorm: 1.11 [17:59:20< 6:30:09] +[titan] 2025-10-05 16:33:42,652 - root - INFO - step: 29385 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7647 +[titan] 2025-10-05 16:33:42,652 - root - INFO - lr: 1.2517e-05 gnorm: 1.13 [17:59:31< 6:29:58] +[titan] 2025-10-05 16:33:53,484 - root - INFO - step: 29390 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:33:53,484 - root - INFO - lr: 1.2511e-05 gnorm: 1.15 [17:59:42< 6:29:46] +[titan] 2025-10-05 16:34:04,355 - root - INFO - step: 29395 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 16:34:04,355 - root - INFO - lr: 1.2504e-05 gnorm: 1.11 [17:59:53< 6:29:35] +[titan] 2025-10-05 16:34:13,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:34:15,217 - root - INFO - step: 29400 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 16:34:15,218 - root - INFO - lr: 1.2497e-05 gnorm: 1.12 [18:00:04< 6:29:24] +[titan] 2025-10-05 16:34:26,084 - root - INFO - step: 29405 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 16:34:26,084 - root - INFO - lr: 1.2491e-05 gnorm: 1.13 [18:00:15< 6:29:13] +[titan] 2025-10-05 16:34:36,985 - root - INFO - step: 29410 loss: 1.9746 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:34:36,985 - root - INFO - lr: 1.2484e-05 gnorm: 1.14 [18:00:26< 6:29:02] +[titan] 2025-10-05 16:34:47,862 - root - INFO - step: 29415 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 16:34:47,862 - root - INFO - lr: 1.2477e-05 gnorm: 1.14 [18:00:36< 6:28:51] +[titan] 2025-10-05 16:34:58,716 - root - INFO - step: 29420 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:34:58,716 - root - INFO - lr: 1.2471e-05 gnorm: 1.10 [18:00:47< 6:28:40] +[titan] 2025-10-05 16:35:09,612 - root - INFO - step: 29425 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 16:35:09,613 - root - INFO - lr: 1.2464e-05 gnorm: 1.13 [18:00:58< 6:28:29] +[titan] 2025-10-05 16:35:20,487 - root - INFO - step: 29430 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7675 +[titan] 2025-10-05 16:35:20,488 - root - INFO - lr: 
1.2457e-05 gnorm: 1.12 [18:01:09< 6:28:18] +[titan] 2025-10-05 16:35:31,364 - root - INFO - step: 29435 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:35:31,364 - root - INFO - lr: 1.2451e-05 gnorm: 1.13 [18:01:20< 6:28:07] +[titan] 2025-10-05 16:35:42,266 - root - INFO - step: 29440 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 16:35:42,266 - root - INFO - lr: 1.2444e-05 gnorm: 1.13 [18:01:31< 6:27:56] +[titan] 2025-10-05 16:35:53,139 - root - INFO - step: 29445 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 16:35:53,139 - root - INFO - lr: 1.2438e-05 gnorm: 1.10 [18:01:42< 6:27:45] +[titan] 2025-10-05 16:36:01,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:36:04,036 - root - INFO - step: 29450 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:36:04,036 - root - INFO - lr: 1.2431e-05 gnorm: 1.10 [18:01:53< 6:27:34] +[titan] 2025-10-05 16:36:14,913 - root - INFO - step: 29455 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 16:36:14,913 - root - INFO - lr: 1.2424e-05 gnorm: 1.13 [18:02:03< 6:27:23] +[titan] 2025-10-05 16:36:25,795 - root - INFO - step: 29460 loss: 2.0213 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7869 +[titan] 2025-10-05 16:36:25,795 - root - INFO - lr: 1.2418e-05 gnorm: 1.13 [18:02:14< 6:27:11] +[titan] 2025-10-05 16:36:36,668 - root - INFO - step: 29465 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 16:36:36,669 - root - INFO - lr: 1.2411e-05 gnorm: 1.14 [18:02:25< 6:27:00] +[titan] 2025-10-05 16:36:47,594 - root - INFO - step: 29470 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 16:36:47,594 - root - INFO - lr: 1.2404e-05 gnorm: 1.17 [18:02:36< 6:26:49] +[titan] 2025-10-05 16:36:58,488 - root - INFO - step: 29475 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8272 +[titan] 2025-10-05 16:36:58,488 - root - INFO - lr: 1.2398e-05 gnorm: 1.14 [18:02:47< 6:26:38] +[titan] 2025-10-05 16:37:09,396 - root - INFO - step: 29480 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 16:37:09,396 - root - INFO - lr: 
1.2391e-05 gnorm: 1.10 [18:02:58< 6:26:27] +[titan] 2025-10-05 16:37:20,276 - root - INFO - step: 29485 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7523 +[titan] 2025-10-05 16:37:20,276 - root - INFO - lr: 1.2385e-05 gnorm: 1.14 [18:03:09< 6:26:16] +[titan] 2025-10-05 16:37:31,149 - root - INFO - step: 29490 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:37:31,149 - root - INFO - lr: 1.2378e-05 gnorm: 1.18 [18:03:20< 6:26:05] +[titan] 2025-10-05 16:37:42,032 - root - INFO - step: 29495 loss: 1.9702 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:37:42,032 - root - INFO - lr: 1.2371e-05 gnorm: 1.12 [18:03:31< 6:25:54] +[titan] 2025-10-05 16:37:50,726 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:37:52,909 - root - INFO - step: 29500 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 16:37:52,910 - root - INFO - lr: 1.2365e-05 gnorm: 1.18 [18:03:41< 6:25:43] +[titan] 2025-10-05 16:38:03,862 - root - INFO - step: 29505 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7779 +[titan] 2025-10-05 16:38:03,862 - root - INFO - lr: 1.2358e-05 gnorm: 1.08 [18:03:52< 6:25:32] +[titan] 2025-10-05 16:38:14,737 - root - INFO - step: 29510 loss: 2.0280 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 16:38:14,738 - root - INFO - lr: 1.2352e-05 gnorm: 1.12 [18:04:03< 6:25:21] +[titan] 2025-10-05 16:38:25,629 - root - INFO - step: 29515 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 16:38:25,629 - root - INFO - lr: 1.2345e-05 gnorm: 1.10 [18:04:14< 6:25:10] +[titan] 2025-10-05 16:38:36,496 - root - INFO - step: 29520 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 16:38:36,497 - root - INFO - lr: 1.2338e-05 gnorm: 1.14 [18:04:25< 6:24:59] +[titan] 2025-10-05 16:38:47,375 - root - INFO - step: 29525 loss: 2.0360 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 16:38:47,375 - root - INFO - lr: 1.2332e-05 gnorm: 1.12 [18:04:36< 6:24:48] +[titan] 2025-10-05 16:38:58,269 - root - INFO - step: 29530 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 16:38:58,270 - root - INFO - lr: 
1.2325e-05 gnorm: 1.14 [18:04:47< 6:24:37] +[titan] 2025-10-05 16:39:09,198 - root - INFO - step: 29535 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:39:09,198 - root - INFO - lr: 1.2319e-05 gnorm: 1.14 [18:04:58< 6:24:25] +[titan] 2025-10-05 16:39:20,067 - root - INFO - step: 29540 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7690 +[titan] 2025-10-05 16:39:20,068 - root - INFO - lr: 1.2312e-05 gnorm: 1.11 [18:05:09< 6:24:14] +[titan] 2025-10-05 16:39:30,927 - root - INFO - step: 29545 loss: 1.9548 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:39:30,927 - root - INFO - lr: 1.2305e-05 gnorm: 1.08 [18:05:19< 6:24:03] +[titan] 2025-10-05 16:39:39,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:39:41,783 - root - INFO - step: 29550 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 16:39:41,783 - root - INFO - lr: 1.2299e-05 gnorm: 1.16 [18:05:30< 6:23:52] +[titan] 2025-10-05 16:39:52,647 - root - INFO - step: 29555 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:39:52,647 - root - INFO - lr: 1.2292e-05 gnorm: 1.11 [18:05:41< 6:23:41] +[titan] 2025-10-05 16:40:03,511 - root - INFO - step: 29560 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 16:40:03,511 - root - INFO - lr: 1.2286e-05 gnorm: 1.11 [18:05:52< 6:23:30] +[titan] 2025-10-05 16:40:14,393 - root - INFO - step: 29565 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 16:40:14,393 - root - INFO - lr: 1.2279e-05 gnorm: 1.09 [18:06:03< 6:23:19] +[titan] 2025-10-05 16:40:25,289 - root - INFO - step: 29570 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 16:40:25,289 - root - INFO - lr: 1.2273e-05 gnorm: 1.15 [18:06:14< 6:23:08] +[titan] 2025-10-05 16:40:36,151 - root - INFO - step: 29575 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 16:40:36,151 - root - INFO - lr: 1.2266e-05 gnorm: 1.12 [18:06:25< 6:22:57] +[titan] 2025-10-05 16:40:47,014 - root - INFO - step: 29580 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:40:47,014 - root - INFO - lr: 
1.2259e-05 gnorm: 1.15 [18:06:36< 6:22:46] +[titan] 2025-10-05 16:40:57,884 - root - INFO - step: 29585 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7835 +[titan] 2025-10-05 16:40:57,884 - root - INFO - lr: 1.2253e-05 gnorm: 1.13 [18:06:46< 6:22:35] +[titan] 2025-10-05 16:41:08,765 - root - INFO - step: 29590 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 16:41:08,765 - root - INFO - lr: 1.2246e-05 gnorm: 1.12 [18:06:57< 6:22:24] +[titan] 2025-10-05 16:41:19,628 - root - INFO - step: 29595 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:41:19,628 - root - INFO - lr: 1.2240e-05 gnorm: 1.14 [18:07:08< 6:22:13] +[titan] 2025-10-05 16:41:28,344 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:41:30,522 - root - INFO - step: 29600 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 16:41:30,522 - root - INFO - lr: 1.2233e-05 gnorm: 1.11 [18:07:19< 6:22:02] +[titan] 2025-10-05 16:41:41,389 - root - INFO - step: 29605 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 16:41:41,389 - root - INFO - lr: 1.2227e-05 gnorm: 1.11 [18:07:30< 6:21:50] +[titan] 2025-10-05 16:41:52,245 - root - INFO - step: 29610 loss: 1.9448 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 16:41:52,245 - root - INFO - lr: 1.2220e-05 gnorm: 1.09 [18:07:41< 6:21:39] +[titan] 2025-10-05 16:42:03,126 - root - INFO - step: 29615 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8316 +[titan] 2025-10-05 16:42:03,126 - root - INFO - lr: 1.2214e-05 gnorm: 1.15 [18:07:52< 6:21:28] +[titan] 2025-10-05 16:42:13,989 - root - INFO - step: 29620 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7570 +[titan] 2025-10-05 16:42:13,989 - root - INFO - lr: 1.2207e-05 gnorm: 1.13 [18:08:03< 6:21:17] +[titan] 2025-10-05 16:42:24,845 - root - INFO - step: 29625 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 16:42:24,845 - root - INFO - lr: 1.2200e-05 gnorm: 1.11 [18:08:13< 6:21:06] +[titan] 2025-10-05 16:42:35,740 - root - INFO - step: 29630 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 16:42:35,740 - root - INFO - lr: 
1.2194e-05 gnorm: 1.16 [18:08:24< 6:20:55] +[titan] 2025-10-05 16:42:46,609 - root - INFO - step: 29635 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 16:42:46,609 - root - INFO - lr: 1.2187e-05 gnorm: 1.13 [18:08:35< 6:20:44] +[titan] 2025-10-05 16:42:57,451 - root - INFO - step: 29640 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:42:57,451 - root - INFO - lr: 1.2181e-05 gnorm: 1.11 [18:08:46< 6:20:33] +[titan] 2025-10-05 16:43:08,337 - root - INFO - step: 29645 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 16:43:08,337 - root - INFO - lr: 1.2174e-05 gnorm: 1.10 [18:08:57< 6:20:22] +[titan] 2025-10-05 16:43:17,012 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:43:19,192 - root - INFO - step: 29650 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7561 +[titan] 2025-10-05 16:43:19,192 - root - INFO - lr: 1.2168e-05 gnorm: 1.14 [18:09:08< 6:20:11] +[titan] 2025-10-05 16:43:30,040 - root - INFO - step: 29655 loss: 1.9877 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 16:43:30,040 - root - INFO - lr: 1.2161e-05 gnorm: 1.13 [18:09:19< 6:20:00] +[titan] 2025-10-05 16:43:40,896 - root - INFO - step: 29660 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:43:40,896 - root - INFO - lr: 1.2155e-05 gnorm: 1.16 [18:09:29< 6:19:49] +[titan] 2025-10-05 16:43:51,775 - root - INFO - step: 29665 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 16:43:51,776 - root - INFO - lr: 1.2148e-05 gnorm: 1.12 [18:09:40< 6:19:38] +[titan] 2025-10-05 16:44:02,650 - root - INFO - step: 29670 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7764 +[titan] 2025-10-05 16:44:02,651 - root - INFO - lr: 1.2142e-05 gnorm: 1.12 [18:09:51< 6:19:26] +[titan] 2025-10-05 16:44:13,541 - root - INFO - step: 29675 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 16:44:13,542 - root - INFO - lr: 1.2135e-05 gnorm: 1.12 [18:10:02< 6:19:15] +[titan] 2025-10-05 16:44:24,406 - root - INFO - step: 29680 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:44:24,406 - root - INFO - lr: 
1.2129e-05 gnorm: 1.10 [18:10:13< 6:19:04] +[titan] 2025-10-05 16:44:35,270 - root - INFO - step: 29685 loss: 2.0294 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 16:44:35,270 - root - INFO - lr: 1.2122e-05 gnorm: 1.14 [18:10:24< 6:18:53] +[titan] 2025-10-05 16:44:46,146 - root - INFO - step: 29690 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:44:46,146 - root - INFO - lr: 1.2116e-05 gnorm: 1.14 [18:10:35< 6:18:42] +[titan] 2025-10-05 16:44:57,137 - root - INFO - step: 29695 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:44:57,138 - root - INFO - lr: 1.2109e-05 gnorm: 1.16 [18:10:46< 6:18:31] +[titan] 2025-10-05 16:44:59,500 - root - INFO - Dumping profiler traces at step 29696 +[titan] 2025-10-05 16:44:59,541 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:45:06,058 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:45:08,245 - root - INFO - step: 29700 loss: 2.0615 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.29 mfu: 41.38% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8226 +[titan] 2025-10-05 16:45:08,245 - root - INFO - lr: 1.2103e-05 gnorm: 1.15 [18:10:57< 6:18:20] +[titan] 2025-10-05 16:45:19,144 - root - INFO - step: 29705 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 16:45:19,144 - root - INFO - lr: 1.2096e-05 gnorm: 1.11 [18:11:08< 6:18:09] +[titan] 2025-10-05 16:45:30,018 - root - INFO - step: 29710 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 16:45:30,019 - root - INFO - lr: 1.2090e-05 gnorm: 1.15 [18:11:19< 6:17:58] +[titan] 2025-10-05 16:45:40,886 - root - INFO - step: 29715 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:45:40,886 - root - INFO - lr: 1.2083e-05 gnorm: 1.09 [18:11:29< 6:17:47] +[titan] 2025-10-05 16:45:51,774 - root - INFO - step: 29720 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8207 +[titan] 2025-10-05 16:45:51,775 - root - INFO - lr: 1.2077e-05 gnorm: 1.13 [18:11:40< 6:17:36] +[titan] 2025-10-05 16:46:02,667 - root - INFO - step: 29725 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 16:46:02,667 - root - INFO - lr: 1.2070e-05 gnorm: 1.11 [18:11:51< 6:17:25] +[titan] 2025-10-05 16:46:13,605 - root - INFO - step: 29730 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 16:46:13,605 - root - INFO - lr: 
1.2064e-05 gnorm: 1.10 [18:12:02< 6:17:14] +[titan] 2025-10-05 16:46:24,504 - root - INFO - step: 29735 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 16:46:24,505 - root - INFO - lr: 1.2057e-05 gnorm: 1.14 [18:12:13< 6:17:03] +[titan] 2025-10-05 16:46:35,396 - root - INFO - step: 29740 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 16:46:35,397 - root - INFO - lr: 1.2051e-05 gnorm: 1.16 [18:12:24< 6:16:52] +[titan] 2025-10-05 16:46:46,263 - root - INFO - step: 29745 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:46:46,263 - root - INFO - lr: 1.2044e-05 gnorm: 1.14 [18:12:35< 6:16:41] +[titan] 2025-10-05 16:46:54,956 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:46:57,142 - root - INFO - step: 29750 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:46:57,142 - root - INFO - lr: 1.2038e-05 gnorm: 1.14 [18:12:46< 6:16:30] +[titan] 2025-10-05 16:47:08,011 - root - INFO - step: 29755 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 16:47:08,011 - root - INFO - lr: 1.2031e-05 gnorm: 1.14 [18:12:57< 6:16:18] +[titan] 2025-10-05 16:47:18,928 - root - INFO - step: 29760 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 16:47:18,929 - root - INFO - lr: 1.2025e-05 gnorm: 1.15 [18:13:07< 6:16:07] +[titan] 2025-10-05 16:47:29,805 - root - INFO - step: 29765 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 16:47:29,806 - root - INFO - lr: 1.2018e-05 gnorm: 1.11 [18:13:18< 6:15:56] +[titan] 2025-10-05 16:47:40,695 - root - INFO - step: 29770 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 16:47:40,695 - root - INFO - lr: 1.2012e-05 gnorm: 1.12 [18:13:29< 6:15:45] +[titan] 2025-10-05 16:47:51,568 - root - INFO - step: 29775 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 16:47:51,568 - root - INFO - lr: 1.2005e-05 gnorm: 1.13 [18:13:40< 6:15:34] +[titan] 2025-10-05 16:48:02,434 - root - INFO - step: 29780 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 16:48:02,435 - root - INFO - lr: 
1.1999e-05 gnorm: 1.13 [18:13:51< 6:15:23] +[titan] 2025-10-05 16:48:13,326 - root - INFO - step: 29785 loss: 2.0923 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 16:48:13,326 - root - INFO - lr: 1.1992e-05 gnorm: 1.17 [18:14:02< 6:15:12] +[titan] 2025-10-05 16:48:24,246 - root - INFO - step: 29790 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 16:48:24,246 - root - INFO - lr: 1.1986e-05 gnorm: 1.21 [18:14:13< 6:15:01] +[titan] 2025-10-05 16:48:35,115 - root - INFO - step: 29795 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7438 +[titan] 2025-10-05 16:48:35,115 - root - INFO - lr: 1.1979e-05 gnorm: 1.16 [18:14:24< 6:14:50] +[titan] 2025-10-05 16:48:43,808 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:48:45,984 - root - INFO - step: 29800 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:48:45,984 - root - INFO - lr: 1.1973e-05 gnorm: 1.17 [18:14:34< 6:14:39] +[titan] 2025-10-05 16:48:56,850 - root - INFO - step: 29805 loss: 2.0467 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 16:48:56,850 - root - INFO - lr: 1.1966e-05 gnorm: 1.13 [18:14:45< 6:14:28] +[titan] 2025-10-05 16:49:07,720 - root - INFO - step: 29810 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 16:49:07,720 - root - INFO - lr: 1.1960e-05 gnorm: 1.14 [18:14:56< 6:14:17] +[titan] 2025-10-05 16:49:18,594 - root - INFO - step: 29815 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 16:49:18,594 - root - INFO - lr: 1.1954e-05 gnorm: 1.11 [18:15:07< 6:14:06] +[titan] 2025-10-05 16:49:29,475 - root - INFO - step: 29820 loss: 2.0086 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7751 +[titan] 2025-10-05 16:49:29,475 - root - INFO - lr: 1.1947e-05 gnorm: 1.16 [18:15:18< 6:13:55] +[titan] 2025-10-05 16:49:40,387 - root - INFO - step: 29825 loss: 1.9867 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7563 +[titan] 2025-10-05 16:49:40,388 - root - INFO - lr: 1.1941e-05 gnorm: 1.10 [18:15:29< 6:13:44] +[titan] 2025-10-05 16:49:51,279 - root - INFO - step: 29830 loss: 1.9675 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 16:49:51,279 - root - INFO - lr: 
1.1934e-05 gnorm: 1.09 [18:15:40< 6:13:32] +[titan] 2025-10-05 16:50:02,138 - root - INFO - step: 29835 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7622 +[titan] 2025-10-05 16:50:02,138 - root - INFO - lr: 1.1928e-05 gnorm: 1.12 [18:15:51< 6:13:21] +[titan] 2025-10-05 16:50:13,006 - root - INFO - step: 29840 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 16:50:13,006 - root - INFO - lr: 1.1921e-05 gnorm: 1.13 [18:16:02< 6:13:10] +[titan] 2025-10-05 16:50:23,932 - root - INFO - step: 29845 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6997 +[titan] 2025-10-05 16:50:23,933 - root - INFO - lr: 1.1915e-05 gnorm: 1.10 [18:16:12< 6:12:59] +[titan] 2025-10-05 16:50:32,609 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:50:34,782 - root - INFO - step: 29850 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8188 +[titan] 2025-10-05 16:50:34,782 - root - INFO - lr: 1.1908e-05 gnorm: 1.18 [18:16:23< 6:12:48] +[titan] 2025-10-05 16:50:45,679 - root - INFO - step: 29855 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7933 +[titan] 2025-10-05 16:50:45,680 - root - INFO - lr: 1.1902e-05 gnorm: 1.17 [18:16:34< 6:12:37] +[titan] 2025-10-05 16:50:56,541 - root - INFO - step: 29860 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:50:56,541 - root - INFO - lr: 1.1896e-05 gnorm: 1.11 [18:16:45< 6:12:26] +[titan] 2025-10-05 16:51:07,402 - root - INFO - step: 29865 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 16:51:07,402 - root - INFO - lr: 1.1889e-05 gnorm: 1.18 [18:16:56< 6:12:15] +[titan] 2025-10-05 16:51:18,320 - root - INFO - step: 29870 loss: 1.9395 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 16:51:18,320 - root - INFO - lr: 1.1883e-05 gnorm: 1.13 [18:17:07< 6:12:04] +[titan] 2025-10-05 16:51:29,178 - root - INFO - step: 29875 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 16:51:29,178 - root - INFO - lr: 1.1876e-05 gnorm: 1.13 [18:17:18< 6:11:53] +[titan] 2025-10-05 16:51:40,033 - root - INFO - step: 29880 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 16:51:40,033 - root - INFO - lr: 
1.1870e-05 gnorm: 1.12 [18:17:29< 6:11:42] +[titan] 2025-10-05 16:51:50,881 - root - INFO - step: 29885 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 16:51:50,881 - root - INFO - lr: 1.1863e-05 gnorm: 1.10 [18:17:39< 6:11:31] +[titan] 2025-10-05 16:52:01,762 - root - INFO - step: 29890 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 16:52:01,762 - root - INFO - lr: 1.1857e-05 gnorm: 1.15 [18:17:50< 6:11:20] +[titan] 2025-10-05 16:52:12,608 - root - INFO - step: 29895 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 16:52:12,608 - root - INFO - lr: 1.1851e-05 gnorm: 1.13 [18:18:01< 6:11:09] +[titan] 2025-10-05 16:52:21,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:52:23,480 - root - INFO - step: 29900 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 16:52:23,481 - root - INFO - lr: 1.1844e-05 gnorm: 1.13 [18:18:12< 6:10:57] +[titan] 2025-10-05 16:52:34,301 - root - INFO - step: 29905 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 16:52:34,301 - root - INFO - lr: 1.1838e-05 gnorm: 1.15 [18:18:23< 6:10:46] +[titan] 2025-10-05 16:52:45,148 - root - INFO - step: 29910 loss: 1.9512 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 16:52:45,148 - root - INFO - lr: 1.1831e-05 gnorm: 1.11 [18:18:34< 6:10:35] +[titan] 2025-10-05 16:52:55,998 - root - INFO - step: 29915 loss: 2.0610 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8211 +[titan] 2025-10-05 16:52:55,998 - root - INFO - lr: 1.1825e-05 gnorm: 1.13 [18:18:44< 6:10:24] +[titan] 2025-10-05 16:53:06,867 - root - INFO - step: 29920 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 16:53:06,867 - root - INFO - lr: 1.1819e-05 gnorm: 1.12 [18:18:55< 6:10:13] +[titan] 2025-10-05 16:53:17,736 - root - INFO - step: 29925 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 16:53:17,736 - root - INFO - lr: 1.1812e-05 gnorm: 1.12 [18:19:06< 6:10:02] +[titan] 2025-10-05 16:53:28,570 - root - INFO - step: 29930 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7831 +[titan] 2025-10-05 16:53:28,570 - root - INFO - lr: 
1.1806e-05 gnorm: 1.12 [18:19:17< 6:09:51] +[titan] 2025-10-05 16:53:39,418 - root - INFO - step: 29935 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 16:53:39,418 - root - INFO - lr: 1.1799e-05 gnorm: 1.25 [18:19:28< 6:09:40] +[titan] 2025-10-05 16:53:50,272 - root - INFO - step: 29940 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 16:53:50,272 - root - INFO - lr: 1.1793e-05 gnorm: 1.12 [18:19:39< 6:09:29] +[titan] 2025-10-05 16:54:01,117 - root - INFO - step: 29945 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:54:01,117 - root - INFO - lr: 1.1787e-05 gnorm: 1.14 [18:19:50< 6:09:18] +[titan] 2025-10-05 16:54:09,772 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:54:12,029 - root - INFO - step: 29950 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 16:54:12,030 - root - INFO - lr: 1.1780e-05 gnorm: 1.18 [18:20:01< 6:09:07] +[titan] 2025-10-05 16:54:22,840 - root - INFO - step: 29955 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 16:54:22,840 - root - INFO - lr: 1.1774e-05 gnorm: 1.14 [18:20:11< 6:08:56] +[titan] 2025-10-05 16:54:33,694 - root - INFO - step: 29960 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 16:54:33,694 - root - INFO - lr: 1.1767e-05 gnorm: 1.14 [18:20:22< 6:08:45] +[titan] 2025-10-05 16:54:44,540 - root - INFO - step: 29965 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 16:54:44,540 - root - INFO - lr: 1.1761e-05 gnorm: 1.14 [18:20:33< 6:08:34] +[titan] 2025-10-05 16:54:55,380 - root - INFO - step: 29970 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 16:54:55,380 - root - INFO - lr: 1.1755e-05 gnorm: 1.13 [18:20:44< 6:08:22] +[titan] 2025-10-05 16:55:06,200 - root - INFO - step: 29975 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7975 +[titan] 2025-10-05 16:55:06,200 - root - INFO - lr: 1.1748e-05 gnorm: 1.16 [18:20:55< 6:08:11] +[titan] 2025-10-05 16:55:17,035 - root - INFO - step: 29980 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 16:55:17,035 - root - INFO - lr: 
1.1742e-05 gnorm: 1.16 [18:21:06< 6:08:00] +[titan] 2025-10-05 16:55:27,861 - root - INFO - step: 29985 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 16:55:27,861 - root - INFO - lr: 1.1736e-05 gnorm: 1.11 [18:21:16< 6:07:49] +[titan] 2025-10-05 16:55:38,685 - root - INFO - step: 29990 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 16:55:38,685 - root - INFO - lr: 1.1729e-05 gnorm: 1.08 [18:21:27< 6:07:38] +[titan] 2025-10-05 16:55:49,531 - root - INFO - step: 29995 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 16:55:49,531 - root - INFO - lr: 1.1723e-05 gnorm: 1.11 [18:21:38< 6:07:27] +[titan] 2025-10-05 16:55:58,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 16:56:00,346 - root - INFO - step: 30000 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 16:56:00,346 - root - INFO - lr: 1.1716e-05 gnorm: 1.14 [18:21:49< 6:07:16] +[titan] 2025-10-05 16:56:00,346 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 16:56:17,644 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 16:56:17,644 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.30 seconds. 
+[titan] 2025-10-05 16:58:26,179 - root - INFO - step: 30005 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 2,247 tflops: 31.17 mfu: 3.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 16:58:26,179 - root - INFO - lr: 1.1710e-05 gnorm: 1.15 [18:24:15< 6:07:50] +[titan] 2025-10-05 16:58:36,943 - root - INFO - step: 30010 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,443 tflops: 422.35 mfu: 42.70% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 16:58:36,943 - root - INFO - lr: 1.1704e-05 gnorm: 1.13 [18:24:25< 6:07:39] +[titan] 2025-10-05 16:58:47,757 - root - INFO - step: 30015 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7859 +[titan] 2025-10-05 16:58:47,757 - root - INFO - lr: 1.1697e-05 gnorm: 1.19 [18:24:36< 6:07:28] +[titan] 2025-10-05 16:58:58,551 - root - INFO - step: 30020 loss: 2.0398 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.19 mfu: 42.59% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 16:58:58,551 - root - INFO - lr: 1.1691e-05 gnorm: 1.16 [18:24:47< 6:07:16] +[titan] 2025-10-05 16:59:09,338 - root - INFO - step: 30025 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,378 tflops: 421.45 mfu: 42.61% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7804 +[titan] 2025-10-05 16:59:09,338 - root - INFO - lr: 1.1685e-05 gnorm: 1.17 [18:24:58< 6:07:05] +[titan] 2025-10-05 16:59:20,123 - root - INFO - step: 30030 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,384 tflops: 421.53 mfu: 42.62% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 16:59:20,123 - root - INFO - lr: 1.1678e-05 gnorm: 1.14 [18:25:09< 6:06:54] +[titan] 2025-10-05 16:59:30,956 - root - INFO - step: 30035 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 16:59:30,956 - root - INFO - lr: 
1.1672e-05 gnorm: 1.17 [18:25:19< 6:06:43] +[titan] 2025-10-05 16:59:41,784 - root - INFO - step: 30040 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7750 +[titan] 2025-10-05 16:59:41,784 - root - INFO - lr: 1.1666e-05 gnorm: 1.10 [18:25:30< 6:06:32] +[titan] 2025-10-05 16:59:52,578 - root - INFO - step: 30045 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.18 mfu: 42.59% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 16:59:52,578 - root - INFO - lr: 1.1659e-05 gnorm: 1.20 [18:25:41< 6:06:21] +[titan] 2025-10-05 17:00:01,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 17:00:03,430 - root - INFO - step: 30050 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:00:03,431 - root - INFO - lr: 1.1653e-05 gnorm: 1.13 [18:25:52< 6:06:10] +[titan] 2025-10-05 17:00:14,272 - root - INFO - step: 30055 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:00:14,273 - root - INFO - lr: 1.1647e-05 gnorm: 1.14 [18:26:03< 6:05:59] +[titan] 2025-10-05 17:00:25,096 - root - INFO - step: 30060 loss: 2.0424 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 17:00:25,096 - root - INFO - lr: 1.1640e-05 gnorm: 1.13 [18:26:14< 6:05:48] +[titan] 2025-10-05 17:00:35,911 - root - INFO - step: 30065 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.37 mfu: 42.50% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 17:00:35,911 - root - INFO - lr: 1.1634e-05 gnorm: 1.13 [18:26:24< 6:05:36] +[titan] 2025-10-05 17:00:46,749 - root - INFO - step: 30070 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 
30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:00:46,749 - root - INFO - lr: 1.1628e-05 gnorm: 1.12 [18:26:35< 6:05:25] +[titan] 2025-10-05 17:00:57,558 - root - INFO - step: 30075 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 17:00:57,558 - root - INFO - lr: 1.1621e-05 gnorm: 1.11 [18:26:46< 6:05:14] +[titan] 2025-10-05 17:01:08,392 - root - INFO - step: 30080 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7726 +[titan] 2025-10-05 17:01:08,392 - root - INFO - lr: 1.1615e-05 gnorm: 1.15 [18:26:57< 6:05:03] +[titan] 2025-10-05 17:01:19,229 - root - INFO - step: 30085 loss: 2.0397 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 17:01:19,229 - root - INFO - lr: 1.1609e-05 gnorm: 1.15 [18:27:08< 6:04:52] +[titan] 2025-10-05 17:01:30,104 - root - INFO - step: 30090 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:01:30,104 - root - INFO - lr: 1.1602e-05 gnorm: 1.11 [18:27:19< 6:04:41] +[titan] 2025-10-05 17:01:40,932 - root - INFO - step: 30095 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 17:01:40,932 - root - INFO - lr: 1.1596e-05 gnorm: 1.14 [18:27:29< 6:04:30] +[titan] 2025-10-05 17:01:49,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:01:51,740 - root - INFO - step: 30100 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:01:51,740 - root - INFO - lr: 1.1590e-05 gnorm: 1.12 [18:27:40< 6:04:19] +[titan] 2025-10-05 17:02:02,591 - root - INFO - step: 30105 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7463 +[titan] 2025-10-05 17:02:02,591 - root - INFO - lr: 1.1583e-05 gnorm: 1.13 [18:27:51< 6:04:08] +[titan] 2025-10-05 17:02:13,423 - root - INFO - step: 30110 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 17:02:13,423 - root - INFO - lr: 1.1577e-05 gnorm: 1.16 [18:28:02< 6:03:56] +[titan] 2025-10-05 17:02:24,227 - root - INFO - step: 30115 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.78 mfu: 42.55% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 17:02:24,227 - root - INFO - lr: 1.1571e-05 gnorm: 1.12 [18:28:13< 6:03:45] +[titan] 2025-10-05 17:02:35,077 - root - INFO - step: 30120 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 17:02:35,077 - root - INFO - lr: 1.1565e-05 gnorm: 1.14 [18:28:24< 6:03:34] +[titan] 2025-10-05 17:02:45,895 - root - INFO - step: 30125 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 17:02:45,895 - root - INFO - lr: 1.1558e-05 gnorm: 1.13 [18:28:34< 6:03:23] +[titan] 2025-10-05 17:02:56,710 - root - INFO - step: 30130 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 17:02:56,710 - root - INFO - lr: 
1.1552e-05 gnorm: 1.13 [18:28:45< 6:03:12] +[titan] 2025-10-05 17:03:07,565 - root - INFO - step: 30135 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8142 +[titan] 2025-10-05 17:03:07,565 - root - INFO - lr: 1.1546e-05 gnorm: 1.11 [18:28:56< 6:03:01] +[titan] 2025-10-05 17:03:18,382 - root - INFO - step: 30140 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 17:03:18,382 - root - INFO - lr: 1.1539e-05 gnorm: 1.21 [18:29:07< 6:02:50] +[titan] 2025-10-05 17:03:29,277 - root - INFO - step: 30145 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 17:03:29,277 - root - INFO - lr: 1.1533e-05 gnorm: 1.14 [18:29:18< 6:02:39] +[titan] 2025-10-05 17:03:37,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:03:40,104 - root - INFO - step: 30150 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 17:03:40,104 - root - INFO - lr: 1.1527e-05 gnorm: 1.13 [18:29:29< 6:02:28] +[titan] 2025-10-05 17:03:50,940 - root - INFO - step: 30155 loss: 2.0613 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:03:50,940 - root - INFO - lr: 1.1521e-05 gnorm: 1.15 [18:29:39< 6:02:17] +[titan] 2025-10-05 17:04:01,762 - root - INFO - step: 30160 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 17:04:01,762 - root - INFO - lr: 1.1514e-05 gnorm: 1.14 [18:29:50< 6:02:05] +[titan] 2025-10-05 17:04:12,567 - root - INFO - step: 30165 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 17:04:12,567 - root - INFO - lr: 1.1508e-05 gnorm: 1.12 [18:30:01< 6:01:54] +[titan] 2025-10-05 17:04:23,420 - root - INFO - step: 30170 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:04:23,420 - root - INFO - lr: 1.1502e-05 gnorm: 1.12 [18:30:12< 6:01:43] +[titan] 2025-10-05 17:04:34,282 - root - INFO - step: 30175 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 17:04:34,282 - root - INFO - lr: 1.1495e-05 gnorm: 1.12 [18:30:23< 6:01:32] +[titan] 2025-10-05 17:04:45,111 - root - INFO - step: 30180 loss: 1.9784 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:04:45,111 - root - INFO - lr: 
1.1489e-05 gnorm: 1.16 [18:30:34< 6:01:21] +[titan] 2025-10-05 17:04:55,961 - root - INFO - step: 30185 loss: 2.0025 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:04:55,962 - root - INFO - lr: 1.1483e-05 gnorm: 1.13 [18:30:44< 6:01:10] +[titan] 2025-10-05 17:05:06,781 - root - INFO - step: 30190 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.18 mfu: 42.48% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 17:05:06,781 - root - INFO - lr: 1.1477e-05 gnorm: 1.16 [18:30:55< 6:00:59] +[titan] 2025-10-05 17:05:17,581 - root - INFO - step: 30195 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:05:17,581 - root - INFO - lr: 1.1470e-05 gnorm: 1.16 [18:31:06< 6:00:48] +[titan] 2025-10-05 17:05:26,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:05:28,480 - root - INFO - step: 30200 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 17:05:28,480 - root - INFO - lr: 1.1464e-05 gnorm: 1.13 [18:31:17< 6:00:37] +[titan] 2025-10-05 17:05:39,462 - root - INFO - step: 30205 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 29,840 tflops: 413.98 mfu: 41.86% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 17:05:39,462 - root - INFO - lr: 1.1458e-05 gnorm: 1.11 [18:31:28< 6:00:25] +[titan] 2025-10-05 17:05:46,168 - root - INFO - Dumping profiler traces at step 30208 +[titan] 2025-10-05 17:05:46,207 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:05:50,671 - root - INFO - step: 30210 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 29,233 tflops: 405.56 mfu: 41.01% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7633 +[titan] 2025-10-05 17:05:50,672 - root - INFO - lr: 1.1452e-05 gnorm: 1.14 [18:31:39< 6:00:15] +[titan] 2025-10-05 17:06:01,511 - root - INFO - step: 30215 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 17:06:01,511 - root - INFO - lr: 1.1445e-05 gnorm: 1.17 [18:31:50< 6:00:03] +[titan] 2025-10-05 17:06:12,360 - root - INFO - step: 30220 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7378 +[titan] 2025-10-05 17:06:12,360 - root - INFO - lr: 1.1439e-05 gnorm: 1.11 [18:32:01< 5:59:52] +[titan] 2025-10-05 17:06:23,184 - root - INFO - step: 30225 loss: 2.0049 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 17:06:23,184 - root - INFO - lr: 1.1433e-05 gnorm: 1.13 [18:32:12< 5:59:41] +[titan] 2025-10-05 17:06:34,073 - root - INFO - step: 30230 loss: 
1.9745 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 17:06:34,073 - root - INFO - lr: 1.1427e-05 gnorm: 1.15 [18:32:23< 5:59:30] +[titan] 2025-10-05 17:06:44,900 - root - INFO - step: 30235 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7525 +[titan] 2025-10-05 17:06:44,900 - root - INFO - lr: 1.1420e-05 gnorm: 1.11 [18:32:33< 5:59:19] +[titan] 2025-10-05 17:06:55,740 - root - INFO - step: 30240 loss: 1.9188 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 17:06:55,740 - root - INFO - lr: 1.1414e-05 gnorm: 1.16 [18:32:44< 5:59:08] +[titan] 2025-10-05 17:07:06,541 - root - INFO - step: 30245 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:07:06,541 - root - INFO - lr: 1.1408e-05 gnorm: 1.13 [18:32:55< 5:58:57] +[titan] 2025-10-05 17:07:15,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:07:17,391 - root - INFO - step: 30250 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 17:07:17,392 - root - INFO - lr: 1.1402e-05 gnorm: 1.17 [18:33:06< 5:58:46] +[titan] 2025-10-05 17:07:28,241 - root - INFO - step: 30255 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 17:07:28,241 - root - INFO - lr: 1.1395e-05 gnorm: 1.18 [18:33:17< 5:58:35] +[titan] 2025-10-05 17:07:39,102 - root - INFO - step: 30260 loss: 2.0013 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 17:07:39,103 - root - INFO - lr: 1.1389e-05 gnorm: 1.12 [18:33:28< 5:58:23] +[titan] 2025-10-05 17:07:49,999 - root - INFO - step: 30265 loss: 1.9338 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 17:07:49,999 - root - INFO - lr: 1.1383e-05 gnorm: 1.16 [18:33:38< 5:58:12] +[titan] 2025-10-05 17:08:00,848 - root - INFO - step: 30270 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 17:08:00,848 - root - INFO - lr: 1.1377e-05 gnorm: 1.17 [18:33:49< 5:58:01] +[titan] 2025-10-05 17:08:11,692 - root - INFO - step: 30275 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 17:08:11,692 - root - INFO - lr: 1.1370e-05 gnorm: 1.17 [18:34:00< 5:57:50] +[titan] 2025-10-05 17:08:22,552 - root - INFO - step: 30280 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:08:22,552 - root - INFO - lr: 
1.1364e-05 gnorm: 1.18 [18:34:11< 5:57:39] +[titan] 2025-10-05 17:08:33,450 - root - INFO - step: 30285 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 17:08:33,450 - root - INFO - lr: 1.1358e-05 gnorm: 1.11 [18:34:22< 5:57:28] +[titan] 2025-10-05 17:08:44,280 - root - INFO - step: 30290 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 17:08:44,280 - root - INFO - lr: 1.1352e-05 gnorm: 1.13 [18:34:33< 5:57:17] +[titan] 2025-10-05 17:08:55,139 - root - INFO - step: 30295 loss: 2.0245 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 17:08:55,139 - root - INFO - lr: 1.1346e-05 gnorm: 1.13 [18:34:44< 5:57:06] +[titan] 2025-10-05 17:09:03,787 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:09:05,964 - root - INFO - step: 30300 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 17:09:05,964 - root - INFO - lr: 1.1339e-05 gnorm: 1.17 [18:34:54< 5:56:55] +[titan] 2025-10-05 17:09:16,818 - root - INFO - step: 30305 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 17:09:16,818 - root - INFO - lr: 1.1333e-05 gnorm: 1.16 [18:35:05< 5:56:44] +[titan] 2025-10-05 17:09:27,662 - root - INFO - step: 30310 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 17:09:27,662 - root - INFO - lr: 1.1327e-05 gnorm: 1.15 [18:35:16< 5:56:33] +[titan] 2025-10-05 17:09:38,520 - root - INFO - step: 30315 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 17:09:38,520 - root - INFO - lr: 1.1321e-05 gnorm: 1.14 [18:35:27< 5:56:21] +[titan] 2025-10-05 17:09:49,395 - root - INFO - step: 30320 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 17:09:49,395 - root - INFO - lr: 1.1315e-05 gnorm: 1.14 [18:35:38< 5:56:10] +[titan] 2025-10-05 17:10:00,277 - root - INFO - step: 30325 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 17:10:00,277 - root - INFO - lr: 1.1308e-05 gnorm: 1.15 [18:35:49< 5:55:59] +[titan] 2025-10-05 17:10:11,173 - root - INFO - step: 30330 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:10:11,174 - root - INFO - lr: 
1.1302e-05 gnorm: 1.15 [18:36:00< 5:55:48] +[titan] 2025-10-05 17:10:22,000 - root - INFO - step: 30335 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 17:10:22,000 - root - INFO - lr: 1.1296e-05 gnorm: 1.18 [18:36:10< 5:55:37] +[titan] 2025-10-05 17:10:32,877 - root - INFO - step: 30340 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 17:10:32,877 - root - INFO - lr: 1.1290e-05 gnorm: 1.13 [18:36:21< 5:55:26] +[titan] 2025-10-05 17:10:43,769 - root - INFO - step: 30345 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:10:43,769 - root - INFO - lr: 1.1284e-05 gnorm: 1.15 [18:36:32< 5:55:15] +[titan] 2025-10-05 17:10:52,407 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:10:54,603 - root - INFO - step: 30350 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 17:10:54,603 - root - INFO - lr: 1.1277e-05 gnorm: 1.15 [18:36:43< 5:55:04] +[titan] 2025-10-05 17:11:05,438 - root - INFO - step: 30355 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:11:05,438 - root - INFO - lr: 1.1271e-05 gnorm: 1.14 [18:36:54< 5:54:53] +[titan] 2025-10-05 17:11:16,300 - root - INFO - step: 30360 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 17:11:16,300 - root - INFO - lr: 1.1265e-05 gnorm: 1.11 [18:37:05< 5:54:42] +[titan] 2025-10-05 17:11:27,159 - root - INFO - step: 30365 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:11:27,159 - root - INFO - lr: 1.1259e-05 gnorm: 1.11 [18:37:16< 5:54:30] +[titan] 2025-10-05 17:11:38,071 - root - INFO - step: 30370 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 17:11:38,071 - root - INFO - lr: 1.1253e-05 gnorm: 1.18 [18:37:27< 5:54:19] +[titan] 2025-10-05 17:11:48,937 - root - INFO - step: 30375 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:11:48,937 - root - INFO - lr: 1.1247e-05 gnorm: 1.15 [18:37:37< 5:54:08] +[titan] 2025-10-05 17:11:59,780 - root - INFO - step: 30380 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 17:11:59,780 - root - INFO - lr: 
1.1240e-05 gnorm: 1.13 [18:37:48< 5:53:57] +[titan] 2025-10-05 17:12:10,619 - root - INFO - step: 30385 loss: 1.9947 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 17:12:10,620 - root - INFO - lr: 1.1234e-05 gnorm: 1.15 [18:37:59< 5:53:46] +[titan] 2025-10-05 17:12:21,479 - root - INFO - step: 30390 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 17:12:21,479 - root - INFO - lr: 1.1228e-05 gnorm: 1.11 [18:38:10< 5:53:35] +[titan] 2025-10-05 17:12:32,330 - root - INFO - step: 30395 loss: 1.9584 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:12:32,330 - root - INFO - lr: 1.1222e-05 gnorm: 1.12 [18:38:21< 5:53:24] +[titan] 2025-10-05 17:12:41,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:12:43,230 - root - INFO - step: 30400 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 17:12:43,230 - root - INFO - lr: 1.1216e-05 gnorm: 1.16 [18:38:32< 5:53:13] +[titan] 2025-10-05 17:12:54,073 - root - INFO - step: 30405 loss: 1.9890 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 17:12:54,073 - root - INFO - lr: 1.1210e-05 gnorm: 1.19 [18:38:43< 5:53:02] +[titan] 2025-10-05 17:13:04,941 - root - INFO - step: 30410 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 17:13:04,942 - root - INFO - lr: 1.1203e-05 gnorm: 1.16 [18:38:53< 5:52:51] +[titan] 2025-10-05 17:13:15,791 - root - INFO - step: 30415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 17:13:15,791 - root - INFO - lr: 1.1197e-05 gnorm: 1.18 [18:39:04< 5:52:40] +[titan] 2025-10-05 17:13:26,642 - root - INFO - step: 30420 loss: 2.0087 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 17:13:26,642 - root - INFO - lr: 1.1191e-05 gnorm: 1.13 [18:39:15< 5:52:28] +[titan] 2025-10-05 17:13:37,590 - root - INFO - step: 30425 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.26 mfu: 41.99% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 17:13:37,590 - root - INFO - lr: 1.1185e-05 gnorm: 1.13 [18:39:26< 5:52:17] +[titan] 2025-10-05 17:13:48,481 - root - INFO - step: 30430 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7083 +[titan] 2025-10-05 17:13:48,481 - root - INFO - lr: 
1.1179e-05 gnorm: 1.22 [18:39:37< 5:52:06] +[titan] 2025-10-05 17:13:59,341 - root - INFO - step: 30435 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 17:13:59,342 - root - INFO - lr: 1.1173e-05 gnorm: 1.10 [18:39:48< 5:51:55] +[titan] 2025-10-05 17:14:10,199 - root - INFO - step: 30440 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 17:14:10,199 - root - INFO - lr: 1.1166e-05 gnorm: 1.15 [18:39:59< 5:51:44] +[titan] 2025-10-05 17:14:21,050 - root - INFO - step: 30445 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:14:21,050 - root - INFO - lr: 1.1160e-05 gnorm: 1.17 [18:40:09< 5:51:33] +[titan] 2025-10-05 17:14:29,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:14:31,915 - root - INFO - step: 30450 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:14:31,915 - root - INFO - lr: 1.1154e-05 gnorm: 1.13 [18:40:20< 5:51:22] +[titan] 2025-10-05 17:14:42,853 - root - INFO - step: 30455 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 17:14:42,853 - root - INFO - lr: 1.1148e-05 gnorm: 1.15 [18:40:31< 5:51:11] +[titan] 2025-10-05 17:14:53,689 - root - INFO - step: 30460 loss: 1.9279 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:14:53,689 - root - INFO - lr: 1.1142e-05 gnorm: 1.16 [18:40:42< 5:51:00] +[titan] 2025-10-05 17:15:04,539 - root - INFO - step: 30465 loss: 1.9730 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7446 +[titan] 2025-10-05 17:15:04,539 - root - INFO - lr: 1.1136e-05 gnorm: 1.13 [18:40:53< 5:50:49] +[titan] 2025-10-05 17:15:15,418 - root - INFO - step: 30470 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 17:15:15,418 - root - INFO - lr: 1.1130e-05 gnorm: 1.20 [18:41:04< 5:50:38] +[titan] 2025-10-05 17:15:26,296 - root - INFO - step: 30475 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 17:15:26,297 - root - INFO - lr: 1.1124e-05 gnorm: 1.13 [18:41:15< 5:50:26] +[titan] 2025-10-05 17:15:37,128 - root - INFO - step: 30480 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 17:15:37,129 - root - INFO - lr: 
1.1117e-05 gnorm: 1.16 [18:41:26< 5:50:15] +[titan] 2025-10-05 17:15:48,020 - root - INFO - step: 30485 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 17:15:48,020 - root - INFO - lr: 1.1111e-05 gnorm: 1.16 [18:41:36< 5:50:04] +[titan] 2025-10-05 17:15:58,881 - root - INFO - step: 30490 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 17:15:58,881 - root - INFO - lr: 1.1105e-05 gnorm: 1.13 [18:41:47< 5:49:53] +[titan] 2025-10-05 17:16:09,738 - root - INFO - step: 30495 loss: 2.0163 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7828 +[titan] 2025-10-05 17:16:09,738 - root - INFO - lr: 1.1099e-05 gnorm: 1.13 [18:41:58< 5:49:42] +[titan] 2025-10-05 17:16:18,407 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:16:20,594 - root - INFO - step: 30500 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:16:20,594 - root - INFO - lr: 1.1093e-05 gnorm: 1.15 [18:42:09< 5:49:31] +[titan] 2025-10-05 17:16:31,472 - root - INFO - step: 30505 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7582 +[titan] 2025-10-05 17:16:31,472 - root - INFO - lr: 1.1087e-05 gnorm: 1.19 [18:42:20< 5:49:20] +[titan] 2025-10-05 17:16:42,399 - root - INFO - step: 30510 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:16:42,399 - root - INFO - lr: 1.1081e-05 gnorm: 1.14 [18:42:31< 5:49:09] +[titan] 2025-10-05 17:16:53,259 - root - INFO - step: 30515 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 17:16:53,259 - root - INFO - lr: 1.1075e-05 gnorm: 1.15 [18:42:42< 5:48:58] +[titan] 2025-10-05 17:17:04,140 - root - INFO - step: 30520 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 17:17:04,140 - root - INFO - lr: 1.1069e-05 gnorm: 1.13 [18:42:53< 5:48:47] +[titan] 2025-10-05 17:17:14,989 - root - INFO - step: 30525 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 17:17:14,989 - root - INFO - lr: 1.1063e-05 gnorm: 1.36 [18:43:03< 5:48:36] +[titan] 2025-10-05 17:17:25,901 - root - INFO - step: 30530 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.12% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:17:25,902 - root - INFO - lr: 
1.1056e-05 gnorm: 1.14 [18:43:14< 5:48:24] +[titan] 2025-10-05 17:17:36,768 - root - INFO - step: 30535 loss: 2.0575 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8191 +[titan] 2025-10-05 17:17:36,768 - root - INFO - lr: 1.1050e-05 gnorm: 1.17 [18:43:25< 5:48:13] +[titan] 2025-10-05 17:17:47,700 - root - INFO - step: 30540 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 17:17:47,700 - root - INFO - lr: 1.1044e-05 gnorm: 1.12 [18:43:36< 5:48:02] +[titan] 2025-10-05 17:17:58,569 - root - INFO - step: 30545 loss: 1.9982 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 17:17:58,569 - root - INFO - lr: 1.1038e-05 gnorm: 1.13 [18:43:47< 5:47:51] +[titan] 2025-10-05 17:18:07,245 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:18:09,461 - root - INFO - step: 30550 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7963 +[titan] 2025-10-05 17:18:09,461 - root - INFO - lr: 1.1032e-05 gnorm: 1.15 [18:43:58< 5:47:40] +[titan] 2025-10-05 17:18:20,334 - root - INFO - step: 30555 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 17:18:20,335 - root - INFO - lr: 1.1026e-05 gnorm: 1.13 [18:44:09< 5:47:29] +[titan] 2025-10-05 17:18:31,222 - root - INFO - step: 30560 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:18:31,222 - root - INFO - lr: 1.1020e-05 gnorm: 1.16 [18:44:20< 5:47:18] +[titan] 2025-10-05 17:18:42,115 - root - INFO - step: 30565 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 17:18:42,116 - root - INFO - lr: 1.1014e-05 gnorm: 1.18 [18:44:31< 5:47:07] +[titan] 2025-10-05 17:18:52,976 - root - INFO - step: 30570 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7939 +[titan] 2025-10-05 17:18:52,977 - root - INFO - lr: 1.1008e-05 gnorm: 1.15 [18:44:41< 5:46:56] +[titan] 2025-10-05 17:19:03,822 - root - INFO - step: 30575 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 17:19:03,822 - root - INFO - lr: 1.1002e-05 gnorm: 1.13 [18:44:52< 5:46:45] +[titan] 2025-10-05 17:19:14,680 - root - INFO - step: 30580 loss: 1.9714 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 17:19:14,680 - root - INFO - lr: 
1.0996e-05 gnorm: 1.15 [18:45:03< 5:46:34] +[titan] 2025-10-05 17:19:25,560 - root - INFO - step: 30585 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 17:19:25,560 - root - INFO - lr: 1.0990e-05 gnorm: 1.12 [18:45:14< 5:46:23] +[titan] 2025-10-05 17:19:36,432 - root - INFO - step: 30590 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7819 +[titan] 2025-10-05 17:19:36,432 - root - INFO - lr: 1.0984e-05 gnorm: 1.18 [18:45:25< 5:46:11] +[titan] 2025-10-05 17:19:47,343 - root - INFO - step: 30595 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 17:19:47,343 - root - INFO - lr: 1.0977e-05 gnorm: 1.12 [18:45:36< 5:46:00] +[titan] 2025-10-05 17:19:56,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:19:58,231 - root - INFO - step: 30600 loss: 2.0557 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 17:19:58,231 - root - INFO - lr: 1.0971e-05 gnorm: 1.17 [18:45:47< 5:45:49] +[titan] 2025-10-05 17:20:09,100 - root - INFO - step: 30605 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:20:09,100 - root - INFO - lr: 1.0965e-05 gnorm: 1.15 [18:45:58< 5:45:38] +[titan] 2025-10-05 17:20:19,957 - root - INFO - step: 30610 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 17:20:19,957 - root - INFO - lr: 1.0959e-05 gnorm: 1.11 [18:46:08< 5:45:27] +[titan] 2025-10-05 17:20:30,886 - root - INFO - step: 30615 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 17:20:30,887 - root - INFO - lr: 1.0953e-05 gnorm: 1.14 [18:46:19< 5:45:16] +[titan] 2025-10-05 17:20:41,762 - root - INFO - step: 30620 loss: 1.9612 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:20:41,763 - root - INFO - lr: 1.0947e-05 gnorm: 1.19 [18:46:30< 5:45:05] +[titan] 2025-10-05 17:20:52,672 - root - INFO - step: 30625 loss: 1.9688 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.13% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7412 +[titan] 2025-10-05 17:20:52,672 - root - INFO - lr: 1.0941e-05 gnorm: 1.14 [18:46:41< 5:44:54] +[titan] 2025-10-05 17:21:03,551 - root - INFO - step: 30630 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 17:21:03,551 - root - INFO - lr: 
1.0935e-05 gnorm: 1.13 [18:46:52< 5:44:43] +[titan] 2025-10-05 17:21:14,413 - root - INFO - step: 30635 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 17:21:14,413 - root - INFO - lr: 1.0929e-05 gnorm: 1.13 [18:47:03< 5:44:32] +[titan] 2025-10-05 17:21:25,276 - root - INFO - step: 30640 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 17:21:25,276 - root - INFO - lr: 1.0923e-05 gnorm: 1.18 [18:47:14< 5:44:21] +[titan] 2025-10-05 17:21:36,129 - root - INFO - step: 30645 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 17:21:36,129 - root - INFO - lr: 1.0917e-05 gnorm: 1.13 [18:47:25< 5:44:10] +[titan] 2025-10-05 17:21:44,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:21:47,049 - root - INFO - step: 30650 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:21:47,049 - root - INFO - lr: 1.0911e-05 gnorm: 1.12 [18:47:35< 5:43:58] +[titan] 2025-10-05 17:21:57,919 - root - INFO - step: 30655 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 17:21:57,919 - root - INFO - lr: 1.0905e-05 gnorm: 1.17 [18:47:46< 5:43:47] +[titan] 2025-10-05 17:22:08,772 - root - INFO - step: 30660 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:22:08,772 - root - INFO - lr: 1.0899e-05 gnorm: 1.14 [18:47:57< 5:43:36] +[titan] 2025-10-05 17:22:19,639 - root - INFO - step: 30665 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7649 +[titan] 2025-10-05 17:22:19,639 - root - INFO - lr: 1.0893e-05 gnorm: 1.17 [18:48:08< 5:43:25] +[titan] 2025-10-05 17:22:30,511 - root - INFO - step: 30670 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 17:22:30,511 - root - INFO - lr: 1.0887e-05 gnorm: 1.15 [18:48:19< 5:43:14] +[titan] 2025-10-05 17:22:41,385 - root - INFO - step: 30675 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 17:22:41,385 - root - INFO - lr: 1.0881e-05 gnorm: 1.13 [18:48:30< 5:43:03] +[titan] 2025-10-05 17:22:52,312 - root - INFO - step: 30680 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:22:52,312 - root - INFO - lr: 
1.0875e-05 gnorm: 1.15 [18:48:41< 5:42:52] +[titan] 2025-10-05 17:23:03,165 - root - INFO - step: 30685 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 17:23:03,165 - root - INFO - lr: 1.0869e-05 gnorm: 1.13 [18:48:52< 5:42:41] +[titan] 2025-10-05 17:23:14,020 - root - INFO - step: 30690 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7825 +[titan] 2025-10-05 17:23:14,020 - root - INFO - lr: 1.0863e-05 gnorm: 1.14 [18:49:02< 5:42:30] +[titan] 2025-10-05 17:23:24,876 - root - INFO - step: 30695 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:23:24,876 - root - INFO - lr: 1.0857e-05 gnorm: 1.15 [18:49:13< 5:42:19] +[titan] 2025-10-05 17:23:33,557 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:23:35,744 - root - INFO - step: 30700 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:23:35,744 - root - INFO - lr: 1.0851e-05 gnorm: 1.12 [18:49:24< 5:42:08] +[titan] 2025-10-05 17:23:46,630 - root - INFO - step: 30705 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 17:23:46,630 - root - INFO - lr: 1.0845e-05 gnorm: 1.14 [18:49:35< 5:41:56] +[titan] 2025-10-05 17:23:57,506 - root - INFO - step: 30710 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:23:57,506 - root - INFO - lr: 1.0839e-05 gnorm: 1.17 [18:49:46< 5:41:45] +[titan] 2025-10-05 17:24:08,364 - root - INFO - step: 30715 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:24:08,364 - root - INFO - lr: 1.0833e-05 gnorm: 1.13 [18:49:57< 5:41:34] +[titan] 2025-10-05 17:24:19,332 - root - INFO - step: 30720 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 29,876 tflops: 414.48 mfu: 41.91% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7148 +[titan] 2025-10-05 17:24:19,332 - root - INFO - lr: 1.0827e-05 gnorm: 1.11 [18:50:08< 5:41:23] +[titan] 2025-10-05 17:24:19,521 - root - INFO - Dumping profiler traces at step 30720 +[titan] 2025-10-05 17:24:19,560 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:24:30,456 - root - INFO - step: 30725 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 29,458 tflops: 408.69 mfu: 41.32% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 17:24:30,456 - root - INFO - lr: 1.0821e-05 gnorm: 1.13 [18:50:19< 5:41:12] +[titan] 2025-10-05 17:24:41,338 - root - INFO - step: 30730 loss: 
1.9783 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 17:24:41,338 - root - INFO - lr: 1.0815e-05 gnorm: 1.11 [18:50:30< 5:41:01] +[titan] 2025-10-05 17:24:52,229 - root - INFO - step: 30735 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7376 +[titan] 2025-10-05 17:24:52,229 - root - INFO - lr: 1.0809e-05 gnorm: 1.15 [18:50:41< 5:40:50] +[titan] 2025-10-05 17:25:03,105 - root - INFO - step: 30740 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7807 +[titan] 2025-10-05 17:25:03,105 - root - INFO - lr: 1.0803e-05 gnorm: 1.23 [18:50:52< 5:40:39] +[titan] 2025-10-05 17:25:13,996 - root - INFO - step: 30745 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:25:13,996 - root - INFO - lr: 1.0797e-05 gnorm: 1.16 [18:51:02< 5:40:28] +[titan] 2025-10-05 17:25:22,692 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:25:24,892 - root - INFO - step: 30750 loss: 2.0403 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 17:25:24,892 - root - INFO - lr: 1.0791e-05 gnorm: 1.21 [18:51:13< 5:40:17] +[titan] 2025-10-05 17:25:35,755 - root - INFO - step: 30755 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 17:25:35,755 - root - INFO - lr: 1.0785e-05 gnorm: 1.15 [18:51:24< 5:40:06] +[titan] 2025-10-05 17:25:46,627 - root - INFO - step: 30760 loss: 1.9424 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:25:46,627 - root - INFO - lr: 1.0779e-05 gnorm: 1.14 [18:51:35< 5:39:55] +[titan] 2025-10-05 17:25:57,513 - root - INFO - step: 30765 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8128 +[titan] 2025-10-05 17:25:57,513 - root - INFO - lr: 1.0773e-05 gnorm: 1.16 [18:51:46< 5:39:44] +[titan] 2025-10-05 17:26:08,369 - root - INFO - step: 30770 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:26:08,369 - root - INFO - lr: 1.0767e-05 gnorm: 1.15 [18:51:57< 5:39:32] +[titan] 2025-10-05 17:26:19,291 - root - INFO - step: 30775 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 17:26:19,291 - root - INFO - lr: 1.0761e-05 gnorm: 1.16 [18:52:08< 5:39:21] +[titan] 2025-10-05 17:26:30,180 - root - INFO - step: 30780 loss: 1.9939 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7628 +[titan] 2025-10-05 17:26:30,180 - root - INFO - lr: 
1.0755e-05 gnorm: 1.14 [18:52:19< 5:39:10] +[titan] 2025-10-05 17:26:41,064 - root - INFO - step: 30785 loss: 2.0227 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 17:26:41,065 - root - INFO - lr: 1.0749e-05 gnorm: 1.14 [18:52:29< 5:38:59] +[titan] 2025-10-05 17:26:51,961 - root - INFO - step: 30790 loss: 1.9654 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:26:51,961 - root - INFO - lr: 1.0743e-05 gnorm: 1.11 [18:52:40< 5:38:48] +[titan] 2025-10-05 17:27:02,841 - root - INFO - step: 30795 loss: 2.0724 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 17:27:02,841 - root - INFO - lr: 1.0737e-05 gnorm: 1.16 [18:52:51< 5:38:37] +[titan] 2025-10-05 17:27:11,522 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:27:13,697 - root - INFO - step: 30800 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 17:27:13,697 - root - INFO - lr: 1.0731e-05 gnorm: 1.14 [18:53:02< 5:38:26] +[titan] 2025-10-05 17:27:24,566 - root - INFO - step: 30805 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 17:27:24,566 - root - INFO - lr: 1.0725e-05 gnorm: 1.15 [18:53:13< 5:38:15] +[titan] 2025-10-05 17:27:35,469 - root - INFO - step: 30810 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 17:27:35,469 - root - INFO - lr: 1.0719e-05 gnorm: 1.14 [18:53:24< 5:38:04] +[titan] 2025-10-05 17:27:46,340 - root - INFO - step: 30815 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 17:27:46,340 - root - INFO - lr: 1.0713e-05 gnorm: 1.16 [18:53:35< 5:37:53] +[titan] 2025-10-05 17:27:57,238 - root - INFO - step: 30820 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 17:27:57,239 - root - INFO - lr: 1.0707e-05 gnorm: 1.12 [18:53:46< 5:37:42] +[titan] 2025-10-05 17:28:08,105 - root - INFO - step: 30825 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 17:28:08,105 - root - INFO - lr: 1.0702e-05 gnorm: 1.12 [18:53:57< 5:37:31] +[titan] 2025-10-05 17:28:18,970 - root - INFO - step: 30830 loss: 1.8472 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6336 +[titan] 2025-10-05 17:28:18,971 - root - INFO - lr: 
1.0696e-05 gnorm: 1.15 [18:54:07< 5:37:19] +[titan] 2025-10-05 17:28:29,844 - root - INFO - step: 30835 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:28:29,844 - root - INFO - lr: 1.0690e-05 gnorm: 1.13 [18:54:18< 5:37:08] +[titan] 2025-10-05 17:28:40,744 - root - INFO - step: 30840 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 17:28:40,744 - root - INFO - lr: 1.0684e-05 gnorm: 1.13 [18:54:29< 5:36:57] +[titan] 2025-10-05 17:28:51,648 - root - INFO - step: 30845 loss: 1.9017 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6818 +[titan] 2025-10-05 17:28:51,648 - root - INFO - lr: 1.0678e-05 gnorm: 1.14 [18:54:40< 5:36:46] +[titan] 2025-10-05 17:29:00,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:29:02,544 - root - INFO - step: 30850 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 17:29:02,544 - root - INFO - lr: 1.0672e-05 gnorm: 1.15 [18:54:51< 5:36:35] +[titan] 2025-10-05 17:29:13,430 - root - INFO - step: 30855 loss: 1.9892 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:13,431 - root - INFO - lr: 1.0666e-05 gnorm: 1.16 [18:55:02< 5:36:24] +[titan] 2025-10-05 17:29:24,310 - root - INFO - step: 30860 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 17:29:24,310 - root - INFO - lr: 1.0660e-05 gnorm: 1.12 [18:55:13< 5:36:13] +[titan] 2025-10-05 17:29:35,178 - root - INFO - step: 30865 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:35,178 - root - INFO - lr: 1.0654e-05 gnorm: 1.16 [18:55:24< 5:36:02] +[titan] 2025-10-05 17:29:46,070 - root - INFO - step: 30870 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 17:29:46,070 - root - INFO - lr: 1.0648e-05 gnorm: 1.13 [18:55:34< 5:35:51] +[titan] 2025-10-05 17:29:56,949 - root - INFO - step: 30875 loss: 1.9562 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7292 +[titan] 2025-10-05 17:29:56,949 - root - INFO - lr: 1.0642e-05 gnorm: 1.14 [18:55:45< 5:35:40] +[titan] 2025-10-05 17:30:07,804 - root - INFO - step: 30880 loss: 2.0097 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 17:30:07,804 - root - INFO - lr: 
1.0636e-05 gnorm: 1.15 [18:55:56< 5:35:29] +[titan] 2025-10-05 17:30:18,658 - root - INFO - step: 30885 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 17:30:18,658 - root - INFO - lr: 1.0630e-05 gnorm: 1.17 [18:56:07< 5:35:18] +[titan] 2025-10-05 17:30:29,536 - root - INFO - step: 30890 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 17:30:29,536 - root - INFO - lr: 1.0625e-05 gnorm: 1.16 [18:56:18< 5:35:07] +[titan] 2025-10-05 17:30:40,429 - root - INFO - step: 30895 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7591 +[titan] 2025-10-05 17:30:40,429 - root - INFO - lr: 1.0619e-05 gnorm: 1.14 [18:56:29< 5:34:55] +[titan] 2025-10-05 17:30:49,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:30:51,303 - root - INFO - step: 30900 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 17:30:51,303 - root - INFO - lr: 1.0613e-05 gnorm: 1.15 [18:56:40< 5:34:44] +[titan] 2025-10-05 17:31:02,242 - root - INFO - step: 30905 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:31:02,242 - root - INFO - lr: 1.0607e-05 gnorm: 1.14 [18:56:51< 5:34:33] +[titan] 2025-10-05 17:31:13,130 - root - INFO - step: 30910 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 17:31:13,130 - root - INFO - lr: 1.0601e-05 gnorm: 1.25 [18:57:02< 5:34:22] +[titan] 2025-10-05 17:31:24,016 - root - INFO - step: 30915 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7966 +[titan] 2025-10-05 17:31:24,017 - root - INFO - lr: 1.0595e-05 gnorm: 1.13 [18:57:12< 5:34:11] +[titan] 2025-10-05 17:31:34,902 - root - INFO - step: 30920 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 17:31:34,903 - root - INFO - lr: 1.0589e-05 gnorm: 1.11 [18:57:23< 5:34:00] +[titan] 2025-10-05 17:31:45,757 - root - INFO - step: 30925 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:31:45,758 - root - INFO - lr: 1.0583e-05 gnorm: 1.14 [18:57:34< 5:33:49] +[titan] 2025-10-05 17:31:56,639 - root - INFO - step: 30930 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:31:56,639 - root - INFO - lr: 
1.0577e-05 gnorm: 1.15 [18:57:45< 5:33:38] +[titan] 2025-10-05 17:32:07,510 - root - INFO - step: 30935 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 17:32:07,510 - root - INFO - lr: 1.0572e-05 gnorm: 1.14 [18:57:56< 5:33:27] +[titan] 2025-10-05 17:32:18,361 - root - INFO - step: 30940 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 17:32:18,361 - root - INFO - lr: 1.0566e-05 gnorm: 1.16 [18:58:07< 5:33:16] +[titan] 2025-10-05 17:32:29,229 - root - INFO - step: 30945 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:32:29,229 - root - INFO - lr: 1.0560e-05 gnorm: 1.17 [18:58:18< 5:33:05] +[titan] 2025-10-05 17:32:37,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:32:40,069 - root - INFO - step: 30950 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:32:40,069 - root - INFO - lr: 1.0554e-05 gnorm: 1.13 [18:58:28< 5:32:54] +[titan] 2025-10-05 17:32:50,918 - root - INFO - step: 30955 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 17:32:50,918 - root - INFO - lr: 1.0548e-05 gnorm: 1.15 [18:58:39< 5:32:42] +[titan] 2025-10-05 17:33:01,839 - root - INFO - step: 30960 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6694 +[titan] 2025-10-05 17:33:01,839 - root - INFO - lr: 1.0542e-05 gnorm: 1.11 [18:58:50< 5:32:31] +[titan] 2025-10-05 17:33:12,698 - root - INFO - step: 30965 loss: 1.9487 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 17:33:12,698 - root - INFO - lr: 1.0536e-05 gnorm: 1.13 [18:59:01< 5:32:20] +[titan] 2025-10-05 17:33:23,587 - root - INFO - step: 30970 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7005 +[titan] 2025-10-05 17:33:23,587 - root - INFO - lr: 1.0530e-05 gnorm: 1.17 [18:59:12< 5:32:09] +[titan] 2025-10-05 17:33:34,467 - root - INFO - step: 30975 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 17:33:34,467 - root - INFO - lr: 1.0525e-05 gnorm: 1.19 [18:59:23< 5:31:58] +[titan] 2025-10-05 17:33:45,329 - root - INFO - step: 30980 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 17:33:45,329 - root - INFO - lr: 
1.0519e-05 gnorm: 1.21 [18:59:34< 5:31:47] +[titan] 2025-10-05 17:33:56,227 - root - INFO - step: 30985 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7673 +[titan] 2025-10-05 17:33:56,227 - root - INFO - lr: 1.0513e-05 gnorm: 1.14 [18:59:45< 5:31:36] +[titan] 2025-10-05 17:34:07,068 - root - INFO - step: 30990 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 17:34:07,068 - root - INFO - lr: 1.0507e-05 gnorm: 1.14 [18:59:55< 5:31:25] +[titan] 2025-10-05 17:34:17,920 - root - INFO - step: 30995 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 17:34:17,920 - root - INFO - lr: 1.0501e-05 gnorm: 1.13 [19:00:06< 5:31:14] +[titan] 2025-10-05 17:34:26,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:34:28,785 - root - INFO - step: 31000 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:34:28,785 - root - INFO - lr: 1.0495e-05 gnorm: 1.14 [19:00:17< 5:31:03] +[titan] 2025-10-05 17:34:39,677 - root - INFO - step: 31005 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 17:34:39,677 - root - INFO - lr: 1.0490e-05 gnorm: 1.12 [19:00:28< 5:30:52] +[titan] 2025-10-05 17:34:50,557 - root - INFO - step: 31010 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 17:34:50,557 - root - INFO - lr: 1.0484e-05 gnorm: 1.13 [19:00:39< 5:30:41] +[titan] 2025-10-05 17:35:01,441 - root - INFO - step: 31015 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 17:35:01,441 - root - INFO - lr: 1.0478e-05 gnorm: 1.11 [19:00:50< 5:30:29] +[titan] 2025-10-05 17:35:12,298 - root - INFO - step: 31020 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 17:35:12,298 - root - INFO - lr: 1.0472e-05 gnorm: 1.13 [19:01:01< 5:30:18] +[titan] 2025-10-05 17:35:23,148 - root - INFO - step: 31025 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 17:35:23,149 - root - INFO - lr: 1.0466e-05 gnorm: 1.19 [19:01:12< 5:30:07] +[titan] 2025-10-05 17:35:34,041 - root - INFO - step: 31030 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:35:34,042 - root - INFO - lr: 
1.0460e-05 gnorm: 1.14 [19:01:22< 5:29:56] +[titan] 2025-10-05 17:35:44,917 - root - INFO - step: 31035 loss: 2.0130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 17:35:44,917 - root - INFO - lr: 1.0455e-05 gnorm: 1.15 [19:01:33< 5:29:45] +[titan] 2025-10-05 17:35:55,789 - root - INFO - step: 31040 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 17:35:55,789 - root - INFO - lr: 1.0449e-05 gnorm: 1.14 [19:01:44< 5:29:34] +[titan] 2025-10-05 17:36:06,662 - root - INFO - step: 31045 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 17:36:06,663 - root - INFO - lr: 1.0443e-05 gnorm: 1.12 [19:01:55< 5:29:23] +[titan] 2025-10-05 17:36:15,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:36:17,541 - root - INFO - step: 31050 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 17:36:17,541 - root - INFO - lr: 1.0437e-05 gnorm: 1.15 [19:02:06< 5:29:12] +[titan] 2025-10-05 17:36:28,426 - root - INFO - step: 31055 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:36:28,426 - root - INFO - lr: 1.0431e-05 gnorm: 1.15 [19:02:17< 5:29:01] +[titan] 2025-10-05 17:36:39,289 - root - INFO - step: 31060 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 17:36:39,290 - root - INFO - lr: 1.0425e-05 gnorm: 1.14 [19:02:28< 5:28:50] +[titan] 2025-10-05 17:36:50,187 - root - INFO - step: 31065 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 17:36:50,187 - root - INFO - lr: 1.0420e-05 gnorm: 1.16 [19:02:39< 5:28:39] +[titan] 2025-10-05 17:37:01,103 - root - INFO - step: 31070 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 17:37:01,103 - root - INFO - lr: 1.0414e-05 gnorm: 1.19 [19:02:49< 5:28:28] +[titan] 2025-10-05 17:37:11,969 - root - INFO - step: 31075 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:37:11,969 - root - INFO - lr: 1.0408e-05 gnorm: 1.16 [19:03:00< 5:28:16] +[titan] 2025-10-05 17:37:22,843 - root - INFO - step: 31080 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 17:37:22,843 - root - INFO - lr: 
1.0402e-05 gnorm: 1.15 [19:03:11< 5:28:05] +[titan] 2025-10-05 17:37:33,710 - root - INFO - step: 31085 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:37:33,710 - root - INFO - lr: 1.0396e-05 gnorm: 1.14 [19:03:22< 5:27:54] +[titan] 2025-10-05 17:37:44,589 - root - INFO - step: 31090 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 17:37:44,589 - root - INFO - lr: 1.0391e-05 gnorm: 1.15 [19:03:33< 5:27:43] +[titan] 2025-10-05 17:37:55,476 - root - INFO - step: 31095 loss: 1.9001 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6797 +[titan] 2025-10-05 17:37:55,476 - root - INFO - lr: 1.0385e-05 gnorm: 1.14 [19:03:44< 5:27:32] +[titan] 2025-10-05 17:38:04,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:38:06,373 - root - INFO - step: 31100 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:38:06,373 - root - INFO - lr: 1.0379e-05 gnorm: 1.18 [19:03:55< 5:27:21] +[titan] 2025-10-05 17:38:17,276 - root - INFO - step: 31105 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:38:17,277 - root - INFO - lr: 1.0373e-05 gnorm: 1.12 [19:04:06< 5:27:10] +[titan] 2025-10-05 17:38:28,149 - root - INFO - step: 31110 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 17:38:28,149 - root - INFO - lr: 1.0367e-05 gnorm: 1.11 [19:04:17< 5:26:59] +[titan] 2025-10-05 17:38:39,025 - root - INFO - step: 31115 loss: 1.9815 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 17:38:39,025 - root - INFO - lr: 1.0362e-05 gnorm: 1.15 [19:04:27< 5:26:48] +[titan] 2025-10-05 17:38:49,892 - root - INFO - step: 31120 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:38:49,892 - root - INFO - lr: 1.0356e-05 gnorm: 1.13 [19:04:38< 5:26:37] +[titan] 2025-10-05 17:39:00,809 - root - INFO - step: 31125 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 17:39:00,809 - root - INFO - lr: 1.0350e-05 gnorm: 1.12 [19:04:49< 5:26:26] +[titan] 2025-10-05 17:39:11,704 - root - INFO - step: 31130 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 17:39:11,705 - root - INFO - lr: 
1.0344e-05 gnorm: 1.13 [19:05:00< 5:26:15] +[titan] 2025-10-05 17:39:22,594 - root - INFO - step: 31135 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:39:22,595 - root - INFO - lr: 1.0339e-05 gnorm: 1.19 [19:05:11< 5:26:04] +[titan] 2025-10-05 17:39:33,466 - root - INFO - step: 31140 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7578 +[titan] 2025-10-05 17:39:33,467 - root - INFO - lr: 1.0333e-05 gnorm: 1.15 [19:05:22< 5:25:52] +[titan] 2025-10-05 17:39:44,337 - root - INFO - step: 31145 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 17:39:44,337 - root - INFO - lr: 1.0327e-05 gnorm: 1.16 [19:05:33< 5:25:41] +[titan] 2025-10-05 17:39:53,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:39:55,199 - root - INFO - step: 31150 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7763 +[titan] 2025-10-05 17:39:55,199 - root - INFO - lr: 1.0321e-05 gnorm: 1.14 [19:05:44< 5:25:30] +[titan] 2025-10-05 17:40:06,057 - root - INFO - step: 31155 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 17:40:06,057 - root - INFO - lr: 1.0315e-05 gnorm: 1.17 [19:05:54< 5:25:19] +[titan] 2025-10-05 17:40:16,910 - root - INFO - step: 31160 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 17:40:16,910 - root - INFO - lr: 1.0310e-05 gnorm: 1.10 [19:06:05< 5:25:08] +[titan] 2025-10-05 17:40:27,753 - root - INFO - step: 31165 loss: 1.8951 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6755 +[titan] 2025-10-05 17:40:27,753 - root - INFO - lr: 1.0304e-05 gnorm: 1.16 [19:06:16< 5:24:57] +[titan] 2025-10-05 17:40:38,617 - root - INFO - step: 31170 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 17:40:38,618 - root - INFO - lr: 1.0298e-05 gnorm: 1.17 [19:06:27< 5:24:46] +[titan] 2025-10-05 17:40:49,491 - root - INFO - step: 31175 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7707 +[titan] 2025-10-05 17:40:49,491 - root - INFO - lr: 1.0292e-05 gnorm: 1.18 [19:06:38< 5:24:35] +[titan] 2025-10-05 17:41:00,364 - root - INFO - step: 31180 loss: 2.0114 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 17:41:00,365 - root - INFO - lr: 
1.0287e-05 gnorm: 1.12 [19:06:49< 5:24:24] +[titan] 2025-10-05 17:41:11,255 - root - INFO - step: 31185 loss: 2.0026 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 17:41:11,255 - root - INFO - lr: 1.0281e-05 gnorm: 1.19 [19:07:00< 5:24:13] +[titan] 2025-10-05 17:41:22,116 - root - INFO - step: 31190 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:41:22,116 - root - INFO - lr: 1.0275e-05 gnorm: 1.10 [19:07:10< 5:24:02] +[titan] 2025-10-05 17:41:32,999 - root - INFO - step: 31195 loss: 1.9088 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 17:41:32,999 - root - INFO - lr: 1.0269e-05 gnorm: 1.13 [19:07:21< 5:23:51] +[titan] 2025-10-05 17:41:41,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:41:43,892 - root - INFO - step: 31200 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:41:43,893 - root - INFO - lr: 1.0264e-05 gnorm: 1.13 [19:07:32< 5:23:40] +[titan] 2025-10-05 17:41:54,767 - root - INFO - step: 31205 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 17:41:54,767 - root - INFO - lr: 1.0258e-05 gnorm: 1.13 [19:07:43< 5:23:28] +[titan] 2025-10-05 17:42:05,616 - root - INFO - step: 31210 loss: 1.9827 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 17:42:05,617 - root - INFO - lr: 1.0252e-05 gnorm: 1.12 [19:07:54< 5:23:17] +[titan] 2025-10-05 17:42:16,473 - root - INFO - step: 31215 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:42:16,473 - root - INFO - lr: 1.0247e-05 gnorm: 1.17 [19:08:05< 5:23:06] +[titan] 2025-10-05 17:42:27,363 - root - INFO - step: 31220 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:42:27,363 - root - INFO - lr: 1.0241e-05 gnorm: 1.16 [19:08:16< 5:22:55] +[titan] 2025-10-05 17:42:38,236 - root - INFO - step: 31225 loss: 1.8762 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 17:42:38,237 - root - INFO - lr: 1.0235e-05 gnorm: 1.18 [19:08:27< 5:22:44] +[titan] 2025-10-05 17:42:49,232 - root - INFO - step: 31230 loss: 2.0595 memory: 118.84GiB(85.28%) tps: 29,802 tflops: 413.45 mfu: 41.80% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 17:42:49,232 - root - INFO - lr: 
1.0229e-05 gnorm: 1.23 [19:08:38< 5:22:33] +[titan] 2025-10-05 17:42:53,769 - root - INFO - Dumping profiler traces at step 31232 +[titan] 2025-10-05 17:42:53,805 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:43:00,360 - root - INFO - step: 31235 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 29,447 tflops: 408.54 mfu: 41.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 17:43:00,360 - root - INFO - lr: 1.0224e-05 gnorm: 1.10 [19:08:49< 5:22:22] +[titan] 2025-10-05 17:43:11,236 - root - INFO - step: 31240 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 17:43:11,236 - root - INFO - lr: 1.0218e-05 gnorm: 1.12 [19:09:00< 5:22:11] +[titan] 2025-10-05 17:43:22,106 - root - INFO - step: 31245 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 17:43:22,106 - root - INFO - lr: 1.0212e-05 gnorm: 1.14 [19:09:10< 5:22:00] +[titan] 2025-10-05 17:43:30,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:43:32,976 - root - INFO - step: 31250 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:43:32,977 - root - INFO - lr: 1.0207e-05 gnorm: 1.15 [19:09:21< 5:21:49] +[titan] 2025-10-05 17:43:43,850 - root - INFO - step: 31255 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:43:43,850 - root - INFO - lr: 1.0201e-05 gnorm: 1.17 [19:09:32< 5:21:38] +[titan] 2025-10-05 17:43:54,726 - root - INFO - step: 31260 loss: 2.0422 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:43:54,727 - root - INFO - lr: 1.0195e-05 gnorm: 1.16 [19:09:43< 5:21:27] +[titan] 2025-10-05 17:44:05,648 - root - INFO - step: 31265 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:44:05,649 - root - INFO - lr: 1.0189e-05 gnorm: 1.18 [19:09:54< 5:21:16] +[titan] 2025-10-05 17:44:16,493 - root - INFO - step: 31270 loss: 1.9624 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 17:44:16,493 - root - INFO - lr: 1.0184e-05 gnorm: 1.11 [19:10:05< 5:21:05] +[titan] 2025-10-05 17:44:27,352 - root - INFO - step: 31275 loss: 1.9671 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 17:44:27,352 - root - INFO - lr: 1.0178e-05 gnorm: 1.17 [19:10:16< 5:20:53] +[titan] 2025-10-05 17:44:38,191 - root - INFO - step: 31280 loss: 1.9559 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:44:38,191 - root - INFO - lr: 
1.0172e-05 gnorm: 1.11 [19:10:27< 5:20:42] +[titan] 2025-10-05 17:44:49,058 - root - INFO - step: 31285 loss: 2.0070 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 17:44:49,058 - root - INFO - lr: 1.0167e-05 gnorm: 1.16 [19:10:37< 5:20:31] +[titan] 2025-10-05 17:44:59,922 - root - INFO - step: 31290 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 17:44:59,922 - root - INFO - lr: 1.0161e-05 gnorm: 1.16 [19:10:48< 5:20:20] +[titan] 2025-10-05 17:45:10,831 - root - INFO - step: 31295 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 17:45:10,831 - root - INFO - lr: 1.0155e-05 gnorm: 1.14 [19:10:59< 5:20:09] +[titan] 2025-10-05 17:45:19,501 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:45:21,690 - root - INFO - step: 31300 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:45:21,690 - root - INFO - lr: 1.0150e-05 gnorm: 1.13 [19:11:10< 5:19:58] +[titan] 2025-10-05 17:45:32,589 - root - INFO - step: 31305 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 17:45:32,590 - root - INFO - lr: 1.0144e-05 gnorm: 1.14 [19:11:21< 5:19:47] +[titan] 2025-10-05 17:45:43,458 - root - INFO - step: 31310 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 17:45:43,459 - root - INFO - lr: 1.0138e-05 gnorm: 1.16 [19:11:32< 5:19:36] +[titan] 2025-10-05 17:45:54,309 - root - INFO - step: 31315 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7610 +[titan] 2025-10-05 17:45:54,309 - root - INFO - lr: 1.0133e-05 gnorm: 1.15 [19:11:43< 5:19:25] +[titan] 2025-10-05 17:46:05,142 - root - INFO - step: 31320 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 17:46:05,143 - root - INFO - lr: 1.0127e-05 gnorm: 1.11 [19:11:54< 5:19:14] +[titan] 2025-10-05 17:46:16,012 - root - INFO - step: 31325 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 17:46:16,012 - root - INFO - lr: 1.0121e-05 gnorm: 1.22 [19:12:04< 5:19:03] +[titan] 2025-10-05 17:46:26,886 - root - INFO - step: 31330 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 17:46:26,886 - root - INFO - lr: 
1.0116e-05 gnorm: 1.16 [19:12:15< 5:18:52] +[titan] 2025-10-05 17:46:37,770 - root - INFO - step: 31335 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 17:46:37,770 - root - INFO - lr: 1.0110e-05 gnorm: 1.14 [19:12:26< 5:18:40] +[titan] 2025-10-05 17:46:48,608 - root - INFO - step: 31340 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 17:46:48,608 - root - INFO - lr: 1.0104e-05 gnorm: 1.12 [19:12:37< 5:18:29] +[titan] 2025-10-05 17:46:59,446 - root - INFO - step: 31345 loss: 1.9908 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 17:46:59,446 - root - INFO - lr: 1.0099e-05 gnorm: 1.14 [19:12:48< 5:18:18] +[titan] 2025-10-05 17:47:08,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:47:10,307 - root - INFO - step: 31350 loss: 2.0078 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7753 +[titan] 2025-10-05 17:47:10,307 - root - INFO - lr: 1.0093e-05 gnorm: 1.18 [19:12:59< 5:18:07] +[titan] 2025-10-05 17:47:21,149 - root - INFO - step: 31355 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 17:47:21,149 - root - INFO - lr: 1.0087e-05 gnorm: 1.14 [19:13:10< 5:17:56] +[titan] 2025-10-05 17:47:32,020 - root - INFO - step: 31360 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 17:47:32,020 - root - INFO - lr: 1.0082e-05 gnorm: 1.16 [19:13:20< 5:17:45] +[titan] 2025-10-05 17:47:42,860 - root - INFO - step: 31365 loss: 2.0383 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 17:47:42,861 - root - INFO - lr: 1.0076e-05 gnorm: 1.15 [19:13:31< 5:17:34] +[titan] 2025-10-05 17:47:53,707 - root - INFO - step: 31370 loss: 2.0511 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 17:47:53,708 - root - INFO - lr: 1.0070e-05 gnorm: 1.16 [19:13:42< 5:17:23] +[titan] 2025-10-05 17:48:04,561 - root - INFO - step: 31375 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7875 +[titan] 2025-10-05 17:48:04,561 - root - INFO - lr: 1.0065e-05 gnorm: 1.20 [19:13:53< 5:17:12] +[titan] 2025-10-05 17:48:15,405 - root - INFO - step: 31380 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:48:15,405 - root - INFO - lr: 
1.0059e-05 gnorm: 1.15 [19:14:04< 5:17:01] +[titan] 2025-10-05 17:48:26,264 - root - INFO - step: 31385 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 17:48:26,265 - root - INFO - lr: 1.0053e-05 gnorm: 1.15 [19:14:15< 5:16:50] +[titan] 2025-10-05 17:48:37,141 - root - INFO - step: 31390 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 17:48:37,141 - root - INFO - lr: 1.0048e-05 gnorm: 1.19 [19:14:25< 5:16:39] +[titan] 2025-10-05 17:48:47,988 - root - INFO - step: 31395 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 17:48:47,988 - root - INFO - lr: 1.0042e-05 gnorm: 1.17 [19:14:36< 5:16:27] +[titan] 2025-10-05 17:48:56,653 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:48:58,841 - root - INFO - step: 31400 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 17:48:58,841 - root - INFO - lr: 1.0036e-05 gnorm: 1.19 [19:14:47< 5:16:16] +[titan] 2025-10-05 17:49:09,687 - root - INFO - step: 31405 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:49:09,687 - root - INFO - lr: 1.0031e-05 gnorm: 1.19 [19:14:58< 5:16:05] +[titan] 2025-10-05 17:49:20,527 - root - INFO - step: 31410 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 17:49:20,527 - root - INFO - lr: 1.0025e-05 gnorm: 1.12 [19:15:09< 5:15:54] +[titan] 2025-10-05 17:49:31,368 - root - INFO - step: 31415 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:49:31,368 - root - INFO - lr: 1.0020e-05 gnorm: 1.12 [19:15:20< 5:15:43] +[titan] 2025-10-05 17:49:42,213 - root - INFO - step: 31420 loss: 1.9250 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7033 +[titan] 2025-10-05 17:49:42,213 - root - INFO - lr: 1.0014e-05 gnorm: 1.15 [19:15:31< 5:15:32] +[titan] 2025-10-05 17:49:53,106 - root - INFO - step: 31425 loss: 1.9352 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 17:49:53,106 - root - INFO - lr: 1.0008e-05 gnorm: 1.16 [19:15:41< 5:15:21] +[titan] 2025-10-05 17:50:03,949 - root - INFO - step: 31430 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:50:03,949 - root - INFO - lr: 
1.0003e-05 gnorm: 1.17 [19:15:52< 5:15:10] +[titan] 2025-10-05 17:50:14,831 - root - INFO - step: 31435 loss: 1.9571 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 17:50:14,832 - root - INFO - lr: 9.9971e-06 gnorm: 1.15 [19:16:03< 5:14:59] +[titan] 2025-10-05 17:50:25,694 - root - INFO - step: 31440 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 17:50:25,694 - root - INFO - lr: 9.9915e-06 gnorm: 1.18 [19:16:14< 5:14:48] +[titan] 2025-10-05 17:50:36,538 - root - INFO - step: 31445 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 17:50:36,538 - root - INFO - lr: 9.9859e-06 gnorm: 2.11 [19:16:25< 5:14:37] +[titan] 2025-10-05 17:50:45,222 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:50:47,411 - root - INFO - step: 31450 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:50:47,411 - root - INFO - lr: 9.9803e-06 gnorm: 1.16 [19:16:36< 5:14:26] +[titan] 2025-10-05 17:50:58,315 - root - INFO - step: 31455 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:50:58,315 - root - INFO - lr: 9.9747e-06 gnorm: 1.15 [19:16:47< 5:14:15] +[titan] 2025-10-05 17:51:09,156 - root - INFO - step: 31460 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7552 +[titan] 2025-10-05 17:51:09,156 - root - INFO - lr: 9.9691e-06 gnorm: 1.20 [19:16:58< 5:14:03] +[titan] 2025-10-05 17:51:20,027 - root - INFO - step: 31465 loss: 2.0529 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 17:51:20,027 - root - INFO - lr: 9.9635e-06 gnorm: 1.21 [19:17:08< 5:13:52] +[titan] 2025-10-05 17:51:30,891 - root - INFO - step: 31470 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 17:51:30,891 - root - INFO - lr: 9.9579e-06 gnorm: 1.16 [19:17:19< 5:13:41] +[titan] 2025-10-05 17:51:41,738 - root - INFO - step: 31475 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 17:51:41,739 - root - INFO - lr: 9.9524e-06 gnorm: 1.16 [19:17:30< 5:13:30] +[titan] 2025-10-05 17:51:52,590 - root - INFO - step: 31480 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 17:51:52,590 - root - INFO - lr: 
9.9468e-06 gnorm: 1.15 [19:17:41< 5:13:19] +[titan] 2025-10-05 17:52:03,461 - root - INFO - step: 31485 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 17:52:03,461 - root - INFO - lr: 9.9412e-06 gnorm: 1.15 [19:17:52< 5:13:08] +[titan] 2025-10-05 17:52:14,350 - root - INFO - step: 31490 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 17:52:14,350 - root - INFO - lr: 9.9356e-06 gnorm: 1.18 [19:18:03< 5:12:57] +[titan] 2025-10-05 17:52:25,212 - root - INFO - step: 31495 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 17:52:25,212 - root - INFO - lr: 9.9300e-06 gnorm: 1.13 [19:18:14< 5:12:46] +[titan] 2025-10-05 17:52:33,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:52:36,054 - root - INFO - step: 31500 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 17:52:36,054 - root - INFO - lr: 9.9245e-06 gnorm: 1.18 [19:18:24< 5:12:35] +[titan] 2025-10-05 17:52:46,921 - root - INFO - step: 31505 loss: 1.9036 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 17:52:46,921 - root - INFO - lr: 9.9189e-06 gnorm: 1.12 [19:18:35< 5:12:24] +[titan] 2025-10-05 17:52:57,775 - root - INFO - step: 31510 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:52:57,775 - root - INFO - lr: 9.9133e-06 gnorm: 1.14 [19:18:46< 5:12:13] +[titan] 2025-10-05 17:53:08,630 - root - INFO - step: 31515 loss: 1.8954 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6762 +[titan] 2025-10-05 17:53:08,630 - root - INFO - lr: 9.9078e-06 gnorm: 1.14 [19:18:57< 5:12:02] +[titan] 2025-10-05 17:53:19,545 - root - INFO - step: 31520 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 17:53:19,545 - root - INFO - lr: 9.9022e-06 gnorm: 1.13 [19:19:08< 5:11:50] +[titan] 2025-10-05 17:53:30,414 - root - INFO - step: 31525 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:53:30,414 - root - INFO - lr: 9.8966e-06 gnorm: 1.16 [19:19:19< 5:11:39] +[titan] 2025-10-05 17:53:41,302 - root - INFO - step: 31530 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 17:53:41,302 - root - INFO - lr: 
9.8911e-06 gnorm: 1.16 [19:19:30< 5:11:28] +[titan] 2025-10-05 17:53:52,151 - root - INFO - step: 31535 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:53:52,151 - root - INFO - lr: 9.8855e-06 gnorm: 1.18 [19:19:40< 5:11:17] +[titan] 2025-10-05 17:54:03,009 - root - INFO - step: 31540 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 17:54:03,009 - root - INFO - lr: 9.8800e-06 gnorm: 1.19 [19:19:51< 5:11:06] +[titan] 2025-10-05 17:54:13,869 - root - INFO - step: 31545 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6744 +[titan] 2025-10-05 17:54:13,869 - root - INFO - lr: 9.8744e-06 gnorm: 1.15 [19:20:02< 5:10:55] +[titan] 2025-10-05 17:54:22,535 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:54:24,751 - root - INFO - step: 31550 loss: 2.0225 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 17:54:24,751 - root - INFO - lr: 9.8689e-06 gnorm: 1.23 [19:20:13< 5:10:44] +[titan] 2025-10-05 17:54:35,610 - root - INFO - step: 31555 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 17:54:35,610 - root - INFO - lr: 9.8633e-06 gnorm: 1.15 [19:20:24< 5:10:33] +[titan] 2025-10-05 17:54:46,473 - root - INFO - step: 31560 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 17:54:46,473 - root - INFO - lr: 9.8578e-06 gnorm: 1.13 [19:20:35< 5:10:22] +[titan] 2025-10-05 17:54:57,341 - root - INFO - step: 31565 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:54:57,341 - root - INFO - lr: 9.8523e-06 gnorm: 1.14 [19:20:46< 5:10:11] +[titan] 2025-10-05 17:55:08,193 - root - INFO - step: 31570 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 17:55:08,193 - root - INFO - lr: 9.8467e-06 gnorm: 1.13 [19:20:57< 5:10:00] +[titan] 2025-10-05 17:55:19,059 - root - INFO - step: 31575 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 17:55:19,059 - root - INFO - lr: 9.8412e-06 gnorm: 1.16 [19:21:07< 5:09:49] +[titan] 2025-10-05 17:55:29,929 - root - INFO - step: 31580 loss: 2.0143 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7811 +[titan] 2025-10-05 17:55:29,929 - root - INFO - lr: 
9.8357e-06 gnorm: 1.16 [19:21:18< 5:09:38] +[titan] 2025-10-05 17:55:40,810 - root - INFO - step: 31585 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 17:55:40,810 - root - INFO - lr: 9.8301e-06 gnorm: 1.18 [19:21:29< 5:09:26] +[titan] 2025-10-05 17:55:51,674 - root - INFO - step: 31590 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 17:55:51,674 - root - INFO - lr: 9.8246e-06 gnorm: 1.14 [19:21:40< 5:09:15] +[titan] 2025-10-05 17:56:02,575 - root - INFO - step: 31595 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 17:56:02,575 - root - INFO - lr: 9.8191e-06 gnorm: 1.17 [19:21:51< 5:09:04] +[titan] 2025-10-05 17:56:11,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:56:13,445 - root - INFO - step: 31600 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 17:56:13,445 - root - INFO - lr: 9.8136e-06 gnorm: 1.13 [19:22:02< 5:08:53] +[titan] 2025-10-05 17:56:24,318 - root - INFO - step: 31605 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:56:24,318 - root - INFO - lr: 9.8081e-06 gnorm: 1.14 [19:22:13< 5:08:42] +[titan] 2025-10-05 17:56:35,171 - root - INFO - step: 31610 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 17:56:35,172 - root - INFO - lr: 9.8025e-06 gnorm: 1.19 [19:22:24< 5:08:31] +[titan] 2025-10-05 17:56:46,086 - root - INFO - step: 31615 loss: 2.0603 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:56:46,087 - root - INFO - lr: 9.7970e-06 gnorm: 1.20 [19:22:34< 5:08:20] +[titan] 2025-10-05 17:56:56,970 - root - INFO - step: 31620 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 17:56:56,970 - root - INFO - lr: 9.7915e-06 gnorm: 1.15 [19:22:45< 5:08:09] +[titan] 2025-10-05 17:57:07,861 - root - INFO - step: 31625 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:57:07,862 - root - INFO - lr: 9.7860e-06 gnorm: 1.16 [19:22:56< 5:07:58] +[titan] 2025-10-05 17:57:18,731 - root - INFO - step: 31630 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 17:57:18,731 - root - INFO - lr: 
9.7805e-06 gnorm: 1.15 [19:23:07< 5:07:47] +[titan] 2025-10-05 17:57:29,588 - root - INFO - step: 31635 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7639 +[titan] 2025-10-05 17:57:29,588 - root - INFO - lr: 9.7750e-06 gnorm: 1.13 [19:23:18< 5:07:36] +[titan] 2025-10-05 17:57:40,435 - root - INFO - step: 31640 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7360 +[titan] 2025-10-05 17:57:40,435 - root - INFO - lr: 9.7695e-06 gnorm: 1.15 [19:23:29< 5:07:25] +[titan] 2025-10-05 17:57:51,307 - root - INFO - step: 31645 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:57:51,307 - root - INFO - lr: 9.7640e-06 gnorm: 1.15 [19:23:40< 5:07:14] +[titan] 2025-10-05 17:58:00,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:58:02,211 - root - INFO - step: 31650 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 17:58:02,211 - root - INFO - lr: 9.7585e-06 gnorm: 1.14 [19:23:51< 5:07:03] +[titan] 2025-10-05 17:58:13,078 - root - INFO - step: 31655 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:58:13,078 - root - INFO - lr: 9.7531e-06 gnorm: 1.13 [19:24:01< 5:06:51] +[titan] 2025-10-05 17:58:23,965 - root - INFO - step: 31660 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:58:23,966 - root - INFO - lr: 9.7476e-06 gnorm: 1.16 [19:24:12< 5:06:40] +[titan] 2025-10-05 17:58:34,856 - root - INFO - step: 31665 loss: 1.9619 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 17:58:34,857 - root - INFO - lr: 9.7421e-06 gnorm: 1.15 [19:24:23< 5:06:29] +[titan] 2025-10-05 17:58:45,708 - root - INFO - step: 31670 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7203 +[titan] 2025-10-05 17:58:45,708 - root - INFO - lr: 9.7366e-06 gnorm: 1.17 [19:24:34< 5:06:18] +[titan] 2025-10-05 17:58:56,585 - root - INFO - step: 31675 loss: 1.9360 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7121 +[titan] 2025-10-05 17:58:56,586 - root - INFO - lr: 9.7311e-06 gnorm: 1.19 [19:24:45< 5:06:07] +[titan] 2025-10-05 17:59:07,478 - root - INFO - step: 31680 loss: 2.0159 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 17:59:07,478 - root - INFO - lr: 
9.7257e-06 gnorm: 1.17 [19:24:56< 5:05:56] +[titan] 2025-10-05 17:59:18,333 - root - INFO - step: 31685 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 17:59:18,333 - root - INFO - lr: 9.7202e-06 gnorm: 1.14 [19:25:07< 5:05:45] +[titan] 2025-10-05 17:59:29,202 - root - INFO - step: 31690 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 17:59:29,202 - root - INFO - lr: 9.7147e-06 gnorm: 1.15 [19:25:18< 5:05:34] +[titan] 2025-10-05 17:59:40,087 - root - INFO - step: 31695 loss: 2.0267 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 17:59:40,087 - root - INFO - lr: 9.7093e-06 gnorm: 1.18 [19:25:28< 5:05:23] +[titan] 2025-10-05 17:59:48,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:59:50,987 - root - INFO - step: 31700 loss: 2.0901 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 17:59:50,987 - root - INFO - lr: 9.7038e-06 gnorm: 1.21 [19:25:39< 5:05:12] +[titan] 2025-10-05 18:00:01,873 - root - INFO - step: 31705 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 18:00:01,873 - root - INFO - lr: 9.6983e-06 gnorm: 1.15 [19:25:50< 5:05:01] +[titan] 2025-10-05 18:00:12,786 - root - INFO - step: 31710 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 18:00:12,786 - root - INFO - lr: 9.6929e-06 gnorm: 1.19 [19:26:01< 5:04:50] +[titan] 2025-10-05 18:00:23,689 - root - INFO - step: 31715 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 18:00:23,689 - root - INFO - lr: 9.6874e-06 gnorm: 1.20 [19:26:12< 5:04:39] +[titan] 2025-10-05 18:00:34,603 - root - INFO - step: 31720 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:00:34,603 - root - INFO - lr: 9.6820e-06 gnorm: 1.17 [19:26:23< 5:04:28] +[titan] 2025-10-05 18:00:45,500 - root - INFO - step: 31725 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 18:00:45,500 - root - INFO - lr: 9.6765e-06 gnorm: 1.17 [19:26:34< 5:04:16] +[titan] 2025-10-05 18:00:56,416 - root - INFO - step: 31730 loss: 1.8776 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6601 +[titan] 2025-10-05 18:00:56,416 - root - INFO - lr: 
9.6711e-06 gnorm: 1.13 [19:26:45< 5:04:05] +[titan] 2025-10-05 18:01:07,298 - root - INFO - step: 31735 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:01:07,298 - root - INFO - lr: 9.6656e-06 gnorm: 1.14 [19:26:56< 5:03:54] +[titan] 2025-10-05 18:01:18,193 - root - INFO - step: 31740 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 18:01:18,193 - root - INFO - lr: 9.6602e-06 gnorm: 1.19 [19:27:07< 5:03:43] +[titan] 2025-10-05 18:01:27,261 - root - INFO - Dumping profiler traces at step 31744 +[titan] 2025-10-05 18:01:27,300 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:01:29,483 - root - INFO - step: 31745 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 29,024 tflops: 402.66 mfu: 40.71% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:01:29,484 - root - INFO - lr: 9.6548e-06 gnorm: 1.14 [19:27:18< 5:03:32] +[titan] 2025-10-05 18:01:38,164 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:01:40,345 - root - INFO - step: 31750 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 18:01:40,345 - root - INFO - lr: 9.6493e-06 gnorm: 1.15 [19:27:29< 5:03:21] +[titan] 2025-10-05 18:01:51,212 - root - INFO - step: 31755 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 18:01:51,212 - root - INFO - lr: 9.6439e-06 gnorm: 1.16 [19:27:40< 5:03:10] +[titan] 2025-10-05 18:02:02,067 - root - INFO - step: 31760 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 18:02:02,067 - root - INFO - lr: 9.6385e-06 gnorm: 1.20 [19:27:50< 5:02:59] +[titan] 2025-10-05 18:02:12,939 - root - INFO - step: 31765 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 18:02:12,939 - root - INFO - lr: 9.6330e-06 gnorm: 1.12 [19:28:01< 5:02:48] +[titan] 2025-10-05 18:02:24,117 - root - INFO - step: 31770 loss: 1.9667 memory: 118.84GiB(85.28%) tps: 29,315 tflops: 406.70 mfu: 41.12% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 18:02:24,118 - root - INFO - lr: 9.6276e-06 gnorm: 1.16 [19:28:12< 5:02:37] +[titan] 2025-10-05 18:02:35,008 - root - INFO - step: 31775 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 18:02:35,008 - root - INFO - lr: 9.6222e-06 gnorm: 1.21 [19:28:23< 5:02:26] +[titan] 2025-10-05 18:02:45,869 - root - INFO - step: 31780 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:02:45,869 - root - INFO - lr: 
9.6168e-06 gnorm: 1.16 [19:28:34< 5:02:15] +[titan] 2025-10-05 18:02:56,730 - root - INFO - step: 31785 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:02:56,730 - root - INFO - lr: 9.6114e-06 gnorm: 1.17 [19:28:45< 5:02:04] +[titan] 2025-10-05 18:03:07,583 - root - INFO - step: 31790 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 18:03:07,583 - root - INFO - lr: 9.6059e-06 gnorm: 1.14 [19:28:56< 5:01:53] +[titan] 2025-10-05 18:03:18,426 - root - INFO - step: 31795 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 18:03:18,426 - root - INFO - lr: 9.6005e-06 gnorm: 1.12 [19:29:07< 5:01:42] +[titan] 2025-10-05 18:03:27,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:03:29,339 - root - INFO - step: 31800 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 18:03:29,339 - root - INFO - lr: 9.5951e-06 gnorm: 1.13 [19:29:18< 5:01:31] +[titan] 2025-10-05 18:03:40,190 - root - INFO - step: 31805 loss: 1.9797 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:03:40,190 - root - INFO - lr: 9.5897e-06 gnorm: 1.15 [19:29:29< 5:01:20] +[titan] 2025-10-05 18:03:51,090 - root - INFO - step: 31810 loss: 2.0140 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 18:03:51,090 - root - INFO - lr: 9.5843e-06 gnorm: 1.18 [19:29:39< 5:01:08] +[titan] 2025-10-05 18:04:01,948 - root - INFO - step: 31815 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 18:04:01,948 - root - INFO - lr: 9.5789e-06 gnorm: 1.12 [19:29:50< 5:00:57] +[titan] 2025-10-05 18:04:12,805 - root - INFO - step: 31820 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 18:04:12,805 - root - INFO - lr: 9.5735e-06 gnorm: 1.15 [19:30:01< 5:00:46] +[titan] 2025-10-05 18:04:23,715 - root - INFO - step: 31825 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7202 +[titan] 2025-10-05 18:04:23,715 - root - INFO - lr: 9.5681e-06 gnorm: 1.12 [19:30:12< 5:00:35] +[titan] 2025-10-05 18:04:34,585 - root - INFO - step: 31830 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 18:04:34,585 - root - INFO - lr: 
9.5628e-06 gnorm: 1.16 [19:30:23< 5:00:24] +[titan] 2025-10-05 18:04:45,454 - root - INFO - step: 31835 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:04:45,454 - root - INFO - lr: 9.5574e-06 gnorm: 1.18 [19:30:34< 5:00:13] +[titan] 2025-10-05 18:04:56,357 - root - INFO - step: 31840 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 18:04:56,357 - root - INFO - lr: 9.5520e-06 gnorm: 1.16 [19:30:45< 5:00:02] +[titan] 2025-10-05 18:05:07,225 - root - INFO - step: 31845 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 18:05:07,226 - root - INFO - lr: 9.5466e-06 gnorm: 1.18 [19:30:56< 4:59:51] +[titan] 2025-10-05 18:05:15,912 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:05:18,103 - root - INFO - step: 31850 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 18:05:18,103 - root - INFO - lr: 9.5412e-06 gnorm: 1.16 [19:31:06< 4:59:40] +[titan] 2025-10-05 18:05:29,031 - root - INFO - step: 31855 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7969 +[titan] 2025-10-05 18:05:29,031 - root - INFO - lr: 9.5359e-06 gnorm: 1.17 [19:31:17< 4:59:29] +[titan] 2025-10-05 18:05:39,898 - root - INFO - step: 31860 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7375 +[titan] 2025-10-05 18:05:39,898 - root - INFO - lr: 9.5305e-06 gnorm: 1.17 [19:31:28< 4:59:18] +[titan] 2025-10-05 18:05:50,764 - root - INFO - step: 31865 loss: 1.9005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6802 +[titan] 2025-10-05 18:05:50,764 - root - INFO - lr: 9.5251e-06 gnorm: 1.14 [19:31:39< 4:59:07] +[titan] 2025-10-05 18:06:01,663 - root - INFO - step: 31870 loss: 1.9427 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:06:01,664 - root - INFO - lr: 9.5197e-06 gnorm: 1.17 [19:31:50< 4:58:56] +[titan] 2025-10-05 18:06:12,506 - root - INFO - step: 31875 loss: 2.0201 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 18:06:12,507 - root - INFO - lr: 9.5144e-06 gnorm: 1.20 [19:32:01< 4:58:45] +[titan] 2025-10-05 18:06:23,358 - root - INFO - step: 31880 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 18:06:23,358 - root - INFO - lr: 
9.5090e-06 gnorm: 1.12 [19:32:12< 4:58:33] +[titan] 2025-10-05 18:06:34,258 - root - INFO - step: 31885 loss: 1.8475 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 18:06:34,258 - root - INFO - lr: 9.5037e-06 gnorm: 1.13 [19:32:23< 4:58:22] +[titan] 2025-10-05 18:06:45,106 - root - INFO - step: 31890 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 18:06:45,106 - root - INFO - lr: 9.4983e-06 gnorm: 1.19 [19:32:33< 4:58:11] +[titan] 2025-10-05 18:06:55,965 - root - INFO - step: 31895 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 18:06:55,965 - root - INFO - lr: 9.4930e-06 gnorm: 1.16 [19:32:44< 4:58:00] +[titan] 2025-10-05 18:07:04,625 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:07:06,799 - root - INFO - step: 31900 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:07:06,799 - root - INFO - lr: 9.4876e-06 gnorm: 1.18 [19:32:55< 4:57:49] +[titan] 2025-10-05 18:07:17,698 - root - INFO - step: 31905 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 18:07:17,698 - root - INFO - lr: 9.4823e-06 gnorm: 1.20 [19:33:06< 4:57:38] +[titan] 2025-10-05 18:07:28,596 - root - INFO - step: 31910 loss: 1.9594 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 18:07:28,597 - root - INFO - lr: 9.4769e-06 gnorm: 1.14 [19:33:17< 4:57:27] +[titan] 2025-10-05 18:07:39,465 - root - INFO - step: 31915 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 18:07:39,466 - root - INFO - lr: 9.4716e-06 gnorm: 1.15 [19:33:28< 4:57:16] +[titan] 2025-10-05 18:07:50,320 - root - INFO - step: 31920 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:07:50,320 - root - INFO - lr: 9.4662e-06 gnorm: 1.18 [19:33:39< 4:57:05] +[titan] 2025-10-05 18:08:01,166 - root - INFO - step: 31925 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:08:01,166 - root - INFO - lr: 9.4609e-06 gnorm: 1.19 [19:33:49< 4:56:54] +[titan] 2025-10-05 18:08:12,045 - root - INFO - step: 31930 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 18:08:12,045 - root - INFO - lr: 
9.4556e-06 gnorm: 1.11 [19:34:00< 4:56:43] +[titan] 2025-10-05 18:08:22,957 - root - INFO - step: 31935 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7321 +[titan] 2025-10-05 18:08:22,957 - root - INFO - lr: 9.4502e-06 gnorm: 1.14 [19:34:11< 4:56:32] +[titan] 2025-10-05 18:08:33,867 - root - INFO - step: 31940 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7987 +[titan] 2025-10-05 18:08:33,867 - root - INFO - lr: 9.4449e-06 gnorm: 1.15 [19:34:22< 4:56:21] +[titan] 2025-10-05 18:08:44,766 - root - INFO - step: 31945 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 18:08:44,766 - root - INFO - lr: 9.4396e-06 gnorm: 1.16 [19:34:33< 4:56:10] +[titan] 2025-10-05 18:08:53,450 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:08:55,628 - root - INFO - step: 31950 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 18:08:55,628 - root - INFO - lr: 9.4343e-06 gnorm: 1.18 [19:34:44< 4:55:58] +[titan] 2025-10-05 18:09:06,503 - root - INFO - step: 31955 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 18:09:06,503 - root - INFO - lr: 9.4289e-06 gnorm: 1.12 [19:34:55< 4:55:47] +[titan] 2025-10-05 18:09:17,363 - root - INFO - step: 31960 loss: 2.0329 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 18:09:17,363 - root - INFO - lr: 9.4236e-06 gnorm: 1.18 [19:35:06< 4:55:36] +[titan] 2025-10-05 18:09:28,265 - root - INFO - step: 31965 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 18:09:28,265 - root - INFO - lr: 9.4183e-06 gnorm: 1.18 [19:35:17< 4:55:25] +[titan] 2025-10-05 18:09:39,153 - root - INFO - step: 31970 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 18:09:39,153 - root - INFO - lr: 9.4130e-06 gnorm: 1.15 [19:35:27< 4:55:14] +[titan] 2025-10-05 18:09:50,010 - root - INFO - step: 31975 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 18:09:50,010 - root - INFO - lr: 9.4077e-06 gnorm: 1.18 [19:35:38< 4:55:03] +[titan] 2025-10-05 18:10:00,880 - root - INFO - step: 31980 loss: 1.9569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 18:10:00,880 - root - INFO - lr: 
9.4024e-06 gnorm: 1.14 [19:35:49< 4:54:52] +[titan] 2025-10-05 18:10:11,742 - root - INFO - step: 31985 loss: 1.9260 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 18:10:11,742 - root - INFO - lr: 9.3971e-06 gnorm: 1.13 [19:36:00< 4:54:41] +[titan] 2025-10-05 18:10:22,613 - root - INFO - step: 31990 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 18:10:22,613 - root - INFO - lr: 9.3918e-06 gnorm: 1.16 [19:36:11< 4:54:30] +[titan] 2025-10-05 18:10:33,523 - root - INFO - step: 31995 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 18:10:33,523 - root - INFO - lr: 9.3865e-06 gnorm: 1.14 [19:36:22< 4:54:19] +[titan] 2025-10-05 18:10:42,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:10:44,426 - root - INFO - step: 32000 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 18:10:44,426 - root - INFO - lr: 9.3812e-06 gnorm: 1.14 [19:36:33< 4:54:08] +[titan] 2025-10-05 18:10:55,314 - root - INFO - step: 32005 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 18:10:55,315 - root - INFO - lr: 9.3759e-06 gnorm: 1.17 [19:36:44< 4:53:57] +[titan] 2025-10-05 18:11:06,182 - root - INFO - step: 32010 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:11:06,182 - root - INFO - lr: 9.3706e-06 gnorm: 1.16 [19:36:54< 4:53:46] +[titan] 2025-10-05 18:11:17,044 - root - INFO - step: 32015 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 18:11:17,044 - root - INFO - lr: 9.3653e-06 gnorm: 1.16 [19:37:05< 4:53:35] +[titan] 2025-10-05 18:11:27,919 - root - INFO - step: 32020 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 18:11:27,919 - root - INFO - lr: 9.3601e-06 gnorm: 1.21 [19:37:16< 4:53:24] +[titan] 2025-10-05 18:11:38,842 - root - INFO - step: 32025 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:11:38,842 - root - INFO - lr: 9.3548e-06 gnorm: 1.19 [19:37:27< 4:53:12] +[titan] 2025-10-05 18:11:49,758 - root - INFO - step: 32030 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7644 +[titan] 2025-10-05 18:11:49,758 - root - INFO - lr: 
9.3495e-06 gnorm: 1.19 [19:37:38< 4:53:01] +[titan] 2025-10-05 18:12:00,638 - root - INFO - step: 32035 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:12:00,638 - root - INFO - lr: 9.3442e-06 gnorm: 1.14 [19:37:49< 4:52:50] +[titan] 2025-10-05 18:12:11,546 - root - INFO - step: 32040 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:12:11,547 - root - INFO - lr: 9.3390e-06 gnorm: 1.15 [19:38:00< 4:52:39] +[titan] 2025-10-05 18:12:22,450 - root - INFO - step: 32045 loss: 1.8868 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 18:12:22,450 - root - INFO - lr: 9.3337e-06 gnorm: 1.14 [19:38:11< 4:52:28] +[titan] 2025-10-05 18:12:31,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:12:33,403 - root - INFO - step: 32050 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 18:12:33,403 - root - INFO - lr: 9.3284e-06 gnorm: 1.15 [19:38:22< 4:52:17] +[titan] 2025-10-05 18:12:44,298 - root - INFO - step: 32055 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 18:12:44,298 - root - INFO - lr: 9.3232e-06 gnorm: 1.15 [19:38:33< 4:52:06] +[titan] 2025-10-05 18:12:55,164 - root - INFO - step: 32060 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:12:55,164 - root - INFO - lr: 9.3179e-06 gnorm: 1.16 [19:38:43< 4:51:55] +[titan] 2025-10-05 18:13:06,043 - root - INFO - step: 32065 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7655 +[titan] 2025-10-05 18:13:06,044 - root - INFO - lr: 9.3127e-06 gnorm: 1.15 [19:38:54< 4:51:44] +[titan] 2025-10-05 18:13:16,898 - root - INFO - step: 32070 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 18:13:16,898 - root - INFO - lr: 9.3074e-06 gnorm: 1.18 [19:39:05< 4:51:33] +[titan] 2025-10-05 18:13:27,792 - root - INFO - step: 32075 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 18:13:27,793 - root - INFO - lr: 9.3022e-06 gnorm: 1.19 [19:39:16< 4:51:22] +[titan] 2025-10-05 18:13:38,698 - root - INFO - step: 32080 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 18:13:38,698 - root - INFO - lr: 
9.2969e-06 gnorm: 1.19 [19:39:27< 4:51:11] +[titan] 2025-10-05 18:13:49,556 - root - INFO - step: 32085 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 18:13:49,557 - root - INFO - lr: 9.2917e-06 gnorm: 1.17 [19:39:38< 4:51:00] +[titan] 2025-10-05 18:14:00,441 - root - INFO - step: 32090 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 18:14:00,441 - root - INFO - lr: 9.2864e-06 gnorm: 1.16 [19:39:49< 4:50:49] +[titan] 2025-10-05 18:14:11,340 - root - INFO - step: 32095 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 18:14:11,340 - root - INFO - lr: 9.2812e-06 gnorm: 1.14 [19:40:00< 4:50:38] +[titan] 2025-10-05 18:14:20,021 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:14:22,203 - root - INFO - step: 32100 loss: 1.9882 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 18:14:22,203 - root - INFO - lr: 9.2759e-06 gnorm: 1.14 [19:40:10< 4:50:27] +[titan] 2025-10-05 18:14:33,146 - root - INFO - step: 32105 loss: 2.0008 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:14:33,146 - root - INFO - lr: 9.2707e-06 gnorm: 1.18 [19:40:21< 4:50:15] +[titan] 2025-10-05 18:14:44,011 - root - INFO - step: 32110 loss: 1.9522 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:14:44,011 - root - INFO - lr: 9.2655e-06 gnorm: 1.14 [19:40:32< 4:50:04] +[titan] 2025-10-05 18:14:54,863 - root - INFO - step: 32115 loss: 1.9586 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:14:54,864 - root - INFO - lr: 9.2603e-06 gnorm: 1.15 [19:40:43< 4:49:53] +[titan] 2025-10-05 18:15:05,716 - root - INFO - step: 32120 loss: 1.9321 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 18:15:05,716 - root - INFO - lr: 9.2550e-06 gnorm: 1.13 [19:40:54< 4:49:42] +[titan] 2025-10-05 18:15:16,556 - root - INFO - step: 32125 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:15:16,556 - root - INFO - lr: 9.2498e-06 gnorm: 1.15 [19:41:05< 4:49:31] +[titan] 2025-10-05 18:15:27,432 - root - INFO - step: 32130 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 18:15:27,433 - root - INFO - lr: 
9.2446e-06 gnorm: 1.20 [19:41:16< 4:49:20] +[titan] 2025-10-05 18:15:38,339 - root - INFO - step: 32135 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7023 +[titan] 2025-10-05 18:15:38,339 - root - INFO - lr: 9.2394e-06 gnorm: 1.17 [19:41:27< 4:49:09] +[titan] 2025-10-05 18:15:49,183 - root - INFO - step: 32140 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 18:15:49,184 - root - INFO - lr: 9.2342e-06 gnorm: 1.19 [19:41:37< 4:48:58] +[titan] 2025-10-05 18:16:00,016 - root - INFO - step: 32145 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:16:00,017 - root - INFO - lr: 9.2290e-06 gnorm: 1.19 [19:41:48< 4:48:47] +[titan] 2025-10-05 18:16:08,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:16:10,859 - root - INFO - step: 32150 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7520 +[titan] 2025-10-05 18:16:10,859 - root - INFO - lr: 9.2237e-06 gnorm: 1.17 [19:41:59< 4:48:36] +[titan] 2025-10-05 18:16:21,712 - root - INFO - step: 32155 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 18:16:21,712 - root - INFO - lr: 9.2185e-06 gnorm: 1.14 [19:42:10< 4:48:25] +[titan] 2025-10-05 18:16:32,617 - root - INFO - step: 32160 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 18:16:32,618 - root - INFO - lr: 9.2133e-06 gnorm: 1.19 [19:42:21< 4:48:14] +[titan] 2025-10-05 18:16:43,525 - root - INFO - step: 32165 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 18:16:43,525 - root - INFO - lr: 9.2081e-06 gnorm: 1.20 [19:42:32< 4:48:03] +[titan] 2025-10-05 18:16:54,419 - root - INFO - step: 32170 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 18:16:54,419 - root - INFO - lr: 9.2029e-06 gnorm: 1.15 [19:42:43< 4:47:52] +[titan] 2025-10-05 18:17:05,289 - root - INFO - step: 32175 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 18:17:05,289 - root - INFO - lr: 9.1978e-06 gnorm: 1.15 [19:42:54< 4:47:40] +[titan] 2025-10-05 18:17:16,163 - root - INFO - step: 32180 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 18:17:16,163 - root - INFO - lr: 
9.1926e-06 gnorm: 1.17 [19:43:04< 4:47:29] +[titan] 2025-10-05 18:17:27,035 - root - INFO - step: 32185 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7293 +[titan] 2025-10-05 18:17:27,035 - root - INFO - lr: 9.1874e-06 gnorm: 1.11 [19:43:15< 4:47:18] +[titan] 2025-10-05 18:17:37,986 - root - INFO - step: 32190 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:17:37,987 - root - INFO - lr: 9.1822e-06 gnorm: 1.20 [19:43:26< 4:47:07] +[titan] 2025-10-05 18:17:48,863 - root - INFO - step: 32195 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 18:17:48,863 - root - INFO - lr: 9.1770e-06 gnorm: 1.14 [19:43:37< 4:46:56] +[titan] 2025-10-05 18:17:57,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:17:59,752 - root - INFO - step: 32200 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 18:17:59,752 - root - INFO - lr: 9.1718e-06 gnorm: 1.14 [19:43:48< 4:46:45] +[titan] 2025-10-05 18:18:10,633 - root - INFO - step: 32205 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 18:18:10,633 - root - INFO - lr: 9.1667e-06 gnorm: 1.16 [19:43:59< 4:46:34] +[titan] 2025-10-05 18:18:21,504 - root - INFO - step: 32210 loss: 1.9549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 18:18:21,505 - root - INFO - lr: 9.1615e-06 gnorm: 1.14 [19:44:10< 4:46:23] +[titan] 2025-10-05 18:18:32,363 - root - INFO - step: 32215 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 18:18:32,363 - root - INFO - lr: 9.1563e-06 gnorm: 1.15 [19:44:21< 4:46:12] +[titan] 2025-10-05 18:18:43,287 - root - INFO - step: 32220 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 18:18:43,287 - root - INFO - lr: 9.1512e-06 gnorm: 1.16 [19:44:32< 4:46:01] +[titan] 2025-10-05 18:18:54,183 - root - INFO - step: 32225 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 18:18:54,183 - root - INFO - lr: 9.1460e-06 gnorm: 1.14 [19:44:42< 4:45:50] +[titan] 2025-10-05 18:19:05,065 - root - INFO - step: 32230 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:19:05,066 - root - INFO - lr: 
9.1408e-06 gnorm: 1.13 [19:44:53< 4:45:39] +[titan] 2025-10-05 18:19:15,931 - root - INFO - step: 32235 loss: 1.9942 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:19:15,932 - root - INFO - lr: 9.1357e-06 gnorm: 1.19 [19:45:04< 4:45:28] +[titan] 2025-10-05 18:19:26,783 - root - INFO - step: 32240 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 18:19:26,783 - root - INFO - lr: 9.1305e-06 gnorm: 1.16 [19:45:15< 4:45:17] +[titan] 2025-10-05 18:19:37,703 - root - INFO - step: 32245 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:19:37,703 - root - INFO - lr: 9.1254e-06 gnorm: 1.16 [19:45:26< 4:45:06] +[titan] 2025-10-05 18:19:46,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:19:48,556 - root - INFO - step: 32250 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 18:19:48,556 - root - INFO - lr: 9.1202e-06 gnorm: 1.15 [19:45:37< 4:44:55] +[titan] 2025-10-05 18:19:59,512 - root - INFO - step: 32255 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:19:59,512 - root - INFO - lr: 9.1151e-06 gnorm: 1.19 [19:45:48< 4:44:43] +[titan] 2025-10-05 18:20:01,875 - root - INFO - Dumping profiler traces at step 32256 +[titan] 2025-10-05 18:20:01,914 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:20:10,612 - root - INFO - step: 32260 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.57 mfu: 41.41% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 18:20:10,612 - root - INFO - lr: 9.1099e-06 gnorm: 1.15 [19:45:59< 4:44:32] +[titan] 2025-10-05 18:20:21,505 - root - INFO - step: 32265 loss: 1.9661 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 18:20:21,505 - root - INFO - lr: 9.1048e-06 gnorm: 1.18 [19:46:10< 4:44:21] +[titan] 2025-10-05 18:20:32,390 - root - INFO - step: 32270 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 18:20:32,390 - root - INFO - lr: 9.0996e-06 gnorm: 1.19 [19:46:21< 4:44:10] +[titan] 2025-10-05 18:20:43,343 - root - INFO - step: 32275 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 18:20:43,343 - root - INFO - lr: 9.0945e-06 gnorm: 1.17 [19:46:32< 4:43:59] +[titan] 2025-10-05 18:20:54,195 - root - INFO - step: 32280 loss: 
1.9368 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 18:20:54,195 - root - INFO - lr: 9.0894e-06 gnorm: 1.13 [19:46:42< 4:43:48] +[titan] 2025-10-05 18:21:05,056 - root - INFO - step: 32285 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:21:05,056 - root - INFO - lr: 9.0842e-06 gnorm: 1.14 [19:46:53< 4:43:37] +[titan] 2025-10-05 18:21:15,905 - root - INFO - step: 32290 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 18:21:15,905 - root - INFO - lr: 9.0791e-06 gnorm: 1.14 [19:47:04< 4:43:26] +[titan] 2025-10-05 18:21:26,822 - root - INFO - step: 32295 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:21:26,822 - root - INFO - lr: 9.0740e-06 gnorm: 1.15 [19:47:15< 4:43:15] +[titan] 2025-10-05 18:21:35,510 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:21:37,747 - root - INFO - step: 32300 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:21:37,747 - root - INFO - lr: 9.0689e-06 gnorm: 1.15 [19:47:26< 4:43:04] +[titan] 2025-10-05 18:21:48,651 - root - INFO - step: 32305 loss: 1.9420 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:21:48,651 - root - INFO - lr: 9.0638e-06 gnorm: 1.13 [19:47:37< 4:42:53] +[titan] 2025-10-05 18:21:59,526 - root - INFO - step: 32310 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:21:59,526 - root - INFO - lr: 9.0586e-06 gnorm: 1.20 [19:47:48< 4:42:42] +[titan] 2025-10-05 18:22:10,410 - root - INFO - step: 32315 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 18:22:10,410 - root - INFO - lr: 9.0535e-06 gnorm: 1.16 [19:47:59< 4:42:31] +[titan] 2025-10-05 18:22:21,310 - root - INFO - step: 32320 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6910 +[titan] 2025-10-05 18:22:21,310 - root - INFO - lr: 9.0484e-06 gnorm: 1.16 [19:48:10< 4:42:20] +[titan] 2025-10-05 18:22:32,228 - root - INFO - step: 32325 loss: 1.9625 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 18:22:32,229 - root - INFO - lr: 9.0433e-06 gnorm: 1.17 [19:48:20< 4:42:09] +[titan] 2025-10-05 18:22:43,163 - root - INFO - step: 32330 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 18:22:43,164 - root - INFO - lr: 
9.0382e-06 gnorm: 1.16 [19:48:31< 4:41:58] +[titan] 2025-10-05 18:22:54,059 - root - INFO - step: 32335 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:22:54,059 - root - INFO - lr: 9.0331e-06 gnorm: 1.18 [19:48:42< 4:41:47] +[titan] 2025-10-05 18:23:04,937 - root - INFO - step: 32340 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 18:23:04,938 - root - INFO - lr: 9.0280e-06 gnorm: 1.22 [19:48:53< 4:41:35] +[titan] 2025-10-05 18:23:15,809 - root - INFO - step: 32345 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:23:15,809 - root - INFO - lr: 9.0229e-06 gnorm: 1.18 [19:49:04< 4:41:24] +[titan] 2025-10-05 18:23:24,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:23:26,693 - root - INFO - step: 32350 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 18:23:26,693 - root - INFO - lr: 9.0178e-06 gnorm: 1.24 [19:49:15< 4:41:13] +[titan] 2025-10-05 18:23:37,572 - root - INFO - step: 32355 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:23:37,573 - root - INFO - lr: 9.0127e-06 gnorm: 1.15 [19:49:26< 4:41:02] +[titan] 2025-10-05 18:23:48,530 - root - INFO - step: 32360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 18:23:48,530 - root - INFO - lr: 9.0077e-06 gnorm: 1.15 [19:49:37< 4:40:51] +[titan] 2025-10-05 18:23:59,408 - root - INFO - step: 32365 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 18:23:59,408 - root - INFO - lr: 9.0026e-06 gnorm: 1.20 [19:49:48< 4:40:40] +[titan] 2025-10-05 18:24:10,292 - root - INFO - step: 32370 loss: 1.9796 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 18:24:10,292 - root - INFO - lr: 8.9975e-06 gnorm: 1.14 [19:49:59< 4:40:29] +[titan] 2025-10-05 18:24:21,173 - root - INFO - step: 32375 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:24:21,173 - root - INFO - lr: 8.9924e-06 gnorm: 1.18 [19:50:09< 4:40:18] +[titan] 2025-10-05 18:24:32,033 - root - INFO - step: 32380 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7551 +[titan] 2025-10-05 18:24:32,033 - root - INFO - lr: 
8.9873e-06 gnorm: 1.17 [19:50:20< 4:40:07] +[titan] 2025-10-05 18:24:42,932 - root - INFO - step: 32385 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 18:24:42,932 - root - INFO - lr: 8.9823e-06 gnorm: 1.17 [19:50:31< 4:39:56] +[titan] 2025-10-05 18:24:53,801 - root - INFO - step: 32390 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 18:24:53,801 - root - INFO - lr: 8.9772e-06 gnorm: 1.16 [19:50:42< 4:39:45] +[titan] 2025-10-05 18:25:04,696 - root - INFO - step: 32395 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 18:25:04,696 - root - INFO - lr: 8.9721e-06 gnorm: 1.18 [19:50:53< 4:39:34] +[titan] 2025-10-05 18:25:13,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:25:15,566 - root - INFO - step: 32400 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:25:15,566 - root - INFO - lr: 8.9671e-06 gnorm: 1.16 [19:51:04< 4:39:23] +[titan] 2025-10-05 18:25:26,448 - root - INFO - step: 32405 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 18:25:26,449 - root - INFO - lr: 8.9620e-06 gnorm: 1.13 [19:51:15< 4:39:12] +[titan] 2025-10-05 18:25:37,323 - root - INFO - step: 32410 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 18:25:37,323 - root - INFO - lr: 8.9570e-06 gnorm: 1.15 [19:51:26< 4:39:01] +[titan] 2025-10-05 18:25:48,505 - root - INFO - step: 32415 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 29,304 tflops: 406.54 mfu: 41.11% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 18:25:48,505 - root - INFO - lr: 8.9519e-06 gnorm: 1.17 [19:51:37< 4:38:50] +[titan] 2025-10-05 18:25:59,396 - root - INFO - step: 32420 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 18:25:59,396 - root - INFO - lr: 8.9469e-06 gnorm: 1.17 [19:51:48< 4:38:39] +[titan] 2025-10-05 18:26:10,310 - root - INFO - step: 32425 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 18:26:10,310 - root - INFO - lr: 8.9418e-06 gnorm: 1.15 [19:51:59< 4:38:28] +[titan] 2025-10-05 18:26:21,195 - root - INFO - step: 32430 loss: 1.9222 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 18:26:21,195 - root - INFO - lr: 
8.9368e-06 gnorm: 1.14 [19:52:09< 4:38:16] +[titan] 2025-10-05 18:26:32,089 - root - INFO - step: 32435 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:26:32,089 - root - INFO - lr: 8.9317e-06 gnorm: 1.17 [19:52:20< 4:38:05] +[titan] 2025-10-05 18:26:42,997 - root - INFO - step: 32440 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:26:42,997 - root - INFO - lr: 8.9267e-06 gnorm: 1.18 [19:52:31< 4:37:54] +[titan] 2025-10-05 18:26:53,888 - root - INFO - step: 32445 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 18:26:53,888 - root - INFO - lr: 8.9217e-06 gnorm: 1.18 [19:52:42< 4:37:43] +[titan] 2025-10-05 18:27:02,624 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:27:04,807 - root - INFO - step: 32450 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 18:27:04,807 - root - INFO - lr: 8.9166e-06 gnorm: 1.18 [19:52:53< 4:37:32] +[titan] 2025-10-05 18:27:15,706 - root - INFO - step: 32455 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7589 +[titan] 2025-10-05 18:27:15,706 - root - INFO - lr: 8.9116e-06 gnorm: 1.15 [19:53:04< 4:37:21] +[titan] 2025-10-05 18:27:26,608 - root - INFO - step: 32460 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 18:27:26,608 - root - INFO - lr: 8.9066e-06 gnorm: 1.14 [19:53:15< 4:37:10] +[titan] 2025-10-05 18:27:37,484 - root - INFO - step: 32465 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 18:27:37,485 - root - INFO - lr: 8.9015e-06 gnorm: 1.11 [19:53:26< 4:36:59] +[titan] 2025-10-05 18:27:48,368 - root - INFO - step: 32470 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:27:48,368 - root - INFO - lr: 8.8965e-06 gnorm: 1.13 [19:53:37< 4:36:48] +[titan] 2025-10-05 18:27:59,231 - root - INFO - step: 32475 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 18:27:59,231 - root - INFO - lr: 8.8915e-06 gnorm: 1.15 [19:53:47< 4:36:37] +[titan] 2025-10-05 18:28:10,108 - root - INFO - step: 32480 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7775 +[titan] 2025-10-05 18:28:10,108 - root - INFO - lr: 
8.8865e-06 gnorm: 1.18 [19:53:58< 4:36:26] +[titan] 2025-10-05 18:28:20,988 - root - INFO - step: 32485 loss: 1.9823 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 18:28:20,988 - root - INFO - lr: 8.8815e-06 gnorm: 1.15 [19:54:09< 4:36:15] +[titan] 2025-10-05 18:28:31,851 - root - INFO - step: 32490 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 18:28:31,851 - root - INFO - lr: 8.8765e-06 gnorm: 1.16 [19:54:20< 4:36:04] +[titan] 2025-10-05 18:28:42,716 - root - INFO - step: 32495 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:28:42,716 - root - INFO - lr: 8.8715e-06 gnorm: 1.16 [19:54:31< 4:35:53] +[titan] 2025-10-05 18:28:51,431 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:28:53,617 - root - INFO - step: 32500 loss: 1.9959 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 18:28:53,617 - root - INFO - lr: 8.8665e-06 gnorm: 1.21 [19:54:42< 4:35:42] +[titan] 2025-10-05 18:29:04,488 - root - INFO - step: 32505 loss: 1.9052 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6852 +[titan] 2025-10-05 18:29:04,489 - root - INFO - lr: 8.8615e-06 gnorm: 1.15 [19:54:53< 4:35:31] +[titan] 2025-10-05 18:29:15,355 - root - INFO - step: 32510 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 18:29:15,355 - root - INFO - lr: 8.8565e-06 gnorm: 1.20 [19:55:04< 4:35:19] +[titan] 2025-10-05 18:29:26,197 - root - INFO - step: 32515 loss: 1.9015 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 18:29:26,198 - root - INFO - lr: 8.8515e-06 gnorm: 1.16 [19:55:14< 4:35:08] +[titan] 2025-10-05 18:29:37,043 - root - INFO - step: 32520 loss: 1.9322 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:29:37,043 - root - INFO - lr: 8.8465e-06 gnorm: 1.14 [19:55:25< 4:34:57] +[titan] 2025-10-05 18:29:47,915 - root - INFO - step: 32525 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:29:47,915 - root - INFO - lr: 8.8415e-06 gnorm: 1.17 [19:55:36< 4:34:46] +[titan] 2025-10-05 18:29:58,786 - root - INFO - step: 32530 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:29:58,786 - root - INFO - lr: 
8.8365e-06 gnorm: 1.14 [19:55:47< 4:34:35] +[titan] 2025-10-05 18:30:09,635 - root - INFO - step: 32535 loss: 1.9367 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 18:30:09,635 - root - INFO - lr: 8.8315e-06 gnorm: 1.18 [19:55:58< 4:34:24] +[titan] 2025-10-05 18:30:20,517 - root - INFO - step: 32540 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:30:20,517 - root - INFO - lr: 8.8265e-06 gnorm: 1.22 [19:56:09< 4:34:13] +[titan] 2025-10-05 18:30:31,388 - root - INFO - step: 32545 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 18:30:31,388 - root - INFO - lr: 8.8216e-06 gnorm: 1.18 [19:56:20< 4:34:02] +[titan] 2025-10-05 18:30:40,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:30:42,279 - root - INFO - step: 32550 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 18:30:42,280 - root - INFO - lr: 8.8166e-06 gnorm: 1.22 [19:56:31< 4:33:51] +[titan] 2025-10-05 18:30:53,167 - root - INFO - step: 32555 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7816 +[titan] 2025-10-05 18:30:53,168 - root - INFO - lr: 8.8116e-06 gnorm: 1.16 [19:56:41< 4:33:40] +[titan] 2025-10-05 18:31:04,037 - root - INFO - step: 32560 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7457 +[titan] 2025-10-05 18:31:04,037 - root - INFO - lr: 8.8066e-06 gnorm: 1.15 [19:56:52< 4:33:29] +[titan] 2025-10-05 18:31:14,905 - root - INFO - step: 32565 loss: 2.0104 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 18:31:14,905 - root - INFO - lr: 8.8017e-06 gnorm: 1.18 [19:57:03< 4:33:18] +[titan] 2025-10-05 18:31:25,750 - root - INFO - step: 32570 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:31:25,750 - root - INFO - lr: 8.7967e-06 gnorm: 1.18 [19:57:14< 4:33:07] +[titan] 2025-10-05 18:31:36,615 - root - INFO - step: 32575 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 18:31:36,615 - root - INFO - lr: 8.7917e-06 gnorm: 1.21 [19:57:25< 4:32:56] +[titan] 2025-10-05 18:31:47,505 - root - INFO - step: 32580 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 18:31:47,505 - root - INFO - lr: 
8.7868e-06 gnorm: 1.18 [19:57:36< 4:32:45] +[titan] 2025-10-05 18:31:58,405 - root - INFO - step: 32585 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 18:31:58,406 - root - INFO - lr: 8.7818e-06 gnorm: 1.15 [19:57:47< 4:32:33] +[titan] 2025-10-05 18:32:09,289 - root - INFO - step: 32590 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 18:32:09,289 - root - INFO - lr: 8.7769e-06 gnorm: 1.17 [19:57:58< 4:32:22] +[titan] 2025-10-05 18:32:20,143 - root - INFO - step: 32595 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 18:32:20,143 - root - INFO - lr: 8.7719e-06 gnorm: 1.15 [19:58:08< 4:32:11] +[titan] 2025-10-05 18:32:28,834 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:32:31,015 - root - INFO - step: 32600 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:32:31,015 - root - INFO - lr: 8.7670e-06 gnorm: 1.15 [19:58:19< 4:32:00] +[titan] 2025-10-05 18:32:41,866 - root - INFO - step: 32605 loss: 1.9357 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 18:32:41,866 - root - INFO - lr: 8.7621e-06 gnorm: 1.17 [19:58:30< 4:31:49] +[titan] 2025-10-05 18:32:52,744 - root - INFO - step: 32610 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 18:32:52,744 - root - INFO - lr: 8.7571e-06 gnorm: 1.17 [19:58:41< 4:31:38] +[titan] 2025-10-05 18:33:03,626 - root - INFO - step: 32615 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:33:03,626 - root - INFO - lr: 8.7522e-06 gnorm: 1.16 [19:58:52< 4:31:27] +[titan] 2025-10-05 18:33:14,510 - root - INFO - step: 32620 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 18:33:14,510 - root - INFO - lr: 8.7472e-06 gnorm: 1.19 [19:59:03< 4:31:16] +[titan] 2025-10-05 18:33:25,381 - root - INFO - step: 32625 loss: 1.9774 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 18:33:25,381 - root - INFO - lr: 8.7423e-06 gnorm: 1.16 [19:59:14< 4:31:05] +[titan] 2025-10-05 18:33:36,243 - root - INFO - step: 32630 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 18:33:36,243 - root - INFO - lr: 
8.7374e-06 gnorm: 1.17 [19:59:24< 4:30:54] +[titan] 2025-10-05 18:33:47,116 - root - INFO - step: 32635 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:33:47,116 - root - INFO - lr: 8.7325e-06 gnorm: 1.15 [19:59:35< 4:30:43] +[titan] 2025-10-05 18:33:58,057 - root - INFO - step: 32640 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:33:58,058 - root - INFO - lr: 8.7275e-06 gnorm: 1.20 [19:59:46< 4:30:32] +[titan] 2025-10-05 18:34:08,946 - root - INFO - step: 32645 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:34:08,946 - root - INFO - lr: 8.7226e-06 gnorm: 1.17 [19:59:57< 4:30:21] +[titan] 2025-10-05 18:34:17,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:34:19,814 - root - INFO - step: 32650 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7704 +[titan] 2025-10-05 18:34:19,814 - root - INFO - lr: 8.7177e-06 gnorm: 1.14 [20:00:08< 4:30:10] +[titan] 2025-10-05 18:34:30,684 - root - INFO - step: 32655 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 18:34:30,685 - root - INFO - lr: 8.7128e-06 gnorm: 1.21 [20:00:19< 4:29:59] +[titan] 2025-10-05 18:34:41,540 - root - INFO - step: 32660 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 18:34:41,540 - root - INFO - lr: 8.7079e-06 gnorm: 1.18 [20:00:30< 4:29:48] +[titan] 2025-10-05 18:34:52,419 - root - INFO - step: 32665 loss: 1.9116 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 18:34:52,419 - root - INFO - lr: 8.7030e-06 gnorm: 1.19 [20:00:41< 4:29:37] +[titan] 2025-10-05 18:35:03,284 - root - INFO - step: 32670 loss: 1.9841 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 18:35:03,284 - root - INFO - lr: 8.6981e-06 gnorm: 1.23 [20:00:52< 4:29:25] +[titan] 2025-10-05 18:35:14,164 - root - INFO - step: 32675 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:35:14,164 - root - INFO - lr: 8.6932e-06 gnorm: 1.14 [20:01:02< 4:29:14] +[titan] 2025-10-05 18:35:25,030 - root - INFO - step: 32680 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 18:35:25,030 - root - INFO - lr: 
8.6883e-06 gnorm: 1.17 [20:01:13< 4:29:03] +[titan] 2025-10-05 18:35:35,901 - root - INFO - step: 32685 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 18:35:35,901 - root - INFO - lr: 8.6834e-06 gnorm: 1.19 [20:01:24< 4:28:52] +[titan] 2025-10-05 18:35:46,769 - root - INFO - step: 32690 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 18:35:46,769 - root - INFO - lr: 8.6785e-06 gnorm: 1.17 [20:01:35< 4:28:41] +[titan] 2025-10-05 18:35:57,643 - root - INFO - step: 32695 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 18:35:57,644 - root - INFO - lr: 8.6736e-06 gnorm: 1.18 [20:01:46< 4:28:30] +[titan] 2025-10-05 18:36:06,324 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:36:08,504 - root - INFO - step: 32700 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 18:36:08,504 - root - INFO - lr: 8.6687e-06 gnorm: 1.16 [20:01:57< 4:28:19] +[titan] 2025-10-05 18:36:19,368 - root - INFO - step: 32705 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6516 +[titan] 2025-10-05 18:36:19,368 - root - INFO - lr: 8.6638e-06 gnorm: 1.12 [20:02:08< 4:28:08] +[titan] 2025-10-05 18:36:30,228 - root - INFO - step: 32710 loss: 1.9004 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:36:30,228 - root - INFO - lr: 8.6590e-06 gnorm: 1.17 [20:02:18< 4:27:57] +[titan] 2025-10-05 18:36:41,098 - root - INFO - step: 32715 loss: 1.9595 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7326 +[titan] 2025-10-05 18:36:41,098 - root - INFO - lr: 8.6541e-06 gnorm: 1.21 [20:02:29< 4:27:46] +[titan] 2025-10-05 18:36:51,987 - root - INFO - step: 32720 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 18:36:51,987 - root - INFO - lr: 8.6492e-06 gnorm: 1.15 [20:02:40< 4:27:35] +[titan] 2025-10-05 18:37:02,853 - root - INFO - step: 32725 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7097 +[titan] 2025-10-05 18:37:02,853 - root - INFO - lr: 8.6443e-06 gnorm: 1.18 [20:02:51< 4:27:24] +[titan] 2025-10-05 18:37:13,725 - root - INFO - step: 32730 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 18:37:13,725 - root - INFO - lr: 
8.6395e-06 gnorm: 1.18 [20:03:02< 4:27:13] +[titan] 2025-10-05 18:37:24,602 - root - INFO - step: 32735 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 18:37:24,603 - root - INFO - lr: 8.6346e-06 gnorm: 1.18 [20:03:13< 4:27:02] +[titan] 2025-10-05 18:37:35,486 - root - INFO - step: 32740 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 18:37:35,486 - root - INFO - lr: 8.6297e-06 gnorm: 1.15 [20:03:24< 4:26:51] +[titan] 2025-10-05 18:37:46,374 - root - INFO - step: 32745 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 18:37:46,374 - root - INFO - lr: 8.6249e-06 gnorm: 1.18 [20:03:35< 4:26:40] +[titan] 2025-10-05 18:37:55,081 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:37:57,268 - root - INFO - step: 32750 loss: 1.9951 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 18:37:57,268 - root - INFO - lr: 8.6200e-06 gnorm: 1.16 [20:03:45< 4:26:28] +[titan] 2025-10-05 18:38:08,165 - root - INFO - step: 32755 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 18:38:08,165 - root - INFO - lr: 8.6152e-06 gnorm: 1.15 [20:03:56< 4:26:17] +[titan] 2025-10-05 18:38:19,048 - root - INFO - step: 32760 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 18:38:19,048 - root - INFO - lr: 8.6103e-06 gnorm: 1.17 [20:04:07< 4:26:06] +[titan] 2025-10-05 18:38:30,021 - root - INFO - step: 32765 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 29,864 tflops: 414.31 mfu: 41.89% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 18:38:30,021 - root - INFO - lr: 8.6055e-06 gnorm: 1.19 [20:04:18< 4:25:55] +[titan] 2025-10-05 18:38:36,735 - root - INFO - Dumping profiler traces at step 32768 +[titan] 2025-10-05 18:38:36,774 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:38:41,140 - root - INFO - step: 32770 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 18:38:41,140 - root - INFO - lr: 8.6006e-06 gnorm: 1.18 [20:04:29< 4:25:44] +[titan] 2025-10-05 18:38:52,028 - root - INFO - step: 32775 loss: 1.8866 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 18:38:52,028 - root - INFO - lr: 8.5958e-06 gnorm: 1.16 [20:04:40< 4:25:33] +[titan] 2025-10-05 18:39:02,921 - root - INFO - step: 32780 loss: 
1.9463 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 18:39:02,921 - root - INFO - lr: 8.5909e-06 gnorm: 1.17 [20:04:51< 4:25:22] +[titan] 2025-10-05 18:39:13,797 - root - INFO - step: 32785 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 18:39:13,797 - root - INFO - lr: 8.5861e-06 gnorm: 1.19 [20:05:02< 4:25:11] +[titan] 2025-10-05 18:39:24,687 - root - INFO - step: 32790 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 18:39:24,688 - root - INFO - lr: 8.5813e-06 gnorm: 1.18 [20:05:13< 4:25:00] +[titan] 2025-10-05 18:39:35,548 - root - INFO - step: 32795 loss: 1.9151 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 18:39:35,548 - root - INFO - lr: 8.5764e-06 gnorm: 1.17 [20:05:24< 4:24:49] +[titan] 2025-10-05 18:39:44,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:39:46,421 - root - INFO - step: 32800 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:39:46,421 - root - INFO - lr: 8.5716e-06 gnorm: 1.14 [20:05:35< 4:24:38] +[titan] 2025-10-05 18:39:57,301 - root - INFO - step: 32805 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 18:39:57,301 - root - INFO - lr: 8.5668e-06 gnorm: 1.20 [20:05:46< 4:24:27] +[titan] 2025-10-05 18:40:08,174 - root - INFO - step: 32810 loss: 1.8700 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6536 +[titan] 2025-10-05 18:40:08,175 - root - INFO - lr: 8.5620e-06 gnorm: 1.15 [20:05:56< 4:24:16] +[titan] 2025-10-05 18:40:19,054 - root - INFO - step: 32815 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 18:40:19,054 - root - INFO - lr: 8.5572e-06 gnorm: 1.17 [20:06:07< 4:24:05] +[titan] 2025-10-05 18:40:29,922 - root - INFO - step: 32820 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:40:29,922 - root - INFO - lr: 8.5523e-06 gnorm: 1.20 [20:06:18< 4:23:54] +[titan] 2025-10-05 18:40:40,814 - root - INFO - step: 32825 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7226 +[titan] 2025-10-05 18:40:40,814 - root - INFO - lr: 8.5475e-06 gnorm: 1.14 [20:06:29< 4:23:43] +[titan] 2025-10-05 18:40:51,678 - root - INFO - step: 32830 loss: 1.9398 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:40:51,678 - root - INFO - lr: 
8.5427e-06 gnorm: 1.23 [20:06:40< 4:23:32] +[titan] 2025-10-05 18:41:02,570 - root - INFO - step: 32835 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:41:02,570 - root - INFO - lr: 8.5379e-06 gnorm: 1.15 [20:06:51< 4:23:21] +[titan] 2025-10-05 18:41:13,446 - root - INFO - step: 32840 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 18:41:13,447 - root - INFO - lr: 8.5331e-06 gnorm: 1.21 [20:07:02< 4:23:09] +[titan] 2025-10-05 18:41:24,359 - root - INFO - step: 32845 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:41:24,359 - root - INFO - lr: 8.5283e-06 gnorm: 1.16 [20:07:13< 4:22:58] +[titan] 2025-10-05 18:41:33,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:41:35,240 - root - INFO - step: 32850 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:41:35,240 - root - INFO - lr: 8.5235e-06 gnorm: 1.14 [20:07:23< 4:22:47] +[titan] 2025-10-05 18:41:46,124 - root - INFO - step: 32855 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 18:41:46,125 - root - INFO - lr: 8.5187e-06 gnorm: 1.17 [20:07:34< 4:22:36] +[titan] 2025-10-05 18:41:56,993 - root - INFO - step: 32860 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:41:56,993 - root - INFO - lr: 8.5139e-06 gnorm: 1.22 [20:07:45< 4:22:25] +[titan] 2025-10-05 18:42:07,859 - root - INFO - step: 32865 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 18:42:07,859 - root - INFO - lr: 8.5091e-06 gnorm: 1.20 [20:07:56< 4:22:14] +[titan] 2025-10-05 18:42:18,752 - root - INFO - step: 32870 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 18:42:18,752 - root - INFO - lr: 8.5044e-06 gnorm: 1.13 [20:08:07< 4:22:03] +[titan] 2025-10-05 18:42:29,644 - root - INFO - step: 32875 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 18:42:29,644 - root - INFO - lr: 8.4996e-06 gnorm: 1.19 [20:08:18< 4:21:52] +[titan] 2025-10-05 18:42:40,538 - root - INFO - step: 32880 loss: 1.9506 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 18:42:40,539 - root - INFO - lr: 
8.4948e-06 gnorm: 1.15 [20:08:29< 4:21:41] +[titan] 2025-10-05 18:42:51,405 - root - INFO - step: 32885 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:42:51,405 - root - INFO - lr: 8.4900e-06 gnorm: 1.14 [20:08:40< 4:21:30] +[titan] 2025-10-05 18:43:02,281 - root - INFO - step: 32890 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 18:43:02,281 - root - INFO - lr: 8.4853e-06 gnorm: 1.17 [20:08:50< 4:21:19] +[titan] 2025-10-05 18:43:13,144 - root - INFO - step: 32895 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7334 +[titan] 2025-10-05 18:43:13,144 - root - INFO - lr: 8.4805e-06 gnorm: 1.22 [20:09:01< 4:21:08] +[titan] 2025-10-05 18:43:21,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:43:24,029 - root - INFO - step: 32900 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:43:24,030 - root - INFO - lr: 8.4757e-06 gnorm: 1.16 [20:09:12< 4:20:57] +[titan] 2025-10-05 18:43:34,912 - root - INFO - step: 32905 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:43:34,912 - root - INFO - lr: 8.4710e-06 gnorm: 1.22 [20:09:23< 4:20:46] +[titan] 2025-10-05 18:43:45,784 - root - INFO - step: 32910 loss: 1.9113 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6908 +[titan] 2025-10-05 18:43:45,784 - root - INFO - lr: 8.4662e-06 gnorm: 1.19 [20:09:34< 4:20:35] +[titan] 2025-10-05 18:43:56,656 - root - INFO - step: 32915 loss: 1.9080 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6877 +[titan] 2025-10-05 18:43:56,657 - root - INFO - lr: 8.4614e-06 gnorm: 1.15 [20:09:45< 4:20:24] +[titan] 2025-10-05 18:44:07,519 - root - INFO - step: 32920 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 18:44:07,520 - root - INFO - lr: 8.4567e-06 gnorm: 1.14 [20:09:56< 4:20:13] +[titan] 2025-10-05 18:44:18,364 - root - INFO - step: 32925 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 18:44:18,364 - root - INFO - lr: 8.4519e-06 gnorm: 1.16 [20:10:07< 4:20:01] +[titan] 2025-10-05 18:44:29,222 - root - INFO - step: 32930 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:44:29,222 - root - INFO - lr: 
8.4472e-06 gnorm: 1.19 [20:10:17< 4:19:50] +[titan] 2025-10-05 18:44:40,084 - root - INFO - step: 32935 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 18:44:40,084 - root - INFO - lr: 8.4424e-06 gnorm: 1.20 [20:10:28< 4:19:39] +[titan] 2025-10-05 18:44:50,961 - root - INFO - step: 32940 loss: 2.0407 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 18:44:50,961 - root - INFO - lr: 8.4377e-06 gnorm: 1.21 [20:10:39< 4:19:28] +[titan] 2025-10-05 18:45:01,827 - root - INFO - step: 32945 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:45:01,827 - root - INFO - lr: 8.4330e-06 gnorm: 1.18 [20:10:50< 4:19:17] +[titan] 2025-10-05 18:45:10,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:45:12,694 - root - INFO - step: 32950 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:45:12,695 - root - INFO - lr: 8.4282e-06 gnorm: 1.17 [20:11:01< 4:19:06] +[titan] 2025-10-05 18:45:23,553 - root - INFO - step: 32955 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 18:45:23,553 - root - INFO - lr: 8.4235e-06 gnorm: 1.19 [20:11:12< 4:18:55] +[titan] 2025-10-05 18:45:34,381 - root - INFO - step: 32960 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 18:45:34,381 - root - INFO - lr: 8.4187e-06 gnorm: 1.16 [20:11:23< 4:18:44] +[titan] 2025-10-05 18:45:45,261 - root - INFO - step: 32965 loss: 2.0361 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 18:45:45,262 - root - INFO - lr: 8.4140e-06 gnorm: 1.21 [20:11:33< 4:18:33] +[titan] 2025-10-05 18:45:56,104 - root - INFO - step: 32970 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 18:45:56,105 - root - INFO - lr: 8.4093e-06 gnorm: 1.16 [20:11:44< 4:18:22] +[titan] 2025-10-05 18:46:06,947 - root - INFO - step: 32975 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7958 +[titan] 2025-10-05 18:46:06,948 - root - INFO - lr: 8.4046e-06 gnorm: 1.24 [20:11:55< 4:18:11] +[titan] 2025-10-05 18:46:17,797 - root - INFO - step: 32980 loss: 1.9700 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7415 +[titan] 2025-10-05 18:46:17,797 - root - INFO - lr: 
8.3999e-06 gnorm: 1.16 [20:12:06< 4:18:00] +[titan] 2025-10-05 18:46:28,662 - root - INFO - step: 32985 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 18:46:28,662 - root - INFO - lr: 8.3951e-06 gnorm: 1.18 [20:12:17< 4:17:49] +[titan] 2025-10-05 18:46:39,537 - root - INFO - step: 32990 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 18:46:39,537 - root - INFO - lr: 8.3904e-06 gnorm: 1.22 [20:12:28< 4:17:38] +[titan] 2025-10-05 18:46:50,425 - root - INFO - step: 32995 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:46:50,425 - root - INFO - lr: 8.3857e-06 gnorm: 1.15 [20:12:39< 4:17:27] +[titan] 2025-10-05 18:46:59,142 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:47:01,344 - root - INFO - step: 33000 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 18:47:01,344 - root - INFO - lr: 8.3810e-06 gnorm: 1.17 [20:12:50< 4:17:16] +[titan] 2025-10-05 18:47:12,220 - root - INFO - step: 33005 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:47:12,220 - root - INFO - lr: 8.3763e-06 gnorm: 1.17 [20:13:00< 4:17:05] +[titan] 2025-10-05 18:47:23,105 - root - INFO - step: 33010 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 18:47:23,105 - root - INFO - lr: 8.3716e-06 gnorm: 1.16 [20:13:11< 4:16:53] +[titan] 2025-10-05 18:47:33,991 - root - INFO - step: 33015 loss: 1.9630 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 18:47:33,991 - root - INFO - lr: 8.3669e-06 gnorm: 1.21 [20:13:22< 4:16:42] +[titan] 2025-10-05 18:47:44,854 - root - INFO - step: 33020 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:47:44,854 - root - INFO - lr: 8.3622e-06 gnorm: 1.20 [20:13:33< 4:16:31] +[titan] 2025-10-05 18:47:55,728 - root - INFO - step: 33025 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:47:55,728 - root - INFO - lr: 8.3575e-06 gnorm: 1.16 [20:13:44< 4:16:20] +[titan] 2025-10-05 18:48:06,621 - root - INFO - step: 33030 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 18:48:06,622 - root - INFO - lr: 
8.3528e-06 gnorm: 1.15 [20:13:55< 4:16:09] +[titan] 2025-10-05 18:48:17,519 - root - INFO - step: 33035 loss: 2.0726 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 18:48:17,519 - root - INFO - lr: 8.3481e-06 gnorm: 1.22 [20:14:06< 4:15:58] +[titan] 2025-10-05 18:48:28,405 - root - INFO - step: 33040 loss: 1.9946 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 18:48:28,405 - root - INFO - lr: 8.3435e-06 gnorm: 1.19 [20:14:17< 4:15:47] +[titan] 2025-10-05 18:48:39,282 - root - INFO - step: 33045 loss: 1.9543 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 18:48:39,282 - root - INFO - lr: 8.3388e-06 gnorm: 1.16 [20:14:27< 4:15:36] +[titan] 2025-10-05 18:48:47,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:48:50,167 - root - INFO - step: 33050 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:48:50,167 - root - INFO - lr: 8.3341e-06 gnorm: 1.22 [20:14:38< 4:15:25] +[titan] 2025-10-05 18:49:01,041 - root - INFO - step: 33055 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7799 +[titan] 2025-10-05 18:49:01,041 - root - INFO - lr: 8.3294e-06 gnorm: 1.21 [20:14:49< 4:15:14] +[titan] 2025-10-05 18:49:11,966 - root - INFO - step: 33060 loss: 1.9156 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6929 +[titan] 2025-10-05 18:49:11,967 - root - INFO - lr: 8.3248e-06 gnorm: 1.15 [20:15:00< 4:15:03] +[titan] 2025-10-05 18:49:22,851 - root - INFO - step: 33065 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 18:49:22,851 - root - INFO - lr: 8.3201e-06 gnorm: 1.22 [20:15:11< 4:14:52] +[titan] 2025-10-05 18:49:33,699 - root - INFO - step: 33070 loss: 1.9488 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 18:49:33,699 - root - INFO - lr: 8.3154e-06 gnorm: 1.17 [20:15:22< 4:14:41] +[titan] 2025-10-05 18:49:44,562 - root - INFO - step: 33075 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 18:49:44,562 - root - INFO - lr: 8.3108e-06 gnorm: 1.18 [20:15:33< 4:14:30] +[titan] 2025-10-05 18:49:55,434 - root - INFO - step: 33080 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6975 +[titan] 2025-10-05 18:49:55,434 - root - INFO - lr: 
8.3061e-06 gnorm: 1.15 [20:15:44< 4:14:19] +[titan] 2025-10-05 18:50:06,341 - root - INFO - step: 33085 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 18:50:06,341 - root - INFO - lr: 8.3015e-06 gnorm: 1.19 [20:15:55< 4:14:08] +[titan] 2025-10-05 18:50:17,204 - root - INFO - step: 33090 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 18:50:17,204 - root - INFO - lr: 8.2968e-06 gnorm: 1.17 [20:16:05< 4:13:57] +[titan] 2025-10-05 18:50:28,085 - root - INFO - step: 33095 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 18:50:28,085 - root - INFO - lr: 8.2922e-06 gnorm: 1.19 [20:16:16< 4:13:46] +[titan] 2025-10-05 18:50:36,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:50:38,970 - root - INFO - step: 33100 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 18:50:38,970 - root - INFO - lr: 8.2875e-06 gnorm: 1.16 [20:16:27< 4:13:34] +[titan] 2025-10-05 18:50:49,853 - root - INFO - step: 33105 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:50:49,853 - root - INFO - lr: 8.2829e-06 gnorm: 1.18 [20:16:38< 4:13:23] +[titan] 2025-10-05 18:51:00,737 - root - INFO - step: 33110 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 18:51:00,737 - root - INFO - lr: 8.2782e-06 gnorm: 1.20 [20:16:49< 4:13:12] +[titan] 2025-10-05 18:51:11,650 - root - INFO - step: 33115 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7236 +[titan] 2025-10-05 18:51:11,650 - root - INFO - lr: 8.2736e-06 gnorm: 1.18 [20:17:00< 4:13:01] +[titan] 2025-10-05 18:51:22,517 - root - INFO - step: 33120 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 18:51:22,517 - root - INFO - lr: 8.2690e-06 gnorm: 1.15 [20:17:11< 4:12:50] +[titan] 2025-10-05 18:51:33,423 - root - INFO - step: 33125 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 18:51:33,423 - root - INFO - lr: 8.2643e-06 gnorm: 1.16 [20:17:22< 4:12:39] +[titan] 2025-10-05 18:51:44,314 - root - INFO - step: 33130 loss: 1.9891 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:51:44,314 - root - INFO - lr: 
8.2597e-06 gnorm: 1.16 [20:17:33< 4:12:28] +[titan] 2025-10-05 18:51:55,207 - root - INFO - step: 33135 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 18:51:55,207 - root - INFO - lr: 8.2551e-06 gnorm: 1.19 [20:17:43< 4:12:17] +[titan] 2025-10-05 18:52:06,124 - root - INFO - step: 33140 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 18:52:06,124 - root - INFO - lr: 8.2504e-06 gnorm: 1.15 [20:17:54< 4:12:06] +[titan] 2025-10-05 18:52:17,015 - root - INFO - step: 33145 loss: 1.8716 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6545 +[titan] 2025-10-05 18:52:17,015 - root - INFO - lr: 8.2458e-06 gnorm: 1.17 [20:18:05< 4:11:55] +[titan] 2025-10-05 18:52:25,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:52:27,896 - root - INFO - step: 33150 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 18:52:27,896 - root - INFO - lr: 8.2412e-06 gnorm: 1.28 [20:18:16< 4:11:44] +[titan] 2025-10-05 18:52:38,755 - root - INFO - step: 33155 loss: 1.9340 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 18:52:38,755 - root - INFO - lr: 8.2366e-06 gnorm: 1.17 [20:18:27< 4:11:33] +[titan] 2025-10-05 18:52:49,651 - root - INFO - step: 33160 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6592 +[titan] 2025-10-05 18:52:49,651 - root - INFO - lr: 8.2320e-06 gnorm: 1.14 [20:18:38< 4:11:22] +[titan] 2025-10-05 18:53:00,524 - root - INFO - step: 33165 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7065 +[titan] 2025-10-05 18:53:00,525 - root - INFO - lr: 8.2274e-06 gnorm: 1.16 [20:18:49< 4:11:11] +[titan] 2025-10-05 18:53:11,434 - root - INFO - step: 33170 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:53:11,435 - root - INFO - lr: 8.2228e-06 gnorm: 1.19 [20:19:00< 4:11:00] +[titan] 2025-10-05 18:53:22,306 - root - INFO - step: 33175 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:53:22,306 - root - INFO - lr: 8.2182e-06 gnorm: 1.21 [20:19:10< 4:10:49] +[titan] 2025-10-05 18:53:33,152 - root - INFO - step: 33180 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 18:53:33,152 - root - INFO - lr: 
8.2136e-06 gnorm: 1.19 [20:19:21< 4:10:38] +[titan] 2025-10-05 18:53:44,004 - root - INFO - step: 33185 loss: 1.9935 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 18:53:44,004 - root - INFO - lr: 8.2090e-06 gnorm: 1.16 [20:19:32< 4:10:27] +[titan] 2025-10-05 18:53:54,872 - root - INFO - step: 33190 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:53:54,872 - root - INFO - lr: 8.2044e-06 gnorm: 1.21 [20:19:43< 4:10:15] +[titan] 2025-10-05 18:54:05,750 - root - INFO - step: 33195 loss: 2.0158 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 18:54:05,750 - root - INFO - lr: 8.1998e-06 gnorm: 1.18 [20:19:54< 4:10:04] +[titan] 2025-10-05 18:54:14,509 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:54:16,691 - root - INFO - step: 33200 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 18:54:16,691 - root - INFO - lr: 8.1952e-06 gnorm: 1.21 [20:20:05< 4:09:53] +[titan] 2025-10-05 18:54:27,562 - root - INFO - step: 33205 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7204 +[titan] 2025-10-05 18:54:27,563 - root - INFO - lr: 8.1906e-06 gnorm: 1.18 [20:20:16< 4:09:42] +[titan] 2025-10-05 18:54:38,424 - root - INFO - step: 33210 loss: 1.9533 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 18:54:38,424 - root - INFO - lr: 8.1861e-06 gnorm: 1.19 [20:20:27< 4:09:31] +[titan] 2025-10-05 18:54:49,288 - root - INFO - step: 33215 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 18:54:49,288 - root - INFO - lr: 8.1815e-06 gnorm: 1.22 [20:20:37< 4:09:20] +[titan] 2025-10-05 18:55:00,163 - root - INFO - step: 33220 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:55:00,163 - root - INFO - lr: 8.1769e-06 gnorm: 1.21 [20:20:48< 4:09:09] +[titan] 2025-10-05 18:55:11,094 - root - INFO - step: 33225 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7865 +[titan] 2025-10-05 18:55:11,094 - root - INFO - lr: 8.1723e-06 gnorm: 1.22 [20:20:59< 4:08:58] +[titan] 2025-10-05 18:55:21,957 - root - INFO - step: 33230 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 18:55:21,957 - root - INFO - lr: 
8.1678e-06 gnorm: 1.18 [20:21:10< 4:08:47] +[titan] 2025-10-05 18:55:32,818 - root - INFO - step: 33235 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 18:55:32,818 - root - INFO - lr: 8.1632e-06 gnorm: 1.16 [20:21:21< 4:08:36] +[titan] 2025-10-05 18:55:43,665 - root - INFO - step: 33240 loss: 2.0182 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 18:55:43,666 - root - INFO - lr: 8.1586e-06 gnorm: 1.18 [20:21:32< 4:08:25] +[titan] 2025-10-05 18:55:54,510 - root - INFO - step: 33245 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 18:55:54,510 - root - INFO - lr: 8.1541e-06 gnorm: 1.19 [20:21:43< 4:08:14] +[titan] 2025-10-05 18:56:03,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:56:05,369 - root - INFO - step: 33250 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 18:56:05,369 - root - INFO - lr: 8.1495e-06 gnorm: 1.22 [20:21:54< 4:08:03] +[titan] 2025-10-05 18:56:16,302 - root - INFO - step: 33255 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 18:56:16,302 - root - INFO - lr: 8.1450e-06 gnorm: 1.21 [20:22:04< 4:07:52] +[titan] 2025-10-05 18:56:27,156 - root - INFO - step: 33260 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 18:56:27,156 - root - INFO - lr: 8.1404e-06 gnorm: 1.17 [20:22:15< 4:07:41] +[titan] 2025-10-05 18:56:37,991 - root - INFO - step: 33265 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 18:56:37,991 - root - INFO - lr: 8.1359e-06 gnorm: 1.18 [20:22:26< 4:07:30] +[titan] 2025-10-05 18:56:48,867 - root - INFO - step: 33270 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 18:56:48,867 - root - INFO - lr: 8.1313e-06 gnorm: 1.14 [20:22:37< 4:07:19] +[titan] 2025-10-05 18:56:59,716 - root - INFO - step: 33275 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 18:56:59,716 - root - INFO - lr: 8.1268e-06 gnorm: 1.16 [20:22:48< 4:07:08] +[titan] 2025-10-05 18:57:10,748 - root - INFO - step: 33280 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 29,703 tflops: 412.09 mfu: 41.67% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:57:10,749 - root - INFO - lr: 
8.1223e-06 gnorm: 1.20 [20:22:59< 4:06:56] +[titan] 2025-10-05 18:57:10,937 - root - INFO - Dumping profiler traces at step 33280 +[titan] 2025-10-05 18:57:10,975 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:57:21,865 - root - INFO - step: 33285 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 29,477 tflops: 408.95 mfu: 41.35% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:57:21,865 - root - INFO - lr: 8.1177e-06 gnorm: 1.18 [20:23:10< 4:06:45] +[titan] 2025-10-05 18:57:32,746 - root - INFO - step: 33290 loss: 1.9692 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 18:57:32,747 - root - INFO - lr: 8.1132e-06 gnorm: 1.19 [20:23:21< 4:06:34] +[titan] 2025-10-05 18:57:43,611 - root - INFO - step: 33295 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 18:57:43,611 - root - INFO - lr: 8.1087e-06 gnorm: 1.19 [20:23:32< 4:06:23] +[titan] 2025-10-05 18:57:52,270 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:57:54,457 - root - INFO - step: 33300 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 18:57:54,457 - root - INFO - lr: 8.1041e-06 gnorm: 1.22 [20:23:43< 4:06:12] +[titan] 2025-10-05 18:58:05,339 - root - INFO - step: 33305 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:58:05,339 - root - INFO - lr: 8.0996e-06 gnorm: 1.21 [20:23:54< 4:06:01] +[titan] 2025-10-05 18:58:16,262 - root - INFO - step: 33310 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 18:58:16,263 - root - INFO - lr: 8.0951e-06 gnorm: 1.24 [20:24:04< 4:05:50] +[titan] 2025-10-05 18:58:27,116 - root - INFO - step: 33315 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 18:58:27,116 - root - INFO - lr: 8.0906e-06 gnorm: 1.20 [20:24:15< 4:05:39] +[titan] 2025-10-05 18:58:38,011 - root - INFO - step: 33320 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 18:58:38,011 - root - INFO - lr: 8.0861e-06 gnorm: 1.18 [20:24:26< 4:05:28] +[titan] 2025-10-05 18:58:48,874 - root - INFO - step: 33325 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 18:58:48,874 - root - INFO - lr: 8.0816e-06 gnorm: 1.17 [20:24:37< 4:05:17] +[titan] 2025-10-05 18:58:59,752 - root - INFO - step: 33330 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 18:58:59,752 - root - INFO - lr: 
8.0771e-06 gnorm: 1.18 [20:24:48< 4:05:06] +[titan] 2025-10-05 18:59:10,641 - root - INFO - step: 33335 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 18:59:10,641 - root - INFO - lr: 8.0725e-06 gnorm: 1.20 [20:24:59< 4:04:55] +[titan] 2025-10-05 18:59:21,564 - root - INFO - step: 33340 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 18:59:21,564 - root - INFO - lr: 8.0680e-06 gnorm: 1.15 [20:25:10< 4:04:44] +[titan] 2025-10-05 18:59:32,450 - root - INFO - step: 33345 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:59:32,450 - root - INFO - lr: 8.0636e-06 gnorm: 1.18 [20:25:21< 4:04:33] +[titan] 2025-10-05 18:59:41,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:59:43,356 - root - INFO - step: 33350 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:59:43,356 - root - INFO - lr: 8.0591e-06 gnorm: 1.16 [20:25:32< 4:04:22] +[titan] 2025-10-05 18:59:54,223 - root - INFO - step: 33355 loss: 1.9358 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7114 +[titan] 2025-10-05 18:59:54,223 - root - INFO - lr: 8.0546e-06 gnorm: 1.18 [20:25:42< 4:04:11] +[titan] 2025-10-05 19:00:05,102 - root - INFO - step: 33360 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 19:00:05,102 - root - INFO - lr: 8.0501e-06 gnorm: 1.17 [20:25:53< 4:04:00] +[titan] 2025-10-05 19:00:16,037 - root - INFO - step: 33365 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:00:16,037 - root - INFO - lr: 8.0456e-06 gnorm: 1.20 [20:26:04< 4:03:49] +[titan] 2025-10-05 19:00:26,915 - root - INFO - step: 33370 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:00:26,916 - root - INFO - lr: 8.0411e-06 gnorm: 1.20 [20:26:15< 4:03:38] +[titan] 2025-10-05 19:00:37,762 - root - INFO - step: 33375 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:00:37,763 - root - INFO - lr: 8.0366e-06 gnorm: 1.21 [20:26:26< 4:03:27] +[titan] 2025-10-05 19:00:48,614 - root - INFO - step: 33380 loss: 1.9232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:00:48,614 - root - INFO - lr: 
8.0322e-06 gnorm: 1.18 [20:26:37< 4:03:15] +[titan] 2025-10-05 19:00:59,483 - root - INFO - step: 33385 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:00:59,483 - root - INFO - lr: 8.0277e-06 gnorm: 1.19 [20:26:48< 4:03:04] +[titan] 2025-10-05 19:01:10,340 - root - INFO - step: 33390 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 19:01:10,340 - root - INFO - lr: 8.0232e-06 gnorm: 1.19 [20:26:59< 4:02:53] +[titan] 2025-10-05 19:01:21,250 - root - INFO - step: 33395 loss: 1.9470 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 19:01:21,250 - root - INFO - lr: 8.0187e-06 gnorm: 1.17 [20:27:09< 4:02:42] +[titan] 2025-10-05 19:01:29,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:01:32,102 - root - INFO - step: 33400 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 19:01:32,102 - root - INFO - lr: 8.0143e-06 gnorm: 1.17 [20:27:20< 4:02:31] +[titan] 2025-10-05 19:01:42,959 - root - INFO - step: 33405 loss: 1.8686 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 19:01:42,960 - root - INFO - lr: 8.0098e-06 gnorm: 1.18 [20:27:31< 4:02:20] +[titan] 2025-10-05 19:01:53,819 - root - INFO - step: 33410 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 19:01:53,819 - root - INFO - lr: 8.0054e-06 gnorm: 1.19 [20:27:42< 4:02:09] +[titan] 2025-10-05 19:02:04,734 - root - INFO - step: 33415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 19:02:04,734 - root - INFO - lr: 8.0009e-06 gnorm: 1.18 [20:27:53< 4:01:58] +[titan] 2025-10-05 19:02:15,660 - root - INFO - step: 33420 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 19:02:15,661 - root - INFO - lr: 7.9965e-06 gnorm: 1.20 [20:28:04< 4:01:47] +[titan] 2025-10-05 19:02:26,561 - root - INFO - step: 33425 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 19:02:26,561 - root - INFO - lr: 7.9920e-06 gnorm: 1.15 [20:28:15< 4:01:36] +[titan] 2025-10-05 19:02:37,445 - root - INFO - step: 33430 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 19:02:37,445 - root - INFO - lr: 
7.9876e-06 gnorm: 1.18 [20:28:26< 4:01:25] +[titan] 2025-10-05 19:02:48,327 - root - INFO - step: 33435 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:02:48,327 - root - INFO - lr: 7.9831e-06 gnorm: 1.16 [20:28:36< 4:01:14] +[titan] 2025-10-05 19:02:59,208 - root - INFO - step: 33440 loss: 1.9304 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 19:02:59,209 - root - INFO - lr: 7.9787e-06 gnorm: 1.20 [20:28:47< 4:01:03] +[titan] 2025-10-05 19:03:10,117 - root - INFO - step: 33445 loss: 2.0526 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8150 +[titan] 2025-10-05 19:03:10,117 - root - INFO - lr: 7.9742e-06 gnorm: 1.21 [20:28:58< 4:00:52] +[titan] 2025-10-05 19:03:18,858 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:03:21,042 - root - INFO - step: 33450 loss: 1.9353 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:03:21,042 - root - INFO - lr: 7.9698e-06 gnorm: 1.16 [20:29:09< 4:00:41] +[titan] 2025-10-05 19:03:31,901 - root - INFO - step: 33455 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:03:31,901 - root - INFO - lr: 7.9654e-06 gnorm: 1.18 [20:29:20< 4:00:30] +[titan] 2025-10-05 19:03:42,767 - root - INFO - step: 33460 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:03:42,767 - root - INFO - lr: 7.9610e-06 gnorm: 1.19 [20:29:31< 4:00:19] +[titan] 2025-10-05 19:03:53,626 - root - INFO - step: 33465 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:03:53,626 - root - INFO - lr: 7.9565e-06 gnorm: 1.20 [20:29:42< 4:00:08] +[titan] 2025-10-05 19:04:04,499 - root - INFO - step: 33470 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:04:04,499 - root - INFO - lr: 7.9521e-06 gnorm: 1.22 [20:29:53< 3:59:57] +[titan] 2025-10-05 19:04:15,390 - root - INFO - step: 33475 loss: 1.9236 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:04:15,390 - root - INFO - lr: 7.9477e-06 gnorm: 1.15 [20:30:04< 3:59:46] +[titan] 2025-10-05 19:04:26,338 - root - INFO - step: 33480 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 19:04:26,338 - root - INFO - lr: 
7.9433e-06 gnorm: 1.18 [20:30:14< 3:59:34] +[titan] 2025-10-05 19:04:37,222 - root - INFO - step: 33485 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 19:04:37,222 - root - INFO - lr: 7.9389e-06 gnorm: 1.12 [20:30:25< 3:59:23] +[titan] 2025-10-05 19:04:48,095 - root - INFO - step: 33490 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 19:04:48,095 - root - INFO - lr: 7.9345e-06 gnorm: 1.17 [20:30:36< 3:59:12] +[titan] 2025-10-05 19:04:58,969 - root - INFO - step: 33495 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 19:04:58,969 - root - INFO - lr: 7.9301e-06 gnorm: 1.19 [20:30:47< 3:59:01] +[titan] 2025-10-05 19:05:07,650 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:05:09,844 - root - INFO - step: 33500 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 19:05:09,844 - root - INFO - lr: 7.9256e-06 gnorm: 1.16 [20:30:58< 3:58:50] +[titan] 2025-10-05 19:05:20,803 - root - INFO - step: 33505 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7103 +[titan] 2025-10-05 19:05:20,803 - root - INFO - lr: 7.9212e-06 gnorm: 1.17 [20:31:09< 3:58:39] +[titan] 2025-10-05 19:05:31,705 - root - INFO - step: 33510 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 19:05:31,706 - root - INFO - lr: 7.9169e-06 gnorm: 1.18 [20:31:20< 3:58:28] +[titan] 2025-10-05 19:05:42,585 - root - INFO - step: 33515 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 19:05:42,585 - root - INFO - lr: 7.9125e-06 gnorm: 1.16 [20:31:31< 3:58:17] +[titan] 2025-10-05 19:05:53,459 - root - INFO - step: 33520 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 19:05:53,459 - root - INFO - lr: 7.9081e-06 gnorm: 1.20 [20:31:42< 3:58:06] +[titan] 2025-10-05 19:06:04,332 - root - INFO - step: 33525 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6778 +[titan] 2025-10-05 19:06:04,332 - root - INFO - lr: 7.9037e-06 gnorm: 1.12 [20:31:52< 3:57:55] +[titan] 2025-10-05 19:06:15,198 - root - INFO - step: 33530 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 19:06:15,198 - root - INFO - lr: 
7.8993e-06 gnorm: 1.17 [20:32:03< 3:57:44] +[titan] 2025-10-05 19:06:26,152 - root - INFO - step: 33535 loss: 1.9859 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 19:06:26,152 - root - INFO - lr: 7.8949e-06 gnorm: 1.24 [20:32:14< 3:57:33] +[titan] 2025-10-05 19:06:37,024 - root - INFO - step: 33540 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7744 +[titan] 2025-10-05 19:06:37,024 - root - INFO - lr: 7.8905e-06 gnorm: 1.22 [20:32:25< 3:57:22] +[titan] 2025-10-05 19:06:47,931 - root - INFO - step: 33545 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 19:06:47,931 - root - INFO - lr: 7.8862e-06 gnorm: 1.24 [20:32:36< 3:57:11] +[titan] 2025-10-05 19:06:56,619 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:06:58,805 - root - INFO - step: 33550 loss: 1.9223 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:06:58,805 - root - INFO - lr: 7.8818e-06 gnorm: 1.17 [20:32:47< 3:57:00] +[titan] 2025-10-05 19:07:09,652 - root - INFO - step: 33555 loss: 1.9140 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:07:09,653 - root - INFO - lr: 7.8774e-06 gnorm: 1.21 [20:32:58< 3:56:49] +[titan] 2025-10-05 19:07:20,562 - root - INFO - step: 33560 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:07:20,563 - root - INFO - lr: 7.8731e-06 gnorm: 1.23 [20:33:09< 3:56:38] +[titan] 2025-10-05 19:07:31,425 - root - INFO - step: 33565 loss: 1.8946 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 19:07:31,425 - root - INFO - lr: 7.8687e-06 gnorm: 1.19 [20:33:20< 3:56:27] +[titan] 2025-10-05 19:07:42,303 - root - INFO - step: 33570 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:07:42,303 - root - INFO - lr: 7.8643e-06 gnorm: 1.20 [20:33:30< 3:56:16] +[titan] 2025-10-05 19:07:53,210 - root - INFO - step: 33575 loss: 1.9262 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:07:53,210 - root - INFO - lr: 7.8600e-06 gnorm: 1.18 [20:33:41< 3:56:05] +[titan] 2025-10-05 19:08:04,072 - root - INFO - step: 33580 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 19:08:04,072 - root - INFO - lr: 
7.8556e-06 gnorm: 1.18 [20:33:52< 3:55:53] +[titan] 2025-10-05 19:08:14,947 - root - INFO - step: 33585 loss: 1.8953 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 19:08:14,947 - root - INFO - lr: 7.8513e-06 gnorm: 1.14 [20:34:03< 3:55:42] +[titan] 2025-10-05 19:08:25,883 - root - INFO - step: 33590 loss: 1.9998 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 19:08:25,883 - root - INFO - lr: 7.8469e-06 gnorm: 1.19 [20:34:14< 3:55:31] +[titan] 2025-10-05 19:08:36,748 - root - INFO - step: 33595 loss: 1.8788 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6618 +[titan] 2025-10-05 19:08:36,748 - root - INFO - lr: 7.8426e-06 gnorm: 1.17 [20:34:25< 3:55:20] +[titan] 2025-10-05 19:08:45,430 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:08:47,610 - root - INFO - step: 33600 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7176 +[titan] 2025-10-05 19:08:47,610 - root - INFO - lr: 7.8382e-06 gnorm: 1.20 [20:34:36< 3:55:09] +[titan] 2025-10-05 19:08:58,491 - root - INFO - step: 33605 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7542 +[titan] 2025-10-05 19:08:58,491 - root - INFO - lr: 7.8339e-06 gnorm: 1.19 [20:34:47< 3:54:58] +[titan] 2025-10-05 19:09:09,347 - root - INFO - step: 33610 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 19:09:09,347 - root - INFO - lr: 7.8296e-06 gnorm: 1.17 [20:34:57< 3:54:47] +[titan] 2025-10-05 19:09:20,217 - root - INFO - step: 33615 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 19:09:20,217 - root - INFO - lr: 7.8252e-06 gnorm: 1.18 [20:35:08< 3:54:36] +[titan] 2025-10-05 19:09:31,144 - root - INFO - step: 33620 loss: 1.9273 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 19:09:31,144 - root - INFO - lr: 7.8209e-06 gnorm: 1.16 [20:35:19< 3:54:25] +[titan] 2025-10-05 19:09:41,985 - root - INFO - step: 33625 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 19:09:41,985 - root - INFO - lr: 7.8166e-06 gnorm: 1.18 [20:35:30< 3:54:14] +[titan] 2025-10-05 19:09:52,855 - root - INFO - step: 33630 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7601 +[titan] 2025-10-05 19:09:52,855 - root - INFO - lr: 
7.8123e-06 gnorm: 1.21 [20:35:41< 3:54:03] +[titan] 2025-10-05 19:10:03,725 - root - INFO - step: 33635 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:10:03,725 - root - INFO - lr: 7.8080e-06 gnorm: 1.19 [20:35:52< 3:53:52] +[titan] 2025-10-05 19:10:14,597 - root - INFO - step: 33640 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6948 +[titan] 2025-10-05 19:10:14,597 - root - INFO - lr: 7.8036e-06 gnorm: 1.18 [20:36:03< 3:53:41] +[titan] 2025-10-05 19:10:25,501 - root - INFO - step: 33645 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 19:10:25,501 - root - INFO - lr: 7.7993e-06 gnorm: 1.17 [20:36:14< 3:53:30] +[titan] 2025-10-05 19:10:34,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:10:36,352 - root - INFO - step: 33650 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:10:36,352 - root - INFO - lr: 7.7950e-06 gnorm: 1.18 [20:36:24< 3:53:19] +[titan] 2025-10-05 19:10:47,197 - root - INFO - step: 33655 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 19:10:47,198 - root - INFO - lr: 7.7907e-06 gnorm: 1.18 [20:36:35< 3:53:08] +[titan] 2025-10-05 19:10:58,037 - root - INFO - step: 33660 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:10:58,037 - root - INFO - lr: 7.7864e-06 gnorm: 1.20 [20:36:46< 3:52:57] +[titan] 2025-10-05 19:11:08,895 - root - INFO - step: 33665 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 19:11:08,895 - root - INFO - lr: 7.7821e-06 gnorm: 1.18 [20:36:57< 3:52:46] +[titan] 2025-10-05 19:11:19,804 - root - INFO - step: 33670 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:11:19,805 - root - INFO - lr: 7.7778e-06 gnorm: 1.13 [20:37:08< 3:52:35] +[titan] 2025-10-05 19:11:30,707 - root - INFO - step: 33675 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:11:30,707 - root - INFO - lr: 7.7735e-06 gnorm: 1.19 [20:37:19< 3:52:23] +[titan] 2025-10-05 19:11:41,571 - root - INFO - step: 33680 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 19:11:41,571 - root - INFO - lr: 
7.7692e-06 gnorm: 1.15 [20:37:30< 3:52:12] +[titan] 2025-10-05 19:11:52,439 - root - INFO - step: 33685 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7395 +[titan] 2025-10-05 19:11:52,439 - root - INFO - lr: 7.7649e-06 gnorm: 1.17 [20:37:41< 3:52:01] +[titan] 2025-10-05 19:12:03,278 - root - INFO - step: 33690 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 19:12:03,278 - root - INFO - lr: 7.7606e-06 gnorm: 1.21 [20:37:51< 3:51:50] +[titan] 2025-10-05 19:12:14,126 - root - INFO - step: 33695 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:12:14,126 - root - INFO - lr: 7.7564e-06 gnorm: 1.23 [20:38:02< 3:51:39] +[titan] 2025-10-05 19:12:22,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:12:25,032 - root - INFO - step: 33700 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 19:12:25,032 - root - INFO - lr: 7.7521e-06 gnorm: 1.18 [20:38:13< 3:51:28] +[titan] 2025-10-05 19:12:35,912 - root - INFO - step: 33705 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 19:12:35,913 - root - INFO - lr: 7.7478e-06 gnorm: 1.21 [20:38:24< 3:51:17] +[titan] 2025-10-05 19:12:46,776 - root - INFO - step: 33710 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 19:12:46,776 - root - INFO - lr: 7.7435e-06 gnorm: 1.19 [20:38:35< 3:51:06] +[titan] 2025-10-05 19:12:57,642 - root - INFO - step: 33715 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 19:12:57,642 - root - INFO - lr: 7.7393e-06 gnorm: 1.17 [20:38:46< 3:50:55] +[titan] 2025-10-05 19:13:08,509 - root - INFO - step: 33720 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6902 +[titan] 2025-10-05 19:13:08,509 - root - INFO - lr: 7.7350e-06 gnorm: 1.19 [20:38:57< 3:50:44] +[titan] 2025-10-05 19:13:19,370 - root - INFO - step: 33725 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:13:19,370 - root - INFO - lr: 7.7307e-06 gnorm: 1.18 [20:39:08< 3:50:33] +[titan] 2025-10-05 19:13:30,375 - root - INFO - step: 33730 loss: 1.9645 memory: 118.84GiB(85.28%) tps: 29,776 tflops: 413.09 mfu: 41.77% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 19:13:30,375 - root - INFO - lr: 
7.7265e-06 gnorm: 1.18 [20:39:19< 3:50:22] +[titan] 2025-10-05 19:13:41,281 - root - INFO - step: 33735 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 19:13:41,281 - root - INFO - lr: 7.7222e-06 gnorm: 1.18 [20:39:29< 3:50:11] +[titan] 2025-10-05 19:13:52,137 - root - INFO - step: 33740 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 19:13:52,137 - root - INFO - lr: 7.7180e-06 gnorm: 1.20 [20:39:40< 3:50:00] +[titan] 2025-10-05 19:14:03,003 - root - INFO - step: 33745 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 19:14:03,003 - root - INFO - lr: 7.7137e-06 gnorm: 1.17 [20:39:51< 3:49:49] +[titan] 2025-10-05 19:14:11,685 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:14:13,871 - root - INFO - step: 33750 loss: 2.0153 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7820 +[titan] 2025-10-05 19:14:13,871 - root - INFO - lr: 7.7095e-06 gnorm: 1.23 [20:40:02< 3:49:38] +[titan] 2025-10-05 19:14:24,752 - root - INFO - step: 33755 loss: 1.8533 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2139 global_avg_mtp_loss: 1.6394 +[titan] 2025-10-05 19:14:24,752 - root - INFO - lr: 7.7052e-06 gnorm: 1.18 [20:40:13< 3:49:27] +[titan] 2025-10-05 19:14:35,653 - root - INFO - step: 33760 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7664 +[titan] 2025-10-05 19:14:35,654 - root - INFO - lr: 7.7010e-06 gnorm: 1.20 [20:40:24< 3:49:16] +[titan] 2025-10-05 19:14:46,559 - root - INFO - step: 33765 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 19:14:46,559 - root - INFO - lr: 7.6967e-06 gnorm: 1.17 [20:40:35< 3:49:05] +[titan] 2025-10-05 19:14:57,429 - root - INFO - step: 33770 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6631 +[titan] 2025-10-05 19:14:57,429 - root - INFO - lr: 7.6925e-06 gnorm: 1.19 [20:40:46< 3:48:54] +[titan] 2025-10-05 19:15:08,283 - root - INFO - step: 33775 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 19:15:08,283 - root - INFO - lr: 7.6883e-06 gnorm: 1.20 [20:40:56< 3:48:42] +[titan] 2025-10-05 19:15:19,145 - root - INFO - step: 33780 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:15:19,145 - root - INFO - lr: 
7.6841e-06 gnorm: 1.21 [20:41:07< 3:48:31] +[titan] 2025-10-05 19:15:30,024 - root - INFO - step: 33785 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6677 +[titan] 2025-10-05 19:15:30,024 - root - INFO - lr: 7.6798e-06 gnorm: 1.18 [20:41:18< 3:48:20] +[titan] 2025-10-05 19:15:40,968 - root - INFO - step: 33790 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:15:40,968 - root - INFO - lr: 7.6756e-06 gnorm: 1.18 [20:41:29< 3:48:09] +[titan] 2025-10-05 19:15:45,497 - root - INFO - Dumping profiler traces at step 33792 +[titan] 2025-10-05 19:15:45,536 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:15:52,106 - root - INFO - step: 33795 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 29,421 tflops: 408.17 mfu: 41.27% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 19:15:52,106 - root - INFO - lr: 7.6714e-06 gnorm: 1.19 [20:41:40< 3:47:58] +[titan] 2025-10-05 19:16:00,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:16:03,008 - root - INFO - step: 33800 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:16:03,008 - root - INFO - lr: 7.6672e-06 gnorm: 1.20 [20:41:51< 3:47:47] +[titan] 2025-10-05 19:16:13,885 - root - INFO - step: 33805 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 19:16:13,885 - root - INFO - lr: 7.6630e-06 gnorm: 1.17 [20:42:02< 3:47:36] +[titan] 2025-10-05 19:16:24,767 - root - INFO - step: 33810 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 19:16:24,767 - root - INFO - lr: 7.6587e-06 gnorm: 1.15 [20:42:13< 3:47:25] +[titan] 2025-10-05 19:16:35,714 - root - INFO - step: 33815 loss: 2.0005 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 19:16:35,714 - root - INFO - lr: 7.6545e-06 gnorm: 1.20 [20:42:24< 3:47:14] +[titan] 2025-10-05 19:16:46,588 - root - INFO - step: 33820 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 19:16:46,588 - root - INFO - lr: 7.6503e-06 gnorm: 1.18 [20:42:35< 3:47:03] +[titan] 2025-10-05 19:16:57,467 - root - INFO - step: 33825 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6954 +[titan] 2025-10-05 19:16:57,467 - root - INFO - lr: 7.6461e-06 gnorm: 1.16 [20:42:46< 3:46:52] +[titan] 2025-10-05 19:17:08,370 - root - INFO - step: 33830 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 19:17:08,371 - root - INFO - lr: 
7.6419e-06 gnorm: 1.15 [20:42:56< 3:46:41] +[titan] 2025-10-05 19:17:19,239 - root - INFO - step: 33835 loss: 1.9118 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:17:19,239 - root - INFO - lr: 7.6377e-06 gnorm: 1.19 [20:43:07< 3:46:30] +[titan] 2025-10-05 19:17:30,168 - root - INFO - step: 33840 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7619 +[titan] 2025-10-05 19:17:30,168 - root - INFO - lr: 7.6335e-06 gnorm: 1.20 [20:43:18< 3:46:19] +[titan] 2025-10-05 19:17:41,047 - root - INFO - step: 33845 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:17:41,047 - root - INFO - lr: 7.6294e-06 gnorm: 1.17 [20:43:29< 3:46:08] +[titan] 2025-10-05 19:17:49,727 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:17:51,911 - root - INFO - step: 33850 loss: 1.9924 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7615 +[titan] 2025-10-05 19:17:51,911 - root - INFO - lr: 7.6252e-06 gnorm: 1.20 [20:43:40< 3:45:57] +[titan] 2025-10-05 19:18:02,789 - root - INFO - step: 33855 loss: 1.9320 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 19:18:02,789 - root - INFO - lr: 7.6210e-06 gnorm: 1.18 [20:43:51< 3:45:46] +[titan] 2025-10-05 19:18:13,634 - root - INFO - step: 33860 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 19:18:13,634 - root - INFO - lr: 7.6168e-06 gnorm: 1.20 [20:44:02< 3:45:35] +[titan] 2025-10-05 19:18:24,528 - root - INFO - step: 33865 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:18:24,528 - root - INFO - lr: 7.6126e-06 gnorm: 1.21 [20:44:13< 3:45:24] +[titan] 2025-10-05 19:18:35,439 - root - INFO - step: 33870 loss: 1.8718 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6559 +[titan] 2025-10-05 19:18:35,440 - root - INFO - lr: 7.6085e-06 gnorm: 1.16 [20:44:24< 3:45:13] +[titan] 2025-10-05 19:18:46,300 - root - INFO - step: 33875 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:18:46,300 - root - INFO - lr: 7.6043e-06 gnorm: 1.18 [20:44:34< 3:45:02] +[titan] 2025-10-05 19:18:57,171 - root - INFO - step: 33880 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 19:18:57,171 - root - INFO - lr: 
7.6001e-06 gnorm: 1.18 [20:44:45< 3:44:51] +[titan] 2025-10-05 19:19:08,027 - root - INFO - step: 33885 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 19:19:08,028 - root - INFO - lr: 7.5960e-06 gnorm: 1.18 [20:44:56< 3:44:40] +[titan] 2025-10-05 19:19:18,884 - root - INFO - step: 33890 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 19:19:18,884 - root - INFO - lr: 7.5918e-06 gnorm: 1.15 [20:45:07< 3:44:28] +[titan] 2025-10-05 19:19:29,764 - root - INFO - step: 33895 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6963 +[titan] 2025-10-05 19:19:29,765 - root - INFO - lr: 7.5877e-06 gnorm: 1.14 [20:45:18< 3:44:17] +[titan] 2025-10-05 19:19:38,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:19:40,700 - root - INFO - step: 33900 loss: 1.9418 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:19:40,700 - root - INFO - lr: 7.5835e-06 gnorm: 1.15 [20:45:29< 3:44:06] +[titan] 2025-10-05 19:19:51,575 - root - INFO - step: 33905 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 19:19:51,575 - root - INFO - lr: 7.5793e-06 gnorm: 1.18 [20:45:40< 3:43:55] +[titan] 2025-10-05 19:20:02,438 - root - INFO - step: 33910 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 19:20:02,438 - root - INFO - lr: 7.5752e-06 gnorm: 1.17 [20:45:51< 3:43:44] +[titan] 2025-10-05 19:20:13,310 - root - INFO - step: 33915 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 19:20:13,310 - root - INFO - lr: 7.5711e-06 gnorm: 1.21 [20:46:01< 3:43:33] +[titan] 2025-10-05 19:20:24,174 - root - INFO - step: 33920 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 19:20:24,174 - root - INFO - lr: 7.5669e-06 gnorm: 1.18 [20:46:12< 3:43:22] +[titan] 2025-10-05 19:20:35,419 - root - INFO - step: 33925 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 29,141 tflops: 404.28 mfu: 40.88% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 19:20:35,420 - root - INFO - lr: 7.5628e-06 gnorm: 1.19 [20:46:24< 3:43:11] +[titan] 2025-10-05 19:20:46,283 - root - INFO - step: 33930 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 19:20:46,283 - root - INFO - lr: 
7.5586e-06 gnorm: 1.17 [20:46:34< 3:43:00] +[titan] 2025-10-05 19:20:57,167 - root - INFO - step: 33935 loss: 1.9676 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 19:20:57,167 - root - INFO - lr: 7.5545e-06 gnorm: 1.23 [20:46:45< 3:42:49] +[titan] 2025-10-05 19:21:08,017 - root - INFO - step: 33940 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:21:08,017 - root - INFO - lr: 7.5504e-06 gnorm: 1.19 [20:46:56< 3:42:38] +[titan] 2025-10-05 19:21:18,883 - root - INFO - step: 33945 loss: 1.9536 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 19:21:18,883 - root - INFO - lr: 7.5463e-06 gnorm: 1.17 [20:47:07< 3:42:27] +[titan] 2025-10-05 19:21:27,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:21:29,744 - root - INFO - step: 33950 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:21:29,745 - root - INFO - lr: 7.5421e-06 gnorm: 1.24 [20:47:18< 3:42:16] +[titan] 2025-10-05 19:21:40,693 - root - INFO - step: 33955 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:21:40,694 - root - INFO - lr: 7.5380e-06 gnorm: 1.21 [20:47:29< 3:42:05] +[titan] 2025-10-05 19:21:51,598 - root - INFO - step: 33960 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 19:21:51,598 - root - INFO - lr: 7.5339e-06 gnorm: 1.22 [20:47:40< 3:41:54] +[titan] 2025-10-05 19:22:02,484 - root - INFO - step: 33965 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 19:22:02,484 - root - INFO - lr: 7.5298e-06 gnorm: 1.17 [20:47:51< 3:41:43] +[titan] 2025-10-05 19:22:13,362 - root - INFO - step: 33970 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 19:22:13,362 - root - INFO - lr: 7.5257e-06 gnorm: 1.15 [20:48:01< 3:41:32] +[titan] 2025-10-05 19:22:24,229 - root - INFO - step: 33975 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:22:24,229 - root - INFO - lr: 7.5216e-06 gnorm: 1.21 [20:48:12< 3:41:21] +[titan] 2025-10-05 19:22:35,177 - root - INFO - step: 33980 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:22:35,178 - root - INFO - lr: 
7.5175e-06 gnorm: 1.26 [20:48:23< 3:41:10] +[titan] 2025-10-05 19:22:46,038 - root - INFO - step: 33985 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:22:46,038 - root - INFO - lr: 7.5134e-06 gnorm: 1.22 [20:48:34< 3:40:59] +[titan] 2025-10-05 19:22:56,932 - root - INFO - step: 33990 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 19:22:56,932 - root - INFO - lr: 7.5093e-06 gnorm: 1.16 [20:48:45< 3:40:48] +[titan] 2025-10-05 19:23:07,777 - root - INFO - step: 33995 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:23:07,778 - root - INFO - lr: 7.5052e-06 gnorm: 1.21 [20:48:56< 3:40:37] +[titan] 2025-10-05 19:23:16,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:23:18,638 - root - INFO - step: 34000 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:23:18,638 - root - INFO - lr: 7.5011e-06 gnorm: 1.17 [20:49:07< 3:40:25] +[titan] 2025-10-05 19:23:29,501 - root - INFO - step: 34005 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:23:29,501 - root - INFO - lr: 7.4970e-06 gnorm: 1.18 [20:49:18< 3:40:14] +[titan] 2025-10-05 19:23:40,426 - root - INFO - step: 34010 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 19:23:40,426 - root - INFO - lr: 7.4929e-06 gnorm: 1.19 [20:49:29< 3:40:03] +[titan] 2025-10-05 19:23:51,314 - root - INFO - step: 34015 loss: 1.9884 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:23:51,315 - root - INFO - lr: 7.4888e-06 gnorm: 1.21 [20:49:39< 3:39:52] +[titan] 2025-10-05 19:24:02,171 - root - INFO - step: 34020 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 19:24:02,171 - root - INFO - lr: 7.4847e-06 gnorm: 1.18 [20:49:50< 3:39:41] +[titan] 2025-10-05 19:24:13,068 - root - INFO - step: 34025 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 19:24:13,068 - root - INFO - lr: 7.4807e-06 gnorm: 1.26 [20:50:01< 3:39:30] +[titan] 2025-10-05 19:24:23,950 - root - INFO - step: 34030 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 19:24:23,951 - root - INFO - lr: 
7.4766e-06 gnorm: 1.18 [20:50:12< 3:39:19] +[titan] 2025-10-05 19:24:34,827 - root - INFO - step: 34035 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6696 +[titan] 2025-10-05 19:24:34,827 - root - INFO - lr: 7.4725e-06 gnorm: 1.20 [20:50:23< 3:39:08] +[titan] 2025-10-05 19:24:45,778 - root - INFO - step: 34040 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 19:24:45,778 - root - INFO - lr: 7.4685e-06 gnorm: 1.19 [20:50:34< 3:38:57] +[titan] 2025-10-05 19:24:56,664 - root - INFO - step: 34045 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 19:24:56,664 - root - INFO - lr: 7.4644e-06 gnorm: 1.20 [20:50:45< 3:38:46] +[titan] 2025-10-05 19:25:05,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:25:07,528 - root - INFO - step: 34050 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:25:07,528 - root - INFO - lr: 7.4603e-06 gnorm: 1.21 [20:50:56< 3:38:35] +[titan] 2025-10-05 19:25:18,416 - root - INFO - step: 34055 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6971 +[titan] 2025-10-05 19:25:18,416 - root - INFO - lr: 7.4563e-06 gnorm: 1.17 [20:51:07< 3:38:24] +[titan] 2025-10-05 19:25:29,290 - root - INFO - step: 34060 loss: 1.9560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 19:25:29,291 - root - INFO - lr: 7.4522e-06 gnorm: 1.17 [20:51:17< 3:38:13] +[titan] 2025-10-05 19:25:40,227 - root - INFO - step: 34065 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6962 +[titan] 2025-10-05 19:25:40,227 - root - INFO - lr: 7.4482e-06 gnorm: 1.16 [20:51:28< 3:38:02] +[titan] 2025-10-05 19:25:51,094 - root - INFO - step: 34070 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 19:25:51,094 - root - INFO - lr: 7.4441e-06 gnorm: 1.17 [20:51:39< 3:37:51] +[titan] 2025-10-05 19:26:01,990 - root - INFO - step: 34075 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 19:26:01,991 - root - INFO - lr: 7.4401e-06 gnorm: 1.19 [20:51:50< 3:37:40] +[titan] 2025-10-05 19:26:12,871 - root - INFO - step: 34080 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6921 +[titan] 2025-10-05 19:26:12,871 - root - INFO - lr: 
7.4361e-06 gnorm: 1.18 [20:52:01< 3:37:29] +[titan] 2025-10-05 19:26:23,746 - root - INFO - step: 34085 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7530 +[titan] 2025-10-05 19:26:23,746 - root - INFO - lr: 7.4320e-06 gnorm: 1.19 [20:52:12< 3:37:18] +[titan] 2025-10-05 19:26:34,615 - root - INFO - step: 34090 loss: 1.9192 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 19:26:34,615 - root - INFO - lr: 7.4280e-06 gnorm: 1.17 [20:52:23< 3:37:07] +[titan] 2025-10-05 19:26:45,574 - root - INFO - step: 34095 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 19:26:45,575 - root - INFO - lr: 7.4239e-06 gnorm: 1.24 [20:52:34< 3:36:56] +[titan] 2025-10-05 19:26:54,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:26:56,447 - root - INFO - step: 34100 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:26:56,447 - root - INFO - lr: 7.4199e-06 gnorm: 1.21 [20:52:45< 3:36:45] +[titan] 2025-10-05 19:27:07,327 - root - INFO - step: 34105 loss: 1.8752 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 19:27:07,327 - root - INFO - lr: 7.4159e-06 gnorm: 1.17 [20:52:55< 3:36:34] +[titan] 2025-10-05 19:27:18,206 - root - INFO - step: 34110 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 19:27:18,206 - root - INFO - lr: 7.4119e-06 gnorm: 1.25 [20:53:06< 3:36:23] +[titan] 2025-10-05 19:27:29,088 - root - INFO - step: 34115 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 19:27:29,088 - root - INFO - lr: 7.4079e-06 gnorm: 1.18 [20:53:17< 3:36:11] +[titan] 2025-10-05 19:27:40,016 - root - INFO - step: 34120 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 19:27:40,017 - root - INFO - lr: 7.4038e-06 gnorm: 1.18 [20:53:28< 3:36:00] +[titan] 2025-10-05 19:27:50,909 - root - INFO - step: 34125 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 19:27:50,909 - root - INFO - lr: 7.3998e-06 gnorm: 1.20 [20:53:39< 3:35:49] +[titan] 2025-10-05 19:28:01,811 - root - INFO - step: 34130 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:28:01,811 - root - INFO - lr: 
7.3958e-06 gnorm: 1.15 [20:53:50< 3:35:38] +[titan] 2025-10-05 19:28:12,709 - root - INFO - step: 34135 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 19:28:12,710 - root - INFO - lr: 7.3918e-06 gnorm: 1.17 [20:54:01< 3:35:27] +[titan] 2025-10-05 19:28:23,581 - root - INFO - step: 34140 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:28:23,582 - root - INFO - lr: 7.3878e-06 gnorm: 1.23 [20:54:12< 3:35:16] +[titan] 2025-10-05 19:28:34,458 - root - INFO - step: 34145 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 19:28:34,458 - root - INFO - lr: 7.3838e-06 gnorm: 1.18 [20:54:23< 3:35:05] +[titan] 2025-10-05 19:28:43,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:28:45,388 - root - INFO - step: 34150 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 19:28:45,388 - root - INFO - lr: 7.3798e-06 gnorm: 1.19 [20:54:33< 3:34:54] +[titan] 2025-10-05 19:28:56,262 - root - INFO - step: 34155 loss: 1.9387 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:28:56,262 - root - INFO - lr: 7.3758e-06 gnorm: 1.16 [20:54:44< 3:34:43] +[titan] 2025-10-05 19:29:07,168 - root - INFO - step: 34160 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 19:29:07,168 - root - INFO - lr: 7.3718e-06 gnorm: 1.18 [20:54:55< 3:34:32] +[titan] 2025-10-05 19:29:18,057 - root - INFO - step: 34165 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 19:29:18,057 - root - INFO - lr: 7.3678e-06 gnorm: 1.19 [20:55:06< 3:34:21] +[titan] 2025-10-05 19:29:28,930 - root - INFO - step: 34170 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 19:29:28,930 - root - INFO - lr: 7.3639e-06 gnorm: 1.18 [20:55:17< 3:34:10] +[titan] 2025-10-05 19:29:39,843 - root - INFO - step: 34175 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 19:29:39,844 - root - INFO - lr: 7.3599e-06 gnorm: 1.25 [20:55:28< 3:33:59] +[titan] 2025-10-05 19:29:50,737 - root - INFO - step: 34180 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 19:29:50,737 - root - INFO - lr: 
7.3559e-06 gnorm: 1.26 [20:55:39< 3:33:48] +[titan] 2025-10-05 19:30:01,652 - root - INFO - step: 34185 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 19:30:01,652 - root - INFO - lr: 7.3519e-06 gnorm: 1.25 [20:55:50< 3:33:37] +[titan] 2025-10-05 19:30:12,505 - root - INFO - step: 34190 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 19:30:12,506 - root - INFO - lr: 7.3480e-06 gnorm: 1.20 [20:56:01< 3:33:26] +[titan] 2025-10-05 19:30:23,389 - root - INFO - step: 34195 loss: 1.9339 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 19:30:23,390 - root - INFO - lr: 7.3440e-06 gnorm: 1.19 [20:56:11< 3:33:15] +[titan] 2025-10-05 19:30:32,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:30:34,246 - root - INFO - step: 34200 loss: 1.9408 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 19:30:34,246 - root - INFO - lr: 7.3400e-06 gnorm: 1.19 [20:56:22< 3:33:04] +[titan] 2025-10-05 19:30:45,157 - root - INFO - step: 34205 loss: 1.9115 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 19:30:45,157 - root - INFO - lr: 7.3361e-06 gnorm: 1.18 [20:56:33< 3:32:53] +[titan] 2025-10-05 19:30:56,027 - root - INFO - step: 34210 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 19:30:56,027 - root - INFO - lr: 7.3321e-06 gnorm: 1.19 [20:56:44< 3:32:42] +[titan] 2025-10-05 19:31:06,908 - root - INFO - step: 34215 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 19:31:06,908 - root - INFO - lr: 7.3281e-06 gnorm: 1.17 [20:56:55< 3:32:31] +[titan] 2025-10-05 19:31:17,775 - root - INFO - step: 34220 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7272 +[titan] 2025-10-05 19:31:17,776 - root - INFO - lr: 7.3242e-06 gnorm: 1.20 [20:57:06< 3:32:20] +[titan] 2025-10-05 19:31:28,639 - root - INFO - step: 34225 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 19:31:28,639 - root - INFO - lr: 7.3202e-06 gnorm: 1.19 [20:57:17< 3:32:08] +[titan] 2025-10-05 19:31:39,529 - root - INFO - step: 34230 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:31:39,529 - root - INFO - lr: 
7.3163e-06 gnorm: 1.20 [20:57:28< 3:31:57] +[titan] 2025-10-05 19:31:50,429 - root - INFO - step: 34235 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 19:31:50,429 - root - INFO - lr: 7.3124e-06 gnorm: 1.19 [20:57:39< 3:31:46] +[titan] 2025-10-05 19:32:01,297 - root - INFO - step: 34240 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6972 +[titan] 2025-10-05 19:32:01,297 - root - INFO - lr: 7.3084e-06 gnorm: 1.22 [20:57:49< 3:31:35] +[titan] 2025-10-05 19:32:12,194 - root - INFO - step: 34245 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 19:32:12,194 - root - INFO - lr: 7.3045e-06 gnorm: 1.21 [20:58:00< 3:31:24] +[titan] 2025-10-05 19:32:20,873 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:32:23,059 - root - INFO - step: 34250 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 19:32:23,059 - root - INFO - lr: 7.3006e-06 gnorm: 1.18 [20:58:11< 3:31:13] +[titan] 2025-10-05 19:32:33,942 - root - INFO - step: 34255 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:32:33,942 - root - INFO - lr: 7.2966e-06 gnorm: 1.16 [20:58:22< 3:31:02] +[titan] 2025-10-05 19:32:44,861 - root - INFO - step: 34260 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7106 +[titan] 2025-10-05 19:32:44,861 - root - INFO - lr: 7.2927e-06 gnorm: 1.18 [20:58:33< 3:30:51] +[titan] 2025-10-05 19:32:55,734 - root - INFO - step: 34265 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 19:32:55,735 - root - INFO - lr: 7.2888e-06 gnorm: 1.16 [20:58:44< 3:30:40] +[titan] 2025-10-05 19:33:06,617 - root - INFO - step: 34270 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 19:33:06,617 - root - INFO - lr: 7.2849e-06 gnorm: 1.22 [20:58:55< 3:30:29] +[titan] 2025-10-05 19:33:17,521 - root - INFO - step: 34275 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 19:33:17,521 - root - INFO - lr: 7.2809e-06 gnorm: 1.19 [20:59:06< 3:30:18] +[titan] 2025-10-05 19:33:28,449 - root - INFO - step: 34280 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 19:33:28,449 - root - INFO - lr: 
7.2770e-06 gnorm: 1.23 [20:59:17< 3:30:07] +[titan] 2025-10-05 19:33:39,328 - root - INFO - step: 34285 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:33:39,328 - root - INFO - lr: 7.2731e-06 gnorm: 1.17 [20:59:27< 3:29:56] +[titan] 2025-10-05 19:33:50,236 - root - INFO - step: 34290 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:33:50,236 - root - INFO - lr: 7.2692e-06 gnorm: 1.23 [20:59:38< 3:29:45] +[titan] 2025-10-05 19:34:01,108 - root - INFO - step: 34295 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 19:34:01,108 - root - INFO - lr: 7.2653e-06 gnorm: 1.17 [20:59:49< 3:29:34] +[titan] 2025-10-05 19:34:09,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:34:11,978 - root - INFO - step: 34300 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 19:34:11,978 - root - INFO - lr: 7.2614e-06 gnorm: 1.19 [21:00:00< 3:29:23] +[titan] 2025-10-05 19:34:20,928 - root - INFO - Dumping profiler traces at step 34304 +[titan] 2025-10-05 19:34:20,968 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:34:23,158 - root - INFO - step: 34305 loss: 1.8387 memory: 118.84GiB(85.28%) tps: 29,312 tflops: 406.66 mfu: 41.12% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6259 +[titan] 2025-10-05 19:34:23,158 - root - INFO - lr: 7.2575e-06 gnorm: 1.17 [21:00:11< 3:29:12] +[titan] 2025-10-05 19:34:34,056 - root - INFO - step: 34310 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 19:34:34,057 - root - INFO - lr: 7.2536e-06 gnorm: 1.17 [21:00:22< 3:29:01] +[titan] 2025-10-05 19:34:44,938 - root - INFO - step: 34315 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 19:34:44,938 - root - INFO - lr: 7.2497e-06 gnorm: 1.18 [21:00:33< 3:28:50] +[titan] 2025-10-05 19:34:55,805 - root - INFO - step: 34320 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:34:55,805 - root - INFO - lr: 7.2458e-06 gnorm: 1.22 [21:00:44< 3:28:39] +[titan] 2025-10-05 19:35:06,664 - root - INFO - step: 34325 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 19:35:06,664 - root - INFO - lr: 7.2419e-06 gnorm: 1.19 [21:00:55< 3:28:28] +[titan] 2025-10-05 19:35:17,530 - root - INFO - step: 34330 loss: 
1.9235 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 19:35:17,530 - root - INFO - lr: 7.2381e-06 gnorm: 1.18 [21:01:06< 3:28:17] +[titan] 2025-10-05 19:35:28,388 - root - INFO - step: 34335 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 19:35:28,388 - root - INFO - lr: 7.2342e-06 gnorm: 1.27 [21:01:16< 3:28:06] +[titan] 2025-10-05 19:35:39,210 - root - INFO - step: 34340 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 19:35:39,210 - root - INFO - lr: 7.2303e-06 gnorm: 1.23 [21:01:27< 3:27:55] +[titan] 2025-10-05 19:35:50,072 - root - INFO - step: 34345 loss: 1.9981 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7671 +[titan] 2025-10-05 19:35:50,072 - root - INFO - lr: 7.2264e-06 gnorm: 1.20 [21:01:38< 3:27:43] +[titan] 2025-10-05 19:35:58,755 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:36:00,937 - root - INFO - step: 34350 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:00,938 - root - INFO - lr: 7.2226e-06 gnorm: 1.19 [21:01:49< 3:27:32] +[titan] 2025-10-05 19:36:11,779 - root - INFO - step: 34355 loss: 1.9721 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:36:11,779 - root - INFO - lr: 7.2187e-06 gnorm: 1.22 [21:02:00< 3:27:21] +[titan] 2025-10-05 19:36:22,618 - root - INFO - step: 34360 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 19:36:22,618 - root - INFO - lr: 7.2148e-06 gnorm: 1.19 [21:02:11< 3:27:10] +[titan] 2025-10-05 19:36:33,472 - root - INFO - step: 34365 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 19:36:33,472 - root - INFO - lr: 7.2110e-06 gnorm: 1.16 [21:02:22< 3:26:59] +[titan] 2025-10-05 19:36:44,328 - root - INFO - step: 34370 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:44,329 - root - INFO - lr: 7.2071e-06 gnorm: 1.17 [21:02:32< 3:26:48] +[titan] 2025-10-05 19:36:55,235 - root - INFO - step: 34375 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 19:36:55,235 - root - INFO - lr: 7.2033e-06 gnorm: 1.14 [21:02:43< 3:26:37] +[titan] 2025-10-05 19:37:06,084 - root - INFO - step: 34380 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:37:06,084 - root - INFO - lr: 
7.1994e-06 gnorm: 1.17 [21:02:54< 3:26:26] +[titan] 2025-10-05 19:37:16,961 - root - INFO - step: 34385 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7555 +[titan] 2025-10-05 19:37:16,961 - root - INFO - lr: 7.1956e-06 gnorm: 1.22 [21:03:05< 3:26:15] +[titan] 2025-10-05 19:37:27,815 - root - INFO - step: 34390 loss: 2.0305 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 19:37:27,815 - root - INFO - lr: 7.1917e-06 gnorm: 1.20 [21:03:16< 3:26:04] +[titan] 2025-10-05 19:37:38,670 - root - INFO - step: 34395 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 19:37:38,670 - root - INFO - lr: 7.1879e-06 gnorm: 1.19 [21:03:27< 3:25:53] +[titan] 2025-10-05 19:37:47,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:37:49,563 - root - INFO - step: 34400 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:37:49,563 - root - INFO - lr: 7.1840e-06 gnorm: 1.23 [21:03:38< 3:25:42] +[titan] 2025-10-05 19:38:00,476 - root - INFO - step: 34405 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:38:00,476 - root - INFO - lr: 7.1802e-06 gnorm: 1.21 [21:03:49< 3:25:31] +[titan] 2025-10-05 19:38:11,337 - root - INFO - step: 34410 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7067 +[titan] 2025-10-05 19:38:11,337 - root - INFO - lr: 7.1764e-06 gnorm: 1.16 [21:03:59< 3:25:20] +[titan] 2025-10-05 19:38:22,210 - root - INFO - step: 34415 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 19:38:22,210 - root - INFO - lr: 7.1726e-06 gnorm: 1.20 [21:04:10< 3:25:09] +[titan] 2025-10-05 19:38:33,092 - root - INFO - step: 34420 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:38:33,092 - root - INFO - lr: 7.1687e-06 gnorm: 1.23 [21:04:21< 3:24:58] +[titan] 2025-10-05 19:38:43,954 - root - INFO - step: 34425 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 19:38:43,954 - root - INFO - lr: 7.1649e-06 gnorm: 1.19 [21:04:32< 3:24:47] +[titan] 2025-10-05 19:38:54,847 - root - INFO - step: 34430 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 19:38:54,847 - root - INFO - lr: 
7.1611e-06 gnorm: 1.22 [21:04:43< 3:24:36] +[titan] 2025-10-05 19:39:05,711 - root - INFO - step: 34435 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:39:05,711 - root - INFO - lr: 7.1573e-06 gnorm: 1.18 [21:04:54< 3:24:25] +[titan] 2025-10-05 19:39:16,607 - root - INFO - step: 34440 loss: 1.9084 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 19:39:16,607 - root - INFO - lr: 7.1535e-06 gnorm: 1.15 [21:05:05< 3:24:14] +[titan] 2025-10-05 19:39:27,468 - root - INFO - step: 34445 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 19:39:27,469 - root - INFO - lr: 7.1497e-06 gnorm: 1.21 [21:05:16< 3:24:03] +[titan] 2025-10-05 19:39:36,132 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:39:38,309 - root - INFO - step: 34450 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 19:39:38,309 - root - INFO - lr: 7.1458e-06 gnorm: 1.19 [21:05:26< 3:23:52] +[titan] 2025-10-05 19:39:49,168 - root - INFO - step: 34455 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7614 +[titan] 2025-10-05 19:39:49,168 - root - INFO - lr: 7.1420e-06 gnorm: 1.22 [21:05:37< 3:23:41] +[titan] 2025-10-05 19:39:59,988 - root - INFO - step: 34460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:39:59,988 - root - INFO - lr: 7.1382e-06 gnorm: 1.18 [21:05:48< 3:23:29] +[titan] 2025-10-05 19:40:10,837 - root - INFO - step: 34465 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:40:10,837 - root - INFO - lr: 7.1345e-06 gnorm: 1.20 [21:05:59< 3:23:18] +[titan] 2025-10-05 19:40:21,711 - root - INFO - step: 34470 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 19:40:21,711 - root - INFO - lr: 7.1307e-06 gnorm: 1.24 [21:06:10< 3:23:07] +[titan] 2025-10-05 19:40:32,577 - root - INFO - step: 34475 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 19:40:32,577 - root - INFO - lr: 7.1269e-06 gnorm: 1.26 [21:06:21< 3:22:56] +[titan] 2025-10-05 19:40:43,432 - root - INFO - step: 34480 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6998 +[titan] 2025-10-05 19:40:43,432 - root - INFO - lr: 
7.1231e-06 gnorm: 1.19 [21:06:31< 3:22:45] +[titan] 2025-10-05 19:40:54,326 - root - INFO - step: 34485 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:40:54,326 - root - INFO - lr: 7.1193e-06 gnorm: 1.19 [21:06:42< 3:22:34] +[titan] 2025-10-05 19:41:05,190 - root - INFO - step: 34490 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 19:41:05,190 - root - INFO - lr: 7.1155e-06 gnorm: 1.19 [21:06:53< 3:22:23] +[titan] 2025-10-05 19:41:16,016 - root - INFO - step: 34495 loss: 1.9452 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7197 +[titan] 2025-10-05 19:41:16,016 - root - INFO - lr: 7.1117e-06 gnorm: 1.28 [21:07:04< 3:22:12] +[titan] 2025-10-05 19:41:24,679 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:41:26,864 - root - INFO - step: 34500 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 19:41:26,864 - root - INFO - lr: 7.1080e-06 gnorm: 1.20 [21:07:15< 3:22:01] +[titan] 2025-10-05 19:41:37,746 - root - INFO - step: 34505 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:41:37,746 - root - INFO - lr: 7.1042e-06 gnorm: 1.20 [21:07:26< 3:21:50] +[titan] 2025-10-05 19:41:48,592 - root - INFO - step: 34510 loss: 1.9716 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 19:41:48,592 - root - INFO - lr: 7.1004e-06 gnorm: 1.23 [21:07:37< 3:21:39] +[titan] 2025-10-05 19:41:59,472 - root - INFO - step: 34515 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 19:41:59,472 - root - INFO - lr: 7.0967e-06 gnorm: 1.19 [21:07:48< 3:21:28] +[titan] 2025-10-05 19:42:10,331 - root - INFO - step: 34520 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 19:42:10,331 - root - INFO - lr: 7.0929e-06 gnorm: 1.21 [21:07:58< 3:21:17] +[titan] 2025-10-05 19:42:21,195 - root - INFO - step: 34525 loss: 1.8598 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6441 +[titan] 2025-10-05 19:42:21,195 - root - INFO - lr: 7.0892e-06 gnorm: 1.20 [21:08:09< 3:21:06] +[titan] 2025-10-05 19:42:32,043 - root - INFO - step: 34530 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 19:42:32,043 - root - INFO - lr: 
7.0854e-06 gnorm: 1.18 [21:08:20< 3:20:55] +[titan] 2025-10-05 19:42:42,933 - root - INFO - step: 34535 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:42:42,933 - root - INFO - lr: 7.0816e-06 gnorm: 1.17 [21:08:31< 3:20:44] +[titan] 2025-10-05 19:42:53,805 - root - INFO - step: 34540 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 19:42:53,805 - root - INFO - lr: 7.0779e-06 gnorm: 1.20 [21:08:42< 3:20:33] +[titan] 2025-10-05 19:43:04,676 - root - INFO - step: 34545 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:43:04,676 - root - INFO - lr: 7.0742e-06 gnorm: 1.19 [21:08:53< 3:20:22] +[titan] 2025-10-05 19:43:13,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:43:15,579 - root - INFO - step: 34550 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7242 +[titan] 2025-10-05 19:43:15,579 - root - INFO - lr: 7.0704e-06 gnorm: 1.21 [21:09:04< 3:20:11] +[titan] 2025-10-05 19:43:26,490 - root - INFO - step: 34555 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:43:26,490 - root - INFO - lr: 7.0667e-06 gnorm: 1.18 [21:09:15< 3:20:00] +[titan] 2025-10-05 19:43:37,391 - root - INFO - step: 34560 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8232 +[titan] 2025-10-05 19:43:37,391 - root - INFO - lr: 7.0629e-06 gnorm: 4.37 [21:09:25< 3:19:49] +[titan] 2025-10-05 19:43:48,315 - root - INFO - step: 34565 loss: 1.9033 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 19:43:48,315 - root - INFO - lr: 7.0592e-06 gnorm: 1.24 [21:09:36< 3:19:38] +[titan] 2025-10-05 19:43:59,252 - root - INFO - step: 34570 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 19:43:59,252 - root - INFO - lr: 7.0555e-06 gnorm: 1.17 [21:09:47< 3:19:27] +[titan] 2025-10-05 19:44:10,131 - root - INFO - step: 34575 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 19:44:10,131 - root - INFO - lr: 7.0518e-06 gnorm: 1.21 [21:09:58< 3:19:15] +[titan] 2025-10-05 19:44:20,965 - root - INFO - step: 34580 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7717 +[titan] 2025-10-05 19:44:20,965 - root - INFO - lr: 
7.0480e-06 gnorm: 1.26 [21:10:09< 3:19:04] +[titan] 2025-10-05 19:44:31,829 - root - INFO - step: 34585 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 19:44:31,829 - root - INFO - lr: 7.0443e-06 gnorm: 1.19 [21:10:20< 3:18:53] +[titan] 2025-10-05 19:44:42,679 - root - INFO - step: 34590 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.7230 +[titan] 2025-10-05 19:44:42,680 - root - INFO - lr: 7.0406e-06 gnorm: 2.68 [21:10:31< 3:18:42] +[titan] 2025-10-05 19:44:53,560 - root - INFO - step: 34595 loss: 1.8805 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 19:44:53,561 - root - INFO - lr: 7.0369e-06 gnorm: 1.23 [21:10:42< 3:18:31] +[titan] 2025-10-05 19:45:02,272 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:45:04,448 - root - INFO - step: 34600 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 19:45:04,448 - root - INFO - lr: 7.0332e-06 gnorm: 1.18 [21:10:52< 3:18:20] +[titan] 2025-10-05 19:45:15,326 - root - INFO - step: 34605 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:45:15,326 - root - INFO - lr: 7.0295e-06 gnorm: 1.19 [21:11:03< 3:18:09] +[titan] 2025-10-05 19:45:26,191 - root - INFO - step: 34610 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:45:26,191 - root - INFO - lr: 7.0258e-06 gnorm: 1.20 [21:11:14< 3:17:58] +[titan] 2025-10-05 19:45:37,037 - root - INFO - step: 34615 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:45:37,037 - root - INFO - lr: 7.0221e-06 gnorm: 1.17 [21:11:25< 3:17:47] +[titan] 2025-10-05 19:45:47,905 - root - INFO - step: 34620 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:45:47,905 - root - INFO - lr: 7.0184e-06 gnorm: 1.23 [21:11:36< 3:17:36] +[titan] 2025-10-05 19:45:58,807 - root - INFO - step: 34625 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 19:45:58,807 - root - INFO - lr: 7.0147e-06 gnorm: 1.25 [21:11:47< 3:17:25] +[titan] 2025-10-05 19:46:09,704 - root - INFO - step: 34630 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 19:46:09,704 - root - INFO - lr: 
7.0110e-06 gnorm: 1.24 [21:11:58< 3:17:14] +[titan] 2025-10-05 19:46:20,566 - root - INFO - step: 34635 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 19:46:20,566 - root - INFO - lr: 7.0073e-06 gnorm: 1.25 [21:12:09< 3:17:03] +[titan] 2025-10-05 19:46:31,407 - root - INFO - step: 34640 loss: 1.9051 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 19:46:31,407 - root - INFO - lr: 7.0036e-06 gnorm: 1.18 [21:12:19< 3:16:52] +[titan] 2025-10-05 19:46:42,249 - root - INFO - step: 34645 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 19:46:42,249 - root - INFO - lr: 6.9999e-06 gnorm: 1.17 [21:12:30< 3:16:41] +[titan] 2025-10-05 19:46:50,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:46:53,097 - root - INFO - step: 34650 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7558 +[titan] 2025-10-05 19:46:53,097 - root - INFO - lr: 6.9963e-06 gnorm: 1.18 [21:12:41< 3:16:30] +[titan] 2025-10-05 19:47:03,992 - root - INFO - step: 34655 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 19:47:03,992 - root - INFO - lr: 6.9926e-06 gnorm: 1.23 [21:12:52< 3:16:19] +[titan] 2025-10-05 19:47:14,867 - root - INFO - step: 34660 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 19:47:14,867 - root - INFO - lr: 6.9889e-06 gnorm: 1.29 [21:13:03< 3:16:08] +[titan] 2025-10-05 19:47:25,759 - root - INFO - step: 34665 loss: 1.9370 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 19:47:25,759 - root - INFO - lr: 6.9853e-06 gnorm: 1.19 [21:13:14< 3:15:57] +[titan] 2025-10-05 19:47:36,638 - root - INFO - step: 34670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 19:47:36,639 - root - INFO - lr: 6.9816e-06 gnorm: 1.16 [21:13:25< 3:15:46] +[titan] 2025-10-05 19:47:47,526 - root - INFO - step: 34675 loss: 1.9202 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:47:47,526 - root - INFO - lr: 6.9779e-06 gnorm: 1.19 [21:13:36< 3:15:35] +[titan] 2025-10-05 19:47:58,418 - root - INFO - step: 34680 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 19:47:58,418 - root - INFO - lr: 
6.9743e-06 gnorm: 1.20 [21:13:46< 3:15:24] +[titan] 2025-10-05 19:48:09,256 - root - INFO - step: 34685 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6999 +[titan] 2025-10-05 19:48:09,256 - root - INFO - lr: 6.9706e-06 gnorm: 1.21 [21:13:57< 3:15:13] +[titan] 2025-10-05 19:48:20,111 - root - INFO - step: 34690 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 19:48:20,111 - root - INFO - lr: 6.9670e-06 gnorm: 1.22 [21:14:08< 3:15:01] +[titan] 2025-10-05 19:48:31,007 - root - INFO - step: 34695 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 19:48:31,007 - root - INFO - lr: 6.9633e-06 gnorm: 1.22 [21:14:19< 3:14:50] +[titan] 2025-10-05 19:48:39,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:48:41,855 - root - INFO - step: 34700 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 19:48:41,855 - root - INFO - lr: 6.9597e-06 gnorm: 1.18 [21:14:30< 3:14:39] +[titan] 2025-10-05 19:48:52,713 - root - INFO - step: 34705 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7370 +[titan] 2025-10-05 19:48:52,713 - root - INFO - lr: 6.9560e-06 gnorm: 1.17 [21:14:41< 3:14:28] +[titan] 2025-10-05 19:49:03,608 - root - INFO - step: 34710 loss: 1.9120 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6899 +[titan] 2025-10-05 19:49:03,608 - root - INFO - lr: 6.9524e-06 gnorm: 1.17 [21:14:52< 3:14:17] +[titan] 2025-10-05 19:49:14,465 - root - INFO - step: 34715 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:49:14,465 - root - INFO - lr: 6.9488e-06 gnorm: 1.22 [21:15:03< 3:14:06] +[titan] 2025-10-05 19:49:25,305 - root - INFO - step: 34720 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:49:25,305 - root - INFO - lr: 6.9451e-06 gnorm: 1.21 [21:15:13< 3:13:55] +[titan] 2025-10-05 19:49:36,189 - root - INFO - step: 34725 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:49:36,189 - root - INFO - lr: 6.9415e-06 gnorm: 1.18 [21:15:24< 3:13:44] +[titan] 2025-10-05 19:49:47,060 - root - INFO - step: 34730 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 19:49:47,060 - root - INFO - lr: 
6.9379e-06 gnorm: 1.21 [21:15:35< 3:13:33] +[titan] 2025-10-05 19:49:57,949 - root - INFO - step: 34735 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 19:49:57,949 - root - INFO - lr: 6.9343e-06 gnorm: 1.21 [21:15:46< 3:13:22] +[titan] 2025-10-05 19:50:08,803 - root - INFO - step: 34740 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 19:50:08,803 - root - INFO - lr: 6.9306e-06 gnorm: 1.24 [21:15:57< 3:13:11] +[titan] 2025-10-05 19:50:19,673 - root - INFO - step: 34745 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 19:50:19,673 - root - INFO - lr: 6.9270e-06 gnorm: 1.26 [21:16:08< 3:13:00] +[titan] 2025-10-05 19:50:28,354 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:50:30,535 - root - INFO - step: 34750 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7233 +[titan] 2025-10-05 19:50:30,535 - root - INFO - lr: 6.9234e-06 gnorm: 1.24 [21:16:19< 3:12:49] +[titan] 2025-10-05 19:50:41,406 - root - INFO - step: 34755 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:50:41,406 - root - INFO - lr: 6.9198e-06 gnorm: 1.19 [21:16:29< 3:12:38] +[titan] 2025-10-05 19:50:52,304 - root - INFO - step: 34760 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 19:50:52,304 - root - INFO - lr: 6.9162e-06 gnorm: 1.19 [21:16:40< 3:12:27] +[titan] 2025-10-05 19:51:03,222 - root - INFO - step: 34765 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 19:51:03,223 - root - INFO - lr: 6.9126e-06 gnorm: 1.23 [21:16:51< 3:12:16] +[titan] 2025-10-05 19:51:14,086 - root - INFO - step: 34770 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:51:14,086 - root - INFO - lr: 6.9090e-06 gnorm: 1.16 [21:17:02< 3:12:05] +[titan] 2025-10-05 19:51:24,963 - root - INFO - step: 34775 loss: 1.9641 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 19:51:24,963 - root - INFO - lr: 6.9054e-06 gnorm: 1.22 [21:17:13< 3:11:54] +[titan] 2025-10-05 19:51:35,828 - root - INFO - step: 34780 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 19:51:35,828 - root - INFO - lr: 
6.9018e-06 gnorm: 1.21 [21:17:24< 3:11:43] +[titan] 2025-10-05 19:51:46,685 - root - INFO - step: 34785 loss: 1.9053 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 19:51:46,685 - root - INFO - lr: 6.8982e-06 gnorm: 1.20 [21:17:35< 3:11:32] +[titan] 2025-10-05 19:51:57,587 - root - INFO - step: 34790 loss: 1.9201 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 19:51:57,587 - root - INFO - lr: 6.8946e-06 gnorm: 1.18 [21:17:46< 3:11:21] +[titan] 2025-10-05 19:52:08,485 - root - INFO - step: 34795 loss: 1.9967 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 19:52:08,485 - root - INFO - lr: 6.8910e-06 gnorm: 1.22 [21:17:57< 3:11:10] +[titan] 2025-10-05 19:52:17,185 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:52:19,378 - root - INFO - step: 34800 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 19:52:19,378 - root - INFO - lr: 6.8875e-06 gnorm: 1.20 [21:18:07< 3:10:59] +[titan] 2025-10-05 19:52:30,261 - root - INFO - step: 34805 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 19:52:30,261 - root - INFO - lr: 6.8839e-06 gnorm: 1.19 [21:18:18< 3:10:48] +[titan] 2025-10-05 19:52:41,146 - root - INFO - step: 34810 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 19:52:41,146 - root - INFO - lr: 6.8803e-06 gnorm: 1.20 [21:18:29< 3:10:37] +[titan] 2025-10-05 19:52:52,095 - root - INFO - step: 34815 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7667 +[titan] 2025-10-05 19:52:52,095 - root - INFO - lr: 6.8767e-06 gnorm: 1.23 [21:18:40< 3:10:26] +[titan] 2025-10-05 19:52:54,455 - root - INFO - Dumping profiler traces at step 34816 +[titan] 2025-10-05 19:52:54,494 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:53:03,216 - root - INFO - step: 34820 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 29,466 tflops: 408.80 mfu: 41.33% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:53:03,216 - root - INFO - lr: 6.8732e-06 gnorm: 1.20 [21:18:51< 3:10:15] +[titan] 2025-10-05 19:53:14,080 - root - INFO - step: 34825 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7156 +[titan] 2025-10-05 19:53:14,080 - root - INFO - lr: 6.8696e-06 gnorm: 1.17 [21:19:02< 3:10:03] +[titan] 2025-10-05 19:53:24,945 - root - INFO - step: 34830 loss: 
1.8794 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 19:53:24,945 - root - INFO - lr: 6.8661e-06 gnorm: 1.15 [21:19:13< 3:09:52] +[titan] 2025-10-05 19:53:35,780 - root - INFO - step: 34835 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 19:53:35,780 - root - INFO - lr: 6.8625e-06 gnorm: 1.16 [21:19:24< 3:09:41] +[titan] 2025-10-05 19:53:46,625 - root - INFO - step: 34840 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7678 +[titan] 2025-10-05 19:53:46,626 - root - INFO - lr: 6.8589e-06 gnorm: 1.21 [21:19:35< 3:09:30] +[titan] 2025-10-05 19:53:57,479 - root - INFO - step: 34845 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:53:57,479 - root - INFO - lr: 6.8554e-06 gnorm: 1.18 [21:19:46< 3:09:19] +[titan] 2025-10-05 19:54:06,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:54:08,348 - root - INFO - step: 34850 loss: 2.0208 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 19:54:08,349 - root - INFO - lr: 6.8518e-06 gnorm: 1.22 [21:19:56< 3:09:08] +[titan] 2025-10-05 19:54:19,236 - root - INFO - step: 34855 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 19:54:19,236 - root - INFO - lr: 6.8483e-06 gnorm: 1.19 [21:20:07< 3:08:57] +[titan] 2025-10-05 19:54:30,115 - root - INFO - step: 34860 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 19:54:30,115 - root - INFO - lr: 6.8448e-06 gnorm: 1.17 [21:20:18< 3:08:46] +[titan] 2025-10-05 19:54:40,989 - root - INFO - step: 34865 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 19:54:40,989 - root - INFO - lr: 6.8412e-06 gnorm: 1.22 [21:20:29< 3:08:35] +[titan] 2025-10-05 19:54:51,840 - root - INFO - step: 34870 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:54:51,840 - root - INFO - lr: 6.8377e-06 gnorm: 1.23 [21:20:40< 3:08:24] +[titan] 2025-10-05 19:55:02,739 - root - INFO - step: 34875 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:55:02,739 - root - INFO - lr: 6.8342e-06 gnorm: 1.18 [21:20:51< 3:08:13] +[titan] 2025-10-05 19:55:13,616 - root - INFO - step: 34880 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:55:13,616 - root - INFO - lr: 
6.8306e-06 gnorm: 1.19 [21:21:02< 3:08:02] +[titan] 2025-10-05 19:55:24,502 - root - INFO - step: 34885 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7556 +[titan] 2025-10-05 19:55:24,502 - root - INFO - lr: 6.8271e-06 gnorm: 1.20 [21:21:13< 3:07:51] +[titan] 2025-10-05 19:55:35,390 - root - INFO - step: 34890 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 19:55:35,390 - root - INFO - lr: 6.8236e-06 gnorm: 1.20 [21:21:23< 3:07:40] +[titan] 2025-10-05 19:55:46,234 - root - INFO - step: 34895 loss: 1.9281 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 19:55:46,234 - root - INFO - lr: 6.8201e-06 gnorm: 1.21 [21:21:34< 3:07:29] +[titan] 2025-10-05 19:55:54,939 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:55:57,124 - root - INFO - step: 34900 loss: 1.9752 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 19:55:57,124 - root - INFO - lr: 6.8166e-06 gnorm: 1.22 [21:21:45< 3:07:18] +[titan] 2025-10-05 19:56:07,979 - root - INFO - step: 34905 loss: 1.8773 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6604 +[titan] 2025-10-05 19:56:07,979 - root - INFO - lr: 6.8130e-06 gnorm: 1.27 [21:21:56< 3:07:07] +[titan] 2025-10-05 19:56:18,858 - root - INFO - step: 34910 loss: 1.9375 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:56:18,858 - root - INFO - lr: 6.8095e-06 gnorm: 1.28 [21:22:07< 3:06:56] +[titan] 2025-10-05 19:56:29,723 - root - INFO - step: 34915 loss: 1.9603 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:56:29,723 - root - INFO - lr: 6.8060e-06 gnorm: 1.20 [21:22:18< 3:06:45] +[titan] 2025-10-05 19:56:40,632 - root - INFO - step: 34920 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:56:40,632 - root - INFO - lr: 6.8025e-06 gnorm: 1.20 [21:22:29< 3:06:34] +[titan] 2025-10-05 19:56:51,542 - root - INFO - step: 34925 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 19:56:51,542 - root - INFO - lr: 6.7990e-06 gnorm: 1.19 [21:22:40< 3:06:23] +[titan] 2025-10-05 19:57:02,433 - root - INFO - step: 34930 loss: 1.8978 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6780 +[titan] 2025-10-05 19:57:02,433 - root - INFO - lr: 
6.7955e-06 gnorm: 1.19 [21:22:50< 3:06:12] +[titan] 2025-10-05 19:57:13,339 - root - INFO - step: 34935 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 19:57:13,340 - root - INFO - lr: 6.7920e-06 gnorm: 1.19 [21:23:01< 3:06:01] +[titan] 2025-10-05 19:57:24,225 - root - INFO - step: 34940 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 19:57:24,225 - root - INFO - lr: 6.7886e-06 gnorm: 1.21 [21:23:12< 3:05:50] +[titan] 2025-10-05 19:57:35,111 - root - INFO - step: 34945 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:57:35,112 - root - INFO - lr: 6.7851e-06 gnorm: 1.20 [21:23:23< 3:05:39] +[titan] 2025-10-05 19:57:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:57:46,109 - root - INFO - step: 34950 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 29,796 tflops: 413.37 mfu: 41.80% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:57:46,109 - root - INFO - lr: 6.7816e-06 gnorm: 1.21 [21:23:34< 3:05:28] +[titan] 2025-10-05 19:57:56,976 - root - INFO - step: 34955 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 19:57:56,976 - root - INFO - lr: 6.7781e-06 gnorm: 1.21 [21:23:45< 3:05:16] +[titan] 2025-10-05 19:58:07,860 - root - INFO - step: 34960 loss: 1.8843 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 19:58:07,860 - root - INFO - lr: 6.7746e-06 gnorm: 1.18 [21:23:56< 3:05:05] +[titan] 2025-10-05 19:58:18,737 - root - INFO - step: 34965 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 19:58:18,737 - root - INFO - lr: 6.7712e-06 gnorm: 1.21 [21:24:07< 3:04:54] +[titan] 2025-10-05 19:58:29,592 - root - INFO - step: 34970 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 19:58:29,592 - root - INFO - lr: 6.7677e-06 gnorm: 2.00 [21:24:18< 3:04:43] +[titan] 2025-10-05 19:58:40,452 - root - INFO - step: 34975 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 19:58:40,453 - root - INFO - lr: 6.7642e-06 gnorm: 1.24 [21:24:28< 3:04:32] +[titan] 2025-10-05 19:58:51,317 - root - INFO - step: 34980 loss: 1.8424 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6294 +[titan] 2025-10-05 19:58:51,317 - root - INFO - lr: 
6.7608e-06 gnorm: 1.20 [21:24:39< 3:04:21] +[titan] 2025-10-05 19:59:02,209 - root - INFO - step: 34985 loss: 2.0210 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 19:59:02,209 - root - INFO - lr: 6.7573e-06 gnorm: 1.25 [21:24:50< 3:04:10] +[titan] 2025-10-05 19:59:13,085 - root - INFO - step: 34990 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 19:59:13,085 - root - INFO - lr: 6.7538e-06 gnorm: 1.21 [21:25:01< 3:03:59] +[titan] 2025-10-05 19:59:23,963 - root - INFO - step: 34995 loss: 1.9729 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7447 +[titan] 2025-10-05 19:59:23,964 - root - INFO - lr: 6.7504e-06 gnorm: 1.20 [21:25:12< 3:03:48] +[titan] 2025-10-05 19:59:32,669 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 19:59:34,852 - root - INFO - step: 35000 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 19:59:34,852 - root - INFO - lr: 6.7469e-06 gnorm: 1.20 [21:25:23< 3:03:37] +[titan] 2025-10-05 19:59:34,852 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 19:59:52,588 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 19:59:52,588 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.74 seconds. 
+[titan] 2025-10-05 20:02:00,815 - root - INFO - step: 35005 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 2,245 tflops: 31.15 mfu: 3.15% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 20:02:00,816 - root - INFO - lr: 6.7435e-06 gnorm: 1.17 [21:27:49< 3:03:45] +[titan] 2025-10-05 20:02:11,608 - root - INFO - step: 35010 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,362 tflops: 421.22 mfu: 42.59% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 20:02:11,608 - root - INFO - lr: 6.7401e-06 gnorm: 1.25 [21:28:00< 3:03:34] +[titan] 2025-10-05 20:02:22,413 - root - INFO - step: 35015 loss: 1.8869 memory: 118.84GiB(85.28%) tps: 30,329 tflops: 420.77 mfu: 42.55% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 20:02:22,413 - root - INFO - lr: 6.7366e-06 gnorm: 1.21 [21:28:10< 3:03:23] +[titan] 2025-10-05 20:02:33,281 - root - INFO - step: 35020 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 20:02:33,281 - root - INFO - lr: 6.7332e-06 gnorm: 1.19 [21:28:21< 3:03:12] +[titan] 2025-10-05 20:02:44,100 - root - INFO - step: 35025 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:02:44,100 - root - INFO - lr: 6.7297e-06 gnorm: 1.17 [21:28:32< 3:03:01] +[titan] 2025-10-05 20:02:54,948 - root - INFO - step: 35030 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 20:02:54,948 - root - INFO - lr: 6.7263e-06 gnorm: 1.23 [21:28:43< 3:02:50] +[titan] 2025-10-05 20:03:05,780 - root - INFO - step: 35035 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:03:05,780 - root - INFO - lr: 
6.7229e-06 gnorm: 1.24 [21:28:54< 3:02:39] +[titan] 2025-10-05 20:03:16,638 - root - INFO - step: 35040 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 20:03:16,638 - root - INFO - lr: 6.7195e-06 gnorm: 1.23 [21:29:05< 3:02:28] +[titan] 2025-10-05 20:03:27,560 - root - INFO - step: 35045 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 20:03:27,560 - root - INFO - lr: 6.7160e-06 gnorm: 1.19 [21:29:16< 3:02:17] +[titan] 2025-10-05 20:03:36,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 20:03:38,413 - root - INFO - step: 35050 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7064 +[titan] 2025-10-05 20:03:38,413 - root - INFO - lr: 6.7126e-06 gnorm: 1.23 [21:29:26< 3:02:06] +[titan] 2025-10-05 20:03:49,265 - root - INFO - step: 35055 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 20:03:49,265 - root - INFO - lr: 6.7092e-06 gnorm: 1.18 [21:29:37< 3:01:55] +[titan] 2025-10-05 20:04:00,143 - root - INFO - step: 35060 loss: 1.9047 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6842 +[titan] 2025-10-05 20:04:00,143 - root - INFO - lr: 6.7058e-06 gnorm: 1.22 [21:29:48< 3:01:44] +[titan] 2025-10-05 20:04:11,001 - root - INFO - step: 35065 loss: 1.8697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 20:04:11,001 - root - INFO - lr: 6.7024e-06 gnorm: 1.21 [21:29:59< 3:01:33] +[titan] 2025-10-05 20:04:21,863 - root - INFO - step: 35070 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 
30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:04:21,863 - root - INFO - lr: 6.6990e-06 gnorm: 1.24 [21:30:10< 3:01:22] +[titan] 2025-10-05 20:04:32,799 - root - INFO - step: 35075 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 20:04:32,799 - root - INFO - lr: 6.6956e-06 gnorm: 1.21 [21:30:21< 3:01:10] +[titan] 2025-10-05 20:04:43,675 - root - INFO - step: 35080 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 20:04:43,675 - root - INFO - lr: 6.6922e-06 gnorm: 1.15 [21:30:32< 3:00:59] +[titan] 2025-10-05 20:04:54,542 - root - INFO - step: 35085 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 20:04:54,542 - root - INFO - lr: 6.6888e-06 gnorm: 1.19 [21:30:43< 3:00:48] +[titan] 2025-10-05 20:05:05,402 - root - INFO - step: 35090 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:05:05,402 - root - INFO - lr: 6.6854e-06 gnorm: 1.18 [21:30:53< 3:00:37] +[titan] 2025-10-05 20:05:16,263 - root - INFO - step: 35095 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:05:16,263 - root - INFO - lr: 6.6820e-06 gnorm: 1.22 [21:31:04< 3:00:26] +[titan] 2025-10-05 20:05:24,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:05:27,144 - root - INFO - step: 35100 loss: 1.9245 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7014 +[titan] 2025-10-05 20:05:27,145 - root - INFO - lr: 6.6786e-06 gnorm: 1.23 [21:31:15< 3:00:15] +[titan] 2025-10-05 20:05:38,035 - root - INFO - step: 35105 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 20:05:38,035 - root - INFO - lr: 6.6753e-06 gnorm: 1.17 [21:31:26< 3:00:04] +[titan] 2025-10-05 20:05:48,877 - root - INFO - step: 35110 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7243 +[titan] 2025-10-05 20:05:48,877 - root - INFO - lr: 6.6719e-06 gnorm: 1.15 [21:31:37< 2:59:53] +[titan] 2025-10-05 20:05:59,749 - root - INFO - step: 35115 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:05:59,749 - root - INFO - lr: 6.6685e-06 gnorm: 1.20 [21:31:48< 2:59:42] +[titan] 2025-10-05 20:06:10,605 - root - INFO - step: 35120 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:06:10,606 - root - INFO - lr: 6.6651e-06 gnorm: 1.17 [21:31:59< 2:59:31] +[titan] 2025-10-05 20:06:21,451 - root - INFO - step: 35125 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:06:21,451 - root - INFO - lr: 6.6618e-06 gnorm: 1.20 [21:32:09< 2:59:20] +[titan] 2025-10-05 20:06:32,365 - root - INFO - step: 35130 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:06:32,365 - root - INFO - lr: 
6.6584e-06 gnorm: 1.23 [21:32:20< 2:59:09] +[titan] 2025-10-05 20:06:43,231 - root - INFO - step: 35135 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7050 +[titan] 2025-10-05 20:06:43,232 - root - INFO - lr: 6.6550e-06 gnorm: 1.18 [21:32:31< 2:58:58] +[titan] 2025-10-05 20:06:54,140 - root - INFO - step: 35140 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 20:06:54,140 - root - INFO - lr: 6.6517e-06 gnorm: 1.23 [21:32:42< 2:58:47] +[titan] 2025-10-05 20:07:05,022 - root - INFO - step: 35145 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 20:07:05,022 - root - INFO - lr: 6.6483e-06 gnorm: 1.23 [21:32:53< 2:58:36] +[titan] 2025-10-05 20:07:13,698 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:07:15,932 - root - INFO - step: 35150 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6718 +[titan] 2025-10-05 20:07:15,932 - root - INFO - lr: 6.6450e-06 gnorm: 1.21 [21:33:04< 2:58:25] +[titan] 2025-10-05 20:07:26,828 - root - INFO - step: 35155 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:07:26,828 - root - INFO - lr: 6.6416e-06 gnorm: 1.16 [21:33:15< 2:58:14] +[titan] 2025-10-05 20:07:37,740 - root - INFO - step: 35160 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 20:07:37,740 - root - INFO - lr: 6.6383e-06 gnorm: 1.17 [21:33:26< 2:58:02] +[titan] 2025-10-05 20:07:48,623 - root - INFO - step: 35165 loss: 1.9332 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 20:07:48,623 - root - INFO - lr: 6.6349e-06 gnorm: 1.21 [21:33:37< 2:57:51] +[titan] 2025-10-05 20:07:59,524 - root - INFO - step: 35170 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 20:07:59,524 - root - INFO - lr: 6.6316e-06 gnorm: 1.20 [21:33:48< 2:57:40] +[titan] 2025-10-05 20:08:10,396 - root - INFO - step: 35175 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:08:10,396 - root - INFO - lr: 6.6283e-06 gnorm: 1.19 [21:33:58< 2:57:29] +[titan] 2025-10-05 20:08:21,269 - root - INFO - step: 35180 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:08:21,270 - root - INFO - lr: 
6.6249e-06 gnorm: 1.19 [21:34:09< 2:57:18] +[titan] 2025-10-05 20:08:32,174 - root - INFO - step: 35185 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 20:08:32,175 - root - INFO - lr: 6.6216e-06 gnorm: 1.21 [21:34:20< 2:57:07] +[titan] 2025-10-05 20:08:43,054 - root - INFO - step: 35190 loss: 1.9950 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7626 +[titan] 2025-10-05 20:08:43,054 - root - INFO - lr: 6.6183e-06 gnorm: 1.21 [21:34:31< 2:56:56] +[titan] 2025-10-05 20:08:53,935 - root - INFO - step: 35195 loss: 1.9405 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7155 +[titan] 2025-10-05 20:08:53,935 - root - INFO - lr: 6.6150e-06 gnorm: 1.23 [21:34:42< 2:56:45] +[titan] 2025-10-05 20:09:02,614 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:09:04,794 - root - INFO - step: 35200 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 20:09:04,794 - root - INFO - lr: 6.6116e-06 gnorm: 1.17 [21:34:53< 2:56:34] +[titan] 2025-10-05 20:09:15,695 - root - INFO - step: 35205 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:09:15,695 - root - INFO - lr: 6.6083e-06 gnorm: 1.21 [21:35:04< 2:56:23] +[titan] 2025-10-05 20:09:26,591 - root - INFO - step: 35210 loss: 1.9224 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:09:26,591 - root - INFO - lr: 6.6050e-06 gnorm: 1.21 [21:35:15< 2:56:12] +[titan] 2025-10-05 20:09:37,512 - root - INFO - step: 35215 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 20:09:37,512 - root - INFO - lr: 6.6017e-06 gnorm: 1.22 [21:35:26< 2:56:01] +[titan] 2025-10-05 20:09:48,396 - root - INFO - step: 35220 loss: 1.9286 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7057 +[titan] 2025-10-05 20:09:48,396 - root - INFO - lr: 6.5984e-06 gnorm: 1.23 [21:35:36< 2:55:50] +[titan] 2025-10-05 20:09:59,291 - root - INFO - step: 35225 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 20:09:59,291 - root - INFO - lr: 6.5951e-06 gnorm: 1.22 [21:35:47< 2:55:39] +[titan] 2025-10-05 20:10:10,147 - root - INFO - step: 35230 loss: 1.9319 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 20:10:10,147 - root - INFO - lr: 
6.5918e-06 gnorm: 1.26 [21:35:58< 2:55:28] +[titan] 2025-10-05 20:10:21,054 - root - INFO - step: 35235 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 20:10:21,055 - root - INFO - lr: 6.5885e-06 gnorm: 1.18 [21:36:09< 2:55:17] +[titan] 2025-10-05 20:10:31,940 - root - INFO - step: 35240 loss: 1.8612 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:10:31,940 - root - INFO - lr: 6.5852e-06 gnorm: 1.16 [21:36:20< 2:55:06] +[titan] 2025-10-05 20:10:42,806 - root - INFO - step: 35245 loss: 2.0002 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 20:10:42,807 - root - INFO - lr: 6.5819e-06 gnorm: 1.22 [21:36:31< 2:54:55] +[titan] 2025-10-05 20:10:51,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:10:53,701 - root - INFO - step: 35250 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 20:10:53,701 - root - INFO - lr: 6.5786e-06 gnorm: 1.21 [21:36:42< 2:54:43] +[titan] 2025-10-05 20:11:04,581 - root - INFO - step: 35255 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 20:11:04,581 - root - INFO - lr: 6.5754e-06 gnorm: 1.20 [21:36:53< 2:54:32] +[titan] 2025-10-05 20:11:15,487 - root - INFO - step: 35260 loss: 1.9259 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:11:15,487 - root - INFO - lr: 6.5721e-06 gnorm: 1.23 [21:37:03< 2:54:21] +[titan] 2025-10-05 20:11:26,398 - root - INFO - step: 35265 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:11:26,398 - root - INFO - lr: 6.5688e-06 gnorm: 1.23 [21:37:14< 2:54:10] +[titan] 2025-10-05 20:11:37,313 - root - INFO - step: 35270 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:11:37,314 - root - INFO - lr: 6.5655e-06 gnorm: 1.21 [21:37:25< 2:53:59] +[titan] 2025-10-05 20:11:48,214 - root - INFO - step: 35275 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 20:11:48,214 - root - INFO - lr: 6.5623e-06 gnorm: 1.24 [21:37:36< 2:53:48] +[titan] 2025-10-05 20:11:59,075 - root - INFO - step: 35280 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 20:11:59,075 - root - INFO - lr: 
6.5590e-06 gnorm: 1.20 [21:37:47< 2:53:37] +[titan] 2025-10-05 20:12:09,938 - root - INFO - step: 35285 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 20:12:09,939 - root - INFO - lr: 6.5557e-06 gnorm: 1.21 [21:37:58< 2:53:26] +[titan] 2025-10-05 20:12:20,821 - root - INFO - step: 35290 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 20:12:20,821 - root - INFO - lr: 6.5525e-06 gnorm: 1.18 [21:38:09< 2:53:15] +[titan] 2025-10-05 20:12:31,713 - root - INFO - step: 35295 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 20:12:31,713 - root - INFO - lr: 6.5492e-06 gnorm: 1.23 [21:38:20< 2:53:04] +[titan] 2025-10-05 20:12:40,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:12:42,667 - root - INFO - step: 35300 loss: 1.9229 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:12:42,667 - root - INFO - lr: 6.5460e-06 gnorm: 1.23 [21:38:31< 2:52:53] +[titan] 2025-10-05 20:12:53,570 - root - INFO - step: 35305 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 20:12:53,570 - root - INFO - lr: 6.5427e-06 gnorm: 1.21 [21:38:42< 2:52:42] +[titan] 2025-10-05 20:13:04,452 - root - INFO - step: 35310 loss: 1.9317 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7088 +[titan] 2025-10-05 20:13:04,452 - root - INFO - lr: 6.5395e-06 gnorm: 1.22 [21:38:52< 2:52:31] +[titan] 2025-10-05 20:13:15,334 - root - INFO - step: 35315 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 20:13:15,335 - root - INFO - lr: 6.5362e-06 gnorm: 1.20 [21:39:03< 2:52:20] +[titan] 2025-10-05 20:13:26,220 - root - INFO - step: 35320 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 20:13:26,221 - root - INFO - lr: 6.5330e-06 gnorm: 1.19 [21:39:14< 2:52:09] +[titan] 2025-10-05 20:13:37,204 - root - INFO - step: 35325 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 29,834 tflops: 413.90 mfu: 41.85% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 20:13:37,205 - root - INFO - lr: 6.5297e-06 gnorm: 1.20 [21:39:25< 2:51:58] +[titan] 2025-10-05 20:13:43,904 - root - INFO - Dumping profiler traces at step 35328 +[titan] 2025-10-05 20:13:43,942 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:13:48,348 - root - INFO - step: 35330 loss: 
1.9326 memory: 118.84GiB(85.28%) tps: 29,406 tflops: 407.96 mfu: 41.25% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:13:48,348 - root - INFO - lr: 6.5265e-06 gnorm: 1.23 [21:39:36< 2:51:47] +[titan] 2025-10-05 20:13:59,245 - root - INFO - step: 35335 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7238 +[titan] 2025-10-05 20:13:59,246 - root - INFO - lr: 6.5233e-06 gnorm: 1.23 [21:39:47< 2:51:36] +[titan] 2025-10-05 20:14:10,148 - root - INFO - step: 35340 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:14:10,149 - root - INFO - lr: 6.5201e-06 gnorm: 1.24 [21:39:58< 2:51:25] +[titan] 2025-10-05 20:14:21,047 - root - INFO - step: 35345 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7363 +[titan] 2025-10-05 20:14:21,048 - root - INFO - lr: 6.5168e-06 gnorm: 1.20 [21:40:09< 2:51:13] +[titan] 2025-10-05 20:14:29,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:14:31,931 - root - INFO - step: 35350 loss: 1.9071 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:14:31,931 - root - INFO - lr: 6.5136e-06 gnorm: 1.22 [21:40:20< 2:51:02] +[titan] 2025-10-05 20:14:42,833 - root - INFO - step: 35355 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6814 +[titan] 2025-10-05 20:14:42,833 - root - INFO - lr: 6.5104e-06 gnorm: 1.18 [21:40:31< 2:50:51] +[titan] 2025-10-05 20:14:53,713 - root - INFO - step: 35360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 20:14:53,714 - root - INFO - lr: 6.5072e-06 gnorm: 1.22 [21:40:42< 2:50:40] +[titan] 2025-10-05 20:15:04,622 - root - INFO - step: 35365 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:15:04,622 - root - INFO - lr: 6.5040e-06 gnorm: 1.20 [21:40:53< 2:50:29] +[titan] 2025-10-05 20:15:15,532 - root - INFO - step: 35370 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:15:15,532 - root - INFO - lr: 6.5008e-06 gnorm: 1.21 [21:41:04< 2:50:18] +[titan] 2025-10-05 20:15:26,422 - root - INFO - step: 35375 loss: 1.9139 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 20:15:26,422 - root - INFO - lr: 6.4976e-06 gnorm: 1.20 [21:41:14< 2:50:07] +[titan] 2025-10-05 20:15:37,640 - root - INFO - step: 35380 loss: 1.9110 memory: 118.84GiB(85.28%) tps: 29,212 tflops: 405.27 mfu: 40.98% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:15:37,640 - root - INFO - lr: 
6.4944e-06 gnorm: 1.19 [21:41:26< 2:49:56] +[titan] 2025-10-05 20:15:48,502 - root - INFO - step: 35385 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6841 +[titan] 2025-10-05 20:15:48,502 - root - INFO - lr: 6.4912e-06 gnorm: 1.22 [21:41:36< 2:49:45] +[titan] 2025-10-05 20:15:59,387 - root - INFO - step: 35390 loss: 1.9078 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 20:15:59,387 - root - INFO - lr: 6.4880e-06 gnorm: 1.27 [21:41:47< 2:49:34] +[titan] 2025-10-05 20:16:10,282 - root - INFO - step: 35395 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7479 +[titan] 2025-10-05 20:16:10,282 - root - INFO - lr: 6.4848e-06 gnorm: 1.21 [21:41:58< 2:49:23] +[titan] 2025-10-05 20:16:18,973 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:16:21,149 - root - INFO - step: 35400 loss: 1.8914 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6724 +[titan] 2025-10-05 20:16:21,150 - root - INFO - lr: 6.4816e-06 gnorm: 1.20 [21:42:09< 2:49:12] +[titan] 2025-10-05 20:16:32,038 - root - INFO - step: 35405 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7194 +[titan] 2025-10-05 20:16:32,038 - root - INFO - lr: 6.4784e-06 gnorm: 1.23 [21:42:20< 2:49:01] +[titan] 2025-10-05 20:16:42,971 - root - INFO - step: 35410 loss: 1.9290 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:16:42,971 - root - INFO - lr: 6.4752e-06 gnorm: 1.20 [21:42:31< 2:48:50] +[titan] 2025-10-05 20:16:53,840 - root - INFO - step: 35415 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:16:53,840 - root - INFO - lr: 6.4721e-06 gnorm: 1.17 [21:42:42< 2:48:39] +[titan] 2025-10-05 20:17:04,705 - root - INFO - step: 35420 loss: 1.9333 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:17:04,705 - root - INFO - lr: 6.4689e-06 gnorm: 1.23 [21:42:53< 2:48:28] +[titan] 2025-10-05 20:17:15,612 - root - INFO - step: 35425 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 20:17:15,612 - root - INFO - lr: 6.4657e-06 gnorm: 1.20 [21:43:04< 2:48:17] +[titan] 2025-10-05 20:17:26,485 - root - INFO - step: 35430 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:17:26,486 - root - INFO - lr: 
6.4625e-06 gnorm: 1.18 [21:43:14< 2:48:06] +[titan] 2025-10-05 20:17:37,354 - root - INFO - step: 35435 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:17:37,354 - root - INFO - lr: 6.4594e-06 gnorm: 1.23 [21:43:25< 2:47:55] +[titan] 2025-10-05 20:17:48,271 - root - INFO - step: 35440 loss: 1.9162 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:17:48,271 - root - INFO - lr: 6.4562e-06 gnorm: 1.18 [21:43:36< 2:47:44] +[titan] 2025-10-05 20:17:59,161 - root - INFO - step: 35445 loss: 1.9393 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:17:59,161 - root - INFO - lr: 6.4531e-06 gnorm: 1.21 [21:43:47< 2:47:32] +[titan] 2025-10-05 20:18:07,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:18:10,014 - root - INFO - step: 35450 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:18:10,014 - root - INFO - lr: 6.4499e-06 gnorm: 1.20 [21:43:58< 2:47:21] +[titan] 2025-10-05 20:18:20,865 - root - INFO - step: 35455 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 20:18:20,865 - root - INFO - lr: 6.4468e-06 gnorm: 1.25 [21:44:09< 2:47:10] +[titan] 2025-10-05 20:18:31,752 - root - INFO - step: 35460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.6977 +[titan] 2025-10-05 20:18:31,752 - root - INFO - lr: 6.4436e-06 gnorm: 1.27 [21:44:20< 2:46:59] +[titan] 2025-10-05 20:18:42,672 - root - INFO - step: 35465 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:18:42,672 - root - INFO - lr: 6.4405e-06 gnorm: 1.21 [21:44:31< 2:46:48] +[titan] 2025-10-05 20:18:53,523 - root - INFO - step: 35470 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 20:18:53,523 - root - INFO - lr: 6.4373e-06 gnorm: 1.22 [21:44:41< 2:46:37] +[titan] 2025-10-05 20:19:04,397 - root - INFO - step: 35475 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 20:19:04,397 - root - INFO - lr: 6.4342e-06 gnorm: 1.20 [21:44:52< 2:46:26] +[titan] 2025-10-05 20:19:15,272 - root - INFO - step: 35480 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:19:15,272 - root - INFO - lr: 
6.4311e-06 gnorm: 1.15 [21:45:03< 2:46:15] +[titan] 2025-10-05 20:19:26,134 - root - INFO - step: 35485 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6964 +[titan] 2025-10-05 20:19:26,134 - root - INFO - lr: 6.4279e-06 gnorm: 1.22 [21:45:14< 2:46:04] +[titan] 2025-10-05 20:19:37,003 - root - INFO - step: 35490 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:19:37,003 - root - INFO - lr: 6.4248e-06 gnorm: 1.22 [21:45:25< 2:45:53] +[titan] 2025-10-05 20:19:48,060 - root - INFO - step: 35495 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 29,637 tflops: 411.17 mfu: 41.57% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:19:48,060 - root - INFO - lr: 6.4217e-06 gnorm: 1.17 [21:45:36< 2:45:42] +[titan] 2025-10-05 20:19:56,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:19:58,930 - root - INFO - step: 35500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 20:19:58,930 - root - INFO - lr: 6.4186e-06 gnorm: 1.23 [21:45:47< 2:45:31] +[titan] 2025-10-05 20:20:09,779 - root - INFO - step: 35505 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:20:09,779 - root - INFO - lr: 6.4154e-06 gnorm: 1.19 [21:45:58< 2:45:20] +[titan] 2025-10-05 20:20:20,670 - root - INFO - step: 35510 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:20:20,671 - root - INFO - lr: 6.4123e-06 gnorm: 1.22 [21:46:09< 2:45:09] +[titan] 2025-10-05 20:20:31,543 - root - INFO - step: 35515 loss: 1.8943 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 20:20:31,543 - root - INFO - lr: 6.4092e-06 gnorm: 1.24 [21:46:20< 2:44:58] +[titan] 2025-10-05 20:20:42,433 - root - INFO - step: 35520 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:20:42,433 - root - INFO - lr: 6.4061e-06 gnorm: 1.20 [21:46:30< 2:44:47] +[titan] 2025-10-05 20:20:53,334 - root - INFO - step: 35525 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7142 +[titan] 2025-10-05 20:20:53,334 - root - INFO - lr: 6.4030e-06 gnorm: 1.24 [21:46:41< 2:44:36] +[titan] 2025-10-05 20:21:04,211 - root - INFO - step: 35530 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7544 +[titan] 2025-10-05 20:21:04,211 - root - INFO - lr: 
6.3999e-06 gnorm: 1.20 [21:46:52< 2:44:25] +[titan] 2025-10-05 20:21:15,077 - root - INFO - step: 35535 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 20:21:15,077 - root - INFO - lr: 6.3968e-06 gnorm: 1.22 [21:47:03< 2:44:13] +[titan] 2025-10-05 20:21:25,947 - root - INFO - step: 35540 loss: 2.0043 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 20:21:25,947 - root - INFO - lr: 6.3937e-06 gnorm: 1.23 [21:47:14< 2:44:02] +[titan] 2025-10-05 20:21:36,813 - root - INFO - step: 35545 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:21:36,813 - root - INFO - lr: 6.3906e-06 gnorm: 1.26 [21:47:25< 2:43:51] +[titan] 2025-10-05 20:21:45,547 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:21:47,733 - root - INFO - step: 35550 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 20:21:47,733 - root - INFO - lr: 6.3875e-06 gnorm: 1.29 [21:47:36< 2:43:40] +[titan] 2025-10-05 20:21:58,624 - root - INFO - step: 35555 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6918 +[titan] 2025-10-05 20:21:58,624 - root - INFO - lr: 6.3845e-06 gnorm: 1.20 [21:47:47< 2:43:29] +[titan] 2025-10-05 20:22:09,503 - root - INFO - step: 35560 loss: 1.8840 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 20:22:09,503 - root - INFO - lr: 6.3814e-06 gnorm: 1.18 [21:47:57< 2:43:18] +[titan] 2025-10-05 20:22:20,399 - root - INFO - step: 35565 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7435 +[titan] 2025-10-05 20:22:20,399 - root - INFO - lr: 6.3783e-06 gnorm: 1.22 [21:48:08< 2:43:07] +[titan] 2025-10-05 20:22:31,288 - root - INFO - step: 35570 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 20:22:31,288 - root - INFO - lr: 6.3752e-06 gnorm: 1.21 [21:48:19< 2:42:56] +[titan] 2025-10-05 20:22:42,161 - root - INFO - step: 35575 loss: 1.9928 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 20:22:42,162 - root - INFO - lr: 6.3722e-06 gnorm: 1.31 [21:48:30< 2:42:45] +[titan] 2025-10-05 20:22:53,096 - root - INFO - step: 35580 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 20:22:53,096 - root - INFO - lr: 
6.3691e-06 gnorm: 1.25 [21:48:41< 2:42:34] +[titan] 2025-10-05 20:23:04,005 - root - INFO - step: 35585 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 20:23:04,005 - root - INFO - lr: 6.3660e-06 gnorm: 1.22 [21:48:52< 2:42:23] +[titan] 2025-10-05 20:23:14,874 - root - INFO - step: 35590 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6686 +[titan] 2025-10-05 20:23:14,874 - root - INFO - lr: 6.3630e-06 gnorm: 1.15 [21:49:03< 2:42:12] +[titan] 2025-10-05 20:23:25,760 - root - INFO - step: 35595 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 20:23:25,760 - root - INFO - lr: 6.3599e-06 gnorm: 1.26 [21:49:14< 2:42:01] +[titan] 2025-10-05 20:23:34,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:23:36,637 - root - INFO - step: 35600 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:23:36,637 - root - INFO - lr: 6.3568e-06 gnorm: 1.19 [21:49:25< 2:41:50] +[titan] 2025-10-05 20:23:47,564 - root - INFO - step: 35605 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 20:23:47,564 - root - INFO - lr: 6.3538e-06 gnorm: 1.18 [21:49:36< 2:41:39] +[titan] 2025-10-05 20:23:58,425 - root - INFO - step: 35610 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 20:23:58,425 - root - INFO - lr: 6.3508e-06 gnorm: 1.20 [21:49:46< 2:41:28] +[titan] 2025-10-05 20:24:09,278 - root - INFO - step: 35615 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 20:24:09,278 - root - INFO - lr: 6.3477e-06 gnorm: 1.21 [21:49:57< 2:41:17] +[titan] 2025-10-05 20:24:20,176 - root - INFO - step: 35620 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 20:24:20,176 - root - INFO - lr: 6.3447e-06 gnorm: 1.21 [21:50:08< 2:41:06] +[titan] 2025-10-05 20:24:31,048 - root - INFO - step: 35625 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7146 +[titan] 2025-10-05 20:24:31,049 - root - INFO - lr: 6.3416e-06 gnorm: 1.18 [21:50:19< 2:40:55] +[titan] 2025-10-05 20:24:41,914 - root - INFO - step: 35630 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 20:24:41,914 - root - INFO - lr: 
6.3386e-06 gnorm: 1.22 [21:50:30< 2:40:43] +[titan] 2025-10-05 20:24:52,829 - root - INFO - step: 35635 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:24:52,829 - root - INFO - lr: 6.3356e-06 gnorm: 1.18 [21:50:41< 2:40:32] +[titan] 2025-10-05 20:25:03,707 - root - INFO - step: 35640 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:25:03,707 - root - INFO - lr: 6.3325e-06 gnorm: 1.23 [21:50:52< 2:40:21] +[titan] 2025-10-05 20:25:14,555 - root - INFO - step: 35645 loss: 1.8684 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 20:25:14,556 - root - INFO - lr: 6.3295e-06 gnorm: 1.19 [21:51:03< 2:40:10] +[titan] 2025-10-05 20:25:23,267 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:25:25,451 - root - INFO - step: 35650 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7187 +[titan] 2025-10-05 20:25:25,451 - root - INFO - lr: 6.3265e-06 gnorm: 1.20 [21:51:13< 2:39:59] +[titan] 2025-10-05 20:25:36,291 - root - INFO - step: 35655 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 20:25:36,291 - root - INFO - lr: 6.3235e-06 gnorm: 1.22 [21:51:24< 2:39:48] +[titan] 2025-10-05 20:25:47,198 - root - INFO - step: 35660 loss: 1.9669 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 20:25:47,199 - root - INFO - lr: 6.3205e-06 gnorm: 1.19 [21:51:35< 2:39:37] +[titan] 2025-10-05 20:25:58,057 - root - INFO - step: 35665 loss: 1.9343 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 20:25:58,057 - root - INFO - lr: 6.3174e-06 gnorm: 1.22 [21:51:46< 2:39:26] +[titan] 2025-10-05 20:26:08,933 - root - INFO - step: 35670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:26:08,934 - root - INFO - lr: 6.3144e-06 gnorm: 1.20 [21:51:57< 2:39:15] +[titan] 2025-10-05 20:26:19,799 - root - INFO - step: 35675 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 20:26:19,799 - root - INFO - lr: 6.3114e-06 gnorm: 1.25 [21:52:08< 2:39:04] +[titan] 2025-10-05 20:26:30,670 - root - INFO - step: 35680 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 20:26:30,670 - root - INFO - lr: 
6.3084e-06 gnorm: 1.22 [21:52:19< 2:38:53] +[titan] 2025-10-05 20:26:41,581 - root - INFO - step: 35685 loss: 2.0069 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 20:26:41,581 - root - INFO - lr: 6.3054e-06 gnorm: 1.29 [21:52:30< 2:38:42] +[titan] 2025-10-05 20:26:52,517 - root - INFO - step: 35690 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:26:52,517 - root - INFO - lr: 6.3024e-06 gnorm: 1.23 [21:52:40< 2:38:31] +[titan] 2025-10-05 20:27:03,395 - root - INFO - step: 35695 loss: 1.9599 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7330 +[titan] 2025-10-05 20:27:03,396 - root - INFO - lr: 6.2995e-06 gnorm: 1.23 [21:52:51< 2:38:20] +[titan] 2025-10-05 20:27:12,077 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:27:14,268 - root - INFO - step: 35700 loss: 1.9472 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 20:27:14,268 - root - INFO - lr: 6.2965e-06 gnorm: 1.23 [21:53:02< 2:38:09] +[titan] 2025-10-05 20:27:25,130 - root - INFO - step: 35705 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:27:25,130 - root - INFO - lr: 6.2935e-06 gnorm: 1.19 [21:53:13< 2:37:58] +[titan] 2025-10-05 20:27:35,993 - root - INFO - step: 35710 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 20:27:35,993 - root - INFO - lr: 6.2905e-06 gnorm: 1.28 [21:53:24< 2:37:47] +[titan] 2025-10-05 20:27:46,902 - root - INFO - step: 35715 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.7053 +[titan] 2025-10-05 20:27:46,902 - root - INFO - lr: 6.2875e-06 gnorm: 1.23 [21:53:35< 2:37:36] +[titan] 2025-10-05 20:27:57,813 - root - INFO - step: 35720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 20:27:57,813 - root - INFO - lr: 6.2846e-06 gnorm: 1.21 [21:53:46< 2:37:25] +[titan] 2025-10-05 20:28:08,684 - root - INFO - step: 35725 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 20:28:08,685 - root - INFO - lr: 6.2816e-06 gnorm: 1.21 [21:53:57< 2:37:13] +[titan] 2025-10-05 20:28:19,553 - root - INFO - step: 35730 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 20:28:19,553 - root - INFO - lr: 
6.2786e-06 gnorm: 1.19 [21:54:07< 2:37:02] +[titan] 2025-10-05 20:28:30,452 - root - INFO - step: 35735 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 20:28:30,452 - root - INFO - lr: 6.2756e-06 gnorm: 1.25 [21:54:18< 2:36:51] +[titan] 2025-10-05 20:28:41,341 - root - INFO - step: 35740 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 20:28:41,341 - root - INFO - lr: 6.2727e-06 gnorm: 1.26 [21:54:29< 2:36:40] +[titan] 2025-10-05 20:28:52,320 - root - INFO - step: 35745 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.09 mfu: 41.87% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6496 +[titan] 2025-10-05 20:28:52,320 - root - INFO - lr: 6.2697e-06 gnorm: 1.19 [21:54:40< 2:36:29] +[titan] 2025-10-05 20:29:01,029 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:29:03,215 - root - INFO - step: 35750 loss: 1.8998 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 20:29:03,215 - root - INFO - lr: 6.2668e-06 gnorm: 1.23 [21:54:51< 2:36:18] +[titan] 2025-10-05 20:29:14,102 - root - INFO - step: 35755 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 20:29:14,102 - root - INFO - lr: 6.2638e-06 gnorm: 1.25 [21:55:02< 2:36:07] +[titan] 2025-10-05 20:29:24,977 - root - INFO - step: 35760 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 20:29:24,977 - root - INFO - lr: 6.2609e-06 gnorm: 1.19 [21:55:13< 2:35:56] +[titan] 2025-10-05 20:29:35,865 - root - INFO - step: 35765 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 20:29:35,865 - root - INFO - lr: 6.2579e-06 gnorm: 1.20 [21:55:24< 2:35:45] +[titan] 2025-10-05 20:29:46,743 - root - INFO - step: 35770 loss: 1.9516 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 20:29:46,743 - root - INFO - lr: 6.2550e-06 gnorm: 1.22 [21:55:35< 2:35:34] +[titan] 2025-10-05 20:29:57,662 - root - INFO - step: 35775 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 20:29:57,662 - root - INFO - lr: 6.2521e-06 gnorm: 1.24 [21:55:46< 2:35:23] +[titan] 2025-10-05 20:30:08,549 - root - INFO - step: 35780 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 20:30:08,549 - root - INFO - lr: 
6.2491e-06 gnorm: 1.24 [21:55:56< 2:35:12] +[titan] 2025-10-05 20:30:19,428 - root - INFO - step: 35785 loss: 2.0119 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 20:30:19,428 - root - INFO - lr: 6.2462e-06 gnorm: 1.22 [21:56:07< 2:35:01] +[titan] 2025-10-05 20:30:30,298 - root - INFO - step: 35790 loss: 1.8995 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 20:30:30,298 - root - INFO - lr: 6.2433e-06 gnorm: 1.20 [21:56:18< 2:34:50] +[titan] 2025-10-05 20:30:41,166 - root - INFO - step: 35795 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 20:30:41,166 - root - INFO - lr: 6.2403e-06 gnorm: 1.22 [21:56:29< 2:34:39] +[titan] 2025-10-05 20:30:49,931 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:30:52,145 - root - INFO - step: 35800 loss: 1.8719 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.08 mfu: 41.87% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6562 +[titan] 2025-10-05 20:30:52,146 - root - INFO - lr: 6.2374e-06 gnorm: 1.18 [21:56:40< 2:34:28] +[titan] 2025-10-05 20:31:03,002 - root - INFO - step: 35805 loss: 1.8418 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6290 +[titan] 2025-10-05 20:31:03,002 - root - INFO - lr: 6.2345e-06 gnorm: 1.17 [21:56:51< 2:34:17] +[titan] 2025-10-05 20:31:13,902 - root - INFO - step: 35810 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 20:31:13,902 - root - INFO - lr: 6.2316e-06 gnorm: 1.20 [21:57:02< 2:34:06] +[titan] 2025-10-05 20:31:24,762 - root - INFO - step: 35815 loss: 1.8766 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:31:24,763 - root - INFO - lr: 6.2287e-06 gnorm: 1.21 [21:57:13< 2:33:55] +[titan] 2025-10-05 20:31:35,622 - root - INFO - step: 35820 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 20:31:35,623 - root - INFO - lr: 6.2258e-06 gnorm: 1.20 [21:57:24< 2:33:44] +[titan] 2025-10-05 20:31:46,463 - root - INFO - step: 35825 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 20:31:46,464 - root - INFO - lr: 6.2229e-06 gnorm: 1.23 [21:57:34< 2:33:32] +[titan] 2025-10-05 20:31:57,412 - root - INFO - step: 35830 loss: 1.8980 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 20:31:57,412 - root - INFO - lr: 
6.2200e-06 gnorm: 1.20 [21:57:45< 2:33:21] +[titan] 2025-10-05 20:32:08,273 - root - INFO - step: 35835 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6616 +[titan] 2025-10-05 20:32:08,273 - root - INFO - lr: 6.2171e-06 gnorm: 1.17 [21:57:56< 2:33:10] +[titan] 2025-10-05 20:32:19,226 - root - INFO - step: 35840 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 20:32:19,227 - root - INFO - lr: 6.2142e-06 gnorm: 1.24 [21:58:07< 2:32:59] +[titan] 2025-10-05 20:32:19,410 - root - INFO - Dumping profiler traces at step 35840 +[titan] 2025-10-05 20:32:19,451 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:32:30,380 - root - INFO - step: 35845 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,379 tflops: 407.58 mfu: 41.21% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 20:32:30,381 - root - INFO - lr: 6.2113e-06 gnorm: 1.21 [21:58:18< 2:32:48] +[titan] 2025-10-05 20:32:39,073 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:32:41,258 - root - INFO - step: 35850 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:32:41,258 - root - INFO - lr: 6.2084e-06 gnorm: 1.18 [21:58:29< 2:32:37] +[titan] 2025-10-05 20:32:52,159 - root - INFO - step: 35855 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 20:32:52,159 - root - INFO - lr: 6.2055e-06 gnorm: 1.21 [21:58:40< 2:32:26] +[titan] 2025-10-05 20:33:03,040 - root - INFO - step: 35860 loss: 1.9254 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:33:03,041 - root - INFO - lr: 6.2026e-06 gnorm: 1.22 [21:58:51< 2:32:15] +[titan] 2025-10-05 20:33:13,902 - root - INFO - step: 35865 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7075 +[titan] 2025-10-05 20:33:13,902 - root - INFO - lr: 6.1998e-06 gnorm: 1.24 [21:59:02< 2:32:04] +[titan] 2025-10-05 20:33:24,764 - root - INFO - step: 35870 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 20:33:24,764 - root - INFO - lr: 6.1969e-06 gnorm: 1.24 [21:59:13< 2:31:53] +[titan] 2025-10-05 20:33:35,665 - root - INFO - step: 35875 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:33:35,666 - root - INFO - lr: 6.1940e-06 gnorm: 1.20 [21:59:24< 2:31:42] +[titan] 2025-10-05 20:33:46,542 - root - INFO - step: 35880 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:33:46,542 - root - INFO - lr: 
6.1911e-06 gnorm: 1.22 [21:59:34< 2:31:31] +[titan] 2025-10-05 20:33:57,456 - root - INFO - step: 35885 loss: 1.9215 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 20:33:57,456 - root - INFO - lr: 6.1883e-06 gnorm: 1.21 [21:59:45< 2:31:20] +[titan] 2025-10-05 20:34:08,320 - root - INFO - step: 35890 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 20:34:08,321 - root - INFO - lr: 6.1854e-06 gnorm: 1.23 [21:59:56< 2:31:09] +[titan] 2025-10-05 20:34:19,183 - root - INFO - step: 35895 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 20:34:19,183 - root - INFO - lr: 6.1826e-06 gnorm: 1.20 [22:00:07< 2:30:58] +[titan] 2025-10-05 20:34:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:34:30,044 - root - INFO - step: 35900 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 20:34:30,044 - root - INFO - lr: 6.1797e-06 gnorm: 1.24 [22:00:18< 2:30:47] +[titan] 2025-10-05 20:34:40,962 - root - INFO - step: 35905 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 20:34:40,962 - root - INFO - lr: 6.1769e-06 gnorm: 1.21 [22:00:29< 2:30:36] +[titan] 2025-10-05 20:34:51,814 - root - INFO - step: 35910 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 20:34:51,814 - root - INFO - lr: 6.1740e-06 gnorm: 1.20 [22:00:40< 2:30:25] +[titan] 2025-10-05 20:35:02,707 - root - INFO - step: 35915 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7636 +[titan] 2025-10-05 20:35:02,708 - root - INFO - lr: 6.1712e-06 gnorm: 1.25 [22:00:51< 2:30:14] +[titan] 2025-10-05 20:35:13,561 - root - INFO - step: 35920 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 20:35:13,561 - root - INFO - lr: 6.1683e-06 gnorm: 1.21 [22:01:01< 2:30:03] +[titan] 2025-10-05 20:35:24,429 - root - INFO - step: 35925 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 20:35:24,429 - root - INFO - lr: 6.1655e-06 gnorm: 1.21 [22:01:12< 2:29:51] +[titan] 2025-10-05 20:35:35,298 - root - INFO - step: 35930 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7410 +[titan] 2025-10-05 20:35:35,298 - root - INFO - lr: 
6.1627e-06 gnorm: 1.20 [22:01:23< 2:29:40] +[titan] 2025-10-05 20:35:46,161 - root - INFO - step: 35935 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6568 +[titan] 2025-10-05 20:35:46,161 - root - INFO - lr: 6.1598e-06 gnorm: 1.22 [22:01:34< 2:29:29] +[titan] 2025-10-05 20:35:57,095 - root - INFO - step: 35940 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7140 +[titan] 2025-10-05 20:35:57,096 - root - INFO - lr: 6.1570e-06 gnorm: 1.21 [22:01:45< 2:29:18] +[titan] 2025-10-05 20:36:07,977 - root - INFO - step: 35945 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 20:36:07,977 - root - INFO - lr: 6.1542e-06 gnorm: 1.23 [22:01:56< 2:29:07] +[titan] 2025-10-05 20:36:16,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:36:18,851 - root - INFO - step: 35950 loss: 1.8140 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6041 +[titan] 2025-10-05 20:36:18,851 - root - INFO - lr: 6.1514e-06 gnorm: 1.21 [22:02:07< 2:28:56] +[titan] 2025-10-05 20:36:29,728 - root - INFO - step: 35955 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:36:29,728 - root - INFO - lr: 6.1485e-06 gnorm: 1.18 [22:02:18< 2:28:45] +[titan] 2025-10-05 20:36:40,603 - root - INFO - step: 35960 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 20:36:40,603 - root - INFO - lr: 6.1457e-06 gnorm: 1.20 [22:02:29< 2:28:34] +[titan] 2025-10-05 20:36:51,466 - root - INFO - step: 35965 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7135 +[titan] 2025-10-05 20:36:51,467 - root - INFO - lr: 6.1429e-06 gnorm: 1.21 [22:02:39< 2:28:23] +[titan] 2025-10-05 20:37:02,418 - root - INFO - step: 35970 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 20:37:02,418 - root - INFO - lr: 6.1401e-06 gnorm: 1.17 [22:02:50< 2:28:12] +[titan] 2025-10-05 20:37:13,277 - root - INFO - step: 35975 loss: 1.9766 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:37:13,277 - root - INFO - lr: 6.1373e-06 gnorm: 1.23 [22:03:01< 2:28:01] +[titan] 2025-10-05 20:37:24,151 - root - INFO - step: 35980 loss: 1.9461 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:37:24,151 - root - INFO - lr: 
6.1345e-06 gnorm: 1.17 [22:03:12< 2:27:50] +[titan] 2025-10-05 20:37:34,997 - root - INFO - step: 35985 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6927 +[titan] 2025-10-05 20:37:34,997 - root - INFO - lr: 6.1317e-06 gnorm: 1.19 [22:03:23< 2:27:39] +[titan] 2025-10-05 20:37:45,857 - root - INFO - step: 35990 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:37:45,857 - root - INFO - lr: 6.1289e-06 gnorm: 1.21 [22:03:34< 2:27:28] +[titan] 2025-10-05 20:37:56,761 - root - INFO - step: 35995 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 20:37:56,761 - root - INFO - lr: 6.1261e-06 gnorm: 1.22 [22:03:45< 2:27:17] +[titan] 2025-10-05 20:38:05,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:38:07,602 - root - INFO - step: 36000 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 20:38:07,602 - root - INFO - lr: 6.1233e-06 gnorm: 1.24 [22:03:56< 2:27:06] +[titan] 2025-10-05 20:38:18,465 - root - INFO - step: 36005 loss: 1.8959 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 20:38:18,465 - root - INFO - lr: 6.1206e-06 gnorm: 1.22 [22:04:06< 2:26:55] +[titan] 2025-10-05 20:38:29,352 - root - INFO - step: 36010 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:38:29,352 - root - INFO - lr: 6.1178e-06 gnorm: 1.19 [22:04:17< 2:26:44] +[titan] 2025-10-05 20:38:40,197 - root - INFO - step: 36015 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 20:38:40,197 - root - INFO - lr: 6.1150e-06 gnorm: 1.22 [22:04:28< 2:26:33] +[titan] 2025-10-05 20:38:51,058 - root - INFO - step: 36020 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 20:38:51,058 - root - INFO - lr: 6.1122e-06 gnorm: 1.21 [22:04:39< 2:26:22] +[titan] 2025-10-05 20:39:01,952 - root - INFO - step: 36025 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6606 +[titan] 2025-10-05 20:39:01,952 - root - INFO - lr: 6.1095e-06 gnorm: 1.20 [22:04:50< 2:26:10] +[titan] 2025-10-05 20:39:12,835 - root - INFO - step: 36030 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 20:39:12,835 - root - INFO - lr: 
6.1067e-06 gnorm: 1.25 [22:05:01< 2:25:59] +[titan] 2025-10-05 20:39:23,710 - root - INFO - step: 36035 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 20:39:23,711 - root - INFO - lr: 6.1039e-06 gnorm: 1.23 [22:05:12< 2:25:48] +[titan] 2025-10-05 20:39:34,575 - root - INFO - step: 36040 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 20:39:34,575 - root - INFO - lr: 6.1012e-06 gnorm: 1.23 [22:05:22< 2:25:37] +[titan] 2025-10-05 20:39:45,433 - root - INFO - step: 36045 loss: 1.8945 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6757 +[titan] 2025-10-05 20:39:45,434 - root - INFO - lr: 6.0984e-06 gnorm: 1.20 [22:05:33< 2:25:26] +[titan] 2025-10-05 20:39:54,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:39:56,290 - root - INFO - step: 36050 loss: 1.9349 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7112 +[titan] 2025-10-05 20:39:56,290 - root - INFO - lr: 6.0957e-06 gnorm: 1.20 [22:05:44< 2:25:15] +[titan] 2025-10-05 20:40:07,175 - root - INFO - step: 36055 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 20:40:07,175 - root - INFO - lr: 6.0929e-06 gnorm: 1.26 [22:05:55< 2:25:04] +[titan] 2025-10-05 20:40:18,044 - root - INFO - step: 36060 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:40:18,044 - root - INFO - lr: 6.0902e-06 gnorm: 1.22 [22:06:06< 2:24:53] +[titan] 2025-10-05 20:40:28,916 - root - INFO - step: 36065 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7253 +[titan] 2025-10-05 20:40:28,916 - root - INFO - lr: 6.0874e-06 gnorm: 1.21 [22:06:17< 2:24:42] +[titan] 2025-10-05 20:40:39,778 - root - INFO - step: 36070 loss: 1.8531 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6374 +[titan] 2025-10-05 20:40:39,778 - root - INFO - lr: 6.0847e-06 gnorm: 1.21 [22:06:28< 2:24:31] +[titan] 2025-10-05 20:40:50,621 - root - INFO - step: 36075 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 20:40:50,621 - root - INFO - lr: 6.0820e-06 gnorm: 1.21 [22:06:39< 2:24:20] +[titan] 2025-10-05 20:41:01,488 - root - INFO - step: 36080 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 20:41:01,489 - root - INFO - lr: 
6.0792e-06 gnorm: 1.24 [22:06:49< 2:24:09] +[titan] 2025-10-05 20:41:12,335 - root - INFO - step: 36085 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 20:41:12,335 - root - INFO - lr: 6.0765e-06 gnorm: 1.21 [22:07:00< 2:23:58] +[titan] 2025-10-05 20:41:23,167 - root - INFO - step: 36090 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 20:41:23,168 - root - INFO - lr: 6.0738e-06 gnorm: 1.24 [22:07:11< 2:23:47] +[titan] 2025-10-05 20:41:34,043 - root - INFO - step: 36095 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:41:34,044 - root - INFO - lr: 6.0710e-06 gnorm: 1.26 [22:07:22< 2:23:36] +[titan] 2025-10-05 20:41:42,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:41:44,911 - root - INFO - step: 36100 loss: 1.9238 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7003 +[titan] 2025-10-05 20:41:44,911 - root - INFO - lr: 6.0683e-06 gnorm: 1.23 [22:07:33< 2:23:25] +[titan] 2025-10-05 20:41:55,794 - root - INFO - step: 36105 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 20:41:55,794 - root - INFO - lr: 6.0656e-06 gnorm: 1.18 [22:07:44< 2:23:14] +[titan] 2025-10-05 20:42:06,656 - root - INFO - step: 36110 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 20:42:06,657 - root - INFO - lr: 6.0629e-06 gnorm: 1.22 [22:07:55< 2:23:03] +[titan] 2025-10-05 20:42:17,515 - root - INFO - step: 36115 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 20:42:17,515 - root - INFO - lr: 6.0602e-06 gnorm: 1.22 [22:08:05< 2:22:52] +[titan] 2025-10-05 20:42:28,350 - root - INFO - step: 36120 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 20:42:28,350 - root - INFO - lr: 6.0575e-06 gnorm: 1.23 [22:08:16< 2:22:41] +[titan] 2025-10-05 20:42:39,197 - root - INFO - step: 36125 loss: 1.8516 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 20:42:39,197 - root - INFO - lr: 6.0548e-06 gnorm: 1.24 [22:08:27< 2:22:29] +[titan] 2025-10-05 20:42:50,083 - root - INFO - step: 36130 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 20:42:50,083 - root - INFO - lr: 
6.0521e-06 gnorm: 1.22 [22:08:38< 2:22:18] +[titan] 2025-10-05 20:43:00,986 - root - INFO - step: 36135 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:43:00,987 - root - INFO - lr: 6.0494e-06 gnorm: 1.22 [22:08:49< 2:22:07] +[titan] 2025-10-05 20:43:11,851 - root - INFO - step: 36140 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 20:43:11,851 - root - INFO - lr: 6.0467e-06 gnorm: 1.89 [22:09:00< 2:21:56] +[titan] 2025-10-05 20:43:22,694 - root - INFO - step: 36145 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 20:43:22,694 - root - INFO - lr: 6.0440e-06 gnorm: 1.18 [22:09:11< 2:21:45] +[titan] 2025-10-05 20:43:31,365 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:43:33,546 - root - INFO - step: 36150 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:43:33,547 - root - INFO - lr: 6.0413e-06 gnorm: 1.25 [22:09:21< 2:21:34] +[titan] 2025-10-05 20:43:44,389 - root - INFO - step: 36155 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 20:43:44,389 - root - INFO - lr: 6.0386e-06 gnorm: 1.20 [22:09:32< 2:21:23] +[titan] 2025-10-05 20:43:55,248 - root - INFO - step: 36160 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 20:43:55,248 - root - INFO - lr: 6.0360e-06 gnorm: 1.26 [22:09:43< 2:21:12] +[titan] 2025-10-05 20:44:06,157 - root - INFO - step: 36165 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:06,158 - root - INFO - lr: 6.0333e-06 gnorm: 1.22 [22:09:54< 2:21:01] +[titan] 2025-10-05 20:44:17,014 - root - INFO - step: 36170 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 20:44:17,014 - root - INFO - lr: 6.0306e-06 gnorm: 1.24 [22:10:05< 2:20:50] +[titan] 2025-10-05 20:44:27,855 - root - INFO - step: 36175 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:27,855 - root - INFO - lr: 6.0279e-06 gnorm: 1.23 [22:10:16< 2:20:39] +[titan] 2025-10-05 20:44:38,720 - root - INFO - step: 36180 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:44:38,720 - root - INFO - lr: 
6.0253e-06 gnorm: 1.23 [22:10:27< 2:20:28] +[titan] 2025-10-05 20:44:49,571 - root - INFO - step: 36185 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:44:49,571 - root - INFO - lr: 6.0226e-06 gnorm: 1.24 [22:10:37< 2:20:17] +[titan] 2025-10-05 20:45:00,410 - root - INFO - step: 36190 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 20:45:00,411 - root - INFO - lr: 6.0200e-06 gnorm: 1.29 [22:10:48< 2:20:06] +[titan] 2025-10-05 20:45:11,321 - root - INFO - step: 36195 loss: 1.8986 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6794 +[titan] 2025-10-05 20:45:11,321 - root - INFO - lr: 6.0173e-06 gnorm: 1.22 [22:10:59< 2:19:55] +[titan] 2025-10-05 20:45:19,981 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:45:22,165 - root - INFO - step: 36200 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 20:45:22,166 - root - INFO - lr: 6.0146e-06 gnorm: 1.25 [22:11:10< 2:19:44] +[titan] 2025-10-05 20:45:33,012 - root - INFO - step: 36205 loss: 1.8677 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6514 +[titan] 2025-10-05 20:45:33,012 - root - INFO - lr: 6.0120e-06 gnorm: 1.21 [22:11:21< 2:19:33] +[titan] 2025-10-05 20:45:43,868 - root - INFO - step: 36210 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 20:45:43,868 - root - INFO - lr: 6.0094e-06 gnorm: 1.23 [22:11:32< 2:19:22] +[titan] 2025-10-05 20:45:54,736 - root - INFO - step: 36215 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6484 +[titan] 2025-10-05 20:45:54,737 - root - INFO - lr: 6.0067e-06 gnorm: 1.18 [22:11:43< 2:19:11] +[titan] 2025-10-05 20:46:05,631 - root - INFO - step: 36220 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:46:05,631 - root - INFO - lr: 6.0041e-06 gnorm: 1.19 [22:11:54< 2:19:00] +[titan] 2025-10-05 20:46:16,518 - root - INFO - step: 36225 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 20:46:16,518 - root - INFO - lr: 6.0014e-06 gnorm: 1.22 [22:12:04< 2:18:48] +[titan] 2025-10-05 20:46:27,370 - root - INFO - step: 36230 loss: 1.9836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:46:27,370 - root - INFO - lr: 
5.9988e-06 gnorm: 1.19 [22:12:15< 2:18:37] +[titan] 2025-10-05 20:46:38,233 - root - INFO - step: 36235 loss: 1.8873 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 20:46:38,233 - root - INFO - lr: 5.9962e-06 gnorm: 1.22 [22:12:26< 2:18:26] +[titan] 2025-10-05 20:46:49,088 - root - INFO - step: 36240 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:46:49,088 - root - INFO - lr: 5.9936e-06 gnorm: 1.21 [22:12:37< 2:18:15] +[titan] 2025-10-05 20:46:59,957 - root - INFO - step: 36245 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6792 +[titan] 2025-10-05 20:46:59,958 - root - INFO - lr: 5.9909e-06 gnorm: 1.22 [22:12:48< 2:18:04] +[titan] 2025-10-05 20:47:08,665 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:47:10,858 - root - INFO - step: 36250 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:47:10,858 - root - INFO - lr: 5.9883e-06 gnorm: 1.19 [22:12:59< 2:17:53] +[titan] 2025-10-05 20:47:21,702 - root - INFO - step: 36255 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 20:47:21,702 - root - INFO - lr: 5.9857e-06 gnorm: 1.26 [22:13:10< 2:17:42] +[titan] 2025-10-05 20:47:32,596 - root - INFO - step: 36260 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 20:47:32,596 - root - INFO - lr: 5.9831e-06 gnorm: 1.22 [22:13:20< 2:17:31] +[titan] 2025-10-05 20:47:43,478 - root - INFO - step: 36265 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 20:47:43,478 - root - INFO - lr: 5.9805e-06 gnorm: 1.28 [22:13:31< 2:17:20] +[titan] 2025-10-05 20:47:54,366 - root - INFO - step: 36270 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:47:54,366 - root - INFO - lr: 5.9779e-06 gnorm: 1.22 [22:13:42< 2:17:09] +[titan] 2025-10-05 20:48:05,288 - root - INFO - step: 36275 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6837 +[titan] 2025-10-05 20:48:05,288 - root - INFO - lr: 5.9753e-06 gnorm: 1.22 [22:13:53< 2:16:58] +[titan] 2025-10-05 20:48:16,197 - root - INFO - step: 36280 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 20:48:16,197 - root - INFO - lr: 
5.9727e-06 gnorm: 1.22 [22:14:04< 2:16:47] +[titan] 2025-10-05 20:48:27,074 - root - INFO - step: 36285 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 20:48:27,075 - root - INFO - lr: 5.9701e-06 gnorm: 1.23 [22:14:15< 2:16:36] +[titan] 2025-10-05 20:48:37,962 - root - INFO - step: 36290 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 20:48:37,962 - root - INFO - lr: 5.9675e-06 gnorm: 1.26 [22:14:26< 2:16:25] +[titan] 2025-10-05 20:48:48,831 - root - INFO - step: 36295 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 20:48:48,832 - root - INFO - lr: 5.9649e-06 gnorm: 1.22 [22:14:37< 2:16:14] +[titan] 2025-10-05 20:48:57,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:48:59,685 - root - INFO - step: 36300 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6884 +[titan] 2025-10-05 20:48:59,686 - root - INFO - lr: 5.9623e-06 gnorm: 1.23 [22:14:48< 2:16:03] +[titan] 2025-10-05 20:49:10,530 - root - INFO - step: 36305 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:49:10,530 - root - INFO - lr: 5.9597e-06 gnorm: 1.21 [22:14:58< 2:15:52] +[titan] 2025-10-05 20:49:21,373 - root - INFO - step: 36310 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7418 +[titan] 2025-10-05 20:49:21,373 - root - INFO - lr: 5.9572e-06 gnorm: 1.26 [22:15:09< 2:15:41] +[titan] 2025-10-05 20:49:32,211 - root - INFO - step: 36315 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 20:49:32,211 - root - INFO - lr: 5.9546e-06 gnorm: 1.21 [22:15:20< 2:15:30] +[titan] 2025-10-05 20:49:43,047 - root - INFO - step: 36320 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7327 +[titan] 2025-10-05 20:49:43,048 - root - INFO - lr: 5.9520e-06 gnorm: 1.23 [22:15:31< 2:15:19] +[titan] 2025-10-05 20:49:53,930 - root - INFO - step: 36325 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 20:49:53,930 - root - INFO - lr: 5.9495e-06 gnorm: 1.21 [22:15:42< 2:15:07] +[titan] 2025-10-05 20:50:04,790 - root - INFO - step: 36330 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 20:50:04,790 - root - INFO - lr: 
5.9469e-06 gnorm: 1.22 [22:15:53< 2:14:56] +[titan] 2025-10-05 20:50:15,657 - root - INFO - step: 36335 loss: 1.9258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7028 +[titan] 2025-10-05 20:50:15,657 - root - INFO - lr: 5.9443e-06 gnorm: 1.28 [22:16:04< 2:14:45] +[titan] 2025-10-05 20:50:26,516 - root - INFO - step: 36340 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 20:50:26,516 - root - INFO - lr: 5.9418e-06 gnorm: 1.22 [22:16:14< 2:14:34] +[titan] 2025-10-05 20:50:37,351 - root - INFO - step: 36345 loss: 1.8859 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 20:50:37,351 - root - INFO - lr: 5.9392e-06 gnorm: 1.21 [22:16:25< 2:14:23] +[titan] 2025-10-05 20:50:46,107 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:50:48,289 - root - INFO - step: 36350 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:50:48,289 - root - INFO - lr: 5.9367e-06 gnorm: 1.26 [22:16:36< 2:14:12] +[titan] 2025-10-05 20:50:52,818 - root - INFO - Dumping profiler traces at step 36352 +[titan] 2025-10-05 20:50:52,857 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:50:59,390 - root - INFO - step: 36355 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 29,519 tflops: 409.53 mfu: 41.41% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6886 +[titan] 2025-10-05 20:50:59,390 - root - INFO - lr: 5.9341e-06 gnorm: 1.21 [22:16:47< 2:14:01] +[titan] 2025-10-05 20:51:10,256 - root - INFO - step: 36360 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6670 +[titan] 2025-10-05 20:51:10,257 - root - INFO - lr: 5.9316e-06 gnorm: 1.20 [22:16:58< 2:13:50] +[titan] 2025-10-05 20:51:21,108 - root - INFO - step: 36365 loss: 1.9715 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7430 +[titan] 2025-10-05 20:51:21,108 - root - INFO - lr: 5.9290e-06 gnorm: 1.25 [22:17:09< 2:13:39] +[titan] 2025-10-05 20:51:31,957 - root - INFO - step: 36370 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 20:51:31,957 - root - INFO - lr: 5.9265e-06 gnorm: 1.22 [22:17:20< 2:13:28] +[titan] 2025-10-05 20:51:42,813 - root - INFO - step: 36375 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:51:42,813 - root - INFO - lr: 5.9240e-06 gnorm: 1.22 [22:17:31< 2:13:17] +[titan] 2025-10-05 20:51:53,656 - root - INFO - step: 36380 loss: 
1.9104 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 20:51:53,656 - root - INFO - lr: 5.9214e-06 gnorm: 1.27 [22:17:42< 2:13:06] +[titan] 2025-10-05 20:52:04,533 - root - INFO - step: 36385 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7274 +[titan] 2025-10-05 20:52:04,533 - root - INFO - lr: 5.9189e-06 gnorm: 1.22 [22:17:52< 2:12:55] +[titan] 2025-10-05 20:52:15,414 - root - INFO - step: 36390 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7019 +[titan] 2025-10-05 20:52:15,414 - root - INFO - lr: 5.9164e-06 gnorm: 1.23 [22:18:03< 2:12:44] +[titan] 2025-10-05 20:52:26,295 - root - INFO - step: 36395 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 20:52:26,295 - root - INFO - lr: 5.9139e-06 gnorm: 1.21 [22:18:14< 2:12:33] +[titan] 2025-10-05 20:52:34,981 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:52:37,152 - root - INFO - step: 36400 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 20:52:37,153 - root - INFO - lr: 5.9114e-06 gnorm: 1.22 [22:18:25< 2:12:22] +[titan] 2025-10-05 20:52:48,028 - root - INFO - step: 36405 loss: 1.9539 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:52:48,028 - root - INFO - lr: 5.9088e-06 gnorm: 1.20 [22:18:36< 2:12:11] +[titan] 2025-10-05 20:52:58,901 - root - INFO - step: 36410 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:52:58,901 - root - INFO - lr: 5.9063e-06 gnorm: 1.21 [22:18:47< 2:12:00] +[titan] 2025-10-05 20:53:10,114 - root - INFO - step: 36415 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 29,223 tflops: 405.42 mfu: 40.99% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6627 +[titan] 2025-10-05 20:53:10,115 - root - INFO - lr: 5.9038e-06 gnorm: 1.21 [22:18:58< 2:11:49] +[titan] 2025-10-05 20:53:21,005 - root - INFO - step: 36420 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 20:53:21,005 - root - INFO - lr: 5.9013e-06 gnorm: 1.27 [22:19:09< 2:11:38] +[titan] 2025-10-05 20:53:31,873 - root - INFO - step: 36425 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 20:53:31,874 - root - INFO - lr: 5.8988e-06 gnorm: 1.24 [22:19:20< 2:11:27] +[titan] 2025-10-05 20:53:42,745 - root - INFO - step: 36430 loss: 1.8831 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6658 +[titan] 2025-10-05 20:53:42,745 - root - INFO - lr: 
5.8963e-06 gnorm: 1.28 [22:19:31< 2:11:16] +[titan] 2025-10-05 20:53:53,613 - root - INFO - step: 36435 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 20:53:53,613 - root - INFO - lr: 5.8938e-06 gnorm: 1.23 [22:19:41< 2:11:05] +[titan] 2025-10-05 20:54:04,481 - root - INFO - step: 36440 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:54:04,481 - root - INFO - lr: 5.8914e-06 gnorm: 1.22 [22:19:52< 2:10:53] +[titan] 2025-10-05 20:54:15,378 - root - INFO - step: 36445 loss: 1.9147 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:54:15,378 - root - INFO - lr: 5.8889e-06 gnorm: 1.24 [22:20:03< 2:10:42] +[titan] 2025-10-05 20:54:24,084 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:54:26,273 - root - INFO - step: 36450 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 20:54:26,273 - root - INFO - lr: 5.8864e-06 gnorm: 1.25 [22:20:14< 2:10:31] +[titan] 2025-10-05 20:54:37,147 - root - INFO - step: 36455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 20:54:37,147 - root - INFO - lr: 5.8839e-06 gnorm: 1.20 [22:20:25< 2:10:20] +[titan] 2025-10-05 20:54:48,029 - root - INFO - step: 36460 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 20:54:48,029 - root - INFO - lr: 5.8814e-06 gnorm: 1.21 [22:20:36< 2:10:09] +[titan] 2025-10-05 20:54:58,890 - root - INFO - step: 36465 loss: 1.9169 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 20:54:58,890 - root - INFO - lr: 5.8790e-06 gnorm: 1.22 [22:20:47< 2:09:58] +[titan] 2025-10-05 20:55:09,763 - root - INFO - step: 36470 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6935 +[titan] 2025-10-05 20:55:09,764 - root - INFO - lr: 5.8765e-06 gnorm: 1.24 [22:20:58< 2:09:47] +[titan] 2025-10-05 20:55:20,621 - root - INFO - step: 36475 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 20:55:20,622 - root - INFO - lr: 5.8740e-06 gnorm: 1.24 [22:21:08< 2:09:36] +[titan] 2025-10-05 20:55:31,491 - root - INFO - step: 36480 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:55:31,491 - root - INFO - lr: 
5.8716e-06 gnorm: 1.25 [22:21:19< 2:09:25] +[titan] 2025-10-05 20:55:42,366 - root - INFO - step: 36485 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 20:55:42,367 - root - INFO - lr: 5.8691e-06 gnorm: 1.22 [22:21:30< 2:09:14] +[titan] 2025-10-05 20:55:53,240 - root - INFO - step: 36490 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:55:53,241 - root - INFO - lr: 5.8667e-06 gnorm: 1.19 [22:21:41< 2:09:03] +[titan] 2025-10-05 20:56:04,092 - root - INFO - step: 36495 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 20:56:04,092 - root - INFO - lr: 5.8642e-06 gnorm: 1.28 [22:21:52< 2:08:52] +[titan] 2025-10-05 20:56:12,794 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:56:14,974 - root - INFO - step: 36500 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:56:14,974 - root - INFO - lr: 5.8618e-06 gnorm: 1.22 [22:22:03< 2:08:41] +[titan] 2025-10-05 20:56:25,858 - root - INFO - step: 36505 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:56:25,858 - root - INFO - lr: 5.8593e-06 gnorm: 1.21 [22:22:14< 2:08:30] +[titan] 2025-10-05 20:56:36,711 - root - INFO - step: 36510 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 20:56:36,712 - root - INFO - lr: 5.8569e-06 gnorm: 1.26 [22:22:25< 2:08:19] +[titan] 2025-10-05 20:56:47,594 - root - INFO - step: 36515 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:56:47,594 - root - INFO - lr: 5.8544e-06 gnorm: 1.24 [22:22:35< 2:08:08] +[titan] 2025-10-05 20:56:58,464 - root - INFO - step: 36520 loss: 1.8908 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6705 +[titan] 2025-10-05 20:56:58,465 - root - INFO - lr: 5.8520e-06 gnorm: 1.23 [22:22:46< 2:07:57] +[titan] 2025-10-05 20:57:09,332 - root - INFO - step: 36525 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:57:09,332 - root - INFO - lr: 5.8496e-06 gnorm: 1.21 [22:22:57< 2:07:46] +[titan] 2025-10-05 20:57:20,232 - root - INFO - step: 36530 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 20:57:20,232 - root - INFO - lr: 
5.8471e-06 gnorm: 1.21 [22:23:08< 2:07:35] +[titan] 2025-10-05 20:57:31,124 - root - INFO - step: 36535 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 20:57:31,124 - root - INFO - lr: 5.8447e-06 gnorm: 1.23 [22:23:19< 2:07:24] +[titan] 2025-10-05 20:57:42,014 - root - INFO - step: 36540 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 20:57:42,014 - root - INFO - lr: 5.8423e-06 gnorm: 1.25 [22:23:30< 2:07:13] +[titan] 2025-10-05 20:57:52,927 - root - INFO - step: 36545 loss: 1.9727 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 20:57:52,928 - root - INFO - lr: 5.8399e-06 gnorm: 1.24 [22:23:41< 2:07:02] +[titan] 2025-10-05 20:58:01,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:58:03,825 - root - INFO - step: 36550 loss: 1.9288 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7049 +[titan] 2025-10-05 20:58:03,825 - root - INFO - lr: 5.8375e-06 gnorm: 1.24 [22:23:52< 2:06:50] +[titan] 2025-10-05 20:58:14,740 - root - INFO - step: 36555 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 20:58:14,741 - root - INFO - lr: 5.8351e-06 gnorm: 1.26 [22:24:03< 2:06:39] +[titan] 2025-10-05 20:58:25,614 - root - INFO - step: 36560 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 20:58:25,614 - root - INFO - lr: 5.8326e-06 gnorm: 1.18 [22:24:13< 2:06:28] +[titan] 2025-10-05 20:58:36,506 - root - INFO - step: 36565 loss: 1.8964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:58:36,506 - root - INFO - lr: 5.8302e-06 gnorm: 1.20 [22:24:24< 2:06:17] +[titan] 2025-10-05 20:58:47,390 - root - INFO - step: 36570 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 20:58:47,390 - root - INFO - lr: 5.8278e-06 gnorm: 1.25 [22:24:35< 2:06:06] +[titan] 2025-10-05 20:58:58,289 - root - INFO - step: 36575 loss: 1.9029 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:58:58,289 - root - INFO - lr: 5.8254e-06 gnorm: 1.20 [22:24:46< 2:05:55] +[titan] 2025-10-05 20:59:09,190 - root - INFO - step: 36580 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7074 +[titan] 2025-10-05 20:59:09,190 - root - INFO - lr: 
5.8231e-06 gnorm: 1.24 [22:24:57< 2:05:44] +[titan] 2025-10-05 20:59:20,103 - root - INFO - step: 36585 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7440 +[titan] 2025-10-05 20:59:20,103 - root - INFO - lr: 5.8207e-06 gnorm: 1.22 [22:25:08< 2:05:33] +[titan] 2025-10-05 20:59:30,980 - root - INFO - step: 36590 loss: 1.9441 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:59:30,980 - root - INFO - lr: 5.8183e-06 gnorm: 1.22 [22:25:19< 2:05:22] +[titan] 2025-10-05 20:59:41,845 - root - INFO - step: 36595 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6830 +[titan] 2025-10-05 20:59:41,845 - root - INFO - lr: 5.8159e-06 gnorm: 1.20 [22:25:30< 2:05:11] +[titan] 2025-10-05 20:59:50,540 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:59:52,731 - root - INFO - step: 36600 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:59:52,732 - root - INFO - lr: 5.8135e-06 gnorm: 1.20 [22:25:41< 2:05:00] +[titan] 2025-10-05 21:00:03,618 - root - INFO - step: 36605 loss: 1.8614 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6465 +[titan] 2025-10-05 21:00:03,618 - root - INFO - lr: 5.8111e-06 gnorm: 1.22 [22:25:51< 2:04:49] +[titan] 2025-10-05 21:00:14,529 - root - INFO - step: 36610 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 21:00:14,529 - root - INFO - lr: 5.8088e-06 gnorm: 1.24 [22:26:02< 2:04:38] +[titan] 2025-10-05 21:00:25,449 - root - INFO - step: 36615 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:00:25,450 - root - INFO - lr: 5.8064e-06 gnorm: 1.23 [22:26:13< 2:04:27] +[titan] 2025-10-05 21:00:36,361 - root - INFO - step: 36620 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6464 +[titan] 2025-10-05 21:00:36,361 - root - INFO - lr: 5.8040e-06 gnorm: 1.24 [22:26:24< 2:04:16] +[titan] 2025-10-05 21:00:47,259 - root - INFO - step: 36625 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7482 +[titan] 2025-10-05 21:00:47,259 - root - INFO - lr: 5.8017e-06 gnorm: 1.24 [22:26:35< 2:04:05] +[titan] 2025-10-05 21:00:58,160 - root - INFO - step: 36630 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:00:58,160 - root - INFO - lr: 
5.7993e-06 gnorm: 1.21 [22:26:46< 2:03:54] +[titan] 2025-10-05 21:01:09,053 - root - INFO - step: 36635 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:01:09,053 - root - INFO - lr: 5.7969e-06 gnorm: 1.26 [22:26:57< 2:03:43] +[titan] 2025-10-05 21:01:19,958 - root - INFO - step: 36640 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 21:01:19,958 - root - INFO - lr: 5.7946e-06 gnorm: 1.24 [22:27:08< 2:03:32] +[titan] 2025-10-05 21:01:30,865 - root - INFO - step: 36645 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:01:30,865 - root - INFO - lr: 5.7922e-06 gnorm: 1.22 [22:27:19< 2:03:21] +[titan] 2025-10-05 21:01:39,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:01:41,769 - root - INFO - step: 36650 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:01:41,769 - root - INFO - lr: 5.7899e-06 gnorm: 1.25 [22:27:30< 2:03:10] +[titan] 2025-10-05 21:01:52,656 - root - INFO - step: 36655 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6666 +[titan] 2025-10-05 21:01:52,656 - root - INFO - lr: 5.7876e-06 gnorm: 1.26 [22:27:41< 2:02:59] +[titan] 2025-10-05 21:02:03,549 - root - INFO - step: 36660 loss: 1.9170 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 21:02:03,549 - root - INFO - lr: 5.7852e-06 gnorm: 1.24 [22:27:51< 2:02:48] +[titan] 2025-10-05 21:02:14,436 - root - INFO - step: 36665 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6585 +[titan] 2025-10-05 21:02:14,436 - root - INFO - lr: 5.7829e-06 gnorm: 1.20 [22:28:02< 2:02:36] +[titan] 2025-10-05 21:02:25,324 - root - INFO - step: 36670 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 21:02:25,324 - root - INFO - lr: 5.7806e-06 gnorm: 1.25 [22:28:13< 2:02:25] +[titan] 2025-10-05 21:02:36,230 - root - INFO - step: 36675 loss: 1.8517 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6371 +[titan] 2025-10-05 21:02:36,230 - root - INFO - lr: 5.7782e-06 gnorm: 1.21 [22:28:24< 2:02:14] +[titan] 2025-10-05 21:02:47,119 - root - INFO - step: 36680 loss: 1.8308 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6192 +[titan] 2025-10-05 21:02:47,119 - root - INFO - lr: 
5.7759e-06 gnorm: 1.21 [22:28:35< 2:02:03] +[titan] 2025-10-05 21:02:58,028 - root - INFO - step: 36685 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:02:58,028 - root - INFO - lr: 5.7736e-06 gnorm: 1.27 [22:28:46< 2:01:52] +[titan] 2025-10-05 21:03:08,899 - root - INFO - step: 36690 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 21:03:08,899 - root - INFO - lr: 5.7713e-06 gnorm: 1.23 [22:28:57< 2:01:41] +[titan] 2025-10-05 21:03:19,806 - root - INFO - step: 36695 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:03:19,806 - root - INFO - lr: 5.7689e-06 gnorm: 1.23 [22:29:08< 2:01:30] +[titan] 2025-10-05 21:03:28,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:03:30,710 - root - INFO - step: 36700 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 21:03:30,710 - root - INFO - lr: 5.7666e-06 gnorm: 1.27 [22:29:19< 2:01:19] +[titan] 2025-10-05 21:03:41,623 - root - INFO - step: 36705 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 21:03:41,624 - root - INFO - lr: 5.7643e-06 gnorm: 1.24 [22:29:29< 2:01:08] +[titan] 2025-10-05 21:03:52,525 - root - INFO - step: 36710 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:03:52,525 - root - INFO - lr: 5.7620e-06 gnorm: 1.26 [22:29:40< 2:00:57] +[titan] 2025-10-05 21:04:03,447 - root - INFO - step: 36715 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:04:03,447 - root - INFO - lr: 5.7597e-06 gnorm: 1.26 [22:29:51< 2:00:46] +[titan] 2025-10-05 21:04:14,324 - root - INFO - step: 36720 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:04:14,324 - root - INFO - lr: 5.7574e-06 gnorm: 1.20 [22:30:02< 2:00:35] +[titan] 2025-10-05 21:04:25,273 - root - INFO - step: 36725 loss: 1.9301 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:04:25,273 - root - INFO - lr: 5.7551e-06 gnorm: 1.23 [22:30:13< 2:00:24] +[titan] 2025-10-05 21:04:36,157 - root - INFO - step: 36730 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:04:36,157 - root - INFO - lr: 
5.7528e-06 gnorm: 1.24 [22:30:24< 2:00:13] +[titan] 2025-10-05 21:04:47,035 - root - INFO - step: 36735 loss: 1.9023 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 21:04:47,035 - root - INFO - lr: 5.7505e-06 gnorm: 1.26 [22:30:35< 2:00:02] +[titan] 2025-10-05 21:04:57,939 - root - INFO - step: 36740 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 21:04:57,939 - root - INFO - lr: 5.7483e-06 gnorm: 1.21 [22:30:46< 1:59:51] +[titan] 2025-10-05 21:05:08,831 - root - INFO - step: 36745 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 21:05:08,831 - root - INFO - lr: 5.7460e-06 gnorm: 1.25 [22:30:57< 1:59:40] +[titan] 2025-10-05 21:05:17,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:05:19,701 - root - INFO - step: 36750 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:05:19,702 - root - INFO - lr: 5.7437e-06 gnorm: 1.22 [22:31:08< 1:59:29] +[titan] 2025-10-05 21:05:30,640 - root - INFO - step: 36755 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 21:05:30,640 - root - INFO - lr: 5.7414e-06 gnorm: 1.23 [22:31:18< 1:59:18] +[titan] 2025-10-05 21:05:41,514 - root - INFO - step: 36760 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:05:41,514 - root - INFO - lr: 5.7392e-06 gnorm: 1.21 [22:31:29< 1:59:07] +[titan] 2025-10-05 21:05:52,376 - root - INFO - step: 36765 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 21:05:52,376 - root - INFO - lr: 5.7369e-06 gnorm: 1.26 [22:31:40< 1:58:56] +[titan] 2025-10-05 21:06:03,266 - root - INFO - step: 36770 loss: 1.8668 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 21:06:03,266 - root - INFO - lr: 5.7346e-06 gnorm: 1.22 [22:31:51< 1:58:45] +[titan] 2025-10-05 21:06:14,143 - root - INFO - step: 36775 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 21:06:14,143 - root - INFO - lr: 5.7324e-06 gnorm: 1.23 [22:32:02< 1:58:34] +[titan] 2025-10-05 21:06:25,098 - root - INFO - step: 36780 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 21:06:25,098 - root - INFO - lr: 
5.7301e-06 gnorm: 1.22 [22:32:13< 1:58:23] +[titan] 2025-10-05 21:06:35,961 - root - INFO - step: 36785 loss: 1.8486 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6355 +[titan] 2025-10-05 21:06:35,961 - root - INFO - lr: 5.7279e-06 gnorm: 1.26 [22:32:24< 1:58:11] +[titan] 2025-10-05 21:06:46,824 - root - INFO - step: 36790 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 21:06:46,825 - root - INFO - lr: 5.7256e-06 gnorm: 1.26 [22:32:35< 1:58:00] +[titan] 2025-10-05 21:06:57,688 - root - INFO - step: 36795 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7115 +[titan] 2025-10-05 21:06:57,688 - root - INFO - lr: 5.7234e-06 gnorm: 1.23 [22:32:46< 1:57:49] +[titan] 2025-10-05 21:07:06,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:07:08,540 - root - INFO - step: 36800 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 21:07:08,541 - root - INFO - lr: 5.7211e-06 gnorm: 1.23 [22:32:56< 1:57:38] +[titan] 2025-10-05 21:07:19,424 - root - INFO - step: 36805 loss: 1.9493 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:07:19,425 - root - INFO - lr: 5.7189e-06 gnorm: 1.24 [22:33:07< 1:57:27] +[titan] 2025-10-05 21:07:30,382 - root - INFO - step: 36810 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:07:30,383 - root - INFO - lr: 5.7166e-06 gnorm: 1.23 [22:33:18< 1:57:16] +[titan] 2025-10-05 21:07:41,263 - root - INFO - step: 36815 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 21:07:41,263 - root - INFO - lr: 5.7144e-06 gnorm: 1.24 [22:33:29< 1:57:05] +[titan] 2025-10-05 21:07:52,120 - root - INFO - step: 36820 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 21:07:52,120 - root - INFO - lr: 5.7122e-06 gnorm: 1.21 [22:33:40< 1:56:54] +[titan] 2025-10-05 21:08:02,998 - root - INFO - step: 36825 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6712 +[titan] 2025-10-05 21:08:02,999 - root - INFO - lr: 5.7100e-06 gnorm: 1.24 [22:33:51< 1:56:43] +[titan] 2025-10-05 21:08:13,877 - root - INFO - step: 36830 loss: 1.9915 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 21:08:13,878 - root - INFO - lr: 
5.7077e-06 gnorm: 1.31 [22:34:02< 1:56:32] +[titan] 2025-10-05 21:08:25,107 - root - INFO - step: 36835 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 29,180 tflops: 404.83 mfu: 40.93% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6650 +[titan] 2025-10-05 21:08:25,108 - root - INFO - lr: 5.7055e-06 gnorm: 1.20 [22:34:13< 1:56:21] +[titan] 2025-10-05 21:08:35,977 - root - INFO - step: 36840 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 21:08:35,977 - root - INFO - lr: 5.7033e-06 gnorm: 1.24 [22:34:24< 1:56:10] +[titan] 2025-10-05 21:08:46,865 - root - INFO - step: 36845 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 21:08:46,865 - root - INFO - lr: 5.7011e-06 gnorm: 1.24 [22:34:35< 1:55:59] +[titan] 2025-10-05 21:08:55,537 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:08:57,718 - root - INFO - step: 36850 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 21:08:57,718 - root - INFO - lr: 5.6989e-06 gnorm: 1.26 [22:34:46< 1:55:48] +[titan] 2025-10-05 21:09:08,595 - root - INFO - step: 36855 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7308 +[titan] 2025-10-05 21:09:08,595 - root - INFO - lr: 5.6967e-06 gnorm: 1.24 [22:34:56< 1:55:37] +[titan] 2025-10-05 21:09:19,469 - root - INFO - step: 36860 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:09:19,470 - root - INFO - lr: 5.6945e-06 gnorm: 1.27 [22:35:07< 1:55:26] +[titan] 2025-10-05 21:09:28,579 - root - INFO - Dumping profiler traces at step 36864 +[titan] 2025-10-05 21:09:28,622 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:09:30,837 - root - INFO - step: 36865 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 28,827 tflops: 399.93 mfu: 40.44% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 21:09:30,837 - root - INFO - lr: 5.6923e-06 gnorm: 1.23 [22:35:19< 1:55:15] +[titan] 2025-10-05 21:09:41,699 - root - INFO - step: 36870 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 21:09:41,699 - root - INFO - lr: 5.6901e-06 gnorm: 1.24 [22:35:30< 1:55:04] +[titan] 2025-10-05 21:09:52,574 - root - INFO - step: 36875 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 21:09:52,574 - root - INFO - lr: 5.6879e-06 gnorm: 1.24 [22:35:40< 1:54:53] +[titan] 2025-10-05 21:10:03,422 - root - INFO - step: 36880 loss: 
1.9042 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:10:03,422 - root - INFO - lr: 5.6857e-06 gnorm: 1.21 [22:35:51< 1:54:42] +[titan] 2025-10-05 21:10:14,288 - root - INFO - step: 36885 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 21:10:14,288 - root - INFO - lr: 5.6835e-06 gnorm: 1.23 [22:36:02< 1:54:31] +[titan] 2025-10-05 21:10:25,157 - root - INFO - step: 36890 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 21:10:25,157 - root - INFO - lr: 5.6813e-06 gnorm: 1.24 [22:36:13< 1:54:20] +[titan] 2025-10-05 21:10:36,097 - root - INFO - step: 36895 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 21:10:36,097 - root - INFO - lr: 5.6792e-06 gnorm: 1.24 [22:36:24< 1:54:09] +[titan] 2025-10-05 21:10:44,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:10:46,986 - root - INFO - step: 36900 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 21:10:46,987 - root - INFO - lr: 5.6770e-06 gnorm: 1.28 [22:36:35< 1:53:58] +[titan] 2025-10-05 21:10:57,833 - root - INFO - step: 36905 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 21:10:57,833 - root - INFO - lr: 5.6748e-06 gnorm: 1.23 [22:36:46< 1:53:47] +[titan] 2025-10-05 21:11:08,682 - root - INFO - step: 36910 loss: 1.8557 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6407 +[titan] 2025-10-05 21:11:08,682 - root - INFO - lr: 5.6726e-06 gnorm: 1.19 [22:36:57< 1:53:36] +[titan] 2025-10-05 21:11:19,531 - root - INFO - step: 36915 loss: 1.8896 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:11:19,532 - root - INFO - lr: 5.6705e-06 gnorm: 1.18 [22:37:07< 1:53:24] +[titan] 2025-10-05 21:11:30,448 - root - INFO - step: 36920 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:11:30,449 - root - INFO - lr: 5.6683e-06 gnorm: 1.24 [22:37:18< 1:53:13] +[titan] 2025-10-05 21:11:41,323 - root - INFO - step: 36925 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 21:11:41,323 - root - INFO - lr: 5.6662e-06 gnorm: 1.26 [22:37:29< 1:53:02] +[titan] 2025-10-05 21:11:52,243 - root - INFO - step: 36930 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6917 +[titan] 2025-10-05 21:11:52,243 - root - INFO - lr: 
5.6640e-06 gnorm: 1.28 [22:37:40< 1:52:51] +[titan] 2025-10-05 21:12:03,124 - root - INFO - step: 36935 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6806 +[titan] 2025-10-05 21:12:03,124 - root - INFO - lr: 5.6619e-06 gnorm: 1.20 [22:37:51< 1:52:40] +[titan] 2025-10-05 21:12:14,002 - root - INFO - step: 36940 loss: 1.9158 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6952 +[titan] 2025-10-05 21:12:14,002 - root - INFO - lr: 5.6597e-06 gnorm: 1.26 [22:38:02< 1:52:29] +[titan] 2025-10-05 21:12:24,869 - root - INFO - step: 36945 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 21:12:24,870 - root - INFO - lr: 5.6576e-06 gnorm: 1.21 [22:38:13< 1:52:18] +[titan] 2025-10-05 21:12:33,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:12:35,810 - root - INFO - step: 36950 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 21:12:35,810 - root - INFO - lr: 5.6554e-06 gnorm: 1.24 [22:38:24< 1:52:07] +[titan] 2025-10-05 21:12:46,684 - root - INFO - step: 36955 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7035 +[titan] 2025-10-05 21:12:46,685 - root - INFO - lr: 5.6533e-06 gnorm: 1.21 [22:38:35< 1:51:56] +[titan] 2025-10-05 21:12:57,552 - root - INFO - step: 36960 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 21:12:57,552 - root - INFO - lr: 5.6512e-06 gnorm: 1.24 [22:38:45< 1:51:45] +[titan] 2025-10-05 21:13:08,463 - root - INFO - step: 36965 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7952 +[titan] 2025-10-05 21:13:08,463 - root - INFO - lr: 5.6490e-06 gnorm: 1.24 [22:38:56< 1:51:34] +[titan] 2025-10-05 21:13:19,335 - root - INFO - step: 36970 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:19,336 - root - INFO - lr: 5.6469e-06 gnorm: 1.21 [22:39:07< 1:51:23] +[titan] 2025-10-05 21:13:30,256 - root - INFO - step: 36975 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:30,256 - root - INFO - lr: 5.6448e-06 gnorm: 1.28 [22:39:18< 1:51:12] +[titan] 2025-10-05 21:13:41,127 - root - INFO - step: 36980 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:13:41,128 - root - INFO - lr: 
5.6427e-06 gnorm: 1.23 [22:39:29< 1:51:01] +[titan] 2025-10-05 21:13:51,994 - root - INFO - step: 36985 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:13:51,995 - root - INFO - lr: 5.6405e-06 gnorm: 1.29 [22:39:40< 1:50:50] +[titan] 2025-10-05 21:14:02,859 - root - INFO - step: 36990 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7044 +[titan] 2025-10-05 21:14:02,859 - root - INFO - lr: 5.6384e-06 gnorm: 1.24 [22:39:51< 1:50:39] +[titan] 2025-10-05 21:14:13,749 - root - INFO - step: 36995 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6987 +[titan] 2025-10-05 21:14:13,749 - root - INFO - lr: 5.6363e-06 gnorm: 1.22 [22:40:02< 1:50:28] +[titan] 2025-10-05 21:14:22,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:14:24,631 - root - INFO - step: 37000 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6388 +[titan] 2025-10-05 21:14:24,631 - root - INFO - lr: 5.6342e-06 gnorm: 1.20 [22:40:12< 1:50:17] +[titan] 2025-10-05 21:14:35,570 - root - INFO - step: 37005 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 21:14:35,570 - root - INFO - lr: 5.6321e-06 gnorm: 1.26 [22:40:23< 1:50:06] +[titan] 2025-10-05 21:14:46,450 - root - INFO - step: 37010 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 21:14:46,450 - root - INFO - lr: 5.6300e-06 gnorm: 1.19 [22:40:34< 1:49:55] +[titan] 2025-10-05 21:14:57,328 - root - INFO - step: 37015 loss: 1.9312 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7076 +[titan] 2025-10-05 21:14:57,329 - root - INFO - lr: 5.6279e-06 gnorm: 1.27 [22:40:45< 1:49:44] +[titan] 2025-10-05 21:15:08,191 - root - INFO - step: 37020 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 21:15:08,192 - root - INFO - lr: 5.6258e-06 gnorm: 1.28 [22:40:56< 1:49:33] +[titan] 2025-10-05 21:15:19,080 - root - INFO - step: 37025 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:15:19,080 - root - INFO - lr: 5.6237e-06 gnorm: 1.20 [22:41:07< 1:49:22] +[titan] 2025-10-05 21:15:29,953 - root - INFO - step: 37030 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 21:15:29,953 - root - INFO - lr: 
5.6216e-06 gnorm: 1.21 [22:41:18< 1:49:11] +[titan] 2025-10-05 21:15:40,885 - root - INFO - step: 37035 loss: 1.8738 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6571 +[titan] 2025-10-05 21:15:40,885 - root - INFO - lr: 5.6196e-06 gnorm: 1.21 [22:41:29< 1:48:59] +[titan] 2025-10-05 21:15:51,738 - root - INFO - step: 37040 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6703 +[titan] 2025-10-05 21:15:51,738 - root - INFO - lr: 5.6175e-06 gnorm: 1.25 [22:41:40< 1:48:48] +[titan] 2025-10-05 21:16:02,623 - root - INFO - step: 37045 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:16:02,624 - root - INFO - lr: 5.6154e-06 gnorm: 1.21 [22:41:50< 1:48:37] +[titan] 2025-10-05 21:16:11,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:16:13,511 - root - INFO - step: 37050 loss: 1.9092 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 21:16:13,511 - root - INFO - lr: 5.6133e-06 gnorm: 1.23 [22:42:01< 1:48:26] +[titan] 2025-10-05 21:16:24,393 - root - INFO - step: 37055 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 21:16:24,393 - root - INFO - lr: 5.6113e-06 gnorm: 1.24 [22:42:12< 1:48:15] +[titan] 2025-10-05 21:16:35,329 - root - INFO - step: 37060 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:16:35,329 - root - INFO - lr: 5.6092e-06 gnorm: 1.25 [22:42:23< 1:48:04] +[titan] 2025-10-05 21:16:46,199 - root - INFO - step: 37065 loss: 1.9535 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:16:46,199 - root - INFO - lr: 5.6071e-06 gnorm: 1.27 [22:42:34< 1:47:53] +[titan] 2025-10-05 21:16:57,064 - root - INFO - step: 37070 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 21:16:57,064 - root - INFO - lr: 5.6051e-06 gnorm: 1.24 [22:42:45< 1:47:42] +[titan] 2025-10-05 21:17:07,940 - root - INFO - step: 37075 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7181 +[titan] 2025-10-05 21:17:07,940 - root - INFO - lr: 5.6030e-06 gnorm: 1.23 [22:42:56< 1:47:31] +[titan] 2025-10-05 21:17:18,806 - root - INFO - step: 37080 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 21:17:18,807 - root - INFO - lr: 
5.6010e-06 gnorm: 1.20 [22:43:07< 1:47:20] +[titan] 2025-10-05 21:17:29,692 - root - INFO - step: 37085 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 21:17:29,692 - root - INFO - lr: 5.5989e-06 gnorm: 1.21 [22:43:18< 1:47:09] +[titan] 2025-10-05 21:17:40,647 - root - INFO - step: 37090 loss: 1.9429 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:17:40,648 - root - INFO - lr: 5.5969e-06 gnorm: 1.27 [22:43:28< 1:46:58] +[titan] 2025-10-05 21:17:51,517 - root - INFO - step: 37095 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 21:17:51,517 - root - INFO - lr: 5.5949e-06 gnorm: 1.27 [22:43:39< 1:46:47] +[titan] 2025-10-05 21:18:00,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:18:02,399 - root - INFO - step: 37100 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6612 +[titan] 2025-10-05 21:18:02,399 - root - INFO - lr: 5.5928e-06 gnorm: 1.26 [22:43:50< 1:46:36] +[titan] 2025-10-05 21:18:13,285 - root - INFO - step: 37105 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6690 +[titan] 2025-10-05 21:18:13,286 - root - INFO - lr: 5.5908e-06 gnorm: 1.20 [22:44:01< 1:46:25] +[titan] 2025-10-05 21:18:24,145 - root - INFO - step: 37110 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 21:18:24,145 - root - INFO - lr: 5.5888e-06 gnorm: 1.25 [22:44:12< 1:46:14] +[titan] 2025-10-05 21:18:35,081 - root - INFO - step: 37115 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 21:18:35,081 - root - INFO - lr: 5.5867e-06 gnorm: 1.20 [22:44:23< 1:46:03] +[titan] 2025-10-05 21:18:45,955 - root - INFO - step: 37120 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:18:45,955 - root - INFO - lr: 5.5847e-06 gnorm: 1.24 [22:44:34< 1:45:52] +[titan] 2025-10-05 21:18:56,847 - root - INFO - step: 37125 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7220 +[titan] 2025-10-05 21:18:56,848 - root - INFO - lr: 5.5827e-06 gnorm: 1.27 [22:44:45< 1:45:41] +[titan] 2025-10-05 21:19:07,708 - root - INFO - step: 37130 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 21:19:07,709 - root - INFO - lr: 
5.5807e-06 gnorm: 1.20 [22:44:56< 1:45:30] +[titan] 2025-10-05 21:19:18,570 - root - INFO - step: 37135 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 21:19:18,570 - root - INFO - lr: 5.5787e-06 gnorm: 1.23 [22:45:06< 1:45:19] +[titan] 2025-10-05 21:19:29,432 - root - INFO - step: 37140 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 21:19:29,432 - root - INFO - lr: 5.5766e-06 gnorm: 1.25 [22:45:17< 1:45:08] +[titan] 2025-10-05 21:19:40,328 - root - INFO - step: 37145 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 21:19:40,328 - root - INFO - lr: 5.5746e-06 gnorm: 1.28 [22:45:28< 1:44:57] +[titan] 2025-10-05 21:19:48,999 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:19:51,178 - root - INFO - step: 37150 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 21:19:51,178 - root - INFO - lr: 5.5726e-06 gnorm: 1.28 [22:45:39< 1:44:46] +[titan] 2025-10-05 21:20:02,074 - root - INFO - step: 37155 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:20:02,074 - root - INFO - lr: 5.5706e-06 gnorm: 1.22 [22:45:50< 1:44:35] +[titan] 2025-10-05 21:20:12,936 - root - INFO - step: 37160 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 21:20:12,936 - root - INFO - lr: 5.5686e-06 gnorm: 1.25 [22:46:01< 1:44:23] +[titan] 2025-10-05 21:20:23,793 - root - INFO - step: 37165 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 21:20:23,793 - root - INFO - lr: 5.5666e-06 gnorm: 1.26 [22:46:12< 1:44:12] +[titan] 2025-10-05 21:20:34,674 - root - INFO - step: 37170 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:20:34,674 - root - INFO - lr: 5.5647e-06 gnorm: 1.21 [22:46:22< 1:44:01] +[titan] 2025-10-05 21:20:45,596 - root - INFO - step: 37175 loss: 1.9773 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 21:20:45,596 - root - INFO - lr: 5.5627e-06 gnorm: 1.27 [22:46:33< 1:43:50] +[titan] 2025-10-05 21:20:56,483 - root - INFO - step: 37180 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:20:56,484 - root - INFO - lr: 
5.5607e-06 gnorm: 1.26 [22:46:44< 1:43:39] +[titan] 2025-10-05 21:21:07,391 - root - INFO - step: 37185 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:21:07,392 - root - INFO - lr: 5.5587e-06 gnorm: 1.24 [22:46:55< 1:43:28] +[titan] 2025-10-05 21:21:18,272 - root - INFO - step: 37190 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 21:21:18,272 - root - INFO - lr: 5.5567e-06 gnorm: 1.22 [22:47:06< 1:43:17] +[titan] 2025-10-05 21:21:29,155 - root - INFO - step: 37195 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:21:29,155 - root - INFO - lr: 5.5548e-06 gnorm: 1.25 [22:47:17< 1:43:06] +[titan] 2025-10-05 21:21:37,896 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:21:40,076 - root - INFO - step: 37200 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:21:40,077 - root - INFO - lr: 5.5528e-06 gnorm: 1.22 [22:47:28< 1:42:55] +[titan] 2025-10-05 21:21:50,943 - root - INFO - step: 37205 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 21:21:50,944 - root - INFO - lr: 5.5508e-06 gnorm: 1.21 [22:47:39< 1:42:44] +[titan] 2025-10-05 21:22:01,837 - root - INFO - step: 37210 loss: 1.9065 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 21:22:01,837 - root - INFO - lr: 5.5489e-06 gnorm: 1.21 [22:47:50< 1:42:33] +[titan] 2025-10-05 21:22:12,716 - root - INFO - step: 37215 loss: 1.8559 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6413 +[titan] 2025-10-05 21:22:12,716 - root - INFO - lr: 5.5469e-06 gnorm: 1.23 [22:48:01< 1:42:22] +[titan] 2025-10-05 21:22:23,615 - root - INFO - step: 37220 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 21:22:23,615 - root - INFO - lr: 5.5450e-06 gnorm: 1.24 [22:48:11< 1:42:11] +[titan] 2025-10-05 21:22:34,482 - root - INFO - step: 37225 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 21:22:34,483 - root - INFO - lr: 5.5430e-06 gnorm: 1.26 [22:48:22< 1:42:00] +[titan] 2025-10-05 21:22:45,400 - root - INFO - step: 37230 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 21:22:45,400 - root - INFO - lr: 
5.5411e-06 gnorm: 1.23 [22:48:33< 1:41:49] +[titan] 2025-10-05 21:22:56,271 - root - INFO - step: 37235 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 21:22:56,272 - root - INFO - lr: 5.5391e-06 gnorm: 1.23 [22:48:44< 1:41:38] +[titan] 2025-10-05 21:23:07,114 - root - INFO - step: 37240 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:23:07,114 - root - INFO - lr: 5.5372e-06 gnorm: 1.23 [22:48:55< 1:41:27] +[titan] 2025-10-05 21:23:17,969 - root - INFO - step: 37245 loss: 1.8827 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 21:23:17,970 - root - INFO - lr: 5.5352e-06 gnorm: 1.23 [22:49:06< 1:41:16] +[titan] 2025-10-05 21:23:26,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:23:28,858 - root - INFO - step: 37250 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6816 +[titan] 2025-10-05 21:23:28,858 - root - INFO - lr: 5.5333e-06 gnorm: 1.21 [22:49:17< 1:41:05] +[titan] 2025-10-05 21:23:39,774 - root - INFO - step: 37255 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 21:23:39,774 - root - INFO - lr: 5.5314e-06 gnorm: 1.21 [22:49:28< 1:40:54] +[titan] 2025-10-05 21:23:50,632 - root - INFO - step: 37260 loss: 1.8928 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 21:23:50,632 - root - INFO - lr: 5.5295e-06 gnorm: 1.20 [22:49:38< 1:40:43] +[titan] 2025-10-05 21:24:01,494 - root - INFO - step: 37265 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 21:24:01,495 - root - INFO - lr: 5.5275e-06 gnorm: 1.21 [22:49:49< 1:40:32] +[titan] 2025-10-05 21:24:12,333 - root - INFO - step: 37270 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 21:24:12,333 - root - INFO - lr: 5.5256e-06 gnorm: 1.24 [22:50:00< 1:40:21] +[titan] 2025-10-05 21:24:23,189 - root - INFO - step: 37275 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 21:24:23,189 - root - INFO - lr: 5.5237e-06 gnorm: 1.22 [22:50:11< 1:40:10] +[titan] 2025-10-05 21:24:34,040 - root - INFO - step: 37280 loss: 1.8747 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 21:24:34,041 - root - INFO - lr: 
5.5218e-06 gnorm: 1.19 [22:50:22< 1:39:59] +[titan] 2025-10-05 21:24:44,965 - root - INFO - step: 37285 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6500 +[titan] 2025-10-05 21:24:44,965 - root - INFO - lr: 5.5199e-06 gnorm: 1.23 [22:50:33< 1:39:48] +[titan] 2025-10-05 21:24:55,829 - root - INFO - step: 37290 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6803 +[titan] 2025-10-05 21:24:55,829 - root - INFO - lr: 5.5180e-06 gnorm: 1.24 [22:50:44< 1:39:36] +[titan] 2025-10-05 21:25:06,686 - root - INFO - step: 37295 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 21:25:06,686 - root - INFO - lr: 5.5161e-06 gnorm: 1.23 [22:50:54< 1:39:25] +[titan] 2025-10-05 21:25:15,356 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:25:17,530 - root - INFO - step: 37300 loss: 1.9230 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 21:25:17,530 - root - INFO - lr: 5.5142e-06 gnorm: 1.29 [22:51:05< 1:39:14] +[titan] 2025-10-05 21:25:28,378 - root - INFO - step: 37305 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 21:25:28,378 - root - INFO - lr: 5.5123e-06 gnorm: 1.28 [22:51:16< 1:39:03] +[titan] 2025-10-05 21:25:39,211 - root - INFO - step: 37310 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 21:25:39,211 - root - INFO - lr: 5.5104e-06 gnorm: 1.28 [22:51:27< 1:38:52] +[titan] 2025-10-05 21:25:50,153 - root - INFO - step: 37315 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 21:25:50,153 - root - INFO - lr: 5.5085e-06 gnorm: 1.24 [22:51:38< 1:38:41] +[titan] 2025-10-05 21:26:01,007 - root - INFO - step: 37320 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 21:26:01,007 - root - INFO - lr: 5.5066e-06 gnorm: 1.22 [22:51:49< 1:38:30] +[titan] 2025-10-05 21:26:11,849 - root - INFO - step: 37325 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7401 +[titan] 2025-10-05 21:26:11,849 - root - INFO - lr: 5.5047e-06 gnorm: 1.28 [22:52:00< 1:38:19] +[titan] 2025-10-05 21:26:22,692 - root - INFO - step: 37330 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 21:26:22,692 - root - INFO - lr: 
5.5028e-06 gnorm: 1.20 [22:52:10< 1:38:08] +[titan] 2025-10-05 21:26:33,566 - root - INFO - step: 37335 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 21:26:33,566 - root - INFO - lr: 5.5010e-06 gnorm: 1.21 [22:52:21< 1:37:57] +[titan] 2025-10-05 21:26:44,447 - root - INFO - step: 37340 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 21:26:44,447 - root - INFO - lr: 5.4991e-06 gnorm: 1.30 [22:52:32< 1:37:46] +[titan] 2025-10-05 21:26:55,353 - root - INFO - step: 37345 loss: 1.8670 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6522 +[titan] 2025-10-05 21:26:55,354 - root - INFO - lr: 5.4972e-06 gnorm: 1.19 [22:52:43< 1:37:35] +[titan] 2025-10-05 21:27:04,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:27:06,210 - root - INFO - step: 37350 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 21:27:06,210 - root - INFO - lr: 5.4954e-06 gnorm: 1.23 [22:52:54< 1:37:24] +[titan] 2025-10-05 21:27:17,097 - root - INFO - step: 37355 loss: 1.8844 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 21:27:17,097 - root - INFO - lr: 5.4935e-06 gnorm: 1.22 [22:53:05< 1:37:13] +[titan] 2025-10-05 21:27:27,968 - root - INFO - step: 37360 loss: 1.8981 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:27:27,968 - root - INFO - lr: 5.4917e-06 gnorm: 1.24 [22:53:16< 1:37:02] +[titan] 2025-10-05 21:27:38,788 - root - INFO - step: 37365 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 21:27:38,788 - root - INFO - lr: 5.4898e-06 gnorm: 1.22 [22:53:27< 1:36:51] +[titan] 2025-10-05 21:27:49,689 - root - INFO - step: 37370 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 21:27:49,689 - root - INFO - lr: 5.4880e-06 gnorm: 1.26 [22:53:37< 1:36:40] +[titan] 2025-10-05 21:28:00,629 - root - INFO - step: 37375 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 21:28:00,630 - root - INFO - lr: 5.4861e-06 gnorm: 1.28 [22:53:48< 1:36:29] +[titan] 2025-10-05 21:28:03,008 - root - INFO - Dumping profiler traces at step 37376 +[titan] 2025-10-05 21:28:03,048 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:28:11,751 - root - INFO - step: 37380 loss: 
1.9803 memory: 118.84GiB(85.28%) tps: 29,465 tflops: 408.78 mfu: 41.33% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 21:28:11,751 - root - INFO - lr: 5.4843e-06 gnorm: 1.26 [22:54:00< 1:36:18] +[titan] 2025-10-05 21:28:22,605 - root - INFO - step: 37385 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 21:28:22,605 - root - INFO - lr: 5.4824e-06 gnorm: 1.23 [22:54:10< 1:36:07] +[titan] 2025-10-05 21:28:33,443 - root - INFO - step: 37390 loss: 1.8450 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6318 +[titan] 2025-10-05 21:28:33,443 - root - INFO - lr: 5.4806e-06 gnorm: 1.24 [22:54:21< 1:35:56] +[titan] 2025-10-05 21:28:44,325 - root - INFO - step: 37395 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:28:44,326 - root - INFO - lr: 5.4788e-06 gnorm: 1.22 [22:54:32< 1:35:45] +[titan] 2025-10-05 21:28:52,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:28:55,171 - root - INFO - step: 37400 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:28:55,171 - root - INFO - lr: 5.4769e-06 gnorm: 1.21 [22:54:43< 1:35:34] +[titan] 2025-10-05 21:29:06,006 - root - INFO - step: 37405 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 21:29:06,006 - root - INFO - lr: 5.4751e-06 gnorm: 1.21 [22:54:54< 1:35:23] +[titan] 2025-10-05 21:29:16,874 - root - INFO - step: 37410 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 21:29:16,874 - root - INFO - lr: 5.4733e-06 gnorm: 1.21 [22:55:05< 1:35:12] +[titan] 2025-10-05 21:29:27,686 - root - INFO - step: 37415 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 21:29:27,686 - root - INFO - lr: 5.4715e-06 gnorm: 1.19 [22:55:15< 1:35:01] +[titan] 2025-10-05 21:29:38,526 - root - INFO - step: 37420 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:29:38,526 - root - INFO - lr: 5.4696e-06 gnorm: 1.22 [22:55:26< 1:34:49] +[titan] 2025-10-05 21:29:49,408 - root - INFO - step: 37425 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 21:29:49,408 - root - INFO - lr: 5.4678e-06 gnorm: 1.25 [22:55:37< 1:34:38] +[titan] 2025-10-05 21:30:00,250 - root - INFO - step: 37430 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 21:30:00,250 - root - INFO - lr: 
5.4660e-06 gnorm: 1.23 [22:55:48< 1:34:27] +[titan] 2025-10-05 21:30:11,084 - root - INFO - step: 37435 loss: 1.9022 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:30:11,084 - root - INFO - lr: 5.4642e-06 gnorm: 1.27 [22:55:59< 1:34:16] +[titan] 2025-10-05 21:30:21,909 - root - INFO - step: 37440 loss: 1.9502 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:30:21,909 - root - INFO - lr: 5.4624e-06 gnorm: 1.24 [22:56:10< 1:34:05] +[titan] 2025-10-05 21:30:32,791 - root - INFO - step: 37445 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7483 +[titan] 2025-10-05 21:30:32,792 - root - INFO - lr: 5.4606e-06 gnorm: 1.30 [22:56:21< 1:33:54] +[titan] 2025-10-05 21:30:41,432 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:30:43,606 - root - INFO - step: 37450 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.37 mfu: 42.50% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 21:30:43,606 - root - INFO - lr: 5.4588e-06 gnorm: 1.25 [22:56:31< 1:33:43] +[titan] 2025-10-05 21:30:54,447 - root - INFO - step: 37455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 21:30:54,447 - root - INFO - lr: 5.4570e-06 gnorm: 1.27 [22:56:42< 1:33:32] +[titan] 2025-10-05 21:31:05,288 - root - INFO - step: 37460 loss: 1.8916 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:31:05,288 - root - INFO - lr: 5.4552e-06 gnorm: 1.22 [22:56:53< 1:33:21] +[titan] 2025-10-05 21:31:16,146 - root - INFO - step: 37465 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 21:31:16,146 - root - INFO - lr: 5.4535e-06 gnorm: 1.26 [22:57:04< 1:33:10] +[titan] 2025-10-05 21:31:26,988 - root - INFO - step: 37470 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 21:31:26,988 - root - INFO - lr: 5.4517e-06 gnorm: 1.26 [22:57:15< 1:32:59] +[titan] 2025-10-05 21:31:37,863 - root - INFO - step: 37475 loss: 1.8457 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2133 global_avg_mtp_loss: 1.6324 +[titan] 2025-10-05 21:31:37,863 - root - INFO - lr: 5.4499e-06 gnorm: 1.20 [22:57:26< 1:32:48] +[titan] 2025-10-05 21:31:48,716 - root - INFO - step: 37480 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6511 +[titan] 2025-10-05 21:31:48,716 - root - INFO - lr: 
5.4481e-06 gnorm: 1.22 [22:57:36< 1:32:37] +[titan] 2025-10-05 21:31:59,576 - root - INFO - step: 37485 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6883 +[titan] 2025-10-05 21:31:59,577 - root - INFO - lr: 5.4463e-06 gnorm: 1.26 [22:57:47< 1:32:26] +[titan] 2025-10-05 21:32:10,434 - root - INFO - step: 37490 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 21:32:10,434 - root - INFO - lr: 5.4446e-06 gnorm: 1.24 [22:57:58< 1:32:15] +[titan] 2025-10-05 21:32:21,290 - root - INFO - step: 37495 loss: 1.9993 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 21:32:21,290 - root - INFO - lr: 5.4428e-06 gnorm: 1.24 [22:58:09< 1:32:04] +[titan] 2025-10-05 21:32:29,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:32:32,151 - root - INFO - step: 37500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 21:32:32,151 - root - INFO - lr: 5.4411e-06 gnorm: 1.29 [22:58:20< 1:31:53] +[titan] 2025-10-05 21:32:43,013 - root - INFO - step: 37505 loss: 1.8923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:32:43,014 - root - INFO - lr: 5.4393e-06 gnorm: 1.21 [22:58:31< 1:31:42] +[titan] 2025-10-05 21:32:53,854 - root - INFO - step: 37510 loss: 1.9490 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7237 +[titan] 2025-10-05 21:32:53,854 - root - INFO - lr: 5.4375e-06 gnorm: 1.21 [22:58:42< 1:31:31] +[titan] 2025-10-05 21:33:04,724 - root - INFO - step: 37515 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 21:33:04,724 - root - INFO - lr: 5.4358e-06 gnorm: 1.24 [22:58:52< 1:31:20] +[titan] 2025-10-05 21:33:15,605 - root - INFO - step: 37520 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:33:15,605 - root - INFO - lr: 5.4341e-06 gnorm: 1.22 [22:59:03< 1:31:09] +[titan] 2025-10-05 21:33:26,465 - root - INFO - step: 37525 loss: 1.8732 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6563 +[titan] 2025-10-05 21:33:26,465 - root - INFO - lr: 5.4323e-06 gnorm: 1.23 [22:59:14< 1:30:58] +[titan] 2025-10-05 21:33:37,315 - root - INFO - step: 37530 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 21:33:37,315 - root - INFO - lr: 
5.4306e-06 gnorm: 1.23 [22:59:25< 1:30:47] +[titan] 2025-10-05 21:33:48,179 - root - INFO - step: 37535 loss: 1.8524 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6376 +[titan] 2025-10-05 21:33:48,179 - root - INFO - lr: 5.4288e-06 gnorm: 1.25 [22:59:36< 1:30:36] +[titan] 2025-10-05 21:33:59,032 - root - INFO - step: 37540 loss: 1.8890 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 21:33:59,032 - root - INFO - lr: 5.4271e-06 gnorm: 1.22 [22:59:47< 1:30:25] +[titan] 2025-10-05 21:34:09,894 - root - INFO - step: 37545 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:34:09,894 - root - INFO - lr: 5.4254e-06 gnorm: 1.24 [22:59:58< 1:30:14] +[titan] 2025-10-05 21:34:18,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:34:20,750 - root - INFO - step: 37550 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6861 +[titan] 2025-10-05 21:34:20,750 - root - INFO - lr: 5.4236e-06 gnorm: 1.24 [23:00:09< 1:30:02] +[titan] 2025-10-05 21:34:31,630 - root - INFO - step: 37555 loss: 1.9520 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 21:34:31,630 - root - INFO - lr: 5.4219e-06 gnorm: 1.21 [23:00:19< 1:29:51] +[titan] 2025-10-05 21:34:42,476 - root - INFO - step: 37560 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7291 +[titan] 2025-10-05 21:34:42,476 - root - INFO - lr: 5.4202e-06 gnorm: 1.23 [23:00:30< 1:29:40] +[titan] 2025-10-05 21:34:53,333 - root - INFO - step: 37565 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 21:34:53,333 - root - INFO - lr: 5.4185e-06 gnorm: 1.26 [23:00:41< 1:29:29] +[titan] 2025-10-05 21:35:04,184 - root - INFO - step: 37570 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 21:35:04,184 - root - INFO - lr: 5.4168e-06 gnorm: 1.30 [23:00:52< 1:29:18] +[titan] 2025-10-05 21:35:15,037 - root - INFO - step: 37575 loss: 1.8778 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6614 +[titan] 2025-10-05 21:35:15,037 - root - INFO - lr: 5.4151e-06 gnorm: 1.21 [23:01:03< 1:29:07] +[titan] 2025-10-05 21:35:25,912 - root - INFO - step: 37580 loss: 1.8864 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6681 +[titan] 2025-10-05 21:35:25,913 - root - INFO - lr: 
5.4134e-06 gnorm: 1.23 [23:01:14< 1:28:56] +[titan] 2025-10-05 21:35:36,806 - root - INFO - step: 37585 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 21:35:36,807 - root - INFO - lr: 5.4117e-06 gnorm: 1.25 [23:01:25< 1:28:45] +[titan] 2025-10-05 21:35:47,715 - root - INFO - step: 37590 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 21:35:47,716 - root - INFO - lr: 5.4100e-06 gnorm: 1.22 [23:01:35< 1:28:34] +[titan] 2025-10-05 21:35:58,598 - root - INFO - step: 37595 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:35:58,598 - root - INFO - lr: 5.4083e-06 gnorm: 1.20 [23:01:46< 1:28:23] +[titan] 2025-10-05 21:36:07,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:36:09,461 - root - INFO - step: 37600 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7104 +[titan] 2025-10-05 21:36:09,461 - root - INFO - lr: 5.4066e-06 gnorm: 1.22 [23:01:57< 1:28:12] +[titan] 2025-10-05 21:36:20,345 - root - INFO - step: 37605 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 21:36:20,345 - root - INFO - lr: 5.4049e-06 gnorm: 1.28 [23:02:08< 1:28:01] +[titan] 2025-10-05 21:36:31,206 - root - INFO - step: 37610 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 21:36:31,206 - root - INFO - lr: 5.4032e-06 gnorm: 1.20 [23:02:19< 1:27:50] +[titan] 2025-10-05 21:36:42,084 - root - INFO - step: 37615 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:36:42,084 - root - INFO - lr: 5.4015e-06 gnorm: 1.28 [23:02:30< 1:27:39] +[titan] 2025-10-05 21:36:52,956 - root - INFO - step: 37620 loss: 2.0281 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 21:36:52,956 - root - INFO - lr: 5.3999e-06 gnorm: 1.25 [23:02:41< 1:27:28] +[titan] 2025-10-05 21:37:03,800 - root - INFO - step: 37625 loss: 1.8956 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6758 +[titan] 2025-10-05 21:37:03,800 - root - INFO - lr: 5.3982e-06 gnorm: 1.23 [23:02:52< 1:27:17] +[titan] 2025-10-05 21:37:14,648 - root - INFO - step: 37630 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 21:37:14,649 - root - INFO - lr: 
5.3965e-06 gnorm: 1.33 [23:03:02< 1:27:06] +[titan] 2025-10-05 21:37:25,497 - root - INFO - step: 37635 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 21:37:25,497 - root - INFO - lr: 5.3948e-06 gnorm: 1.24 [23:03:13< 1:26:55] +[titan] 2025-10-05 21:37:36,353 - root - INFO - step: 37640 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 21:37:36,353 - root - INFO - lr: 5.3932e-06 gnorm: 1.22 [23:03:24< 1:26:44] +[titan] 2025-10-05 21:37:47,208 - root - INFO - step: 37645 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7232 +[titan] 2025-10-05 21:37:47,208 - root - INFO - lr: 5.3915e-06 gnorm: 1.27 [23:03:35< 1:26:33] +[titan] 2025-10-05 21:37:55,906 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:37:58,091 - root - INFO - step: 37650 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6691 +[titan] 2025-10-05 21:37:58,091 - root - INFO - lr: 5.3899e-06 gnorm: 1.23 [23:03:46< 1:26:22] +[titan] 2025-10-05 21:38:08,977 - root - INFO - step: 37655 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 21:38:08,977 - root - INFO - lr: 5.3882e-06 gnorm: 1.28 [23:03:57< 1:26:11] +[titan] 2025-10-05 21:38:19,857 - root - INFO - step: 37660 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 21:38:19,858 - root - INFO - lr: 5.3866e-06 gnorm: 1.29 [23:04:08< 1:26:00] +[titan] 2025-10-05 21:38:30,712 - root - INFO - step: 37665 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6720 +[titan] 2025-10-05 21:38:30,712 - root - INFO - lr: 5.3849e-06 gnorm: 1.23 [23:04:18< 1:25:49] +[titan] 2025-10-05 21:38:41,564 - root - INFO - step: 37670 loss: 1.8372 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6256 +[titan] 2025-10-05 21:38:41,564 - root - INFO - lr: 5.3833e-06 gnorm: 1.21 [23:04:29< 1:25:38] +[titan] 2025-10-05 21:38:52,429 - root - INFO - step: 37675 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 21:38:52,429 - root - INFO - lr: 5.3816e-06 gnorm: 1.26 [23:04:40< 1:25:27] +[titan] 2025-10-05 21:39:03,314 - root - INFO - step: 37680 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 21:39:03,314 - root - INFO - lr: 
5.3800e-06 gnorm: 1.28 [23:04:51< 1:25:16] +[titan] 2025-10-05 21:39:14,212 - root - INFO - step: 37685 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 21:39:14,212 - root - INFO - lr: 5.3784e-06 gnorm: 1.22 [23:05:02< 1:25:05] +[titan] 2025-10-05 21:39:25,089 - root - INFO - step: 37690 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 21:39:25,089 - root - INFO - lr: 5.3767e-06 gnorm: 1.24 [23:05:13< 1:24:53] +[titan] 2025-10-05 21:39:35,965 - root - INFO - step: 37695 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 21:39:35,965 - root - INFO - lr: 5.3751e-06 gnorm: 1.26 [23:05:24< 1:24:42] +[titan] 2025-10-05 21:39:44,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:39:46,832 - root - INFO - step: 37700 loss: 1.8803 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:39:46,832 - root - INFO - lr: 5.3735e-06 gnorm: 1.24 [23:05:35< 1:24:31] +[titan] 2025-10-05 21:39:57,708 - root - INFO - step: 37705 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:39:57,708 - root - INFO - lr: 5.3719e-06 gnorm: 1.24 [23:05:45< 1:24:20] +[titan] 2025-10-05 21:40:08,584 - root - INFO - step: 37710 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6486 +[titan] 2025-10-05 21:40:08,584 - root - INFO - lr: 5.3703e-06 gnorm: 1.23 [23:05:56< 1:24:09] +[titan] 2025-10-05 21:40:19,491 - root - INFO - step: 37715 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 21:40:19,491 - root - INFO - lr: 5.3687e-06 gnorm: 1.24 [23:06:07< 1:23:58] +[titan] 2025-10-05 21:40:30,374 - root - INFO - step: 37720 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 21:40:30,375 - root - INFO - lr: 5.3671e-06 gnorm: 1.21 [23:06:18< 1:23:47] +[titan] 2025-10-05 21:40:41,250 - root - INFO - step: 37725 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 21:40:41,250 - root - INFO - lr: 5.3654e-06 gnorm: 1.21 [23:06:29< 1:23:36] +[titan] 2025-10-05 21:40:52,074 - root - INFO - step: 37730 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 21:40:52,075 - root - INFO - lr: 
5.3638e-06 gnorm: 1.23 [23:06:40< 1:23:25] +[titan] 2025-10-05 21:41:02,927 - root - INFO - step: 37735 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 21:41:02,928 - root - INFO - lr: 5.3622e-06 gnorm: 1.24 [23:06:51< 1:23:14] +[titan] 2025-10-05 21:41:13,783 - root - INFO - step: 37740 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 21:41:13,783 - root - INFO - lr: 5.3607e-06 gnorm: 1.24 [23:07:02< 1:23:03] +[titan] 2025-10-05 21:41:24,647 - root - INFO - step: 37745 loss: 1.8905 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 21:41:24,647 - root - INFO - lr: 5.3591e-06 gnorm: 1.24 [23:07:12< 1:22:52] +[titan] 2025-10-05 21:41:33,361 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:41:35,546 - root - INFO - step: 37750 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 21:41:35,546 - root - INFO - lr: 5.3575e-06 gnorm: 1.24 [23:07:23< 1:22:41] +[titan] 2025-10-05 21:41:46,407 - root - INFO - step: 37755 loss: 1.8127 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6029 +[titan] 2025-10-05 21:41:46,407 - root - INFO - lr: 5.3559e-06 gnorm: 1.23 [23:07:34< 1:22:30] +[titan] 2025-10-05 21:41:57,261 - root - INFO - step: 37760 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 21:41:57,261 - root - INFO - lr: 5.3543e-06 gnorm: 1.27 [23:07:45< 1:22:19] +[titan] 2025-10-05 21:42:08,104 - root - INFO - step: 37765 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6996 +[titan] 2025-10-05 21:42:08,104 - root - INFO - lr: 5.3527e-06 gnorm: 1.22 [23:07:56< 1:22:08] +[titan] 2025-10-05 21:42:18,953 - root - INFO - step: 37770 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:42:18,954 - root - INFO - lr: 5.3512e-06 gnorm: 1.28 [23:08:07< 1:21:57] +[titan] 2025-10-05 21:42:29,811 - root - INFO - step: 37775 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 21:42:29,811 - root - INFO - lr: 5.3496e-06 gnorm: 1.30 [23:08:18< 1:21:46] +[titan] 2025-10-05 21:42:40,701 - root - INFO - step: 37780 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 21:42:40,701 - root - INFO - lr: 
5.3480e-06 gnorm: 1.24 [23:08:28< 1:21:35] +[titan] 2025-10-05 21:42:51,568 - root - INFO - step: 37785 loss: 1.8503 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 21:42:51,568 - root - INFO - lr: 5.3465e-06 gnorm: 1.25 [23:08:39< 1:21:24] +[titan] 2025-10-05 21:43:02,441 - root - INFO - step: 37790 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 21:43:02,441 - root - INFO - lr: 5.3449e-06 gnorm: 1.29 [23:08:50< 1:21:13] +[titan] 2025-10-05 21:43:13,297 - root - INFO - step: 37795 loss: 1.9468 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7219 +[titan] 2025-10-05 21:43:13,297 - root - INFO - lr: 5.3434e-06 gnorm: 1.25 [23:09:01< 1:21:02] +[titan] 2025-10-05 21:43:21,968 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:43:24,171 - root - INFO - step: 37800 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 21:43:24,171 - root - INFO - lr: 5.3418e-06 gnorm: 1.23 [23:09:12< 1:20:51] +[titan] 2025-10-05 21:43:35,037 - root - INFO - step: 37805 loss: 1.9248 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 21:43:35,037 - root - INFO - lr: 5.3403e-06 gnorm: 1.25 [23:09:23< 1:20:40] +[titan] 2025-10-05 21:43:45,919 - root - INFO - step: 37810 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:43:45,919 - root - INFO - lr: 5.3387e-06 gnorm: 1.21 [23:09:34< 1:20:29] +[titan] 2025-10-05 21:43:56,805 - root - INFO - step: 37815 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 21:43:56,805 - root - INFO - lr: 5.3372e-06 gnorm: 1.27 [23:09:45< 1:20:18] +[titan] 2025-10-05 21:44:07,687 - root - INFO - step: 37820 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 21:44:07,687 - root - INFO - lr: 5.3356e-06 gnorm: 1.30 [23:09:55< 1:20:07] +[titan] 2025-10-05 21:44:18,545 - root - INFO - step: 37825 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:44:18,545 - root - INFO - lr: 5.3341e-06 gnorm: 1.27 [23:10:06< 1:19:56] +[titan] 2025-10-05 21:44:29,413 - root - INFO - step: 37830 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 21:44:29,413 - root - INFO - lr: 
5.3326e-06 gnorm: 1.21 [23:10:17< 1:19:44] +[titan] 2025-10-05 21:44:40,283 - root - INFO - step: 37835 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 21:44:40,283 - root - INFO - lr: 5.3310e-06 gnorm: 1.22 [23:10:28< 1:19:33] +[titan] 2025-10-05 21:44:51,148 - root - INFO - step: 37840 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 21:44:51,149 - root - INFO - lr: 5.3295e-06 gnorm: 1.21 [23:10:39< 1:19:22] +[titan] 2025-10-05 21:45:02,046 - root - INFO - step: 37845 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:45:02,047 - root - INFO - lr: 5.3280e-06 gnorm: 1.25 [23:10:50< 1:19:11] +[titan] 2025-10-05 21:45:10,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:45:12,905 - root - INFO - step: 37850 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:45:12,905 - root - INFO - lr: 5.3265e-06 gnorm: 1.23 [23:11:01< 1:19:00] +[titan] 2025-10-05 21:45:23,773 - root - INFO - step: 37855 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6640 +[titan] 2025-10-05 21:45:23,773 - root - INFO - lr: 5.3250e-06 gnorm: 1.22 [23:11:12< 1:18:49] +[titan] 2025-10-05 21:45:34,638 - root - INFO - step: 37860 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7081 +[titan] 2025-10-05 21:45:34,638 - root - INFO - lr: 5.3235e-06 gnorm: 1.24 [23:11:22< 1:18:38] +[titan] 2025-10-05 21:45:45,491 - root - INFO - step: 37865 loss: 1.9514 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7254 +[titan] 2025-10-05 21:45:45,491 - root - INFO - lr: 5.3220e-06 gnorm: 1.24 [23:11:33< 1:18:27] +[titan] 2025-10-05 21:45:56,352 - root - INFO - step: 37870 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6506 +[titan] 2025-10-05 21:45:56,353 - root - INFO - lr: 5.3205e-06 gnorm: 1.21 [23:11:44< 1:18:16] +[titan] 2025-10-05 21:46:07,270 - root - INFO - step: 37875 loss: 1.9195 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 21:46:07,271 - root - INFO - lr: 5.3190e-06 gnorm: 1.24 [23:11:55< 1:18:05] +[titan] 2025-10-05 21:46:18,130 - root - INFO - step: 37880 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 21:46:18,131 - root - INFO - lr: 
5.3175e-06 gnorm: 1.26 [23:12:06< 1:17:54] +[titan] 2025-10-05 21:46:29,081 - root - INFO - step: 37885 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:46:29,082 - root - INFO - lr: 5.3160e-06 gnorm: 1.22 [23:12:17< 1:17:43] +[titan] 2025-10-05 21:46:35,780 - root - INFO - Dumping profiler traces at step 37888 +[titan] 2025-10-05 21:46:35,816 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:46:40,200 - root - INFO - step: 37890 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 29,473 tflops: 408.89 mfu: 41.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 21:46:40,200 - root - INFO - lr: 5.3145e-06 gnorm: 1.28 [23:12:28< 1:17:32] +[titan] 2025-10-05 21:46:51,073 - root - INFO - step: 37895 loss: 1.9689 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 21:46:51,073 - root - INFO - lr: 5.3130e-06 gnorm: 1.23 [23:12:39< 1:17:21] +[titan] 2025-10-05 21:46:59,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:47:01,983 - root - INFO - step: 37900 loss: 1.9609 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:47:01,983 - root - INFO - lr: 5.3115e-06 gnorm: 1.24 [23:12:50< 1:17:10] +[titan] 2025-10-05 21:47:12,859 - root - INFO - step: 37905 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7007 +[titan] 2025-10-05 21:47:12,859 - root - INFO - lr: 5.3100e-06 gnorm: 1.27 [23:13:01< 1:16:59] +[titan] 2025-10-05 21:47:23,757 - root - INFO - step: 37910 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 21:47:23,757 - root - INFO - lr: 5.3086e-06 gnorm: 1.26 [23:13:11< 1:16:48] +[titan] 2025-10-05 21:47:34,635 - root - INFO - step: 37915 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6659 +[titan] 2025-10-05 21:47:34,635 - root - INFO - lr: 5.3071e-06 gnorm: 1.24 [23:13:22< 1:16:37] +[titan] 2025-10-05 21:47:45,522 - root - INFO - step: 37920 loss: 1.8835 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:47:45,522 - root - INFO - lr: 5.3056e-06 gnorm: 1.20 [23:13:33< 1:16:26] +[titan] 2025-10-05 21:47:56,386 - root - INFO - step: 37925 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 21:47:56,386 - root - INFO - lr: 5.3042e-06 gnorm: 1.28 [23:13:44< 1:16:15] +[titan] 2025-10-05 21:48:07,400 - root - INFO - step: 37930 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,753 tflops: 412.78 mfu: 41.74% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 21:48:07,400 - root - INFO - lr: 
5.3027e-06 gnorm: 1.23 [23:13:55< 1:16:04] +[titan] 2025-10-05 21:48:18,249 - root - INFO - step: 37935 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 21:48:18,249 - root - INFO - lr: 5.3012e-06 gnorm: 1.24 [23:14:06< 1:15:53] +[titan] 2025-10-05 21:48:29,154 - root - INFO - step: 37940 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 21:48:29,154 - root - INFO - lr: 5.2998e-06 gnorm: 1.26 [23:14:17< 1:15:42] +[titan] 2025-10-05 21:48:40,024 - root - INFO - step: 37945 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6992 +[titan] 2025-10-05 21:48:40,024 - root - INFO - lr: 5.2983e-06 gnorm: 1.29 [23:14:28< 1:15:31] +[titan] 2025-10-05 21:48:48,689 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:48:50,876 - root - INFO - step: 37950 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:48:50,876 - root - INFO - lr: 5.2969e-06 gnorm: 1.28 [23:14:39< 1:15:20] +[titan] 2025-10-05 21:49:01,777 - root - INFO - step: 37955 loss: 1.9146 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:49:01,777 - root - INFO - lr: 5.2954e-06 gnorm: 1.23 [23:14:50< 1:15:09] +[titan] 2025-10-05 21:49:12,633 - root - INFO - step: 37960 loss: 1.9032 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:49:12,633 - root - INFO - lr: 5.2940e-06 gnorm: 1.25 [23:15:00< 1:14:58] +[titan] 2025-10-05 21:49:23,498 - root - INFO - step: 37965 loss: 1.8874 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 21:49:23,498 - root - INFO - lr: 5.2926e-06 gnorm: 1.21 [23:15:11< 1:14:47] +[titan] 2025-10-05 21:49:34,372 - root - INFO - step: 37970 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 21:49:34,372 - root - INFO - lr: 5.2911e-06 gnorm: 1.25 [23:15:22< 1:14:36] +[titan] 2025-10-05 21:49:45,244 - root - INFO - step: 37975 loss: 1.9350 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 21:49:45,244 - root - INFO - lr: 5.2897e-06 gnorm: 1.25 [23:15:33< 1:14:25] +[titan] 2025-10-05 21:49:56,122 - root - INFO - step: 37980 loss: 2.0219 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7886 +[titan] 2025-10-05 21:49:56,122 - root - INFO - lr: 
5.2883e-06 gnorm: 1.31 [23:15:44< 1:14:14] +[titan] 2025-10-05 21:50:07,019 - root - INFO - step: 37985 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:50:07,019 - root - INFO - lr: 5.2869e-06 gnorm: 1.24 [23:15:55< 1:14:02] +[titan] 2025-10-05 21:50:17,884 - root - INFO - step: 37990 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 21:50:17,884 - root - INFO - lr: 5.2854e-06 gnorm: 1.22 [23:16:06< 1:13:51] +[titan] 2025-10-05 21:50:28,745 - root - INFO - step: 37995 loss: 1.8863 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 21:50:28,745 - root - INFO - lr: 5.2840e-06 gnorm: 1.21 [23:16:16< 1:13:40] +[titan] 2025-10-05 21:50:37,416 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:50:39,603 - root - INFO - step: 38000 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:50:39,603 - root - INFO - lr: 5.2826e-06 gnorm: 1.24 [23:16:27< 1:13:29] +[titan] 2025-10-05 21:50:50,499 - root - INFO - step: 38005 loss: 1.9446 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:50:50,499 - root - INFO - lr: 5.2812e-06 gnorm: 1.24 [23:16:38< 1:13:18] +[titan] 2025-10-05 21:51:01,361 - root - INFO - step: 38010 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 21:51:01,361 - root - INFO - lr: 5.2798e-06 gnorm: 1.25 [23:16:49< 1:13:07] +[titan] 2025-10-05 21:51:12,250 - root - INFO - step: 38015 loss: 1.9035 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 21:51:12,250 - root - INFO - lr: 5.2784e-06 gnorm: 1.23 [23:17:00< 1:12:56] +[titan] 2025-10-05 21:51:23,111 - root - INFO - step: 38020 loss: 1.9570 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 21:51:23,111 - root - INFO - lr: 5.2770e-06 gnorm: 1.26 [23:17:11< 1:12:45] +[titan] 2025-10-05 21:51:33,966 - root - INFO - step: 38025 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:51:33,966 - root - INFO - lr: 5.2756e-06 gnorm: 1.24 [23:17:22< 1:12:34] +[titan] 2025-10-05 21:51:44,841 - root - INFO - step: 38030 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6303 +[titan] 2025-10-05 21:51:44,841 - root - INFO - lr: 
5.2742e-06 gnorm: 1.22 [23:17:33< 1:12:23] +[titan] 2025-10-05 21:51:55,747 - root - INFO - step: 38035 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 21:51:55,747 - root - INFO - lr: 5.2728e-06 gnorm: 1.23 [23:17:43< 1:12:12] +[titan] 2025-10-05 21:52:06,666 - root - INFO - step: 38040 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:52:06,666 - root - INFO - lr: 5.2714e-06 gnorm: 1.25 [23:17:54< 1:12:01] +[titan] 2025-10-05 21:52:17,555 - root - INFO - step: 38045 loss: 1.8640 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6482 +[titan] 2025-10-05 21:52:17,555 - root - INFO - lr: 5.2701e-06 gnorm: 1.25 [23:18:05< 1:11:50] +[titan] 2025-10-05 21:52:26,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:52:28,442 - root - INFO - step: 38050 loss: 1.8572 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6424 +[titan] 2025-10-05 21:52:28,442 - root - INFO - lr: 5.2687e-06 gnorm: 1.21 [23:18:16< 1:11:39] +[titan] 2025-10-05 21:52:39,324 - root - INFO - step: 38055 loss: 1.9652 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 21:52:39,325 - root - INFO - lr: 5.2673e-06 gnorm: 1.23 [23:18:27< 1:11:28] +[titan] 2025-10-05 21:52:50,189 - root - INFO - step: 38060 loss: 1.9568 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 21:52:50,189 - root - INFO - lr: 5.2659e-06 gnorm: 1.26 [23:18:38< 1:11:17] +[titan] 2025-10-05 21:53:01,061 - root - INFO - step: 38065 loss: 1.8871 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 21:53:01,061 - root - INFO - lr: 5.2646e-06 gnorm: 1.22 [23:18:49< 1:11:06] +[titan] 2025-10-05 21:53:12,017 - root - INFO - step: 38070 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 21:53:12,018 - root - INFO - lr: 5.2632e-06 gnorm: 1.21 [23:19:00< 1:10:55] +[titan] 2025-10-05 21:53:22,903 - root - INFO - step: 38075 loss: 1.8578 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6438 +[titan] 2025-10-05 21:53:22,903 - root - INFO - lr: 5.2619e-06 gnorm: 1.21 [23:19:11< 1:10:44] +[titan] 2025-10-05 21:53:33,778 - root - INFO - step: 38080 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:53:33,779 - root - INFO - lr: 
5.2605e-06 gnorm: 1.25 [23:19:22< 1:10:33] +[titan] 2025-10-05 21:53:44,628 - root - INFO - step: 38085 loss: 1.9527 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:53:44,628 - root - INFO - lr: 5.2591e-06 gnorm: 1.23 [23:19:32< 1:10:22] +[titan] 2025-10-05 21:53:55,480 - root - INFO - step: 38090 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6835 +[titan] 2025-10-05 21:53:55,480 - root - INFO - lr: 5.2578e-06 gnorm: 1.21 [23:19:43< 1:10:11] +[titan] 2025-10-05 21:54:06,381 - root - INFO - step: 38095 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:54:06,381 - root - INFO - lr: 5.2565e-06 gnorm: 1.32 [23:19:54< 1:10:00] +[titan] 2025-10-05 21:54:15,086 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:54:17,264 - root - INFO - step: 38100 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:54:17,264 - root - INFO - lr: 5.2551e-06 gnorm: 1.21 [23:20:05< 1:09:49] +[titan] 2025-10-05 21:54:28,122 - root - INFO - step: 38105 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:54:28,123 - root - INFO - lr: 5.2538e-06 gnorm: 1.25 [23:20:16< 1:09:38] +[titan] 2025-10-05 21:54:38,982 - root - INFO - step: 38110 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:54:38,982 - root - INFO - lr: 5.2524e-06 gnorm: 1.30 [23:20:27< 1:09:27] +[titan] 2025-10-05 21:54:49,840 - root - INFO - step: 38115 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 21:54:49,840 - root - INFO - lr: 5.2511e-06 gnorm: 1.27 [23:20:38< 1:09:16] +[titan] 2025-10-05 21:55:00,693 - root - INFO - step: 38120 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:55:00,694 - root - INFO - lr: 5.2498e-06 gnorm: 1.22 [23:20:48< 1:09:05] +[titan] 2025-10-05 21:55:11,613 - root - INFO - step: 38125 loss: 1.8922 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:55:11,613 - root - INFO - lr: 5.2485e-06 gnorm: 1.22 [23:20:59< 1:08:54] +[titan] 2025-10-05 21:55:22,478 - root - INFO - step: 38130 loss: 1.8761 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6600 +[titan] 2025-10-05 21:55:22,478 - root - INFO - lr: 
5.2471e-06 gnorm: 1.23 [23:21:10< 1:08:43] +[titan] 2025-10-05 21:55:33,363 - root - INFO - step: 38135 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7568 +[titan] 2025-10-05 21:55:33,364 - root - INFO - lr: 5.2458e-06 gnorm: 1.25 [23:21:21< 1:08:32] +[titan] 2025-10-05 21:55:44,229 - root - INFO - step: 38140 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 21:55:44,229 - root - INFO - lr: 5.2445e-06 gnorm: 1.25 [23:21:32< 1:08:20] +[titan] 2025-10-05 21:55:55,104 - root - INFO - step: 38145 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6732 +[titan] 2025-10-05 21:55:55,104 - root - INFO - lr: 5.2432e-06 gnorm: 1.23 [23:21:43< 1:08:09] +[titan] 2025-10-05 21:56:03,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:56:05,959 - root - INFO - step: 38150 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6637 +[titan] 2025-10-05 21:56:05,960 - root - INFO - lr: 5.2419e-06 gnorm: 1.28 [23:21:54< 1:07:58] +[titan] 2025-10-05 21:56:16,858 - root - INFO - step: 38155 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6893 +[titan] 2025-10-05 21:56:16,858 - root - INFO - lr: 5.2406e-06 gnorm: 1.23 [23:22:05< 1:07:47] +[titan] 2025-10-05 21:56:27,718 - root - INFO - step: 38160 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 21:56:27,719 - root - INFO - lr: 5.2393e-06 gnorm: 1.25 [23:22:15< 1:07:36] +[titan] 2025-10-05 21:56:38,596 - root - INFO - step: 38165 loss: 1.8754 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6582 +[titan] 2025-10-05 21:56:38,597 - root - INFO - lr: 5.2380e-06 gnorm: 1.20 [23:22:26< 1:07:25] +[titan] 2025-10-05 21:56:49,479 - root - INFO - step: 38170 loss: 1.9310 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 21:56:49,480 - root - INFO - lr: 5.2367e-06 gnorm: 1.22 [23:22:37< 1:07:14] +[titan] 2025-10-05 21:57:00,354 - root - INFO - step: 38175 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:57:00,354 - root - INFO - lr: 5.2354e-06 gnorm: 1.27 [23:22:48< 1:07:03] +[titan] 2025-10-05 21:57:11,246 - root - INFO - step: 38180 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 21:57:11,247 - root - INFO - lr: 
5.2341e-06 gnorm: 1.25 [23:22:59< 1:06:52] +[titan] 2025-10-05 21:57:22,096 - root - INFO - step: 38185 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7581 +[titan] 2025-10-05 21:57:22,096 - root - INFO - lr: 5.2328e-06 gnorm: 1.27 [23:23:10< 1:06:41] +[titan] 2025-10-05 21:57:32,943 - root - INFO - step: 38190 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:57:32,943 - root - INFO - lr: 5.2316e-06 gnorm: 1.26 [23:23:21< 1:06:30] +[titan] 2025-10-05 21:57:43,812 - root - INFO - step: 38195 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 21:57:43,812 - root - INFO - lr: 5.2303e-06 gnorm: 1.23 [23:23:32< 1:06:19] +[titan] 2025-10-05 21:57:52,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:57:54,668 - root - INFO - step: 38200 loss: 1.9598 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7333 +[titan] 2025-10-05 21:57:54,668 - root - INFO - lr: 5.2290e-06 gnorm: 1.24 [23:23:42< 1:06:08] +[titan] 2025-10-05 21:58:05,542 - root - INFO - step: 38205 loss: 1.8481 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 21:58:05,543 - root - INFO - lr: 5.2277e-06 gnorm: 1.26 [23:23:53< 1:05:57] +[titan] 2025-10-05 21:58:16,438 - root - INFO - step: 38210 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7477 +[titan] 2025-10-05 21:58:16,438 - root - INFO - lr: 5.2265e-06 gnorm: 1.28 [23:24:04< 1:05:46] +[titan] 2025-10-05 21:58:27,285 - root - INFO - step: 38215 loss: 1.9355 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 21:58:27,285 - root - INFO - lr: 5.2252e-06 gnorm: 1.22 [23:24:15< 1:05:35] +[titan] 2025-10-05 21:58:38,133 - root - INFO - step: 38220 loss: 1.8546 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6405 +[titan] 2025-10-05 21:58:38,133 - root - INFO - lr: 5.2240e-06 gnorm: 1.23 [23:24:26< 1:05:24] +[titan] 2025-10-05 21:58:48,997 - root - INFO - step: 38225 loss: 1.8842 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6664 +[titan] 2025-10-05 21:58:48,997 - root - INFO - lr: 5.2227e-06 gnorm: 1.21 [23:24:37< 1:05:13] +[titan] 2025-10-05 21:58:59,888 - root - INFO - step: 38230 loss: 1.9848 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 21:58:59,888 - root - INFO - lr: 
5.2214e-06 gnorm: 1.24 [23:24:48< 1:05:02] +[titan] 2025-10-05 21:59:10,888 - root - INFO - step: 38235 loss: 1.8777 memory: 118.84GiB(85.28%) tps: 29,791 tflops: 413.31 mfu: 41.79% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 21:59:10,888 - root - INFO - lr: 5.2202e-06 gnorm: 1.21 [23:24:59< 1:04:51] +[titan] 2025-10-05 21:59:21,732 - root - INFO - step: 38240 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6791 +[titan] 2025-10-05 21:59:21,732 - root - INFO - lr: 5.2190e-06 gnorm: 1.22 [23:25:09< 1:04:40] +[titan] 2025-10-05 21:59:32,592 - root - INFO - step: 38245 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 21:59:32,592 - root - INFO - lr: 5.2177e-06 gnorm: 1.26 [23:25:20< 1:04:29] +[titan] 2025-10-05 21:59:41,259 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:59:43,442 - root - INFO - step: 38250 loss: 1.8699 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6533 +[titan] 2025-10-05 21:59:43,442 - root - INFO - lr: 5.2165e-06 gnorm: 1.23 [23:25:31< 1:04:18] +[titan] 2025-10-05 21:59:54,302 - root - INFO - step: 38255 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:59:54,303 - root - INFO - lr: 5.2152e-06 gnorm: 1.28 [23:25:42< 1:04:07] +[titan] 2025-10-05 22:00:05,203 - root - INFO - step: 38260 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 22:00:05,203 - root - INFO - lr: 5.2140e-06 gnorm: 1.26 [23:25:53< 1:03:56] +[titan] 2025-10-05 22:00:16,075 - root - INFO - step: 38265 loss: 1.8744 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6580 +[titan] 2025-10-05 22:00:16,076 - root - INFO - lr: 5.2128e-06 gnorm: 1.25 [23:26:04< 1:03:45] +[titan] 2025-10-05 22:00:26,953 - root - INFO - step: 38270 loss: 1.9090 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:00:26,953 - root - INFO - lr: 5.2116e-06 gnorm: 1.26 [23:26:15< 1:03:34] +[titan] 2025-10-05 22:00:37,822 - root - INFO - step: 38275 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6897 +[titan] 2025-10-05 22:00:37,823 - root - INFO - lr: 5.2103e-06 gnorm: 1.27 [23:26:26< 1:03:23] +[titan] 2025-10-05 22:00:48,688 - root - INFO - step: 38280 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 22:00:48,688 - root - INFO - lr: 
5.2091e-06 gnorm: 1.25 [23:26:36< 1:03:12] +[titan] 2025-10-05 22:00:59,558 - root - INFO - step: 38285 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 22:00:59,559 - root - INFO - lr: 5.2079e-06 gnorm: 1.24 [23:26:47< 1:03:01] +[titan] 2025-10-05 22:01:10,423 - root - INFO - step: 38290 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 22:01:10,423 - root - INFO - lr: 5.2067e-06 gnorm: 1.23 [23:26:58< 1:02:50] +[titan] 2025-10-05 22:01:21,364 - root - INFO - step: 38295 loss: 1.9718 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 22:01:21,364 - root - INFO - lr: 5.2055e-06 gnorm: 1.27 [23:27:09< 1:02:39] +[titan] 2025-10-05 22:01:30,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:01:32,234 - root - INFO - step: 38300 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7052 +[titan] 2025-10-05 22:01:32,234 - root - INFO - lr: 5.2043e-06 gnorm: 1.22 [23:27:20< 1:02:28] +[titan] 2025-10-05 22:01:43,099 - root - INFO - step: 38305 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 22:01:43,099 - root - INFO - lr: 5.2031e-06 gnorm: 1.24 [23:27:31< 1:02:16] +[titan] 2025-10-05 22:01:53,973 - root - INFO - step: 38310 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 22:01:53,973 - root - INFO - lr: 5.2019e-06 gnorm: 1.25 [23:27:42< 1:02:05] +[titan] 2025-10-05 22:02:04,844 - root - INFO - step: 38315 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 22:02:04,845 - root - INFO - lr: 5.2007e-06 gnorm: 1.28 [23:27:53< 1:01:54] +[titan] 2025-10-05 22:02:15,752 - root - INFO - step: 38320 loss: 1.9010 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6811 +[titan] 2025-10-05 22:02:15,752 - root - INFO - lr: 5.1995e-06 gnorm: 1.24 [23:28:03< 1:01:43] +[titan] 2025-10-05 22:02:26,644 - root - INFO - step: 38325 loss: 1.8521 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6383 +[titan] 2025-10-05 22:02:26,645 - root - INFO - lr: 5.1983e-06 gnorm: 1.27 [23:28:14< 1:01:32] +[titan] 2025-10-05 22:02:37,525 - root - INFO - step: 38330 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:02:37,525 - root - INFO - lr: 
5.1972e-06 gnorm: 1.27 [23:28:25< 1:01:21] +[titan] 2025-10-05 22:02:48,403 - root - INFO - step: 38335 loss: 1.8947 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6753 +[titan] 2025-10-05 22:02:48,403 - root - INFO - lr: 5.1960e-06 gnorm: 1.31 [23:28:36< 1:01:10] +[titan] 2025-10-05 22:02:59,271 - root - INFO - step: 38340 loss: 1.8646 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 22:02:59,271 - root - INFO - lr: 5.1948e-06 gnorm: 1.22 [23:28:47< 1:00:59] +[titan] 2025-10-05 22:03:10,127 - root - INFO - step: 38345 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 22:03:10,127 - root - INFO - lr: 5.1936e-06 gnorm: 1.28 [23:28:58< 1:00:48] +[titan] 2025-10-05 22:03:18,835 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:03:21,025 - root - INFO - step: 38350 loss: 1.8758 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6588 +[titan] 2025-10-05 22:03:21,025 - root - INFO - lr: 5.1925e-06 gnorm: 1.22 [23:29:09< 1:00:37] +[titan] 2025-10-05 22:03:31,925 - root - INFO - step: 38355 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6874 +[titan] 2025-10-05 22:03:31,925 - root - INFO - lr: 5.1913e-06 gnorm: 1.20 [23:29:20< 1:00:26] +[titan] 2025-10-05 22:03:42,780 - root - INFO - step: 38360 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 22:03:42,780 - root - INFO - lr: 5.1902e-06 gnorm: 1.24 [23:29:30< 1:00:15] +[titan] 2025-10-05 22:03:53,638 - root - INFO - step: 38365 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 22:03:53,638 - root - INFO - lr: 5.1890e-06 gnorm: 1.25 [23:29:41< 1:00:04] +[titan] 2025-10-05 22:04:04,503 - root - INFO - step: 38370 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 22:04:04,503 - root - INFO - lr: 5.1878e-06 gnorm: 1.23 [23:29:52< 0:59:53] +[titan] 2025-10-05 22:04:15,408 - root - INFO - step: 38375 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 22:04:15,409 - root - INFO - lr: 5.1867e-06 gnorm: 1.24 [23:30:03< 0:59:42] +[titan] 2025-10-05 22:04:26,282 - root - INFO - step: 38380 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 22:04:26,282 - root - INFO - lr: 
5.1856e-06 gnorm: 1.23 [23:30:14< 0:59:31] +[titan] 2025-10-05 22:04:37,152 - root - INFO - step: 38385 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6840 +[titan] 2025-10-05 22:04:37,153 - root - INFO - lr: 5.1844e-06 gnorm: 1.26 [23:30:25< 0:59:20] +[titan] 2025-10-05 22:04:48,030 - root - INFO - step: 38390 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:04:48,030 - root - INFO - lr: 5.1833e-06 gnorm: 1.26 [23:30:36< 0:59:09] +[titan] 2025-10-05 22:04:58,887 - root - INFO - step: 38395 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:04:58,887 - root - INFO - lr: 5.1821e-06 gnorm: 1.24 [23:30:47< 0:58:58] +[titan] 2025-10-05 22:05:07,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:05:09,829 - root - INFO - step: 38400 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 22:05:09,829 - root - INFO - lr: 5.1810e-06 gnorm: 1.25 [23:30:58< 0:58:47] +[titan] 2025-10-05 22:05:10,009 - root - INFO - Dumping profiler traces at step 38400 +[titan] 2025-10-05 22:05:10,044 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:05:20,979 - root - INFO - step: 38405 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 29,389 tflops: 407.73 mfu: 41.23% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 22:05:20,979 - root - INFO - lr: 5.1799e-06 gnorm: 1.21 [23:31:09< 0:58:36] +[titan] 2025-10-05 22:05:31,845 - root - INFO - step: 38410 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6711 +[titan] 2025-10-05 22:05:31,845 - root - INFO - lr: 5.1788e-06 gnorm: 1.25 [23:31:20< 0:58:25] +[titan] 2025-10-05 22:05:42,706 - root - INFO - step: 38415 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6821 +[titan] 2025-10-05 22:05:42,706 - root - INFO - lr: 5.1776e-06 gnorm: 1.25 [23:31:30< 0:58:14] +[titan] 2025-10-05 22:05:53,597 - root - INFO - step: 38420 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 22:05:53,598 - root - INFO - lr: 5.1765e-06 gnorm: 1.24 [23:31:41< 0:58:03] +[titan] 2025-10-05 22:06:04,473 - root - INFO - step: 38425 loss: 1.8931 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:06:04,473 - root - INFO - lr: 5.1754e-06 gnorm: 1.24 [23:31:52< 0:57:52] +[titan] 2025-10-05 22:06:15,341 - root - INFO - step: 38430 loss: 
1.9097 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:06:15,342 - root - INFO - lr: 5.1743e-06 gnorm: 1.28 [23:32:03< 0:57:41] +[titan] 2025-10-05 22:06:26,263 - root - INFO - step: 38435 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6607 +[titan] 2025-10-05 22:06:26,263 - root - INFO - lr: 5.1732e-06 gnorm: 1.21 [23:32:14< 0:57:30] +[titan] 2025-10-05 22:06:37,131 - root - INFO - step: 38440 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7207 +[titan] 2025-10-05 22:06:37,132 - root - INFO - lr: 5.1721e-06 gnorm: 1.24 [23:32:25< 0:57:19] +[titan] 2025-10-05 22:06:48,006 - root - INFO - step: 38445 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6765 +[titan] 2025-10-05 22:06:48,006 - root - INFO - lr: 5.1710e-06 gnorm: 1.26 [23:32:36< 0:57:08] +[titan] 2025-10-05 22:06:56,702 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:06:58,881 - root - INFO - step: 38450 loss: 1.9214 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 22:06:58,881 - root - INFO - lr: 5.1699e-06 gnorm: 1.25 [23:32:47< 0:56:57] +[titan] 2025-10-05 22:07:09,781 - root - INFO - step: 38455 loss: 1.9440 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:07:09,781 - root - INFO - lr: 5.1688e-06 gnorm: 1.24 [23:32:57< 0:56:46] +[titan] 2025-10-05 22:07:20,663 - root - INFO - step: 38460 loss: 1.8888 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6704 +[titan] 2025-10-05 22:07:20,663 - root - INFO - lr: 5.1677e-06 gnorm: 1.25 [23:33:08< 0:56:35] +[titan] 2025-10-05 22:07:31,515 - root - INFO - step: 38465 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:07:31,515 - root - INFO - lr: 5.1666e-06 gnorm: 1.27 [23:33:19< 0:56:24] +[titan] 2025-10-05 22:07:42,351 - root - INFO - step: 38470 loss: 1.8510 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2144 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 22:07:42,351 - root - INFO - lr: 5.1655e-06 gnorm: 1.22 [23:33:30< 0:56:13] +[titan] 2025-10-05 22:07:53,204 - root - INFO - step: 38475 loss: 1.9409 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7151 +[titan] 2025-10-05 22:07:53,204 - root - INFO - lr: 5.1645e-06 gnorm: 1.27 [23:33:41< 0:56:01] +[titan] 2025-10-05 22:08:04,067 - root - INFO - step: 38480 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 22:08:04,067 - root - INFO - lr: 
5.1634e-06 gnorm: 1.21 [23:33:52< 0:55:50] +[titan] 2025-10-05 22:08:14,965 - root - INFO - step: 38485 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:08:14,965 - root - INFO - lr: 5.1623e-06 gnorm: 1.27 [23:34:03< 0:55:39] +[titan] 2025-10-05 22:08:25,908 - root - INFO - step: 38490 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:08:25,908 - root - INFO - lr: 5.1612e-06 gnorm: 1.26 [23:34:14< 0:55:28] +[titan] 2025-10-05 22:08:36,784 - root - INFO - step: 38495 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6699 +[titan] 2025-10-05 22:08:36,784 - root - INFO - lr: 5.1602e-06 gnorm: 1.32 [23:34:24< 0:55:17] +[titan] 2025-10-05 22:08:45,474 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:08:47,650 - root - INFO - step: 38500 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 22:08:47,650 - root - INFO - lr: 5.1591e-06 gnorm: 1.33 [23:34:35< 0:55:06] +[titan] 2025-10-05 22:08:58,527 - root - INFO - step: 38505 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 22:08:58,527 - root - INFO - lr: 5.1581e-06 gnorm: 1.28 [23:34:46< 0:54:55] +[titan] 2025-10-05 22:09:09,391 - root - INFO - step: 38510 loss: 1.9323 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:09:09,391 - root - INFO - lr: 5.1570e-06 gnorm: 1.28 [23:34:57< 0:54:44] +[titan] 2025-10-05 22:09:20,365 - root - INFO - step: 38515 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 29,861 tflops: 414.28 mfu: 41.89% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:09:20,365 - root - INFO - lr: 5.1560e-06 gnorm: 1.27 [23:35:08< 0:54:33] +[titan] 2025-10-05 22:09:31,218 - root - INFO - step: 38520 loss: 1.9315 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:09:31,219 - root - INFO - lr: 5.1549e-06 gnorm: 1.25 [23:35:19< 0:54:22] +[titan] 2025-10-05 22:09:42,070 - root - INFO - step: 38525 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6845 +[titan] 2025-10-05 22:09:42,070 - root - INFO - lr: 5.1539e-06 gnorm: 1.23 [23:35:30< 0:54:11] +[titan] 2025-10-05 22:09:52,922 - root - INFO - step: 38530 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 22:09:52,922 - root - INFO - lr: 
5.1528e-06 gnorm: 1.26 [23:35:41< 0:54:00] +[titan] 2025-10-05 22:10:03,769 - root - INFO - step: 38535 loss: 1.9228 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:10:03,769 - root - INFO - lr: 5.1518e-06 gnorm: 1.25 [23:35:51< 0:53:49] +[titan] 2025-10-05 22:10:14,645 - root - INFO - step: 38540 loss: 1.9149 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6940 +[titan] 2025-10-05 22:10:14,645 - root - INFO - lr: 5.1508e-06 gnorm: 1.24 [23:36:02< 0:53:38] +[titan] 2025-10-05 22:10:25,531 - root - INFO - step: 38545 loss: 1.8971 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 22:10:25,532 - root - INFO - lr: 5.1497e-06 gnorm: 1.21 [23:36:13< 0:53:27] +[titan] 2025-10-05 22:10:34,232 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:10:36,410 - root - INFO - step: 38550 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 22:10:36,411 - root - INFO - lr: 5.1487e-06 gnorm: 1.26 [23:36:24< 0:53:16] +[titan] 2025-10-05 22:10:47,265 - root - INFO - step: 38555 loss: 1.9055 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 22:10:47,265 - root - INFO - lr: 5.1477e-06 gnorm: 1.25 [23:36:35< 0:53:05] +[titan] 2025-10-05 22:10:58,113 - root - INFO - step: 38560 loss: 1.8963 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 22:10:58,113 - root - INFO - lr: 5.1467e-06 gnorm: 1.28 [23:36:46< 0:52:54] +[titan] 2025-10-05 22:11:08,954 - root - INFO - step: 38565 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:11:08,954 - root - INFO - lr: 5.1456e-06 gnorm: 1.26 [23:36:57< 0:52:43] +[titan] 2025-10-05 22:11:19,804 - root - INFO - step: 38570 loss: 1.9003 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 22:11:19,805 - root - INFO - lr: 5.1446e-06 gnorm: 1.22 [23:37:07< 0:52:32] +[titan] 2025-10-05 22:11:30,699 - root - INFO - step: 38575 loss: 1.8708 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6535 +[titan] 2025-10-05 22:11:30,699 - root - INFO - lr: 5.1436e-06 gnorm: 1.25 [23:37:18< 0:52:21] +[titan] 2025-10-05 22:11:41,605 - root - INFO - step: 38580 loss: 1.9498 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 22:11:41,605 - root - INFO - lr: 
5.1426e-06 gnorm: 1.26 [23:37:29< 0:52:10] +[titan] 2025-10-05 22:11:52,476 - root - INFO - step: 38585 loss: 1.8659 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2149 global_avg_mtp_loss: 1.6510 +[titan] 2025-10-05 22:11:52,476 - root - INFO - lr: 5.1416e-06 gnorm: 1.27 [23:37:40< 0:51:59] +[titan] 2025-10-05 22:12:03,366 - root - INFO - step: 38590 loss: 1.8820 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6644 +[titan] 2025-10-05 22:12:03,366 - root - INFO - lr: 5.1406e-06 gnorm: 1.30 [23:37:51< 0:51:48] +[titan] 2025-10-05 22:12:14,240 - root - INFO - step: 38595 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 22:12:14,241 - root - INFO - lr: 5.1396e-06 gnorm: 1.20 [23:38:02< 0:51:37] +[titan] 2025-10-05 22:12:22,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:12:25,159 - root - INFO - step: 38600 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6314 +[titan] 2025-10-05 22:12:25,159 - root - INFO - lr: 5.1386e-06 gnorm: 1.21 [23:38:13< 0:51:26] +[titan] 2025-10-05 22:12:36,019 - root - INFO - step: 38605 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 22:12:36,019 - root - INFO - lr: 5.1376e-06 gnorm: 1.24 [23:38:24< 0:51:15] +[titan] 2025-10-05 22:12:46,891 - root - INFO - step: 38610 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 22:12:46,891 - root - INFO - lr: 5.1367e-06 gnorm: 1.19 [23:38:35< 0:51:04] +[titan] 2025-10-05 22:12:57,808 - root - INFO - step: 38615 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 22:12:57,808 - root - INFO - lr: 5.1357e-06 gnorm: 1.29 [23:38:45< 0:50:53] +[titan] 2025-10-05 22:13:08,674 - root - INFO - step: 38620 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 22:13:08,674 - root - INFO - lr: 5.1347e-06 gnorm: 1.29 [23:38:56< 0:50:42] +[titan] 2025-10-05 22:13:19,537 - root - INFO - step: 38625 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 22:13:19,537 - root - INFO - lr: 5.1337e-06 gnorm: 1.26 [23:39:07< 0:50:31] +[titan] 2025-10-05 22:13:30,453 - root - INFO - step: 38630 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6688 +[titan] 2025-10-05 22:13:30,453 - root - INFO - lr: 
5.1328e-06 gnorm: 1.24 [23:39:18< 0:50:20] +[titan] 2025-10-05 22:13:41,303 - root - INFO - step: 38635 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 22:13:41,303 - root - INFO - lr: 5.1318e-06 gnorm: 1.27 [23:39:29< 0:50:09] +[titan] 2025-10-05 22:13:52,138 - root - INFO - step: 38640 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 22:13:52,139 - root - INFO - lr: 5.1308e-06 gnorm: 1.25 [23:39:40< 0:49:58] +[titan] 2025-10-05 22:14:03,026 - root - INFO - step: 38645 loss: 1.8958 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6769 +[titan] 2025-10-05 22:14:03,026 - root - INFO - lr: 5.1299e-06 gnorm: 1.24 [23:39:51< 0:49:47] +[titan] 2025-10-05 22:14:11,665 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:14:13,845 - root - INFO - step: 38650 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6629 +[titan] 2025-10-05 22:14:13,845 - root - INFO - lr: 5.1289e-06 gnorm: 1.27 [23:40:02< 0:49:36] +[titan] 2025-10-05 22:14:24,687 - root - INFO - step: 38655 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 22:14:24,687 - root - INFO - lr: 5.1280e-06 gnorm: 1.32 [23:40:12< 0:49:24] +[titan] 2025-10-05 22:14:35,527 - root - INFO - step: 38660 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6957 +[titan] 2025-10-05 22:14:35,527 - root - INFO - lr: 5.1270e-06 gnorm: 1.28 [23:40:23< 0:49:13] +[titan] 2025-10-05 22:14:46,388 - root - INFO - step: 38665 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 22:14:46,388 - root - INFO - lr: 5.1261e-06 gnorm: 1.24 [23:40:34< 0:49:02] +[titan] 2025-10-05 22:14:57,230 - root - INFO - step: 38670 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 22:14:57,230 - root - INFO - lr: 5.1251e-06 gnorm: 1.25 [23:40:45< 0:48:51] +[titan] 2025-10-05 22:15:08,076 - root - INFO - step: 38675 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 22:15:08,076 - root - INFO - lr: 5.1242e-06 gnorm: 1.23 [23:40:56< 0:48:40] +[titan] 2025-10-05 22:15:18,905 - root - INFO - step: 38680 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 22:15:18,905 - root - INFO - lr: 
5.1233e-06 gnorm: 1.24 [23:41:07< 0:48:29] +[titan] 2025-10-05 22:15:29,770 - root - INFO - step: 38685 loss: 1.8560 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6416 +[titan] 2025-10-05 22:15:29,770 - root - INFO - lr: 5.1223e-06 gnorm: 1.26 [23:41:17< 0:48:18] +[titan] 2025-10-05 22:15:40,605 - root - INFO - step: 38690 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 22:15:40,606 - root - INFO - lr: 5.1214e-06 gnorm: 1.26 [23:41:28< 0:48:07] +[titan] 2025-10-05 22:15:51,445 - root - INFO - step: 38695 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 22:15:51,445 - root - INFO - lr: 5.1205e-06 gnorm: 1.26 [23:41:39< 0:47:56] +[titan] 2025-10-05 22:16:00,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:16:02,303 - root - INFO - step: 38700 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6625 +[titan] 2025-10-05 22:16:02,303 - root - INFO - lr: 5.1195e-06 gnorm: 1.22 [23:41:50< 0:47:45] +[titan] 2025-10-05 22:16:13,157 - root - INFO - step: 38705 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 22:16:13,157 - root - INFO - lr: 5.1186e-06 gnorm: 1.24 [23:42:01< 0:47:34] +[titan] 2025-10-05 22:16:24,067 - root - INFO - step: 38710 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 22:16:24,067 - root - INFO - lr: 5.1177e-06 gnorm: 1.23 [23:42:12< 0:47:23] +[titan] 2025-10-05 22:16:34,977 - root - INFO - step: 38715 loss: 1.9159 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:16:34,977 - root - INFO - lr: 5.1168e-06 gnorm: 1.27 [23:42:23< 0:47:12] +[titan] 2025-10-05 22:16:45,845 - root - INFO - step: 38720 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 22:16:45,845 - root - INFO - lr: 5.1159e-06 gnorm: 1.26 [23:42:34< 0:47:01] +[titan] 2025-10-05 22:16:56,703 - root - INFO - step: 38725 loss: 1.8703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 22:16:56,703 - root - INFO - lr: 5.1150e-06 gnorm: 1.22 [23:42:44< 0:46:50] +[titan] 2025-10-05 22:17:07,552 - root - INFO - step: 38730 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6731 +[titan] 2025-10-05 22:17:07,553 - root - INFO - lr: 
5.1141e-06 gnorm: 1.23 [23:42:55< 0:46:39] +[titan] 2025-10-05 22:17:18,393 - root - INFO - step: 38735 loss: 1.9710 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 22:17:18,393 - root - INFO - lr: 5.1132e-06 gnorm: 1.32 [23:43:06< 0:46:28] +[titan] 2025-10-05 22:17:29,314 - root - INFO - step: 38740 loss: 1.8794 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 22:17:29,314 - root - INFO - lr: 5.1123e-06 gnorm: 1.24 [23:43:17< 0:46:17] +[titan] 2025-10-05 22:17:40,164 - root - INFO - step: 38745 loss: 1.8962 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6772 +[titan] 2025-10-05 22:17:40,164 - root - INFO - lr: 5.1114e-06 gnorm: 1.27 [23:43:28< 0:46:06] +[titan] 2025-10-05 22:17:48,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:17:51,020 - root - INFO - step: 38750 loss: 1.8652 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6492 +[titan] 2025-10-05 22:17:51,020 - root - INFO - lr: 5.1105e-06 gnorm: 1.30 [23:43:39< 0:45:55] +[titan] 2025-10-05 22:18:01,867 - root - INFO - step: 38755 loss: 1.8715 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6553 +[titan] 2025-10-05 22:18:01,867 - root - INFO - lr: 5.1097e-06 gnorm: 1.24 [23:43:50< 0:45:44] +[titan] 2025-10-05 22:18:12,725 - root - INFO - step: 38760 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 22:18:12,725 - root - INFO - lr: 5.1088e-06 gnorm: 1.25 [23:44:00< 0:45:33] +[titan] 2025-10-05 22:18:23,576 - root - INFO - step: 38765 loss: 1.9134 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 22:18:23,576 - root - INFO - lr: 5.1079e-06 gnorm: 1.24 [23:44:11< 0:45:22] +[titan] 2025-10-05 22:18:34,466 - root - INFO - step: 38770 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 22:18:34,466 - root - INFO - lr: 5.1070e-06 gnorm: 1.21 [23:44:22< 0:45:11] +[titan] 2025-10-05 22:18:45,359 - root - INFO - step: 38775 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 22:18:45,359 - root - INFO - lr: 5.1062e-06 gnorm: 1.22 [23:44:33< 0:45:00] +[titan] 2025-10-05 22:18:56,225 - root - INFO - step: 38780 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 22:18:56,225 - root - INFO - lr: 
5.1053e-06 gnorm: 1.23 [23:44:44< 0:44:49] +[titan] 2025-10-05 22:19:07,063 - root - INFO - step: 38785 loss: 1.8911 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 22:19:07,063 - root - INFO - lr: 5.1044e-06 gnorm: 1.28 [23:44:55< 0:44:38] +[titan] 2025-10-05 22:19:17,908 - root - INFO - step: 38790 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 22:19:17,908 - root - INFO - lr: 5.1036e-06 gnorm: 1.27 [23:45:06< 0:44:27] +[titan] 2025-10-05 22:19:28,765 - root - INFO - step: 38795 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:19:28,765 - root - INFO - lr: 5.1027e-06 gnorm: 1.25 [23:45:16< 0:44:16] +[titan] 2025-10-05 22:19:37,451 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:19:39,646 - root - INFO - step: 38800 loss: 1.9199 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 22:19:39,646 - root - INFO - lr: 5.1019e-06 gnorm: 1.22 [23:45:27< 0:44:05] +[titan] 2025-10-05 22:19:50,541 - root - INFO - step: 38805 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 22:19:50,542 - root - INFO - lr: 5.1010e-06 gnorm: 1.25 [23:45:38< 0:43:54] +[titan] 2025-10-05 22:20:01,404 - root - INFO - step: 38810 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6756 +[titan] 2025-10-05 22:20:01,405 - root - INFO - lr: 5.1002e-06 gnorm: 1.25 [23:45:49< 0:43:43] +[titan] 2025-10-05 22:20:12,258 - root - INFO - step: 38815 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7061 +[titan] 2025-10-05 22:20:12,258 - root - INFO - lr: 5.0993e-06 gnorm: 1.33 [23:46:00< 0:43:32] +[titan] 2025-10-05 22:20:23,109 - root - INFO - step: 38820 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:20:23,109 - root - INFO - lr: 5.0985e-06 gnorm: 1.25 [23:46:11< 0:43:21] +[titan] 2025-10-05 22:20:33,977 - root - INFO - step: 38825 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6488 +[titan] 2025-10-05 22:20:33,977 - root - INFO - lr: 5.0977e-06 gnorm: 1.28 [23:46:22< 0:43:10] +[titan] 2025-10-05 22:20:44,821 - root - INFO - step: 38830 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6682 +[titan] 2025-10-05 22:20:44,822 - root - INFO - lr: 
5.0969e-06 gnorm: 1.27 [23:46:32< 0:42:59] +[titan] 2025-10-05 22:20:55,718 - root - INFO - step: 38835 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 22:20:55,718 - root - INFO - lr: 5.0960e-06 gnorm: 1.26 [23:46:43< 0:42:48] +[titan] 2025-10-05 22:21:06,566 - root - INFO - step: 38840 loss: 1.9277 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 22:21:06,566 - root - INFO - lr: 5.0952e-06 gnorm: 1.27 [23:46:54< 0:42:36] +[titan] 2025-10-05 22:21:17,446 - root - INFO - step: 38845 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6385 +[titan] 2025-10-05 22:21:17,446 - root - INFO - lr: 5.0944e-06 gnorm: 1.24 [23:47:05< 0:42:25] +[titan] 2025-10-05 22:21:26,132 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:21:28,317 - root - INFO - step: 38850 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 22:21:28,317 - root - INFO - lr: 5.0936e-06 gnorm: 1.28 [23:47:16< 0:42:14] +[titan] 2025-10-05 22:21:39,188 - root - INFO - step: 38855 loss: 1.8571 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6436 +[titan] 2025-10-05 22:21:39,188 - root - INFO - lr: 5.0928e-06 gnorm: 1.25 [23:47:27< 0:42:03] +[titan] 2025-10-05 22:21:50,046 - root - INFO - step: 38860 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 22:21:50,047 - root - INFO - lr: 5.0920e-06 gnorm: 1.30 [23:47:38< 0:41:52] +[titan] 2025-10-05 22:22:00,909 - root - INFO - step: 38865 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:22:00,909 - root - INFO - lr: 5.0911e-06 gnorm: 1.28 [23:47:49< 0:41:41] +[titan] 2025-10-05 22:22:11,785 - root - INFO - step: 38870 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 22:22:11,786 - root - INFO - lr: 5.0903e-06 gnorm: 1.26 [23:47:59< 0:41:30] +[titan] 2025-10-05 22:22:22,628 - root - INFO - step: 38875 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7259 +[titan] 2025-10-05 22:22:22,628 - root - INFO - lr: 5.0895e-06 gnorm: 1.24 [23:48:10< 0:41:19] +[titan] 2025-10-05 22:22:33,500 - root - INFO - step: 38880 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 22:22:33,500 - root - INFO - lr: 
5.0888e-06 gnorm: 1.26 [23:48:21< 0:41:08] +[titan] 2025-10-05 22:22:44,338 - root - INFO - step: 38885 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:22:44,338 - root - INFO - lr: 5.0880e-06 gnorm: 1.28 [23:48:32< 0:40:57] +[titan] 2025-10-05 22:22:55,187 - root - INFO - step: 38890 loss: 1.9042 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6843 +[titan] 2025-10-05 22:22:55,187 - root - INFO - lr: 5.0872e-06 gnorm: 1.24 [23:48:43< 0:40:46] +[titan] 2025-10-05 22:23:06,026 - root - INFO - step: 38895 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:23:06,026 - root - INFO - lr: 5.0864e-06 gnorm: 1.26 [23:48:54< 0:40:35] +[titan] 2025-10-05 22:23:14,733 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:23:16,918 - root - INFO - step: 38900 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 22:23:16,918 - root - INFO - lr: 5.0856e-06 gnorm: 1.25 [23:49:05< 0:40:24] +[titan] 2025-10-05 22:23:27,768 - root - INFO - step: 38905 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 22:23:27,768 - root - INFO - lr: 5.0848e-06 gnorm: 1.28 [23:49:15< 0:40:13] +[titan] 2025-10-05 22:23:38,736 - root - INFO - step: 38910 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 22:23:38,736 - root - INFO - lr: 5.0841e-06 gnorm: 1.25 [23:49:26< 0:40:02] +[titan] 2025-10-05 22:23:43,292 - root - INFO - Dumping profiler traces at step 38912 +[titan] 2025-10-05 22:23:43,332 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:23:49,840 - root - INFO - step: 38915 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:23:49,840 - root - INFO - lr: 5.0833e-06 gnorm: 1.23 [23:49:37< 0:39:51] +[titan] 2025-10-05 22:24:00,685 - root - INFO - step: 38920 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 22:24:00,685 - root - INFO - lr: 5.0825e-06 gnorm: 1.24 [23:49:48< 0:39:40] +[titan] 2025-10-05 22:24:11,518 - root - INFO - step: 38925 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:24:11,518 - root - INFO - lr: 5.0818e-06 gnorm: 1.28 [23:49:59< 0:39:29] +[titan] 2025-10-05 22:24:22,383 - root - INFO - step: 38930 loss: 
1.9030 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:24:22,383 - root - INFO - lr: 5.0810e-06 gnorm: 1.22 [23:50:10< 0:39:18] +[titan] 2025-10-05 22:24:33,286 - root - INFO - step: 38935 loss: 1.9341 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 22:24:33,286 - root - INFO - lr: 5.0803e-06 gnorm: 1.25 [23:50:21< 0:39:07] +[titan] 2025-10-05 22:24:44,145 - root - INFO - step: 38940 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6642 +[titan] 2025-10-05 22:24:44,145 - root - INFO - lr: 5.0795e-06 gnorm: 1.33 [23:50:32< 0:38:56] +[titan] 2025-10-05 22:24:55,011 - root - INFO - step: 38945 loss: 1.8488 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6353 +[titan] 2025-10-05 22:24:55,011 - root - INFO - lr: 5.0788e-06 gnorm: 1.25 [23:50:43< 0:38:45] +[titan] 2025-10-05 22:25:03,688 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:25:05,861 - root - INFO - step: 38950 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 22:25:05,861 - root - INFO - lr: 5.0780e-06 gnorm: 1.26 [23:50:54< 0:38:34] +[titan] 2025-10-05 22:25:16,696 - root - INFO - step: 38955 loss: 1.8763 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6594 +[titan] 2025-10-05 22:25:16,696 - root - INFO - lr: 5.0773e-06 gnorm: 1.25 [23:51:04< 0:38:23] +[titan] 2025-10-05 22:25:27,557 - root - INFO - step: 38960 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:25:27,557 - root - INFO - lr: 5.0765e-06 gnorm: 1.25 [23:51:15< 0:38:12] +[titan] 2025-10-05 22:25:38,467 - root - INFO - step: 38965 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 22:25:38,467 - root - INFO - lr: 5.0758e-06 gnorm: 1.24 [23:51:26< 0:38:01] +[titan] 2025-10-05 22:25:49,317 - root - INFO - step: 38970 loss: 1.8769 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 22:25:49,317 - root - INFO - lr: 5.0751e-06 gnorm: 1.22 [23:51:37< 0:37:50] +[titan] 2025-10-05 22:26:00,183 - root - INFO - step: 38975 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 22:26:00,183 - root - INFO - lr: 5.0743e-06 gnorm: 1.28 [23:51:48< 0:37:39] +[titan] 2025-10-05 22:26:11,057 - root - INFO - step: 38980 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 22:26:11,057 - root - INFO - lr: 
5.0736e-06 gnorm: 1.29 [23:51:59< 0:37:28] +[titan] 2025-10-05 22:26:21,891 - root - INFO - step: 38985 loss: 1.8837 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 22:26:21,891 - root - INFO - lr: 5.0729e-06 gnorm: 1.26 [23:52:10< 0:37:17] +[titan] 2025-10-05 22:26:32,761 - root - INFO - step: 38990 loss: 1.8936 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:26:32,761 - root - INFO - lr: 5.0722e-06 gnorm: 1.25 [23:52:20< 0:37:06] +[titan] 2025-10-05 22:26:43,668 - root - INFO - step: 38995 loss: 1.8343 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2124 global_avg_mtp_loss: 1.6219 +[titan] 2025-10-05 22:26:43,668 - root - INFO - lr: 5.0715e-06 gnorm: 1.22 [23:52:31< 0:36:55] +[titan] 2025-10-05 22:26:52,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:26:54,511 - root - INFO - step: 39000 loss: 1.8692 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6534 +[titan] 2025-10-05 22:26:54,511 - root - INFO - lr: 5.0708e-06 gnorm: 1.23 [23:52:42< 0:36:44] +[titan] 2025-10-05 22:27:05,357 - root - INFO - step: 39005 loss: 1.8448 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6317 +[titan] 2025-10-05 22:27:05,357 - root - INFO - lr: 5.0701e-06 gnorm: 1.24 [23:52:53< 0:36:33] +[titan] 2025-10-05 22:27:16,214 - root - INFO - step: 39010 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 22:27:16,214 - root - INFO - lr: 5.0694e-06 gnorm: 1.27 [23:53:04< 0:36:22] +[titan] 2025-10-05 22:27:27,027 - root - INFO - step: 39015 loss: 1.8935 memory: 118.84GiB(85.28%) tps: 30,304 tflops: 420.43 mfu: 42.51% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 22:27:27,028 - root - INFO - lr: 5.0687e-06 gnorm: 1.29 [23:53:15< 0:36:11] +[titan] 2025-10-05 22:27:37,873 - root - INFO - step: 39020 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 22:27:37,873 - root - INFO - lr: 5.0680e-06 gnorm: 1.25 [23:53:26< 0:36:00] +[titan] 2025-10-05 22:27:48,725 - root - INFO - step: 39025 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 22:27:48,725 - root - INFO - lr: 5.0673e-06 gnorm: 1.23 [23:53:36< 0:35:49] +[titan] 2025-10-05 22:27:59,585 - root - INFO - step: 39030 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7320 +[titan] 2025-10-05 22:27:59,585 - root - INFO - lr: 
5.0666e-06 gnorm: 1.26 [23:53:47< 0:35:38] +[titan] 2025-10-05 22:28:10,411 - root - INFO - step: 39035 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:28:10,411 - root - INFO - lr: 5.0659e-06 gnorm: 1.29 [23:53:58< 0:35:26] +[titan] 2025-10-05 22:28:21,251 - root - INFO - step: 39040 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:28:21,251 - root - INFO - lr: 5.0652e-06 gnorm: 1.26 [23:54:09< 0:35:15] +[titan] 2025-10-05 22:28:32,077 - root - INFO - step: 39045 loss: 1.9016 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6812 +[titan] 2025-10-05 22:28:32,077 - root - INFO - lr: 5.0645e-06 gnorm: 1.24 [23:54:20< 0:35:04] +[titan] 2025-10-05 22:28:40,768 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:28:42,943 - root - INFO - step: 39050 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 22:28:42,943 - root - INFO - lr: 5.0639e-06 gnorm: 1.25 [23:54:31< 0:34:53] +[titan] 2025-10-05 22:28:53,779 - root - INFO - step: 39055 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 22:28:53,779 - root - INFO - lr: 5.0632e-06 gnorm: 1.27 [23:54:41< 0:34:42] +[titan] 2025-10-05 22:29:04,650 - root - INFO - step: 39060 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:29:04,650 - root - INFO - lr: 5.0625e-06 gnorm: 1.28 [23:54:52< 0:34:31] +[titan] 2025-10-05 22:29:15,481 - root - INFO - step: 39065 loss: 1.8892 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:29:15,481 - root - INFO - lr: 5.0619e-06 gnorm: 1.29 [23:55:03< 0:34:20] +[titan] 2025-10-05 22:29:26,319 - root - INFO - step: 39070 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7449 +[titan] 2025-10-05 22:29:26,319 - root - INFO - lr: 5.0612e-06 gnorm: 1.27 [23:55:14< 0:34:09] +[titan] 2025-10-05 22:29:37,169 - root - INFO - step: 39075 loss: 1.8711 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:29:37,169 - root - INFO - lr: 5.0606e-06 gnorm: 1.39 [23:55:25< 0:33:58] +[titan] 2025-10-05 22:29:47,983 - root - INFO - step: 39080 loss: 1.9585 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 22:29:47,983 - root - INFO - lr: 
5.0599e-06 gnorm: 1.27 [23:55:36< 0:33:47] +[titan] 2025-10-05 22:29:58,811 - root - INFO - step: 39085 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 22:29:58,811 - root - INFO - lr: 5.0593e-06 gnorm: 1.28 [23:55:46< 0:33:36] +[titan] 2025-10-05 22:30:09,630 - root - INFO - step: 39090 loss: 1.8996 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:30:09,630 - root - INFO - lr: 5.0586e-06 gnorm: 1.26 [23:55:57< 0:33:25] +[titan] 2025-10-05 22:30:20,468 - root - INFO - step: 39095 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6810 +[titan] 2025-10-05 22:30:20,469 - root - INFO - lr: 5.0580e-06 gnorm: 1.24 [23:56:08< 0:33:14] +[titan] 2025-10-05 22:30:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:30:31,293 - root - INFO - step: 39100 loss: 1.9874 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 22:30:31,293 - root - INFO - lr: 5.0573e-06 gnorm: 1.34 [23:56:19< 0:33:03] +[titan] 2025-10-05 22:30:42,362 - root - INFO - step: 39105 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,605 tflops: 410.73 mfu: 41.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:30:42,362 - root - INFO - lr: 5.0567e-06 gnorm: 1.26 [23:56:30< 0:32:52] +[titan] 2025-10-05 22:30:53,217 - root - INFO - step: 39110 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2156 global_avg_mtp_loss: 1.6453 +[titan] 2025-10-05 22:30:53,217 - root - INFO - lr: 5.0561e-06 gnorm: 1.23 [23:56:41< 0:32:41] +[titan] 2025-10-05 22:31:04,043 - root - INFO - step: 39115 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 22:31:04,043 - root - INFO - lr: 5.0554e-06 gnorm: 1.23 [23:56:52< 0:32:30] +[titan] 2025-10-05 22:31:14,877 - root - INFO - step: 39120 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 22:31:14,877 - root - INFO - lr: 5.0548e-06 gnorm: 1.28 [23:57:03< 0:32:19] +[titan] 2025-10-05 22:31:25,759 - root - INFO - step: 39125 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 22:31:25,759 - root - INFO - lr: 5.0542e-06 gnorm: 1.27 [23:57:13< 0:32:08] +[titan] 2025-10-05 22:31:36,579 - root - INFO - step: 39130 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 22:31:36,579 - root - INFO - lr: 
5.0536e-06 gnorm: 1.28 [23:57:24< 0:31:57] +[titan] 2025-10-05 22:31:47,420 - root - INFO - step: 39135 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7389 +[titan] 2025-10-05 22:31:47,420 - root - INFO - lr: 5.0530e-06 gnorm: 1.29 [23:57:35< 0:31:46] +[titan] 2025-10-05 22:31:58,260 - root - INFO - step: 39140 loss: 1.9505 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7258 +[titan] 2025-10-05 22:31:58,260 - root - INFO - lr: 5.0523e-06 gnorm: 1.27 [23:57:46< 0:31:35] +[titan] 2025-10-05 22:32:09,071 - root - INFO - step: 39145 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 22:32:09,071 - root - INFO - lr: 5.0517e-06 gnorm: 1.24 [23:57:57< 0:31:24] +[titan] 2025-10-05 22:32:17,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:32:19,885 - root - INFO - step: 39150 loss: 1.8924 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:32:19,885 - root - INFO - lr: 5.0511e-06 gnorm: 1.26 [23:58:08< 0:31:13] +[titan] 2025-10-05 22:32:30,741 - root - INFO - step: 39155 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:32:30,741 - root - INFO - lr: 5.0505e-06 gnorm: 1.26 [23:58:18< 0:31:02] +[titan] 2025-10-05 22:32:41,618 - root - INFO - step: 39160 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 22:32:41,618 - root - INFO - lr: 5.0499e-06 gnorm: 1.27 [23:58:29< 0:30:51] +[titan] 2025-10-05 22:32:52,420 - root - INFO - step: 39165 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,335 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7041 +[titan] 2025-10-05 22:32:52,421 - root - INFO - lr: 5.0493e-06 gnorm: 1.31 [23:58:40< 0:30:40] +[titan] 2025-10-05 22:33:03,241 - root - INFO - step: 39170 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6786 +[titan] 2025-10-05 22:33:03,241 - root - INFO - lr: 5.0488e-06 gnorm: 1.28 [23:58:51< 0:30:29] +[titan] 2025-10-05 22:33:14,059 - root - INFO - step: 39175 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:33:14,059 - root - INFO - lr: 5.0482e-06 gnorm: 1.25 [23:59:02< 0:30:18] +[titan] 2025-10-05 22:33:24,854 - root - INFO - step: 39180 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,356 tflops: 421.14 mfu: 42.58% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7066 +[titan] 2025-10-05 22:33:24,854 - root - INFO - lr: 
5.0476e-06 gnorm: 1.25 [23:59:12< 0:30:07] +[titan] 2025-10-05 22:33:35,698 - root - INFO - step: 39185 loss: 1.8822 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6646 +[titan] 2025-10-05 22:33:35,698 - root - INFO - lr: 5.0470e-06 gnorm: 1.24 [23:59:23< 0:29:56] +[titan] 2025-10-05 22:33:46,541 - root - INFO - step: 39190 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 22:33:46,541 - root - INFO - lr: 5.0464e-06 gnorm: 1.26 [23:59:34< 0:29:45] +[titan] 2025-10-05 22:33:57,343 - root - INFO - step: 39195 loss: 1.8734 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.87 mfu: 42.56% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6567 +[titan] 2025-10-05 22:33:57,343 - root - INFO - lr: 5.0459e-06 gnorm: 1.26 [23:59:45< 0:29:34] +[titan] 2025-10-05 22:34:05,988 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:34:08,160 - root - INFO - step: 39200 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6909 +[titan] 2025-10-05 22:34:08,161 - root - INFO - lr: 5.0453e-06 gnorm: 1.24 [23:59:56< 0:29:23] +[titan] 2025-10-05 22:34:18,971 - root - INFO - step: 39205 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,312 tflops: 420.53 mfu: 42.52% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:34:18,971 - root - INFO - lr: 5.0447e-06 gnorm: 1.27 [1 day, 0:00:07< 0:29:12] +[titan] 2025-10-05 22:34:29,800 - root - INFO - step: 39210 loss: 1.8480 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 22:34:29,800 - root - INFO - lr: 5.0442e-06 gnorm: 1.20 [1 day, 0:00:17< 0:29:01] +[titan] 2025-10-05 22:34:40,603 - root - INFO - step: 39215 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 22:34:40,603 - root - INFO - lr: 5.0436e-06 gnorm: 1.28 [1 day, 0:00:28< 0:28:50] +[titan] 2025-10-05 22:34:51,467 - root - INFO - step: 39220 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6993 +[titan] 2025-10-05 22:34:51,467 - root - INFO - lr: 5.0431e-06 gnorm: 1.25 [1 day, 0:00:39< 0:28:39] +[titan] 2025-10-05 22:35:02,300 - root - INFO - step: 39225 loss: 1.9143 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 22:35:02,300 - root - INFO - lr: 5.0425e-06 gnorm: 1.26 [1 day, 0:00:50< 0:28:28] +[titan] 2025-10-05 22:35:13,119 - root - INFO - step: 39230 loss: 1.8713 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6548 +[titan] 2025-10-05 
22:35:13,119 - root - INFO - lr: 5.0420e-06 gnorm: 1.29 [1 day, 0:01:01< 0:28:17] +[titan] 2025-10-05 22:35:23,922 - root - INFO - step: 39235 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.83 mfu: 42.55% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6983 +[titan] 2025-10-05 22:35:23,922 - root - INFO - lr: 5.0414e-06 gnorm: 1.26 [1 day, 0:01:12< 0:28:06] +[titan] 2025-10-05 22:35:34,763 - root - INFO - step: 39240 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 22:35:34,763 - root - INFO - lr: 5.0409e-06 gnorm: 1.28 [1 day, 0:01:22< 0:27:54] +[titan] 2025-10-05 22:35:45,607 - root - INFO - step: 39245 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 22:35:45,608 - root - INFO - lr: 5.0403e-06 gnorm: 1.28 [1 day, 0:01:33< 0:27:43] +[titan] 2025-10-05 22:35:54,248 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:35:56,420 - root - INFO - step: 39250 loss: 1.9097 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 22:35:56,420 - root - INFO - lr: 5.0398e-06 gnorm: 1.22 [1 day, 0:01:44< 0:27:32] +[titan] 2025-10-05 22:36:07,280 - root - INFO - step: 39255 loss: 1.8902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6708 +[titan] 2025-10-05 22:36:07,280 - root - INFO - lr: 5.0393e-06 gnorm: 1.24 [1 day, 0:01:55< 0:27:21] +[titan] 2025-10-05 22:36:18,098 - root - INFO - step: 39260 loss: 1.9171 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:36:18,098 - root - INFO - lr: 5.0388e-06 gnorm: 1.26 [1 day, 0:02:06< 0:27:10] +[titan] 2025-10-05 22:36:28,912 - root - INFO - step: 39265 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6622 +[titan] 2025-10-05 22:36:28,912 - root - INFO - lr: 5.0382e-06 gnorm: 1.27 [1 day, 0:02:17< 0:26:59] +[titan] 2025-10-05 22:36:39,738 - root - INFO - step: 39270 loss: 1.8621 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6463 +[titan] 2025-10-05 22:36:39,739 - root - INFO - lr: 5.0377e-06 gnorm: 1.26 [1 day, 0:02:27< 0:26:48] +[titan] 2025-10-05 22:36:50,600 - root - INFO - step: 39275 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 22:36:50,600 - root - INFO - lr: 5.0372e-06 gnorm: 1.28 [1 day, 0:02:38< 0:26:37] +[titan] 2025-10-05 22:37:01,420 - root - INFO - step: 39280 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6663 +[titan] 2025-10-05 
22:37:01,421 - root - INFO - lr: 5.0367e-06 gnorm: 1.20 [1 day, 0:02:49< 0:26:26] +[titan] 2025-10-05 22:37:12,238 - root - INFO - step: 39285 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 22:37:12,238 - root - INFO - lr: 5.0362e-06 gnorm: 1.27 [1 day, 0:03:00< 0:26:15] +[titan] 2025-10-05 22:37:23,040 - root - INFO - step: 39290 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.87 mfu: 42.55% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 22:37:23,040 - root - INFO - lr: 5.0357e-06 gnorm: 1.26 [1 day, 0:03:11< 0:26:04] +[titan] 2025-10-05 22:37:33,840 - root - INFO - step: 39295 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:37:33,840 - root - INFO - lr: 5.0352e-06 gnorm: 1.36 [1 day, 0:03:21< 0:25:53] +[titan] 2025-10-05 22:37:42,487 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:37:44,699 - root - INFO - step: 39300 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6904 +[titan] 2025-10-05 22:37:44,699 - root - INFO - lr: 5.0347e-06 gnorm: 1.25 [1 day, 0:03:32< 0:25:42] +[titan] 2025-10-05 22:37:55,513 - root - INFO - step: 39305 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 22:37:55,513 - root - INFO - lr: 5.0342e-06 gnorm: 1.29 [1 day, 0:03:43< 0:25:31] +[titan] 2025-10-05 22:38:06,319 - root - INFO - step: 39310 loss: 1.8070 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2093 global_avg_mtp_loss: 1.5976 +[titan] 2025-10-05 22:38:06,319 - root - INFO - lr: 5.0337e-06 gnorm: 1.22 [1 day, 0:03:54< 0:25:20] +[titan] 2025-10-05 22:38:17,140 - root - INFO - step: 39315 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 22:38:17,140 - root - INFO - lr: 5.0332e-06 gnorm: 1.28 [1 day, 0:04:05< 0:25:09] +[titan] 2025-10-05 22:38:27,940 - root - INFO - step: 39320 loss: 1.8952 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:38:27,940 - root - INFO - lr: 5.0327e-06 gnorm: 1.24 [1 day, 0:04:16< 0:24:58] +[titan] 2025-10-05 22:38:38,794 - root - INFO - step: 39325 loss: 1.8206 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2106 global_avg_mtp_loss: 1.6101 +[titan] 2025-10-05 22:38:38,795 - root - INFO - lr: 5.0323e-06 gnorm: 1.23 [1 day, 0:04:26< 0:24:47] +[titan] 2025-10-05 22:38:49,643 - root - INFO - step: 39330 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7124 +[titan] 2025-10-05 
22:38:49,643 - root - INFO - lr: 5.0318e-06 gnorm: 1.29 [1 day, 0:04:37< 0:24:36] +[titan] 2025-10-05 22:39:00,463 - root - INFO - step: 39335 loss: 1.9117 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:39:00,464 - root - INFO - lr: 5.0313e-06 gnorm: 1.25 [1 day, 0:04:48< 0:24:25] +[titan] 2025-10-05 22:39:11,289 - root - INFO - step: 39340 loss: 1.8200 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2105 global_avg_mtp_loss: 1.6095 +[titan] 2025-10-05 22:39:11,289 - root - INFO - lr: 5.0308e-06 gnorm: 1.26 [1 day, 0:04:59< 0:24:14] +[titan] 2025-10-05 22:39:22,114 - root - INFO - step: 39345 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:39:22,114 - root - INFO - lr: 5.0304e-06 gnorm: 1.27 [1 day, 0:05:10< 0:24:03] +[titan] 2025-10-05 22:39:30,839 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:39:33,015 - root - INFO - step: 39350 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 22:39:33,015 - root - INFO - lr: 5.0299e-06 gnorm: 1.24 [1 day, 0:05:21< 0:23:52] +[titan] 2025-10-05 22:39:43,880 - root - INFO - step: 39355 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 22:39:43,880 - root - INFO - lr: 5.0294e-06 gnorm: 1.21 [1 day, 0:05:31< 0:23:41] +[titan] 2025-10-05 22:39:54,779 - root - INFO - step: 39360 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2123 global_avg_mtp_loss: 1.6258 +[titan] 2025-10-05 22:39:54,779 - root - INFO - lr: 5.0290e-06 gnorm: 1.26 [1 day, 0:05:42< 0:23:30] +[titan] 2025-10-05 22:40:05,616 - root - INFO - step: 39365 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 22:40:05,617 - root - INFO - lr: 5.0285e-06 gnorm: 1.22 [1 day, 0:05:53< 0:23:19] +[titan] 2025-10-05 22:40:16,459 - root - INFO - step: 39370 loss: 1.8828 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 22:40:16,459 - root - INFO - lr: 5.0281e-06 gnorm: 1.23 [1 day, 0:06:04< 0:23:08] +[titan] 2025-10-05 22:40:27,280 - root - INFO - step: 39375 loss: 1.9073 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 22:40:27,280 - root - INFO - lr: 5.0277e-06 gnorm: 1.28 [1 day, 0:06:15< 0:22:57] +[titan] 2025-10-05 22:40:38,119 - root - INFO - step: 39380 loss: 1.9206 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 
22:40:38,119 - root - INFO - lr: 5.0272e-06 gnorm: 1.23 [1 day, 0:06:26< 0:22:46] +[titan] 2025-10-05 22:40:49,118 - root - INFO - step: 39385 loss: 1.9186 memory: 118.84GiB(85.28%) tps: 29,794 tflops: 413.35 mfu: 41.79% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6970 +[titan] 2025-10-05 22:40:49,118 - root - INFO - lr: 5.0268e-06 gnorm: 1.25 [1 day, 0:06:37< 0:22:35] +[titan] 2025-10-05 22:40:59,990 - root - INFO - step: 39390 loss: 1.9410 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 22:40:59,990 - root - INFO - lr: 5.0263e-06 gnorm: 1.30 [1 day, 0:06:48< 0:22:24] +[titan] 2025-10-05 22:41:10,818 - root - INFO - step: 39395 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6520 +[titan] 2025-10-05 22:41:10,818 - root - INFO - lr: 5.0259e-06 gnorm: 1.23 [1 day, 0:06:58< 0:22:13] +[titan] 2025-10-05 22:41:19,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:41:21,663 - root - INFO - step: 39400 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:41:21,663 - root - INFO - lr: 5.0255e-06 gnorm: 1.24 [1 day, 0:07:09< 0:22:02] +[titan] 2025-10-05 22:41:32,499 - root - INFO - step: 39405 loss: 1.8950 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:41:32,500 - root - INFO - lr: 5.0251e-06 gnorm: 1.26 [1 day, 0:07:20< 0:21:51] +[titan] 2025-10-05 22:41:43,338 - root - INFO - step: 39410 loss: 1.9067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:41:43,339 - root - INFO - lr: 5.0246e-06 gnorm: 1.25 [1 day, 0:07:31< 0:21:40] +[titan] 2025-10-05 22:41:54,271 - root - INFO - step: 39415 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 22:41:54,271 - root - INFO - lr: 5.0242e-06 gnorm: 1.26 [1 day, 0:07:42< 0:21:29] +[titan] 2025-10-05 22:42:05,121 - root - INFO - step: 39420 loss: 1.8925 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6733 +[titan] 2025-10-05 22:42:05,121 - root - INFO - lr: 5.0238e-06 gnorm: 1.27 [1 day, 0:07:53< 0:21:18] +[titan] 2025-10-05 22:42:14,067 - root - INFO - Dumping profiler traces at step 39424 +[titan] 2025-10-05 22:42:14,106 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:42:16,301 - root - INFO - step: 39425 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 29,310 tflops: 406.63 mfu: 41.12% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 22:42:16,301 - root - INFO - lr: 5.0234e-06 gnorm: 1.27 [1 day, 0:08:04< 0:21:07] +[titan] 2025-10-05 22:42:27,154 
- root - INFO - step: 39430 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 22:42:27,155 - root - INFO - lr: 5.0230e-06 gnorm: 1.25 [1 day, 0:08:15< 0:20:56] +[titan] 2025-10-05 22:42:37,960 - root - INFO - step: 39435 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 22:42:37,960 - root - INFO - lr: 5.0226e-06 gnorm: 1.27 [1 day, 0:08:26< 0:20:45] +[titan] 2025-10-05 22:42:48,874 - root - INFO - step: 39440 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:42:48,874 - root - INFO - lr: 5.0222e-06 gnorm: 1.24 [1 day, 0:08:36< 0:20:34] +[titan] 2025-10-05 22:42:59,759 - root - INFO - step: 39445 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 22:42:59,759 - root - INFO - lr: 5.0218e-06 gnorm: 1.26 [1 day, 0:08:47< 0:20:23] +[titan] 2025-10-05 22:43:08,435 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:43:10,640 - root - INFO - step: 39450 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 22:43:10,641 - root - INFO - lr: 5.0214e-06 gnorm: 1.28 [1 day, 0:08:58< 0:20:12] +[titan] 2025-10-05 22:43:21,477 - root - INFO - step: 39455 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:43:21,478 - root - INFO - lr: 5.0210e-06 gnorm: 1.32 [1 day, 0:09:09< 0:20:01] +[titan] 2025-10-05 22:43:32,319 - root - INFO - step: 39460 loss: 1.9474 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7223 +[titan] 2025-10-05 22:43:32,319 - root - INFO - lr: 5.0206e-06 gnorm: 1.30 [1 day, 0:09:20< 0:19:50] +[titan] 2025-10-05 22:43:43,178 - root - INFO - step: 39465 loss: 1.8880 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6701 +[titan] 2025-10-05 22:43:43,178 - root - INFO - lr: 5.0203e-06 gnorm: 1.28 [1 day, 0:09:31< 0:19:39] +[titan] 2025-10-05 22:43:54,102 - root - INFO - step: 39470 loss: 1.8901 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6714 +[titan] 2025-10-05 22:43:54,102 - root - INFO - lr: 5.0199e-06 gnorm: 1.25 [1 day, 0:09:42< 0:19:27] +[titan] 2025-10-05 22:44:04,938 - root - INFO - step: 39475 loss: 1.8656 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6497 +[titan] 2025-10-05 22:44:04,938 - root - INFO - lr: 5.0195e-06 gnorm: 1.23 [1 day, 0:09:53< 0:19:16] +[titan] 2025-10-05 22:44:15,757 - root - INFO - step: 39480 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 
22:44:15,757 - root - INFO - lr: 5.0191e-06 gnorm: 1.27 [1 day, 0:10:03< 0:19:05] +[titan] 2025-10-05 22:44:26,640 - root - INFO - step: 39485 loss: 1.8523 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6375 +[titan] 2025-10-05 22:44:26,640 - root - INFO - lr: 5.0188e-06 gnorm: 1.23 [1 day, 0:10:14< 0:18:54] +[titan] 2025-10-05 22:44:37,455 - root - INFO - step: 39490 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6979 +[titan] 2025-10-05 22:44:37,455 - root - INFO - lr: 5.0184e-06 gnorm: 1.26 [1 day, 0:10:25< 0:18:43] +[titan] 2025-10-05 22:44:48,278 - root - INFO - step: 39495 loss: 1.8271 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2112 global_avg_mtp_loss: 1.6159 +[titan] 2025-10-05 22:44:48,278 - root - INFO - lr: 5.0181e-06 gnorm: 1.24 [1 day, 0:10:36< 0:18:32] +[titan] 2025-10-05 22:44:57,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:44:59,302 - root - INFO - step: 39500 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 29,726 tflops: 412.41 mfu: 41.70% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:44:59,302 - root - INFO - lr: 5.0177e-06 gnorm: 1.26 [1 day, 0:10:47< 0:18:21] +[titan] 2025-10-05 22:45:10,138 - root - INFO - step: 39505 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:45:10,138 - root - INFO - lr: 5.0173e-06 gnorm: 1.25 [1 day, 0:10:58< 0:18:10] +[titan] 2025-10-05 22:45:20,973 - root - INFO - step: 39510 loss: 1.9394 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 22:45:20,973 - root - INFO - lr: 5.0170e-06 gnorm: 1.26 [1 day, 0:11:09< 0:17:59] +[titan] 2025-10-05 22:45:31,857 - root - INFO - step: 39515 loss: 1.8345 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6215 +[titan] 2025-10-05 22:45:31,857 - root - INFO - lr: 5.0167e-06 gnorm: 1.32 [1 day, 0:11:19< 0:17:48] +[titan] 2025-10-05 22:45:42,715 - root - INFO - step: 39520 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 22:45:42,715 - root - INFO - lr: 5.0163e-06 gnorm: 1.30 [1 day, 0:11:30< 0:17:37] +[titan] 2025-10-05 22:45:53,618 - root - INFO - step: 39525 loss: 1.8642 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 22:45:53,618 - root - INFO - lr: 5.0160e-06 gnorm: 1.23 [1 day, 0:11:41< 0:17:26] +[titan] 2025-10-05 22:46:04,478 - root - INFO - step: 39530 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 
22:46:04,478 - root - INFO - lr: 5.0156e-06 gnorm: 1.28 [1 day, 0:11:52< 0:17:15] +[titan] 2025-10-05 22:46:15,353 - root - INFO - step: 39535 loss: 1.8455 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6312 +[titan] 2025-10-05 22:46:15,353 - root - INFO - lr: 5.0153e-06 gnorm: 1.27 [1 day, 0:12:03< 0:17:04] +[titan] 2025-10-05 22:46:26,197 - root - INFO - step: 39540 loss: 1.8853 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 22:46:26,197 - root - INFO - lr: 5.0150e-06 gnorm: 1.25 [1 day, 0:12:14< 0:16:53] +[titan] 2025-10-05 22:46:37,052 - root - INFO - step: 39545 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 22:46:37,052 - root - INFO - lr: 5.0147e-06 gnorm: 1.30 [1 day, 0:12:25< 0:16:42] +[titan] 2025-10-05 22:46:45,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:46:47,965 - root - INFO - step: 39550 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 22:46:47,966 - root - INFO - lr: 5.0143e-06 gnorm: 1.34 [1 day, 0:12:36< 0:16:31] +[titan] 2025-10-05 22:46:58,895 - root - INFO - step: 39555 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6838 +[titan] 2025-10-05 22:46:58,895 - root - INFO - lr: 5.0140e-06 gnorm: 1.24 [1 day, 0:12:46< 0:16:20] +[titan] 2025-10-05 22:47:09,746 - root - INFO - step: 39560 loss: 1.9366 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7128 +[titan] 2025-10-05 22:47:09,746 - root - INFO - lr: 5.0137e-06 gnorm: 1.22 [1 day, 0:12:57< 0:16:09] +[titan] 2025-10-05 22:47:20,590 - root - INFO - step: 39565 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 22:47:20,590 - root - INFO - lr: 5.0134e-06 gnorm: 1.26 [1 day, 0:13:08< 0:15:58] +[titan] 2025-10-05 22:47:31,450 - root - INFO - step: 39570 loss: 1.8471 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6330 +[titan] 2025-10-05 22:47:31,450 - root - INFO - lr: 5.0131e-06 gnorm: 1.24 [1 day, 0:13:19< 0:15:47] +[titan] 2025-10-05 22:47:42,294 - root - INFO - step: 39575 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:47:42,294 - root - INFO - lr: 5.0128e-06 gnorm: 1.26 [1 day, 0:13:30< 0:15:36] +[titan] 2025-10-05 22:47:53,185 - root - INFO - step: 39580 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 
22:47:53,186 - root - INFO - lr: 5.0125e-06 gnorm: 1.24 [1 day, 0:13:41< 0:15:25] +[titan] 2025-10-05 22:48:04,125 - root - INFO - step: 39585 loss: 1.8977 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6775 +[titan] 2025-10-05 22:48:04,125 - root - INFO - lr: 5.0122e-06 gnorm: 1.23 [1 day, 0:13:52< 0:15:14] +[titan] 2025-10-05 22:48:14,970 - root - INFO - step: 39590 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7450 +[titan] 2025-10-05 22:48:14,970 - root - INFO - lr: 5.0119e-06 gnorm: 1.27 [1 day, 0:14:03< 0:15:03] +[titan] 2025-10-05 22:48:25,818 - root - INFO - step: 39595 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 22:48:25,818 - root - INFO - lr: 5.0116e-06 gnorm: 1.25 [1 day, 0:14:13< 0:14:52] +[titan] 2025-10-05 22:48:34,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:48:36,655 - root - INFO - step: 39600 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:48:36,655 - root - INFO - lr: 5.0113e-06 gnorm: 1.26 [1 day, 0:14:24< 0:14:41] +[titan] 2025-10-05 22:48:47,505 - root - INFO - step: 39605 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:48:47,505 - root - INFO - lr: 5.0110e-06 gnorm: 1.25 [1 day, 0:14:35< 0:14:30] +[titan] 2025-10-05 22:48:58,396 - root - INFO - step: 39610 loss: 1.9266 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7045 +[titan] 2025-10-05 22:48:58,396 - root - INFO - lr: 5.0108e-06 gnorm: 1.27 [1 day, 0:14:46< 0:14:19] +[titan] 2025-10-05 22:49:09,249 - root - INFO - step: 39615 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:49:09,249 - root - INFO - lr: 5.0105e-06 gnorm: 1.28 [1 day, 0:14:57< 0:14:08] +[titan] 2025-10-05 22:49:20,110 - root - INFO - step: 39620 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 22:49:20,111 - root - INFO - lr: 5.0102e-06 gnorm: 1.26 [1 day, 0:15:08< 0:13:57] +[titan] 2025-10-05 22:49:30,958 - root - INFO - step: 39625 loss: 1.9163 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 22:49:30,958 - root - INFO - lr: 5.0100e-06 gnorm: 1.28 [1 day, 0:15:19< 0:13:46] +[titan] 2025-10-05 22:49:41,804 - root - INFO - step: 39630 loss: 1.8829 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 
22:49:41,804 - root - INFO - lr: 5.0097e-06 gnorm: 1.26 [1 day, 0:15:29< 0:13:35] +[titan] 2025-10-05 22:49:52,655 - root - INFO - step: 39635 loss: 1.8627 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6476 +[titan] 2025-10-05 22:49:52,655 - root - INFO - lr: 5.0094e-06 gnorm: 1.24 [1 day, 0:15:40< 0:13:24] +[titan] 2025-10-05 22:50:03,525 - root - INFO - step: 39640 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 22:50:03,526 - root - INFO - lr: 5.0092e-06 gnorm: 1.26 [1 day, 0:15:51< 0:13:13] +[titan] 2025-10-05 22:50:14,446 - root - INFO - step: 39645 loss: 1.8260 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2114 global_avg_mtp_loss: 1.6146 +[titan] 2025-10-05 22:50:14,446 - root - INFO - lr: 5.0089e-06 gnorm: 1.27 [1 day, 0:16:02< 0:13:02] +[titan] 2025-10-05 22:50:23,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:50:25,308 - root - INFO - step: 39650 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7039 +[titan] 2025-10-05 22:50:25,308 - root - INFO - lr: 5.0087e-06 gnorm: 1.28 [1 day, 0:16:13< 0:12:51] +[titan] 2025-10-05 22:50:36,173 - root - INFO - step: 39655 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:50:36,173 - root - INFO - lr: 5.0084e-06 gnorm: 1.26 [1 day, 0:16:24< 0:12:40] +[titan] 2025-10-05 22:50:47,037 - root - INFO - step: 39660 loss: 1.9006 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:50:47,037 - root - INFO - lr: 5.0082e-06 gnorm: 1.27 [1 day, 0:16:35< 0:12:29] +[titan] 2025-10-05 22:50:57,913 - root - INFO - step: 39665 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 22:50:57,914 - root - INFO - lr: 5.0079e-06 gnorm: 1.29 [1 day, 0:16:45< 0:12:18] +[titan] 2025-10-05 22:51:08,776 - root - INFO - step: 39670 loss: 1.8655 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 22:51:08,776 - root - INFO - lr: 5.0077e-06 gnorm: 1.30 [1 day, 0:16:56< 0:12:07] +[titan] 2025-10-05 22:51:19,670 - root - INFO - step: 39675 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:51:19,670 - root - INFO - lr: 5.0075e-06 gnorm: 1.25 [1 day, 0:17:07< 0:11:56] +[titan] 2025-10-05 22:51:30,542 - root - INFO - step: 39680 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 
22:51:30,542 - root - INFO - lr: 5.0072e-06 gnorm: 1.27 [1 day, 0:17:18< 0:11:45] +[titan] 2025-10-05 22:51:41,391 - root - INFO - step: 39685 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:51:41,391 - root - INFO - lr: 5.0070e-06 gnorm: 1.27 [1 day, 0:17:29< 0:11:34] +[titan] 2025-10-05 22:51:52,276 - root - INFO - step: 39690 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:51:52,276 - root - INFO - lr: 5.0068e-06 gnorm: 1.25 [1 day, 0:17:40< 0:11:23] +[titan] 2025-10-05 22:52:03,144 - root - INFO - step: 39695 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 22:52:03,144 - root - INFO - lr: 5.0066e-06 gnorm: 1.27 [1 day, 0:17:51< 0:11:12] +[titan] 2025-10-05 22:52:11,813 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:52:13,994 - root - INFO - step: 39700 loss: 1.9249 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 22:52:13,994 - root - INFO - lr: 5.0064e-06 gnorm: 1.23 [1 day, 0:18:02< 0:11:01] +[titan] 2025-10-05 22:52:24,851 - root - INFO - step: 39705 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6876 +[titan] 2025-10-05 22:52:24,852 - root - INFO - lr: 5.0062e-06 gnorm: 1.27 [1 day, 0:18:12< 0:10:50] +[titan] 2025-10-05 22:52:35,763 - root - INFO - step: 39710 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6727 +[titan] 2025-10-05 22:52:35,763 - root - INFO - lr: 5.0060e-06 gnorm: 1.30 [1 day, 0:18:23< 0:10:39] +[titan] 2025-10-05 22:52:46,625 - root - INFO - step: 39715 loss: 1.8269 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2119 global_avg_mtp_loss: 1.6151 +[titan] 2025-10-05 22:52:46,625 - root - INFO - lr: 5.0058e-06 gnorm: 1.25 [1 day, 0:18:34< 0:10:28] +[titan] 2025-10-05 22:52:57,517 - root - INFO - step: 39720 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 22:52:57,517 - root - INFO - lr: 5.0056e-06 gnorm: 1.26 [1 day, 0:18:45< 0:10:16] +[titan] 2025-10-05 22:53:08,394 - root - INFO - step: 39725 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 22:53:08,395 - root - INFO - lr: 5.0054e-06 gnorm: 1.27 [1 day, 0:18:56< 0:10:05] +[titan] 2025-10-05 22:53:19,248 - root - INFO - step: 39730 loss: 1.8733 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 
22:53:19,248 - root - INFO - lr: 5.0052e-06 gnorm: 1.26 [1 day, 0:19:07< 0:09:54] +[titan] 2025-10-05 22:53:30,094 - root - INFO - step: 39735 loss: 1.8701 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:53:30,094 - root - INFO - lr: 5.0050e-06 gnorm: 1.25 [1 day, 0:19:18< 0:09:43] +[titan] 2025-10-05 22:53:40,977 - root - INFO - step: 39740 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 22:53:40,977 - root - INFO - lr: 5.0048e-06 gnorm: 1.28 [1 day, 0:19:29< 0:09:32] +[titan] 2025-10-05 22:53:51,816 - root - INFO - step: 39745 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 22:53:51,816 - root - INFO - lr: 5.0046e-06 gnorm: 1.30 [1 day, 0:19:39< 0:09:21] +[titan] 2025-10-05 22:54:00,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:54:02,694 - root - INFO - step: 39750 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 22:54:02,694 - root - INFO - lr: 5.0044e-06 gnorm: 1.25 [1 day, 0:19:50< 0:09:10] +[titan] 2025-10-05 22:54:13,563 - root - INFO - step: 39755 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 22:54:13,563 - root - INFO - lr: 5.0042e-06 gnorm: 1.31 [1 day, 0:20:01< 0:08:59] +[titan] 2025-10-05 22:54:24,438 - root - INFO - step: 39760 loss: 1.8623 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6459 +[titan] 2025-10-05 22:54:24,438 - root - INFO - lr: 5.0041e-06 gnorm: 1.23 [1 day, 0:20:12< 0:08:48] +[titan] 2025-10-05 22:54:35,297 - root - INFO - step: 39765 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:54:35,297 - root - INFO - lr: 5.0039e-06 gnorm: 1.29 [1 day, 0:20:23< 0:08:37] +[titan] 2025-10-05 22:54:46,209 - root - INFO - step: 39770 loss: 1.8709 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6539 +[titan] 2025-10-05 22:54:46,209 - root - INFO - lr: 5.0037e-06 gnorm: 1.26 [1 day, 0:20:34< 0:08:26] +[titan] 2025-10-05 22:54:57,101 - root - INFO - step: 39775 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 22:54:57,101 - root - INFO - lr: 5.0036e-06 gnorm: 1.27 [1 day, 0:20:45< 0:08:15] +[titan] 2025-10-05 22:55:08,022 - root - INFO - step: 39780 loss: 1.9966 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7659 +[titan] 2025-10-05 
22:55:08,023 - root - INFO - lr: 5.0034e-06 gnorm: 1.31 [1 day, 0:20:56< 0:08:04] +[titan] 2025-10-05 22:55:18,865 - root - INFO - step: 39785 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:55:18,865 - root - INFO - lr: 5.0033e-06 gnorm: 1.29 [1 day, 0:21:06< 0:07:53] +[titan] 2025-10-05 22:55:29,721 - root - INFO - step: 39790 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:55:29,721 - root - INFO - lr: 5.0031e-06 gnorm: 1.28 [1 day, 0:21:17< 0:07:42] +[titan] 2025-10-05 22:55:40,588 - root - INFO - step: 39795 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 22:55:40,588 - root - INFO - lr: 5.0030e-06 gnorm: 1.25 [1 day, 0:21:28< 0:07:31] +[titan] 2025-10-05 22:55:49,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:55:51,443 - root - INFO - step: 39800 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:55:51,443 - root - INFO - lr: 5.0028e-06 gnorm: 1.25 [1 day, 0:21:39< 0:07:20] +[titan] 2025-10-05 22:56:02,382 - root - INFO - step: 39805 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6609 +[titan] 2025-10-05 22:56:02,382 - root - INFO - lr: 5.0027e-06 gnorm: 1.21 [1 day, 0:21:50< 0:07:09] +[titan] 2025-10-05 22:56:13,242 - root - INFO - step: 39810 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:56:13,242 - root - INFO - lr: 5.0026e-06 gnorm: 1.28 [1 day, 0:22:01< 0:06:58] +[titan] 2025-10-05 22:56:24,068 - root - INFO - step: 39815 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6926 +[titan] 2025-10-05 22:56:24,068 - root - INFO - lr: 5.0024e-06 gnorm: 1.28 [1 day, 0:22:12< 0:06:47] +[titan] 2025-10-05 22:56:34,883 - root - INFO - step: 39820 loss: 1.8589 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6448 +[titan] 2025-10-05 22:56:34,883 - root - INFO - lr: 5.0023e-06 gnorm: 1.23 [1 day, 0:22:22< 0:06:36] +[titan] 2025-10-05 22:56:45,691 - root - INFO - step: 39825 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6254 +[titan] 2025-10-05 22:56:45,691 - root - INFO - lr: 5.0022e-06 gnorm: 1.24 [1 day, 0:22:33< 0:06:25] +[titan] 2025-10-05 22:56:56,537 - root - INFO - step: 39830 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 
22:56:56,537 - root - INFO - lr: 5.0020e-06 gnorm: 1.29 [1 day, 0:22:44< 0:06:14] +[titan] 2025-10-05 22:57:07,418 - root - INFO - step: 39835 loss: 1.8289 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2110 global_avg_mtp_loss: 1.6179 +[titan] 2025-10-05 22:57:07,419 - root - INFO - lr: 5.0019e-06 gnorm: 1.25 [1 day, 0:22:55< 0:06:03] +[titan] 2025-10-05 22:57:18,260 - root - INFO - step: 39840 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6716 +[titan] 2025-10-05 22:57:18,260 - root - INFO - lr: 5.0018e-06 gnorm: 1.28 [1 day, 0:23:06< 0:05:52] +[titan] 2025-10-05 22:57:29,092 - root - INFO - step: 39845 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:57:29,092 - root - INFO - lr: 5.0017e-06 gnorm: 1.31 [1 day, 0:23:17< 0:05:41] +[titan] 2025-10-05 22:57:37,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:57:39,898 - root - INFO - step: 39850 loss: 1.8816 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 22:57:39,899 - root - INFO - lr: 5.0016e-06 gnorm: 1.24 [1 day, 0:23:27< 0:05:30] +[titan] 2025-10-05 22:57:50,741 - root - INFO - step: 39855 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 22:57:50,741 - root - INFO - lr: 5.0015e-06 gnorm: 1.32 [1 day, 0:23:38< 0:05:19] +[titan] 2025-10-05 22:58:01,598 - root - INFO - step: 39860 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 22:58:01,598 - root - INFO - lr: 5.0014e-06 gnorm: 1.33 [1 day, 0:23:49< 0:05:08] +[titan] 2025-10-05 22:58:12,433 - root - INFO - step: 39865 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6621 +[titan] 2025-10-05 22:58:12,433 - root - INFO - lr: 5.0013e-06 gnorm: 1.27 [1 day, 0:24:00< 0:04:57] +[titan] 2025-10-05 22:58:23,320 - root - INFO - step: 39870 loss: 1.8085 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2094 global_avg_mtp_loss: 1.5991 +[titan] 2025-10-05 22:58:23,320 - root - INFO - lr: 5.0012e-06 gnorm: 1.27 [1 day, 0:24:11< 0:04:46] +[titan] 2025-10-05 22:58:34,151 - root - INFO - step: 39875 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7171 +[titan] 2025-10-05 22:58:34,151 - root - INFO - lr: 5.0011e-06 gnorm: 1.29 [1 day, 0:24:22< 0:04:35] +[titan] 2025-10-05 22:58:44,982 - root - INFO - step: 39880 loss: 1.8617 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2163 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 
22:58:44,982 - root - INFO - lr: 5.0010e-06 gnorm: 1.24 [1 day, 0:24:33< 0:04:24] +[titan] 2025-10-05 22:58:55,801 - root - INFO - step: 39885 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:58:55,801 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:43< 0:04:13] +[titan] 2025-10-05 22:59:06,655 - root - INFO - step: 39890 loss: 1.8466 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 22:59:06,655 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:54< 0:04:02] +[titan] 2025-10-05 22:59:17,499 - root - INFO - step: 39895 loss: 1.9303 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 22:59:17,499 - root - INFO - lr: 5.0008e-06 gnorm: 1.27 [1 day, 0:25:05< 0:03:51] +[titan] 2025-10-05 22:59:26,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:59:28,385 - root - INFO - step: 39900 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:59:28,385 - root - INFO - lr: 5.0007e-06 gnorm: 1.29 [1 day, 0:25:16< 0:03:40] +[titan] 2025-10-05 22:59:39,223 - root - INFO - step: 39905 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6730 +[titan] 2025-10-05 22:59:39,223 - root - INFO - lr: 5.0006e-06 gnorm: 1.25 [1 day, 0:25:27< 0:03:29] +[titan] 2025-10-05 22:59:50,050 - root - INFO - step: 39910 loss: 1.9026 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6824 +[titan] 2025-10-05 22:59:50,051 - root - INFO - lr: 5.0006e-06 gnorm: 1.33 [1 day, 0:25:38< 0:03:18] +[titan] 2025-10-05 23:00:00,881 - root - INFO - step: 39915 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 23:00:00,881 - root - INFO - lr: 5.0005e-06 gnorm: 1.25 [1 day, 0:25:48< 0:03:07] +[titan] 2025-10-05 23:00:11,722 - root - INFO - step: 39920 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 23:00:11,722 - root - INFO - lr: 5.0005e-06 gnorm: 1.23 [1 day, 0:25:59< 0:02:56] +[titan] 2025-10-05 23:00:22,583 - root - INFO - step: 39925 loss: 1.8682 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 23:00:22,583 - root - INFO - lr: 5.0004e-06 gnorm: 1.24 [1 day, 0:26:10< 0:02:45] +[titan] 2025-10-05 23:00:33,459 - root - INFO - step: 39930 loss: 1.8937 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 
23:00:33,459 - root - INFO - lr: 5.0003e-06 gnorm: 1.28 [1 day, 0:26:21< 0:02:34] +[titan] 2025-10-05 23:00:44,397 - root - INFO - step: 39935 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 23:00:44,397 - root - INFO - lr: 5.0003e-06 gnorm: 1.37 [1 day, 0:26:32< 0:02:23] +[titan] 2025-10-05 23:00:46,760 - root - INFO - Dumping profiler traces at step 39936 +[titan] 2025-10-05 23:00:46,800 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 23:00:55,477 - root - INFO - step: 39940 loss: 1.9007 memory: 118.84GiB(85.28%) tps: 29,576 tflops: 410.32 mfu: 41.49% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 23:00:55,477 - root - INFO - lr: 5.0003e-06 gnorm: 1.22 [1 day, 0:26:43< 0:02:12] +[titan] 2025-10-05 23:01:06,304 - root - INFO - step: 39945 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 23:01:06,305 - root - INFO - lr: 5.0002e-06 gnorm: 1.26 [1 day, 0:26:54< 0:02:01] +[titan] 2025-10-05 23:01:14,966 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 23:01:17,145 - root - INFO - step: 39950 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6639 +[titan] 2025-10-05 23:01:17,145 - root - INFO - lr: 5.0002e-06 gnorm: 1.25 [1 day, 0:27:05< 0:01:50] +[titan] 2025-10-05 23:01:28,000 - root - INFO - step: 39955 loss: 1.8456 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2134 global_avg_mtp_loss: 1.6322 +[titan] 2025-10-05 23:01:28,000 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:16< 0:01:39] +[titan] 2025-10-05 23:01:38,823 - root - INFO - step: 39960 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 23:01:38,823 - root - INFO - lr: 5.0001e-06 gnorm: 1.26 [1 day, 0:27:26< 0:01:28] +[titan] 2025-10-05 23:01:49,702 - root - INFO - step: 39965 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6498 +[titan] 2025-10-05 23:01:49,702 - root - INFO - lr: 5.0001e-06 gnorm: 1.29 [1 day, 0:27:37< 0:01:17] +[titan] 2025-10-05 23:02:00,536 - root - INFO - step: 39970 loss: 1.8845 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 23:02:00,536 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:48< 0:01:06] +[titan] 2025-10-05 23:02:11,385 - root - INFO - step: 39975 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6632 +[titan] 2025-10-05 23:02:11,385 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:27:59< 0:00:55] +[titan] 2025-10-05 23:02:22,224 - root - INFO - step: 39980 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 
23:02:22,224 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:10< 0:00:44] +[titan] 2025-10-05 23:02:33,062 - root - INFO - step: 39985 loss: 1.8577 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6423 +[titan] 2025-10-05 23:02:33,062 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:21< 0:00:33] +[titan] 2025-10-05 23:02:43,924 - root - INFO - step: 39990 loss: 1.9469 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 23:02:43,924 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:28:31< 0:00:22] +[titan] 2025-10-05 23:02:54,801 - root - INFO - step: 39995 loss: 1.8720 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6560 +[titan] 2025-10-05 23:02:54,801 - root - INFO - lr: 5.0000e-06 gnorm: 1.27 [1 day, 0:28:42< 0:00:11] +[titan] 2025-10-05 23:03:03,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 23:03:05,651 - root - INFO - step: 40000 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6752 +[titan] 2025-10-05 23:03:05,651 - root - INFO - lr: 5.0000e-06 gnorm: 1.24 [1 day, 0:28:53< 0:00:00] +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving a full checkpoint at last step, step 40000. +[titan] 2025-10-05 23:03:23,678 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 23:03:23,678 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 18.03 seconds. 
+[titan] 2025-10-05 23:03:23,678 - root - INFO - Training completed diff --git a/logs/none_99omtdbz/attempt_0/7/stderr.log b/logs/none_99omtdbz/attempt_0/7/stderr.log new file mode 100644 index 0000000000000000000000000000000000000000..a0793c334e5de378d9ccd92c2449f11ca476a1aa --- /dev/null +++ b/logs/none_99omtdbz/attempt_0/7/stderr.log @@ -0,0 +1,17257 @@ +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +OpenBLAS WARNING - could not determine the L2 cache size on this system, assuming 256k +wandb: Appending key for api.wandb.ai to your netrc file: /root/.netrc +wandb: Currently logged in as: zaydzuhri to https://api.wandb.ai. Use `wandb login --relogin` to force relogin +Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured. +[titan] 2025-10-04 22:33:16,978 - root - INFO - Starting job: default job +[titan] 2025-10-04 22:33:16,978 - root - INFO - { + "activation_checkpoint": { + "mode": "none", + "selective_ac_option": "2" + }, + "activation_offload": { + "mode": "none" + }, + "checkpoint": { + "async_mode": "disabled", + "convert_to_hf_on_save": false, + "create_seed_checkpoint": false, + "enable_checkpoint": true, + "exclude_from_loading": [], + "export_dtype": "float32", + "folder": "checkpoint", + "hf_repo_base_name": "zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000", + "hf_upload_enabled": true, + "hf_upload_format": "dcp", + "interval": 5000, + "interval_type": "steps", + "keep_latest_k": 0, + "load_step": -1, + "model_weights_only": false + }, + "comm": { + "init_timeout_seconds": 6000, + "trace_buf_size": 20000, + "train_timeout_seconds": 6000 + }, + "experimental": { + "context_parallel_degree": 1, + "context_parallel_rotate_method": "allgather", + "custom_model_path": "", + "enable_async_tensor_parallel": false, + "enable_compiled_autograd": false, + "pipeline_parallel_degree": 1, + "pipeline_parallel_microbatches": null, + 
"pipeline_parallel_schedule": "1F1B", + "pipeline_parallel_schedule_csv": "", + "pipeline_parallel_split_points": [] + }, + "fault_tolerance": { + "enable": false, + "group_size": 0, + "min_replica_size": 1, + "replica_id": 0 + }, + "float8": { + "enable_fsdp_float8_all_gather": false, + "force_recompute_fp8_weight_in_bwd": false, + "precompute_float8_dynamic_scale_for_fsdp": false, + "recipe_name": null + }, + "job": { + "config_file": "flame/models/fla.toml", + "description": "default job", + "dump_folder": "exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine", + "print_args": true, + "use_for_integration_test": false + }, + "lr_scheduler": { + "decay_ratio": null, + "decay_type": "cosine", + "lr_min": 0.1, + "warmup_steps": 400 + }, + "memory_estimation": { + "disable_fake_mode": false, + "enabled": false + }, + "metrics": { + "disable_color_printing": false, + "enable_tensorboard": false, + "enable_wandb": true, + "log_freq": 5, + "save_for_all_ranks": false, + "save_tb_folder": "tb" + }, + "model": { + "config": "configs/mtp_transformer_1B.json", + "converters": [], + "name": "fla", + "print_after_conversion": false, + "tokenizer_path": "fla-hub/transformer-1.3B-100B" + }, + "optimizer": { + "early_step_in_backward": false, + "eps": 1e-15, + "implementation": "fused", + "lr": 5e-05, + "name": "AdamW" + }, + "profiling": { + "enable_memory_snapshot": false, + "enable_profiling": true, + "profile_freq": 512, + "save_memory_snapshot_folder": "memory_snapshot", + "save_traces_folder": "profile_trace" + }, + "training": { + "batch_size": 16, + "compile": true, + "context_len": 4096, + "data_dir": null, + "data_files": null, + "data_parallel_replicate_degree": 1, + "data_parallel_shard_degree": -1, + "data_probs": null, + "dataset": "/root/.cache/zaydzuhri___open_math_instruct-2-text/default", + "dataset_name": "default", + "dataset_split": "train", + "deterministic": false, + "disable_loss_parallel": false, + 
"enable_cpu_offload": false, + "fsdp_reshard_after_forward": "default", + "gc_freq": 50, + "gradient_accumulation_steps": 1, + "max_norm": 1.0, + "mixed_precision_param": "bfloat16", + "mixed_precision_reduce": "float32", + "num_workers": 32, + "persistent_workers": false, + "pin_memory": false, + "prefetch_factor": 2, + "seed": 79, + "seq_len": 4096, + "skip_nan_inf": true, + "steps": 40000, + "streaming": false, + "tensor_parallel_degree": 1, + "varlen": false + } +} +[titan] 2025-10-04 22:33:16,978 - root - INFO - [GC] Initial GC collection. 0.00 seconds. +[titan] 2025-10-04 22:33:45,895 - root - INFO - Target Hugging Face repository for this run: zaydzuhri/mtp-math-1B-4096-batch16x1-steps40000-20251004-223345 +[titan] 2025-10-04 22:33:45,895 - root - WARNING - ENV[TORCH_NCCL_ASYNC_ERROR_HANDLING] = 1 will be overridden to 3 based on job config +[titan] 2025-10-04 22:33:45,896 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:33:45,897 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:33:45,898 - root - INFO - Peak FLOPS used for computing MFU: 9.890e+14 +[titan] 2025-10-04 22:33:45,898 - root - INFO - Building 1-D device mesh with ['dp_shard'], [8] +[titan] 2025-10-04 22:33:46,046 - root - INFO - Loading tokenizer... 
+[titan] 2025-10-04 22:33:46,238 - root - INFO - LlamaTokenizerFast(name_or_path='fla-hub/transformer-1.3B-100B', vocab_size=32000, model_max_length=10000000000, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '', 'eos_token': '', 'unk_token': ''}, clean_up_tokenization_spaces=False, added_tokens_decoder={ + 0: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 1: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), + 2: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), +} +) +[titan] 2025-10-04 22:33:46,239 - root - INFO - Loading dataset /root/.cache/zaydzuhri___open_math_instruct-2-text/default:default +`trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:46,239 - datasets.load - ERROR - `trust_remote_code` is not supported anymore. +Please check that the Hugging Face dataset '/root/.cache/zaydzuhri___open_math_instruct-2-text/default' isn't based on a loading script and remove `trust_remote_code`. +If the dataset is based on a loading script, please ask the dataset author to remove it and convert it to a standard format like Parquet. +[titan] 2025-10-04 22:33:47,031 - root - INFO - Dataset({ + features: ['text'], + num_rows: 21972791 +}) +[titan] 2025-10-04 22:33:47,031 - root - INFO - Shuffling the dataset with seed 79 +[titan] 2025-10-04 22:33:53,290 - root - INFO - Loading model config from configs/mtp_transformer_1B.json +[titan] 2025-10-04 22:33:53,292 - root - INFO - Building dataloader... 
+[titan] 2025-10-04 22:33:53,294 - root - INFO - Building model from the config +MTPTransformerConfig { + "bos_token_id": 1, + "elementwise_affine": true, + "eos_token_id": 2, + "fuse_cross_entropy": true, + "fuse_norm": true, + "fuse_swiglu": true, + "hidden_act": "swish", + "hidden_ratio": 4, + "hidden_size": 2048, + "initializer_range": 0.006, + "intermediate_size": null, + "max_position_embeddings": 8192, + "model_type": "mtp_transformer", + "n_future_tokens": 4, + "norm_eps": 1e-06, + "num_heads": 32, + "num_hidden_layers": 32, + "num_kv_heads": null, + "pad_token_id": 2, + "qk_norm": false, + "qkv_bias": false, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "transformers_version": "4.51.3", + "use_cache": true, + "use_custom_backward": false, + "vocab_size": 32000, + "window_size": null +} + +[titan] 2025-10-04 22:33:53,427 - root - INFO -  +MTPTransformerForCausalLM( + (model): MTPTransformerModel( + (embeddings): Embedding(32000, 2048, padding_idx=2) + (layers): ModuleList( + (0-27): 28 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (extra_heads): ModuleList( + (0-3): 4 x MTPTransformerBlock( + (attn_norm): RMSNorm(2048, eps=1e-06) + (attn): Attention( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): 
Linear(in_features=2048, out_features=2048, bias=False) + (v_proj): Linear(in_features=2048, out_features=2048, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary): RotaryEmbedding(dim=64, base=10000.0, interleaved=False, pos_idx_in_fp32=True) + ) + (mlp_norm): RMSNorm(2048, eps=1e-06) + (mlp): GatedMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (swiglu_linear): SwiGLULinear() + ) + ) + ) + (norm): RMSNorm(2048, eps=1e-06) + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + (criterion): FusedLinearCrossEntropyLoss() +) + +[titan] 2025-10-04 22:33:53,455 - root - INFO - Compiling each block with torch.compile +[titan] 2025-10-04 22:33:53,455 - root - INFO - Compiling the embedding, norm, and lm_head layers with torch.compile +[titan] 2025-10-04 22:33:53,455 - root - INFO - Compiling the entire model with torch.compile +[titan] 2025-10-04 22:33:53,531 - root - INFO - Applied FSDP to the model +[titan] 2025-10-04 22:33:53,741 - root - INFO - CUDA memory usage for model: 0.84GiB(0.60%) +[titan] 2025-10-04 22:33:53,761 - root - INFO - Checkpointing active. Checkpoints will be loaded from and saved to exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/checkpoint +[titan] 2025-10-04 22:33:53,761 - root - INFO - Loading the checkpoint at step 0. +[titan] 2025-10-04 22:34:08,076 - root - INFO - [GC] GC collection for checkpoint loading. 0.57 seconds. +[titan] 2025-10-04 22:34:08,076 - root - INFO - Finished loading the checkpoint in 14.31 seconds. 
+[titan] 2025-10-04 22:34:08,077 - root - INFO - CUDA capacity: NVIDIA H200 with 139.36GiB memory +[titan] 2025-10-04 22:34:08,078 - root - WARNING - Error running lspci: [Errno 2] No such file or directory: 'lspci', fallback to use device_name +[titan] 2025-10-04 22:34:09,428 - root - INFO - ***** Running training ***** +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Training starts at step 1 +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Number of tokens per sequence = 4,096 +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Gradient Accumulation steps = 1 +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Instantaneous batch size (per device) = 16 +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Global batch size (w. parallel, distributed & accumulation) = 128 (524,288 tokens) +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Total optimization steps = 40,000 (20,971,520,000 tokens) +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Warmup steps = 400 (209,715,200 tokens) +[titan] 2025-10-04 22:34:09,429 - root - INFO -  Number of parameters = 1,775,372,288  +[titan] 2025-10-04 22:34:09,429 - root - INFO - Profiling active. Traces will be saved at exp/mtp.math.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace +[titan] 2025-10-04 22:34:47,724 - root - INFO - step: 1 loss: 12.0105 memory: 116.89GiB(83.88%) tps: 1,653 tflops: 22.93 mfu: 2.32% global_avg_ntp_loss: 2.1249 global_avg_mtp_loss: 9.8856 +[titan] 2025-10-04 22:34:47,724 - root - INFO - lr: 2.4938e-07 gnorm: 20.89 [ 0:00:39<18 days, 8:29:34] +[titan] 2025-10-04 22:34:47,724 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-04 22:34:53,090 - root - INFO - [GC] GC collection invoked by checkpointer. 0.19 seconds. +[titan] 2025-10-04 22:34:53,090 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 5.37 seconds. 
+[titan] 2025-10-04 22:34:53,090 - root - INFO - Synchronizing and adjusting timeout for all ProcessGroups to 1:40:00 +[titan] 2025-10-04 22:36:58,991 - root - INFO - step: 5 loss: 11.7564 memory: 118.84GiB(85.28%) tps: 1,997 tflops: 27.71 mfu: 2.80% global_avg_ntp_loss: 2.0697 global_avg_mtp_loss: 9.6867 +[titan] 2025-10-04 22:36:58,992 - root - INFO - lr: 7.4813e-07 gnorm: 19.96 [ 0:02:50<15 days, 19:45:27] +[titan] 2025-10-04 22:37:09,851 - root - INFO - step: 10 loss: 11.2335 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 1.9192 global_avg_mtp_loss: 9.3143 +[titan] 2025-10-04 22:37:09,851 - root - INFO - lr: 1.3716e-06 gnorm: 18.16 [ 0:03:01<8 days, 9:55:04] +[titan] 2025-10-04 22:37:20,642 - root - INFO - step: 15 loss: 10.8309 memory: 118.84GiB(85.28%) tps: 30,368 tflops: 421.30 mfu: 42.60% global_avg_ntp_loss: 1.7960 global_avg_mtp_loss: 9.0349 +[titan] 2025-10-04 22:37:20,642 - root - INFO - lr: 1.9950e-06 gnorm: 10.62 [ 0:03:12<5 days, 22:35:06] +[titan] 2025-10-04 22:37:31,508 - root - INFO - step: 20 loss: 10.3172 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 1.6641 global_avg_mtp_loss: 8.6531 +[titan] 2025-10-04 22:37:31,508 - root - INFO - lr: 2.6185e-06 gnorm: 8.22 [ 0:03:23<4 days, 16:57:31] +[titan] 2025-10-04 22:37:42,328 - root - INFO - step: 25 loss: 9.9294 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 1.5801 global_avg_mtp_loss: 8.3492 +[titan] 2025-10-04 22:37:42,328 - root - INFO - lr: 3.2419e-06 gnorm: 7.10 [ 0:03:34<3 days, 23:09:41] +[titan] 2025-10-04 22:37:53,161 - root - INFO - step: 30 loss: 9.5763 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 1.4997 global_avg_mtp_loss: 8.0766 +[titan] 2025-10-04 22:37:53,161 - root - INFO - lr: 3.8653e-06 gnorm: 6.23 [ 0:03:45<3 days, 11:18:00] +[titan] 2025-10-04 22:38:04,056 - root - INFO - step: 35 loss: 9.3711 memory: 118.84GiB(85.28%) tps: 
30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 1.4603 global_avg_mtp_loss: 7.9108 +[titan] 2025-10-04 22:38:04,056 - root - INFO - lr: 4.4888e-06 gnorm: 6.20 [ 0:03:55<3 days, 2:50:48] +[titan] 2025-10-04 22:38:14,933 - root - INFO - step: 40 loss: 9.0179 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 1.3853 global_avg_mtp_loss: 7.6325 +[titan] 2025-10-04 22:38:14,933 - root - INFO - lr: 5.1122e-06 gnorm: 5.60 [ 0:04:06<2 days, 20:30:03] +[titan] 2025-10-04 22:38:25,789 - root - INFO - step: 45 loss: 8.7524 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 1.3406 global_avg_mtp_loss: 7.4118 +[titan] 2025-10-04 22:38:25,789 - root - INFO - lr: 5.7357e-06 gnorm: 5.43 [ 0:04:17<2 days, 15:33:33] +[titan] 2025-10-04 22:38:34,548 - root - INFO - [GC] Peforming periodical GC collection. 0.05 seconds. +[titan] 2025-10-04 22:38:36,745 - root - INFO - step: 50 loss: 8.5439 memory: 118.84GiB(85.28%) tps: 29,909 tflops: 414.94 mfu: 41.96% global_avg_ntp_loss: 1.3050 global_avg_mtp_loss: 7.2389 +[titan] 2025-10-04 22:38:36,746 - root - INFO - lr: 6.3591e-06 gnorm: 5.74 [ 0:04:28<2 days, 11:37:40] +[titan] 2025-10-04 22:38:47,618 - root - INFO - step: 55 loss: 8.3158 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 1.2609 global_avg_mtp_loss: 7.0549 +[titan] 2025-10-04 22:38:47,619 - root - INFO - lr: 6.9825e-06 gnorm: 5.52 [ 0:04:39<2 days, 8:23:37] +[titan] 2025-10-04 22:38:58,482 - root - INFO - step: 60 loss: 8.2006 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 1.2373 global_avg_mtp_loss: 6.9633 +[titan] 2025-10-04 22:38:58,482 - root - INFO - lr: 7.6060e-06 gnorm: 5.72 [ 0:04:50<2 days, 5:41:47] +[titan] 2025-10-04 22:39:09,360 - root - INFO - step: 65 loss: 8.1393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 1.2182 global_avg_mtp_loss: 6.9211 +[titan] 2025-10-04 22:39:09,360 
- root - INFO - lr: 8.2294e-06 gnorm: 5.66 [ 0:05:01<2 days, 3:24:58] +[titan] 2025-10-04 22:39:20,248 - root - INFO - step: 70 loss: 7.7608 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 1.1495 global_avg_mtp_loss: 6.6112 +[titan] 2025-10-04 22:39:20,248 - root - INFO - lr: 8.8529e-06 gnorm: 5.54 [ 0:05:12<2 days, 1:27:46] +[titan] 2025-10-04 22:39:31,185 - root - INFO - step: 75 loss: 7.6862 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 1.1395 global_avg_mtp_loss: 6.5467 +[titan] 2025-10-04 22:39:31,185 - root - INFO - lr: 9.4763e-06 gnorm: 6.04 [ 0:05:23<1 day, 23:46:36] +[titan] 2025-10-04 22:39:42,063 - root - INFO - step: 80 loss: 7.4352 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 1.0959 global_avg_mtp_loss: 6.3393 +[titan] 2025-10-04 22:39:42,063 - root - INFO - lr: 1.0100e-05 gnorm: 5.61 [ 0:05:33<1 day, 22:17:33] +[titan] 2025-10-04 22:39:52,933 - root - INFO - step: 85 loss: 7.3232 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 1.0671 global_avg_mtp_loss: 6.2561 +[titan] 2025-10-04 22:39:52,934 - root - INFO - lr: 1.0723e-05 gnorm: 5.89 [ 0:05:44<1 day, 20:58:55] +[titan] 2025-10-04 22:40:03,808 - root - INFO - step: 90 loss: 7.1910 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 1.0545 global_avg_mtp_loss: 6.1364 +[titan] 2025-10-04 22:40:03,808 - root - INFO - lr: 1.1347e-05 gnorm: 6.24 [ 0:05:55<1 day, 19:49:01] +[titan] 2025-10-04 22:40:14,668 - root - INFO - step: 95 loss: 7.0637 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 1.0179 global_avg_mtp_loss: 6.0458 +[titan] 2025-10-04 22:40:14,668 - root - INFO - lr: 1.1970e-05 gnorm: 5.80 [ 0:06:06<1 day, 18:46:22] +[titan] 2025-10-04 22:40:23,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:40:25,567 - root - INFO - step: 100 loss: 7.0183 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 1.0144 global_avg_mtp_loss: 6.0039 +[titan] 2025-10-04 22:40:25,567 - root - INFO - lr: 1.2594e-05 gnorm: 5.49 [ 0:06:17<1 day, 17:50:13] +[titan] 2025-10-04 22:40:36,554 - root - INFO - step: 105 loss: 6.7845 memory: 118.84GiB(85.28%) tps: 29,825 tflops: 413.77 mfu: 41.84% global_avg_ntp_loss: 0.9684 global_avg_mtp_loss: 5.8161 +[titan] 2025-10-04 22:40:36,554 - root - INFO - lr: 1.3217e-05 gnorm: 5.66 [ 0:06:28<1 day, 16:59:58] +[titan] 2025-10-04 22:40:47,440 - root - INFO - step: 110 loss: 6.7610 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.9616 global_avg_mtp_loss: 5.7993 +[titan] 2025-10-04 22:40:47,440 - root - INFO - lr: 1.3840e-05 gnorm: 5.76 [ 0:06:39<1 day, 16:13:38] +[titan] 2025-10-04 22:40:58,316 - root - INFO - step: 115 loss: 6.7822 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.9526 global_avg_mtp_loss: 5.8296 +[titan] 2025-10-04 22:40:58,316 - root - INFO - lr: 1.4464e-05 gnorm: 5.41 [ 0:06:50<1 day, 15:31:16] +[titan] 2025-10-04 22:41:09,192 - root - INFO - step: 120 loss: 6.5921 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.9190 global_avg_mtp_loss: 5.6731 +[titan] 2025-10-04 22:41:09,193 - root - INFO - lr: 1.5087e-05 gnorm: 5.18 [ 0:07:01<1 day, 14:52:25] +[titan] 2025-10-04 22:41:20,086 - root - INFO - step: 125 loss: 6.3759 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.8880 global_avg_mtp_loss: 5.4878 +[titan] 2025-10-04 22:41:20,086 - root - INFO - lr: 1.5711e-05 gnorm: 4.91 [ 0:07:11<1 day, 14:16:46] +[titan] 2025-10-04 22:41:31,181 - root - INFO - step: 130 loss: 6.3566 memory: 118.84GiB(85.28%) tps: 29,536 tflops: 409.77 mfu: 41.43% global_avg_ntp_loss: 0.8781 global_avg_mtp_loss: 5.4786 +[titan] 2025-10-04 
22:41:31,181 - root - INFO - lr: 1.6334e-05 gnorm: 4.37 [ 0:07:23<1 day, 13:44:51] +[titan] 2025-10-04 22:41:42,074 - root - INFO - step: 135 loss: 6.3044 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.8713 global_avg_mtp_loss: 5.4331 +[titan] 2025-10-04 22:41:42,075 - root - INFO - lr: 1.6958e-05 gnorm: 4.29 [ 0:07:33<1 day, 13:14:19] +[titan] 2025-10-04 22:41:52,936 - root - INFO - step: 140 loss: 6.3158 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.8632 global_avg_mtp_loss: 5.4526 +[titan] 2025-10-04 22:41:52,936 - root - INFO - lr: 1.7581e-05 gnorm: 3.03 [ 0:07:44<1 day, 12:45:47] +[titan] 2025-10-04 22:42:03,814 - root - INFO - step: 145 loss: 6.2266 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.8508 global_avg_mtp_loss: 5.3758 +[titan] 2025-10-04 22:42:03,815 - root - INFO - lr: 1.8204e-05 gnorm: 3.86 [ 0:07:55<1 day, 12:19:17] +[titan] 2025-10-04 22:42:12,515 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:42:14,709 - root - INFO - step: 150 loss: 6.0872 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.8237 global_avg_mtp_loss: 5.2635 +[titan] 2025-10-04 22:42:14,710 - root - INFO - lr: 1.8828e-05 gnorm: 3.31 [ 0:08:06<1 day, 11:54:37] +[titan] 2025-10-04 22:42:25,613 - root - INFO - step: 155 loss: 6.0870 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.8286 global_avg_mtp_loss: 5.2584 +[titan] 2025-10-04 22:42:25,613 - root - INFO - lr: 1.9451e-05 gnorm: 3.04 [ 0:08:17<1 day, 11:31:34] +[titan] 2025-10-04 22:42:36,528 - root - INFO - step: 160 loss: 5.9733 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.8032 global_avg_mtp_loss: 5.1701 +[titan] 2025-10-04 22:42:36,529 - root - INFO - lr: 2.0075e-05 gnorm: 3.06 [ 0:08:28<1 day, 11:09:59] +[titan] 2025-10-04 22:42:47,448 - root - INFO - step: 165 loss: 5.8683 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.7907 global_avg_mtp_loss: 5.0776 +[titan] 2025-10-04 22:42:47,448 - root - INFO - lr: 2.0698e-05 gnorm: 3.39 [ 0:08:39<1 day, 10:49:44] +[titan] 2025-10-04 22:42:58,343 - root - INFO - step: 170 loss: 5.8536 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.7847 global_avg_mtp_loss: 5.0689 +[titan] 2025-10-04 22:42:58,343 - root - INFO - lr: 2.1322e-05 gnorm: 2.80 [ 0:08:50<1 day, 10:30:33] +[titan] 2025-10-04 22:43:09,215 - root - INFO - step: 175 loss: 5.7812 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.7716 global_avg_mtp_loss: 5.0096 +[titan] 2025-10-04 22:43:09,216 - root - INFO - lr: 2.1945e-05 gnorm: 4.02 [ 0:09:01<1 day, 10:12:23] +[titan] 2025-10-04 22:43:20,097 - root - INFO - step: 180 loss: 5.7994 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.7711 global_avg_mtp_loss: 5.0283 +[titan] 2025-10-04 
22:43:20,098 - root - INFO - lr: 2.2569e-05 gnorm: 3.36 [ 0:09:11<1 day, 9:55:14] +[titan] 2025-10-04 22:43:31,003 - root - INFO - step: 185 loss: 5.6617 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9134 +[titan] 2025-10-04 22:43:31,003 - root - INFO - lr: 2.3192e-05 gnorm: 2.73 [ 0:09:22<1 day, 9:39:06] +[titan] 2025-10-04 22:43:41,902 - root - INFO - step: 190 loss: 5.6564 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.7483 global_avg_mtp_loss: 4.9080 +[titan] 2025-10-04 22:43:41,903 - root - INFO - lr: 2.3815e-05 gnorm: 3.17 [ 0:09:33<1 day, 9:23:47] +[titan] 2025-10-04 22:43:52,788 - root - INFO - step: 195 loss: 5.6643 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.7475 global_avg_mtp_loss: 4.9168 +[titan] 2025-10-04 22:43:52,788 - root - INFO - lr: 2.4439e-05 gnorm: 2.43 [ 0:09:44<1 day, 9:09:11] +[titan] 2025-10-04 22:44:01,483 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:44:03,675 - root - INFO - step: 200 loss: 5.6189 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.7360 global_avg_mtp_loss: 4.8830 +[titan] 2025-10-04 22:44:03,675 - root - INFO - lr: 2.5062e-05 gnorm: 3.47 [ 0:09:55<1 day, 8:55:19] +[titan] 2025-10-04 22:44:14,559 - root - INFO - step: 205 loss: 5.5215 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.7213 global_avg_mtp_loss: 4.8002 +[titan] 2025-10-04 22:44:14,559 - root - INFO - lr: 2.5686e-05 gnorm: 3.09 [ 0:10:06<1 day, 8:42:06] +[titan] 2025-10-04 22:44:25,433 - root - INFO - step: 210 loss: 5.5044 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.7198 global_avg_mtp_loss: 4.7846 +[titan] 2025-10-04 22:44:25,433 - root - INFO - lr: 2.6309e-05 gnorm: 2.66 [ 0:10:17<1 day, 8:29:29] +[titan] 2025-10-04 22:44:36,338 - root - INFO - step: 215 loss: 5.4728 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.7115 global_avg_mtp_loss: 4.7613 +[titan] 2025-10-04 22:44:36,338 - root - INFO - lr: 2.6933e-05 gnorm: 2.45 [ 0:10:28<1 day, 8:17:33] +[titan] 2025-10-04 22:44:47,225 - root - INFO - step: 220 loss: 5.3310 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.6944 global_avg_mtp_loss: 4.6366 +[titan] 2025-10-04 22:44:47,225 - root - INFO - lr: 2.7556e-05 gnorm: 2.66 [ 0:10:39<1 day, 8:06:05] +[titan] 2025-10-04 22:44:58,124 - root - INFO - step: 225 loss: 5.3739 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6934 global_avg_mtp_loss: 4.6805 +[titan] 2025-10-04 22:44:58,125 - root - INFO - lr: 2.8180e-05 gnorm: 2.95 [ 0:10:50<1 day, 7:55:09] +[titan] 2025-10-04 22:45:09,004 - root - INFO - step: 230 loss: 5.4216 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.7014 global_avg_mtp_loss: 4.7202 +[titan] 2025-10-04 22:45:09,004 - 
root - INFO - lr: 2.8803e-05 gnorm: 2.60 [ 0:11:00<1 day, 7:44:38] +[titan] 2025-10-04 22:45:19,907 - root - INFO - step: 235 loss: 5.3090 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.6909 global_avg_mtp_loss: 4.6180 +[titan] 2025-10-04 22:45:19,907 - root - INFO - lr: 2.9426e-05 gnorm: 2.68 [ 0:11:11<1 day, 7:34:37] +[titan] 2025-10-04 22:45:30,796 - root - INFO - step: 240 loss: 5.2690 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.6785 global_avg_mtp_loss: 4.5905 +[titan] 2025-10-04 22:45:30,796 - root - INFO - lr: 3.0050e-05 gnorm: 2.38 [ 0:11:22<1 day, 7:24:59] +[titan] 2025-10-04 22:45:41,709 - root - INFO - step: 245 loss: 5.1965 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.6691 global_avg_mtp_loss: 4.5274 +[titan] 2025-10-04 22:45:41,710 - root - INFO - lr: 3.0673e-05 gnorm: 2.47 [ 0:11:33<1 day, 7:15:48] +[titan] 2025-10-04 22:45:50,403 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:45:52,597 - root - INFO - step: 250 loss: 5.1858 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6654 global_avg_mtp_loss: 4.5204 +[titan] 2025-10-04 22:45:52,597 - root - INFO - lr: 3.1297e-05 gnorm: 3.00 [ 0:11:44<1 day, 7:06:54] +[titan] 2025-10-04 22:46:03,496 - root - INFO - step: 255 loss: 5.1706 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.6625 global_avg_mtp_loss: 4.5081 +[titan] 2025-10-04 22:46:03,496 - root - INFO - lr: 3.1920e-05 gnorm: 2.61 [ 0:11:55<1 day, 6:58:22] +[titan] 2025-10-04 22:46:14,369 - root - INFO - step: 260 loss: 5.1473 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.6607 global_avg_mtp_loss: 4.4865 +[titan] 2025-10-04 22:46:14,369 - root - INFO - lr: 3.2544e-05 gnorm: 2.39 [ 0:12:06<1 day, 6:50:06] +[titan] 2025-10-04 22:46:25,252 - root - INFO - step: 265 loss: 5.1300 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.6565 global_avg_mtp_loss: 4.4735 +[titan] 2025-10-04 22:46:25,253 - root - INFO - lr: 3.3167e-05 gnorm: 2.29 [ 0:12:17<1 day, 6:42:10] +[titan] 2025-10-04 22:46:36,152 - root - INFO - step: 270 loss: 5.1579 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.6598 global_avg_mtp_loss: 4.4981 +[titan] 2025-10-04 22:46:36,152 - root - INFO - lr: 3.3791e-05 gnorm: 2.51 [ 0:12:28<1 day, 6:34:33] +[titan] 2025-10-04 22:46:47,010 - root - INFO - step: 275 loss: 5.0167 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.6398 global_avg_mtp_loss: 4.3769 +[titan] 2025-10-04 22:46:47,011 - root - INFO - lr: 3.4414e-05 gnorm: 2.10 [ 0:12:38<1 day, 6:27:07] +[titan] 2025-10-04 22:46:57,896 - root - INFO - step: 280 loss: 5.0898 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.6486 global_avg_mtp_loss: 4.4413 +[titan] 2025-10-04 22:46:57,896 - 
root - INFO - lr: 3.5037e-05 gnorm: 3.07 [ 0:12:49<1 day, 6:19:59] +[titan] 2025-10-04 22:47:08,770 - root - INFO - step: 285 loss: 5.1105 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.6521 global_avg_mtp_loss: 4.4584 +[titan] 2025-10-04 22:47:08,770 - root - INFO - lr: 3.5661e-05 gnorm: 2.23 [ 0:13:00<1 day, 6:13:05] +[titan] 2025-10-04 22:47:19,662 - root - INFO - step: 290 loss: 5.0807 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6456 global_avg_mtp_loss: 4.4352 +[titan] 2025-10-04 22:47:19,662 - root - INFO - lr: 3.6284e-05 gnorm: 2.82 [ 0:13:11<1 day, 6:06:28] +[titan] 2025-10-04 22:47:30,549 - root - INFO - step: 295 loss: 5.0464 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.6427 global_avg_mtp_loss: 4.4037 +[titan] 2025-10-04 22:47:30,550 - root - INFO - lr: 3.6908e-05 gnorm: 2.35 [ 0:13:22<1 day, 6:00:03] +[titan] 2025-10-04 22:47:39,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:47:41,466 - root - INFO - step: 300 loss: 5.1119 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.6529 global_avg_mtp_loss: 4.4589 +[titan] 2025-10-04 22:47:41,466 - root - INFO - lr: 3.7531e-05 gnorm: 2.72 [ 0:13:33<1 day, 5:53:54] +[titan] 2025-10-04 22:47:52,331 - root - INFO - step: 305 loss: 4.9831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.6338 global_avg_mtp_loss: 4.3492 +[titan] 2025-10-04 22:47:52,331 - root - INFO - lr: 3.8155e-05 gnorm: 2.81 [ 0:13:44<1 day, 5:47:50] +[titan] 2025-10-04 22:48:03,188 - root - INFO - step: 310 loss: 4.9896 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.6364 global_avg_mtp_loss: 4.3532 +[titan] 2025-10-04 22:48:03,188 - root - INFO - lr: 3.8778e-05 gnorm: 2.39 [ 0:13:55<1 day, 5:41:56] +[titan] 2025-10-04 22:48:14,051 - root - INFO - step: 315 loss: 4.8865 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.6207 global_avg_mtp_loss: 4.2658 +[titan] 2025-10-04 22:48:14,051 - root - INFO - lr: 3.9401e-05 gnorm: 3.11 [ 0:14:05<1 day, 5:36:14] +[titan] 2025-10-04 22:48:24,948 - root - INFO - step: 320 loss: 4.9416 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.6290 global_avg_mtp_loss: 4.3126 +[titan] 2025-10-04 22:48:24,948 - root - INFO - lr: 4.0025e-05 gnorm: 2.57 [ 0:14:16<1 day, 5:30:47] +[titan] 2025-10-04 22:48:35,879 - root - INFO - step: 325 loss: 4.8914 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.6229 global_avg_mtp_loss: 4.2686 +[titan] 2025-10-04 22:48:35,879 - root - INFO - lr: 4.0648e-05 gnorm: 2.22 [ 0:14:27<1 day, 5:25:34] +[titan] 2025-10-04 22:48:46,771 - root - INFO - step: 330 loss: 4.8494 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.6146 global_avg_mtp_loss: 4.2348 +[titan] 2025-10-04 22:48:46,771 - 
root - INFO - lr: 4.1272e-05 gnorm: 2.17 [ 0:14:38<1 day, 5:20:25] +[titan] 2025-10-04 22:48:57,658 - root - INFO - step: 335 loss: 4.9431 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.6274 global_avg_mtp_loss: 4.3157 +[titan] 2025-10-04 22:48:57,658 - root - INFO - lr: 4.1895e-05 gnorm: 2.41 [ 0:14:49<1 day, 5:15:24] +[titan] 2025-10-04 22:49:08,546 - root - INFO - step: 340 loss: 4.8429 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.6110 global_avg_mtp_loss: 4.2319 +[titan] 2025-10-04 22:49:08,546 - root - INFO - lr: 4.2519e-05 gnorm: 2.38 [ 0:15:00<1 day, 5:10:32] +[titan] 2025-10-04 22:49:19,437 - root - INFO - step: 345 loss: 4.7699 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.6044 global_avg_mtp_loss: 4.1656 +[titan] 2025-10-04 22:49:19,437 - root - INFO - lr: 4.3142e-05 gnorm: 2.47 [ 0:15:11<1 day, 5:05:49] +[titan] 2025-10-04 22:49:28,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:49:30,329 - root - INFO - step: 350 loss: 4.8354 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.6129 global_avg_mtp_loss: 4.2225 +[titan] 2025-10-04 22:49:30,329 - root - INFO - lr: 4.3766e-05 gnorm: 2.30 [ 0:15:22<1 day, 5:01:13] +[titan] 2025-10-04 22:49:41,264 - root - INFO - step: 355 loss: 4.8409 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.6123 global_avg_mtp_loss: 4.2286 +[titan] 2025-10-04 22:49:41,264 - root - INFO - lr: 4.4389e-05 gnorm: 2.44 [ 0:15:33<1 day, 4:56:50] +[titan] 2025-10-04 22:49:52,147 - root - INFO - step: 360 loss: 4.6777 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.5902 global_avg_mtp_loss: 4.0875 +[titan] 2025-10-04 22:49:52,148 - root - INFO - lr: 4.5012e-05 gnorm: 1.96 [ 0:15:44<1 day, 4:52:28] +[titan] 2025-10-04 22:50:03,033 - root - INFO - step: 365 loss: 4.8152 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.6116 global_avg_mtp_loss: 4.2037 +[titan] 2025-10-04 22:50:03,033 - root - INFO - lr: 4.5636e-05 gnorm: 2.14 [ 0:15:54<1 day, 4:48:13] +[titan] 2025-10-04 22:50:13,908 - root - INFO - step: 370 loss: 4.7797 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.6024 global_avg_mtp_loss: 4.1773 +[titan] 2025-10-04 22:50:13,908 - root - INFO - lr: 4.6259e-05 gnorm: 2.37 [ 0:16:05<1 day, 4:44:04] +[titan] 2025-10-04 22:50:24,783 - root - INFO - step: 375 loss: 4.6716 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.5906 global_avg_mtp_loss: 4.0810 +[titan] 2025-10-04 22:50:24,783 - root - INFO - lr: 4.6883e-05 gnorm: 2.26 [ 0:16:16<1 day, 4:40:01] +[titan] 2025-10-04 22:50:35,652 - root - INFO - step: 380 loss: 4.7162 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.5950 global_avg_mtp_loss: 4.1212 +[titan] 2025-10-04 22:50:35,652 - 
root - INFO - lr: 4.7506e-05 gnorm: 2.15 [ 0:16:27<1 day, 4:36:03] +[titan] 2025-10-04 22:50:46,574 - root - INFO - step: 385 loss: 4.8016 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.6054 global_avg_mtp_loss: 4.1962 +[titan] 2025-10-04 22:50:46,574 - root - INFO - lr: 4.8130e-05 gnorm: 2.50 [ 0:16:38<1 day, 4:32:17] +[titan] 2025-10-04 22:50:57,443 - root - INFO - step: 390 loss: 4.7078 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.5929 global_avg_mtp_loss: 4.1150 +[titan] 2025-10-04 22:50:57,444 - root - INFO - lr: 4.8753e-05 gnorm: 2.00 [ 0:16:49<1 day, 4:28:31] +[titan] 2025-10-04 22:51:08,305 - root - INFO - step: 395 loss: 4.6384 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.5834 global_avg_mtp_loss: 4.0551 +[titan] 2025-10-04 22:51:08,305 - root - INFO - lr: 4.9377e-05 gnorm: 2.37 [ 0:17:00<1 day, 4:24:49] +[titan] 2025-10-04 22:51:16,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:51:19,152 - root - INFO - step: 400 loss: 4.6918 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.5928 global_avg_mtp_loss: 4.0990 +[titan] 2025-10-04 22:51:19,152 - root - INFO - lr: 5.0000e-05 gnorm: 2.36 [ 0:17:11<1 day, 4:21:12] +[titan] 2025-10-04 22:51:30,025 - root - INFO - step: 405 loss: 4.6284 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.5843 global_avg_mtp_loss: 4.0441 +[titan] 2025-10-04 22:51:30,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.37 [ 0:17:21<1 day, 4:17:42] +[titan] 2025-10-04 22:51:40,903 - root - INFO - step: 410 loss: 4.5757 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.5764 global_avg_mtp_loss: 3.9993 +[titan] 2025-10-04 22:51:40,903 - root - INFO - lr: 5.0000e-05 gnorm: 2.16 [ 0:17:32<1 day, 4:14:17] +[titan] 2025-10-04 22:51:51,757 - root - INFO - step: 415 loss: 4.6798 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.5875 global_avg_mtp_loss: 4.0923 +[titan] 2025-10-04 22:51:51,758 - root - INFO - lr: 5.0000e-05 gnorm: 2.18 [ 0:17:43<1 day, 4:10:55] +[titan] 2025-10-04 22:52:02,632 - root - INFO - step: 420 loss: 4.6984 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.5914 global_avg_mtp_loss: 4.1070 +[titan] 2025-10-04 22:52:02,632 - root - INFO - lr: 5.0000e-05 gnorm: 2.08 [ 0:17:54<1 day, 4:07:39] +[titan] 2025-10-04 22:52:13,523 - root - INFO - step: 425 loss: 4.6583 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.5870 global_avg_mtp_loss: 4.0713 +[titan] 2025-10-04 22:52:13,523 - root - INFO - lr: 5.0000e-05 gnorm: 1.97 [ 0:18:05<1 day, 4:04:29] +[titan] 2025-10-04 22:52:24,408 - root - INFO - step: 430 loss: 4.5843 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.5750 global_avg_mtp_loss: 4.0093 +[titan] 2025-10-04 22:52:24,408 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.22 [ 0:18:16<1 day, 4:01:23] +[titan] 2025-10-04 22:52:35,258 - root - INFO - step: 435 loss: 4.5321 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.5697 global_avg_mtp_loss: 3.9625 +[titan] 2025-10-04 22:52:35,258 - root - INFO - lr: 5.0000e-05 gnorm: 2.13 [ 0:18:27<1 day, 3:58:18] +[titan] 2025-10-04 22:52:46,145 - root - INFO - step: 440 loss: 4.5606 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.5730 global_avg_mtp_loss: 3.9875 +[titan] 2025-10-04 22:52:46,146 - root - INFO - lr: 5.0000e-05 gnorm: 2.40 [ 0:18:38<1 day, 3:55:20] +[titan] 2025-10-04 22:52:57,025 - root - INFO - step: 445 loss: 4.5406 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.5687 global_avg_mtp_loss: 3.9718 +[titan] 2025-10-04 22:52:57,025 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:18:48<1 day, 3:52:25] +[titan] 2025-10-04 22:53:05,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:53:07,904 - root - INFO - step: 450 loss: 4.5707 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.5740 global_avg_mtp_loss: 3.9967 +[titan] 2025-10-04 22:53:07,904 - root - INFO - lr: 5.0000e-05 gnorm: 2.34 [ 0:18:59<1 day, 3:49:33] +[titan] 2025-10-04 22:53:18,769 - root - INFO - step: 455 loss: 4.4743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.5620 global_avg_mtp_loss: 3.9123 +[titan] 2025-10-04 22:53:18,770 - root - INFO - lr: 5.0000e-05 gnorm: 2.20 [ 0:19:10<1 day, 3:46:44] +[titan] 2025-10-04 22:53:29,609 - root - INFO - step: 460 loss: 4.4303 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8761 +[titan] 2025-10-04 22:53:29,609 - root - INFO - lr: 5.0000e-05 gnorm: 2.25 [ 0:19:21<1 day, 3:43:56] +[titan] 2025-10-04 22:53:40,497 - root - INFO - step: 465 loss: 4.4283 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.5552 global_avg_mtp_loss: 3.8731 +[titan] 2025-10-04 22:53:40,497 - root - INFO - lr: 5.0000e-05 gnorm: 1.84 [ 0:19:32<1 day, 3:41:16] +[titan] 2025-10-04 22:53:51,344 - root - INFO - step: 470 loss: 4.4176 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.5543 global_avg_mtp_loss: 3.8632 +[titan] 2025-10-04 22:53:51,344 - root - INFO - lr: 5.0000e-05 gnorm: 2.15 [ 0:19:43<1 day, 3:38:36] +[titan] 2025-10-04 22:54:02,202 - root - INFO - step: 475 loss: 4.4882 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.5655 global_avg_mtp_loss: 3.9227 +[titan] 2025-10-04 22:54:02,202 - root - INFO - lr: 5.0000e-05 gnorm: 1.78 [ 0:19:54<1 day, 3:35:59] +[titan] 2025-10-04 22:54:13,066 - root - INFO - step: 480 loss: 4.4600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.5572 global_avg_mtp_loss: 3.9028 +[titan] 2025-10-04 22:54:13,066 - 
root - INFO - lr: 5.0000e-05 gnorm: 2.00 [ 0:20:04<1 day, 3:33:26] +[titan] 2025-10-04 22:54:23,913 - root - INFO - step: 485 loss: 4.3781 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.5484 global_avg_mtp_loss: 3.8297 +[titan] 2025-10-04 22:54:23,913 - root - INFO - lr: 4.9999e-05 gnorm: 1.60 [ 0:20:15<1 day, 3:30:55] +[titan] 2025-10-04 22:54:34,742 - root - INFO - step: 490 loss: 4.4068 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.5524 global_avg_mtp_loss: 3.8544 +[titan] 2025-10-04 22:54:34,742 - root - INFO - lr: 4.9999e-05 gnorm: 2.19 [ 0:20:26<1 day, 3:28:25] +[titan] 2025-10-04 22:54:45,647 - root - INFO - step: 495 loss: 4.3459 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.5461 global_avg_mtp_loss: 3.7998 +[titan] 2025-10-04 22:54:45,647 - root - INFO - lr: 4.9999e-05 gnorm: 1.79 [ 0:20:37<1 day, 3:26:03] +[titan] 2025-10-04 22:54:54,303 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:54:56,481 - root - INFO - step: 500 loss: 4.5195 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.5664 global_avg_mtp_loss: 3.9531 +[titan] 2025-10-04 22:54:56,481 - root - INFO - lr: 4.9999e-05 gnorm: 1.81 [ 0:20:48<1 day, 3:23:39] +[titan] 2025-10-04 22:55:07,316 - root - INFO - step: 505 loss: 4.3727 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.5468 global_avg_mtp_loss: 3.8259 +[titan] 2025-10-04 22:55:07,316 - root - INFO - lr: 4.9999e-05 gnorm: 1.99 [ 0:20:59<1 day, 3:21:18] +[titan] 2025-10-04 22:55:18,908 - root - INFO - step: 510 loss: 4.3913 memory: 118.84GiB(85.28%) tps: 28,268 tflops: 392.18 mfu: 39.65% global_avg_ntp_loss: 0.5477 global_avg_mtp_loss: 3.8435 +[titan] 2025-10-04 22:55:18,908 - root - INFO - lr: 4.9999e-05 gnorm: 1.64 [ 0:21:10<1 day, 3:19:58] +[titan] 2025-10-04 22:55:23,598 - root - INFO - Dumping profiler traces at step 512 +[titan] 2025-10-04 22:55:23,636 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 22:55:30,219 - root - INFO - step: 515 loss: 4.3744 memory: 118.84GiB(85.28%) tps: 28,972 tflops: 401.94 mfu: 40.64% global_avg_ntp_loss: 0.5458 global_avg_mtp_loss: 3.8286 +[titan] 2025-10-04 22:55:30,219 - root - INFO - lr: 4.9999e-05 gnorm: 1.67 [ 0:21:22<1 day, 3:18:17] +[titan] 2025-10-04 22:55:41,134 - root - INFO - step: 520 loss: 4.3427 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.5439 global_avg_mtp_loss: 3.7988 +[titan] 2025-10-04 22:55:41,135 - root - INFO - lr: 4.9999e-05 gnorm: 2.16 [ 0:21:33<1 day, 3:16:08] +[titan] 2025-10-04 22:55:52,306 - root - INFO - step: 525 loss: 4.3706 memory: 118.84GiB(85.28%) tps: 29,331 tflops: 406.93 mfu: 41.15% global_avg_ntp_loss: 0.5472 global_avg_mtp_loss: 3.8234 +[titan] 2025-10-04 22:55:52,307 - root - INFO - lr: 4.9999e-05 gnorm: 1.88 [ 0:21:44<1 day, 3:14:21] +[titan] 2025-10-04 22:56:03,131 - root - INFO 
- step: 530 loss: 4.3726 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.5471 global_avg_mtp_loss: 3.8256 +[titan] 2025-10-04 22:56:03,131 - root - INFO - lr: 4.9999e-05 gnorm: 2.18 [ 0:21:54<1 day, 3:12:10] +[titan] 2025-10-04 22:56:13,930 - root - INFO - step: 535 loss: 4.4086 memory: 118.84GiB(85.28%) tps: 30,344 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.5498 global_avg_mtp_loss: 3.8588 +[titan] 2025-10-04 22:56:13,930 - root - INFO - lr: 4.9999e-05 gnorm: 1.95 [ 0:22:05<1 day, 3:09:59] +[titan] 2025-10-04 22:56:24,765 - root - INFO - step: 540 loss: 4.4155 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.5521 global_avg_mtp_loss: 3.8634 +[titan] 2025-10-04 22:56:24,765 - root - INFO - lr: 4.9999e-05 gnorm: 2.04 [ 0:22:16<1 day, 3:07:53] +[titan] 2025-10-04 22:56:35,621 - root - INFO - step: 545 loss: 4.3565 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.5455 global_avg_mtp_loss: 3.8109 +[titan] 2025-10-04 22:56:35,621 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:22:27<1 day, 3:05:50] +[titan] 2025-10-04 22:56:44,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:56:46,499 - root - INFO - step: 550 loss: 4.2924 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.5365 global_avg_mtp_loss: 3.7559 +[titan] 2025-10-04 22:56:46,499 - root - INFO - lr: 4.9998e-05 gnorm: 1.96 [ 0:22:38<1 day, 3:03:51] +[titan] 2025-10-04 22:56:57,360 - root - INFO - step: 555 loss: 4.3086 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.5367 global_avg_mtp_loss: 3.7719 +[titan] 2025-10-04 22:56:57,361 - root - INFO - lr: 4.9998e-05 gnorm: 1.94 [ 0:22:49<1 day, 3:01:53] +[titan] 2025-10-04 22:57:08,185 - root - INFO - step: 560 loss: 4.2981 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.5349 global_avg_mtp_loss: 3.7631 +[titan] 2025-10-04 22:57:08,185 - root - INFO - lr: 4.9998e-05 gnorm: 1.84 [ 0:23:00<1 day, 2:59:54] +[titan] 2025-10-04 22:57:19,007 - root - INFO - step: 565 loss: 4.3383 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7969 +[titan] 2025-10-04 22:57:19,007 - root - INFO - lr: 4.9998e-05 gnorm: 1.66 [ 0:23:10<1 day, 2:57:57] +[titan] 2025-10-04 22:57:29,825 - root - INFO - step: 570 loss: 4.3634 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.5450 global_avg_mtp_loss: 3.8184 +[titan] 2025-10-04 22:57:29,825 - root - INFO - lr: 4.9998e-05 gnorm: 1.78 [ 0:23:21<1 day, 2:56:02] +[titan] 2025-10-04 22:57:40,662 - root - INFO - step: 575 loss: 4.2261 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.5285 global_avg_mtp_loss: 3.6977 +[titan] 2025-10-04 22:57:40,663 - root - INFO - lr: 4.9998e-05 gnorm: 1.67 [ 0:23:32<1 day, 2:54:10] +[titan] 2025-10-04 22:57:51,566 - root - INFO - step: 580 loss: 4.2298 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.5294 global_avg_mtp_loss: 3.7005 +[titan] 2025-10-04 22:57:51,566 - 
root - INFO - lr: 4.9998e-05 gnorm: 1.98 [ 0:23:43<1 day, 2:52:24] +[titan] 2025-10-04 22:58:02,405 - root - INFO - step: 585 loss: 4.3315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5413 global_avg_mtp_loss: 3.7902 +[titan] 2025-10-04 22:58:02,405 - root - INFO - lr: 4.9998e-05 gnorm: 1.72 [ 0:23:54<1 day, 2:50:35] +[titan] 2025-10-04 22:58:13,269 - root - INFO - step: 590 loss: 4.2600 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.5322 global_avg_mtp_loss: 3.7278 +[titan] 2025-10-04 22:58:13,270 - root - INFO - lr: 4.9997e-05 gnorm: 1.95 [ 0:24:05<1 day, 2:48:49] +[titan] 2025-10-04 22:58:24,105 - root - INFO - step: 595 loss: 4.1808 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.5216 global_avg_mtp_loss: 3.6592 +[titan] 2025-10-04 22:58:24,105 - root - INFO - lr: 4.9997e-05 gnorm: 1.65 [ 0:24:15<1 day, 2:47:04] +[titan] 2025-10-04 22:58:32,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 22:58:34,964 - root - INFO - step: 600 loss: 4.1976 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.5240 global_avg_mtp_loss: 3.6736 +[titan] 2025-10-04 22:58:34,964 - root - INFO - lr: 4.9997e-05 gnorm: 1.83 [ 0:24:26<1 day, 2:45:21] +[titan] 2025-10-04 22:58:45,870 - root - INFO - step: 605 loss: 4.3159 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.5391 global_avg_mtp_loss: 3.7769 +[titan] 2025-10-04 22:58:45,870 - root - INFO - lr: 4.9997e-05 gnorm: 1.87 [ 0:24:37<1 day, 2:43:43] +[titan] 2025-10-04 22:58:56,733 - root - INFO - step: 610 loss: 4.1166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.5131 global_avg_mtp_loss: 3.6035 +[titan] 2025-10-04 22:58:56,733 - root - INFO - lr: 4.9997e-05 gnorm: 1.62 [ 0:24:48<1 day, 2:42:04] +[titan] 2025-10-04 22:59:07,585 - root - INFO - step: 615 loss: 4.2340 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.5275 global_avg_mtp_loss: 3.7065 +[titan] 2025-10-04 22:59:07,585 - root - INFO - lr: 4.9997e-05 gnorm: 1.88 [ 0:24:59<1 day, 2:40:25] +[titan] 2025-10-04 22:59:18,424 - root - INFO - step: 620 loss: 4.2004 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.5249 global_avg_mtp_loss: 3.6756 +[titan] 2025-10-04 22:59:18,424 - root - INFO - lr: 4.9997e-05 gnorm: 1.91 [ 0:25:10<1 day, 2:38:47] +[titan] 2025-10-04 22:59:29,245 - root - INFO - step: 625 loss: 4.2113 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.5247 global_avg_mtp_loss: 3.6866 +[titan] 2025-10-04 22:59:29,245 - root - INFO - lr: 4.9996e-05 gnorm: 1.62 [ 0:25:21<1 day, 2:37:09] +[titan] 2025-10-04 22:59:40,085 - root - INFO - step: 630 loss: 4.1954 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.5210 global_avg_mtp_loss: 3.6745 +[titan] 2025-10-04 22:59:40,085 - 
root - INFO - lr: 4.9996e-05 gnorm: 1.68 [ 0:25:31<1 day, 2:35:34] +[titan] 2025-10-04 22:59:51,004 - root - INFO - step: 635 loss: 4.0965 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.5096 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 22:59:51,004 - root - INFO - lr: 4.9996e-05 gnorm: 1.82 [ 0:25:42<1 day, 2:34:05] +[titan] 2025-10-04 23:00:01,832 - root - INFO - step: 640 loss: 4.2067 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.5236 global_avg_mtp_loss: 3.6831 +[titan] 2025-10-04 23:00:01,832 - root - INFO - lr: 4.9996e-05 gnorm: 1.87 [ 0:25:53<1 day, 2:32:31] +[titan] 2025-10-04 23:00:12,683 - root - INFO - step: 645 loss: 4.0562 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.5030 global_avg_mtp_loss: 3.5532 +[titan] 2025-10-04 23:00:12,683 - root - INFO - lr: 4.9996e-05 gnorm: 1.73 [ 0:26:04<1 day, 2:31:01] +[titan] 2025-10-04 23:00:21,312 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:00:23,493 - root - INFO - step: 650 loss: 4.1298 memory: 118.84GiB(85.28%) tps: 30,314 tflops: 420.56 mfu: 42.52% global_avg_ntp_loss: 0.5128 global_avg_mtp_loss: 3.6170 +[titan] 2025-10-04 23:00:23,493 - root - INFO - lr: 4.9996e-05 gnorm: 1.75 [ 0:26:15<1 day, 2:29:29] +[titan] 2025-10-04 23:00:34,283 - root - INFO - step: 655 loss: 4.0941 memory: 118.84GiB(85.28%) tps: 30,369 tflops: 421.33 mfu: 42.60% global_avg_ntp_loss: 0.5089 global_avg_mtp_loss: 3.5852 +[titan] 2025-10-04 23:00:34,283 - root - INFO - lr: 4.9995e-05 gnorm: 1.70 [ 0:26:26<1 day, 2:27:57] +[titan] 2025-10-04 23:00:45,102 - root - INFO - step: 660 loss: 4.1313 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.5130 global_avg_mtp_loss: 3.6184 +[titan] 2025-10-04 23:00:45,102 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:36<1 day, 2:26:28] +[titan] 2025-10-04 23:00:55,946 - root - INFO - step: 665 loss: 4.1367 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5147 global_avg_mtp_loss: 3.6220 +[titan] 2025-10-04 23:00:55,946 - root - INFO - lr: 4.9995e-05 gnorm: 1.99 [ 0:26:47<1 day, 2:25:01] +[titan] 2025-10-04 23:01:06,742 - root - INFO - step: 670 loss: 4.0904 memory: 118.84GiB(85.28%) tps: 30,352 tflops: 421.09 mfu: 42.58% global_avg_ntp_loss: 0.5075 global_avg_mtp_loss: 3.5829 +[titan] 2025-10-04 23:01:06,743 - root - INFO - lr: 4.9995e-05 gnorm: 1.67 [ 0:26:58<1 day, 2:23:33] +[titan] 2025-10-04 23:01:17,585 - root - INFO - step: 675 loss: 4.0638 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.5042 global_avg_mtp_loss: 3.5596 +[titan] 2025-10-04 23:01:17,585 - root - INFO - lr: 4.9995e-05 gnorm: 2.15 [ 0:27:09<1 day, 2:22:09] +[titan] 2025-10-04 23:01:28,410 - root - INFO - step: 680 loss: 4.0064 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4976 global_avg_mtp_loss: 3.5088 +[titan] 2025-10-04 23:01:28,410 - 
root - INFO - lr: 4.9994e-05 gnorm: 1.81 [ 0:27:20<1 day, 2:20:45] +[titan] 2025-10-04 23:01:39,214 - root - INFO - step: 685 loss: 4.1427 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.5134 global_avg_mtp_loss: 3.6293 +[titan] 2025-10-04 23:01:39,214 - root - INFO - lr: 4.9994e-05 gnorm: 1.69 [ 0:27:31<1 day, 2:19:21] +[titan] 2025-10-04 23:01:50,056 - root - INFO - step: 690 loss: 4.0571 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.5019 global_avg_mtp_loss: 3.5553 +[titan] 2025-10-04 23:01:50,056 - root - INFO - lr: 4.9994e-05 gnorm: 1.63 [ 0:27:41<1 day, 2:18:00] +[titan] 2025-10-04 23:02:00,900 - root - INFO - step: 695 loss: 4.0380 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.5011 global_avg_mtp_loss: 3.5369 +[titan] 2025-10-04 23:02:00,900 - root - INFO - lr: 4.9994e-05 gnorm: 1.77 [ 0:27:52<1 day, 2:16:40] +[titan] 2025-10-04 23:02:09,567 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:02:11,753 - root - INFO - step: 700 loss: 4.0879 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.5070 global_avg_mtp_loss: 3.5810 +[titan] 2025-10-04 23:02:11,753 - root - INFO - lr: 4.9994e-05 gnorm: 1.96 [ 0:28:03<1 day, 2:15:22] +[titan] 2025-10-04 23:02:22,605 - root - INFO - step: 705 loss: 4.0241 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4988 global_avg_mtp_loss: 3.5252 +[titan] 2025-10-04 23:02:22,605 - root - INFO - lr: 4.9993e-05 gnorm: 1.83 [ 0:28:14<1 day, 2:14:04] +[titan] 2025-10-04 23:02:33,405 - root - INFO - step: 710 loss: 4.0903 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.5058 global_avg_mtp_loss: 3.5844 +[titan] 2025-10-04 23:02:33,405 - root - INFO - lr: 4.9993e-05 gnorm: 1.64 [ 0:28:25<1 day, 2:12:45] +[titan] 2025-10-04 23:02:44,244 - root - INFO - step: 715 loss: 4.0535 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.5015 global_avg_mtp_loss: 3.5520 +[titan] 2025-10-04 23:02:44,244 - root - INFO - lr: 4.9993e-05 gnorm: 1.50 [ 0:28:36<1 day, 2:11:29] +[titan] 2025-10-04 23:02:55,077 - root - INFO - step: 720 loss: 4.0093 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.4957 global_avg_mtp_loss: 3.5137 +[titan] 2025-10-04 23:02:55,077 - root - INFO - lr: 4.9993e-05 gnorm: 1.58 [ 0:28:46<1 day, 2:10:13] +[titan] 2025-10-04 23:03:05,902 - root - INFO - step: 725 loss: 3.9529 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.4894 global_avg_mtp_loss: 3.4635 +[titan] 2025-10-04 23:03:05,902 - root - INFO - lr: 4.9992e-05 gnorm: 1.53 [ 0:28:57<1 day, 2:08:58] +[titan] 2025-10-04 23:03:16,765 - root - INFO - step: 730 loss: 3.9701 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.4916 global_avg_mtp_loss: 3.4785 +[titan] 2025-10-04 23:03:16,765 - 
root - INFO - lr: 4.9992e-05 gnorm: 1.57 [ 0:29:08<1 day, 2:07:45] +[titan] 2025-10-04 23:03:27,585 - root - INFO - step: 735 loss: 4.0191 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.4982 global_avg_mtp_loss: 3.5209 +[titan] 2025-10-04 23:03:27,585 - root - INFO - lr: 4.9992e-05 gnorm: 1.59 [ 0:29:19<1 day, 2:06:31] +[titan] 2025-10-04 23:03:38,404 - root - INFO - step: 740 loss: 3.9770 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.4912 global_avg_mtp_loss: 3.4857 +[titan] 2025-10-04 23:03:38,404 - root - INFO - lr: 4.9992e-05 gnorm: 1.61 [ 0:29:30<1 day, 2:05:18] +[titan] 2025-10-04 23:03:49,265 - root - INFO - step: 745 loss: 4.0755 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.5054 global_avg_mtp_loss: 3.5701 +[titan] 2025-10-04 23:03:49,265 - root - INFO - lr: 4.9992e-05 gnorm: 1.52 [ 0:29:41<1 day, 2:04:09] +[titan] 2025-10-04 23:03:57,894 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:04:00,081 - root - INFO - step: 750 loss: 3.9375 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.4868 global_avg_mtp_loss: 3.4508 +[titan] 2025-10-04 23:04:00,081 - root - INFO - [34mlr: 4.9991e-05 gnorm: 1.67 [ 0:29:51<1 day, 2:02:57] +[titan] 2025-10-04 23:04:10,923 - root - INFO - step: 755 loss: 4.0060 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.4974 global_avg_mtp_loss: 3.5087 +[titan] 2025-10-04 23:04:10,923 - root - INFO - lr: 4.9991e-05 gnorm: 1.62 [ 0:30:02<1 day, 2:01:48] +[titan] 2025-10-04 23:04:21,765 - root - INFO - step: 760 loss: 3.9826 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.4928 global_avg_mtp_loss: 3.4897 +[titan] 2025-10-04 23:04:21,765 - root - INFO - lr: 4.9991e-05 gnorm: 1.57 [ 0:30:13<1 day, 2:00:39] +[titan] 2025-10-04 23:04:32,624 - root - INFO - step: 765 loss: 3.9503 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4871 global_avg_mtp_loss: 3.4633 +[titan] 2025-10-04 23:04:32,625 - root - INFO - lr: 4.9991e-05 gnorm: 1.73 [ 0:30:24<1 day, 1:59:32] +[titan] 2025-10-04 23:04:43,499 - root - INFO - step: 770 loss: 4.0928 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.5059 global_avg_mtp_loss: 3.5869 +[titan] 2025-10-04 23:04:43,499 - root - INFO - lr: 4.9990e-05 gnorm: 1.68 [ 0:30:35<1 day, 1:58:27] +[titan] 2025-10-04 23:04:54,364 - root - INFO - step: 775 loss: 4.0138 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.4966 global_avg_mtp_loss: 3.5172 +[titan] 2025-10-04 23:04:54,364 - root - INFO - lr: 4.9990e-05 gnorm: 1.84 [ 0:30:46<1 day, 1:57:21] +[titan] 2025-10-04 23:05:05,165 - root - INFO - step: 780 loss: 3.9609 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.4878 global_avg_mtp_loss: 3.4731 +[titan] 2025-10-04 23:05:05,165 
- root - INFO - lr: 4.9990e-05 gnorm: 1.66 [ 0:30:57<1 day, 1:56:14] +[titan] 2025-10-04 23:05:16,001 - root - INFO - step: 785 loss: 4.0392 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.5003 global_avg_mtp_loss: 3.5389 +[titan] 2025-10-04 23:05:16,002 - root - INFO - lr: 4.9989e-05 gnorm: 1.74 [ 0:31:07<1 day, 1:55:08] +[titan] 2025-10-04 23:05:26,809 - root - INFO - step: 790 loss: 3.9123 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.4820 global_avg_mtp_loss: 3.4303 +[titan] 2025-10-04 23:05:26,809 - root - INFO - lr: 4.9989e-05 gnorm: 1.71 [ 0:31:18<1 day, 1:54:02] +[titan] 2025-10-04 23:05:37,659 - root - INFO - step: 795 loss: 3.9513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.4870 global_avg_mtp_loss: 3.4643 +[titan] 2025-10-04 23:05:37,659 - root - INFO - lr: 4.9989e-05 gnorm: 1.57 [ 0:31:29<1 day, 1:52:59] +[titan] 2025-10-04 23:05:46,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:05:48,524 - root - INFO - step: 800 loss: 3.8805 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4013 +[titan] 2025-10-04 23:05:48,524 - root - INFO - lr: 4.9989e-05 gnorm: 1.63 [ 0:31:40<1 day, 1:51:57] +[titan] 2025-10-04 23:05:59,423 - root - INFO - step: 805 loss: 4.0567 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.5041 global_avg_mtp_loss: 3.5527 +[titan] 2025-10-04 23:05:59,424 - root - INFO - lr: 4.9988e-05 gnorm: 1.65 [ 0:31:51<1 day, 1:50:58] +[titan] 2025-10-04 23:06:10,267 - root - INFO - step: 810 loss: 3.9384 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4865 global_avg_mtp_loss: 3.4519 +[titan] 2025-10-04 23:06:10,267 - root - INFO - lr: 4.9988e-05 gnorm: 1.62 [ 0:32:02<1 day, 1:49:56] +[titan] 2025-10-04 23:06:21,120 - root - INFO - step: 815 loss: 3.9402 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.4841 global_avg_mtp_loss: 3.4561 +[titan] 2025-10-04 23:06:21,120 - root - INFO - lr: 4.9988e-05 gnorm: 1.83 [ 0:32:12<1 day, 1:48:56] +[titan] 2025-10-04 23:06:31,962 - root - INFO - step: 820 loss: 3.8907 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.4804 global_avg_mtp_loss: 3.4102 +[titan] 2025-10-04 23:06:31,962 - root - INFO - lr: 4.9987e-05 gnorm: 1.56 [ 0:32:23<1 day, 1:47:55] +[titan] 2025-10-04 23:06:42,804 - root - INFO - step: 825 loss: 3.9391 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.4866 global_avg_mtp_loss: 3.4525 +[titan] 2025-10-04 23:06:42,804 - root - INFO - lr: 4.9987e-05 gnorm: 1.73 [ 0:32:34<1 day, 1:46:55] +[titan] 2025-10-04 23:06:53,697 - root - INFO - step: 830 loss: 3.8534 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.4757 global_avg_mtp_loss: 3.3777 +[titan] 2025-10-04 23:06:53,697 - 
root - INFO - lr: 4.9987e-05 gnorm: 1.46 [ 0:32:45<1 day, 1:45:59] +[titan] 2025-10-04 23:07:04,599 - root - INFO - step: 835 loss: 3.9680 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.4909 global_avg_mtp_loss: 3.4770 +[titan] 2025-10-04 23:07:04,599 - root - INFO - lr: 4.9987e-05 gnorm: 1.69 [ 0:32:56<1 day, 1:45:03] +[titan] 2025-10-04 23:07:15,482 - root - INFO - step: 840 loss: 3.8804 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4793 global_avg_mtp_loss: 3.4011 +[titan] 2025-10-04 23:07:15,483 - root - INFO - lr: 4.9986e-05 gnorm: 1.65 [ 0:33:07<1 day, 1:44:06] +[titan] 2025-10-04 23:07:26,345 - root - INFO - step: 845 loss: 3.9335 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.4859 global_avg_mtp_loss: 3.4476 +[titan] 2025-10-04 23:07:26,345 - root - INFO - lr: 4.9986e-05 gnorm: 1.67 [ 0:33:18<1 day, 1:43:10] +[titan] 2025-10-04 23:07:35,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:07:37,166 - root - INFO - step: 850 loss: 3.9466 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.4899 global_avg_mtp_loss: 3.4568 +[titan] 2025-10-04 23:07:37,166 - root - INFO - lr: 4.9986e-05 gnorm: 1.53 [ 0:33:29<1 day, 1:42:12] +[titan] 2025-10-04 23:07:48,038 - root - INFO - step: 855 loss: 3.8553 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3805 +[titan] 2025-10-04 23:07:48,039 - root - INFO - lr: 4.9985e-05 gnorm: 1.54 [ 0:33:39<1 day, 1:41:17] +[titan] 2025-10-04 23:07:58,950 - root - INFO - step: 860 loss: 3.9192 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.4837 global_avg_mtp_loss: 3.4355 +[titan] 2025-10-04 23:07:58,951 - root - INFO - lr: 4.9985e-05 gnorm: 1.63 [ 0:33:50<1 day, 1:40:24] +[titan] 2025-10-04 23:08:09,863 - root - INFO - step: 865 loss: 3.8398 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.4747 global_avg_mtp_loss: 3.3651 +[titan] 2025-10-04 23:08:09,863 - root - INFO - lr: 4.9985e-05 gnorm: 1.57 [ 0:34:01<1 day, 1:39:32] +[titan] 2025-10-04 23:08:20,763 - root - INFO - step: 870 loss: 3.9660 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.4876 global_avg_mtp_loss: 3.4784 +[titan] 2025-10-04 23:08:20,763 - root - INFO - lr: 4.9984e-05 gnorm: 1.70 [ 0:34:12<1 day, 1:38:39] +[titan] 2025-10-04 23:08:31,644 - root - INFO - step: 875 loss: 3.8236 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4708 global_avg_mtp_loss: 3.3528 +[titan] 2025-10-04 23:08:31,644 - root - INFO - lr: 4.9984e-05 gnorm: 1.58 [ 0:34:23<1 day, 1:37:46] +[titan] 2025-10-04 23:08:42,521 - root - INFO - step: 880 loss: 3.8393 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4731 global_avg_mtp_loss: 3.3662 +[titan] 2025-10-04 23:08:42,522 - 
root - INFO - lr: 4.9984e-05 gnorm: 1.66 [ 0:34:34<1 day, 1:36:54] +[titan] 2025-10-04 23:08:53,411 - root - INFO - step: 885 loss: 3.9181 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4832 global_avg_mtp_loss: 3.4349 +[titan] 2025-10-04 23:08:53,412 - root - INFO - lr: 4.9983e-05 gnorm: 1.81 [ 0:34:45<1 day, 1:36:03] +[titan] 2025-10-04 23:09:04,287 - root - INFO - step: 890 loss: 3.8540 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4756 global_avg_mtp_loss: 3.3784 +[titan] 2025-10-04 23:09:04,287 - root - INFO - lr: 4.9983e-05 gnorm: 1.63 [ 0:34:56<1 day, 1:35:11] +[titan] 2025-10-04 23:09:15,149 - root - INFO - step: 895 loss: 3.7956 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.4646 global_avg_mtp_loss: 3.3310 +[titan] 2025-10-04 23:09:15,149 - root - INFO - lr: 4.9983e-05 gnorm: 1.59 [ 0:35:06<1 day, 1:34:19] +[titan] 2025-10-04 23:09:23,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:09:26,036 - root - INFO - step: 900 loss: 3.8814 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4792 global_avg_mtp_loss: 3.4023 +[titan] 2025-10-04 23:09:26,036 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:17<1 day, 1:33:29] +[titan] 2025-10-04 23:09:36,928 - root - INFO - step: 905 loss: 3.8547 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.4749 global_avg_mtp_loss: 3.3797 +[titan] 2025-10-04 23:09:36,928 - root - INFO - lr: 4.9982e-05 gnorm: 1.59 [ 0:35:28<1 day, 1:32:40] +[titan] 2025-10-04 23:09:47,795 - root - INFO - step: 910 loss: 3.7503 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4614 global_avg_mtp_loss: 3.2890 +[titan] 2025-10-04 23:09:47,795 - root - INFO - lr: 4.9982e-05 gnorm: 1.63 [ 0:35:39<1 day, 1:31:49] +[titan] 2025-10-04 23:09:58,664 - root - INFO - step: 915 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3219 +[titan] 2025-10-04 23:09:58,664 - root - INFO - lr: 4.9981e-05 gnorm: 1.57 [ 0:35:50<1 day, 1:31:00] +[titan] 2025-10-04 23:10:09,537 - root - INFO - step: 920 loss: 3.8477 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.4753 global_avg_mtp_loss: 3.3723 +[titan] 2025-10-04 23:10:09,537 - root - INFO - lr: 4.9981e-05 gnorm: 1.56 [ 0:36:01<1 day, 1:30:11] +[titan] 2025-10-04 23:10:20,420 - root - INFO - step: 925 loss: 3.8141 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3439 +[titan] 2025-10-04 23:10:20,420 - root - INFO - lr: 4.9980e-05 gnorm: 1.53 [ 0:36:12<1 day, 1:29:22] +[titan] 2025-10-04 23:10:31,298 - root - INFO - step: 930 loss: 3.8185 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4702 global_avg_mtp_loss: 3.3482 +[titan] 2025-10-04 23:10:31,298 - 
root - INFO - lr: 4.9980e-05 gnorm: 1.56 [ 0:36:23<1 day, 1:28:34] +[titan] 2025-10-04 23:10:42,186 - root - INFO - step: 935 loss: 3.7234 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.4574 global_avg_mtp_loss: 3.2661 +[titan] 2025-10-04 23:10:42,186 - root - INFO - lr: 4.9980e-05 gnorm: 1.52 [ 0:36:34<1 day, 1:27:47] +[titan] 2025-10-04 23:10:53,053 - root - INFO - step: 940 loss: 3.7877 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4666 global_avg_mtp_loss: 3.3211 +[titan] 2025-10-04 23:10:53,053 - root - INFO - lr: 4.9979e-05 gnorm: 1.69 [ 0:36:44<1 day, 1:26:59] +[titan] 2025-10-04 23:11:03,935 - root - INFO - step: 945 loss: 3.7815 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.4635 global_avg_mtp_loss: 3.3180 +[titan] 2025-10-04 23:11:03,935 - root - INFO - lr: 4.9979e-05 gnorm: 1.45 [ 0:36:55<1 day, 1:26:13] +[titan] 2025-10-04 23:11:12,603 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:11:14,787 - root - INFO - step: 950 loss: 3.8345 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.4716 global_avg_mtp_loss: 3.3629 +[titan] 2025-10-04 23:11:14,787 - root - INFO - lr: 4.9979e-05 gnorm: 1.54 [ 0:37:06<1 day, 1:25:25] +[titan] 2025-10-04 23:11:25,662 - root - INFO - step: 955 loss: 3.7153 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4570 global_avg_mtp_loss: 3.2583 +[titan] 2025-10-04 23:11:25,662 - root - INFO - lr: 4.9978e-05 gnorm: 1.40 [ 0:37:17<1 day, 1:24:39] +[titan] 2025-10-04 23:11:36,506 - root - INFO - step: 960 loss: 3.7474 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4595 global_avg_mtp_loss: 3.2878 +[titan] 2025-10-04 23:11:36,506 - root - INFO - lr: 4.9978e-05 gnorm: 1.39 [ 0:37:28<1 day, 1:23:52] +[titan] 2025-10-04 23:11:47,428 - root - INFO - step: 965 loss: 3.7469 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4597 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:11:47,429 - root - INFO - lr: 4.9977e-05 gnorm: 1.60 [ 0:37:39<1 day, 1:23:08] +[titan] 2025-10-04 23:11:58,339 - root - INFO - step: 970 loss: 3.7767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.4638 global_avg_mtp_loss: 3.3129 +[titan] 2025-10-04 23:11:58,339 - root - INFO - lr: 4.9977e-05 gnorm: 1.59 [ 0:37:50<1 day, 1:22:24] +[titan] 2025-10-04 23:12:09,214 - root - INFO - step: 975 loss: 3.7198 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4568 global_avg_mtp_loss: 3.2630 +[titan] 2025-10-04 23:12:09,214 - root - INFO - lr: 4.9977e-05 gnorm: 1.44 [ 0:38:01<1 day, 1:21:40] +[titan] 2025-10-04 23:12:20,081 - root - INFO - step: 980 loss: 3.7702 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4623 global_avg_mtp_loss: 3.3079 +[titan] 2025-10-04 23:12:20,081 - 
root - INFO - lr: 4.9976e-05 gnorm: 1.42 [ 0:38:11<1 day, 1:20:55] +[titan] 2025-10-04 23:12:30,946 - root - INFO - step: 985 loss: 3.8212 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3515 +[titan] 2025-10-04 23:12:30,947 - root - INFO - lr: 4.9976e-05 gnorm: 1.39 [ 0:38:22<1 day, 1:20:10] +[titan] 2025-10-04 23:12:41,799 - root - INFO - step: 990 loss: 3.7716 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.4659 global_avg_mtp_loss: 3.3057 +[titan] 2025-10-04 23:12:41,799 - root - INFO - lr: 4.9975e-05 gnorm: 1.50 [ 0:38:33<1 day, 1:19:26] +[titan] 2025-10-04 23:12:52,700 - root - INFO - step: 995 loss: 3.8144 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.4697 global_avg_mtp_loss: 3.3447 +[titan] 2025-10-04 23:12:52,701 - root - INFO - lr: 4.9975e-05 gnorm: 1.47 [ 0:38:44<1 day, 1:18:43] +[titan] 2025-10-04 23:13:01,387 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:13:03,568 - root - INFO - step: 1000 loss: 3.6411 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4473 global_avg_mtp_loss: 3.1938 +[titan] 2025-10-04 23:13:03,569 - root - INFO - lr: 4.9974e-05 gnorm: 1.70 [ 0:38:55<1 day, 1:18:00] +[titan] 2025-10-04 23:13:14,441 - root - INFO - step: 1005 loss: 3.7872 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4636 global_avg_mtp_loss: 3.3236 +[titan] 2025-10-04 23:13:14,442 - root - INFO - lr: 4.9974e-05 gnorm: 1.62 [ 0:39:06<1 day, 1:17:17] +[titan] 2025-10-04 23:13:25,308 - root - INFO - step: 1010 loss: 3.8240 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4681 global_avg_mtp_loss: 3.3560 +[titan] 2025-10-04 23:13:25,308 - root - INFO - lr: 4.9974e-05 gnorm: 1.51 [ 0:39:17<1 day, 1:16:34] +[titan] 2025-10-04 23:13:36,156 - root - INFO - step: 1015 loss: 3.7026 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.4566 global_avg_mtp_loss: 3.2461 +[titan] 2025-10-04 23:13:36,157 - root - INFO - lr: 4.9973e-05 gnorm: 1.61 [ 0:39:27<1 day, 1:15:51] +[titan] 2025-10-04 23:13:47,024 - root - INFO - step: 1020 loss: 3.8204 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.4698 global_avg_mtp_loss: 3.3506 +[titan] 2025-10-04 23:13:47,024 - root - INFO - lr: 4.9973e-05 gnorm: 1.58 [ 0:39:38<1 day, 1:15:09] +[titan] 2025-10-04 23:13:55,944 - root - INFO - Dumping profiler traces at step 1024 +[titan] 2025-10-04 23:13:55,980 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:13:58,196 - root - INFO - step: 1025 loss: 3.7098 memory: 118.84GiB(85.28%) tps: 29,332 tflops: 406.94 mfu: 41.15% global_avg_ntp_loss: 0.4550 global_avg_mtp_loss: 3.2548 +[titan] 2025-10-04 23:13:58,196 - root - INFO - lr: 4.9972e-05 gnorm: 1.53 [ 0:39:50<1 day, 1:14:38] +[titan] 2025-10-04 23:14:09,055 - root 
- INFO - step: 1030 loss: 3.6684 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.4505 global_avg_mtp_loss: 3.2179 +[titan] 2025-10-04 23:14:09,056 - root - INFO - lr: 4.9972e-05 gnorm: 1.49 [ 0:40:00<1 day, 1:13:56] +[titan] 2025-10-04 23:14:19,917 - root - INFO - step: 1035 loss: 3.7778 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4632 global_avg_mtp_loss: 3.3146 +[titan] 2025-10-04 23:14:19,917 - root - INFO - lr: 4.9971e-05 gnorm: 1.64 [ 0:40:11<1 day, 1:13:15] +[titan] 2025-10-04 23:14:30,784 - root - INFO - step: 1040 loss: 3.7600 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.4596 global_avg_mtp_loss: 3.3004 +[titan] 2025-10-04 23:14:30,784 - root - INFO - lr: 4.9971e-05 gnorm: 1.73 [ 0:40:22<1 day, 1:12:34] +[titan] 2025-10-04 23:14:41,642 - root - INFO - step: 1045 loss: 3.7970 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.4658 global_avg_mtp_loss: 3.3312 +[titan] 2025-10-04 23:14:41,642 - root - INFO - lr: 4.9970e-05 gnorm: 1.60 [ 0:40:33<1 day, 1:11:53] +[titan] 2025-10-04 23:14:50,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:14:52,527 - root - INFO - step: 1050 loss: 3.7607 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.4629 global_avg_mtp_loss: 3.2979 +[titan] 2025-10-04 23:14:52,527 - root - INFO - lr: 4.9970e-05 gnorm: 1.86 [ 0:40:44<1 day, 1:11:13] +[titan] 2025-10-04 23:15:03,398 - root - INFO - step: 1055 loss: 3.6921 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4533 global_avg_mtp_loss: 3.2388 +[titan] 2025-10-04 23:15:03,398 - root - INFO - lr: 4.9970e-05 gnorm: 1.59 [ 0:40:55<1 day, 1:10:33] +[titan] 2025-10-04 23:15:14,306 - root - INFO - step: 1060 loss: 3.7138 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.4561 global_avg_mtp_loss: 3.2577 +[titan] 2025-10-04 23:15:14,306 - root - INFO - lr: 4.9969e-05 gnorm: 1.89 [ 0:41:06<1 day, 1:09:55] +[titan] 2025-10-04 23:15:25,186 - root - INFO - step: 1065 loss: 3.7455 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4584 global_avg_mtp_loss: 3.2871 +[titan] 2025-10-04 23:15:25,186 - root - INFO - lr: 4.9969e-05 gnorm: 1.72 [ 0:41:17<1 day, 1:09:15] +[titan] 2025-10-04 23:15:36,061 - root - INFO - step: 1070 loss: 3.6510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4474 global_avg_mtp_loss: 3.2036 +[titan] 2025-10-04 23:15:36,061 - root - INFO - lr: 4.9968e-05 gnorm: 1.70 [ 0:41:27<1 day, 1:08:36] +[titan] 2025-10-04 23:15:46,950 - root - INFO - step: 1075 loss: 3.7757 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4630 global_avg_mtp_loss: 3.3127 +[titan] 2025-10-04 23:15:46,950 - root - INFO - lr: 4.9968e-05 gnorm: 1.53 [ 0:41:38<1 day, 1:07:58] +[titan] 2025-10-04 23:15:57,821 - root - INFO - step: 1080 loss: 3.6997 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4542 global_avg_mtp_loss: 3.2455 +[titan] 2025-10-04 
23:15:57,821 - root - INFO - lr: 4.9967e-05 gnorm: 1.40 [ 0:41:49<1 day, 1:07:19] +[titan] 2025-10-04 23:16:08,691 - root - INFO - step: 1085 loss: 3.7768 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.4652 global_avg_mtp_loss: 3.3116 +[titan] 2025-10-04 23:16:08,691 - root - INFO - lr: 4.9967e-05 gnorm: 1.71 [ 0:42:00<1 day, 1:06:41] +[titan] 2025-10-04 23:16:19,625 - root - INFO - step: 1090 loss: 3.7891 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.4653 global_avg_mtp_loss: 3.3238 +[titan] 2025-10-04 23:16:19,625 - root - INFO - lr: 4.9966e-05 gnorm: 1.32 [ 0:42:11<1 day, 1:06:05] +[titan] 2025-10-04 23:16:30,524 - root - INFO - step: 1095 loss: 3.6348 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.4440 global_avg_mtp_loss: 3.1907 +[titan] 2025-10-04 23:16:30,525 - root - INFO - lr: 4.9966e-05 gnorm: 1.55 [ 0:42:22<1 day, 1:05:28] +[titan] 2025-10-04 23:16:39,227 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:16:41,421 - root - INFO - step: 1100 loss: 3.7357 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.4573 global_avg_mtp_loss: 3.2785 +[titan] 2025-10-04 23:16:41,421 - root - INFO - lr: 4.9965e-05 gnorm: 1.50 [ 0:42:33<1 day, 1:04:51] +[titan] 2025-10-04 23:16:52,335 - root - INFO - step: 1105 loss: 3.6253 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1800 +[titan] 2025-10-04 23:16:52,335 - root - INFO - lr: 4.9965e-05 gnorm: 1.52 [ 0:42:44<1 day, 1:04:15] +[titan] 2025-10-04 23:17:03,265 - root - INFO - step: 1110 loss: 3.6786 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.4500 global_avg_mtp_loss: 3.2285 +[titan] 2025-10-04 23:17:03,266 - root - INFO - lr: 4.9964e-05 gnorm: 1.41 [ 0:42:55<1 day, 1:03:40] +[titan] 2025-10-04 23:17:14,175 - root - INFO - step: 1115 loss: 3.6578 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.4465 global_avg_mtp_loss: 3.2112 +[titan] 2025-10-04 23:17:14,175 - root - INFO - lr: 4.9964e-05 gnorm: 1.35 [ 0:43:05<1 day, 1:03:04] +[titan] 2025-10-04 23:17:25,067 - root - INFO - step: 1120 loss: 3.6849 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.4511 global_avg_mtp_loss: 3.2339 +[titan] 2025-10-04 23:17:25,067 - root - INFO - lr: 4.9963e-05 gnorm: 1.51 [ 0:43:16<1 day, 1:02:28] +[titan] 2025-10-04 23:17:35,980 - root - INFO - step: 1125 loss: 3.6812 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.4516 global_avg_mtp_loss: 3.2296 +[titan] 2025-10-04 23:17:35,980 - root - INFO - lr: 4.9963e-05 gnorm: 1.53 [ 0:43:27<1 day, 1:01:53] +[titan] 2025-10-04 23:17:46,863 - root - INFO - step: 1130 loss: 3.6167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4428 global_avg_mtp_loss: 3.1739 +[titan] 2025-10-04 
23:17:46,863 - root - INFO - lr: 4.9962e-05 gnorm: 1.69 [ 0:43:38<1 day, 1:01:17] +[titan] 2025-10-04 23:17:57,754 - root - INFO - step: 1135 loss: 3.5668 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.4385 global_avg_mtp_loss: 3.1284 +[titan] 2025-10-04 23:17:57,754 - root - INFO - lr: 4.9962e-05 gnorm: 1.44 [ 0:43:49<1 day, 1:00:42] +[titan] 2025-10-04 23:18:08,676 - root - INFO - step: 1140 loss: 3.6958 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.4522 global_avg_mtp_loss: 3.2436 +[titan] 2025-10-04 23:18:08,676 - root - INFO - lr: 4.9961e-05 gnorm: 1.51 [ 0:44:00<1 day, 1:00:08] +[titan] 2025-10-04 23:18:19,548 - root - INFO - step: 1145 loss: 3.7386 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.4725 global_avg_mtp_loss: 3.2662 +[titan] 2025-10-04 23:18:19,548 - root - INFO - lr: 4.9961e-05 gnorm: 1.52 [ 0:44:11<1 day, 0:59:32] +[titan] 2025-10-04 23:18:28,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:18:30,436 - root - INFO - step: 1150 loss: 3.6554 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.4491 global_avg_mtp_loss: 3.2063 +[titan] 2025-10-04 23:18:30,436 - root - INFO - lr: 4.9960e-05 gnorm: 1.51 [ 0:44:22<1 day, 0:58:57] +[titan] 2025-10-04 23:18:41,365 - root - INFO - step: 1155 loss: 3.6986 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.4535 global_avg_mtp_loss: 3.2451 +[titan] 2025-10-04 23:18:41,365 - root - INFO - lr: 4.9960e-05 gnorm: 1.49 [ 0:44:33<1 day, 0:58:24] +[titan] 2025-10-04 23:18:52,242 - root - INFO - step: 1160 loss: 3.6068 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4415 global_avg_mtp_loss: 3.1653 +[titan] 2025-10-04 23:18:52,243 - root - INFO - lr: 4.9959e-05 gnorm: 1.49 [ 0:44:44<1 day, 0:57:49] +[titan] 2025-10-04 23:19:03,171 - root - INFO - step: 1165 loss: 3.5931 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.4398 global_avg_mtp_loss: 3.1533 +[titan] 2025-10-04 23:19:03,171 - root - INFO - lr: 4.9958e-05 gnorm: 1.54 [ 0:44:54<1 day, 0:57:16] +[titan] 2025-10-04 23:19:14,054 - root - INFO - step: 1170 loss: 3.6446 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4453 global_avg_mtp_loss: 3.1993 +[titan] 2025-10-04 23:19:14,054 - root - INFO - lr: 4.9958e-05 gnorm: 1.49 [ 0:45:05<1 day, 0:56:42] +[titan] 2025-10-04 23:19:24,934 - root - INFO - step: 1175 loss: 3.6211 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4454 global_avg_mtp_loss: 3.1757 +[titan] 2025-10-04 23:19:24,934 - root - INFO - lr: 4.9957e-05 gnorm: 1.48 [ 0:45:16<1 day, 0:56:07] +[titan] 2025-10-04 23:19:35,805 - root - INFO - step: 1180 loss: 3.6634 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.4499 global_avg_mtp_loss: 3.2135 +[titan] 2025-10-04 
23:19:35,805 - root - INFO - lr: 4.9957e-05 gnorm: 1.55 [ 0:45:27<1 day, 0:55:33] +[titan] 2025-10-04 23:19:46,722 - root - INFO - step: 1185 loss: 3.6182 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1740 +[titan] 2025-10-04 23:19:46,722 - root - INFO - lr: 4.9956e-05 gnorm: 1.56 [ 0:45:38<1 day, 0:55:01] +[titan] 2025-10-04 23:19:57,578 - root - INFO - step: 1190 loss: 3.6307 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.4437 global_avg_mtp_loss: 3.1870 +[titan] 2025-10-04 23:19:57,578 - root - INFO - lr: 4.9956e-05 gnorm: 1.44 [ 0:45:49<1 day, 0:54:26] +[titan] 2025-10-04 23:20:08,587 - root - INFO - step: 1195 loss: 3.6947 memory: 118.84GiB(85.28%) tps: 29,765 tflops: 412.95 mfu: 41.75% global_avg_ntp_loss: 0.4519 global_avg_mtp_loss: 3.2429 +[titan] 2025-10-04 23:20:08,587 - root - INFO - lr: 4.9955e-05 gnorm: 1.42 [ 0:46:00<1 day, 0:53:57] +[titan] 2025-10-04 23:20:17,298 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:20:19,487 - root - INFO - step: 1200 loss: 3.6239 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.4443 global_avg_mtp_loss: 3.1796 +[titan] 2025-10-04 23:20:19,487 - root - INFO - lr: 4.9955e-05 gnorm: 1.44 [ 0:46:11<1 day, 0:53:25] +[titan] 2025-10-04 23:20:30,366 - root - INFO - step: 1205 loss: 3.6270 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4452 global_avg_mtp_loss: 3.1819 +[titan] 2025-10-04 23:20:30,366 - root - INFO - lr: 4.9954e-05 gnorm: 1.60 [ 0:46:22<1 day, 0:52:51] +[titan] 2025-10-04 23:20:41,259 - root - INFO - step: 1210 loss: 3.6144 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1709 +[titan] 2025-10-04 23:20:41,259 - root - INFO - lr: 4.9953e-05 gnorm: 1.66 [ 0:46:33<1 day, 0:52:19] +[titan] 2025-10-04 23:20:52,152 - root - INFO - step: 1215 loss: 3.6886 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4528 global_avg_mtp_loss: 3.2359 +[titan] 2025-10-04 23:20:52,152 - root - INFO - lr: 4.9953e-05 gnorm: 1.48 [ 0:46:43<1 day, 0:51:47] +[titan] 2025-10-04 23:21:03,098 - root - INFO - step: 1220 loss: 3.5263 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 42.00% global_avg_ntp_loss: 0.4324 global_avg_mtp_loss: 3.0939 +[titan] 2025-10-04 23:21:03,098 - root - INFO - lr: 4.9952e-05 gnorm: 1.62 [ 0:46:54<1 day, 0:51:16] +[titan] 2025-10-04 23:21:14,014 - root - INFO - step: 1225 loss: 3.6228 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.4426 global_avg_mtp_loss: 3.1801 +[titan] 2025-10-04 23:21:14,014 - root - INFO - lr: 4.9952e-05 gnorm: 1.53 [ 0:47:05<1 day, 0:50:45] +[titan] 2025-10-04 23:21:24,903 - root - INFO - step: 1230 loss: 3.5398 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.4327 global_avg_mtp_loss: 3.1072 +[titan] 2025-10-04 
23:21:24,904 - root - INFO - lr: 4.9951e-05 gnorm: 1.39 [ 0:47:16<1 day, 0:50:13] +[titan] 2025-10-04 23:21:35,790 - root - INFO - step: 1235 loss: 3.5790 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.4389 global_avg_mtp_loss: 3.1401 +[titan] 2025-10-04 23:21:35,790 - root - INFO - lr: 4.9951e-05 gnorm: 1.42 [ 0:47:27<1 day, 0:49:41] +[titan] 2025-10-04 23:21:46,666 - root - INFO - step: 1240 loss: 3.6434 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4448 global_avg_mtp_loss: 3.1987 +[titan] 2025-10-04 23:21:46,666 - root - INFO - lr: 4.9950e-05 gnorm: 1.43 [ 0:47:38<1 day, 0:49:10] +[titan] 2025-10-04 23:21:57,577 - root - INFO - step: 1245 loss: 3.5452 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4350 global_avg_mtp_loss: 3.1102 +[titan] 2025-10-04 23:21:57,577 - root - INFO - lr: 4.9949e-05 gnorm: 1.40 [ 0:47:49<1 day, 0:48:39] +[titan] 2025-10-04 23:22:06,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:22:08,563 - root - INFO - step: 1250 loss: 3.5844 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.4369 global_avg_mtp_loss: 3.1475 +[titan] 2025-10-04 23:22:08,564 - root - INFO - lr: 4.9949e-05 gnorm: 1.48 [ 0:48:00<1 day, 0:48:11] +[titan] 2025-10-04 23:22:19,438 - root - INFO - step: 1255 loss: 3.6078 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.4435 global_avg_mtp_loss: 3.1643 +[titan] 2025-10-04 23:22:19,438 - root - INFO - lr: 4.9948e-05 gnorm: 1.59 [ 0:48:11<1 day, 0:47:39] +[titan] 2025-10-04 23:22:30,309 - root - INFO - step: 1260 loss: 3.5536 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4346 global_avg_mtp_loss: 3.1191 +[titan] 2025-10-04 23:22:30,309 - root - INFO - lr: 4.9948e-05 gnorm: 1.57 [ 0:48:22<1 day, 0:47:08] +[titan] 2025-10-04 23:22:41,203 - root - INFO - step: 1265 loss: 3.5861 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.4376 global_avg_mtp_loss: 3.1485 +[titan] 2025-10-04 23:22:41,203 - root - INFO - lr: 4.9947e-05 gnorm: 1.47 [ 0:48:32<1 day, 0:46:37] +[titan] 2025-10-04 23:22:52,080 - root - INFO - step: 1270 loss: 3.6181 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4419 global_avg_mtp_loss: 3.1762 +[titan] 2025-10-04 23:22:52,081 - root - INFO - lr: 4.9946e-05 gnorm: 1.38 [ 0:48:43<1 day, 0:46:06] +[titan] 2025-10-04 23:23:02,961 - root - INFO - step: 1275 loss: 3.5508 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1168 +[titan] 2025-10-04 23:23:02,961 - root - INFO - lr: 4.9946e-05 gnorm: 1.48 [ 0:48:54<1 day, 0:45:36] +[titan] 2025-10-04 23:23:13,913 - root - INFO - step: 1280 loss: 3.5362 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.4318 global_avg_mtp_loss: 3.1044 +[titan] 2025-10-04 
23:23:13,913 - root - INFO - lr: 4.9945e-05 gnorm: 1.47 [ 0:49:05<1 day, 0:45:07] +[titan] 2025-10-04 23:23:24,835 - root - INFO - step: 1285 loss: 3.5593 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.4354 global_avg_mtp_loss: 3.1239 +[titan] 2025-10-04 23:23:24,835 - root - INFO - lr: 4.9944e-05 gnorm: 1.48 [ 0:49:16<1 day, 0:44:38] +[titan] 2025-10-04 23:23:35,699 - root - INFO - step: 1290 loss: 3.5751 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4357 global_avg_mtp_loss: 3.1395 +[titan] 2025-10-04 23:23:35,700 - root - INFO - lr: 4.9944e-05 gnorm: 1.42 [ 0:49:27<1 day, 0:44:07] +[titan] 2025-10-04 23:23:46,610 - root - INFO - step: 1295 loss: 3.5938 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4377 global_avg_mtp_loss: 3.1562 +[titan] 2025-10-04 23:23:46,610 - root - INFO - lr: 4.9943e-05 gnorm: 1.35 [ 0:49:38<1 day, 0:43:38] +[titan] 2025-10-04 23:23:55,310 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:23:57,488 - root - INFO - step: 1300 loss: 3.5542 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.4336 global_avg_mtp_loss: 3.1206 +[titan] 2025-10-04 23:23:57,489 - root - INFO - lr: 4.9943e-05 gnorm: 1.38 [ 0:49:49<1 day, 0:43:08] +[titan] 2025-10-04 23:24:08,378 - root - INFO - step: 1305 loss: 3.5644 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.4344 global_avg_mtp_loss: 3.1301 +[titan] 2025-10-04 23:24:08,379 - root - INFO - lr: 4.9942e-05 gnorm: 1.38 [ 0:50:00<1 day, 0:42:39] +[titan] 2025-10-04 23:24:19,247 - root - INFO - step: 1310 loss: 3.5464 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.4340 global_avg_mtp_loss: 3.1124 +[titan] 2025-10-04 23:24:19,247 - root - INFO - lr: 4.9941e-05 gnorm: 1.43 [ 0:50:11<1 day, 0:42:09] +[titan] 2025-10-04 23:24:30,161 - root - INFO - step: 1315 loss: 3.5898 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.4372 global_avg_mtp_loss: 3.1527 +[titan] 2025-10-04 23:24:30,161 - root - INFO - lr: 4.9941e-05 gnorm: 1.34 [ 0:50:21<1 day, 0:41:40] +[titan] 2025-10-04 23:24:41,039 - root - INFO - step: 1320 loss: 3.6159 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.4427 global_avg_mtp_loss: 3.1731 +[titan] 2025-10-04 23:24:41,039 - root - INFO - lr: 4.9940e-05 gnorm: 1.34 [ 0:50:32<1 day, 0:41:11] +[titan] 2025-10-04 23:24:51,938 - root - INFO - step: 1325 loss: 3.4618 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4229 global_avg_mtp_loss: 3.0389 +[titan] 2025-10-04 23:24:51,938 - root - INFO - lr: 4.9939e-05 gnorm: 1.36 [ 0:50:43<1 day, 0:40:42] +[titan] 2025-10-04 23:25:02,828 - root - INFO - step: 1330 loss: 3.5160 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4280 global_avg_mtp_loss: 3.0880 +[titan] 2025-10-04 
23:25:02,829 - root - INFO - lr: 4.9939e-05 gnorm: 1.38 [ 0:50:54<1 day, 0:40:13] +[titan] 2025-10-04 23:25:13,745 - root - INFO - step: 1335 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4282 global_avg_mtp_loss: 3.0763 +[titan] 2025-10-04 23:25:13,746 - root - INFO - lr: 4.9938e-05 gnorm: 1.46 [ 0:51:05<1 day, 0:39:45] +[titan] 2025-10-04 23:25:24,642 - root - INFO - step: 1340 loss: 3.5440 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4337 global_avg_mtp_loss: 3.1103 +[titan] 2025-10-04 23:25:24,642 - root - INFO - lr: 4.9937e-05 gnorm: 1.42 [ 0:51:16<1 day, 0:39:17] +[titan] 2025-10-04 23:25:35,576 - root - INFO - step: 1345 loss: 3.6036 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.4395 global_avg_mtp_loss: 3.1641 +[titan] 2025-10-04 23:25:35,576 - root - INFO - lr: 4.9937e-05 gnorm: 1.35 [ 0:51:27<1 day, 0:38:50] +[titan] 2025-10-04 23:25:44,278 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:25:46,465 - root - INFO - step: 1350 loss: 3.5202 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4296 global_avg_mtp_loss: 3.0905 +[titan] 2025-10-04 23:25:46,465 - root - INFO - lr: 4.9936e-05 gnorm: 1.31 [ 0:51:38<1 day, 0:38:21] +[titan] 2025-10-04 23:25:57,344 - root - INFO - step: 1355 loss: 3.5459 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.4309 global_avg_mtp_loss: 3.1149 +[titan] 2025-10-04 23:25:57,345 - root - INFO - lr: 4.9935e-05 gnorm: 1.30 [ 0:51:49<1 day, 0:37:53] +[titan] 2025-10-04 23:26:08,268 - root - INFO - step: 1360 loss: 3.5720 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.4351 global_avg_mtp_loss: 3.1369 +[titan] 2025-10-04 23:26:08,269 - root - INFO - lr: 4.9935e-05 gnorm: 1.39 [ 0:52:00<1 day, 0:37:26] +[titan] 2025-10-04 23:26:19,143 - root - INFO - step: 1365 loss: 3.4497 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0288 +[titan] 2025-10-04 23:26:19,143 - root - INFO - lr: 4.9934e-05 gnorm: 1.37 [ 0:52:10<1 day, 0:36:57] +[titan] 2025-10-04 23:26:30,030 - root - INFO - step: 1370 loss: 3.5847 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.4370 global_avg_mtp_loss: 3.1477 +[titan] 2025-10-04 23:26:30,030 - root - INFO - lr: 4.9933e-05 gnorm: 1.49 [ 0:52:21<1 day, 0:36:30] +[titan] 2025-10-04 23:26:40,913 - root - INFO - step: 1375 loss: 3.4970 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.4269 global_avg_mtp_loss: 3.0701 +[titan] 2025-10-04 23:26:40,913 - root - INFO - lr: 4.9933e-05 gnorm: 1.38 [ 0:52:32<1 day, 0:36:02] +[titan] 2025-10-04 23:26:51,832 - root - INFO - step: 1380 loss: 3.4520 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 3.0312 +[titan] 2025-10-04 
23:26:51,832 - root - INFO - lr: 4.9932e-05 gnorm: 1.36 [ 0:52:43<1 day, 0:35:35] +[titan] 2025-10-04 23:27:02,721 - root - INFO - step: 1385 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.4169 global_avg_mtp_loss: 3.0149 +[titan] 2025-10-04 23:27:02,721 - root - INFO - lr: 4.9931e-05 gnorm: 1.42 [ 0:52:54<1 day, 0:35:07] +[titan] 2025-10-04 23:27:13,641 - root - INFO - step: 1390 loss: 3.4046 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.4139 global_avg_mtp_loss: 2.9907 +[titan] 2025-10-04 23:27:13,641 - root - INFO - lr: 4.9931e-05 gnorm: 1.37 [ 0:53:05<1 day, 0:34:41] +[titan] 2025-10-04 23:27:24,527 - root - INFO - step: 1395 loss: 3.4971 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.4253 global_avg_mtp_loss: 3.0717 +[titan] 2025-10-04 23:27:24,527 - root - INFO - lr: 4.9930e-05 gnorm: 1.41 [ 0:53:16<1 day, 0:34:14] +[titan] 2025-10-04 23:27:33,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:27:35,394 - root - INFO - step: 1400 loss: 3.5045 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.4290 global_avg_mtp_loss: 3.0755 +[titan] 2025-10-04 23:27:35,394 - root - INFO - lr: 4.9929e-05 gnorm: 1.40 [ 0:53:27<1 day, 0:33:46] +[titan] 2025-10-04 23:27:46,287 - root - INFO - step: 1405 loss: 3.4686 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.4233 global_avg_mtp_loss: 3.0453 +[titan] 2025-10-04 23:27:46,287 - root - INFO - lr: 4.9928e-05 gnorm: 1.49 [ 0:53:38<1 day, 0:33:19] +[titan] 2025-10-04 23:27:57,198 - root - INFO - step: 1410 loss: 3.5153 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4300 global_avg_mtp_loss: 3.0853 +[titan] 2025-10-04 23:27:57,198 - root - INFO - lr: 4.9928e-05 gnorm: 1.47 [ 0:53:48<1 day, 0:32:53] +[titan] 2025-10-04 23:28:08,061 - root - INFO - step: 1415 loss: 3.4739 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4242 global_avg_mtp_loss: 3.0497 +[titan] 2025-10-04 23:28:08,061 - root - INFO - lr: 4.9927e-05 gnorm: 1.34 [ 0:53:59<1 day, 0:32:25] +[titan] 2025-10-04 23:28:18,978 - root - INFO - step: 1420 loss: 3.5053 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.4276 global_avg_mtp_loss: 3.0778 +[titan] 2025-10-04 23:28:18,978 - root - INFO - lr: 4.9926e-05 gnorm: 1.41 [ 0:54:10<1 day, 0:31:59] +[titan] 2025-10-04 23:28:29,841 - root - INFO - step: 1425 loss: 3.4083 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4170 global_avg_mtp_loss: 2.9913 +[titan] 2025-10-04 23:28:29,841 - root - INFO - lr: 4.9926e-05 gnorm: 1.42 [ 0:54:21<1 day, 0:31:32] +[titan] 2025-10-04 23:28:40,714 - root - INFO - step: 1430 loss: 3.4627 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0411 +[titan] 2025-10-04 
23:28:40,714 - root - INFO - lr: 4.9925e-05 gnorm: 1.43 [ 0:54:32<1 day, 0:31:05] +[titan] 2025-10-04 23:28:51,581 - root - INFO - step: 1435 loss: 3.4919 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0684 +[titan] 2025-10-04 23:28:51,582 - root - INFO - lr: 4.9924e-05 gnorm: 1.37 [ 0:54:43<1 day, 0:30:38] +[titan] 2025-10-04 23:29:02,457 - root - INFO - step: 1440 loss: 3.4907 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.4267 global_avg_mtp_loss: 3.0640 +[titan] 2025-10-04 23:29:02,458 - root - INFO - lr: 4.9923e-05 gnorm: 1.37 [ 0:54:54<1 day, 0:30:12] +[titan] 2025-10-04 23:29:13,408 - root - INFO - step: 1445 loss: 3.4656 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.4209 global_avg_mtp_loss: 3.0447 +[titan] 2025-10-04 23:29:13,408 - root - INFO - lr: 4.9923e-05 gnorm: 1.40 [ 0:55:05<1 day, 0:29:47] +[titan] 2025-10-04 23:29:22,094 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:29:24,281 - root - INFO - step: 1450 loss: 3.4814 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4230 global_avg_mtp_loss: 3.0585 +[titan] 2025-10-04 23:29:24,281 - root - INFO - lr: 4.9922e-05 gnorm: 1.47 [ 0:55:16<1 day, 0:29:21] +[titan] 2025-10-04 23:29:35,145 - root - INFO - step: 1455 loss: 3.4419 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.4184 global_avg_mtp_loss: 3.0235 +[titan] 2025-10-04 23:29:35,145 - root - INFO - lr: 4.9921e-05 gnorm: 1.37 [ 0:55:26<1 day, 0:28:54] +[titan] 2025-10-04 23:29:46,030 - root - INFO - step: 1460 loss: 3.5546 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.4320 global_avg_mtp_loss: 3.1226 +[titan] 2025-10-04 23:29:46,030 - root - INFO - lr: 4.9920e-05 gnorm: 1.41 [ 0:55:37<1 day, 0:28:28] +[titan] 2025-10-04 23:29:56,926 - root - INFO - step: 1465 loss: 3.5290 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4295 global_avg_mtp_loss: 3.0995 +[titan] 2025-10-04 23:29:56,927 - root - INFO - lr: 4.9920e-05 gnorm: 1.36 [ 0:55:48<1 day, 0:28:03] +[titan] 2025-10-04 23:30:07,807 - root - INFO - step: 1470 loss: 3.4674 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0442 +[titan] 2025-10-04 23:30:07,807 - root - INFO - lr: 4.9919e-05 gnorm: 1.41 [ 0:55:59<1 day, 0:27:37] +[titan] 2025-10-04 23:30:18,704 - root - INFO - step: 1475 loss: 3.4400 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0220 +[titan] 2025-10-04 23:30:18,705 - root - INFO - lr: 4.9918e-05 gnorm: 1.36 [ 0:56:10<1 day, 0:27:12] +[titan] 2025-10-04 23:30:29,561 - root - INFO - step: 1480 loss: 3.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4228 global_avg_mtp_loss: 3.0463 +[titan] 2025-10-04 
23:30:29,562 - root - INFO - lr: 4.9917e-05 gnorm: 1.30 [ 0:56:21<1 day, 0:26:46] +[titan] 2025-10-04 23:30:40,438 - root - INFO - step: 1485 loss: 3.4861 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4263 global_avg_mtp_loss: 3.0598 +[titan] 2025-10-04 23:30:40,438 - root - INFO - lr: 4.9917e-05 gnorm: 1.35 [ 0:56:32<1 day, 0:26:20] +[titan] 2025-10-04 23:30:51,302 - root - INFO - step: 1490 loss: 3.4181 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4151 global_avg_mtp_loss: 3.0030 +[titan] 2025-10-04 23:30:51,303 - root - INFO - lr: 4.9916e-05 gnorm: 1.47 [ 0:56:43<1 day, 0:25:54] +[titan] 2025-10-04 23:31:02,175 - root - INFO - step: 1495 loss: 3.4587 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4192 global_avg_mtp_loss: 3.0394 +[titan] 2025-10-04 23:31:02,176 - root - INFO - lr: 4.9915e-05 gnorm: 1.30 [ 0:56:53<1 day, 0:25:29] +[titan] 2025-10-04 23:31:10,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:31:13,053 - root - INFO - step: 1500 loss: 3.4454 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.4203 global_avg_mtp_loss: 3.0251 +[titan] 2025-10-04 23:31:13,053 - root - INFO - lr: 4.9914e-05 gnorm: 1.32 [ 0:57:04<1 day, 0:25:03] +[titan] 2025-10-04 23:31:23,959 - root - INFO - step: 1505 loss: 3.5094 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.4278 global_avg_mtp_loss: 3.0816 +[titan] 2025-10-04 23:31:23,959 - root - INFO - lr: 4.9913e-05 gnorm: 1.39 [ 0:57:15<1 day, 0:24:39] +[titan] 2025-10-04 23:31:34,816 - root - INFO - step: 1510 loss: 3.4203 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.4208 global_avg_mtp_loss: 2.9996 +[titan] 2025-10-04 23:31:34,816 - root - INFO - lr: 4.9913e-05 gnorm: 1.40 [ 0:57:26<1 day, 0:24:13] +[titan] 2025-10-04 23:31:45,697 - root - INFO - step: 1515 loss: 3.4819 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.4246 global_avg_mtp_loss: 3.0574 +[titan] 2025-10-04 23:31:45,697 - root - INFO - lr: 4.9912e-05 gnorm: 1.42 [ 0:57:37<1 day, 0:23:48] +[titan] 2025-10-04 23:31:56,581 - root - INFO - step: 1520 loss: 3.4715 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0513 +[titan] 2025-10-04 23:31:56,581 - root - INFO - lr: 4.9911e-05 gnorm: 1.54 [ 0:57:48<1 day, 0:23:24] +[titan] 2025-10-04 23:32:07,443 - root - INFO - step: 1525 loss: 3.3887 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.4123 global_avg_mtp_loss: 2.9763 +[titan] 2025-10-04 23:32:07,443 - root - INFO - lr: 4.9910e-05 gnorm: 1.50 [ 0:57:59<1 day, 0:22:58] +[titan] 2025-10-04 23:32:18,324 - root - INFO - step: 1530 loss: 3.4137 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4161 global_avg_mtp_loss: 2.9977 +[titan] 2025-10-04 
23:32:18,324 - root - INFO - lr: 4.9909e-05 gnorm: 1.39 [ 0:58:10<1 day, 0:22:34] +[titan] 2025-10-04 23:32:29,266 - root - INFO - step: 1535 loss: 3.4241 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.4172 global_avg_mtp_loss: 3.0069 +[titan] 2025-10-04 23:32:29,266 - root - INFO - lr: 4.9909e-05 gnorm: 1.38 [ 0:58:21<1 day, 0:22:11] +[titan] 2025-10-04 23:32:31,601 - root - INFO - Dumping profiler traces at step 1536 +[titan] 2025-10-04 23:32:31,635 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-04 23:32:40,423 - root - INFO - step: 1540 loss: 3.4722 memory: 118.84GiB(85.28%) tps: 29,370 tflops: 407.47 mfu: 41.20% global_avg_ntp_loss: 0.4232 global_avg_mtp_loss: 3.0490 +[titan] 2025-10-04 23:32:40,423 - root - INFO - lr: 4.9908e-05 gnorm: 1.48 [ 0:58:32<1 day, 0:21:53] +[titan] 2025-10-04 23:32:51,288 - root - INFO - step: 1545 loss: 3.4793 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4234 global_avg_mtp_loss: 3.0559 +[titan] 2025-10-04 23:32:51,288 - root - INFO - lr: 4.9907e-05 gnorm: 1.37 [ 0:58:43<1 day, 0:21:28] +[titan] 2025-10-04 23:32:59,970 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:33:02,152 - root - INFO - step: 1550 loss: 3.4035 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.4133 global_avg_mtp_loss: 2.9902 +[titan] 2025-10-04 23:33:02,152 - root - INFO - lr: 4.9906e-05 gnorm: 1.32 [ 0:58:53<1 day, 0:21:04] +[titan] 2025-10-04 23:33:13,032 - root - INFO - step: 1555 loss: 3.4850 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.4225 global_avg_mtp_loss: 3.0625 +[titan] 2025-10-04 23:33:13,032 - root - INFO - lr: 4.9905e-05 gnorm: 1.34 [ 0:59:04<1 day, 0:20:39] +[titan] 2025-10-04 23:33:23,946 - root - INFO - step: 1560 loss: 3.5272 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.4289 global_avg_mtp_loss: 3.0982 +[titan] 2025-10-04 23:33:23,946 - root - INFO - lr: 4.9905e-05 gnorm: 1.37 [ 0:59:15<1 day, 0:20:16] +[titan] 2025-10-04 23:33:34,861 - root - INFO - step: 1565 loss: 3.5253 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.4294 global_avg_mtp_loss: 3.0959 +[titan] 2025-10-04 23:33:34,861 - root - INFO - lr: 4.9904e-05 gnorm: 1.37 [ 0:59:26<1 day, 0:19:53] +[titan] 2025-10-04 23:33:45,801 - root - INFO - step: 1570 loss: 3.4320 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.4173 global_avg_mtp_loss: 3.0147 +[titan] 2025-10-04 23:33:45,801 - root - INFO - lr: 4.9903e-05 gnorm: 1.35 [ 0:59:37<1 day, 0:19:30] +[titan] 2025-10-04 23:33:56,697 - root - INFO - step: 1575 loss: 3.4044 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.4154 global_avg_mtp_loss: 2.9890 +[titan] 2025-10-04 23:33:56,697 - root - INFO - lr: 4.9902e-05 gnorm: 1.32 [ 0:59:48<1 day, 0:19:07] +[titan] 2025-10-04 23:34:07,560 - root - INFO - step: 1580 loss: 3.5820 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4484 global_avg_mtp_loss: 3.1336 +[titan] 2025-10-04 
23:34:07,560 - root - INFO - lr: 4.9901e-05 gnorm: 1.32 [ 0:59:59<1 day, 0:18:42] +[titan] 2025-10-04 23:34:18,478 - root - INFO - step: 1585 loss: 3.3932 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.4134 global_avg_mtp_loss: 2.9798 +[titan] 2025-10-04 23:34:18,479 - root - INFO - lr: 4.9900e-05 gnorm: 1.40 [ 1:00:10<1 day, 0:18:19] +[titan] 2025-10-04 23:34:29,342 - root - INFO - step: 1590 loss: 3.4358 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.4195 global_avg_mtp_loss: 3.0163 +[titan] 2025-10-04 23:34:29,342 - root - INFO - lr: 4.9900e-05 gnorm: 1.38 [ 1:00:21<1 day, 0:17:55] +[titan] 2025-10-04 23:34:40,218 - root - INFO - step: 1595 loss: 3.3310 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.4056 global_avg_mtp_loss: 2.9254 +[titan] 2025-10-04 23:34:40,218 - root - INFO - lr: 4.9899e-05 gnorm: 1.38 [ 1:00:31<1 day, 0:17:32] +[titan] 2025-10-04 23:34:48,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:34:51,087 - root - INFO - step: 1600 loss: 3.4555 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.4196 global_avg_mtp_loss: 3.0358 +[titan] 2025-10-04 23:34:51,088 - root - INFO - lr: 4.9898e-05 gnorm: 1.39 [ 1:00:42<1 day, 0:17:08] +[titan] 2025-10-04 23:35:01,992 - root - INFO - step: 1605 loss: 3.4766 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.4235 global_avg_mtp_loss: 3.0531 +[titan] 2025-10-04 23:35:01,992 - root - INFO - lr: 4.9897e-05 gnorm: 1.33 [ 1:00:53<1 day, 0:16:45] +[titan] 2025-10-04 23:35:12,867 - root - INFO - step: 1610 loss: 3.3824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4111 global_avg_mtp_loss: 2.9713 +[titan] 2025-10-04 23:35:12,867 - root - INFO - lr: 4.9896e-05 gnorm: 1.41 [ 1:01:04<1 day, 0:16:22] +[titan] 2025-10-04 23:35:23,778 - root - INFO - step: 1615 loss: 3.4363 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.4168 global_avg_mtp_loss: 3.0195 +[titan] 2025-10-04 23:35:23,779 - root - INFO - lr: 4.9895e-05 gnorm: 1.27 [ 1:01:15<1 day, 0:15:59] +[titan] 2025-10-04 23:35:34,649 - root - INFO - step: 1620 loss: 3.3175 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.4028 global_avg_mtp_loss: 2.9147 +[titan] 2025-10-04 23:35:34,649 - root - INFO - lr: 4.9895e-05 gnorm: 1.32 [ 1:01:26<1 day, 0:15:36] +[titan] 2025-10-04 23:35:45,526 - root - INFO - step: 1625 loss: 3.3715 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4086 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:35:45,527 - root - INFO - lr: 4.9894e-05 gnorm: 1.41 [ 1:01:37<1 day, 0:15:12] +[titan] 2025-10-04 23:35:56,405 - root - INFO - step: 1630 loss: 3.3383 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4038 global_avg_mtp_loss: 2.9345 +[titan] 2025-10-04 
23:35:56,405 - root - INFO - lr: 4.9893e-05 gnorm: 1.32 [ 1:01:48<1 day, 0:14:49] +[titan] 2025-10-04 23:36:07,309 - root - INFO - step: 1635 loss: 3.4176 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.16% global_avg_ntp_loss: 0.4148 global_avg_mtp_loss: 3.0028 +[titan] 2025-10-04 23:36:07,309 - root - INFO - lr: 4.9892e-05 gnorm: 1.40 [ 1:01:59<1 day, 0:14:27] +[titan] 2025-10-04 23:36:18,303 - root - INFO - step: 1640 loss: 3.3374 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.4052 global_avg_mtp_loss: 2.9322 +[titan] 2025-10-04 23:36:18,304 - root - INFO - lr: 4.9891e-05 gnorm: 1.45 [ 1:02:10<1 day, 0:14:07] +[titan] 2025-10-04 23:36:29,175 - root - INFO - step: 1645 loss: 3.4862 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.4238 global_avg_mtp_loss: 3.0624 +[titan] 2025-10-04 23:36:29,175 - root - INFO - lr: 4.9890e-05 gnorm: 1.49 [ 1:02:20<1 day, 0:13:44] +[titan] 2025-10-04 23:36:37,867 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:36:40,054 - root - INFO - step: 1650 loss: 3.2615 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8657 +[titan] 2025-10-04 23:36:40,054 - root - INFO - lr: 4.9889e-05 gnorm: 1.45 [ 1:02:31<1 day, 0:13:21] +[titan] 2025-10-04 23:36:50,937 - root - INFO - step: 1655 loss: 3.4016 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.4127 global_avg_mtp_loss: 2.9889 +[titan] 2025-10-04 23:36:50,937 - root - INFO - lr: 4.9888e-05 gnorm: 1.34 [ 1:02:42<1 day, 0:12:58] +[titan] 2025-10-04 23:37:01,815 - root - INFO - step: 1660 loss: 3.3760 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.4106 global_avg_mtp_loss: 2.9654 +[titan] 2025-10-04 23:37:01,815 - root - INFO - lr: 4.9888e-05 gnorm: 1.33 [ 1:02:53<1 day, 0:12:35] +[titan] 2025-10-04 23:37:12,722 - root - INFO - step: 1665 loss: 3.3861 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.4119 global_avg_mtp_loss: 2.9742 +[titan] 2025-10-04 23:37:12,722 - root - INFO - lr: 4.9887e-05 gnorm: 1.28 [ 1:03:04<1 day, 0:12:13] +[titan] 2025-10-04 23:37:23,672 - root - INFO - step: 1670 loss: 3.3993 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.20 mfu: 41.98% global_avg_ntp_loss: 0.4125 global_avg_mtp_loss: 2.9867 +[titan] 2025-10-04 23:37:23,672 - root - INFO - lr: 4.9886e-05 gnorm: 1.29 [ 1:03:15<1 day, 0:11:53] +[titan] 2025-10-04 23:37:34,543 - root - INFO - step: 1675 loss: 3.3445 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9396 +[titan] 2025-10-04 23:37:34,543 - root - INFO - lr: 4.9885e-05 gnorm: 1.45 [ 1:03:26<1 day, 0:11:30] +[titan] 2025-10-04 23:37:45,421 - root - INFO - step: 1680 loss: 3.4052 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4135 global_avg_mtp_loss: 2.9917 +[titan] 2025-10-04 
23:37:45,421 - root - INFO - lr: 4.9884e-05 gnorm: 1.41 [ 1:03:37<1 day, 0:11:07] +[titan] 2025-10-04 23:37:56,304 - root - INFO - step: 1685 loss: 3.3465 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.4066 global_avg_mtp_loss: 2.9399 +[titan] 2025-10-04 23:37:56,305 - root - INFO - lr: 4.9883e-05 gnorm: 1.35 [ 1:03:48<1 day, 0:10:45] +[titan] 2025-10-04 23:38:07,165 - root - INFO - step: 1690 loss: 3.4157 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.4162 global_avg_mtp_loss: 2.9995 +[titan] 2025-10-04 23:38:07,165 - root - INFO - lr: 4.9882e-05 gnorm: 1.31 [ 1:03:58<1 day, 0:10:23] +[titan] 2025-10-04 23:38:18,032 - root - INFO - step: 1695 loss: 3.3211 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.4037 global_avg_mtp_loss: 2.9174 +[titan] 2025-10-04 23:38:18,032 - root - INFO - lr: 4.9881e-05 gnorm: 1.27 [ 1:04:09<1 day, 0:10:00] +[titan] 2025-10-04 23:38:26,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:38:28,977 - root - INFO - step: 1700 loss: 3.4333 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.4180 global_avg_mtp_loss: 3.0154 +[titan] 2025-10-04 23:38:28,977 - root - INFO - lr: 4.9880e-05 gnorm: 1.47 [ 1:04:20<1 day, 0:09:39] +[titan] 2025-10-04 23:38:39,826 - root - INFO - step: 1705 loss: 3.3912 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.4113 global_avg_mtp_loss: 2.9799 +[titan] 2025-10-04 23:38:39,826 - root - INFO - lr: 4.9879e-05 gnorm: 1.35 [ 1:04:31<1 day, 0:09:17] +[titan] 2025-10-04 23:38:50,670 - root - INFO - step: 1710 loss: 3.4317 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.4158 global_avg_mtp_loss: 3.0159 +[titan] 2025-10-04 23:38:50,670 - root - INFO - lr: 4.9878e-05 gnorm: 1.41 [ 1:04:42<1 day, 0:08:54] +[titan] 2025-10-04 23:39:01,517 - root - INFO - step: 1715 loss: 3.4588 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.4202 global_avg_mtp_loss: 3.0386 +[titan] 2025-10-04 23:39:01,517 - root - INFO - lr: 4.9877e-05 gnorm: 1.41 [ 1:04:53<1 day, 0:08:31] +[titan] 2025-10-04 23:39:12,377 - root - INFO - step: 1720 loss: 3.3718 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.4092 global_avg_mtp_loss: 2.9625 +[titan] 2025-10-04 23:39:12,377 - root - INFO - lr: 4.9877e-05 gnorm: 1.24 [ 1:05:04<1 day, 0:08:09] +[titan] 2025-10-04 23:39:23,301 - root - INFO - step: 1725 loss: 3.3446 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.4100 global_avg_mtp_loss: 2.9346 +[titan] 2025-10-04 23:39:23,301 - root - INFO - lr: 4.9876e-05 gnorm: 1.27 [ 1:05:15<1 day, 0:07:48] +[titan] 2025-10-04 23:39:34,194 - root - INFO - step: 1730 loss: 3.4582 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.4216 global_avg_mtp_loss: 3.0367 +[titan] 2025-10-04 
23:39:34,195 - root - INFO - lr: 4.9875e-05 gnorm: 1.32 [ 1:05:25<1 day, 0:07:27] +[titan] 2025-10-04 23:39:45,081 - root - INFO - step: 1735 loss: 3.4372 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.4157 global_avg_mtp_loss: 3.0215 +[titan] 2025-10-04 23:39:45,081 - root - INFO - lr: 4.9874e-05 gnorm: 1.37 [ 1:05:36<1 day, 0:07:05] +[titan] 2025-10-04 23:39:55,972 - root - INFO - step: 1740 loss: 3.3532 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9463 +[titan] 2025-10-04 23:39:55,972 - root - INFO - lr: 4.9873e-05 gnorm: 1.36 [ 1:05:47<1 day, 0:06:44] +[titan] 2025-10-04 23:40:06,852 - root - INFO - step: 1745 loss: 3.3083 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.4022 global_avg_mtp_loss: 2.9061 +[titan] 2025-10-04 23:40:06,853 - root - INFO - lr: 4.9872e-05 gnorm: 1.33 [ 1:05:58<1 day, 0:06:22] +[titan] 2025-10-04 23:40:15,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:40:17,731 - root - INFO - step: 1750 loss: 3.4480 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.4191 global_avg_mtp_loss: 3.0289 +[titan] 2025-10-04 23:40:17,731 - root - INFO - lr: 4.9871e-05 gnorm: 1.35 [ 1:06:09<1 day, 0:06:01] +[titan] 2025-10-04 23:40:28,641 - root - INFO - step: 1755 loss: 3.3860 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.4107 global_avg_mtp_loss: 2.9753 +[titan] 2025-10-04 23:40:28,641 - root - INFO - lr: 4.9870e-05 gnorm: 1.31 [ 1:06:20<1 day, 0:05:40] +[titan] 2025-10-04 23:40:39,515 - root - INFO - step: 1760 loss: 3.3596 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9525 +[titan] 2025-10-04 23:40:39,516 - root - INFO - lr: 4.9869e-05 gnorm: 1.44 [ 1:06:31<1 day, 0:05:19] +[titan] 2025-10-04 23:40:50,423 - root - INFO - step: 1765 loss: 3.2984 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3986 global_avg_mtp_loss: 2.8998 +[titan] 2025-10-04 23:40:50,423 - root - INFO - lr: 4.9868e-05 gnorm: 1.40 [ 1:06:42<1 day, 0:04:58] +[titan] 2025-10-04 23:41:01,295 - root - INFO - step: 1770 loss: 3.3670 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.4093 global_avg_mtp_loss: 2.9577 +[titan] 2025-10-04 23:41:01,295 - root - INFO - lr: 4.9867e-05 gnorm: 1.37 [ 1:06:53<1 day, 0:04:37] +[titan] 2025-10-04 23:41:12,156 - root - INFO - step: 1775 loss: 3.3745 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.4116 global_avg_mtp_loss: 2.9629 +[titan] 2025-10-04 23:41:12,156 - root - INFO - lr: 4.9866e-05 gnorm: 1.36 [ 1:07:03<1 day, 0:04:15] +[titan] 2025-10-04 23:41:23,073 - root - INFO - step: 1780 loss: 3.2774 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 
23:41:23,074 - root - INFO - lr: 4.9865e-05 gnorm: 1.44 [ 1:07:14<1 day, 0:03:55] +[titan] 2025-10-04 23:41:33,936 - root - INFO - step: 1785 loss: 3.3608 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.4071 global_avg_mtp_loss: 2.9538 +[titan] 2025-10-04 23:41:33,936 - root - INFO - lr: 4.9864e-05 gnorm: 1.39 [ 1:07:25<1 day, 0:03:33] +[titan] 2025-10-04 23:41:44,811 - root - INFO - step: 1790 loss: 3.3548 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9479 +[titan] 2025-10-04 23:41:44,812 - root - INFO - lr: 4.9863e-05 gnorm: 1.46 [ 1:07:36<1 day, 0:03:12] +[titan] 2025-10-04 23:41:55,714 - root - INFO - step: 1795 loss: 3.4000 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.4121 global_avg_mtp_loss: 2.9879 +[titan] 2025-10-04 23:41:55,715 - root - INFO - lr: 4.9862e-05 gnorm: 1.53 [ 1:07:47<1 day, 0:02:52] +[titan] 2025-10-04 23:42:04,388 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:42:06,574 - root - INFO - step: 1800 loss: 3.3948 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.4124 global_avg_mtp_loss: 2.9824 +[titan] 2025-10-04 23:42:06,574 - root - INFO - lr: 4.9861e-05 gnorm: 1.37 [ 1:07:58<1 day, 0:02:30] +[titan] 2025-10-04 23:42:17,436 - root - INFO - step: 1805 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.4017 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:42:17,437 - root - INFO - lr: 4.9860e-05 gnorm: 1.29 [ 1:08:09<1 day, 0:02:09] +[titan] 2025-10-04 23:42:28,375 - root - INFO - step: 1810 loss: 3.3561 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:42:28,375 - root - INFO - lr: 4.9859e-05 gnorm: 1.39 [ 1:08:20<1 day, 0:01:50] +[titan] 2025-10-04 23:42:39,216 - root - INFO - step: 1815 loss: 3.3053 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.3995 global_avg_mtp_loss: 2.9058 +[titan] 2025-10-04 23:42:39,217 - root - INFO - lr: 4.9858e-05 gnorm: 1.34 [ 1:08:30<1 day, 0:01:28] +[titan] 2025-10-04 23:42:50,059 - root - INFO - step: 1820 loss: 3.2854 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8887 +[titan] 2025-10-04 23:42:50,059 - root - INFO - lr: 4.9857e-05 gnorm: 1.37 [ 1:08:41<1 day, 0:01:07] +[titan] 2025-10-04 23:43:00,958 - root - INFO - step: 1825 loss: 3.3393 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.4035 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 23:43:00,958 - root - INFO - lr: 4.9856e-05 gnorm: 1.37 [ 1:08:52<1 day, 0:00:47] +[titan] 2025-10-04 23:43:11,802 - root - INFO - step: 1830 loss: 3.3421 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.4062 global_avg_mtp_loss: 2.9359 +[titan] 2025-10-04 
23:43:11,802 - root - INFO - lr: 4.9855e-05 gnorm: 1.36 [ 1:09:03<1 day, 0:00:25] +[titan] 2025-10-04 23:43:22,644 - root - INFO - step: 1835 loss: 3.3492 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.4055 global_avg_mtp_loss: 2.9437 +[titan] 2025-10-04 23:43:22,645 - root - INFO - lr: 4.9854e-05 gnorm: 1.32 [ 1:09:14<1 day, 0:00:04] +[titan] 2025-10-04 23:43:33,561 - root - INFO - step: 1840 loss: 3.2612 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3942 global_avg_mtp_loss: 2.8670 +[titan] 2025-10-04 23:43:33,562 - root - INFO - lr: 4.9853e-05 gnorm: 1.27 [ 1:09:25<23:59:44] +[titan] 2025-10-04 23:43:44,438 - root - INFO - step: 1845 loss: 3.3605 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.4069 global_avg_mtp_loss: 2.9537 +[titan] 2025-10-04 23:43:44,438 - root - INFO - lr: 4.9852e-05 gnorm: 1.27 [ 1:09:36<23:59:24] +[titan] 2025-10-04 23:43:53,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:43:55,315 - root - INFO - step: 1850 loss: 3.3556 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.4063 global_avg_mtp_loss: 2.9493 +[titan] 2025-10-04 23:43:55,315 - root - INFO - lr: 4.9851e-05 gnorm: 1.32 [ 1:09:47<23:59:03] +[titan] 2025-10-04 23:44:06,182 - root - INFO - step: 1855 loss: 3.3162 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.4016 global_avg_mtp_loss: 2.9146 +[titan] 2025-10-04 23:44:06,182 - root - INFO - lr: 4.9850e-05 gnorm: 1.40 [ 1:09:57<23:58:43] +[titan] 2025-10-04 23:44:17,099 - root - INFO - step: 1860 loss: 3.3782 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.4085 global_avg_mtp_loss: 2.9697 +[titan] 2025-10-04 23:44:17,099 - root - INFO - lr: 4.9849e-05 gnorm: 1.35 [ 1:10:08<23:58:23] +[titan] 2025-10-04 23:44:28,008 - root - INFO - step: 1865 loss: 3.2855 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3997 global_avg_mtp_loss: 2.8857 +[titan] 2025-10-04 23:44:28,009 - root - INFO - lr: 4.9848e-05 gnorm: 1.35 [ 1:10:19<23:58:04] +[titan] 2025-10-04 23:44:38,889 - root - INFO - step: 1870 loss: 3.3023 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.9043 +[titan] 2025-10-04 23:44:38,889 - root - INFO - lr: 4.9847e-05 gnorm: 1.24 [ 1:10:30<23:57:44] +[titan] 2025-10-04 23:44:49,776 - root - INFO - step: 1875 loss: 3.3134 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.4008 global_avg_mtp_loss: 2.9126 +[titan] 2025-10-04 23:44:49,776 - root - INFO - lr: 4.9846e-05 gnorm: 1.32 [ 1:10:41<23:57:24] +[titan] 2025-10-04 23:45:00,642 - root - INFO - step: 1880 loss: 3.2097 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8207 +[titan] 2025-10-04 23:45:00,642 - root - INFO - lr: 4.9845e-05 
gnorm: 1.33 [ 1:10:52<23:57:03] +[titan] 2025-10-04 23:45:11,496 - root - INFO - step: 1885 loss: 3.2568 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8621 +[titan] 2025-10-04 23:45:11,497 - root - INFO - lr: 4.9844e-05 gnorm: 1.34 [ 1:11:03<23:56:43] +[titan] 2025-10-04 23:45:22,417 - root - INFO - step: 1890 loss: 3.3180 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.4019 global_avg_mtp_loss: 2.9160 +[titan] 2025-10-04 23:45:22,417 - root - INFO - lr: 4.9843e-05 gnorm: 1.39 [ 1:11:14<23:56:24] +[titan] 2025-10-04 23:45:33,318 - root - INFO - step: 1895 loss: 3.2706 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3955 global_avg_mtp_loss: 2.8752 +[titan] 2025-10-04 23:45:33,318 - root - INFO - lr: 4.9842e-05 gnorm: 1.50 [ 1:11:25<23:56:04] +[titan] 2025-10-04 23:45:41,992 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:45:44,171 - root - INFO - step: 1900 loss: 3.2793 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8827 +[titan] 2025-10-04 23:45:44,171 - root - INFO - lr: 4.9841e-05 gnorm: 1.29 [ 1:11:35<23:55:44] +[titan] 2025-10-04 23:45:55,048 - root - INFO - step: 1905 loss: 3.3144 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.4029 global_avg_mtp_loss: 2.9115 +[titan] 2025-10-04 23:45:55,048 - root - INFO - lr: 4.9840e-05 gnorm: 1.32 [ 1:11:46<23:55:24] +[titan] 2025-10-04 23:46:05,920 - root - INFO - step: 1910 loss: 3.2864 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3983 global_avg_mtp_loss: 2.8881 +[titan] 2025-10-04 23:46:05,920 - root - INFO - lr: 4.9839e-05 gnorm: 1.32 [ 1:11:57<23:55:04] +[titan] 2025-10-04 23:46:16,784 - root - INFO - step: 1915 loss: 3.2475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8558 +[titan] 2025-10-04 23:46:16,785 - root - INFO - lr: 4.9837e-05 gnorm: 1.28 [ 1:12:08<23:54:44] +[titan] 2025-10-04 23:46:27,699 - root - INFO - step: 1920 loss: 3.3007 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.11% global_avg_ntp_loss: 0.3987 global_avg_mtp_loss: 2.9020 +[titan] 2025-10-04 23:46:27,700 - root - INFO - lr: 4.9836e-05 gnorm: 1.39 [ 1:12:19<23:54:25] +[titan] 2025-10-04 23:46:38,626 - root - INFO - step: 1925 loss: 3.2659 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3951 global_avg_mtp_loss: 2.8709 +[titan] 2025-10-04 23:46:38,626 - root - INFO - lr: 4.9835e-05 gnorm: 1.32 [ 1:12:30<23:54:06] +[titan] 2025-10-04 23:46:49,497 - root - INFO - step: 1930 loss: 3.2880 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3967 global_avg_mtp_loss: 2.8914 +[titan] 2025-10-04 23:46:49,497 - root - INFO - lr: 4.9834e-05 
gnorm: 1.31 [ 1:12:41<23:53:46] +[titan] 2025-10-04 23:47:00,373 - root - INFO - step: 1935 loss: 3.2719 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3965 global_avg_mtp_loss: 2.8754 +[titan] 2025-10-04 23:47:00,374 - root - INFO - lr: 4.9833e-05 gnorm: 1.33 [ 1:12:52<23:53:27] +[titan] 2025-10-04 23:47:11,263 - root - INFO - step: 1940 loss: 3.3395 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.4048 global_avg_mtp_loss: 2.9347 +[titan] 2025-10-04 23:47:11,263 - root - INFO - lr: 4.9832e-05 gnorm: 1.41 [ 1:13:02<23:53:08] +[titan] 2025-10-04 23:47:22,129 - root - INFO - step: 1945 loss: 3.2947 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8976 +[titan] 2025-10-04 23:47:22,130 - root - INFO - lr: 4.9831e-05 gnorm: 1.48 [ 1:13:13<23:52:48] +[titan] 2025-10-04 23:47:30,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:47:33,036 - root - INFO - step: 1950 loss: 3.3613 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.4054 global_avg_mtp_loss: 2.9558 +[titan] 2025-10-04 23:47:33,037 - root - INFO - lr: 4.9830e-05 gnorm: 1.34 [ 1:13:24<23:52:29] +[titan] 2025-10-04 23:47:43,944 - root - INFO - step: 1955 loss: 3.2920 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3990 global_avg_mtp_loss: 2.8929 +[titan] 2025-10-04 23:47:43,944 - root - INFO - lr: 4.9829e-05 gnorm: 1.29 [ 1:13:35<23:52:10] +[titan] 2025-10-04 23:47:54,843 - root - INFO - step: 1960 loss: 3.2473 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3941 global_avg_mtp_loss: 2.8533 +[titan] 2025-10-04 23:47:54,843 - root - INFO - lr: 4.9828e-05 gnorm: 1.30 [ 1:13:46<23:51:51] +[titan] 2025-10-04 23:48:05,717 - root - INFO - step: 1965 loss: 3.2766 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3954 global_avg_mtp_loss: 2.8812 +[titan] 2025-10-04 23:48:05,717 - root - INFO - lr: 4.9827e-05 gnorm: 1.23 [ 1:13:57<23:51:32] +[titan] 2025-10-04 23:48:16,623 - root - INFO - step: 1970 loss: 3.2148 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3891 global_avg_mtp_loss: 2.8257 +[titan] 2025-10-04 23:48:16,623 - root - INFO - lr: 4.9825e-05 gnorm: 1.38 [ 1:14:08<23:51:13] +[titan] 2025-10-04 23:48:27,497 - root - INFO - step: 1975 loss: 3.2117 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3887 global_avg_mtp_loss: 2.8230 +[titan] 2025-10-04 23:48:27,497 - root - INFO - lr: 4.9824e-05 gnorm: 1.35 [ 1:14:19<23:50:54] +[titan] 2025-10-04 23:48:38,417 - root - INFO - step: 1980 loss: 3.3095 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.4021 global_avg_mtp_loss: 2.9075 +[titan] 2025-10-04 23:48:38,417 - root - INFO - lr: 4.9823e-05 
gnorm: 1.35 [ 1:14:30<23:50:35] +[titan] 2025-10-04 23:48:49,319 - root - INFO - step: 1985 loss: 3.2797 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3979 global_avg_mtp_loss: 2.8817 +[titan] 2025-10-04 23:48:49,320 - root - INFO - lr: 4.9822e-05 gnorm: 1.26 [ 1:14:41<23:50:17] +[titan] 2025-10-04 23:49:00,192 - root - INFO - step: 1990 loss: 3.3317 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.4032 global_avg_mtp_loss: 2.9285 +[titan] 2025-10-04 23:49:00,193 - root - INFO - lr: 4.9821e-05 gnorm: 1.36 [ 1:14:51<23:49:57] +[titan] 2025-10-04 23:49:11,083 - root - INFO - step: 1995 loss: 3.2394 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-04 23:49:11,084 - root - INFO - lr: 4.9820e-05 gnorm: 1.25 [ 1:15:02<23:49:39] +[titan] 2025-10-04 23:49:19,763 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:49:21,941 - root - INFO - step: 2000 loss: 3.2905 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.3991 global_avg_mtp_loss: 2.8913 +[titan] 2025-10-04 23:49:21,941 - root - INFO - lr: 4.9819e-05 gnorm: 1.41 [ 1:15:13<23:49:19] +[titan] 2025-10-04 23:49:32,868 - root - INFO - step: 2005 loss: 3.2217 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8299 +[titan] 2025-10-04 23:49:32,868 - root - INFO - lr: 4.9818e-05 gnorm: 1.41 [ 1:15:24<23:49:01] +[titan] 2025-10-04 23:49:43,749 - root - INFO - step: 2010 loss: 3.2369 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3913 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:49:43,750 - root - INFO - lr: 4.9816e-05 gnorm: 1.33 [ 1:15:35<23:48:42] +[titan] 2025-10-04 23:49:54,661 - root - INFO - step: 2015 loss: 3.2498 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3947 global_avg_mtp_loss: 2.8551 +[titan] 2025-10-04 23:49:54,661 - root - INFO - lr: 4.9815e-05 gnorm: 1.34 [ 1:15:46<23:48:24] +[titan] 2025-10-04 23:50:05,578 - root - INFO - step: 2020 loss: 3.2711 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3939 global_avg_mtp_loss: 2.8772 +[titan] 2025-10-04 23:50:05,579 - root - INFO - lr: 4.9814e-05 gnorm: 1.36 [ 1:15:57<23:48:06] +[titan] 2025-10-04 23:50:16,459 - root - INFO - step: 2025 loss: 3.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3962 global_avg_mtp_loss: 2.8751 +[titan] 2025-10-04 23:50:16,459 - root - INFO - lr: 4.9813e-05 gnorm: 1.26 [ 1:16:08<23:47:47] +[titan] 2025-10-04 23:50:27,328 - root - INFO - step: 2030 loss: 3.2606 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3932 global_avg_mtp_loss: 2.8674 +[titan] 2025-10-04 23:50:27,329 - root - INFO - lr: 4.9812e-05 
gnorm: 1.27 [ 1:16:19<23:47:28] +[titan] 2025-10-04 23:50:38,283 - root - INFO - step: 2035 loss: 3.3063 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3996 global_avg_mtp_loss: 2.9067 +[titan] 2025-10-04 23:50:38,284 - root - INFO - lr: 4.9811e-05 gnorm: 1.35 [ 1:16:30<23:47:11] +[titan] 2025-10-04 23:50:49,166 - root - INFO - step: 2040 loss: 3.1900 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3852 global_avg_mtp_loss: 2.8048 +[titan] 2025-10-04 23:50:49,166 - root - INFO - lr: 4.9810e-05 gnorm: 1.37 [ 1:16:40<23:46:52] +[titan] 2025-10-04 23:51:00,136 - root - INFO - step: 2045 loss: 3.2396 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.3910 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:00,136 - root - INFO - lr: 4.9808e-05 gnorm: 1.30 [ 1:16:51<23:46:35] +[titan] 2025-10-04 23:51:06,836 - root - INFO - Dumping profiler traces at step 2048 +[titan] 2025-10-04 23:51:06,873 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-04 23:51:09,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:51:11,279 - root - INFO - step: 2050 loss: 3.2428 memory: 118.84GiB(85.28%) tps: 29,407 tflops: 407.98 mfu: 41.25% global_avg_ntp_loss: 0.3943 global_avg_mtp_loss: 2.8486 +[titan] 2025-10-04 23:51:11,280 - root - INFO - lr: 4.9807e-05 gnorm: 1.39 [ 1:17:02<23:46:21] +[titan] 2025-10-04 23:51:22,173 - root - INFO - step: 2055 loss: 3.3541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.4049 global_avg_mtp_loss: 2.9491 +[titan] 2025-10-04 23:51:22,173 - root - INFO - lr: 4.9806e-05 gnorm: 1.42 [ 1:17:13<23:46:03] +[titan] 2025-10-04 23:51:33,068 - root - INFO - step: 2060 loss: 3.2810 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3960 global_avg_mtp_loss: 2.8850 +[titan] 2025-10-04 23:51:33,069 - root - INFO - lr: 4.9805e-05 gnorm: 1.33 [ 1:17:24<23:45:45] +[titan] 2025-10-04 23:51:43,943 - root - INFO - step: 2065 loss: 3.2366 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3909 global_avg_mtp_loss: 2.8457 +[titan] 2025-10-04 23:51:43,943 - root - INFO - lr: 4.9804e-05 gnorm: 1.45 [ 1:17:35<23:45:26] +[titan] 2025-10-04 23:51:54,802 - root - INFO - step: 2070 loss: 3.2400 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3911 global_avg_mtp_loss: 2.8489 +[titan] 2025-10-04 23:51:54,802 - root - INFO - lr: 4.9803e-05 gnorm: 1.37 [ 1:17:46<23:45:07] +[titan] 2025-10-04 23:52:05,671 - root - INFO - step: 2075 loss: 3.2363 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3927 global_avg_mtp_loss: 2.8436 +[titan] 2025-10-04 23:52:05,671 - root - INFO - lr: 4.9801e-05 gnorm: 1.32 [ 1:17:57<23:44:49] +[titan] 2025-10-04 23:52:16,539 - root - INFO - step: 2080 loss: 3.1819 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3837 global_avg_mtp_loss: 2.7983 +[titan] 2025-10-04 23:52:16,539 - root - INFO - lr: 4.9800e-05 
gnorm: 1.25 [ 1:18:08<23:44:30] +[titan] 2025-10-04 23:52:27,458 - root - INFO - step: 2085 loss: 3.2817 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3971 global_avg_mtp_loss: 2.8845 +[titan] 2025-10-04 23:52:27,458 - root - INFO - lr: 4.9799e-05 gnorm: 1.31 [ 1:18:19<23:44:12] +[titan] 2025-10-04 23:52:38,351 - root - INFO - step: 2090 loss: 3.2776 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3974 global_avg_mtp_loss: 2.8802 +[titan] 2025-10-04 23:52:38,351 - root - INFO - lr: 4.9798e-05 gnorm: 1.27 [ 1:18:30<23:43:54] +[titan] 2025-10-04 23:52:49,245 - root - INFO - step: 2095 loss: 3.2401 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8479 +[titan] 2025-10-04 23:52:49,245 - root - INFO - lr: 4.9797e-05 gnorm: 1.35 [ 1:18:40<23:43:36] +[titan] 2025-10-04 23:52:57,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:53:00,142 - root - INFO - step: 2100 loss: 3.1666 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3827 global_avg_mtp_loss: 2.7839 +[titan] 2025-10-04 23:53:00,142 - root - INFO - lr: 4.9795e-05 gnorm: 1.31 [ 1:18:51<23:43:18] +[titan] 2025-10-04 23:53:11,021 - root - INFO - step: 2105 loss: 3.1171 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3762 global_avg_mtp_loss: 2.7409 +[titan] 2025-10-04 23:53:11,021 - root - INFO - lr: 4.9794e-05 gnorm: 1.45 [ 1:19:02<23:43:00] +[titan] 2025-10-04 23:53:21,893 - root - INFO - step: 2110 loss: 3.2816 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3984 global_avg_mtp_loss: 2.8833 +[titan] 2025-10-04 23:53:21,894 - root - INFO - lr: 4.9793e-05 gnorm: 1.35 [ 1:19:13<23:42:42] +[titan] 2025-10-04 23:53:32,852 - root - INFO - step: 2115 loss: 3.2607 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8663 +[titan] 2025-10-04 23:53:32,852 - root - INFO - lr: 4.9792e-05 gnorm: 1.27 [ 1:19:24<23:42:25] +[titan] 2025-10-04 23:53:43,730 - root - INFO - step: 2120 loss: 3.2629 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3946 global_avg_mtp_loss: 2.8683 +[titan] 2025-10-04 23:53:43,730 - root - INFO - lr: 4.9791e-05 gnorm: 1.25 [ 1:19:35<23:42:07] +[titan] 2025-10-04 23:53:54,620 - root - INFO - step: 2125 loss: 3.0920 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3725 global_avg_mtp_loss: 2.7195 +[titan] 2025-10-04 23:53:54,620 - root - INFO - lr: 4.9789e-05 gnorm: 1.37 [ 1:19:46<23:41:49] +[titan] 2025-10-04 23:54:05,507 - root - INFO - step: 2130 loss: 3.2038 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8165 +[titan] 2025-10-04 23:54:05,508 - root - INFO - lr: 4.9788e-05 
gnorm: 1.28 [ 1:19:57<23:41:31] +[titan] 2025-10-04 23:54:16,404 - root - INFO - step: 2135 loss: 3.1616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.3810 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-04 23:54:16,404 - root - INFO - lr: 4.9787e-05 gnorm: 1.27 [ 1:20:08<23:41:13] +[titan] 2025-10-04 23:54:27,282 - root - INFO - step: 2140 loss: 3.1455 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3792 global_avg_mtp_loss: 2.7663 +[titan] 2025-10-04 23:54:27,282 - root - INFO - lr: 4.9786e-05 gnorm: 1.36 [ 1:20:18<23:40:55] +[titan] 2025-10-04 23:54:38,216 - root - INFO - step: 2145 loss: 3.1443 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7655 +[titan] 2025-10-04 23:54:38,216 - root - INFO - lr: 4.9785e-05 gnorm: 1.24 [ 1:20:29<23:40:38] +[titan] 2025-10-04 23:54:46,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:54:49,106 - root - INFO - step: 2150 loss: 3.2432 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3921 global_avg_mtp_loss: 2.8511 +[titan] 2025-10-04 23:54:49,106 - root - INFO - lr: 4.9783e-05 gnorm: 1.23 [ 1:20:40<23:40:20] +[titan] 2025-10-04 23:54:59,985 - root - INFO - step: 2155 loss: 3.1416 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7620 +[titan] 2025-10-04 23:54:59,985 - root - INFO - lr: 4.9782e-05 gnorm: 1.23 [ 1:20:51<23:40:02] +[titan] 2025-10-04 23:55:10,860 - root - INFO - step: 2160 loss: 3.1386 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3789 global_avg_mtp_loss: 2.7597 +[titan] 2025-10-04 23:55:10,860 - root - INFO - lr: 4.9781e-05 gnorm: 1.27 [ 1:21:02<23:39:45] +[titan] 2025-10-04 23:55:21,730 - root - INFO - step: 2165 loss: 3.2482 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8559 +[titan] 2025-10-04 23:55:21,730 - root - INFO - lr: 4.9780e-05 gnorm: 1.29 [ 1:21:13<23:39:27] +[titan] 2025-10-04 23:55:32,617 - root - INFO - step: 2170 loss: 3.2349 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3914 global_avg_mtp_loss: 2.8435 +[titan] 2025-10-04 23:55:32,617 - root - INFO - lr: 4.9778e-05 gnorm: 1.22 [ 1:21:24<23:39:09] +[titan] 2025-10-04 23:55:43,541 - root - INFO - step: 2175 loss: 3.2325 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.3901 global_avg_mtp_loss: 2.8424 +[titan] 2025-10-04 23:55:43,542 - root - INFO - lr: 4.9777e-05 gnorm: 1.32 [ 1:21:35<23:38:52] +[titan] 2025-10-04 23:55:54,482 - root - INFO - step: 2180 loss: 3.1551 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7749 +[titan] 2025-10-04 23:55:54,482 - root - INFO - lr: 4.9776e-05 
gnorm: 1.29 [ 1:21:46<23:38:35] +[titan] 2025-10-04 23:56:05,357 - root - INFO - step: 2185 loss: 3.2187 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3882 global_avg_mtp_loss: 2.8305 +[titan] 2025-10-04 23:56:05,357 - root - INFO - lr: 4.9775e-05 gnorm: 1.37 [ 1:21:57<23:38:17] +[titan] 2025-10-04 23:56:16,252 - root - INFO - step: 2190 loss: 3.1722 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7900 +[titan] 2025-10-04 23:56:16,253 - root - INFO - lr: 4.9773e-05 gnorm: 1.44 [ 1:22:07<23:38:00] +[titan] 2025-10-04 23:56:27,132 - root - INFO - step: 2195 loss: 3.1685 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3823 global_avg_mtp_loss: 2.7862 +[titan] 2025-10-04 23:56:27,132 - root - INFO - lr: 4.9772e-05 gnorm: 1.33 [ 1:22:18<23:37:42] +[titan] 2025-10-04 23:56:35,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:56:38,038 - root - INFO - step: 2200 loss: 3.1985 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3856 global_avg_mtp_loss: 2.8129 +[titan] 2025-10-04 23:56:38,038 - root - INFO - lr: 4.9771e-05 gnorm: 1.31 [ 1:22:29<23:37:25] +[titan] 2025-10-04 23:56:48,912 - root - INFO - step: 2205 loss: 3.2059 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.8192 +[titan] 2025-10-04 23:56:48,913 - root - INFO - lr: 4.9769e-05 gnorm: 1.26 [ 1:22:40<23:37:07] +[titan] 2025-10-04 23:56:59,839 - root - INFO - step: 2210 loss: 3.1541 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7729 +[titan] 2025-10-04 23:56:59,840 - root - INFO - lr: 4.9768e-05 gnorm: 1.42 [ 1:22:51<23:36:51] +[titan] 2025-10-04 23:57:10,737 - root - INFO - step: 2215 loss: 3.2356 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3903 global_avg_mtp_loss: 2.8453 +[titan] 2025-10-04 23:57:10,737 - root - INFO - lr: 4.9767e-05 gnorm: 1.51 [ 1:23:02<23:36:33] +[titan] 2025-10-04 23:57:21,630 - root - INFO - step: 2220 loss: 3.1859 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3867 global_avg_mtp_loss: 2.7992 +[titan] 2025-10-04 23:57:21,630 - root - INFO - lr: 4.9766e-05 gnorm: 1.39 [ 1:23:13<23:36:16] +[titan] 2025-10-04 23:57:32,532 - root - INFO - step: 2225 loss: 3.1779 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3842 global_avg_mtp_loss: 2.7936 +[titan] 2025-10-04 23:57:32,532 - root - INFO - lr: 4.9764e-05 gnorm: 1.24 [ 1:23:24<23:35:59] +[titan] 2025-10-04 23:57:43,450 - root - INFO - step: 2230 loss: 3.2176 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.3895 global_avg_mtp_loss: 2.8282 +[titan] 2025-10-04 23:57:43,450 - root - INFO - lr: 4.9763e-05 
gnorm: 1.28 [ 1:23:35<23:35:42] +[titan] 2025-10-04 23:57:54,366 - root - INFO - step: 2235 loss: 3.2212 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3879 global_avg_mtp_loss: 2.8333 +[titan] 2025-10-04 23:57:54,366 - root - INFO - lr: 4.9762e-05 gnorm: 1.35 [ 1:23:46<23:35:25] +[titan] 2025-10-04 23:58:05,251 - root - INFO - step: 2240 loss: 3.2781 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3957 global_avg_mtp_loss: 2.8824 +[titan] 2025-10-04 23:58:05,252 - root - INFO - lr: 4.9760e-05 gnorm: 1.39 [ 1:23:56<23:35:08] +[titan] 2025-10-04 23:58:16,173 - root - INFO - step: 2245 loss: 3.1710 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7885 +[titan] 2025-10-04 23:58:16,174 - root - INFO - lr: 4.9759e-05 gnorm: 1.30 [ 1:24:07<23:34:51] +[titan] 2025-10-04 23:58:24,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-04 23:58:27,069 - root - INFO - step: 2250 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7772 +[titan] 2025-10-04 23:58:27,069 - root - INFO - lr: 4.9758e-05 gnorm: 1.33 [ 1:24:18<23:34:34] +[titan] 2025-10-04 23:58:37,973 - root - INFO - step: 2255 loss: 3.2917 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3969 global_avg_mtp_loss: 2.8947 +[titan] 2025-10-04 23:58:37,973 - root - INFO - lr: 4.9757e-05 gnorm: 1.34 [ 1:24:29<23:34:18] +[titan] 2025-10-04 23:58:48,849 - root - INFO - step: 2260 loss: 3.1742 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3832 global_avg_mtp_loss: 2.7911 +[titan] 2025-10-04 23:58:48,849 - root - INFO - lr: 4.9755e-05 gnorm: 1.32 [ 1:24:40<23:34:00] +[titan] 2025-10-04 23:58:59,727 - root - INFO - step: 2265 loss: 3.1716 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3830 global_avg_mtp_loss: 2.7886 +[titan] 2025-10-04 23:58:59,727 - root - INFO - lr: 4.9754e-05 gnorm: 1.31 [ 1:24:51<23:33:43] +[titan] 2025-10-04 23:59:10,618 - root - INFO - step: 2270 loss: 3.2242 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3890 global_avg_mtp_loss: 2.8352 +[titan] 2025-10-04 23:59:10,618 - root - INFO - lr: 4.9753e-05 gnorm: 1.31 [ 1:25:02<23:33:26] +[titan] 2025-10-04 23:59:21,547 - root - INFO - step: 2275 loss: 3.2006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3858 global_avg_mtp_loss: 2.8148 +[titan] 2025-10-04 23:59:21,547 - root - INFO - lr: 4.9751e-05 gnorm: 1.30 [ 1:25:13<23:33:09] +[titan] 2025-10-04 23:59:32,439 - root - INFO - step: 2280 loss: 3.1251 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7491 +[titan] 2025-10-04 23:59:32,439 - root - INFO - lr: 4.9750e-05 
gnorm: 1.25 [ 1:25:24<23:32:53] +[titan] 2025-10-04 23:59:43,315 - root - INFO - step: 2285 loss: 3.1971 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3843 global_avg_mtp_loss: 2.8128 +[titan] 2025-10-04 23:59:43,315 - root - INFO - lr: 4.9749e-05 gnorm: 1.24 [ 1:25:35<23:32:35] +[titan] 2025-10-04 23:59:54,211 - root - INFO - step: 2290 loss: 3.1138 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3754 global_avg_mtp_loss: 2.7384 +[titan] 2025-10-04 23:59:54,211 - root - INFO - lr: 4.9747e-05 gnorm: 1.30 [ 1:25:45<23:32:18] +[titan] 2025-10-05 00:00:05,066 - root - INFO - step: 2295 loss: 3.1381 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3802 global_avg_mtp_loss: 2.7579 +[titan] 2025-10-05 00:00:05,067 - root - INFO - lr: 4.9746e-05 gnorm: 1.34 [ 1:25:56<23:32:01] +[titan] 2025-10-05 00:00:13,756 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:00:15,940 - root - INFO - step: 2300 loss: 3.1684 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3825 global_avg_mtp_loss: 2.7859 +[titan] 2025-10-05 00:00:15,940 - root - INFO - lr: 4.9745e-05 gnorm: 1.25 [ 1:26:07<23:31:44] +[titan] 2025-10-05 00:00:26,871 - root - INFO - step: 2305 loss: 3.1673 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3819 global_avg_mtp_loss: 2.7853 +[titan] 2025-10-05 00:00:26,871 - root - INFO - lr: 4.9743e-05 gnorm: 1.22 [ 1:26:18<23:31:28] +[titan] 2025-10-05 00:00:37,762 - root - INFO - step: 2310 loss: 3.1531 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3812 global_avg_mtp_loss: 2.7719 +[titan] 2025-10-05 00:00:37,762 - root - INFO - lr: 4.9742e-05 gnorm: 1.30 [ 1:26:29<23:31:11] +[titan] 2025-10-05 00:00:48,669 - root - INFO - step: 2315 loss: 3.1583 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3795 global_avg_mtp_loss: 2.7788 +[titan] 2025-10-05 00:00:48,669 - root - INFO - lr: 4.9741e-05 gnorm: 1.22 [ 1:26:40<23:30:54] +[titan] 2025-10-05 00:00:59,522 - root - INFO - step: 2320 loss: 3.1995 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3851 global_avg_mtp_loss: 2.8144 +[titan] 2025-10-05 00:00:59,522 - root - INFO - lr: 4.9739e-05 gnorm: 1.29 [ 1:26:51<23:30:37] +[titan] 2025-10-05 00:01:10,409 - root - INFO - step: 2325 loss: 3.1550 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3800 global_avg_mtp_loss: 2.7750 +[titan] 2025-10-05 00:01:10,409 - root - INFO - lr: 4.9738e-05 gnorm: 1.29 [ 1:27:02<23:30:20] +[titan] 2025-10-05 00:01:21,286 - root - INFO - step: 2330 loss: 3.1042 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3734 global_avg_mtp_loss: 2.7308 +[titan] 2025-10-05 00:01:21,286 - root - INFO - lr: 4.9737e-05 
gnorm: 1.25 [ 1:27:12<23:30:03] +[titan] 2025-10-05 00:01:32,170 - root - INFO - step: 2335 loss: 3.1428 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3775 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:01:32,170 - root - INFO - lr: 4.9735e-05 gnorm: 1.24 [ 1:27:23<23:29:46] +[titan] 2025-10-05 00:01:43,255 - root - INFO - step: 2340 loss: 3.2357 memory: 118.84GiB(85.28%) tps: 29,561 tflops: 410.12 mfu: 41.47% global_avg_ntp_loss: 0.3959 global_avg_mtp_loss: 2.8398 +[titan] 2025-10-05 00:01:43,255 - root - INFO - lr: 4.9734e-05 gnorm: 1.31 [ 1:27:34<23:29:33] +[titan] 2025-10-05 00:01:54,139 - root - INFO - step: 2345 loss: 3.2594 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3938 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:01:54,139 - root - INFO - lr: 4.9732e-05 gnorm: 1.30 [ 1:27:45<23:29:16] +[titan] 2025-10-05 00:02:02,828 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:02:05,010 - root - INFO - step: 2350 loss: 3.1385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3798 global_avg_mtp_loss: 2.7587 +[titan] 2025-10-05 00:02:05,010 - root - INFO - lr: 4.9731e-05 gnorm: 1.30 [ 1:27:56<23:28:59] +[titan] 2025-10-05 00:02:15,898 - root - INFO - step: 2355 loss: 3.1702 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3826 global_avg_mtp_loss: 2.7876 +[titan] 2025-10-05 00:02:15,898 - root - INFO - lr: 4.9730e-05 gnorm: 1.32 [ 1:28:07<23:28:42] +[titan] 2025-10-05 00:02:26,769 - root - INFO - step: 2360 loss: 3.1893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8043 +[titan] 2025-10-05 00:02:26,769 - root - INFO - lr: 4.9728e-05 gnorm: 1.43 [ 1:28:18<23:28:25] +[titan] 2025-10-05 00:02:37,640 - root - INFO - step: 2365 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7481 +[titan] 2025-10-05 00:02:37,640 - root - INFO - lr: 4.9727e-05 gnorm: 1.39 [ 1:28:29<23:28:09] +[titan] 2025-10-05 00:02:48,598 - root - INFO - step: 2370 loss: 3.1988 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3869 global_avg_mtp_loss: 2.8118 +[titan] 2025-10-05 00:02:48,598 - root - INFO - lr: 4.9726e-05 gnorm: 1.28 [ 1:28:40<23:27:53] +[titan] 2025-10-05 00:02:59,464 - root - INFO - step: 2375 loss: 3.1613 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3814 global_avg_mtp_loss: 2.7799 +[titan] 2025-10-05 00:02:59,464 - root - INFO - lr: 4.9724e-05 gnorm: 1.31 [ 1:28:51<23:27:36] +[titan] 2025-10-05 00:03:10,332 - root - INFO - step: 2380 loss: 3.2049 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3860 global_avg_mtp_loss: 2.8189 +[titan] 2025-10-05 00:03:10,332 - root - INFO - lr: 4.9723e-05 
gnorm: 1.34 [ 1:29:02<23:27:19] +[titan] 2025-10-05 00:03:21,196 - root - INFO - step: 2385 loss: 3.1936 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3835 global_avg_mtp_loss: 2.8101 +[titan] 2025-10-05 00:03:21,196 - root - INFO - lr: 4.9721e-05 gnorm: 1.30 [ 1:29:12<23:27:02] +[titan] 2025-10-05 00:03:32,051 - root - INFO - step: 2390 loss: 3.2440 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3919 global_avg_mtp_loss: 2.8521 +[titan] 2025-10-05 00:03:32,051 - root - INFO - lr: 4.9720e-05 gnorm: 1.33 [ 1:29:23<23:26:45] +[titan] 2025-10-05 00:03:42,978 - root - INFO - step: 2395 loss: 3.1247 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3771 global_avg_mtp_loss: 2.7475 +[titan] 2025-10-05 00:03:42,978 - root - INFO - lr: 4.9719e-05 gnorm: 1.31 [ 1:29:34<23:26:30] +[titan] 2025-10-05 00:03:51,666 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:03:53,851 - root - INFO - step: 2400 loss: 3.2662 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3944 global_avg_mtp_loss: 2.8718 +[titan] 2025-10-05 00:03:53,851 - root - INFO - lr: 4.9717e-05 gnorm: 1.40 [ 1:29:45<23:26:13] +[titan] 2025-10-05 00:04:04,749 - root - INFO - step: 2405 loss: 3.2406 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3922 global_avg_mtp_loss: 2.8485 +[titan] 2025-10-05 00:04:04,749 - root - INFO - lr: 4.9716e-05 gnorm: 1.38 [ 1:29:56<23:25:57] +[titan] 2025-10-05 00:04:15,630 - root - INFO - step: 2410 loss: 3.1271 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3766 global_avg_mtp_loss: 2.7504 +[titan] 2025-10-05 00:04:15,630 - root - INFO - lr: 4.9714e-05 gnorm: 1.27 [ 1:30:07<23:25:40] +[titan] 2025-10-05 00:04:26,491 - root - INFO - step: 2415 loss: 3.1402 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3779 global_avg_mtp_loss: 2.7623 +[titan] 2025-10-05 00:04:26,491 - root - INFO - lr: 4.9713e-05 gnorm: 1.39 [ 1:30:18<23:25:23] +[titan] 2025-10-05 00:04:37,350 - root - INFO - step: 2420 loss: 3.1746 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.3822 global_avg_mtp_loss: 2.7924 +[titan] 2025-10-05 00:04:37,350 - root - INFO - lr: 4.9711e-05 gnorm: 1.45 [ 1:30:29<23:25:07] +[titan] 2025-10-05 00:04:48,268 - root - INFO - step: 2425 loss: 3.1765 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3831 global_avg_mtp_loss: 2.7934 +[titan] 2025-10-05 00:04:48,269 - root - INFO - lr: 4.9710e-05 gnorm: 1.42 [ 1:30:39<23:24:51] +[titan] 2025-10-05 00:04:59,129 - root - INFO - step: 2430 loss: 3.2456 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3918 global_avg_mtp_loss: 2.8539 +[titan] 2025-10-05 00:04:59,129 - root - INFO - lr: 4.9709e-05 
gnorm: 1.29 [ 1:30:50<23:24:34] +[titan] 2025-10-05 00:05:10,040 - root - INFO - step: 2435 loss: 3.0885 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3728 global_avg_mtp_loss: 2.7157 +[titan] 2025-10-05 00:05:10,040 - root - INFO - lr: 4.9707e-05 gnorm: 1.30 [ 1:31:01<23:24:18] +[titan] 2025-10-05 00:05:20,901 - root - INFO - step: 2440 loss: 3.1883 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3839 global_avg_mtp_loss: 2.8044 +[titan] 2025-10-05 00:05:20,901 - root - INFO - lr: 4.9706e-05 gnorm: 1.29 [ 1:31:12<23:24:01] +[titan] 2025-10-05 00:05:31,767 - root - INFO - step: 2445 loss: 3.1123 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3742 global_avg_mtp_loss: 2.7381 +[titan] 2025-10-05 00:05:31,767 - root - INFO - lr: 4.9704e-05 gnorm: 1.28 [ 1:31:23<23:23:45] +[titan] 2025-10-05 00:05:40,460 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:05:42,650 - root - INFO - step: 2450 loss: 3.1786 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3845 global_avg_mtp_loss: 2.7941 +[titan] 2025-10-05 00:05:42,650 - root - INFO - lr: 4.9703e-05 gnorm: 1.27 [ 1:31:34<23:23:29] +[titan] 2025-10-05 00:05:53,573 - root - INFO - step: 2455 loss: 3.1398 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3790 global_avg_mtp_loss: 2.7608 +[titan] 2025-10-05 00:05:53,573 - root - INFO - lr: 4.9701e-05 gnorm: 1.27 [ 1:31:45<23:23:13] +[titan] 2025-10-05 00:06:04,454 - root - INFO - step: 2460 loss: 3.2308 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3958 global_avg_mtp_loss: 2.8350 +[titan] 2025-10-05 00:06:04,455 - root - INFO - lr: 4.9700e-05 gnorm: 2.69 [ 1:31:56<23:22:57] +[titan] 2025-10-05 00:06:15,398 - root - INFO - step: 2465 loss: 3.1213 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3761 global_avg_mtp_loss: 2.7452 +[titan] 2025-10-05 00:06:15,398 - root - INFO - lr: 4.9698e-05 gnorm: 1.28 [ 1:32:07<23:22:41] +[titan] 2025-10-05 00:06:26,299 - root - INFO - step: 2470 loss: 3.1059 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3749 global_avg_mtp_loss: 2.7310 +[titan] 2025-10-05 00:06:26,299 - root - INFO - lr: 4.9697e-05 gnorm: 1.29 [ 1:32:17<23:22:25] +[titan] 2025-10-05 00:06:37,192 - root - INFO - step: 2475 loss: 3.1051 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3745 global_avg_mtp_loss: 2.7306 +[titan] 2025-10-05 00:06:37,192 - root - INFO - lr: 4.9696e-05 gnorm: 1.31 [ 1:32:28<23:22:09] +[titan] 2025-10-05 00:06:48,155 - root - INFO - step: 2480 loss: 3.1093 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.69 mfu: 41.93% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7359 +[titan] 2025-10-05 00:06:48,155 - root - INFO - lr: 4.9694e-05 
gnorm: 1.32 [ 1:32:39<23:21:54] +[titan] 2025-10-05 00:06:59,038 - root - INFO - step: 2485 loss: 3.1283 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3767 global_avg_mtp_loss: 2.7516 +[titan] 2025-10-05 00:06:59,038 - root - INFO - lr: 4.9693e-05 gnorm: 1.34 [ 1:32:50<23:21:38] +[titan] 2025-10-05 00:07:09,901 - root - INFO - step: 2490 loss: 3.1376 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7607 +[titan] 2025-10-05 00:07:09,901 - root - INFO - lr: 4.9691e-05 gnorm: 1.34 [ 1:33:01<23:21:22] +[titan] 2025-10-05 00:07:20,803 - root - INFO - step: 2495 loss: 3.1543 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3807 global_avg_mtp_loss: 2.7736 +[titan] 2025-10-05 00:07:20,803 - root - INFO - lr: 4.9690e-05 gnorm: 1.36 [ 1:33:12<23:21:06] +[titan] 2025-10-05 00:07:29,527 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:07:31,706 - root - INFO - step: 2500 loss: 3.1575 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7787 +[titan] 2025-10-05 00:07:31,706 - root - INFO - lr: 4.9688e-05 gnorm: 1.31 [ 1:33:23<23:20:50] +[titan] 2025-10-05 00:07:42,568 - root - INFO - step: 2505 loss: 3.1325 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3760 global_avg_mtp_loss: 2.7566 +[titan] 2025-10-05 00:07:42,568 - root - INFO - lr: 4.9687e-05 gnorm: 1.22 [ 1:33:34<23:20:34] +[titan] 2025-10-05 00:07:53,496 - root - INFO - step: 2510 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.3718 global_avg_mtp_loss: 2.7142 +[titan] 2025-10-05 00:07:53,497 - root - INFO - lr: 4.9685e-05 gnorm: 1.31 [ 1:33:45<23:20:19] +[titan] 2025-10-05 00:08:04,378 - root - INFO - step: 2515 loss: 3.2003 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3862 global_avg_mtp_loss: 2.8141 +[titan] 2025-10-05 00:08:04,378 - root - INFO - lr: 4.9684e-05 gnorm: 1.43 [ 1:33:56<23:20:02] +[titan] 2025-10-05 00:08:15,255 - root - INFO - step: 2520 loss: 3.1816 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3847 global_avg_mtp_loss: 2.7969 +[titan] 2025-10-05 00:08:15,255 - root - INFO - lr: 4.9682e-05 gnorm: 1.38 [ 1:34:06<23:19:46] +[titan] 2025-10-05 00:08:26,136 - root - INFO - step: 2525 loss: 3.2579 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3923 global_avg_mtp_loss: 2.8656 +[titan] 2025-10-05 00:08:26,136 - root - INFO - lr: 4.9681e-05 gnorm: 1.37 [ 1:34:17<23:19:30] +[titan] 2025-10-05 00:08:37,049 - root - INFO - step: 2530 loss: 3.1078 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7346 +[titan] 2025-10-05 00:08:37,049 - root - INFO - lr: 4.9679e-05 
gnorm: 1.28 [ 1:34:28<23:19:15] +[titan] 2025-10-05 00:08:48,046 - root - INFO - step: 2535 loss: 3.0953 memory: 118.84GiB(85.28%) tps: 29,797 tflops: 413.39 mfu: 41.80% global_avg_ntp_loss: 0.3719 global_avg_mtp_loss: 2.7233 +[titan] 2025-10-05 00:08:48,047 - root - INFO - lr: 4.9678e-05 gnorm: 1.25 [ 1:34:39<23:19:01] +[titan] 2025-10-05 00:08:58,919 - root - INFO - step: 2540 loss: 3.1620 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3803 global_avg_mtp_loss: 2.7817 +[titan] 2025-10-05 00:08:58,919 - root - INFO - lr: 4.9676e-05 gnorm: 1.26 [ 1:34:50<23:18:45] +[titan] 2025-10-05 00:09:09,786 - root - INFO - step: 2545 loss: 3.1667 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3815 global_avg_mtp_loss: 2.7852 +[titan] 2025-10-05 00:09:09,786 - root - INFO - lr: 4.9675e-05 gnorm: 1.40 [ 1:35:01<23:18:28] +[titan] 2025-10-05 00:09:18,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:09:20,677 - root - INFO - step: 2550 loss: 3.0790 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3704 global_avg_mtp_loss: 2.7086 +[titan] 2025-10-05 00:09:20,677 - root - INFO - lr: 4.9673e-05 gnorm: 1.34 [ 1:35:12<23:18:13] +[titan] 2025-10-05 00:09:31,556 - root - INFO - step: 2555 loss: 3.0389 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3662 global_avg_mtp_loss: 2.6727 +[titan] 2025-10-05 00:09:31,557 - root - INFO - lr: 4.9672e-05 gnorm: 1.31 [ 1:35:23<23:17:57] +[titan] 2025-10-05 00:09:42,516 - root - INFO - step: 2560 loss: 3.1285 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.3755 global_avg_mtp_loss: 2.7530 +[titan] 2025-10-05 00:09:42,516 - root - INFO - lr: 4.9670e-05 gnorm: 1.23 [ 1:35:34<23:17:42] +[titan] 2025-10-05 00:09:42,681 - root - INFO - Dumping profiler traces at step 2560 +[titan] 2025-10-05 00:09:42,718 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:09:53,687 - root - INFO - step: 2565 loss: 3.0635 memory: 118.84GiB(85.28%) tps: 29,334 tflops: 406.97 mfu: 41.15% global_avg_ntp_loss: 0.3659 global_avg_mtp_loss: 2.6976 +[titan] 2025-10-05 00:09:53,687 - root - INFO - lr: 4.9669e-05 gnorm: 1.33 [ 1:35:45<23:17:30] +[titan] 2025-10-05 00:10:04,566 - root - INFO - step: 2570 loss: 3.0420 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6772 +[titan] 2025-10-05 00:10:04,566 - root - INFO - lr: 4.9667e-05 gnorm: 1.29 [ 1:35:56<23:17:14] +[titan] 2025-10-05 00:10:15,470 - root - INFO - step: 2575 loss: 3.2085 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3872 global_avg_mtp_loss: 2.8213 +[titan] 2025-10-05 00:10:15,471 - root - INFO - lr: 4.9666e-05 gnorm: 1.30 [ 1:36:07<23:16:59] +[titan] 2025-10-05 00:10:26,384 - root - INFO - step: 2580 loss: 3.2105 
memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3855 global_avg_mtp_loss: 2.8250 +[titan] 2025-10-05 00:10:26,384 - root - INFO - lr: 4.9664e-05 gnorm: 1.29 [ 1:36:18<23:16:44] +[titan] 2025-10-05 00:10:37,260 - root - INFO - step: 2585 loss: 3.0856 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3698 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:10:37,261 - root - INFO - lr: 4.9663e-05 gnorm: 1.30 [ 1:36:28<23:16:28] +[titan] 2025-10-05 00:10:48,212 - root - INFO - step: 2590 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.3650 global_avg_mtp_loss: 2.6717 +[titan] 2025-10-05 00:10:48,212 - root - INFO - lr: 4.9661e-05 gnorm: 1.26 [ 1:36:39<23:16:13] +[titan] 2025-10-05 00:10:59,142 - root - INFO - step: 2595 loss: 3.1492 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.3799 global_avg_mtp_loss: 2.7693 +[titan] 2025-10-05 00:10:59,142 - root - INFO - lr: 4.9659e-05 gnorm: 1.24 [ 1:36:50<23:15:58] +[titan] 2025-10-05 00:11:07,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:11:10,032 - root - INFO - step: 2600 loss: 3.0911 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3727 global_avg_mtp_loss: 2.7185 +[titan] 2025-10-05 00:11:10,032 - root - INFO - lr: 4.9658e-05 gnorm: 1.22 [ 1:37:01<23:15:42] +[titan] 2025-10-05 00:11:20,915 - root - INFO - step: 2605 loss: 3.1578 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3808 global_avg_mtp_loss: 2.7770 +[titan] 2025-10-05 00:11:20,915 - root - INFO - lr: 4.9656e-05 gnorm: 1.26 [ 1:37:12<23:15:27] +[titan] 2025-10-05 00:11:31,815 - root - INFO - step: 2610 loss: 3.1088 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3741 global_avg_mtp_loss: 2.7348 +[titan] 2025-10-05 00:11:31,815 - root - INFO - lr: 4.9655e-05 gnorm: 1.25 [ 1:37:23<23:15:11] +[titan] 2025-10-05 00:11:42,699 - root - INFO - step: 2615 loss: 3.1165 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7413 +[titan] 2025-10-05 00:11:42,699 - root - INFO - lr: 4.9653e-05 gnorm: 1.30 [ 1:37:34<23:14:56] +[titan] 2025-10-05 00:11:53,594 - root - INFO - step: 2620 loss: 3.1397 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7617 +[titan] 2025-10-05 00:11:53,594 - root - INFO - lr: 4.9652e-05 gnorm: 1.27 [ 1:37:45<23:14:40] +[titan] 2025-10-05 00:12:04,505 - root - INFO - step: 2625 loss: 3.1215 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3752 global_avg_mtp_loss: 2.7463 +[titan] 2025-10-05 00:12:04,505 - root - INFO - lr: 4.9650e-05 gnorm: 1.33 [ 1:37:56<23:14:25] +[titan] 2025-10-05 00:12:15,389 - root - INFO - step: 2630 loss: 3.1525 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3788 global_avg_mtp_loss: 2.7737 +[titan] 2025-10-05 00:12:15,390 - root - INFO - lr: 4.9649e-05 
gnorm: 1.27 [ 1:38:07<23:14:09] +[titan] 2025-10-05 00:12:26,270 - root - INFO - step: 2635 loss: 3.1176 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3751 global_avg_mtp_loss: 2.7424 +[titan] 2025-10-05 00:12:26,271 - root - INFO - lr: 4.9647e-05 gnorm: 1.30 [ 1:38:17<23:13:54] +[titan] 2025-10-05 00:12:37,153 - root - INFO - step: 2640 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6977 +[titan] 2025-10-05 00:12:37,153 - root - INFO - lr: 4.9645e-05 gnorm: 1.28 [ 1:38:28<23:13:38] +[titan] 2025-10-05 00:12:48,055 - root - INFO - step: 2645 loss: 3.1119 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3732 global_avg_mtp_loss: 2.7387 +[titan] 2025-10-05 00:12:48,055 - root - INFO - lr: 4.9644e-05 gnorm: 1.30 [ 1:38:39<23:13:23] +[titan] 2025-10-05 00:12:56,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:12:58,983 - root - INFO - step: 2650 loss: 3.0548 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3676 global_avg_mtp_loss: 2.6872 +[titan] 2025-10-05 00:12:58,983 - root - INFO - lr: 4.9642e-05 gnorm: 1.23 [ 1:38:50<23:13:08] +[titan] 2025-10-05 00:13:09,879 - root - INFO - step: 2655 loss: 3.0496 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3671 global_avg_mtp_loss: 2.6826 +[titan] 2025-10-05 00:13:09,879 - root - INFO - lr: 4.9641e-05 gnorm: 1.28 [ 1:39:01<23:12:53] +[titan] 2025-10-05 00:13:20,805 - root - INFO - step: 2660 loss: 3.1186 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.3759 global_avg_mtp_loss: 2.7427 +[titan] 2025-10-05 00:13:20,805 - root - INFO - lr: 4.9639e-05 gnorm: 1.25 [ 1:39:12<23:12:38] +[titan] 2025-10-05 00:13:31,679 - root - INFO - step: 2665 loss: 3.0573 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3678 global_avg_mtp_loss: 2.6895 +[titan] 2025-10-05 00:13:31,680 - root - INFO - lr: 4.9637e-05 gnorm: 1.25 [ 1:39:23<23:12:22] +[titan] 2025-10-05 00:13:42,558 - root - INFO - step: 2670 loss: 3.0570 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3663 global_avg_mtp_loss: 2.6907 +[titan] 2025-10-05 00:13:42,558 - root - INFO - lr: 4.9636e-05 gnorm: 1.26 [ 1:39:34<23:12:07] +[titan] 2025-10-05 00:13:53,472 - root - INFO - step: 2675 loss: 3.1878 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3850 global_avg_mtp_loss: 2.8028 +[titan] 2025-10-05 00:13:53,472 - root - INFO - lr: 4.9634e-05 gnorm: 1.31 [ 1:39:45<23:11:52] +[titan] 2025-10-05 00:14:04,364 - root - INFO - step: 2680 loss: 3.1135 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.3738 global_avg_mtp_loss: 2.7397 +[titan] 2025-10-05 00:14:04,365 - root - INFO - lr: 4.9633e-05 
gnorm: 1.22 [ 1:39:56<23:11:36] +[titan] 2025-10-05 00:14:15,279 - root - INFO - step: 2685 loss: 3.0010 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3606 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:14:15,279 - root - INFO - lr: 4.9631e-05 gnorm: 1.32 [ 1:40:06<23:11:21] +[titan] 2025-10-05 00:14:26,223 - root - INFO - step: 2690 loss: 3.1084 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.3737 global_avg_mtp_loss: 2.7347 +[titan] 2025-10-05 00:14:26,223 - root - INFO - lr: 4.9629e-05 gnorm: 1.28 [ 1:40:17<23:11:07] +[titan] 2025-10-05 00:14:37,114 - root - INFO - step: 2695 loss: 3.1301 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3758 global_avg_mtp_loss: 2.7543 +[titan] 2025-10-05 00:14:37,114 - root - INFO - lr: 4.9628e-05 gnorm: 1.31 [ 1:40:28<23:10:52] +[titan] 2025-10-05 00:14:45,831 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:14:48,028 - root - INFO - step: 2700 loss: 3.0874 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.3690 global_avg_mtp_loss: 2.7184 +[titan] 2025-10-05 00:14:48,029 - root - INFO - lr: 4.9626e-05 gnorm: 1.38 [ 1:40:39<23:10:37] +[titan] 2025-10-05 00:14:58,931 - root - INFO - step: 2705 loss: 3.1260 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3750 global_avg_mtp_loss: 2.7509 +[titan] 2025-10-05 00:14:58,931 - root - INFO - lr: 4.9625e-05 gnorm: 1.28 [ 1:40:50<23:10:22] +[titan] 2025-10-05 00:15:09,812 - root - INFO - step: 2710 loss: 3.0477 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3643 global_avg_mtp_loss: 2.6834 +[titan] 2025-10-05 00:15:09,812 - root - INFO - lr: 4.9623e-05 gnorm: 1.29 [ 1:41:01<23:10:06] +[titan] 2025-10-05 00:15:20,681 - root - INFO - step: 2715 loss: 2.9784 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3575 global_avg_mtp_loss: 2.6209 +[titan] 2025-10-05 00:15:20,681 - root - INFO - lr: 4.9621e-05 gnorm: 1.39 [ 1:41:12<23:09:51] +[titan] 2025-10-05 00:15:31,544 - root - INFO - step: 2720 loss: 3.0989 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3712 global_avg_mtp_loss: 2.7276 +[titan] 2025-10-05 00:15:31,544 - root - INFO - lr: 4.9620e-05 gnorm: 1.28 [ 1:41:23<23:09:35] +[titan] 2025-10-05 00:15:42,481 - root - INFO - step: 2725 loss: 3.0279 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3634 global_avg_mtp_loss: 2.6645 +[titan] 2025-10-05 00:15:42,482 - root - INFO - lr: 4.9618e-05 gnorm: 1.38 [ 1:41:34<23:09:21] +[titan] 2025-10-05 00:15:53,371 - root - INFO - step: 2730 loss: 3.0629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3682 global_avg_mtp_loss: 2.6946 +[titan] 2025-10-05 00:15:53,371 - root - INFO - lr: 4.9616e-05 
gnorm: 1.27 [ 1:41:45<23:09:05] +[titan] 2025-10-05 00:16:04,250 - root - INFO - step: 2735 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6257 +[titan] 2025-10-05 00:16:04,250 - root - INFO - lr: 4.9615e-05 gnorm: 1.32 [ 1:41:55<23:08:50] +[titan] 2025-10-05 00:16:15,152 - root - INFO - step: 2740 loss: 3.0246 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6608 +[titan] 2025-10-05 00:16:15,152 - root - INFO - lr: 4.9613e-05 gnorm: 1.29 [ 1:42:06<23:08:35] +[titan] 2025-10-05 00:16:26,041 - root - INFO - step: 2745 loss: 3.1571 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3796 global_avg_mtp_loss: 2.7775 +[titan] 2025-10-05 00:16:26,041 - root - INFO - lr: 4.9611e-05 gnorm: 1.28 [ 1:42:17<23:08:20] +[titan] 2025-10-05 00:16:34,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:16:36,918 - root - INFO - step: 2750 loss: 3.0736 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3686 global_avg_mtp_loss: 2.7050 +[titan] 2025-10-05 00:16:36,919 - root - INFO - lr: 4.9610e-05 gnorm: 1.24 [ 1:42:28<23:08:05] +[titan] 2025-10-05 00:16:47,865 - root - INFO - step: 2755 loss: 2.9899 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6309 +[titan] 2025-10-05 00:16:47,865 - root - INFO - lr: 4.9608e-05 gnorm: 1.22 [ 1:42:39<23:07:50] +[titan] 2025-10-05 00:16:58,851 - root - INFO - step: 2760 loss: 3.0390 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.3657 global_avg_mtp_loss: 2.6733 +[titan] 2025-10-05 00:16:58,851 - root - INFO - lr: 4.9606e-05 gnorm: 1.33 [ 1:42:50<23:07:37] +[titan] 2025-10-05 00:17:09,727 - root - INFO - step: 2765 loss: 3.1133 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3740 global_avg_mtp_loss: 2.7394 +[titan] 2025-10-05 00:17:09,727 - root - INFO - lr: 4.9605e-05 gnorm: 1.30 [ 1:43:01<23:07:21] +[titan] 2025-10-05 00:17:20,607 - root - INFO - step: 2770 loss: 3.0638 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3695 global_avg_mtp_loss: 2.6943 +[titan] 2025-10-05 00:17:20,607 - root - INFO - lr: 4.9603e-05 gnorm: 1.35 [ 1:43:12<23:07:06] +[titan] 2025-10-05 00:17:31,517 - root - INFO - step: 2775 loss: 3.0938 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3716 global_avg_mtp_loss: 2.7222 +[titan] 2025-10-05 00:17:31,517 - root - INFO - lr: 4.9601e-05 gnorm: 1.26 [ 1:43:23<23:06:51] +[titan] 2025-10-05 00:17:42,399 - root - INFO - step: 2780 loss: 3.0126 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6519 +[titan] 2025-10-05 00:17:42,399 - root - INFO - lr: 4.9600e-05 
gnorm: 1.30 [ 1:43:34<23:06:36] +[titan] 2025-10-05 00:17:53,331 - root - INFO - step: 2785 loss: 3.0873 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.3714 global_avg_mtp_loss: 2.7159 +[titan] 2025-10-05 00:17:53,331 - root - INFO - lr: 4.9598e-05 gnorm: 1.28 [ 1:43:44<23:06:22] +[titan] 2025-10-05 00:18:04,263 - root - INFO - step: 2790 loss: 3.0185 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.85 mfu: 42.05% global_avg_ntp_loss: 0.3627 global_avg_mtp_loss: 2.6559 +[titan] 2025-10-05 00:18:04,263 - root - INFO - lr: 4.9596e-05 gnorm: 1.33 [ 1:43:55<23:06:07] +[titan] 2025-10-05 00:18:15,157 - root - INFO - step: 2795 loss: 3.0652 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3677 global_avg_mtp_loss: 2.6975 +[titan] 2025-10-05 00:18:15,157 - root - INFO - lr: 4.9595e-05 gnorm: 1.25 [ 1:44:06<23:05:52] +[titan] 2025-10-05 00:18:23,851 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:18:26,054 - root - INFO - step: 2800 loss: 3.0213 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6588 +[titan] 2025-10-05 00:18:26,054 - root - INFO - lr: 4.9593e-05 gnorm: 1.28 [ 1:44:17<23:05:38] +[titan] 2025-10-05 00:18:36,954 - root - INFO - step: 2805 loss: 3.1425 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3783 global_avg_mtp_loss: 2.7642 +[titan] 2025-10-05 00:18:36,954 - root - INFO - lr: 4.9591e-05 gnorm: 1.28 [ 1:44:28<23:05:23] +[titan] 2025-10-05 00:18:47,864 - root - INFO - step: 2810 loss: 3.0392 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.3638 global_avg_mtp_loss: 2.6754 +[titan] 2025-10-05 00:18:47,864 - root - INFO - lr: 4.9590e-05 gnorm: 1.27 [ 1:44:39<23:05:08] +[titan] 2025-10-05 00:18:58,796 - root - INFO - step: 2815 loss: 3.0728 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3684 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:18:58,796 - root - INFO - lr: 4.9588e-05 gnorm: 1.28 [ 1:44:50<23:04:54] +[titan] 2025-10-05 00:19:09,768 - root - INFO - step: 2820 loss: 3.0759 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.3697 global_avg_mtp_loss: 2.7062 +[titan] 2025-10-05 00:19:09,768 - root - INFO - lr: 4.9586e-05 gnorm: 1.28 [ 1:45:01<23:04:40] +[titan] 2025-10-05 00:19:20,659 - root - INFO - step: 2825 loss: 3.0518 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3667 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:19:20,659 - root - INFO - lr: 4.9585e-05 gnorm: 1.38 [ 1:45:12<23:04:25] +[titan] 2025-10-05 00:19:31,538 - root - INFO - step: 2830 loss: 3.1035 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3733 global_avg_mtp_loss: 2.7302 +[titan] 2025-10-05 00:19:31,538 - root - INFO - lr: 4.9583e-05 
gnorm: 1.34 [ 1:45:23<23:04:10] +[titan] 2025-10-05 00:19:42,419 - root - INFO - step: 2835 loss: 3.0685 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3683 global_avg_mtp_loss: 2.7002 +[titan] 2025-10-05 00:19:42,419 - root - INFO - lr: 4.9581e-05 gnorm: 1.37 [ 1:45:34<23:03:55] +[titan] 2025-10-05 00:19:53,306 - root - INFO - step: 2840 loss: 3.0223 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3652 global_avg_mtp_loss: 2.6571 +[titan] 2025-10-05 00:19:53,306 - root - INFO - lr: 4.9579e-05 gnorm: 1.32 [ 1:45:44<23:03:40] +[titan] 2025-10-05 00:20:04,219 - root - INFO - step: 2845 loss: 3.0274 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3633 global_avg_mtp_loss: 2.6641 +[titan] 2025-10-05 00:20:04,220 - root - INFO - lr: 4.9578e-05 gnorm: 1.28 [ 1:45:55<23:03:26] +[titan] 2025-10-05 00:20:12,960 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:20:15,145 - root - INFO - step: 2850 loss: 3.0430 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3661 global_avg_mtp_loss: 2.6770 +[titan] 2025-10-05 00:20:15,145 - root - INFO - lr: 4.9576e-05 gnorm: 1.26 [ 1:46:06<23:03:11] +[titan] 2025-10-05 00:20:26,027 - root - INFO - step: 2855 loss: 3.0893 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7209 +[titan] 2025-10-05 00:20:26,027 - root - INFO - lr: 4.9574e-05 gnorm: 1.27 [ 1:46:17<23:02:56] +[titan] 2025-10-05 00:20:36,904 - root - INFO - step: 2860 loss: 3.0960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3724 global_avg_mtp_loss: 2.7236 +[titan] 2025-10-05 00:20:36,904 - root - INFO - lr: 4.9573e-05 gnorm: 1.28 [ 1:46:28<23:02:41] +[titan] 2025-10-05 00:20:47,806 - root - INFO - step: 2865 loss: 3.1434 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3780 global_avg_mtp_loss: 2.7654 +[titan] 2025-10-05 00:20:47,806 - root - INFO - lr: 4.9571e-05 gnorm: 1.30 [ 1:46:39<23:02:27] +[titan] 2025-10-05 00:20:58,761 - root - INFO - step: 2870 loss: 2.9969 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:20:58,761 - root - INFO - lr: 4.9569e-05 gnorm: 1.30 [ 1:46:50<23:02:13] +[titan] 2025-10-05 00:21:09,643 - root - INFO - step: 2875 loss: 3.0232 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3635 global_avg_mtp_loss: 2.6597 +[titan] 2025-10-05 00:21:09,643 - root - INFO - lr: 4.9567e-05 gnorm: 1.30 [ 1:47:01<23:01:58] +[titan] 2025-10-05 00:21:20,548 - root - INFO - step: 2880 loss: 2.9737 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3570 global_avg_mtp_loss: 2.6167 +[titan] 2025-10-05 00:21:20,548 - root - INFO - lr: 4.9566e-05 
gnorm: 1.28 [ 1:47:12<23:01:43] +[titan] 2025-10-05 00:21:31,529 - root - INFO - step: 2885 loss: 3.0875 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.3720 global_avg_mtp_loss: 2.7155 +[titan] 2025-10-05 00:21:31,530 - root - INFO - lr: 4.9564e-05 gnorm: 1.25 [ 1:47:23<23:01:30] +[titan] 2025-10-05 00:21:42,407 - root - INFO - step: 2890 loss: 3.0347 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6705 +[titan] 2025-10-05 00:21:42,407 - root - INFO - lr: 4.9562e-05 gnorm: 1.38 [ 1:47:34<23:01:15] +[titan] 2025-10-05 00:21:53,280 - root - INFO - step: 2895 loss: 3.0145 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3610 global_avg_mtp_loss: 2.6535 +[titan] 2025-10-05 00:21:53,280 - root - INFO - lr: 4.9560e-05 gnorm: 1.22 [ 1:47:44<23:01:00] +[titan] 2025-10-05 00:22:02,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:22:04,199 - root - INFO - step: 2900 loss: 3.1605 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3801 global_avg_mtp_loss: 2.7805 +[titan] 2025-10-05 00:22:04,199 - root - INFO - lr: 4.9559e-05 gnorm: 1.35 [ 1:47:55<23:00:46] +[titan] 2025-10-05 00:22:15,084 - root - INFO - step: 2905 loss: 3.0860 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3703 global_avg_mtp_loss: 2.7158 +[titan] 2025-10-05 00:22:15,084 - root - INFO - lr: 4.9557e-05 gnorm: 1.29 [ 1:48:06<23:00:31] +[titan] 2025-10-05 00:22:25,962 - root - INFO - step: 2910 loss: 3.0022 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6423 +[titan] 2025-10-05 00:22:25,962 - root - INFO - lr: 4.9555e-05 gnorm: 1.31 [ 1:48:17<23:00:16] +[titan] 2025-10-05 00:22:36,871 - root - INFO - step: 2915 loss: 3.1580 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.3797 global_avg_mtp_loss: 2.7783 +[titan] 2025-10-05 00:22:36,871 - root - INFO - lr: 4.9553e-05 gnorm: 1.42 [ 1:48:28<23:00:02] +[titan] 2025-10-05 00:22:47,815 - root - INFO - step: 2920 loss: 3.0326 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.3653 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:22:47,815 - root - INFO - lr: 4.9552e-05 gnorm: 1.30 [ 1:48:39<22:59:48] +[titan] 2025-10-05 00:22:58,703 - root - INFO - step: 2925 loss: 3.0724 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3681 global_avg_mtp_loss: 2.7043 +[titan] 2025-10-05 00:22:58,703 - root - INFO - lr: 4.9550e-05 gnorm: 1.34 [ 1:48:50<22:59:33] +[titan] 2025-10-05 00:23:09,632 - root - INFO - step: 2930 loss: 3.0482 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6818 +[titan] 2025-10-05 00:23:09,632 - root - INFO - lr: 4.9548e-05 
gnorm: 1.23 [ 1:49:01<22:59:19] +[titan] 2025-10-05 00:23:20,517 - root - INFO - step: 2935 loss: 2.9200 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5708 +[titan] 2025-10-05 00:23:20,517 - root - INFO - lr: 4.9546e-05 gnorm: 1.28 [ 1:49:12<22:59:04] +[titan] 2025-10-05 00:23:31,391 - root - INFO - step: 2940 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6615 +[titan] 2025-10-05 00:23:31,391 - root - INFO - lr: 4.9544e-05 gnorm: 1.25 [ 1:49:23<22:58:49] +[titan] 2025-10-05 00:23:42,322 - root - INFO - step: 2945 loss: 3.1473 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3769 global_avg_mtp_loss: 2.7704 +[titan] 2025-10-05 00:23:42,322 - root - INFO - lr: 4.9543e-05 gnorm: 1.35 [ 1:49:33<22:58:35] +[titan] 2025-10-05 00:23:51,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:23:53,182 - root - INFO - step: 2950 loss: 3.0250 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6620 +[titan] 2025-10-05 00:23:53,183 - root - INFO - lr: 4.9541e-05 gnorm: 1.26 [ 1:49:44<22:58:20] +[titan] 2025-10-05 00:24:04,100 - root - INFO - step: 2955 loss: 2.9887 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3579 global_avg_mtp_loss: 2.6308 +[titan] 2025-10-05 00:24:04,100 - root - INFO - lr: 4.9539e-05 gnorm: 1.32 [ 1:49:55<22:58:06] +[titan] 2025-10-05 00:24:14,957 - root - INFO - step: 2960 loss: 2.9752 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6188 +[titan] 2025-10-05 00:24:14,957 - root - INFO - lr: 4.9537e-05 gnorm: 1.29 [ 1:50:06<22:57:51] +[titan] 2025-10-05 00:24:25,824 - root - INFO - step: 2965 loss: 3.0670 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3670 global_avg_mtp_loss: 2.7000 +[titan] 2025-10-05 00:24:25,824 - root - INFO - lr: 4.9535e-05 gnorm: 1.36 [ 1:50:17<22:57:36] +[titan] 2025-10-05 00:24:36,677 - root - INFO - step: 2970 loss: 3.0105 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3605 global_avg_mtp_loss: 2.6500 +[titan] 2025-10-05 00:24:36,677 - root - INFO - lr: 4.9534e-05 gnorm: 1.28 [ 1:50:28<22:57:21] +[titan] 2025-10-05 00:24:47,550 - root - INFO - step: 2975 loss: 3.0798 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3685 global_avg_mtp_loss: 2.7113 +[titan] 2025-10-05 00:24:47,550 - root - INFO - lr: 4.9532e-05 gnorm: 1.26 [ 1:50:39<22:57:07] +[titan] 2025-10-05 00:24:58,508 - root - INFO - step: 2980 loss: 3.0933 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7212 +[titan] 2025-10-05 00:24:58,508 - root - INFO - lr: 4.9530e-05 
gnorm: 1.34 [ 1:50:50<22:56:53] +[titan] 2025-10-05 00:25:09,436 - root - INFO - step: 2985 loss: 2.9918 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6318 +[titan] 2025-10-05 00:25:09,436 - root - INFO - lr: 4.9528e-05 gnorm: 1.29 [ 1:51:01<22:56:39] +[titan] 2025-10-05 00:25:20,336 - root - INFO - step: 2990 loss: 3.0864 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3699 global_avg_mtp_loss: 2.7165 +[titan] 2025-10-05 00:25:20,336 - root - INFO - lr: 4.9526e-05 gnorm: 1.30 [ 1:51:11<22:56:25] +[titan] 2025-10-05 00:25:31,210 - root - INFO - step: 2995 loss: 3.0152 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3613 global_avg_mtp_loss: 2.6538 +[titan] 2025-10-05 00:25:31,210 - root - INFO - lr: 4.9525e-05 gnorm: 1.34 [ 1:51:22<22:56:10] +[titan] 2025-10-05 00:25:39,889 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:25:42,077 - root - INFO - step: 3000 loss: 2.9639 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6087 +[titan] 2025-10-05 00:25:42,077 - root - INFO - lr: 4.9523e-05 gnorm: 1.20 [ 1:51:33<22:55:55] +[titan] 2025-10-05 00:25:52,956 - root - INFO - step: 3005 loss: 2.9850 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6274 +[titan] 2025-10-05 00:25:52,956 - root - INFO - lr: 4.9521e-05 gnorm: 1.25 [ 1:51:44<22:55:41] +[titan] 2025-10-05 00:26:03,943 - root - INFO - step: 3010 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 29,826 tflops: 413.79 mfu: 41.84% global_avg_ntp_loss: 0.3599 global_avg_mtp_loss: 2.6404 +[titan] 2025-10-05 00:26:03,943 - root - INFO - lr: 4.9519e-05 gnorm: 1.25 [ 1:51:55<22:55:27] +[titan] 2025-10-05 00:26:14,799 - root - INFO - step: 3015 loss: 2.9622 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6081 +[titan] 2025-10-05 00:26:14,799 - root - INFO - lr: 4.9517e-05 gnorm: 1.20 [ 1:52:06<22:55:13] +[titan] 2025-10-05 00:26:25,658 - root - INFO - step: 3020 loss: 3.1014 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.3721 global_avg_mtp_loss: 2.7293 +[titan] 2025-10-05 00:26:25,658 - root - INFO - lr: 4.9515e-05 gnorm: 1.29 [ 1:52:17<22:54:58] +[titan] 2025-10-05 00:26:36,501 - root - INFO - step: 3025 loss: 3.0035 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.3588 global_avg_mtp_loss: 2.6447 +[titan] 2025-10-05 00:26:36,501 - root - INFO - lr: 4.9514e-05 gnorm: 1.22 [ 1:52:28<22:54:43] +[titan] 2025-10-05 00:26:47,370 - root - INFO - step: 3030 loss: 2.9868 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3589 global_avg_mtp_loss: 2.6279 +[titan] 2025-10-05 00:26:47,370 - root - INFO - lr: 4.9512e-05 
gnorm: 1.28 [ 1:52:38<22:54:28] +[titan] 2025-10-05 00:26:58,255 - root - INFO - step: 3035 loss: 3.0690 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.7021 +[titan] 2025-10-05 00:26:58,255 - root - INFO - lr: 4.9510e-05 gnorm: 1.29 [ 1:52:49<22:54:14] +[titan] 2025-10-05 00:27:09,176 - root - INFO - step: 3040 loss: 2.9415 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5893 +[titan] 2025-10-05 00:27:09,176 - root - INFO - lr: 4.9508e-05 gnorm: 1.23 [ 1:53:00<22:54:00] +[titan] 2025-10-05 00:27:20,081 - root - INFO - step: 3045 loss: 2.9565 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.6029 +[titan] 2025-10-05 00:27:20,081 - root - INFO - lr: 4.9506e-05 gnorm: 1.31 [ 1:53:11<22:53:46] +[titan] 2025-10-05 00:27:28,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:27:30,926 - root - INFO - step: 3050 loss: 3.0382 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3669 global_avg_mtp_loss: 2.6713 +[titan] 2025-10-05 00:27:30,926 - root - INFO - lr: 4.9504e-05 gnorm: 1.32 [ 1:53:22<22:53:31] +[titan] 2025-10-05 00:27:41,788 - root - INFO - step: 3055 loss: 2.9038 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5556 +[titan] 2025-10-05 00:27:41,788 - root - INFO - lr: 4.9502e-05 gnorm: 1.27 [ 1:53:33<22:53:16] +[titan] 2025-10-05 00:27:52,674 - root - INFO - step: 3060 loss: 3.0259 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3639 global_avg_mtp_loss: 2.6619 +[titan] 2025-10-05 00:27:52,674 - root - INFO - lr: 4.9501e-05 gnorm: 1.32 [ 1:53:44<22:53:02] +[titan] 2025-10-05 00:28:03,564 - root - INFO - step: 3065 loss: 3.0368 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3637 global_avg_mtp_loss: 2.6732 +[titan] 2025-10-05 00:28:03,564 - root - INFO - lr: 4.9499e-05 gnorm: 1.39 [ 1:53:55<22:52:47] +[titan] 2025-10-05 00:28:14,505 - root - INFO - step: 3070 loss: 2.9931 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.3595 global_avg_mtp_loss: 2.6336 +[titan] 2025-10-05 00:28:14,505 - root - INFO - lr: 4.9497e-05 gnorm: 1.46 [ 1:54:06<22:52:34] +[titan] 2025-10-05 00:28:19,016 - root - INFO - Dumping profiler traces at step 3072 +[titan] 2025-10-05 00:28:19,053 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:28:25,635 - root - INFO - step: 3075 loss: 2.9714 memory: 118.84GiB(85.28%) tps: 29,442 tflops: 408.46 mfu: 41.30% global_avg_ntp_loss: 0.3583 global_avg_mtp_loss: 2.6131 +[titan] 2025-10-05 00:28:25,635 - root - INFO - lr: 4.9495e-05 gnorm: 1.38 [ 1:54:17<22:52:22] +[titan] 2025-10-05 00:28:36,484 - root - INFO - step: 3080 loss: 3.0383 
memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3644 global_avg_mtp_loss: 2.6739 +[titan] 2025-10-05 00:28:36,484 - root - INFO - lr: 4.9493e-05 gnorm: 1.27 [ 1:54:28<22:52:08] +[titan] 2025-10-05 00:28:47,350 - root - INFO - step: 3085 loss: 3.0016 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:28:47,350 - root - INFO - lr: 4.9491e-05 gnorm: 1.28 [ 1:54:38<22:51:53] +[titan] 2025-10-05 00:28:58,198 - root - INFO - step: 3090 loss: 2.8733 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 00:28:58,198 - root - INFO - lr: 4.9489e-05 gnorm: 1.28 [ 1:54:49<22:51:38] +[titan] 2025-10-05 00:29:09,096 - root - INFO - step: 3095 loss: 3.0415 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3648 global_avg_mtp_loss: 2.6767 +[titan] 2025-10-05 00:29:09,096 - root - INFO - lr: 4.9487e-05 gnorm: 1.33 [ 1:55:00<22:51:24] +[titan] 2025-10-05 00:29:17,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:29:19,960 - root - INFO - step: 3100 loss: 2.9482 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3536 global_avg_mtp_loss: 2.5947 +[titan] 2025-10-05 00:29:19,960 - root - INFO - lr: 4.9485e-05 gnorm: 1.33 [ 1:55:11<22:51:10] +[titan] 2025-10-05 00:29:30,867 - root - INFO - step: 3105 loss: 2.9859 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3576 global_avg_mtp_loss: 2.6283 +[titan] 2025-10-05 00:29:30,868 - root - INFO - lr: 4.9484e-05 gnorm: 1.27 [ 1:55:22<22:50:56] +[titan] 2025-10-05 00:29:41,783 - root - INFO - step: 3110 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6306 +[titan] 2025-10-05 00:29:41,784 - root - INFO - lr: 4.9482e-05 gnorm: 1.30 [ 1:55:33<22:50:42] +[titan] 2025-10-05 00:29:52,657 - root - INFO - step: 3115 loss: 2.9941 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3590 global_avg_mtp_loss: 2.6352 +[titan] 2025-10-05 00:29:52,657 - root - INFO - lr: 4.9480e-05 gnorm: 1.24 [ 1:55:44<22:50:27] +[titan] 2025-10-05 00:30:03,529 - root - INFO - step: 3120 loss: 3.0041 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3611 global_avg_mtp_loss: 2.6431 +[titan] 2025-10-05 00:30:03,529 - root - INFO - lr: 4.9478e-05 gnorm: 1.22 [ 1:55:55<22:50:13] +[titan] 2025-10-05 00:30:14,438 - root - INFO - step: 3125 loss: 2.9712 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6162 +[titan] 2025-10-05 00:30:14,438 - root - INFO - lr: 4.9476e-05 gnorm: 1.28 [ 1:56:06<22:49:59] +[titan] 2025-10-05 00:30:25,289 - root - INFO - step: 3130 loss: 2.9425 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:30:25,289 - root - INFO - lr: 4.9474e-05 
gnorm: 1.28 [ 1:56:16<22:49:44] +[titan] 2025-10-05 00:30:36,160 - root - INFO - step: 3135 loss: 3.0775 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3696 global_avg_mtp_loss: 2.7079 +[titan] 2025-10-05 00:30:36,160 - root - INFO - lr: 4.9472e-05 gnorm: 1.26 [ 1:56:27<22:49:30] +[titan] 2025-10-05 00:30:47,054 - root - INFO - step: 3140 loss: 3.0122 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3630 global_avg_mtp_loss: 2.6492 +[titan] 2025-10-05 00:30:47,054 - root - INFO - lr: 4.9470e-05 gnorm: 1.22 [ 1:56:38<22:49:16] +[titan] 2025-10-05 00:30:57,914 - root - INFO - step: 3145 loss: 3.0169 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6560 +[titan] 2025-10-05 00:30:57,914 - root - INFO - lr: 4.9468e-05 gnorm: 1.27 [ 1:56:49<22:49:01] +[titan] 2025-10-05 00:31:06,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:31:08,816 - root - INFO - step: 3150 loss: 2.9327 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3545 global_avg_mtp_loss: 2.5782 +[titan] 2025-10-05 00:31:08,816 - root - INFO - lr: 4.9466e-05 gnorm: 1.26 [ 1:57:00<22:48:47] +[titan] 2025-10-05 00:31:19,715 - root - INFO - step: 3155 loss: 3.0434 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3631 global_avg_mtp_loss: 2.6803 +[titan] 2025-10-05 00:31:19,715 - root - INFO - lr: 4.9464e-05 gnorm: 1.33 [ 1:57:11<22:48:33] +[titan] 2025-10-05 00:31:30,598 - root - INFO - step: 3160 loss: 2.9152 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5676 +[titan] 2025-10-05 00:31:30,598 - root - INFO - lr: 4.9462e-05 gnorm: 1.28 [ 1:57:22<22:48:19] +[titan] 2025-10-05 00:31:41,468 - root - INFO - step: 3165 loss: 3.0228 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3626 global_avg_mtp_loss: 2.6602 +[titan] 2025-10-05 00:31:41,469 - root - INFO - lr: 4.9460e-05 gnorm: 1.32 [ 1:57:33<22:48:05] +[titan] 2025-10-05 00:31:52,401 - root - INFO - step: 3170 loss: 2.9954 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6368 +[titan] 2025-10-05 00:31:52,401 - root - INFO - lr: 4.9459e-05 gnorm: 1.39 [ 1:57:44<22:47:51] +[titan] 2025-10-05 00:32:03,274 - root - INFO - step: 3175 loss: 2.9805 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.6231 +[titan] 2025-10-05 00:32:03,274 - root - INFO - lr: 4.9457e-05 gnorm: 1.26 [ 1:57:54<22:47:37] +[titan] 2025-10-05 00:32:14,178 - root - INFO - step: 3180 loss: 3.0141 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.3598 global_avg_mtp_loss: 2.6543 +[titan] 2025-10-05 00:32:14,178 - root - INFO - lr: 4.9455e-05 
gnorm: 1.31 [ 1:58:05<22:47:23] +[titan] 2025-10-05 00:32:25,055 - root - INFO - step: 3185 loss: 3.0493 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3641 global_avg_mtp_loss: 2.6851 +[titan] 2025-10-05 00:32:25,055 - root - INFO - lr: 4.9453e-05 gnorm: 1.37 [ 1:58:16<22:47:09] +[titan] 2025-10-05 00:32:35,936 - root - INFO - step: 3190 loss: 2.9654 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6093 +[titan] 2025-10-05 00:32:35,936 - root - INFO - lr: 4.9451e-05 gnorm: 1.29 [ 1:58:27<22:46:55] +[titan] 2025-10-05 00:32:46,815 - root - INFO - step: 3195 loss: 2.9889 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3566 global_avg_mtp_loss: 2.6323 +[titan] 2025-10-05 00:32:46,816 - root - INFO - lr: 4.9449e-05 gnorm: 1.28 [ 1:58:38<22:46:41] +[titan] 2025-10-05 00:32:55,521 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:32:57,705 - root - INFO - step: 3200 loss: 2.9502 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.5953 +[titan] 2025-10-05 00:32:57,705 - root - INFO - lr: 4.9447e-05 gnorm: 1.30 [ 1:58:49<22:46:27] +[titan] 2025-10-05 00:33:08,681 - root - INFO - step: 3205 loss: 2.9709 memory: 118.84GiB(85.28%) tps: 29,857 tflops: 414.22 mfu: 41.88% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6155 +[titan] 2025-10-05 00:33:08,681 - root - INFO - lr: 4.9445e-05 gnorm: 1.23 [ 1:59:00<22:46:14] +[titan] 2025-10-05 00:33:19,557 - root - INFO - step: 3210 loss: 2.9185 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3492 global_avg_mtp_loss: 2.5693 +[titan] 2025-10-05 00:33:19,558 - root - INFO - lr: 4.9443e-05 gnorm: 1.28 [ 1:59:11<22:45:59] +[titan] 2025-10-05 00:33:30,432 - root - INFO - step: 3215 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3514 global_avg_mtp_loss: 2.5956 +[titan] 2025-10-05 00:33:30,432 - root - INFO - lr: 4.9441e-05 gnorm: 1.39 [ 1:59:22<22:45:45] +[titan] 2025-10-05 00:33:41,300 - root - INFO - step: 3220 loss: 3.0300 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3628 global_avg_mtp_loss: 2.6673 +[titan] 2025-10-05 00:33:41,300 - root - INFO - lr: 4.9439e-05 gnorm: 1.32 [ 1:59:32<22:45:31] +[titan] 2025-10-05 00:33:52,166 - root - INFO - step: 3225 loss: 3.0123 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3608 global_avg_mtp_loss: 2.6515 +[titan] 2025-10-05 00:33:52,166 - root - INFO - lr: 4.9437e-05 gnorm: 1.29 [ 1:59:43<22:45:17] +[titan] 2025-10-05 00:34:03,015 - root - INFO - step: 3230 loss: 3.0282 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3622 global_avg_mtp_loss: 2.6660 +[titan] 2025-10-05 00:34:03,015 - root - INFO - lr: 4.9435e-05 
gnorm: 1.29 [ 1:59:54<22:45:02] +[titan] 2025-10-05 00:34:13,972 - root - INFO - step: 3235 loss: 3.0440 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3646 global_avg_mtp_loss: 2.6794 +[titan] 2025-10-05 00:34:13,973 - root - INFO - lr: 4.9433e-05 gnorm: 1.27 [ 2:00:05<22:44:49] +[titan] 2025-10-05 00:34:24,817 - root - INFO - step: 3240 loss: 2.9616 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:34:24,817 - root - INFO - lr: 4.9431e-05 gnorm: 1.21 [ 2:00:16<22:44:35] +[titan] 2025-10-05 00:34:35,664 - root - INFO - step: 3245 loss: 3.0402 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.3642 global_avg_mtp_loss: 2.6760 +[titan] 2025-10-05 00:34:35,664 - root - INFO - lr: 4.9429e-05 gnorm: 1.23 [ 2:00:27<22:44:20] +[titan] 2025-10-05 00:34:44,353 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:34:46,540 - root - INFO - step: 3250 loss: 3.0298 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3617 global_avg_mtp_loss: 2.6681 +[titan] 2025-10-05 00:34:46,540 - root - INFO - lr: 4.9427e-05 gnorm: 1.26 [ 2:00:38<22:44:06] +[titan] 2025-10-05 00:34:57,421 - root - INFO - step: 3255 loss: 2.9633 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6078 +[titan] 2025-10-05 00:34:57,421 - root - INFO - lr: 4.9425e-05 gnorm: 1.31 [ 2:00:49<22:43:52] +[titan] 2025-10-05 00:35:08,296 - root - INFO - step: 3260 loss: 2.9911 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3571 global_avg_mtp_loss: 2.6340 +[titan] 2025-10-05 00:35:08,296 - root - INFO - lr: 4.9423e-05 gnorm: 1.27 [ 2:00:59<22:43:38] +[titan] 2025-10-05 00:35:19,241 - root - INFO - step: 3265 loss: 2.9592 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.6039 +[titan] 2025-10-05 00:35:19,242 - root - INFO - lr: 4.9421e-05 gnorm: 1.30 [ 2:01:10<22:43:25] +[titan] 2025-10-05 00:35:30,115 - root - INFO - step: 3270 loss: 2.9685 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3560 global_avg_mtp_loss: 2.6125 +[titan] 2025-10-05 00:35:30,115 - root - INFO - lr: 4.9419e-05 gnorm: 1.33 [ 2:01:21<22:43:11] +[titan] 2025-10-05 00:35:40,981 - root - INFO - step: 3275 loss: 3.0649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3675 global_avg_mtp_loss: 2.6974 +[titan] 2025-10-05 00:35:40,981 - root - INFO - lr: 4.9417e-05 gnorm: 1.36 [ 2:01:32<22:42:57] +[titan] 2025-10-05 00:35:51,879 - root - INFO - step: 3280 loss: 2.9994 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3593 global_avg_mtp_loss: 2.6401 +[titan] 2025-10-05 00:35:51,879 - root - INFO - lr: 4.9415e-05 
gnorm: 1.31 [ 2:01:43<22:42:43] +[titan] 2025-10-05 00:36:02,779 - root - INFO - step: 3285 loss: 2.9516 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3524 global_avg_mtp_loss: 2.5992 +[titan] 2025-10-05 00:36:02,779 - root - INFO - lr: 4.9413e-05 gnorm: 1.24 [ 2:01:54<22:42:29] +[titan] 2025-10-05 00:36:13,718 - root - INFO - step: 3290 loss: 3.0135 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3601 global_avg_mtp_loss: 2.6533 +[titan] 2025-10-05 00:36:13,718 - root - INFO - lr: 4.9411e-05 gnorm: 1.32 [ 2:02:05<22:42:16] +[titan] 2025-10-05 00:36:24,612 - root - INFO - step: 3295 loss: 2.9374 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3515 global_avg_mtp_loss: 2.5859 +[titan] 2025-10-05 00:36:24,613 - root - INFO - lr: 4.9409e-05 gnorm: 1.30 [ 2:02:16<22:42:02] +[titan] 2025-10-05 00:36:33,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:36:35,569 - root - INFO - step: 3300 loss: 3.0216 memory: 118.84GiB(85.28%) tps: 29,907 tflops: 414.91 mfu: 41.95% global_avg_ntp_loss: 0.3625 global_avg_mtp_loss: 2.6591 +[titan] 2025-10-05 00:36:35,570 - root - INFO - lr: 4.9407e-05 gnorm: 1.29 [ 2:02:27<22:41:49] +[titan] 2025-10-05 00:36:46,479 - root - INFO - step: 3305 loss: 2.9748 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3555 global_avg_mtp_loss: 2.6193 +[titan] 2025-10-05 00:36:46,480 - root - INFO - lr: 4.9405e-05 gnorm: 1.29 [ 2:02:38<22:41:35] +[titan] 2025-10-05 00:36:57,349 - root - INFO - step: 3310 loss: 2.9636 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3540 global_avg_mtp_loss: 2.6095 +[titan] 2025-10-05 00:36:57,350 - root - INFO - lr: 4.9403e-05 gnorm: 1.18 [ 2:02:48<22:41:21] +[titan] 2025-10-05 00:37:08,233 - root - INFO - step: 3315 loss: 2.9774 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3549 global_avg_mtp_loss: 2.6224 +[titan] 2025-10-05 00:37:08,233 - root - INFO - lr: 4.9401e-05 gnorm: 1.24 [ 2:02:59<22:41:07] +[titan] 2025-10-05 00:37:19,133 - root - INFO - step: 3320 loss: 2.9377 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:37:19,133 - root - INFO - lr: 4.9399e-05 gnorm: 1.24 [ 2:03:10<22:40:54] +[titan] 2025-10-05 00:37:29,998 - root - INFO - step: 3325 loss: 2.8934 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:37:29,999 - root - INFO - lr: 4.9397e-05 gnorm: 1.31 [ 2:03:21<22:40:40] +[titan] 2025-10-05 00:37:40,921 - root - INFO - step: 3330 loss: 3.0003 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3596 global_avg_mtp_loss: 2.6407 +[titan] 2025-10-05 00:37:40,921 - root - INFO - lr: 4.9395e-05 
gnorm: 1.29 [ 2:03:32<22:40:26] +[titan] 2025-10-05 00:37:51,784 - root - INFO - step: 3335 loss: 2.9450 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.5927 +[titan] 2025-10-05 00:37:51,784 - root - INFO - lr: 4.9392e-05 gnorm: 1.29 [ 2:03:43<22:40:12] +[titan] 2025-10-05 00:38:02,640 - root - INFO - step: 3340 loss: 2.9243 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3497 global_avg_mtp_loss: 2.5746 +[titan] 2025-10-05 00:38:02,640 - root - INFO - lr: 4.9390e-05 gnorm: 1.24 [ 2:03:54<22:39:58] +[titan] 2025-10-05 00:38:13,559 - root - INFO - step: 3345 loss: 2.9258 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5767 +[titan] 2025-10-05 00:38:13,559 - root - INFO - lr: 4.9388e-05 gnorm: 1.32 [ 2:04:05<22:39:45] +[titan] 2025-10-05 00:38:22,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:38:24,447 - root - INFO - step: 3350 loss: 2.9893 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3573 global_avg_mtp_loss: 2.6320 +[titan] 2025-10-05 00:38:24,447 - root - INFO - lr: 4.9386e-05 gnorm: 1.23 [ 2:04:16<22:39:31] +[titan] 2025-10-05 00:38:35,319 - root - INFO - step: 3355 loss: 2.8550 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3405 global_avg_mtp_loss: 2.5146 +[titan] 2025-10-05 00:38:35,319 - root - INFO - lr: 4.9384e-05 gnorm: 1.25 [ 2:04:26<22:39:17] +[titan] 2025-10-05 00:38:46,199 - root - INFO - step: 3360 loss: 2.8891 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:38:46,199 - root - INFO - lr: 4.9382e-05 gnorm: 1.31 [ 2:04:37<22:39:03] +[titan] 2025-10-05 00:38:57,161 - root - INFO - step: 3365 loss: 2.9521 memory: 118.84GiB(85.28%) tps: 29,893 tflops: 414.71 mfu: 41.93% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.5991 +[titan] 2025-10-05 00:38:57,161 - root - INFO - lr: 4.9380e-05 gnorm: 1.25 [ 2:04:48<22:38:50] +[titan] 2025-10-05 00:39:08,046 - root - INFO - step: 3370 loss: 2.9471 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3552 global_avg_mtp_loss: 2.5919 +[titan] 2025-10-05 00:39:08,046 - root - INFO - lr: 4.9378e-05 gnorm: 1.32 [ 2:04:59<22:38:36] +[titan] 2025-10-05 00:39:18,937 - root - INFO - step: 3375 loss: 2.9184 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5694 +[titan] 2025-10-05 00:39:18,937 - root - INFO - lr: 4.9376e-05 gnorm: 1.25 [ 2:05:10<22:38:23] +[titan] 2025-10-05 00:39:29,827 - root - INFO - step: 3380 loss: 2.9621 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.3541 global_avg_mtp_loss: 2.6080 +[titan] 2025-10-05 00:39:29,827 - root - INFO - lr: 4.9374e-05 
gnorm: 1.24 [ 2:05:21<22:38:09] +[titan] 2025-10-05 00:39:40,719 - root - INFO - step: 3385 loss: 2.9011 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3465 global_avg_mtp_loss: 2.5547 +[titan] 2025-10-05 00:39:40,719 - root - INFO - lr: 4.9372e-05 gnorm: 1.22 [ 2:05:32<22:37:55] +[titan] 2025-10-05 00:39:51,594 - root - INFO - step: 3390 loss: 2.9910 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3586 global_avg_mtp_loss: 2.6324 +[titan] 2025-10-05 00:39:51,595 - root - INFO - lr: 4.9370e-05 gnorm: 1.24 [ 2:05:43<22:37:41] +[titan] 2025-10-05 00:40:02,576 - root - INFO - step: 3395 loss: 2.9436 memory: 118.84GiB(85.28%) tps: 29,839 tflops: 413.97 mfu: 41.86% global_avg_ntp_loss: 0.3529 global_avg_mtp_loss: 2.5907 +[titan] 2025-10-05 00:40:02,577 - root - INFO - lr: 4.9367e-05 gnorm: 1.26 [ 2:05:54<22:37:29] +[titan] 2025-10-05 00:40:11,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:40:13,489 - root - INFO - step: 3400 loss: 2.9838 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3569 global_avg_mtp_loss: 2.6269 +[titan] 2025-10-05 00:40:13,489 - root - INFO - lr: 4.9365e-05 gnorm: 1.27 [ 2:06:05<22:37:15] +[titan] 2025-10-05 00:40:24,371 - root - INFO - step: 3405 loss: 3.0515 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3666 global_avg_mtp_loss: 2.6849 +[titan] 2025-10-05 00:40:24,371 - root - INFO - lr: 4.9363e-05 gnorm: 1.23 [ 2:06:15<22:37:02] +[titan] 2025-10-05 00:40:35,244 - root - INFO - step: 3410 loss: 2.9631 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3556 global_avg_mtp_loss: 2.6074 +[titan] 2025-10-05 00:40:35,244 - root - INFO - lr: 4.9361e-05 gnorm: 1.28 [ 2:06:26<22:36:48] +[titan] 2025-10-05 00:40:46,133 - root - INFO - step: 3415 loss: 2.9578 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3547 global_avg_mtp_loss: 2.6032 +[titan] 2025-10-05 00:40:46,133 - root - INFO - lr: 4.9359e-05 gnorm: 1.23 [ 2:06:37<22:36:34] +[titan] 2025-10-05 00:40:57,009 - root - INFO - step: 3420 loss: 2.9329 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3498 global_avg_mtp_loss: 2.5832 +[titan] 2025-10-05 00:40:57,009 - root - INFO - lr: 4.9357e-05 gnorm: 1.19 [ 2:06:48<22:36:20] +[titan] 2025-10-05 00:41:07,937 - root - INFO - step: 3425 loss: 2.9564 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.6041 +[titan] 2025-10-05 00:41:07,937 - root - INFO - lr: 4.9355e-05 gnorm: 1.27 [ 2:06:59<22:36:07] +[titan] 2025-10-05 00:41:18,921 - root - INFO - step: 3430 loss: 2.9729 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.89 mfu: 41.85% global_avg_ntp_loss: 0.3564 global_avg_mtp_loss: 2.6166 +[titan] 2025-10-05 00:41:18,921 - root - INFO - lr: 4.9353e-05 
gnorm: 1.26 [ 2:07:10<22:35:55] +[titan] 2025-10-05 00:41:29,788 - root - INFO - step: 3435 loss: 2.9570 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3535 global_avg_mtp_loss: 2.6035 +[titan] 2025-10-05 00:41:29,789 - root - INFO - lr: 4.9351e-05 gnorm: 1.30 [ 2:07:21<22:35:41] +[titan] 2025-10-05 00:41:40,636 - root - INFO - step: 3440 loss: 2.9121 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.3473 global_avg_mtp_loss: 2.5649 +[titan] 2025-10-05 00:41:40,637 - root - INFO - lr: 4.9348e-05 gnorm: 1.25 [ 2:07:32<22:35:27] +[titan] 2025-10-05 00:41:51,497 - root - INFO - step: 3445 loss: 2.9720 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3548 global_avg_mtp_loss: 2.6172 +[titan] 2025-10-05 00:41:51,498 - root - INFO - lr: 4.9346e-05 gnorm: 1.24 [ 2:07:43<22:35:13] +[titan] 2025-10-05 00:42:00,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:42:02,373 - root - INFO - step: 3450 loss: 3.0025 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3609 global_avg_mtp_loss: 2.6416 +[titan] 2025-10-05 00:42:02,373 - root - INFO - lr: 4.9344e-05 gnorm: 1.40 [ 2:07:53<22:34:59] +[titan] 2025-10-05 00:42:13,236 - root - INFO - step: 3455 loss: 2.8984 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5533 +[titan] 2025-10-05 00:42:13,236 - root - INFO - lr: 4.9342e-05 gnorm: 1.33 [ 2:08:04<22:34:45] +[titan] 2025-10-05 00:42:24,195 - root - INFO - step: 3460 loss: 2.8961 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.3468 global_avg_mtp_loss: 2.5493 +[titan] 2025-10-05 00:42:24,195 - root - INFO - lr: 4.9340e-05 gnorm: 1.30 [ 2:08:15<22:34:32] +[titan] 2025-10-05 00:42:35,085 - root - INFO - step: 3465 loss: 3.0085 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3591 global_avg_mtp_loss: 2.6494 +[titan] 2025-10-05 00:42:35,085 - root - INFO - lr: 4.9338e-05 gnorm: 1.28 [ 2:08:26<22:34:19] +[titan] 2025-10-05 00:42:45,952 - root - INFO - step: 3470 loss: 2.9361 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5855 +[titan] 2025-10-05 00:42:45,952 - root - INFO - lr: 4.9336e-05 gnorm: 1.26 [ 2:08:37<22:34:05] +[titan] 2025-10-05 00:42:56,840 - root - INFO - step: 3475 loss: 2.9223 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3496 global_avg_mtp_loss: 2.5727 +[titan] 2025-10-05 00:42:56,841 - root - INFO - lr: 4.9333e-05 gnorm: 1.25 [ 2:08:48<22:33:51] +[titan] 2025-10-05 00:43:07,696 - root - INFO - step: 3480 loss: 2.9007 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5539 +[titan] 2025-10-05 00:43:07,696 - root - INFO - lr: 4.9331e-05 
gnorm: 1.30 [ 2:08:59<22:33:37] +[titan] 2025-10-05 00:43:18,563 - root - INFO - step: 3485 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5412 +[titan] 2025-10-05 00:43:18,564 - root - INFO - lr: 4.9329e-05 gnorm: 1.24 [ 2:09:10<22:33:24] +[titan] 2025-10-05 00:43:29,498 - root - INFO - step: 3490 loss: 2.9877 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3565 global_avg_mtp_loss: 2.6312 +[titan] 2025-10-05 00:43:29,498 - root - INFO - lr: 4.9327e-05 gnorm: 1.34 [ 2:09:21<22:33:11] +[titan] 2025-10-05 00:43:40,371 - root - INFO - step: 3495 loss: 2.8500 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5087 +[titan] 2025-10-05 00:43:40,371 - root - INFO - lr: 4.9325e-05 gnorm: 1.24 [ 2:09:31<22:32:57] +[titan] 2025-10-05 00:43:49,059 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:43:51,242 - root - INFO - step: 3500 loss: 2.9053 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3477 global_avg_mtp_loss: 2.5576 +[titan] 2025-10-05 00:43:51,242 - root - INFO - lr: 4.9323e-05 gnorm: 1.26 [ 2:09:42<22:32:43] +[titan] 2025-10-05 00:44:02,120 - root - INFO - step: 3505 loss: 2.9596 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3522 global_avg_mtp_loss: 2.6075 +[titan] 2025-10-05 00:44:02,120 - root - INFO - lr: 4.9320e-05 gnorm: 1.27 [ 2:09:53<22:32:30] +[titan] 2025-10-05 00:44:13,041 - root - INFO - step: 3510 loss: 2.9620 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.3557 global_avg_mtp_loss: 2.6063 +[titan] 2025-10-05 00:44:13,041 - root - INFO - lr: 4.9318e-05 gnorm: 1.36 [ 2:10:04<22:32:16] +[titan] 2025-10-05 00:44:23,983 - root - INFO - step: 3515 loss: 2.9163 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.3482 global_avg_mtp_loss: 2.5681 +[titan] 2025-10-05 00:44:23,983 - root - INFO - lr: 4.9316e-05 gnorm: 1.35 [ 2:10:15<22:32:03] +[titan] 2025-10-05 00:44:34,890 - root - INFO - step: 3520 loss: 2.9840 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3664 global_avg_mtp_loss: 2.6176 +[titan] 2025-10-05 00:44:34,890 - root - INFO - lr: 4.9314e-05 gnorm: 1.30 [ 2:10:26<22:31:50] +[titan] 2025-10-05 00:44:45,807 - root - INFO - step: 3525 loss: 2.8766 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3420 global_avg_mtp_loss: 2.5345 +[titan] 2025-10-05 00:44:45,807 - root - INFO - lr: 4.9312e-05 gnorm: 1.33 [ 2:10:37<22:31:37] +[titan] 2025-10-05 00:44:56,695 - root - INFO - step: 3530 loss: 2.8643 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5214 +[titan] 2025-10-05 00:44:56,696 - root - INFO - lr: 4.9309e-05 
gnorm: 1.31 [ 2:10:48<22:31:24] +[titan] 2025-10-05 00:45:07,556 - root - INFO - step: 3535 loss: 2.9317 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5804 +[titan] 2025-10-05 00:45:07,556 - root - INFO - lr: 4.9307e-05 gnorm: 1.24 [ 2:10:59<22:31:10] +[titan] 2025-10-05 00:45:18,462 - root - INFO - step: 3540 loss: 2.9149 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3487 global_avg_mtp_loss: 2.5661 +[titan] 2025-10-05 00:45:18,463 - root - INFO - lr: 4.9305e-05 gnorm: 1.24 [ 2:11:10<22:30:56] +[titan] 2025-10-05 00:45:29,403 - root - INFO - step: 3545 loss: 2.9166 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5665 +[titan] 2025-10-05 00:45:29,403 - root - INFO - lr: 4.9303e-05 gnorm: 1.31 [ 2:11:20<22:30:44] +[titan] 2025-10-05 00:45:38,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:45:40,270 - root - INFO - step: 3550 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3460 global_avg_mtp_loss: 2.5477 +[titan] 2025-10-05 00:45:40,270 - root - INFO - lr: 4.9301e-05 gnorm: 1.20 [ 2:11:31<22:30:30] +[titan] 2025-10-05 00:45:51,156 - root - INFO - step: 3555 loss: 2.8547 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3414 global_avg_mtp_loss: 2.5133 +[titan] 2025-10-05 00:45:51,156 - root - INFO - lr: 4.9298e-05 gnorm: 1.22 [ 2:11:42<22:30:16] +[titan] 2025-10-05 00:46:02,028 - root - INFO - step: 3560 loss: 2.9708 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3553 global_avg_mtp_loss: 2.6154 +[titan] 2025-10-05 00:46:02,029 - root - INFO - lr: 4.9296e-05 gnorm: 1.25 [ 2:11:53<22:30:03] +[titan] 2025-10-05 00:46:12,872 - root - INFO - step: 3565 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3463 global_avg_mtp_loss: 2.5474 +[titan] 2025-10-05 00:46:12,873 - root - INFO - lr: 4.9294e-05 gnorm: 1.22 [ 2:12:04<22:29:49] +[titan] 2025-10-05 00:46:23,793 - root - INFO - step: 3570 loss: 2.9591 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3538 global_avg_mtp_loss: 2.6053 +[titan] 2025-10-05 00:46:23,794 - root - INFO - lr: 4.9292e-05 gnorm: 1.26 [ 2:12:15<22:29:36] +[titan] 2025-10-05 00:46:34,664 - root - INFO - step: 3575 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3519 global_avg_mtp_loss: 2.5886 +[titan] 2025-10-05 00:46:34,664 - root - INFO - lr: 4.9290e-05 gnorm: 1.30 [ 2:12:26<22:29:22] +[titan] 2025-10-05 00:46:45,547 - root - INFO - step: 3580 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3464 global_avg_mtp_loss: 2.5473 +[titan] 2025-10-05 00:46:45,548 - root - INFO - lr: 4.9287e-05 
gnorm: 1.23 [ 2:12:37<22:29:09] +[titan] 2025-10-05 00:46:54,487 - root - INFO - Dumping profiler traces at step 3584 +[titan] 2025-10-05 00:46:54,522 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 00:46:56,755 - root - INFO - step: 3585 loss: 2.9232 memory: 118.84GiB(85.28%) tps: 29,238 tflops: 405.64 mfu: 41.02% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5729 +[titan] 2025-10-05 00:46:56,755 - root - INFO - lr: 4.9285e-05 gnorm: 1.28 [ 2:12:48<22:28:59] +[titan] 2025-10-05 00:47:07,619 - root - INFO - step: 3590 loss: 2.9273 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3501 global_avg_mtp_loss: 2.5772 +[titan] 2025-10-05 00:47:07,619 - root - INFO - lr: 4.9283e-05 gnorm: 1.25 [ 2:12:59<22:28:45] +[titan] 2025-10-05 00:47:18,508 - root - INFO - step: 3595 loss: 2.9212 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3491 global_avg_mtp_loss: 2.5720 +[titan] 2025-10-05 00:47:18,509 - root - INFO - lr: 4.9281e-05 gnorm: 1.34 [ 2:13:10<22:28:32] +[titan] 2025-10-05 00:47:27,262 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:47:29,446 - root - INFO - step: 3600 loss: 2.8603 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5188 +[titan] 2025-10-05 00:47:29,447 - root - INFO - lr: 4.9278e-05 gnorm: 1.22 [ 2:13:21<22:28:19] +[titan] 2025-10-05 00:47:40,308 - root - INFO - step: 3605 loss: 2.8618 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5190 +[titan] 2025-10-05 00:47:40,309 - root - INFO - lr: 4.9276e-05 gnorm: 1.19 [ 2:13:31<22:28:05] +[titan] 2025-10-05 00:47:51,174 - root - INFO - step: 3610 loss: 2.9114 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5639 +[titan] 2025-10-05 00:47:51,174 - root - INFO - lr: 4.9274e-05 gnorm: 1.25 [ 2:13:42<22:27:51] +[titan] 2025-10-05 00:48:02,028 - root - INFO - step: 3615 loss: 2.8693 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5265 +[titan] 2025-10-05 00:48:02,028 - root - INFO - lr: 4.9272e-05 gnorm: 1.25 [ 2:13:53<22:27:38] +[titan] 2025-10-05 00:48:12,956 - root - INFO - step: 3620 loss: 2.9829 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3582 global_avg_mtp_loss: 2.6247 +[titan] 2025-10-05 00:48:12,956 - root - INFO - lr: 4.9269e-05 gnorm: 1.25 [ 2:14:04<22:27:25] +[titan] 2025-10-05 00:48:23,914 - root - INFO - step: 3625 loss: 2.9614 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3530 global_avg_mtp_loss: 2.6084 +[titan] 2025-10-05 00:48:23,914 - root - INFO - lr: 4.9267e-05 gnorm: 1.19 [ 2:14:15<22:27:12] +[titan] 2025-10-05 00:48:34,821 - root - INFO - step: 3630 loss: 2.9416 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.15% global_avg_ntp_loss: 0.3505 global_avg_mtp_loss: 2.5911 +[titan] 2025-10-05 00:48:34,821 - root - INFO - lr: 4.9265e-05 
gnorm: 1.30 [ 2:14:26<22:26:59] +[titan] 2025-10-05 00:48:45,728 - root - INFO - step: 3635 loss: 2.8827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.5377 +[titan] 2025-10-05 00:48:45,728 - root - INFO - lr: 4.9263e-05 gnorm: 1.22 [ 2:14:37<22:26:46] +[titan] 2025-10-05 00:48:56,629 - root - INFO - step: 3640 loss: 2.8474 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3379 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 00:48:56,629 - root - INFO - lr: 4.9260e-05 gnorm: 1.32 [ 2:14:48<22:26:33] +[titan] 2025-10-05 00:49:07,530 - root - INFO - step: 3645 loss: 2.9298 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3506 global_avg_mtp_loss: 2.5793 +[titan] 2025-10-05 00:49:07,530 - root - INFO - lr: 4.9258e-05 gnorm: 1.31 [ 2:14:59<22:26:19] +[titan] 2025-10-05 00:49:16,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:49:18,488 - root - INFO - step: 3650 loss: 3.0056 memory: 118.84GiB(85.28%) tps: 29,905 tflops: 414.89 mfu: 41.95% global_avg_ntp_loss: 0.3600 global_avg_mtp_loss: 2.6456 +[titan] 2025-10-05 00:49:18,488 - root - INFO - lr: 4.9256e-05 gnorm: 1.32 [ 2:15:10<22:26:07] +[titan] 2025-10-05 00:49:29,386 - root - INFO - step: 3655 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5347 +[titan] 2025-10-05 00:49:29,387 - root - INFO - lr: 4.9254e-05 gnorm: 1.23 [ 2:15:20<22:25:54] +[titan] 2025-10-05 00:49:40,255 - root - INFO - step: 3660 loss: 2.8748 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5314 +[titan] 2025-10-05 00:49:40,255 - root - INFO - lr: 4.9251e-05 gnorm: 1.26 [ 2:15:31<22:25:40] +[titan] 2025-10-05 00:49:51,122 - root - INFO - step: 3665 loss: 2.9419 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5896 +[titan] 2025-10-05 00:49:51,122 - root - INFO - lr: 4.9249e-05 gnorm: 1.24 [ 2:15:42<22:25:27] +[titan] 2025-10-05 00:50:01,986 - root - INFO - step: 3670 loss: 2.8845 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5398 +[titan] 2025-10-05 00:50:01,987 - root - INFO - lr: 4.9247e-05 gnorm: 1.29 [ 2:15:53<22:25:13] +[titan] 2025-10-05 00:50:12,850 - root - INFO - step: 3675 loss: 2.8906 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5453 +[titan] 2025-10-05 00:50:12,851 - root - INFO - lr: 4.9244e-05 gnorm: 1.29 [ 2:16:04<22:24:59] +[titan] 2025-10-05 00:50:23,731 - root - INFO - step: 3680 loss: 3.0241 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3632 global_avg_mtp_loss: 2.6609 +[titan] 2025-10-05 00:50:23,731 - root - INFO - lr: 4.9242e-05 
gnorm: 1.28 [ 2:16:15<22:24:46] +[titan] 2025-10-05 00:50:34,722 - root - INFO - step: 3685 loss: 2.9110 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.3489 global_avg_mtp_loss: 2.5621 +[titan] 2025-10-05 00:50:34,723 - root - INFO - lr: 4.9240e-05 gnorm: 1.25 [ 2:16:26<22:24:34] +[titan] 2025-10-05 00:50:45,616 - root - INFO - step: 3690 loss: 2.8445 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5064 +[titan] 2025-10-05 00:50:45,616 - root - INFO - lr: 4.9238e-05 gnorm: 1.23 [ 2:16:37<22:24:21] +[titan] 2025-10-05 00:50:56,496 - root - INFO - step: 3695 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3361 global_avg_mtp_loss: 2.4870 +[titan] 2025-10-05 00:50:56,496 - root - INFO - lr: 4.9235e-05 gnorm: 1.19 [ 2:16:48<22:24:07] +[titan] 2025-10-05 00:51:05,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:51:07,381 - root - INFO - step: 3700 loss: 2.8874 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3447 global_avg_mtp_loss: 2.5427 +[titan] 2025-10-05 00:51:07,381 - root - INFO - lr: 4.9233e-05 gnorm: 1.31 [ 2:16:58<22:23:54] +[titan] 2025-10-05 00:51:18,258 - root - INFO - step: 3705 loss: 2.8866 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3441 global_avg_mtp_loss: 2.5425 +[titan] 2025-10-05 00:51:18,258 - root - INFO - lr: 4.9231e-05 gnorm: 1.36 [ 2:17:09<22:23:41] +[titan] 2025-10-05 00:51:29,175 - root - INFO - step: 3710 loss: 2.9115 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3467 global_avg_mtp_loss: 2.5648 +[titan] 2025-10-05 00:51:29,175 - root - INFO - lr: 4.9228e-05 gnorm: 1.27 [ 2:17:20<22:23:28] +[titan] 2025-10-05 00:51:40,064 - root - INFO - step: 3715 loss: 2.9140 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5671 +[titan] 2025-10-05 00:51:40,064 - root - INFO - lr: 4.9226e-05 gnorm: 1.23 [ 2:17:31<22:23:14] +[titan] 2025-10-05 00:51:50,950 - root - INFO - step: 3720 loss: 2.8644 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.5220 +[titan] 2025-10-05 00:51:50,950 - root - INFO - lr: 4.9224e-05 gnorm: 1.28 [ 2:17:42<22:23:01] +[titan] 2025-10-05 00:52:01,826 - root - INFO - step: 3725 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5677 +[titan] 2025-10-05 00:52:01,826 - root - INFO - lr: 4.9221e-05 gnorm: 1.30 [ 2:17:53<22:22:48] +[titan] 2025-10-05 00:52:12,692 - root - INFO - step: 3730 loss: 2.8843 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5392 +[titan] 2025-10-05 00:52:12,692 - root - INFO - lr: 4.9219e-05 
gnorm: 1.27 [ 2:18:04<22:22:34] +[titan] 2025-10-05 00:52:23,581 - root - INFO - step: 3735 loss: 2.8622 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3411 global_avg_mtp_loss: 2.5211 +[titan] 2025-10-05 00:52:23,581 - root - INFO - lr: 4.9217e-05 gnorm: 1.29 [ 2:18:15<22:22:21] +[titan] 2025-10-05 00:52:34,507 - root - INFO - step: 3740 loss: 2.8833 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5382 +[titan] 2025-10-05 00:52:34,507 - root - INFO - lr: 4.9214e-05 gnorm: 1.32 [ 2:18:26<22:22:08] +[titan] 2025-10-05 00:52:45,424 - root - INFO - step: 3745 loss: 2.8876 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5439 +[titan] 2025-10-05 00:52:45,424 - root - INFO - lr: 4.9212e-05 gnorm: 1.30 [ 2:18:36<22:21:55] +[titan] 2025-10-05 00:52:54,123 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:52:56,315 - root - INFO - step: 3750 loss: 2.9081 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3472 global_avg_mtp_loss: 2.5609 +[titan] 2025-10-05 00:52:56,315 - root - INFO - lr: 4.9210e-05 gnorm: 1.37 [ 2:18:47<22:21:42] +[titan] 2025-10-05 00:53:07,243 - root - INFO - step: 3755 loss: 2.8797 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 00:53:07,243 - root - INFO - lr: 4.9207e-05 gnorm: 1.27 [ 2:18:58<22:21:29] +[titan] 2025-10-05 00:53:18,154 - root - INFO - step: 3760 loss: 2.8545 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.3406 global_avg_mtp_loss: 2.5139 +[titan] 2025-10-05 00:53:18,154 - root - INFO - lr: 4.9205e-05 gnorm: 1.27 [ 2:19:09<22:21:17] +[titan] 2025-10-05 00:53:29,071 - root - INFO - step: 3765 loss: 2.8350 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.4960 +[titan] 2025-10-05 00:53:29,071 - root - INFO - lr: 4.9203e-05 gnorm: 1.27 [ 2:19:20<22:21:04] +[titan] 2025-10-05 00:53:39,977 - root - INFO - step: 3770 loss: 2.8227 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3370 global_avg_mtp_loss: 2.4857 +[titan] 2025-10-05 00:53:39,978 - root - INFO - lr: 4.9200e-05 gnorm: 1.19 [ 2:19:31<22:20:51] +[titan] 2025-10-05 00:53:50,879 - root - INFO - step: 3775 loss: 2.8842 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3430 global_avg_mtp_loss: 2.5411 +[titan] 2025-10-05 00:53:50,879 - root - INFO - lr: 4.9198e-05 gnorm: 1.23 [ 2:19:42<22:20:38] +[titan] 2025-10-05 00:54:01,831 - root - INFO - step: 3780 loss: 2.9375 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.3513 global_avg_mtp_loss: 2.5862 +[titan] 2025-10-05 00:54:01,831 - root - INFO - lr: 4.9196e-05 
gnorm: 1.20 [ 2:19:53<22:20:25] +[titan] 2025-10-05 00:54:12,711 - root - INFO - step: 3785 loss: 2.8747 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3429 global_avg_mtp_loss: 2.5318 +[titan] 2025-10-05 00:54:12,711 - root - INFO - lr: 4.9193e-05 gnorm: 1.23 [ 2:20:04<22:20:12] +[titan] 2025-10-05 00:54:23,577 - root - INFO - step: 3790 loss: 2.8207 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4833 +[titan] 2025-10-05 00:54:23,577 - root - INFO - lr: 4.9191e-05 gnorm: 1.27 [ 2:20:15<22:19:58] +[titan] 2025-10-05 00:54:34,480 - root - INFO - step: 3795 loss: 2.9584 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3532 global_avg_mtp_loss: 2.6052 +[titan] 2025-10-05 00:54:34,480 - root - INFO - lr: 4.9188e-05 gnorm: 1.29 [ 2:20:26<22:19:45] +[titan] 2025-10-05 00:54:43,173 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:54:45,353 - root - INFO - step: 3800 loss: 2.9385 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3511 global_avg_mtp_loss: 2.5874 +[titan] 2025-10-05 00:54:45,353 - root - INFO - lr: 4.9186e-05 gnorm: 1.24 [ 2:20:36<22:19:32] +[titan] 2025-10-05 00:54:56,214 - root - INFO - step: 3805 loss: 2.8516 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3400 global_avg_mtp_loss: 2.5116 +[titan] 2025-10-05 00:54:56,214 - root - INFO - lr: 4.9184e-05 gnorm: 1.32 [ 2:20:47<22:19:19] +[titan] 2025-10-05 00:55:07,134 - root - INFO - step: 3810 loss: 2.8608 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5196 +[titan] 2025-10-05 00:55:07,134 - root - INFO - lr: 4.9181e-05 gnorm: 1.27 [ 2:20:58<22:19:06] +[titan] 2025-10-05 00:55:18,019 - root - INFO - step: 3815 loss: 2.9132 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3495 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 00:55:18,019 - root - INFO - lr: 4.9179e-05 gnorm: 1.33 [ 2:21:09<22:18:53] +[titan] 2025-10-05 00:55:28,882 - root - INFO - step: 3820 loss: 2.8903 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3438 global_avg_mtp_loss: 2.5465 +[titan] 2025-10-05 00:55:28,882 - root - INFO - lr: 4.9176e-05 gnorm: 1.28 [ 2:21:20<22:18:39] +[titan] 2025-10-05 00:55:39,765 - root - INFO - step: 3825 loss: 2.8538 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3396 global_avg_mtp_loss: 2.5142 +[titan] 2025-10-05 00:55:39,765 - root - INFO - lr: 4.9174e-05 gnorm: 1.35 [ 2:21:31<22:18:26] +[titan] 2025-10-05 00:55:50,656 - root - INFO - step: 3830 loss: 2.8951 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5503 +[titan] 2025-10-05 00:55:50,656 - root - INFO - lr: 4.9172e-05 
gnorm: 1.29 [ 2:21:42<22:18:13] +[titan] 2025-10-05 00:56:01,544 - root - INFO - step: 3835 loss: 2.8701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 00:56:01,544 - root - INFO - lr: 4.9169e-05 gnorm: 1.28 [ 2:21:53<22:18:00] +[titan] 2025-10-05 00:56:12,424 - root - INFO - step: 3840 loss: 2.8980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3459 global_avg_mtp_loss: 2.5521 +[titan] 2025-10-05 00:56:12,424 - root - INFO - lr: 4.9167e-05 gnorm: 1.29 [ 2:22:03<22:17:47] +[titan] 2025-10-05 00:56:23,350 - root - INFO - step: 3845 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3451 global_avg_mtp_loss: 2.5442 +[titan] 2025-10-05 00:56:23,350 - root - INFO - lr: 4.9164e-05 gnorm: 1.33 [ 2:22:14<22:17:34] +[titan] 2025-10-05 00:56:32,044 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:56:34,236 - root - INFO - step: 3850 loss: 2.8817 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5379 +[titan] 2025-10-05 00:56:34,237 - root - INFO - lr: 4.9162e-05 gnorm: 1.28 [ 2:22:25<22:17:21] +[titan] 2025-10-05 00:56:45,120 - root - INFO - step: 3855 loss: 2.8016 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 00:56:45,120 - root - INFO - lr: 4.9160e-05 gnorm: 1.32 [ 2:22:36<22:17:08] +[titan] 2025-10-05 00:56:56,000 - root - INFO - step: 3860 loss: 2.8851 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3437 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 00:56:56,000 - root - INFO - lr: 4.9157e-05 gnorm: 1.29 [ 2:22:47<22:16:55] +[titan] 2025-10-05 00:57:06,896 - root - INFO - step: 3865 loss: 2.8534 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3401 global_avg_mtp_loss: 2.5132 +[titan] 2025-10-05 00:57:06,896 - root - INFO - lr: 4.9155e-05 gnorm: 1.25 [ 2:22:58<22:16:42] +[titan] 2025-10-05 00:57:17,779 - root - INFO - step: 3870 loss: 2.9197 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5707 +[titan] 2025-10-05 00:57:17,779 - root - INFO - lr: 4.9152e-05 gnorm: 1.28 [ 2:23:09<22:16:29] +[titan] 2025-10-05 00:57:28,718 - root - INFO - step: 3875 loss: 2.9466 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.3534 global_avg_mtp_loss: 2.5932 +[titan] 2025-10-05 00:57:28,718 - root - INFO - lr: 4.9150e-05 gnorm: 1.21 [ 2:23:20<22:16:16] +[titan] 2025-10-05 00:57:39,599 - root - INFO - step: 3880 loss: 2.8840 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3444 global_avg_mtp_loss: 2.5396 +[titan] 2025-10-05 00:57:39,600 - root - INFO - lr: 4.9148e-05 
gnorm: 1.28 [ 2:23:31<22:16:03] +[titan] 2025-10-05 00:57:50,474 - root - INFO - step: 3885 loss: 2.9370 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3488 global_avg_mtp_loss: 2.5882 +[titan] 2025-10-05 00:57:50,474 - root - INFO - lr: 4.9145e-05 gnorm: 1.25 [ 2:23:42<22:15:50] +[titan] 2025-10-05 00:58:01,351 - root - INFO - step: 3890 loss: 2.9350 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3493 global_avg_mtp_loss: 2.5857 +[titan] 2025-10-05 00:58:01,351 - root - INFO - lr: 4.9143e-05 gnorm: 1.31 [ 2:23:52<22:15:37] +[titan] 2025-10-05 00:58:12,271 - root - INFO - step: 3895 loss: 2.9044 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.3452 global_avg_mtp_loss: 2.5592 +[titan] 2025-10-05 00:58:12,271 - root - INFO - lr: 4.9140e-05 gnorm: 1.26 [ 2:24:03<22:15:24] +[titan] 2025-10-05 00:58:20,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 00:58:23,152 - root - INFO - step: 3900 loss: 2.7993 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 00:58:23,152 - root - INFO - lr: 4.9138e-05 gnorm: 1.27 [ 2:24:14<22:15:11] +[titan] 2025-10-05 00:58:34,070 - root - INFO - step: 3905 loss: 2.9356 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.3502 global_avg_mtp_loss: 2.5854 +[titan] 2025-10-05 00:58:34,070 - root - INFO - lr: 4.9135e-05 gnorm: 1.23 [ 2:24:25<22:14:58] +[titan] 2025-10-05 00:58:44,959 - root - INFO - step: 3910 loss: 2.9153 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3475 global_avg_mtp_loss: 2.5678 +[titan] 2025-10-05 00:58:44,959 - root - INFO - lr: 4.9133e-05 gnorm: 1.26 [ 2:24:36<22:14:45] +[titan] 2025-10-05 00:58:55,830 - root - INFO - step: 3915 loss: 2.8401 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5019 +[titan] 2025-10-05 00:58:55,830 - root - INFO - lr: 4.9130e-05 gnorm: 1.23 [ 2:24:47<22:14:32] +[titan] 2025-10-05 00:59:06,689 - root - INFO - step: 3920 loss: 2.9547 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3597 global_avg_mtp_loss: 2.5950 +[titan] 2025-10-05 00:59:06,690 - root - INFO - lr: 4.9128e-05 gnorm: 1.24 [ 2:24:58<22:14:19] +[titan] 2025-10-05 00:59:17,583 - root - INFO - step: 3925 loss: 2.9231 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3478 global_avg_mtp_loss: 2.5753 +[titan] 2025-10-05 00:59:17,584 - root - INFO - lr: 4.9125e-05 gnorm: 1.29 [ 2:25:09<22:14:06] +[titan] 2025-10-05 00:59:28,459 - root - INFO - step: 3930 loss: 2.8642 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3416 global_avg_mtp_loss: 2.5227 +[titan] 2025-10-05 00:59:28,459 - root - INFO - lr: 4.9123e-05 
gnorm: 1.29 [ 2:25:19<22:13:53] +[titan] 2025-10-05 00:59:39,392 - root - INFO - step: 3935 loss: 2.8806 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3434 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 00:59:39,393 - root - INFO - lr: 4.9121e-05 gnorm: 1.31 [ 2:25:30<22:13:40] +[titan] 2025-10-05 00:59:50,302 - root - INFO - step: 3940 loss: 2.9187 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3484 global_avg_mtp_loss: 2.5703 +[titan] 2025-10-05 00:59:50,302 - root - INFO - lr: 4.9118e-05 gnorm: 1.23 [ 2:25:41<22:13:27] +[titan] 2025-10-05 01:00:01,171 - root - INFO - step: 3945 loss: 2.8435 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3382 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:00:01,172 - root - INFO - lr: 4.9116e-05 gnorm: 1.25 [ 2:25:52<22:13:14] +[titan] 2025-10-05 01:00:09,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:00:12,051 - root - INFO - step: 3950 loss: 2.8893 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5446 +[titan] 2025-10-05 01:00:12,051 - root - INFO - lr: 4.9113e-05 gnorm: 1.27 [ 2:26:03<22:13:01] +[titan] 2025-10-05 01:00:22,938 - root - INFO - step: 3955 loss: 2.8946 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3454 global_avg_mtp_loss: 2.5492 +[titan] 2025-10-05 01:00:22,938 - root - INFO - lr: 4.9111e-05 gnorm: 1.31 [ 2:26:14<22:12:48] +[titan] 2025-10-05 01:00:33,863 - root - INFO - step: 3960 loss: 2.9358 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3490 global_avg_mtp_loss: 2.5868 +[titan] 2025-10-05 01:00:33,863 - root - INFO - lr: 4.9108e-05 gnorm: 1.30 [ 2:26:25<22:12:36] +[titan] 2025-10-05 01:00:44,742 - root - INFO - step: 3965 loss: 2.8537 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3415 global_avg_mtp_loss: 2.5123 +[titan] 2025-10-05 01:00:44,743 - root - INFO - lr: 4.9106e-05 gnorm: 1.24 [ 2:26:36<22:12:22] +[titan] 2025-10-05 01:00:55,669 - root - INFO - step: 3970 loss: 2.8697 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5257 +[titan] 2025-10-05 01:00:55,670 - root - INFO - lr: 4.9103e-05 gnorm: 1.26 [ 2:26:47<22:12:10] +[titan] 2025-10-05 01:01:06,531 - root - INFO - step: 3975 loss: 2.8184 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4844 +[titan] 2025-10-05 01:01:06,531 - root - INFO - lr: 4.9101e-05 gnorm: 1.26 [ 2:26:58<22:11:57] +[titan] 2025-10-05 01:01:17,435 - root - INFO - step: 3980 loss: 2.8685 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5264 +[titan] 2025-10-05 01:01:17,435 - root - INFO - lr: 4.9098e-05 
gnorm: 1.32 [ 2:27:08<22:11:44] +[titan] 2025-10-05 01:01:28,313 - root - INFO - step: 3985 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3312 global_avg_mtp_loss: 2.4498 +[titan] 2025-10-05 01:01:28,313 - root - INFO - lr: 4.9096e-05 gnorm: 1.26 [ 2:27:19<22:11:31] +[titan] 2025-10-05 01:01:39,229 - root - INFO - step: 3990 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.3337 global_avg_mtp_loss: 2.4610 +[titan] 2025-10-05 01:01:39,229 - root - INFO - lr: 4.9093e-05 gnorm: 1.30 [ 2:27:30<22:11:18] +[titan] 2025-10-05 01:01:50,092 - root - INFO - step: 3995 loss: 2.7943 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3322 global_avg_mtp_loss: 2.4621 +[titan] 2025-10-05 01:01:50,092 - root - INFO - lr: 4.9091e-05 gnorm: 1.21 [ 2:27:41<22:11:05] +[titan] 2025-10-05 01:01:58,771 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:02:00,958 - root - INFO - step: 4000 loss: 2.9406 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3504 global_avg_mtp_loss: 2.5902 +[titan] 2025-10-05 01:02:00,958 - root - INFO - lr: 4.9088e-05 gnorm: 1.27 [ 2:27:52<22:10:52] +[titan] 2025-10-05 01:02:11,850 - root - INFO - step: 4005 loss: 2.8699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5271 +[titan] 2025-10-05 01:02:11,850 - root - INFO - lr: 4.9086e-05 gnorm: 1.29 [ 2:28:03<22:10:39] +[titan] 2025-10-05 01:02:22,761 - root - INFO - step: 4010 loss: 2.8862 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3449 global_avg_mtp_loss: 2.5413 +[titan] 2025-10-05 01:02:22,761 - root - INFO - lr: 4.9083e-05 gnorm: 1.23 [ 2:28:14<22:10:26] +[titan] 2025-10-05 01:02:33,616 - root - INFO - step: 4015 loss: 2.8251 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.4858 +[titan] 2025-10-05 01:02:33,616 - root - INFO - lr: 4.9081e-05 gnorm: 1.23 [ 2:28:25<22:10:13] +[titan] 2025-10-05 01:02:44,524 - root - INFO - step: 4020 loss: 2.8756 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5328 +[titan] 2025-10-05 01:02:44,525 - root - INFO - lr: 4.9078e-05 gnorm: 1.23 [ 2:28:36<22:10:00] +[titan] 2025-10-05 01:02:55,396 - root - INFO - step: 4025 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3363 global_avg_mtp_loss: 2.4949 +[titan] 2025-10-05 01:02:55,396 - root - INFO - lr: 4.9076e-05 gnorm: 1.22 [ 2:28:46<22:09:47] +[titan] 2025-10-05 01:03:06,265 - root - INFO - step: 4030 loss: 2.8674 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5248 +[titan] 2025-10-05 01:03:06,265 - root - INFO - lr: 4.9073e-05 
gnorm: 1.24 [ 2:28:57<22:09:34] +[titan] 2025-10-05 01:03:17,168 - root - INFO - step: 4035 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:03:17,168 - root - INFO - lr: 4.9070e-05 gnorm: 1.29 [ 2:29:08<22:09:22] +[titan] 2025-10-05 01:03:28,097 - root - INFO - step: 4040 loss: 2.8057 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3338 global_avg_mtp_loss: 2.4719 +[titan] 2025-10-05 01:03:28,098 - root - INFO - lr: 4.9068e-05 gnorm: 1.23 [ 2:29:19<22:09:09] +[titan] 2025-10-05 01:03:39,019 - root - INFO - step: 4045 loss: 2.8686 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5273 +[titan] 2025-10-05 01:03:39,019 - root - INFO - lr: 4.9065e-05 gnorm: 1.33 [ 2:29:30<22:08:56] +[titan] 2025-10-05 01:03:47,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:03:49,927 - root - INFO - step: 4050 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5088 +[titan] 2025-10-05 01:03:49,927 - root - INFO - lr: 4.9063e-05 gnorm: 1.25 [ 2:29:41<22:08:44] +[titan] 2025-10-05 01:04:00,828 - root - INFO - step: 4055 loss: 2.8040 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4693 +[titan] 2025-10-05 01:04:00,828 - root - INFO - lr: 4.9060e-05 gnorm: 1.23 [ 2:29:52<22:08:31] +[titan] 2025-10-05 01:04:11,717 - root - INFO - step: 4060 loss: 2.8008 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4673 +[titan] 2025-10-05 01:04:11,717 - root - INFO - lr: 4.9058e-05 gnorm: 1.27 [ 2:30:03<22:08:18] +[titan] 2025-10-05 01:04:22,649 - root - INFO - step: 4065 loss: 2.8860 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3427 global_avg_mtp_loss: 2.5433 +[titan] 2025-10-05 01:04:22,649 - root - INFO - lr: 4.9055e-05 gnorm: 1.27 [ 2:30:14<22:08:06] +[titan] 2025-10-05 01:04:33,534 - root - INFO - step: 4070 loss: 2.8482 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3390 global_avg_mtp_loss: 2.5092 +[titan] 2025-10-05 01:04:33,534 - root - INFO - lr: 4.9053e-05 gnorm: 1.28 [ 2:30:25<22:07:53] +[titan] 2025-10-05 01:04:44,493 - root - INFO - step: 4075 loss: 2.7243 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.3989 +[titan] 2025-10-05 01:04:44,493 - root - INFO - lr: 4.9050e-05 gnorm: 1.28 [ 2:30:36<22:07:41] +[titan] 2025-10-05 01:04:55,369 - root - INFO - step: 4080 loss: 2.9124 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3469 global_avg_mtp_loss: 2.5655 +[titan] 2025-10-05 01:04:55,370 - root - INFO - lr: 4.9047e-05 
gnorm: 1.24 [ 2:30:46<22:07:28] +[titan] 2025-10-05 01:05:06,228 - root - INFO - step: 4085 loss: 2.8731 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5283 +[titan] 2025-10-05 01:05:06,228 - root - INFO - lr: 4.9045e-05 gnorm: 1.27 [ 2:30:57<22:07:15] +[titan] 2025-10-05 01:05:17,102 - root - INFO - step: 4090 loss: 2.7997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4672 +[titan] 2025-10-05 01:05:17,102 - root - INFO - lr: 4.9042e-05 gnorm: 1.28 [ 2:31:08<22:07:02] +[titan] 2025-10-05 01:05:28,059 - root - INFO - step: 4095 loss: 2.9035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.3448 global_avg_mtp_loss: 2.5587 +[titan] 2025-10-05 01:05:28,060 - root - INFO - lr: 4.9040e-05 gnorm: 1.23 [ 2:31:19<22:06:49] +[titan] 2025-10-05 01:05:30,409 - root - INFO - Dumping profiler traces at step 4096 +[titan] 2025-10-05 01:05:30,447 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:05:37,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:05:39,213 - root - INFO - step: 4100 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 29,380 tflops: 407.60 mfu: 41.21% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4763 +[titan] 2025-10-05 01:05:39,213 - root - INFO - lr: 4.9037e-05 gnorm: 1.29 [ 2:31:30<22:06:39] +[titan] 2025-10-05 01:05:50,104 - root - INFO - step: 4105 loss: 2.8434 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5046 +[titan] 2025-10-05 01:05:50,104 - root - INFO - lr: 4.9035e-05 gnorm: 1.25 [ 2:31:41<22:06:26] +[titan] 2025-10-05 01:06:00,954 - root - INFO - step: 4110 loss: 2.8513 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3410 global_avg_mtp_loss: 2.5103 +[titan] 2025-10-05 01:06:00,954 - root - INFO - lr: 4.9032e-05 gnorm: 1.30 [ 2:31:52<22:06:13] +[titan] 2025-10-05 01:06:11,792 - root - INFO - step: 4115 loss: 2.8687 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3413 global_avg_mtp_loss: 2.5274 +[titan] 2025-10-05 01:06:11,793 - root - INFO - lr: 4.9029e-05 gnorm: 1.28 [ 2:32:03<22:06:00] +[titan] 2025-10-05 01:06:22,672 - root - INFO - step: 4120 loss: 2.7381 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3273 global_avg_mtp_loss: 2.4108 +[titan] 2025-10-05 01:06:22,673 - root - INFO - lr: 4.9027e-05 gnorm: 1.20 [ 2:32:14<22:05:47] +[titan] 2025-10-05 01:06:33,541 - root - INFO - step: 4125 loss: 2.8811 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5371 +[titan] 2025-10-05 01:06:33,541 - root - INFO - lr: 4.9024e-05 gnorm: 1.27 [ 2:32:25<22:05:34] +[titan] 2025-10-05 01:06:44,458 - root - INFO - step: 4130 loss: 2.7955 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.3316 global_avg_mtp_loss: 2.4639 +[titan] 2025-10-05 01:06:44,459 - root - INFO - lr: 4.9022e-05 
gnorm: 1.22 [ 2:32:35<22:05:21] +[titan] 2025-10-05 01:06:55,338 - root - INFO - step: 4135 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3357 global_avg_mtp_loss: 2.4937 +[titan] 2025-10-05 01:06:55,338 - root - INFO - lr: 4.9019e-05 gnorm: 1.26 [ 2:32:46<22:05:08] +[titan] 2025-10-05 01:07:06,209 - root - INFO - step: 4140 loss: 2.8211 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3351 global_avg_mtp_loss: 2.4860 +[titan] 2025-10-05 01:07:06,209 - root - INFO - lr: 4.9016e-05 gnorm: 1.23 [ 2:32:57<22:04:55] +[titan] 2025-10-05 01:07:17,116 - root - INFO - step: 4145 loss: 2.7757 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4442 +[titan] 2025-10-05 01:07:17,116 - root - INFO - lr: 4.9014e-05 gnorm: 1.33 [ 2:33:08<22:04:43] +[titan] 2025-10-05 01:07:25,818 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:07:28,011 - root - INFO - step: 4150 loss: 2.8404 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.5032 +[titan] 2025-10-05 01:07:28,012 - root - INFO - lr: 4.9011e-05 gnorm: 1.29 [ 2:33:19<22:04:30] +[titan] 2025-10-05 01:07:38,919 - root - INFO - step: 4155 loss: 2.8801 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3421 global_avg_mtp_loss: 2.5380 +[titan] 2025-10-05 01:07:38,919 - root - INFO - lr: 4.9009e-05 gnorm: 1.22 [ 2:33:30<22:04:18] +[titan] 2025-10-05 01:07:49,794 - root - INFO - step: 4160 loss: 2.8305 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3386 global_avg_mtp_loss: 2.4919 +[titan] 2025-10-05 01:07:49,794 - root - INFO - lr: 4.9006e-05 gnorm: 1.23 [ 2:33:41<22:04:05] +[titan] 2025-10-05 01:08:00,715 - root - INFO - step: 4165 loss: 2.7568 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4286 +[titan] 2025-10-05 01:08:00,715 - root - INFO - lr: 4.9003e-05 gnorm: 1.22 [ 2:33:52<22:03:52] +[titan] 2025-10-05 01:08:11,575 - root - INFO - step: 4170 loss: 2.8449 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.3395 global_avg_mtp_loss: 2.5054 +[titan] 2025-10-05 01:08:11,575 - root - INFO - lr: 4.9001e-05 gnorm: 1.22 [ 2:34:03<22:03:39] +[titan] 2025-10-05 01:08:22,448 - root - INFO - step: 4175 loss: 2.8005 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3330 global_avg_mtp_loss: 2.4675 +[titan] 2025-10-05 01:08:22,448 - root - INFO - lr: 4.8998e-05 gnorm: 1.22 [ 2:34:13<22:03:26] +[titan] 2025-10-05 01:08:33,314 - root - INFO - step: 4180 loss: 2.7794 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4468 +[titan] 2025-10-05 01:08:33,314 - root - INFO - lr: 4.8995e-05 
gnorm: 1.18 [ 2:34:24<22:03:13] +[titan] 2025-10-05 01:08:44,215 - root - INFO - step: 4185 loss: 2.8110 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3342 global_avg_mtp_loss: 2.4768 +[titan] 2025-10-05 01:08:44,215 - root - INFO - lr: 4.8993e-05 gnorm: 1.25 [ 2:34:35<22:03:01] +[titan] 2025-10-05 01:08:55,079 - root - INFO - step: 4190 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4689 +[titan] 2025-10-05 01:08:55,079 - root - INFO - lr: 4.8990e-05 gnorm: 1.20 [ 2:34:46<22:02:48] +[titan] 2025-10-05 01:09:05,968 - root - INFO - step: 4195 loss: 2.7893 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3318 global_avg_mtp_loss: 2.4575 +[titan] 2025-10-05 01:09:05,968 - root - INFO - lr: 4.8987e-05 gnorm: 1.27 [ 2:34:57<22:02:35] +[titan] 2025-10-05 01:09:14,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:09:16,867 - root - INFO - step: 4200 loss: 2.8001 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4669 +[titan] 2025-10-05 01:09:16,867 - root - INFO - lr: 4.8985e-05 gnorm: 1.37 [ 2:35:08<22:02:22] +[titan] 2025-10-05 01:09:27,758 - root - INFO - step: 4205 loss: 2.8414 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3378 global_avg_mtp_loss: 2.5036 +[titan] 2025-10-05 01:09:27,758 - root - INFO - lr: 4.8982e-05 gnorm: 1.27 [ 2:35:19<22:02:10] +[titan] 2025-10-05 01:09:38,614 - root - INFO - step: 4210 loss: 2.8082 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.3343 global_avg_mtp_loss: 2.4739 +[titan] 2025-10-05 01:09:38,614 - root - INFO - lr: 4.8980e-05 gnorm: 1.21 [ 2:35:30<22:01:57] +[titan] 2025-10-05 01:09:49,535 - root - INFO - step: 4215 loss: 2.8257 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4886 +[titan] 2025-10-05 01:09:49,535 - root - INFO - lr: 4.8977e-05 gnorm: 1.25 [ 2:35:41<22:01:44] +[titan] 2025-10-05 01:10:00,451 - root - INFO - step: 4220 loss: 2.8238 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.3391 global_avg_mtp_loss: 2.4847 +[titan] 2025-10-05 01:10:00,451 - root - INFO - lr: 4.8974e-05 gnorm: 1.27 [ 2:35:51<22:01:32] +[titan] 2025-10-05 01:10:11,409 - root - INFO - step: 4225 loss: 2.7720 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.87 mfu: 41.95% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4420 +[titan] 2025-10-05 01:10:11,409 - root - INFO - lr: 4.8972e-05 gnorm: 1.25 [ 2:36:02<22:01:20] +[titan] 2025-10-05 01:10:22,330 - root - INFO - step: 4230 loss: 2.8335 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.3375 global_avg_mtp_loss: 2.4961 +[titan] 2025-10-05 01:10:22,330 - root - INFO - lr: 4.8969e-05 
gnorm: 1.22 [ 2:36:13<22:01:07] +[titan] 2025-10-05 01:10:33,205 - root - INFO - step: 4235 loss: 2.9402 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3523 global_avg_mtp_loss: 2.5879 +[titan] 2025-10-05 01:10:33,205 - root - INFO - lr: 4.8966e-05 gnorm: 1.26 [ 2:36:24<22:00:54] +[titan] 2025-10-05 01:10:44,111 - root - INFO - step: 4240 loss: 2.8115 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3345 global_avg_mtp_loss: 2.4770 +[titan] 2025-10-05 01:10:44,111 - root - INFO - lr: 4.8964e-05 gnorm: 1.23 [ 2:36:35<22:00:42] +[titan] 2025-10-05 01:10:54,992 - root - INFO - step: 4245 loss: 2.7621 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4328 +[titan] 2025-10-05 01:10:54,992 - root - INFO - lr: 4.8961e-05 gnorm: 1.25 [ 2:36:46<22:00:29] +[titan] 2025-10-05 01:11:03,679 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:11:05,860 - root - INFO - step: 4250 loss: 2.7919 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:11:05,860 - root - INFO - lr: 4.8958e-05 gnorm: 1.34 [ 2:36:57<22:00:16] +[titan] 2025-10-05 01:11:16,750 - root - INFO - step: 4255 loss: 2.8769 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5346 +[titan] 2025-10-05 01:11:16,750 - root - INFO - lr: 4.8955e-05 gnorm: 1.23 [ 2:37:08<22:00:04] +[titan] 2025-10-05 01:11:27,682 - root - INFO - step: 4260 loss: 2.8447 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5059 +[titan] 2025-10-05 01:11:27,682 - root - INFO - lr: 4.8953e-05 gnorm: 1.29 [ 2:37:19<21:59:51] +[titan] 2025-10-05 01:11:38,566 - root - INFO - step: 4265 loss: 2.8553 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3402 global_avg_mtp_loss: 2.5151 +[titan] 2025-10-05 01:11:38,566 - root - INFO - lr: 4.8950e-05 gnorm: 1.28 [ 2:37:30<21:59:38] +[titan] 2025-10-05 01:11:49,489 - root - INFO - step: 4270 loss: 2.8265 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.3371 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:11:49,489 - root - INFO - lr: 4.8947e-05 gnorm: 1.23 [ 2:37:40<21:59:26] +[titan] 2025-10-05 01:12:00,379 - root - INFO - step: 4275 loss: 2.7626 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3291 global_avg_mtp_loss: 2.4335 +[titan] 2025-10-05 01:12:00,379 - root - INFO - lr: 4.8945e-05 gnorm: 1.23 [ 2:37:51<21:59:13] +[titan] 2025-10-05 01:12:11,266 - root - INFO - step: 4280 loss: 2.8313 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4955 +[titan] 2025-10-05 01:12:11,266 - root - INFO - lr: 4.8942e-05 
gnorm: 1.25 [ 2:38:02<21:59:01] +[titan] 2025-10-05 01:12:22,135 - root - INFO - step: 4285 loss: 2.8353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3380 global_avg_mtp_loss: 2.4973 +[titan] 2025-10-05 01:12:22,135 - root - INFO - lr: 4.8939e-05 gnorm: 1.27 [ 2:38:13<21:58:48] +[titan] 2025-10-05 01:12:33,063 - root - INFO - step: 4290 loss: 2.7796 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.3326 global_avg_mtp_loss: 2.4469 +[titan] 2025-10-05 01:12:33,063 - root - INFO - lr: 4.8937e-05 gnorm: 1.31 [ 2:38:24<21:58:36] +[titan] 2025-10-05 01:12:43,959 - root - INFO - step: 4295 loss: 2.7810 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4492 +[titan] 2025-10-05 01:12:43,959 - root - INFO - lr: 4.8934e-05 gnorm: 1.37 [ 2:38:35<21:58:23] +[titan] 2025-10-05 01:12:52,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:12:54,832 - root - INFO - step: 4300 loss: 2.9113 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3476 global_avg_mtp_loss: 2.5637 +[titan] 2025-10-05 01:12:54,832 - root - INFO - lr: 4.8931e-05 gnorm: 1.32 [ 2:38:46<21:58:10] +[titan] 2025-10-05 01:13:05,696 - root - INFO - step: 4305 loss: 2.8427 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:13:05,696 - root - INFO - lr: 4.8928e-05 gnorm: 1.29 [ 2:38:57<21:57:57] +[titan] 2025-10-05 01:13:16,559 - root - INFO - step: 4310 loss: 2.8552 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5127 +[titan] 2025-10-05 01:13:16,559 - root - INFO - lr: 4.8926e-05 gnorm: 1.25 [ 2:39:08<21:57:45] +[titan] 2025-10-05 01:13:27,434 - root - INFO - step: 4315 loss: 2.7587 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:13:27,434 - root - INFO - lr: 4.8923e-05 gnorm: 1.28 [ 2:39:18<21:57:32] +[titan] 2025-10-05 01:13:38,295 - root - INFO - step: 4320 loss: 2.8361 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3373 global_avg_mtp_loss: 2.4988 +[titan] 2025-10-05 01:13:38,295 - root - INFO - lr: 4.8920e-05 gnorm: 1.33 [ 2:39:29<21:57:19] +[titan] 2025-10-05 01:13:49,212 - root - INFO - step: 4325 loss: 2.8809 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.3428 global_avg_mtp_loss: 2.5381 +[titan] 2025-10-05 01:13:49,212 - root - INFO - lr: 4.8918e-05 gnorm: 1.32 [ 2:39:40<21:57:07] +[titan] 2025-10-05 01:14:00,073 - root - INFO - step: 4330 loss: 2.8655 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3423 global_avg_mtp_loss: 2.5232 +[titan] 2025-10-05 01:14:00,073 - root - INFO - lr: 4.8915e-05 
gnorm: 1.25 [ 2:39:51<21:56:54] +[titan] 2025-10-05 01:14:10,949 - root - INFO - step: 4335 loss: 2.8077 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4742 +[titan] 2025-10-05 01:14:10,949 - root - INFO - lr: 4.8912e-05 gnorm: 1.25 [ 2:40:02<21:56:41] +[titan] 2025-10-05 01:14:21,868 - root - INFO - step: 4340 loss: 2.8223 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3346 global_avg_mtp_loss: 2.4877 +[titan] 2025-10-05 01:14:21,868 - root - INFO - lr: 4.8909e-05 gnorm: 1.21 [ 2:40:13<21:56:29] +[titan] 2025-10-05 01:14:32,754 - root - INFO - step: 4345 loss: 2.8555 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3431 global_avg_mtp_loss: 2.5124 +[titan] 2025-10-05 01:14:32,754 - root - INFO - lr: 4.8907e-05 gnorm: 1.26 [ 2:40:24<21:56:16] +[titan] 2025-10-05 01:14:41,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:14:43,631 - root - INFO - step: 4350 loss: 2.7309 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 01:14:43,631 - root - INFO - lr: 4.8904e-05 gnorm: 1.21 [ 2:40:35<21:56:03] +[titan] 2025-10-05 01:14:54,554 - root - INFO - step: 4355 loss: 2.7817 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:14:54,555 - root - INFO - lr: 4.8901e-05 gnorm: 1.31 [ 2:40:46<21:55:51] +[titan] 2025-10-05 01:15:05,471 - root - INFO - step: 4360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:15:05,471 - root - INFO - lr: 4.8898e-05 gnorm: 1.18 [ 2:40:56<21:55:39] +[titan] 2025-10-05 01:15:16,353 - root - INFO - step: 4365 loss: 2.7543 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3265 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:15:16,353 - root - INFO - lr: 4.8896e-05 gnorm: 1.34 [ 2:41:07<21:55:26] +[titan] 2025-10-05 01:15:27,221 - root - INFO - step: 4370 loss: 2.8151 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3349 global_avg_mtp_loss: 2.4802 +[titan] 2025-10-05 01:15:27,222 - root - INFO - lr: 4.8893e-05 gnorm: 1.33 [ 2:41:18<21:55:13] +[titan] 2025-10-05 01:15:38,092 - root - INFO - step: 4375 loss: 2.8402 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3381 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:15:38,092 - root - INFO - lr: 4.8890e-05 gnorm: 1.24 [ 2:41:29<21:55:00] +[titan] 2025-10-05 01:15:48,973 - root - INFO - step: 4380 loss: 2.7636 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4347 +[titan] 2025-10-05 01:15:48,973 - root - INFO - lr: 4.8887e-05 
gnorm: 1.28 [ 2:41:40<21:54:48] +[titan] 2025-10-05 01:15:59,862 - root - INFO - step: 4385 loss: 2.7822 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4512 +[titan] 2025-10-05 01:15:59,863 - root - INFO - lr: 4.8884e-05 gnorm: 1.22 [ 2:41:51<21:54:35] +[titan] 2025-10-05 01:16:10,768 - root - INFO - step: 4390 loss: 2.8774 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.3440 global_avg_mtp_loss: 2.5334 +[titan] 2025-10-05 01:16:10,768 - root - INFO - lr: 4.8882e-05 gnorm: 1.31 [ 2:42:02<21:54:23] +[titan] 2025-10-05 01:16:21,633 - root - INFO - step: 4395 loss: 2.7736 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4446 +[titan] 2025-10-05 01:16:21,633 - root - INFO - lr: 4.8879e-05 gnorm: 1.27 [ 2:42:13<21:54:10] +[titan] 2025-10-05 01:16:30,335 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:16:32,515 - root - INFO - step: 4400 loss: 2.8412 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3388 global_avg_mtp_loss: 2.5024 +[titan] 2025-10-05 01:16:32,515 - root - INFO - lr: 4.8876e-05 gnorm: 1.24 [ 2:42:24<21:53:57] +[titan] 2025-10-05 01:16:43,378 - root - INFO - step: 4405 loss: 2.8111 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3347 global_avg_mtp_loss: 2.4765 +[titan] 2025-10-05 01:16:43,378 - root - INFO - lr: 4.8873e-05 gnorm: 1.25 [ 2:42:34<21:53:45] +[titan] 2025-10-05 01:16:54,311 - root - INFO - step: 4410 loss: 2.7984 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.3341 global_avg_mtp_loss: 2.4642 +[titan] 2025-10-05 01:16:54,312 - root - INFO - lr: 4.8871e-05 gnorm: 1.22 [ 2:42:45<21:53:32] +[titan] 2025-10-05 01:17:05,164 - root - INFO - step: 4415 loss: 2.7761 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3306 global_avg_mtp_loss: 2.4455 +[titan] 2025-10-05 01:17:05,164 - root - INFO - lr: 4.8868e-05 gnorm: 1.24 [ 2:42:56<21:53:20] +[titan] 2025-10-05 01:17:16,059 - root - INFO - step: 4420 loss: 2.8777 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3426 global_avg_mtp_loss: 2.5351 +[titan] 2025-10-05 01:17:16,059 - root - INFO - lr: 4.8865e-05 gnorm: 1.27 [ 2:43:07<21:53:07] +[titan] 2025-10-05 01:17:26,943 - root - INFO - step: 4425 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4104 +[titan] 2025-10-05 01:17:26,943 - root - INFO - lr: 4.8862e-05 gnorm: 1.25 [ 2:43:18<21:52:55] +[titan] 2025-10-05 01:17:37,810 - root - INFO - step: 4430 loss: 2.8315 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3374 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:17:37,810 - root - INFO - lr: 4.8859e-05 
gnorm: 1.24 [ 2:43:29<21:52:42] +[titan] 2025-10-05 01:17:48,674 - root - INFO - step: 4435 loss: 2.7874 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3310 global_avg_mtp_loss: 2.4564 +[titan] 2025-10-05 01:17:48,674 - root - INFO - lr: 4.8857e-05 gnorm: 1.29 [ 2:43:40<21:52:29] +[titan] 2025-10-05 01:17:59,549 - root - INFO - step: 4440 loss: 2.7652 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4365 +[titan] 2025-10-05 01:17:59,549 - root - INFO - lr: 4.8854e-05 gnorm: 1.25 [ 2:43:51<21:52:16] +[titan] 2025-10-05 01:18:10,464 - root - INFO - step: 4445 loss: 2.7634 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4344 +[titan] 2025-10-05 01:18:10,464 - root - INFO - lr: 4.8851e-05 gnorm: 1.21 [ 2:44:01<21:52:04] +[titan] 2025-10-05 01:18:19,181 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:18:21,402 - root - INFO - step: 4450 loss: 2.8198 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.3358 global_avg_mtp_loss: 2.4839 +[titan] 2025-10-05 01:18:21,402 - root - INFO - lr: 4.8848e-05 gnorm: 1.25 [ 2:44:12<21:51:52] +[titan] 2025-10-05 01:18:32,290 - root - INFO - step: 4455 loss: 2.8002 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4666 +[titan] 2025-10-05 01:18:32,290 - root - INFO - lr: 4.8845e-05 gnorm: 1.21 [ 2:44:23<21:51:39] +[titan] 2025-10-05 01:18:43,182 - root - INFO - step: 4460 loss: 2.7924 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4613 +[titan] 2025-10-05 01:18:43,183 - root - INFO - lr: 4.8842e-05 gnorm: 1.17 [ 2:44:34<21:51:27] +[titan] 2025-10-05 01:18:54,107 - root - INFO - step: 4465 loss: 2.8210 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.3364 global_avg_mtp_loss: 2.4846 +[titan] 2025-10-05 01:18:54,107 - root - INFO - lr: 4.8840e-05 gnorm: 1.23 [ 2:44:45<21:51:15] +[titan] 2025-10-05 01:19:04,974 - root - INFO - step: 4470 loss: 2.7913 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3317 global_avg_mtp_loss: 2.4596 +[titan] 2025-10-05 01:19:04,974 - root - INFO - lr: 4.8837e-05 gnorm: 1.21 [ 2:44:56<21:51:02] +[titan] 2025-10-05 01:19:15,845 - root - INFO - step: 4475 loss: 2.8258 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3360 global_avg_mtp_loss: 2.4898 +[titan] 2025-10-05 01:19:15,846 - root - INFO - lr: 4.8834e-05 gnorm: 1.28 [ 2:45:07<21:50:49] +[titan] 2025-10-05 01:19:26,715 - root - INFO - step: 4480 loss: 2.7821 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3307 global_avg_mtp_loss: 2.4514 +[titan] 2025-10-05 01:19:26,715 - root - INFO - lr: 4.8831e-05 
gnorm: 1.29 [ 2:45:18<21:50:37] +[titan] 2025-10-05 01:19:37,611 - root - INFO - step: 4485 loss: 2.8154 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4798 +[titan] 2025-10-05 01:19:37,611 - root - INFO - lr: 4.8828e-05 gnorm: 1.28 [ 2:45:29<21:50:24] +[titan] 2025-10-05 01:19:48,473 - root - INFO - step: 4490 loss: 2.7910 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3319 global_avg_mtp_loss: 2.4591 +[titan] 2025-10-05 01:19:48,474 - root - INFO - lr: 4.8825e-05 gnorm: 1.39 [ 2:45:39<21:50:12] +[titan] 2025-10-05 01:19:59,362 - root - INFO - step: 4495 loss: 2.7586 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4304 +[titan] 2025-10-05 01:19:59,363 - root - INFO - lr: 4.8823e-05 gnorm: 1.26 [ 2:45:50<21:49:59] +[titan] 2025-10-05 01:20:08,035 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:20:10,224 - root - INFO - step: 4500 loss: 2.8484 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3393 global_avg_mtp_loss: 2.5091 +[titan] 2025-10-05 01:20:10,224 - root - INFO - lr: 4.8820e-05 gnorm: 1.25 [ 2:46:01<21:49:46] +[titan] 2025-10-05 01:20:21,077 - root - INFO - step: 4505 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3816 +[titan] 2025-10-05 01:20:21,077 - root - INFO - lr: 4.8817e-05 gnorm: 1.25 [ 2:46:12<21:49:34] +[titan] 2025-10-05 01:20:31,932 - root - INFO - step: 4510 loss: 2.8270 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3376 global_avg_mtp_loss: 2.4894 +[titan] 2025-10-05 01:20:31,932 - root - INFO - lr: 4.8814e-05 gnorm: 1.26 [ 2:46:23<21:49:21] +[titan] 2025-10-05 01:20:42,845 - root - INFO - step: 4515 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4207 +[titan] 2025-10-05 01:20:42,845 - root - INFO - lr: 4.8811e-05 gnorm: 1.20 [ 2:46:34<21:49:09] +[titan] 2025-10-05 01:20:53,800 - root - INFO - step: 4520 loss: 2.8244 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3356 global_avg_mtp_loss: 2.4888 +[titan] 2025-10-05 01:20:53,800 - root - INFO - lr: 4.8808e-05 gnorm: 1.37 [ 2:46:45<21:48:57] +[titan] 2025-10-05 01:21:04,708 - root - INFO - step: 4525 loss: 2.7186 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3964 +[titan] 2025-10-05 01:21:04,708 - root - INFO - lr: 4.8805e-05 gnorm: 1.27 [ 2:46:56<21:48:44] +[titan] 2025-10-05 01:21:15,602 - root - INFO - step: 4530 loss: 2.7206 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3236 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:21:15,602 - root - INFO - lr: 4.8803e-05 
gnorm: 1.23 [ 2:47:07<21:48:32] +[titan] 2025-10-05 01:21:26,498 - root - INFO - step: 4535 loss: 2.7518 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3249 global_avg_mtp_loss: 2.4269 +[titan] 2025-10-05 01:21:26,498 - root - INFO - lr: 4.8800e-05 gnorm: 1.35 [ 2:47:17<21:48:19] +[titan] 2025-10-05 01:21:37,376 - root - INFO - step: 4540 loss: 2.7814 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3301 global_avg_mtp_loss: 2.4513 +[titan] 2025-10-05 01:21:37,376 - root - INFO - lr: 4.8797e-05 gnorm: 1.23 [ 2:47:28<21:48:07] +[titan] 2025-10-05 01:21:48,331 - root - INFO - step: 4545 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.3311 global_avg_mtp_loss: 2.4487 +[titan] 2025-10-05 01:21:48,331 - root - INFO - lr: 4.8794e-05 gnorm: 1.25 [ 2:47:39<21:47:55] +[titan] 2025-10-05 01:21:57,057 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:21:59,248 - root - INFO - step: 4550 loss: 2.8483 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.3389 global_avg_mtp_loss: 2.5094 +[titan] 2025-10-05 01:21:59,249 - root - INFO - lr: 4.8791e-05 gnorm: 1.26 [ 2:47:50<21:47:43] +[titan] 2025-10-05 01:22:10,102 - root - INFO - step: 4555 loss: 2.7389 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.4138 +[titan] 2025-10-05 01:22:10,103 - root - INFO - lr: 4.8788e-05 gnorm: 1.20 [ 2:48:01<21:47:30] +[titan] 2025-10-05 01:22:20,974 - root - INFO - step: 4560 loss: 2.7847 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4538 +[titan] 2025-10-05 01:22:20,974 - root - INFO - lr: 4.8785e-05 gnorm: 1.21 [ 2:48:12<21:47:17] +[titan] 2025-10-05 01:22:31,853 - root - INFO - step: 4565 loss: 2.7537 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:22:31,854 - root - INFO - lr: 4.8782e-05 gnorm: 1.27 [ 2:48:23<21:47:05] +[titan] 2025-10-05 01:22:42,729 - root - INFO - step: 4570 loss: 2.6580 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 01:22:42,729 - root - INFO - lr: 4.8779e-05 gnorm: 1.26 [ 2:48:34<21:46:52] +[titan] 2025-10-05 01:22:53,792 - root - INFO - step: 4575 loss: 2.8422 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.93 mfu: 41.55% global_avg_ntp_loss: 0.3385 global_avg_mtp_loss: 2.5037 +[titan] 2025-10-05 01:22:53,792 - root - INFO - lr: 4.8777e-05 gnorm: 1.26 [ 2:48:45<21:46:41] +[titan] 2025-10-05 01:23:04,721 - root - INFO - step: 4580 loss: 2.6906 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3732 +[titan] 2025-10-05 01:23:04,721 - root - INFO - lr: 4.8774e-05 
gnorm: 1.18 [ 2:48:56<21:46:29] +[titan] 2025-10-05 01:23:15,616 - root - INFO - step: 4585 loss: 2.7509 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:23:15,616 - root - INFO - lr: 4.8771e-05 gnorm: 1.23 [ 2:49:07<21:46:17] +[titan] 2025-10-05 01:23:26,529 - root - INFO - step: 4590 loss: 2.7868 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4570 +[titan] 2025-10-05 01:23:26,530 - root - INFO - lr: 4.8768e-05 gnorm: 1.24 [ 2:49:18<21:46:04] +[titan] 2025-10-05 01:23:37,394 - root - INFO - step: 4595 loss: 2.7525 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3274 global_avg_mtp_loss: 2.4251 +[titan] 2025-10-05 01:23:37,394 - root - INFO - lr: 4.8765e-05 gnorm: 1.22 [ 2:49:28<21:45:52] +[titan] 2025-10-05 01:23:46,091 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:23:48,293 - root - INFO - step: 4600 loss: 2.7469 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4209 +[titan] 2025-10-05 01:23:48,293 - root - INFO - lr: 4.8762e-05 gnorm: 1.18 [ 2:49:39<21:45:39] +[titan] 2025-10-05 01:23:59,314 - root - INFO - step: 4605 loss: 2.8650 memory: 118.84GiB(85.28%) tps: 29,734 tflops: 412.51 mfu: 41.71% global_avg_ntp_loss: 0.3574 global_avg_mtp_loss: 2.5076 +[titan] 2025-10-05 01:23:59,314 - root - INFO - lr: 4.8759e-05 gnorm: 1.23 [ 2:49:50<21:45:28] +[titan] 2025-10-05 01:24:06,006 - root - INFO - Dumping profiler traces at step 4608 +[titan] 2025-10-05 01:24:06,043 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:24:10,470 - root - INFO - step: 4610 loss: 2.7849 memory: 118.84GiB(85.28%) tps: 29,373 tflops: 407.50 mfu: 41.20% global_avg_ntp_loss: 0.3315 global_avg_mtp_loss: 2.4534 +[titan] 2025-10-05 01:24:10,471 - root - INFO - lr: 4.8756e-05 gnorm: 1.28 [ 2:50:01<21:45:18] +[titan] 2025-10-05 01:24:21,351 - root - INFO - step: 4615 loss: 2.7549 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4283 +[titan] 2025-10-05 01:24:21,351 - root - INFO - lr: 4.8753e-05 gnorm: 1.21 [ 2:50:12<21:45:05] +[titan] 2025-10-05 01:24:32,230 - root - INFO - step: 4620 loss: 2.6761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3586 +[titan] 2025-10-05 01:24:32,230 - root - INFO - lr: 4.8750e-05 gnorm: 1.22 [ 2:50:23<21:44:53] +[titan] 2025-10-05 01:24:43,126 - root - INFO - step: 4625 loss: 2.6974 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:24:43,126 - root - INFO - lr: 4.8747e-05 gnorm: 1.18 [ 2:50:34<21:44:40] +[titan] 2025-10-05 01:24:54,032 - root - INFO - step: 4630 loss: 2.8650 
memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.3412 global_avg_mtp_loss: 2.5238 +[titan] 2025-10-05 01:24:54,032 - root - INFO - lr: 4.8744e-05 gnorm: 1.24 [ 2:50:45<21:44:28] +[titan] 2025-10-05 01:25:04,940 - root - INFO - step: 4635 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4155 +[titan] 2025-10-05 01:25:04,940 - root - INFO - lr: 4.8741e-05 gnorm: 1.21 [ 2:50:56<21:44:16] +[titan] 2025-10-05 01:25:15,817 - root - INFO - step: 4640 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3562 +[titan] 2025-10-05 01:25:15,817 - root - INFO - lr: 4.8739e-05 gnorm: 1.31 [ 2:51:07<21:44:03] +[titan] 2025-10-05 01:25:26,720 - root - INFO - step: 4645 loss: 2.8294 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3352 global_avg_mtp_loss: 2.4941 +[titan] 2025-10-05 01:25:26,721 - root - INFO - lr: 4.8736e-05 gnorm: 1.23 [ 2:51:18<21:43:51] +[titan] 2025-10-05 01:25:35,459 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:25:37,643 - root - INFO - step: 4650 loss: 2.6937 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3193 global_avg_mtp_loss: 2.3743 +[titan] 2025-10-05 01:25:37,643 - root - INFO - lr: 4.8733e-05 gnorm: 1.23 [ 2:51:29<21:43:39] +[titan] 2025-10-05 01:25:48,524 - root - INFO - step: 4655 loss: 2.7402 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4144 +[titan] 2025-10-05 01:25:48,525 - root - INFO - lr: 4.8730e-05 gnorm: 1.22 [ 2:51:39<21:43:26] +[titan] 2025-10-05 01:25:59,422 - root - INFO - step: 4660 loss: 2.7820 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4520 +[titan] 2025-10-05 01:25:59,422 - root - INFO - lr: 4.8727e-05 gnorm: 1.30 [ 2:51:50<21:43:14] +[titan] 2025-10-05 01:26:10,311 - root - INFO - step: 4665 loss: 2.8466 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3392 global_avg_mtp_loss: 2.5074 +[titan] 2025-10-05 01:26:10,311 - root - INFO - lr: 4.8724e-05 gnorm: 1.25 [ 2:52:01<21:43:02] +[titan] 2025-10-05 01:26:21,210 - root - INFO - step: 4670 loss: 2.7305 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4066 +[titan] 2025-10-05 01:26:21,210 - root - INFO - lr: 4.8721e-05 gnorm: 1.25 [ 2:52:12<21:42:49] +[titan] 2025-10-05 01:26:32,122 - root - INFO - step: 4675 loss: 2.7530 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.3266 global_avg_mtp_loss: 2.4264 +[titan] 2025-10-05 01:26:32,122 - root - INFO - lr: 4.8718e-05 gnorm: 1.25 [ 2:52:23<21:42:37] +[titan] 2025-10-05 01:26:43,055 - root - INFO - step: 4680 loss: 2.8067 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4732 +[titan] 2025-10-05 01:26:43,055 - root - INFO - lr: 4.8715e-05 
gnorm: 1.24 [ 2:52:34<21:42:25] +[titan] 2025-10-05 01:26:53,990 - root - INFO - step: 4685 loss: 2.6707 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 01:26:53,990 - root - INFO - lr: 4.8712e-05 gnorm: 1.36 [ 2:52:45<21:42:13] +[titan] 2025-10-05 01:27:04,906 - root - INFO - step: 4690 loss: 2.7149 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3214 global_avg_mtp_loss: 2.3935 +[titan] 2025-10-05 01:27:04,906 - root - INFO - lr: 4.8709e-05 gnorm: 1.23 [ 2:52:56<21:42:01] +[titan] 2025-10-05 01:27:15,817 - root - INFO - step: 4695 loss: 2.6965 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3761 +[titan] 2025-10-05 01:27:15,817 - root - INFO - lr: 4.8706e-05 gnorm: 1.25 [ 2:53:07<21:41:49] +[titan] 2025-10-05 01:27:24,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:27:26,735 - root - INFO - step: 4700 loss: 2.7982 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4650 +[titan] 2025-10-05 01:27:26,735 - root - INFO - lr: 4.8703e-05 gnorm: 1.22 [ 2:53:18<21:41:37] +[titan] 2025-10-05 01:27:37,672 - root - INFO - step: 4705 loss: 2.8937 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.3443 global_avg_mtp_loss: 2.5494 +[titan] 2025-10-05 01:27:37,673 - root - INFO - lr: 4.8700e-05 gnorm: 1.26 [ 2:53:29<21:41:25] +[titan] 2025-10-05 01:27:48,615 - root - INFO - step: 4710 loss: 2.7471 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.3269 global_avg_mtp_loss: 2.4201 +[titan] 2025-10-05 01:27:48,616 - root - INFO - lr: 4.8697e-05 gnorm: 1.21 [ 2:53:40<21:41:13] +[titan] 2025-10-05 01:27:59,548 - root - INFO - step: 4715 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.3271 global_avg_mtp_loss: 2.4303 +[titan] 2025-10-05 01:27:59,548 - root - INFO - lr: 4.8694e-05 gnorm: 1.22 [ 2:53:51<21:41:01] +[titan] 2025-10-05 01:28:10,470 - root - INFO - step: 4720 loss: 2.8297 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.3377 global_avg_mtp_loss: 2.4920 +[titan] 2025-10-05 01:28:10,471 - root - INFO - lr: 4.8691e-05 gnorm: 1.25 [ 2:54:01<21:40:49] +[titan] 2025-10-05 01:28:21,389 - root - INFO - step: 4725 loss: 2.8079 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4747 +[titan] 2025-10-05 01:28:21,389 - root - INFO - lr: 4.8688e-05 gnorm: 1.25 [ 2:54:12<21:40:36] +[titan] 2025-10-05 01:28:32,287 - root - INFO - step: 4730 loss: 2.7460 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3255 global_avg_mtp_loss: 2.4205 +[titan] 2025-10-05 01:28:32,287 - root - INFO - lr: 4.8685e-05 
gnorm: 1.27 [ 2:54:23<21:40:24] +[titan] 2025-10-05 01:28:43,251 - root - INFO - step: 4735 loss: 2.6848 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3668 +[titan] 2025-10-05 01:28:43,252 - root - INFO - lr: 4.8682e-05 gnorm: 1.24 [ 2:54:34<21:40:12] +[titan] 2025-10-05 01:28:54,171 - root - INFO - step: 4740 loss: 2.7918 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4618 +[titan] 2025-10-05 01:28:54,171 - root - INFO - lr: 4.8679e-05 gnorm: 1.32 [ 2:54:45<21:40:00] +[titan] 2025-10-05 01:29:05,077 - root - INFO - step: 4745 loss: 2.7361 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4131 +[titan] 2025-10-05 01:29:05,077 - root - INFO - lr: 4.8676e-05 gnorm: 1.29 [ 2:54:56<21:39:48] +[titan] 2025-10-05 01:29:13,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:29:15,984 - root - INFO - step: 4750 loss: 2.7499 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4232 +[titan] 2025-10-05 01:29:15,984 - root - INFO - lr: 4.8673e-05 gnorm: 1.26 [ 2:55:07<21:39:36] +[titan] 2025-10-05 01:29:26,874 - root - INFO - step: 4755 loss: 2.7721 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.3285 global_avg_mtp_loss: 2.4435 +[titan] 2025-10-05 01:29:26,874 - root - INFO - lr: 4.8670e-05 gnorm: 1.19 [ 2:55:18<21:39:23] +[titan] 2025-10-05 01:29:37,761 - root - INFO - step: 4760 loss: 2.7947 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3353 global_avg_mtp_loss: 2.4595 +[titan] 2025-10-05 01:29:37,761 - root - INFO - lr: 4.8667e-05 gnorm: 1.22 [ 2:55:29<21:39:11] +[titan] 2025-10-05 01:29:48,663 - root - INFO - step: 4765 loss: 2.7250 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.3240 global_avg_mtp_loss: 2.4010 +[titan] 2025-10-05 01:29:48,664 - root - INFO - lr: 4.8664e-05 gnorm: 1.28 [ 2:55:40<21:38:59] +[titan] 2025-10-05 01:29:59,563 - root - INFO - step: 4770 loss: 2.7157 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.3221 global_avg_mtp_loss: 2.3936 +[titan] 2025-10-05 01:29:59,563 - root - INFO - lr: 4.8661e-05 gnorm: 2.78 [ 2:55:51<21:38:47] +[titan] 2025-10-05 01:30:10,469 - root - INFO - step: 4775 loss: 2.8036 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.3335 global_avg_mtp_loss: 2.4701 +[titan] 2025-10-05 01:30:10,470 - root - INFO - lr: 4.8658e-05 gnorm: 1.25 [ 2:56:01<21:38:34] +[titan] 2025-10-05 01:30:21,348 - root - INFO - step: 4780 loss: 2.7215 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:30:21,348 - root - INFO - lr: 4.8655e-05 
gnorm: 1.38 [ 2:56:12<21:38:22] +[titan] 2025-10-05 01:30:32,231 - root - INFO - step: 4785 loss: 2.7709 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4425 +[titan] 2025-10-05 01:30:32,231 - root - INFO - lr: 4.8652e-05 gnorm: 1.21 [ 2:56:23<21:38:10] +[titan] 2025-10-05 01:30:43,113 - root - INFO - step: 4790 loss: 2.7171 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.3934 +[titan] 2025-10-05 01:30:43,113 - root - INFO - lr: 4.8649e-05 gnorm: 1.19 [ 2:56:34<21:37:57] +[titan] 2025-10-05 01:30:54,053 - root - INFO - step: 4795 loss: 2.8155 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.3424 global_avg_mtp_loss: 2.4731 +[titan] 2025-10-05 01:30:54,054 - root - INFO - lr: 4.8646e-05 gnorm: 1.20 [ 2:56:45<21:37:45] +[titan] 2025-10-05 01:31:02,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:31:04,944 - root - INFO - step: 4800 loss: 2.7229 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4004 +[titan] 2025-10-05 01:31:04,944 - root - INFO - lr: 4.8643e-05 gnorm: 1.27 [ 2:56:56<21:37:33] +[titan] 2025-10-05 01:31:15,845 - root - INFO - step: 4805 loss: 2.7633 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4354 +[titan] 2025-10-05 01:31:15,845 - root - INFO - lr: 4.8639e-05 gnorm: 1.30 [ 2:57:07<21:37:21] +[titan] 2025-10-05 01:31:26,718 - root - INFO - step: 4810 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 01:31:26,718 - root - INFO - lr: 4.8636e-05 gnorm: 1.23 [ 2:57:18<21:37:08] +[titan] 2025-10-05 01:31:37,587 - root - INFO - step: 4815 loss: 2.7353 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4100 +[titan] 2025-10-05 01:31:37,587 - root - INFO - lr: 4.8633e-05 gnorm: 1.25 [ 2:57:29<21:36:56] +[titan] 2025-10-05 01:31:48,487 - root - INFO - step: 4820 loss: 2.7752 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.3297 global_avg_mtp_loss: 2.4454 +[titan] 2025-10-05 01:31:48,487 - root - INFO - lr: 4.8630e-05 gnorm: 1.24 [ 2:57:39<21:36:44] +[titan] 2025-10-05 01:31:59,366 - root - INFO - step: 4825 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3299 global_avg_mtp_loss: 2.4440 +[titan] 2025-10-05 01:31:59,366 - root - INFO - lr: 4.8627e-05 gnorm: 1.27 [ 2:57:50<21:36:31] +[titan] 2025-10-05 01:32:10,285 - root - INFO - step: 4830 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3289 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:32:10,285 - root - INFO - lr: 4.8624e-05 
gnorm: 1.25 [ 2:58:01<21:36:19] +[titan] 2025-10-05 01:32:21,158 - root - INFO - step: 4835 loss: 2.7916 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4584 +[titan] 2025-10-05 01:32:21,158 - root - INFO - lr: 4.8621e-05 gnorm: 1.23 [ 2:58:12<21:36:07] +[titan] 2025-10-05 01:32:32,019 - root - INFO - step: 4840 loss: 2.7798 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3305 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:32:32,020 - root - INFO - lr: 4.8618e-05 gnorm: 1.25 [ 2:58:23<21:35:54] +[titan] 2025-10-05 01:32:42,890 - root - INFO - step: 4845 loss: 2.7622 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.3280 global_avg_mtp_loss: 2.4341 +[titan] 2025-10-05 01:32:42,890 - root - INFO - lr: 4.8615e-05 gnorm: 1.24 [ 2:58:34<21:35:42] +[titan] 2025-10-05 01:32:51,570 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:32:53,752 - root - INFO - step: 4850 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3209 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 01:32:53,752 - root - INFO - lr: 4.8612e-05 gnorm: 1.26 [ 2:58:45<21:35:30] +[titan] 2025-10-05 01:33:04,624 - root - INFO - step: 4855 loss: 2.7888 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4588 +[titan] 2025-10-05 01:33:04,624 - root - INFO - lr: 4.8609e-05 gnorm: 1.30 [ 2:58:56<21:35:17] +[titan] 2025-10-05 01:33:15,520 - root - INFO - step: 4860 loss: 2.6936 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3751 +[titan] 2025-10-05 01:33:15,521 - root - INFO - lr: 4.8606e-05 gnorm: 1.24 [ 2:59:06<21:35:05] +[titan] 2025-10-05 01:33:26,393 - root - INFO - step: 4865 loss: 2.8919 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3439 global_avg_mtp_loss: 2.5480 +[titan] 2025-10-05 01:33:26,393 - root - INFO - lr: 4.8603e-05 gnorm: 1.25 [ 2:59:17<21:34:53] +[titan] 2025-10-05 01:33:37,259 - root - INFO - step: 4870 loss: 2.7240 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 01:33:37,259 - root - INFO - lr: 4.8599e-05 gnorm: 1.24 [ 2:59:28<21:34:40] +[titan] 2025-10-05 01:33:48,148 - root - INFO - step: 4875 loss: 2.7694 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.3278 global_avg_mtp_loss: 2.4417 +[titan] 2025-10-05 01:33:48,148 - root - INFO - lr: 4.8596e-05 gnorm: 1.26 [ 2:59:39<21:34:28] +[titan] 2025-10-05 01:33:59,034 - root - INFO - step: 4880 loss: 2.7227 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4000 +[titan] 2025-10-05 01:33:59,035 - root - INFO - lr: 4.8593e-05 
gnorm: 1.27 [ 2:59:50<21:34:16] +[titan] 2025-10-05 01:34:09,948 - root - INFO - step: 4885 loss: 2.7234 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4009 +[titan] 2025-10-05 01:34:09,948 - root - INFO - lr: 4.8590e-05 gnorm: 1.20 [ 3:00:01<21:34:04] +[titan] 2025-10-05 01:34:20,817 - root - INFO - step: 4890 loss: 2.7314 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3231 global_avg_mtp_loss: 2.4083 +[titan] 2025-10-05 01:34:20,818 - root - INFO - lr: 4.8587e-05 gnorm: 1.33 [ 3:00:12<21:33:51] +[titan] 2025-10-05 01:34:31,730 - root - INFO - step: 4895 loss: 2.7077 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3210 global_avg_mtp_loss: 2.3867 +[titan] 2025-10-05 01:34:31,730 - root - INFO - lr: 4.8584e-05 gnorm: 1.29 [ 3:00:23<21:33:39] +[titan] 2025-10-05 01:34:40,425 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:34:42,619 - root - INFO - step: 4900 loss: 2.7734 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.3292 global_avg_mtp_loss: 2.4443 +[titan] 2025-10-05 01:34:42,620 - root - INFO - lr: 4.8581e-05 gnorm: 1.28 [ 3:00:34<21:33:27] +[titan] 2025-10-05 01:34:53,494 - root - INFO - step: 4905 loss: 2.7406 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3260 global_avg_mtp_loss: 2.4146 +[titan] 2025-10-05 01:34:53,495 - root - INFO - lr: 4.8578e-05 gnorm: 1.17 [ 3:00:44<21:33:14] +[titan] 2025-10-05 01:35:04,450 - root - INFO - step: 4910 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.3232 global_avg_mtp_loss: 2.3981 +[titan] 2025-10-05 01:35:04,451 - root - INFO - lr: 4.8575e-05 gnorm: 1.20 [ 3:00:55<21:33:03] +[titan] 2025-10-05 01:35:15,335 - root - INFO - step: 4915 loss: 2.7382 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4140 +[titan] 2025-10-05 01:35:15,335 - root - INFO - lr: 4.8571e-05 gnorm: 1.28 [ 3:01:06<21:32:50] +[titan] 2025-10-05 01:35:26,233 - root - INFO - step: 4920 loss: 2.7952 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.3320 global_avg_mtp_loss: 2.4631 +[titan] 2025-10-05 01:35:26,233 - root - INFO - lr: 4.8568e-05 gnorm: 1.29 [ 3:01:17<21:32:38] +[titan] 2025-10-05 01:35:37,136 - root - INFO - step: 4925 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4403 +[titan] 2025-10-05 01:35:37,136 - root - INFO - lr: 4.8565e-05 gnorm: 1.25 [ 3:01:28<21:32:26] +[titan] 2025-10-05 01:35:48,014 - root - INFO - step: 4930 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4081 +[titan] 2025-10-05 01:35:48,014 - root - INFO - lr: 4.8562e-05 
gnorm: 1.21 [ 3:01:39<21:32:14] +[titan] 2025-10-05 01:35:58,895 - root - INFO - step: 4935 loss: 2.7204 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.3970 +[titan] 2025-10-05 01:35:58,895 - root - INFO - lr: 4.8559e-05 gnorm: 1.20 [ 3:01:50<21:32:01] +[titan] 2025-10-05 01:36:09,806 - root - INFO - step: 4940 loss: 2.7788 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4493 +[titan] 2025-10-05 01:36:09,806 - root - INFO - lr: 4.8556e-05 gnorm: 1.21 [ 3:02:01<21:31:49] +[titan] 2025-10-05 01:36:20,731 - root - INFO - step: 4945 loss: 2.7547 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.3304 global_avg_mtp_loss: 2.4243 +[titan] 2025-10-05 01:36:20,732 - root - INFO - lr: 4.8553e-05 gnorm: 1.23 [ 3:02:12<21:31:37] +[titan] 2025-10-05 01:36:29,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:36:31,611 - root - INFO - step: 4950 loss: 2.6438 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.3129 global_avg_mtp_loss: 2.3309 +[titan] 2025-10-05 01:36:31,611 - root - INFO - lr: 4.8549e-05 gnorm: 1.20 [ 3:02:23<21:31:25] +[titan] 2025-10-05 01:36:42,497 - root - INFO - step: 4955 loss: 2.7743 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.3293 global_avg_mtp_loss: 2.4450 +[titan] 2025-10-05 01:36:42,497 - root - INFO - lr: 4.8546e-05 gnorm: 1.29 [ 3:02:33<21:31:13] +[titan] 2025-10-05 01:36:53,369 - root - INFO - step: 4960 loss: 2.7846 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4551 +[titan] 2025-10-05 01:36:53,369 - root - INFO - lr: 4.8543e-05 gnorm: 1.25 [ 3:02:44<21:31:01] +[titan] 2025-10-05 01:37:04,267 - root - INFO - step: 4965 loss: 2.8172 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.3344 global_avg_mtp_loss: 2.4828 +[titan] 2025-10-05 01:37:04,267 - root - INFO - lr: 4.8540e-05 gnorm: 1.25 [ 3:02:55<21:30:48] +[titan] 2025-10-05 01:37:15,212 - root - INFO - step: 4970 loss: 2.6436 memory: 118.84GiB(85.28%) tps: 29,939 tflops: 415.36 mfu: 42.00% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3310 +[titan] 2025-10-05 01:37:15,212 - root - INFO - lr: 4.8537e-05 gnorm: 1.25 [ 3:03:06<21:30:37] +[titan] 2025-10-05 01:37:26,159 - root - INFO - step: 4975 loss: 2.7551 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4289 +[titan] 2025-10-05 01:37:26,159 - root - INFO - lr: 4.8534e-05 gnorm: 1.22 [ 3:03:17<21:30:25] +[titan] 2025-10-05 01:37:37,030 - root - INFO - step: 4980 loss: 2.7052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3218 global_avg_mtp_loss: 2.3834 +[titan] 2025-10-05 01:37:37,031 - root - INFO - lr: 4.8530e-05 
gnorm: 1.26 [ 3:03:28<21:30:12] +[titan] 2025-10-05 01:37:47,943 - root - INFO - step: 4985 loss: 2.7357 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3263 global_avg_mtp_loss: 2.4094 +[titan] 2025-10-05 01:37:47,944 - root - INFO - lr: 4.8527e-05 gnorm: 1.27 [ 3:03:39<21:30:00] +[titan] 2025-10-05 01:37:58,856 - root - INFO - step: 4990 loss: 2.7950 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4628 +[titan] 2025-10-05 01:37:58,857 - root - INFO - lr: 4.8524e-05 gnorm: 1.22 [ 3:03:50<21:29:48] +[titan] 2025-10-05 01:38:09,823 - root - INFO - step: 4995 loss: 2.7375 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.3261 global_avg_mtp_loss: 2.4114 +[titan] 2025-10-05 01:38:09,823 - root - INFO - lr: 4.8521e-05 gnorm: 1.18 [ 3:04:01<21:29:37] +[titan] 2025-10-05 01:38:18,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:38:20,753 - root - INFO - step: 5000 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3200 global_avg_mtp_loss: 2.3792 +[titan] 2025-10-05 01:38:20,753 - root - INFO - lr: 4.8518e-05 gnorm: 1.26 [ 3:04:12<21:29:25] +[titan] 2025-10-05 01:38:20,753 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 01:38:42,127 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 01:38:42,127 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 21.37 seconds. 
+[titan] 2025-10-05 01:40:51,998 - root - INFO - step: 5005 loss: 2.7858 memory: 118.84GiB(85.28%) tps: 2,167 tflops: 30.06 mfu: 3.04% global_avg_ntp_loss: 0.3309 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:40:51,998 - root - INFO - lr: 4.8515e-05 gnorm: 1.27 [ 3:06:43<21:45:34] +[titan] 2025-10-05 01:41:02,796 - root - INFO - step: 5010 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3270 global_avg_mtp_loss: 2.4291 +[titan] 2025-10-05 01:41:02,796 - root - INFO - lr: 4.8511e-05 gnorm: 1.34 [ 3:06:54<21:45:20] +[titan] 2025-10-05 01:41:13,614 - root - INFO - step: 5015 loss: 2.7561 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.3283 global_avg_mtp_loss: 2.4278 +[titan] 2025-10-05 01:41:13,614 - root - INFO - lr: 4.8508e-05 gnorm: 1.32 [ 3:07:05<21:45:06] +[titan] 2025-10-05 01:41:24,485 - root - INFO - step: 5020 loss: 2.7573 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4305 +[titan] 2025-10-05 01:41:24,485 - root - INFO - lr: 4.8505e-05 gnorm: 1.31 [ 3:07:15<21:44:53] +[titan] 2025-10-05 01:41:35,321 - root - INFO - step: 5025 loss: 2.7060 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3836 +[titan] 2025-10-05 01:41:35,321 - root - INFO - lr: 4.8502e-05 gnorm: 1.27 [ 3:07:26<21:44:39] +[titan] 2025-10-05 01:41:46,205 - root - INFO - step: 5030 loss: 2.7304 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3247 global_avg_mtp_loss: 2.4057 +[titan] 2025-10-05 01:41:46,205 - root - INFO - lr: 4.8499e-05 gnorm: 1.28 [ 3:07:37<21:44:26] +[titan] 2025-10-05 01:41:57,092 - root - INFO - step: 5035 loss: 2.7485 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3248 global_avg_mtp_loss: 2.4237 +[titan] 2025-10-05 01:41:57,093 - root - INFO - lr: 4.8495e-05 
gnorm: 1.26 [ 3:07:48<21:44:13] +[titan] 2025-10-05 01:42:08,008 - root - INFO - step: 5040 loss: 2.7641 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4360 +[titan] 2025-10-05 01:42:08,008 - root - INFO - lr: 4.8492e-05 gnorm: 1.18 [ 3:07:59<21:43:59] +[titan] 2025-10-05 01:42:18,888 - root - INFO - step: 5045 loss: 2.6254 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3136 +[titan] 2025-10-05 01:42:18,888 - root - INFO - lr: 4.8489e-05 gnorm: 1.29 [ 3:08:10<21:43:46] +[titan] 2025-10-05 01:42:27,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 01:42:29,897 - root - INFO - step: 5050 loss: 2.7825 memory: 118.84GiB(85.28%) tps: 29,766 tflops: 412.96 mfu: 41.75% global_avg_ntp_loss: 0.3295 global_avg_mtp_loss: 2.4531 +[titan] 2025-10-05 01:42:29,897 - root - INFO - lr: 4.8486e-05 gnorm: 1.24 [ 3:08:21<21:43:34] +[titan] 2025-10-05 01:42:40,766 - root - INFO - step: 5055 loss: 2.7808 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3303 global_avg_mtp_loss: 2.4505 +[titan] 2025-10-05 01:42:40,766 - root - INFO - lr: 4.8483e-05 gnorm: 1.22 [ 3:08:32<21:43:20] +[titan] 2025-10-05 01:42:51,649 - root - INFO - step: 5060 loss: 2.6497 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3358 +[titan] 2025-10-05 01:42:51,649 - root - INFO - lr: 4.8479e-05 gnorm: 1.25 [ 3:08:43<21:43:07] +[titan] 2025-10-05 01:43:02,533 - root - INFO - step: 5065 loss: 2.7482 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 01:43:02,533 - root - INFO - lr: 4.8476e-05 gnorm: 1.21 [ 3:08:53<21:42:54] +[titan] 2025-10-05 01:43:13,418 - root - INFO - step: 5070 loss: 2.8515 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 
417.67 mfu: 42.23% global_avg_ntp_loss: 0.3494 global_avg_mtp_loss: 2.5021 +[titan] 2025-10-05 01:43:13,418 - root - INFO - lr: 4.8473e-05 gnorm: 1.24 [ 3:09:04<21:42:40] +[titan] 2025-10-05 01:43:24,295 - root - INFO - step: 5075 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3955 +[titan] 2025-10-05 01:43:24,295 - root - INFO - lr: 4.8470e-05 gnorm: 1.23 [ 3:09:15<21:42:27] +[titan] 2025-10-05 01:43:35,165 - root - INFO - step: 5080 loss: 2.6731 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3557 +[titan] 2025-10-05 01:43:35,166 - root - INFO - lr: 4.8466e-05 gnorm: 1.24 [ 3:09:26<21:42:14] +[titan] 2025-10-05 01:43:46,043 - root - INFO - step: 5085 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 01:43:46,043 - root - INFO - lr: 4.8463e-05 gnorm: 1.24 [ 3:09:37<21:42:00] +[titan] 2025-10-05 01:43:56,916 - root - INFO - step: 5090 loss: 2.7316 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4079 +[titan] 2025-10-05 01:43:56,916 - root - INFO - lr: 4.8460e-05 gnorm: 1.35 [ 3:09:48<21:41:47] +[titan] 2025-10-05 01:44:07,778 - root - INFO - step: 5095 loss: 2.7611 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3284 global_avg_mtp_loss: 2.4327 +[titan] 2025-10-05 01:44:07,778 - root - INFO - lr: 4.8457e-05 gnorm: 1.27 [ 3:09:59<21:41:34] +[titan] 2025-10-05 01:44:16,486 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:44:18,671 - root - INFO - step: 5100 loss: 2.6824 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3640 +[titan] 2025-10-05 01:44:18,671 - root - INFO - lr: 4.8453e-05 gnorm: 1.28 [ 3:10:10<21:41:20] +[titan] 2025-10-05 01:44:29,534 - root - INFO - step: 5105 loss: 2.8231 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3450 global_avg_mtp_loss: 2.4782 +[titan] 2025-10-05 01:44:29,534 - root - INFO - lr: 4.8450e-05 gnorm: 1.26 [ 3:10:20<21:41:07] +[titan] 2025-10-05 01:44:40,413 - root - INFO - step: 5110 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.3923 +[titan] 2025-10-05 01:44:40,413 - root - INFO - lr: 4.8447e-05 gnorm: 1.23 [ 3:10:31<21:40:54] +[titan] 2025-10-05 01:44:51,299 - root - INFO - step: 5115 loss: 2.6959 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3758 +[titan] 2025-10-05 01:44:51,300 - root - INFO - lr: 4.8444e-05 gnorm: 1.26 [ 3:10:42<21:40:40] +[titan] 2025-10-05 01:45:02,275 - root - INFO - step: 5120 loss: 2.7516 memory: 118.84GiB(85.28%) tps: 29,856 tflops: 414.21 mfu: 41.88% global_avg_ntp_loss: 0.3259 global_avg_mtp_loss: 2.4257 +[titan] 2025-10-05 01:45:02,275 - root - INFO - lr: 4.8440e-05 gnorm: 1.21 [ 3:10:53<21:40:28] +[titan] 2025-10-05 01:45:02,451 - root - INFO - Dumping profiler traces at step 5120 +[titan] 2025-10-05 01:45:02,490 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 01:45:13,379 - root - INFO - step: 5125 loss: 2.7714 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.3288 global_avg_mtp_loss: 2.4427 +[titan] 2025-10-05 01:45:13,379 - root - INFO - lr: 4.8437e-05 gnorm: 1.24 [ 3:11:04<21:40:16] +[titan] 2025-10-05 01:45:24,262 - root - INFO - step: 5130 loss: 2.6786 
memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3174 global_avg_mtp_loss: 2.3612 +[titan] 2025-10-05 01:45:24,263 - root - INFO - lr: 4.8434e-05 gnorm: 1.22 [ 3:11:15<21:40:03] +[titan] 2025-10-05 01:45:35,196 - root - INFO - step: 5135 loss: 2.8034 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3332 global_avg_mtp_loss: 2.4702 +[titan] 2025-10-05 01:45:35,196 - root - INFO - lr: 4.8431e-05 gnorm: 1.27 [ 3:11:26<21:39:50] +[titan] 2025-10-05 01:45:46,094 - root - INFO - step: 5140 loss: 2.7216 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3233 global_avg_mtp_loss: 2.3983 +[titan] 2025-10-05 01:45:46,094 - root - INFO - lr: 4.8427e-05 gnorm: 1.26 [ 3:11:37<21:39:37] +[titan] 2025-10-05 01:45:56,991 - root - INFO - step: 5145 loss: 2.7084 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3869 +[titan] 2025-10-05 01:45:56,991 - root - INFO - lr: 4.8424e-05 gnorm: 1.23 [ 3:11:48<21:39:24] +[titan] 2025-10-05 01:46:05,684 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:46:07,870 - root - INFO - step: 5150 loss: 2.7550 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4284 +[titan] 2025-10-05 01:46:07,870 - root - INFO - lr: 4.8421e-05 gnorm: 1.28 [ 3:11:59<21:39:10] +[titan] 2025-10-05 01:46:18,768 - root - INFO - step: 5155 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3142 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 01:46:18,768 - root - INFO - lr: 4.8417e-05 gnorm: 1.20 [ 3:12:10<21:38:57] +[titan] 2025-10-05 01:46:29,716 - root - INFO - step: 5160 loss: 2.7141 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3918 +[titan] 2025-10-05 01:46:29,716 - root - INFO - lr: 4.8414e-05 gnorm: 1.22 [ 3:12:21<21:38:45] +[titan] 2025-10-05 01:46:40,611 - root - INFO - step: 5165 loss: 2.7431 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3245 global_avg_mtp_loss: 2.4185 +[titan] 2025-10-05 01:46:40,611 - root - INFO - lr: 4.8411e-05 gnorm: 1.18 [ 3:12:32<21:38:31] +[titan] 2025-10-05 01:46:51,503 - root - INFO - step: 5170 loss: 2.6610 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 01:46:51,503 - root - INFO - lr: 4.8408e-05 gnorm: 1.21 [ 3:12:42<21:38:18] +[titan] 2025-10-05 01:47:02,418 - root - INFO - step: 5175 loss: 2.7319 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.3242 global_avg_mtp_loss: 2.4077 +[titan] 2025-10-05 01:47:02,418 - root - INFO - lr: 4.8404e-05 gnorm: 1.21 [ 3:12:53<21:38:05] +[titan] 2025-10-05 01:47:13,333 - root - INFO - step: 5180 loss: 2.7303 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:47:13,333 - root - INFO - lr: 4.8401e-05 
gnorm: 1.24 [ 3:13:04<21:37:52] +[titan] 2025-10-05 01:47:24,247 - root - INFO - step: 5185 loss: 2.6746 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 01:47:24,247 - root - INFO - lr: 4.8398e-05 gnorm: 1.22 [ 3:13:15<21:37:39] +[titan] 2025-10-05 01:47:35,216 - root - INFO - step: 5190 loss: 2.7738 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.3282 global_avg_mtp_loss: 2.4456 +[titan] 2025-10-05 01:47:35,216 - root - INFO - lr: 4.8394e-05 gnorm: 1.31 [ 3:13:26<21:37:27] +[titan] 2025-10-05 01:47:46,124 - root - INFO - step: 5195 loss: 2.8394 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3384 global_avg_mtp_loss: 2.5009 +[titan] 2025-10-05 01:47:46,124 - root - INFO - lr: 4.8391e-05 gnorm: 1.27 [ 3:13:37<21:37:14] +[titan] 2025-10-05 01:47:54,837 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:47:57,027 - root - INFO - step: 5200 loss: 2.7263 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3237 global_avg_mtp_loss: 2.4026 +[titan] 2025-10-05 01:47:57,027 - root - INFO - lr: 4.8388e-05 gnorm: 1.24 [ 3:13:48<21:37:01] +[titan] 2025-10-05 01:48:07,915 - root - INFO - step: 5205 loss: 2.7277 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.4038 +[titan] 2025-10-05 01:48:07,915 - root - INFO - lr: 4.8384e-05 gnorm: 1.21 [ 3:13:59<21:36:47] +[titan] 2025-10-05 01:48:18,830 - root - INFO - step: 5210 loss: 2.6835 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3172 global_avg_mtp_loss: 2.3663 +[titan] 2025-10-05 01:48:18,830 - root - INFO - lr: 4.8381e-05 gnorm: 1.22 [ 3:14:10<21:36:35] +[titan] 2025-10-05 01:48:29,733 - root - INFO - step: 5215 loss: 2.6886 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3701 +[titan] 2025-10-05 01:48:29,733 - root - INFO - lr: 4.8378e-05 gnorm: 1.23 [ 3:14:21<21:36:22] +[titan] 2025-10-05 01:48:40,645 - root - INFO - step: 5220 loss: 2.7098 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 01:48:40,645 - root - INFO - lr: 4.8374e-05 gnorm: 1.25 [ 3:14:32<21:36:09] +[titan] 2025-10-05 01:48:51,536 - root - INFO - step: 5225 loss: 2.8169 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3339 global_avg_mtp_loss: 2.4830 +[titan] 2025-10-05 01:48:51,536 - root - INFO - lr: 4.8371e-05 gnorm: 1.24 [ 3:14:42<21:35:55] +[titan] 2025-10-05 01:49:02,433 - root - INFO - step: 5230 loss: 2.7455 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4197 +[titan] 2025-10-05 01:49:02,433 - root - INFO - lr: 4.8368e-05 
gnorm: 1.26 [ 3:14:53<21:35:42] +[titan] 2025-10-05 01:49:13,324 - root - INFO - step: 5235 loss: 2.7873 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.3324 global_avg_mtp_loss: 2.4549 +[titan] 2025-10-05 01:49:13,325 - root - INFO - lr: 4.8364e-05 gnorm: 1.21 [ 3:15:04<21:35:29] +[titan] 2025-10-05 01:49:24,205 - root - INFO - step: 5240 loss: 2.6851 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3675 +[titan] 2025-10-05 01:49:24,206 - root - INFO - lr: 4.8361e-05 gnorm: 1.22 [ 3:15:15<21:35:16] +[titan] 2025-10-05 01:49:35,124 - root - INFO - step: 5245 loss: 2.7664 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4406 +[titan] 2025-10-05 01:49:35,124 - root - INFO - lr: 4.8358e-05 gnorm: 1.24 [ 3:15:26<21:35:03] +[titan] 2025-10-05 01:49:43,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:49:45,992 - root - INFO - step: 5250 loss: 2.7297 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4047 +[titan] 2025-10-05 01:49:45,992 - root - INFO - lr: 4.8354e-05 gnorm: 1.29 [ 3:15:37<21:34:50] +[titan] 2025-10-05 01:49:56,896 - root - INFO - step: 5255 loss: 2.7151 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.3222 global_avg_mtp_loss: 2.3928 +[titan] 2025-10-05 01:49:56,896 - root - INFO - lr: 4.8351e-05 gnorm: 1.29 [ 3:15:48<21:34:37] +[titan] 2025-10-05 01:50:07,763 - root - INFO - step: 5260 loss: 2.7886 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.3308 global_avg_mtp_loss: 2.4578 +[titan] 2025-10-05 01:50:07,763 - root - INFO - lr: 4.8348e-05 gnorm: 1.36 [ 3:15:59<21:34:24] +[titan] 2025-10-05 01:50:18,645 - root - INFO - step: 5265 loss: 2.6117 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3012 +[titan] 2025-10-05 01:50:18,645 - root - INFO - lr: 4.8344e-05 gnorm: 1.24 [ 3:16:10<21:34:11] +[titan] 2025-10-05 01:50:29,515 - root - INFO - step: 5270 loss: 2.7739 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4451 +[titan] 2025-10-05 01:50:29,516 - root - INFO - lr: 4.8341e-05 gnorm: 1.24 [ 3:16:20<21:33:57] +[titan] 2025-10-05 01:50:40,456 - root - INFO - step: 5275 loss: 2.7065 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.53 mfu: 42.01% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3874 +[titan] 2025-10-05 01:50:40,457 - root - INFO - lr: 4.8338e-05 gnorm: 1.25 [ 3:16:31<21:33:45] +[titan] 2025-10-05 01:50:51,334 - root - INFO - step: 5280 loss: 2.7674 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.3290 global_avg_mtp_loss: 2.4384 +[titan] 2025-10-05 01:50:51,334 - root - INFO - lr: 4.8334e-05 
gnorm: 1.25 [ 3:16:42<21:33:31] +[titan] 2025-10-05 01:51:02,214 - root - INFO - step: 5285 loss: 2.6660 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3493 +[titan] 2025-10-05 01:51:02,214 - root - INFO - lr: 4.8331e-05 gnorm: 1.20 [ 3:16:53<21:33:18] +[titan] 2025-10-05 01:51:13,075 - root - INFO - step: 5290 loss: 2.7457 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4189 +[titan] 2025-10-05 01:51:13,075 - root - INFO - lr: 4.8327e-05 gnorm: 1.25 [ 3:17:04<21:33:05] +[titan] 2025-10-05 01:51:23,938 - root - INFO - step: 5295 loss: 2.7299 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4065 +[titan] 2025-10-05 01:51:23,938 - root - INFO - lr: 4.8324e-05 gnorm: 1.18 [ 3:17:15<21:32:52] +[titan] 2025-10-05 01:51:32,658 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:51:34,836 - root - INFO - step: 5300 loss: 2.7577 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4310 +[titan] 2025-10-05 01:51:34,836 - root - INFO - lr: 4.8321e-05 gnorm: 1.27 [ 3:17:26<21:32:39] +[titan] 2025-10-05 01:51:45,732 - root - INFO - step: 5305 loss: 2.7686 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.4411 +[titan] 2025-10-05 01:51:45,732 - root - INFO - lr: 4.8317e-05 gnorm: 1.28 [ 3:17:37<21:32:26] +[titan] 2025-10-05 01:51:56,598 - root - INFO - step: 5310 loss: 2.6649 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3486 +[titan] 2025-10-05 01:51:56,598 - root - INFO - lr: 4.8314e-05 gnorm: 1.25 [ 3:17:48<21:32:13] +[titan] 2025-10-05 01:52:07,463 - root - INFO - step: 5315 loss: 2.6130 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3093 global_avg_mtp_loss: 2.3037 +[titan] 2025-10-05 01:52:07,463 - root - INFO - lr: 4.8311e-05 gnorm: 1.23 [ 3:17:58<21:31:59] +[titan] 2025-10-05 01:52:18,354 - root - INFO - step: 5320 loss: 2.7768 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.3287 global_avg_mtp_loss: 2.4481 +[titan] 2025-10-05 01:52:18,354 - root - INFO - lr: 4.8307e-05 gnorm: 1.31 [ 3:18:09<21:31:46] +[titan] 2025-10-05 01:52:29,236 - root - INFO - step: 5325 loss: 2.7143 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3924 +[titan] 2025-10-05 01:52:29,236 - root - INFO - lr: 4.8304e-05 gnorm: 1.21 [ 3:18:20<21:31:33] +[titan] 2025-10-05 01:52:40,146 - root - INFO - step: 5330 loss: 2.7556 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4290 +[titan] 2025-10-05 01:52:40,146 - root - INFO - lr: 4.8300e-05 
gnorm: 1.27 [ 3:18:31<21:31:20] +[titan] 2025-10-05 01:52:51,044 - root - INFO - step: 5335 loss: 2.7418 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.3250 global_avg_mtp_loss: 2.4168 +[titan] 2025-10-05 01:52:51,044 - root - INFO - lr: 4.8297e-05 gnorm: 1.26 [ 3:18:42<21:31:07] +[titan] 2025-10-05 01:53:01,911 - root - INFO - step: 5340 loss: 2.7097 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3206 global_avg_mtp_loss: 2.3891 +[titan] 2025-10-05 01:53:01,911 - root - INFO - lr: 4.8294e-05 gnorm: 1.30 [ 3:18:53<21:30:54] +[titan] 2025-10-05 01:53:12,786 - root - INFO - step: 5345 loss: 2.6651 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3501 +[titan] 2025-10-05 01:53:12,787 - root - INFO - lr: 4.8290e-05 gnorm: 1.21 [ 3:19:04<21:30:41] +[titan] 2025-10-05 01:53:21,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:53:23,680 - root - INFO - step: 5350 loss: 2.7279 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3226 global_avg_mtp_loss: 2.4053 +[titan] 2025-10-05 01:53:23,680 - root - INFO - lr: 4.8287e-05 gnorm: 1.24 [ 3:19:15<21:30:28] +[titan] 2025-10-05 01:53:34,600 - root - INFO - step: 5355 loss: 2.6227 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 01:53:34,600 - root - INFO - lr: 4.8283e-05 gnorm: 1.28 [ 3:19:26<21:30:15] +[titan] 2025-10-05 01:53:45,495 - root - INFO - step: 5360 loss: 2.7848 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3300 global_avg_mtp_loss: 2.4548 +[titan] 2025-10-05 01:53:45,495 - root - INFO - lr: 4.8280e-05 gnorm: 1.23 [ 3:19:36<21:30:02] +[titan] 2025-10-05 01:53:56,371 - root - INFO - step: 5365 loss: 2.7914 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3359 global_avg_mtp_loss: 2.4555 +[titan] 2025-10-05 01:53:56,372 - root - INFO - lr: 4.8276e-05 gnorm: 1.23 [ 3:19:47<21:29:49] +[titan] 2025-10-05 01:54:07,246 - root - INFO - step: 5370 loss: 2.6816 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3627 +[titan] 2025-10-05 01:54:07,246 - root - INFO - lr: 4.8273e-05 gnorm: 1.23 [ 3:19:58<21:29:36] +[titan] 2025-10-05 01:54:18,130 - root - INFO - step: 5375 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3272 +[titan] 2025-10-05 01:54:18,130 - root - INFO - lr: 4.8270e-05 gnorm: 1.27 [ 3:20:09<21:29:23] +[titan] 2025-10-05 01:54:28,973 - root - INFO - step: 5380 loss: 2.7116 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3899 +[titan] 2025-10-05 01:54:28,973 - root - INFO - lr: 4.8266e-05 
gnorm: 1.23 [ 3:20:20<21:29:10] +[titan] 2025-10-05 01:54:39,864 - root - INFO - step: 5385 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3764 +[titan] 2025-10-05 01:54:39,864 - root - INFO - lr: 4.8263e-05 gnorm: 1.24 [ 3:20:31<21:28:57] +[titan] 2025-10-05 01:54:50,734 - root - INFO - step: 5390 loss: 2.7644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3279 global_avg_mtp_loss: 2.4366 +[titan] 2025-10-05 01:54:50,735 - root - INFO - lr: 4.8259e-05 gnorm: 1.25 [ 3:20:42<21:28:44] +[titan] 2025-10-05 01:55:01,593 - root - INFO - step: 5395 loss: 2.7603 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3272 global_avg_mtp_loss: 2.4331 +[titan] 2025-10-05 01:55:01,593 - root - INFO - lr: 4.8256e-05 gnorm: 1.21 [ 3:20:52<21:28:31] +[titan] 2025-10-05 01:55:10,273 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:55:12,472 - root - INFO - step: 5400 loss: 2.7045 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3833 +[titan] 2025-10-05 01:55:12,472 - root - INFO - lr: 4.8252e-05 gnorm: 1.20 [ 3:21:03<21:28:18] +[titan] 2025-10-05 01:55:23,346 - root - INFO - step: 5405 loss: 2.7062 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3213 global_avg_mtp_loss: 2.3849 +[titan] 2025-10-05 01:55:23,346 - root - INFO - lr: 4.8249e-05 gnorm: 1.20 [ 3:21:14<21:28:05] +[titan] 2025-10-05 01:55:34,207 - root - INFO - step: 5410 loss: 2.7345 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.4118 +[titan] 2025-10-05 01:55:34,207 - root - INFO - lr: 4.8245e-05 gnorm: 1.26 [ 3:21:25<21:27:51] +[titan] 2025-10-05 01:55:45,114 - root - INFO - step: 5415 loss: 2.6787 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3618 +[titan] 2025-10-05 01:55:45,114 - root - INFO - lr: 4.8242e-05 gnorm: 1.16 [ 3:21:36<21:27:39] +[titan] 2025-10-05 01:55:55,985 - root - INFO - step: 5420 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.2994 +[titan] 2025-10-05 01:55:55,985 - root - INFO - lr: 4.8239e-05 gnorm: 1.23 [ 3:21:47<21:27:26] +[titan] 2025-10-05 01:56:06,858 - root - INFO - step: 5425 loss: 2.6262 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 01:56:06,858 - root - INFO - lr: 4.8235e-05 gnorm: 1.20 [ 3:21:58<21:27:12] +[titan] 2025-10-05 01:56:17,752 - root - INFO - step: 5430 loss: 2.6880 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3706 +[titan] 2025-10-05 01:56:17,752 - root - INFO - lr: 4.8232e-05 
gnorm: 1.21 [ 3:22:09<21:27:00] +[titan] 2025-10-05 01:56:28,647 - root - INFO - step: 5435 loss: 2.6104 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 01:56:28,647 - root - INFO - lr: 4.8228e-05 gnorm: 1.24 [ 3:22:20<21:26:47] +[titan] 2025-10-05 01:56:39,549 - root - INFO - step: 5440 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3753 +[titan] 2025-10-05 01:56:39,549 - root - INFO - lr: 4.8225e-05 gnorm: 1.24 [ 3:22:30<21:26:34] +[titan] 2025-10-05 01:56:50,425 - root - INFO - step: 5445 loss: 2.7005 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 01:56:50,426 - root - INFO - lr: 4.8221e-05 gnorm: 1.24 [ 3:22:41<21:26:21] +[titan] 2025-10-05 01:56:59,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:57:01,307 - root - INFO - step: 5450 loss: 2.7153 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3211 global_avg_mtp_loss: 2.3942 +[titan] 2025-10-05 01:57:01,307 - root - INFO - lr: 4.8218e-05 gnorm: 1.22 [ 3:22:52<21:26:08] +[titan] 2025-10-05 01:57:12,168 - root - INFO - step: 5455 loss: 2.7238 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4003 +[titan] 2025-10-05 01:57:12,168 - root - INFO - lr: 4.8214e-05 gnorm: 1.25 [ 3:23:03<21:25:55] +[titan] 2025-10-05 01:57:23,004 - root - INFO - step: 5460 loss: 2.7013 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.3215 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 01:57:23,004 - root - INFO - lr: 4.8211e-05 gnorm: 1.21 [ 3:23:14<21:25:41] +[titan] 2025-10-05 01:57:33,870 - root - INFO - step: 5465 loss: 2.7566 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3258 global_avg_mtp_loss: 2.4308 +[titan] 2025-10-05 01:57:33,870 - root - INFO - lr: 4.8207e-05 gnorm: 1.28 [ 3:23:25<21:25:28] +[titan] 2025-10-05 01:57:44,735 - root - INFO - step: 5470 loss: 2.6960 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3201 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 01:57:44,736 - root - INFO - lr: 4.8204e-05 gnorm: 3.95 [ 3:23:36<21:25:15] +[titan] 2025-10-05 01:57:55,597 - root - INFO - step: 5475 loss: 2.7332 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3268 global_avg_mtp_loss: 2.4064 +[titan] 2025-10-05 01:57:55,598 - root - INFO - lr: 4.8200e-05 gnorm: 5.60 [ 3:23:46<21:25:02] +[titan] 2025-10-05 01:58:06,457 - root - INFO - step: 5480 loss: 2.6333 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3196 +[titan] 2025-10-05 01:58:06,457 - root - INFO - lr: 4.8197e-05 
gnorm: 1.42 [ 3:23:57<21:24:49] +[titan] 2025-10-05 01:58:17,326 - root - INFO - step: 5485 loss: 2.6808 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3195 global_avg_mtp_loss: 2.3613 +[titan] 2025-10-05 01:58:17,327 - root - INFO - lr: 4.8193e-05 gnorm: 1.64 [ 3:24:08<21:24:36] +[titan] 2025-10-05 01:58:28,172 - root - INFO - step: 5490 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.3202 global_avg_mtp_loss: 2.3789 +[titan] 2025-10-05 01:58:28,172 - root - INFO - lr: 4.8190e-05 gnorm: 1.44 [ 3:24:19<21:24:23] +[titan] 2025-10-05 01:58:39,061 - root - INFO - step: 5495 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3772 +[titan] 2025-10-05 01:58:39,061 - root - INFO - lr: 4.8186e-05 gnorm: 1.37 [ 3:24:30<21:24:10] +[titan] 2025-10-05 01:58:47,779 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 01:58:49,967 - root - INFO - step: 5500 loss: 2.7427 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.3262 global_avg_mtp_loss: 2.4165 +[titan] 2025-10-05 01:58:49,967 - root - INFO - lr: 4.8183e-05 gnorm: 1.30 [ 3:24:41<21:23:57] +[titan] 2025-10-05 01:59:00,823 - root - INFO - step: 5505 loss: 2.7373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3253 global_avg_mtp_loss: 2.4120 +[titan] 2025-10-05 01:59:00,823 - root - INFO - lr: 4.8179e-05 gnorm: 1.29 [ 3:24:52<21:23:44] +[titan] 2025-10-05 01:59:11,693 - root - INFO - step: 5510 loss: 2.6666 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3511 +[titan] 2025-10-05 01:59:11,693 - root - INFO - lr: 4.8176e-05 gnorm: 1.30 [ 3:25:03<21:23:31] +[titan] 2025-10-05 01:59:22,587 - root - INFO - step: 5515 loss: 2.7189 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3251 global_avg_mtp_loss: 2.3938 +[titan] 2025-10-05 01:59:22,587 - root - INFO - lr: 4.8172e-05 gnorm: 6.71 [ 3:25:13<21:23:18] +[titan] 2025-10-05 01:59:33,471 - root - INFO - step: 5520 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3125 global_avg_mtp_loss: 2.3281 +[titan] 2025-10-05 01:59:33,471 - root - INFO - lr: 4.8169e-05 gnorm: 1.27 [ 3:25:24<21:23:05] +[titan] 2025-10-05 01:59:44,386 - root - INFO - step: 5525 loss: 2.6236 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3140 +[titan] 2025-10-05 01:59:44,386 - root - INFO - lr: 4.8165e-05 gnorm: 1.21 [ 3:25:35<21:22:53] +[titan] 2025-10-05 01:59:55,268 - root - INFO - step: 5530 loss: 2.7183 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 01:59:55,268 - root - INFO - lr: 4.8162e-05 
gnorm: 1.23 [ 3:25:46<21:22:40] +[titan] 2025-10-05 02:00:06,139 - root - INFO - step: 5535 loss: 2.6010 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3104 global_avg_mtp_loss: 2.2906 +[titan] 2025-10-05 02:00:06,139 - root - INFO - lr: 4.8158e-05 gnorm: 1.28 [ 3:25:57<21:22:27] +[titan] 2025-10-05 02:00:17,012 - root - INFO - step: 5540 loss: 2.6903 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3180 global_avg_mtp_loss: 2.3723 +[titan] 2025-10-05 02:00:17,013 - root - INFO - lr: 4.8155e-05 gnorm: 1.28 [ 3:26:08<21:22:14] +[titan] 2025-10-05 02:00:27,882 - root - INFO - step: 5545 loss: 2.6624 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3482 +[titan] 2025-10-05 02:00:27,882 - root - INFO - lr: 4.8151e-05 gnorm: 1.25 [ 3:26:19<21:22:01] +[titan] 2025-10-05 02:00:36,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:00:38,754 - root - INFO - step: 5550 loss: 2.6437 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3307 +[titan] 2025-10-05 02:00:38,754 - root - INFO - lr: 4.8147e-05 gnorm: 1.23 [ 3:26:30<21:21:48] +[titan] 2025-10-05 02:00:49,688 - root - INFO - step: 5555 loss: 2.6840 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.3178 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:00:49,689 - root - INFO - lr: 4.8144e-05 gnorm: 1.21 [ 3:26:41<21:21:35] +[titan] 2025-10-05 02:01:00,569 - root - INFO - step: 5560 loss: 2.6738 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3569 +[titan] 2025-10-05 02:01:00,569 - root - INFO - lr: 4.8140e-05 gnorm: 1.21 [ 3:26:51<21:21:22] +[titan] 2025-10-05 02:01:11,488 - root - INFO - step: 5565 loss: 2.6609 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3459 +[titan] 2025-10-05 02:01:11,488 - root - INFO - lr: 4.8137e-05 gnorm: 1.24 [ 3:27:02<21:21:10] +[titan] 2025-10-05 02:01:22,384 - root - INFO - step: 5570 loss: 2.7213 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.3978 +[titan] 2025-10-05 02:01:22,384 - root - INFO - lr: 4.8133e-05 gnorm: 1.26 [ 3:27:13<21:20:57] +[titan] 2025-10-05 02:01:33,286 - root - INFO - step: 5575 loss: 2.6770 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3597 +[titan] 2025-10-05 02:01:33,287 - root - INFO - lr: 4.8130e-05 gnorm: 1.23 [ 3:27:24<21:20:44] +[titan] 2025-10-05 02:01:44,187 - root - INFO - step: 5580 loss: 2.6684 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.3160 global_avg_mtp_loss: 2.3524 +[titan] 2025-10-05 02:01:44,187 - root - INFO - lr: 4.8126e-05 
gnorm: 1.22 [ 3:27:35<21:20:31] +[titan] 2025-10-05 02:01:55,071 - root - INFO - step: 5585 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3212 global_avg_mtp_loss: 2.3821 +[titan] 2025-10-05 02:01:55,072 - root - INFO - lr: 4.8123e-05 gnorm: 1.23 [ 3:27:46<21:20:18] +[titan] 2025-10-05 02:02:05,953 - root - INFO - step: 5590 loss: 2.7020 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3223 global_avg_mtp_loss: 2.3797 +[titan] 2025-10-05 02:02:05,954 - root - INFO - lr: 4.8119e-05 gnorm: 1.29 [ 3:27:57<21:20:05] +[titan] 2025-10-05 02:02:16,866 - root - INFO - step: 5595 loss: 2.6621 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.3162 global_avg_mtp_loss: 2.3458 +[titan] 2025-10-05 02:02:16,866 - root - INFO - lr: 4.8115e-05 gnorm: 1.25 [ 3:28:08<21:19:53] +[titan] 2025-10-05 02:02:25,553 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:02:27,738 - root - INFO - step: 5600 loss: 2.7026 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3818 +[titan] 2025-10-05 02:02:27,739 - root - INFO - lr: 4.8112e-05 gnorm: 1.26 [ 3:28:19<21:19:40] +[titan] 2025-10-05 02:02:38,604 - root - INFO - step: 5605 loss: 2.6192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:02:38,604 - root - INFO - lr: 4.8108e-05 gnorm: 1.25 [ 3:28:29<21:19:27] +[titan] 2025-10-05 02:02:49,527 - root - INFO - step: 5610 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3444 +[titan] 2025-10-05 02:02:49,527 - root - INFO - lr: 4.8105e-05 gnorm: 1.32 [ 3:28:40<21:19:14] +[titan] 2025-10-05 02:03:00,407 - root - INFO - step: 5615 loss: 2.6727 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3552 +[titan] 2025-10-05 02:03:00,407 - root - INFO - lr: 4.8101e-05 gnorm: 1.19 [ 3:28:51<21:19:01] +[titan] 2025-10-05 02:03:11,293 - root - INFO - step: 5620 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3098 +[titan] 2025-10-05 02:03:11,293 - root - INFO - lr: 4.8097e-05 gnorm: 1.24 [ 3:29:02<21:18:49] +[titan] 2025-10-05 02:03:22,216 - root - INFO - step: 5625 loss: 2.6235 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3126 +[titan] 2025-10-05 02:03:22,216 - root - INFO - lr: 4.8094e-05 gnorm: 1.21 [ 3:29:13<21:18:36] +[titan] 2025-10-05 02:03:33,165 - root - INFO - step: 5630 loss: 2.7089 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3881 +[titan] 2025-10-05 02:03:33,165 - root - INFO - lr: 4.8090e-05 
gnorm: 1.27 [ 3:29:24<21:18:23] +[titan] 2025-10-05 02:03:37,688 - root - INFO - Dumping profiler traces at step 5632 +[titan] 2025-10-05 02:03:37,725 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:03:44,274 - root - INFO - step: 5635 loss: 2.6796 memory: 118.84GiB(85.28%) tps: 29,497 tflops: 409.23 mfu: 41.38% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3615 +[titan] 2025-10-05 02:03:44,274 - root - INFO - lr: 4.8087e-05 gnorm: 1.25 [ 3:29:35<21:18:12] +[titan] 2025-10-05 02:03:55,158 - root - INFO - step: 5640 loss: 2.6061 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3075 global_avg_mtp_loss: 2.2987 +[titan] 2025-10-05 02:03:55,158 - root - INFO - lr: 4.8083e-05 gnorm: 1.25 [ 3:29:46<21:17:59] +[titan] 2025-10-05 02:04:06,053 - root - INFO - step: 5645 loss: 2.7125 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3217 global_avg_mtp_loss: 2.3908 +[titan] 2025-10-05 02:04:06,053 - root - INFO - lr: 4.8079e-05 gnorm: 1.34 [ 3:29:57<21:17:46] +[titan] 2025-10-05 02:04:14,755 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:04:16,937 - root - INFO - step: 5650 loss: 2.5977 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:04:16,937 - root - INFO - lr: 4.8076e-05 gnorm: 1.27 [ 3:30:08<21:17:34] +[titan] 2025-10-05 02:04:27,853 - root - INFO - step: 5655 loss: 2.6416 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3118 global_avg_mtp_loss: 2.3299 +[titan] 2025-10-05 02:04:27,854 - root - INFO - lr: 4.8072e-05 gnorm: 1.30 [ 3:30:19<21:17:21] +[titan] 2025-10-05 02:04:38,772 - root - INFO - step: 5660 loss: 2.7230 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.4005 +[titan] 2025-10-05 02:04:38,772 - root - INFO - lr: 4.8069e-05 gnorm: 1.24 [ 3:30:30<21:17:08] +[titan] 2025-10-05 02:04:49,685 - root - INFO - step: 5665 loss: 2.7033 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3829 +[titan] 2025-10-05 02:04:49,685 - root - INFO - lr: 4.8065e-05 gnorm: 1.26 [ 3:30:41<21:16:56] +[titan] 2025-10-05 02:05:00,577 - root - INFO - step: 5670 loss: 2.6274 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3178 +[titan] 2025-10-05 02:05:00,577 - root - INFO - lr: 4.8061e-05 gnorm: 1.25 [ 3:30:51<21:16:43] +[titan] 2025-10-05 02:05:11,454 - root - INFO - step: 5675 loss: 2.6289 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:11,454 - root - INFO - lr: 4.8058e-05 gnorm: 1.22 [ 3:31:02<21:16:30] +[titan] 2025-10-05 02:05:22,325 - root - INFO - step: 5680 loss: 2.7071 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3216 global_avg_mtp_loss: 2.3854 +[titan] 2025-10-05 02:05:22,325 - root - INFO - lr: 4.8054e-05 
gnorm: 1.24 [ 3:31:13<21:16:17] +[titan] 2025-10-05 02:05:33,190 - root - INFO - step: 5685 loss: 2.6647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3151 global_avg_mtp_loss: 2.3496 +[titan] 2025-10-05 02:05:33,190 - root - INFO - lr: 4.8051e-05 gnorm: 1.25 [ 3:31:24<21:16:04] +[titan] 2025-10-05 02:05:44,079 - root - INFO - step: 5690 loss: 2.6318 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3204 +[titan] 2025-10-05 02:05:44,079 - root - INFO - lr: 4.8047e-05 gnorm: 1.20 [ 3:31:35<21:15:52] +[titan] 2025-10-05 02:05:54,989 - root - INFO - step: 5695 loss: 2.6284 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3177 +[titan] 2025-10-05 02:05:54,989 - root - INFO - lr: 4.8043e-05 gnorm: 1.18 [ 3:31:46<21:15:39] +[titan] 2025-10-05 02:06:03,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:06:05,856 - root - INFO - step: 5700 loss: 2.6425 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:06:05,856 - root - INFO - lr: 4.8040e-05 gnorm: 1.17 [ 3:31:57<21:15:26] +[titan] 2025-10-05 02:06:16,740 - root - INFO - step: 5705 loss: 2.6825 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3176 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:06:16,740 - root - INFO - lr: 4.8036e-05 gnorm: 1.21 [ 3:32:08<21:15:13] +[titan] 2025-10-05 02:06:27,613 - root - INFO - step: 5710 loss: 2.7487 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3267 global_avg_mtp_loss: 2.4220 +[titan] 2025-10-05 02:06:27,613 - root - INFO - lr: 4.8032e-05 gnorm: 1.24 [ 3:32:18<21:15:00] +[titan] 2025-10-05 02:06:38,482 - root - INFO - step: 5715 loss: 2.6692 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3542 +[titan] 2025-10-05 02:06:38,482 - root - INFO - lr: 4.8029e-05 gnorm: 1.27 [ 3:32:29<21:14:47] +[titan] 2025-10-05 02:06:49,408 - root - INFO - step: 5720 loss: 2.6745 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.3170 global_avg_mtp_loss: 2.3576 +[titan] 2025-10-05 02:06:49,408 - root - INFO - lr: 4.8025e-05 gnorm: 1.21 [ 3:32:40<21:14:35] +[titan] 2025-10-05 02:07:00,305 - root - INFO - step: 5725 loss: 2.6145 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3063 +[titan] 2025-10-05 02:07:00,305 - root - INFO - lr: 4.8021e-05 gnorm: 1.25 [ 3:32:51<21:14:22] +[titan] 2025-10-05 02:07:11,183 - root - INFO - step: 5730 loss: 2.6939 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3752 +[titan] 2025-10-05 02:07:11,183 - root - INFO - lr: 4.8018e-05 
gnorm: 1.27 [ 3:33:02<21:14:09] +[titan] 2025-10-05 02:07:22,045 - root - INFO - step: 5735 loss: 2.6083 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3002 +[titan] 2025-10-05 02:07:22,046 - root - INFO - lr: 4.8014e-05 gnorm: 1.28 [ 3:33:13<21:13:57] +[titan] 2025-10-05 02:07:32,920 - root - INFO - step: 5740 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3073 global_avg_mtp_loss: 2.2892 +[titan] 2025-10-05 02:07:32,920 - root - INFO - lr: 4.8010e-05 gnorm: 1.17 [ 3:33:24<21:13:44] +[titan] 2025-10-05 02:07:43,786 - root - INFO - step: 5745 loss: 2.6991 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3189 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:07:43,786 - root - INFO - lr: 4.8007e-05 gnorm: 1.24 [ 3:33:35<21:13:31] +[titan] 2025-10-05 02:07:52,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:07:54,774 - root - INFO - step: 5750 loss: 2.6142 memory: 118.84GiB(85.28%) tps: 29,821 tflops: 413.73 mfu: 41.83% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 02:07:54,774 - root - INFO - lr: 4.8003e-05 gnorm: 1.24 [ 3:33:46<21:13:19] +[titan] 2025-10-05 02:08:05,686 - root - INFO - step: 5755 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3384 +[titan] 2025-10-05 02:08:05,686 - root - INFO - lr: 4.7999e-05 gnorm: 1.20 [ 3:33:57<21:13:06] +[titan] 2025-10-05 02:08:16,606 - root - INFO - step: 5760 loss: 2.7255 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3230 global_avg_mtp_loss: 2.4025 +[titan] 2025-10-05 02:08:16,606 - root - INFO - lr: 4.7996e-05 gnorm: 1.24 [ 3:34:07<21:12:54] +[titan] 2025-10-05 02:08:27,488 - root - INFO - step: 5765 loss: 2.6698 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3144 global_avg_mtp_loss: 2.3553 +[titan] 2025-10-05 02:08:27,489 - root - INFO - lr: 4.7992e-05 gnorm: 1.25 [ 3:34:18<21:12:41] +[titan] 2025-10-05 02:08:38,371 - root - INFO - step: 5770 loss: 2.7107 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3900 +[titan] 2025-10-05 02:08:38,371 - root - INFO - lr: 4.7988e-05 gnorm: 1.26 [ 3:34:29<21:12:28] +[titan] 2025-10-05 02:08:49,291 - root - INFO - step: 5775 loss: 2.7046 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.3203 global_avg_mtp_loss: 2.3843 +[titan] 2025-10-05 02:08:49,291 - root - INFO - lr: 4.7985e-05 gnorm: 1.28 [ 3:34:40<21:12:16] +[titan] 2025-10-05 02:09:00,170 - root - INFO - step: 5780 loss: 2.7717 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3281 global_avg_mtp_loss: 2.4437 +[titan] 2025-10-05 02:09:00,170 - root - INFO - lr: 4.7981e-05 
gnorm: 1.66 [ 3:34:51<21:12:03] +[titan] 2025-10-05 02:09:11,065 - root - INFO - step: 5785 loss: 2.6598 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3451 +[titan] 2025-10-05 02:09:11,066 - root - INFO - lr: 4.7977e-05 gnorm: 1.28 [ 3:35:02<21:11:50] +[titan] 2025-10-05 02:09:21,936 - root - INFO - step: 5790 loss: 2.6190 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3077 +[titan] 2025-10-05 02:09:21,936 - root - INFO - lr: 4.7973e-05 gnorm: 1.24 [ 3:35:13<21:11:37] +[titan] 2025-10-05 02:09:32,809 - root - INFO - step: 5795 loss: 2.6803 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3637 +[titan] 2025-10-05 02:09:32,809 - root - INFO - lr: 4.7970e-05 gnorm: 1.27 [ 3:35:24<21:11:24] +[titan] 2025-10-05 02:09:41,490 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:09:43,680 - root - INFO - step: 5800 loss: 2.6313 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3124 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:09:43,680 - root - INFO - lr: 4.7966e-05 gnorm: 1.25 [ 3:35:35<21:11:12] +[titan] 2025-10-05 02:09:54,628 - root - INFO - step: 5805 loss: 2.6182 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.3095 global_avg_mtp_loss: 2.3088 +[titan] 2025-10-05 02:09:54,629 - root - INFO - lr: 4.7962e-05 gnorm: 1.26 [ 3:35:45<21:10:59] +[titan] 2025-10-05 02:10:05,480 - root - INFO - step: 5810 loss: 2.7315 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.3238 global_avg_mtp_loss: 2.4078 +[titan] 2025-10-05 02:10:05,481 - root - INFO - lr: 4.7959e-05 gnorm: 1.28 [ 3:35:56<21:10:46] +[titan] 2025-10-05 02:10:16,374 - root - INFO - step: 5815 loss: 2.6620 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3479 +[titan] 2025-10-05 02:10:16,374 - root - INFO - lr: 4.7955e-05 gnorm: 1.22 [ 3:36:07<21:10:34] +[titan] 2025-10-05 02:10:27,283 - root - INFO - step: 5820 loss: 2.6968 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.3191 global_avg_mtp_loss: 2.3777 +[titan] 2025-10-05 02:10:27,283 - root - INFO - lr: 4.7951e-05 gnorm: 1.21 [ 3:36:18<21:10:21] +[titan] 2025-10-05 02:10:38,152 - root - INFO - step: 5825 loss: 2.6399 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3136 global_avg_mtp_loss: 2.3263 +[titan] 2025-10-05 02:10:38,152 - root - INFO - lr: 4.7947e-05 gnorm: 1.26 [ 3:36:29<21:10:08] +[titan] 2025-10-05 02:10:49,018 - root - INFO - step: 5830 loss: 2.6583 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3158 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:10:49,018 - root - INFO - lr: 4.7944e-05 
gnorm: 1.27 [ 3:36:40<21:09:56] +[titan] 2025-10-05 02:10:59,943 - root - INFO - step: 5835 loss: 2.6687 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3533 +[titan] 2025-10-05 02:10:59,943 - root - INFO - lr: 4.7940e-05 gnorm: 1.27 [ 3:36:51<21:09:43] +[titan] 2025-10-05 02:11:10,804 - root - INFO - step: 5840 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3646 +[titan] 2025-10-05 02:11:10,804 - root - INFO - lr: 4.7936e-05 gnorm: 1.23 [ 3:37:02<21:09:30] +[titan] 2025-10-05 02:11:21,663 - root - INFO - step: 5845 loss: 2.6567 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3141 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:11:21,663 - root - INFO - lr: 4.7933e-05 gnorm: 1.22 [ 3:37:13<21:09:17] +[titan] 2025-10-05 02:11:30,369 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:11:32,557 - root - INFO - step: 5850 loss: 2.5946 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2879 +[titan] 2025-10-05 02:11:32,557 - root - INFO - lr: 4.7929e-05 gnorm: 1.24 [ 3:37:23<21:09:05] +[titan] 2025-10-05 02:11:43,442 - root - INFO - step: 5855 loss: 2.6553 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.3150 global_avg_mtp_loss: 2.3404 +[titan] 2025-10-05 02:11:43,442 - root - INFO - lr: 4.7925e-05 gnorm: 1.31 [ 3:37:34<21:08:52] +[titan] 2025-10-05 02:11:54,344 - root - INFO - step: 5860 loss: 2.6942 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3769 +[titan] 2025-10-05 02:11:54,344 - root - INFO - lr: 4.7921e-05 gnorm: 1.23 [ 3:37:45<21:08:39] +[titan] 2025-10-05 02:12:05,223 - root - INFO - step: 5865 loss: 2.5612 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3011 global_avg_mtp_loss: 2.2601 +[titan] 2025-10-05 02:12:05,223 - root - INFO - lr: 4.7918e-05 gnorm: 1.19 [ 3:37:56<21:08:27] +[titan] 2025-10-05 02:12:16,102 - root - INFO - step: 5870 loss: 2.6730 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3578 +[titan] 2025-10-05 02:12:16,102 - root - INFO - lr: 4.7914e-05 gnorm: 1.22 [ 3:38:07<21:08:14] +[titan] 2025-10-05 02:12:26,998 - root - INFO - step: 5875 loss: 2.7092 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3873 +[titan] 2025-10-05 02:12:26,998 - root - INFO - lr: 4.7910e-05 gnorm: 1.27 [ 3:38:18<21:08:01] +[titan] 2025-10-05 02:12:37,886 - root - INFO - step: 5880 loss: 2.6639 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3140 global_avg_mtp_loss: 2.3499 +[titan] 2025-10-05 02:12:37,886 - root - INFO - lr: 4.7906e-05 
gnorm: 1.23 [ 3:38:29<21:07:49] +[titan] 2025-10-05 02:12:48,782 - root - INFO - step: 5885 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3282 +[titan] 2025-10-05 02:12:48,782 - root - INFO - lr: 4.7903e-05 gnorm: 1.23 [ 3:38:40<21:07:36] +[titan] 2025-10-05 02:12:59,686 - root - INFO - step: 5890 loss: 2.6332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3107 global_avg_mtp_loss: 2.3225 +[titan] 2025-10-05 02:12:59,686 - root - INFO - lr: 4.7899e-05 gnorm: 1.23 [ 3:38:51<21:07:24] +[titan] 2025-10-05 02:13:10,552 - root - INFO - step: 5895 loss: 2.6971 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.3192 global_avg_mtp_loss: 2.3779 +[titan] 2025-10-05 02:13:10,552 - root - INFO - lr: 4.7895e-05 gnorm: 1.20 [ 3:39:01<21:07:11] +[titan] 2025-10-05 02:13:19,229 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:13:21,417 - root - INFO - step: 5900 loss: 2.6773 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3175 global_avg_mtp_loss: 2.3598 +[titan] 2025-10-05 02:13:21,418 - root - INFO - lr: 4.7891e-05 gnorm: 1.21 [ 3:39:12<21:06:58] +[titan] 2025-10-05 02:13:32,300 - root - INFO - step: 5905 loss: 2.6413 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3290 +[titan] 2025-10-05 02:13:32,300 - root - INFO - lr: 4.7888e-05 gnorm: 1.21 [ 3:39:23<21:06:45] +[titan] 2025-10-05 02:13:43,183 - root - INFO - step: 5910 loss: 2.7061 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3853 +[titan] 2025-10-05 02:13:43,184 - root - INFO - lr: 4.7884e-05 gnorm: 1.23 [ 3:39:34<21:06:33] +[titan] 2025-10-05 02:13:54,153 - root - INFO - step: 5915 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3154 +[titan] 2025-10-05 02:13:54,153 - root - INFO - lr: 4.7880e-05 gnorm: 1.20 [ 3:39:45<21:06:21] +[titan] 2025-10-05 02:14:05,035 - root - INFO - step: 5920 loss: 2.6930 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3748 +[titan] 2025-10-05 02:14:05,035 - root - INFO - lr: 4.7876e-05 gnorm: 1.23 [ 3:39:56<21:06:08] +[titan] 2025-10-05 02:14:15,930 - root - INFO - step: 5925 loss: 2.7377 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3234 global_avg_mtp_loss: 2.4143 +[titan] 2025-10-05 02:14:15,930 - root - INFO - lr: 4.7872e-05 gnorm: 1.31 [ 3:40:07<21:05:55] +[titan] 2025-10-05 02:14:26,810 - root - INFO - step: 5930 loss: 2.5791 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 02:14:26,810 - root - INFO - lr: 4.7869e-05 
gnorm: 1.28 [ 3:40:18<21:05:43] +[titan] 2025-10-05 02:14:37,679 - root - INFO - step: 5935 loss: 2.8206 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.3526 global_avg_mtp_loss: 2.4680 +[titan] 2025-10-05 02:14:37,679 - root - INFO - lr: 4.7865e-05 gnorm: 1.21 [ 3:40:29<21:05:30] +[titan] 2025-10-05 02:14:48,570 - root - INFO - step: 5940 loss: 2.6562 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3137 global_avg_mtp_loss: 2.3426 +[titan] 2025-10-05 02:14:48,571 - root - INFO - lr: 4.7861e-05 gnorm: 1.27 [ 3:40:39<21:05:17] +[titan] 2025-10-05 02:14:59,517 - root - INFO - step: 5945 loss: 2.6955 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.3188 global_avg_mtp_loss: 2.3767 +[titan] 2025-10-05 02:14:59,517 - root - INFO - lr: 4.7857e-05 gnorm: 1.24 [ 3:40:50<21:05:05] +[titan] 2025-10-05 02:15:08,203 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:15:10,403 - root - INFO - step: 5950 loss: 2.6441 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3332 +[titan] 2025-10-05 02:15:10,403 - root - INFO - lr: 4.7853e-05 gnorm: 1.24 [ 3:41:01<21:04:52] +[titan] 2025-10-05 02:15:21,261 - root - INFO - step: 5955 loss: 2.6351 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3224 +[titan] 2025-10-05 02:15:21,261 - root - INFO - lr: 4.7850e-05 gnorm: 1.27 [ 3:41:12<21:04:40] +[titan] 2025-10-05 02:15:32,145 - root - INFO - step: 5960 loss: 2.5704 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2660 +[titan] 2025-10-05 02:15:32,146 - root - INFO - lr: 4.7846e-05 gnorm: 1.24 [ 3:41:23<21:04:27] +[titan] 2025-10-05 02:15:43,038 - root - INFO - step: 5965 loss: 2.6451 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.3134 global_avg_mtp_loss: 2.3317 +[titan] 2025-10-05 02:15:43,038 - root - INFO - lr: 4.7842e-05 gnorm: 1.24 [ 3:41:34<21:04:14] +[titan] 2025-10-05 02:15:53,932 - root - INFO - step: 5970 loss: 2.6446 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:15:53,932 - root - INFO - lr: 4.7838e-05 gnorm: 1.25 [ 3:41:45<21:04:02] +[titan] 2025-10-05 02:16:04,943 - root - INFO - step: 5975 loss: 2.6984 memory: 118.84GiB(85.28%) tps: 29,760 tflops: 412.88 mfu: 41.75% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3798 +[titan] 2025-10-05 02:16:04,943 - root - INFO - lr: 4.7834e-05 gnorm: 1.22 [ 3:41:56<21:03:50] +[titan] 2025-10-05 02:16:15,864 - root - INFO - step: 5980 loss: 2.6883 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.3187 global_avg_mtp_loss: 2.3697 +[titan] 2025-10-05 02:16:15,864 - root - INFO - lr: 4.7831e-05 
gnorm: 1.23 [ 3:42:07<21:03:38] +[titan] 2025-10-05 02:16:26,743 - root - INFO - step: 5985 loss: 2.6999 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.3205 global_avg_mtp_loss: 2.3795 +[titan] 2025-10-05 02:16:26,743 - root - INFO - lr: 4.7827e-05 gnorm: 1.25 [ 3:42:18<21:03:25] +[titan] 2025-10-05 02:16:37,616 - root - INFO - step: 5990 loss: 2.6514 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3368 +[titan] 2025-10-05 02:16:37,616 - root - INFO - lr: 4.7823e-05 gnorm: 1.24 [ 3:42:28<21:03:12] +[titan] 2025-10-05 02:16:48,504 - root - INFO - step: 5995 loss: 2.6633 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.3143 global_avg_mtp_loss: 2.3490 +[titan] 2025-10-05 02:16:48,504 - root - INFO - lr: 4.7819e-05 gnorm: 1.24 [ 3:42:39<21:03:00] +[titan] 2025-10-05 02:16:57,228 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:16:59,424 - root - INFO - step: 6000 loss: 2.7331 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3235 global_avg_mtp_loss: 2.4096 +[titan] 2025-10-05 02:16:59,424 - root - INFO - lr: 4.7815e-05 gnorm: 1.20 [ 3:42:50<21:02:47] +[titan] 2025-10-05 02:17:10,295 - root - INFO - step: 6005 loss: 2.6202 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3105 +[titan] 2025-10-05 02:17:10,295 - root - INFO - lr: 4.7811e-05 gnorm: 1.18 [ 3:43:01<21:02:35] +[titan] 2025-10-05 02:17:21,201 - root - INFO - step: 6010 loss: 2.5634 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2608 +[titan] 2025-10-05 02:17:21,201 - root - INFO - lr: 4.7808e-05 gnorm: 1.22 [ 3:43:12<21:02:22] +[titan] 2025-10-05 02:17:32,082 - root - INFO - step: 6015 loss: 2.6412 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3292 +[titan] 2025-10-05 02:17:32,082 - root - INFO - lr: 4.7804e-05 gnorm: 1.19 [ 3:43:23<21:02:09] +[titan] 2025-10-05 02:17:42,964 - root - INFO - step: 6020 loss: 2.7137 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3228 global_avg_mtp_loss: 2.3909 +[titan] 2025-10-05 02:17:42,964 - root - INFO - lr: 4.7800e-05 gnorm: 1.23 [ 3:43:34<21:01:57] +[titan] 2025-10-05 02:17:53,873 - root - INFO - step: 6025 loss: 2.6409 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.3177 global_avg_mtp_loss: 2.3232 +[titan] 2025-10-05 02:17:53,873 - root - INFO - lr: 4.7796e-05 gnorm: 1.20 [ 3:43:45<21:01:44] +[titan] 2025-10-05 02:18:04,793 - root - INFO - step: 6030 loss: 2.6673 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.3165 global_avg_mtp_loss: 2.3508 +[titan] 2025-10-05 02:18:04,793 - root - INFO - lr: 4.7792e-05 
gnorm: 1.27 [ 3:43:56<21:01:32] +[titan] 2025-10-05 02:18:15,648 - root - INFO - step: 6035 loss: 2.5627 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2603 +[titan] 2025-10-05 02:18:15,648 - root - INFO - lr: 4.7788e-05 gnorm: 1.20 [ 3:44:06<21:01:19] +[titan] 2025-10-05 02:18:26,520 - root - INFO - step: 6040 loss: 2.6300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3189 +[titan] 2025-10-05 02:18:26,520 - root - INFO - lr: 4.7784e-05 gnorm: 1.19 [ 3:44:17<21:01:07] +[titan] 2025-10-05 02:18:37,421 - root - INFO - step: 6045 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2925 +[titan] 2025-10-05 02:18:37,422 - root - INFO - lr: 4.7781e-05 gnorm: 1.23 [ 3:44:28<21:00:54] +[titan] 2025-10-05 02:18:46,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:18:48,292 - root - INFO - step: 6050 loss: 2.6234 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3111 global_avg_mtp_loss: 2.3122 +[titan] 2025-10-05 02:18:48,292 - root - INFO - lr: 4.7777e-05 gnorm: 1.22 [ 3:44:39<21:00:41] +[titan] 2025-10-05 02:18:59,214 - root - INFO - step: 6055 loss: 2.7909 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.3399 global_avg_mtp_loss: 2.4510 +[titan] 2025-10-05 02:18:59,214 - root - INFO - lr: 4.7773e-05 gnorm: 1.28 [ 3:44:50<21:00:29] +[titan] 2025-10-05 02:19:10,081 - root - INFO - step: 6060 loss: 2.7169 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3225 global_avg_mtp_loss: 2.3944 +[titan] 2025-10-05 02:19:10,081 - root - INFO - lr: 4.7769e-05 gnorm: 1.19 [ 3:45:01<21:00:16] +[titan] 2025-10-05 02:19:20,960 - root - INFO - step: 6065 loss: 2.5899 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3057 global_avg_mtp_loss: 2.2843 +[titan] 2025-10-05 02:19:20,961 - root - INFO - lr: 4.7765e-05 gnorm: 1.20 [ 3:45:12<21:00:04] +[titan] 2025-10-05 02:19:31,815 - root - INFO - step: 6070 loss: 2.5974 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2900 +[titan] 2025-10-05 02:19:31,815 - root - INFO - lr: 4.7761e-05 gnorm: 1.19 [ 3:45:23<20:59:51] +[titan] 2025-10-05 02:19:42,704 - root - INFO - step: 6075 loss: 2.5388 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2387 +[titan] 2025-10-05 02:19:42,704 - root - INFO - lr: 4.7757e-05 gnorm: 1.24 [ 3:45:34<20:59:38] +[titan] 2025-10-05 02:19:53,571 - root - INFO - step: 6080 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2553 +[titan] 2025-10-05 02:19:53,571 - root - INFO - lr: 4.7753e-05 
gnorm: 1.24 [ 3:45:44<20:59:26] +[titan] 2025-10-05 02:20:04,484 - root - INFO - step: 6085 loss: 2.6574 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3425 +[titan] 2025-10-05 02:20:04,484 - root - INFO - lr: 4.7750e-05 gnorm: 1.25 [ 3:45:55<20:59:13] +[titan] 2025-10-05 02:20:15,352 - root - INFO - step: 6090 loss: 2.6004 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2933 +[titan] 2025-10-05 02:20:15,352 - root - INFO - lr: 4.7746e-05 gnorm: 1.29 [ 3:46:06<20:59:01] +[titan] 2025-10-05 02:20:26,230 - root - INFO - step: 6095 loss: 2.6515 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:20:26,230 - root - INFO - lr: 4.7742e-05 gnorm: 1.25 [ 3:46:17<20:58:48] +[titan] 2025-10-05 02:20:34,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:20:37,079 - root - INFO - step: 6100 loss: 2.6900 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3703 +[titan] 2025-10-05 02:20:37,079 - root - INFO - lr: 4.7738e-05 gnorm: 1.19 [ 3:46:28<20:58:35] +[titan] 2025-10-05 02:20:47,995 - root - INFO - step: 6105 loss: 2.7058 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.3207 global_avg_mtp_loss: 2.3851 +[titan] 2025-10-05 02:20:47,995 - root - INFO - lr: 4.7734e-05 gnorm: 1.26 [ 3:46:39<20:58:23] +[titan] 2025-10-05 02:20:58,928 - root - INFO - step: 6110 loss: 2.6693 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3537 +[titan] 2025-10-05 02:20:58,928 - root - INFO - lr: 4.7730e-05 gnorm: 1.27 [ 3:46:50<20:58:11] +[titan] 2025-10-05 02:21:09,804 - root - INFO - step: 6115 loss: 2.5456 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:21:09,804 - root - INFO - lr: 4.7726e-05 gnorm: 1.13 [ 3:47:01<20:57:58] +[titan] 2025-10-05 02:21:20,686 - root - INFO - step: 6120 loss: 2.6377 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.3133 global_avg_mtp_loss: 2.3244 +[titan] 2025-10-05 02:21:20,686 - root - INFO - lr: 4.7722e-05 gnorm: 1.17 [ 3:47:12<20:57:46] +[titan] 2025-10-05 02:21:31,544 - root - INFO - step: 6125 loss: 2.5803 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:21:31,544 - root - INFO - lr: 4.7718e-05 gnorm: 1.19 [ 3:47:22<20:57:33] +[titan] 2025-10-05 02:21:42,406 - root - INFO - step: 6130 loss: 2.6986 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3790 +[titan] 2025-10-05 02:21:42,406 - root - INFO - lr: 4.7714e-05 
gnorm: 1.30 [ 3:47:33<20:57:20] +[titan] 2025-10-05 02:21:53,244 - root - INFO - step: 6135 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3086 +[titan] 2025-10-05 02:21:53,244 - root - INFO - lr: 4.7710e-05 gnorm: 1.24 [ 3:47:44<20:57:08] +[titan] 2025-10-05 02:22:04,175 - root - INFO - step: 6140 loss: 2.5814 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.3040 global_avg_mtp_loss: 2.2775 +[titan] 2025-10-05 02:22:04,175 - root - INFO - lr: 4.7707e-05 gnorm: 1.23 [ 3:47:55<20:56:55] +[titan] 2025-10-05 02:22:13,104 - root - INFO - Dumping profiler traces at step 6144 +[titan] 2025-10-05 02:22:13,141 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:22:15,343 - root - INFO - step: 6145 loss: 2.6735 memory: 118.84GiB(85.28%) tps: 29,341 tflops: 407.07 mfu: 41.16% global_avg_ntp_loss: 0.3156 global_avg_mtp_loss: 2.3580 +[titan] 2025-10-05 02:22:15,343 - root - INFO - lr: 4.7703e-05 gnorm: 1.26 [ 3:48:06<20:56:44] +[titan] 2025-10-05 02:22:24,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:22:26,217 - root - INFO - step: 6150 loss: 2.6490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3135 global_avg_mtp_loss: 2.3355 +[titan] 2025-10-05 02:22:26,217 - root - INFO - lr: 4.7699e-05 gnorm: 1.24 [ 3:48:17<20:56:32] +[titan] 2025-10-05 02:22:37,096 - root - INFO - step: 6155 loss: 2.6463 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.3123 global_avg_mtp_loss: 2.3340 +[titan] 2025-10-05 02:22:37,096 - root - INFO - lr: 4.7695e-05 gnorm: 1.18 [ 3:48:28<20:56:19] +[titan] 2025-10-05 02:22:47,962 - root - INFO - step: 6160 loss: 2.6975 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3173 global_avg_mtp_loss: 2.3802 +[titan] 2025-10-05 02:22:47,962 - root - INFO - lr: 4.7691e-05 gnorm: 1.25 [ 3:48:39<20:56:07] +[titan] 2025-10-05 02:22:58,842 - root - INFO - step: 6165 loss: 2.6719 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3152 global_avg_mtp_loss: 2.3568 +[titan] 2025-10-05 02:22:58,842 - root - INFO - lr: 4.7687e-05 gnorm: 1.27 [ 3:48:50<20:55:54] +[titan] 2025-10-05 02:23:09,781 - root - INFO - step: 6170 loss: 2.6832 memory: 118.84GiB(85.28%) tps: 29,957 tflops: 415.60 mfu: 42.02% global_avg_ntp_loss: 0.3183 global_avg_mtp_loss: 2.3649 +[titan] 2025-10-05 02:23:09,781 - root - INFO - lr: 4.7683e-05 gnorm: 1.18 [ 3:49:01<20:55:42] +[titan] 2025-10-05 02:23:20,657 - root - INFO - step: 6175 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2762 +[titan] 2025-10-05 02:23:20,657 - root - INFO - lr: 4.7679e-05 gnorm: 1.25 [ 3:49:11<20:55:29] +[titan] 2025-10-05 02:23:31,536 - root - INFO - step: 6180 loss: 2.6338 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.3119 global_avg_mtp_loss: 2.3219 +[titan] 2025-10-05 02:23:31,536 - root - INFO - lr: 4.7675e-05 
gnorm: 1.21 [ 3:49:22<20:55:17] +[titan] 2025-10-05 02:23:42,416 - root - INFO - step: 6185 loss: 2.6751 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3167 global_avg_mtp_loss: 2.3584 +[titan] 2025-10-05 02:23:42,416 - root - INFO - lr: 4.7671e-05 gnorm: 1.23 [ 3:49:33<20:55:04] +[titan] 2025-10-05 02:23:53,282 - root - INFO - step: 6190 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:23:53,282 - root - INFO - lr: 4.7667e-05 gnorm: 1.94 [ 3:49:44<20:54:52] +[titan] 2025-10-05 02:24:04,176 - root - INFO - step: 6195 loss: 2.6090 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.3001 +[titan] 2025-10-05 02:24:04,177 - root - INFO - lr: 4.7663e-05 gnorm: 1.30 [ 3:49:55<20:54:39] +[titan] 2025-10-05 02:24:12,861 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:24:15,046 - root - INFO - step: 6200 loss: 2.6013 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2936 +[titan] 2025-10-05 02:24:15,047 - root - INFO - lr: 4.7659e-05 gnorm: 1.22 [ 3:50:06<20:54:26] +[titan] 2025-10-05 02:24:25,976 - root - INFO - step: 6205 loss: 2.6406 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:24:25,976 - root - INFO - lr: 4.7655e-05 gnorm: 1.21 [ 3:50:17<20:54:14] +[titan] 2025-10-05 02:24:36,842 - root - INFO - step: 6210 loss: 2.5418 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 02:24:36,842 - root - INFO - lr: 4.7651e-05 gnorm: 1.17 [ 3:50:28<20:54:02] +[titan] 2025-10-05 02:24:47,725 - root - INFO - step: 6215 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2744 +[titan] 2025-10-05 02:24:47,725 - root - INFO - lr: 4.7647e-05 gnorm: 1.20 [ 3:50:39<20:53:49] +[titan] 2025-10-05 02:24:58,595 - root - INFO - step: 6220 loss: 2.6116 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 02:24:58,595 - root - INFO - lr: 4.7643e-05 gnorm: 1.26 [ 3:50:49<20:53:37] +[titan] 2025-10-05 02:25:09,462 - root - INFO - step: 6225 loss: 2.6255 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3097 global_avg_mtp_loss: 2.3158 +[titan] 2025-10-05 02:25:09,462 - root - INFO - lr: 4.7639e-05 gnorm: 1.26 [ 3:51:00<20:53:24] +[titan] 2025-10-05 02:25:20,338 - root - INFO - step: 6230 loss: 2.6316 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3207 +[titan] 2025-10-05 02:25:20,338 - root - INFO - lr: 4.7635e-05 
gnorm: 1.26 [ 3:51:11<20:53:11] +[titan] 2025-10-05 02:25:31,243 - root - INFO - step: 6235 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:25:31,243 - root - INFO - lr: 4.7631e-05 gnorm: 1.24 [ 3:51:22<20:52:59] +[titan] 2025-10-05 02:25:42,123 - root - INFO - step: 6240 loss: 2.6737 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3161 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:25:42,123 - root - INFO - lr: 4.7627e-05 gnorm: 1.21 [ 3:51:33<20:52:47] +[titan] 2025-10-05 02:25:53,008 - root - INFO - step: 6245 loss: 2.6264 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3126 global_avg_mtp_loss: 2.3138 +[titan] 2025-10-05 02:25:53,008 - root - INFO - lr: 4.7623e-05 gnorm: 1.21 [ 3:51:44<20:52:34] +[titan] 2025-10-05 02:26:01,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:26:03,944 - root - INFO - step: 6250 loss: 2.6166 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 02:26:03,944 - root - INFO - lr: 4.7619e-05 gnorm: 1.24 [ 3:51:55<20:52:22] +[titan] 2025-10-05 02:26:14,837 - root - INFO - step: 6255 loss: 2.5876 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2818 +[titan] 2025-10-05 02:26:14,837 - root - INFO - lr: 4.7615e-05 gnorm: 1.18 [ 3:52:06<20:52:10] +[titan] 2025-10-05 02:26:25,726 - root - INFO - step: 6260 loss: 2.7070 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3204 global_avg_mtp_loss: 2.3866 +[titan] 2025-10-05 02:26:25,726 - root - INFO - lr: 4.7611e-05 gnorm: 1.20 [ 3:52:17<20:51:57] +[titan] 2025-10-05 02:26:36,613 - root - INFO - step: 6265 loss: 2.6830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.3168 global_avg_mtp_loss: 2.3662 +[titan] 2025-10-05 02:26:36,613 - root - INFO - lr: 4.7607e-05 gnorm: 1.22 [ 3:52:27<20:51:45] +[titan] 2025-10-05 02:26:47,499 - root - INFO - step: 6270 loss: 2.4995 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2939 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 02:26:47,499 - root - INFO - lr: 4.7603e-05 gnorm: 1.22 [ 3:52:38<20:51:32] +[titan] 2025-10-05 02:26:58,361 - root - INFO - step: 6275 loss: 2.5337 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2345 +[titan] 2025-10-05 02:26:58,361 - root - INFO - lr: 4.7599e-05 gnorm: 1.19 [ 3:52:49<20:51:20] +[titan] 2025-10-05 02:27:09,255 - root - INFO - step: 6280 loss: 2.5465 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 02:27:09,255 - root - INFO - lr: 4.7595e-05 
gnorm: 1.18 [ 3:53:00<20:51:07] +[titan] 2025-10-05 02:27:20,123 - root - INFO - step: 6285 loss: 2.6725 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3166 global_avg_mtp_loss: 2.3560 +[titan] 2025-10-05 02:27:20,123 - root - INFO - lr: 4.7591e-05 gnorm: 1.25 [ 3:53:11<20:50:55] +[titan] 2025-10-05 02:27:30,985 - root - INFO - step: 6290 loss: 2.6086 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.2999 +[titan] 2025-10-05 02:27:30,985 - root - INFO - lr: 4.7587e-05 gnorm: 1.20 [ 3:53:22<20:50:42] +[titan] 2025-10-05 02:27:41,851 - root - INFO - step: 6295 loss: 2.6405 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3289 +[titan] 2025-10-05 02:27:41,851 - root - INFO - lr: 4.7583e-05 gnorm: 1.19 [ 3:53:33<20:50:29] +[titan] 2025-10-05 02:27:50,574 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:27:52,765 - root - INFO - step: 6300 loss: 2.6057 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2983 +[titan] 2025-10-05 02:27:52,765 - root - INFO - lr: 4.7579e-05 gnorm: 1.25 [ 3:53:44<20:50:17] +[titan] 2025-10-05 02:28:03,660 - root - INFO - step: 6305 loss: 2.6038 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3067 global_avg_mtp_loss: 2.2971 +[titan] 2025-10-05 02:28:03,661 - root - INFO - lr: 4.7575e-05 gnorm: 1.34 [ 3:53:54<20:50:05] +[titan] 2025-10-05 02:28:14,554 - root - INFO - step: 6310 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3056 +[titan] 2025-10-05 02:28:14,554 - root - INFO - lr: 4.7571e-05 gnorm: 1.26 [ 3:54:05<20:49:52] +[titan] 2025-10-05 02:28:25,460 - root - INFO - step: 6315 loss: 2.7106 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.3208 global_avg_mtp_loss: 2.3897 +[titan] 2025-10-05 02:28:25,460 - root - INFO - lr: 4.7567e-05 gnorm: 1.30 [ 3:54:16<20:49:40] +[titan] 2025-10-05 02:28:36,327 - root - INFO - step: 6320 loss: 2.6294 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3180 +[titan] 2025-10-05 02:28:36,327 - root - INFO - lr: 4.7563e-05 gnorm: 1.20 [ 3:54:27<20:49:28] +[titan] 2025-10-05 02:28:47,212 - root - INFO - step: 6325 loss: 2.5971 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:28:47,212 - root - INFO - lr: 4.7559e-05 gnorm: 1.24 [ 3:54:38<20:49:15] +[titan] 2025-10-05 02:28:58,148 - root - INFO - step: 6330 loss: 2.5947 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2896 +[titan] 2025-10-05 02:28:58,148 - root - INFO - lr: 4.7555e-05 
gnorm: 1.17 [ 3:54:49<20:49:03] +[titan] 2025-10-05 02:29:09,045 - root - INFO - step: 6335 loss: 2.6560 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.3139 global_avg_mtp_loss: 2.3421 +[titan] 2025-10-05 02:29:09,045 - root - INFO - lr: 4.7551e-05 gnorm: 1.23 [ 3:55:00<20:48:51] +[titan] 2025-10-05 02:29:19,929 - root - INFO - step: 6340 loss: 2.5919 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.2841 +[titan] 2025-10-05 02:29:19,929 - root - INFO - lr: 4.7547e-05 gnorm: 1.21 [ 3:55:11<20:48:38] +[titan] 2025-10-05 02:29:30,803 - root - INFO - step: 6345 loss: 2.6337 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.3109 global_avg_mtp_loss: 2.3228 +[titan] 2025-10-05 02:29:30,803 - root - INFO - lr: 4.7543e-05 gnorm: 1.20 [ 3:55:22<20:48:26] +[titan] 2025-10-05 02:29:39,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:29:41,698 - root - INFO - step: 6350 loss: 2.6001 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.3090 global_avg_mtp_loss: 2.2911 +[titan] 2025-10-05 02:29:41,698 - root - INFO - lr: 4.7539e-05 gnorm: 1.21 [ 3:55:33<20:48:13] +[titan] 2025-10-05 02:29:52,582 - root - INFO - step: 6355 loss: 2.5766 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2722 +[titan] 2025-10-05 02:29:52,582 - root - INFO - lr: 4.7535e-05 gnorm: 1.31 [ 3:55:43<20:48:01] +[titan] 2025-10-05 02:30:03,454 - root - INFO - step: 6360 loss: 2.6402 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3288 +[titan] 2025-10-05 02:30:03,454 - root - INFO - lr: 4.7531e-05 gnorm: 1.19 [ 3:55:54<20:47:48] +[titan] 2025-10-05 02:30:14,405 - root - INFO - step: 6365 loss: 2.5756 memory: 118.84GiB(85.28%) tps: 29,925 tflops: 415.16 mfu: 41.98% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2718 +[titan] 2025-10-05 02:30:14,405 - root - INFO - lr: 4.7527e-05 gnorm: 1.21 [ 3:56:05<20:47:36] +[titan] 2025-10-05 02:30:25,300 - root - INFO - step: 6370 loss: 2.6721 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.3155 global_avg_mtp_loss: 2.3566 +[titan] 2025-10-05 02:30:25,301 - root - INFO - lr: 4.7523e-05 gnorm: 1.26 [ 3:56:16<20:47:24] +[titan] 2025-10-05 02:30:36,189 - root - INFO - step: 6375 loss: 2.6701 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3548 +[titan] 2025-10-05 02:30:36,189 - root - INFO - lr: 4.7519e-05 gnorm: 1.26 [ 3:56:27<20:47:12] +[titan] 2025-10-05 02:30:47,063 - root - INFO - step: 6380 loss: 2.6577 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3424 +[titan] 2025-10-05 02:30:47,063 - root - INFO - lr: 4.7514e-05 
gnorm: 1.19 [ 3:56:38<20:46:59] +[titan] 2025-10-05 02:30:57,930 - root - INFO - step: 6385 loss: 2.5739 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2703 +[titan] 2025-10-05 02:30:57,930 - root - INFO - lr: 4.7510e-05 gnorm: 1.20 [ 3:56:49<20:46:47] +[titan] 2025-10-05 02:31:08,797 - root - INFO - step: 6390 loss: 2.6461 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3130 global_avg_mtp_loss: 2.3331 +[titan] 2025-10-05 02:31:08,797 - root - INFO - lr: 4.7506e-05 gnorm: 1.18 [ 3:57:00<20:46:34] +[titan] 2025-10-05 02:31:19,713 - root - INFO - step: 6395 loss: 2.6359 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.3114 global_avg_mtp_loss: 2.3246 +[titan] 2025-10-05 02:31:19,713 - root - INFO - lr: 4.7502e-05 gnorm: 1.18 [ 3:57:11<20:46:22] +[titan] 2025-10-05 02:31:28,410 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:31:30,590 - root - INFO - step: 6400 loss: 2.6427 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.3304 +[titan] 2025-10-05 02:31:30,590 - root - INFO - lr: 4.7498e-05 gnorm: 1.20 [ 3:57:21<20:46:09] +[titan] 2025-10-05 02:31:41,458 - root - INFO - step: 6405 loss: 2.5702 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:31:41,458 - root - INFO - lr: 4.7494e-05 gnorm: 1.26 [ 3:57:32<20:45:57] +[titan] 2025-10-05 02:31:52,328 - root - INFO - step: 6410 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2830 +[titan] 2025-10-05 02:31:52,329 - root - INFO - lr: 4.7490e-05 gnorm: 1.28 [ 3:57:43<20:45:45] +[titan] 2025-10-05 02:32:03,197 - root - INFO - step: 6415 loss: 2.5789 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2752 +[titan] 2025-10-05 02:32:03,197 - root - INFO - lr: 4.7486e-05 gnorm: 1.24 [ 3:57:54<20:45:32] +[titan] 2025-10-05 02:32:14,082 - root - INFO - step: 6420 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2355 +[titan] 2025-10-05 02:32:14,082 - root - INFO - lr: 4.7482e-05 gnorm: 1.22 [ 3:58:05<20:45:20] +[titan] 2025-10-05 02:32:25,006 - root - INFO - step: 6425 loss: 2.6729 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3154 global_avg_mtp_loss: 2.3575 +[titan] 2025-10-05 02:32:25,007 - root - INFO - lr: 4.7478e-05 gnorm: 1.28 [ 3:58:16<20:45:07] +[titan] 2025-10-05 02:32:35,882 - root - INFO - step: 6430 loss: 2.5812 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2763 +[titan] 2025-10-05 02:32:35,883 - root - INFO - lr: 4.7474e-05 
gnorm: 1.22 [ 3:58:27<20:44:55] +[titan] 2025-10-05 02:32:46,767 - root - INFO - step: 6435 loss: 2.5922 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2859 +[titan] 2025-10-05 02:32:46,767 - root - INFO - lr: 4.7469e-05 gnorm: 1.22 [ 3:58:38<20:44:43] +[titan] 2025-10-05 02:32:57,635 - root - INFO - step: 6440 loss: 2.5566 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2552 +[titan] 2025-10-05 02:32:57,635 - root - INFO - lr: 4.7465e-05 gnorm: 1.19 [ 3:58:48<20:44:30] +[titan] 2025-10-05 02:33:08,509 - root - INFO - step: 6445 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.3471 +[titan] 2025-10-05 02:33:08,509 - root - INFO - lr: 4.7461e-05 gnorm: 1.18 [ 3:58:59<20:44:18] +[titan] 2025-10-05 02:33:17,190 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:33:19,369 - root - INFO - step: 6450 loss: 2.5929 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2865 +[titan] 2025-10-05 02:33:19,369 - root - INFO - lr: 4.7457e-05 gnorm: 1.22 [ 3:59:10<20:44:05] +[titan] 2025-10-05 02:33:30,229 - root - INFO - step: 6455 loss: 2.6465 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.3132 global_avg_mtp_loss: 2.3333 +[titan] 2025-10-05 02:33:30,229 - root - INFO - lr: 4.7453e-05 gnorm: 1.20 [ 3:59:21<20:43:53] +[titan] 2025-10-05 02:33:41,125 - root - INFO - step: 6460 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2613 +[titan] 2025-10-05 02:33:41,125 - root - INFO - lr: 4.7449e-05 gnorm: 1.21 [ 3:59:32<20:43:40] +[titan] 2025-10-05 02:33:51,972 - root - INFO - step: 6465 loss: 2.6340 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.3110 global_avg_mtp_loss: 2.3230 +[titan] 2025-10-05 02:33:51,972 - root - INFO - lr: 4.7445e-05 gnorm: 1.25 [ 3:59:43<20:43:28] +[titan] 2025-10-05 02:34:02,829 - root - INFO - step: 6470 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:34:02,830 - root - INFO - lr: 4.7441e-05 gnorm: 1.22 [ 3:59:54<20:43:15] +[titan] 2025-10-05 02:34:13,713 - root - INFO - step: 6475 loss: 2.6622 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3146 global_avg_mtp_loss: 2.3477 +[titan] 2025-10-05 02:34:13,713 - root - INFO - lr: 4.7436e-05 gnorm: 1.22 [ 4:00:05<20:43:03] +[titan] 2025-10-05 02:34:24,581 - root - INFO - step: 6480 loss: 2.5985 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2920 +[titan] 2025-10-05 02:34:24,581 - root - INFO - lr: 4.7432e-05 
gnorm: 1.22 [ 4:00:15<20:42:51] +[titan] 2025-10-05 02:34:35,430 - root - INFO - step: 6485 loss: 2.5699 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2657 +[titan] 2025-10-05 02:34:35,430 - root - INFO - lr: 4.7428e-05 gnorm: 1.26 [ 4:00:26<20:42:38] +[titan] 2025-10-05 02:34:46,317 - root - INFO - step: 6490 loss: 2.5393 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:34:46,318 - root - INFO - lr: 4.7424e-05 gnorm: 1.22 [ 4:00:37<20:42:26] +[titan] 2025-10-05 02:34:57,192 - root - INFO - step: 6495 loss: 2.6369 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3116 global_avg_mtp_loss: 2.3253 +[titan] 2025-10-05 02:34:57,193 - root - INFO - lr: 4.7420e-05 gnorm: 1.23 [ 4:00:48<20:42:13] +[titan] 2025-10-05 02:35:05,878 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:35:08,068 - root - INFO - step: 6500 loss: 2.5435 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.3003 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 02:35:08,068 - root - INFO - lr: 4.7416e-05 gnorm: 1.25 [ 4:00:59<20:42:01] +[titan] 2025-10-05 02:35:18,953 - root - INFO - step: 6505 loss: 2.6050 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.3071 global_avg_mtp_loss: 2.2979 +[titan] 2025-10-05 02:35:18,953 - root - INFO - lr: 4.7412e-05 gnorm: 1.26 [ 4:01:10<20:41:49] +[titan] 2025-10-05 02:35:29,825 - root - INFO - step: 6510 loss: 2.5818 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2782 +[titan] 2025-10-05 02:35:29,825 - root - INFO - lr: 4.7407e-05 gnorm: 1.19 [ 4:01:21<20:41:36] +[titan] 2025-10-05 02:35:40,705 - root - INFO - step: 6515 loss: 2.5167 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2195 +[titan] 2025-10-05 02:35:40,705 - root - INFO - lr: 4.7403e-05 gnorm: 1.18 [ 4:01:32<20:41:24] +[titan] 2025-10-05 02:35:51,579 - root - INFO - step: 6520 loss: 2.6889 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3186 global_avg_mtp_loss: 2.3704 +[titan] 2025-10-05 02:35:51,580 - root - INFO - lr: 4.7399e-05 gnorm: 1.25 [ 4:01:42<20:41:11] +[titan] 2025-10-05 02:36:02,521 - root - INFO - step: 6525 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.3046 global_avg_mtp_loss: 2.2801 +[titan] 2025-10-05 02:36:02,521 - root - INFO - lr: 4.7395e-05 gnorm: 1.21 [ 4:01:53<20:40:59] +[titan] 2025-10-05 02:36:13,407 - root - INFO - step: 6530 loss: 2.5064 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2103 +[titan] 2025-10-05 02:36:13,407 - root - INFO - lr: 4.7391e-05 
gnorm: 1.17 [ 4:02:04<20:40:47] +[titan] 2025-10-05 02:36:24,288 - root - INFO - step: 6535 loss: 2.6623 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:36:24,289 - root - INFO - lr: 4.7387e-05 gnorm: 1.26 [ 4:02:15<20:40:35] +[titan] 2025-10-05 02:36:35,150 - root - INFO - step: 6540 loss: 2.6944 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.3185 global_avg_mtp_loss: 2.3760 +[titan] 2025-10-05 02:36:35,150 - root - INFO - lr: 4.7382e-05 gnorm: 1.22 [ 4:02:26<20:40:22] +[titan] 2025-10-05 02:36:46,030 - root - INFO - step: 6545 loss: 2.5975 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2909 +[titan] 2025-10-05 02:36:46,030 - root - INFO - lr: 4.7378e-05 gnorm: 1.25 [ 4:02:37<20:40:10] +[titan] 2025-10-05 02:36:54,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:36:56,903 - root - INFO - step: 6550 loss: 2.5802 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2749 +[titan] 2025-10-05 02:36:56,904 - root - INFO - lr: 4.7374e-05 gnorm: 1.18 [ 4:02:48<20:39:57] +[titan] 2025-10-05 02:37:07,790 - root - INFO - step: 6555 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 02:37:07,790 - root - INFO - lr: 4.7370e-05 gnorm: 1.23 [ 4:02:59<20:39:45] +[titan] 2025-10-05 02:37:18,673 - root - INFO - step: 6560 loss: 2.6310 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3210 +[titan] 2025-10-05 02:37:18,673 - root - INFO - lr: 4.7366e-05 gnorm: 1.22 [ 4:03:09<20:39:33] +[titan] 2025-10-05 02:37:29,519 - root - INFO - step: 6565 loss: 2.6348 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3242 +[titan] 2025-10-05 02:37:29,520 - root - INFO - lr: 4.7361e-05 gnorm: 1.24 [ 4:03:20<20:39:20] +[titan] 2025-10-05 02:37:40,400 - root - INFO - step: 6570 loss: 2.5419 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2417 +[titan] 2025-10-05 02:37:40,400 - root - INFO - lr: 4.7357e-05 gnorm: 1.19 [ 4:03:31<20:39:08] +[titan] 2025-10-05 02:37:51,268 - root - INFO - step: 6575 loss: 2.5865 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2812 +[titan] 2025-10-05 02:37:51,269 - root - INFO - lr: 4.7353e-05 gnorm: 1.28 [ 4:03:42<20:38:55] +[titan] 2025-10-05 02:38:02,128 - root - INFO - step: 6580 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2661 +[titan] 2025-10-05 02:38:02,128 - root - INFO - lr: 4.7349e-05 
gnorm: 1.20 [ 4:03:53<20:38:43] +[titan] 2025-10-05 02:38:13,063 - root - INFO - step: 6585 loss: 2.6520 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.3147 global_avg_mtp_loss: 2.3373 +[titan] 2025-10-05 02:38:13,063 - root - INFO - lr: 4.7345e-05 gnorm: 1.24 [ 4:04:04<20:38:31] +[titan] 2025-10-05 02:38:23,973 - root - INFO - step: 6590 loss: 2.6349 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.3106 global_avg_mtp_loss: 2.3243 +[titan] 2025-10-05 02:38:23,973 - root - INFO - lr: 4.7340e-05 gnorm: 1.19 [ 4:04:15<20:38:19] +[titan] 2025-10-05 02:38:34,826 - root - INFO - step: 6595 loss: 2.7415 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3321 global_avg_mtp_loss: 2.4095 +[titan] 2025-10-05 02:38:34,826 - root - INFO - lr: 4.7336e-05 gnorm: 1.21 [ 4:04:26<20:38:06] +[titan] 2025-10-05 02:38:43,495 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:38:45,682 - root - INFO - step: 6600 loss: 2.5758 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3042 global_avg_mtp_loss: 2.2716 +[titan] 2025-10-05 02:38:45,682 - root - INFO - lr: 4.7332e-05 gnorm: 1.18 [ 4:04:36<20:37:54] +[titan] 2025-10-05 02:38:56,550 - root - INFO - step: 6605 loss: 2.5294 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2310 +[titan] 2025-10-05 02:38:56,550 - root - INFO - lr: 4.7328e-05 gnorm: 1.19 [ 4:04:47<20:37:41] +[titan] 2025-10-05 02:39:07,416 - root - INFO - step: 6610 loss: 2.5451 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2447 +[titan] 2025-10-05 02:39:07,416 - root - INFO - lr: 4.7324e-05 gnorm: 1.20 [ 4:04:58<20:37:29] +[titan] 2025-10-05 02:39:18,347 - root - INFO - step: 6615 loss: 2.7044 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.3198 global_avg_mtp_loss: 2.3846 +[titan] 2025-10-05 02:39:18,347 - root - INFO - lr: 4.7319e-05 gnorm: 1.23 [ 4:05:09<20:37:17] +[titan] 2025-10-05 02:39:29,249 - root - INFO - step: 6620 loss: 2.5846 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.3048 global_avg_mtp_loss: 2.2799 +[titan] 2025-10-05 02:39:29,249 - root - INFO - lr: 4.7315e-05 gnorm: 1.16 [ 4:05:20<20:37:05] +[titan] 2025-10-05 02:39:40,113 - root - INFO - step: 6625 loss: 2.6491 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.3120 global_avg_mtp_loss: 2.3371 +[titan] 2025-10-05 02:39:40,113 - root - INFO - lr: 4.7311e-05 gnorm: 1.27 [ 4:05:31<20:36:52] +[titan] 2025-10-05 02:39:50,990 - root - INFO - step: 6630 loss: 2.5891 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2838 +[titan] 2025-10-05 02:39:50,990 - root - INFO - lr: 4.7307e-05 
gnorm: 1.21 [ 4:05:42<20:36:40] +[titan] 2025-10-05 02:40:01,853 - root - INFO - step: 6635 loss: 2.6888 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3169 global_avg_mtp_loss: 2.3719 +[titan] 2025-10-05 02:40:01,853 - root - INFO - lr: 4.7302e-05 gnorm: 1.21 [ 4:05:53<20:36:28] +[titan] 2025-10-05 02:40:12,718 - root - INFO - step: 6640 loss: 2.5610 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2587 +[titan] 2025-10-05 02:40:12,718 - root - INFO - lr: 4.7298e-05 gnorm: 1.19 [ 4:06:04<20:36:15] +[titan] 2025-10-05 02:40:23,628 - root - INFO - step: 6645 loss: 2.5680 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.3036 global_avg_mtp_loss: 2.2644 +[titan] 2025-10-05 02:40:23,628 - root - INFO - lr: 4.7294e-05 gnorm: 1.27 [ 4:06:14<20:36:03] +[titan] 2025-10-05 02:40:32,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:40:34,565 - root - INFO - step: 6650 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.3119 +[titan] 2025-10-05 02:40:34,566 - root - INFO - lr: 4.7290e-05 gnorm: 1.21 [ 4:06:25<20:35:51] +[titan] 2025-10-05 02:40:45,524 - root - INFO - step: 6655 loss: 2.6619 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3474 +[titan] 2025-10-05 02:40:45,524 - root - INFO - lr: 4.7285e-05 gnorm: 1.16 [ 4:06:36<20:35:39] +[titan] 2025-10-05 02:40:47,877 - root - INFO - Dumping profiler traces at step 6656 +[titan] 2025-10-05 02:40:47,912 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 02:40:56,627 - root - INFO - step: 6660 loss: 2.5796 memory: 118.84GiB(85.28%) tps: 29,513 tflops: 409.44 mfu: 41.40% global_avg_ntp_loss: 0.3032 global_avg_mtp_loss: 2.2764 +[titan] 2025-10-05 02:40:56,627 - root - INFO - lr: 4.7281e-05 gnorm: 1.18 [ 4:06:47<20:35:28] +[titan] 2025-10-05 02:41:07,500 - root - INFO - step: 6665 loss: 2.5859 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2808 +[titan] 2025-10-05 02:41:07,500 - root - INFO - lr: 4.7277e-05 gnorm: 1.21 [ 4:06:58<20:35:16] +[titan] 2025-10-05 02:41:18,462 - root - INFO - step: 6670 loss: 2.5619 memory: 118.84GiB(85.28%) tps: 29,894 tflops: 414.73 mfu: 41.93% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:41:18,462 - root - INFO - lr: 4.7273e-05 gnorm: 1.20 [ 4:07:09<20:35:04] +[titan] 2025-10-05 02:41:29,324 - root - INFO - step: 6675 loss: 2.4816 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.1887 +[titan] 2025-10-05 02:41:29,325 - root - INFO - lr: 4.7268e-05 gnorm: 1.20 [ 4:07:20<20:34:51] +[titan] 2025-10-05 02:41:40,209 - root - INFO - step: 6680 loss: 2.6410 
memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.3131 global_avg_mtp_loss: 2.3279 +[titan] 2025-10-05 02:41:40,209 - root - INFO - lr: 4.7264e-05 gnorm: 1.26 [ 4:07:31<20:34:39] +[titan] 2025-10-05 02:41:51,110 - root - INFO - step: 6685 loss: 2.6200 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3116 +[titan] 2025-10-05 02:41:51,110 - root - INFO - lr: 4.7260e-05 gnorm: 1.25 [ 4:07:42<20:34:27] +[titan] 2025-10-05 02:42:01,973 - root - INFO - step: 6690 loss: 2.6096 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3014 +[titan] 2025-10-05 02:42:01,973 - root - INFO - lr: 4.7256e-05 gnorm: 1.20 [ 4:07:53<20:34:15] +[titan] 2025-10-05 02:42:12,862 - root - INFO - step: 6695 loss: 2.5175 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2207 +[titan] 2025-10-05 02:42:12,862 - root - INFO - lr: 4.7251e-05 gnorm: 1.18 [ 4:08:04<20:34:02] +[titan] 2025-10-05 02:42:21,626 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:42:23,816 - root - INFO - step: 6700 loss: 2.6088 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3010 +[titan] 2025-10-05 02:42:23,816 - root - INFO - lr: 4.7247e-05 gnorm: 1.21 [ 4:08:15<20:33:50] +[titan] 2025-10-05 02:42:34,693 - root - INFO - step: 6705 loss: 2.6071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.2991 +[titan] 2025-10-05 02:42:34,693 - root - INFO - lr: 4.7243e-05 gnorm: 1.19 [ 4:08:25<20:33:38] +[titan] 2025-10-05 02:42:45,561 - root - INFO - step: 6710 loss: 2.5118 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2161 +[titan] 2025-10-05 02:42:45,561 - root - INFO - lr: 4.7238e-05 gnorm: 1.24 [ 4:08:36<20:33:26] +[titan] 2025-10-05 02:42:56,442 - root - INFO - step: 6715 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.2997 +[titan] 2025-10-05 02:42:56,442 - root - INFO - lr: 4.7234e-05 gnorm: 1.21 [ 4:08:47<20:33:13] +[titan] 2025-10-05 02:43:07,287 - root - INFO - step: 6720 loss: 2.5570 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2554 +[titan] 2025-10-05 02:43:07,287 - root - INFO - lr: 4.7230e-05 gnorm: 1.22 [ 4:08:58<20:33:01] +[titan] 2025-10-05 02:43:18,136 - root - INFO - step: 6725 loss: 2.5707 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.3021 global_avg_mtp_loss: 2.2686 +[titan] 2025-10-05 02:43:18,136 - root - INFO - lr: 4.7226e-05 gnorm: 1.53 [ 4:09:09<20:32:49] +[titan] 2025-10-05 02:43:29,070 - root - INFO - step: 6730 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2464 +[titan] 2025-10-05 02:43:29,070 - root - INFO - lr: 4.7221e-05 
gnorm: 1.26 [ 4:09:20<20:32:37] +[titan] 2025-10-05 02:43:39,913 - root - INFO - step: 6735 loss: 2.5430 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 02:43:39,913 - root - INFO - lr: 4.7217e-05 gnorm: 1.22 [ 4:09:31<20:32:24] +[titan] 2025-10-05 02:43:50,772 - root - INFO - step: 6740 loss: 2.5235 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2275 +[titan] 2025-10-05 02:43:50,772 - root - INFO - lr: 4.7213e-05 gnorm: 1.21 [ 4:09:42<20:32:12] +[titan] 2025-10-05 02:44:01,659 - root - INFO - step: 6745 loss: 2.6439 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3121 global_avg_mtp_loss: 2.3318 +[titan] 2025-10-05 02:44:01,659 - root - INFO - lr: 4.7208e-05 gnorm: 1.19 [ 4:09:52<20:31:59] +[titan] 2025-10-05 02:44:10,336 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:44:12,526 - root - INFO - step: 6750 loss: 2.5875 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3044 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 02:44:12,526 - root - INFO - lr: 4.7204e-05 gnorm: 1.21 [ 4:10:03<20:31:47] +[titan] 2025-10-05 02:44:23,443 - root - INFO - step: 6755 loss: 2.4956 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2014 +[titan] 2025-10-05 02:44:23,443 - root - INFO - lr: 4.7200e-05 gnorm: 1.19 [ 4:10:14<20:31:35] +[titan] 2025-10-05 02:44:34,289 - root - INFO - step: 6760 loss: 2.5401 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2414 +[titan] 2025-10-05 02:44:34,289 - root - INFO - lr: 4.7196e-05 gnorm: 1.22 [ 4:10:25<20:31:23] +[titan] 2025-10-05 02:44:45,167 - root - INFO - step: 6765 loss: 2.5998 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.2924 +[titan] 2025-10-05 02:44:45,167 - root - INFO - lr: 4.7191e-05 gnorm: 1.30 [ 4:10:36<20:31:10] +[titan] 2025-10-05 02:44:56,029 - root - INFO - step: 6770 loss: 2.5743 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2714 +[titan] 2025-10-05 02:44:56,029 - root - INFO - lr: 4.7187e-05 gnorm: 1.21 [ 4:10:47<20:30:58] +[titan] 2025-10-05 02:45:06,886 - root - INFO - step: 6775 loss: 2.5839 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.3045 global_avg_mtp_loss: 2.2794 +[titan] 2025-10-05 02:45:06,886 - root - INFO - lr: 4.7183e-05 gnorm: 1.17 [ 4:10:58<20:30:46] +[titan] 2025-10-05 02:45:17,790 - root - INFO - step: 6780 loss: 2.5182 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2228 +[titan] 2025-10-05 02:45:17,790 - root - INFO - lr: 4.7178e-05 
gnorm: 1.24 [ 4:11:09<20:30:33] +[titan] 2025-10-05 02:45:28,696 - root - INFO - step: 6785 loss: 2.5460 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2463 +[titan] 2025-10-05 02:45:28,696 - root - INFO - lr: 4.7174e-05 gnorm: 1.20 [ 4:11:19<20:30:21] +[titan] 2025-10-05 02:45:39,548 - root - INFO - step: 6790 loss: 2.5312 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2336 +[titan] 2025-10-05 02:45:39,548 - root - INFO - lr: 4.7170e-05 gnorm: 1.16 [ 4:11:30<20:30:09] +[titan] 2025-10-05 02:45:50,426 - root - INFO - step: 6795 loss: 2.5011 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:45:50,427 - root - INFO - lr: 4.7165e-05 gnorm: 1.18 [ 4:11:41<20:29:57] +[titan] 2025-10-05 02:45:59,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:46:01,303 - root - INFO - step: 6800 loss: 2.5676 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2649 +[titan] 2025-10-05 02:46:01,303 - root - INFO - lr: 4.7161e-05 gnorm: 1.23 [ 4:11:52<20:29:44] +[titan] 2025-10-05 02:46:12,162 - root - INFO - step: 6805 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 02:46:12,162 - root - INFO - lr: 4.7157e-05 gnorm: 1.28 [ 4:12:03<20:29:32] +[titan] 2025-10-05 02:46:23,159 - root - INFO - step: 6810 loss: 2.5521 memory: 118.84GiB(85.28%) tps: 29,798 tflops: 413.40 mfu: 41.80% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2496 +[titan] 2025-10-05 02:46:23,159 - root - INFO - lr: 4.7152e-05 gnorm: 1.22 [ 4:12:14<20:29:20] +[titan] 2025-10-05 02:46:34,046 - root - INFO - step: 6815 loss: 2.6067 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3060 global_avg_mtp_loss: 2.3007 +[titan] 2025-10-05 02:46:34,046 - root - INFO - lr: 4.7148e-05 gnorm: 1.17 [ 4:12:25<20:29:08] +[titan] 2025-10-05 02:46:44,908 - root - INFO - step: 6820 loss: 2.5848 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.3041 global_avg_mtp_loss: 2.2807 +[titan] 2025-10-05 02:46:44,908 - root - INFO - lr: 4.7143e-05 gnorm: 1.17 [ 4:12:36<20:28:56] +[titan] 2025-10-05 02:46:55,788 - root - INFO - step: 6825 loss: 2.5910 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2852 +[titan] 2025-10-05 02:46:55,788 - root - INFO - lr: 4.7139e-05 gnorm: 1.16 [ 4:12:47<20:28:44] +[titan] 2025-10-05 02:47:06,620 - root - INFO - step: 6830 loss: 2.5384 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 02:47:06,620 - root - INFO - lr: 4.7135e-05 
gnorm: 1.18 [ 4:12:57<20:28:31] +[titan] 2025-10-05 02:47:17,469 - root - INFO - step: 6835 loss: 2.5733 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 02:47:17,469 - root - INFO - lr: 4.7130e-05 gnorm: 1.23 [ 4:13:08<20:28:19] +[titan] 2025-10-05 02:47:28,380 - root - INFO - step: 6840 loss: 2.5886 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2835 +[titan] 2025-10-05 02:47:28,380 - root - INFO - lr: 4.7126e-05 gnorm: 1.26 [ 4:13:19<20:28:07] +[titan] 2025-10-05 02:47:39,283 - root - INFO - step: 6845 loss: 2.5574 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2546 +[titan] 2025-10-05 02:47:39,283 - root - INFO - lr: 4.7122e-05 gnorm: 1.19 [ 4:13:30<20:27:55] +[titan] 2025-10-05 02:47:47,974 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:47:50,155 - root - INFO - step: 6850 loss: 2.5366 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2372 +[titan] 2025-10-05 02:47:50,155 - root - INFO - lr: 4.7117e-05 gnorm: 1.18 [ 4:13:41<20:27:42] +[titan] 2025-10-05 02:48:01,026 - root - INFO - step: 6855 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2793 +[titan] 2025-10-05 02:48:01,026 - root - INFO - lr: 4.7113e-05 gnorm: 1.17 [ 4:13:52<20:27:30] +[titan] 2025-10-05 02:48:11,906 - root - INFO - step: 6860 loss: 2.5452 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2453 +[titan] 2025-10-05 02:48:11,906 - root - INFO - lr: 4.7109e-05 gnorm: 1.21 [ 4:14:03<20:27:18] +[titan] 2025-10-05 02:48:22,764 - root - INFO - step: 6865 loss: 2.5903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2839 +[titan] 2025-10-05 02:48:22,764 - root - INFO - lr: 4.7104e-05 gnorm: 1.22 [ 4:14:14<20:27:05] +[titan] 2025-10-05 02:48:33,663 - root - INFO - step: 6870 loss: 2.5282 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2305 +[titan] 2025-10-05 02:48:33,663 - root - INFO - lr: 4.7100e-05 gnorm: 1.21 [ 4:14:24<20:26:53] +[titan] 2025-10-05 02:48:44,571 - root - INFO - step: 6875 loss: 2.5842 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2787 +[titan] 2025-10-05 02:48:44,571 - root - INFO - lr: 4.7095e-05 gnorm: 1.20 [ 4:14:35<20:26:41] +[titan] 2025-10-05 02:48:55,419 - root - INFO - step: 6880 loss: 2.5406 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2421 +[titan] 2025-10-05 02:48:55,419 - root - INFO - lr: 4.7091e-05 
gnorm: 1.23 [ 4:14:46<20:26:29] +[titan] 2025-10-05 02:49:06,283 - root - INFO - step: 6885 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2631 +[titan] 2025-10-05 02:49:06,283 - root - INFO - lr: 4.7087e-05 gnorm: 1.33 [ 4:14:57<20:26:17] +[titan] 2025-10-05 02:49:17,141 - root - INFO - step: 6890 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.3033 global_avg_mtp_loss: 2.2679 +[titan] 2025-10-05 02:49:17,141 - root - INFO - lr: 4.7082e-05 gnorm: 1.20 [ 4:15:08<20:26:04] +[titan] 2025-10-05 02:49:28,078 - root - INFO - step: 6895 loss: 2.5483 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2478 +[titan] 2025-10-05 02:49:28,078 - root - INFO - lr: 4.7078e-05 gnorm: 1.18 [ 4:15:19<20:25:52] +[titan] 2025-10-05 02:49:36,751 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:49:38,935 - root - INFO - step: 6900 loss: 2.5983 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.3066 global_avg_mtp_loss: 2.2917 +[titan] 2025-10-05 02:49:38,935 - root - INFO - lr: 4.7073e-05 gnorm: 1.24 [ 4:15:30<20:25:40] +[titan] 2025-10-05 02:49:49,829 - root - INFO - step: 6905 loss: 2.5554 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2534 +[titan] 2025-10-05 02:49:49,830 - root - INFO - lr: 4.7069e-05 gnorm: 1.19 [ 4:15:41<20:25:28] +[titan] 2025-10-05 02:50:00,703 - root - INFO - step: 6910 loss: 2.6056 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.3081 global_avg_mtp_loss: 2.2975 +[titan] 2025-10-05 02:50:00,703 - root - INFO - lr: 4.7065e-05 gnorm: 1.20 [ 4:15:51<20:25:16] +[titan] 2025-10-05 02:50:11,565 - root - INFO - step: 6915 loss: 2.5960 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3064 global_avg_mtp_loss: 2.2897 +[titan] 2025-10-05 02:50:11,566 - root - INFO - lr: 4.7060e-05 gnorm: 1.24 [ 4:16:02<20:25:03] +[titan] 2025-10-05 02:50:22,427 - root - INFO - step: 6920 loss: 2.5924 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2856 +[titan] 2025-10-05 02:50:22,427 - root - INFO - lr: 4.7056e-05 gnorm: 1.19 [ 4:16:13<20:24:51] +[titan] 2025-10-05 02:50:33,321 - root - INFO - step: 6925 loss: 2.4869 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1938 +[titan] 2025-10-05 02:50:33,321 - root - INFO - lr: 4.7051e-05 gnorm: 1.18 [ 4:16:24<20:24:39] +[titan] 2025-10-05 02:50:44,192 - root - INFO - step: 6930 loss: 2.5543 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 02:50:44,193 - root - INFO - lr: 4.7047e-05 
gnorm: 1.24 [ 4:16:35<20:24:27] +[titan] 2025-10-05 02:50:55,042 - root - INFO - step: 6935 loss: 2.5426 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2998 global_avg_mtp_loss: 2.2428 +[titan] 2025-10-05 02:50:55,042 - root - INFO - lr: 4.7043e-05 gnorm: 1.21 [ 4:16:46<20:24:14] +[titan] 2025-10-05 02:51:05,935 - root - INFO - step: 6940 loss: 2.6667 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3149 global_avg_mtp_loss: 2.3517 +[titan] 2025-10-05 02:51:05,935 - root - INFO - lr: 4.7038e-05 gnorm: 1.24 [ 4:16:57<20:24:02] +[titan] 2025-10-05 02:51:16,790 - root - INFO - step: 6945 loss: 2.6473 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.3127 global_avg_mtp_loss: 2.3345 +[titan] 2025-10-05 02:51:16,790 - root - INFO - lr: 4.7034e-05 gnorm: 1.27 [ 4:17:08<20:23:50] +[titan] 2025-10-05 02:51:25,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:51:27,720 - root - INFO - step: 6950 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2047 +[titan] 2025-10-05 02:51:27,721 - root - INFO - lr: 4.7029e-05 gnorm: 1.25 [ 4:17:18<20:23:38] +[titan] 2025-10-05 02:51:38,573 - root - INFO - step: 6955 loss: 2.6408 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3113 global_avg_mtp_loss: 2.3294 +[titan] 2025-10-05 02:51:38,573 - root - INFO - lr: 4.7025e-05 gnorm: 1.22 [ 4:17:29<20:23:26] +[titan] 2025-10-05 02:51:49,457 - root - INFO - step: 6960 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.3017 global_avg_mtp_loss: 2.2580 +[titan] 2025-10-05 02:51:49,458 - root - INFO - lr: 4.7020e-05 gnorm: 1.20 [ 4:17:40<20:23:13] +[titan] 2025-10-05 02:52:00,296 - root - INFO - step: 6965 loss: 2.6601 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3219 global_avg_mtp_loss: 2.3382 +[titan] 2025-10-05 02:52:00,296 - root - INFO - lr: 4.7016e-05 gnorm: 1.30 [ 4:17:51<20:23:01] +[titan] 2025-10-05 02:52:11,200 - root - INFO - step: 6970 loss: 2.5501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3016 global_avg_mtp_loss: 2.2484 +[titan] 2025-10-05 02:52:11,200 - root - INFO - lr: 4.7012e-05 gnorm: 1.25 [ 4:18:02<20:22:49] +[titan] 2025-10-05 02:52:22,059 - root - INFO - step: 6975 loss: 2.5650 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2627 +[titan] 2025-10-05 02:52:22,059 - root - INFO - lr: 4.7007e-05 gnorm: 1.18 [ 4:18:13<20:22:37] +[titan] 2025-10-05 02:52:32,953 - root - INFO - step: 6980 loss: 2.6856 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.3276 global_avg_mtp_loss: 2.3581 +[titan] 2025-10-05 02:52:32,953 - root - INFO - lr: 4.7003e-05 
gnorm: 1.24 [ 4:18:24<20:22:25] +[titan] 2025-10-05 02:52:43,790 - root - INFO - step: 6985 loss: 2.5169 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2194 +[titan] 2025-10-05 02:52:43,790 - root - INFO - lr: 4.6998e-05 gnorm: 1.33 [ 4:18:35<20:22:12] +[titan] 2025-10-05 02:52:54,642 - root - INFO - step: 6990 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2300 +[titan] 2025-10-05 02:52:54,642 - root - INFO - lr: 4.6994e-05 gnorm: 1.18 [ 4:18:45<20:22:00] +[titan] 2025-10-05 02:53:05,477 - root - INFO - step: 6995 loss: 2.6970 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.3196 global_avg_mtp_loss: 2.3774 +[titan] 2025-10-05 02:53:05,477 - root - INFO - lr: 4.6989e-05 gnorm: 1.28 [ 4:18:56<20:21:48] +[titan] 2025-10-05 02:53:14,141 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:53:16,323 - root - INFO - step: 7000 loss: 2.6331 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.3091 global_avg_mtp_loss: 2.3240 +[titan] 2025-10-05 02:53:16,323 - root - INFO - lr: 4.6985e-05 gnorm: 1.28 [ 4:19:07<20:21:35] +[titan] 2025-10-05 02:53:27,204 - root - INFO - step: 7005 loss: 2.5777 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2742 +[titan] 2025-10-05 02:53:27,204 - root - INFO - lr: 4.6980e-05 gnorm: 1.20 [ 4:19:18<20:21:23] +[titan] 2025-10-05 02:53:38,086 - root - INFO - step: 7010 loss: 2.5633 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2596 +[titan] 2025-10-05 02:53:38,086 - root - INFO - lr: 4.6976e-05 gnorm: 1.21 [ 4:19:29<20:21:11] +[titan] 2025-10-05 02:53:48,973 - root - INFO - step: 7015 loss: 2.5508 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2507 +[titan] 2025-10-05 02:53:48,973 - root - INFO - lr: 4.6971e-05 gnorm: 1.17 [ 4:19:40<20:20:59] +[titan] 2025-10-05 02:53:59,845 - root - INFO - step: 7020 loss: 2.6141 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.3084 global_avg_mtp_loss: 2.3057 +[titan] 2025-10-05 02:53:59,845 - root - INFO - lr: 4.6967e-05 gnorm: 1.23 [ 4:19:51<20:20:47] +[titan] 2025-10-05 02:54:10,698 - root - INFO - step: 7025 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2184 +[titan] 2025-10-05 02:54:10,698 - root - INFO - lr: 4.6962e-05 gnorm: 1.18 [ 4:20:01<20:20:34] +[titan] 2025-10-05 02:54:21,549 - root - INFO - step: 7030 loss: 2.5250 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2274 +[titan] 2025-10-05 02:54:21,550 - root - INFO - lr: 4.6958e-05 
gnorm: 1.18 [ 4:20:12<20:20:22] +[titan] 2025-10-05 02:54:32,510 - root - INFO - step: 7035 loss: 2.4583 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2895 global_avg_mtp_loss: 2.1687 +[titan] 2025-10-05 02:54:32,510 - root - INFO - lr: 4.6954e-05 gnorm: 1.13 [ 4:20:23<20:20:10] +[titan] 2025-10-05 02:54:43,391 - root - INFO - step: 7040 loss: 2.5911 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2860 +[titan] 2025-10-05 02:54:43,391 - root - INFO - lr: 4.6949e-05 gnorm: 1.24 [ 4:20:34<20:19:58] +[titan] 2025-10-05 02:54:54,247 - root - INFO - step: 7045 loss: 2.5161 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2197 +[titan] 2025-10-05 02:54:54,247 - root - INFO - lr: 4.6945e-05 gnorm: 1.19 [ 4:20:45<20:19:46] +[titan] 2025-10-05 02:55:02,933 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:55:05,120 - root - INFO - step: 7050 loss: 2.5643 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2600 +[titan] 2025-10-05 02:55:05,120 - root - INFO - lr: 4.6940e-05 gnorm: 1.19 [ 4:20:56<20:19:34] +[titan] 2025-10-05 02:55:15,988 - root - INFO - step: 7055 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.3004 global_avg_mtp_loss: 2.2398 +[titan] 2025-10-05 02:55:15,988 - root - INFO - lr: 4.6936e-05 gnorm: 1.18 [ 4:21:07<20:19:21] +[titan] 2025-10-05 02:55:26,853 - root - INFO - step: 7060 loss: 2.6283 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.3103 global_avg_mtp_loss: 2.3181 +[titan] 2025-10-05 02:55:26,854 - root - INFO - lr: 4.6931e-05 gnorm: 1.21 [ 4:21:18<20:19:09] +[titan] 2025-10-05 02:55:37,782 - root - INFO - step: 7065 loss: 2.5429 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2433 +[titan] 2025-10-05 02:55:37,782 - root - INFO - lr: 4.6927e-05 gnorm: 1.17 [ 4:21:29<20:18:57] +[titan] 2025-10-05 02:55:48,649 - root - INFO - step: 7070 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3049 global_avg_mtp_loss: 2.2882 +[titan] 2025-10-05 02:55:48,649 - root - INFO - lr: 4.6922e-05 gnorm: 1.22 [ 4:21:39<20:18:45] +[titan] 2025-10-05 02:55:59,510 - root - INFO - step: 7075 loss: 2.5409 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2994 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 02:55:59,510 - root - INFO - lr: 4.6918e-05 gnorm: 1.20 [ 4:21:50<20:18:33] +[titan] 2025-10-05 02:56:10,352 - root - INFO - step: 7080 loss: 2.5976 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3063 global_avg_mtp_loss: 2.2913 +[titan] 2025-10-05 02:56:10,353 - root - INFO - lr: 4.6913e-05 
gnorm: 1.19 [ 4:22:01<20:18:20] +[titan] 2025-10-05 02:56:21,217 - root - INFO - step: 7085 loss: 2.5675 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2647 +[titan] 2025-10-05 02:56:21,217 - root - INFO - lr: 4.6909e-05 gnorm: 1.26 [ 4:22:12<20:18:08] +[titan] 2025-10-05 02:56:32,130 - root - INFO - step: 7090 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 02:56:32,130 - root - INFO - lr: 4.6904e-05 gnorm: 1.19 [ 4:22:23<20:17:56] +[titan] 2025-10-05 02:56:43,006 - root - INFO - step: 7095 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 02:56:43,006 - root - INFO - lr: 4.6899e-05 gnorm: 1.16 [ 4:22:34<20:17:44] +[titan] 2025-10-05 02:56:51,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:56:53,930 - root - INFO - step: 7100 loss: 2.6150 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.3102 global_avg_mtp_loss: 2.3048 +[titan] 2025-10-05 02:56:53,930 - root - INFO - lr: 4.6895e-05 gnorm: 1.24 [ 4:22:45<20:17:32] +[titan] 2025-10-05 02:57:04,810 - root - INFO - step: 7105 loss: 2.5539 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2524 +[titan] 2025-10-05 02:57:04,810 - root - INFO - lr: 4.6890e-05 gnorm: 1.23 [ 4:22:56<20:17:20] +[titan] 2025-10-05 02:57:15,679 - root - INFO - step: 7110 loss: 2.6249 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.3096 global_avg_mtp_loss: 2.3152 +[titan] 2025-10-05 02:57:15,679 - root - INFO - lr: 4.6886e-05 gnorm: 1.23 [ 4:23:06<20:17:08] +[titan] 2025-10-05 02:57:26,560 - root - INFO - step: 7115 loss: 2.5402 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 02:57:26,560 - root - INFO - lr: 4.6881e-05 gnorm: 1.21 [ 4:23:17<20:16:56] +[titan] 2025-10-05 02:57:37,504 - root - INFO - step: 7120 loss: 2.5642 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 02:57:37,504 - root - INFO - lr: 4.6877e-05 gnorm: 1.25 [ 4:23:28<20:16:44] +[titan] 2025-10-05 02:57:48,407 - root - INFO - step: 7125 loss: 2.5252 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2269 +[titan] 2025-10-05 02:57:48,407 - root - INFO - lr: 4.6872e-05 gnorm: 1.23 [ 4:23:39<20:16:32] +[titan] 2025-10-05 02:57:59,324 - root - INFO - step: 7130 loss: 2.5966 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2910 +[titan] 2025-10-05 02:57:59,324 - root - INFO - lr: 4.6868e-05 
gnorm: 1.23 [ 4:23:50<20:16:20] +[titan] 2025-10-05 02:58:10,198 - root - INFO - step: 7135 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.3082 global_avg_mtp_loss: 2.3080 +[titan] 2025-10-05 02:58:10,198 - root - INFO - lr: 4.6863e-05 gnorm: 1.30 [ 4:24:01<20:16:08] +[titan] 2025-10-05 02:58:21,069 - root - INFO - step: 7140 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 02:58:21,069 - root - INFO - lr: 4.6859e-05 gnorm: 1.21 [ 4:24:12<20:15:56] +[titan] 2025-10-05 02:58:31,936 - root - INFO - step: 7145 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2622 +[titan] 2025-10-05 02:58:31,936 - root - INFO - lr: 4.6854e-05 gnorm: 1.20 [ 4:24:23<20:15:43] +[titan] 2025-10-05 02:58:40,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 02:58:42,870 - root - INFO - step: 7150 loss: 2.5513 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2508 +[titan] 2025-10-05 02:58:42,871 - root - INFO - lr: 4.6850e-05 gnorm: 1.21 [ 4:24:34<20:15:32] +[titan] 2025-10-05 02:58:53,743 - root - INFO - step: 7155 loss: 2.5589 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2563 +[titan] 2025-10-05 02:58:53,743 - root - INFO - lr: 4.6845e-05 gnorm: 1.16 [ 4:24:44<20:15:19] +[titan] 2025-10-05 02:59:04,618 - root - INFO - step: 7160 loss: 2.5754 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2723 +[titan] 2025-10-05 02:59:04,618 - root - INFO - lr: 4.6840e-05 gnorm: 1.21 [ 4:24:55<20:15:07] +[titan] 2025-10-05 02:59:15,628 - root - INFO - step: 7165 loss: 2.5541 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.91 mfu: 41.75% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2529 +[titan] 2025-10-05 02:59:15,629 - root - INFO - lr: 4.6836e-05 gnorm: 1.17 [ 4:25:06<20:14:56] +[titan] 2025-10-05 02:59:22,345 - root - INFO - Dumping profiler traces at step 7168 +[titan] 2025-10-05 02:59:22,383 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 02:59:26,736 - root - INFO - step: 7170 loss: 2.6199 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.30 mfu: 41.38% global_avg_ntp_loss: 0.3101 global_avg_mtp_loss: 2.3097 +[titan] 2025-10-05 02:59:26,736 - root - INFO - lr: 4.6831e-05 gnorm: 1.20 [ 4:25:17<20:14:45] +[titan] 2025-10-05 02:59:37,672 - root - INFO - step: 7175 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1801 +[titan] 2025-10-05 02:59:37,672 - root - INFO - lr: 4.6827e-05 gnorm: 1.15 [ 4:25:28<20:14:33] +[titan] 2025-10-05 02:59:48,576 - root - INFO - step: 7180 loss: 2.6188 
memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.3087 global_avg_mtp_loss: 2.3101 +[titan] 2025-10-05 02:59:48,576 - root - INFO - lr: 4.6822e-05 gnorm: 1.22 [ 4:25:39<20:14:21] +[titan] 2025-10-05 02:59:59,449 - root - INFO - step: 7185 loss: 2.5330 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2333 +[titan] 2025-10-05 02:59:59,450 - root - INFO - lr: 4.6818e-05 gnorm: 1.21 [ 4:25:50<20:14:09] +[titan] 2025-10-05 03:00:10,322 - root - INFO - step: 7190 loss: 2.6028 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2970 +[titan] 2025-10-05 03:00:10,322 - root - INFO - lr: 4.6813e-05 gnorm: 1.20 [ 4:26:01<20:13:57] +[titan] 2025-10-05 03:00:21,233 - root - INFO - step: 7195 loss: 2.6073 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2996 +[titan] 2025-10-05 03:00:21,233 - root - INFO - lr: 4.6808e-05 gnorm: 1.23 [ 4:26:12<20:13:45] +[titan] 2025-10-05 03:00:29,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:00:32,108 - root - INFO - step: 7200 loss: 2.5130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2961 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:00:32,108 - root - INFO - lr: 4.6804e-05 gnorm: 1.32 [ 4:26:23<20:13:33] +[titan] 2025-10-05 03:00:43,038 - root - INFO - step: 7205 loss: 2.5836 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.3051 global_avg_mtp_loss: 2.2785 +[titan] 2025-10-05 03:00:43,038 - root - INFO - lr: 4.6799e-05 gnorm: 1.29 [ 4:26:34<20:13:21] +[titan] 2025-10-05 03:00:53,933 - root - INFO - step: 7210 loss: 2.5257 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:00:53,933 - root - INFO - lr: 4.6795e-05 gnorm: 1.20 [ 4:26:45<20:13:09] +[titan] 2025-10-05 03:01:04,827 - root - INFO - step: 7215 loss: 2.5854 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.3039 global_avg_mtp_loss: 2.2815 +[titan] 2025-10-05 03:01:04,827 - root - INFO - lr: 4.6790e-05 gnorm: 1.21 [ 4:26:56<20:12:57] +[titan] 2025-10-05 03:01:15,711 - root - INFO - step: 7220 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2982 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:01:15,712 - root - INFO - lr: 4.6786e-05 gnorm: 1.19 [ 4:27:06<20:12:44] +[titan] 2025-10-05 03:01:26,615 - root - INFO - step: 7225 loss: 2.4967 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2040 +[titan] 2025-10-05 03:01:26,615 - root - INFO - lr: 4.6781e-05 gnorm: 1.24 [ 4:27:17<20:12:32] +[titan] 2025-10-05 03:01:37,539 - root - INFO - step: 7230 loss: 2.6118 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3074 global_avg_mtp_loss: 2.3044 +[titan] 2025-10-05 03:01:37,539 - root - INFO - lr: 4.6776e-05 
gnorm: 1.20 [ 4:27:28<20:12:21] +[titan] 2025-10-05 03:01:48,431 - root - INFO - step: 7235 loss: 2.5240 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2979 global_avg_mtp_loss: 2.2261 +[titan] 2025-10-05 03:01:48,431 - root - INFO - lr: 4.6772e-05 gnorm: 1.18 [ 4:27:39<20:12:09] +[titan] 2025-10-05 03:01:59,313 - root - INFO - step: 7240 loss: 2.5262 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2284 +[titan] 2025-10-05 03:01:59,313 - root - INFO - lr: 4.6767e-05 gnorm: 1.17 [ 4:27:50<20:11:56] +[titan] 2025-10-05 03:02:10,185 - root - INFO - step: 7245 loss: 2.5139 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2193 +[titan] 2025-10-05 03:02:10,185 - root - INFO - lr: 4.6762e-05 gnorm: 1.26 [ 4:28:01<20:11:44] +[titan] 2025-10-05 03:02:18,881 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:02:21,067 - root - INFO - step: 7250 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:02:21,067 - root - INFO - lr: 4.6758e-05 gnorm: 1.26 [ 4:28:12<20:11:32] +[titan] 2025-10-05 03:02:31,931 - root - INFO - step: 7255 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2976 global_avg_mtp_loss: 2.2327 +[titan] 2025-10-05 03:02:31,931 - root - INFO - lr: 4.6753e-05 gnorm: 1.22 [ 4:28:23<20:11:20] +[titan] 2025-10-05 03:02:42,869 - root - INFO - step: 7260 loss: 2.5329 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2991 global_avg_mtp_loss: 2.2339 +[titan] 2025-10-05 03:02:42,869 - root - INFO - lr: 4.6749e-05 gnorm: 1.22 [ 4:28:34<20:11:08] +[titan] 2025-10-05 03:02:53,734 - root - INFO - step: 7265 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2929 global_avg_mtp_loss: 2.2033 +[titan] 2025-10-05 03:02:53,734 - root - INFO - lr: 4.6744e-05 gnorm: 1.30 [ 4:28:44<20:10:56] +[titan] 2025-10-05 03:03:04,623 - root - INFO - step: 7270 loss: 2.6162 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.3079 global_avg_mtp_loss: 2.3083 +[titan] 2025-10-05 03:03:04,623 - root - INFO - lr: 4.6739e-05 gnorm: 1.34 [ 4:28:55<20:10:44] +[titan] 2025-10-05 03:03:15,505 - root - INFO - step: 7275 loss: 2.6152 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3072 global_avg_mtp_loss: 2.3079 +[titan] 2025-10-05 03:03:15,505 - root - INFO - lr: 4.6735e-05 gnorm: 1.22 [ 4:29:06<20:10:32] +[titan] 2025-10-05 03:03:26,372 - root - INFO - step: 7280 loss: 2.5931 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2876 +[titan] 2025-10-05 03:03:26,372 - root - INFO - lr: 4.6730e-05 
gnorm: 1.19 [ 4:29:17<20:10:20] +[titan] 2025-10-05 03:03:37,274 - root - INFO - step: 7285 loss: 2.6024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.3076 global_avg_mtp_loss: 2.2948 +[titan] 2025-10-05 03:03:37,274 - root - INFO - lr: 4.6725e-05 gnorm: 1.27 [ 4:29:28<20:10:08] +[titan] 2025-10-05 03:03:48,171 - root - INFO - step: 7290 loss: 2.5142 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2958 global_avg_mtp_loss: 2.2183 +[titan] 2025-10-05 03:03:48,171 - root - INFO - lr: 4.6721e-05 gnorm: 1.18 [ 4:29:39<20:09:56] +[titan] 2025-10-05 03:03:59,037 - root - INFO - step: 7295 loss: 2.5672 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.3019 global_avg_mtp_loss: 2.2653 +[titan] 2025-10-05 03:03:59,037 - root - INFO - lr: 4.6716e-05 gnorm: 1.21 [ 4:29:50<20:09:44] +[titan] 2025-10-05 03:04:07,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:04:09,893 - root - INFO - step: 7300 loss: 2.5351 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2362 +[titan] 2025-10-05 03:04:09,893 - root - INFO - lr: 4.6712e-05 gnorm: 1.19 [ 4:30:01<20:09:32] +[titan] 2025-10-05 03:04:20,770 - root - INFO - step: 7305 loss: 2.5190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2969 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:04:20,770 - root - INFO - lr: 4.6707e-05 gnorm: 1.20 [ 4:30:11<20:09:20] +[titan] 2025-10-05 03:04:31,636 - root - INFO - step: 7310 loss: 2.5542 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2522 +[titan] 2025-10-05 03:04:31,637 - root - INFO - lr: 4.6702e-05 gnorm: 1.16 [ 4:30:22<20:09:07] +[titan] 2025-10-05 03:04:42,538 - root - INFO - step: 7315 loss: 2.5823 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.3047 global_avg_mtp_loss: 2.2776 +[titan] 2025-10-05 03:04:42,538 - root - INFO - lr: 4.6698e-05 gnorm: 1.19 [ 4:30:33<20:08:55] +[titan] 2025-10-05 03:04:53,396 - root - INFO - step: 7320 loss: 2.6080 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.3092 global_avg_mtp_loss: 2.2988 +[titan] 2025-10-05 03:04:53,396 - root - INFO - lr: 4.6693e-05 gnorm: 1.20 [ 4:30:44<20:08:43] +[titan] 2025-10-05 03:05:04,291 - root - INFO - step: 7325 loss: 2.6131 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.3080 global_avg_mtp_loss: 2.3051 +[titan] 2025-10-05 03:05:04,291 - root - INFO - lr: 4.6688e-05 gnorm: 1.20 [ 4:30:55<20:08:31] +[titan] 2025-10-05 03:05:15,170 - root - INFO - step: 7330 loss: 2.5664 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2639 +[titan] 2025-10-05 03:05:15,170 - root - INFO - lr: 4.6684e-05 
gnorm: 1.19 [ 4:31:06<20:08:19] +[titan] 2025-10-05 03:05:26,057 - root - INFO - step: 7335 loss: 2.5718 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3029 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:05:26,057 - root - INFO - lr: 4.6679e-05 gnorm: 1.19 [ 4:31:17<20:08:07] +[titan] 2025-10-05 03:05:36,944 - root - INFO - step: 7340 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3024 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:05:36,945 - root - INFO - lr: 4.6674e-05 gnorm: 1.21 [ 4:31:28<20:07:55] +[titan] 2025-10-05 03:05:47,861 - root - INFO - step: 7345 loss: 2.4951 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2008 +[titan] 2025-10-05 03:05:47,862 - root - INFO - lr: 4.6670e-05 gnorm: 1.18 [ 4:31:39<20:07:43] +[titan] 2025-10-05 03:05:56,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:05:58,742 - root - INFO - step: 7350 loss: 2.6375 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.3105 global_avg_mtp_loss: 2.3271 +[titan] 2025-10-05 03:05:58,742 - root - INFO - lr: 4.6665e-05 gnorm: 1.20 [ 4:31:49<20:07:31] +[titan] 2025-10-05 03:06:09,631 - root - INFO - step: 7355 loss: 2.5204 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2241 +[titan] 2025-10-05 03:06:09,631 - root - INFO - lr: 4.6660e-05 gnorm: 1.13 [ 4:32:00<20:07:19] +[titan] 2025-10-05 03:06:20,514 - root - INFO - step: 7360 loss: 2.5761 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.3026 global_avg_mtp_loss: 2.2736 +[titan] 2025-10-05 03:06:20,514 - root - INFO - lr: 4.6656e-05 gnorm: 1.20 [ 4:32:11<20:07:07] +[titan] 2025-10-05 03:06:31,396 - root - INFO - step: 7365 loss: 2.5712 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2678 +[titan] 2025-10-05 03:06:31,397 - root - INFO - lr: 4.6651e-05 gnorm: 1.18 [ 4:32:22<20:06:55] +[titan] 2025-10-05 03:06:42,281 - root - INFO - step: 7370 loss: 2.5449 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2992 global_avg_mtp_loss: 2.2457 +[titan] 2025-10-05 03:06:42,282 - root - INFO - lr: 4.6646e-05 gnorm: 1.20 [ 4:32:33<20:06:43] +[titan] 2025-10-05 03:06:53,156 - root - INFO - step: 7375 loss: 2.5031 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2076 +[titan] 2025-10-05 03:06:53,156 - root - INFO - lr: 4.6642e-05 gnorm: 1.20 [ 4:32:44<20:06:31] +[titan] 2025-10-05 03:07:04,009 - root - INFO - step: 7380 loss: 2.4884 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:07:04,009 - root - INFO - lr: 4.6637e-05 
gnorm: 1.18 [ 4:32:55<20:06:19] +[titan] 2025-10-05 03:07:14,887 - root - INFO - step: 7385 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2624 +[titan] 2025-10-05 03:07:14,887 - root - INFO - lr: 4.6632e-05 gnorm: 1.29 [ 4:33:06<20:06:07] +[titan] 2025-10-05 03:07:25,781 - root - INFO - step: 7390 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:07:25,781 - root - INFO - lr: 4.6627e-05 gnorm: 1.19 [ 4:33:16<20:05:55] +[titan] 2025-10-05 03:07:36,668 - root - INFO - step: 7395 loss: 2.5215 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2253 +[titan] 2025-10-05 03:07:36,668 - root - INFO - lr: 4.6623e-05 gnorm: 1.18 [ 4:33:27<20:05:43] +[titan] 2025-10-05 03:07:45,411 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:07:47,595 - root - INFO - step: 7400 loss: 2.5552 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.3037 global_avg_mtp_loss: 2.2515 +[titan] 2025-10-05 03:07:47,595 - root - INFO - lr: 4.6618e-05 gnorm: 1.25 [ 4:33:38<20:05:31] +[titan] 2025-10-05 03:07:58,479 - root - INFO - step: 7405 loss: 2.5722 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2664 +[titan] 2025-10-05 03:07:58,479 - root - INFO - lr: 4.6613e-05 gnorm: 1.23 [ 4:33:49<20:05:19] +[titan] 2025-10-05 03:08:09,352 - root - INFO - step: 7410 loss: 2.6173 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.3145 global_avg_mtp_loss: 2.3028 +[titan] 2025-10-05 03:08:09,352 - root - INFO - lr: 4.6609e-05 gnorm: 1.26 [ 4:34:00<20:05:07] +[titan] 2025-10-05 03:08:20,245 - root - INFO - step: 7415 loss: 2.6371 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.3115 global_avg_mtp_loss: 2.3256 +[titan] 2025-10-05 03:08:20,245 - root - INFO - lr: 4.6604e-05 gnorm: 1.18 [ 4:34:11<20:04:55] +[titan] 2025-10-05 03:08:31,148 - root - INFO - step: 7420 loss: 2.5121 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:08:31,148 - root - INFO - lr: 4.6599e-05 gnorm: 1.18 [ 4:34:22<20:04:43] +[titan] 2025-10-05 03:08:42,047 - root - INFO - step: 7425 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2028 +[titan] 2025-10-05 03:08:42,047 - root - INFO - lr: 4.6594e-05 gnorm: 1.17 [ 4:34:33<20:04:31] +[titan] 2025-10-05 03:08:52,923 - root - INFO - step: 7430 loss: 2.5993 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.3058 global_avg_mtp_loss: 2.2935 +[titan] 2025-10-05 03:08:52,923 - root - INFO - lr: 4.6590e-05 
gnorm: 1.19 [ 4:34:44<20:04:19] +[titan] 2025-10-05 03:09:03,806 - root - INFO - step: 7435 loss: 2.5648 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2641 +[titan] 2025-10-05 03:09:03,806 - root - INFO - lr: 4.6585e-05 gnorm: 1.23 [ 4:34:55<20:04:07] +[titan] 2025-10-05 03:09:14,682 - root - INFO - step: 7440 loss: 2.4458 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:09:14,682 - root - INFO - lr: 4.6580e-05 gnorm: 1.21 [ 4:35:05<20:03:55] +[titan] 2025-10-05 03:09:25,563 - root - INFO - step: 7445 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2988 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:09:25,563 - root - INFO - lr: 4.6576e-05 gnorm: 1.20 [ 4:35:16<20:03:43] +[titan] 2025-10-05 03:09:34,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:09:36,483 - root - INFO - step: 7450 loss: 2.4992 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2041 +[titan] 2025-10-05 03:09:36,483 - root - INFO - lr: 4.6571e-05 gnorm: 1.13 [ 4:35:27<20:03:31] +[titan] 2025-10-05 03:09:47,415 - root - INFO - step: 7455 loss: 2.5685 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.3056 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:09:47,415 - root - INFO - lr: 4.6566e-05 gnorm: 1.21 [ 4:35:38<20:03:19] +[titan] 2025-10-05 03:09:58,322 - root - INFO - step: 7460 loss: 2.5530 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2521 +[titan] 2025-10-05 03:09:58,322 - root - INFO - lr: 4.6561e-05 gnorm: 1.19 [ 4:35:49<20:03:07] +[titan] 2025-10-05 03:10:09,217 - root - INFO - step: 7465 loss: 2.5984 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.3054 global_avg_mtp_loss: 2.2930 +[titan] 2025-10-05 03:10:09,217 - root - INFO - lr: 4.6557e-05 gnorm: 1.33 [ 4:36:00<20:02:55] +[titan] 2025-10-05 03:10:20,126 - root - INFO - step: 7470 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:10:20,127 - root - INFO - lr: 4.6552e-05 gnorm: 1.25 [ 4:36:11<20:02:44] +[titan] 2025-10-05 03:10:31,009 - root - INFO - step: 7475 loss: 2.6414 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.3181 global_avg_mtp_loss: 2.3233 +[titan] 2025-10-05 03:10:31,009 - root - INFO - lr: 4.6547e-05 gnorm: 1.21 [ 4:36:22<20:02:32] +[titan] 2025-10-05 03:10:41,908 - root - INFO - step: 7480 loss: 2.6221 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.3088 global_avg_mtp_loss: 2.3133 +[titan] 2025-10-05 03:10:41,908 - root - INFO - lr: 4.6542e-05 
gnorm: 1.24 [ 4:36:33<20:02:20] +[titan] 2025-10-05 03:10:52,859 - root - INFO - step: 7485 loss: 2.6267 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.13 mfu: 41.97% global_avg_ntp_loss: 0.3117 global_avg_mtp_loss: 2.3150 +[titan] 2025-10-05 03:10:52,859 - root - INFO - lr: 4.6538e-05 gnorm: 1.23 [ 4:36:44<20:02:08] +[titan] 2025-10-05 03:11:03,748 - root - INFO - step: 7490 loss: 2.4962 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:11:03,748 - root - INFO - lr: 4.6533e-05 gnorm: 1.16 [ 4:36:54<20:01:56] +[titan] 2025-10-05 03:11:14,653 - root - INFO - step: 7495 loss: 2.5041 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2941 global_avg_mtp_loss: 2.2100 +[titan] 2025-10-05 03:11:14,654 - root - INFO - lr: 4.6528e-05 gnorm: 1.17 [ 4:37:05<20:01:44] +[titan] 2025-10-05 03:11:23,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:11:25,557 - root - INFO - step: 7500 loss: 2.5279 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 03:11:25,558 - root - INFO - lr: 4.6523e-05 gnorm: 1.17 [ 4:37:16<20:01:32] +[titan] 2025-10-05 03:11:36,447 - root - INFO - step: 7505 loss: 2.5670 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.3028 global_avg_mtp_loss: 2.2642 +[titan] 2025-10-05 03:11:36,447 - root - INFO - lr: 4.6519e-05 gnorm: 1.26 [ 4:37:27<20:01:20] +[titan] 2025-10-05 03:11:47,366 - root - INFO - step: 7510 loss: 2.5107 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 03:11:47,366 - root - INFO - lr: 4.6514e-05 gnorm: 1.18 [ 4:37:38<20:01:08] +[titan] 2025-10-05 03:11:58,284 - root - INFO - step: 7515 loss: 2.6471 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.3128 global_avg_mtp_loss: 2.3343 +[titan] 2025-10-05 03:11:58,284 - root - INFO - lr: 4.6509e-05 gnorm: 1.26 [ 4:37:49<20:00:56] +[titan] 2025-10-05 03:12:09,176 - root - INFO - step: 7520 loss: 2.5022 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:12:09,177 - root - INFO - lr: 4.6504e-05 gnorm: 1.24 [ 4:38:00<20:00:45] +[titan] 2025-10-05 03:12:20,065 - root - INFO - step: 7525 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2289 +[titan] 2025-10-05 03:12:20,065 - root - INFO - lr: 4.6499e-05 gnorm: 1.20 [ 4:38:11<20:00:33] +[titan] 2025-10-05 03:12:30,937 - root - INFO - step: 7530 loss: 2.5858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.3055 global_avg_mtp_loss: 2.2803 +[titan] 2025-10-05 03:12:30,937 - root - INFO - lr: 4.6495e-05 
gnorm: 1.25 [ 4:38:22<20:00:21] +[titan] 2025-10-05 03:12:41,813 - root - INFO - step: 7535 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:12:41,814 - root - INFO - lr: 4.6490e-05 gnorm: 1.20 [ 4:38:33<20:00:09] +[titan] 2025-10-05 03:12:52,684 - root - INFO - step: 7540 loss: 2.5356 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2385 +[titan] 2025-10-05 03:12:52,684 - root - INFO - lr: 4.6485e-05 gnorm: 1.23 [ 4:38:43<19:59:56] +[titan] 2025-10-05 03:13:03,580 - root - INFO - step: 7545 loss: 2.5425 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.3005 global_avg_mtp_loss: 2.2420 +[titan] 2025-10-05 03:13:03,580 - root - INFO - lr: 4.6480e-05 gnorm: 1.22 [ 4:38:54<19:59:45] +[titan] 2025-10-05 03:13:12,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:13:14,425 - root - INFO - step: 7550 loss: 2.5098 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:14,425 - root - INFO - lr: 4.6476e-05 gnorm: 1.21 [ 4:39:05<19:59:32] +[titan] 2025-10-05 03:13:25,285 - root - INFO - step: 7555 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2953 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:13:25,285 - root - INFO - lr: 4.6471e-05 gnorm: 1.32 [ 4:39:16<19:59:20] +[titan] 2025-10-05 03:13:36,128 - root - INFO - step: 7560 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1394 +[titan] 2025-10-05 03:13:36,128 - root - INFO - lr: 4.6466e-05 gnorm: 1.23 [ 4:39:27<19:59:08] +[titan] 2025-10-05 03:13:47,004 - root - INFO - step: 7565 loss: 2.5275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2297 +[titan] 2025-10-05 03:13:47,005 - root - INFO - lr: 4.6461e-05 gnorm: 1.21 [ 4:39:38<19:58:56] +[titan] 2025-10-05 03:13:57,856 - root - INFO - step: 7570 loss: 2.4658 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:13:57,856 - root - INFO - lr: 4.6456e-05 gnorm: 1.15 [ 4:39:49<19:58:44] +[titan] 2025-10-05 03:14:08,701 - root - INFO - step: 7575 loss: 2.5486 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2477 +[titan] 2025-10-05 03:14:08,701 - root - INFO - lr: 4.6452e-05 gnorm: 1.16 [ 4:39:59<19:58:32] +[titan] 2025-10-05 03:14:19,585 - root - INFO - step: 7580 loss: 2.4950 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 03:14:19,586 - root - INFO - lr: 4.6447e-05 
gnorm: 1.20 [ 4:40:10<19:58:20] +[titan] 2025-10-05 03:14:30,487 - root - INFO - step: 7585 loss: 2.5519 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.3008 global_avg_mtp_loss: 2.2511 +[titan] 2025-10-05 03:14:30,487 - root - INFO - lr: 4.6442e-05 gnorm: 1.18 [ 4:40:21<19:58:08] +[titan] 2025-10-05 03:14:41,356 - root - INFO - step: 7590 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2224 +[titan] 2025-10-05 03:14:41,356 - root - INFO - lr: 4.6437e-05 gnorm: 1.18 [ 4:40:32<19:57:56] +[titan] 2025-10-05 03:14:52,221 - root - INFO - step: 7595 loss: 2.5646 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2619 +[titan] 2025-10-05 03:14:52,221 - root - INFO - lr: 4.6432e-05 gnorm: 1.16 [ 4:40:43<19:57:44] +[titan] 2025-10-05 03:15:00,910 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:15:03,087 - root - INFO - step: 7600 loss: 2.5198 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:15:03,087 - root - INFO - lr: 4.6427e-05 gnorm: 1.22 [ 4:40:54<19:57:32] +[titan] 2025-10-05 03:15:13,944 - root - INFO - step: 7605 loss: 2.5332 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2354 +[titan] 2025-10-05 03:15:13,944 - root - INFO - lr: 4.6423e-05 gnorm: 1.19 [ 4:41:05<19:57:20] +[titan] 2025-10-05 03:15:24,824 - root - INFO - step: 7610 loss: 2.4376 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1514 +[titan] 2025-10-05 03:15:24,824 - root - INFO - lr: 4.6418e-05 gnorm: 1.19 [ 4:41:16<19:57:08] +[titan] 2025-10-05 03:15:35,666 - root - INFO - step: 7615 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2192 +[titan] 2025-10-05 03:15:35,666 - root - INFO - lr: 4.6413e-05 gnorm: 1.17 [ 4:41:26<19:56:56] +[titan] 2025-10-05 03:15:46,512 - root - INFO - step: 7620 loss: 2.5412 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2413 +[titan] 2025-10-05 03:15:46,512 - root - INFO - lr: 4.6408e-05 gnorm: 1.18 [ 4:41:37<19:56:44] +[titan] 2025-10-05 03:15:57,356 - root - INFO - step: 7625 loss: 2.6165 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.3094 global_avg_mtp_loss: 2.3070 +[titan] 2025-10-05 03:15:57,356 - root - INFO - lr: 4.6403e-05 gnorm: 1.26 [ 4:41:48<19:56:32] +[titan] 2025-10-05 03:16:08,215 - root - INFO - step: 7630 loss: 2.5181 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2959 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:16:08,215 - root - INFO - lr: 4.6398e-05 
gnorm: 1.21 [ 4:41:59<19:56:19] +[titan] 2025-10-05 03:16:19,088 - root - INFO - step: 7635 loss: 2.4574 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1676 +[titan] 2025-10-05 03:16:19,088 - root - INFO - lr: 4.6394e-05 gnorm: 1.19 [ 4:42:10<19:56:07] +[titan] 2025-10-05 03:16:29,923 - root - INFO - step: 7640 loss: 2.4611 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1719 +[titan] 2025-10-05 03:16:29,923 - root - INFO - lr: 4.6389e-05 gnorm: 1.17 [ 4:42:21<19:55:55] +[titan] 2025-10-05 03:16:40,805 - root - INFO - step: 7645 loss: 2.5518 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2500 +[titan] 2025-10-05 03:16:40,805 - root - INFO - lr: 4.6384e-05 gnorm: 1.19 [ 4:42:31<19:55:43] +[titan] 2025-10-05 03:16:49,484 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:16:51,676 - root - INFO - step: 7650 loss: 2.5593 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.3014 global_avg_mtp_loss: 2.2579 +[titan] 2025-10-05 03:16:51,676 - root - INFO - lr: 4.6379e-05 gnorm: 1.21 [ 4:42:42<19:55:31] +[titan] 2025-10-05 03:17:02,521 - root - INFO - step: 7655 loss: 2.5404 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2412 +[titan] 2025-10-05 03:17:02,521 - root - INFO - lr: 4.6374e-05 gnorm: 1.24 [ 4:42:53<19:55:19] +[titan] 2025-10-05 03:17:13,367 - root - INFO - step: 7660 loss: 2.5051 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2097 +[titan] 2025-10-05 03:17:13,367 - root - INFO - lr: 4.6369e-05 gnorm: 1.23 [ 4:43:04<19:55:07] +[titan] 2025-10-05 03:17:24,235 - root - INFO - step: 7665 loss: 2.6218 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.3086 global_avg_mtp_loss: 2.3131 +[titan] 2025-10-05 03:17:24,235 - root - INFO - lr: 4.6364e-05 gnorm: 1.19 [ 4:43:15<19:54:55] +[titan] 2025-10-05 03:17:35,066 - root - INFO - step: 7670 loss: 2.5900 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.3053 global_avg_mtp_loss: 2.2847 +[titan] 2025-10-05 03:17:35,066 - root - INFO - lr: 4.6360e-05 gnorm: 1.23 [ 4:43:26<19:54:43] +[titan] 2025-10-05 03:17:45,893 - root - INFO - step: 7675 loss: 2.5953 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.3122 global_avg_mtp_loss: 2.2831 +[titan] 2025-10-05 03:17:45,893 - root - INFO - lr: 4.6355e-05 gnorm: 1.19 [ 4:43:37<19:54:31] +[titan] 2025-10-05 03:17:56,861 - root - INFO - step: 7680 loss: 2.5148 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2963 global_avg_mtp_loss: 2.2185 +[titan] 2025-10-05 03:17:56,861 - root - INFO - lr: 4.6350e-05 
gnorm: 1.23 [ 4:43:48<19:54:19] +[titan] 2025-10-05 03:17:57,042 - root - INFO - Dumping profiler traces at step 7680 +[titan] 2025-10-05 03:17:57,080 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:18:07,907 - root - INFO - step: 7685 loss: 2.4389 memory: 118.84GiB(85.28%) tps: 29,665 tflops: 411.56 mfu: 41.61% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 03:18:07,907 - root - INFO - lr: 4.6345e-05 gnorm: 1.17 [ 4:43:59<19:54:08] +[titan] 2025-10-05 03:18:18,756 - root - INFO - step: 7690 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:18:18,757 - root - INFO - lr: 4.6340e-05 gnorm: 1.18 [ 4:44:09<19:53:56] +[titan] 2025-10-05 03:18:29,609 - root - INFO - step: 7695 loss: 2.5730 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:18:29,609 - root - INFO - lr: 4.6335e-05 gnorm: 1.36 [ 4:44:20<19:53:44] +[titan] 2025-10-05 03:18:38,281 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:18:40,472 - root - INFO - step: 7700 loss: 2.5666 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.3022 global_avg_mtp_loss: 2.2643 +[titan] 2025-10-05 03:18:40,473 - root - INFO - lr: 4.6330e-05 gnorm: 1.19 [ 4:44:31<19:53:32] +[titan] 2025-10-05 03:18:51,364 - root - INFO - step: 7705 loss: 2.5443 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2441 +[titan] 2025-10-05 03:18:51,364 - root - INFO - lr: 4.6325e-05 gnorm: 1.19 [ 4:44:42<19:53:20] +[titan] 2025-10-05 03:19:02,224 - root - INFO - step: 7710 loss: 2.5657 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2637 +[titan] 2025-10-05 03:19:02,224 - root - INFO - lr: 4.6321e-05 gnorm: 1.20 [ 4:44:53<19:53:08] +[titan] 2025-10-05 03:19:13,098 - root - INFO - step: 7715 loss: 2.5489 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.2486 +[titan] 2025-10-05 03:19:13,098 - root - INFO - lr: 4.6316e-05 gnorm: 1.20 [ 4:45:04<19:52:56] +[titan] 2025-10-05 03:19:23,973 - root - INFO - step: 7720 loss: 2.4402 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1529 +[titan] 2025-10-05 03:19:23,974 - root - INFO - lr: 4.6311e-05 gnorm: 1.21 [ 4:45:15<19:52:44] +[titan] 2025-10-05 03:19:34,816 - root - INFO - step: 7725 loss: 2.5551 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.3015 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:19:34,816 - root - INFO - lr: 4.6306e-05 gnorm: 1.19 [ 4:45:26<19:52:32] +[titan] 2025-10-05 03:19:45,679 - root - INFO - step: 7730 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.3038 global_avg_mtp_loss: 2.2814 +[titan] 2025-10-05 03:19:45,679 - root - INFO - lr: 4.6301e-05 
gnorm: 1.17 [ 4:45:36<19:52:20] +[titan] 2025-10-05 03:19:56,502 - root - INFO - step: 7735 loss: 2.5206 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2970 global_avg_mtp_loss: 2.2236 +[titan] 2025-10-05 03:19:56,502 - root - INFO - lr: 4.6296e-05 gnorm: 1.24 [ 4:45:47<19:52:08] +[titan] 2025-10-05 03:20:07,337 - root - INFO - step: 7740 loss: 2.5557 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2545 +[titan] 2025-10-05 03:20:07,337 - root - INFO - lr: 4.6291e-05 gnorm: 1.19 [ 4:45:58<19:51:56] +[titan] 2025-10-05 03:20:18,166 - root - INFO - step: 7745 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.79 mfu: 42.45% global_avg_ntp_loss: 0.2938 global_avg_mtp_loss: 2.1964 +[titan] 2025-10-05 03:20:18,166 - root - INFO - lr: 4.6286e-05 gnorm: 1.20 [ 4:46:09<19:51:43] +[titan] 2025-10-05 03:20:26,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:20:29,014 - root - INFO - step: 7750 loss: 2.4800 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.1876 +[titan] 2025-10-05 03:20:29,015 - root - INFO - lr: 4.6281e-05 gnorm: 1.17 [ 4:46:20<19:51:31] +[titan] 2025-10-05 03:20:39,856 - root - INFO - step: 7755 loss: 2.4850 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1929 +[titan] 2025-10-05 03:20:39,857 - root - INFO - lr: 4.6276e-05 gnorm: 1.24 [ 4:46:31<19:51:19] +[titan] 2025-10-05 03:20:50,697 - root - INFO - step: 7760 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2144 +[titan] 2025-10-05 03:20:50,697 - root - INFO - lr: 4.6271e-05 gnorm: 1.13 [ 4:46:41<19:51:07] +[titan] 2025-10-05 03:21:01,573 - root - INFO - step: 7765 loss: 2.5168 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2200 +[titan] 2025-10-05 03:21:01,573 - root - INFO - lr: 4.6267e-05 gnorm: 1.17 [ 4:46:52<19:50:55] +[titan] 2025-10-05 03:21:12,426 - root - INFO - step: 7770 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2689 +[titan] 2025-10-05 03:21:12,426 - root - INFO - lr: 4.6262e-05 gnorm: 1.24 [ 4:47:03<19:50:43] +[titan] 2025-10-05 03:21:23,262 - root - INFO - step: 7775 loss: 2.5468 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2462 +[titan] 2025-10-05 03:21:23,262 - root - INFO - lr: 4.6257e-05 gnorm: 1.22 [ 4:47:14<19:50:31] +[titan] 2025-10-05 03:21:34,121 - root - INFO - step: 7780 loss: 2.5186 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2965 global_avg_mtp_loss: 2.2221 +[titan] 2025-10-05 03:21:34,121 - root - INFO - lr: 4.6252e-05 
gnorm: 1.22 [ 4:47:25<19:50:19] +[titan] 2025-10-05 03:21:44,959 - root - INFO - step: 7785 loss: 2.5555 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.3023 global_avg_mtp_loss: 2.2532 +[titan] 2025-10-05 03:21:44,960 - root - INFO - lr: 4.6247e-05 gnorm: 1.19 [ 4:47:36<19:50:07] +[titan] 2025-10-05 03:21:55,841 - root - INFO - step: 7790 loss: 2.5595 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2574 +[titan] 2025-10-05 03:21:55,841 - root - INFO - lr: 4.6242e-05 gnorm: 1.21 [ 4:47:47<19:49:55] +[titan] 2025-10-05 03:22:06,686 - root - INFO - step: 7795 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2983 global_avg_mtp_loss: 2.2407 +[titan] 2025-10-05 03:22:06,686 - root - INFO - lr: 4.6237e-05 gnorm: 1.20 [ 4:47:57<19:49:43] +[titan] 2025-10-05 03:22:15,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:22:17,501 - root - INFO - step: 7800 loss: 2.4671 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1775 +[titan] 2025-10-05 03:22:17,501 - root - INFO - lr: 4.6232e-05 gnorm: 1.31 [ 4:48:08<19:49:31] +[titan] 2025-10-05 03:22:28,367 - root - INFO - step: 7805 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2366 +[titan] 2025-10-05 03:22:28,367 - root - INFO - lr: 4.6227e-05 gnorm: 1.21 [ 4:48:19<19:49:19] +[titan] 2025-10-05 03:22:39,182 - root - INFO - step: 7810 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:22:39,182 - root - INFO - lr: 4.6222e-05 gnorm: 1.23 [ 4:48:30<19:49:07] +[titan] 2025-10-05 03:22:50,001 - root - INFO - step: 7815 loss: 2.5037 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2093 +[titan] 2025-10-05 03:22:50,001 - root - INFO - lr: 4.6217e-05 gnorm: 1.17 [ 4:48:41<19:48:54] +[titan] 2025-10-05 03:23:00,861 - root - INFO - step: 7820 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 03:23:00,861 - root - INFO - lr: 4.6212e-05 gnorm: 1.15 [ 4:48:52<19:48:42] +[titan] 2025-10-05 03:23:11,665 - root - INFO - step: 7825 loss: 2.5549 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2537 +[titan] 2025-10-05 03:23:11,665 - root - INFO - lr: 4.6207e-05 gnorm: 1.18 [ 4:49:02<19:48:30] +[titan] 2025-10-05 03:23:22,463 - root - INFO - step: 7830 loss: 2.5877 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.3065 global_avg_mtp_loss: 2.2813 +[titan] 2025-10-05 03:23:22,464 - root - INFO - lr: 4.6202e-05 
gnorm: 1.22 [ 4:49:13<19:48:18] +[titan] 2025-10-05 03:23:33,276 - root - INFO - step: 7835 loss: 2.5278 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2288 +[titan] 2025-10-05 03:23:33,276 - root - INFO - lr: 4.6197e-05 gnorm: 1.28 [ 4:49:24<19:48:06] +[titan] 2025-10-05 03:23:44,101 - root - INFO - step: 7840 loss: 2.5759 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2732 +[titan] 2025-10-05 03:23:44,101 - root - INFO - lr: 4.6192e-05 gnorm: 1.19 [ 4:49:35<19:47:54] +[titan] 2025-10-05 03:23:54,974 - root - INFO - step: 7845 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 03:23:54,975 - root - INFO - lr: 4.6187e-05 gnorm: 1.19 [ 4:49:46<19:47:42] +[titan] 2025-10-05 03:24:03,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:24:05,782 - root - INFO - step: 7850 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2873 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 03:24:05,783 - root - INFO - lr: 4.6182e-05 gnorm: 1.17 [ 4:49:56<19:47:29] +[titan] 2025-10-05 03:24:16,593 - root - INFO - step: 7855 loss: 2.4523 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1636 +[titan] 2025-10-05 03:24:16,593 - root - INFO - lr: 4.6177e-05 gnorm: 1.14 [ 4:50:07<19:47:17] +[titan] 2025-10-05 03:24:27,423 - root - INFO - step: 7860 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2509 +[titan] 2025-10-05 03:24:27,424 - root - INFO - lr: 4.6172e-05 gnorm: 1.24 [ 4:50:18<19:47:05] +[titan] 2025-10-05 03:24:38,249 - root - INFO - step: 7865 loss: 2.5375 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2394 +[titan] 2025-10-05 03:24:38,249 - root - INFO - lr: 4.6167e-05 gnorm: 1.22 [ 4:50:29<19:46:53] +[titan] 2025-10-05 03:24:49,117 - root - INFO - step: 7870 loss: 2.4208 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1365 +[titan] 2025-10-05 03:24:49,117 - root - INFO - lr: 4.6163e-05 gnorm: 1.17 [ 4:50:40<19:46:41] +[titan] 2025-10-05 03:25:00,043 - root - INFO - step: 7875 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:25:00,043 - root - INFO - lr: 4.6158e-05 gnorm: 1.19 [ 4:50:51<19:46:29] +[titan] 2025-10-05 03:25:10,889 - root - INFO - step: 7880 loss: 2.5464 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2997 global_avg_mtp_loss: 2.2467 +[titan] 2025-10-05 03:25:10,889 - root - INFO - lr: 4.6153e-05 
gnorm: 1.19 [ 4:51:02<19:46:17] +[titan] 2025-10-05 03:25:21,745 - root - INFO - step: 7885 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2923 global_avg_mtp_loss: 2.2078 +[titan] 2025-10-05 03:25:21,745 - root - INFO - lr: 4.6148e-05 gnorm: 1.18 [ 4:51:12<19:46:05] +[titan] 2025-10-05 03:25:32,610 - root - INFO - step: 7890 loss: 2.5321 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2346 +[titan] 2025-10-05 03:25:32,610 - root - INFO - lr: 4.6143e-05 gnorm: 1.20 [ 4:51:23<19:45:53] +[titan] 2025-10-05 03:25:43,443 - root - INFO - step: 7895 loss: 2.5115 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2166 +[titan] 2025-10-05 03:25:43,443 - root - INFO - lr: 4.6138e-05 gnorm: 1.14 [ 4:51:34<19:45:41] +[titan] 2025-10-05 03:25:52,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:25:54,248 - root - INFO - step: 7900 loss: 2.5320 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.75 mfu: 42.54% global_avg_ntp_loss: 0.2996 global_avg_mtp_loss: 2.2324 +[titan] 2025-10-05 03:25:54,248 - root - INFO - lr: 4.6133e-05 gnorm: 1.18 [ 4:51:45<19:45:29] +[titan] 2025-10-05 03:26:05,135 - root - INFO - step: 7905 loss: 2.5694 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.3020 global_avg_mtp_loss: 2.2673 +[titan] 2025-10-05 03:26:05,135 - root - INFO - lr: 4.6128e-05 gnorm: 1.17 [ 4:51:56<19:45:17] +[titan] 2025-10-05 03:26:15,976 - root - INFO - step: 7910 loss: 2.5373 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2989 global_avg_mtp_loss: 2.2384 +[titan] 2025-10-05 03:26:15,976 - root - INFO - lr: 4.6123e-05 gnorm: 1.24 [ 4:52:07<19:45:05] +[titan] 2025-10-05 03:26:26,803 - root - INFO - step: 7915 loss: 2.5234 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2971 global_avg_mtp_loss: 2.2262 +[titan] 2025-10-05 03:26:26,803 - root - INFO - lr: 4.6118e-05 gnorm: 1.20 [ 4:52:17<19:44:53] +[titan] 2025-10-05 03:26:37,605 - root - INFO - step: 7920 loss: 2.5089 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2944 global_avg_mtp_loss: 2.2145 +[titan] 2025-10-05 03:26:37,605 - root - INFO - lr: 4.6113e-05 gnorm: 1.21 [ 4:52:28<19:44:41] +[titan] 2025-10-05 03:26:48,452 - root - INFO - step: 7925 loss: 2.4185 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 03:26:48,452 - root - INFO - lr: 4.6107e-05 gnorm: 1.15 [ 4:52:39<19:44:29] +[titan] 2025-10-05 03:26:59,330 - root - INFO - step: 7930 loss: 2.5390 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2405 +[titan] 2025-10-05 03:26:59,330 - root - INFO - lr: 4.6102e-05 
gnorm: 1.26 [ 4:52:50<19:44:17] +[titan] 2025-10-05 03:27:10,155 - root - INFO - step: 7935 loss: 2.4620 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2890 global_avg_mtp_loss: 2.1731 +[titan] 2025-10-05 03:27:10,155 - root - INFO - lr: 4.6097e-05 gnorm: 1.18 [ 4:53:01<19:44:05] +[titan] 2025-10-05 03:27:20,964 - root - INFO - step: 7940 loss: 2.4808 memory: 118.84GiB(85.28%) tps: 30,317 tflops: 420.61 mfu: 42.53% global_avg_ntp_loss: 0.2909 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:27:20,964 - root - INFO - lr: 4.6092e-05 gnorm: 1.15 [ 4:53:12<19:43:53] +[titan] 2025-10-05 03:27:31,803 - root - INFO - step: 7945 loss: 2.5084 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2142 +[titan] 2025-10-05 03:27:31,803 - root - INFO - lr: 4.6087e-05 gnorm: 1.16 [ 4:53:22<19:43:41] +[titan] 2025-10-05 03:27:40,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:27:42,583 - root - INFO - step: 7950 loss: 2.5326 memory: 118.84GiB(85.28%) tps: 30,397 tflops: 421.71 mfu: 42.64% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2359 +[titan] 2025-10-05 03:27:42,583 - root - INFO - lr: 4.6082e-05 gnorm: 1.21 [ 4:53:33<19:43:28] +[titan] 2025-10-05 03:27:53,381 - root - INFO - step: 7955 loss: 2.5597 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2591 +[titan] 2025-10-05 03:27:53,382 - root - INFO - lr: 4.6077e-05 gnorm: 1.18 [ 4:53:44<19:43:16] +[titan] 2025-10-05 03:28:04,227 - root - INFO - step: 7960 loss: 2.4969 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2940 global_avg_mtp_loss: 2.2030 +[titan] 2025-10-05 03:28:04,227 - root - INFO - lr: 4.6072e-05 gnorm: 1.15 [ 4:53:55<19:43:04] +[titan] 2025-10-05 03:28:15,055 - root - INFO - step: 7965 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1237 +[titan] 2025-10-05 03:28:15,055 - root - INFO - lr: 4.6067e-05 gnorm: 1.13 [ 4:54:06<19:42:52] +[titan] 2025-10-05 03:28:25,883 - root - INFO - step: 7970 loss: 2.4965 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.2034 +[titan] 2025-10-05 03:28:25,883 - root - INFO - lr: 4.6062e-05 gnorm: 1.17 [ 4:54:17<19:42:40] +[titan] 2025-10-05 03:28:36,715 - root - INFO - step: 7975 loss: 2.5491 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2491 +[titan] 2025-10-05 03:28:36,715 - root - INFO - lr: 4.6057e-05 gnorm: 1.19 [ 4:54:27<19:42:28] +[titan] 2025-10-05 03:28:47,543 - root - INFO - step: 7980 loss: 2.4817 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:28:47,543 - root - INFO - lr: 4.6052e-05 
gnorm: 1.16 [ 4:54:38<19:42:16] +[titan] 2025-10-05 03:28:58,364 - root - INFO - step: 7985 loss: 2.5422 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2429 +[titan] 2025-10-05 03:28:58,364 - root - INFO - lr: 4.6047e-05 gnorm: 1.18 [ 4:54:49<19:42:04] +[titan] 2025-10-05 03:29:09,176 - root - INFO - step: 7990 loss: 2.5558 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.3001 global_avg_mtp_loss: 2.2558 +[titan] 2025-10-05 03:29:09,176 - root - INFO - lr: 4.6042e-05 gnorm: 1.18 [ 4:55:00<19:41:52] +[titan] 2025-10-05 03:29:19,983 - root - INFO - step: 7995 loss: 2.5400 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.2416 +[titan] 2025-10-05 03:29:19,983 - root - INFO - lr: 4.6037e-05 gnorm: 1.16 [ 4:55:11<19:41:39] +[titan] 2025-10-05 03:29:28,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:29:30,811 - root - INFO - step: 8000 loss: 2.5669 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.3034 global_avg_mtp_loss: 2.2635 +[titan] 2025-10-05 03:29:30,811 - root - INFO - lr: 4.6032e-05 gnorm: 1.20 [ 4:55:21<19:41:27] +[titan] 2025-10-05 03:29:41,667 - root - INFO - step: 8005 loss: 2.5724 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.3025 global_avg_mtp_loss: 2.2699 +[titan] 2025-10-05 03:29:41,667 - root - INFO - lr: 4.6027e-05 gnorm: 1.25 [ 4:55:32<19:41:15] +[titan] 2025-10-05 03:29:52,487 - root - INFO - step: 8010 loss: 2.5006 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2058 +[titan] 2025-10-05 03:29:52,487 - root - INFO - lr: 4.6022e-05 gnorm: 1.26 [ 4:55:43<19:41:03] +[titan] 2025-10-05 03:30:03,339 - root - INFO - step: 8015 loss: 2.4914 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1995 +[titan] 2025-10-05 03:30:03,339 - root - INFO - lr: 4.6017e-05 gnorm: 1.18 [ 4:55:54<19:40:51] +[titan] 2025-10-05 03:30:14,162 - root - INFO - step: 8020 loss: 2.4809 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1895 +[titan] 2025-10-05 03:30:14,162 - root - INFO - lr: 4.6012e-05 gnorm: 1.20 [ 4:56:05<19:40:39] +[titan] 2025-10-05 03:30:25,003 - root - INFO - step: 8025 loss: 2.4991 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2042 +[titan] 2025-10-05 03:30:25,003 - root - INFO - lr: 4.6007e-05 gnorm: 1.17 [ 4:56:16<19:40:27] +[titan] 2025-10-05 03:30:35,840 - root - INFO - step: 8030 loss: 2.4390 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1512 +[titan] 2025-10-05 03:30:35,841 - root - INFO - lr: 4.6001e-05 
gnorm: 1.18 [ 4:56:26<19:40:15] +[titan] 2025-10-05 03:30:46,678 - root - INFO - step: 8035 loss: 2.5127 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:30:46,678 - root - INFO - lr: 4.5996e-05 gnorm: 1.21 [ 4:56:37<19:40:03] +[titan] 2025-10-05 03:30:57,494 - root - INFO - step: 8040 loss: 2.4745 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.31 mfu: 42.50% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1848 +[titan] 2025-10-05 03:30:57,495 - root - INFO - lr: 4.5991e-05 gnorm: 1.17 [ 4:56:48<19:39:51] +[titan] 2025-10-05 03:31:08,360 - root - INFO - step: 8045 loss: 2.5034 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2082 +[titan] 2025-10-05 03:31:08,360 - root - INFO - lr: 4.5986e-05 gnorm: 1.19 [ 4:56:59<19:39:39] +[titan] 2025-10-05 03:31:17,027 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:31:19,196 - root - INFO - step: 8050 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1741 +[titan] 2025-10-05 03:31:19,196 - root - INFO - lr: 4.5981e-05 gnorm: 1.19 [ 4:57:10<19:39:27] +[titan] 2025-10-05 03:31:30,047 - root - INFO - step: 8055 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2206 +[titan] 2025-10-05 03:31:30,047 - root - INFO - lr: 4.5976e-05 gnorm: 1.16 [ 4:57:21<19:39:15] +[titan] 2025-10-05 03:31:40,901 - root - INFO - step: 8060 loss: 2.4474 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 03:31:40,901 - root - INFO - lr: 4.5971e-05 gnorm: 1.14 [ 4:57:32<19:39:03] +[titan] 2025-10-05 03:31:51,725 - root - INFO - step: 8065 loss: 2.5411 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.3006 global_avg_mtp_loss: 2.2406 +[titan] 2025-10-05 03:31:51,725 - root - INFO - lr: 4.5966e-05 gnorm: 1.17 [ 4:57:42<19:38:51] +[titan] 2025-10-05 03:32:02,621 - root - INFO - step: 8070 loss: 2.4864 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1942 +[titan] 2025-10-05 03:32:02,621 - root - INFO - lr: 4.5961e-05 gnorm: 1.20 [ 4:57:53<19:38:39] +[titan] 2025-10-05 03:32:13,441 - root - INFO - step: 8075 loss: 2.5540 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.3012 global_avg_mtp_loss: 2.2528 +[titan] 2025-10-05 03:32:13,441 - root - INFO - lr: 4.5956e-05 gnorm: 1.17 [ 4:58:04<19:38:27] +[titan] 2025-10-05 03:32:24,287 - root - INFO - step: 8080 loss: 2.4398 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1536 +[titan] 2025-10-05 03:32:24,288 - root - INFO - lr: 4.5951e-05 
gnorm: 1.14 [ 4:58:15<19:38:15] +[titan] 2025-10-05 03:32:35,118 - root - INFO - step: 8085 loss: 2.5333 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2977 global_avg_mtp_loss: 2.2356 +[titan] 2025-10-05 03:32:35,118 - root - INFO - lr: 4.5945e-05 gnorm: 1.21 [ 4:58:26<19:38:03] +[titan] 2025-10-05 03:32:45,958 - root - INFO - step: 8090 loss: 2.5225 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2257 +[titan] 2025-10-05 03:32:45,959 - root - INFO - lr: 4.5940e-05 gnorm: 1.12 [ 4:58:37<19:37:51] +[titan] 2025-10-05 03:32:56,823 - root - INFO - step: 8095 loss: 2.5506 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.3000 global_avg_mtp_loss: 2.2506 +[titan] 2025-10-05 03:32:56,824 - root - INFO - lr: 4.5935e-05 gnorm: 1.21 [ 4:58:47<19:37:39] +[titan] 2025-10-05 03:33:05,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:33:07,719 - root - INFO - step: 8100 loss: 2.5049 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 03:33:07,719 - root - INFO - lr: 4.5930e-05 gnorm: 1.20 [ 4:58:58<19:37:28] +[titan] 2025-10-05 03:33:18,615 - root - INFO - step: 8105 loss: 2.5199 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:33:18,615 - root - INFO - lr: 4.5925e-05 gnorm: 1.12 [ 4:59:09<19:37:16] +[titan] 2025-10-05 03:33:29,481 - root - INFO - step: 8110 loss: 2.4795 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 03:33:29,481 - root - INFO - lr: 4.5920e-05 gnorm: 1.16 [ 4:59:20<19:37:04] +[titan] 2025-10-05 03:33:40,332 - root - INFO - step: 8115 loss: 2.4748 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1833 +[titan] 2025-10-05 03:33:40,332 - root - INFO - lr: 4.5915e-05 gnorm: 1.18 [ 4:59:31<19:36:52] +[titan] 2025-10-05 03:33:51,164 - root - INFO - step: 8120 loss: 2.5292 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2981 global_avg_mtp_loss: 2.2311 +[titan] 2025-10-05 03:33:51,164 - root - INFO - lr: 4.5910e-05 gnorm: 1.19 [ 4:59:42<19:36:40] +[titan] 2025-10-05 03:34:02,020 - root - INFO - step: 8125 loss: 2.4881 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:34:02,020 - root - INFO - lr: 4.5904e-05 gnorm: 1.21 [ 4:59:53<19:36:28] +[titan] 2025-10-05 03:34:12,891 - root - INFO - step: 8130 loss: 2.5727 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.3035 global_avg_mtp_loss: 2.2692 +[titan] 2025-10-05 03:34:12,891 - root - INFO - lr: 4.5899e-05 
gnorm: 1.22 [ 5:00:04<19:36:16] +[titan] 2025-10-05 03:34:23,761 - root - INFO - step: 8135 loss: 2.4550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1663 +[titan] 2025-10-05 03:34:23,761 - root - INFO - lr: 4.5894e-05 gnorm: 1.21 [ 5:00:14<19:36:04] +[titan] 2025-10-05 03:34:34,624 - root - INFO - step: 8140 loss: 2.4669 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:34:34,624 - root - INFO - lr: 4.5889e-05 gnorm: 1.16 [ 5:00:25<19:35:52] +[titan] 2025-10-05 03:34:45,506 - root - INFO - step: 8145 loss: 2.5656 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.3027 global_avg_mtp_loss: 2.2629 +[titan] 2025-10-05 03:34:45,506 - root - INFO - lr: 4.5884e-05 gnorm: 1.18 [ 5:00:36<19:35:41] +[titan] 2025-10-05 03:34:54,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:34:56,340 - root - INFO - step: 8150 loss: 2.4846 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1926 +[titan] 2025-10-05 03:34:56,340 - root - INFO - lr: 4.5879e-05 gnorm: 1.16 [ 5:00:47<19:35:29] +[titan] 2025-10-05 03:35:07,237 - root - INFO - step: 8155 loss: 2.5131 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2967 global_avg_mtp_loss: 2.2164 +[titan] 2025-10-05 03:35:07,237 - root - INFO - lr: 4.5874e-05 gnorm: 1.17 [ 5:00:58<19:35:17] +[titan] 2025-10-05 03:35:18,098 - root - INFO - step: 8160 loss: 2.6082 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.3078 global_avg_mtp_loss: 2.3005 +[titan] 2025-10-05 03:35:18,098 - root - INFO - lr: 4.5868e-05 gnorm: 1.18 [ 5:01:09<19:35:05] +[titan] 2025-10-05 03:35:28,978 - root - INFO - step: 8165 loss: 2.5372 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2397 +[titan] 2025-10-05 03:35:28,978 - root - INFO - lr: 4.5863e-05 gnorm: 1.17 [ 5:01:20<19:34:53] +[titan] 2025-10-05 03:35:39,844 - root - INFO - step: 8170 loss: 2.4152 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 03:35:39,844 - root - INFO - lr: 4.5858e-05 gnorm: 1.18 [ 5:01:30<19:34:41] +[titan] 2025-10-05 03:35:50,781 - root - INFO - step: 8175 loss: 2.5578 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.3013 global_avg_mtp_loss: 2.2565 +[titan] 2025-10-05 03:35:50,781 - root - INFO - lr: 4.5853e-05 gnorm: 1.27 [ 5:01:41<19:34:30] +[titan] 2025-10-05 03:36:01,663 - root - INFO - step: 8180 loss: 2.4462 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1586 +[titan] 2025-10-05 03:36:01,663 - root - INFO - lr: 4.5848e-05 
gnorm: 1.13 [ 5:01:52<19:34:18] +[titan] 2025-10-05 03:36:12,582 - root - INFO - step: 8185 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:36:12,582 - root - INFO - lr: 4.5843e-05 gnorm: 1.20 [ 5:02:03<19:34:06] +[titan] 2025-10-05 03:36:23,548 - root - INFO - step: 8190 loss: 2.4035 memory: 118.84GiB(85.28%) tps: 29,882 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1217 +[titan] 2025-10-05 03:36:23,549 - root - INFO - lr: 4.5837e-05 gnorm: 1.16 [ 5:02:14<19:33:55] +[titan] 2025-10-05 03:36:28,083 - root - INFO - Dumping profiler traces at step 8192 +[titan] 2025-10-05 03:36:28,118 - root - INFO - Finished dumping profiler traces in 0.03 seconds +[titan] 2025-10-05 03:36:34,646 - root - INFO - step: 8195 loss: 2.4867 memory: 118.84GiB(85.28%) tps: 29,528 tflops: 409.66 mfu: 41.42% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 03:36:34,646 - root - INFO - lr: 4.5832e-05 gnorm: 1.16 [ 5:02:25<19:33:44] +[titan] 2025-10-05 03:36:43,348 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:36:45,533 - root - INFO - step: 8200 loss: 2.5852 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.3043 global_avg_mtp_loss: 2.2809 +[titan] 2025-10-05 03:36:45,534 - root - INFO - lr: 4.5827e-05 gnorm: 1.15 [ 5:02:36<19:33:32] +[titan] 2025-10-05 03:36:56,421 - root - INFO - step: 8205 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1517 +[titan] 2025-10-05 03:36:56,421 - root - INFO - lr: 4.5822e-05 gnorm: 1.15 [ 5:02:47<19:33:20] +[titan] 2025-10-05 03:37:07,262 - root - INFO - step: 8210 loss: 2.4422 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2866 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:37:07,262 - root - INFO - lr: 4.5817e-05 gnorm: 1.16 [ 5:02:58<19:33:08] +[titan] 2025-10-05 03:37:18,124 - root - INFO - step: 8215 loss: 2.5901 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.3062 global_avg_mtp_loss: 2.2840 +[titan] 2025-10-05 03:37:18,124 - root - INFO - lr: 4.5812e-05 gnorm: 1.23 [ 5:03:09<19:32:56] +[titan] 2025-10-05 03:37:29,001 - root - INFO - step: 8220 loss: 2.5575 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2582 +[titan] 2025-10-05 03:37:29,001 - root - INFO - lr: 4.5806e-05 gnorm: 1.20 [ 5:03:20<19:32:45] +[titan] 2025-10-05 03:37:39,844 - root - INFO - step: 8225 loss: 2.4659 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 03:37:39,845 - root - INFO - lr: 4.5801e-05 gnorm: 1.23 [ 5:03:30<19:32:33] +[titan] 2025-10-05 03:37:50,743 - root - INFO - step: 8230 loss: 2.5410 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2432 +[titan] 2025-10-05 03:37:50,743 - root - INFO - lr: 4.5796e-05 
gnorm: 1.19 [ 5:03:41<19:32:21] +[titan] 2025-10-05 03:38:01,585 - root - INFO - step: 8235 loss: 2.5291 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2326 +[titan] 2025-10-05 03:38:01,585 - root - INFO - lr: 4.5791e-05 gnorm: 1.15 [ 5:03:52<19:32:09] +[titan] 2025-10-05 03:38:12,474 - root - INFO - step: 8240 loss: 2.5137 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2186 +[titan] 2025-10-05 03:38:12,474 - root - INFO - lr: 4.5786e-05 gnorm: 1.17 [ 5:04:03<19:31:57] +[titan] 2025-10-05 03:38:23,335 - root - INFO - step: 8245 loss: 2.5350 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2999 global_avg_mtp_loss: 2.2351 +[titan] 2025-10-05 03:38:23,335 - root - INFO - lr: 4.5780e-05 gnorm: 1.17 [ 5:04:14<19:31:45] +[titan] 2025-10-05 03:38:32,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:38:34,223 - root - INFO - step: 8250 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2131 +[titan] 2025-10-05 03:38:34,223 - root - INFO - lr: 4.5775e-05 gnorm: 1.18 [ 5:04:25<19:31:33] +[titan] 2025-10-05 03:38:45,088 - root - INFO - step: 8255 loss: 2.5152 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2954 global_avg_mtp_loss: 2.2198 +[titan] 2025-10-05 03:38:45,088 - root - INFO - lr: 4.5770e-05 gnorm: 1.20 [ 5:04:36<19:31:22] +[titan] 2025-10-05 03:38:55,962 - root - INFO - step: 8260 loss: 2.4381 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1524 +[titan] 2025-10-05 03:38:55,962 - root - INFO - lr: 4.5765e-05 gnorm: 1.19 [ 5:04:47<19:31:10] +[titan] 2025-10-05 03:39:06,818 - root - INFO - step: 8265 loss: 2.6017 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.3068 global_avg_mtp_loss: 2.2949 +[titan] 2025-10-05 03:39:06,818 - root - INFO - lr: 4.5760e-05 gnorm: 1.23 [ 5:04:57<19:30:58] +[titan] 2025-10-05 03:39:17,707 - root - INFO - step: 8270 loss: 2.4450 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2878 global_avg_mtp_loss: 2.1572 +[titan] 2025-10-05 03:39:17,707 - root - INFO - lr: 4.5754e-05 gnorm: 1.18 [ 5:05:08<19:30:46] +[titan] 2025-10-05 03:39:28,574 - root - INFO - step: 8275 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1559 +[titan] 2025-10-05 03:39:28,574 - root - INFO - lr: 4.5749e-05 gnorm: 1.20 [ 5:05:19<19:30:34] +[titan] 2025-10-05 03:39:39,438 - root - INFO - step: 8280 loss: 2.4782 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2902 global_avg_mtp_loss: 2.1880 +[titan] 2025-10-05 03:39:39,438 - root - INFO - lr: 4.5744e-05 
gnorm: 1.20 [ 5:05:30<19:30:22] +[titan] 2025-10-05 03:39:50,344 - root - INFO - step: 8285 loss: 2.4818 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2919 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 03:39:50,344 - root - INFO - lr: 4.5739e-05 gnorm: 1.16 [ 5:05:41<19:30:11] +[titan] 2025-10-05 03:40:01,252 - root - INFO - step: 8290 loss: 2.4954 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2009 +[titan] 2025-10-05 03:40:01,252 - root - INFO - lr: 4.5733e-05 gnorm: 1.16 [ 5:05:52<19:29:59] +[titan] 2025-10-05 03:40:12,143 - root - INFO - step: 8295 loss: 2.5302 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2317 +[titan] 2025-10-05 03:40:12,143 - root - INFO - lr: 4.5728e-05 gnorm: 1.18 [ 5:06:03<19:29:47] +[titan] 2025-10-05 03:40:20,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:40:23,034 - root - INFO - step: 8300 loss: 2.4874 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1954 +[titan] 2025-10-05 03:40:23,034 - root - INFO - lr: 4.5723e-05 gnorm: 1.19 [ 5:06:14<19:29:36] +[titan] 2025-10-05 03:40:33,937 - root - INFO - step: 8305 loss: 2.5831 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.3031 global_avg_mtp_loss: 2.2800 +[titan] 2025-10-05 03:40:33,938 - root - INFO - lr: 4.5718e-05 gnorm: 1.17 [ 5:06:25<19:29:24] +[titan] 2025-10-05 03:40:44,825 - root - INFO - step: 8310 loss: 2.5507 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2993 global_avg_mtp_loss: 2.2514 +[titan] 2025-10-05 03:40:44,825 - root - INFO - lr: 4.5713e-05 gnorm: 1.17 [ 5:06:35<19:29:12] +[titan] 2025-10-05 03:40:55,729 - root - INFO - step: 8315 loss: 2.5111 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2159 +[titan] 2025-10-05 03:40:55,729 - root - INFO - lr: 4.5707e-05 gnorm: 1.14 [ 5:06:46<19:29:00] +[titan] 2025-10-05 03:41:06,596 - root - INFO - step: 8320 loss: 2.5003 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2067 +[titan] 2025-10-05 03:41:06,596 - root - INFO - lr: 4.5702e-05 gnorm: 1.19 [ 5:06:57<19:28:49] +[titan] 2025-10-05 03:41:17,525 - root - INFO - step: 8325 loss: 2.4974 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 03:41:17,525 - root - INFO - lr: 4.5697e-05 gnorm: 1.26 [ 5:07:08<19:28:37] +[titan] 2025-10-05 03:41:28,416 - root - INFO - step: 8330 loss: 2.4791 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1878 +[titan] 2025-10-05 03:41:28,416 - root - INFO - lr: 4.5692e-05 
gnorm: 1.19 [ 5:07:19<19:28:25] +[titan] 2025-10-05 03:41:39,305 - root - INFO - step: 8335 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2968 global_avg_mtp_loss: 2.2244 +[titan] 2025-10-05 03:41:39,305 - root - INFO - lr: 4.5686e-05 gnorm: 1.25 [ 5:07:30<19:28:13] +[titan] 2025-10-05 03:41:50,197 - root - INFO - step: 8340 loss: 2.4762 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:41:50,197 - root - INFO - lr: 4.5681e-05 gnorm: 1.22 [ 5:07:41<19:28:02] +[titan] 2025-10-05 03:42:01,087 - root - INFO - step: 8345 loss: 2.5081 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2136 +[titan] 2025-10-05 03:42:01,087 - root - INFO - lr: 4.5676e-05 gnorm: 1.33 [ 5:07:52<19:27:50] +[titan] 2025-10-05 03:42:09,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:42:11,958 - root - INFO - step: 8350 loss: 2.5178 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2950 global_avg_mtp_loss: 2.2229 +[titan] 2025-10-05 03:42:11,958 - root - INFO - lr: 4.5671e-05 gnorm: 1.20 [ 5:08:03<19:27:38] +[titan] 2025-10-05 03:42:22,859 - root - INFO - step: 8355 loss: 2.5012 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2948 global_avg_mtp_loss: 2.2065 +[titan] 2025-10-05 03:42:22,859 - root - INFO - lr: 4.5665e-05 gnorm: 1.16 [ 5:08:13<19:27:26] +[titan] 2025-10-05 03:42:33,723 - root - INFO - step: 8360 loss: 2.5033 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.2087 +[titan] 2025-10-05 03:42:33,724 - root - INFO - lr: 4.5660e-05 gnorm: 1.21 [ 5:08:24<19:27:15] +[titan] 2025-10-05 03:42:44,605 - root - INFO - step: 8365 loss: 2.4169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1341 +[titan] 2025-10-05 03:42:44,605 - root - INFO - lr: 4.5655e-05 gnorm: 1.27 [ 5:08:35<19:27:03] +[titan] 2025-10-05 03:42:55,502 - root - INFO - step: 8370 loss: 2.4654 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1749 +[titan] 2025-10-05 03:42:55,503 - root - INFO - lr: 4.5649e-05 gnorm: 1.13 [ 5:08:46<19:26:51] +[titan] 2025-10-05 03:43:06,377 - root - INFO - step: 8375 loss: 2.4547 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1658 +[titan] 2025-10-05 03:43:06,377 - root - INFO - lr: 4.5644e-05 gnorm: 1.15 [ 5:08:57<19:26:39] +[titan] 2025-10-05 03:43:17,279 - root - INFO - step: 8380 loss: 2.5065 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2118 +[titan] 2025-10-05 03:43:17,279 - root - INFO - lr: 4.5639e-05 
gnorm: 1.18 [ 5:09:08<19:26:28] +[titan] 2025-10-05 03:43:28,170 - root - INFO - step: 8385 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.1973 +[titan] 2025-10-05 03:43:28,171 - root - INFO - lr: 4.5634e-05 gnorm: 1.19 [ 5:09:19<19:26:16] +[titan] 2025-10-05 03:43:39,058 - root - INFO - step: 8390 loss: 2.3818 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 03:43:39,058 - root - INFO - lr: 4.5628e-05 gnorm: 1.18 [ 5:09:30<19:26:04] +[titan] 2025-10-05 03:43:49,941 - root - INFO - step: 8395 loss: 2.4979 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2044 +[titan] 2025-10-05 03:43:49,941 - root - INFO - lr: 4.5623e-05 gnorm: 1.24 [ 5:09:41<19:25:52] +[titan] 2025-10-05 03:43:58,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:44:00,835 - root - INFO - step: 8400 loss: 2.4609 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1710 +[titan] 2025-10-05 03:44:00,835 - root - INFO - lr: 4.5618e-05 gnorm: 1.21 [ 5:09:51<19:25:41] +[titan] 2025-10-05 03:44:11,708 - root - INFO - step: 8405 loss: 2.4714 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1804 +[titan] 2025-10-05 03:44:11,708 - root - INFO - lr: 4.5612e-05 gnorm: 1.18 [ 5:10:02<19:25:29] +[titan] 2025-10-05 03:44:22,628 - root - INFO - step: 8410 loss: 2.4894 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2931 global_avg_mtp_loss: 2.1963 +[titan] 2025-10-05 03:44:22,628 - root - INFO - lr: 4.5607e-05 gnorm: 1.17 [ 5:10:13<19:25:17] +[titan] 2025-10-05 03:44:33,498 - root - INFO - step: 8415 loss: 2.4601 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1709 +[titan] 2025-10-05 03:44:33,498 - root - INFO - lr: 4.5602e-05 gnorm: 1.15 [ 5:10:24<19:25:05] +[titan] 2025-10-05 03:44:44,372 - root - INFO - step: 8420 loss: 2.4695 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1796 +[titan] 2025-10-05 03:44:44,372 - root - INFO - lr: 4.5597e-05 gnorm: 1.21 [ 5:10:35<19:24:54] +[titan] 2025-10-05 03:44:55,241 - root - INFO - step: 8425 loss: 2.6043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.3153 global_avg_mtp_loss: 2.2890 +[titan] 2025-10-05 03:44:55,241 - root - INFO - lr: 4.5591e-05 gnorm: 1.22 [ 5:10:46<19:24:42] +[titan] 2025-10-05 03:45:06,108 - root - INFO - step: 8430 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1610 +[titan] 2025-10-05 03:45:06,108 - root - INFO - lr: 4.5586e-05 
gnorm: 1.22 [ 5:10:57<19:24:30] +[titan] 2025-10-05 03:45:17,033 - root - INFO - step: 8435 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1622 +[titan] 2025-10-05 03:45:17,033 - root - INFO - lr: 4.5581e-05 gnorm: 1.17 [ 5:11:08<19:24:18] +[titan] 2025-10-05 03:45:27,906 - root - INFO - step: 8440 loss: 2.4384 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1521 +[titan] 2025-10-05 03:45:27,906 - root - INFO - lr: 4.5575e-05 gnorm: 1.18 [ 5:11:19<19:24:07] +[titan] 2025-10-05 03:45:38,796 - root - INFO - step: 8445 loss: 2.5212 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2962 global_avg_mtp_loss: 2.2251 +[titan] 2025-10-05 03:45:38,796 - root - INFO - lr: 4.5570e-05 gnorm: 1.18 [ 5:11:29<19:23:55] +[titan] 2025-10-05 03:45:47,504 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:45:49,701 - root - INFO - step: 8450 loss: 2.4651 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1765 +[titan] 2025-10-05 03:45:49,701 - root - INFO - lr: 4.5565e-05 gnorm: 1.15 [ 5:11:40<19:23:43] +[titan] 2025-10-05 03:46:00,576 - root - INFO - step: 8455 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1407 +[titan] 2025-10-05 03:46:00,576 - root - INFO - lr: 4.5559e-05 gnorm: 1.16 [ 5:11:51<19:23:31] +[titan] 2025-10-05 03:46:11,464 - root - INFO - step: 8460 loss: 2.4581 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1695 +[titan] 2025-10-05 03:46:11,465 - root - INFO - lr: 4.5554e-05 gnorm: 1.18 [ 5:12:02<19:23:20] +[titan] 2025-10-05 03:46:22,406 - root - INFO - step: 8465 loss: 2.4681 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2912 global_avg_mtp_loss: 2.1769 +[titan] 2025-10-05 03:46:22,406 - root - INFO - lr: 4.5549e-05 gnorm: 1.26 [ 5:12:13<19:23:08] +[titan] 2025-10-05 03:46:33,303 - root - INFO - step: 8470 loss: 2.4812 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:46:33,303 - root - INFO - lr: 4.5543e-05 gnorm: 1.18 [ 5:12:24<19:22:57] +[titan] 2025-10-05 03:46:44,215 - root - INFO - step: 8475 loss: 2.4456 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 03:46:44,215 - root - INFO - lr: 4.5538e-05 gnorm: 1.19 [ 5:12:35<19:22:45] +[titan] 2025-10-05 03:46:55,102 - root - INFO - step: 8480 loss: 2.5134 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2966 global_avg_mtp_loss: 2.2167 +[titan] 2025-10-05 03:46:55,103 - root - INFO - lr: 4.5533e-05 
gnorm: 1.22 [ 5:12:46<19:22:33] +[titan] 2025-10-05 03:47:05,998 - root - INFO - step: 8485 loss: 2.4337 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 03:47:05,998 - root - INFO - lr: 4.5527e-05 gnorm: 1.16 [ 5:12:57<19:22:21] +[titan] 2025-10-05 03:47:16,904 - root - INFO - step: 8490 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1235 +[titan] 2025-10-05 03:47:16,904 - root - INFO - lr: 4.5522e-05 gnorm: 1.17 [ 5:13:08<19:22:10] +[titan] 2025-10-05 03:47:27,782 - root - INFO - step: 8495 loss: 2.4698 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2907 global_avg_mtp_loss: 2.1791 +[titan] 2025-10-05 03:47:27,783 - root - INFO - lr: 4.5517e-05 gnorm: 1.17 [ 5:13:18<19:21:58] +[titan] 2025-10-05 03:47:36,458 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:47:38,638 - root - INFO - step: 8500 loss: 2.3537 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0773 +[titan] 2025-10-05 03:47:38,638 - root - INFO - lr: 4.5511e-05 gnorm: 1.20 [ 5:13:29<19:21:46] +[titan] 2025-10-05 03:47:49,538 - root - INFO - step: 8505 loss: 2.5368 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2973 global_avg_mtp_loss: 2.2395 +[titan] 2025-10-05 03:47:49,538 - root - INFO - lr: 4.5506e-05 gnorm: 1.16 [ 5:13:40<19:21:35] +[titan] 2025-10-05 03:48:00,412 - root - INFO - step: 8510 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.1961 +[titan] 2025-10-05 03:48:00,412 - root - INFO - lr: 4.5501e-05 gnorm: 1.19 [ 5:13:51<19:21:23] +[titan] 2025-10-05 03:48:11,277 - root - INFO - step: 8515 loss: 2.4264 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:48:11,277 - root - INFO - lr: 4.5495e-05 gnorm: 1.17 [ 5:14:02<19:21:11] +[titan] 2025-10-05 03:48:22,187 - root - INFO - step: 8520 loss: 2.4968 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2924 global_avg_mtp_loss: 2.2043 +[titan] 2025-10-05 03:48:22,188 - root - INFO - lr: 4.5490e-05 gnorm: 1.24 [ 5:14:13<19:20:59] +[titan] 2025-10-05 03:48:33,044 - root - INFO - step: 8525 loss: 2.5002 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2934 global_avg_mtp_loss: 2.2068 +[titan] 2025-10-05 03:48:33,044 - root - INFO - lr: 4.5485e-05 gnorm: 1.16 [ 5:14:24<19:20:48] +[titan] 2025-10-05 03:48:43,906 - root - INFO - step: 8530 loss: 2.5203 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2955 global_avg_mtp_loss: 2.2249 +[titan] 2025-10-05 03:48:43,906 - root - INFO - lr: 4.5479e-05 
gnorm: 1.18 [ 5:14:35<19:20:36] +[titan] 2025-10-05 03:48:54,778 - root - INFO - step: 8535 loss: 2.4900 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:48:54,778 - root - INFO - lr: 4.5474e-05 gnorm: 1.23 [ 5:14:45<19:20:24] +[titan] 2025-10-05 03:49:05,664 - root - INFO - step: 8540 loss: 2.5027 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2084 +[titan] 2025-10-05 03:49:05,664 - root - INFO - lr: 4.5468e-05 gnorm: 1.19 [ 5:14:56<19:20:12] +[titan] 2025-10-05 03:49:16,537 - root - INFO - step: 8545 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2823 global_avg_mtp_loss: 2.1266 +[titan] 2025-10-05 03:49:16,537 - root - INFO - lr: 4.5463e-05 gnorm: 1.19 [ 5:15:07<19:20:00] +[titan] 2025-10-05 03:49:25,284 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:49:27,468 - root - INFO - step: 8550 loss: 2.4984 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2935 global_avg_mtp_loss: 2.2049 +[titan] 2025-10-05 03:49:27,468 - root - INFO - lr: 4.5458e-05 gnorm: 1.21 [ 5:15:18<19:19:49] +[titan] 2025-10-05 03:49:38,338 - root - INFO - step: 8555 loss: 2.4539 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1653 +[titan] 2025-10-05 03:49:38,338 - root - INFO - lr: 4.5452e-05 gnorm: 1.20 [ 5:15:29<19:19:37] +[titan] 2025-10-05 03:49:49,202 - root - INFO - step: 8560 loss: 2.4721 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1822 +[titan] 2025-10-05 03:49:49,202 - root - INFO - lr: 4.5447e-05 gnorm: 1.17 [ 5:15:40<19:19:25] +[titan] 2025-10-05 03:50:00,074 - root - INFO - step: 8565 loss: 2.5405 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2990 global_avg_mtp_loss: 2.2415 +[titan] 2025-10-05 03:50:00,074 - root - INFO - lr: 4.5442e-05 gnorm: 1.15 [ 5:15:51<19:19:14] +[titan] 2025-10-05 03:50:10,978 - root - INFO - step: 8570 loss: 2.4470 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 03:50:10,979 - root - INFO - lr: 4.5436e-05 gnorm: 1.22 [ 5:16:02<19:19:02] +[titan] 2025-10-05 03:50:21,887 - root - INFO - step: 8575 loss: 2.4633 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1735 +[titan] 2025-10-05 03:50:21,887 - root - INFO - lr: 4.5431e-05 gnorm: 1.21 [ 5:16:12<19:18:50] +[titan] 2025-10-05 03:50:32,776 - root - INFO - step: 8580 loss: 2.4711 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1810 +[titan] 2025-10-05 03:50:32,776 - root - INFO - lr: 4.5425e-05 
gnorm: 1.18 [ 5:16:23<19:18:39] +[titan] 2025-10-05 03:50:43,667 - root - INFO - step: 8585 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1420 +[titan] 2025-10-05 03:50:43,667 - root - INFO - lr: 4.5420e-05 gnorm: 1.22 [ 5:16:34<19:18:27] +[titan] 2025-10-05 03:50:54,557 - root - INFO - step: 8590 loss: 2.5385 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2985 global_avg_mtp_loss: 2.2399 +[titan] 2025-10-05 03:50:54,558 - root - INFO - lr: 4.5415e-05 gnorm: 1.18 [ 5:16:45<19:18:15] +[titan] 2025-10-05 03:51:05,424 - root - INFO - step: 8595 loss: 2.4767 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 03:51:05,425 - root - INFO - lr: 4.5409e-05 gnorm: 1.16 [ 5:16:56<19:18:03] +[titan] 2025-10-05 03:51:14,103 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:51:16,290 - root - INFO - step: 8600 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:51:16,290 - root - INFO - lr: 4.5404e-05 gnorm: 1.14 [ 5:17:07<19:17:52] +[titan] 2025-10-05 03:51:27,250 - root - INFO - step: 8605 loss: 2.5339 memory: 118.84GiB(85.28%) tps: 29,897 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2987 global_avg_mtp_loss: 2.2352 +[titan] 2025-10-05 03:51:27,251 - root - INFO - lr: 4.5398e-05 gnorm: 1.15 [ 5:17:18<19:17:40] +[titan] 2025-10-05 03:51:38,134 - root - INFO - step: 8610 loss: 2.4373 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1515 +[titan] 2025-10-05 03:51:38,134 - root - INFO - lr: 4.5393e-05 gnorm: 1.14 [ 5:17:29<19:17:28] +[titan] 2025-10-05 03:51:49,035 - root - INFO - step: 8615 loss: 2.5154 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2980 global_avg_mtp_loss: 2.2174 +[titan] 2025-10-05 03:51:49,036 - root - INFO - lr: 4.5388e-05 gnorm: 1.21 [ 5:17:40<19:17:17] +[titan] 2025-10-05 03:51:59,908 - root - INFO - step: 8620 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1329 +[titan] 2025-10-05 03:51:59,909 - root - INFO - lr: 4.5382e-05 gnorm: 1.19 [ 5:17:51<19:17:05] +[titan] 2025-10-05 03:52:10,800 - root - INFO - step: 8625 loss: 2.4772 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1858 +[titan] 2025-10-05 03:52:10,800 - root - INFO - lr: 4.5377e-05 gnorm: 1.19 [ 5:18:01<19:16:53] +[titan] 2025-10-05 03:52:21,724 - root - INFO - step: 8630 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1254 +[titan] 2025-10-05 03:52:21,724 - root - INFO - lr: 4.5371e-05 
gnorm: 1.17 [ 5:18:12<19:16:42] +[titan] 2025-10-05 03:52:32,629 - root - INFO - step: 8635 loss: 2.4666 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2898 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 03:52:32,629 - root - INFO - lr: 4.5366e-05 gnorm: 1.18 [ 5:18:23<19:16:30] +[titan] 2025-10-05 03:52:43,516 - root - INFO - step: 8640 loss: 2.5035 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2945 global_avg_mtp_loss: 2.2090 +[titan] 2025-10-05 03:52:43,516 - root - INFO - lr: 4.5360e-05 gnorm: 1.16 [ 5:18:34<19:16:18] +[titan] 2025-10-05 03:52:54,413 - root - INFO - step: 8645 loss: 2.4079 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1260 +[titan] 2025-10-05 03:52:54,414 - root - INFO - lr: 4.5355e-05 gnorm: 1.18 [ 5:18:45<19:16:07] +[titan] 2025-10-05 03:53:03,097 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:53:05,278 - root - INFO - step: 8650 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1441 +[titan] 2025-10-05 03:53:05,278 - root - INFO - lr: 4.5350e-05 gnorm: 1.17 [ 5:18:56<19:15:55] +[titan] 2025-10-05 03:53:16,166 - root - INFO - step: 8655 loss: 2.4949 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2017 +[titan] 2025-10-05 03:53:16,166 - root - INFO - lr: 4.5344e-05 gnorm: 1.17 [ 5:19:07<19:15:43] +[titan] 2025-10-05 03:53:27,098 - root - INFO - step: 8660 loss: 2.4590 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1706 +[titan] 2025-10-05 03:53:27,098 - root - INFO - lr: 4.5339e-05 gnorm: 1.20 [ 5:19:18<19:15:32] +[titan] 2025-10-05 03:53:38,012 - root - INFO - step: 8665 loss: 2.5151 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2199 +[titan] 2025-10-05 03:53:38,012 - root - INFO - lr: 4.5333e-05 gnorm: 1.19 [ 5:19:29<19:15:20] +[titan] 2025-10-05 03:53:48,872 - root - INFO - step: 8670 loss: 2.4344 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 03:53:48,872 - root - INFO - lr: 4.5328e-05 gnorm: 1.15 [ 5:19:39<19:15:08] +[titan] 2025-10-05 03:53:59,744 - root - INFO - step: 8675 loss: 2.4632 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2891 global_avg_mtp_loss: 2.1742 +[titan] 2025-10-05 03:53:59,744 - root - INFO - lr: 4.5322e-05 gnorm: 1.17 [ 5:19:50<19:14:57] +[titan] 2025-10-05 03:54:10,610 - root - INFO - step: 8680 loss: 2.4556 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 03:54:10,611 - root - INFO - lr: 4.5317e-05 
gnorm: 1.17 [ 5:20:01<19:14:45] +[titan] 2025-10-05 03:54:21,508 - root - INFO - step: 8685 loss: 2.4742 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1837 +[titan] 2025-10-05 03:54:21,508 - root - INFO - lr: 4.5311e-05 gnorm: 1.20 [ 5:20:12<19:14:33] +[titan] 2025-10-05 03:54:32,411 - root - INFO - step: 8690 loss: 2.5303 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2972 global_avg_mtp_loss: 2.2331 +[titan] 2025-10-05 03:54:32,411 - root - INFO - lr: 4.5306e-05 gnorm: 1.22 [ 5:20:23<19:14:22] +[titan] 2025-10-05 03:54:43,289 - root - INFO - step: 8695 loss: 2.4873 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1952 +[titan] 2025-10-05 03:54:43,290 - root - INFO - lr: 4.5301e-05 gnorm: 1.21 [ 5:20:34<19:14:10] +[titan] 2025-10-05 03:54:52,024 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:54:54,213 - root - INFO - step: 8700 loss: 2.4737 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1831 +[titan] 2025-10-05 03:54:54,213 - root - INFO - lr: 4.5295e-05 gnorm: 1.19 [ 5:20:45<19:13:58] +[titan] 2025-10-05 03:55:03,157 - root - INFO - Dumping profiler traces at step 8704 +[titan] 2025-10-05 03:55:03,195 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 03:55:05,378 - root - INFO - step: 8705 loss: 2.5158 memory: 118.84GiB(85.28%) tps: 29,348 tflops: 407.16 mfu: 41.17% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:55:05,378 - root - INFO - lr: 4.5290e-05 gnorm: 1.17 [ 5:20:56<19:13:48] +[titan] 2025-10-05 03:55:16,259 - root - INFO - step: 8710 loss: 2.3993 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1181 +[titan] 2025-10-05 03:55:16,259 - root - INFO - lr: 4.5284e-05 gnorm: 1.16 [ 5:21:07<19:13:36] +[titan] 2025-10-05 03:55:27,179 - root - INFO - step: 8715 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1461 +[titan] 2025-10-05 03:55:27,179 - root - INFO - lr: 4.5279e-05 gnorm: 1.17 [ 5:21:18<19:13:24] +[titan] 2025-10-05 03:55:38,073 - root - INFO - step: 8720 loss: 2.3963 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 03:55:38,073 - root - INFO - lr: 4.5273e-05 gnorm: 1.24 [ 5:21:29<19:13:13] +[titan] 2025-10-05 03:55:48,962 - root - INFO - step: 8725 loss: 2.4482 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 03:55:48,962 - root - INFO - lr: 4.5268e-05 gnorm: 1.19 [ 5:21:40<19:13:01] +[titan] 2025-10-05 03:55:59,898 - root - INFO - step: 8730 loss: 2.4827 
memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2921 global_avg_mtp_loss: 2.1906 +[titan] 2025-10-05 03:55:59,898 - root - INFO - lr: 4.5262e-05 gnorm: 1.18 [ 5:21:50<19:12:50] +[titan] 2025-10-05 03:56:10,791 - root - INFO - step: 8735 loss: 2.5157 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2201 +[titan] 2025-10-05 03:56:10,791 - root - INFO - lr: 4.5257e-05 gnorm: 1.13 [ 5:22:01<19:12:38] +[titan] 2025-10-05 03:56:21,690 - root - INFO - step: 8740 loss: 2.5138 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:56:21,690 - root - INFO - lr: 4.5251e-05 gnorm: 1.17 [ 5:22:12<19:12:26] +[titan] 2025-10-05 03:56:32,598 - root - INFO - step: 8745 loss: 2.5112 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2943 global_avg_mtp_loss: 2.2169 +[titan] 2025-10-05 03:56:32,598 - root - INFO - lr: 4.5246e-05 gnorm: 1.21 [ 5:22:23<19:12:15] +[titan] 2025-10-05 03:56:41,299 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:56:43,483 - root - INFO - step: 8750 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 03:56:43,483 - root - INFO - lr: 4.5240e-05 gnorm: 1.23 [ 5:22:34<19:12:03] +[titan] 2025-10-05 03:56:54,343 - root - INFO - step: 8755 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1820 +[titan] 2025-10-05 03:56:54,343 - root - INFO - lr: 4.5235e-05 gnorm: 1.20 [ 5:22:45<19:11:51] +[titan] 2025-10-05 03:57:05,209 - root - INFO - step: 8760 loss: 2.4902 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1970 +[titan] 2025-10-05 03:57:05,210 - root - INFO - lr: 4.5229e-05 gnorm: 1.14 [ 5:22:56<19:11:39] +[titan] 2025-10-05 03:57:16,152 - root - INFO - step: 8765 loss: 2.5128 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2949 global_avg_mtp_loss: 2.2178 +[titan] 2025-10-05 03:57:16,152 - root - INFO - lr: 4.5224e-05 gnorm: 1.17 [ 5:23:07<19:11:28] +[titan] 2025-10-05 03:57:27,083 - root - INFO - step: 8770 loss: 2.4066 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 03:57:27,083 - root - INFO - lr: 4.5218e-05 gnorm: 1.11 [ 5:23:18<19:11:16] +[titan] 2025-10-05 03:57:37,931 - root - INFO - step: 8775 loss: 2.4260 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 03:57:37,932 - root - INFO - lr: 4.5213e-05 gnorm: 1.17 [ 5:23:29<19:11:05] +[titan] 2025-10-05 03:57:48,805 - root - INFO - step: 8780 loss: 2.4759 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2913 global_avg_mtp_loss: 2.1847 +[titan] 2025-10-05 03:57:48,805 - root - INFO - lr: 4.5207e-05 
gnorm: 1.24 [ 5:23:39<19:10:53] +[titan] 2025-10-05 03:57:59,678 - root - INFO - step: 8785 loss: 2.4875 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 03:57:59,678 - root - INFO - lr: 4.5202e-05 gnorm: 1.16 [ 5:23:50<19:10:41] +[titan] 2025-10-05 03:58:10,559 - root - INFO - step: 8790 loss: 2.4424 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2868 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 03:58:10,559 - root - INFO - lr: 4.5196e-05 gnorm: 1.16 [ 5:24:01<19:10:29] +[titan] 2025-10-05 03:58:21,459 - root - INFO - step: 8795 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1293 +[titan] 2025-10-05 03:58:21,459 - root - INFO - lr: 4.5191e-05 gnorm: 1.13 [ 5:24:12<19:10:18] +[titan] 2025-10-05 03:58:30,178 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 03:58:32,360 - root - INFO - step: 8800 loss: 2.3926 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1119 +[titan] 2025-10-05 03:58:32,360 - root - INFO - lr: 4.5185e-05 gnorm: 1.16 [ 5:24:23<19:10:06] +[titan] 2025-10-05 03:58:43,220 - root - INFO - step: 8805 loss: 2.5057 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2947 global_avg_mtp_loss: 2.2110 +[titan] 2025-10-05 03:58:43,220 - root - INFO - lr: 4.5180e-05 gnorm: 1.16 [ 5:24:34<19:09:54] +[titan] 2025-10-05 03:58:54,092 - root - INFO - step: 8810 loss: 2.4643 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 03:58:54,092 - root - INFO - lr: 4.5174e-05 gnorm: 1.21 [ 5:24:45<19:09:43] +[titan] 2025-10-05 03:59:04,956 - root - INFO - step: 8815 loss: 2.5184 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2956 global_avg_mtp_loss: 2.2227 +[titan] 2025-10-05 03:59:04,956 - root - INFO - lr: 4.5169e-05 gnorm: 1.20 [ 5:24:56<19:09:31] +[titan] 2025-10-05 03:59:15,807 - root - INFO - step: 8820 loss: 2.3921 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 03:59:15,807 - root - INFO - lr: 4.5163e-05 gnorm: 1.12 [ 5:25:06<19:09:19] +[titan] 2025-10-05 03:59:26,817 - root - INFO - step: 8825 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 29,762 tflops: 412.90 mfu: 41.75% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1453 +[titan] 2025-10-05 03:59:26,817 - root - INFO - lr: 4.5158e-05 gnorm: 1.14 [ 5:25:17<19:09:08] +[titan] 2025-10-05 03:59:37,700 - root - INFO - step: 8830 loss: 2.4161 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 03:59:37,700 - root - INFO - lr: 4.5152e-05 
gnorm: 1.17 [ 5:25:28<19:08:56] +[titan] 2025-10-05 03:59:48,610 - root - INFO - step: 8835 loss: 2.4903 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.1974 +[titan] 2025-10-05 03:59:48,610 - root - INFO - lr: 4.5147e-05 gnorm: 1.20 [ 5:25:39<19:08:45] +[titan] 2025-10-05 03:59:59,499 - root - INFO - step: 8840 loss: 2.4555 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 03:59:59,499 - root - INFO - lr: 4.5141e-05 gnorm: 1.16 [ 5:25:50<19:08:33] +[titan] 2025-10-05 04:00:10,376 - root - INFO - step: 8845 loss: 2.5058 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2957 global_avg_mtp_loss: 2.2101 +[titan] 2025-10-05 04:00:10,376 - root - INFO - lr: 4.5136e-05 gnorm: 1.15 [ 5:26:01<19:08:21] +[titan] 2025-10-05 04:00:19,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:00:21,274 - root - INFO - step: 8850 loss: 2.4134 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:00:21,274 - root - INFO - lr: 4.5130e-05 gnorm: 1.16 [ 5:26:12<19:08:10] +[titan] 2025-10-05 04:00:32,174 - root - INFO - step: 8855 loss: 2.3939 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1130 +[titan] 2025-10-05 04:00:32,174 - root - INFO - lr: 4.5124e-05 gnorm: 1.14 [ 5:26:23<19:07:58] +[titan] 2025-10-05 04:00:43,105 - root - INFO - step: 8860 loss: 2.4901 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.1965 +[titan] 2025-10-05 04:00:43,105 - root - INFO - lr: 4.5119e-05 gnorm: 1.13 [ 5:26:34<19:07:47] +[titan] 2025-10-05 04:00:53,982 - root - INFO - step: 8865 loss: 2.4318 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1455 +[titan] 2025-10-05 04:00:53,982 - root - INFO - lr: 4.5113e-05 gnorm: 1.20 [ 5:26:45<19:07:35] +[titan] 2025-10-05 04:01:04,884 - root - INFO - step: 8870 loss: 2.4552 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2883 global_avg_mtp_loss: 2.1669 +[titan] 2025-10-05 04:01:04,884 - root - INFO - lr: 4.5108e-05 gnorm: 1.17 [ 5:26:55<19:07:23] +[titan] 2025-10-05 04:01:15,755 - root - INFO - step: 8875 loss: 2.4361 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2856 global_avg_mtp_loss: 2.1505 +[titan] 2025-10-05 04:01:15,755 - root - INFO - lr: 4.5102e-05 gnorm: 1.11 [ 5:27:06<19:07:12] +[titan] 2025-10-05 04:01:26,620 - root - INFO - step: 8880 loss: 2.4652 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1768 +[titan] 2025-10-05 04:01:26,621 - root - INFO - lr: 4.5097e-05 
gnorm: 1.18 [ 5:27:17<19:07:00] +[titan] 2025-10-05 04:01:37,500 - root - INFO - step: 8885 loss: 2.4777 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2915 global_avg_mtp_loss: 2.1862 +[titan] 2025-10-05 04:01:37,500 - root - INFO - lr: 4.5091e-05 gnorm: 1.16 [ 5:27:28<19:06:48] +[titan] 2025-10-05 04:01:48,415 - root - INFO - step: 8890 loss: 2.4058 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:01:48,416 - root - INFO - lr: 4.5086e-05 gnorm: 1.17 [ 5:27:39<19:06:37] +[titan] 2025-10-05 04:01:59,279 - root - INFO - step: 8895 loss: 2.4655 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1759 +[titan] 2025-10-05 04:01:59,280 - root - INFO - lr: 4.5080e-05 gnorm: 1.19 [ 5:27:50<19:06:25] +[titan] 2025-10-05 04:02:07,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:02:10,126 - root - INFO - step: 8900 loss: 2.4494 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:02:10,126 - root - INFO - lr: 4.5074e-05 gnorm: 1.24 [ 5:28:01<19:06:13] +[titan] 2025-10-05 04:02:20,976 - root - INFO - step: 8905 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 04:02:20,976 - root - INFO - lr: 4.5069e-05 gnorm: 1.18 [ 5:28:12<19:06:01] +[titan] 2025-10-05 04:02:31,857 - root - INFO - step: 8910 loss: 2.4530 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1646 +[titan] 2025-10-05 04:02:31,857 - root - INFO - lr: 4.5063e-05 gnorm: 1.18 [ 5:28:22<19:05:50] +[titan] 2025-10-05 04:02:42,714 - root - INFO - step: 8915 loss: 2.4292 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:02:42,714 - root - INFO - lr: 4.5058e-05 gnorm: 1.18 [ 5:28:33<19:05:38] +[titan] 2025-10-05 04:02:53,586 - root - INFO - step: 8920 loss: 2.4665 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1760 +[titan] 2025-10-05 04:02:53,586 - root - INFO - lr: 4.5052e-05 gnorm: 1.14 [ 5:28:44<19:05:26] +[titan] 2025-10-05 04:03:04,511 - root - INFO - step: 8925 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1484 +[titan] 2025-10-05 04:03:04,511 - root - INFO - lr: 4.5047e-05 gnorm: 1.20 [ 5:28:55<19:05:15] +[titan] 2025-10-05 04:03:15,417 - root - INFO - step: 8930 loss: 2.5325 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2975 global_avg_mtp_loss: 2.2349 +[titan] 2025-10-05 04:03:15,417 - root - INFO - lr: 4.5041e-05 
gnorm: 1.18 [ 5:29:06<19:05:03] +[titan] 2025-10-05 04:03:26,302 - root - INFO - step: 8935 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:03:26,303 - root - INFO - lr: 4.5035e-05 gnorm: 1.21 [ 5:29:17<19:04:51] +[titan] 2025-10-05 04:03:37,172 - root - INFO - step: 8940 loss: 2.6656 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.3239 global_avg_mtp_loss: 2.3417 +[titan] 2025-10-05 04:03:37,172 - root - INFO - lr: 4.5030e-05 gnorm: 1.16 [ 5:29:28<19:04:40] +[titan] 2025-10-05 04:03:48,057 - root - INFO - step: 8945 loss: 2.4401 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1543 +[titan] 2025-10-05 04:03:48,057 - root - INFO - lr: 4.5024e-05 gnorm: 1.12 [ 5:29:39<19:04:28] +[titan] 2025-10-05 04:03:56,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:03:58,919 - root - INFO - step: 8950 loss: 2.4061 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1253 +[titan] 2025-10-05 04:03:58,919 - root - INFO - lr: 4.5019e-05 gnorm: 1.11 [ 5:29:49<19:04:16] +[titan] 2025-10-05 04:04:09,819 - root - INFO - step: 8955 loss: 2.4957 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2021 +[titan] 2025-10-05 04:04:09,820 - root - INFO - lr: 4.5013e-05 gnorm: 1.12 [ 5:30:00<19:04:05] +[titan] 2025-10-05 04:04:20,693 - root - INFO - step: 8960 loss: 2.4047 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1227 +[titan] 2025-10-05 04:04:20,693 - root - INFO - lr: 4.5007e-05 gnorm: 1.15 [ 5:30:11<19:03:53] +[titan] 2025-10-05 04:04:31,580 - root - INFO - step: 8965 loss: 2.4637 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2896 global_avg_mtp_loss: 2.1740 +[titan] 2025-10-05 04:04:31,580 - root - INFO - lr: 4.5002e-05 gnorm: 1.15 [ 5:30:22<19:03:41] +[titan] 2025-10-05 04:04:42,434 - root - INFO - step: 8970 loss: 2.4642 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1738 +[titan] 2025-10-05 04:04:42,434 - root - INFO - lr: 4.4996e-05 gnorm: 1.19 [ 5:30:33<19:03:30] +[titan] 2025-10-05 04:04:53,298 - root - INFO - step: 8975 loss: 2.4993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.2056 +[titan] 2025-10-05 04:04:53,298 - root - INFO - lr: 4.4991e-05 gnorm: 1.20 [ 5:30:44<19:03:18] +[titan] 2025-10-05 04:05:04,159 - root - INFO - step: 8980 loss: 2.4094 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1265 +[titan] 2025-10-05 04:05:04,159 - root - INFO - lr: 4.4985e-05 
gnorm: 1.14 [ 5:30:55<19:03:06] +[titan] 2025-10-05 04:05:15,056 - root - INFO - step: 8985 loss: 2.4593 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1707 +[titan] 2025-10-05 04:05:15,057 - root - INFO - lr: 4.4979e-05 gnorm: 1.20 [ 5:31:06<19:02:55] +[titan] 2025-10-05 04:05:25,930 - root - INFO - step: 8990 loss: 2.3911 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 04:05:25,930 - root - INFO - lr: 4.4974e-05 gnorm: 1.15 [ 5:31:16<19:02:43] +[titan] 2025-10-05 04:05:36,797 - root - INFO - step: 8995 loss: 2.4428 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1566 +[titan] 2025-10-05 04:05:36,798 - root - INFO - lr: 4.4968e-05 gnorm: 1.17 [ 5:31:27<19:02:31] +[titan] 2025-10-05 04:05:45,469 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:05:47,651 - root - INFO - step: 9000 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0801 +[titan] 2025-10-05 04:05:47,652 - root - INFO - lr: 4.4962e-05 gnorm: 1.23 [ 5:31:38<19:02:20] +[titan] 2025-10-05 04:05:58,519 - root - INFO - step: 9005 loss: 2.4431 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1560 +[titan] 2025-10-05 04:05:58,519 - root - INFO - lr: 4.4957e-05 gnorm: 1.17 [ 5:31:49<19:02:08] +[titan] 2025-10-05 04:06:09,392 - root - INFO - step: 9010 loss: 2.4584 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2887 global_avg_mtp_loss: 2.1697 +[titan] 2025-10-05 04:06:09,392 - root - INFO - lr: 4.4951e-05 gnorm: 1.17 [ 5:32:00<19:01:56] +[titan] 2025-10-05 04:06:20,257 - root - INFO - step: 9015 loss: 2.4693 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2899 global_avg_mtp_loss: 2.1794 +[titan] 2025-10-05 04:06:20,257 - root - INFO - lr: 4.4946e-05 gnorm: 1.13 [ 5:32:11<19:01:44] +[titan] 2025-10-05 04:06:31,158 - root - INFO - step: 9020 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1487 +[titan] 2025-10-05 04:06:31,158 - root - INFO - lr: 4.4940e-05 gnorm: 1.15 [ 5:32:22<19:01:33] +[titan] 2025-10-05 04:06:42,018 - root - INFO - step: 9025 loss: 2.3968 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:06:42,018 - root - INFO - lr: 4.4934e-05 gnorm: 1.11 [ 5:32:33<19:01:21] +[titan] 2025-10-05 04:06:52,886 - root - INFO - step: 9030 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:06:52,886 - root - INFO - lr: 4.4929e-05 
gnorm: 1.08 [ 5:32:43<19:01:09] +[titan] 2025-10-05 04:07:03,747 - root - INFO - step: 9035 loss: 2.4647 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1755 +[titan] 2025-10-05 04:07:03,747 - root - INFO - lr: 4.4923e-05 gnorm: 1.16 [ 5:32:54<19:00:58] +[titan] 2025-10-05 04:07:14,610 - root - INFO - step: 9040 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1859 +[titan] 2025-10-05 04:07:14,610 - root - INFO - lr: 4.4917e-05 gnorm: 1.17 [ 5:33:05<19:00:46] +[titan] 2025-10-05 04:07:25,476 - root - INFO - step: 9045 loss: 2.4520 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2879 global_avg_mtp_loss: 2.1641 +[titan] 2025-10-05 04:07:25,476 - root - INFO - lr: 4.4912e-05 gnorm: 1.19 [ 5:33:16<19:00:34] +[titan] 2025-10-05 04:07:34,201 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:07:36,379 - root - INFO - step: 9050 loss: 2.4771 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:07:36,379 - root - INFO - lr: 4.4906e-05 gnorm: 1.19 [ 5:33:27<19:00:23] +[titan] 2025-10-05 04:07:47,258 - root - INFO - step: 9055 loss: 2.4168 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1339 +[titan] 2025-10-05 04:07:47,258 - root - INFO - lr: 4.4900e-05 gnorm: 1.14 [ 5:33:38<19:00:11] +[titan] 2025-10-05 04:07:58,123 - root - INFO - step: 9060 loss: 2.4821 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:07:58,123 - root - INFO - lr: 4.4895e-05 gnorm: 1.16 [ 5:33:49<18:59:59] +[titan] 2025-10-05 04:08:09,002 - root - INFO - step: 9065 loss: 2.4858 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2911 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:08:09,003 - root - INFO - lr: 4.4889e-05 gnorm: 1.18 [ 5:34:00<18:59:48] +[titan] 2025-10-05 04:08:19,858 - root - INFO - step: 9070 loss: 2.4766 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2901 global_avg_mtp_loss: 2.1865 +[titan] 2025-10-05 04:08:19,858 - root - INFO - lr: 4.4883e-05 gnorm: 1.18 [ 5:34:10<18:59:36] +[titan] 2025-10-05 04:08:30,739 - root - INFO - step: 9075 loss: 2.4338 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1483 +[titan] 2025-10-05 04:08:30,739 - root - INFO - lr: 4.4878e-05 gnorm: 1.16 [ 5:34:21<18:59:24] +[titan] 2025-10-05 04:08:41,605 - root - INFO - step: 9080 loss: 2.3786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 04:08:41,605 - root - INFO - lr: 4.4872e-05 
gnorm: 1.24 [ 5:34:32<18:59:13] +[titan] 2025-10-05 04:08:52,482 - root - INFO - step: 9085 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.2055 +[titan] 2025-10-05 04:08:52,482 - root - INFO - lr: 4.4866e-05 gnorm: 1.20 [ 5:34:43<18:59:01] +[titan] 2025-10-05 04:09:03,324 - root - INFO - step: 9090 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1628 +[titan] 2025-10-05 04:09:03,325 - root - INFO - lr: 4.4861e-05 gnorm: 1.24 [ 5:34:54<18:58:49] +[titan] 2025-10-05 04:09:14,169 - root - INFO - step: 9095 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1886 +[titan] 2025-10-05 04:09:14,169 - root - INFO - lr: 4.4855e-05 gnorm: 1.21 [ 5:35:05<18:58:37] +[titan] 2025-10-05 04:09:22,830 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:09:25,022 - root - INFO - step: 9100 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:09:25,022 - root - INFO - lr: 4.4849e-05 gnorm: 1.19 [ 5:35:16<18:58:26] +[titan] 2025-10-05 04:09:35,891 - root - INFO - step: 9105 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1300 +[titan] 2025-10-05 04:09:35,891 - root - INFO - lr: 4.4844e-05 gnorm: 1.18 [ 5:35:26<18:58:14] +[titan] 2025-10-05 04:09:46,754 - root - INFO - step: 9110 loss: 2.3843 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1029 +[titan] 2025-10-05 04:09:46,754 - root - INFO - lr: 4.4838e-05 gnorm: 1.28 [ 5:35:37<18:58:02] +[titan] 2025-10-05 04:09:57,624 - root - INFO - step: 9115 loss: 2.4036 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1228 +[titan] 2025-10-05 04:09:57,624 - root - INFO - lr: 4.4832e-05 gnorm: 1.19 [ 5:35:48<18:57:51] +[titan] 2025-10-05 04:10:08,470 - root - INFO - step: 9120 loss: 2.4158 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1325 +[titan] 2025-10-05 04:10:08,470 - root - INFO - lr: 4.4827e-05 gnorm: 1.14 [ 5:35:59<18:57:39] +[titan] 2025-10-05 04:10:19,323 - root - INFO - step: 9125 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1509 +[titan] 2025-10-05 04:10:19,323 - root - INFO - lr: 4.4821e-05 gnorm: 1.19 [ 5:36:10<18:57:27] +[titan] 2025-10-05 04:10:30,178 - root - INFO - step: 9130 loss: 2.4437 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 04:10:30,178 - root - INFO - lr: 4.4815e-05 
gnorm: 1.22 [ 5:36:21<18:57:15] +[titan] 2025-10-05 04:10:41,058 - root - INFO - step: 9135 loss: 2.4379 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1518 +[titan] 2025-10-05 04:10:41,058 - root - INFO - lr: 4.4809e-05 gnorm: 1.15 [ 5:36:32<18:57:04] +[titan] 2025-10-05 04:10:51,913 - root - INFO - step: 9140 loss: 2.4780 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1863 +[titan] 2025-10-05 04:10:51,913 - root - INFO - lr: 4.4804e-05 gnorm: 1.14 [ 5:36:42<18:56:52] +[titan] 2025-10-05 04:11:02,801 - root - INFO - step: 9145 loss: 2.4160 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1331 +[titan] 2025-10-05 04:11:02,801 - root - INFO - lr: 4.4798e-05 gnorm: 1.18 [ 5:36:53<18:56:41] +[titan] 2025-10-05 04:11:11,463 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:11:13,643 - root - INFO - step: 9150 loss: 2.4156 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1327 +[titan] 2025-10-05 04:11:13,643 - root - INFO - lr: 4.4792e-05 gnorm: 1.15 [ 5:37:04<18:56:29] +[titan] 2025-10-05 04:11:24,500 - root - INFO - step: 9155 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1789 +[titan] 2025-10-05 04:11:24,500 - root - INFO - lr: 4.4787e-05 gnorm: 1.16 [ 5:37:15<18:56:17] +[titan] 2025-10-05 04:11:35,333 - root - INFO - step: 9160 loss: 2.4173 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1345 +[titan] 2025-10-05 04:11:35,333 - root - INFO - lr: 4.4781e-05 gnorm: 1.15 [ 5:37:26<18:56:05] +[titan] 2025-10-05 04:11:46,194 - root - INFO - step: 9165 loss: 2.4180 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1344 +[titan] 2025-10-05 04:11:46,195 - root - INFO - lr: 4.4775e-05 gnorm: 1.13 [ 5:37:37<18:55:54] +[titan] 2025-10-05 04:11:57,056 - root - INFO - step: 9170 loss: 2.3989 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 04:11:57,057 - root - INFO - lr: 4.4769e-05 gnorm: 1.15 [ 5:37:48<18:55:42] +[titan] 2025-10-05 04:12:07,928 - root - INFO - step: 9175 loss: 2.4640 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1744 +[titan] 2025-10-05 04:12:07,928 - root - INFO - lr: 4.4764e-05 gnorm: 1.11 [ 5:37:58<18:55:30] +[titan] 2025-10-05 04:12:18,847 - root - INFO - step: 9180 loss: 2.5568 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.3018 global_avg_mtp_loss: 2.2549 +[titan] 2025-10-05 04:12:18,847 - root - INFO - lr: 4.4758e-05 
gnorm: 1.20 [ 5:38:09<18:55:19] +[titan] 2025-10-05 04:12:29,719 - root - INFO - step: 9185 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1545 +[titan] 2025-10-05 04:12:29,719 - root - INFO - lr: 4.4752e-05 gnorm: 1.16 [ 5:38:20<18:55:07] +[titan] 2025-10-05 04:12:40,611 - root - INFO - step: 9190 loss: 2.3798 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.1027 +[titan] 2025-10-05 04:12:40,611 - root - INFO - lr: 4.4747e-05 gnorm: 1.15 [ 5:38:31<18:54:55] +[titan] 2025-10-05 04:12:51,477 - root - INFO - step: 9195 loss: 2.4513 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1621 +[titan] 2025-10-05 04:12:51,477 - root - INFO - lr: 4.4741e-05 gnorm: 1.15 [ 5:38:42<18:54:44] +[titan] 2025-10-05 04:13:00,167 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:13:02,353 - root - INFO - step: 9200 loss: 2.4374 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1525 +[titan] 2025-10-05 04:13:02,353 - root - INFO - lr: 4.4735e-05 gnorm: 1.20 [ 5:38:53<18:54:32] +[titan] 2025-10-05 04:13:13,230 - root - INFO - step: 9205 loss: 2.4854 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1940 +[titan] 2025-10-05 04:13:13,230 - root - INFO - lr: 4.4729e-05 gnorm: 1.22 [ 5:39:04<18:54:21] +[titan] 2025-10-05 04:13:24,132 - root - INFO - step: 9210 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:13:24,132 - root - INFO - lr: 4.4724e-05 gnorm: 1.19 [ 5:39:15<18:54:09] +[titan] 2025-10-05 04:13:35,087 - root - INFO - step: 9215 loss: 2.4851 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2920 global_avg_mtp_loss: 2.1930 +[titan] 2025-10-05 04:13:35,087 - root - INFO - lr: 4.4718e-05 gnorm: 1.15 [ 5:39:26<18:53:58] +[titan] 2025-10-05 04:13:37,448 - root - INFO - Dumping profiler traces at step 9216 +[titan] 2025-10-05 04:13:37,485 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:13:46,201 - root - INFO - step: 9220 loss: 2.5001 memory: 118.84GiB(85.28%) tps: 29,486 tflops: 409.07 mfu: 41.36% global_avg_ntp_loss: 0.2928 global_avg_mtp_loss: 2.2073 +[titan] 2025-10-05 04:13:46,201 - root - INFO - lr: 4.4712e-05 gnorm: 1.17 [ 5:39:37<18:53:47] +[titan] 2025-10-05 04:13:57,080 - root - INFO - step: 9225 loss: 2.3856 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1071 +[titan] 2025-10-05 04:13:57,080 - root - INFO - lr: 4.4706e-05 gnorm: 1.15 [ 5:39:48<18:53:35] +[titan] 2025-10-05 04:14:07,953 - root - INFO - step: 9230 loss: 2.4302 
memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1452 +[titan] 2025-10-05 04:14:07,953 - root - INFO - lr: 4.4701e-05 gnorm: 1.15 [ 5:39:58<18:53:24] +[titan] 2025-10-05 04:14:18,819 - root - INFO - step: 9235 loss: 2.4502 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1635 +[titan] 2025-10-05 04:14:18,819 - root - INFO - lr: 4.4695e-05 gnorm: 1.22 [ 5:40:09<18:53:12] +[titan] 2025-10-05 04:14:29,678 - root - INFO - step: 9240 loss: 2.4452 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2869 global_avg_mtp_loss: 2.1584 +[titan] 2025-10-05 04:14:29,678 - root - INFO - lr: 4.4689e-05 gnorm: 1.17 [ 5:40:20<18:53:00] +[titan] 2025-10-05 04:14:40,618 - root - INFO - step: 9245 loss: 2.4345 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1494 +[titan] 2025-10-05 04:14:40,618 - root - INFO - lr: 4.4683e-05 gnorm: 1.14 [ 5:40:31<18:52:49] +[titan] 2025-10-05 04:14:49,293 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:14:51,484 - root - INFO - step: 9250 loss: 2.5104 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2952 global_avg_mtp_loss: 2.2152 +[titan] 2025-10-05 04:14:51,484 - root - INFO - lr: 4.4678e-05 gnorm: 1.18 [ 5:40:42<18:52:37] +[titan] 2025-10-05 04:15:02,363 - root - INFO - step: 9255 loss: 2.4125 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1304 +[titan] 2025-10-05 04:15:02,363 - root - INFO - lr: 4.4672e-05 gnorm: 1.16 [ 5:40:53<18:52:25] +[titan] 2025-10-05 04:15:13,235 - root - INFO - step: 9260 loss: 2.4511 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1623 +[titan] 2025-10-05 04:15:13,236 - root - INFO - lr: 4.4666e-05 gnorm: 1.17 [ 5:41:04<18:52:14] +[titan] 2025-10-05 04:15:24,134 - root - INFO - step: 9265 loss: 2.5208 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2960 global_avg_mtp_loss: 2.2247 +[titan] 2025-10-05 04:15:24,134 - root - INFO - lr: 4.4660e-05 gnorm: 1.12 [ 5:41:15<18:52:02] +[titan] 2025-10-05 04:15:35,014 - root - INFO - step: 9270 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1064 +[titan] 2025-10-05 04:15:35,015 - root - INFO - lr: 4.4655e-05 gnorm: 1.14 [ 5:41:26<18:51:51] +[titan] 2025-10-05 04:15:45,940 - root - INFO - step: 9275 loss: 2.5096 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2942 global_avg_mtp_loss: 2.2155 +[titan] 2025-10-05 04:15:45,940 - root - INFO - lr: 4.4649e-05 gnorm: 3.57 [ 5:41:36<18:51:39] +[titan] 2025-10-05 04:15:56,816 - root - INFO - step: 9280 loss: 2.4602 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1702 +[titan] 2025-10-05 04:15:56,816 - root - INFO - lr: 4.4643e-05 
gnorm: 1.17 [ 5:41:47<18:51:28] +[titan] 2025-10-05 04:16:07,687 - root - INFO - step: 9285 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2872 global_avg_mtp_loss: 2.1597 +[titan] 2025-10-05 04:16:07,687 - root - INFO - lr: 4.4637e-05 gnorm: 1.16 [ 5:41:58<18:51:16] +[titan] 2025-10-05 04:16:18,550 - root - INFO - step: 9290 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1896 +[titan] 2025-10-05 04:16:18,550 - root - INFO - lr: 4.4631e-05 gnorm: 1.19 [ 5:42:09<18:51:04] +[titan] 2025-10-05 04:16:29,436 - root - INFO - step: 9295 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1040 +[titan] 2025-10-05 04:16:29,436 - root - INFO - lr: 4.4626e-05 gnorm: 1.23 [ 5:42:20<18:50:53] +[titan] 2025-10-05 04:16:38,131 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:16:40,321 - root - INFO - step: 9300 loss: 2.4653 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2900 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:16:40,321 - root - INFO - lr: 4.4620e-05 gnorm: 1.12 [ 5:42:31<18:50:41] +[titan] 2025-10-05 04:16:51,231 - root - INFO - step: 9305 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:16:51,232 - root - INFO - lr: 4.4614e-05 gnorm: 1.15 [ 5:42:42<18:50:30] +[titan] 2025-10-05 04:17:02,103 - root - INFO - step: 9310 loss: 2.4882 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2916 global_avg_mtp_loss: 2.1966 +[titan] 2025-10-05 04:17:02,103 - root - INFO - lr: 4.4608e-05 gnorm: 1.14 [ 5:42:53<18:50:18] +[titan] 2025-10-05 04:17:13,000 - root - INFO - step: 9315 loss: 2.4906 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2932 global_avg_mtp_loss: 2.1975 +[titan] 2025-10-05 04:17:13,000 - root - INFO - lr: 4.4602e-05 gnorm: 1.19 [ 5:43:04<18:50:06] +[titan] 2025-10-05 04:17:23,889 - root - INFO - step: 9320 loss: 2.4806 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1899 +[titan] 2025-10-05 04:17:23,890 - root - INFO - lr: 4.4597e-05 gnorm: 1.23 [ 5:43:14<18:49:55] +[titan] 2025-10-05 04:17:34,759 - root - INFO - step: 9325 loss: 2.4923 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2926 global_avg_mtp_loss: 2.1997 +[titan] 2025-10-05 04:17:34,759 - root - INFO - lr: 4.4591e-05 gnorm: 1.20 [ 5:43:25<18:49:43] +[titan] 2025-10-05 04:17:45,670 - root - INFO - step: 9330 loss: 2.4730 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1813 +[titan] 2025-10-05 04:17:45,670 - root - INFO - lr: 4.4585e-05 
gnorm: 1.15 [ 5:43:36<18:49:32] +[titan] 2025-10-05 04:17:56,531 - root - INFO - step: 9335 loss: 2.5353 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2978 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:17:56,531 - root - INFO - lr: 4.4579e-05 gnorm: 1.15 [ 5:43:47<18:49:20] +[titan] 2025-10-05 04:18:07,423 - root - INFO - step: 9340 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 04:18:07,423 - root - INFO - lr: 4.4573e-05 gnorm: 1.22 [ 5:43:58<18:49:08] +[titan] 2025-10-05 04:18:18,296 - root - INFO - step: 9345 loss: 2.4834 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2917 global_avg_mtp_loss: 2.1917 +[titan] 2025-10-05 04:18:18,296 - root - INFO - lr: 4.4568e-05 gnorm: 1.16 [ 5:44:09<18:48:57] +[titan] 2025-10-05 04:18:27,002 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:18:29,196 - root - INFO - step: 9350 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:18:29,196 - root - INFO - lr: 4.4562e-05 gnorm: 1.12 [ 5:44:20<18:48:45] +[titan] 2025-10-05 04:18:40,056 - root - INFO - step: 9355 loss: 2.4321 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2855 global_avg_mtp_loss: 2.1466 +[titan] 2025-10-05 04:18:40,056 - root - INFO - lr: 4.4556e-05 gnorm: 1.12 [ 5:44:31<18:48:34] +[titan] 2025-10-05 04:18:50,968 - root - INFO - step: 9360 loss: 2.4987 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2927 global_avg_mtp_loss: 2.2060 +[titan] 2025-10-05 04:18:50,968 - root - INFO - lr: 4.4550e-05 gnorm: 1.14 [ 5:44:41<18:48:22] +[titan] 2025-10-05 04:19:01,819 - root - INFO - step: 9365 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1401 +[titan] 2025-10-05 04:19:01,819 - root - INFO - lr: 4.4544e-05 gnorm: 1.14 [ 5:44:52<18:48:10] +[titan] 2025-10-05 04:19:12,717 - root - INFO - step: 9370 loss: 2.5021 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2951 global_avg_mtp_loss: 2.2070 +[titan] 2025-10-05 04:19:12,718 - root - INFO - lr: 4.4538e-05 gnorm: 1.13 [ 5:45:03<18:47:59] +[titan] 2025-10-05 04:19:23,592 - root - INFO - step: 9375 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 04:19:23,592 - root - INFO - lr: 4.4533e-05 gnorm: 1.15 [ 5:45:14<18:47:47] +[titan] 2025-10-05 04:19:34,464 - root - INFO - step: 9380 loss: 2.4564 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1688 +[titan] 2025-10-05 04:19:34,465 - root - INFO - lr: 4.4527e-05 
gnorm: 1.21 [ 5:45:25<18:47:36] +[titan] 2025-10-05 04:19:45,394 - root - INFO - step: 9385 loss: 2.4197 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2849 global_avg_mtp_loss: 2.1348 +[titan] 2025-10-05 04:19:45,394 - root - INFO - lr: 4.4521e-05 gnorm: 1.16 [ 5:45:36<18:47:24] +[titan] 2025-10-05 04:19:56,282 - root - INFO - step: 9390 loss: 2.4630 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1753 +[titan] 2025-10-05 04:19:56,282 - root - INFO - lr: 4.4515e-05 gnorm: 1.15 [ 5:45:47<18:47:13] +[titan] 2025-10-05 04:20:07,169 - root - INFO - step: 9395 loss: 2.4327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1475 +[titan] 2025-10-05 04:20:07,169 - root - INFO - lr: 4.4509e-05 gnorm: 1.21 [ 5:45:58<18:47:01] +[titan] 2025-10-05 04:20:15,874 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:20:18,060 - root - INFO - step: 9400 loss: 2.5009 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2930 global_avg_mtp_loss: 2.2079 +[titan] 2025-10-05 04:20:18,061 - root - INFO - lr: 4.4503e-05 gnorm: 1.18 [ 5:46:09<18:46:50] +[titan] 2025-10-05 04:20:28,966 - root - INFO - step: 9405 loss: 2.4314 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1469 +[titan] 2025-10-05 04:20:28,966 - root - INFO - lr: 4.4498e-05 gnorm: 1.14 [ 5:46:19<18:46:38] +[titan] 2025-10-05 04:20:39,882 - root - INFO - step: 9410 loss: 2.4983 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.2050 +[titan] 2025-10-05 04:20:39,882 - root - INFO - lr: 4.4492e-05 gnorm: 1.20 [ 5:46:30<18:46:27] +[titan] 2025-10-05 04:20:50,800 - root - INFO - step: 9415 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:20:50,800 - root - INFO - lr: 4.4486e-05 gnorm: 1.13 [ 5:46:41<18:46:15] +[titan] 2025-10-05 04:21:01,668 - root - INFO - step: 9420 loss: 2.3688 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0904 +[titan] 2025-10-05 04:21:01,668 - root - INFO - lr: 4.4480e-05 gnorm: 1.16 [ 5:46:52<18:46:03] +[titan] 2025-10-05 04:21:12,542 - root - INFO - step: 9425 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 04:21:12,543 - root - INFO - lr: 4.4474e-05 gnorm: 1.16 [ 5:47:03<18:45:52] +[titan] 2025-10-05 04:21:23,412 - root - INFO - step: 9430 loss: 2.4415 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1551 +[titan] 2025-10-05 04:21:23,412 - root - INFO - lr: 4.4468e-05 
gnorm: 1.20 [ 5:47:14<18:45:40] +[titan] 2025-10-05 04:21:34,322 - root - INFO - step: 9435 loss: 2.3669 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 04:21:34,322 - root - INFO - lr: 4.4462e-05 gnorm: 1.10 [ 5:47:25<18:45:29] +[titan] 2025-10-05 04:21:45,197 - root - INFO - step: 9440 loss: 2.3883 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1070 +[titan] 2025-10-05 04:21:45,197 - root - INFO - lr: 4.4457e-05 gnorm: 1.17 [ 5:47:36<18:45:17] +[titan] 2025-10-05 04:21:56,142 - root - INFO - step: 9445 loss: 2.4394 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1537 +[titan] 2025-10-05 04:21:56,142 - root - INFO - lr: 4.4451e-05 gnorm: 1.15 [ 5:47:47<18:45:06] +[titan] 2025-10-05 04:22:04,824 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:22:07,011 - root - INFO - step: 9450 loss: 2.4138 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1302 +[titan] 2025-10-05 04:22:07,011 - root - INFO - lr: 4.4445e-05 gnorm: 1.11 [ 5:47:58<18:44:54] +[titan] 2025-10-05 04:22:17,891 - root - INFO - step: 9455 loss: 2.4826 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1916 +[titan] 2025-10-05 04:22:17,891 - root - INFO - lr: 4.4439e-05 gnorm: 1.14 [ 5:48:08<18:44:43] +[titan] 2025-10-05 04:22:28,768 - root - INFO - step: 9460 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 04:22:28,768 - root - INFO - lr: 4.4433e-05 gnorm: 1.12 [ 5:48:19<18:44:31] +[titan] 2025-10-05 04:22:39,662 - root - INFO - step: 9465 loss: 2.4758 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1851 +[titan] 2025-10-05 04:22:39,663 - root - INFO - lr: 4.4427e-05 gnorm: 1.12 [ 5:48:30<18:44:19] +[titan] 2025-10-05 04:22:50,623 - root - INFO - step: 9470 loss: 2.4549 memory: 118.84GiB(85.28%) tps: 29,899 tflops: 414.80 mfu: 41.94% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1672 +[titan] 2025-10-05 04:22:50,623 - root - INFO - lr: 4.4421e-05 gnorm: 1.19 [ 5:48:41<18:44:08] +[titan] 2025-10-05 04:23:01,499 - root - INFO - step: 9475 loss: 2.4290 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1431 +[titan] 2025-10-05 04:23:01,500 - root - INFO - lr: 4.4415e-05 gnorm: 1.12 [ 5:48:52<18:43:56] +[titan] 2025-10-05 04:23:12,360 - root - INFO - step: 9480 loss: 2.4464 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1579 +[titan] 2025-10-05 04:23:12,360 - root - INFO - lr: 4.4410e-05 
gnorm: 1.18 [ 5:49:03<18:43:45] +[titan] 2025-10-05 04:23:23,239 - root - INFO - step: 9485 loss: 2.4527 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1643 +[titan] 2025-10-05 04:23:23,239 - root - INFO - lr: 4.4404e-05 gnorm: 1.19 [ 5:49:14<18:43:33] +[titan] 2025-10-05 04:23:34,114 - root - INFO - step: 9490 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:23:34,114 - root - INFO - lr: 4.4398e-05 gnorm: 1.11 [ 5:49:25<18:43:22] +[titan] 2025-10-05 04:23:44,977 - root - INFO - step: 9495 loss: 2.4723 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2904 global_avg_mtp_loss: 2.1819 +[titan] 2025-10-05 04:23:44,977 - root - INFO - lr: 4.4392e-05 gnorm: 1.11 [ 5:49:35<18:43:10] +[titan] 2025-10-05 04:23:53,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:23:55,915 - root - INFO - step: 9500 loss: 2.4279 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1438 +[titan] 2025-10-05 04:23:55,915 - root - INFO - lr: 4.4386e-05 gnorm: 1.12 [ 5:49:46<18:42:59] +[titan] 2025-10-05 04:24:06,759 - root - INFO - step: 9505 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:24:06,759 - root - INFO - lr: 4.4380e-05 gnorm: 1.15 [ 5:49:57<18:42:47] +[titan] 2025-10-05 04:24:17,624 - root - INFO - step: 9510 loss: 2.4001 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1187 +[titan] 2025-10-05 04:24:17,624 - root - INFO - lr: 4.4374e-05 gnorm: 1.13 [ 5:50:08<18:42:35] +[titan] 2025-10-05 04:24:28,498 - root - INFO - step: 9515 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 04:24:28,498 - root - INFO - lr: 4.4368e-05 gnorm: 1.17 [ 5:50:19<18:42:24] +[titan] 2025-10-05 04:24:39,377 - root - INFO - step: 9520 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:24:39,377 - root - INFO - lr: 4.4362e-05 gnorm: 1.13 [ 5:50:30<18:42:12] +[titan] 2025-10-05 04:24:50,308 - root - INFO - step: 9525 loss: 2.3498 memory: 118.84GiB(85.28%) tps: 29,979 tflops: 415.91 mfu: 42.05% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0740 +[titan] 2025-10-05 04:24:50,308 - root - INFO - lr: 4.4357e-05 gnorm: 1.18 [ 5:50:41<18:42:01] +[titan] 2025-10-05 04:25:01,216 - root - INFO - step: 9530 loss: 2.4726 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2905 global_avg_mtp_loss: 2.1821 +[titan] 2025-10-05 04:25:01,216 - root - INFO - lr: 4.4351e-05 
gnorm: 1.23 [ 5:50:52<18:41:49] +[titan] 2025-10-05 04:25:12,092 - root - INFO - step: 9535 loss: 2.4240 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1392 +[titan] 2025-10-05 04:25:12,092 - root - INFO - lr: 4.4345e-05 gnorm: 1.17 [ 5:51:03<18:41:38] +[titan] 2025-10-05 04:25:22,993 - root - INFO - step: 9540 loss: 2.4342 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1477 +[titan] 2025-10-05 04:25:22,994 - root - INFO - lr: 4.4339e-05 gnorm: 1.18 [ 5:51:14<18:41:26] +[titan] 2025-10-05 04:25:33,873 - root - INFO - step: 9545 loss: 2.4536 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2889 global_avg_mtp_loss: 2.1647 +[titan] 2025-10-05 04:25:33,873 - root - INFO - lr: 4.4333e-05 gnorm: 1.18 [ 5:51:24<18:41:15] +[titan] 2025-10-05 04:25:42,543 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:25:44,728 - root - INFO - step: 9550 loss: 2.4518 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1638 +[titan] 2025-10-05 04:25:44,728 - root - INFO - lr: 4.4327e-05 gnorm: 1.19 [ 5:51:35<18:41:03] +[titan] 2025-10-05 04:25:55,649 - root - INFO - step: 9555 loss: 2.4091 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 04:25:55,650 - root - INFO - lr: 4.4321e-05 gnorm: 1.19 [ 5:51:46<18:40:51] +[titan] 2025-10-05 04:26:06,497 - root - INFO - step: 9560 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1277 +[titan] 2025-10-05 04:26:06,497 - root - INFO - lr: 4.4315e-05 gnorm: 1.14 [ 5:51:57<18:40:40] +[titan] 2025-10-05 04:26:17,403 - root - INFO - step: 9565 loss: 2.4306 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2842 global_avg_mtp_loss: 2.1464 +[titan] 2025-10-05 04:26:17,403 - root - INFO - lr: 4.4309e-05 gnorm: 1.18 [ 5:52:08<18:40:28] +[titan] 2025-10-05 04:26:28,292 - root - INFO - step: 9570 loss: 2.4323 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1476 +[titan] 2025-10-05 04:26:28,292 - root - INFO - lr: 4.4303e-05 gnorm: 1.17 [ 5:52:19<18:40:17] +[titan] 2025-10-05 04:26:39,137 - root - INFO - step: 9575 loss: 2.4565 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2892 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:26:39,137 - root - INFO - lr: 4.4297e-05 gnorm: 1.17 [ 5:52:30<18:40:05] +[titan] 2025-10-05 04:26:50,002 - root - INFO - step: 9580 loss: 2.5647 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.3010 global_avg_mtp_loss: 2.2636 +[titan] 2025-10-05 04:26:50,002 - root - INFO - lr: 4.4291e-05 
gnorm: 1.16 [ 5:52:41<18:39:53] +[titan] 2025-10-05 04:27:00,916 - root - INFO - step: 9585 loss: 2.4249 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 04:27:00,916 - root - INFO - lr: 4.4285e-05 gnorm: 1.16 [ 5:52:51<18:39:42] +[titan] 2025-10-05 04:27:11,772 - root - INFO - step: 9590 loss: 2.5479 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2986 global_avg_mtp_loss: 2.2492 +[titan] 2025-10-05 04:27:11,773 - root - INFO - lr: 4.4279e-05 gnorm: 1.14 [ 5:53:02<18:39:30] +[titan] 2025-10-05 04:27:22,632 - root - INFO - step: 9595 loss: 2.4580 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2888 global_avg_mtp_loss: 2.1692 +[titan] 2025-10-05 04:27:22,632 - root - INFO - lr: 4.4273e-05 gnorm: 1.16 [ 5:53:13<18:39:19] +[titan] 2025-10-05 04:27:31,313 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:27:33,503 - root - INFO - step: 9600 loss: 2.4248 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1404 +[titan] 2025-10-05 04:27:33,503 - root - INFO - lr: 4.4268e-05 gnorm: 1.14 [ 5:53:24<18:39:07] +[titan] 2025-10-05 04:27:44,378 - root - INFO - step: 9605 loss: 2.4209 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2850 global_avg_mtp_loss: 2.1359 +[titan] 2025-10-05 04:27:44,378 - root - INFO - lr: 4.4262e-05 gnorm: 1.14 [ 5:53:35<18:38:56] +[titan] 2025-10-05 04:27:55,281 - root - INFO - step: 9610 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1495 +[titan] 2025-10-05 04:27:55,281 - root - INFO - lr: 4.4256e-05 gnorm: 1.27 [ 5:53:46<18:38:44] +[titan] 2025-10-05 04:28:06,144 - root - INFO - step: 9615 loss: 2.4254 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1414 +[titan] 2025-10-05 04:28:06,145 - root - INFO - lr: 4.4250e-05 gnorm: 1.12 [ 5:53:57<18:38:32] +[titan] 2025-10-05 04:28:17,025 - root - INFO - step: 9620 loss: 2.4380 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2860 global_avg_mtp_loss: 2.1520 +[titan] 2025-10-05 04:28:17,025 - root - INFO - lr: 4.4244e-05 gnorm: 1.17 [ 5:54:08<18:38:21] +[titan] 2025-10-05 04:28:27,900 - root - INFO - step: 9625 loss: 2.4092 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1278 +[titan] 2025-10-05 04:28:27,900 - root - INFO - lr: 4.4238e-05 gnorm: 1.17 [ 5:54:18<18:38:09] +[titan] 2025-10-05 04:28:38,759 - root - INFO - step: 9630 loss: 2.3955 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1144 +[titan] 2025-10-05 04:28:38,759 - root - INFO - lr: 4.4232e-05 
gnorm: 1.18 [ 5:54:29<18:37:58] +[titan] 2025-10-05 04:28:49,641 - root - INFO - step: 9635 loss: 2.4426 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2870 global_avg_mtp_loss: 2.1556 +[titan] 2025-10-05 04:28:49,641 - root - INFO - lr: 4.4226e-05 gnorm: 1.17 [ 5:54:40<18:37:46] +[titan] 2025-10-05 04:29:00,565 - root - INFO - step: 9640 loss: 2.5391 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.3009 global_avg_mtp_loss: 2.2382 +[titan] 2025-10-05 04:29:00,565 - root - INFO - lr: 4.4220e-05 gnorm: 1.17 [ 5:54:51<18:37:35] +[titan] 2025-10-05 04:29:11,410 - root - INFO - step: 9645 loss: 2.4192 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 04:29:11,410 - root - INFO - lr: 4.4214e-05 gnorm: 1.18 [ 5:55:02<18:37:23] +[titan] 2025-10-05 04:29:20,108 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:29:22,295 - root - INFO - step: 9650 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:29:22,295 - root - INFO - lr: 4.4208e-05 gnorm: 1.14 [ 5:55:13<18:37:11] +[titan] 2025-10-05 04:29:33,192 - root - INFO - step: 9655 loss: 2.4692 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1795 +[titan] 2025-10-05 04:29:33,192 - root - INFO - lr: 4.4202e-05 gnorm: 1.18 [ 5:55:24<18:37:00] +[titan] 2025-10-05 04:29:44,075 - root - INFO - step: 9660 loss: 2.5077 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2936 global_avg_mtp_loss: 2.2141 +[titan] 2025-10-05 04:29:44,076 - root - INFO - lr: 4.4196e-05 gnorm: 1.19 [ 5:55:35<18:36:48] +[titan] 2025-10-05 04:29:55,012 - root - INFO - step: 9665 loss: 2.3987 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2812 global_avg_mtp_loss: 2.1174 +[titan] 2025-10-05 04:29:55,012 - root - INFO - lr: 4.4190e-05 gnorm: 1.13 [ 5:55:46<18:36:37] +[titan] 2025-10-05 04:30:05,890 - root - INFO - step: 9670 loss: 2.4206 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1355 +[titan] 2025-10-05 04:30:05,891 - root - INFO - lr: 4.4184e-05 gnorm: 1.15 [ 5:55:56<18:36:25] +[titan] 2025-10-05 04:30:16,776 - root - INFO - step: 9675 loss: 2.3409 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 04:30:16,776 - root - INFO - lr: 4.4178e-05 gnorm: 1.12 [ 5:56:07<18:36:14] +[titan] 2025-10-05 04:30:27,638 - root - INFO - step: 9680 loss: 2.4055 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1224 +[titan] 2025-10-05 04:30:27,639 - root - INFO - lr: 4.4172e-05 
gnorm: 1.11 [ 5:56:18<18:36:02] +[titan] 2025-10-05 04:30:38,514 - root - INFO - step: 9685 loss: 2.4020 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1213 +[titan] 2025-10-05 04:30:38,514 - root - INFO - lr: 4.4166e-05 gnorm: 1.10 [ 5:56:29<18:35:51] +[titan] 2025-10-05 04:30:49,397 - root - INFO - step: 9690 loss: 2.3894 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 04:30:49,397 - root - INFO - lr: 4.4160e-05 gnorm: 1.14 [ 5:56:40<18:35:39] +[titan] 2025-10-05 04:31:00,376 - root - INFO - step: 9695 loss: 2.4118 memory: 118.84GiB(85.28%) tps: 29,848 tflops: 414.10 mfu: 41.87% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1306 +[titan] 2025-10-05 04:31:00,376 - root - INFO - lr: 4.4154e-05 gnorm: 1.13 [ 5:56:51<18:35:28] +[titan] 2025-10-05 04:31:09,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:31:11,234 - root - INFO - step: 9700 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 04:31:11,235 - root - INFO - lr: 4.4148e-05 gnorm: 1.17 [ 5:57:02<18:35:16] +[titan] 2025-10-05 04:31:22,095 - root - INFO - step: 9705 loss: 2.4525 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1644 +[titan] 2025-10-05 04:31:22,095 - root - INFO - lr: 4.4142e-05 gnorm: 1.18 [ 5:57:13<18:35:05] +[titan] 2025-10-05 04:31:32,925 - root - INFO - step: 9710 loss: 2.4477 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.77 mfu: 42.44% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:31:32,925 - root - INFO - lr: 4.4136e-05 gnorm: 1.17 [ 5:57:23<18:34:53] +[titan] 2025-10-05 04:31:43,787 - root - INFO - step: 9715 loss: 2.4891 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.3002 global_avg_mtp_loss: 2.1889 +[titan] 2025-10-05 04:31:43,787 - root - INFO - lr: 4.4130e-05 gnorm: 1.38 [ 5:57:34<18:34:41] +[titan] 2025-10-05 04:31:54,630 - root - INFO - step: 9720 loss: 2.3767 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0983 +[titan] 2025-10-05 04:31:54,630 - root - INFO - lr: 4.4124e-05 gnorm: 1.14 [ 5:57:45<18:34:30] +[titan] 2025-10-05 04:32:05,581 - root - INFO - step: 9725 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 04:32:05,582 - root - INFO - lr: 4.4118e-05 gnorm: 1.14 [ 5:57:56<18:34:18] +[titan] 2025-10-05 04:32:12,293 - root - INFO - Dumping profiler traces at step 9728 +[titan] 2025-10-05 04:32:12,331 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:32:16,691 - root - INFO - step: 9730 loss: 2.4883 
memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2933 global_avg_mtp_loss: 2.1950 +[titan] 2025-10-05 04:32:16,691 - root - INFO - lr: 4.4112e-05 gnorm: 1.25 [ 5:58:07<18:34:08] +[titan] 2025-10-05 04:32:27,533 - root - INFO - step: 9735 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:32:27,534 - root - INFO - lr: 4.4106e-05 gnorm: 1.17 [ 5:58:18<18:33:56] +[titan] 2025-10-05 04:32:38,369 - root - INFO - step: 9740 loss: 2.4600 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1703 +[titan] 2025-10-05 04:32:38,369 - root - INFO - lr: 4.4100e-05 gnorm: 1.17 [ 5:58:29<18:33:44] +[titan] 2025-10-05 04:32:49,220 - root - INFO - step: 9745 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2838 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 04:32:49,220 - root - INFO - lr: 4.4094e-05 gnorm: 1.16 [ 5:58:40<18:33:33] +[titan] 2025-10-05 04:32:57,959 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:33:00,142 - root - INFO - step: 9750 loss: 2.3885 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1078 +[titan] 2025-10-05 04:33:00,143 - root - INFO - lr: 4.4088e-05 gnorm: 1.14 [ 5:58:51<18:33:21] +[titan] 2025-10-05 04:33:10,995 - root - INFO - step: 9755 loss: 2.5700 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.3070 global_avg_mtp_loss: 2.2630 +[titan] 2025-10-05 04:33:10,995 - root - INFO - lr: 4.4082e-05 gnorm: 1.38 [ 5:59:01<18:33:10] +[titan] 2025-10-05 04:33:21,841 - root - INFO - step: 9760 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0981 +[titan] 2025-10-05 04:33:21,841 - root - INFO - lr: 4.4076e-05 gnorm: 1.10 [ 5:59:12<18:32:58] +[titan] 2025-10-05 04:33:32,699 - root - INFO - step: 9765 loss: 2.4074 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:33:32,699 - root - INFO - lr: 4.4070e-05 gnorm: 1.12 [ 5:59:23<18:32:46] +[titan] 2025-10-05 04:33:43,562 - root - INFO - step: 9770 loss: 2.4336 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1479 +[titan] 2025-10-05 04:33:43,563 - root - INFO - lr: 4.4064e-05 gnorm: 1.20 [ 5:59:34<18:32:35] +[titan] 2025-10-05 04:33:54,429 - root - INFO - step: 9775 loss: 2.3924 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1125 +[titan] 2025-10-05 04:33:54,429 - root - INFO - lr: 4.4058e-05 gnorm: 1.13 [ 5:59:45<18:32:23] +[titan] 2025-10-05 04:34:05,364 - root - INFO - step: 9780 loss: 2.4335 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2863 global_avg_mtp_loss: 2.1473 +[titan] 2025-10-05 04:34:05,365 - root - INFO - lr: 4.4052e-05 
gnorm: 1.19 [ 5:59:56<18:32:12] +[titan] 2025-10-05 04:34:16,251 - root - INFO - step: 9785 loss: 2.4309 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:34:16,251 - root - INFO - lr: 4.4046e-05 gnorm: 1.30 [ 6:00:07<18:32:00] +[titan] 2025-10-05 04:34:27,120 - root - INFO - step: 9790 loss: 2.4512 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2882 global_avg_mtp_loss: 2.1629 +[titan] 2025-10-05 04:34:27,120 - root - INFO - lr: 4.4039e-05 gnorm: 1.21 [ 6:00:18<18:31:49] +[titan] 2025-10-05 04:34:37,999 - root - INFO - step: 9795 loss: 2.3456 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 04:34:37,999 - root - INFO - lr: 4.4033e-05 gnorm: 1.14 [ 6:00:28<18:31:37] +[titan] 2025-10-05 04:34:46,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:34:48,883 - root - INFO - step: 9800 loss: 2.4057 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1232 +[titan] 2025-10-05 04:34:48,883 - root - INFO - lr: 4.4027e-05 gnorm: 1.18 [ 6:00:39<18:31:26] +[titan] 2025-10-05 04:34:59,779 - root - INFO - step: 9805 loss: 2.5371 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2995 global_avg_mtp_loss: 2.2375 +[titan] 2025-10-05 04:34:59,779 - root - INFO - lr: 4.4021e-05 gnorm: 1.15 [ 6:00:50<18:31:14] +[titan] 2025-10-05 04:35:10,650 - root - INFO - step: 9810 loss: 2.4142 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1309 +[titan] 2025-10-05 04:35:10,650 - root - INFO - lr: 4.4015e-05 gnorm: 1.16 [ 6:01:01<18:31:03] +[titan] 2025-10-05 04:35:21,521 - root - INFO - step: 9815 loss: 2.4068 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1250 +[titan] 2025-10-05 04:35:21,521 - root - INFO - lr: 4.4009e-05 gnorm: 1.16 [ 6:01:12<18:30:51] +[titan] 2025-10-05 04:35:32,405 - root - INFO - step: 9820 loss: 2.4191 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:35:32,405 - root - INFO - lr: 4.4003e-05 gnorm: 1.14 [ 6:01:23<18:30:39] +[titan] 2025-10-05 04:35:43,265 - root - INFO - step: 9825 loss: 2.4557 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2885 global_avg_mtp_loss: 2.1673 +[titan] 2025-10-05 04:35:43,266 - root - INFO - lr: 4.3997e-05 gnorm: 1.11 [ 6:01:34<18:30:28] +[titan] 2025-10-05 04:35:54,144 - root - INFO - step: 9830 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 04:35:54,144 - root - INFO - lr: 4.3991e-05 
gnorm: 1.10 [ 6:01:45<18:30:16] +[titan] 2025-10-05 04:36:05,038 - root - INFO - step: 9835 loss: 2.3594 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0837 +[titan] 2025-10-05 04:36:05,038 - root - INFO - lr: 4.3985e-05 gnorm: 1.17 [ 6:01:56<18:30:05] +[titan] 2025-10-05 04:36:15,903 - root - INFO - step: 9840 loss: 2.3943 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1136 +[titan] 2025-10-05 04:36:15,903 - root - INFO - lr: 4.3979e-05 gnorm: 1.13 [ 6:02:06<18:29:53] +[titan] 2025-10-05 04:36:26,766 - root - INFO - step: 9845 loss: 2.3607 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0841 +[titan] 2025-10-05 04:36:26,766 - root - INFO - lr: 4.3973e-05 gnorm: 1.11 [ 6:02:17<18:29:42] +[titan] 2025-10-05 04:36:35,444 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:36:37,631 - root - INFO - step: 9850 loss: 2.4018 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2811 global_avg_mtp_loss: 2.1206 +[titan] 2025-10-05 04:36:37,631 - root - INFO - lr: 4.3967e-05 gnorm: 1.18 [ 6:02:28<18:29:30] +[titan] 2025-10-05 04:36:48,494 - root - INFO - step: 9855 loss: 2.3920 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 04:36:48,494 - root - INFO - lr: 4.3961e-05 gnorm: 1.14 [ 6:02:39<18:29:19] +[titan] 2025-10-05 04:36:59,366 - root - INFO - step: 9860 loss: 2.3928 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1132 +[titan] 2025-10-05 04:36:59,366 - root - INFO - lr: 4.3955e-05 gnorm: 1.16 [ 6:02:50<18:29:07] +[titan] 2025-10-05 04:37:10,292 - root - INFO - step: 9865 loss: 2.3430 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0689 +[titan] 2025-10-05 04:37:10,292 - root - INFO - lr: 4.3948e-05 gnorm: 1.14 [ 6:03:01<18:28:56] +[titan] 2025-10-05 04:37:21,111 - root - INFO - step: 9870 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.0953 +[titan] 2025-10-05 04:37:21,111 - root - INFO - lr: 4.3942e-05 gnorm: 1.23 [ 6:03:12<18:28:44] +[titan] 2025-10-05 04:37:31,972 - root - INFO - step: 9875 loss: 2.4673 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2893 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:37:31,972 - root - INFO - lr: 4.3936e-05 gnorm: 1.12 [ 6:03:22<18:28:32] +[titan] 2025-10-05 04:37:42,800 - root - INFO - step: 9880 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 04:37:42,800 - root - INFO - lr: 4.3930e-05 
gnorm: 1.18 [ 6:03:33<18:28:21] +[titan] 2025-10-05 04:37:53,645 - root - INFO - step: 9885 loss: 2.3888 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:37:53,645 - root - INFO - lr: 4.3924e-05 gnorm: 1.14 [ 6:03:44<18:28:09] +[titan] 2025-10-05 04:38:04,551 - root - INFO - step: 9890 loss: 2.3882 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 04:38:04,552 - root - INFO - lr: 4.3918e-05 gnorm: 1.12 [ 6:03:55<18:27:58] +[titan] 2025-10-05 04:38:15,412 - root - INFO - step: 9895 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1157 +[titan] 2025-10-05 04:38:15,412 - root - INFO - lr: 4.3912e-05 gnorm: 1.14 [ 6:04:06<18:27:46] +[titan] 2025-10-05 04:38:24,052 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:38:26,242 - root - INFO - step: 9900 loss: 2.3816 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1025 +[titan] 2025-10-05 04:38:26,242 - root - INFO - lr: 4.3906e-05 gnorm: 1.14 [ 6:04:17<18:27:34] +[titan] 2025-10-05 04:38:37,109 - root - INFO - step: 9905 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 04:38:37,109 - root - INFO - lr: 4.3900e-05 gnorm: 1.17 [ 6:04:28<18:27:23] +[titan] 2025-10-05 04:38:47,968 - root - INFO - step: 9910 loss: 2.4451 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1580 +[titan] 2025-10-05 04:38:47,968 - root - INFO - lr: 4.3894e-05 gnorm: 1.17 [ 6:04:38<18:27:11] +[titan] 2025-10-05 04:38:58,828 - root - INFO - step: 9915 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0776 +[titan] 2025-10-05 04:38:58,828 - root - INFO - lr: 4.3887e-05 gnorm: 1.15 [ 6:04:49<18:27:00] +[titan] 2025-10-05 04:39:09,703 - root - INFO - step: 9920 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 04:39:09,703 - root - INFO - lr: 4.3881e-05 gnorm: 1.13 [ 6:05:00<18:26:48] +[titan] 2025-10-05 04:39:20,593 - root - INFO - step: 9925 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 04:39:20,593 - root - INFO - lr: 4.3875e-05 gnorm: 1.14 [ 6:05:11<18:26:37] +[titan] 2025-10-05 04:39:31,464 - root - INFO - step: 9930 loss: 2.2894 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 04:39:31,464 - root - INFO - lr: 4.3869e-05 
gnorm: 1.11 [ 6:05:22<18:26:25] +[titan] 2025-10-05 04:39:42,337 - root - INFO - step: 9935 loss: 2.3475 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 04:39:42,338 - root - INFO - lr: 4.3863e-05 gnorm: 1.10 [ 6:05:33<18:26:13] +[titan] 2025-10-05 04:39:53,224 - root - INFO - step: 9940 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0849 +[titan] 2025-10-05 04:39:53,224 - root - INFO - lr: 4.3857e-05 gnorm: 1.13 [ 6:05:44<18:26:02] +[titan] 2025-10-05 04:40:04,154 - root - INFO - step: 9945 loss: 2.3821 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1033 +[titan] 2025-10-05 04:40:04,154 - root - INFO - lr: 4.3851e-05 gnorm: 1.14 [ 6:05:55<18:25:51] +[titan] 2025-10-05 04:40:12,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:40:15,025 - root - INFO - step: 9950 loss: 2.4179 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:40:15,026 - root - INFO - lr: 4.3845e-05 gnorm: 1.11 [ 6:06:06<18:25:39] +[titan] 2025-10-05 04:40:25,938 - root - INFO - step: 9955 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1159 +[titan] 2025-10-05 04:40:25,938 - root - INFO - lr: 4.3838e-05 gnorm: 1.14 [ 6:06:16<18:25:28] +[titan] 2025-10-05 04:40:36,795 - root - INFO - step: 9960 loss: 2.3949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 04:40:36,795 - root - INFO - lr: 4.3832e-05 gnorm: 1.17 [ 6:06:27<18:25:16] +[titan] 2025-10-05 04:40:47,648 - root - INFO - step: 9965 loss: 2.4110 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2816 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:40:47,648 - root - INFO - lr: 4.3826e-05 gnorm: 1.15 [ 6:06:38<18:25:04] +[titan] 2025-10-05 04:40:58,540 - root - INFO - step: 9970 loss: 2.3944 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1127 +[titan] 2025-10-05 04:40:58,540 - root - INFO - lr: 4.3820e-05 gnorm: 1.18 [ 6:06:49<18:24:53] +[titan] 2025-10-05 04:41:09,431 - root - INFO - step: 9975 loss: 2.5286 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2974 global_avg_mtp_loss: 2.2312 +[titan] 2025-10-05 04:41:09,432 - root - INFO - lr: 4.3814e-05 gnorm: 1.19 [ 6:07:00<18:24:41] +[titan] 2025-10-05 04:41:20,364 - root - INFO - step: 9980 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 04:41:20,364 - root - INFO - lr: 4.3808e-05 
gnorm: 1.18 [ 6:07:11<18:24:30] +[titan] 2025-10-05 04:41:31,259 - root - INFO - step: 9985 loss: 2.4484 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2871 global_avg_mtp_loss: 2.1613 +[titan] 2025-10-05 04:41:31,260 - root - INFO - lr: 4.3802e-05 gnorm: 1.15 [ 6:07:22<18:24:19] +[titan] 2025-10-05 04:41:42,148 - root - INFO - step: 9990 loss: 2.4717 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2937 global_avg_mtp_loss: 2.1780 +[titan] 2025-10-05 04:41:42,148 - root - INFO - lr: 4.3795e-05 gnorm: 1.17 [ 6:07:33<18:24:07] +[titan] 2025-10-05 04:41:53,059 - root - INFO - step: 9995 loss: 2.3948 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1147 +[titan] 2025-10-05 04:41:53,059 - root - INFO - lr: 4.3789e-05 gnorm: 1.17 [ 6:07:44<18:23:56] +[titan] 2025-10-05 04:42:01,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:42:03,951 - root - INFO - step: 10000 loss: 2.4699 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1790 +[titan] 2025-10-05 04:42:03,951 - root - INFO - lr: 4.3783e-05 gnorm: 1.18 [ 6:07:54<18:23:44] +[titan] 2025-10-05 04:42:03,951 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 04:42:23,118 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 04:42:23,118 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.17 seconds. 
+[titan] 2025-10-05 04:44:28,943 - root - INFO - step: 10005 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 2,260 tflops: 31.35 mfu: 3.17% global_avg_ntp_loss: 0.2840 global_avg_mtp_loss: 2.1282 +[titan] 2025-10-05 04:44:28,943 - root - INFO - lr: 4.3777e-05 gnorm: 1.15 [ 6:10:19<18:30:15] +[titan] 2025-10-05 04:44:39,758 - root - INFO - step: 10010 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0843 +[titan] 2025-10-05 04:44:39,759 - root - INFO - lr: 4.3771e-05 gnorm: 1.10 [ 6:10:30<18:30:03] +[titan] 2025-10-05 04:44:50,583 - root - INFO - step: 10015 loss: 2.4606 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2880 global_avg_mtp_loss: 2.1726 +[titan] 2025-10-05 04:44:50,583 - root - INFO - lr: 4.3765e-05 gnorm: 1.17 [ 6:10:41<18:29:51] +[titan] 2025-10-05 04:45:01,371 - root - INFO - step: 10020 loss: 2.3595 memory: 118.84GiB(85.28%) tps: 30,376 tflops: 421.43 mfu: 42.61% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0816 +[titan] 2025-10-05 04:45:01,371 - root - INFO - lr: 4.3758e-05 gnorm: 1.12 [ 6:10:52<18:29:39] +[titan] 2025-10-05 04:45:12,207 - root - INFO - step: 10025 loss: 2.3890 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1095 +[titan] 2025-10-05 04:45:12,207 - root - INFO - lr: 4.3752e-05 gnorm: 1.13 [ 6:11:03<18:29:27] +[titan] 2025-10-05 04:45:23,056 - root - INFO - step: 10030 loss: 2.4171 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1350 +[titan] 2025-10-05 04:45:23,056 - root - INFO - lr: 4.3746e-05 gnorm: 1.14 [ 6:11:14<18:29:15] +[titan] 2025-10-05 04:45:33,878 - root - INFO - step: 10035 loss: 2.4258 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1417 +[titan] 2025-10-05 04:45:33,879 - root - INFO - lr: 
4.3740e-05 gnorm: 1.18 [ 6:11:24<18:29:03] +[titan] 2025-10-05 04:45:44,722 - root - INFO - step: 10040 loss: 2.4313 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2857 global_avg_mtp_loss: 2.1456 +[titan] 2025-10-05 04:45:44,722 - root - INFO - lr: 4.3734e-05 gnorm: 1.13 [ 6:11:35<18:28:51] +[titan] 2025-10-05 04:45:55,531 - root - INFO - step: 10045 loss: 2.3962 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1160 +[titan] 2025-10-05 04:45:55,531 - root - INFO - lr: 4.3728e-05 gnorm: 1.14 [ 6:11:46<18:28:39] +[titan] 2025-10-05 04:46:04,196 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 04:46:06,376 - root - INFO - step: 10050 loss: 2.4217 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.38% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1372 +[titan] 2025-10-05 04:46:06,376 - root - INFO - lr: 4.3721e-05 gnorm: 1.19 [ 6:11:57<18:28:27] +[titan] 2025-10-05 04:46:17,244 - root - INFO - step: 10055 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1117 +[titan] 2025-10-05 04:46:17,245 - root - INFO - lr: 4.3715e-05 gnorm: 1.09 [ 6:12:08<18:28:16] +[titan] 2025-10-05 04:46:28,093 - root - INFO - step: 10060 loss: 2.4776 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2897 global_avg_mtp_loss: 2.1879 +[titan] 2025-10-05 04:46:28,093 - root - INFO - lr: 4.3709e-05 gnorm: 1.12 [ 6:12:19<18:28:04] +[titan] 2025-10-05 04:46:38,949 - root - INFO - step: 10065 loss: 2.3571 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0814 +[titan] 2025-10-05 04:46:38,949 - root - INFO - lr: 4.3703e-05 gnorm: 1.17 [ 6:12:29<18:27:52] +[titan] 2025-10-05 04:46:49,820 - root - INFO - step: 10070 loss: 2.4101 memory: 118.84GiB(85.28%) tps: 
30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1271 +[titan] 2025-10-05 04:46:49,820 - root - INFO - lr: 4.3697e-05 gnorm: 1.14 [ 6:12:40<18:27:40] +[titan] 2025-10-05 04:47:00,671 - root - INFO - step: 10075 loss: 2.4112 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1294 +[titan] 2025-10-05 04:47:00,671 - root - INFO - lr: 4.3690e-05 gnorm: 1.17 [ 6:12:51<18:27:28] +[titan] 2025-10-05 04:47:11,530 - root - INFO - step: 10080 loss: 2.3867 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1065 +[titan] 2025-10-05 04:47:11,530 - root - INFO - lr: 4.3684e-05 gnorm: 1.12 [ 6:13:02<18:27:16] +[titan] 2025-10-05 04:47:22,402 - root - INFO - step: 10085 loss: 2.3591 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 04:47:22,403 - root - INFO - lr: 4.3678e-05 gnorm: 1.14 [ 6:13:13<18:27:05] +[titan] 2025-10-05 04:47:33,304 - root - INFO - step: 10090 loss: 2.3953 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1153 +[titan] 2025-10-05 04:47:33,305 - root - INFO - lr: 4.3672e-05 gnorm: 1.12 [ 6:13:24<18:26:53] +[titan] 2025-10-05 04:47:44,169 - root - INFO - step: 10095 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2874 global_avg_mtp_loss: 2.1668 +[titan] 2025-10-05 04:47:44,169 - root - INFO - lr: 4.3666e-05 gnorm: 1.20 [ 6:13:35<18:26:41] +[titan] 2025-10-05 04:47:52,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:47:55,091 - root - INFO - step: 10100 loss: 2.4560 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1684 +[titan] 2025-10-05 04:47:55,091 - root - INFO - lr: 4.3659e-05 gnorm: 1.18 [ 6:13:46<18:26:29] +[titan] 2025-10-05 04:48:05,969 - root - INFO - step: 10105 loss: 2.4312 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 04:48:05,969 - root - INFO - lr: 4.3653e-05 gnorm: 1.10 [ 6:13:56<18:26:18] +[titan] 2025-10-05 04:48:16,842 - root - INFO - step: 10110 loss: 2.3985 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1175 +[titan] 2025-10-05 04:48:16,842 - root - INFO - lr: 4.3647e-05 gnorm: 1.15 [ 6:14:07<18:26:06] +[titan] 2025-10-05 04:48:27,739 - root - INFO - step: 10115 loss: 2.4183 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1354 +[titan] 2025-10-05 04:48:27,739 - root - INFO - lr: 4.3641e-05 gnorm: 1.11 [ 6:14:18<18:25:54] +[titan] 2025-10-05 04:48:38,638 - root - INFO - step: 10120 loss: 2.3862 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 04:48:38,639 - root - INFO - lr: 4.3635e-05 gnorm: 1.15 [ 6:14:29<18:25:43] +[titan] 2025-10-05 04:48:49,495 - root - INFO - step: 10125 loss: 2.4046 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1229 +[titan] 2025-10-05 04:48:49,495 - root - INFO - lr: 4.3628e-05 gnorm: 1.13 [ 6:14:40<18:25:31] +[titan] 2025-10-05 04:49:00,374 - root - INFO - step: 10130 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 04:49:00,374 - root - INFO - lr: 
4.3622e-05 gnorm: 1.12 [ 6:14:51<18:25:19] +[titan] 2025-10-05 04:49:11,231 - root - INFO - step: 10135 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1240 +[titan] 2025-10-05 04:49:11,231 - root - INFO - lr: 4.3616e-05 gnorm: 1.11 [ 6:15:02<18:25:07] +[titan] 2025-10-05 04:49:22,073 - root - INFO - step: 10140 loss: 2.4295 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2847 global_avg_mtp_loss: 2.1448 +[titan] 2025-10-05 04:49:22,073 - root - INFO - lr: 4.3610e-05 gnorm: 1.19 [ 6:15:13<18:24:55] +[titan] 2025-10-05 04:49:32,953 - root - INFO - step: 10145 loss: 2.4182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2824 global_avg_mtp_loss: 2.1358 +[titan] 2025-10-05 04:49:32,953 - root - INFO - lr: 4.3603e-05 gnorm: 1.13 [ 6:15:23<18:24:44] +[titan] 2025-10-05 04:49:41,627 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:49:43,809 - root - INFO - step: 10150 loss: 2.4033 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1220 +[titan] 2025-10-05 04:49:43,810 - root - INFO - lr: 4.3597e-05 gnorm: 1.18 [ 6:15:34<18:24:32] +[titan] 2025-10-05 04:49:54,722 - root - INFO - step: 10155 loss: 2.4349 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2858 global_avg_mtp_loss: 2.1491 +[titan] 2025-10-05 04:49:54,722 - root - INFO - lr: 4.3591e-05 gnorm: 1.19 [ 6:15:45<18:24:20] +[titan] 2025-10-05 04:50:05,570 - root - INFO - step: 10160 loss: 2.4925 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2925 global_avg_mtp_loss: 2.2000 +[titan] 2025-10-05 04:50:05,570 - root - INFO - lr: 4.3585e-05 gnorm: 1.18 [ 6:15:56<18:24:08] +[titan] 2025-10-05 04:50:16,417 - root - INFO - step: 10165 loss: 2.3617 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 04:50:16,418 - root - INFO - lr: 4.3578e-05 gnorm: 1.15 [ 6:16:07<18:23:56] +[titan] 2025-10-05 04:50:27,286 - root - INFO - step: 10170 loss: 2.4892 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2946 global_avg_mtp_loss: 2.1947 +[titan] 2025-10-05 04:50:27,286 - root - INFO - lr: 4.3572e-05 gnorm: 1.17 [ 6:16:18<18:23:45] +[titan] 2025-10-05 04:50:38,151 - root - INFO - step: 10175 loss: 2.4728 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1825 +[titan] 2025-10-05 04:50:38,151 - root - INFO - lr: 4.3566e-05 gnorm: 1.14 [ 6:16:29<18:23:33] +[titan] 2025-10-05 04:50:49,013 - root - INFO - step: 10180 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1066 +[titan] 2025-10-05 04:50:49,013 - root - INFO - lr: 
4.3560e-05 gnorm: 1.11 [ 6:16:39<18:23:21] +[titan] 2025-10-05 04:50:59,879 - root - INFO - step: 10185 loss: 2.3308 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0576 +[titan] 2025-10-05 04:50:59,879 - root - INFO - lr: 4.3553e-05 gnorm: 1.10 [ 6:16:50<18:23:09] +[titan] 2025-10-05 04:51:10,735 - root - INFO - step: 10190 loss: 2.4005 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1191 +[titan] 2025-10-05 04:51:10,735 - root - INFO - lr: 4.3547e-05 gnorm: 1.12 [ 6:17:01<18:22:57] +[titan] 2025-10-05 04:51:21,605 - root - INFO - step: 10195 loss: 2.4213 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2844 global_avg_mtp_loss: 2.1370 +[titan] 2025-10-05 04:51:21,605 - root - INFO - lr: 4.3541e-05 gnorm: 1.07 [ 6:17:12<18:22:46] +[titan] 2025-10-05 04:51:30,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:51:32,491 - root - INFO - step: 10200 loss: 2.4592 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1708 +[titan] 2025-10-05 04:51:32,491 - root - INFO - lr: 4.3535e-05 gnorm: 1.19 [ 6:17:23<18:22:34] +[titan] 2025-10-05 04:51:43,357 - root - INFO - step: 10205 loss: 2.3585 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0823 +[titan] 2025-10-05 04:51:43,357 - root - INFO - lr: 4.3528e-05 gnorm: 1.08 [ 6:17:34<18:22:22] +[titan] 2025-10-05 04:51:54,234 - root - INFO - step: 10210 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 04:51:54,234 - root - INFO - lr: 4.3522e-05 gnorm: 1.13 [ 6:17:45<18:22:10] +[titan] 2025-10-05 04:52:05,148 - root - INFO - step: 10215 loss: 2.4224 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2827 global_avg_mtp_loss: 2.1397 +[titan] 2025-10-05 04:52:05,148 - root - INFO - lr: 4.3516e-05 gnorm: 1.15 [ 6:17:56<18:21:59] +[titan] 2025-10-05 04:52:16,012 - root - INFO - step: 10220 loss: 2.3880 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:52:16,012 - root - INFO - lr: 4.3510e-05 gnorm: 1.17 [ 6:18:06<18:21:47] +[titan] 2025-10-05 04:52:26,919 - root - INFO - step: 10225 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0969 +[titan] 2025-10-05 04:52:26,920 - root - INFO - lr: 4.3503e-05 gnorm: 1.13 [ 6:18:17<18:21:35] +[titan] 2025-10-05 04:52:37,795 - root - INFO - step: 10230 loss: 2.4827 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2964 global_avg_mtp_loss: 2.1864 +[titan] 2025-10-05 04:52:37,795 - root - INFO - lr: 
4.3497e-05 gnorm: 1.23 [ 6:18:28<18:21:24] +[titan] 2025-10-05 04:52:48,651 - root - INFO - step: 10235 loss: 2.3739 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0960 +[titan] 2025-10-05 04:52:48,651 - root - INFO - lr: 4.3491e-05 gnorm: 1.14 [ 6:18:39<18:21:12] +[titan] 2025-10-05 04:52:59,594 - root - INFO - step: 10240 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.43 mfu: 42.00% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0761 +[titan] 2025-10-05 04:52:59,594 - root - INFO - lr: 4.3485e-05 gnorm: 1.17 [ 6:18:50<18:21:00] +[titan] 2025-10-05 04:52:59,772 - root - INFO - Dumping profiler traces at step 10240 +[titan] 2025-10-05 04:52:59,812 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 04:53:10,672 - root - INFO - step: 10245 loss: 2.4638 memory: 118.84GiB(85.28%) tps: 29,580 tflops: 410.38 mfu: 41.49% global_avg_ntp_loss: 0.2886 global_avg_mtp_loss: 2.1752 +[titan] 2025-10-05 04:53:10,672 - root - INFO - lr: 4.3478e-05 gnorm: 1.18 [ 6:19:01<18:20:49] +[titan] 2025-10-05 04:53:19,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:53:21,556 - root - INFO - step: 10250 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0638 +[titan] 2025-10-05 04:53:21,556 - root - INFO - lr: 4.3472e-05 gnorm: 1.18 [ 6:19:12<18:20:37] +[titan] 2025-10-05 04:53:32,460 - root - INFO - step: 10255 loss: 2.3782 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.0997 +[titan] 2025-10-05 04:53:32,460 - root - INFO - lr: 4.3466e-05 gnorm: 1.11 [ 6:19:23<18:20:26] +[titan] 2025-10-05 04:53:43,321 - root - INFO - step: 10260 loss: 2.3383 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 04:53:43,321 - root - INFO - lr: 4.3459e-05 gnorm: 1.16 [ 6:19:34<18:20:14] +[titan] 2025-10-05 04:53:54,178 - root - INFO - step: 10265 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 04:53:54,178 - root - INFO - lr: 4.3453e-05 gnorm: 1.16 [ 6:19:45<18:20:02] +[titan] 2025-10-05 04:54:05,007 - root - INFO - step: 10270 loss: 2.3902 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 04:54:05,007 - root - INFO - lr: 4.3447e-05 gnorm: 1.17 [ 6:19:55<18:19:50] +[titan] 2025-10-05 04:54:15,842 - root - INFO - step: 10275 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1238 +[titan] 2025-10-05 04:54:15,843 - root - INFO - lr: 4.3440e-05 gnorm: 1.14 [ 6:20:06<18:19:38] +[titan] 2025-10-05 04:54:26,778 - root - INFO - step: 10280 loss: 2.3590 memory: 118.84GiB(85.28%) tps: 29,966 tflops: 415.73 mfu: 42.04% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0825 +[titan] 2025-10-05 04:54:26,778 - root - INFO - lr: 
4.3434e-05 gnorm: 1.09 [ 6:20:17<18:19:27] +[titan] 2025-10-05 04:54:37,611 - root - INFO - step: 10285 loss: 2.3467 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 04:54:37,611 - root - INFO - lr: 4.3428e-05 gnorm: 1.17 [ 6:20:28<18:19:15] +[titan] 2025-10-05 04:54:48,457 - root - INFO - step: 10290 loss: 2.3098 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 04:54:48,458 - root - INFO - lr: 4.3422e-05 gnorm: 1.13 [ 6:20:39<18:19:03] +[titan] 2025-10-05 04:54:59,307 - root - INFO - step: 10295 loss: 2.4775 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2922 global_avg_mtp_loss: 2.1853 +[titan] 2025-10-05 04:54:59,307 - root - INFO - lr: 4.3415e-05 gnorm: 1.19 [ 6:20:50<18:18:51] +[titan] 2025-10-05 04:55:07,970 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:55:10,154 - root - INFO - step: 10300 loss: 2.4089 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1268 +[titan] 2025-10-05 04:55:10,154 - root - INFO - lr: 4.3409e-05 gnorm: 1.11 [ 6:21:01<18:18:39] +[titan] 2025-10-05 04:55:20,995 - root - INFO - step: 10305 loss: 2.4115 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2843 global_avg_mtp_loss: 2.1272 +[titan] 2025-10-05 04:55:20,995 - root - INFO - lr: 4.3403e-05 gnorm: 1.16 [ 6:21:11<18:18:28] +[titan] 2025-10-05 04:55:31,895 - root - INFO - step: 10310 loss: 2.3942 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.1141 +[titan] 2025-10-05 04:55:31,895 - root - INFO - lr: 4.3396e-05 gnorm: 1.11 [ 6:21:22<18:18:16] +[titan] 2025-10-05 04:55:42,797 - root - INFO - step: 10315 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0890 +[titan] 2025-10-05 04:55:42,797 - root - INFO - lr: 4.3390e-05 gnorm: 1.14 [ 6:21:33<18:18:04] +[titan] 2025-10-05 04:55:53,631 - root - INFO - step: 10320 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0675 +[titan] 2025-10-05 04:55:53,631 - root - INFO - lr: 4.3384e-05 gnorm: 1.13 [ 6:21:44<18:17:52] +[titan] 2025-10-05 04:56:04,495 - root - INFO - step: 10325 loss: 2.3236 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 04:56:04,495 - root - INFO - lr: 4.3377e-05 gnorm: 1.11 [ 6:21:55<18:17:41] +[titan] 2025-10-05 04:56:15,368 - root - INFO - step: 10330 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0554 +[titan] 2025-10-05 04:56:15,368 - root - INFO - lr: 
4.3371e-05 gnorm: 1.11 [ 6:22:06<18:17:29] +[titan] 2025-10-05 04:56:26,235 - root - INFO - step: 10335 loss: 2.3812 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.1021 +[titan] 2025-10-05 04:56:26,235 - root - INFO - lr: 4.3365e-05 gnorm: 1.13 [ 6:22:17<18:17:17] +[titan] 2025-10-05 04:56:37,100 - root - INFO - step: 10340 loss: 2.4139 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 04:56:37,100 - root - INFO - lr: 4.3358e-05 gnorm: 1.15 [ 6:22:28<18:17:05] +[titan] 2025-10-05 04:56:48,014 - root - INFO - step: 10345 loss: 2.3627 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0861 +[titan] 2025-10-05 04:56:48,014 - root - INFO - lr: 4.3352e-05 gnorm: 1.15 [ 6:22:38<18:16:54] +[titan] 2025-10-05 04:56:56,705 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:56:58,888 - root - INFO - step: 10350 loss: 2.3704 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0924 +[titan] 2025-10-05 04:56:58,888 - root - INFO - lr: 4.3346e-05 gnorm: 1.12 [ 6:22:49<18:16:42] +[titan] 2025-10-05 04:57:09,755 - root - INFO - step: 10355 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0403 +[titan] 2025-10-05 04:57:09,755 - root - INFO - lr: 4.3339e-05 gnorm: 1.14 [ 6:23:00<18:16:30] +[titan] 2025-10-05 04:57:20,636 - root - INFO - step: 10360 loss: 2.4246 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1400 +[titan] 2025-10-05 04:57:20,637 - root - INFO - lr: 4.3333e-05 gnorm: 1.16 [ 6:23:11<18:16:18] +[titan] 2025-10-05 04:57:31,520 - root - INFO - step: 10365 loss: 2.3992 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2807 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 04:57:31,521 - root - INFO - lr: 4.3327e-05 gnorm: 1.14 [ 6:23:22<18:16:07] +[titan] 2025-10-05 04:57:42,396 - root - INFO - step: 10370 loss: 2.4732 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2908 global_avg_mtp_loss: 2.1823 +[titan] 2025-10-05 04:57:42,396 - root - INFO - lr: 4.3320e-05 gnorm: 1.14 [ 6:23:33<18:15:55] +[titan] 2025-10-05 04:57:53,311 - root - INFO - step: 10375 loss: 2.3904 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1107 +[titan] 2025-10-05 04:57:53,311 - root - INFO - lr: 4.3314e-05 gnorm: 1.17 [ 6:23:44<18:15:43] +[titan] 2025-10-05 04:58:04,191 - root - INFO - step: 10380 loss: 2.3285 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 04:58:04,191 - root - INFO - lr: 
4.3308e-05 gnorm: 1.15 [ 6:23:55<18:15:32] +[titan] 2025-10-05 04:58:15,071 - root - INFO - step: 10385 loss: 2.4271 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2910 global_avg_mtp_loss: 2.1360 +[titan] 2025-10-05 04:58:15,072 - root - INFO - lr: 4.3301e-05 gnorm: 2.89 [ 6:24:06<18:15:20] +[titan] 2025-10-05 04:58:25,961 - root - INFO - step: 10390 loss: 2.4472 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1607 +[titan] 2025-10-05 04:58:25,961 - root - INFO - lr: 4.3295e-05 gnorm: 1.19 [ 6:24:16<18:15:08] +[titan] 2025-10-05 04:58:36,832 - root - INFO - step: 10395 loss: 2.4116 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 04:58:36,832 - root - INFO - lr: 4.3289e-05 gnorm: 1.19 [ 6:24:27<18:14:57] +[titan] 2025-10-05 04:58:45,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 04:58:47,732 - root - INFO - step: 10400 loss: 2.3889 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 04:58:47,732 - root - INFO - lr: 4.3282e-05 gnorm: 1.15 [ 6:24:38<18:14:45] +[titan] 2025-10-05 04:58:58,620 - root - INFO - step: 10405 loss: 2.4542 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1661 +[titan] 2025-10-05 04:58:58,620 - root - INFO - lr: 4.3276e-05 gnorm: 1.15 [ 6:24:49<18:14:33] +[titan] 2025-10-05 04:59:09,537 - root - INFO - step: 10410 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0667 +[titan] 2025-10-05 04:59:09,538 - root - INFO - lr: 4.3270e-05 gnorm: 1.09 [ 6:25:00<18:14:22] +[titan] 2025-10-05 04:59:20,430 - root - INFO - step: 10415 loss: 2.4412 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2881 global_avg_mtp_loss: 2.1531 +[titan] 2025-10-05 04:59:20,430 - root - INFO - lr: 4.3263e-05 gnorm: 1.11 [ 6:25:11<18:14:10] +[titan] 2025-10-05 04:59:31,331 - root - INFO - step: 10420 loss: 2.4559 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1675 +[titan] 2025-10-05 04:59:31,331 - root - INFO - lr: 4.3257e-05 gnorm: 1.18 [ 6:25:22<18:13:58] +[titan] 2025-10-05 04:59:42,198 - root - INFO - step: 10425 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0782 +[titan] 2025-10-05 04:59:42,198 - root - INFO - lr: 4.3250e-05 gnorm: 1.15 [ 6:25:33<18:13:47] +[titan] 2025-10-05 04:59:53,072 - root - INFO - step: 10430 loss: 2.3763 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0987 +[titan] 2025-10-05 04:59:53,072 - root - INFO - lr: 
4.3244e-05 gnorm: 1.14 [ 6:25:44<18:13:35] +[titan] 2025-10-05 05:00:03,938 - root - INFO - step: 10435 loss: 2.4170 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2835 global_avg_mtp_loss: 2.1335 +[titan] 2025-10-05 05:00:03,939 - root - INFO - lr: 4.3238e-05 gnorm: 1.15 [ 6:25:54<18:13:23] +[titan] 2025-10-05 05:00:14,820 - root - INFO - step: 10440 loss: 2.4296 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2852 global_avg_mtp_loss: 2.1444 +[titan] 2025-10-05 05:00:14,820 - root - INFO - lr: 4.3231e-05 gnorm: 1.12 [ 6:26:05<18:13:11] +[titan] 2025-10-05 05:00:25,686 - root - INFO - step: 10445 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0958 +[titan] 2025-10-05 05:00:25,686 - root - INFO - lr: 4.3225e-05 gnorm: 1.15 [ 6:26:16<18:13:00] +[titan] 2025-10-05 05:00:34,395 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:00:36,573 - root - INFO - step: 10450 loss: 2.3225 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:00:36,574 - root - INFO - lr: 4.3219e-05 gnorm: 1.12 [ 6:26:27<18:12:48] +[titan] 2025-10-05 05:00:47,453 - root - INFO - step: 10455 loss: 2.2956 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 05:00:47,453 - root - INFO - lr: 4.3212e-05 gnorm: 1.12 [ 6:26:38<18:12:36] +[titan] 2025-10-05 05:00:58,326 - root - INFO - step: 10460 loss: 2.4231 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1381 +[titan] 2025-10-05 05:00:58,326 - root - INFO - lr: 4.3206e-05 gnorm: 1.13 [ 6:26:49<18:12:25] +[titan] 2025-10-05 05:01:09,212 - root - INFO - step: 10465 loss: 2.3984 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1166 +[titan] 2025-10-05 05:01:09,212 - root - INFO - lr: 4.3199e-05 gnorm: 1.16 [ 6:27:00<18:12:13] +[titan] 2025-10-05 05:01:20,082 - root - INFO - step: 10470 loss: 2.3857 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1068 +[titan] 2025-10-05 05:01:20,082 - root - INFO - lr: 4.3193e-05 gnorm: 1.17 [ 6:27:11<18:12:01] +[titan] 2025-10-05 05:01:31,025 - root - INFO - step: 10475 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.45 mfu: 42.01% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0845 +[titan] 2025-10-05 05:01:31,025 - root - INFO - lr: 4.3187e-05 gnorm: 1.18 [ 6:27:21<18:11:50] +[titan] 2025-10-05 05:01:41,900 - root - INFO - step: 10480 loss: 2.4469 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1605 +[titan] 2025-10-05 05:01:41,900 - root - INFO - lr: 
4.3180e-05 gnorm: 1.14 [ 6:27:32<18:11:38] +[titan] 2025-10-05 05:01:52,794 - root - INFO - step: 10485 loss: 2.3469 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0715 +[titan] 2025-10-05 05:01:52,794 - root - INFO - lr: 4.3174e-05 gnorm: 1.11 [ 6:27:43<18:11:26] +[titan] 2025-10-05 05:02:03,640 - root - INFO - step: 10490 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:02:03,641 - root - INFO - lr: 4.3167e-05 gnorm: 1.12 [ 6:27:54<18:11:14] +[titan] 2025-10-05 05:02:14,499 - root - INFO - step: 10495 loss: 2.4247 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2861 global_avg_mtp_loss: 2.1386 +[titan] 2025-10-05 05:02:14,499 - root - INFO - lr: 4.3161e-05 gnorm: 1.11 [ 6:28:05<18:11:03] +[titan] 2025-10-05 05:02:23,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:02:25,357 - root - INFO - step: 10500 loss: 2.3813 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1019 +[titan] 2025-10-05 05:02:25,357 - root - INFO - lr: 4.3155e-05 gnorm: 1.11 [ 6:28:16<18:10:51] +[titan] 2025-10-05 05:02:36,309 - root - INFO - step: 10505 loss: 2.4297 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1463 +[titan] 2025-10-05 05:02:36,309 - root - INFO - lr: 4.3148e-05 gnorm: 1.31 [ 6:28:27<18:10:39] +[titan] 2025-10-05 05:02:47,169 - root - INFO - step: 10510 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0752 +[titan] 2025-10-05 05:02:47,169 - root - INFO - lr: 4.3142e-05 gnorm: 1.12 [ 6:28:38<18:10:28] +[titan] 2025-10-05 05:02:58,035 - root - INFO - step: 10515 loss: 2.4087 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1262 +[titan] 2025-10-05 05:02:58,035 - root - INFO - lr: 4.3135e-05 gnorm: 1.20 [ 6:28:48<18:10:16] +[titan] 2025-10-05 05:03:08,894 - root - INFO - step: 10520 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0523 +[titan] 2025-10-05 05:03:08,894 - root - INFO - lr: 4.3129e-05 gnorm: 1.10 [ 6:28:59<18:10:04] +[titan] 2025-10-05 05:03:19,768 - root - INFO - step: 10525 loss: 2.4870 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2914 global_avg_mtp_loss: 2.1956 +[titan] 2025-10-05 05:03:19,768 - root - INFO - lr: 4.3122e-05 gnorm: 1.18 [ 6:29:10<18:09:53] +[titan] 2025-10-05 05:03:30,631 - root - INFO - step: 10530 loss: 2.3951 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:03:30,632 - root - INFO - lr: 
4.3116e-05 gnorm: 1.13 [ 6:29:21<18:09:41] +[titan] 2025-10-05 05:03:41,571 - root - INFO - step: 10535 loss: 2.3677 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.57 mfu: 42.02% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:03:41,571 - root - INFO - lr: 4.3110e-05 gnorm: 1.19 [ 6:29:32<18:09:29] +[titan] 2025-10-05 05:03:52,432 - root - INFO - step: 10540 loss: 2.4252 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2841 global_avg_mtp_loss: 2.1411 +[titan] 2025-10-05 05:03:52,432 - root - INFO - lr: 4.3103e-05 gnorm: 1.19 [ 6:29:43<18:09:18] +[titan] 2025-10-05 05:04:03,276 - root - INFO - step: 10545 loss: 2.4280 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2845 global_avg_mtp_loss: 2.1435 +[titan] 2025-10-05 05:04:03,277 - root - INFO - lr: 4.3097e-05 gnorm: 1.16 [ 6:29:54<18:09:06] +[titan] 2025-10-05 05:04:11,963 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:04:14,149 - root - INFO - step: 10550 loss: 2.2936 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0258 +[titan] 2025-10-05 05:04:14,149 - root - INFO - lr: 4.3090e-05 gnorm: 1.14 [ 6:30:05<18:08:54] +[titan] 2025-10-05 05:04:25,007 - root - INFO - step: 10555 loss: 2.3687 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0910 +[titan] 2025-10-05 05:04:25,007 - root - INFO - lr: 4.3084e-05 gnorm: 1.18 [ 6:30:15<18:08:42] +[titan] 2025-10-05 05:04:35,912 - root - INFO - step: 10560 loss: 2.4093 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2818 global_avg_mtp_loss: 2.1275 +[titan] 2025-10-05 05:04:35,912 - root - INFO - lr: 4.3077e-05 gnorm: 1.23 [ 6:30:26<18:08:31] +[titan] 2025-10-05 05:04:46,752 - root - INFO - step: 10565 loss: 2.4414 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2867 global_avg_mtp_loss: 2.1548 +[titan] 2025-10-05 05:04:46,752 - root - INFO - lr: 4.3071e-05 gnorm: 1.10 [ 6:30:37<18:08:19] +[titan] 2025-10-05 05:04:57,630 - root - INFO - step: 10570 loss: 2.3849 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1052 +[titan] 2025-10-05 05:04:57,630 - root - INFO - lr: 4.3065e-05 gnorm: 1.13 [ 6:30:48<18:08:07] +[titan] 2025-10-05 05:05:08,469 - root - INFO - step: 10575 loss: 2.4749 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2906 global_avg_mtp_loss: 2.1842 +[titan] 2025-10-05 05:05:08,469 - root - INFO - lr: 4.3058e-05 gnorm: 1.19 [ 6:30:59<18:07:55] +[titan] 2025-10-05 05:05:19,334 - root - INFO - step: 10580 loss: 2.3851 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1046 +[titan] 2025-10-05 05:05:19,334 - root - INFO - lr: 
4.3052e-05 gnorm: 1.12 [ 6:31:10<18:07:44] +[titan] 2025-10-05 05:05:30,220 - root - INFO - step: 10585 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0900 +[titan] 2025-10-05 05:05:30,221 - root - INFO - lr: 4.3045e-05 gnorm: 1.17 [ 6:31:21<18:07:32] +[titan] 2025-10-05 05:05:41,134 - root - INFO - step: 10590 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2777 global_avg_mtp_loss: 2.0866 +[titan] 2025-10-05 05:05:41,134 - root - INFO - lr: 4.3039e-05 gnorm: 1.10 [ 6:31:32<18:07:20] +[titan] 2025-10-05 05:05:51,981 - root - INFO - step: 10595 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 05:05:51,981 - root - INFO - lr: 4.3032e-05 gnorm: 1.13 [ 6:31:42<18:07:09] +[titan] 2025-10-05 05:06:00,680 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:06:02,853 - root - INFO - step: 10600 loss: 2.4272 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1436 +[titan] 2025-10-05 05:06:02,853 - root - INFO - lr: 4.3026e-05 gnorm: 1.13 [ 6:31:53<18:06:57] +[titan] 2025-10-05 05:06:13,702 - root - INFO - step: 10605 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1154 +[titan] 2025-10-05 05:06:13,702 - root - INFO - lr: 4.3019e-05 gnorm: 1.18 [ 6:32:04<18:06:45] +[titan] 2025-10-05 05:06:24,546 - root - INFO - step: 10610 loss: 2.4439 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2862 global_avg_mtp_loss: 2.1578 +[titan] 2025-10-05 05:06:24,547 - root - INFO - lr: 4.3013e-05 gnorm: 1.17 [ 6:32:15<18:06:33] +[titan] 2025-10-05 05:06:35,421 - root - INFO - step: 10615 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0903 +[titan] 2025-10-05 05:06:35,421 - root - INFO - lr: 4.3006e-05 gnorm: 1.10 [ 6:32:26<18:06:22] +[titan] 2025-10-05 05:06:46,307 - root - INFO - step: 10620 loss: 2.3950 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1149 +[titan] 2025-10-05 05:06:46,307 - root - INFO - lr: 4.3000e-05 gnorm: 1.15 [ 6:32:37<18:06:10] +[titan] 2025-10-05 05:06:57,167 - root - INFO - step: 10625 loss: 2.3874 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1077 +[titan] 2025-10-05 05:06:57,168 - root - INFO - lr: 4.2993e-05 gnorm: 1.16 [ 6:32:48<18:05:58] +[titan] 2025-10-05 05:07:08,027 - root - INFO - step: 10630 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0719 +[titan] 2025-10-05 05:07:08,027 - root - INFO - lr: 
4.2987e-05 gnorm: 1.17 [ 6:32:58<18:05:47] +[titan] 2025-10-05 05:07:18,912 - root - INFO - step: 10635 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0503 +[titan] 2025-10-05 05:07:18,912 - root - INFO - lr: 4.2981e-05 gnorm: 1.11 [ 6:33:09<18:05:35] +[titan] 2025-10-05 05:07:29,770 - root - INFO - step: 10640 loss: 2.3678 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0909 +[titan] 2025-10-05 05:07:29,770 - root - INFO - lr: 4.2974e-05 gnorm: 1.11 [ 6:33:20<18:05:23] +[titan] 2025-10-05 05:07:40,642 - root - INFO - step: 10645 loss: 2.4070 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2819 global_avg_mtp_loss: 2.1251 +[titan] 2025-10-05 05:07:40,642 - root - INFO - lr: 4.2968e-05 gnorm: 1.15 [ 6:33:31<18:05:11] +[titan] 2025-10-05 05:07:49,320 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:07:51,513 - root - INFO - step: 10650 loss: 2.3800 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 05:07:51,514 - root - INFO - lr: 4.2961e-05 gnorm: 1.13 [ 6:33:42<18:05:00] +[titan] 2025-10-05 05:08:02,386 - root - INFO - step: 10655 loss: 2.2876 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0198 +[titan] 2025-10-05 05:08:02,387 - root - INFO - lr: 4.2955e-05 gnorm: 1.11 [ 6:33:53<18:04:48] +[titan] 2025-10-05 05:08:13,251 - root - INFO - step: 10660 loss: 2.3831 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2796 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 05:08:13,252 - root - INFO - lr: 4.2948e-05 gnorm: 1.14 [ 6:34:04<18:04:36] +[titan] 2025-10-05 05:08:24,145 - root - INFO - step: 10665 loss: 2.4086 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1270 +[titan] 2025-10-05 05:08:24,145 - root - INFO - lr: 4.2942e-05 gnorm: 1.11 [ 6:34:15<18:04:25] +[titan] 2025-10-05 05:08:34,996 - root - INFO - step: 10670 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 05:08:34,996 - root - INFO - lr: 4.2935e-05 gnorm: 1.10 [ 6:34:25<18:04:13] +[titan] 2025-10-05 05:08:45,876 - root - INFO - step: 10675 loss: 2.4056 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1231 +[titan] 2025-10-05 05:08:45,876 - root - INFO - lr: 4.2929e-05 gnorm: 1.11 [ 6:34:36<18:04:01] +[titan] 2025-10-05 05:08:56,738 - root - INFO - step: 10680 loss: 2.4221 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2846 global_avg_mtp_loss: 2.1374 +[titan] 2025-10-05 05:08:56,738 - root - INFO - lr: 
4.2922e-05 gnorm: 1.12 [ 6:34:47<18:03:50] +[titan] 2025-10-05 05:09:07,575 - root - INFO - step: 10685 loss: 2.4893 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2903 global_avg_mtp_loss: 2.1990 +[titan] 2025-10-05 05:09:07,575 - root - INFO - lr: 4.2916e-05 gnorm: 1.14 [ 6:34:58<18:03:38] +[titan] 2025-10-05 05:09:18,438 - root - INFO - step: 10690 loss: 2.3907 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1112 +[titan] 2025-10-05 05:09:18,438 - root - INFO - lr: 4.2909e-05 gnorm: 1.15 [ 6:35:09<18:03:26] +[titan] 2025-10-05 05:09:29,320 - root - INFO - step: 10695 loss: 2.3485 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0733 +[titan] 2025-10-05 05:09:29,320 - root - INFO - lr: 4.2903e-05 gnorm: 1.12 [ 6:35:20<18:03:14] +[titan] 2025-10-05 05:09:38,003 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:09:40,188 - root - INFO - step: 10700 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0915 +[titan] 2025-10-05 05:09:40,188 - root - INFO - lr: 4.2896e-05 gnorm: 1.13 [ 6:35:31<18:03:03] +[titan] 2025-10-05 05:09:51,053 - root - INFO - step: 10705 loss: 2.4598 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1721 +[titan] 2025-10-05 05:09:51,054 - root - INFO - lr: 4.2890e-05 gnorm: 1.14 [ 6:35:41<18:02:51] +[titan] 2025-10-05 05:10:01,930 - root - INFO - step: 10710 loss: 2.4459 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2864 global_avg_mtp_loss: 2.1595 +[titan] 2025-10-05 05:10:01,930 - root - INFO - lr: 4.2883e-05 gnorm: 1.13 [ 6:35:52<18:02:39] +[titan] 2025-10-05 05:10:12,779 - root - INFO - step: 10715 loss: 2.3980 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:10:12,779 - root - INFO - lr: 4.2877e-05 gnorm: 1.10 [ 6:36:03<18:02:28] +[titan] 2025-10-05 05:10:23,641 - root - INFO - step: 10720 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0671 +[titan] 2025-10-05 05:10:23,641 - root - INFO - lr: 4.2870e-05 gnorm: 1.07 [ 6:36:14<18:02:16] +[titan] 2025-10-05 05:10:34,518 - root - INFO - step: 10725 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 05:10:34,518 - root - INFO - lr: 4.2864e-05 gnorm: 1.07 [ 6:36:25<18:02:04] +[titan] 2025-10-05 05:10:45,426 - root - INFO - step: 10730 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2780 global_avg_mtp_loss: 2.0982 +[titan] 2025-10-05 05:10:45,426 - root - INFO - lr: 
4.2857e-05 gnorm: 1.17 [ 6:36:36<18:01:53] +[titan] 2025-10-05 05:10:56,306 - root - INFO - step: 10735 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 05:10:56,306 - root - INFO - lr: 4.2851e-05 gnorm: 1.12 [ 6:36:47<18:01:41] +[titan] 2025-10-05 05:11:07,161 - root - INFO - step: 10740 loss: 2.4225 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:11:07,161 - root - INFO - lr: 4.2844e-05 gnorm: 1.17 [ 6:36:58<18:01:29] +[titan] 2025-10-05 05:11:18,031 - root - INFO - step: 10745 loss: 2.3429 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0692 +[titan] 2025-10-05 05:11:18,031 - root - INFO - lr: 4.2837e-05 gnorm: 1.13 [ 6:37:08<18:01:18] +[titan] 2025-10-05 05:11:26,767 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:11:28,948 - root - INFO - step: 10750 loss: 2.2983 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 05:11:28,948 - root - INFO - lr: 4.2831e-05 gnorm: 1.14 [ 6:37:19<18:01:06] +[titan] 2025-10-05 05:11:33,457 - root - INFO - Dumping profiler traces at step 10752 +[titan] 2025-10-05 05:11:33,496 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:11:40,090 - root - INFO - step: 10755 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 29,411 tflops: 408.03 mfu: 41.26% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 05:11:40,090 - root - INFO - lr: 4.2824e-05 gnorm: 1.14 [ 6:37:30<18:00:55] +[titan] 2025-10-05 05:11:50,993 - root - INFO - step: 10760 loss: 2.3455 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0704 +[titan] 2025-10-05 05:11:50,993 - root - INFO - lr: 4.2818e-05 gnorm: 1.14 [ 6:37:41<18:00:44] +[titan] 2025-10-05 05:12:01,856 - root - INFO - step: 10765 loss: 2.3069 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0370 +[titan] 2025-10-05 05:12:01,857 - root - INFO - lr: 4.2811e-05 gnorm: 1.12 [ 6:37:52<18:00:32] +[titan] 2025-10-05 05:12:12,697 - root - INFO - step: 10770 loss: 2.3339 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 05:12:12,697 - root - INFO - lr: 4.2805e-05 gnorm: 1.09 [ 6:38:03<18:00:20] +[titan] 2025-10-05 05:12:23,573 - root - INFO - step: 10775 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2809 global_avg_mtp_loss: 2.1096 +[titan] 2025-10-05 05:12:23,573 - root - INFO - lr: 4.2798e-05 gnorm: 1.09 [ 6:38:14<18:00:08] +[titan] 2025-10-05 05:12:34,428 - root - INFO - step: 10780 loss: 
2.2969 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0279 +[titan] 2025-10-05 05:12:34,428 - root - INFO - lr: 4.2792e-05 gnorm: 1.09 [ 6:38:25<17:59:57] +[titan] 2025-10-05 05:12:45,414 - root - INFO - step: 10785 loss: 2.3471 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.82 mfu: 41.84% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 05:12:45,414 - root - INFO - lr: 4.2785e-05 gnorm: 1.13 [ 6:38:36<17:59:45] +[titan] 2025-10-05 05:12:56,296 - root - INFO - step: 10790 loss: 2.3752 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.0968 +[titan] 2025-10-05 05:12:56,297 - root - INFO - lr: 4.2779e-05 gnorm: 1.12 [ 6:38:47<17:59:34] +[titan] 2025-10-05 05:13:07,167 - root - INFO - step: 10795 loss: 2.3683 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:13:07,167 - root - INFO - lr: 4.2772e-05 gnorm: 1.15 [ 6:38:58<17:59:22] +[titan] 2025-10-05 05:13:15,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:13:18,033 - root - INFO - step: 10800 loss: 2.3892 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1094 +[titan] 2025-10-05 05:13:18,033 - root - INFO - lr: 4.2765e-05 gnorm: 1.12 [ 6:39:08<17:59:10] +[titan] 2025-10-05 05:13:28,909 - root - INFO - step: 10805 loss: 2.3651 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0879 +[titan] 2025-10-05 05:13:28,909 - root - INFO - lr: 4.2759e-05 gnorm: 1.13 [ 6:39:19<17:58:59] +[titan] 2025-10-05 05:13:39,766 - root - INFO - step: 10810 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 05:13:39,766 - root - INFO - lr: 4.2752e-05 gnorm: 1.11 [ 6:39:30<17:58:47] +[titan] 2025-10-05 05:13:50,697 - root - INFO - step: 10815 loss: 2.4193 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2859 global_avg_mtp_loss: 2.1334 +[titan] 2025-10-05 05:13:50,697 - root - INFO - lr: 4.2746e-05 gnorm: 1.13 [ 6:39:41<17:58:35] +[titan] 2025-10-05 05:14:01,553 - root - INFO - step: 10820 loss: 2.3463 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:14:01,554 - root - INFO - lr: 4.2739e-05 gnorm: 1.09 [ 6:39:52<17:58:24] +[titan] 2025-10-05 05:14:12,442 - root - INFO - step: 10825 loss: 2.3705 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0925 +[titan] 2025-10-05 05:14:12,442 - root - INFO - lr: 4.2733e-05 gnorm: 1.17 [ 6:40:03<17:58:12] +[titan] 2025-10-05 05:14:23,285 - root - INFO - step: 10830 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1177 +[titan] 2025-10-05 05:14:23,285 - root - INFO - lr: 
4.2726e-05 gnorm: 1.13 [ 6:40:14<17:58:00] +[titan] 2025-10-05 05:14:34,165 - root - INFO - step: 10835 loss: 2.3905 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1100 +[titan] 2025-10-05 05:14:34,166 - root - INFO - lr: 4.2720e-05 gnorm: 1.16 [ 6:40:25<17:57:49] +[titan] 2025-10-05 05:14:45,051 - root - INFO - step: 10840 loss: 2.3728 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0952 +[titan] 2025-10-05 05:14:45,052 - root - INFO - lr: 4.2713e-05 gnorm: 1.13 [ 6:40:35<17:57:37] +[titan] 2025-10-05 05:14:55,878 - root - INFO - step: 10845 loss: 2.4128 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1296 +[titan] 2025-10-05 05:14:55,878 - root - INFO - lr: 4.2706e-05 gnorm: 1.10 [ 6:40:46<17:57:25] +[titan] 2025-10-05 05:15:04,525 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:15:06,705 - root - INFO - step: 10850 loss: 2.3718 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:15:06,705 - root - INFO - lr: 4.2700e-05 gnorm: 1.12 [ 6:40:57<17:57:14] +[titan] 2025-10-05 05:15:17,575 - root - INFO - step: 10855 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0802 +[titan] 2025-10-05 05:15:17,575 - root - INFO - lr: 4.2693e-05 gnorm: 1.14 [ 6:41:08<17:57:02] +[titan] 2025-10-05 05:15:28,456 - root - INFO - step: 10860 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0820 +[titan] 2025-10-05 05:15:28,456 - root - INFO - lr: 4.2687e-05 gnorm: 1.13 [ 6:41:19<17:56:50] +[titan] 2025-10-05 05:15:39,313 - root - INFO - step: 10865 loss: 2.4256 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1408 +[titan] 2025-10-05 05:15:39,313 - root - INFO - lr: 4.2680e-05 gnorm: 1.10 [ 6:41:30<17:56:38] +[titan] 2025-10-05 05:15:50,205 - root - INFO - step: 10870 loss: 2.3822 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 05:15:50,205 - root - INFO - lr: 4.2673e-05 gnorm: 1.13 [ 6:41:41<17:56:27] +[titan] 2025-10-05 05:16:01,082 - root - INFO - step: 10875 loss: 2.3634 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:16:01,082 - root - INFO - lr: 4.2667e-05 gnorm: 1.15 [ 6:41:51<17:56:15] +[titan] 2025-10-05 05:16:11,946 - root - INFO - step: 10880 loss: 2.3075 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 05:16:11,946 - root - INFO - lr: 
4.2660e-05 gnorm: 1.14 [ 6:42:02<17:56:04] +[titan] 2025-10-05 05:16:22,841 - root - INFO - step: 10885 loss: 2.4065 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2808 global_avg_mtp_loss: 2.1258 +[titan] 2025-10-05 05:16:22,841 - root - INFO - lr: 4.2654e-05 gnorm: 1.21 [ 6:42:13<17:55:52] +[titan] 2025-10-05 05:16:33,734 - root - INFO - step: 10890 loss: 2.3635 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0864 +[titan] 2025-10-05 05:16:33,734 - root - INFO - lr: 4.2647e-05 gnorm: 1.10 [ 6:42:24<17:55:40] +[titan] 2025-10-05 05:16:44,609 - root - INFO - step: 10895 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1223 +[titan] 2025-10-05 05:16:44,609 - root - INFO - lr: 4.2640e-05 gnorm: 1.11 [ 6:42:35<17:55:29] +[titan] 2025-10-05 05:16:53,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:16:55,473 - root - INFO - step: 10900 loss: 2.3494 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2735 global_avg_mtp_loss: 2.0759 +[titan] 2025-10-05 05:16:55,473 - root - INFO - lr: 4.2634e-05 gnorm: 1.15 [ 6:42:46<17:55:17] +[titan] 2025-10-05 05:17:06,345 - root - INFO - step: 10905 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 05:17:06,345 - root - INFO - lr: 4.2627e-05 gnorm: 1.13 [ 6:42:57<17:55:05] +[titan] 2025-10-05 05:17:17,231 - root - INFO - step: 10910 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0659 +[titan] 2025-10-05 05:17:17,231 - root - INFO - lr: 4.2621e-05 gnorm: 1.17 [ 6:43:08<17:54:54] +[titan] 2025-10-05 05:17:28,109 - root - INFO - step: 10915 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2776 global_avg_mtp_loss: 2.0894 +[titan] 2025-10-05 05:17:28,110 - root - INFO - lr: 4.2614e-05 gnorm: 1.19 [ 6:43:18<17:54:42] +[titan] 2025-10-05 05:17:39,014 - root - INFO - step: 10920 loss: 2.3277 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:17:39,014 - root - INFO - lr: 4.2607e-05 gnorm: 1.14 [ 6:43:29<17:54:31] +[titan] 2025-10-05 05:17:49,944 - root - INFO - step: 10925 loss: 2.3202 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0487 +[titan] 2025-10-05 05:17:49,944 - root - INFO - lr: 4.2601e-05 gnorm: 1.12 [ 6:43:40<17:54:19] +[titan] 2025-10-05 05:18:00,806 - root - INFO - step: 10930 loss: 2.3343 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0611 +[titan] 2025-10-05 05:18:00,807 - root - INFO - lr: 
4.2594e-05 gnorm: 1.12 [ 6:43:51<17:54:07] +[titan] 2025-10-05 05:18:11,668 - root - INFO - step: 10935 loss: 2.4012 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1192 +[titan] 2025-10-05 05:18:11,669 - root - INFO - lr: 4.2588e-05 gnorm: 1.13 [ 6:44:02<17:53:56] +[titan] 2025-10-05 05:18:22,533 - root - INFO - step: 10940 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:18:22,533 - root - INFO - lr: 4.2581e-05 gnorm: 1.10 [ 6:44:13<17:53:44] +[titan] 2025-10-05 05:18:33,393 - root - INFO - step: 10945 loss: 2.3284 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0574 +[titan] 2025-10-05 05:18:33,393 - root - INFO - lr: 4.2574e-05 gnorm: 1.16 [ 6:44:24<17:53:32] +[titan] 2025-10-05 05:18:42,068 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:18:44,246 - root - INFO - step: 10950 loss: 2.3482 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0732 +[titan] 2025-10-05 05:18:44,246 - root - INFO - lr: 4.2568e-05 gnorm: 1.17 [ 6:44:35<17:53:21] +[titan] 2025-10-05 05:18:55,149 - root - INFO - step: 10955 loss: 2.4275 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2853 global_avg_mtp_loss: 2.1422 +[titan] 2025-10-05 05:18:55,149 - root - INFO - lr: 4.2561e-05 gnorm: 1.19 [ 6:44:46<17:53:09] +[titan] 2025-10-05 05:19:06,006 - root - INFO - step: 10960 loss: 2.3559 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 05:19:06,006 - root - INFO - lr: 4.2554e-05 gnorm: 1.17 [ 6:44:56<17:52:57] +[titan] 2025-10-05 05:19:16,844 - root - INFO - step: 10965 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0690 +[titan] 2025-10-05 05:19:16,844 - root - INFO - lr: 4.2548e-05 gnorm: 1.13 [ 6:45:07<17:52:46] +[titan] 2025-10-05 05:19:27,707 - root - INFO - step: 10970 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0665 +[titan] 2025-10-05 05:19:27,707 - root - INFO - lr: 4.2541e-05 gnorm: 1.11 [ 6:45:18<17:52:34] +[titan] 2025-10-05 05:19:38,565 - root - INFO - step: 10975 loss: 2.4017 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2815 global_avg_mtp_loss: 2.1202 +[titan] 2025-10-05 05:19:38,565 - root - INFO - lr: 4.2535e-05 gnorm: 1.13 [ 6:45:29<17:52:22] +[titan] 2025-10-05 05:19:49,430 - root - INFO - step: 10980 loss: 2.3707 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0929 +[titan] 2025-10-05 05:19:49,430 - root - INFO - lr: 
4.2528e-05 gnorm: 1.14 [ 6:45:40<17:52:11] +[titan] 2025-10-05 05:20:00,329 - root - INFO - step: 10985 loss: 2.3910 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2806 global_avg_mtp_loss: 2.1104 +[titan] 2025-10-05 05:20:00,329 - root - INFO - lr: 4.2521e-05 gnorm: 1.11 [ 6:45:51<17:51:59] +[titan] 2025-10-05 05:20:11,199 - root - INFO - step: 10990 loss: 2.2943 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 05:20:11,199 - root - INFO - lr: 4.2515e-05 gnorm: 1.15 [ 6:46:02<17:51:47] +[titan] 2025-10-05 05:20:22,060 - root - INFO - step: 10995 loss: 2.4220 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2829 global_avg_mtp_loss: 2.1390 +[titan] 2025-10-05 05:20:22,060 - root - INFO - lr: 4.2508e-05 gnorm: 1.17 [ 6:46:12<17:51:36] +[titan] 2025-10-05 05:20:30,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:20:32,950 - root - INFO - step: 11000 loss: 2.4329 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2848 global_avg_mtp_loss: 2.1481 +[titan] 2025-10-05 05:20:32,950 - root - INFO - lr: 4.2501e-05 gnorm: 1.13 [ 6:46:23<17:51:24] +[titan] 2025-10-05 05:20:43,793 - root - INFO - step: 11005 loss: 2.3674 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0912 +[titan] 2025-10-05 05:20:43,793 - root - INFO - lr: 4.2495e-05 gnorm: 1.13 [ 6:46:34<17:51:12] +[titan] 2025-10-05 05:20:54,676 - root - INFO - step: 11010 loss: 2.3859 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2784 global_avg_mtp_loss: 2.1074 +[titan] 2025-10-05 05:20:54,677 - root - INFO - lr: 4.2488e-05 gnorm: 1.23 [ 6:46:45<17:51:01] +[titan] 2025-10-05 05:21:05,537 - root - INFO - step: 11015 loss: 2.4219 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1387 +[titan] 2025-10-05 05:21:05,537 - root - INFO - lr: 4.2481e-05 gnorm: 1.14 [ 6:46:56<17:50:49] +[titan] 2025-10-05 05:21:16,444 - root - INFO - step: 11020 loss: 2.3693 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0921 +[titan] 2025-10-05 05:21:16,444 - root - INFO - lr: 4.2475e-05 gnorm: 1.15 [ 6:47:07<17:50:38] +[titan] 2025-10-05 05:21:27,322 - root - INFO - step: 11025 loss: 2.4120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2817 global_avg_mtp_loss: 2.1303 +[titan] 2025-10-05 05:21:27,323 - root - INFO - lr: 4.2468e-05 gnorm: 1.14 [ 6:47:18<17:50:26] +[titan] 2025-10-05 05:21:38,201 - root - INFO - step: 11030 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2721 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:21:38,202 - root - INFO - lr: 
4.2461e-05 gnorm: 1.11 [ 6:47:29<17:50:14] +[titan] 2025-10-05 05:21:49,263 - root - INFO - step: 11035 loss: 2.3662 memory: 118.84GiB(85.28%) tps: 29,623 tflops: 410.98 mfu: 41.55% global_avg_ntp_loss: 0.2773 global_avg_mtp_loss: 2.0889 +[titan] 2025-10-05 05:21:49,264 - root - INFO - lr: 4.2455e-05 gnorm: 1.06 [ 6:47:40<17:50:03] +[titan] 2025-10-05 05:22:00,112 - root - INFO - step: 11040 loss: 2.3713 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0938 +[titan] 2025-10-05 05:22:00,112 - root - INFO - lr: 4.2448e-05 gnorm: 1.16 [ 6:47:50<17:49:52] +[titan] 2025-10-05 05:22:10,978 - root - INFO - step: 11045 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0794 +[titan] 2025-10-05 05:22:10,978 - root - INFO - lr: 4.2441e-05 gnorm: 1.12 [ 6:48:01<17:49:40] +[titan] 2025-10-05 05:22:19,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:22:21,866 - root - INFO - step: 11050 loss: 2.3470 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:22:21,866 - root - INFO - lr: 4.2435e-05 gnorm: 1.18 [ 6:48:12<17:49:28] +[titan] 2025-10-05 05:22:32,725 - root - INFO - step: 11055 loss: 2.4619 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2884 global_avg_mtp_loss: 2.1734 +[titan] 2025-10-05 05:22:32,725 - root - INFO - lr: 4.2428e-05 gnorm: 1.17 [ 6:48:23<17:49:17] +[titan] 2025-10-05 05:22:43,603 - root - INFO - step: 11060 loss: 2.3978 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2802 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 05:22:43,604 - root - INFO - lr: 4.2421e-05 gnorm: 1.18 [ 6:48:34<17:49:05] +[titan] 2025-10-05 05:22:54,557 - root - INFO - step: 11065 loss: 2.3059 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0363 +[titan] 2025-10-05 05:22:54,558 - root - INFO - lr: 4.2415e-05 gnorm: 1.11 [ 6:48:45<17:48:54] +[titan] 2025-10-05 05:23:05,447 - root - INFO - step: 11070 loss: 2.3833 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.1048 +[titan] 2025-10-05 05:23:05,447 - root - INFO - lr: 4.2408e-05 gnorm: 1.15 [ 6:48:56<17:48:42] +[titan] 2025-10-05 05:23:16,319 - root - INFO - step: 11075 loss: 2.3472 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:23:16,319 - root - INFO - lr: 4.2401e-05 gnorm: 1.12 [ 6:49:07<17:48:30] +[titan] 2025-10-05 05:23:27,231 - root - INFO - step: 11080 loss: 2.3159 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0451 +[titan] 2025-10-05 05:23:27,231 - root - INFO - lr: 
4.2395e-05 gnorm: 1.15 [ 6:49:18<17:48:19] +[titan] 2025-10-05 05:23:38,120 - root - INFO - step: 11085 loss: 2.3918 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2801 global_avg_mtp_loss: 2.1116 +[titan] 2025-10-05 05:23:38,120 - root - INFO - lr: 4.2388e-05 gnorm: 1.10 [ 6:49:28<17:48:07] +[titan] 2025-10-05 05:23:48,999 - root - INFO - step: 11090 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0724 +[titan] 2025-10-05 05:23:48,999 - root - INFO - lr: 4.2381e-05 gnorm: 1.12 [ 6:49:39<17:47:56] +[titan] 2025-10-05 05:23:59,936 - root - INFO - step: 11095 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:23:59,936 - root - INFO - lr: 4.2375e-05 gnorm: 1.15 [ 6:49:50<17:47:44] +[titan] 2025-10-05 05:24:08,638 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:24:10,828 - root - INFO - step: 11100 loss: 2.3700 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0939 +[titan] 2025-10-05 05:24:10,828 - root - INFO - lr: 4.2368e-05 gnorm: 1.16 [ 6:50:01<17:47:33] +[titan] 2025-10-05 05:24:21,716 - root - INFO - step: 11105 loss: 2.3080 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 05:24:21,716 - root - INFO - lr: 4.2361e-05 gnorm: 1.11 [ 6:50:12<17:47:21] +[titan] 2025-10-05 05:24:32,602 - root - INFO - step: 11110 loss: 2.3389 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0656 +[titan] 2025-10-05 05:24:32,602 - root - INFO - lr: 4.2354e-05 gnorm: 1.18 [ 6:50:23<17:47:09] +[titan] 2025-10-05 05:24:43,497 - root - INFO - step: 11115 loss: 2.4025 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2813 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:24:43,498 - root - INFO - lr: 4.2348e-05 gnorm: 1.16 [ 6:50:34<17:46:58] +[titan] 2025-10-05 05:24:54,382 - root - INFO - step: 11120 loss: 2.3434 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0676 +[titan] 2025-10-05 05:24:54,382 - root - INFO - lr: 4.2341e-05 gnorm: 1.17 [ 6:50:45<17:46:46] +[titan] 2025-10-05 05:25:05,236 - root - INFO - step: 11125 loss: 2.3569 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:25:05,236 - root - INFO - lr: 4.2334e-05 gnorm: 1.14 [ 6:50:56<17:46:35] +[titan] 2025-10-05 05:25:16,090 - root - INFO - step: 11130 loss: 2.3586 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0834 +[titan] 2025-10-05 05:25:16,091 - root - INFO - lr: 
4.2328e-05 gnorm: 1.10 [ 6:51:06<17:46:23] +[titan] 2025-10-05 05:25:26,938 - root - INFO - step: 11135 loss: 2.3923 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1124 +[titan] 2025-10-05 05:25:26,939 - root - INFO - lr: 4.2321e-05 gnorm: 1.15 [ 6:51:17<17:46:11] +[titan] 2025-10-05 05:25:37,783 - root - INFO - step: 11140 loss: 2.3864 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1080 +[titan] 2025-10-05 05:25:37,783 - root - INFO - lr: 4.2314e-05 gnorm: 1.15 [ 6:51:28<17:45:59] +[titan] 2025-10-05 05:25:48,642 - root - INFO - step: 11145 loss: 2.3257 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0532 +[titan] 2025-10-05 05:25:48,642 - root - INFO - lr: 4.2307e-05 gnorm: 1.12 [ 6:51:39<17:45:48] +[titan] 2025-10-05 05:25:57,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:25:59,544 - root - INFO - step: 11150 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0565 +[titan] 2025-10-05 05:25:59,544 - root - INFO - lr: 4.2301e-05 gnorm: 1.12 [ 6:51:50<17:45:36] +[titan] 2025-10-05 05:26:10,397 - root - INFO - step: 11155 loss: 2.3187 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0478 +[titan] 2025-10-05 05:26:10,397 - root - INFO - lr: 4.2294e-05 gnorm: 1.08 [ 6:52:01<17:45:25] +[titan] 2025-10-05 05:26:21,273 - root - INFO - step: 11160 loss: 2.3623 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0869 +[titan] 2025-10-05 05:26:21,273 - root - INFO - lr: 4.2287e-05 gnorm: 1.14 [ 6:52:12<17:45:13] +[titan] 2025-10-05 05:26:32,142 - root - INFO - step: 11165 loss: 2.3541 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:26:32,142 - root - INFO - lr: 4.2281e-05 gnorm: 1.13 [ 6:52:23<17:45:01] +[titan] 2025-10-05 05:26:43,035 - root - INFO - step: 11170 loss: 2.3854 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.1067 +[titan] 2025-10-05 05:26:43,035 - root - INFO - lr: 4.2274e-05 gnorm: 1.13 [ 6:52:33<17:44:50] +[titan] 2025-10-05 05:26:53,989 - root - INFO - step: 11175 loss: 2.3643 memory: 118.84GiB(85.28%) tps: 29,916 tflops: 415.04 mfu: 41.97% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 05:26:53,989 - root - INFO - lr: 4.2267e-05 gnorm: 1.12 [ 6:52:44<17:44:38] +[titan] 2025-10-05 05:27:04,880 - root - INFO - step: 11180 loss: 2.3397 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0666 +[titan] 2025-10-05 05:27:04,880 - root - INFO - lr: 
4.2260e-05 gnorm: 1.19 [ 6:52:55<17:44:27] +[titan] 2025-10-05 05:27:15,757 - root - INFO - step: 11185 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0699 +[titan] 2025-10-05 05:27:15,757 - root - INFO - lr: 4.2254e-05 gnorm: 1.15 [ 6:53:06<17:44:15] +[titan] 2025-10-05 05:27:26,622 - root - INFO - step: 11190 loss: 2.3961 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2799 global_avg_mtp_loss: 2.1162 +[titan] 2025-10-05 05:27:26,622 - root - INFO - lr: 4.2247e-05 gnorm: 1.10 [ 6:53:17<17:44:04] +[titan] 2025-10-05 05:27:37,484 - root - INFO - step: 11195 loss: 2.3721 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 05:27:37,484 - root - INFO - lr: 4.2240e-05 gnorm: 1.15 [ 6:53:28<17:43:52] +[titan] 2025-10-05 05:27:46,183 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:27:48,372 - root - INFO - step: 11200 loss: 2.3645 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0877 +[titan] 2025-10-05 05:27:48,372 - root - INFO - lr: 4.2233e-05 gnorm: 1.17 [ 6:53:39<17:43:40] +[titan] 2025-10-05 05:27:59,307 - root - INFO - step: 11205 loss: 2.3529 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0788 +[titan] 2025-10-05 05:27:59,307 - root - INFO - lr: 4.2227e-05 gnorm: 1.09 [ 6:53:50<17:43:29] +[titan] 2025-10-05 05:28:10,176 - root - INFO - step: 11210 loss: 2.3175 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0464 +[titan] 2025-10-05 05:28:10,176 - root - INFO - lr: 4.2220e-05 gnorm: 1.15 [ 6:54:01<17:43:17] +[titan] 2025-10-05 05:28:21,076 - root - INFO - step: 11215 loss: 2.3354 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 05:28:21,076 - root - INFO - lr: 4.2213e-05 gnorm: 1.14 [ 6:54:11<17:43:06] +[titan] 2025-10-05 05:28:31,935 - root - INFO - step: 11220 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0592 +[titan] 2025-10-05 05:28:31,935 - root - INFO - lr: 4.2206e-05 gnorm: 1.10 [ 6:54:22<17:42:54] +[titan] 2025-10-05 05:28:42,804 - root - INFO - step: 11225 loss: 2.2877 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0210 +[titan] 2025-10-05 05:28:42,805 - root - INFO - lr: 4.2200e-05 gnorm: 1.15 [ 6:54:33<17:42:42] +[titan] 2025-10-05 05:28:53,662 - root - INFO - step: 11230 loss: 2.3995 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2810 global_avg_mtp_loss: 2.1185 +[titan] 2025-10-05 05:28:53,662 - root - INFO - lr: 
4.2193e-05 gnorm: 1.17 [ 6:54:44<17:42:31] +[titan] 2025-10-05 05:29:04,634 - root - INFO - step: 11235 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 05:29:04,634 - root - INFO - lr: 4.2186e-05 gnorm: 1.17 [ 6:54:55<17:42:19] +[titan] 2025-10-05 05:29:15,534 - root - INFO - step: 11240 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0842 +[titan] 2025-10-05 05:29:15,535 - root - INFO - lr: 4.2179e-05 gnorm: 1.12 [ 6:55:06<17:42:08] +[titan] 2025-10-05 05:29:26,383 - root - INFO - step: 11245 loss: 2.3641 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2770 global_avg_mtp_loss: 2.0871 +[titan] 2025-10-05 05:29:26,383 - root - INFO - lr: 4.2173e-05 gnorm: 1.08 [ 6:55:17<17:41:56] +[titan] 2025-10-05 05:29:35,042 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:29:37,226 - root - INFO - step: 11250 loss: 2.3893 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1101 +[titan] 2025-10-05 05:29:37,226 - root - INFO - lr: 4.2166e-05 gnorm: 1.11 [ 6:55:28<17:41:45] +[titan] 2025-10-05 05:29:48,080 - root - INFO - step: 11255 loss: 2.3315 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0589 +[titan] 2025-10-05 05:29:48,080 - root - INFO - lr: 4.2159e-05 gnorm: 1.15 [ 6:55:38<17:41:33] +[titan] 2025-10-05 05:29:58,912 - root - INFO - step: 11260 loss: 2.3790 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2789 global_avg_mtp_loss: 2.1000 +[titan] 2025-10-05 05:29:58,912 - root - INFO - lr: 4.2152e-05 gnorm: 1.11 [ 6:55:49<17:41:21] +[titan] 2025-10-05 05:30:07,835 - root - INFO - Dumping profiler traces at step 11264 +[titan] 2025-10-05 05:30:07,872 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:30:10,050 - root - INFO - step: 11265 loss: 2.2811 memory: 118.84GiB(85.28%) tps: 29,420 tflops: 408.16 mfu: 41.27% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 05:30:10,051 - root - INFO - lr: 4.2146e-05 gnorm: 1.10 [ 6:56:00<17:41:10] +[titan] 2025-10-05 05:30:20,892 - root - INFO - step: 11270 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0809 +[titan] 2025-10-05 05:30:20,892 - root - INFO - lr: 4.2139e-05 gnorm: 1.12 [ 6:56:11<17:40:59] +[titan] 2025-10-05 05:30:31,735 - root - INFO - step: 11275 loss: 2.3738 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0947 +[titan] 2025-10-05 05:30:31,735 - root - INFO - lr: 4.2132e-05 gnorm: 1.10 [ 6:56:22<17:40:47] +[titan] 2025-10-05 05:30:42,574 - root - INFO - step: 11280 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 05:30:42,574 - root - INFO - lr: 4.2125e-05 gnorm: 1.10 [ 6:56:33<17:40:35] +[titan] 2025-10-05 05:30:53,425 - root - INFO - step: 11285 loss: 2.3915 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2794 global_avg_mtp_loss: 2.1121 +[titan] 2025-10-05 05:30:53,426 - root - INFO - lr: 4.2118e-05 gnorm: 1.14 [ 6:56:44<17:40:24] +[titan] 2025-10-05 05:31:04,306 - root - INFO - step: 11290 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1164 +[titan] 2025-10-05 05:31:04,307 - root - INFO - lr: 4.2112e-05 gnorm: 1.16 [ 6:56:55<17:40:12] +[titan] 2025-10-05 05:31:15,165 - root - INFO - step: 11295 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 05:31:15,165 - root - INFO - lr: 4.2105e-05 gnorm: 1.16 [ 6:57:06<17:40:00] +[titan] 2025-10-05 05:31:23,836 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:31:26,028 - root - INFO - step: 11300 loss: 2.4122 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.31% global_avg_ntp_loss: 0.2825 global_avg_mtp_loss: 2.1297 +[titan] 2025-10-05 05:31:26,028 - root - INFO - lr: 4.2098e-05 gnorm: 1.16 [ 6:57:16<17:39:49] +[titan] 2025-10-05 05:31:36,890 - root - INFO - step: 11305 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 05:31:36,890 - root - INFO - lr: 4.2091e-05 gnorm: 1.19 [ 6:57:27<17:39:37] +[titan] 2025-10-05 05:31:47,751 - root - INFO - step: 11310 loss: 2.3629 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0867 +[titan] 2025-10-05 05:31:47,751 - root - INFO - lr: 4.2084e-05 gnorm: 1.13 [ 6:57:38<17:39:25] +[titan] 2025-10-05 05:31:58,646 - root - INFO - step: 11315 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0333 +[titan] 2025-10-05 05:31:58,646 - root - INFO - lr: 4.2078e-05 gnorm: 1.14 [ 6:57:49<17:39:14] +[titan] 2025-10-05 05:32:09,512 - root - INFO - step: 11320 loss: 2.4605 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2875 global_avg_mtp_loss: 2.1730 +[titan] 2025-10-05 05:32:09,513 - root - INFO - lr: 4.2071e-05 gnorm: 1.15 [ 6:58:00<17:39:02] +[titan] 2025-10-05 05:32:20,392 - root - INFO - step: 11325 loss: 2.3568 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0807 +[titan] 2025-10-05 05:32:20,392 - root - INFO - lr: 4.2064e-05 gnorm: 1.12 [ 6:58:11<17:38:51] +[titan] 2025-10-05 05:32:31,290 - root - INFO - step: 11330 loss: 2.4028 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1208 +[titan] 2025-10-05 05:32:31,290 - root - INFO - lr: 
4.2057e-05 gnorm: 1.14 [ 6:58:22<17:38:39] +[titan] 2025-10-05 05:32:42,174 - root - INFO - step: 11335 loss: 2.3592 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:32:42,174 - root - INFO - lr: 4.2050e-05 gnorm: 1.16 [ 6:58:33<17:38:28] +[titan] 2025-10-05 05:32:53,063 - root - INFO - step: 11340 loss: 2.3303 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0571 +[titan] 2025-10-05 05:32:53,064 - root - INFO - lr: 4.2044e-05 gnorm: 1.10 [ 6:58:43<17:38:16] +[titan] 2025-10-05 05:33:03,971 - root - INFO - step: 11345 loss: 2.3871 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.1089 +[titan] 2025-10-05 05:33:03,972 - root - INFO - lr: 4.2037e-05 gnorm: 1.10 [ 6:58:54<17:38:05] +[titan] 2025-10-05 05:33:12,662 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:33:14,854 - root - INFO - step: 11350 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:33:14,854 - root - INFO - lr: 4.2030e-05 gnorm: 1.16 [ 6:59:05<17:37:53] +[titan] 2025-10-05 05:33:25,725 - root - INFO - step: 11355 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:33:25,725 - root - INFO - lr: 4.2023e-05 gnorm: 1.14 [ 6:59:16<17:37:41] +[titan] 2025-10-05 05:33:36,578 - root - INFO - step: 11360 loss: 2.2858 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0185 +[titan] 2025-10-05 05:33:36,578 - root - INFO - lr: 4.2016e-05 gnorm: 1.08 [ 6:59:27<17:37:30] +[titan] 2025-10-05 05:33:47,452 - root - INFO - step: 11365 loss: 2.3325 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:33:47,452 - root - INFO - lr: 4.2010e-05 gnorm: 1.07 [ 6:59:38<17:37:18] +[titan] 2025-10-05 05:33:58,347 - root - INFO - step: 11370 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0475 +[titan] 2025-10-05 05:33:58,347 - root - INFO - lr: 4.2003e-05 gnorm: 1.09 [ 6:59:49<17:37:07] +[titan] 2025-10-05 05:34:09,277 - root - INFO - step: 11375 loss: 2.4178 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.93 mfu: 42.06% global_avg_ntp_loss: 0.2837 global_avg_mtp_loss: 2.1340 +[titan] 2025-10-05 05:34:09,277 - root - INFO - lr: 4.1996e-05 gnorm: 1.13 [ 7:00:00<17:36:55] +[titan] 2025-10-05 05:34:20,157 - root - INFO - step: 11380 loss: 2.3349 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:34:20,157 - root - INFO - lr: 
4.1989e-05 gnorm: 1.18 [ 7:00:10<17:36:44] +[titan] 2025-10-05 05:34:31,049 - root - INFO - step: 11385 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0728 +[titan] 2025-10-05 05:34:31,049 - root - INFO - lr: 4.1982e-05 gnorm: 1.10 [ 7:00:21<17:36:32] +[titan] 2025-10-05 05:34:41,929 - root - INFO - step: 11390 loss: 2.4099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2820 global_avg_mtp_loss: 2.1279 +[titan] 2025-10-05 05:34:41,929 - root - INFO - lr: 4.1975e-05 gnorm: 1.10 [ 7:00:32<17:36:20] +[titan] 2025-10-05 05:34:52,785 - root - INFO - step: 11395 loss: 2.3564 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0813 +[titan] 2025-10-05 05:34:52,785 - root - INFO - lr: 4.1969e-05 gnorm: 1.15 [ 7:00:43<17:36:09] +[titan] 2025-10-05 05:35:01,513 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:35:03,690 - root - INFO - step: 11400 loss: 2.4143 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2828 global_avg_mtp_loss: 2.1315 +[titan] 2025-10-05 05:35:03,690 - root - INFO - lr: 4.1962e-05 gnorm: 1.14 [ 7:00:54<17:35:57] +[titan] 2025-10-05 05:35:14,535 - root - INFO - step: 11405 loss: 2.3829 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1024 +[titan] 2025-10-05 05:35:14,536 - root - INFO - lr: 4.1955e-05 gnorm: 1.17 [ 7:01:05<17:35:46] +[titan] 2025-10-05 05:35:25,412 - root - INFO - step: 11410 loss: 2.3190 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2714 global_avg_mtp_loss: 2.0477 +[titan] 2025-10-05 05:35:25,412 - root - INFO - lr: 4.1948e-05 gnorm: 1.13 [ 7:01:16<17:35:34] +[titan] 2025-10-05 05:35:36,263 - root - INFO - step: 11415 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0351 +[titan] 2025-10-05 05:35:36,263 - root - INFO - lr: 4.1941e-05 gnorm: 1.12 [ 7:01:27<17:35:22] +[titan] 2025-10-05 05:35:47,122 - root - INFO - step: 11420 loss: 2.3875 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1081 +[titan] 2025-10-05 05:35:47,122 - root - INFO - lr: 4.1934e-05 gnorm: 1.14 [ 7:01:37<17:35:11] +[titan] 2025-10-05 05:35:57,974 - root - INFO - step: 11425 loss: 2.3552 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0795 +[titan] 2025-10-05 05:35:57,974 - root - INFO - lr: 4.1928e-05 gnorm: 1.13 [ 7:01:48<17:34:59] +[titan] 2025-10-05 05:36:08,849 - root - INFO - step: 11430 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 05:36:08,849 - root - INFO - lr: 
4.1921e-05 gnorm: 1.17 [ 7:01:59<17:34:48] +[titan] 2025-10-05 05:36:19,695 - root - INFO - step: 11435 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2769 global_avg_mtp_loss: 2.0901 +[titan] 2025-10-05 05:36:19,695 - root - INFO - lr: 4.1914e-05 gnorm: 1.16 [ 7:02:10<17:34:36] +[titan] 2025-10-05 05:36:30,564 - root - INFO - step: 11440 loss: 2.3449 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0705 +[titan] 2025-10-05 05:36:30,564 - root - INFO - lr: 4.1907e-05 gnorm: 1.08 [ 7:02:21<17:34:24] +[titan] 2025-10-05 05:36:41,427 - root - INFO - step: 11445 loss: 2.4403 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1539 +[titan] 2025-10-05 05:36:41,427 - root - INFO - lr: 4.1900e-05 gnorm: 1.15 [ 7:02:32<17:34:13] +[titan] 2025-10-05 05:36:50,091 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:36:52,270 - root - INFO - step: 11450 loss: 2.3496 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 05:36:52,270 - root - INFO - lr: 4.1893e-05 gnorm: 1.14 [ 7:02:43<17:34:01] +[titan] 2025-10-05 05:37:03,144 - root - INFO - step: 11455 loss: 2.3480 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 05:37:03,145 - root - INFO - lr: 4.1886e-05 gnorm: 1.13 [ 7:02:53<17:33:50] +[titan] 2025-10-05 05:37:13,972 - root - INFO - step: 11460 loss: 2.3839 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2797 global_avg_mtp_loss: 2.1042 +[titan] 2025-10-05 05:37:13,973 - root - INFO - lr: 4.1880e-05 gnorm: 1.13 [ 7:03:04<17:33:38] +[titan] 2025-10-05 05:37:24,845 - root - INFO - step: 11465 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0606 +[titan] 2025-10-05 05:37:24,845 - root - INFO - lr: 4.1873e-05 gnorm: 1.16 [ 7:03:15<17:33:26] +[titan] 2025-10-05 05:37:35,703 - root - INFO - step: 11470 loss: 2.3317 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0582 +[titan] 2025-10-05 05:37:35,703 - root - INFO - lr: 4.1866e-05 gnorm: 1.10 [ 7:03:26<17:33:15] +[titan] 2025-10-05 05:37:46,570 - root - INFO - step: 11475 loss: 2.4200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1368 +[titan] 2025-10-05 05:37:46,570 - root - INFO - lr: 4.1859e-05 gnorm: 1.18 [ 7:03:37<17:33:03] +[titan] 2025-10-05 05:37:57,446 - root - INFO - step: 11480 loss: 2.3142 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0433 +[titan] 2025-10-05 05:37:57,447 - root - INFO - lr: 
4.1852e-05 gnorm: 1.10 [ 7:03:48<17:32:51] +[titan] 2025-10-05 05:38:08,329 - root - INFO - step: 11485 loss: 2.3042 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0343 +[titan] 2025-10-05 05:38:08,329 - root - INFO - lr: 4.1845e-05 gnorm: 1.16 [ 7:03:59<17:32:40] +[titan] 2025-10-05 05:38:19,195 - root - INFO - step: 11490 loss: 2.4232 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2830 global_avg_mtp_loss: 2.1402 +[titan] 2025-10-05 05:38:19,195 - root - INFO - lr: 4.1838e-05 gnorm: 1.17 [ 7:04:10<17:32:28] +[titan] 2025-10-05 05:38:30,073 - root - INFO - step: 11495 loss: 2.3563 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0808 +[titan] 2025-10-05 05:38:30,073 - root - INFO - lr: 4.1831e-05 gnorm: 1.12 [ 7:04:20<17:32:17] +[titan] 2025-10-05 05:38:38,739 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:38:40,929 - root - INFO - step: 11500 loss: 2.3519 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0760 +[titan] 2025-10-05 05:38:40,929 - root - INFO - lr: 4.1825e-05 gnorm: 1.09 [ 7:04:31<17:32:05] +[titan] 2025-10-05 05:38:51,791 - root - INFO - step: 11505 loss: 2.3513 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 05:38:51,792 - root - INFO - lr: 4.1818e-05 gnorm: 1.18 [ 7:04:42<17:31:54] +[titan] 2025-10-05 05:39:02,689 - root - INFO - step: 11510 loss: 2.3200 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0488 +[titan] 2025-10-05 05:39:02,689 - root - INFO - lr: 4.1811e-05 gnorm: 1.13 [ 7:04:53<17:31:42] +[titan] 2025-10-05 05:39:13,585 - root - INFO - step: 11515 loss: 2.4548 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2877 global_avg_mtp_loss: 2.1671 +[titan] 2025-10-05 05:39:13,586 - root - INFO - lr: 4.1804e-05 gnorm: 1.13 [ 7:05:04<17:31:31] +[titan] 2025-10-05 05:39:24,449 - root - INFO - step: 11520 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0252 +[titan] 2025-10-05 05:39:24,449 - root - INFO - lr: 4.1797e-05 gnorm: 1.15 [ 7:05:15<17:31:19] +[titan] 2025-10-05 05:39:35,295 - root - INFO - step: 11525 loss: 2.2866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 05:39:35,295 - root - INFO - lr: 4.1790e-05 gnorm: 1.07 [ 7:05:26<17:31:07] +[titan] 2025-10-05 05:39:46,183 - root - INFO - step: 11530 loss: 2.3611 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0847 +[titan] 2025-10-05 05:39:46,183 - root - INFO - lr: 
4.1783e-05 gnorm: 1.14 [ 7:05:37<17:30:56] +[titan] 2025-10-05 05:39:57,043 - root - INFO - step: 11535 loss: 2.3457 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 05:39:57,043 - root - INFO - lr: 4.1776e-05 gnorm: 1.14 [ 7:05:47<17:30:44] +[titan] 2025-10-05 05:40:07,933 - root - INFO - step: 11540 loss: 2.3581 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0821 +[titan] 2025-10-05 05:40:07,934 - root - INFO - lr: 4.1769e-05 gnorm: 1.10 [ 7:05:58<17:30:33] +[titan] 2025-10-05 05:40:18,821 - root - INFO - step: 11545 loss: 2.4229 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2836 global_avg_mtp_loss: 2.1393 +[titan] 2025-10-05 05:40:18,821 - root - INFO - lr: 4.1763e-05 gnorm: 1.15 [ 7:06:09<17:30:21] +[titan] 2025-10-05 05:40:27,478 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:40:29,677 - root - INFO - step: 11550 loss: 2.3762 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2791 global_avg_mtp_loss: 2.0971 +[titan] 2025-10-05 05:40:29,677 - root - INFO - lr: 4.1756e-05 gnorm: 1.16 [ 7:06:20<17:30:09] +[titan] 2025-10-05 05:40:40,531 - root - INFO - step: 11555 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0225 +[titan] 2025-10-05 05:40:40,531 - root - INFO - lr: 4.1749e-05 gnorm: 1.07 [ 7:06:31<17:29:58] +[titan] 2025-10-05 05:40:51,372 - root - INFO - step: 11560 loss: 2.3640 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2772 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 05:40:51,372 - root - INFO - lr: 4.1742e-05 gnorm: 1.13 [ 7:06:42<17:29:46] +[titan] 2025-10-05 05:41:02,211 - root - INFO - step: 11565 loss: 2.3067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0371 +[titan] 2025-10-05 05:41:02,211 - root - INFO - lr: 4.1735e-05 gnorm: 1.09 [ 7:06:53<17:29:35] +[titan] 2025-10-05 05:41:13,062 - root - INFO - step: 11570 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 05:41:13,062 - root - INFO - lr: 4.1728e-05 gnorm: 1.08 [ 7:07:03<17:29:23] +[titan] 2025-10-05 05:41:23,914 - root - INFO - step: 11575 loss: 2.3751 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 05:41:23,914 - root - INFO - lr: 4.1721e-05 gnorm: 1.11 [ 7:07:14<17:29:11] +[titan] 2025-10-05 05:41:34,780 - root - INFO - step: 11580 loss: 2.3670 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0896 +[titan] 2025-10-05 05:41:34,780 - root - INFO - lr: 
4.1714e-05 gnorm: 1.16 [ 7:07:25<17:29:00] +[titan] 2025-10-05 05:41:45,632 - root - INFO - step: 11585 loss: 2.3149 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0440 +[titan] 2025-10-05 05:41:45,632 - root - INFO - lr: 4.1707e-05 gnorm: 1.12 [ 7:07:36<17:28:48] +[titan] 2025-10-05 05:41:56,483 - root - INFO - step: 11590 loss: 2.2891 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0207 +[titan] 2025-10-05 05:41:56,483 - root - INFO - lr: 4.1700e-05 gnorm: 1.11 [ 7:07:47<17:28:37] +[titan] 2025-10-05 05:42:07,367 - root - INFO - step: 11595 loss: 2.3114 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 05:42:07,367 - root - INFO - lr: 4.1693e-05 gnorm: 1.09 [ 7:07:58<17:28:25] +[titan] 2025-10-05 05:42:16,038 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:42:18,229 - root - INFO - step: 11600 loss: 2.3596 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0839 +[titan] 2025-10-05 05:42:18,229 - root - INFO - lr: 4.1686e-05 gnorm: 1.13 [ 7:08:09<17:28:13] +[titan] 2025-10-05 05:42:29,091 - root - INFO - step: 11605 loss: 2.3723 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0941 +[titan] 2025-10-05 05:42:29,091 - root - INFO - lr: 4.1680e-05 gnorm: 1.11 [ 7:08:19<17:28:02] +[titan] 2025-10-05 05:42:39,944 - root - INFO - step: 11610 loss: 2.3331 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0601 +[titan] 2025-10-05 05:42:39,944 - root - INFO - lr: 4.1673e-05 gnorm: 1.12 [ 7:08:30<17:27:50] +[titan] 2025-10-05 05:42:50,809 - root - INFO - step: 11615 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0661 +[titan] 2025-10-05 05:42:50,809 - root - INFO - lr: 4.1666e-05 gnorm: 1.14 [ 7:08:41<17:27:39] +[titan] 2025-10-05 05:43:01,660 - root - INFO - step: 11620 loss: 2.3817 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1022 +[titan] 2025-10-05 05:43:01,660 - root - INFO - lr: 4.1659e-05 gnorm: 1.16 [ 7:08:52<17:27:27] +[titan] 2025-10-05 05:43:12,542 - root - INFO - step: 11625 loss: 2.3129 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0422 +[titan] 2025-10-05 05:43:12,542 - root - INFO - lr: 4.1652e-05 gnorm: 1.15 [ 7:09:03<17:27:15] +[titan] 2025-10-05 05:43:23,381 - root - INFO - step: 11630 loss: 2.3032 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 05:43:23,381 - root - INFO - lr: 
4.1645e-05 gnorm: 1.17 [ 7:09:14<17:27:04] +[titan] 2025-10-05 05:43:34,203 - root - INFO - step: 11635 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0697 +[titan] 2025-10-05 05:43:34,203 - root - INFO - lr: 4.1638e-05 gnorm: 1.17 [ 7:09:25<17:26:52] +[titan] 2025-10-05 05:43:45,042 - root - INFO - step: 11640 loss: 2.3761 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2782 global_avg_mtp_loss: 2.0979 +[titan] 2025-10-05 05:43:45,042 - root - INFO - lr: 4.1631e-05 gnorm: 1.09 [ 7:09:35<17:26:40] +[titan] 2025-10-05 05:43:55,889 - root - INFO - step: 11645 loss: 2.3366 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 05:43:55,889 - root - INFO - lr: 4.1624e-05 gnorm: 1.12 [ 7:09:46<17:26:29] +[titan] 2025-10-05 05:44:04,549 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:44:06,731 - root - INFO - step: 11650 loss: 2.3745 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0964 +[titan] 2025-10-05 05:44:06,731 - root - INFO - lr: 4.1617e-05 gnorm: 1.13 [ 7:09:57<17:26:17] +[titan] 2025-10-05 05:44:17,623 - root - INFO - step: 11655 loss: 2.3136 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0423 +[titan] 2025-10-05 05:44:17,623 - root - INFO - lr: 4.1610e-05 gnorm: 1.16 [ 7:10:08<17:26:06] +[titan] 2025-10-05 05:44:28,491 - root - INFO - step: 11660 loss: 2.3791 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2800 global_avg_mtp_loss: 2.0991 +[titan] 2025-10-05 05:44:28,491 - root - INFO - lr: 4.1603e-05 gnorm: 1.14 [ 7:10:19<17:25:54] +[titan] 2025-10-05 05:44:39,349 - root - INFO - step: 11665 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0347 +[titan] 2025-10-05 05:44:39,349 - root - INFO - lr: 4.1596e-05 gnorm: 1.14 [ 7:10:30<17:25:43] +[titan] 2025-10-05 05:44:50,212 - root - INFO - step: 11670 loss: 2.2728 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0088 +[titan] 2025-10-05 05:44:50,212 - root - INFO - lr: 4.1589e-05 gnorm: 1.12 [ 7:10:41<17:25:31] +[titan] 2025-10-05 05:45:01,081 - root - INFO - step: 11675 loss: 2.3589 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:45:01,081 - root - INFO - lr: 4.1582e-05 gnorm: 1.11 [ 7:10:51<17:25:19] +[titan] 2025-10-05 05:45:11,965 - root - INFO - step: 11680 loss: 2.3297 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0575 +[titan] 2025-10-05 05:45:11,965 - root - INFO - lr: 
4.1575e-05 gnorm: 1.10 [ 7:11:02<17:25:08] +[titan] 2025-10-05 05:45:22,811 - root - INFO - step: 11685 loss: 2.3493 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0746 +[titan] 2025-10-05 05:45:22,811 - root - INFO - lr: 4.1568e-05 gnorm: 1.11 [ 7:11:13<17:24:56] +[titan] 2025-10-05 05:45:33,673 - root - INFO - step: 11690 loss: 2.3753 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2787 global_avg_mtp_loss: 2.0966 +[titan] 2025-10-05 05:45:33,674 - root - INFO - lr: 4.1561e-05 gnorm: 1.10 [ 7:11:24<17:24:45] +[titan] 2025-10-05 05:45:44,536 - root - INFO - step: 11695 loss: 2.3906 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1103 +[titan] 2025-10-05 05:45:44,537 - root - INFO - lr: 4.1554e-05 gnorm: 1.11 [ 7:11:35<17:24:33] +[titan] 2025-10-05 05:45:53,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:45:55,410 - root - INFO - step: 11700 loss: 2.3089 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 05:45:55,410 - root - INFO - lr: 4.1547e-05 gnorm: 1.16 [ 7:11:46<17:24:22] +[titan] 2025-10-05 05:46:06,262 - root - INFO - step: 11705 loss: 2.3134 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 05:46:06,262 - root - INFO - lr: 4.1540e-05 gnorm: 1.11 [ 7:11:57<17:24:10] +[titan] 2025-10-05 05:46:17,130 - root - INFO - step: 11710 loss: 2.3283 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0558 +[titan] 2025-10-05 05:46:17,130 - root - INFO - lr: 4.1534e-05 gnorm: 1.07 [ 7:12:07<17:23:58] +[titan] 2025-10-05 05:46:27,969 - root - INFO - step: 11715 loss: 2.3153 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0455 +[titan] 2025-10-05 05:46:27,969 - root - INFO - lr: 4.1527e-05 gnorm: 1.10 [ 7:12:18<17:23:47] +[titan] 2025-10-05 05:46:38,818 - root - INFO - step: 11720 loss: 2.3973 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2803 global_avg_mtp_loss: 2.1170 +[titan] 2025-10-05 05:46:38,818 - root - INFO - lr: 4.1520e-05 gnorm: 1.16 [ 7:12:29<17:23:35] +[titan] 2025-10-05 05:46:49,675 - root - INFO - step: 11725 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0144 +[titan] 2025-10-05 05:46:49,675 - root - INFO - lr: 4.1513e-05 gnorm: 1.16 [ 7:12:40<17:23:24] +[titan] 2025-10-05 05:47:00,544 - root - INFO - step: 11730 loss: 2.4145 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2833 global_avg_mtp_loss: 2.1312 +[titan] 2025-10-05 05:47:00,544 - root - INFO - lr: 
4.1506e-05 gnorm: 1.10 [ 7:12:51<17:23:12] +[titan] 2025-10-05 05:47:11,419 - root - INFO - step: 11735 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0736 +[titan] 2025-10-05 05:47:11,419 - root - INFO - lr: 4.1499e-05 gnorm: 1.08 [ 7:13:02<17:23:00] +[titan] 2025-10-05 05:47:22,265 - root - INFO - step: 11740 loss: 2.3154 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 05:47:22,265 - root - INFO - lr: 4.1492e-05 gnorm: 1.11 [ 7:13:13<17:22:49] +[titan] 2025-10-05 05:47:33,131 - root - INFO - step: 11745 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 05:47:33,132 - root - INFO - lr: 4.1485e-05 gnorm: 1.13 [ 7:13:23<17:22:37] +[titan] 2025-10-05 05:47:41,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:47:43,985 - root - INFO - step: 11750 loss: 2.3279 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0557 +[titan] 2025-10-05 05:47:43,985 - root - INFO - lr: 4.1478e-05 gnorm: 1.13 [ 7:13:34<17:22:26] +[titan] 2025-10-05 05:47:54,868 - root - INFO - step: 11755 loss: 2.3253 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0524 +[titan] 2025-10-05 05:47:54,869 - root - INFO - lr: 4.1471e-05 gnorm: 1.15 [ 7:13:45<17:22:14] +[titan] 2025-10-05 05:48:05,705 - root - INFO - step: 11760 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 05:48:05,705 - root - INFO - lr: 4.1464e-05 gnorm: 1.11 [ 7:13:56<17:22:03] +[titan] 2025-10-05 05:48:16,588 - root - INFO - step: 11765 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0673 +[titan] 2025-10-05 05:48:16,588 - root - INFO - lr: 4.1457e-05 gnorm: 1.08 [ 7:14:07<17:21:51] +[titan] 2025-10-05 05:48:27,456 - root - INFO - step: 11770 loss: 2.3560 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:48:27,456 - root - INFO - lr: 4.1450e-05 gnorm: 1.13 [ 7:14:18<17:21:39] +[titan] 2025-10-05 05:48:38,410 - root - INFO - step: 11775 loss: 2.3373 memory: 118.84GiB(85.28%) tps: 29,914 tflops: 415.01 mfu: 41.96% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 05:48:38,411 - root - INFO - lr: 4.1443e-05 gnorm: 1.12 [ 7:14:29<17:21:28] +[titan] 2025-10-05 05:48:40,781 - root - INFO - Dumping profiler traces at step 11776 +[titan] 2025-10-05 05:48:40,818 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 05:48:49,532 - root - INFO - step: 11780 loss: 
2.3261 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0553 +[titan] 2025-10-05 05:48:49,532 - root - INFO - lr: 4.1436e-05 gnorm: 1.10 [ 7:14:40<17:21:17] +[titan] 2025-10-05 05:49:00,425 - root - INFO - step: 11785 loss: 2.3316 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0583 +[titan] 2025-10-05 05:49:00,425 - root - INFO - lr: 4.1429e-05 gnorm: 1.11 [ 7:14:51<17:21:06] +[titan] 2025-10-05 05:49:11,300 - root - INFO - step: 11790 loss: 2.2637 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 05:49:11,301 - root - INFO - lr: 4.1422e-05 gnorm: 1.08 [ 7:15:02<17:20:54] +[titan] 2025-10-05 05:49:22,173 - root - INFO - step: 11795 loss: 2.3956 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1151 +[titan] 2025-10-05 05:49:22,173 - root - INFO - lr: 4.1415e-05 gnorm: 1.13 [ 7:15:12<17:20:43] +[titan] 2025-10-05 05:49:30,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:49:33,049 - root - INFO - step: 11800 loss: 2.3168 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0453 +[titan] 2025-10-05 05:49:33,050 - root - INFO - lr: 4.1408e-05 gnorm: 1.14 [ 7:15:23<17:20:31] +[titan] 2025-10-05 05:49:43,908 - root - INFO - step: 11805 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:49:43,909 - root - INFO - lr: 4.1401e-05 gnorm: 1.11 [ 7:15:34<17:20:20] +[titan] 2025-10-05 05:49:54,777 - root - INFO - step: 11810 loss: 2.4071 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2822 global_avg_mtp_loss: 2.1249 +[titan] 2025-10-05 05:49:54,777 - root - INFO - lr: 4.1394e-05 gnorm: 1.14 [ 7:15:45<17:20:08] +[titan] 2025-10-05 05:50:05,641 - root - INFO - step: 11815 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 05:50:05,641 - root - INFO - lr: 4.1387e-05 gnorm: 1.10 [ 7:15:56<17:19:56] +[titan] 2025-10-05 05:50:16,549 - root - INFO - step: 11820 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 05:50:16,549 - root - INFO - lr: 4.1379e-05 gnorm: 1.14 [ 7:16:07<17:19:45] +[titan] 2025-10-05 05:50:27,410 - root - INFO - step: 11825 loss: 2.3545 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0799 +[titan] 2025-10-05 05:50:27,410 - root - INFO - lr: 4.1372e-05 gnorm: 1.11 [ 7:16:18<17:19:33] +[titan] 2025-10-05 05:50:38,296 - root - INFO - step: 11830 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 05:50:38,297 - root - INFO - lr: 
4.1365e-05 gnorm: 1.17 [ 7:16:29<17:19:22] +[titan] 2025-10-05 05:50:49,183 - root - INFO - step: 11835 loss: 2.4085 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1264 +[titan] 2025-10-05 05:50:49,183 - root - INFO - lr: 4.1358e-05 gnorm: 1.12 [ 7:16:39<17:19:10] +[titan] 2025-10-05 05:51:00,086 - root - INFO - step: 11840 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 05:51:00,086 - root - INFO - lr: 4.1351e-05 gnorm: 1.11 [ 7:16:50<17:18:59] +[titan] 2025-10-05 05:51:10,957 - root - INFO - step: 11845 loss: 2.3242 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0501 +[titan] 2025-10-05 05:51:10,957 - root - INFO - lr: 4.1344e-05 gnorm: 1.08 [ 7:17:01<17:18:47] +[titan] 2025-10-05 05:51:19,706 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:51:21,898 - root - INFO - step: 11850 loss: 2.3518 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0767 +[titan] 2025-10-05 05:51:21,898 - root - INFO - lr: 4.1337e-05 gnorm: 1.12 [ 7:17:12<17:18:36] +[titan] 2025-10-05 05:51:32,790 - root - INFO - step: 11855 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 05:51:32,790 - root - INFO - lr: 4.1330e-05 gnorm: 1.12 [ 7:17:23<17:18:24] +[titan] 2025-10-05 05:51:43,664 - root - INFO - step: 11860 loss: 2.3095 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 05:51:43,664 - root - INFO - lr: 4.1323e-05 gnorm: 1.18 [ 7:17:34<17:18:13] +[titan] 2025-10-05 05:51:54,563 - root - INFO - step: 11865 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0318 +[titan] 2025-10-05 05:51:54,563 - root - INFO - lr: 4.1316e-05 gnorm: 1.09 [ 7:17:45<17:18:01] +[titan] 2025-10-05 05:52:05,455 - root - INFO - step: 11870 loss: 2.3710 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2774 global_avg_mtp_loss: 2.0936 +[titan] 2025-10-05 05:52:05,455 - root - INFO - lr: 4.1309e-05 gnorm: 1.11 [ 7:17:56<17:17:50] +[titan] 2025-10-05 05:52:16,379 - root - INFO - step: 11875 loss: 2.3659 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0895 +[titan] 2025-10-05 05:52:16,379 - root - INFO - lr: 4.1302e-05 gnorm: 1.15 [ 7:18:07<17:17:39] +[titan] 2025-10-05 05:52:27,265 - root - INFO - step: 11880 loss: 2.4011 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2876 global_avg_mtp_loss: 2.1135 +[titan] 2025-10-05 05:52:27,265 - root - INFO - lr: 
4.1295e-05 gnorm: 3.35 [ 7:18:18<17:17:27] +[titan] 2025-10-05 05:52:38,136 - root - INFO - step: 11885 loss: 2.3238 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0521 +[titan] 2025-10-05 05:52:38,137 - root - INFO - lr: 4.1288e-05 gnorm: 1.14 [ 7:18:28<17:17:16] +[titan] 2025-10-05 05:52:49,001 - root - INFO - step: 11890 loss: 2.3415 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0669 +[titan] 2025-10-05 05:52:49,001 - root - INFO - lr: 4.1281e-05 gnorm: 1.11 [ 7:18:39<17:17:04] +[titan] 2025-10-05 05:52:59,880 - root - INFO - step: 11895 loss: 2.3264 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2720 global_avg_mtp_loss: 2.0545 +[titan] 2025-10-05 05:52:59,880 - root - INFO - lr: 4.1274e-05 gnorm: 1.12 [ 7:18:50<17:16:52] +[titan] 2025-10-05 05:53:08,561 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:53:10,763 - root - INFO - step: 11900 loss: 2.2583 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9932 +[titan] 2025-10-05 05:53:10,763 - root - INFO - lr: 4.1267e-05 gnorm: 1.12 [ 7:19:01<17:16:41] +[titan] 2025-10-05 05:53:21,692 - root - INFO - step: 11905 loss: 2.3128 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 05:53:21,692 - root - INFO - lr: 4.1260e-05 gnorm: 1.14 [ 7:19:12<17:16:30] +[titan] 2025-10-05 05:53:32,550 - root - INFO - step: 11910 loss: 2.3229 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:53:32,550 - root - INFO - lr: 4.1253e-05 gnorm: 1.07 [ 7:19:23<17:16:18] +[titan] 2025-10-05 05:53:43,445 - root - INFO - step: 11915 loss: 2.3927 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2793 global_avg_mtp_loss: 2.1133 +[titan] 2025-10-05 05:53:43,445 - root - INFO - lr: 4.1246e-05 gnorm: 1.12 [ 7:19:34<17:16:07] +[titan] 2025-10-05 05:53:54,326 - root - INFO - step: 11920 loss: 2.4016 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2804 global_avg_mtp_loss: 2.1212 +[titan] 2025-10-05 05:53:54,326 - root - INFO - lr: 4.1239e-05 gnorm: 1.11 [ 7:19:45<17:15:55] +[titan] 2025-10-05 05:54:05,201 - root - INFO - step: 11925 loss: 2.3896 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2790 global_avg_mtp_loss: 2.1106 +[titan] 2025-10-05 05:54:05,201 - root - INFO - lr: 4.1232e-05 gnorm: 1.10 [ 7:19:55<17:15:44] +[titan] 2025-10-05 05:54:16,091 - root - INFO - step: 11930 loss: 2.3474 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:54:16,091 - root - INFO - lr: 
4.1224e-05 gnorm: 1.18 [ 7:20:06<17:15:32] +[titan] 2025-10-05 05:54:27,039 - root - INFO - step: 11935 loss: 2.3186 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.27 mfu: 41.99% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 05:54:27,039 - root - INFO - lr: 4.1217e-05 gnorm: 1.13 [ 7:20:17<17:15:21] +[titan] 2025-10-05 05:54:37,903 - root - INFO - step: 11940 loss: 2.4060 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2821 global_avg_mtp_loss: 2.1239 +[titan] 2025-10-05 05:54:37,903 - root - INFO - lr: 4.1210e-05 gnorm: 1.14 [ 7:20:28<17:15:09] +[titan] 2025-10-05 05:54:48,775 - root - INFO - step: 11945 loss: 2.3374 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0641 +[titan] 2025-10-05 05:54:48,775 - root - INFO - lr: 4.1203e-05 gnorm: 1.16 [ 7:20:39<17:14:58] +[titan] 2025-10-05 05:54:57,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:54:59,643 - root - INFO - step: 11950 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0633 +[titan] 2025-10-05 05:54:59,643 - root - INFO - lr: 4.1196e-05 gnorm: 1.10 [ 7:20:50<17:14:46] +[titan] 2025-10-05 05:55:10,528 - root - INFO - step: 11955 loss: 2.3258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2730 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 05:55:10,528 - root - INFO - lr: 4.1189e-05 gnorm: 1.08 [ 7:21:01<17:14:35] +[titan] 2025-10-05 05:55:21,455 - root - INFO - step: 11960 loss: 2.3401 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0654 +[titan] 2025-10-05 05:55:21,456 - root - INFO - lr: 4.1182e-05 gnorm: 1.12 [ 7:21:12<17:14:23] +[titan] 2025-10-05 05:55:32,338 - root - INFO - step: 11965 loss: 2.3022 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 05:55:32,338 - root - INFO - lr: 4.1175e-05 gnorm: 1.06 [ 7:21:23<17:14:12] +[titan] 2025-10-05 05:55:43,237 - root - INFO - step: 11970 loss: 2.3819 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 05:55:43,238 - root - INFO - lr: 4.1168e-05 gnorm: 1.11 [ 7:21:34<17:14:00] +[titan] 2025-10-05 05:55:54,122 - root - INFO - step: 11975 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0302 +[titan] 2025-10-05 05:55:54,122 - root - INFO - lr: 4.1161e-05 gnorm: 1.07 [ 7:21:44<17:13:49] +[titan] 2025-10-05 05:56:04,989 - root - INFO - step: 11980 loss: 2.3476 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2746 global_avg_mtp_loss: 2.0730 +[titan] 2025-10-05 05:56:04,989 - root - INFO - lr: 
4.1154e-05 gnorm: 1.08 [ 7:21:55<17:13:37] +[titan] 2025-10-05 05:56:15,876 - root - INFO - step: 11985 loss: 2.3487 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0735 +[titan] 2025-10-05 05:56:15,876 - root - INFO - lr: 4.1147e-05 gnorm: 1.11 [ 7:22:06<17:13:26] +[titan] 2025-10-05 05:56:26,799 - root - INFO - step: 11990 loss: 2.3624 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2771 global_avg_mtp_loss: 2.0853 +[titan] 2025-10-05 05:56:26,799 - root - INFO - lr: 4.1139e-05 gnorm: 1.07 [ 7:22:17<17:13:14] +[titan] 2025-10-05 05:56:37,664 - root - INFO - step: 11995 loss: 2.3352 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2733 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 05:56:37,664 - root - INFO - lr: 4.1132e-05 gnorm: 1.15 [ 7:22:28<17:13:03] +[titan] 2025-10-05 05:56:46,371 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:56:48,559 - root - INFO - step: 12000 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0434 +[titan] 2025-10-05 05:56:48,559 - root - INFO - lr: 4.1125e-05 gnorm: 1.15 [ 7:22:39<17:12:51] +[titan] 2025-10-05 05:56:59,430 - root - INFO - step: 12005 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0683 +[titan] 2025-10-05 05:56:59,431 - root - INFO - lr: 4.1118e-05 gnorm: 1.12 [ 7:22:50<17:12:40] +[titan] 2025-10-05 05:57:10,327 - root - INFO - step: 12010 loss: 2.3294 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0572 +[titan] 2025-10-05 05:57:10,327 - root - INFO - lr: 4.1111e-05 gnorm: 1.11 [ 7:23:01<17:12:28] +[titan] 2025-10-05 05:57:21,254 - root - INFO - step: 12015 loss: 2.3689 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0911 +[titan] 2025-10-05 05:57:21,254 - root - INFO - lr: 4.1104e-05 gnorm: 1.08 [ 7:23:12<17:12:17] +[titan] 2025-10-05 05:57:32,120 - root - INFO - step: 12020 loss: 2.3542 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2761 global_avg_mtp_loss: 2.0781 +[titan] 2025-10-05 05:57:32,120 - root - INFO - lr: 4.1097e-05 gnorm: 1.08 [ 7:23:22<17:12:05] +[titan] 2025-10-05 05:57:43,004 - root - INFO - step: 12025 loss: 2.3233 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 05:57:43,004 - root - INFO - lr: 4.1090e-05 gnorm: 1.13 [ 7:23:33<17:11:54] +[titan] 2025-10-05 05:57:53,894 - root - INFO - step: 12030 loss: 2.3526 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 05:57:53,895 - root - INFO - lr: 
4.1083e-05 gnorm: 1.09 [ 7:23:44<17:11:42] +[titan] 2025-10-05 05:58:04,763 - root - INFO - step: 12035 loss: 2.3974 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1179 +[titan] 2025-10-05 05:58:04,764 - root - INFO - lr: 4.1075e-05 gnorm: 1.11 [ 7:23:55<17:11:31] +[titan] 2025-10-05 05:58:15,655 - root - INFO - step: 12040 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 05:58:15,656 - root - INFO - lr: 4.1068e-05 gnorm: 1.13 [ 7:24:06<17:11:19] +[titan] 2025-10-05 05:58:26,581 - root - INFO - step: 12045 loss: 2.2551 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9914 +[titan] 2025-10-05 05:58:26,582 - root - INFO - lr: 4.1061e-05 gnorm: 1.10 [ 7:24:17<17:11:08] +[titan] 2025-10-05 05:58:35,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 05:58:37,445 - root - INFO - step: 12050 loss: 2.2791 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 05:58:37,445 - root - INFO - lr: 4.1054e-05 gnorm: 1.12 [ 7:24:28<17:10:56] +[titan] 2025-10-05 05:58:48,333 - root - INFO - step: 12055 loss: 2.3027 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0322 +[titan] 2025-10-05 05:58:48,334 - root - INFO - lr: 4.1047e-05 gnorm: 1.09 [ 7:24:39<17:10:45] +[titan] 2025-10-05 05:58:59,215 - root - INFO - step: 12060 loss: 2.3599 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2767 global_avg_mtp_loss: 2.0832 +[titan] 2025-10-05 05:58:59,215 - root - INFO - lr: 4.1040e-05 gnorm: 1.13 [ 7:24:49<17:10:34] +[titan] 2025-10-05 05:59:10,066 - root - INFO - step: 12065 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 05:59:10,066 - root - INFO - lr: 4.1033e-05 gnorm: 1.14 [ 7:25:00<17:10:22] +[titan] 2025-10-05 05:59:20,922 - root - INFO - step: 12070 loss: 2.3313 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0585 +[titan] 2025-10-05 05:59:20,923 - root - INFO - lr: 4.1026e-05 gnorm: 1.12 [ 7:25:11<17:10:10] +[titan] 2025-10-05 05:59:31,844 - root - INFO - step: 12075 loss: 2.4140 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2832 global_avg_mtp_loss: 2.1308 +[titan] 2025-10-05 05:59:31,844 - root - INFO - lr: 4.1018e-05 gnorm: 1.14 [ 7:25:22<17:09:59] +[titan] 2025-10-05 05:59:42,686 - root - INFO - step: 12080 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0618 +[titan] 2025-10-05 05:59:42,686 - root - INFO - lr: 
4.1011e-05 gnorm: 1.13 [ 7:25:33<17:09:47] +[titan] 2025-10-05 05:59:53,539 - root - INFO - step: 12085 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0449 +[titan] 2025-10-05 05:59:53,540 - root - INFO - lr: 4.1004e-05 gnorm: 1.11 [ 7:25:44<17:09:36] +[titan] 2025-10-05 06:00:04,392 - root - INFO - step: 12090 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0005 +[titan] 2025-10-05 06:00:04,392 - root - INFO - lr: 4.0997e-05 gnorm: 1.08 [ 7:25:55<17:09:24] +[titan] 2025-10-05 06:00:15,254 - root - INFO - step: 12095 loss: 2.3576 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0822 +[titan] 2025-10-05 06:00:15,254 - root - INFO - lr: 4.0990e-05 gnorm: 1.07 [ 7:26:06<17:09:13] +[titan] 2025-10-05 06:00:23,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:00:26,169 - root - INFO - step: 12100 loss: 2.3299 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0570 +[titan] 2025-10-05 06:00:26,169 - root - INFO - lr: 4.0983e-05 gnorm: 1.12 [ 7:26:16<17:09:01] +[titan] 2025-10-05 06:00:37,019 - root - INFO - step: 12105 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 06:00:37,019 - root - INFO - lr: 4.0976e-05 gnorm: 1.10 [ 7:26:27<17:08:50] +[titan] 2025-10-05 06:00:47,875 - root - INFO - step: 12110 loss: 2.3109 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0407 +[titan] 2025-10-05 06:00:47,875 - root - INFO - lr: 4.0968e-05 gnorm: 1.14 [ 7:26:38<17:08:38] +[titan] 2025-10-05 06:00:58,710 - root - INFO - step: 12115 loss: 2.2916 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0231 +[titan] 2025-10-05 06:00:58,711 - root - INFO - lr: 4.0961e-05 gnorm: 1.09 [ 7:26:49<17:08:27] +[titan] 2025-10-05 06:01:09,539 - root - INFO - step: 12120 loss: 2.3227 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0502 +[titan] 2025-10-05 06:01:09,539 - root - INFO - lr: 4.0954e-05 gnorm: 1.11 [ 7:27:00<17:08:15] +[titan] 2025-10-05 06:01:20,374 - root - INFO - step: 12125 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 06:01:20,374 - root - INFO - lr: 4.0947e-05 gnorm: 1.07 [ 7:27:11<17:08:03] +[titan] 2025-10-05 06:01:31,270 - root - INFO - step: 12130 loss: 2.2677 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0025 +[titan] 2025-10-05 06:01:31,270 - root - INFO - lr: 
4.0940e-05 gnorm: 1.31 [ 7:27:22<17:07:52] +[titan] 2025-10-05 06:01:42,106 - root - INFO - step: 12135 loss: 2.2796 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:01:42,106 - root - INFO - lr: 4.0933e-05 gnorm: 1.13 [ 7:27:32<17:07:40] +[titan] 2025-10-05 06:01:52,949 - root - INFO - step: 12140 loss: 2.3222 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:01:52,949 - root - INFO - lr: 4.0926e-05 gnorm: 1.09 [ 7:27:43<17:07:29] +[titan] 2025-10-05 06:02:03,787 - root - INFO - step: 12145 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2760 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:02:03,787 - root - INFO - lr: 4.0918e-05 gnorm: 1.12 [ 7:27:54<17:07:17] +[titan] 2025-10-05 06:02:12,468 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:02:14,649 - root - INFO - step: 12150 loss: 2.3633 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2765 global_avg_mtp_loss: 2.0868 +[titan] 2025-10-05 06:02:14,649 - root - INFO - lr: 4.0911e-05 gnorm: 1.10 [ 7:28:05<17:07:06] +[titan] 2025-10-05 06:02:25,544 - root - INFO - step: 12155 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 06:02:25,544 - root - INFO - lr: 4.0904e-05 gnorm: 1.08 [ 7:28:16<17:06:54] +[titan] 2025-10-05 06:02:36,407 - root - INFO - step: 12160 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:02:36,407 - root - INFO - lr: 4.0897e-05 gnorm: 1.12 [ 7:28:27<17:06:43] +[titan] 2025-10-05 06:02:47,265 - root - INFO - step: 12165 loss: 2.3191 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:02:47,265 - root - INFO - lr: 4.0890e-05 gnorm: 1.13 [ 7:28:38<17:06:31] +[titan] 2025-10-05 06:02:58,124 - root - INFO - step: 12170 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0270 +[titan] 2025-10-05 06:02:58,124 - root - INFO - lr: 4.0883e-05 gnorm: 1.13 [ 7:28:48<17:06:20] +[titan] 2025-10-05 06:03:08,999 - root - INFO - step: 12175 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0702 +[titan] 2025-10-05 06:03:08,999 - root - INFO - lr: 4.0875e-05 gnorm: 1.10 [ 7:28:59<17:06:08] +[titan] 2025-10-05 06:03:19,864 - root - INFO - step: 12180 loss: 2.3860 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1055 +[titan] 2025-10-05 06:03:19,864 - root - INFO - lr: 
4.0868e-05 gnorm: 1.08 [ 7:29:10<17:05:57] +[titan] 2025-10-05 06:03:30,733 - root - INFO - step: 12185 loss: 2.2786 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 06:03:30,733 - root - INFO - lr: 4.0861e-05 gnorm: 1.09 [ 7:29:21<17:05:45] +[titan] 2025-10-05 06:03:41,601 - root - INFO - step: 12190 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0738 +[titan] 2025-10-05 06:03:41,601 - root - INFO - lr: 4.0854e-05 gnorm: 1.13 [ 7:29:32<17:05:34] +[titan] 2025-10-05 06:03:52,503 - root - INFO - step: 12195 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9906 +[titan] 2025-10-05 06:03:52,503 - root - INFO - lr: 4.0847e-05 gnorm: 1.13 [ 7:29:43<17:05:22] +[titan] 2025-10-05 06:04:01,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:04:03,365 - root - INFO - step: 12200 loss: 2.3747 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2775 global_avg_mtp_loss: 2.0972 +[titan] 2025-10-05 06:04:03,365 - root - INFO - lr: 4.0839e-05 gnorm: 1.12 [ 7:29:54<17:05:11] +[titan] 2025-10-05 06:04:14,208 - root - INFO - step: 12205 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0049 +[titan] 2025-10-05 06:04:14,208 - root - INFO - lr: 4.0832e-05 gnorm: 1.10 [ 7:30:04<17:04:59] +[titan] 2025-10-05 06:04:25,065 - root - INFO - step: 12210 loss: 2.3060 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:04:25,065 - root - INFO - lr: 4.0825e-05 gnorm: 1.06 [ 7:30:15<17:04:48] +[titan] 2025-10-05 06:04:35,929 - root - INFO - step: 12215 loss: 2.2793 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 06:04:35,929 - root - INFO - lr: 4.0818e-05 gnorm: 1.04 [ 7:30:26<17:04:36] +[titan] 2025-10-05 06:04:46,809 - root - INFO - step: 12220 loss: 2.3271 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0559 +[titan] 2025-10-05 06:04:46,809 - root - INFO - lr: 4.0811e-05 gnorm: 1.14 [ 7:30:37<17:04:25] +[titan] 2025-10-05 06:04:57,691 - root - INFO - step: 12225 loss: 2.2624 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9984 +[titan] 2025-10-05 06:04:57,691 - root - INFO - lr: 4.0803e-05 gnorm: 1.17 [ 7:30:48<17:04:13] +[titan] 2025-10-05 06:05:08,549 - root - INFO - step: 12230 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0605 +[titan] 2025-10-05 06:05:08,549 - root - INFO - lr: 
4.0796e-05 gnorm: 1.09 [ 7:30:59<17:04:02] +[titan] 2025-10-05 06:05:19,441 - root - INFO - step: 12235 loss: 2.3483 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0739 +[titan] 2025-10-05 06:05:19,441 - root - INFO - lr: 4.0789e-05 gnorm: 1.10 [ 7:31:10<17:03:50] +[titan] 2025-10-05 06:05:30,318 - root - INFO - step: 12240 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 06:05:30,318 - root - INFO - lr: 4.0782e-05 gnorm: 1.09 [ 7:31:21<17:03:39] +[titan] 2025-10-05 06:05:41,191 - root - INFO - step: 12245 loss: 2.3479 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2754 global_avg_mtp_loss: 2.0725 +[titan] 2025-10-05 06:05:41,191 - root - INFO - lr: 4.0775e-05 gnorm: 1.08 [ 7:31:31<17:03:27] +[titan] 2025-10-05 06:05:49,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:05:52,078 - root - INFO - step: 12250 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:05:52,079 - root - INFO - lr: 4.0767e-05 gnorm: 1.17 [ 7:31:42<17:03:16] +[titan] 2025-10-05 06:06:02,966 - root - INFO - step: 12255 loss: 2.3830 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1035 +[titan] 2025-10-05 06:06:02,966 - root - INFO - lr: 4.0760e-05 gnorm: 1.12 [ 7:31:53<17:03:04] +[titan] 2025-10-05 06:06:13,829 - root - INFO - step: 12260 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9702 +[titan] 2025-10-05 06:06:13,829 - root - INFO - lr: 4.0753e-05 gnorm: 1.10 [ 7:32:04<17:02:53] +[titan] 2025-10-05 06:06:24,716 - root - INFO - step: 12265 loss: 2.3897 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2795 global_avg_mtp_loss: 2.1102 +[titan] 2025-10-05 06:06:24,716 - root - INFO - lr: 4.0746e-05 gnorm: 1.13 [ 7:32:15<17:02:41] +[titan] 2025-10-05 06:06:35,605 - root - INFO - step: 12270 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0629 +[titan] 2025-10-05 06:06:35,605 - root - INFO - lr: 4.0739e-05 gnorm: 1.15 [ 7:32:26<17:02:30] +[titan] 2025-10-05 06:06:46,502 - root - INFO - step: 12275 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 06:06:46,502 - root - INFO - lr: 4.0731e-05 gnorm: 1.17 [ 7:32:37<17:02:18] +[titan] 2025-10-05 06:06:57,383 - root - INFO - step: 12280 loss: 2.3419 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2745 global_avg_mtp_loss: 2.0674 +[titan] 2025-10-05 06:06:57,383 - root - INFO - lr: 
4.0724e-05 gnorm: 1.16 [ 7:32:48<17:02:07] +[titan] 2025-10-05 06:07:08,352 - root - INFO - step: 12285 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.44 mfu: 41.91% global_avg_ntp_loss: 0.2717 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 06:07:08,353 - root - INFO - lr: 4.0717e-05 gnorm: 1.14 [ 7:32:59<17:01:56] +[titan] 2025-10-05 06:07:15,076 - root - INFO - Dumping profiler traces at step 12288 +[titan] 2025-10-05 06:07:15,116 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:07:19,500 - root - INFO - step: 12290 loss: 2.3565 memory: 118.84GiB(85.28%) tps: 29,395 tflops: 407.81 mfu: 41.23% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:07:19,500 - root - INFO - lr: 4.0710e-05 gnorm: 1.08 [ 7:33:10<17:01:45] +[titan] 2025-10-05 06:07:30,465 - root - INFO - step: 12295 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 29,886 tflops: 414.62 mfu: 41.92% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 06:07:30,465 - root - INFO - lr: 4.0702e-05 gnorm: 1.07 [ 7:33:21<17:01:34] +[titan] 2025-10-05 06:07:39,154 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:07:41,347 - root - INFO - step: 12300 loss: 2.3244 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:07:41,347 - root - INFO - lr: 4.0695e-05 gnorm: 1.16 [ 7:33:32<17:01:22] +[titan] 2025-10-05 06:07:52,196 - root - INFO - step: 12305 loss: 2.3378 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0647 +[titan] 2025-10-05 06:07:52,196 - root - INFO - lr: 4.0688e-05 gnorm: 1.09 [ 7:33:42<17:01:10] +[titan] 2025-10-05 06:08:03,050 - root - INFO - step: 12310 loss: 2.3555 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:08:03,050 - root - INFO - lr: 4.0681e-05 gnorm: 1.12 [ 7:33:53<17:00:59] +[titan] 2025-10-05 06:08:13,913 - root - INFO - step: 12315 loss: 2.3066 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0365 +[titan] 2025-10-05 06:08:13,914 - root - INFO - lr: 4.0674e-05 gnorm: 1.08 [ 7:34:04<17:00:47] +[titan] 2025-10-05 06:08:24,841 - root - INFO - step: 12320 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0745 +[titan] 2025-10-05 06:08:24,842 - root - INFO - lr: 4.0666e-05 gnorm: 1.11 [ 7:34:15<17:00:36] +[titan] 2025-10-05 06:08:35,938 - root - INFO - step: 12325 loss: 2.4352 memory: 118.84GiB(85.28%) tps: 29,531 tflops: 409.69 mfu: 41.42% global_avg_ntp_loss: 0.2851 global_avg_mtp_loss: 2.1501 +[titan] 2025-10-05 06:08:35,938 - root - INFO - lr: 4.0659e-05 gnorm: 1.15 [ 7:34:26<17:00:25] +[titan] 2025-10-05 06:08:46,800 - root - INFO - step: 12330 loss: 2.3228 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0517 +[titan] 2025-10-05 06:08:46,800 - root - INFO - lr: 
4.0652e-05 gnorm: 1.09 [ 7:34:37<17:00:14] +[titan] 2025-10-05 06:08:57,665 - root - INFO - step: 12335 loss: 2.3478 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2755 global_avg_mtp_loss: 2.0722 +[titan] 2025-10-05 06:08:57,665 - root - INFO - lr: 4.0645e-05 gnorm: 1.09 [ 7:34:48<17:00:02] +[titan] 2025-10-05 06:09:08,538 - root - INFO - step: 12340 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0577 +[titan] 2025-10-05 06:09:08,539 - root - INFO - lr: 4.0637e-05 gnorm: 1.13 [ 7:34:59<16:59:51] +[titan] 2025-10-05 06:09:19,441 - root - INFO - step: 12345 loss: 2.3988 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2798 global_avg_mtp_loss: 2.1189 +[titan] 2025-10-05 06:09:19,441 - root - INFO - lr: 4.0630e-05 gnorm: 1.13 [ 7:35:10<16:59:39] +[titan] 2025-10-05 06:09:28,177 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:09:30,418 - root - INFO - step: 12350 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.18 mfu: 41.88% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0382 +[titan] 2025-10-05 06:09:30,418 - root - INFO - lr: 4.0623e-05 gnorm: 1.12 [ 7:35:21<16:59:28] +[titan] 2025-10-05 06:09:41,340 - root - INFO - step: 12355 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0087 +[titan] 2025-10-05 06:09:41,340 - root - INFO - lr: 4.0616e-05 gnorm: 1.16 [ 7:35:32<16:59:17] +[titan] 2025-10-05 06:09:52,209 - root - INFO - step: 12360 loss: 2.3088 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0389 +[titan] 2025-10-05 06:09:52,209 - root - INFO - lr: 4.0608e-05 gnorm: 1.09 [ 7:35:42<16:59:05] +[titan] 2025-10-05 06:10:03,072 - root - INFO - step: 12365 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0260 +[titan] 2025-10-05 06:10:03,073 - root - INFO - lr: 4.0601e-05 gnorm: 1.09 [ 7:35:53<16:58:54] +[titan] 2025-10-05 06:10:13,928 - root - INFO - step: 12370 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.35% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 06:10:13,928 - root - INFO - lr: 4.0594e-05 gnorm: 1.09 [ 7:36:04<16:58:42] +[titan] 2025-10-05 06:10:24,802 - root - INFO - step: 12375 loss: 2.3408 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:10:24,803 - root - INFO - lr: 4.0587e-05 gnorm: 1.10 [ 7:36:15<16:58:31] +[titan] 2025-10-05 06:10:35,777 - root - INFO - step: 12380 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 29,858 tflops: 414.24 mfu: 41.88% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:10:35,777 - root - INFO - lr: 
4.0579e-05 gnorm: 1.08 [ 7:36:26<16:58:19] +[titan] 2025-10-05 06:10:46,648 - root - INFO - step: 12385 loss: 2.3676 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2768 global_avg_mtp_loss: 2.0908 +[titan] 2025-10-05 06:10:46,649 - root - INFO - lr: 4.0572e-05 gnorm: 1.13 [ 7:36:37<16:58:08] +[titan] 2025-10-05 06:10:57,506 - root - INFO - step: 12390 loss: 2.3730 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2779 global_avg_mtp_loss: 2.0951 +[titan] 2025-10-05 06:10:57,506 - root - INFO - lr: 4.0565e-05 gnorm: 1.14 [ 7:36:48<16:57:56] +[titan] 2025-10-05 06:11:08,373 - root - INFO - step: 12395 loss: 2.3102 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2701 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:11:08,373 - root - INFO - lr: 4.0558e-05 gnorm: 1.06 [ 7:36:59<16:57:45] +[titan] 2025-10-05 06:11:17,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:11:19,239 - root - INFO - step: 12400 loss: 2.3820 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1036 +[titan] 2025-10-05 06:11:19,239 - root - INFO - lr: 4.0550e-05 gnorm: 1.12 [ 7:37:09<16:57:33] +[titan] 2025-10-05 06:11:30,093 - root - INFO - step: 12405 loss: 2.3346 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0597 +[titan] 2025-10-05 06:11:30,094 - root - INFO - lr: 4.0543e-05 gnorm: 1.09 [ 7:37:20<16:57:22] +[titan] 2025-10-05 06:11:41,037 - root - INFO - step: 12410 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0232 +[titan] 2025-10-05 06:11:41,037 - root - INFO - lr: 4.0536e-05 gnorm: 1.14 [ 7:37:31<16:57:10] +[titan] 2025-10-05 06:11:51,926 - root - INFO - step: 12415 loss: 2.3276 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0550 +[titan] 2025-10-05 06:11:51,926 - root - INFO - lr: 4.0528e-05 gnorm: 1.18 [ 7:37:42<16:56:59] +[titan] 2025-10-05 06:12:02,805 - root - INFO - step: 12420 loss: 2.3265 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0542 +[titan] 2025-10-05 06:12:02,806 - root - INFO - lr: 4.0521e-05 gnorm: 1.08 [ 7:37:53<16:56:48] +[titan] 2025-10-05 06:12:13,684 - root - INFO - step: 12425 loss: 2.3185 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0481 +[titan] 2025-10-05 06:12:13,684 - root - INFO - lr: 4.0514e-05 gnorm: 1.14 [ 7:38:04<16:56:36] +[titan] 2025-10-05 06:12:24,578 - root - INFO - step: 12430 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:12:24,578 - root - INFO - lr: 
4.0507e-05 gnorm: 1.12 [ 7:38:15<16:56:25] +[titan] 2025-10-05 06:12:35,467 - root - INFO - step: 12435 loss: 2.3505 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0765 +[titan] 2025-10-05 06:12:35,468 - root - INFO - lr: 4.0499e-05 gnorm: 1.10 [ 7:38:26<16:56:13] +[titan] 2025-10-05 06:12:46,337 - root - INFO - step: 12440 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:12:46,337 - root - INFO - lr: 4.0492e-05 gnorm: 1.07 [ 7:38:37<16:56:02] +[titan] 2025-10-05 06:12:57,242 - root - INFO - step: 12445 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 06:12:57,242 - root - INFO - lr: 4.0485e-05 gnorm: 1.08 [ 7:38:47<16:55:50] +[titan] 2025-10-05 06:13:05,952 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:13:08,132 - root - INFO - step: 12450 loss: 2.3232 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0520 +[titan] 2025-10-05 06:13:08,132 - root - INFO - lr: 4.0477e-05 gnorm: 1.09 [ 7:38:58<16:55:39] +[titan] 2025-10-05 06:13:19,019 - root - INFO - step: 12455 loss: 2.3224 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0513 +[titan] 2025-10-05 06:13:19,019 - root - INFO - lr: 4.0470e-05 gnorm: 1.08 [ 7:39:09<16:55:27] +[titan] 2025-10-05 06:13:29,895 - root - INFO - step: 12460 loss: 2.3516 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2763 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:13:29,895 - root - INFO - lr: 4.0463e-05 gnorm: 1.12 [ 7:39:20<16:55:16] +[titan] 2025-10-05 06:13:40,820 - root - INFO - step: 12465 loss: 2.3135 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0425 +[titan] 2025-10-05 06:13:40,820 - root - INFO - lr: 4.0456e-05 gnorm: 1.11 [ 7:39:31<16:55:05] +[titan] 2025-10-05 06:13:51,710 - root - INFO - step: 12470 loss: 2.3792 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1007 +[titan] 2025-10-05 06:13:51,710 - root - INFO - lr: 4.0448e-05 gnorm: 1.07 [ 7:39:42<16:54:53] +[titan] 2025-10-05 06:14:02,592 - root - INFO - step: 12475 loss: 2.3726 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2764 global_avg_mtp_loss: 2.0962 +[titan] 2025-10-05 06:14:02,592 - root - INFO - lr: 4.0441e-05 gnorm: 1.11 [ 7:39:53<16:54:42] +[titan] 2025-10-05 06:14:13,496 - root - INFO - step: 12480 loss: 2.2332 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9712 +[titan] 2025-10-05 06:14:13,497 - root - INFO - lr: 
4.0434e-05 gnorm: 1.08 [ 7:40:04<16:54:30] +[titan] 2025-10-05 06:14:24,366 - root - INFO - step: 12485 loss: 2.3235 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0528 +[titan] 2025-10-05 06:14:24,366 - root - INFO - lr: 4.0426e-05 gnorm: 1.11 [ 7:40:15<16:54:19] +[titan] 2025-10-05 06:14:35,268 - root - INFO - step: 12490 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0610 +[titan] 2025-10-05 06:14:35,269 - root - INFO - lr: 4.0419e-05 gnorm: 1.09 [ 7:40:26<16:54:07] +[titan] 2025-10-05 06:14:46,143 - root - INFO - step: 12495 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 06:14:46,144 - root - INFO - lr: 4.0412e-05 gnorm: 1.12 [ 7:40:36<16:53:56] +[titan] 2025-10-05 06:14:54,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:14:57,025 - root - INFO - step: 12500 loss: 2.2990 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0321 +[titan] 2025-10-05 06:14:57,025 - root - INFO - lr: 4.0404e-05 gnorm: 1.12 [ 7:40:47<16:53:45] +[titan] 2025-10-05 06:15:07,897 - root - INFO - step: 12505 loss: 2.3230 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0512 +[titan] 2025-10-05 06:15:07,897 - root - INFO - lr: 4.0397e-05 gnorm: 1.14 [ 7:40:58<16:53:33] +[titan] 2025-10-05 06:15:18,787 - root - INFO - step: 12510 loss: 2.3421 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0680 +[titan] 2025-10-05 06:15:18,787 - root - INFO - lr: 4.0390e-05 gnorm: 1.11 [ 7:41:09<16:53:22] +[titan] 2025-10-05 06:15:29,657 - root - INFO - step: 12515 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:15:29,657 - root - INFO - lr: 4.0383e-05 gnorm: 1.11 [ 7:41:20<16:53:10] +[titan] 2025-10-05 06:15:40,564 - root - INFO - step: 12520 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 06:15:40,564 - root - INFO - lr: 4.0375e-05 gnorm: 1.09 [ 7:41:31<16:52:59] +[titan] 2025-10-05 06:15:51,439 - root - INFO - step: 12525 loss: 2.2600 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 06:15:51,439 - root - INFO - lr: 4.0368e-05 gnorm: 1.12 [ 7:41:42<16:52:47] +[titan] 2025-10-05 06:16:02,309 - root - INFO - step: 12530 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2748 global_avg_mtp_loss: 2.0754 +[titan] 2025-10-05 06:16:02,309 - root - INFO - lr: 
4.0361e-05 gnorm: 1.12 [ 7:41:53<16:52:36] +[titan] 2025-10-05 06:16:13,212 - root - INFO - step: 12535 loss: 2.3534 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2744 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:16:13,212 - root - INFO - lr: 4.0353e-05 gnorm: 1.11 [ 7:42:03<16:52:24] +[titan] 2025-10-05 06:16:24,126 - root - INFO - step: 12540 loss: 2.3391 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0651 +[titan] 2025-10-05 06:16:24,127 - root - INFO - lr: 4.0346e-05 gnorm: 1.13 [ 7:42:14<16:52:13] +[titan] 2025-10-05 06:16:35,001 - root - INFO - step: 12545 loss: 2.3246 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 06:16:35,001 - root - INFO - lr: 4.0339e-05 gnorm: 1.12 [ 7:42:25<16:52:02] +[titan] 2025-10-05 06:16:43,712 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:16:45,891 - root - INFO - step: 12550 loss: 2.3363 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0627 +[titan] 2025-10-05 06:16:45,891 - root - INFO - lr: 4.0331e-05 gnorm: 1.10 [ 7:42:36<16:51:50] +[titan] 2025-10-05 06:16:56,777 - root - INFO - step: 12555 loss: 2.2647 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0000 +[titan] 2025-10-05 06:16:56,777 - root - INFO - lr: 4.0324e-05 gnorm: 1.14 [ 7:42:47<16:51:39] +[titan] 2025-10-05 06:17:07,666 - root - INFO - step: 12560 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 06:17:07,666 - root - INFO - lr: 4.0317e-05 gnorm: 1.12 [ 7:42:58<16:51:27] +[titan] 2025-10-05 06:17:18,556 - root - INFO - step: 12565 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:17:18,557 - root - INFO - lr: 4.0309e-05 gnorm: 1.11 [ 7:43:09<16:51:16] +[titan] 2025-10-05 06:17:29,439 - root - INFO - step: 12570 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 06:17:29,439 - root - INFO - lr: 4.0302e-05 gnorm: 1.11 [ 7:43:20<16:51:04] +[titan] 2025-10-05 06:17:40,372 - root - INFO - step: 12575 loss: 2.2819 memory: 118.84GiB(85.28%) tps: 29,972 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 06:17:40,372 - root - INFO - lr: 4.0295e-05 gnorm: 1.11 [ 7:43:31<16:50:53] +[titan] 2025-10-05 06:17:51,237 - root - INFO - step: 12580 loss: 2.3250 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0525 +[titan] 2025-10-05 06:17:51,238 - root - INFO - lr: 
4.0287e-05 gnorm: 1.11 [ 7:43:41<16:50:42] +[titan] 2025-10-05 06:18:02,105 - root - INFO - step: 12585 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:18:02,105 - root - INFO - lr: 4.0280e-05 gnorm: 1.09 [ 7:43:52<16:50:30] +[titan] 2025-10-05 06:18:12,984 - root - INFO - step: 12590 loss: 2.3668 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2788 global_avg_mtp_loss: 2.0880 +[titan] 2025-10-05 06:18:12,984 - root - INFO - lr: 4.0273e-05 gnorm: 1.17 [ 7:44:03<16:50:19] +[titan] 2025-10-05 06:18:23,839 - root - INFO - step: 12595 loss: 2.3742 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0956 +[titan] 2025-10-05 06:18:23,839 - root - INFO - lr: 4.0265e-05 gnorm: 1.11 [ 7:44:14<16:50:07] +[titan] 2025-10-05 06:18:32,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:18:34,727 - root - INFO - step: 12600 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9762 +[titan] 2025-10-05 06:18:34,727 - root - INFO - lr: 4.0258e-05 gnorm: 1.14 [ 7:44:25<16:49:56] +[titan] 2025-10-05 06:18:45,664 - root - INFO - step: 12605 loss: 2.3207 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0497 +[titan] 2025-10-05 06:18:45,664 - root - INFO - lr: 4.0250e-05 gnorm: 1.17 [ 7:44:36<16:49:44] +[titan] 2025-10-05 06:18:56,552 - root - INFO - step: 12610 loss: 2.3981 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2805 global_avg_mtp_loss: 2.1176 +[titan] 2025-10-05 06:18:56,552 - root - INFO - lr: 4.0243e-05 gnorm: 1.13 [ 7:44:47<16:49:33] +[titan] 2025-10-05 06:19:07,421 - root - INFO - step: 12615 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 06:19:07,421 - root - INFO - lr: 4.0236e-05 gnorm: 1.09 [ 7:44:58<16:49:22] +[titan] 2025-10-05 06:19:18,306 - root - INFO - step: 12620 loss: 2.3150 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0439 +[titan] 2025-10-05 06:19:18,306 - root - INFO - lr: 4.0228e-05 gnorm: 1.12 [ 7:45:09<16:49:10] +[titan] 2025-10-05 06:19:29,188 - root - INFO - step: 12625 loss: 2.3979 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.1165 +[titan] 2025-10-05 06:19:29,189 - root - INFO - lr: 4.0221e-05 gnorm: 1.12 [ 7:45:19<16:48:59] +[titan] 2025-10-05 06:19:40,105 - root - INFO - step: 12630 loss: 2.2606 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9964 +[titan] 2025-10-05 06:19:40,105 - root - INFO - lr: 
4.0214e-05 gnorm: 1.14 [ 7:45:30<16:48:47] +[titan] 2025-10-05 06:19:50,986 - root - INFO - step: 12635 loss: 2.3546 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0790 +[titan] 2025-10-05 06:19:50,986 - root - INFO - lr: 4.0206e-05 gnorm: 1.11 [ 7:45:41<16:48:36] +[titan] 2025-10-05 06:20:01,908 - root - INFO - step: 12640 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:20:01,908 - root - INFO - lr: 4.0199e-05 gnorm: 1.11 [ 7:45:52<16:48:25] +[titan] 2025-10-05 06:20:12,799 - root - INFO - step: 12645 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0332 +[titan] 2025-10-05 06:20:12,799 - root - INFO - lr: 4.0192e-05 gnorm: 1.10 [ 7:46:03<16:48:13] +[titan] 2025-10-05 06:20:21,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:20:23,685 - root - INFO - step: 12650 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 06:20:23,685 - root - INFO - lr: 4.0184e-05 gnorm: 1.11 [ 7:46:14<16:48:02] +[titan] 2025-10-05 06:20:34,581 - root - INFO - step: 12655 loss: 2.2611 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 06:20:34,581 - root - INFO - lr: 4.0177e-05 gnorm: 1.08 [ 7:46:25<16:47:50] +[titan] 2025-10-05 06:20:45,479 - root - INFO - step: 12660 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:20:45,480 - root - INFO - lr: 4.0169e-05 gnorm: 1.09 [ 7:46:36<16:47:39] +[titan] 2025-10-05 06:20:56,352 - root - INFO - step: 12665 loss: 2.3502 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0753 +[titan] 2025-10-05 06:20:56,353 - root - INFO - lr: 4.0162e-05 gnorm: 1.10 [ 7:46:47<16:47:27] +[titan] 2025-10-05 06:21:07,226 - root - INFO - step: 12670 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9534 +[titan] 2025-10-05 06:21:07,226 - root - INFO - lr: 4.0155e-05 gnorm: 1.08 [ 7:46:57<16:47:16] +[titan] 2025-10-05 06:21:18,106 - root - INFO - step: 12675 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9903 +[titan] 2025-10-05 06:21:18,107 - root - INFO - lr: 4.0147e-05 gnorm: 1.16 [ 7:47:08<16:47:05] +[titan] 2025-10-05 06:21:28,978 - root - INFO - step: 12680 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 06:21:28,978 - root - INFO - lr: 
4.0140e-05 gnorm: 1.11 [ 7:47:19<16:46:53] +[titan] 2025-10-05 06:21:39,844 - root - INFO - step: 12685 loss: 2.3348 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0622 +[titan] 2025-10-05 06:21:39,844 - root - INFO - lr: 4.0133e-05 gnorm: 1.13 [ 7:47:30<16:46:42] +[titan] 2025-10-05 06:21:50,731 - root - INFO - step: 12690 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2750 global_avg_mtp_loss: 2.0771 +[titan] 2025-10-05 06:21:50,731 - root - INFO - lr: 4.0125e-05 gnorm: 1.14 [ 7:47:41<16:46:30] +[titan] 2025-10-05 06:22:01,611 - root - INFO - step: 12695 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 06:22:01,612 - root - INFO - lr: 4.0118e-05 gnorm: 1.10 [ 7:47:52<16:46:19] +[titan] 2025-10-05 06:22:10,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:22:12,500 - root - INFO - step: 12700 loss: 2.3396 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2742 global_avg_mtp_loss: 2.0653 +[titan] 2025-10-05 06:22:12,501 - root - INFO - lr: 4.0110e-05 gnorm: 1.11 [ 7:48:03<16:46:07] +[titan] 2025-10-05 06:22:23,372 - root - INFO - step: 12705 loss: 2.2813 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0153 +[titan] 2025-10-05 06:22:23,372 - root - INFO - lr: 4.0103e-05 gnorm: 1.11 [ 7:48:14<16:45:56] +[titan] 2025-10-05 06:22:34,241 - root - INFO - step: 12710 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:22:34,241 - root - INFO - lr: 4.0096e-05 gnorm: 1.10 [ 7:48:24<16:45:44] +[titan] 2025-10-05 06:22:45,141 - root - INFO - step: 12715 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0103 +[titan] 2025-10-05 06:22:45,141 - root - INFO - lr: 4.0088e-05 gnorm: 1.14 [ 7:48:35<16:45:33] +[titan] 2025-10-05 06:22:56,018 - root - INFO - step: 12720 loss: 2.2452 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 06:22:56,018 - root - INFO - lr: 4.0081e-05 gnorm: 1.10 [ 7:48:46<16:45:22] +[titan] 2025-10-05 06:23:06,904 - root - INFO - step: 12725 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0456 +[titan] 2025-10-05 06:23:06,904 - root - INFO - lr: 4.0073e-05 gnorm: 1.10 [ 7:48:57<16:45:10] +[titan] 2025-10-05 06:23:17,777 - root - INFO - step: 12730 loss: 2.3547 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0804 +[titan] 2025-10-05 06:23:17,778 - root - INFO - lr: 
4.0066e-05 gnorm: 1.09 [ 7:49:08<16:44:59] +[titan] 2025-10-05 06:23:28,700 - root - INFO - step: 12735 loss: 2.4579 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2865 global_avg_mtp_loss: 2.1714 +[titan] 2025-10-05 06:23:28,700 - root - INFO - lr: 4.0059e-05 gnorm: 1.12 [ 7:49:19<16:44:47] +[titan] 2025-10-05 06:23:39,577 - root - INFO - step: 12740 loss: 2.2807 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 06:23:39,577 - root - INFO - lr: 4.0051e-05 gnorm: 1.08 [ 7:49:30<16:44:36] +[titan] 2025-10-05 06:23:50,466 - root - INFO - step: 12745 loss: 2.2580 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9955 +[titan] 2025-10-05 06:23:50,467 - root - INFO - lr: 4.0044e-05 gnorm: 1.13 [ 7:49:41<16:44:25] +[titan] 2025-10-05 06:23:59,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:24:01,332 - root - INFO - step: 12750 loss: 2.3550 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0792 +[titan] 2025-10-05 06:24:01,332 - root - INFO - lr: 4.0036e-05 gnorm: 1.16 [ 7:49:52<16:44:13] +[titan] 2025-10-05 06:24:12,211 - root - INFO - step: 12755 loss: 2.3122 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 06:24:12,211 - root - INFO - lr: 4.0029e-05 gnorm: 1.10 [ 7:50:02<16:44:02] +[titan] 2025-10-05 06:24:23,070 - root - INFO - step: 12760 loss: 2.2722 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 06:24:23,070 - root - INFO - lr: 4.0022e-05 gnorm: 1.11 [ 7:50:13<16:43:50] +[titan] 2025-10-05 06:24:33,960 - root - INFO - step: 12765 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0308 +[titan] 2025-10-05 06:24:33,960 - root - INFO - lr: 4.0014e-05 gnorm: 1.11 [ 7:50:24<16:43:39] +[titan] 2025-10-05 06:24:44,855 - root - INFO - step: 12770 loss: 2.2882 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0202 +[titan] 2025-10-05 06:24:44,855 - root - INFO - lr: 4.0007e-05 gnorm: 1.10 [ 7:50:35<16:43:27] +[titan] 2025-10-05 06:24:55,732 - root - INFO - step: 12775 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 06:24:55,732 - root - INFO - lr: 3.9999e-05 gnorm: 1.13 [ 7:50:46<16:43:16] +[titan] 2025-10-05 06:25:06,578 - root - INFO - step: 12780 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:06,578 - root - INFO - lr: 
3.9992e-05 gnorm: 1.10 [ 7:50:57<16:43:04] +[titan] 2025-10-05 06:25:17,446 - root - INFO - step: 12785 loss: 2.3443 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0703 +[titan] 2025-10-05 06:25:17,446 - root - INFO - lr: 3.9984e-05 gnorm: 1.15 [ 7:51:08<16:42:53] +[titan] 2025-10-05 06:25:28,322 - root - INFO - step: 12790 loss: 2.1995 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 06:25:28,322 - root - INFO - lr: 3.9977e-05 gnorm: 1.08 [ 7:51:19<16:42:42] +[titan] 2025-10-05 06:25:39,196 - root - INFO - step: 12795 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2762 global_avg_mtp_loss: 2.0818 +[titan] 2025-10-05 06:25:39,196 - root - INFO - lr: 3.9970e-05 gnorm: 1.11 [ 7:51:29<16:42:30] +[titan] 2025-10-05 06:25:48,007 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:25:50,190 - root - INFO - step: 12800 loss: 2.2572 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.49 mfu: 41.81% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 06:25:50,191 - root - INFO - lr: 3.9962e-05 gnorm: 1.12 [ 7:51:40<16:42:19] +[titan] 2025-10-05 06:25:50,378 - root - INFO - Dumping profiler traces at step 12800 +[titan] 2025-10-05 06:25:50,417 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:26:01,279 - root - INFO - step: 12805 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 29,551 tflops: 409.98 mfu: 41.45% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0714 +[titan] 2025-10-05 06:26:01,280 - root - INFO - lr: 3.9955e-05 gnorm: 1.13 [ 7:51:51<16:42:08] +[titan] 2025-10-05 06:26:12,154 - root - INFO - step: 12810 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0265 +[titan] 2025-10-05 06:26:12,155 - root - INFO - lr: 3.9947e-05 gnorm: 1.08 [ 7:52:02<16:41:56] +[titan] 2025-10-05 06:26:23,032 - root - INFO - step: 12815 loss: 2.3306 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0579 +[titan] 2025-10-05 06:26:23,033 - root - INFO - lr: 3.9940e-05 gnorm: 1.06 [ 7:52:13<16:41:45] +[titan] 2025-10-05 06:26:33,940 - root - INFO - step: 12820 loss: 2.3775 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2786 global_avg_mtp_loss: 2.0989 +[titan] 2025-10-05 06:26:33,941 - root - INFO - lr: 3.9932e-05 gnorm: 1.15 [ 7:52:24<16:41:34] +[titan] 2025-10-05 06:26:44,836 - root - INFO - step: 12825 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0008 +[titan] 2025-10-05 06:26:44,836 - root - INFO - lr: 3.9925e-05 gnorm: 1.05 [ 7:52:35<16:41:22] +[titan] 2025-10-05 06:26:55,799 - root - INFO - step: 12830 loss: 
2.3367 memory: 118.84GiB(85.28%) tps: 29,891 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2737 global_avg_mtp_loss: 2.0630 +[titan] 2025-10-05 06:26:55,799 - root - INFO - lr: 3.9918e-05 gnorm: 1.14 [ 7:52:46<16:41:11] +[titan] 2025-10-05 06:27:06,678 - root - INFO - step: 12835 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 06:27:06,678 - root - INFO - lr: 3.9910e-05 gnorm: 1.07 [ 7:52:57<16:41:00] +[titan] 2025-10-05 06:27:17,590 - root - INFO - step: 12840 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0145 +[titan] 2025-10-05 06:27:17,590 - root - INFO - lr: 3.9903e-05 gnorm: 1.10 [ 7:53:08<16:40:48] +[titan] 2025-10-05 06:27:28,495 - root - INFO - step: 12845 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0092 +[titan] 2025-10-05 06:27:28,496 - root - INFO - lr: 3.9895e-05 gnorm: 1.10 [ 7:53:19<16:40:37] +[titan] 2025-10-05 06:27:37,188 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:27:39,368 - root - INFO - step: 12850 loss: 2.2958 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2682 global_avg_mtp_loss: 2.0276 +[titan] 2025-10-05 06:27:39,368 - root - INFO - lr: 3.9888e-05 gnorm: 1.09 [ 7:53:30<16:40:25] +[titan] 2025-10-05 06:27:50,338 - root - INFO - step: 12855 loss: 2.2825 memory: 118.84GiB(85.28%) tps: 29,871 tflops: 414.41 mfu: 41.90% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0128 +[titan] 2025-10-05 06:27:50,339 - root - INFO - lr: 3.9880e-05 gnorm: 1.16 [ 7:53:41<16:40:14] +[titan] 2025-10-05 06:28:01,245 - root - INFO - step: 12860 loss: 2.3056 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:28:01,245 - root - INFO - lr: 3.9873e-05 gnorm: 1.08 [ 7:53:51<16:40:03] +[titan] 2025-10-05 06:28:12,135 - root - INFO - step: 12865 loss: 2.2550 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9912 +[titan] 2025-10-05 06:28:12,135 - root - INFO - lr: 3.9865e-05 gnorm: 1.10 [ 7:54:02<16:39:51] +[titan] 2025-10-05 06:28:23,005 - root - INFO - step: 12870 loss: 2.3501 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2752 global_avg_mtp_loss: 2.0749 +[titan] 2025-10-05 06:28:23,005 - root - INFO - lr: 3.9858e-05 gnorm: 1.07 [ 7:54:13<16:39:40] +[titan] 2025-10-05 06:28:33,877 - root - INFO - step: 12875 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0229 +[titan] 2025-10-05 06:28:33,877 - root - INFO - lr: 3.9850e-05 gnorm: 1.13 [ 7:54:24<16:39:29] +[titan] 2025-10-05 06:28:44,761 - root - INFO - step: 12880 loss: 2.3117 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:28:44,761 - root - INFO - lr: 
3.9843e-05 gnorm: 1.15 [ 7:54:35<16:39:17] +[titan] 2025-10-05 06:28:55,685 - root - INFO - step: 12885 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:28:55,685 - root - INFO - lr: 3.9836e-05 gnorm: 1.11 [ 7:54:46<16:39:06] +[titan] 2025-10-05 06:29:06,556 - root - INFO - step: 12890 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:29:06,556 - root - INFO - lr: 3.9828e-05 gnorm: 1.12 [ 7:54:57<16:38:54] +[titan] 2025-10-05 06:29:17,466 - root - INFO - step: 12895 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:29:17,467 - root - INFO - lr: 3.9821e-05 gnorm: 1.11 [ 7:55:08<16:38:43] +[titan] 2025-10-05 06:29:26,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:29:28,342 - root - INFO - step: 12900 loss: 2.3579 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0826 +[titan] 2025-10-05 06:29:28,342 - root - INFO - lr: 3.9813e-05 gnorm: 1.11 [ 7:55:19<16:38:32] +[titan] 2025-10-05 06:29:39,206 - root - INFO - step: 12905 loss: 2.2414 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 06:29:39,206 - root - INFO - lr: 3.9806e-05 gnorm: 1.08 [ 7:55:29<16:38:20] +[titan] 2025-10-05 06:29:50,114 - root - INFO - step: 12910 loss: 2.2702 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:29:50,114 - root - INFO - lr: 3.9798e-05 gnorm: 1.13 [ 7:55:40<16:38:09] +[titan] 2025-10-05 06:30:00,993 - root - INFO - step: 12915 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0072 +[titan] 2025-10-05 06:30:00,993 - root - INFO - lr: 3.9791e-05 gnorm: 1.07 [ 7:55:51<16:37:57] +[titan] 2025-10-05 06:30:11,897 - root - INFO - step: 12920 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0307 +[titan] 2025-10-05 06:30:11,898 - root - INFO - lr: 3.9783e-05 gnorm: 1.04 [ 7:56:02<16:37:46] +[titan] 2025-10-05 06:30:22,817 - root - INFO - step: 12925 loss: 2.3288 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0581 +[titan] 2025-10-05 06:30:22,817 - root - INFO - lr: 3.9776e-05 gnorm: 1.13 [ 7:56:13<16:37:35] +[titan] 2025-10-05 06:30:33,737 - root - INFO - step: 12930 loss: 2.3573 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2758 global_avg_mtp_loss: 2.0815 +[titan] 2025-10-05 06:30:33,737 - root - INFO - lr: 
3.9768e-05 gnorm: 1.13 [ 7:56:24<16:37:23] +[titan] 2025-10-05 06:30:44,627 - root - INFO - step: 12935 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0032 +[titan] 2025-10-05 06:30:44,627 - root - INFO - lr: 3.9761e-05 gnorm: 1.10 [ 7:56:35<16:37:12] +[titan] 2025-10-05 06:30:55,585 - root - INFO - step: 12940 loss: 2.3356 memory: 118.84GiB(85.28%) tps: 29,903 tflops: 414.86 mfu: 41.95% global_avg_ntp_loss: 0.2719 global_avg_mtp_loss: 2.0636 +[titan] 2025-10-05 06:30:55,586 - root - INFO - lr: 3.9753e-05 gnorm: 1.11 [ 7:56:46<16:37:01] +[titan] 2025-10-05 06:31:06,518 - root - INFO - step: 12945 loss: 2.2859 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:31:06,518 - root - INFO - lr: 3.9746e-05 gnorm: 1.10 [ 7:56:57<16:36:49] +[titan] 2025-10-05 06:31:15,250 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:31:17,447 - root - INFO - step: 12950 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0289 +[titan] 2025-10-05 06:31:17,447 - root - INFO - lr: 3.9738e-05 gnorm: 1.11 [ 7:57:08<16:36:38] +[titan] 2025-10-05 06:31:28,381 - root - INFO - step: 12955 loss: 2.3005 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 06:31:28,382 - root - INFO - lr: 3.9731e-05 gnorm: 1.07 [ 7:57:19<16:36:27] +[titan] 2025-10-05 06:31:39,314 - root - INFO - step: 12960 loss: 2.3340 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.83 mfu: 42.05% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0616 +[titan] 2025-10-05 06:31:39,314 - root - INFO - lr: 3.9723e-05 gnorm: 1.13 [ 7:57:30<16:36:15] +[titan] 2025-10-05 06:31:50,231 - root - INFO - step: 12965 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 06:31:50,231 - root - INFO - lr: 3.9716e-05 gnorm: 1.14 [ 7:57:40<16:36:04] +[titan] 2025-10-05 06:32:01,132 - root - INFO - step: 12970 loss: 2.3312 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:32:01,132 - root - INFO - lr: 3.9708e-05 gnorm: 1.16 [ 7:57:51<16:35:53] +[titan] 2025-10-05 06:32:12,023 - root - INFO - step: 12975 loss: 2.2497 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 06:32:12,023 - root - INFO - lr: 3.9701e-05 gnorm: 1.13 [ 7:58:02<16:35:41] +[titan] 2025-10-05 06:32:22,921 - root - INFO - step: 12980 loss: 2.2994 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0314 +[titan] 2025-10-05 06:32:22,922 - root - INFO - lr: 
3.9693e-05 gnorm: 1.08 [ 7:58:13<16:35:30] +[titan] 2025-10-05 06:32:33,791 - root - INFO - step: 12985 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 06:32:33,791 - root - INFO - lr: 3.9686e-05 gnorm: 1.06 [ 7:58:24<16:35:19] +[titan] 2025-10-05 06:32:44,706 - root - INFO - step: 12990 loss: 2.3628 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2756 global_avg_mtp_loss: 2.0872 +[titan] 2025-10-05 06:32:44,706 - root - INFO - lr: 3.9678e-05 gnorm: 1.12 [ 7:58:35<16:35:07] +[titan] 2025-10-05 06:32:55,609 - root - INFO - step: 12995 loss: 2.2830 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:32:55,609 - root - INFO - lr: 3.9671e-05 gnorm: 1.13 [ 7:58:46<16:34:56] +[titan] 2025-10-05 06:33:04,309 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:33:06,487 - root - INFO - step: 13000 loss: 2.2887 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:33:06,487 - root - INFO - lr: 3.9663e-05 gnorm: 1.09 [ 7:58:57<16:34:44] +[titan] 2025-10-05 06:33:17,365 - root - INFO - step: 13005 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9733 +[titan] 2025-10-05 06:33:17,365 - root - INFO - lr: 3.9656e-05 gnorm: 1.09 [ 7:59:08<16:34:33] +[titan] 2025-10-05 06:33:28,255 - root - INFO - step: 13010 loss: 2.3417 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2740 global_avg_mtp_loss: 2.0677 +[titan] 2025-10-05 06:33:28,255 - root - INFO - lr: 3.9648e-05 gnorm: 1.10 [ 7:59:18<16:34:22] +[titan] 2025-10-05 06:33:39,107 - root - INFO - step: 13015 loss: 2.3870 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2785 global_avg_mtp_loss: 2.1086 +[titan] 2025-10-05 06:33:39,107 - root - INFO - lr: 3.9641e-05 gnorm: 1.14 [ 7:59:29<16:34:10] +[titan] 2025-10-05 06:33:49,999 - root - INFO - step: 13020 loss: 2.2362 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9754 +[titan] 2025-10-05 06:33:49,999 - root - INFO - lr: 3.9633e-05 gnorm: 1.04 [ 7:59:40<16:33:59] +[titan] 2025-10-05 06:34:00,906 - root - INFO - step: 13025 loss: 2.3058 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0372 +[titan] 2025-10-05 06:34:00,906 - root - INFO - lr: 3.9626e-05 gnorm: 1.10 [ 7:59:51<16:33:47] +[titan] 2025-10-05 06:34:11,756 - root - INFO - step: 13030 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9901 +[titan] 2025-10-05 06:34:11,756 - root - INFO - lr: 
3.9618e-05 gnorm: 1.10 [ 8:00:02<16:33:36] +[titan] 2025-10-05 06:34:22,620 - root - INFO - step: 13035 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 06:34:22,620 - root - INFO - lr: 3.9611e-05 gnorm: 1.07 [ 8:00:13<16:33:24] +[titan] 2025-10-05 06:34:33,499 - root - INFO - step: 13040 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:34:33,499 - root - INFO - lr: 3.9603e-05 gnorm: 1.10 [ 8:00:24<16:33:13] +[titan] 2025-10-05 06:34:44,364 - root - INFO - step: 13045 loss: 2.3062 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0375 +[titan] 2025-10-05 06:34:44,365 - root - INFO - lr: 3.9596e-05 gnorm: 1.12 [ 8:00:35<16:33:02] +[titan] 2025-10-05 06:34:53,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:34:55,288 - root - INFO - step: 13050 loss: 2.2984 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:34:55,289 - root - INFO - lr: 3.9588e-05 gnorm: 1.10 [ 8:00:45<16:32:50] +[titan] 2025-10-05 06:35:06,196 - root - INFO - step: 13055 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9765 +[titan] 2025-10-05 06:35:06,196 - root - INFO - lr: 3.9581e-05 gnorm: 1.05 [ 8:00:56<16:32:39] +[titan] 2025-10-05 06:35:17,080 - root - INFO - step: 13060 loss: 2.3351 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0619 +[titan] 2025-10-05 06:35:17,080 - root - INFO - lr: 3.9573e-05 gnorm: 1.10 [ 8:01:07<16:32:28] +[titan] 2025-10-05 06:35:27,969 - root - INFO - step: 13065 loss: 2.2499 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:35:27,969 - root - INFO - lr: 3.9566e-05 gnorm: 1.09 [ 8:01:18<16:32:16] +[titan] 2025-10-05 06:35:38,866 - root - INFO - step: 13070 loss: 2.3466 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2749 global_avg_mtp_loss: 2.0717 +[titan] 2025-10-05 06:35:38,867 - root - INFO - lr: 3.9558e-05 gnorm: 1.12 [ 8:01:29<16:32:05] +[titan] 2025-10-05 06:35:49,752 - root - INFO - step: 13075 loss: 2.3177 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2718 global_avg_mtp_loss: 2.0458 +[titan] 2025-10-05 06:35:49,752 - root - INFO - lr: 3.9551e-05 gnorm: 1.11 [ 8:01:40<16:31:53] +[titan] 2025-10-05 06:36:00,668 - root - INFO - step: 13080 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 06:36:00,669 - root - INFO - lr: 
3.9543e-05 gnorm: 1.07 [ 8:01:51<16:31:42] +[titan] 2025-10-05 06:36:11,580 - root - INFO - step: 13085 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 06:36:11,580 - root - INFO - lr: 3.9535e-05 gnorm: 1.11 [ 8:02:02<16:31:31] +[titan] 2025-10-05 06:36:22,465 - root - INFO - step: 13090 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9874 +[titan] 2025-10-05 06:36:22,465 - root - INFO - lr: 3.9528e-05 gnorm: 1.09 [ 8:02:13<16:31:19] +[titan] 2025-10-05 06:36:33,326 - root - INFO - step: 13095 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9630 +[titan] 2025-10-05 06:36:33,326 - root - INFO - lr: 3.9520e-05 gnorm: 1.10 [ 8:02:24<16:31:08] +[titan] 2025-10-05 06:36:41,994 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:36:44,174 - root - INFO - step: 13100 loss: 2.3105 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0398 +[titan] 2025-10-05 06:36:44,174 - root - INFO - lr: 3.9513e-05 gnorm: 1.14 [ 8:02:34<16:30:56] +[titan] 2025-10-05 06:36:55,075 - root - INFO - step: 13105 loss: 2.3106 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0393 +[titan] 2025-10-05 06:36:55,075 - root - INFO - lr: 3.9505e-05 gnorm: 1.18 [ 8:02:45<16:30:45] +[titan] 2025-10-05 06:37:05,918 - root - INFO - step: 13110 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 06:37:05,918 - root - INFO - lr: 3.9498e-05 gnorm: 1.08 [ 8:02:56<16:30:34] +[titan] 2025-10-05 06:37:16,786 - root - INFO - step: 13115 loss: 2.2582 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 06:37:16,786 - root - INFO - lr: 3.9490e-05 gnorm: 1.10 [ 8:03:07<16:30:22] +[titan] 2025-10-05 06:37:27,685 - root - INFO - step: 13120 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0414 +[titan] 2025-10-05 06:37:27,685 - root - INFO - lr: 3.9483e-05 gnorm: 1.11 [ 8:03:18<16:30:11] +[titan] 2025-10-05 06:37:38,554 - root - INFO - step: 13125 loss: 2.3124 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0426 +[titan] 2025-10-05 06:37:38,554 - root - INFO - lr: 3.9475e-05 gnorm: 1.10 [ 8:03:29<16:29:59] +[titan] 2025-10-05 06:37:49,418 - root - INFO - step: 13130 loss: 2.3195 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0480 +[titan] 2025-10-05 06:37:49,418 - root - INFO - lr: 
3.9468e-05 gnorm: 1.09 [ 8:03:40<16:29:48] +[titan] 2025-10-05 06:38:00,337 - root - INFO - step: 13135 loss: 2.2981 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0293 +[titan] 2025-10-05 06:38:00,337 - root - INFO - lr: 3.9460e-05 gnorm: 1.09 [ 8:03:51<16:29:37] +[titan] 2025-10-05 06:38:11,199 - root - INFO - step: 13140 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9924 +[titan] 2025-10-05 06:38:11,200 - root - INFO - lr: 3.9452e-05 gnorm: 1.06 [ 8:04:01<16:29:25] +[titan] 2025-10-05 06:38:22,080 - root - INFO - step: 13145 loss: 2.3521 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2759 global_avg_mtp_loss: 2.0762 +[titan] 2025-10-05 06:38:22,080 - root - INFO - lr: 3.9445e-05 gnorm: 1.08 [ 8:04:12<16:29:14] +[titan] 2025-10-05 06:38:30,798 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:38:32,990 - root - INFO - step: 13150 loss: 2.2897 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0219 +[titan] 2025-10-05 06:38:32,990 - root - INFO - lr: 3.9437e-05 gnorm: 1.12 [ 8:04:23<16:29:02] +[titan] 2025-10-05 06:38:43,859 - root - INFO - step: 13155 loss: 2.2817 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0147 +[titan] 2025-10-05 06:38:43,859 - root - INFO - lr: 3.9430e-05 gnorm: 1.08 [ 8:04:34<16:28:51] +[titan] 2025-10-05 06:38:54,735 - root - INFO - step: 13160 loss: 2.3131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0428 +[titan] 2025-10-05 06:38:54,736 - root - INFO - lr: 3.9422e-05 gnorm: 1.11 [ 8:04:45<16:28:40] +[titan] 2025-10-05 06:39:05,628 - root - INFO - step: 13165 loss: 2.3221 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0518 +[titan] 2025-10-05 06:39:05,629 - root - INFO - lr: 3.9415e-05 gnorm: 1.10 [ 8:04:56<16:28:28] +[titan] 2025-10-05 06:39:16,489 - root - INFO - step: 13170 loss: 2.3292 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0586 +[titan] 2025-10-05 06:39:16,489 - root - INFO - lr: 3.9407e-05 gnorm: 1.11 [ 8:05:07<16:28:17] +[titan] 2025-10-05 06:39:27,377 - root - INFO - step: 13175 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9764 +[titan] 2025-10-05 06:39:27,377 - root - INFO - lr: 3.9399e-05 gnorm: 1.07 [ 8:05:18<16:28:05] +[titan] 2025-10-05 06:39:38,260 - root - INFO - step: 13180 loss: 2.2929 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0261 +[titan] 2025-10-05 06:39:38,260 - root - INFO - lr: 
3.9392e-05 gnorm: 1.18 [ 8:05:28<16:27:54] +[titan] 2025-10-05 06:39:49,151 - root - INFO - step: 13185 loss: 2.2880 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0212 +[titan] 2025-10-05 06:39:49,152 - root - INFO - lr: 3.9384e-05 gnorm: 1.13 [ 8:05:39<16:27:43] +[titan] 2025-10-05 06:40:00,050 - root - INFO - step: 13190 loss: 2.3580 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0827 +[titan] 2025-10-05 06:40:00,050 - root - INFO - lr: 3.9377e-05 gnorm: 1.11 [ 8:05:50<16:27:31] +[titan] 2025-10-05 06:40:10,934 - root - INFO - step: 13195 loss: 2.2570 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9939 +[titan] 2025-10-05 06:40:10,934 - root - INFO - lr: 3.9369e-05 gnorm: 1.10 [ 8:06:01<16:27:20] +[titan] 2025-10-05 06:40:19,642 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:40:21,819 - root - INFO - step: 13200 loss: 2.2675 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0021 +[titan] 2025-10-05 06:40:21,820 - root - INFO - lr: 3.9362e-05 gnorm: 1.13 [ 8:06:12<16:27:08] +[titan] 2025-10-05 06:40:32,683 - root - INFO - step: 13205 loss: 2.3004 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0299 +[titan] 2025-10-05 06:40:32,683 - root - INFO - lr: 3.9354e-05 gnorm: 1.11 [ 8:06:23<16:26:57] +[titan] 2025-10-05 06:40:43,552 - root - INFO - step: 13210 loss: 2.3321 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0596 +[titan] 2025-10-05 06:40:43,553 - root - INFO - lr: 3.9346e-05 gnorm: 1.09 [ 8:06:34<16:26:46] +[titan] 2025-10-05 06:40:54,441 - root - INFO - step: 13215 loss: 2.3746 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2781 global_avg_mtp_loss: 2.0965 +[titan] 2025-10-05 06:40:54,441 - root - INFO - lr: 3.9339e-05 gnorm: 1.09 [ 8:06:45<16:26:34] +[titan] 2025-10-05 06:41:05,315 - root - INFO - step: 13220 loss: 2.3394 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2734 global_avg_mtp_loss: 2.0660 +[titan] 2025-10-05 06:41:05,315 - root - INFO - lr: 3.9331e-05 gnorm: 1.13 [ 8:06:55<16:26:23] +[titan] 2025-10-05 06:41:16,174 - root - INFO - step: 13225 loss: 2.2522 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 06:41:16,174 - root - INFO - lr: 3.9324e-05 gnorm: 1.10 [ 8:07:06<16:26:11] +[titan] 2025-10-05 06:41:27,031 - root - INFO - step: 13230 loss: 2.2903 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:41:27,031 - root - INFO - lr: 
3.9316e-05 gnorm: 1.10 [ 8:07:17<16:26:00] +[titan] 2025-10-05 06:41:37,890 - root - INFO - step: 13235 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0078 +[titan] 2025-10-05 06:41:37,890 - root - INFO - lr: 3.9308e-05 gnorm: 1.09 [ 8:07:28<16:25:49] +[titan] 2025-10-05 06:41:48,764 - root - INFO - step: 13240 loss: 2.3152 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0441 +[titan] 2025-10-05 06:41:48,764 - root - INFO - lr: 3.9301e-05 gnorm: 1.10 [ 8:07:39<16:25:37] +[titan] 2025-10-05 06:41:59,671 - root - INFO - step: 13245 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 2.0046 +[titan] 2025-10-05 06:41:59,672 - root - INFO - lr: 3.9293e-05 gnorm: 1.13 [ 8:07:50<16:25:26] +[titan] 2025-10-05 06:42:08,367 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:42:10,557 - root - INFO - step: 13250 loss: 2.3326 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0599 +[titan] 2025-10-05 06:42:10,558 - root - INFO - lr: 3.9286e-05 gnorm: 1.14 [ 8:08:01<16:25:14] +[titan] 2025-10-05 06:42:21,421 - root - INFO - step: 13255 loss: 2.3047 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 06:42:21,421 - root - INFO - lr: 3.9278e-05 gnorm: 1.14 [ 8:08:12<16:25:03] +[titan] 2025-10-05 06:42:32,317 - root - INFO - step: 13260 loss: 2.2022 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9453 +[titan] 2025-10-05 06:42:32,317 - root - INFO - lr: 3.9270e-05 gnorm: 1.07 [ 8:08:22<16:24:52] +[titan] 2025-10-05 06:42:43,197 - root - INFO - step: 13265 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 06:42:43,198 - root - INFO - lr: 3.9263e-05 gnorm: 1.11 [ 8:08:33<16:24:40] +[titan] 2025-10-05 06:42:54,090 - root - INFO - step: 13270 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 06:42:54,091 - root - INFO - lr: 3.9255e-05 gnorm: 1.10 [ 8:08:44<16:24:29] +[titan] 2025-10-05 06:43:05,001 - root - INFO - step: 13275 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0415 +[titan] 2025-10-05 06:43:05,001 - root - INFO - lr: 3.9248e-05 gnorm: 1.10 [ 8:08:55<16:24:18] +[titan] 2025-10-05 06:43:15,880 - root - INFO - step: 13280 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2656 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:15,880 - root - INFO - lr: 
3.9240e-05 gnorm: 1.07 [ 8:09:06<16:24:06] +[titan] 2025-10-05 06:43:26,737 - root - INFO - step: 13285 loss: 2.2774 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0114 +[titan] 2025-10-05 06:43:26,737 - root - INFO - lr: 3.9232e-05 gnorm: 1.11 [ 8:09:17<16:23:55] +[titan] 2025-10-05 06:43:37,602 - root - INFO - step: 13290 loss: 2.3086 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:43:37,602 - root - INFO - lr: 3.9225e-05 gnorm: 1.10 [ 8:09:28<16:23:43] +[titan] 2025-10-05 06:43:48,473 - root - INFO - step: 13295 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 06:43:48,473 - root - INFO - lr: 3.9217e-05 gnorm: 1.11 [ 8:09:39<16:23:32] +[titan] 2025-10-05 06:43:57,143 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:43:59,317 - root - INFO - step: 13300 loss: 2.3797 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2783 global_avg_mtp_loss: 2.1014 +[titan] 2025-10-05 06:43:59,318 - root - INFO - lr: 3.9209e-05 gnorm: 1.11 [ 8:09:49<16:23:20] +[titan] 2025-10-05 06:44:10,186 - root - INFO - step: 13305 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0141 +[titan] 2025-10-05 06:44:10,186 - root - INFO - lr: 3.9202e-05 gnorm: 1.09 [ 8:10:00<16:23:09] +[titan] 2025-10-05 06:44:21,180 - root - INFO - step: 13310 loss: 2.3025 memory: 118.84GiB(85.28%) tps: 29,805 tflops: 413.50 mfu: 41.81% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0337 +[titan] 2025-10-05 06:44:21,180 - root - INFO - lr: 3.9194e-05 gnorm: 1.09 [ 8:10:11<16:22:58] +[titan] 2025-10-05 06:44:25,698 - root - INFO - Dumping profiler traces at step 13312 +[titan] 2025-10-05 06:44:25,737 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 06:44:32,265 - root - INFO - step: 13315 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 29,562 tflops: 410.13 mfu: 41.47% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 06:44:32,265 - root - INFO - lr: 3.9187e-05 gnorm: 1.04 [ 8:10:22<16:22:47] +[titan] 2025-10-05 06:44:43,144 - root - INFO - step: 13320 loss: 2.3112 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0397 +[titan] 2025-10-05 06:44:43,144 - root - INFO - lr: 3.9179e-05 gnorm: 1.13 [ 8:10:33<16:22:35] +[titan] 2025-10-05 06:44:54,006 - root - INFO - step: 13325 loss: 2.3530 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0783 +[titan] 2025-10-05 06:44:54,006 - root - INFO - lr: 3.9171e-05 gnorm: 1.06 [ 8:10:44<16:22:24] +[titan] 2025-10-05 06:45:04,897 - root - INFO - step: 13330 loss: 
2.3671 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2778 global_avg_mtp_loss: 2.0893 +[titan] 2025-10-05 06:45:04,897 - root - INFO - lr: 3.9164e-05 gnorm: 1.11 [ 8:10:55<16:22:13] +[titan] 2025-10-05 06:45:15,754 - root - INFO - step: 13335 loss: 2.3402 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2757 global_avg_mtp_loss: 2.0644 +[titan] 2025-10-05 06:45:15,754 - root - INFO - lr: 3.9156e-05 gnorm: 1.16 [ 8:11:06<16:22:01] +[titan] 2025-10-05 06:45:26,632 - root - INFO - step: 13340 loss: 2.2623 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 06:45:26,632 - root - INFO - lr: 3.9148e-05 gnorm: 1.12 [ 8:11:17<16:21:50] +[titan] 2025-10-05 06:45:37,522 - root - INFO - step: 13345 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 06:45:37,522 - root - INFO - lr: 3.9141e-05 gnorm: 1.07 [ 8:11:28<16:21:39] +[titan] 2025-10-05 06:45:46,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:45:48,374 - root - INFO - step: 13350 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:45:48,374 - root - INFO - lr: 3.9133e-05 gnorm: 1.10 [ 8:11:39<16:21:27] +[titan] 2025-10-05 06:45:59,227 - root - INFO - step: 13355 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0173 +[titan] 2025-10-05 06:45:59,227 - root - INFO - lr: 3.9126e-05 gnorm: 1.11 [ 8:11:49<16:21:16] +[titan] 2025-10-05 06:46:10,100 - root - INFO - step: 13360 loss: 2.3111 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2703 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:46:10,100 - root - INFO - lr: 3.9118e-05 gnorm: 1.11 [ 8:12:00<16:21:04] +[titan] 2025-10-05 06:46:20,957 - root - INFO - step: 13365 loss: 2.3509 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2747 global_avg_mtp_loss: 2.0763 +[titan] 2025-10-05 06:46:20,958 - root - INFO - lr: 3.9110e-05 gnorm: 1.10 [ 8:12:11<16:20:53] +[titan] 2025-10-05 06:46:31,838 - root - INFO - step: 13370 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0254 +[titan] 2025-10-05 06:46:31,838 - root - INFO - lr: 3.9103e-05 gnorm: 1.13 [ 8:12:22<16:20:41] +[titan] 2025-10-05 06:46:42,735 - root - INFO - step: 13375 loss: 2.3437 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0696 +[titan] 2025-10-05 06:46:42,735 - root - INFO - lr: 3.9095e-05 gnorm: 1.12 [ 8:12:33<16:20:30] +[titan] 2025-10-05 06:46:53,595 - root - INFO - step: 13380 loss: 2.2952 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0281 +[titan] 2025-10-05 06:46:53,595 - root - INFO - lr: 
3.9087e-05 gnorm: 1.07 [ 8:12:44<16:20:19] +[titan] 2025-10-05 06:47:04,484 - root - INFO - step: 13385 loss: 2.3167 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0450 +[titan] 2025-10-05 06:47:04,484 - root - INFO - lr: 3.9080e-05 gnorm: 1.12 [ 8:12:55<16:20:07] +[titan] 2025-10-05 06:47:15,385 - root - INFO - step: 13390 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:47:15,385 - root - INFO - lr: 3.9072e-05 gnorm: 1.13 [ 8:13:06<16:19:56] +[titan] 2025-10-05 06:47:26,291 - root - INFO - step: 13395 loss: 2.3093 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0399 +[titan] 2025-10-05 06:47:26,291 - root - INFO - lr: 3.9064e-05 gnorm: 1.09 [ 8:13:16<16:19:45] +[titan] 2025-10-05 06:47:34,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:47:37,159 - root - INFO - step: 13400 loss: 2.2934 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0255 +[titan] 2025-10-05 06:47:37,159 - root - INFO - lr: 3.9057e-05 gnorm: 1.10 [ 8:13:27<16:19:33] +[titan] 2025-10-05 06:47:48,051 - root - INFO - step: 13405 loss: 2.1829 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 06:47:48,051 - root - INFO - lr: 3.9049e-05 gnorm: 1.13 [ 8:13:38<16:19:22] +[titan] 2025-10-05 06:47:58,962 - root - INFO - step: 13410 loss: 2.3403 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0664 +[titan] 2025-10-05 06:47:58,962 - root - INFO - lr: 3.9041e-05 gnorm: 1.08 [ 8:13:49<16:19:11] +[titan] 2025-10-05 06:48:09,859 - root - INFO - step: 13415 loss: 2.2971 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0287 +[titan] 2025-10-05 06:48:09,859 - root - INFO - lr: 3.9034e-05 gnorm: 1.09 [ 8:14:00<16:18:59] +[titan] 2025-10-05 06:48:20,742 - root - INFO - step: 13420 loss: 2.3033 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0348 +[titan] 2025-10-05 06:48:20,742 - root - INFO - lr: 3.9026e-05 gnorm: 1.09 [ 8:14:11<16:18:48] +[titan] 2025-10-05 06:48:31,616 - root - INFO - step: 13425 loss: 2.3453 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2743 global_avg_mtp_loss: 2.0709 +[titan] 2025-10-05 06:48:31,616 - root - INFO - lr: 3.9018e-05 gnorm: 1.11 [ 8:14:22<16:18:36] +[titan] 2025-10-05 06:48:42,471 - root - INFO - step: 13430 loss: 2.2153 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 06:48:42,471 - root - INFO - lr: 
3.9011e-05 gnorm: 1.09 [ 8:14:33<16:18:25] +[titan] 2025-10-05 06:48:53,334 - root - INFO - step: 13435 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 06:48:53,334 - root - INFO - lr: 3.9003e-05 gnorm: 1.10 [ 8:14:43<16:18:14] +[titan] 2025-10-05 06:49:04,235 - root - INFO - step: 13440 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 06:49:04,235 - root - INFO - lr: 3.8995e-05 gnorm: 1.10 [ 8:14:54<16:18:02] +[titan] 2025-10-05 06:49:15,122 - root - INFO - step: 13445 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0366 +[titan] 2025-10-05 06:49:15,122 - root - INFO - lr: 3.8988e-05 gnorm: 1.10 [ 8:15:05<16:17:51] +[titan] 2025-10-05 06:49:23,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:49:25,981 - root - INFO - step: 13450 loss: 2.2828 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0115 +[titan] 2025-10-05 06:49:25,981 - root - INFO - lr: 3.8980e-05 gnorm: 1.07 [ 8:15:16<16:17:39] +[titan] 2025-10-05 06:49:36,831 - root - INFO - step: 13455 loss: 2.2498 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9867 +[titan] 2025-10-05 06:49:36,831 - root - INFO - lr: 3.8972e-05 gnorm: 1.03 [ 8:15:27<16:17:28] +[titan] 2025-10-05 06:49:47,714 - root - INFO - step: 13460 loss: 2.3433 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0682 +[titan] 2025-10-05 06:49:47,714 - root - INFO - lr: 3.8965e-05 gnorm: 1.14 [ 8:15:38<16:17:17] +[titan] 2025-10-05 06:49:58,585 - root - INFO - step: 13465 loss: 2.2324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 06:49:58,585 - root - INFO - lr: 3.8957e-05 gnorm: 1.11 [ 8:15:49<16:17:05] +[titan] 2025-10-05 06:50:09,688 - root - INFO - step: 13470 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 29,514 tflops: 409.46 mfu: 41.40% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9928 +[titan] 2025-10-05 06:50:09,688 - root - INFO - lr: 3.8949e-05 gnorm: 1.07 [ 8:16:00<16:16:54] +[titan] 2025-10-05 06:50:20,551 - root - INFO - step: 13475 loss: 2.2930 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0250 +[titan] 2025-10-05 06:50:20,551 - root - INFO - lr: 3.8942e-05 gnorm: 1.12 [ 8:16:11<16:16:43] +[titan] 2025-10-05 06:50:31,416 - root - INFO - step: 13480 loss: 2.3044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 06:50:31,416 - root - INFO - lr: 
3.8934e-05 gnorm: 1.09 [ 8:16:22<16:16:32] +[titan] 2025-10-05 06:50:42,269 - root - INFO - step: 13485 loss: 2.2218 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9617 +[titan] 2025-10-05 06:50:42,269 - root - INFO - lr: 3.8926e-05 gnorm: 1.10 [ 8:16:32<16:16:20] +[titan] 2025-10-05 06:50:53,127 - root - INFO - step: 13490 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 06:50:53,128 - root - INFO - lr: 3.8919e-05 gnorm: 1.07 [ 8:16:43<16:16:09] +[titan] 2025-10-05 06:51:03,982 - root - INFO - step: 13495 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0051 +[titan] 2025-10-05 06:51:03,982 - root - INFO - lr: 3.8911e-05 gnorm: 1.09 [ 8:16:54<16:15:57] +[titan] 2025-10-05 06:51:12,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:51:14,857 - root - INFO - step: 13500 loss: 2.2415 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 06:51:14,857 - root - INFO - lr: 3.8903e-05 gnorm: 1.09 [ 8:17:05<16:15:46] +[titan] 2025-10-05 06:51:25,746 - root - INFO - step: 13505 loss: 2.2715 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 06:51:25,746 - root - INFO - lr: 3.8896e-05 gnorm: 1.09 [ 8:17:16<16:15:34] +[titan] 2025-10-05 06:51:36,614 - root - INFO - step: 13510 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 06:51:36,614 - root - INFO - lr: 3.8888e-05 gnorm: 1.08 [ 8:17:27<16:15:23] +[titan] 2025-10-05 06:51:47,494 - root - INFO - step: 13515 loss: 2.2519 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 06:51:47,494 - root - INFO - lr: 3.8880e-05 gnorm: 1.12 [ 8:17:38<16:15:12] +[titan] 2025-10-05 06:51:58,360 - root - INFO - step: 13520 loss: 2.2323 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9720 +[titan] 2025-10-05 06:51:58,360 - root - INFO - lr: 3.8872e-05 gnorm: 1.05 [ 8:17:48<16:15:00] +[titan] 2025-10-05 06:52:09,236 - root - INFO - step: 13525 loss: 2.2346 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 06:52:09,236 - root - INFO - lr: 3.8865e-05 gnorm: 1.07 [ 8:17:59<16:14:49] +[titan] 2025-10-05 06:52:20,103 - root - INFO - step: 13530 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9669 +[titan] 2025-10-05 06:52:20,103 - root - INFO - lr: 
3.8857e-05 gnorm: 1.08 [ 8:18:10<16:14:38] +[titan] 2025-10-05 06:52:30,992 - root - INFO - step: 13535 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9949 +[titan] 2025-10-05 06:52:30,992 - root - INFO - lr: 3.8849e-05 gnorm: 1.10 [ 8:18:21<16:14:26] +[titan] 2025-10-05 06:52:41,845 - root - INFO - step: 13540 loss: 2.2743 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0096 +[titan] 2025-10-05 06:52:41,846 - root - INFO - lr: 3.8842e-05 gnorm: 1.16 [ 8:18:32<16:14:15] +[titan] 2025-10-05 06:52:52,731 - root - INFO - step: 13545 loss: 2.3101 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0404 +[titan] 2025-10-05 06:52:52,731 - root - INFO - lr: 3.8834e-05 gnorm: 1.19 [ 8:18:43<16:14:03] +[titan] 2025-10-05 06:53:01,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:53:03,584 - root - INFO - step: 13550 loss: 2.3422 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2741 global_avg_mtp_loss: 2.0681 +[titan] 2025-10-05 06:53:03,584 - root - INFO - lr: 3.8826e-05 gnorm: 1.12 [ 8:18:54<16:13:52] +[titan] 2025-10-05 06:53:14,560 - root - INFO - step: 13555 loss: 2.3104 memory: 118.84GiB(85.28%) tps: 29,854 tflops: 414.18 mfu: 41.88% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 06:53:14,560 - root - INFO - lr: 3.8818e-05 gnorm: 1.18 [ 8:19:05<16:13:41] +[titan] 2025-10-05 06:53:25,426 - root - INFO - step: 13560 loss: 2.2977 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0291 +[titan] 2025-10-05 06:53:25,426 - root - INFO - lr: 3.8811e-05 gnorm: 1.10 [ 8:19:16<16:13:29] +[titan] 2025-10-05 06:53:36,319 - root - INFO - step: 13565 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0242 +[titan] 2025-10-05 06:53:36,320 - root - INFO - lr: 3.8803e-05 gnorm: 1.11 [ 8:19:26<16:13:18] +[titan] 2025-10-05 06:53:47,222 - root - INFO - step: 13570 loss: 2.2893 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0214 +[titan] 2025-10-05 06:53:47,222 - root - INFO - lr: 3.8795e-05 gnorm: 1.11 [ 8:19:37<16:13:07] +[titan] 2025-10-05 06:53:58,096 - root - INFO - step: 13575 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9963 +[titan] 2025-10-05 06:53:58,096 - root - INFO - lr: 3.8788e-05 gnorm: 1.11 [ 8:19:48<16:12:55] +[titan] 2025-10-05 06:54:08,974 - root - INFO - step: 13580 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0199 +[titan] 2025-10-05 06:54:08,975 - root - INFO - lr: 
3.8780e-05 gnorm: 1.11 [ 8:19:59<16:12:44] +[titan] 2025-10-05 06:54:19,877 - root - INFO - step: 13585 loss: 2.3139 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0437 +[titan] 2025-10-05 06:54:19,877 - root - INFO - lr: 3.8772e-05 gnorm: 1.15 [ 8:20:10<16:12:33] +[titan] 2025-10-05 06:54:30,750 - root - INFO - step: 13590 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0181 +[titan] 2025-10-05 06:54:30,750 - root - INFO - lr: 3.8764e-05 gnorm: 1.09 [ 8:20:21<16:12:21] +[titan] 2025-10-05 06:54:41,615 - root - INFO - step: 13595 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0386 +[titan] 2025-10-05 06:54:41,615 - root - INFO - lr: 3.8757e-05 gnorm: 1.12 [ 8:20:32<16:12:10] +[titan] 2025-10-05 06:54:50,323 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:54:52,501 - root - INFO - step: 13600 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0168 +[titan] 2025-10-05 06:54:52,501 - root - INFO - lr: 3.8749e-05 gnorm: 1.12 [ 8:20:43<16:11:59] +[titan] 2025-10-05 06:55:03,350 - root - INFO - step: 13605 loss: 2.2279 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 06:55:03,350 - root - INFO - lr: 3.8741e-05 gnorm: 1.09 [ 8:20:53<16:11:47] +[titan] 2025-10-05 06:55:14,228 - root - INFO - step: 13610 loss: 2.3259 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0547 +[titan] 2025-10-05 06:55:14,228 - root - INFO - lr: 3.8734e-05 gnorm: 1.14 [ 8:21:04<16:11:36] +[titan] 2025-10-05 06:55:25,123 - root - INFO - step: 13615 loss: 2.2661 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0007 +[titan] 2025-10-05 06:55:25,123 - root - INFO - lr: 3.8726e-05 gnorm: 1.11 [ 8:21:15<16:11:24] +[titan] 2025-10-05 06:55:35,976 - root - INFO - step: 13620 loss: 2.3686 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2766 global_avg_mtp_loss: 2.0920 +[titan] 2025-10-05 06:55:35,976 - root - INFO - lr: 3.8718e-05 gnorm: 1.15 [ 8:21:26<16:11:13] +[titan] 2025-10-05 06:55:46,835 - root - INFO - step: 13625 loss: 2.2851 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0187 +[titan] 2025-10-05 06:55:46,835 - root - INFO - lr: 3.8710e-05 gnorm: 1.07 [ 8:21:37<16:11:02] +[titan] 2025-10-05 06:55:57,740 - root - INFO - step: 13630 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0176 +[titan] 2025-10-05 06:55:57,740 - root - INFO - lr: 
3.8703e-05 gnorm: 1.08 [ 8:21:48<16:10:50] +[titan] 2025-10-05 06:56:08,602 - root - INFO - step: 13635 loss: 2.3123 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0432 +[titan] 2025-10-05 06:56:08,602 - root - INFO - lr: 3.8695e-05 gnorm: 1.12 [ 8:21:59<16:10:39] +[titan] 2025-10-05 06:56:19,485 - root - INFO - step: 13640 loss: 2.2360 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 06:56:19,486 - root - INFO - lr: 3.8687e-05 gnorm: 1.08 [ 8:22:10<16:10:27] +[titan] 2025-10-05 06:56:30,339 - root - INFO - step: 13645 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0101 +[titan] 2025-10-05 06:56:30,339 - root - INFO - lr: 3.8679e-05 gnorm: 1.20 [ 8:22:20<16:10:16] +[titan] 2025-10-05 06:56:39,025 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:56:41,218 - root - INFO - step: 13650 loss: 2.3116 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2706 global_avg_mtp_loss: 2.0411 +[titan] 2025-10-05 06:56:41,218 - root - INFO - lr: 3.8672e-05 gnorm: 1.10 [ 8:22:31<16:10:05] +[titan] 2025-10-05 06:56:52,067 - root - INFO - step: 13655 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2666 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 06:56:52,068 - root - INFO - lr: 3.8664e-05 gnorm: 1.09 [ 8:22:42<16:09:53] +[titan] 2025-10-05 06:57:02,942 - root - INFO - step: 13660 loss: 2.3364 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 2.0632 +[titan] 2025-10-05 06:57:02,942 - root - INFO - lr: 3.8656e-05 gnorm: 1.13 [ 8:22:53<16:09:42] +[titan] 2025-10-05 06:57:13,852 - root - INFO - step: 13665 loss: 2.2401 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 06:57:13,852 - root - INFO - lr: 3.8648e-05 gnorm: 1.09 [ 8:23:04<16:09:31] +[titan] 2025-10-05 06:57:24,731 - root - INFO - step: 13670 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9808 +[titan] 2025-10-05 06:57:24,731 - root - INFO - lr: 3.8641e-05 gnorm: 1.12 [ 8:23:15<16:09:19] +[titan] 2025-10-05 06:57:35,601 - root - INFO - step: 13675 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 06:57:35,601 - root - INFO - lr: 3.8633e-05 gnorm: 1.12 [ 8:23:26<16:09:08] +[titan] 2025-10-05 06:57:46,492 - root - INFO - step: 13680 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 1.9915 +[titan] 2025-10-05 06:57:46,493 - root - INFO - lr: 
3.8625e-05 gnorm: 1.09 [ 8:23:37<16:08:57] +[titan] 2025-10-05 06:57:57,361 - root - INFO - step: 13685 loss: 2.2907 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0233 +[titan] 2025-10-05 06:57:57,361 - root - INFO - lr: 3.8617e-05 gnorm: 1.05 [ 8:23:47<16:08:45] +[titan] 2025-10-05 06:58:08,244 - root - INFO - step: 13690 loss: 2.2221 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 06:58:08,244 - root - INFO - lr: 3.8610e-05 gnorm: 1.12 [ 8:23:58<16:08:34] +[titan] 2025-10-05 06:58:19,163 - root - INFO - step: 13695 loss: 2.2749 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0089 +[titan] 2025-10-05 06:58:19,163 - root - INFO - lr: 3.8602e-05 gnorm: 1.09 [ 8:24:09<16:08:23] +[titan] 2025-10-05 06:58:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 06:58:30,056 - root - INFO - step: 13700 loss: 2.3146 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0448 +[titan] 2025-10-05 06:58:30,056 - root - INFO - lr: 3.8594e-05 gnorm: 1.10 [ 8:24:20<16:08:11] +[titan] 2025-10-05 06:58:40,938 - root - INFO - step: 13705 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 06:58:40,938 - root - INFO - lr: 3.8586e-05 gnorm: 1.07 [ 8:24:31<16:08:00] +[titan] 2025-10-05 06:58:51,816 - root - INFO - step: 13710 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 06:58:51,816 - root - INFO - lr: 3.8578e-05 gnorm: 1.10 [ 8:24:42<16:07:48] +[titan] 2025-10-05 06:59:02,700 - root - INFO - step: 13715 loss: 2.2805 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 06:59:02,700 - root - INFO - lr: 3.8571e-05 gnorm: 1.12 [ 8:24:53<16:07:37] +[titan] 2025-10-05 06:59:13,554 - root - INFO - step: 13720 loss: 2.3118 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2710 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 06:59:13,554 - root - INFO - lr: 3.8563e-05 gnorm: 1.14 [ 8:25:04<16:07:26] +[titan] 2025-10-05 06:59:24,420 - root - INFO - step: 13725 loss: 2.2285 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9687 +[titan] 2025-10-05 06:59:24,420 - root - INFO - lr: 3.8555e-05 gnorm: 1.11 [ 8:25:15<16:07:14] +[titan] 2025-10-05 06:59:35,307 - root - INFO - step: 13730 loss: 2.2243 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 06:59:35,307 - root - INFO - lr: 
3.8547e-05 gnorm: 1.10 [ 8:25:25<16:07:03] +[titan] 2025-10-05 06:59:46,179 - root - INFO - step: 13735 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 06:59:46,179 - root - INFO - lr: 3.8540e-05 gnorm: 1.08 [ 8:25:36<16:06:52] +[titan] 2025-10-05 06:59:57,061 - root - INFO - step: 13740 loss: 2.2450 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9828 +[titan] 2025-10-05 06:59:57,061 - root - INFO - lr: 3.8532e-05 gnorm: 1.15 [ 8:25:47<16:06:40] +[titan] 2025-10-05 07:00:07,935 - root - INFO - step: 13745 loss: 2.3278 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:00:07,935 - root - INFO - lr: 3.8524e-05 gnorm: 1.10 [ 8:25:58<16:06:29] +[titan] 2025-10-05 07:00:16,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:00:18,832 - root - INFO - step: 13750 loss: 2.3084 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0396 +[titan] 2025-10-05 07:00:18,833 - root - INFO - lr: 3.8516e-05 gnorm: 1.10 [ 8:26:09<16:06:18] +[titan] 2025-10-05 07:00:29,706 - root - INFO - step: 13755 loss: 2.3204 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2711 global_avg_mtp_loss: 2.0493 +[titan] 2025-10-05 07:00:29,706 - root - INFO - lr: 3.8509e-05 gnorm: 1.11 [ 8:26:20<16:06:06] +[titan] 2025-10-05 07:00:40,608 - root - INFO - step: 13760 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2685 global_avg_mtp_loss: 2.0305 +[titan] 2025-10-05 07:00:40,608 - root - INFO - lr: 3.8501e-05 gnorm: 1.15 [ 8:26:31<16:05:55] +[titan] 2025-10-05 07:00:51,487 - root - INFO - step: 13765 loss: 2.2771 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0122 +[titan] 2025-10-05 07:00:51,487 - root - INFO - lr: 3.8493e-05 gnorm: 1.08 [ 8:26:42<16:05:44] +[titan] 2025-10-05 07:01:02,367 - root - INFO - step: 13770 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0107 +[titan] 2025-10-05 07:01:02,367 - root - INFO - lr: 3.8485e-05 gnorm: 1.52 [ 8:26:52<16:05:32] +[titan] 2025-10-05 07:01:13,257 - root - INFO - step: 13775 loss: 2.3162 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2702 global_avg_mtp_loss: 2.0460 +[titan] 2025-10-05 07:01:13,257 - root - INFO - lr: 3.8477e-05 gnorm: 1.11 [ 8:27:03<16:05:21] +[titan] 2025-10-05 07:01:24,150 - root - INFO - step: 13780 loss: 2.3133 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0424 +[titan] 2025-10-05 07:01:24,150 - root - INFO - lr: 
3.8470e-05 gnorm: 1.05 [ 8:27:14<16:05:09] +[titan] 2025-10-05 07:01:35,054 - root - INFO - step: 13785 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9600 +[titan] 2025-10-05 07:01:35,054 - root - INFO - lr: 3.8462e-05 gnorm: 1.10 [ 8:27:25<16:04:58] +[titan] 2025-10-05 07:01:45,974 - root - INFO - step: 13790 loss: 2.2651 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0014 +[titan] 2025-10-05 07:01:45,974 - root - INFO - lr: 3.8454e-05 gnorm: 1.09 [ 8:27:36<16:04:47] +[titan] 2025-10-05 07:01:56,865 - root - INFO - step: 13795 loss: 2.2879 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:01:56,865 - root - INFO - lr: 3.8446e-05 gnorm: 1.08 [ 8:27:47<16:04:36] +[titan] 2025-10-05 07:02:05,584 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:02:07,773 - root - INFO - step: 13800 loss: 2.2846 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0177 +[titan] 2025-10-05 07:02:07,773 - root - INFO - lr: 3.8438e-05 gnorm: 1.09 [ 8:27:58<16:04:24] +[titan] 2025-10-05 07:02:18,700 - root - INFO - step: 13805 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 07:02:18,700 - root - INFO - lr: 3.8431e-05 gnorm: 1.09 [ 8:28:09<16:04:13] +[titan] 2025-10-05 07:02:29,593 - root - INFO - step: 13810 loss: 2.2868 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 07:02:29,593 - root - INFO - lr: 3.8423e-05 gnorm: 1.08 [ 8:28:20<16:04:02] +[titan] 2025-10-05 07:02:40,489 - root - INFO - step: 13815 loss: 2.3125 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2709 global_avg_mtp_loss: 2.0416 +[titan] 2025-10-05 07:02:40,489 - root - INFO - lr: 3.8415e-05 gnorm: 1.08 [ 8:28:31<16:03:50] +[titan] 2025-10-05 07:02:51,396 - root - INFO - step: 13820 loss: 2.3838 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2792 global_avg_mtp_loss: 2.1047 +[titan] 2025-10-05 07:02:51,396 - root - INFO - lr: 3.8407e-05 gnorm: 1.13 [ 8:28:42<16:03:39] +[titan] 2025-10-05 07:03:00,399 - root - INFO - Dumping profiler traces at step 13824 +[titan] 2025-10-05 07:03:00,438 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:03:02,625 - root - INFO - step: 13825 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 29,181 tflops: 404.84 mfu: 40.93% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:03:02,626 - root - INFO - lr: 3.8399e-05 gnorm: 1.09 [ 8:28:53<16:03:28] +[titan] 2025-10-05 07:03:13,525 - root - INFO - step: 13830 loss: 
2.3225 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0510 +[titan] 2025-10-05 07:03:13,526 - root - INFO - lr: 3.8392e-05 gnorm: 1.08 [ 8:29:04<16:03:17] +[titan] 2025-10-05 07:03:24,465 - root - INFO - step: 13835 loss: 2.2875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:03:24,466 - root - INFO - lr: 3.8384e-05 gnorm: 1.07 [ 8:29:15<16:03:06] +[titan] 2025-10-05 07:03:35,347 - root - INFO - step: 13840 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:03:35,347 - root - INFO - lr: 3.8376e-05 gnorm: 1.09 [ 8:29:25<16:02:54] +[titan] 2025-10-05 07:03:46,225 - root - INFO - step: 13845 loss: 2.3100 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0405 +[titan] 2025-10-05 07:03:46,225 - root - INFO - lr: 3.8368e-05 gnorm: 1.11 [ 8:29:36<16:02:43] +[titan] 2025-10-05 07:03:54,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:03:57,111 - root - INFO - step: 13850 loss: 2.2770 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0113 +[titan] 2025-10-05 07:03:57,111 - root - INFO - lr: 3.8360e-05 gnorm: 1.11 [ 8:29:47<16:02:32] +[titan] 2025-10-05 07:04:08,025 - root - INFO - step: 13855 loss: 2.3522 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2753 global_avg_mtp_loss: 2.0769 +[titan] 2025-10-05 07:04:08,025 - root - INFO - lr: 3.8353e-05 gnorm: 1.11 [ 8:29:58<16:02:20] +[titan] 2025-10-05 07:04:18,937 - root - INFO - step: 13860 loss: 2.2484 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9860 +[titan] 2025-10-05 07:04:18,937 - root - INFO - lr: 3.8345e-05 gnorm: 1.13 [ 8:30:09<16:02:09] +[titan] 2025-10-05 07:04:29,819 - root - INFO - step: 13865 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9412 +[titan] 2025-10-05 07:04:29,819 - root - INFO - lr: 3.8337e-05 gnorm: 1.13 [ 8:30:20<16:01:58] +[titan] 2025-10-05 07:04:40,706 - root - INFO - step: 13870 loss: 2.1522 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 07:04:40,706 - root - INFO - lr: 3.8329e-05 gnorm: 1.10 [ 8:30:31<16:01:47] +[titan] 2025-10-05 07:04:51,600 - root - INFO - step: 13875 loss: 2.2926 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:04:51,600 - root - INFO - lr: 3.8321e-05 gnorm: 1.13 [ 8:30:42<16:01:35] +[titan] 2025-10-05 07:05:02,483 - root - INFO - step: 13880 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 07:05:02,483 - root - INFO - lr: 
3.8313e-05 gnorm: 1.05 [ 8:30:53<16:01:24] +[titan] 2025-10-05 07:05:13,375 - root - INFO - step: 13885 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 07:05:13,375 - root - INFO - lr: 3.8306e-05 gnorm: 1.09 [ 8:31:03<16:01:13] +[titan] 2025-10-05 07:05:24,346 - root - INFO - step: 13890 loss: 2.3386 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0662 +[titan] 2025-10-05 07:05:24,346 - root - INFO - lr: 3.8298e-05 gnorm: 1.09 [ 8:31:14<16:01:01] +[titan] 2025-10-05 07:05:35,221 - root - INFO - step: 13895 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 07:05:35,221 - root - INFO - lr: 3.8290e-05 gnorm: 1.10 [ 8:31:25<16:00:50] +[titan] 2025-10-05 07:05:43,909 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:05:46,101 - root - INFO - step: 13900 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 2.0019 +[titan] 2025-10-05 07:05:46,101 - root - INFO - lr: 3.8282e-05 gnorm: 1.11 [ 8:31:36<16:00:39] +[titan] 2025-10-05 07:05:56,991 - root - INFO - step: 13905 loss: 2.2705 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 07:05:56,992 - root - INFO - lr: 3.8274e-05 gnorm: 1.09 [ 8:31:47<16:00:27] +[titan] 2025-10-05 07:06:07,860 - root - INFO - step: 13910 loss: 2.2822 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0150 +[titan] 2025-10-05 07:06:07,861 - root - INFO - lr: 3.8266e-05 gnorm: 1.06 [ 8:31:58<16:00:16] +[titan] 2025-10-05 07:06:18,755 - root - INFO - step: 13915 loss: 2.3365 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0626 +[titan] 2025-10-05 07:06:18,755 - root - INFO - lr: 3.8259e-05 gnorm: 1.12 [ 8:32:09<16:00:05] +[titan] 2025-10-05 07:06:29,694 - root - INFO - step: 13920 loss: 2.3240 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0527 +[titan] 2025-10-05 07:06:29,694 - root - INFO - lr: 3.8251e-05 gnorm: 1.13 [ 8:32:20<15:59:53] +[titan] 2025-10-05 07:06:40,578 - root - INFO - step: 13925 loss: 2.2091 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9511 +[titan] 2025-10-05 07:06:40,578 - root - INFO - lr: 3.8243e-05 gnorm: 1.13 [ 8:32:31<15:59:42] +[titan] 2025-10-05 07:06:51,433 - root - INFO - step: 13930 loss: 2.3061 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:06:51,434 - root - INFO - lr: 
3.8235e-05 gnorm: 1.12 [ 8:32:42<15:59:31] +[titan] 2025-10-05 07:07:02,325 - root - INFO - step: 13935 loss: 2.2964 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0277 +[titan] 2025-10-05 07:07:02,326 - root - INFO - lr: 3.8227e-05 gnorm: 1.12 [ 8:32:52<15:59:19] +[titan] 2025-10-05 07:07:13,223 - root - INFO - step: 13940 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 07:07:13,223 - root - INFO - lr: 3.8219e-05 gnorm: 1.05 [ 8:33:03<15:59:08] +[titan] 2025-10-05 07:07:24,144 - root - INFO - step: 13945 loss: 2.2627 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 1.9982 +[titan] 2025-10-05 07:07:24,144 - root - INFO - lr: 3.8212e-05 gnorm: 1.07 [ 8:33:14<15:58:57] +[titan] 2025-10-05 07:07:32,855 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:07:35,060 - root - INFO - step: 13950 loss: 2.3247 memory: 118.84GiB(85.28%) tps: 30,018 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 2.0535 +[titan] 2025-10-05 07:07:35,061 - root - INFO - lr: 3.8204e-05 gnorm: 1.15 [ 8:33:25<15:58:45] +[titan] 2025-10-05 07:07:45,949 - root - INFO - step: 13955 loss: 2.2646 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2652 global_avg_mtp_loss: 1.9994 +[titan] 2025-10-05 07:07:45,949 - root - INFO - lr: 3.8196e-05 gnorm: 1.12 [ 8:33:36<15:58:34] +[titan] 2025-10-05 07:07:56,827 - root - INFO - step: 13960 loss: 2.2073 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 07:07:56,827 - root - INFO - lr: 3.8188e-05 gnorm: 1.13 [ 8:33:47<15:58:23] +[titan] 2025-10-05 07:08:07,719 - root - INFO - step: 13965 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9580 +[titan] 2025-10-05 07:08:07,720 - root - INFO - lr: 3.8180e-05 gnorm: 1.05 [ 8:33:58<15:58:11] +[titan] 2025-10-05 07:08:18,609 - root - INFO - step: 13970 loss: 2.3210 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0494 +[titan] 2025-10-05 07:08:18,609 - root - INFO - lr: 3.8172e-05 gnorm: 1.11 [ 8:34:09<15:58:00] +[titan] 2025-10-05 07:08:29,526 - root - INFO - step: 13975 loss: 2.3414 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2736 global_avg_mtp_loss: 2.0678 +[titan] 2025-10-05 07:08:29,526 - root - INFO - lr: 3.8164e-05 gnorm: 1.06 [ 8:34:20<15:57:49] +[titan] 2025-10-05 07:08:40,409 - root - INFO - step: 13980 loss: 2.2904 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0234 +[titan] 2025-10-05 07:08:40,409 - root - INFO - lr: 
3.8157e-05 gnorm: 1.10 [ 8:34:31<15:57:38] +[titan] 2025-10-05 07:08:51,305 - root - INFO - step: 13985 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9806 +[titan] 2025-10-05 07:08:51,305 - root - INFO - lr: 3.8149e-05 gnorm: 1.08 [ 8:34:41<15:57:26] +[titan] 2025-10-05 07:09:02,176 - root - INFO - step: 13990 loss: 2.2413 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:09:02,177 - root - INFO - lr: 3.8141e-05 gnorm: 1.06 [ 8:34:52<15:57:15] +[titan] 2025-10-05 07:09:13,061 - root - INFO - step: 13995 loss: 2.2816 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2678 global_avg_mtp_loss: 2.0139 +[titan] 2025-10-05 07:09:13,062 - root - INFO - lr: 3.8133e-05 gnorm: 1.14 [ 8:35:03<15:57:04] +[titan] 2025-10-05 07:09:21,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:09:23,969 - root - INFO - step: 14000 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 07:09:23,969 - root - INFO - lr: 3.8125e-05 gnorm: 1.09 [ 8:35:14<15:56:52] +[titan] 2025-10-05 07:09:34,866 - root - INFO - step: 14005 loss: 2.2763 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0102 +[titan] 2025-10-05 07:09:34,866 - root - INFO - lr: 3.8117e-05 gnorm: 1.06 [ 8:35:25<15:56:41] +[titan] 2025-10-05 07:09:45,752 - root - INFO - step: 14010 loss: 2.2967 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2728 global_avg_mtp_loss: 2.0239 +[titan] 2025-10-05 07:09:45,752 - root - INFO - lr: 3.8109e-05 gnorm: 1.14 [ 8:35:36<15:56:30] +[titan] 2025-10-05 07:09:56,681 - root - INFO - step: 14015 loss: 2.2388 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9782 +[titan] 2025-10-05 07:09:56,681 - root - INFO - lr: 3.8101e-05 gnorm: 1.10 [ 8:35:47<15:56:18] +[titan] 2025-10-05 07:10:07,561 - root - INFO - step: 14020 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0100 +[titan] 2025-10-05 07:10:07,561 - root - INFO - lr: 3.8094e-05 gnorm: 1.10 [ 8:35:58<15:56:07] +[titan] 2025-10-05 07:10:18,446 - root - INFO - step: 14025 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0031 +[titan] 2025-10-05 07:10:18,446 - root - INFO - lr: 3.8086e-05 gnorm: 1.06 [ 8:36:09<15:55:56] +[titan] 2025-10-05 07:10:29,418 - root - INFO - step: 14030 loss: 2.3296 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0573 +[titan] 2025-10-05 07:10:29,419 - root - INFO - lr: 
3.8078e-05 gnorm: 1.11 [ 8:36:20<15:55:45] +[titan] 2025-10-05 07:10:40,286 - root - INFO - step: 14035 loss: 2.2970 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2684 global_avg_mtp_loss: 2.0286 +[titan] 2025-10-05 07:10:40,286 - root - INFO - lr: 3.8070e-05 gnorm: 1.08 [ 8:36:30<15:55:33] +[titan] 2025-10-05 07:10:51,186 - root - INFO - step: 14040 loss: 2.3219 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0507 +[titan] 2025-10-05 07:10:51,186 - root - INFO - lr: 3.8062e-05 gnorm: 1.08 [ 8:36:41<15:55:22] +[titan] 2025-10-05 07:11:02,100 - root - INFO - step: 14045 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:11:02,100 - root - INFO - lr: 3.8054e-05 gnorm: 1.03 [ 8:36:52<15:55:11] +[titan] 2025-10-05 07:11:10,810 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:11:13,002 - root - INFO - step: 14050 loss: 2.2598 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:13,002 - root - INFO - lr: 3.8046e-05 gnorm: 1.08 [ 8:37:03<15:54:59] +[titan] 2025-10-05 07:11:23,889 - root - INFO - step: 14055 loss: 2.2829 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0164 +[titan] 2025-10-05 07:11:23,889 - root - INFO - lr: 3.8038e-05 gnorm: 1.06 [ 8:37:14<15:54:48] +[titan] 2025-10-05 07:11:34,797 - root - INFO - step: 14060 loss: 2.2612 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9968 +[titan] 2025-10-05 07:11:34,797 - root - INFO - lr: 3.8031e-05 gnorm: 1.08 [ 8:37:25<15:54:37] +[titan] 2025-10-05 07:11:45,686 - root - INFO - step: 14065 loss: 2.2504 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:11:45,686 - root - INFO - lr: 3.8023e-05 gnorm: 1.10 [ 8:37:36<15:54:25] +[titan] 2025-10-05 07:11:56,588 - root - INFO - step: 14070 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 07:11:56,588 - root - INFO - lr: 3.8015e-05 gnorm: 1.10 [ 8:37:47<15:54:14] +[titan] 2025-10-05 07:12:07,484 - root - INFO - step: 14075 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 07:12:07,484 - root - INFO - lr: 3.8007e-05 gnorm: 1.05 [ 8:37:58<15:54:03] +[titan] 2025-10-05 07:12:18,379 - root - INFO - step: 14080 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 07:12:18,379 - root - INFO - lr: 
3.7999e-05 gnorm: 1.09 [ 8:38:08<15:53:51] +[titan] 2025-10-05 07:12:29,280 - root - INFO - step: 14085 loss: 2.2541 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 07:12:29,281 - root - INFO - lr: 3.7991e-05 gnorm: 1.11 [ 8:38:19<15:53:40] +[titan] 2025-10-05 07:12:40,158 - root - INFO - step: 14090 loss: 2.2892 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0223 +[titan] 2025-10-05 07:12:40,159 - root - INFO - lr: 3.7983e-05 gnorm: 1.07 [ 8:38:30<15:53:29] +[titan] 2025-10-05 07:12:51,038 - root - INFO - step: 14095 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0060 +[titan] 2025-10-05 07:12:51,038 - root - INFO - lr: 3.7975e-05 gnorm: 1.08 [ 8:38:41<15:53:17] +[titan] 2025-10-05 07:12:59,737 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:13:01,923 - root - INFO - step: 14100 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 1.9940 +[titan] 2025-10-05 07:13:01,924 - root - INFO - lr: 3.7967e-05 gnorm: 1.10 [ 8:38:52<15:53:06] +[titan] 2025-10-05 07:13:12,819 - root - INFO - step: 14105 loss: 2.2680 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0026 +[titan] 2025-10-05 07:13:12,819 - root - INFO - lr: 3.7959e-05 gnorm: 1.10 [ 8:39:03<15:52:55] +[titan] 2025-10-05 07:13:23,712 - root - INFO - step: 14110 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:13:23,712 - root - INFO - lr: 3.7952e-05 gnorm: 1.06 [ 8:39:14<15:52:44] +[titan] 2025-10-05 07:13:34,613 - root - INFO - step: 14115 loss: 2.3226 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2722 global_avg_mtp_loss: 2.0504 +[titan] 2025-10-05 07:13:34,613 - root - INFO - lr: 3.7944e-05 gnorm: 1.17 [ 8:39:25<15:52:32] +[titan] 2025-10-05 07:13:45,510 - root - INFO - step: 14120 loss: 2.2585 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 07:13:45,510 - root - INFO - lr: 3.7936e-05 gnorm: 1.12 [ 8:39:36<15:52:21] +[titan] 2025-10-05 07:13:56,397 - root - INFO - step: 14125 loss: 2.2697 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0052 +[titan] 2025-10-05 07:13:56,397 - root - INFO - lr: 3.7928e-05 gnorm: 1.11 [ 8:39:46<15:52:10] +[titan] 2025-10-05 07:14:07,282 - root - INFO - step: 14130 loss: 2.2668 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0028 +[titan] 2025-10-05 07:14:07,282 - root - INFO - lr: 
3.7920e-05 gnorm: 1.09 [ 8:39:57<15:51:58] +[titan] 2025-10-05 07:14:18,161 - root - INFO - step: 14135 loss: 2.2782 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 07:14:18,162 - root - INFO - lr: 3.7912e-05 gnorm: 1.13 [ 8:40:08<15:51:47] +[titan] 2025-10-05 07:14:29,064 - root - INFO - step: 14140 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 07:14:29,064 - root - INFO - lr: 3.7904e-05 gnorm: 1.12 [ 8:40:19<15:51:36] +[titan] 2025-10-05 07:14:39,953 - root - INFO - step: 14145 loss: 2.2613 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9977 +[titan] 2025-10-05 07:14:39,953 - root - INFO - lr: 3.7896e-05 gnorm: 1.07 [ 8:40:30<15:51:24] +[titan] 2025-10-05 07:14:48,667 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:14:50,851 - root - INFO - step: 14150 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 07:14:50,851 - root - INFO - lr: 3.7888e-05 gnorm: 1.09 [ 8:40:41<15:51:13] +[titan] 2025-10-05 07:15:01,722 - root - INFO - step: 14155 loss: 2.3499 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2731 global_avg_mtp_loss: 2.0768 +[titan] 2025-10-05 07:15:01,723 - root - INFO - lr: 3.7880e-05 gnorm: 1.07 [ 8:40:52<15:51:02] +[titan] 2025-10-05 07:15:12,596 - root - INFO - step: 14160 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9950 +[titan] 2025-10-05 07:15:12,597 - root - INFO - lr: 3.7872e-05 gnorm: 1.07 [ 8:41:03<15:50:50] +[titan] 2025-10-05 07:15:23,478 - root - INFO - step: 14165 loss: 2.2806 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0143 +[titan] 2025-10-05 07:15:23,478 - root - INFO - lr: 3.7865e-05 gnorm: 1.09 [ 8:41:14<15:50:39] +[titan] 2025-10-05 07:15:34,374 - root - INFO - step: 14170 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:15:34,374 - root - INFO - lr: 3.7857e-05 gnorm: 1.08 [ 8:41:24<15:50:28] +[titan] 2025-10-05 07:15:45,286 - root - INFO - step: 14175 loss: 2.2571 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9933 +[titan] 2025-10-05 07:15:45,287 - root - INFO - lr: 3.7849e-05 gnorm: 1.11 [ 8:41:35<15:50:16] +[titan] 2025-10-05 07:15:56,187 - root - INFO - step: 14180 loss: 2.3045 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0352 +[titan] 2025-10-05 07:15:56,187 - root - INFO - lr: 
3.7841e-05 gnorm: 1.13 [ 8:41:46<15:50:05] +[titan] 2025-10-05 07:16:07,077 - root - INFO - step: 14185 loss: 2.2313 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9715 +[titan] 2025-10-05 07:16:07,077 - root - INFO - lr: 3.7833e-05 gnorm: 1.08 [ 8:41:57<15:49:54] +[titan] 2025-10-05 07:16:17,954 - root - INFO - step: 14190 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9824 +[titan] 2025-10-05 07:16:17,954 - root - INFO - lr: 3.7825e-05 gnorm: 1.05 [ 8:42:08<15:49:43] +[titan] 2025-10-05 07:16:28,838 - root - INFO - step: 14195 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 07:16:28,839 - root - INFO - lr: 3.7817e-05 gnorm: 1.10 [ 8:42:19<15:49:31] +[titan] 2025-10-05 07:16:37,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:16:39,702 - root - INFO - step: 14200 loss: 2.2626 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9987 +[titan] 2025-10-05 07:16:39,703 - root - INFO - lr: 3.7809e-05 gnorm: 1.10 [ 8:42:30<15:49:20] +[titan] 2025-10-05 07:16:50,596 - root - INFO - step: 14205 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 07:16:50,596 - root - INFO - lr: 3.7801e-05 gnorm: 1.05 [ 8:42:41<15:49:09] +[titan] 2025-10-05 07:17:01,477 - root - INFO - step: 14210 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:17:01,477 - root - INFO - lr: 3.7793e-05 gnorm: 1.09 [ 8:42:52<15:48:57] +[titan] 2025-10-05 07:17:12,357 - root - INFO - step: 14215 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0105 +[titan] 2025-10-05 07:17:12,357 - root - INFO - lr: 3.7785e-05 gnorm: 1.09 [ 8:43:02<15:48:46] +[titan] 2025-10-05 07:17:23,224 - root - INFO - step: 14220 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2673 global_avg_mtp_loss: 2.0228 +[titan] 2025-10-05 07:17:23,224 - root - INFO - lr: 3.7777e-05 gnorm: 1.12 [ 8:43:13<15:48:35] +[titan] 2025-10-05 07:17:34,101 - root - INFO - step: 14225 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9842 +[titan] 2025-10-05 07:17:34,101 - root - INFO - lr: 3.7769e-05 gnorm: 1.11 [ 8:43:24<15:48:23] +[titan] 2025-10-05 07:17:44,966 - root - INFO - step: 14230 loss: 2.2228 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9631 +[titan] 2025-10-05 07:17:44,966 - root - INFO - lr: 
3.7761e-05 gnorm: 1.06 [ 8:43:35<15:48:12] +[titan] 2025-10-05 07:17:55,865 - root - INFO - step: 14235 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2642 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:17:55,865 - root - INFO - lr: 3.7753e-05 gnorm: 1.15 [ 8:43:46<15:48:01] +[titan] 2025-10-05 07:18:06,742 - root - INFO - step: 14240 loss: 2.2274 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9683 +[titan] 2025-10-05 07:18:06,742 - root - INFO - lr: 3.7746e-05 gnorm: 1.07 [ 8:43:57<15:47:49] +[titan] 2025-10-05 07:18:17,634 - root - INFO - step: 14245 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2697 global_avg_mtp_loss: 2.0317 +[titan] 2025-10-05 07:18:17,634 - root - INFO - lr: 3.7738e-05 gnorm: 1.09 [ 8:44:08<15:47:38] +[titan] 2025-10-05 07:18:26,321 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:18:28,505 - root - INFO - step: 14250 loss: 2.3269 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0562 +[titan] 2025-10-05 07:18:28,505 - root - INFO - lr: 3.7730e-05 gnorm: 1.10 [ 8:44:19<15:47:27] +[titan] 2025-10-05 07:18:39,411 - root - INFO - step: 14255 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 07:18:39,412 - root - INFO - lr: 3.7722e-05 gnorm: 1.06 [ 8:44:29<15:47:15] +[titan] 2025-10-05 07:18:50,297 - root - INFO - step: 14260 loss: 2.3010 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0327 +[titan] 2025-10-05 07:18:50,297 - root - INFO - lr: 3.7714e-05 gnorm: 1.07 [ 8:44:40<15:47:04] +[titan] 2025-10-05 07:19:01,201 - root - INFO - step: 14265 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2669 global_avg_mtp_loss: 2.0104 +[titan] 2025-10-05 07:19:01,201 - root - INFO - lr: 3.7706e-05 gnorm: 1.10 [ 8:44:51<15:46:53] +[titan] 2025-10-05 07:19:12,083 - root - INFO - step: 14270 loss: 2.2667 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0022 +[titan] 2025-10-05 07:19:12,084 - root - INFO - lr: 3.7698e-05 gnorm: 1.08 [ 8:45:02<15:46:41] +[titan] 2025-10-05 07:19:22,993 - root - INFO - step: 14275 loss: 2.1944 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9385 +[titan] 2025-10-05 07:19:22,993 - root - INFO - lr: 3.7690e-05 gnorm: 1.09 [ 8:45:13<15:46:30] +[titan] 2025-10-05 07:19:33,913 - root - INFO - step: 14280 loss: 2.2467 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9851 +[titan] 2025-10-05 07:19:33,913 - root - INFO - lr: 
3.7682e-05 gnorm: 1.07 [ 8:45:24<15:46:19] +[titan] 2025-10-05 07:19:44,768 - root - INFO - step: 14285 loss: 2.2223 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9628 +[titan] 2025-10-05 07:19:44,768 - root - INFO - lr: 3.7674e-05 gnorm: 1.09 [ 8:45:35<15:46:08] +[titan] 2025-10-05 07:19:55,630 - root - INFO - step: 14290 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0376 +[titan] 2025-10-05 07:19:55,630 - root - INFO - lr: 3.7666e-05 gnorm: 1.10 [ 8:45:46<15:45:56] +[titan] 2025-10-05 07:20:06,491 - root - INFO - step: 14295 loss: 2.2948 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0274 +[titan] 2025-10-05 07:20:06,491 - root - INFO - lr: 3.7658e-05 gnorm: 1.11 [ 8:45:57<15:45:45] +[titan] 2025-10-05 07:20:15,199 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:20:17,375 - root - INFO - step: 14300 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 07:20:17,376 - root - INFO - lr: 3.7650e-05 gnorm: 1.15 [ 8:46:07<15:45:33] +[titan] 2025-10-05 07:20:28,246 - root - INFO - step: 14305 loss: 2.3333 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2725 global_avg_mtp_loss: 2.0609 +[titan] 2025-10-05 07:20:28,246 - root - INFO - lr: 3.7642e-05 gnorm: 1.12 [ 8:46:18<15:45:22] +[titan] 2025-10-05 07:20:39,160 - root - INFO - step: 14310 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2692 global_avg_mtp_loss: 2.0267 +[titan] 2025-10-05 07:20:39,160 - root - INFO - lr: 3.7634e-05 gnorm: 1.14 [ 8:46:29<15:45:11] +[titan] 2025-10-05 07:20:50,006 - root - INFO - step: 14315 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 07:20:50,006 - root - INFO - lr: 3.7626e-05 gnorm: 1.07 [ 8:46:40<15:45:00] +[titan] 2025-10-05 07:21:00,866 - root - INFO - step: 14320 loss: 2.2698 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0054 +[titan] 2025-10-05 07:21:00,866 - root - INFO - lr: 3.7618e-05 gnorm: 1.09 [ 8:46:51<15:44:48] +[titan] 2025-10-05 07:21:11,703 - root - INFO - step: 14325 loss: 2.2740 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:21:11,703 - root - INFO - lr: 3.7610e-05 gnorm: 1.09 [ 8:47:02<15:44:37] +[titan] 2025-10-05 07:21:22,593 - root - INFO - step: 14330 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2688 global_avg_mtp_loss: 2.0330 +[titan] 2025-10-05 07:21:22,594 - root - INFO - lr: 
3.7602e-05 gnorm: 1.14 [ 8:47:13<15:44:25] +[titan] 2025-10-05 07:21:33,559 - root - INFO - step: 14335 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 07:21:33,560 - root - INFO - lr: 3.7594e-05 gnorm: 1.09 [ 8:47:24<15:44:14] +[titan] 2025-10-05 07:21:35,951 - root - INFO - Dumping profiler traces at step 14336 +[titan] 2025-10-05 07:21:35,988 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:21:44,698 - root - INFO - step: 14340 loss: 2.3096 memory: 118.84GiB(85.28%) tps: 29,418 tflops: 408.13 mfu: 41.27% global_avg_ntp_loss: 0.2695 global_avg_mtp_loss: 2.0402 +[titan] 2025-10-05 07:21:44,699 - root - INFO - lr: 3.7586e-05 gnorm: 1.13 [ 8:47:35<15:44:03] +[titan] 2025-10-05 07:21:55,565 - root - INFO - step: 14345 loss: 2.3329 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2724 global_avg_mtp_loss: 2.0604 +[titan] 2025-10-05 07:21:55,565 - root - INFO - lr: 3.7578e-05 gnorm: 1.11 [ 8:47:46<15:43:52] +[titan] 2025-10-05 07:22:04,241 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:22:06,418 - root - INFO - step: 14350 loss: 2.2380 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9769 +[titan] 2025-10-05 07:22:06,419 - root - INFO - lr: 3.7570e-05 gnorm: 1.07 [ 8:47:56<15:43:41] +[titan] 2025-10-05 07:22:17,273 - root - INFO - step: 14355 loss: 2.2325 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 07:22:17,273 - root - INFO - lr: 3.7562e-05 gnorm: 1.12 [ 8:48:07<15:43:29] +[titan] 2025-10-05 07:22:28,142 - root - INFO - step: 14360 loss: 2.3425 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2726 global_avg_mtp_loss: 2.0700 +[titan] 2025-10-05 07:22:28,143 - root - INFO - lr: 3.7554e-05 gnorm: 1.12 [ 8:48:18<15:43:18] +[titan] 2025-10-05 07:22:39,138 - root - INFO - step: 14365 loss: 2.2707 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.44 mfu: 41.80% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0059 +[titan] 2025-10-05 07:22:39,139 - root - INFO - lr: 3.7546e-05 gnorm: 1.08 [ 8:48:29<15:43:07] +[titan] 2025-10-05 07:22:50,009 - root - INFO - step: 14370 loss: 2.2987 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0312 +[titan] 2025-10-05 07:22:50,009 - root - INFO - lr: 3.7538e-05 gnorm: 1.13 [ 8:48:40<15:42:56] +[titan] 2025-10-05 07:23:00,863 - root - INFO - step: 14375 loss: 2.2114 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 07:23:00,864 - root - INFO - lr: 3.7530e-05 gnorm: 1.09 [ 8:48:51<15:42:44] +[titan] 2025-10-05 07:23:11,714 - root - INFO - step: 14380 loss: 2.2991 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0304 +[titan] 2025-10-05 07:23:11,714 - root - INFO - lr: 
3.7522e-05 gnorm: 1.11 [ 8:49:02<15:42:33] +[titan] 2025-10-05 07:23:22,597 - root - INFO - step: 14385 loss: 2.3245 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2707 global_avg_mtp_loss: 2.0537 +[titan] 2025-10-05 07:23:22,597 - root - INFO - lr: 3.7514e-05 gnorm: 1.07 [ 8:49:13<15:42:22] +[titan] 2025-10-05 07:23:33,453 - root - INFO - step: 14390 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:23:33,453 - root - INFO - lr: 3.7506e-05 gnorm: 1.10 [ 8:49:24<15:42:10] +[titan] 2025-10-05 07:23:44,404 - root - INFO - step: 14395 loss: 2.3155 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2712 global_avg_mtp_loss: 2.0444 +[titan] 2025-10-05 07:23:44,404 - root - INFO - lr: 3.7498e-05 gnorm: 1.07 [ 8:49:34<15:41:59] +[titan] 2025-10-05 07:23:53,088 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:23:55,271 - root - INFO - step: 14400 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:23:55,271 - root - INFO - lr: 3.7490e-05 gnorm: 1.09 [ 8:49:45<15:41:48] +[titan] 2025-10-05 07:24:06,109 - root - INFO - step: 14405 loss: 2.3174 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2708 global_avg_mtp_loss: 2.0466 +[titan] 2025-10-05 07:24:06,109 - root - INFO - lr: 3.7482e-05 gnorm: 1.11 [ 8:49:56<15:41:36] +[titan] 2025-10-05 07:24:16,949 - root - INFO - step: 14410 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0222 +[titan] 2025-10-05 07:24:16,949 - root - INFO - lr: 3.7474e-05 gnorm: 1.22 [ 8:50:07<15:41:25] +[titan] 2025-10-05 07:24:27,813 - root - INFO - step: 14415 loss: 2.2533 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9898 +[titan] 2025-10-05 07:24:27,813 - root - INFO - lr: 3.7466e-05 gnorm: 1.06 [ 8:50:18<15:41:14] +[titan] 2025-10-05 07:24:38,740 - root - INFO - step: 14420 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9292 +[titan] 2025-10-05 07:24:38,740 - root - INFO - lr: 3.7458e-05 gnorm: 1.07 [ 8:50:29<15:41:02] +[titan] 2025-10-05 07:24:49,616 - root - INFO - step: 14425 loss: 2.2439 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 07:24:49,616 - root - INFO - lr: 3.7450e-05 gnorm: 1.09 [ 8:50:40<15:40:51] +[titan] 2025-10-05 07:25:00,495 - root - INFO - step: 14430 loss: 2.3345 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2727 global_avg_mtp_loss: 2.0617 +[titan] 2025-10-05 07:25:00,495 - root - INFO - lr: 
3.7442e-05 gnorm: 1.10 [ 8:50:51<15:40:40] +[titan] 2025-10-05 07:25:11,357 - root - INFO - step: 14435 loss: 2.2516 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9889 +[titan] 2025-10-05 07:25:11,357 - root - INFO - lr: 3.7434e-05 gnorm: 1.10 [ 8:51:01<15:40:28] +[titan] 2025-10-05 07:25:22,214 - root - INFO - step: 14440 loss: 2.2632 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9986 +[titan] 2025-10-05 07:25:22,214 - root - INFO - lr: 3.7426e-05 gnorm: 1.10 [ 8:51:12<15:40:17] +[titan] 2025-10-05 07:25:33,076 - root - INFO - step: 14445 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9853 +[titan] 2025-10-05 07:25:33,076 - root - INFO - lr: 3.7418e-05 gnorm: 1.09 [ 8:51:23<15:40:06] +[titan] 2025-10-05 07:25:41,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:25:44,001 - root - INFO - step: 14450 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9761 +[titan] 2025-10-05 07:25:44,001 - root - INFO - lr: 3.7410e-05 gnorm: 1.07 [ 8:51:34<15:39:54] +[titan] 2025-10-05 07:25:54,893 - root - INFO - step: 14455 loss: 2.2554 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9922 +[titan] 2025-10-05 07:25:54,893 - root - INFO - lr: 3.7402e-05 gnorm: 1.13 [ 8:51:45<15:39:43] +[titan] 2025-10-05 07:26:05,774 - root - INFO - step: 14460 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9572 +[titan] 2025-10-05 07:26:05,774 - root - INFO - lr: 3.7394e-05 gnorm: 1.08 [ 8:51:56<15:39:32] +[titan] 2025-10-05 07:26:16,651 - root - INFO - step: 14465 loss: 2.2723 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 07:26:16,651 - root - INFO - lr: 3.7386e-05 gnorm: 1.11 [ 8:52:07<15:39:20] +[titan] 2025-10-05 07:26:27,521 - root - INFO - step: 14470 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 07:26:27,521 - root - INFO - lr: 3.7378e-05 gnorm: 1.08 [ 8:52:18<15:39:09] +[titan] 2025-10-05 07:26:38,394 - root - INFO - step: 14475 loss: 2.2013 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 07:26:38,395 - root - INFO - lr: 3.7370e-05 gnorm: 1.08 [ 8:52:28<15:38:58] +[titan] 2025-10-05 07:26:49,332 - root - INFO - step: 14480 loss: 2.2812 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0154 +[titan] 2025-10-05 07:26:49,332 - root - INFO - lr: 
3.7362e-05 gnorm: 1.12 [ 8:52:39<15:38:47] +[titan] 2025-10-05 07:27:00,212 - root - INFO - step: 14485 loss: 2.2411 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 07:27:00,213 - root - INFO - lr: 3.7354e-05 gnorm: 1.05 [ 8:52:50<15:38:35] +[titan] 2025-10-05 07:27:11,129 - root - INFO - step: 14490 loss: 2.2405 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.45 mfu: 42.11% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9788 +[titan] 2025-10-05 07:27:11,129 - root - INFO - lr: 3.7346e-05 gnorm: 1.09 [ 8:53:01<15:38:24] +[titan] 2025-10-05 07:27:22,004 - root - INFO - step: 14495 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 07:27:22,004 - root - INFO - lr: 3.7338e-05 gnorm: 1.09 [ 8:53:12<15:38:13] +[titan] 2025-10-05 07:27:30,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:27:32,894 - root - INFO - step: 14500 loss: 2.2764 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 07:27:32,894 - root - INFO - lr: 3.7330e-05 gnorm: 1.10 [ 8:53:23<15:38:01] +[titan] 2025-10-05 07:27:43,812 - root - INFO - step: 14505 loss: 2.3090 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0409 +[titan] 2025-10-05 07:27:43,812 - root - INFO - lr: 3.7322e-05 gnorm: 1.06 [ 8:53:34<15:37:50] +[titan] 2025-10-05 07:27:54,688 - root - INFO - step: 14510 loss: 2.2546 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9921 +[titan] 2025-10-05 07:27:54,688 - root - INFO - lr: 3.7314e-05 gnorm: 1.08 [ 8:53:45<15:37:39] +[titan] 2025-10-05 07:28:05,542 - root - INFO - step: 14515 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0245 +[titan] 2025-10-05 07:28:05,542 - root - INFO - lr: 3.7306e-05 gnorm: 1.05 [35m[ 8:53:56<15:37:27] +[titan] 2025-10-05 07:28:16,397 - root - INFO - step: 14520 loss: 2.2628 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:28:16,398 - root - INFO - lr: 3.7298e-05 gnorm: 1.08 [ 8:54:06<15:37:16] +[titan] 2025-10-05 07:28:27,301 - root - INFO - step: 14525 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9512 +[titan] 2025-10-05 07:28:27,301 - root - INFO - lr: 3.7290e-05 gnorm: 1.05 [ 8:54:17<15:37:05] +[titan] 2025-10-05 07:28:38,153 - root - INFO - step: 14530 loss: 2.3014 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2689 global_avg_mtp_loss: 2.0325 +[titan] 2025-10-05 07:28:38,153 - root - INFO - lr: 
3.7282e-05 gnorm: 1.12 [ 8:54:28<15:36:54] +[titan] 2025-10-05 07:28:49,042 - root - INFO - step: 14535 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 07:28:49,042 - root - INFO - lr: 3.7274e-05 gnorm: 1.10 [ 8:54:39<15:36:42] +[titan] 2025-10-05 07:28:59,909 - root - INFO - step: 14540 loss: 2.2631 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9992 +[titan] 2025-10-05 07:28:59,910 - root - INFO - lr: 3.7266e-05 gnorm: 1.09 [ 8:54:50<15:36:31] +[titan] 2025-10-05 07:29:10,771 - root - INFO - step: 14545 loss: 2.2017 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9458 +[titan] 2025-10-05 07:29:10,771 - root - INFO - lr: 3.7258e-05 gnorm: 1.10 [ 8:55:01<15:36:20] +[titan] 2025-10-05 07:29:19,437 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:29:21,621 - root - INFO - step: 14550 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 07:29:21,621 - root - INFO - lr: 3.7250e-05 gnorm: 1.09 [ 8:55:12<15:36:08] +[titan] 2025-10-05 07:29:32,531 - root - INFO - step: 14555 loss: 2.2901 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0226 +[titan] 2025-10-05 07:29:32,531 - root - INFO - lr: 3.7242e-05 gnorm: 1.14 [ 8:55:23<15:35:57] +[titan] 2025-10-05 07:29:43,472 - root - INFO - step: 14560 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8975 +[titan] 2025-10-05 07:29:43,472 - root - INFO - lr: 3.7234e-05 gnorm: 1.02 [ 8:55:34<15:35:46] +[titan] 2025-10-05 07:29:54,345 - root - INFO - step: 14565 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0248 +[titan] 2025-10-05 07:29:54,346 - root - INFO - lr: 3.7226e-05 gnorm: 1.13 [ 8:55:44<15:35:34] +[titan] 2025-10-05 07:30:05,208 - root - INFO - step: 14570 loss: 2.3031 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0344 +[titan] 2025-10-05 07:30:05,209 - root - INFO - lr: 3.7218e-05 gnorm: 1.12 [ 8:55:55<15:35:23] +[titan] 2025-10-05 07:30:16,066 - root - INFO - step: 14575 loss: 2.2367 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 07:30:16,066 - root - INFO - lr: 3.7210e-05 gnorm: 1.10 [ 8:56:06<15:35:12] +[titan] 2025-10-05 07:30:26,932 - root - INFO - step: 14580 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 07:30:26,933 - root - INFO - lr: 
3.7202e-05 gnorm: 1.07 [ 8:56:17<15:35:00] +[titan] 2025-10-05 07:30:37,811 - root - INFO - step: 14585 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9566 +[titan] 2025-10-05 07:30:37,812 - root - INFO - lr: 3.7194e-05 gnorm: 1.08 [ 8:56:28<15:34:49] +[titan] 2025-10-05 07:30:48,772 - root - INFO - step: 14590 loss: 2.3418 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.78 mfu: 41.94% global_avg_ntp_loss: 0.2723 global_avg_mtp_loss: 2.0695 +[titan] 2025-10-05 07:30:48,772 - root - INFO - lr: 3.7185e-05 gnorm: 1.20 [ 8:56:39<15:34:38] +[titan] 2025-10-05 07:30:59,630 - root - INFO - step: 14595 loss: 2.2116 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 07:30:59,630 - root - INFO - lr: 3.7177e-05 gnorm: 1.09 [ 8:56:50<15:34:27] +[titan] 2025-10-05 07:31:08,301 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:31:10,494 - root - INFO - step: 14600 loss: 2.1772 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 07:31:10,494 - root - INFO - lr: 3.7169e-05 gnorm: 1.08 [ 8:57:01<15:34:15] +[titan] 2025-10-05 07:31:21,365 - root - INFO - step: 14605 loss: 2.3083 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2705 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:31:21,365 - root - INFO - lr: 3.7161e-05 gnorm: 1.09 [ 8:57:11<15:34:04] +[titan] 2025-10-05 07:31:32,251 - root - INFO - step: 14610 loss: 2.3039 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0360 +[titan] 2025-10-05 07:31:32,251 - root - INFO - lr: 3.7153e-05 gnorm: 1.15 [ 8:57:22<15:33:53] +[titan] 2025-10-05 07:31:43,124 - root - INFO - step: 14615 loss: 2.2982 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0301 +[titan] 2025-10-05 07:31:43,125 - root - INFO - lr: 3.7145e-05 gnorm: 1.12 [ 8:57:33<15:33:41] +[titan] 2025-10-05 07:31:54,094 - root - INFO - step: 14620 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9332 +[titan] 2025-10-05 07:31:54,094 - root - INFO - lr: 3.7137e-05 gnorm: 1.09 [ 8:57:44<15:33:30] +[titan] 2025-10-05 07:32:04,989 - root - INFO - step: 14625 loss: 2.2391 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:32:04,989 - root - INFO - lr: 3.7129e-05 gnorm: 1.10 [ 8:57:55<15:33:19] +[titan] 2025-10-05 07:32:15,888 - root - INFO - step: 14630 loss: 2.3113 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0419 +[titan] 2025-10-05 07:32:15,888 - root - INFO - lr: 
3.7121e-05 gnorm: 1.10 [ 8:58:06<15:33:08] +[titan] 2025-10-05 07:32:26,771 - root - INFO - step: 14635 loss: 2.2726 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0075 +[titan] 2025-10-05 07:32:26,771 - root - INFO - lr: 3.7113e-05 gnorm: 1.12 [ 8:58:17<15:32:56] +[titan] 2025-10-05 07:32:37,649 - root - INFO - step: 14640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 07:32:37,649 - root - INFO - lr: 3.7105e-05 gnorm: 1.08 [ 8:58:28<15:32:45] +[titan] 2025-10-05 07:32:48,613 - root - INFO - step: 14645 loss: 2.1989 memory: 118.84GiB(85.28%) tps: 29,888 tflops: 414.65 mfu: 41.93% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9422 +[titan] 2025-10-05 07:32:48,613 - root - INFO - lr: 3.7097e-05 gnorm: 1.05 [ 8:58:39<15:32:34] +[titan] 2025-10-05 07:32:57,350 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:32:59,535 - root - INFO - step: 14650 loss: 2.3040 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0349 +[titan] 2025-10-05 07:32:59,535 - root - INFO - lr: 3.7089e-05 gnorm: 1.06 [ 8:58:50<15:32:23] +[titan] 2025-10-05 07:33:10,438 - root - INFO - step: 14655 loss: 2.2889 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0209 +[titan] 2025-10-05 07:33:10,439 - root - INFO - lr: 3.7081e-05 gnorm: 1.13 [ 8:59:00<15:32:11] +[titan] 2025-10-05 07:33:21,347 - root - INFO - step: 14660 loss: 2.2514 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:33:21,348 - root - INFO - lr: 3.7073e-05 gnorm: 1.12 [ 8:59:11<15:32:00] +[titan] 2025-10-05 07:33:32,227 - root - INFO - step: 14665 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 07:33:32,227 - root - INFO - lr: 3.7064e-05 gnorm: 1.12 [ 8:59:22<15:31:49] +[titan] 2025-10-05 07:33:43,130 - root - INFO - step: 14670 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9343 +[titan] 2025-10-05 07:33:43,130 - root - INFO - lr: 3.7056e-05 gnorm: 1.13 [ 8:59:33<15:31:38] +[titan] 2025-10-05 07:33:54,090 - root - INFO - step: 14675 loss: 2.2801 memory: 118.84GiB(85.28%) tps: 29,898 tflops: 414.79 mfu: 41.94% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0146 +[titan] 2025-10-05 07:33:54,091 - root - INFO - lr: 3.7048e-05 gnorm: 1.08 [ 8:59:44<15:31:26] +[titan] 2025-10-05 07:34:04,932 - root - INFO - step: 14680 loss: 2.1187 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 07:34:04,932 - root - INFO - lr: 
3.7040e-05 gnorm: 1.06 [ 8:59:55<15:31:15] +[titan] 2025-10-05 07:34:15,806 - root - INFO - step: 14685 loss: 2.2578 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9938 +[titan] 2025-10-05 07:34:15,806 - root - INFO - lr: 3.7032e-05 gnorm: 1.10 [ 9:00:06<15:31:04] +[titan] 2025-10-05 07:34:26,671 - root - INFO - step: 14690 loss: 2.2095 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9509 +[titan] 2025-10-05 07:34:26,671 - root - INFO - lr: 3.7024e-05 gnorm: 1.06 [ 9:00:17<15:30:52] +[titan] 2025-10-05 07:34:37,510 - root - INFO - step: 14695 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 07:34:37,510 - root - INFO - lr: 3.7016e-05 gnorm: 1.06 [ 9:00:28<15:30:41] +[titan] 2025-10-05 07:34:46,191 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:34:48,450 - root - INFO - step: 14700 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9405 +[titan] 2025-10-05 07:34:48,450 - root - INFO - lr: 3.7008e-05 gnorm: 1.10 [ 9:00:38<15:30:30] +[titan] 2025-10-05 07:34:59,300 - root - INFO - step: 14705 loss: 2.2911 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0240 +[titan] 2025-10-05 07:34:59,300 - root - INFO - lr: 3.7000e-05 gnorm: 1.14 [ 9:00:49<15:30:19] +[titan] 2025-10-05 07:35:10,167 - root - INFO - step: 14710 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0017 +[titan] 2025-10-05 07:35:10,168 - root - INFO - lr: 3.6992e-05 gnorm: 1.09 [ 9:01:00<15:30:07] +[titan] 2025-10-05 07:35:21,048 - root - INFO - step: 14715 loss: 2.2678 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0027 +[titan] 2025-10-05 07:35:21,048 - root - INFO - lr: 3.6984e-05 gnorm: 1.13 [ 9:01:11<15:29:56] +[titan] 2025-10-05 07:35:31,930 - root - INFO - step: 14720 loss: 2.2273 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9675 +[titan] 2025-10-05 07:35:31,930 - root - INFO - lr: 3.6976e-05 gnorm: 1.08 [ 9:01:22<15:29:45] +[titan] 2025-10-05 07:35:42,810 - root - INFO - step: 14725 loss: 2.3179 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0463 +[titan] 2025-10-05 07:35:42,810 - root - INFO - lr: 3.6967e-05 gnorm: 1.12 [ 9:01:33<15:29:33] +[titan] 2025-10-05 07:35:53,724 - root - INFO - step: 14730 loss: 2.2620 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9989 +[titan] 2025-10-05 07:35:53,724 - root - INFO - lr: 
3.6959e-05 gnorm: 1.11 [ 9:01:44<15:29:22] +[titan] 2025-10-05 07:36:04,629 - root - INFO - step: 14735 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 07:36:04,629 - root - INFO - lr: 3.6951e-05 gnorm: 1.06 [ 9:01:55<15:29:11] +[titan] 2025-10-05 07:36:15,522 - root - INFO - step: 14740 loss: 2.2768 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 07:36:15,522 - root - INFO - lr: 3.6943e-05 gnorm: 1.09 [ 9:02:06<15:29:00] +[titan] 2025-10-05 07:36:26,431 - root - INFO - step: 14745 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 07:36:26,431 - root - INFO - lr: 3.6935e-05 gnorm: 1.07 [ 9:02:16<15:28:48] +[titan] 2025-10-05 07:36:35,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:36:37,321 - root - INFO - step: 14750 loss: 2.3442 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2729 global_avg_mtp_loss: 2.0712 +[titan] 2025-10-05 07:36:37,321 - root - INFO - lr: 3.6927e-05 gnorm: 1.10 [ 9:02:27<15:28:37] +[titan] 2025-10-05 07:36:48,227 - root - INFO - step: 14755 loss: 2.2186 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:36:48,227 - root - INFO - lr: 3.6919e-05 gnorm: 1.04 [ 9:02:38<15:28:26] +[titan] 2025-10-05 07:36:59,096 - root - INFO - step: 14760 loss: 2.2696 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0036 +[titan] 2025-10-05 07:36:59,096 - root - INFO - lr: 3.6911e-05 gnorm: 1.08 [ 9:02:49<15:28:14] +[titan] 2025-10-05 07:37:09,945 - root - INFO - step: 14765 loss: 2.2510 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9893 +[titan] 2025-10-05 07:37:09,945 - root - INFO - lr: 3.6903e-05 gnorm: 1.13 [ 9:03:00<15:28:03] +[titan] 2025-10-05 07:37:20,822 - root - INFO - step: 14770 loss: 2.2169 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 07:37:20,822 - root - INFO - lr: 3.6894e-05 gnorm: 1.08 [ 9:03:11<15:27:52] +[titan] 2025-10-05 07:37:31,692 - root - INFO - step: 14775 loss: 2.2524 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 07:37:31,692 - root - INFO - lr: 3.6886e-05 gnorm: 1.10 [ 9:03:22<15:27:40] +[titan] 2025-10-05 07:37:42,588 - root - INFO - step: 14780 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 07:37:42,588 - root - INFO - lr: 
3.6878e-05 gnorm: 1.12 [ 9:03:33<15:27:29] +[titan] 2025-10-05 07:37:53,516 - root - INFO - step: 14785 loss: 2.1691 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9163 +[titan] 2025-10-05 07:37:53,516 - root - INFO - lr: 3.6870e-05 gnorm: 1.06 [ 9:03:44<15:27:18] +[titan] 2025-10-05 07:38:04,385 - root - INFO - step: 14790 loss: 2.1764 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 07:38:04,386 - root - INFO - lr: 3.6862e-05 gnorm: 1.05 [ 9:03:54<15:27:07] +[titan] 2025-10-05 07:38:15,271 - root - INFO - step: 14795 loss: 2.2615 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9988 +[titan] 2025-10-05 07:38:15,271 - root - INFO - lr: 3.6854e-05 gnorm: 1.11 [ 9:04:05<15:26:55] +[titan] 2025-10-05 07:38:23,946 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:38:26,148 - root - INFO - step: 14800 loss: 2.2171 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 07:38:26,148 - root - INFO - lr: 3.6846e-05 gnorm: 1.11 [ 9:04:16<15:26:44] +[titan] 2025-10-05 07:38:37,018 - root - INFO - step: 14805 loss: 2.2978 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2686 global_avg_mtp_loss: 2.0292 +[titan] 2025-10-05 07:38:37,018 - root - INFO - lr: 3.6838e-05 gnorm: 1.12 [ 9:04:27<15:26:33] +[titan] 2025-10-05 07:38:47,933 - root - INFO - step: 14810 loss: 2.2569 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 07:38:47,933 - root - INFO - lr: 3.6830e-05 gnorm: 1.11 [ 9:04:38<15:26:22] +[titan] 2025-10-05 07:38:58,873 - root - INFO - step: 14815 loss: 2.2872 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0204 +[titan] 2025-10-05 07:38:58,873 - root - INFO - lr: 3.6821e-05 gnorm: 1.08 [ 9:04:49<15:26:10] +[titan] 2025-10-05 07:39:09,749 - root - INFO - step: 14820 loss: 2.2863 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 07:39:09,749 - root - INFO - lr: 3.6813e-05 gnorm: 1.08 [ 9:05:00<15:25:59] +[titan] 2025-10-05 07:39:20,633 - root - INFO - step: 14825 loss: 2.3248 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2716 global_avg_mtp_loss: 2.0533 +[titan] 2025-10-05 07:39:20,633 - root - INFO - lr: 3.6805e-05 gnorm: 1.06 [ 9:05:11<15:25:48] +[titan] 2025-10-05 07:39:31,524 - root - INFO - step: 14830 loss: 2.2650 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 1.9993 +[titan] 2025-10-05 07:39:31,525 - root - INFO - lr: 
3.6797e-05 gnorm: 1.05 [ 9:05:22<15:25:37] +[titan] 2025-10-05 07:39:42,407 - root - INFO - step: 14835 loss: 2.3103 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2693 global_avg_mtp_loss: 2.0410 +[titan] 2025-10-05 07:39:42,408 - root - INFO - lr: 3.6789e-05 gnorm: 1.09 [ 9:05:32<15:25:25] +[titan] 2025-10-05 07:39:53,316 - root - INFO - step: 14840 loss: 2.2518 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:39:53,316 - root - INFO - lr: 3.6781e-05 gnorm: 1.06 [ 9:05:43<15:25:14] +[titan] 2025-10-05 07:40:04,271 - root - INFO - step: 14845 loss: 2.2304 memory: 118.84GiB(85.28%) tps: 29,911 tflops: 414.97 mfu: 41.96% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9723 +[titan] 2025-10-05 07:40:04,271 - root - INFO - lr: 3.6773e-05 gnorm: 1.12 [ 9:05:54<15:25:03] +[titan] 2025-10-05 07:40:10,994 - root - INFO - Dumping profiler traces at step 14848 +[titan] 2025-10-05 07:40:11,033 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 07:40:13,209 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:40:15,391 - root - INFO - step: 14850 loss: 2.2804 memory: 118.84GiB(85.28%) tps: 29,468 tflops: 408.82 mfu: 41.34% global_avg_ntp_loss: 0.2670 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:40:15,392 - root - INFO - lr: 3.6765e-05 gnorm: 1.08 [ 9:06:05<15:24:52] +[titan] 2025-10-05 07:40:26,262 - root - INFO - step: 14855 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 07:40:26,262 - root - INFO - lr: 3.6756e-05 gnorm: 1.09 [ 9:06:16<15:24:41] +[titan] 2025-10-05 07:40:37,129 - root - INFO - step: 14860 loss: 2.2444 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 07:40:37,129 - root - INFO - lr: 3.6748e-05 gnorm: 1.08 [ 9:06:27<15:24:29] +[titan] 2025-10-05 07:40:47,995 - root - INFO - step: 14865 loss: 2.2759 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0098 +[titan] 2025-10-05 07:40:47,995 - root - INFO - lr: 3.6740e-05 gnorm: 1.10 [ 9:06:38<15:24:18] +[titan] 2025-10-05 07:40:58,905 - root - INFO - step: 14870 loss: 2.3070 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0374 +[titan] 2025-10-05 07:40:58,905 - root - INFO - lr: 3.6732e-05 gnorm: 1.10 [ 9:06:49<15:24:07] +[titan] 2025-10-05 07:41:09,784 - root - INFO - step: 14875 loss: 2.2761 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 2.0121 +[titan] 2025-10-05 07:41:09,784 - root - INFO - lr: 3.6724e-05 gnorm: 1.10 [ 9:07:00<15:23:55] +[titan] 2025-10-05 07:41:20,683 - root - INFO - step: 14880 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 07:41:20,683 - root - INFO - lr: 
3.6716e-05 gnorm: 1.08 [ 9:07:11<15:23:44] +[titan] 2025-10-05 07:41:31,553 - root - INFO - step: 14885 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 07:41:31,553 - root - INFO - lr: 3.6708e-05 gnorm: 1.05 [ 9:07:22<15:23:33] +[titan] 2025-10-05 07:41:42,413 - root - INFO - step: 14890 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0082 +[titan] 2025-10-05 07:41:42,413 - root - INFO - lr: 3.6699e-05 gnorm: 1.15 [ 9:07:32<15:23:22] +[titan] 2025-10-05 07:41:53,308 - root - INFO - step: 14895 loss: 2.2418 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9792 +[titan] 2025-10-05 07:41:53,308 - root - INFO - lr: 3.6691e-05 gnorm: 1.07 [ 9:07:43<15:23:10] +[titan] 2025-10-05 07:42:01,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:42:04,160 - root - INFO - step: 14900 loss: 2.2908 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0236 +[titan] 2025-10-05 07:42:04,160 - root - INFO - lr: 3.6683e-05 gnorm: 1.13 [ 9:07:54<15:22:59] +[titan] 2025-10-05 07:42:15,031 - root - INFO - step: 14905 loss: 2.3078 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2698 global_avg_mtp_loss: 2.0380 +[titan] 2025-10-05 07:42:15,031 - root - INFO - lr: 3.6675e-05 gnorm: 1.12 [ 9:08:05<15:22:48] +[titan] 2025-10-05 07:42:25,895 - root - INFO - step: 14910 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9881 +[titan] 2025-10-05 07:42:25,895 - root - INFO - lr: 3.6667e-05 gnorm: 1.09 [ 9:08:16<15:22:36] +[titan] 2025-10-05 07:42:36,754 - root - INFO - step: 14915 loss: 2.2480 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 07:42:36,754 - root - INFO - lr: 3.6659e-05 gnorm: 1.09 [ 9:08:27<15:22:25] +[titan] 2025-10-05 07:42:47,621 - root - INFO - step: 14920 loss: 2.4317 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2984 global_avg_mtp_loss: 2.1332 +[titan] 2025-10-05 07:42:47,621 - root - INFO - lr: 3.6651e-05 gnorm: 1.11 [ 9:08:38<15:22:14] +[titan] 2025-10-05 07:42:58,504 - root - INFO - step: 14925 loss: 2.2167 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9576 +[titan] 2025-10-05 07:42:58,504 - root - INFO - lr: 3.6642e-05 gnorm: 1.09 [ 9:08:49<15:22:02] +[titan] 2025-10-05 07:43:09,387 - root - INFO - step: 14930 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9724 +[titan] 2025-10-05 07:43:09,388 - root - INFO - lr: 
3.6634e-05 gnorm: 1.08 [ 9:08:59<15:21:51] +[titan] 2025-10-05 07:43:20,273 - root - INFO - step: 14935 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:43:20,273 - root - INFO - lr: 3.6626e-05 gnorm: 1.11 [ 9:09:10<15:21:40] +[titan] 2025-10-05 07:43:31,152 - root - INFO - step: 14940 loss: 2.2799 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0134 +[titan] 2025-10-05 07:43:31,152 - root - INFO - lr: 3.6618e-05 gnorm: 1.09 [ 9:09:21<15:21:29] +[titan] 2025-10-05 07:43:42,038 - root - INFO - step: 14945 loss: 2.2476 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 07:43:42,038 - root - INFO - lr: 3.6610e-05 gnorm: 1.04 [ 9:09:32<15:21:17] +[titan] 2025-10-05 07:43:50,721 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:43:52,910 - root - INFO - step: 14950 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9350 +[titan] 2025-10-05 07:43:52,910 - root - INFO - lr: 3.6602e-05 gnorm: 1.07 [ 9:09:43<15:21:06] +[titan] 2025-10-05 07:44:03,804 - root - INFO - step: 14955 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9563 +[titan] 2025-10-05 07:44:03,804 - root - INFO - lr: 3.6593e-05 gnorm: 1.06 [ 9:09:54<15:20:55] +[titan] 2025-10-05 07:44:14,704 - root - INFO - step: 14960 loss: 2.2966 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2676 global_avg_mtp_loss: 2.0290 +[titan] 2025-10-05 07:44:14,704 - root - INFO - lr: 3.6585e-05 gnorm: 1.08 [ 9:10:05<15:20:44] +[titan] 2025-10-05 07:44:25,575 - root - INFO - step: 14965 loss: 2.2163 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 07:44:25,576 - root - INFO - lr: 3.6577e-05 gnorm: 1.07 [ 9:10:16<15:20:32] +[titan] 2025-10-05 07:44:36,428 - root - INFO - step: 14970 loss: 2.2508 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9887 +[titan] 2025-10-05 07:44:36,428 - root - INFO - lr: 3.6569e-05 gnorm: 1.09 [ 9:10:26<15:20:21] +[titan] 2025-10-05 07:44:47,311 - root - INFO - step: 14975 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:44:47,311 - root - INFO - lr: 3.6561e-05 gnorm: 1.08 [ 9:10:37<15:20:10] +[titan] 2025-10-05 07:44:58,219 - root - INFO - step: 14980 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:44:58,219 - root - INFO - lr: 
3.6553e-05 gnorm: 1.11 [ 9:10:48<15:19:58] +[titan] 2025-10-05 07:45:09,066 - root - INFO - step: 14985 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9745 +[titan] 2025-10-05 07:45:09,066 - root - INFO - lr: 3.6544e-05 gnorm: 1.07 [ 9:10:59<15:19:47] +[titan] 2025-10-05 07:45:19,933 - root - INFO - step: 14990 loss: 2.1985 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 07:45:19,934 - root - INFO - lr: 3.6536e-05 gnorm: 1.07 [ 9:11:10<15:19:36] +[titan] 2025-10-05 07:45:30,815 - root - INFO - step: 14995 loss: 2.2699 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2634 global_avg_mtp_loss: 2.0065 +[titan] 2025-10-05 07:45:30,816 - root - INFO - lr: 3.6528e-05 gnorm: 1.05 [ 9:11:21<15:19:24] +[titan] 2025-10-05 07:45:39,478 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:45:41,661 - root - INFO - step: 15000 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 07:45:41,661 - root - INFO - lr: 3.6520e-05 gnorm: 1.11 [ 9:11:32<15:19:13] +[titan] 2025-10-05 07:45:41,661 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 07:46:00,838 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 07:46:00,838 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.18 seconds. 
+[titan] 2025-10-05 07:48:03,855 - root - INFO - step: 15005 loss: 2.1283 memory: 118.84GiB(85.28%) tps: 2,304 tflops: 31.97 mfu: 3.23% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 07:48:03,856 - root - INFO - lr: 3.6512e-05 gnorm: 1.06 [ 9:13:54<15:22:41] +[titan] 2025-10-05 07:48:14,694 - root - INFO - step: 15010 loss: 2.2315 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 07:48:14,695 - root - INFO - lr: 3.6504e-05 gnorm: 1.12 [ 9:14:05<15:22:29] +[titan] 2025-10-05 07:48:25,510 - root - INFO - step: 15015 loss: 2.2773 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0116 +[titan] 2025-10-05 07:48:25,510 - root - INFO - lr: 3.6495e-05 gnorm: 1.09 [ 9:14:16<15:22:18] +[titan] 2025-10-05 07:48:36,334 - root - INFO - step: 15020 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.01 mfu: 42.47% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 07:48:36,334 - root - INFO - lr: 3.6487e-05 gnorm: 1.09 [ 9:14:26<15:22:06] +[titan] 2025-10-05 07:48:47,212 - root - INFO - step: 15025 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 07:48:47,212 - root - INFO - lr: 3.6479e-05 gnorm: 1.07 [ 9:14:37<15:21:55] +[titan] 2025-10-05 07:48:58,068 - root - INFO - step: 15030 loss: 2.2843 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:48:58,069 - root - INFO - lr: 3.6471e-05 gnorm: 1.12 [ 9:14:48<15:21:43] +[titan] 2025-10-05 07:49:08,916 - root - INFO - step: 15035 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 07:49:08,916 - root - INFO - lr: 
3.6463e-05 gnorm: 1.06 [ 9:14:59<15:21:32] +[titan] 2025-10-05 07:49:19,759 - root - INFO - step: 15040 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9876 +[titan] 2025-10-05 07:49:19,759 - root - INFO - lr: 3.6454e-05 gnorm: 1.06 [ 9:15:10<15:21:20] +[titan] 2025-10-05 07:49:30,613 - root - INFO - step: 15045 loss: 2.2689 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0034 +[titan] 2025-10-05 07:49:30,613 - root - INFO - lr: 3.6446e-05 gnorm: 1.09 [ 9:15:21<15:21:09] +[titan] 2025-10-05 07:49:39,280 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 07:49:41,450 - root - INFO - step: 15050 loss: 2.2266 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 07:49:41,450 - root - INFO - lr: 3.6438e-05 gnorm: 1.08 [ 9:15:31<15:20:57] +[titan] 2025-10-05 07:49:52,326 - root - INFO - step: 15055 loss: 2.2710 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0056 +[titan] 2025-10-05 07:49:52,326 - root - INFO - lr: 3.6430e-05 gnorm: 1.10 [ 9:15:42<15:20:46] +[titan] 2025-10-05 07:50:03,204 - root - INFO - step: 15060 loss: 2.2848 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2668 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 07:50:03,204 - root - INFO - lr: 3.6422e-05 gnorm: 1.09 [ 9:15:53<15:20:35] +[titan] 2025-10-05 07:50:14,065 - root - INFO - step: 15065 loss: 2.2635 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 07:50:14,065 - root - INFO - lr: 3.6413e-05 gnorm: 1.09 [ 9:16:04<15:20:23] +[titan] 2025-10-05 07:50:24,948 - root - INFO - step: 15070 loss: 2.2568 memory: 118.84GiB(85.28%) tps: 
30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 07:50:24,948 - root - INFO - lr: 3.6405e-05 gnorm: 1.08 [ 9:16:15<15:20:12] +[titan] 2025-10-05 07:50:35,794 - root - INFO - step: 15075 loss: 2.2878 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0211 +[titan] 2025-10-05 07:50:35,794 - root - INFO - lr: 3.6397e-05 gnorm: 1.09 [ 9:16:26<15:20:00] +[titan] 2025-10-05 07:50:46,648 - root - INFO - step: 15080 loss: 2.2769 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0110 +[titan] 2025-10-05 07:50:46,649 - root - INFO - lr: 3.6389e-05 gnorm: 1.07 [ 9:16:37<15:19:49] +[titan] 2025-10-05 07:50:57,498 - root - INFO - step: 15085 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 07:50:57,498 - root - INFO - lr: 3.6381e-05 gnorm: 1.07 [ 9:16:47<15:19:37] +[titan] 2025-10-05 07:51:08,384 - root - INFO - step: 15090 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:51:08,385 - root - INFO - lr: 3.6373e-05 gnorm: 1.08 [ 9:16:58<15:19:26] +[titan] 2025-10-05 07:51:19,234 - root - INFO - step: 15095 loss: 2.2363 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9753 +[titan] 2025-10-05 07:51:19,234 - root - INFO - lr: 3.6364e-05 gnorm: 1.07 [ 9:17:09<15:19:15] +[titan] 2025-10-05 07:51:27,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:51:30,098 - root - INFO - step: 15100 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9785 +[titan] 2025-10-05 07:51:30,098 - root - INFO - lr: 3.6356e-05 gnorm: 1.15 [ 9:17:20<15:19:03] +[titan] 2025-10-05 07:51:40,977 - root - INFO - step: 15105 loss: 2.2586 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 1.9945 +[titan] 2025-10-05 07:51:40,977 - root - INFO - lr: 3.6348e-05 gnorm: 1.11 [ 9:17:31<15:18:52] +[titan] 2025-10-05 07:51:51,845 - root - INFO - step: 15110 loss: 2.2404 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9794 +[titan] 2025-10-05 07:51:51,845 - root - INFO - lr: 3.6340e-05 gnorm: 1.07 [ 9:17:42<15:18:40] +[titan] 2025-10-05 07:52:02,715 - root - INFO - step: 15115 loss: 2.2946 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0275 +[titan] 2025-10-05 07:52:02,715 - root - INFO - lr: 3.6331e-05 gnorm: 1.14 [ 9:17:53<15:18:29] +[titan] 2025-10-05 07:52:13,605 - root - INFO - step: 15120 loss: 2.2957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 07:52:13,605 - root - INFO - lr: 3.6323e-05 gnorm: 1.15 [ 9:18:04<15:18:18] +[titan] 2025-10-05 07:52:24,497 - root - INFO - step: 15125 loss: 2.2489 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9868 +[titan] 2025-10-05 07:52:24,498 - root - INFO - lr: 3.6315e-05 gnorm: 1.03 [ 9:18:14<15:18:06] +[titan] 2025-10-05 07:52:35,368 - root - INFO - step: 15130 loss: 2.2438 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:35,368 - root - INFO - lr: 
3.6307e-05 gnorm: 1.11 [ 9:18:25<15:17:55] +[titan] 2025-10-05 07:52:46,243 - root - INFO - step: 15135 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 07:52:46,243 - root - INFO - lr: 3.6299e-05 gnorm: 1.09 [ 9:18:36<15:17:43] +[titan] 2025-10-05 07:52:57,118 - root - INFO - step: 15140 loss: 2.2420 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9812 +[titan] 2025-10-05 07:52:57,118 - root - INFO - lr: 3.6290e-05 gnorm: 1.10 [ 9:18:47<15:17:32] +[titan] 2025-10-05 07:53:07,980 - root - INFO - step: 15145 loss: 2.3012 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0319 +[titan] 2025-10-05 07:53:07,981 - root - INFO - lr: 3.6282e-05 gnorm: 1.07 [ 9:18:58<15:17:21] +[titan] 2025-10-05 07:53:16,667 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:53:18,858 - root - INFO - step: 15150 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9690 +[titan] 2025-10-05 07:53:18,858 - root - INFO - lr: 3.6274e-05 gnorm: 1.07 [ 9:19:09<15:17:09] +[titan] 2025-10-05 07:53:29,759 - root - INFO - step: 15155 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 07:53:29,759 - root - INFO - lr: 3.6266e-05 gnorm: 1.07 [ 9:19:20<15:16:58] +[titan] 2025-10-05 07:53:40,639 - root - INFO - step: 15160 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9577 +[titan] 2025-10-05 07:53:40,639 - root - INFO - lr: 3.6258e-05 gnorm: 1.11 [ 9:19:31<15:16:47] +[titan] 2025-10-05 07:53:51,512 - root - INFO - step: 15165 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 07:53:51,512 - root - INFO - lr: 3.6249e-05 gnorm: 1.10 [ 9:19:41<15:16:35] +[titan] 2025-10-05 07:54:02,384 - root - INFO - step: 15170 loss: 2.2679 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0038 +[titan] 2025-10-05 07:54:02,384 - root - INFO - lr: 3.6241e-05 gnorm: 1.11 [ 9:19:52<15:16:24] +[titan] 2025-10-05 07:54:13,267 - root - INFO - step: 15175 loss: 2.1958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 07:54:13,267 - root - INFO - lr: 3.6233e-05 gnorm: 1.08 [ 9:20:03<15:16:12] +[titan] 2025-10-05 07:54:24,133 - root - INFO - step: 15180 loss: 2.3028 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 07:54:24,133 - root - INFO - lr: 
3.6225e-05 gnorm: 1.09 [ 9:20:14<15:16:01] +[titan] 2025-10-05 07:54:35,038 - root - INFO - step: 15185 loss: 2.2735 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2650 global_avg_mtp_loss: 2.0085 +[titan] 2025-10-05 07:54:35,038 - root - INFO - lr: 3.6216e-05 gnorm: 1.07 [ 9:20:25<15:15:50] +[titan] 2025-10-05 07:54:45,892 - root - INFO - step: 15190 loss: 2.2955 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2683 global_avg_mtp_loss: 2.0272 +[titan] 2025-10-05 07:54:45,892 - root - INFO - lr: 3.6208e-05 gnorm: 1.08 [ 9:20:36<15:15:38] +[titan] 2025-10-05 07:54:56,749 - root - INFO - step: 15195 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9845 +[titan] 2025-10-05 07:54:56,749 - root - INFO - lr: 3.6200e-05 gnorm: 1.08 [ 9:20:47<15:15:27] +[titan] 2025-10-05 07:55:05,429 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:55:07,614 - root - INFO - step: 15200 loss: 2.2230 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 07:55:07,615 - root - INFO - lr: 3.6192e-05 gnorm: 1.08 [ 9:20:58<15:15:15] +[titan] 2025-10-05 07:55:18,475 - root - INFO - step: 15205 loss: 2.2720 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0066 +[titan] 2025-10-05 07:55:18,475 - root - INFO - lr: 3.6184e-05 gnorm: 1.09 [ 9:21:08<15:15:04] +[titan] 2025-10-05 07:55:29,333 - root - INFO - step: 15210 loss: 2.2496 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9871 +[titan] 2025-10-05 07:55:29,333 - root - INFO - lr: 3.6175e-05 gnorm: 1.10 [ 9:21:19<15:14:53] +[titan] 2025-10-05 07:55:40,201 - root - INFO - step: 15215 loss: 2.2704 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 07:55:40,201 - root - INFO - lr: 3.6167e-05 gnorm: 1.10 [ 9:21:30<15:14:41] +[titan] 2025-10-05 07:55:51,102 - root - INFO - step: 15220 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 1.9944 +[titan] 2025-10-05 07:55:51,103 - root - INFO - lr: 3.6159e-05 gnorm: 1.09 [ 9:21:41<15:14:30] +[titan] 2025-10-05 07:56:01,979 - root - INFO - step: 15225 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9843 +[titan] 2025-10-05 07:56:01,979 - root - INFO - lr: 3.6151e-05 gnorm: 1.08 [ 9:21:52<15:14:18] +[titan] 2025-10-05 07:56:12,856 - root - INFO - step: 15230 loss: 2.3282 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2751 global_avg_mtp_loss: 2.0531 +[titan] 2025-10-05 07:56:12,856 - root - INFO - lr: 
3.6142e-05 gnorm: 1.08 [ 9:22:03<15:14:07] +[titan] 2025-10-05 07:56:23,718 - root - INFO - step: 15235 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 2.0077 +[titan] 2025-10-05 07:56:23,718 - root - INFO - lr: 3.6134e-05 gnorm: 1.04 [ 9:22:14<15:13:56] +[titan] 2025-10-05 07:56:34,605 - root - INFO - step: 15240 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9751 +[titan] 2025-10-05 07:56:34,605 - root - INFO - lr: 3.6126e-05 gnorm: 1.07 [ 9:22:25<15:13:44] +[titan] 2025-10-05 07:56:45,472 - root - INFO - step: 15245 loss: 2.3360 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2738 global_avg_mtp_loss: 2.0623 +[titan] 2025-10-05 07:56:45,472 - root - INFO - lr: 3.6118e-05 gnorm: 1.05 [ 9:22:35<15:13:33] +[titan] 2025-10-05 07:56:54,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:56:56,368 - root - INFO - step: 15250 loss: 2.2490 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9870 +[titan] 2025-10-05 07:56:56,368 - root - INFO - lr: 3.6109e-05 gnorm: 1.08 [ 9:22:46<15:13:21] +[titan] 2025-10-05 07:57:07,241 - root - INFO - step: 15255 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9655 +[titan] 2025-10-05 07:57:07,241 - root - INFO - lr: 3.6101e-05 gnorm: 1.06 [ 9:22:57<15:13:10] +[titan] 2025-10-05 07:57:18,136 - root - INFO - step: 15260 loss: 2.2500 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9883 +[titan] 2025-10-05 07:57:18,136 - root - INFO - lr: 3.6093e-05 gnorm: 1.10 [ 9:23:08<15:12:59] +[titan] 2025-10-05 07:57:29,037 - root - INFO - step: 15265 loss: 2.2358 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9744 +[titan] 2025-10-05 07:57:29,037 - root - INFO - lr: 3.6085e-05 gnorm: 1.07 [ 9:23:19<15:12:47] +[titan] 2025-10-05 07:57:39,909 - root - INFO - step: 15270 loss: 2.3087 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0387 +[titan] 2025-10-05 07:57:39,910 - root - INFO - lr: 3.6076e-05 gnorm: 1.08 [ 9:23:30<15:12:36] +[titan] 2025-10-05 07:57:50,768 - root - INFO - step: 15275 loss: 2.2564 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 07:57:50,768 - root - INFO - lr: 3.6068e-05 gnorm: 1.08 [ 9:23:41<15:12:25] +[titan] 2025-10-05 07:58:01,662 - root - INFO - step: 15280 loss: 2.2602 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9973 +[titan] 2025-10-05 07:58:01,662 - root - INFO - lr: 
3.6060e-05 gnorm: 1.20 [ 9:23:52<15:12:13] +[titan] 2025-10-05 07:58:12,584 - root - INFO - step: 15285 loss: 2.3071 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 2.0378 +[titan] 2025-10-05 07:58:12,584 - root - INFO - lr: 3.6052e-05 gnorm: 1.10 [ 9:24:03<15:12:02] +[titan] 2025-10-05 07:58:23,438 - root - INFO - step: 15290 loss: 2.2206 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 07:58:23,438 - root - INFO - lr: 3.6043e-05 gnorm: 1.06 [ 9:24:13<15:11:50] +[titan] 2025-10-05 07:58:34,299 - root - INFO - step: 15295 loss: 2.2797 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0135 +[titan] 2025-10-05 07:58:34,299 - root - INFO - lr: 3.6035e-05 gnorm: 1.06 [ 9:24:24<15:11:39] +[titan] 2025-10-05 07:58:42,983 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 07:58:45,167 - root - INFO - step: 15300 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 07:58:45,167 - root - INFO - lr: 3.6027e-05 gnorm: 1.08 [ 9:24:35<15:11:28] +[titan] 2025-10-05 07:58:56,031 - root - INFO - step: 15305 loss: 2.2914 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0238 +[titan] 2025-10-05 07:58:56,032 - root - INFO - lr: 3.6019e-05 gnorm: 1.10 [ 9:24:46<15:11:16] +[titan] 2025-10-05 07:59:06,887 - root - INFO - step: 15310 loss: 2.2775 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0118 +[titan] 2025-10-05 07:59:06,887 - root - INFO - lr: 3.6010e-05 gnorm: 1.09 [ 9:24:57<15:11:05] +[titan] 2025-10-05 07:59:17,809 - root - INFO - step: 15315 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9766 +[titan] 2025-10-05 07:59:17,809 - root - INFO - lr: 3.6002e-05 gnorm: 1.10 [ 9:25:08<15:10:53] +[titan] 2025-10-05 07:59:28,686 - root - INFO - step: 15320 loss: 2.2393 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 07:59:28,686 - root - INFO - lr: 3.5994e-05 gnorm: 1.07 [ 9:25:19<15:10:42] +[titan] 2025-10-05 07:59:39,537 - root - INFO - step: 15325 loss: 2.2296 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 07:59:39,537 - root - INFO - lr: 3.5986e-05 gnorm: 1.11 [ 9:25:30<15:10:31] +[titan] 2025-10-05 07:59:50,399 - root - INFO - step: 15330 loss: 2.2923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2677 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 07:59:50,400 - root - INFO - lr: 
3.5977e-05 gnorm: 1.11 [ 9:25:40<15:10:19] +[titan] 2025-10-05 08:00:01,278 - root - INFO - step: 15335 loss: 2.2792 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2655 global_avg_mtp_loss: 2.0137 +[titan] 2025-10-05 08:00:01,278 - root - INFO - lr: 3.5969e-05 gnorm: 1.15 [ 9:25:51<15:10:08] +[titan] 2025-10-05 08:00:12,178 - root - INFO - step: 15340 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9530 +[titan] 2025-10-05 08:00:12,178 - root - INFO - lr: 3.5961e-05 gnorm: 1.06 [ 9:26:02<15:09:57] +[titan] 2025-10-05 08:00:23,116 - root - INFO - step: 15345 loss: 2.1858 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:00:23,116 - root - INFO - lr: 3.5952e-05 gnorm: 1.09 [ 9:26:13<15:09:45] +[titan] 2025-10-05 08:00:31,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:00:33,989 - root - INFO - step: 15350 loss: 2.2871 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0192 +[titan] 2025-10-05 08:00:33,989 - root - INFO - lr: 3.5944e-05 gnorm: 1.10 [ 9:26:24<15:09:34] +[titan] 2025-10-05 08:00:44,860 - root - INFO - step: 15355 loss: 2.1883 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 08:00:44,860 - root - INFO - lr: 3.5936e-05 gnorm: 1.10 [ 9:26:35<15:09:22] +[titan] 2025-10-05 08:00:55,829 - root - INFO - step: 15360 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 29,873 tflops: 414.45 mfu: 41.91% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9417 +[titan] 2025-10-05 08:00:55,830 - root - INFO - lr: 3.5928e-05 gnorm: 1.04 [ 9:26:46<15:09:11] +[titan] 2025-10-05 08:00:56,008 - root - INFO - Dumping profiler traces at step 15360 +[titan] 2025-10-05 08:00:56,044 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:01:06,999 - root - INFO - step: 15365 loss: 2.2781 memory: 118.84GiB(85.28%) tps: 29,338 tflops: 407.01 mfu: 41.15% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0132 +[titan] 2025-10-05 08:01:06,999 - root - INFO - lr: 3.5919e-05 gnorm: 1.07 [ 9:26:57<15:09:00] +[titan] 2025-10-05 08:01:17,972 - root - INFO - step: 15370 loss: 2.2166 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 08:01:17,972 - root - INFO - lr: 3.5911e-05 gnorm: 1.06 [ 9:27:08<15:08:49] +[titan] 2025-10-05 08:01:28,828 - root - INFO - step: 15375 loss: 2.2257 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 08:01:28,829 - root - INFO - lr: 3.5903e-05 gnorm: 1.04 [ 9:27:19<15:08:38] +[titan] 2025-10-05 08:01:39,731 - root - INFO - step: 15380 loss: 
2.2608 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9985 +[titan] 2025-10-05 08:01:39,732 - root - INFO - lr: 3.5895e-05 gnorm: 1.14 [ 9:27:30<15:08:26] +[titan] 2025-10-05 08:01:50,600 - root - INFO - step: 15385 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9684 +[titan] 2025-10-05 08:01:50,601 - root - INFO - lr: 3.5886e-05 gnorm: 1.10 [ 9:27:41<15:08:15] +[titan] 2025-10-05 08:02:01,449 - root - INFO - step: 15390 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9848 +[titan] 2025-10-05 08:02:01,449 - root - INFO - lr: 3.5878e-05 gnorm: 1.10 [ 9:27:51<15:08:04] +[titan] 2025-10-05 08:02:12,375 - root - INFO - step: 15395 loss: 2.2776 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0117 +[titan] 2025-10-05 08:02:12,375 - root - INFO - lr: 3.5870e-05 gnorm: 1.10 [ 9:28:02<15:07:52] +[titan] 2025-10-05 08:02:21,078 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:02:23,262 - root - INFO - step: 15400 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9035 +[titan] 2025-10-05 08:02:23,262 - root - INFO - lr: 3.5861e-05 gnorm: 1.05 [ 9:28:13<15:07:41] +[titan] 2025-10-05 08:02:34,123 - root - INFO - step: 15405 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9658 +[titan] 2025-10-05 08:02:34,123 - root - INFO - lr: 3.5853e-05 gnorm: 1.05 [ 9:28:24<15:07:30] +[titan] 2025-10-05 08:02:45,027 - root - INFO - step: 15410 loss: 2.2281 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:02:45,027 - root - INFO - lr: 3.5845e-05 gnorm: 1.09 [ 9:28:35<15:07:18] +[titan] 2025-10-05 08:02:55,897 - root - INFO - step: 15415 loss: 2.2676 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:02:55,897 - root - INFO - lr: 3.5837e-05 gnorm: 1.09 [ 9:28:46<15:07:07] +[titan] 2025-10-05 08:03:06,761 - root - INFO - step: 15420 loss: 2.2135 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9565 +[titan] 2025-10-05 08:03:06,761 - root - INFO - lr: 3.5828e-05 gnorm: 1.09 [ 9:28:57<15:06:55] +[titan] 2025-10-05 08:03:17,688 - root - INFO - step: 15425 loss: 2.2445 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9820 +[titan] 2025-10-05 08:03:17,688 - root - INFO - lr: 3.5820e-05 gnorm: 1.04 [ 9:29:08<15:06:44] +[titan] 2025-10-05 08:03:28,565 - root - INFO - step: 15430 loss: 2.2960 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2679 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 08:03:28,565 - root - INFO - lr: 
3.5812e-05 gnorm: 1.11 [ 9:29:19<15:06:33] +[titan] 2025-10-05 08:03:39,425 - root - INFO - step: 15435 loss: 2.2327 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:03:39,425 - root - INFO - lr: 3.5803e-05 gnorm: 1.06 [ 9:29:29<15:06:21] +[titan] 2025-10-05 08:03:50,323 - root - INFO - step: 15440 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 08:03:50,323 - root - INFO - lr: 3.5795e-05 gnorm: 1.04 [ 9:29:40<15:06:10] +[titan] 2025-10-05 08:04:01,195 - root - INFO - step: 15445 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 08:04:01,195 - root - INFO - lr: 3.5787e-05 gnorm: 1.04 [ 9:29:51<15:05:59] +[titan] 2025-10-05 08:04:09,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:04:12,045 - root - INFO - step: 15450 loss: 2.2815 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0162 +[titan] 2025-10-05 08:04:12,045 - root - INFO - lr: 3.5778e-05 gnorm: 1.12 [ 9:30:02<15:05:47] +[titan] 2025-10-05 08:04:22,960 - root - INFO - step: 15455 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9619 +[titan] 2025-10-05 08:04:22,960 - root - INFO - lr: 3.5770e-05 gnorm: 1.11 [ 9:30:13<15:05:36] +[titan] 2025-10-05 08:04:33,818 - root - INFO - step: 15460 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9338 +[titan] 2025-10-05 08:04:33,818 - root - INFO - lr: 3.5762e-05 gnorm: 1.08 [ 9:30:24<15:05:24] +[titan] 2025-10-05 08:04:44,689 - root - INFO - step: 15465 loss: 2.1902 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:04:44,689 - root - INFO - lr: 3.5754e-05 gnorm: 1.09 [ 9:30:35<15:05:13] +[titan] 2025-10-05 08:04:55,581 - root - INFO - step: 15470 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:04:55,581 - root - INFO - lr: 3.5745e-05 gnorm: 1.07 [ 9:30:46<15:05:02] +[titan] 2025-10-05 08:05:06,501 - root - INFO - step: 15475 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0023 +[titan] 2025-10-05 08:05:06,501 - root - INFO - lr: 3.5737e-05 gnorm: 1.12 [ 9:30:56<15:04:50] +[titan] 2025-10-05 08:05:17,432 - root - INFO - step: 15480 loss: 2.2594 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 08:05:17,432 - root - INFO - lr: 
3.5729e-05 gnorm: 1.10 [ 9:31:07<15:04:39] +[titan] 2025-10-05 08:05:28,358 - root - INFO - step: 15485 loss: 2.2121 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:05:28,358 - root - INFO - lr: 3.5720e-05 gnorm: 1.05 [ 9:31:18<15:04:28] +[titan] 2025-10-05 08:05:39,256 - root - INFO - step: 15490 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0068 +[titan] 2025-10-05 08:05:39,257 - root - INFO - lr: 3.5712e-05 gnorm: 1.11 [ 9:31:29<15:04:16] +[titan] 2025-10-05 08:05:50,140 - root - INFO - step: 15495 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 08:05:50,140 - root - INFO - lr: 3.5704e-05 gnorm: 1.04 [ 9:31:40<15:04:05] +[titan] 2025-10-05 08:05:58,823 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:06:01,006 - root - INFO - step: 15500 loss: 2.1526 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 08:06:01,006 - root - INFO - lr: 3.5695e-05 gnorm: 1.06 [ 9:31:51<15:03:54] +[titan] 2025-10-05 08:06:11,916 - root - INFO - step: 15505 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 08:06:11,916 - root - INFO - lr: 3.5687e-05 gnorm: 1.09 [ 9:32:02<15:03:42] +[titan] 2025-10-05 08:06:22,849 - root - INFO - step: 15510 loss: 2.2409 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.81 mfu: 42.04% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9798 +[titan] 2025-10-05 08:06:22,849 - root - INFO - lr: 3.5679e-05 gnorm: 1.06 [ 9:32:13<15:03:31] +[titan] 2025-10-05 08:06:33,698 - root - INFO - step: 15515 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0180 +[titan] 2025-10-05 08:06:33,698 - root - INFO - lr: 3.5670e-05 gnorm: 1.08 [ 9:32:24<15:03:20] +[titan] 2025-10-05 08:06:44,607 - root - INFO - step: 15520 loss: 2.3868 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2831 global_avg_mtp_loss: 2.1037 +[titan] 2025-10-05 08:06:44,607 - root - INFO - lr: 3.5662e-05 gnorm: 2.99 [ 9:32:35<15:03:08] +[titan] 2025-10-05 08:06:55,464 - root - INFO - step: 15525 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9210 +[titan] 2025-10-05 08:06:55,464 - root - INFO - lr: 3.5654e-05 gnorm: 1.10 [ 9:32:45<15:02:57] +[titan] 2025-10-05 08:07:06,333 - root - INFO - step: 15530 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:07:06,333 - root - INFO - lr: 
3.5646e-05 gnorm: 1.03 [ 9:32:56<15:02:46] +[titan] 2025-10-05 08:07:17,232 - root - INFO - step: 15535 loss: 2.2054 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 08:07:17,232 - root - INFO - lr: 3.5637e-05 gnorm: 1.08 [ 9:33:07<15:02:34] +[titan] 2025-10-05 08:07:28,161 - root - INFO - step: 15540 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:07:28,161 - root - INFO - lr: 3.5629e-05 gnorm: 1.10 [ 9:33:18<15:02:23] +[titan] 2025-10-05 08:07:39,016 - root - INFO - step: 15545 loss: 2.2280 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 08:07:39,016 - root - INFO - lr: 3.5621e-05 gnorm: 1.07 [ 9:33:29<15:02:12] +[titan] 2025-10-05 08:07:47,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:07:49,876 - root - INFO - step: 15550 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9897 +[titan] 2025-10-05 08:07:49,876 - root - INFO - lr: 3.5612e-05 gnorm: 1.07 [ 9:33:40<15:02:00] +[titan] 2025-10-05 08:08:00,741 - root - INFO - step: 15555 loss: 2.2855 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0191 +[titan] 2025-10-05 08:08:00,741 - root - INFO - lr: 3.5604e-05 gnorm: 1.08 [ 9:33:51<15:01:49] +[titan] 2025-10-05 08:08:11,595 - root - INFO - step: 15560 loss: 2.2921 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2680 global_avg_mtp_loss: 2.0241 +[titan] 2025-10-05 08:08:11,595 - root - INFO - lr: 3.5596e-05 gnorm: 1.08 [ 9:34:02<15:01:37] +[titan] 2025-10-05 08:08:22,482 - root - INFO - step: 15565 loss: 2.2410 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:08:22,482 - root - INFO - lr: 3.5587e-05 gnorm: 1.08 [ 9:34:12<15:01:26] +[titan] 2025-10-05 08:08:33,383 - root - INFO - step: 15570 loss: 2.2709 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0063 +[titan] 2025-10-05 08:08:33,383 - root - INFO - lr: 3.5579e-05 gnorm: 1.11 [ 9:34:23<15:01:15] +[titan] 2025-10-05 08:08:44,242 - root - INFO - step: 15575 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 08:08:44,242 - root - INFO - lr: 3.5571e-05 gnorm: 1.06 [ 9:34:34<15:01:03] +[titan] 2025-10-05 08:08:55,120 - root - INFO - step: 15580 loss: 2.2133 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9541 +[titan] 2025-10-05 08:08:55,120 - root - INFO - lr: 
3.5562e-05 gnorm: 1.05 [ 9:34:45<15:00:52] +[titan] 2025-10-05 08:09:06,000 - root - INFO - step: 15585 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9699 +[titan] 2025-10-05 08:09:06,000 - root - INFO - lr: 3.5554e-05 gnorm: 1.10 [ 9:34:56<15:00:41] +[titan] 2025-10-05 08:09:16,881 - root - INFO - step: 15590 loss: 2.2225 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 08:09:16,881 - root - INFO - lr: 3.5546e-05 gnorm: 1.07 [ 9:35:07<15:00:29] +[titan] 2025-10-05 08:09:27,784 - root - INFO - step: 15595 loss: 2.2587 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2620 global_avg_mtp_loss: 1.9967 +[titan] 2025-10-05 08:09:27,784 - root - INFO - lr: 3.5537e-05 gnorm: 1.12 [ 9:35:18<15:00:18] +[titan] 2025-10-05 08:09:36,462 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:09:38,690 - root - INFO - step: 15600 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 08:09:38,690 - root - INFO - lr: 3.5529e-05 gnorm: 1.09 [ 9:35:29<15:00:07] +[titan] 2025-10-05 08:09:49,593 - root - INFO - step: 15605 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 08:09:49,594 - root - INFO - lr: 3.5521e-05 gnorm: 1.08 [ 9:35:40<14:59:55] +[titan] 2025-10-05 08:10:00,479 - root - INFO - step: 15610 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9895 +[titan] 2025-10-05 08:10:00,480 - root - INFO - lr: 3.5512e-05 gnorm: 1.08 [ 9:35:50<14:59:44] +[titan] 2025-10-05 08:10:11,374 - root - INFO - step: 15615 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9198 +[titan] 2025-10-05 08:10:11,374 - root - INFO - lr: 3.5504e-05 gnorm: 1.05 [ 9:36:01<14:59:33] +[titan] 2025-10-05 08:10:22,287 - root - INFO - step: 15620 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9771 +[titan] 2025-10-05 08:10:22,287 - root - INFO - lr: 3.5496e-05 gnorm: 1.08 [ 9:36:12<14:59:21] +[titan] 2025-10-05 08:10:33,166 - root - INFO - step: 15625 loss: 2.2767 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2661 global_avg_mtp_loss: 2.0106 +[titan] 2025-10-05 08:10:33,167 - root - INFO - lr: 3.5487e-05 gnorm: 1.12 [ 9:36:23<14:59:10] +[titan] 2025-10-05 08:10:44,056 - root - INFO - step: 15630 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 08:10:44,056 - root - INFO - lr: 
3.5479e-05 gnorm: 1.07 [ 9:36:34<14:58:59] +[titan] 2025-10-05 08:10:54,998 - root - INFO - step: 15635 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 08:10:54,998 - root - INFO - lr: 3.5471e-05 gnorm: 1.07 [ 9:36:45<14:58:47] +[titan] 2025-10-05 08:11:05,867 - root - INFO - step: 15640 loss: 2.2470 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2713 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:11:05,868 - root - INFO - lr: 3.5462e-05 gnorm: 1.08 [ 9:36:56<14:58:36] +[titan] 2025-10-05 08:11:16,727 - root - INFO - step: 15645 loss: 2.1832 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 08:11:16,727 - root - INFO - lr: 3.5454e-05 gnorm: 1.08 [ 9:37:07<14:58:25] +[titan] 2025-10-05 08:11:25,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:11:27,605 - root - INFO - step: 15650 loss: 2.1882 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9345 +[titan] 2025-10-05 08:11:27,605 - root - INFO - lr: 3.5445e-05 gnorm: 1.03 [ 9:37:18<14:58:13] +[titan] 2025-10-05 08:11:38,458 - root - INFO - step: 15655 loss: 2.2283 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:11:38,458 - root - INFO - lr: 3.5437e-05 gnorm: 1.05 [ 9:37:28<14:58:02] +[titan] 2025-10-05 08:11:49,330 - root - INFO - step: 15660 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 08:11:49,330 - root - INFO - lr: 3.5429e-05 gnorm: 1.11 [ 9:37:39<14:57:50] +[titan] 2025-10-05 08:12:00,235 - root - INFO - step: 15665 loss: 2.1792 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9268 +[titan] 2025-10-05 08:12:00,235 - root - INFO - lr: 3.5420e-05 gnorm: 1.07 [ 9:37:50<14:57:39] +[titan] 2025-10-05 08:12:11,114 - root - INFO - step: 15670 loss: 2.2377 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9779 +[titan] 2025-10-05 08:12:11,114 - root - INFO - lr: 3.5412e-05 gnorm: 1.03 [ 9:38:01<14:57:28] +[titan] 2025-10-05 08:12:21,997 - root - INFO - step: 15675 loss: 2.2555 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2625 global_avg_mtp_loss: 1.9930 +[titan] 2025-10-05 08:12:21,997 - root - INFO - lr: 3.5404e-05 gnorm: 1.07 [ 9:38:12<14:57:16] +[titan] 2025-10-05 08:12:32,868 - root - INFO - step: 15680 loss: 2.2075 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:12:32,868 - root - INFO - lr: 
3.5395e-05 gnorm: 1.07 [ 9:38:23<14:57:05] +[titan] 2025-10-05 08:12:43,747 - root - INFO - step: 15685 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9575 +[titan] 2025-10-05 08:12:43,747 - root - INFO - lr: 3.5387e-05 gnorm: 1.10 [ 9:38:34<14:56:54] +[titan] 2025-10-05 08:12:54,610 - root - INFO - step: 15690 loss: 2.2739 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0090 +[titan] 2025-10-05 08:12:54,610 - root - INFO - lr: 3.5379e-05 gnorm: 1.08 [ 9:38:45<14:56:42] +[titan] 2025-10-05 08:13:05,466 - root - INFO - step: 15695 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 08:13:05,466 - root - INFO - lr: 3.5370e-05 gnorm: 1.06 [ 9:38:55<14:56:31] +[titan] 2025-10-05 08:13:14,194 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:13:16,388 - root - INFO - step: 15700 loss: 2.2714 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0069 +[titan] 2025-10-05 08:13:16,388 - root - INFO - lr: 3.5362e-05 gnorm: 1.10 [ 9:39:06<14:56:20] +[titan] 2025-10-05 08:13:27,274 - root - INFO - step: 15705 loss: 2.2396 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9780 +[titan] 2025-10-05 08:13:27,275 - root - INFO - lr: 3.5354e-05 gnorm: 1.10 [ 9:39:17<14:56:08] +[titan] 2025-10-05 08:13:38,136 - root - INFO - step: 15710 loss: 2.2474 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9846 +[titan] 2025-10-05 08:13:38,136 - root - INFO - lr: 3.5345e-05 gnorm: 1.12 [ 9:39:28<14:55:57] +[titan] 2025-10-05 08:13:49,010 - root - INFO - step: 15715 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9203 +[titan] 2025-10-05 08:13:49,010 - root - INFO - lr: 3.5337e-05 gnorm: 1.10 [ 9:39:39<14:55:46] +[titan] 2025-10-05 08:13:59,875 - root - INFO - step: 15720 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9736 +[titan] 2025-10-05 08:13:59,875 - root - INFO - lr: 3.5328e-05 gnorm: 1.11 [ 9:39:50<14:55:34] +[titan] 2025-10-05 08:14:10,743 - root - INFO - step: 15725 loss: 2.2138 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9568 +[titan] 2025-10-05 08:14:10,743 - root - INFO - lr: 3.5320e-05 gnorm: 1.10 [ 9:40:01<14:55:23] +[titan] 2025-10-05 08:14:21,639 - root - INFO - step: 15730 loss: 2.2684 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 2.0047 +[titan] 2025-10-05 08:14:21,640 - root - INFO - lr: 
3.5312e-05 gnorm: 1.09 [ 9:40:12<14:55:11] +[titan] 2025-10-05 08:14:32,539 - root - INFO - step: 15735 loss: 2.2336 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9727 +[titan] 2025-10-05 08:14:32,539 - root - INFO - lr: 3.5303e-05 gnorm: 1.05 [ 9:40:22<14:55:00] +[titan] 2025-10-05 08:14:43,413 - root - INFO - step: 15740 loss: 2.2798 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:14:43,413 - root - INFO - lr: 3.5295e-05 gnorm: 1.11 [ 9:40:33<14:54:49] +[titan] 2025-10-05 08:14:54,293 - root - INFO - step: 15745 loss: 2.2448 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9833 +[titan] 2025-10-05 08:14:54,293 - root - INFO - lr: 3.5287e-05 gnorm: 1.15 [ 9:40:44<14:54:37] +[titan] 2025-10-05 08:15:02,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:15:05,161 - root - INFO - step: 15750 loss: 2.1960 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9407 +[titan] 2025-10-05 08:15:05,161 - root - INFO - lr: 3.5278e-05 gnorm: 1.07 [ 9:40:55<14:54:26] +[titan] 2025-10-05 08:15:16,026 - root - INFO - step: 15755 loss: 2.2333 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9732 +[titan] 2025-10-05 08:15:16,026 - root - INFO - lr: 3.5270e-05 gnorm: 1.08 [ 9:41:06<14:54:15] +[titan] 2025-10-05 08:15:26,950 - root - INFO - step: 15760 loss: 2.2390 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9790 +[titan] 2025-10-05 08:15:26,951 - root - INFO - lr: 3.5261e-05 gnorm: 1.13 [ 9:41:17<14:54:03] +[titan] 2025-10-05 08:15:37,835 - root - INFO - step: 15765 loss: 2.3327 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2739 global_avg_mtp_loss: 2.0588 +[titan] 2025-10-05 08:15:37,835 - root - INFO - lr: 3.5253e-05 gnorm: 1.07 [ 9:41:28<14:53:52] +[titan] 2025-10-05 08:15:48,693 - root - INFO - step: 15770 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:15:48,694 - root - INFO - lr: 3.5245e-05 gnorm: 1.07 [ 9:41:39<14:53:41] +[titan] 2025-10-05 08:15:59,558 - root - INFO - step: 15775 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 08:15:59,558 - root - INFO - lr: 3.5236e-05 gnorm: 1.09 [ 9:41:49<14:53:29] +[titan] 2025-10-05 08:16:10,424 - root - INFO - step: 15780 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:16:10,424 - root - INFO - lr: 
3.5228e-05 gnorm: 1.07 [ 9:42:00<14:53:18] +[titan] 2025-10-05 08:16:21,284 - root - INFO - step: 15785 loss: 2.2235 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9639 +[titan] 2025-10-05 08:16:21,284 - root - INFO - lr: 3.5220e-05 gnorm: 1.08 [ 9:42:11<14:53:07] +[titan] 2025-10-05 08:16:32,182 - root - INFO - step: 15790 loss: 2.2629 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 1.9983 +[titan] 2025-10-05 08:16:32,183 - root - INFO - lr: 3.5211e-05 gnorm: 1.05 [ 9:42:22<14:52:55] +[titan] 2025-10-05 08:16:43,101 - root - INFO - step: 15795 loss: 2.1715 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9180 +[titan] 2025-10-05 08:16:43,101 - root - INFO - lr: 3.5203e-05 gnorm: 1.08 [ 9:42:33<14:52:44] +[titan] 2025-10-05 08:16:51,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:16:53,985 - root - INFO - step: 15800 loss: 2.2694 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0053 +[titan] 2025-10-05 08:16:53,985 - root - INFO - lr: 3.5194e-05 gnorm: 1.09 [ 9:42:44<14:52:33] +[titan] 2025-10-05 08:17:04,888 - root - INFO - step: 15805 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 08:17:04,888 - root - INFO - lr: 3.5186e-05 gnorm: 1.07 [ 9:42:55<14:52:21] +[titan] 2025-10-05 08:17:15,782 - root - INFO - step: 15810 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9612 +[titan] 2025-10-05 08:17:15,782 - root - INFO - lr: 3.5178e-05 gnorm: 1.11 [ 9:43:06<14:52:10] +[titan] 2025-10-05 08:17:26,682 - root - INFO - step: 15815 loss: 2.2718 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 2.0074 +[titan] 2025-10-05 08:17:26,683 - root - INFO - lr: 3.5169e-05 gnorm: 1.09 [ 9:43:17<14:51:59] +[titan] 2025-10-05 08:17:37,542 - root - INFO - step: 15820 loss: 2.2350 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9730 +[titan] 2025-10-05 08:17:37,542 - root - INFO - lr: 3.5161e-05 gnorm: 1.11 [ 9:43:27<14:51:47] +[titan] 2025-10-05 08:17:48,471 - root - INFO - step: 15825 loss: 2.2558 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9919 +[titan] 2025-10-05 08:17:48,471 - root - INFO - lr: 3.5152e-05 gnorm: 1.07 [ 9:43:38<14:51:36] +[titan] 2025-10-05 08:17:59,372 - root - INFO - step: 15830 loss: 2.2928 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2663 global_avg_mtp_loss: 2.0266 +[titan] 2025-10-05 08:17:59,373 - root - INFO - lr: 
3.5144e-05 gnorm: 1.07 [ 9:43:49<14:51:25] +[titan] 2025-10-05 08:18:10,256 - root - INFO - step: 15835 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9594 +[titan] 2025-10-05 08:18:10,256 - root - INFO - lr: 3.5136e-05 gnorm: 1.11 [ 9:44:00<14:51:13] +[titan] 2025-10-05 08:18:21,144 - root - INFO - step: 15840 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 08:18:21,144 - root - INFO - lr: 3.5127e-05 gnorm: 1.07 [ 9:44:11<14:51:02] +[titan] 2025-10-05 08:18:32,042 - root - INFO - step: 15845 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 08:18:32,043 - root - INFO - lr: 3.5119e-05 gnorm: 1.07 [ 9:44:22<14:50:51] +[titan] 2025-10-05 08:18:40,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:18:42,921 - root - INFO - step: 15850 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9352 +[titan] 2025-10-05 08:18:42,921 - root - INFO - lr: 3.5111e-05 gnorm: 1.08 [ 9:44:33<14:50:39] +[titan] 2025-10-05 08:18:53,795 - root - INFO - step: 15855 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9787 +[titan] 2025-10-05 08:18:53,795 - root - INFO - lr: 3.5102e-05 gnorm: 1.15 [ 9:44:44<14:50:28] +[titan] 2025-10-05 08:19:04,726 - root - INFO - step: 15860 loss: 2.2996 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0324 +[titan] 2025-10-05 08:19:04,726 - root - INFO - lr: 3.5094e-05 gnorm: 1.13 [ 9:44:55<14:50:17] +[titan] 2025-10-05 08:19:15,610 - root - INFO - step: 15865 loss: 2.2234 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9643 +[titan] 2025-10-05 08:19:15,610 - root - INFO - lr: 3.5085e-05 gnorm: 1.07 [ 9:45:06<14:50:05] +[titan] 2025-10-05 08:19:26,577 - root - INFO - step: 15870 loss: 2.2122 memory: 118.84GiB(85.28%) tps: 29,880 tflops: 414.54 mfu: 41.91% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9538 +[titan] 2025-10-05 08:19:26,577 - root - INFO - lr: 3.5077e-05 gnorm: 1.09 [ 9:45:16<14:49:54] +[titan] 2025-10-05 08:19:31,107 - root - INFO - Dumping profiler traces at step 15872 +[titan] 2025-10-05 08:19:31,146 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:19:37,696 - root - INFO - step: 15875 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,471 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 08:19:37,697 - root - INFO - lr: 3.5068e-05 gnorm: 1.05 [ 9:45:28<14:49:43] +[titan] 2025-10-05 08:19:48,571 - root - INFO - step: 15880 loss: 
2.2001 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9437 +[titan] 2025-10-05 08:19:48,571 - root - INFO - lr: 3.5060e-05 gnorm: 1.08 [ 9:45:38<14:49:32] +[titan] 2025-10-05 08:19:59,444 - root - INFO - step: 15885 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9707 +[titan] 2025-10-05 08:19:59,444 - root - INFO - lr: 3.5052e-05 gnorm: 1.06 [ 9:45:49<14:49:20] +[titan] 2025-10-05 08:20:10,353 - root - INFO - step: 15890 loss: 2.2269 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 08:20:10,354 - root - INFO - lr: 3.5043e-05 gnorm: 1.09 [ 9:46:00<14:49:09] +[titan] 2025-10-05 08:20:21,229 - root - INFO - step: 15895 loss: 2.2372 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9759 +[titan] 2025-10-05 08:20:21,230 - root - INFO - lr: 3.5035e-05 gnorm: 1.07 [ 9:46:11<14:48:58] +[titan] 2025-10-05 08:20:29,953 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:20:32,140 - root - INFO - step: 15900 loss: 2.1957 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9396 +[titan] 2025-10-05 08:20:32,140 - root - INFO - lr: 3.5026e-05 gnorm: 1.09 [ 9:46:22<14:48:47] +[titan] 2025-10-05 08:20:43,027 - root - INFO - step: 15905 loss: 2.2685 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2645 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:20:43,027 - root - INFO - lr: 3.5018e-05 gnorm: 1.06 [ 9:46:33<14:48:35] +[titan] 2025-10-05 08:20:53,932 - root - INFO - step: 15910 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9586 +[titan] 2025-10-05 08:20:53,932 - root - INFO - lr: 3.5010e-05 gnorm: 1.11 [ 9:46:44<14:48:24] +[titan] 2025-10-05 08:21:04,803 - root - INFO - step: 15915 loss: 2.1550 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9045 +[titan] 2025-10-05 08:21:04,803 - root - INFO - lr: 3.5001e-05 gnorm: 1.05 [ 9:46:55<14:48:13] +[titan] 2025-10-05 08:21:15,707 - root - INFO - step: 15920 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9410 +[titan] 2025-10-05 08:21:15,707 - root - INFO - lr: 3.4993e-05 gnorm: 1.08 [ 9:47:06<14:48:01] +[titan] 2025-10-05 08:21:26,598 - root - INFO - step: 15925 loss: 2.2282 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2604 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 08:21:26,599 - root - INFO - lr: 3.4984e-05 gnorm: 1.10 [ 9:47:17<14:47:50] +[titan] 2025-10-05 08:21:37,540 - root - INFO - step: 15930 loss: 2.2725 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2641 global_avg_mtp_loss: 2.0083 +[titan] 2025-10-05 08:21:37,540 - root - INFO - lr: 
3.4976e-05 gnorm: 1.11 [ 9:47:27<14:47:39] +[titan] 2025-10-05 08:21:48,426 - root - INFO - step: 15935 loss: 2.2034 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9455 +[titan] 2025-10-05 08:21:48,426 - root - INFO - lr: 3.4968e-05 gnorm: 1.07 [ 9:47:38<14:47:27] +[titan] 2025-10-05 08:21:59,298 - root - INFO - step: 15940 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9502 +[titan] 2025-10-05 08:21:59,298 - root - INFO - lr: 3.4959e-05 gnorm: 1.07 [ 9:47:49<14:47:16] +[titan] 2025-10-05 08:22:10,199 - root - INFO - step: 15945 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9384 +[titan] 2025-10-05 08:22:10,199 - root - INFO - lr: 3.4951e-05 gnorm: 1.09 [ 9:48:00<14:47:05] +[titan] 2025-10-05 08:22:18,902 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:22:21,106 - root - INFO - step: 15950 loss: 2.2603 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9966 +[titan] 2025-10-05 08:22:21,107 - root - INFO - lr: 3.4942e-05 gnorm: 1.06 [ 9:48:11<14:46:53] +[titan] 2025-10-05 08:22:32,066 - root - INFO - step: 15955 loss: 2.1766 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 08:22:32,066 - root - INFO - lr: 3.4934e-05 gnorm: 1.07 [ 9:48:22<14:46:42] +[titan] 2025-10-05 08:22:42,935 - root - INFO - step: 15960 loss: 2.2164 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:22:42,935 - root - INFO - lr: 3.4925e-05 gnorm: 1.06 [ 9:48:33<14:46:31] +[titan] 2025-10-05 08:22:53,820 - root - INFO - step: 15965 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 08:22:53,820 - root - INFO - lr: 3.4917e-05 gnorm: 1.07 [ 9:48:44<14:46:19] +[titan] 2025-10-05 08:23:04,735 - root - INFO - step: 15970 loss: 2.2899 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2675 global_avg_mtp_loss: 2.0224 +[titan] 2025-10-05 08:23:04,735 - root - INFO - lr: 3.4909e-05 gnorm: 1.17 [ 9:48:55<14:46:08] +[titan] 2025-10-05 08:23:15,637 - root - INFO - step: 15975 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9298 +[titan] 2025-10-05 08:23:15,637 - root - INFO - lr: 3.4900e-05 gnorm: 1.05 [ 9:49:06<14:45:57] +[titan] 2025-10-05 08:23:26,529 - root - INFO - step: 15980 loss: 2.2468 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9854 +[titan] 2025-10-05 08:23:26,529 - root - INFO - lr: 
3.4892e-05 gnorm: 1.08 [ 9:49:16<14:45:46] +[titan] 2025-10-05 08:23:37,517 - root - INFO - step: 15985 loss: 2.2160 memory: 118.84GiB(85.28%) tps: 29,824 tflops: 413.76 mfu: 41.84% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9583 +[titan] 2025-10-05 08:23:37,517 - root - INFO - lr: 3.4883e-05 gnorm: 1.09 [ 9:49:27<14:45:34] +[titan] 2025-10-05 08:23:48,403 - root - INFO - step: 15990 loss: 2.2605 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9970 +[titan] 2025-10-05 08:23:48,403 - root - INFO - lr: 3.4875e-05 gnorm: 1.12 [ 9:49:38<14:45:23] +[titan] 2025-10-05 08:23:59,284 - root - INFO - step: 15995 loss: 2.2671 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 2.0040 +[titan] 2025-10-05 08:23:59,284 - root - INFO - lr: 3.4866e-05 gnorm: 1.08 [ 9:49:49<14:45:12] +[titan] 2025-10-05 08:24:07,984 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:24:10,169 - root - INFO - step: 16000 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9270 +[titan] 2025-10-05 08:24:10,169 - root - INFO - lr: 3.4858e-05 gnorm: 1.05 [ 9:50:00<14:45:00] +[titan] 2025-10-05 08:24:21,053 - root - INFO - step: 16005 loss: 2.2423 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9804 +[titan] 2025-10-05 08:24:21,053 - root - INFO - lr: 3.4850e-05 gnorm: 1.09 [ 9:50:11<14:44:49] +[titan] 2025-10-05 08:24:31,978 - root - INFO - step: 16010 loss: 2.1853 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 08:24:31,979 - root - INFO - lr: 3.4841e-05 gnorm: 1.05 [ 9:50:22<14:44:38] +[titan] 2025-10-05 08:24:42,866 - root - INFO - step: 16015 loss: 2.2354 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 08:24:42,866 - root - INFO - lr: 3.4833e-05 gnorm: 1.04 [ 9:50:33<14:44:26] +[titan] 2025-10-05 08:24:53,773 - root - INFO - step: 16020 loss: 2.2147 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2605 global_avg_mtp_loss: 1.9542 +[titan] 2025-10-05 08:24:53,773 - root - INFO - lr: 3.4824e-05 gnorm: 1.08 [ 9:50:44<14:44:15] +[titan] 2025-10-05 08:25:04,656 - root - INFO - step: 16025 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 08:25:04,656 - root - INFO - lr: 3.4816e-05 gnorm: 1.08 [ 9:50:55<14:44:04] +[titan] 2025-10-05 08:25:15,527 - root - INFO - step: 16030 loss: 2.2616 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 08:25:15,527 - root - INFO - lr: 
3.4807e-05 gnorm: 1.05 [ 9:51:05<14:43:52] +[titan] 2025-10-05 08:25:26,410 - root - INFO - step: 16035 loss: 2.2834 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2660 global_avg_mtp_loss: 2.0174 +[titan] 2025-10-05 08:25:26,411 - root - INFO - lr: 3.4799e-05 gnorm: 1.10 [ 9:51:16<14:43:41] +[titan] 2025-10-05 08:25:37,315 - root - INFO - step: 16040 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9599 +[titan] 2025-10-05 08:25:37,315 - root - INFO - lr: 3.4790e-05 gnorm: 1.09 [ 9:51:27<14:43:30] +[titan] 2025-10-05 08:25:48,166 - root - INFO - step: 16045 loss: 2.2422 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2613 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:25:48,166 - root - INFO - lr: 3.4782e-05 gnorm: 1.07 [ 9:51:38<14:43:18] +[titan] 2025-10-05 08:25:56,883 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:25:59,065 - root - INFO - step: 16050 loss: 2.2940 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0259 +[titan] 2025-10-05 08:25:59,065 - root - INFO - lr: 3.4774e-05 gnorm: 1.08 [ 9:51:49<14:43:07] +[titan] 2025-10-05 08:26:09,947 - root - INFO - step: 16055 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 08:26:09,947 - root - INFO - lr: 3.4765e-05 gnorm: 1.09 [ 9:52:00<14:42:56] +[titan] 2025-10-05 08:26:20,832 - root - INFO - step: 16060 loss: 2.2703 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 08:26:20,832 - root - INFO - lr: 3.4757e-05 gnorm: 1.17 [ 9:52:11<14:42:44] +[titan] 2025-10-05 08:26:31,707 - root - INFO - step: 16065 loss: 2.2788 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2662 global_avg_mtp_loss: 2.0127 +[titan] 2025-10-05 08:26:31,707 - root - INFO - lr: 3.4748e-05 gnorm: 1.08 [ 9:52:22<14:42:33] +[titan] 2025-10-05 08:26:42,617 - root - INFO - step: 16070 loss: 2.2299 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 08:26:42,618 - root - INFO - lr: 3.4740e-05 gnorm: 1.09 [ 9:52:33<14:42:22] +[titan] 2025-10-05 08:26:53,494 - root - INFO - step: 16075 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9418 +[titan] 2025-10-05 08:26:53,494 - root - INFO - lr: 3.4731e-05 gnorm: 1.08 [ 9:52:43<14:42:11] +[titan] 2025-10-05 08:27:04,387 - root - INFO - step: 16080 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9852 +[titan] 2025-10-05 08:27:04,387 - root - INFO - lr: 
3.4723e-05 gnorm: 1.09 [ 9:52:54<14:41:59] +[titan] 2025-10-05 08:27:15,275 - root - INFO - step: 16085 loss: 2.3081 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2700 global_avg_mtp_loss: 2.0381 +[titan] 2025-10-05 08:27:15,276 - root - INFO - lr: 3.4714e-05 gnorm: 1.08 [ 9:53:05<14:41:48] +[titan] 2025-10-05 08:27:26,154 - root - INFO - step: 16090 loss: 2.2093 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9521 +[titan] 2025-10-05 08:27:26,154 - root - INFO - lr: 3.4706e-05 gnorm: 1.05 [ 9:53:16<14:41:37] +[titan] 2025-10-05 08:27:37,046 - root - INFO - step: 16095 loss: 2.1897 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:27:37,047 - root - INFO - lr: 3.4698e-05 gnorm: 1.07 [ 9:53:27<14:41:25] +[titan] 2025-10-05 08:27:45,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:27:47,929 - root - INFO - step: 16100 loss: 2.2837 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0179 +[titan] 2025-10-05 08:27:47,930 - root - INFO - lr: 3.4689e-05 gnorm: 1.08 [ 9:53:38<14:41:14] +[titan] 2025-10-05 08:27:58,796 - root - INFO - step: 16105 loss: 2.2174 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9585 +[titan] 2025-10-05 08:27:58,796 - root - INFO - lr: 3.4681e-05 gnorm: 1.07 [ 9:53:49<14:41:03] +[titan] 2025-10-05 08:28:09,669 - root - INFO - step: 16110 loss: 2.2129 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9548 +[titan] 2025-10-05 08:28:09,669 - root - INFO - lr: 3.4672e-05 gnorm: 1.07 [ 9:54:00<14:40:51] +[titan] 2025-10-05 08:28:20,594 - root - INFO - step: 16115 loss: 2.1544 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:28:20,594 - root - INFO - lr: 3.4664e-05 gnorm: 1.05 [ 9:54:10<14:40:40] +[titan] 2025-10-05 08:28:31,485 - root - INFO - step: 16120 loss: 2.2760 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0109 +[titan] 2025-10-05 08:28:31,485 - root - INFO - lr: 3.4655e-05 gnorm: 1.09 [ 9:54:21<14:40:29] +[titan] 2025-10-05 08:28:42,397 - root - INFO - step: 16125 loss: 2.1730 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 08:28:42,398 - root - INFO - lr: 3.4647e-05 gnorm: 1.10 [ 9:54:32<14:40:17] +[titan] 2025-10-05 08:28:53,284 - root - INFO - step: 16130 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9775 +[titan] 2025-10-05 08:28:53,284 - root - INFO - lr: 
3.4638e-05 gnorm: 1.14 [ 9:54:43<14:40:06] +[titan] 2025-10-05 08:29:04,160 - root - INFO - step: 16135 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0235 +[titan] 2025-10-05 08:29:04,161 - root - INFO - lr: 3.4630e-05 gnorm: 1.09 [ 9:54:54<14:39:55] +[titan] 2025-10-05 08:29:15,049 - root - INFO - step: 16140 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9956 +[titan] 2025-10-05 08:29:15,049 - root - INFO - lr: 3.4621e-05 gnorm: 1.06 [ 9:55:05<14:39:43] +[titan] 2025-10-05 08:29:25,956 - root - INFO - step: 16145 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9694 +[titan] 2025-10-05 08:29:25,956 - root - INFO - lr: 3.4613e-05 gnorm: 1.10 [ 9:55:16<14:39:32] +[titan] 2025-10-05 08:29:34,646 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:29:36,856 - root - INFO - step: 16150 loss: 2.1905 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 08:29:36,857 - root - INFO - lr: 3.4604e-05 gnorm: 1.12 [ 9:55:27<14:39:21] +[titan] 2025-10-05 08:29:47,747 - root - INFO - step: 16155 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 08:29:47,748 - root - INFO - lr: 3.4596e-05 gnorm: 1.06 [ 9:55:38<14:39:09] +[titan] 2025-10-05 08:29:58,621 - root - INFO - step: 16160 loss: 2.2108 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9539 +[titan] 2025-10-05 08:29:58,621 - root - INFO - lr: 3.4588e-05 gnorm: 1.06 [ 9:55:49<14:38:58] +[titan] 2025-10-05 08:30:09,500 - root - INFO - step: 16165 loss: 2.2802 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2667 global_avg_mtp_loss: 2.0136 +[titan] 2025-10-05 08:30:09,500 - root - INFO - lr: 3.4579e-05 gnorm: 1.11 [ 9:55:59<14:38:47] +[titan] 2025-10-05 08:30:20,377 - root - INFO - step: 16170 loss: 2.2485 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9864 +[titan] 2025-10-05 08:30:20,377 - root - INFO - lr: 3.4571e-05 gnorm: 1.07 [ 9:56:10<14:38:35] +[titan] 2025-10-05 08:30:31,256 - root - INFO - step: 16175 loss: 2.2787 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0133 +[titan] 2025-10-05 08:30:31,256 - root - INFO - lr: 3.4562e-05 gnorm: 1.07 [ 9:56:21<14:38:24] +[titan] 2025-10-05 08:30:42,181 - root - INFO - step: 16180 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9817 +[titan] 2025-10-05 08:30:42,181 - root - INFO - lr: 
3.4554e-05 gnorm: 1.08 [ 9:56:32<14:38:13] +[titan] 2025-10-05 08:30:53,053 - root - INFO - step: 16185 loss: 2.2102 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 08:30:53,053 - root - INFO - lr: 3.4545e-05 gnorm: 1.05 [ 9:56:43<14:38:02] +[titan] 2025-10-05 08:31:03,931 - root - INFO - step: 16190 loss: 2.1765 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9234 +[titan] 2025-10-05 08:31:03,931 - root - INFO - lr: 3.4537e-05 gnorm: 1.08 [ 9:56:54<14:37:50] +[titan] 2025-10-05 08:31:14,795 - root - INFO - step: 16195 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9284 +[titan] 2025-10-05 08:31:14,796 - root - INFO - lr: 3.4528e-05 gnorm: 1.09 [ 9:57:05<14:37:39] +[titan] 2025-10-05 08:31:23,464 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:31:25,652 - root - INFO - step: 16200 loss: 2.3077 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2687 global_avg_mtp_loss: 2.0390 +[titan] 2025-10-05 08:31:25,653 - root - INFO - lr: 3.4520e-05 gnorm: 1.10 [ 9:57:16<14:37:28] +[titan] 2025-10-05 08:31:36,508 - root - INFO - step: 16205 loss: 2.2864 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0206 +[titan] 2025-10-05 08:31:36,508 - root - INFO - lr: 3.4511e-05 gnorm: 1.04 [ 9:57:26<14:37:16] +[titan] 2025-10-05 08:31:47,457 - root - INFO - step: 16210 loss: 2.2341 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9739 +[titan] 2025-10-05 08:31:47,457 - root - INFO - lr: 3.4503e-05 gnorm: 1.09 [ 9:57:37<14:37:05] +[titan] 2025-10-05 08:31:58,346 - root - INFO - step: 16215 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 08:31:58,346 - root - INFO - lr: 3.4494e-05 gnorm: 1.08 [ 9:57:48<14:36:54] +[titan] 2025-10-05 08:32:09,203 - root - INFO - step: 16220 loss: 2.1804 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 08:32:09,204 - root - INFO - lr: 3.4486e-05 gnorm: 1.07 [ 9:57:59<14:36:42] +[titan] 2025-10-05 08:32:20,094 - root - INFO - step: 16225 loss: 2.1974 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 08:32:20,094 - root - INFO - lr: 3.4477e-05 gnorm: 1.07 [ 9:58:10<14:36:31] +[titan] 2025-10-05 08:32:30,976 - root - INFO - step: 16230 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9488 +[titan] 2025-10-05 08:32:30,977 - root - INFO - lr: 
3.4469e-05 gnorm: 1.05 [ 9:58:21<14:36:20] +[titan] 2025-10-05 08:32:41,910 - root - INFO - step: 16235 loss: 2.2424 memory: 118.84GiB(85.28%) tps: 29,970 tflops: 415.79 mfu: 42.04% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9800 +[titan] 2025-10-05 08:32:41,910 - root - INFO - lr: 3.4460e-05 gnorm: 1.06 [ 9:58:32<14:36:08] +[titan] 2025-10-05 08:32:52,835 - root - INFO - step: 16240 loss: 2.1658 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9139 +[titan] 2025-10-05 08:32:52,835 - root - INFO - lr: 3.4452e-05 gnorm: 1.04 [ 9:58:43<14:35:57] +[titan] 2025-10-05 08:33:03,725 - root - INFO - step: 16245 loss: 2.2254 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:33:03,725 - root - INFO - lr: 3.4443e-05 gnorm: 1.08 [ 9:58:54<14:35:46] +[titan] 2025-10-05 08:33:12,442 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:33:14,633 - root - INFO - step: 16250 loss: 2.2316 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9717 +[titan] 2025-10-05 08:33:14,634 - root - INFO - lr: 3.4435e-05 gnorm: 1.10 [ 9:59:05<14:35:35] +[titan] 2025-10-05 08:33:25,534 - root - INFO - step: 16255 loss: 2.3076 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2691 global_avg_mtp_loss: 2.0385 +[titan] 2025-10-05 08:33:25,534 - root - INFO - lr: 3.4426e-05 gnorm: 1.10 [ 9:59:15<14:35:23] +[titan] 2025-10-05 08:33:36,432 - root - INFO - step: 16260 loss: 2.2507 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9884 +[titan] 2025-10-05 08:33:36,433 - root - INFO - lr: 3.4418e-05 gnorm: 1.13 [ 9:59:26<14:35:12] +[titan] 2025-10-05 08:33:47,313 - root - INFO - step: 16265 loss: 2.2475 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2626 global_avg_mtp_loss: 1.9849 +[titan] 2025-10-05 08:33:47,313 - root - INFO - lr: 3.4409e-05 gnorm: 1.10 [ 9:59:37<14:35:01] +[titan] 2025-10-05 08:33:58,157 - root - INFO - step: 16270 loss: 2.2432 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2622 global_avg_mtp_loss: 1.9809 +[titan] 2025-10-05 08:33:58,157 - root - INFO - lr: 3.4401e-05 gnorm: 1.09 [ 9:59:48<14:34:49] +[titan] 2025-10-05 08:34:09,059 - root - INFO - step: 16275 loss: 2.2042 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9481 +[titan] 2025-10-05 08:34:09,059 - root - INFO - lr: 3.4392e-05 gnorm: 1.05 [ 9:59:59<14:34:38] +[titan] 2025-10-05 08:34:19,912 - root - INFO - step: 16280 loss: 2.2416 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:34:19,912 - root - INFO - lr: 
3.4384e-05 gnorm: 1.07 [10:00:10<14:34:27] +[titan] 2025-10-05 08:34:30,777 - root - INFO - step: 16285 loss: 2.1576 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:34:30,777 - root - INFO - lr: 3.4375e-05 gnorm: 1.09 [10:00:21<14:34:15] +[titan] 2025-10-05 08:34:41,653 - root - INFO - step: 16290 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 08:34:41,653 - root - INFO - lr: 3.4367e-05 gnorm: 1.05 [10:00:32<14:34:04] +[titan] 2025-10-05 08:34:52,516 - root - INFO - step: 16295 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9552 +[titan] 2025-10-05 08:34:52,516 - root - INFO - lr: 3.4358e-05 gnorm: 1.05 [10:00:42<14:33:53] +[titan] 2025-10-05 08:35:01,183 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:35:03,369 - root - INFO - step: 16300 loss: 2.2756 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0097 +[titan] 2025-10-05 08:35:03,369 - root - INFO - lr: 3.4350e-05 gnorm: 1.13 [10:00:53<14:33:41] +[titan] 2025-10-05 08:35:14,258 - root - INFO - step: 16305 loss: 2.2158 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:35:14,258 - root - INFO - lr: 3.4341e-05 gnorm: 1.10 [10:01:04<14:33:30] +[titan] 2025-10-05 08:35:25,117 - root - INFO - step: 16310 loss: 2.2039 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 08:35:25,117 - root - INFO - lr: 3.4333e-05 gnorm: 1.07 [10:01:15<14:33:19] +[titan] 2025-10-05 08:35:35,923 - root - INFO - step: 16315 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:35:35,923 - root - INFO - lr: 3.4324e-05 gnorm: 1.06 [10:01:26<14:33:07] +[titan] 2025-10-05 08:35:46,803 - root - INFO - step: 16320 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9004 +[titan] 2025-10-05 08:35:46,803 - root - INFO - lr: 3.4316e-05 gnorm: 1.06 [10:01:37<14:32:56] +[titan] 2025-10-05 08:35:57,651 - root - INFO - step: 16325 loss: 2.2716 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2640 global_avg_mtp_loss: 2.0076 +[titan] 2025-10-05 08:35:57,651 - root - INFO - lr: 3.4307e-05 gnorm: 1.08 [10:01:48<14:32:44] +[titan] 2025-10-05 08:36:08,474 - root - INFO - step: 16330 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8746 +[titan] 2025-10-05 08:36:08,474 - root - INFO - lr: 
3.4299e-05 gnorm: 1.05 [10:01:58<14:32:33] +[titan] 2025-10-05 08:36:19,326 - root - INFO - step: 16335 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 08:36:19,326 - root - INFO - lr: 3.4290e-05 gnorm: 1.05 [10:02:09<14:32:22] +[titan] 2025-10-05 08:36:30,202 - root - INFO - step: 16340 loss: 2.2109 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9528 +[titan] 2025-10-05 08:36:30,202 - root - INFO - lr: 3.4282e-05 gnorm: 1.09 [10:02:20<14:32:10] +[titan] 2025-10-05 08:36:41,056 - root - INFO - step: 16345 loss: 2.2287 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9688 +[titan] 2025-10-05 08:36:41,056 - root - INFO - lr: 3.4273e-05 gnorm: 1.09 [10:02:31<14:31:59] +[titan] 2025-10-05 08:36:49,743 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:36:51,933 - root - INFO - step: 16350 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 08:36:51,933 - root - INFO - lr: 3.4265e-05 gnorm: 1.08 [10:02:42<14:31:48] +[titan] 2025-10-05 08:37:02,815 - root - INFO - step: 16355 loss: 2.2845 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2671 global_avg_mtp_loss: 2.0175 +[titan] 2025-10-05 08:37:02,815 - root - INFO - lr: 3.4256e-05 gnorm: 1.09 [10:02:53<14:31:36] +[titan] 2025-10-05 08:37:13,670 - root - INFO - step: 16360 loss: 2.1862 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9310 +[titan] 2025-10-05 08:37:13,671 - root - INFO - lr: 3.4248e-05 gnorm: 1.04 [10:03:04<14:31:25] +[titan] 2025-10-05 08:37:24,518 - root - INFO - step: 16365 loss: 2.2530 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9904 +[titan] 2025-10-05 08:37:24,518 - root - INFO - lr: 3.4239e-05 gnorm: 1.12 [10:03:14<14:31:14] +[titan] 2025-10-05 08:37:35,400 - root - INFO - step: 16370 loss: 2.2595 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2633 global_avg_mtp_loss: 1.9962 +[titan] 2025-10-05 08:37:35,401 - root - INFO - lr: 3.4231e-05 gnorm: 1.08 [10:03:25<14:31:02] +[titan] 2025-10-05 08:37:46,321 - root - INFO - step: 16375 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 08:37:46,321 - root - INFO - lr: 3.4222e-05 gnorm: 1.06 [10:03:36<14:30:51] +[titan] 2025-10-05 08:37:57,173 - root - INFO - step: 16380 loss: 2.2402 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9783 +[titan] 2025-10-05 08:37:57,173 - root - INFO - lr: 
3.4214e-05 gnorm: 1.11 [10:03:47<14:30:40] +[titan] 2025-10-05 08:38:06,121 - root - INFO - Dumping profiler traces at step 16384 +[titan] 2025-10-05 08:38:06,160 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:38:08,373 - root - INFO - step: 16385 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 29,257 tflops: 405.90 mfu: 41.04% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9247 +[titan] 2025-10-05 08:38:08,373 - root - INFO - lr: 3.4205e-05 gnorm: 1.11 [10:03:58<14:30:29] +[titan] 2025-10-05 08:38:19,239 - root - INFO - step: 16390 loss: 2.2560 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2631 global_avg_mtp_loss: 1.9929 +[titan] 2025-10-05 08:38:19,239 - root - INFO - lr: 3.4197e-05 gnorm: 1.08 [10:04:09<14:30:18] +[titan] 2025-10-05 08:38:30,091 - root - INFO - step: 16395 loss: 2.2370 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2617 global_avg_mtp_loss: 1.9752 +[titan] 2025-10-05 08:38:30,092 - root - INFO - lr: 3.4188e-05 gnorm: 1.06 [10:04:20<14:30:06] +[titan] 2025-10-05 08:38:38,778 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:38:41,007 - root - INFO - step: 16400 loss: 2.1921 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 08:38:41,007 - root - INFO - lr: 3.4180e-05 gnorm: 1.12 [10:04:31<14:29:55] +[titan] 2025-10-05 08:38:51,898 - root - INFO - step: 16405 loss: 2.2523 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9902 +[titan] 2025-10-05 08:38:51,898 - root - INFO - lr: 3.4171e-05 gnorm: 1.10 [10:04:42<14:29:44] +[titan] 2025-10-05 08:39:02,751 - root - INFO - step: 16410 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 08:39:02,751 - root - INFO - lr: 3.4163e-05 gnorm: 1.10 [10:04:53<14:29:32] +[titan] 2025-10-05 08:39:13,601 - root - INFO - step: 16415 loss: 2.1622 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 08:39:13,601 - root - INFO - lr: 3.4154e-05 gnorm: 1.06 [10:05:03<14:29:21] +[titan] 2025-10-05 08:39:24,471 - root - INFO - step: 16420 loss: 2.2162 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9579 +[titan] 2025-10-05 08:39:24,471 - root - INFO - lr: 3.4146e-05 gnorm: 1.06 [10:05:14<14:29:10] +[titan] 2025-10-05 08:39:35,332 - root - INFO - step: 16425 loss: 2.1912 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9364 +[titan] 2025-10-05 08:39:35,333 - root - INFO - lr: 3.4137e-05 gnorm: 1.06 [10:05:25<14:28:58] +[titan] 2025-10-05 08:39:46,223 - root - INFO - step: 16430 loss: 2.2211 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2585 global_avg_mtp_loss: 1.9626 +[titan] 2025-10-05 08:39:46,223 - root - INFO - lr: 
3.4129e-05 gnorm: 1.07 [10:05:36<14:28:47] +[titan] 2025-10-05 08:39:57,116 - root - INFO - step: 16435 loss: 2.2229 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9601 +[titan] 2025-10-05 08:39:57,116 - root - INFO - lr: 3.4120e-05 gnorm: 1.10 [10:05:47<14:28:36] +[titan] 2025-10-05 08:40:07,956 - root - INFO - step: 16440 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9241 +[titan] 2025-10-05 08:40:07,956 - root - INFO - lr: 3.4111e-05 gnorm: 1.06 [10:05:58<14:28:24] +[titan] 2025-10-05 08:40:18,791 - root - INFO - step: 16445 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 08:40:18,792 - root - INFO - lr: 3.4103e-05 gnorm: 1.08 [10:06:09<14:28:13] +[titan] 2025-10-05 08:40:27,445 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:40:29,625 - root - INFO - step: 16450 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9165 +[titan] 2025-10-05 08:40:29,626 - root - INFO - lr: 3.4094e-05 gnorm: 1.09 [10:06:19<14:28:01] +[titan] 2025-10-05 08:40:40,476 - root - INFO - step: 16455 loss: 2.1561 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 08:40:40,476 - root - INFO - lr: 3.4086e-05 gnorm: 1.05 [10:06:30<14:27:50] +[titan] 2025-10-05 08:40:51,351 - root - INFO - step: 16460 loss: 2.1510 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.9013 +[titan] 2025-10-05 08:40:51,351 - root - INFO - lr: 3.4077e-05 gnorm: 1.06 [10:06:41<14:27:39] +[titan] 2025-10-05 08:41:02,252 - root - INFO - step: 16465 loss: 2.2687 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0041 +[titan] 2025-10-05 08:41:02,252 - root - INFO - lr: 3.4069e-05 gnorm: 1.05 [10:06:52<14:27:28] +[titan] 2025-10-05 08:41:13,112 - root - INFO - step: 16470 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.33% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9462 +[titan] 2025-10-05 08:41:13,113 - root - INFO - lr: 3.4060e-05 gnorm: 1.10 [10:07:03<14:27:16] +[titan] 2025-10-05 08:41:23,980 - root - INFO - step: 16475 loss: 2.2132 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:41:23,981 - root - INFO - lr: 3.4052e-05 gnorm: 1.05 [10:07:14<14:27:05] +[titan] 2025-10-05 08:41:34,850 - root - INFO - step: 16480 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 08:41:34,850 - root - INFO - lr: 
3.4043e-05 gnorm: 1.07 [10:07:25<14:26:54] +[titan] 2025-10-05 08:41:45,728 - root - INFO - step: 16485 loss: 2.1837 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 08:41:45,728 - root - INFO - lr: 3.4035e-05 gnorm: 1.10 [10:07:36<14:26:42] +[titan] 2025-10-05 08:41:56,603 - root - INFO - step: 16490 loss: 2.2265 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 08:41:56,603 - root - INFO - lr: 3.4026e-05 gnorm: 1.08 [10:07:46<14:26:31] +[titan] 2025-10-05 08:42:07,468 - root - INFO - step: 16495 loss: 2.2288 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9685 +[titan] 2025-10-05 08:42:07,468 - root - INFO - lr: 3.4018e-05 gnorm: 1.10 [10:07:57<14:26:20] +[titan] 2025-10-05 08:42:16,187 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:42:18,373 - root - INFO - step: 16500 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2629 global_avg_mtp_loss: 1.9920 +[titan] 2025-10-05 08:42:18,373 - root - INFO - lr: 3.4009e-05 gnorm: 1.10 [10:08:08<14:26:08] +[titan] 2025-10-05 08:42:29,248 - root - INFO - step: 16505 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 08:42:29,248 - root - INFO - lr: 3.4000e-05 gnorm: 1.06 [10:08:19<14:25:57] +[titan] 2025-10-05 08:42:40,112 - root - INFO - step: 16510 loss: 2.1951 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9387 +[titan] 2025-10-05 08:42:40,112 - root - INFO - lr: 3.3992e-05 gnorm: 1.06 [10:08:30<14:25:46] +[titan] 2025-10-05 08:42:51,000 - root - INFO - step: 16515 loss: 2.2338 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9731 +[titan] 2025-10-05 08:42:51,000 - root - INFO - lr: 3.3983e-05 gnorm: 1.06 [10:08:41<14:25:34] +[titan] 2025-10-05 08:43:01,864 - root - INFO - step: 16520 loss: 2.2392 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2647 global_avg_mtp_loss: 1.9746 +[titan] 2025-10-05 08:43:01,864 - root - INFO - lr: 3.3975e-05 gnorm: 1.07 [10:08:52<14:25:23] +[titan] 2025-10-05 08:43:12,727 - root - INFO - step: 16525 loss: 2.2902 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2699 global_avg_mtp_loss: 2.0203 +[titan] 2025-10-05 08:43:12,727 - root - INFO - lr: 3.3966e-05 gnorm: 1.13 [10:09:03<14:25:12] +[titan] 2025-10-05 08:43:23,632 - root - INFO - step: 16530 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 08:43:23,632 - root - INFO - lr: 
3.3958e-05 gnorm: 1.08 [10:09:13<14:25:00] +[titan] 2025-10-05 08:43:34,515 - root - INFO - step: 16535 loss: 2.2340 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9726 +[titan] 2025-10-05 08:43:34,515 - root - INFO - lr: 3.3949e-05 gnorm: 1.08 [10:09:24<14:24:49] +[titan] 2025-10-05 08:43:45,404 - root - INFO - step: 16540 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 08:43:45,404 - root - INFO - lr: 3.3941e-05 gnorm: 1.14 [10:09:35<14:24:38] +[titan] 2025-10-05 08:43:56,319 - root - INFO - step: 16545 loss: 2.1857 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 08:43:56,319 - root - INFO - lr: 3.3932e-05 gnorm: 1.07 [10:09:46<14:24:27] +[titan] 2025-10-05 08:44:05,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:44:07,197 - root - INFO - step: 16550 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 08:44:07,198 - root - INFO - lr: 3.3924e-05 gnorm: 1.05 [10:09:57<14:24:15] +[titan] 2025-10-05 08:44:18,066 - root - INFO - step: 16555 loss: 2.2226 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9632 +[titan] 2025-10-05 08:44:18,066 - root - INFO - lr: 3.3915e-05 gnorm: 1.09 [10:10:08<14:24:04] +[titan] 2025-10-05 08:44:28,972 - root - INFO - step: 16560 loss: 2.1751 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 08:44:28,972 - root - INFO - lr: 3.3906e-05 gnorm: 1.05 [10:10:19<14:23:53] +[titan] 2025-10-05 08:44:39,817 - root - INFO - step: 16565 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9362 +[titan] 2025-10-05 08:44:39,817 - root - INFO - lr: 3.3898e-05 gnorm: 1.07 [10:10:30<14:23:41] +[titan] 2025-10-05 08:44:50,691 - root - INFO - step: 16570 loss: 2.1798 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9260 +[titan] 2025-10-05 08:44:50,691 - root - INFO - lr: 3.3889e-05 gnorm: 1.08 [10:10:41<14:23:30] +[titan] 2025-10-05 08:45:01,549 - root - INFO - step: 16575 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9819 +[titan] 2025-10-05 08:45:01,549 - root - INFO - lr: 3.3881e-05 gnorm: 1.05 [10:10:51<14:23:19] +[titan] 2025-10-05 08:45:12,413 - root - INFO - step: 16580 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 08:45:12,413 - root - INFO - lr: 
3.3872e-05 gnorm: 1.08 [10:11:02<14:23:07] +[titan] 2025-10-05 08:45:23,289 - root - INFO - step: 16585 loss: 2.1742 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9208 +[titan] 2025-10-05 08:45:23,289 - root - INFO - lr: 3.3864e-05 gnorm: 1.07 [10:11:13<14:22:56] +[titan] 2025-10-05 08:45:34,149 - root - INFO - step: 16590 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 08:45:34,149 - root - INFO - lr: 3.3855e-05 gnorm: 1.11 [10:11:24<14:22:45] +[titan] 2025-10-05 08:45:45,091 - root - INFO - step: 16595 loss: 2.2134 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 08:45:45,091 - root - INFO - lr: 3.3847e-05 gnorm: 1.06 [10:11:35<14:22:33] +[titan] 2025-10-05 08:45:53,799 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:45:55,993 - root - INFO - step: 16600 loss: 2.1689 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9160 +[titan] 2025-10-05 08:45:55,993 - root - INFO - lr: 3.3838e-05 gnorm: 1.04 [10:11:46<14:22:22] +[titan] 2025-10-05 08:46:06,866 - root - INFO - step: 16605 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:46:06,866 - root - INFO - lr: 3.3829e-05 gnorm: 1.04 [10:11:57<14:22:11] +[titan] 2025-10-05 08:46:17,754 - root - INFO - step: 16610 loss: 2.2141 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 08:46:17,755 - root - INFO - lr: 3.3821e-05 gnorm: 1.09 [10:12:08<14:22:00] +[titan] 2025-10-05 08:46:28,629 - root - INFO - step: 16615 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9330 +[titan] 2025-10-05 08:46:28,629 - root - INFO - lr: 3.3812e-05 gnorm: 1.09 [10:12:18<14:21:48] +[titan] 2025-10-05 08:46:39,510 - root - INFO - step: 16620 loss: 2.1330 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 08:46:39,510 - root - INFO - lr: 3.3804e-05 gnorm: 1.07 [10:12:29<14:21:37] +[titan] 2025-10-05 08:46:50,420 - root - INFO - step: 16625 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9610 +[titan] 2025-10-05 08:46:50,420 - root - INFO - lr: 3.3795e-05 gnorm: 1.09 [10:12:40<14:21:26] +[titan] 2025-10-05 08:47:01,324 - root - INFO - step: 16630 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 08:47:01,324 - root - INFO - lr: 
3.3787e-05 gnorm: 1.10 [10:12:51<14:21:14] +[titan] 2025-10-05 08:47:12,217 - root - INFO - step: 16635 loss: 2.1195 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 08:47:12,217 - root - INFO - lr: 3.3778e-05 gnorm: 1.09 [10:13:02<14:21:03] +[titan] 2025-10-05 08:47:23,110 - root - INFO - step: 16640 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:47:23,110 - root - INFO - lr: 3.3769e-05 gnorm: 1.12 [10:13:13<14:20:52] +[titan] 2025-10-05 08:47:34,010 - root - INFO - step: 16645 loss: 2.1744 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 08:47:34,010 - root - INFO - lr: 3.3761e-05 gnorm: 1.10 [10:13:24<14:20:41] +[titan] 2025-10-05 08:47:42,720 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:47:44,910 - root - INFO - step: 16650 loss: 2.1803 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 08:47:44,910 - root - INFO - lr: 3.3752e-05 gnorm: 1.11 [10:13:35<14:20:29] +[titan] 2025-10-05 08:47:55,812 - root - INFO - step: 16655 loss: 2.2429 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9822 +[titan] 2025-10-05 08:47:55,812 - root - INFO - lr: 3.3744e-05 gnorm: 1.10 [10:13:46<14:20:18] +[titan] 2025-10-05 08:48:06,738 - root - INFO - step: 16660 loss: 2.2777 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2657 global_avg_mtp_loss: 2.0120 +[titan] 2025-10-05 08:48:06,738 - root - INFO - lr: 3.3735e-05 gnorm: 1.11 [10:13:57<14:20:07] +[titan] 2025-10-05 08:48:17,635 - root - INFO - step: 16665 loss: 2.2284 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9686 +[titan] 2025-10-05 08:48:17,635 - root - INFO - lr: 3.3727e-05 gnorm: 1.10 [10:14:07<14:19:56] +[titan] 2025-10-05 08:48:28,518 - root - INFO - step: 16670 loss: 2.2203 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9614 +[titan] 2025-10-05 08:48:28,518 - root - INFO - lr: 3.3718e-05 gnorm: 1.10 [10:14:18<14:19:44] +[titan] 2025-10-05 08:48:39,418 - root - INFO - step: 16675 loss: 2.2253 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2593 global_avg_mtp_loss: 1.9660 +[titan] 2025-10-05 08:48:39,419 - root - INFO - lr: 3.3709e-05 gnorm: 1.14 [10:14:29<14:19:33] +[titan] 2025-10-05 08:48:50,307 - root - INFO - step: 16680 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9942 +[titan] 2025-10-05 08:48:50,307 - root - INFO - lr: 
3.3701e-05 gnorm: 1.09 [10:14:40<14:19:22] +[titan] 2025-10-05 08:49:01,231 - root - INFO - step: 16685 loss: 2.2071 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.18 mfu: 42.08% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 08:49:01,231 - root - INFO - lr: 3.3692e-05 gnorm: 1.06 [10:14:51<14:19:10] +[titan] 2025-10-05 08:49:12,142 - root - INFO - step: 16690 loss: 2.2505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9875 +[titan] 2025-10-05 08:49:12,142 - root - INFO - lr: 3.3684e-05 gnorm: 1.05 [10:15:02<14:18:59] +[titan] 2025-10-05 08:49:23,035 - root - INFO - step: 16695 loss: 2.2662 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2653 global_avg_mtp_loss: 2.0009 +[titan] 2025-10-05 08:49:23,035 - root - INFO - lr: 3.3675e-05 gnorm: 1.04 [10:15:13<14:18:48] +[titan] 2025-10-05 08:49:31,750 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:49:33,935 - root - INFO - step: 16700 loss: 2.1213 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8753 +[titan] 2025-10-05 08:49:33,935 - root - INFO - lr: 3.3667e-05 gnorm: 1.05 [10:15:24<14:18:37] +[titan] 2025-10-05 08:49:44,821 - root - INFO - step: 16705 loss: 2.2305 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 08:49:44,821 - root - INFO - lr: 3.3658e-05 gnorm: 1.04 [10:15:35<14:18:25] +[titan] 2025-10-05 08:49:55,770 - root - INFO - step: 16710 loss: 2.1830 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9279 +[titan] 2025-10-05 08:49:55,770 - root - INFO - lr: 3.3649e-05 gnorm: 1.06 [10:15:46<14:18:14] +[titan] 2025-10-05 08:50:06,646 - root - INFO - step: 16715 loss: 2.1474 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 08:50:06,646 - root - INFO - lr: 3.3641e-05 gnorm: 1.05 [10:15:56<14:18:03] +[titan] 2025-10-05 08:50:17,562 - root - INFO - step: 16720 loss: 2.2478 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9859 +[titan] 2025-10-05 08:50:17,562 - root - INFO - lr: 3.3632e-05 gnorm: 1.08 [10:16:07<14:17:52] +[titan] 2025-10-05 08:50:28,447 - root - INFO - step: 16725 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 08:50:28,447 - root - INFO - lr: 3.3624e-05 gnorm: 1.03 [10:16:18<14:17:40] +[titan] 2025-10-05 08:50:39,327 - root - INFO - step: 16730 loss: 2.2471 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9857 +[titan] 2025-10-05 08:50:39,327 - root - INFO - lr: 
3.3615e-05 gnorm: 1.07 [10:16:29<14:17:29] +[titan] 2025-10-05 08:50:50,218 - root - INFO - step: 16735 loss: 2.1919 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 08:50:50,218 - root - INFO - lr: 3.3606e-05 gnorm: 1.08 [10:16:40<14:17:18] +[titan] 2025-10-05 08:51:01,116 - root - INFO - step: 16740 loss: 2.1893 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9342 +[titan] 2025-10-05 08:51:01,116 - root - INFO - lr: 3.3598e-05 gnorm: 1.01 [10:16:51<14:17:06] +[titan] 2025-10-05 08:51:11,988 - root - INFO - step: 16745 loss: 2.1719 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 08:51:11,988 - root - INFO - lr: 3.3589e-05 gnorm: 1.09 [10:17:02<14:16:55] +[titan] 2025-10-05 08:51:20,683 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:51:22,867 - root - INFO - step: 16750 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2582 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:51:22,867 - root - INFO - lr: 3.3581e-05 gnorm: 1.07 [10:17:13<14:16:44] +[titan] 2025-10-05 08:51:33,766 - root - INFO - step: 16755 loss: 2.1698 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 08:51:33,766 - root - INFO - lr: 3.3572e-05 gnorm: 1.08 [10:17:24<14:16:33] +[titan] 2025-10-05 08:51:44,647 - root - INFO - step: 16760 loss: 2.1888 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 08:51:44,647 - root - INFO - lr: 3.3563e-05 gnorm: 1.07 [10:17:34<14:16:21] +[titan] 2025-10-05 08:51:55,539 - root - INFO - step: 16765 loss: 2.2245 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 08:51:55,539 - root - INFO - lr: 3.3555e-05 gnorm: 1.08 [10:17:45<14:16:10] +[titan] 2025-10-05 08:52:06,452 - root - INFO - step: 16770 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.2636 global_avg_mtp_loss: 1.9905 +[titan] 2025-10-05 08:52:06,452 - root - INFO - lr: 3.3546e-05 gnorm: 1.10 [10:17:56<14:15:59] +[titan] 2025-10-05 08:52:17,344 - root - INFO - step: 16775 loss: 2.2357 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2619 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 08:52:17,344 - root - INFO - lr: 3.3538e-05 gnorm: 1.12 [10:18:07<14:15:47] +[titan] 2025-10-05 08:52:28,243 - root - INFO - step: 16780 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 08:52:28,243 - root - INFO - lr: 
3.3529e-05 gnorm: 1.05 [10:18:18<14:15:36] +[titan] 2025-10-05 08:52:39,158 - root - INFO - step: 16785 loss: 2.2713 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0062 +[titan] 2025-10-05 08:52:39,158 - root - INFO - lr: 3.3520e-05 gnorm: 1.08 [10:18:29<14:15:25] +[titan] 2025-10-05 08:52:50,027 - root - INFO - step: 16790 loss: 2.3254 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2715 global_avg_mtp_loss: 2.0539 +[titan] 2025-10-05 08:52:50,027 - root - INFO - lr: 3.3512e-05 gnorm: 1.08 [10:18:40<14:15:14] +[titan] 2025-10-05 08:53:00,972 - root - INFO - step: 16795 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 29,940 tflops: 415.37 mfu: 42.00% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8815 +[titan] 2025-10-05 08:53:00,972 - root - INFO - lr: 3.3503e-05 gnorm: 1.05 [10:18:51<14:15:02] +[titan] 2025-10-05 08:53:09,655 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:53:11,847 - root - INFO - step: 16800 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9137 +[titan] 2025-10-05 08:53:11,847 - root - INFO - lr: 3.3495e-05 gnorm: 1.04 [10:19:02<14:14:51] +[titan] 2025-10-05 08:53:22,743 - root - INFO - step: 16805 loss: 2.2778 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 2.0130 +[titan] 2025-10-05 08:53:22,744 - root - INFO - lr: 3.3486e-05 gnorm: 1.06 [10:19:13<14:14:40] +[titan] 2025-10-05 08:53:33,623 - root - INFO - step: 16810 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9335 +[titan] 2025-10-05 08:53:33,623 - root - INFO - lr: 3.3477e-05 gnorm: 1.10 [10:19:23<14:14:29] +[titan] 2025-10-05 08:53:44,493 - root - INFO - step: 16815 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 08:53:44,493 - root - INFO - lr: 3.3469e-05 gnorm: 1.08 [10:19:34<14:14:17] +[titan] 2025-10-05 08:53:55,405 - root - INFO - step: 16820 loss: 2.3161 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2704 global_avg_mtp_loss: 2.0457 +[titan] 2025-10-05 08:53:55,405 - root - INFO - lr: 3.3460e-05 gnorm: 1.05 [10:19:45<14:14:06] +[titan] 2025-10-05 08:54:06,325 - root - INFO - step: 16825 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9445 +[titan] 2025-10-05 08:54:06,325 - root - INFO - lr: 3.3452e-05 gnorm: 1.06 [10:19:56<14:13:55] +[titan] 2025-10-05 08:54:17,199 - root - INFO - step: 16830 loss: 2.2758 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0112 +[titan] 2025-10-05 08:54:17,199 - root - INFO - lr: 
3.3443e-05 gnorm: 1.14 [10:20:07<14:13:43] +[titan] 2025-10-05 08:54:28,086 - root - INFO - step: 16835 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 08:54:28,087 - root - INFO - lr: 3.3434e-05 gnorm: 1.11 [10:20:18<14:13:32] +[titan] 2025-10-05 08:54:38,979 - root - INFO - step: 16840 loss: 2.2159 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9570 +[titan] 2025-10-05 08:54:38,979 - root - INFO - lr: 3.3426e-05 gnorm: 1.10 [10:20:29<14:13:21] +[titan] 2025-10-05 08:54:49,879 - root - INFO - step: 16845 loss: 2.2348 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2607 global_avg_mtp_loss: 1.9741 +[titan] 2025-10-05 08:54:49,879 - root - INFO - lr: 3.3417e-05 gnorm: 1.14 [10:20:40<14:13:10] +[titan] 2025-10-05 08:54:58,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:55:00,873 - root - INFO - step: 16850 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 29,806 tflops: 413.51 mfu: 41.81% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 08:55:00,873 - root - INFO - lr: 3.3409e-05 gnorm: 1.06 [10:20:51<14:12:58] +[titan] 2025-10-05 08:55:11,763 - root - INFO - step: 16855 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9506 +[titan] 2025-10-05 08:55:11,763 - root - INFO - lr: 3.3400e-05 gnorm: 1.10 [10:21:02<14:12:47] +[titan] 2025-10-05 08:55:22,662 - root - INFO - step: 16860 loss: 2.1930 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:55:22,662 - root - INFO - lr: 3.3391e-05 gnorm: 1.05 [10:21:12<14:12:36] +[titan] 2025-10-05 08:55:33,543 - root - INFO - step: 16865 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8902 +[titan] 2025-10-05 08:55:33,543 - root - INFO - lr: 3.3383e-05 gnorm: 1.08 [10:21:23<14:12:25] +[titan] 2025-10-05 08:55:44,433 - root - INFO - step: 16870 loss: 2.2119 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9546 +[titan] 2025-10-05 08:55:44,433 - root - INFO - lr: 3.3374e-05 gnorm: 1.08 [10:21:34<14:12:13] +[titan] 2025-10-05 08:55:55,318 - root - INFO - step: 16875 loss: 2.2256 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9654 +[titan] 2025-10-05 08:55:55,319 - root - INFO - lr: 3.3366e-05 gnorm: 1.09 [10:21:45<14:12:02] +[titan] 2025-10-05 08:56:06,283 - root - INFO - step: 16880 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 29,887 tflops: 414.63 mfu: 41.92% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 08:56:06,283 - root - INFO - lr: 
3.3357e-05 gnorm: 1.08 [10:21:56<14:11:51] +[titan] 2025-10-05 08:56:17,168 - root - INFO - step: 16885 loss: 2.2361 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9760 +[titan] 2025-10-05 08:56:17,168 - root - INFO - lr: 3.3348e-05 gnorm: 1.07 [10:22:07<14:11:40] +[titan] 2025-10-05 08:56:28,070 - root - INFO - step: 16890 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 08:56:28,070 - root - INFO - lr: 3.3340e-05 gnorm: 1.03 [10:22:18<14:11:28] +[titan] 2025-10-05 08:56:39,053 - root - INFO - step: 16895 loss: 2.2559 memory: 118.84GiB(85.28%) tps: 29,836 tflops: 413.93 mfu: 41.85% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 08:56:39,054 - root - INFO - lr: 3.3331e-05 gnorm: 1.10 [10:22:29<14:11:17] +[titan] 2025-10-05 08:56:41,415 - root - INFO - Dumping profiler traces at step 16896 +[titan] 2025-10-05 08:56:41,453 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 08:56:47,993 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:56:50,179 - root - INFO - step: 16900 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 29,452 tflops: 408.61 mfu: 41.32% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9188 +[titan] 2025-10-05 08:56:50,180 - root - INFO - lr: 3.3322e-05 gnorm: 1.02 [10:22:40<14:11:06] +[titan] 2025-10-05 08:57:01,083 - root - INFO - step: 16905 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9226 +[titan] 2025-10-05 08:57:01,084 - root - INFO - lr: 3.3314e-05 gnorm: 1.15 [10:22:51<14:10:55] +[titan] 2025-10-05 08:57:11,941 - root - INFO - step: 16910 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9271 +[titan] 2025-10-05 08:57:11,942 - root - INFO - lr: 3.3305e-05 gnorm: 1.04 [10:23:02<14:10:44] +[titan] 2025-10-05 08:57:22,821 - root - INFO - step: 16915 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 08:57:22,822 - root - INFO - lr: 3.3297e-05 gnorm: 1.10 [10:23:13<14:10:32] +[titan] 2025-10-05 08:57:33,708 - root - INFO - step: 16920 loss: 2.1768 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9231 +[titan] 2025-10-05 08:57:33,708 - root - INFO - lr: 3.3288e-05 gnorm: 1.07 [10:23:24<14:10:21] +[titan] 2025-10-05 08:57:44,586 - root - INFO - step: 16925 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 08:57:44,586 - root - INFO - lr: 3.3279e-05 gnorm: 1.10 [10:23:34<14:10:10] +[titan] 2025-10-05 08:57:55,466 - root - INFO - step: 16930 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 08:57:55,466 - root - INFO - lr: 
3.3271e-05 gnorm: 1.08 [10:23:45<14:09:58] +[titan] 2025-10-05 08:58:06,365 - root - INFO - step: 16935 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9721 +[titan] 2025-10-05 08:58:06,365 - root - INFO - lr: 3.3262e-05 gnorm: 1.09 [10:23:56<14:09:47] +[titan] 2025-10-05 08:58:17,240 - root - INFO - step: 16940 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9465 +[titan] 2025-10-05 08:58:17,240 - root - INFO - lr: 3.3253e-05 gnorm: 1.07 [10:24:07<14:09:36] +[titan] 2025-10-05 08:58:28,143 - root - INFO - step: 16945 loss: 2.2441 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9826 +[titan] 2025-10-05 08:58:28,143 - root - INFO - lr: 3.3245e-05 gnorm: 1.07 [10:24:18<14:09:25] +[titan] 2025-10-05 08:58:36,824 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 08:58:39,030 - root - INFO - step: 16950 loss: 2.2032 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 08:58:39,030 - root - INFO - lr: 3.3236e-05 gnorm: 1.07 [10:24:29<14:09:13] +[titan] 2025-10-05 08:58:49,927 - root - INFO - step: 16955 loss: 2.2276 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9681 +[titan] 2025-10-05 08:58:49,928 - root - INFO - lr: 3.3228e-05 gnorm: 1.13 [10:24:40<14:09:02] +[titan] 2025-10-05 08:59:00,813 - root - INFO - step: 16960 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 08:59:00,813 - root - INFO - lr: 3.3219e-05 gnorm: 1.07 [10:24:51<14:08:51] +[titan] 2025-10-05 08:59:11,725 - root - INFO - step: 16965 loss: 2.1770 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.63 mfu: 42.13% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9229 +[titan] 2025-10-05 08:59:11,725 - root - INFO - lr: 3.3210e-05 gnorm: 1.09 [10:25:02<14:08:40] +[titan] 2025-10-05 08:59:22,600 - root - INFO - step: 16970 loss: 2.1896 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 08:59:22,601 - root - INFO - lr: 3.3202e-05 gnorm: 1.13 [10:25:12<14:08:28] +[titan] 2025-10-05 08:59:33,459 - root - INFO - step: 16975 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9184 +[titan] 2025-10-05 08:59:33,460 - root - INFO - lr: 3.3193e-05 gnorm: 1.10 [10:25:23<14:08:17] +[titan] 2025-10-05 08:59:44,382 - root - INFO - step: 16980 loss: 2.2345 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9735 +[titan] 2025-10-05 08:59:44,382 - root - INFO - lr: 
3.3184e-05 gnorm: 1.04 [10:25:34<14:08:06] +[titan] 2025-10-05 08:59:55,274 - root - INFO - step: 16985 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8945 +[titan] 2025-10-05 08:59:55,274 - root - INFO - lr: 3.3176e-05 gnorm: 1.06 [10:25:45<14:07:54] +[titan] 2025-10-05 09:00:06,182 - root - INFO - step: 16990 loss: 2.2652 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0002 +[titan] 2025-10-05 09:00:06,183 - root - INFO - lr: 3.3167e-05 gnorm: 1.09 [10:25:56<14:07:43] +[titan] 2025-10-05 09:00:17,071 - root - INFO - step: 16995 loss: 2.1686 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:00:17,071 - root - INFO - lr: 3.3158e-05 gnorm: 1.08 [10:26:07<14:07:32] +[titan] 2025-10-05 09:00:25,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:00:27,948 - root - INFO - step: 17000 loss: 2.2808 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2651 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:00:27,948 - root - INFO - lr: 3.3150e-05 gnorm: 1.11 [10:26:18<14:07:21] +[titan] 2025-10-05 09:00:38,826 - root - INFO - step: 17005 loss: 2.2227 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 09:00:38,826 - root - INFO - lr: 3.3141e-05 gnorm: 1.07 [10:26:29<14:07:09] +[titan] 2025-10-05 09:00:49,742 - root - INFO - step: 17010 loss: 2.2205 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:00:49,742 - root - INFO - lr: 3.3133e-05 gnorm: 1.05 [10:26:40<14:06:58] +[titan] 2025-10-05 09:01:00,622 - root - INFO - step: 17015 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9094 +[titan] 2025-10-05 09:01:00,622 - root - INFO - lr: 3.3124e-05 gnorm: 1.08 [10:26:50<14:06:47] +[titan] 2025-10-05 09:01:11,523 - root - INFO - step: 17020 loss: 2.1800 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9266 +[titan] 2025-10-05 09:01:11,523 - root - INFO - lr: 3.3115e-05 gnorm: 1.07 [10:27:01<14:06:36] +[titan] 2025-10-05 09:01:22,424 - root - INFO - step: 17025 loss: 2.2024 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9461 +[titan] 2025-10-05 09:01:22,425 - root - INFO - lr: 3.3107e-05 gnorm: 1.04 [10:27:12<14:06:24] +[titan] 2025-10-05 09:01:33,324 - root - INFO - step: 17030 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 09:01:33,324 - root - INFO - lr: 
3.3098e-05 gnorm: 1.07 [10:27:23<14:06:13] +[titan] 2025-10-05 09:01:44,236 - root - INFO - step: 17035 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.12% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9083 +[titan] 2025-10-05 09:01:44,236 - root - INFO - lr: 3.3089e-05 gnorm: 1.04 [10:27:34<14:06:02] +[titan] 2025-10-05 09:01:55,136 - root - INFO - step: 17040 loss: 2.1831 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 09:01:55,136 - root - INFO - lr: 3.3081e-05 gnorm: 1.08 [10:27:45<14:05:51] +[titan] 2025-10-05 09:02:06,035 - root - INFO - step: 17045 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:02:06,035 - root - INFO - lr: 3.3072e-05 gnorm: 1.06 [10:27:56<14:05:39] +[titan] 2025-10-05 09:02:14,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:02:16,917 - root - INFO - step: 17050 loss: 2.2428 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:02:16,917 - root - INFO - lr: 3.3063e-05 gnorm: 1.04 [10:28:07<14:05:28] +[titan] 2025-10-05 09:02:27,783 - root - INFO - step: 17055 loss: 2.2213 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9627 +[titan] 2025-10-05 09:02:27,783 - root - INFO - lr: 3.3055e-05 gnorm: 1.05 [10:28:18<14:05:17] +[titan] 2025-10-05 09:02:38,654 - root - INFO - step: 17060 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8924 +[titan] 2025-10-05 09:02:38,654 - root - INFO - lr: 3.3046e-05 gnorm: 1.07 [10:28:28<14:05:05] +[titan] 2025-10-05 09:02:49,542 - root - INFO - step: 17065 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9244 +[titan] 2025-10-05 09:02:49,542 - root - INFO - lr: 3.3037e-05 gnorm: 1.10 [10:28:39<14:04:54] +[titan] 2025-10-05 09:03:00,423 - root - INFO - step: 17070 loss: 2.2506 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9885 +[titan] 2025-10-05 09:03:00,423 - root - INFO - lr: 3.3029e-05 gnorm: 1.08 [10:28:50<14:04:43] +[titan] 2025-10-05 09:03:11,347 - root - INFO - step: 17075 loss: 2.1585 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:03:11,347 - root - INFO - lr: 3.3020e-05 gnorm: 1.09 [10:29:01<14:04:32] +[titan] 2025-10-05 09:03:22,220 - root - INFO - step: 17080 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 09:03:22,221 - root - INFO - lr: 
3.3011e-05 gnorm: 1.07 [10:29:12<14:04:20] +[titan] 2025-10-05 09:03:33,091 - root - INFO - step: 17085 loss: 2.1813 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:03:33,091 - root - INFO - lr: 3.3003e-05 gnorm: 1.12 [10:29:23<14:04:09] +[titan] 2025-10-05 09:03:43,968 - root - INFO - step: 17090 loss: 2.2621 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2649 global_avg_mtp_loss: 1.9971 +[titan] 2025-10-05 09:03:43,968 - root - INFO - lr: 3.2994e-05 gnorm: 1.09 [10:29:34<14:03:58] +[titan] 2025-10-05 09:03:54,850 - root - INFO - step: 17095 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:03:54,850 - root - INFO - lr: 3.2986e-05 gnorm: 1.05 [10:29:45<14:03:47] +[titan] 2025-10-05 09:04:03,543 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:04:05,728 - root - INFO - step: 17100 loss: 2.1531 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 09:04:05,728 - root - INFO - lr: 3.2977e-05 gnorm: 1.07 [10:29:56<14:03:35] +[titan] 2025-10-05 09:04:16,647 - root - INFO - step: 17105 loss: 2.1923 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 09:04:16,647 - root - INFO - lr: 3.2968e-05 gnorm: 1.11 [10:30:06<14:03:24] +[titan] 2025-10-05 09:04:27,507 - root - INFO - step: 17110 loss: 2.1551 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9046 +[titan] 2025-10-05 09:04:27,507 - root - INFO - lr: 3.2960e-05 gnorm: 1.11 [10:30:17<14:03:13] +[titan] 2025-10-05 09:04:38,376 - root - INFO - step: 17115 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 09:04:38,376 - root - INFO - lr: 3.2951e-05 gnorm: 1.09 [10:30:28<14:03:01] +[titan] 2025-10-05 09:04:49,249 - root - INFO - step: 17120 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:04:49,249 - root - INFO - lr: 3.2942e-05 gnorm: 1.04 [10:30:39<14:02:50] +[titan] 2025-10-05 09:05:00,120 - root - INFO - step: 17125 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2612 global_avg_mtp_loss: 1.9737 +[titan] 2025-10-05 09:05:00,120 - root - INFO - lr: 3.2934e-05 gnorm: 1.09 [10:30:50<14:02:39] +[titan] 2025-10-05 09:05:10,996 - root - INFO - step: 17130 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:05:10,997 - root - INFO - lr: 
3.2925e-05 gnorm: 6.19 [10:31:01<14:02:28] +[titan] 2025-10-05 09:05:21,856 - root - INFO - step: 17135 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 09:05:21,856 - root - INFO - lr: 3.2916e-05 gnorm: 1.04 [10:31:12<14:02:16] +[titan] 2025-10-05 09:05:32,760 - root - INFO - step: 17140 loss: 2.2847 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0182 +[titan] 2025-10-05 09:05:32,760 - root - INFO - lr: 3.2908e-05 gnorm: 1.13 [10:31:23<14:02:05] +[titan] 2025-10-05 09:05:43,616 - root - INFO - step: 17145 loss: 2.1628 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 09:05:43,616 - root - INFO - lr: 3.2899e-05 gnorm: 1.13 [10:31:33<14:01:54] +[titan] 2025-10-05 09:05:52,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:05:54,484 - root - INFO - step: 17150 loss: 2.2557 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 1.9926 +[titan] 2025-10-05 09:05:54,484 - root - INFO - lr: 3.2890e-05 gnorm: 1.04 [10:31:44<14:01:42] +[titan] 2025-10-05 09:06:05,356 - root - INFO - step: 17155 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9062 +[titan] 2025-10-05 09:06:05,356 - root - INFO - lr: 3.2882e-05 gnorm: 1.06 [10:31:55<14:01:31] +[titan] 2025-10-05 09:06:16,249 - root - INFO - step: 17160 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:06:16,249 - root - INFO - lr: 3.2873e-05 gnorm: 1.06 [10:32:06<14:01:20] +[titan] 2025-10-05 09:06:27,125 - root - INFO - step: 17165 loss: 2.2376 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:06:27,125 - root - INFO - lr: 3.2864e-05 gnorm: 1.06 [10:32:17<14:01:09] +[titan] 2025-10-05 09:06:38,025 - root - INFO - step: 17170 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:06:38,025 - root - INFO - lr: 3.2856e-05 gnorm: 1.14 [10:32:28<14:00:57] +[titan] 2025-10-05 09:06:48,880 - root - INFO - step: 17175 loss: 2.1394 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 09:06:48,880 - root - INFO - lr: 3.2847e-05 gnorm: 1.07 [10:32:39<14:00:46] +[titan] 2025-10-05 09:06:59,724 - root - INFO - step: 17180 loss: 2.1898 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9359 +[titan] 2025-10-05 09:06:59,724 - root - INFO - lr: 
3.2838e-05 gnorm: 1.07 [10:32:50<14:00:35] +[titan] 2025-10-05 09:07:10,582 - root - INFO - step: 17185 loss: 2.1634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9111 +[titan] 2025-10-05 09:07:10,583 - root - INFO - lr: 3.2830e-05 gnorm: 1.03 [10:33:00<14:00:23] +[titan] 2025-10-05 09:07:21,443 - root - INFO - step: 17190 loss: 2.1666 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:07:21,443 - root - INFO - lr: 3.2821e-05 gnorm: 1.09 [10:33:11<14:00:12] +[titan] 2025-10-05 09:07:32,307 - root - INFO - step: 17195 loss: 2.2954 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0282 +[titan] 2025-10-05 09:07:32,308 - root - INFO - lr: 3.2812e-05 gnorm: 1.05 [10:33:22<14:00:01] +[titan] 2025-10-05 09:07:40,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:07:43,204 - root - INFO - step: 17200 loss: 2.2434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9810 +[titan] 2025-10-05 09:07:43,204 - root - INFO - lr: 3.2804e-05 gnorm: 1.02 [10:33:33<13:59:49] +[titan] 2025-10-05 09:07:54,076 - root - INFO - step: 17205 loss: 2.2300 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2638 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:07:54,076 - root - INFO - lr: 3.2795e-05 gnorm: 1.07 [10:33:44<13:59:38] +[titan] 2025-10-05 09:08:04,949 - root - INFO - step: 17210 loss: 2.1805 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:08:04,949 - root - INFO - lr: 3.2786e-05 gnorm: 1.14 [10:33:55<13:59:27] +[titan] 2025-10-05 09:08:15,833 - root - INFO - step: 17215 loss: 2.2540 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2644 global_avg_mtp_loss: 1.9896 +[titan] 2025-10-05 09:08:15,833 - root - INFO - lr: 3.2778e-05 gnorm: 1.07 [10:34:06<13:59:16] +[titan] 2025-10-05 09:08:26,702 - root - INFO - step: 17220 loss: 2.1866 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9324 +[titan] 2025-10-05 09:08:26,702 - root - INFO - lr: 3.2769e-05 gnorm: 1.12 [10:34:16<13:59:04] +[titan] 2025-10-05 09:08:37,566 - root - INFO - step: 17225 loss: 2.2151 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9564 +[titan] 2025-10-05 09:08:37,566 - root - INFO - lr: 3.2760e-05 gnorm: 1.09 [10:34:27<13:58:53] +[titan] 2025-10-05 09:08:48,419 - root - INFO - step: 17230 loss: 2.1964 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 09:08:48,419 - root - INFO - lr: 
3.2752e-05 gnorm: 1.06 [10:34:38<13:58:42] +[titan] 2025-10-05 09:08:59,310 - root - INFO - step: 17235 loss: 2.2244 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:08:59,310 - root - INFO - lr: 3.2743e-05 gnorm: 1.11 [10:34:49<13:58:30] +[titan] 2025-10-05 09:09:10,177 - root - INFO - step: 17240 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9319 +[titan] 2025-10-05 09:09:10,177 - root - INFO - lr: 3.2734e-05 gnorm: 1.07 [10:35:00<13:58:19] +[titan] 2025-10-05 09:09:21,054 - root - INFO - step: 17245 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 09:09:21,054 - root - INFO - lr: 3.2725e-05 gnorm: 1.03 [10:35:11<13:58:08] +[titan] 2025-10-05 09:09:29,731 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:09:31,915 - root - INFO - step: 17250 loss: 2.1844 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9306 +[titan] 2025-10-05 09:09:31,915 - root - INFO - lr: 3.2717e-05 gnorm: 1.06 [10:35:22<13:57:57] +[titan] 2025-10-05 09:09:42,794 - root - INFO - step: 17255 loss: 2.2189 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 09:09:42,794 - root - INFO - lr: 3.2708e-05 gnorm: 1.07 [10:35:33<13:57:45] +[titan] 2025-10-05 09:09:53,683 - root - INFO - step: 17260 loss: 2.1486 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8993 +[titan] 2025-10-05 09:09:53,683 - root - INFO - lr: 3.2699e-05 gnorm: 1.09 [10:35:43<13:57:34] +[titan] 2025-10-05 09:10:04,613 - root - INFO - step: 17265 loss: 2.2170 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9590 +[titan] 2025-10-05 09:10:04,613 - root - INFO - lr: 3.2691e-05 gnorm: 1.10 [10:35:54<13:57:23] +[titan] 2025-10-05 09:10:15,520 - root - INFO - step: 17270 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:10:15,520 - root - INFO - lr: 3.2682e-05 gnorm: 1.07 [10:36:05<13:57:12] +[titan] 2025-10-05 09:10:26,410 - root - INFO - step: 17275 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9303 +[titan] 2025-10-05 09:10:26,410 - root - INFO - lr: 3.2673e-05 gnorm: 1.08 [10:36:16<13:57:00] +[titan] 2025-10-05 09:10:37,314 - root - INFO - step: 17280 loss: 2.3099 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0408 +[titan] 2025-10-05 09:10:37,314 - root - INFO - lr: 
3.2665e-05 gnorm: 1.11 [10:36:27<13:56:49] +[titan] 2025-10-05 09:10:48,218 - root - INFO - step: 17285 loss: 2.2025 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 09:10:48,218 - root - INFO - lr: 3.2656e-05 gnorm: 1.04 [10:36:38<13:56:38] +[titan] 2025-10-05 09:10:59,106 - root - INFO - step: 17290 loss: 2.1607 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 09:10:59,106 - root - INFO - lr: 3.2647e-05 gnorm: 1.08 [10:36:49<13:56:27] +[titan] 2025-10-05 09:11:09,991 - root - INFO - step: 17295 loss: 2.2277 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9678 +[titan] 2025-10-05 09:11:09,991 - root - INFO - lr: 3.2639e-05 gnorm: 1.09 [10:37:00<13:56:15] +[titan] 2025-10-05 09:11:18,773 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:11:20,963 - root - INFO - step: 17300 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 29,867 tflops: 414.36 mfu: 41.90% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9516 +[titan] 2025-10-05 09:11:20,963 - root - INFO - lr: 3.2630e-05 gnorm: 1.10 [10:37:11<13:56:04] +[titan] 2025-10-05 09:11:31,859 - root - INFO - step: 17305 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:11:31,859 - root - INFO - lr: 3.2621e-05 gnorm: 1.04 [10:37:22<13:55:53] +[titan] 2025-10-05 09:11:42,726 - root - INFO - step: 17310 loss: 2.2050 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 09:11:42,726 - root - INFO - lr: 3.2613e-05 gnorm: 1.08 [10:37:33<13:55:42] +[titan] 2025-10-05 09:11:53,604 - root - INFO - step: 17315 loss: 2.1973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:11:53,604 - root - INFO - lr: 3.2604e-05 gnorm: 1.06 [10:37:43<13:55:30] +[titan] 2025-10-05 09:12:04,491 - root - INFO - step: 17320 loss: 2.2188 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:12:04,491 - root - INFO - lr: 3.2595e-05 gnorm: 1.08 [10:37:54<13:55:19] +[titan] 2025-10-05 09:12:15,414 - root - INFO - step: 17325 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:12:15,414 - root - INFO - lr: 3.2586e-05 gnorm: 1.03 [10:38:05<13:55:08] +[titan] 2025-10-05 09:12:26,330 - root - INFO - step: 17330 loss: 2.2326 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9711 +[titan] 2025-10-05 09:12:26,330 - root - INFO - lr: 
3.2578e-05 gnorm: 1.08 [10:38:16<13:54:57] +[titan] 2025-10-05 09:12:37,205 - root - INFO - step: 17335 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9425 +[titan] 2025-10-05 09:12:37,205 - root - INFO - lr: 3.2569e-05 gnorm: 1.08 [10:38:27<13:54:45] +[titan] 2025-10-05 09:12:48,107 - root - INFO - step: 17340 loss: 2.2311 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9714 +[titan] 2025-10-05 09:12:48,107 - root - INFO - lr: 3.2560e-05 gnorm: 1.07 [10:38:38<13:54:34] +[titan] 2025-10-05 09:12:58,971 - root - INFO - step: 17345 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:12:58,971 - root - INFO - lr: 3.2552e-05 gnorm: 1.02 [10:38:49<13:54:23] +[titan] 2025-10-05 09:13:07,640 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:13:09,828 - root - INFO - step: 17350 loss: 2.1864 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9313 +[titan] 2025-10-05 09:13:09,828 - root - INFO - lr: 3.2543e-05 gnorm: 1.12 [10:39:00<13:54:12] +[titan] 2025-10-05 09:13:20,766 - root - INFO - step: 17355 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9300 +[titan] 2025-10-05 09:13:20,766 - root - INFO - lr: 3.2534e-05 gnorm: 1.05 [10:39:11<13:54:00] +[titan] 2025-10-05 09:13:31,647 - root - INFO - step: 17360 loss: 2.1890 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9339 +[titan] 2025-10-05 09:13:31,647 - root - INFO - lr: 3.2526e-05 gnorm: 1.06 [10:39:21<13:53:49] +[titan] 2025-10-05 09:13:42,494 - root - INFO - step: 17365 loss: 2.2669 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2630 global_avg_mtp_loss: 2.0039 +[titan] 2025-10-05 09:13:42,494 - root - INFO - lr: 3.2517e-05 gnorm: 1.11 [10:39:32<13:53:38] +[titan] 2025-10-05 09:13:53,353 - root - INFO - step: 17370 loss: 2.2915 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2672 global_avg_mtp_loss: 2.0243 +[titan] 2025-10-05 09:13:53,353 - root - INFO - lr: 3.2508e-05 gnorm: 1.16 [10:39:43<13:53:26] +[titan] 2025-10-05 09:14:04,232 - root - INFO - step: 17375 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9603 +[titan] 2025-10-05 09:14:04,232 - root - INFO - lr: 3.2500e-05 gnorm: 1.06 [10:39:54<13:53:15] +[titan] 2025-10-05 09:14:15,120 - root - INFO - step: 17380 loss: 2.2381 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9777 +[titan] 2025-10-05 09:14:15,120 - root - INFO - lr: 
3.2491e-05 gnorm: 1.09 [10:40:05<13:53:04] +[titan] 2025-10-05 09:14:26,052 - root - INFO - step: 17385 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9464 +[titan] 2025-10-05 09:14:26,052 - root - INFO - lr: 3.2482e-05 gnorm: 1.07 [10:40:16<13:52:53] +[titan] 2025-10-05 09:14:36,924 - root - INFO - step: 17390 loss: 2.1808 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:14:36,924 - root - INFO - lr: 3.2473e-05 gnorm: 1.07 [10:40:27<13:52:41] +[titan] 2025-10-05 09:14:47,853 - root - INFO - step: 17395 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9403 +[titan] 2025-10-05 09:14:47,853 - root - INFO - lr: 3.2465e-05 gnorm: 1.04 [10:40:38<13:52:30] +[titan] 2025-10-05 09:14:56,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:14:58,759 - root - INFO - step: 17400 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 09:14:58,759 - root - INFO - lr: 3.2456e-05 gnorm: 1.05 [10:40:49<13:52:19] +[titan] 2025-10-05 09:15:09,749 - root - INFO - step: 17405 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 29,817 tflops: 413.66 mfu: 41.83% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 09:15:09,750 - root - INFO - lr: 3.2447e-05 gnorm: 1.05 [10:41:00<13:52:08] +[titan] 2025-10-05 09:15:16,478 - root - INFO - Dumping profiler traces at step 17408 +[titan] 2025-10-05 09:15:16,515 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:15:20,898 - root - INFO - step: 17410 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 29,392 tflops: 407.78 mfu: 41.23% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:15:20,899 - root - INFO - lr: 3.2439e-05 gnorm: 1.10 [10:41:11<13:51:57] +[titan] 2025-10-05 09:15:31,784 - root - INFO - step: 17415 loss: 2.3019 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2690 global_avg_mtp_loss: 2.0328 +[titan] 2025-10-05 09:15:31,784 - root - INFO - lr: 3.2430e-05 gnorm: 1.11 [10:41:22<13:51:46] +[titan] 2025-10-05 09:15:42,678 - root - INFO - step: 17420 loss: 2.1926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9383 +[titan] 2025-10-05 09:15:42,678 - root - INFO - lr: 3.2421e-05 gnorm: 1.05 [10:41:32<13:51:34] +[titan] 2025-10-05 09:15:53,585 - root - INFO - step: 17425 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2584 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 09:15:53,585 - root - INFO - lr: 3.2412e-05 gnorm: 1.05 [10:41:43<13:51:23] +[titan] 2025-10-05 09:16:04,476 - root - INFO - step: 17430 loss: 
2.1543 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9031 +[titan] 2025-10-05 09:16:04,476 - root - INFO - lr: 3.2404e-05 gnorm: 1.06 [10:41:54<13:51:12] +[titan] 2025-10-05 09:16:15,351 - root - INFO - step: 17435 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 09:16:15,351 - root - INFO - lr: 3.2395e-05 gnorm: 1.09 [10:42:05<13:51:01] +[titan] 2025-10-05 09:16:26,256 - root - INFO - step: 17440 loss: 2.2052 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:16:26,257 - root - INFO - lr: 3.2386e-05 gnorm: 1.08 [10:42:16<13:50:50] +[titan] 2025-10-05 09:16:37,135 - root - INFO - step: 17445 loss: 2.1787 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9254 +[titan] 2025-10-05 09:16:37,135 - root - INFO - lr: 3.2378e-05 gnorm: 1.06 [10:42:27<13:50:38] +[titan] 2025-10-05 09:16:45,826 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:16:48,014 - root - INFO - step: 17450 loss: 2.1992 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9428 +[titan] 2025-10-05 09:16:48,014 - root - INFO - lr: 3.2369e-05 gnorm: 1.03 [10:42:38<13:50:27] +[titan] 2025-10-05 09:16:58,900 - root - INFO - step: 17455 loss: 2.2831 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2674 global_avg_mtp_loss: 2.0157 +[titan] 2025-10-05 09:16:58,900 - root - INFO - lr: 3.2360e-05 gnorm: 1.09 [10:42:49<13:50:16] +[titan] 2025-10-05 09:17:09,817 - root - INFO - step: 17460 loss: 2.2252 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9662 +[titan] 2025-10-05 09:17:09,817 - root - INFO - lr: 3.2351e-05 gnorm: 1.08 [10:43:00<13:50:04] +[titan] 2025-10-05 09:17:20,746 - root - INFO - step: 17465 loss: 2.2387 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2614 global_avg_mtp_loss: 1.9773 +[titan] 2025-10-05 09:17:20,746 - root - INFO - lr: 3.2343e-05 gnorm: 1.05 [10:43:11<13:49:53] +[titan] 2025-10-05 09:17:31,624 - root - INFO - step: 17470 loss: 2.2465 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9834 +[titan] 2025-10-05 09:17:31,624 - root - INFO - lr: 3.2334e-05 gnorm: 1.07 [10:43:21<13:49:42] +[titan] 2025-10-05 09:17:42,511 - root - INFO - step: 17475 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:17:42,511 - root - INFO - lr: 3.2325e-05 gnorm: 1.07 [10:43:32<13:49:31] +[titan] 2025-10-05 09:17:53,406 - root - INFO - step: 17480 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 09:17:53,407 - root - INFO - lr: 
3.2317e-05 gnorm: 1.09 [10:43:43<13:49:20] +[titan] 2025-10-05 09:18:04,291 - root - INFO - step: 17485 loss: 2.2076 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9498 +[titan] 2025-10-05 09:18:04,292 - root - INFO - lr: 3.2308e-05 gnorm: 1.09 [10:43:54<13:49:08] +[titan] 2025-10-05 09:18:15,232 - root - INFO - step: 17490 loss: 2.1875 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 09:18:15,232 - root - INFO - lr: 3.2299e-05 gnorm: 1.09 [10:44:05<13:48:57] +[titan] 2025-10-05 09:18:26,148 - root - INFO - step: 17495 loss: 2.1821 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9280 +[titan] 2025-10-05 09:18:26,148 - root - INFO - lr: 3.2290e-05 gnorm: 1.06 [10:44:16<13:48:46] +[titan] 2025-10-05 09:18:34,840 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:18:37,024 - root - INFO - step: 17500 loss: 2.2275 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9680 +[titan] 2025-10-05 09:18:37,024 - root - INFO - lr: 3.2282e-05 gnorm: 1.08 [10:44:27<13:48:35] +[titan] 2025-10-05 09:18:47,898 - root - INFO - step: 17505 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9235 +[titan] 2025-10-05 09:18:47,898 - root - INFO - lr: 3.2273e-05 gnorm: 1.10 [10:44:38<13:48:23] +[titan] 2025-10-05 09:18:58,787 - root - INFO - step: 17510 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9344 +[titan] 2025-10-05 09:18:58,787 - root - INFO - lr: 3.2264e-05 gnorm: 1.07 [10:44:49<13:48:12] +[titan] 2025-10-05 09:19:09,664 - root - INFO - step: 17515 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2597 global_avg_mtp_loss: 1.9713 +[titan] 2025-10-05 09:19:09,664 - root - INFO - lr: 3.2256e-05 gnorm: 1.11 [10:44:59<13:48:01] +[titan] 2025-10-05 09:19:20,602 - root - INFO - step: 17520 loss: 2.2567 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.61 mfu: 42.02% global_avg_ntp_loss: 0.2621 global_avg_mtp_loss: 1.9947 +[titan] 2025-10-05 09:19:20,603 - root - INFO - lr: 3.2247e-05 gnorm: 1.06 [10:45:10<13:47:50] +[titan] 2025-10-05 09:19:31,492 - root - INFO - step: 17525 loss: 2.2453 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2616 global_avg_mtp_loss: 1.9837 +[titan] 2025-10-05 09:19:31,492 - root - INFO - lr: 3.2238e-05 gnorm: 1.06 [10:45:21<13:47:38] +[titan] 2025-10-05 09:19:42,388 - root - INFO - step: 17530 loss: 2.1815 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 09:19:42,388 - root - INFO - lr: 
3.2229e-05 gnorm: 1.04 [10:45:32<13:47:27] +[titan] 2025-10-05 09:19:53,275 - root - INFO - step: 17535 loss: 2.1899 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9365 +[titan] 2025-10-05 09:19:53,275 - root - INFO - lr: 3.2221e-05 gnorm: 1.11 [10:45:43<13:47:16] +[titan] 2025-10-05 09:20:04,158 - root - INFO - step: 17540 loss: 2.2182 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9604 +[titan] 2025-10-05 09:20:04,158 - root - INFO - lr: 3.2212e-05 gnorm: 1.08 [10:45:54<13:47:05] +[titan] 2025-10-05 09:20:15,047 - root - INFO - step: 17545 loss: 2.2278 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9677 +[titan] 2025-10-05 09:20:15,047 - root - INFO - lr: 3.2203e-05 gnorm: 1.14 [10:46:05<13:46:53] +[titan] 2025-10-05 09:20:23,764 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:20:25,948 - root - INFO - step: 17550 loss: 2.1810 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9276 +[titan] 2025-10-05 09:20:25,948 - root - INFO - lr: 3.2194e-05 gnorm: 1.06 [10:46:16<13:46:42] +[titan] 2025-10-05 09:20:36,875 - root - INFO - step: 17555 loss: 2.1706 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 09:20:36,875 - root - INFO - lr: 3.2186e-05 gnorm: 1.05 [10:46:27<13:46:31] +[titan] 2025-10-05 09:20:47,778 - root - INFO - step: 17560 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:20:47,778 - root - INFO - lr: 3.2177e-05 gnorm: 1.04 [10:46:38<13:46:20] +[titan] 2025-10-05 09:20:58,670 - root - INFO - step: 17565 loss: 2.3097 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2696 global_avg_mtp_loss: 2.0401 +[titan] 2025-10-05 09:20:58,670 - root - INFO - lr: 3.2168e-05 gnorm: 1.11 [10:46:48<13:46:08] +[titan] 2025-10-05 09:21:09,567 - root - INFO - step: 17570 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9227 +[titan] 2025-10-05 09:21:09,567 - root - INFO - lr: 3.2160e-05 gnorm: 1.03 [10:46:59<13:45:57] +[titan] 2025-10-05 09:21:20,447 - root - INFO - step: 17575 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:21:20,447 - root - INFO - lr: 3.2151e-05 gnorm: 1.06 [10:47:10<13:45:46] +[titan] 2025-10-05 09:21:31,358 - root - INFO - step: 17580 loss: 2.1219 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 09:21:31,358 - root - INFO - lr: 
3.2142e-05 gnorm: 1.07 [10:47:21<13:45:35] +[titan] 2025-10-05 09:21:42,250 - root - INFO - step: 17585 loss: 2.2406 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2615 global_avg_mtp_loss: 1.9791 +[titan] 2025-10-05 09:21:42,250 - root - INFO - lr: 3.2133e-05 gnorm: 1.10 [10:47:32<13:45:23] +[titan] 2025-10-05 09:21:53,130 - root - INFO - step: 17590 loss: 2.2175 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9593 +[titan] 2025-10-05 09:21:53,130 - root - INFO - lr: 3.2125e-05 gnorm: 1.08 [10:47:43<13:45:12] +[titan] 2025-10-05 09:22:04,011 - root - INFO - step: 17595 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9755 +[titan] 2025-10-05 09:22:04,011 - root - INFO - lr: 3.2116e-05 gnorm: 1.05 [10:47:54<13:45:01] +[titan] 2025-10-05 09:22:12,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:22:14,893 - root - INFO - step: 17600 loss: 2.2663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 2.0020 +[titan] 2025-10-05 09:22:14,894 - root - INFO - lr: 3.2107e-05 gnorm: 1.08 [10:48:05<13:44:50] +[titan] 2025-10-05 09:22:25,790 - root - INFO - step: 17605 loss: 2.2383 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2609 global_avg_mtp_loss: 1.9774 +[titan] 2025-10-05 09:22:25,790 - root - INFO - lr: 3.2098e-05 gnorm: 1.11 [10:48:16<13:44:38] +[titan] 2025-10-05 09:22:36,676 - root - INFO - step: 17610 loss: 2.2048 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9482 +[titan] 2025-10-05 09:22:36,677 - root - INFO - lr: 3.2090e-05 gnorm: 1.08 [10:48:26<13:44:27] +[titan] 2025-10-05 09:22:47,556 - root - INFO - step: 17615 loss: 2.3016 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2681 global_avg_mtp_loss: 2.0336 +[titan] 2025-10-05 09:22:47,556 - root - INFO - lr: 3.2081e-05 gnorm: 1.10 [10:48:37<13:44:16] +[titan] 2025-10-05 09:22:58,451 - root - INFO - step: 17620 loss: 2.1471 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8984 +[titan] 2025-10-05 09:22:58,451 - root - INFO - lr: 3.2072e-05 gnorm: 1.10 [10:48:48<13:44:05] +[titan] 2025-10-05 09:23:09,330 - root - INFO - step: 17625 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 09:23:09,330 - root - INFO - lr: 3.2063e-05 gnorm: 1.04 [10:48:59<13:43:53] +[titan] 2025-10-05 09:23:20,210 - root - INFO - step: 17630 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9426 +[titan] 2025-10-05 09:23:20,210 - root - INFO - lr: 
3.2055e-05 gnorm: 1.05 [10:49:10<13:43:42] +[titan] 2025-10-05 09:23:31,084 - root - INFO - step: 17635 loss: 2.2549 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9913 +[titan] 2025-10-05 09:23:31,084 - root - INFO - lr: 3.2046e-05 gnorm: 1.06 [10:49:21<13:43:31] +[titan] 2025-10-05 09:23:41,968 - root - INFO - step: 17640 loss: 2.2575 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9952 +[titan] 2025-10-05 09:23:41,969 - root - INFO - lr: 3.2037e-05 gnorm: 1.12 [10:49:32<13:43:20] +[titan] 2025-10-05 09:23:52,856 - root - INFO - step: 17645 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9799 +[titan] 2025-10-05 09:23:52,856 - root - INFO - lr: 3.2029e-05 gnorm: 1.10 [10:49:43<13:43:08] +[titan] 2025-10-05 09:24:01,572 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:24:03,759 - root - INFO - step: 17650 loss: 2.2045 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9473 +[titan] 2025-10-05 09:24:03,759 - root - INFO - lr: 3.2020e-05 gnorm: 1.14 [10:49:54<13:42:57] +[titan] 2025-10-05 09:24:14,635 - root - INFO - step: 17655 loss: 2.2860 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2664 global_avg_mtp_loss: 2.0197 +[titan] 2025-10-05 09:24:14,636 - root - INFO - lr: 3.2011e-05 gnorm: 1.12 [10:50:04<13:42:46] +[titan] 2025-10-05 09:24:25,539 - root - INFO - step: 17660 loss: 2.1876 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9327 +[titan] 2025-10-05 09:24:25,539 - root - INFO - lr: 3.2002e-05 gnorm: 1.06 [10:50:15<13:42:35] +[titan] 2025-10-05 09:24:36,410 - root - INFO - step: 17665 loss: 2.2088 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9523 +[titan] 2025-10-05 09:24:36,410 - root - INFO - lr: 3.1994e-05 gnorm: 1.09 [10:50:26<13:42:23] +[titan] 2025-10-05 09:24:47,292 - root - INFO - step: 17670 loss: 2.2179 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2592 global_avg_mtp_loss: 1.9587 +[titan] 2025-10-05 09:24:47,292 - root - INFO - lr: 3.1985e-05 gnorm: 1.03 [10:50:37<13:42:12] +[titan] 2025-10-05 09:24:58,185 - root - INFO - step: 17675 loss: 2.2047 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9478 +[titan] 2025-10-05 09:24:58,185 - root - INFO - lr: 3.1976e-05 gnorm: 1.17 [10:50:48<13:42:01] +[titan] 2025-10-05 09:25:09,105 - root - INFO - step: 17680 loss: 2.2810 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2659 global_avg_mtp_loss: 2.0151 +[titan] 2025-10-05 09:25:09,105 - root - INFO - lr: 
3.1967e-05 gnorm: 1.11 [10:50:59<13:41:50] +[titan] 2025-10-05 09:25:19,979 - root - INFO - step: 17685 loss: 2.1693 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:25:19,980 - root - INFO - lr: 3.1959e-05 gnorm: 1.07 [10:51:10<13:41:38] +[titan] 2025-10-05 09:25:30,867 - root - INFO - step: 17690 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9351 +[titan] 2025-10-05 09:25:30,867 - root - INFO - lr: 3.1950e-05 gnorm: 1.10 [10:51:21<13:41:27] +[titan] 2025-10-05 09:25:41,737 - root - INFO - step: 17695 loss: 2.1997 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 09:25:41,737 - root - INFO - lr: 3.1941e-05 gnorm: 1.03 [10:51:31<13:41:16] +[titan] 2025-10-05 09:25:50,446 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:25:52,639 - root - INFO - step: 17700 loss: 2.1679 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 09:25:52,639 - root - INFO - lr: 3.1932e-05 gnorm: 1.08 [10:51:42<13:41:05] +[titan] 2025-10-05 09:26:03,528 - root - INFO - step: 17705 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9507 +[titan] 2025-10-05 09:26:03,528 - root - INFO - lr: 3.1924e-05 gnorm: 1.14 [10:51:53<13:40:53] +[titan] 2025-10-05 09:26:14,424 - root - INFO - step: 17710 loss: 2.2058 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:26:14,424 - root - INFO - lr: 3.1915e-05 gnorm: 1.05 [10:52:04<13:40:42] +[titan] 2025-10-05 09:26:25,353 - root - INFO - step: 17715 loss: 2.1118 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 09:26:25,353 - root - INFO - lr: 3.1906e-05 gnorm: 1.09 [10:52:15<13:40:31] +[titan] 2025-10-05 09:26:36,227 - root - INFO - step: 17720 loss: 2.1460 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8958 +[titan] 2025-10-05 09:26:36,227 - root - INFO - lr: 3.1897e-05 gnorm: 1.09 [10:52:26<13:40:20] +[titan] 2025-10-05 09:26:47,086 - root - INFO - step: 17725 loss: 2.1797 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9257 +[titan] 2025-10-05 09:26:47,086 - root - INFO - lr: 3.1889e-05 gnorm: 1.04 [10:52:37<13:40:09] +[titan] 2025-10-05 09:26:57,951 - root - INFO - step: 17730 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 09:26:57,951 - root - INFO - lr: 
3.1880e-05 gnorm: 1.13 [10:52:48<13:39:57] +[titan] 2025-10-05 09:27:08,802 - root - INFO - step: 17735 loss: 2.2199 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9620 +[titan] 2025-10-05 09:27:08,802 - root - INFO - lr: 3.1871e-05 gnorm: 1.04 [10:52:59<13:39:46] +[titan] 2025-10-05 09:27:19,665 - root - INFO - step: 17740 loss: 2.1845 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9299 +[titan] 2025-10-05 09:27:19,665 - root - INFO - lr: 3.1862e-05 gnorm: 1.09 [10:53:09<13:39:35] +[titan] 2025-10-05 09:27:30,612 - root - INFO - step: 17745 loss: 2.1677 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9150 +[titan] 2025-10-05 09:27:30,612 - root - INFO - lr: 3.1854e-05 gnorm: 1.09 [10:53:20<13:39:23] +[titan] 2025-10-05 09:27:39,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:27:41,462 - root - INFO - step: 17750 loss: 2.1954 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 09:27:41,462 - root - INFO - lr: 3.1845e-05 gnorm: 1.09 [10:53:31<13:39:12] +[titan] 2025-10-05 09:27:52,328 - root - INFO - step: 17755 loss: 2.1602 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9091 +[titan] 2025-10-05 09:27:52,328 - root - INFO - lr: 3.1836e-05 gnorm: 1.04 [10:53:42<13:39:01] +[titan] 2025-10-05 09:28:03,186 - root - INFO - step: 17760 loss: 2.2440 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2623 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:28:03,186 - root - INFO - lr: 3.1827e-05 gnorm: 1.08 [10:53:53<13:38:50] +[titan] 2025-10-05 09:28:14,043 - root - INFO - step: 17765 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9224 +[titan] 2025-10-05 09:28:14,043 - root - INFO - lr: 3.1818e-05 gnorm: 1.07 [10:54:04<13:38:38] +[titan] 2025-10-05 09:28:24,918 - root - INFO - step: 17770 loss: 2.1581 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9064 +[titan] 2025-10-05 09:28:24,918 - root - INFO - lr: 3.1810e-05 gnorm: 1.06 [10:54:15<13:38:27] +[titan] 2025-10-05 09:28:35,788 - root - INFO - step: 17775 loss: 2.2240 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9652 +[titan] 2025-10-05 09:28:35,788 - root - INFO - lr: 3.1801e-05 gnorm: 1.15 [10:54:26<13:38:16] +[titan] 2025-10-05 09:28:46,696 - root - INFO - step: 17780 loss: 2.2055 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:28:46,696 - root - INFO - lr: 
3.1792e-05 gnorm: 1.10 [10:54:36<13:38:05] +[titan] 2025-10-05 09:28:57,567 - root - INFO - step: 17785 loss: 2.1809 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9261 +[titan] 2025-10-05 09:28:57,567 - root - INFO - lr: 3.1783e-05 gnorm: 1.05 [10:54:47<13:37:53] +[titan] 2025-10-05 09:29:08,437 - root - INFO - step: 17790 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:29:08,437 - root - INFO - lr: 3.1775e-05 gnorm: 1.09 [10:54:58<13:37:42] +[titan] 2025-10-05 09:29:19,347 - root - INFO - step: 17795 loss: 2.1437 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 09:29:19,348 - root - INFO - lr: 3.1766e-05 gnorm: 1.05 [10:55:09<13:37:31] +[titan] 2025-10-05 09:29:28,050 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:29:30,284 - root - INFO - step: 17800 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 09:29:30,284 - root - INFO - lr: 3.1757e-05 gnorm: 1.07 [10:55:20<13:37:20] +[titan] 2025-10-05 09:29:41,184 - root - INFO - step: 17805 loss: 2.1794 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9269 +[titan] 2025-10-05 09:29:41,184 - root - INFO - lr: 3.1748e-05 gnorm: 1.07 [10:55:31<13:37:08] +[titan] 2025-10-05 09:29:52,086 - root - INFO - step: 17810 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:29:52,086 - root - INFO - lr: 3.1740e-05 gnorm: 1.06 [10:55:42<13:36:57] +[titan] 2025-10-05 09:30:03,070 - root - INFO - step: 17815 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 29,833 tflops: 413.88 mfu: 41.85% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9441 +[titan] 2025-10-05 09:30:03,070 - root - INFO - lr: 3.1731e-05 gnorm: 1.04 [10:55:53<13:36:46] +[titan] 2025-10-05 09:30:13,933 - root - INFO - step: 17820 loss: 2.2026 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 09:30:13,933 - root - INFO - lr: 3.1722e-05 gnorm: 1.09 [10:56:04<13:36:35] +[titan] 2025-10-05 09:30:24,824 - root - INFO - step: 17825 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:30:24,824 - root - INFO - lr: 3.1713e-05 gnorm: 1.05 [10:56:15<13:36:24] +[titan] 2025-10-05 09:30:35,792 - root - INFO - step: 17830 loss: 2.1738 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:30:35,792 - root - INFO - lr: 
3.1705e-05 gnorm: 1.09 [10:56:26<13:36:12] +[titan] 2025-10-05 09:30:46,656 - root - INFO - step: 17835 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 09:30:46,656 - root - INFO - lr: 3.1696e-05 gnorm: 1.04 [10:56:36<13:36:01] +[titan] 2025-10-05 09:30:57,542 - root - INFO - step: 17840 loss: 2.1750 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 09:30:57,542 - root - INFO - lr: 3.1687e-05 gnorm: 1.05 [10:56:47<13:35:50] +[titan] 2025-10-05 09:31:08,403 - root - INFO - step: 17845 loss: 2.2534 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2635 global_avg_mtp_loss: 1.9900 +[titan] 2025-10-05 09:31:08,403 - root - INFO - lr: 3.1678e-05 gnorm: 1.10 [10:56:58<13:35:39] +[titan] 2025-10-05 09:31:17,089 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:31:19,279 - root - INFO - step: 17850 loss: 2.2531 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2637 global_avg_mtp_loss: 1.9894 +[titan] 2025-10-05 09:31:19,279 - root - INFO - lr: 3.1670e-05 gnorm: 1.07 [10:57:09<13:35:27] +[titan] 2025-10-05 09:31:30,156 - root - INFO - step: 17855 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9467 +[titan] 2025-10-05 09:31:30,156 - root - INFO - lr: 3.1661e-05 gnorm: 1.03 [10:57:20<13:35:16] +[titan] 2025-10-05 09:31:41,087 - root - INFO - step: 17860 loss: 2.1966 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9406 +[titan] 2025-10-05 09:31:41,088 - root - INFO - lr: 3.1652e-05 gnorm: 1.07 [10:57:31<13:35:05] +[titan] 2025-10-05 09:31:51,960 - root - INFO - step: 17865 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9147 +[titan] 2025-10-05 09:31:51,961 - root - INFO - lr: 3.1643e-05 gnorm: 1.08 [10:57:42<13:34:54] +[titan] 2025-10-05 09:32:02,826 - root - INFO - step: 17870 loss: 2.1459 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 09:32:02,826 - root - INFO - lr: 3.1634e-05 gnorm: 1.08 [10:57:53<13:34:42] +[titan] 2025-10-05 09:32:13,722 - root - INFO - step: 17875 loss: 2.2074 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 09:32:13,722 - root - INFO - lr: 3.1626e-05 gnorm: 1.04 [10:58:03<13:34:31] +[titan] 2025-10-05 09:32:24,584 - root - INFO - step: 17880 loss: 2.2430 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9823 +[titan] 2025-10-05 09:32:24,584 - root - INFO - lr: 
3.1617e-05 gnorm: 1.06 [10:58:14<13:34:20] +[titan] 2025-10-05 09:32:35,482 - root - INFO - step: 17885 loss: 2.2057 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9491 +[titan] 2025-10-05 09:32:35,482 - root - INFO - lr: 3.1608e-05 gnorm: 1.05 [10:58:25<13:34:09] +[titan] 2025-10-05 09:32:46,343 - root - INFO - step: 17890 loss: 2.2259 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9659 +[titan] 2025-10-05 09:32:46,344 - root - INFO - lr: 3.1599e-05 gnorm: 1.05 [10:58:36<13:33:57] +[titan] 2025-10-05 09:32:57,245 - root - INFO - step: 17895 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8967 +[titan] 2025-10-05 09:32:57,245 - root - INFO - lr: 3.1591e-05 gnorm: 1.07 [10:58:47<13:33:46] +[titan] 2025-10-05 09:33:05,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:33:08,118 - root - INFO - step: 17900 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 09:33:08,118 - root - INFO - lr: 3.1582e-05 gnorm: 1.01 [10:58:58<13:33:35] +[titan] 2025-10-05 09:33:19,021 - root - INFO - step: 17905 loss: 2.1704 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 09:33:19,022 - root - INFO - lr: 3.1573e-05 gnorm: 1.15 [10:59:09<13:33:24] +[titan] 2025-10-05 09:33:29,882 - root - INFO - step: 17910 loss: 2.1763 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9245 +[titan] 2025-10-05 09:33:29,882 - root - INFO - lr: 3.1564e-05 gnorm: 1.05 [10:59:20<13:33:13] +[titan] 2025-10-05 09:33:40,805 - root - INFO - step: 17915 loss: 2.1793 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 09:33:40,805 - root - INFO - lr: 3.1555e-05 gnorm: 1.08 [10:59:31<13:33:01] +[titan] 2025-10-05 09:33:51,745 - root - INFO - step: 17920 loss: 2.2295 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2595 global_avg_mtp_loss: 1.9700 +[titan] 2025-10-05 09:33:51,746 - root - INFO - lr: 3.1547e-05 gnorm: 1.12 [10:59:41<13:32:50] +[titan] 2025-10-05 09:33:51,939 - root - INFO - Dumping profiler traces at step 17920 +[titan] 2025-10-05 09:33:51,979 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:34:02,867 - root - INFO - step: 17925 loss: 2.1932 memory: 118.84GiB(85.28%) tps: 29,464 tflops: 408.77 mfu: 41.33% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9372 +[titan] 2025-10-05 09:34:02,867 - root - INFO - lr: 3.1538e-05 gnorm: 1.06 [10:59:53<13:32:39] +[titan] 2025-10-05 09:34:13,739 - root - INFO - step: 17930 loss: 
2.2006 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9442 +[titan] 2025-10-05 09:34:13,739 - root - INFO - lr: 3.1529e-05 gnorm: 1.12 [11:00:03<13:32:28] +[titan] 2025-10-05 09:34:24,600 - root - INFO - step: 17935 loss: 2.2250 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:34:24,600 - root - INFO - lr: 3.1520e-05 gnorm: 1.06 [11:00:14<13:32:17] +[titan] 2025-10-05 09:34:35,525 - root - INFO - step: 17940 loss: 2.1726 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.15 mfu: 42.08% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 09:34:35,525 - root - INFO - lr: 3.1512e-05 gnorm: 1.05 [11:00:25<13:32:05] +[titan] 2025-10-05 09:34:46,392 - root - INFO - step: 17945 loss: 2.0902 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 09:34:46,392 - root - INFO - lr: 3.1503e-05 gnorm: 1.04 [11:00:36<13:31:54] +[titan] 2025-10-05 09:34:55,074 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:34:57,263 - root - INFO - step: 17950 loss: 2.2458 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9840 +[titan] 2025-10-05 09:34:57,264 - root - INFO - lr: 3.1494e-05 gnorm: 1.09 [11:00:47<13:31:43] +[titan] 2025-10-05 09:35:08,110 - root - INFO - step: 17955 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 09:35:08,110 - root - INFO - lr: 3.1485e-05 gnorm: 1.05 [11:00:58<13:31:32] +[titan] 2025-10-05 09:35:18,976 - root - INFO - step: 17960 loss: 2.2219 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2596 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:35:18,976 - root - INFO - lr: 3.1476e-05 gnorm: 1.08 [11:01:09<13:31:20] +[titan] 2025-10-05 09:35:29,838 - root - INFO - step: 17965 loss: 2.2172 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9589 +[titan] 2025-10-05 09:35:29,838 - root - INFO - lr: 3.1468e-05 gnorm: 1.08 [11:01:20<13:31:09] +[titan] 2025-10-05 09:35:40,783 - root - INFO - step: 17970 loss: 2.1969 memory: 118.84GiB(85.28%) tps: 29,938 tflops: 415.35 mfu: 42.00% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9404 +[titan] 2025-10-05 09:35:40,783 - root - INFO - lr: 3.1459e-05 gnorm: 1.07 [11:01:31<13:30:58] +[titan] 2025-10-05 09:35:51,637 - root - INFO - step: 17975 loss: 2.3140 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2814 global_avg_mtp_loss: 2.0326 +[titan] 2025-10-05 09:35:51,637 - root - INFO - lr: 3.1450e-05 gnorm: 1.14 [11:01:41<13:30:47] +[titan] 2025-10-05 09:36:02,514 - root - INFO - step: 17980 loss: 2.1848 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 09:36:02,514 - root - INFO - lr: 
3.1441e-05 gnorm: 1.06 [11:01:52<13:30:35] +[titan] 2025-10-05 09:36:13,381 - root - INFO - step: 17985 loss: 2.1655 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9132 +[titan] 2025-10-05 09:36:13,381 - root - INFO - lr: 3.1432e-05 gnorm: 1.07 [11:02:03<13:30:24] +[titan] 2025-10-05 09:36:24,242 - root - INFO - step: 17990 loss: 2.2038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2568 global_avg_mtp_loss: 1.9470 +[titan] 2025-10-05 09:36:24,242 - root - INFO - lr: 3.1424e-05 gnorm: 1.05 [11:02:14<13:30:13] +[titan] 2025-10-05 09:36:35,118 - root - INFO - step: 17995 loss: 2.2044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9479 +[titan] 2025-10-05 09:36:35,118 - root - INFO - lr: 3.1415e-05 gnorm: 1.05 [11:02:25<13:30:02] +[titan] 2025-10-05 09:36:43,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:36:46,012 - root - INFO - step: 18000 loss: 2.1302 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 09:36:46,012 - root - INFO - lr: 3.1406e-05 gnorm: 1.05 [11:02:36<13:29:50] +[titan] 2025-10-05 09:36:56,870 - root - INFO - step: 18005 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9388 +[titan] 2025-10-05 09:36:56,871 - root - INFO - lr: 3.1397e-05 gnorm: 1.04 [11:02:47<13:29:39] +[titan] 2025-10-05 09:37:07,717 - root - INFO - step: 18010 loss: 2.2185 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9597 +[titan] 2025-10-05 09:37:07,718 - root - INFO - lr: 3.1389e-05 gnorm: 1.06 [11:02:57<13:29:28] +[titan] 2025-10-05 09:37:18,576 - root - INFO - step: 18015 loss: 2.2301 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2610 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:37:18,576 - root - INFO - lr: 3.1380e-05 gnorm: 1.13 [11:03:08<13:29:17] +[titan] 2025-10-05 09:37:29,423 - root - INFO - step: 18020 loss: 2.2014 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9439 +[titan] 2025-10-05 09:37:29,423 - root - INFO - lr: 3.1371e-05 gnorm: 1.04 [11:03:19<13:29:05] +[titan] 2025-10-05 09:37:40,354 - root - INFO - step: 18025 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8852 +[titan] 2025-10-05 09:37:40,354 - root - INFO - lr: 3.1362e-05 gnorm: 1.05 [11:03:30<13:28:54] +[titan] 2025-10-05 09:37:51,204 - root - INFO - step: 18030 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9219 +[titan] 2025-10-05 09:37:51,204 - root - INFO - lr: 
3.1353e-05 gnorm: 1.05 [11:03:41<13:28:43] +[titan] 2025-10-05 09:38:02,089 - root - INFO - step: 18035 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:38:02,089 - root - INFO - lr: 3.1345e-05 gnorm: 1.09 [11:03:52<13:28:32] +[titan] 2025-10-05 09:38:12,956 - root - INFO - step: 18040 loss: 2.2292 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9691 +[titan] 2025-10-05 09:38:12,956 - root - INFO - lr: 3.1336e-05 gnorm: 1.11 [11:04:03<13:28:20] +[titan] 2025-10-05 09:38:23,803 - root - INFO - step: 18045 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 09:38:23,803 - root - INFO - lr: 3.1327e-05 gnorm: 1.04 [11:04:14<13:28:09] +[titan] 2025-10-05 09:38:32,482 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:38:34,663 - root - INFO - step: 18050 loss: 2.1705 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9179 +[titan] 2025-10-05 09:38:34,663 - root - INFO - lr: 3.1318e-05 gnorm: 1.05 [11:04:24<13:27:58] +[titan] 2025-10-05 09:38:45,582 - root - INFO - step: 18055 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 09:38:45,582 - root - INFO - lr: 3.1309e-05 gnorm: 1.10 [11:04:35<13:27:47] +[titan] 2025-10-05 09:38:56,461 - root - INFO - step: 18060 loss: 2.1737 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9215 +[titan] 2025-10-05 09:38:56,462 - root - INFO - lr: 3.1301e-05 gnorm: 1.10 [11:04:46<13:27:35] +[titan] 2025-10-05 09:39:07,387 - root - INFO - step: 18065 loss: 2.2727 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2654 global_avg_mtp_loss: 2.0073 +[titan] 2025-10-05 09:39:07,388 - root - INFO - lr: 3.1292e-05 gnorm: 1.11 [11:04:57<13:27:24] +[titan] 2025-10-05 09:39:18,267 - root - INFO - step: 18070 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2533 global_avg_mtp_loss: 1.9196 +[titan] 2025-10-05 09:39:18,267 - root - INFO - lr: 3.1283e-05 gnorm: 1.03 [11:05:08<13:27:13] +[titan] 2025-10-05 09:39:29,177 - root - INFO - step: 18075 loss: 2.1735 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 09:39:29,177 - root - INFO - lr: 3.1274e-05 gnorm: 1.09 [11:05:19<13:27:02] +[titan] 2025-10-05 09:39:40,351 - root - INFO - step: 18080 loss: 2.1525 memory: 118.84GiB(85.28%) tps: 29,326 tflops: 406.85 mfu: 41.14% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 09:39:40,352 - root - INFO - lr: 
3.1265e-05 gnorm: 1.07 [11:05:30<13:26:51] +[titan] 2025-10-05 09:39:51,220 - root - INFO - step: 18085 loss: 2.1539 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 09:39:51,221 - root - INFO - lr: 3.1257e-05 gnorm: 1.06 [11:05:41<13:26:40] +[titan] 2025-10-05 09:40:02,072 - root - INFO - step: 18090 loss: 2.1462 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8962 +[titan] 2025-10-05 09:40:02,072 - root - INFO - lr: 3.1248e-05 gnorm: 1.09 [11:05:52<13:26:28] +[titan] 2025-10-05 09:40:12,918 - root - INFO - step: 18095 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9398 +[titan] 2025-10-05 09:40:12,918 - root - INFO - lr: 3.1239e-05 gnorm: 1.10 [11:06:03<13:26:17] +[titan] 2025-10-05 09:40:21,622 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:40:23,799 - root - INFO - step: 18100 loss: 2.2201 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9624 +[titan] 2025-10-05 09:40:23,799 - root - INFO - lr: 3.1230e-05 gnorm: 1.06 [11:06:14<13:26:06] +[titan] 2025-10-05 09:40:34,657 - root - INFO - step: 18105 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9333 +[titan] 2025-10-05 09:40:34,657 - root - INFO - lr: 3.1221e-05 gnorm: 1.02 [11:06:24<13:25:55] +[titan] 2025-10-05 09:40:45,595 - root - INFO - step: 18110 loss: 2.2690 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2646 global_avg_mtp_loss: 2.0044 +[titan] 2025-10-05 09:40:45,595 - root - INFO - lr: 3.1213e-05 gnorm: 1.11 [11:06:35<13:25:43] +[titan] 2025-10-05 09:40:56,456 - root - INFO - step: 18115 loss: 2.1375 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 09:40:56,457 - root - INFO - lr: 3.1204e-05 gnorm: 1.07 [11:06:46<13:25:32] +[titan] 2025-10-05 09:41:07,318 - root - INFO - step: 18120 loss: 2.2233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2600 global_avg_mtp_loss: 1.9634 +[titan] 2025-10-05 09:41:07,318 - root - INFO - lr: 3.1195e-05 gnorm: 1.08 [11:06:57<13:25:21] +[titan] 2025-10-05 09:41:18,178 - root - INFO - step: 18125 loss: 2.2168 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9584 +[titan] 2025-10-05 09:41:18,178 - root - INFO - lr: 3.1186e-05 gnorm: 1.07 [11:07:08<13:25:10] +[titan] 2025-10-05 09:41:29,063 - root - INFO - step: 18130 loss: 2.1672 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9142 +[titan] 2025-10-05 09:41:29,063 - root - INFO - lr: 
3.1177e-05 gnorm: 1.07 [11:07:19<13:24:58] +[titan] 2025-10-05 09:41:39,951 - root - INFO - step: 18135 loss: 2.1887 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9334 +[titan] 2025-10-05 09:41:39,951 - root - INFO - lr: 3.1169e-05 gnorm: 1.06 [11:07:30<13:24:47] +[titan] 2025-10-05 09:41:50,877 - root - INFO - step: 18140 loss: 2.2241 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2587 global_avg_mtp_loss: 1.9653 +[titan] 2025-10-05 09:41:50,877 - root - INFO - lr: 3.1160e-05 gnorm: 1.13 [11:07:41<13:24:36] +[titan] 2025-10-05 09:42:01,736 - root - INFO - step: 18145 loss: 2.1996 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2575 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 09:42:01,736 - root - INFO - lr: 3.1151e-05 gnorm: 1.11 [11:07:51<13:24:25] +[titan] 2025-10-05 09:42:10,417 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:42:12,607 - root - INFO - step: 18150 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8818 +[titan] 2025-10-05 09:42:12,608 - root - INFO - lr: 3.1142e-05 gnorm: 1.06 [11:08:02<13:24:13] +[titan] 2025-10-05 09:42:23,477 - root - INFO - step: 18155 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9019 +[titan] 2025-10-05 09:42:23,477 - root - INFO - lr: 3.1133e-05 gnorm: 1.02 [11:08:13<13:24:02] +[titan] 2025-10-05 09:42:34,354 - root - INFO - step: 18160 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:42:34,355 - root - INFO - lr: 3.1125e-05 gnorm: 1.06 [11:08:24<13:23:51] +[titan] 2025-10-05 09:42:45,261 - root - INFO - step: 18165 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 09:42:45,261 - root - INFO - lr: 3.1116e-05 gnorm: 1.07 [11:08:35<13:23:40] +[titan] 2025-10-05 09:42:56,138 - root - INFO - step: 18170 loss: 2.1630 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9103 +[titan] 2025-10-05 09:42:56,138 - root - INFO - lr: 3.1107e-05 gnorm: 1.06 [11:08:46<13:23:29] +[titan] 2025-10-05 09:43:06,985 - root - INFO - step: 18175 loss: 2.2130 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9550 +[titan] 2025-10-05 09:43:06,985 - root - INFO - lr: 3.1098e-05 gnorm: 1.05 [11:08:57<13:23:17] +[titan] 2025-10-05 09:43:17,840 - root - INFO - step: 18180 loss: 2.2349 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9748 +[titan] 2025-10-05 09:43:17,840 - root - INFO - lr: 
3.1089e-05 gnorm: 1.06 [11:09:08<13:23:06] +[titan] 2025-10-05 09:43:28,708 - root - INFO - step: 18185 loss: 2.2232 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9641 +[titan] 2025-10-05 09:43:28,708 - root - INFO - lr: 3.1080e-05 gnorm: 1.07 [11:09:18<13:22:55] +[titan] 2025-10-05 09:43:39,585 - root - INFO - step: 18190 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:43:39,586 - root - INFO - lr: 3.1072e-05 gnorm: 1.04 [11:09:29<13:22:43] +[titan] 2025-10-05 09:43:50,582 - root - INFO - step: 18195 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 29,799 tflops: 413.41 mfu: 41.80% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 09:43:50,582 - root - INFO - lr: 3.1063e-05 gnorm: 1.10 [11:09:40<13:22:32] +[titan] 2025-10-05 09:43:59,257 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:44:01,443 - root - INFO - step: 18200 loss: 2.1663 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9138 +[titan] 2025-10-05 09:44:01,443 - root - INFO - lr: 3.1054e-05 gnorm: 1.04 [11:09:51<13:22:21] +[titan] 2025-10-05 09:44:12,364 - root - INFO - step: 18205 loss: 2.1441 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 09:44:12,364 - root - INFO - lr: 3.1045e-05 gnorm: 1.07 [11:10:02<13:22:10] +[titan] 2025-10-05 09:44:23,235 - root - INFO - step: 18210 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 09:44:23,235 - root - INFO - lr: 3.1036e-05 gnorm: 1.04 [11:10:13<13:21:59] +[titan] 2025-10-05 09:44:34,114 - root - INFO - step: 18215 loss: 2.1970 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9420 +[titan] 2025-10-05 09:44:34,114 - root - INFO - lr: 3.1028e-05 gnorm: 1.08 [11:10:24<13:21:47] +[titan] 2025-10-05 09:44:45,034 - root - INFO - step: 18220 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:44:45,034 - root - INFO - lr: 3.1019e-05 gnorm: 1.02 [11:10:35<13:21:36] +[titan] 2025-10-05 09:44:55,926 - root - INFO - step: 18225 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 09:44:55,926 - root - INFO - lr: 3.1010e-05 gnorm: 1.06 [11:10:46<13:21:25] +[titan] 2025-10-05 09:45:06,773 - root - INFO - step: 18230 loss: 2.2584 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2648 global_avg_mtp_loss: 1.9936 +[titan] 2025-10-05 09:45:06,773 - root - INFO - lr: 
3.1001e-05 gnorm: 1.08 [11:10:56<13:21:14] +[titan] 2025-10-05 09:45:17,639 - root - INFO - step: 18235 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9027 +[titan] 2025-10-05 09:45:17,639 - root - INFO - lr: 3.0992e-05 gnorm: 1.10 [11:11:07<13:21:03] +[titan] 2025-10-05 09:45:28,516 - root - INFO - step: 18240 loss: 2.1421 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 09:45:28,516 - root - INFO - lr: 3.0984e-05 gnorm: 1.05 [11:11:18<13:20:51] +[titan] 2025-10-05 09:45:39,379 - root - INFO - step: 18245 loss: 2.1122 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8654 +[titan] 2025-10-05 09:45:39,379 - root - INFO - lr: 3.0975e-05 gnorm: 1.06 [11:11:29<13:20:40] +[titan] 2025-10-05 09:45:48,140 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:45:50,324 - root - INFO - step: 18250 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9443 +[titan] 2025-10-05 09:45:50,324 - root - INFO - lr: 3.0966e-05 gnorm: 1.03 [11:11:40<13:20:29] +[titan] 2025-10-05 09:46:01,186 - root - INFO - step: 18255 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9065 +[titan] 2025-10-05 09:46:01,186 - root - INFO - lr: 3.0957e-05 gnorm: 1.04 [11:11:51<13:20:18] +[titan] 2025-10-05 09:46:12,076 - root - INFO - step: 18260 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 09:46:12,077 - root - INFO - lr: 3.0948e-05 gnorm: 1.05 [11:12:02<13:20:06] +[titan] 2025-10-05 09:46:22,981 - root - INFO - step: 18265 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 09:46:22,981 - root - INFO - lr: 3.0939e-05 gnorm: 1.12 [11:12:13<13:19:55] +[titan] 2025-10-05 09:46:33,839 - root - INFO - step: 18270 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 09:46:33,840 - root - INFO - lr: 3.0931e-05 gnorm: 1.05 [11:12:24<13:19:44] +[titan] 2025-10-05 09:46:44,752 - root - INFO - step: 18275 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 09:46:44,752 - root - INFO - lr: 3.0922e-05 gnorm: 1.10 [11:12:34<13:19:33] +[titan] 2025-10-05 09:46:55,639 - root - INFO - step: 18280 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 09:46:55,639 - root - INFO - lr: 
3.0913e-05 gnorm: 1.08 [11:12:45<13:19:22] +[titan] 2025-10-05 09:47:06,489 - root - INFO - step: 18285 loss: 2.1580 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 09:47:06,489 - root - INFO - lr: 3.0904e-05 gnorm: 1.06 [11:12:56<13:19:10] +[titan] 2025-10-05 09:47:17,381 - root - INFO - step: 18290 loss: 2.2060 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9486 +[titan] 2025-10-05 09:47:17,381 - root - INFO - lr: 3.0895e-05 gnorm: 1.12 [11:13:07<13:18:59] +[titan] 2025-10-05 09:47:28,261 - root - INFO - step: 18295 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9274 +[titan] 2025-10-05 09:47:28,261 - root - INFO - lr: 3.0887e-05 gnorm: 1.07 [11:13:18<13:18:48] +[titan] 2025-10-05 09:47:36,942 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:47:39,132 - root - INFO - step: 18300 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9391 +[titan] 2025-10-05 09:47:39,132 - root - INFO - lr: 3.0878e-05 gnorm: 1.07 [11:13:29<13:18:37] +[titan] 2025-10-05 09:47:50,070 - root - INFO - step: 18305 loss: 2.1579 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.63 mfu: 42.02% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:47:50,070 - root - INFO - lr: 3.0869e-05 gnorm: 1.04 [11:13:40<13:18:25] +[titan] 2025-10-05 09:48:00,953 - root - INFO - step: 18310 loss: 2.2365 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2608 global_avg_mtp_loss: 1.9757 +[titan] 2025-10-05 09:48:00,953 - root - INFO - lr: 3.0860e-05 gnorm: 1.10 [11:13:51<13:18:14] +[titan] 2025-10-05 09:48:11,821 - root - INFO - step: 18315 loss: 2.1228 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 09:48:11,821 - root - INFO - lr: 3.0851e-05 gnorm: 1.10 [11:14:02<13:18:03] +[titan] 2025-10-05 09:48:22,702 - root - INFO - step: 18320 loss: 2.2161 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:22,703 - root - INFO - lr: 3.0842e-05 gnorm: 1.10 [11:14:12<13:17:52] +[titan] 2025-10-05 09:48:33,562 - root - INFO - step: 18325 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 09:48:33,562 - root - INFO - lr: 3.0834e-05 gnorm: 1.06 [11:14:23<13:17:40] +[titan] 2025-10-05 09:48:44,442 - root - INFO - step: 18330 loss: 2.1384 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 09:48:44,442 - root - INFO - lr: 
3.0825e-05 gnorm: 1.07 [11:14:34<13:17:29] +[titan] 2025-10-05 09:48:55,377 - root - INFO - step: 18335 loss: 2.2157 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.74 mfu: 42.04% global_avg_ntp_loss: 0.2576 global_avg_mtp_loss: 1.9581 +[titan] 2025-10-05 09:48:55,377 - root - INFO - lr: 3.0816e-05 gnorm: 1.05 [11:14:45<13:17:18] +[titan] 2025-10-05 09:49:06,255 - root - INFO - step: 18340 loss: 2.1540 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:49:06,255 - root - INFO - lr: 3.0807e-05 gnorm: 1.02 [11:14:56<13:17:07] +[titan] 2025-10-05 09:49:17,139 - root - INFO - step: 18345 loss: 2.1319 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8845 +[titan] 2025-10-05 09:49:17,139 - root - INFO - lr: 3.0798e-05 gnorm: 1.06 [11:15:07<13:16:56] +[titan] 2025-10-05 09:49:25,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:49:28,007 - root - INFO - step: 18350 loss: 2.2255 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9664 +[titan] 2025-10-05 09:49:28,007 - root - INFO - lr: 3.0789e-05 gnorm: 1.07 [11:15:18<13:16:44] +[titan] 2025-10-05 09:49:38,916 - root - INFO - step: 18355 loss: 2.1700 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9168 +[titan] 2025-10-05 09:49:38,916 - root - INFO - lr: 3.0781e-05 gnorm: 1.03 [11:15:29<13:16:33] +[titan] 2025-10-05 09:49:49,833 - root - INFO - step: 18360 loss: 2.2021 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9456 +[titan] 2025-10-05 09:49:49,834 - root - INFO - lr: 3.0772e-05 gnorm: 1.12 [11:15:40<13:16:22] +[titan] 2025-10-05 09:50:00,730 - root - INFO - step: 18365 loss: 2.2105 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9526 +[titan] 2025-10-05 09:50:00,730 - root - INFO - lr: 3.0763e-05 gnorm: 1.05 [11:15:50<13:16:11] +[titan] 2025-10-05 09:50:11,596 - root - INFO - step: 18370 loss: 2.2005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9432 +[titan] 2025-10-05 09:50:11,596 - root - INFO - lr: 3.0754e-05 gnorm: 1.09 [11:16:01<13:16:00] +[titan] 2025-10-05 09:50:22,481 - root - INFO - step: 18375 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8909 +[titan] 2025-10-05 09:50:22,481 - root - INFO - lr: 3.0745e-05 gnorm: 1.07 [11:16:12<13:15:48] +[titan] 2025-10-05 09:50:33,379 - root - INFO - step: 18380 loss: 2.1743 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9217 +[titan] 2025-10-05 09:50:33,379 - root - INFO - lr: 
3.0736e-05 gnorm: 1.05 [11:16:23<13:15:37] +[titan] 2025-10-05 09:50:44,278 - root - INFO - step: 18385 loss: 2.2455 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9831 +[titan] 2025-10-05 09:50:44,278 - root - INFO - lr: 3.0728e-05 gnorm: 1.08 [11:16:34<13:15:26] +[titan] 2025-10-05 09:50:55,194 - root - INFO - step: 18390 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9172 +[titan] 2025-10-05 09:50:55,194 - root - INFO - lr: 3.0719e-05 gnorm: 1.04 [11:16:45<13:15:15] +[titan] 2025-10-05 09:51:06,079 - root - INFO - step: 18395 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9155 +[titan] 2025-10-05 09:51:06,080 - root - INFO - lr: 3.0710e-05 gnorm: 1.07 [11:16:56<13:15:03] +[titan] 2025-10-05 09:51:14,780 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:51:16,960 - root - INFO - step: 18400 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:51:16,961 - root - INFO - lr: 3.0701e-05 gnorm: 1.03 [11:17:07<13:14:52] +[titan] 2025-10-05 09:51:27,817 - root - INFO - step: 18405 loss: 2.1939 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 09:51:27,817 - root - INFO - lr: 3.0692e-05 gnorm: 1.07 [11:17:17<13:14:41] +[titan] 2025-10-05 09:51:38,681 - root - INFO - step: 18410 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 09:51:38,682 - root - INFO - lr: 3.0683e-05 gnorm: 1.06 [11:17:28<13:14:30] +[titan] 2025-10-05 09:51:49,581 - root - INFO - step: 18415 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8667 +[titan] 2025-10-05 09:51:49,582 - root - INFO - lr: 3.0675e-05 gnorm: 1.10 [11:17:39<13:14:19] +[titan] 2025-10-05 09:52:00,490 - root - INFO - step: 18420 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:52:00,490 - root - INFO - lr: 3.0666e-05 gnorm: 1.06 [11:17:50<13:14:07] +[titan] 2025-10-05 09:52:11,358 - root - INFO - step: 18425 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9183 +[titan] 2025-10-05 09:52:11,358 - root - INFO - lr: 3.0657e-05 gnorm: 1.09 [11:18:01<13:13:56] +[titan] 2025-10-05 09:52:22,299 - root - INFO - step: 18430 loss: 2.1542 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.50 mfu: 42.01% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9034 +[titan] 2025-10-05 09:52:22,299 - root - INFO - lr: 
3.0648e-05 gnorm: 1.10 [11:18:12<13:13:45] +[titan] 2025-10-05 09:52:26,822 - root - INFO - Dumping profiler traces at step 18432 +[titan] 2025-10-05 09:52:26,861 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 09:52:33,371 - root - INFO - step: 18435 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 29,597 tflops: 410.61 mfu: 41.52% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 09:52:33,371 - root - INFO - lr: 3.0639e-05 gnorm: 1.09 [11:18:23<13:13:34] +[titan] 2025-10-05 09:52:44,226 - root - INFO - step: 18440 loss: 2.1224 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8757 +[titan] 2025-10-05 09:52:44,226 - root - INFO - lr: 3.0630e-05 gnorm: 1.05 [11:18:34<13:13:23] +[titan] 2025-10-05 09:52:55,086 - root - INFO - step: 18445 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 09:52:55,086 - root - INFO - lr: 3.0622e-05 gnorm: 1.16 [11:18:45<13:13:11] +[titan] 2025-10-05 09:53:03,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:53:05,965 - root - INFO - step: 18450 loss: 2.1736 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 09:53:05,965 - root - INFO - lr: 3.0613e-05 gnorm: 1.04 [11:18:56<13:13:00] +[titan] 2025-10-05 09:53:16,799 - root - INFO - step: 18455 loss: 2.2016 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9454 +[titan] 2025-10-05 09:53:16,799 - root - INFO - lr: 3.0604e-05 gnorm: 1.08 [11:19:06<13:12:49] +[titan] 2025-10-05 09:53:27,669 - root - INFO - step: 18460 loss: 2.1859 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9308 +[titan] 2025-10-05 09:53:27,669 - root - INFO - lr: 3.0595e-05 gnorm: 1.09 [11:19:17<13:12:38] +[titan] 2025-10-05 09:53:38,507 - root - INFO - step: 18465 loss: 2.1946 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2557 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:53:38,508 - root - INFO - lr: 3.0586e-05 gnorm: 1.11 [11:19:28<13:12:26] +[titan] 2025-10-05 09:53:49,352 - root - INFO - step: 18470 loss: 2.2070 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9495 +[titan] 2025-10-05 09:53:49,352 - root - INFO - lr: 3.0577e-05 gnorm: 1.15 [11:19:39<13:12:15] +[titan] 2025-10-05 09:54:00,241 - root - INFO - step: 18475 loss: 2.2443 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2627 global_avg_mtp_loss: 1.9816 +[titan] 2025-10-05 09:54:00,242 - root - INFO - lr: 3.0569e-05 gnorm: 1.07 [11:19:50<13:12:04] +[titan] 2025-10-05 09:54:11,123 - root - INFO - step: 18480 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 09:54:11,123 - root - INFO - lr: 
3.0560e-05 gnorm: 1.02 [11:20:01<13:11:53] +[titan] 2025-10-05 09:54:21,973 - root - INFO - step: 18485 loss: 2.2356 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2694 global_avg_mtp_loss: 1.9661 +[titan] 2025-10-05 09:54:21,973 - root - INFO - lr: 3.0551e-05 gnorm: 3.61 [11:20:12<13:11:41] +[titan] 2025-10-05 09:54:32,841 - root - INFO - step: 18490 loss: 2.2210 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9622 +[titan] 2025-10-05 09:54:32,841 - root - INFO - lr: 3.0542e-05 gnorm: 1.08 [11:20:23<13:11:30] +[titan] 2025-10-05 09:54:43,710 - root - INFO - step: 18495 loss: 2.1961 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2558 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 09:54:43,710 - root - INFO - lr: 3.0533e-05 gnorm: 1.03 [11:20:33<13:11:19] +[titan] 2025-10-05 09:54:52,412 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:54:54,598 - root - INFO - step: 18500 loss: 2.1801 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 09:54:54,598 - root - INFO - lr: 3.0524e-05 gnorm: 1.07 [11:20:44<13:11:08] +[titan] 2025-10-05 09:55:05,447 - root - INFO - step: 18505 loss: 2.1641 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 09:55:05,447 - root - INFO - lr: 3.0515e-05 gnorm: 1.05 [11:20:55<13:10:56] +[titan] 2025-10-05 09:55:16,304 - root - INFO - step: 18510 loss: 2.2328 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9729 +[titan] 2025-10-05 09:55:16,304 - root - INFO - lr: 3.0507e-05 gnorm: 1.12 [11:21:06<13:10:45] +[titan] 2025-10-05 09:55:27,210 - root - INFO - step: 18515 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 09:55:27,211 - root - INFO - lr: 3.0498e-05 gnorm: 1.04 [11:21:17<13:10:34] +[titan] 2025-10-05 09:55:38,070 - root - INFO - step: 18520 loss: 2.1990 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 09:55:38,071 - root - INFO - lr: 3.0489e-05 gnorm: 1.06 [11:21:28<13:10:23] +[titan] 2025-10-05 09:55:48,946 - root - INFO - step: 18525 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8803 +[titan] 2025-10-05 09:55:48,946 - root - INFO - lr: 3.0480e-05 gnorm: 1.10 [11:21:39<13:10:12] +[titan] 2025-10-05 09:55:59,819 - root - INFO - step: 18530 loss: 2.1569 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9067 +[titan] 2025-10-05 09:55:59,819 - root - INFO - lr: 
3.0471e-05 gnorm: 1.09 [11:21:49<13:10:00] +[titan] 2025-10-05 09:56:10,667 - root - INFO - step: 18535 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 09:56:10,668 - root - INFO - lr: 3.0462e-05 gnorm: 1.08 [11:22:00<13:09:49] +[titan] 2025-10-05 09:56:21,514 - root - INFO - step: 18540 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8789 +[titan] 2025-10-05 09:56:21,515 - root - INFO - lr: 3.0454e-05 gnorm: 1.06 [11:22:11<13:09:38] +[titan] 2025-10-05 09:56:32,416 - root - INFO - step: 18545 loss: 2.2399 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9793 +[titan] 2025-10-05 09:56:32,416 - root - INFO - lr: 3.0445e-05 gnorm: 1.09 [11:22:22<13:09:27] +[titan] 2025-10-05 09:56:41,122 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:56:43,301 - root - INFO - step: 18550 loss: 2.2123 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9545 +[titan] 2025-10-05 09:56:43,301 - root - INFO - lr: 3.0436e-05 gnorm: 1.11 [11:22:33<13:09:15] +[titan] 2025-10-05 09:56:54,209 - root - INFO - step: 18555 loss: 2.1250 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8774 +[titan] 2025-10-05 09:56:54,209 - root - INFO - lr: 3.0427e-05 gnorm: 1.05 [11:22:44<13:09:04] +[titan] 2025-10-05 09:57:05,059 - root - INFO - step: 18560 loss: 2.1067 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 09:57:05,060 - root - INFO - lr: 3.0418e-05 gnorm: 1.11 [11:22:55<13:08:53] +[titan] 2025-10-05 09:57:15,909 - root - INFO - step: 18565 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8970 +[titan] 2025-10-05 09:57:15,909 - root - INFO - lr: 3.0409e-05 gnorm: 1.06 [11:23:06<13:08:42] +[titan] 2025-10-05 09:57:26,796 - root - INFO - step: 18570 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9015 +[titan] 2025-10-05 09:57:26,796 - root - INFO - lr: 3.0400e-05 gnorm: 1.05 [11:23:16<13:08:30] +[titan] 2025-10-05 09:57:37,659 - root - INFO - step: 18575 loss: 2.1669 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9146 +[titan] 2025-10-05 09:57:37,660 - root - INFO - lr: 3.0392e-05 gnorm: 1.07 [11:23:27<13:08:19] +[titan] 2025-10-05 09:57:48,558 - root - INFO - step: 18580 loss: 2.1694 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 09:57:48,559 - root - INFO - lr: 
3.0383e-05 gnorm: 1.08 [11:23:38<13:08:08] +[titan] 2025-10-05 09:57:59,464 - root - INFO - step: 18585 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 09:57:59,464 - root - INFO - lr: 3.0374e-05 gnorm: 1.08 [11:23:49<13:07:57] +[titan] 2025-10-05 09:58:10,319 - root - INFO - step: 18590 loss: 2.1661 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9144 +[titan] 2025-10-05 09:58:10,319 - root - INFO - lr: 3.0365e-05 gnorm: 1.09 [11:24:00<13:07:46] +[titan] 2025-10-05 09:58:21,169 - root - INFO - step: 18595 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 09:58:21,169 - root - INFO - lr: 3.0356e-05 gnorm: 1.06 [11:24:11<13:07:34] +[titan] 2025-10-05 09:58:29,863 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 09:58:32,051 - root - INFO - step: 18600 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9191 +[titan] 2025-10-05 09:58:32,052 - root - INFO - lr: 3.0347e-05 gnorm: 1.06 [11:24:22<13:07:23] +[titan] 2025-10-05 09:58:42,941 - root - INFO - step: 18605 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 09:58:42,941 - root - INFO - lr: 3.0339e-05 gnorm: 1.07 [11:24:33<13:07:12] +[titan] 2025-10-05 09:58:53,849 - root - INFO - step: 18610 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 09:58:53,849 - root - INFO - lr: 3.0330e-05 gnorm: 1.13 [11:24:44<13:07:01] +[titan] 2025-10-05 09:59:04,767 - root - INFO - step: 18615 loss: 2.1618 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9098 +[titan] 2025-10-05 09:59:04,767 - root - INFO - lr: 3.0321e-05 gnorm: 1.07 [11:24:54<13:06:50] +[titan] 2025-10-05 09:59:15,655 - root - INFO - step: 18620 loss: 2.1523 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 09:59:15,655 - root - INFO - lr: 3.0312e-05 gnorm: 1.08 [11:25:05<13:06:38] +[titan] 2025-10-05 09:59:26,502 - root - INFO - step: 18625 loss: 2.1982 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2559 global_avg_mtp_loss: 1.9423 +[titan] 2025-10-05 09:59:26,502 - root - INFO - lr: 3.0303e-05 gnorm: 1.06 [11:25:16<13:06:27] +[titan] 2025-10-05 09:59:37,342 - root - INFO - step: 18630 loss: 2.1702 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9176 +[titan] 2025-10-05 09:59:37,342 - root - INFO - lr: 
3.0294e-05 gnorm: 1.02 [11:25:27<13:06:16] +[titan] 2025-10-05 09:59:48,204 - root - INFO - step: 18635 loss: 2.1942 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9389 +[titan] 2025-10-05 09:59:48,205 - root - INFO - lr: 3.0285e-05 gnorm: 1.06 [11:25:38<13:06:05] +[titan] 2025-10-05 09:59:59,141 - root - INFO - step: 18640 loss: 2.1586 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.67 mfu: 42.03% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9071 +[titan] 2025-10-05 09:59:59,142 - root - INFO - lr: 3.0277e-05 gnorm: 1.09 [11:25:49<13:05:53] +[titan] 2025-10-05 10:00:09,999 - root - INFO - step: 18645 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:00:09,999 - root - INFO - lr: 3.0268e-05 gnorm: 1.04 [11:26:00<13:05:42] +[titan] 2025-10-05 10:00:18,686 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:00:20,868 - root - INFO - step: 18650 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8928 +[titan] 2025-10-05 10:00:20,868 - root - INFO - lr: 3.0259e-05 gnorm: 1.06 [11:26:11<13:05:31] +[titan] 2025-10-05 10:00:31,751 - root - INFO - step: 18655 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.8955 +[titan] 2025-10-05 10:00:31,751 - root - INFO - lr: 3.0250e-05 gnorm: 1.10 [11:26:21<13:05:20] +[titan] 2025-10-05 10:00:42,624 - root - INFO - step: 18660 loss: 2.1761 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9236 +[titan] 2025-10-05 10:00:42,624 - root - INFO - lr: 3.0241e-05 gnorm: 1.06 [11:26:32<13:05:08] +[titan] 2025-10-05 10:00:53,508 - root - INFO - step: 18665 loss: 2.2236 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9646 +[titan] 2025-10-05 10:00:53,508 - root - INFO - lr: 3.0232e-05 gnorm: 1.07 [11:26:43<13:04:57] +[titan] 2025-10-05 10:01:04,417 - root - INFO - step: 18670 loss: 2.1073 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8611 +[titan] 2025-10-05 10:01:04,417 - root - INFO - lr: 3.0223e-05 gnorm: 1.05 [11:26:54<13:04:46] +[titan] 2025-10-05 10:01:15,322 - root - INFO - step: 18675 loss: 2.1543 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9018 +[titan] 2025-10-05 10:01:15,322 - root - INFO - lr: 3.0215e-05 gnorm: 1.01 [11:27:05<13:04:35] +[titan] 2025-10-05 10:01:26,187 - root - INFO - step: 18680 loss: 2.1937 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9386 +[titan] 2025-10-05 10:01:26,188 - root - INFO - lr: 
3.0206e-05 gnorm: 1.06 [11:27:16<13:04:24] +[titan] 2025-10-05 10:01:37,075 - root - INFO - step: 18685 loss: 2.2297 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9695 +[titan] 2025-10-05 10:01:37,076 - root - INFO - lr: 3.0197e-05 gnorm: 1.09 [11:27:27<13:04:12] +[titan] 2025-10-05 10:01:47,947 - root - INFO - step: 18690 loss: 2.2573 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9954 +[titan] 2025-10-05 10:01:47,947 - root - INFO - lr: 3.0188e-05 gnorm: 1.13 [11:27:38<13:04:01] +[titan] 2025-10-05 10:01:58,853 - root - INFO - step: 18695 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9399 +[titan] 2025-10-05 10:01:58,853 - root - INFO - lr: 3.0179e-05 gnorm: 1.11 [11:27:49<13:03:50] +[titan] 2025-10-05 10:02:07,533 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:02:09,720 - root - INFO - step: 18700 loss: 2.1760 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2541 global_avg_mtp_loss: 1.9218 +[titan] 2025-10-05 10:02:09,720 - root - INFO - lr: 3.0170e-05 gnorm: 1.05 [11:27:59<13:03:39] +[titan] 2025-10-05 10:02:20,646 - root - INFO - step: 18705 loss: 2.1878 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:02:20,646 - root - INFO - lr: 3.0161e-05 gnorm: 1.13 [11:28:10<13:03:28] +[titan] 2025-10-05 10:02:31,519 - root - INFO - step: 18710 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9250 +[titan] 2025-10-05 10:02:31,519 - root - INFO - lr: 3.0153e-05 gnorm: 1.03 [11:28:21<13:03:16] +[titan] 2025-10-05 10:02:42,408 - root - INFO - step: 18715 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 10:02:42,408 - root - INFO - lr: 3.0144e-05 gnorm: 1.14 [11:28:32<13:03:05] +[titan] 2025-10-05 10:02:53,291 - root - INFO - step: 18720 loss: 2.1198 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 10:02:53,292 - root - INFO - lr: 3.0135e-05 gnorm: 1.04 [11:28:43<13:02:54] +[titan] 2025-10-05 10:03:04,164 - root - INFO - step: 18725 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 10:03:04,164 - root - INFO - lr: 3.0126e-05 gnorm: 1.09 [11:28:54<13:02:43] +[titan] 2025-10-05 10:03:15,026 - root - INFO - step: 18730 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9214 +[titan] 2025-10-05 10:03:15,026 - root - INFO - lr: 
3.0117e-05 gnorm: 1.09 [11:29:05<13:02:32] +[titan] 2025-10-05 10:03:25,889 - root - INFO - step: 18735 loss: 2.1746 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:03:25,889 - root - INFO - lr: 3.0108e-05 gnorm: 1.09 [11:29:16<13:02:20] +[titan] 2025-10-05 10:03:36,788 - root - INFO - step: 18740 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 10:03:36,789 - root - INFO - lr: 3.0099e-05 gnorm: 1.05 [11:29:26<13:02:09] +[titan] 2025-10-05 10:03:47,682 - root - INFO - step: 18745 loss: 2.1174 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:03:47,683 - root - INFO - lr: 3.0090e-05 gnorm: 1.02 [11:29:37<13:01:58] +[titan] 2025-10-05 10:03:56,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:03:58,556 - root - INFO - step: 18750 loss: 2.1769 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 10:03:58,557 - root - INFO - lr: 3.0082e-05 gnorm: 1.06 [11:29:48<13:01:47] +[titan] 2025-10-05 10:04:09,464 - root - INFO - step: 18755 loss: 2.1852 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9314 +[titan] 2025-10-05 10:04:09,464 - root - INFO - lr: 3.0073e-05 gnorm: 1.08 [11:29:59<13:01:36] +[titan] 2025-10-05 10:04:20,357 - root - INFO - step: 18760 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:04:20,357 - root - INFO - lr: 3.0064e-05 gnorm: 1.07 [11:30:10<13:01:24] +[titan] 2025-10-05 10:04:31,231 - root - INFO - step: 18765 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 10:04:31,232 - root - INFO - lr: 3.0055e-05 gnorm: 1.11 [11:30:21<13:01:13] +[titan] 2025-10-05 10:04:42,135 - root - INFO - step: 18770 loss: 2.1701 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9175 +[titan] 2025-10-05 10:04:42,136 - root - INFO - lr: 3.0046e-05 gnorm: 1.10 [11:30:32<13:01:02] +[titan] 2025-10-05 10:04:53,006 - root - INFO - step: 18775 loss: 2.2030 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9457 +[titan] 2025-10-05 10:04:53,006 - root - INFO - lr: 3.0037e-05 gnorm: 1.03 [11:30:43<13:00:51] +[titan] 2025-10-05 10:05:03,904 - root - INFO - step: 18780 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9169 +[titan] 2025-10-05 10:05:03,904 - root - INFO - lr: 
3.0028e-05 gnorm: 1.07 [11:30:54<13:00:39] +[titan] 2025-10-05 10:05:14,747 - root - INFO - step: 18785 loss: 2.1812 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 10:05:14,748 - root - INFO - lr: 3.0020e-05 gnorm: 1.09 [11:31:04<13:00:28] +[titan] 2025-10-05 10:05:25,598 - root - INFO - step: 18790 loss: 2.1748 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9221 +[titan] 2025-10-05 10:05:25,598 - root - INFO - lr: 3.0011e-05 gnorm: 1.06 [11:31:15<13:00:17] +[titan] 2025-10-05 10:05:36,466 - root - INFO - step: 18795 loss: 2.1754 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 10:05:36,466 - root - INFO - lr: 3.0002e-05 gnorm: 1.04 [11:31:26<13:00:06] +[titan] 2025-10-05 10:05:45,130 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:05:47,339 - root - INFO - step: 18800 loss: 2.2290 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9689 +[titan] 2025-10-05 10:05:47,340 - root - INFO - lr: 2.9993e-05 gnorm: 1.07 [11:31:37<12:59:55] +[titan] 2025-10-05 10:05:58,210 - root - INFO - step: 18805 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 10:05:58,210 - root - INFO - lr: 2.9984e-05 gnorm: 1.07 [11:31:48<12:59:43] +[titan] 2025-10-05 10:06:09,120 - root - INFO - step: 18810 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 10:06:09,121 - root - INFO - lr: 2.9975e-05 gnorm: 1.04 [11:31:59<12:59:32] +[titan] 2025-10-05 10:06:19,986 - root - INFO - step: 18815 loss: 2.2912 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2665 global_avg_mtp_loss: 2.0247 +[titan] 2025-10-05 10:06:19,986 - root - INFO - lr: 2.9966e-05 gnorm: 1.08 [11:32:10<12:59:21] +[titan] 2025-10-05 10:06:30,847 - root - INFO - step: 18820 loss: 2.1851 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9312 +[titan] 2025-10-05 10:06:30,847 - root - INFO - lr: 2.9957e-05 gnorm: 1.05 [11:32:20<12:59:10] +[titan] 2025-10-05 10:06:41,727 - root - INFO - step: 18825 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:06:41,727 - root - INFO - lr: 2.9949e-05 gnorm: 1.07 [11:32:31<12:58:58] +[titan] 2025-10-05 10:06:52,600 - root - INFO - step: 18830 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 10:06:52,601 - root - INFO - lr: 
2.9940e-05 gnorm: 1.10 [11:32:42<12:58:47] +[titan] 2025-10-05 10:07:03,527 - root - INFO - step: 18835 loss: 2.2067 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9505 +[titan] 2025-10-05 10:07:03,527 - root - INFO - lr: 2.9931e-05 gnorm: 1.07 [11:32:53<12:58:36] +[titan] 2025-10-05 10:07:14,367 - root - INFO - step: 18840 loss: 2.2003 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 10:07:14,367 - root - INFO - lr: 2.9922e-05 gnorm: 1.08 [11:33:04<12:58:25] +[titan] 2025-10-05 10:07:25,248 - root - INFO - step: 18845 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 10:07:25,248 - root - INFO - lr: 2.9913e-05 gnorm: 1.08 [11:33:15<12:58:14] +[titan] 2025-10-05 10:07:33,925 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:07:36,110 - root - INFO - step: 18850 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9378 +[titan] 2025-10-05 10:07:36,110 - root - INFO - lr: 2.9904e-05 gnorm: 1.09 [11:33:26<12:58:02] +[titan] 2025-10-05 10:07:46,979 - root - INFO - step: 18855 loss: 2.2101 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:07:46,979 - root - INFO - lr: 2.9895e-05 gnorm: 1.07 [11:33:37<12:57:51] +[titan] 2025-10-05 10:07:57,853 - root - INFO - step: 18860 loss: 2.1443 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:07:57,853 - root - INFO - lr: 2.9886e-05 gnorm: 1.06 [11:33:47<12:57:40] +[titan] 2025-10-05 10:08:08,767 - root - INFO - step: 18865 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 10:08:08,767 - root - INFO - lr: 2.9878e-05 gnorm: 1.05 [11:33:58<12:57:29] +[titan] 2025-10-05 10:08:19,643 - root - INFO - step: 18870 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:08:19,643 - root - INFO - lr: 2.9869e-05 gnorm: 1.09 [11:34:09<12:57:18] +[titan] 2025-10-05 10:08:30,505 - root - INFO - step: 18875 loss: 2.1432 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8943 +[titan] 2025-10-05 10:08:30,505 - root - INFO - lr: 2.9860e-05 gnorm: 1.11 [11:34:20<12:57:06] +[titan] 2025-10-05 10:08:41,373 - root - INFO - step: 18880 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8639 +[titan] 2025-10-05 10:08:41,373 - root - INFO - lr: 
2.9851e-05 gnorm: 1.04 [11:34:31<12:56:55] +[titan] 2025-10-05 10:08:52,227 - root - INFO - step: 18885 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:08:52,227 - root - INFO - lr: 2.9842e-05 gnorm: 1.09 [11:34:42<12:56:44] +[titan] 2025-10-05 10:09:03,099 - root - INFO - step: 18890 loss: 2.1963 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9402 +[titan] 2025-10-05 10:09:03,099 - root - INFO - lr: 2.9833e-05 gnorm: 1.06 [11:34:53<12:56:33] +[titan] 2025-10-05 10:09:13,982 - root - INFO - step: 18895 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 10:09:13,983 - root - INFO - lr: 2.9824e-05 gnorm: 1.06 [11:35:04<12:56:21] +[titan] 2025-10-05 10:09:22,708 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:09:24,889 - root - INFO - step: 18900 loss: 2.2596 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2624 global_avg_mtp_loss: 1.9972 +[titan] 2025-10-05 10:09:24,889 - root - INFO - lr: 2.9815e-05 gnorm: 1.09 [11:35:15<12:56:10] +[titan] 2025-10-05 10:09:35,723 - root - INFO - step: 18905 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9049 +[titan] 2025-10-05 10:09:35,723 - root - INFO - lr: 2.9807e-05 gnorm: 2.16 [11:35:25<12:55:59] +[titan] 2025-10-05 10:09:46,616 - root - INFO - step: 18910 loss: 2.1918 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9367 +[titan] 2025-10-05 10:09:46,616 - root - INFO - lr: 2.9798e-05 gnorm: 1.10 [11:35:36<12:55:48] +[titan] 2025-10-05 10:09:57,505 - root - INFO - step: 18915 loss: 2.1487 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8990 +[titan] 2025-10-05 10:09:57,506 - root - INFO - lr: 2.9789e-05 gnorm: 1.06 [11:35:47<12:55:37] +[titan] 2025-10-05 10:10:08,408 - root - INFO - step: 18920 loss: 2.2542 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2632 global_avg_mtp_loss: 1.9911 +[titan] 2025-10-05 10:10:08,409 - root - INFO - lr: 2.9780e-05 gnorm: 1.11 [11:35:58<12:55:25] +[titan] 2025-10-05 10:10:19,290 - root - INFO - step: 18925 loss: 2.1401 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8910 +[titan] 2025-10-05 10:10:19,290 - root - INFO - lr: 2.9771e-05 gnorm: 1.09 [11:36:09<12:55:14] +[titan] 2025-10-05 10:10:30,188 - root - INFO - step: 18930 loss: 2.1578 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9075 +[titan] 2025-10-05 10:10:30,188 - root - INFO - lr: 
2.9762e-05 gnorm: 1.08 [11:36:20<12:55:03] +[titan] 2025-10-05 10:10:41,057 - root - INFO - step: 18935 loss: 2.1455 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:10:41,057 - root - INFO - lr: 2.9753e-05 gnorm: 1.08 [11:36:31<12:54:52] +[titan] 2025-10-05 10:10:51,943 - root - INFO - step: 18940 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9185 +[titan] 2025-10-05 10:10:51,943 - root - INFO - lr: 2.9744e-05 gnorm: 1.09 [11:36:42<12:54:41] +[titan] 2025-10-05 10:11:00,930 - root - INFO - Dumping profiler traces at step 18944 +[titan] 2025-10-05 10:11:00,969 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:11:03,165 - root - INFO - step: 18945 loss: 2.2146 memory: 118.84GiB(85.28%) tps: 29,200 tflops: 405.11 mfu: 40.96% global_avg_ntp_loss: 0.2589 global_avg_mtp_loss: 1.9557 +[titan] 2025-10-05 10:11:03,166 - root - INFO - lr: 2.9736e-05 gnorm: 1.05 [11:36:53<12:54:30] +[titan] 2025-10-05 10:11:11,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:11:14,040 - root - INFO - step: 18950 loss: 2.2217 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2602 global_avg_mtp_loss: 1.9615 +[titan] 2025-10-05 10:11:14,040 - root - INFO - lr: 2.9727e-05 gnorm: 1.10 [11:37:04<12:54:19] +[titan] 2025-10-05 10:11:24,927 - root - INFO - step: 18955 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9322 +[titan] 2025-10-05 10:11:24,927 - root - INFO - lr: 2.9718e-05 gnorm: 1.04 [11:37:15<12:54:07] +[titan] 2025-10-05 10:11:35,836 - root - INFO - step: 18960 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:11:35,836 - root - INFO - lr: 2.9709e-05 gnorm: 1.08 [11:37:25<12:53:56] +[titan] 2025-10-05 10:11:46,725 - root - INFO - step: 18965 loss: 2.2139 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9561 +[titan] 2025-10-05 10:11:46,725 - root - INFO - lr: 2.9700e-05 gnorm: 1.06 [11:37:36<12:53:45] +[titan] 2025-10-05 10:11:57,607 - root - INFO - step: 18970 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 10:11:57,608 - root - INFO - lr: 2.9691e-05 gnorm: 1.06 [11:37:47<12:53:34] +[titan] 2025-10-05 10:12:08,492 - root - INFO - step: 18975 loss: 2.1885 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9336 +[titan] 2025-10-05 10:12:08,492 - root - INFO - lr: 2.9682e-05 gnorm: 1.05 [11:37:58<12:53:23] +[titan] 2025-10-05 10:12:19,363 - root - INFO - step: 18980 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9272 +[titan] 2025-10-05 10:12:19,364 - root - INFO - lr: 
2.9673e-05 gnorm: 1.05 [11:38:09<12:53:11] +[titan] 2025-10-05 10:12:30,251 - root - INFO - step: 18985 loss: 2.2178 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9592 +[titan] 2025-10-05 10:12:30,251 - root - INFO - lr: 2.9664e-05 gnorm: 1.08 [11:38:20<12:53:00] +[titan] 2025-10-05 10:12:41,145 - root - INFO - step: 18990 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 10:12:41,146 - root - INFO - lr: 2.9656e-05 gnorm: 1.04 [11:38:31<12:52:49] +[titan] 2025-10-05 10:12:52,037 - root - INFO - step: 18995 loss: 2.1633 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:12:52,037 - root - INFO - lr: 2.9647e-05 gnorm: 1.06 [11:38:42<12:52:38] +[titan] 2025-10-05 10:13:00,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:13:02,914 - root - INFO - step: 19000 loss: 2.1925 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9377 +[titan] 2025-10-05 10:13:02,914 - root - INFO - lr: 2.9638e-05 gnorm: 1.06 [11:38:53<12:52:27] +[titan] 2025-10-05 10:13:13,797 - root - INFO - step: 19005 loss: 2.2035 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9475 +[titan] 2025-10-05 10:13:13,797 - root - INFO - lr: 2.9629e-05 gnorm: 1.09 [11:39:03<12:52:15] +[titan] 2025-10-05 10:13:24,684 - root - INFO - step: 19010 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 10:13:24,684 - root - INFO - lr: 2.9620e-05 gnorm: 1.04 [11:39:14<12:52:04] +[titan] 2025-10-05 10:13:35,565 - root - INFO - step: 19015 loss: 2.1615 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 10:13:35,565 - root - INFO - lr: 2.9611e-05 gnorm: 1.06 [11:39:25<12:51:53] +[titan] 2025-10-05 10:13:46,458 - root - INFO - step: 19020 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:13:46,458 - root - INFO - lr: 2.9602e-05 gnorm: 1.09 [11:39:36<12:51:42] +[titan] 2025-10-05 10:13:57,365 - root - INFO - step: 19025 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9259 +[titan] 2025-10-05 10:13:57,365 - root - INFO - lr: 2.9593e-05 gnorm: 1.11 [11:39:47<12:51:31] +[titan] 2025-10-05 10:14:08,243 - root - INFO - step: 19030 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2581 global_avg_mtp_loss: 1.9421 +[titan] 2025-10-05 10:14:08,244 - root - INFO - lr: 
2.9585e-05 gnorm: 1.11 [11:39:58<12:51:19] +[titan] 2025-10-05 10:14:19,180 - root - INFO - step: 19035 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 29,962 tflops: 415.68 mfu: 42.03% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 10:14:19,180 - root - INFO - lr: 2.9576e-05 gnorm: 1.08 [11:40:09<12:51:08] +[titan] 2025-10-05 10:14:30,057 - root - INFO - step: 19040 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 10:14:30,058 - root - INFO - lr: 2.9567e-05 gnorm: 1.08 [11:40:20<12:50:57] +[titan] 2025-10-05 10:14:40,934 - root - INFO - step: 19045 loss: 2.1807 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:14:40,934 - root - INFO - lr: 2.9558e-05 gnorm: 1.05 [11:40:31<12:50:46] +[titan] 2025-10-05 10:14:49,618 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:14:51,803 - root - INFO - step: 19050 loss: 2.2142 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2569 global_avg_mtp_loss: 1.9573 +[titan] 2025-10-05 10:14:51,803 - root - INFO - lr: 2.9549e-05 gnorm: 1.10 [11:40:41<12:50:35] +[titan] 2025-10-05 10:15:02,687 - root - INFO - step: 19055 loss: 2.1320 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:15:02,687 - root - INFO - lr: 2.9540e-05 gnorm: 1.03 [11:40:52<12:50:23] +[titan] 2025-10-05 10:15:13,599 - root - INFO - step: 19060 loss: 2.1731 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:15:13,600 - root - INFO - lr: 2.9531e-05 gnorm: 1.05 [11:41:03<12:50:12] +[titan] 2025-10-05 10:15:24,470 - root - INFO - step: 19065 loss: 2.0790 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 10:15:24,470 - root - INFO - lr: 2.9522e-05 gnorm: 1.02 [11:41:14<12:50:01] +[titan] 2025-10-05 10:15:35,340 - root - INFO - step: 19070 loss: 2.1215 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 10:15:35,340 - root - INFO - lr: 2.9513e-05 gnorm: 1.03 [11:41:25<12:49:50] +[titan] 2025-10-05 10:15:46,220 - root - INFO - step: 19075 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2535 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 10:15:46,220 - root - INFO - lr: 2.9505e-05 gnorm: 1.03 [11:41:36<12:49:39] +[titan] 2025-10-05 10:15:57,087 - root - INFO - step: 19080 loss: 2.1476 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 10:15:57,087 - root - INFO - lr: 
2.9496e-05 gnorm: 1.08 [11:41:47<12:49:27] +[titan] 2025-10-05 10:16:07,949 - root - INFO - step: 19085 loss: 2.1692 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:16:07,949 - root - INFO - lr: 2.9487e-05 gnorm: 1.03 [11:41:58<12:49:16] +[titan] 2025-10-05 10:16:18,866 - root - INFO - step: 19090 loss: 2.1027 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:16:18,866 - root - INFO - lr: 2.9478e-05 gnorm: 1.05 [11:42:08<12:49:05] +[titan] 2025-10-05 10:16:29,722 - root - INFO - step: 19095 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 10:16:29,722 - root - INFO - lr: 2.9469e-05 gnorm: 1.07 [11:42:19<12:48:54] +[titan] 2025-10-05 10:16:38,405 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:16:40,594 - root - INFO - step: 19100 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 10:16:40,594 - root - INFO - lr: 2.9460e-05 gnorm: 1.12 [11:42:30<12:48:43] +[titan] 2025-10-05 10:16:51,467 - root - INFO - step: 19105 loss: 2.1659 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 10:16:51,467 - root - INFO - lr: 2.9451e-05 gnorm: 1.07 [11:42:41<12:48:31] +[titan] 2025-10-05 10:17:02,333 - root - INFO - step: 19110 loss: 2.1571 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9057 +[titan] 2025-10-05 10:17:02,333 - root - INFO - lr: 2.9442e-05 gnorm: 1.07 [11:42:52<12:48:20] +[titan] 2025-10-05 10:17:13,254 - root - INFO - step: 19115 loss: 2.1907 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9353 +[titan] 2025-10-05 10:17:13,254 - root - INFO - lr: 2.9433e-05 gnorm: 1.05 [11:43:03<12:48:09] +[titan] 2025-10-05 10:17:24,144 - root - INFO - step: 19120 loss: 2.2215 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2578 global_avg_mtp_loss: 1.9638 +[titan] 2025-10-05 10:17:24,145 - root - INFO - lr: 2.9424e-05 gnorm: 1.08 [11:43:14<12:47:58] +[titan] 2025-10-05 10:17:34,985 - root - INFO - step: 19125 loss: 2.1915 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 10:17:34,985 - root - INFO - lr: 2.9416e-05 gnorm: 1.07 [11:43:25<12:47:47] +[titan] 2025-10-05 10:17:45,834 - root - INFO - step: 19130 loss: 2.1337 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 10:17:45,834 - root - INFO - lr: 
2.9407e-05 gnorm: 1.04 [11:43:35<12:47:35] +[titan] 2025-10-05 10:17:56,697 - root - INFO - step: 19135 loss: 2.1835 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9291 +[titan] 2025-10-05 10:17:56,697 - root - INFO - lr: 2.9398e-05 gnorm: 1.04 [11:43:46<12:47:24] +[titan] 2025-10-05 10:18:07,545 - root - INFO - step: 19140 loss: 2.1442 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8946 +[titan] 2025-10-05 10:18:07,545 - root - INFO - lr: 2.9389e-05 gnorm: 1.06 [11:43:57<12:47:13] +[titan] 2025-10-05 10:18:18,458 - root - INFO - step: 19145 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9199 +[titan] 2025-10-05 10:18:18,458 - root - INFO - lr: 2.9380e-05 gnorm: 1.08 [11:44:08<12:47:02] +[titan] 2025-10-05 10:18:27,153 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:18:29,337 - root - INFO - step: 19150 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8515 +[titan] 2025-10-05 10:18:29,338 - root - INFO - lr: 2.9371e-05 gnorm: 1.34 [11:44:19<12:46:50] +[titan] 2025-10-05 10:18:40,256 - root - INFO - step: 19155 loss: 2.1332 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8849 +[titan] 2025-10-05 10:18:40,257 - root - INFO - lr: 2.9362e-05 gnorm: 1.09 [11:44:30<12:46:39] +[titan] 2025-10-05 10:18:51,145 - root - INFO - step: 19160 loss: 2.1481 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 10:18:51,145 - root - INFO - lr: 2.9353e-05 gnorm: 1.07 [11:44:41<12:46:28] +[titan] 2025-10-05 10:19:02,037 - root - INFO - step: 19165 loss: 2.1516 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 10:19:02,037 - root - INFO - lr: 2.9344e-05 gnorm: 1.05 [11:44:52<12:46:17] +[titan] 2025-10-05 10:19:12,937 - root - INFO - step: 19170 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9133 +[titan] 2025-10-05 10:19:12,937 - root - INFO - lr: 2.9336e-05 gnorm: 1.08 [11:45:03<12:46:06] +[titan] 2025-10-05 10:19:23,875 - root - INFO - step: 19175 loss: 2.1790 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9258 +[titan] 2025-10-05 10:19:23,875 - root - INFO - lr: 2.9327e-05 gnorm: 1.04 [11:45:13<12:45:55] +[titan] 2025-10-05 10:19:34,776 - root - INFO - step: 19180 loss: 2.1428 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8944 +[titan] 2025-10-05 10:19:34,776 - root - INFO - lr: 
2.9318e-05 gnorm: 1.07 [11:45:24<12:45:43] +[titan] 2025-10-05 10:19:45,696 - root - INFO - step: 19185 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 10:19:45,696 - root - INFO - lr: 2.9309e-05 gnorm: 1.05 [11:45:35<12:45:32] +[titan] 2025-10-05 10:19:56,559 - root - INFO - step: 19190 loss: 2.2063 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9493 +[titan] 2025-10-05 10:19:56,559 - root - INFO - lr: 2.9300e-05 gnorm: 1.05 [11:45:46<12:45:21] +[titan] 2025-10-05 10:20:07,440 - root - INFO - step: 19195 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 10:20:07,440 - root - INFO - lr: 2.9291e-05 gnorm: 1.06 [11:45:57<12:45:10] +[titan] 2025-10-05 10:20:16,202 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:20:18,388 - root - INFO - step: 19200 loss: 2.1895 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.22 mfu: 41.98% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:20:18,389 - root - INFO - lr: 2.9282e-05 gnorm: 1.10 [11:46:08<12:44:59] +[titan] 2025-10-05 10:20:29,261 - root - INFO - step: 19205 loss: 2.2321 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9722 +[titan] 2025-10-05 10:20:29,261 - root - INFO - lr: 2.9273e-05 gnorm: 1.05 [11:46:19<12:44:47] +[titan] 2025-10-05 10:20:40,136 - root - INFO - step: 19210 loss: 2.2012 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9452 +[titan] 2025-10-05 10:20:40,136 - root - INFO - lr: 2.9264e-05 gnorm: 1.05 [11:46:30<12:44:36] +[titan] 2025-10-05 10:20:51,016 - root - INFO - step: 19215 loss: 2.1099 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8637 +[titan] 2025-10-05 10:20:51,016 - root - INFO - lr: 2.9255e-05 gnorm: 1.02 [11:46:41<12:44:25] +[titan] 2025-10-05 10:21:01,917 - root - INFO - step: 19220 loss: 2.1688 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 10:21:01,918 - root - INFO - lr: 2.9247e-05 gnorm: 1.06 [11:46:52<12:44:14] +[titan] 2025-10-05 10:21:12,779 - root - INFO - step: 19225 loss: 2.1977 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9427 +[titan] 2025-10-05 10:21:12,780 - root - INFO - lr: 2.9238e-05 gnorm: 1.08 [11:47:02<12:44:03] +[titan] 2025-10-05 10:21:23,714 - root - INFO - step: 19230 loss: 2.1818 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9256 +[titan] 2025-10-05 10:21:23,714 - root - INFO - lr: 
2.9229e-05 gnorm: 1.09 [11:47:13<12:43:52] +[titan] 2025-10-05 10:21:34,597 - root - INFO - step: 19235 loss: 2.1070 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 10:21:34,597 - root - INFO - lr: 2.9220e-05 gnorm: 1.08 [11:47:24<12:43:40] +[titan] 2025-10-05 10:21:45,492 - root - INFO - step: 19240 loss: 2.0962 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 10:21:45,492 - root - INFO - lr: 2.9211e-05 gnorm: 1.04 [11:47:35<12:43:29] +[titan] 2025-10-05 10:21:56,367 - root - INFO - step: 19245 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 10:21:56,367 - root - INFO - lr: 2.9202e-05 gnorm: 1.08 [11:47:46<12:43:18] +[titan] 2025-10-05 10:22:05,111 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:22:07,295 - root - INFO - step: 19250 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:22:07,296 - root - INFO - lr: 2.9193e-05 gnorm: 1.04 [11:47:57<12:43:07] +[titan] 2025-10-05 10:22:18,238 - root - INFO - step: 19255 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 10:22:18,238 - root - INFO - lr: 2.9184e-05 gnorm: 1.06 [11:48:08<12:42:56] +[titan] 2025-10-05 10:22:29,120 - root - INFO - step: 19260 loss: 2.1938 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9382 +[titan] 2025-10-05 10:22:29,120 - root - INFO - lr: 2.9175e-05 gnorm: 1.10 [11:48:19<12:42:44] +[titan] 2025-10-05 10:22:40,008 - root - INFO - step: 19265 loss: 2.1728 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 10:22:40,008 - root - INFO - lr: 2.9167e-05 gnorm: 1.08 [11:48:30<12:42:33] +[titan] 2025-10-05 10:22:50,875 - root - INFO - step: 19270 loss: 2.2006 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2566 global_avg_mtp_loss: 1.9440 +[titan] 2025-10-05 10:22:50,875 - root - INFO - lr: 2.9158e-05 gnorm: 1.07 [11:48:40<12:42:22] +[titan] 2025-10-05 10:23:01,737 - root - INFO - step: 19275 loss: 2.1975 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:23:01,737 - root - INFO - lr: 2.9149e-05 gnorm: 1.08 [11:48:51<12:42:11] +[titan] 2025-10-05 10:23:12,658 - root - INFO - step: 19280 loss: 2.1967 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9400 +[titan] 2025-10-05 10:23:12,658 - root - INFO - lr: 
2.9140e-05 gnorm: 1.06 [11:49:02<12:42:00] +[titan] 2025-10-05 10:23:23,594 - root - INFO - step: 19285 loss: 2.1554 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9047 +[titan] 2025-10-05 10:23:23,595 - root - INFO - lr: 2.9131e-05 gnorm: 1.11 [11:49:13<12:41:49] +[titan] 2025-10-05 10:23:34,471 - root - INFO - step: 19290 loss: 2.2097 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2572 global_avg_mtp_loss: 1.9525 +[titan] 2025-10-05 10:23:34,471 - root - INFO - lr: 2.9122e-05 gnorm: 1.08 [11:49:24<12:41:37] +[titan] 2025-10-05 10:23:45,370 - root - INFO - step: 19295 loss: 2.2145 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2590 global_avg_mtp_loss: 1.9555 +[titan] 2025-10-05 10:23:45,370 - root - INFO - lr: 2.9113e-05 gnorm: 1.11 [11:49:35<12:41:26] +[titan] 2025-10-05 10:23:54,147 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:23:56,333 - root - INFO - step: 19300 loss: 2.1524 memory: 118.84GiB(85.28%) tps: 29,890 tflops: 414.68 mfu: 41.93% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9030 +[titan] 2025-10-05 10:23:56,333 - root - INFO - lr: 2.9104e-05 gnorm: 1.12 [11:49:46<12:41:15] +[titan] 2025-10-05 10:24:07,214 - root - INFO - step: 19305 loss: 2.1152 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 10:24:07,214 - root - INFO - lr: 2.9095e-05 gnorm: 1.06 [11:49:57<12:41:04] +[titan] 2025-10-05 10:24:18,150 - root - INFO - step: 19310 loss: 2.1360 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8875 +[titan] 2025-10-05 10:24:18,150 - root - INFO - lr: 2.9086e-05 gnorm: 1.11 [11:50:08<12:40:53] +[titan] 2025-10-05 10:24:29,081 - root - INFO - step: 19315 loss: 2.1682 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9158 +[titan] 2025-10-05 10:24:29,081 - root - INFO - lr: 2.9077e-05 gnorm: 1.08 [11:50:19<12:40:42] +[titan] 2025-10-05 10:24:39,944 - root - INFO - step: 19320 loss: 2.1420 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 10:24:39,945 - root - INFO - lr: 2.9069e-05 gnorm: 1.05 [11:50:30<12:40:30] +[titan] 2025-10-05 10:24:50,812 - root - INFO - step: 19325 loss: 2.1255 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 10:24:50,812 - root - INFO - lr: 2.9060e-05 gnorm: 1.04 [11:50:40<12:40:19] +[titan] 2025-10-05 10:25:01,681 - root - INFO - step: 19330 loss: 2.1276 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 10:25:01,681 - root - INFO - lr: 
2.9051e-05 gnorm: 1.04 [11:50:51<12:40:08] +[titan] 2025-10-05 10:25:12,540 - root - INFO - step: 19335 loss: 2.1642 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9118 +[titan] 2025-10-05 10:25:12,540 - root - INFO - lr: 2.9042e-05 gnorm: 1.05 [11:51:02<12:39:57] +[titan] 2025-10-05 10:25:23,440 - root - INFO - step: 19340 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 10:25:23,441 - root - INFO - lr: 2.9033e-05 gnorm: 1.08 [11:51:13<12:39:46] +[titan] 2025-10-05 10:25:34,323 - root - INFO - step: 19345 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8960 +[titan] 2025-10-05 10:25:34,323 - root - INFO - lr: 2.9024e-05 gnorm: 1.06 [11:51:24<12:39:34] +[titan] 2025-10-05 10:25:42,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:25:45,172 - root - INFO - step: 19350 loss: 2.1499 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 10:25:45,173 - root - INFO - lr: 2.9015e-05 gnorm: 1.06 [11:51:35<12:39:23] +[titan] 2025-10-05 10:25:56,041 - root - INFO - step: 19355 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 10:25:56,041 - root - INFO - lr: 2.9006e-05 gnorm: 1.05 [11:51:46<12:39:12] +[titan] 2025-10-05 10:26:06,901 - root - INFO - step: 19360 loss: 2.2092 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9527 +[titan] 2025-10-05 10:26:06,901 - root - INFO - lr: 2.8997e-05 gnorm: 1.09 [11:51:56<12:39:01] +[titan] 2025-10-05 10:26:17,768 - root - INFO - step: 19365 loss: 2.2565 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2628 global_avg_mtp_loss: 1.9937 +[titan] 2025-10-05 10:26:17,768 - root - INFO - lr: 2.8988e-05 gnorm: 1.06 [11:52:07<12:38:50] +[titan] 2025-10-05 10:26:28,693 - root - INFO - step: 19370 loss: 2.1913 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9380 +[titan] 2025-10-05 10:26:28,693 - root - INFO - lr: 2.8980e-05 gnorm: 1.07 [11:52:18<12:38:38] +[titan] 2025-10-05 10:26:39,550 - root - INFO - step: 19375 loss: 2.2098 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9524 +[titan] 2025-10-05 10:26:39,550 - root - INFO - lr: 2.8971e-05 gnorm: 1.10 [11:52:29<12:38:27] +[titan] 2025-10-05 10:26:50,433 - root - INFO - step: 19380 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 10:26:50,433 - root - INFO - lr: 
2.8962e-05 gnorm: 1.07 [11:52:40<12:38:16] +[titan] 2025-10-05 10:27:01,279 - root - INFO - step: 19385 loss: 2.1129 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 10:27:01,279 - root - INFO - lr: 2.8953e-05 gnorm: 1.04 [11:52:51<12:38:05] +[titan] 2025-10-05 10:27:12,141 - root - INFO - step: 19390 loss: 2.1980 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2550 global_avg_mtp_loss: 1.9430 +[titan] 2025-10-05 10:27:12,141 - root - INFO - lr: 2.8944e-05 gnorm: 1.10 [11:53:02<12:37:54] +[titan] 2025-10-05 10:27:23,043 - root - INFO - step: 19395 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9220 +[titan] 2025-10-05 10:27:23,043 - root - INFO - lr: 2.8935e-05 gnorm: 1.07 [11:53:13<12:37:42] +[titan] 2025-10-05 10:27:31,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:27:33,895 - root - INFO - step: 19400 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:27:33,895 - root - INFO - lr: 2.8926e-05 gnorm: 1.06 [11:53:23<12:37:31] +[titan] 2025-10-05 10:27:44,768 - root - INFO - step: 19405 loss: 2.0933 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 10:27:44,768 - root - INFO - lr: 2.8917e-05 gnorm: 1.05 [11:53:34<12:37:20] +[titan] 2025-10-05 10:27:55,669 - root - INFO - step: 19410 loss: 2.1568 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 10:27:55,669 - root - INFO - lr: 2.8908e-05 gnorm: 1.05 [11:53:45<12:37:09] +[titan] 2025-10-05 10:28:06,538 - root - INFO - step: 19415 loss: 2.1727 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9212 +[titan] 2025-10-05 10:28:06,538 - root - INFO - lr: 2.8899e-05 gnorm: 1.07 [11:53:56<12:36:58] +[titan] 2025-10-05 10:28:17,407 - root - INFO - step: 19420 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:28:17,407 - root - INFO - lr: 2.8890e-05 gnorm: 1.09 [11:54:07<12:36:46] +[titan] 2025-10-05 10:28:28,336 - root - INFO - step: 19425 loss: 2.2069 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2565 global_avg_mtp_loss: 1.9504 +[titan] 2025-10-05 10:28:28,336 - root - INFO - lr: 2.8882e-05 gnorm: 1.11 [11:54:18<12:36:35] +[titan] 2025-10-05 10:28:39,218 - root - INFO - step: 19430 loss: 2.1045 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8601 +[titan] 2025-10-05 10:28:39,218 - root - INFO - lr: 
2.8873e-05 gnorm: 1.08 [11:54:29<12:36:24] +[titan] 2025-10-05 10:28:50,126 - root - INFO - step: 19435 loss: 2.1868 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9318 +[titan] 2025-10-05 10:28:50,127 - root - INFO - lr: 2.8864e-05 gnorm: 1.07 [11:54:40<12:36:13] +[titan] 2025-10-05 10:29:01,033 - root - INFO - step: 19440 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9099 +[titan] 2025-10-05 10:29:01,033 - root - INFO - lr: 2.8855e-05 gnorm: 1.04 [11:54:51<12:36:02] +[titan] 2025-10-05 10:29:11,909 - root - INFO - step: 19445 loss: 2.1656 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9141 +[titan] 2025-10-05 10:29:11,909 - root - INFO - lr: 2.8846e-05 gnorm: 1.06 [11:55:01<12:35:51] +[titan] 2025-10-05 10:29:20,623 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:29:22,834 - root - INFO - step: 19450 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.8914 +[titan] 2025-10-05 10:29:22,834 - root - INFO - lr: 2.8837e-05 gnorm: 1.04 [11:55:12<12:35:39] +[titan] 2025-10-05 10:29:33,814 - root - INFO - step: 19455 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 29,845 tflops: 414.05 mfu: 41.87% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:29:33,814 - root - INFO - lr: 2.8828e-05 gnorm: 1.06 [11:55:23<12:35:28] +[titan] 2025-10-05 10:29:36,173 - root - INFO - Dumping profiler traces at step 19456 +[titan] 2025-10-05 10:29:36,212 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:29:44,902 - root - INFO - step: 19460 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 29,553 tflops: 410.00 mfu: 41.46% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:29:44,902 - root - INFO - lr: 2.8819e-05 gnorm: 1.04 [11:55:34<12:35:17] +[titan] 2025-10-05 10:29:55,774 - root - INFO - step: 19465 loss: 2.1881 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9329 +[titan] 2025-10-05 10:29:55,774 - root - INFO - lr: 2.8810e-05 gnorm: 1.05 [11:55:45<12:35:06] +[titan] 2025-10-05 10:30:06,632 - root - INFO - step: 19470 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8999 +[titan] 2025-10-05 10:30:06,632 - root - INFO - lr: 2.8801e-05 gnorm: 1.06 [11:55:56<12:34:55] +[titan] 2025-10-05 10:30:17,527 - root - INFO - step: 19475 loss: 2.0697 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8296 +[titan] 2025-10-05 10:30:17,527 - root - INFO - lr: 2.8792e-05 gnorm: 1.03 [11:56:07<12:34:44] +[titan] 2025-10-05 10:30:28,427 - root - INFO - step: 19480 loss: 
2.2055 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9494 +[titan] 2025-10-05 10:30:28,427 - root - INFO - lr: 2.8784e-05 gnorm: 1.06 [11:56:18<12:34:33] +[titan] 2025-10-05 10:30:39,303 - root - INFO - step: 19485 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 10:30:39,303 - root - INFO - lr: 2.8775e-05 gnorm: 1.05 [11:56:29<12:34:21] +[titan] 2025-10-05 10:30:50,176 - root - INFO - step: 19490 loss: 2.1840 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:30:50,176 - root - INFO - lr: 2.8766e-05 gnorm: 1.09 [11:56:40<12:34:10] +[titan] 2025-10-05 10:31:01,024 - root - INFO - step: 19495 loss: 2.1843 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:31:01,024 - root - INFO - lr: 2.8757e-05 gnorm: 1.06 [11:56:51<12:33:59] +[titan] 2025-10-05 10:31:09,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:31:11,884 - root - INFO - step: 19500 loss: 2.1533 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9032 +[titan] 2025-10-05 10:31:11,884 - root - INFO - lr: 2.8748e-05 gnorm: 1.04 [11:57:01<12:33:48] +[titan] 2025-10-05 10:31:22,776 - root - INFO - step: 19505 loss: 2.1755 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9228 +[titan] 2025-10-05 10:31:22,776 - root - INFO - lr: 2.8739e-05 gnorm: 1.05 [11:57:12<12:33:37] +[titan] 2025-10-05 10:31:33,671 - root - INFO - step: 19510 loss: 2.1889 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9346 +[titan] 2025-10-05 10:31:33,672 - root - INFO - lr: 2.8730e-05 gnorm: 1.09 [11:57:23<12:33:25] +[titan] 2025-10-05 10:31:44,519 - root - INFO - step: 19515 loss: 2.1331 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8848 +[titan] 2025-10-05 10:31:44,519 - root - INFO - lr: 2.8721e-05 gnorm: 1.09 [11:57:34<12:33:14] +[titan] 2025-10-05 10:31:55,368 - root - INFO - step: 19520 loss: 2.1532 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:31:55,368 - root - INFO - lr: 2.8712e-05 gnorm: 1.09 [11:57:45<12:33:03] +[titan] 2025-10-05 10:32:06,223 - root - INFO - step: 19525 loss: 2.1590 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 10:32:06,223 - root - INFO - lr: 2.8703e-05 gnorm: 1.04 [11:57:56<12:32:52] +[titan] 2025-10-05 10:32:17,080 - root - INFO - step: 19530 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8988 +[titan] 2025-10-05 10:32:17,081 - root - INFO - lr: 
2.8694e-05 gnorm: 1.03 [11:58:07<12:32:40] +[titan] 2025-10-05 10:32:27,960 - root - INFO - step: 19535 loss: 2.2115 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2571 global_avg_mtp_loss: 1.9544 +[titan] 2025-10-05 10:32:27,960 - root - INFO - lr: 2.8686e-05 gnorm: 1.12 [11:58:18<12:32:29] +[titan] 2025-10-05 10:32:38,837 - root - INFO - step: 19540 loss: 2.1660 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9129 +[titan] 2025-10-05 10:32:38,837 - root - INFO - lr: 2.8677e-05 gnorm: 1.05 [11:58:28<12:32:18] +[titan] 2025-10-05 10:32:49,713 - root - INFO - step: 19545 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 10:32:49,713 - root - INFO - lr: 2.8668e-05 gnorm: 1.05 [11:58:39<12:32:07] +[titan] 2025-10-05 10:32:58,420 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:33:00,607 - root - INFO - step: 19550 loss: 2.1396 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8912 +[titan] 2025-10-05 10:33:00,607 - root - INFO - lr: 2.8659e-05 gnorm: 1.07 [11:58:50<12:31:56] +[titan] 2025-10-05 10:33:11,463 - root - INFO - step: 19555 loss: 2.1426 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8942 +[titan] 2025-10-05 10:33:11,463 - root - INFO - lr: 2.8650e-05 gnorm: 1.05 [11:59:01<12:31:45] +[titan] 2025-10-05 10:33:22,332 - root - INFO - step: 19560 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2563 global_avg_mtp_loss: 1.9357 +[titan] 2025-10-05 10:33:22,332 - root - INFO - lr: 2.8641e-05 gnorm: 1.08 [11:59:12<12:31:33] +[titan] 2025-10-05 10:33:33,247 - root - INFO - step: 19565 loss: 2.1871 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2556 global_avg_mtp_loss: 1.9315 +[titan] 2025-10-05 10:33:33,247 - root - INFO - lr: 2.8632e-05 gnorm: 1.12 [11:59:23<12:31:22] +[titan] 2025-10-05 10:33:44,148 - root - INFO - step: 19570 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:33:44,148 - root - INFO - lr: 2.8623e-05 gnorm: 1.14 [11:59:34<12:31:11] +[titan] 2025-10-05 10:33:55,019 - root - INFO - step: 19575 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 10:33:55,019 - root - INFO - lr: 2.8614e-05 gnorm: 1.09 [11:59:45<12:31:00] +[titan] 2025-10-05 10:34:05,890 - root - INFO - step: 19580 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8641 +[titan] 2025-10-05 10:34:05,890 - root - INFO - lr: 
2.8605e-05 gnorm: 1.05 [11:59:55<12:30:49] +[titan] 2025-10-05 10:34:16,751 - root - INFO - step: 19585 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 10:34:16,752 - root - INFO - lr: 2.8596e-05 gnorm: 1.07 [12:00:06<12:30:37] +[titan] 2025-10-05 10:34:27,618 - root - INFO - step: 19590 loss: 2.1741 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9213 +[titan] 2025-10-05 10:34:27,619 - root - INFO - lr: 2.8588e-05 gnorm: 1.06 [12:00:17<12:30:26] +[titan] 2025-10-05 10:34:38,478 - root - INFO - step: 19595 loss: 2.1817 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9289 +[titan] 2025-10-05 10:34:38,478 - root - INFO - lr: 2.8579e-05 gnorm: 1.07 [12:00:28<12:30:15] +[titan] 2025-10-05 10:34:47,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:34:49,385 - root - INFO - step: 19600 loss: 2.1233 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 10:34:49,385 - root - INFO - lr: 2.8570e-05 gnorm: 1.04 [12:00:39<12:30:04] +[titan] 2025-10-05 10:35:00,251 - root - INFO - step: 19605 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9248 +[titan] 2025-10-05 10:35:00,251 - root - INFO - lr: 2.8561e-05 gnorm: 1.04 [12:00:50<12:29:53] +[titan] 2025-10-05 10:35:11,113 - root - INFO - step: 19610 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9104 +[titan] 2025-10-05 10:35:11,113 - root - INFO - lr: 2.8552e-05 gnorm: 1.09 [12:01:01<12:29:41] +[titan] 2025-10-05 10:35:21,983 - root - INFO - step: 19615 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 10:35:21,983 - root - INFO - lr: 2.8543e-05 gnorm: 1.05 [12:01:12<12:29:30] +[titan] 2025-10-05 10:35:32,879 - root - INFO - step: 19620 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 10:35:32,879 - root - INFO - lr: 2.8534e-05 gnorm: 1.11 [12:01:22<12:29:19] +[titan] 2025-10-05 10:35:43,764 - root - INFO - step: 19625 loss: 2.1033 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8582 +[titan] 2025-10-05 10:35:43,764 - root - INFO - lr: 2.8525e-05 gnorm: 1.05 [12:01:33<12:29:08] +[titan] 2025-10-05 10:35:54,636 - root - INFO - step: 19630 loss: 2.2204 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2599 global_avg_mtp_loss: 1.9605 +[titan] 2025-10-05 10:35:54,637 - root - INFO - lr: 
2.8516e-05 gnorm: 1.10 [12:01:44<12:28:57] +[titan] 2025-10-05 10:36:05,532 - root - INFO - step: 19635 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 10:36:05,532 - root - INFO - lr: 2.8507e-05 gnorm: 1.02 [12:01:55<12:28:46] +[titan] 2025-10-05 10:36:16,411 - root - INFO - step: 19640 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 10:36:16,411 - root - INFO - lr: 2.8498e-05 gnorm: 1.05 [12:02:06<12:28:34] +[titan] 2025-10-05 10:36:27,270 - root - INFO - step: 19645 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 10:36:27,270 - root - INFO - lr: 2.8489e-05 gnorm: 1.06 [12:02:17<12:28:23] +[titan] 2025-10-05 10:36:35,978 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:36:38,157 - root - INFO - step: 19650 loss: 2.0890 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8470 +[titan] 2025-10-05 10:36:38,157 - root - INFO - lr: 2.8481e-05 gnorm: 1.01 [12:02:28<12:28:12] +[titan] 2025-10-05 10:36:49,018 - root - INFO - step: 19655 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 10:36:49,019 - root - INFO - lr: 2.8472e-05 gnorm: 1.07 [12:02:39<12:28:01] +[titan] 2025-10-05 10:36:59,878 - root - INFO - step: 19660 loss: 2.2289 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2588 global_avg_mtp_loss: 1.9701 +[titan] 2025-10-05 10:36:59,879 - root - INFO - lr: 2.8463e-05 gnorm: 1.08 [12:02:49<12:27:50] +[titan] 2025-10-05 10:37:10,783 - root - INFO - step: 19665 loss: 2.1435 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8951 +[titan] 2025-10-05 10:37:10,783 - root - INFO - lr: 2.8454e-05 gnorm: 1.08 [12:03:00<12:27:38] +[titan] 2025-10-05 10:37:21,656 - root - INFO - step: 19670 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8722 +[titan] 2025-10-05 10:37:21,657 - root - INFO - lr: 2.8445e-05 gnorm: 1.08 [12:03:11<12:27:27] +[titan] 2025-10-05 10:37:32,556 - root - INFO - step: 19675 loss: 2.2272 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2639 global_avg_mtp_loss: 1.9633 +[titan] 2025-10-05 10:37:32,556 - root - INFO - lr: 2.8436e-05 gnorm: 1.12 [12:03:22<12:27:16] +[titan] 2025-10-05 10:37:43,429 - root - INFO - step: 19680 loss: 2.1453 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:37:43,429 - root - INFO - lr: 
2.8427e-05 gnorm: 1.06 [12:03:33<12:27:05] +[titan] 2025-10-05 10:37:54,290 - root - INFO - step: 19685 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9408 +[titan] 2025-10-05 10:37:54,290 - root - INFO - lr: 2.8418e-05 gnorm: 1.10 [12:03:44<12:26:54] +[titan] 2025-10-05 10:38:05,156 - root - INFO - step: 19690 loss: 2.1517 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:38:05,157 - root - INFO - lr: 2.8409e-05 gnorm: 1.07 [12:03:55<12:26:42] +[titan] 2025-10-05 10:38:16,025 - root - INFO - step: 19695 loss: 2.1827 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9296 +[titan] 2025-10-05 10:38:16,025 - root - INFO - lr: 2.8400e-05 gnorm: 1.11 [12:04:06<12:26:31] +[titan] 2025-10-05 10:38:24,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:38:26,916 - root - INFO - step: 19700 loss: 2.1777 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9255 +[titan] 2025-10-05 10:38:26,916 - root - INFO - lr: 2.8391e-05 gnorm: 1.06 [12:04:16<12:26:20] +[titan] 2025-10-05 10:38:37,813 - root - INFO - step: 19705 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 10:38:37,813 - root - INFO - lr: 2.8382e-05 gnorm: 1.04 [12:04:27<12:26:09] +[titan] 2025-10-05 10:38:48,686 - root - INFO - step: 19710 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8558 +[titan] 2025-10-05 10:38:48,686 - root - INFO - lr: 2.8374e-05 gnorm: 1.10 [12:04:38<12:25:58] +[titan] 2025-10-05 10:38:59,549 - root - INFO - step: 19715 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 10:38:59,549 - root - INFO - lr: 2.8365e-05 gnorm: 1.07 [12:04:49<12:25:46] +[titan] 2025-10-05 10:39:10,404 - root - INFO - step: 19720 loss: 2.2251 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2580 global_avg_mtp_loss: 1.9671 +[titan] 2025-10-05 10:39:10,404 - root - INFO - lr: 2.8356e-05 gnorm: 1.05 [12:05:00<12:25:35] +[titan] 2025-10-05 10:39:21,281 - root - INFO - step: 19725 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8864 +[titan] 2025-10-05 10:39:21,281 - root - INFO - lr: 2.8347e-05 gnorm: 1.06 [12:05:11<12:25:24] +[titan] 2025-10-05 10:39:32,209 - root - INFO - step: 19730 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9186 +[titan] 2025-10-05 10:39:32,209 - root - INFO - lr: 
2.8338e-05 gnorm: 1.05 [12:05:22<12:25:13] +[titan] 2025-10-05 10:39:43,074 - root - INFO - step: 19735 loss: 2.1410 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8919 +[titan] 2025-10-05 10:39:43,074 - root - INFO - lr: 2.8329e-05 gnorm: 1.09 [12:05:33<12:25:02] +[titan] 2025-10-05 10:39:53,944 - root - INFO - step: 19740 loss: 2.1920 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2544 global_avg_mtp_loss: 1.9376 +[titan] 2025-10-05 10:39:53,944 - root - INFO - lr: 2.8320e-05 gnorm: 1.05 [12:05:43<12:24:51] +[titan] 2025-10-05 10:40:04,859 - root - INFO - step: 19745 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 10:40:04,859 - root - INFO - lr: 2.8311e-05 gnorm: 1.04 [12:05:54<12:24:39] +[titan] 2025-10-05 10:40:13,560 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:40:15,748 - root - INFO - step: 19750 loss: 2.1520 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 10:40:15,748 - root - INFO - lr: 2.8302e-05 gnorm: 1.04 [12:06:05<12:24:28] +[titan] 2025-10-05 10:40:26,639 - root - INFO - step: 19755 loss: 2.1342 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8866 +[titan] 2025-10-05 10:40:26,639 - root - INFO - lr: 2.8293e-05 gnorm: 1.04 [12:06:16<12:24:17] +[titan] 2025-10-05 10:40:37,586 - root - INFO - step: 19760 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,933 tflops: 415.28 mfu: 41.99% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 10:40:37,587 - root - INFO - lr: 2.8284e-05 gnorm: 1.06 [12:06:27<12:24:06] +[titan] 2025-10-05 10:40:48,488 - root - INFO - step: 19765 loss: 2.1309 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8834 +[titan] 2025-10-05 10:40:48,489 - root - INFO - lr: 2.8275e-05 gnorm: 1.06 [12:06:38<12:23:55] +[titan] 2025-10-05 10:40:59,376 - root - INFO - step: 19770 loss: 2.2031 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2560 global_avg_mtp_loss: 1.9471 +[titan] 2025-10-05 10:40:59,377 - root - INFO - lr: 2.8266e-05 gnorm: 1.07 [12:06:49<12:23:44] +[titan] 2025-10-05 10:41:10,261 - root - INFO - step: 19775 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 10:41:10,261 - root - INFO - lr: 2.8258e-05 gnorm: 1.05 [12:07:00<12:23:32] +[titan] 2025-10-05 10:41:21,161 - root - INFO - step: 19780 loss: 2.2202 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2586 global_avg_mtp_loss: 1.9616 +[titan] 2025-10-05 10:41:21,161 - root - INFO - lr: 
2.8249e-05 gnorm: 1.08 [12:07:11<12:23:21] +[titan] 2025-10-05 10:41:32,049 - root - INFO - step: 19785 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.8965 +[titan] 2025-10-05 10:41:32,049 - root - INFO - lr: 2.8240e-05 gnorm: 1.07 [12:07:22<12:23:10] +[titan] 2025-10-05 10:41:42,943 - root - INFO - step: 19790 loss: 2.0669 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 10:41:42,943 - root - INFO - lr: 2.8231e-05 gnorm: 1.04 [12:07:32<12:22:59] +[titan] 2025-10-05 10:41:53,847 - root - INFO - step: 19795 loss: 2.2087 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 10:41:53,847 - root - INFO - lr: 2.8222e-05 gnorm: 1.09 [12:07:43<12:22:48] +[titan] 2025-10-05 10:42:02,542 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:42:04,726 - root - INFO - step: 19800 loss: 2.1645 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9124 +[titan] 2025-10-05 10:42:04,726 - root - INFO - lr: 2.8213e-05 gnorm: 1.07 [12:07:54<12:22:37] +[titan] 2025-10-05 10:42:15,602 - root - INFO - step: 19805 loss: 2.1292 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8809 +[titan] 2025-10-05 10:42:15,602 - root - INFO - lr: 2.8204e-05 gnorm: 1.06 [12:08:05<12:22:25] +[titan] 2025-10-05 10:42:26,476 - root - INFO - step: 19810 loss: 2.1988 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9424 +[titan] 2025-10-05 10:42:26,476 - root - INFO - lr: 2.8195e-05 gnorm: 1.07 [12:08:16<12:22:14] +[titan] 2025-10-05 10:42:37,355 - root - INFO - step: 19815 loss: 2.1111 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 10:42:37,355 - root - INFO - lr: 2.8186e-05 gnorm: 1.08 [12:08:27<12:22:03] +[titan] 2025-10-05 10:42:48,237 - root - INFO - step: 19820 loss: 2.1257 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8783 +[titan] 2025-10-05 10:42:48,237 - root - INFO - lr: 2.8177e-05 gnorm: 1.07 [12:08:38<12:21:52] +[titan] 2025-10-05 10:42:59,141 - root - INFO - step: 19825 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:42:59,142 - root - INFO - lr: 2.8168e-05 gnorm: 1.06 [12:08:49<12:21:41] +[titan] 2025-10-05 10:43:09,994 - root - INFO - step: 19830 loss: 2.1713 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9190 +[titan] 2025-10-05 10:43:09,994 - root - INFO - lr: 
2.8159e-05 gnorm: 1.12 [12:09:00<12:21:29] +[titan] 2025-10-05 10:43:20,854 - root - INFO - step: 19835 loss: 2.1789 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2538 global_avg_mtp_loss: 1.9251 +[titan] 2025-10-05 10:43:20,854 - root - INFO - lr: 2.8151e-05 gnorm: 1.09 [12:09:10<12:21:18] +[titan] 2025-10-05 10:43:31,720 - root - INFO - step: 19840 loss: 2.1270 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:43:31,720 - root - INFO - lr: 2.8142e-05 gnorm: 1.04 [12:09:21<12:21:07] +[titan] 2025-10-05 10:43:42,583 - root - INFO - step: 19845 loss: 2.1653 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9125 +[titan] 2025-10-05 10:43:42,583 - root - INFO - lr: 2.8133e-05 gnorm: 1.03 [12:09:32<12:20:56] +[titan] 2025-10-05 10:43:51,289 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:43:53,475 - root - INFO - step: 19850 loss: 2.1376 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2489 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 10:43:53,475 - root - INFO - lr: 2.8124e-05 gnorm: 1.05 [12:09:43<12:20:45] +[titan] 2025-10-05 10:44:04,341 - root - INFO - step: 19855 loss: 2.1956 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9401 +[titan] 2025-10-05 10:44:04,341 - root - INFO - lr: 2.8115e-05 gnorm: 1.09 [12:09:54<12:20:34] +[titan] 2025-10-05 10:44:15,250 - root - INFO - step: 19860 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9089 +[titan] 2025-10-05 10:44:15,250 - root - INFO - lr: 2.8106e-05 gnorm: 1.09 [12:10:05<12:20:22] +[titan] 2025-10-05 10:44:26,122 - root - INFO - step: 19865 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 10:44:26,122 - root - INFO - lr: 2.8097e-05 gnorm: 1.06 [12:10:16<12:20:11] +[titan] 2025-10-05 10:44:37,015 - root - INFO - step: 19870 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 10:44:37,015 - root - INFO - lr: 2.8088e-05 gnorm: 1.07 [12:10:27<12:20:00] +[titan] 2025-10-05 10:44:47,890 - root - INFO - step: 19875 loss: 2.1479 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8987 +[titan] 2025-10-05 10:44:47,890 - root - INFO - lr: 2.8079e-05 gnorm: 1.11 [12:10:37<12:19:49] +[titan] 2025-10-05 10:44:58,757 - root - INFO - step: 19880 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 10:44:58,757 - root - INFO - lr: 
2.8070e-05 gnorm: 1.06 [12:10:48<12:19:38] +[titan] 2025-10-05 10:45:09,633 - root - INFO - step: 19885 loss: 2.1570 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9058 +[titan] 2025-10-05 10:45:09,633 - root - INFO - lr: 2.8061e-05 gnorm: 1.08 [12:10:59<12:19:26] +[titan] 2025-10-05 10:45:20,533 - root - INFO - step: 19890 loss: 2.1170 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 10:45:20,533 - root - INFO - lr: 2.8052e-05 gnorm: 1.07 [12:11:10<12:19:15] +[titan] 2025-10-05 10:45:31,359 - root - INFO - step: 19895 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2529 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 10:45:31,360 - root - INFO - lr: 2.8043e-05 gnorm: 1.10 [12:11:21<12:19:04] +[titan] 2025-10-05 10:45:40,049 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:45:42,231 - root - INFO - step: 19900 loss: 2.1514 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9011 +[titan] 2025-10-05 10:45:42,231 - root - INFO - lr: 2.8035e-05 gnorm: 1.08 [12:11:32<12:18:53] +[titan] 2025-10-05 10:45:53,088 - root - INFO - step: 19905 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 10:45:53,088 - root - INFO - lr: 2.8026e-05 gnorm: 1.03 [12:11:43<12:18:42] +[titan] 2025-10-05 10:46:03,910 - root - INFO - step: 19910 loss: 2.1849 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9309 +[titan] 2025-10-05 10:46:03,910 - root - INFO - lr: 2.8017e-05 gnorm: 1.08 [12:11:53<12:18:30] +[titan] 2025-10-05 10:46:14,770 - root - INFO - step: 19915 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 10:46:14,771 - root - INFO - lr: 2.8008e-05 gnorm: 1.08 [12:12:04<12:18:19] +[titan] 2025-10-05 10:46:25,652 - root - INFO - step: 19920 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 10:46:25,652 - root - INFO - lr: 2.7999e-05 gnorm: 1.09 [12:12:15<12:18:08] +[titan] 2025-10-05 10:46:36,496 - root - INFO - step: 19925 loss: 2.2094 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2573 global_avg_mtp_loss: 1.9522 +[titan] 2025-10-05 10:46:36,496 - root - INFO - lr: 2.7990e-05 gnorm: 1.06 [12:12:26<12:17:57] +[titan] 2025-10-05 10:46:47,345 - root - INFO - step: 19930 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 10:46:47,345 - root - INFO - lr: 
2.7981e-05 gnorm: 1.11 [12:12:37<12:17:46] +[titan] 2025-10-05 10:46:58,221 - root - INFO - step: 19935 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 10:46:58,221 - root - INFO - lr: 2.7972e-05 gnorm: 1.05 [12:12:48<12:17:34] +[titan] 2025-10-05 10:47:09,102 - root - INFO - step: 19940 loss: 2.1225 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 10:47:09,102 - root - INFO - lr: 2.7963e-05 gnorm: 1.05 [12:12:59<12:17:23] +[titan] 2025-10-05 10:47:19,968 - root - INFO - step: 19945 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8581 +[titan] 2025-10-05 10:47:19,968 - root - INFO - lr: 2.7954e-05 gnorm: 1.09 [12:13:10<12:17:12] +[titan] 2025-10-05 10:47:28,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:47:30,850 - root - INFO - step: 19950 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8703 +[titan] 2025-10-05 10:47:30,850 - root - INFO - lr: 2.7945e-05 gnorm: 1.07 [12:13:20<12:17:01] +[titan] 2025-10-05 10:47:41,822 - root - INFO - step: 19955 loss: 2.1253 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.34 mfu: 41.90% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8777 +[titan] 2025-10-05 10:47:41,822 - root - INFO - lr: 2.7936e-05 gnorm: 1.09 [12:13:31<12:16:50] +[titan] 2025-10-05 10:47:52,686 - root - INFO - step: 19960 loss: 2.1316 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 10:47:52,686 - root - INFO - lr: 2.7927e-05 gnorm: 1.11 [12:13:42<12:16:39] +[titan] 2025-10-05 10:48:03,639 - root - INFO - step: 19965 loss: 2.1229 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8756 +[titan] 2025-10-05 10:48:03,639 - root - INFO - lr: 2.7919e-05 gnorm: 1.08 [12:13:53<12:16:28] +[titan] 2025-10-05 10:48:10,350 - root - INFO - Dumping profiler traces at step 19968 +[titan] 2025-10-05 10:48:10,390 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 10:48:14,746 - root - INFO - step: 19970 loss: 2.1632 memory: 118.84GiB(85.28%) tps: 29,504 tflops: 409.32 mfu: 41.39% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9112 +[titan] 2025-10-05 10:48:14,746 - root - INFO - lr: 2.7910e-05 gnorm: 1.01 [12:14:04<12:16:17] +[titan] 2025-10-05 10:48:25,610 - root - INFO - step: 19975 loss: 2.2036 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2577 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 10:48:25,610 - root - INFO - lr: 2.7901e-05 gnorm: 1.06 [12:14:15<12:16:05] +[titan] 2025-10-05 10:48:36,506 - root - INFO - step: 19980 loss: 
2.1359 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 10:48:36,507 - root - INFO - lr: 2.7892e-05 gnorm: 1.07 [12:14:26<12:15:54] +[titan] 2025-10-05 10:48:47,491 - root - INFO - step: 19985 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,831 tflops: 413.86 mfu: 41.85% global_avg_ntp_loss: 0.2542 global_avg_mtp_loss: 1.9295 +[titan] 2025-10-05 10:48:47,491 - root - INFO - lr: 2.7883e-05 gnorm: 1.06 [12:14:37<12:15:43] +[titan] 2025-10-05 10:48:58,374 - root - INFO - step: 19990 loss: 2.1671 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:48:58,374 - root - INFO - lr: 2.7874e-05 gnorm: 1.08 [12:14:48<12:15:32] +[titan] 2025-10-05 10:49:09,251 - root - INFO - step: 19995 loss: 2.1696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2583 global_avg_mtp_loss: 1.9113 +[titan] 2025-10-05 10:49:09,251 - root - INFO - lr: 2.7865e-05 gnorm: 1.06 [12:14:59<12:15:21] +[titan] 2025-10-05 10:49:17,928 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:49:20,117 - root - INFO - step: 20000 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8613 +[titan] 2025-10-05 10:49:20,118 - root - INFO - lr: 2.7856e-05 gnorm: 1.09 [12:15:10<12:15:10] +[titan] 2025-10-05 10:49:20,118 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 10:49:39,415 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 10:49:39,415 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 19.30 seconds. 
+[titan] 2025-10-05 10:51:35,525 - root - INFO - step: 20005 loss: 2.1785 memory: 118.84GiB(85.28%) tps: 2,420 tflops: 33.57 mfu: 3.39% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9246 +[titan] 2025-10-05 10:51:35,525 - root - INFO - lr: 2.7847e-05 gnorm: 1.02 [12:17:25<12:17:03] +[titan] 2025-10-05 10:51:46,302 - root - INFO - step: 20010 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2548 global_avg_mtp_loss: 1.9317 +[titan] 2025-10-05 10:51:46,302 - root - INFO - lr: 2.7838e-05 gnorm: 1.08 [12:17:36<12:16:52] +[titan] 2025-10-05 10:51:57,112 - root - INFO - step: 20015 loss: 2.1049 memory: 118.84GiB(85.28%) tps: 30,313 tflops: 420.55 mfu: 42.52% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 10:51:57,113 - root - INFO - lr: 2.7829e-05 gnorm: 1.07 [12:17:47<12:16:40] +[titan] 2025-10-05 10:52:07,924 - root - INFO - step: 20020 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.47 mfu: 42.51% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8728 +[titan] 2025-10-05 10:52:07,925 - root - INFO - lr: 2.7820e-05 gnorm: 1.05 [12:17:57<12:16:29] +[titan] 2025-10-05 10:52:18,739 - root - INFO - step: 20025 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 10:52:18,739 - root - INFO - lr: 2.7811e-05 gnorm: 1.08 [12:18:08<12:16:18] +[titan] 2025-10-05 10:52:29,561 - root - INFO - step: 20030 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8674 +[titan] 2025-10-05 10:52:29,562 - root - INFO - lr: 2.7803e-05 gnorm: 1.05 [12:18:19<12:16:06] +[titan] 2025-10-05 10:52:40,397 - root - INFO - step: 20035 loss: 2.1681 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9151 +[titan] 2025-10-05 10:52:40,397 - root - INFO - lr: 
2.7794e-05 gnorm: 1.09 [12:18:30<12:15:55] +[titan] 2025-10-05 10:52:51,270 - root - INFO - step: 20040 loss: 2.1610 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:52:51,270 - root - INFO - lr: 2.7785e-05 gnorm: 1.08 [12:18:41<12:15:44] +[titan] 2025-10-05 10:53:02,099 - root - INFO - step: 20045 loss: 2.1535 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9033 +[titan] 2025-10-05 10:53:02,099 - root - INFO - lr: 2.7776e-05 gnorm: 1.06 [12:18:52<12:15:33] +[titan] 2025-10-05 10:53:10,777 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 10:53:12,953 - root - INFO - step: 20050 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 10:53:12,953 - root - INFO - lr: 2.7767e-05 gnorm: 1.06 [12:19:02<12:15:21] +[titan] 2025-10-05 10:53:23,781 - root - INFO - step: 20055 loss: 2.1359 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8880 +[titan] 2025-10-05 10:53:23,781 - root - INFO - lr: 2.7758e-05 gnorm: 1.07 [12:19:13<12:15:10] +[titan] 2025-10-05 10:53:34,615 - root - INFO - step: 20060 loss: 2.2260 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2591 global_avg_mtp_loss: 1.9668 +[titan] 2025-10-05 10:53:34,615 - root - INFO - lr: 2.7749e-05 gnorm: 1.08 [12:19:24<12:14:59] +[titan] 2025-10-05 10:53:45,482 - root - INFO - step: 20065 loss: 2.1538 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9039 +[titan] 2025-10-05 10:53:45,482 - root - INFO - lr: 2.7740e-05 gnorm: 1.07 [12:19:35<12:14:47] +[titan] 2025-10-05 10:53:56,339 - root - INFO - step: 20070 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 
30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 10:53:56,339 - root - INFO - lr: 2.7731e-05 gnorm: 1.04 [12:19:46<12:14:36] +[titan] 2025-10-05 10:54:07,188 - root - INFO - step: 20075 loss: 2.1991 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2555 global_avg_mtp_loss: 1.9436 +[titan] 2025-10-05 10:54:07,188 - root - INFO - lr: 2.7722e-05 gnorm: 1.06 [12:19:57<12:14:25] +[titan] 2025-10-05 10:54:18,059 - root - INFO - step: 20080 loss: 2.1485 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8996 +[titan] 2025-10-05 10:54:18,059 - root - INFO - lr: 2.7713e-05 gnorm: 1.06 [12:20:08<12:14:14] +[titan] 2025-10-05 10:54:28,894 - root - INFO - step: 20085 loss: 2.2267 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2594 global_avg_mtp_loss: 1.9673 +[titan] 2025-10-05 10:54:28,894 - root - INFO - lr: 2.7704e-05 gnorm: 1.85 [12:20:18<12:14:02] +[titan] 2025-10-05 10:54:39,760 - root - INFO - step: 20090 loss: 2.1383 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 10:54:39,760 - root - INFO - lr: 2.7695e-05 gnorm: 1.09 [12:20:29<12:13:51] +[titan] 2025-10-05 10:54:50,700 - root - INFO - step: 20095 loss: 2.1379 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8897 +[titan] 2025-10-05 10:54:50,700 - root - INFO - lr: 2.7687e-05 gnorm: 1.04 [12:20:40<12:13:40] +[titan] 2025-10-05 10:54:59,422 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:55:01,599 - root - INFO - step: 20100 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 10:55:01,599 - root - INFO - lr: 2.7678e-05 gnorm: 1.11 [12:20:51<12:13:29] +[titan] 2025-10-05 10:55:12,449 - root - INFO - step: 20105 loss: 2.1710 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9193 +[titan] 2025-10-05 10:55:12,449 - root - INFO - lr: 2.7669e-05 gnorm: 1.03 [12:21:02<12:13:18] +[titan] 2025-10-05 10:55:23,313 - root - INFO - step: 20110 loss: 2.0931 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 10:55:23,313 - root - INFO - lr: 2.7660e-05 gnorm: 1.04 [12:21:13<12:13:06] +[titan] 2025-10-05 10:55:34,176 - root - INFO - step: 20115 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 10:55:34,176 - root - INFO - lr: 2.7651e-05 gnorm: 1.05 [12:21:24<12:12:55] +[titan] 2025-10-05 10:55:45,039 - root - INFO - step: 20120 loss: 2.1203 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8725 +[titan] 2025-10-05 10:55:45,039 - root - INFO - lr: 2.7642e-05 gnorm: 1.06 [12:21:35<12:12:44] +[titan] 2025-10-05 10:55:55,943 - root - INFO - step: 20125 loss: 2.1150 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8697 +[titan] 2025-10-05 10:55:55,943 - root - INFO - lr: 2.7633e-05 gnorm: 1.05 [12:21:45<12:12:33] +[titan] 2025-10-05 10:56:06,800 - root - INFO - step: 20130 loss: 2.1880 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2543 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 10:56:06,800 - root - INFO - lr: 
2.7624e-05 gnorm: 1.08 [12:21:56<12:12:21] +[titan] 2025-10-05 10:56:17,695 - root - INFO - step: 20135 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8794 +[titan] 2025-10-05 10:56:17,696 - root - INFO - lr: 2.7615e-05 gnorm: 1.08 [12:22:07<12:12:10] +[titan] 2025-10-05 10:56:28,544 - root - INFO - step: 20140 loss: 2.1589 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.9087 +[titan] 2025-10-05 10:56:28,544 - root - INFO - lr: 2.7606e-05 gnorm: 1.04 [12:22:18<12:11:59] +[titan] 2025-10-05 10:56:39,421 - root - INFO - step: 20145 loss: 2.1005 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8560 +[titan] 2025-10-05 10:56:39,422 - root - INFO - lr: 2.7597e-05 gnorm: 1.08 [12:22:29<12:11:48] +[titan] 2025-10-05 10:56:48,102 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:56:50,277 - root - INFO - step: 20150 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 10:56:50,277 - root - INFO - lr: 2.7588e-05 gnorm: 1.05 [12:22:40<12:11:36] +[titan] 2025-10-05 10:57:01,154 - root - INFO - step: 20155 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 10:57:01,155 - root - INFO - lr: 2.7579e-05 gnorm: 1.09 [12:22:51<12:11:25] +[titan] 2025-10-05 10:57:12,015 - root - INFO - step: 20160 loss: 2.1842 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9288 +[titan] 2025-10-05 10:57:12,015 - root - INFO - lr: 2.7571e-05 gnorm: 1.05 [12:23:02<12:11:14] +[titan] 2025-10-05 10:57:22,907 - root - INFO - step: 20165 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 10:57:22,907 - root - INFO - lr: 2.7562e-05 gnorm: 1.05 [12:23:12<12:11:03] +[titan] 2025-10-05 10:57:33,769 - root - INFO - step: 20170 loss: 2.1734 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9209 +[titan] 2025-10-05 10:57:33,769 - root - INFO - lr: 2.7553e-05 gnorm: 1.10 [12:23:23<12:10:51] +[titan] 2025-10-05 10:57:44,629 - root - INFO - step: 20175 loss: 2.1616 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2523 global_avg_mtp_loss: 1.9093 +[titan] 2025-10-05 10:57:44,629 - root - INFO - lr: 2.7544e-05 gnorm: 1.10 [12:23:34<12:10:40] +[titan] 2025-10-05 10:57:55,575 - root - INFO - step: 20180 loss: 2.1149 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 10:57:55,575 - root - INFO - lr: 
2.7535e-05 gnorm: 1.09 [12:23:45<12:10:29] +[titan] 2025-10-05 10:58:06,449 - root - INFO - step: 20185 loss: 2.0747 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 10:58:06,449 - root - INFO - lr: 2.7526e-05 gnorm: 1.09 [12:23:56<12:10:18] +[titan] 2025-10-05 10:58:17,339 - root - INFO - step: 20190 loss: 2.1574 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9060 +[titan] 2025-10-05 10:58:17,339 - root - INFO - lr: 2.7517e-05 gnorm: 1.11 [12:24:07<12:10:07] +[titan] 2025-10-05 10:58:28,224 - root - INFO - step: 20195 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 10:58:28,224 - root - INFO - lr: 2.7508e-05 gnorm: 1.09 [12:24:18<12:09:55] +[titan] 2025-10-05 10:58:36,913 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 10:58:39,105 - root - INFO - step: 20200 loss: 2.1272 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 10:58:39,105 - root - INFO - lr: 2.7499e-05 gnorm: 1.10 [12:24:29<12:09:44] +[titan] 2025-10-05 10:58:49,983 - root - INFO - step: 20205 loss: 2.1865 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9320 +[titan] 2025-10-05 10:58:49,983 - root - INFO - lr: 2.7490e-05 gnorm: 1.10 [12:24:39<12:09:33] +[titan] 2025-10-05 10:59:00,935 - root - INFO - step: 20210 loss: 2.0945 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 10:59:00,935 - root - INFO - lr: 2.7481e-05 gnorm: 1.07 [12:24:50<12:09:22] +[titan] 2025-10-05 10:59:11,794 - root - INFO - step: 20215 loss: 2.1183 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8721 +[titan] 2025-10-05 10:59:11,794 - root - INFO - lr: 2.7472e-05 gnorm: 1.08 [12:25:01<12:09:10] +[titan] 2025-10-05 10:59:22,679 - root - INFO - step: 20220 loss: 2.2517 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2618 global_avg_mtp_loss: 1.9899 +[titan] 2025-10-05 10:59:22,679 - root - INFO - lr: 2.7463e-05 gnorm: 1.09 [12:25:12<12:08:59] +[titan] 2025-10-05 10:59:33,536 - root - INFO - step: 20225 loss: 2.1534 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9028 +[titan] 2025-10-05 10:59:33,536 - root - INFO - lr: 2.7454e-05 gnorm: 1.10 [12:25:23<12:08:48] +[titan] 2025-10-05 10:59:44,381 - root - INFO - step: 20230 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 10:59:44,381 - root - INFO - lr: 
2.7446e-05 gnorm: 1.07 [12:25:34<12:08:37] +[titan] 2025-10-05 10:59:55,274 - root - INFO - step: 20235 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8518 +[titan] 2025-10-05 10:59:55,275 - root - INFO - lr: 2.7437e-05 gnorm: 1.10 [12:25:45<12:08:25] +[titan] 2025-10-05 11:00:06,163 - root - INFO - step: 20240 loss: 2.1497 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8998 +[titan] 2025-10-05 11:00:06,164 - root - INFO - lr: 2.7428e-05 gnorm: 1.09 [12:25:56<12:08:14] +[titan] 2025-10-05 11:00:17,039 - root - INFO - step: 20245 loss: 2.1884 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9337 +[titan] 2025-10-05 11:00:17,040 - root - INFO - lr: 2.7419e-05 gnorm: 1.06 [12:26:07<12:08:03] +[titan] 2025-10-05 11:00:25,720 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:00:27,899 - root - INFO - step: 20250 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8888 +[titan] 2025-10-05 11:00:27,900 - root - INFO - lr: 2.7410e-05 gnorm: 1.06 [12:26:17<12:07:52] +[titan] 2025-10-05 11:00:38,739 - root - INFO - step: 20255 loss: 2.1856 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9311 +[titan] 2025-10-05 11:00:38,739 - root - INFO - lr: 2.7401e-05 gnorm: 1.07 [12:26:28<12:07:41] +[titan] 2025-10-05 11:00:49,595 - root - INFO - step: 20260 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 11:00:49,595 - root - INFO - lr: 2.7392e-05 gnorm: 1.05 [12:26:39<12:07:29] +[titan] 2025-10-05 11:01:00,505 - root - INFO - step: 20265 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 11:01:00,505 - root - INFO - lr: 2.7383e-05 gnorm: 1.05 [12:26:50<12:07:18] +[titan] 2025-10-05 11:01:11,382 - root - INFO - step: 20270 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8446 +[titan] 2025-10-05 11:01:11,382 - root - INFO - lr: 2.7374e-05 gnorm: 1.08 [12:27:01<12:07:07] +[titan] 2025-10-05 11:01:22,284 - root - INFO - step: 20275 loss: 2.1344 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:01:22,284 - root - INFO - lr: 2.7365e-05 gnorm: 1.10 [12:27:12<12:06:56] +[titan] 2025-10-05 11:01:33,138 - root - INFO - step: 20280 loss: 2.1211 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:01:33,138 - root - INFO - lr: 
2.7356e-05 gnorm: 1.03 [12:27:23<12:06:44] +[titan] 2025-10-05 11:01:44,002 - root - INFO - step: 20285 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:01:44,002 - root - INFO - lr: 2.7347e-05 gnorm: 1.05 [12:27:34<12:06:33] +[titan] 2025-10-05 11:01:54,890 - root - INFO - step: 20290 loss: 2.1434 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8938 +[titan] 2025-10-05 11:01:54,890 - root - INFO - lr: 2.7338e-05 gnorm: 1.08 [12:27:44<12:06:22] +[titan] 2025-10-05 11:02:06,133 - root - INFO - step: 20295 loss: 2.2604 memory: 118.84GiB(85.28%) tps: 29,147 tflops: 404.37 mfu: 40.89% global_avg_ntp_loss: 0.2643 global_avg_mtp_loss: 1.9960 +[titan] 2025-10-05 11:02:06,133 - root - INFO - lr: 2.7330e-05 gnorm: 1.06 [12:27:56<12:06:11] +[titan] 2025-10-05 11:02:14,822 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:02:17,010 - root - INFO - step: 20300 loss: 2.1482 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 11:02:17,010 - root - INFO - lr: 2.7321e-05 gnorm: 1.33 [12:28:07<12:06:00] +[titan] 2025-10-05 11:02:27,926 - root - INFO - step: 20305 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 11:02:27,927 - root - INFO - lr: 2.7312e-05 gnorm: 1.05 [12:28:17<12:05:49] +[titan] 2025-10-05 11:02:38,794 - root - INFO - step: 20310 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8580 +[titan] 2025-10-05 11:02:38,794 - root - INFO - lr: 2.7303e-05 gnorm: 1.02 [12:28:28<12:05:37] +[titan] 2025-10-05 11:02:49,655 - root - INFO - step: 20315 loss: 2.1038 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:02:49,655 - root - INFO - lr: 2.7294e-05 gnorm: 1.06 [12:28:39<12:05:26] +[titan] 2025-10-05 11:03:00,551 - root - INFO - step: 20320 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 11:03:00,551 - root - INFO - lr: 2.7285e-05 gnorm: 1.07 [12:28:50<12:05:15] +[titan] 2025-10-05 11:03:11,416 - root - INFO - step: 20325 loss: 2.1548 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9040 +[titan] 2025-10-05 11:03:11,417 - root - INFO - lr: 2.7276e-05 gnorm: 1.04 [12:29:01<12:05:04] +[titan] 2025-10-05 11:03:22,259 - root - INFO - step: 20330 loss: 2.1001 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8555 +[titan] 2025-10-05 11:03:22,259 - root - INFO - lr: 
2.7267e-05 gnorm: 1.07 [12:29:12<12:04:52] +[titan] 2025-10-05 11:03:33,113 - root - INFO - step: 20335 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8112 +[titan] 2025-10-05 11:03:33,113 - root - INFO - lr: 2.7258e-05 gnorm: 1.06 [12:29:23<12:04:41] +[titan] 2025-10-05 11:03:44,014 - root - INFO - step: 20340 loss: 2.0999 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 11:03:44,014 - root - INFO - lr: 2.7249e-05 gnorm: 1.02 [12:29:34<12:04:30] +[titan] 2025-10-05 11:03:54,889 - root - INFO - step: 20345 loss: 2.1601 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.9095 +[titan] 2025-10-05 11:03:54,889 - root - INFO - lr: 2.7240e-05 gnorm: 1.05 [12:29:44<12:04:19] +[titan] 2025-10-05 11:04:03,595 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:04:05,779 - root - INFO - step: 20350 loss: 2.1910 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9371 +[titan] 2025-10-05 11:04:05,779 - root - INFO - lr: 2.7231e-05 gnorm: 1.07 [12:29:55<12:04:08] +[titan] 2025-10-05 11:04:16,637 - root - INFO - step: 20355 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 11:04:16,638 - root - INFO - lr: 2.7222e-05 gnorm: 1.05 [12:30:06<12:03:56] +[titan] 2025-10-05 11:04:27,458 - root - INFO - step: 20360 loss: 2.1358 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8882 +[titan] 2025-10-05 11:04:27,458 - root - INFO - lr: 2.7214e-05 gnorm: 1.06 [12:30:17<12:03:45] +[titan] 2025-10-05 11:04:38,299 - root - INFO - step: 20365 loss: 2.1403 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 11:04:38,299 - root - INFO - lr: 2.7205e-05 gnorm: 1.10 [12:30:28<12:03:34] +[titan] 2025-10-05 11:04:49,208 - root - INFO - step: 20370 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 11:04:49,208 - root - INFO - lr: 2.7196e-05 gnorm: 1.09 [12:30:39<12:03:23] +[titan] 2025-10-05 11:05:00,089 - root - INFO - step: 20375 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:05:00,089 - root - INFO - lr: 2.7187e-05 gnorm: 1.06 [12:30:50<12:03:11] +[titan] 2025-10-05 11:05:10,946 - root - INFO - step: 20380 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:05:10,946 - root - INFO - lr: 
2.7178e-05 gnorm: 1.11 [12:31:00<12:03:00] +[titan] 2025-10-05 11:05:21,800 - root - INFO - step: 20385 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:05:21,800 - root - INFO - lr: 2.7169e-05 gnorm: 1.08 [12:31:11<12:02:49] +[titan] 2025-10-05 11:05:32,664 - root - INFO - step: 20390 loss: 2.1412 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 11:05:32,664 - root - INFO - lr: 2.7160e-05 gnorm: 1.05 [12:31:22<12:02:38] +[titan] 2025-10-05 11:05:43,530 - root - INFO - step: 20395 loss: 2.2310 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2606 global_avg_mtp_loss: 1.9704 +[titan] 2025-10-05 11:05:43,531 - root - INFO - lr: 2.7151e-05 gnorm: 1.10 [12:31:33<12:02:26] +[titan] 2025-10-05 11:05:52,200 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:05:54,413 - root - INFO - step: 20400 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 11:05:54,413 - root - INFO - lr: 2.7142e-05 gnorm: 1.05 [12:31:44<12:02:15] +[titan] 2025-10-05 11:06:05,284 - root - INFO - step: 20405 loss: 2.1600 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 11:06:05,284 - root - INFO - lr: 2.7133e-05 gnorm: 1.08 [12:31:55<12:02:04] +[titan] 2025-10-05 11:06:16,130 - root - INFO - step: 20410 loss: 2.1684 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9167 +[titan] 2025-10-05 11:06:16,130 - root - INFO - lr: 2.7124e-05 gnorm: 1.07 [12:32:06<12:01:53] +[titan] 2025-10-05 11:06:26,974 - root - INFO - step: 20415 loss: 2.1914 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:06:26,975 - root - INFO - lr: 2.7115e-05 gnorm: 1.09 [12:32:16<12:01:41] +[titan] 2025-10-05 11:06:37,832 - root - INFO - step: 20420 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 11:06:37,832 - root - INFO - lr: 2.7106e-05 gnorm: 1.09 [12:32:27<12:01:30] +[titan] 2025-10-05 11:06:48,689 - root - INFO - step: 20425 loss: 2.1157 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 11:06:48,689 - root - INFO - lr: 2.7098e-05 gnorm: 1.08 [12:32:38<12:01:19] +[titan] 2025-10-05 11:06:59,539 - root - INFO - step: 20430 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8859 +[titan] 2025-10-05 11:06:59,539 - root - INFO - lr: 
2.7089e-05 gnorm: 1.05 [12:32:49<12:01:08] +[titan] 2025-10-05 11:07:10,461 - root - INFO - step: 20435 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 11:07:10,461 - root - INFO - lr: 2.7080e-05 gnorm: 1.06 [12:33:00<12:00:56] +[titan] 2025-10-05 11:07:21,318 - root - INFO - step: 20440 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:07:21,318 - root - INFO - lr: 2.7071e-05 gnorm: 1.07 [12:33:11<12:00:45] +[titan] 2025-10-05 11:07:32,168 - root - INFO - step: 20445 loss: 2.0912 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 11:07:32,168 - root - INFO - lr: 2.7062e-05 gnorm: 1.09 [12:33:22<12:00:34] +[titan] 2025-10-05 11:07:40,845 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:07:43,023 - root - INFO - step: 20450 loss: 2.1251 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8781 +[titan] 2025-10-05 11:07:43,023 - root - INFO - lr: 2.7053e-05 gnorm: 1.07 [12:33:33<12:00:23] +[titan] 2025-10-05 11:07:53,871 - root - INFO - step: 20455 loss: 2.1649 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9123 +[titan] 2025-10-05 11:07:53,871 - root - INFO - lr: 2.7044e-05 gnorm: 1.07 [12:33:43<12:00:11] +[titan] 2025-10-05 11:08:04,763 - root - INFO - step: 20460 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 11:08:04,763 - root - INFO - lr: 2.7035e-05 gnorm: 1.03 [12:33:54<12:00:00] +[titan] 2025-10-05 11:08:15,662 - root - INFO - step: 20465 loss: 2.1274 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8812 +[titan] 2025-10-05 11:08:15,662 - root - INFO - lr: 2.7026e-05 gnorm: 1.03 [12:34:05<11:59:49] +[titan] 2025-10-05 11:08:26,490 - root - INFO - step: 20470 loss: 2.1025 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8578 +[titan] 2025-10-05 11:08:26,490 - root - INFO - lr: 2.7017e-05 gnorm: 1.06 [12:34:16<11:59:38] +[titan] 2025-10-05 11:08:37,320 - root - INFO - step: 20475 loss: 2.1724 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9206 +[titan] 2025-10-05 11:08:37,321 - root - INFO - lr: 2.7008e-05 gnorm: 1.11 [12:34:27<11:59:26] +[titan] 2025-10-05 11:08:48,242 - root - INFO - step: 20480 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:08:48,242 - root - INFO - lr: 
2.6999e-05 gnorm: 1.04 [12:34:38<11:59:15] +[titan] 2025-10-05 11:08:48,422 - root - INFO - Dumping profiler traces at step 20480 +[titan] 2025-10-05 11:08:48,460 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:08:59,308 - root - INFO - step: 20485 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 29,611 tflops: 410.81 mfu: 41.54% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 11:08:59,308 - root - INFO - lr: 2.6990e-05 gnorm: 1.06 [12:34:49<11:59:04] +[titan] 2025-10-05 11:09:10,168 - root - INFO - step: 20490 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.8976 +[titan] 2025-10-05 11:09:10,168 - root - INFO - lr: 2.6982e-05 gnorm: 1.06 [12:35:00<11:58:53] +[titan] 2025-10-05 11:09:21,026 - root - INFO - step: 20495 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9159 +[titan] 2025-10-05 11:09:21,027 - root - INFO - lr: 2.6973e-05 gnorm: 1.10 [12:35:11<11:58:42] +[titan] 2025-10-05 11:09:29,736 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:09:31,923 - root - INFO - step: 20500 loss: 2.0830 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 11:09:31,923 - root - INFO - lr: 2.6964e-05 gnorm: 1.09 [12:35:21<11:58:31] +[titan] 2025-10-05 11:09:42,776 - root - INFO - step: 20505 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8339 +[titan] 2025-10-05 11:09:42,776 - root - INFO - lr: 2.6955e-05 gnorm: 1.10 [12:35:32<11:58:19] +[titan] 2025-10-05 11:09:53,605 - root - INFO - step: 20510 loss: 2.1466 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8968 +[titan] 2025-10-05 11:09:53,605 - root - INFO - lr: 2.6946e-05 gnorm: 1.06 [12:35:43<11:58:08] +[titan] 2025-10-05 11:10:04,473 - root - INFO - step: 20515 loss: 2.1247 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8771 +[titan] 2025-10-05 11:10:04,473 - root - INFO - lr: 2.6937e-05 gnorm: 1.06 [12:35:54<11:57:57] +[titan] 2025-10-05 11:10:15,308 - root - INFO - step: 20520 loss: 2.1987 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2553 global_avg_mtp_loss: 1.9434 +[titan] 2025-10-05 11:10:15,308 - root - INFO - lr: 2.6928e-05 gnorm: 1.06 [12:36:05<11:57:46] +[titan] 2025-10-05 11:10:26,169 - root - INFO - step: 20525 loss: 2.1470 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8974 +[titan] 2025-10-05 11:10:26,170 - root - INFO - lr: 2.6919e-05 gnorm: 1.04 [12:36:16<11:57:34] +[titan] 2025-10-05 11:10:37,027 - root - INFO - step: 20530 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8850 +[titan] 2025-10-05 11:10:37,027 - root - INFO - lr: 
2.6910e-05 gnorm: 1.13 [12:36:27<11:57:23] +[titan] 2025-10-05 11:10:47,875 - root - INFO - step: 20535 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 11:10:47,875 - root - INFO - lr: 2.6901e-05 gnorm: 1.03 [12:36:37<11:57:12] +[titan] 2025-10-05 11:10:58,732 - root - INFO - step: 20540 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8735 +[titan] 2025-10-05 11:10:58,733 - root - INFO - lr: 2.6892e-05 gnorm: 1.06 [12:36:48<11:57:01] +[titan] 2025-10-05 11:11:09,619 - root - INFO - step: 20545 loss: 2.1707 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:11:09,619 - root - INFO - lr: 2.6883e-05 gnorm: 1.10 [12:36:59<11:56:49] +[titan] 2025-10-05 11:11:18,306 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:11:20,486 - root - INFO - step: 20550 loss: 2.1879 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9277 +[titan] 2025-10-05 11:11:20,487 - root - INFO - lr: 2.6874e-05 gnorm: 2.06 [12:37:10<11:56:38] +[titan] 2025-10-05 11:11:31,328 - root - INFO - step: 20555 loss: 2.2027 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9460 +[titan] 2025-10-05 11:11:31,329 - root - INFO - lr: 2.6866e-05 gnorm: 1.09 [12:37:21<11:56:27] +[titan] 2025-10-05 11:11:42,212 - root - INFO - step: 20560 loss: 2.0837 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 11:11:42,212 - root - INFO - lr: 2.6857e-05 gnorm: 1.05 [12:37:32<11:56:16] +[titan] 2025-10-05 11:11:53,051 - root - INFO - step: 20565 loss: 2.1605 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.9097 +[titan] 2025-10-05 11:11:53,051 - root - INFO - lr: 2.6848e-05 gnorm: 1.08 [12:37:43<11:56:04] +[titan] 2025-10-05 11:12:03,886 - root - INFO - step: 20570 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8798 +[titan] 2025-10-05 11:12:03,887 - root - INFO - lr: 2.6839e-05 gnorm: 1.14 [12:37:53<11:55:53] +[titan] 2025-10-05 11:12:14,773 - root - INFO - step: 20575 loss: 2.1886 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9341 +[titan] 2025-10-05 11:12:14,773 - root - INFO - lr: 2.6830e-05 gnorm: 1.09 [12:38:04<11:55:42] +[titan] 2025-10-05 11:12:25,620 - root - INFO - step: 20580 loss: 2.0736 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8329 +[titan] 2025-10-05 11:12:25,620 - root - INFO - lr: 
2.6821e-05 gnorm: 1.09 [12:38:15<11:55:31] +[titan] 2025-10-05 11:12:36,467 - root - INFO - step: 20585 loss: 2.1397 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8895 +[titan] 2025-10-05 11:12:36,467 - root - INFO - lr: 2.6812e-05 gnorm: 1.05 [12:38:26<11:55:19] +[titan] 2025-10-05 11:12:47,318 - root - INFO - step: 20590 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 11:12:47,318 - root - INFO - lr: 2.6803e-05 gnorm: 1.07 [12:38:37<11:55:08] +[titan] 2025-10-05 11:12:58,203 - root - INFO - step: 20595 loss: 2.1151 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8687 +[titan] 2025-10-05 11:12:58,203 - root - INFO - lr: 2.6794e-05 gnorm: 1.07 [12:38:48<11:54:57] +[titan] 2025-10-05 11:13:06,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:13:09,064 - root - INFO - step: 20600 loss: 2.1894 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2545 global_avg_mtp_loss: 1.9349 +[titan] 2025-10-05 11:13:09,064 - root - INFO - lr: 2.6785e-05 gnorm: 1.09 [12:38:59<11:54:46] +[titan] 2025-10-05 11:13:19,929 - root - INFO - step: 20605 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 11:13:19,929 - root - INFO - lr: 2.6776e-05 gnorm: 1.07 [12:39:09<11:54:35] +[titan] 2025-10-05 11:13:30,796 - root - INFO - step: 20610 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9110 +[titan] 2025-10-05 11:13:30,796 - root - INFO - lr: 2.6767e-05 gnorm: 1.06 [12:39:20<11:54:23] +[titan] 2025-10-05 11:13:41,654 - root - INFO - step: 20615 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8398 +[titan] 2025-10-05 11:13:41,654 - root - INFO - lr: 2.6758e-05 gnorm: 1.03 [12:39:31<11:54:12] +[titan] 2025-10-05 11:13:52,508 - root - INFO - step: 20620 loss: 2.2291 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2598 global_avg_mtp_loss: 1.9693 +[titan] 2025-10-05 11:13:52,508 - root - INFO - lr: 2.6750e-05 gnorm: 1.06 [12:39:42<11:54:01] +[titan] 2025-10-05 11:14:03,381 - root - INFO - step: 20625 loss: 2.1197 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8740 +[titan] 2025-10-05 11:14:03,381 - root - INFO - lr: 2.6741e-05 gnorm: 1.06 [12:39:53<11:53:50] +[titan] 2025-10-05 11:14:14,251 - root - INFO - step: 20630 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:14:14,251 - root - INFO - lr: 
2.6732e-05 gnorm: 1.06 [12:40:04<11:53:38] +[titan] 2025-10-05 11:14:25,097 - root - INFO - step: 20635 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:14:25,098 - root - INFO - lr: 2.6723e-05 gnorm: 1.08 [12:40:15<11:53:27] +[titan] 2025-10-05 11:14:35,947 - root - INFO - step: 20640 loss: 2.0980 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8548 +[titan] 2025-10-05 11:14:35,947 - root - INFO - lr: 2.6714e-05 gnorm: 1.09 [12:40:25<11:53:16] +[titan] 2025-10-05 11:14:46,798 - root - INFO - step: 20645 loss: 2.1242 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8770 +[titan] 2025-10-05 11:14:46,799 - root - INFO - lr: 2.6705e-05 gnorm: 1.09 [12:40:36<11:53:05] +[titan] 2025-10-05 11:14:55,474 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:14:57,653 - root - INFO - step: 20650 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:14:57,653 - root - INFO - lr: 2.6696e-05 gnorm: 1.08 [12:40:47<11:52:53] +[titan] 2025-10-05 11:15:08,530 - root - INFO - step: 20655 loss: 2.1917 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9368 +[titan] 2025-10-05 11:15:08,530 - root - INFO - lr: 2.6687e-05 gnorm: 1.08 [12:40:58<11:52:42] +[titan] 2025-10-05 11:15:19,423 - root - INFO - step: 20660 loss: 2.2002 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2554 global_avg_mtp_loss: 1.9447 +[titan] 2025-10-05 11:15:19,423 - root - INFO - lr: 2.6678e-05 gnorm: 1.15 [12:41:09<11:52:31] +[titan] 2025-10-05 11:15:30,279 - root - INFO - step: 20665 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:15:30,279 - root - INFO - lr: 2.6669e-05 gnorm: 1.06 [12:41:20<11:52:20] +[titan] 2025-10-05 11:15:41,155 - root - INFO - step: 20670 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9037 +[titan] 2025-10-05 11:15:41,156 - root - INFO - lr: 2.6660e-05 gnorm: 1.05 [12:41:31<11:52:09] +[titan] 2025-10-05 11:15:52,007 - root - INFO - step: 20675 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 11:15:52,007 - root - INFO - lr: 2.6651e-05 gnorm: 1.04 [12:41:41<11:51:57] +[titan] 2025-10-05 11:16:02,840 - root - INFO - step: 20680 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:16:02,840 - root - INFO - lr: 
2.6643e-05 gnorm: 1.03 [12:41:52<11:51:46] +[titan] 2025-10-05 11:16:13,755 - root - INFO - step: 20685 loss: 2.1629 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9115 +[titan] 2025-10-05 11:16:13,756 - root - INFO - lr: 2.6634e-05 gnorm: 1.04 [12:42:03<11:51:35] +[titan] 2025-10-05 11:16:24,631 - root - INFO - step: 20690 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:16:24,632 - root - INFO - lr: 2.6625e-05 gnorm: 1.05 [12:42:14<11:51:24] +[titan] 2025-10-05 11:16:35,463 - root - INFO - step: 20695 loss: 2.1783 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2546 global_avg_mtp_loss: 1.9238 +[titan] 2025-10-05 11:16:35,464 - root - INFO - lr: 2.6616e-05 gnorm: 1.10 [12:42:25<11:51:12] +[titan] 2025-10-05 11:16:44,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:16:46,315 - root - INFO - step: 20700 loss: 2.1496 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2501 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:16:46,316 - root - INFO - lr: 2.6607e-05 gnorm: 1.10 [12:42:36<11:51:01] +[titan] 2025-10-05 11:16:57,157 - root - INFO - step: 20705 loss: 2.0983 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 11:16:57,157 - root - INFO - lr: 2.6598e-05 gnorm: 1.04 [12:42:47<11:50:50] +[titan] 2025-10-05 11:17:08,007 - root - INFO - step: 20710 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 11:17:08,007 - root - INFO - lr: 2.6589e-05 gnorm: 1.07 [12:42:57<11:50:39] +[titan] 2025-10-05 11:17:18,892 - root - INFO - step: 20715 loss: 2.1366 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8867 +[titan] 2025-10-05 11:17:18,892 - root - INFO - lr: 2.6580e-05 gnorm: 1.14 [12:43:08<11:50:27] +[titan] 2025-10-05 11:17:29,767 - root - INFO - step: 20720 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:17:29,768 - root - INFO - lr: 2.6571e-05 gnorm: 1.04 [12:43:19<11:50:16] +[titan] 2025-10-05 11:17:40,628 - root - INFO - step: 20725 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9005 +[titan] 2025-10-05 11:17:40,628 - root - INFO - lr: 2.6562e-05 gnorm: 1.09 [12:43:30<11:50:05] +[titan] 2025-10-05 11:17:51,474 - root - INFO - step: 20730 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2522 global_avg_mtp_loss: 1.9187 +[titan] 2025-10-05 11:17:51,474 - root - INFO - lr: 
2.6553e-05 gnorm: 1.10 [12:43:41<11:49:54] +[titan] 2025-10-05 11:18:02,326 - root - INFO - step: 20735 loss: 2.1204 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:18:02,326 - root - INFO - lr: 2.6544e-05 gnorm: 1.06 [12:43:52<11:49:43] +[titan] 2025-10-05 11:18:13,213 - root - INFO - step: 20740 loss: 2.1323 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8841 +[titan] 2025-10-05 11:18:13,213 - root - INFO - lr: 2.6536e-05 gnorm: 1.08 [12:44:03<11:49:31] +[titan] 2025-10-05 11:18:24,093 - root - INFO - step: 20745 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 11:18:24,093 - root - INFO - lr: 2.6527e-05 gnorm: 1.05 [12:44:14<11:49:20] +[titan] 2025-10-05 11:18:32,791 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:18:34,984 - root - INFO - step: 20750 loss: 2.0863 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:18:34,984 - root - INFO - lr: 2.6518e-05 gnorm: 1.06 [12:44:24<11:49:09] +[titan] 2025-10-05 11:18:45,854 - root - INFO - step: 20755 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:18:45,854 - root - INFO - lr: 2.6509e-05 gnorm: 1.09 [12:44:35<11:48:58] +[titan] 2025-10-05 11:18:56,673 - root - INFO - step: 20760 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:18:56,674 - root - INFO - lr: 2.6500e-05 gnorm: 1.04 [12:44:46<11:48:46] +[titan] 2025-10-05 11:19:07,503 - root - INFO - step: 20765 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8648 +[titan] 2025-10-05 11:19:07,504 - root - INFO - lr: 2.6491e-05 gnorm: 1.08 [12:44:57<11:48:35] +[titan] 2025-10-05 11:19:18,411 - root - INFO - step: 20770 loss: 2.2056 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2567 global_avg_mtp_loss: 1.9489 +[titan] 2025-10-05 11:19:18,411 - root - INFO - lr: 2.6482e-05 gnorm: 1.12 [12:45:08<11:48:24] +[titan] 2025-10-05 11:19:29,234 - root - INFO - step: 20775 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8766 +[titan] 2025-10-05 11:19:29,234 - root - INFO - lr: 2.6473e-05 gnorm: 1.05 [12:45:19<11:48:13] +[titan] 2025-10-05 11:19:40,065 - root - INFO - step: 20780 loss: 2.0825 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.75 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 11:19:40,065 - root - INFO - lr: 
2.6464e-05 gnorm: 1.08 [12:45:30<11:48:01] +[titan] 2025-10-05 11:19:50,928 - root - INFO - step: 20785 loss: 2.1284 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 11:19:50,928 - root - INFO - lr: 2.6455e-05 gnorm: 1.03 [12:45:40<11:47:50] +[titan] 2025-10-05 11:20:01,769 - root - INFO - step: 20790 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 11:20:01,769 - root - INFO - lr: 2.6446e-05 gnorm: 1.07 [12:45:51<11:47:39] +[titan] 2025-10-05 11:20:12,646 - root - INFO - step: 20795 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 11:20:12,646 - root - INFO - lr: 2.6437e-05 gnorm: 1.10 [12:46:02<11:47:28] +[titan] 2025-10-05 11:20:21,353 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:20:23,533 - root - INFO - step: 20800 loss: 2.0768 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 11:20:23,533 - root - INFO - lr: 2.6429e-05 gnorm: 1.06 [12:46:13<11:47:17] +[titan] 2025-10-05 11:20:34,392 - root - INFO - step: 20805 loss: 2.1185 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8715 +[titan] 2025-10-05 11:20:34,392 - root - INFO - lr: 2.6420e-05 gnorm: 1.09 [12:46:24<11:47:05] +[titan] 2025-10-05 11:20:45,231 - root - INFO - step: 20810 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 11:20:45,231 - root - INFO - lr: 2.6411e-05 gnorm: 1.04 [12:46:35<11:46:54] +[titan] 2025-10-05 11:20:56,074 - root - INFO - step: 20815 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:20:56,074 - root - INFO - lr: 2.6402e-05 gnorm: 1.08 [12:46:46<11:46:43] +[titan] 2025-10-05 11:21:06,980 - root - INFO - step: 20820 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:21:06,980 - root - INFO - lr: 2.6393e-05 gnorm: 1.07 [12:46:56<11:46:32] +[titan] 2025-10-05 11:21:17,884 - root - INFO - step: 20825 loss: 2.1501 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9006 +[titan] 2025-10-05 11:21:17,884 - root - INFO - lr: 2.6384e-05 gnorm: 1.08 [12:47:07<11:46:20] +[titan] 2025-10-05 11:21:28,741 - root - INFO - step: 20830 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8872 +[titan] 2025-10-05 11:21:28,741 - root - INFO - lr: 
2.6375e-05 gnorm: 1.08 [12:47:18<11:46:09] +[titan] 2025-10-05 11:21:39,613 - root - INFO - step: 20835 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8881 +[titan] 2025-10-05 11:21:39,613 - root - INFO - lr: 2.6366e-05 gnorm: 1.07 [12:47:29<11:45:58] +[titan] 2025-10-05 11:21:50,471 - root - INFO - step: 20840 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 11:21:50,471 - root - INFO - lr: 2.6357e-05 gnorm: 1.03 [12:47:40<11:45:47] +[titan] 2025-10-05 11:22:01,325 - root - INFO - step: 20845 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:22:01,325 - root - INFO - lr: 2.6348e-05 gnorm: 1.07 [12:47:51<11:45:36] +[titan] 2025-10-05 11:22:10,042 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:22:12,228 - root - INFO - step: 20850 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 11:22:12,228 - root - INFO - lr: 2.6339e-05 gnorm: 1.01 [12:48:02<11:45:24] +[titan] 2025-10-05 11:22:23,145 - root - INFO - step: 20855 loss: 2.1714 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9200 +[titan] 2025-10-05 11:22:23,145 - root - INFO - lr: 2.6330e-05 gnorm: 1.08 [12:48:13<11:45:13] +[titan] 2025-10-05 11:22:33,976 - root - INFO - step: 20860 loss: 2.1509 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:22:33,976 - root - INFO - lr: 2.6322e-05 gnorm: 1.08 [12:48:23<11:45:02] +[titan] 2025-10-05 11:22:44,818 - root - INFO - step: 20865 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 11:22:44,819 - root - INFO - lr: 2.6313e-05 gnorm: 1.08 [12:48:34<11:44:51] +[titan] 2025-10-05 11:22:55,670 - root - INFO - step: 20870 loss: 2.1029 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 11:22:55,670 - root - INFO - lr: 2.6304e-05 gnorm: 1.04 [12:48:45<11:44:39] +[titan] 2025-10-05 11:23:06,495 - root - INFO - step: 20875 loss: 2.1668 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2525 global_avg_mtp_loss: 1.9143 +[titan] 2025-10-05 11:23:06,495 - root - INFO - lr: 2.6295e-05 gnorm: 1.03 [12:48:56<11:44:28] +[titan] 2025-10-05 11:23:17,425 - root - INFO - step: 20880 loss: 2.1836 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2539 global_avg_mtp_loss: 1.9297 +[titan] 2025-10-05 11:23:17,426 - root - INFO - lr: 
2.6286e-05 gnorm: 1.06 [12:49:07<11:44:17] +[titan] 2025-10-05 11:23:28,304 - root - INFO - step: 20885 loss: 2.1708 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:23:28,305 - root - INFO - lr: 2.6277e-05 gnorm: 1.02 [12:49:18<11:44:06] +[titan] 2025-10-05 11:23:39,146 - root - INFO - step: 20890 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 11:23:39,147 - root - INFO - lr: 2.6268e-05 gnorm: 1.04 [12:49:29<11:43:55] +[titan] 2025-10-05 11:23:50,019 - root - INFO - step: 20895 loss: 2.1373 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 11:23:50,019 - root - INFO - lr: 2.6259e-05 gnorm: 1.05 [12:49:39<11:43:43] +[titan] 2025-10-05 11:23:58,682 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:24:00,862 - root - INFO - step: 20900 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8719 +[titan] 2025-10-05 11:24:00,862 - root - INFO - lr: 2.6250e-05 gnorm: 1.08 [12:49:50<11:43:32] +[titan] 2025-10-05 11:24:11,693 - root - INFO - step: 20905 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8801 +[titan] 2025-10-05 11:24:11,693 - root - INFO - lr: 2.6241e-05 gnorm: 1.09 [12:50:01<11:43:21] +[titan] 2025-10-05 11:24:22,592 - root - INFO - step: 20910 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8505 +[titan] 2025-10-05 11:24:22,592 - root - INFO - lr: 2.6232e-05 gnorm: 1.06 [12:50:12<11:43:10] +[titan] 2025-10-05 11:24:33,463 - root - INFO - step: 20915 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8499 +[titan] 2025-10-05 11:24:33,463 - root - INFO - lr: 2.6224e-05 gnorm: 1.05 [12:50:23<11:42:58] +[titan] 2025-10-05 11:24:44,313 - root - INFO - step: 20920 loss: 2.1717 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:24:44,314 - root - INFO - lr: 2.6215e-05 gnorm: 1.05 [12:50:34<11:42:47] +[titan] 2025-10-05 11:24:55,176 - root - INFO - step: 20925 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8450 +[titan] 2025-10-05 11:24:55,176 - root - INFO - lr: 2.6206e-05 gnorm: 1.05 [12:50:45<11:42:36] +[titan] 2025-10-05 11:25:06,030 - root - INFO - step: 20930 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8992 +[titan] 2025-10-05 11:25:06,030 - root - INFO - lr: 
2.6197e-05 gnorm: 1.10 [12:50:55<11:42:25] +[titan] 2025-10-05 11:25:16,898 - root - INFO - step: 20935 loss: 2.1194 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8741 +[titan] 2025-10-05 11:25:16,898 - root - INFO - lr: 2.6188e-05 gnorm: 1.05 [12:51:06<11:42:14] +[titan] 2025-10-05 11:25:27,781 - root - INFO - step: 20940 loss: 2.1440 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:25:27,781 - root - INFO - lr: 2.6179e-05 gnorm: 1.04 [12:51:17<11:42:02] +[titan] 2025-10-05 11:25:38,668 - root - INFO - step: 20945 loss: 2.1635 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2513 global_avg_mtp_loss: 1.9122 +[titan] 2025-10-05 11:25:38,668 - root - INFO - lr: 2.6170e-05 gnorm: 1.04 [12:51:28<11:41:51] +[titan] 2025-10-05 11:25:47,372 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:25:49,579 - root - INFO - step: 20950 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8588 +[titan] 2025-10-05 11:25:49,579 - root - INFO - lr: 2.6161e-05 gnorm: 1.02 [12:51:39<11:41:40] +[titan] 2025-10-05 11:26:00,466 - root - INFO - step: 20955 loss: 2.2028 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9476 +[titan] 2025-10-05 11:26:00,466 - root - INFO - lr: 2.6152e-05 gnorm: 1.08 [12:51:50<11:41:29] +[titan] 2025-10-05 11:26:11,358 - root - INFO - step: 20960 loss: 2.1680 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 11:26:11,358 - root - INFO - lr: 2.6143e-05 gnorm: 1.07 [12:52:01<11:41:18] +[titan] 2025-10-05 11:26:22,285 - root - INFO - step: 20965 loss: 2.1237 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 11:26:22,285 - root - INFO - lr: 2.6134e-05 gnorm: 1.03 [12:52:12<11:41:06] +[titan] 2025-10-05 11:26:33,153 - root - INFO - step: 20970 loss: 2.0712 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 11:26:33,153 - root - INFO - lr: 2.6126e-05 gnorm: 1.04 [12:52:23<11:40:55] +[titan] 2025-10-05 11:26:44,020 - root - INFO - step: 20975 loss: 2.1369 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:26:44,021 - root - INFO - lr: 2.6117e-05 gnorm: 1.09 [12:52:33<11:40:44] +[titan] 2025-10-05 11:26:54,991 - root - INFO - step: 20980 loss: 2.1685 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9161 +[titan] 2025-10-05 11:26:54,992 - root - INFO - lr: 
2.6108e-05 gnorm: 1.07 [12:52:44<11:40:33] +[titan] 2025-10-05 11:27:05,851 - root - INFO - step: 20985 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8877 +[titan] 2025-10-05 11:27:05,851 - root - INFO - lr: 2.6099e-05 gnorm: 1.11 [12:52:55<11:40:22] +[titan] 2025-10-05 11:27:16,808 - root - INFO - step: 20990 loss: 2.1352 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8879 +[titan] 2025-10-05 11:27:16,809 - root - INFO - lr: 2.6090e-05 gnorm: 1.08 [12:53:06<11:40:11] +[titan] 2025-10-05 11:27:21,386 - root - INFO - Dumping profiler traces at step 20992 +[titan] 2025-10-05 11:27:21,425 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:27:27,954 - root - INFO - step: 20995 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,401 tflops: 407.90 mfu: 41.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 11:27:27,954 - root - INFO - lr: 2.6081e-05 gnorm: 1.05 [12:53:17<11:40:00] +[titan] 2025-10-05 11:27:36,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:27:38,817 - root - INFO - step: 21000 loss: 2.1220 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8750 +[titan] 2025-10-05 11:27:38,817 - root - INFO - lr: 2.6072e-05 gnorm: 1.05 [12:53:28<11:39:48] +[titan] 2025-10-05 11:27:49,677 - root - INFO - step: 21005 loss: 2.1703 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 11:27:49,677 - root - INFO - lr: 2.6063e-05 gnorm: 1.10 [12:53:39<11:39:37] +[titan] 2025-10-05 11:28:00,541 - root - INFO - step: 21010 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 11:28:00,541 - root - INFO - lr: 2.6054e-05 gnorm: 1.05 [12:53:50<11:39:26] +[titan] 2025-10-05 11:28:11,384 - root - INFO - step: 21015 loss: 2.1081 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8636 +[titan] 2025-10-05 11:28:11,384 - root - INFO - lr: 2.6045e-05 gnorm: 1.04 [12:54:01<11:39:15] +[titan] 2025-10-05 11:28:22,286 - root - INFO - step: 21020 loss: 2.1447 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8957 +[titan] 2025-10-05 11:28:22,286 - root - INFO - lr: 2.6036e-05 gnorm: 1.10 [12:54:12<11:39:04] +[titan] 2025-10-05 11:28:33,136 - root - INFO - step: 21025 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 11:28:33,136 - root - INFO - lr: 2.6028e-05 gnorm: 1.07 [12:54:23<11:38:52] +[titan] 2025-10-05 11:28:43,995 - root - INFO - step: 21030 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8395 +[titan] 2025-10-05 11:28:43,995 - root - INFO - lr: 
2.6019e-05 gnorm: 1.06 [12:54:33<11:38:41] +[titan] 2025-10-05 11:28:54,868 - root - INFO - step: 21035 loss: 2.1288 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8811 +[titan] 2025-10-05 11:28:54,868 - root - INFO - lr: 2.6010e-05 gnorm: 1.09 [12:54:44<11:38:30] +[titan] 2025-10-05 11:29:05,770 - root - INFO - step: 21040 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8810 +[titan] 2025-10-05 11:29:05,770 - root - INFO - lr: 2.6001e-05 gnorm: 1.06 [12:54:55<11:38:19] +[titan] 2025-10-05 11:29:16,625 - root - INFO - step: 21045 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8628 +[titan] 2025-10-05 11:29:16,625 - root - INFO - lr: 2.5992e-05 gnorm: 1.04 [12:55:06<11:38:07] +[titan] 2025-10-05 11:29:25,364 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:29:27,546 - root - INFO - step: 21050 loss: 2.1350 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 11:29:27,546 - root - INFO - lr: 2.5983e-05 gnorm: 1.09 [12:55:17<11:37:56] +[titan] 2025-10-05 11:29:38,415 - root - INFO - step: 21055 loss: 2.0977 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8537 +[titan] 2025-10-05 11:29:38,415 - root - INFO - lr: 2.5974e-05 gnorm: 1.05 [12:55:28<11:37:45] +[titan] 2025-10-05 11:29:49,289 - root - INFO - step: 21060 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8921 +[titan] 2025-10-05 11:29:49,289 - root - INFO - lr: 2.5965e-05 gnorm: 1.09 [12:55:39<11:37:34] +[titan] 2025-10-05 11:30:00,149 - root - INFO - step: 21065 loss: 2.1427 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 11:30:00,149 - root - INFO - lr: 2.5956e-05 gnorm: 1.09 [12:55:50<11:37:23] +[titan] 2025-10-05 11:30:11,032 - root - INFO - step: 21070 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:30:11,032 - root - INFO - lr: 2.5947e-05 gnorm: 1.08 [12:56:00<11:37:11] +[titan] 2025-10-05 11:30:21,932 - root - INFO - step: 21075 loss: 2.1709 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2530 global_avg_mtp_loss: 1.9178 +[titan] 2025-10-05 11:30:21,933 - root - INFO - lr: 2.5939e-05 gnorm: 1.07 [12:56:11<11:37:00] +[titan] 2025-10-05 11:30:32,855 - root - INFO - step: 21080 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.09% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8856 +[titan] 2025-10-05 11:30:32,855 - root - INFO - lr: 
2.5930e-05 gnorm: 1.07 [12:56:22<11:36:49] +[titan] 2025-10-05 11:30:43,698 - root - INFO - step: 21085 loss: 2.1181 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:30:43,699 - root - INFO - lr: 2.5921e-05 gnorm: 1.11 [12:56:33<11:36:38] +[titan] 2025-10-05 11:30:54,563 - root - INFO - step: 21090 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 11:30:54,563 - root - INFO - lr: 2.5912e-05 gnorm: 1.03 [12:56:44<11:36:27] +[titan] 2025-10-05 11:31:05,426 - root - INFO - step: 21095 loss: 2.2239 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2603 global_avg_mtp_loss: 1.9636 +[titan] 2025-10-05 11:31:05,427 - root - INFO - lr: 2.5903e-05 gnorm: 1.06 [12:56:55<11:36:15] +[titan] 2025-10-05 11:31:14,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:31:16,304 - root - INFO - step: 21100 loss: 2.0959 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8520 +[titan] 2025-10-05 11:31:16,304 - root - INFO - lr: 2.5894e-05 gnorm: 1.03 [12:57:06<11:36:04] +[titan] 2025-10-05 11:31:27,255 - root - INFO - step: 21105 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.13 mfu: 41.97% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8985 +[titan] 2025-10-05 11:31:27,256 - root - INFO - lr: 2.5885e-05 gnorm: 1.07 [12:57:17<11:35:53] +[titan] 2025-10-05 11:31:38,131 - root - INFO - step: 21110 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8621 +[titan] 2025-10-05 11:31:38,132 - root - INFO - lr: 2.5876e-05 gnorm: 1.06 [12:57:28<11:35:42] +[titan] 2025-10-05 11:31:49,004 - root - INFO - step: 21115 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:31:49,005 - root - INFO - lr: 2.5867e-05 gnorm: 1.07 [12:57:38<11:35:31] +[titan] 2025-10-05 11:31:59,893 - root - INFO - step: 21120 loss: 2.0727 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8315 +[titan] 2025-10-05 11:31:59,893 - root - INFO - lr: 2.5858e-05 gnorm: 1.07 [12:57:49<11:35:19] +[titan] 2025-10-05 11:32:10,768 - root - INFO - step: 21125 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 11:32:10,769 - root - INFO - lr: 2.5850e-05 gnorm: 1.07 [12:58:00<11:35:08] +[titan] 2025-10-05 11:32:21,633 - root - INFO - step: 21130 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8324 +[titan] 2025-10-05 11:32:21,633 - root - INFO - lr: 
2.5841e-05 gnorm: 1.05 [12:58:11<11:34:57] +[titan] 2025-10-05 11:32:32,656 - root - INFO - step: 21135 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 29,729 tflops: 412.45 mfu: 41.70% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 11:32:32,656 - root - INFO - lr: 2.5832e-05 gnorm: 1.08 [12:58:22<11:34:46] +[titan] 2025-10-05 11:32:43,550 - root - INFO - step: 21140 loss: 2.1392 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2485 global_avg_mtp_loss: 1.8906 +[titan] 2025-10-05 11:32:43,550 - root - INFO - lr: 2.5823e-05 gnorm: 1.07 [12:58:33<11:34:35] +[titan] 2025-10-05 11:32:54,408 - root - INFO - step: 21145 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 11:32:54,408 - root - INFO - lr: 2.5814e-05 gnorm: 1.06 [12:58:44<11:34:24] +[titan] 2025-10-05 11:33:03,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:33:05,258 - root - INFO - step: 21150 loss: 2.1304 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8836 +[titan] 2025-10-05 11:33:05,258 - root - INFO - lr: 2.5805e-05 gnorm: 1.09 [12:58:55<11:34:12] +[titan] 2025-10-05 11:33:16,124 - root - INFO - step: 21155 loss: 2.1477 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8983 +[titan] 2025-10-05 11:33:16,124 - root - INFO - lr: 2.5796e-05 gnorm: 1.07 [12:59:06<11:34:01] +[titan] 2025-10-05 11:33:27,050 - root - INFO - step: 21160 loss: 2.1767 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9240 +[titan] 2025-10-05 11:33:27,050 - root - INFO - lr: 2.5787e-05 gnorm: 1.06 [12:59:16<11:33:50] +[titan] 2025-10-05 11:33:37,906 - root - INFO - step: 21165 loss: 2.1021 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8583 +[titan] 2025-10-05 11:33:37,906 - root - INFO - lr: 2.5778e-05 gnorm: 1.06 [12:59:27<11:33:39] +[titan] 2025-10-05 11:33:48,805 - root - INFO - step: 21170 loss: 2.1153 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8695 +[titan] 2025-10-05 11:33:48,805 - root - INFO - lr: 2.5769e-05 gnorm: 1.10 [12:59:38<11:33:28] +[titan] 2025-10-05 11:33:59,670 - root - INFO - step: 21175 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8816 +[titan] 2025-10-05 11:33:59,670 - root - INFO - lr: 2.5761e-05 gnorm: 1.05 [12:59:49<11:33:16] +[titan] 2025-10-05 11:34:10,542 - root - INFO - step: 21180 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8645 +[titan] 2025-10-05 11:34:10,542 - root - INFO - lr: 
2.5752e-05 gnorm: 1.07 [13:00:00<11:33:05] +[titan] 2025-10-05 11:34:21,425 - root - INFO - step: 21185 loss: 2.0963 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8530 +[titan] 2025-10-05 11:34:21,425 - root - INFO - lr: 2.5743e-05 gnorm: 1.01 [13:00:11<11:32:54] +[titan] 2025-10-05 11:34:32,352 - root - INFO - step: 21190 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8130 +[titan] 2025-10-05 11:34:32,352 - root - INFO - lr: 2.5734e-05 gnorm: 1.08 [13:00:22<11:32:43] +[titan] 2025-10-05 11:34:43,216 - root - INFO - step: 21195 loss: 2.1495 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.9000 +[titan] 2025-10-05 11:34:43,216 - root - INFO - lr: 2.5725e-05 gnorm: 1.04 [13:00:33<11:32:32] +[titan] 2025-10-05 11:34:51,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:34:54,111 - root - INFO - step: 21200 loss: 2.0921 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 11:34:54,111 - root - INFO - lr: 2.5716e-05 gnorm: 1.07 [13:00:44<11:32:20] +[titan] 2025-10-05 11:35:04,964 - root - INFO - step: 21205 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 11:35:04,964 - root - INFO - lr: 2.5707e-05 gnorm: 1.09 [13:00:54<11:32:09] +[titan] 2025-10-05 11:35:15,826 - root - INFO - step: 21210 loss: 2.1528 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2506 global_avg_mtp_loss: 1.9022 +[titan] 2025-10-05 11:35:15,827 - root - INFO - lr: 2.5698e-05 gnorm: 1.09 [13:01:05<11:31:58] +[titan] 2025-10-05 11:35:26,686 - root - INFO - step: 21215 loss: 2.1911 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2537 global_avg_mtp_loss: 1.9374 +[titan] 2025-10-05 11:35:26,686 - root - INFO - lr: 2.5689e-05 gnorm: 1.11 [13:01:16<11:31:47] +[titan] 2025-10-05 11:35:37,615 - root - INFO - step: 21220 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 29,983 tflops: 415.97 mfu: 42.06% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 11:35:37,615 - root - INFO - lr: 2.5680e-05 gnorm: 1.08 [13:01:27<11:31:36] +[titan] 2025-10-05 11:35:48,489 - root - INFO - step: 21225 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 11:35:48,489 - root - INFO - lr: 2.5672e-05 gnorm: 1.11 [13:01:38<11:31:24] +[titan] 2025-10-05 11:35:59,356 - root - INFO - step: 21230 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:35:59,356 - root - INFO - lr: 
2.5663e-05 gnorm: 1.08 [13:01:49<11:31:13] +[titan] 2025-10-05 11:36:10,239 - root - INFO - step: 21235 loss: 2.1478 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8980 +[titan] 2025-10-05 11:36:10,239 - root - INFO - lr: 2.5654e-05 gnorm: 1.09 [13:02:00<11:31:02] +[titan] 2025-10-05 11:36:21,092 - root - INFO - step: 21240 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8603 +[titan] 2025-10-05 11:36:21,092 - root - INFO - lr: 2.5645e-05 gnorm: 1.05 [13:02:11<11:30:51] +[titan] 2025-10-05 11:36:32,021 - root - INFO - step: 21245 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 11:36:32,021 - root - INFO - lr: 2.5636e-05 gnorm: 1.09 [13:02:21<11:30:40] +[titan] 2025-10-05 11:36:40,711 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:36:42,889 - root - INFO - step: 21250 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 11:36:42,889 - root - INFO - lr: 2.5627e-05 gnorm: 1.07 [13:02:32<11:30:28] +[titan] 2025-10-05 11:36:53,745 - root - INFO - step: 21255 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 11:36:53,746 - root - INFO - lr: 2.5618e-05 gnorm: 1.05 [13:02:43<11:30:17] +[titan] 2025-10-05 11:37:04,622 - root - INFO - step: 21260 loss: 2.1931 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2552 global_avg_mtp_loss: 1.9379 +[titan] 2025-10-05 11:37:04,622 - root - INFO - lr: 2.5609e-05 gnorm: 1.08 [13:02:54<11:30:06] +[titan] 2025-10-05 11:37:15,535 - root - INFO - step: 21265 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 11:37:15,536 - root - INFO - lr: 2.5600e-05 gnorm: 1.08 [13:03:05<11:29:55] +[titan] 2025-10-05 11:37:26,391 - root - INFO - step: 21270 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 11:37:26,392 - root - INFO - lr: 2.5592e-05 gnorm: 1.05 [13:03:16<11:29:44] +[titan] 2025-10-05 11:37:37,276 - root - INFO - step: 21275 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 11:37:37,276 - root - INFO - lr: 2.5583e-05 gnorm: 1.07 [13:03:27<11:29:32] +[titan] 2025-10-05 11:37:48,150 - root - INFO - step: 21280 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8442 +[titan] 2025-10-05 11:37:48,150 - root - INFO - lr: 
2.5574e-05 gnorm: 1.05 [13:03:38<11:29:21] +[titan] 2025-10-05 11:37:59,010 - root - INFO - step: 21285 loss: 2.0891 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 11:37:59,010 - root - INFO - lr: 2.5565e-05 gnorm: 1.07 [13:03:48<11:29:10] +[titan] 2025-10-05 11:38:09,872 - root - INFO - step: 21290 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8545 +[titan] 2025-10-05 11:38:09,872 - root - INFO - lr: 2.5556e-05 gnorm: 1.10 [13:03:59<11:28:59] +[titan] 2025-10-05 11:38:20,741 - root - INFO - step: 21295 loss: 2.1126 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 11:38:20,741 - root - INFO - lr: 2.5547e-05 gnorm: 1.12 [13:04:10<11:28:48] +[titan] 2025-10-05 11:38:29,453 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:38:31,672 - root - INFO - step: 21300 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9020 +[titan] 2025-10-05 11:38:31,672 - root - INFO - lr: 2.5538e-05 gnorm: 1.05 [13:04:21<11:28:36] +[titan] 2025-10-05 11:38:42,540 - root - INFO - step: 21305 loss: 2.1191 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:38:42,540 - root - INFO - lr: 2.5529e-05 gnorm: 1.08 [13:04:32<11:28:25] +[titan] 2025-10-05 11:38:53,411 - root - INFO - step: 21310 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8443 +[titan] 2025-10-05 11:38:53,411 - root - INFO - lr: 2.5520e-05 gnorm: 1.07 [13:04:43<11:28:14] +[titan] 2025-10-05 11:39:04,301 - root - INFO - step: 21315 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8790 +[titan] 2025-10-05 11:39:04,301 - root - INFO - lr: 2.5511e-05 gnorm: 1.10 [13:04:54<11:28:03] +[titan] 2025-10-05 11:39:15,170 - root - INFO - step: 21320 loss: 2.1232 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8772 +[titan] 2025-10-05 11:39:15,170 - root - INFO - lr: 2.5503e-05 gnorm: 1.06 [13:05:05<11:27:52] +[titan] 2025-10-05 11:39:26,035 - root - INFO - step: 21325 loss: 2.1518 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2510 global_avg_mtp_loss: 1.9009 +[titan] 2025-10-05 11:39:26,035 - root - INFO - lr: 2.5494e-05 gnorm: 1.08 [13:05:15<11:27:40] +[titan] 2025-10-05 11:39:36,994 - root - INFO - step: 21330 loss: 2.1464 memory: 118.84GiB(85.28%) tps: 29,901 tflops: 414.82 mfu: 41.94% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8956 +[titan] 2025-10-05 11:39:36,995 - root - INFO - lr: 
2.5485e-05 gnorm: 1.06 [13:05:26<11:27:29] +[titan] 2025-10-05 11:39:47,849 - root - INFO - step: 21335 loss: 2.1553 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.9061 +[titan] 2025-10-05 11:39:47,849 - root - INFO - lr: 2.5476e-05 gnorm: 1.03 [13:05:37<11:27:18] +[titan] 2025-10-05 11:39:58,709 - root - INFO - step: 21340 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 11:39:58,709 - root - INFO - lr: 2.5467e-05 gnorm: 1.07 [13:05:48<11:27:07] +[titan] 2025-10-05 11:40:09,576 - root - INFO - step: 21345 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 11:40:09,576 - root - INFO - lr: 2.5458e-05 gnorm: 1.05 [13:05:59<11:26:56] +[titan] 2025-10-05 11:40:18,258 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:40:20,450 - root - INFO - step: 21350 loss: 2.1943 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2549 global_avg_mtp_loss: 1.9393 +[titan] 2025-10-05 11:40:20,450 - root - INFO - lr: 2.5449e-05 gnorm: 1.09 [13:06:10<11:26:45] +[titan] 2025-10-05 11:40:31,323 - root - INFO - step: 21355 loss: 2.1745 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2524 global_avg_mtp_loss: 1.9222 +[titan] 2025-10-05 11:40:31,323 - root - INFO - lr: 2.5440e-05 gnorm: 1.11 [13:06:21<11:26:33] +[titan] 2025-10-05 11:40:42,303 - root - INFO - step: 21360 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 29,846 tflops: 414.06 mfu: 41.87% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:40:42,303 - root - INFO - lr: 2.5431e-05 gnorm: 1.09 [13:06:32<11:26:22] +[titan] 2025-10-05 11:40:53,190 - root - INFO - step: 21365 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 11:40:53,190 - root - INFO - lr: 2.5423e-05 gnorm: 1.04 [13:06:43<11:26:11] +[titan] 2025-10-05 11:41:04,057 - root - INFO - step: 21370 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8252 +[titan] 2025-10-05 11:41:04,057 - root - INFO - lr: 2.5414e-05 gnorm: 1.06 [13:06:53<11:26:00] +[titan] 2025-10-05 11:41:14,914 - root - INFO - step: 21375 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 11:41:14,914 - root - INFO - lr: 2.5405e-05 gnorm: 1.05 [13:07:04<11:25:49] +[titan] 2025-10-05 11:41:25,788 - root - INFO - step: 21380 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8950 +[titan] 2025-10-05 11:41:25,788 - root - INFO - lr: 
2.5396e-05 gnorm: 1.08 [13:07:15<11:25:37] +[titan] 2025-10-05 11:41:36,680 - root - INFO - step: 21385 loss: 2.1162 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8705 +[titan] 2025-10-05 11:41:36,680 - root - INFO - lr: 2.5387e-05 gnorm: 1.06 [13:07:26<11:25:26] +[titan] 2025-10-05 11:41:47,564 - root - INFO - step: 21390 loss: 2.0660 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 11:41:47,564 - root - INFO - lr: 2.5378e-05 gnorm: 1.06 [13:07:37<11:25:15] +[titan] 2025-10-05 11:41:58,477 - root - INFO - step: 21395 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:41:58,477 - root - INFO - lr: 2.5369e-05 gnorm: 1.05 [13:07:48<11:25:04] +[titan] 2025-10-05 11:42:07,157 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:42:09,346 - root - INFO - step: 21400 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 11:42:09,346 - root - INFO - lr: 2.5360e-05 gnorm: 1.06 [13:07:59<11:24:53] +[titan] 2025-10-05 11:42:20,225 - root - INFO - step: 21405 loss: 2.1729 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:42:20,226 - root - INFO - lr: 2.5352e-05 gnorm: 1.09 [13:08:10<11:24:41] +[titan] 2025-10-05 11:42:31,111 - root - INFO - step: 21410 loss: 2.1240 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8769 +[titan] 2025-10-05 11:42:31,112 - root - INFO - lr: 2.5343e-05 gnorm: 1.12 [13:08:21<11:24:30] +[titan] 2025-10-05 11:42:42,010 - root - INFO - step: 21415 loss: 2.0961 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8538 +[titan] 2025-10-05 11:42:42,011 - root - INFO - lr: 2.5334e-05 gnorm: 1.06 [13:08:31<11:24:19] +[titan] 2025-10-05 11:42:52,881 - root - INFO - step: 21420 loss: 2.1163 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8712 +[titan] 2025-10-05 11:42:52,881 - root - INFO - lr: 2.5325e-05 gnorm: 1.06 [13:08:42<11:24:08] +[titan] 2025-10-05 11:43:03,753 - root - INFO - step: 21425 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8953 +[titan] 2025-10-05 11:43:03,753 - root - INFO - lr: 2.5316e-05 gnorm: 1.05 [13:08:53<11:23:57] +[titan] 2025-10-05 11:43:14,617 - root - INFO - step: 21430 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8631 +[titan] 2025-10-05 11:43:14,617 - root - INFO - lr: 
2.5307e-05 gnorm: 1.07 [13:09:04<11:23:46] +[titan] 2025-10-05 11:43:25,474 - root - INFO - step: 21435 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 11:43:25,474 - root - INFO - lr: 2.5298e-05 gnorm: 1.04 [13:09:15<11:23:34] +[titan] 2025-10-05 11:43:36,449 - root - INFO - step: 21440 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 29,859 tflops: 414.24 mfu: 41.88% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 11:43:36,449 - root - INFO - lr: 2.5289e-05 gnorm: 1.05 [13:09:26<11:23:23] +[titan] 2025-10-05 11:43:47,314 - root - INFO - step: 21445 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 11:43:47,314 - root - INFO - lr: 2.5280e-05 gnorm: 1.04 [13:09:37<11:23:12] +[titan] 2025-10-05 11:43:56,018 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:43:58,209 - root - INFO - step: 21450 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 11:43:58,209 - root - INFO - lr: 2.5272e-05 gnorm: 1.06 [13:09:48<11:23:01] +[titan] 2025-10-05 11:44:09,061 - root - INFO - step: 21455 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 11:44:09,062 - root - INFO - lr: 2.5263e-05 gnorm: 1.08 [13:09:58<11:22:50] +[titan] 2025-10-05 11:44:19,965 - root - INFO - step: 21460 loss: 2.1648 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2528 global_avg_mtp_loss: 1.9120 +[titan] 2025-10-05 11:44:19,966 - root - INFO - lr: 2.5254e-05 gnorm: 1.07 [13:10:09<11:22:38] +[titan] 2025-10-05 11:44:30,808 - root - INFO - step: 21465 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8514 +[titan] 2025-10-05 11:44:30,808 - root - INFO - lr: 2.5245e-05 gnorm: 1.04 [13:10:20<11:22:27] +[titan] 2025-10-05 11:44:41,706 - root - INFO - step: 21470 loss: 2.1490 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2495 global_avg_mtp_loss: 1.8995 +[titan] 2025-10-05 11:44:41,706 - root - INFO - lr: 2.5236e-05 gnorm: 1.08 [13:10:31<11:22:16] +[titan] 2025-10-05 11:44:52,552 - root - INFO - step: 21475 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8376 +[titan] 2025-10-05 11:44:52,552 - root - INFO - lr: 2.5227e-05 gnorm: 1.04 [13:10:42<11:22:05] +[titan] 2025-10-05 11:45:03,391 - root - INFO - step: 21480 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8745 +[titan] 2025-10-05 11:45:03,391 - root - INFO - lr: 
2.5218e-05 gnorm: 1.07 [13:10:53<11:21:54] +[titan] 2025-10-05 11:45:14,218 - root - INFO - step: 21485 loss: 2.1182 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 11:45:14,218 - root - INFO - lr: 2.5209e-05 gnorm: 1.09 [13:11:04<11:21:42] +[titan] 2025-10-05 11:45:25,127 - root - INFO - step: 21490 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 11:45:25,127 - root - INFO - lr: 2.5201e-05 gnorm: 1.06 [13:11:15<11:21:31] +[titan] 2025-10-05 11:45:35,950 - root - INFO - step: 21495 loss: 2.1076 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8629 +[titan] 2025-10-05 11:45:35,951 - root - INFO - lr: 2.5192e-05 gnorm: 1.05 [13:11:25<11:21:20] +[titan] 2025-10-05 11:45:44,641 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:45:46,815 - root - INFO - step: 21500 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9073 +[titan] 2025-10-05 11:45:46,815 - root - INFO - lr: 2.5183e-05 gnorm: 1.08 [13:11:36<11:21:09] +[titan] 2025-10-05 11:45:55,765 - root - INFO - Dumping profiler traces at step 21504 +[titan] 2025-10-05 11:45:55,806 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 11:45:57,977 - root - INFO - step: 21505 loss: 2.1378 memory: 118.84GiB(85.28%) tps: 29,357 tflops: 407.29 mfu: 41.18% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8896 +[titan] 2025-10-05 11:45:57,977 - root - INFO - lr: 2.5174e-05 gnorm: 1.10 [13:11:47<11:20:58] +[titan] 2025-10-05 11:46:08,810 - root - INFO - step: 21510 loss: 2.1100 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8653 +[titan] 2025-10-05 11:46:08,810 - root - INFO - lr: 2.5165e-05 gnorm: 1.08 [13:11:58<11:20:47] +[titan] 2025-10-05 11:46:19,644 - root - INFO - step: 21515 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:46:19,645 - root - INFO - lr: 2.5156e-05 gnorm: 1.05 [13:12:09<11:20:35] +[titan] 2025-10-05 11:46:30,518 - root - INFO - step: 21520 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 11:46:30,518 - root - INFO - lr: 2.5147e-05 gnorm: 1.08 [13:12:20<11:20:24] +[titan] 2025-10-05 11:46:41,409 - root - INFO - step: 21525 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 11:46:41,409 - root - INFO - lr: 2.5138e-05 gnorm: 1.08 [13:12:31<11:20:13] +[titan] 2025-10-05 11:46:52,228 - root - INFO - step: 21530 loss: 
2.0507 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 11:46:52,228 - root - INFO - lr: 2.5130e-05 gnorm: 1.06 [13:12:42<11:20:02] +[titan] 2025-10-05 11:47:03,059 - root - INFO - step: 21535 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 11:47:03,059 - root - INFO - lr: 2.5121e-05 gnorm: 1.03 [13:12:52<11:19:51] +[titan] 2025-10-05 11:47:13,907 - root - INFO - step: 21540 loss: 2.1549 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9044 +[titan] 2025-10-05 11:47:13,907 - root - INFO - lr: 2.5112e-05 gnorm: 1.09 [13:13:03<11:19:39] +[titan] 2025-10-05 11:47:24,716 - root - INFO - step: 21545 loss: 2.1223 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8763 +[titan] 2025-10-05 11:47:24,717 - root - INFO - lr: 2.5103e-05 gnorm: 1.07 [13:13:14<11:19:28] +[titan] 2025-10-05 11:47:33,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:47:35,549 - root - INFO - step: 21550 loss: 2.1493 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.8994 +[titan] 2025-10-05 11:47:35,549 - root - INFO - lr: 2.5094e-05 gnorm: 1.05 [13:13:25<11:19:17] +[titan] 2025-10-05 11:47:46,489 - root - INFO - step: 21555 loss: 2.0469 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 11:47:46,490 - root - INFO - lr: 2.5085e-05 gnorm: 1.04 [13:13:36<11:19:06] +[titan] 2025-10-05 11:47:57,291 - root - INFO - step: 21560 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8649 +[titan] 2025-10-05 11:47:57,291 - root - INFO - lr: 2.5076e-05 gnorm: 1.08 [13:13:47<11:18:54] +[titan] 2025-10-05 11:48:08,089 - root - INFO - step: 21565 loss: 2.0826 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 11:48:08,090 - root - INFO - lr: 2.5067e-05 gnorm: 1.06 [13:13:57<11:18:43] +[titan] 2025-10-05 11:48:18,889 - root - INFO - step: 21570 loss: 2.1013 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8571 +[titan] 2025-10-05 11:48:18,889 - root - INFO - lr: 2.5059e-05 gnorm: 1.09 [13:14:08<11:18:32] +[titan] 2025-10-05 11:48:29,708 - root - INFO - step: 21575 loss: 2.1425 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:48:29,709 - root - INFO - lr: 2.5050e-05 gnorm: 1.06 [13:14:19<11:18:21] +[titan] 2025-10-05 11:48:40,539 - root - INFO - step: 21580 loss: 2.1380 memory: 118.84GiB(85.28%) tps: 30,256 tflops: 419.76 mfu: 42.44% global_avg_ntp_loss: 0.2486 global_avg_mtp_loss: 1.8894 +[titan] 2025-10-05 11:48:40,539 - root - INFO - lr: 
2.5041e-05 gnorm: 1.11 [13:14:30<11:18:09] +[titan] 2025-10-05 11:48:51,410 - root - INFO - step: 21585 loss: 2.0951 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 11:48:51,410 - root - INFO - lr: 2.5032e-05 gnorm: 1.06 [13:14:41<11:17:58] +[titan] 2025-10-05 11:49:02,256 - root - INFO - step: 21590 loss: 2.1780 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2527 global_avg_mtp_loss: 1.9253 +[titan] 2025-10-05 11:49:02,256 - root - INFO - lr: 2.5023e-05 gnorm: 1.12 [13:14:52<11:17:47] +[titan] 2025-10-05 11:49:13,089 - root - INFO - step: 21595 loss: 2.1172 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 11:49:13,089 - root - INFO - lr: 2.5014e-05 gnorm: 1.10 [13:15:02<11:17:36] +[titan] 2025-10-05 11:49:21,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:49:23,936 - root - INFO - step: 21600 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 11:49:23,936 - root - INFO - lr: 2.5005e-05 gnorm: 1.09 [13:15:13<11:17:25] +[titan] 2025-10-05 11:49:34,750 - root - INFO - step: 21605 loss: 2.1314 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.39 mfu: 42.51% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8838 +[titan] 2025-10-05 11:49:34,751 - root - INFO - lr: 2.4996e-05 gnorm: 1.08 [13:15:24<11:17:13] +[titan] 2025-10-05 11:49:45,562 - root - INFO - step: 21610 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8037 +[titan] 2025-10-05 11:49:45,563 - root - INFO - lr: 2.4988e-05 gnorm: 1.02 [13:15:35<11:17:02] +[titan] 2025-10-05 11:49:56,369 - root - INFO - step: 21615 loss: 2.1371 memory: 118.84GiB(85.28%) tps: 30,322 tflops: 420.67 mfu: 42.54% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8883 +[titan] 2025-10-05 11:49:56,370 - root - INFO - lr: 2.4979e-05 gnorm: 1.04 [13:15:46<11:16:51] +[titan] 2025-10-05 11:50:07,237 - root - INFO - step: 21620 loss: 2.1415 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8935 +[titan] 2025-10-05 11:50:07,237 - root - INFO - lr: 2.4970e-05 gnorm: 1.05 [13:15:57<11:16:40] +[titan] 2025-10-05 11:50:18,053 - root - INFO - step: 21625 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8344 +[titan] 2025-10-05 11:50:18,053 - root - INFO - lr: 2.4961e-05 gnorm: 1.06 [13:16:07<11:16:28] +[titan] 2025-10-05 11:50:28,850 - root - INFO - step: 21630 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.05 mfu: 42.57% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 11:50:28,851 - root - INFO - lr: 
2.4952e-05 gnorm: 1.04 [13:16:18<11:16:17] +[titan] 2025-10-05 11:50:39,656 - root - INFO - step: 21635 loss: 2.0898 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 11:50:39,656 - root - INFO - lr: 2.4943e-05 gnorm: 1.09 [13:16:29<11:16:06] +[titan] 2025-10-05 11:50:50,529 - root - INFO - step: 21640 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8761 +[titan] 2025-10-05 11:50:50,529 - root - INFO - lr: 2.4934e-05 gnorm: 1.06 [13:16:40<11:15:55] +[titan] 2025-10-05 11:51:01,328 - root - INFO - step: 21645 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 11:51:01,329 - root - INFO - lr: 2.4926e-05 gnorm: 1.04 [13:16:51<11:15:43] +[titan] 2025-10-05 11:51:09,997 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:51:12,168 - root - INFO - step: 21650 loss: 2.1489 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8991 +[titan] 2025-10-05 11:51:12,168 - root - INFO - lr: 2.4917e-05 gnorm: 1.07 [13:17:02<11:15:32] +[titan] 2025-10-05 11:51:23,012 - root - INFO - step: 21655 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2511 global_avg_mtp_loss: 1.9070 +[titan] 2025-10-05 11:51:23,012 - root - INFO - lr: 2.4908e-05 gnorm: 1.06 [13:17:12<11:15:21] +[titan] 2025-10-05 11:51:33,829 - root - INFO - step: 21660 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 11:51:33,830 - root - INFO - lr: 2.4899e-05 gnorm: 1.06 [13:17:23<11:15:10] +[titan] 2025-10-05 11:51:44,687 - root - INFO - step: 21665 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 11:51:44,687 - root - INFO - lr: 2.4890e-05 gnorm: 1.03 [13:17:34<11:14:59] +[titan] 2025-10-05 11:51:55,529 - root - INFO - step: 21670 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 11:51:55,529 - root - INFO - lr: 2.4881e-05 gnorm: 1.04 [13:17:45<11:14:47] +[titan] 2025-10-05 11:52:06,368 - root - INFO - step: 21675 loss: 2.1473 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2500 global_avg_mtp_loss: 1.8973 +[titan] 2025-10-05 11:52:06,368 - root - INFO - lr: 2.4872e-05 gnorm: 1.05 [13:17:56<11:14:36] +[titan] 2025-10-05 11:52:17,248 - root - INFO - step: 21680 loss: 2.0964 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8526 +[titan] 2025-10-05 11:52:17,248 - root - INFO - lr: 
2.4863e-05 gnorm: 1.08 [13:18:07<11:14:25] +[titan] 2025-10-05 11:52:28,077 - root - INFO - step: 21685 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8691 +[titan] 2025-10-05 11:52:28,077 - root - INFO - lr: 2.4855e-05 gnorm: 1.04 [13:18:17<11:14:14] +[titan] 2025-10-05 11:52:38,897 - root - INFO - step: 21690 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 11:52:38,897 - root - INFO - lr: 2.4846e-05 gnorm: 1.12 [13:18:28<11:14:03] +[titan] 2025-10-05 11:52:49,731 - root - INFO - step: 21695 loss: 2.1723 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2534 global_avg_mtp_loss: 1.9189 +[titan] 2025-10-05 11:52:49,731 - root - INFO - lr: 2.4837e-05 gnorm: 1.14 [13:18:39<11:13:51] +[titan] 2025-10-05 11:52:58,383 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:53:00,557 - root - INFO - step: 21700 loss: 2.0942 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 11:53:00,558 - root - INFO - lr: 2.4828e-05 gnorm: 1.04 [13:18:50<11:13:40] +[titan] 2025-10-05 11:53:11,384 - root - INFO - step: 21705 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8616 +[titan] 2025-10-05 11:53:11,384 - root - INFO - lr: 2.4819e-05 gnorm: 1.01 [13:19:01<11:13:29] +[titan] 2025-10-05 11:53:22,180 - root - INFO - step: 21710 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,354 tflops: 421.11 mfu: 42.58% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 11:53:22,180 - root - INFO - lr: 2.4810e-05 gnorm: 1.08 [13:19:12<11:13:18] +[titan] 2025-10-05 11:53:33,006 - root - INFO - step: 21715 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 11:53:33,006 - root - INFO - lr: 2.4801e-05 gnorm: 1.07 [13:19:22<11:13:06] +[titan] 2025-10-05 11:53:43,863 - root - INFO - step: 21720 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8820 +[titan] 2025-10-05 11:53:43,863 - root - INFO - lr: 2.4793e-05 gnorm: 1.07 [13:19:33<11:12:55] +[titan] 2025-10-05 11:53:54,726 - root - INFO - step: 21725 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 11:53:54,726 - root - INFO - lr: 2.4784e-05 gnorm: 1.07 [13:19:44<11:12:44] +[titan] 2025-10-05 11:54:05,529 - root - INFO - step: 21730 loss: 2.2023 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2564 global_avg_mtp_loss: 1.9459 +[titan] 2025-10-05 11:54:05,529 - root - INFO - lr: 
2.4775e-05 gnorm: 1.10 [13:19:55<11:12:33] +[titan] 2025-10-05 11:54:16,329 - root - INFO - step: 21735 loss: 2.1062 memory: 118.84GiB(85.28%) tps: 30,341 tflops: 420.94 mfu: 42.56% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8624 +[titan] 2025-10-05 11:54:16,329 - root - INFO - lr: 2.4766e-05 gnorm: 1.08 [13:20:06<11:12:21] +[titan] 2025-10-05 11:54:27,148 - root - INFO - step: 21740 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.19 mfu: 42.49% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9108 +[titan] 2025-10-05 11:54:27,149 - root - INFO - lr: 2.4757e-05 gnorm: 1.08 [13:20:17<11:12:10] +[titan] 2025-10-05 11:54:38,009 - root - INFO - step: 21745 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 11:54:38,009 - root - INFO - lr: 2.4748e-05 gnorm: 1.09 [13:20:27<11:11:59] +[titan] 2025-10-05 11:54:46,700 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:54:48,886 - root - INFO - step: 21750 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8871 +[titan] 2025-10-05 11:54:48,886 - root - INFO - lr: 2.4739e-05 gnorm: 1.11 [13:20:38<11:11:48] +[titan] 2025-10-05 11:54:59,687 - root - INFO - step: 21755 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 11:54:59,688 - root - INFO - lr: 2.4731e-05 gnorm: 1.03 [13:20:49<11:11:37] +[titan] 2025-10-05 11:55:10,503 - root - INFO - step: 21760 loss: 2.0855 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:55:10,504 - root - INFO - lr: 2.4722e-05 gnorm: 1.08 [13:21:00<11:11:25] +[titan] 2025-10-05 11:55:21,303 - root - INFO - step: 21765 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 11:55:21,303 - root - INFO - lr: 2.4713e-05 gnorm: 1.06 [13:21:11<11:11:14] +[titan] 2025-10-05 11:55:32,128 - root - INFO - step: 21770 loss: 2.0394 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 11:55:32,129 - root - INFO - lr: 2.4704e-05 gnorm: 1.07 [13:21:21<11:11:03] +[titan] 2025-10-05 11:55:42,948 - root - INFO - step: 21775 loss: 2.1278 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 11:55:42,948 - root - INFO - lr: 2.4695e-05 gnorm: 1.13 [13:21:32<11:10:52] +[titan] 2025-10-05 11:55:53,849 - root - INFO - step: 21780 loss: 2.1107 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 11:55:53,849 - root - INFO - lr: 
2.4686e-05 gnorm: 1.06 [13:21:43<11:10:41] +[titan] 2025-10-05 11:56:04,670 - root - INFO - step: 21785 loss: 2.1221 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8765 +[titan] 2025-10-05 11:56:04,670 - root - INFO - lr: 2.4677e-05 gnorm: 1.11 [13:21:54<11:10:29] +[titan] 2025-10-05 11:56:15,465 - root - INFO - step: 21790 loss: 2.1667 memory: 118.84GiB(85.28%) tps: 30,355 tflops: 421.13 mfu: 42.58% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9148 +[titan] 2025-10-05 11:56:15,465 - root - INFO - lr: 2.4669e-05 gnorm: 1.08 [13:22:05<11:10:18] +[titan] 2025-10-05 11:56:26,269 - root - INFO - step: 21795 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 11:56:26,269 - root - INFO - lr: 2.4660e-05 gnorm: 1.04 [13:22:16<11:10:07] +[titan] 2025-10-05 11:56:34,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:56:37,050 - root - INFO - step: 21800 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8786 +[titan] 2025-10-05 11:56:37,050 - root - INFO - lr: 2.4651e-05 gnorm: 1.03 [13:22:26<11:09:56] +[titan] 2025-10-05 11:56:47,848 - root - INFO - step: 21805 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8423 +[titan] 2025-10-05 11:56:47,849 - root - INFO - lr: 2.4642e-05 gnorm: 1.06 [13:22:37<11:09:44] +[titan] 2025-10-05 11:56:58,686 - root - INFO - step: 21810 loss: 2.0632 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 11:56:58,686 - root - INFO - lr: 2.4633e-05 gnorm: 1.08 [13:22:48<11:09:33] +[titan] 2025-10-05 11:57:09,468 - root - INFO - step: 21815 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,391 tflops: 421.63 mfu: 42.63% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 11:57:09,469 - root - INFO - lr: 2.4624e-05 gnorm: 1.04 [13:22:59<11:09:22] +[titan] 2025-10-05 11:57:20,268 - root - INFO - step: 21820 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.97 mfu: 42.57% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 11:57:20,268 - root - INFO - lr: 2.4615e-05 gnorm: 1.06 [13:23:10<11:09:11] +[titan] 2025-10-05 11:57:31,069 - root - INFO - step: 21825 loss: 2.0588 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 11:57:31,069 - root - INFO - lr: 2.4607e-05 gnorm: 1.03 [13:23:20<11:08:59] +[titan] 2025-10-05 11:57:41,865 - root - INFO - step: 21830 loss: 2.1085 memory: 118.84GiB(85.28%) tps: 30,353 tflops: 421.10 mfu: 42.58% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 11:57:41,865 - root - INFO - lr: 
2.4598e-05 gnorm: 1.03 [13:23:31<11:08:48] +[titan] 2025-10-05 11:57:52,686 - root - INFO - step: 21835 loss: 2.1336 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8854 +[titan] 2025-10-05 11:57:52,687 - root - INFO - lr: 2.4589e-05 gnorm: 1.03 [13:23:42<11:08:37] +[titan] 2025-10-05 11:58:03,531 - root - INFO - step: 21840 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8427 +[titan] 2025-10-05 11:58:03,532 - root - INFO - lr: 2.4580e-05 gnorm: 1.05 [13:23:53<11:08:26] +[titan] 2025-10-05 11:58:14,308 - root - INFO - step: 21845 loss: 2.1699 memory: 118.84GiB(85.28%) tps: 30,407 tflops: 421.85 mfu: 42.65% global_avg_ntp_loss: 0.2526 global_avg_mtp_loss: 1.9173 +[titan] 2025-10-05 11:58:14,309 - root - INFO - lr: 2.4571e-05 gnorm: 1.09 [13:24:04<11:08:14] +[titan] 2025-10-05 11:58:22,938 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 11:58:25,109 - root - INFO - step: 21850 loss: 2.1179 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8727 +[titan] 2025-10-05 11:58:25,110 - root - INFO - lr: 2.4562e-05 gnorm: 1.08 [13:24:14<11:08:03] +[titan] 2025-10-05 11:58:35,880 - root - INFO - step: 21855 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,424 tflops: 422.09 mfu: 42.68% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 11:58:35,880 - root - INFO - lr: 2.4554e-05 gnorm: 1.08 [13:24:25<11:07:52] +[titan] 2025-10-05 11:58:46,714 - root - INFO - step: 21860 loss: 2.0956 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8532 +[titan] 2025-10-05 11:58:46,714 - root - INFO - lr: 2.4545e-05 gnorm: 1.03 [13:24:36<11:07:41] +[titan] 2025-10-05 11:58:57,569 - root - INFO - step: 21865 loss: 2.1483 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.8989 +[titan] 2025-10-05 11:58:57,569 - root - INFO - lr: 2.4536e-05 gnorm: 1.07 [13:24:47<11:07:29] +[titan] 2025-10-05 11:59:08,390 - root - INFO - step: 21870 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 11:59:08,390 - root - INFO - lr: 2.4527e-05 gnorm: 1.04 [13:24:58<11:07:18] +[titan] 2025-10-05 11:59:19,246 - root - INFO - step: 21875 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8438 +[titan] 2025-10-05 11:59:19,246 - root - INFO - lr: 2.4518e-05 gnorm: 1.06 [13:25:09<11:07:07] +[titan] 2025-10-05 11:59:30,047 - root - INFO - step: 21880 loss: 2.0852 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 11:59:30,047 - root - INFO - lr: 
2.4509e-05 gnorm: 1.08 [13:25:19<11:06:56] +[titan] 2025-10-05 11:59:40,863 - root - INFO - step: 21885 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 11:59:40,863 - root - INFO - lr: 2.4500e-05 gnorm: 1.05 [13:25:30<11:06:45] +[titan] 2025-10-05 11:59:51,744 - root - INFO - step: 21890 loss: 2.1740 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9204 +[titan] 2025-10-05 11:59:51,744 - root - INFO - lr: 2.4492e-05 gnorm: 1.10 [13:25:41<11:06:33] +[titan] 2025-10-05 12:00:02,570 - root - INFO - step: 21895 loss: 2.2128 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.96 mfu: 42.46% global_avg_ntp_loss: 0.2579 global_avg_mtp_loss: 1.9549 +[titan] 2025-10-05 12:00:02,570 - root - INFO - lr: 2.4483e-05 gnorm: 1.10 [13:25:52<11:06:22] +[titan] 2025-10-05 12:00:11,225 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:00:13,387 - root - INFO - step: 21900 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 12:00:13,387 - root - INFO - lr: 2.4474e-05 gnorm: 1.04 [13:26:03<11:06:11] +[titan] 2025-10-05 12:00:24,246 - root - INFO - step: 21905 loss: 2.1321 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 12:00:24,246 - root - INFO - lr: 2.4465e-05 gnorm: 1.05 [13:26:14<11:06:00] +[titan] 2025-10-05 12:00:35,064 - root - INFO - step: 21910 loss: 2.1188 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.24 mfu: 42.49% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8714 +[titan] 2025-10-05 12:00:35,064 - root - INFO - lr: 2.4456e-05 gnorm: 1.06 [13:26:24<11:05:49] +[titan] 2025-10-05 12:00:45,889 - root - INFO - step: 21915 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:00:45,889 - root - INFO - lr: 2.4447e-05 gnorm: 1.07 [13:26:35<11:05:37] +[titan] 2025-10-05 12:00:56,747 - root - INFO - step: 21920 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 12:00:56,748 - root - INFO - lr: 2.4439e-05 gnorm: 1.11 [13:26:46<11:05:26] +[titan] 2025-10-05 12:01:07,566 - root - INFO - step: 21925 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:01:07,567 - root - INFO - lr: 2.4430e-05 gnorm: 1.06 [13:26:57<11:05:15] +[titan] 2025-10-05 12:01:18,394 - root - INFO - step: 21930 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 12:01:18,394 - root - INFO - lr: 
2.4421e-05 gnorm: 1.08 [13:27:08<11:05:04] +[titan] 2025-10-05 12:01:29,213 - root - INFO - step: 21935 loss: 2.1999 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2551 global_avg_mtp_loss: 1.9448 +[titan] 2025-10-05 12:01:29,213 - root - INFO - lr: 2.4412e-05 gnorm: 1.05 [13:27:19<11:04:52] +[titan] 2025-10-05 12:01:40,068 - root - INFO - step: 21940 loss: 2.1256 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8782 +[titan] 2025-10-05 12:01:40,068 - root - INFO - lr: 2.4403e-05 gnorm: 1.06 [13:27:29<11:04:41] +[titan] 2025-10-05 12:01:50,925 - root - INFO - step: 21945 loss: 2.1040 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8614 +[titan] 2025-10-05 12:01:50,925 - root - INFO - lr: 2.4394e-05 gnorm: 1.09 [13:27:40<11:04:30] +[titan] 2025-10-05 12:01:59,596 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:02:01,768 - root - INFO - step: 21950 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:02:01,768 - root - INFO - lr: 2.4385e-05 gnorm: 1.08 [13:27:51<11:04:19] +[titan] 2025-10-05 12:02:12,595 - root - INFO - step: 21955 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.89 mfu: 42.46% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8121 +[titan] 2025-10-05 12:02:12,595 - root - INFO - lr: 2.4377e-05 gnorm: 1.04 [13:28:02<11:04:08] +[titan] 2025-10-05 12:02:23,415 - root - INFO - step: 21960 loss: 2.0883 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8460 +[titan] 2025-10-05 12:02:23,415 - root - INFO - lr: 2.4368e-05 gnorm: 1.02 [13:28:13<11:03:56] +[titan] 2025-10-05 12:02:34,233 - root - INFO - step: 21965 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:02:34,234 - root - INFO - lr: 2.4359e-05 gnorm: 1.07 [13:28:24<11:03:45] +[titan] 2025-10-05 12:02:45,129 - root - INFO - step: 21970 loss: 2.1130 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8679 +[titan] 2025-10-05 12:02:45,129 - root - INFO - lr: 2.4350e-05 gnorm: 1.06 [13:28:34<11:03:34] +[titan] 2025-10-05 12:02:56,069 - root - INFO - step: 21975 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8574 +[titan] 2025-10-05 12:02:56,069 - root - INFO - lr: 2.4341e-05 gnorm: 1.04 [13:28:45<11:03:23] +[titan] 2025-10-05 12:03:06,899 - root - INFO - step: 21980 loss: 2.1654 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9135 +[titan] 2025-10-05 12:03:06,899 - root - INFO - lr: 
2.4332e-05 gnorm: 1.08 [13:28:56<11:03:12] +[titan] 2025-10-05 12:03:17,738 - root - INFO - step: 21985 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:03:17,738 - root - INFO - lr: 2.4324e-05 gnorm: 1.09 [13:29:07<11:03:00] +[titan] 2025-10-05 12:03:28,567 - root - INFO - step: 21990 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8203 +[titan] 2025-10-05 12:03:28,567 - root - INFO - lr: 2.4315e-05 gnorm: 1.02 [13:29:18<11:02:49] +[titan] 2025-10-05 12:03:39,369 - root - INFO - step: 21995 loss: 2.1137 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 12:03:39,369 - root - INFO - lr: 2.4306e-05 gnorm: 1.06 [13:29:29<11:02:38] +[titan] 2025-10-05 12:03:48,026 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:03:50,242 - root - INFO - step: 22000 loss: 2.1265 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:03:50,242 - root - INFO - lr: 2.4297e-05 gnorm: 1.08 [13:29:40<11:02:27] +[titan] 2025-10-05 12:04:01,127 - root - INFO - step: 22005 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:04:01,127 - root - INFO - lr: 2.4288e-05 gnorm: 1.01 [13:29:50<11:02:16] +[titan] 2025-10-05 12:04:11,950 - root - INFO - step: 22010 loss: 2.1030 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8594 +[titan] 2025-10-05 12:04:11,950 - root - INFO - lr: 2.4279e-05 gnorm: 1.06 [13:30:01<11:02:04] +[titan] 2025-10-05 12:04:22,889 - root - INFO - step: 22015 loss: 2.0810 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:04:22,889 - root - INFO - lr: 2.4271e-05 gnorm: 1.07 [13:30:12<11:01:53] +[titan] 2025-10-05 12:04:25,240 - root - INFO - Dumping profiler traces at step 22016 +[titan] 2025-10-05 12:04:25,280 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:04:33,972 - root - INFO - step: 22020 loss: 2.1387 memory: 118.84GiB(85.28%) tps: 29,566 tflops: 410.19 mfu: 41.48% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.8891 +[titan] 2025-10-05 12:04:33,972 - root - INFO - lr: 2.4262e-05 gnorm: 1.03 [13:30:23<11:01:42] +[titan] 2025-10-05 12:04:44,810 - root - INFO - step: 22025 loss: 2.1465 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8978 +[titan] 2025-10-05 12:04:44,810 - root - INFO - lr: 2.4253e-05 gnorm: 1.08 [13:30:34<11:01:31] +[titan] 2025-10-05 12:04:55,694 - root - INFO - step: 22030 loss: 
2.1289 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8807 +[titan] 2025-10-05 12:04:55,694 - root - INFO - lr: 2.4244e-05 gnorm: 1.05 [13:30:45<11:01:20] +[titan] 2025-10-05 12:05:06,571 - root - INFO - step: 22035 loss: 2.0627 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8224 +[titan] 2025-10-05 12:05:06,571 - root - INFO - lr: 2.4235e-05 gnorm: 1.04 [13:30:56<11:01:09] +[titan] 2025-10-05 12:05:17,439 - root - INFO - step: 22040 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 12:05:17,439 - root - INFO - lr: 2.4226e-05 gnorm: 1.05 [13:31:07<11:00:58] +[titan] 2025-10-05 12:05:28,290 - root - INFO - step: 22045 loss: 2.1355 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8868 +[titan] 2025-10-05 12:05:28,290 - root - INFO - lr: 2.4218e-05 gnorm: 1.06 [13:31:18<11:00:46] +[titan] 2025-10-05 12:05:36,955 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:05:39,138 - root - INFO - step: 22050 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:05:39,138 - root - INFO - lr: 2.4209e-05 gnorm: 1.06 [13:31:28<11:00:35] +[titan] 2025-10-05 12:05:49,987 - root - INFO - step: 22055 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 12:05:49,987 - root - INFO - lr: 2.4200e-05 gnorm: 1.05 [13:31:39<11:00:24] +[titan] 2025-10-05 12:06:00,891 - root - INFO - step: 22060 loss: 2.1712 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2516 global_avg_mtp_loss: 1.9197 +[titan] 2025-10-05 12:06:00,891 - root - INFO - lr: 2.4191e-05 gnorm: 1.10 [13:31:50<11:00:13] +[titan] 2025-10-05 12:06:11,774 - root - INFO - step: 22065 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:06:11,775 - root - INFO - lr: 2.4182e-05 gnorm: 1.05 [13:32:01<11:00:02] +[titan] 2025-10-05 12:06:22,629 - root - INFO - step: 22070 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:06:22,629 - root - INFO - lr: 2.4173e-05 gnorm: 1.06 [13:32:12<10:59:50] +[titan] 2025-10-05 12:06:33,471 - root - INFO - step: 22075 loss: 2.0401 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8033 +[titan] 2025-10-05 12:06:33,471 - root - INFO - lr: 2.4165e-05 gnorm: 1.07 [13:32:23<10:59:39] +[titan] 2025-10-05 12:06:44,307 - root - INFO - step: 22080 loss: 2.1317 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8847 +[titan] 2025-10-05 12:06:44,307 - root - INFO - lr: 
2.4156e-05 gnorm: 1.08 [13:32:34<10:59:28] +[titan] 2025-10-05 12:06:55,150 - root - INFO - step: 22085 loss: 2.0997 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8559 +[titan] 2025-10-05 12:06:55,151 - root - INFO - lr: 2.4147e-05 gnorm: 1.06 [13:32:44<10:59:17] +[titan] 2025-10-05 12:07:06,030 - root - INFO - step: 22090 loss: 2.1094 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 12:07:06,030 - root - INFO - lr: 2.4138e-05 gnorm: 1.09 [13:32:55<10:59:06] +[titan] 2025-10-05 12:07:16,895 - root - INFO - step: 22095 loss: 2.1217 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:07:16,895 - root - INFO - lr: 2.4129e-05 gnorm: 1.14 [13:33:06<10:58:55] +[titan] 2025-10-05 12:07:25,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:07:27,824 - root - INFO - step: 22100 loss: 2.1006 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 12:07:27,824 - root - INFO - lr: 2.4121e-05 gnorm: 1.06 [13:33:17<10:58:43] +[titan] 2025-10-05 12:07:38,689 - root - INFO - step: 22105 loss: 2.0715 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 12:07:38,689 - root - INFO - lr: 2.4112e-05 gnorm: 1.07 [13:33:28<10:58:32] +[titan] 2025-10-05 12:07:49,564 - root - INFO - step: 22110 loss: 2.1624 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2517 global_avg_mtp_loss: 1.9107 +[titan] 2025-10-05 12:07:49,564 - root - INFO - lr: 2.4103e-05 gnorm: 1.12 [13:33:39<10:58:21] +[titan] 2025-10-05 12:08:00,491 - root - INFO - step: 22115 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8523 +[titan] 2025-10-05 12:08:00,491 - root - INFO - lr: 2.4094e-05 gnorm: 1.06 [13:33:50<10:58:10] +[titan] 2025-10-05 12:08:11,388 - root - INFO - step: 22120 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 12:08:11,388 - root - INFO - lr: 2.4085e-05 gnorm: 1.07 [13:34:01<10:57:59] +[titan] 2025-10-05 12:08:22,246 - root - INFO - step: 22125 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 12:08:22,246 - root - INFO - lr: 2.4076e-05 gnorm: 1.08 [13:34:12<10:57:48] +[titan] 2025-10-05 12:08:33,148 - root - INFO - step: 22130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:08:33,148 - root - INFO - lr: 
2.4068e-05 gnorm: 1.08 [13:34:22<10:57:36] +[titan] 2025-10-05 12:08:44,031 - root - INFO - step: 22135 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 12:08:44,031 - root - INFO - lr: 2.4059e-05 gnorm: 1.02 [13:34:33<10:57:25] +[titan] 2025-10-05 12:08:54,887 - root - INFO - step: 22140 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:08:54,888 - root - INFO - lr: 2.4050e-05 gnorm: 1.06 [13:34:44<10:57:14] +[titan] 2025-10-05 12:09:05,796 - root - INFO - step: 22145 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 12:09:05,796 - root - INFO - lr: 2.4041e-05 gnorm: 1.02 [13:34:55<10:57:03] +[titan] 2025-10-05 12:09:14,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:09:16,655 - root - INFO - step: 22150 loss: 2.1582 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 12:09:16,655 - root - INFO - lr: 2.4032e-05 gnorm: 1.06 [13:35:06<10:56:52] +[titan] 2025-10-05 12:09:27,522 - root - INFO - step: 22155 loss: 2.1116 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8661 +[titan] 2025-10-05 12:09:27,522 - root - INFO - lr: 2.4024e-05 gnorm: 1.02 [13:35:17<10:56:41] +[titan] 2025-10-05 12:09:38,432 - root - INFO - step: 22160 loss: 2.1527 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.9024 +[titan] 2025-10-05 12:09:38,432 - root - INFO - lr: 2.4015e-05 gnorm: 1.04 [13:35:28<10:56:29] +[titan] 2025-10-05 12:09:49,302 - root - INFO - step: 22165 loss: 2.1166 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:09:49,302 - root - INFO - lr: 2.4006e-05 gnorm: 1.12 [13:35:39<10:56:18] +[titan] 2025-10-05 12:10:00,224 - root - INFO - step: 22170 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.23 mfu: 42.09% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:10:00,224 - root - INFO - lr: 2.3997e-05 gnorm: 1.09 [13:35:50<10:56:07] +[titan] 2025-10-05 12:10:11,087 - root - INFO - step: 22175 loss: 2.1093 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 12:10:11,087 - root - INFO - lr: 2.3988e-05 gnorm: 1.05 [13:36:00<10:55:56] +[titan] 2025-10-05 12:10:21,968 - root - INFO - step: 22180 loss: 2.2466 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2611 global_avg_mtp_loss: 1.9855 +[titan] 2025-10-05 12:10:21,968 - root - INFO - lr: 
2.3979e-05 gnorm: 1.15 [13:36:11<10:55:45] +[titan] 2025-10-05 12:10:32,857 - root - INFO - step: 22185 loss: 2.1657 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2521 global_avg_mtp_loss: 1.9136 +[titan] 2025-10-05 12:10:32,857 - root - INFO - lr: 2.3971e-05 gnorm: 1.08 [13:36:22<10:55:34] +[titan] 2025-10-05 12:10:43,721 - root - INFO - step: 22190 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:10:43,721 - root - INFO - lr: 2.3962e-05 gnorm: 1.07 [13:36:33<10:55:22] +[titan] 2025-10-05 12:10:54,626 - root - INFO - step: 22195 loss: 2.1296 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 12:10:54,626 - root - INFO - lr: 2.3953e-05 gnorm: 1.11 [13:36:44<10:55:11] +[titan] 2025-10-05 12:11:03,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:11:05,537 - root - INFO - step: 22200 loss: 2.1676 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2531 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 12:11:05,537 - root - INFO - lr: 2.3944e-05 gnorm: 1.09 [13:36:55<10:55:00] +[titan] 2025-10-05 12:11:16,410 - root - INFO - step: 22205 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9025 +[titan] 2025-10-05 12:11:16,410 - root - INFO - lr: 2.3935e-05 gnorm: 1.10 [13:37:06<10:54:49] +[titan] 2025-10-05 12:11:27,277 - root - INFO - step: 22210 loss: 2.1075 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 12:11:27,277 - root - INFO - lr: 2.3927e-05 gnorm: 1.10 [13:37:17<10:54:38] +[titan] 2025-10-05 12:11:38,149 - root - INFO - step: 22215 loss: 2.0858 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8426 +[titan] 2025-10-05 12:11:38,149 - root - INFO - lr: 2.3918e-05 gnorm: 1.08 [13:37:27<10:54:27] +[titan] 2025-10-05 12:11:49,017 - root - INFO - step: 22220 loss: 2.1032 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 12:11:49,017 - root - INFO - lr: 2.3909e-05 gnorm: 1.08 [13:37:38<10:54:15] +[titan] 2025-10-05 12:11:59,905 - root - INFO - step: 22225 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8675 +[titan] 2025-10-05 12:11:59,905 - root - INFO - lr: 2.3900e-05 gnorm: 1.08 [13:37:49<10:54:04] +[titan] 2025-10-05 12:12:10,823 - root - INFO - step: 22230 loss: 2.0998 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:12:10,823 - root - INFO - lr: 
2.3891e-05 gnorm: 1.09 [13:38:00<10:53:53] +[titan] 2025-10-05 12:12:21,651 - root - INFO - step: 22235 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8295 +[titan] 2025-10-05 12:12:21,651 - root - INFO - lr: 2.3883e-05 gnorm: 1.08 [13:38:11<10:53:42] +[titan] 2025-10-05 12:12:32,529 - root - INFO - step: 22240 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 12:12:32,530 - root - INFO - lr: 2.3874e-05 gnorm: 1.11 [13:38:22<10:53:31] +[titan] 2025-10-05 12:12:43,387 - root - INFO - step: 22245 loss: 2.0593 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 12:12:43,388 - root - INFO - lr: 2.3865e-05 gnorm: 1.06 [13:38:33<10:53:20] +[titan] 2025-10-05 12:12:52,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:12:54,236 - root - INFO - step: 22250 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8399 +[titan] 2025-10-05 12:12:54,236 - root - INFO - lr: 2.3856e-05 gnorm: 1.09 [13:38:44<10:53:08] +[titan] 2025-10-05 12:13:05,203 - root - INFO - step: 22255 loss: 2.2062 memory: 118.84GiB(85.28%) tps: 29,878 tflops: 414.51 mfu: 41.91% global_avg_ntp_loss: 0.2570 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 12:13:05,204 - root - INFO - lr: 2.3847e-05 gnorm: 1.10 [13:38:55<10:52:57] +[titan] 2025-10-05 12:13:16,105 - root - INFO - step: 22260 loss: 2.0839 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8418 +[titan] 2025-10-05 12:13:16,105 - root - INFO - lr: 2.3838e-05 gnorm: 1.06 [13:39:05<10:52:46] +[titan] 2025-10-05 12:13:26,969 - root - INFO - step: 22265 loss: 2.1143 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 12:13:26,969 - root - INFO - lr: 2.3830e-05 gnorm: 1.09 [13:39:16<10:52:35] +[titan] 2025-10-05 12:13:37,833 - root - INFO - step: 22270 loss: 2.1822 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2540 global_avg_mtp_loss: 1.9282 +[titan] 2025-10-05 12:13:37,833 - root - INFO - lr: 2.3821e-05 gnorm: 1.10 [13:39:27<10:52:24] +[titan] 2025-10-05 12:13:48,696 - root - INFO - step: 22275 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 12:13:48,696 - root - INFO - lr: 2.3812e-05 gnorm: 1.05 [13:39:38<10:52:13] +[titan] 2025-10-05 12:13:59,557 - root - INFO - step: 22280 loss: 2.1416 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8932 +[titan] 2025-10-05 12:13:59,557 - root - INFO - lr: 
2.3803e-05 gnorm: 1.06 [13:39:49<10:52:01] +[titan] 2025-10-05 12:14:10,442 - root - INFO - step: 22285 loss: 2.1340 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:14:10,442 - root - INFO - lr: 2.3794e-05 gnorm: 1.08 [13:40:00<10:51:50] +[titan] 2025-10-05 12:14:21,358 - root - INFO - step: 22290 loss: 2.0924 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:14:21,358 - root - INFO - lr: 2.3786e-05 gnorm: 1.08 [13:40:11<10:51:39] +[titan] 2025-10-05 12:14:32,225 - root - INFO - step: 22295 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 12:14:32,225 - root - INFO - lr: 2.3777e-05 gnorm: 1.07 [13:40:22<10:51:28] +[titan] 2025-10-05 12:14:40,897 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:14:43,076 - root - INFO - step: 22300 loss: 2.0949 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8512 +[titan] 2025-10-05 12:14:43,076 - root - INFO - lr: 2.3768e-05 gnorm: 1.08 [13:40:32<10:51:17] +[titan] 2025-10-05 12:14:53,944 - root - INFO - step: 22305 loss: 2.2081 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9520 +[titan] 2025-10-05 12:14:53,944 - root - INFO - lr: 2.3759e-05 gnorm: 1.05 [13:40:43<10:51:06] +[titan] 2025-10-05 12:15:04,844 - root - INFO - step: 22310 loss: 2.1909 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2562 global_avg_mtp_loss: 1.9347 +[titan] 2025-10-05 12:15:04,844 - root - INFO - lr: 2.3750e-05 gnorm: 1.11 [13:40:54<10:50:54] +[titan] 2025-10-05 12:15:15,693 - root - INFO - step: 22315 loss: 2.1095 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:15:15,693 - root - INFO - lr: 2.3742e-05 gnorm: 1.06 [13:41:05<10:50:43] +[titan] 2025-10-05 12:15:26,567 - root - INFO - step: 22320 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8513 +[titan] 2025-10-05 12:15:26,567 - root - INFO - lr: 2.3733e-05 gnorm: 1.04 [13:41:16<10:50:32] +[titan] 2025-10-05 12:15:37,421 - root - INFO - step: 22325 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 12:15:37,422 - root - INFO - lr: 2.3724e-05 gnorm: 1.08 [13:41:27<10:50:21] +[titan] 2025-10-05 12:15:48,281 - root - INFO - step: 22330 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 12:15:48,281 - root - INFO - lr: 
2.3715e-05 gnorm: 1.08 [13:41:38<10:50:10] +[titan] 2025-10-05 12:15:59,149 - root - INFO - step: 22335 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:15:59,149 - root - INFO - lr: 2.3706e-05 gnorm: 1.04 [13:41:48<10:49:59] +[titan] 2025-10-05 12:16:10,046 - root - INFO - step: 22340 loss: 2.0616 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8223 +[titan] 2025-10-05 12:16:10,046 - root - INFO - lr: 2.3698e-05 gnorm: 1.10 [13:41:59<10:49:47] +[titan] 2025-10-05 12:16:20,913 - root - INFO - step: 22345 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 12:16:20,913 - root - INFO - lr: 2.3689e-05 gnorm: 1.09 [13:42:10<10:49:36] +[titan] 2025-10-05 12:16:29,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:16:31,773 - root - INFO - step: 22350 loss: 2.1041 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:16:31,773 - root - INFO - lr: 2.3680e-05 gnorm: 1.09 [13:42:21<10:49:25] +[titan] 2025-10-05 12:16:42,676 - root - INFO - step: 22355 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8541 +[titan] 2025-10-05 12:16:42,676 - root - INFO - lr: 2.3671e-05 gnorm: 1.06 [13:42:32<10:49:14] +[titan] 2025-10-05 12:16:53,529 - root - INFO - step: 22360 loss: 2.1363 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2491 global_avg_mtp_loss: 1.8873 +[titan] 2025-10-05 12:16:53,529 - root - INFO - lr: 2.3662e-05 gnorm: 1.08 [13:42:43<10:49:03] +[titan] 2025-10-05 12:17:04,373 - root - INFO - step: 22365 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:17:04,373 - root - INFO - lr: 2.3654e-05 gnorm: 1.08 [13:42:54<10:48:51] +[titan] 2025-10-05 12:17:15,272 - root - INFO - step: 22370 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8524 +[titan] 2025-10-05 12:17:15,272 - root - INFO - lr: 2.3645e-05 gnorm: 1.06 [13:43:05<10:48:40] +[titan] 2025-10-05 12:17:26,145 - root - INFO - step: 22375 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 12:17:26,145 - root - INFO - lr: 2.3636e-05 gnorm: 1.05 [13:43:15<10:48:29] +[titan] 2025-10-05 12:17:36,995 - root - INFO - step: 22380 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 12:17:36,995 - root - INFO - lr: 
2.3627e-05 gnorm: 1.07 [13:43:26<10:48:18] +[titan] 2025-10-05 12:17:47,877 - root - INFO - step: 22385 loss: 2.0688 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:17:47,877 - root - INFO - lr: 2.3619e-05 gnorm: 1.08 [13:43:37<10:48:07] +[titan] 2025-10-05 12:17:58,732 - root - INFO - step: 22390 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8286 +[titan] 2025-10-05 12:17:58,732 - root - INFO - lr: 2.3610e-05 gnorm: 1.02 [13:43:48<10:47:56] +[titan] 2025-10-05 12:18:09,619 - root - INFO - step: 22395 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8656 +[titan] 2025-10-05 12:18:09,619 - root - INFO - lr: 2.3601e-05 gnorm: 1.08 [13:43:59<10:47:44] +[titan] 2025-10-05 12:18:18,277 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:18:20,460 - root - INFO - step: 22400 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8304 +[titan] 2025-10-05 12:18:20,461 - root - INFO - lr: 2.3592e-05 gnorm: 1.03 [13:44:10<10:47:33] +[titan] 2025-10-05 12:18:31,320 - root - INFO - step: 22405 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:18:31,320 - root - INFO - lr: 2.3583e-05 gnorm: 1.06 [13:44:21<10:47:22] +[titan] 2025-10-05 12:18:42,176 - root - INFO - step: 22410 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 12:18:42,177 - root - INFO - lr: 2.3575e-05 gnorm: 1.06 [13:44:31<10:47:11] +[titan] 2025-10-05 12:18:53,029 - root - INFO - step: 22415 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 12:18:53,029 - root - INFO - lr: 2.3566e-05 gnorm: 1.07 [13:44:42<10:47:00] +[titan] 2025-10-05 12:19:03,920 - root - INFO - step: 22420 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 12:19:03,920 - root - INFO - lr: 2.3557e-05 gnorm: 1.04 [13:44:53<10:46:49] +[titan] 2025-10-05 12:19:14,809 - root - INFO - step: 22425 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 12:19:14,809 - root - INFO - lr: 2.3548e-05 gnorm: 1.08 [13:45:04<10:46:37] +[titan] 2025-10-05 12:19:25,666 - root - INFO - step: 22430 loss: 2.1054 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 12:19:25,666 - root - INFO - lr: 
2.3539e-05 gnorm: 1.11 [13:45:15<10:46:26] +[titan] 2025-10-05 12:19:36,537 - root - INFO - step: 22435 loss: 2.0990 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8566 +[titan] 2025-10-05 12:19:36,537 - root - INFO - lr: 2.3531e-05 gnorm: 1.07 [13:45:26<10:46:15] +[titan] 2025-10-05 12:19:47,408 - root - INFO - step: 22440 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 12:19:47,408 - root - INFO - lr: 2.3522e-05 gnorm: 1.07 [13:45:37<10:46:04] +[titan] 2025-10-05 12:19:58,267 - root - INFO - step: 22445 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:19:58,267 - root - INFO - lr: 2.3513e-05 gnorm: 1.04 [13:45:48<10:45:53] +[titan] 2025-10-05 12:20:06,985 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:20:09,202 - root - INFO - step: 22450 loss: 2.1175 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 12:20:09,202 - root - INFO - lr: 2.3504e-05 gnorm: 1.08 [13:45:58<10:45:42] +[titan] 2025-10-05 12:20:20,059 - root - INFO - step: 22455 loss: 2.1341 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8869 +[titan] 2025-10-05 12:20:20,059 - root - INFO - lr: 2.3495e-05 gnorm: 1.04 [13:46:09<10:45:30] +[titan] 2025-10-05 12:20:30,913 - root - INFO - step: 22460 loss: 2.0957 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:20:30,913 - root - INFO - lr: 2.3487e-05 gnorm: 1.14 [13:46:20<10:45:19] +[titan] 2025-10-05 12:20:41,788 - root - INFO - step: 22465 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8576 +[titan] 2025-10-05 12:20:41,788 - root - INFO - lr: 2.3478e-05 gnorm: 1.02 [13:46:31<10:45:08] +[titan] 2025-10-05 12:20:52,649 - root - INFO - step: 22470 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 12:20:52,649 - root - INFO - lr: 2.3469e-05 gnorm: 1.04 [13:46:42<10:44:57] +[titan] 2025-10-05 12:21:03,515 - root - INFO - step: 22475 loss: 2.0698 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8297 +[titan] 2025-10-05 12:21:03,515 - root - INFO - lr: 2.3460e-05 gnorm: 1.09 [13:46:53<10:44:46] +[titan] 2025-10-05 12:21:14,427 - root - INFO - step: 22480 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8596 +[titan] 2025-10-05 12:21:14,427 - root - INFO - lr: 
2.3452e-05 gnorm: 1.07 [13:47:04<10:44:35] +[titan] 2025-10-05 12:21:25,291 - root - INFO - step: 22485 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8221 +[titan] 2025-10-05 12:21:25,291 - root - INFO - lr: 2.3443e-05 gnorm: 1.07 [13:47:15<10:44:23] +[titan] 2025-10-05 12:21:36,157 - root - INFO - step: 22490 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 12:21:36,157 - root - INFO - lr: 2.3434e-05 gnorm: 1.07 [13:47:25<10:44:12] +[titan] 2025-10-05 12:21:47,031 - root - INFO - step: 22495 loss: 2.1445 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8952 +[titan] 2025-10-05 12:21:47,032 - root - INFO - lr: 2.3425e-05 gnorm: 1.05 [13:47:36<10:44:01] +[titan] 2025-10-05 12:21:55,725 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:21:57,921 - root - INFO - step: 22500 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8915 +[titan] 2025-10-05 12:21:57,921 - root - INFO - lr: 2.3416e-05 gnorm: 1.10 [13:47:47<10:43:50] +[titan] 2025-10-05 12:22:08,788 - root - INFO - step: 22505 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 12:22:08,789 - root - INFO - lr: 2.3408e-05 gnorm: 1.07 [13:47:58<10:43:39] +[titan] 2025-10-05 12:22:19,750 - root - INFO - step: 22510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 29,895 tflops: 414.75 mfu: 41.94% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:22:19,750 - root - INFO - lr: 2.3399e-05 gnorm: 1.09 [13:48:09<10:43:28] +[titan] 2025-10-05 12:22:30,622 - root - INFO - step: 22515 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:22:30,622 - root - INFO - lr: 2.3390e-05 gnorm: 1.11 [13:48:20<10:43:17] +[titan] 2025-10-05 12:22:41,466 - root - INFO - step: 22520 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8830 +[titan] 2025-10-05 12:22:41,466 - root - INFO - lr: 2.3381e-05 gnorm: 1.06 [13:48:31<10:43:05] +[titan] 2025-10-05 12:22:52,408 - root - INFO - step: 22525 loss: 2.1020 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:22:52,408 - root - INFO - lr: 2.3373e-05 gnorm: 1.06 [13:48:42<10:42:54] +[titan] 2025-10-05 12:22:59,133 - root - INFO - Dumping profiler traces at step 22528 +[titan] 2025-10-05 12:22:59,173 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:23:03,544 - root - INFO - step: 22530 loss: 
2.1241 memory: 118.84GiB(85.28%) tps: 29,426 tflops: 408.25 mfu: 41.28% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8767 +[titan] 2025-10-05 12:23:03,544 - root - INFO - lr: 2.3364e-05 gnorm: 1.08 [13:48:53<10:42:43] +[titan] 2025-10-05 12:23:14,458 - root - INFO - step: 22535 loss: 2.1311 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2484 global_avg_mtp_loss: 1.8827 +[titan] 2025-10-05 12:23:14,458 - root - INFO - lr: 2.3355e-05 gnorm: 1.33 [13:49:04<10:42:32] +[titan] 2025-10-05 12:23:25,322 - root - INFO - step: 22540 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 12:23:25,322 - root - INFO - lr: 2.3346e-05 gnorm: 1.10 [13:49:15<10:42:21] +[titan] 2025-10-05 12:23:36,189 - root - INFO - step: 22545 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8657 +[titan] 2025-10-05 12:23:36,189 - root - INFO - lr: 2.3338e-05 gnorm: 1.04 [13:49:25<10:42:10] +[titan] 2025-10-05 12:23:44,856 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:23:47,032 - root - INFO - step: 22550 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8670 +[titan] 2025-10-05 12:23:47,032 - root - INFO - lr: 2.3329e-05 gnorm: 1.08 [13:49:36<10:41:59] +[titan] 2025-10-05 12:23:57,904 - root - INFO - step: 22555 loss: 2.0817 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8397 +[titan] 2025-10-05 12:23:57,904 - root - INFO - lr: 2.3320e-05 gnorm: 1.06 [13:49:47<10:41:47] +[titan] 2025-10-05 12:24:08,764 - root - INFO - step: 22560 loss: 2.0564 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 12:24:08,764 - root - INFO - lr: 2.3311e-05 gnorm: 1.08 [13:49:58<10:41:36] +[titan] 2025-10-05 12:24:19,652 - root - INFO - step: 22565 loss: 2.1051 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8615 +[titan] 2025-10-05 12:24:19,652 - root - INFO - lr: 2.3302e-05 gnorm: 1.10 [13:50:09<10:41:25] +[titan] 2025-10-05 12:24:30,523 - root - INFO - step: 22570 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 12:24:30,523 - root - INFO - lr: 2.3294e-05 gnorm: 1.05 [13:50:20<10:41:14] +[titan] 2025-10-05 12:24:41,397 - root - INFO - step: 22575 loss: 2.1080 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8633 +[titan] 2025-10-05 12:24:41,397 - root - INFO - lr: 2.3285e-05 gnorm: 1.06 [13:50:31<10:41:03] +[titan] 2025-10-05 12:24:52,282 - root - INFO - step: 22580 loss: 2.1028 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8584 +[titan] 2025-10-05 12:24:52,283 - root - INFO - lr: 
2.3276e-05 gnorm: 1.02 [13:50:42<10:40:52] +[titan] 2025-10-05 12:25:03,150 - root - INFO - step: 22585 loss: 2.0722 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:25:03,150 - root - INFO - lr: 2.3267e-05 gnorm: 1.07 [13:50:52<10:40:40] +[titan] 2025-10-05 12:25:14,069 - root - INFO - step: 22590 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 12:25:14,069 - root - INFO - lr: 2.3259e-05 gnorm: 1.07 [13:51:03<10:40:29] +[titan] 2025-10-05 12:25:24,945 - root - INFO - step: 22595 loss: 2.0307 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 12:25:24,945 - root - INFO - lr: 2.3250e-05 gnorm: 1.06 [13:51:14<10:40:18] +[titan] 2025-10-05 12:25:33,616 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:25:35,800 - root - INFO - step: 22600 loss: 2.1354 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8874 +[titan] 2025-10-05 12:25:35,800 - root - INFO - lr: 2.3241e-05 gnorm: 1.09 [13:51:25<10:40:07] +[titan] 2025-10-05 12:25:46,666 - root - INFO - step: 22605 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 12:25:46,666 - root - INFO - lr: 2.3232e-05 gnorm: 1.08 [13:51:36<10:39:56] +[titan] 2025-10-05 12:25:57,545 - root - INFO - step: 22610 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8253 +[titan] 2025-10-05 12:25:57,545 - root - INFO - lr: 2.3224e-05 gnorm: 1.04 [13:51:47<10:39:45] +[titan] 2025-10-05 12:26:08,410 - root - INFO - step: 22615 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 12:26:08,411 - root - INFO - lr: 2.3215e-05 gnorm: 1.05 [13:51:58<10:39:33] +[titan] 2025-10-05 12:26:19,368 - root - INFO - step: 22620 loss: 2.1035 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8586 +[titan] 2025-10-05 12:26:19,368 - root - INFO - lr: 2.3206e-05 gnorm: 1.13 [13:52:09<10:39:22] +[titan] 2025-10-05 12:26:30,266 - root - INFO - step: 22625 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8493 +[titan] 2025-10-05 12:26:30,266 - root - INFO - lr: 2.3197e-05 gnorm: 1.07 [13:52:20<10:39:11] +[titan] 2025-10-05 12:26:41,175 - root - INFO - step: 22630 loss: 2.1065 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8618 +[titan] 2025-10-05 12:26:41,175 - root - INFO - lr: 
2.3189e-05 gnorm: 1.06 [13:52:30<10:39:00] +[titan] 2025-10-05 12:26:52,070 - root - INFO - step: 22635 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8261 +[titan] 2025-10-05 12:26:52,070 - root - INFO - lr: 2.3180e-05 gnorm: 1.03 [13:52:41<10:38:49] +[titan] 2025-10-05 12:27:02,956 - root - INFO - step: 22640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 12:27:02,956 - root - INFO - lr: 2.3171e-05 gnorm: 1.05 [13:52:52<10:38:38] +[titan] 2025-10-05 12:27:13,822 - root - INFO - step: 22645 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 12:27:13,822 - root - INFO - lr: 2.3162e-05 gnorm: 1.02 [13:53:03<10:38:27] +[titan] 2025-10-05 12:27:22,552 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:27:24,736 - root - INFO - step: 22650 loss: 2.0501 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 12:27:24,736 - root - INFO - lr: 2.3153e-05 gnorm: 1.10 [13:53:14<10:38:16] +[titan] 2025-10-05 12:27:35,626 - root - INFO - step: 22655 loss: 2.0835 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8410 +[titan] 2025-10-05 12:27:35,626 - root - INFO - lr: 2.3145e-05 gnorm: 1.05 [13:53:25<10:38:04] +[titan] 2025-10-05 12:27:46,518 - root - INFO - step: 22660 loss: 2.1210 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8751 +[titan] 2025-10-05 12:27:46,518 - root - INFO - lr: 2.3136e-05 gnorm: 1.11 [13:53:36<10:37:53] +[titan] 2025-10-05 12:27:57,386 - root - INFO - step: 22665 loss: 2.1687 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2515 global_avg_mtp_loss: 1.9171 +[titan] 2025-10-05 12:27:57,386 - root - INFO - lr: 2.3127e-05 gnorm: 1.10 [13:53:47<10:37:42] +[titan] 2025-10-05 12:28:08,227 - root - INFO - step: 22670 loss: 2.0850 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8425 +[titan] 2025-10-05 12:28:08,228 - root - INFO - lr: 2.3118e-05 gnorm: 1.05 [13:53:58<10:37:31] +[titan] 2025-10-05 12:28:19,140 - root - INFO - step: 22675 loss: 2.1269 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8793 +[titan] 2025-10-05 12:28:19,140 - root - INFO - lr: 2.3110e-05 gnorm: 1.08 [13:54:08<10:37:20] +[titan] 2025-10-05 12:28:30,016 - root - INFO - step: 22680 loss: 2.1382 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.8862 +[titan] 2025-10-05 12:28:30,016 - root - INFO - lr: 
2.3101e-05 gnorm: 1.16 [13:54:19<10:37:09] +[titan] 2025-10-05 12:28:40,902 - root - INFO - step: 22685 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8186 +[titan] 2025-10-05 12:28:40,902 - root - INFO - lr: 2.3092e-05 gnorm: 1.08 [13:54:30<10:36:57] +[titan] 2025-10-05 12:28:51,765 - root - INFO - step: 22690 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 12:28:51,766 - root - INFO - lr: 2.3083e-05 gnorm: 1.03 [13:54:41<10:36:46] +[titan] 2025-10-05 12:29:02,626 - root - INFO - step: 22695 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 12:29:02,626 - root - INFO - lr: 2.3075e-05 gnorm: 1.06 [13:54:52<10:36:35] +[titan] 2025-10-05 12:29:11,275 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:29:13,457 - root - INFO - step: 22700 loss: 2.1164 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8713 +[titan] 2025-10-05 12:29:13,457 - root - INFO - lr: 2.3066e-05 gnorm: 1.07 [13:55:03<10:36:24] +[titan] 2025-10-05 12:29:24,373 - root - INFO - step: 22705 loss: 2.0814 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:29:24,373 - root - INFO - lr: 2.3057e-05 gnorm: 1.08 [13:55:14<10:36:13] +[titan] 2025-10-05 12:29:35,226 - root - INFO - step: 22710 loss: 2.1674 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2512 global_avg_mtp_loss: 1.9162 +[titan] 2025-10-05 12:29:35,226 - root - INFO - lr: 2.3048e-05 gnorm: 1.06 [13:55:24<10:36:02] +[titan] 2025-10-05 12:29:46,086 - root - INFO - step: 22715 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8506 +[titan] 2025-10-05 12:29:46,087 - root - INFO - lr: 2.3040e-05 gnorm: 1.12 [13:55:35<10:35:50] +[titan] 2025-10-05 12:29:56,956 - root - INFO - step: 22720 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 12:29:56,956 - root - INFO - lr: 2.3031e-05 gnorm: 1.05 [13:55:46<10:35:39] +[titan] 2025-10-05 12:30:07,794 - root - INFO - step: 22725 loss: 2.1086 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8635 +[titan] 2025-10-05 12:30:07,794 - root - INFO - lr: 2.3022e-05 gnorm: 1.08 [13:55:57<10:35:28] +[titan] 2025-10-05 12:30:18,701 - root - INFO - step: 22730 loss: 2.0684 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:30:18,702 - root - INFO - lr: 
2.3013e-05 gnorm: 1.06 [13:56:08<10:35:17] +[titan] 2025-10-05 12:30:29,526 - root - INFO - step: 22735 loss: 2.1558 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2502 global_avg_mtp_loss: 1.9056 +[titan] 2025-10-05 12:30:29,526 - root - INFO - lr: 2.3005e-05 gnorm: 1.08 [13:56:19<10:35:06] +[titan] 2025-10-05 12:30:40,389 - root - INFO - step: 22740 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 12:30:40,389 - root - INFO - lr: 2.2996e-05 gnorm: 1.08 [13:56:30<10:34:55] +[titan] 2025-10-05 12:30:51,240 - root - INFO - step: 22745 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 12:30:51,240 - root - INFO - lr: 2.2987e-05 gnorm: 1.07 [13:56:41<10:34:43] +[titan] 2025-10-05 12:30:59,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:31:02,123 - root - INFO - step: 22750 loss: 2.1101 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8642 +[titan] 2025-10-05 12:31:02,123 - root - INFO - lr: 2.2978e-05 gnorm: 1.09 [13:56:51<10:34:32] +[titan] 2025-10-05 12:31:12,994 - root - INFO - step: 22755 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8489 +[titan] 2025-10-05 12:31:12,994 - root - INFO - lr: 2.2970e-05 gnorm: 1.07 [13:57:02<10:34:21] +[titan] 2025-10-05 12:31:23,866 - root - INFO - step: 22760 loss: 2.0378 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 12:31:23,866 - root - INFO - lr: 2.2961e-05 gnorm: 1.07 [13:57:13<10:34:10] +[titan] 2025-10-05 12:31:34,726 - root - INFO - step: 22765 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8417 +[titan] 2025-10-05 12:31:34,726 - root - INFO - lr: 2.2952e-05 gnorm: 1.06 [13:57:24<10:33:59] +[titan] 2025-10-05 12:31:45,584 - root - INFO - step: 22770 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2547 global_avg_mtp_loss: 1.9354 +[titan] 2025-10-05 12:31:45,585 - root - INFO - lr: 2.2944e-05 gnorm: 1.08 [13:57:35<10:33:48] +[titan] 2025-10-05 12:31:56,424 - root - INFO - step: 22775 loss: 2.0368 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 12:31:56,424 - root - INFO - lr: 2.2935e-05 gnorm: 1.06 [13:57:46<10:33:36] +[titan] 2025-10-05 12:32:07,271 - root - INFO - step: 22780 loss: 2.1063 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8625 +[titan] 2025-10-05 12:32:07,271 - root - INFO - lr: 
2.2926e-05 gnorm: 1.09 [13:57:57<10:33:25] +[titan] 2025-10-05 12:32:18,125 - root - INFO - step: 22785 loss: 2.0749 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 12:32:18,126 - root - INFO - lr: 2.2917e-05 gnorm: 1.06 [13:58:07<10:33:14] +[titan] 2025-10-05 12:32:29,041 - root - INFO - step: 22790 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 12:32:29,041 - root - INFO - lr: 2.2909e-05 gnorm: 1.01 [13:58:18<10:33:03] +[titan] 2025-10-05 12:32:39,901 - root - INFO - step: 22795 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8495 +[titan] 2025-10-05 12:32:39,901 - root - INFO - lr: 2.2900e-05 gnorm: 1.05 [13:58:29<10:32:52] +[titan] 2025-10-05 12:32:48,566 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:32:50,763 - root - INFO - step: 22800 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:32:50,764 - root - INFO - lr: 2.2891e-05 gnorm: 1.04 [13:58:40<10:32:41] +[titan] 2025-10-05 12:33:01,622 - root - INFO - step: 22805 loss: 2.0900 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8482 +[titan] 2025-10-05 12:33:01,622 - root - INFO - lr: 2.2882e-05 gnorm: 1.02 [13:58:51<10:32:29] +[titan] 2025-10-05 12:33:12,469 - root - INFO - step: 22810 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 12:33:12,469 - root - INFO - lr: 2.2874e-05 gnorm: 1.05 [13:59:02<10:32:18] +[titan] 2025-10-05 12:33:23,367 - root - INFO - step: 22815 loss: 2.1364 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8893 +[titan] 2025-10-05 12:33:23,367 - root - INFO - lr: 2.2865e-05 gnorm: 1.08 [13:59:13<10:32:07] +[titan] 2025-10-05 12:33:34,205 - root - INFO - step: 22820 loss: 2.1271 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:33:34,206 - root - INFO - lr: 2.2856e-05 gnorm: 1.08 [13:59:23<10:31:56] +[titan] 2025-10-05 12:33:45,062 - root - INFO - step: 22825 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8575 +[titan] 2025-10-05 12:33:45,062 - root - INFO - lr: 2.2847e-05 gnorm: 1.06 [13:59:34<10:31:45] +[titan] 2025-10-05 12:33:55,902 - root - INFO - step: 22830 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 12:33:55,902 - root - INFO - lr: 
2.2839e-05 gnorm: 1.08 [13:59:45<10:31:34] +[titan] 2025-10-05 12:34:06,747 - root - INFO - step: 22835 loss: 2.0824 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8408 +[titan] 2025-10-05 12:34:06,747 - root - INFO - lr: 2.2830e-05 gnorm: 1.04 [13:59:56<10:31:22] +[titan] 2025-10-05 12:34:17,586 - root - INFO - step: 22840 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8552 +[titan] 2025-10-05 12:34:17,586 - root - INFO - lr: 2.2821e-05 gnorm: 1.04 [14:00:07<10:31:11] +[titan] 2025-10-05 12:34:28,454 - root - INFO - step: 22845 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8428 +[titan] 2025-10-05 12:34:28,454 - root - INFO - lr: 2.2813e-05 gnorm: 1.11 [14:00:18<10:31:00] +[titan] 2025-10-05 12:34:37,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:34:39,324 - root - INFO - step: 22850 loss: 2.0362 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 12:34:39,324 - root - INFO - lr: 2.2804e-05 gnorm: 1.07 [14:00:29<10:30:49] +[titan] 2025-10-05 12:34:50,183 - root - INFO - step: 22855 loss: 2.0829 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:34:50,183 - root - INFO - lr: 2.2795e-05 gnorm: 1.04 [14:00:39<10:30:38] +[titan] 2025-10-05 12:35:01,017 - root - INFO - step: 22860 loss: 1.9834 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 12:35:01,017 - root - INFO - lr: 2.2786e-05 gnorm: 1.01 [14:00:50<10:30:27] +[titan] 2025-10-05 12:35:11,885 - root - INFO - step: 22865 loss: 2.1417 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2487 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:35:11,885 - root - INFO - lr: 2.2778e-05 gnorm: 1.04 [14:01:01<10:30:15] +[titan] 2025-10-05 12:35:22,742 - root - INFO - step: 22870 loss: 2.1227 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8759 +[titan] 2025-10-05 12:35:22,742 - root - INFO - lr: 2.2769e-05 gnorm: 1.09 [14:01:12<10:30:04] +[titan] 2025-10-05 12:35:33,625 - root - INFO - step: 22875 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2459 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 12:35:33,625 - root - INFO - lr: 2.2760e-05 gnorm: 1.12 [14:01:23<10:29:53] +[titan] 2025-10-05 12:35:44,473 - root - INFO - step: 22880 loss: 2.0907 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8483 +[titan] 2025-10-05 12:35:44,473 - root - INFO - lr: 
2.2751e-05 gnorm: 1.09 [14:01:34<10:29:42] +[titan] 2025-10-05 12:35:55,316 - root - INFO - step: 22885 loss: 2.1475 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2498 global_avg_mtp_loss: 1.8977 +[titan] 2025-10-05 12:35:55,316 - root - INFO - lr: 2.2743e-05 gnorm: 1.08 [14:01:45<10:29:31] +[titan] 2025-10-05 12:36:06,165 - root - INFO - step: 22890 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8494 +[titan] 2025-10-05 12:36:06,165 - root - INFO - lr: 2.2734e-05 gnorm: 1.10 [14:01:55<10:29:20] +[titan] 2025-10-05 12:36:17,010 - root - INFO - step: 22895 loss: 2.0866 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8455 +[titan] 2025-10-05 12:36:17,011 - root - INFO - lr: 2.2725e-05 gnorm: 1.06 [14:02:06<10:29:08] +[titan] 2025-10-05 12:36:25,704 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:36:27,886 - root - INFO - step: 22900 loss: 2.0973 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8529 +[titan] 2025-10-05 12:36:27,887 - root - INFO - lr: 2.2717e-05 gnorm: 1.09 [14:02:17<10:28:57] +[titan] 2025-10-05 12:36:38,741 - root - INFO - step: 22905 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8109 +[titan] 2025-10-05 12:36:38,741 - root - INFO - lr: 2.2708e-05 gnorm: 1.06 [14:02:28<10:28:46] +[titan] 2025-10-05 12:36:49,633 - root - INFO - step: 22910 loss: 2.0954 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 12:36:49,633 - root - INFO - lr: 2.2699e-05 gnorm: 1.13 [14:02:39<10:28:35] +[titan] 2025-10-05 12:37:00,494 - root - INFO - step: 22915 loss: 2.1261 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8788 +[titan] 2025-10-05 12:37:00,494 - root - INFO - lr: 2.2690e-05 gnorm: 1.09 [14:02:50<10:28:24] +[titan] 2025-10-05 12:37:11,343 - root - INFO - step: 22920 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8270 +[titan] 2025-10-05 12:37:11,343 - root - INFO - lr: 2.2682e-05 gnorm: 1.05 [14:03:01<10:28:13] +[titan] 2025-10-05 12:37:22,183 - root - INFO - step: 22925 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8036 +[titan] 2025-10-05 12:37:22,183 - root - INFO - lr: 2.2673e-05 gnorm: 1.04 [14:03:11<10:28:01] +[titan] 2025-10-05 12:37:33,033 - root - INFO - step: 22930 loss: 2.1588 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2507 global_avg_mtp_loss: 1.9081 +[titan] 2025-10-05 12:37:33,033 - root - INFO - lr: 
2.2664e-05 gnorm: 1.08 [14:03:22<10:27:50] +[titan] 2025-10-05 12:37:43,902 - root - INFO - step: 22935 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 12:37:43,903 - root - INFO - lr: 2.2656e-05 gnorm: 1.04 [14:03:33<10:27:39] +[titan] 2025-10-05 12:37:54,792 - root - INFO - step: 22940 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8278 +[titan] 2025-10-05 12:37:54,792 - root - INFO - lr: 2.2647e-05 gnorm: 1.09 [14:03:44<10:27:28] +[titan] 2025-10-05 12:38:05,628 - root - INFO - step: 22945 loss: 2.0862 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8431 +[titan] 2025-10-05 12:38:05,628 - root - INFO - lr: 2.2638e-05 gnorm: 1.08 [14:03:55<10:27:17] +[titan] 2025-10-05 12:38:14,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:38:16,484 - root - INFO - step: 22950 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7713 +[titan] 2025-10-05 12:38:16,484 - root - INFO - lr: 2.2629e-05 gnorm: 1.08 [14:04:06<10:27:06] +[titan] 2025-10-05 12:38:27,334 - root - INFO - step: 22955 loss: 2.0812 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8386 +[titan] 2025-10-05 12:38:27,334 - root - INFO - lr: 2.2621e-05 gnorm: 1.09 [14:04:17<10:26:54] +[titan] 2025-10-05 12:38:38,180 - root - INFO - step: 22960 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8473 +[titan] 2025-10-05 12:38:38,180 - root - INFO - lr: 2.2612e-05 gnorm: 1.14 [14:04:27<10:26:43] +[titan] 2025-10-05 12:38:49,045 - root - INFO - step: 22965 loss: 2.0894 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 12:38:49,045 - root - INFO - lr: 2.2603e-05 gnorm: 1.02 [14:04:38<10:26:32] +[titan] 2025-10-05 12:38:59,904 - root - INFO - step: 22970 loss: 2.0347 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7986 +[titan] 2025-10-05 12:38:59,904 - root - INFO - lr: 2.2595e-05 gnorm: 1.08 [14:04:49<10:26:21] +[titan] 2025-10-05 12:39:10,774 - root - INFO - step: 22975 loss: 2.1546 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9048 +[titan] 2025-10-05 12:39:10,774 - root - INFO - lr: 2.2586e-05 gnorm: 1.09 [14:05:00<10:26:10] +[titan] 2025-10-05 12:39:21,640 - root - INFO - step: 22980 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:39:21,640 - root - INFO - lr: 
2.2577e-05 gnorm: 1.09 [14:05:11<10:25:59] +[titan] 2025-10-05 12:39:32,525 - root - INFO - step: 22985 loss: 2.1258 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8780 +[titan] 2025-10-05 12:39:32,525 - root - INFO - lr: 2.2568e-05 gnorm: 1.07 [14:05:22<10:25:47] +[titan] 2025-10-05 12:39:43,368 - root - INFO - step: 22990 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 12:39:43,369 - root - INFO - lr: 2.2560e-05 gnorm: 1.08 [14:05:33<10:25:36] +[titan] 2025-10-05 12:39:54,216 - root - INFO - step: 22995 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 12:39:54,217 - root - INFO - lr: 2.2551e-05 gnorm: 1.02 [14:05:43<10:25:25] +[titan] 2025-10-05 12:40:02,883 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:40:05,066 - root - INFO - step: 23000 loss: 2.1507 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2497 global_avg_mtp_loss: 1.9010 +[titan] 2025-10-05 12:40:05,066 - root - INFO - lr: 2.2542e-05 gnorm: 1.06 [14:05:54<10:25:14] +[titan] 2025-10-05 12:40:15,916 - root - INFO - step: 23005 loss: 2.1008 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 12:40:15,916 - root - INFO - lr: 2.2534e-05 gnorm: 1.09 [14:06:05<10:25:03] +[titan] 2025-10-05 12:40:26,775 - root - INFO - step: 23010 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8717 +[titan] 2025-10-05 12:40:26,775 - root - INFO - lr: 2.2525e-05 gnorm: 1.06 [14:06:16<10:24:52] +[titan] 2025-10-05 12:40:37,622 - root - INFO - step: 23015 loss: 2.1347 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8876 +[titan] 2025-10-05 12:40:37,622 - root - INFO - lr: 2.2516e-05 gnorm: 1.06 [14:06:27<10:24:40] +[titan] 2025-10-05 12:40:48,479 - root - INFO - step: 23020 loss: 2.1422 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8931 +[titan] 2025-10-05 12:40:48,479 - root - INFO - lr: 2.2507e-05 gnorm: 1.08 [14:06:38<10:24:29] +[titan] 2025-10-05 12:40:59,327 - root - INFO - step: 23025 loss: 2.0668 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:40:59,327 - root - INFO - lr: 2.2499e-05 gnorm: 1.05 [14:06:49<10:24:18] +[titan] 2025-10-05 12:41:10,188 - root - INFO - step: 23030 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 12:41:10,188 - root - INFO - lr: 
2.2490e-05 gnorm: 1.06 [14:06:59<10:24:07] +[titan] 2025-10-05 12:41:21,085 - root - INFO - step: 23035 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 12:41:21,085 - root - INFO - lr: 2.2481e-05 gnorm: 1.07 [14:07:10<10:23:56] +[titan] 2025-10-05 12:41:32,099 - root - INFO - step: 23040 loss: 2.1136 memory: 118.84GiB(85.28%) tps: 29,752 tflops: 412.76 mfu: 41.74% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8680 +[titan] 2025-10-05 12:41:32,100 - root - INFO - lr: 2.2473e-05 gnorm: 1.05 [14:07:21<10:23:45] +[titan] 2025-10-05 12:41:32,281 - root - INFO - Dumping profiler traces at step 23040 +[titan] 2025-10-05 12:41:32,321 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 12:41:43,199 - root - INFO - step: 23045 loss: 2.0590 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.58 mfu: 41.41% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 12:41:43,199 - root - INFO - lr: 2.2464e-05 gnorm: 1.07 [14:07:32<10:23:34] +[titan] 2025-10-05 12:41:51,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:41:54,062 - root - INFO - step: 23050 loss: 2.1014 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:41:54,062 - root - INFO - lr: 2.2455e-05 gnorm: 1.10 [14:07:43<10:23:23] +[titan] 2025-10-05 12:42:04,939 - root - INFO - step: 23055 loss: 2.1047 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 12:42:04,940 - root - INFO - lr: 2.2447e-05 gnorm: 1.10 [14:07:54<10:23:11] +[titan] 2025-10-05 12:42:15,807 - root - INFO - step: 23060 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 12:42:15,807 - root - INFO - lr: 2.2438e-05 gnorm: 1.09 [14:08:05<10:23:00] +[titan] 2025-10-05 12:42:26,648 - root - INFO - step: 23065 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 12:42:26,648 - root - INFO - lr: 2.2429e-05 gnorm: 1.06 [14:08:16<10:22:49] +[titan] 2025-10-05 12:42:37,523 - root - INFO - step: 23070 loss: 2.0664 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8273 +[titan] 2025-10-05 12:42:37,523 - root - INFO - lr: 2.2420e-05 gnorm: 1.06 [14:08:27<10:22:38] +[titan] 2025-10-05 12:42:48,380 - root - INFO - step: 23075 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 12:42:48,380 - root - INFO - lr: 2.2412e-05 gnorm: 1.08 [14:08:38<10:22:27] +[titan] 2025-10-05 12:42:59,255 - root - INFO - step: 23080 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 12:42:59,255 - root - INFO - lr: 
2.2403e-05 gnorm: 1.11 [14:08:48<10:22:16] +[titan] 2025-10-05 12:43:10,104 - root - INFO - step: 23085 loss: 2.0492 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 12:43:10,104 - root - INFO - lr: 2.2394e-05 gnorm: 1.06 [14:08:59<10:22:04] +[titan] 2025-10-05 12:43:20,963 - root - INFO - step: 23090 loss: 2.0906 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8478 +[titan] 2025-10-05 12:43:20,963 - root - INFO - lr: 2.2386e-05 gnorm: 1.07 [14:09:10<10:21:53] +[titan] 2025-10-05 12:43:31,830 - root - INFO - step: 23095 loss: 2.0816 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8400 +[titan] 2025-10-05 12:43:31,830 - root - INFO - lr: 2.2377e-05 gnorm: 1.06 [14:09:21<10:21:42] +[titan] 2025-10-05 12:43:40,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:43:42,684 - root - INFO - step: 23100 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 12:43:42,684 - root - INFO - lr: 2.2368e-05 gnorm: 1.08 [14:09:32<10:21:31] +[titan] 2025-10-05 12:43:53,521 - root - INFO - step: 23105 loss: 2.1541 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2499 global_avg_mtp_loss: 1.9043 +[titan] 2025-10-05 12:43:53,521 - root - INFO - lr: 2.2360e-05 gnorm: 1.12 [14:09:43<10:21:20] +[titan] 2025-10-05 12:44:04,389 - root - INFO - step: 23110 loss: 2.0636 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:44:04,389 - root - INFO - lr: 2.2351e-05 gnorm: 1.09 [14:09:54<10:21:09] +[titan] 2025-10-05 12:44:15,253 - root - INFO - step: 23115 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8814 +[titan] 2025-10-05 12:44:15,253 - root - INFO - lr: 2.2342e-05 gnorm: 1.06 [14:10:04<10:20:58] +[titan] 2025-10-05 12:44:26,116 - root - INFO - step: 23120 loss: 2.0783 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 12:44:26,116 - root - INFO - lr: 2.2334e-05 gnorm: 1.03 [14:10:15<10:20:46] +[titan] 2025-10-05 12:44:36,986 - root - INFO - step: 23125 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 12:44:36,986 - root - INFO - lr: 2.2325e-05 gnorm: 1.04 [14:10:26<10:20:35] +[titan] 2025-10-05 12:44:47,859 - root - INFO - step: 23130 loss: 2.1268 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 12:44:47,860 - root - INFO - lr: 
2.2316e-05 gnorm: 1.08 [14:10:37<10:20:24] +[titan] 2025-10-05 12:44:58,729 - root - INFO - step: 23135 loss: 2.1048 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 12:44:58,729 - root - INFO - lr: 2.2308e-05 gnorm: 1.10 [14:10:48<10:20:13] +[titan] 2025-10-05 12:45:09,592 - root - INFO - step: 23140 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8194 +[titan] 2025-10-05 12:45:09,592 - root - INFO - lr: 2.2299e-05 gnorm: 1.09 [14:10:59<10:20:02] +[titan] 2025-10-05 12:45:20,444 - root - INFO - step: 23145 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8275 +[titan] 2025-10-05 12:45:20,444 - root - INFO - lr: 2.2290e-05 gnorm: 1.10 [14:11:10<10:19:51] +[titan] 2025-10-05 12:45:29,113 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:45:31,320 - root - INFO - step: 23150 loss: 2.0752 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:45:31,320 - root - INFO - lr: 2.2281e-05 gnorm: 1.05 [14:11:21<10:19:39] +[titan] 2025-10-05 12:45:42,180 - root - INFO - step: 23155 loss: 2.0601 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 12:45:42,181 - root - INFO - lr: 2.2273e-05 gnorm: 1.10 [14:11:31<10:19:28] +[titan] 2025-10-05 12:45:53,058 - root - INFO - step: 23160 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 12:45:53,058 - root - INFO - lr: 2.2264e-05 gnorm: 1.06 [14:11:42<10:19:17] +[titan] 2025-10-05 12:46:03,966 - root - INFO - step: 23165 loss: 1.9940 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 12:46:03,966 - root - INFO - lr: 2.2255e-05 gnorm: 1.07 [14:11:53<10:19:06] +[titan] 2025-10-05 12:46:14,825 - root - INFO - step: 23170 loss: 2.1123 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8673 +[titan] 2025-10-05 12:46:14,825 - root - INFO - lr: 2.2247e-05 gnorm: 1.06 [14:12:04<10:18:55] +[titan] 2025-10-05 12:46:25,704 - root - INFO - step: 23175 loss: 2.1009 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8564 +[titan] 2025-10-05 12:46:25,705 - root - INFO - lr: 2.2238e-05 gnorm: 1.10 [14:12:15<10:18:44] +[titan] 2025-10-05 12:46:36,828 - root - INFO - step: 23180 loss: 2.1002 memory: 118.84GiB(85.28%) tps: 29,460 tflops: 408.71 mfu: 41.33% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8561 +[titan] 2025-10-05 12:46:36,828 - root - INFO - lr: 
2.2229e-05 gnorm: 1.09 [14:12:26<10:18:33] +[titan] 2025-10-05 12:46:47,680 - root - INFO - step: 23185 loss: 2.0750 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 12:46:47,680 - root - INFO - lr: 2.2221e-05 gnorm: 1.07 [14:12:37<10:18:22] +[titan] 2025-10-05 12:46:58,544 - root - INFO - step: 23190 loss: 2.1293 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8817 +[titan] 2025-10-05 12:46:58,544 - root - INFO - lr: 2.2212e-05 gnorm: 1.07 [14:12:48<10:18:10] +[titan] 2025-10-05 12:47:09,423 - root - INFO - step: 23195 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 12:47:09,424 - root - INFO - lr: 2.2203e-05 gnorm: 1.07 [14:12:59<10:17:59] +[titan] 2025-10-05 12:47:18,105 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:47:20,285 - root - INFO - step: 23200 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8779 +[titan] 2025-10-05 12:47:20,285 - root - INFO - lr: 2.2195e-05 gnorm: 1.05 [14:13:10<10:17:48] +[titan] 2025-10-05 12:47:31,161 - root - INFO - step: 23205 loss: 2.1046 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8592 +[titan] 2025-10-05 12:47:31,161 - root - INFO - lr: 2.2186e-05 gnorm: 1.07 [14:13:20<10:17:37] +[titan] 2025-10-05 12:47:42,031 - root - INFO - step: 23210 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 12:47:42,031 - root - INFO - lr: 2.2177e-05 gnorm: 1.08 [14:13:31<10:17:26] +[titan] 2025-10-05 12:47:52,877 - root - INFO - step: 23215 loss: 2.1097 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8652 +[titan] 2025-10-05 12:47:52,877 - root - INFO - lr: 2.2169e-05 gnorm: 1.07 [14:13:42<10:17:15] +[titan] 2025-10-05 12:48:03,720 - root - INFO - step: 23220 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:48:03,720 - root - INFO - lr: 2.2160e-05 gnorm: 1.06 [14:13:53<10:17:03] +[titan] 2025-10-05 12:48:14,569 - root - INFO - step: 23225 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8277 +[titan] 2025-10-05 12:48:14,569 - root - INFO - lr: 2.2151e-05 gnorm: 1.07 [14:14:04<10:16:52] +[titan] 2025-10-05 12:48:25,456 - root - INFO - step: 23230 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 12:48:25,456 - root - INFO - lr: 
2.2143e-05 gnorm: 1.09 [14:14:15<10:16:41] +[titan] 2025-10-05 12:48:36,322 - root - INFO - step: 23235 loss: 2.0597 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 12:48:36,322 - root - INFO - lr: 2.2134e-05 gnorm: 1.05 [14:14:26<10:16:30] +[titan] 2025-10-05 12:48:47,191 - root - INFO - step: 23240 loss: 2.1079 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 12:48:47,191 - root - INFO - lr: 2.2125e-05 gnorm: 1.06 [14:14:36<10:16:19] +[titan] 2025-10-05 12:48:58,072 - root - INFO - step: 23245 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2471 global_avg_mtp_loss: 1.8758 +[titan] 2025-10-05 12:48:58,072 - root - INFO - lr: 2.2117e-05 gnorm: 1.08 [14:14:47<10:16:08] +[titan] 2025-10-05 12:49:06,748 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:49:08,942 - root - INFO - step: 23250 loss: 2.0918 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8492 +[titan] 2025-10-05 12:49:08,943 - root - INFO - lr: 2.2108e-05 gnorm: 1.10 [14:14:58<10:15:57] +[titan] 2025-10-05 12:49:19,822 - root - INFO - step: 23255 loss: 2.1127 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2463 global_avg_mtp_loss: 1.8665 +[titan] 2025-10-05 12:49:19,822 - root - INFO - lr: 2.2099e-05 gnorm: 1.05 [14:15:09<10:15:45] +[titan] 2025-10-05 12:49:30,722 - root - INFO - step: 23260 loss: 2.1450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8961 +[titan] 2025-10-05 12:49:30,723 - root - INFO - lr: 2.2091e-05 gnorm: 1.10 [14:15:20<10:15:34] +[titan] 2025-10-05 12:49:41,642 - root - INFO - step: 23265 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 12:49:41,642 - root - INFO - lr: 2.2082e-05 gnorm: 1.09 [14:15:31<10:15:23] +[titan] 2025-10-05 12:49:52,513 - root - INFO - step: 23270 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 12:49:52,513 - root - INFO - lr: 2.2073e-05 gnorm: 1.08 [14:15:42<10:15:12] +[titan] 2025-10-05 12:50:03,384 - root - INFO - step: 23275 loss: 2.0549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8161 +[titan] 2025-10-05 12:50:03,384 - root - INFO - lr: 2.2065e-05 gnorm: 1.07 [14:15:53<10:15:01] +[titan] 2025-10-05 12:50:14,264 - root - INFO - step: 23280 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 12:50:14,265 - root - INFO - lr: 
2.2056e-05 gnorm: 1.06 [14:16:03<10:14:50] +[titan] 2025-10-05 12:50:25,152 - root - INFO - step: 23285 loss: 2.1398 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2493 global_avg_mtp_loss: 1.8905 +[titan] 2025-10-05 12:50:25,152 - root - INFO - lr: 2.2047e-05 gnorm: 1.05 [14:16:14<10:14:39] +[titan] 2025-10-05 12:50:36,029 - root - INFO - step: 23290 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 12:50:36,029 - root - INFO - lr: 2.2039e-05 gnorm: 1.05 [14:16:25<10:14:28] +[titan] 2025-10-05 12:50:46,933 - root - INFO - step: 23295 loss: 2.1267 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2472 global_avg_mtp_loss: 1.8795 +[titan] 2025-10-05 12:50:46,933 - root - INFO - lr: 2.2030e-05 gnorm: 1.10 [14:16:36<10:14:16] +[titan] 2025-10-05 12:50:55,611 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:50:57,805 - root - INFO - step: 23300 loss: 2.1017 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:50:57,805 - root - INFO - lr: 2.2021e-05 gnorm: 1.10 [14:16:47<10:14:05] +[titan] 2025-10-05 12:51:08,673 - root - INFO - step: 23305 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 12:51:08,673 - root - INFO - lr: 2.2013e-05 gnorm: 1.06 [14:16:58<10:13:54] +[titan] 2025-10-05 12:51:19,553 - root - INFO - step: 23310 loss: 2.0851 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8433 +[titan] 2025-10-05 12:51:19,553 - root - INFO - lr: 2.2004e-05 gnorm: 1.07 [14:17:09<10:13:43] +[titan] 2025-10-05 12:51:30,434 - root - INFO - step: 23315 loss: 2.0776 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8361 +[titan] 2025-10-05 12:51:30,435 - root - INFO - lr: 2.1995e-05 gnorm: 1.07 [14:17:20<10:13:32] +[titan] 2025-10-05 12:51:41,297 - root - INFO - step: 23320 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:51:41,298 - root - INFO - lr: 2.1987e-05 gnorm: 1.06 [14:17:31<10:13:21] +[titan] 2025-10-05 12:51:52,171 - root - INFO - step: 23325 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8412 +[titan] 2025-10-05 12:51:52,171 - root - INFO - lr: 2.1978e-05 gnorm: 1.06 [14:17:41<10:13:10] +[titan] 2025-10-05 12:52:03,034 - root - INFO - step: 23330 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 12:52:03,034 - root - INFO - lr: 
2.1969e-05 gnorm: 1.06 [14:17:52<10:12:58] +[titan] 2025-10-05 12:52:13,904 - root - INFO - step: 23335 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8247 +[titan] 2025-10-05 12:52:13,905 - root - INFO - lr: 2.1961e-05 gnorm: 1.08 [14:18:03<10:12:47] +[titan] 2025-10-05 12:52:24,765 - root - INFO - step: 23340 loss: 2.1134 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8684 +[titan] 2025-10-05 12:52:24,765 - root - INFO - lr: 2.1952e-05 gnorm: 1.05 [14:18:14<10:12:36] +[titan] 2025-10-05 12:52:35,613 - root - INFO - step: 23345 loss: 2.0713 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 12:52:35,613 - root - INFO - lr: 2.1944e-05 gnorm: 1.08 [14:18:25<10:12:25] +[titan] 2025-10-05 12:52:44,297 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:52:46,481 - root - INFO - step: 23350 loss: 2.0693 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 12:52:46,481 - root - INFO - lr: 2.1935e-05 gnorm: 1.06 [14:18:36<10:12:14] +[titan] 2025-10-05 12:52:57,341 - root - INFO - step: 23355 loss: 2.1206 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8730 +[titan] 2025-10-05 12:52:57,341 - root - INFO - lr: 2.1926e-05 gnorm: 1.09 [14:18:47<10:12:03] +[titan] 2025-10-05 12:53:08,214 - root - INFO - step: 23360 loss: 2.1012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8572 +[titan] 2025-10-05 12:53:08,214 - root - INFO - lr: 2.1918e-05 gnorm: 1.05 [14:18:57<10:11:51] +[titan] 2025-10-05 12:53:19,079 - root - INFO - step: 23365 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8477 +[titan] 2025-10-05 12:53:19,079 - root - INFO - lr: 2.1909e-05 gnorm: 1.11 [14:19:08<10:11:40] +[titan] 2025-10-05 12:53:29,939 - root - INFO - step: 23370 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8916 +[titan] 2025-10-05 12:53:29,939 - root - INFO - lr: 2.1900e-05 gnorm: 1.11 [14:19:19<10:11:29] +[titan] 2025-10-05 12:53:40,836 - root - INFO - step: 23375 loss: 2.0922 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 12:53:40,836 - root - INFO - lr: 2.1892e-05 gnorm: 1.13 [14:19:30<10:11:18] +[titan] 2025-10-05 12:53:51,725 - root - INFO - step: 23380 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 12:53:51,725 - root - INFO - lr: 
2.1883e-05 gnorm: 1.07 [14:19:41<10:11:07] +[titan] 2025-10-05 12:54:02,631 - root - INFO - step: 23385 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 12:54:02,631 - root - INFO - lr: 2.1874e-05 gnorm: 1.07 [14:19:52<10:10:56] +[titan] 2025-10-05 12:54:13,542 - root - INFO - step: 23390 loss: 2.0791 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 12:54:13,543 - root - INFO - lr: 2.1866e-05 gnorm: 1.12 [14:20:03<10:10:45] +[titan] 2025-10-05 12:54:24,401 - root - INFO - step: 23395 loss: 2.0662 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 12:54:24,401 - root - INFO - lr: 2.1857e-05 gnorm: 1.08 [14:20:14<10:10:33] +[titan] 2025-10-05 12:54:33,071 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:54:35,256 - root - INFO - step: 23400 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 12:54:35,256 - root - INFO - lr: 2.1848e-05 gnorm: 1.09 [14:20:24<10:10:22] +[titan] 2025-10-05 12:54:46,105 - root - INFO - step: 23405 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8085 +[titan] 2025-10-05 12:54:46,105 - root - INFO - lr: 2.1840e-05 gnorm: 1.03 [14:20:35<10:10:11] +[titan] 2025-10-05 12:54:56,966 - root - INFO - step: 23410 loss: 2.1078 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8644 +[titan] 2025-10-05 12:54:56,966 - root - INFO - lr: 2.1831e-05 gnorm: 1.09 [14:20:46<10:10:00] +[titan] 2025-10-05 12:55:07,809 - root - INFO - step: 23415 loss: 2.1230 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2483 global_avg_mtp_loss: 1.8747 +[titan] 2025-10-05 12:55:07,810 - root - INFO - lr: 2.1823e-05 gnorm: 1.09 [14:20:57<10:09:49] +[titan] 2025-10-05 12:55:18,648 - root - INFO - step: 23420 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8498 +[titan] 2025-10-05 12:55:18,648 - root - INFO - lr: 2.1814e-05 gnorm: 1.09 [14:21:08<10:09:38] +[titan] 2025-10-05 12:55:29,531 - root - INFO - step: 23425 loss: 2.1312 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8837 +[titan] 2025-10-05 12:55:29,531 - root - INFO - lr: 2.1805e-05 gnorm: 1.07 [14:21:19<10:09:27] +[titan] 2025-10-05 12:55:40,423 - root - INFO - step: 23430 loss: 2.0740 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8334 +[titan] 2025-10-05 12:55:40,423 - root - INFO - lr: 
2.1797e-05 gnorm: 1.07 [14:21:30<10:09:15] +[titan] 2025-10-05 12:55:51,286 - root - INFO - step: 23435 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 12:55:51,286 - root - INFO - lr: 2.1788e-05 gnorm: 1.05 [14:21:40<10:09:04] +[titan] 2025-10-05 12:56:02,131 - root - INFO - step: 23440 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8791 +[titan] 2025-10-05 12:56:02,131 - root - INFO - lr: 2.1779e-05 gnorm: 1.05 [14:21:51<10:08:53] +[titan] 2025-10-05 12:56:12,982 - root - INFO - step: 23445 loss: 2.0777 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 12:56:12,982 - root - INFO - lr: 2.1771e-05 gnorm: 1.07 [14:22:02<10:08:42] +[titan] 2025-10-05 12:56:21,657 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:56:23,837 - root - INFO - step: 23450 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8699 +[titan] 2025-10-05 12:56:23,837 - root - INFO - lr: 2.1762e-05 gnorm: 1.03 [14:22:13<10:08:31] +[titan] 2025-10-05 12:56:34,722 - root - INFO - step: 23455 loss: 2.1120 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 12:56:34,722 - root - INFO - lr: 2.1753e-05 gnorm: 1.08 [14:22:24<10:08:20] +[titan] 2025-10-05 12:56:45,579 - root - INFO - step: 23460 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 12:56:45,579 - root - INFO - lr: 2.1745e-05 gnorm: 1.05 [14:22:35<10:08:09] +[titan] 2025-10-05 12:56:56,409 - root - INFO - step: 23465 loss: 2.0982 memory: 118.84GiB(85.28%) tps: 30,257 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2440 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 12:56:56,409 - root - INFO - lr: 2.1736e-05 gnorm: 1.07 [14:22:46<10:07:57] +[titan] 2025-10-05 12:57:07,229 - root - INFO - step: 23470 loss: 2.0950 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 12:57:07,229 - root - INFO - lr: 2.1728e-05 gnorm: 1.08 [14:22:56<10:07:46] +[titan] 2025-10-05 12:57:18,049 - root - INFO - step: 23475 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2492 global_avg_mtp_loss: 1.8857 +[titan] 2025-10-05 12:57:18,049 - root - INFO - lr: 2.1719e-05 gnorm: 1.09 [14:23:07<10:07:35] +[titan] 2025-10-05 12:57:28,860 - root - INFO - step: 23480 loss: 2.0930 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8511 +[titan] 2025-10-05 12:57:28,860 - root - INFO - lr: 
2.1710e-05 gnorm: 1.07 [14:23:18<10:07:24] +[titan] 2025-10-05 12:57:39,712 - root - INFO - step: 23485 loss: 2.1212 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8737 +[titan] 2025-10-05 12:57:39,712 - root - INFO - lr: 2.1702e-05 gnorm: 1.09 [14:23:29<10:07:13] +[titan] 2025-10-05 12:57:50,568 - root - INFO - step: 23490 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 12:57:50,568 - root - INFO - lr: 2.1693e-05 gnorm: 1.06 [14:23:40<10:07:01] +[titan] 2025-10-05 12:58:01,418 - root - INFO - step: 23495 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8276 +[titan] 2025-10-05 12:58:01,418 - root - INFO - lr: 2.1684e-05 gnorm: 1.13 [14:23:51<10:06:50] +[titan] 2025-10-05 12:58:10,087 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 12:58:12,263 - root - INFO - step: 23500 loss: 2.0793 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 12:58:12,263 - root - INFO - lr: 2.1676e-05 gnorm: 1.05 [14:24:01<10:06:39] +[titan] 2025-10-05 12:58:23,119 - root - INFO - step: 23505 loss: 2.1530 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9016 +[titan] 2025-10-05 12:58:23,119 - root - INFO - lr: 2.1667e-05 gnorm: 1.09 [14:24:12<10:06:28] +[titan] 2025-10-05 12:58:33,963 - root - INFO - step: 23510 loss: 2.0967 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 12:58:33,964 - root - INFO - lr: 2.1659e-05 gnorm: 1.07 [14:24:23<10:06:17] +[titan] 2025-10-05 12:58:44,834 - root - INFO - step: 23515 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 12:58:44,834 - root - INFO - lr: 2.1650e-05 gnorm: 1.06 [14:24:34<10:06:06] +[titan] 2025-10-05 12:58:55,708 - root - INFO - step: 23520 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8363 +[titan] 2025-10-05 12:58:55,708 - root - INFO - lr: 2.1641e-05 gnorm: 1.05 [14:24:45<10:05:55] +[titan] 2025-10-05 12:59:06,592 - root - INFO - step: 23525 loss: 2.0619 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 12:59:06,592 - root - INFO - lr: 2.1633e-05 gnorm: 1.06 [14:24:56<10:05:43] +[titan] 2025-10-05 12:59:17,459 - root - INFO - step: 23530 loss: 2.1419 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8941 +[titan] 2025-10-05 12:59:17,459 - root - INFO - lr: 
2.1624e-05 gnorm: 1.08 [14:25:07<10:05:32] +[titan] 2025-10-05 12:59:28,332 - root - INFO - step: 23535 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8599 +[titan] 2025-10-05 12:59:28,332 - root - INFO - lr: 2.1616e-05 gnorm: 1.07 [14:25:18<10:05:21] +[titan] 2025-10-05 12:59:39,189 - root - INFO - step: 23540 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 12:59:39,190 - root - INFO - lr: 2.1607e-05 gnorm: 1.06 [14:25:28<10:05:10] +[titan] 2025-10-05 12:59:50,087 - root - INFO - step: 23545 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8219 +[titan] 2025-10-05 12:59:50,087 - root - INFO - lr: 2.1598e-05 gnorm: 1.07 [14:25:39<10:04:59] +[titan] 2025-10-05 12:59:58,866 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:00:01,039 - root - INFO - step: 23550 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8439 +[titan] 2025-10-05 13:00:01,039 - root - INFO - lr: 2.1590e-05 gnorm: 1.13 [14:25:50<10:04:48] +[titan] 2025-10-05 13:00:05,580 - root - INFO - Dumping profiler traces at step 23552 +[titan] 2025-10-05 13:00:05,628 - root - INFO - Finished dumping profiler traces in 0.05 seconds +[titan] 2025-10-05 13:00:12,148 - root - INFO - step: 23555 loss: 2.0620 memory: 118.84GiB(85.28%) tps: 29,498 tflops: 409.24 mfu: 41.38% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 13:00:12,148 - root - INFO - lr: 2.1581e-05 gnorm: 1.07 [14:26:01<10:04:37] +[titan] 2025-10-05 13:00:23,000 - root - INFO - step: 23560 loss: 2.1733 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2532 global_avg_mtp_loss: 1.9201 +[titan] 2025-10-05 13:00:23,000 - root - INFO - lr: 2.1572e-05 gnorm: 1.11 [14:26:12<10:04:26] +[titan] 2025-10-05 13:00:33,832 - root - INFO - step: 23565 loss: 2.1010 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.69 mfu: 42.44% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 13:00:33,833 - root - INFO - lr: 2.1564e-05 gnorm: 1.09 [14:26:23<10:04:15] +[titan] 2025-10-05 13:00:44,700 - root - INFO - step: 23570 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 13:00:44,701 - root - INFO - lr: 2.1555e-05 gnorm: 1.03 [14:26:34<10:04:03] +[titan] 2025-10-05 13:00:55,558 - root - INFO - step: 23575 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 13:00:55,558 - root - INFO - lr: 2.1547e-05 gnorm: 1.08 [14:26:45<10:03:52] +[titan] 2025-10-05 13:01:06,406 - root - INFO - step: 23580 loss: 
2.1114 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8663 +[titan] 2025-10-05 13:01:06,406 - root - INFO - lr: 2.1538e-05 gnorm: 1.10 [14:26:56<10:03:41] +[titan] 2025-10-05 13:01:17,310 - root - INFO - step: 23585 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 13:01:17,310 - root - INFO - lr: 2.1529e-05 gnorm: 1.11 [14:27:06<10:03:30] +[titan] 2025-10-05 13:01:28,160 - root - INFO - step: 23590 loss: 2.1064 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:01:28,161 - root - INFO - lr: 2.1521e-05 gnorm: 1.07 [14:27:17<10:03:19] +[titan] 2025-10-05 13:01:39,026 - root - INFO - step: 23595 loss: 2.1273 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8800 +[titan] 2025-10-05 13:01:39,026 - root - INFO - lr: 2.1512e-05 gnorm: 1.08 [14:27:28<10:03:08] +[titan] 2025-10-05 13:01:47,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:01:49,899 - root - INFO - step: 23600 loss: 2.0941 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 13:01:49,899 - root - INFO - lr: 2.1504e-05 gnorm: 1.10 [14:27:39<10:02:57] +[titan] 2025-10-05 13:02:00,769 - root - INFO - step: 23605 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8391 +[titan] 2025-10-05 13:02:00,769 - root - INFO - lr: 2.1495e-05 gnorm: 1.04 [14:27:50<10:02:45] +[titan] 2025-10-05 13:02:11,618 - root - INFO - step: 23610 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 13:02:11,618 - root - INFO - lr: 2.1486e-05 gnorm: 1.09 [14:28:01<10:02:34] +[titan] 2025-10-05 13:02:22,509 - root - INFO - step: 23615 loss: 2.1141 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8690 +[titan] 2025-10-05 13:02:22,509 - root - INFO - lr: 2.1478e-05 gnorm: 1.07 [14:28:12<10:02:23] +[titan] 2025-10-05 13:02:33,369 - root - INFO - step: 23620 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 13:02:33,369 - root - INFO - lr: 2.1469e-05 gnorm: 1.06 [14:28:23<10:02:12] +[titan] 2025-10-05 13:02:44,239 - root - INFO - step: 23625 loss: 2.1327 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2480 global_avg_mtp_loss: 1.8846 +[titan] 2025-10-05 13:02:44,239 - root - INFO - lr: 2.1461e-05 gnorm: 1.08 [14:28:33<10:02:01] +[titan] 2025-10-05 13:02:55,117 - root - INFO - step: 23630 loss: 2.0120 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:02:55,117 - root - INFO - lr: 
2.1452e-05 gnorm: 1.06 [14:28:44<10:01:50] +[titan] 2025-10-05 13:03:05,938 - root - INFO - step: 23635 loss: 2.1281 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8799 +[titan] 2025-10-05 13:03:05,938 - root - INFO - lr: 2.1443e-05 gnorm: 1.04 [14:28:55<10:01:38] +[titan] 2025-10-05 13:03:16,775 - root - INFO - step: 23640 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 13:03:16,775 - root - INFO - lr: 2.1435e-05 gnorm: 1.06 [14:29:06<10:01:27] +[titan] 2025-10-05 13:03:27,645 - root - INFO - step: 23645 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8228 +[titan] 2025-10-05 13:03:27,645 - root - INFO - lr: 2.1426e-05 gnorm: 1.08 [14:29:17<10:01:16] +[titan] 2025-10-05 13:03:36,314 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:03:38,490 - root - INFO - step: 23650 loss: 2.0936 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 13:03:38,491 - root - INFO - lr: 2.1418e-05 gnorm: 1.14 [14:29:28<10:01:05] +[titan] 2025-10-05 13:03:49,367 - root - INFO - step: 23655 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 13:03:49,367 - root - INFO - lr: 2.1409e-05 gnorm: 1.07 [14:29:39<10:00:54] +[titan] 2025-10-05 13:04:00,220 - root - INFO - step: 23660 loss: 2.0457 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 13:04:00,220 - root - INFO - lr: 2.1400e-05 gnorm: 1.05 [14:29:49<10:00:43] +[titan] 2025-10-05 13:04:11,080 - root - INFO - step: 23665 loss: 2.0569 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:04:11,080 - root - INFO - lr: 2.1392e-05 gnorm: 1.05 [14:30:00<10:00:32] +[titan] 2025-10-05 13:04:21,931 - root - INFO - step: 23670 loss: 2.0987 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8542 +[titan] 2025-10-05 13:04:21,931 - root - INFO - lr: 2.1383e-05 gnorm: 1.08 [14:30:11<10:00:20] +[titan] 2025-10-05 13:04:32,799 - root - INFO - step: 23675 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:04:32,799 - root - INFO - lr: 2.1375e-05 gnorm: 1.09 [14:30:22<10:00:09] +[titan] 2025-10-05 13:04:43,687 - root - INFO - step: 23680 loss: 2.0828 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8405 +[titan] 2025-10-05 13:04:43,687 - root - INFO - lr: 
2.1366e-05 gnorm: 1.09 [14:30:33< 9:59:58] +[titan] 2025-10-05 13:04:54,557 - root - INFO - step: 23685 loss: 2.0985 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8546 +[titan] 2025-10-05 13:04:54,557 - root - INFO - lr: 2.1358e-05 gnorm: 1.06 [14:30:44< 9:59:47] +[titan] 2025-10-05 13:05:05,423 - root - INFO - step: 23690 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8688 +[titan] 2025-10-05 13:05:05,423 - root - INFO - lr: 2.1349e-05 gnorm: 1.11 [14:30:55< 9:59:36] +[titan] 2025-10-05 13:05:16,292 - root - INFO - step: 23695 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 13:05:16,292 - root - INFO - lr: 2.1340e-05 gnorm: 1.07 [14:31:05< 9:59:25] +[titan] 2025-10-05 13:05:24,969 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:05:27,152 - root - INFO - step: 23700 loss: 2.0847 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8437 +[titan] 2025-10-05 13:05:27,152 - root - INFO - lr: 2.1332e-05 gnorm: 1.06 [14:31:16< 9:59:14] +[titan] 2025-10-05 13:05:38,037 - root - INFO - step: 23705 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 13:05:38,037 - root - INFO - lr: 2.1323e-05 gnorm: 1.07 [14:31:27< 9:59:02] +[titan] 2025-10-05 13:05:48,993 - root - INFO - step: 23710 loss: 2.0935 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.96 mfu: 41.96% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8500 +[titan] 2025-10-05 13:05:48,993 - root - INFO - lr: 2.1315e-05 gnorm: 1.06 [14:31:38< 9:58:51] +[titan] 2025-10-05 13:05:59,853 - root - INFO - step: 23715 loss: 2.0738 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8333 +[titan] 2025-10-05 13:05:59,853 - root - INFO - lr: 2.1306e-05 gnorm: 1.12 [14:31:49< 9:58:40] +[titan] 2025-10-05 13:06:10,728 - root - INFO - step: 23720 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8063 +[titan] 2025-10-05 13:06:10,728 - root - INFO - lr: 2.1297e-05 gnorm: 1.05 [14:32:00< 9:58:29] +[titan] 2025-10-05 13:06:21,603 - root - INFO - step: 23725 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8106 +[titan] 2025-10-05 13:06:21,603 - root - INFO - lr: 2.1289e-05 gnorm: 1.04 [14:32:11< 9:58:18] +[titan] 2025-10-05 13:06:32,482 - root - INFO - step: 23730 loss: 2.0312 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 13:06:32,482 - root - INFO - lr: 
2.1280e-05 gnorm: 1.09 [14:32:22< 9:58:07] +[titan] 2025-10-05 13:06:43,351 - root - INFO - step: 23735 loss: 2.0992 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8553 +[titan] 2025-10-05 13:06:43,351 - root - INFO - lr: 2.1272e-05 gnorm: 1.09 [14:32:33< 9:57:56] +[titan] 2025-10-05 13:06:54,243 - root - INFO - step: 23740 loss: 2.0278 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 13:06:54,243 - root - INFO - lr: 2.1263e-05 gnorm: 1.08 [14:32:43< 9:57:45] +[titan] 2025-10-05 13:07:05,147 - root - INFO - step: 23745 loss: 2.0481 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:07:05,147 - root - INFO - lr: 2.1255e-05 gnorm: 1.08 [14:32:54< 9:57:33] +[titan] 2025-10-05 13:07:13,827 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:07:16,019 - root - INFO - step: 23750 loss: 2.0022 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 13:07:16,019 - root - INFO - lr: 2.1246e-05 gnorm: 1.06 [14:33:05< 9:57:22] +[titan] 2025-10-05 13:07:26,891 - root - INFO - step: 23755 loss: 2.0412 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 13:07:26,891 - root - INFO - lr: 2.1237e-05 gnorm: 1.10 [14:33:16< 9:57:11] +[titan] 2025-10-05 13:07:37,752 - root - INFO - step: 23760 loss: 2.1055 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8617 +[titan] 2025-10-05 13:07:37,753 - root - INFO - lr: 2.1229e-05 gnorm: 1.10 [14:33:27< 9:57:00] +[titan] 2025-10-05 13:07:48,618 - root - INFO - step: 23765 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8288 +[titan] 2025-10-05 13:07:48,618 - root - INFO - lr: 2.1220e-05 gnorm: 1.07 [14:33:38< 9:56:49] +[titan] 2025-10-05 13:07:59,505 - root - INFO - step: 23770 loss: 2.1024 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:07:59,505 - root - INFO - lr: 2.1212e-05 gnorm: 1.10 [14:33:49< 9:56:38] +[titan] 2025-10-05 13:08:10,407 - root - INFO - step: 23775 loss: 2.1053 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8605 +[titan] 2025-10-05 13:08:10,408 - root - INFO - lr: 2.1203e-05 gnorm: 1.07 [14:34:00< 9:56:27] +[titan] 2025-10-05 13:08:21,270 - root - INFO - step: 23780 loss: 2.0507 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 13:08:21,270 - root - INFO - lr: 
2.1195e-05 gnorm: 1.12 [14:34:10< 9:56:16] +[titan] 2025-10-05 13:08:32,141 - root - INFO - step: 23785 loss: 2.0872 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:08:32,141 - root - INFO - lr: 2.1186e-05 gnorm: 1.07 [14:34:21< 9:56:04] +[titan] 2025-10-05 13:08:43,013 - root - INFO - step: 23790 loss: 2.0543 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8158 +[titan] 2025-10-05 13:08:43,013 - root - INFO - lr: 2.1177e-05 gnorm: 1.08 [14:34:32< 9:55:53] +[titan] 2025-10-05 13:08:53,898 - root - INFO - step: 23795 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 13:08:53,898 - root - INFO - lr: 2.1169e-05 gnorm: 1.08 [14:34:43< 9:55:42] +[titan] 2025-10-05 13:09:02,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:09:04,767 - root - INFO - step: 23800 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8590 +[titan] 2025-10-05 13:09:04,767 - root - INFO - lr: 2.1160e-05 gnorm: 1.07 [14:34:54< 9:55:31] +[titan] 2025-10-05 13:09:15,675 - root - INFO - step: 23805 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:09:15,675 - root - INFO - lr: 2.1152e-05 gnorm: 1.07 [14:35:05< 9:55:20] +[titan] 2025-10-05 13:09:26,546 - root - INFO - step: 23810 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2458 global_avg_mtp_loss: 1.8689 +[titan] 2025-10-05 13:09:26,546 - root - INFO - lr: 2.1143e-05 gnorm: 1.06 [14:35:16< 9:55:09] +[titan] 2025-10-05 13:09:37,416 - root - INFO - step: 23815 loss: 2.0689 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:09:37,416 - root - INFO - lr: 2.1135e-05 gnorm: 1.04 [14:35:27< 9:54:58] +[titan] 2025-10-05 13:09:48,302 - root - INFO - step: 23820 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 13:09:48,302 - root - INFO - lr: 2.1126e-05 gnorm: 1.05 [14:35:37< 9:54:47] +[titan] 2025-10-05 13:09:59,200 - root - INFO - step: 23825 loss: 2.1145 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8692 +[titan] 2025-10-05 13:09:59,200 - root - INFO - lr: 2.1118e-05 gnorm: 1.10 [14:35:48< 9:54:35] +[titan] 2025-10-05 13:10:10,087 - root - INFO - step: 23830 loss: 2.0878 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8449 +[titan] 2025-10-05 13:10:10,087 - root - INFO - lr: 
2.1109e-05 gnorm: 1.07 [14:35:59< 9:54:24] +[titan] 2025-10-05 13:10:20,968 - root - INFO - step: 23835 loss: 2.1275 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8802 +[titan] 2025-10-05 13:10:20,968 - root - INFO - lr: 2.1100e-05 gnorm: 1.11 [14:36:10< 9:54:13] +[titan] 2025-10-05 13:10:31,877 - root - INFO - step: 23840 loss: 2.0468 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 13:10:31,877 - root - INFO - lr: 2.1092e-05 gnorm: 1.07 [14:36:21< 9:54:02] +[titan] 2025-10-05 13:10:42,750 - root - INFO - step: 23845 loss: 2.1299 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8826 +[titan] 2025-10-05 13:10:42,751 - root - INFO - lr: 2.1083e-05 gnorm: 1.08 [14:36:32< 9:53:51] +[titan] 2025-10-05 13:10:51,448 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:10:53,641 - root - INFO - step: 23850 loss: 2.0254 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 13:10:53,642 - root - INFO - lr: 2.1075e-05 gnorm: 1.07 [14:36:43< 9:53:40] +[titan] 2025-10-05 13:11:04,523 - root - INFO - step: 23855 loss: 2.0986 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8551 +[titan] 2025-10-05 13:11:04,523 - root - INFO - lr: 2.1066e-05 gnorm: 1.09 [14:36:54< 9:53:29] +[titan] 2025-10-05 13:11:15,407 - root - INFO - step: 23860 loss: 2.0857 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8430 +[titan] 2025-10-05 13:11:15,407 - root - INFO - lr: 2.1058e-05 gnorm: 1.07 [14:37:05< 9:53:18] +[titan] 2025-10-05 13:11:26,299 - root - INFO - step: 23865 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8171 +[titan] 2025-10-05 13:11:26,299 - root - INFO - lr: 2.1049e-05 gnorm: 1.08 [14:37:15< 9:53:06] +[titan] 2025-10-05 13:11:37,198 - root - INFO - step: 23870 loss: 2.1119 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:11:37,198 - root - INFO - lr: 2.1041e-05 gnorm: 1.10 [14:37:26< 9:52:55] +[titan] 2025-10-05 13:11:48,068 - root - INFO - step: 23875 loss: 2.0789 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 13:11:48,068 - root - INFO - lr: 2.1032e-05 gnorm: 1.03 [14:37:37< 9:52:44] +[titan] 2025-10-05 13:11:58,937 - root - INFO - step: 23880 loss: 2.1572 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9068 +[titan] 2025-10-05 13:11:58,937 - root - INFO - lr: 
2.1023e-05 gnorm: 1.10 [14:37:48< 9:52:33] +[titan] 2025-10-05 13:12:09,818 - root - INFO - step: 23885 loss: 2.1050 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:12:09,818 - root - INFO - lr: 2.1015e-05 gnorm: 1.07 [14:37:59< 9:52:22] +[titan] 2025-10-05 13:12:20,691 - root - INFO - step: 23890 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 13:12:20,691 - root - INFO - lr: 2.1006e-05 gnorm: 1.04 [14:38:10< 9:52:11] +[titan] 2025-10-05 13:12:31,575 - root - INFO - step: 23895 loss: 2.0886 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8459 +[titan] 2025-10-05 13:12:31,575 - root - INFO - lr: 2.0998e-05 gnorm: 1.07 [14:38:21< 9:52:00] +[titan] 2025-10-05 13:12:40,266 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:12:42,455 - root - INFO - step: 23900 loss: 2.1280 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8806 +[titan] 2025-10-05 13:12:42,455 - root - INFO - lr: 2.0989e-05 gnorm: 1.07 [14:38:32< 9:51:48] +[titan] 2025-10-05 13:12:53,357 - root - INFO - step: 23905 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 13:12:53,357 - root - INFO - lr: 2.0981e-05 gnorm: 1.10 [14:38:43< 9:51:37] +[titan] 2025-10-05 13:13:04,239 - root - INFO - step: 23910 loss: 2.0909 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8487 +[titan] 2025-10-05 13:13:04,239 - root - INFO - lr: 2.0972e-05 gnorm: 1.07 [14:38:53< 9:51:26] +[titan] 2025-10-05 13:13:15,113 - root - INFO - step: 23915 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 13:13:15,113 - root - INFO - lr: 2.0964e-05 gnorm: 1.10 [14:39:04< 9:51:15] +[titan] 2025-10-05 13:13:25,979 - root - INFO - step: 23920 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 13:13:25,980 - root - INFO - lr: 2.0955e-05 gnorm: 1.05 [14:39:15< 9:51:04] +[titan] 2025-10-05 13:13:36,839 - root - INFO - step: 23925 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8255 +[titan] 2025-10-05 13:13:36,839 - root - INFO - lr: 2.0947e-05 gnorm: 1.08 [14:39:26< 9:50:53] +[titan] 2025-10-05 13:13:47,718 - root - INFO - step: 23930 loss: 2.0539 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 13:13:47,718 - root - INFO - lr: 
2.0938e-05 gnorm: 1.07 [14:39:37< 9:50:42] +[titan] 2025-10-05 13:13:58,659 - root - INFO - step: 23935 loss: 2.1295 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8828 +[titan] 2025-10-05 13:13:58,659 - root - INFO - lr: 2.0929e-05 gnorm: 1.09 [14:39:48< 9:50:31] +[titan] 2025-10-05 13:14:09,537 - root - INFO - step: 23940 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7883 +[titan] 2025-10-05 13:14:09,537 - root - INFO - lr: 2.0921e-05 gnorm: 1.06 [14:39:59< 9:50:20] +[titan] 2025-10-05 13:14:20,423 - root - INFO - step: 23945 loss: 2.0391 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8022 +[titan] 2025-10-05 13:14:20,423 - root - INFO - lr: 2.0912e-05 gnorm: 1.08 [14:40:10< 9:50:08] +[titan] 2025-10-05 13:14:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:14:31,294 - root - INFO - step: 23950 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8110 +[titan] 2025-10-05 13:14:31,294 - root - INFO - lr: 2.0904e-05 gnorm: 1.02 [14:40:20< 9:49:57] +[titan] 2025-10-05 13:14:42,149 - root - INFO - step: 23955 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:14:42,149 - root - INFO - lr: 2.0895e-05 gnorm: 1.11 [14:40:31< 9:49:46] +[titan] 2025-10-05 13:14:53,021 - root - INFO - step: 23960 loss: 2.0544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 13:14:53,021 - root - INFO - lr: 2.0887e-05 gnorm: 1.07 [14:40:42< 9:49:35] +[titan] 2025-10-05 13:15:03,924 - root - INFO - step: 23965 loss: 2.0186 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 13:15:03,925 - root - INFO - lr: 2.0878e-05 gnorm: 1.08 [14:40:53< 9:49:24] +[titan] 2025-10-05 13:15:14,778 - root - INFO - step: 23970 loss: 2.0244 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 13:15:14,778 - root - INFO - lr: 2.0870e-05 gnorm: 1.10 [14:41:04< 9:49:13] +[titan] 2025-10-05 13:15:25,658 - root - INFO - step: 23975 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:15:25,658 - root - INFO - lr: 2.0861e-05 gnorm: 1.05 [14:41:15< 9:49:02] +[titan] 2025-10-05 13:15:36,526 - root - INFO - step: 23980 loss: 2.1043 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8593 +[titan] 2025-10-05 13:15:36,526 - root - INFO - lr: 
2.0853e-05 gnorm: 1.11 [14:41:26< 9:48:50] +[titan] 2025-10-05 13:15:47,390 - root - INFO - step: 23985 loss: 2.1519 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2494 global_avg_mtp_loss: 1.9026 +[titan] 2025-10-05 13:15:47,390 - root - INFO - lr: 2.0844e-05 gnorm: 1.10 [14:41:37< 9:48:39] +[titan] 2025-10-05 13:15:58,289 - root - INFO - step: 23990 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 13:15:58,289 - root - INFO - lr: 2.0836e-05 gnorm: 1.06 [14:41:47< 9:48:28] +[titan] 2025-10-05 13:16:09,157 - root - INFO - step: 23995 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8039 +[titan] 2025-10-05 13:16:09,157 - root - INFO - lr: 2.0827e-05 gnorm: 1.11 [14:41:58< 9:48:17] +[titan] 2025-10-05 13:16:17,876 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:16:20,053 - root - INFO - step: 24000 loss: 2.0037 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:16:20,053 - root - INFO - lr: 2.0819e-05 gnorm: 1.08 [14:42:09< 9:48:06] +[titan] 2025-10-05 13:16:30,898 - root - INFO - step: 24005 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 13:16:30,899 - root - INFO - lr: 2.0810e-05 gnorm: 1.07 [14:42:20< 9:47:55] +[titan] 2025-10-05 13:16:41,756 - root - INFO - step: 24010 loss: 2.0903 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 13:16:41,757 - root - INFO - lr: 2.0802e-05 gnorm: 1.05 [14:42:31< 9:47:44] +[titan] 2025-10-05 13:16:52,618 - root - INFO - step: 24015 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8347 +[titan] 2025-10-05 13:16:52,618 - root - INFO - lr: 2.0793e-05 gnorm: 1.12 [14:42:42< 9:47:33] +[titan] 2025-10-05 13:17:03,489 - root - INFO - step: 24020 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:17:03,489 - root - INFO - lr: 2.0785e-05 gnorm: 1.10 [14:42:53< 9:47:21] +[titan] 2025-10-05 13:17:14,356 - root - INFO - step: 24025 loss: 2.1091 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8630 +[titan] 2025-10-05 13:17:14,356 - root - INFO - lr: 2.0776e-05 gnorm: 1.08 [14:43:04< 9:47:10] +[titan] 2025-10-05 13:17:25,293 - root - INFO - step: 24030 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 13:17:25,293 - root - INFO - lr: 
2.0767e-05 gnorm: 1.14 [14:43:14< 9:46:59] +[titan] 2025-10-05 13:17:36,153 - root - INFO - step: 24035 loss: 2.0553 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8156 +[titan] 2025-10-05 13:17:36,153 - root - INFO - lr: 2.0759e-05 gnorm: 1.07 [14:43:25< 9:46:48] +[titan] 2025-10-05 13:17:47,022 - root - INFO - step: 24040 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 13:17:47,022 - root - INFO - lr: 2.0750e-05 gnorm: 1.08 [14:43:36< 9:46:37] +[titan] 2025-10-05 13:17:57,898 - root - INFO - step: 24045 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 13:17:57,899 - root - INFO - lr: 2.0742e-05 gnorm: 1.07 [14:43:47< 9:46:26] +[titan] 2025-10-05 13:18:06,589 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:18:08,771 - root - INFO - step: 24050 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:18:08,771 - root - INFO - lr: 2.0733e-05 gnorm: 1.05 [14:43:58< 9:46:15] +[titan] 2025-10-05 13:18:19,609 - root - INFO - step: 24055 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 13:18:19,609 - root - INFO - lr: 2.0725e-05 gnorm: 1.10 [14:44:09< 9:46:04] +[titan] 2025-10-05 13:18:30,457 - root - INFO - step: 24060 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7801 +[titan] 2025-10-05 13:18:30,457 - root - INFO - lr: 2.0716e-05 gnorm: 1.12 [14:44:20< 9:45:52] +[titan] 2025-10-05 13:18:39,433 - root - INFO - Dumping profiler traces at step 24064 +[titan] 2025-10-05 13:18:39,475 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:18:41,660 - root - INFO - step: 24065 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,250 tflops: 405.80 mfu: 41.03% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7997 +[titan] 2025-10-05 13:18:41,660 - root - INFO - lr: 2.0708e-05 gnorm: 1.05 [14:44:31< 9:45:41] +[titan] 2025-10-05 13:18:52,499 - root - INFO - step: 24070 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 13:18:52,499 - root - INFO - lr: 2.0699e-05 gnorm: 1.05 [14:44:42< 9:45:30] +[titan] 2025-10-05 13:19:03,398 - root - INFO - step: 24075 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 13:19:03,398 - root - INFO - lr: 2.0691e-05 gnorm: 1.08 [14:44:53< 9:45:19] +[titan] 2025-10-05 13:19:14,221 - root - INFO - step: 24080 loss: 
2.0782 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8382 +[titan] 2025-10-05 13:19:14,221 - root - INFO - lr: 2.0682e-05 gnorm: 1.08 [14:45:03< 9:45:08] +[titan] 2025-10-05 13:19:25,059 - root - INFO - step: 24085 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 13:19:25,059 - root - INFO - lr: 2.0674e-05 gnorm: 1.05 [14:45:14< 9:44:57] +[titan] 2025-10-05 13:19:35,885 - root - INFO - step: 24090 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 13:19:35,885 - root - INFO - lr: 2.0665e-05 gnorm: 1.08 [14:45:25< 9:44:46] +[titan] 2025-10-05 13:19:46,755 - root - INFO - step: 24095 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 13:19:46,755 - root - INFO - lr: 2.0657e-05 gnorm: 1.09 [14:45:36< 9:44:35] +[titan] 2025-10-05 13:19:55,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:19:57,605 - root - INFO - step: 24100 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 13:19:57,605 - root - INFO - lr: 2.0648e-05 gnorm: 1.05 [14:45:47< 9:44:23] +[titan] 2025-10-05 13:20:08,458 - root - INFO - step: 24105 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8711 +[titan] 2025-10-05 13:20:08,458 - root - INFO - lr: 2.0640e-05 gnorm: 1.11 [14:45:58< 9:44:12] +[titan] 2025-10-05 13:20:19,304 - root - INFO - step: 24110 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:20:19,305 - root - INFO - lr: 2.0631e-05 gnorm: 1.04 [14:46:08< 9:44:01] +[titan] 2025-10-05 13:20:30,155 - root - INFO - step: 24115 loss: 2.0297 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 13:20:30,155 - root - INFO - lr: 2.0623e-05 gnorm: 1.07 [14:46:19< 9:43:50] +[titan] 2025-10-05 13:20:41,004 - root - INFO - step: 24120 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:20:41,005 - root - INFO - lr: 2.0614e-05 gnorm: 1.07 [14:46:30< 9:43:39] +[titan] 2025-10-05 13:20:51,867 - root - INFO - step: 24125 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8402 +[titan] 2025-10-05 13:20:51,867 - root - INFO - lr: 2.0606e-05 gnorm: 1.12 [14:46:41< 9:43:28] +[titan] 2025-10-05 13:21:02,698 - root - INFO - step: 24130 loss: 2.0869 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 13:21:02,699 - root - INFO - lr: 
2.0597e-05 gnorm: 1.06 [14:46:52< 9:43:17] +[titan] 2025-10-05 13:21:13,527 - root - INFO - step: 24135 loss: 2.1329 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2488 global_avg_mtp_loss: 1.8842 +[titan] 2025-10-05 13:21:13,527 - root - INFO - lr: 2.0589e-05 gnorm: 1.10 [14:47:03< 9:43:05] +[titan] 2025-10-05 13:21:24,355 - root - INFO - step: 24140 loss: 2.0475 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8102 +[titan] 2025-10-05 13:21:24,355 - root - INFO - lr: 2.0580e-05 gnorm: 1.07 [14:47:13< 9:42:54] +[titan] 2025-10-05 13:21:35,208 - root - INFO - step: 24145 loss: 2.1059 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2455 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:21:35,208 - root - INFO - lr: 2.0572e-05 gnorm: 1.10 [14:47:24< 9:42:43] +[titan] 2025-10-05 13:21:43,854 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:21:46,037 - root - INFO - step: 24150 loss: 2.0404 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.8051 +[titan] 2025-10-05 13:21:46,037 - root - INFO - lr: 2.0563e-05 gnorm: 1.05 [14:47:35< 9:42:32] +[titan] 2025-10-05 13:21:56,862 - root - INFO - step: 24155 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8062 +[titan] 2025-10-05 13:21:56,862 - root - INFO - lr: 2.0555e-05 gnorm: 1.05 [14:47:46< 9:42:21] +[titan] 2025-10-05 13:22:07,697 - root - INFO - step: 24160 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,245 tflops: 419.60 mfu: 42.43% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:22:07,697 - root - INFO - lr: 2.0546e-05 gnorm: 1.07 [14:47:57< 9:42:10] +[titan] 2025-10-05 13:22:18,551 - root - INFO - step: 24165 loss: 2.0865 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 13:22:18,551 - root - INFO - lr: 2.0538e-05 gnorm: 1.09 [14:48:08< 9:41:59] +[titan] 2025-10-05 13:22:29,396 - root - INFO - step: 24170 loss: 2.0779 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:22:29,396 - root - INFO - lr: 2.0529e-05 gnorm: 1.08 [14:48:19< 9:41:47] +[titan] 2025-10-05 13:22:40,227 - root - INFO - step: 24175 loss: 2.1142 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8696 +[titan] 2025-10-05 13:22:40,227 - root - INFO - lr: 2.0521e-05 gnorm: 1.09 [14:48:29< 9:41:36] +[titan] 2025-10-05 13:22:51,092 - root - INFO - step: 24180 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 13:22:51,093 - root - INFO - lr: 
2.0512e-05 gnorm: 1.09 [14:48:40< 9:41:25] +[titan] 2025-10-05 13:23:01,953 - root - INFO - step: 24185 loss: 1.9953 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 13:23:01,953 - root - INFO - lr: 2.0504e-05 gnorm: 1.07 [14:48:51< 9:41:14] +[titan] 2025-10-05 13:23:12,844 - root - INFO - step: 24190 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.9079 +[titan] 2025-10-05 13:23:12,844 - root - INFO - lr: 2.0496e-05 gnorm: 1.15 [14:49:02< 9:41:03] +[titan] 2025-10-05 13:23:23,695 - root - INFO - step: 24195 loss: 2.1135 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2450 global_avg_mtp_loss: 1.8686 +[titan] 2025-10-05 13:23:23,695 - root - INFO - lr: 2.0487e-05 gnorm: 1.07 [14:49:13< 9:40:52] +[titan] 2025-10-05 13:23:32,375 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:23:34,564 - root - INFO - step: 24200 loss: 2.0236 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:23:34,564 - root - INFO - lr: 2.0479e-05 gnorm: 1.07 [14:49:24< 9:40:41] +[titan] 2025-10-05 13:23:45,424 - root - INFO - step: 24205 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 13:23:45,425 - root - INFO - lr: 2.0470e-05 gnorm: 1.07 [14:49:35< 9:40:29] +[titan] 2025-10-05 13:23:56,267 - root - INFO - step: 24210 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 13:23:56,267 - root - INFO - lr: 2.0462e-05 gnorm: 1.03 [14:49:45< 9:40:18] +[titan] 2025-10-05 13:24:07,115 - root - INFO - step: 24215 loss: 2.1109 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8655 +[titan] 2025-10-05 13:24:07,115 - root - INFO - lr: 2.0453e-05 gnorm: 1.12 [14:49:56< 9:40:07] +[titan] 2025-10-05 13:24:17,952 - root - INFO - step: 24220 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 13:24:17,952 - root - INFO - lr: 2.0445e-05 gnorm: 1.13 [14:50:07< 9:39:56] +[titan] 2025-10-05 13:24:28,825 - root - INFO - step: 24225 loss: 2.0939 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8509 +[titan] 2025-10-05 13:24:28,826 - root - INFO - lr: 2.0436e-05 gnorm: 1.06 [14:50:18< 9:39:45] +[titan] 2025-10-05 13:24:39,649 - root - INFO - step: 24230 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 13:24:39,649 - root - INFO - lr: 
2.0428e-05 gnorm: 1.10 [14:50:29< 9:39:34] +[titan] 2025-10-05 13:24:50,487 - root - INFO - step: 24235 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8723 +[titan] 2025-10-05 13:24:50,487 - root - INFO - lr: 2.0419e-05 gnorm: 1.07 [14:50:40< 9:39:23] +[titan] 2025-10-05 13:25:01,334 - root - INFO - step: 24240 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:25:01,335 - root - INFO - lr: 2.0411e-05 gnorm: 1.02 [14:50:50< 9:39:11] +[titan] 2025-10-05 13:25:12,172 - root - INFO - step: 24245 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 13:25:12,172 - root - INFO - lr: 2.0402e-05 gnorm: 1.07 [14:51:01< 9:39:00] +[titan] 2025-10-05 13:25:20,844 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:25:23,024 - root - INFO - step: 24250 loss: 2.1386 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2482 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:25:23,025 - root - INFO - lr: 2.0394e-05 gnorm: 1.10 [14:51:12< 9:38:49] +[titan] 2025-10-05 13:25:33,889 - root - INFO - step: 24255 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8122 +[titan] 2025-10-05 13:25:33,889 - root - INFO - lr: 2.0385e-05 gnorm: 1.11 [14:51:23< 9:38:38] +[titan] 2025-10-05 13:25:44,730 - root - INFO - step: 24260 loss: 2.1169 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8716 +[titan] 2025-10-05 13:25:44,730 - root - INFO - lr: 2.0377e-05 gnorm: 1.07 [14:51:34< 9:38:27] +[titan] 2025-10-05 13:25:55,582 - root - INFO - step: 24265 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 13:25:55,582 - root - INFO - lr: 2.0368e-05 gnorm: 1.07 [14:51:45< 9:38:16] +[titan] 2025-10-05 13:26:06,446 - root - INFO - step: 24270 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 13:26:06,447 - root - INFO - lr: 2.0360e-05 gnorm: 1.08 [14:51:56< 9:38:05] +[titan] 2025-10-05 13:26:17,296 - root - INFO - step: 24275 loss: 2.0367 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8000 +[titan] 2025-10-05 13:26:17,296 - root - INFO - lr: 2.0352e-05 gnorm: 1.08 [14:52:06< 9:37:53] +[titan] 2025-10-05 13:26:28,151 - root - INFO - step: 24280 loss: 2.1901 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2561 global_avg_mtp_loss: 1.9340 +[titan] 2025-10-05 13:26:28,151 - root - INFO - lr: 
2.0343e-05 gnorm: 1.09 [14:52:17< 9:37:42] +[titan] 2025-10-05 13:26:39,050 - root - INFO - step: 24285 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 13:26:39,051 - root - INFO - lr: 2.0335e-05 gnorm: 1.10 [14:52:28< 9:37:31] +[titan] 2025-10-05 13:26:49,902 - root - INFO - step: 24290 loss: 2.0746 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:26:49,902 - root - INFO - lr: 2.0326e-05 gnorm: 1.07 [14:52:39< 9:37:20] +[titan] 2025-10-05 13:27:00,733 - root - INFO - step: 24295 loss: 2.1061 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8623 +[titan] 2025-10-05 13:27:00,734 - root - INFO - lr: 2.0318e-05 gnorm: 1.11 [14:52:50< 9:37:09] +[titan] 2025-10-05 13:27:09,414 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:27:11,587 - root - INFO - step: 24300 loss: 2.0702 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8300 +[titan] 2025-10-05 13:27:11,587 - root - INFO - lr: 2.0309e-05 gnorm: 1.10 [14:53:01< 9:36:58] +[titan] 2025-10-05 13:27:22,433 - root - INFO - step: 24305 loss: 2.0897 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 13:27:22,433 - root - INFO - lr: 2.0301e-05 gnorm: 1.05 [14:53:12< 9:36:47] +[titan] 2025-10-05 13:27:33,270 - root - INFO - step: 24310 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 13:27:33,270 - root - INFO - lr: 2.0292e-05 gnorm: 1.06 [14:53:22< 9:36:36] +[titan] 2025-10-05 13:27:44,105 - root - INFO - step: 24315 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 13:27:44,105 - root - INFO - lr: 2.0284e-05 gnorm: 1.07 [14:53:33< 9:36:24] +[titan] 2025-10-05 13:27:54,981 - root - INFO - step: 24320 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 13:27:54,981 - root - INFO - lr: 2.0275e-05 gnorm: 1.13 [14:53:44< 9:36:13] +[titan] 2025-10-05 13:28:05,837 - root - INFO - step: 24325 loss: 2.1113 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 13:28:05,838 - root - INFO - lr: 2.0267e-05 gnorm: 1.14 [14:53:55< 9:36:02] +[titan] 2025-10-05 13:28:16,705 - root - INFO - step: 24330 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 13:28:16,705 - root - INFO - lr: 
2.0258e-05 gnorm: 1.05 [14:54:06< 9:35:51] +[titan] 2025-10-05 13:28:27,566 - root - INFO - step: 24335 loss: 2.1140 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2456 global_avg_mtp_loss: 1.8683 +[titan] 2025-10-05 13:28:27,566 - root - INFO - lr: 2.0250e-05 gnorm: 1.15 [14:54:17< 9:35:40] +[titan] 2025-10-05 13:28:38,418 - root - INFO - step: 24340 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 13:28:38,418 - root - INFO - lr: 2.0242e-05 gnorm: 1.08 [14:54:28< 9:35:29] +[titan] 2025-10-05 13:28:49,296 - root - INFO - step: 24345 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8368 +[titan] 2025-10-05 13:28:49,296 - root - INFO - lr: 2.0233e-05 gnorm: 1.14 [14:54:38< 9:35:18] +[titan] 2025-10-05 13:28:58,013 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:29:00,192 - root - INFO - step: 24350 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 13:29:00,192 - root - INFO - lr: 2.0225e-05 gnorm: 1.18 [14:54:49< 9:35:07] +[titan] 2025-10-05 13:29:11,072 - root - INFO - step: 24355 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 13:29:11,072 - root - INFO - lr: 2.0216e-05 gnorm: 1.09 [14:55:00< 9:34:55] +[titan] 2025-10-05 13:29:21,925 - root - INFO - step: 24360 loss: 2.1089 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8647 +[titan] 2025-10-05 13:29:21,925 - root - INFO - lr: 2.0208e-05 gnorm: 1.07 [14:55:11< 9:34:44] +[titan] 2025-10-05 13:29:32,780 - root - INFO - step: 24365 loss: 2.0836 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8424 +[titan] 2025-10-05 13:29:32,781 - root - INFO - lr: 2.0199e-05 gnorm: 1.10 [14:55:22< 9:34:33] +[titan] 2025-10-05 13:29:43,663 - root - INFO - step: 24370 loss: 2.0644 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 13:29:43,663 - root - INFO - lr: 2.0191e-05 gnorm: 1.10 [14:55:33< 9:34:22] +[titan] 2025-10-05 13:29:54,539 - root - INFO - step: 24375 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:29:54,540 - root - INFO - lr: 2.0182e-05 gnorm: 1.12 [14:55:44< 9:34:11] +[titan] 2025-10-05 13:30:05,417 - root - INFO - step: 24380 loss: 2.0864 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8436 +[titan] 2025-10-05 13:30:05,417 - root - INFO - lr: 
2.0174e-05 gnorm: 1.12 [14:55:55< 9:34:00] +[titan] 2025-10-05 13:30:16,350 - root - INFO - step: 24385 loss: 2.1282 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.82 mfu: 42.04% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8804 +[titan] 2025-10-05 13:30:16,350 - root - INFO - lr: 2.0166e-05 gnorm: 1.05 [14:56:05< 9:33:49] +[titan] 2025-10-05 13:30:27,217 - root - INFO - step: 24390 loss: 2.0751 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8345 +[titan] 2025-10-05 13:30:27,217 - root - INFO - lr: 2.0157e-05 gnorm: 1.12 [14:56:16< 9:33:38] +[titan] 2025-10-05 13:30:38,065 - root - INFO - step: 24395 loss: 2.1057 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8604 +[titan] 2025-10-05 13:30:38,065 - root - INFO - lr: 2.0149e-05 gnorm: 1.08 [14:56:27< 9:33:26] +[titan] 2025-10-05 13:30:46,747 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:30:48,926 - root - INFO - step: 24400 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 13:30:48,926 - root - INFO - lr: 2.0140e-05 gnorm: 1.09 [14:56:38< 9:33:15] +[titan] 2025-10-05 13:30:59,781 - root - INFO - step: 24405 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8166 +[titan] 2025-10-05 13:30:59,781 - root - INFO - lr: 2.0132e-05 gnorm: 1.07 [14:56:49< 9:33:04] +[titan] 2025-10-05 13:31:10,656 - root - INFO - step: 24410 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 13:31:10,656 - root - INFO - lr: 2.0123e-05 gnorm: 1.11 [14:57:00< 9:32:53] +[titan] 2025-10-05 13:31:21,555 - root - INFO - step: 24415 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8268 +[titan] 2025-10-05 13:31:21,555 - root - INFO - lr: 2.0115e-05 gnorm: 1.09 [14:57:11< 9:32:42] +[titan] 2025-10-05 13:31:32,426 - root - INFO - step: 24420 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2518 global_avg_mtp_loss: 1.8839 +[titan] 2025-10-05 13:31:32,426 - root - INFO - lr: 2.0107e-05 gnorm: 1.07 [14:57:22< 9:32:31] +[titan] 2025-10-05 13:31:43,323 - root - INFO - step: 24425 loss: 2.1757 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2574 global_avg_mtp_loss: 1.9182 +[titan] 2025-10-05 13:31:43,323 - root - INFO - lr: 2.0098e-05 gnorm: 1.31 [14:57:32< 9:32:20] +[titan] 2025-10-05 13:31:54,203 - root - INFO - step: 24430 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 13:31:54,204 - root - INFO - lr: 
2.0090e-05 gnorm: 1.05 [14:57:43< 9:32:09] +[titan] 2025-10-05 13:32:05,075 - root - INFO - step: 24435 loss: 2.0555 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 13:32:05,075 - root - INFO - lr: 2.0081e-05 gnorm: 1.07 [14:57:54< 9:31:57] +[titan] 2025-10-05 13:32:15,980 - root - INFO - step: 24440 loss: 2.1665 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2520 global_avg_mtp_loss: 1.9145 +[titan] 2025-10-05 13:32:15,980 - root - INFO - lr: 2.0073e-05 gnorm: 1.09 [14:58:05< 9:31:46] +[titan] 2025-10-05 13:32:26,906 - root - INFO - step: 24445 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 29,992 tflops: 416.09 mfu: 42.07% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 13:32:26,906 - root - INFO - lr: 2.0064e-05 gnorm: 1.08 [14:58:16< 9:31:35] +[titan] 2025-10-05 13:32:35,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:32:37,766 - root - INFO - step: 24450 loss: 2.0220 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 13:32:37,766 - root - INFO - lr: 2.0056e-05 gnorm: 1.06 [14:58:27< 9:31:24] +[titan] 2025-10-05 13:32:48,638 - root - INFO - step: 24455 loss: 2.1315 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8833 +[titan] 2025-10-05 13:32:48,638 - root - INFO - lr: 2.0048e-05 gnorm: 1.07 [14:58:38< 9:31:13] +[titan] 2025-10-05 13:32:59,507 - root - INFO - step: 24460 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 13:32:59,507 - root - INFO - lr: 2.0039e-05 gnorm: 1.07 [14:58:49< 9:31:02] +[titan] 2025-10-05 13:33:10,393 - root - INFO - step: 24465 loss: 2.0500 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:33:10,393 - root - INFO - lr: 2.0031e-05 gnorm: 1.05 [14:59:00< 9:30:51] +[titan] 2025-10-05 13:33:21,345 - root - INFO - step: 24470 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8103 +[titan] 2025-10-05 13:33:21,345 - root - INFO - lr: 2.0022e-05 gnorm: 1.06 [14:59:10< 9:30:40] +[titan] 2025-10-05 13:33:32,228 - root - INFO - step: 24475 loss: 2.0788 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 13:33:32,228 - root - INFO - lr: 2.0014e-05 gnorm: 1.09 [14:59:21< 9:30:29] +[titan] 2025-10-05 13:33:43,179 - root - INFO - step: 24480 loss: 2.1160 memory: 118.84GiB(85.28%) tps: 29,923 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2451 global_avg_mtp_loss: 1.8709 +[titan] 2025-10-05 13:33:43,180 - root - INFO - lr: 
2.0006e-05 gnorm: 1.10 [14:59:32< 9:30:18] +[titan] 2025-10-05 13:33:54,062 - root - INFO - step: 24485 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8354 +[titan] 2025-10-05 13:33:54,062 - root - INFO - lr: 1.9997e-05 gnorm: 1.07 [14:59:43< 9:30:06] +[titan] 2025-10-05 13:34:04,940 - root - INFO - step: 24490 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 13:34:04,940 - root - INFO - lr: 1.9989e-05 gnorm: 1.06 [14:59:54< 9:29:55] +[titan] 2025-10-05 13:34:15,844 - root - INFO - step: 24495 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8069 +[titan] 2025-10-05 13:34:15,844 - root - INFO - lr: 1.9980e-05 gnorm: 1.09 [15:00:05< 9:29:44] +[titan] 2025-10-05 13:34:24,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:34:26,706 - root - INFO - step: 24500 loss: 2.0796 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8381 +[titan] 2025-10-05 13:34:26,706 - root - INFO - lr: 1.9972e-05 gnorm: 1.11 [15:00:16< 9:29:33] +[titan] 2025-10-05 13:34:37,585 - root - INFO - step: 24505 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 13:34:37,585 - root - INFO - lr: 1.9963e-05 gnorm: 1.08 [15:00:27< 9:29:22] +[titan] 2025-10-05 13:34:48,499 - root - INFO - step: 24510 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8365 +[titan] 2025-10-05 13:34:48,500 - root - INFO - lr: 1.9955e-05 gnorm: 1.12 [15:00:38< 9:29:11] +[titan] 2025-10-05 13:34:59,379 - root - INFO - step: 24515 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 13:34:59,379 - root - INFO - lr: 1.9947e-05 gnorm: 1.09 [15:00:48< 9:29:00] +[titan] 2025-10-05 13:35:10,244 - root - INFO - step: 24520 loss: 2.0374 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8014 +[titan] 2025-10-05 13:35:10,245 - root - INFO - lr: 1.9938e-05 gnorm: 1.03 [15:00:59< 9:28:49] +[titan] 2025-10-05 13:35:21,112 - root - INFO - step: 24525 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 13:35:21,113 - root - INFO - lr: 1.9930e-05 gnorm: 1.06 [15:01:10< 9:28:38] +[titan] 2025-10-05 13:35:31,956 - root - INFO - step: 24530 loss: 2.1133 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8681 +[titan] 2025-10-05 13:35:31,957 - root - INFO - lr: 
1.9921e-05 gnorm: 1.08 [15:01:21< 9:28:26] +[titan] 2025-10-05 13:35:42,842 - root - INFO - step: 24535 loss: 2.0794 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8375 +[titan] 2025-10-05 13:35:42,842 - root - INFO - lr: 1.9913e-05 gnorm: 1.10 [15:01:32< 9:28:15] +[titan] 2025-10-05 13:35:53,706 - root - INFO - step: 24540 loss: 2.1082 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8634 +[titan] 2025-10-05 13:35:53,706 - root - INFO - lr: 1.9905e-05 gnorm: 1.11 [15:01:43< 9:28:04] +[titan] 2025-10-05 13:36:04,625 - root - INFO - step: 24545 loss: 2.1385 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8904 +[titan] 2025-10-05 13:36:04,625 - root - INFO - lr: 1.9896e-05 gnorm: 1.07 [15:01:54< 9:27:53] +[titan] 2025-10-05 13:36:13,292 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:36:15,477 - root - INFO - step: 24550 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 13:36:15,477 - root - INFO - lr: 1.9888e-05 gnorm: 1.08 [15:02:05< 9:27:42] +[titan] 2025-10-05 13:36:26,344 - root - INFO - step: 24555 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 13:36:26,344 - root - INFO - lr: 1.9879e-05 gnorm: 1.07 [15:02:15< 9:27:31] +[titan] 2025-10-05 13:36:37,204 - root - INFO - step: 24560 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 13:36:37,204 - root - INFO - lr: 1.9871e-05 gnorm: 1.07 [15:02:26< 9:27:20] +[titan] 2025-10-05 13:36:48,082 - root - INFO - step: 24565 loss: 2.1115 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8669 +[titan] 2025-10-05 13:36:48,082 - root - INFO - lr: 1.9863e-05 gnorm: 1.05 [15:02:37< 9:27:09] +[titan] 2025-10-05 13:36:58,948 - root - INFO - step: 24570 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 13:36:58,948 - root - INFO - lr: 1.9854e-05 gnorm: 1.07 [15:02:48< 9:26:57] +[titan] 2025-10-05 13:37:09,939 - root - INFO - step: 24575 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 29,815 tflops: 413.64 mfu: 41.82% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 13:37:09,939 - root - INFO - lr: 1.9846e-05 gnorm: 1.08 [15:02:59< 9:26:46] +[titan] 2025-10-05 13:37:12,301 - root - INFO - Dumping profiler traces at step 24576 +[titan] 2025-10-05 13:37:12,337 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:37:21,087 - root - INFO - step: 24580 loss: 
2.0679 memory: 118.84GiB(85.28%) tps: 29,394 tflops: 407.79 mfu: 41.23% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:37:21,087 - root - INFO - lr: 1.9837e-05 gnorm: 1.10 [15:03:10< 9:26:35] +[titan] 2025-10-05 13:37:31,945 - root - INFO - step: 24585 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 13:37:31,945 - root - INFO - lr: 1.9829e-05 gnorm: 1.04 [15:03:21< 9:26:24] +[titan] 2025-10-05 13:37:42,812 - root - INFO - step: 24590 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8024 +[titan] 2025-10-05 13:37:42,812 - root - INFO - lr: 1.9821e-05 gnorm: 1.05 [15:03:32< 9:26:13] +[titan] 2025-10-05 13:37:53,676 - root - INFO - step: 24595 loss: 2.0523 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8108 +[titan] 2025-10-05 13:37:53,677 - root - INFO - lr: 1.9812e-05 gnorm: 1.07 [15:03:43< 9:26:02] +[titan] 2025-10-05 13:38:02,369 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:38:04,554 - root - INFO - step: 24600 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8521 +[titan] 2025-10-05 13:38:04,554 - root - INFO - lr: 1.9804e-05 gnorm: 1.09 [15:03:54< 9:25:51] +[titan] 2025-10-05 13:38:15,471 - root - INFO - step: 24605 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 13:38:15,471 - root - INFO - lr: 1.9796e-05 gnorm: 1.07 [15:04:05< 9:25:40] +[titan] 2025-10-05 13:38:26,377 - root - INFO - step: 24610 loss: 2.1349 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8870 +[titan] 2025-10-05 13:38:26,377 - root - INFO - lr: 1.9787e-05 gnorm: 1.12 [15:04:15< 9:25:29] +[titan] 2025-10-05 13:38:37,243 - root - INFO - step: 24615 loss: 2.0786 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8367 +[titan] 2025-10-05 13:38:37,243 - root - INFO - lr: 1.9779e-05 gnorm: 1.09 [15:04:26< 9:25:18] +[titan] 2025-10-05 13:38:48,119 - root - INFO - step: 24620 loss: 2.1577 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.9074 +[titan] 2025-10-05 13:38:48,119 - root - INFO - lr: 1.9770e-05 gnorm: 1.07 [15:04:37< 9:25:07] +[titan] 2025-10-05 13:38:58,977 - root - INFO - step: 24625 loss: 2.0721 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8321 +[titan] 2025-10-05 13:38:58,977 - root - INFO - lr: 1.9762e-05 gnorm: 1.11 [15:04:48< 9:24:55] +[titan] 2025-10-05 13:39:09,830 - root - INFO - step: 24630 loss: 2.1406 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8930 +[titan] 2025-10-05 13:39:09,830 - root - INFO - lr: 
1.9754e-05 gnorm: 1.13 [15:04:59< 9:24:44] +[titan] 2025-10-05 13:39:20,732 - root - INFO - step: 24635 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 13:39:20,733 - root - INFO - lr: 1.9745e-05 gnorm: 1.10 [15:05:10< 9:24:33] +[titan] 2025-10-05 13:39:31,629 - root - INFO - step: 24640 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 13:39:31,629 - root - INFO - lr: 1.9737e-05 gnorm: 1.08 [15:05:21< 9:24:22] +[titan] 2025-10-05 13:39:42,484 - root - INFO - step: 24645 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 13:39:42,484 - root - INFO - lr: 1.9728e-05 gnorm: 1.05 [15:05:32< 9:24:11] +[titan] 2025-10-05 13:39:51,161 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:39:53,346 - root - INFO - step: 24650 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 13:39:53,346 - root - INFO - lr: 1.9720e-05 gnorm: 1.06 [15:05:42< 9:24:00] +[titan] 2025-10-05 13:40:04,203 - root - INFO - step: 24655 loss: 2.1036 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2441 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 13:40:04,203 - root - INFO - lr: 1.9712e-05 gnorm: 1.12 [15:05:53< 9:23:49] +[titan] 2025-10-05 13:40:15,073 - root - INFO - step: 24660 loss: 2.0882 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 13:40:15,073 - root - INFO - lr: 1.9703e-05 gnorm: 1.10 [15:06:04< 9:23:38] +[titan] 2025-10-05 13:40:25,992 - root - INFO - step: 24665 loss: 2.0640 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:40:25,992 - root - INFO - lr: 1.9695e-05 gnorm: 1.06 [15:06:15< 9:23:26] +[titan] 2025-10-05 13:40:36,894 - root - INFO - step: 24670 loss: 2.0856 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8434 +[titan] 2025-10-05 13:40:36,894 - root - INFO - lr: 1.9687e-05 gnorm: 1.12 [15:06:26< 9:23:15] +[titan] 2025-10-05 13:40:47,766 - root - INFO - step: 24675 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 13:40:47,766 - root - INFO - lr: 1.9678e-05 gnorm: 1.09 [15:06:37< 9:23:04] +[titan] 2025-10-05 13:40:58,618 - root - INFO - step: 24680 loss: 2.0515 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8134 +[titan] 2025-10-05 13:40:58,618 - root - INFO - lr: 
1.9670e-05 gnorm: 1.07 [15:06:48< 9:22:53] +[titan] 2025-10-05 13:41:09,490 - root - INFO - step: 24685 loss: 2.0772 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8370 +[titan] 2025-10-05 13:41:09,490 - root - INFO - lr: 1.9662e-05 gnorm: 1.13 [15:06:59< 9:22:42] +[titan] 2025-10-05 13:41:20,418 - root - INFO - step: 24690 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 13:41:20,418 - root - INFO - lr: 1.9653e-05 gnorm: 1.06 [15:07:10< 9:22:31] +[titan] 2025-10-05 13:41:31,285 - root - INFO - step: 24695 loss: 2.0651 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 13:41:31,285 - root - INFO - lr: 1.9645e-05 gnorm: 1.08 [15:07:20< 9:22:20] +[titan] 2025-10-05 13:41:39,977 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:41:42,156 - root - INFO - step: 24700 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 13:41:42,156 - root - INFO - lr: 1.9636e-05 gnorm: 1.11 [15:07:31< 9:22:09] +[titan] 2025-10-05 13:41:53,063 - root - INFO - step: 24705 loss: 2.1408 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2479 global_avg_mtp_loss: 1.8929 +[titan] 2025-10-05 13:41:53,063 - root - INFO - lr: 1.9628e-05 gnorm: 1.08 [15:07:42< 9:21:58] +[titan] 2025-10-05 13:42:03,922 - root - INFO - step: 24710 loss: 2.0804 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 13:42:03,922 - root - INFO - lr: 1.9620e-05 gnorm: 1.06 [15:07:53< 9:21:46] +[titan] 2025-10-05 13:42:14,790 - root - INFO - step: 24715 loss: 2.0666 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8274 +[titan] 2025-10-05 13:42:14,790 - root - INFO - lr: 1.9611e-05 gnorm: 1.09 [15:08:04< 9:21:35] +[titan] 2025-10-05 13:42:25,702 - root - INFO - step: 24720 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 13:42:25,702 - root - INFO - lr: 1.9603e-05 gnorm: 1.11 [15:08:15< 9:21:24] +[titan] 2025-10-05 13:42:36,573 - root - INFO - step: 24725 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 13:42:36,573 - root - INFO - lr: 1.9595e-05 gnorm: 1.08 [15:08:26< 9:21:13] +[titan] 2025-10-05 13:42:47,423 - root - INFO - step: 24730 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 13:42:47,424 - root - INFO - lr: 
1.9586e-05 gnorm: 1.12 [15:08:37< 9:21:02] +[titan] 2025-10-05 13:42:58,321 - root - INFO - step: 24735 loss: 2.1290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8824 +[titan] 2025-10-05 13:42:58,321 - root - INFO - lr: 1.9578e-05 gnorm: 1.08 [15:08:47< 9:20:51] +[titan] 2025-10-05 13:43:09,170 - root - INFO - step: 24740 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:43:09,171 - root - INFO - lr: 1.9570e-05 gnorm: 1.12 [15:08:58< 9:20:40] +[titan] 2025-10-05 13:43:20,002 - root - INFO - step: 24745 loss: 2.0612 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8216 +[titan] 2025-10-05 13:43:20,002 - root - INFO - lr: 1.9561e-05 gnorm: 1.11 [15:09:09< 9:20:29] +[titan] 2025-10-05 13:43:28,699 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:43:30,874 - root - INFO - step: 24750 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8301 +[titan] 2025-10-05 13:43:30,874 - root - INFO - lr: 1.9553e-05 gnorm: 1.08 [15:09:20< 9:20:18] +[titan] 2025-10-05 13:43:41,719 - root - INFO - step: 24755 loss: 2.0937 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 13:43:41,719 - root - INFO - lr: 1.9545e-05 gnorm: 1.11 [15:09:31< 9:20:06] +[titan] 2025-10-05 13:43:52,574 - root - INFO - step: 24760 loss: 2.0568 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 13:43:52,574 - root - INFO - lr: 1.9536e-05 gnorm: 1.07 [15:09:42< 9:19:55] +[titan] 2025-10-05 13:44:03,465 - root - INFO - step: 24765 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 13:44:03,465 - root - INFO - lr: 1.9528e-05 gnorm: 1.07 [15:09:53< 9:19:44] +[titan] 2025-10-05 13:44:14,316 - root - INFO - step: 24770 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 13:44:14,316 - root - INFO - lr: 1.9519e-05 gnorm: 1.05 [15:10:03< 9:19:33] +[titan] 2025-10-05 13:44:25,153 - root - INFO - step: 24775 loss: 2.0875 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8456 +[titan] 2025-10-05 13:44:25,154 - root - INFO - lr: 1.9511e-05 gnorm: 1.07 [15:10:14< 9:19:22] +[titan] 2025-10-05 13:44:36,002 - root - INFO - step: 24780 loss: 2.1003 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8573 +[titan] 2025-10-05 13:44:36,003 - root - INFO - lr: 
1.9503e-05 gnorm: 1.08 [15:10:25< 9:19:11] +[titan] 2025-10-05 13:44:46,858 - root - INFO - step: 24785 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 13:44:46,858 - root - INFO - lr: 1.9494e-05 gnorm: 1.07 [15:10:36< 9:19:00] +[titan] 2025-10-05 13:44:57,702 - root - INFO - step: 24790 loss: 2.0838 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 13:44:57,702 - root - INFO - lr: 1.9486e-05 gnorm: 1.08 [15:10:47< 9:18:49] +[titan] 2025-10-05 13:45:08,535 - root - INFO - step: 24795 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 13:45:08,535 - root - INFO - lr: 1.9478e-05 gnorm: 1.06 [15:10:58< 9:18:37] +[titan] 2025-10-05 13:45:17,247 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:45:19,421 - root - INFO - step: 24800 loss: 2.1176 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2457 global_avg_mtp_loss: 1.8718 +[titan] 2025-10-05 13:45:19,421 - root - INFO - lr: 1.9469e-05 gnorm: 1.08 [15:11:08< 9:18:26] +[titan] 2025-10-05 13:45:30,265 - root - INFO - step: 24805 loss: 2.0238 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 13:45:30,265 - root - INFO - lr: 1.9461e-05 gnorm: 1.07 [15:11:19< 9:18:15] +[titan] 2025-10-05 13:45:41,104 - root - INFO - step: 24810 loss: 2.0540 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8154 +[titan] 2025-10-05 13:45:41,104 - root - INFO - lr: 1.9453e-05 gnorm: 1.07 [15:11:30< 9:18:04] +[titan] 2025-10-05 13:45:51,953 - root - INFO - step: 24815 loss: 2.1243 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2465 global_avg_mtp_loss: 1.8778 +[titan] 2025-10-05 13:45:51,953 - root - INFO - lr: 1.9444e-05 gnorm: 1.11 [15:11:41< 9:17:53] +[titan] 2025-10-05 13:46:02,816 - root - INFO - step: 24820 loss: 2.1004 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2435 global_avg_mtp_loss: 1.8568 +[titan] 2025-10-05 13:46:02,816 - root - INFO - lr: 1.9436e-05 gnorm: 1.07 [15:11:52< 9:17:42] +[titan] 2025-10-05 13:46:13,676 - root - INFO - step: 24825 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 13:46:13,676 - root - INFO - lr: 1.9428e-05 gnorm: 1.10 [15:12:03< 9:17:31] +[titan] 2025-10-05 13:46:24,572 - root - INFO - step: 24830 loss: 2.1395 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2473 global_avg_mtp_loss: 1.8922 +[titan] 2025-10-05 13:46:24,572 - root - INFO - lr: 
1.9419e-05 gnorm: 1.10 [15:12:14< 9:17:20] +[titan] 2025-10-05 13:46:35,432 - root - INFO - step: 24835 loss: 2.1026 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:46:35,432 - root - INFO - lr: 1.9411e-05 gnorm: 1.07 [15:12:25< 9:17:08] +[titan] 2025-10-05 13:46:46,286 - root - INFO - step: 24840 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 13:46:46,287 - root - INFO - lr: 1.9403e-05 gnorm: 1.10 [15:12:35< 9:16:57] +[titan] 2025-10-05 13:46:57,123 - root - INFO - step: 24845 loss: 2.0606 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8212 +[titan] 2025-10-05 13:46:57,123 - root - INFO - lr: 1.9394e-05 gnorm: 1.07 [15:12:46< 9:16:46] +[titan] 2025-10-05 13:47:05,788 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:47:07,960 - root - INFO - step: 24850 loss: 2.1102 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2464 global_avg_mtp_loss: 1.8638 +[titan] 2025-10-05 13:47:07,960 - root - INFO - lr: 1.9386e-05 gnorm: 1.13 [15:12:57< 9:16:35] +[titan] 2025-10-05 13:47:18,794 - root - INFO - step: 24855 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8346 +[titan] 2025-10-05 13:47:18,794 - root - INFO - lr: 1.9378e-05 gnorm: 1.07 [15:13:08< 9:16:24] +[titan] 2025-10-05 13:47:29,672 - root - INFO - step: 24860 loss: 2.1559 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2496 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 13:47:29,672 - root - INFO - lr: 1.9369e-05 gnorm: 1.08 [15:13:19< 9:16:13] +[titan] 2025-10-05 13:47:40,555 - root - INFO - step: 24865 loss: 2.0479 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8095 +[titan] 2025-10-05 13:47:40,556 - root - INFO - lr: 1.9361e-05 gnorm: 1.14 [15:13:30< 9:16:02] +[titan] 2025-10-05 13:47:51,413 - root - INFO - step: 24870 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 13:47:51,413 - root - INFO - lr: 1.9353e-05 gnorm: 1.07 [15:13:40< 9:15:51] +[titan] 2025-10-05 13:48:02,253 - root - INFO - step: 24875 loss: 2.0532 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 13:48:02,253 - root - INFO - lr: 1.9345e-05 gnorm: 1.10 [15:13:51< 9:15:39] +[titan] 2025-10-05 13:48:13,099 - root - INFO - step: 24880 loss: 2.0338 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 13:48:13,099 - root - INFO - lr: 
1.9336e-05 gnorm: 1.08 [15:14:02< 9:15:28] +[titan] 2025-10-05 13:48:23,933 - root - INFO - step: 24885 loss: 2.0834 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 13:48:23,933 - root - INFO - lr: 1.9328e-05 gnorm: 1.08 [15:14:13< 9:15:17] +[titan] 2025-10-05 13:48:34,822 - root - INFO - step: 24890 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7910 +[titan] 2025-10-05 13:48:34,822 - root - INFO - lr: 1.9320e-05 gnorm: 1.05 [15:14:24< 9:15:06] +[titan] 2025-10-05 13:48:45,673 - root - INFO - step: 24895 loss: 2.0653 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8260 +[titan] 2025-10-05 13:48:45,673 - root - INFO - lr: 1.9311e-05 gnorm: 1.13 [15:14:35< 9:14:55] +[titan] 2025-10-05 13:48:54,332 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:48:56,513 - root - INFO - step: 24900 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8462 +[titan] 2025-10-05 13:48:56,513 - root - INFO - lr: 1.9303e-05 gnorm: 1.08 [15:14:46< 9:14:44] +[titan] 2025-10-05 13:49:07,354 - root - INFO - step: 24905 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2462 global_avg_mtp_loss: 1.8643 +[titan] 2025-10-05 13:49:07,354 - root - INFO - lr: 1.9295e-05 gnorm: 1.09 [15:14:56< 9:14:33] +[titan] 2025-10-05 13:49:18,206 - root - INFO - step: 24910 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 13:49:18,206 - root - INFO - lr: 1.9286e-05 gnorm: 1.06 [15:15:07< 9:14:22] +[titan] 2025-10-05 13:49:29,079 - root - INFO - step: 24915 loss: 2.0453 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 13:49:29,080 - root - INFO - lr: 1.9278e-05 gnorm: 1.07 [15:15:18< 9:14:10] +[titan] 2025-10-05 13:49:39,928 - root - INFO - step: 24920 loss: 2.1357 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2470 global_avg_mtp_loss: 1.8887 +[titan] 2025-10-05 13:49:39,928 - root - INFO - lr: 1.9270e-05 gnorm: 1.10 [15:15:29< 9:13:59] +[titan] 2025-10-05 13:49:50,803 - root - INFO - step: 24925 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7714 +[titan] 2025-10-05 13:49:50,804 - root - INFO - lr: 1.9261e-05 gnorm: 1.05 [15:15:40< 9:13:48] +[titan] 2025-10-05 13:50:01,632 - root - INFO - step: 24930 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.85 mfu: 42.45% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7977 +[titan] 2025-10-05 13:50:01,632 - root - INFO - lr: 
1.9253e-05 gnorm: 1.12 [15:15:51< 9:13:37] +[titan] 2025-10-05 13:50:12,484 - root - INFO - step: 24935 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 13:50:12,484 - root - INFO - lr: 1.9245e-05 gnorm: 1.08 [15:16:02< 9:13:26] +[titan] 2025-10-05 13:50:23,352 - root - INFO - step: 24940 loss: 2.0643 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 13:50:23,352 - root - INFO - lr: 1.9236e-05 gnorm: 1.08 [15:16:12< 9:13:15] +[titan] 2025-10-05 13:50:34,241 - root - INFO - step: 24945 loss: 2.0637 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8231 +[titan] 2025-10-05 13:50:34,241 - root - INFO - lr: 1.9228e-05 gnorm: 1.09 [15:16:23< 9:13:04] +[titan] 2025-10-05 13:50:42,919 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:50:45,094 - root - INFO - step: 24950 loss: 2.0635 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:50:45,094 - root - INFO - lr: 1.9220e-05 gnorm: 1.09 [15:16:34< 9:12:53] +[titan] 2025-10-05 13:50:55,957 - root - INFO - step: 24955 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8325 +[titan] 2025-10-05 13:50:55,957 - root - INFO - lr: 1.9212e-05 gnorm: 1.10 [15:16:45< 9:12:42] +[titan] 2025-10-05 13:51:06,846 - root - INFO - step: 24960 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 13:51:06,846 - root - INFO - lr: 1.9203e-05 gnorm: 1.18 [15:16:56< 9:12:30] +[titan] 2025-10-05 13:51:17,738 - root - INFO - step: 24965 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 13:51:17,738 - root - INFO - lr: 1.9195e-05 gnorm: 1.09 [15:17:07< 9:12:19] +[titan] 2025-10-05 13:51:28,604 - root - INFO - step: 24970 loss: 2.1023 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8589 +[titan] 2025-10-05 13:51:28,604 - root - INFO - lr: 1.9187e-05 gnorm: 1.11 [15:17:18< 9:12:08] +[titan] 2025-10-05 13:51:39,453 - root - INFO - step: 24975 loss: 2.0306 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 13:51:39,453 - root - INFO - lr: 1.9178e-05 gnorm: 1.12 [15:17:29< 9:11:57] +[titan] 2025-10-05 13:51:50,305 - root - INFO - step: 24980 loss: 2.0966 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2431 global_avg_mtp_loss: 1.8535 +[titan] 2025-10-05 13:51:50,305 - root - INFO - lr: 
1.9170e-05 gnorm: 1.08 [15:17:39< 9:11:46] +[titan] 2025-10-05 13:52:01,147 - root - INFO - step: 24985 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7712 +[titan] 2025-10-05 13:52:01,147 - root - INFO - lr: 1.9162e-05 gnorm: 1.09 [15:17:50< 9:11:35] +[titan] 2025-10-05 13:52:12,002 - root - INFO - step: 24990 loss: 2.0567 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 13:52:12,002 - root - INFO - lr: 1.9154e-05 gnorm: 1.09 [15:18:01< 9:11:24] +[titan] 2025-10-05 13:52:22,852 - root - INFO - step: 24995 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 13:52:22,852 - root - INFO - lr: 1.9145e-05 gnorm: 1.08 [15:18:12< 9:11:13] +[titan] 2025-10-05 13:52:31,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:52:33,740 - root - INFO - step: 25000 loss: 2.0319 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7959 +[titan] 2025-10-05 13:52:33,740 - root - INFO - lr: 1.9137e-05 gnorm: 1.07 [15:18:23< 9:11:01] +[titan] 2025-10-05 13:52:33,740 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 13:52:51,438 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 13:52:51,438 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.70 seconds. 
+[titan] 2025-10-05 13:54:51,998 - root - INFO - step: 25005 loss: 2.0275 memory: 118.84GiB(85.28%) tps: 2,370 tflops: 32.88 mfu: 3.32% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7926 +[titan] 2025-10-05 13:54:51,999 - root - INFO - lr: 1.9129e-05 gnorm: 1.11 [15:20:41< 9:12:07] +[titan] 2025-10-05 13:55:02,804 - root - INFO - step: 25010 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8047 +[titan] 2025-10-05 13:55:02,804 - root - INFO - lr: 1.9120e-05 gnorm: 1.11 [15:20:52< 9:11:56] +[titan] 2025-10-05 13:55:13,603 - root - INFO - step: 25015 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 30,345 tflops: 420.99 mfu: 42.57% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8040 +[titan] 2025-10-05 13:55:13,603 - root - INFO - lr: 1.9112e-05 gnorm: 1.08 [15:21:03< 9:11:44] +[titan] 2025-10-05 13:55:24,411 - root - INFO - step: 25020 loss: 2.0888 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.62 mfu: 42.53% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8474 +[titan] 2025-10-05 13:55:24,411 - root - INFO - lr: 1.9104e-05 gnorm: 1.12 [15:21:13< 9:11:33] +[titan] 2025-10-05 13:55:35,262 - root - INFO - step: 25025 loss: 2.0508 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 13:55:35,263 - root - INFO - lr: 1.9096e-05 gnorm: 1.09 [15:21:24< 9:11:22] +[titan] 2025-10-05 13:55:46,139 - root - INFO - step: 25030 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 13:55:46,139 - root - INFO - lr: 1.9087e-05 gnorm: 1.12 [15:21:35< 9:11:11] +[titan] 2025-10-05 13:55:56,971 - root - INFO - step: 25035 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 13:55:56,971 - root - INFO - lr: 
1.9079e-05 gnorm: 1.06 [15:21:46< 9:11:00] +[titan] 2025-10-05 13:56:07,833 - root - INFO - step: 25040 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7890 +[titan] 2025-10-05 13:56:07,833 - root - INFO - lr: 1.9071e-05 gnorm: 1.09 [15:21:57< 9:10:49] +[titan] 2025-10-05 13:56:18,697 - root - INFO - step: 25045 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 13:56:18,697 - root - INFO - lr: 1.9062e-05 gnorm: 1.07 [15:22:08< 9:10:37] +[titan] 2025-10-05 13:56:27,381 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 13:56:29,566 - root - INFO - step: 25050 loss: 2.1138 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2453 global_avg_mtp_loss: 1.8685 +[titan] 2025-10-05 13:56:29,566 - root - INFO - lr: 1.9054e-05 gnorm: 1.09 [15:22:19< 9:10:26] +[titan] 2025-10-05 13:56:40,477 - root - INFO - step: 25055 loss: 2.0926 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8503 +[titan] 2025-10-05 13:56:40,478 - root - INFO - lr: 1.9046e-05 gnorm: 1.10 [15:22:30< 9:10:15] +[titan] 2025-10-05 13:56:51,355 - root - INFO - step: 25060 loss: 2.0513 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 13:56:51,355 - root - INFO - lr: 1.9038e-05 gnorm: 1.09 [15:22:40< 9:10:04] +[titan] 2025-10-05 13:57:02,218 - root - INFO - step: 25065 loss: 2.1039 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8602 +[titan] 2025-10-05 13:57:02,218 - root - INFO - lr: 1.9029e-05 gnorm: 1.13 [15:22:51< 9:09:53] +[titan] 2025-10-05 13:57:13,100 - root - INFO - step: 25070 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 
30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 13:57:13,100 - root - INFO - lr: 1.9021e-05 gnorm: 1.08 [15:23:02< 9:09:42] +[titan] 2025-10-05 13:57:23,991 - root - INFO - step: 25075 loss: 2.0586 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8197 +[titan] 2025-10-05 13:57:23,991 - root - INFO - lr: 1.9013e-05 gnorm: 1.07 [15:23:13< 9:09:31] +[titan] 2025-10-05 13:57:34,864 - root - INFO - step: 25080 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8374 +[titan] 2025-10-05 13:57:34,864 - root - INFO - lr: 1.9005e-05 gnorm: 1.05 [15:23:24< 9:09:19] +[titan] 2025-10-05 13:57:45,884 - root - INFO - step: 25085 loss: 2.0874 memory: 118.84GiB(85.28%) tps: 29,737 tflops: 412.55 mfu: 41.71% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8452 +[titan] 2025-10-05 13:57:45,884 - root - INFO - lr: 1.8996e-05 gnorm: 1.11 [15:23:35< 9:09:08] +[titan] 2025-10-05 13:57:52,579 - root - INFO - Dumping profiler traces at step 25088 +[titan] 2025-10-05 13:57:52,620 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 13:57:56,994 - root - INFO - step: 25090 loss: 2.1241 memory: 118.84GiB(85.28%) tps: 29,496 tflops: 409.21 mfu: 41.38% global_avg_ntp_loss: 0.2481 global_avg_mtp_loss: 1.8760 +[titan] 2025-10-05 13:57:56,994 - root - INFO - lr: 1.8988e-05 gnorm: 1.10 [15:23:46< 9:08:57] +[titan] 2025-10-05 13:58:07,853 - root - INFO - step: 25095 loss: 2.0873 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8461 +[titan] 2025-10-05 13:58:07,853 - root - INFO - lr: 1.8980e-05 gnorm: 1.09 [15:23:57< 9:08:46] +[titan] 2025-10-05 13:58:16,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 13:58:18,699 - root - INFO - step: 25100 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7915 +[titan] 2025-10-05 13:58:18,700 - root - INFO - lr: 1.8972e-05 gnorm: 1.07 [15:24:08< 9:08:35] +[titan] 2025-10-05 13:58:29,551 - root - INFO - step: 25105 loss: 2.0232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 13:58:29,551 - root - INFO - lr: 1.8963e-05 gnorm: 1.09 [15:24:19< 9:08:24] +[titan] 2025-10-05 13:58:40,400 - root - INFO - step: 25110 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 13:58:40,400 - root - INFO - lr: 1.8955e-05 gnorm: 1.11 [15:24:29< 9:08:13] +[titan] 2025-10-05 13:58:51,352 - root - INFO - step: 25115 loss: 2.0288 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.10 mfu: 41.97% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 13:58:51,352 - root - INFO - lr: 1.8947e-05 gnorm: 1.09 [15:24:40< 9:08:02] +[titan] 2025-10-05 13:59:02,234 - root - INFO - step: 25120 loss: 2.0905 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8475 +[titan] 2025-10-05 13:59:02,234 - root - INFO - lr: 1.8939e-05 gnorm: 1.09 [15:24:51< 9:07:50] +[titan] 2025-10-05 13:59:13,119 - root - INFO - step: 25125 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8215 +[titan] 2025-10-05 13:59:13,120 - root - INFO - lr: 1.8930e-05 gnorm: 1.07 [15:25:02< 9:07:39] +[titan] 2025-10-05 13:59:23,995 - root - INFO - step: 25130 loss: 2.0744 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 13:59:23,996 - root - INFO - lr: 
1.8922e-05 gnorm: 1.07 [15:25:13< 9:07:28] +[titan] 2025-10-05 13:59:34,878 - root - INFO - step: 25135 loss: 2.1335 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8860 +[titan] 2025-10-05 13:59:34,878 - root - INFO - lr: 1.8914e-05 gnorm: 1.12 [15:25:24< 9:07:17] +[titan] 2025-10-05 13:59:45,774 - root - INFO - step: 25140 loss: 2.0614 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8230 +[titan] 2025-10-05 13:59:45,775 - root - INFO - lr: 1.8905e-05 gnorm: 1.07 [15:25:35< 9:07:06] +[titan] 2025-10-05 13:59:56,648 - root - INFO - step: 25145 loss: 2.0630 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8235 +[titan] 2025-10-05 13:59:56,648 - root - INFO - lr: 1.8897e-05 gnorm: 1.08 [15:25:46< 9:06:55] +[titan] 2025-10-05 14:00:05,337 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:00:07,518 - root - INFO - step: 25150 loss: 2.1132 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8682 +[titan] 2025-10-05 14:00:07,519 - root - INFO - lr: 1.8889e-05 gnorm: 1.15 [15:25:57< 9:06:44] +[titan] 2025-10-05 14:00:18,376 - root - INFO - step: 25155 loss: 2.0122 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 14:00:18,377 - root - INFO - lr: 1.8881e-05 gnorm: 1.04 [15:26:07< 9:06:32] +[titan] 2025-10-05 14:00:29,255 - root - INFO - step: 25160 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 14:00:29,255 - root - INFO - lr: 1.8873e-05 gnorm: 1.08 [15:26:18< 9:06:21] +[titan] 2025-10-05 14:00:40,131 - root - INFO - step: 25165 loss: 2.0645 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 14:00:40,131 - root - INFO - lr: 1.8864e-05 gnorm: 1.09 [15:26:29< 9:06:10] +[titan] 2025-10-05 14:00:51,071 - root - INFO - step: 25170 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 29,954 tflops: 415.56 mfu: 42.02% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8358 +[titan] 2025-10-05 14:00:51,071 - root - INFO - lr: 1.8856e-05 gnorm: 1.06 [15:26:40< 9:05:59] +[titan] 2025-10-05 14:01:01,932 - root - INFO - step: 25175 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 14:01:01,932 - root - INFO - lr: 1.8848e-05 gnorm: 1.09 [15:26:51< 9:05:48] +[titan] 2025-10-05 14:01:12,823 - root - INFO - step: 25180 loss: 2.0514 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8133 +[titan] 2025-10-05 14:01:12,824 - root - INFO - lr: 
1.8840e-05 gnorm: 1.08 [15:27:02< 9:05:37] +[titan] 2025-10-05 14:01:23,713 - root - INFO - step: 25185 loss: 2.0887 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2430 global_avg_mtp_loss: 1.8457 +[titan] 2025-10-05 14:01:23,713 - root - INFO - lr: 1.8831e-05 gnorm: 1.04 [15:27:13< 9:05:26] +[titan] 2025-10-05 14:01:34,565 - root - INFO - step: 25190 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8554 +[titan] 2025-10-05 14:01:34,565 - root - INFO - lr: 1.8823e-05 gnorm: 1.08 [15:27:24< 9:05:14] +[titan] 2025-10-05 14:01:45,489 - root - INFO - step: 25195 loss: 2.1611 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2519 global_avg_mtp_loss: 1.9092 +[titan] 2025-10-05 14:01:45,489 - root - INFO - lr: 1.8815e-05 gnorm: 1.10 [15:27:35< 9:05:03] +[titan] 2025-10-05 14:01:54,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:01:56,348 - root - INFO - step: 25200 loss: 2.1289 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2468 global_avg_mtp_loss: 1.8821 +[titan] 2025-10-05 14:01:56,348 - root - INFO - lr: 1.8807e-05 gnorm: 1.13 [15:27:45< 9:04:52] +[titan] 2025-10-05 14:02:07,198 - root - INFO - step: 25205 loss: 2.0344 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:02:07,198 - root - INFO - lr: 1.8798e-05 gnorm: 1.06 [15:27:56< 9:04:41] +[titan] 2025-10-05 14:02:18,072 - root - INFO - step: 25210 loss: 2.0271 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7913 +[titan] 2025-10-05 14:02:18,072 - root - INFO - lr: 1.8790e-05 gnorm: 1.09 [15:28:07< 9:04:30] +[titan] 2025-10-05 14:02:28,950 - root - INFO - step: 25215 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:02:28,951 - root - INFO - lr: 1.8782e-05 gnorm: 1.11 [15:28:18< 9:04:19] +[titan] 2025-10-05 14:02:39,828 - root - INFO - step: 25220 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 14:02:39,828 - root - INFO - lr: 1.8774e-05 gnorm: 1.10 [15:28:29< 9:04:08] +[titan] 2025-10-05 14:02:50,798 - root - INFO - step: 25225 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 29,872 tflops: 414.43 mfu: 41.90% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 14:02:50,798 - root - INFO - lr: 1.8765e-05 gnorm: 1.10 [15:28:40< 9:03:56] +[titan] 2025-10-05 14:03:01,706 - root - INFO - step: 25230 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 14:03:01,706 - root - INFO - lr: 
1.8757e-05 gnorm: 1.07 [15:28:51< 9:03:45] +[titan] 2025-10-05 14:03:12,597 - root - INFO - step: 25235 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 14:03:12,597 - root - INFO - lr: 1.8749e-05 gnorm: 1.08 [15:29:02< 9:03:34] +[titan] 2025-10-05 14:03:23,476 - root - INFO - step: 25240 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:03:23,477 - root - INFO - lr: 1.8741e-05 gnorm: 1.05 [15:29:13< 9:03:23] +[titan] 2025-10-05 14:03:34,394 - root - INFO - step: 25245 loss: 2.0681 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8284 +[titan] 2025-10-05 14:03:34,395 - root - INFO - lr: 1.8733e-05 gnorm: 1.06 [15:29:23< 9:03:12] +[titan] 2025-10-05 14:03:43,076 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:03:45,291 - root - INFO - step: 25250 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7908 +[titan] 2025-10-05 14:03:45,292 - root - INFO - lr: 1.8724e-05 gnorm: 1.08 [15:29:34< 9:03:01] +[titan] 2025-10-05 14:03:56,215 - root - INFO - step: 25255 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8068 +[titan] 2025-10-05 14:03:56,215 - root - INFO - lr: 1.8716e-05 gnorm: 1.07 [15:29:45< 9:02:50] +[titan] 2025-10-05 14:04:07,069 - root - INFO - step: 25260 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7296 +[titan] 2025-10-05 14:04:07,070 - root - INFO - lr: 1.8708e-05 gnorm: 1.09 [15:29:56< 9:02:39] +[titan] 2025-10-05 14:04:17,929 - root - INFO - step: 25265 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 14:04:17,929 - root - INFO - lr: 1.8700e-05 gnorm: 1.05 [15:30:07< 9:02:27] +[titan] 2025-10-05 14:04:28,778 - root - INFO - step: 25270 loss: 2.0659 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8256 +[titan] 2025-10-05 14:04:28,778 - root - INFO - lr: 1.8692e-05 gnorm: 1.05 [15:30:18< 9:02:16] +[titan] 2025-10-05 14:04:39,663 - root - INFO - step: 25275 loss: 2.0679 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:04:39,663 - root - INFO - lr: 1.8683e-05 gnorm: 1.10 [15:30:29< 9:02:05] +[titan] 2025-10-05 14:04:50,619 - root - INFO - step: 25280 loss: 2.0423 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 14:04:50,620 - root - INFO - lr: 
1.8675e-05 gnorm: 1.10 [15:30:40< 9:01:54] +[titan] 2025-10-05 14:05:01,490 - root - INFO - step: 25285 loss: 2.0840 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2425 global_avg_mtp_loss: 1.8415 +[titan] 2025-10-05 14:05:01,490 - root - INFO - lr: 1.8667e-05 gnorm: 1.07 [15:30:51< 9:01:43] +[titan] 2025-10-05 14:05:12,363 - root - INFO - step: 25290 loss: 2.0928 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8496 +[titan] 2025-10-05 14:05:12,363 - root - INFO - lr: 1.8659e-05 gnorm: 1.08 [15:31:01< 9:01:32] +[titan] 2025-10-05 14:05:23,239 - root - INFO - step: 25295 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 14:05:23,239 - root - INFO - lr: 1.8650e-05 gnorm: 1.12 [15:31:12< 9:01:21] +[titan] 2025-10-05 14:05:31,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:05:34,099 - root - INFO - step: 25300 loss: 2.0853 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:05:34,100 - root - INFO - lr: 1.8642e-05 gnorm: 1.10 [15:31:23< 9:01:09] +[titan] 2025-10-05 14:05:44,978 - root - INFO - step: 25305 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 14:05:44,978 - root - INFO - lr: 1.8634e-05 gnorm: 1.07 [15:31:34< 9:00:58] +[titan] 2025-10-05 14:05:55,924 - root - INFO - step: 25310 loss: 2.0792 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.33 mfu: 41.99% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8356 +[titan] 2025-10-05 14:05:55,924 - root - INFO - lr: 1.8626e-05 gnorm: 1.11 [15:31:45< 9:00:47] +[titan] 2025-10-05 14:06:06,777 - root - INFO - step: 25315 loss: 2.0737 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8330 +[titan] 2025-10-05 14:06:06,777 - root - INFO - lr: 1.8618e-05 gnorm: 1.08 [15:31:56< 9:00:36] +[titan] 2025-10-05 14:06:17,653 - root - INFO - step: 25320 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 14:06:17,654 - root - INFO - lr: 1.8609e-05 gnorm: 1.06 [15:32:07< 9:00:25] +[titan] 2025-10-05 14:06:28,537 - root - INFO - step: 25325 loss: 2.1056 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8612 +[titan] 2025-10-05 14:06:28,537 - root - INFO - lr: 1.8601e-05 gnorm: 1.08 [15:32:18< 9:00:14] +[titan] 2025-10-05 14:06:39,411 - root - INFO - step: 25330 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:06:39,411 - root - INFO - lr: 
1.8593e-05 gnorm: 1.11 [15:32:28< 9:00:03] +[titan] 2025-10-05 14:06:50,340 - root - INFO - step: 25335 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 14:06:50,341 - root - INFO - lr: 1.8585e-05 gnorm: 1.10 [15:32:39< 8:59:52] +[titan] 2025-10-05 14:07:01,212 - root - INFO - step: 25340 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 14:07:01,212 - root - INFO - lr: 1.8577e-05 gnorm: 1.08 [15:32:50< 8:59:40] +[titan] 2025-10-05 14:07:12,114 - root - INFO - step: 25345 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 14:07:12,114 - root - INFO - lr: 1.8568e-05 gnorm: 1.06 [15:33:01< 8:59:29] +[titan] 2025-10-05 14:07:20,805 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:07:22,994 - root - INFO - step: 25350 loss: 2.0581 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8196 +[titan] 2025-10-05 14:07:22,994 - root - INFO - lr: 1.8560e-05 gnorm: 1.06 [15:33:12< 8:59:18] +[titan] 2025-10-05 14:07:33,878 - root - INFO - step: 25355 loss: 2.1110 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2452 global_avg_mtp_loss: 1.8658 +[titan] 2025-10-05 14:07:33,878 - root - INFO - lr: 1.8552e-05 gnorm: 1.11 [15:33:23< 8:59:07] +[titan] 2025-10-05 14:07:44,774 - root - INFO - step: 25360 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 14:07:44,774 - root - INFO - lr: 1.8544e-05 gnorm: 1.08 [15:33:34< 8:58:56] +[titan] 2025-10-05 14:07:55,691 - root - INFO - step: 25365 loss: 2.0709 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.42 mfu: 42.11% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:07:55,691 - root - INFO - lr: 1.8536e-05 gnorm: 1.08 [15:33:45< 8:58:45] +[titan] 2025-10-05 14:08:06,574 - root - INFO - step: 25370 loss: 2.0036 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 14:08:06,574 - root - INFO - lr: 1.8528e-05 gnorm: 1.08 [15:33:56< 8:58:34] +[titan] 2025-10-05 14:08:17,490 - root - INFO - step: 25375 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 14:08:17,490 - root - INFO - lr: 1.8519e-05 gnorm: 1.13 [15:34:07< 8:58:22] +[titan] 2025-10-05 14:08:28,356 - root - INFO - step: 25380 loss: 2.1491 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2504 global_avg_mtp_loss: 1.8986 +[titan] 2025-10-05 14:08:28,357 - root - INFO - lr: 
1.8511e-05 gnorm: 1.09 [15:34:17< 8:58:11] +[titan] 2025-10-05 14:08:39,210 - root - INFO - step: 25385 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:08:39,210 - root - INFO - lr: 1.8503e-05 gnorm: 1.09 [15:34:28< 8:58:00] +[titan] 2025-10-05 14:08:50,100 - root - INFO - step: 25390 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 14:08:50,100 - root - INFO - lr: 1.8495e-05 gnorm: 1.11 [15:34:39< 8:57:49] +[titan] 2025-10-05 14:09:00,958 - root - INFO - step: 25395 loss: 2.0841 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8414 +[titan] 2025-10-05 14:09:00,959 - root - INFO - lr: 1.8487e-05 gnorm: 1.09 [15:34:50< 8:57:38] +[titan] 2025-10-05 14:09:09,639 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:09:11,824 - root - INFO - step: 25400 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:09:11,824 - root - INFO - lr: 1.8478e-05 gnorm: 1.09 [15:35:01< 8:57:27] +[titan] 2025-10-05 14:09:22,722 - root - INFO - step: 25405 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7846 +[titan] 2025-10-05 14:09:22,722 - root - INFO - lr: 1.8470e-05 gnorm: 1.06 [15:35:12< 8:57:16] +[titan] 2025-10-05 14:09:33,582 - root - INFO - step: 25410 loss: 2.0696 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8282 +[titan] 2025-10-05 14:09:33,583 - root - INFO - lr: 1.8462e-05 gnorm: 1.07 [15:35:23< 8:57:04] +[titan] 2025-10-05 14:09:44,445 - root - INFO - step: 25415 loss: 2.0371 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 14:09:44,445 - root - INFO - lr: 1.8454e-05 gnorm: 1.07 [15:35:33< 8:56:53] +[titan] 2025-10-05 14:09:55,342 - root - INFO - step: 25420 loss: 2.1096 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8650 +[titan] 2025-10-05 14:09:55,342 - root - INFO - lr: 1.8446e-05 gnorm: 1.08 [15:35:44< 8:56:42] +[titan] 2025-10-05 14:10:06,229 - root - INFO - step: 25425 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 14:10:06,229 - root - INFO - lr: 1.8438e-05 gnorm: 1.09 [15:35:55< 8:56:31] +[titan] 2025-10-05 14:10:17,110 - root - INFO - step: 25430 loss: 2.0503 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8115 +[titan] 2025-10-05 14:10:17,110 - root - INFO - lr: 
1.8429e-05 gnorm: 1.09 [15:36:06< 8:56:20] +[titan] 2025-10-05 14:10:28,014 - root - INFO - step: 25435 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 14:10:28,014 - root - INFO - lr: 1.8421e-05 gnorm: 1.05 [15:36:17< 8:56:09] +[titan] 2025-10-05 14:10:38,939 - root - INFO - step: 25440 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8011 +[titan] 2025-10-05 14:10:38,939 - root - INFO - lr: 1.8413e-05 gnorm: 1.10 [15:36:28< 8:55:58] +[titan] 2025-10-05 14:10:49,824 - root - INFO - step: 25445 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 14:10:49,825 - root - INFO - lr: 1.8405e-05 gnorm: 1.08 [15:36:39< 8:55:47] +[titan] 2025-10-05 14:10:58,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:11:00,730 - root - INFO - step: 25450 loss: 2.0470 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8090 +[titan] 2025-10-05 14:11:00,730 - root - INFO - lr: 1.8397e-05 gnorm: 1.07 [15:36:50< 8:55:35] +[titan] 2025-10-05 14:11:11,607 - root - INFO - step: 25455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 14:11:11,607 - root - INFO - lr: 1.8389e-05 gnorm: 1.07 [15:37:01< 8:55:24] +[titan] 2025-10-05 14:11:22,482 - root - INFO - step: 25460 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2418 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 14:11:22,482 - root - INFO - lr: 1.8380e-05 gnorm: 1.10 [15:37:11< 8:55:13] +[titan] 2025-10-05 14:11:33,348 - root - INFO - step: 25465 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:11:33,348 - root - INFO - lr: 1.8372e-05 gnorm: 1.09 [15:37:22< 8:55:02] +[titan] 2025-10-05 14:11:44,248 - root - INFO - step: 25470 loss: 2.0570 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 14:11:44,248 - root - INFO - lr: 1.8364e-05 gnorm: 1.09 [15:37:33< 8:54:51] +[titan] 2025-10-05 14:11:55,157 - root - INFO - step: 25475 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 14:11:55,157 - root - INFO - lr: 1.8356e-05 gnorm: 1.09 [15:37:44< 8:54:40] +[titan] 2025-10-05 14:12:06,026 - root - INFO - step: 25480 loss: 2.0504 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8123 +[titan] 2025-10-05 14:12:06,027 - root - INFO - lr: 
1.8348e-05 gnorm: 1.07 [15:37:55< 8:54:29] +[titan] 2025-10-05 14:12:16,908 - root - INFO - step: 25485 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:12:16,909 - root - INFO - lr: 1.8340e-05 gnorm: 1.10 [15:38:06< 8:54:17] +[titan] 2025-10-05 14:12:27,776 - root - INFO - step: 25490 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7677 +[titan] 2025-10-05 14:12:27,776 - root - INFO - lr: 1.8332e-05 gnorm: 1.09 [15:38:17< 8:54:06] +[titan] 2025-10-05 14:12:38,651 - root - INFO - step: 25495 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8385 +[titan] 2025-10-05 14:12:38,651 - root - INFO - lr: 1.8323e-05 gnorm: 1.08 [15:38:28< 8:53:55] +[titan] 2025-10-05 14:12:47,344 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:12:49,537 - root - INFO - step: 25500 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7501 +[titan] 2025-10-05 14:12:49,537 - root - INFO - lr: 1.8315e-05 gnorm: 1.13 [15:38:39< 8:53:44] +[titan] 2025-10-05 14:13:00,470 - root - INFO - step: 25505 loss: 2.0393 memory: 118.84GiB(85.28%) tps: 29,973 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:13:00,470 - root - INFO - lr: 1.8307e-05 gnorm: 1.08 [15:38:49< 8:53:33] +[titan] 2025-10-05 14:13:11,338 - root - INFO - step: 25510 loss: 2.0953 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8516 +[titan] 2025-10-05 14:13:11,338 - root - INFO - lr: 1.8299e-05 gnorm: 1.11 [15:39:00< 8:53:22] +[titan] 2025-10-05 14:13:22,196 - root - INFO - step: 25515 loss: 2.0657 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8259 +[titan] 2025-10-05 14:13:22,197 - root - INFO - lr: 1.8291e-05 gnorm: 1.17 [15:39:11< 8:53:11] +[titan] 2025-10-05 14:13:33,046 - root - INFO - step: 25520 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 14:13:33,047 - root - INFO - lr: 1.8283e-05 gnorm: 1.07 [15:39:22< 8:53:00] +[titan] 2025-10-05 14:13:43,917 - root - INFO - step: 25525 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 14:13:43,917 - root - INFO - lr: 1.8275e-05 gnorm: 1.12 [15:39:33< 8:52:48] +[titan] 2025-10-05 14:13:54,888 - root - INFO - step: 25530 loss: 2.1016 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2437 global_avg_mtp_loss: 1.8579 +[titan] 2025-10-05 14:13:54,888 - root - INFO - lr: 
1.8266e-05 gnorm: 1.14 [15:39:44< 8:52:37] +[titan] 2025-10-05 14:14:05,796 - root - INFO - step: 25535 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:14:05,796 - root - INFO - lr: 1.8258e-05 gnorm: 1.11 [15:39:55< 8:52:26] +[titan] 2025-10-05 14:14:16,658 - root - INFO - step: 25540 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 14:14:16,658 - root - INFO - lr: 1.8250e-05 gnorm: 1.12 [15:40:06< 8:52:15] +[titan] 2025-10-05 14:14:27,520 - root - INFO - step: 25545 loss: 2.0809 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 14:14:27,521 - root - INFO - lr: 1.8242e-05 gnorm: 1.08 [15:40:17< 8:52:04] +[titan] 2025-10-05 14:14:36,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:14:38,398 - root - INFO - step: 25550 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 14:14:38,399 - root - INFO - lr: 1.8234e-05 gnorm: 1.07 [15:40:27< 8:51:53] +[titan] 2025-10-05 14:14:49,271 - root - INFO - step: 25555 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 14:14:49,271 - root - INFO - lr: 1.8226e-05 gnorm: 1.10 [15:40:38< 8:51:42] +[titan] 2025-10-05 14:15:00,189 - root - INFO - step: 25560 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8294 +[titan] 2025-10-05 14:15:00,189 - root - INFO - lr: 1.8218e-05 gnorm: 1.05 [15:40:49< 8:51:30] +[titan] 2025-10-05 14:15:11,120 - root - INFO - step: 25565 loss: 2.0717 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.89 mfu: 42.05% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8320 +[titan] 2025-10-05 14:15:11,121 - root - INFO - lr: 1.8209e-05 gnorm: 1.07 [15:41:00< 8:51:19] +[titan] 2025-10-05 14:15:21,997 - root - INFO - step: 25570 loss: 2.0948 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2508 global_avg_mtp_loss: 1.8440 +[titan] 2025-10-05 14:15:21,997 - root - INFO - lr: 1.8201e-05 gnorm: 1.56 [15:41:11< 8:51:08] +[titan] 2025-10-05 14:15:32,888 - root - INFO - step: 25575 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 14:15:32,888 - root - INFO - lr: 1.8193e-05 gnorm: 1.07 [15:41:22< 8:50:57] +[titan] 2025-10-05 14:15:43,769 - root - INFO - step: 25580 loss: 2.1159 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2449 global_avg_mtp_loss: 1.8710 +[titan] 2025-10-05 14:15:43,769 - root - INFO - lr: 
1.8185e-05 gnorm: 1.07 [15:41:33< 8:50:46] +[titan] 2025-10-05 14:15:54,652 - root - INFO - step: 25585 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 14:15:54,652 - root - INFO - lr: 1.8177e-05 gnorm: 1.05 [15:41:44< 8:50:35] +[titan] 2025-10-05 14:16:05,536 - root - INFO - step: 25590 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 14:16:05,536 - root - INFO - lr: 1.8169e-05 gnorm: 1.07 [15:41:55< 8:50:24] +[titan] 2025-10-05 14:16:16,420 - root - INFO - step: 25595 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:16:16,420 - root - INFO - lr: 1.8161e-05 gnorm: 1.09 [15:42:05< 8:50:13] +[titan] 2025-10-05 14:16:25,234 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:16:27,418 - root - INFO - step: 25600 loss: 2.0152 memory: 118.84GiB(85.28%) tps: 29,795 tflops: 413.36 mfu: 41.80% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7812 +[titan] 2025-10-05 14:16:27,418 - root - INFO - lr: 1.8153e-05 gnorm: 1.10 [15:42:16< 8:50:02] +[titan] 2025-10-05 14:16:27,606 - root - INFO - Dumping profiler traces at step 25600 +[titan] 2025-10-05 14:16:27,647 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:16:38,481 - root - INFO - step: 25605 loss: 2.0476 memory: 118.84GiB(85.28%) tps: 29,620 tflops: 410.94 mfu: 41.55% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:16:38,482 - root - INFO - lr: 1.8144e-05 gnorm: 1.11 [15:42:27< 8:49:50] +[titan] 2025-10-05 14:16:49,316 - root - INFO - step: 25610 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8107 +[titan] 2025-10-05 14:16:49,316 - root - INFO - lr: 1.8136e-05 gnorm: 1.06 [15:42:38< 8:49:39] +[titan] 2025-10-05 14:17:00,171 - root - INFO - step: 25615 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 14:17:00,172 - root - INFO - lr: 1.8128e-05 gnorm: 1.06 [15:42:49< 8:49:28] +[titan] 2025-10-05 14:17:11,028 - root - INFO - step: 25620 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 14:17:11,028 - root - INFO - lr: 1.8120e-05 gnorm: 1.08 [15:43:00< 8:49:17] +[titan] 2025-10-05 14:17:21,893 - root - INFO - step: 25625 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8178 +[titan] 2025-10-05 14:17:21,894 - root - INFO - lr: 1.8112e-05 gnorm: 1.08 [15:43:11< 8:49:06] +[titan] 2025-10-05 14:17:32,791 - root - INFO - step: 25630 loss: 
2.0937 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8510 +[titan] 2025-10-05 14:17:32,791 - root - INFO - lr: 1.8104e-05 gnorm: 1.17 [15:43:22< 8:48:55] +[titan] 2025-10-05 14:17:43,645 - root - INFO - step: 25635 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 14:17:43,645 - root - INFO - lr: 1.8096e-05 gnorm: 1.09 [15:43:33< 8:48:44] +[titan] 2025-10-05 14:17:54,490 - root - INFO - step: 25640 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8138 +[titan] 2025-10-05 14:17:54,490 - root - INFO - lr: 1.8088e-05 gnorm: 1.07 [15:43:43< 8:48:32] +[titan] 2025-10-05 14:18:05,362 - root - INFO - step: 25645 loss: 2.1264 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2478 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:18:05,362 - root - INFO - lr: 1.8080e-05 gnorm: 1.09 [15:43:54< 8:48:21] +[titan] 2025-10-05 14:18:14,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:18:16,215 - root - INFO - step: 25650 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 14:18:16,216 - root - INFO - lr: 1.8071e-05 gnorm: 1.09 [15:44:05< 8:48:10] +[titan] 2025-10-05 14:18:27,067 - root - INFO - step: 25655 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 14:18:27,068 - root - INFO - lr: 1.8063e-05 gnorm: 1.05 [15:44:16< 8:47:59] +[titan] 2025-10-05 14:18:37,921 - root - INFO - step: 25660 loss: 2.0284 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7927 +[titan] 2025-10-05 14:18:37,921 - root - INFO - lr: 1.8055e-05 gnorm: 1.09 [15:44:27< 8:47:48] +[titan] 2025-10-05 14:18:48,835 - root - INFO - step: 25665 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.53 mfu: 42.12% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 14:18:48,835 - root - INFO - lr: 1.8047e-05 gnorm: 1.08 [15:44:38< 8:47:37] +[titan] 2025-10-05 14:18:59,735 - root - INFO - step: 25670 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:18:59,736 - root - INFO - lr: 1.8039e-05 gnorm: 1.11 [15:44:49< 8:47:26] +[titan] 2025-10-05 14:19:10,621 - root - INFO - step: 25675 loss: 2.1144 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2443 global_avg_mtp_loss: 1.8701 +[titan] 2025-10-05 14:19:10,621 - root - INFO - lr: 1.8031e-05 gnorm: 1.12 [15:45:00< 8:47:15] +[titan] 2025-10-05 14:19:21,506 - root - INFO - step: 25680 loss: 2.0801 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8389 +[titan] 2025-10-05 14:19:21,506 - root - INFO - lr: 
1.8023e-05 gnorm: 1.07 [15:45:10< 8:47:03] +[titan] 2025-10-05 14:19:32,375 - root - INFO - step: 25685 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 14:19:32,375 - root - INFO - lr: 1.8015e-05 gnorm: 1.07 [15:45:21< 8:46:52] +[titan] 2025-10-05 14:19:43,253 - root - INFO - step: 25690 loss: 1.9973 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7661 +[titan] 2025-10-05 14:19:43,254 - root - INFO - lr: 1.8007e-05 gnorm: 1.09 [15:45:32< 8:46:41] +[titan] 2025-10-05 14:19:54,175 - root - INFO - step: 25695 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7719 +[titan] 2025-10-05 14:19:54,175 - root - INFO - lr: 1.7999e-05 gnorm: 1.09 [15:45:43< 8:46:30] +[titan] 2025-10-05 14:20:02,862 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:20:05,037 - root - INFO - step: 25700 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 14:20:05,037 - root - INFO - lr: 1.7991e-05 gnorm: 1.10 [15:45:54< 8:46:19] +[titan] 2025-10-05 14:20:15,889 - root - INFO - step: 25705 loss: 2.1593 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2509 global_avg_mtp_loss: 1.9084 +[titan] 2025-10-05 14:20:15,889 - root - INFO - lr: 1.7982e-05 gnorm: 1.09 [15:46:05< 8:46:08] +[titan] 2025-10-05 14:20:26,754 - root - INFO - step: 25710 loss: 2.0748 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8335 +[titan] 2025-10-05 14:20:26,754 - root - INFO - lr: 1.7974e-05 gnorm: 1.08 [15:46:16< 8:45:57] +[titan] 2025-10-05 14:20:37,621 - root - INFO - step: 25715 loss: 2.0337 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7984 +[titan] 2025-10-05 14:20:37,621 - root - INFO - lr: 1.7966e-05 gnorm: 1.06 [15:46:27< 8:45:45] +[titan] 2025-10-05 14:20:48,501 - root - INFO - step: 25720 loss: 2.0692 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 14:20:48,501 - root - INFO - lr: 1.7958e-05 gnorm: 1.07 [15:46:37< 8:45:34] +[titan] 2025-10-05 14:20:59,442 - root - INFO - step: 25725 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8266 +[titan] 2025-10-05 14:20:59,442 - root - INFO - lr: 1.7950e-05 gnorm: 1.11 [15:46:48< 8:45:23] +[titan] 2025-10-05 14:21:10,316 - root - INFO - step: 25730 loss: 2.0784 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8362 +[titan] 2025-10-05 14:21:10,316 - root - INFO - lr: 
1.7942e-05 gnorm: 1.10 [15:46:59< 8:45:12] +[titan] 2025-10-05 14:21:21,179 - root - INFO - step: 25735 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 14:21:21,179 - root - INFO - lr: 1.7934e-05 gnorm: 1.11 [15:47:10< 8:45:01] +[titan] 2025-10-05 14:21:32,060 - root - INFO - step: 25740 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 14:21:32,060 - root - INFO - lr: 1.7926e-05 gnorm: 1.05 [15:47:21< 8:44:50] +[titan] 2025-10-05 14:21:42,940 - root - INFO - step: 25745 loss: 2.0534 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8151 +[titan] 2025-10-05 14:21:42,940 - root - INFO - lr: 1.7918e-05 gnorm: 1.13 [15:47:32< 8:44:39] +[titan] 2025-10-05 14:21:51,620 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:21:53,800 - root - INFO - step: 25750 loss: 2.0656 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8265 +[titan] 2025-10-05 14:21:53,800 - root - INFO - lr: 1.7910e-05 gnorm: 1.09 [15:47:43< 8:44:28] +[titan] 2025-10-05 14:22:04,676 - root - INFO - step: 25755 loss: 2.0272 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:22:04,676 - root - INFO - lr: 1.7902e-05 gnorm: 1.10 [15:47:54< 8:44:16] +[titan] 2025-10-05 14:22:15,594 - root - INFO - step: 25760 loss: 2.0342 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7988 +[titan] 2025-10-05 14:22:15,594 - root - INFO - lr: 1.7894e-05 gnorm: 1.07 [15:48:05< 8:44:05] +[titan] 2025-10-05 14:22:26,449 - root - INFO - step: 25765 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 14:22:26,449 - root - INFO - lr: 1.7885e-05 gnorm: 1.09 [15:48:15< 8:43:54] +[titan] 2025-10-05 14:22:37,310 - root - INFO - step: 25770 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 14:22:37,310 - root - INFO - lr: 1.7877e-05 gnorm: 1.05 [15:48:26< 8:43:43] +[titan] 2025-10-05 14:22:48,182 - root - INFO - step: 25775 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8244 +[titan] 2025-10-05 14:22:48,182 - root - INFO - lr: 1.7869e-05 gnorm: 1.11 [15:48:37< 8:43:32] +[titan] 2025-10-05 14:22:59,049 - root - INFO - step: 25780 loss: 2.0127 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 14:22:59,049 - root - INFO - lr: 
1.7861e-05 gnorm: 1.06 [15:48:48< 8:43:21] +[titan] 2025-10-05 14:23:09,928 - root - INFO - step: 25785 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 14:23:09,928 - root - INFO - lr: 1.7853e-05 gnorm: 1.04 [15:48:59< 8:43:10] +[titan] 2025-10-05 14:23:20,861 - root - INFO - step: 25790 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 14:23:20,861 - root - INFO - lr: 1.7845e-05 gnorm: 1.11 [15:49:10< 8:42:58] +[titan] 2025-10-05 14:23:31,734 - root - INFO - step: 25795 loss: 2.0316 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 14:23:31,734 - root - INFO - lr: 1.7837e-05 gnorm: 1.08 [15:49:21< 8:42:47] +[titan] 2025-10-05 14:23:40,435 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:23:42,620 - root - INFO - step: 25800 loss: 2.1252 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2514 global_avg_mtp_loss: 1.8738 +[titan] 2025-10-05 14:23:42,621 - root - INFO - lr: 1.7829e-05 gnorm: 2.05 [15:49:32< 8:42:36] +[titan] 2025-10-05 14:23:53,479 - root - INFO - step: 25805 loss: 2.0499 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8114 +[titan] 2025-10-05 14:23:53,479 - root - INFO - lr: 1.7821e-05 gnorm: 1.10 [15:49:42< 8:42:25] +[titan] 2025-10-05 14:24:04,354 - root - INFO - step: 25810 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8199 +[titan] 2025-10-05 14:24:04,354 - root - INFO - lr: 1.7813e-05 gnorm: 1.10 [15:49:53< 8:42:14] +[titan] 2025-10-05 14:24:15,228 - root - INFO - step: 25815 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8081 +[titan] 2025-10-05 14:24:15,228 - root - INFO - lr: 1.7805e-05 gnorm: 1.07 [15:50:04< 8:42:03] +[titan] 2025-10-05 14:24:26,126 - root - INFO - step: 25820 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:24:26,126 - root - INFO - lr: 1.7797e-05 gnorm: 1.11 [15:50:15< 8:41:52] +[titan] 2025-10-05 14:24:37,054 - root - INFO - step: 25825 loss: 2.0658 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8262 +[titan] 2025-10-05 14:24:37,055 - root - INFO - lr: 1.7789e-05 gnorm: 1.09 [15:50:26< 8:41:41] +[titan] 2025-10-05 14:24:47,925 - root - INFO - step: 25830 loss: 2.0757 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 14:24:47,925 - root - INFO - lr: 
1.7781e-05 gnorm: 1.08 [15:50:37< 8:41:29] +[titan] 2025-10-05 14:24:58,795 - root - INFO - step: 25835 loss: 2.0919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8502 +[titan] 2025-10-05 14:24:58,795 - root - INFO - lr: 1.7773e-05 gnorm: 1.15 [15:50:48< 8:41:18] +[titan] 2025-10-05 14:25:09,680 - root - INFO - step: 25840 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 14:25:09,680 - root - INFO - lr: 1.7765e-05 gnorm: 1.04 [15:50:59< 8:41:07] +[titan] 2025-10-05 14:25:20,542 - root - INFO - step: 25845 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7882 +[titan] 2025-10-05 14:25:20,543 - root - INFO - lr: 1.7757e-05 gnorm: 1.08 [15:51:10< 8:40:56] +[titan] 2025-10-05 14:25:29,239 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:25:31,434 - root - INFO - step: 25850 loss: 2.0438 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8073 +[titan] 2025-10-05 14:25:31,434 - root - INFO - lr: 1.7749e-05 gnorm: 1.08 [15:51:20< 8:40:45] +[titan] 2025-10-05 14:25:42,355 - root - INFO - step: 25855 loss: 2.0565 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:25:42,356 - root - INFO - lr: 1.7740e-05 gnorm: 1.09 [15:51:31< 8:40:34] +[titan] 2025-10-05 14:25:53,227 - root - INFO - step: 25860 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8517 +[titan] 2025-10-05 14:25:53,227 - root - INFO - lr: 1.7732e-05 gnorm: 1.11 [15:51:42< 8:40:23] +[titan] 2025-10-05 14:26:04,104 - root - INFO - step: 25865 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8184 +[titan] 2025-10-05 14:26:04,105 - root - INFO - lr: 1.7724e-05 gnorm: 1.11 [15:51:53< 8:40:12] +[titan] 2025-10-05 14:26:15,028 - root - INFO - step: 25870 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 14:26:15,028 - root - INFO - lr: 1.7716e-05 gnorm: 1.04 [15:52:04< 8:40:00] +[titan] 2025-10-05 14:26:25,939 - root - INFO - step: 25875 loss: 2.0505 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8127 +[titan] 2025-10-05 14:26:25,939 - root - INFO - lr: 1.7708e-05 gnorm: 1.08 [15:52:15< 8:39:49] +[titan] 2025-10-05 14:26:36,815 - root - INFO - step: 25880 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:26:36,815 - root - INFO - lr: 
1.7700e-05 gnorm: 1.07 [15:52:26< 8:39:38] +[titan] 2025-10-05 14:26:47,749 - root - INFO - step: 25885 loss: 2.0798 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8378 +[titan] 2025-10-05 14:26:47,749 - root - INFO - lr: 1.7692e-05 gnorm: 1.10 [15:52:37< 8:39:27] +[titan] 2025-10-05 14:26:58,622 - root - INFO - step: 25890 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:26:58,622 - root - INFO - lr: 1.7684e-05 gnorm: 1.07 [15:52:48< 8:39:16] +[titan] 2025-10-05 14:27:09,541 - root - INFO - step: 25895 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.36 mfu: 42.10% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7826 +[titan] 2025-10-05 14:27:09,541 - root - INFO - lr: 1.7676e-05 gnorm: 1.10 [15:52:59< 8:39:05] +[titan] 2025-10-05 14:27:18,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:27:20,420 - root - INFO - step: 25900 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 14:27:20,420 - root - INFO - lr: 1.7668e-05 gnorm: 1.08 [15:53:09< 8:38:54] +[titan] 2025-10-05 14:27:31,298 - root - INFO - step: 25905 loss: 2.0807 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8392 +[titan] 2025-10-05 14:27:31,299 - root - INFO - lr: 1.7660e-05 gnorm: 1.08 [15:53:20< 8:38:43] +[titan] 2025-10-05 14:27:42,163 - root - INFO - step: 25910 loss: 2.0892 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8454 +[titan] 2025-10-05 14:27:42,163 - root - INFO - lr: 1.7652e-05 gnorm: 1.12 [15:53:31< 8:38:31] +[titan] 2025-10-05 14:27:53,040 - root - INFO - step: 25915 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 14:27:53,041 - root - INFO - lr: 1.7644e-05 gnorm: 1.09 [15:53:42< 8:38:20] +[titan] 2025-10-05 14:28:03,938 - root - INFO - step: 25920 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 14:28:03,938 - root - INFO - lr: 1.7636e-05 gnorm: 1.05 [15:53:53< 8:38:09] +[titan] 2025-10-05 14:28:14,994 - root - INFO - step: 25925 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.18 mfu: 41.58% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8390 +[titan] 2025-10-05 14:28:14,995 - root - INFO - lr: 1.7628e-05 gnorm: 1.11 [15:54:04< 8:37:58] +[titan] 2025-10-05 14:28:25,864 - root - INFO - step: 25930 loss: 2.0995 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8550 +[titan] 2025-10-05 14:28:25,864 - root - INFO - lr: 
1.7620e-05 gnorm: 1.09 [15:54:15< 8:37:47] +[titan] 2025-10-05 14:28:36,720 - root - INFO - step: 25935 loss: 2.0585 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8195 +[titan] 2025-10-05 14:28:36,720 - root - INFO - lr: 1.7612e-05 gnorm: 1.12 [15:54:26< 8:37:36] +[titan] 2025-10-05 14:28:47,595 - root - INFO - step: 25940 loss: 2.0654 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8257 +[titan] 2025-10-05 14:28:47,596 - root - INFO - lr: 1.7604e-05 gnorm: 1.13 [15:54:37< 8:37:25] +[titan] 2025-10-05 14:28:58,468 - root - INFO - step: 25945 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7629 +[titan] 2025-10-05 14:28:58,469 - root - INFO - lr: 1.7596e-05 gnorm: 1.11 [15:54:47< 8:37:14] +[titan] 2025-10-05 14:29:07,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:29:09,436 - root - INFO - step: 25950 loss: 2.0624 memory: 118.84GiB(85.28%) tps: 29,879 tflops: 414.52 mfu: 41.91% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:29:09,436 - root - INFO - lr: 1.7588e-05 gnorm: 1.14 [15:54:58< 8:37:03] +[titan] 2025-10-05 14:29:20,286 - root - INFO - step: 25955 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 14:29:20,286 - root - INFO - lr: 1.7580e-05 gnorm: 1.08 [15:55:09< 8:36:51] +[titan] 2025-10-05 14:29:31,140 - root - INFO - step: 25960 loss: 2.0279 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7922 +[titan] 2025-10-05 14:29:31,140 - root - INFO - lr: 1.7572e-05 gnorm: 1.08 [15:55:20< 8:36:40] +[titan] 2025-10-05 14:29:42,013 - root - INFO - step: 25965 loss: 2.0766 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2407 global_avg_mtp_loss: 1.8359 +[titan] 2025-10-05 14:29:42,013 - root - INFO - lr: 1.7564e-05 gnorm: 1.10 [15:55:31< 8:36:29] +[titan] 2025-10-05 14:29:52,914 - root - INFO - step: 25970 loss: 2.1034 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8595 +[titan] 2025-10-05 14:29:52,914 - root - INFO - lr: 1.7556e-05 gnorm: 1.06 [15:55:42< 8:36:18] +[titan] 2025-10-05 14:30:03,792 - root - INFO - step: 25975 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 14:30:03,792 - root - INFO - lr: 1.7548e-05 gnorm: 1.08 [15:55:53< 8:36:07] +[titan] 2025-10-05 14:30:14,715 - root - INFO - step: 25980 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8170 +[titan] 2025-10-05 14:30:14,715 - root - INFO - lr: 
1.7540e-05 gnorm: 1.11 [15:56:04< 8:35:56] +[titan] 2025-10-05 14:30:25,638 - root - INFO - step: 25985 loss: 2.0484 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 14:30:25,638 - root - INFO - lr: 1.7532e-05 gnorm: 1.07 [15:56:15< 8:35:45] +[titan] 2025-10-05 14:30:36,501 - root - INFO - step: 25990 loss: 2.0665 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:30:36,501 - root - INFO - lr: 1.7524e-05 gnorm: 1.10 [15:56:25< 8:35:34] +[titan] 2025-10-05 14:30:47,379 - root - INFO - step: 25995 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7746 +[titan] 2025-10-05 14:30:47,379 - root - INFO - lr: 1.7516e-05 gnorm: 1.07 [15:56:36< 8:35:22] +[titan] 2025-10-05 14:30:56,083 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:30:58,259 - root - INFO - step: 26000 loss: 2.0535 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8148 +[titan] 2025-10-05 14:30:58,259 - root - INFO - lr: 1.7508e-05 gnorm: 1.14 [15:56:47< 8:35:11] +[titan] 2025-10-05 14:31:09,132 - root - INFO - step: 26005 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 14:31:09,132 - root - INFO - lr: 1.7500e-05 gnorm: 1.09 [15:56:58< 8:35:00] +[titan] 2025-10-05 14:31:20,058 - root - INFO - step: 26010 loss: 2.0243 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 14:31:20,058 - root - INFO - lr: 1.7492e-05 gnorm: 1.12 [15:57:09< 8:34:49] +[titan] 2025-10-05 14:31:30,973 - root - INFO - step: 26015 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:31:30,974 - root - INFO - lr: 1.7484e-05 gnorm: 1.10 [15:57:20< 8:34:38] +[titan] 2025-10-05 14:31:41,835 - root - INFO - step: 26020 loss: 2.0946 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8519 +[titan] 2025-10-05 14:31:41,835 - root - INFO - lr: 1.7476e-05 gnorm: 1.16 [15:57:31< 8:34:27] +[titan] 2025-10-05 14:31:52,692 - root - INFO - step: 26025 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8157 +[titan] 2025-10-05 14:31:52,692 - root - INFO - lr: 1.7468e-05 gnorm: 1.07 [15:57:42< 8:34:16] +[titan] 2025-10-05 14:32:03,561 - root - INFO - step: 26030 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 14:32:03,561 - root - INFO - lr: 
1.7460e-05 gnorm: 1.09 [15:57:53< 8:34:05] +[titan] 2025-10-05 14:32:14,505 - root - INFO - step: 26035 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 29,944 tflops: 415.42 mfu: 42.00% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:32:14,505 - root - INFO - lr: 1.7452e-05 gnorm: 1.10 [15:58:03< 8:33:54] +[titan] 2025-10-05 14:32:25,388 - root - INFO - step: 26040 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2412 global_avg_mtp_loss: 1.8264 +[titan] 2025-10-05 14:32:25,388 - root - INFO - lr: 1.7444e-05 gnorm: 1.09 [15:58:14< 8:33:42] +[titan] 2025-10-05 14:32:36,316 - root - INFO - step: 26045 loss: 2.0763 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2421 global_avg_mtp_loss: 1.8342 +[titan] 2025-10-05 14:32:36,316 - root - INFO - lr: 1.7436e-05 gnorm: 1.10 [15:58:25< 8:33:31] +[titan] 2025-10-05 14:32:45,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:32:47,196 - root - INFO - step: 26050 loss: 2.0388 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 14:32:47,196 - root - INFO - lr: 1.7428e-05 gnorm: 1.08 [15:58:36< 8:33:20] +[titan] 2025-10-05 14:32:58,069 - root - INFO - step: 26055 loss: 2.0524 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8131 +[titan] 2025-10-05 14:32:58,069 - root - INFO - lr: 1.7420e-05 gnorm: 1.06 [15:58:47< 8:33:09] +[titan] 2025-10-05 14:33:08,972 - root - INFO - step: 26060 loss: 2.0150 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 14:33:08,972 - root - INFO - lr: 1.7412e-05 gnorm: 1.10 [15:58:58< 8:32:58] +[titan] 2025-10-05 14:33:19,953 - root - INFO - step: 26065 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,841 tflops: 414.00 mfu: 41.86% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:33:19,953 - root - INFO - lr: 1.7404e-05 gnorm: 1.12 [15:59:09< 8:32:47] +[titan] 2025-10-05 14:33:30,852 - root - INFO - step: 26070 loss: 2.0795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8380 +[titan] 2025-10-05 14:33:30,852 - root - INFO - lr: 1.7396e-05 gnorm: 1.11 [15:59:20< 8:32:36] +[titan] 2025-10-05 14:33:41,755 - root - INFO - step: 26075 loss: 2.0764 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8351 +[titan] 2025-10-05 14:33:41,755 - root - INFO - lr: 1.7388e-05 gnorm: 1.11 [15:59:31< 8:32:25] +[titan] 2025-10-05 14:33:52,678 - root - INFO - step: 26080 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.22 mfu: 42.08% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 14:33:52,678 - root - INFO - lr: 
1.7380e-05 gnorm: 1.08 [15:59:42< 8:32:14] +[titan] 2025-10-05 14:34:03,540 - root - INFO - step: 26085 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8241 +[titan] 2025-10-05 14:34:03,540 - root - INFO - lr: 1.7372e-05 gnorm: 1.14 [15:59:52< 8:32:02] +[titan] 2025-10-05 14:34:14,468 - root - INFO - step: 26090 loss: 2.0497 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8118 +[titan] 2025-10-05 14:34:14,468 - root - INFO - lr: 1.7364e-05 gnorm: 1.11 [16:00:03< 8:31:51] +[titan] 2025-10-05 14:34:25,355 - root - INFO - step: 26095 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7776 +[titan] 2025-10-05 14:34:25,355 - root - INFO - lr: 1.7356e-05 gnorm: 1.07 [16:00:14< 8:31:40] +[titan] 2025-10-05 14:34:34,039 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:34:36,218 - root - INFO - step: 26100 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 14:34:36,218 - root - INFO - lr: 1.7348e-05 gnorm: 1.06 [16:00:25< 8:31:29] +[titan] 2025-10-05 14:34:47,106 - root - INFO - step: 26105 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 14:34:47,107 - root - INFO - lr: 1.7340e-05 gnorm: 1.06 [16:00:36< 8:31:18] +[titan] 2025-10-05 14:34:58,140 - root - INFO - step: 26110 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 29,699 tflops: 412.03 mfu: 41.66% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:34:58,140 - root - INFO - lr: 1.7332e-05 gnorm: 1.12 [16:00:47< 8:31:07] +[titan] 2025-10-05 14:35:02,666 - root - INFO - Dumping profiler traces at step 26112 +[titan] 2025-10-05 14:35:02,705 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:35:09,255 - root - INFO - step: 26115 loss: 2.0631 memory: 118.84GiB(85.28%) tps: 29,482 tflops: 409.02 mfu: 41.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8240 +[titan] 2025-10-05 14:35:09,255 - root - INFO - lr: 1.7324e-05 gnorm: 1.07 [16:00:58< 8:30:56] +[titan] 2025-10-05 14:35:20,180 - root - INFO - step: 26120 loss: 1.9396 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 14:35:20,180 - root - INFO - lr: 1.7316e-05 gnorm: 1.06 [16:01:09< 8:30:45] +[titan] 2025-10-05 14:35:31,056 - root - INFO - step: 26125 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 14:35:31,056 - root - INFO - lr: 1.7309e-05 gnorm: 1.12 [16:01:20< 8:30:34] +[titan] 2025-10-05 14:35:41,959 - root - INFO - step: 26130 loss: 
2.0350 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 14:35:41,959 - root - INFO - lr: 1.7301e-05 gnorm: 1.07 [16:01:31< 8:30:23] +[titan] 2025-10-05 14:35:52,846 - root - INFO - step: 26135 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7696 +[titan] 2025-10-05 14:35:52,846 - root - INFO - lr: 1.7293e-05 gnorm: 1.10 [16:01:42< 8:30:11] +[titan] 2025-10-05 14:36:03,715 - root - INFO - step: 26140 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7686 +[titan] 2025-10-05 14:36:03,715 - root - INFO - lr: 1.7285e-05 gnorm: 1.07 [16:01:53< 8:30:00] +[titan] 2025-10-05 14:36:14,674 - root - INFO - step: 26145 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.85 mfu: 41.95% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7630 +[titan] 2025-10-05 14:36:14,674 - root - INFO - lr: 1.7277e-05 gnorm: 1.05 [16:02:04< 8:29:49] +[titan] 2025-10-05 14:36:23,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:36:25,541 - root - INFO - step: 26150 loss: 2.1124 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8676 +[titan] 2025-10-05 14:36:25,541 - root - INFO - lr: 1.7269e-05 gnorm: 1.13 [16:02:14< 8:29:38] +[titan] 2025-10-05 14:36:36,415 - root - INFO - step: 26155 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:36:36,415 - root - INFO - lr: 1.7261e-05 gnorm: 1.12 [16:02:25< 8:29:27] +[titan] 2025-10-05 14:36:47,302 - root - INFO - step: 26160 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 14:36:47,302 - root - INFO - lr: 1.7253e-05 gnorm: 1.08 [16:02:36< 8:29:16] +[titan] 2025-10-05 14:36:58,194 - root - INFO - step: 26165 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 14:36:58,194 - root - INFO - lr: 1.7245e-05 gnorm: 1.08 [16:02:47< 8:29:05] +[titan] 2025-10-05 14:37:09,092 - root - INFO - step: 26170 loss: 2.1112 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2460 global_avg_mtp_loss: 1.8651 +[titan] 2025-10-05 14:37:09,092 - root - INFO - lr: 1.7237e-05 gnorm: 1.11 [16:02:58< 8:28:54] +[titan] 2025-10-05 14:37:20,018 - root - INFO - step: 26175 loss: 2.0516 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 14:37:20,018 - root - INFO - lr: 1.7229e-05 gnorm: 1.08 [16:03:09< 8:28:42] +[titan] 2025-10-05 14:37:30,897 - root - INFO - step: 26180 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:37:30,897 - root - INFO - lr: 
1.7221e-05 gnorm: 1.09 [16:03:20< 8:28:31] +[titan] 2025-10-05 14:37:41,783 - root - INFO - step: 26185 loss: 2.0932 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2428 global_avg_mtp_loss: 1.8504 +[titan] 2025-10-05 14:37:41,783 - root - INFO - lr: 1.7213e-05 gnorm: 1.08 [16:03:31< 8:28:20] +[titan] 2025-10-05 14:37:52,662 - root - INFO - step: 26190 loss: 1.9604 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7331 +[titan] 2025-10-05 14:37:52,662 - root - INFO - lr: 1.7205e-05 gnorm: 1.08 [16:03:42< 8:28:09] +[titan] 2025-10-05 14:38:03,547 - root - INFO - step: 26195 loss: 2.0704 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 14:38:03,548 - root - INFO - lr: 1.7197e-05 gnorm: 1.08 [16:03:52< 8:27:58] +[titan] 2025-10-05 14:38:12,253 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:38:14,439 - root - INFO - step: 26200 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8117 +[titan] 2025-10-05 14:38:14,440 - root - INFO - lr: 1.7189e-05 gnorm: 1.06 [16:04:03< 8:27:47] +[titan] 2025-10-05 14:38:25,405 - root - INFO - step: 26205 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.58 mfu: 41.92% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 14:38:25,405 - root - INFO - lr: 1.7181e-05 gnorm: 1.10 [16:04:14< 8:27:36] +[titan] 2025-10-05 14:38:36,296 - root - INFO - step: 26210 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8205 +[titan] 2025-10-05 14:38:36,296 - root - INFO - lr: 1.7173e-05 gnorm: 1.08 [16:04:25< 8:27:25] +[titan] 2025-10-05 14:38:47,187 - root - INFO - step: 26215 loss: 2.1361 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2474 global_avg_mtp_loss: 1.8886 +[titan] 2025-10-05 14:38:47,187 - root - INFO - lr: 1.7166e-05 gnorm: 1.13 [16:04:36< 8:27:14] +[titan] 2025-10-05 14:38:58,082 - root - INFO - step: 26220 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 14:38:58,082 - root - INFO - lr: 1.7158e-05 gnorm: 1.07 [16:04:47< 8:27:02] +[titan] 2025-10-05 14:39:08,974 - root - INFO - step: 26225 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7872 +[titan] 2025-10-05 14:39:08,975 - root - INFO - lr: 1.7150e-05 gnorm: 1.06 [16:04:58< 8:26:51] +[titan] 2025-10-05 14:39:19,875 - root - INFO - step: 26230 loss: 1.9693 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 14:39:19,875 - root - INFO - lr: 
1.7142e-05 gnorm: 1.07 [16:05:09< 8:26:40] +[titan] 2025-10-05 14:39:30,758 - root - INFO - step: 26235 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 14:39:30,759 - root - INFO - lr: 1.7134e-05 gnorm: 1.07 [16:05:20< 8:26:29] +[titan] 2025-10-05 14:39:41,666 - root - INFO - step: 26240 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7444 +[titan] 2025-10-05 14:39:41,666 - root - INFO - lr: 1.7126e-05 gnorm: 1.05 [16:05:31< 8:26:18] +[titan] 2025-10-05 14:39:52,544 - root - INFO - step: 26245 loss: 2.0594 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8198 +[titan] 2025-10-05 14:39:52,544 - root - INFO - lr: 1.7118e-05 gnorm: 1.08 [16:05:41< 8:26:07] +[titan] 2025-10-05 14:40:01,233 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:40:03,416 - root - INFO - step: 26250 loss: 2.0445 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8082 +[titan] 2025-10-05 14:40:03,416 - root - INFO - lr: 1.7110e-05 gnorm: 1.05 [16:05:52< 8:25:56] +[titan] 2025-10-05 14:40:14,284 - root - INFO - step: 26255 loss: 2.1779 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2536 global_avg_mtp_loss: 1.9243 +[titan] 2025-10-05 14:40:14,284 - root - INFO - lr: 1.7102e-05 gnorm: 1.15 [16:06:03< 8:25:45] +[titan] 2025-10-05 14:40:25,201 - root - INFO - step: 26260 loss: 2.1664 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2601 global_avg_mtp_loss: 1.9063 +[titan] 2025-10-05 14:40:25,201 - root - INFO - lr: 1.7094e-05 gnorm: 1.31 [16:06:14< 8:25:34] +[titan] 2025-10-05 14:40:36,067 - root - INFO - step: 26265 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 14:40:36,067 - root - INFO - lr: 1.7086e-05 gnorm: 1.07 [16:06:25< 8:25:22] +[titan] 2025-10-05 14:40:46,986 - root - INFO - step: 26270 loss: 2.0408 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 14:40:46,986 - root - INFO - lr: 1.7078e-05 gnorm: 1.12 [16:06:36< 8:25:11] +[titan] 2025-10-05 14:40:57,856 - root - INFO - step: 26275 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 14:40:57,856 - root - INFO - lr: 1.7071e-05 gnorm: 1.05 [16:06:47< 8:25:00] +[titan] 2025-10-05 14:41:08,711 - root - INFO - step: 26280 loss: 2.0822 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8421 +[titan] 2025-10-05 14:41:08,711 - root - INFO - lr: 
1.7063e-05 gnorm: 1.08 [16:06:58< 8:24:49] +[titan] 2025-10-05 14:41:19,613 - root - INFO - step: 26285 loss: 2.0172 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 14:41:19,613 - root - INFO - lr: 1.7055e-05 gnorm: 1.10 [16:07:09< 8:24:38] +[titan] 2025-10-05 14:41:30,475 - root - INFO - step: 26290 loss: 2.0509 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8126 +[titan] 2025-10-05 14:41:30,476 - root - INFO - lr: 1.7047e-05 gnorm: 1.10 [16:07:19< 8:24:27] +[titan] 2025-10-05 14:41:41,346 - root - INFO - step: 26295 loss: 2.0334 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7973 +[titan] 2025-10-05 14:41:41,347 - root - INFO - lr: 1.7039e-05 gnorm: 1.04 [16:07:30< 8:24:16] +[titan] 2025-10-05 14:41:50,045 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:41:52,244 - root - INFO - step: 26300 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 14:41:52,245 - root - INFO - lr: 1.7031e-05 gnorm: 1.10 [16:07:41< 8:24:05] +[titan] 2025-10-05 14:42:03,172 - root - INFO - step: 26305 loss: 2.0574 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 14:42:03,172 - root - INFO - lr: 1.7023e-05 gnorm: 1.10 [16:07:52< 8:23:53] +[titan] 2025-10-05 14:42:14,032 - root - INFO - step: 26310 loss: 2.0276 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7924 +[titan] 2025-10-05 14:42:14,032 - root - INFO - lr: 1.7015e-05 gnorm: 1.10 [16:08:03< 8:23:42] +[titan] 2025-10-05 14:42:24,897 - root - INFO - step: 26315 loss: 2.0611 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8220 +[titan] 2025-10-05 14:42:24,897 - root - INFO - lr: 1.7007e-05 gnorm: 1.05 [16:08:14< 8:23:31] +[titan] 2025-10-05 14:42:35,737 - root - INFO - step: 26320 loss: 2.0287 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 14:42:35,737 - root - INFO - lr: 1.6999e-05 gnorm: 1.08 [16:08:25< 8:23:20] +[titan] 2025-10-05 14:42:46,593 - root - INFO - step: 26325 loss: 2.0775 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8364 +[titan] 2025-10-05 14:42:46,593 - root - INFO - lr: 1.6992e-05 gnorm: 1.11 [16:08:36< 8:23:09] +[titan] 2025-10-05 14:42:57,467 - root - INFO - step: 26330 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 14:42:57,467 - root - INFO - lr: 
1.6984e-05 gnorm: 1.05 [16:08:46< 8:22:58] +[titan] 2025-10-05 14:43:08,377 - root - INFO - step: 26335 loss: 2.0206 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7862 +[titan] 2025-10-05 14:43:08,377 - root - INFO - lr: 1.6976e-05 gnorm: 1.10 [16:08:57< 8:22:47] +[titan] 2025-10-05 14:43:19,276 - root - INFO - step: 26340 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:43:19,276 - root - INFO - lr: 1.6968e-05 gnorm: 1.09 [16:09:08< 8:22:36] +[titan] 2025-10-05 14:43:30,150 - root - INFO - step: 26345 loss: 2.1103 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2503 global_avg_mtp_loss: 1.8600 +[titan] 2025-10-05 14:43:30,150 - root - INFO - lr: 1.6960e-05 gnorm: 1.09 [16:09:19< 8:22:24] +[titan] 2025-10-05 14:43:38,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:43:41,032 - root - INFO - step: 26350 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 14:43:41,033 - root - INFO - lr: 1.6952e-05 gnorm: 1.12 [16:09:30< 8:22:13] +[titan] 2025-10-05 14:43:51,910 - root - INFO - step: 26355 loss: 2.0554 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 14:43:51,911 - root - INFO - lr: 1.6944e-05 gnorm: 1.10 [16:09:41< 8:22:02] +[titan] 2025-10-05 14:44:02,775 - root - INFO - step: 26360 loss: 2.1105 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2442 global_avg_mtp_loss: 1.8664 +[titan] 2025-10-05 14:44:02,776 - root - INFO - lr: 1.6936e-05 gnorm: 1.11 [16:09:52< 8:21:51] +[titan] 2025-10-05 14:44:13,677 - root - INFO - step: 26365 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 14:44:13,678 - root - INFO - lr: 1.6928e-05 gnorm: 1.09 [16:10:03< 8:21:40] +[titan] 2025-10-05 14:44:24,544 - root - INFO - step: 26370 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 14:44:24,544 - root - INFO - lr: 1.6921e-05 gnorm: 1.08 [16:10:13< 8:21:29] +[titan] 2025-10-05 14:44:35,405 - root - INFO - step: 26375 loss: 2.0563 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8183 +[titan] 2025-10-05 14:44:35,405 - root - INFO - lr: 1.6913e-05 gnorm: 1.09 [16:10:24< 8:21:18] +[titan] 2025-10-05 14:44:46,277 - root - INFO - step: 26380 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 14:44:46,277 - root - INFO - lr: 
1.6905e-05 gnorm: 1.10 [16:10:35< 8:21:07] +[titan] 2025-10-05 14:44:57,156 - root - INFO - step: 26385 loss: 2.1031 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8585 +[titan] 2025-10-05 14:44:57,156 - root - INFO - lr: 1.6897e-05 gnorm: 1.09 [16:10:46< 8:20:55] +[titan] 2025-10-05 14:45:07,991 - root - INFO - step: 26390 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 14:45:07,991 - root - INFO - lr: 1.6889e-05 gnorm: 1.09 [16:10:57< 8:20:44] +[titan] 2025-10-05 14:45:18,850 - root - INFO - step: 26395 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:45:18,850 - root - INFO - lr: 1.6881e-05 gnorm: 1.09 [16:11:08< 8:20:33] +[titan] 2025-10-05 14:45:27,579 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:45:29,758 - root - INFO - step: 26400 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 14:45:29,758 - root - INFO - lr: 1.6873e-05 gnorm: 1.09 [16:11:19< 8:20:22] +[titan] 2025-10-05 14:45:40,628 - root - INFO - step: 26405 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8074 +[titan] 2025-10-05 14:45:40,628 - root - INFO - lr: 1.6865e-05 gnorm: 1.09 [16:11:30< 8:20:11] +[titan] 2025-10-05 14:45:51,472 - root - INFO - step: 26410 loss: 2.0493 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8125 +[titan] 2025-10-05 14:45:51,472 - root - INFO - lr: 1.6858e-05 gnorm: 1.09 [16:11:40< 8:20:00] +[titan] 2025-10-05 14:46:02,329 - root - INFO - step: 26415 loss: 2.0718 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8312 +[titan] 2025-10-05 14:46:02,329 - root - INFO - lr: 1.6850e-05 gnorm: 1.09 [16:11:51< 8:19:49] +[titan] 2025-10-05 14:46:13,208 - root - INFO - step: 26420 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 14:46:13,208 - root - INFO - lr: 1.6842e-05 gnorm: 1.09 [16:12:02< 8:19:38] +[titan] 2025-10-05 14:46:24,077 - root - INFO - step: 26425 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 14:46:24,077 - root - INFO - lr: 1.6834e-05 gnorm: 1.09 [16:12:13< 8:19:26] +[titan] 2025-10-05 14:46:34,964 - root - INFO - step: 26430 loss: 2.0577 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8192 +[titan] 2025-10-05 14:46:34,964 - root - INFO - lr: 
1.6826e-05 gnorm: 1.08 [16:12:24< 8:19:15] +[titan] 2025-10-05 14:46:45,809 - root - INFO - step: 26435 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 14:46:45,810 - root - INFO - lr: 1.6818e-05 gnorm: 1.08 [16:12:35< 8:19:04] +[titan] 2025-10-05 14:46:56,653 - root - INFO - step: 26440 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 14:46:56,653 - root - INFO - lr: 1.6810e-05 gnorm: 1.07 [16:12:46< 8:18:53] +[titan] 2025-10-05 14:47:07,510 - root - INFO - step: 26445 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 14:47:07,511 - root - INFO - lr: 1.6803e-05 gnorm: 1.09 [16:12:56< 8:18:42] +[titan] 2025-10-05 14:47:16,211 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:47:18,390 - root - INFO - step: 26450 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 14:47:18,390 - root - INFO - lr: 1.6795e-05 gnorm: 1.07 [16:13:07< 8:18:31] +[titan] 2025-10-05 14:47:29,255 - root - INFO - step: 26455 loss: 2.0450 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:47:29,255 - root - INFO - lr: 1.6787e-05 gnorm: 1.10 [16:13:18< 8:18:20] +[titan] 2025-10-05 14:47:40,123 - root - INFO - step: 26460 loss: 2.0742 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2410 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 14:47:40,123 - root - INFO - lr: 1.6779e-05 gnorm: 1.14 [16:13:29< 8:18:09] +[titan] 2025-10-05 14:47:51,023 - root - INFO - step: 26465 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 14:47:51,023 - root - INFO - lr: 1.6771e-05 gnorm: 1.10 [16:13:40< 8:17:57] +[titan] 2025-10-05 14:48:01,888 - root - INFO - step: 26470 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 14:48:01,888 - root - INFO - lr: 1.6763e-05 gnorm: 1.05 [16:13:51< 8:17:46] +[titan] 2025-10-05 14:48:12,750 - root - INFO - step: 26475 loss: 2.0854 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8435 +[titan] 2025-10-05 14:48:12,750 - root - INFO - lr: 1.6756e-05 gnorm: 1.10 [16:14:02< 8:17:35] +[titan] 2025-10-05 14:48:23,596 - root - INFO - step: 26480 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 14:48:23,597 - root - INFO - lr: 
1.6748e-05 gnorm: 1.05 [16:14:13< 8:17:24] +[titan] 2025-10-05 14:48:34,475 - root - INFO - step: 26485 loss: 2.0429 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:48:34,475 - root - INFO - lr: 1.6740e-05 gnorm: 1.11 [16:14:23< 8:17:13] +[titan] 2025-10-05 14:48:45,347 - root - INFO - step: 26490 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8314 +[titan] 2025-10-05 14:48:45,348 - root - INFO - lr: 1.6732e-05 gnorm: 1.11 [16:14:34< 8:17:02] +[titan] 2025-10-05 14:48:56,251 - root - INFO - step: 26495 loss: 2.1088 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8640 +[titan] 2025-10-05 14:48:56,252 - root - INFO - lr: 1.6724e-05 gnorm: 1.15 [16:14:45< 8:16:51] +[titan] 2025-10-05 14:49:04,918 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:49:07,097 - root - INFO - step: 26500 loss: 2.0947 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2439 global_avg_mtp_loss: 1.8508 +[titan] 2025-10-05 14:49:07,097 - root - INFO - lr: 1.6716e-05 gnorm: 1.14 [16:14:56< 8:16:40] +[titan] 2025-10-05 14:49:17,975 - root - INFO - step: 26505 loss: 2.0105 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 14:49:17,976 - root - INFO - lr: 1.6709e-05 gnorm: 1.12 [16:15:07< 8:16:28] +[titan] 2025-10-05 14:49:28,870 - root - INFO - step: 26510 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7377 +[titan] 2025-10-05 14:49:28,870 - root - INFO - lr: 1.6701e-05 gnorm: 1.05 [16:15:18< 8:16:17] +[titan] 2025-10-05 14:49:39,744 - root - INFO - step: 26515 loss: 2.0774 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2405 global_avg_mtp_loss: 1.8369 +[titan] 2025-10-05 14:49:39,744 - root - INFO - lr: 1.6693e-05 gnorm: 1.14 [16:15:29< 8:16:06] +[titan] 2025-10-05 14:49:50,606 - root - INFO - step: 26520 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 14:49:50,606 - root - INFO - lr: 1.6685e-05 gnorm: 1.11 [16:15:40< 8:15:55] +[titan] 2025-10-05 14:50:01,497 - root - INFO - step: 26525 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8067 +[titan] 2025-10-05 14:50:01,497 - root - INFO - lr: 1.6677e-05 gnorm: 1.07 [16:15:50< 8:15:44] +[titan] 2025-10-05 14:50:12,351 - root - INFO - step: 26530 loss: 2.0439 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8075 +[titan] 2025-10-05 14:50:12,351 - root - INFO - lr: 
1.6669e-05 gnorm: 1.11 [16:16:01< 8:15:33] +[titan] 2025-10-05 14:50:23,197 - root - INFO - step: 26535 loss: 2.0146 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 14:50:23,197 - root - INFO - lr: 1.6662e-05 gnorm: 1.28 [16:16:12< 8:15:22] +[titan] 2025-10-05 14:50:34,070 - root - INFO - step: 26540 loss: 2.0363 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8009 +[titan] 2025-10-05 14:50:34,070 - root - INFO - lr: 1.6654e-05 gnorm: 1.09 [16:16:23< 8:15:11] +[titan] 2025-10-05 14:50:44,935 - root - INFO - step: 26545 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 14:50:44,935 - root - INFO - lr: 1.6646e-05 gnorm: 1.07 [16:16:34< 8:15:00] +[titan] 2025-10-05 14:50:53,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:50:55,778 - root - INFO - step: 26550 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 14:50:55,778 - root - INFO - lr: 1.6638e-05 gnorm: 1.07 [16:16:45< 8:14:48] +[titan] 2025-10-05 14:51:06,624 - root - INFO - step: 26555 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 14:51:06,625 - root - INFO - lr: 1.6630e-05 gnorm: 1.11 [16:16:56< 8:14:37] +[titan] 2025-10-05 14:51:17,534 - root - INFO - step: 26560 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 14:51:17,534 - root - INFO - lr: 1.6622e-05 gnorm: 1.12 [16:17:06< 8:14:26] +[titan] 2025-10-05 14:51:28,410 - root - INFO - step: 26565 loss: 2.1178 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2477 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 14:51:28,410 - root - INFO - lr: 1.6615e-05 gnorm: 1.09 [16:17:17< 8:14:15] +[titan] 2025-10-05 14:51:39,262 - root - INFO - step: 26570 loss: 2.0251 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7911 +[titan] 2025-10-05 14:51:39,262 - root - INFO - lr: 1.6607e-05 gnorm: 1.11 [16:17:28< 8:14:04] +[titan] 2025-10-05 14:51:50,113 - root - INFO - step: 26575 loss: 2.1052 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2444 global_avg_mtp_loss: 1.8608 +[titan] 2025-10-05 14:51:50,113 - root - INFO - lr: 1.6599e-05 gnorm: 1.15 [16:17:39< 8:13:53] +[titan] 2025-10-05 14:52:00,978 - root - INFO - step: 26580 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8185 +[titan] 2025-10-05 14:52:00,978 - root - INFO - lr: 
1.6591e-05 gnorm: 1.09 [16:17:50< 8:13:42] +[titan] 2025-10-05 14:52:11,826 - root - INFO - step: 26585 loss: 2.0519 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 14:52:11,826 - root - INFO - lr: 1.6583e-05 gnorm: 1.14 [16:18:01< 8:13:31] +[titan] 2025-10-05 14:52:22,714 - root - INFO - step: 26590 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8159 +[titan] 2025-10-05 14:52:22,714 - root - INFO - lr: 1.6576e-05 gnorm: 1.09 [16:18:12< 8:13:19] +[titan] 2025-10-05 14:52:33,578 - root - INFO - step: 26595 loss: 2.0442 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 14:52:33,578 - root - INFO - lr: 1.6568e-05 gnorm: 1.08 [16:18:22< 8:13:08] +[titan] 2025-10-05 14:52:42,211 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:52:44,378 - root - INFO - step: 26600 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,340 tflops: 420.92 mfu: 42.56% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 14:52:44,379 - root - INFO - lr: 1.6560e-05 gnorm: 1.10 [16:18:33< 8:12:57] +[titan] 2025-10-05 14:52:55,225 - root - INFO - step: 26605 loss: 2.0502 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8120 +[titan] 2025-10-05 14:52:55,225 - root - INFO - lr: 1.6552e-05 gnorm: 1.10 [16:18:44< 8:12:46] +[titan] 2025-10-05 14:53:06,069 - root - INFO - step: 26610 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:53:06,070 - root - INFO - lr: 1.6544e-05 gnorm: 1.07 [16:18:55< 8:12:35] +[titan] 2025-10-05 14:53:16,896 - root - INFO - step: 26615 loss: 2.0629 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8239 +[titan] 2025-10-05 14:53:16,896 - root - INFO - lr: 1.6537e-05 gnorm: 1.08 [16:19:06< 8:12:24] +[titan] 2025-10-05 14:53:27,734 - root - INFO - step: 26620 loss: 2.0045 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7725 +[titan] 2025-10-05 14:53:27,734 - root - INFO - lr: 1.6529e-05 gnorm: 1.15 [16:19:17< 8:12:13] +[titan] 2025-10-05 14:53:36,734 - root - INFO - Dumping profiler traces at step 26624 +[titan] 2025-10-05 14:53:36,775 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 14:53:38,970 - root - INFO - step: 26625 loss: 2.0899 memory: 118.84GiB(85.28%) tps: 29,164 tflops: 404.60 mfu: 40.91% global_avg_ntp_loss: 0.2433 global_avg_mtp_loss: 1.8466 +[titan] 2025-10-05 14:53:38,971 - root - INFO - lr: 1.6521e-05 gnorm: 1.13 [16:19:28< 8:12:02] +[titan] 2025-10-05 14:53:49,827 - root - INFO - step: 26630 loss: 
2.0019 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 14:53:49,827 - root - INFO - lr: 1.6513e-05 gnorm: 1.09 [16:19:39< 8:11:51] +[titan] 2025-10-05 14:54:00,657 - root - INFO - step: 26635 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,259 tflops: 419.80 mfu: 42.45% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7901 +[titan] 2025-10-05 14:54:00,657 - root - INFO - lr: 1.6505e-05 gnorm: 1.12 [16:19:50< 8:11:39] +[titan] 2025-10-05 14:54:11,514 - root - INFO - step: 26640 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8209 +[titan] 2025-10-05 14:54:11,514 - root - INFO - lr: 1.6498e-05 gnorm: 1.10 [16:20:00< 8:11:28] +[titan] 2025-10-05 14:54:22,378 - root - INFO - step: 26645 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 14:54:22,378 - root - INFO - lr: 1.6490e-05 gnorm: 1.06 [16:20:11< 8:11:17] +[titan] 2025-10-05 14:54:31,073 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:54:33,259 - root - INFO - step: 26650 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8377 +[titan] 2025-10-05 14:54:33,259 - root - INFO - lr: 1.6482e-05 gnorm: 1.12 [16:20:22< 8:11:06] +[titan] 2025-10-05 14:54:44,181 - root - INFO - step: 26655 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 14:54:44,181 - root - INFO - lr: 1.6474e-05 gnorm: 1.10 [16:20:33< 8:10:55] +[titan] 2025-10-05 14:54:55,045 - root - INFO - step: 26660 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:54:55,046 - root - INFO - lr: 1.6467e-05 gnorm: 1.09 [16:20:44< 8:10:44] +[titan] 2025-10-05 14:55:05,921 - root - INFO - step: 26665 loss: 2.0911 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8484 +[titan] 2025-10-05 14:55:05,921 - root - INFO - lr: 1.6459e-05 gnorm: 1.12 [16:20:55< 8:10:33] +[titan] 2025-10-05 14:55:16,810 - root - INFO - step: 26670 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 14:55:16,810 - root - INFO - lr: 1.6451e-05 gnorm: 1.08 [16:21:06< 8:10:22] +[titan] 2025-10-05 14:55:27,678 - root - INFO - step: 26675 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 14:55:27,679 - root - INFO - lr: 1.6443e-05 gnorm: 1.09 [16:21:17< 8:10:10] +[titan] 2025-10-05 14:55:38,537 - root - INFO - step: 26680 loss: 2.0488 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 14:55:38,537 - root - INFO - lr: 
1.6435e-05 gnorm: 1.10 [16:21:27< 8:09:59] +[titan] 2025-10-05 14:55:49,438 - root - INFO - step: 26685 loss: 2.0107 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 14:55:49,439 - root - INFO - lr: 1.6428e-05 gnorm: 1.11 [16:21:38< 8:09:48] +[titan] 2025-10-05 14:56:00,304 - root - INFO - step: 26690 loss: 2.0743 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 14:56:00,304 - root - INFO - lr: 1.6420e-05 gnorm: 1.11 [16:21:49< 8:09:37] +[titan] 2025-10-05 14:56:11,149 - root - INFO - step: 26695 loss: 2.0415 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8035 +[titan] 2025-10-05 14:56:11,149 - root - INFO - lr: 1.6412e-05 gnorm: 1.10 [16:22:00< 8:09:26] +[titan] 2025-10-05 14:56:19,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:56:22,000 - root - INFO - step: 26700 loss: 2.0496 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8111 +[titan] 2025-10-05 14:56:22,000 - root - INFO - lr: 1.6404e-05 gnorm: 1.07 [16:22:11< 8:09:15] +[titan] 2025-10-05 14:56:32,858 - root - INFO - step: 26705 loss: 1.9909 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 14:56:32,858 - root - INFO - lr: 1.6397e-05 gnorm: 1.07 [16:22:22< 8:09:04] +[titan] 2025-10-05 14:56:43,728 - root - INFO - step: 26710 loss: 2.1246 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2461 global_avg_mtp_loss: 1.8785 +[titan] 2025-10-05 14:56:43,729 - root - INFO - lr: 1.6389e-05 gnorm: 1.12 [16:22:33< 8:08:53] +[titan] 2025-10-05 14:56:54,594 - root - INFO - step: 26715 loss: 2.0194 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 14:56:54,594 - root - INFO - lr: 1.6381e-05 gnorm: 1.08 [16:22:43< 8:08:41] +[titan] 2025-10-05 14:57:05,497 - root - INFO - step: 26720 loss: 1.9969 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 14:57:05,497 - root - INFO - lr: 1.6373e-05 gnorm: 1.08 [16:22:54< 8:08:30] +[titan] 2025-10-05 14:57:16,361 - root - INFO - step: 26725 loss: 2.0885 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 14:57:16,361 - root - INFO - lr: 1.6366e-05 gnorm: 1.07 [16:23:05< 8:08:19] +[titan] 2025-10-05 14:57:27,210 - root - INFO - step: 26730 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 14:57:27,211 - root - INFO - lr: 
1.6358e-05 gnorm: 1.10 [16:23:16< 8:08:08] +[titan] 2025-10-05 14:57:38,049 - root - INFO - step: 26735 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 14:57:38,050 - root - INFO - lr: 1.6350e-05 gnorm: 1.11 [16:23:27< 8:07:57] +[titan] 2025-10-05 14:57:48,918 - root - INFO - step: 26740 loss: 2.0984 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8556 +[titan] 2025-10-05 14:57:48,918 - root - INFO - lr: 1.6342e-05 gnorm: 1.15 [16:23:38< 8:07:46] +[titan] 2025-10-05 14:57:59,773 - root - INFO - step: 26745 loss: 2.0328 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 14:57:59,773 - root - INFO - lr: 1.6335e-05 gnorm: 1.10 [16:23:49< 8:07:35] +[titan] 2025-10-05 14:58:08,499 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:58:10,688 - root - INFO - step: 26750 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 14:58:10,688 - root - INFO - lr: 1.6327e-05 gnorm: 1.10 [16:24:00< 8:07:24] +[titan] 2025-10-05 14:58:21,558 - root - INFO - step: 26755 loss: 2.0183 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 14:58:21,559 - root - INFO - lr: 1.6319e-05 gnorm: 1.10 [16:24:10< 8:07:13] +[titan] 2025-10-05 14:58:32,424 - root - INFO - step: 26760 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 14:58:32,425 - root - INFO - lr: 1.6311e-05 gnorm: 1.08 [16:24:21< 8:07:01] +[titan] 2025-10-05 14:58:43,310 - root - INFO - step: 26765 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:58:43,310 - root - INFO - lr: 1.6304e-05 gnorm: 1.07 [16:24:32< 8:06:50] +[titan] 2025-10-05 14:58:54,204 - root - INFO - step: 26770 loss: 1.9863 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7566 +[titan] 2025-10-05 14:58:54,204 - root - INFO - lr: 1.6296e-05 gnorm: 1.08 [16:24:43< 8:06:39] +[titan] 2025-10-05 14:59:05,077 - root - INFO - step: 26775 loss: 2.0390 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 14:59:05,077 - root - INFO - lr: 1.6288e-05 gnorm: 1.09 [16:24:54< 8:06:28] +[titan] 2025-10-05 14:59:15,970 - root - INFO - step: 26780 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 14:59:15,970 - root - INFO - lr: 
1.6280e-05 gnorm: 1.13 [16:25:05< 8:06:17] +[titan] 2025-10-05 14:59:26,894 - root - INFO - step: 26785 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 14:59:26,894 - root - INFO - lr: 1.6273e-05 gnorm: 1.11 [16:25:16< 8:06:06] +[titan] 2025-10-05 14:59:37,753 - root - INFO - step: 26790 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 14:59:37,753 - root - INFO - lr: 1.6265e-05 gnorm: 1.03 [16:25:27< 8:05:55] +[titan] 2025-10-05 14:59:48,629 - root - INFO - step: 26795 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7989 +[titan] 2025-10-05 14:59:48,629 - root - INFO - lr: 1.6257e-05 gnorm: 1.08 [16:25:38< 8:05:44] +[titan] 2025-10-05 14:59:57,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 14:59:59,514 - root - INFO - step: 26800 loss: 1.9889 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 14:59:59,514 - root - INFO - lr: 1.6249e-05 gnorm: 1.12 [16:25:48< 8:05:33] +[titan] 2025-10-05 15:00:10,404 - root - INFO - step: 26805 loss: 2.0576 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8193 +[titan] 2025-10-05 15:00:10,405 - root - INFO - lr: 1.6242e-05 gnorm: 1.09 [16:25:59< 8:05:21] +[titan] 2025-10-05 15:00:21,298 - root - INFO - step: 26810 loss: 2.0441 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8079 +[titan] 2025-10-05 15:00:21,298 - root - INFO - lr: 1.6234e-05 gnorm: 1.09 [16:26:10< 8:05:10] +[titan] 2025-10-05 15:00:32,228 - root - INFO - step: 26815 loss: 2.0782 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.95 mfu: 42.06% global_avg_ntp_loss: 0.2411 global_avg_mtp_loss: 1.8371 +[titan] 2025-10-05 15:00:32,228 - root - INFO - lr: 1.6226e-05 gnorm: 1.15 [16:26:21< 8:04:59] +[titan] 2025-10-05 15:00:43,121 - root - INFO - step: 26820 loss: 2.0556 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8172 +[titan] 2025-10-05 15:00:43,121 - root - INFO - lr: 1.6219e-05 gnorm: 1.10 [16:26:32< 8:04:48] +[titan] 2025-10-05 15:00:54,008 - root - INFO - step: 26825 loss: 2.0473 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8094 +[titan] 2025-10-05 15:00:54,008 - root - INFO - lr: 1.6211e-05 gnorm: 1.16 [16:26:43< 8:04:37] +[titan] 2025-10-05 15:01:04,889 - root - INFO - step: 26830 loss: 2.0024 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 15:01:04,889 - root - INFO - lr: 
1.6203e-05 gnorm: 1.09 [16:26:54< 8:04:26] +[titan] 2025-10-05 15:01:15,765 - root - INFO - step: 26835 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 15:01:15,765 - root - INFO - lr: 1.6195e-05 gnorm: 1.07 [16:27:05< 8:04:15] +[titan] 2025-10-05 15:01:26,630 - root - INFO - step: 26840 loss: 2.0420 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2370 global_avg_mtp_loss: 1.8050 +[titan] 2025-10-05 15:01:26,631 - root - INFO - lr: 1.6188e-05 gnorm: 1.11 [16:27:16< 8:04:04] +[titan] 2025-10-05 15:01:37,602 - root - INFO - step: 26845 loss: 2.0161 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.37 mfu: 41.90% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:01:37,602 - root - INFO - lr: 1.6180e-05 gnorm: 1.12 [16:27:26< 8:03:53] +[titan] 2025-10-05 15:01:46,305 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:01:48,489 - root - INFO - step: 26850 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7971 +[titan] 2025-10-05 15:01:48,490 - root - INFO - lr: 1.6172e-05 gnorm: 1.08 [16:27:37< 8:03:42] +[titan] 2025-10-05 15:01:59,381 - root - INFO - step: 26855 loss: 2.0392 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 15:01:59,382 - root - INFO - lr: 1.6165e-05 gnorm: 1.07 [16:27:48< 8:03:30] +[titan] 2025-10-05 15:02:10,248 - root - INFO - step: 26860 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:02:10,248 - root - INFO - lr: 1.6157e-05 gnorm: 1.10 [16:27:59< 8:03:19] +[titan] 2025-10-05 15:02:21,138 - root - INFO - step: 26865 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 15:02:21,139 - root - INFO - lr: 1.6149e-05 gnorm: 1.12 [16:28:10< 8:03:08] +[titan] 2025-10-05 15:02:32,019 - root - INFO - step: 26870 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8281 +[titan] 2025-10-05 15:02:32,019 - root - INFO - lr: 1.6141e-05 gnorm: 1.07 [16:28:21< 8:02:57] +[titan] 2025-10-05 15:02:42,942 - root - INFO - step: 26875 loss: 2.0517 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8145 +[titan] 2025-10-05 15:02:42,942 - root - INFO - lr: 1.6134e-05 gnorm: 1.12 [16:28:32< 8:02:46] +[titan] 2025-10-05 15:02:53,877 - root - INFO - step: 26880 loss: 2.0800 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2416 global_avg_mtp_loss: 1.8384 +[titan] 2025-10-05 15:02:53,877 - root - INFO - lr: 
1.6126e-05 gnorm: 1.13 [16:28:43< 8:02:35] +[titan] 2025-10-05 15:03:04,754 - root - INFO - step: 26885 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 15:03:04,754 - root - INFO - lr: 1.6118e-05 gnorm: 1.16 [16:28:54< 8:02:24] +[titan] 2025-10-05 15:03:15,633 - root - INFO - step: 26890 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:03:15,633 - root - INFO - lr: 1.6111e-05 gnorm: 1.11 [16:29:05< 8:02:13] +[titan] 2025-10-05 15:03:26,500 - root - INFO - step: 26895 loss: 2.0231 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 15:03:26,500 - root - INFO - lr: 1.6103e-05 gnorm: 1.12 [16:29:15< 8:02:02] +[titan] 2025-10-05 15:03:35,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:03:37,367 - root - INFO - step: 26900 loss: 2.0325 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7961 +[titan] 2025-10-05 15:03:37,368 - root - INFO - lr: 1.6095e-05 gnorm: 1.11 [16:29:26< 8:01:50] +[titan] 2025-10-05 15:03:48,288 - root - INFO - step: 26905 loss: 2.0322 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7960 +[titan] 2025-10-05 15:03:48,289 - root - INFO - lr: 1.6088e-05 gnorm: 1.12 [16:29:37< 8:01:39] +[titan] 2025-10-05 15:03:59,203 - root - INFO - step: 26910 loss: 2.1108 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.8659 +[titan] 2025-10-05 15:03:59,203 - root - INFO - lr: 1.6080e-05 gnorm: 1.17 [16:29:48< 8:01:28] +[titan] 2025-10-05 15:04:10,072 - root - INFO - step: 26915 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 15:04:10,072 - root - INFO - lr: 1.6072e-05 gnorm: 1.08 [16:29:59< 8:01:17] +[titan] 2025-10-05 15:04:20,947 - root - INFO - step: 26920 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8313 +[titan] 2025-10-05 15:04:20,948 - root - INFO - lr: 1.6065e-05 gnorm: 1.11 [16:30:10< 8:01:06] +[titan] 2025-10-05 15:04:31,818 - root - INFO - step: 26925 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:04:31,818 - root - INFO - lr: 1.6057e-05 gnorm: 1.10 [16:30:21< 8:00:55] +[titan] 2025-10-05 15:04:42,737 - root - INFO - step: 26930 loss: 1.9755 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.38 mfu: 42.10% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 15:04:42,737 - root - INFO - lr: 
1.6049e-05 gnorm: 1.07 [16:30:32< 8:00:44] +[titan] 2025-10-05 15:04:53,614 - root - INFO - step: 26935 loss: 2.1297 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2475 global_avg_mtp_loss: 1.8822 +[titan] 2025-10-05 15:04:53,614 - root - INFO - lr: 1.6041e-05 gnorm: 1.10 [16:30:42< 8:00:33] +[titan] 2025-10-05 15:05:04,493 - root - INFO - step: 26940 loss: 2.0686 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8289 +[titan] 2025-10-05 15:05:04,493 - root - INFO - lr: 1.6034e-05 gnorm: 1.16 [16:30:53< 8:00:22] +[titan] 2025-10-05 15:05:15,413 - root - INFO - step: 26945 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 15:05:15,413 - root - INFO - lr: 1.6026e-05 gnorm: 1.09 [16:31:04< 8:00:11] +[titan] 2025-10-05 15:05:24,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:05:26,301 - root - INFO - step: 26950 loss: 2.0262 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:05:26,302 - root - INFO - lr: 1.6018e-05 gnorm: 1.13 [16:31:15< 7:59:59] +[titan] 2025-10-05 15:05:37,170 - root - INFO - step: 26955 loss: 2.0365 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:05:37,170 - root - INFO - lr: 1.6011e-05 gnorm: 1.07 [16:31:26< 7:59:48] +[titan] 2025-10-05 15:05:48,097 - root - INFO - step: 26960 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8139 +[titan] 2025-10-05 15:05:48,097 - root - INFO - lr: 1.6003e-05 gnorm: 1.11 [16:31:37< 7:59:37] +[titan] 2025-10-05 15:05:58,956 - root - INFO - step: 26965 loss: 2.0670 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8269 +[titan] 2025-10-05 15:05:58,956 - root - INFO - lr: 1.5995e-05 gnorm: 1.13 [16:31:48< 7:59:26] +[titan] 2025-10-05 15:06:09,830 - root - INFO - step: 26970 loss: 1.9712 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 15:06:09,830 - root - INFO - lr: 1.5988e-05 gnorm: 1.09 [16:31:59< 7:59:15] +[titan] 2025-10-05 15:06:20,738 - root - INFO - step: 26975 loss: 2.0820 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8407 +[titan] 2025-10-05 15:06:20,738 - root - INFO - lr: 1.5980e-05 gnorm: 1.14 [16:32:10< 7:59:04] +[titan] 2025-10-05 15:06:31,607 - root - INFO - step: 26980 loss: 2.0877 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8445 +[titan] 2025-10-05 15:06:31,607 - root - INFO - lr: 
1.5972e-05 gnorm: 1.13 [16:32:20< 7:58:53] +[titan] 2025-10-05 15:06:42,557 - root - INFO - step: 26985 loss: 2.1177 memory: 118.84GiB(85.28%) tps: 29,926 tflops: 415.18 mfu: 41.98% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8708 +[titan] 2025-10-05 15:06:42,557 - root - INFO - lr: 1.5965e-05 gnorm: 1.13 [16:32:31< 7:58:42] +[titan] 2025-10-05 15:06:53,430 - root - INFO - step: 26990 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:06:53,431 - root - INFO - lr: 1.5957e-05 gnorm: 1.11 [16:32:42< 7:58:31] +[titan] 2025-10-05 15:07:04,312 - root - INFO - step: 26995 loss: 2.1015 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8577 +[titan] 2025-10-05 15:07:04,312 - root - INFO - lr: 1.5949e-05 gnorm: 1.11 [16:32:53< 7:58:19] +[titan] 2025-10-05 15:07:13,022 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:07:15,207 - root - INFO - step: 27000 loss: 2.0677 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8283 +[titan] 2025-10-05 15:07:15,207 - root - INFO - lr: 1.5942e-05 gnorm: 1.13 [16:33:04< 7:58:08] +[titan] 2025-10-05 15:07:26,138 - root - INFO - step: 27005 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:07:26,138 - root - INFO - lr: 1.5934e-05 gnorm: 1.14 [16:33:15< 7:57:57] +[titan] 2025-10-05 15:07:37,028 - root - INFO - step: 27010 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8348 +[titan] 2025-10-05 15:07:37,028 - root - INFO - lr: 1.5926e-05 gnorm: 1.14 [16:33:26< 7:57:46] +[titan] 2025-10-05 15:07:47,970 - root - INFO - step: 27015 loss: 2.0050 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 15:07:47,971 - root - INFO - lr: 1.5919e-05 gnorm: 1.12 [16:33:37< 7:57:35] +[titan] 2025-10-05 15:07:58,854 - root - INFO - step: 27020 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7853 +[titan] 2025-10-05 15:07:58,855 - root - INFO - lr: 1.5911e-05 gnorm: 1.15 [16:33:48< 7:57:24] +[titan] 2025-10-05 15:08:09,736 - root - INFO - step: 27025 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:08:09,736 - root - INFO - lr: 1.5903e-05 gnorm: 1.12 [16:33:59< 7:57:13] +[titan] 2025-10-05 15:08:20,606 - root - INFO - step: 27030 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 15:08:20,606 - root - INFO - lr: 
1.5896e-05 gnorm: 1.08 [16:34:09< 7:57:02] +[titan] 2025-10-05 15:08:31,489 - root - INFO - step: 27035 loss: 1.9763 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 15:08:31,490 - root - INFO - lr: 1.5888e-05 gnorm: 1.09 [16:34:20< 7:56:51] +[titan] 2025-10-05 15:08:42,436 - root - INFO - step: 27040 loss: 2.0880 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.31 mfu: 41.99% global_avg_ntp_loss: 0.2422 global_avg_mtp_loss: 1.8458 +[titan] 2025-10-05 15:08:42,436 - root - INFO - lr: 1.5881e-05 gnorm: 1.12 [16:34:31< 7:56:40] +[titan] 2025-10-05 15:08:53,408 - root - INFO - step: 27045 loss: 2.1193 memory: 118.84GiB(85.28%) tps: 29,865 tflops: 414.33 mfu: 41.89% global_avg_ntp_loss: 0.2469 global_avg_mtp_loss: 1.8724 +[titan] 2025-10-05 15:08:53,409 - root - INFO - lr: 1.5873e-05 gnorm: 1.16 [16:34:42< 7:56:29] +[titan] 2025-10-05 15:09:02,117 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:09:04,301 - root - INFO - step: 27050 loss: 2.0295 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7940 +[titan] 2025-10-05 15:09:04,301 - root - INFO - lr: 1.5865e-05 gnorm: 1.11 [16:34:53< 7:56:17] +[titan] 2025-10-05 15:09:15,193 - root - INFO - step: 27055 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7934 +[titan] 2025-10-05 15:09:15,194 - root - INFO - lr: 1.5858e-05 gnorm: 1.10 [16:35:04< 7:56:06] +[titan] 2025-10-05 15:09:26,100 - root - INFO - step: 27060 loss: 2.0465 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8091 +[titan] 2025-10-05 15:09:26,100 - root - INFO - lr: 1.5850e-05 gnorm: 1.12 [16:35:15< 7:55:55] +[titan] 2025-10-05 15:09:36,976 - root - INFO - step: 27065 loss: 1.9733 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7439 +[titan] 2025-10-05 15:09:36,976 - root - INFO - lr: 1.5842e-05 gnorm: 1.09 [16:35:26< 7:55:44] +[titan] 2025-10-05 15:09:47,942 - root - INFO - step: 27070 loss: 2.0633 memory: 118.84GiB(85.28%) tps: 29,883 tflops: 414.57 mfu: 41.92% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8245 +[titan] 2025-10-05 15:09:47,942 - root - INFO - lr: 1.5835e-05 gnorm: 1.09 [16:35:37< 7:55:33] +[titan] 2025-10-05 15:09:58,812 - root - INFO - step: 27075 loss: 2.0396 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8029 +[titan] 2025-10-05 15:09:58,812 - root - INFO - lr: 1.5827e-05 gnorm: 1.08 [16:35:48< 7:55:22] +[titan] 2025-10-05 15:10:09,685 - root - INFO - step: 27080 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 15:10:09,685 - root - INFO - lr: 
1.5819e-05 gnorm: 1.09 [16:35:59< 7:55:11] +[titan] 2025-10-05 15:10:20,555 - root - INFO - step: 27085 loss: 2.0147 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 15:10:20,556 - root - INFO - lr: 1.5812e-05 gnorm: 1.07 [16:36:09< 7:55:00] +[titan] 2025-10-05 15:10:31,449 - root - INFO - step: 27090 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 15:10:31,449 - root - INFO - lr: 1.5804e-05 gnorm: 1.06 [16:36:20< 7:54:49] +[titan] 2025-10-05 15:10:42,317 - root - INFO - step: 27095 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:10:42,317 - root - INFO - lr: 1.5797e-05 gnorm: 1.08 [16:36:31< 7:54:38] +[titan] 2025-10-05 15:10:51,064 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:10:53,251 - root - INFO - step: 27100 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:10:53,252 - root - INFO - lr: 1.5789e-05 gnorm: 1.15 [16:36:42< 7:54:26] +[titan] 2025-10-05 15:11:04,173 - root - INFO - step: 27105 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 15:11:04,174 - root - INFO - lr: 1.5781e-05 gnorm: 1.12 [16:36:53< 7:54:15] +[titan] 2025-10-05 15:11:15,060 - root - INFO - step: 27110 loss: 2.0691 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:11:15,060 - root - INFO - lr: 1.5774e-05 gnorm: 1.15 [16:37:04< 7:54:04] +[titan] 2025-10-05 15:11:25,971 - root - INFO - step: 27115 loss: 2.0649 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:11:25,971 - root - INFO - lr: 1.5766e-05 gnorm: 1.12 [16:37:15< 7:53:53] +[titan] 2025-10-05 15:11:36,858 - root - INFO - step: 27120 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7858 +[titan] 2025-10-05 15:11:36,859 - root - INFO - lr: 1.5759e-05 gnorm: 1.09 [16:37:26< 7:53:42] +[titan] 2025-10-05 15:11:47,776 - root - INFO - step: 27125 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7663 +[titan] 2025-10-05 15:11:47,777 - root - INFO - lr: 1.5751e-05 gnorm: 1.08 [16:37:37< 7:53:31] +[titan] 2025-10-05 15:11:58,636 - root - INFO - step: 27130 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:11:58,636 - root - INFO - lr: 
1.5743e-05 gnorm: 1.13 [16:37:47< 7:53:20] +[titan] 2025-10-05 15:12:09,626 - root - INFO - step: 27135 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 29,816 tflops: 413.66 mfu: 41.83% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 15:12:09,626 - root - INFO - lr: 1.5736e-05 gnorm: 1.11 [16:37:58< 7:53:09] +[titan] 2025-10-05 15:12:11,994 - root - INFO - Dumping profiler traces at step 27136 +[titan] 2025-10-05 15:12:12,034 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:12:20,793 - root - INFO - step: 27140 loss: 2.0237 memory: 118.84GiB(85.28%) tps: 29,344 tflops: 407.10 mfu: 41.16% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 15:12:20,794 - root - INFO - lr: 1.5728e-05 gnorm: 1.11 [16:38:10< 7:52:58] +[titan] 2025-10-05 15:12:31,651 - root - INFO - step: 27145 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 15:12:31,651 - root - INFO - lr: 1.5720e-05 gnorm: 1.09 [16:38:21< 7:52:47] +[titan] 2025-10-05 15:12:40,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:12:42,534 - root - INFO - step: 27150 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7607 +[titan] 2025-10-05 15:12:42,534 - root - INFO - lr: 1.5713e-05 gnorm: 1.06 [16:38:31< 7:52:36] +[titan] 2025-10-05 15:12:53,435 - root - INFO - step: 27155 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:12:53,435 - root - INFO - lr: 1.5705e-05 gnorm: 1.08 [16:38:42< 7:52:24] +[titan] 2025-10-05 15:13:04,284 - root - INFO - step: 27160 loss: 2.0466 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8071 +[titan] 2025-10-05 15:13:04,284 - root - INFO - lr: 1.5698e-05 gnorm: 1.11 [16:38:53< 7:52:13] +[titan] 2025-10-05 15:13:15,182 - root - INFO - step: 27165 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 15:13:15,182 - root - INFO - lr: 1.5690e-05 gnorm: 1.08 [16:39:04< 7:52:02] +[titan] 2025-10-05 15:13:26,046 - root - INFO - step: 27170 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 15:13:26,046 - root - INFO - lr: 1.5682e-05 gnorm: 1.08 [16:39:15< 7:51:51] +[titan] 2025-10-05 15:13:36,900 - root - INFO - step: 27175 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7416 +[titan] 2025-10-05 15:13:36,900 - root - INFO - lr: 1.5675e-05 gnorm: 1.10 [16:39:26< 7:51:40] +[titan] 2025-10-05 15:13:47,796 - root - INFO - step: 27180 loss: 2.1244 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2476 global_avg_mtp_loss: 1.8768 +[titan] 2025-10-05 15:13:47,796 - root - INFO - lr: 
1.5667e-05 gnorm: 1.09 [16:39:37< 7:51:29] +[titan] 2025-10-05 15:13:58,664 - root - INFO - step: 27185 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:13:58,664 - root - INFO - lr: 1.5660e-05 gnorm: 1.10 [16:39:48< 7:51:18] +[titan] 2025-10-05 15:14:09,527 - root - INFO - step: 27190 loss: 2.0164 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:14:09,527 - root - INFO - lr: 1.5652e-05 gnorm: 1.09 [16:39:58< 7:51:07] +[titan] 2025-10-05 15:14:20,387 - root - INFO - step: 27195 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 15:14:20,387 - root - INFO - lr: 1.5645e-05 gnorm: 1.07 [16:40:09< 7:50:56] +[titan] 2025-10-05 15:14:29,129 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:14:31,314 - root - INFO - step: 27200 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7824 +[titan] 2025-10-05 15:14:31,314 - root - INFO - lr: 1.5637e-05 gnorm: 1.10 [16:40:20< 7:50:45] +[titan] 2025-10-05 15:14:42,185 - root - INFO - step: 27205 loss: 2.0377 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8002 +[titan] 2025-10-05 15:14:42,186 - root - INFO - lr: 1.5629e-05 gnorm: 1.08 [16:40:31< 7:50:33] +[titan] 2025-10-05 15:14:53,065 - root - INFO - step: 27210 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 15:14:53,065 - root - INFO - lr: 1.5622e-05 gnorm: 1.12 [16:40:42< 7:50:22] +[titan] 2025-10-05 15:15:03,941 - root - INFO - step: 27215 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8140 +[titan] 2025-10-05 15:15:03,941 - root - INFO - lr: 1.5614e-05 gnorm: 1.11 [16:40:53< 7:50:11] +[titan] 2025-10-05 15:15:14,801 - root - INFO - step: 27220 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 15:15:14,801 - root - INFO - lr: 1.5607e-05 gnorm: 1.10 [16:41:04< 7:50:00] +[titan] 2025-10-05 15:15:25,653 - root - INFO - step: 27225 loss: 1.9878 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7565 +[titan] 2025-10-05 15:15:25,653 - root - INFO - lr: 1.5599e-05 gnorm: 1.09 [16:41:14< 7:49:49] +[titan] 2025-10-05 15:15:36,551 - root - INFO - step: 27230 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8280 +[titan] 2025-10-05 15:15:36,552 - root - INFO - lr: 
1.5591e-05 gnorm: 1.13 [16:41:25< 7:49:38] +[titan] 2025-10-05 15:15:47,426 - root - INFO - step: 27235 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 15:15:47,426 - root - INFO - lr: 1.5584e-05 gnorm: 1.10 [16:41:36< 7:49:27] +[titan] 2025-10-05 15:15:58,353 - root - INFO - step: 27240 loss: 2.0437 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.03 mfu: 42.07% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8066 +[titan] 2025-10-05 15:15:58,354 - root - INFO - lr: 1.5576e-05 gnorm: 1.08 [16:41:47< 7:49:16] +[titan] 2025-10-05 15:16:09,201 - root - INFO - step: 27245 loss: 2.0207 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 15:16:09,202 - root - INFO - lr: 1.5569e-05 gnorm: 1.11 [16:41:58< 7:49:05] +[titan] 2025-10-05 15:16:17,888 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:16:20,072 - root - INFO - step: 27250 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7443 +[titan] 2025-10-05 15:16:20,072 - root - INFO - lr: 1.5561e-05 gnorm: 1.08 [16:42:09< 7:48:53] +[titan] 2025-10-05 15:16:30,931 - root - INFO - step: 27255 loss: 2.0676 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:16:30,931 - root - INFO - lr: 1.5554e-05 gnorm: 1.12 [16:42:20< 7:48:42] +[titan] 2025-10-05 15:16:41,802 - root - INFO - step: 27260 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7950 +[titan] 2025-10-05 15:16:41,803 - root - INFO - lr: 1.5546e-05 gnorm: 1.15 [16:42:31< 7:48:31] +[titan] 2025-10-05 15:16:52,730 - root - INFO - step: 27265 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 15:16:52,730 - root - INFO - lr: 1.5539e-05 gnorm: 1.08 [16:42:42< 7:48:20] +[titan] 2025-10-05 15:17:03,595 - root - INFO - step: 27270 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 15:17:03,596 - root - INFO - lr: 1.5531e-05 gnorm: 1.08 [16:42:52< 7:48:09] +[titan] 2025-10-05 15:17:14,444 - root - INFO - step: 27275 loss: 2.1147 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2446 global_avg_mtp_loss: 1.8700 +[titan] 2025-10-05 15:17:14,444 - root - INFO - lr: 1.5523e-05 gnorm: 1.11 [16:43:03< 7:47:58] +[titan] 2025-10-05 15:17:25,317 - root - INFO - step: 27280 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7355 +[titan] 2025-10-05 15:17:25,317 - root - INFO - lr: 
1.5516e-05 gnorm: 1.11 [16:43:14< 7:47:47] +[titan] 2025-10-05 15:17:36,180 - root - INFO - step: 27285 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8298 +[titan] 2025-10-05 15:17:36,180 - root - INFO - lr: 1.5508e-05 gnorm: 1.10 [16:43:25< 7:47:36] +[titan] 2025-10-05 15:17:47,037 - root - INFO - step: 27290 loss: 2.0421 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8046 +[titan] 2025-10-05 15:17:47,037 - root - INFO - lr: 1.5501e-05 gnorm: 1.10 [16:43:36< 7:47:25] +[titan] 2025-10-05 15:17:57,971 - root - INFO - step: 27295 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.77 mfu: 42.04% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:17:57,971 - root - INFO - lr: 1.5493e-05 gnorm: 1.10 [16:43:47< 7:47:13] +[titan] 2025-10-05 15:18:06,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:18:08,853 - root - INFO - step: 27300 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 15:18:08,854 - root - INFO - lr: 1.5486e-05 gnorm: 1.09 [16:43:58< 7:47:02] +[titan] 2025-10-05 15:18:19,735 - root - INFO - step: 27305 loss: 2.0092 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 15:18:19,735 - root - INFO - lr: 1.5478e-05 gnorm: 1.11 [16:44:09< 7:46:51] +[titan] 2025-10-05 15:18:30,612 - root - INFO - step: 27310 loss: 2.0700 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8287 +[titan] 2025-10-05 15:18:30,612 - root - INFO - lr: 1.5471e-05 gnorm: 1.10 [16:44:19< 7:46:40] +[titan] 2025-10-05 15:18:41,512 - root - INFO - step: 27315 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 15:18:41,512 - root - INFO - lr: 1.5463e-05 gnorm: 1.10 [16:44:30< 7:46:29] +[titan] 2025-10-05 15:18:52,399 - root - INFO - step: 27320 loss: 2.1391 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2490 global_avg_mtp_loss: 1.8900 +[titan] 2025-10-05 15:18:52,399 - root - INFO - lr: 1.5455e-05 gnorm: 1.12 [16:44:41< 7:46:18] +[titan] 2025-10-05 15:19:03,317 - root - INFO - step: 27325 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7809 +[titan] 2025-10-05 15:19:03,317 - root - INFO - lr: 1.5448e-05 gnorm: 1.13 [16:44:52< 7:46:07] +[titan] 2025-10-05 15:19:14,195 - root - INFO - step: 27330 loss: 2.1168 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2466 global_avg_mtp_loss: 1.8702 +[titan] 2025-10-05 15:19:14,195 - root - INFO - lr: 
1.5440e-05 gnorm: 1.12 [16:45:03< 7:45:56] +[titan] 2025-10-05 15:19:25,053 - root - INFO - step: 27335 loss: 2.0622 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8236 +[titan] 2025-10-05 15:19:25,053 - root - INFO - lr: 1.5433e-05 gnorm: 1.14 [16:45:14< 7:45:45] +[titan] 2025-10-05 15:19:35,914 - root - INFO - step: 27340 loss: 2.0678 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8279 +[titan] 2025-10-05 15:19:35,914 - root - INFO - lr: 1.5425e-05 gnorm: 1.09 [16:45:25< 7:45:34] +[titan] 2025-10-05 15:19:46,795 - root - INFO - step: 27345 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8129 +[titan] 2025-10-05 15:19:46,796 - root - INFO - lr: 1.5418e-05 gnorm: 1.14 [16:45:36< 7:45:22] +[titan] 2025-10-05 15:19:55,520 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:19:57,711 - root - INFO - step: 27350 loss: 2.0646 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:19:57,711 - root - INFO - lr: 1.5410e-05 gnorm: 1.12 [16:45:47< 7:45:11] +[titan] 2025-10-05 15:20:08,574 - root - INFO - step: 27355 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:20:08,574 - root - INFO - lr: 1.5403e-05 gnorm: 1.10 [16:45:57< 7:45:00] +[titan] 2025-10-05 15:20:19,517 - root - INFO - step: 27360 loss: 2.0447 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8070 +[titan] 2025-10-05 15:20:19,518 - root - INFO - lr: 1.5395e-05 gnorm: 1.10 [16:46:08< 7:44:49] +[titan] 2025-10-05 15:20:30,400 - root - INFO - step: 27365 loss: 2.0266 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 15:20:30,401 - root - INFO - lr: 1.5388e-05 gnorm: 1.10 [16:46:19< 7:44:38] +[titan] 2025-10-05 15:20:41,257 - root - INFO - step: 27370 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7897 +[titan] 2025-10-05 15:20:41,257 - root - INFO - lr: 1.5380e-05 gnorm: 1.09 [16:46:30< 7:44:27] +[titan] 2025-10-05 15:20:52,147 - root - INFO - step: 27375 loss: 2.0770 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8366 +[titan] 2025-10-05 15:20:52,147 - root - INFO - lr: 1.5373e-05 gnorm: 1.09 [16:46:41< 7:44:16] +[titan] 2025-10-05 15:21:03,049 - root - INFO - step: 27380 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 15:21:03,049 - root - INFO - lr: 
1.5365e-05 gnorm: 1.10 [16:46:52< 7:44:05] +[titan] 2025-10-05 15:21:13,927 - root - INFO - step: 27385 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 15:21:13,927 - root - INFO - lr: 1.5358e-05 gnorm: 1.10 [16:47:03< 7:43:54] +[titan] 2025-10-05 15:21:24,850 - root - INFO - step: 27390 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:21:24,850 - root - INFO - lr: 1.5350e-05 gnorm: 1.13 [16:47:14< 7:43:43] +[titan] 2025-10-05 15:21:35,739 - root - INFO - step: 27395 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 15:21:35,739 - root - INFO - lr: 1.5343e-05 gnorm: 1.13 [16:47:25< 7:43:32] +[titan] 2025-10-05 15:21:44,437 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:21:46,630 - root - INFO - step: 27400 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:21:46,630 - root - INFO - lr: 1.5335e-05 gnorm: 1.08 [16:47:35< 7:43:20] +[titan] 2025-10-05 15:21:57,526 - root - INFO - step: 27405 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7972 +[titan] 2025-10-05 15:21:57,526 - root - INFO - lr: 1.5328e-05 gnorm: 1.12 [16:47:46< 7:43:09] +[titan] 2025-10-05 15:22:08,415 - root - INFO - step: 27410 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7583 +[titan] 2025-10-05 15:22:08,415 - root - INFO - lr: 1.5320e-05 gnorm: 1.07 [16:47:57< 7:42:58] +[titan] 2025-10-05 15:22:19,304 - root - INFO - step: 27415 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 15:22:19,305 - root - INFO - lr: 1.5313e-05 gnorm: 1.12 [16:48:08< 7:42:47] +[titan] 2025-10-05 15:22:30,195 - root - INFO - step: 27420 loss: 2.0413 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8052 +[titan] 2025-10-05 15:22:30,196 - root - INFO - lr: 1.5305e-05 gnorm: 1.11 [16:48:19< 7:42:36] +[titan] 2025-10-05 15:22:41,161 - root - INFO - step: 27425 loss: 2.0339 memory: 118.84GiB(85.28%) tps: 29,884 tflops: 414.60 mfu: 41.92% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 15:22:41,161 - root - INFO - lr: 1.5298e-05 gnorm: 1.12 [16:48:30< 7:42:25] +[titan] 2025-10-05 15:22:52,043 - root - INFO - step: 27430 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 15:22:52,043 - root - INFO - lr: 
1.5290e-05 gnorm: 1.10 [16:48:41< 7:42:14] +[titan] 2025-10-05 15:23:02,939 - root - INFO - step: 27435 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 15:23:02,939 - root - INFO - lr: 1.5283e-05 gnorm: 1.12 [16:48:52< 7:42:03] +[titan] 2025-10-05 15:23:13,837 - root - INFO - step: 27440 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 15:23:13,837 - root - INFO - lr: 1.5275e-05 gnorm: 1.12 [16:49:03< 7:41:52] +[titan] 2025-10-05 15:23:24,732 - root - INFO - step: 27445 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 15:23:24,733 - root - INFO - lr: 1.5268e-05 gnorm: 1.10 [16:49:14< 7:41:41] +[titan] 2025-10-05 15:23:33,429 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:23:35,615 - root - INFO - step: 27450 loss: 2.0803 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8394 +[titan] 2025-10-05 15:23:35,615 - root - INFO - lr: 1.5260e-05 gnorm: 1.12 [16:49:24< 7:41:29] +[titan] 2025-10-05 15:23:46,565 - root - INFO - step: 27455 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,927 tflops: 415.19 mfu: 41.98% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8190 +[titan] 2025-10-05 15:23:46,565 - root - INFO - lr: 1.5253e-05 gnorm: 1.14 [16:49:35< 7:41:18] +[titan] 2025-10-05 15:23:57,475 - root - INFO - step: 27460 loss: 2.0518 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8135 +[titan] 2025-10-05 15:23:57,475 - root - INFO - lr: 1.5245e-05 gnorm: 1.11 [16:49:46< 7:41:07] +[titan] 2025-10-05 15:24:08,327 - root - INFO - step: 27465 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 15:24:08,327 - root - INFO - lr: 1.5238e-05 gnorm: 1.11 [16:49:57< 7:40:56] +[titan] 2025-10-05 15:24:19,201 - root - INFO - step: 27470 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 15:24:19,201 - root - INFO - lr: 1.5230e-05 gnorm: 1.08 [16:50:08< 7:40:45] +[titan] 2025-10-05 15:24:30,080 - root - INFO - step: 27475 loss: 2.0648 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8250 +[titan] 2025-10-05 15:24:30,080 - root - INFO - lr: 1.5223e-05 gnorm: 1.09 [16:50:19< 7:40:34] +[titan] 2025-10-05 15:24:40,967 - root - INFO - step: 27480 loss: 2.0592 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8213 +[titan] 2025-10-05 15:24:40,967 - root - INFO - lr: 
1.5215e-05 gnorm: 1.10 [16:50:30< 7:40:23] +[titan] 2025-10-05 15:24:51,863 - root - INFO - step: 27485 loss: 2.0802 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8387 +[titan] 2025-10-05 15:24:51,863 - root - INFO - lr: 1.5208e-05 gnorm: 1.11 [16:50:41< 7:40:12] +[titan] 2025-10-05 15:25:02,777 - root - INFO - step: 27490 loss: 2.1019 memory: 118.84GiB(85.28%) tps: 30,024 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8587 +[titan] 2025-10-05 15:25:02,778 - root - INFO - lr: 1.5200e-05 gnorm: 1.16 [16:50:52< 7:40:01] +[titan] 2025-10-05 15:25:13,681 - root - INFO - step: 27495 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:25:13,681 - root - INFO - lr: 1.5193e-05 gnorm: 1.12 [16:51:03< 7:39:50] +[titan] 2025-10-05 15:25:22,378 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:25:24,569 - root - INFO - step: 27500 loss: 2.0730 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8328 +[titan] 2025-10-05 15:25:24,570 - root - INFO - lr: 1.5185e-05 gnorm: 1.09 [16:51:13< 7:39:39] +[titan] 2025-10-05 15:25:35,452 - root - INFO - step: 27505 loss: 2.0528 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:25:35,452 - root - INFO - lr: 1.5178e-05 gnorm: 1.14 [16:51:24< 7:39:27] +[titan] 2025-10-05 15:25:46,344 - root - INFO - step: 27510 loss: 2.2224 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2732 global_avg_mtp_loss: 1.9492 +[titan] 2025-10-05 15:25:46,344 - root - INFO - lr: 1.5170e-05 gnorm: 7.42 [16:51:35< 7:39:16] +[titan] 2025-10-05 15:25:57,223 - root - INFO - step: 27515 loss: 2.0298 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7946 +[titan] 2025-10-05 15:25:57,223 - root - INFO - lr: 1.5163e-05 gnorm: 1.13 [16:51:46< 7:39:05] +[titan] 2025-10-05 15:26:08,195 - root - INFO - step: 27520 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 29,866 tflops: 414.35 mfu: 41.90% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 15:26:08,195 - root - INFO - lr: 1.5155e-05 gnorm: 1.07 [16:51:57< 7:38:54] +[titan] 2025-10-05 15:26:19,076 - root - INFO - step: 27525 loss: 2.0409 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 15:26:19,076 - root - INFO - lr: 1.5148e-05 gnorm: 1.13 [16:52:08< 7:38:43] +[titan] 2025-10-05 15:26:29,960 - root - INFO - step: 27530 loss: 2.0112 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7786 +[titan] 2025-10-05 15:26:29,960 - root - INFO - lr: 
1.5141e-05 gnorm: 1.17 [16:52:19< 7:38:32] +[titan] 2025-10-05 15:26:40,847 - root - INFO - step: 27535 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 15:26:40,847 - root - INFO - lr: 1.5133e-05 gnorm: 1.09 [16:52:30< 7:38:21] +[titan] 2025-10-05 15:26:51,718 - root - INFO - step: 27540 loss: 1.9986 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 15:26:51,718 - root - INFO - lr: 1.5126e-05 gnorm: 1.13 [16:52:41< 7:38:10] +[titan] 2025-10-05 15:27:02,650 - root - INFO - step: 27545 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8088 +[titan] 2025-10-05 15:27:02,650 - root - INFO - lr: 1.5118e-05 gnorm: 1.12 [16:52:51< 7:37:59] +[titan] 2025-10-05 15:27:11,363 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:27:13,551 - root - INFO - step: 27550 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 15:27:13,551 - root - INFO - lr: 1.5111e-05 gnorm: 1.14 [16:53:02< 7:37:48] +[titan] 2025-10-05 15:27:24,430 - root - INFO - step: 27555 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:27:24,430 - root - INFO - lr: 1.5103e-05 gnorm: 1.10 [16:53:13< 7:37:37] +[titan] 2025-10-05 15:27:35,290 - root - INFO - step: 27560 loss: 2.0098 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7771 +[titan] 2025-10-05 15:27:35,290 - root - INFO - lr: 1.5096e-05 gnorm: 1.11 [16:53:24< 7:37:25] +[titan] 2025-10-05 15:27:46,162 - root - INFO - step: 27565 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8175 +[titan] 2025-10-05 15:27:46,162 - root - INFO - lr: 1.5088e-05 gnorm: 1.14 [16:53:35< 7:37:14] +[titan] 2025-10-05 15:27:57,049 - root - INFO - step: 27570 loss: 2.0327 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7970 +[titan] 2025-10-05 15:27:57,049 - root - INFO - lr: 1.5081e-05 gnorm: 1.09 [16:53:46< 7:37:03] +[titan] 2025-10-05 15:28:07,972 - root - INFO - step: 27575 loss: 2.0758 memory: 118.84GiB(85.28%) tps: 29,999 tflops: 416.19 mfu: 42.08% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8349 +[titan] 2025-10-05 15:28:07,972 - root - INFO - lr: 1.5074e-05 gnorm: 1.15 [16:53:57< 7:36:52] +[titan] 2025-10-05 15:28:18,854 - root - INFO - step: 27580 loss: 2.0600 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 15:28:18,855 - root - INFO - lr: 
1.5066e-05 gnorm: 1.16 [16:54:08< 7:36:41] +[titan] 2025-10-05 15:28:29,772 - root - INFO - step: 27585 loss: 2.0971 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2423 global_avg_mtp_loss: 1.8547 +[titan] 2025-10-05 15:28:29,772 - root - INFO - lr: 1.5059e-05 gnorm: 1.13 [16:54:19< 7:36:30] +[titan] 2025-10-05 15:28:40,678 - root - INFO - step: 27590 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 15:28:40,678 - root - INFO - lr: 1.5051e-05 gnorm: 1.09 [16:54:29< 7:36:19] +[titan] 2025-10-05 15:28:51,534 - root - INFO - step: 27595 loss: 2.0739 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.8331 +[titan] 2025-10-05 15:28:51,534 - root - INFO - lr: 1.5044e-05 gnorm: 1.10 [16:54:40< 7:36:08] +[titan] 2025-10-05 15:29:00,205 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:29:02,428 - root - INFO - step: 27600 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7298 +[titan] 2025-10-05 15:29:02,428 - root - INFO - lr: 1.5036e-05 gnorm: 1.12 [16:54:51< 7:35:57] +[titan] 2025-10-05 15:29:13,305 - root - INFO - step: 27605 loss: 2.0663 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8263 +[titan] 2025-10-05 15:29:13,305 - root - INFO - lr: 1.5029e-05 gnorm: 1.09 [16:55:02< 7:35:46] +[titan] 2025-10-05 15:29:24,170 - root - INFO - step: 27610 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 15:29:24,170 - root - INFO - lr: 1.5021e-05 gnorm: 1.06 [16:55:13< 7:35:34] +[titan] 2025-10-05 15:29:35,069 - root - INFO - step: 27615 loss: 1.9817 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 15:29:35,070 - root - INFO - lr: 1.5014e-05 gnorm: 1.13 [16:55:24< 7:35:23] +[titan] 2025-10-05 15:29:45,908 - root - INFO - step: 27620 loss: 2.0605 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.45 mfu: 42.41% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8210 +[titan] 2025-10-05 15:29:45,908 - root - INFO - lr: 1.5007e-05 gnorm: 1.14 [16:55:35< 7:35:12] +[titan] 2025-10-05 15:29:56,752 - root - INFO - step: 27625 loss: 2.0304 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2355 global_avg_mtp_loss: 1.7949 +[titan] 2025-10-05 15:29:56,752 - root - INFO - lr: 1.4999e-05 gnorm: 1.14 [16:55:46< 7:35:01] +[titan] 2025-10-05 15:30:07,651 - root - INFO - step: 27630 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7645 +[titan] 2025-10-05 15:30:07,652 - root - INFO - lr: 
1.4992e-05 gnorm: 1.10 [16:55:56< 7:34:50] +[titan] 2025-10-05 15:30:18,514 - root - INFO - step: 27635 loss: 2.0541 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8163 +[titan] 2025-10-05 15:30:18,514 - root - INFO - lr: 1.4984e-05 gnorm: 1.11 [16:56:07< 7:34:39] +[titan] 2025-10-05 15:30:29,382 - root - INFO - step: 27640 loss: 2.0889 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2417 global_avg_mtp_loss: 1.8472 +[titan] 2025-10-05 15:30:29,382 - root - INFO - lr: 1.4977e-05 gnorm: 1.09 [16:56:18< 7:34:28] +[titan] 2025-10-05 15:30:40,353 - root - INFO - step: 27645 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 29,868 tflops: 414.38 mfu: 41.90% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7813 +[titan] 2025-10-05 15:30:40,354 - root - INFO - lr: 1.4970e-05 gnorm: 1.12 [16:56:29< 7:34:17] +[titan] 2025-10-05 15:30:47,054 - root - INFO - Dumping profiler traces at step 27648 +[titan] 2025-10-05 15:30:47,092 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:30:49,274 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:30:51,455 - root - INFO - step: 27650 loss: 2.0385 memory: 118.84GiB(85.28%) tps: 29,516 tflops: 409.49 mfu: 41.40% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:30:51,456 - root - INFO - lr: 1.4962e-05 gnorm: 1.14 [16:56:40< 7:34:06] +[titan] 2025-10-05 15:31:02,322 - root - INFO - step: 27655 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 15:31:02,323 - root - INFO - lr: 1.4955e-05 gnorm: 1.07 [16:56:51< 7:33:55] +[titan] 2025-10-05 15:31:13,179 - root - INFO - step: 27660 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7706 +[titan] 2025-10-05 15:31:13,179 - root - INFO - lr: 1.4947e-05 gnorm: 1.10 [16:57:02< 7:33:44] +[titan] 2025-10-05 15:31:24,033 - root - INFO - step: 27665 loss: 2.0876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8463 +[titan] 2025-10-05 15:31:24,033 - root - INFO - lr: 1.4940e-05 gnorm: 1.09 [16:57:13< 7:33:32] +[titan] 2025-10-05 15:31:34,896 - root - INFO - step: 27670 loss: 2.0761 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8353 +[titan] 2025-10-05 15:31:34,896 - root - INFO - lr: 1.4933e-05 gnorm: 1.11 [16:57:24< 7:33:21] +[titan] 2025-10-05 15:31:45,721 - root - INFO - step: 27675 loss: 1.9979 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 15:31:45,721 - root - INFO - lr: 1.4925e-05 gnorm: 1.11 [16:57:35< 7:33:10] +[titan] 2025-10-05 15:31:56,620 - root - INFO - step: 27680 loss: 2.0129 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7791 +[titan] 2025-10-05 15:31:56,620 - root - INFO - lr: 
1.4918e-05 gnorm: 1.10 [16:57:45< 7:32:59] +[titan] 2025-10-05 15:32:07,525 - root - INFO - step: 27685 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 15:32:07,525 - root - INFO - lr: 1.4910e-05 gnorm: 1.10 [16:57:56< 7:32:48] +[titan] 2025-10-05 15:32:18,367 - root - INFO - step: 27690 loss: 2.0707 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8306 +[titan] 2025-10-05 15:32:18,367 - root - INFO - lr: 1.4903e-05 gnorm: 1.09 [16:58:07< 7:32:37] +[titan] 2025-10-05 15:32:29,238 - root - INFO - step: 27695 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 15:32:29,238 - root - INFO - lr: 1.4896e-05 gnorm: 1.13 [16:58:18< 7:32:26] +[titan] 2025-10-05 15:32:37,922 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:32:40,104 - root - INFO - step: 27700 loss: 2.0988 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2438 global_avg_mtp_loss: 1.8549 +[titan] 2025-10-05 15:32:40,104 - root - INFO - lr: 1.4888e-05 gnorm: 1.12 [16:58:29< 7:32:15] +[titan] 2025-10-05 15:32:50,965 - root - INFO - step: 27705 loss: 2.1044 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2434 global_avg_mtp_loss: 1.8610 +[titan] 2025-10-05 15:32:50,965 - root - INFO - lr: 1.4881e-05 gnorm: 1.13 [16:58:40< 7:32:04] +[titan] 2025-10-05 15:33:01,872 - root - INFO - step: 27710 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7598 +[titan] 2025-10-05 15:33:01,872 - root - INFO - lr: 1.4873e-05 gnorm: 1.13 [16:58:51< 7:31:53] +[titan] 2025-10-05 15:33:12,825 - root - INFO - step: 27715 loss: 2.0402 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8034 +[titan] 2025-10-05 15:33:12,825 - root - INFO - lr: 1.4866e-05 gnorm: 1.12 [16:59:02< 7:31:41] +[titan] 2025-10-05 15:33:23,696 - root - INFO - step: 27720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 15:33:23,696 - root - INFO - lr: 1.4859e-05 gnorm: 1.13 [16:59:12< 7:31:30] +[titan] 2025-10-05 15:33:34,571 - root - INFO - step: 27725 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:33:34,572 - root - INFO - lr: 1.4851e-05 gnorm: 1.13 [16:59:23< 7:31:19] +[titan] 2025-10-05 15:33:45,446 - root - INFO - step: 27730 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 15:33:45,446 - root - INFO - lr: 
1.4844e-05 gnorm: 1.07 [16:59:34< 7:31:08] +[titan] 2025-10-05 15:33:56,288 - root - INFO - step: 27735 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7851 +[titan] 2025-10-05 15:33:56,288 - root - INFO - lr: 1.4836e-05 gnorm: 1.13 [16:59:45< 7:30:57] +[titan] 2025-10-05 15:34:07,199 - root - INFO - step: 27740 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 15:34:07,199 - root - INFO - lr: 1.4829e-05 gnorm: 1.11 [16:59:56< 7:30:46] +[titan] 2025-10-05 15:34:18,088 - root - INFO - step: 27745 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2402 global_avg_mtp_loss: 1.8303 +[titan] 2025-10-05 15:34:18,088 - root - INFO - lr: 1.4822e-05 gnorm: 1.14 [17:00:07< 7:30:35] +[titan] 2025-10-05 15:34:26,744 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:34:28,919 - root - INFO - step: 27750 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7660 +[titan] 2025-10-05 15:34:28,919 - root - INFO - lr: 1.4814e-05 gnorm: 1.10 [17:00:18< 7:30:24] +[titan] 2025-10-05 15:34:39,788 - root - INFO - step: 27755 loss: 2.0085 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:34:39,788 - root - INFO - lr: 1.4807e-05 gnorm: 1.13 [17:00:29< 7:30:13] +[titan] 2025-10-05 15:34:50,649 - root - INFO - step: 27760 loss: 2.0229 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7889 +[titan] 2025-10-05 15:34:50,649 - root - INFO - lr: 1.4800e-05 gnorm: 1.08 [17:00:39< 7:30:02] +[titan] 2025-10-05 15:35:01,518 - root - INFO - step: 27765 loss: 2.0372 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:35:01,518 - root - INFO - lr: 1.4792e-05 gnorm: 1.09 [17:00:50< 7:29:50] +[titan] 2025-10-05 15:35:12,435 - root - INFO - step: 27770 loss: 2.0491 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 15:35:12,435 - root - INFO - lr: 1.4785e-05 gnorm: 1.11 [17:01:01< 7:29:39] +[titan] 2025-10-05 15:35:23,343 - root - INFO - step: 27775 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.7995 +[titan] 2025-10-05 15:35:23,344 - root - INFO - lr: 1.4777e-05 gnorm: 1.10 [17:01:12< 7:29:28] +[titan] 2025-10-05 15:35:34,184 - root - INFO - step: 27780 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 15:35:34,184 - root - INFO - lr: 
1.4770e-05 gnorm: 1.09 [17:01:23< 7:29:17] +[titan] 2025-10-05 15:35:45,050 - root - INFO - step: 27785 loss: 2.0410 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 15:35:45,050 - root - INFO - lr: 1.4763e-05 gnorm: 1.08 [17:01:34< 7:29:06] +[titan] 2025-10-05 15:35:55,912 - root - INFO - step: 27790 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 15:35:55,912 - root - INFO - lr: 1.4755e-05 gnorm: 1.10 [17:01:45< 7:28:55] +[titan] 2025-10-05 15:36:06,779 - root - INFO - step: 27795 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7727 +[titan] 2025-10-05 15:36:06,780 - root - INFO - lr: 1.4748e-05 gnorm: 1.09 [17:01:56< 7:28:44] +[titan] 2025-10-05 15:36:15,503 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:36:17,685 - root - INFO - step: 27800 loss: 2.0545 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8152 +[titan] 2025-10-05 15:36:17,685 - root - INFO - lr: 1.4741e-05 gnorm: 1.11 [17:02:06< 7:28:33] +[titan] 2025-10-05 15:36:28,568 - root - INFO - step: 27805 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 15:36:28,568 - root - INFO - lr: 1.4733e-05 gnorm: 1.09 [17:02:17< 7:28:22] +[titan] 2025-10-05 15:36:39,435 - root - INFO - step: 27810 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 15:36:39,435 - root - INFO - lr: 1.4726e-05 gnorm: 1.12 [17:02:28< 7:28:11] +[titan] 2025-10-05 15:36:50,300 - root - INFO - step: 27815 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:36:50,300 - root - INFO - lr: 1.4719e-05 gnorm: 1.08 [17:02:39< 7:27:59] +[titan] 2025-10-05 15:37:01,162 - root - INFO - step: 27820 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7848 +[titan] 2025-10-05 15:37:01,162 - root - INFO - lr: 1.4711e-05 gnorm: 1.10 [17:02:50< 7:27:48] +[titan] 2025-10-05 15:37:12,068 - root - INFO - step: 27825 loss: 2.0443 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8065 +[titan] 2025-10-05 15:37:12,068 - root - INFO - lr: 1.4704e-05 gnorm: 1.12 [17:03:01< 7:27:37] +[titan] 2025-10-05 15:37:22,924 - root - INFO - step: 27830 loss: 2.0373 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 15:37:22,924 - root - INFO - lr: 
1.4697e-05 gnorm: 1.12 [17:03:12< 7:27:26] +[titan] 2025-10-05 15:37:33,784 - root - INFO - step: 27835 loss: 1.9902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 15:37:33,784 - root - INFO - lr: 1.4689e-05 gnorm: 1.09 [17:03:23< 7:27:15] +[titan] 2025-10-05 15:37:44,693 - root - INFO - step: 27840 loss: 1.9926 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 15:37:44,694 - root - INFO - lr: 1.4682e-05 gnorm: 1.08 [17:03:33< 7:27:04] +[titan] 2025-10-05 15:37:55,562 - root - INFO - step: 27845 loss: 1.9782 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 15:37:55,562 - root - INFO - lr: 1.4675e-05 gnorm: 1.11 [17:03:44< 7:26:53] +[titan] 2025-10-05 15:38:04,219 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:38:06,403 - root - INFO - step: 27850 loss: 1.9362 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 15:38:06,403 - root - INFO - lr: 1.4667e-05 gnorm: 1.10 [17:03:55< 7:26:42] +[titan] 2025-10-05 15:38:17,311 - root - INFO - step: 27855 loss: 2.0240 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 15:38:17,311 - root - INFO - lr: 1.4660e-05 gnorm: 1.11 [17:04:06< 7:26:31] +[titan] 2025-10-05 15:38:28,219 - root - INFO - step: 27860 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 15:38:28,219 - root - INFO - lr: 1.4653e-05 gnorm: 1.12 [17:04:17< 7:26:20] +[titan] 2025-10-05 15:38:39,092 - root - INFO - step: 27865 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 15:38:39,092 - root - INFO - lr: 1.4645e-05 gnorm: 1.10 [17:04:28< 7:26:09] +[titan] 2025-10-05 15:38:49,999 - root - INFO - step: 27870 loss: 2.0091 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 15:38:49,999 - root - INFO - lr: 1.4638e-05 gnorm: 1.15 [17:04:39< 7:25:57] +[titan] 2025-10-05 15:39:00,879 - root - INFO - step: 27875 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:39:00,879 - root - INFO - lr: 1.4631e-05 gnorm: 1.10 [17:04:50< 7:25:46] +[titan] 2025-10-05 15:39:11,796 - root - INFO - step: 27880 loss: 2.0028 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 15:39:11,796 - root - INFO - lr: 
1.4623e-05 gnorm: 1.09 [17:05:01< 7:25:35] +[titan] 2025-10-05 15:39:22,647 - root - INFO - step: 27885 loss: 2.0725 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8326 +[titan] 2025-10-05 15:39:22,647 - root - INFO - lr: 1.4616e-05 gnorm: 1.15 [17:05:11< 7:25:24] +[titan] 2025-10-05 15:39:33,473 - root - INFO - step: 27890 loss: 2.0958 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2429 global_avg_mtp_loss: 1.8528 +[titan] 2025-10-05 15:39:33,473 - root - INFO - lr: 1.4609e-05 gnorm: 1.11 [17:05:22< 7:25:13] +[titan] 2025-10-05 15:39:44,338 - root - INFO - step: 27895 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 15:39:44,338 - root - INFO - lr: 1.4601e-05 gnorm: 1.12 [17:05:33< 7:25:02] +[titan] 2025-10-05 15:39:53,006 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:39:55,213 - root - INFO - step: 27900 loss: 2.0345 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7993 +[titan] 2025-10-05 15:39:55,213 - root - INFO - lr: 1.4594e-05 gnorm: 1.12 [17:05:44< 7:24:51] +[titan] 2025-10-05 15:40:06,088 - root - INFO - step: 27905 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 15:40:06,088 - root - INFO - lr: 1.4587e-05 gnorm: 1.11 [17:05:55< 7:24:40] +[titan] 2025-10-05 15:40:16,993 - root - INFO - step: 27910 loss: 2.0536 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8146 +[titan] 2025-10-05 15:40:16,993 - root - INFO - lr: 1.4579e-05 gnorm: 1.11 [17:06:06< 7:24:29] +[titan] 2025-10-05 15:40:27,815 - root - INFO - step: 27915 loss: 2.1196 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.09 mfu: 42.48% global_avg_ntp_loss: 0.2454 global_avg_mtp_loss: 1.8742 +[titan] 2025-10-05 15:40:27,815 - root - INFO - lr: 1.4572e-05 gnorm: 1.11 [17:06:17< 7:24:18] +[titan] 2025-10-05 15:40:38,643 - root - INFO - step: 27920 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8155 +[titan] 2025-10-05 15:40:38,643 - root - INFO - lr: 1.4565e-05 gnorm: 1.07 [17:06:27< 7:24:06] +[titan] 2025-10-05 15:40:49,515 - root - INFO - step: 27925 loss: 1.9785 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7489 +[titan] 2025-10-05 15:40:49,515 - root - INFO - lr: 1.4558e-05 gnorm: 1.15 [17:06:38< 7:23:55] +[titan] 2025-10-05 15:41:00,387 - root - INFO - step: 27930 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 15:41:00,388 - root - INFO - lr: 
1.4550e-05 gnorm: 1.08 [17:06:49< 7:23:44] +[titan] 2025-10-05 15:41:11,313 - root - INFO - step: 27935 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.11 mfu: 42.07% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 15:41:11,313 - root - INFO - lr: 1.4543e-05 gnorm: 1.11 [17:07:00< 7:23:33] +[titan] 2025-10-05 15:41:22,241 - root - INFO - step: 27940 loss: 2.0562 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:41:22,241 - root - INFO - lr: 1.4536e-05 gnorm: 1.12 [17:07:11< 7:23:22] +[titan] 2025-10-05 15:41:33,099 - root - INFO - step: 27945 loss: 2.0587 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8200 +[titan] 2025-10-05 15:41:33,100 - root - INFO - lr: 1.4528e-05 gnorm: 1.10 [17:07:22< 7:23:11] +[titan] 2025-10-05 15:41:41,785 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:41:43,968 - root - INFO - step: 27950 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7590 +[titan] 2025-10-05 15:41:43,968 - root - INFO - lr: 1.4521e-05 gnorm: 1.11 [17:07:33< 7:23:00] +[titan] 2025-10-05 15:41:54,868 - root - INFO - step: 27955 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8031 +[titan] 2025-10-05 15:41:54,868 - root - INFO - lr: 1.4514e-05 gnorm: 1.12 [17:07:44< 7:22:49] +[titan] 2025-10-05 15:42:05,736 - root - INFO - step: 27960 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:05,736 - root - INFO - lr: 1.4507e-05 gnorm: 1.10 [17:07:55< 7:22:38] +[titan] 2025-10-05 15:42:16,692 - root - INFO - step: 27965 loss: 1.9991 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 15:42:16,692 - root - INFO - lr: 1.4499e-05 gnorm: 1.12 [17:08:05< 7:22:27] +[titan] 2025-10-05 15:42:27,570 - root - INFO - step: 27970 loss: 2.0787 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2432 global_avg_mtp_loss: 1.8355 +[titan] 2025-10-05 15:42:27,570 - root - INFO - lr: 1.4492e-05 gnorm: 1.12 [17:08:16< 7:22:16] +[titan] 2025-10-05 15:42:38,440 - root - INFO - step: 27975 loss: 2.0135 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 15:42:38,440 - root - INFO - lr: 1.4485e-05 gnorm: 1.13 [17:08:27< 7:22:04] +[titan] 2025-10-05 15:42:49,330 - root - INFO - step: 27980 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 15:42:49,330 - root - INFO - lr: 
1.4477e-05 gnorm: 1.12 [17:08:38< 7:21:53] +[titan] 2025-10-05 15:43:00,202 - root - INFO - step: 27985 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7878 +[titan] 2025-10-05 15:43:00,202 - root - INFO - lr: 1.4470e-05 gnorm: 1.10 [17:08:49< 7:21:42] +[titan] 2025-10-05 15:43:11,083 - root - INFO - step: 27990 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7731 +[titan] 2025-10-05 15:43:11,083 - root - INFO - lr: 1.4463e-05 gnorm: 1.11 [17:09:00< 7:21:31] +[titan] 2025-10-05 15:43:21,971 - root - INFO - step: 27995 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 15:43:21,972 - root - INFO - lr: 1.4456e-05 gnorm: 1.10 [17:09:11< 7:21:20] +[titan] 2025-10-05 15:43:30,695 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:43:32,877 - root - INFO - step: 28000 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 15:43:32,878 - root - INFO - lr: 1.4448e-05 gnorm: 1.08 [17:09:22< 7:21:09] +[titan] 2025-10-05 15:43:43,741 - root - INFO - step: 28005 loss: 2.0160 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 15:43:43,741 - root - INFO - lr: 1.4441e-05 gnorm: 1.13 [17:09:33< 7:20:58] +[titan] 2025-10-05 15:43:54,582 - root - INFO - step: 28010 loss: 2.0548 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 15:43:54,582 - root - INFO - lr: 1.4434e-05 gnorm: 1.11 [17:09:43< 7:20:47] +[titan] 2025-10-05 15:44:05,429 - root - INFO - step: 28015 loss: 2.0300 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 15:44:05,429 - root - INFO - lr: 1.4426e-05 gnorm: 1.11 [17:09:54< 7:20:36] +[titan] 2025-10-05 15:44:16,331 - root - INFO - step: 28020 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 15:44:16,331 - root - INFO - lr: 1.4419e-05 gnorm: 1.09 [17:10:05< 7:20:25] +[titan] 2025-10-05 15:44:27,186 - root - INFO - step: 28025 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 15:44:27,186 - root - INFO - lr: 1.4412e-05 gnorm: 1.08 [17:10:16< 7:20:13] +[titan] 2025-10-05 15:44:38,082 - root - INFO - step: 28030 loss: 2.0366 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 15:44:38,082 - root - INFO - lr: 
1.4405e-05 gnorm: 1.17 [17:10:27< 7:20:02] +[titan] 2025-10-05 15:44:48,943 - root - INFO - step: 28035 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 15:44:48,943 - root - INFO - lr: 1.4397e-05 gnorm: 1.12 [17:10:38< 7:19:51] +[titan] 2025-10-05 15:44:59,808 - root - INFO - step: 28040 loss: 2.0729 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8327 +[titan] 2025-10-05 15:44:59,808 - root - INFO - lr: 1.4390e-05 gnorm: 1.14 [17:10:49< 7:19:40] +[titan] 2025-10-05 15:45:10,680 - root - INFO - step: 28045 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7836 +[titan] 2025-10-05 15:45:10,681 - root - INFO - lr: 1.4383e-05 gnorm: 1.11 [17:10:59< 7:19:29] +[titan] 2025-10-05 15:45:19,401 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:45:21,586 - root - INFO - step: 28050 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7741 +[titan] 2025-10-05 15:45:21,586 - root - INFO - lr: 1.4376e-05 gnorm: 1.06 [17:11:10< 7:19:18] +[titan] 2025-10-05 15:45:32,449 - root - INFO - step: 28055 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:45:32,449 - root - INFO - lr: 1.4368e-05 gnorm: 1.10 [17:11:21< 7:19:07] +[titan] 2025-10-05 15:45:43,299 - root - INFO - step: 28060 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 15:45:43,299 - root - INFO - lr: 1.4361e-05 gnorm: 1.14 [17:11:32< 7:18:56] +[titan] 2025-10-05 15:45:54,193 - root - INFO - step: 28065 loss: 2.0655 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8254 +[titan] 2025-10-05 15:45:54,193 - root - INFO - lr: 1.4354e-05 gnorm: 1.10 [17:11:43< 7:18:45] +[titan] 2025-10-05 15:46:05,045 - root - INFO - step: 28070 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 15:46:05,045 - root - INFO - lr: 1.4347e-05 gnorm: 1.08 [17:11:54< 7:18:34] +[titan] 2025-10-05 15:46:15,889 - root - INFO - step: 28075 loss: 2.1077 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2445 global_avg_mtp_loss: 1.8632 +[titan] 2025-10-05 15:46:15,889 - root - INFO - lr: 1.4339e-05 gnorm: 1.09 [17:12:05< 7:18:23] +[titan] 2025-10-05 15:46:26,781 - root - INFO - step: 28080 loss: 1.9684 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 15:46:26,781 - root - INFO - lr: 
1.4332e-05 gnorm: 1.10 [17:12:16< 7:18:11] +[titan] 2025-10-05 15:46:37,625 - root - INFO - step: 28085 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 15:46:37,625 - root - INFO - lr: 1.4325e-05 gnorm: 1.08 [17:12:26< 7:18:00] +[titan] 2025-10-05 15:46:48,485 - root - INFO - step: 28090 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 15:46:48,486 - root - INFO - lr: 1.4318e-05 gnorm: 1.09 [17:12:37< 7:17:49] +[titan] 2025-10-05 15:46:59,393 - root - INFO - step: 28095 loss: 1.9937 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 15:46:59,393 - root - INFO - lr: 1.4311e-05 gnorm: 1.12 [17:12:48< 7:17:38] +[titan] 2025-10-05 15:47:08,069 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:47:10,253 - root - INFO - step: 28100 loss: 2.0525 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8143 +[titan] 2025-10-05 15:47:10,253 - root - INFO - lr: 1.4303e-05 gnorm: 1.13 [17:12:59< 7:17:27] +[titan] 2025-10-05 15:47:21,183 - root - INFO - step: 28105 loss: 2.0711 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 15:47:21,183 - root - INFO - lr: 1.4296e-05 gnorm: 1.14 [17:13:10< 7:17:16] +[titan] 2025-10-05 15:47:32,061 - root - INFO - step: 28110 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 15:47:32,061 - root - INFO - lr: 1.4289e-05 gnorm: 1.07 [17:13:21< 7:17:05] +[titan] 2025-10-05 15:47:42,930 - root - INFO - step: 28115 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:47:42,930 - root - INFO - lr: 1.4282e-05 gnorm: 1.11 [17:13:32< 7:16:54] +[titan] 2025-10-05 15:47:53,817 - root - INFO - step: 28120 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7942 +[titan] 2025-10-05 15:47:53,817 - root - INFO - lr: 1.4274e-05 gnorm: 1.09 [17:13:43< 7:16:43] +[titan] 2025-10-05 15:48:04,758 - root - INFO - step: 28125 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 15:48:04,758 - root - INFO - lr: 1.4267e-05 gnorm: 1.14 [17:13:54< 7:16:32] +[titan] 2025-10-05 15:48:15,645 - root - INFO - step: 28130 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 15:48:15,645 - root - INFO - lr: 
1.4260e-05 gnorm: 1.09 [17:14:04< 7:16:21] +[titan] 2025-10-05 15:48:26,569 - root - INFO - step: 28135 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 15:48:26,569 - root - INFO - lr: 1.4253e-05 gnorm: 1.15 [17:14:15< 7:16:09] +[titan] 2025-10-05 15:48:37,430 - root - INFO - step: 28140 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 15:48:37,430 - root - INFO - lr: 1.4246e-05 gnorm: 1.13 [17:14:26< 7:15:58] +[titan] 2025-10-05 15:48:48,297 - root - INFO - step: 28145 loss: 2.0258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7912 +[titan] 2025-10-05 15:48:48,297 - root - INFO - lr: 1.4238e-05 gnorm: 1.13 [17:14:37< 7:15:47] +[titan] 2025-10-05 15:48:56,996 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:48:59,187 - root - INFO - step: 28150 loss: 2.0520 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 15:48:59,187 - root - INFO - lr: 1.4231e-05 gnorm: 1.13 [17:14:48< 7:15:36] +[titan] 2025-10-05 15:49:10,034 - root - INFO - step: 28155 loss: 2.0018 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 15:49:10,035 - root - INFO - lr: 1.4224e-05 gnorm: 1.11 [17:14:59< 7:15:25] +[titan] 2025-10-05 15:49:21,051 - root - INFO - step: 28160 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 29,745 tflops: 412.66 mfu: 41.73% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 15:49:21,051 - root - INFO - lr: 1.4217e-05 gnorm: 1.09 [17:15:10< 7:15:14] +[titan] 2025-10-05 15:49:21,233 - root - INFO - Dumping profiler traces at step 28160 +[titan] 2025-10-05 15:49:21,273 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 15:49:32,111 - root - INFO - step: 28165 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 29,630 tflops: 411.07 mfu: 41.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 15:49:32,111 - root - INFO - lr: 1.4210e-05 gnorm: 1.11 [17:15:21< 7:15:03] +[titan] 2025-10-05 15:49:42,959 - root - INFO - step: 28170 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:49:42,959 - root - INFO - lr: 1.4202e-05 gnorm: 1.08 [17:15:32< 7:14:52] +[titan] 2025-10-05 15:49:53,795 - root - INFO - step: 28175 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 15:49:53,795 - root - INFO - lr: 1.4195e-05 gnorm: 1.14 [17:15:43< 7:14:41] +[titan] 2025-10-05 15:50:04,659 - root - INFO - step: 28180 loss: 
2.0355 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7994 +[titan] 2025-10-05 15:50:04,659 - root - INFO - lr: 1.4188e-05 gnorm: 1.10 [17:15:53< 7:14:30] +[titan] 2025-10-05 15:50:15,529 - root - INFO - step: 28185 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 15:50:15,529 - root - INFO - lr: 1.4181e-05 gnorm: 1.09 [17:16:04< 7:14:19] +[titan] 2025-10-05 15:50:26,481 - root - INFO - step: 28190 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 29,919 tflops: 415.08 mfu: 41.97% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 15:50:26,482 - root - INFO - lr: 1.4174e-05 gnorm: 1.11 [17:16:15< 7:14:08] +[titan] 2025-10-05 15:50:37,355 - root - INFO - step: 28195 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 15:50:37,355 - root - INFO - lr: 1.4166e-05 gnorm: 1.06 [17:16:26< 7:13:56] +[titan] 2025-10-05 15:50:46,040 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:50:48,223 - root - INFO - step: 28200 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 15:50:48,223 - root - INFO - lr: 1.4159e-05 gnorm: 1.12 [17:16:37< 7:13:45] +[titan] 2025-10-05 15:50:59,085 - root - INFO - step: 28205 loss: 2.0690 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 15:50:59,085 - root - INFO - lr: 1.4152e-05 gnorm: 1.12 [17:16:48< 7:13:34] +[titan] 2025-10-05 15:51:09,953 - root - INFO - step: 28210 loss: 2.0216 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7870 +[titan] 2025-10-05 15:51:09,953 - root - INFO - lr: 1.4145e-05 gnorm: 1.14 [17:16:59< 7:13:23] +[titan] 2025-10-05 15:51:20,822 - root - INFO - step: 28215 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8086 +[titan] 2025-10-05 15:51:20,822 - root - INFO - lr: 1.4138e-05 gnorm: 1.10 [17:17:10< 7:13:12] +[titan] 2025-10-05 15:51:31,728 - root - INFO - step: 28220 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 15:51:31,728 - root - INFO - lr: 1.4130e-05 gnorm: 1.18 [17:17:20< 7:13:01] +[titan] 2025-10-05 15:51:42,649 - root - INFO - step: 28225 loss: 2.0647 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2398 global_avg_mtp_loss: 1.8249 +[titan] 2025-10-05 15:51:42,650 - root - INFO - lr: 1.4123e-05 gnorm: 1.10 [17:17:31< 7:12:50] +[titan] 2025-10-05 15:51:53,522 - root - INFO - step: 28230 loss: 2.0533 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2380 global_avg_mtp_loss: 1.8153 +[titan] 2025-10-05 15:51:53,522 - root - INFO - lr: 
1.4116e-05 gnorm: 1.09 [17:17:42< 7:12:39] +[titan] 2025-10-05 15:52:04,406 - root - INFO - step: 28235 loss: 2.0389 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8020 +[titan] 2025-10-05 15:52:04,406 - root - INFO - lr: 1.4109e-05 gnorm: 1.11 [17:17:53< 7:12:28] +[titan] 2025-10-05 15:52:15,280 - root - INFO - step: 28240 loss: 1.9948 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 15:52:15,281 - root - INFO - lr: 1.4102e-05 gnorm: 1.11 [17:18:04< 7:12:17] +[titan] 2025-10-05 15:52:26,225 - root - INFO - step: 28245 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 29,941 tflops: 415.38 mfu: 42.00% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 15:52:26,225 - root - INFO - lr: 1.4095e-05 gnorm: 1.12 [17:18:15< 7:12:06] +[titan] 2025-10-05 15:52:34,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:52:37,105 - root - INFO - step: 28250 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 15:52:37,105 - root - INFO - lr: 1.4087e-05 gnorm: 1.08 [17:18:26< 7:11:55] +[titan] 2025-10-05 15:52:48,023 - root - INFO - step: 28255 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 15:52:48,023 - root - INFO - lr: 1.4080e-05 gnorm: 1.10 [17:18:37< 7:11:43] +[titan] 2025-10-05 15:52:58,912 - root - INFO - step: 28260 loss: 2.0448 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 15:52:58,913 - root - INFO - lr: 1.4073e-05 gnorm: 1.10 [17:18:48< 7:11:32] +[titan] 2025-10-05 15:53:09,803 - root - INFO - step: 28265 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8124 +[titan] 2025-10-05 15:53:09,803 - root - INFO - lr: 1.4066e-05 gnorm: 1.11 [17:18:59< 7:11:21] +[titan] 2025-10-05 15:53:20,692 - root - INFO - step: 28270 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 15:53:20,692 - root - INFO - lr: 1.4059e-05 gnorm: 1.34 [17:19:09< 7:11:10] +[titan] 2025-10-05 15:53:31,636 - root - INFO - step: 28275 loss: 2.0177 memory: 118.84GiB(85.28%) tps: 29,942 tflops: 415.40 mfu: 42.00% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 15:53:31,637 - root - INFO - lr: 1.4052e-05 gnorm: 1.11 [17:19:20< 7:10:59] +[titan] 2025-10-05 15:53:42,507 - root - INFO - step: 28280 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7702 +[titan] 2025-10-05 15:53:42,508 - root - INFO - lr: 
1.4044e-05 gnorm: 1.09 [17:19:31< 7:10:48] +[titan] 2025-10-05 15:53:53,408 - root - INFO - step: 28285 loss: 2.0170 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:53:53,408 - root - INFO - lr: 1.4037e-05 gnorm: 1.12 [17:19:42< 7:10:37] +[titan] 2025-10-05 15:54:04,269 - root - INFO - step: 28290 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 15:54:04,269 - root - INFO - lr: 1.4030e-05 gnorm: 1.11 [17:19:53< 7:10:26] +[titan] 2025-10-05 15:54:15,140 - root - INFO - step: 28295 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7830 +[titan] 2025-10-05 15:54:15,140 - root - INFO - lr: 1.4023e-05 gnorm: 1.09 [17:20:04< 7:10:15] +[titan] 2025-10-05 15:54:23,843 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:54:26,111 - root - INFO - step: 28300 loss: 2.0193 memory: 118.84GiB(85.28%) tps: 29,869 tflops: 414.39 mfu: 41.90% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7860 +[titan] 2025-10-05 15:54:26,111 - root - INFO - lr: 1.4016e-05 gnorm: 1.07 [17:20:15< 7:10:04] +[titan] 2025-10-05 15:54:36,983 - root - INFO - step: 28305 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 15:54:36,984 - root - INFO - lr: 1.4009e-05 gnorm: 1.12 [17:20:26< 7:09:53] +[titan] 2025-10-05 15:54:47,858 - root - INFO - step: 28310 loss: 2.0560 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2387 global_avg_mtp_loss: 1.8174 +[titan] 2025-10-05 15:54:47,858 - root - INFO - lr: 1.4002e-05 gnorm: 1.07 [17:20:37< 7:09:42] +[titan] 2025-10-05 15:54:58,731 - root - INFO - step: 28315 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 15:54:58,732 - root - INFO - lr: 1.3994e-05 gnorm: 1.09 [17:20:47< 7:09:30] +[titan] 2025-10-05 15:55:09,654 - root - INFO - step: 28320 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 15:55:09,655 - root - INFO - lr: 1.3987e-05 gnorm: 1.46 [17:20:58< 7:09:19] +[titan] 2025-10-05 15:55:20,530 - root - INFO - step: 28325 loss: 2.0683 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2392 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 15:55:20,530 - root - INFO - lr: 1.3980e-05 gnorm: 1.12 [17:21:09< 7:09:08] +[titan] 2025-10-05 15:55:31,457 - root - INFO - step: 28330 loss: 1.9576 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7300 +[titan] 2025-10-05 15:55:31,457 - root - INFO - lr: 
1.3973e-05 gnorm: 1.10 [17:21:20< 7:08:57] +[titan] 2025-10-05 15:55:42,330 - root - INFO - step: 28335 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 15:55:42,330 - root - INFO - lr: 1.3966e-05 gnorm: 1.12 [17:21:31< 7:08:46] +[titan] 2025-10-05 15:55:53,200 - root - INFO - step: 28340 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 15:55:53,200 - root - INFO - lr: 1.3959e-05 gnorm: 1.15 [17:21:42< 7:08:35] +[titan] 2025-10-05 15:56:04,083 - root - INFO - step: 28345 loss: 2.0214 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 15:56:04,083 - root - INFO - lr: 1.3952e-05 gnorm: 1.17 [17:21:53< 7:08:24] +[titan] 2025-10-05 15:56:12,817 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:56:15,006 - root - INFO - step: 28350 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,001 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 15:56:15,006 - root - INFO - lr: 1.3944e-05 gnorm: 1.14 [17:22:04< 7:08:13] +[titan] 2025-10-05 15:56:25,936 - root - INFO - step: 28355 loss: 1.9838 memory: 118.84GiB(85.28%) tps: 29,980 tflops: 415.92 mfu: 42.05% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 15:56:25,936 - root - INFO - lr: 1.3937e-05 gnorm: 1.10 [17:22:15< 7:08:02] +[titan] 2025-10-05 15:56:36,882 - root - INFO - step: 28360 loss: 2.0896 memory: 118.84GiB(85.28%) tps: 29,937 tflops: 415.34 mfu: 42.00% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8481 +[titan] 2025-10-05 15:56:36,882 - root - INFO - lr: 1.3930e-05 gnorm: 1.15 [17:22:26< 7:07:51] +[titan] 2025-10-05 15:56:47,760 - root - INFO - step: 28365 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 15:56:47,760 - root - INFO - lr: 1.3923e-05 gnorm: 1.11 [17:22:36< 7:07:40] +[titan] 2025-10-05 15:56:58,635 - root - INFO - step: 28370 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 15:56:58,635 - root - INFO - lr: 1.3916e-05 gnorm: 1.09 [17:22:47< 7:07:29] +[titan] 2025-10-05 15:57:09,503 - root - INFO - step: 28375 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7789 +[titan] 2025-10-05 15:57:09,503 - root - INFO - lr: 1.3909e-05 gnorm: 1.05 [17:22:58< 7:07:17] +[titan] 2025-10-05 15:57:20,365 - root - INFO - step: 28380 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7154 +[titan] 2025-10-05 15:57:20,365 - root - INFO - lr: 
1.3902e-05 gnorm: 1.13 [17:23:09< 7:07:06] +[titan] 2025-10-05 15:57:31,331 - root - INFO - step: 28385 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 29,881 tflops: 414.56 mfu: 41.92% global_avg_ntp_loss: 0.2353 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 15:57:31,331 - root - INFO - lr: 1.3895e-05 gnorm: 1.12 [17:23:20< 7:06:55] +[titan] 2025-10-05 15:57:42,204 - root - INFO - step: 28390 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 15:57:42,205 - root - INFO - lr: 1.3888e-05 gnorm: 1.13 [17:23:31< 7:06:44] +[titan] 2025-10-05 15:57:53,067 - root - INFO - step: 28395 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 15:57:53,067 - root - INFO - lr: 1.3880e-05 gnorm: 1.09 [17:23:42< 7:06:33] +[titan] 2025-10-05 15:58:01,735 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:58:03,923 - root - INFO - step: 28400 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8317 +[titan] 2025-10-05 15:58:03,923 - root - INFO - lr: 1.3873e-05 gnorm: 1.11 [17:23:53< 7:06:22] +[titan] 2025-10-05 15:58:14,796 - root - INFO - step: 28405 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7815 +[titan] 2025-10-05 15:58:14,796 - root - INFO - lr: 1.3866e-05 gnorm: 1.09 [17:24:04< 7:06:11] +[titan] 2025-10-05 15:58:25,653 - root - INFO - step: 28410 loss: 1.9984 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7669 +[titan] 2025-10-05 15:58:25,653 - root - INFO - lr: 1.3859e-05 gnorm: 1.10 [17:24:14< 7:06:00] +[titan] 2025-10-05 15:58:36,589 - root - INFO - step: 28415 loss: 2.0846 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2427 global_avg_mtp_loss: 1.8419 +[titan] 2025-10-05 15:58:36,589 - root - INFO - lr: 1.3852e-05 gnorm: 1.13 [17:24:25< 7:05:49] +[titan] 2025-10-05 15:58:47,471 - root - INFO - step: 28420 loss: 2.0309 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7957 +[titan] 2025-10-05 15:58:47,471 - root - INFO - lr: 1.3845e-05 gnorm: 1.10 [17:24:36< 7:05:38] +[titan] 2025-10-05 15:58:58,364 - root - INFO - step: 28425 loss: 2.0184 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 15:58:58,364 - root - INFO - lr: 1.3838e-05 gnorm: 1.14 [17:24:47< 7:05:27] +[titan] 2025-10-05 15:59:09,235 - root - INFO - step: 28430 loss: 1.9943 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 15:59:09,235 - root - INFO - lr: 
1.3831e-05 gnorm: 1.13 [17:24:58< 7:05:16] +[titan] 2025-10-05 15:59:20,106 - root - INFO - step: 28435 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7855 +[titan] 2025-10-05 15:59:20,106 - root - INFO - lr: 1.3824e-05 gnorm: 1.13 [17:25:09< 7:05:04] +[titan] 2025-10-05 15:59:31,054 - root - INFO - step: 28440 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.25 mfu: 41.99% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7996 +[titan] 2025-10-05 15:59:31,054 - root - INFO - lr: 1.3817e-05 gnorm: 1.10 [17:25:20< 7:04:53] +[titan] 2025-10-05 15:59:41,947 - root - INFO - step: 28445 loss: 2.0638 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8238 +[titan] 2025-10-05 15:59:41,947 - root - INFO - lr: 1.3810e-05 gnorm: 1.16 [17:25:31< 7:04:42] +[titan] 2025-10-05 15:59:50,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 15:59:52,821 - root - INFO - step: 28450 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 15:59:52,821 - root - INFO - lr: 1.3802e-05 gnorm: 1.11 [17:25:42< 7:04:31] +[titan] 2025-10-05 16:00:03,693 - root - INFO - step: 28455 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7793 +[titan] 2025-10-05 16:00:03,693 - root - INFO - lr: 1.3795e-05 gnorm: 1.09 [17:25:52< 7:04:20] +[titan] 2025-10-05 16:00:14,540 - root - INFO - step: 28460 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7692 +[titan] 2025-10-05 16:00:14,540 - root - INFO - lr: 1.3788e-05 gnorm: 1.09 [17:26:03< 7:04:09] +[titan] 2025-10-05 16:00:25,402 - root - INFO - step: 28465 loss: 2.0314 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 16:00:25,402 - root - INFO - lr: 1.3781e-05 gnorm: 1.12 [17:26:14< 7:03:58] +[titan] 2025-10-05 16:00:36,344 - root - INFO - step: 28470 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 29,949 tflops: 415.49 mfu: 42.01% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:00:36,344 - root - INFO - lr: 1.3774e-05 gnorm: 1.11 [17:26:25< 7:03:47] +[titan] 2025-10-05 16:00:47,202 - root - INFO - step: 28475 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:00:47,202 - root - INFO - lr: 1.3767e-05 gnorm: 1.13 [17:26:36< 7:03:36] +[titan] 2025-10-05 16:00:58,120 - root - INFO - step: 28480 loss: 2.0253 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 16:00:58,120 - root - INFO - lr: 
1.3760e-05 gnorm: 1.14 [17:26:47< 7:03:25] +[titan] 2025-10-05 16:01:09,012 - root - INFO - step: 28485 loss: 2.0701 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8305 +[titan] 2025-10-05 16:01:09,012 - root - INFO - lr: 1.3753e-05 gnorm: 1.14 [17:26:58< 7:03:14] +[titan] 2025-10-05 16:01:19,903 - root - INFO - step: 28490 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 16:01:19,903 - root - INFO - lr: 1.3746e-05 gnorm: 1.12 [17:27:09< 7:03:03] +[titan] 2025-10-05 16:01:30,819 - root - INFO - step: 28495 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.48 mfu: 42.11% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8001 +[titan] 2025-10-05 16:01:30,819 - root - INFO - lr: 1.3739e-05 gnorm: 1.18 [17:27:20< 7:02:51] +[titan] 2025-10-05 16:01:39,556 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:01:41,746 - root - INFO - step: 28500 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:01:41,746 - root - INFO - lr: 1.3732e-05 gnorm: 1.13 [17:27:30< 7:02:40] +[titan] 2025-10-05 16:01:52,631 - root - INFO - step: 28505 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7896 +[titan] 2025-10-05 16:01:52,631 - root - INFO - lr: 1.3725e-05 gnorm: 1.13 [17:27:41< 7:02:29] +[titan] 2025-10-05 16:02:03,551 - root - INFO - step: 28510 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:02:03,551 - root - INFO - lr: 1.3718e-05 gnorm: 1.12 [17:27:52< 7:02:18] +[titan] 2025-10-05 16:02:14,435 - root - INFO - step: 28515 loss: 2.0118 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7792 +[titan] 2025-10-05 16:02:14,435 - root - INFO - lr: 1.3711e-05 gnorm: 1.16 [17:28:03< 7:02:07] +[titan] 2025-10-05 16:02:25,309 - root - INFO - step: 28520 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 16:02:25,310 - root - INFO - lr: 1.3704e-05 gnorm: 1.09 [17:28:14< 7:01:56] +[titan] 2025-10-05 16:02:36,209 - root - INFO - step: 28525 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 16:02:36,209 - root - INFO - lr: 1.3696e-05 gnorm: 1.09 [17:28:25< 7:01:45] +[titan] 2025-10-05 16:02:47,089 - root - INFO - step: 28530 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 16:02:47,089 - root - INFO - lr: 
1.3689e-05 gnorm: 1.11 [17:28:36< 7:01:34] +[titan] 2025-10-05 16:02:57,982 - root - INFO - step: 28535 loss: 2.0168 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7822 +[titan] 2025-10-05 16:02:57,982 - root - INFO - lr: 1.3682e-05 gnorm: 1.14 [17:28:47< 7:01:23] +[titan] 2025-10-05 16:03:08,840 - root - INFO - step: 28540 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 16:03:08,840 - root - INFO - lr: 1.3675e-05 gnorm: 1.13 [17:28:58< 7:01:12] +[titan] 2025-10-05 16:03:19,756 - root - INFO - step: 28545 loss: 2.0706 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8308 +[titan] 2025-10-05 16:03:19,756 - root - INFO - lr: 1.3668e-05 gnorm: 1.15 [17:29:08< 7:01:01] +[titan] 2025-10-05 16:03:28,457 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:03:30,650 - root - INFO - step: 28550 loss: 1.9538 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:03:30,650 - root - INFO - lr: 1.3661e-05 gnorm: 1.08 [17:29:19< 7:00:50] +[titan] 2025-10-05 16:03:41,553 - root - INFO - step: 28555 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7904 +[titan] 2025-10-05 16:03:41,553 - root - INFO - lr: 1.3654e-05 gnorm: 1.11 [17:29:30< 7:00:39] +[titan] 2025-10-05 16:03:52,429 - root - INFO - step: 28560 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7492 +[titan] 2025-10-05 16:03:52,429 - root - INFO - lr: 1.3647e-05 gnorm: 1.07 [17:29:41< 7:00:27] +[titan] 2025-10-05 16:04:03,288 - root - INFO - step: 28565 loss: 2.0252 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 16:04:03,288 - root - INFO - lr: 1.3640e-05 gnorm: 1.10 [17:29:52< 7:00:16] +[titan] 2025-10-05 16:04:14,124 - root - INFO - step: 28570 loss: 2.0463 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2415 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 16:04:14,124 - root - INFO - lr: 1.3633e-05 gnorm: 1.11 [17:30:03< 7:00:05] +[titan] 2025-10-05 16:04:25,006 - root - INFO - step: 28575 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 16:04:25,006 - root - INFO - lr: 1.3626e-05 gnorm: 1.95 [17:30:14< 6:59:54] +[titan] 2025-10-05 16:04:35,875 - root - INFO - step: 28580 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:04:35,875 - root - INFO - lr: 
1.3619e-05 gnorm: 1.09 [17:30:25< 6:59:43] +[titan] 2025-10-05 16:04:46,735 - root - INFO - step: 28585 loss: 1.9918 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 16:04:46,735 - root - INFO - lr: 1.3612e-05 gnorm: 1.12 [17:30:35< 6:59:32] +[titan] 2025-10-05 16:04:57,585 - root - INFO - step: 28590 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 16:04:57,585 - root - INFO - lr: 1.3605e-05 gnorm: 1.06 [17:30:46< 6:59:21] +[titan] 2025-10-05 16:05:08,445 - root - INFO - step: 28595 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7867 +[titan] 2025-10-05 16:05:08,445 - root - INFO - lr: 1.3598e-05 gnorm: 1.15 [17:30:57< 6:59:10] +[titan] 2025-10-05 16:05:17,134 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:05:19,314 - root - INFO - step: 28600 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:05:19,314 - root - INFO - lr: 1.3591e-05 gnorm: 1.10 [17:31:08< 6:58:59] +[titan] 2025-10-05 16:05:30,213 - root - INFO - step: 28605 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 16:05:30,213 - root - INFO - lr: 1.3584e-05 gnorm: 1.11 [17:31:19< 6:58:48] +[titan] 2025-10-05 16:05:41,137 - root - INFO - step: 28610 loss: 2.0767 memory: 118.84GiB(85.28%) tps: 29,998 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8360 +[titan] 2025-10-05 16:05:41,137 - root - INFO - lr: 1.3577e-05 gnorm: 1.11 [17:31:30< 6:58:37] +[titan] 2025-10-05 16:05:52,006 - root - INFO - step: 28615 loss: 2.0124 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:05:52,007 - root - INFO - lr: 1.3570e-05 gnorm: 1.10 [17:31:41< 6:58:25] +[titan] 2025-10-05 16:06:02,858 - root - INFO - step: 28620 loss: 2.0642 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2391 global_avg_mtp_loss: 1.8251 +[titan] 2025-10-05 16:06:02,859 - root - INFO - lr: 1.3563e-05 gnorm: 1.12 [17:31:52< 6:58:14] +[titan] 2025-10-05 16:06:13,712 - root - INFO - step: 28625 loss: 2.0174 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 16:06:13,712 - root - INFO - lr: 1.3556e-05 gnorm: 1.10 [17:32:02< 6:58:03] +[titan] 2025-10-05 16:06:24,582 - root - INFO - step: 28630 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 16:06:24,582 - root - INFO - lr: 
1.3549e-05 gnorm: 1.11 [17:32:13< 6:57:52] +[titan] 2025-10-05 16:06:35,472 - root - INFO - step: 28635 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7192 +[titan] 2025-10-05 16:06:35,472 - root - INFO - lr: 1.3542e-05 gnorm: 1.12 [17:32:24< 6:57:41] +[titan] 2025-10-05 16:06:46,399 - root - INFO - step: 28640 loss: 2.0089 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7766 +[titan] 2025-10-05 16:06:46,399 - root - INFO - lr: 1.3535e-05 gnorm: 1.13 [17:32:35< 6:57:30] +[titan] 2025-10-05 16:06:57,266 - root - INFO - step: 28645 loss: 2.0456 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8087 +[titan] 2025-10-05 16:06:57,266 - root - INFO - lr: 1.3528e-05 gnorm: 1.16 [17:32:46< 6:57:19] +[titan] 2025-10-05 16:07:05,933 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:07:08,125 - root - INFO - step: 28650 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 16:07:08,125 - root - INFO - lr: 1.3521e-05 gnorm: 1.08 [17:32:57< 6:57:08] +[titan] 2025-10-05 16:07:18,989 - root - INFO - step: 28655 loss: 1.9921 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7620 +[titan] 2025-10-05 16:07:18,989 - root - INFO - lr: 1.3514e-05 gnorm: 1.13 [17:33:08< 6:56:57] +[titan] 2025-10-05 16:07:29,863 - root - INFO - step: 28660 loss: 2.0490 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:07:29,863 - root - INFO - lr: 1.3507e-05 gnorm: 1.11 [17:33:19< 6:56:46] +[titan] 2025-10-05 16:07:40,776 - root - INFO - step: 28665 loss: 2.0607 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2447 global_avg_mtp_loss: 1.8160 +[titan] 2025-10-05 16:07:40,776 - root - INFO - lr: 1.3500e-05 gnorm: 1.24 [17:33:29< 6:56:35] +[titan] 2025-10-05 16:07:51,762 - root - INFO - step: 28670 loss: 2.0573 memory: 118.84GiB(85.28%) tps: 29,828 tflops: 413.81 mfu: 41.84% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8180 +[titan] 2025-10-05 16:07:51,762 - root - INFO - lr: 1.3493e-05 gnorm: 1.20 [17:33:40< 6:56:24] +[titan] 2025-10-05 16:07:56,301 - root - INFO - Dumping profiler traces at step 28672 +[titan] 2025-10-05 16:07:56,339 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:08:02,863 - root - INFO - step: 28675 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,520 tflops: 409.54 mfu: 41.41% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 16:08:02,863 - root - INFO - lr: 1.3486e-05 gnorm: 1.09 [17:33:52< 6:56:13] +[titan] 2025-10-05 16:08:13,727 - root - INFO - step: 28680 loss: 
1.9747 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 16:08:13,728 - root - INFO - lr: 1.3479e-05 gnorm: 1.10 [17:34:02< 6:56:01] +[titan] 2025-10-05 16:08:24,599 - root - INFO - step: 28685 loss: 2.0016 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7697 +[titan] 2025-10-05 16:08:24,599 - root - INFO - lr: 1.3472e-05 gnorm: 1.11 [17:34:13< 6:55:50] +[titan] 2025-10-05 16:08:35,472 - root - INFO - step: 28690 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 16:08:35,472 - root - INFO - lr: 1.3465e-05 gnorm: 1.12 [17:34:24< 6:55:39] +[titan] 2025-10-05 16:08:46,365 - root - INFO - step: 28695 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:08:46,366 - root - INFO - lr: 1.3458e-05 gnorm: 1.10 [17:34:35< 6:55:28] +[titan] 2025-10-05 16:08:55,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:08:57,207 - root - INFO - step: 28700 loss: 2.0188 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7841 +[titan] 2025-10-05 16:08:57,207 - root - INFO - lr: 1.3451e-05 gnorm: 1.16 [17:34:46< 6:55:17] +[titan] 2025-10-05 16:09:08,084 - root - INFO - step: 28705 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 16:09:08,084 - root - INFO - lr: 1.3444e-05 gnorm: 1.11 [17:34:57< 6:55:06] +[titan] 2025-10-05 16:09:18,920 - root - INFO - step: 28710 loss: 1.8967 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.55 mfu: 42.42% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 16:09:18,920 - root - INFO - lr: 1.3437e-05 gnorm: 1.12 [17:35:08< 6:54:55] +[titan] 2025-10-05 16:09:29,743 - root - INFO - step: 28715 loss: 2.0460 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8078 +[titan] 2025-10-05 16:09:29,743 - root - INFO - lr: 1.3430e-05 gnorm: 1.24 [17:35:18< 6:54:44] +[titan] 2025-10-05 16:09:40,610 - root - INFO - step: 28720 loss: 2.0359 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:09:40,610 - root - INFO - lr: 1.3423e-05 gnorm: 1.10 [17:35:29< 6:54:33] +[titan] 2025-10-05 16:09:51,475 - root - INFO - step: 28725 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 16:09:51,475 - root - INFO - lr: 1.3416e-05 gnorm: 1.07 [17:35:40< 6:54:22] +[titan] 2025-10-05 16:10:02,333 - root - INFO - step: 28730 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:10:02,333 - root - INFO - lr: 
1.3409e-05 gnorm: 1.13 [17:35:51< 6:54:11] +[titan] 2025-10-05 16:10:13,264 - root - INFO - step: 28735 loss: 1.9995 memory: 118.84GiB(85.28%) tps: 29,977 tflops: 415.88 mfu: 42.05% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:10:13,265 - root - INFO - lr: 1.3402e-05 gnorm: 1.14 [17:36:02< 6:54:00] +[titan] 2025-10-05 16:10:24,137 - root - INFO - step: 28740 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:10:24,137 - root - INFO - lr: 1.3395e-05 gnorm: 1.07 [17:36:13< 6:53:48] +[titan] 2025-10-05 16:10:34,996 - root - INFO - step: 28745 loss: 2.0343 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7979 +[titan] 2025-10-05 16:10:34,996 - root - INFO - lr: 1.3389e-05 gnorm: 1.14 [17:36:24< 6:53:37] +[titan] 2025-10-05 16:10:43,932 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:10:46,124 - root - INFO - step: 28750 loss: 2.0411 memory: 118.84GiB(85.28%) tps: 29,446 tflops: 408.52 mfu: 41.31% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8030 +[titan] 2025-10-05 16:10:46,125 - root - INFO - lr: 1.3382e-05 gnorm: 1.10 [17:36:35< 6:53:26] +[titan] 2025-10-05 16:10:56,975 - root - INFO - step: 28755 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7186 +[titan] 2025-10-05 16:10:56,975 - root - INFO - lr: 1.3375e-05 gnorm: 1.11 [17:36:46< 6:53:15] +[titan] 2025-10-05 16:11:07,804 - root - INFO - step: 28760 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7708 +[titan] 2025-10-05 16:11:07,804 - root - INFO - lr: 1.3368e-05 gnorm: 1.13 [17:36:57< 6:53:04] +[titan] 2025-10-05 16:11:18,644 - root - INFO - step: 28765 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 16:11:18,644 - root - INFO - lr: 1.3361e-05 gnorm: 1.14 [17:37:07< 6:52:53] +[titan] 2025-10-05 16:11:29,465 - root - INFO - step: 28770 loss: 2.0694 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2400 global_avg_mtp_loss: 1.8293 +[titan] 2025-10-05 16:11:29,465 - root - INFO - lr: 1.3354e-05 gnorm: 1.11 [17:37:18< 6:52:42] +[titan] 2025-10-05 16:11:40,342 - root - INFO - step: 28775 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 16:11:40,342 - root - INFO - lr: 1.3347e-05 gnorm: 1.10 [17:37:29< 6:52:31] +[titan] 2025-10-05 16:11:51,163 - root - INFO - step: 28780 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.12 mfu: 42.48% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 16:11:51,164 - root - INFO - lr: 
1.3340e-05 gnorm: 1.10 [17:37:40< 6:52:20] +[titan] 2025-10-05 16:12:01,972 - root - INFO - step: 28785 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,319 tflops: 420.63 mfu: 42.53% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7549 +[titan] 2025-10-05 16:12:01,972 - root - INFO - lr: 1.3333e-05 gnorm: 1.09 [17:37:51< 6:52:09] +[titan] 2025-10-05 16:12:12,796 - root - INFO - step: 28790 loss: 2.0542 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8164 +[titan] 2025-10-05 16:12:12,796 - root - INFO - lr: 1.3326e-05 gnorm: 1.14 [17:38:01< 6:51:58] +[titan] 2025-10-05 16:12:23,627 - root - INFO - step: 28795 loss: 2.0187 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2448 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 16:12:23,627 - root - INFO - lr: 1.3319e-05 gnorm: 5.74 [17:38:12< 6:51:46] +[titan] 2025-10-05 16:12:32,331 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:12:34,510 - root - INFO - step: 28800 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 16:12:34,510 - root - INFO - lr: 1.3312e-05 gnorm: 1.12 [17:38:23< 6:51:35] +[titan] 2025-10-05 16:12:45,424 - root - INFO - step: 28805 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 16:12:45,424 - root - INFO - lr: 1.3305e-05 gnorm: 1.12 [17:38:34< 6:51:24] +[titan] 2025-10-05 16:12:56,285 - root - INFO - step: 28810 loss: 1.9337 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 16:12:56,285 - root - INFO - lr: 1.3298e-05 gnorm: 1.11 [17:38:45< 6:51:13] +[titan] 2025-10-05 16:13:07,115 - root - INFO - step: 28815 loss: 2.0821 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2414 global_avg_mtp_loss: 1.8406 +[titan] 2025-10-05 16:13:07,115 - root - INFO - lr: 1.3291e-05 gnorm: 1.14 [17:38:56< 6:51:02] +[titan] 2025-10-05 16:13:17,934 - root - INFO - step: 28820 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.8098 +[titan] 2025-10-05 16:13:17,934 - root - INFO - lr: 1.3284e-05 gnorm: 1.14 [17:39:07< 6:50:51] +[titan] 2025-10-05 16:13:28,784 - root - INFO - step: 28825 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7000 +[titan] 2025-10-05 16:13:28,784 - root - INFO - lr: 1.3278e-05 gnorm: 1.09 [17:39:17< 6:50:40] +[titan] 2025-10-05 16:13:39,674 - root - INFO - step: 28830 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 16:13:39,675 - root - INFO - lr: 
1.3271e-05 gnorm: 1.13 [17:39:28< 6:50:29] +[titan] 2025-10-05 16:13:50,584 - root - INFO - step: 28835 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7808 +[titan] 2025-10-05 16:13:50,584 - root - INFO - lr: 1.3264e-05 gnorm: 1.14 [17:39:39< 6:50:18] +[titan] 2025-10-05 16:14:01,435 - root - INFO - step: 28840 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.7945 +[titan] 2025-10-05 16:14:01,435 - root - INFO - lr: 1.3257e-05 gnorm: 1.11 [17:39:50< 6:50:07] +[titan] 2025-10-05 16:14:12,264 - root - INFO - step: 28845 loss: 2.0446 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8076 +[titan] 2025-10-05 16:14:12,264 - root - INFO - lr: 1.3250e-05 gnorm: 1.14 [17:40:01< 6:49:56] +[titan] 2025-10-05 16:14:20,892 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:14:23,077 - root - INFO - step: 28850 loss: 2.0780 memory: 118.84GiB(85.28%) tps: 30,305 tflops: 420.44 mfu: 42.51% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8372 +[titan] 2025-10-05 16:14:23,077 - root - INFO - lr: 1.3243e-05 gnorm: 1.13 [17:40:12< 6:49:44] +[titan] 2025-10-05 16:14:33,903 - root - INFO - step: 28855 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 16:14:33,903 - root - INFO - lr: 1.3236e-05 gnorm: 1.10 [17:40:23< 6:49:33] +[titan] 2025-10-05 16:14:44,758 - root - INFO - step: 28860 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 16:14:44,758 - root - INFO - lr: 1.3229e-05 gnorm: 1.18 [17:40:33< 6:49:22] +[titan] 2025-10-05 16:14:55,624 - root - INFO - step: 28865 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 16:14:55,624 - root - INFO - lr: 1.3222e-05 gnorm: 1.13 [17:40:44< 6:49:11] +[titan] 2025-10-05 16:15:06,429 - root - INFO - step: 28870 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,328 tflops: 420.76 mfu: 42.54% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7437 +[titan] 2025-10-05 16:15:06,429 - root - INFO - lr: 1.3215e-05 gnorm: 1.12 [17:40:55< 6:49:00] +[titan] 2025-10-05 16:15:17,255 - root - INFO - step: 28875 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 16:15:17,255 - root - INFO - lr: 1.3209e-05 gnorm: 1.14 [17:41:06< 6:48:49] +[titan] 2025-10-05 16:15:28,083 - root - INFO - step: 28880 loss: 2.0444 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8077 +[titan] 2025-10-05 16:15:28,084 - root - INFO - lr: 
1.3202e-05 gnorm: 1.11 [17:41:17< 6:48:38] +[titan] 2025-10-05 16:15:38,890 - root - INFO - step: 28885 loss: 2.0009 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.70 mfu: 42.54% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 16:15:38,890 - root - INFO - lr: 1.3195e-05 gnorm: 1.12 [17:41:28< 6:48:27] +[titan] 2025-10-05 16:15:49,744 - root - INFO - step: 28890 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7735 +[titan] 2025-10-05 16:15:49,744 - root - INFO - lr: 1.3188e-05 gnorm: 1.17 [17:41:38< 6:48:16] +[titan] 2025-10-05 16:16:00,598 - root - INFO - step: 28895 loss: 2.0217 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 16:16:00,598 - root - INFO - lr: 1.3181e-05 gnorm: 1.11 [17:41:49< 6:48:05] +[titan] 2025-10-05 16:16:09,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:16:11,420 - root - INFO - step: 28900 loss: 2.0432 memory: 118.84GiB(85.28%) tps: 30,279 tflops: 420.07 mfu: 42.47% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8061 +[titan] 2025-10-05 16:16:11,421 - root - INFO - lr: 1.3174e-05 gnorm: 1.14 [17:42:00< 6:47:54] +[titan] 2025-10-05 16:16:22,244 - root - INFO - step: 28905 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:16:22,245 - root - INFO - lr: 1.3167e-05 gnorm: 1.10 [17:42:11< 6:47:42] +[titan] 2025-10-05 16:16:33,059 - root - INFO - step: 28910 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 16:16:33,059 - root - INFO - lr: 1.3160e-05 gnorm: 1.14 [17:42:22< 6:47:31] +[titan] 2025-10-05 16:16:43,886 - root - INFO - step: 28915 loss: 1.9331 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7085 +[titan] 2025-10-05 16:16:43,887 - root - INFO - lr: 1.3153e-05 gnorm: 1.09 [17:42:33< 6:47:20] +[titan] 2025-10-05 16:16:54,738 - root - INFO - step: 28920 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 16:16:54,738 - root - INFO - lr: 1.3147e-05 gnorm: 1.09 [17:42:43< 6:47:09] +[titan] 2025-10-05 16:17:05,576 - root - INFO - step: 28925 loss: 2.0233 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.48 mfu: 42.41% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 16:17:05,576 - root - INFO - lr: 1.3140e-05 gnorm: 1.13 [17:42:54< 6:46:58] +[titan] 2025-10-05 16:17:16,436 - root - INFO - step: 28930 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7752 +[titan] 2025-10-05 16:17:16,437 - root - INFO - lr: 
1.3133e-05 gnorm: 1.12 [17:43:05< 6:46:47] +[titan] 2025-10-05 16:17:27,262 - root - INFO - step: 28935 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 16:17:27,263 - root - INFO - lr: 1.3126e-05 gnorm: 1.13 [17:43:16< 6:46:36] +[titan] 2025-10-05 16:17:38,068 - root - INFO - step: 28940 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.72 mfu: 42.54% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7700 +[titan] 2025-10-05 16:17:38,069 - root - INFO - lr: 1.3119e-05 gnorm: 1.12 [17:43:27< 6:46:25] +[titan] 2025-10-05 16:17:48,901 - root - INFO - step: 28945 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 16:17:48,902 - root - INFO - lr: 1.3112e-05 gnorm: 1.11 [17:43:38< 6:46:14] +[titan] 2025-10-05 16:17:57,530 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:17:59,707 - root - INFO - step: 28950 loss: 2.0720 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2401 global_avg_mtp_loss: 1.8319 +[titan] 2025-10-05 16:17:59,708 - root - INFO - lr: 1.3105e-05 gnorm: 1.13 [17:43:48< 6:46:03] +[titan] 2025-10-05 16:18:10,530 - root - INFO - step: 28955 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 16:18:10,531 - root - INFO - lr: 1.3099e-05 gnorm: 1.13 [17:43:59< 6:45:51] +[titan] 2025-10-05 16:18:21,362 - root - INFO - step: 28960 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.72 mfu: 42.44% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7806 +[titan] 2025-10-05 16:18:21,362 - root - INFO - lr: 1.3092e-05 gnorm: 1.11 [17:44:10< 6:45:40] +[titan] 2025-10-05 16:18:32,177 - root - INFO - step: 28965 loss: 2.0315 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7954 +[titan] 2025-10-05 16:18:32,177 - root - INFO - lr: 1.3085e-05 gnorm: 1.15 [17:44:21< 6:45:29] +[titan] 2025-10-05 16:18:43,001 - root - INFO - step: 28970 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7932 +[titan] 2025-10-05 16:18:43,002 - root - INFO - lr: 1.3078e-05 gnorm: 1.12 [17:44:32< 6:45:18] +[titan] 2025-10-05 16:18:53,823 - root - INFO - step: 28975 loss: 2.0205 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7847 +[titan] 2025-10-05 16:18:53,823 - root - INFO - lr: 1.3071e-05 gnorm: 1.15 [17:44:43< 6:45:07] +[titan] 2025-10-05 16:19:04,658 - root - INFO - step: 28980 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7964 +[titan] 2025-10-05 16:19:04,658 - root - INFO - lr: 
1.3064e-05 gnorm: 1.09 [17:44:53< 6:44:56] +[titan] 2025-10-05 16:19:15,441 - root - INFO - step: 28985 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,389 tflops: 421.61 mfu: 42.63% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 16:19:15,441 - root - INFO - lr: 1.3057e-05 gnorm: 1.14 [17:45:04< 6:44:45] +[titan] 2025-10-05 16:19:26,267 - root - INFO - step: 28990 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:19:26,267 - root - INFO - lr: 1.3051e-05 gnorm: 1.12 [17:45:15< 6:44:34] +[titan] 2025-10-05 16:19:37,046 - root - INFO - step: 28995 loss: 1.9800 memory: 118.84GiB(85.28%) tps: 30,400 tflops: 421.76 mfu: 42.65% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 16:19:37,046 - root - INFO - lr: 1.3044e-05 gnorm: 1.09 [17:45:26< 6:44:23] +[titan] 2025-10-05 16:19:45,671 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:19:47,884 - root - INFO - step: 29000 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 16:19:47,884 - root - INFO - lr: 1.3037e-05 gnorm: 1.10 [17:45:37< 6:44:11] +[titan] 2025-10-05 16:19:58,682 - root - INFO - step: 29005 loss: 2.0376 memory: 118.84GiB(85.28%) tps: 30,347 tflops: 421.02 mfu: 42.57% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8015 +[titan] 2025-10-05 16:19:58,682 - root - INFO - lr: 1.3030e-05 gnorm: 1.14 [17:45:47< 6:44:00] +[titan] 2025-10-05 16:20:09,482 - root - INFO - step: 29010 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 16:20:09,482 - root - INFO - lr: 1.3023e-05 gnorm: 1.08 [17:45:58< 6:43:49] +[titan] 2025-10-05 16:20:20,322 - root - INFO - step: 29015 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 16:20:20,323 - root - INFO - lr: 1.3016e-05 gnorm: 1.11 [17:46:09< 6:43:38] +[titan] 2025-10-05 16:20:31,122 - root - INFO - step: 29020 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 16:20:31,122 - root - INFO - lr: 1.3010e-05 gnorm: 1.14 [17:46:20< 6:43:27] +[titan] 2025-10-05 16:20:42,001 - root - INFO - step: 29025 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7709 +[titan] 2025-10-05 16:20:42,001 - root - INFO - lr: 1.3003e-05 gnorm: 1.11 [17:46:31< 6:43:16] +[titan] 2025-10-05 16:20:52,862 - root - INFO - step: 29030 loss: 2.0661 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8248 +[titan] 2025-10-05 16:20:52,862 - root - INFO - lr: 
1.2996e-05 gnorm: 1.17 [17:46:42< 6:43:05] +[titan] 2025-10-05 16:21:03,692 - root - INFO - step: 29035 loss: 2.0489 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.45% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8113 +[titan] 2025-10-05 16:21:03,692 - root - INFO - lr: 1.2989e-05 gnorm: 1.19 [17:46:52< 6:42:54] +[titan] 2025-10-05 16:21:14,546 - root - INFO - step: 29040 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 16:21:14,546 - root - INFO - lr: 1.2982e-05 gnorm: 1.13 [17:47:03< 6:42:43] +[titan] 2025-10-05 16:21:25,382 - root - INFO - step: 29045 loss: 2.0710 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2408 global_avg_mtp_loss: 1.8302 +[titan] 2025-10-05 16:21:25,382 - root - INFO - lr: 1.2975e-05 gnorm: 1.11 [17:47:14< 6:42:32] +[titan] 2025-10-05 16:21:34,010 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:21:36,183 - root - INFO - step: 29050 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7903 +[titan] 2025-10-05 16:21:36,183 - root - INFO - lr: 1.2969e-05 gnorm: 1.15 [17:47:25< 6:42:21] +[titan] 2025-10-05 16:21:47,040 - root - INFO - step: 29055 loss: 2.0708 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8311 +[titan] 2025-10-05 16:21:47,040 - root - INFO - lr: 1.2962e-05 gnorm: 1.13 [17:47:36< 6:42:09] +[titan] 2025-10-05 16:21:57,945 - root - INFO - step: 29060 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7823 +[titan] 2025-10-05 16:21:57,945 - root - INFO - lr: 1.2955e-05 gnorm: 1.09 [17:47:47< 6:41:58] +[titan] 2025-10-05 16:22:08,763 - root - INFO - step: 29065 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 16:22:08,763 - root - INFO - lr: 1.2948e-05 gnorm: 1.10 [17:47:57< 6:41:47] +[titan] 2025-10-05 16:22:19,602 - root - INFO - step: 29070 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 16:22:19,603 - root - INFO - lr: 1.2941e-05 gnorm: 1.12 [17:48:08< 6:41:36] +[titan] 2025-10-05 16:22:30,424 - root - INFO - step: 29075 loss: 1.9436 memory: 118.84GiB(85.28%) tps: 30,282 tflops: 420.11 mfu: 42.48% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 16:22:30,424 - root - INFO - lr: 1.2935e-05 gnorm: 1.08 [17:48:19< 6:41:25] +[titan] 2025-10-05 16:22:41,221 - root - INFO - step: 29080 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 30,349 tflops: 421.04 mfu: 42.57% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7914 +[titan] 2025-10-05 16:22:41,221 - root - INFO - lr: 
1.2928e-05 gnorm: 1.12 [17:48:30< 6:41:14] +[titan] 2025-10-05 16:22:52,143 - root - INFO - step: 29085 loss: 2.0455 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2383 global_avg_mtp_loss: 1.8072 +[titan] 2025-10-05 16:22:52,143 - root - INFO - lr: 1.2921e-05 gnorm: 1.13 [17:48:41< 6:41:03] +[titan] 2025-10-05 16:23:02,963 - root - INFO - step: 29090 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 16:23:02,964 - root - INFO - lr: 1.2914e-05 gnorm: 1.15 [17:48:52< 6:40:52] +[titan] 2025-10-05 16:23:13,781 - root - INFO - step: 29095 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7612 +[titan] 2025-10-05 16:23:13,781 - root - INFO - lr: 1.2907e-05 gnorm: 1.12 [17:49:02< 6:40:41] +[titan] 2025-10-05 16:23:22,389 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:23:24,561 - root - INFO - step: 29100 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,396 tflops: 421.70 mfu: 42.64% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 16:23:24,562 - root - INFO - lr: 1.2901e-05 gnorm: 1.15 [17:49:13< 6:40:30] +[titan] 2025-10-05 16:23:35,362 - root - INFO - step: 29105 loss: 2.0180 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:23:35,363 - root - INFO - lr: 1.2894e-05 gnorm: 1.15 [17:49:24< 6:40:18] +[titan] 2025-10-05 16:23:46,147 - root - INFO - step: 29110 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,385 tflops: 421.54 mfu: 42.62% global_avg_ntp_loss: 0.2409 global_avg_mtp_loss: 1.7864 +[titan] 2025-10-05 16:23:46,147 - root - INFO - lr: 1.2887e-05 gnorm: 1.13 [17:49:35< 6:40:07] +[titan] 2025-10-05 16:23:56,986 - root - INFO - step: 29115 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 16:23:56,986 - root - INFO - lr: 1.2880e-05 gnorm: 1.11 [17:49:46< 6:39:56] +[titan] 2025-10-05 16:24:07,804 - root - INFO - step: 29120 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 16:24:07,804 - root - INFO - lr: 1.2873e-05 gnorm: 1.09 [17:49:56< 6:39:45] +[titan] 2025-10-05 16:24:18,657 - root - INFO - step: 29125 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 16:24:18,657 - root - INFO - lr: 1.2867e-05 gnorm: 1.08 [17:50:07< 6:39:34] +[titan] 2025-10-05 16:24:29,461 - root - INFO - step: 29130 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.79 mfu: 42.55% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 16:24:29,461 - root - INFO - lr: 
1.2860e-05 gnorm: 1.11 [17:50:18< 6:39:23] +[titan] 2025-10-05 16:24:40,248 - root - INFO - step: 29135 loss: 2.0370 memory: 118.84GiB(85.28%) tps: 30,379 tflops: 421.46 mfu: 42.61% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8003 +[titan] 2025-10-05 16:24:40,248 - root - INFO - lr: 1.2853e-05 gnorm: 1.23 [17:50:29< 6:39:12] +[titan] 2025-10-05 16:24:51,066 - root - INFO - step: 29140 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,290 tflops: 420.23 mfu: 42.49% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 16:24:51,066 - root - INFO - lr: 1.2846e-05 gnorm: 1.11 [17:50:40< 6:39:01] +[titan] 2025-10-05 16:25:01,882 - root - INFO - step: 29145 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,297 tflops: 420.33 mfu: 42.50% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 16:25:01,882 - root - INFO - lr: 1.2840e-05 gnorm: 1.14 [17:50:51< 6:38:50] +[titan] 2025-10-05 16:25:10,529 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:25:12,729 - root - INFO - step: 29150 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:12,730 - root - INFO - lr: 1.2833e-05 gnorm: 1.16 [17:51:01< 6:38:39] +[titan] 2025-10-05 16:25:23,552 - root - INFO - step: 29155 loss: 1.9771 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.06 mfu: 42.47% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:25:23,552 - root - INFO - lr: 1.2826e-05 gnorm: 1.11 [17:51:12< 6:38:27] +[titan] 2025-10-05 16:25:34,364 - root - INFO - step: 29160 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 16:25:34,364 - root - INFO - lr: 1.2819e-05 gnorm: 1.13 [17:51:23< 6:38:16] +[titan] 2025-10-05 16:25:45,141 - root - INFO - step: 29165 loss: 2.0035 memory: 118.84GiB(85.28%) tps: 30,406 tflops: 421.83 mfu: 42.65% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:25:45,141 - root - INFO - lr: 1.2813e-05 gnorm: 1.10 [17:51:34< 6:38:05] +[titan] 2025-10-05 16:25:55,942 - root - INFO - step: 29170 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 16:25:55,943 - root - INFO - lr: 1.2806e-05 gnorm: 1.12 [17:51:45< 6:37:54] +[titan] 2025-10-05 16:26:06,754 - root - INFO - step: 29175 loss: 2.0176 memory: 118.84GiB(85.28%) tps: 30,309 tflops: 420.49 mfu: 42.52% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7832 +[titan] 2025-10-05 16:26:06,754 - root - INFO - lr: 1.2799e-05 gnorm: 1.13 [17:51:55< 6:37:43] +[titan] 2025-10-05 16:26:17,565 - root - INFO - step: 29180 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,310 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 16:26:17,566 - root - INFO - lr: 
1.2792e-05 gnorm: 1.11 [17:52:06< 6:37:32] +[titan] 2025-10-05 16:26:26,493 - root - INFO - Dumping profiler traces at step 29184 +[titan] 2025-10-05 16:26:26,533 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:26:28,703 - root - INFO - step: 29185 loss: 2.0239 memory: 118.84GiB(85.28%) tps: 29,423 tflops: 408.20 mfu: 41.27% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:26:28,703 - root - INFO - lr: 1.2786e-05 gnorm: 1.13 [17:52:17< 6:37:21] +[titan] 2025-10-05 16:26:39,480 - root - INFO - step: 29190 loss: 2.0459 memory: 118.84GiB(85.28%) tps: 30,405 tflops: 421.82 mfu: 42.65% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8084 +[titan] 2025-10-05 16:26:39,480 - root - INFO - lr: 1.2779e-05 gnorm: 1.08 [17:52:28< 6:37:10] +[titan] 2025-10-05 16:26:50,282 - root - INFO - step: 29195 loss: 2.0110 memory: 118.84GiB(85.28%) tps: 30,338 tflops: 420.90 mfu: 42.56% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7780 +[titan] 2025-10-05 16:26:50,282 - root - INFO - lr: 1.2772e-05 gnorm: 1.11 [17:52:39< 6:36:59] +[titan] 2025-10-05 16:26:58,914 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:27:01,083 - root - INFO - step: 29200 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:27:01,084 - root - INFO - lr: 1.2765e-05 gnorm: 1.10 [17:52:50< 6:36:48] +[titan] 2025-10-05 16:27:11,900 - root - INFO - step: 29205 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,295 tflops: 420.29 mfu: 42.50% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:27:11,900 - root - INFO - lr: 1.2759e-05 gnorm: 1.11 [17:53:01< 6:36:37] +[titan] 2025-10-05 16:27:22,704 - root - INFO - step: 29210 loss: 2.0293 memory: 118.84GiB(85.28%) tps: 30,331 tflops: 420.80 mfu: 42.55% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:27:22,704 - root - INFO - lr: 1.2752e-05 gnorm: 1.13 [17:53:11< 6:36:25] +[titan] 2025-10-05 16:27:33,520 - root - INFO - step: 29215 loss: 1.9806 memory: 118.84GiB(85.28%) tps: 30,296 tflops: 420.32 mfu: 42.50% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 16:27:33,520 - root - INFO - lr: 1.2745e-05 gnorm: 1.13 [17:53:22< 6:36:14] +[titan] 2025-10-05 16:27:44,344 - root - INFO - step: 29220 loss: 2.0330 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.04 mfu: 42.47% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:27:44,344 - root - INFO - lr: 1.2738e-05 gnorm: 1.11 [17:53:33< 6:36:03] +[titan] 2025-10-05 16:27:55,246 - root - INFO - step: 29225 loss: 2.0435 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 16:27:55,246 - root - INFO - lr: 1.2732e-05 gnorm: 1.14 [17:53:44< 6:35:52] +[titan] 2025-10-05 16:28:06,063 - root - INFO - step: 29230 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.26 mfu: 42.49% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 16:28:06,064 - root - INFO - lr: 
1.2725e-05 gnorm: 1.10 [17:53:55< 6:35:41] +[titan] 2025-10-05 16:28:16,881 - root - INFO - step: 29235 loss: 1.9977 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7668 +[titan] 2025-10-05 16:28:16,882 - root - INFO - lr: 1.2718e-05 gnorm: 1.12 [17:54:06< 6:35:30] +[titan] 2025-10-05 16:28:27,741 - root - INFO - step: 29240 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 16:28:27,741 - root - INFO - lr: 1.2711e-05 gnorm: 1.12 [17:54:16< 6:35:19] +[titan] 2025-10-05 16:28:38,608 - root - INFO - step: 29245 loss: 2.0048 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7728 +[titan] 2025-10-05 16:28:38,608 - root - INFO - lr: 1.2705e-05 gnorm: 1.14 [17:54:27< 6:35:08] +[titan] 2025-10-05 16:28:47,295 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:28:49,481 - root - INFO - step: 29250 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7535 +[titan] 2025-10-05 16:28:49,482 - root - INFO - lr: 1.2698e-05 gnorm: 1.12 [17:54:38< 6:34:57] +[titan] 2025-10-05 16:29:00,345 - root - INFO - step: 29255 loss: 2.0993 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8569 +[titan] 2025-10-05 16:29:00,345 - root - INFO - lr: 1.2691e-05 gnorm: 1.13 [17:54:49< 6:34:46] +[titan] 2025-10-05 16:29:11,181 - root - INFO - step: 29260 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 16:29:11,181 - root - INFO - lr: 1.2684e-05 gnorm: 1.10 [17:55:00< 6:34:35] +[titan] 2025-10-05 16:29:22,010 - root - INFO - step: 29265 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.82 mfu: 42.45% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 16:29:22,010 - root - INFO - lr: 1.2678e-05 gnorm: 1.10 [17:55:11< 6:34:24] +[titan] 2025-10-05 16:29:32,844 - root - INFO - step: 29270 loss: 2.0759 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2419 global_avg_mtp_loss: 1.8340 +[titan] 2025-10-05 16:29:32,845 - root - INFO - lr: 1.2671e-05 gnorm: 1.14 [17:55:21< 6:34:12] +[titan] 2025-10-05 16:29:43,662 - root - INFO - step: 29275 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 16:29:43,662 - root - INFO - lr: 1.2664e-05 gnorm: 1.10 [17:55:32< 6:34:01] +[titan] 2025-10-05 16:29:54,552 - root - INFO - step: 29280 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 16:29:54,553 - root - INFO - lr: 
1.2658e-05 gnorm: 1.15 [17:55:43< 6:33:50] +[titan] 2025-10-05 16:30:05,442 - root - INFO - step: 29285 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7976 +[titan] 2025-10-05 16:30:05,442 - root - INFO - lr: 1.2651e-05 gnorm: 1.15 [17:55:54< 6:33:39] +[titan] 2025-10-05 16:30:16,285 - root - INFO - step: 29290 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 16:30:16,285 - root - INFO - lr: 1.2644e-05 gnorm: 1.13 [17:56:05< 6:33:28] +[titan] 2025-10-05 16:30:27,122 - root - INFO - step: 29295 loss: 2.0538 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.50 mfu: 42.42% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8162 +[titan] 2025-10-05 16:30:27,122 - root - INFO - lr: 1.2638e-05 gnorm: 1.16 [17:56:16< 6:33:17] +[titan] 2025-10-05 16:30:35,789 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:30:37,974 - root - INFO - step: 29300 loss: 2.0301 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7943 +[titan] 2025-10-05 16:30:37,975 - root - INFO - lr: 1.2631e-05 gnorm: 1.13 [17:56:27< 6:33:06] +[titan] 2025-10-05 16:30:48,835 - root - INFO - step: 29305 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 16:30:48,835 - root - INFO - lr: 1.2624e-05 gnorm: 1.12 [17:56:37< 6:32:55] +[titan] 2025-10-05 16:30:59,735 - root - INFO - step: 29310 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 16:30:59,736 - root - INFO - lr: 1.2617e-05 gnorm: 1.16 [17:56:48< 6:32:44] +[titan] 2025-10-05 16:31:10,585 - root - INFO - step: 29315 loss: 2.0197 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7857 +[titan] 2025-10-05 16:31:10,585 - root - INFO - lr: 1.2611e-05 gnorm: 1.11 [17:56:59< 6:32:33] +[titan] 2025-10-05 16:31:21,451 - root - INFO - step: 29320 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 16:31:21,452 - root - INFO - lr: 1.2604e-05 gnorm: 1.14 [17:57:10< 6:32:22] +[titan] 2025-10-05 16:31:32,282 - root - INFO - step: 29325 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 16:31:32,283 - root - INFO - lr: 1.2597e-05 gnorm: 1.08 [17:57:21< 6:32:11] +[titan] 2025-10-05 16:31:43,142 - root - INFO - step: 29330 loss: 2.0286 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7937 +[titan] 2025-10-05 16:31:43,143 - root - INFO - lr: 
1.2591e-05 gnorm: 1.15 [17:57:32< 6:31:59] +[titan] 2025-10-05 16:31:54,012 - root - INFO - step: 29335 loss: 2.0589 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8204 +[titan] 2025-10-05 16:31:54,012 - root - INFO - lr: 1.2584e-05 gnorm: 1.12 [17:57:43< 6:31:48] +[titan] 2025-10-05 16:32:04,880 - root - INFO - step: 29340 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 16:32:04,880 - root - INFO - lr: 1.2577e-05 gnorm: 1.18 [17:57:54< 6:31:37] +[titan] 2025-10-05 16:32:15,774 - root - INFO - step: 29345 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 16:32:15,774 - root - INFO - lr: 1.2571e-05 gnorm: 1.14 [17:58:04< 6:31:26] +[titan] 2025-10-05 16:32:24,447 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:32:26,631 - root - INFO - step: 29350 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 16:32:26,632 - root - INFO - lr: 1.2564e-05 gnorm: 1.11 [17:58:15< 6:31:15] +[titan] 2025-10-05 16:32:37,480 - root - INFO - step: 29355 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:32:37,480 - root - INFO - lr: 1.2557e-05 gnorm: 1.10 [17:58:26< 6:31:04] +[titan] 2025-10-05 16:32:48,323 - root - INFO - step: 29360 loss: 2.0498 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8116 +[titan] 2025-10-05 16:32:48,323 - root - INFO - lr: 1.2551e-05 gnorm: 1.14 [17:58:37< 6:30:53] +[titan] 2025-10-05 16:32:59,199 - root - INFO - step: 29365 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7845 +[titan] 2025-10-05 16:32:59,199 - root - INFO - lr: 1.2544e-05 gnorm: 1.13 [17:58:48< 6:30:42] +[titan] 2025-10-05 16:33:10,048 - root - INFO - step: 29370 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 16:33:10,048 - root - INFO - lr: 1.2537e-05 gnorm: 1.12 [17:58:59< 6:30:31] +[titan] 2025-10-05 16:33:20,934 - root - INFO - step: 29375 loss: 2.0512 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2375 global_avg_mtp_loss: 1.8137 +[titan] 2025-10-05 16:33:20,934 - root - INFO - lr: 1.2531e-05 gnorm: 1.15 [17:59:10< 6:30:20] +[titan] 2025-10-05 16:33:31,794 - root - INFO - step: 29380 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7797 +[titan] 2025-10-05 16:33:31,794 - root - INFO - lr: 
1.2524e-05 gnorm: 1.11 [17:59:20< 6:30:09] +[titan] 2025-10-05 16:33:42,652 - root - INFO - step: 29385 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7647 +[titan] 2025-10-05 16:33:42,652 - root - INFO - lr: 1.2517e-05 gnorm: 1.13 [17:59:31< 6:29:58] +[titan] 2025-10-05 16:33:53,484 - root - INFO - step: 29390 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,253 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 16:33:53,484 - root - INFO - lr: 1.2511e-05 gnorm: 1.15 [17:59:42< 6:29:46] +[titan] 2025-10-05 16:34:04,355 - root - INFO - step: 29395 loss: 2.0273 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7921 +[titan] 2025-10-05 16:34:04,355 - root - INFO - lr: 1.2504e-05 gnorm: 1.11 [17:59:53< 6:29:35] +[titan] 2025-10-05 16:34:13,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:34:15,217 - root - INFO - step: 29400 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 16:34:15,218 - root - INFO - lr: 1.2497e-05 gnorm: 1.12 [18:00:04< 6:29:24] +[titan] 2025-10-05 16:34:26,084 - root - INFO - step: 29405 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 16:34:26,084 - root - INFO - lr: 1.2491e-05 gnorm: 1.13 [18:00:15< 6:29:13] +[titan] 2025-10-05 16:34:36,985 - root - INFO - step: 29410 loss: 1.9746 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:34:36,985 - root - INFO - lr: 1.2484e-05 gnorm: 1.14 [18:00:26< 6:29:02] +[titan] 2025-10-05 16:34:47,862 - root - INFO - step: 29415 loss: 2.0017 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7703 +[titan] 2025-10-05 16:34:47,862 - root - INFO - lr: 1.2477e-05 gnorm: 1.14 [18:00:37< 6:28:51] +[titan] 2025-10-05 16:34:58,716 - root - INFO - step: 29420 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7676 +[titan] 2025-10-05 16:34:58,716 - root - INFO - lr: 1.2471e-05 gnorm: 1.10 [18:00:47< 6:28:40] +[titan] 2025-10-05 16:35:09,613 - root - INFO - step: 29425 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 16:35:09,613 - root - INFO - lr: 1.2464e-05 gnorm: 1.13 [18:00:58< 6:28:29] +[titan] 2025-10-05 16:35:20,487 - root - INFO - step: 29430 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7675 +[titan] 2025-10-05 16:35:20,488 - root - INFO - lr: 
1.2457e-05 gnorm: 1.12 [18:01:09< 6:28:18] +[titan] 2025-10-05 16:35:31,364 - root - INFO - step: 29435 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:35:31,364 - root - INFO - lr: 1.2451e-05 gnorm: 1.13 [18:01:20< 6:28:07] +[titan] 2025-10-05 16:35:42,266 - root - INFO - step: 29440 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 16:35:42,266 - root - INFO - lr: 1.2444e-05 gnorm: 1.13 [18:01:31< 6:27:56] +[titan] 2025-10-05 16:35:53,139 - root - INFO - step: 29445 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 16:35:53,139 - root - INFO - lr: 1.2438e-05 gnorm: 1.10 [18:01:42< 6:27:45] +[titan] 2025-10-05 16:36:01,852 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:36:04,036 - root - INFO - step: 29450 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:36:04,036 - root - INFO - lr: 1.2431e-05 gnorm: 1.10 [18:01:53< 6:27:34] +[titan] 2025-10-05 16:36:14,913 - root - INFO - step: 29455 loss: 2.0103 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 16:36:14,913 - root - INFO - lr: 1.2424e-05 gnorm: 1.13 [18:02:04< 6:27:23] +[titan] 2025-10-05 16:36:25,795 - root - INFO - step: 29460 loss: 2.0213 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7869 +[titan] 2025-10-05 16:36:25,795 - root - INFO - lr: 1.2418e-05 gnorm: 1.13 [18:02:14< 6:27:11] +[titan] 2025-10-05 16:36:36,668 - root - INFO - step: 29465 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 16:36:36,669 - root - INFO - lr: 1.2411e-05 gnorm: 1.14 [18:02:25< 6:27:00] +[titan] 2025-10-05 16:36:47,594 - root - INFO - step: 29470 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 29,993 tflops: 416.10 mfu: 42.07% global_avg_ntp_loss: 0.2363 global_avg_mtp_loss: 1.8032 +[titan] 2025-10-05 16:36:47,594 - root - INFO - lr: 1.2404e-05 gnorm: 1.17 [18:02:36< 6:26:49] +[titan] 2025-10-05 16:36:58,488 - root - INFO - step: 29475 loss: 2.0667 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8272 +[titan] 2025-10-05 16:36:58,488 - root - INFO - lr: 1.2398e-05 gnorm: 1.14 [18:02:47< 6:26:38] +[titan] 2025-10-05 16:37:09,396 - root - INFO - step: 29480 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 16:37:09,396 - root - INFO - lr: 
1.2391e-05 gnorm: 1.10 [18:02:58< 6:26:27] +[titan] 2025-10-05 16:37:20,276 - root - INFO - step: 29485 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7523 +[titan] 2025-10-05 16:37:20,276 - root - INFO - lr: 1.2385e-05 gnorm: 1.14 [18:03:09< 6:26:16] +[titan] 2025-10-05 16:37:31,149 - root - INFO - step: 29490 loss: 2.0012 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 16:37:31,149 - root - INFO - lr: 1.2378e-05 gnorm: 1.18 [18:03:20< 6:26:05] +[titan] 2025-10-05 16:37:42,032 - root - INFO - step: 29495 loss: 1.9702 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 16:37:42,032 - root - INFO - lr: 1.2371e-05 gnorm: 1.12 [18:03:31< 6:25:54] +[titan] 2025-10-05 16:37:50,726 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:37:52,909 - root - INFO - step: 29500 loss: 2.0482 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2381 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 16:37:52,910 - root - INFO - lr: 1.2365e-05 gnorm: 1.18 [18:03:42< 6:25:43] +[titan] 2025-10-05 16:38:03,862 - root - INFO - step: 29505 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.07 mfu: 41.97% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7779 +[titan] 2025-10-05 16:38:03,862 - root - INFO - lr: 1.2358e-05 gnorm: 1.08 [18:03:52< 6:25:32] +[titan] 2025-10-05 16:38:14,737 - root - INFO - step: 29510 loss: 2.0280 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7920 +[titan] 2025-10-05 16:38:14,737 - root - INFO - lr: 1.2352e-05 gnorm: 1.12 [18:04:03< 6:25:21] +[titan] 2025-10-05 16:38:25,629 - root - INFO - step: 29515 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 16:38:25,629 - root - INFO - lr: 1.2345e-05 gnorm: 1.10 [18:04:14< 6:25:10] +[titan] 2025-10-05 16:38:36,496 - root - INFO - step: 29520 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2373 global_avg_mtp_loss: 1.7983 +[titan] 2025-10-05 16:38:36,497 - root - INFO - lr: 1.2338e-05 gnorm: 1.14 [18:04:25< 6:24:59] +[titan] 2025-10-05 16:38:47,375 - root - INFO - step: 29525 loss: 2.0360 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2354 global_avg_mtp_loss: 1.8006 +[titan] 2025-10-05 16:38:47,375 - root - INFO - lr: 1.2332e-05 gnorm: 1.12 [18:04:36< 6:24:48] +[titan] 2025-10-05 16:38:58,269 - root - INFO - step: 29530 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7887 +[titan] 2025-10-05 16:38:58,270 - root - INFO - lr: 
1.2325e-05 gnorm: 1.14 [18:04:47< 6:24:37] +[titan] 2025-10-05 16:39:09,198 - root - INFO - step: 29535 loss: 2.0181 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:39:09,198 - root - INFO - lr: 1.2319e-05 gnorm: 1.14 [18:04:58< 6:24:25] +[titan] 2025-10-05 16:39:20,067 - root - INFO - step: 29540 loss: 2.0010 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7690 +[titan] 2025-10-05 16:39:20,068 - root - INFO - lr: 1.2312e-05 gnorm: 1.11 [18:05:09< 6:24:14] +[titan] 2025-10-05 16:39:30,927 - root - INFO - step: 29545 loss: 1.9548 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:39:30,927 - root - INFO - lr: 1.2305e-05 gnorm: 1.08 [18:05:20< 6:24:03] +[titan] 2025-10-05 16:39:39,602 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:39:41,783 - root - INFO - step: 29550 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 16:39:41,783 - root - INFO - lr: 1.2299e-05 gnorm: 1.16 [18:05:30< 6:23:52] +[titan] 2025-10-05 16:39:52,647 - root - INFO - step: 29555 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:39:52,647 - root - INFO - lr: 1.2292e-05 gnorm: 1.11 [18:05:41< 6:23:41] +[titan] 2025-10-05 16:40:03,511 - root - INFO - step: 29560 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 16:40:03,511 - root - INFO - lr: 1.2286e-05 gnorm: 1.11 [18:05:52< 6:23:30] +[titan] 2025-10-05 16:40:14,393 - root - INFO - step: 29565 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 16:40:14,393 - root - INFO - lr: 1.2279e-05 gnorm: 1.09 [18:06:03< 6:23:19] +[titan] 2025-10-05 16:40:25,289 - root - INFO - step: 29570 loss: 2.0000 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 16:40:25,289 - root - INFO - lr: 1.2273e-05 gnorm: 1.15 [18:06:14< 6:23:08] +[titan] 2025-10-05 16:40:36,151 - root - INFO - step: 29575 loss: 2.0480 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8104 +[titan] 2025-10-05 16:40:36,151 - root - INFO - lr: 1.2266e-05 gnorm: 1.12 [18:06:25< 6:22:57] +[titan] 2025-10-05 16:40:47,014 - root - INFO - step: 29580 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 16:40:47,014 - root - INFO - lr: 
1.2259e-05 gnorm: 1.15 [18:06:36< 6:22:46] +[titan] 2025-10-05 16:40:57,884 - root - INFO - step: 29585 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7835 +[titan] 2025-10-05 16:40:57,884 - root - INFO - lr: 1.2253e-05 gnorm: 1.13 [18:06:47< 6:22:35] +[titan] 2025-10-05 16:41:08,765 - root - INFO - step: 29590 loss: 1.9964 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 16:41:08,765 - root - INFO - lr: 1.2246e-05 gnorm: 1.12 [18:06:57< 6:22:24] +[titan] 2025-10-05 16:41:19,628 - root - INFO - step: 29595 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 16:41:19,628 - root - INFO - lr: 1.2240e-05 gnorm: 1.14 [18:07:08< 6:22:13] +[titan] 2025-10-05 16:41:28,344 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:41:30,522 - root - INFO - step: 29600 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 16:41:30,522 - root - INFO - lr: 1.2233e-05 gnorm: 1.11 [18:07:19< 6:22:02] +[titan] 2025-10-05 16:41:41,388 - root - INFO - step: 29605 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 16:41:41,389 - root - INFO - lr: 1.2227e-05 gnorm: 1.11 [18:07:30< 6:21:50] +[titan] 2025-10-05 16:41:52,245 - root - INFO - step: 29610 loss: 1.9448 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 16:41:52,245 - root - INFO - lr: 1.2220e-05 gnorm: 1.09 [18:07:41< 6:21:39] +[titan] 2025-10-05 16:42:03,126 - root - INFO - step: 29615 loss: 2.0719 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2403 global_avg_mtp_loss: 1.8316 +[titan] 2025-10-05 16:42:03,126 - root - INFO - lr: 1.2214e-05 gnorm: 1.15 [18:07:52< 6:21:28] +[titan] 2025-10-05 16:42:13,989 - root - INFO - step: 29620 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7570 +[titan] 2025-10-05 16:42:13,989 - root - INFO - lr: 1.2207e-05 gnorm: 1.13 [18:08:03< 6:21:17] +[titan] 2025-10-05 16:42:24,845 - root - INFO - step: 29625 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 16:42:24,845 - root - INFO - lr: 1.2200e-05 gnorm: 1.11 [18:08:13< 6:21:06] +[titan] 2025-10-05 16:42:35,740 - root - INFO - step: 29630 loss: 1.9886 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 16:42:35,740 - root - INFO - lr: 
1.2194e-05 gnorm: 1.16 [18:08:24< 6:20:55] +[titan] 2025-10-05 16:42:46,609 - root - INFO - step: 29635 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 16:42:46,609 - root - INFO - lr: 1.2187e-05 gnorm: 1.13 [18:08:35< 6:20:44] +[titan] 2025-10-05 16:42:57,451 - root - INFO - step: 29640 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 16:42:57,451 - root - INFO - lr: 1.2181e-05 gnorm: 1.11 [18:08:46< 6:20:33] +[titan] 2025-10-05 16:43:08,337 - root - INFO - step: 29645 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 16:43:08,337 - root - INFO - lr: 1.2174e-05 gnorm: 1.10 [18:08:57< 6:20:22] +[titan] 2025-10-05 16:43:17,011 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:43:19,192 - root - INFO - step: 29650 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7561 +[titan] 2025-10-05 16:43:19,192 - root - INFO - lr: 1.2168e-05 gnorm: 1.14 [18:09:08< 6:20:11] +[titan] 2025-10-05 16:43:30,040 - root - INFO - step: 29655 loss: 1.9877 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 16:43:30,040 - root - INFO - lr: 1.2161e-05 gnorm: 1.13 [18:09:19< 6:20:00] +[titan] 2025-10-05 16:43:40,896 - root - INFO - step: 29660 loss: 2.0175 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 16:43:40,896 - root - INFO - lr: 1.2155e-05 gnorm: 1.16 [18:09:30< 6:19:49] +[titan] 2025-10-05 16:43:51,775 - root - INFO - step: 29665 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 16:43:51,776 - root - INFO - lr: 1.2148e-05 gnorm: 1.12 [18:09:40< 6:19:38] +[titan] 2025-10-05 16:44:02,651 - root - INFO - step: 29670 loss: 2.0090 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7764 +[titan] 2025-10-05 16:44:02,651 - root - INFO - lr: 1.2142e-05 gnorm: 1.12 [18:09:51< 6:19:27] +[titan] 2025-10-05 16:44:13,541 - root - INFO - step: 29675 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 16:44:13,542 - root - INFO - lr: 1.2135e-05 gnorm: 1.12 [18:10:02< 6:19:15] +[titan] 2025-10-05 16:44:24,406 - root - INFO - step: 29680 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 16:44:24,406 - root - INFO - lr: 
1.2129e-05 gnorm: 1.10 [18:10:13< 6:19:04] +[titan] 2025-10-05 16:44:35,270 - root - INFO - step: 29685 loss: 2.0294 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7948 +[titan] 2025-10-05 16:44:35,270 - root - INFO - lr: 1.2122e-05 gnorm: 1.14 [18:10:24< 6:18:53] +[titan] 2025-10-05 16:44:46,146 - root - INFO - step: 29690 loss: 2.0235 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 16:44:46,146 - root - INFO - lr: 1.2116e-05 gnorm: 1.14 [18:10:35< 6:18:42] +[titan] 2025-10-05 16:44:57,137 - root - INFO - step: 29695 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 29,814 tflops: 413.62 mfu: 41.82% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7674 +[titan] 2025-10-05 16:44:57,138 - root - INFO - lr: 1.2109e-05 gnorm: 1.16 [18:10:46< 6:18:31] +[titan] 2025-10-05 16:44:59,501 - root - INFO - Dumping profiler traces at step 29696 +[titan] 2025-10-05 16:44:59,541 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 16:45:06,053 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:45:08,245 - root - INFO - step: 29700 loss: 2.0615 memory: 118.84GiB(85.28%) tps: 29,502 tflops: 409.29 mfu: 41.38% global_avg_ntp_loss: 0.2389 global_avg_mtp_loss: 1.8226 +[titan] 2025-10-05 16:45:08,245 - root - INFO - lr: 1.2103e-05 gnorm: 1.15 [18:10:57< 6:18:20] +[titan] 2025-10-05 16:45:19,144 - root - INFO - step: 29705 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 16:45:19,144 - root - INFO - lr: 1.2096e-05 gnorm: 1.11 [18:11:08< 6:18:09] +[titan] 2025-10-05 16:45:30,019 - root - INFO - step: 29710 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 16:45:30,019 - root - INFO - lr: 1.2090e-05 gnorm: 1.15 [18:11:19< 6:17:58] +[titan] 2025-10-05 16:45:40,886 - root - INFO - step: 29715 loss: 1.9987 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7666 +[titan] 2025-10-05 16:45:40,886 - root - INFO - lr: 1.2083e-05 gnorm: 1.09 [18:11:29< 6:17:47] +[titan] 2025-10-05 16:45:51,774 - root - INFO - step: 29720 loss: 2.0604 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2397 global_avg_mtp_loss: 1.8207 +[titan] 2025-10-05 16:45:51,775 - root - INFO - lr: 1.2077e-05 gnorm: 1.13 [18:11:40< 6:17:36] +[titan] 2025-10-05 16:46:02,667 - root - INFO - step: 29725 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 16:46:02,667 - root - INFO - lr: 1.2070e-05 gnorm: 1.11 [18:11:51< 6:17:25] +[titan] 2025-10-05 16:46:13,605 - root - INFO - step: 29730 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 29,960 tflops: 415.65 mfu: 42.03% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 16:46:13,605 - root - INFO - lr: 
1.2064e-05 gnorm: 1.10 [18:12:02< 6:17:14] +[titan] 2025-10-05 16:46:24,504 - root - INFO - step: 29735 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 16:46:24,505 - root - INFO - lr: 1.2057e-05 gnorm: 1.14 [18:12:13< 6:17:03] +[titan] 2025-10-05 16:46:35,396 - root - INFO - step: 29740 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 16:46:35,397 - root - INFO - lr: 1.2051e-05 gnorm: 1.16 [18:12:24< 6:16:52] +[titan] 2025-10-05 16:46:46,263 - root - INFO - step: 29745 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 16:46:46,263 - root - INFO - lr: 1.2044e-05 gnorm: 1.14 [18:12:35< 6:16:41] +[titan] 2025-10-05 16:46:54,956 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:46:57,142 - root - INFO - step: 29750 loss: 2.0081 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7749 +[titan] 2025-10-05 16:46:57,142 - root - INFO - lr: 1.2038e-05 gnorm: 1.14 [18:12:46< 6:16:30] +[titan] 2025-10-05 16:47:08,011 - root - INFO - step: 29755 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 16:47:08,011 - root - INFO - lr: 1.2031e-05 gnorm: 1.14 [18:12:57< 6:16:18] +[titan] 2025-10-05 16:47:18,928 - root - INFO - step: 29760 loss: 2.0226 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2345 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 16:47:18,929 - root - INFO - lr: 1.2025e-05 gnorm: 1.15 [18:13:08< 6:16:07] +[titan] 2025-10-05 16:47:29,805 - root - INFO - step: 29765 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 16:47:29,806 - root - INFO - lr: 1.2018e-05 gnorm: 1.11 [18:13:18< 6:15:56] +[titan] 2025-10-05 16:47:40,695 - root - INFO - step: 29770 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 16:47:40,695 - root - INFO - lr: 1.2012e-05 gnorm: 1.12 [18:13:29< 6:15:45] +[titan] 2025-10-05 16:47:51,568 - root - INFO - step: 29775 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 16:47:51,568 - root - INFO - lr: 1.2005e-05 gnorm: 1.13 [18:13:40< 6:15:34] +[titan] 2025-10-05 16:48:02,434 - root - INFO - step: 29780 loss: 2.0109 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7778 +[titan] 2025-10-05 16:48:02,434 - root - INFO - lr: 
1.1999e-05 gnorm: 1.13 [18:13:51< 6:15:23] +[titan] 2025-10-05 16:48:13,326 - root - INFO - step: 29785 loss: 2.0923 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8497 +[titan] 2025-10-05 16:48:13,326 - root - INFO - lr: 1.1992e-05 gnorm: 1.17 [18:14:02< 6:15:12] +[titan] 2025-10-05 16:48:24,246 - root - INFO - step: 29790 loss: 2.0558 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.09% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8168 +[titan] 2025-10-05 16:48:24,246 - root - INFO - lr: 1.1986e-05 gnorm: 1.21 [18:14:13< 6:15:01] +[titan] 2025-10-05 16:48:35,115 - root - INFO - step: 29795 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7438 +[titan] 2025-10-05 16:48:35,115 - root - INFO - lr: 1.1979e-05 gnorm: 1.16 [18:14:24< 6:14:50] +[titan] 2025-10-05 16:48:43,808 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:48:45,984 - root - INFO - step: 29800 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 16:48:45,984 - root - INFO - lr: 1.1973e-05 gnorm: 1.17 [18:14:35< 6:14:39] +[titan] 2025-10-05 16:48:56,850 - root - INFO - step: 29805 loss: 2.0467 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 16:48:56,850 - root - INFO - lr: 1.1966e-05 gnorm: 1.13 [18:14:45< 6:14:28] +[titan] 2025-10-05 16:49:07,720 - root - INFO - step: 29810 loss: 2.0485 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8105 +[titan] 2025-10-05 16:49:07,720 - root - INFO - lr: 1.1960e-05 gnorm: 1.14 [18:14:56< 6:14:17] +[titan] 2025-10-05 16:49:18,594 - root - INFO - step: 29815 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 16:49:18,594 - root - INFO - lr: 1.1954e-05 gnorm: 1.11 [18:15:07< 6:14:06] +[titan] 2025-10-05 16:49:29,475 - root - INFO - step: 29820 loss: 2.0086 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7751 +[titan] 2025-10-05 16:49:29,475 - root - INFO - lr: 1.1947e-05 gnorm: 1.16 [18:15:18< 6:13:55] +[titan] 2025-10-05 16:49:40,387 - root - INFO - step: 29825 loss: 1.9867 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7563 +[titan] 2025-10-05 16:49:40,388 - root - INFO - lr: 1.1941e-05 gnorm: 1.10 [18:15:29< 6:13:44] +[titan] 2025-10-05 16:49:51,279 - root - INFO - step: 29830 loss: 1.9675 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 16:49:51,279 - root - INFO - lr: 
1.1934e-05 gnorm: 1.09 [18:15:40< 6:13:32] +[titan] 2025-10-05 16:50:02,138 - root - INFO - step: 29835 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7622 +[titan] 2025-10-05 16:50:02,138 - root - INFO - lr: 1.1928e-05 gnorm: 1.12 [18:15:51< 6:13:21] +[titan] 2025-10-05 16:50:13,006 - root - INFO - step: 29840 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 16:50:13,006 - root - INFO - lr: 1.1921e-05 gnorm: 1.13 [18:16:02< 6:13:10] +[titan] 2025-10-05 16:50:23,932 - root - INFO - step: 29845 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6997 +[titan] 2025-10-05 16:50:23,933 - root - INFO - lr: 1.1915e-05 gnorm: 1.10 [18:16:13< 6:12:59] +[titan] 2025-10-05 16:50:32,610 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:50:34,782 - root - INFO - step: 29850 loss: 2.0571 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8188 +[titan] 2025-10-05 16:50:34,782 - root - INFO - lr: 1.1908e-05 gnorm: 1.18 [18:16:23< 6:12:48] +[titan] 2025-10-05 16:50:45,679 - root - INFO - step: 29855 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2357 global_avg_mtp_loss: 1.7933 +[titan] 2025-10-05 16:50:45,680 - root - INFO - lr: 1.1902e-05 gnorm: 1.17 [18:16:34< 6:12:37] +[titan] 2025-10-05 16:50:56,541 - root - INFO - step: 29860 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 16:50:56,541 - root - INFO - lr: 1.1896e-05 gnorm: 1.11 [18:16:45< 6:12:26] +[titan] 2025-10-05 16:51:07,402 - root - INFO - step: 29865 loss: 2.0625 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8225 +[titan] 2025-10-05 16:51:07,402 - root - INFO - lr: 1.1889e-05 gnorm: 1.18 [18:16:56< 6:12:15] +[titan] 2025-10-05 16:51:18,320 - root - INFO - step: 29870 loss: 1.9395 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.40 mfu: 42.10% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 16:51:18,320 - root - INFO - lr: 1.1883e-05 gnorm: 1.13 [18:17:07< 6:12:04] +[titan] 2025-10-05 16:51:29,178 - root - INFO - step: 29875 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 16:51:29,178 - root - INFO - lr: 1.1876e-05 gnorm: 1.13 [18:17:18< 6:11:53] +[titan] 2025-10-05 16:51:40,033 - root - INFO - step: 29880 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 16:51:40,033 - root - INFO - lr: 
1.1870e-05 gnorm: 1.12 [18:17:29< 6:11:42] +[titan] 2025-10-05 16:51:50,881 - root - INFO - step: 29885 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 16:51:50,881 - root - INFO - lr: 1.1863e-05 gnorm: 1.10 [18:17:39< 6:11:31] +[titan] 2025-10-05 16:52:01,762 - root - INFO - step: 29890 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7834 +[titan] 2025-10-05 16:52:01,762 - root - INFO - lr: 1.1857e-05 gnorm: 1.15 [18:17:50< 6:11:20] +[titan] 2025-10-05 16:52:12,608 - root - INFO - step: 29895 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 16:52:12,608 - root - INFO - lr: 1.1851e-05 gnorm: 1.13 [18:18:01< 6:11:09] +[titan] 2025-10-05 16:52:21,308 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:52:23,480 - root - INFO - step: 29900 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 16:52:23,481 - root - INFO - lr: 1.1844e-05 gnorm: 1.13 [18:18:12< 6:10:58] +[titan] 2025-10-05 16:52:34,301 - root - INFO - step: 29905 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 16:52:34,301 - root - INFO - lr: 1.1838e-05 gnorm: 1.15 [18:18:23< 6:10:46] +[titan] 2025-10-05 16:52:45,148 - root - INFO - step: 29910 loss: 1.9512 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 16:52:45,149 - root - INFO - lr: 1.1831e-05 gnorm: 1.11 [18:18:34< 6:10:35] +[titan] 2025-10-05 16:52:55,998 - root - INFO - step: 29915 loss: 2.0610 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2399 global_avg_mtp_loss: 1.8211 +[titan] 2025-10-05 16:52:55,998 - root - INFO - lr: 1.1825e-05 gnorm: 1.13 [18:18:45< 6:10:24] +[titan] 2025-10-05 16:53:06,867 - root - INFO - step: 29920 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 16:53:06,867 - root - INFO - lr: 1.1819e-05 gnorm: 1.12 [18:18:55< 6:10:13] +[titan] 2025-10-05 16:53:17,736 - root - INFO - step: 29925 loss: 2.0321 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7962 +[titan] 2025-10-05 16:53:17,736 - root - INFO - lr: 1.1812e-05 gnorm: 1.12 [18:19:06< 6:10:02] +[titan] 2025-10-05 16:53:28,570 - root - INFO - step: 29930 loss: 2.0169 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7831 +[titan] 2025-10-05 16:53:28,570 - root - INFO - lr: 
1.1806e-05 gnorm: 1.12 [18:19:17< 6:09:51] +[titan] 2025-10-05 16:53:39,418 - root - INFO - step: 29935 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2426 global_avg_mtp_loss: 1.8416 +[titan] 2025-10-05 16:53:39,418 - root - INFO - lr: 1.1799e-05 gnorm: 1.25 [18:19:28< 6:09:40] +[titan] 2025-10-05 16:53:50,272 - root - INFO - step: 29940 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 16:53:50,272 - root - INFO - lr: 1.1793e-05 gnorm: 1.12 [18:19:39< 6:09:29] +[titan] 2025-10-05 16:54:01,117 - root - INFO - step: 29945 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 16:54:01,117 - root - INFO - lr: 1.1787e-05 gnorm: 1.14 [18:19:50< 6:09:18] +[titan] 2025-10-05 16:54:09,772 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 16:54:12,029 - root - INFO - step: 29950 loss: 2.0687 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2420 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 16:54:12,029 - root - INFO - lr: 1.1780e-05 gnorm: 1.18 [18:20:01< 6:09:07] +[titan] 2025-10-05 16:54:22,840 - root - INFO - step: 29955 loss: 2.0756 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.52 mfu: 42.52% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8343 +[titan] 2025-10-05 16:54:22,840 - root - INFO - lr: 1.1774e-05 gnorm: 1.14 [18:20:11< 6:08:56] +[titan] 2025-10-05 16:54:33,694 - root - INFO - step: 29960 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 16:54:33,694 - root - INFO - lr: 1.1767e-05 gnorm: 1.14 [18:20:22< 6:08:45] +[titan] 2025-10-05 16:54:44,540 - root - INFO - step: 29965 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 16:54:44,540 - root - INFO - lr: 1.1761e-05 gnorm: 1.14 [18:20:33< 6:08:34] +[titan] 2025-10-05 16:54:55,380 - root - INFO - step: 29970 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.39 mfu: 42.40% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.8010 +[titan] 2025-10-05 16:54:55,380 - root - INFO - lr: 1.1755e-05 gnorm: 1.13 [18:20:44< 6:08:22] +[titan] 2025-10-05 16:55:06,200 - root - INFO - step: 29975 loss: 2.0336 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.17 mfu: 42.48% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7975 +[titan] 2025-10-05 16:55:06,200 - root - INFO - lr: 1.1748e-05 gnorm: 1.16 [18:20:55< 6:08:11] +[titan] 2025-10-05 16:55:17,035 - root - INFO - step: 29980 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 16:55:17,035 - root - INFO - lr: 
1.1742e-05 gnorm: 1.16 [18:21:06< 6:08:00] +[titan] 2025-10-05 16:55:27,861 - root - INFO - step: 29985 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.95 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 16:55:27,861 - root - INFO - lr: 1.1736e-05 gnorm: 1.11 [18:21:16< 6:07:49] +[titan] 2025-10-05 16:55:38,685 - root - INFO - step: 29990 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 30,275 tflops: 420.02 mfu: 42.47% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 16:55:38,685 - root - INFO - lr: 1.1729e-05 gnorm: 1.08 [18:21:27< 6:07:38] +[titan] 2025-10-05 16:55:49,531 - root - INFO - step: 29995 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 16:55:49,531 - root - INFO - lr: 1.1723e-05 gnorm: 1.11 [18:21:38< 6:07:27] +[titan] 2025-10-05 16:55:58,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 16:56:00,346 - root - INFO - step: 30000 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,298 tflops: 420.34 mfu: 42.50% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 16:56:00,346 - root - INFO - lr: 1.1716e-05 gnorm: 1.14 [18:21:49< 6:07:16] +[titan] 2025-10-05 16:56:00,346 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 16:56:17,616 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 16:56:17,616 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.27 seconds. 
+[titan] 2025-10-05 16:58:26,179 - root - INFO - step: 30005 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 2,247 tflops: 31.17 mfu: 3.15% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 16:58:26,179 - root - INFO - lr: 1.1710e-05 gnorm: 1.15 [18:24:15< 6:07:50] +[titan] 2025-10-05 16:58:36,943 - root - INFO - step: 30010 loss: 2.0703 memory: 118.84GiB(85.28%) tps: 30,443 tflops: 422.35 mfu: 42.71% global_avg_ntp_loss: 0.2404 global_avg_mtp_loss: 1.8299 +[titan] 2025-10-05 16:58:36,943 - root - INFO - lr: 1.1704e-05 gnorm: 1.13 [18:24:26< 6:07:39] +[titan] 2025-10-05 16:58:47,757 - root - INFO - step: 30015 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,302 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7859 +[titan] 2025-10-05 16:58:47,757 - root - INFO - lr: 1.1697e-05 gnorm: 1.19 [18:24:36< 6:07:28] +[titan] 2025-10-05 16:58:58,551 - root - INFO - step: 30020 loss: 2.0398 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.19 mfu: 42.59% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8021 +[titan] 2025-10-05 16:58:58,551 - root - INFO - lr: 1.1691e-05 gnorm: 1.16 [18:24:47< 6:07:16] +[titan] 2025-10-05 16:59:09,338 - root - INFO - step: 30025 loss: 2.0133 memory: 118.84GiB(85.28%) tps: 30,378 tflops: 421.45 mfu: 42.61% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7804 +[titan] 2025-10-05 16:59:09,338 - root - INFO - lr: 1.1685e-05 gnorm: 1.17 [18:24:58< 6:07:05] +[titan] 2025-10-05 16:59:20,123 - root - INFO - step: 30030 loss: 2.0128 memory: 118.84GiB(85.28%) tps: 30,384 tflops: 421.53 mfu: 42.62% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7796 +[titan] 2025-10-05 16:59:20,123 - root - INFO - lr: 1.1678e-05 gnorm: 1.14 [18:25:09< 6:06:54] +[titan] 2025-10-05 16:59:30,956 - root - INFO - step: 30035 loss: 2.0332 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 16:59:30,956 - root - INFO - lr: 
1.1672e-05 gnorm: 1.17 [18:25:20< 6:06:43] +[titan] 2025-10-05 16:59:41,784 - root - INFO - step: 30040 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7750 +[titan] 2025-10-05 16:59:41,784 - root - INFO - lr: 1.1666e-05 gnorm: 1.10 [18:25:30< 6:06:32] +[titan] 2025-10-05 16:59:52,578 - root - INFO - step: 30045 loss: 2.0364 memory: 118.84GiB(85.28%) tps: 30,359 tflops: 421.18 mfu: 42.59% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8007 +[titan] 2025-10-05 16:59:52,578 - root - INFO - lr: 1.1659e-05 gnorm: 1.20 [18:25:41< 6:06:21] +[titan] 2025-10-05 17:00:01,251 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 17:00:03,430 - root - INFO - step: 30050 loss: 2.0117 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:00:03,431 - root - INFO - lr: 1.1653e-05 gnorm: 1.13 [18:25:52< 6:06:10] +[titan] 2025-10-05 17:00:14,272 - root - INFO - step: 30055 loss: 2.0099 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:00:14,273 - root - INFO - lr: 1.1647e-05 gnorm: 1.14 [18:26:03< 6:05:59] +[titan] 2025-10-05 17:00:25,096 - root - INFO - step: 30060 loss: 2.0424 memory: 118.84GiB(85.28%) tps: 30,276 tflops: 420.03 mfu: 42.47% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8048 +[titan] 2025-10-05 17:00:25,096 - root - INFO - lr: 1.1640e-05 gnorm: 1.13 [18:26:14< 6:05:48] +[titan] 2025-10-05 17:00:35,911 - root - INFO - step: 30065 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 30,300 tflops: 420.37 mfu: 42.50% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 17:00:35,911 - root - INFO - lr: 1.1634e-05 gnorm: 1.13 [18:26:24< 6:05:36] +[titan] 2025-10-05 17:00:46,749 - root - INFO - step: 30070 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 
30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:00:46,749 - root - INFO - lr: 1.1628e-05 gnorm: 1.12 [18:26:35< 6:05:25] +[titan] 2025-10-05 17:00:57,558 - root - INFO - step: 30075 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,316 tflops: 420.59 mfu: 42.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 17:00:57,558 - root - INFO - lr: 1.1621e-05 gnorm: 1.11 [18:26:46< 6:05:14] +[titan] 2025-10-05 17:01:08,392 - root - INFO - step: 30080 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7726 +[titan] 2025-10-05 17:01:08,392 - root - INFO - lr: 1.1615e-05 gnorm: 1.15 [18:26:57< 6:05:03] +[titan] 2025-10-05 17:01:19,229 - root - INFO - step: 30085 loss: 2.0397 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.8038 +[titan] 2025-10-05 17:01:19,229 - root - INFO - lr: 1.1609e-05 gnorm: 1.15 [18:27:08< 6:04:52] +[titan] 2025-10-05 17:01:30,104 - root - INFO - step: 30090 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:01:30,105 - root - INFO - lr: 1.1602e-05 gnorm: 1.11 [18:27:19< 6:04:41] +[titan] 2025-10-05 17:01:40,932 - root - INFO - step: 30095 loss: 2.0598 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.87 mfu: 42.45% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8202 +[titan] 2025-10-05 17:01:40,932 - root - INFO - lr: 1.1596e-05 gnorm: 1.14 [18:27:30< 6:04:30] +[titan] 2025-10-05 17:01:49,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:01:51,740 - root - INFO - step: 30100 loss: 2.0023 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:01:51,740 - root - INFO - lr: 1.1590e-05 gnorm: 1.12 [18:27:40< 6:04:19] +[titan] 2025-10-05 17:02:02,591 - root - INFO - step: 30105 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7463 +[titan] 2025-10-05 17:02:02,591 - root - INFO - lr: 1.1583e-05 gnorm: 1.13 [18:27:51< 6:04:08] +[titan] 2025-10-05 17:02:13,423 - root - INFO - step: 30110 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 17:02:13,423 - root - INFO - lr: 1.1577e-05 gnorm: 1.16 [18:28:02< 6:03:56] +[titan] 2025-10-05 17:02:24,227 - root - INFO - step: 30115 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.78 mfu: 42.55% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 17:02:24,227 - root - INFO - lr: 1.1571e-05 gnorm: 1.12 [18:28:13< 6:03:45] +[titan] 2025-10-05 17:02:35,077 - root - INFO - step: 30120 loss: 2.0868 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2424 global_avg_mtp_loss: 1.8444 +[titan] 2025-10-05 17:02:35,077 - root - INFO - lr: 1.1565e-05 gnorm: 1.14 [18:28:24< 6:03:34] +[titan] 2025-10-05 17:02:45,895 - root - INFO - step: 30125 loss: 1.9945 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 17:02:45,895 - root - INFO - lr: 1.1558e-05 gnorm: 1.13 [18:28:34< 6:03:23] +[titan] 2025-10-05 17:02:56,710 - root - INFO - step: 30130 loss: 2.0551 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.36 mfu: 42.50% global_avg_ntp_loss: 0.2378 global_avg_mtp_loss: 1.8173 +[titan] 2025-10-05 17:02:56,710 - root - INFO - lr: 
1.1552e-05 gnorm: 1.13 [18:28:45< 6:03:12] +[titan] 2025-10-05 17:03:07,565 - root - INFO - step: 30135 loss: 2.0521 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8142 +[titan] 2025-10-05 17:03:07,565 - root - INFO - lr: 1.1546e-05 gnorm: 1.11 [18:28:56< 6:03:01] +[titan] 2025-10-05 17:03:18,382 - root - INFO - step: 30140 loss: 2.0034 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 17:03:18,382 - root - INFO - lr: 1.1539e-05 gnorm: 1.21 [18:29:07< 6:02:50] +[titan] 2025-10-05 17:03:29,277 - root - INFO - step: 30145 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 17:03:29,277 - root - INFO - lr: 1.1533e-05 gnorm: 1.14 [18:29:18< 6:02:39] +[titan] 2025-10-05 17:03:37,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:03:40,104 - root - INFO - step: 30150 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,266 tflops: 419.90 mfu: 42.46% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 17:03:40,104 - root - INFO - lr: 1.1527e-05 gnorm: 1.13 [18:29:29< 6:02:28] +[titan] 2025-10-05 17:03:50,940 - root - INFO - step: 30155 loss: 2.0613 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2396 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:03:50,940 - root - INFO - lr: 1.1521e-05 gnorm: 1.15 [18:29:40< 6:02:17] +[titan] 2025-10-05 17:04:01,762 - root - INFO - step: 30160 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,280 tflops: 420.08 mfu: 42.48% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7694 +[titan] 2025-10-05 17:04:01,762 - root - INFO - lr: 1.1514e-05 gnorm: 1.14 [18:29:50< 6:02:05] +[titan] 2025-10-05 17:04:12,567 - root - INFO - step: 30165 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 17:04:12,567 - root - INFO - lr: 1.1508e-05 gnorm: 1.12 [18:30:01< 6:01:54] +[titan] 2025-10-05 17:04:23,420 - root - INFO - step: 30170 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:04:23,420 - root - INFO - lr: 1.1502e-05 gnorm: 1.12 [18:30:12< 6:01:43] +[titan] 2025-10-05 17:04:34,282 - root - INFO - step: 30175 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 17:04:34,282 - root - INFO - lr: 1.1495e-05 gnorm: 1.12 [18:30:23< 6:01:32] +[titan] 2025-10-05 17:04:45,111 - root - INFO - step: 30180 loss: 1.9784 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:04:45,111 - root - INFO - lr: 
1.1489e-05 gnorm: 1.16 [18:30:34< 6:01:21] +[titan] 2025-10-05 17:04:55,961 - root - INFO - step: 30185 loss: 2.0025 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:04:55,962 - root - INFO - lr: 1.1483e-05 gnorm: 1.13 [18:30:45< 6:01:10] +[titan] 2025-10-05 17:05:06,781 - root - INFO - step: 30190 loss: 1.9450 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.18 mfu: 42.48% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 17:05:06,781 - root - INFO - lr: 1.1477e-05 gnorm: 1.16 [18:30:55< 6:00:59] +[titan] 2025-10-05 17:05:17,581 - root - INFO - step: 30195 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:05:17,581 - root - INFO - lr: 1.1470e-05 gnorm: 1.16 [18:31:06< 6:00:48] +[titan] 2025-10-05 17:05:26,291 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:05:28,480 - root - INFO - step: 30200 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 17:05:28,480 - root - INFO - lr: 1.1464e-05 gnorm: 1.13 [18:31:17< 6:00:37] +[titan] 2025-10-05 17:05:39,462 - root - INFO - step: 30205 loss: 1.9880 memory: 118.84GiB(85.28%) tps: 29,840 tflops: 413.98 mfu: 41.86% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7571 +[titan] 2025-10-05 17:05:39,462 - root - INFO - lr: 1.1458e-05 gnorm: 1.11 [18:31:28< 6:00:26] +[titan] 2025-10-05 17:05:46,251 - root - INFO - Dumping profiler traces at step 30208 +[titan] 2025-10-05 17:05:46,322 - root - INFO - Finished dumping profiler traces in 0.07 seconds +[titan] 2025-10-05 17:05:50,672 - root - INFO - step: 30210 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 29,233 tflops: 405.56 mfu: 41.01% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7633 +[titan] 2025-10-05 17:05:50,672 - root - INFO - lr: 1.1452e-05 gnorm: 1.14 [18:31:39< 6:00:15] +[titan] 2025-10-05 17:06:01,511 - root - INFO - step: 30215 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 17:06:01,511 - root - INFO - lr: 1.1445e-05 gnorm: 1.17 [18:31:50< 6:00:03] +[titan] 2025-10-05 17:06:12,360 - root - INFO - step: 30220 loss: 1.9647 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7378 +[titan] 2025-10-05 17:06:12,360 - root - INFO - lr: 1.1439e-05 gnorm: 1.11 [18:32:01< 5:59:52] +[titan] 2025-10-05 17:06:23,184 - root - INFO - step: 30225 loss: 2.0049 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 17:06:23,184 - root - INFO - lr: 1.1433e-05 gnorm: 1.13 [18:32:12< 5:59:41] +[titan] 2025-10-05 17:06:34,073 - root - INFO - step: 30230 loss: 
1.9745 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7462 +[titan] 2025-10-05 17:06:34,073 - root - INFO - lr: 1.1427e-05 gnorm: 1.15 [18:32:23< 5:59:30] +[titan] 2025-10-05 17:06:44,900 - root - INFO - step: 30235 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7525 +[titan] 2025-10-05 17:06:44,900 - root - INFO - lr: 1.1420e-05 gnorm: 1.11 [18:32:33< 5:59:19] +[titan] 2025-10-05 17:06:55,740 - root - INFO - step: 30240 loss: 1.9188 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 17:06:55,740 - root - INFO - lr: 1.1414e-05 gnorm: 1.16 [18:32:44< 5:59:08] +[titan] 2025-10-05 17:07:06,541 - root - INFO - step: 30245 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,339 tflops: 420.91 mfu: 42.56% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:07:06,541 - root - INFO - lr: 1.1408e-05 gnorm: 1.13 [18:32:55< 5:58:57] +[titan] 2025-10-05 17:07:15,218 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:07:17,391 - root - INFO - step: 30250 loss: 2.0333 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7974 +[titan] 2025-10-05 17:07:17,392 - root - INFO - lr: 1.1402e-05 gnorm: 1.17 [18:33:06< 5:58:46] +[titan] 2025-10-05 17:07:28,241 - root - INFO - step: 30255 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 17:07:28,241 - root - INFO - lr: 1.1395e-05 gnorm: 1.18 [18:33:17< 5:58:35] +[titan] 2025-10-05 17:07:39,102 - root - INFO - step: 30260 loss: 2.0013 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 17:07:39,103 - root - INFO - lr: 1.1389e-05 gnorm: 1.12 [18:33:28< 5:58:24] +[titan] 2025-10-05 17:07:49,999 - root - INFO - step: 30265 loss: 1.9338 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 17:07:49,999 - root - INFO - lr: 1.1383e-05 gnorm: 1.16 [18:33:39< 5:58:12] +[titan] 2025-10-05 17:08:00,848 - root - INFO - step: 30270 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7504 +[titan] 2025-10-05 17:08:00,848 - root - INFO - lr: 1.1377e-05 gnorm: 1.17 [18:33:49< 5:58:01] +[titan] 2025-10-05 17:08:11,692 - root - INFO - step: 30275 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 17:08:11,692 - root - INFO - lr: 1.1370e-05 gnorm: 1.17 [18:34:00< 5:57:50] +[titan] 2025-10-05 17:08:22,552 - root - INFO - step: 30280 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:08:22,553 - root - INFO - lr: 
1.1364e-05 gnorm: 1.18 [18:34:11< 5:57:39] +[titan] 2025-10-05 17:08:33,450 - root - INFO - step: 30285 loss: 1.9770 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 17:08:33,450 - root - INFO - lr: 1.1358e-05 gnorm: 1.11 [18:34:22< 5:57:28] +[titan] 2025-10-05 17:08:44,280 - root - INFO - step: 30290 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,258 tflops: 419.78 mfu: 42.44% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 17:08:44,280 - root - INFO - lr: 1.1352e-05 gnorm: 1.13 [18:34:33< 5:57:17] +[titan] 2025-10-05 17:08:55,139 - root - INFO - step: 30295 loss: 2.0245 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7894 +[titan] 2025-10-05 17:08:55,139 - root - INFO - lr: 1.1346e-05 gnorm: 1.13 [18:34:44< 5:57:06] +[titan] 2025-10-05 17:09:03,787 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:09:05,964 - root - INFO - step: 30300 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,271 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7758 +[titan] 2025-10-05 17:09:05,964 - root - INFO - lr: 1.1339e-05 gnorm: 1.17 [18:34:55< 5:56:55] +[titan] 2025-10-05 17:09:16,818 - root - INFO - step: 30305 loss: 2.0578 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2388 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 17:09:16,818 - root - INFO - lr: 1.1333e-05 gnorm: 1.16 [18:35:05< 5:56:44] +[titan] 2025-10-05 17:09:27,662 - root - INFO - step: 30310 loss: 2.0084 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 17:09:27,662 - root - INFO - lr: 1.1327e-05 gnorm: 1.15 [18:35:16< 5:56:33] +[titan] 2025-10-05 17:09:38,520 - root - INFO - step: 30315 loss: 1.9994 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 17:09:38,520 - root - INFO - lr: 1.1321e-05 gnorm: 1.14 [18:35:27< 5:56:21] +[titan] 2025-10-05 17:09:49,395 - root - INFO - step: 30320 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 17:09:49,395 - root - INFO - lr: 1.1315e-05 gnorm: 1.14 [18:35:38< 5:56:10] +[titan] 2025-10-05 17:10:00,277 - root - INFO - step: 30325 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7518 +[titan] 2025-10-05 17:10:00,277 - root - INFO - lr: 1.1308e-05 gnorm: 1.15 [18:35:49< 5:55:59] +[titan] 2025-10-05 17:10:11,173 - root - INFO - step: 30330 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.21 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:10:11,174 - root - INFO - lr: 
1.1302e-05 gnorm: 1.15 [18:36:00< 5:55:48] +[titan] 2025-10-05 17:10:22,000 - root - INFO - step: 30335 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,267 tflops: 419.91 mfu: 42.46% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7805 +[titan] 2025-10-05 17:10:22,000 - root - INFO - lr: 1.1296e-05 gnorm: 1.18 [18:36:11< 5:55:37] +[titan] 2025-10-05 17:10:32,877 - root - INFO - step: 30340 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7569 +[titan] 2025-10-05 17:10:32,877 - root - INFO - lr: 1.1290e-05 gnorm: 1.13 [18:36:21< 5:55:26] +[titan] 2025-10-05 17:10:43,769 - root - INFO - step: 30345 loss: 2.0149 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:10:43,769 - root - INFO - lr: 1.1284e-05 gnorm: 1.15 [18:36:32< 5:55:15] +[titan] 2025-10-05 17:10:52,407 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:10:54,603 - root - INFO - step: 30350 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 17:10:54,603 - root - INFO - lr: 1.1277e-05 gnorm: 1.15 [18:36:43< 5:55:04] +[titan] 2025-10-05 17:11:05,438 - root - INFO - step: 30355 loss: 2.0011 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.58 mfu: 42.42% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7695 +[titan] 2025-10-05 17:11:05,438 - root - INFO - lr: 1.1271e-05 gnorm: 1.14 [18:36:54< 5:54:53] +[titan] 2025-10-05 17:11:16,300 - root - INFO - step: 30360 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7200 +[titan] 2025-10-05 17:11:16,300 - root - INFO - lr: 1.1265e-05 gnorm: 1.11 [18:37:05< 5:54:42] +[titan] 2025-10-05 17:11:27,159 - root - INFO - step: 30365 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 17:11:27,159 - root - INFO - lr: 1.1259e-05 gnorm: 1.11 [18:37:16< 5:54:30] +[titan] 2025-10-05 17:11:38,071 - root - INFO - step: 30370 loss: 2.0132 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 17:11:38,071 - root - INFO - lr: 1.1253e-05 gnorm: 1.18 [18:37:27< 5:54:19] +[titan] 2025-10-05 17:11:48,937 - root - INFO - step: 30375 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:11:48,937 - root - INFO - lr: 1.1247e-05 gnorm: 1.15 [18:37:37< 5:54:08] +[titan] 2025-10-05 17:11:59,780 - root - INFO - step: 30380 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 17:11:59,780 - root - INFO - lr: 
1.1240e-05 gnorm: 1.13 [18:37:48< 5:53:57] +[titan] 2025-10-05 17:12:10,619 - root - INFO - step: 30385 loss: 1.9947 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7641 +[titan] 2025-10-05 17:12:10,620 - root - INFO - lr: 1.1234e-05 gnorm: 1.15 [18:37:59< 5:53:46] +[titan] 2025-10-05 17:12:21,479 - root - INFO - step: 30390 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7900 +[titan] 2025-10-05 17:12:21,479 - root - INFO - lr: 1.1228e-05 gnorm: 1.11 [18:38:10< 5:53:35] +[titan] 2025-10-05 17:12:32,330 - root - INFO - step: 30395 loss: 1.9584 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:12:32,330 - root - INFO - lr: 1.1222e-05 gnorm: 1.12 [18:38:21< 5:53:24] +[titan] 2025-10-05 17:12:41,047 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:12:43,230 - root - INFO - step: 30400 loss: 1.9537 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 17:12:43,230 - root - INFO - lr: 1.1216e-05 gnorm: 1.16 [18:38:32< 5:53:13] +[titan] 2025-10-05 17:12:54,073 - root - INFO - step: 30405 loss: 1.9890 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 17:12:54,073 - root - INFO - lr: 1.1210e-05 gnorm: 1.19 [18:38:43< 5:53:02] +[titan] 2025-10-05 17:13:04,941 - root - INFO - step: 30410 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 17:13:04,942 - root - INFO - lr: 1.1203e-05 gnorm: 1.16 [18:38:53< 5:52:51] +[titan] 2025-10-05 17:13:15,791 - root - INFO - step: 30415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7404 +[titan] 2025-10-05 17:13:15,791 - root - INFO - lr: 1.1197e-05 gnorm: 1.18 [18:39:04< 5:52:40] +[titan] 2025-10-05 17:13:26,642 - root - INFO - step: 30420 loss: 2.0087 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7760 +[titan] 2025-10-05 17:13:26,642 - root - INFO - lr: 1.1191e-05 gnorm: 1.13 [18:39:15< 5:52:28] +[titan] 2025-10-05 17:13:37,590 - root - INFO - step: 30425 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,932 tflops: 415.26 mfu: 41.99% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 17:13:37,590 - root - INFO - lr: 1.1185e-05 gnorm: 1.13 [18:39:26< 5:52:17] +[titan] 2025-10-05 17:13:48,481 - root - INFO - step: 30430 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7083 +[titan] 2025-10-05 17:13:48,481 - root - INFO - lr: 
1.1179e-05 gnorm: 1.22 [18:39:37< 5:52:06] +[titan] 2025-10-05 17:13:59,341 - root - INFO - step: 30435 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 17:13:59,341 - root - INFO - lr: 1.1173e-05 gnorm: 1.10 [18:39:48< 5:51:55] +[titan] 2025-10-05 17:14:10,199 - root - INFO - step: 30440 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 17:14:10,199 - root - INFO - lr: 1.1166e-05 gnorm: 1.15 [18:39:59< 5:51:44] +[titan] 2025-10-05 17:14:21,050 - root - INFO - step: 30445 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:14:21,050 - root - INFO - lr: 1.1160e-05 gnorm: 1.17 [18:40:10< 5:51:33] +[titan] 2025-10-05 17:14:29,729 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:14:31,915 - root - INFO - step: 30450 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:14:31,915 - root - INFO - lr: 1.1154e-05 gnorm: 1.13 [18:40:20< 5:51:22] +[titan] 2025-10-05 17:14:42,853 - root - INFO - step: 30455 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 17:14:42,853 - root - INFO - lr: 1.1148e-05 gnorm: 1.15 [18:40:31< 5:51:11] +[titan] 2025-10-05 17:14:53,689 - root - INFO - step: 30460 loss: 1.9279 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:14:53,689 - root - INFO - lr: 1.1142e-05 gnorm: 1.16 [18:40:42< 5:51:00] +[titan] 2025-10-05 17:15:04,539 - root - INFO - step: 30465 loss: 1.9730 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7446 +[titan] 2025-10-05 17:15:04,539 - root - INFO - lr: 1.1136e-05 gnorm: 1.13 [18:40:53< 5:50:49] +[titan] 2025-10-05 17:15:15,418 - root - INFO - step: 30470 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 17:15:15,418 - root - INFO - lr: 1.1130e-05 gnorm: 1.20 [18:41:04< 5:50:38] +[titan] 2025-10-05 17:15:26,296 - root - INFO - step: 30475 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 17:15:26,297 - root - INFO - lr: 1.1124e-05 gnorm: 1.13 [18:41:15< 5:50:26] +[titan] 2025-10-05 17:15:37,128 - root - INFO - step: 30480 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 17:15:37,129 - root - INFO - lr: 
1.1117e-05 gnorm: 1.16 [18:41:26< 5:50:15] +[titan] 2025-10-05 17:15:48,020 - root - INFO - step: 30485 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7880 +[titan] 2025-10-05 17:15:48,020 - root - INFO - lr: 1.1111e-05 gnorm: 1.16 [18:41:37< 5:50:04] +[titan] 2025-10-05 17:15:58,881 - root - INFO - step: 30490 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7689 +[titan] 2025-10-05 17:15:58,881 - root - INFO - lr: 1.1105e-05 gnorm: 1.13 [18:41:47< 5:49:53] +[titan] 2025-10-05 17:16:09,738 - root - INFO - step: 30495 loss: 2.0163 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7828 +[titan] 2025-10-05 17:16:09,738 - root - INFO - lr: 1.1099e-05 gnorm: 1.13 [18:41:58< 5:49:42] +[titan] 2025-10-05 17:16:18,406 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:16:20,594 - root - INFO - step: 30500 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:16:20,594 - root - INFO - lr: 1.1093e-05 gnorm: 1.15 [18:42:09< 5:49:31] +[titan] 2025-10-05 17:16:31,472 - root - INFO - step: 30505 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7582 +[titan] 2025-10-05 17:16:31,472 - root - INFO - lr: 1.1087e-05 gnorm: 1.19 [18:42:20< 5:49:20] +[titan] 2025-10-05 17:16:42,399 - root - INFO - step: 30510 loss: 2.0116 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.07 mfu: 42.07% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7794 +[titan] 2025-10-05 17:16:42,399 - root - INFO - lr: 1.1081e-05 gnorm: 1.14 [18:42:31< 5:49:09] +[titan] 2025-10-05 17:16:53,259 - root - INFO - step: 30515 loss: 2.0382 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.8016 +[titan] 2025-10-05 17:16:53,259 - root - INFO - lr: 1.1075e-05 gnorm: 1.15 [18:42:42< 5:48:58] +[titan] 2025-10-05 17:17:04,140 - root - INFO - step: 30520 loss: 2.0350 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.7991 +[titan] 2025-10-05 17:17:04,140 - root - INFO - lr: 1.1069e-05 gnorm: 1.13 [18:42:53< 5:48:47] +[titan] 2025-10-05 17:17:14,989 - root - INFO - step: 30525 loss: 1.9911 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7579 +[titan] 2025-10-05 17:17:14,989 - root - INFO - lr: 1.1063e-05 gnorm: 1.36 [18:43:04< 5:48:36] +[titan] 2025-10-05 17:17:25,901 - root - INFO - step: 30530 loss: 1.9711 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.61 mfu: 42.12% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:17:25,902 - root - INFO - lr: 
1.1056e-05 gnorm: 1.14 [18:43:14< 5:48:25] +[titan] 2025-10-05 17:17:36,768 - root - INFO - step: 30535 loss: 2.0575 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8191 +[titan] 2025-10-05 17:17:36,768 - root - INFO - lr: 1.1050e-05 gnorm: 1.17 [18:43:25< 5:48:13] +[titan] 2025-10-05 17:17:47,700 - root - INFO - step: 30540 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 29,975 tflops: 415.86 mfu: 42.05% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 17:17:47,700 - root - INFO - lr: 1.1044e-05 gnorm: 1.12 [18:43:36< 5:48:02] +[titan] 2025-10-05 17:17:58,569 - root - INFO - step: 30545 loss: 1.9982 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7662 +[titan] 2025-10-05 17:17:58,569 - root - INFO - lr: 1.1038e-05 gnorm: 1.13 [18:43:47< 5:47:51] +[titan] 2025-10-05 17:18:07,245 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:18:09,461 - root - INFO - step: 30550 loss: 2.0311 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7963 +[titan] 2025-10-05 17:18:09,461 - root - INFO - lr: 1.1032e-05 gnorm: 1.15 [18:43:58< 5:47:40] +[titan] 2025-10-05 17:18:20,334 - root - INFO - step: 30555 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 17:18:20,335 - root - INFO - lr: 1.1026e-05 gnorm: 1.13 [18:44:09< 5:47:29] +[titan] 2025-10-05 17:18:31,222 - root - INFO - step: 30560 loss: 1.9701 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:18:31,222 - root - INFO - lr: 1.1020e-05 gnorm: 1.16 [18:44:20< 5:47:18] +[titan] 2025-10-05 17:18:42,115 - root - INFO - step: 30565 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 17:18:42,116 - root - INFO - lr: 1.1014e-05 gnorm: 1.18 [18:44:31< 5:47:07] +[titan] 2025-10-05 17:18:52,976 - root - INFO - step: 30570 loss: 2.0289 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7939 +[titan] 2025-10-05 17:18:52,978 - root - INFO - lr: 1.1008e-05 gnorm: 1.15 [18:44:42< 5:46:56] +[titan] 2025-10-05 17:19:03,822 - root - INFO - step: 30575 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 17:19:03,822 - root - INFO - lr: 1.1002e-05 gnorm: 1.13 [18:44:52< 5:46:45] +[titan] 2025-10-05 17:19:14,680 - root - INFO - step: 30580 loss: 1.9714 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 17:19:14,680 - root - INFO - lr: 
1.0996e-05 gnorm: 1.15 [18:45:03< 5:46:34] +[titan] 2025-10-05 17:19:25,560 - root - INFO - step: 30585 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 17:19:25,560 - root - INFO - lr: 1.0990e-05 gnorm: 1.12 [18:45:14< 5:46:23] +[titan] 2025-10-05 17:19:36,432 - root - INFO - step: 30590 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7819 +[titan] 2025-10-05 17:19:36,432 - root - INFO - lr: 1.0984e-05 gnorm: 1.18 [18:45:25< 5:46:11] +[titan] 2025-10-05 17:19:47,343 - root - INFO - step: 30595 loss: 2.0192 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7840 +[titan] 2025-10-05 17:19:47,343 - root - INFO - lr: 1.0977e-05 gnorm: 1.12 [18:45:36< 5:46:00] +[titan] 2025-10-05 17:19:56,046 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:19:58,231 - root - INFO - step: 30600 loss: 2.0557 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2376 global_avg_mtp_loss: 1.8181 +[titan] 2025-10-05 17:19:58,231 - root - INFO - lr: 1.0971e-05 gnorm: 1.17 [18:45:47< 5:45:49] +[titan] 2025-10-05 17:20:09,100 - root - INFO - step: 30605 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:20:09,100 - root - INFO - lr: 1.0965e-05 gnorm: 1.15 [18:45:58< 5:45:38] +[titan] 2025-10-05 17:20:19,957 - root - INFO - step: 30610 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 17:20:19,957 - root - INFO - lr: 1.0959e-05 gnorm: 1.11 [18:46:08< 5:45:27] +[titan] 2025-10-05 17:20:30,886 - root - INFO - step: 30615 loss: 2.0241 memory: 118.84GiB(85.28%) tps: 29,984 tflops: 415.98 mfu: 42.06% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7895 +[titan] 2025-10-05 17:20:30,886 - root - INFO - lr: 1.0953e-05 gnorm: 1.14 [18:46:19< 5:45:16] +[titan] 2025-10-05 17:20:41,762 - root - INFO - step: 30620 loss: 1.9612 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:20:41,763 - root - INFO - lr: 1.0947e-05 gnorm: 1.19 [18:46:30< 5:45:05] +[titan] 2025-10-05 17:20:52,672 - root - INFO - step: 30625 loss: 1.9688 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7412 +[titan] 2025-10-05 17:20:52,672 - root - INFO - lr: 1.0941e-05 gnorm: 1.14 [18:46:41< 5:44:54] +[titan] 2025-10-05 17:21:03,551 - root - INFO - step: 30630 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 17:21:03,551 - root - INFO - lr: 
1.0935e-05 gnorm: 1.13 [18:46:52< 5:44:43] +[titan] 2025-10-05 17:21:14,413 - root - INFO - step: 30635 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 17:21:14,413 - root - INFO - lr: 1.0929e-05 gnorm: 1.13 [18:47:03< 5:44:32] +[titan] 2025-10-05 17:21:25,276 - root - INFO - step: 30640 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7624 +[titan] 2025-10-05 17:21:25,276 - root - INFO - lr: 1.0923e-05 gnorm: 1.18 [18:47:14< 5:44:21] +[titan] 2025-10-05 17:21:36,129 - root - INFO - step: 30645 loss: 1.9703 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 17:21:36,129 - root - INFO - lr: 1.0917e-05 gnorm: 1.13 [18:47:25< 5:44:10] +[titan] 2025-10-05 17:21:44,864 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:21:47,049 - root - INFO - step: 30650 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:21:47,049 - root - INFO - lr: 1.0911e-05 gnorm: 1.12 [18:47:36< 5:43:58] +[titan] 2025-10-05 17:21:57,919 - root - INFO - step: 30655 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 17:21:57,919 - root - INFO - lr: 1.0905e-05 gnorm: 1.17 [18:47:46< 5:43:47] +[titan] 2025-10-05 17:22:08,772 - root - INFO - step: 30660 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7640 +[titan] 2025-10-05 17:22:08,772 - root - INFO - lr: 1.0899e-05 gnorm: 1.14 [18:47:57< 5:43:36] +[titan] 2025-10-05 17:22:19,639 - root - INFO - step: 30665 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7649 +[titan] 2025-10-05 17:22:19,639 - root - INFO - lr: 1.0893e-05 gnorm: 1.17 [18:48:08< 5:43:25] +[titan] 2025-10-05 17:22:30,511 - root - INFO - step: 30670 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 17:22:30,511 - root - INFO - lr: 1.0887e-05 gnorm: 1.15 [18:48:19< 5:43:14] +[titan] 2025-10-05 17:22:41,385 - root - INFO - step: 30675 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 17:22:41,385 - root - INFO - lr: 1.0881e-05 gnorm: 1.13 [18:48:30< 5:43:03] +[titan] 2025-10-05 17:22:52,312 - root - INFO - step: 30680 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 29,989 tflops: 416.05 mfu: 42.07% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:22:52,312 - root - INFO - lr: 
1.0875e-05 gnorm: 1.15 [18:48:41< 5:42:52] +[titan] 2025-10-05 17:23:03,165 - root - INFO - step: 30685 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 17:23:03,165 - root - INFO - lr: 1.0869e-05 gnorm: 1.13 [18:48:52< 5:42:41] +[titan] 2025-10-05 17:23:14,020 - root - INFO - step: 30690 loss: 2.0155 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7825 +[titan] 2025-10-05 17:23:14,020 - root - INFO - lr: 1.0863e-05 gnorm: 1.14 [18:49:03< 5:42:30] +[titan] 2025-10-05 17:23:24,876 - root - INFO - step: 30695 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:23:24,876 - root - INFO - lr: 1.0857e-05 gnorm: 1.15 [18:49:13< 5:42:19] +[titan] 2025-10-05 17:23:33,557 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:23:35,744 - root - INFO - step: 30700 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7316 +[titan] 2025-10-05 17:23:35,744 - root - INFO - lr: 1.0851e-05 gnorm: 1.12 [18:49:24< 5:42:08] +[titan] 2025-10-05 17:23:46,630 - root - INFO - step: 30705 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 17:23:46,630 - root - INFO - lr: 1.0845e-05 gnorm: 1.14 [18:49:35< 5:41:57] +[titan] 2025-10-05 17:23:57,506 - root - INFO - step: 30710 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 17:23:57,506 - root - INFO - lr: 1.0839e-05 gnorm: 1.17 [18:49:46< 5:41:45] +[titan] 2025-10-05 17:24:08,364 - root - INFO - step: 30715 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:24:08,364 - root - INFO - lr: 1.0833e-05 gnorm: 1.13 [18:49:57< 5:41:34] +[titan] 2025-10-05 17:24:19,332 - root - INFO - step: 30720 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 29,876 tflops: 414.48 mfu: 41.91% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7148 +[titan] 2025-10-05 17:24:19,332 - root - INFO - lr: 1.0827e-05 gnorm: 1.11 [18:50:08< 5:41:23] +[titan] 2025-10-05 17:24:19,518 - root - INFO - Dumping profiler traces at step 30720 +[titan] 2025-10-05 17:24:19,556 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:24:30,456 - root - INFO - step: 30725 loss: 2.0212 memory: 118.84GiB(85.28%) tps: 29,458 tflops: 408.69 mfu: 41.32% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7861 +[titan] 2025-10-05 17:24:30,456 - root - INFO - lr: 1.0821e-05 gnorm: 1.13 [18:50:19< 5:41:12] +[titan] 2025-10-05 17:24:41,338 - root - INFO - step: 30730 loss: 
1.9783 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 17:24:41,338 - root - INFO - lr: 1.0815e-05 gnorm: 1.11 [18:50:30< 5:41:01] +[titan] 2025-10-05 17:24:52,229 - root - INFO - step: 30735 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7376 +[titan] 2025-10-05 17:24:52,229 - root - INFO - lr: 1.0809e-05 gnorm: 1.15 [18:50:41< 5:40:50] +[titan] 2025-10-05 17:25:03,105 - root - INFO - step: 30740 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7807 +[titan] 2025-10-05 17:25:03,105 - root - INFO - lr: 1.0803e-05 gnorm: 1.23 [18:50:52< 5:40:39] +[titan] 2025-10-05 17:25:13,996 - root - INFO - step: 30745 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:25:13,996 - root - INFO - lr: 1.0797e-05 gnorm: 1.16 [18:51:03< 5:40:28] +[titan] 2025-10-05 17:25:22,692 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:25:24,892 - root - INFO - step: 30750 loss: 2.0403 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.8042 +[titan] 2025-10-05 17:25:24,892 - root - INFO - lr: 1.0791e-05 gnorm: 1.21 [18:51:13< 5:40:17] +[titan] 2025-10-05 17:25:35,755 - root - INFO - step: 30755 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 17:25:35,755 - root - INFO - lr: 1.0785e-05 gnorm: 1.15 [18:51:24< 5:40:06] +[titan] 2025-10-05 17:25:46,627 - root - INFO - step: 30760 loss: 1.9424 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:25:46,627 - root - INFO - lr: 1.0779e-05 gnorm: 1.14 [18:51:35< 5:39:55] +[titan] 2025-10-05 17:25:57,513 - root - INFO - step: 30765 loss: 2.0510 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8128 +[titan] 2025-10-05 17:25:57,513 - root - INFO - lr: 1.0773e-05 gnorm: 1.16 [18:51:46< 5:39:44] +[titan] 2025-10-05 17:26:08,369 - root - INFO - step: 30770 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:26:08,369 - root - INFO - lr: 1.0767e-05 gnorm: 1.15 [18:51:57< 5:39:33] +[titan] 2025-10-05 17:26:19,291 - root - INFO - step: 30775 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 17:26:19,291 - root - INFO - lr: 1.0761e-05 gnorm: 1.16 [18:52:08< 5:39:21] +[titan] 2025-10-05 17:26:30,180 - root - INFO - step: 30780 loss: 1.9939 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7628 +[titan] 2025-10-05 17:26:30,180 - root - INFO - lr: 
1.0755e-05 gnorm: 1.14 [18:52:19< 5:39:10] +[titan] 2025-10-05 17:26:41,064 - root - INFO - step: 30785 loss: 2.0227 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2346 global_avg_mtp_loss: 1.7881 +[titan] 2025-10-05 17:26:41,065 - root - INFO - lr: 1.0749e-05 gnorm: 1.14 [18:52:30< 5:38:59] +[titan] 2025-10-05 17:26:51,961 - root - INFO - step: 30790 loss: 1.9654 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:26:51,961 - root - INFO - lr: 1.0743e-05 gnorm: 1.11 [18:52:40< 5:38:48] +[titan] 2025-10-05 17:27:02,841 - root - INFO - step: 30795 loss: 2.0724 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2406 global_avg_mtp_loss: 1.8318 +[titan] 2025-10-05 17:27:02,841 - root - INFO - lr: 1.0737e-05 gnorm: 1.16 [18:52:51< 5:38:37] +[titan] 2025-10-05 17:27:11,522 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:27:13,697 - root - INFO - step: 30800 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 17:27:13,697 - root - INFO - lr: 1.0731e-05 gnorm: 1.14 [18:53:02< 5:38:26] +[titan] 2025-10-05 17:27:24,566 - root - INFO - step: 30805 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7423 +[titan] 2025-10-05 17:27:24,566 - root - INFO - lr: 1.0725e-05 gnorm: 1.15 [18:53:13< 5:38:15] +[titan] 2025-10-05 17:27:35,469 - root - INFO - step: 30810 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 17:27:35,469 - root - INFO - lr: 1.0719e-05 gnorm: 1.14 [18:53:24< 5:38:04] +[titan] 2025-10-05 17:27:46,340 - root - INFO - step: 30815 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7459 +[titan] 2025-10-05 17:27:46,340 - root - INFO - lr: 1.0713e-05 gnorm: 1.16 [18:53:35< 5:37:53] +[titan] 2025-10-05 17:27:57,238 - root - INFO - step: 30820 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 17:27:57,239 - root - INFO - lr: 1.0707e-05 gnorm: 1.12 [18:53:46< 5:37:42] +[titan] 2025-10-05 17:28:08,105 - root - INFO - step: 30825 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 17:28:08,105 - root - INFO - lr: 1.0702e-05 gnorm: 1.12 [18:53:57< 5:37:31] +[titan] 2025-10-05 17:28:18,970 - root - INFO - step: 30830 loss: 1.8472 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6336 +[titan] 2025-10-05 17:28:18,971 - root - INFO - lr: 
1.0696e-05 gnorm: 1.15 [18:54:07< 5:37:20] +[titan] 2025-10-05 17:28:29,843 - root - INFO - step: 30835 loss: 1.9792 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:28:29,844 - root - INFO - lr: 1.0690e-05 gnorm: 1.13 [18:54:18< 5:37:08] +[titan] 2025-10-05 17:28:40,744 - root - INFO - step: 30840 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 17:28:40,744 - root - INFO - lr: 1.0684e-05 gnorm: 1.13 [18:54:29< 5:36:57] +[titan] 2025-10-05 17:28:51,648 - root - INFO - step: 30845 loss: 1.9017 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6818 +[titan] 2025-10-05 17:28:51,648 - root - INFO - lr: 1.0678e-05 gnorm: 1.14 [18:54:40< 5:36:46] +[titan] 2025-10-05 17:29:00,360 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:29:02,544 - root - INFO - step: 30850 loss: 2.0352 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.23 mfu: 42.19% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7992 +[titan] 2025-10-05 17:29:02,544 - root - INFO - lr: 1.0672e-05 gnorm: 1.15 [18:54:51< 5:36:35] +[titan] 2025-10-05 17:29:13,430 - root - INFO - step: 30855 loss: 1.9892 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:13,431 - root - INFO - lr: 1.0666e-05 gnorm: 1.16 [18:55:02< 5:36:24] +[titan] 2025-10-05 17:29:24,310 - root - INFO - step: 30860 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 17:29:24,310 - root - INFO - lr: 1.0660e-05 gnorm: 1.12 [18:55:13< 5:36:13] +[titan] 2025-10-05 17:29:35,178 - root - INFO - step: 30865 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7596 +[titan] 2025-10-05 17:29:35,178 - root - INFO - lr: 1.0654e-05 gnorm: 1.16 [18:55:24< 5:36:02] +[titan] 2025-10-05 17:29:46,070 - root - INFO - step: 30870 loss: 2.0165 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 17:29:46,070 - root - INFO - lr: 1.0648e-05 gnorm: 1.13 [18:55:35< 5:35:51] +[titan] 2025-10-05 17:29:56,949 - root - INFO - step: 30875 loss: 1.9562 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7292 +[titan] 2025-10-05 17:29:56,949 - root - INFO - lr: 1.0642e-05 gnorm: 1.14 [18:55:45< 5:35:40] +[titan] 2025-10-05 17:30:07,804 - root - INFO - step: 30880 loss: 2.0097 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 17:30:07,804 - root - INFO - lr: 
1.0636e-05 gnorm: 1.15 [18:55:56< 5:35:29] +[titan] 2025-10-05 17:30:18,658 - root - INFO - step: 30885 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7737 +[titan] 2025-10-05 17:30:18,658 - root - INFO - lr: 1.0630e-05 gnorm: 1.17 [18:56:07< 5:35:18] +[titan] 2025-10-05 17:30:29,536 - root - INFO - step: 30890 loss: 2.0095 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7768 +[titan] 2025-10-05 17:30:29,536 - root - INFO - lr: 1.0625e-05 gnorm: 1.16 [18:56:18< 5:35:07] +[titan] 2025-10-05 17:30:40,429 - root - INFO - step: 30895 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7591 +[titan] 2025-10-05 17:30:40,429 - root - INFO - lr: 1.0619e-05 gnorm: 1.14 [18:56:29< 5:34:55] +[titan] 2025-10-05 17:30:49,116 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:30:51,303 - root - INFO - step: 30900 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 17:30:51,303 - root - INFO - lr: 1.0613e-05 gnorm: 1.15 [18:56:40< 5:34:44] +[titan] 2025-10-05 17:31:02,242 - root - INFO - step: 30905 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 29,956 tflops: 415.59 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 17:31:02,242 - root - INFO - lr: 1.0607e-05 gnorm: 1.14 [18:56:51< 5:34:33] +[titan] 2025-10-05 17:31:13,130 - root - INFO - step: 30910 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 17:31:13,130 - root - INFO - lr: 1.0601e-05 gnorm: 1.25 [18:57:02< 5:34:22] +[titan] 2025-10-05 17:31:24,016 - root - INFO - step: 30915 loss: 2.0317 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7966 +[titan] 2025-10-05 17:31:24,017 - root - INFO - lr: 1.0595e-05 gnorm: 1.13 [18:57:13< 5:34:11] +[titan] 2025-10-05 17:31:34,902 - root - INFO - step: 30920 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 17:31:34,903 - root - INFO - lr: 1.0589e-05 gnorm: 1.11 [18:57:23< 5:34:00] +[titan] 2025-10-05 17:31:45,757 - root - INFO - step: 30925 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 17:31:45,758 - root - INFO - lr: 1.0583e-05 gnorm: 1.14 [18:57:34< 5:33:49] +[titan] 2025-10-05 17:31:56,639 - root - INFO - step: 30930 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:31:56,639 - root - INFO - lr: 
1.0577e-05 gnorm: 1.15 [18:57:45< 5:33:38] +[titan] 2025-10-05 17:32:07,510 - root - INFO - step: 30935 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 17:32:07,510 - root - INFO - lr: 1.0572e-05 gnorm: 1.14 [18:57:56< 5:33:27] +[titan] 2025-10-05 17:32:18,361 - root - INFO - step: 30940 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 17:32:18,361 - root - INFO - lr: 1.0566e-05 gnorm: 1.16 [18:58:07< 5:33:16] +[titan] 2025-10-05 17:32:29,229 - root - INFO - step: 30945 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7757 +[titan] 2025-10-05 17:32:29,229 - root - INFO - lr: 1.0560e-05 gnorm: 1.17 [18:58:18< 5:33:05] +[titan] 2025-10-05 17:32:37,885 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:32:40,069 - root - INFO - step: 30950 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:32:40,069 - root - INFO - lr: 1.0554e-05 gnorm: 1.13 [18:58:29< 5:32:54] +[titan] 2025-10-05 17:32:50,918 - root - INFO - step: 30955 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 17:32:50,918 - root - INFO - lr: 1.0548e-05 gnorm: 1.15 [18:58:39< 5:32:42] +[titan] 2025-10-05 17:33:01,839 - root - INFO - step: 30960 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6694 +[titan] 2025-10-05 17:33:01,840 - root - INFO - lr: 1.0542e-05 gnorm: 1.11 [18:58:50< 5:32:31] +[titan] 2025-10-05 17:33:12,698 - root - INFO - step: 30965 loss: 1.9487 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 17:33:12,698 - root - INFO - lr: 1.0536e-05 gnorm: 1.13 [18:59:01< 5:32:20] +[titan] 2025-10-05 17:33:23,587 - root - INFO - step: 30970 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7005 +[titan] 2025-10-05 17:33:23,587 - root - INFO - lr: 1.0530e-05 gnorm: 1.17 [18:59:12< 5:32:09] +[titan] 2025-10-05 17:33:34,467 - root - INFO - step: 30975 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 17:33:34,468 - root - INFO - lr: 1.0525e-05 gnorm: 1.19 [18:59:23< 5:31:58] +[titan] 2025-10-05 17:33:45,329 - root - INFO - step: 30980 loss: 1.9870 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 17:33:45,329 - root - INFO - lr: 
1.0519e-05 gnorm: 1.21 [18:59:34< 5:31:47] +[titan] 2025-10-05 17:33:56,227 - root - INFO - step: 30985 loss: 1.9978 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7673 +[titan] 2025-10-05 17:33:56,227 - root - INFO - lr: 1.0513e-05 gnorm: 1.14 [18:59:45< 5:31:36] +[titan] 2025-10-05 17:34:07,068 - root - INFO - step: 30990 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 17:34:07,068 - root - INFO - lr: 1.0507e-05 gnorm: 1.14 [18:59:56< 5:31:25] +[titan] 2025-10-05 17:34:17,920 - root - INFO - step: 30995 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 17:34:17,920 - root - INFO - lr: 1.0501e-05 gnorm: 1.13 [19:00:06< 5:31:14] +[titan] 2025-10-05 17:34:26,599 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:34:28,785 - root - INFO - step: 31000 loss: 2.0063 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 17:34:28,785 - root - INFO - lr: 1.0495e-05 gnorm: 1.14 [19:00:17< 5:31:03] +[titan] 2025-10-05 17:34:39,677 - root - INFO - step: 31005 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 17:34:39,677 - root - INFO - lr: 1.0490e-05 gnorm: 1.12 [19:00:28< 5:30:52] +[titan] 2025-10-05 17:34:50,557 - root - INFO - step: 31010 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 17:34:50,557 - root - INFO - lr: 1.0484e-05 gnorm: 1.13 [19:00:39< 5:30:41] +[titan] 2025-10-05 17:35:01,441 - root - INFO - step: 31015 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 17:35:01,441 - root - INFO - lr: 1.0478e-05 gnorm: 1.11 [19:00:50< 5:30:29] +[titan] 2025-10-05 17:35:12,298 - root - INFO - step: 31020 loss: 2.0015 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7672 +[titan] 2025-10-05 17:35:12,298 - root - INFO - lr: 1.0472e-05 gnorm: 1.13 [19:01:01< 5:30:18] +[titan] 2025-10-05 17:35:23,148 - root - INFO - step: 31025 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 17:35:23,149 - root - INFO - lr: 1.0466e-05 gnorm: 1.19 [19:01:12< 5:30:07] +[titan] 2025-10-05 17:35:34,041 - root - INFO - step: 31030 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:35:34,042 - root - INFO - lr: 
1.0460e-05 gnorm: 1.14 [19:01:23< 5:29:56] +[titan] 2025-10-05 17:35:44,917 - root - INFO - step: 31035 loss: 2.0130 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 17:35:44,917 - root - INFO - lr: 1.0455e-05 gnorm: 1.15 [19:01:33< 5:29:45] +[titan] 2025-10-05 17:35:55,789 - root - INFO - step: 31040 loss: 1.9464 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 17:35:55,789 - root - INFO - lr: 1.0449e-05 gnorm: 1.14 [19:01:44< 5:29:34] +[titan] 2025-10-05 17:36:06,662 - root - INFO - step: 31045 loss: 1.9399 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 17:36:06,663 - root - INFO - lr: 1.0443e-05 gnorm: 1.12 [19:01:55< 5:29:23] +[titan] 2025-10-05 17:36:15,351 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:36:17,541 - root - INFO - step: 31050 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 17:36:17,541 - root - INFO - lr: 1.0437e-05 gnorm: 1.15 [19:02:06< 5:29:12] +[titan] 2025-10-05 17:36:28,426 - root - INFO - step: 31055 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:36:28,426 - root - INFO - lr: 1.0431e-05 gnorm: 1.15 [19:02:17< 5:29:01] +[titan] 2025-10-05 17:36:39,289 - root - INFO - step: 31060 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 17:36:39,290 - root - INFO - lr: 1.0425e-05 gnorm: 1.14 [19:02:28< 5:28:50] +[titan] 2025-10-05 17:36:50,187 - root - INFO - step: 31065 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7602 +[titan] 2025-10-05 17:36:50,187 - root - INFO - lr: 1.0420e-05 gnorm: 1.16 [19:02:39< 5:28:39] +[titan] 2025-10-05 17:37:01,103 - root - INFO - step: 31070 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 17:37:01,103 - root - INFO - lr: 1.0414e-05 gnorm: 1.19 [19:02:50< 5:28:28] +[titan] 2025-10-05 17:37:11,969 - root - INFO - step: 31075 loss: 2.0021 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7699 +[titan] 2025-10-05 17:37:11,969 - root - INFO - lr: 1.0408e-05 gnorm: 1.16 [19:03:00< 5:28:17] +[titan] 2025-10-05 17:37:22,843 - root - INFO - step: 31080 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 17:37:22,843 - root - INFO - lr: 
1.0402e-05 gnorm: 1.15 [19:03:11< 5:28:05] +[titan] 2025-10-05 17:37:33,710 - root - INFO - step: 31085 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 17:37:33,710 - root - INFO - lr: 1.0396e-05 gnorm: 1.14 [19:03:22< 5:27:54] +[titan] 2025-10-05 17:37:44,589 - root - INFO - step: 31090 loss: 2.0842 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2413 global_avg_mtp_loss: 1.8429 +[titan] 2025-10-05 17:37:44,589 - root - INFO - lr: 1.0391e-05 gnorm: 1.15 [19:03:33< 5:27:43] +[titan] 2025-10-05 17:37:55,476 - root - INFO - step: 31095 loss: 1.9001 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6797 +[titan] 2025-10-05 17:37:55,476 - root - INFO - lr: 1.0385e-05 gnorm: 1.14 [19:03:44< 5:27:32] +[titan] 2025-10-05 17:38:04,188 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:38:06,373 - root - INFO - step: 31100 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7429 +[titan] 2025-10-05 17:38:06,373 - root - INFO - lr: 1.0379e-05 gnorm: 1.18 [19:03:55< 5:27:21] +[titan] 2025-10-05 17:38:17,276 - root - INFO - step: 31105 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:38:17,277 - root - INFO - lr: 1.0373e-05 gnorm: 1.12 [19:04:06< 5:27:10] +[titan] 2025-10-05 17:38:28,149 - root - INFO - step: 31110 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 17:38:28,149 - root - INFO - lr: 1.0367e-05 gnorm: 1.11 [19:04:17< 5:26:59] +[titan] 2025-10-05 17:38:39,025 - root - INFO - step: 31115 loss: 1.9815 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 17:38:39,025 - root - INFO - lr: 1.0362e-05 gnorm: 1.15 [19:04:28< 5:26:48] +[titan] 2025-10-05 17:38:49,892 - root - INFO - step: 31120 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 17:38:49,892 - root - INFO - lr: 1.0356e-05 gnorm: 1.13 [19:04:38< 5:26:37] +[titan] 2025-10-05 17:39:00,809 - root - INFO - step: 31125 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 17:39:00,809 - root - INFO - lr: 1.0350e-05 gnorm: 1.12 [19:04:49< 5:26:26] +[titan] 2025-10-05 17:39:11,704 - root - INFO - step: 31130 loss: 2.0458 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2358 global_avg_mtp_loss: 1.8101 +[titan] 2025-10-05 17:39:11,705 - root - INFO - lr: 
1.0344e-05 gnorm: 1.13 [19:05:00< 5:26:15] +[titan] 2025-10-05 17:39:22,594 - root - INFO - step: 31135 loss: 2.0148 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7817 +[titan] 2025-10-05 17:39:22,595 - root - INFO - lr: 1.0339e-05 gnorm: 1.19 [19:05:11< 5:26:04] +[titan] 2025-10-05 17:39:33,466 - root - INFO - step: 31140 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7578 +[titan] 2025-10-05 17:39:33,467 - root - INFO - lr: 1.0333e-05 gnorm: 1.15 [19:05:22< 5:25:53] +[titan] 2025-10-05 17:39:44,337 - root - INFO - step: 31145 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 17:39:44,337 - root - INFO - lr: 1.0327e-05 gnorm: 1.16 [19:05:33< 5:25:41] +[titan] 2025-10-05 17:39:53,020 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:39:55,199 - root - INFO - step: 31150 loss: 2.0088 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7763 +[titan] 2025-10-05 17:39:55,199 - root - INFO - lr: 1.0321e-05 gnorm: 1.14 [19:05:44< 5:25:30] +[titan] 2025-10-05 17:40:06,057 - root - INFO - step: 31155 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 17:40:06,057 - root - INFO - lr: 1.0315e-05 gnorm: 1.17 [19:05:55< 5:25:19] +[titan] 2025-10-05 17:40:16,910 - root - INFO - step: 31160 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 17:40:16,910 - root - INFO - lr: 1.0310e-05 gnorm: 1.10 [19:06:05< 5:25:08] +[titan] 2025-10-05 17:40:27,753 - root - INFO - step: 31165 loss: 1.8951 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.26 mfu: 42.39% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6755 +[titan] 2025-10-05 17:40:27,753 - root - INFO - lr: 1.0304e-05 gnorm: 1.16 [19:06:16< 5:24:57] +[titan] 2025-10-05 17:40:38,617 - root - INFO - step: 31170 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 17:40:38,617 - root - INFO - lr: 1.0298e-05 gnorm: 1.17 [19:06:27< 5:24:46] +[titan] 2025-10-05 17:40:49,491 - root - INFO - step: 31175 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7707 +[titan] 2025-10-05 17:40:49,491 - root - INFO - lr: 1.0292e-05 gnorm: 1.18 [19:06:38< 5:24:35] +[titan] 2025-10-05 17:41:00,364 - root - INFO - step: 31180 loss: 2.0114 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 17:41:00,365 - root - INFO - lr: 
1.0287e-05 gnorm: 1.12 [19:06:49< 5:24:24] +[titan] 2025-10-05 17:41:11,255 - root - INFO - step: 31185 loss: 2.0026 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7701 +[titan] 2025-10-05 17:41:11,255 - root - INFO - lr: 1.0281e-05 gnorm: 1.19 [19:07:00< 5:24:13] +[titan] 2025-10-05 17:41:22,116 - root - INFO - step: 31190 loss: 1.9868 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7559 +[titan] 2025-10-05 17:41:22,116 - root - INFO - lr: 1.0275e-05 gnorm: 1.10 [19:07:11< 5:24:02] +[titan] 2025-10-05 17:41:32,999 - root - INFO - step: 31195 loss: 1.9088 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 17:41:32,999 - root - INFO - lr: 1.0269e-05 gnorm: 1.13 [19:07:21< 5:23:51] +[titan] 2025-10-05 17:41:41,710 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:41:43,892 - root - INFO - step: 31200 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 17:41:43,893 - root - INFO - lr: 1.0264e-05 gnorm: 1.13 [19:07:32< 5:23:40] +[titan] 2025-10-05 17:41:54,767 - root - INFO - step: 31205 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7842 +[titan] 2025-10-05 17:41:54,767 - root - INFO - lr: 1.0258e-05 gnorm: 1.13 [19:07:43< 5:23:28] +[titan] 2025-10-05 17:42:05,616 - root - INFO - step: 31210 loss: 1.9827 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 17:42:05,617 - root - INFO - lr: 1.0252e-05 gnorm: 1.12 [19:07:54< 5:23:17] +[titan] 2025-10-05 17:42:16,473 - root - INFO - step: 31215 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:42:16,473 - root - INFO - lr: 1.0247e-05 gnorm: 1.17 [19:08:05< 5:23:06] +[titan] 2025-10-05 17:42:27,363 - root - INFO - step: 31220 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7503 +[titan] 2025-10-05 17:42:27,363 - root - INFO - lr: 1.0241e-05 gnorm: 1.16 [19:08:16< 5:22:55] +[titan] 2025-10-05 17:42:38,236 - root - INFO - step: 31225 loss: 1.8762 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 17:42:38,237 - root - INFO - lr: 1.0235e-05 gnorm: 1.18 [19:08:27< 5:22:44] +[titan] 2025-10-05 17:42:49,232 - root - INFO - step: 31230 loss: 2.0595 memory: 118.84GiB(85.28%) tps: 29,801 tflops: 413.45 mfu: 41.80% global_avg_ntp_loss: 0.2390 global_avg_mtp_loss: 1.8206 +[titan] 2025-10-05 17:42:49,232 - root - INFO - lr: 
1.0229e-05 gnorm: 1.23 [19:08:38< 5:22:33] +[titan] 2025-10-05 17:42:53,763 - root - INFO - Dumping profiler traces at step 31232 +[titan] 2025-10-05 17:42:53,803 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 17:43:00,360 - root - INFO - step: 31235 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 29,447 tflops: 408.54 mfu: 41.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 17:43:00,360 - root - INFO - lr: 1.0224e-05 gnorm: 1.10 [19:08:49< 5:22:22] +[titan] 2025-10-05 17:43:11,236 - root - INFO - step: 31240 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 17:43:11,236 - root - INFO - lr: 1.0218e-05 gnorm: 1.12 [19:09:00< 5:22:11] +[titan] 2025-10-05 17:43:22,106 - root - INFO - step: 31245 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 17:43:22,106 - root - INFO - lr: 1.0212e-05 gnorm: 1.14 [19:09:11< 5:22:00] +[titan] 2025-10-05 17:43:30,797 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:43:32,976 - root - INFO - step: 31250 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 17:43:32,977 - root - INFO - lr: 1.0207e-05 gnorm: 1.15 [19:09:21< 5:21:49] +[titan] 2025-10-05 17:43:43,850 - root - INFO - step: 31255 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 17:43:43,850 - root - INFO - lr: 1.0201e-05 gnorm: 1.17 [19:09:32< 5:21:38] +[titan] 2025-10-05 17:43:54,726 - root - INFO - step: 31260 loss: 2.0422 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:43:54,727 - root - INFO - lr: 1.0195e-05 gnorm: 1.16 [19:09:43< 5:21:27] +[titan] 2025-10-05 17:44:05,648 - root - INFO - step: 31265 loss: 1.9793 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 17:44:05,649 - root - INFO - lr: 1.0189e-05 gnorm: 1.18 [19:09:54< 5:21:16] +[titan] 2025-10-05 17:44:16,493 - root - INFO - step: 31270 loss: 1.9624 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7352 +[titan] 2025-10-05 17:44:16,493 - root - INFO - lr: 1.0184e-05 gnorm: 1.11 [19:10:05< 5:21:05] +[titan] 2025-10-05 17:44:27,352 - root - INFO - step: 31275 loss: 1.9671 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 17:44:27,352 - root - INFO - lr: 1.0178e-05 gnorm: 1.17 [19:10:16< 5:20:53] +[titan] 2025-10-05 17:44:38,191 - root - INFO - step: 31280 loss: 1.9559 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 17:44:38,191 - root - INFO - lr: 
1.0172e-05 gnorm: 1.11 [19:10:27< 5:20:42] +[titan] 2025-10-05 17:44:49,058 - root - INFO - step: 31285 loss: 2.0070 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 17:44:49,058 - root - INFO - lr: 1.0167e-05 gnorm: 1.16 [19:10:38< 5:20:31] +[titan] 2025-10-05 17:44:59,922 - root - INFO - step: 31290 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 17:44:59,922 - root - INFO - lr: 1.0161e-05 gnorm: 1.16 [19:10:48< 5:20:20] +[titan] 2025-10-05 17:45:10,831 - root - INFO - step: 31295 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7252 +[titan] 2025-10-05 17:45:10,831 - root - INFO - lr: 1.0155e-05 gnorm: 1.14 [19:10:59< 5:20:09] +[titan] 2025-10-05 17:45:19,500 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:45:21,690 - root - INFO - step: 31300 loss: 2.0418 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.8055 +[titan] 2025-10-05 17:45:21,690 - root - INFO - lr: 1.0150e-05 gnorm: 1.13 [19:11:10< 5:19:58] +[titan] 2025-10-05 17:45:32,589 - root - INFO - step: 31305 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7413 +[titan] 2025-10-05 17:45:32,590 - root - INFO - lr: 1.0144e-05 gnorm: 1.14 [19:11:21< 5:19:47] +[titan] 2025-10-05 17:45:43,458 - root - INFO - step: 31310 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 17:45:43,459 - root - INFO - lr: 1.0138e-05 gnorm: 1.16 [19:11:32< 5:19:36] +[titan] 2025-10-05 17:45:54,309 - root - INFO - step: 31315 loss: 1.9913 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7610 +[titan] 2025-10-05 17:45:54,309 - root - INFO - lr: 1.0133e-05 gnorm: 1.15 [19:11:43< 5:19:25] +[titan] 2025-10-05 17:46:05,142 - root - INFO - step: 31320 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,248 tflops: 419.65 mfu: 42.43% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 17:46:05,143 - root - INFO - lr: 1.0127e-05 gnorm: 1.11 [19:11:54< 5:19:14] +[titan] 2025-10-05 17:46:16,012 - root - INFO - step: 31325 loss: 2.0290 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2352 global_avg_mtp_loss: 1.7938 +[titan] 2025-10-05 17:46:16,013 - root - INFO - lr: 1.0121e-05 gnorm: 1.22 [19:12:04< 5:19:03] +[titan] 2025-10-05 17:46:26,886 - root - INFO - step: 31330 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 17:46:26,886 - root - INFO - lr: 
1.0116e-05 gnorm: 1.16 [19:12:15< 5:18:52] +[titan] 2025-10-05 17:46:37,770 - root - INFO - step: 31335 loss: 1.9931 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7631 +[titan] 2025-10-05 17:46:37,770 - root - INFO - lr: 1.0110e-05 gnorm: 1.14 [19:12:26< 5:18:40] +[titan] 2025-10-05 17:46:48,608 - root - INFO - step: 31340 loss: 1.9680 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 17:46:48,608 - root - INFO - lr: 1.0104e-05 gnorm: 1.12 [19:12:37< 5:18:29] +[titan] 2025-10-05 17:46:59,446 - root - INFO - step: 31345 loss: 1.9908 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 17:46:59,446 - root - INFO - lr: 1.0099e-05 gnorm: 1.14 [19:12:48< 5:18:18] +[titan] 2025-10-05 17:47:08,126 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:47:10,307 - root - INFO - step: 31350 loss: 2.0078 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7753 +[titan] 2025-10-05 17:47:10,307 - root - INFO - lr: 1.0093e-05 gnorm: 1.18 [19:12:59< 5:18:07] +[titan] 2025-10-05 17:47:21,149 - root - INFO - step: 31355 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 17:47:21,149 - root - INFO - lr: 1.0087e-05 gnorm: 1.14 [19:13:10< 5:17:56] +[titan] 2025-10-05 17:47:32,020 - root - INFO - step: 31360 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 17:47:32,020 - root - INFO - lr: 1.0082e-05 gnorm: 1.16 [19:13:20< 5:17:45] +[titan] 2025-10-05 17:47:42,860 - root - INFO - step: 31365 loss: 2.0383 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.8023 +[titan] 2025-10-05 17:47:42,861 - root - INFO - lr: 1.0076e-05 gnorm: 1.15 [19:13:31< 5:17:34] +[titan] 2025-10-05 17:47:53,707 - root - INFO - step: 31370 loss: 2.0511 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2379 global_avg_mtp_loss: 1.8132 +[titan] 2025-10-05 17:47:53,708 - root - INFO - lr: 1.0070e-05 gnorm: 1.16 [19:13:42< 5:17:23] +[titan] 2025-10-05 17:48:04,561 - root - INFO - step: 31375 loss: 2.0222 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7875 +[titan] 2025-10-05 17:48:04,561 - root - INFO - lr: 1.0065e-05 gnorm: 1.20 [19:13:53< 5:17:12] +[titan] 2025-10-05 17:48:15,405 - root - INFO - step: 31380 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7172 +[titan] 2025-10-05 17:48:15,405 - root - INFO - lr: 
1.0059e-05 gnorm: 1.15 [19:14:04< 5:17:01] +[titan] 2025-10-05 17:48:26,264 - root - INFO - step: 31385 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 17:48:26,264 - root - INFO - lr: 1.0053e-05 gnorm: 1.15 [19:14:15< 5:16:50] +[titan] 2025-10-05 17:48:37,141 - root - INFO - step: 31390 loss: 2.0082 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 17:48:37,141 - root - INFO - lr: 1.0048e-05 gnorm: 1.19 [19:14:26< 5:16:39] +[titan] 2025-10-05 17:48:47,988 - root - INFO - step: 31395 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7658 +[titan] 2025-10-05 17:48:47,988 - root - INFO - lr: 1.0042e-05 gnorm: 1.17 [19:14:36< 5:16:27] +[titan] 2025-10-05 17:48:56,653 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:48:58,841 - root - INFO - step: 31400 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 17:48:58,841 - root - INFO - lr: 1.0036e-05 gnorm: 1.19 [19:14:47< 5:16:16] +[titan] 2025-10-05 17:49:09,687 - root - INFO - step: 31405 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:49:09,687 - root - INFO - lr: 1.0031e-05 gnorm: 1.19 [19:14:58< 5:16:05] +[titan] 2025-10-05 17:49:20,527 - root - INFO - step: 31410 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 17:49:20,527 - root - INFO - lr: 1.0025e-05 gnorm: 1.12 [19:15:09< 5:15:54] +[titan] 2025-10-05 17:49:31,368 - root - INFO - step: 31415 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:49:31,368 - root - INFO - lr: 1.0020e-05 gnorm: 1.12 [19:15:20< 5:15:43] +[titan] 2025-10-05 17:49:42,213 - root - INFO - step: 31420 loss: 1.9250 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7033 +[titan] 2025-10-05 17:49:42,213 - root - INFO - lr: 1.0014e-05 gnorm: 1.15 [19:15:31< 5:15:32] +[titan] 2025-10-05 17:49:53,106 - root - INFO - step: 31425 loss: 1.9352 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 17:49:53,106 - root - INFO - lr: 1.0008e-05 gnorm: 1.16 [19:15:42< 5:15:21] +[titan] 2025-10-05 17:50:03,949 - root - INFO - step: 31430 loss: 1.9804 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:50:03,949 - root - INFO - lr: 
1.0003e-05 gnorm: 1.17 [19:15:52< 5:15:10] +[titan] 2025-10-05 17:50:14,831 - root - INFO - step: 31435 loss: 1.9571 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 17:50:14,832 - root - INFO - lr: 9.9971e-06 gnorm: 1.15 [19:16:03< 5:14:59] +[titan] 2025-10-05 17:50:25,694 - root - INFO - step: 31440 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 17:50:25,694 - root - INFO - lr: 9.9915e-06 gnorm: 1.18 [19:16:14< 5:14:48] +[titan] 2025-10-05 17:50:36,538 - root - INFO - step: 31445 loss: 2.0007 memory: 118.84GiB(85.28%) tps: 30,220 tflops: 419.25 mfu: 42.39% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 17:50:36,538 - root - INFO - lr: 9.9859e-06 gnorm: 2.11 [19:16:25< 5:14:37] +[titan] 2025-10-05 17:50:45,222 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:50:47,411 - root - INFO - step: 31450 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 17:50:47,411 - root - INFO - lr: 9.9803e-06 gnorm: 1.16 [19:16:36< 5:14:26] +[titan] 2025-10-05 17:50:58,315 - root - INFO - step: 31455 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7414 +[titan] 2025-10-05 17:50:58,315 - root - INFO - lr: 9.9747e-06 gnorm: 1.15 [19:16:47< 5:14:15] +[titan] 2025-10-05 17:51:09,156 - root - INFO - step: 31460 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7552 +[titan] 2025-10-05 17:51:09,156 - root - INFO - lr: 9.9691e-06 gnorm: 1.20 [19:16:58< 5:14:03] +[titan] 2025-10-05 17:51:20,027 - root - INFO - step: 31465 loss: 2.0529 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8147 +[titan] 2025-10-05 17:51:20,027 - root - INFO - lr: 9.9635e-06 gnorm: 1.21 [19:17:08< 5:13:52] +[titan] 2025-10-05 17:51:30,891 - root - INFO - step: 31470 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 17:51:30,891 - root - INFO - lr: 9.9579e-06 gnorm: 1.16 [19:17:19< 5:13:41] +[titan] 2025-10-05 17:51:41,738 - root - INFO - step: 31475 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 17:51:41,739 - root - INFO - lr: 9.9524e-06 gnorm: 1.16 [19:17:30< 5:13:30] +[titan] 2025-10-05 17:51:52,590 - root - INFO - step: 31480 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 17:51:52,590 - root - INFO - lr: 
9.9468e-06 gnorm: 1.15 [19:17:41< 5:13:19] +[titan] 2025-10-05 17:52:03,461 - root - INFO - step: 31485 loss: 2.0228 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7888 +[titan] 2025-10-05 17:52:03,461 - root - INFO - lr: 9.9412e-06 gnorm: 1.15 [19:17:52< 5:13:08] +[titan] 2025-10-05 17:52:14,350 - root - INFO - step: 31490 loss: 2.0356 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7990 +[titan] 2025-10-05 17:52:14,350 - root - INFO - lr: 9.9356e-06 gnorm: 1.18 [19:18:03< 5:12:57] +[titan] 2025-10-05 17:52:25,212 - root - INFO - step: 31495 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 17:52:25,212 - root - INFO - lr: 9.9300e-06 gnorm: 1.13 [19:18:14< 5:12:46] +[titan] 2025-10-05 17:52:33,875 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:52:36,054 - root - INFO - step: 31500 loss: 2.0209 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2366 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 17:52:36,054 - root - INFO - lr: 9.9245e-06 gnorm: 1.18 [19:18:24< 5:12:35] +[titan] 2025-10-05 17:52:46,921 - root - INFO - step: 31505 loss: 1.9036 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 17:52:46,921 - root - INFO - lr: 9.9189e-06 gnorm: 1.12 [19:18:35< 5:12:24] +[titan] 2025-10-05 17:52:57,775 - root - INFO - step: 31510 loss: 1.9907 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7599 +[titan] 2025-10-05 17:52:57,775 - root - INFO - lr: 9.9133e-06 gnorm: 1.14 [19:18:46< 5:12:13] +[titan] 2025-10-05 17:53:08,630 - root - INFO - step: 31515 loss: 1.8954 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6762 +[titan] 2025-10-05 17:53:08,630 - root - INFO - lr: 9.9078e-06 gnorm: 1.14 [19:18:57< 5:12:02] +[titan] 2025-10-05 17:53:19,545 - root - INFO - step: 31520 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 17:53:19,545 - root - INFO - lr: 9.9022e-06 gnorm: 1.13 [19:19:08< 5:11:51] +[titan] 2025-10-05 17:53:30,414 - root - INFO - step: 31525 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 17:53:30,414 - root - INFO - lr: 9.8966e-06 gnorm: 1.16 [19:19:19< 5:11:39] +[titan] 2025-10-05 17:53:41,302 - root - INFO - step: 31530 loss: 2.0652 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2385 global_avg_mtp_loss: 1.8267 +[titan] 2025-10-05 17:53:41,302 - root - INFO - lr: 
9.8911e-06 gnorm: 1.16 [19:19:30< 5:11:28] +[titan] 2025-10-05 17:53:52,151 - root - INFO - step: 31535 loss: 2.0131 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2360 global_avg_mtp_loss: 1.7770 +[titan] 2025-10-05 17:53:52,151 - root - INFO - lr: 9.8855e-06 gnorm: 1.18 [19:19:41< 5:11:17] +[titan] 2025-10-05 17:54:03,009 - root - INFO - step: 31540 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 17:54:03,009 - root - INFO - lr: 9.8800e-06 gnorm: 1.19 [19:19:51< 5:11:06] +[titan] 2025-10-05 17:54:13,869 - root - INFO - step: 31545 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6744 +[titan] 2025-10-05 17:54:13,869 - root - INFO - lr: 9.8744e-06 gnorm: 1.15 [19:20:02< 5:10:55] +[titan] 2025-10-05 17:54:22,536 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:54:24,751 - root - INFO - step: 31550 loss: 2.0225 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2348 global_avg_mtp_loss: 1.7877 +[titan] 2025-10-05 17:54:24,751 - root - INFO - lr: 9.8689e-06 gnorm: 1.23 [19:20:13< 5:10:44] +[titan] 2025-10-05 17:54:35,610 - root - INFO - step: 31555 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 17:54:35,610 - root - INFO - lr: 9.8633e-06 gnorm: 1.15 [19:20:24< 5:10:33] +[titan] 2025-10-05 17:54:46,473 - root - INFO - step: 31560 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7584 +[titan] 2025-10-05 17:54:46,473 - root - INFO - lr: 9.8578e-06 gnorm: 1.13 [19:20:35< 5:10:22] +[titan] 2025-10-05 17:54:57,341 - root - INFO - step: 31565 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 17:54:57,341 - root - INFO - lr: 9.8523e-06 gnorm: 1.14 [19:20:46< 5:10:11] +[titan] 2025-10-05 17:55:08,193 - root - INFO - step: 31570 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 17:55:08,193 - root - INFO - lr: 9.8467e-06 gnorm: 1.13 [19:20:57< 5:10:00] +[titan] 2025-10-05 17:55:19,059 - root - INFO - step: 31575 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 17:55:19,059 - root - INFO - lr: 9.8412e-06 gnorm: 1.16 [19:21:07< 5:09:49] +[titan] 2025-10-05 17:55:29,929 - root - INFO - step: 31580 loss: 2.0143 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7811 +[titan] 2025-10-05 17:55:29,929 - root - INFO - lr: 
9.8357e-06 gnorm: 1.16 [19:21:18< 5:09:38] +[titan] 2025-10-05 17:55:40,810 - root - INFO - step: 31585 loss: 1.9768 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 17:55:40,810 - root - INFO - lr: 9.8301e-06 gnorm: 1.18 [19:21:29< 5:09:27] +[titan] 2025-10-05 17:55:51,674 - root - INFO - step: 31590 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 17:55:51,674 - root - INFO - lr: 9.8246e-06 gnorm: 1.14 [19:21:40< 5:09:15] +[titan] 2025-10-05 17:56:02,575 - root - INFO - step: 31595 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 17:56:02,575 - root - INFO - lr: 9.8191e-06 gnorm: 1.17 [19:21:51< 5:09:04] +[titan] 2025-10-05 17:56:11,262 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:56:13,445 - root - INFO - step: 31600 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 17:56:13,445 - root - INFO - lr: 9.8136e-06 gnorm: 1.13 [19:22:02< 5:08:53] +[titan] 2025-10-05 17:56:24,318 - root - INFO - step: 31605 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.28% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 17:56:24,318 - root - INFO - lr: 9.8081e-06 gnorm: 1.14 [19:22:13< 5:08:42] +[titan] 2025-10-05 17:56:35,171 - root - INFO - step: 31610 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 17:56:35,172 - root - INFO - lr: 9.8025e-06 gnorm: 1.19 [19:22:24< 5:08:31] +[titan] 2025-10-05 17:56:46,086 - root - INFO - step: 31615 loss: 2.0603 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.51 mfu: 42.11% global_avg_ntp_loss: 0.2386 global_avg_mtp_loss: 1.8217 +[titan] 2025-10-05 17:56:46,087 - root - INFO - lr: 9.7970e-06 gnorm: 1.20 [19:22:35< 5:08:20] +[titan] 2025-10-05 17:56:56,970 - root - INFO - step: 31620 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7478 +[titan] 2025-10-05 17:56:56,970 - root - INFO - lr: 9.7915e-06 gnorm: 1.15 [19:22:45< 5:08:09] +[titan] 2025-10-05 17:57:07,861 - root - INFO - step: 31625 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 17:57:07,862 - root - INFO - lr: 9.7860e-06 gnorm: 1.16 [19:22:56< 5:07:58] +[titan] 2025-10-05 17:57:18,731 - root - INFO - step: 31630 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 17:57:18,731 - root - INFO - lr: 
9.7805e-06 gnorm: 1.15 [19:23:07< 5:07:47] +[titan] 2025-10-05 17:57:29,588 - root - INFO - step: 31635 loss: 1.9949 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7639 +[titan] 2025-10-05 17:57:29,588 - root - INFO - lr: 9.7750e-06 gnorm: 1.13 [19:23:18< 5:07:36] +[titan] 2025-10-05 17:57:40,435 - root - INFO - step: 31640 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7360 +[titan] 2025-10-05 17:57:40,435 - root - INFO - lr: 9.7695e-06 gnorm: 1.15 [19:23:29< 5:07:25] +[titan] 2025-10-05 17:57:51,307 - root - INFO - step: 31645 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 17:57:51,307 - root - INFO - lr: 9.7640e-06 gnorm: 1.15 [19:23:40< 5:07:14] +[titan] 2025-10-05 17:58:00,031 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:58:02,211 - root - INFO - step: 31650 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.92 mfu: 42.16% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 17:58:02,211 - root - INFO - lr: 9.7585e-06 gnorm: 1.14 [19:23:51< 5:07:03] +[titan] 2025-10-05 17:58:13,078 - root - INFO - step: 31655 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 17:58:13,078 - root - INFO - lr: 9.7531e-06 gnorm: 1.13 [19:24:02< 5:06:51] +[titan] 2025-10-05 17:58:23,966 - root - INFO - step: 31660 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 17:58:23,966 - root - INFO - lr: 9.7476e-06 gnorm: 1.16 [19:24:12< 5:06:40] +[titan] 2025-10-05 17:58:34,856 - root - INFO - step: 31665 loss: 1.9619 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 17:58:34,857 - root - INFO - lr: 9.7421e-06 gnorm: 1.15 [19:24:23< 5:06:29] +[titan] 2025-10-05 17:58:45,708 - root - INFO - step: 31670 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7203 +[titan] 2025-10-05 17:58:45,708 - root - INFO - lr: 9.7366e-06 gnorm: 1.17 [19:24:34< 5:06:18] +[titan] 2025-10-05 17:58:56,585 - root - INFO - step: 31675 loss: 1.9360 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7121 +[titan] 2025-10-05 17:58:56,586 - root - INFO - lr: 9.7311e-06 gnorm: 1.19 [19:24:45< 5:06:07] +[titan] 2025-10-05 17:59:07,478 - root - INFO - step: 31680 loss: 2.0159 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7829 +[titan] 2025-10-05 17:59:07,478 - root - INFO - lr: 
9.7257e-06 gnorm: 1.17 [19:24:56< 5:05:56] +[titan] 2025-10-05 17:59:18,333 - root - INFO - step: 31685 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 17:59:18,333 - root - INFO - lr: 9.7202e-06 gnorm: 1.14 [19:25:07< 5:05:45] +[titan] 2025-10-05 17:59:29,202 - root - INFO - step: 31690 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 17:59:29,202 - root - INFO - lr: 9.7147e-06 gnorm: 1.15 [19:25:18< 5:05:34] +[titan] 2025-10-05 17:59:40,087 - root - INFO - step: 31695 loss: 2.0267 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2351 global_avg_mtp_loss: 1.7916 +[titan] 2025-10-05 17:59:40,087 - root - INFO - lr: 9.7093e-06 gnorm: 1.18 [19:25:29< 5:05:23] +[titan] 2025-10-05 17:59:48,803 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 17:59:50,987 - root - INFO - step: 31700 loss: 2.0901 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2436 global_avg_mtp_loss: 1.8465 +[titan] 2025-10-05 17:59:50,987 - root - INFO - lr: 9.7038e-06 gnorm: 1.21 [19:25:39< 5:05:12] +[titan] 2025-10-05 18:00:01,873 - root - INFO - step: 31705 loss: 2.0430 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2374 global_avg_mtp_loss: 1.8056 +[titan] 2025-10-05 18:00:01,873 - root - INFO - lr: 9.6983e-06 gnorm: 1.15 [19:25:50< 5:05:01] +[titan] 2025-10-05 18:00:12,786 - root - INFO - step: 31710 loss: 1.9803 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 18:00:12,786 - root - INFO - lr: 9.6929e-06 gnorm: 1.19 [19:26:01< 5:04:50] +[titan] 2025-10-05 18:00:23,689 - root - INFO - step: 31715 loss: 2.0357 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7998 +[titan] 2025-10-05 18:00:23,689 - root - INFO - lr: 9.6874e-06 gnorm: 1.20 [19:26:12< 5:04:39] +[titan] 2025-10-05 18:00:34,603 - root - INFO - step: 31720 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:00:34,603 - root - INFO - lr: 9.6820e-06 gnorm: 1.17 [19:26:23< 5:04:28] +[titan] 2025-10-05 18:00:45,500 - root - INFO - step: 31725 loss: 2.0434 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2369 global_avg_mtp_loss: 1.8064 +[titan] 2025-10-05 18:00:45,500 - root - INFO - lr: 9.6765e-06 gnorm: 1.17 [19:26:34< 5:04:17] +[titan] 2025-10-05 18:00:56,416 - root - INFO - step: 31730 loss: 1.8776 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6601 +[titan] 2025-10-05 18:00:56,416 - root - INFO - lr: 
9.6711e-06 gnorm: 1.13 [19:26:45< 5:04:05] +[titan] 2025-10-05 18:01:07,298 - root - INFO - step: 31735 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:01:07,298 - root - INFO - lr: 9.6656e-06 gnorm: 1.14 [19:26:56< 5:03:54] +[titan] 2025-10-05 18:01:18,193 - root - INFO - step: 31740 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 18:01:18,193 - root - INFO - lr: 9.6602e-06 gnorm: 1.19 [19:27:07< 5:03:43] +[titan] 2025-10-05 18:01:27,252 - root - INFO - Dumping profiler traces at step 31744 +[titan] 2025-10-05 18:01:27,290 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:01:29,483 - root - INFO - step: 31745 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 29,024 tflops: 402.66 mfu: 40.71% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:01:29,484 - root - INFO - lr: 9.6548e-06 gnorm: 1.14 [19:27:18< 5:03:32] +[titan] 2025-10-05 18:01:38,164 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:01:40,345 - root - INFO - step: 31750 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 18:01:40,345 - root - INFO - lr: 9.6493e-06 gnorm: 1.15 [19:27:29< 5:03:21] +[titan] 2025-10-05 18:01:51,212 - root - INFO - step: 31755 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 18:01:51,212 - root - INFO - lr: 9.6439e-06 gnorm: 1.16 [19:27:40< 5:03:10] +[titan] 2025-10-05 18:02:02,067 - root - INFO - step: 31760 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 18:02:02,067 - root - INFO - lr: 9.6385e-06 gnorm: 1.20 [19:27:50< 5:02:59] +[titan] 2025-10-05 18:02:12,939 - root - INFO - step: 31765 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7256 +[titan] 2025-10-05 18:02:12,939 - root - INFO - lr: 9.6330e-06 gnorm: 1.12 [19:28:01< 5:02:48] +[titan] 2025-10-05 18:02:24,117 - root - INFO - step: 31770 loss: 1.9667 memory: 118.84GiB(85.28%) tps: 29,315 tflops: 406.70 mfu: 41.12% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 18:02:24,118 - root - INFO - lr: 9.6276e-06 gnorm: 1.16 [19:28:13< 5:02:37] +[titan] 2025-10-05 18:02:35,008 - root - INFO - step: 31775 loss: 1.9929 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7616 +[titan] 2025-10-05 18:02:35,008 - root - INFO - lr: 9.6222e-06 gnorm: 1.21 [19:28:23< 5:02:26] +[titan] 2025-10-05 18:02:45,869 - root - INFO - step: 31780 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:02:45,869 - root - INFO - lr: 
9.6168e-06 gnorm: 1.16 [19:28:34< 5:02:15] +[titan] 2025-10-05 18:02:56,730 - root - INFO - step: 31785 loss: 1.9747 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:02:56,730 - root - INFO - lr: 9.6114e-06 gnorm: 1.17 [19:28:45< 5:02:04] +[titan] 2025-10-05 18:03:07,583 - root - INFO - step: 31790 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 18:03:07,583 - root - INFO - lr: 9.6059e-06 gnorm: 1.14 [19:28:56< 5:01:53] +[titan] 2025-10-05 18:03:18,426 - root - INFO - step: 31795 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7063 +[titan] 2025-10-05 18:03:18,426 - root - INFO - lr: 9.6005e-06 gnorm: 1.12 [19:29:07< 5:01:42] +[titan] 2025-10-05 18:03:27,165 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:03:29,339 - root - INFO - step: 31800 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 18:03:29,339 - root - INFO - lr: 9.5951e-06 gnorm: 1.13 [19:29:18< 5:01:31] +[titan] 2025-10-05 18:03:40,190 - root - INFO - step: 31805 loss: 1.9797 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:03:40,190 - root - INFO - lr: 9.5897e-06 gnorm: 1.15 [19:29:29< 5:01:20] +[titan] 2025-10-05 18:03:51,090 - root - INFO - step: 31810 loss: 2.0140 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7803 +[titan] 2025-10-05 18:03:51,090 - root - INFO - lr: 9.5843e-06 gnorm: 1.18 [19:29:39< 5:01:08] +[titan] 2025-10-05 18:04:01,948 - root - INFO - step: 31815 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 18:04:01,948 - root - INFO - lr: 9.5789e-06 gnorm: 1.12 [19:29:50< 5:00:57] +[titan] 2025-10-05 18:04:12,805 - root - INFO - step: 31820 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 18:04:12,805 - root - INFO - lr: 9.5735e-06 gnorm: 1.15 [19:30:01< 5:00:46] +[titan] 2025-10-05 18:04:23,715 - root - INFO - step: 31825 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7202 +[titan] 2025-10-05 18:04:23,715 - root - INFO - lr: 9.5681e-06 gnorm: 1.12 [19:30:12< 5:00:35] +[titan] 2025-10-05 18:04:34,585 - root - INFO - step: 31830 loss: 1.9952 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 18:04:34,585 - root - INFO - lr: 
9.5628e-06 gnorm: 1.16 [19:30:23< 5:00:24] +[titan] 2025-10-05 18:04:45,454 - root - INFO - step: 31835 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:04:45,454 - root - INFO - lr: 9.5574e-06 gnorm: 1.18 [19:30:34< 5:00:13] +[titan] 2025-10-05 18:04:56,357 - root - INFO - step: 31840 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 18:04:56,357 - root - INFO - lr: 9.5520e-06 gnorm: 1.16 [19:30:45< 5:00:02] +[titan] 2025-10-05 18:05:07,225 - root - INFO - step: 31845 loss: 2.0061 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7734 +[titan] 2025-10-05 18:05:07,226 - root - INFO - lr: 9.5466e-06 gnorm: 1.18 [19:30:56< 4:59:51] +[titan] 2025-10-05 18:05:15,912 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:05:18,103 - root - INFO - step: 31850 loss: 2.0270 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.7885 +[titan] 2025-10-05 18:05:18,103 - root - INFO - lr: 9.5412e-06 gnorm: 1.16 [19:31:07< 4:59:40] +[titan] 2025-10-05 18:05:29,031 - root - INFO - step: 31855 loss: 2.0335 memory: 118.84GiB(85.28%) tps: 29,986 tflops: 416.01 mfu: 42.06% global_avg_ntp_loss: 0.2365 global_avg_mtp_loss: 1.7969 +[titan] 2025-10-05 18:05:29,031 - root - INFO - lr: 9.5359e-06 gnorm: 1.17 [19:31:17< 4:59:29] +[titan] 2025-10-05 18:05:39,898 - root - INFO - step: 31860 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7375 +[titan] 2025-10-05 18:05:39,898 - root - INFO - lr: 9.5305e-06 gnorm: 1.17 [19:31:28< 4:59:18] +[titan] 2025-10-05 18:05:50,764 - root - INFO - step: 31865 loss: 1.9005 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6802 +[titan] 2025-10-05 18:05:50,764 - root - INFO - lr: 9.5251e-06 gnorm: 1.14 [19:31:39< 4:59:07] +[titan] 2025-10-05 18:06:01,663 - root - INFO - step: 31870 loss: 1.9427 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:06:01,664 - root - INFO - lr: 9.5197e-06 gnorm: 1.17 [19:31:50< 4:58:56] +[titan] 2025-10-05 18:06:12,506 - root - INFO - step: 31875 loss: 2.0201 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 18:06:12,507 - root - INFO - lr: 9.5144e-06 gnorm: 1.20 [19:32:01< 4:58:45] +[titan] 2025-10-05 18:06:23,358 - root - INFO - step: 31880 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 18:06:23,358 - root - INFO - lr: 
9.5090e-06 gnorm: 1.12 [19:32:12< 4:58:33] +[titan] 2025-10-05 18:06:34,258 - root - INFO - step: 31885 loss: 1.8475 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 18:06:34,259 - root - INFO - lr: 9.5037e-06 gnorm: 1.13 [19:32:23< 4:58:22] +[titan] 2025-10-05 18:06:45,106 - root - INFO - step: 31890 loss: 2.0199 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7850 +[titan] 2025-10-05 18:06:45,106 - root - INFO - lr: 9.4983e-06 gnorm: 1.19 [19:32:34< 4:58:11] +[titan] 2025-10-05 18:06:55,965 - root - INFO - step: 31895 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 18:06:55,965 - root - INFO - lr: 9.4930e-06 gnorm: 1.16 [19:32:44< 4:58:00] +[titan] 2025-10-05 18:07:04,625 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:07:06,799 - root - INFO - step: 31900 loss: 1.9670 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.64 mfu: 42.43% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:07:06,799 - root - INFO - lr: 9.4876e-06 gnorm: 1.18 [19:32:55< 4:57:49] +[titan] 2025-10-05 18:07:17,698 - root - INFO - step: 31905 loss: 1.9749 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.18% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7452 +[titan] 2025-10-05 18:07:17,698 - root - INFO - lr: 9.4823e-06 gnorm: 1.20 [19:33:06< 4:57:38] +[titan] 2025-10-05 18:07:28,596 - root - INFO - step: 31910 loss: 1.9594 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 18:07:28,597 - root - INFO - lr: 9.4769e-06 gnorm: 1.14 [19:33:17< 4:57:27] +[titan] 2025-10-05 18:07:39,465 - root - INFO - step: 31915 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 18:07:39,466 - root - INFO - lr: 9.4716e-06 gnorm: 1.15 [19:33:28< 4:57:16] +[titan] 2025-10-05 18:07:50,320 - root - INFO - step: 31920 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:07:50,320 - root - INFO - lr: 9.4662e-06 gnorm: 1.18 [19:33:39< 4:57:05] +[titan] 2025-10-05 18:08:01,166 - root - INFO - step: 31925 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:08:01,166 - root - INFO - lr: 9.4609e-06 gnorm: 1.19 [19:33:50< 4:56:54] +[titan] 2025-10-05 18:08:12,045 - root - INFO - step: 31930 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 18:08:12,045 - root - INFO - lr: 
9.4556e-06 gnorm: 1.11 [19:34:00< 4:56:43] +[titan] 2025-10-05 18:08:22,957 - root - INFO - step: 31935 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7321 +[titan] 2025-10-05 18:08:22,957 - root - INFO - lr: 9.4502e-06 gnorm: 1.14 [19:34:11< 4:56:32] +[titan] 2025-10-05 18:08:33,867 - root - INFO - step: 31940 loss: 2.0355 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2367 global_avg_mtp_loss: 1.7987 +[titan] 2025-10-05 18:08:33,867 - root - INFO - lr: 9.4449e-06 gnorm: 1.15 [19:34:22< 4:56:21] +[titan] 2025-10-05 18:08:44,766 - root - INFO - step: 31945 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7505 +[titan] 2025-10-05 18:08:44,766 - root - INFO - lr: 9.4396e-06 gnorm: 1.16 [19:34:33< 4:56:10] +[titan] 2025-10-05 18:08:53,450 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:08:55,628 - root - INFO - step: 31950 loss: 1.9698 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 18:08:55,628 - root - INFO - lr: 9.4343e-06 gnorm: 1.18 [19:34:44< 4:55:59] +[titan] 2025-10-05 18:09:06,503 - root - INFO - step: 31955 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 18:09:06,503 - root - INFO - lr: 9.4289e-06 gnorm: 1.12 [19:34:55< 4:55:47] +[titan] 2025-10-05 18:09:17,363 - root - INFO - step: 31960 loss: 2.0329 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.7965 +[titan] 2025-10-05 18:09:17,363 - root - INFO - lr: 9.4236e-06 gnorm: 1.18 [19:35:06< 4:55:36] +[titan] 2025-10-05 18:09:28,265 - root - INFO - step: 31965 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 18:09:28,265 - root - INFO - lr: 9.4183e-06 gnorm: 1.18 [19:35:17< 4:55:25] +[titan] 2025-10-05 18:09:39,153 - root - INFO - step: 31970 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 18:09:39,153 - root - INFO - lr: 9.4130e-06 gnorm: 1.15 [19:35:28< 4:55:14] +[titan] 2025-10-05 18:09:50,010 - root - INFO - step: 31975 loss: 1.9842 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 18:09:50,010 - root - INFO - lr: 9.4077e-06 gnorm: 1.18 [19:35:38< 4:55:03] +[titan] 2025-10-05 18:10:00,880 - root - INFO - step: 31980 loss: 1.9569 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 18:10:00,880 - root - INFO - lr: 
9.4024e-06 gnorm: 1.14 [19:35:49< 4:54:52] +[titan] 2025-10-05 18:10:11,742 - root - INFO - step: 31985 loss: 1.9260 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 18:10:11,742 - root - INFO - lr: 9.3971e-06 gnorm: 1.13 [19:36:00< 4:54:41] +[titan] 2025-10-05 18:10:22,613 - root - INFO - step: 31990 loss: 1.9830 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 18:10:22,613 - root - INFO - lr: 9.3918e-06 gnorm: 1.16 [19:36:11< 4:54:30] +[titan] 2025-10-05 18:10:33,523 - root - INFO - step: 31995 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 18:10:33,523 - root - INFO - lr: 9.3865e-06 gnorm: 1.14 [19:36:22< 4:54:19] +[titan] 2025-10-05 18:10:42,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:10:44,426 - root - INFO - step: 32000 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 18:10:44,426 - root - INFO - lr: 9.3812e-06 gnorm: 1.14 [19:36:33< 4:54:08] +[titan] 2025-10-05 18:10:55,314 - root - INFO - step: 32005 loss: 2.0358 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2359 global_avg_mtp_loss: 1.7999 +[titan] 2025-10-05 18:10:55,315 - root - INFO - lr: 9.3759e-06 gnorm: 1.17 [19:36:44< 4:53:57] +[titan] 2025-10-05 18:11:06,182 - root - INFO - step: 32010 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:11:06,182 - root - INFO - lr: 9.3706e-06 gnorm: 1.16 [19:36:55< 4:53:46] +[titan] 2025-10-05 18:11:17,044 - root - INFO - step: 32015 loss: 2.0144 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7795 +[titan] 2025-10-05 18:11:17,044 - root - INFO - lr: 9.3653e-06 gnorm: 1.16 [19:37:05< 4:53:35] +[titan] 2025-10-05 18:11:27,919 - root - INFO - step: 32020 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 18:11:27,919 - root - INFO - lr: 9.3601e-06 gnorm: 1.21 [19:37:16< 4:53:24] +[titan] 2025-10-05 18:11:38,842 - root - INFO - step: 32025 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.21 mfu: 42.08% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 18:11:38,842 - root - INFO - lr: 9.3548e-06 gnorm: 1.19 [19:37:27< 4:53:12] +[titan] 2025-10-05 18:11:49,758 - root - INFO - step: 32030 loss: 1.9960 memory: 118.84GiB(85.28%) tps: 30,020 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2316 global_avg_mtp_loss: 1.7644 +[titan] 2025-10-05 18:11:49,758 - root - INFO - lr: 
9.3495e-06 gnorm: 1.19 [19:37:38< 4:53:01] +[titan] 2025-10-05 18:12:00,638 - root - INFO - step: 32035 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:12:00,638 - root - INFO - lr: 9.3442e-06 gnorm: 1.14 [19:37:49< 4:52:50] +[titan] 2025-10-05 18:12:11,546 - root - INFO - step: 32040 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:12:11,547 - root - INFO - lr: 9.3390e-06 gnorm: 1.15 [19:38:00< 4:52:39] +[titan] 2025-10-05 18:12:22,450 - root - INFO - step: 32045 loss: 1.8868 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 18:12:22,450 - root - INFO - lr: 9.3337e-06 gnorm: 1.14 [19:38:11< 4:52:28] +[titan] 2025-10-05 18:12:31,215 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:12:33,403 - root - INFO - step: 32050 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 18:12:33,404 - root - INFO - lr: 9.3284e-06 gnorm: 1.15 [19:38:22< 4:52:17] +[titan] 2025-10-05 18:12:44,298 - root - INFO - step: 32055 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 18:12:44,298 - root - INFO - lr: 9.3232e-06 gnorm: 1.15 [19:38:33< 4:52:06] +[titan] 2025-10-05 18:12:55,164 - root - INFO - step: 32060 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 18:12:55,164 - root - INFO - lr: 9.3179e-06 gnorm: 1.16 [19:38:44< 4:51:55] +[titan] 2025-10-05 18:13:06,043 - root - INFO - step: 32065 loss: 1.9980 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7655 +[titan] 2025-10-05 18:13:06,044 - root - INFO - lr: 9.3127e-06 gnorm: 1.15 [19:38:54< 4:51:44] +[titan] 2025-10-05 18:13:16,898 - root - INFO - step: 32070 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 18:13:16,898 - root - INFO - lr: 9.3074e-06 gnorm: 1.18 [19:39:05< 4:51:33] +[titan] 2025-10-05 18:13:27,792 - root - INFO - step: 32075 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 18:13:27,793 - root - INFO - lr: 9.3022e-06 gnorm: 1.19 [19:39:16< 4:51:22] +[titan] 2025-10-05 18:13:38,698 - root - INFO - step: 32080 loss: 1.9499 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 18:13:38,698 - root - INFO - lr: 
9.2969e-06 gnorm: 1.19 [19:39:27< 4:51:11] +[titan] 2025-10-05 18:13:49,556 - root - INFO - step: 32085 loss: 1.9954 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7632 +[titan] 2025-10-05 18:13:49,557 - root - INFO - lr: 9.2917e-06 gnorm: 1.17 [19:39:38< 4:51:00] +[titan] 2025-10-05 18:14:00,441 - root - INFO - step: 32090 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7762 +[titan] 2025-10-05 18:14:00,441 - root - INFO - lr: 9.2864e-06 gnorm: 1.16 [19:39:49< 4:50:49] +[titan] 2025-10-05 18:14:11,340 - root - INFO - step: 32095 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 18:14:11,340 - root - INFO - lr: 9.2812e-06 gnorm: 1.14 [19:40:00< 4:50:38] +[titan] 2025-10-05 18:14:20,021 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:14:22,203 - root - INFO - step: 32100 loss: 1.9882 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 18:14:22,203 - root - INFO - lr: 9.2759e-06 gnorm: 1.14 [19:40:11< 4:50:27] +[titan] 2025-10-05 18:14:33,146 - root - INFO - step: 32105 loss: 2.0008 memory: 118.84GiB(85.28%) tps: 29,946 tflops: 415.46 mfu: 42.01% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:14:33,146 - root - INFO - lr: 9.2707e-06 gnorm: 1.18 [19:40:22< 4:50:15] +[titan] 2025-10-05 18:14:44,011 - root - INFO - step: 32110 loss: 1.9522 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:14:44,011 - root - INFO - lr: 9.2655e-06 gnorm: 1.14 [19:40:32< 4:50:04] +[titan] 2025-10-05 18:14:54,863 - root - INFO - step: 32115 loss: 1.9586 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:14:54,864 - root - INFO - lr: 9.2603e-06 gnorm: 1.15 [19:40:43< 4:49:53] +[titan] 2025-10-05 18:15:05,716 - root - INFO - step: 32120 loss: 1.9321 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 18:15:05,716 - root - INFO - lr: 9.2550e-06 gnorm: 1.13 [19:40:54< 4:49:42] +[titan] 2025-10-05 18:15:16,556 - root - INFO - step: 32125 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:15:16,556 - root - INFO - lr: 9.2498e-06 gnorm: 1.15 [19:41:05< 4:49:31] +[titan] 2025-10-05 18:15:27,432 - root - INFO - step: 32130 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 18:15:27,433 - root - INFO - lr: 
9.2446e-06 gnorm: 1.20 [19:41:16< 4:49:20] +[titan] 2025-10-05 18:15:38,339 - root - INFO - step: 32135 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7023 +[titan] 2025-10-05 18:15:38,339 - root - INFO - lr: 9.2394e-06 gnorm: 1.17 [19:41:27< 4:49:09] +[titan] 2025-10-05 18:15:49,183 - root - INFO - step: 32140 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7453 +[titan] 2025-10-05 18:15:49,184 - root - INFO - lr: 9.2342e-06 gnorm: 1.19 [19:41:38< 4:48:58] +[titan] 2025-10-05 18:16:00,016 - root - INFO - step: 32145 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:16:00,017 - root - INFO - lr: 9.2290e-06 gnorm: 1.19 [19:41:48< 4:48:47] +[titan] 2025-10-05 18:16:08,668 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:16:10,859 - root - INFO - step: 32150 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7520 +[titan] 2025-10-05 18:16:10,859 - root - INFO - lr: 9.2237e-06 gnorm: 1.17 [19:41:59< 4:48:36] +[titan] 2025-10-05 18:16:21,712 - root - INFO - step: 32155 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 18:16:21,712 - root - INFO - lr: 9.2185e-06 gnorm: 1.14 [19:42:10< 4:48:25] +[titan] 2025-10-05 18:16:32,617 - root - INFO - step: 32160 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7361 +[titan] 2025-10-05 18:16:32,618 - root - INFO - lr: 9.2133e-06 gnorm: 1.19 [19:42:21< 4:48:14] +[titan] 2025-10-05 18:16:43,525 - root - INFO - step: 32165 loss: 1.9893 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 18:16:43,525 - root - INFO - lr: 9.2081e-06 gnorm: 1.20 [19:42:32< 4:48:03] +[titan] 2025-10-05 18:16:54,419 - root - INFO - step: 32170 loss: 1.9751 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 18:16:54,419 - root - INFO - lr: 9.2029e-06 gnorm: 1.15 [19:42:43< 4:47:52] +[titan] 2025-10-05 18:17:05,289 - root - INFO - step: 32175 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 18:17:05,289 - root - INFO - lr: 9.1978e-06 gnorm: 1.15 [19:42:54< 4:47:41] +[titan] 2025-10-05 18:17:16,163 - root - INFO - step: 32180 loss: 1.9628 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7358 +[titan] 2025-10-05 18:17:16,164 - root - INFO - lr: 
9.1926e-06 gnorm: 1.17 [19:43:05< 4:47:29] +[titan] 2025-10-05 18:17:27,035 - root - INFO - step: 32185 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7293 +[titan] 2025-10-05 18:17:27,035 - root - INFO - lr: 9.1874e-06 gnorm: 1.11 [19:43:15< 4:47:18] +[titan] 2025-10-05 18:17:37,986 - root - INFO - step: 32190 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 29,922 tflops: 415.12 mfu: 41.97% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 18:17:37,987 - root - INFO - lr: 9.1822e-06 gnorm: 1.20 [19:43:26< 4:47:07] +[titan] 2025-10-05 18:17:48,863 - root - INFO - step: 32195 loss: 2.0029 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 18:17:48,863 - root - INFO - lr: 9.1770e-06 gnorm: 1.14 [19:43:37< 4:46:56] +[titan] 2025-10-05 18:17:57,568 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:17:59,752 - root - INFO - step: 32200 loss: 1.9933 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7621 +[titan] 2025-10-05 18:17:59,752 - root - INFO - lr: 9.1718e-06 gnorm: 1.14 [19:43:48< 4:46:45] +[titan] 2025-10-05 18:18:10,633 - root - INFO - step: 32205 loss: 2.0530 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2394 global_avg_mtp_loss: 1.8136 +[titan] 2025-10-05 18:18:10,633 - root - INFO - lr: 9.1667e-06 gnorm: 1.16 [19:43:59< 4:46:34] +[titan] 2025-10-05 18:18:21,504 - root - INFO - step: 32210 loss: 1.9549 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 18:18:21,505 - root - INFO - lr: 9.1615e-06 gnorm: 1.14 [19:44:10< 4:46:23] +[titan] 2025-10-05 18:18:32,363 - root - INFO - step: 32215 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7288 +[titan] 2025-10-05 18:18:32,363 - root - INFO - lr: 9.1563e-06 gnorm: 1.15 [19:44:21< 4:46:12] +[titan] 2025-10-05 18:18:43,287 - root - INFO - step: 32220 loss: 2.0259 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.16 mfu: 42.08% global_avg_ntp_loss: 0.2349 global_avg_mtp_loss: 1.7909 +[titan] 2025-10-05 18:18:43,287 - root - INFO - lr: 9.1512e-06 gnorm: 1.16 [19:44:32< 4:46:01] +[titan] 2025-10-05 18:18:54,183 - root - INFO - step: 32225 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 18:18:54,183 - root - INFO - lr: 9.1460e-06 gnorm: 1.14 [19:44:43< 4:45:50] +[titan] 2025-10-05 18:19:05,065 - root - INFO - step: 32230 loss: 2.0030 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:19:05,066 - root - INFO - lr: 
9.1408e-06 gnorm: 1.13 [19:44:53< 4:45:39] +[titan] 2025-10-05 18:19:15,931 - root - INFO - step: 32235 loss: 1.9942 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:19:15,932 - root - INFO - lr: 9.1357e-06 gnorm: 1.19 [19:45:04< 4:45:28] +[titan] 2025-10-05 18:19:26,783 - root - INFO - step: 32240 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 18:19:26,783 - root - INFO - lr: 9.1305e-06 gnorm: 1.16 [19:45:15< 4:45:17] +[titan] 2025-10-05 18:19:37,703 - root - INFO - step: 32245 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:19:37,703 - root - INFO - lr: 9.1254e-06 gnorm: 1.16 [19:45:26< 4:45:06] +[titan] 2025-10-05 18:19:46,377 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:19:48,556 - root - INFO - step: 32250 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 18:19:48,556 - root - INFO - lr: 9.1202e-06 gnorm: 1.15 [19:45:37< 4:44:55] +[titan] 2025-10-05 18:19:59,512 - root - INFO - step: 32255 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 29,910 tflops: 414.95 mfu: 41.96% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7460 +[titan] 2025-10-05 18:19:59,512 - root - INFO - lr: 9.1151e-06 gnorm: 1.19 [19:45:48< 4:44:43] +[titan] 2025-10-05 18:20:01,865 - root - INFO - Dumping profiler traces at step 32256 +[titan] 2025-10-05 18:20:01,903 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:20:10,612 - root - INFO - step: 32260 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 29,522 tflops: 409.57 mfu: 41.41% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 18:20:10,612 - root - INFO - lr: 9.1099e-06 gnorm: 1.15 [19:45:59< 4:44:32] +[titan] 2025-10-05 18:20:21,505 - root - INFO - step: 32265 loss: 1.9661 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 18:20:21,505 - root - INFO - lr: 9.1048e-06 gnorm: 1.18 [19:46:10< 4:44:21] +[titan] 2025-10-05 18:20:32,390 - root - INFO - step: 32270 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 18:20:32,390 - root - INFO - lr: 9.0996e-06 gnorm: 1.19 [19:46:21< 4:44:10] +[titan] 2025-10-05 18:20:43,343 - root - INFO - step: 32275 loss: 1.9611 memory: 118.84GiB(85.28%) tps: 29,918 tflops: 415.06 mfu: 41.97% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 18:20:43,343 - root - INFO - lr: 9.0945e-06 gnorm: 1.17 [19:46:32< 4:43:59] +[titan] 2025-10-05 18:20:54,195 - root - INFO - step: 32280 loss: 
1.9368 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 18:20:54,195 - root - INFO - lr: 9.0894e-06 gnorm: 1.13 [19:46:43< 4:43:48] +[titan] 2025-10-05 18:21:05,056 - root - INFO - step: 32285 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:21:05,056 - root - INFO - lr: 9.0842e-06 gnorm: 1.14 [19:46:53< 4:43:37] +[titan] 2025-10-05 18:21:15,905 - root - INFO - step: 32290 loss: 1.9864 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7560 +[titan] 2025-10-05 18:21:15,905 - root - INFO - lr: 9.0791e-06 gnorm: 1.14 [19:47:04< 4:43:26] +[titan] 2025-10-05 18:21:26,822 - root - INFO - step: 32295 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.44 mfu: 42.11% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:21:26,822 - root - INFO - lr: 9.0740e-06 gnorm: 1.15 [19:47:15< 4:43:15] +[titan] 2025-10-05 18:21:35,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:21:37,747 - root - INFO - step: 32300 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:21:37,747 - root - INFO - lr: 9.0689e-06 gnorm: 1.15 [19:47:26< 4:43:04] +[titan] 2025-10-05 18:21:48,651 - root - INFO - step: 32305 loss: 1.9420 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 18:21:48,651 - root - INFO - lr: 9.0638e-06 gnorm: 1.13 [19:47:37< 4:42:53] +[titan] 2025-10-05 18:21:59,526 - root - INFO - step: 32310 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7716 +[titan] 2025-10-05 18:21:59,526 - root - INFO - lr: 9.0586e-06 gnorm: 1.20 [19:47:48< 4:42:42] +[titan] 2025-10-05 18:22:10,410 - root - INFO - step: 32315 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 18:22:10,410 - root - INFO - lr: 9.0535e-06 gnorm: 1.16 [19:47:59< 4:42:31] +[titan] 2025-10-05 18:22:21,310 - root - INFO - step: 32320 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6910 +[titan] 2025-10-05 18:22:21,310 - root - INFO - lr: 9.0484e-06 gnorm: 1.16 [19:48:10< 4:42:20] +[titan] 2025-10-05 18:22:32,228 - root - INFO - step: 32325 loss: 1.9625 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 18:22:32,229 - root - INFO - lr: 9.0433e-06 gnorm: 1.17 [19:48:21< 4:42:09] +[titan] 2025-10-05 18:22:43,163 - root - INFO - step: 32330 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 18:22:43,164 - root - INFO - lr: 
9.0382e-06 gnorm: 1.16 [19:48:32< 4:41:58] +[titan] 2025-10-05 18:22:54,059 - root - INFO - step: 32335 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:22:54,059 - root - INFO - lr: 9.0331e-06 gnorm: 1.18 [19:48:42< 4:41:47] +[titan] 2025-10-05 18:23:04,937 - root - INFO - step: 32340 loss: 2.0224 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7891 +[titan] 2025-10-05 18:23:04,938 - root - INFO - lr: 9.0280e-06 gnorm: 1.22 [19:48:53< 4:41:36] +[titan] 2025-10-05 18:23:15,809 - root - INFO - step: 32345 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:23:15,809 - root - INFO - lr: 9.0229e-06 gnorm: 1.18 [19:49:04< 4:41:24] +[titan] 2025-10-05 18:23:24,511 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:23:26,693 - root - INFO - step: 32350 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 18:23:26,693 - root - INFO - lr: 9.0178e-06 gnorm: 1.24 [19:49:15< 4:41:13] +[titan] 2025-10-05 18:23:37,572 - root - INFO - step: 32355 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:23:37,573 - root - INFO - lr: 9.0127e-06 gnorm: 1.15 [19:49:26< 4:41:02] +[titan] 2025-10-05 18:23:48,530 - root - INFO - step: 32360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 29,906 tflops: 414.90 mfu: 41.95% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 18:23:48,530 - root - INFO - lr: 9.0077e-06 gnorm: 1.15 [19:49:37< 4:40:51] +[titan] 2025-10-05 18:23:59,408 - root - INFO - step: 32365 loss: 2.0215 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7874 +[titan] 2025-10-05 18:23:59,408 - root - INFO - lr: 9.0026e-06 gnorm: 1.20 [19:49:48< 4:40:40] +[titan] 2025-10-05 18:24:10,292 - root - INFO - step: 32370 loss: 1.9796 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 18:24:10,292 - root - INFO - lr: 8.9975e-06 gnorm: 1.14 [19:49:59< 4:40:29] +[titan] 2025-10-05 18:24:21,173 - root - INFO - step: 32375 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:24:21,173 - root - INFO - lr: 8.9924e-06 gnorm: 1.18 [19:50:10< 4:40:18] +[titan] 2025-10-05 18:24:32,033 - root - INFO - step: 32380 loss: 1.9846 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7551 +[titan] 2025-10-05 18:24:32,033 - root - INFO - lr: 
8.9873e-06 gnorm: 1.17 [19:50:20< 4:40:07] +[titan] 2025-10-05 18:24:42,932 - root - INFO - step: 32385 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 18:24:42,932 - root - INFO - lr: 8.9823e-06 gnorm: 1.17 [19:50:31< 4:39:56] +[titan] 2025-10-05 18:24:53,801 - root - INFO - step: 32390 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 18:24:53,801 - root - INFO - lr: 8.9772e-06 gnorm: 1.16 [19:50:42< 4:39:45] +[titan] 2025-10-05 18:25:04,696 - root - INFO - step: 32395 loss: 1.9873 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 18:25:04,696 - root - INFO - lr: 8.9721e-06 gnorm: 1.18 [19:50:53< 4:39:34] +[titan] 2025-10-05 18:25:13,384 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:25:15,566 - root - INFO - step: 32400 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:25:15,566 - root - INFO - lr: 8.9671e-06 gnorm: 1.16 [19:51:04< 4:39:23] +[titan] 2025-10-05 18:25:26,448 - root - INFO - step: 32405 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 18:25:26,449 - root - INFO - lr: 8.9620e-06 gnorm: 1.13 [19:51:15< 4:39:12] +[titan] 2025-10-05 18:25:37,323 - root - INFO - step: 32410 loss: 1.9454 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 18:25:37,323 - root - INFO - lr: 8.9570e-06 gnorm: 1.15 [19:51:26< 4:39:01] +[titan] 2025-10-05 18:25:48,505 - root - INFO - step: 32415 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 29,304 tflops: 406.54 mfu: 41.11% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 18:25:48,505 - root - INFO - lr: 8.9519e-06 gnorm: 1.17 [19:51:37< 4:38:50] +[titan] 2025-10-05 18:25:59,396 - root - INFO - step: 32420 loss: 1.9740 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 18:25:59,396 - root - INFO - lr: 8.9469e-06 gnorm: 1.17 [19:51:48< 4:38:39] +[titan] 2025-10-05 18:26:10,310 - root - INFO - step: 32425 loss: 1.9852 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.54 mfu: 42.12% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 18:26:10,310 - root - INFO - lr: 8.9418e-06 gnorm: 1.15 [19:51:59< 4:38:28] +[titan] 2025-10-05 18:26:21,195 - root - INFO - step: 32430 loss: 1.9222 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 18:26:21,195 - root - INFO - lr: 
8.9368e-06 gnorm: 1.14 [19:52:10< 4:38:16] +[titan] 2025-10-05 18:26:32,089 - root - INFO - step: 32435 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 18:26:32,089 - root - INFO - lr: 8.9317e-06 gnorm: 1.17 [19:52:20< 4:38:05] +[titan] 2025-10-05 18:26:42,997 - root - INFO - step: 32440 loss: 2.0001 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 18:26:42,997 - root - INFO - lr: 8.9267e-06 gnorm: 1.18 [19:52:31< 4:37:54] +[titan] 2025-10-05 18:26:53,888 - root - INFO - step: 32445 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 18:26:53,888 - root - INFO - lr: 8.9217e-06 gnorm: 1.18 [19:52:42< 4:37:43] +[titan] 2025-10-05 18:27:02,624 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:27:04,807 - root - INFO - step: 32450 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 18:27:04,807 - root - INFO - lr: 8.9166e-06 gnorm: 1.18 [19:52:53< 4:37:32] +[titan] 2025-10-05 18:27:15,706 - root - INFO - step: 32455 loss: 1.9901 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7589 +[titan] 2025-10-05 18:27:15,706 - root - INFO - lr: 8.9116e-06 gnorm: 1.15 [19:53:04< 4:37:21] +[titan] 2025-10-05 18:27:26,608 - root - INFO - step: 32460 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 18:27:26,608 - root - INFO - lr: 8.9066e-06 gnorm: 1.14 [19:53:15< 4:37:10] +[titan] 2025-10-05 18:27:37,484 - root - INFO - step: 32465 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 18:27:37,485 - root - INFO - lr: 8.9015e-06 gnorm: 1.11 [19:53:26< 4:36:59] +[titan] 2025-10-05 18:27:48,368 - root - INFO - step: 32470 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:27:48,368 - root - INFO - lr: 8.8965e-06 gnorm: 1.13 [19:53:37< 4:36:48] +[titan] 2025-10-05 18:27:59,231 - root - INFO - step: 32475 loss: 1.9379 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 18:27:59,231 - root - INFO - lr: 8.8915e-06 gnorm: 1.15 [19:53:48< 4:36:37] +[titan] 2025-10-05 18:28:10,108 - root - INFO - step: 32480 loss: 2.0102 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7775 +[titan] 2025-10-05 18:28:10,108 - root - INFO - lr: 
8.8865e-06 gnorm: 1.18 [19:53:58< 4:36:26] +[titan] 2025-10-05 18:28:20,988 - root - INFO - step: 32485 loss: 1.9823 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 18:28:20,988 - root - INFO - lr: 8.8815e-06 gnorm: 1.15 [19:54:09< 4:36:15] +[titan] 2025-10-05 18:28:31,851 - root - INFO - step: 32490 loss: 1.9732 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7448 +[titan] 2025-10-05 18:28:31,851 - root - INFO - lr: 8.8765e-06 gnorm: 1.16 [19:54:20< 4:36:04] +[titan] 2025-10-05 18:28:42,715 - root - INFO - step: 32495 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 18:28:42,716 - root - INFO - lr: 8.8715e-06 gnorm: 1.16 [19:54:31< 4:35:53] +[titan] 2025-10-05 18:28:51,431 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:28:53,617 - root - INFO - step: 32500 loss: 1.9959 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7651 +[titan] 2025-10-05 18:28:53,617 - root - INFO - lr: 8.8665e-06 gnorm: 1.21 [19:54:42< 4:35:42] +[titan] 2025-10-05 18:29:04,488 - root - INFO - step: 32505 loss: 1.9052 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6852 +[titan] 2025-10-05 18:29:04,489 - root - INFO - lr: 8.8615e-06 gnorm: 1.15 [19:54:53< 4:35:31] +[titan] 2025-10-05 18:29:15,355 - root - INFO - step: 32510 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7765 +[titan] 2025-10-05 18:29:15,355 - root - INFO - lr: 8.8565e-06 gnorm: 1.20 [19:55:04< 4:35:19] +[titan] 2025-10-05 18:29:26,197 - root - INFO - step: 32515 loss: 1.9015 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 18:29:26,198 - root - INFO - lr: 8.8515e-06 gnorm: 1.16 [19:55:15< 4:35:08] +[titan] 2025-10-05 18:29:37,043 - root - INFO - step: 32520 loss: 1.9322 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 18:29:37,043 - root - INFO - lr: 8.8465e-06 gnorm: 1.14 [19:55:25< 4:34:57] +[titan] 2025-10-05 18:29:47,915 - root - INFO - step: 32525 loss: 2.0083 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 18:29:47,915 - root - INFO - lr: 8.8415e-06 gnorm: 1.17 [19:55:36< 4:34:46] +[titan] 2025-10-05 18:29:58,786 - root - INFO - step: 32530 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:29:58,786 - root - INFO - lr: 
8.8365e-06 gnorm: 1.14 [19:55:47< 4:34:35] +[titan] 2025-10-05 18:30:09,635 - root - INFO - step: 32535 loss: 1.9367 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 18:30:09,635 - root - INFO - lr: 8.8315e-06 gnorm: 1.18 [19:55:58< 4:34:24] +[titan] 2025-10-05 18:30:20,517 - root - INFO - step: 32540 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:30:20,517 - root - INFO - lr: 8.8265e-06 gnorm: 1.22 [19:56:09< 4:34:13] +[titan] 2025-10-05 18:30:31,388 - root - INFO - step: 32545 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 18:30:31,388 - root - INFO - lr: 8.8216e-06 gnorm: 1.18 [19:56:20< 4:34:02] +[titan] 2025-10-05 18:30:40,098 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:30:42,279 - root - INFO - step: 32550 loss: 2.0452 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.21% global_avg_ntp_loss: 0.2372 global_avg_mtp_loss: 1.8080 +[titan] 2025-10-05 18:30:42,279 - root - INFO - lr: 8.8166e-06 gnorm: 1.22 [19:56:31< 4:33:51] +[titan] 2025-10-05 18:30:53,167 - root - INFO - step: 32555 loss: 2.0156 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7816 +[titan] 2025-10-05 18:30:53,168 - root - INFO - lr: 8.8116e-06 gnorm: 1.16 [19:56:42< 4:33:40] +[titan] 2025-10-05 18:31:04,037 - root - INFO - step: 32560 loss: 1.9750 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7457 +[titan] 2025-10-05 18:31:04,037 - root - INFO - lr: 8.8066e-06 gnorm: 1.15 [19:56:52< 4:33:29] +[titan] 2025-10-05 18:31:14,905 - root - INFO - step: 32565 loss: 2.0104 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 18:31:14,905 - root - INFO - lr: 8.8017e-06 gnorm: 1.18 [19:57:03< 4:33:18] +[titan] 2025-10-05 18:31:25,750 - root - INFO - step: 32570 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 18:31:25,750 - root - INFO - lr: 8.7967e-06 gnorm: 1.18 [19:57:14< 4:33:07] +[titan] 2025-10-05 18:31:36,615 - root - INFO - step: 32575 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 18:31:36,615 - root - INFO - lr: 8.7917e-06 gnorm: 1.21 [19:57:25< 4:32:56] +[titan] 2025-10-05 18:31:47,505 - root - INFO - step: 32580 loss: 2.0547 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2382 global_avg_mtp_loss: 1.8165 +[titan] 2025-10-05 18:31:47,505 - root - INFO - lr: 
8.7868e-06 gnorm: 1.18 [19:57:36< 4:32:45] +[titan] 2025-10-05 18:31:58,405 - root - INFO - step: 32585 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.07 mfu: 42.17% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 18:31:58,406 - root - INFO - lr: 8.7818e-06 gnorm: 1.15 [19:57:47< 4:32:34] +[titan] 2025-10-05 18:32:09,289 - root - INFO - step: 32590 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 18:32:09,289 - root - INFO - lr: 8.7769e-06 gnorm: 1.17 [19:57:58< 4:32:22] +[titan] 2025-10-05 18:32:20,143 - root - INFO - step: 32595 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 18:32:20,143 - root - INFO - lr: 8.7719e-06 gnorm: 1.15 [19:58:08< 4:32:11] +[titan] 2025-10-05 18:32:28,834 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:32:31,015 - root - INFO - step: 32600 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:32:31,015 - root - INFO - lr: 8.7670e-06 gnorm: 1.15 [19:58:19< 4:32:00] +[titan] 2025-10-05 18:32:41,866 - root - INFO - step: 32605 loss: 1.9357 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 18:32:41,866 - root - INFO - lr: 8.7621e-06 gnorm: 1.17 [19:58:30< 4:31:49] +[titan] 2025-10-05 18:32:52,744 - root - INFO - step: 32610 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7391 +[titan] 2025-10-05 18:32:52,744 - root - INFO - lr: 8.7571e-06 gnorm: 1.17 [19:58:41< 4:31:38] +[titan] 2025-10-05 18:33:03,626 - root - INFO - step: 32615 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.79 mfu: 42.24% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:33:03,626 - root - INFO - lr: 8.7522e-06 gnorm: 1.16 [19:58:52< 4:31:27] +[titan] 2025-10-05 18:33:14,510 - root - INFO - step: 32620 loss: 2.0190 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7843 +[titan] 2025-10-05 18:33:14,510 - root - INFO - lr: 8.7472e-06 gnorm: 1.19 [19:59:03< 4:31:16] +[titan] 2025-10-05 18:33:25,381 - root - INFO - step: 32625 loss: 1.9774 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7486 +[titan] 2025-10-05 18:33:25,381 - root - INFO - lr: 8.7423e-06 gnorm: 1.16 [19:59:14< 4:31:05] +[titan] 2025-10-05 18:33:36,243 - root - INFO - step: 32630 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 18:33:36,243 - root - INFO - lr: 
8.7374e-06 gnorm: 1.17 [19:59:25< 4:30:54] +[titan] 2025-10-05 18:33:47,116 - root - INFO - step: 32635 loss: 1.9936 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7634 +[titan] 2025-10-05 18:33:47,116 - root - INFO - lr: 8.7325e-06 gnorm: 1.15 [19:59:35< 4:30:43] +[titan] 2025-10-05 18:33:58,057 - root - INFO - step: 32640 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 29,950 tflops: 415.51 mfu: 42.01% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7509 +[titan] 2025-10-05 18:33:58,058 - root - INFO - lr: 8.7275e-06 gnorm: 1.20 [19:59:46< 4:30:32] +[titan] 2025-10-05 18:34:08,946 - root - INFO - step: 32645 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:34:08,946 - root - INFO - lr: 8.7226e-06 gnorm: 1.17 [19:59:57< 4:30:21] +[titan] 2025-10-05 18:34:17,634 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:34:19,814 - root - INFO - step: 32650 loss: 2.0019 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7704 +[titan] 2025-10-05 18:34:19,814 - root - INFO - lr: 8.7177e-06 gnorm: 1.14 [20:00:08< 4:30:10] +[titan] 2025-10-05 18:34:30,684 - root - INFO - step: 32655 loss: 2.0211 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2338 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 18:34:30,685 - root - INFO - lr: 8.7128e-06 gnorm: 1.21 [20:00:19< 4:29:59] +[titan] 2025-10-05 18:34:41,540 - root - INFO - step: 32660 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 18:34:41,540 - root - INFO - lr: 8.7079e-06 gnorm: 1.18 [20:00:30< 4:29:48] +[titan] 2025-10-05 18:34:52,419 - root - INFO - step: 32665 loss: 1.9116 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 18:34:52,419 - root - INFO - lr: 8.7030e-06 gnorm: 1.19 [20:00:41< 4:29:37] +[titan] 2025-10-05 18:35:03,284 - root - INFO - step: 32670 loss: 1.9841 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 18:35:03,284 - root - INFO - lr: 8.6981e-06 gnorm: 1.23 [20:00:52< 4:29:25] +[titan] 2025-10-05 18:35:14,164 - root - INFO - step: 32675 loss: 1.9923 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 18:35:14,164 - root - INFO - lr: 8.6932e-06 gnorm: 1.14 [20:01:02< 4:29:14] +[titan] 2025-10-05 18:35:25,030 - root - INFO - step: 32680 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 18:35:25,030 - root - INFO - lr: 
8.6883e-06 gnorm: 1.17 [20:01:13< 4:29:03] +[titan] 2025-10-05 18:35:35,901 - root - INFO - step: 32685 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 18:35:35,902 - root - INFO - lr: 8.6834e-06 gnorm: 1.19 [20:01:24< 4:28:52] +[titan] 2025-10-05 18:35:46,769 - root - INFO - step: 32690 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7393 +[titan] 2025-10-05 18:35:46,769 - root - INFO - lr: 8.6785e-06 gnorm: 1.17 [20:01:35< 4:28:41] +[titan] 2025-10-05 18:35:57,644 - root - INFO - step: 32695 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 18:35:57,644 - root - INFO - lr: 8.6736e-06 gnorm: 1.18 [20:01:46< 4:28:30] +[titan] 2025-10-05 18:36:06,324 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:36:08,504 - root - INFO - step: 32700 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 18:36:08,504 - root - INFO - lr: 8.6687e-06 gnorm: 1.16 [20:01:57< 4:28:19] +[titan] 2025-10-05 18:36:19,368 - root - INFO - step: 32705 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6516 +[titan] 2025-10-05 18:36:19,368 - root - INFO - lr: 8.6638e-06 gnorm: 1.12 [20:02:08< 4:28:08] +[titan] 2025-10-05 18:36:30,228 - root - INFO - step: 32710 loss: 1.9004 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 18:36:30,228 - root - INFO - lr: 8.6590e-06 gnorm: 1.17 [20:02:19< 4:27:57] +[titan] 2025-10-05 18:36:41,098 - root - INFO - step: 32715 loss: 1.9595 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7326 +[titan] 2025-10-05 18:36:41,098 - root - INFO - lr: 8.6541e-06 gnorm: 1.21 [20:02:29< 4:27:46] +[titan] 2025-10-05 18:36:51,987 - root - INFO - step: 32720 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 18:36:51,987 - root - INFO - lr: 8.6492e-06 gnorm: 1.15 [20:02:40< 4:27:35] +[titan] 2025-10-05 18:37:02,853 - root - INFO - step: 32725 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7097 +[titan] 2025-10-05 18:37:02,853 - root - INFO - lr: 8.6443e-06 gnorm: 1.18 [20:02:51< 4:27:24] +[titan] 2025-10-05 18:37:13,725 - root - INFO - step: 32730 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 18:37:13,725 - root - INFO - lr: 
8.6395e-06 gnorm: 1.18 [20:03:02< 4:27:13] +[titan] 2025-10-05 18:37:24,602 - root - INFO - step: 32735 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 18:37:24,603 - root - INFO - lr: 8.6346e-06 gnorm: 1.18 [20:03:13< 4:27:02] +[titan] 2025-10-05 18:37:35,486 - root - INFO - step: 32740 loss: 1.9828 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 18:37:35,486 - root - INFO - lr: 8.6297e-06 gnorm: 1.15 [20:03:24< 4:26:51] +[titan] 2025-10-05 18:37:46,374 - root - INFO - step: 32745 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 18:37:46,374 - root - INFO - lr: 8.6249e-06 gnorm: 1.18 [20:03:35< 4:26:40] +[titan] 2025-10-05 18:37:55,081 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:37:57,268 - root - INFO - step: 32750 loss: 1.9951 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.19% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7646 +[titan] 2025-10-05 18:37:57,269 - root - INFO - lr: 8.6200e-06 gnorm: 1.16 [20:03:46< 4:26:28] +[titan] 2025-10-05 18:38:08,165 - root - INFO - step: 32755 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7456 +[titan] 2025-10-05 18:38:08,165 - root - INFO - lr: 8.6152e-06 gnorm: 1.15 [20:03:56< 4:26:17] +[titan] 2025-10-05 18:38:19,048 - root - INFO - step: 32760 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 18:38:19,048 - root - INFO - lr: 8.6103e-06 gnorm: 1.17 [20:04:07< 4:26:06] +[titan] 2025-10-05 18:38:30,021 - root - INFO - step: 32765 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 29,864 tflops: 414.31 mfu: 41.89% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 18:38:30,021 - root - INFO - lr: 8.6055e-06 gnorm: 1.19 [20:04:18< 4:25:55] +[titan] 2025-10-05 18:38:36,735 - root - INFO - Dumping profiler traces at step 32768 +[titan] 2025-10-05 18:38:36,775 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:38:41,140 - root - INFO - step: 32770 loss: 1.9997 memory: 118.84GiB(85.28%) tps: 29,470 tflops: 408.86 mfu: 41.34% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7670 +[titan] 2025-10-05 18:38:41,140 - root - INFO - lr: 8.6006e-06 gnorm: 1.18 [20:04:29< 4:25:44] +[titan] 2025-10-05 18:38:52,028 - root - INFO - step: 32775 loss: 1.8866 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 18:38:52,028 - root - INFO - lr: 8.5958e-06 gnorm: 1.16 [20:04:40< 4:25:33] +[titan] 2025-10-05 18:39:02,921 - root - INFO - step: 32780 loss: 
1.9463 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 18:39:02,921 - root - INFO - lr: 8.5909e-06 gnorm: 1.17 [20:04:51< 4:25:22] +[titan] 2025-10-05 18:39:13,797 - root - INFO - step: 32785 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 18:39:13,797 - root - INFO - lr: 8.5861e-06 gnorm: 1.19 [20:05:02< 4:25:11] +[titan] 2025-10-05 18:39:24,687 - root - INFO - step: 32790 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 18:39:24,688 - root - INFO - lr: 8.5813e-06 gnorm: 1.18 [20:05:13< 4:25:00] +[titan] 2025-10-05 18:39:35,548 - root - INFO - step: 32795 loss: 1.9151 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 18:39:35,548 - root - INFO - lr: 8.5764e-06 gnorm: 1.17 [20:05:24< 4:24:49] +[titan] 2025-10-05 18:39:44,235 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:39:46,421 - root - INFO - step: 32800 loss: 1.9530 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 18:39:46,421 - root - INFO - lr: 8.5716e-06 gnorm: 1.14 [20:05:35< 4:24:38] +[titan] 2025-10-05 18:39:57,301 - root - INFO - step: 32805 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 18:39:57,301 - root - INFO - lr: 8.5668e-06 gnorm: 1.20 [20:05:46< 4:24:27] +[titan] 2025-10-05 18:40:08,174 - root - INFO - step: 32810 loss: 1.8700 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6536 +[titan] 2025-10-05 18:40:08,175 - root - INFO - lr: 8.5620e-06 gnorm: 1.15 [20:05:56< 4:24:16] +[titan] 2025-10-05 18:40:19,054 - root - INFO - step: 32815 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 18:40:19,054 - root - INFO - lr: 8.5572e-06 gnorm: 1.17 [20:06:07< 4:24:05] +[titan] 2025-10-05 18:40:29,922 - root - INFO - step: 32820 loss: 1.9673 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 18:40:29,922 - root - INFO - lr: 8.5523e-06 gnorm: 1.20 [20:06:18< 4:23:54] +[titan] 2025-10-05 18:40:40,814 - root - INFO - step: 32825 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7226 +[titan] 2025-10-05 18:40:40,814 - root - INFO - lr: 8.5475e-06 gnorm: 1.14 [20:06:29< 4:23:43] +[titan] 2025-10-05 18:40:51,678 - root - INFO - step: 32830 loss: 1.9398 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 18:40:51,678 - root - INFO - lr: 
8.5427e-06 gnorm: 1.23 [20:06:40< 4:23:32] +[titan] 2025-10-05 18:41:02,570 - root - INFO - step: 32835 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 18:41:02,570 - root - INFO - lr: 8.5379e-06 gnorm: 1.15 [20:06:51< 4:23:21] +[titan] 2025-10-05 18:41:13,446 - root - INFO - step: 32840 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7474 +[titan] 2025-10-05 18:41:13,447 - root - INFO - lr: 8.5331e-06 gnorm: 1.21 [20:07:02< 4:23:09] +[titan] 2025-10-05 18:41:24,359 - root - INFO - step: 32845 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:41:24,359 - root - INFO - lr: 8.5283e-06 gnorm: 1.16 [20:07:13< 4:22:58] +[titan] 2025-10-05 18:41:33,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:41:35,240 - root - INFO - step: 32850 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 18:41:35,240 - root - INFO - lr: 8.5235e-06 gnorm: 1.14 [20:07:24< 4:22:47] +[titan] 2025-10-05 18:41:46,124 - root - INFO - step: 32855 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 18:41:46,125 - root - INFO - lr: 8.5187e-06 gnorm: 1.17 [20:07:34< 4:22:36] +[titan] 2025-10-05 18:41:56,993 - root - INFO - step: 32860 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:41:56,993 - root - INFO - lr: 8.5139e-06 gnorm: 1.22 [20:07:45< 4:22:25] +[titan] 2025-10-05 18:42:07,859 - root - INFO - step: 32865 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 18:42:07,859 - root - INFO - lr: 8.5091e-06 gnorm: 1.20 [20:07:56< 4:22:14] +[titan] 2025-10-05 18:42:18,752 - root - INFO - step: 32870 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 18:42:18,752 - root - INFO - lr: 8.5044e-06 gnorm: 1.13 [20:08:07< 4:22:03] +[titan] 2025-10-05 18:42:29,644 - root - INFO - step: 32875 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 18:42:29,644 - root - INFO - lr: 8.4996e-06 gnorm: 1.19 [20:08:18< 4:21:52] +[titan] 2025-10-05 18:42:40,538 - root - INFO - step: 32880 loss: 1.9506 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 18:42:40,539 - root - INFO - lr: 
8.4948e-06 gnorm: 1.15 [20:08:29< 4:21:41] +[titan] 2025-10-05 18:42:51,405 - root - INFO - step: 32885 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.36 mfu: 42.30% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:42:51,405 - root - INFO - lr: 8.4900e-06 gnorm: 1.14 [20:08:40< 4:21:30] +[titan] 2025-10-05 18:43:02,281 - root - INFO - step: 32890 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 18:43:02,281 - root - INFO - lr: 8.4853e-06 gnorm: 1.17 [20:08:51< 4:21:19] +[titan] 2025-10-05 18:43:13,144 - root - INFO - step: 32895 loss: 1.9608 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7334 +[titan] 2025-10-05 18:43:13,144 - root - INFO - lr: 8.4805e-06 gnorm: 1.22 [20:09:01< 4:21:08] +[titan] 2025-10-05 18:43:21,842 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:43:24,029 - root - INFO - step: 32900 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7094 +[titan] 2025-10-05 18:43:24,030 - root - INFO - lr: 8.4757e-06 gnorm: 1.16 [20:09:12< 4:20:57] +[titan] 2025-10-05 18:43:34,912 - root - INFO - step: 32905 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 18:43:34,912 - root - INFO - lr: 8.4710e-06 gnorm: 1.22 [20:09:23< 4:20:46] +[titan] 2025-10-05 18:43:45,784 - root - INFO - step: 32910 loss: 1.9113 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6908 +[titan] 2025-10-05 18:43:45,784 - root - INFO - lr: 8.4662e-06 gnorm: 1.19 [20:09:34< 4:20:35] +[titan] 2025-10-05 18:43:56,656 - root - INFO - step: 32915 loss: 1.9080 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6877 +[titan] 2025-10-05 18:43:56,657 - root - INFO - lr: 8.4614e-06 gnorm: 1.15 [20:09:45< 4:20:24] +[titan] 2025-10-05 18:44:07,519 - root - INFO - step: 32920 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 18:44:07,520 - root - INFO - lr: 8.4567e-06 gnorm: 1.14 [20:09:56< 4:20:13] +[titan] 2025-10-05 18:44:18,364 - root - INFO - step: 32925 loss: 1.9235 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 18:44:18,364 - root - INFO - lr: 8.4519e-06 gnorm: 1.16 [20:10:07< 4:20:01] +[titan] 2025-10-05 18:44:29,222 - root - INFO - step: 32930 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:44:29,222 - root - INFO - lr: 
8.4472e-06 gnorm: 1.19 [20:10:18< 4:19:50] +[titan] 2025-10-05 18:44:40,084 - root - INFO - step: 32935 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 18:44:40,084 - root - INFO - lr: 8.4424e-06 gnorm: 1.20 [20:10:28< 4:19:39] +[titan] 2025-10-05 18:44:50,961 - root - INFO - step: 32940 loss: 2.0407 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2364 global_avg_mtp_loss: 1.8043 +[titan] 2025-10-05 18:44:50,961 - root - INFO - lr: 8.4377e-06 gnorm: 1.21 [20:10:39< 4:19:28] +[titan] 2025-10-05 18:45:01,827 - root - INFO - step: 32945 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 18:45:01,827 - root - INFO - lr: 8.4330e-06 gnorm: 1.18 [20:10:50< 4:19:17] +[titan] 2025-10-05 18:45:10,494 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:45:12,694 - root - INFO - step: 32950 loss: 1.9061 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 18:45:12,695 - root - INFO - lr: 8.4282e-06 gnorm: 1.17 [20:11:01< 4:19:06] +[titan] 2025-10-05 18:45:23,553 - root - INFO - step: 32955 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 18:45:23,553 - root - INFO - lr: 8.4235e-06 gnorm: 1.19 [20:11:12< 4:18:55] +[titan] 2025-10-05 18:45:34,381 - root - INFO - step: 32960 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,262 tflops: 419.84 mfu: 42.45% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 18:45:34,381 - root - INFO - lr: 8.4187e-06 gnorm: 1.16 [20:11:23< 4:18:44] +[titan] 2025-10-05 18:45:45,262 - root - INFO - step: 32965 loss: 2.0361 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.8005 +[titan] 2025-10-05 18:45:45,262 - root - INFO - lr: 8.4140e-06 gnorm: 1.21 [20:11:34< 4:18:33] +[titan] 2025-10-05 18:45:56,104 - root - INFO - step: 32970 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 18:45:56,105 - root - INFO - lr: 8.4093e-06 gnorm: 1.16 [20:11:44< 4:18:22] +[titan] 2025-10-05 18:46:06,947 - root - INFO - step: 32975 loss: 2.0320 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2362 global_avg_mtp_loss: 1.7958 +[titan] 2025-10-05 18:46:06,948 - root - INFO - lr: 8.4046e-06 gnorm: 1.24 [20:11:55< 4:18:11] +[titan] 2025-10-05 18:46:17,797 - root - INFO - step: 32980 loss: 1.9700 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7415 +[titan] 2025-10-05 18:46:17,797 - root - INFO - lr: 
8.3999e-06 gnorm: 1.16 [20:12:06< 4:18:00] +[titan] 2025-10-05 18:46:28,662 - root - INFO - step: 32985 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 18:46:28,662 - root - INFO - lr: 8.3951e-06 gnorm: 1.18 [20:12:17< 4:17:49] +[titan] 2025-10-05 18:46:39,537 - root - INFO - step: 32990 loss: 2.0395 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8028 +[titan] 2025-10-05 18:46:39,537 - root - INFO - lr: 8.3904e-06 gnorm: 1.22 [20:12:28< 4:17:38] +[titan] 2025-10-05 18:46:50,425 - root - INFO - step: 32995 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 18:46:50,425 - root - INFO - lr: 8.3857e-06 gnorm: 1.15 [20:12:39< 4:17:27] +[titan] 2025-10-05 18:46:59,142 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:47:01,344 - root - INFO - step: 33000 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.34 mfu: 42.10% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 18:47:01,344 - root - INFO - lr: 8.3810e-06 gnorm: 1.17 [20:12:50< 4:17:16] +[titan] 2025-10-05 18:47:12,220 - root - INFO - step: 33005 loss: 1.9810 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7515 +[titan] 2025-10-05 18:47:12,220 - root - INFO - lr: 8.3763e-06 gnorm: 1.17 [20:13:01< 4:17:05] +[titan] 2025-10-05 18:47:23,105 - root - INFO - step: 33010 loss: 2.0062 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2340 global_avg_mtp_loss: 1.7722 +[titan] 2025-10-05 18:47:23,105 - root - INFO - lr: 8.3716e-06 gnorm: 1.16 [20:13:11< 4:16:53] +[titan] 2025-10-05 18:47:33,991 - root - INFO - step: 33015 loss: 1.9630 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 18:47:33,991 - root - INFO - lr: 8.3669e-06 gnorm: 1.21 [20:13:22< 4:16:42] +[titan] 2025-10-05 18:47:44,854 - root - INFO - step: 33020 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 18:47:44,854 - root - INFO - lr: 8.3622e-06 gnorm: 1.20 [20:13:33< 4:16:31] +[titan] 2025-10-05 18:47:55,728 - root - INFO - step: 33025 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 18:47:55,728 - root - INFO - lr: 8.3575e-06 gnorm: 1.16 [20:13:44< 4:16:20] +[titan] 2025-10-05 18:48:06,621 - root - INFO - step: 33030 loss: 2.0033 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7705 +[titan] 2025-10-05 18:48:06,622 - root - INFO - lr: 
8.3528e-06 gnorm: 1.15 [20:13:55< 4:16:09] +[titan] 2025-10-05 18:48:17,519 - root - INFO - step: 33035 loss: 2.0726 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2395 global_avg_mtp_loss: 1.8332 +[titan] 2025-10-05 18:48:17,519 - root - INFO - lr: 8.3481e-06 gnorm: 1.22 [20:14:06< 4:15:58] +[titan] 2025-10-05 18:48:28,405 - root - INFO - step: 33040 loss: 1.9946 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7642 +[titan] 2025-10-05 18:48:28,405 - root - INFO - lr: 8.3435e-06 gnorm: 1.19 [20:14:17< 4:15:47] +[titan] 2025-10-05 18:48:39,282 - root - INFO - step: 33045 loss: 1.9543 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 18:48:39,282 - root - INFO - lr: 8.3388e-06 gnorm: 1.16 [20:14:28< 4:15:36] +[titan] 2025-10-05 18:48:47,979 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:48:50,167 - root - INFO - step: 33050 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7315 +[titan] 2025-10-05 18:48:50,167 - root - INFO - lr: 8.3341e-06 gnorm: 1.22 [20:14:38< 4:15:25] +[titan] 2025-10-05 18:49:01,041 - root - INFO - step: 33055 loss: 2.0134 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7799 +[titan] 2025-10-05 18:49:01,041 - root - INFO - lr: 8.3294e-06 gnorm: 1.21 [20:14:49< 4:15:14] +[titan] 2025-10-05 18:49:11,966 - root - INFO - step: 33060 loss: 1.9156 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.07% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6929 +[titan] 2025-10-05 18:49:11,967 - root - INFO - lr: 8.3248e-06 gnorm: 1.15 [20:15:00< 4:15:03] +[titan] 2025-10-05 18:49:22,851 - root - INFO - step: 33065 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7314 +[titan] 2025-10-05 18:49:22,851 - root - INFO - lr: 8.3201e-06 gnorm: 1.22 [20:15:11< 4:14:52] +[titan] 2025-10-05 18:49:33,699 - root - INFO - step: 33070 loss: 1.9488 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7229 +[titan] 2025-10-05 18:49:33,699 - root - INFO - lr: 8.3154e-06 gnorm: 1.17 [20:15:22< 4:14:41] +[titan] 2025-10-05 18:49:44,562 - root - INFO - step: 33075 loss: 2.0173 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7837 +[titan] 2025-10-05 18:49:44,562 - root - INFO - lr: 8.3108e-06 gnorm: 1.18 [20:15:33< 4:14:30] +[titan] 2025-10-05 18:49:55,434 - root - INFO - step: 33080 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6975 +[titan] 2025-10-05 18:49:55,434 - root - INFO - lr: 
8.3061e-06 gnorm: 1.15 [20:15:44< 4:14:19] +[titan] 2025-10-05 18:50:06,341 - root - INFO - step: 33085 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 18:50:06,341 - root - INFO - lr: 8.3015e-06 gnorm: 1.19 [20:15:55< 4:14:08] +[titan] 2025-10-05 18:50:17,204 - root - INFO - step: 33090 loss: 1.9583 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7313 +[titan] 2025-10-05 18:50:17,204 - root - INFO - lr: 8.2968e-06 gnorm: 1.17 [20:16:05< 4:13:57] +[titan] 2025-10-05 18:50:28,085 - root - INFO - step: 33095 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7508 +[titan] 2025-10-05 18:50:28,085 - root - INFO - lr: 8.2922e-06 gnorm: 1.19 [20:16:16< 4:13:46] +[titan] 2025-10-05 18:50:36,770 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:50:38,970 - root - INFO - step: 33100 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 18:50:38,970 - root - INFO - lr: 8.2875e-06 gnorm: 1.16 [20:16:27< 4:13:34] +[titan] 2025-10-05 18:50:49,853 - root - INFO - step: 33105 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:50:49,853 - root - INFO - lr: 8.2829e-06 gnorm: 1.18 [20:16:38< 4:13:23] +[titan] 2025-10-05 18:51:00,737 - root - INFO - step: 33110 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 18:51:00,737 - root - INFO - lr: 8.2782e-06 gnorm: 1.20 [20:16:49< 4:13:12] +[titan] 2025-10-05 18:51:11,650 - root - INFO - step: 33115 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.59 mfu: 42.12% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7236 +[titan] 2025-10-05 18:51:11,650 - root - INFO - lr: 8.2736e-06 gnorm: 1.18 [20:17:00< 4:13:01] +[titan] 2025-10-05 18:51:22,517 - root - INFO - step: 33120 loss: 1.9328 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 18:51:22,517 - root - INFO - lr: 8.2690e-06 gnorm: 1.15 [20:17:11< 4:12:50] +[titan] 2025-10-05 18:51:33,423 - root - INFO - step: 33125 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.87 mfu: 42.15% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7534 +[titan] 2025-10-05 18:51:33,423 - root - INFO - lr: 8.2643e-06 gnorm: 1.16 [20:17:22< 4:12:39] +[titan] 2025-10-05 18:51:44,314 - root - INFO - step: 33130 loss: 1.9891 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 18:51:44,314 - root - INFO - lr: 
8.2597e-06 gnorm: 1.16 [20:17:33< 4:12:28] +[titan] 2025-10-05 18:51:55,207 - root - INFO - step: 33135 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7294 +[titan] 2025-10-05 18:51:55,207 - root - INFO - lr: 8.2551e-06 gnorm: 1.19 [20:17:43< 4:12:17] +[titan] 2025-10-05 18:52:06,124 - root - INFO - step: 33140 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 18:52:06,124 - root - INFO - lr: 8.2504e-06 gnorm: 1.15 [20:17:54< 4:12:06] +[titan] 2025-10-05 18:52:17,015 - root - INFO - step: 33145 loss: 1.8716 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6545 +[titan] 2025-10-05 18:52:17,015 - root - INFO - lr: 8.2458e-06 gnorm: 1.17 [20:18:05< 4:11:55] +[titan] 2025-10-05 18:52:25,707 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:52:27,896 - root - INFO - step: 33150 loss: 2.0582 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2393 global_avg_mtp_loss: 1.8189 +[titan] 2025-10-05 18:52:27,896 - root - INFO - lr: 8.2412e-06 gnorm: 1.28 [20:18:16< 4:11:44] +[titan] 2025-10-05 18:52:38,755 - root - INFO - step: 33155 loss: 1.9340 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 18:52:38,755 - root - INFO - lr: 8.2366e-06 gnorm: 1.17 [20:18:27< 4:11:33] +[titan] 2025-10-05 18:52:49,651 - root - INFO - step: 33160 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6592 +[titan] 2025-10-05 18:52:49,651 - root - INFO - lr: 8.2320e-06 gnorm: 1.14 [20:18:38< 4:11:22] +[titan] 2025-10-05 18:53:00,524 - root - INFO - step: 33165 loss: 1.9294 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7065 +[titan] 2025-10-05 18:53:00,525 - root - INFO - lr: 8.2274e-06 gnorm: 1.16 [20:18:49< 4:11:11] +[titan] 2025-10-05 18:53:11,434 - root - INFO - step: 33170 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 18:53:11,435 - root - INFO - lr: 8.2228e-06 gnorm: 1.19 [20:19:00< 4:11:00] +[titan] 2025-10-05 18:53:22,306 - root - INFO - step: 33175 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7342 +[titan] 2025-10-05 18:53:22,306 - root - INFO - lr: 8.2182e-06 gnorm: 1.21 [20:19:11< 4:10:49] +[titan] 2025-10-05 18:53:33,152 - root - INFO - step: 33180 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 18:53:33,152 - root - INFO - lr: 
8.2136e-06 gnorm: 1.19 [20:19:21< 4:10:38] +[titan] 2025-10-05 18:53:44,004 - root - INFO - step: 33185 loss: 1.9935 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7627 +[titan] 2025-10-05 18:53:44,004 - root - INFO - lr: 8.2090e-06 gnorm: 1.16 [20:19:32< 4:10:27] +[titan] 2025-10-05 18:53:54,872 - root - INFO - step: 33190 loss: 1.9519 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 18:53:54,872 - root - INFO - lr: 8.2044e-06 gnorm: 1.21 [20:19:43< 4:10:15] +[titan] 2025-10-05 18:54:05,750 - root - INFO - step: 33195 loss: 2.0158 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7827 +[titan] 2025-10-05 18:54:05,750 - root - INFO - lr: 8.1998e-06 gnorm: 1.18 [20:19:54< 4:10:04] +[titan] 2025-10-05 18:54:14,509 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:54:16,691 - root - INFO - step: 33200 loss: 2.0100 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7777 +[titan] 2025-10-05 18:54:16,691 - root - INFO - lr: 8.1952e-06 gnorm: 1.21 [20:20:05< 4:09:53] +[titan] 2025-10-05 18:54:27,562 - root - INFO - step: 33205 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7204 +[titan] 2025-10-05 18:54:27,563 - root - INFO - lr: 8.1906e-06 gnorm: 1.18 [20:20:16< 4:09:42] +[titan] 2025-10-05 18:54:38,424 - root - INFO - step: 33210 loss: 1.9533 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 18:54:38,424 - root - INFO - lr: 8.1861e-06 gnorm: 1.19 [20:20:27< 4:09:31] +[titan] 2025-10-05 18:54:49,288 - root - INFO - step: 33215 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 18:54:49,289 - root - INFO - lr: 8.1815e-06 gnorm: 1.22 [20:20:38< 4:09:20] +[titan] 2025-10-05 18:55:00,163 - root - INFO - step: 33220 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 18:55:00,163 - root - INFO - lr: 8.1769e-06 gnorm: 1.21 [20:20:48< 4:09:09] +[titan] 2025-10-05 18:55:11,094 - root - INFO - step: 33225 loss: 2.0202 memory: 118.84GiB(85.28%) tps: 29,978 tflops: 415.90 mfu: 42.05% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7865 +[titan] 2025-10-05 18:55:11,094 - root - INFO - lr: 8.1723e-06 gnorm: 1.22 [20:20:59< 4:08:58] +[titan] 2025-10-05 18:55:21,957 - root - INFO - step: 33230 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 18:55:21,957 - root - INFO - lr: 
8.1678e-06 gnorm: 1.18 [20:21:10< 4:08:47] +[titan] 2025-10-05 18:55:32,818 - root - INFO - step: 33235 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 18:55:32,818 - root - INFO - lr: 8.1632e-06 gnorm: 1.16 [20:21:21< 4:08:36] +[titan] 2025-10-05 18:55:43,665 - root - INFO - step: 33240 loss: 2.0182 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.11 mfu: 42.38% global_avg_ntp_loss: 0.2343 global_avg_mtp_loss: 1.7839 +[titan] 2025-10-05 18:55:43,665 - root - INFO - lr: 8.1586e-06 gnorm: 1.18 [20:21:32< 4:08:25] +[titan] 2025-10-05 18:55:54,510 - root - INFO - step: 33245 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 18:55:54,510 - root - INFO - lr: 8.1541e-06 gnorm: 1.19 [20:21:43< 4:08:14] +[titan] 2025-10-05 18:56:03,189 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:56:05,369 - root - INFO - step: 33250 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 18:56:05,369 - root - INFO - lr: 8.1495e-06 gnorm: 1.22 [20:21:54< 4:08:03] +[titan] 2025-10-05 18:56:16,302 - root - INFO - step: 33255 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 18:56:16,302 - root - INFO - lr: 8.1450e-06 gnorm: 1.21 [20:22:05< 4:07:52] +[titan] 2025-10-05 18:56:27,156 - root - INFO - step: 33260 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 18:56:27,156 - root - INFO - lr: 8.1404e-06 gnorm: 1.17 [20:22:15< 4:07:41] +[titan] 2025-10-05 18:56:37,991 - root - INFO - step: 33265 loss: 2.0079 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7756 +[titan] 2025-10-05 18:56:37,991 - root - INFO - lr: 8.1359e-06 gnorm: 1.18 [20:22:26< 4:07:30] +[titan] 2025-10-05 18:56:48,867 - root - INFO - step: 33270 loss: 1.9447 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 18:56:48,867 - root - INFO - lr: 8.1313e-06 gnorm: 1.14 [20:22:37< 4:07:19] +[titan] 2025-10-05 18:56:59,716 - root - INFO - step: 33275 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 18:56:59,716 - root - INFO - lr: 8.1268e-06 gnorm: 1.16 [20:22:48< 4:07:08] +[titan] 2025-10-05 18:57:10,748 - root - INFO - step: 33280 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 29,703 tflops: 412.08 mfu: 41.67% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7150 +[titan] 2025-10-05 18:57:10,749 - root - INFO - lr: 
8.1223e-06 gnorm: 1.20 [20:22:59< 4:06:57] +[titan] 2025-10-05 18:57:10,951 - root - INFO - Dumping profiler traces at step 33280 +[titan] 2025-10-05 18:57:10,994 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 18:57:21,865 - root - INFO - step: 33285 loss: 1.9789 memory: 118.84GiB(85.28%) tps: 29,477 tflops: 408.95 mfu: 41.35% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 18:57:21,865 - root - INFO - lr: 8.1177e-06 gnorm: 1.18 [20:23:10< 4:06:46] +[titan] 2025-10-05 18:57:32,746 - root - INFO - step: 33290 loss: 1.9692 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 18:57:32,747 - root - INFO - lr: 8.1132e-06 gnorm: 1.19 [20:23:21< 4:06:34] +[titan] 2025-10-05 18:57:43,611 - root - INFO - step: 33295 loss: 2.0126 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7787 +[titan] 2025-10-05 18:57:43,611 - root - INFO - lr: 8.1087e-06 gnorm: 1.19 [20:23:32< 4:06:23] +[titan] 2025-10-05 18:57:52,270 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:57:54,457 - root - INFO - step: 33300 loss: 2.0080 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7759 +[titan] 2025-10-05 18:57:54,457 - root - INFO - lr: 8.1041e-06 gnorm: 1.22 [20:23:43< 4:06:12] +[titan] 2025-10-05 18:58:05,339 - root - INFO - step: 33305 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7178 +[titan] 2025-10-05 18:58:05,339 - root - INFO - lr: 8.0996e-06 gnorm: 1.21 [20:23:54< 4:06:01] +[titan] 2025-10-05 18:58:16,262 - root - INFO - step: 33310 loss: 1.9183 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 18:58:16,263 - root - INFO - lr: 8.0951e-06 gnorm: 1.24 [20:24:05< 4:05:50] +[titan] 2025-10-05 18:58:27,116 - root - INFO - step: 33315 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 18:58:27,116 - root - INFO - lr: 8.0906e-06 gnorm: 1.20 [20:24:15< 4:05:39] +[titan] 2025-10-05 18:58:38,011 - root - INFO - step: 33320 loss: 1.9814 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 18:58:38,011 - root - INFO - lr: 8.0861e-06 gnorm: 1.18 [20:24:26< 4:05:28] +[titan] 2025-10-05 18:58:48,874 - root - INFO - step: 33325 loss: 1.9166 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 18:58:48,874 - root - INFO - lr: 8.0816e-06 gnorm: 1.17 [20:24:37< 4:05:17] +[titan] 2025-10-05 18:58:59,752 - root - INFO - step: 33330 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 18:58:59,752 - root - INFO - lr: 
8.0771e-06 gnorm: 1.18 [20:24:48< 4:05:06] +[titan] 2025-10-05 18:59:10,641 - root - INFO - step: 33335 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 18:59:10,641 - root - INFO - lr: 8.0725e-06 gnorm: 1.20 [20:24:59< 4:04:55] +[titan] 2025-10-05 18:59:21,564 - root - INFO - step: 33340 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 30,000 tflops: 416.20 mfu: 42.08% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 18:59:21,564 - root - INFO - lr: 8.0680e-06 gnorm: 1.15 [20:25:10< 4:04:44] +[titan] 2025-10-05 18:59:32,450 - root - INFO - step: 33345 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 18:59:32,450 - root - INFO - lr: 8.0636e-06 gnorm: 1.18 [20:25:21< 4:04:33] +[titan] 2025-10-05 18:59:41,166 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 18:59:43,356 - root - INFO - step: 33350 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 18:59:43,356 - root - INFO - lr: 8.0591e-06 gnorm: 1.16 [20:25:32< 4:04:22] +[titan] 2025-10-05 18:59:54,223 - root - INFO - step: 33355 loss: 1.9358 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7114 +[titan] 2025-10-05 18:59:54,223 - root - INFO - lr: 8.0546e-06 gnorm: 1.18 [20:25:42< 4:04:11] +[titan] 2025-10-05 19:00:05,102 - root - INFO - step: 33360 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 19:00:05,102 - root - INFO - lr: 8.0501e-06 gnorm: 1.17 [20:25:53< 4:04:00] +[titan] 2025-10-05 19:00:16,037 - root - INFO - step: 33365 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 29,967 tflops: 415.75 mfu: 42.04% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:00:16,037 - root - INFO - lr: 8.0456e-06 gnorm: 1.20 [20:26:04< 4:03:49] +[titan] 2025-10-05 19:00:26,915 - root - INFO - step: 33370 loss: 1.9660 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:00:26,916 - root - INFO - lr: 8.0411e-06 gnorm: 1.20 [20:26:15< 4:03:38] +[titan] 2025-10-05 19:00:37,762 - root - INFO - step: 33375 loss: 1.9706 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:00:37,763 - root - INFO - lr: 8.0366e-06 gnorm: 1.21 [20:26:26< 4:03:27] +[titan] 2025-10-05 19:00:48,614 - root - INFO - step: 33380 loss: 1.9232 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:00:48,614 - root - INFO - lr: 
8.0322e-06 gnorm: 1.18 [20:26:37< 4:03:16] +[titan] 2025-10-05 19:00:59,483 - root - INFO - step: 33385 loss: 1.9453 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:00:59,483 - root - INFO - lr: 8.0277e-06 gnorm: 1.19 [20:26:48< 4:03:04] +[titan] 2025-10-05 19:01:10,340 - root - INFO - step: 33390 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 19:01:10,340 - root - INFO - lr: 8.0232e-06 gnorm: 1.19 [20:26:59< 4:02:53] +[titan] 2025-10-05 19:01:21,250 - root - INFO - step: 33395 loss: 1.9470 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 19:01:21,250 - root - INFO - lr: 8.0187e-06 gnorm: 1.17 [20:27:10< 4:02:42] +[titan] 2025-10-05 19:01:29,920 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:01:32,102 - root - INFO - step: 33400 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 19:01:32,102 - root - INFO - lr: 8.0143e-06 gnorm: 1.17 [20:27:20< 4:02:31] +[titan] 2025-10-05 19:01:42,959 - root - INFO - step: 33405 loss: 1.8686 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 19:01:42,960 - root - INFO - lr: 8.0098e-06 gnorm: 1.18 [20:27:31< 4:02:20] +[titan] 2025-10-05 19:01:53,819 - root - INFO - step: 33410 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 19:01:53,819 - root - INFO - lr: 8.0054e-06 gnorm: 1.19 [20:27:42< 4:02:09] +[titan] 2025-10-05 19:02:04,734 - root - INFO - step: 33415 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7409 +[titan] 2025-10-05 19:02:04,734 - root - INFO - lr: 8.0009e-06 gnorm: 1.18 [20:27:53< 4:01:58] +[titan] 2025-10-05 19:02:15,660 - root - INFO - step: 33420 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 29,991 tflops: 416.08 mfu: 42.07% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 19:02:15,661 - root - INFO - lr: 7.9965e-06 gnorm: 1.20 [20:28:04< 4:01:47] +[titan] 2025-10-05 19:02:26,561 - root - INFO - step: 33425 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7432 +[titan] 2025-10-05 19:02:26,561 - root - INFO - lr: 7.9920e-06 gnorm: 1.15 [20:28:15< 4:01:36] +[titan] 2025-10-05 19:02:37,445 - root - INFO - step: 33430 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 19:02:37,445 - root - INFO - lr: 
7.9876e-06 gnorm: 1.18 [20:28:26< 4:01:25] +[titan] 2025-10-05 19:02:48,327 - root - INFO - step: 33435 loss: 1.9181 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:02:48,327 - root - INFO - lr: 7.9831e-06 gnorm: 1.16 [20:28:37< 4:01:14] +[titan] 2025-10-05 19:02:59,208 - root - INFO - step: 33440 loss: 1.9304 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7068 +[titan] 2025-10-05 19:02:59,209 - root - INFO - lr: 7.9787e-06 gnorm: 1.20 [20:28:47< 4:01:03] +[titan] 2025-10-05 19:03:10,117 - root - INFO - step: 33445 loss: 2.0526 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2377 global_avg_mtp_loss: 1.8150 +[titan] 2025-10-05 19:03:10,117 - root - INFO - lr: 7.9742e-06 gnorm: 1.21 [20:28:58< 4:00:52] +[titan] 2025-10-05 19:03:18,858 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:03:21,042 - root - INFO - step: 33450 loss: 1.9353 memory: 118.84GiB(85.28%) tps: 29,994 tflops: 416.12 mfu: 42.08% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:03:21,042 - root - INFO - lr: 7.9698e-06 gnorm: 1.16 [20:29:09< 4:00:41] +[titan] 2025-10-05 19:03:31,901 - root - INFO - step: 33455 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:03:31,901 - root - INFO - lr: 7.9654e-06 gnorm: 1.18 [20:29:20< 4:00:30] +[titan] 2025-10-05 19:03:42,767 - root - INFO - step: 33460 loss: 1.9820 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:03:42,767 - root - INFO - lr: 7.9610e-06 gnorm: 1.19 [20:29:31< 4:00:19] +[titan] 2025-10-05 19:03:53,626 - root - INFO - step: 33465 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:03:53,626 - root - INFO - lr: 7.9565e-06 gnorm: 1.20 [20:29:42< 4:00:08] +[titan] 2025-10-05 19:04:04,499 - root - INFO - step: 33470 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:04:04,499 - root - INFO - lr: 7.9521e-06 gnorm: 1.22 [20:29:53< 3:59:57] +[titan] 2025-10-05 19:04:15,390 - root - INFO - step: 33475 loss: 1.9236 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:04:15,390 - root - INFO - lr: 7.9477e-06 gnorm: 1.15 [20:30:04< 3:59:46] +[titan] 2025-10-05 19:04:26,338 - root - INFO - step: 33480 loss: 1.9298 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 19:04:26,338 - root - INFO - lr: 
7.9433e-06 gnorm: 1.18 [20:30:15< 3:59:34] +[titan] 2025-10-05 19:04:37,222 - root - INFO - step: 33485 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 19:04:37,222 - root - INFO - lr: 7.9389e-06 gnorm: 1.12 [20:30:25< 3:59:23] +[titan] 2025-10-05 19:04:48,095 - root - INFO - step: 33490 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 19:04:48,095 - root - INFO - lr: 7.9345e-06 gnorm: 1.17 [20:30:36< 3:59:12] +[titan] 2025-10-05 19:04:58,969 - root - INFO - step: 33495 loss: 2.0094 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7772 +[titan] 2025-10-05 19:04:58,969 - root - INFO - lr: 7.9301e-06 gnorm: 1.19 [20:30:47< 3:59:01] +[titan] 2025-10-05 19:05:07,650 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:05:09,844 - root - INFO - step: 33500 loss: 1.9728 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 19:05:09,844 - root - INFO - lr: 7.9256e-06 gnorm: 1.16 [20:30:58< 3:58:50] +[titan] 2025-10-05 19:05:20,803 - root - INFO - step: 33505 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 29,902 tflops: 414.84 mfu: 41.95% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7103 +[titan] 2025-10-05 19:05:20,803 - root - INFO - lr: 7.9212e-06 gnorm: 1.17 [20:31:09< 3:58:39] +[titan] 2025-10-05 19:05:31,705 - root - INFO - step: 33510 loss: 2.0064 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 19:05:31,706 - root - INFO - lr: 7.9169e-06 gnorm: 1.18 [20:31:20< 3:58:28] +[titan] 2025-10-05 19:05:42,585 - root - INFO - step: 33515 loss: 2.0191 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7849 +[titan] 2025-10-05 19:05:42,585 - root - INFO - lr: 7.9125e-06 gnorm: 1.16 [20:31:31< 3:58:17] +[titan] 2025-10-05 19:05:53,459 - root - INFO - step: 33520 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7445 +[titan] 2025-10-05 19:05:53,459 - root - INFO - lr: 7.9081e-06 gnorm: 1.20 [20:31:42< 3:58:06] +[titan] 2025-10-05 19:06:04,332 - root - INFO - step: 33525 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6778 +[titan] 2025-10-05 19:06:04,332 - root - INFO - lr: 7.9037e-06 gnorm: 1.12 [20:31:53< 3:57:55] +[titan] 2025-10-05 19:06:15,198 - root - INFO - step: 33530 loss: 1.9813 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7514 +[titan] 2025-10-05 19:06:15,198 - root - INFO - lr: 
7.8993e-06 gnorm: 1.17 [20:32:03< 3:57:44] +[titan] 2025-10-05 19:06:26,152 - root - INFO - step: 33535 loss: 1.9859 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 19:06:26,152 - root - INFO - lr: 7.8949e-06 gnorm: 1.24 [20:32:14< 3:57:33] +[titan] 2025-10-05 19:06:37,024 - root - INFO - step: 33540 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7744 +[titan] 2025-10-05 19:06:37,024 - root - INFO - lr: 7.8905e-06 gnorm: 1.22 [20:32:25< 3:57:22] +[titan] 2025-10-05 19:06:47,931 - root - INFO - step: 33545 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7297 +[titan] 2025-10-05 19:06:47,931 - root - INFO - lr: 7.8862e-06 gnorm: 1.24 [20:32:36< 3:57:11] +[titan] 2025-10-05 19:06:56,619 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:06:58,805 - root - INFO - step: 33550 loss: 1.9223 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 19:06:58,805 - root - INFO - lr: 7.8818e-06 gnorm: 1.17 [20:32:47< 3:57:00] +[titan] 2025-10-05 19:07:09,652 - root - INFO - step: 33555 loss: 1.9140 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:07:09,653 - root - INFO - lr: 7.8774e-06 gnorm: 1.21 [20:32:58< 3:56:49] +[titan] 2025-10-05 19:07:20,562 - root - INFO - step: 33560 loss: 1.9473 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:07:20,563 - root - INFO - lr: 7.8731e-06 gnorm: 1.23 [20:33:09< 3:56:38] +[titan] 2025-10-05 19:07:31,425 - root - INFO - step: 33565 loss: 1.8946 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 19:07:31,425 - root - INFO - lr: 7.8687e-06 gnorm: 1.19 [20:33:20< 3:56:27] +[titan] 2025-10-05 19:07:42,303 - root - INFO - step: 33570 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:07:42,303 - root - INFO - lr: 7.8643e-06 gnorm: 1.20 [20:33:31< 3:56:16] +[titan] 2025-10-05 19:07:53,210 - root - INFO - step: 33575 loss: 1.9262 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:07:53,211 - root - INFO - lr: 7.8600e-06 gnorm: 1.18 [20:33:41< 3:56:05] +[titan] 2025-10-05 19:08:04,072 - root - INFO - step: 33580 loss: 2.0042 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7723 +[titan] 2025-10-05 19:08:04,072 - root - INFO - lr: 
7.8556e-06 gnorm: 1.18 [20:33:52< 3:55:53] +[titan] 2025-10-05 19:08:14,947 - root - INFO - step: 33585 loss: 1.8953 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 19:08:14,947 - root - INFO - lr: 7.8513e-06 gnorm: 1.14 [20:34:03< 3:55:42] +[titan] 2025-10-05 19:08:25,883 - root - INFO - step: 33590 loss: 1.9998 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7683 +[titan] 2025-10-05 19:08:25,883 - root - INFO - lr: 7.8469e-06 gnorm: 1.19 [20:34:14< 3:55:31] +[titan] 2025-10-05 19:08:36,748 - root - INFO - step: 33595 loss: 1.8788 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6618 +[titan] 2025-10-05 19:08:36,748 - root - INFO - lr: 7.8426e-06 gnorm: 1.17 [20:34:25< 3:55:20] +[titan] 2025-10-05 19:08:45,430 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:08:47,610 - root - INFO - step: 33600 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7176 +[titan] 2025-10-05 19:08:47,610 - root - INFO - lr: 7.8382e-06 gnorm: 1.20 [20:34:36< 3:55:09] +[titan] 2025-10-05 19:08:58,491 - root - INFO - step: 33605 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7542 +[titan] 2025-10-05 19:08:58,491 - root - INFO - lr: 7.8339e-06 gnorm: 1.19 [20:34:47< 3:54:58] +[titan] 2025-10-05 19:09:09,347 - root - INFO - step: 33610 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 19:09:09,347 - root - INFO - lr: 7.8296e-06 gnorm: 1.17 [20:34:58< 3:54:47] +[titan] 2025-10-05 19:09:20,217 - root - INFO - step: 33615 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7498 +[titan] 2025-10-05 19:09:20,217 - root - INFO - lr: 7.8252e-06 gnorm: 1.18 [20:35:08< 3:54:36] +[titan] 2025-10-05 19:09:31,144 - root - INFO - step: 33620 loss: 1.9273 memory: 118.84GiB(85.28%) tps: 29,990 tflops: 416.06 mfu: 42.07% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 19:09:31,144 - root - INFO - lr: 7.8209e-06 gnorm: 1.16 [20:35:19< 3:54:25] +[titan] 2025-10-05 19:09:41,985 - root - INFO - step: 33625 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6870 +[titan] 2025-10-05 19:09:41,985 - root - INFO - lr: 7.8166e-06 gnorm: 1.18 [20:35:30< 3:54:14] +[titan] 2025-10-05 19:09:52,855 - root - INFO - step: 33630 loss: 1.9919 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7601 +[titan] 2025-10-05 19:09:52,855 - root - INFO - lr: 
7.8123e-06 gnorm: 1.21 [20:35:41< 3:54:03] +[titan] 2025-10-05 19:10:03,725 - root - INFO - step: 33635 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:10:03,725 - root - INFO - lr: 7.8080e-06 gnorm: 1.19 [20:35:52< 3:53:52] +[titan] 2025-10-05 19:10:14,597 - root - INFO - step: 33640 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6948 +[titan] 2025-10-05 19:10:14,597 - root - INFO - lr: 7.8036e-06 gnorm: 1.18 [20:36:03< 3:53:41] +[titan] 2025-10-05 19:10:25,501 - root - INFO - step: 33645 loss: 1.9523 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 19:10:25,501 - root - INFO - lr: 7.7993e-06 gnorm: 1.17 [20:36:14< 3:53:30] +[titan] 2025-10-05 19:10:34,172 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:10:36,352 - root - INFO - step: 33650 loss: 2.0136 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2336 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:10:36,352 - root - INFO - lr: 7.7950e-06 gnorm: 1.18 [20:36:25< 3:53:19] +[titan] 2025-10-05 19:10:47,197 - root - INFO - step: 33655 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 19:10:47,197 - root - INFO - lr: 7.7907e-06 gnorm: 1.18 [20:36:35< 3:53:08] +[titan] 2025-10-05 19:10:58,037 - root - INFO - step: 33660 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:10:58,037 - root - INFO - lr: 7.7864e-06 gnorm: 1.20 [20:36:46< 3:52:57] +[titan] 2025-10-05 19:11:08,895 - root - INFO - step: 33665 loss: 1.9240 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7018 +[titan] 2025-10-05 19:11:08,895 - root - INFO - lr: 7.7821e-06 gnorm: 1.18 [20:36:57< 3:52:46] +[titan] 2025-10-05 19:11:19,804 - root - INFO - step: 33670 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:11:19,805 - root - INFO - lr: 7.7778e-06 gnorm: 1.13 [20:37:08< 3:52:35] +[titan] 2025-10-05 19:11:30,707 - root - INFO - step: 33675 loss: 1.9811 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:11:30,707 - root - INFO - lr: 7.7735e-06 gnorm: 1.19 [20:37:19< 3:52:24] +[titan] 2025-10-05 19:11:41,571 - root - INFO - step: 33680 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 19:11:41,571 - root - INFO - lr: 
7.7692e-06 gnorm: 1.15 [20:37:30< 3:52:12] +[titan] 2025-10-05 19:11:52,439 - root - INFO - step: 33685 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7395 +[titan] 2025-10-05 19:11:52,439 - root - INFO - lr: 7.7649e-06 gnorm: 1.17 [20:37:41< 3:52:01] +[titan] 2025-10-05 19:12:03,278 - root - INFO - step: 33690 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7553 +[titan] 2025-10-05 19:12:03,278 - root - INFO - lr: 7.7606e-06 gnorm: 1.21 [20:37:52< 3:51:50] +[titan] 2025-10-05 19:12:14,126 - root - INFO - step: 33695 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:12:14,126 - root - INFO - lr: 7.7564e-06 gnorm: 1.23 [20:38:02< 3:51:39] +[titan] 2025-10-05 19:12:22,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:12:25,032 - root - INFO - step: 33700 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 19:12:25,032 - root - INFO - lr: 7.7521e-06 gnorm: 1.18 [20:38:13< 3:51:28] +[titan] 2025-10-05 19:12:35,912 - root - INFO - step: 33705 loss: 1.9759 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7470 +[titan] 2025-10-05 19:12:35,913 - root - INFO - lr: 7.7478e-06 gnorm: 1.21 [20:38:24< 3:51:17] +[titan] 2025-10-05 19:12:46,776 - root - INFO - step: 33710 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 19:12:46,776 - root - INFO - lr: 7.7435e-06 gnorm: 1.19 [20:38:35< 3:51:06] +[titan] 2025-10-05 19:12:57,642 - root - INFO - step: 33715 loss: 1.9779 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 19:12:57,642 - root - INFO - lr: 7.7393e-06 gnorm: 1.17 [20:38:46< 3:50:55] +[titan] 2025-10-05 19:13:08,509 - root - INFO - step: 33720 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6902 +[titan] 2025-10-05 19:13:08,509 - root - INFO - lr: 7.7350e-06 gnorm: 1.19 [20:38:57< 3:50:44] +[titan] 2025-10-05 19:13:19,370 - root - INFO - step: 33725 loss: 1.9079 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:13:19,370 - root - INFO - lr: 7.7307e-06 gnorm: 1.18 [20:39:08< 3:50:33] +[titan] 2025-10-05 19:13:30,375 - root - INFO - step: 33730 loss: 1.9645 memory: 118.84GiB(85.28%) tps: 29,776 tflops: 413.09 mfu: 41.77% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7371 +[titan] 2025-10-05 19:13:30,375 - root - INFO - lr: 
7.7265e-06 gnorm: 1.18 [20:39:19< 3:50:22] +[titan] 2025-10-05 19:13:41,281 - root - INFO - step: 33735 loss: 1.9102 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 19:13:41,281 - root - INFO - lr: 7.7222e-06 gnorm: 1.18 [20:39:30< 3:50:11] +[titan] 2025-10-05 19:13:52,137 - root - INFO - step: 33740 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 19:13:52,137 - root - INFO - lr: 7.7180e-06 gnorm: 1.20 [20:39:40< 3:50:00] +[titan] 2025-10-05 19:14:03,003 - root - INFO - step: 33745 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 19:14:03,003 - root - INFO - lr: 7.7137e-06 gnorm: 1.17 [20:39:51< 3:49:49] +[titan] 2025-10-05 19:14:11,685 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:14:13,871 - root - INFO - step: 33750 loss: 2.0153 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7820 +[titan] 2025-10-05 19:14:13,871 - root - INFO - lr: 7.7095e-06 gnorm: 1.23 [20:40:02< 3:49:38] +[titan] 2025-10-05 19:14:24,752 - root - INFO - step: 33755 loss: 1.8533 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2139 global_avg_mtp_loss: 1.6394 +[titan] 2025-10-05 19:14:24,752 - root - INFO - lr: 7.7052e-06 gnorm: 1.18 [20:40:13< 3:49:27] +[titan] 2025-10-05 19:14:35,653 - root - INFO - step: 33760 loss: 1.9974 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7664 +[titan] 2025-10-05 19:14:35,654 - root - INFO - lr: 7.7010e-06 gnorm: 1.20 [20:40:24< 3:49:16] +[titan] 2025-10-05 19:14:46,559 - root - INFO - step: 33765 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7685 +[titan] 2025-10-05 19:14:46,559 - root - INFO - lr: 7.6967e-06 gnorm: 1.17 [20:40:35< 3:49:05] +[titan] 2025-10-05 19:14:57,429 - root - INFO - step: 33770 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6631 +[titan] 2025-10-05 19:14:57,429 - root - INFO - lr: 7.6925e-06 gnorm: 1.19 [20:40:46< 3:48:54] +[titan] 2025-10-05 19:15:08,283 - root - INFO - step: 33775 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7218 +[titan] 2025-10-05 19:15:08,283 - root - INFO - lr: 7.6883e-06 gnorm: 1.20 [20:40:57< 3:48:43] +[titan] 2025-10-05 19:15:19,145 - root - INFO - step: 33780 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:15:19,145 - root - INFO - lr: 
7.6841e-06 gnorm: 1.21 [20:41:07< 3:48:31] +[titan] 2025-10-05 19:15:30,024 - root - INFO - step: 33785 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6677 +[titan] 2025-10-05 19:15:30,024 - root - INFO - lr: 7.6798e-06 gnorm: 1.18 [20:41:18< 3:48:20] +[titan] 2025-10-05 19:15:40,968 - root - INFO - step: 33790 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 29,943 tflops: 415.41 mfu: 42.00% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 19:15:40,968 - root - INFO - lr: 7.6756e-06 gnorm: 1.18 [20:41:29< 3:48:09] +[titan] 2025-10-05 19:15:45,489 - root - INFO - Dumping profiler traces at step 33792 +[titan] 2025-10-05 19:15:45,525 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:15:52,106 - root - INFO - step: 33795 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 29,421 tflops: 408.17 mfu: 41.27% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 19:15:52,106 - root - INFO - lr: 7.6714e-06 gnorm: 1.19 [20:41:40< 3:47:58] +[titan] 2025-10-05 19:16:00,820 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:16:03,008 - root - INFO - step: 33800 loss: 1.9819 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 417.00 mfu: 42.16% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:16:03,008 - root - INFO - lr: 7.6672e-06 gnorm: 1.20 [20:41:51< 3:47:47] +[titan] 2025-10-05 19:16:13,885 - root - INFO - step: 33805 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 19:16:13,885 - root - INFO - lr: 7.6630e-06 gnorm: 1.17 [20:42:02< 3:47:36] +[titan] 2025-10-05 19:16:24,767 - root - INFO - step: 33810 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 19:16:24,767 - root - INFO - lr: 7.6587e-06 gnorm: 1.15 [20:42:13< 3:47:25] +[titan] 2025-10-05 19:16:35,714 - root - INFO - step: 33815 loss: 2.0005 memory: 118.84GiB(85.28%) tps: 29,935 tflops: 415.30 mfu: 41.99% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 19:16:35,714 - root - INFO - lr: 7.6545e-06 gnorm: 1.20 [20:42:24< 3:47:14] +[titan] 2025-10-05 19:16:46,588 - root - INFO - step: 33820 loss: 1.9501 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7241 +[titan] 2025-10-05 19:16:46,588 - root - INFO - lr: 7.6503e-06 gnorm: 1.18 [20:42:35< 3:47:03] +[titan] 2025-10-05 19:16:57,467 - root - INFO - step: 33825 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6954 +[titan] 2025-10-05 19:16:57,467 - root - INFO - lr: 7.6461e-06 gnorm: 1.16 [20:42:46< 3:46:52] +[titan] 2025-10-05 19:17:08,370 - root - INFO - step: 33830 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 19:17:08,371 - root - INFO - lr: 
7.6419e-06 gnorm: 1.15 [20:42:57< 3:46:41] +[titan] 2025-10-05 19:17:19,239 - root - INFO - step: 33835 loss: 1.9118 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:17:19,239 - root - INFO - lr: 7.6377e-06 gnorm: 1.19 [20:43:07< 3:46:30] +[titan] 2025-10-05 19:17:30,168 - root - INFO - step: 33840 loss: 1.9930 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 415.99 mfu: 42.06% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7619 +[titan] 2025-10-05 19:17:30,168 - root - INFO - lr: 7.6335e-06 gnorm: 1.20 [20:43:18< 3:46:19] +[titan] 2025-10-05 19:17:41,047 - root - INFO - step: 33845 loss: 1.9781 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:17:41,047 - root - INFO - lr: 7.6294e-06 gnorm: 1.17 [20:43:29< 3:46:08] +[titan] 2025-10-05 19:17:49,727 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:17:51,911 - root - INFO - step: 33850 loss: 1.9924 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7615 +[titan] 2025-10-05 19:17:51,911 - root - INFO - lr: 7.6252e-06 gnorm: 1.20 [20:43:40< 3:45:57] +[titan] 2025-10-05 19:18:02,789 - root - INFO - step: 33855 loss: 1.9320 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7084 +[titan] 2025-10-05 19:18:02,789 - root - INFO - lr: 7.6210e-06 gnorm: 1.18 [20:43:51< 3:45:46] +[titan] 2025-10-05 19:18:13,634 - root - INFO - step: 33860 loss: 2.0047 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2322 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 19:18:13,634 - root - INFO - lr: 7.6168e-06 gnorm: 1.20 [20:44:02< 3:45:35] +[titan] 2025-10-05 19:18:24,528 - root - INFO - step: 33865 loss: 1.9380 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:18:24,528 - root - INFO - lr: 7.6126e-06 gnorm: 1.21 [20:44:13< 3:45:24] +[titan] 2025-10-05 19:18:35,439 - root - INFO - step: 33870 loss: 1.8718 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6559 +[titan] 2025-10-05 19:18:35,440 - root - INFO - lr: 7.6085e-06 gnorm: 1.16 [20:44:24< 3:45:13] +[titan] 2025-10-05 19:18:46,300 - root - INFO - step: 33875 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:18:46,300 - root - INFO - lr: 7.6043e-06 gnorm: 1.18 [20:44:35< 3:45:02] +[titan] 2025-10-05 19:18:57,171 - root - INFO - step: 33880 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7451 +[titan] 2025-10-05 19:18:57,171 - root - INFO - lr: 
7.6001e-06 gnorm: 1.18 [20:44:45< 3:44:51] +[titan] 2025-10-05 19:19:08,027 - root - INFO - step: 33885 loss: 1.9610 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 19:19:08,028 - root - INFO - lr: 7.5960e-06 gnorm: 1.18 [20:44:56< 3:44:40] +[titan] 2025-10-05 19:19:18,884 - root - INFO - step: 33890 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 19:19:18,884 - root - INFO - lr: 7.5918e-06 gnorm: 1.15 [20:45:07< 3:44:28] +[titan] 2025-10-05 19:19:29,764 - root - INFO - step: 33895 loss: 1.9182 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6963 +[titan] 2025-10-05 19:19:29,765 - root - INFO - lr: 7.5877e-06 gnorm: 1.14 [20:45:18< 3:44:17] +[titan] 2025-10-05 19:19:38,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:19:40,700 - root - INFO - step: 33900 loss: 1.9418 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:19:40,700 - root - INFO - lr: 7.5835e-06 gnorm: 1.15 [20:45:29< 3:44:06] +[titan] 2025-10-05 19:19:51,575 - root - INFO - step: 33905 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7485 +[titan] 2025-10-05 19:19:51,575 - root - INFO - lr: 7.5793e-06 gnorm: 1.18 [20:45:40< 3:43:55] +[titan] 2025-10-05 19:20:02,438 - root - INFO - step: 33910 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7369 +[titan] 2025-10-05 19:20:02,438 - root - INFO - lr: 7.5752e-06 gnorm: 1.17 [20:45:51< 3:43:44] +[titan] 2025-10-05 19:20:13,310 - root - INFO - step: 33915 loss: 2.0142 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7810 +[titan] 2025-10-05 19:20:13,310 - root - INFO - lr: 7.5711e-06 gnorm: 1.21 [20:46:02< 3:43:33] +[titan] 2025-10-05 19:20:24,174 - root - INFO - step: 33920 loss: 2.0269 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7923 +[titan] 2025-10-05 19:20:24,174 - root - INFO - lr: 7.5669e-06 gnorm: 1.18 [20:46:12< 3:43:22] +[titan] 2025-10-05 19:20:35,419 - root - INFO - step: 33925 loss: 2.0138 memory: 118.84GiB(85.28%) tps: 29,141 tflops: 404.28 mfu: 40.88% global_avg_ntp_loss: 0.2339 global_avg_mtp_loss: 1.7798 +[titan] 2025-10-05 19:20:35,420 - root - INFO - lr: 7.5628e-06 gnorm: 1.19 [20:46:24< 3:43:11] +[titan] 2025-10-05 19:20:46,283 - root - INFO - step: 33930 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7275 +[titan] 2025-10-05 19:20:46,283 - root - INFO - lr: 
7.5586e-06 gnorm: 1.17 [20:46:34< 3:43:00] +[titan] 2025-10-05 19:20:57,167 - root - INFO - step: 33935 loss: 1.9676 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7398 +[titan] 2025-10-05 19:20:57,167 - root - INFO - lr: 7.5545e-06 gnorm: 1.23 [20:46:45< 3:42:49] +[titan] 2025-10-05 19:21:08,017 - root - INFO - step: 33940 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:21:08,017 - root - INFO - lr: 7.5504e-06 gnorm: 1.19 [20:46:56< 3:42:38] +[titan] 2025-10-05 19:21:18,883 - root - INFO - step: 33945 loss: 1.9536 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 19:21:18,883 - root - INFO - lr: 7.5463e-06 gnorm: 1.17 [20:47:07< 3:42:27] +[titan] 2025-10-05 19:21:27,550 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:21:29,744 - root - INFO - step: 33950 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2271 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:21:29,745 - root - INFO - lr: 7.5421e-06 gnorm: 1.24 [20:47:18< 3:42:16] +[titan] 2025-10-05 19:21:40,694 - root - INFO - step: 33955 loss: 1.9644 memory: 118.84GiB(85.28%) tps: 29,929 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 19:21:40,694 - root - INFO - lr: 7.5380e-06 gnorm: 1.21 [20:47:29< 3:42:05] +[titan] 2025-10-05 19:21:51,598 - root - INFO - step: 33960 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,051 tflops: 416.91 mfu: 42.15% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7374 +[titan] 2025-10-05 19:21:51,598 - root - INFO - lr: 7.5339e-06 gnorm: 1.22 [20:47:40< 3:41:54] +[titan] 2025-10-05 19:22:02,484 - root - INFO - step: 33965 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7545 +[titan] 2025-10-05 19:22:02,484 - root - INFO - lr: 7.5298e-06 gnorm: 1.17 [20:47:51< 3:41:43] +[titan] 2025-10-05 19:22:13,362 - root - INFO - step: 33970 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 19:22:13,362 - root - INFO - lr: 7.5257e-06 gnorm: 1.15 [20:48:02< 3:41:32] +[titan] 2025-10-05 19:22:24,229 - root - INFO - step: 33975 loss: 1.9756 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:22:24,229 - root - INFO - lr: 7.5216e-06 gnorm: 1.21 [20:48:12< 3:41:21] +[titan] 2025-10-05 19:22:35,177 - root - INFO - step: 33980 loss: 2.0137 memory: 118.84GiB(85.28%) tps: 29,931 tflops: 415.24 mfu: 41.99% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7800 +[titan] 2025-10-05 19:22:35,178 - root - INFO - lr: 
7.5175e-06 gnorm: 1.26 [20:48:23< 3:41:10] +[titan] 2025-10-05 19:22:46,038 - root - INFO - step: 33985 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 19:22:46,038 - root - INFO - lr: 7.5134e-06 gnorm: 1.22 [20:48:34< 3:40:59] +[titan] 2025-10-05 19:22:56,932 - root - INFO - step: 33990 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 19:22:56,932 - root - INFO - lr: 7.5093e-06 gnorm: 1.16 [20:48:45< 3:40:48] +[titan] 2025-10-05 19:23:07,777 - root - INFO - step: 33995 loss: 1.9348 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 19:23:07,778 - root - INFO - lr: 7.5052e-06 gnorm: 1.21 [20:48:56< 3:40:37] +[titan] 2025-10-05 19:23:16,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:23:18,638 - root - INFO - step: 34000 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6869 +[titan] 2025-10-05 19:23:18,638 - root - INFO - lr: 7.5011e-06 gnorm: 1.17 [20:49:07< 3:40:26] +[titan] 2025-10-05 19:23:29,501 - root - INFO - step: 34005 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7496 +[titan] 2025-10-05 19:23:29,501 - root - INFO - lr: 7.4970e-06 gnorm: 1.18 [20:49:18< 3:40:14] +[titan] 2025-10-05 19:23:40,426 - root - INFO - step: 34010 loss: 1.9485 memory: 118.84GiB(85.28%) tps: 29,995 tflops: 416.13 mfu: 42.08% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 19:23:40,426 - root - INFO - lr: 7.4929e-06 gnorm: 1.19 [20:49:29< 3:40:03] +[titan] 2025-10-05 19:23:51,314 - root - INFO - step: 34015 loss: 1.9884 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.53 mfu: 42.22% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7585 +[titan] 2025-10-05 19:23:51,315 - root - INFO - lr: 7.4888e-06 gnorm: 1.21 [20:49:40< 3:39:52] +[titan] 2025-10-05 19:24:02,171 - root - INFO - step: 34020 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 19:24:02,171 - root - INFO - lr: 7.4847e-06 gnorm: 1.18 [20:49:50< 3:39:41] +[titan] 2025-10-05 19:24:13,068 - root - INFO - step: 34025 loss: 2.0106 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7785 +[titan] 2025-10-05 19:24:13,068 - root - INFO - lr: 7.4807e-06 gnorm: 1.26 [20:50:01< 3:39:30] +[titan] 2025-10-05 19:24:23,950 - root - INFO - step: 34030 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 19:24:23,951 - root - INFO - lr: 
7.4766e-06 gnorm: 1.18 [20:50:12< 3:39:19] +[titan] 2025-10-05 19:24:34,827 - root - INFO - step: 34035 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6696 +[titan] 2025-10-05 19:24:34,827 - root - INFO - lr: 7.4725e-06 gnorm: 1.20 [20:50:23< 3:39:08] +[titan] 2025-10-05 19:24:45,778 - root - INFO - step: 34040 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.14 mfu: 41.98% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7246 +[titan] 2025-10-05 19:24:45,778 - root - INFO - lr: 7.4685e-06 gnorm: 1.19 [20:50:34< 3:38:57] +[titan] 2025-10-05 19:24:56,664 - root - INFO - step: 34045 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 19:24:56,664 - root - INFO - lr: 7.4644e-06 gnorm: 1.20 [20:50:45< 3:38:46] +[titan] 2025-10-05 19:25:05,343 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:25:07,528 - root - INFO - step: 34050 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 19:25:07,528 - root - INFO - lr: 7.4603e-06 gnorm: 1.21 [20:50:56< 3:38:35] +[titan] 2025-10-05 19:25:18,416 - root - INFO - step: 34055 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6971 +[titan] 2025-10-05 19:25:18,416 - root - INFO - lr: 7.4563e-06 gnorm: 1.17 [20:51:07< 3:38:24] +[titan] 2025-10-05 19:25:29,290 - root - INFO - step: 34060 loss: 1.9560 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7304 +[titan] 2025-10-05 19:25:29,291 - root - INFO - lr: 7.4522e-06 gnorm: 1.17 [20:51:17< 3:38:13] +[titan] 2025-10-05 19:25:40,227 - root - INFO - step: 34065 loss: 1.9167 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.69 mfu: 42.03% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6962 +[titan] 2025-10-05 19:25:40,227 - root - INFO - lr: 7.4482e-06 gnorm: 1.16 [20:51:28< 3:38:02] +[titan] 2025-10-05 19:25:51,094 - root - INFO - step: 34070 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 19:25:51,094 - root - INFO - lr: 7.4441e-06 gnorm: 1.17 [20:51:39< 3:37:51] +[titan] 2025-10-05 19:26:01,990 - root - INFO - step: 34075 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 19:26:01,991 - root - INFO - lr: 7.4401e-06 gnorm: 1.19 [20:51:50< 3:37:40] +[titan] 2025-10-05 19:26:12,871 - root - INFO - step: 34080 loss: 1.9126 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6921 +[titan] 2025-10-05 19:26:12,871 - root - INFO - lr: 
7.4361e-06 gnorm: 1.18 [20:52:01< 3:37:29] +[titan] 2025-10-05 19:26:23,746 - root - INFO - step: 34085 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7530 +[titan] 2025-10-05 19:26:23,746 - root - INFO - lr: 7.4320e-06 gnorm: 1.19 [20:52:12< 3:37:18] +[titan] 2025-10-05 19:26:34,615 - root - INFO - step: 34090 loss: 1.9192 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 19:26:34,615 - root - INFO - lr: 7.4280e-06 gnorm: 1.17 [20:52:23< 3:37:07] +[titan] 2025-10-05 19:26:45,574 - root - INFO - step: 34095 loss: 2.0674 memory: 118.84GiB(85.28%) tps: 29,900 tflops: 414.81 mfu: 41.94% global_avg_ntp_loss: 0.2384 global_avg_mtp_loss: 1.8291 +[titan] 2025-10-05 19:26:45,575 - root - INFO - lr: 7.4239e-06 gnorm: 1.24 [20:52:34< 3:36:56] +[titan] 2025-10-05 19:26:54,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:26:56,447 - root - INFO - step: 34100 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 19:26:56,447 - root - INFO - lr: 7.4199e-06 gnorm: 1.21 [20:52:45< 3:36:45] +[titan] 2025-10-05 19:27:07,327 - root - INFO - step: 34105 loss: 1.8752 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 19:27:07,327 - root - INFO - lr: 7.4159e-06 gnorm: 1.17 [20:52:56< 3:36:34] +[titan] 2025-10-05 19:27:18,206 - root - INFO - step: 34110 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 19:27:18,206 - root - INFO - lr: 7.4119e-06 gnorm: 1.25 [20:53:06< 3:36:23] +[titan] 2025-10-05 19:27:29,088 - root - INFO - step: 34115 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7077 +[titan] 2025-10-05 19:27:29,088 - root - INFO - lr: 7.4079e-06 gnorm: 1.18 [20:53:17< 3:36:11] +[titan] 2025-10-05 19:27:40,016 - root - INFO - step: 34120 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 29,985 tflops: 416.00 mfu: 42.06% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 19:27:40,017 - root - INFO - lr: 7.4038e-06 gnorm: 1.18 [20:53:28< 3:36:00] +[titan] 2025-10-05 19:27:50,909 - root - INFO - step: 34125 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 19:27:50,909 - root - INFO - lr: 7.3998e-06 gnorm: 1.20 [20:53:39< 3:35:49] +[titan] 2025-10-05 19:28:01,811 - root - INFO - step: 34130 loss: 1.9605 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:28:01,811 - root - INFO - lr: 
7.3958e-06 gnorm: 1.15 [20:53:50< 3:35:38] +[titan] 2025-10-05 19:28:12,709 - root - INFO - step: 34135 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7353 +[titan] 2025-10-05 19:28:12,709 - root - INFO - lr: 7.3918e-06 gnorm: 1.17 [20:54:01< 3:35:27] +[titan] 2025-10-05 19:28:23,581 - root - INFO - step: 34140 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:28:23,582 - root - INFO - lr: 7.3878e-06 gnorm: 1.23 [20:54:12< 3:35:16] +[titan] 2025-10-05 19:28:34,458 - root - INFO - step: 34145 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7464 +[titan] 2025-10-05 19:28:34,458 - root - INFO - lr: 7.3838e-06 gnorm: 1.18 [20:54:23< 3:35:05] +[titan] 2025-10-05 19:28:43,204 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:28:45,388 - root - INFO - step: 34150 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 29,982 tflops: 415.96 mfu: 42.06% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 19:28:45,388 - root - INFO - lr: 7.3798e-06 gnorm: 1.19 [20:54:34< 3:34:54] +[titan] 2025-10-05 19:28:56,262 - root - INFO - step: 34155 loss: 1.9387 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:28:56,262 - root - INFO - lr: 7.3758e-06 gnorm: 1.16 [20:54:44< 3:34:43] +[titan] 2025-10-05 19:29:07,168 - root - INFO - step: 34160 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 19:29:07,168 - root - INFO - lr: 7.3718e-06 gnorm: 1.18 [20:54:55< 3:34:32] +[titan] 2025-10-05 19:29:18,057 - root - INFO - step: 34165 loss: 1.9617 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7350 +[titan] 2025-10-05 19:29:18,057 - root - INFO - lr: 7.3678e-06 gnorm: 1.19 [20:55:06< 3:34:21] +[titan] 2025-10-05 19:29:28,930 - root - INFO - step: 34170 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 19:29:28,930 - root - INFO - lr: 7.3639e-06 gnorm: 1.18 [20:55:17< 3:34:10] +[titan] 2025-10-05 19:29:39,844 - root - INFO - step: 34175 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 19:29:39,844 - root - INFO - lr: 7.3599e-06 gnorm: 1.25 [20:55:28< 3:33:59] +[titan] 2025-10-05 19:29:50,737 - root - INFO - step: 34180 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7600 +[titan] 2025-10-05 19:29:50,737 - root - INFO - lr: 
7.3559e-06 gnorm: 1.26 [20:55:39< 3:33:48] +[titan] 2025-10-05 19:30:01,652 - root - INFO - step: 34185 loss: 1.9845 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 19:30:01,652 - root - INFO - lr: 7.3519e-06 gnorm: 1.25 [20:55:50< 3:33:37] +[titan] 2025-10-05 19:30:12,505 - root - INFO - step: 34190 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7397 +[titan] 2025-10-05 19:30:12,506 - root - INFO - lr: 7.3480e-06 gnorm: 1.20 [20:56:01< 3:33:26] +[titan] 2025-10-05 19:30:23,389 - root - INFO - step: 34195 loss: 1.9339 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 19:30:23,390 - root - INFO - lr: 7.3440e-06 gnorm: 1.19 [20:56:12< 3:33:15] +[titan] 2025-10-05 19:30:32,060 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:30:34,246 - root - INFO - step: 34200 loss: 1.9408 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 19:30:34,246 - root - INFO - lr: 7.3400e-06 gnorm: 1.19 [20:56:22< 3:33:04] +[titan] 2025-10-05 19:30:45,157 - root - INFO - step: 34205 loss: 1.9115 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 19:30:45,157 - root - INFO - lr: 7.3361e-06 gnorm: 1.18 [20:56:33< 3:32:53] +[titan] 2025-10-05 19:30:56,027 - root - INFO - step: 34210 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 19:30:56,027 - root - INFO - lr: 7.3321e-06 gnorm: 1.19 [20:56:44< 3:32:42] +[titan] 2025-10-05 19:31:06,908 - root - INFO - step: 34215 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 19:31:06,908 - root - INFO - lr: 7.3281e-06 gnorm: 1.17 [20:56:55< 3:32:31] +[titan] 2025-10-05 19:31:17,775 - root - INFO - step: 34220 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7272 +[titan] 2025-10-05 19:31:17,776 - root - INFO - lr: 7.3242e-06 gnorm: 1.20 [20:57:06< 3:32:20] +[titan] 2025-10-05 19:31:28,639 - root - INFO - step: 34225 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6851 +[titan] 2025-10-05 19:31:28,639 - root - INFO - lr: 7.3202e-06 gnorm: 1.19 [20:57:17< 3:32:09] +[titan] 2025-10-05 19:31:39,529 - root - INFO - step: 34230 loss: 1.9687 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:31:39,529 - root - INFO - lr: 
7.3163e-06 gnorm: 1.20 [20:57:28< 3:31:57] +[titan] 2025-10-05 19:31:50,429 - root - INFO - step: 34235 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7400 +[titan] 2025-10-05 19:31:50,429 - root - INFO - lr: 7.3124e-06 gnorm: 1.19 [20:57:39< 3:31:46] +[titan] 2025-10-05 19:32:01,297 - root - INFO - step: 34240 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6972 +[titan] 2025-10-05 19:32:01,297 - root - INFO - lr: 7.3084e-06 gnorm: 1.22 [20:57:49< 3:31:35] +[titan] 2025-10-05 19:32:12,194 - root - INFO - step: 34245 loss: 1.9965 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7648 +[titan] 2025-10-05 19:32:12,194 - root - INFO - lr: 7.3045e-06 gnorm: 1.21 [20:58:00< 3:31:24] +[titan] 2025-10-05 19:32:20,872 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:32:23,059 - root - INFO - step: 34250 loss: 1.9691 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7420 +[titan] 2025-10-05 19:32:23,059 - root - INFO - lr: 7.3006e-06 gnorm: 1.18 [20:58:11< 3:31:13] +[titan] 2025-10-05 19:32:33,942 - root - INFO - step: 34255 loss: 1.9345 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 19:32:33,942 - root - INFO - lr: 7.2966e-06 gnorm: 1.16 [20:58:22< 3:31:02] +[titan] 2025-10-05 19:32:44,861 - root - INFO - step: 34260 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7106 +[titan] 2025-10-05 19:32:44,861 - root - INFO - lr: 7.2927e-06 gnorm: 1.18 [20:58:33< 3:30:51] +[titan] 2025-10-05 19:32:55,734 - root - INFO - step: 34265 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 19:32:55,734 - root - INFO - lr: 7.2888e-06 gnorm: 1.16 [20:58:44< 3:30:40] +[titan] 2025-10-05 19:33:06,617 - root - INFO - step: 34270 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7289 +[titan] 2025-10-05 19:33:06,617 - root - INFO - lr: 7.2849e-06 gnorm: 1.22 [20:58:55< 3:30:29] +[titan] 2025-10-05 19:33:17,521 - root - INFO - step: 34275 loss: 2.0006 memory: 118.84GiB(85.28%) tps: 30,052 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2318 global_avg_mtp_loss: 1.7687 +[titan] 2025-10-05 19:33:17,521 - root - INFO - lr: 7.2809e-06 gnorm: 1.19 [20:59:06< 3:30:18] +[titan] 2025-10-05 19:33:28,449 - root - INFO - step: 34280 loss: 2.0057 memory: 118.84GiB(85.28%) tps: 29,987 tflops: 416.02 mfu: 42.06% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7732 +[titan] 2025-10-05 19:33:28,449 - root - INFO - lr: 
7.2770e-06 gnorm: 1.23 [20:59:17< 3:30:07] +[titan] 2025-10-05 19:33:39,328 - root - INFO - step: 34285 loss: 1.9261 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7029 +[titan] 2025-10-05 19:33:39,328 - root - INFO - lr: 7.2731e-06 gnorm: 1.17 [20:59:28< 3:29:56] +[titan] 2025-10-05 19:33:50,236 - root - INFO - step: 34290 loss: 2.0014 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.76 mfu: 42.14% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:33:50,236 - root - INFO - lr: 7.2692e-06 gnorm: 1.23 [20:59:38< 3:29:45] +[titan] 2025-10-05 19:34:01,108 - root - INFO - step: 34295 loss: 1.9672 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 19:34:01,108 - root - INFO - lr: 7.2653e-06 gnorm: 1.17 [20:59:49< 3:29:34] +[titan] 2025-10-05 19:34:09,792 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:34:11,978 - root - INFO - step: 34300 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2303 global_avg_mtp_loss: 1.7538 +[titan] 2025-10-05 19:34:11,978 - root - INFO - lr: 7.2614e-06 gnorm: 1.19 [21:00:00< 3:29:23] +[titan] 2025-10-05 19:34:20,931 - root - INFO - Dumping profiler traces at step 34304 +[titan] 2025-10-05 19:34:20,972 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:34:23,158 - root - INFO - step: 34305 loss: 1.8387 memory: 118.84GiB(85.28%) tps: 29,312 tflops: 406.65 mfu: 41.12% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6259 +[titan] 2025-10-05 19:34:23,158 - root - INFO - lr: 7.2575e-06 gnorm: 1.17 [21:00:11< 3:29:12] +[titan] 2025-10-05 19:34:34,056 - root - INFO - step: 34310 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 19:34:34,057 - root - INFO - lr: 7.2536e-06 gnorm: 1.17 [21:00:22< 3:29:01] +[titan] 2025-10-05 19:34:44,938 - root - INFO - step: 34315 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 19:34:44,938 - root - INFO - lr: 7.2497e-06 gnorm: 1.18 [21:00:33< 3:28:50] +[titan] 2025-10-05 19:34:55,805 - root - INFO - step: 34320 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7519 +[titan] 2025-10-05 19:34:55,805 - root - INFO - lr: 7.2458e-06 gnorm: 1.22 [21:00:44< 3:28:39] +[titan] 2025-10-05 19:35:06,664 - root - INFO - step: 34325 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7188 +[titan] 2025-10-05 19:35:06,664 - root - INFO - lr: 7.2419e-06 gnorm: 1.19 [21:00:55< 3:28:28] +[titan] 2025-10-05 19:35:17,530 - root - INFO - step: 34330 loss: 
1.9235 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 19:35:17,530 - root - INFO - lr: 7.2381e-06 gnorm: 1.18 [21:01:06< 3:28:17] +[titan] 2025-10-05 19:35:28,388 - root - INFO - step: 34335 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.34% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 19:35:28,388 - root - INFO - lr: 7.2342e-06 gnorm: 1.27 [21:01:17< 3:28:06] +[titan] 2025-10-05 19:35:39,210 - root - INFO - step: 34340 loss: 2.0451 memory: 118.84GiB(85.28%) tps: 30,281 tflops: 420.10 mfu: 42.48% global_avg_ntp_loss: 0.2368 global_avg_mtp_loss: 1.8083 +[titan] 2025-10-05 19:35:39,210 - root - INFO - lr: 7.2303e-06 gnorm: 1.23 [21:01:27< 3:27:55] +[titan] 2025-10-05 19:35:50,072 - root - INFO - step: 34345 loss: 1.9981 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7671 +[titan] 2025-10-05 19:35:50,072 - root - INFO - lr: 7.2264e-06 gnorm: 1.20 [21:01:38< 3:27:44] +[titan] 2025-10-05 19:35:58,755 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:36:00,937 - root - INFO - step: 34350 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:00,938 - root - INFO - lr: 7.2226e-06 gnorm: 1.19 [21:01:49< 3:27:32] +[titan] 2025-10-05 19:36:11,779 - root - INFO - step: 34355 loss: 1.9721 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:36:11,779 - root - INFO - lr: 7.2187e-06 gnorm: 1.22 [21:02:00< 3:27:21] +[titan] 2025-10-05 19:36:22,618 - root - INFO - step: 34360 loss: 1.9809 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7510 +[titan] 2025-10-05 19:36:22,618 - root - INFO - lr: 7.2148e-06 gnorm: 1.19 [21:02:11< 3:27:10] +[titan] 2025-10-05 19:36:33,472 - root - INFO - step: 34365 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 19:36:33,472 - root - INFO - lr: 7.2110e-06 gnorm: 1.16 [21:02:22< 3:26:59] +[titan] 2025-10-05 19:36:44,328 - root - INFO - step: 34370 loss: 1.9285 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7054 +[titan] 2025-10-05 19:36:44,329 - root - INFO - lr: 7.2071e-06 gnorm: 1.17 [21:02:32< 3:26:48] +[titan] 2025-10-05 19:36:55,235 - root - INFO - step: 34375 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,045 tflops: 416.83 mfu: 42.15% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 19:36:55,235 - root - INFO - lr: 7.2033e-06 gnorm: 1.14 [21:02:43< 3:26:37] +[titan] 2025-10-05 19:37:06,084 - root - INFO - step: 34380 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7002 +[titan] 2025-10-05 19:37:06,084 - root - INFO - lr: 
7.1994e-06 gnorm: 1.17 [21:02:54< 3:26:26] +[titan] 2025-10-05 19:37:16,961 - root - INFO - step: 34385 loss: 1.9854 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7555 +[titan] 2025-10-05 19:37:16,961 - root - INFO - lr: 7.1956e-06 gnorm: 1.22 [21:03:05< 3:26:15] +[titan] 2025-10-05 19:37:27,815 - root - INFO - step: 34390 loss: 2.0305 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7955 +[titan] 2025-10-05 19:37:27,815 - root - INFO - lr: 7.1917e-06 gnorm: 1.20 [21:03:16< 3:26:04] +[titan] 2025-10-05 19:37:38,670 - root - INFO - step: 34395 loss: 1.9896 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7587 +[titan] 2025-10-05 19:37:38,670 - root - INFO - lr: 7.1879e-06 gnorm: 1.19 [21:03:27< 3:25:53] +[titan] 2025-10-05 19:37:47,373 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:37:49,563 - root - INFO - step: 34400 loss: 1.9677 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7408 +[titan] 2025-10-05 19:37:49,563 - root - INFO - lr: 7.1840e-06 gnorm: 1.23 [21:03:38< 3:25:42] +[titan] 2025-10-05 19:38:00,476 - root - INFO - step: 34405 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,028 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 19:38:00,476 - root - INFO - lr: 7.1802e-06 gnorm: 1.21 [21:03:49< 3:25:31] +[titan] 2025-10-05 19:38:11,337 - root - INFO - step: 34410 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7067 +[titan] 2025-10-05 19:38:11,337 - root - INFO - lr: 7.1764e-06 gnorm: 1.16 [21:03:59< 3:25:20] +[titan] 2025-10-05 19:38:22,210 - root - INFO - step: 34415 loss: 2.0198 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2347 global_avg_mtp_loss: 1.7852 +[titan] 2025-10-05 19:38:22,210 - root - INFO - lr: 7.1726e-06 gnorm: 1.20 [21:04:10< 3:25:09] +[titan] 2025-10-05 19:38:33,092 - root - INFO - step: 34420 loss: 2.0059 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:38:33,092 - root - INFO - lr: 7.1687e-06 gnorm: 1.23 [21:04:21< 3:24:58] +[titan] 2025-10-05 19:38:43,954 - root - INFO - step: 34425 loss: 1.9899 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2306 global_avg_mtp_loss: 1.7593 +[titan] 2025-10-05 19:38:43,954 - root - INFO - lr: 7.1649e-06 gnorm: 1.19 [21:04:32< 3:24:47] +[titan] 2025-10-05 19:38:54,847 - root - INFO - step: 34430 loss: 1.9649 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7382 +[titan] 2025-10-05 19:38:54,847 - root - INFO - lr: 
7.1611e-06 gnorm: 1.22 [21:04:43< 3:24:36] +[titan] 2025-10-05 19:39:05,711 - root - INFO - step: 34435 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:39:05,711 - root - INFO - lr: 7.1573e-06 gnorm: 1.18 [21:04:54< 3:24:25] +[titan] 2025-10-05 19:39:16,607 - root - INFO - step: 34440 loss: 1.9084 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 19:39:16,607 - root - INFO - lr: 7.1535e-06 gnorm: 1.15 [21:05:05< 3:24:14] +[titan] 2025-10-05 19:39:27,468 - root - INFO - step: 34445 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7336 +[titan] 2025-10-05 19:39:27,469 - root - INFO - lr: 7.1497e-06 gnorm: 1.21 [21:05:16< 3:24:03] +[titan] 2025-10-05 19:39:36,132 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:39:38,309 - root - INFO - step: 34450 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 19:39:38,309 - root - INFO - lr: 7.1458e-06 gnorm: 1.19 [21:05:26< 3:23:52] +[titan] 2025-10-05 19:39:49,168 - root - INFO - step: 34455 loss: 1.9922 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7614 +[titan] 2025-10-05 19:39:49,168 - root - INFO - lr: 7.1420e-06 gnorm: 1.22 [21:05:37< 3:23:41] +[titan] 2025-10-05 19:39:59,988 - root - INFO - step: 34460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 19:39:59,988 - root - INFO - lr: 7.1382e-06 gnorm: 1.18 [21:05:48< 3:23:29] +[titan] 2025-10-05 19:40:10,837 - root - INFO - step: 34465 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:40:10,837 - root - INFO - lr: 7.1345e-06 gnorm: 1.20 [21:05:59< 3:23:18] +[titan] 2025-10-05 19:40:21,711 - root - INFO - step: 34470 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7264 +[titan] 2025-10-05 19:40:21,711 - root - INFO - lr: 7.1307e-06 gnorm: 1.24 [21:06:10< 3:23:07] +[titan] 2025-10-05 19:40:32,577 - root - INFO - step: 34475 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7341 +[titan] 2025-10-05 19:40:32,577 - root - INFO - lr: 7.1269e-06 gnorm: 1.26 [21:06:21< 3:22:56] +[titan] 2025-10-05 19:40:43,432 - root - INFO - step: 34480 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6998 +[titan] 2025-10-05 19:40:43,432 - root - INFO - lr: 
7.1231e-06 gnorm: 1.19 [21:06:32< 3:22:45] +[titan] 2025-10-05 19:40:54,326 - root - INFO - step: 34485 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.31 mfu: 42.20% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 19:40:54,326 - root - INFO - lr: 7.1193e-06 gnorm: 1.19 [21:06:42< 3:22:34] +[titan] 2025-10-05 19:41:05,190 - root - INFO - step: 34490 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7198 +[titan] 2025-10-05 19:41:05,190 - root - INFO - lr: 7.1155e-06 gnorm: 1.19 [21:06:53< 3:22:23] +[titan] 2025-10-05 19:41:16,016 - root - INFO - step: 34495 loss: 1.9452 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7197 +[titan] 2025-10-05 19:41:16,016 - root - INFO - lr: 7.1117e-06 gnorm: 1.28 [21:07:04< 3:22:12] +[titan] 2025-10-05 19:41:24,678 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:41:26,864 - root - INFO - step: 34500 loss: 2.0268 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 19:41:26,864 - root - INFO - lr: 7.1080e-06 gnorm: 1.20 [21:07:15< 3:22:01] +[titan] 2025-10-05 19:41:37,746 - root - INFO - step: 34505 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7384 +[titan] 2025-10-05 19:41:37,746 - root - INFO - lr: 7.1042e-06 gnorm: 1.20 [21:07:26< 3:21:50] +[titan] 2025-10-05 19:41:48,592 - root - INFO - step: 34510 loss: 1.9716 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 19:41:48,592 - root - INFO - lr: 7.1004e-06 gnorm: 1.23 [21:07:37< 3:21:39] +[titan] 2025-10-05 19:41:59,472 - root - INFO - step: 34515 loss: 1.9556 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.85 mfu: 42.25% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 19:41:59,472 - root - INFO - lr: 7.0967e-06 gnorm: 1.19 [21:07:48< 3:21:28] +[titan] 2025-10-05 19:42:10,331 - root - INFO - step: 34520 loss: 1.9697 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 19:42:10,331 - root - INFO - lr: 7.0929e-06 gnorm: 1.21 [21:07:58< 3:21:17] +[titan] 2025-10-05 19:42:21,195 - root - INFO - step: 34525 loss: 1.8598 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6441 +[titan] 2025-10-05 19:42:21,195 - root - INFO - lr: 7.0892e-06 gnorm: 1.20 [21:08:09< 3:21:06] +[titan] 2025-10-05 19:42:32,043 - root - INFO - step: 34530 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 19:42:32,043 - root - INFO - lr: 
7.0854e-06 gnorm: 1.18 [21:08:20< 3:20:55] +[titan] 2025-10-05 19:42:42,933 - root - INFO - step: 34535 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6906 +[titan] 2025-10-05 19:42:42,933 - root - INFO - lr: 7.0816e-06 gnorm: 1.17 [21:08:31< 3:20:44] +[titan] 2025-10-05 19:42:53,805 - root - INFO - step: 34540 loss: 1.9412 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 19:42:53,805 - root - INFO - lr: 7.0779e-06 gnorm: 1.20 [21:08:42< 3:20:33] +[titan] 2025-10-05 19:43:04,676 - root - INFO - step: 34545 loss: 1.9428 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 19:43:04,676 - root - INFO - lr: 7.0742e-06 gnorm: 1.19 [21:08:53< 3:20:22] +[titan] 2025-10-05 19:43:13,386 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:43:15,579 - root - INFO - step: 34550 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.97 mfu: 42.16% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7242 +[titan] 2025-10-05 19:43:15,579 - root - INFO - lr: 7.0704e-06 gnorm: 1.21 [21:09:04< 3:20:11] +[titan] 2025-10-05 19:43:26,490 - root - INFO - step: 34555 loss: 1.9736 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7441 +[titan] 2025-10-05 19:43:26,490 - root - INFO - lr: 7.0667e-06 gnorm: 1.18 [21:09:15< 3:20:00] +[titan] 2025-10-05 19:43:37,391 - root - INFO - step: 34560 loss: 2.0699 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2467 global_avg_mtp_loss: 1.8232 +[titan] 2025-10-05 19:43:37,391 - root - INFO - lr: 7.0629e-06 gnorm: 4.37 [21:09:26< 3:19:49] +[titan] 2025-10-05 19:43:48,315 - root - INFO - step: 34565 loss: 1.9033 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 19:43:48,315 - root - INFO - lr: 7.0592e-06 gnorm: 1.24 [21:09:36< 3:19:38] +[titan] 2025-10-05 19:43:59,252 - root - INFO - step: 34570 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 29,961 tflops: 415.66 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7467 +[titan] 2025-10-05 19:43:59,252 - root - INFO - lr: 7.0555e-06 gnorm: 1.17 [21:09:47< 3:19:27] +[titan] 2025-10-05 19:44:10,131 - root - INFO - step: 34575 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.90 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 19:44:10,131 - root - INFO - lr: 7.0518e-06 gnorm: 1.21 [21:09:58< 3:19:15] +[titan] 2025-10-05 19:44:20,965 - root - INFO - step: 34580 loss: 2.0044 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2328 global_avg_mtp_loss: 1.7717 +[titan] 2025-10-05 19:44:20,965 - root - INFO - lr: 
7.0480e-06 gnorm: 1.26 [21:10:09< 3:19:04] +[titan] 2025-10-05 19:44:31,829 - root - INFO - step: 34585 loss: 1.9058 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 19:44:31,829 - root - INFO - lr: 7.0443e-06 gnorm: 1.19 [21:10:20< 3:18:53] +[titan] 2025-10-05 19:44:42,679 - root - INFO - step: 34590 loss: 1.9735 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2505 global_avg_mtp_loss: 1.7230 +[titan] 2025-10-05 19:44:42,680 - root - INFO - lr: 7.0406e-06 gnorm: 2.68 [21:10:31< 3:18:42] +[titan] 2025-10-05 19:44:53,560 - root - INFO - step: 34595 loss: 1.8805 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 19:44:53,561 - root - INFO - lr: 7.0369e-06 gnorm: 1.23 [21:10:42< 3:18:31] +[titan] 2025-10-05 19:45:02,272 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:45:04,448 - root - INFO - step: 34600 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 19:45:04,448 - root - INFO - lr: 7.0332e-06 gnorm: 1.18 [21:10:53< 3:18:20] +[titan] 2025-10-05 19:45:15,326 - root - INFO - step: 34605 loss: 1.9912 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:45:15,326 - root - INFO - lr: 7.0295e-06 gnorm: 1.19 [21:11:03< 3:18:09] +[titan] 2025-10-05 19:45:26,191 - root - INFO - step: 34610 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 19:45:26,191 - root - INFO - lr: 7.0258e-06 gnorm: 1.20 [21:11:14< 3:17:58] +[titan] 2025-10-05 19:45:37,037 - root - INFO - step: 34615 loss: 1.9760 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7471 +[titan] 2025-10-05 19:45:37,037 - root - INFO - lr: 7.0221e-06 gnorm: 1.17 [21:11:25< 3:17:47] +[titan] 2025-10-05 19:45:47,905 - root - INFO - step: 34620 loss: 2.0066 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7742 +[titan] 2025-10-05 19:45:47,905 - root - INFO - lr: 7.0184e-06 gnorm: 1.23 [21:11:36< 3:17:36] +[titan] 2025-10-05 19:45:58,807 - root - INFO - step: 34625 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.16% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 19:45:58,807 - root - INFO - lr: 7.0147e-06 gnorm: 1.25 [21:11:47< 3:17:25] +[titan] 2025-10-05 19:46:09,704 - root - INFO - step: 34630 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 30,072 tflops: 417.20 mfu: 42.18% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 19:46:09,704 - root - INFO - lr: 
7.0110e-06 gnorm: 1.24 [21:11:58< 3:17:14] +[titan] 2025-10-05 19:46:20,566 - root - INFO - step: 34635 loss: 1.9555 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 19:46:20,566 - root - INFO - lr: 7.0073e-06 gnorm: 1.25 [21:12:09< 3:17:03] +[titan] 2025-10-05 19:46:31,407 - root - INFO - step: 34640 loss: 1.9051 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 19:46:31,407 - root - INFO - lr: 7.0036e-06 gnorm: 1.18 [21:12:20< 3:16:52] +[titan] 2025-10-05 19:46:42,249 - root - INFO - step: 34645 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 19:46:42,249 - root - INFO - lr: 6.9999e-06 gnorm: 1.17 [21:12:30< 3:16:41] +[titan] 2025-10-05 19:46:50,917 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:46:53,097 - root - INFO - step: 34650 loss: 1.9871 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.38% global_avg_ntp_loss: 0.2313 global_avg_mtp_loss: 1.7558 +[titan] 2025-10-05 19:46:53,097 - root - INFO - lr: 6.9963e-06 gnorm: 1.18 [21:12:41< 3:16:30] +[titan] 2025-10-05 19:47:03,992 - root - INFO - step: 34655 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 19:47:03,992 - root - INFO - lr: 6.9926e-06 gnorm: 1.23 [21:12:52< 3:16:19] +[titan] 2025-10-05 19:47:14,867 - root - INFO - step: 34660 loss: 2.0471 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2371 global_avg_mtp_loss: 1.8100 +[titan] 2025-10-05 19:47:14,867 - root - INFO - lr: 6.9889e-06 gnorm: 1.29 [21:13:03< 3:16:08] +[titan] 2025-10-05 19:47:25,759 - root - INFO - step: 34665 loss: 1.9370 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.37 mfu: 42.20% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 19:47:25,759 - root - INFO - lr: 6.9853e-06 gnorm: 1.19 [21:13:14< 3:15:57] +[titan] 2025-10-05 19:47:36,638 - root - INFO - step: 34670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 19:47:36,639 - root - INFO - lr: 6.9816e-06 gnorm: 1.16 [21:13:25< 3:15:46] +[titan] 2025-10-05 19:47:47,526 - root - INFO - step: 34675 loss: 1.9202 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 19:47:47,526 - root - INFO - lr: 6.9779e-06 gnorm: 1.19 [21:13:36< 3:15:35] +[titan] 2025-10-05 19:47:58,418 - root - INFO - step: 34680 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 19:47:58,418 - root - INFO - lr: 
6.9743e-06 gnorm: 1.20 [21:13:47< 3:15:24] +[titan] 2025-10-05 19:48:09,256 - root - INFO - step: 34685 loss: 1.9226 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6999 +[titan] 2025-10-05 19:48:09,256 - root - INFO - lr: 6.9706e-06 gnorm: 1.21 [21:13:57< 3:15:13] +[titan] 2025-10-05 19:48:20,111 - root - INFO - step: 34690 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 19:48:20,111 - root - INFO - lr: 6.9670e-06 gnorm: 1.22 [21:14:08< 3:15:02] +[titan] 2025-10-05 19:48:31,007 - root - INFO - step: 34695 loss: 1.9129 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 19:48:31,007 - root - INFO - lr: 6.9633e-06 gnorm: 1.22 [21:14:19< 3:14:50] +[titan] 2025-10-05 19:48:39,674 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:48:41,855 - root - INFO - step: 34700 loss: 1.9900 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7588 +[titan] 2025-10-05 19:48:41,855 - root - INFO - lr: 6.9597e-06 gnorm: 1.18 [21:14:30< 3:14:39] +[titan] 2025-10-05 19:48:52,713 - root - INFO - step: 34705 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7370 +[titan] 2025-10-05 19:48:52,713 - root - INFO - lr: 6.9560e-06 gnorm: 1.17 [21:14:41< 3:14:28] +[titan] 2025-10-05 19:49:03,608 - root - INFO - step: 34710 loss: 1.9120 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6899 +[titan] 2025-10-05 19:49:03,608 - root - INFO - lr: 6.9524e-06 gnorm: 1.17 [21:14:52< 3:14:17] +[titan] 2025-10-05 19:49:14,465 - root - INFO - step: 34715 loss: 1.9925 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7613 +[titan] 2025-10-05 19:49:14,465 - root - INFO - lr: 6.9488e-06 gnorm: 1.22 [21:15:03< 3:14:06] +[titan] 2025-10-05 19:49:25,305 - root - INFO - step: 34720 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7270 +[titan] 2025-10-05 19:49:25,305 - root - INFO - lr: 6.9451e-06 gnorm: 1.21 [21:15:13< 3:13:55] +[titan] 2025-10-05 19:49:36,189 - root - INFO - step: 34725 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.68 mfu: 42.23% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:49:36,189 - root - INFO - lr: 6.9415e-06 gnorm: 1.18 [21:15:24< 3:13:44] +[titan] 2025-10-05 19:49:47,060 - root - INFO - step: 34730 loss: 1.9637 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 19:49:47,060 - root - INFO - lr: 
6.9379e-06 gnorm: 1.21 [21:15:35< 3:13:33] +[titan] 2025-10-05 19:49:57,949 - root - INFO - step: 34735 loss: 1.9808 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7512 +[titan] 2025-10-05 19:49:57,949 - root - INFO - lr: 6.9343e-06 gnorm: 1.21 [21:15:46< 3:13:22] +[titan] 2025-10-05 19:50:08,803 - root - INFO - step: 34740 loss: 1.9457 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 19:50:08,803 - root - INFO - lr: 6.9306e-06 gnorm: 1.24 [21:15:57< 3:13:11] +[titan] 2025-10-05 19:50:19,673 - root - INFO - step: 34745 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 19:50:19,673 - root - INFO - lr: 6.9270e-06 gnorm: 1.26 [21:16:08< 3:13:00] +[titan] 2025-10-05 19:50:28,354 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:50:30,535 - root - INFO - step: 34750 loss: 1.9492 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7233 +[titan] 2025-10-05 19:50:30,535 - root - INFO - lr: 6.9234e-06 gnorm: 1.24 [21:16:19< 3:12:49] +[titan] 2025-10-05 19:50:41,406 - root - INFO - step: 34755 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.28% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:50:41,406 - root - INFO - lr: 6.9198e-06 gnorm: 1.19 [21:16:30< 3:12:38] +[titan] 2025-10-05 19:50:52,304 - root - INFO - step: 34760 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.15 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 19:50:52,304 - root - INFO - lr: 6.9162e-06 gnorm: 1.19 [21:16:40< 3:12:27] +[titan] 2025-10-05 19:51:03,222 - root - INFO - step: 34765 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,013 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 19:51:03,223 - root - INFO - lr: 6.9126e-06 gnorm: 1.23 [21:16:51< 3:12:16] +[titan] 2025-10-05 19:51:14,086 - root - INFO - step: 34770 loss: 1.9174 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 19:51:14,086 - root - INFO - lr: 6.9090e-06 gnorm: 1.16 [21:17:02< 3:12:05] +[titan] 2025-10-05 19:51:24,963 - root - INFO - step: 34775 loss: 1.9641 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 19:51:24,963 - root - INFO - lr: 6.9054e-06 gnorm: 1.22 [21:17:13< 3:11:54] +[titan] 2025-10-05 19:51:35,828 - root - INFO - step: 34780 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 19:51:35,828 - root - INFO - lr: 
6.9018e-06 gnorm: 1.21 [21:17:24< 3:11:43] +[titan] 2025-10-05 19:51:46,685 - root - INFO - step: 34785 loss: 1.9053 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 19:51:46,685 - root - INFO - lr: 6.8982e-06 gnorm: 1.20 [21:17:35< 3:11:32] +[titan] 2025-10-05 19:51:57,587 - root - INFO - step: 34790 loss: 1.9201 memory: 118.84GiB(85.28%) tps: 30,058 tflops: 417.01 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 19:51:57,587 - root - INFO - lr: 6.8946e-06 gnorm: 1.18 [21:17:46< 3:11:21] +[titan] 2025-10-05 19:52:08,485 - root - INFO - step: 34795 loss: 1.9967 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2311 global_avg_mtp_loss: 1.7656 +[titan] 2025-10-05 19:52:08,485 - root - INFO - lr: 6.8910e-06 gnorm: 1.22 [21:17:57< 3:11:10] +[titan] 2025-10-05 19:52:17,186 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:52:19,378 - root - INFO - step: 34800 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,084 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 19:52:19,378 - root - INFO - lr: 6.8875e-06 gnorm: 1.20 [21:18:08< 3:10:59] +[titan] 2025-10-05 19:52:30,261 - root - INFO - step: 34805 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 19:52:30,261 - root - INFO - lr: 6.8839e-06 gnorm: 1.19 [21:18:18< 3:10:48] +[titan] 2025-10-05 19:52:41,146 - root - INFO - step: 34810 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 30,106 tflops: 417.67 mfu: 42.23% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 19:52:41,146 - root - INFO - lr: 6.8803e-06 gnorm: 1.20 [21:18:29< 3:10:37] +[titan] 2025-10-05 19:52:52,095 - root - INFO - step: 34815 loss: 1.9996 memory: 118.84GiB(85.28%) tps: 29,928 tflops: 415.21 mfu: 41.98% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7667 +[titan] 2025-10-05 19:52:52,095 - root - INFO - lr: 6.8767e-06 gnorm: 1.23 [21:18:40< 3:10:26] +[titan] 2025-10-05 19:52:54,460 - root - INFO - Dumping profiler traces at step 34816 +[titan] 2025-10-05 19:52:54,497 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 19:53:03,216 - root - INFO - step: 34820 loss: 1.9324 memory: 118.84GiB(85.28%) tps: 29,466 tflops: 408.80 mfu: 41.33% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:53:03,216 - root - INFO - lr: 6.8732e-06 gnorm: 1.20 [21:18:51< 3:10:15] +[titan] 2025-10-05 19:53:14,080 - root - INFO - step: 34825 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7156 +[titan] 2025-10-05 19:53:14,080 - root - INFO - lr: 6.8696e-06 gnorm: 1.17 [21:19:02< 3:10:03] +[titan] 2025-10-05 19:53:24,945 - root - INFO - step: 34830 loss: 
1.8794 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 19:53:24,945 - root - INFO - lr: 6.8661e-06 gnorm: 1.15 [21:19:13< 3:09:52] +[titan] 2025-10-05 19:53:35,780 - root - INFO - step: 34835 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 19:53:35,780 - root - INFO - lr: 6.8625e-06 gnorm: 1.16 [21:19:24< 3:09:41] +[titan] 2025-10-05 19:53:46,625 - root - INFO - step: 34840 loss: 2.0003 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7678 +[titan] 2025-10-05 19:53:46,626 - root - INFO - lr: 6.8589e-06 gnorm: 1.21 [21:19:35< 3:09:30] +[titan] 2025-10-05 19:53:57,479 - root - INFO - step: 34845 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 19:53:57,479 - root - INFO - lr: 6.8554e-06 gnorm: 1.18 [21:19:46< 3:09:19] +[titan] 2025-10-05 19:54:06,167 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:54:08,348 - root - INFO - step: 34850 loss: 2.0208 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7866 +[titan] 2025-10-05 19:54:08,349 - root - INFO - lr: 6.8518e-06 gnorm: 1.22 [21:19:56< 3:09:08] +[titan] 2025-10-05 19:54:19,236 - root - INFO - step: 34855 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7302 +[titan] 2025-10-05 19:54:19,236 - root - INFO - lr: 6.8483e-06 gnorm: 1.19 [21:20:07< 3:08:57] +[titan] 2025-10-05 19:54:30,115 - root - INFO - step: 34860 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.90 mfu: 42.25% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 19:54:30,115 - root - INFO - lr: 6.8448e-06 gnorm: 1.17 [21:20:18< 3:08:46] +[titan] 2025-10-05 19:54:40,989 - root - INFO - step: 34865 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 19:54:40,989 - root - INFO - lr: 6.8412e-06 gnorm: 1.22 [21:20:29< 3:08:35] +[titan] 2025-10-05 19:54:51,840 - root - INFO - step: 34870 loss: 2.0039 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7710 +[titan] 2025-10-05 19:54:51,840 - root - INFO - lr: 6.8377e-06 gnorm: 1.23 [21:20:40< 3:08:24] +[titan] 2025-10-05 19:55:02,739 - root - INFO - step: 34875 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.12 mfu: 42.18% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 19:55:02,739 - root - INFO - lr: 6.8342e-06 gnorm: 1.18 [21:20:51< 3:08:13] +[titan] 2025-10-05 19:55:13,616 - root - INFO - step: 34880 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 19:55:13,616 - root - INFO - lr: 
6.8306e-06 gnorm: 1.19 [21:21:02< 3:08:02] +[titan] 2025-10-05 19:55:24,502 - root - INFO - step: 34885 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7556 +[titan] 2025-10-05 19:55:24,502 - root - INFO - lr: 6.8271e-06 gnorm: 1.20 [21:21:13< 3:07:51] +[titan] 2025-10-05 19:55:35,390 - root - INFO - step: 34890 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 19:55:35,390 - root - INFO - lr: 6.8236e-06 gnorm: 1.20 [21:21:24< 3:07:40] +[titan] 2025-10-05 19:55:46,234 - root - INFO - step: 34895 loss: 1.9281 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 19:55:46,234 - root - INFO - lr: 6.8201e-06 gnorm: 1.21 [21:21:34< 3:07:29] +[titan] 2025-10-05 19:55:54,939 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:55:57,124 - root - INFO - step: 34900 loss: 1.9752 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7465 +[titan] 2025-10-05 19:55:57,124 - root - INFO - lr: 6.8166e-06 gnorm: 1.22 [21:21:45< 3:07:18] +[titan] 2025-10-05 19:56:07,979 - root - INFO - step: 34905 loss: 1.8773 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6604 +[titan] 2025-10-05 19:56:07,979 - root - INFO - lr: 6.8130e-06 gnorm: 1.27 [21:21:56< 3:07:07] +[titan] 2025-10-05 19:56:18,858 - root - INFO - step: 34910 loss: 1.9375 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 19:56:18,858 - root - INFO - lr: 6.8095e-06 gnorm: 1.28 [21:22:07< 3:06:56] +[titan] 2025-10-05 19:56:29,723 - root - INFO - step: 34915 loss: 1.9603 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 19:56:29,723 - root - INFO - lr: 6.8060e-06 gnorm: 1.20 [21:22:18< 3:06:45] +[titan] 2025-10-05 19:56:40,632 - root - INFO - step: 34920 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 19:56:40,632 - root - INFO - lr: 6.8025e-06 gnorm: 1.20 [21:22:29< 3:06:34] +[titan] 2025-10-05 19:56:51,542 - root - INFO - step: 34925 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7347 +[titan] 2025-10-05 19:56:51,542 - root - INFO - lr: 6.7990e-06 gnorm: 1.19 [21:22:40< 3:06:23] +[titan] 2025-10-05 19:57:02,433 - root - INFO - step: 34930 loss: 1.8978 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6780 +[titan] 2025-10-05 19:57:02,433 - root - INFO - lr: 
6.7955e-06 gnorm: 1.19 [21:22:51< 3:06:12] +[titan] 2025-10-05 19:57:13,339 - root - INFO - step: 34935 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 19:57:13,339 - root - INFO - lr: 6.7920e-06 gnorm: 1.19 [21:23:01< 3:06:01] +[titan] 2025-10-05 19:57:24,225 - root - INFO - step: 34940 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 19:57:24,225 - root - INFO - lr: 6.7886e-06 gnorm: 1.21 [21:23:12< 3:05:50] +[titan] 2025-10-05 19:57:35,111 - root - INFO - step: 34945 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 19:57:35,112 - root - INFO - lr: 6.7851e-06 gnorm: 1.20 [21:23:23< 3:05:39] +[titan] 2025-10-05 19:57:43,908 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 19:57:46,109 - root - INFO - step: 34950 loss: 1.9818 memory: 118.84GiB(85.28%) tps: 29,796 tflops: 413.37 mfu: 41.80% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7527 +[titan] 2025-10-05 19:57:46,109 - root - INFO - lr: 6.7816e-06 gnorm: 1.21 [21:23:34< 3:05:28] +[titan] 2025-10-05 19:57:56,976 - root - INFO - step: 34955 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 19:57:56,976 - root - INFO - lr: 6.7781e-06 gnorm: 1.21 [21:23:45< 3:05:16] +[titan] 2025-10-05 19:58:07,860 - root - INFO - step: 34960 loss: 1.8843 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 19:58:07,860 - root - INFO - lr: 6.7746e-06 gnorm: 1.18 [21:23:56< 3:05:05] +[titan] 2025-10-05 19:58:18,737 - root - INFO - step: 34965 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7497 +[titan] 2025-10-05 19:58:18,737 - root - INFO - lr: 6.7712e-06 gnorm: 1.21 [21:24:07< 3:04:54] +[titan] 2025-10-05 19:58:29,592 - root - INFO - step: 34970 loss: 1.9872 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7564 +[titan] 2025-10-05 19:58:29,592 - root - INFO - lr: 6.7677e-06 gnorm: 2.00 [21:24:18< 3:04:43] +[titan] 2025-10-05 19:58:40,452 - root - INFO - step: 34975 loss: 1.9526 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7267 +[titan] 2025-10-05 19:58:40,453 - root - INFO - lr: 6.7642e-06 gnorm: 1.24 [21:24:29< 3:04:32] +[titan] 2025-10-05 19:58:51,317 - root - INFO - step: 34980 loss: 1.8424 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6294 +[titan] 2025-10-05 19:58:51,317 - root - INFO - lr: 
6.7608e-06 gnorm: 1.20 [21:24:39< 3:04:21] +[titan] 2025-10-05 19:59:02,209 - root - INFO - step: 34985 loss: 2.0210 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2337 global_avg_mtp_loss: 1.7873 +[titan] 2025-10-05 19:59:02,209 - root - INFO - lr: 6.7573e-06 gnorm: 1.25 [21:24:50< 3:04:10] +[titan] 2025-10-05 19:59:13,085 - root - INFO - step: 34990 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.27% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 19:59:13,085 - root - INFO - lr: 6.7538e-06 gnorm: 1.21 [21:25:01< 3:03:59] +[titan] 2025-10-05 19:59:23,963 - root - INFO - step: 34995 loss: 1.9729 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7447 +[titan] 2025-10-05 19:59:23,964 - root - INFO - lr: 6.7504e-06 gnorm: 1.20 [21:25:12< 3:03:48] +[titan] 2025-10-05 19:59:32,669 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 19:59:34,852 - root - INFO - step: 35000 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7524 +[titan] 2025-10-05 19:59:34,852 - root - INFO - lr: 6.7469e-06 gnorm: 1.20 [21:25:23< 3:03:37] +[titan] 2025-10-05 19:59:34,852 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 19:59:52,561 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 19:59:52,561 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 17.71 seconds. 
+[titan] 2025-10-05 20:02:00,815 - root - INFO - step: 35005 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 2,245 tflops: 31.15 mfu: 3.15% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7222 +[titan] 2025-10-05 20:02:00,815 - root - INFO - lr: 6.7435e-06 gnorm: 1.17 [21:27:49< 3:03:45] +[titan] 2025-10-05 20:02:11,608 - root - INFO - step: 35010 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,362 tflops: 421.22 mfu: 42.59% global_avg_ntp_loss: 0.2304 global_avg_mtp_loss: 1.7532 +[titan] 2025-10-05 20:02:11,608 - root - INFO - lr: 6.7401e-06 gnorm: 1.25 [21:28:00< 3:03:34] +[titan] 2025-10-05 20:02:22,413 - root - INFO - step: 35015 loss: 1.8869 memory: 118.84GiB(85.28%) tps: 30,330 tflops: 420.78 mfu: 42.55% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 20:02:22,413 - root - INFO - lr: 6.7366e-06 gnorm: 1.21 [21:28:11< 3:03:23] +[titan] 2025-10-05 20:02:33,281 - root - INFO - step: 35020 loss: 1.9173 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6960 +[titan] 2025-10-05 20:02:33,281 - root - INFO - lr: 6.7332e-06 gnorm: 1.19 [21:28:21< 3:03:12] +[titan] 2025-10-05 20:02:44,100 - root - INFO - step: 35025 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:02:44,100 - root - INFO - lr: 6.7297e-06 gnorm: 1.17 [21:28:32< 3:03:01] +[titan] 2025-10-05 20:02:54,948 - root - INFO - step: 35030 loss: 1.9917 memory: 118.84GiB(85.28%) tps: 30,208 tflops: 419.09 mfu: 42.37% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7618 +[titan] 2025-10-05 20:02:54,948 - root - INFO - lr: 6.7263e-06 gnorm: 1.23 [21:28:43< 3:02:50] +[titan] 2025-10-05 20:03:05,780 - root - INFO - step: 35035 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.70 mfu: 42.44% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:03:05,780 - root - INFO - lr: 
6.7229e-06 gnorm: 1.24 [21:28:54< 3:02:39] +[titan] 2025-10-05 20:03:16,638 - root - INFO - step: 35040 loss: 1.9905 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7605 +[titan] 2025-10-05 20:03:16,638 - root - INFO - lr: 6.7195e-06 gnorm: 1.23 [21:29:05< 3:02:28] +[titan] 2025-10-05 20:03:27,560 - root - INFO - step: 35045 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7166 +[titan] 2025-10-05 20:03:27,560 - root - INFO - lr: 6.7160e-06 gnorm: 1.19 [21:29:16< 3:02:17] +[titan] 2025-10-05 20:03:36,243 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 20:03:38,413 - root - INFO - step: 35050 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.90 mfu: 42.36% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7064 +[titan] 2025-10-05 20:03:38,413 - root - INFO - lr: 6.7126e-06 gnorm: 1.23 [21:29:27< 3:02:06] +[titan] 2025-10-05 20:03:49,265 - root - INFO - step: 35055 loss: 1.9623 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7343 +[titan] 2025-10-05 20:03:49,265 - root - INFO - lr: 6.7092e-06 gnorm: 1.18 [21:29:37< 3:01:55] +[titan] 2025-10-05 20:04:00,143 - root - INFO - step: 35060 loss: 1.9047 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6842 +[titan] 2025-10-05 20:04:00,143 - root - INFO - lr: 6.7058e-06 gnorm: 1.22 [21:29:48< 3:01:44] +[titan] 2025-10-05 20:04:11,001 - root - INFO - step: 35065 loss: 1.8697 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 20:04:11,001 - root - INFO - lr: 6.7024e-06 gnorm: 1.21 [21:29:59< 3:01:33] +[titan] 2025-10-05 20:04:21,863 - root - INFO - step: 35070 loss: 1.9582 memory: 118.84GiB(85.28%) tps: 
30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:04:21,863 - root - INFO - lr: 6.6990e-06 gnorm: 1.24 [21:30:10< 3:01:22] +[titan] 2025-10-05 20:04:32,799 - root - INFO - step: 35075 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7576 +[titan] 2025-10-05 20:04:32,799 - root - INFO - lr: 6.6956e-06 gnorm: 1.21 [21:30:21< 3:01:11] +[titan] 2025-10-05 20:04:43,675 - root - INFO - step: 35080 loss: 1.9508 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7248 +[titan] 2025-10-05 20:04:43,675 - root - INFO - lr: 6.6922e-06 gnorm: 1.15 [21:30:32< 3:00:59] +[titan] 2025-10-05 20:04:54,541 - root - INFO - step: 35085 loss: 1.9027 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 20:04:54,542 - root - INFO - lr: 6.6888e-06 gnorm: 1.19 [21:30:43< 3:00:48] +[titan] 2025-10-05 20:05:05,402 - root - INFO - step: 35090 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:05:05,402 - root - INFO - lr: 6.6854e-06 gnorm: 1.18 [21:30:53< 3:00:37] +[titan] 2025-10-05 20:05:16,263 - root - INFO - step: 35095 loss: 1.9794 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:05:16,263 - root - INFO - lr: 6.6820e-06 gnorm: 1.22 [21:31:04< 3:00:26] +[titan] 2025-10-05 20:05:24,926 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:05:27,144 - root - INFO - step: 35100 loss: 1.9245 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7014 +[titan] 2025-10-05 20:05:27,145 - root - INFO - lr: 6.6786e-06 gnorm: 1.23 [21:31:15< 3:00:15] +[titan] 2025-10-05 20:05:38,035 - root - INFO - step: 35105 loss: 1.9631 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7359 +[titan] 2025-10-05 20:05:38,035 - root - INFO - lr: 6.6753e-06 gnorm: 1.17 [21:31:26< 3:00:04] +[titan] 2025-10-05 20:05:48,877 - root - INFO - step: 35110 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7243 +[titan] 2025-10-05 20:05:48,877 - root - INFO - lr: 6.6719e-06 gnorm: 1.15 [21:31:37< 2:59:53] +[titan] 2025-10-05 20:05:59,749 - root - INFO - step: 35115 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 20:05:59,749 - root - INFO - lr: 6.6685e-06 gnorm: 1.20 [21:31:48< 2:59:42] +[titan] 2025-10-05 20:06:10,605 - root - INFO - step: 35120 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 20:06:10,606 - root - INFO - lr: 6.6651e-06 gnorm: 1.17 [21:31:59< 2:59:31] +[titan] 2025-10-05 20:06:21,451 - root - INFO - step: 35125 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 20:06:21,451 - root - INFO - lr: 6.6618e-06 gnorm: 1.20 [21:32:10< 2:59:20] +[titan] 2025-10-05 20:06:32,365 - root - INFO - step: 35130 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:06:32,365 - root - INFO - lr: 
6.6584e-06 gnorm: 1.23 [21:32:20< 2:59:09] +[titan] 2025-10-05 20:06:43,231 - root - INFO - step: 35135 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7050 +[titan] 2025-10-05 20:06:43,232 - root - INFO - lr: 6.6550e-06 gnorm: 1.18 [21:32:31< 2:58:58] +[titan] 2025-10-05 20:06:54,140 - root - INFO - step: 35140 loss: 1.9653 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7386 +[titan] 2025-10-05 20:06:54,140 - root - INFO - lr: 6.6517e-06 gnorm: 1.23 [21:32:42< 2:58:47] +[titan] 2025-10-05 20:07:05,022 - root - INFO - step: 35145 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7255 +[titan] 2025-10-05 20:07:05,022 - root - INFO - lr: 6.6483e-06 gnorm: 1.23 [21:32:53< 2:58:36] +[titan] 2025-10-05 20:07:13,698 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:07:15,932 - root - INFO - step: 35150 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.72 mfu: 42.14% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6718 +[titan] 2025-10-05 20:07:15,932 - root - INFO - lr: 6.6450e-06 gnorm: 1.21 [21:33:04< 2:58:25] +[titan] 2025-10-05 20:07:26,828 - root - INFO - step: 35155 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:07:26,828 - root - INFO - lr: 6.6416e-06 gnorm: 1.16 [21:33:15< 2:58:14] +[titan] 2025-10-05 20:07:37,740 - root - INFO - step: 35160 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7417 +[titan] 2025-10-05 20:07:37,740 - root - INFO - lr: 6.6383e-06 gnorm: 1.17 [21:33:26< 2:58:03] +[titan] 2025-10-05 20:07:48,623 - root - INFO - step: 35165 loss: 1.9332 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7098 +[titan] 2025-10-05 20:07:48,623 - root - INFO - lr: 6.6349e-06 gnorm: 1.21 [21:33:37< 2:57:51] +[titan] 2025-10-05 20:07:59,524 - root - INFO - step: 35170 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7411 +[titan] 2025-10-05 20:07:59,524 - root - INFO - lr: 6.6316e-06 gnorm: 1.20 [21:33:48< 2:57:40] +[titan] 2025-10-05 20:08:10,396 - root - INFO - step: 35175 loss: 1.9544 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:08:10,396 - root - INFO - lr: 6.6283e-06 gnorm: 1.19 [21:33:58< 2:57:29] +[titan] 2025-10-05 20:08:21,269 - root - INFO - step: 35180 loss: 1.9109 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:08:21,270 - root - INFO - lr: 
6.6249e-06 gnorm: 1.19 [21:34:09< 2:57:18] +[titan] 2025-10-05 20:08:32,174 - root - INFO - step: 35185 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.89 mfu: 42.15% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 20:08:32,175 - root - INFO - lr: 6.6216e-06 gnorm: 1.21 [21:34:20< 2:57:07] +[titan] 2025-10-05 20:08:43,054 - root - INFO - step: 35190 loss: 1.9950 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7626 +[titan] 2025-10-05 20:08:43,054 - root - INFO - lr: 6.6183e-06 gnorm: 1.21 [21:34:31< 2:56:56] +[titan] 2025-10-05 20:08:53,935 - root - INFO - step: 35195 loss: 1.9405 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7155 +[titan] 2025-10-05 20:08:53,935 - root - INFO - lr: 6.6150e-06 gnorm: 1.23 [21:34:42< 2:56:45] +[titan] 2025-10-05 20:09:02,614 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:09:04,794 - root - INFO - step: 35200 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 20:09:04,794 - root - INFO - lr: 6.6116e-06 gnorm: 1.17 [21:34:53< 2:56:34] +[titan] 2025-10-05 20:09:15,695 - root - INFO - step: 35205 loss: 1.9077 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:09:15,695 - root - INFO - lr: 6.6083e-06 gnorm: 1.21 [21:35:04< 2:56:23] +[titan] 2025-10-05 20:09:26,591 - root - INFO - step: 35210 loss: 1.9224 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:09:26,591 - root - INFO - lr: 6.6050e-06 gnorm: 1.21 [21:35:15< 2:56:12] +[titan] 2025-10-05 20:09:37,512 - root - INFO - step: 35215 loss: 1.9614 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7337 +[titan] 2025-10-05 20:09:37,512 - root - INFO - lr: 6.6017e-06 gnorm: 1.22 [21:35:26< 2:56:01] +[titan] 2025-10-05 20:09:48,396 - root - INFO - step: 35220 loss: 1.9286 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7057 +[titan] 2025-10-05 20:09:48,396 - root - INFO - lr: 6.5984e-06 gnorm: 1.23 [21:35:36< 2:55:50] +[titan] 2025-10-05 20:09:59,291 - root - INFO - step: 35225 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 20:09:59,291 - root - INFO - lr: 6.5951e-06 gnorm: 1.22 [21:35:47< 2:55:39] +[titan] 2025-10-05 20:10:10,147 - root - INFO - step: 35230 loss: 1.9319 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 20:10:10,147 - root - INFO - lr: 
6.5918e-06 gnorm: 1.26 [21:35:58< 2:55:28] +[titan] 2025-10-05 20:10:21,054 - root - INFO - step: 35235 loss: 1.9111 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 20:10:21,055 - root - INFO - lr: 6.5885e-06 gnorm: 1.18 [21:36:09< 2:55:17] +[titan] 2025-10-05 20:10:31,940 - root - INFO - step: 35240 loss: 1.8612 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:10:31,940 - root - INFO - lr: 6.5852e-06 gnorm: 1.16 [21:36:20< 2:55:06] +[titan] 2025-10-05 20:10:42,806 - root - INFO - step: 35245 loss: 2.0002 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2314 global_avg_mtp_loss: 1.7688 +[titan] 2025-10-05 20:10:42,807 - root - INFO - lr: 6.5819e-06 gnorm: 1.22 [21:36:31< 2:54:55] +[titan] 2025-10-05 20:10:51,506 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:10:53,701 - root - INFO - step: 35250 loss: 1.9990 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2310 global_avg_mtp_loss: 1.7680 +[titan] 2025-10-05 20:10:53,701 - root - INFO - lr: 6.5786e-06 gnorm: 1.21 [21:36:42< 2:54:43] +[titan] 2025-10-05 20:11:04,581 - root - INFO - step: 35255 loss: 1.9709 memory: 118.84GiB(85.28%) tps: 30,119 tflops: 417.86 mfu: 42.25% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7426 +[titan] 2025-10-05 20:11:04,581 - root - INFO - lr: 6.5754e-06 gnorm: 1.20 [21:36:53< 2:54:32] +[titan] 2025-10-05 20:11:15,487 - root - INFO - step: 35260 loss: 1.9259 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:11:15,487 - root - INFO - lr: 6.5721e-06 gnorm: 1.23 [21:37:04< 2:54:21] +[titan] 2025-10-05 20:11:26,398 - root - INFO - step: 35265 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.66 mfu: 42.13% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:11:26,398 - root - INFO - lr: 6.5688e-06 gnorm: 1.23 [21:37:14< 2:54:10] +[titan] 2025-10-05 20:11:37,313 - root - INFO - step: 35270 loss: 1.9217 memory: 118.84GiB(85.28%) tps: 30,022 tflops: 416.50 mfu: 42.11% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 20:11:37,314 - root - INFO - lr: 6.5655e-06 gnorm: 1.21 [21:37:25< 2:53:59] +[titan] 2025-10-05 20:11:48,214 - root - INFO - step: 35275 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 20:11:48,214 - root - INFO - lr: 6.5623e-06 gnorm: 1.24 [21:37:36< 2:53:48] +[titan] 2025-10-05 20:11:59,075 - root - INFO - step: 35280 loss: 1.9849 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7550 +[titan] 2025-10-05 20:11:59,075 - root - INFO - lr: 
6.5590e-06 gnorm: 1.20 [21:37:47< 2:53:37] +[titan] 2025-10-05 20:12:09,938 - root - INFO - step: 35285 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6887 +[titan] 2025-10-05 20:12:09,939 - root - INFO - lr: 6.5557e-06 gnorm: 1.21 [21:37:58< 2:53:26] +[titan] 2025-10-05 20:12:20,821 - root - INFO - step: 35290 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 20:12:20,821 - root - INFO - lr: 6.5525e-06 gnorm: 1.18 [21:38:09< 2:53:15] +[titan] 2025-10-05 20:12:31,713 - root - INFO - step: 35295 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 20:12:31,713 - root - INFO - lr: 6.5492e-06 gnorm: 1.23 [21:38:20< 2:53:04] +[titan] 2025-10-05 20:12:40,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:12:42,667 - root - INFO - step: 35300 loss: 1.9229 memory: 118.84GiB(85.28%) tps: 29,915 tflops: 415.03 mfu: 41.96% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 20:12:42,667 - root - INFO - lr: 6.5460e-06 gnorm: 1.23 [21:38:31< 2:52:53] +[titan] 2025-10-05 20:12:53,570 - root - INFO - step: 35305 loss: 1.9724 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.98 mfu: 42.16% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 20:12:53,570 - root - INFO - lr: 6.5427e-06 gnorm: 1.21 [21:38:42< 2:52:42] +[titan] 2025-10-05 20:13:04,452 - root - INFO - step: 35310 loss: 1.9317 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7088 +[titan] 2025-10-05 20:13:04,452 - root - INFO - lr: 6.5395e-06 gnorm: 1.22 [21:38:53< 2:52:31] +[titan] 2025-10-05 20:13:15,334 - root - INFO - step: 35315 loss: 1.9552 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7282 +[titan] 2025-10-05 20:13:15,335 - root - INFO - lr: 6.5362e-06 gnorm: 1.20 [21:39:03< 2:52:20] +[titan] 2025-10-05 20:13:26,220 - root - INFO - step: 35320 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 20:13:26,221 - root - INFO - lr: 6.5330e-06 gnorm: 1.19 [21:39:14< 2:52:09] +[titan] 2025-10-05 20:13:37,204 - root - INFO - step: 35325 loss: 1.9407 memory: 118.84GiB(85.28%) tps: 29,834 tflops: 413.90 mfu: 41.85% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 20:13:37,205 - root - INFO - lr: 6.5297e-06 gnorm: 1.20 [21:39:25< 2:51:58] +[titan] 2025-10-05 20:13:43,907 - root - INFO - Dumping profiler traces at step 35328 +[titan] 2025-10-05 20:13:43,946 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:13:48,348 - root - INFO - step: 35330 loss: 
1.9326 memory: 118.84GiB(85.28%) tps: 29,406 tflops: 407.96 mfu: 41.25% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:13:48,348 - root - INFO - lr: 6.5265e-06 gnorm: 1.23 [21:39:36< 2:51:47] +[titan] 2025-10-05 20:13:59,245 - root - INFO - step: 35335 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7238 +[titan] 2025-10-05 20:13:59,245 - root - INFO - lr: 6.5233e-06 gnorm: 1.23 [21:39:47< 2:51:36] +[titan] 2025-10-05 20:14:10,148 - root - INFO - step: 35340 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:14:10,149 - root - INFO - lr: 6.5201e-06 gnorm: 1.24 [21:39:58< 2:51:25] +[titan] 2025-10-05 20:14:21,047 - root - INFO - step: 35345 loss: 1.9642 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7363 +[titan] 2025-10-05 20:14:21,048 - root - INFO - lr: 6.5168e-06 gnorm: 1.20 [21:40:09< 2:51:14] +[titan] 2025-10-05 20:14:29,752 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:14:31,931 - root - INFO - step: 35350 loss: 1.9071 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 20:14:31,931 - root - INFO - lr: 6.5136e-06 gnorm: 1.22 [21:40:20< 2:51:02] +[titan] 2025-10-05 20:14:42,833 - root - INFO - step: 35355 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,057 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6814 +[titan] 2025-10-05 20:14:42,833 - root - INFO - lr: 6.5104e-06 gnorm: 1.18 [21:40:31< 2:50:51] +[titan] 2025-10-05 20:14:53,713 - root - INFO - step: 35360 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 20:14:53,714 - root - INFO - lr: 6.5072e-06 gnorm: 1.22 [21:40:42< 2:50:40] +[titan] 2025-10-05 20:15:04,622 - root - INFO - step: 35365 loss: 1.9438 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.77 mfu: 42.14% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:15:04,622 - root - INFO - lr: 6.5040e-06 gnorm: 1.20 [21:40:53< 2:50:29] +[titan] 2025-10-05 20:15:15,532 - root - INFO - step: 35370 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:15:15,532 - root - INFO - lr: 6.5008e-06 gnorm: 1.21 [21:41:04< 2:50:18] +[titan] 2025-10-05 20:15:26,422 - root - INFO - step: 35375 loss: 1.9139 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 20:15:26,422 - root - INFO - lr: 6.4976e-06 gnorm: 1.20 [21:41:14< 2:50:07] +[titan] 2025-10-05 20:15:37,640 - root - INFO - step: 35380 loss: 1.9110 memory: 118.84GiB(85.28%) tps: 29,212 tflops: 405.27 mfu: 40.98% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:15:37,640 - root - INFO - lr: 
6.4944e-06 gnorm: 1.19 [21:41:26< 2:49:56] +[titan] 2025-10-05 20:15:48,502 - root - INFO - step: 35385 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6841 +[titan] 2025-10-05 20:15:48,502 - root - INFO - lr: 6.4912e-06 gnorm: 1.22 [21:41:37< 2:49:45] +[titan] 2025-10-05 20:15:59,387 - root - INFO - step: 35390 loss: 1.9078 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 20:15:59,387 - root - INFO - lr: 6.4880e-06 gnorm: 1.27 [21:41:47< 2:49:34] +[titan] 2025-10-05 20:16:10,282 - root - INFO - step: 35395 loss: 1.9762 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7479 +[titan] 2025-10-05 20:16:10,282 - root - INFO - lr: 6.4848e-06 gnorm: 1.21 [21:41:58< 2:49:23] +[titan] 2025-10-05 20:16:18,973 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:16:21,149 - root - INFO - step: 35400 loss: 1.8914 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6724 +[titan] 2025-10-05 20:16:21,150 - root - INFO - lr: 6.4816e-06 gnorm: 1.20 [21:42:09< 2:49:12] +[titan] 2025-10-05 20:16:32,037 - root - INFO - step: 35405 loss: 1.9437 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7194 +[titan] 2025-10-05 20:16:32,038 - root - INFO - lr: 6.4784e-06 gnorm: 1.23 [21:42:20< 2:49:01] +[titan] 2025-10-05 20:16:42,971 - root - INFO - step: 35410 loss: 1.9290 memory: 118.84GiB(85.28%) tps: 29,971 tflops: 415.80 mfu: 42.04% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:16:42,971 - root - INFO - lr: 6.4752e-06 gnorm: 1.20 [21:42:31< 2:48:50] +[titan] 2025-10-05 20:16:53,840 - root - INFO - step: 35415 loss: 1.9100 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6890 +[titan] 2025-10-05 20:16:53,840 - root - INFO - lr: 6.4721e-06 gnorm: 1.17 [21:42:42< 2:48:39] +[titan] 2025-10-05 20:17:04,705 - root - INFO - step: 35420 loss: 1.9333 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:17:04,705 - root - INFO - lr: 6.4689e-06 gnorm: 1.23 [21:42:53< 2:48:28] +[titan] 2025-10-05 20:17:15,612 - root - INFO - step: 35425 loss: 1.9832 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 20:17:15,612 - root - INFO - lr: 6.4657e-06 gnorm: 1.20 [21:43:04< 2:48:17] +[titan] 2025-10-05 20:17:26,485 - root - INFO - step: 35430 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:17:26,485 - root - INFO - lr: 
6.4625e-06 gnorm: 1.18 [21:43:15< 2:48:06] +[titan] 2025-10-05 20:17:37,354 - root - INFO - step: 35435 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:17:37,354 - root - INFO - lr: 6.4594e-06 gnorm: 1.23 [21:43:25< 2:47:55] +[titan] 2025-10-05 20:17:48,271 - root - INFO - step: 35440 loss: 1.9162 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:17:48,271 - root - INFO - lr: 6.4562e-06 gnorm: 1.18 [21:43:36< 2:47:44] +[titan] 2025-10-05 20:17:59,161 - root - INFO - step: 35445 loss: 1.9393 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 20:17:59,161 - root - INFO - lr: 6.4531e-06 gnorm: 1.21 [21:43:47< 2:47:32] +[titan] 2025-10-05 20:18:07,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:18:10,014 - root - INFO - step: 35450 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:18:10,014 - root - INFO - lr: 6.4499e-06 gnorm: 1.20 [21:43:58< 2:47:21] +[titan] 2025-10-05 20:18:20,865 - root - INFO - step: 35455 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7133 +[titan] 2025-10-05 20:18:20,865 - root - INFO - lr: 6.4468e-06 gnorm: 1.25 [21:44:09< 2:47:10] +[titan] 2025-10-05 20:18:31,752 - root - INFO - step: 35460 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.6977 +[titan] 2025-10-05 20:18:31,752 - root - INFO - lr: 6.4436e-06 gnorm: 1.27 [21:44:20< 2:46:59] +[titan] 2025-10-05 20:18:42,672 - root - INFO - step: 35465 loss: 1.9476 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.32 mfu: 42.10% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 20:18:42,672 - root - INFO - lr: 6.4405e-06 gnorm: 1.21 [21:44:31< 2:46:48] +[titan] 2025-10-05 20:18:53,523 - root - INFO - step: 35470 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7130 +[titan] 2025-10-05 20:18:53,523 - root - INFO - lr: 6.4373e-06 gnorm: 1.22 [21:44:42< 2:46:37] +[titan] 2025-10-05 20:19:04,397 - root - INFO - step: 35475 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 20:19:04,397 - root - INFO - lr: 6.4342e-06 gnorm: 1.20 [21:44:52< 2:46:26] +[titan] 2025-10-05 20:19:15,272 - root - INFO - step: 35480 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:19:15,272 - root - INFO - lr: 
6.4311e-06 gnorm: 1.15 [21:45:03< 2:46:15] +[titan] 2025-10-05 20:19:26,134 - root - INFO - step: 35485 loss: 1.9187 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6964 +[titan] 2025-10-05 20:19:26,134 - root - INFO - lr: 6.4279e-06 gnorm: 1.22 [21:45:14< 2:46:04] +[titan] 2025-10-05 20:19:37,003 - root - INFO - step: 35490 loss: 1.9264 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:19:37,003 - root - INFO - lr: 6.4248e-06 gnorm: 1.22 [21:45:25< 2:45:53] +[titan] 2025-10-05 20:19:48,060 - root - INFO - step: 35495 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 29,638 tflops: 411.18 mfu: 41.57% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 20:19:48,060 - root - INFO - lr: 6.4217e-06 gnorm: 1.17 [21:45:36< 2:45:42] +[titan] 2025-10-05 20:19:56,754 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:19:58,930 - root - INFO - step: 35500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 20:19:58,930 - root - INFO - lr: 6.4186e-06 gnorm: 1.23 [21:45:47< 2:45:31] +[titan] 2025-10-05 20:20:09,779 - root - INFO - step: 35505 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 20:20:09,780 - root - INFO - lr: 6.4154e-06 gnorm: 1.19 [21:45:58< 2:45:20] +[titan] 2025-10-05 20:20:20,670 - root - INFO - step: 35510 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:20:20,671 - root - INFO - lr: 6.4123e-06 gnorm: 1.22 [21:46:09< 2:45:09] +[titan] 2025-10-05 20:20:31,543 - root - INFO - step: 35515 loss: 1.8943 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 20:20:31,543 - root - INFO - lr: 6.4092e-06 gnorm: 1.24 [21:46:20< 2:44:58] +[titan] 2025-10-05 20:20:42,433 - root - INFO - step: 35520 loss: 1.9336 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:20:42,433 - root - INFO - lr: 6.4061e-06 gnorm: 1.20 [21:46:30< 2:44:47] +[titan] 2025-10-05 20:20:53,334 - root - INFO - step: 35525 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7142 +[titan] 2025-10-05 20:20:53,334 - root - INFO - lr: 6.4030e-06 gnorm: 1.24 [21:46:41< 2:44:36] +[titan] 2025-10-05 20:21:04,211 - root - INFO - step: 35530 loss: 1.9837 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7544 +[titan] 2025-10-05 20:21:04,211 - root - INFO - lr: 
6.3999e-06 gnorm: 1.20 [21:46:52< 2:44:25] +[titan] 2025-10-05 20:21:15,077 - root - INFO - step: 35535 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7364 +[titan] 2025-10-05 20:21:15,077 - root - INFO - lr: 6.3968e-06 gnorm: 1.22 [21:47:03< 2:44:13] +[titan] 2025-10-05 20:21:25,947 - root - INFO - step: 35540 loss: 2.0043 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2325 global_avg_mtp_loss: 1.7718 +[titan] 2025-10-05 20:21:25,947 - root - INFO - lr: 6.3937e-06 gnorm: 1.23 [21:47:14< 2:44:02] +[titan] 2025-10-05 20:21:36,813 - root - INFO - step: 35545 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:21:36,813 - root - INFO - lr: 6.3906e-06 gnorm: 1.26 [21:47:25< 2:43:51] +[titan] 2025-10-05 20:21:45,548 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:21:47,733 - root - INFO - step: 35550 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,009 tflops: 416.33 mfu: 42.10% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 20:21:47,733 - root - INFO - lr: 6.3875e-06 gnorm: 1.29 [21:47:36< 2:43:40] +[titan] 2025-10-05 20:21:58,624 - root - INFO - step: 35555 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.43 mfu: 42.21% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6918 +[titan] 2025-10-05 20:21:58,624 - root - INFO - lr: 6.3845e-06 gnorm: 1.20 [21:47:47< 2:43:29] +[titan] 2025-10-05 20:22:09,503 - root - INFO - step: 35560 loss: 1.8840 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 20:22:09,503 - root - INFO - lr: 6.3814e-06 gnorm: 1.18 [21:47:58< 2:43:18] +[titan] 2025-10-05 20:22:20,399 - root - INFO - step: 35565 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7435 +[titan] 2025-10-05 20:22:20,399 - root - INFO - lr: 6.3783e-06 gnorm: 1.22 [21:48:08< 2:43:07] +[titan] 2025-10-05 20:22:31,288 - root - INFO - step: 35570 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 20:22:31,288 - root - INFO - lr: 6.3752e-06 gnorm: 1.21 [21:48:19< 2:42:56] +[titan] 2025-10-05 20:22:42,161 - root - INFO - step: 35575 loss: 1.9928 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2319 global_avg_mtp_loss: 1.7609 +[titan] 2025-10-05 20:22:42,162 - root - INFO - lr: 6.3722e-06 gnorm: 1.31 [21:48:30< 2:42:45] +[titan] 2025-10-05 20:22:53,096 - root - INFO - step: 35580 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 29,968 tflops: 415.76 mfu: 42.04% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 20:22:53,096 - root - INFO - lr: 
6.3691e-06 gnorm: 1.25 [21:48:41< 2:42:34] +[titan] 2025-10-05 20:23:04,005 - root - INFO - step: 35585 loss: 1.9646 memory: 118.84GiB(85.28%) tps: 30,040 tflops: 416.75 mfu: 42.14% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7373 +[titan] 2025-10-05 20:23:04,005 - root - INFO - lr: 6.3660e-06 gnorm: 1.22 [21:48:52< 2:42:23] +[titan] 2025-10-05 20:23:14,874 - root - INFO - step: 35590 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6686 +[titan] 2025-10-05 20:23:14,874 - root - INFO - lr: 6.3630e-06 gnorm: 1.15 [21:49:03< 2:42:12] +[titan] 2025-10-05 20:23:25,760 - root - INFO - step: 35595 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 20:23:25,760 - root - INFO - lr: 6.3599e-06 gnorm: 1.26 [21:49:14< 2:42:01] +[titan] 2025-10-05 20:23:34,455 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:23:36,637 - root - INFO - step: 35600 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:23:36,637 - root - INFO - lr: 6.3568e-06 gnorm: 1.19 [21:49:25< 2:41:50] +[titan] 2025-10-05 20:23:47,564 - root - INFO - step: 35605 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 29,988 tflops: 416.04 mfu: 42.07% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 20:23:47,564 - root - INFO - lr: 6.3538e-06 gnorm: 1.18 [21:49:36< 2:41:39] +[titan] 2025-10-05 20:23:58,425 - root - INFO - step: 35610 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6615 +[titan] 2025-10-05 20:23:58,425 - root - INFO - lr: 6.3508e-06 gnorm: 1.20 [21:49:46< 2:41:28] +[titan] 2025-10-05 20:24:09,278 - root - INFO - step: 35615 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 20:24:09,278 - root - INFO - lr: 6.3477e-06 gnorm: 1.21 [21:49:57< 2:41:17] +[titan] 2025-10-05 20:24:20,176 - root - INFO - step: 35620 loss: 1.9484 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7234 +[titan] 2025-10-05 20:24:20,176 - root - INFO - lr: 6.3447e-06 gnorm: 1.21 [21:50:08< 2:41:06] +[titan] 2025-10-05 20:24:31,048 - root - INFO - step: 35625 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7146 +[titan] 2025-10-05 20:24:31,049 - root - INFO - lr: 6.3416e-06 gnorm: 1.18 [21:50:19< 2:40:55] +[titan] 2025-10-05 20:24:41,914 - root - INFO - step: 35630 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 20:24:41,914 - root - INFO - lr: 
6.3386e-06 gnorm: 1.22 [21:50:30< 2:40:43] +[titan] 2025-10-05 20:24:52,829 - root - INFO - step: 35635 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,023 tflops: 416.52 mfu: 42.12% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:24:52,829 - root - INFO - lr: 6.3356e-06 gnorm: 1.18 [21:50:41< 2:40:32] +[titan] 2025-10-05 20:25:03,707 - root - INFO - step: 35640 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 20:25:03,707 - root - INFO - lr: 6.3325e-06 gnorm: 1.23 [21:50:52< 2:40:21] +[titan] 2025-10-05 20:25:14,555 - root - INFO - step: 35645 loss: 1.8684 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 20:25:14,556 - root - INFO - lr: 6.3295e-06 gnorm: 1.19 [21:51:03< 2:40:10] +[titan] 2025-10-05 20:25:23,267 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:25:25,451 - root - INFO - step: 35650 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7187 +[titan] 2025-10-05 20:25:25,451 - root - INFO - lr: 6.3265e-06 gnorm: 1.20 [21:51:13< 2:39:59] +[titan] 2025-10-05 20:25:36,291 - root - INFO - step: 35655 loss: 1.9662 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 20:25:36,291 - root - INFO - lr: 6.3235e-06 gnorm: 1.22 [21:51:24< 2:39:48] +[titan] 2025-10-05 20:25:47,198 - root - INFO - step: 35660 loss: 1.9669 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7381 +[titan] 2025-10-05 20:25:47,199 - root - INFO - lr: 6.3205e-06 gnorm: 1.19 [21:51:35< 2:39:37] +[titan] 2025-10-05 20:25:58,057 - root - INFO - step: 35665 loss: 1.9343 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 20:25:58,057 - root - INFO - lr: 6.3174e-06 gnorm: 1.22 [21:51:46< 2:39:26] +[titan] 2025-10-05 20:26:08,933 - root - INFO - step: 35670 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:26:08,934 - root - INFO - lr: 6.3144e-06 gnorm: 1.20 [21:51:57< 2:39:15] +[titan] 2025-10-05 20:26:19,799 - root - INFO - step: 35675 loss: 1.9723 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7434 +[titan] 2025-10-05 20:26:19,799 - root - INFO - lr: 6.3114e-06 gnorm: 1.25 [21:52:08< 2:39:04] +[titan] 2025-10-05 20:26:30,670 - root - INFO - step: 35680 loss: 2.0060 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7729 +[titan] 2025-10-05 20:26:30,670 - root - INFO - lr: 
6.3084e-06 gnorm: 1.22 [21:52:19< 2:38:53] +[titan] 2025-10-05 20:26:41,581 - root - INFO - step: 35685 loss: 2.0069 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.68 mfu: 42.13% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7739 +[titan] 2025-10-05 20:26:41,581 - root - INFO - lr: 6.3054e-06 gnorm: 1.29 [21:52:30< 2:38:42] +[titan] 2025-10-05 20:26:52,517 - root - INFO - step: 35690 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,963 tflops: 415.70 mfu: 42.03% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:26:52,517 - root - INFO - lr: 6.3024e-06 gnorm: 1.23 [21:52:41< 2:38:31] +[titan] 2025-10-05 20:27:03,395 - root - INFO - step: 35695 loss: 1.9599 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7330 +[titan] 2025-10-05 20:27:03,396 - root - INFO - lr: 6.2995e-06 gnorm: 1.23 [21:52:51< 2:38:20] +[titan] 2025-10-05 20:27:12,077 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:27:14,268 - root - INFO - step: 35700 loss: 1.9472 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 20:27:14,268 - root - INFO - lr: 6.2965e-06 gnorm: 1.23 [21:53:02< 2:38:09] +[titan] 2025-10-05 20:27:25,130 - root - INFO - step: 35705 loss: 1.8887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:27:25,130 - root - INFO - lr: 6.2935e-06 gnorm: 1.19 [21:53:13< 2:37:58] +[titan] 2025-10-05 20:27:35,993 - root - INFO - step: 35710 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 20:27:35,993 - root - INFO - lr: 6.2905e-06 gnorm: 1.28 [21:53:24< 2:37:47] +[titan] 2025-10-05 20:27:46,902 - root - INFO - step: 35715 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.7053 +[titan] 2025-10-05 20:27:46,902 - root - INFO - lr: 6.2875e-06 gnorm: 1.23 [21:53:35< 2:37:36] +[titan] 2025-10-05 20:27:57,813 - root - INFO - step: 35720 loss: 2.0166 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2333 global_avg_mtp_loss: 1.7833 +[titan] 2025-10-05 20:27:57,813 - root - INFO - lr: 6.2846e-06 gnorm: 1.21 [21:53:46< 2:37:25] +[titan] 2025-10-05 20:28:08,684 - root - INFO - step: 35725 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 20:28:08,685 - root - INFO - lr: 6.2816e-06 gnorm: 1.21 [21:53:57< 2:37:13] +[titan] 2025-10-05 20:28:19,553 - root - INFO - step: 35730 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.6969 +[titan] 2025-10-05 20:28:19,553 - root - INFO - lr: 
6.2786e-06 gnorm: 1.19 [21:54:08< 2:37:02] +[titan] 2025-10-05 20:28:30,452 - root - INFO - step: 35735 loss: 2.0073 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2330 global_avg_mtp_loss: 1.7743 +[titan] 2025-10-05 20:28:30,452 - root - INFO - lr: 6.2756e-06 gnorm: 1.25 [21:54:18< 2:36:51] +[titan] 2025-10-05 20:28:41,341 - root - INFO - step: 35740 loss: 1.9840 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7546 +[titan] 2025-10-05 20:28:41,341 - root - INFO - lr: 6.2727e-06 gnorm: 1.26 [21:54:29< 2:36:40] +[titan] 2025-10-05 20:28:52,320 - root - INFO - step: 35745 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.09 mfu: 41.87% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6496 +[titan] 2025-10-05 20:28:52,320 - root - INFO - lr: 6.2697e-06 gnorm: 1.19 [21:54:40< 2:36:29] +[titan] 2025-10-05 20:29:01,029 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:29:03,215 - root - INFO - step: 35750 loss: 1.8998 memory: 118.84GiB(85.28%) tps: 30,078 tflops: 417.28 mfu: 42.19% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 20:29:03,215 - root - INFO - lr: 6.2668e-06 gnorm: 1.23 [21:54:51< 2:36:18] +[titan] 2025-10-05 20:29:14,102 - root - INFO - step: 35755 loss: 1.9903 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7594 +[titan] 2025-10-05 20:29:14,102 - root - INFO - lr: 6.2638e-06 gnorm: 1.25 [21:55:02< 2:36:07] +[titan] 2025-10-05 20:29:24,977 - root - INFO - step: 35760 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7346 +[titan] 2025-10-05 20:29:24,977 - root - INFO - lr: 6.2609e-06 gnorm: 1.19 [21:55:13< 2:35:56] +[titan] 2025-10-05 20:29:35,865 - root - INFO - step: 35765 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 30,096 tflops: 417.54 mfu: 42.22% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7406 +[titan] 2025-10-05 20:29:35,865 - root - INFO - lr: 6.2579e-06 gnorm: 1.20 [21:55:24< 2:35:45] +[titan] 2025-10-05 20:29:46,743 - root - INFO - step: 35770 loss: 1.9516 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7260 +[titan] 2025-10-05 20:29:46,743 - root - INFO - lr: 6.2550e-06 gnorm: 1.22 [21:55:35< 2:35:34] +[titan] 2025-10-05 20:29:57,662 - root - INFO - step: 35775 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,010 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7137 +[titan] 2025-10-05 20:29:57,662 - root - INFO - lr: 6.2521e-06 gnorm: 1.24 [21:55:46< 2:35:23] +[titan] 2025-10-05 20:30:08,549 - root - INFO - step: 35780 loss: 2.0052 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7724 +[titan] 2025-10-05 20:30:08,549 - root - INFO - lr: 
6.2491e-06 gnorm: 1.24 [21:55:57< 2:35:12] +[titan] 2025-10-05 20:30:19,428 - root - INFO - step: 35785 loss: 2.0119 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7784 +[titan] 2025-10-05 20:30:19,428 - root - INFO - lr: 6.2462e-06 gnorm: 1.22 [21:56:07< 2:35:01] +[titan] 2025-10-05 20:30:30,298 - root - INFO - step: 35790 loss: 1.8995 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 20:30:30,298 - root - INFO - lr: 6.2433e-06 gnorm: 1.20 [21:56:18< 2:34:50] +[titan] 2025-10-05 20:30:41,166 - root - INFO - step: 35795 loss: 1.9713 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7424 +[titan] 2025-10-05 20:30:41,166 - root - INFO - lr: 6.2403e-06 gnorm: 1.22 [21:56:29< 2:34:39] +[titan] 2025-10-05 20:30:49,930 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:30:52,145 - root - INFO - step: 35800 loss: 1.8719 memory: 118.84GiB(85.28%) tps: 29,847 tflops: 414.08 mfu: 41.87% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6562 +[titan] 2025-10-05 20:30:52,145 - root - INFO - lr: 6.2374e-06 gnorm: 1.18 [21:56:40< 2:34:28] +[titan] 2025-10-05 20:31:03,002 - root - INFO - step: 35805 loss: 1.8418 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2128 global_avg_mtp_loss: 1.6290 +[titan] 2025-10-05 20:31:03,002 - root - INFO - lr: 6.2345e-06 gnorm: 1.17 [21:56:51< 2:34:17] +[titan] 2025-10-05 20:31:13,902 - root - INFO - step: 35810 loss: 1.9513 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7249 +[titan] 2025-10-05 20:31:13,902 - root - INFO - lr: 6.2316e-06 gnorm: 1.20 [21:57:02< 2:34:06] +[titan] 2025-10-05 20:31:24,762 - root - INFO - step: 35815 loss: 1.8766 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:31:24,763 - root - INFO - lr: 6.2287e-06 gnorm: 1.21 [21:57:13< 2:33:55] +[titan] 2025-10-05 20:31:35,622 - root - INFO - step: 35820 loss: 1.9218 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6988 +[titan] 2025-10-05 20:31:35,623 - root - INFO - lr: 6.2258e-06 gnorm: 1.20 [21:57:24< 2:33:44] +[titan] 2025-10-05 20:31:46,463 - root - INFO - step: 35825 loss: 1.9833 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7536 +[titan] 2025-10-05 20:31:46,464 - root - INFO - lr: 6.2229e-06 gnorm: 1.23 [21:57:34< 2:33:32] +[titan] 2025-10-05 20:31:57,412 - root - INFO - step: 35830 loss: 1.8980 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 20:31:57,412 - root - INFO - lr: 
6.2200e-06 gnorm: 1.20 [21:57:45< 2:33:21] +[titan] 2025-10-05 20:32:08,273 - root - INFO - step: 35835 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6616 +[titan] 2025-10-05 20:32:08,273 - root - INFO - lr: 6.2171e-06 gnorm: 1.17 [21:57:56< 2:33:10] +[titan] 2025-10-05 20:32:19,226 - root - INFO - step: 35840 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 29,917 tflops: 415.05 mfu: 41.97% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 20:32:19,227 - root - INFO - lr: 6.2142e-06 gnorm: 1.24 [21:58:07< 2:32:59] +[titan] 2025-10-05 20:32:19,424 - root - INFO - Dumping profiler traces at step 35840 +[titan] 2025-10-05 20:32:19,465 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:32:30,380 - root - INFO - step: 35845 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,379 tflops: 407.58 mfu: 41.21% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7161 +[titan] 2025-10-05 20:32:30,381 - root - INFO - lr: 6.2113e-06 gnorm: 1.21 [21:58:18< 2:32:48] +[titan] 2025-10-05 20:32:39,072 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:32:41,258 - root - INFO - step: 35850 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:32:41,258 - root - INFO - lr: 6.2084e-06 gnorm: 1.18 [21:58:29< 2:32:37] +[titan] 2025-10-05 20:32:52,159 - root - INFO - step: 35855 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.7037 +[titan] 2025-10-05 20:32:52,159 - root - INFO - lr: 6.2055e-06 gnorm: 1.21 [21:58:40< 2:32:26] +[titan] 2025-10-05 20:33:03,040 - root - INFO - step: 35860 loss: 1.9254 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 20:33:03,041 - root - INFO - lr: 6.2026e-06 gnorm: 1.22 [21:58:51< 2:32:15] +[titan] 2025-10-05 20:33:13,902 - root - INFO - step: 35865 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7075 +[titan] 2025-10-05 20:33:13,902 - root - INFO - lr: 6.1998e-06 gnorm: 1.24 [21:59:02< 2:32:04] +[titan] 2025-10-05 20:33:24,764 - root - INFO - step: 35870 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 20:33:24,764 - root - INFO - lr: 6.1969e-06 gnorm: 1.24 [21:59:13< 2:31:53] +[titan] 2025-10-05 20:33:35,665 - root - INFO - step: 35875 loss: 1.9478 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 20:33:35,666 - root - INFO - lr: 6.1940e-06 gnorm: 1.20 [21:59:24< 2:31:42] +[titan] 2025-10-05 20:33:46,542 - root - INFO - step: 35880 loss: 1.9795 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 417.99 mfu: 42.26% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 20:33:46,542 - root - INFO - lr: 
6.1911e-06 gnorm: 1.22 [21:59:35< 2:31:31] +[titan] 2025-10-05 20:33:57,456 - root - INFO - step: 35885 loss: 1.9215 memory: 118.84GiB(85.28%) tps: 30,026 tflops: 416.56 mfu: 42.12% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 20:33:57,456 - root - INFO - lr: 6.1883e-06 gnorm: 1.21 [21:59:45< 2:31:20] +[titan] 2025-10-05 20:34:08,320 - root - INFO - step: 35890 loss: 1.9456 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 20:34:08,321 - root - INFO - lr: 6.1854e-06 gnorm: 1.23 [21:59:56< 2:31:09] +[titan] 2025-10-05 20:34:19,183 - root - INFO - step: 35895 loss: 1.9503 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7250 +[titan] 2025-10-05 20:34:19,183 - root - INFO - lr: 6.1826e-06 gnorm: 1.20 [22:00:07< 2:30:58] +[titan] 2025-10-05 20:34:27,859 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:34:30,044 - root - INFO - step: 35900 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7490 +[titan] 2025-10-05 20:34:30,044 - root - INFO - lr: 6.1797e-06 gnorm: 1.24 [22:00:18< 2:30:47] +[titan] 2025-10-05 20:34:40,962 - root - INFO - step: 35905 loss: 1.9685 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.42 mfu: 42.10% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 20:34:40,962 - root - INFO - lr: 6.1769e-06 gnorm: 1.21 [22:00:29< 2:30:36] +[titan] 2025-10-05 20:34:51,814 - root - INFO - step: 35910 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 20:34:51,814 - root - INFO - lr: 6.1740e-06 gnorm: 1.20 [22:00:40< 2:30:25] +[titan] 2025-10-05 20:35:02,707 - root - INFO - step: 35915 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2321 global_avg_mtp_loss: 1.7636 +[titan] 2025-10-05 20:35:02,708 - root - INFO - lr: 6.1712e-06 gnorm: 1.25 [22:00:51< 2:30:14] +[titan] 2025-10-05 20:35:13,561 - root - INFO - step: 35920 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 20:35:13,561 - root - INFO - lr: 6.1683e-06 gnorm: 1.21 [22:01:02< 2:30:03] +[titan] 2025-10-05 20:35:24,429 - root - INFO - step: 35925 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 20:35:24,429 - root - INFO - lr: 6.1655e-06 gnorm: 1.21 [22:01:12< 2:29:51] +[titan] 2025-10-05 20:35:35,298 - root - INFO - step: 35930 loss: 1.9695 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7410 +[titan] 2025-10-05 20:35:35,298 - root - INFO - lr: 
6.1627e-06 gnorm: 1.20 [22:01:23< 2:29:40] +[titan] 2025-10-05 20:35:46,161 - root - INFO - step: 35935 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6568 +[titan] 2025-10-05 20:35:46,161 - root - INFO - lr: 6.1598e-06 gnorm: 1.22 [22:01:34< 2:29:29] +[titan] 2025-10-05 20:35:57,095 - root - INFO - step: 35940 loss: 1.9376 memory: 118.84GiB(85.28%) tps: 29,969 tflops: 415.78 mfu: 42.04% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7140 +[titan] 2025-10-05 20:35:57,096 - root - INFO - lr: 6.1570e-06 gnorm: 1.21 [22:01:45< 2:29:18] +[titan] 2025-10-05 20:36:07,977 - root - INFO - step: 35945 loss: 1.9482 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 20:36:07,977 - root - INFO - lr: 6.1542e-06 gnorm: 1.23 [22:01:56< 2:29:07] +[titan] 2025-10-05 20:36:16,663 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:36:18,851 - root - INFO - step: 35950 loss: 1.8140 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6041 +[titan] 2025-10-05 20:36:18,851 - root - INFO - lr: 6.1514e-06 gnorm: 1.21 [22:02:07< 2:28:56] +[titan] 2025-10-05 20:36:29,728 - root - INFO - step: 35955 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6593 +[titan] 2025-10-05 20:36:29,728 - root - INFO - lr: 6.1485e-06 gnorm: 1.18 [22:02:18< 2:28:45] +[titan] 2025-10-05 20:36:40,603 - root - INFO - step: 35960 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.05 mfu: 42.27% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 20:36:40,603 - root - INFO - lr: 6.1457e-06 gnorm: 1.20 [22:02:29< 2:28:34] +[titan] 2025-10-05 20:36:51,466 - root - INFO - step: 35965 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,165 tflops: 418.49 mfu: 42.31% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7135 +[titan] 2025-10-05 20:36:51,467 - root - INFO - lr: 6.1429e-06 gnorm: 1.21 [22:02:39< 2:28:23] +[titan] 2025-10-05 20:37:02,418 - root - INFO - step: 35970 loss: 1.8787 memory: 118.84GiB(85.28%) tps: 29,921 tflops: 415.11 mfu: 41.97% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6610 +[titan] 2025-10-05 20:37:02,418 - root - INFO - lr: 6.1401e-06 gnorm: 1.17 [22:02:50< 2:28:12] +[titan] 2025-10-05 20:37:13,277 - root - INFO - step: 35975 loss: 1.9766 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:37:13,277 - root - INFO - lr: 6.1373e-06 gnorm: 1.23 [22:03:01< 2:28:01] +[titan] 2025-10-05 20:37:24,151 - root - INFO - step: 35980 loss: 1.9461 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7215 +[titan] 2025-10-05 20:37:24,151 - root - INFO - lr: 
6.1345e-06 gnorm: 1.17 [22:03:12< 2:27:50] +[titan] 2025-10-05 20:37:34,997 - root - INFO - step: 35985 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6927 +[titan] 2025-10-05 20:37:34,997 - root - INFO - lr: 6.1317e-06 gnorm: 1.19 [22:03:23< 2:27:39] +[titan] 2025-10-05 20:37:45,857 - root - INFO - step: 35990 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 20:37:45,857 - root - INFO - lr: 6.1289e-06 gnorm: 1.21 [22:03:34< 2:27:28] +[titan] 2025-10-05 20:37:56,761 - root - INFO - step: 35995 loss: 1.9764 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7481 +[titan] 2025-10-05 20:37:56,761 - root - INFO - lr: 6.1261e-06 gnorm: 1.22 [22:03:45< 2:27:17] +[titan] 2025-10-05 20:38:05,423 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:38:07,602 - root - INFO - step: 36000 loss: 2.0179 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2341 global_avg_mtp_loss: 1.7838 +[titan] 2025-10-05 20:38:07,602 - root - INFO - lr: 6.1233e-06 gnorm: 1.24 [22:03:56< 2:27:06] +[titan] 2025-10-05 20:38:18,465 - root - INFO - step: 36005 loss: 1.8959 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 20:38:18,465 - root - INFO - lr: 6.1206e-06 gnorm: 1.22 [22:04:06< 2:26:55] +[titan] 2025-10-05 20:38:29,352 - root - INFO - step: 36010 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:38:29,352 - root - INFO - lr: 6.1178e-06 gnorm: 1.19 [22:04:17< 2:26:44] +[titan] 2025-10-05 20:38:40,197 - root - INFO - step: 36015 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7096 +[titan] 2025-10-05 20:38:40,197 - root - INFO - lr: 6.1150e-06 gnorm: 1.22 [22:04:28< 2:26:33] +[titan] 2025-10-05 20:38:51,058 - root - INFO - step: 36020 loss: 1.8917 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 20:38:51,058 - root - INFO - lr: 6.1122e-06 gnorm: 1.21 [22:04:39< 2:26:22] +[titan] 2025-10-05 20:39:01,952 - root - INFO - step: 36025 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6606 +[titan] 2025-10-05 20:39:01,952 - root - INFO - lr: 6.1095e-06 gnorm: 1.20 [22:04:50< 2:26:10] +[titan] 2025-10-05 20:39:12,835 - root - INFO - step: 36030 loss: 1.9704 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2317 global_avg_mtp_loss: 1.7387 +[titan] 2025-10-05 20:39:12,835 - root - INFO - lr: 
6.1067e-06 gnorm: 1.25 [22:05:01< 2:25:59] +[titan] 2025-10-05 20:39:23,710 - root - INFO - step: 36035 loss: 1.9575 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 20:39:23,711 - root - INFO - lr: 6.1039e-06 gnorm: 1.23 [22:05:12< 2:25:48] +[titan] 2025-10-05 20:39:34,575 - root - INFO - step: 36040 loss: 1.9897 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7595 +[titan] 2025-10-05 20:39:34,575 - root - INFO - lr: 6.1012e-06 gnorm: 1.23 [22:05:23< 2:25:37] +[titan] 2025-10-05 20:39:45,433 - root - INFO - step: 36045 loss: 1.8945 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6757 +[titan] 2025-10-05 20:39:45,434 - root - INFO - lr: 6.0984e-06 gnorm: 1.20 [22:05:33< 2:25:26] +[titan] 2025-10-05 20:39:54,106 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:39:56,290 - root - INFO - step: 36050 loss: 1.9349 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7112 +[titan] 2025-10-05 20:39:56,290 - root - INFO - lr: 6.0957e-06 gnorm: 1.20 [22:05:44< 2:25:15] +[titan] 2025-10-05 20:40:07,175 - root - INFO - step: 36055 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 20:40:07,175 - root - INFO - lr: 6.0929e-06 gnorm: 1.26 [22:05:55< 2:25:04] +[titan] 2025-10-05 20:40:18,044 - root - INFO - step: 36060 loss: 1.9545 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7285 +[titan] 2025-10-05 20:40:18,044 - root - INFO - lr: 6.0902e-06 gnorm: 1.22 [22:06:06< 2:24:53] +[titan] 2025-10-05 20:40:28,916 - root - INFO - step: 36065 loss: 1.9511 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.13 mfu: 42.28% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7253 +[titan] 2025-10-05 20:40:28,916 - root - INFO - lr: 6.0874e-06 gnorm: 1.21 [22:06:17< 2:24:42] +[titan] 2025-10-05 20:40:39,778 - root - INFO - step: 36070 loss: 1.8531 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6374 +[titan] 2025-10-05 20:40:39,778 - root - INFO - lr: 6.0847e-06 gnorm: 1.21 [22:06:28< 2:24:31] +[titan] 2025-10-05 20:40:50,621 - root - INFO - step: 36075 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7340 +[titan] 2025-10-05 20:40:50,621 - root - INFO - lr: 6.0820e-06 gnorm: 1.21 [22:06:39< 2:24:20] +[titan] 2025-10-05 20:41:01,488 - root - INFO - step: 36080 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.33 mfu: 42.30% global_avg_ntp_loss: 0.2331 global_avg_mtp_loss: 1.7854 +[titan] 2025-10-05 20:41:01,489 - root - INFO - lr: 
6.0792e-06 gnorm: 1.24 [22:06:49< 2:24:09] +[titan] 2025-10-05 20:41:12,335 - root - INFO - step: 36085 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7143 +[titan] 2025-10-05 20:41:12,335 - root - INFO - lr: 6.0765e-06 gnorm: 1.21 [22:07:00< 2:23:58] +[titan] 2025-10-05 20:41:23,167 - root - INFO - step: 36090 loss: 1.9835 memory: 118.84GiB(85.28%) tps: 30,251 tflops: 419.68 mfu: 42.43% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7543 +[titan] 2025-10-05 20:41:23,168 - root - INFO - lr: 6.0738e-06 gnorm: 1.24 [22:07:11< 2:23:47] +[titan] 2025-10-05 20:41:34,043 - root - INFO - step: 36095 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:41:34,044 - root - INFO - lr: 6.0710e-06 gnorm: 1.26 [22:07:22< 2:23:36] +[titan] 2025-10-05 20:41:42,728 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:41:44,911 - root - INFO - step: 36100 loss: 1.9238 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7003 +[titan] 2025-10-05 20:41:44,911 - root - INFO - lr: 6.0683e-06 gnorm: 1.23 [22:07:33< 2:23:25] +[titan] 2025-10-05 20:41:55,794 - root - INFO - step: 36105 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 20:41:55,794 - root - INFO - lr: 6.0656e-06 gnorm: 1.18 [22:07:44< 2:23:14] +[titan] 2025-10-05 20:42:06,656 - root - INFO - step: 36110 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 20:42:06,657 - root - INFO - lr: 6.0629e-06 gnorm: 1.22 [22:07:55< 2:23:03] +[titan] 2025-10-05 20:42:17,515 - root - INFO - step: 36115 loss: 1.9416 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7173 +[titan] 2025-10-05 20:42:17,515 - root - INFO - lr: 6.0602e-06 gnorm: 1.22 [22:08:06< 2:22:52] +[titan] 2025-10-05 20:42:28,350 - root - INFO - step: 36120 loss: 1.9932 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7623 +[titan] 2025-10-05 20:42:28,350 - root - INFO - lr: 6.0575e-06 gnorm: 1.23 [22:08:16< 2:22:41] +[titan] 2025-10-05 20:42:39,197 - root - INFO - step: 36125 loss: 1.8516 memory: 118.84GiB(85.28%) tps: 30,211 tflops: 419.13 mfu: 42.38% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 20:42:39,197 - root - INFO - lr: 6.0548e-06 gnorm: 1.24 [22:08:27< 2:22:29] +[titan] 2025-10-05 20:42:50,082 - root - INFO - step: 36130 loss: 1.9008 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 20:42:50,083 - root - INFO - lr: 
6.0521e-06 gnorm: 1.22 [22:08:38< 2:22:18] +[titan] 2025-10-05 20:43:00,986 - root - INFO - step: 36135 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:43:00,987 - root - INFO - lr: 6.0494e-06 gnorm: 1.22 [22:08:49< 2:22:07] +[titan] 2025-10-05 20:43:11,851 - root - INFO - step: 36140 loss: 1.9825 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7499 +[titan] 2025-10-05 20:43:11,851 - root - INFO - lr: 6.0467e-06 gnorm: 1.89 [22:09:00< 2:21:56] +[titan] 2025-10-05 20:43:22,694 - root - INFO - step: 36145 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 20:43:22,694 - root - INFO - lr: 6.0440e-06 gnorm: 1.18 [22:09:11< 2:21:45] +[titan] 2025-10-05 20:43:31,365 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:43:33,546 - root - INFO - step: 36150 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7093 +[titan] 2025-10-05 20:43:33,547 - root - INFO - lr: 6.0413e-06 gnorm: 1.25 [22:09:22< 2:21:34] +[titan] 2025-10-05 20:43:44,389 - root - INFO - step: 36155 loss: 1.9114 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 20:43:44,389 - root - INFO - lr: 6.0386e-06 gnorm: 1.20 [22:09:32< 2:21:23] +[titan] 2025-10-05 20:43:55,248 - root - INFO - step: 36160 loss: 1.9822 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.68 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7528 +[titan] 2025-10-05 20:43:55,248 - root - INFO - lr: 6.0360e-06 gnorm: 1.26 [22:09:43< 2:21:12] +[titan] 2025-10-05 20:44:06,157 - root - INFO - step: 36165 loss: 1.9439 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:06,157 - root - INFO - lr: 6.0333e-06 gnorm: 1.22 [22:09:54< 2:21:01] +[titan] 2025-10-05 20:44:17,014 - root - INFO - step: 36170 loss: 1.9606 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7345 +[titan] 2025-10-05 20:44:17,014 - root - INFO - lr: 6.0306e-06 gnorm: 1.24 [22:10:05< 2:20:50] +[titan] 2025-10-05 20:44:27,854 - root - INFO - step: 36175 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7191 +[titan] 2025-10-05 20:44:27,855 - root - INFO - lr: 6.0279e-06 gnorm: 1.23 [22:10:16< 2:20:39] +[titan] 2025-10-05 20:44:38,720 - root - INFO - step: 36180 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 20:44:38,720 - root - INFO - lr: 
6.0253e-06 gnorm: 1.23 [22:10:27< 2:20:28] +[titan] 2025-10-05 20:44:49,571 - root - INFO - step: 36185 loss: 1.9231 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.96 mfu: 42.36% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7006 +[titan] 2025-10-05 20:44:49,571 - root - INFO - lr: 6.0226e-06 gnorm: 1.24 [22:10:38< 2:20:17] +[titan] 2025-10-05 20:45:00,410 - root - INFO - step: 36190 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7513 +[titan] 2025-10-05 20:45:00,411 - root - INFO - lr: 6.0200e-06 gnorm: 1.29 [22:10:48< 2:20:06] +[titan] 2025-10-05 20:45:11,321 - root - INFO - step: 36195 loss: 1.8986 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6794 +[titan] 2025-10-05 20:45:11,321 - root - INFO - lr: 6.0173e-06 gnorm: 1.22 [22:10:59< 2:19:55] +[titan] 2025-10-05 20:45:19,981 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:45:22,165 - root - INFO - step: 36200 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.21 mfu: 42.39% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 20:45:22,166 - root - INFO - lr: 6.0146e-06 gnorm: 1.25 [22:11:10< 2:19:44] +[titan] 2025-10-05 20:45:33,012 - root - INFO - step: 36205 loss: 1.8677 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6514 +[titan] 2025-10-05 20:45:33,012 - root - INFO - lr: 6.0120e-06 gnorm: 1.21 [22:11:21< 2:19:33] +[titan] 2025-10-05 20:45:43,868 - root - INFO - step: 36210 loss: 1.9368 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 20:45:43,868 - root - INFO - lr: 6.0094e-06 gnorm: 1.23 [22:11:32< 2:19:22] +[titan] 2025-10-05 20:45:54,736 - root - INFO - step: 36215 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.29% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6484 +[titan] 2025-10-05 20:45:54,737 - root - INFO - lr: 6.0067e-06 gnorm: 1.18 [22:11:43< 2:19:11] +[titan] 2025-10-05 20:46:05,631 - root - INFO - step: 36220 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.30 mfu: 42.19% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:46:05,631 - root - INFO - lr: 6.0041e-06 gnorm: 1.19 [22:11:54< 2:19:00] +[titan] 2025-10-05 20:46:16,518 - root - INFO - step: 36225 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 20:46:16,518 - root - INFO - lr: 6.0014e-06 gnorm: 1.22 [22:12:05< 2:18:48] +[titan] 2025-10-05 20:46:27,370 - root - INFO - step: 36230 loss: 1.9836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:46:27,371 - root - INFO - lr: 
5.9988e-06 gnorm: 1.19 [22:12:15< 2:18:37] +[titan] 2025-10-05 20:46:38,232 - root - INFO - step: 36235 loss: 1.8873 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 20:46:38,233 - root - INFO - lr: 5.9962e-06 gnorm: 1.22 [22:12:26< 2:18:26] +[titan] 2025-10-05 20:46:49,088 - root - INFO - step: 36240 loss: 1.9283 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7058 +[titan] 2025-10-05 20:46:49,088 - root - INFO - lr: 5.9936e-06 gnorm: 1.21 [22:12:37< 2:18:15] +[titan] 2025-10-05 20:46:59,957 - root - INFO - step: 36245 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6792 +[titan] 2025-10-05 20:46:59,958 - root - INFO - lr: 5.9909e-06 gnorm: 1.22 [22:12:48< 2:18:04] +[titan] 2025-10-05 20:47:08,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:47:10,858 - root - INFO - step: 36250 loss: 1.9856 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7548 +[titan] 2025-10-05 20:47:10,858 - root - INFO - lr: 5.9883e-06 gnorm: 1.19 [22:12:59< 2:17:53] +[titan] 2025-10-05 20:47:21,702 - root - INFO - step: 36255 loss: 1.9778 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2291 global_avg_mtp_loss: 1.7487 +[titan] 2025-10-05 20:47:21,702 - root - INFO - lr: 5.9857e-06 gnorm: 1.26 [22:13:10< 2:17:42] +[titan] 2025-10-05 20:47:32,596 - root - INFO - step: 36260 loss: 1.9430 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 20:47:32,596 - root - INFO - lr: 5.9831e-06 gnorm: 1.22 [22:13:21< 2:17:31] +[titan] 2025-10-05 20:47:43,478 - root - INFO - step: 36265 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7657 +[titan] 2025-10-05 20:47:43,478 - root - INFO - lr: 5.9805e-06 gnorm: 1.28 [22:13:31< 2:17:20] +[titan] 2025-10-05 20:47:54,366 - root - INFO - step: 36270 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2155 global_avg_mtp_loss: 1.6470 +[titan] 2025-10-05 20:47:54,366 - root - INFO - lr: 5.9779e-06 gnorm: 1.22 [22:13:42< 2:17:09] +[titan] 2025-10-05 20:48:05,288 - root - INFO - step: 36275 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6837 +[titan] 2025-10-05 20:48:05,288 - root - INFO - lr: 5.9753e-06 gnorm: 1.22 [22:13:53< 2:16:58] +[titan] 2025-10-05 20:48:16,197 - root - INFO - step: 36280 loss: 1.9648 memory: 118.84GiB(85.28%) tps: 30,038 tflops: 416.73 mfu: 42.14% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 20:48:16,197 - root - INFO - lr: 
5.9727e-06 gnorm: 1.22 [22:14:04< 2:16:47] +[titan] 2025-10-05 20:48:27,074 - root - INFO - step: 36285 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 20:48:27,075 - root - INFO - lr: 5.9701e-06 gnorm: 1.23 [22:14:15< 2:16:36] +[titan] 2025-10-05 20:48:37,962 - root - INFO - step: 36290 loss: 2.0053 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7733 +[titan] 2025-10-05 20:48:37,962 - root - INFO - lr: 5.9675e-06 gnorm: 1.26 [22:14:26< 2:16:25] +[titan] 2025-10-05 20:48:48,831 - root - INFO - step: 36295 loss: 1.9737 memory: 118.84GiB(85.28%) tps: 30,148 tflops: 418.26 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7454 +[titan] 2025-10-05 20:48:48,832 - root - INFO - lr: 5.9649e-06 gnorm: 1.22 [22:14:37< 2:16:14] +[titan] 2025-10-05 20:48:57,512 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:48:59,685 - root - INFO - step: 36300 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6884 +[titan] 2025-10-05 20:48:59,686 - root - INFO - lr: 5.9623e-06 gnorm: 1.23 [22:14:48< 2:16:03] +[titan] 2025-10-05 20:49:10,530 - root - INFO - step: 36305 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7089 +[titan] 2025-10-05 20:49:10,530 - root - INFO - lr: 5.9597e-06 gnorm: 1.21 [22:14:59< 2:15:52] +[titan] 2025-10-05 20:49:21,373 - root - INFO - step: 36310 loss: 1.9696 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7418 +[titan] 2025-10-05 20:49:21,373 - root - INFO - lr: 5.9572e-06 gnorm: 1.26 [22:15:09< 2:15:41] +[titan] 2025-10-05 20:49:32,211 - root - INFO - step: 36315 loss: 1.9098 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 20:49:32,211 - root - INFO - lr: 5.9546e-06 gnorm: 1.21 [22:15:20< 2:15:30] +[titan] 2025-10-05 20:49:43,047 - root - INFO - step: 36320 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7327 +[titan] 2025-10-05 20:49:43,048 - root - INFO - lr: 5.9520e-06 gnorm: 1.23 [22:15:31< 2:15:19] +[titan] 2025-10-05 20:49:53,930 - root - INFO - step: 36325 loss: 1.9656 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7379 +[titan] 2025-10-05 20:49:53,930 - root - INFO - lr: 5.9495e-06 gnorm: 1.21 [22:15:42< 2:15:08] +[titan] 2025-10-05 20:50:04,790 - root - INFO - step: 36330 loss: 1.8938 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 20:50:04,790 - root - INFO - lr: 
5.9469e-06 gnorm: 1.22 [22:15:53< 2:14:56] +[titan] 2025-10-05 20:50:15,657 - root - INFO - step: 36335 loss: 1.9258 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7028 +[titan] 2025-10-05 20:50:15,657 - root - INFO - lr: 5.9443e-06 gnorm: 1.28 [22:16:04< 2:14:45] +[titan] 2025-10-05 20:50:26,516 - root - INFO - step: 36340 loss: 1.9621 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7349 +[titan] 2025-10-05 20:50:26,516 - root - INFO - lr: 5.9418e-06 gnorm: 1.22 [22:16:14< 2:14:34] +[titan] 2025-10-05 20:50:37,351 - root - INFO - step: 36345 loss: 1.8859 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 20:50:37,351 - root - INFO - lr: 5.9392e-06 gnorm: 1.21 [22:16:25< 2:14:23] +[titan] 2025-10-05 20:50:46,107 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:50:48,289 - root - INFO - step: 36350 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.64 mfu: 42.03% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 20:50:48,289 - root - INFO - lr: 5.9367e-06 gnorm: 1.26 [22:16:36< 2:14:12] +[titan] 2025-10-05 20:50:52,807 - root - INFO - Dumping profiler traces at step 36352 +[titan] 2025-10-05 20:50:52,846 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 20:50:59,390 - root - INFO - step: 36355 loss: 1.9099 memory: 118.84GiB(85.28%) tps: 29,519 tflops: 409.53 mfu: 41.41% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6886 +[titan] 2025-10-05 20:50:59,390 - root - INFO - lr: 5.9341e-06 gnorm: 1.21 [22:16:47< 2:14:01] +[titan] 2025-10-05 20:51:10,256 - root - INFO - step: 36360 loss: 1.8857 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6670 +[titan] 2025-10-05 20:51:10,257 - root - INFO - lr: 5.9316e-06 gnorm: 1.20 [22:16:58< 2:13:50] +[titan] 2025-10-05 20:51:21,108 - root - INFO - step: 36365 loss: 1.9715 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7430 +[titan] 2025-10-05 20:51:21,108 - root - INFO - lr: 5.9290e-06 gnorm: 1.25 [22:17:09< 2:13:39] +[titan] 2025-10-05 20:51:31,957 - root - INFO - step: 36370 loss: 1.9807 memory: 118.84GiB(85.28%) tps: 30,205 tflops: 419.05 mfu: 42.37% global_avg_ntp_loss: 0.2323 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 20:51:31,957 - root - INFO - lr: 5.9265e-06 gnorm: 1.22 [22:17:20< 2:13:28] +[titan] 2025-10-05 20:51:42,813 - root - INFO - step: 36375 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:51:42,813 - root - INFO - lr: 5.9240e-06 gnorm: 1.22 [22:17:31< 2:13:17] +[titan] 2025-10-05 20:51:53,656 - root - INFO - step: 36380 loss: 
1.9104 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 20:51:53,656 - root - INFO - lr: 5.9214e-06 gnorm: 1.27 [22:17:42< 2:13:06] +[titan] 2025-10-05 20:52:04,533 - root - INFO - step: 36385 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.96 mfu: 42.26% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7274 +[titan] 2025-10-05 20:52:04,533 - root - INFO - lr: 5.9189e-06 gnorm: 1.22 [22:17:53< 2:12:55] +[titan] 2025-10-05 20:52:15,414 - root - INFO - step: 36390 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7019 +[titan] 2025-10-05 20:52:15,414 - root - INFO - lr: 5.9164e-06 gnorm: 1.23 [22:18:03< 2:12:44] +[titan] 2025-10-05 20:52:26,295 - root - INFO - step: 36395 loss: 1.9748 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7461 +[titan] 2025-10-05 20:52:26,295 - root - INFO - lr: 5.9139e-06 gnorm: 1.21 [22:18:14< 2:12:33] +[titan] 2025-10-05 20:52:34,980 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:52:37,152 - root - INFO - step: 36400 loss: 1.9325 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7079 +[titan] 2025-10-05 20:52:37,153 - root - INFO - lr: 5.9114e-06 gnorm: 1.22 [22:18:25< 2:12:22] +[titan] 2025-10-05 20:52:48,028 - root - INFO - step: 36405 loss: 1.9539 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7280 +[titan] 2025-10-05 20:52:48,028 - root - INFO - lr: 5.9088e-06 gnorm: 1.20 [22:18:36< 2:12:11] +[titan] 2025-10-05 20:52:58,901 - root - INFO - step: 36410 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,139 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6968 +[titan] 2025-10-05 20:52:58,901 - root - INFO - lr: 5.9063e-06 gnorm: 1.21 [22:18:47< 2:12:00] +[titan] 2025-10-05 20:53:10,114 - root - INFO - step: 36415 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 29,223 tflops: 405.42 mfu: 40.99% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6627 +[titan] 2025-10-05 20:53:10,115 - root - INFO - lr: 5.9038e-06 gnorm: 1.21 [22:18:58< 2:11:49] +[titan] 2025-10-05 20:53:21,005 - root - INFO - step: 36420 loss: 2.0250 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.44 mfu: 42.21% global_avg_ntp_loss: 0.2344 global_avg_mtp_loss: 1.7906 +[titan] 2025-10-05 20:53:21,005 - root - INFO - lr: 5.9013e-06 gnorm: 1.27 [22:19:09< 2:11:38] +[titan] 2025-10-05 20:53:31,873 - root - INFO - step: 36425 loss: 1.9655 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.29 mfu: 42.29% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7367 +[titan] 2025-10-05 20:53:31,874 - root - INFO - lr: 5.8988e-06 gnorm: 1.24 [22:19:20< 2:11:27] +[titan] 2025-10-05 20:53:42,745 - root - INFO - step: 36430 loss: 1.8831 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6658 +[titan] 2025-10-05 20:53:42,745 - root - INFO - lr: 
5.8963e-06 gnorm: 1.28 [22:19:31< 2:11:16] +[titan] 2025-10-05 20:53:53,613 - root - INFO - step: 36435 loss: 1.9414 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 20:53:53,613 - root - INFO - lr: 5.8938e-06 gnorm: 1.23 [22:19:42< 2:11:05] +[titan] 2025-10-05 20:54:04,481 - root - INFO - step: 36440 loss: 1.8968 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:54:04,481 - root - INFO - lr: 5.8914e-06 gnorm: 1.22 [22:19:52< 2:10:53] +[titan] 2025-10-05 20:54:15,378 - root - INFO - step: 36445 loss: 1.9147 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6938 +[titan] 2025-10-05 20:54:15,378 - root - INFO - lr: 5.8889e-06 gnorm: 1.24 [22:20:03< 2:10:42] +[titan] 2025-10-05 20:54:24,085 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:54:26,273 - root - INFO - step: 36450 loss: 1.9402 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 20:54:26,273 - root - INFO - lr: 5.8864e-06 gnorm: 1.25 [22:20:14< 2:10:31] +[titan] 2025-10-05 20:54:37,147 - root - INFO - step: 36455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6980 +[titan] 2025-10-05 20:54:37,147 - root - INFO - lr: 5.8839e-06 gnorm: 1.20 [22:20:25< 2:10:20] +[titan] 2025-10-05 20:54:48,029 - root - INFO - step: 36460 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 20:54:48,029 - root - INFO - lr: 5.8814e-06 gnorm: 1.21 [22:20:36< 2:10:09] +[titan] 2025-10-05 20:54:58,890 - root - INFO - step: 36465 loss: 1.9169 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6956 +[titan] 2025-10-05 20:54:58,890 - root - INFO - lr: 5.8790e-06 gnorm: 1.22 [22:20:47< 2:09:58] +[titan] 2025-10-05 20:55:09,763 - root - INFO - step: 36470 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6935 +[titan] 2025-10-05 20:55:09,764 - root - INFO - lr: 5.8765e-06 gnorm: 1.24 [22:20:58< 2:09:47] +[titan] 2025-10-05 20:55:20,621 - root - INFO - step: 36475 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 20:55:20,622 - root - INFO - lr: 5.8740e-06 gnorm: 1.24 [22:21:09< 2:09:36] +[titan] 2025-10-05 20:55:31,491 - root - INFO - step: 36480 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7117 +[titan] 2025-10-05 20:55:31,491 - root - INFO - lr: 
5.8716e-06 gnorm: 1.25 [22:21:19< 2:09:25] +[titan] 2025-10-05 20:55:42,366 - root - INFO - step: 36485 loss: 1.9193 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6965 +[titan] 2025-10-05 20:55:42,367 - root - INFO - lr: 5.8691e-06 gnorm: 1.22 [22:21:30< 2:09:14] +[titan] 2025-10-05 20:55:53,240 - root - INFO - step: 36490 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6911 +[titan] 2025-10-05 20:55:53,241 - root - INFO - lr: 5.8667e-06 gnorm: 1.19 [22:21:41< 2:09:03] +[titan] 2025-10-05 20:56:04,092 - root - INFO - step: 36495 loss: 1.9788 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7494 +[titan] 2025-10-05 20:56:04,092 - root - INFO - lr: 5.8642e-06 gnorm: 1.28 [22:21:52< 2:08:52] +[titan] 2025-10-05 20:56:12,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:56:14,974 - root - INFO - step: 36500 loss: 1.9757 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 20:56:14,974 - root - INFO - lr: 5.8618e-06 gnorm: 1.22 [22:22:03< 2:08:41] +[titan] 2025-10-05 20:56:25,858 - root - INFO - step: 36505 loss: 1.9635 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2281 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 20:56:25,858 - root - INFO - lr: 5.8593e-06 gnorm: 1.21 [22:22:14< 2:08:30] +[titan] 2025-10-05 20:56:36,711 - root - INFO - step: 36510 loss: 1.9857 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7557 +[titan] 2025-10-05 20:56:36,712 - root - INFO - lr: 5.8569e-06 gnorm: 1.26 [22:22:25< 2:08:19] +[titan] 2025-10-05 20:56:47,594 - root - INFO - step: 36515 loss: 1.9385 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7141 +[titan] 2025-10-05 20:56:47,594 - root - INFO - lr: 5.8544e-06 gnorm: 1.24 [22:22:36< 2:08:08] +[titan] 2025-10-05 20:56:58,464 - root - INFO - step: 36520 loss: 1.8908 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6705 +[titan] 2025-10-05 20:56:58,465 - root - INFO - lr: 5.8520e-06 gnorm: 1.23 [22:22:46< 2:07:57] +[titan] 2025-10-05 20:57:09,332 - root - INFO - step: 36525 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6700 +[titan] 2025-10-05 20:57:09,332 - root - INFO - lr: 5.8496e-06 gnorm: 1.21 [22:22:57< 2:07:46] +[titan] 2025-10-05 20:57:20,232 - root - INFO - step: 36530 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.10 mfu: 42.17% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 20:57:20,232 - root - INFO - lr: 
5.8471e-06 gnorm: 1.21 [22:23:08< 2:07:35] +[titan] 2025-10-05 20:57:31,124 - root - INFO - step: 36535 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7213 +[titan] 2025-10-05 20:57:31,124 - root - INFO - lr: 5.8447e-06 gnorm: 1.23 [22:23:19< 2:07:24] +[titan] 2025-10-05 20:57:42,014 - root - INFO - step: 36540 loss: 1.9957 memory: 118.84GiB(85.28%) tps: 30,090 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7650 +[titan] 2025-10-05 20:57:42,014 - root - INFO - lr: 5.8423e-06 gnorm: 1.25 [22:23:30< 2:07:13] +[titan] 2025-10-05 20:57:52,927 - root - INFO - step: 36545 loss: 1.9727 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.58 mfu: 42.12% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7433 +[titan] 2025-10-05 20:57:52,928 - root - INFO - lr: 5.8399e-06 gnorm: 1.24 [22:23:41< 2:07:02] +[titan] 2025-10-05 20:58:01,636 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:58:03,825 - root - INFO - step: 36550 loss: 1.9288 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.18 mfu: 42.18% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7049 +[titan] 2025-10-05 20:58:03,825 - root - INFO - lr: 5.8375e-06 gnorm: 1.24 [22:23:52< 2:06:50] +[titan] 2025-10-05 20:58:14,740 - root - INFO - step: 36555 loss: 1.9862 memory: 118.84GiB(85.28%) tps: 30,021 tflops: 416.49 mfu: 42.11% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7567 +[titan] 2025-10-05 20:58:14,741 - root - INFO - lr: 5.8351e-06 gnorm: 1.26 [22:24:03< 2:06:39] +[titan] 2025-10-05 20:58:25,614 - root - INFO - step: 36560 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6766 +[titan] 2025-10-05 20:58:25,614 - root - INFO - lr: 5.8326e-06 gnorm: 1.18 [22:24:14< 2:06:28] +[titan] 2025-10-05 20:58:36,506 - root - INFO - step: 36565 loss: 1.8964 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6774 +[titan] 2025-10-05 20:58:36,506 - root - INFO - lr: 5.8302e-06 gnorm: 1.20 [22:24:24< 2:06:17] +[titan] 2025-10-05 20:58:47,389 - root - INFO - step: 36570 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.71 mfu: 42.24% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 20:58:47,390 - root - INFO - lr: 5.8278e-06 gnorm: 1.25 [22:24:35< 2:06:06] +[titan] 2025-10-05 20:58:58,289 - root - INFO - step: 36575 loss: 1.9029 memory: 118.84GiB(85.28%) tps: 30,065 tflops: 417.11 mfu: 42.17% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 20:58:58,289 - root - INFO - lr: 5.8254e-06 gnorm: 1.20 [22:24:46< 2:05:55] +[titan] 2025-10-05 20:59:09,190 - root - INFO - step: 36580 loss: 1.9307 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7074 +[titan] 2025-10-05 20:59:09,190 - root - INFO - lr: 
5.8231e-06 gnorm: 1.24 [22:24:57< 2:05:44] +[titan] 2025-10-05 20:59:20,103 - root - INFO - step: 36585 loss: 1.9720 memory: 118.84GiB(85.28%) tps: 30,029 tflops: 416.60 mfu: 42.12% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7440 +[titan] 2025-10-05 20:59:20,103 - root - INFO - lr: 5.8207e-06 gnorm: 1.22 [22:25:08< 2:05:33] +[titan] 2025-10-05 20:59:30,980 - root - INFO - step: 36590 loss: 1.9441 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7177 +[titan] 2025-10-05 20:59:30,980 - root - INFO - lr: 5.8183e-06 gnorm: 1.22 [22:25:19< 2:05:22] +[titan] 2025-10-05 20:59:41,845 - root - INFO - step: 36595 loss: 1.9044 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6830 +[titan] 2025-10-05 20:59:41,845 - root - INFO - lr: 5.8159e-06 gnorm: 1.20 [22:25:30< 2:05:11] +[titan] 2025-10-05 20:59:50,541 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 20:59:52,732 - root - INFO - step: 36600 loss: 1.9168 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6953 +[titan] 2025-10-05 20:59:52,732 - root - INFO - lr: 5.8135e-06 gnorm: 1.20 [22:25:41< 2:05:00] +[titan] 2025-10-05 21:00:03,618 - root - INFO - step: 36605 loss: 1.8614 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6465 +[titan] 2025-10-05 21:00:03,618 - root - INFO - lr: 5.8111e-06 gnorm: 1.22 [22:25:52< 2:04:49] +[titan] 2025-10-05 21:00:14,529 - root - INFO - step: 36610 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,033 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7120 +[titan] 2025-10-05 21:00:14,529 - root - INFO - lr: 5.8088e-06 gnorm: 1.24 [22:26:02< 2:04:38] +[titan] 2025-10-05 21:00:25,449 - root - INFO - step: 36615 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:00:25,450 - root - INFO - lr: 5.8064e-06 gnorm: 1.23 [22:26:13< 2:04:27] +[titan] 2025-10-05 21:00:36,361 - root - INFO - step: 36620 loss: 1.8625 memory: 118.84GiB(85.28%) tps: 30,031 tflops: 416.64 mfu: 42.13% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6464 +[titan] 2025-10-05 21:00:36,361 - root - INFO - lr: 5.8040e-06 gnorm: 1.24 [22:26:24< 2:04:16] +[titan] 2025-10-05 21:00:47,259 - root - INFO - step: 36625 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7482 +[titan] 2025-10-05 21:00:47,259 - root - INFO - lr: 5.8017e-06 gnorm: 1.24 [22:26:35< 2:04:05] +[titan] 2025-10-05 21:00:58,160 - root - INFO - step: 36630 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:00:58,160 - root - INFO - lr: 
5.7993e-06 gnorm: 1.21 [22:26:46< 2:03:54] +[titan] 2025-10-05 21:01:09,053 - root - INFO - step: 36635 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.35 mfu: 42.20% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:01:09,053 - root - INFO - lr: 5.7969e-06 gnorm: 1.26 [22:26:57< 2:03:43] +[titan] 2025-10-05 21:01:19,958 - root - INFO - step: 36640 loss: 1.9596 memory: 118.84GiB(85.28%) tps: 30,049 tflops: 416.88 mfu: 42.15% global_avg_ntp_loss: 0.2284 global_avg_mtp_loss: 1.7312 +[titan] 2025-10-05 21:01:19,958 - root - INFO - lr: 5.7946e-06 gnorm: 1.24 [22:27:08< 2:03:32] +[titan] 2025-10-05 21:01:30,865 - root - INFO - step: 36645 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.81 mfu: 42.14% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:01:30,865 - root - INFO - lr: 5.7922e-06 gnorm: 1.22 [22:27:19< 2:03:21] +[titan] 2025-10-05 21:01:39,585 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:01:41,769 - root - INFO - step: 36650 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.93 mfu: 42.16% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:01:41,769 - root - INFO - lr: 5.7899e-06 gnorm: 1.25 [22:27:30< 2:03:10] +[titan] 2025-10-05 21:01:52,656 - root - INFO - step: 36655 loss: 1.8852 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6666 +[titan] 2025-10-05 21:01:52,656 - root - INFO - lr: 5.7876e-06 gnorm: 1.26 [22:27:41< 2:02:59] +[titan] 2025-10-05 21:02:03,549 - root - INFO - step: 36660 loss: 1.9170 memory: 118.84GiB(85.28%) tps: 30,083 tflops: 417.36 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6955 +[titan] 2025-10-05 21:02:03,549 - root - INFO - lr: 5.7852e-06 gnorm: 1.24 [22:27:51< 2:02:48] +[titan] 2025-10-05 21:02:14,436 - root - INFO - step: 36665 loss: 1.8750 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2165 global_avg_mtp_loss: 1.6585 +[titan] 2025-10-05 21:02:14,436 - root - INFO - lr: 5.7829e-06 gnorm: 1.20 [22:28:02< 2:02:37] +[titan] 2025-10-05 21:02:25,324 - root - INFO - step: 36670 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,097 tflops: 417.55 mfu: 42.22% global_avg_ntp_loss: 0.2234 global_avg_mtp_loss: 1.7110 +[titan] 2025-10-05 21:02:25,324 - root - INFO - lr: 5.7806e-06 gnorm: 1.25 [22:28:13< 2:02:25] +[titan] 2025-10-05 21:02:36,230 - root - INFO - step: 36675 loss: 1.8517 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6371 +[titan] 2025-10-05 21:02:36,230 - root - INFO - lr: 5.7782e-06 gnorm: 1.21 [22:28:24< 2:02:14] +[titan] 2025-10-05 21:02:47,119 - root - INFO - step: 36680 loss: 1.8308 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.21% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6192 +[titan] 2025-10-05 21:02:47,119 - root - INFO - lr: 
5.7759e-06 gnorm: 1.21 [22:28:35< 2:02:03] +[titan] 2025-10-05 21:02:58,028 - root - INFO - step: 36685 loss: 1.9161 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:02:58,028 - root - INFO - lr: 5.7736e-06 gnorm: 1.27 [22:28:46< 2:01:52] +[titan] 2025-10-05 21:03:08,899 - root - INFO - step: 36690 loss: 1.9705 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7421 +[titan] 2025-10-05 21:03:08,899 - root - INFO - lr: 5.7713e-06 gnorm: 1.23 [22:28:57< 2:01:41] +[titan] 2025-10-05 21:03:19,806 - root - INFO - step: 36695 loss: 1.9616 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:03:19,806 - root - INFO - lr: 5.7689e-06 gnorm: 1.23 [22:29:08< 2:01:30] +[titan] 2025-10-05 21:03:28,517 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:03:30,710 - root - INFO - step: 36700 loss: 1.9551 memory: 118.84GiB(85.28%) tps: 30,053 tflops: 416.94 mfu: 42.16% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7284 +[titan] 2025-10-05 21:03:30,710 - root - INFO - lr: 5.7666e-06 gnorm: 1.27 [22:29:19< 2:01:19] +[titan] 2025-10-05 21:03:41,623 - root - INFO - step: 36705 loss: 1.9618 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7351 +[titan] 2025-10-05 21:03:41,624 - root - INFO - lr: 5.7643e-06 gnorm: 1.24 [22:29:30< 2:01:08] +[titan] 2025-10-05 21:03:52,525 - root - INFO - step: 36710 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,059 tflops: 417.02 mfu: 42.17% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:03:52,525 - root - INFO - lr: 5.7620e-06 gnorm: 1.26 [22:29:40< 2:00:57] +[titan] 2025-10-05 21:04:03,447 - root - INFO - step: 36715 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,003 tflops: 416.25 mfu: 42.09% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:04:03,447 - root - INFO - lr: 5.7597e-06 gnorm: 1.26 [22:29:51< 2:00:46] +[titan] 2025-10-05 21:04:14,324 - root - INFO - step: 36720 loss: 1.9198 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6982 +[titan] 2025-10-05 21:04:14,324 - root - INFO - lr: 5.7574e-06 gnorm: 1.20 [22:30:02< 2:00:35] +[titan] 2025-10-05 21:04:25,273 - root - INFO - step: 36725 loss: 1.9301 memory: 118.84GiB(85.28%) tps: 29,930 tflops: 415.23 mfu: 41.98% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:04:25,273 - root - INFO - lr: 5.7551e-06 gnorm: 1.23 [22:30:13< 2:00:24] +[titan] 2025-10-05 21:04:36,157 - root - INFO - step: 36730 loss: 1.8913 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:04:36,157 - root - INFO - lr: 
5.7528e-06 gnorm: 1.24 [22:30:24< 2:00:13] +[titan] 2025-10-05 21:04:47,035 - root - INFO - step: 36735 loss: 1.9023 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6822 +[titan] 2025-10-05 21:04:47,035 - root - INFO - lr: 5.7505e-06 gnorm: 1.26 [22:30:35< 2:00:02] +[titan] 2025-10-05 21:04:57,939 - root - INFO - step: 36740 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,054 tflops: 416.95 mfu: 42.16% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7162 +[titan] 2025-10-05 21:04:57,939 - root - INFO - lr: 5.7483e-06 gnorm: 1.21 [22:30:46< 1:59:51] +[titan] 2025-10-05 21:05:08,831 - root - INFO - step: 36745 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7169 +[titan] 2025-10-05 21:05:08,831 - root - INFO - lr: 5.7460e-06 gnorm: 1.25 [22:30:57< 1:59:40] +[titan] 2025-10-05 21:05:17,519 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:05:19,701 - root - INFO - step: 36750 loss: 1.9028 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:05:19,702 - root - INFO - lr: 5.7437e-06 gnorm: 1.22 [22:31:08< 1:59:29] +[titan] 2025-10-05 21:05:30,640 - root - INFO - step: 36755 loss: 1.9268 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 21:05:30,640 - root - INFO - lr: 5.7414e-06 gnorm: 1.23 [22:31:19< 1:59:18] +[titan] 2025-10-05 21:05:41,514 - root - INFO - step: 36760 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:05:41,514 - root - INFO - lr: 5.7392e-06 gnorm: 1.21 [22:31:29< 1:59:07] +[titan] 2025-10-05 21:05:52,376 - root - INFO - step: 36765 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 21:05:52,376 - root - INFO - lr: 5.7369e-06 gnorm: 1.26 [22:31:40< 1:58:56] +[titan] 2025-10-05 21:06:03,266 - root - INFO - step: 36770 loss: 1.8668 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 21:06:03,266 - root - INFO - lr: 5.7346e-06 gnorm: 1.22 [22:31:51< 1:58:45] +[titan] 2025-10-05 21:06:14,143 - root - INFO - step: 36775 loss: 1.9190 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6973 +[titan] 2025-10-05 21:06:14,143 - root - INFO - lr: 5.7324e-06 gnorm: 1.23 [22:32:02< 1:58:34] +[titan] 2025-10-05 21:06:25,098 - root - INFO - step: 36780 loss: 1.8969 memory: 118.84GiB(85.28%) tps: 29,913 tflops: 415.00 mfu: 41.96% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6776 +[titan] 2025-10-05 21:06:25,098 - root - INFO - lr: 
5.7301e-06 gnorm: 1.22 [22:32:13< 1:58:23] +[titan] 2025-10-05 21:06:35,961 - root - INFO - step: 36785 loss: 1.8486 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6355 +[titan] 2025-10-05 21:06:35,961 - root - INFO - lr: 5.7279e-06 gnorm: 1.26 [22:32:24< 1:58:12] +[titan] 2025-10-05 21:06:46,824 - root - INFO - step: 36790 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7201 +[titan] 2025-10-05 21:06:46,825 - root - INFO - lr: 5.7256e-06 gnorm: 1.26 [22:32:35< 1:58:00] +[titan] 2025-10-05 21:06:57,688 - root - INFO - step: 36795 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7115 +[titan] 2025-10-05 21:06:57,688 - root - INFO - lr: 5.7234e-06 gnorm: 1.23 [22:32:46< 1:57:49] +[titan] 2025-10-05 21:07:06,359 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:07:08,540 - root - INFO - step: 36800 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7017 +[titan] 2025-10-05 21:07:08,541 - root - INFO - lr: 5.7211e-06 gnorm: 1.23 [22:32:56< 1:57:38] +[titan] 2025-10-05 21:07:19,425 - root - INFO - step: 36805 loss: 1.9493 memory: 118.84GiB(85.28%) tps: 30,107 tflops: 417.69 mfu: 42.23% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:07:19,425 - root - INFO - lr: 5.7189e-06 gnorm: 1.24 [22:33:07< 1:57:27] +[titan] 2025-10-05 21:07:30,382 - root - INFO - step: 36810 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 29,904 tflops: 414.88 mfu: 41.95% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:07:30,383 - root - INFO - lr: 5.7166e-06 gnorm: 1.23 [22:33:18< 1:57:16] +[titan] 2025-10-05 21:07:41,263 - root - INFO - step: 36815 loss: 1.9284 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7055 +[titan] 2025-10-05 21:07:41,263 - root - INFO - lr: 5.7144e-06 gnorm: 1.24 [22:33:29< 1:57:05] +[titan] 2025-10-05 21:07:52,120 - root - INFO - step: 36820 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 21:07:52,120 - root - INFO - lr: 5.7122e-06 gnorm: 1.21 [22:33:40< 1:56:54] +[titan] 2025-10-05 21:08:02,998 - root - INFO - step: 36825 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6712 +[titan] 2025-10-05 21:08:02,999 - root - INFO - lr: 5.7100e-06 gnorm: 1.24 [22:33:51< 1:56:43] +[titan] 2025-10-05 21:08:13,877 - root - INFO - step: 36830 loss: 1.9915 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7608 +[titan] 2025-10-05 21:08:13,878 - root - INFO - lr: 
5.7077e-06 gnorm: 1.31 [22:34:02< 1:56:32] +[titan] 2025-10-05 21:08:25,107 - root - INFO - step: 36835 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 29,180 tflops: 404.83 mfu: 40.93% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6650 +[titan] 2025-10-05 21:08:25,108 - root - INFO - lr: 5.7055e-06 gnorm: 1.20 [22:34:13< 1:56:21] +[titan] 2025-10-05 21:08:35,977 - root - INFO - step: 36840 loss: 1.9962 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7652 +[titan] 2025-10-05 21:08:35,977 - root - INFO - lr: 5.7033e-06 gnorm: 1.24 [22:34:24< 1:56:10] +[titan] 2025-10-05 21:08:46,865 - root - INFO - step: 36845 loss: 1.9329 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.56 mfu: 42.22% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 21:08:46,865 - root - INFO - lr: 5.7011e-06 gnorm: 1.24 [22:34:35< 1:55:59] +[titan] 2025-10-05 21:08:55,538 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:08:57,718 - root - INFO - step: 36850 loss: 1.9916 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7611 +[titan] 2025-10-05 21:08:57,718 - root - INFO - lr: 5.6989e-06 gnorm: 1.26 [22:34:46< 1:55:48] +[titan] 2025-10-05 21:09:08,595 - root - INFO - step: 36855 loss: 1.9577 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7308 +[titan] 2025-10-05 21:09:08,595 - root - INFO - lr: 5.6967e-06 gnorm: 1.24 [22:34:57< 1:55:37] +[titan] 2025-10-05 21:09:19,469 - root - INFO - step: 36860 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 21:09:19,470 - root - INFO - lr: 5.6945e-06 gnorm: 1.27 [22:35:07< 1:55:26] +[titan] 2025-10-05 21:09:28,579 - root - INFO - Dumping profiler traces at step 36864 +[titan] 2025-10-05 21:09:28,621 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:09:30,837 - root - INFO - step: 36865 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 28,827 tflops: 399.93 mfu: 40.44% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7189 +[titan] 2025-10-05 21:09:30,837 - root - INFO - lr: 5.6923e-06 gnorm: 1.23 [22:35:19< 1:55:15] +[titan] 2025-10-05 21:09:41,699 - root - INFO - step: 36870 loss: 1.9650 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7380 +[titan] 2025-10-05 21:09:41,699 - root - INFO - lr: 5.6901e-06 gnorm: 1.24 [22:35:30< 1:55:04] +[titan] 2025-10-05 21:09:52,574 - root - INFO - step: 36875 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7383 +[titan] 2025-10-05 21:09:52,574 - root - INFO - lr: 5.6879e-06 gnorm: 1.24 [22:35:41< 1:54:53] +[titan] 2025-10-05 21:10:03,422 - root - INFO - step: 36880 loss: 
1.9042 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:10:03,422 - root - INFO - lr: 5.6857e-06 gnorm: 1.21 [22:35:51< 1:54:42] +[titan] 2025-10-05 21:10:14,288 - root - INFO - step: 36885 loss: 2.0074 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2326 global_avg_mtp_loss: 1.7747 +[titan] 2025-10-05 21:10:14,288 - root - INFO - lr: 5.6835e-06 gnorm: 1.23 [22:36:02< 1:54:31] +[titan] 2025-10-05 21:10:25,157 - root - INFO - step: 36890 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 21:10:25,157 - root - INFO - lr: 5.6813e-06 gnorm: 1.24 [22:36:13< 1:54:20] +[titan] 2025-10-05 21:10:36,097 - root - INFO - step: 36895 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.54 mfu: 42.02% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6925 +[titan] 2025-10-05 21:10:36,098 - root - INFO - lr: 5.6792e-06 gnorm: 1.24 [22:36:24< 1:54:09] +[titan] 2025-10-05 21:10:44,800 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:10:46,986 - root - INFO - step: 36900 loss: 1.9738 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7458 +[titan] 2025-10-05 21:10:46,987 - root - INFO - lr: 5.6770e-06 gnorm: 1.28 [22:36:35< 1:53:58] +[titan] 2025-10-05 21:10:57,833 - root - INFO - step: 36905 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.14 mfu: 42.38% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6847 +[titan] 2025-10-05 21:10:57,833 - root - INFO - lr: 5.6748e-06 gnorm: 1.23 [22:36:46< 1:53:47] +[titan] 2025-10-05 21:11:08,682 - root - INFO - step: 36910 loss: 1.8557 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2150 global_avg_mtp_loss: 1.6407 +[titan] 2025-10-05 21:11:08,682 - root - INFO - lr: 5.6726e-06 gnorm: 1.19 [22:36:57< 1:53:36] +[titan] 2025-10-05 21:11:19,531 - root - INFO - step: 36915 loss: 1.8896 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:11:19,532 - root - INFO - lr: 5.6705e-06 gnorm: 1.18 [22:37:07< 1:53:24] +[titan] 2025-10-05 21:11:30,448 - root - INFO - step: 36920 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,016 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:11:30,449 - root - INFO - lr: 5.6683e-06 gnorm: 1.24 [22:37:18< 1:53:13] +[titan] 2025-10-05 21:11:41,323 - root - INFO - step: 36925 loss: 1.9798 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7500 +[titan] 2025-10-05 21:11:41,323 - root - INFO - lr: 5.6662e-06 gnorm: 1.26 [22:37:29< 1:53:02] +[titan] 2025-10-05 21:11:52,243 - root - INFO - step: 36930 loss: 1.9131 memory: 118.84GiB(85.28%) tps: 30,008 tflops: 416.31 mfu: 42.09% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6917 +[titan] 2025-10-05 21:11:52,243 - root - INFO - lr: 
5.6640e-06 gnorm: 1.28 [22:37:40< 1:52:51] +[titan] 2025-10-05 21:12:03,124 - root - INFO - step: 36935 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6806 +[titan] 2025-10-05 21:12:03,124 - root - INFO - lr: 5.6619e-06 gnorm: 1.20 [22:37:51< 1:52:40] +[titan] 2025-10-05 21:12:14,002 - root - INFO - step: 36940 loss: 1.9158 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6952 +[titan] 2025-10-05 21:12:14,002 - root - INFO - lr: 5.6597e-06 gnorm: 1.26 [22:38:02< 1:52:29] +[titan] 2025-10-05 21:12:24,869 - root - INFO - step: 36945 loss: 1.9805 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7517 +[titan] 2025-10-05 21:12:24,870 - root - INFO - lr: 5.6576e-06 gnorm: 1.21 [22:38:13< 1:52:18] +[titan] 2025-10-05 21:12:33,628 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:12:35,810 - root - INFO - step: 36950 loss: 1.9573 memory: 118.84GiB(85.28%) tps: 29,952 tflops: 415.53 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7306 +[titan] 2025-10-05 21:12:35,810 - root - INFO - lr: 5.6554e-06 gnorm: 1.24 [22:38:24< 1:52:07] +[titan] 2025-10-05 21:12:46,684 - root - INFO - step: 36955 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7035 +[titan] 2025-10-05 21:12:46,685 - root - INFO - lr: 5.6533e-06 gnorm: 1.21 [22:38:35< 1:51:56] +[titan] 2025-10-05 21:12:57,552 - root - INFO - step: 36960 loss: 1.9363 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7125 +[titan] 2025-10-05 21:12:57,552 - root - INFO - lr: 5.6512e-06 gnorm: 1.24 [22:38:45< 1:51:45] +[titan] 2025-10-05 21:13:08,463 - root - INFO - step: 36965 loss: 2.0313 memory: 118.84GiB(85.28%) tps: 30,034 tflops: 416.67 mfu: 42.13% global_avg_ntp_loss: 0.2361 global_avg_mtp_loss: 1.7952 +[titan] 2025-10-05 21:13:08,463 - root - INFO - lr: 5.6490e-06 gnorm: 1.24 [22:38:56< 1:51:34] +[titan] 2025-10-05 21:13:19,335 - root - INFO - step: 36970 loss: 1.9561 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.14 mfu: 42.28% global_avg_ntp_loss: 0.2262 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:19,336 - root - INFO - lr: 5.6469e-06 gnorm: 1.21 [22:39:07< 1:51:23] +[titan] 2025-10-05 21:13:30,256 - root - INFO - step: 36975 loss: 1.9563 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7299 +[titan] 2025-10-05 21:13:30,256 - root - INFO - lr: 5.6448e-06 gnorm: 1.28 [22:39:18< 1:51:12] +[titan] 2025-10-05 21:13:41,127 - root - INFO - step: 36980 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6706 +[titan] 2025-10-05 21:13:41,128 - root - INFO - lr: 
5.6427e-06 gnorm: 1.23 [22:39:29< 1:51:01] +[titan] 2025-10-05 21:13:51,994 - root - INFO - step: 36985 loss: 1.9107 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:13:51,995 - root - INFO - lr: 5.6405e-06 gnorm: 1.29 [22:39:40< 1:50:50] +[titan] 2025-10-05 21:14:02,859 - root - INFO - step: 36990 loss: 1.9270 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.7044 +[titan] 2025-10-05 21:14:02,859 - root - INFO - lr: 5.6384e-06 gnorm: 1.24 [22:39:51< 1:50:39] +[titan] 2025-10-05 21:14:13,749 - root - INFO - step: 36995 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6987 +[titan] 2025-10-05 21:14:13,749 - root - INFO - lr: 5.6363e-06 gnorm: 1.22 [22:40:02< 1:50:28] +[titan] 2025-10-05 21:14:22,443 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:14:24,631 - root - INFO - step: 37000 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6388 +[titan] 2025-10-05 21:14:24,631 - root - INFO - lr: 5.6342e-06 gnorm: 1.20 [22:40:13< 1:50:17] +[titan] 2025-10-05 21:14:35,570 - root - INFO - step: 37005 loss: 1.9489 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7225 +[titan] 2025-10-05 21:14:35,570 - root - INFO - lr: 5.6321e-06 gnorm: 1.26 [22:40:23< 1:50:06] +[titan] 2025-10-05 21:14:46,450 - root - INFO - step: 37010 loss: 1.9529 memory: 118.84GiB(85.28%) tps: 30,120 tflops: 417.87 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7271 +[titan] 2025-10-05 21:14:46,450 - root - INFO - lr: 5.6300e-06 gnorm: 1.19 [22:40:34< 1:49:55] +[titan] 2025-10-05 21:14:57,328 - root - INFO - step: 37015 loss: 1.9312 memory: 118.84GiB(85.28%) tps: 30,123 tflops: 417.91 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7076 +[titan] 2025-10-05 21:14:57,329 - root - INFO - lr: 5.6279e-06 gnorm: 1.27 [22:40:45< 1:49:44] +[titan] 2025-10-05 21:15:08,191 - root - INFO - step: 37020 loss: 2.0101 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2327 global_avg_mtp_loss: 1.7774 +[titan] 2025-10-05 21:15:08,192 - root - INFO - lr: 5.6258e-06 gnorm: 1.28 [22:40:56< 1:49:33] +[titan] 2025-10-05 21:15:19,080 - root - INFO - step: 37025 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.51 mfu: 42.22% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:15:19,081 - root - INFO - lr: 5.6237e-06 gnorm: 1.20 [22:41:07< 1:49:22] +[titan] 2025-10-05 21:15:29,953 - root - INFO - step: 37030 loss: 1.9683 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7403 +[titan] 2025-10-05 21:15:29,953 - root - INFO - lr: 
5.6216e-06 gnorm: 1.21 [22:41:18< 1:49:11] +[titan] 2025-10-05 21:15:40,885 - root - INFO - step: 37035 loss: 1.8738 memory: 118.84GiB(85.28%) tps: 29,976 tflops: 415.87 mfu: 42.05% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6571 +[titan] 2025-10-05 21:15:40,885 - root - INFO - lr: 5.6196e-06 gnorm: 1.21 [22:41:29< 1:48:59] +[titan] 2025-10-05 21:15:51,738 - root - INFO - step: 37040 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6703 +[titan] 2025-10-05 21:15:51,738 - root - INFO - lr: 5.6175e-06 gnorm: 1.25 [22:41:40< 1:48:48] +[titan] 2025-10-05 21:16:02,623 - root - INFO - step: 37045 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:16:02,624 - root - INFO - lr: 5.6154e-06 gnorm: 1.21 [22:41:51< 1:48:37] +[titan] 2025-10-05 21:16:11,326 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:16:13,511 - root - INFO - step: 37050 loss: 1.9092 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6872 +[titan] 2025-10-05 21:16:13,511 - root - INFO - lr: 5.6133e-06 gnorm: 1.23 [22:42:01< 1:48:26] +[titan] 2025-10-05 21:16:24,393 - root - INFO - step: 37055 loss: 1.9722 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7436 +[titan] 2025-10-05 21:16:24,393 - root - INFO - lr: 5.6113e-06 gnorm: 1.24 [22:42:12< 1:48:15] +[titan] 2025-10-05 21:16:35,329 - root - INFO - step: 37060 loss: 1.9342 memory: 118.84GiB(85.28%) tps: 29,964 tflops: 415.71 mfu: 42.03% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:16:35,329 - root - INFO - lr: 5.6092e-06 gnorm: 1.25 [22:42:23< 1:48:04] +[titan] 2025-10-05 21:16:46,199 - root - INFO - step: 37065 loss: 1.9535 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:16:46,199 - root - INFO - lr: 5.6071e-06 gnorm: 1.27 [22:42:34< 1:47:53] +[titan] 2025-10-05 21:16:57,064 - root - INFO - step: 37070 loss: 1.9391 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 21:16:57,064 - root - INFO - lr: 5.6051e-06 gnorm: 1.24 [22:42:45< 1:47:42] +[titan] 2025-10-05 21:17:07,940 - root - INFO - step: 37075 loss: 1.9432 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7181 +[titan] 2025-10-05 21:17:07,940 - root - INFO - lr: 5.6030e-06 gnorm: 1.23 [22:42:56< 1:47:31] +[titan] 2025-10-05 21:17:18,806 - root - INFO - step: 37080 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6995 +[titan] 2025-10-05 21:17:18,807 - root - INFO - lr: 
5.6010e-06 gnorm: 1.20 [22:43:07< 1:47:20] +[titan] 2025-10-05 21:17:29,692 - root - INFO - step: 37085 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6662 +[titan] 2025-10-05 21:17:29,692 - root - INFO - lr: 5.5989e-06 gnorm: 1.21 [22:43:18< 1:47:09] +[titan] 2025-10-05 21:17:40,647 - root - INFO - step: 37090 loss: 1.9429 memory: 118.84GiB(85.28%) tps: 29,912 tflops: 414.98 mfu: 41.96% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:17:40,648 - root - INFO - lr: 5.5969e-06 gnorm: 1.27 [22:43:29< 1:46:58] +[titan] 2025-10-05 21:17:51,517 - root - INFO - step: 37095 loss: 2.0065 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7745 +[titan] 2025-10-05 21:17:51,517 - root - INFO - lr: 5.5949e-06 gnorm: 1.27 [22:43:39< 1:46:47] +[titan] 2025-10-05 21:18:00,216 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:18:02,399 - root - INFO - step: 37100 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6612 +[titan] 2025-10-05 21:18:02,399 - root - INFO - lr: 5.5928e-06 gnorm: 1.26 [22:43:50< 1:46:36] +[titan] 2025-10-05 21:18:13,285 - root - INFO - step: 37105 loss: 1.8885 memory: 118.84GiB(85.28%) tps: 30,101 tflops: 417.61 mfu: 42.23% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6690 +[titan] 2025-10-05 21:18:13,286 - root - INFO - lr: 5.5908e-06 gnorm: 1.20 [22:44:01< 1:46:25] +[titan] 2025-10-05 21:18:24,145 - root - INFO - step: 37110 loss: 1.9574 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7311 +[titan] 2025-10-05 21:18:24,145 - root - INFO - lr: 5.5888e-06 gnorm: 1.25 [22:44:12< 1:46:14] +[titan] 2025-10-05 21:18:35,081 - root - INFO - step: 37115 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 29,965 tflops: 415.72 mfu: 42.03% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7139 +[titan] 2025-10-05 21:18:35,081 - root - INFO - lr: 5.5867e-06 gnorm: 1.20 [22:44:23< 1:46:03] +[titan] 2025-10-05 21:18:45,955 - root - INFO - step: 37120 loss: 1.9550 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:18:45,955 - root - INFO - lr: 5.5847e-06 gnorm: 1.24 [22:44:34< 1:45:52] +[titan] 2025-10-05 21:18:56,847 - root - INFO - step: 37125 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7220 +[titan] 2025-10-05 21:18:56,848 - root - INFO - lr: 5.5827e-06 gnorm: 1.27 [22:44:45< 1:45:41] +[titan] 2025-10-05 21:19:07,708 - root - INFO - step: 37130 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 21:19:07,709 - root - INFO - lr: 
5.5807e-06 gnorm: 1.20 [22:44:56< 1:45:30] +[titan] 2025-10-05 21:19:18,570 - root - INFO - step: 37135 loss: 2.0234 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2342 global_avg_mtp_loss: 1.7892 +[titan] 2025-10-05 21:19:18,570 - root - INFO - lr: 5.5787e-06 gnorm: 1.23 [22:45:06< 1:45:19] +[titan] 2025-10-05 21:19:29,432 - root - INFO - step: 37140 loss: 1.9459 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7214 +[titan] 2025-10-05 21:19:29,432 - root - INFO - lr: 5.5766e-06 gnorm: 1.25 [22:45:17< 1:45:08] +[titan] 2025-10-05 21:19:40,328 - root - INFO - step: 37145 loss: 1.9155 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.24 mfu: 42.19% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6950 +[titan] 2025-10-05 21:19:40,328 - root - INFO - lr: 5.5746e-06 gnorm: 1.28 [22:45:28< 1:44:57] +[titan] 2025-10-05 21:19:48,999 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:19:51,178 - root - INFO - step: 37150 loss: 1.9636 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 21:19:51,178 - root - INFO - lr: 5.5726e-06 gnorm: 1.28 [22:45:39< 1:44:46] +[titan] 2025-10-05 21:20:02,074 - root - INFO - step: 37155 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,075 tflops: 417.25 mfu: 42.19% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6836 +[titan] 2025-10-05 21:20:02,074 - root - INFO - lr: 5.5706e-06 gnorm: 1.22 [22:45:50< 1:44:35] +[titan] 2025-10-05 21:20:12,936 - root - INFO - step: 37160 loss: 1.9383 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7134 +[titan] 2025-10-05 21:20:12,937 - root - INFO - lr: 5.5686e-06 gnorm: 1.25 [22:46:01< 1:44:23] +[titan] 2025-10-05 21:20:23,793 - root - INFO - step: 37165 loss: 1.9351 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 21:20:23,793 - root - INFO - lr: 5.5666e-06 gnorm: 1.26 [22:46:12< 1:44:12] +[titan] 2025-10-05 21:20:34,674 - root - INFO - step: 37170 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6734 +[titan] 2025-10-05 21:20:34,674 - root - INFO - lr: 5.5647e-06 gnorm: 1.21 [22:46:23< 1:44:01] +[titan] 2025-10-05 21:20:45,596 - root - INFO - step: 37175 loss: 1.9773 memory: 118.84GiB(85.28%) tps: 30,002 tflops: 416.24 mfu: 42.09% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7484 +[titan] 2025-10-05 21:20:45,596 - root - INFO - lr: 5.5627e-06 gnorm: 1.27 [22:46:33< 1:43:50] +[titan] 2025-10-05 21:20:56,483 - root - INFO - step: 37180 loss: 1.9256 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 21:20:56,484 - root - INFO - lr: 
5.5607e-06 gnorm: 1.26 [22:46:44< 1:43:39] +[titan] 2025-10-05 21:21:07,391 - root - INFO - step: 37185 loss: 1.9615 memory: 118.84GiB(85.28%) tps: 30,041 tflops: 416.78 mfu: 42.14% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:21:07,392 - root - INFO - lr: 5.5587e-06 gnorm: 1.24 [22:46:55< 1:43:28] +[titan] 2025-10-05 21:21:18,272 - root - INFO - step: 37190 loss: 1.9843 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7541 +[titan] 2025-10-05 21:21:18,272 - root - INFO - lr: 5.5567e-06 gnorm: 1.22 [22:47:06< 1:43:17] +[titan] 2025-10-05 21:21:29,155 - root - INFO - step: 37195 loss: 1.9553 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7286 +[titan] 2025-10-05 21:21:29,155 - root - INFO - lr: 5.5548e-06 gnorm: 1.25 [22:47:17< 1:43:06] +[titan] 2025-10-05 21:21:37,896 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:21:40,076 - root - INFO - step: 37200 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.27 mfu: 42.09% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6944 +[titan] 2025-10-05 21:21:40,077 - root - INFO - lr: 5.5528e-06 gnorm: 1.22 [22:47:28< 1:42:55] +[titan] 2025-10-05 21:21:50,943 - root - INFO - step: 37205 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 21:21:50,944 - root - INFO - lr: 5.5508e-06 gnorm: 1.21 [22:47:39< 1:42:44] +[titan] 2025-10-05 21:22:01,837 - root - INFO - step: 37210 loss: 1.9065 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6862 +[titan] 2025-10-05 21:22:01,837 - root - INFO - lr: 5.5489e-06 gnorm: 1.21 [22:47:50< 1:42:33] +[titan] 2025-10-05 21:22:12,716 - root - INFO - step: 37215 loss: 1.8559 memory: 118.84GiB(85.28%) tps: 30,122 tflops: 417.89 mfu: 42.25% global_avg_ntp_loss: 0.2146 global_avg_mtp_loss: 1.6413 +[titan] 2025-10-05 21:22:12,716 - root - INFO - lr: 5.5469e-06 gnorm: 1.23 [22:48:01< 1:42:22] +[titan] 2025-10-05 21:22:23,615 - root - INFO - step: 37220 loss: 1.9802 memory: 118.84GiB(85.28%) tps: 30,066 tflops: 417.13 mfu: 42.18% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7516 +[titan] 2025-10-05 21:22:23,615 - root - INFO - lr: 5.5450e-06 gnorm: 1.24 [22:48:12< 1:42:11] +[titan] 2025-10-05 21:22:34,482 - root - INFO - step: 37225 loss: 1.9783 memory: 118.84GiB(85.28%) tps: 30,154 tflops: 418.34 mfu: 42.30% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7493 +[titan] 2025-10-05 21:22:34,483 - root - INFO - lr: 5.5430e-06 gnorm: 1.26 [22:48:22< 1:42:00] +[titan] 2025-10-05 21:22:45,400 - root - INFO - step: 37230 loss: 1.9106 memory: 118.84GiB(85.28%) tps: 30,015 tflops: 416.41 mfu: 42.10% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6898 +[titan] 2025-10-05 21:22:45,400 - root - INFO - lr: 
5.5411e-06 gnorm: 1.23 [22:48:33< 1:41:49] +[titan] 2025-10-05 21:22:56,271 - root - INFO - step: 37235 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.18 mfu: 42.28% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6817 +[titan] 2025-10-05 21:22:56,272 - root - INFO - lr: 5.5391e-06 gnorm: 1.23 [22:48:44< 1:41:38] +[titan] 2025-10-05 21:23:07,114 - root - INFO - step: 37240 loss: 1.9626 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2272 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:23:07,114 - root - INFO - lr: 5.5372e-06 gnorm: 1.23 [22:48:55< 1:41:27] +[titan] 2025-10-05 21:23:17,969 - root - INFO - step: 37245 loss: 1.8827 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 21:23:17,970 - root - INFO - lr: 5.5352e-06 gnorm: 1.23 [22:49:06< 1:41:16] +[titan] 2025-10-05 21:23:26,677 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:23:28,858 - root - INFO - step: 37250 loss: 1.9019 memory: 118.84GiB(85.28%) tps: 30,095 tflops: 417.52 mfu: 42.22% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6816 +[titan] 2025-10-05 21:23:28,858 - root - INFO - lr: 5.5333e-06 gnorm: 1.21 [22:49:17< 1:41:05] +[titan] 2025-10-05 21:23:39,774 - root - INFO - step: 37255 loss: 1.9415 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.46 mfu: 42.11% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7168 +[titan] 2025-10-05 21:23:39,774 - root - INFO - lr: 5.5314e-06 gnorm: 1.21 [22:49:28< 1:40:54] +[titan] 2025-10-05 21:23:50,632 - root - INFO - step: 37260 loss: 1.8928 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 21:23:50,632 - root - INFO - lr: 5.5295e-06 gnorm: 1.20 [22:49:39< 1:40:43] +[titan] 2025-10-05 21:24:01,494 - root - INFO - step: 37265 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 21:24:01,495 - root - INFO - lr: 5.5275e-06 gnorm: 1.21 [22:49:49< 1:40:32] +[titan] 2025-10-05 21:24:12,333 - root - INFO - step: 37270 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7129 +[titan] 2025-10-05 21:24:12,333 - root - INFO - lr: 5.5256e-06 gnorm: 1.24 [22:50:00< 1:40:21] +[titan] 2025-10-05 21:24:23,189 - root - INFO - step: 37275 loss: 1.9326 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7100 +[titan] 2025-10-05 21:24:23,189 - root - INFO - lr: 5.5237e-06 gnorm: 1.22 [22:50:11< 1:40:10] +[titan] 2025-10-05 21:24:34,040 - root - INFO - step: 37280 loss: 1.8747 memory: 118.84GiB(85.28%) tps: 30,198 tflops: 418.95 mfu: 42.36% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6575 +[titan] 2025-10-05 21:24:34,041 - root - INFO - lr: 
5.5218e-06 gnorm: 1.19 [22:50:22< 1:39:59] +[titan] 2025-10-05 21:24:44,965 - root - INFO - step: 37285 loss: 1.8654 memory: 118.84GiB(85.28%) tps: 29,996 tflops: 416.14 mfu: 42.08% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6500 +[titan] 2025-10-05 21:24:44,965 - root - INFO - lr: 5.5199e-06 gnorm: 1.23 [22:50:33< 1:39:48] +[titan] 2025-10-05 21:24:55,829 - root - INFO - step: 37290 loss: 1.9000 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6803 +[titan] 2025-10-05 21:24:55,829 - root - INFO - lr: 5.5180e-06 gnorm: 1.24 [22:50:44< 1:39:36] +[titan] 2025-10-05 21:25:06,686 - root - INFO - step: 37295 loss: 1.9958 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2315 global_avg_mtp_loss: 1.7643 +[titan] 2025-10-05 21:25:06,686 - root - INFO - lr: 5.5161e-06 gnorm: 1.23 [22:50:55< 1:39:25] +[titan] 2025-10-05 21:25:15,357 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:25:17,530 - root - INFO - step: 37300 loss: 1.9230 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7011 +[titan] 2025-10-05 21:25:17,530 - root - INFO - lr: 5.5142e-06 gnorm: 1.29 [22:51:05< 1:39:14] +[titan] 2025-10-05 21:25:28,378 - root - INFO - step: 37305 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 21:25:28,378 - root - INFO - lr: 5.5123e-06 gnorm: 1.28 [22:51:16< 1:39:03] +[titan] 2025-10-05 21:25:39,211 - root - INFO - step: 37310 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,250 tflops: 419.67 mfu: 42.43% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 21:25:39,211 - root - INFO - lr: 5.5104e-06 gnorm: 1.28 [22:51:27< 1:38:52] +[titan] 2025-10-05 21:25:50,153 - root - INFO - step: 37315 loss: 1.9754 memory: 118.84GiB(85.28%) tps: 29,948 tflops: 415.48 mfu: 42.01% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7480 +[titan] 2025-10-05 21:25:50,153 - root - INFO - lr: 5.5085e-06 gnorm: 1.24 [22:51:38< 1:38:41] +[titan] 2025-10-05 21:26:01,007 - root - INFO - step: 37320 loss: 1.9137 memory: 118.84GiB(85.28%) tps: 30,190 tflops: 418.84 mfu: 42.35% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6920 +[titan] 2025-10-05 21:26:01,007 - root - INFO - lr: 5.5066e-06 gnorm: 1.22 [22:51:49< 1:38:30] +[titan] 2025-10-05 21:26:11,849 - root - INFO - step: 37325 loss: 1.9678 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7401 +[titan] 2025-10-05 21:26:11,849 - root - INFO - lr: 5.5047e-06 gnorm: 1.28 [22:52:00< 1:38:19] +[titan] 2025-10-05 21:26:22,692 - root - INFO - step: 37330 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.6912 +[titan] 2025-10-05 21:26:22,692 - root - INFO - lr: 
5.5028e-06 gnorm: 1.20 [22:52:11< 1:38:08] +[titan] 2025-10-05 21:26:33,566 - root - INFO - step: 37335 loss: 1.9531 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7263 +[titan] 2025-10-05 21:26:33,566 - root - INFO - lr: 5.5010e-06 gnorm: 1.21 [22:52:21< 1:37:57] +[titan] 2025-10-05 21:26:44,447 - root - INFO - step: 37340 loss: 1.9707 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2286 global_avg_mtp_loss: 1.7422 +[titan] 2025-10-05 21:26:44,447 - root - INFO - lr: 5.4991e-06 gnorm: 1.30 [22:52:32< 1:37:46] +[titan] 2025-10-05 21:26:55,353 - root - INFO - step: 37345 loss: 1.8670 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.84 mfu: 42.15% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6522 +[titan] 2025-10-05 21:26:55,354 - root - INFO - lr: 5.4972e-06 gnorm: 1.19 [22:52:43< 1:37:35] +[titan] 2025-10-05 21:27:04,030 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:27:06,210 - root - INFO - step: 37350 loss: 1.9148 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.75 mfu: 42.34% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6924 +[titan] 2025-10-05 21:27:06,210 - root - INFO - lr: 5.4954e-06 gnorm: 1.23 [22:52:54< 1:37:24] +[titan] 2025-10-05 21:27:17,097 - root - INFO - step: 37355 loss: 1.8844 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2174 global_avg_mtp_loss: 1.6671 +[titan] 2025-10-05 21:27:17,097 - root - INFO - lr: 5.4935e-06 gnorm: 1.22 [22:53:05< 1:37:13] +[titan] 2025-10-05 21:27:27,968 - root - INFO - step: 37360 loss: 1.8981 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 21:27:27,968 - root - INFO - lr: 5.4917e-06 gnorm: 1.24 [22:53:16< 1:37:02] +[titan] 2025-10-05 21:27:38,788 - root - INFO - step: 37365 loss: 1.9758 memory: 118.84GiB(85.28%) tps: 30,286 tflops: 420.16 mfu: 42.48% global_avg_ntp_loss: 0.2283 global_avg_mtp_loss: 1.7476 +[titan] 2025-10-05 21:27:38,788 - root - INFO - lr: 5.4898e-06 gnorm: 1.22 [22:53:27< 1:36:51] +[titan] 2025-10-05 21:27:49,689 - root - INFO - step: 37370 loss: 1.9381 memory: 118.84GiB(85.28%) tps: 30,060 tflops: 417.03 mfu: 42.17% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 21:27:49,689 - root - INFO - lr: 5.4880e-06 gnorm: 1.26 [22:53:38< 1:36:40] +[titan] 2025-10-05 21:28:00,629 - root - INFO - step: 37375 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 29,953 tflops: 415.55 mfu: 42.02% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7322 +[titan] 2025-10-05 21:28:00,630 - root - INFO - lr: 5.4861e-06 gnorm: 1.28 [22:53:49< 1:36:29] +[titan] 2025-10-05 21:28:03,005 - root - INFO - Dumping profiler traces at step 37376 +[titan] 2025-10-05 21:28:03,044 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:28:11,751 - root - INFO - step: 37380 loss: 
1.9803 memory: 118.84GiB(85.28%) tps: 29,465 tflops: 408.78 mfu: 41.33% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7507 +[titan] 2025-10-05 21:28:11,751 - root - INFO - lr: 5.4843e-06 gnorm: 1.26 [22:54:00< 1:36:18] +[titan] 2025-10-05 21:28:22,605 - root - INFO - step: 37385 loss: 1.9272 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7043 +[titan] 2025-10-05 21:28:22,605 - root - INFO - lr: 5.4824e-06 gnorm: 1.23 [22:54:10< 1:36:07] +[titan] 2025-10-05 21:28:33,443 - root - INFO - step: 37390 loss: 1.8450 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6318 +[titan] 2025-10-05 21:28:33,443 - root - INFO - lr: 5.4806e-06 gnorm: 1.24 [22:54:21< 1:35:56] +[titan] 2025-10-05 21:28:44,325 - root - INFO - step: 37395 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:28:44,326 - root - INFO - lr: 5.4788e-06 gnorm: 1.22 [22:54:32< 1:35:45] +[titan] 2025-10-05 21:28:52,995 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:28:55,171 - root - INFO - step: 37400 loss: 1.9334 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7095 +[titan] 2025-10-05 21:28:55,171 - root - INFO - lr: 5.4769e-06 gnorm: 1.21 [22:54:43< 1:35:34] +[titan] 2025-10-05 21:29:06,006 - root - INFO - step: 37405 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.61 mfu: 42.43% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 21:29:06,006 - root - INFO - lr: 5.4751e-06 gnorm: 1.21 [22:54:54< 1:35:23] +[titan] 2025-10-05 21:29:16,874 - root - INFO - step: 37410 loss: 1.9252 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 21:29:16,874 - root - INFO - lr: 5.4733e-06 gnorm: 1.21 [22:55:05< 1:35:12] +[titan] 2025-10-05 21:29:27,686 - root - INFO - step: 37415 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,308 tflops: 420.48 mfu: 42.52% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6815 +[titan] 2025-10-05 21:29:27,686 - root - INFO - lr: 5.4715e-06 gnorm: 1.19 [22:55:16< 1:35:01] +[titan] 2025-10-05 21:29:38,526 - root - INFO - step: 37420 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:29:38,526 - root - INFO - lr: 5.4696e-06 gnorm: 1.22 [22:55:26< 1:34:49] +[titan] 2025-10-05 21:29:49,408 - root - INFO - step: 37425 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,114 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2300 global_avg_mtp_loss: 1.7526 +[titan] 2025-10-05 21:29:49,408 - root - INFO - lr: 5.4678e-06 gnorm: 1.25 [22:55:37< 1:34:38] +[titan] 2025-10-05 21:30:00,250 - root - INFO - step: 37430 loss: 1.9613 memory: 118.84GiB(85.28%) tps: 30,223 tflops: 419.30 mfu: 42.40% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7348 +[titan] 2025-10-05 21:30:00,250 - root - INFO - lr: 
5.4660e-06 gnorm: 1.23 [22:55:48< 1:34:27] +[titan] 2025-10-05 21:30:11,084 - root - INFO - step: 37435 loss: 1.9022 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:30:11,084 - root - INFO - lr: 5.4642e-06 gnorm: 1.27 [22:55:59< 1:34:16] +[titan] 2025-10-05 21:30:21,909 - root - INFO - step: 37440 loss: 1.9502 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.97 mfu: 42.46% global_avg_ntp_loss: 0.2267 global_avg_mtp_loss: 1.7235 +[titan] 2025-10-05 21:30:21,909 - root - INFO - lr: 5.4624e-06 gnorm: 1.24 [22:56:10< 1:34:05] +[titan] 2025-10-05 21:30:32,791 - root - INFO - step: 37445 loss: 1.9772 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7483 +[titan] 2025-10-05 21:30:32,792 - root - INFO - lr: 5.4606e-06 gnorm: 1.30 [22:56:21< 1:33:54] +[titan] 2025-10-05 21:30:41,432 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:30:43,606 - root - INFO - step: 37450 loss: 1.9364 memory: 118.84GiB(85.28%) tps: 30,301 tflops: 420.38 mfu: 42.51% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.7122 +[titan] 2025-10-05 21:30:43,606 - root - INFO - lr: 5.4588e-06 gnorm: 1.25 [22:56:31< 1:33:43] +[titan] 2025-10-05 21:30:54,447 - root - INFO - step: 37455 loss: 1.9209 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.36 mfu: 42.40% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6981 +[titan] 2025-10-05 21:30:54,447 - root - INFO - lr: 5.4570e-06 gnorm: 1.27 [22:56:42< 1:33:32] +[titan] 2025-10-05 21:31:05,288 - root - INFO - step: 37460 loss: 1.8916 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:31:05,288 - root - INFO - lr: 5.4552e-06 gnorm: 1.22 [22:56:53< 1:33:21] +[titan] 2025-10-05 21:31:16,146 - root - INFO - step: 37465 loss: 1.9532 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7278 +[titan] 2025-10-05 21:31:16,146 - root - INFO - lr: 5.4535e-06 gnorm: 1.26 [22:57:04< 1:33:10] +[titan] 2025-10-05 21:31:26,988 - root - INFO - step: 37470 loss: 1.9308 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7071 +[titan] 2025-10-05 21:31:26,988 - root - INFO - lr: 5.4517e-06 gnorm: 1.26 [22:57:15< 1:32:59] +[titan] 2025-10-05 21:31:37,863 - root - INFO - step: 37475 loss: 1.8457 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2133 global_avg_mtp_loss: 1.6324 +[titan] 2025-10-05 21:31:37,863 - root - INFO - lr: 5.4499e-06 gnorm: 1.20 [22:57:26< 1:32:48] +[titan] 2025-10-05 21:31:48,716 - root - INFO - step: 37480 loss: 1.8673 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6511 +[titan] 2025-10-05 21:31:48,716 - root - INFO - lr: 
5.4481e-06 gnorm: 1.22 [22:57:37< 1:32:37] +[titan] 2025-10-05 21:31:59,576 - root - INFO - step: 37485 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6883 +[titan] 2025-10-05 21:31:59,577 - root - INFO - lr: 5.4463e-06 gnorm: 1.26 [22:57:47< 1:32:26] +[titan] 2025-10-05 21:32:10,434 - root - INFO - step: 37490 loss: 1.9165 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6947 +[titan] 2025-10-05 21:32:10,434 - root - INFO - lr: 5.4446e-06 gnorm: 1.24 [22:57:58< 1:32:15] +[titan] 2025-10-05 21:32:21,290 - root - INFO - step: 37495 loss: 1.9993 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7684 +[titan] 2025-10-05 21:32:21,290 - root - INFO - lr: 5.4428e-06 gnorm: 1.24 [22:58:09< 1:32:04] +[titan] 2025-10-05 21:32:29,976 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:32:32,151 - root - INFO - step: 37500 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7004 +[titan] 2025-10-05 21:32:32,151 - root - INFO - lr: 5.4411e-06 gnorm: 1.29 [22:58:20< 1:31:53] +[titan] 2025-10-05 21:32:43,013 - root - INFO - step: 37505 loss: 1.8923 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:32:43,014 - root - INFO - lr: 5.4393e-06 gnorm: 1.21 [22:58:31< 1:31:42] +[titan] 2025-10-05 21:32:53,854 - root - INFO - step: 37510 loss: 1.9490 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7237 +[titan] 2025-10-05 21:32:53,854 - root - INFO - lr: 5.4375e-06 gnorm: 1.21 [22:58:42< 1:31:31] +[titan] 2025-10-05 21:33:04,724 - root - INFO - step: 37515 loss: 1.9466 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7217 +[titan] 2025-10-05 21:33:04,724 - root - INFO - lr: 5.4358e-06 gnorm: 1.24 [22:58:53< 1:31:20] +[titan] 2025-10-05 21:33:15,605 - root - INFO - step: 37520 loss: 1.9483 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:33:15,605 - root - INFO - lr: 5.4341e-06 gnorm: 1.22 [22:59:03< 1:31:09] +[titan] 2025-10-05 21:33:26,465 - root - INFO - step: 37525 loss: 1.8732 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6563 +[titan] 2025-10-05 21:33:26,465 - root - INFO - lr: 5.4323e-06 gnorm: 1.23 [22:59:14< 1:30:58] +[titan] 2025-10-05 21:33:37,315 - root - INFO - step: 37530 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 21:33:37,315 - root - INFO - lr: 
5.4306e-06 gnorm: 1.23 [22:59:25< 1:30:47] +[titan] 2025-10-05 21:33:48,179 - root - INFO - step: 37535 loss: 1.8524 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6376 +[titan] 2025-10-05 21:33:48,179 - root - INFO - lr: 5.4288e-06 gnorm: 1.25 [22:59:36< 1:30:36] +[titan] 2025-10-05 21:33:59,032 - root - INFO - step: 37540 loss: 1.8890 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 21:33:59,032 - root - INFO - lr: 5.4271e-06 gnorm: 1.22 [22:59:47< 1:30:25] +[titan] 2025-10-05 21:34:09,894 - root - INFO - step: 37545 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6741 +[titan] 2025-10-05 21:34:09,894 - root - INFO - lr: 5.4254e-06 gnorm: 1.24 [22:59:58< 1:30:14] +[titan] 2025-10-05 21:34:18,559 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:34:20,750 - root - INFO - step: 37550 loss: 1.9069 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6861 +[titan] 2025-10-05 21:34:20,750 - root - INFO - lr: 5.4236e-06 gnorm: 1.24 [23:00:09< 1:30:02] +[titan] 2025-10-05 21:34:31,630 - root - INFO - step: 37555 loss: 1.9520 memory: 118.84GiB(85.28%) tps: 30,118 tflops: 417.84 mfu: 42.25% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7261 +[titan] 2025-10-05 21:34:31,630 - root - INFO - lr: 5.4219e-06 gnorm: 1.21 [23:00:19< 1:29:51] +[titan] 2025-10-05 21:34:42,476 - root - INFO - step: 37560 loss: 1.9558 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.17 mfu: 42.38% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7291 +[titan] 2025-10-05 21:34:42,476 - root - INFO - lr: 5.4202e-06 gnorm: 1.23 [23:00:30< 1:29:40] +[titan] 2025-10-05 21:34:53,333 - root - INFO - step: 37565 loss: 1.8764 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6595 +[titan] 2025-10-05 21:34:53,333 - root - INFO - lr: 5.4185e-06 gnorm: 1.26 [23:00:41< 1:29:29] +[titan] 2025-10-05 21:35:04,184 - root - INFO - step: 37570 loss: 1.9443 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 21:35:04,184 - root - INFO - lr: 5.4168e-06 gnorm: 1.30 [23:00:52< 1:29:18] +[titan] 2025-10-05 21:35:15,037 - root - INFO - step: 37575 loss: 1.8778 memory: 118.84GiB(85.28%) tps: 30,194 tflops: 418.89 mfu: 42.36% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6614 +[titan] 2025-10-05 21:35:15,037 - root - INFO - lr: 5.4151e-06 gnorm: 1.21 [23:01:03< 1:29:07] +[titan] 2025-10-05 21:35:25,912 - root - INFO - step: 37580 loss: 1.8864 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6681 +[titan] 2025-10-05 21:35:25,913 - root - INFO - lr: 
5.4134e-06 gnorm: 1.23 [23:01:14< 1:28:56] +[titan] 2025-10-05 21:35:36,806 - root - INFO - step: 37585 loss: 1.9361 memory: 118.84GiB(85.28%) tps: 30,080 tflops: 417.32 mfu: 42.20% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7111 +[titan] 2025-10-05 21:35:36,807 - root - INFO - lr: 5.4117e-06 gnorm: 1.25 [23:01:25< 1:28:45] +[titan] 2025-10-05 21:35:47,715 - root - INFO - step: 37590 loss: 1.9076 memory: 118.84GiB(85.28%) tps: 30,039 tflops: 416.74 mfu: 42.14% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 21:35:47,716 - root - INFO - lr: 5.4100e-06 gnorm: 1.22 [23:01:36< 1:28:34] +[titan] 2025-10-05 21:35:58,598 - root - INFO - step: 37595 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.75 mfu: 42.24% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6729 +[titan] 2025-10-05 21:35:58,598 - root - INFO - lr: 5.4083e-06 gnorm: 1.20 [23:01:46< 1:28:23] +[titan] 2025-10-05 21:36:07,282 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:36:09,461 - root - INFO - step: 37600 loss: 1.9346 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7104 +[titan] 2025-10-05 21:36:09,461 - root - INFO - lr: 5.4066e-06 gnorm: 1.22 [23:01:57< 1:28:12] +[titan] 2025-10-05 21:36:20,345 - root - INFO - step: 37605 loss: 2.0027 memory: 118.84GiB(85.28%) tps: 30,108 tflops: 417.70 mfu: 42.23% global_avg_ntp_loss: 0.2312 global_avg_mtp_loss: 1.7715 +[titan] 2025-10-05 21:36:20,345 - root - INFO - lr: 5.4049e-06 gnorm: 1.28 [23:02:08< 1:28:01] +[titan] 2025-10-05 21:36:31,206 - root - INFO - step: 37610 loss: 1.9219 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 21:36:31,206 - root - INFO - lr: 5.4032e-06 gnorm: 1.20 [23:02:19< 1:27:50] +[titan] 2025-10-05 21:36:42,084 - root - INFO - step: 37615 loss: 1.9241 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.94 mfu: 42.26% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7009 +[titan] 2025-10-05 21:36:42,084 - root - INFO - lr: 5.4015e-06 gnorm: 1.28 [23:02:30< 1:27:39] +[titan] 2025-10-05 21:36:52,956 - root - INFO - step: 37620 loss: 2.0281 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2350 global_avg_mtp_loss: 1.7931 +[titan] 2025-10-05 21:36:52,956 - root - INFO - lr: 5.3999e-06 gnorm: 1.25 [23:02:41< 1:27:28] +[titan] 2025-10-05 21:37:03,800 - root - INFO - step: 37625 loss: 1.8956 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6758 +[titan] 2025-10-05 21:37:03,800 - root - INFO - lr: 5.3982e-06 gnorm: 1.23 [23:02:52< 1:27:17] +[titan] 2025-10-05 21:37:14,649 - root - INFO - step: 37630 loss: 1.9791 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2297 global_avg_mtp_loss: 1.7495 +[titan] 2025-10-05 21:37:14,649 - root - INFO - lr: 
5.3965e-06 gnorm: 1.33 [23:03:03< 1:27:06] +[titan] 2025-10-05 21:37:25,497 - root - INFO - step: 37635 loss: 1.9344 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7102 +[titan] 2025-10-05 21:37:25,497 - root - INFO - lr: 5.3948e-06 gnorm: 1.24 [23:03:13< 1:26:55] +[titan] 2025-10-05 21:37:36,353 - root - INFO - step: 37640 loss: 1.9666 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2278 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 21:37:36,353 - root - INFO - lr: 5.3932e-06 gnorm: 1.22 [23:03:24< 1:26:44] +[titan] 2025-10-05 21:37:47,208 - root - INFO - step: 37645 loss: 1.9497 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7232 +[titan] 2025-10-05 21:37:47,208 - root - INFO - lr: 5.3915e-06 gnorm: 1.27 [23:03:35< 1:26:33] +[titan] 2025-10-05 21:37:55,907 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:37:58,091 - root - INFO - step: 37650 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6691 +[titan] 2025-10-05 21:37:58,091 - root - INFO - lr: 5.3899e-06 gnorm: 1.23 [23:03:46< 1:26:22] +[titan] 2025-10-05 21:38:08,977 - root - INFO - step: 37655 loss: 1.9390 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7145 +[titan] 2025-10-05 21:38:08,977 - root - INFO - lr: 5.3882e-06 gnorm: 1.28 [23:03:57< 1:26:11] +[titan] 2025-10-05 21:38:19,857 - root - INFO - step: 37660 loss: 1.9839 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.82 mfu: 42.25% global_avg_ntp_loss: 0.2302 global_avg_mtp_loss: 1.7537 +[titan] 2025-10-05 21:38:19,857 - root - INFO - lr: 5.3866e-06 gnorm: 1.29 [23:04:08< 1:26:00] +[titan] 2025-10-05 21:38:30,712 - root - INFO - step: 37665 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6720 +[titan] 2025-10-05 21:38:30,712 - root - INFO - lr: 5.3849e-06 gnorm: 1.23 [23:04:19< 1:25:49] +[titan] 2025-10-05 21:38:41,564 - root - INFO - step: 37670 loss: 1.8372 memory: 118.84GiB(85.28%) tps: 30,196 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2116 global_avg_mtp_loss: 1.6256 +[titan] 2025-10-05 21:38:41,565 - root - INFO - lr: 5.3833e-06 gnorm: 1.21 [23:04:29< 1:25:38] +[titan] 2025-10-05 21:38:52,429 - root - INFO - step: 37675 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 21:38:52,429 - root - INFO - lr: 5.3816e-06 gnorm: 1.26 [23:04:40< 1:25:27] +[titan] 2025-10-05 21:39:03,314 - root - INFO - step: 37680 loss: 2.0145 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2332 global_avg_mtp_loss: 1.7814 +[titan] 2025-10-05 21:39:03,314 - root - INFO - lr: 
5.3800e-06 gnorm: 1.28 [23:04:51< 1:25:16] +[titan] 2025-10-05 21:39:14,212 - root - INFO - step: 37685 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 21:39:14,212 - root - INFO - lr: 5.3784e-06 gnorm: 1.22 [23:05:02< 1:25:05] +[titan] 2025-10-05 21:39:25,089 - root - INFO - step: 37690 loss: 2.0041 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7721 +[titan] 2025-10-05 21:39:25,089 - root - INFO - lr: 5.3767e-06 gnorm: 1.24 [23:05:13< 1:24:53] +[titan] 2025-10-05 21:39:35,965 - root - INFO - step: 37695 loss: 2.0071 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.98 mfu: 42.26% global_avg_ntp_loss: 0.2335 global_avg_mtp_loss: 1.7736 +[titan] 2025-10-05 21:39:35,965 - root - INFO - lr: 5.3751e-06 gnorm: 1.26 [23:05:24< 1:24:42] +[titan] 2025-10-05 21:39:44,645 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:39:46,832 - root - INFO - step: 37700 loss: 1.8803 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:39:46,832 - root - INFO - lr: 5.3735e-06 gnorm: 1.24 [23:05:35< 1:24:31] +[titan] 2025-10-05 21:39:57,708 - root - INFO - step: 37705 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:39:57,708 - root - INFO - lr: 5.3719e-06 gnorm: 1.24 [23:05:46< 1:24:20] +[titan] 2025-10-05 21:40:08,584 - root - INFO - step: 37710 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6486 +[titan] 2025-10-05 21:40:08,584 - root - INFO - lr: 5.3703e-06 gnorm: 1.23 [23:05:56< 1:24:09] +[titan] 2025-10-05 21:40:19,491 - root - INFO - step: 37715 loss: 1.9633 memory: 118.84GiB(85.28%) tps: 30,043 tflops: 416.80 mfu: 42.14% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7357 +[titan] 2025-10-05 21:40:19,491 - root - INFO - lr: 5.3687e-06 gnorm: 1.24 [23:06:07< 1:23:58] +[titan] 2025-10-05 21:40:30,374 - root - INFO - step: 37720 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6858 +[titan] 2025-10-05 21:40:30,375 - root - INFO - lr: 5.3671e-06 gnorm: 1.21 [23:06:18< 1:23:47] +[titan] 2025-10-05 21:40:41,250 - root - INFO - step: 37725 loss: 1.8832 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 21:40:41,250 - root - INFO - lr: 5.3654e-06 gnorm: 1.21 [23:06:29< 1:23:36] +[titan] 2025-10-05 21:40:52,074 - root - INFO - step: 37730 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,274 tflops: 420.00 mfu: 42.47% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 21:40:52,075 - root - INFO - lr: 
5.3638e-06 gnorm: 1.23 [23:06:40< 1:23:25] +[titan] 2025-10-05 21:41:02,927 - root - INFO - step: 37735 loss: 1.9063 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 21:41:02,928 - root - INFO - lr: 5.3622e-06 gnorm: 1.24 [23:06:51< 1:23:14] +[titan] 2025-10-05 21:41:13,783 - root - INFO - step: 37740 loss: 1.9327 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7091 +[titan] 2025-10-05 21:41:13,783 - root - INFO - lr: 5.3607e-06 gnorm: 1.24 [23:07:02< 1:23:03] +[titan] 2025-10-05 21:41:24,647 - root - INFO - step: 37745 loss: 1.8905 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 21:41:24,647 - root - INFO - lr: 5.3591e-06 gnorm: 1.24 [23:07:12< 1:22:52] +[titan] 2025-10-05 21:41:33,361 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:41:35,546 - root - INFO - step: 37750 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,068 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 21:41:35,546 - root - INFO - lr: 5.3575e-06 gnorm: 1.24 [23:07:23< 1:22:41] +[titan] 2025-10-05 21:41:46,407 - root - INFO - step: 37755 loss: 1.8127 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.56 mfu: 42.32% global_avg_ntp_loss: 0.2099 global_avg_mtp_loss: 1.6029 +[titan] 2025-10-05 21:41:46,407 - root - INFO - lr: 5.3559e-06 gnorm: 1.23 [23:07:34< 1:22:30] +[titan] 2025-10-05 21:41:57,261 - root - INFO - step: 37760 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7208 +[titan] 2025-10-05 21:41:57,261 - root - INFO - lr: 5.3543e-06 gnorm: 1.27 [23:07:45< 1:22:19] +[titan] 2025-10-05 21:42:08,104 - root - INFO - step: 37765 loss: 1.9220 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.40% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.6996 +[titan] 2025-10-05 21:42:08,104 - root - INFO - lr: 5.3527e-06 gnorm: 1.22 [23:07:56< 1:22:08] +[titan] 2025-10-05 21:42:18,953 - root - INFO - step: 37770 loss: 1.9518 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:42:18,954 - root - INFO - lr: 5.3512e-06 gnorm: 1.28 [23:08:07< 1:21:57] +[titan] 2025-10-05 21:42:29,811 - root - INFO - step: 37775 loss: 1.9593 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.72 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 21:42:29,811 - root - INFO - lr: 5.3496e-06 gnorm: 1.30 [23:08:18< 1:21:46] +[titan] 2025-10-05 21:42:40,701 - root - INFO - step: 37780 loss: 1.9406 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.47 mfu: 42.21% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 21:42:40,701 - root - INFO - lr: 
5.3480e-06 gnorm: 1.24 [23:08:29< 1:21:35] +[titan] 2025-10-05 21:42:51,568 - root - INFO - step: 37785 loss: 1.8503 memory: 118.84GiB(85.28%) tps: 30,155 tflops: 418.35 mfu: 42.30% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 21:42:51,568 - root - INFO - lr: 5.3465e-06 gnorm: 1.25 [23:08:39< 1:21:24] +[titan] 2025-10-05 21:43:02,441 - root - INFO - step: 37790 loss: 2.0185 memory: 118.84GiB(85.28%) tps: 30,137 tflops: 418.11 mfu: 42.28% global_avg_ntp_loss: 0.2329 global_avg_mtp_loss: 1.7856 +[titan] 2025-10-05 21:43:02,441 - root - INFO - lr: 5.3449e-06 gnorm: 1.29 [23:08:50< 1:21:13] +[titan] 2025-10-05 21:43:13,297 - root - INFO - step: 37795 loss: 1.9468 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2249 global_avg_mtp_loss: 1.7219 +[titan] 2025-10-05 21:43:13,297 - root - INFO - lr: 5.3434e-06 gnorm: 1.25 [23:09:01< 1:21:02] +[titan] 2025-10-05 21:43:21,968 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:43:24,171 - root - INFO - step: 37800 loss: 1.8974 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 21:43:24,171 - root - INFO - lr: 5.3418e-06 gnorm: 1.23 [23:09:12< 1:20:51] +[titan] 2025-10-05 21:43:35,037 - root - INFO - step: 37805 loss: 1.9248 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 21:43:35,037 - root - INFO - lr: 5.3403e-06 gnorm: 1.25 [23:09:23< 1:20:40] +[titan] 2025-10-05 21:43:45,919 - root - INFO - step: 37810 loss: 1.9479 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7231 +[titan] 2025-10-05 21:43:45,919 - root - INFO - lr: 5.3387e-06 gnorm: 1.21 [23:09:34< 1:20:29] +[titan] 2025-10-05 21:43:56,805 - root - INFO - step: 37815 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7211 +[titan] 2025-10-05 21:43:56,805 - root - INFO - lr: 5.3372e-06 gnorm: 1.27 [23:09:45< 1:20:18] +[titan] 2025-10-05 21:44:07,687 - root - INFO - step: 37820 loss: 2.0093 memory: 118.84GiB(85.28%) tps: 30,112 tflops: 417.76 mfu: 42.24% global_avg_ntp_loss: 0.2324 global_avg_mtp_loss: 1.7769 +[titan] 2025-10-05 21:44:07,687 - root - INFO - lr: 5.3356e-06 gnorm: 1.30 [23:09:56< 1:20:07] +[titan] 2025-10-05 21:44:18,545 - root - INFO - step: 37825 loss: 1.9824 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:44:18,545 - root - INFO - lr: 5.3341e-06 gnorm: 1.27 [23:10:06< 1:19:56] +[titan] 2025-10-05 21:44:29,413 - root - INFO - step: 37830 loss: 1.9731 memory: 118.84GiB(85.28%) tps: 30,152 tflops: 418.31 mfu: 42.30% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7442 +[titan] 2025-10-05 21:44:29,413 - root - INFO - lr: 
5.3326e-06 gnorm: 1.21 [23:10:17< 1:19:44] +[titan] 2025-10-05 21:44:40,283 - root - INFO - step: 37835 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7221 +[titan] 2025-10-05 21:44:40,283 - root - INFO - lr: 5.3310e-06 gnorm: 1.22 [23:10:28< 1:19:33] +[titan] 2025-10-05 21:44:51,148 - root - INFO - step: 37840 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 21:44:51,149 - root - INFO - lr: 5.3295e-06 gnorm: 1.21 [23:10:39< 1:19:22] +[titan] 2025-10-05 21:45:02,046 - root - INFO - step: 37845 loss: 1.9602 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7338 +[titan] 2025-10-05 21:45:02,047 - root - INFO - lr: 5.3280e-06 gnorm: 1.25 [23:10:50< 1:19:11] +[titan] 2025-10-05 21:45:10,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:45:12,905 - root - INFO - step: 37850 loss: 1.9634 memory: 118.84GiB(85.28%) tps: 30,178 tflops: 418.67 mfu: 42.33% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7354 +[titan] 2025-10-05 21:45:12,905 - root - INFO - lr: 5.3265e-06 gnorm: 1.23 [23:11:01< 1:19:00] +[titan] 2025-10-05 21:45:23,773 - root - INFO - step: 37855 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6640 +[titan] 2025-10-05 21:45:23,773 - root - INFO - lr: 5.3250e-06 gnorm: 1.22 [23:11:12< 1:18:49] +[titan] 2025-10-05 21:45:34,638 - root - INFO - step: 37860 loss: 1.9306 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.7081 +[titan] 2025-10-05 21:45:34,638 - root - INFO - lr: 5.3235e-06 gnorm: 1.24 [23:11:22< 1:18:38] +[titan] 2025-10-05 21:45:45,491 - root - INFO - step: 37865 loss: 1.9514 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7254 +[titan] 2025-10-05 21:45:45,491 - root - INFO - lr: 5.3220e-06 gnorm: 1.24 [23:11:33< 1:18:27] +[titan] 2025-10-05 21:45:56,352 - root - INFO - step: 37870 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,170 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6506 +[titan] 2025-10-05 21:45:56,353 - root - INFO - lr: 5.3205e-06 gnorm: 1.21 [23:11:44< 1:18:16] +[titan] 2025-10-05 21:46:07,270 - root - INFO - step: 37875 loss: 1.9195 memory: 118.84GiB(85.28%) tps: 30,014 tflops: 416.39 mfu: 42.10% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 21:46:07,271 - root - INFO - lr: 5.3190e-06 gnorm: 1.24 [23:11:55< 1:18:05] +[titan] 2025-10-05 21:46:18,130 - root - INFO - step: 37880 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 21:46:18,131 - root - INFO - lr: 
5.3175e-06 gnorm: 1.26 [23:12:06< 1:17:54] +[titan] 2025-10-05 21:46:29,081 - root - INFO - step: 37885 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,924 tflops: 415.15 mfu: 41.98% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6896 +[titan] 2025-10-05 21:46:29,082 - root - INFO - lr: 5.3160e-06 gnorm: 1.22 [23:12:17< 1:17:43] +[titan] 2025-10-05 21:46:35,782 - root - INFO - Dumping profiler traces at step 37888 +[titan] 2025-10-05 21:46:35,823 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 21:46:40,200 - root - INFO - step: 37890 loss: 1.9388 memory: 118.84GiB(85.28%) tps: 29,473 tflops: 408.89 mfu: 41.34% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7149 +[titan] 2025-10-05 21:46:40,200 - root - INFO - lr: 5.3145e-06 gnorm: 1.28 [23:12:28< 1:17:32] +[titan] 2025-10-05 21:46:51,073 - root - INFO - step: 37895 loss: 1.9689 memory: 118.84GiB(85.28%) tps: 30,138 tflops: 418.12 mfu: 42.28% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7399 +[titan] 2025-10-05 21:46:51,073 - root - INFO - lr: 5.3130e-06 gnorm: 1.23 [23:12:39< 1:17:21] +[titan] 2025-10-05 21:46:59,793 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:47:01,983 - root - INFO - step: 37900 loss: 1.9609 memory: 118.84GiB(85.28%) tps: 30,037 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2270 global_avg_mtp_loss: 1.7339 +[titan] 2025-10-05 21:47:01,983 - root - INFO - lr: 5.3115e-06 gnorm: 1.24 [23:12:50< 1:17:10] +[titan] 2025-10-05 21:47:12,859 - root - INFO - step: 37905 loss: 1.9242 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.01 mfu: 42.27% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7007 +[titan] 2025-10-05 21:47:12,859 - root - INFO - lr: 5.3100e-06 gnorm: 1.27 [23:13:01< 1:16:59] +[titan] 2025-10-05 21:47:23,757 - root - INFO - step: 37910 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 21:47:23,757 - root - INFO - lr: 5.3086e-06 gnorm: 1.26 [23:13:12< 1:16:48] +[titan] 2025-10-05 21:47:34,635 - root - INFO - step: 37915 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.92 mfu: 42.26% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6659 +[titan] 2025-10-05 21:47:34,635 - root - INFO - lr: 5.3071e-06 gnorm: 1.24 [23:13:22< 1:16:37] +[titan] 2025-10-05 21:47:45,522 - root - INFO - step: 37920 loss: 1.8835 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.60 mfu: 42.22% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:47:45,522 - root - INFO - lr: 5.3056e-06 gnorm: 1.20 [23:13:33< 1:16:26] +[titan] 2025-10-05 21:47:56,386 - root - INFO - step: 37925 loss: 1.9475 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7216 +[titan] 2025-10-05 21:47:56,386 - root - INFO - lr: 5.3042e-06 gnorm: 1.28 [23:13:44< 1:16:15] +[titan] 2025-10-05 21:48:07,400 - root - INFO - step: 37930 loss: 1.9244 memory: 118.84GiB(85.28%) tps: 29,753 tflops: 412.78 mfu: 41.74% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 21:48:07,400 - root - INFO - lr: 
5.3027e-06 gnorm: 1.23 [23:13:55< 1:16:04] +[titan] 2025-10-05 21:48:18,249 - root - INFO - step: 37935 loss: 1.9378 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7119 +[titan] 2025-10-05 21:48:18,249 - root - INFO - lr: 5.3012e-06 gnorm: 1.24 [23:14:06< 1:15:53] +[titan] 2025-10-05 21:48:29,154 - root - INFO - step: 37940 loss: 1.9597 memory: 118.84GiB(85.28%) tps: 30,050 tflops: 416.90 mfu: 42.15% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7305 +[titan] 2025-10-05 21:48:29,154 - root - INFO - lr: 5.2998e-06 gnorm: 1.26 [23:14:17< 1:15:42] +[titan] 2025-10-05 21:48:40,024 - root - INFO - step: 37945 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6992 +[titan] 2025-10-05 21:48:40,024 - root - INFO - lr: 5.2983e-06 gnorm: 1.29 [23:14:28< 1:15:31] +[titan] 2025-10-05 21:48:48,690 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:48:50,876 - root - INFO - step: 37950 loss: 1.9305 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7069 +[titan] 2025-10-05 21:48:50,876 - root - INFO - lr: 5.2969e-06 gnorm: 1.28 [23:14:39< 1:15:20] +[titan] 2025-10-05 21:49:01,777 - root - INFO - step: 37955 loss: 1.9146 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.05 mfu: 42.17% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6934 +[titan] 2025-10-05 21:49:01,777 - root - INFO - lr: 5.2954e-06 gnorm: 1.23 [23:14:50< 1:15:09] +[titan] 2025-10-05 21:49:12,633 - root - INFO - step: 37960 loss: 1.9032 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6826 +[titan] 2025-10-05 21:49:12,633 - root - INFO - lr: 5.2940e-06 gnorm: 1.25 [23:15:00< 1:14:58] +[titan] 2025-10-05 21:49:23,498 - root - INFO - step: 37965 loss: 1.8874 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 21:49:23,498 - root - INFO - lr: 5.2926e-06 gnorm: 1.21 [23:15:11< 1:14:47] +[titan] 2025-10-05 21:49:34,372 - root - INFO - step: 37970 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.09 mfu: 42.27% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7031 +[titan] 2025-10-05 21:49:34,372 - root - INFO - lr: 5.2911e-06 gnorm: 1.25 [23:15:22< 1:14:36] +[titan] 2025-10-05 21:49:45,244 - root - INFO - step: 37975 loss: 1.9350 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7108 +[titan] 2025-10-05 21:49:45,244 - root - INFO - lr: 5.2897e-06 gnorm: 1.25 [23:15:33< 1:14:25] +[titan] 2025-10-05 21:49:56,122 - root - INFO - step: 37980 loss: 2.0219 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2334 global_avg_mtp_loss: 1.7886 +[titan] 2025-10-05 21:49:56,122 - root - INFO - lr: 
5.2883e-06 gnorm: 1.31 [23:15:44< 1:14:14] +[titan] 2025-10-05 21:50:07,019 - root - INFO - step: 37985 loss: 1.9280 memory: 118.84GiB(85.28%) tps: 30,071 tflops: 417.19 mfu: 42.18% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 21:50:07,019 - root - INFO - lr: 5.2869e-06 gnorm: 1.24 [23:15:55< 1:14:02] +[titan] 2025-10-05 21:50:17,884 - root - INFO - step: 37990 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6913 +[titan] 2025-10-05 21:50:17,884 - root - INFO - lr: 5.2854e-06 gnorm: 1.22 [23:16:06< 1:13:51] +[titan] 2025-10-05 21:50:28,745 - root - INFO - step: 37995 loss: 1.8863 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 21:50:28,745 - root - INFO - lr: 5.2840e-06 gnorm: 1.21 [23:16:17< 1:13:40] +[titan] 2025-10-05 21:50:37,416 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:50:39,603 - root - INFO - step: 38000 loss: 1.9480 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7227 +[titan] 2025-10-05 21:50:39,603 - root - INFO - lr: 5.2826e-06 gnorm: 1.24 [23:16:27< 1:13:29] +[titan] 2025-10-05 21:50:50,499 - root - INFO - step: 38005 loss: 1.9446 memory: 118.84GiB(85.28%) tps: 30,074 tflops: 417.22 mfu: 42.19% global_avg_ntp_loss: 0.2268 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:50:50,499 - root - INFO - lr: 5.2812e-06 gnorm: 1.24 [23:16:38< 1:13:18] +[titan] 2025-10-05 21:51:01,361 - root - INFO - step: 38010 loss: 1.9423 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2260 global_avg_mtp_loss: 1.7163 +[titan] 2025-10-05 21:51:01,361 - root - INFO - lr: 5.2798e-06 gnorm: 1.25 [23:16:49< 1:13:07] +[titan] 2025-10-05 21:51:12,250 - root - INFO - step: 38015 loss: 1.9035 memory: 118.84GiB(85.28%) tps: 30,094 tflops: 417.50 mfu: 42.21% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6832 +[titan] 2025-10-05 21:51:12,250 - root - INFO - lr: 5.2784e-06 gnorm: 1.23 [23:17:00< 1:12:56] +[titan] 2025-10-05 21:51:23,111 - root - INFO - step: 38020 loss: 1.9570 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7290 +[titan] 2025-10-05 21:51:23,111 - root - INFO - lr: 5.2770e-06 gnorm: 1.26 [23:17:11< 1:12:45] +[titan] 2025-10-05 21:51:33,966 - root - INFO - step: 38025 loss: 1.9525 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2252 global_avg_mtp_loss: 1.7273 +[titan] 2025-10-05 21:51:33,966 - root - INFO - lr: 5.2756e-06 gnorm: 1.24 [23:17:22< 1:12:34] +[titan] 2025-10-05 21:51:44,841 - root - INFO - step: 38030 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6303 +[titan] 2025-10-05 21:51:44,841 - root - INFO - lr: 
5.2742e-06 gnorm: 1.22 [23:17:33< 1:12:23] +[titan] 2025-10-05 21:51:55,747 - root - INFO - step: 38035 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,047 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6989 +[titan] 2025-10-05 21:51:55,747 - root - INFO - lr: 5.2728e-06 gnorm: 1.23 [23:17:44< 1:12:12] +[titan] 2025-10-05 21:52:06,666 - root - INFO - step: 38040 loss: 1.8833 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6657 +[titan] 2025-10-05 21:52:06,666 - root - INFO - lr: 5.2714e-06 gnorm: 1.25 [23:17:54< 1:12:01] +[titan] 2025-10-05 21:52:17,555 - root - INFO - step: 38045 loss: 1.8640 memory: 118.84GiB(85.28%) tps: 30,093 tflops: 417.49 mfu: 42.21% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6482 +[titan] 2025-10-05 21:52:17,555 - root - INFO - lr: 5.2701e-06 gnorm: 1.25 [23:18:05< 1:11:50] +[titan] 2025-10-05 21:52:26,252 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:52:28,442 - root - INFO - step: 38050 loss: 1.8572 memory: 118.84GiB(85.28%) tps: 30,099 tflops: 417.58 mfu: 42.22% global_avg_ntp_loss: 0.2147 global_avg_mtp_loss: 1.6424 +[titan] 2025-10-05 21:52:28,442 - root - INFO - lr: 5.2687e-06 gnorm: 1.21 [23:18:16< 1:11:39] +[titan] 2025-10-05 21:52:39,324 - root - INFO - step: 38055 loss: 1.9652 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2280 global_avg_mtp_loss: 1.7372 +[titan] 2025-10-05 21:52:39,325 - root - INFO - lr: 5.2673e-06 gnorm: 1.23 [23:18:27< 1:11:28] +[titan] 2025-10-05 21:52:50,189 - root - INFO - step: 38060 loss: 1.9568 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.44 mfu: 42.31% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7301 +[titan] 2025-10-05 21:52:50,189 - root - INFO - lr: 5.2659e-06 gnorm: 1.26 [23:18:38< 1:11:17] +[titan] 2025-10-05 21:53:01,061 - root - INFO - step: 38065 loss: 1.8871 memory: 118.84GiB(85.28%) tps: 30,142 tflops: 418.17 mfu: 42.28% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6693 +[titan] 2025-10-05 21:53:01,061 - root - INFO - lr: 5.2646e-06 gnorm: 1.22 [23:18:49< 1:11:06] +[titan] 2025-10-05 21:53:12,018 - root - INFO - step: 38070 loss: 1.9101 memory: 118.84GiB(85.28%) tps: 29,908 tflops: 414.92 mfu: 41.95% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6894 +[titan] 2025-10-05 21:53:12,018 - root - INFO - lr: 5.2632e-06 gnorm: 1.21 [23:19:00< 1:10:55] +[titan] 2025-10-05 21:53:22,903 - root - INFO - step: 38075 loss: 1.8578 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2140 global_avg_mtp_loss: 1.6438 +[titan] 2025-10-05 21:53:22,903 - root - INFO - lr: 5.2619e-06 gnorm: 1.21 [23:19:11< 1:10:44] +[titan] 2025-10-05 21:53:33,778 - root - INFO - step: 38080 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2244 global_avg_mtp_loss: 1.7179 +[titan] 2025-10-05 21:53:33,779 - root - INFO - lr: 
5.2605e-06 gnorm: 1.25 [23:19:22< 1:10:33] +[titan] 2025-10-05 21:53:44,628 - root - INFO - step: 38085 loss: 1.9527 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7269 +[titan] 2025-10-05 21:53:44,628 - root - INFO - lr: 5.2591e-06 gnorm: 1.23 [23:19:32< 1:10:22] +[titan] 2025-10-05 21:53:55,480 - root - INFO - step: 38090 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.91 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6835 +[titan] 2025-10-05 21:53:55,480 - root - INFO - lr: 5.2578e-06 gnorm: 1.21 [23:19:43< 1:10:11] +[titan] 2025-10-05 21:54:06,381 - root - INFO - step: 38095 loss: 1.9521 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7257 +[titan] 2025-10-05 21:54:06,381 - root - INFO - lr: 5.2565e-06 gnorm: 1.32 [23:19:54< 1:10:00] +[titan] 2025-10-05 21:54:15,086 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:54:17,264 - root - INFO - step: 38100 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6717 +[titan] 2025-10-05 21:54:17,264 - root - INFO - lr: 5.2551e-06 gnorm: 1.21 [23:20:05< 1:09:49] +[titan] 2025-10-05 21:54:28,122 - root - INFO - step: 38105 loss: 1.9313 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 21:54:28,123 - root - INFO - lr: 5.2538e-06 gnorm: 1.25 [23:20:16< 1:09:38] +[titan] 2025-10-05 21:54:38,982 - root - INFO - step: 38110 loss: 1.8796 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6619 +[titan] 2025-10-05 21:54:38,982 - root - INFO - lr: 5.2524e-06 gnorm: 1.30 [23:20:27< 1:09:27] +[titan] 2025-10-05 21:54:49,840 - root - INFO - step: 38115 loss: 1.9389 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7152 +[titan] 2025-10-05 21:54:49,840 - root - INFO - lr: 5.2511e-06 gnorm: 1.27 [23:20:38< 1:09:16] +[titan] 2025-10-05 21:55:00,694 - root - INFO - step: 38120 loss: 1.9211 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6986 +[titan] 2025-10-05 21:55:00,694 - root - INFO - lr: 5.2498e-06 gnorm: 1.22 [23:20:49< 1:09:05] +[titan] 2025-10-05 21:55:11,613 - root - INFO - step: 38125 loss: 1.8922 memory: 118.84GiB(85.28%) tps: 30,011 tflops: 416.35 mfu: 42.10% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 21:55:11,613 - root - INFO - lr: 5.2485e-06 gnorm: 1.22 [23:20:59< 1:08:54] +[titan] 2025-10-05 21:55:22,478 - root - INFO - step: 38130 loss: 1.8761 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6600 +[titan] 2025-10-05 21:55:22,478 - root - INFO - lr: 
5.2471e-06 gnorm: 1.23 [23:21:10< 1:08:43] +[titan] 2025-10-05 21:55:33,363 - root - INFO - step: 38135 loss: 1.9875 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.65 mfu: 42.23% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7568 +[titan] 2025-10-05 21:55:33,364 - root - INFO - lr: 5.2458e-06 gnorm: 1.25 [23:21:21< 1:08:32] +[titan] 2025-10-05 21:55:44,229 - root - INFO - step: 38140 loss: 1.9180 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 21:55:44,229 - root - INFO - lr: 5.2445e-06 gnorm: 1.25 [23:21:32< 1:08:20] +[titan] 2025-10-05 21:55:55,104 - root - INFO - step: 38145 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.03 mfu: 42.27% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6732 +[titan] 2025-10-05 21:55:55,104 - root - INFO - lr: 5.2432e-06 gnorm: 1.23 [23:21:43< 1:08:09] +[titan] 2025-10-05 21:56:03,776 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:56:05,959 - root - INFO - step: 38150 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,187 tflops: 418.80 mfu: 42.35% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6637 +[titan] 2025-10-05 21:56:05,960 - root - INFO - lr: 5.2419e-06 gnorm: 1.28 [23:21:54< 1:07:58] +[titan] 2025-10-05 21:56:16,858 - root - INFO - step: 38155 loss: 1.9104 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6893 +[titan] 2025-10-05 21:56:16,858 - root - INFO - lr: 5.2406e-06 gnorm: 1.23 [23:22:05< 1:07:47] +[titan] 2025-10-05 21:56:27,718 - root - INFO - step: 38160 loss: 1.9208 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.61 mfu: 42.33% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6976 +[titan] 2025-10-05 21:56:27,719 - root - INFO - lr: 5.2393e-06 gnorm: 1.25 [23:22:16< 1:07:36] +[titan] 2025-10-05 21:56:38,596 - root - INFO - step: 38165 loss: 1.8754 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6582 +[titan] 2025-10-05 21:56:38,597 - root - INFO - lr: 5.2380e-06 gnorm: 1.20 [23:22:26< 1:07:25] +[titan] 2025-10-05 21:56:49,479 - root - INFO - step: 38170 loss: 1.9310 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.73 mfu: 42.24% global_avg_ntp_loss: 0.2254 global_avg_mtp_loss: 1.7056 +[titan] 2025-10-05 21:56:49,480 - root - INFO - lr: 5.2367e-06 gnorm: 1.22 [23:22:37< 1:07:14] +[titan] 2025-10-05 21:57:00,354 - root - INFO - step: 38175 loss: 1.9048 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:57:00,354 - root - INFO - lr: 5.2354e-06 gnorm: 1.27 [23:22:48< 1:07:03] +[titan] 2025-10-05 21:57:11,246 - root - INFO - step: 38180 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2169 global_avg_mtp_loss: 1.6626 +[titan] 2025-10-05 21:57:11,247 - root - INFO - lr: 
5.2341e-06 gnorm: 1.25 [23:22:59< 1:06:52] +[titan] 2025-10-05 21:57:22,096 - root - INFO - step: 38185 loss: 1.9869 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2288 global_avg_mtp_loss: 1.7581 +[titan] 2025-10-05 21:57:22,096 - root - INFO - lr: 5.2328e-06 gnorm: 1.27 [23:23:10< 1:06:41] +[titan] 2025-10-05 21:57:32,943 - root - INFO - step: 38190 loss: 1.9812 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2282 global_avg_mtp_loss: 1.7529 +[titan] 2025-10-05 21:57:32,943 - root - INFO - lr: 5.2316e-06 gnorm: 1.26 [23:23:21< 1:06:30] +[titan] 2025-10-05 21:57:43,812 - root - INFO - step: 38195 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6747 +[titan] 2025-10-05 21:57:43,812 - root - INFO - lr: 5.2303e-06 gnorm: 1.23 [23:23:32< 1:06:19] +[titan] 2025-10-05 21:57:52,472 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:57:54,668 - root - INFO - step: 38200 loss: 1.9598 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7333 +[titan] 2025-10-05 21:57:54,668 - root - INFO - lr: 5.2290e-06 gnorm: 1.24 [23:23:42< 1:06:08] +[titan] 2025-10-05 21:58:05,542 - root - INFO - step: 38205 loss: 1.8481 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 21:58:05,543 - root - INFO - lr: 5.2277e-06 gnorm: 1.26 [23:23:53< 1:05:57] +[titan] 2025-10-05 21:58:16,438 - root - INFO - step: 38210 loss: 1.9769 memory: 118.84GiB(85.28%) tps: 30,076 tflops: 417.26 mfu: 42.19% global_avg_ntp_loss: 0.2292 global_avg_mtp_loss: 1.7477 +[titan] 2025-10-05 21:58:16,438 - root - INFO - lr: 5.2265e-06 gnorm: 1.28 [23:24:04< 1:05:46] +[titan] 2025-10-05 21:58:27,285 - root - INFO - step: 38215 loss: 1.9355 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 21:58:27,285 - root - INFO - lr: 5.2252e-06 gnorm: 1.22 [23:24:15< 1:05:35] +[titan] 2025-10-05 21:58:38,133 - root - INFO - step: 38220 loss: 1.8546 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6405 +[titan] 2025-10-05 21:58:38,133 - root - INFO - lr: 5.2240e-06 gnorm: 1.23 [23:24:26< 1:05:24] +[titan] 2025-10-05 21:58:48,997 - root - INFO - step: 38225 loss: 1.8842 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6664 +[titan] 2025-10-05 21:58:48,997 - root - INFO - lr: 5.2227e-06 gnorm: 1.21 [23:24:37< 1:05:13] +[titan] 2025-10-05 21:58:59,888 - root - INFO - step: 38230 loss: 1.9848 memory: 118.84GiB(85.28%) tps: 30,088 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2294 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 21:58:59,888 - root - INFO - lr: 
5.2214e-06 gnorm: 1.24 [23:24:48< 1:05:02] +[titan] 2025-10-05 21:59:10,888 - root - INFO - step: 38235 loss: 1.8777 memory: 118.84GiB(85.28%) tps: 29,791 tflops: 413.31 mfu: 41.79% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 21:59:10,888 - root - INFO - lr: 5.2202e-06 gnorm: 1.21 [23:24:59< 1:04:51] +[titan] 2025-10-05 21:59:21,732 - root - INFO - step: 38240 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6791 +[titan] 2025-10-05 21:59:21,732 - root - INFO - lr: 5.2190e-06 gnorm: 1.22 [23:25:10< 1:04:40] +[titan] 2025-10-05 21:59:32,592 - root - INFO - step: 38245 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 21:59:32,592 - root - INFO - lr: 5.2177e-06 gnorm: 1.26 [23:25:20< 1:04:29] +[titan] 2025-10-05 21:59:41,259 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 21:59:43,442 - root - INFO - step: 38250 loss: 1.8699 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2166 global_avg_mtp_loss: 1.6533 +[titan] 2025-10-05 21:59:43,442 - root - INFO - lr: 5.2165e-06 gnorm: 1.23 [23:25:31< 1:04:18] +[titan] 2025-10-05 21:59:54,302 - root - INFO - step: 38255 loss: 1.9046 memory: 118.84GiB(85.28%) tps: 30,173 tflops: 418.60 mfu: 42.33% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6844 +[titan] 2025-10-05 21:59:54,303 - root - INFO - lr: 5.2152e-06 gnorm: 1.28 [23:25:42< 1:04:07] +[titan] 2025-10-05 22:00:05,203 - root - INFO - step: 38260 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,062 tflops: 417.06 mfu: 42.17% global_avg_ntp_loss: 0.2305 global_avg_mtp_loss: 1.7521 +[titan] 2025-10-05 22:00:05,203 - root - INFO - lr: 5.2140e-06 gnorm: 1.26 [23:25:53< 1:03:56] +[titan] 2025-10-05 22:00:16,075 - root - INFO - step: 38265 loss: 1.8744 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6580 +[titan] 2025-10-05 22:00:16,076 - root - INFO - lr: 5.2128e-06 gnorm: 1.25 [23:26:04< 1:03:45] +[titan] 2025-10-05 22:00:26,953 - root - INFO - step: 38270 loss: 1.9090 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:00:26,953 - root - INFO - lr: 5.2116e-06 gnorm: 1.26 [23:26:15< 1:03:34] +[titan] 2025-10-05 22:00:37,823 - root - INFO - step: 38275 loss: 1.9112 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6897 +[titan] 2025-10-05 22:00:37,823 - root - INFO - lr: 5.2103e-06 gnorm: 1.27 [23:26:26< 1:03:23] +[titan] 2025-10-05 22:00:48,688 - root - INFO - step: 38280 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6959 +[titan] 2025-10-05 22:00:48,688 - root - INFO - lr: 
5.2091e-06 gnorm: 1.25 [23:26:36< 1:03:12] +[titan] 2025-10-05 22:00:59,558 - root - INFO - step: 38285 loss: 1.9620 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7344 +[titan] 2025-10-05 22:00:59,559 - root - INFO - lr: 5.2079e-06 gnorm: 1.24 [23:26:47< 1:03:01] +[titan] 2025-10-05 22:01:10,423 - root - INFO - step: 38290 loss: 1.9216 memory: 118.84GiB(85.28%) tps: 30,162 tflops: 418.45 mfu: 42.31% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6990 +[titan] 2025-10-05 22:01:10,423 - root - INFO - lr: 5.2067e-06 gnorm: 1.23 [23:26:58< 1:02:50] +[titan] 2025-10-05 22:01:21,364 - root - INFO - step: 38295 loss: 1.9718 memory: 118.84GiB(85.28%) tps: 29,951 tflops: 415.52 mfu: 42.01% global_avg_ntp_loss: 0.2290 global_avg_mtp_loss: 1.7428 +[titan] 2025-10-05 22:01:21,364 - root - INFO - lr: 5.2055e-06 gnorm: 1.27 [23:27:09< 1:02:39] +[titan] 2025-10-05 22:01:30,048 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:01:32,234 - root - INFO - step: 38300 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.25 mfu: 42.29% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7052 +[titan] 2025-10-05 22:01:32,234 - root - INFO - lr: 5.2043e-06 gnorm: 1.22 [23:27:20< 1:02:28] +[titan] 2025-10-05 22:01:43,099 - root - INFO - step: 38305 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,161 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6868 +[titan] 2025-10-05 22:01:43,099 - root - INFO - lr: 5.2031e-06 gnorm: 1.24 [23:27:31< 1:02:16] +[titan] 2025-10-05 22:01:53,973 - root - INFO - step: 38310 loss: 1.9433 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7183 +[titan] 2025-10-05 22:01:53,973 - root - INFO - lr: 5.2019e-06 gnorm: 1.25 [23:27:42< 1:02:05] +[titan] 2025-10-05 22:02:04,844 - root - INFO - step: 38315 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6674 +[titan] 2025-10-05 22:02:04,845 - root - INFO - lr: 5.2007e-06 gnorm: 1.28 [23:27:53< 1:01:54] +[titan] 2025-10-05 22:02:15,752 - root - INFO - step: 38320 loss: 1.9010 memory: 118.84GiB(85.28%) tps: 30,042 tflops: 416.79 mfu: 42.14% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6811 +[titan] 2025-10-05 22:02:15,752 - root - INFO - lr: 5.1995e-06 gnorm: 1.24 [23:28:04< 1:01:43] +[titan] 2025-10-05 22:02:26,644 - root - INFO - step: 38325 loss: 1.8521 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.38 mfu: 42.20% global_avg_ntp_loss: 0.2138 global_avg_mtp_loss: 1.6383 +[titan] 2025-10-05 22:02:26,645 - root - INFO - lr: 5.1983e-06 gnorm: 1.27 [23:28:14< 1:01:32] +[titan] 2025-10-05 22:02:37,525 - root - INFO - step: 38330 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:02:37,525 - root - INFO - lr: 
5.1972e-06 gnorm: 1.27 [23:28:25< 1:01:21] +[titan] 2025-10-05 22:02:48,403 - root - INFO - step: 38335 loss: 1.8947 memory: 118.84GiB(85.28%) tps: 30,125 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6753 +[titan] 2025-10-05 22:02:48,403 - root - INFO - lr: 5.1960e-06 gnorm: 1.31 [23:28:36< 1:01:10] +[titan] 2025-10-05 22:02:59,271 - root - INFO - step: 38340 loss: 1.8646 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6487 +[titan] 2025-10-05 22:02:59,271 - root - INFO - lr: 5.1948e-06 gnorm: 1.22 [23:28:47< 1:00:59] +[titan] 2025-10-05 22:03:10,127 - root - INFO - step: 38345 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7020 +[titan] 2025-10-05 22:03:10,127 - root - INFO - lr: 5.1936e-06 gnorm: 1.28 [23:28:58< 1:00:48] +[titan] 2025-10-05 22:03:18,835 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:03:21,025 - root - INFO - step: 38350 loss: 1.8758 memory: 118.84GiB(85.28%) tps: 30,070 tflops: 417.17 mfu: 42.18% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6588 +[titan] 2025-10-05 22:03:21,025 - root - INFO - lr: 5.1925e-06 gnorm: 1.22 [23:29:09< 1:00:37] +[titan] 2025-10-05 22:03:31,925 - root - INFO - step: 38355 loss: 1.9087 memory: 118.84GiB(85.28%) tps: 30,063 tflops: 417.08 mfu: 42.17% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6874 +[titan] 2025-10-05 22:03:31,925 - root - INFO - lr: 5.1913e-06 gnorm: 1.20 [23:29:20< 1:00:26] +[titan] 2025-10-05 22:03:42,780 - root - INFO - step: 38360 loss: 1.9124 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6905 +[titan] 2025-10-05 22:03:42,780 - root - INFO - lr: 5.1902e-06 gnorm: 1.24 [23:29:31< 1:00:15] +[titan] 2025-10-05 22:03:53,638 - root - INFO - step: 38365 loss: 1.9291 memory: 118.84GiB(85.28%) tps: 30,181 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7022 +[titan] 2025-10-05 22:03:53,638 - root - INFO - lr: 5.1890e-06 gnorm: 1.25 [23:29:41< 1:00:04] +[titan] 2025-10-05 22:04:04,503 - root - INFO - step: 38370 loss: 1.9142 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6922 +[titan] 2025-10-05 22:04:04,503 - root - INFO - lr: 5.1878e-06 gnorm: 1.23 [23:29:52< 0:59:53] +[titan] 2025-10-05 22:04:15,408 - root - INFO - step: 38375 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,048 tflops: 416.86 mfu: 42.15% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6903 +[titan] 2025-10-05 22:04:15,409 - root - INFO - lr: 5.1867e-06 gnorm: 1.24 [23:30:03< 0:59:42] +[titan] 2025-10-05 22:04:26,282 - root - INFO - step: 38380 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,136 tflops: 418.10 mfu: 42.27% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6785 +[titan] 2025-10-05 22:04:26,282 - root - INFO - lr: 
5.1856e-06 gnorm: 1.23 [23:30:14< 0:59:31] +[titan] 2025-10-05 22:04:37,152 - root - INFO - step: 38385 loss: 1.9039 memory: 118.84GiB(85.28%) tps: 30,145 tflops: 418.22 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6840 +[titan] 2025-10-05 22:04:37,153 - root - INFO - lr: 5.1844e-06 gnorm: 1.26 [23:30:25< 0:59:20] +[titan] 2025-10-05 22:04:48,030 - root - INFO - step: 38390 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2214 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:04:48,030 - root - INFO - lr: 5.1833e-06 gnorm: 1.26 [23:30:36< 0:59:09] +[titan] 2025-10-05 22:04:58,887 - root - INFO - step: 38395 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:04:58,887 - root - INFO - lr: 5.1821e-06 gnorm: 1.24 [23:30:47< 0:58:58] +[titan] 2025-10-05 22:05:07,643 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:05:09,829 - root - INFO - step: 38400 loss: 1.9491 memory: 118.84GiB(85.28%) tps: 29,947 tflops: 415.47 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7228 +[titan] 2025-10-05 22:05:09,829 - root - INFO - lr: 5.1810e-06 gnorm: 1.25 [23:30:58< 0:58:47] +[titan] 2025-10-05 22:05:10,023 - root - INFO - Dumping profiler traces at step 38400 +[titan] 2025-10-05 22:05:10,065 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:05:20,979 - root - INFO - step: 38405 loss: 1.9210 memory: 118.84GiB(85.28%) tps: 29,389 tflops: 407.73 mfu: 41.23% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.6978 +[titan] 2025-10-05 22:05:20,979 - root - INFO - lr: 5.1799e-06 gnorm: 1.21 [23:31:09< 0:58:36] +[titan] 2025-10-05 22:05:31,845 - root - INFO - step: 38410 loss: 1.8893 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6711 +[titan] 2025-10-05 22:05:31,845 - root - INFO - lr: 5.1788e-06 gnorm: 1.25 [23:31:20< 0:58:25] +[titan] 2025-10-05 22:05:42,706 - root - INFO - step: 38415 loss: 1.9021 memory: 118.84GiB(85.28%) tps: 30,172 tflops: 418.59 mfu: 42.32% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6821 +[titan] 2025-10-05 22:05:42,706 - root - INFO - lr: 5.1776e-06 gnorm: 1.25 [23:31:30< 0:58:14] +[titan] 2025-10-05 22:05:53,597 - root - INFO - step: 38420 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.41 mfu: 42.20% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6853 +[titan] 2025-10-05 22:05:53,598 - root - INFO - lr: 5.1765e-06 gnorm: 1.24 [23:31:41< 0:58:03] +[titan] 2025-10-05 22:06:04,473 - root - INFO - step: 38425 loss: 1.8931 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6739 +[titan] 2025-10-05 22:06:04,473 - root - INFO - lr: 5.1754e-06 gnorm: 1.24 [23:31:52< 0:57:52] +[titan] 2025-10-05 22:06:15,341 - root - INFO - step: 38430 loss: 
1.9097 memory: 118.84GiB(85.28%) tps: 30,150 tflops: 418.28 mfu: 42.29% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:06:15,342 - root - INFO - lr: 5.1743e-06 gnorm: 1.28 [23:32:03< 0:57:41] +[titan] 2025-10-05 22:06:26,263 - root - INFO - step: 38435 loss: 1.8790 memory: 118.84GiB(85.28%) tps: 30,004 tflops: 416.26 mfu: 42.09% global_avg_ntp_loss: 0.2183 global_avg_mtp_loss: 1.6607 +[titan] 2025-10-05 22:06:26,263 - root - INFO - lr: 5.1732e-06 gnorm: 1.21 [23:32:14< 0:57:30] +[titan] 2025-10-05 22:06:37,131 - root - INFO - step: 38440 loss: 1.9451 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7207 +[titan] 2025-10-05 22:06:37,132 - root - INFO - lr: 5.1721e-06 gnorm: 1.24 [23:32:25< 0:57:19] +[titan] 2025-10-05 22:06:48,006 - root - INFO - step: 38445 loss: 1.8961 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.07 mfu: 42.27% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6765 +[titan] 2025-10-05 22:06:48,006 - root - INFO - lr: 5.1710e-06 gnorm: 1.26 [23:32:36< 0:57:08] +[titan] 2025-10-05 22:06:56,703 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:06:58,881 - root - INFO - step: 38450 loss: 1.9214 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 22:06:58,881 - root - INFO - lr: 5.1699e-06 gnorm: 1.25 [23:32:47< 0:56:57] +[titan] 2025-10-05 22:07:09,781 - root - INFO - step: 38455 loss: 1.9440 memory: 118.84GiB(85.28%) tps: 30,064 tflops: 417.09 mfu: 42.17% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:07:09,781 - root - INFO - lr: 5.1688e-06 gnorm: 1.24 [23:32:58< 0:56:46] +[titan] 2025-10-05 22:07:20,663 - root - INFO - step: 38460 loss: 1.8888 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.77 mfu: 42.24% global_avg_ntp_loss: 0.2184 global_avg_mtp_loss: 1.6704 +[titan] 2025-10-05 22:07:20,663 - root - INFO - lr: 5.1677e-06 gnorm: 1.25 [23:33:08< 0:56:35] +[titan] 2025-10-05 22:07:31,515 - root - INFO - step: 38465 loss: 1.9144 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2211 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:07:31,515 - root - INFO - lr: 5.1666e-06 gnorm: 1.27 [23:33:19< 0:56:24] +[titan] 2025-10-05 22:07:42,351 - root - INFO - step: 38470 loss: 1.8510 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.53 mfu: 42.42% global_avg_ntp_loss: 0.2144 global_avg_mtp_loss: 1.6366 +[titan] 2025-10-05 22:07:42,351 - root - INFO - lr: 5.1655e-06 gnorm: 1.22 [23:33:30< 0:56:13] +[titan] 2025-10-05 22:07:53,204 - root - INFO - step: 38475 loss: 1.9409 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.88 mfu: 42.35% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7151 +[titan] 2025-10-05 22:07:53,204 - root - INFO - lr: 5.1645e-06 gnorm: 1.27 [23:33:41< 0:56:01] +[titan] 2025-10-05 22:08:04,067 - root - INFO - step: 38480 loss: 1.9386 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7131 +[titan] 2025-10-05 22:08:04,067 - root - INFO - lr: 
5.1634e-06 gnorm: 1.21 [23:33:52< 0:55:50] +[titan] 2025-10-05 22:08:14,965 - root - INFO - step: 38485 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,069 tflops: 417.16 mfu: 42.18% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:08:14,965 - root - INFO - lr: 5.1623e-06 gnorm: 1.27 [23:34:03< 0:55:39] +[titan] 2025-10-05 22:08:25,908 - root - INFO - step: 38490 loss: 1.9581 memory: 118.84GiB(85.28%) tps: 29,945 tflops: 415.43 mfu: 42.01% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:08:25,908 - root - INFO - lr: 5.1612e-06 gnorm: 1.26 [23:34:14< 0:55:28] +[titan] 2025-10-05 22:08:36,784 - root - INFO - step: 38495 loss: 1.8877 memory: 118.84GiB(85.28%) tps: 30,131 tflops: 418.02 mfu: 42.27% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6699 +[titan] 2025-10-05 22:08:36,784 - root - INFO - lr: 5.1602e-06 gnorm: 1.32 [23:34:25< 0:55:17] +[titan] 2025-10-05 22:08:45,475 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:08:47,650 - root - INFO - step: 38500 loss: 1.9821 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7522 +[titan] 2025-10-05 22:08:47,650 - root - INFO - lr: 5.1591e-06 gnorm: 1.33 [23:34:35< 0:55:06] +[titan] 2025-10-05 22:08:58,527 - root - INFO - step: 38505 loss: 2.0068 memory: 118.84GiB(85.28%) tps: 30,128 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2320 global_avg_mtp_loss: 1.7748 +[titan] 2025-10-05 22:08:58,527 - root - INFO - lr: 5.1581e-06 gnorm: 1.28 [23:34:46< 0:54:55] +[titan] 2025-10-05 22:09:09,391 - root - INFO - step: 38510 loss: 1.9323 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.46 mfu: 42.31% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:09:09,391 - root - INFO - lr: 5.1570e-06 gnorm: 1.28 [23:34:57< 0:54:44] +[titan] 2025-10-05 22:09:20,365 - root - INFO - step: 38515 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 29,861 tflops: 414.28 mfu: 41.89% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:09:20,365 - root - INFO - lr: 5.1560e-06 gnorm: 1.27 [23:35:08< 0:54:33] +[titan] 2025-10-05 22:09:31,218 - root - INFO - step: 38520 loss: 1.9315 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:09:31,219 - root - INFO - lr: 5.1549e-06 gnorm: 1.25 [23:35:19< 0:54:22] +[titan] 2025-10-05 22:09:42,070 - root - INFO - step: 38525 loss: 1.9049 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.93 mfu: 42.36% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6845 +[titan] 2025-10-05 22:09:42,071 - root - INFO - lr: 5.1539e-06 gnorm: 1.23 [23:35:30< 0:54:11] +[titan] 2025-10-05 22:09:52,922 - root - INFO - step: 38530 loss: 1.9541 memory: 118.84GiB(85.28%) tps: 30,197 tflops: 418.94 mfu: 42.36% global_avg_ntp_loss: 0.2265 global_avg_mtp_loss: 1.7276 +[titan] 2025-10-05 22:09:52,922 - root - INFO - lr: 
5.1528e-06 gnorm: 1.26 [23:35:41< 0:54:00] +[titan] 2025-10-05 22:10:03,769 - root - INFO - step: 38535 loss: 1.9228 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:10:03,769 - root - INFO - lr: 5.1518e-06 gnorm: 1.25 [23:35:52< 0:53:49] +[titan] 2025-10-05 22:10:14,645 - root - INFO - step: 38540 loss: 1.9149 memory: 118.84GiB(85.28%) tps: 30,130 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6940 +[titan] 2025-10-05 22:10:14,645 - root - INFO - lr: 5.1508e-06 gnorm: 1.24 [23:36:02< 0:53:38] +[titan] 2025-10-05 22:10:25,531 - root - INFO - step: 38545 loss: 1.8971 memory: 118.84GiB(85.28%) tps: 30,102 tflops: 417.62 mfu: 42.23% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6771 +[titan] 2025-10-05 22:10:25,532 - root - INFO - lr: 5.1497e-06 gnorm: 1.21 [23:36:13< 0:53:27] +[titan] 2025-10-05 22:10:34,232 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:10:36,411 - root - INFO - step: 38550 loss: 1.9371 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2235 global_avg_mtp_loss: 1.7136 +[titan] 2025-10-05 22:10:36,411 - root - INFO - lr: 5.1487e-06 gnorm: 1.26 [23:36:24< 0:53:16] +[titan] 2025-10-05 22:10:47,265 - root - INFO - step: 38555 loss: 1.9055 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6859 +[titan] 2025-10-05 22:10:47,265 - root - INFO - lr: 5.1477e-06 gnorm: 1.25 [23:36:35< 0:53:05] +[titan] 2025-10-05 22:10:58,113 - root - INFO - step: 38560 loss: 1.8963 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6763 +[titan] 2025-10-05 22:10:58,113 - root - INFO - lr: 5.1467e-06 gnorm: 1.28 [23:36:46< 0:52:54] +[titan] 2025-10-05 22:11:08,954 - root - INFO - step: 38565 loss: 1.9417 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:11:08,954 - root - INFO - lr: 5.1456e-06 gnorm: 1.26 [23:36:57< 0:52:43] +[titan] 2025-10-05 22:11:19,804 - root - INFO - step: 38570 loss: 1.9003 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6809 +[titan] 2025-10-05 22:11:19,805 - root - INFO - lr: 5.1446e-06 gnorm: 1.22 [23:37:08< 0:52:32] +[titan] 2025-10-05 22:11:30,699 - root - INFO - step: 38575 loss: 1.8708 memory: 118.84GiB(85.28%) tps: 30,079 tflops: 417.29 mfu: 42.19% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6535 +[titan] 2025-10-05 22:11:30,699 - root - INFO - lr: 5.1436e-06 gnorm: 1.25 [23:37:18< 0:52:21] +[titan] 2025-10-05 22:11:41,605 - root - INFO - step: 38580 loss: 1.9498 memory: 118.84GiB(85.28%) tps: 30,046 tflops: 416.85 mfu: 42.15% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7240 +[titan] 2025-10-05 22:11:41,605 - root - INFO - lr: 
5.1426e-06 gnorm: 1.26 [23:37:29< 0:52:10] +[titan] 2025-10-05 22:11:52,476 - root - INFO - step: 38585 loss: 1.8659 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.21 mfu: 42.29% global_avg_ntp_loss: 0.2149 global_avg_mtp_loss: 1.6510 +[titan] 2025-10-05 22:11:52,476 - root - INFO - lr: 5.1416e-06 gnorm: 1.27 [23:37:40< 0:51:59] +[titan] 2025-10-05 22:12:03,366 - root - INFO - step: 38590 loss: 1.8820 memory: 118.84GiB(85.28%) tps: 30,091 tflops: 417.46 mfu: 42.21% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6644 +[titan] 2025-10-05 22:12:03,366 - root - INFO - lr: 5.1406e-06 gnorm: 1.30 [23:37:51< 0:51:48] +[titan] 2025-10-05 22:12:14,240 - root - INFO - step: 38595 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,134 tflops: 418.06 mfu: 42.27% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6854 +[titan] 2025-10-05 22:12:14,241 - root - INFO - lr: 5.1396e-06 gnorm: 1.20 [23:38:02< 0:51:37] +[titan] 2025-10-05 22:12:22,975 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:12:25,159 - root - INFO - step: 38600 loss: 1.8441 memory: 118.84GiB(85.28%) tps: 30,012 tflops: 416.37 mfu: 42.10% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6314 +[titan] 2025-10-05 22:12:25,159 - root - INFO - lr: 5.1386e-06 gnorm: 1.21 [23:38:13< 0:51:26] +[titan] 2025-10-05 22:12:36,019 - root - INFO - step: 38605 loss: 1.9263 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7032 +[titan] 2025-10-05 22:12:36,019 - root - INFO - lr: 5.1376e-06 gnorm: 1.24 [23:38:24< 0:51:15] +[titan] 2025-10-05 22:12:46,891 - root - INFO - step: 38610 loss: 1.8742 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 22:12:46,891 - root - INFO - lr: 5.1367e-06 gnorm: 1.19 [23:38:35< 0:51:04] +[titan] 2025-10-05 22:12:57,808 - root - INFO - step: 38615 loss: 1.9826 memory: 118.84GiB(85.28%) tps: 30,017 tflops: 416.43 mfu: 42.11% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7533 +[titan] 2025-10-05 22:12:57,808 - root - INFO - lr: 5.1357e-06 gnorm: 1.29 [23:38:46< 0:50:53] +[titan] 2025-10-05 22:13:08,674 - root - INFO - step: 38620 loss: 1.9403 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.39 mfu: 42.30% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7147 +[titan] 2025-10-05 22:13:08,674 - root - INFO - lr: 5.1347e-06 gnorm: 1.29 [23:38:56< 0:50:42] +[titan] 2025-10-05 22:13:19,536 - root - INFO - step: 38625 loss: 1.9887 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2309 global_avg_mtp_loss: 1.7577 +[titan] 2025-10-05 22:13:19,537 - root - INFO - lr: 5.1337e-06 gnorm: 1.26 [23:39:07< 0:50:31] +[titan] 2025-10-05 22:13:30,453 - root - INFO - step: 38630 loss: 1.8878 memory: 118.84GiB(85.28%) tps: 30,019 tflops: 416.47 mfu: 42.11% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6688 +[titan] 2025-10-05 22:13:30,453 - root - INFO - lr: 
5.1328e-06 gnorm: 1.24 [23:39:18< 0:50:20] +[titan] 2025-10-05 22:13:41,303 - root - INFO - step: 38635 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7040 +[titan] 2025-10-05 22:13:41,303 - root - INFO - lr: 5.1318e-06 gnorm: 1.27 [23:39:29< 0:50:09] +[titan] 2025-10-05 22:13:52,138 - root - INFO - step: 38640 loss: 1.8970 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6777 +[titan] 2025-10-05 22:13:52,139 - root - INFO - lr: 5.1308e-06 gnorm: 1.25 [23:39:40< 0:49:58] +[titan] 2025-10-05 22:14:03,026 - root - INFO - step: 38645 loss: 1.8958 memory: 118.84GiB(85.28%) tps: 30,098 tflops: 417.57 mfu: 42.22% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6769 +[titan] 2025-10-05 22:14:03,026 - root - INFO - lr: 5.1299e-06 gnorm: 1.24 [23:39:51< 0:49:47] +[titan] 2025-10-05 22:14:11,664 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:14:13,845 - root - INFO - step: 38650 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6629 +[titan] 2025-10-05 22:14:13,845 - root - INFO - lr: 5.1289e-06 gnorm: 1.27 [23:40:02< 0:49:36] +[titan] 2025-10-05 22:14:24,687 - root - INFO - step: 38655 loss: 1.9587 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7329 +[titan] 2025-10-05 22:14:24,687 - root - INFO - lr: 5.1280e-06 gnorm: 1.32 [23:40:12< 0:49:24] +[titan] 2025-10-05 22:14:35,527 - root - INFO - step: 38660 loss: 1.9177 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.40 mfu: 42.41% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6957 +[titan] 2025-10-05 22:14:35,527 - root - INFO - lr: 5.1270e-06 gnorm: 1.28 [23:40:23< 0:49:13] +[titan] 2025-10-05 22:14:46,388 - root - INFO - step: 38665 loss: 1.9847 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7540 +[titan] 2025-10-05 22:14:46,388 - root - INFO - lr: 5.1261e-06 gnorm: 1.24 [23:40:34< 0:49:02] +[titan] 2025-10-05 22:14:57,230 - root - INFO - step: 38670 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6892 +[titan] 2025-10-05 22:14:57,231 - root - INFO - lr: 5.1251e-06 gnorm: 1.25 [23:40:45< 0:48:51] +[titan] 2025-10-05 22:15:08,076 - root - INFO - step: 38675 loss: 1.9234 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2243 global_avg_mtp_loss: 1.6991 +[titan] 2025-10-05 22:15:08,077 - root - INFO - lr: 5.1242e-06 gnorm: 1.23 [23:40:56< 0:48:40] +[titan] 2025-10-05 22:15:18,905 - root - INFO - step: 38680 loss: 1.9564 memory: 118.84GiB(85.28%) tps: 30,261 tflops: 419.83 mfu: 42.45% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7307 +[titan] 2025-10-05 22:15:18,905 - root - INFO - lr: 
5.1233e-06 gnorm: 1.24 [23:41:07< 0:48:29] +[titan] 2025-10-05 22:15:29,770 - root - INFO - step: 38685 loss: 1.8560 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6416 +[titan] 2025-10-05 22:15:29,770 - root - INFO - lr: 5.1223e-06 gnorm: 1.26 [23:41:18< 0:48:18] +[titan] 2025-10-05 22:15:40,605 - root - INFO - step: 38690 loss: 1.9384 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7138 +[titan] 2025-10-05 22:15:40,606 - root - INFO - lr: 5.1214e-06 gnorm: 1.26 [23:41:28< 0:48:07] +[titan] 2025-10-05 22:15:51,445 - root - INFO - step: 38695 loss: 1.9237 memory: 118.84GiB(85.28%) tps: 30,231 tflops: 419.41 mfu: 42.41% global_avg_ntp_loss: 0.2229 global_avg_mtp_loss: 1.7008 +[titan] 2025-10-05 22:15:51,445 - root - INFO - lr: 5.1205e-06 gnorm: 1.26 [23:41:39< 0:47:56] +[titan] 2025-10-05 22:16:00,115 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:16:02,303 - root - INFO - step: 38700 loss: 1.8806 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6625 +[titan] 2025-10-05 22:16:02,303 - root - INFO - lr: 5.1195e-06 gnorm: 1.22 [23:41:50< 0:47:45] +[titan] 2025-10-05 22:16:13,157 - root - INFO - step: 38705 loss: 1.8876 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6698 +[titan] 2025-10-05 22:16:13,157 - root - INFO - lr: 5.1186e-06 gnorm: 1.24 [23:42:01< 0:47:34] +[titan] 2025-10-05 22:16:24,067 - root - INFO - step: 38710 loss: 1.9089 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.71 mfu: 42.13% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6871 +[titan] 2025-10-05 22:16:24,067 - root - INFO - lr: 5.1177e-06 gnorm: 1.23 [23:42:12< 0:47:23] +[titan] 2025-10-05 22:16:34,977 - root - INFO - step: 38715 loss: 1.9159 memory: 118.84GiB(85.28%) tps: 30,036 tflops: 416.70 mfu: 42.13% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:16:34,977 - root - INFO - lr: 5.1168e-06 gnorm: 1.27 [23:42:23< 0:47:12] +[titan] 2025-10-05 22:16:45,845 - root - INFO - step: 38720 loss: 1.9462 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2239 global_avg_mtp_loss: 1.7224 +[titan] 2025-10-05 22:16:45,845 - root - INFO - lr: 5.1159e-06 gnorm: 1.26 [23:42:34< 0:47:01] +[titan] 2025-10-05 22:16:56,703 - root - INFO - step: 38725 loss: 1.8703 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.70 mfu: 42.34% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6532 +[titan] 2025-10-05 22:16:56,703 - root - INFO - lr: 5.1150e-06 gnorm: 1.22 [23:42:44< 0:46:50] +[titan] 2025-10-05 22:17:07,552 - root - INFO - step: 38730 loss: 1.8919 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.03 mfu: 42.37% global_avg_ntp_loss: 0.2188 global_avg_mtp_loss: 1.6731 +[titan] 2025-10-05 22:17:07,553 - root - INFO - lr: 
5.1141e-06 gnorm: 1.23 [23:42:55< 0:46:39] +[titan] 2025-10-05 22:17:18,393 - root - INFO - step: 38735 loss: 1.9710 memory: 118.84GiB(85.28%) tps: 30,227 tflops: 419.35 mfu: 42.40% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7425 +[titan] 2025-10-05 22:17:18,393 - root - INFO - lr: 5.1132e-06 gnorm: 1.32 [23:43:06< 0:46:28] +[titan] 2025-10-05 22:17:29,314 - root - INFO - step: 38740 loss: 1.8794 memory: 118.84GiB(85.28%) tps: 30,007 tflops: 416.30 mfu: 42.09% global_avg_ntp_loss: 0.2171 global_avg_mtp_loss: 1.6623 +[titan] 2025-10-05 22:17:29,314 - root - INFO - lr: 5.1123e-06 gnorm: 1.24 [23:43:17< 0:46:17] +[titan] 2025-10-05 22:17:40,164 - root - INFO - step: 38745 loss: 1.8962 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.37% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6772 +[titan] 2025-10-05 22:17:40,164 - root - INFO - lr: 5.1114e-06 gnorm: 1.27 [23:43:28< 0:46:06] +[titan] 2025-10-05 22:17:48,833 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:17:51,020 - root - INFO - step: 38750 loss: 1.8652 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.78 mfu: 42.34% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6492 +[titan] 2025-10-05 22:17:51,020 - root - INFO - lr: 5.1105e-06 gnorm: 1.30 [23:43:39< 0:45:55] +[titan] 2025-10-05 22:18:01,867 - root - INFO - step: 38755 loss: 1.8715 memory: 118.84GiB(85.28%) tps: 30,210 tflops: 419.12 mfu: 42.38% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6553 +[titan] 2025-10-05 22:18:01,867 - root - INFO - lr: 5.1097e-06 gnorm: 1.24 [23:43:50< 0:45:44] +[titan] 2025-10-05 22:18:12,725 - root - INFO - step: 38760 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,180 tflops: 418.71 mfu: 42.34% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6833 +[titan] 2025-10-05 22:18:12,725 - root - INFO - lr: 5.1088e-06 gnorm: 1.25 [23:44:00< 0:45:33] +[titan] 2025-10-05 22:18:23,576 - root - INFO - step: 38765 loss: 1.9134 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6914 +[titan] 2025-10-05 22:18:23,576 - root - INFO - lr: 5.1079e-06 gnorm: 1.24 [23:44:11< 0:45:22] +[titan] 2025-10-05 22:18:34,466 - root - INFO - step: 38770 loss: 1.9247 memory: 118.84GiB(85.28%) tps: 30,092 tflops: 417.48 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7025 +[titan] 2025-10-05 22:18:34,466 - root - INFO - lr: 5.1070e-06 gnorm: 1.21 [23:44:22< 0:45:11] +[titan] 2025-10-05 22:18:45,359 - root - INFO - step: 38775 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,082 tflops: 417.34 mfu: 42.20% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6931 +[titan] 2025-10-05 22:18:45,359 - root - INFO - lr: 5.1062e-06 gnorm: 1.22 [23:44:33< 0:45:00] +[titan] 2025-10-05 22:18:56,225 - root - INFO - step: 38780 loss: 1.9150 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6923 +[titan] 2025-10-05 22:18:56,225 - root - INFO - lr: 
5.1053e-06 gnorm: 1.23 [23:44:44< 0:44:49] +[titan] 2025-10-05 22:19:07,063 - root - INFO - step: 38785 loss: 1.8911 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6725 +[titan] 2025-10-05 22:19:07,063 - root - INFO - lr: 5.1044e-06 gnorm: 1.28 [23:44:55< 0:44:38] +[titan] 2025-10-05 22:19:17,908 - root - INFO - step: 38790 loss: 1.9510 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7262 +[titan] 2025-10-05 22:19:17,908 - root - INFO - lr: 5.1036e-06 gnorm: 1.27 [23:45:06< 0:44:27] +[titan] 2025-10-05 22:19:28,765 - root - INFO - step: 38795 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:19:28,765 - root - INFO - lr: 5.1027e-06 gnorm: 1.25 [23:45:17< 0:44:16] +[titan] 2025-10-05 22:19:37,452 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:19:39,646 - root - INFO - step: 38800 loss: 1.9199 memory: 118.84GiB(85.28%) tps: 30,116 tflops: 417.81 mfu: 42.25% global_avg_ntp_loss: 0.2225 global_avg_mtp_loss: 1.6974 +[titan] 2025-10-05 22:19:39,646 - root - INFO - lr: 5.1019e-06 gnorm: 1.22 [23:45:27< 0:44:05] +[titan] 2025-10-05 22:19:50,541 - root - INFO - step: 38805 loss: 1.9300 memory: 118.84GiB(85.28%) tps: 30,077 tflops: 417.27 mfu: 42.19% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7072 +[titan] 2025-10-05 22:19:50,542 - root - INFO - lr: 5.1010e-06 gnorm: 1.25 [23:45:38< 0:43:54] +[titan] 2025-10-05 22:20:01,404 - root - INFO - step: 38810 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.50 mfu: 42.32% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6756 +[titan] 2025-10-05 22:20:01,405 - root - INFO - lr: 5.1002e-06 gnorm: 1.25 [23:45:49< 0:43:43] +[titan] 2025-10-05 22:20:12,258 - root - INFO - step: 38815 loss: 1.9302 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2242 global_avg_mtp_loss: 1.7061 +[titan] 2025-10-05 22:20:12,258 - root - INFO - lr: 5.0993e-06 gnorm: 1.33 [23:46:00< 0:43:32] +[titan] 2025-10-05 22:20:23,109 - root - INFO - step: 38820 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:20:23,109 - root - INFO - lr: 5.0985e-06 gnorm: 1.25 [23:46:11< 0:43:21] +[titan] 2025-10-05 22:20:33,977 - root - INFO - step: 38825 loss: 1.8647 memory: 118.84GiB(85.28%) tps: 30,153 tflops: 418.32 mfu: 42.30% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6488 +[titan] 2025-10-05 22:20:33,977 - root - INFO - lr: 5.0977e-06 gnorm: 1.28 [23:46:22< 0:43:10] +[titan] 2025-10-05 22:20:44,821 - root - INFO - step: 38830 loss: 1.8861 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2178 global_avg_mtp_loss: 1.6682 +[titan] 2025-10-05 22:20:44,822 - root - INFO - lr: 
5.0969e-06 gnorm: 1.27 [23:46:33< 0:42:59] +[titan] 2025-10-05 22:20:55,718 - root - INFO - step: 38835 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,073 tflops: 417.21 mfu: 42.19% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7164 +[titan] 2025-10-05 22:20:55,718 - root - INFO - lr: 5.0960e-06 gnorm: 1.26 [23:46:43< 0:42:48] +[titan] 2025-10-05 22:21:06,566 - root - INFO - step: 38840 loss: 1.9277 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7046 +[titan] 2025-10-05 22:21:06,566 - root - INFO - lr: 5.0952e-06 gnorm: 1.27 [23:46:54< 0:42:36] +[titan] 2025-10-05 22:21:17,446 - root - INFO - step: 38845 loss: 1.8528 memory: 118.84GiB(85.28%) tps: 30,117 tflops: 417.83 mfu: 42.25% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6385 +[titan] 2025-10-05 22:21:17,446 - root - INFO - lr: 5.0944e-06 gnorm: 1.24 [23:47:05< 0:42:25] +[titan] 2025-10-05 22:21:26,133 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:21:28,317 - root - INFO - step: 38850 loss: 1.9500 memory: 118.84GiB(85.28%) tps: 30,144 tflops: 418.20 mfu: 42.29% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7244 +[titan] 2025-10-05 22:21:28,317 - root - INFO - lr: 5.0936e-06 gnorm: 1.28 [23:47:16< 0:42:14] +[titan] 2025-10-05 22:21:39,188 - root - INFO - step: 38855 loss: 1.8571 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6436 +[titan] 2025-10-05 22:21:39,188 - root - INFO - lr: 5.0928e-06 gnorm: 1.25 [23:47:27< 0:42:03] +[titan] 2025-10-05 22:21:50,046 - root - INFO - step: 38860 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7015 +[titan] 2025-10-05 22:21:50,047 - root - INFO - lr: 5.0920e-06 gnorm: 1.30 [23:47:38< 0:41:52] +[titan] 2025-10-05 22:22:00,909 - root - INFO - step: 38865 loss: 1.9681 memory: 118.84GiB(85.28%) tps: 30,167 tflops: 418.52 mfu: 42.32% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:22:00,909 - root - INFO - lr: 5.0911e-06 gnorm: 1.28 [23:47:49< 0:41:41] +[titan] 2025-10-05 22:22:11,785 - root - INFO - step: 38870 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7082 +[titan] 2025-10-05 22:22:11,786 - root - INFO - lr: 5.0903e-06 gnorm: 1.26 [23:48:00< 0:41:30] +[titan] 2025-10-05 22:22:22,628 - root - INFO - step: 38875 loss: 1.9517 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7259 +[titan] 2025-10-05 22:22:22,628 - root - INFO - lr: 5.0895e-06 gnorm: 1.24 [23:48:10< 0:41:19] +[titan] 2025-10-05 22:22:33,500 - root - INFO - step: 38880 loss: 1.9275 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2237 global_avg_mtp_loss: 1.7038 +[titan] 2025-10-05 22:22:33,500 - root - INFO - lr: 
5.0888e-06 gnorm: 1.26 [23:48:21< 0:41:08] +[titan] 2025-10-05 22:22:44,338 - root - INFO - step: 38885 loss: 1.9020 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:22:44,338 - root - INFO - lr: 5.0880e-06 gnorm: 1.28 [23:48:32< 0:40:57] +[titan] 2025-10-05 22:22:55,187 - root - INFO - step: 38890 loss: 1.9042 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6843 +[titan] 2025-10-05 22:22:55,187 - root - INFO - lr: 5.0872e-06 gnorm: 1.24 [23:48:43< 0:40:46] +[titan] 2025-10-05 22:23:06,026 - root - INFO - step: 38895 loss: 1.9172 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6946 +[titan] 2025-10-05 22:23:06,026 - root - INFO - lr: 5.0864e-06 gnorm: 1.26 [23:48:54< 0:40:35] +[titan] 2025-10-05 22:23:14,734 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:23:16,918 - root - INFO - step: 38900 loss: 1.9103 memory: 118.84GiB(85.28%) tps: 30,085 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6888 +[titan] 2025-10-05 22:23:16,918 - root - INFO - lr: 5.0856e-06 gnorm: 1.25 [23:49:05< 0:40:24] +[titan] 2025-10-05 22:23:27,768 - root - INFO - step: 38905 loss: 1.9460 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 418.99 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7210 +[titan] 2025-10-05 22:23:27,768 - root - INFO - lr: 5.0848e-06 gnorm: 1.28 [23:49:16< 0:40:13] +[titan] 2025-10-05 22:23:38,736 - root - INFO - step: 38910 loss: 1.9255 memory: 118.84GiB(85.28%) tps: 29,877 tflops: 414.50 mfu: 41.91% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7027 +[titan] 2025-10-05 22:23:38,736 - root - INFO - lr: 5.0841e-06 gnorm: 1.25 [23:49:26< 0:40:02] +[titan] 2025-10-05 22:23:43,287 - root - INFO - Dumping profiler traces at step 38912 +[titan] 2025-10-05 22:23:43,326 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:23:49,840 - root - INFO - step: 38915 loss: 1.9354 memory: 118.84GiB(85.28%) tps: 29,511 tflops: 409.42 mfu: 41.40% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7109 +[titan] 2025-10-05 22:23:49,840 - root - INFO - lr: 5.0833e-06 gnorm: 1.23 [23:49:38< 0:39:51] +[titan] 2025-10-05 22:24:00,685 - root - INFO - step: 38920 loss: 1.9638 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7362 +[titan] 2025-10-05 22:24:00,685 - root - INFO - lr: 5.0825e-06 gnorm: 1.24 [23:49:48< 0:39:40] +[titan] 2025-10-05 22:24:11,518 - root - INFO - step: 38925 loss: 1.9421 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7175 +[titan] 2025-10-05 22:24:11,518 - root - INFO - lr: 5.0818e-06 gnorm: 1.28 [23:49:59< 0:39:29] +[titan] 2025-10-05 22:24:22,383 - root - INFO - step: 38930 loss: 
1.9030 memory: 118.84GiB(85.28%) tps: 30,159 tflops: 418.41 mfu: 42.31% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:24:22,383 - root - INFO - lr: 5.0810e-06 gnorm: 1.22 [23:50:10< 0:39:18] +[titan] 2025-10-05 22:24:33,286 - root - INFO - step: 38935 loss: 1.9341 memory: 118.84GiB(85.28%) tps: 30,056 tflops: 416.99 mfu: 42.16% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7101 +[titan] 2025-10-05 22:24:33,286 - root - INFO - lr: 5.0803e-06 gnorm: 1.25 [23:50:21< 0:39:07] +[titan] 2025-10-05 22:24:44,145 - root - INFO - step: 38940 loss: 1.8817 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2175 global_avg_mtp_loss: 1.6642 +[titan] 2025-10-05 22:24:44,145 - root - INFO - lr: 5.0795e-06 gnorm: 1.33 [23:50:32< 0:38:56] +[titan] 2025-10-05 22:24:55,011 - root - INFO - step: 38945 loss: 1.8488 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.31% global_avg_ntp_loss: 0.2135 global_avg_mtp_loss: 1.6353 +[titan] 2025-10-05 22:24:55,011 - root - INFO - lr: 5.0788e-06 gnorm: 1.25 [23:50:43< 0:38:45] +[titan] 2025-10-05 22:25:03,688 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:25:05,861 - root - INFO - step: 38950 loss: 1.9434 memory: 118.84GiB(85.28%) tps: 30,200 tflops: 418.98 mfu: 42.36% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7184 +[titan] 2025-10-05 22:25:05,861 - root - INFO - lr: 5.0780e-06 gnorm: 1.26 [23:50:54< 0:38:34] +[titan] 2025-10-05 22:25:16,696 - root - INFO - step: 38955 loss: 1.8763 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2168 global_avg_mtp_loss: 1.6594 +[titan] 2025-10-05 22:25:16,696 - root - INFO - lr: 5.0773e-06 gnorm: 1.25 [23:51:04< 0:38:23] +[titan] 2025-10-05 22:25:27,557 - root - INFO - step: 38960 loss: 1.9096 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6895 +[titan] 2025-10-05 22:25:27,557 - root - INFO - lr: 5.0765e-06 gnorm: 1.25 [23:51:15< 0:38:12] +[titan] 2025-10-05 22:25:38,467 - root - INFO - step: 38965 loss: 1.9831 memory: 118.84GiB(85.28%) tps: 30,035 tflops: 416.69 mfu: 42.13% global_avg_ntp_loss: 0.2299 global_avg_mtp_loss: 1.7531 +[titan] 2025-10-05 22:25:38,467 - root - INFO - lr: 5.0758e-06 gnorm: 1.24 [23:51:26< 0:38:01] +[titan] 2025-10-05 22:25:49,317 - root - INFO - step: 38970 loss: 1.8769 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6602 +[titan] 2025-10-05 22:25:49,317 - root - INFO - lr: 5.0751e-06 gnorm: 1.22 [23:51:37< 0:37:50] +[titan] 2025-10-05 22:26:00,183 - root - INFO - step: 38975 loss: 1.9463 memory: 118.84GiB(85.28%) tps: 30,158 tflops: 418.40 mfu: 42.30% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 22:26:00,183 - root - INFO - lr: 5.0743e-06 gnorm: 1.28 [23:51:48< 0:37:39] +[titan] 2025-10-05 22:26:11,057 - root - INFO - step: 38980 loss: 1.9267 memory: 118.84GiB(85.28%) tps: 30,135 tflops: 418.08 mfu: 42.27% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7030 +[titan] 2025-10-05 22:26:11,057 - root - INFO - lr: 
5.0736e-06 gnorm: 1.29 [23:51:59< 0:37:28] +[titan] 2025-10-05 22:26:21,891 - root - INFO - step: 38985 loss: 1.8837 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6661 +[titan] 2025-10-05 22:26:21,891 - root - INFO - lr: 5.0729e-06 gnorm: 1.26 [23:52:10< 0:37:17] +[titan] 2025-10-05 22:26:32,761 - root - INFO - step: 38990 loss: 1.8936 memory: 118.84GiB(85.28%) tps: 30,147 tflops: 418.24 mfu: 42.29% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:26:32,761 - root - INFO - lr: 5.0722e-06 gnorm: 1.25 [23:52:20< 0:37:06] +[titan] 2025-10-05 22:26:43,668 - root - INFO - step: 38995 loss: 1.8343 memory: 118.84GiB(85.28%) tps: 30,044 tflops: 416.82 mfu: 42.15% global_avg_ntp_loss: 0.2124 global_avg_mtp_loss: 1.6219 +[titan] 2025-10-05 22:26:43,668 - root - INFO - lr: 5.0715e-06 gnorm: 1.22 [23:52:31< 0:36:55] +[titan] 2025-10-05 22:26:52,328 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:26:54,511 - root - INFO - step: 39000 loss: 1.8692 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6534 +[titan] 2025-10-05 22:26:54,511 - root - INFO - lr: 5.0708e-06 gnorm: 1.23 [23:52:42< 0:36:44] +[titan] 2025-10-05 22:27:05,357 - root - INFO - step: 39005 loss: 1.8448 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2131 global_avg_mtp_loss: 1.6317 +[titan] 2025-10-05 22:27:05,357 - root - INFO - lr: 5.0701e-06 gnorm: 1.24 [23:52:53< 0:36:33] +[titan] 2025-10-05 22:27:16,214 - root - INFO - step: 39010 loss: 1.9373 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7127 +[titan] 2025-10-05 22:27:16,214 - root - INFO - lr: 5.0694e-06 gnorm: 1.27 [23:53:04< 0:36:22] +[titan] 2025-10-05 22:27:27,027 - root - INFO - step: 39015 loss: 1.8935 memory: 118.84GiB(85.28%) tps: 30,304 tflops: 420.43 mfu: 42.51% global_avg_ntp_loss: 0.2195 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 22:27:27,028 - root - INFO - lr: 5.0687e-06 gnorm: 1.29 [23:53:15< 0:36:11] +[titan] 2025-10-05 22:27:37,873 - root - INFO - step: 39020 loss: 1.9212 memory: 118.84GiB(85.28%) tps: 30,214 tflops: 419.18 mfu: 42.38% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6994 +[titan] 2025-10-05 22:27:37,873 - root - INFO - lr: 5.0680e-06 gnorm: 1.25 [23:53:26< 0:36:00] +[titan] 2025-10-05 22:27:48,725 - root - INFO - step: 39025 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,195 tflops: 418.92 mfu: 42.36% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6660 +[titan] 2025-10-05 22:27:48,725 - root - INFO - lr: 5.0673e-06 gnorm: 1.23 [23:53:36< 0:35:49] +[titan] 2025-10-05 22:27:59,585 - root - INFO - step: 39030 loss: 1.9578 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.64 mfu: 42.33% global_avg_ntp_loss: 0.2258 global_avg_mtp_loss: 1.7320 +[titan] 2025-10-05 22:27:59,585 - root - INFO - lr: 
5.0666e-06 gnorm: 1.26 [23:53:47< 0:35:38] +[titan] 2025-10-05 22:28:10,411 - root - INFO - step: 39035 loss: 1.9141 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6933 +[titan] 2025-10-05 22:28:10,411 - root - INFO - lr: 5.0659e-06 gnorm: 1.29 [23:53:58< 0:35:26] +[titan] 2025-10-05 22:28:21,251 - root - INFO - step: 39040 loss: 1.9025 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:28:21,251 - root - INFO - lr: 5.0652e-06 gnorm: 1.26 [23:54:09< 0:35:15] +[titan] 2025-10-05 22:28:32,077 - root - INFO - step: 39045 loss: 1.9016 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6812 +[titan] 2025-10-05 22:28:32,077 - root - INFO - lr: 5.0645e-06 gnorm: 1.24 [23:54:20< 0:35:04] +[titan] 2025-10-05 22:28:40,768 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:28:42,943 - root - INFO - step: 39050 loss: 1.8855 memory: 118.84GiB(85.28%) tps: 30,157 tflops: 418.38 mfu: 42.30% global_avg_ntp_loss: 0.2177 global_avg_mtp_loss: 1.6678 +[titan] 2025-10-05 22:28:42,943 - root - INFO - lr: 5.0639e-06 gnorm: 1.25 [23:54:31< 0:34:53] +[titan] 2025-10-05 22:28:53,779 - root - INFO - step: 39055 loss: 1.9246 memory: 118.84GiB(85.28%) tps: 30,242 tflops: 419.56 mfu: 42.42% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.7024 +[titan] 2025-10-05 22:28:53,779 - root - INFO - lr: 5.0632e-06 gnorm: 1.27 [23:54:42< 0:34:42] +[titan] 2025-10-05 22:29:04,650 - root - INFO - step: 39060 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,143 tflops: 418.19 mfu: 42.28% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.7090 +[titan] 2025-10-05 22:29:04,650 - root - INFO - lr: 5.0625e-06 gnorm: 1.28 [23:54:52< 0:34:31] +[titan] 2025-10-05 22:29:15,481 - root - INFO - step: 39065 loss: 1.8892 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:29:15,481 - root - INFO - lr: 5.0619e-06 gnorm: 1.29 [23:55:03< 0:34:20] +[titan] 2025-10-05 22:29:26,319 - root - INFO - step: 39070 loss: 1.9725 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.46 mfu: 42.41% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7449 +[titan] 2025-10-05 22:29:26,319 - root - INFO - lr: 5.0612e-06 gnorm: 1.27 [23:55:14< 0:34:09] +[titan] 2025-10-05 22:29:37,169 - root - INFO - step: 39075 loss: 1.8711 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:29:37,169 - root - INFO - lr: 5.0606e-06 gnorm: 1.39 [23:55:25< 0:33:58] +[titan] 2025-10-05 22:29:47,983 - root - INFO - step: 39080 loss: 1.9585 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7324 +[titan] 2025-10-05 22:29:47,983 - root - INFO - lr: 
5.0599e-06 gnorm: 1.27 [23:55:36< 0:33:47] +[titan] 2025-10-05 22:29:58,811 - root - INFO - step: 39085 loss: 1.9639 memory: 118.84GiB(85.28%) tps: 30,264 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2274 global_avg_mtp_loss: 1.7365 +[titan] 2025-10-05 22:29:58,811 - root - INFO - lr: 5.0593e-06 gnorm: 1.28 [23:55:47< 0:33:36] +[titan] 2025-10-05 22:30:09,630 - root - INFO - step: 39090 loss: 1.8996 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:30:09,630 - root - INFO - lr: 5.0586e-06 gnorm: 1.26 [23:55:57< 0:33:25] +[titan] 2025-10-05 22:30:20,468 - root - INFO - step: 39095 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6810 +[titan] 2025-10-05 22:30:20,469 - root - INFO - lr: 5.0580e-06 gnorm: 1.24 [23:56:08< 0:33:14] +[titan] 2025-10-05 22:30:29,112 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:30:31,293 - root - INFO - step: 39100 loss: 1.9874 memory: 118.84GiB(85.28%) tps: 30,273 tflops: 419.99 mfu: 42.47% global_avg_ntp_loss: 0.2301 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 22:30:31,293 - root - INFO - lr: 5.0573e-06 gnorm: 1.34 [23:56:19< 0:33:03] +[titan] 2025-10-05 22:30:42,362 - root - INFO - step: 39105 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,605 tflops: 410.73 mfu: 41.53% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6828 +[titan] 2025-10-05 22:30:42,362 - root - INFO - lr: 5.0567e-06 gnorm: 1.26 [23:56:30< 0:32:52] +[titan] 2025-10-05 22:30:53,217 - root - INFO - step: 39110 loss: 1.8609 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2156 global_avg_mtp_loss: 1.6453 +[titan] 2025-10-05 22:30:53,217 - root - INFO - lr: 5.0561e-06 gnorm: 1.23 [23:56:41< 0:32:41] +[titan] 2025-10-05 22:31:04,043 - root - INFO - step: 39115 loss: 1.9154 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6937 +[titan] 2025-10-05 22:31:04,043 - root - INFO - lr: 5.0554e-06 gnorm: 1.23 [23:56:52< 0:32:30] +[titan] 2025-10-05 22:31:14,877 - root - INFO - step: 39120 loss: 1.9243 memory: 118.84GiB(85.28%) tps: 30,246 tflops: 419.62 mfu: 42.43% global_avg_ntp_loss: 0.2230 global_avg_mtp_loss: 1.7013 +[titan] 2025-10-05 22:31:14,877 - root - INFO - lr: 5.0548e-06 gnorm: 1.28 [23:57:03< 0:32:19] +[titan] 2025-10-05 22:31:25,759 - root - INFO - step: 39125 loss: 1.9233 memory: 118.84GiB(85.28%) tps: 30,113 tflops: 417.78 mfu: 42.24% global_avg_ntp_loss: 0.2223 global_avg_mtp_loss: 1.7010 +[titan] 2025-10-05 22:31:25,759 - root - INFO - lr: 5.0542e-06 gnorm: 1.27 [23:57:13< 0:32:08] +[titan] 2025-10-05 22:31:36,579 - root - INFO - step: 39130 loss: 1.8894 memory: 118.84GiB(85.28%) tps: 30,287 tflops: 420.18 mfu: 42.49% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6709 +[titan] 2025-10-05 22:31:36,579 - root - INFO - lr: 
5.0536e-06 gnorm: 1.28 [23:57:24< 0:31:57] +[titan] 2025-10-05 22:31:47,420 - root - INFO - step: 39135 loss: 1.9658 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7389 +[titan] 2025-10-05 22:31:47,420 - root - INFO - lr: 5.0530e-06 gnorm: 1.29 [23:57:35< 0:31:46] +[titan] 2025-10-05 22:31:58,260 - root - INFO - step: 39140 loss: 1.9505 memory: 118.84GiB(85.28%) tps: 30,230 tflops: 419.39 mfu: 42.41% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7258 +[titan] 2025-10-05 22:31:58,260 - root - INFO - lr: 5.0523e-06 gnorm: 1.27 [23:57:46< 0:31:35] +[titan] 2025-10-05 22:32:09,071 - root - INFO - step: 39145 loss: 1.8836 memory: 118.84GiB(85.28%) tps: 30,311 tflops: 420.51 mfu: 42.52% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 22:32:09,071 - root - INFO - lr: 5.0517e-06 gnorm: 1.24 [23:57:57< 0:31:24] +[titan] 2025-10-05 22:32:17,709 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:32:19,885 - root - INFO - step: 39150 loss: 1.8924 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.41 mfu: 42.51% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:32:19,885 - root - INFO - lr: 5.0511e-06 gnorm: 1.26 [23:58:08< 0:31:13] +[titan] 2025-10-05 22:32:30,741 - root - INFO - step: 39155 loss: 1.9157 memory: 118.84GiB(85.28%) tps: 30,185 tflops: 418.77 mfu: 42.34% global_avg_ntp_loss: 0.2215 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:32:30,741 - root - INFO - lr: 5.0505e-06 gnorm: 1.26 [23:58:18< 0:31:02] +[titan] 2025-10-05 22:32:41,618 - root - INFO - step: 39160 loss: 1.9074 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6875 +[titan] 2025-10-05 22:32:41,618 - root - INFO - lr: 5.0499e-06 gnorm: 1.27 [23:58:29< 0:30:51] +[titan] 2025-10-05 22:32:52,420 - root - INFO - step: 39165 loss: 1.9269 memory: 118.84GiB(85.28%) tps: 30,335 tflops: 420.86 mfu: 42.55% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7041 +[titan] 2025-10-05 22:32:52,421 - root - INFO - lr: 5.0493e-06 gnorm: 1.31 [23:58:40< 0:30:40] +[titan] 2025-10-05 22:33:03,241 - root - INFO - step: 39170 loss: 1.8973 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6786 +[titan] 2025-10-05 22:33:03,241 - root - INFO - lr: 5.0488e-06 gnorm: 1.28 [23:58:51< 0:30:29] +[titan] 2025-10-05 22:33:14,059 - root - INFO - step: 39175 loss: 1.8930 memory: 118.84GiB(85.28%) tps: 30,291 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6737 +[titan] 2025-10-05 22:33:14,059 - root - INFO - lr: 5.0482e-06 gnorm: 1.25 [23:59:02< 0:30:18] +[titan] 2025-10-05 22:33:24,854 - root - INFO - step: 39180 loss: 1.9316 memory: 118.84GiB(85.28%) tps: 30,356 tflops: 421.14 mfu: 42.58% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7066 +[titan] 2025-10-05 22:33:24,854 - root - INFO - lr: 
5.0476e-06 gnorm: 1.25 [23:59:13< 0:30:07] +[titan] 2025-10-05 22:33:35,698 - root - INFO - step: 39185 loss: 1.8822 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6646 +[titan] 2025-10-05 22:33:35,698 - root - INFO - lr: 5.0470e-06 gnorm: 1.24 [23:59:23< 0:29:56] +[titan] 2025-10-05 22:33:46,541 - root - INFO - step: 39190 loss: 1.9287 memory: 118.84GiB(85.28%) tps: 30,221 tflops: 419.27 mfu: 42.39% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7047 +[titan] 2025-10-05 22:33:46,542 - root - INFO - lr: 5.0464e-06 gnorm: 1.26 [23:59:34< 0:29:45] +[titan] 2025-10-05 22:33:57,343 - root - INFO - step: 39195 loss: 1.8734 memory: 118.84GiB(85.28%) tps: 30,337 tflops: 420.88 mfu: 42.56% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6567 +[titan] 2025-10-05 22:33:57,343 - root - INFO - lr: 5.0459e-06 gnorm: 1.26 [23:59:45< 0:29:34] +[titan] 2025-10-05 22:34:05,988 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:34:08,160 - root - INFO - step: 39200 loss: 1.9127 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6909 +[titan] 2025-10-05 22:34:08,161 - root - INFO - lr: 5.0453e-06 gnorm: 1.24 [23:59:56< 0:29:23] +[titan] 2025-10-05 22:34:18,971 - root - INFO - step: 39205 loss: 1.9072 memory: 118.84GiB(85.28%) tps: 30,312 tflops: 420.53 mfu: 42.52% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:34:18,971 - root - INFO - lr: 5.0447e-06 gnorm: 1.27 [1 day, 0:00:07< 0:29:12] +[titan] 2025-10-05 22:34:29,800 - root - INFO - step: 39210 loss: 1.8480 memory: 118.84GiB(85.28%) tps: 30,260 tflops: 419.81 mfu: 42.45% global_avg_ntp_loss: 0.2137 global_avg_mtp_loss: 1.6343 +[titan] 2025-10-05 22:34:29,800 - root - INFO - lr: 5.0442e-06 gnorm: 1.20 [1 day, 0:00:18< 0:29:01] +[titan] 2025-10-05 22:34:40,603 - root - INFO - step: 39215 loss: 1.9787 memory: 118.84GiB(85.28%) tps: 30,334 tflops: 420.84 mfu: 42.55% global_avg_ntp_loss: 0.2296 global_avg_mtp_loss: 1.7491 +[titan] 2025-10-05 22:34:40,603 - root - INFO - lr: 5.0436e-06 gnorm: 1.28 [1 day, 0:00:28< 0:28:50] +[titan] 2025-10-05 22:34:51,467 - root - INFO - step: 39220 loss: 1.9221 memory: 118.84GiB(85.28%) tps: 30,164 tflops: 418.48 mfu: 42.31% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.6993 +[titan] 2025-10-05 22:34:51,467 - root - INFO - lr: 5.0431e-06 gnorm: 1.25 [1 day, 0:00:39< 0:28:39] +[titan] 2025-10-05 22:35:02,300 - root - INFO - step: 39225 loss: 1.9143 memory: 118.84GiB(85.28%) tps: 30,249 tflops: 419.66 mfu: 42.43% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6930 +[titan] 2025-10-05 22:35:02,300 - root - INFO - lr: 5.0425e-06 gnorm: 1.26 [1 day, 0:00:50< 0:28:28] +[titan] 2025-10-05 22:35:13,119 - root - INFO - step: 39230 loss: 1.8713 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.22 mfu: 42.49% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6548 +[titan] 2025-10-05 
22:35:13,119 - root - INFO - lr: 5.0420e-06 gnorm: 1.29 [1 day, 0:01:01< 0:28:17] +[titan] 2025-10-05 22:35:23,922 - root - INFO - step: 39235 loss: 1.9204 memory: 118.84GiB(85.28%) tps: 30,333 tflops: 420.83 mfu: 42.55% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.6983 +[titan] 2025-10-05 22:35:23,922 - root - INFO - lr: 5.0414e-06 gnorm: 1.26 [1 day, 0:01:12< 0:28:06] +[titan] 2025-10-05 22:35:34,763 - root - INFO - step: 39240 loss: 1.9663 memory: 118.84GiB(85.28%) tps: 30,226 tflops: 419.34 mfu: 42.40% global_avg_ntp_loss: 0.2275 global_avg_mtp_loss: 1.7388 +[titan] 2025-10-05 22:35:34,763 - root - INFO - lr: 5.0409e-06 gnorm: 1.28 [1 day, 0:01:22< 0:27:55] +[titan] 2025-10-05 22:35:45,607 - root - INFO - step: 39245 loss: 1.9455 memory: 118.84GiB(85.28%) tps: 30,217 tflops: 419.22 mfu: 42.39% global_avg_ntp_loss: 0.2259 global_avg_mtp_loss: 1.7196 +[titan] 2025-10-05 22:35:45,608 - root - INFO - lr: 5.0403e-06 gnorm: 1.28 [1 day, 0:01:33< 0:27:43] +[titan] 2025-10-05 22:35:54,248 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:35:56,420 - root - INFO - step: 39250 loss: 1.9097 memory: 118.84GiB(85.28%) tps: 30,306 tflops: 420.45 mfu: 42.51% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6889 +[titan] 2025-10-05 22:35:56,420 - root - INFO - lr: 5.0398e-06 gnorm: 1.22 [1 day, 0:01:44< 0:27:32] +[titan] 2025-10-05 22:36:07,280 - root - INFO - step: 39255 loss: 1.8902 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6708 +[titan] 2025-10-05 22:36:07,280 - root - INFO - lr: 5.0393e-06 gnorm: 1.24 [1 day, 0:01:55< 0:27:21] +[titan] 2025-10-05 22:36:18,098 - root - INFO - step: 39260 loss: 1.9171 memory: 118.84GiB(85.28%) tps: 30,292 tflops: 420.25 mfu: 42.49% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6949 +[titan] 2025-10-05 22:36:18,098 - root - INFO - lr: 5.0388e-06 gnorm: 1.26 [1 day, 0:02:06< 0:27:10] +[titan] 2025-10-05 22:36:28,912 - root - INFO - step: 39265 loss: 1.8802 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2180 global_avg_mtp_loss: 1.6622 +[titan] 2025-10-05 22:36:28,912 - root - INFO - lr: 5.0382e-06 gnorm: 1.27 [1 day, 0:02:17< 0:26:59] +[titan] 2025-10-05 22:36:39,738 - root - INFO - step: 39270 loss: 1.8621 memory: 118.84GiB(85.28%) tps: 30,268 tflops: 419.92 mfu: 42.46% global_avg_ntp_loss: 0.2158 global_avg_mtp_loss: 1.6463 +[titan] 2025-10-05 22:36:39,739 - root - INFO - lr: 5.0377e-06 gnorm: 1.26 [1 day, 0:02:27< 0:26:48] +[titan] 2025-10-05 22:36:50,600 - root - INFO - step: 39275 loss: 1.9665 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.57 mfu: 42.32% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7392 +[titan] 2025-10-05 22:36:50,600 - root - INFO - lr: 5.0372e-06 gnorm: 1.28 [1 day, 0:02:38< 0:26:37] +[titan] 2025-10-05 22:37:01,420 - root - INFO - step: 39280 loss: 1.8839 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2176 global_avg_mtp_loss: 1.6663 +[titan] 2025-10-05 
22:37:01,421 - root - INFO - lr: 5.0367e-06 gnorm: 1.20 [1 day, 0:02:49< 0:26:26] +[titan] 2025-10-05 22:37:12,238 - root - INFO - step: 39285 loss: 1.9318 memory: 118.84GiB(85.28%) tps: 30,293 tflops: 420.27 mfu: 42.49% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7080 +[titan] 2025-10-05 22:37:12,238 - root - INFO - lr: 5.0362e-06 gnorm: 1.27 [1 day, 0:03:00< 0:26:15] +[titan] 2025-10-05 22:37:23,040 - root - INFO - step: 39290 loss: 1.9426 memory: 118.84GiB(85.28%) tps: 30,336 tflops: 420.87 mfu: 42.55% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7174 +[titan] 2025-10-05 22:37:23,040 - root - INFO - lr: 5.0357e-06 gnorm: 1.26 [1 day, 0:03:11< 0:26:04] +[titan] 2025-10-05 22:37:33,840 - root - INFO - step: 39295 loss: 1.9775 memory: 118.84GiB(85.28%) tps: 30,342 tflops: 420.95 mfu: 42.56% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:37:33,840 - root - INFO - lr: 5.0352e-06 gnorm: 1.36 [1 day, 0:03:22< 0:25:53] +[titan] 2025-10-05 22:37:42,487 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:37:44,699 - root - INFO - step: 39300 loss: 1.9123 memory: 118.84GiB(85.28%) tps: 30,176 tflops: 418.65 mfu: 42.33% global_avg_ntp_loss: 0.2219 global_avg_mtp_loss: 1.6904 +[titan] 2025-10-05 22:37:44,699 - root - INFO - lr: 5.0347e-06 gnorm: 1.25 [1 day, 0:03:32< 0:25:42] +[titan] 2025-10-05 22:37:55,513 - root - INFO - step: 39305 loss: 1.9861 memory: 118.84GiB(85.28%) tps: 30,303 tflops: 420.40 mfu: 42.51% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7554 +[titan] 2025-10-05 22:37:55,513 - root - INFO - lr: 5.0342e-06 gnorm: 1.29 [1 day, 0:03:43< 0:25:31] +[titan] 2025-10-05 22:38:06,319 - root - INFO - step: 39310 loss: 1.8070 memory: 118.84GiB(85.28%) tps: 30,325 tflops: 420.71 mfu: 42.54% global_avg_ntp_loss: 0.2093 global_avg_mtp_loss: 1.5976 +[titan] 2025-10-05 22:38:06,319 - root - INFO - lr: 5.0337e-06 gnorm: 1.22 [1 day, 0:03:54< 0:25:20] +[titan] 2025-10-05 22:38:17,140 - root - INFO - step: 39315 loss: 1.9580 memory: 118.84GiB(85.28%) tps: 30,283 tflops: 420.13 mfu: 42.48% global_avg_ntp_loss: 0.2263 global_avg_mtp_loss: 1.7317 +[titan] 2025-10-05 22:38:17,140 - root - INFO - lr: 5.0332e-06 gnorm: 1.28 [1 day, 0:04:05< 0:25:09] +[titan] 2025-10-05 22:38:27,940 - root - INFO - step: 39320 loss: 1.8952 memory: 118.84GiB(85.28%) tps: 30,343 tflops: 420.96 mfu: 42.56% global_avg_ntp_loss: 0.2192 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:38:27,940 - root - INFO - lr: 5.0327e-06 gnorm: 1.24 [1 day, 0:04:16< 0:24:58] +[titan] 2025-10-05 22:38:38,794 - root - INFO - step: 39325 loss: 1.8206 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.83 mfu: 42.35% global_avg_ntp_loss: 0.2106 global_avg_mtp_loss: 1.6101 +[titan] 2025-10-05 22:38:38,795 - root - INFO - lr: 5.0323e-06 gnorm: 1.23 [1 day, 0:04:26< 0:24:47] +[titan] 2025-10-05 22:38:49,643 - root - INFO - step: 39330 loss: 1.9372 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.07 mfu: 42.37% global_avg_ntp_loss: 0.2248 global_avg_mtp_loss: 1.7124 +[titan] 2025-10-05 
22:38:49,643 - root - INFO - lr: 5.0318e-06 gnorm: 1.29 [1 day, 0:04:37< 0:24:36] +[titan] 2025-10-05 22:39:00,463 - root - INFO - step: 39335 loss: 1.9117 memory: 118.84GiB(85.28%) tps: 30,285 tflops: 420.15 mfu: 42.48% global_avg_ntp_loss: 0.2226 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:39:00,464 - root - INFO - lr: 5.0313e-06 gnorm: 1.25 [1 day, 0:04:48< 0:24:25] +[titan] 2025-10-05 22:39:11,289 - root - INFO - step: 39340 loss: 1.8200 memory: 118.84GiB(85.28%) tps: 30,270 tflops: 419.94 mfu: 42.46% global_avg_ntp_loss: 0.2105 global_avg_mtp_loss: 1.6095 +[titan] 2025-10-05 22:39:11,289 - root - INFO - lr: 5.0308e-06 gnorm: 1.26 [1 day, 0:04:59< 0:24:14] +[titan] 2025-10-05 22:39:22,114 - root - INFO - step: 39345 loss: 1.9160 memory: 118.84GiB(85.28%) tps: 30,272 tflops: 419.98 mfu: 42.47% global_avg_ntp_loss: 0.2218 global_avg_mtp_loss: 1.6942 +[titan] 2025-10-05 22:39:22,114 - root - INFO - lr: 5.0304e-06 gnorm: 1.27 [1 day, 0:05:10< 0:24:03] +[titan] 2025-10-05 22:39:30,838 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:39:33,015 - root - INFO - step: 39350 loss: 1.9767 memory: 118.84GiB(85.28%) tps: 30,061 tflops: 417.04 mfu: 42.17% global_avg_ntp_loss: 0.2293 global_avg_mtp_loss: 1.7475 +[titan] 2025-10-05 22:39:33,015 - root - INFO - lr: 5.0299e-06 gnorm: 1.24 [1 day, 0:05:21< 0:23:52] +[titan] 2025-10-05 22:39:43,880 - root - INFO - step: 39355 loss: 1.8926 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.42 mfu: 42.31% global_avg_ntp_loss: 0.2199 global_avg_mtp_loss: 1.6728 +[titan] 2025-10-05 22:39:43,880 - root - INFO - lr: 5.0294e-06 gnorm: 1.21 [1 day, 0:05:32< 0:23:41] +[titan] 2025-10-05 22:39:54,779 - root - INFO - step: 39360 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,067 tflops: 417.14 mfu: 42.18% global_avg_ntp_loss: 0.2123 global_avg_mtp_loss: 1.6258 +[titan] 2025-10-05 22:39:54,779 - root - INFO - lr: 5.0290e-06 gnorm: 1.26 [1 day, 0:05:42< 0:23:30] +[titan] 2025-10-05 22:40:05,616 - root - INFO - step: 39365 loss: 1.9411 memory: 118.84GiB(85.28%) tps: 30,237 tflops: 419.49 mfu: 42.42% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7158 +[titan] 2025-10-05 22:40:05,617 - root - INFO - lr: 5.0285e-06 gnorm: 1.22 [1 day, 0:05:53< 0:23:19] +[titan] 2025-10-05 22:40:16,459 - root - INFO - step: 39370 loss: 1.8828 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.28 mfu: 42.39% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 22:40:16,459 - root - INFO - lr: 5.0281e-06 gnorm: 1.23 [1 day, 0:06:04< 0:23:08] +[titan] 2025-10-05 22:40:27,280 - root - INFO - step: 39375 loss: 1.9073 memory: 118.84GiB(85.28%) tps: 30,284 tflops: 420.14 mfu: 42.48% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6866 +[titan] 2025-10-05 22:40:27,280 - root - INFO - lr: 5.0277e-06 gnorm: 1.28 [1 day, 0:06:15< 0:22:57] +[titan] 2025-10-05 22:40:38,119 - root - INFO - step: 39380 loss: 1.9206 memory: 118.84GiB(85.28%) tps: 30,232 tflops: 419.42 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6984 +[titan] 2025-10-05 
22:40:38,119 - root - INFO - lr: 5.0272e-06 gnorm: 1.23 [1 day, 0:06:26< 0:22:46] +[titan] 2025-10-05 22:40:49,118 - root - INFO - step: 39385 loss: 1.9186 memory: 118.84GiB(85.28%) tps: 29,794 tflops: 413.35 mfu: 41.79% global_avg_ntp_loss: 0.2216 global_avg_mtp_loss: 1.6970 +[titan] 2025-10-05 22:40:49,118 - root - INFO - lr: 5.0268e-06 gnorm: 1.25 [1 day, 0:06:37< 0:22:35] +[titan] 2025-10-05 22:40:59,990 - root - INFO - step: 39390 loss: 1.9410 memory: 118.84GiB(85.28%) tps: 30,140 tflops: 418.15 mfu: 42.28% global_avg_ntp_loss: 0.2240 global_avg_mtp_loss: 1.7170 +[titan] 2025-10-05 22:40:59,990 - root - INFO - lr: 5.0263e-06 gnorm: 1.30 [1 day, 0:06:48< 0:22:24] +[titan] 2025-10-05 22:41:10,818 - root - INFO - step: 39395 loss: 1.8672 memory: 118.84GiB(85.28%) tps: 30,263 tflops: 419.86 mfu: 42.45% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6520 +[titan] 2025-10-05 22:41:10,818 - root - INFO - lr: 5.0259e-06 gnorm: 1.23 [1 day, 0:06:59< 0:22:13] +[titan] 2025-10-05 22:41:19,493 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:41:21,663 - root - INFO - step: 39400 loss: 1.9590 memory: 118.84GiB(85.28%) tps: 30,216 tflops: 419.20 mfu: 42.39% global_avg_ntp_loss: 0.2273 global_avg_mtp_loss: 1.7318 +[titan] 2025-10-05 22:41:21,663 - root - INFO - lr: 5.0255e-06 gnorm: 1.24 [1 day, 0:07:09< 0:22:02] +[titan] 2025-10-05 22:41:32,499 - root - INFO - step: 39405 loss: 1.8950 memory: 118.84GiB(85.28%) tps: 30,241 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6761 +[titan] 2025-10-05 22:41:32,500 - root - INFO - lr: 5.0251e-06 gnorm: 1.26 [1 day, 0:07:20< 0:21:51] +[titan] 2025-10-05 22:41:43,338 - root - INFO - step: 39410 loss: 1.9067 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.43 mfu: 42.41% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:41:43,338 - root - INFO - lr: 5.0246e-06 gnorm: 1.25 [1 day, 0:07:31< 0:21:40] +[titan] 2025-10-05 22:41:54,271 - root - INFO - step: 39415 loss: 1.9413 memory: 118.84GiB(85.28%) tps: 29,974 tflops: 415.84 mfu: 42.05% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7167 +[titan] 2025-10-05 22:41:54,271 - root - INFO - lr: 5.0242e-06 gnorm: 1.26 [1 day, 0:07:42< 0:21:29] +[titan] 2025-10-05 22:42:05,121 - root - INFO - step: 39420 loss: 1.8925 memory: 118.84GiB(85.28%) tps: 30,201 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2191 global_avg_mtp_loss: 1.6733 +[titan] 2025-10-05 22:42:05,121 - root - INFO - lr: 5.0238e-06 gnorm: 1.27 [1 day, 0:07:53< 0:21:18] +[titan] 2025-10-05 22:42:14,068 - root - INFO - Dumping profiler traces at step 39424 +[titan] 2025-10-05 22:42:14,106 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 22:42:16,301 - root - INFO - step: 39425 loss: 1.8661 memory: 118.84GiB(85.28%) tps: 29,310 tflops: 406.63 mfu: 41.12% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6501 +[titan] 2025-10-05 22:42:16,301 - root - INFO - lr: 5.0234e-06 gnorm: 1.27 [1 day, 0:08:04< 0:21:07] +[titan] 2025-10-05 22:42:27,154 
- root - INFO - step: 39430 loss: 1.8989 memory: 118.84GiB(85.28%) tps: 30,193 tflops: 418.89 mfu: 42.35% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6787 +[titan] 2025-10-05 22:42:27,155 - root - INFO - lr: 5.0230e-06 gnorm: 1.25 [1 day, 0:08:15< 0:20:56] +[titan] 2025-10-05 22:42:37,960 - root - INFO - step: 39435 loss: 1.9054 memory: 118.84GiB(85.28%) tps: 30,327 tflops: 420.74 mfu: 42.54% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6850 +[titan] 2025-10-05 22:42:37,960 - root - INFO - lr: 5.0226e-06 gnorm: 1.27 [1 day, 0:08:26< 0:20:45] +[titan] 2025-10-05 22:42:48,874 - root - INFO - step: 39440 loss: 1.8975 memory: 118.84GiB(85.28%) tps: 30,025 tflops: 416.55 mfu: 42.12% global_avg_ntp_loss: 0.2194 global_avg_mtp_loss: 1.6781 +[titan] 2025-10-05 22:42:48,874 - root - INFO - lr: 5.0222e-06 gnorm: 1.24 [1 day, 0:08:37< 0:20:34] +[titan] 2025-10-05 22:42:59,759 - root - INFO - step: 39445 loss: 1.9790 memory: 118.84GiB(85.28%) tps: 30,104 tflops: 417.64 mfu: 42.23% global_avg_ntp_loss: 0.2287 global_avg_mtp_loss: 1.7502 +[titan] 2025-10-05 22:42:59,759 - root - INFO - lr: 5.0218e-06 gnorm: 1.26 [1 day, 0:08:47< 0:20:23] +[titan] 2025-10-05 22:43:08,434 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:43:10,640 - root - INFO - step: 39450 loss: 1.9579 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.25% global_avg_ntp_loss: 0.2255 global_avg_mtp_loss: 1.7325 +[titan] 2025-10-05 22:43:10,641 - root - INFO - lr: 5.0214e-06 gnorm: 1.28 [1 day, 0:08:58< 0:20:12] +[titan] 2025-10-05 22:43:21,477 - root - INFO - step: 39455 loss: 1.9682 memory: 118.84GiB(85.28%) tps: 30,239 tflops: 419.52 mfu: 42.42% global_avg_ntp_loss: 0.2276 global_avg_mtp_loss: 1.7405 +[titan] 2025-10-05 22:43:21,478 - root - INFO - lr: 5.0210e-06 gnorm: 1.32 [1 day, 0:09:09< 0:20:01] +[titan] 2025-10-05 22:43:32,319 - root - INFO - step: 39460 loss: 1.9474 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.32 mfu: 42.40% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7223 +[titan] 2025-10-05 22:43:32,319 - root - INFO - lr: 5.0206e-06 gnorm: 1.30 [1 day, 0:09:20< 0:19:50] +[titan] 2025-10-05 22:43:43,178 - root - INFO - step: 39465 loss: 1.8880 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2179 global_avg_mtp_loss: 1.6701 +[titan] 2025-10-05 22:43:43,178 - root - INFO - lr: 5.0203e-06 gnorm: 1.28 [1 day, 0:09:31< 0:19:39] +[titan] 2025-10-05 22:43:54,102 - root - INFO - step: 39470 loss: 1.8901 memory: 118.84GiB(85.28%) tps: 29,997 tflops: 416.17 mfu: 42.08% global_avg_ntp_loss: 0.2187 global_avg_mtp_loss: 1.6714 +[titan] 2025-10-05 22:43:54,102 - root - INFO - lr: 5.0199e-06 gnorm: 1.25 [1 day, 0:09:42< 0:19:27] +[titan] 2025-10-05 22:44:04,938 - root - INFO - step: 39475 loss: 1.8656 memory: 118.84GiB(85.28%) tps: 30,243 tflops: 419.57 mfu: 42.42% global_avg_ntp_loss: 0.2159 global_avg_mtp_loss: 1.6497 +[titan] 2025-10-05 22:44:04,938 - root - INFO - lr: 5.0195e-06 gnorm: 1.23 [1 day, 0:09:53< 0:19:16] +[titan] 2025-10-05 22:44:15,757 - root - INFO - step: 39480 loss: 1.9465 memory: 118.84GiB(85.28%) tps: 30,288 tflops: 420.20 mfu: 42.49% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7209 +[titan] 2025-10-05 
22:44:15,757 - root - INFO - lr: 5.0191e-06 gnorm: 1.27 [1 day, 0:10:03< 0:19:05] +[titan] 2025-10-05 22:44:26,640 - root - INFO - step: 39485 loss: 1.8523 memory: 118.84GiB(85.28%) tps: 30,111 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2148 global_avg_mtp_loss: 1.6375 +[titan] 2025-10-05 22:44:26,640 - root - INFO - lr: 5.0188e-06 gnorm: 1.23 [1 day, 0:10:14< 0:18:54] +[titan] 2025-10-05 22:44:37,455 - root - INFO - step: 39490 loss: 1.9200 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6979 +[titan] 2025-10-05 22:44:37,455 - root - INFO - lr: 5.0184e-06 gnorm: 1.26 [1 day, 0:10:25< 0:18:43] +[titan] 2025-10-05 22:44:48,278 - root - INFO - step: 39495 loss: 1.8271 memory: 118.84GiB(85.28%) tps: 30,277 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2112 global_avg_mtp_loss: 1.6159 +[titan] 2025-10-05 22:44:48,278 - root - INFO - lr: 5.0181e-06 gnorm: 1.24 [1 day, 0:10:36< 0:18:32] +[titan] 2025-10-05 22:44:57,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:44:59,302 - root - INFO - step: 39500 loss: 1.9031 memory: 118.84GiB(85.28%) tps: 29,726 tflops: 412.41 mfu: 41.70% global_avg_ntp_loss: 0.2206 global_avg_mtp_loss: 1.6825 +[titan] 2025-10-05 22:44:59,302 - root - INFO - lr: 5.0177e-06 gnorm: 1.26 [1 day, 0:10:47< 0:18:21] +[titan] 2025-10-05 22:45:10,138 - root - INFO - step: 39505 loss: 1.9095 memory: 118.84GiB(85.28%) tps: 30,240 tflops: 419.54 mfu: 42.42% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6882 +[titan] 2025-10-05 22:45:10,138 - root - INFO - lr: 5.0173e-06 gnorm: 1.25 [1 day, 0:10:58< 0:18:10] +[titan] 2025-10-05 22:45:20,973 - root - INFO - step: 39510 loss: 1.9394 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2241 global_avg_mtp_loss: 1.7153 +[titan] 2025-10-05 22:45:20,973 - root - INFO - lr: 5.0170e-06 gnorm: 1.26 [1 day, 0:11:09< 0:17:59] +[titan] 2025-10-05 22:45:31,857 - root - INFO - step: 39515 loss: 1.8345 memory: 118.84GiB(85.28%) tps: 30,109 tflops: 417.72 mfu: 42.24% global_avg_ntp_loss: 0.2130 global_avg_mtp_loss: 1.6215 +[titan] 2025-10-05 22:45:31,857 - root - INFO - lr: 5.0167e-06 gnorm: 1.32 [1 day, 0:11:20< 0:17:48] +[titan] 2025-10-05 22:45:42,715 - root - INFO - step: 39520 loss: 1.9554 memory: 118.84GiB(85.28%) tps: 30,179 tflops: 418.69 mfu: 42.33% global_avg_ntp_loss: 0.2266 global_avg_mtp_loss: 1.7287 +[titan] 2025-10-05 22:45:42,715 - root - INFO - lr: 5.0163e-06 gnorm: 1.30 [1 day, 0:11:30< 0:17:37] +[titan] 2025-10-05 22:45:53,618 - root - INFO - step: 39525 loss: 1.8642 memory: 118.84GiB(85.28%) tps: 30,055 tflops: 416.96 mfu: 42.16% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6485 +[titan] 2025-10-05 22:45:53,618 - root - INFO - lr: 5.0160e-06 gnorm: 1.23 [1 day, 0:11:41< 0:17:26] +[titan] 2025-10-05 22:46:04,478 - root - INFO - step: 39530 loss: 1.9030 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 
22:46:04,478 - root - INFO - lr: 5.0156e-06 gnorm: 1.28 [1 day, 0:11:52< 0:17:15] +[titan] 2025-10-05 22:46:15,353 - root - INFO - step: 39535 loss: 1.8455 memory: 118.84GiB(85.28%) tps: 30,133 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2143 global_avg_mtp_loss: 1.6312 +[titan] 2025-10-05 22:46:15,353 - root - INFO - lr: 5.0153e-06 gnorm: 1.27 [1 day, 0:12:03< 0:17:04] +[titan] 2025-10-05 22:46:26,197 - root - INFO - step: 39540 loss: 1.8853 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2181 global_avg_mtp_loss: 1.6672 +[titan] 2025-10-05 22:46:26,197 - root - INFO - lr: 5.0150e-06 gnorm: 1.25 [1 day, 0:12:14< 0:16:53] +[titan] 2025-10-05 22:46:37,052 - root - INFO - step: 39545 loss: 1.9449 memory: 118.84GiB(85.28%) tps: 30,189 tflops: 418.82 mfu: 42.35% global_avg_ntp_loss: 0.2250 global_avg_mtp_loss: 1.7199 +[titan] 2025-10-05 22:46:37,052 - root - INFO - lr: 5.0147e-06 gnorm: 1.30 [1 day, 0:12:25< 0:16:42] +[titan] 2025-10-05 22:46:45,774 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:46:47,965 - root - INFO - step: 39550 loss: 1.9761 memory: 118.84GiB(85.28%) tps: 30,027 tflops: 416.57 mfu: 42.12% global_avg_ntp_loss: 0.2295 global_avg_mtp_loss: 1.7466 +[titan] 2025-10-05 22:46:47,966 - root - INFO - lr: 5.0143e-06 gnorm: 1.34 [1 day, 0:12:36< 0:16:31] +[titan] 2025-10-05 22:46:58,895 - root - INFO - step: 39555 loss: 1.9038 memory: 118.84GiB(85.28%) tps: 29,981 tflops: 415.94 mfu: 42.06% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6838 +[titan] 2025-10-05 22:46:58,895 - root - INFO - lr: 5.0140e-06 gnorm: 1.24 [1 day, 0:12:47< 0:16:20] +[titan] 2025-10-05 22:47:09,746 - root - INFO - step: 39560 loss: 1.9366 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7128 +[titan] 2025-10-05 22:47:09,746 - root - INFO - lr: 5.0137e-06 gnorm: 1.22 [1 day, 0:12:57< 0:16:09] +[titan] 2025-10-05 22:47:20,590 - root - INFO - step: 39565 loss: 1.9274 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7042 +[titan] 2025-10-05 22:47:20,590 - root - INFO - lr: 5.0134e-06 gnorm: 1.26 [1 day, 0:13:08< 0:15:58] +[titan] 2025-10-05 22:47:31,450 - root - INFO - step: 39570 loss: 1.8471 memory: 118.84GiB(85.28%) tps: 30,175 tflops: 418.63 mfu: 42.33% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6330 +[titan] 2025-10-05 22:47:31,450 - root - INFO - lr: 5.0131e-06 gnorm: 1.24 [1 day, 0:13:19< 0:15:47] +[titan] 2025-10-05 22:47:42,294 - root - INFO - step: 39575 loss: 1.9070 memory: 118.84GiB(85.28%) tps: 30,218 tflops: 419.23 mfu: 42.39% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:47:42,294 - root - INFO - lr: 5.0128e-06 gnorm: 1.26 [1 day, 0:13:30< 0:15:36] +[titan] 2025-10-05 22:47:53,185 - root - INFO - step: 39580 loss: 1.9435 memory: 118.84GiB(85.28%) tps: 30,087 tflops: 417.42 mfu: 42.21% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7190 +[titan] 2025-10-05 
22:47:53,186 - root - INFO - lr: 5.0125e-06 gnorm: 1.24 [1 day, 0:13:41< 0:15:25] +[titan] 2025-10-05 22:48:04,125 - root - INFO - step: 39585 loss: 1.8977 memory: 118.84GiB(85.28%) tps: 29,955 tflops: 415.58 mfu: 42.02% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6775 +[titan] 2025-10-05 22:48:04,125 - root - INFO - lr: 5.0122e-06 gnorm: 1.23 [1 day, 0:13:52< 0:15:14] +[titan] 2025-10-05 22:48:14,970 - root - INFO - step: 39590 loss: 1.9739 memory: 118.84GiB(85.28%) tps: 30,215 tflops: 419.19 mfu: 42.39% global_avg_ntp_loss: 0.2289 global_avg_mtp_loss: 1.7450 +[titan] 2025-10-05 22:48:14,970 - root - INFO - lr: 5.0119e-06 gnorm: 1.27 [1 day, 0:14:03< 0:15:03] +[titan] 2025-10-05 22:48:25,818 - root - INFO - step: 39595 loss: 1.9534 memory: 118.84GiB(85.28%) tps: 30,207 tflops: 419.08 mfu: 42.37% global_avg_ntp_loss: 0.2269 global_avg_mtp_loss: 1.7265 +[titan] 2025-10-05 22:48:25,818 - root - INFO - lr: 5.0116e-06 gnorm: 1.25 [1 day, 0:14:13< 0:14:52] +[titan] 2025-10-05 22:48:34,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:48:36,655 - root - INFO - step: 39600 loss: 1.9335 memory: 118.84GiB(85.28%) tps: 30,238 tflops: 419.51 mfu: 42.42% global_avg_ntp_loss: 0.2228 global_avg_mtp_loss: 1.7107 +[titan] 2025-10-05 22:48:36,655 - root - INFO - lr: 5.0113e-06 gnorm: 1.26 [1 day, 0:14:24< 0:14:41] +[titan] 2025-10-05 22:48:47,505 - root - INFO - step: 39605 loss: 1.9012 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.01 mfu: 42.37% global_avg_ntp_loss: 0.2204 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:48:47,505 - root - INFO - lr: 5.0110e-06 gnorm: 1.25 [1 day, 0:14:35< 0:14:30] +[titan] 2025-10-05 22:48:58,396 - root - INFO - step: 39610 loss: 1.9266 memory: 118.84GiB(85.28%) tps: 30,089 tflops: 417.45 mfu: 42.21% global_avg_ntp_loss: 0.2221 global_avg_mtp_loss: 1.7045 +[titan] 2025-10-05 22:48:58,396 - root - INFO - lr: 5.0108e-06 gnorm: 1.27 [1 day, 0:14:46< 0:14:19] +[titan] 2025-10-05 22:49:09,249 - root - INFO - step: 39615 loss: 1.9542 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.87 mfu: 42.35% global_avg_ntp_loss: 0.2261 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:49:09,249 - root - INFO - lr: 5.0105e-06 gnorm: 1.28 [1 day, 0:14:57< 0:14:08] +[titan] 2025-10-05 22:49:20,110 - root - INFO - step: 39620 loss: 1.9060 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2205 global_avg_mtp_loss: 1.6855 +[titan] 2025-10-05 22:49:20,111 - root - INFO - lr: 5.0102e-06 gnorm: 1.26 [1 day, 0:15:08< 0:13:57] +[titan] 2025-10-05 22:49:30,958 - root - INFO - step: 39625 loss: 1.9163 memory: 118.84GiB(85.28%) tps: 30,209 tflops: 419.10 mfu: 42.38% global_avg_ntp_loss: 0.2212 global_avg_mtp_loss: 1.6951 +[titan] 2025-10-05 22:49:30,958 - root - INFO - lr: 5.0100e-06 gnorm: 1.28 [1 day, 0:15:19< 0:13:46] +[titan] 2025-10-05 22:49:41,804 - root - INFO - step: 39630 loss: 1.8829 memory: 118.84GiB(85.28%) tps: 30,212 tflops: 419.15 mfu: 42.38% global_avg_ntp_loss: 0.2186 global_avg_mtp_loss: 1.6643 +[titan] 2025-10-05 
22:49:41,804 - root - INFO - lr: 5.0097e-06 gnorm: 1.26 [1 day, 0:15:29< 0:13:35] +[titan] 2025-10-05 22:49:52,655 - root - INFO - step: 39635 loss: 1.8627 memory: 118.84GiB(85.28%) tps: 30,199 tflops: 418.97 mfu: 42.36% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6476 +[titan] 2025-10-05 22:49:52,655 - root - INFO - lr: 5.0094e-06 gnorm: 1.24 [1 day, 0:15:40< 0:13:24] +[titan] 2025-10-05 22:50:03,525 - root - INFO - step: 39640 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,146 tflops: 418.23 mfu: 42.29% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6863 +[titan] 2025-10-05 22:50:03,526 - root - INFO - lr: 5.0092e-06 gnorm: 1.26 [1 day, 0:15:51< 0:13:13] +[titan] 2025-10-05 22:50:14,446 - root - INFO - step: 39645 loss: 1.8260 memory: 118.84GiB(85.28%) tps: 30,006 tflops: 416.29 mfu: 42.09% global_avg_ntp_loss: 0.2114 global_avg_mtp_loss: 1.6146 +[titan] 2025-10-05 22:50:14,446 - root - INFO - lr: 5.0089e-06 gnorm: 1.27 [1 day, 0:16:02< 0:13:02] +[titan] 2025-10-05 22:50:23,125 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:50:25,308 - root - INFO - step: 39650 loss: 1.9271 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.54 mfu: 42.32% global_avg_ntp_loss: 0.2232 global_avg_mtp_loss: 1.7039 +[titan] 2025-10-05 22:50:25,308 - root - INFO - lr: 5.0087e-06 gnorm: 1.28 [1 day, 0:16:13< 0:12:51] +[titan] 2025-10-05 22:50:36,173 - root - INFO - step: 39655 loss: 1.9546 memory: 118.84GiB(85.28%) tps: 30,160 tflops: 418.43 mfu: 42.31% global_avg_ntp_loss: 0.2264 global_avg_mtp_loss: 1.7281 +[titan] 2025-10-05 22:50:36,173 - root - INFO - lr: 5.0084e-06 gnorm: 1.26 [1 day, 0:16:24< 0:12:40] +[titan] 2025-10-05 22:50:47,037 - root - INFO - step: 39660 loss: 1.9006 memory: 118.84GiB(85.28%) tps: 30,163 tflops: 418.47 mfu: 42.31% global_avg_ntp_loss: 0.2198 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:50:47,037 - root - INFO - lr: 5.0082e-06 gnorm: 1.27 [1 day, 0:16:35< 0:12:29] +[titan] 2025-10-05 22:50:57,913 - root - INFO - step: 39665 loss: 1.9442 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2236 global_avg_mtp_loss: 1.7206 +[titan] 2025-10-05 22:50:57,913 - root - INFO - lr: 5.0079e-06 gnorm: 1.29 [1 day, 0:16:46< 0:12:18] +[titan] 2025-10-05 22:51:08,776 - root - INFO - step: 39670 loss: 1.8655 memory: 118.84GiB(85.28%) tps: 30,166 tflops: 418.51 mfu: 42.32% global_avg_ntp_loss: 0.2157 global_avg_mtp_loss: 1.6499 +[titan] 2025-10-05 22:51:08,776 - root - INFO - lr: 5.0077e-06 gnorm: 1.30 [1 day, 0:16:56< 0:12:07] +[titan] 2025-10-05 22:51:19,670 - root - INFO - step: 39675 loss: 1.8910 memory: 118.84GiB(85.28%) tps: 30,081 tflops: 417.33 mfu: 42.20% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6721 +[titan] 2025-10-05 22:51:19,670 - root - INFO - lr: 5.0075e-06 gnorm: 1.25 [1 day, 0:17:07< 0:11:56] +[titan] 2025-10-05 22:51:30,542 - root - INFO - step: 39680 loss: 1.9458 memory: 118.84GiB(85.28%) tps: 30,141 tflops: 418.16 mfu: 42.28% global_avg_ntp_loss: 0.2253 global_avg_mtp_loss: 1.7205 +[titan] 2025-10-05 
22:51:30,542 - root - INFO - lr: 5.0072e-06 gnorm: 1.27 [1 day, 0:17:18< 0:11:45] +[titan] 2025-10-05 22:51:41,391 - root - INFO - step: 39685 loss: 1.9309 memory: 118.84GiB(85.28%) tps: 30,206 tflops: 419.06 mfu: 42.37% global_avg_ntp_loss: 0.2231 global_avg_mtp_loss: 1.7078 +[titan] 2025-10-05 22:51:41,391 - root - INFO - lr: 5.0070e-06 gnorm: 1.27 [1 day, 0:17:29< 0:11:34] +[titan] 2025-10-05 22:51:52,276 - root - INFO - step: 39690 loss: 1.9374 memory: 118.84GiB(85.28%) tps: 30,105 tflops: 417.66 mfu: 42.23% global_avg_ntp_loss: 0.2256 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:51:52,276 - root - INFO - lr: 5.0068e-06 gnorm: 1.25 [1 day, 0:17:40< 0:11:23] +[titan] 2025-10-05 22:52:03,144 - root - INFO - step: 39695 loss: 1.9239 memory: 118.84GiB(85.28%) tps: 30,151 tflops: 418.30 mfu: 42.30% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7012 +[titan] 2025-10-05 22:52:03,144 - root - INFO - lr: 5.0066e-06 gnorm: 1.27 [1 day, 0:17:51< 0:11:12] +[titan] 2025-10-05 22:52:11,813 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:52:13,994 - root - INFO - step: 39700 loss: 1.9249 memory: 118.84GiB(85.28%) tps: 30,202 tflops: 419.00 mfu: 42.37% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.7021 +[titan] 2025-10-05 22:52:13,994 - root - INFO - lr: 5.0064e-06 gnorm: 1.23 [1 day, 0:18:02< 0:11:01] +[titan] 2025-10-05 22:52:24,851 - root - INFO - step: 39705 loss: 1.9085 memory: 118.84GiB(85.28%) tps: 30,182 tflops: 418.73 mfu: 42.34% global_avg_ntp_loss: 0.2210 global_avg_mtp_loss: 1.6876 +[titan] 2025-10-05 22:52:24,852 - root - INFO - lr: 5.0062e-06 gnorm: 1.27 [1 day, 0:18:13< 0:10:50] +[titan] 2025-10-05 22:52:35,763 - root - INFO - step: 39710 loss: 1.8912 memory: 118.84GiB(85.28%) tps: 30,032 tflops: 416.65 mfu: 42.13% global_avg_ntp_loss: 0.2185 global_avg_mtp_loss: 1.6727 +[titan] 2025-10-05 22:52:35,763 - root - INFO - lr: 5.0060e-06 gnorm: 1.30 [1 day, 0:18:23< 0:10:39] +[titan] 2025-10-05 22:52:46,625 - root - INFO - step: 39715 loss: 1.8269 memory: 118.84GiB(85.28%) tps: 30,168 tflops: 418.53 mfu: 42.32% global_avg_ntp_loss: 0.2119 global_avg_mtp_loss: 1.6151 +[titan] 2025-10-05 22:52:46,625 - root - INFO - lr: 5.0058e-06 gnorm: 1.25 [1 day, 0:18:34< 0:10:28] +[titan] 2025-10-05 22:52:57,517 - root - INFO - step: 39720 loss: 1.9037 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.40 mfu: 42.20% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6829 +[titan] 2025-10-05 22:52:57,517 - root - INFO - lr: 5.0056e-06 gnorm: 1.26 [1 day, 0:18:45< 0:10:16] +[titan] 2025-10-05 22:53:08,394 - root - INFO - step: 39725 loss: 1.9178 memory: 118.84GiB(85.28%) tps: 30,126 tflops: 417.95 mfu: 42.26% global_avg_ntp_loss: 0.2220 global_avg_mtp_loss: 1.6958 +[titan] 2025-10-05 22:53:08,395 - root - INFO - lr: 5.0054e-06 gnorm: 1.27 [1 day, 0:18:56< 0:10:05] +[titan] 2025-10-05 22:53:19,248 - root - INFO - step: 39730 loss: 1.8733 memory: 118.84GiB(85.28%) tps: 30,192 tflops: 418.86 mfu: 42.35% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6572 +[titan] 2025-10-05 
22:53:19,248 - root - INFO - lr: 5.0052e-06 gnorm: 1.26 [1 day, 0:19:07< 0:09:54] +[titan] 2025-10-05 22:53:30,094 - root - INFO - step: 39735 loss: 1.8701 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2161 global_avg_mtp_loss: 1.6540 +[titan] 2025-10-05 22:53:30,094 - root - INFO - lr: 5.0050e-06 gnorm: 1.25 [1 day, 0:19:18< 0:09:43] +[titan] 2025-10-05 22:53:40,977 - root - INFO - step: 39740 loss: 1.9359 memory: 118.84GiB(85.28%) tps: 30,110 tflops: 417.74 mfu: 42.24% global_avg_ntp_loss: 0.2246 global_avg_mtp_loss: 1.7113 +[titan] 2025-10-05 22:53:40,977 - root - INFO - lr: 5.0048e-06 gnorm: 1.28 [1 day, 0:19:29< 0:09:32] +[titan] 2025-10-05 22:53:51,816 - root - INFO - step: 39745 loss: 1.9013 memory: 118.84GiB(85.28%) tps: 30,233 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2189 global_avg_mtp_loss: 1.6823 +[titan] 2025-10-05 22:53:51,816 - root - INFO - lr: 5.0046e-06 gnorm: 1.30 [1 day, 0:19:39< 0:09:21] +[titan] 2025-10-05 22:54:00,508 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:54:02,694 - root - INFO - step: 39750 loss: 1.9404 memory: 118.84GiB(85.28%) tps: 30,124 tflops: 417.93 mfu: 42.26% global_avg_ntp_loss: 0.2247 global_avg_mtp_loss: 1.7157 +[titan] 2025-10-05 22:54:02,694 - root - INFO - lr: 5.0044e-06 gnorm: 1.25 [1 day, 0:19:50< 0:09:10] +[titan] 2025-10-05 22:54:13,563 - root - INFO - step: 39755 loss: 1.9679 memory: 118.84GiB(85.28%) tps: 30,149 tflops: 418.27 mfu: 42.29% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7402 +[titan] 2025-10-05 22:54:13,563 - root - INFO - lr: 5.0042e-06 gnorm: 1.31 [1 day, 0:20:01< 0:08:59] +[titan] 2025-10-05 22:54:24,438 - root - INFO - step: 39760 loss: 1.8623 memory: 118.84GiB(85.28%) tps: 30,132 tflops: 418.04 mfu: 42.27% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6459 +[titan] 2025-10-05 22:54:24,438 - root - INFO - lr: 5.0041e-06 gnorm: 1.23 [1 day, 0:20:12< 0:08:48] +[titan] 2025-10-05 22:54:35,297 - root - INFO - step: 39765 loss: 1.9444 memory: 118.84GiB(85.28%) tps: 30,177 tflops: 418.66 mfu: 42.33% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7193 +[titan] 2025-10-05 22:54:35,297 - root - INFO - lr: 5.0039e-06 gnorm: 1.29 [1 day, 0:20:23< 0:08:37] +[titan] 2025-10-05 22:54:46,209 - root - INFO - step: 39770 loss: 1.8709 memory: 118.84GiB(85.28%) tps: 30,030 tflops: 416.62 mfu: 42.13% global_avg_ntp_loss: 0.2170 global_avg_mtp_loss: 1.6539 +[titan] 2025-10-05 22:54:46,209 - root - INFO - lr: 5.0037e-06 gnorm: 1.26 [1 day, 0:20:34< 0:08:26] +[titan] 2025-10-05 22:54:57,101 - root - INFO - step: 39775 loss: 1.8990 memory: 118.84GiB(85.28%) tps: 30,086 tflops: 417.39 mfu: 42.20% global_avg_ntp_loss: 0.2201 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 22:54:57,101 - root - INFO - lr: 5.0036e-06 gnorm: 1.27 [1 day, 0:20:45< 0:08:15] +[titan] 2025-10-05 22:55:08,022 - root - INFO - step: 39780 loss: 1.9966 memory: 118.84GiB(85.28%) tps: 30,005 tflops: 416.28 mfu: 42.09% global_avg_ntp_loss: 0.2307 global_avg_mtp_loss: 1.7659 +[titan] 2025-10-05 
22:55:08,023 - root - INFO - lr: 5.0034e-06 gnorm: 1.31 [1 day, 0:20:56< 0:08:04] +[titan] 2025-10-05 22:55:18,865 - root - INFO - step: 39785 loss: 1.9786 memory: 118.84GiB(85.28%) tps: 30,224 tflops: 419.31 mfu: 42.40% global_avg_ntp_loss: 0.2298 global_avg_mtp_loss: 1.7488 +[titan] 2025-10-05 22:55:18,865 - root - INFO - lr: 5.0033e-06 gnorm: 1.29 [1 day, 0:21:07< 0:07:53] +[titan] 2025-10-05 22:55:29,721 - root - INFO - step: 39790 loss: 1.9108 memory: 118.84GiB(85.28%) tps: 30,184 tflops: 418.76 mfu: 42.34% global_avg_ntp_loss: 0.2217 global_avg_mtp_loss: 1.6891 +[titan] 2025-10-05 22:55:29,721 - root - INFO - lr: 5.0031e-06 gnorm: 1.28 [1 day, 0:21:17< 0:07:42] +[titan] 2025-10-05 22:55:40,588 - root - INFO - step: 39795 loss: 1.8858 memory: 118.84GiB(85.28%) tps: 30,156 tflops: 418.37 mfu: 42.30% global_avg_ntp_loss: 0.2182 global_avg_mtp_loss: 1.6676 +[titan] 2025-10-05 22:55:40,588 - root - INFO - lr: 5.0030e-06 gnorm: 1.25 [1 day, 0:21:28< 0:07:31] +[titan] 2025-10-05 22:55:49,264 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:55:51,443 - root - INFO - step: 39800 loss: 1.9011 memory: 118.84GiB(85.28%) tps: 30,186 tflops: 418.79 mfu: 42.34% global_avg_ntp_loss: 0.2203 global_avg_mtp_loss: 1.6808 +[titan] 2025-10-05 22:55:51,443 - root - INFO - lr: 5.0028e-06 gnorm: 1.25 [1 day, 0:21:39< 0:07:20] +[titan] 2025-10-05 22:56:02,382 - root - INFO - step: 39805 loss: 1.8782 memory: 118.84GiB(85.28%) tps: 29,958 tflops: 415.62 mfu: 42.02% global_avg_ntp_loss: 0.2172 global_avg_mtp_loss: 1.6609 +[titan] 2025-10-05 22:56:02,382 - root - INFO - lr: 5.0027e-06 gnorm: 1.21 [1 day, 0:21:50< 0:07:09] +[titan] 2025-10-05 22:56:13,242 - root - INFO - step: 39810 loss: 1.9086 memory: 118.84GiB(85.28%) tps: 30,174 tflops: 418.62 mfu: 42.33% global_avg_ntp_loss: 0.2213 global_avg_mtp_loss: 1.6873 +[titan] 2025-10-05 22:56:13,242 - root - INFO - lr: 5.0026e-06 gnorm: 1.28 [1 day, 0:22:01< 0:06:58] +[titan] 2025-10-05 22:56:24,068 - root - INFO - step: 39815 loss: 1.9135 memory: 118.84GiB(85.28%) tps: 30,269 tflops: 419.93 mfu: 42.46% global_avg_ntp_loss: 0.2209 global_avg_mtp_loss: 1.6926 +[titan] 2025-10-05 22:56:24,068 - root - INFO - lr: 5.0024e-06 gnorm: 1.28 [1 day, 0:22:12< 0:06:47] +[titan] 2025-10-05 22:56:34,883 - root - INFO - step: 39820 loss: 1.8589 memory: 118.84GiB(85.28%) tps: 30,299 tflops: 420.35 mfu: 42.50% global_avg_ntp_loss: 0.2141 global_avg_mtp_loss: 1.6448 +[titan] 2025-10-05 22:56:34,883 - root - INFO - lr: 5.0023e-06 gnorm: 1.23 [1 day, 0:22:23< 0:06:36] +[titan] 2025-10-05 22:56:45,691 - root - INFO - step: 39825 loss: 1.8381 memory: 118.84GiB(85.28%) tps: 30,320 tflops: 420.64 mfu: 42.53% global_avg_ntp_loss: 0.2127 global_avg_mtp_loss: 1.6254 +[titan] 2025-10-05 22:56:45,691 - root - INFO - lr: 5.0022e-06 gnorm: 1.24 [1 day, 0:22:33< 0:06:25] +[titan] 2025-10-05 22:56:56,537 - root - INFO - step: 39830 loss: 1.9881 memory: 118.84GiB(85.28%) tps: 30,213 tflops: 419.16 mfu: 42.38% global_avg_ntp_loss: 0.2308 global_avg_mtp_loss: 1.7573 +[titan] 2025-10-05 
22:56:56,537 - root - INFO - lr: 5.0020e-06 gnorm: 1.29 [1 day, 0:22:44< 0:06:14] +[titan] 2025-10-05 22:57:07,418 - root - INFO - step: 39835 loss: 1.8289 memory: 118.84GiB(85.28%) tps: 30,115 tflops: 417.80 mfu: 42.24% global_avg_ntp_loss: 0.2110 global_avg_mtp_loss: 1.6179 +[titan] 2025-10-05 22:57:07,419 - root - INFO - lr: 5.0019e-06 gnorm: 1.25 [1 day, 0:22:55< 0:06:03] +[titan] 2025-10-05 22:57:18,260 - root - INFO - step: 39840 loss: 1.8909 memory: 118.84GiB(85.28%) tps: 30,225 tflops: 419.33 mfu: 42.40% global_avg_ntp_loss: 0.2193 global_avg_mtp_loss: 1.6716 +[titan] 2025-10-05 22:57:18,260 - root - INFO - lr: 5.0018e-06 gnorm: 1.28 [1 day, 0:23:06< 0:05:52] +[titan] 2025-10-05 22:57:29,092 - root - INFO - step: 39845 loss: 1.9356 memory: 118.84GiB(85.28%) tps: 30,252 tflops: 419.71 mfu: 42.44% global_avg_ntp_loss: 0.2238 global_avg_mtp_loss: 1.7118 +[titan] 2025-10-05 22:57:29,092 - root - INFO - lr: 5.0017e-06 gnorm: 1.31 [1 day, 0:23:17< 0:05:41] +[titan] 2025-10-05 22:57:37,722 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:57:39,898 - root - INFO - step: 39850 loss: 1.8816 memory: 118.84GiB(85.28%) tps: 30,324 tflops: 420.69 mfu: 42.54% global_avg_ntp_loss: 0.2162 global_avg_mtp_loss: 1.6654 +[titan] 2025-10-05 22:57:39,899 - root - INFO - lr: 5.0016e-06 gnorm: 1.24 [1 day, 0:23:28< 0:05:30] +[titan] 2025-10-05 22:57:50,741 - root - INFO - step: 39855 loss: 2.0200 memory: 118.84GiB(85.28%) tps: 30,222 tflops: 419.29 mfu: 42.39% global_avg_ntp_loss: 0.2356 global_avg_mtp_loss: 1.7844 +[titan] 2025-10-05 22:57:50,741 - root - INFO - lr: 5.0015e-06 gnorm: 1.32 [1 day, 0:23:38< 0:05:19] +[titan] 2025-10-05 22:58:01,598 - root - INFO - step: 39860 loss: 1.9668 memory: 118.84GiB(85.28%) tps: 30,183 tflops: 418.74 mfu: 42.34% global_avg_ntp_loss: 0.2277 global_avg_mtp_loss: 1.7390 +[titan] 2025-10-05 22:58:01,598 - root - INFO - lr: 5.0014e-06 gnorm: 1.33 [1 day, 0:23:49< 0:05:08] +[titan] 2025-10-05 22:58:12,433 - root - INFO - step: 39865 loss: 1.8795 memory: 118.84GiB(85.28%) tps: 30,244 tflops: 419.59 mfu: 42.43% global_avg_ntp_loss: 0.2173 global_avg_mtp_loss: 1.6621 +[titan] 2025-10-05 22:58:12,433 - root - INFO - lr: 5.0013e-06 gnorm: 1.27 [1 day, 0:24:00< 0:04:57] +[titan] 2025-10-05 22:58:23,320 - root - INFO - step: 39870 loss: 1.8085 memory: 118.84GiB(85.28%) tps: 30,100 tflops: 417.59 mfu: 42.22% global_avg_ntp_loss: 0.2094 global_avg_mtp_loss: 1.5991 +[titan] 2025-10-05 22:58:23,320 - root - INFO - lr: 5.0012e-06 gnorm: 1.27 [1 day, 0:24:11< 0:04:46] +[titan] 2025-10-05 22:58:34,151 - root - INFO - step: 39875 loss: 1.9422 memory: 118.84GiB(85.28%) tps: 30,254 tflops: 419.73 mfu: 42.44% global_avg_ntp_loss: 0.2251 global_avg_mtp_loss: 1.7171 +[titan] 2025-10-05 22:58:34,151 - root - INFO - lr: 5.0011e-06 gnorm: 1.29 [1 day, 0:24:22< 0:04:35] +[titan] 2025-10-05 22:58:44,982 - root - INFO - step: 39880 loss: 1.8617 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2163 global_avg_mtp_loss: 1.6454 +[titan] 2025-10-05 
22:58:44,982 - root - INFO - lr: 5.0010e-06 gnorm: 1.24 [1 day, 0:24:33< 0:04:24] +[titan] 2025-10-05 22:58:55,801 - root - INFO - step: 39885 loss: 1.9066 memory: 118.84GiB(85.28%) tps: 30,289 tflops: 420.21 mfu: 42.49% global_avg_ntp_loss: 0.2200 global_avg_mtp_loss: 1.6867 +[titan] 2025-10-05 22:58:55,801 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:43< 0:04:13] +[titan] 2025-10-05 22:59:06,655 - root - INFO - step: 39890 loss: 1.8466 memory: 118.84GiB(85.28%) tps: 30,191 tflops: 418.85 mfu: 42.35% global_avg_ntp_loss: 0.2132 global_avg_mtp_loss: 1.6333 +[titan] 2025-10-05 22:59:06,655 - root - INFO - lr: 5.0009e-06 gnorm: 1.23 [1 day, 0:24:54< 0:04:02] +[titan] 2025-10-05 22:59:17,499 - root - INFO - step: 39895 loss: 1.9303 memory: 118.84GiB(85.28%) tps: 30,219 tflops: 419.24 mfu: 42.39% global_avg_ntp_loss: 0.2233 global_avg_mtp_loss: 1.7070 +[titan] 2025-10-05 22:59:17,499 - root - INFO - lr: 5.0008e-06 gnorm: 1.27 [1 day, 0:25:05< 0:03:51] +[titan] 2025-10-05 22:59:26,208 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 22:59:28,385 - root - INFO - step: 39900 loss: 1.9225 memory: 118.84GiB(85.28%) tps: 30,103 tflops: 417.63 mfu: 42.23% global_avg_ntp_loss: 0.2224 global_avg_mtp_loss: 1.7001 +[titan] 2025-10-05 22:59:28,385 - root - INFO - lr: 5.0007e-06 gnorm: 1.29 [1 day, 0:25:16< 0:03:40] +[titan] 2025-10-05 22:59:39,223 - root - INFO - step: 39905 loss: 1.8920 memory: 118.84GiB(85.28%) tps: 30,236 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6730 +[titan] 2025-10-05 22:59:39,223 - root - INFO - lr: 5.0006e-06 gnorm: 1.25 [1 day, 0:25:27< 0:03:29] +[titan] 2025-10-05 22:59:50,050 - root - INFO - step: 39910 loss: 1.9026 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.46% global_avg_ntp_loss: 0.2202 global_avg_mtp_loss: 1.6824 +[titan] 2025-10-05 22:59:50,051 - root - INFO - lr: 5.0006e-06 gnorm: 1.33 [1 day, 0:25:38< 0:03:18] +[titan] 2025-10-05 23:00:00,881 - root - INFO - step: 39915 loss: 1.9651 memory: 118.84GiB(85.28%) tps: 30,255 tflops: 419.74 mfu: 42.44% global_avg_ntp_loss: 0.2285 global_avg_mtp_loss: 1.7366 +[titan] 2025-10-05 23:00:00,882 - root - INFO - lr: 5.0005e-06 gnorm: 1.25 [1 day, 0:25:49< 0:03:07] +[titan] 2025-10-05 23:00:11,722 - root - INFO - step: 39920 loss: 1.9075 memory: 118.84GiB(85.28%) tps: 30,229 tflops: 419.38 mfu: 42.40% global_avg_ntp_loss: 0.2227 global_avg_mtp_loss: 1.6848 +[titan] 2025-10-05 23:00:11,722 - root - INFO - lr: 5.0005e-06 gnorm: 1.23 [1 day, 0:25:59< 0:02:56] +[titan] 2025-10-05 23:00:22,583 - root - INFO - step: 39925 loss: 1.8682 memory: 118.84GiB(85.28%) tps: 30,171 tflops: 418.58 mfu: 42.32% global_avg_ntp_loss: 0.2164 global_avg_mtp_loss: 1.6518 +[titan] 2025-10-05 23:00:22,583 - root - INFO - lr: 5.0004e-06 gnorm: 1.24 [1 day, 0:26:10< 0:02:45] +[titan] 2025-10-05 23:00:33,459 - root - INFO - step: 39930 loss: 1.8937 memory: 118.84GiB(85.28%) tps: 30,129 tflops: 418.00 mfu: 42.26% global_avg_ntp_loss: 0.2197 global_avg_mtp_loss: 1.6740 +[titan] 2025-10-05 
23:00:33,459 - root - INFO - lr: 5.0003e-06 gnorm: 1.28 [1 day, 0:26:21< 0:02:34] +[titan] 2025-10-05 23:00:44,397 - root - INFO - step: 39935 loss: 1.9686 memory: 118.84GiB(85.28%) tps: 29,959 tflops: 415.63 mfu: 42.03% global_avg_ntp_loss: 0.2279 global_avg_mtp_loss: 1.7407 +[titan] 2025-10-05 23:00:44,397 - root - INFO - lr: 5.0003e-06 gnorm: 1.37 [1 day, 0:26:32< 0:02:23] +[titan] 2025-10-05 23:00:46,762 - root - INFO - Dumping profiler traces at step 39936 +[titan] 2025-10-05 23:00:46,801 - root - INFO - Finished dumping profiler traces in 0.04 seconds +[titan] 2025-10-05 23:00:55,477 - root - INFO - step: 39940 loss: 1.9007 memory: 118.84GiB(85.28%) tps: 29,576 tflops: 410.32 mfu: 41.49% global_avg_ntp_loss: 0.2208 global_avg_mtp_loss: 1.6799 +[titan] 2025-10-05 23:00:55,477 - root - INFO - lr: 5.0003e-06 gnorm: 1.22 [1 day, 0:26:43< 0:02:12] +[titan] 2025-10-05 23:01:06,304 - root - INFO - step: 39945 loss: 1.9377 memory: 118.84GiB(85.28%) tps: 30,265 tflops: 419.88 mfu: 42.45% global_avg_ntp_loss: 0.2245 global_avg_mtp_loss: 1.7132 +[titan] 2025-10-05 23:01:06,305 - root - INFO - lr: 5.0002e-06 gnorm: 1.26 [1 day, 0:26:54< 0:02:01] +[titan] 2025-10-05 23:01:14,966 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. 
+[titan] 2025-10-05 23:01:17,145 - root - INFO - step: 39950 loss: 1.8846 memory: 118.84GiB(85.28%) tps: 30,228 tflops: 419.37 mfu: 42.40% global_avg_ntp_loss: 0.2207 global_avg_mtp_loss: 1.6639 +[titan] 2025-10-05 23:01:17,145 - root - INFO - lr: 5.0002e-06 gnorm: 1.25 [1 day, 0:27:05< 0:01:50] +[titan] 2025-10-05 23:01:28,000 - root - INFO - step: 39955 loss: 1.8456 memory: 118.84GiB(85.28%) tps: 30,188 tflops: 418.81 mfu: 42.35% global_avg_ntp_loss: 0.2134 global_avg_mtp_loss: 1.6322 +[titan] 2025-10-05 23:01:28,000 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:16< 0:01:39] +[titan] 2025-10-05 23:01:38,823 - root - INFO - step: 39960 loss: 1.8985 memory: 118.84GiB(85.28%) tps: 30,278 tflops: 420.05 mfu: 42.47% global_avg_ntp_loss: 0.2196 global_avg_mtp_loss: 1.6789 +[titan] 2025-10-05 23:01:38,823 - root - INFO - lr: 5.0001e-06 gnorm: 1.26 [1 day, 0:27:26< 0:01:28] +[titan] 2025-10-05 23:01:49,702 - root - INFO - step: 39965 loss: 1.8650 memory: 118.84GiB(85.28%) tps: 30,121 tflops: 417.88 mfu: 42.25% global_avg_ntp_loss: 0.2152 global_avg_mtp_loss: 1.6498 +[titan] 2025-10-05 23:01:49,702 - root - INFO - lr: 5.0001e-06 gnorm: 1.29 [1 day, 0:27:37< 0:01:17] +[titan] 2025-10-05 23:02:00,536 - root - INFO - step: 39970 loss: 1.8845 memory: 118.84GiB(85.28%) tps: 30,247 tflops: 419.63 mfu: 42.43% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6655 +[titan] 2025-10-05 23:02:00,536 - root - INFO - lr: 5.0001e-06 gnorm: 1.25 [1 day, 0:27:48< 0:01:06] +[titan] 2025-10-05 23:02:11,385 - root - INFO - step: 39975 loss: 1.8798 memory: 118.84GiB(85.28%) tps: 30,204 tflops: 419.04 mfu: 42.37% global_avg_ntp_loss: 0.2167 global_avg_mtp_loss: 1.6632 +[titan] 2025-10-05 23:02:11,385 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:27:59< 0:00:55] +[titan] 2025-10-05 23:02:22,224 - root - INFO - step: 39980 loss: 1.9207 memory: 118.84GiB(85.28%) tps: 30,234 tflops: 419.44 mfu: 42.41% global_avg_ntp_loss: 0.2222 global_avg_mtp_loss: 1.6985 +[titan] 2025-10-05 
23:02:22,224 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:10< 0:00:44] +[titan] 2025-10-05 23:02:33,062 - root - INFO - step: 39985 loss: 1.8577 memory: 118.84GiB(85.28%) tps: 30,235 tflops: 419.47 mfu: 42.41% global_avg_ntp_loss: 0.2154 global_avg_mtp_loss: 1.6423 +[titan] 2025-10-05 23:02:33,062 - root - INFO - lr: 5.0000e-06 gnorm: 1.26 [1 day, 0:28:21< 0:00:33] +[titan] 2025-10-05 23:02:43,924 - root - INFO - step: 39990 loss: 1.9469 memory: 118.84GiB(85.28%) tps: 30,169 tflops: 418.55 mfu: 42.32% global_avg_ntp_loss: 0.2257 global_avg_mtp_loss: 1.7212 +[titan] 2025-10-05 23:02:43,924 - root - INFO - lr: 5.0000e-06 gnorm: 1.29 [1 day, 0:28:32< 0:00:22] +[titan] 2025-10-05 23:02:54,801 - root - INFO - step: 39995 loss: 1.8720 memory: 118.84GiB(85.28%) tps: 30,127 tflops: 417.97 mfu: 42.26% global_avg_ntp_loss: 0.2160 global_avg_mtp_loss: 1.6560 +[titan] 2025-10-05 23:02:54,801 - root - INFO - lr: 5.0000e-06 gnorm: 1.27 [1 day, 0:28:42< 0:00:11] +[titan] 2025-10-05 23:03:03,471 - root - INFO - [GC] Peforming periodical GC collection. 0.00 seconds. +[titan] 2025-10-05 23:03:05,651 - root - INFO - step: 40000 loss: 1.8942 memory: 118.84GiB(85.28%) tps: 30,203 tflops: 419.02 mfu: 42.37% global_avg_ntp_loss: 0.2190 global_avg_mtp_loss: 1.6752 +[titan] 2025-10-05 23:03:05,651 - root - INFO - lr: 5.0000e-06 gnorm: 1.24 [1 day, 0:28:53< 0:00:00] +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving the checkpoint (or staging if async is enabled). +[titan] 2025-10-05 23:03:05,651 - root - INFO - Saving a full checkpoint at last step, step 40000. +[titan] 2025-10-05 23:03:23,713 - root - INFO - [GC] GC collection invoked by checkpointer. 0.02 seconds. +[titan] 2025-10-05 23:03:23,713 - root - INFO - Finished saving the checkpoint (or staging if async is enabled)in 18.06 seconds. 
+[titan] 2025-10-05 23:03:23,713 - root - INFO - Training completed diff --git a/torchtitan/components/__pycache__/float8.cpython-312.pyc b/torchtitan/components/__pycache__/float8.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a820299f851ef68e89fcda6c85c6865f2099b51e Binary files /dev/null and b/torchtitan/components/__pycache__/float8.cpython-312.pyc differ diff --git a/torchtitan/components/__pycache__/metrics.cpython-312.pyc b/torchtitan/components/__pycache__/metrics.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c51d10c1fa99dd9f0ac0b17b22d7c8138e179d0 Binary files /dev/null and b/torchtitan/components/__pycache__/metrics.cpython-312.pyc differ diff --git a/torchtitan/components/__pycache__/optimizer.cpython-312.pyc b/torchtitan/components/__pycache__/optimizer.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cba975b4f64dceea1eeec0d6e7dbca54a971eb55 Binary files /dev/null and b/torchtitan/components/__pycache__/optimizer.cpython-312.pyc differ diff --git a/torchtitan/components/lr_scheduler.py b/torchtitan/components/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..0a673ca89f4c1970a777c6009624925fc1aa5827 --- /dev/null +++ b/torchtitan/components/lr_scheduler.py @@ -0,0 +1,174 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import copy +import functools +import math +from typing import Any, Callable, Iterator + +from torch.distributed.checkpoint.stateful import Stateful +from torch.optim.lr_scheduler import LambdaLR, LRScheduler + +from torchtitan.components.optimizer import OptimizersContainer +from torchtitan.config_manager import JobConfig +from torchtitan.tools.logging import logger + +__all__ = [ + "LRSchedulersContainer", + "build_lr_schedulers", +] + + +class LRSchedulersContainer(Stateful): + """Container for multiple learning rate schedulers. + + This class is used to wrap multiple LRSchedulers into a single object that can be + used to reduce the complexity of the training loop. This mimics the behavior of + ``torch.optim.lr_scheduler.LRScheduler``. The design concept is the same as + ``OptimizersContainer``. This class currently only supports ``LambdaLR``. + + **Note** + Users who want to customize the lr_scheduler behavior can inherit from this class and + extend the functionality as needed. The following methods must follow the same + signature as ``torch.optim.lr_scheduler.LRScheduler`` class: ``step()``, ``state_dict()``, + ``load_state_dict()``. + + **Limitations** + This class assumes all the lr schedulers are the same. There is no easy way to support + resharding for multiple different LRSchedulers because LRScheduler.state_dict() is not + resharding friendly. Therefore, the limitation is used to allow TorchTitan to support + lr scheduler resharding. + + Args: + optimizers (OptimizersContainer): The corresponding optimizers for the lr_schedulers. 
+ """ + + schedulers: list[LRScheduler] + + def __init__(self, optimizers: OptimizersContainer, lr_lambda: Callable) -> None: + assert ( + len(optimizers) > 0 + ), "Must have at least one optimizer to create LRScheduler" + + self.schedulers = [LambdaLR(optimizer, lr_lambda) for optimizer in optimizers] + + def __iter__(self) -> Iterator[LRScheduler]: + return iter(self.schedulers) + + def __len__(self) -> int: + return len(self.schedulers) + + def step(self) -> None: + for scheduler in self.schedulers: + scheduler.step() + + def state_dict(self) -> dict[str, Any]: + # While there may be multiple schedulers, we only save the first one because + # the state_dict is the same for all. See the limitations section in the + # docstring. + return self.schedulers[0].state_dict() + + def load_state_dict(self, state_dict: dict[str, Any]) -> None: + # Load the same state_dict for all schedulers. The key value we're concerned + # within ``LRScheduler.state_dict()`` is ``last_epoch``, which is an integer + # that is immutable. As long as ``training.steps`` and ``lr_scheduler.warmup_steps`` + # in ``job_config`` remain unchanged when resuming from a checkpoint, this + # approach is safe. We call ``copy()`` here to ensure extra safety. + for scheduler in self.schedulers: + scheduler.load_state_dict(copy.deepcopy(state_dict)) + + +def build_lr_schedulers( + optimizers: OptimizersContainer, job_config: JobConfig +) -> LRSchedulersContainer: + """Create a LRSchedulerContainer for the given optimizers and job config. + + This function creates a ``LRSchedulersContainer`` for the given optimizers. + ``job_config`` should define the correct lr scheduler parameters. + + **Note** + Users who want to customize the lr scheduler behavior can create their own + ``LRSchedulersContainer`` subclass and ``build_lr_scheduler``. Passing the + customized ``build_lr_schedulers`` to ``TrainSpec`` will create the customized + ``LRSchedulersContainer``. 
+ + + Args: + optimizers (OptimizersContainer): The corresponding optimizers for the + lr_schedulers. + """ + training_steps = job_config.training.steps + warmup_steps = int(job_config.lr_scheduler.warmup_steps) + if job_config.lr_scheduler.decay_ratio is not None: + decay_steps = round(training_steps * job_config.lr_scheduler.decay_ratio) + if warmup_steps + decay_steps > training_steps: + logger.warning( + f"Warmup ({warmup_steps}) + decay ({decay_steps}) steps exceed " + f"total training steps ({training_steps}). " + f"Adjusting decay steps to {training_steps - warmup_steps}." + ) + decay_steps = training_steps - warmup_steps + else: + decay_steps = training_steps - warmup_steps + stable_steps = training_steps - warmup_steps - decay_steps + lr_decay_type = job_config.lr_scheduler.decay_type + lr_min = job_config.lr_scheduler.lr_min + + def linear_warmup_stable_decay( + current_step: int, + warmup_steps: int, + stable_steps: int, + decay_steps: int, + lr_decay_type: str, + lr_min: float, + ): + """ + Computes linear warmup followed by stable learning rate for a while, + then some type of decay. + + Per LambdaLR requirement, this is accomplished by returning + a multiplicative factor `curr_adjustment` ranging from 1 to 0 + to adjust the learning rate to create the desired schedule. + + We offer three types of learning rate decay schedules: + 1. `linear`: decays linearly from 1 to 0 over the decay period. + 2. `sqrt`: decays as 1 minus the square root of the decay progress. + 3. `cosine`: follows a cosine curve, decaying according to the values of the half-period of the cosine function. + + If `lr_min` is specified, the decay range is scaled from 1 to `lr_min` + to ensure the learning rate does not drop below this minimum value. 
+ """ + warmup_stable_steps = warmup_steps + stable_steps + if current_step < warmup_steps: + # linear warmup + # 0-indexed step, hence + 1 adjustments + current_step += 1 + curr_adjustment = float(current_step / (warmup_steps + 1)) + elif current_step < warmup_stable_steps: + curr_adjustment = 1.0 + else: + # 0-indexed step, hence + 1 adjustments + current_step += 1 + progress = float(current_step - warmup_stable_steps) / (decay_steps + 1) + + if lr_decay_type == "linear": + curr_adjustment = 1 - progress + elif lr_decay_type == "sqrt": + curr_adjustment = 1 - math.sqrt(progress) + elif lr_decay_type == "cosine": + curr_adjustment = 0.5 * (1.0 + math.cos(math.pi * progress)) + curr_adjustment = lr_min + (1 - lr_min) * curr_adjustment + return curr_adjustment + + lr_lambda = functools.partial( + linear_warmup_stable_decay, + warmup_steps=warmup_steps, + stable_steps=stable_steps, + decay_steps=decay_steps, + lr_decay_type=lr_decay_type, + lr_min=lr_min, + ) + return LRSchedulersContainer(optimizers, lr_lambda) diff --git a/torchtitan/datasets/tokenizer/tiktoken.py b/torchtitan/datasets/tokenizer/tiktoken.py new file mode 100644 index 0000000000000000000000000000000000000000..401757a93e6b598a6a3a60c4ca934ea0427f25a4 --- /dev/null +++ b/torchtitan/datasets/tokenizer/tiktoken.py @@ -0,0 +1,190 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. 
class TikTokenizer(Tokenizer):
    """
    Tokenizer that encodes and decodes text with a Tiktoken BPE model.

    Args:
        model_path (str): The path to the Tiktoken model file.
    """

    special_tokens: dict[str, int]

    num_reserved_special_tokens = 256

    pat_str = r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"  # noqa: E501, B950

    def __init__(self, model_path: str):
        super().__init__()
        assert os.path.exists(
            model_path
        ), f"The tokenizer path does not exist: {model_path}"
        assert os.path.isfile(model_path), model_path

        bpe_ranks = load_tiktoken_bpe(model_path)

        # Ten explicitly named tokens come first, followed by the remaining
        # reserved placeholders (indices 5 .. num_reserved_special_tokens - 6).
        leading_tokens = [
            "<|begin_of_text|>",
            "<|end_of_text|>",
            "<|reserved_special_token_0|>",
            "<|reserved_special_token_1|>",
            "<|reserved_special_token_2|>",
            "<|reserved_special_token_3|>",
            "<|start_header_id|>",
            "<|end_header_id|>",
            "<|reserved_special_token_4|>",
            "<|eot_id|>",  # end of turn
        ]
        trailing_tokens = [
            f"<|reserved_special_token_{i}|>"
            for i in range(5, self.num_reserved_special_tokens - 5)
        ]
        all_special = leading_tokens + trailing_tokens

        # Special-token IDs are allocated contiguously right after the BPE ranks.
        first_special_id = len(bpe_ranks)
        self.special_tokens = dict(
            zip(all_special, range(first_special_id, first_special_id + len(all_special)))
        )

        self.model = tiktoken.Encoding(
            name=Path(model_path).name,
            pat_str=self.pat_str,
            mergeable_ranks=bpe_ranks,
            special_tokens=self.special_tokens,
        )
        self._n_words: int = self.model.n_vocab

        # BOS / EOS token IDs
        self.bos_id: int = self.special_tokens["<|begin_of_text|>"]
        self.eos_id: int = self.special_tokens["<|end_of_text|>"]
        self.pad_id: int = -1
        self.stop_tokens = {
            self.special_tokens["<|end_of_text|>"],
            self.special_tokens["<|eot_id|>"],
        }
        logger.info(
            f"TikTokenizer built: #words {self.n_words}, BOS ID {self.bos_id}, EOS ID {self.eos_id}"
        )

    def encode(
        self,
        s: str,
        *,
        bos: bool,
        eos: bool,
        allowed_special: Literal["all"] | AbstractSet[str] | None = None,
        disallowed_special: Literal["all"] | Collection[str] | None = None,
    ) -> list[int]:
        """
        Encodes a string into a list of token IDs.

        Args:
            s (str): The input string to be encoded.
            bos (bool): Whether to prepend the beginning-of-sequence token.
            eos (bool): Whether to append the end-of-sequence token.
            allowed_special ("all"|set[str]): allowed special tokens in string
            disallowed_special ("all"|set[str]): special tokens that raise an error when in string

        Returns:
            list[int]: A list of token IDs.

        A falsy `allowed_special` is replaced by an empty set and a falsy
        `disallowed_special` by an empty tuple before being forwarded to the
        Tiktoken encoder. As a result, by default:
        - `disallowed_special=()` causes all text corresponding to special
          tokens to be encoded as natural text (instead of raising an error).
        - Passing `allowed_special="all"` causes all text corresponding to
          special tokens to be encoded as special tokens.
        """
        assert type(s) is str
        allowed_special = allowed_special or set()
        disallowed_special = disallowed_special or ()

        # The tiktoken tokenizer can handle <=400k chars without
        # pyo3_runtime.PanicException.
        TIKTOKEN_MAX_ENCODE_CHARS = 400_000

        # https://github.com/openai/tiktoken/issues/195
        # Each window is further split whenever a run of consecutive
        # whitespace or non-whitespace characters exceeds this limit.
        MAX_NO_WHITESPACES_CHARS = 25_000

        token_ids: list[int] = [self.bos_id] if bos else []
        for window_start in range(0, len(s), TIKTOKEN_MAX_ENCODE_CHARS):
            window = s[window_start : window_start + TIKTOKEN_MAX_ENCODE_CHARS]
            for piece in self._split_whitespaces_or_nonwhitespaces(
                window, MAX_NO_WHITESPACES_CHARS
            ):
                token_ids.extend(
                    self.model.encode(
                        piece,
                        allowed_special=allowed_special,
                        disallowed_special=disallowed_special,
                    )
                )
        if eos:
            token_ids.append(self.eos_id)
        return token_ids

    def decode(self, t: Sequence[int]) -> str:
        """
        Decodes a sequence of token IDs into a string.

        Args:
            t (Sequence[int]): The token IDs to be decoded.

        Returns:
            str: The decoded string.
        """
        # Typecast is safe here. Tiktoken doesn't do anything list-related with the sequence.
        ids = cast(list[int], t)
        return self.model.decode(ids)

    @staticmethod
    def _split_whitespaces_or_nonwhitespaces(
        s: str, max_consecutive_slice_len: int
    ) -> Iterator[str]:
        """
        Yields consecutive slices of `s` such that no slice contains more than
        `max_consecutive_slice_len` consecutive whitespaces or consecutive
        non-whitespaces. Concatenating the slices gives back `s`.
        """
        run_len = 0
        run_is_space = s[0].isspace() if len(s) > 0 else False
        piece_start = 0

        for idx, ch in enumerate(s):
            ch_is_space = ch.isspace()

            if run_is_space != ch_is_space:
                # Character class flipped: a new run starts at this character.
                run_len = 1
                run_is_space = ch_is_space
                continue

            run_len += 1
            if run_len > max_consecutive_slice_len:
                # Run got too long: cut just before this character.
                yield s[piece_start:idx]
                piece_start = idx
                run_len = 1
        yield s[piece_start:]


def build_tiktoken_tokenizer(job_config: JobConfig) -> TikTokenizer:
    """Build a `TikTokenizer` from `job_config.model.tokenizer_path`."""
    tokenizer_path = job_config.model.tokenizer_path
    return TikTokenizer(tokenizer_path)
__all__ = [
    "FluxModelArgs",
    "FluxModel",
    "flux_configs",
    "parallelize_flux",
]


# Keyword arguments that are identical across every Flux preset below.
_COMMON_MODEL_KWARGS = dict(
    in_channels=64,
    out_channels=64,
    vec_in_dim=768,
    mlp_ratio=4.0,
    axes_dim=(16, 56, 56),
    theta=10_000,
    qkv_bias=True,
)


def _make_flux_args(**preset_kwargs) -> FluxModelArgs:
    """Build one preset: shared kwargs + a fresh autoencoder config + preset-specific kwargs."""
    return FluxModelArgs(
        **_COMMON_MODEL_KWARGS,
        autoencoder_params=AutoEncoderParams(
            resolution=256,
            in_channels=3,
            ch=128,
            out_ch=3,
            ch_mult=(1, 2, 4, 4),
            num_res_blocks=2,
            z_channels=16,
            scale_factor=0.3611,
            shift_factor=0.1159,
        ),
        **preset_kwargs,
    )


# Named model presets, looked up by the train spec registered below.
flux_configs = {
    "flux-dev": _make_flux_args(
        context_in_dim=512,
        hidden_size=3072,
        num_heads=24,
        depth=19,
        depth_single_blocks=38,
        guidance_embed=True,
    ),
    "flux-schnell": _make_flux_args(
        context_in_dim=4096,
        hidden_size=3072,
        num_heads=24,
        depth=19,
        depth_single_blocks=38,
        guidance_embed=False,
    ),
    "flux-debug": _make_flux_args(
        context_in_dim=512,
        hidden_size=512,
        num_heads=4,
        depth=2,
        depth_single_blocks=2,
        guidance_embed=True,
    ),
}


# Register the Flux training recipe; no pipelining function and no tokenizer
# builder are supplied for this spec.
_flux_train_spec = TrainSpec(
    name="flux",
    cls=FluxModel,
    config=flux_configs,
    parallelize_fn=parallelize_flux,
    pipelining_fn=None,
    build_optimizers_fn=build_optimizers,
    build_lr_schedulers_fn=build_lr_schedulers,
    build_dataloader_fn=build_flux_dataloader,
    build_tokenizer_fn=None,
    build_loss_fn=build_mse_loss,
)
register_train_spec(_flux_train_spec)
+ +import math +import random +from dataclasses import dataclass +from typing import Any, Callable, Optional + +import numpy as np + +import torch + +from datasets import Dataset, load_dataset +from datasets.distributed import split_dataset_by_node +from PIL import Image + +from torch.distributed.checkpoint.stateful import Stateful + +from torch.utils.data import IterableDataset +from torchtitan.components.dataloader import ParallelAwareDataloader + +from torchtitan.config_manager import JobConfig +from torchtitan.experiments.flux.dataset.tokenizer import FluxTokenizer +from torchtitan.tools.logging import logger + + +def _process_cc12m_image( + img: Image.Image, + output_size: int = 256, +) -> Optional[torch.Tensor]: + """Process CC12M image to the desired size.""" + + width, height = img.size + # Skip low resolution images + if width < output_size or height < output_size: + return None + + if width >= height: + # resize height to be equal to output_size, then crop + new_width, new_height = math.ceil(output_size / height * width), output_size + img = img.resize((new_width, new_height)) + left = random.randint(0, new_width - output_size) + resized_img = img.crop((left, 0, left + output_size, output_size)) + else: + # resize width to be equal to output_size, the crop + new_width, new_height = ( + output_size, + math.ceil(output_size / width * height), + ) + img = img.resize((new_width, new_height)) + lower = random.randint(0, new_width - output_size) + resized_img = img.crop((0, lower, output_size, lower + output_size)) + + assert resized_img.size[0] == resized_img.size[1] == output_size + + # Skip grayscale images + if resized_img.mode == "L": + return None + + np_img = np.array(resized_img).transpose((2, 0, 1)) + tensor_img = torch.tensor(np_img).float() / 255.0 + + # NOTE: The following commented code is an alternative way + # img_transform = transforms.Compose( + # [ + # transforms.Resize(max(output_size, output_size)), + # transforms.CenterCrop((output_size, 
output_size)), + # transforms.ToTensor(), + # ] + # ) + # tensor_img = img_transform(img) + + return tensor_img + + +def _flux_data_processor( + sample: dict[str, Any], + t5_tokenizer: FluxTokenizer, + clip_tokenizer: FluxTokenizer, + output_size: int = 256, +) -> dict[str, Any]: + """ + Preprocess CC12M dataset sample image and text for Flux model. + + Args: + sample: A sample from dataset + t5_encoder: T5 encoder + clip_encoder: CLIP encoder + output_size: The output image size + + """ + img = _process_cc12m_image(sample["jpg"], output_size=output_size) + t5_tokens = t5_tokenizer.encode(sample["txt"]) + clip_tokens = clip_tokenizer.encode(sample["txt"]) + + return { + "image": img, + "clip_tokens": clip_tokens, # type: List[int] + "t5_tokens": t5_tokens, # type: List[int] + } + + +@dataclass +class TextToImageDatasetConfig: + path: str + loader: Callable + data_processor: Callable + + +DATASETS = { + "cc12m": TextToImageDatasetConfig( + path="pixparse/cc12m-wds", + loader=lambda path: load_dataset(path, split="train", streaming=True), + data_processor=_flux_data_processor, + ), +} + + +def _validate_dataset( + dataset_name: str, dataset_path: Optional[str] = None +) -> tuple[str, Callable, Callable]: + """Validate dataset name and path.""" + if dataset_name not in DATASETS: + raise ValueError( + f"Dataset {dataset_name} is not supported. " + f"Supported datasets are: {list(DATASETS.keys())}" + ) + + config = DATASETS[dataset_name] + path = dataset_path or config.path + logger.info(f"Preparing {dataset_name} dataset from {path}") + return path, config.loader, config.data_processor + + +class FluxDataset(IterableDataset, Stateful): + """Dataset for FLUX text-to-image model. + + Args: + dataset_name (str): Name of the dataset. + dataset_path (str): Path to the dataset. + model_transform (Transform): Callable that applies model-specific preprocessing to the sample. + dp_rank (int): Data parallel rank. + dp_world_size (int): Data parallel world size. 
+ infinite (bool): Whether to loop over the dataset infinitely. + """ + + def __init__( + self, + dataset_name: str, + dataset_path: Optional[str], + t5_tokenizer: FluxTokenizer, + clip_tokenizer: FluxTokenizer, + job_config: Optional[JobConfig] = None, + dp_rank: int = 0, + dp_world_size: int = 1, + infinite: bool = False, + ) -> None: + + # Force lowercase for consistent comparison + dataset_name = dataset_name.lower() + + path, dataset_loader, data_processor = _validate_dataset( + dataset_name, dataset_path + ) + ds = dataset_loader(path) + + self.dataset_name = dataset_name + self._data = split_dataset_by_node(ds, dp_rank, dp_world_size) + + self._t5_tokenizer = t5_tokenizer + self._clip_tokenizer = clip_tokenizer + self._data_processor = data_processor + self.job_config = job_config + + self.infinite = infinite + + # Variables for checkpointing + self._sample_idx = 0 + self._all_samples: list[dict[str, Any]] = [] + + def _get_data_iter(self): + if isinstance(self._data, Dataset) and self._sample_idx == len(self._data): + return iter([]) + + it = iter(self._data) + for _ in range(self._sample_idx): + next(it) + return it + + def __iter__(self): + while True: + for sample in self._get_data_iter(): + # Use the dataset-specific preprocessor + sample_dict = self._data_processor( + sample, self._t5_tokenizer, self._clip_tokenizer, output_size=256 + ) + + # skip low quality image or image with color channel = 1 + if sample_dict["image"] is None: + logger.warning( + f"Low quality image {sample['__key__']} is skipped in Flux Dataloader" + ) + continue + + self._all_samples.extend(sample_dict) + self._sample_idx += 1 + + labels = sample_dict.pop("image") + yield sample_dict, labels + + if not self.infinite: + logger.warning(f"Dataset {self.dataset_name} has run out of data") + break + else: + # Reset offset for the next iteration + self._sample_idx = 0 + logger.warning(f"Dataset {self.dataset_name} is being re-looped") + + def load_state_dict(self, state_dict): + 
self._sample_idx = state_dict["sample_idx"] + self._all_samples = state_dict["all_samples"] + + def state_dict(self): + return { + "all_samples": self._all_samples, + "sample_idx": self._sample_idx, + } + + +def build_flux_dataloader( + dp_world_size: int, + dp_rank: int, + job_config: JobConfig, + # This parameter is not used, keep it for compatibility + tokenizer: FluxTokenizer | None, + infinite: bool = True, +) -> ParallelAwareDataloader: + """Build a data loader for HuggingFace datasets.""" + dataset_name = job_config.training.dataset + dataset_path = job_config.training.dataset_path + batch_size = job_config.training.batch_size + + t5_encoder_name = job_config.encoder.t5_encoder + clip_encoder_name = job_config.encoder.clip_encoder + max_t5_encoding_len = job_config.encoder.max_t5_encoding_len + + ds = FluxDataset( + dataset_name=dataset_name, + dataset_path=dataset_path, + t5_tokenizer=FluxTokenizer(t5_encoder_name, max_length=max_t5_encoding_len), + clip_tokenizer=FluxTokenizer( + clip_encoder_name, max_length=77 + ), # fix max_length for CLIP + dp_rank=dp_rank, + dp_world_size=dp_world_size, + infinite=infinite, + ) + + return ParallelAwareDataloader( + dataset=ds, + dp_rank=dp_rank, + dp_world_size=dp_world_size, + batch_size=batch_size, + ) diff --git a/torchtitan/experiments/flux/dataset/tokenizer.py b/torchtitan/experiments/flux/dataset/tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..090bfc955152d87614f03793fd606330995da39d --- /dev/null +++ b/torchtitan/experiments/flux/dataset/tokenizer.py @@ -0,0 +1,64 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. 
class FluxTokenizer(Tokenizer):
    """
    Tokenizing and encoding/decoding text using the T5 or CLIP tokenizer.

    Args:
        model_path (str): Path to the tokenizer from Hugging Face. Paths
            starting with "openai" select the CLIP tokenizer; anything else
            selects the T5 tokenizer.
        max_length (int): Length that encoded prompts are truncated/padded to.

    """

    def __init__(self, model_path: str = "t5-small", max_length: int = 77):
        super().__init__()
        self._n_words = 8  # TODO(jianiw): check
        self._max_length = max_length

        self.is_clip = model_path.startswith("openai")

        # Both tokenizer classes are loaded the same way; only the class differs.
        tokenizer_cls = CLIPTokenizer if self.is_clip else T5Tokenizer
        self._tokenizer = tokenizer_cls.from_pretrained(
            model_path, max_length=max_length
        )

    def encode(
        self,
        s: str,
    ) -> List[int]:
        """
        Encode the prompt text into tokens.

        The prompt is truncated and padded to the configured max length.
        NOTE(review): `return_tensors="pt"` is requested, so the returned
        "input_ids" is presumably a PyTorch tensor rather than a plain list,
        despite the annotation - confirm against callers.
        """
        encoded = self._tokenizer(
            s,
            truncation=True,
            max_length=self._max_length,
            return_length=False,
            return_overflowing_tokens=False,
            padding="max_length",
            return_tensors="pt",  # return pytorch tensors, default return List[int]
        )
        return encoded["input_ids"]

    def decode(self, t: List[int]) -> str:
        """
        Decode token IDs back to text by delegating to the wrapped tokenizer.
        According to the original author, this function will not be called.
        """
        text = self._tokenizer.decode(t)
        return text
+ +import os +from dataclasses import dataclass + +import torch +from einops import rearrange +from safetensors.torch import load_file as load_sft +from torch import nn, Tensor + + +@dataclass +class AutoEncoderParams: + resolution: int = 256 + in_channels: int = 3 + ch: int = 128 + out_ch: int = 3 + ch_mult: tuple[int] = (1, 2, 4, 4) + num_res_blocks: int = 2 + z_channels: int = 16 + scale_factor: float = 0.3611 + shift_factor: float = 0.1159 + + +def swish(x: Tensor) -> Tensor: + return x * torch.sigmoid(x) + + +class AttnBlock(nn.Module): + def __init__(self, in_channels: int): + super().__init__() + self.in_channels = in_channels + + self.norm = nn.GroupNorm( + num_groups=32, num_channels=in_channels, eps=1e-6, affine=True + ) + + self.q = nn.Conv2d(in_channels, in_channels, kernel_size=1) + self.k = nn.Conv2d(in_channels, in_channels, kernel_size=1) + self.v = nn.Conv2d(in_channels, in_channels, kernel_size=1) + self.proj_out = nn.Conv2d(in_channels, in_channels, kernel_size=1) + + def attention(self, h_: Tensor) -> Tensor: + h_ = self.norm(h_) + q = self.q(h_) + k = self.k(h_) + v = self.v(h_) + + b, c, h, w = q.shape + q = rearrange(q, "b c h w -> b 1 (h w) c").contiguous() + k = rearrange(k, "b c h w -> b 1 (h w) c").contiguous() + v = rearrange(v, "b c h w -> b 1 (h w) c").contiguous() + h_ = nn.functional.scaled_dot_product_attention(q, k, v) + + return rearrange(h_, "b 1 (h w) c -> b c h w", h=h, w=w, c=c, b=b) + + def forward(self, x: Tensor) -> Tensor: + return x + self.proj_out(self.attention(x)) + + +class ResnetBlock(nn.Module): + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + + self.norm1 = nn.GroupNorm( + num_groups=32, num_channels=in_channels, eps=1e-6, affine=True + ) + self.conv1 = nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1 + ) + self.norm2 = 
nn.GroupNorm( + num_groups=32, num_channels=out_channels, eps=1e-6, affine=True + ) + self.conv2 = nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1 + ) + if self.in_channels != self.out_channels: + self.nin_shortcut = nn.Conv2d( + in_channels, out_channels, kernel_size=1, stride=1, padding=0 + ) + + def forward(self, x): + h = x + h = self.norm1(h) + h = swish(h) + h = self.conv1(h) + + h = self.norm2(h) + h = swish(h) + h = self.conv2(h) + + if self.in_channels != self.out_channels: + x = self.nin_shortcut(x) + + return x + h + + +class Downsample(nn.Module): + def __init__(self, in_channels: int): + super().__init__() + # no asymmetric padding in torch conv, must do it ourselves + self.conv = nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=2, padding=0 + ) + + def forward(self, x: Tensor): + pad = (0, 1, 0, 1) + x = nn.functional.pad(x, pad, mode="constant", value=0) + x = self.conv(x) + return x + + +class Upsample(nn.Module): + def __init__(self, in_channels: int): + super().__init__() + self.conv = nn.Conv2d( + in_channels, in_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x: Tensor): + x = nn.functional.interpolate(x, scale_factor=2.0, mode="nearest") + x = self.conv(x) + return x + + +class Encoder(nn.Module): + def __init__( + self, + resolution: int, + in_channels: int, + ch: int, + ch_mult: list[int], + num_res_blocks: int, + z_channels: int, + ): + super().__init__() + self.ch = ch + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + # downsampling + self.conv_in = nn.Conv2d( + in_channels, self.ch, kernel_size=3, stride=1, padding=1 + ) + + curr_res = resolution + in_ch_mult = (1,) + tuple(ch_mult) + self.in_ch_mult = in_ch_mult + self.down = nn.ModuleList() + block_in = self.ch + for i_level in range(self.num_resolutions): + block = nn.ModuleList() + attn = nn.ModuleList() + block_in = ch * 
in_ch_mult[i_level] + block_out = ch * ch_mult[i_level] + for _ in range(self.num_res_blocks): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out)) + block_in = block_out + down = nn.Module() + down.block = block + down.attn = attn + if i_level != self.num_resolutions - 1: + down.downsample = Downsample(block_in) + curr_res = curr_res // 2 + self.down.append(down) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in) + self.mid.attn_1 = AttnBlock(block_in) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in) + + # end + self.norm_out = nn.GroupNorm( + num_groups=32, num_channels=block_in, eps=1e-6, affine=True + ) + self.conv_out = nn.Conv2d( + block_in, 2 * z_channels, kernel_size=3, stride=1, padding=1 + ) + + def forward(self, x: Tensor) -> Tensor: + # downsampling + hs = [self.conv_in(x)] + for i_level in range(self.num_resolutions): + for i_block in range(self.num_res_blocks): + h = self.down[i_level].block[i_block](hs[-1]) + if len(self.down[i_level].attn) > 0: + h = self.down[i_level].attn[i_block](h) + hs.append(h) + if i_level != self.num_resolutions - 1: + hs.append(self.down[i_level].downsample(hs[-1])) + + # middle + h = hs[-1] + h = self.mid.block_1(h) + h = self.mid.attn_1(h) + h = self.mid.block_2(h) + # end + h = self.norm_out(h) + h = swish(h) + h = self.conv_out(h) + return h + + +class Decoder(nn.Module): + def __init__( + self, + ch: int, + out_ch: int, + ch_mult: list[int], + num_res_blocks: int, + in_channels: int, + resolution: int, + z_channels: int, + ): + super().__init__() + self.ch = ch + self.num_resolutions = len(ch_mult) + self.num_res_blocks = num_res_blocks + self.resolution = resolution + self.in_channels = in_channels + self.ffactor = 2 ** (self.num_resolutions - 1) + + # compute in_ch_mult, block_in and curr_res at lowest res + block_in = ch * ch_mult[self.num_resolutions - 1] + curr_res = resolution // 2 ** 
(self.num_resolutions - 1) + self.z_shape = (1, z_channels, curr_res, curr_res) + + # z to block_in + self.conv_in = nn.Conv2d( + z_channels, block_in, kernel_size=3, stride=1, padding=1 + ) + + # middle + self.mid = nn.Module() + self.mid.block_1 = ResnetBlock(in_channels=block_in, out_channels=block_in) + self.mid.attn_1 = AttnBlock(block_in) + self.mid.block_2 = ResnetBlock(in_channels=block_in, out_channels=block_in) + + # upsampling + self.up = nn.ModuleList() + for i_level in reversed(range(self.num_resolutions)): + block = nn.ModuleList() + attn = nn.ModuleList() + block_out = ch * ch_mult[i_level] + for _ in range(self.num_res_blocks + 1): + block.append(ResnetBlock(in_channels=block_in, out_channels=block_out)) + block_in = block_out + up = nn.Module() + up.block = block + up.attn = attn + if i_level != 0: + up.upsample = Upsample(block_in) + curr_res = curr_res * 2 + self.up.insert(0, up) # prepend to get consistent order + + # end + self.norm_out = nn.GroupNorm( + num_groups=32, num_channels=block_in, eps=1e-6, affine=True + ) + self.conv_out = nn.Conv2d(block_in, out_ch, kernel_size=3, stride=1, padding=1) + + def forward(self, z: Tensor) -> Tensor: + # get dtype for proper tracing + upscale_dtype = next(self.up.parameters()).dtype + + # z to block_in + h = self.conv_in(z) + + # middle + h = self.mid.block_1(h) + h = self.mid.attn_1(h) + h = self.mid.block_2(h) + + # cast to proper dtype + h = h.to(upscale_dtype) + # upsampling + for i_level in reversed(range(self.num_resolutions)): + for i_block in range(self.num_res_blocks + 1): + h = self.up[i_level].block[i_block](h) + if len(self.up[i_level].attn) > 0: + h = self.up[i_level].attn[i_block](h) + if i_level != 0: + h = self.up[i_level].upsample(h) + + # end + h = self.norm_out(h) + h = swish(h) + h = self.conv_out(h) + return h + + +class DiagonalGaussian(nn.Module): + def __init__(self, sample: bool = True, chunk_dim: int = 1): + super().__init__() + self.sample = sample + self.chunk_dim = chunk_dim 
+ + def forward(self, z: Tensor) -> Tensor: + mean, logvar = torch.chunk(z, 2, dim=self.chunk_dim) + if self.sample: + std = torch.exp(0.5 * logvar) + return mean + std * torch.randn_like(mean) + else: + return mean + + +class AutoEncoder(nn.Module): + def __init__(self, params: AutoEncoderParams): + super().__init__() + self.params = params + self.encoder = Encoder( + resolution=params.resolution, + in_channels=params.in_channels, + ch=params.ch, + ch_mult=params.ch_mult, + num_res_blocks=params.num_res_blocks, + z_channels=params.z_channels, + ) + self.decoder = Decoder( + resolution=params.resolution, + in_channels=params.in_channels, + ch=params.ch, + out_ch=params.out_ch, + ch_mult=params.ch_mult, + num_res_blocks=params.num_res_blocks, + z_channels=params.z_channels, + ) + self.reg = DiagonalGaussian() + + self.scale_factor = params.scale_factor + self.shift_factor = params.shift_factor + + def encode(self, x: Tensor) -> Tensor: + z = self.reg(self.encoder(x)) + z = self.scale_factor * (z - self.shift_factor) + return z + + def decode(self, z: Tensor) -> Tensor: + z = z / self.scale_factor + self.shift_factor + return self.decoder(z) + + def forward(self, x: Tensor) -> Tensor: + return self.decode(self.encode(x)) + + +def load_ae( + ckpt_path: str, + autoencoder_params: AutoEncoderParams, + device: str | torch.device = "cuda", + dtype=torch.bfloat16, +) -> AutoEncoder: + """ + Load the autoencoder from the given model name. + Args: + name (str): The name of the autoencoder. + device (str or torch.device): The device to load the autoencoder to. + Returns: + AutoEncoder: The loaded autoencoder. + """ + # Loading the autoencoder + print("Init AE") + with torch.device(device): + ae = AutoEncoder(autoencoder_params) + + if not os.path.exists(ckpt_path): + raise ValueError( + f"Autoencoder path {ckpt_path} does not exist. Please download it first." 
+ ) + + if ckpt_path is not None: + sd = load_sft(ckpt_path, device=str(device)) + missing, unexpected = ae.load_state_dict(sd, strict=False, assign=True) + if len(missing) > 0: + print(f"Got {len(missing)} missing keys:\n\t" + "\n\t".join(missing)) + if len(unexpected) > 0: + print( + f"Got {len(unexpected)} unexpected keys:\n\t" + "\n\t".join(unexpected) + ) + return ae.to(dtype=dtype) diff --git a/torchtitan/experiments/flux/model/hf_embedder.py b/torchtitan/experiments/flux/model/hf_embedder.py new file mode 100644 index 0000000000000000000000000000000000000000..495fd7a81d16cc0cadeaab3b390a638339ff0f94 --- /dev/null +++ b/torchtitan/experiments/flux/model/hf_embedder.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from torch import nn, Tensor +from transformers import CLIPTextModel, T5EncoderModel + + +class FluxEmbedder(nn.Module): + def __init__(self, version: str, **hf_kwargs): + super().__init__() + self.is_clip = version.startswith("openai") + self.output_key = "pooler_output" if self.is_clip else "last_hidden_state" + + if self.is_clip: + self.hf_module: CLIPTextModel = CLIPTextModel.from_pretrained( + version, **hf_kwargs + ) + else: + self.hf_module: T5EncoderModel = T5EncoderModel.from_pretrained( + version, **hf_kwargs + ) + + self.hf_module = self.hf_module.eval().requires_grad_(False) + + def forward(self, batch_tokens: Tensor) -> Tensor: + """ + batch_tokens: [bsz, embedding_length] + + For T5 Encoder, embeding_length is 768 + For CLIP, embedding_length is 256 + """ + outputs = self.hf_module( + input_ids=batch_tokens.to(self.hf_module.device), + attention_mask=None, + output_hidden_states=False, + ) + return outputs[self.output_key] diff --git a/torchtitan/experiments/flux/model/math.py b/torchtitan/experiments/flux/model/math.py new file mode 100644 index 
def attention(q: Tensor, k: Tensor, v: Tensor, pe: Tensor) -> Tensor:
    """Apply rotary embeddings `pe` to q/k, run scaled dot-product attention, and merge heads.

    The attention output is rearranged from "B H L D" to "B L (H D)".
    """
    q, k = apply_rope(q, k, pe)

    attn_out = torch.nn.functional.scaled_dot_product_attention(q, k, v)
    return rearrange(attn_out, "B H L D -> B L (H D)")


def rope(pos: Tensor, dim: int, theta: int) -> Tensor:
    """Build 2x2 rotation matrices for positions `pos`.

    For each position and each of the `dim // 2` frequencies, the result holds
    the matrix [[cos, -sin], [sin, cos]] of the angle `pos * theta**(-2i/dim)`.
    The result is returned as float32. `dim` must be even.
    """
    assert dim % 2 == 0
    exponents = torch.arange(0, dim, 2, dtype=pos.dtype, device=pos.device) / dim
    inv_freq = 1.0 / (theta**exponents)
    angles = torch.einsum("...n,d->...nd", pos, inv_freq)
    cos, sin = torch.cos(angles), torch.sin(angles)
    # Row-major entries of each 2x2 rotation matrix.
    flat_rot = torch.stack([cos, -sin, sin, cos], dim=-1)
    rot = rearrange(flat_rot, "b n d (i j) -> b n d i j", i=2, j=2)
    return rot.float()


def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor) -> tuple[Tensor, Tensor]:
    """Rotate consecutive feature pairs of `xq` and `xk` by the matrices in `freqs_cis`.

    The computation is done in float32; each output keeps the shape and dtype
    of its input.
    """

    def rotate(x: Tensor) -> Tensor:
        # View the last dim as (num_pairs, 1, 2) so each pair can be combined
        # with the two columns of its 2x2 matrix.
        pairs = x.float().reshape(*x.shape[:-1], -1, 1, 2)
        rotated = freqs_cis[..., 0] * pairs[..., 0] + freqs_cis[..., 1] * pairs[..., 1]
        return rotated.reshape(*x.shape).type_as(x)

    return rotate(xq), rotate(xk)
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from .mg_grouped_gemm import grouped_gemm_forward +from .tma_autotuning import ALIGN_SIZE_M + +__all__ = [ + "grouped_gemm_forward", + "ALIGN_SIZE_M", +] diff --git a/torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/tma_autotuning.py b/torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/tma_autotuning.py new file mode 100644 index 0000000000000000000000000000000000000000..8fdd7a66c6afc6ca2c3d5d50d55cd9e7d1ae78f1 --- /dev/null +++ b/torchtitan/experiments/kernels/triton_mg_group_gemm/torchao_pr/tma_autotuning.py @@ -0,0 +1,240 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# credit - TMAHelper class, AutoTuning are derived from FBGemm: +# https://github.com/pytorch/FBGEMM/blob/main/fbgemm_gpu/experimental/gemm/triton_gemm + +# pyre-unsafe +import functools + +import os +import sys +from typing import Any, Dict, Optional, Tuple + +import torch + +import triton +import triton.language as tl +from triton import Config as TConfig + +from triton.runtime import driver # @manual + +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + + +# ===== Supporting utils, CUDA and TMA ===== + + +class CudaUtils: + @staticmethod + def is_cuda() -> bool: + """Check if Triton is running on CUDA backend.""" + return driver.active.get_current_target().backend == "cuda" + + @staticmethod + def verify_tma() -> bool: + """Check if TMA is supported on the current device.""" + return ( + CudaUtils.is_cuda() + and torch.cuda.is_available() + and torch.cuda.get_device_capability()[0] >= 9 + ) + + @staticmethod + def get_num_sms() -> int: + """Get the number of streaming multiprocessors on the current device.""" + if not CudaUtils.is_cuda(): + raise 
RuntimeError("Triton is not running on CUDA backend") + if not torch.cuda.is_available(): + raise RuntimeError("CUDA is not available") + return torch.cuda.get_device_properties("cuda").multi_processor_count + + +class TmaDescriptorHelper: + """Helper class for managing TMA descriptors in Triton kernels.""" + + class KernelParamWrapper: + """Wrapper to implement the TmaDescKernelParam interface.""" + + def __init__(self, desc: torch.Tensor): + self.desc = desc + + def tma_desc_cpu_ptr(self) -> int: + """Return the CPU pointer to the TMA descriptor.""" + return self.desc.data_ptr() + + def __init__(self, tma_size: int = 128): + """Initialize the TMA descriptor helper. + + Args: + tma_size: Size of the TMA descriptor in bytes + """ + if not CudaUtils.verify_tma(): + raise RuntimeError( + "TMA not supported on this device (requires Hopper or newer)" + ) + if "nv_tma_desc_type" not in dir(tl): + raise RuntimeError( + "TMA grid constant descriptors not supported in your Triton version" + ) + + self.tma_size = tma_size + self.fill_1d_tma_descriptor_inner = driver.active.utils.fill_1d_tma_descriptor + self.fill_2d_tma_descriptor_inner = driver.active.utils.fill_2d_tma_descriptor + self.descriptors: Dict[str, torch.Tensor] = {} + + def init_tma_descriptor(self, name: str) -> None: + """Initialize a TMA descriptor with the given name. + + Call this method outside of the lambda function for grid size. + """ + self.descriptors[name] = torch.empty( + self.tma_size, device="cpu", dtype=torch.int8 + ) + + def fill_1d_tma_descriptor( + self, name: str, ptr: int, dim: int, block_dim: int, element_size: int + ) -> None: + """Fill a 1D TMA descriptor. + + Call this method inside the lambda function for grid size. 
def early_config_prune(configs, named_args, dtsize=None, dtype=None, **kwargs):
    """Filter autotuning configs down to those worth benchmarking.

    A config is dropped when its tiles need more shared memory than the
    device reports, when its M or N tile size is a poor match for the
    problem size, or when ``BLOCK_SIZE_K`` does not evenly divide ``K``.

    Args:
        configs: Candidate configs; each must expose ``kwargs`` (holding
            ``BLOCK_SIZE_M``, ``BLOCK_SIZE_N`` and ``BLOCK_SIZE_K``) and
            ``num_stages``.
        named_args: Kernel arguments by name. Must contain ``G``,
            ``M_BUCKET``, ``N``, ``K`` and one of ``grad_input_ptr``,
            ``c_ptr`` or ``grad_weight_ptr``.
        dtsize: Element size in bytes. When ``None`` it is taken from
            ``element_size()`` of the pointer argument found above.
        dtype: Accepted for interface compatibility; not used.
        **kwargs: Ignored.

    Returns:
        A new list with the configs that passed every check, in input order.

    Raises:
        KeyError: if none of the recognized pointer arguments is present.
    """
    device = torch.cuda.current_device()
    # The output pointer is named differently depending on the kernel.
    for candidate in ("grad_input_ptr", "c_ptr", "grad_weight_ptr"):
        if candidate in named_args:
            ptr_name = candidate
            break
    else:
        raise KeyError("No recognized pointer parameter found in kernel arguments")

    if dtsize is None:
        dtsize = named_args[ptr_name].element_size()

    if not configs:
        return []

    # Problem sizes and device limits are identical for every candidate, so
    # read them once instead of once (or twice) per config.
    G, M, N, K = (
        named_args["G"],
        named_args["M_BUCKET"],
        named_args["N"],
        named_args["K"],
    )
    device_properties = driver.active.utils.get_device_properties(device)
    max_shared_memory = device_properties["max_shared_mem"]
    num_sm = device_properties["multiprocessor_count"]
    MIN_M_TILES = 64
    MIN_N_TILES = 64

    pruned_configs = []
    for config in configs:
        kw = config.kwargs
        BLOCK_M, BLOCK_N, BLOCK_K, num_stages = (
            kw["BLOCK_SIZE_M"],
            kw["BLOCK_SIZE_N"],
            kw["BLOCK_SIZE_K"],
            config.num_stages,
        )

        # 1. make sure we have enough smem
        required_shared_memory = (BLOCK_M + BLOCK_N) * BLOCK_K * num_stages * dtsize
        if required_shared_memory > max_shared_memory:
            continue

        M_PER_GROUP = M // G
        # 2. make sure we don't load M tiles that are too big
        if BLOCK_M > MIN_M_TILES and BLOCK_M > (M_PER_GROUP * 2):
            continue
        # 3. make sure we don't load M tiles that are too small
        if BLOCK_M < 128 and BLOCK_M < (M_PER_GROUP // 2):
            continue

        N_TILES = N // BLOCK_N
        # 4. make sure we don't load N tiles that are too big
        if BLOCK_N > MIN_N_TILES and M * N_TILES < num_sm:
            continue
        # 5. make sure we don't load N tiles that are too small
        if BLOCK_N < 128 and M * N_TILES > 2 * num_sm:
            continue
        # 6. make sure K can be evenly divided
        if K % BLOCK_K != 0:
            continue

        pruned_configs.append(config)

    return pruned_configs
dtype if needed + result = result.to(dtype) + torch.testing.assert_close(result, expected_result, atol=atol, rtol=rtol) + + def test_MG_grouped_gemm_bf16(self) -> None: + for G in (1, 4, 16): + for M in (128, 512, 1024): + print(f"Testing BF16 M*G GroupGeMM with G={G}, M={M}") + self._run_grouped_gemm_test( + (G, M, 1024, 1024), + torch.device("cuda"), + dtype=torch.bfloat16, + atol=1e-5, + rtol=1.6e-2, + ) + + def test_MG_grouped_gemm_deepseek_shapes(self) -> None: + """Test with shapes from Deepseek model.""" + deepseek_shapes = [ + (4, 2048, 4096, 7168), # G, M, N, K + (4, 2048, 7168, 2048), + (8, 512, 4096, 7168), + (8, 512, 7168, 2048), + ] + + device = torch.device("cuda") + + for shape in deepseek_shapes: + G, M, N, K = shape + print(f"Testing BF16 M*G Deepseek shape: G={G}, M={M}, N={N}, K={K}") + self._run_grouped_gemm_test( + shape, device, dtype=torch.bfloat16, atol=1e-5, rtol=1.6e-2 + ) diff --git a/torchtitan/experiments/llama4/infra/__pycache__/parallelize_llama.cpython-312.pyc b/torchtitan/experiments/llama4/infra/__pycache__/parallelize_llama.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e3738a53dcbd5fc28de537fefb1ab2577b1a58eb Binary files /dev/null and b/torchtitan/experiments/llama4/infra/__pycache__/parallelize_llama.cpython-312.pyc differ diff --git a/torchtitan/experiments/llama4/model/args.py b/torchtitan/experiments/llama4/model/args.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5757f08bced3ce6d5f92f343fd6e4beebaf400 --- /dev/null +++ b/torchtitan/experiments/llama4/model/args.py @@ -0,0 +1,109 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
@dataclass
class TransformerModelArgs(BaseModelArgs):
    """Hyper-parameters of the transformer, including its MoE layers."""

    dim: int = 4096
    n_layers: int = 32
    n_heads: int = 32
    n_kv_heads: Optional[int] = None
    vocab_size: int = -1  # filled in from the tokenizer by update_from_config
    multiple_of: int = 256  # make SwiGLU hidden layer size multiple of large power of 2
    ffn_dim_multiplier: Optional[float] = None
    norm_eps: float = 1e-5
    rope_theta: float = 10000

    max_seq_len: int = 2048
    # True: each transformer block is initialised using its own layer ID.
    # False: each block uses the total number of transformer blocks.
    depth_init: bool = True
    norm_type: str = "rmsnorm"

    use_flex_attn: bool = False
    attn_mask_type: str = "causal"
    eos_id: int = 0

    # MoE args
    moe_enabled: bool = True
    num_experts: int = 8
    use_shared_expert: bool = True
    auto_scale_hidden_dim: bool = True
    # how often a MoE layer replaces the feedforward layer in a transformer block
    interleave_moe_layer_step: int = 2
    # token-choice
    top_k: int = 1

    def update_from_config(self, job_config: JobConfig, tokenizer: Tokenizer) -> None:
        """Overwrite the fields that are dictated by the job config and tokenizer."""
        model_config = job_config.model
        self.norm_type = model_config.norm_type
        self.vocab_size = tokenizer.n_words
        self.max_seq_len = job_config.training.seq_len
        self.use_flex_attn = model_config.use_flex_attn

    def get_nparams_and_flops(
        self, model: nn.Module, seq_len: int
    ) -> tuple[int, float]:
        """Count the parameters of ``model`` and estimate FLOPs per token.

        Parameters are bucketed by substring of their name: embeddings
        (also counted as dense), MoE shared expert, MoE router, MoE routed
        experts, and everything else as dense.

        Returns:
            ``(total parameter count, FLOPs per token)``.
        """
        embedding_params = 0
        dense_params = 0
        router_params = 0
        shared_expert_params = 0
        routed_expert_params = 0

        for param_name, param in model.named_parameters():
            count = param.numel()
            if "embedding" in param_name:
                embedding_params += count
                dense_params += count
                continue
            if "moe.shared_expert" in param_name:
                shared_expert_params += count
                continue
            if "moe.router" in param_name:
                router_params += count
                continue
            if "moe.experts" in param_name:
                routed_expert_params += count
                continue
            dense_params += count

        sparse_params = router_params + shared_expert_params + routed_expert_params
        total_params = dense_params + sparse_params
        # Only top_k of num_experts routed experts are counted as active.
        active_sparse_params = (
            router_params
            + shared_expert_params
            + routed_expert_params * self.top_k // self.num_experts
        )

        logger.info(
            f"Total parameter count: dense {dense_params:,}, "
            f"sparse {sparse_params:,}, active {dense_params + active_sparse_params:,}"
        )

        head_dim = self.dim // self.n_heads
        # Why 12 for the self-attention term:
        # 1. each self-attention has 2 matmul in the forward and 4 in the backward (6)
        # 2. flash attention recomputes 1 more matmul in the backward, but
        #    recomputation is not counted when calculating MFU (+0)
        # 3. each matmul performs 1 multiplication and 1 addition (*2)
        # 4. by convention, sparsity in causal attention is not accounted for
        attention_flops = 12 * self.n_layers * self.n_heads * head_dim * seq_len
        weight_flops = 6 * (dense_params - embedding_params + active_sparse_params)

        return total_params, weight_flops + attention_flops
def extract_layer_number(s):
    """Return the integer following ``layers.`` in *s*, or ``None`` if there is none."""
    import re

    found = re.search(r"layers\.(\d+)", s)
    return int(found.group(1)) if found else None


def convert_to_titan_fqns(fqn: str) -> list[str]:
    """Translate one stored checkpoint key into its TorchTitan key(s).

    Keys that do not contain ``language_model.`` are returned unchanged.
    Matching is by substring and the first matching entry wins, so the
    order of the tables below is significant.

    Raises:
        ValueError: if a language-model key matches no known pattern.
    """
    # TODO: Not support video model yet
    if "language_model." not in fqn:
        return [fqn]

    layer = extract_layer_number(fqn)

    if layer is None:
        # Keys that do not belong to a numbered transformer layer.
        top_level_names = (
            ("embed_tokens.weight", "tok_embeddings.weight"),
            ("norm.weight", "norm.weight"),
            ("lm_head.weight", "output.weight"),
        )
        for marker, titan_name in top_level_names:
            if marker in fqn:
                return [titan_name]
        raise ValueError(f"Unknown fqn {fqn}")

    # NOTE(review): input_layernorm maps to ffn_norm and
    # post_attention_layernorm to attention_norm, and the shared expert's
    # gate_proj/up_proj map to w3/w1 -- confirm these pairings are intended.
    per_layer_names = (
        ("feed_forward.experts.down_proj", ("moe.experts.w2",)),
        ("feed_forward.experts.gate_up_proj", ("moe.experts.w1", "moe.experts.w3")),
        ("feed_forward.router.weight", ("moe.router.gate.weight",)),
        ("feed_forward.shared_expert.down_proj.weight", ("moe.shared_expert.w2",)),
        ("feed_forward.shared_expert.gate_proj.weight", ("moe.shared_expert.w3",)),
        ("feed_forward.shared_expert.up_proj.weight", ("moe.shared_expert.w1",)),
        ("input_layernorm.weight", ("ffn_norm.weight",)),
        ("self_attn.k_proj", ("attention.wk.weight",)),
        ("self_attn.o_proj", ("attention.wo.weight",)),
        ("self_attn.q_proj", ("attention.wq.weight",)),
        ("self_attn.v_proj", ("attention.wv.weight",)),
        ("post_attention_layernorm.weight", ("attention_norm.weight",)),
    )
    for marker, suffixes in per_layer_names:
        if marker in fqn:
            return [f"layers.{layer}.{suffix}" for suffix in suffixes]
    raise ValueError(f"Unknown fqn {fqn}")
class CheckpointConverter:
    """Copy a sharded safetensors checkpoint into an existing DTensor state dict.

    Every ``loader_every_n_ranks``-th rank is a loader. In each round a
    loader reads one safetensors file, moves the wanted tensors to its GPU
    and broadcasts them as one flat buffer to the process group. Every rank
    then copies the slice it owns into the local shard of the matching
    DTensor.
    """

    def __init__(
        self,
        process_group: dist.ProcessGroup,
        path: str,
        token: Optional[str] = None,
        loader_every_n_ranks: int = 8,
    ) -> None:
        """Record the group layout and which ranks act as loaders.

        Args:
            process_group: Group used for rank queries, broadcasts and barriers.
            path: Directory holding ``model.safetensors.index.json`` and the
                safetensors files it references.
            token: Stored on the instance; not used by this class.
            loader_every_n_ranks: Ranks ``0, n, 2n, ...`` are loaders.

        Raises:
            ValueError: if ``loader_every_n_ranks`` is not positive or is
                larger than the group size (no rank would be a loader).
        """
        if loader_every_n_ranks <= 0:
            raise ValueError(
                f"loader_every_n_ranks must be positive, got {loader_every_n_ranks}"
            )

        self.path = path
        self.token = token
        self.pg = process_group
        self.my_rank = dist.get_rank(self.pg)

        self.loader_every_n_ranks = loader_every_n_ranks
        self.loader_id = self.my_rank // loader_every_n_ranks
        self.should_load = self.my_rank % loader_every_n_ranks == 0
        self.total_loader = dist.get_world_size(self.pg) // loader_every_n_ranks
        if self.total_loader < 1:
            # Fail on every rank up front. Otherwise rank 0 would divide by
            # zero while assigning files and the other ranks would be left
            # waiting on the broadcast.
            raise ValueError(
                f"loader_every_n_ranks ({loader_every_n_ranks}) exceeds the "
                f"process group size ({dist.get_world_size(self.pg)})"
            )

        self.titan_fqn_to_stored_fqn: dict[str, str] = {}
        self.stored_fqn_to_titan_fqn: dict[str, list[str]] = {}
        self.total_send_bytes = 0
        self.total_recv_bytes = 0

    def convert(self, state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """Fill ``state_dict`` in place from the stored checkpoint and return it."""
        begin = time.time()
        self._load_metadata()
        self._create_fqn_mappings(state_dict)
        rounds = self._get_load_assignments(state_dict)

        logger.info(f"Got {len(rounds)} rounds of assignments.")
        for idx, assignments in enumerate(rounds):
            loader_assignments = assignments.loader_assignments
            loaded_state_dict = None
            # Let each loader to load its own data and move to its GPU.
            logger.info(f"Loading round {idx}")
            if self.should_load and self.loader_id in loader_assignments:
                loaded_state_dict = self._load_round(
                    loader_assignments[self.loader_id]
                )

            torch.cuda.synchronize()
            logger.info(f"Loading round {idx} finished")
            for i in range(self.total_loader):
                if i not in loader_assignments:
                    continue

                logger.info(f"Resharding round {idx} loader {i} data. ")
                if i == self.loader_id and self.should_load:
                    # This rank is the loader. It needs to send the loaded data to
                    # the other ranks.
                    assert loaded_state_dict is not None
                    results = self._reshard_send(
                        loader_assignments[i], loaded_state_dict
                    )
                else:
                    results = self._reshard_receive(loader_assignments[i], state_dict)
                torch.cuda.synchronize()

                logger.info(f"Communication round {idx} loader {i} is done.")
                # Copy exactly once per loader; a second call would only
                # repeat the same copy.
                self._reshard(results, state_dict)
                logger.info(f"Resharding round {idx} loader {i} is done.")
                torch.cuda.synchronize()

        dist.barrier()
        torch.cuda.synchronize()
        logger.info(f"Checkpoint conversion took {time.time() - begin:.2f} seconds.")
        logger.info(f"Total send bytes: {self.total_send_bytes / 1e9:.2f} GB")
        logger.info(f"Total recv bytes: {self.total_recv_bytes / 1e9:.2f} GB")
        return state_dict

    def _load_metadata(self) -> None:
        """Read the ``weight_map`` (stored key -> filename) from the index file."""
        metadata_path = os.path.join(self.path, "model.safetensors.index.json")
        with open(metadata_path, "r") as f:
            self.metadata = json.load(f)["weight_map"]

    def _create_fqn_mappings(self, state_dict: dict[str, torch.Tensor]) -> None:
        """Build both key mappings and drop stored keys that are not needed.

        Stored keys containing ``_extra_state`` and stored keys whose
        TorchTitan names are absent from ``state_dict`` are removed from
        ``self.metadata``. Afterwards the mapped TorchTitan keys must equal
        the keys of ``state_dict``.
        """
        if not self.metadata:
            return

        # Create the mapping from the stored checkpoint keys to TorchTitan keys.
        for fqn in list(self.metadata.keys()):
            # We don't know how to process _extra_state. Skip it before the
            # name conversion, which raises on keys it does not recognize.
            if "_extra_state" in fqn:
                self.metadata.pop(fqn)
                continue

            titan_fqns = convert_to_titan_fqns(fqn)
            if titan_fqns[0] not in state_dict:
                for titan_fqn in titan_fqns:
                    assert titan_fqn not in state_dict
                self.metadata.pop(fqn)
                continue

            self.stored_fqn_to_titan_fqn[fqn] = titan_fqns
            for titan_fqn in titan_fqns:
                self.titan_fqn_to_stored_fqn[titan_fqn] = fqn

        torchtitan_extra = sorted(
            set(state_dict.keys()) - set(self.titan_fqn_to_stored_fqn.keys())
        )
        converted_extra = sorted(
            set(self.titan_fqn_to_stored_fqn.keys()) - set(state_dict.keys())
        )
        assert set(state_dict.keys()) == set(self.titan_fqn_to_stored_fqn.keys()), (
            f"{pprint.pformat(torchtitan_extra)}",
            f"{pprint.pformat(converted_extra)}",
        )

    def _get_load_assignments(
        self, state_dict: dict[str, Any]
    ) -> list[_AssignmentRound]:
        """Plan which loader reads which file in which round.

        Rank 0 builds the plan and broadcasts it, together with both key
        mappings, to every rank in the group.
        """
        if self.my_rank == 0:
            filename_to_metas = defaultdict(list)
            for fqn, filename in self.metadata.items():
                titan_fqns = self.stored_fqn_to_titan_fqn[fqn]
                shape = convert_to_hf_shape(fqn, titan_fqns, state_dict[titan_fqns[0]])
                meta = TensorMetadata(
                    fqn=fqn,
                    shape=shape,
                    # TODO: don't hardcode this
                    dtype=torch.bfloat16,
                )
                filename_to_metas[filename].append(meta)

            # Deal the files out to the loaders round-robin.
            loader_filename_to_metas = [{} for _ in range(self.total_loader)]
            for idx, (filename, metas) in enumerate(filename_to_metas.items()):
                loader_id = idx % self.total_loader
                loader_filename_to_metas[loader_id][filename] = metas

            # Each round hands at most one file to each loader.
            rounds = []
            while any(len(remain) > 0 for remain in loader_filename_to_metas):
                round_assignment = _AssignmentRound(loader_assignments={})
                for loader_id in range(self.total_loader):
                    if not loader_filename_to_metas[loader_id]:
                        continue

                    filename, metas = loader_filename_to_metas[loader_id].popitem()
                    round_assignment.loader_assignments[loader_id] = _Assignment(
                        filename=filename,
                        fqns=[meta.fqn for meta in metas],
                        shapes=[meta.shape for meta in metas],
                        dtypes=[meta.dtype for meta in metas],
                        loader_id=loader_id,
                    )

                rounds.append(round_assignment)

            object_list: list[Any] = [
                rounds,
                self.titan_fqn_to_stored_fqn,
                self.stored_fqn_to_titan_fqn,
            ]
        else:
            object_list = [None, None, None]

        dist.broadcast_object_list(object_list, src=0, group=self.pg)
        rounds = object_list[0]
        self.titan_fqn_to_stored_fqn = object_list[1]
        self.stored_fqn_to_titan_fqn = object_list[2]
        return rounds

    def _load_round(self, assignment: _Assignment) -> dict[str, Any]:
        """Load the assigned file and return its wanted tensors on the GPU."""
        from safetensors.torch import load_file as hf_load_file

        path = os.path.join(self.path, assignment.filename)
        state_dict = hf_load_file(path)
        wanted = set(assignment.fqns)
        return {k: v.to(device="cuda") for k, v in state_dict.items() if k in wanted}

    def _reshard_send(
        self,
        assignment: _Assignment,
        loaded_state_dict: dict[str, torch.Tensor],
    ) -> dict[str, torch.Tensor]:
        """Broadcast this loader's tensors as one flat buffer.

        The tensors are packed in ``assignment.fqns`` order because that is
        the order in which ``_reshard_receive`` slices the buffer apart.

        Raises:
            KeyError: if an assigned key was not found in the loaded file.
        """
        flatten_tensors = [loaded_state_dict[fqn].flatten() for fqn in assignment.fqns]
        flatten_tensor = torch.concat(flatten_tensors)
        assert self.loader_id == assignment.loader_id
        rank = self.loader_id * self.loader_every_n_ranks
        assert rank == self.my_rank
        logger.info(
            f"Sending {assignment.filename} from {rank} {self.loader_id} "
            f"{flatten_tensor.shape=} {flatten_tensor.dtype=} {loaded_state_dict.keys()=}."
        )
        logger.info(f"Sending {assignment}")
        dist.broadcast(flatten_tensor, src=rank, group=self.pg)
        self.total_send_bytes += flatten_tensor.numel() * flatten_tensor.element_size()
        return loaded_state_dict

    def _reshard_receive(
        self, assignment: _Assignment, state_dict: dict[str, torch.Tensor]
    ) -> dict[str, torch.Tensor]:
        """Receive a loader's flat buffer and split it into per-key views.

        The buffer is allocated with ``assignment.dtypes[0]`` for all tensors.
        ``state_dict`` is accepted but not used.
        """
        flatten_tensor = torch.empty(
            sum(math.prod(s) for s in assignment.shapes),
            dtype=assignment.dtypes[0],
            device="cuda",
        )
        rank = assignment.loader_id * self.loader_every_n_ranks
        logger.info(
            f"Receiving {assignment.filename} from {rank} "
            f"{flatten_tensor.shape=} {flatten_tensor.dtype=}"
        )
        logger.info(f"Receiving {assignment}")
        dist.broadcast(flatten_tensor, src=rank, group=self.pg)
        self.total_recv_bytes += flatten_tensor.numel() * flatten_tensor.element_size()

        ret: dict[str, torch.Tensor] = {}
        loc = 0
        for fqn, shape in zip(assignment.fqns, assignment.shapes):
            n_ele = math.prod(shape)
            ret[fqn] = flatten_tensor[loc : loc + n_ele].view(shape)
            loc += n_ele
        return ret

    def _reshard(
        self,
        result: dict[str, torch.Tensor],
        state_dict: dict[str, torch.Tensor],
    ) -> None:
        """Copy this rank's slice of every full tensor into its DTensor shard."""

        def _inplace_copy(fqn: str, full_tensors: list[torch.Tensor]):
            titan_fqns = self.stored_fqn_to_titan_fqn[fqn]
            assert len(titan_fqns) == len(full_tensors)
            for titan_fqn, full_tensor in zip(titan_fqns, full_tensors):
                dtensor = state_dict[titan_fqn]
                assert isinstance(dtensor, DTensor)
                assert dtensor.shape == full_tensor.shape, (
                    (fqn, titan_fqn),
                    dtensor.shape,
                    full_tensor.shape,
                )
                # Which part of the full tensor this rank's shard covers.
                shape, offset = compute_local_shape_and_global_offset(
                    full_tensor.shape, dtensor.device_mesh, dtensor.placements
                )
                slices = [
                    slice(cur_offset, cur_offset + cur_shape)
                    for cur_shape, cur_offset in zip(shape, offset)
                ]
                logger.debug(
                    f"Copying {titan_fqn} with {slices=} {dtensor._local_tensor.shape=} "
                    f"{shape=} {offset=} {self.my_rank=} {dtensor.shape=} {full_tensor.shape=} "
                    f"{dtensor.placements=} {dtensor.device_mesh=} "
                )
                dtensor.to_local().copy_(full_tensor[slices].to(dtensor.dtype))

        for fqn, full_tensor in result.items():
            full_tensors = convert_to_titan_tensors(fqn, full_tensor)
            _inplace_copy(fqn, full_tensors)
stored_tensor.shape == full_tensor.shape, fqn + assert stored_tensor.dtype == full_tensor.dtype, fqn + assert stored_tensor.device == full_tensor.device, fqn + assert torch.allclose(stored_tensor, full_tensor), fqn + + for k, v in state_dict.items(): + read_and_verify_tensor(k, v) + + +if __name__ == "__main__": + init_logger() + config = JobConfig() + config.parser.add_argument( + "--checkpoint.convert_path", + type=str, + default="", + help="""Specify the path of the target checkpoint to convert.""", + ) + config.parser.add_argument( + "--checkpoint.convert_hf_token", + type=str, + default="", + help="""Specify hf token.""", + ) + config.parser.add_argument( + "--checkpoint.convert_load_every_n_ranks", + type=int, + default=8, + help=""" + Specify the interval at which ranks are assigned to load checkpoints. + + For example, if this number is 4, then ranks 0, 4, 8, ... will load the + checkpoint. Each loader is responsible for loading one file. If there + are more loaders than files, only the first few loaders will be assigned + to load the checkpoint. The default value is 8. + """, + ) + config.parser.add_argument( + "--checkpoint.fake_model", + action="store_true", + help="""If true, the model will be fake.""", + ) + config.parse_args() + assert config.checkpoint.convert_path != "" + + trainer: Optional[Trainer] = None + + try: + trainer = Trainer(config) + if os.path.exists(trainer.checkpointer.folder): + raise RuntimeError( + "The checkpoint folder already exists. Abort to avoid overwriting " + f"the checkpoint. 
{trainer.checkpointer.folder=}" + ) + if config.checkpoint.fake_model: + state_dict = _create_verified_state_dict( + trainer.world_mesh.get_group(), trainer.world_mesh + ) + else: + state_dict = trainer.checkpointer.states[MODEL].state_dict() + + size = 0 + for v in state_dict.values(): + size += v.numel() * v.element_size() + logger.info(f"Total size of the model: {size / 1e9:.2f} GB") + + # Do not support PP yet, we will need to iterate over the PP dimension and + # extract the corresponding state_dict and device_mesh. + if "freqs_cis" in state_dict: + state_dict.pop("freqs_cis") + + # Our tokenizer is not up-to-date yet. + tok_embeddings_weight = state_dict.pop("tok_embeddings.weight") + output_weight = state_dict.pop("output.weight") + state_dict = CheckpointConverter( + process_group=trainer.world_mesh.get_group(), + path=config.checkpoint.convert_path, + token=config.checkpoint.convert_hf_token, + loader_every_n_ranks=config.checkpoint.convert_load_every_n_ranks, + ).convert(state_dict) + state_dict["tok_embeddings.weight"] = tok_embeddings_weight + state_dict["output.weight"] = output_weight + + class DummyModel: + def __init__(self, state_dict: dict[str, torch.Tensor]) -> None: + self._state_dict = state_dict + + def state_dict(self) -> dict[str, torch.Tensor]: + return self._state_dict + + if config.checkpoint.fake_model: + begin = time.time() + _verify_state_dict( + state_dict, + config.checkpoint.convert_path, + trainer.world_mesh.get_rank(), + ) + dist.barrier() + logger.info(f"Verifies state_dict {time.time() - begin}.") + else: + # oh, this is pretty bad, when can we get rid of the freqs_cis issue? 
def padded_collate(
    batch: List[Dict[str, List[int]]],
    padding_idx: int = 0,
    ignore_idx: int = -100,
) -> Dict[str, torch.Tensor]:
    """Pad a batch of sequences to the longest sequence length in the batch, and
    convert integer lists to tensors.

    Each sample's ``input_ids`` and ``labels`` may be a list of ints or an
    existing tensor; tensors are used as they are.

    Args:
        batch (List[Dict[str, List[int]]]): A list of dictionaries containing input, label pairs.
        padding_idx (int): Padding index for input ids. Defaults to 0.
        ignore_idx (int): Padding index for labels. Defaults to -100.

    Returns:
        Dict[str, torch.Tensor]: Collated input and label tensors, both padded
        to the same sequence length.

    Example:
        >>> token_pairs = [
        ...     {"input_ids": [1, 2, 3], "labels": [4, 5, 6]},
        ...     {"input_ids": [7], "labels": [10]},
        ... ]
        >>> collated = padded_collate(token_pairs, padding_idx=0, ignore_idx=-100)
        >>> collated["input_ids"]
        tensor([[1, 2, 3],
                [7, 0, 0]])
        >>> collated["labels"]
        tensor([[   4,    5,    6],
                [  10, -100, -100]])
    """
    # pad_sequence only accepts tensors, so convert list inputs first.
    input_ids = pad_sequence(
        [torch.as_tensor(sample["input_ids"]) for sample in batch],
        batch_first=True,
        padding_value=padding_idx,
    )
    labels = pad_sequence(
        [torch.as_tensor(sample["labels"]) for sample in batch],
        batch_first=True,
        padding_value=ignore_idx,
    )

    input_ids_seq_len = input_ids.shape[-1]
    labels_seq_len = labels.shape[-1]

    # Hack to pad correctly and not use max_seq_len, which is costly:
    # right-pad whichever of the two came out shorter.
    if input_ids_seq_len > labels_seq_len:
        labels = F.pad(
            labels, (0, input_ids_seq_len - labels_seq_len), value=ignore_idx
        )
    elif labels_seq_len > input_ids_seq_len:
        input_ids = F.pad(
            input_ids,
            (0, labels_seq_len - input_ids_seq_len),
            value=padding_idx,
        )
    return {"input_ids": input_ids, "labels": labels}
+ + ``batch`` is expected to be a list of sample dicts containing the following:: + - "input_ids": List[int] of length text_seq_len, varies across samples + - "labels": List[int] of length text_seq_len, varies across samples + - "encoder_input": Dict[str, List[torch.Tensor]] + - "images": List[torch.Tensor], each with shape (n_tiles, c, h, w) + - "aspect_ratio": List[torch.Tensor], each with shape (2, ) to indicate h_ratio, w_ratio + + Shape notation: + - c = channel dim + - h = height dim + - w = weight dim + + Note: + For each element in the batch, ``len(images) == len(aspect_ratio)``. + + This collater does the following: + (1) Pad text sequence and encoder mask to the longest sequence length in the batch + (2) Pad image tensors in the tile dimension with zeros to the largest number + of tiles in the batch + (3) Add empty images of zeros to samples up to max number of images in the batch + (4) Pad aspect ratios with (1,1) for all added padding images + + Args: + batch (List[Dict[str, Any]]): A list of sample dicts containing input_ids, + labels, images, and aspect_ratio. + padding_idx (int): Padding index for input token ids. Defaults to 0. + ignore_idx (int): Padding index for labels. Defaults to -100. + pad_max_tiles (Optional[int]): Maximum number of tiles to pad to. If None, will pad to the largest number of tiles + in the batch. Defaults to None. + pad_max_images (Optional[int]): Maximum number of images to pad to. If None, will pad to the largest number of images + in the batch. Defaults to None. + + Returns: + Dict[str, Tensor]: Collated tokens, labels, images, aspect_ratio tensors. + - tokens: Tensor of shape (bsz, max_seq_len) + - labels: Tensor of shape (bsz, max_seq_len) + - images: Tensor of shape (bsz, max_num_images, max_num_tiles, c, h, w) + - aspect_ratio: Tensor of shape (bsz, max_num_images, 2) + + Example: + >>> image_id = 1 + >>> tokens_per_tile = 5 + >>> c, h, w = 1, 1, 1 + >>> batch = [ + ... { + ... 
"input_ids": [1, 2, 1, 3], "labels": [4, 5, 6, 7], + ... "encoder_input": { + ... # One image with two tiles, one image with three tiles + ... "images": [torch.ones(2, c, h, w), torch.ones(3, c, h, w)], + ... "aspect_ratio": [torch.tensor([1, 2]), torch.tensor([1, 3])], + ... }, + ... }, + ... { + ... "input_ids": [1, 4], "labels": [8, 9], + ... "encoder_input": { + ... # One image with four tiles + ... "images": [torch.ones(4, c, h, w)], + ... "aspect_ratio": [torch.tensor([2, 2])], + ... }, + ... }, + ... ] + ... collator = MultiModalCollator(pad_max_tiles=4) + >>> model_inputs = collator(batch=batch) + >>> print(model_inputs["input_ids"]) + tensor([[1, 2, 1, 3], + [1, 4, 0, 0]]) + >>> print(model_inputs["labels"]) + tensor([[4, 5, 6, 7], + [8, 9, -100, -100]]) + >>> print(model_inputs["encoder_input"]["images"].shape) # (bsz, max_num_images, max_num_tiles, c, h, w) + torch.Size([2, 2, 4, 1, 1, 1]) + >>> print(model_inputs["encoder_input"]["aspect_ratio"].shape) # (bsz, max_num_images, 2) + torch.Size([2, 2, 2]) + >>> print(model_inputs["encoder_input"]["images"][0, 0, ...]) # Image with two tiles got padded to four + tensor([[[[1.]]], [[[1.]]], [[[0.]]], [[[0.]]]]) + >>> print(model_inputs["encoder_input"]["images"][0, 1, ...]) # Image with three tiles got padded to four + tensor([[[[1.]]], [[[1.]]], [[[1.]]], [[[0.]]]]) + >>> print(model_inputs["encoder_input"]["images"][1, 0, ...]) # Image with four tiles did not get padded + tensor([[[[1.]]], [[[1.]]], [[[1.]]], [[[1.]]]]) + >>> print(model_inputs["encoder_input"]["images"][1, 1, ...]) # Extra padding image was added to second sample + tensor([[[[0.]]], [[[0.]]], [[[0.]]], [[[0.]]]]) + """ + # Text tokens can be handled independently by existing collaters + text_only = [ + {"input_ids": sample["input_ids"], "labels": sample["labels"]} + for sample in batch + ] + collated_text = padded_collate(text_only, self.padding_idx, self.ignore_idx) + + if self.pad_max_tiles is None: + # Get max number of tiles in batch 
+ max_num_tiles = max(sample["images_tiles"].shape[0] for sample in batch) + else: + max_num_tiles = self.pad_max_tiles + + # Pad images and aspect ratios to max number of tiles + batch_images = [] + batch_aspect_ratios = [] + + for sample in batch: + sample_images = [] + for image in sample["encoder_input"]["images"]: + # Single image in each sample has shape (n_tiles, c, h, w) + n_tiles = image.shape[0] + # Single mask in each sample corresponds to a single image and has shape (text_seq_len, image_seq_len) + # where image_seq_len = n_tiles * tokens_per_tile + padding_tiles = max_num_tiles - n_tiles + + # Image should now have shape (max_num_tiles, c, h, w) + padded_image = F.pad( + image, (0, 0, 0, 0, 0, 0, 0, padding_tiles), value=0 + ) + + sample_images.append(padded_image) + # Stack multiple images and masks per sample in num_images dimension + batch_images.append(torch.stack(sample_images)) + batch_aspect_ratios.append( + torch.stack(sample["encoder_input"]["aspect_ratio"]) + ) + # Finally, pad images, masks, aspect ratios to max number of images in batch + # (bsz, max_num_images, max_num_tiles, c, h, w) + collated_images = pad_sequence(batch_images, batch_first=True, padding_value=0) + # (bsz, max_num_images, 2) + collated_aspect_ratios = pad_sequence( + batch_aspect_ratios, batch_first=True, padding_value=1 + ) + + batch_dict = { + "input_ids": collated_text["input_ids"], + "labels": collated_text["labels"], + "encoder_input": { + "images": collated_images, + "aspect_ratio": collated_aspect_ratios, + }, + } + + return batch_dict diff --git a/torchtitan/experiments/multimodal/model.py b/torchtitan/experiments/multimodal/model.py new file mode 100644 index 0000000000000000000000000000000000000000..419b3f8ab718923ac1478f951e22b9bd6391be5d --- /dev/null +++ b/torchtitan/experiments/multimodal/model.py @@ -0,0 +1,1464 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +# +# Llama 3 is licensed under the LLAMA 3 Community License, +# Copyright (c) Meta Platforms, Inc. All Rights Reserved. + +import math +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +@dataclass +class ModelArgs: + # encoder part + encoder_embed_dim: int = 4096 + encoder_num_layers: int = 32 + num_layers_projection: int = 32 + encoder_num_heads: int = 32 + encoder_num_kv_heads: Optional[int] = None + patch_size: int = 1 + tile_size: int = 128 + max_num_tiles: int = 8 + activation: nn.Module = nn.GELU() + # in_channels (int): The number of image input channels. + in_channels: int = 3 + # return_intermediates (Optional[List[int]]): The indices of hidden layers to return. + # If provided, it will return the intermediate results of the transformer layers + # before they go through a next layer. For example, ``return_intermediates=[0,3]`` + # will return the tokens before they go through the first and fourth layers. 
class Fp32LayerNorm(nn.LayerNorm):
    """
    :class:`~torch.nn.LayerNorm` variant that always normalizes in float32.

    The input and the affine parameters (when present) are upcast to float32
    for the computation, and the result is cast back to the input's dtype.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): Input tensor.
        Returns:
            torch.Tensor: Normalized tensor with the same shape and dtype as ``x``.
        """
        # The affine parameters are optional (e.g. elementwise_affine=False).
        weight_fp32 = None if self.weight is None else self.weight.float()
        bias_fp32 = None if self.bias is None else self.bias.float()
        normalized = nn.functional.layer_norm(
            x.float(), self.normalized_shape, weight_fp32, bias_fp32, self.eps
        )
        return normalized.type_as(x)
+ The returned tensor contains complex values in complex64 data type. + + Args: + dim (int): Dimension of the frequency tensor. + end (int): End index for precomputing frequencies. + theta (float, optional): Scaling factor for frequency computation. Defaults to 10000.0. + + Returns: + torch.Tensor: Precomputed frequency tensor with complex exponentials. + """ + freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) + t = torch.arange(end, device=freqs.device) + freqs = torch.outer(t, freqs).float() + freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 + return freqs_cis + + +def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor) -> torch.Tensor: + """ + Reshape frequency tensor for broadcasting it with another tensor. + + This function reshapes the frequency tensor to have the same shape as the target tensor 'x' + for the purpose of broadcasting the frequency tensor during element-wise operations. + + The input freqs_cis tensor is assumed to be of shape (max_seqlen, dim), + and the first seqlen elements will be sliced, but dim must match x. + + Args: + freqs_cis (torch.Tensor): Frequency tensor to be reshaped. + x (torch.Tensor): Target tensor for broadcasting compatibility. + + Returns: + torch.Tensor: Reshaped frequency tensor. + """ + ndim = x.ndim + assert 0 <= 1 < ndim + seqlen = x.shape[1] + freqs_cis = freqs_cis[0:seqlen] + assert freqs_cis.shape == (seqlen, x.shape[-1]) + shape = [d if i == 1 or i == ndim - 1 else 1 for i, d in enumerate(x.shape)] + return freqs_cis.view(*shape) + + +def apply_rotary_emb( + xq: torch.Tensor, + xk: torch.Tensor, + freqs_cis: torch.Tensor, +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Apply rotary embeddings to input tensors using the given frequency tensor. + + This function applies rotary embeddings to the given query 'xq' and key 'xk' tensors using the provided + frequency tensor 'freqs_cis'. 
def repeat_kv(x: torch.Tensor, num_rep: int) -> torch.Tensor:
    """Repeat each key/value head ``num_rep`` times along the head axis.

    Equivalent to ``torch.repeat_interleave(x, dim=2, repeats=num_rep)`` for an
    input of shape (bsz, seq_len, num_kv_heads, head_dim). When ``num_rep`` is 1
    the input tensor itself is returned.
    """
    batch, length, kv_heads, width = x.shape
    if num_rep == 1:
        return x
    # Insert a repeat axis after the head axis, broadcast it, then fold it
    # into the head axis.
    with_rep_axis = x[:, :, :, None, :]
    broadcast = with_rep_axis.expand(batch, length, kv_heads, num_rep, width)
    return broadcast.reshape(batch, length, kv_heads * num_rep, width)
+ + """ + + def __init__(self, model_args: ModelArgs): + super().__init__() + self.num_heads = model_args.encoder_num_heads + self.num_kv_heads = ( + model_args.encoder_num_heads + if model_args.encoder_num_kv_heads is None + else model_args.encoder_num_kv_heads + ) + self.num_rep = self.num_heads // self.num_kv_heads + self.head_dim = model_args.encoder_embed_dim // model_args.encoder_num_heads + + self.wq = nn.Linear( + model_args.encoder_embed_dim, + model_args.encoder_num_heads * self.head_dim, + bias=False, + ) + self.wk = nn.Linear( + model_args.encoder_embed_dim, self.num_kv_heads * self.head_dim, bias=False + ) + self.wv = nn.Linear( + model_args.encoder_embed_dim, self.num_kv_heads * self.head_dim, bias=False + ) + self.wo = nn.Linear( + model_args.encoder_num_heads * self.head_dim, + model_args.encoder_embed_dim, + bias=False, + ) + self.is_causal = model_args.is_causal + + def init_weights(self, init_std: float): + for linear in (self.wq, self.wk, self.wv): + nn.init.trunc_normal_(linear.weight, mean=0.0, std=0.02) + nn.init.trunc_normal_(self.wo.weight, mean=0.0, std=init_std) + + def forward( + self, + x: torch.Tensor, + freqs_cis: torch.Tensor, + ): + """ + Forward pass of the attention module. + + Args: + x (torch.Tensor): Input tensor. + freqs_cis (torch.Tensor): Precomputed frequency tensor. + + Returns: + torch.Tensor: Output tensor after attention. + + """ + bs, seqlen, _ = x.shape + xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) + + # Use -1 instead of `num_heads` (or `num_kv_heads`) to infer the actual + # local heads from sizes of xq, xk, and xv as TP may have sharded them + # after the above linear ops. 
class FeedForward(nn.Module):
    """
    Two-layer feed-forward module: ``w2(activation(w1(x)))``.

    The effective hidden width is derived from ``hidden_dim``: it is scaled by
    2/3, optionally multiplied by ``ffn_dim_multiplier``, and finally rounded
    up to the next multiple of ``multiple_of``.

    Args:
        dim (int): Input (and output) dimension.
        hidden_dim (int): Nominal hidden dimension before scaling and rounding.
        multiple_of (int): The hidden width is rounded up to a multiple of this value.
        ffn_dim_multiplier (Optional[float]): Custom multiplier for the hidden width.
        activation (nn.Module): Activation applied between the two projections.
            Defaults to ``nn.SiLU()``.

    Attributes:
        w1 (Linear): Bias-free projection from ``dim`` to the hidden width.
        w2 (Linear): Bias-free projection from the hidden width back to ``dim``.
    """

    def __init__(
        self,
        dim: int,
        hidden_dim: int,
        multiple_of: int,
        ffn_dim_multiplier: Optional[float],
        activation: nn.Module = nn.SiLU(),
    ):
        super().__init__()
        width = int(2 * hidden_dim / 3)
        if ffn_dim_multiplier is not None:
            # custom dim factor multiplier
            width = int(ffn_dim_multiplier * width)
        # Ceil-divide, then scale back up: smallest multiple of `multiple_of` >= width.
        num_chunks = (width + multiple_of - 1) // multiple_of
        width = multiple_of * num_chunks

        self.activation = activation
        self.w1 = nn.Linear(dim, width, bias=False)
        self.w2 = nn.Linear(width, dim, bias=False)

    def forward(self, x):
        hidden = self.activation(self.w1(x))
        return self.w2(hidden)

    def init_weights(self, init_std: float):
        # w1 uses a fixed std; only the output projection uses the caller's std.
        for layer, std in ((self.w1, 0.02), (self.w2, init_std)):
            nn.init.trunc_normal_(layer.weight, mean=0.0, std=std)
+ """ + + def __init__( + self, + max_num_tiles: int, + emb_dim: int, + ): + super().__init__() + self.max_num_tiles = max_num_tiles + self.emb_dim = emb_dim + self.embedding = nn.Parameter( + torch.randn(max_num_tiles, max_num_tiles, 1, emb_dim) / math.sqrt(emb_dim) + ) + self.gate = nn.Parameter(torch.zeros(1)) + + def forward(self, x: torch.Tensor, aspect_ratio: torch.Tensor): + """ + args: + x (torch.Tensor): torch.Tensor with shape (bsz * num_imgs, num_tiles, num_tokens, emb_dim). + aspect_ratio (torch.Tensor): torch.Tensor with shape (bsz * num_imgs, 2), + representing the aspect ratio of the image before tile-cropping, e.g. (2,1). + returns: + torch.Tensor: The input tensor with added positional embeddings. + """ + bsz_and_num_imgs, num_tiles, num_tokens, emb_dim = x.shape + + for batch_idx, (num_tiles_h, num_tiles_w) in enumerate(aspect_ratio): + # When we batch images, all are padded to the same amount of tiles. + # The aspect_ratio lets us know the non padded tiles for each image. + # We only add positional encoding to those. + num_non_padded_tiles = int(num_tiles_h * num_tiles_w) + + # We get only the positional encoding for non padded tiles, + # i.e. num_tiles_h, num_tiles_w. + pos_embed = self.embedding[:num_tiles_h, :num_tiles_w, :, :] + + # Add pos encoding to the non padded tiles. + pos_embed = pos_embed.reshape(num_non_padded_tiles, 1, self.emb_dim) + x[batch_idx, :num_non_padded_tiles, :, :] += pos_embed * self.gate.tanh() + + return x + + +class TokenPositionalEmbedding(nn.Module): + """ + Token positional embedding for images, different for every token in an image. + + Args: + emb_dim (int): The dimensionality of each token embedding. + tile_size (int): The size of your image tiles, if the image was tile-cropped in advance. Otherwise, + the size of the input image. In this case, the function will consider your image as a single tile. + patch_size (int): The size of each patch. Used to divide the tiles into patches. + E.g. 
class TiledTokenPositionalEmbedding(nn.Module):
    """
    Gated token positional embedding for tiled images.

    Two learned embeddings are combined through a single tanh gate ``g``:

    * local_token_positional_embedding: shared by every tile, different for
      every token. It is added to all tiles, weighted by ``1 - g``.
    * global_token_positional_embedding: different for every tile and every
      token. It is added only to the non-padded tiles, weighted by ``g``.

    Notice that tile is different from patch (token).

    Args:
        max_num_tiles (int): The maximum number of tiles an image can be divided into.
        emb_dim (int): The dimensionality of each token embedding.
        tile_size (int): The size of your image tiles, if the image was tile-cropped in advance. Otherwise,
            the size of the input image. In this case, the function will consider your image as a single tile.
        patch_size (int): The size of each patch. Used to divide the tiles into patches.
            E.g. for ``patch_size=40``, a tile of shape (400, 400) will have 10x10 grid of patches
            with shape (40, 40) each.
    """

    def __init__(
        self, max_num_tiles: int, emb_dim: int, tile_size: int, patch_size: int
    ) -> None:
        super().__init__()
        grid = tile_size // patch_size
        self.num_tokens_per_tile = grid**2 + 1  # +1 for cls token
        init_scale = emb_dim**-0.5

        # Shared across tiles: one vector per token position (incl. CLS).
        local_init = torch.randn((self.num_tokens_per_tile, emb_dim))
        self.local_token_positional_embedding = nn.Parameter(init_scale * local_init)

        # Per tile position (row, column) and per token position.
        global_init = torch.randn(
            max_num_tiles, max_num_tiles, self.num_tokens_per_tile, emb_dim
        )
        self.global_token_positional_embedding = nn.Parameter(init_scale * global_init)

        # Gate starts at zero, so initially only the local embedding contributes.
        self.gate = nn.Parameter(torch.zeros(1))

    def forward(self, x: torch.Tensor, aspect_ratio: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): torch.Tensor with shape (bsz * num_imgs, num_tiles, num_tokens, emb_dim).
            aspect_ratio (torch.Tensor): torch.Tensor with shape (bsz * num_imgs, 2),
                where aspect_ratio[k] represents the aspect ratio of the k^th image
                of the batch before tile-cropping, e.g. aspect_ratio[k] = (2,1).
        Returns:
            torch.Tensor: A new tensor equal to ``x`` plus the positional embeddings.
        """
        n_imgs, n_tiles, n_tokens, dim = x.shape
        gate = self.gate.tanh()

        # Local embedding: same for every tile. This also allocates a new tensor,
        # so the in-place updates below do not touch the caller's input.
        out = x + (self.local_token_positional_embedding * (1 - gate))
        out = out.view(n_imgs, n_tiles, n_tokens, dim)

        # Global embedding: different for every tile, applied per image because
        # images in a batch are padded to a common tile count and aspect_ratio
        # tells how many leading tiles are real.
        for img_idx, (tiles_h, tiles_w) in enumerate(aspect_ratio):
            real_tiles = int(tiles_h * tiles_w)
            tile_embed = self.global_token_positional_embedding[
                :tiles_h, :tiles_w, :, :
            ].reshape(real_tiles, self.num_tokens_per_tile, dim)
            out[img_idx, :real_tiles, :, :] += tile_embed * gate

        return out
class Vit(nn.Module):
    """
    Implementation of the ViT architecture (https://arxiv.org/abs/2010.11929),
    with support for tile-cropped images, outputting of hidden layers.

    (credit for the documentation below: ``vision_transformer.py``).

    ViT is a transformer architecture that takes in images and outputs N embedded tokens that
    represent this image. Each image is divided into **patches** by a convolution.
    These patches are flattened and subsequently treated as **tokens** by the transformer.

    To further enhance the performance of ViT and avoid downscaling images, we support tile-cropped images,
    which are images divided into **tiles** during the preprocessing stage. For example, instead of
    downscaling an 800x400 image to fit 400x400, we may crop it into two 400x400 tiles,
    if the ``tile_size=400``.

    Each of these tiles is further broken down into patches by a convolution operation. For example, if
    your ``patch_size=40``, then each (400, 400) tile will become a grid of 10x10 patches, and your whole image will have
    num_tiles * n_tokens -> num_tiles * (10x10 patches + 1 CLS token) -> num_tiles * 101.

    Before the transformer layers, a CLS token is added to each tile as the first token.
    In transformers, a token called CLS is a special token that is added to the beginning of each sequence.
    This token can be used to represent the whole input, instead of using a pooling operation, for example.

    To help the model "see" the whole image, we use positional embeddings. If your image
    was tile-cropped, then you need to use tile positional embeddings:

    - token_pos_embedding (tiled): :class:`TiledTokenPositionalEmbedding`
    - pre_tile_pos_embed: :class:`TilePositionalEmbedding`
    - post_tile_pos_embed: :class:`TilePositionalEmbedding`

    Otherwise, pre and post tile_pos_embed should be None and all you need is a simple
    token positional embedding:

    - token_pos_embedding (not tiled): :class:`TokenPositionalEmbedding`

    All images will be considered as a stack of tiles, even if your image was not tile-cropped. In such cases,
    your image would be composed of a single tile.

    In summary:

    1) An image is broken down into tiles during preprocessing.
    2) In the ViT, the tiles will be broken down into patches.
    3) The patches will be flattened and transformed. We call them tokens, because that's how the transformer sees them.

    Image: shape (8x8)

    .. code-block:: text

        |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |
        |  9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 |
        | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 |
        | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
        | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 |
        | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
        | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 |
        | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 |

    Tiles: shape (4,4,4) # (num_tiles, tile_size, tile_size)

    .. code-block:: text

        |  1 |  2 |  3 |  4 |    |  5 |  6 |  7 |  8 |
        |  9 | 10 | 11 | 12 |    | 13 | 14 | 15 | 16 |
        | 17 | 18 | 19 | 20 |    | 21 | 22 | 23 | 24 |
        | 25 | 26 | 27 | 28 |    | 29 | 30 | 31 | 32 |

        | 33 | 34 | 35 | 36 |    | 37 | 38 | 39 | 40 |
        | 41 | 42 | 43 | 44 |    | 45 | 46 | 47 | 48 |
        | 49 | 50 | 51 | 52 |    | 53 | 54 | 55 | 56 |
        | 57 | 58 | 59 | 60 |    | 61 | 62 | 63 | 64 |

    Patches: shape (4,4,2,2) # (num_tiles, num_patches_per_tile, patch_size, patch_size)

    .. code-block:: text

        |  1 |  2 |    |  3 |  4 |    |  5 |  6 |    |  7 |  8 |
        |  9 | 10 |    | 11 | 12 |    | 13 | 14 |    | 15 | 16 |

        | 17 | 18 |    | 19 | 20 |    | 21 | 22 |    | 23 | 24 |
        | 25 | 26 |    | 27 | 28 |    | 29 | 30 |    | 31 | 32 |

        | 33 | 34 |    | 35 | 36 |    | 37 | 38 |    | 39 | 40 |
        | 41 | 42 |    | 43 | 44 |    | 45 | 46 |    | 47 | 48 |

        | 49 | 50 |    | 51 | 52 |    | 53 | 54 |    | 55 | 56 |
        | 57 | 58 |    | 59 | 60 |    | 61 | 62 |    | 63 | 64 |

    token: shape (4, 4, 4) # (num_tiles, num_patches_per_tile, emb_dim)

    .. code-block:: text

        |  1 |  2 |  9 | 10 |    |  3 |  4 | 11 | 12 |    | 17 | 18 | 25 | 26 |    | 19 | 20 | 27 | 28 |
        | ... continuation of data ...
        | ... continuation of data ...
        | 37 | 38 | 45 | 46 |    | 39 | 40 | 47 | 48 |    | 53 | 54 | 61 | 62 |    | 55 | 56 | 63 | 64 |

    For the positional embeddings:

    Same for every tile, different for every token.

    - :class:`TokenPositionalEmbedding`

    .. code-block:: text

        |  1 |  2 |  3 |  4 |    |  1 |  2 |  3 |  4 |
        |  9 | 10 | 11 | 12 |    |  9 | 10 | 11 | 12 |
        | 17 | 18 | 19 | 20 |    | 17 | 18 | 19 | 20 |
        | 25 | 26 | 27 | 28 |    | 25 | 26 | 27 | 28 |

        |  1 |  2 |  3 |  4 |    |  1 |  2 |  3 |  4 |
        |  9 | 10 | 11 | 12 |    |  9 | 10 | 11 | 12 |
        | 17 | 18 | 19 | 20 |    | 17 | 18 | 19 | 20 |
        | 25 | 26 | 27 | 28 |    | 25 | 26 | 27 | 28 |

    Different for every tile, different for every token.

    - :class:`TiledTokenPositionalEmbedding`

    .. code-block:: text

        |  1 |  2 |    |  3 |  4 |    |  5 |  6 |    |  7 |  8 |
        |  9 | 10 |    | 11 | 12 |    | 13 | 14 |    | 15 | 16 |

        | 17 | 18 |    | 19 | 20 |    | 21 | 22 |    | 23 | 24 |
        | 25 | 26 |    | 27 | 28 |    | 29 | 30 |    | 31 | 32 |

        | 33 | 34 |    | 35 | 36 |    | 37 | 38 |    | 39 | 40 |
        | 41 | 42 |    | 43 | 44 |    | 45 | 46 |    | 47 | 48 |

        | 49 | 50 |    | 51 | 52 |    | 53 | 54 |    | 55 | 56 |
        | 57 | 58 |    | 59 | 60 |    | 61 | 62 |    | 63 | 64 |

    different for every tile, same for every token within a tile.

    - :class:`TilePositionalEmbedding`

    .. code-block:: text

        |  1 |  1 |  1 |  1 |    |  2 |  2 |  2 |  2 |
        |  1 |  1 |  1 |  1 |    |  2 |  2 |  2 |  2 |
        |  1 |  1 |  1 |  1 |    |  2 |  2 |  2 |  2 |
        |  1 |  1 |  1 |  1 |    |  2 |  2 |  2 |  2 |

        |  3 |  3 |  3 |  3 |    |  4 |  4 |  4 |  4 |
        |  3 |  3 |  3 |  3 |    |  4 |  4 |  4 |  4 |
        |  3 |  3 |  3 |  3 |    |  4 |  4 |  4 |  4 |
        |  3 |  3 |  3 |  3 |    |  4 |  4 |  4 |  4 |

    Args:
        model_args (ModelArgs): The model args.

    Raises:
        ValueError: If `patch_size` is not greater than 0.
        ValueError: If `len(return_intermediates)` is greater than `num_layers`.
    """

    def __init__(
        self,
        model_args: ModelArgs,
    ):
        super().__init__()
        if model_args.patch_size <= 0:
            raise ValueError(f"kernel size of conv {model_args.patch_size} must be > 0")
        if model_args.return_intermediates and (
            len(model_args.return_intermediates) > model_args.encoder_num_layers
        ):
            # The field is `return_intermediates` (plural); the singular name does
            # not exist and would turn this ValueError into an AttributeError.
            raise ValueError(
                "len(return_intermediates) must be <= num_layers."
                f" Got {model_args.return_intermediates=} and {model_args.encoder_num_layers=}"
            )

        # For test validation purposes
        patch_grid_size = model_args.tile_size // model_args.patch_size
        self.patches_per_tile = patch_grid_size**2

        # May be None, meaning "return no intermediate outputs".
        self.return_intermediates = model_args.return_intermediates

        # Patch embedding: stride == kernel size, so patches do not overlap.
        self.conv = Conv2dModule(
            in_channels=model_args.in_channels,
            out_channels=model_args.encoder_embed_dim,
            kernel_size=model_args.patch_size,
            stride=model_args.patch_size,
            bias=False,
        )

        self.ln_post = Fp32LayerNorm(model_args.encoder_embed_dim)
        self.ln_pre = Fp32LayerNorm(model_args.encoder_embed_dim)
        self.transformer_layers = nn.ModuleList(
            [
                VitTransformerBlock(model_args)
                for _ in range(model_args.encoder_num_layers)
            ]
        )

        self.class_embedding = CLSEmbedding(model_args.encoder_embed_dim)
        # pre and post tile position embedding
        # NOTE(review): the class docstring pairs TiledTokenPositionalEmbedding with
        # the tiled case (max_num_tiles > 1) and TokenPositionalEmbedding with the
        # single-tile case, which is the opposite of the selection below. The
        # selection is left unchanged here because swapping it renames parameters
        # and would break loading of existing checkpoints -- confirm the intent.
        if model_args.max_num_tiles > 1:
            self.pre_tile_pos_embed = TilePositionalEmbedding(
                max_num_tiles=model_args.max_num_tiles,
                emb_dim=model_args.encoder_embed_dim,
            )
            self.post_tile_pos_embed = TilePositionalEmbedding(
                max_num_tiles=model_args.max_num_tiles,
                emb_dim=model_args.encoder_embed_dim,
            )
            self.token_pos_embedding = TokenPositionalEmbedding(
                emb_dim=model_args.encoder_embed_dim,
                tile_size=model_args.tile_size,
                patch_size=model_args.patch_size,
            )
        else:
            self.pre_tile_pos_embed = None
            self.post_tile_pos_embed = None
            self.token_pos_embedding = TiledTokenPositionalEmbedding(
                max_num_tiles=model_args.max_num_tiles,
                emb_dim=model_args.encoder_embed_dim,
                tile_size=model_args.tile_size,
                patch_size=model_args.patch_size,
            )

    def forward(
        self, images: torch.Tensor, aspect_ratio: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, List[torch.Tensor]]:
        """
        Processes images and returns the tokens and hidden states.

        Multiple images per sample: we add a dimension num_imgs to the input. This is useful when a single
        sample contains multiple images, for example:

        - sample 1: "<image> what animal is this?"
        - sample 2: "I like <image> more than <image>"

        In this case, sample 1 has one image, and sample 2 has two images. max_n_imgs = max(2,1) = 2.
        So your input should have shape (bsz=2, num_imgs=2, num_tiles, num_channels, tile_size_w, tile_size_h).

        Notice that to batch it, you will have to pad num_imgs to max_num_imgs and max_num_tiles.

        Args:
            images (torch.Tensor): torch.Tensor with shape (bsz, num_imgs, num_tiles, num_channels, tile_size_w, tile_size_h).
            aspect_ratio (Optional[torch.Tensor]): torch.Tensor with shape (bsz, n_imgs, 2). If all
                images have a single tile, i.e. they were not tile-cropped, it should be None.
                Used to calculate the positional embeddings for the tiles.

        Returns:
            Tuple[torch.Tensor, List[torch.Tensor]]: A tuple: (x, hidden_states),
                where x is a torch.tensor of shape (bsz, num_imgs, num_tiles, num_tokens, emb_dim) and
                hidden_states is a list with one torch.tensor of shape
                (bsz, num_imgs, num_tiles, num_tokens, emb_dim) per layer index listed in
                ``return_intermediates`` (empty when ``return_intermediates`` is None).

        Raises:
            ValueError: If aspect_ratio is None, but num_tiles > 1 in the batch.
        """

        bsz, num_imgs, num_tiles, num_channels, width, height = images.shape

        if aspect_ratio is None:
            if num_tiles > 1:
                raise ValueError(
                    f"aspect_ratio was not provided, but found num_tiles > 1 "
                    f"for {images.shape=}. Please provide aspect_ratio."
                )
            # Single-tile images: every image has aspect ratio (1, 1).
            aspect_ratio = torch.ones(
                (bsz * num_imgs, 2), dtype=torch.int, device=images.device
            )

        aspect_ratio = aspect_ratio.reshape(bsz * num_imgs, 2)

        # patch embedding
        # reshape (rather than view) also accepts non-contiguous inputs.
        images = images.reshape(bsz * num_imgs * num_tiles, num_channels, width, height)
        # Conv2dModule permutes internally, so its output is (*, grid ** 2, emb_dim)
        # rather than nn.Conv2d's channel-first layout.
        x = self.conv(images)
        _, num_tokens, emb_dim = x.shape  # num_tokens = grid ** 2
        x = x.reshape(bsz * num_imgs, num_tiles, num_tokens, emb_dim)

        # tile embeddings
        if self.pre_tile_pos_embed is not None:
            x = self.pre_tile_pos_embed(x, aspect_ratio)

        # apply cls token
        x = self.class_embedding(x)
        num_tokens += 1

        # apply position embeddings
        x = self.token_pos_embedding(x, aspect_ratio)

        x = self.ln_pre(x)
        # Flatten tiles and tokens into one sequence per image for the transformer.
        x = x.view(bsz * num_imgs, -1, emb_dim)

        # `return_intermediates` defaults to None; treat that as "no layers".
        wanted_layers = self.return_intermediates or ()
        int_x = []  # intermediate outputs
        for layer_idx, transformer_layer in enumerate(self.transformer_layers):
            if layer_idx in wanted_layers:
                # Captured *before* the layer is applied.
                h = x.view(bsz, num_imgs, num_tiles, num_tokens, emb_dim)
                int_x.append(h)
            x = transformer_layer(x)

        x = self.ln_post(x)
        x = x.view(bsz * num_imgs, num_tiles, num_tokens, emb_dim)

        if self.post_tile_pos_embed is not None:
            x = self.post_tile_pos_embed(x, aspect_ratio)

        x = x.view(bsz, num_imgs, num_tiles, num_tokens, emb_dim)
        return x, int_x
class Projection(nn.Module):
    """Adapter that maps the vision encoder (CLIP) output into the decoder's
    embedding space.

    The encoder output is refined by a stack of gated transformer blocks,
    optionally concatenated with the encoder's intermediate hidden states along
    the feature axis, and finally projected with a linear layer.

    Args:
        model_args (ModelArgs): reads ``num_layers_projection``,
            ``return_intermediates``, ``encoder_embed_dim`` and
            ``decoder_embed_dim``.
    """

    def __init__(
        self,
        model_args: ModelArgs,
    ) -> None:
        super().__init__()
        gated_blocks = [
            VitTransformerBlock(model_args, attn_scale=TanhGate(), mlp_scale=TanhGate())
            for _ in range(model_args.num_layers_projection)
        ]
        self.transformer_layers = nn.ModuleList(gated_blocks)

        # One extra feature slab per requested intermediate hidden state.
        intermediates = model_args.return_intermediates or []
        self.num_hidden = len(intermediates)
        in_features = model_args.encoder_embed_dim * (self.num_hidden + 1)
        self.output = nn.Linear(in_features, model_args.decoder_embed_dim)

    def forward(
        self,
        x: torch.Tensor,
        hidden_states: Optional[List[torch.Tensor]] = None,
    ) -> torch.Tensor:
        """Project encoder tokens to decoder width.

        Args:
            x (torch.Tensor): shape (bsz, num_imgs, num_tiles, num_tokens, emb_dim).
            hidden_states (Optional[List[torch.Tensor]]): intermediate encoder
                outputs; required when the module was built with a non-empty
                ``return_intermediates``.

        Returns:
            torch.Tensor: shape (bsz, num_imgs * num_tiles * num_tokens, decoder_emb_dim).
        """
        bsz, num_imgs, num_tiles, num_tokens, emb_dim = x.shape

        # Run the transformer over one flat sequence per image.
        seq = x.view(bsz * num_imgs, num_tiles * num_tokens, emb_dim)
        for block in self.transformer_layers:
            seq = block(seq)
        x = seq.view(bsz, num_imgs, num_tiles, num_tokens, emb_dim)

        if self.num_hidden > 0:
            assert hidden_states is not None
            # Stacking on a new trailing axis and then flattening it interleaves
            # the hidden states feature-by-feature.
            stacked = torch.stack(hidden_states, dim=-1)
            stacked = stacked.view(bsz, num_imgs, num_tiles, num_tokens, -1)
            x = torch.cat([x, stacked], dim=-1)

        # [bsz x seq x decoder_emb_dim]
        projected = self.output(x)
        return projected.reshape(bsz, num_imgs * num_tiles * num_tokens, -1)


class VisionEncoder(nn.Module):
    """Vision encoder model for Llama 3.2 Vision: a ``Vit`` backbone followed by
    a ``Projection`` adapter.

    Args:
        model_args (ModelArgs): configs for the vision encoder.
    """

    def __init__(self, model_args: ModelArgs) -> None:
        super().__init__()
        self.vit = Vit(model_args)
        self.proj = Projection(model_args)

    def forward(
        self, images: torch.Tensor, aspect_ratio: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Args:
            images (torch.Tensor):
                Image tensor with shape
                [bsz x num_imgs x num_tiles x num_channels x width x height].
            aspect_ratio (Optional[torch.Tensor]): Tensor with shape
                [bsz x num_imgs x 2]. If all images have a single tile, i.e. they
                were not tile-cropped, it should be None. Used to calculate the
                positional embeddings for the tiles.

        Returns:
            Tensor: sequence of embeddings [bsz x seq_len x decoder_emb_dim], where
            seq_len is num_imgs * num_tiles * num_tokens.
        """
        tokens, hidden_states = self.vit(images, aspect_ratio)
        return self.proj(tokens, hidden_states)
class FeedForwardForDecoder(nn.Module):
    """
    Feed-forward module for the decoder (distinct from the encoder's one).
    This is the component originally used in llama3: a SiLU-gated MLP computing
    ``w2(silu(w1(x)) * w3(x))`` with bias-free linear layers.

    Args:
        dim (int): input and output feature size.
        hidden_dim (int): nominal hidden size; the effective width is
            ``2/3`` of it, optionally scaled, then rounded up.
        multiple_of (int): the effective hidden width is rounded up to a
            multiple of this value.
        ffn_dim_multiplier (Optional[float]): optional extra scale applied to
            the hidden width before rounding.
    """

    def __init__(
        self,
        dim: int,
        hidden_dim: int,
        multiple_of: int,
        ffn_dim_multiplier: Optional[float],
    ):
        super().__init__()
        inner_dim = int(2 * hidden_dim / 3)
        # custom dim factor multiplier
        if ffn_dim_multiplier is not None:
            inner_dim = int(ffn_dim_multiplier * inner_dim)
        # Ceil-divide, then scale back up: smallest multiple >= inner_dim.
        num_chunks = (inner_dim + multiple_of - 1) // multiple_of
        inner_dim = multiple_of * num_chunks

        self.w1 = nn.Linear(dim, inner_dim, bias=False)
        self.w2 = nn.Linear(inner_dim, dim, bias=False)
        self.w3 = nn.Linear(dim, inner_dim, bias=False)

    def forward(self, x):
        gate = F.silu(self.w1(x))
        value = self.w3(x)
        return self.w2(gate * value)

    def init_weights(self, init_std: float):
        """Re-initialise weights: ``w1`` with std 0.02, ``w2``/``w3`` with ``init_std``."""
        nn.init.trunc_normal_(self.w1.weight, mean=0.0, std=0.02)
        nn.init.trunc_normal_(self.w2.weight, mean=0.0, std=init_std)
        nn.init.trunc_normal_(self.w3.weight, mean=0.0, std=init_std)
class SelfAttention(nn.Module):
    """
    Multi-head causal self-attention with rotary position embeddings.

    Supports grouped-query attention: when ``decoder_num_kv_heads`` is smaller
    than ``decoder_num_heads`` the key/value heads are repeated ``n_rep`` times.

    Args:
        model_args (ModelArgs): reads ``decoder_num_heads``,
            ``decoder_num_kv_heads`` (None means "same as heads") and
            ``decoder_embed_dim``.
    """

    def __init__(self, model_args: ModelArgs):
        super().__init__()
        embed_dim = model_args.decoder_embed_dim
        kv_heads = model_args.decoder_num_kv_heads

        self.num_heads = model_args.decoder_num_heads
        self.num_kv_heads = self.num_heads if kv_heads is None else kv_heads
        self.n_rep = self.num_heads // self.num_kv_heads
        self.head_dim = embed_dim // self.num_heads

        q_width = self.num_heads * self.head_dim
        kv_width = self.num_kv_heads * self.head_dim
        self.wq = nn.Linear(embed_dim, q_width, bias=False)
        self.wk = nn.Linear(embed_dim, kv_width, bias=False)
        self.wv = nn.Linear(embed_dim, kv_width, bias=False)
        self.wo = nn.Linear(q_width, embed_dim, bias=False)

    def init_weights(self, init_std: float):
        """Re-initialise q/k/v with std 0.02 and the output projection with ``init_std``."""
        nn.init.trunc_normal_(self.wq.weight, mean=0.0, std=0.02)
        nn.init.trunc_normal_(self.wk.weight, mean=0.0, std=0.02)
        nn.init.trunc_normal_(self.wv.weight, mean=0.0, std=0.02)
        nn.init.trunc_normal_(self.wo.weight, mean=0.0, std=init_std)

    def forward(
        self,
        x: torch.Tensor,
        freqs_cis: torch.Tensor,
    ):
        """Attend over ``x`` (bs, seqlen, dim) and return a tensor of the same shape.

        ``freqs_cis`` is forwarded unchanged to ``apply_rotary_emb``.
        """
        bs, seqlen, _ = x.shape
        q_proj, k_proj, v_proj = self.wq(x), self.wk(x), self.wv(x)

        # Use -1 instead of `num_heads` (or `num_kv_heads`) to infer the actual
        # local heads from the projected sizes, as TP may have sharded them
        # after the above linear ops.
        per_head = (bs, seqlen, -1, self.head_dim)
        q_heads = q_proj.view(per_head)
        k_heads = k_proj.view(per_head)
        v_heads = v_proj.view(per_head)

        q_heads, k_heads = apply_rotary_emb(q_heads, k_heads, freqs_cis=freqs_cis)

        # repeat k/v heads if num_kv_heads < num_heads
        keys = repeat_kv(k_heads, self.n_rep)  # (bs, seqlen, n_local_heads, head_dim)
        values = repeat_kv(v_heads, self.n_rep)  # (bs, seqlen, n_local_heads, head_dim)

        # scaled_dot_product_attention wants (bs, n_local_heads, seqlen, head_dim)
        attended = F.scaled_dot_product_attention(
            q_heads.transpose(1, 2),
            keys.transpose(1, 2),
            values.transpose(1, 2),
            is_causal=True,
        )
        # back to (bs, seqlen, n_local_heads, head_dim), then merge the heads
        merged = attended.transpose(1, 2).contiguous().view(bs, seqlen, -1)
        return self.wo(merged)
nn.init.trunc_normal_(linear.weight, mean=0.0, std=0.02) + nn.init.trunc_normal_(self.wo.weight, mean=0.0, std=init_std) + + def forward( + self, + x: torch.Tensor, + encoder_input: torch.Tensor, + mask: Optional[torch.Tensor] = None, + ): + bs, seqlen_x, _ = x.shape + seqlen_y = encoder_input.shape[1] + xq, xk, xv = self.wq(x), self.wk(encoder_input), self.wv(encoder_input) + + # Use -1 instead of `num_heads` (or `num_kv_heads`) to infer the actual + # local heads from sizes of xq, xk, and xv as TP may have sharded them + # after the above linear ops. + xq = xq.view(bs, seqlen_x, -1, self.head_dim) + xk = xk.view(bs, seqlen_y, -1, self.head_dim) + xv = xv.view(bs, seqlen_y, -1, self.head_dim) + + # repeat k/v heads if num_kv_heads < num_heads + keys = repeat_kv(xk, self.n_rep) # (bs, seqlen_y, n_local_heads, head_dim) + values = repeat_kv(xv, self.n_rep) # (bs, seqlen_y, n_local_heads, head_dim) + + xq = xq.transpose(1, 2) # (bs, n_local_heads, seqlen_x, head_dim) + xk = keys.transpose(1, 2) # (bs, n_local_heads, seqlen_y, head_dim) + xv = values.transpose(1, 2) # (bs, n_local_heads, seqlen_y, head_dim) + + xq = self.q_norm(xq) + xk = self.k_norm(xk) + + # we use casual mask for training + output = F.scaled_dot_product_attention( + xq, xk, xv, attn_mask=mask, is_causal=False + ) + output = output.transpose( + 1, 2 + ).contiguous() # (bs, seqlen_x, n_local_heads, head_dim) + output = output.view(bs, seqlen_x, -1) + return self.wo(output) + + +class DecoderTransformerSelfAttnBlock(nn.Module): + def __init__( + self, + model_args: ModelArgs, + ): + super().__init__() + self.attn = SelfAttention(model_args) + self.ln_attn = nn.RMSNorm(model_args.decoder_embed_dim, eps=1e-5) + self.mlp = FeedForwardForDecoder( + dim=model_args.decoder_embed_dim, + hidden_dim=4 * model_args.decoder_embed_dim, + multiple_of=model_args.multiple_of, + ffn_dim_multiplier=model_args.ffn_dim_multiplier, + ) + self.ln_mlp = nn.RMSNorm(model_args.decoder_embed_dim, eps=1e-5) + + def forward( + 
class DecoderTransformerSelfAttnBlock(nn.Module):
    """Pre-norm decoder block: self-attention then feed-forward, each wrapped in
    a residual connection.

    Args:
        model_args (ModelArgs): reads ``decoder_embed_dim``, ``multiple_of`` and
            ``ffn_dim_multiplier`` (plus whatever ``SelfAttention`` reads).
    """

    def __init__(
        self,
        model_args: ModelArgs,
    ):
        super().__init__()
        embed_dim = model_args.decoder_embed_dim
        self.attn = SelfAttention(model_args)
        self.ln_attn = nn.RMSNorm(embed_dim, eps=1e-5)
        self.mlp = FeedForwardForDecoder(
            dim=embed_dim,
            hidden_dim=4 * embed_dim,
            multiple_of=model_args.multiple_of,
            ffn_dim_multiplier=model_args.ffn_dim_multiplier,
        )
        self.ln_mlp = nn.RMSNorm(embed_dim, eps=1e-5)

    def forward(
        self,
        x: torch.Tensor,
        freqs_cis: torch.Tensor,
        **kwargs: Dict,
    ):
        """Apply the block to ``x``; extra keyword arguments are accepted and ignored."""
        # Unpacking doubles as a check that x is 3-dimensional.
        _bsz, _seq_len, _emb_dim = x.shape
        attn_residual = x + self.attn(self.ln_attn(x), freqs_cis)
        return attn_residual + self.mlp(self.ln_mlp(attn_residual))


class DecoderTransformerCrossAttnBlock(nn.Module):
    """Gated cross-attention block that mixes encoder embeddings into the decoder stream.

    Both the attention output and the feed-forward output pass through a
    ``TanhGate`` before being added to the residual stream.

    Args:
        model_args (ModelArgs): reads ``decoder_embed_dim``, ``multiple_of`` and
            ``ffn_dim_multiplier`` (plus whatever ``CrossAttention`` reads).
    """

    def __init__(
        self,
        model_args: ModelArgs,
    ):
        super().__init__()
        embed_dim = model_args.decoder_embed_dim
        self.attn = CrossAttention(model_args)
        self.ln_attn = nn.RMSNorm(embed_dim)
        self.mlp = FeedForward(
            dim=embed_dim,
            hidden_dim=4 * embed_dim,
            multiple_of=model_args.multiple_of,
            ffn_dim_multiplier=model_args.ffn_dim_multiplier,
        )
        self.ln_mlp = nn.RMSNorm(embed_dim)
        self.attn_scale = TanhGate()
        self.mlp_scale = TanhGate()

    def _skip_mask(self, mask: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
        """Find tokens that cannot attend to any encoder embedding.

        Some tokens in x may be masked from every encoder input by the cross
        attention mask, i.e. a whole row of the attention matrix is masked out.
        In the example below "The" is such a token (False = cannot attend):

        .. code-block:: text

                  |emb||emb||emb|
            |The|   F    F    F
            |red|   T    F    T
            |car|   F    T    T

        A fully masked row gives the softmax no inputs and produces NaN, so the
        attention and mlp outputs of those tokens are zeroed with this mask.
        For the example the skip mask is ``[[True], [False], [False]]``.

        Args:
            mask (Optional[torch.Tensor]): boolean mask (True = may attend) or
                additive float mask (``-inf`` = may not attend).

        Returns:
            Optional[torch.Tensor]: None when ``mask`` is None; otherwise a
            boolean tensor with a trailing dimension of 1 that is True for
            rows in which every position is blocked.
        """
        # no skip_mask if no masking
        if mask is None:
            return None
        # Normalise both mask flavours to "True means blocked".
        blocked = ~mask if mask.dtype == torch.bool else torch.isneginf(mask)
        return torch.all(blocked, dim=-1, keepdim=True)

    def forward(
        self,
        x: torch.Tensor,
        *,
        encoder_input: Optional[torch.Tensor] = None,
        encoder_mask: Optional[torch.Tensor] = None,
        **kwargs: Dict,
    ) -> torch.Tensor:
        """Fuse ``encoder_input`` into ``x``; returns ``x`` untouched when there is none.

        Output shape: [batch_size, seq_length, embed_dim].
        """
        # Cross attention exists only to relate x to encoder_input.
        if encoder_input is None:
            return x

        # Tokens of x that see no encoder embedding at all.
        fully_masked = self._skip_mask(encoder_mask)

        attn_out = self.attn(self.ln_attn(x), encoder_input, mask=encoder_mask)
        if fully_masked is not None:
            attn_out.masked_fill_(fully_masked, 0)
        h = self.attn_scale(attn_out) + x

        # Norm applied before the feedforward layer
        mlp_out = self.mlp(self.ln_mlp(h))
        if fully_masked is not None:
            mlp_out.masked_fill_(fully_masked, 0)

        return h + self.mlp_scale(mlp_out)
class FusionLayer(nn.Module):
    """
    Deep Fusion model architectures combine pretrained encoder models with pretrained
    language models by infusing the encoder outputs into the middle layers of the LLM.
    This allows the language model to interpret the encoder outputs as text and
    "understand" any modality for which you can train an encoder. To enable the
    language model to adapt to the encoder outputs, the FusionLayer fuses a new
    learnable layer to an existing decoder (language model) layer. This additional
    layer can take the encoder embeddings and learn to combine them with the token
    embeddings from the decoder.

    Args:
        layer (nn.Module): the original decoder layer.
        fusion_layer (nn.Module): the new layer that consumes encoder outputs.
        fusion_first (bool): if True (default) ``fusion_layer`` runs before
            ``layer``; if False it runs after.
    """

    def __init__(
        self, layer: nn.Module, fusion_layer: nn.Module, fusion_first: bool = True
    ):
        super().__init__()
        self.layer = layer
        self.fusion_layer = fusion_layer
        # Stored so the constructor argument actually controls the ordering.
        self.fusion_first = fusion_first

    def forward(self, x: torch.Tensor, **kwargs: Dict) -> torch.Tensor:
        """Run both wrapped layers on ``x``, passing the same ``kwargs`` to each.

        Args:
            x (torch.Tensor): input hidden states.
            **kwargs (Dict): forwarded unchanged to ``layer`` and ``fusion_layer``.

        Returns:
            torch.Tensor: output of the second layer in the configured order.
        """
        if self.fusion_first:
            x = self.fusion_layer(x, **kwargs)
            return self.layer(x, **kwargs)
        x = self.layer(x, **kwargs)
        return self.fusion_layer(x, **kwargs)
class FusionEmbedding(nn.Module):
    """
    Embedding with a second, smaller table for extra special tokens.

    When fusing new models with a language model, additional tokens may be
    needed, e.g. ``<|image|>`` to mark an image's position in text. Token ids
    below ``vocab_size`` are looked up in ``embedding``; ids at or above it are
    shifted down by ``vocab_size`` and looked up in ``fusion_embedding``. This
    lets the additional tokens be learned separately from the original table.

    Args:
        vocab_size (int): size of the original vocabulary.
        fusion_vocab_size (int): number of additional tokens.
        embed_dim (int): embedding width shared by both tables.
    """

    def __init__(self, vocab_size: int, fusion_vocab_size: int, embed_dim: int) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.fusion_embedding = nn.Embedding(fusion_vocab_size, embed_dim)
        self.dim = embed_dim
        self.num_embeddings = vocab_size + fusion_vocab_size

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """Embed ``input`` of shape (bsz, seq_len) into (bsz, seq_len, embed_dim)."""
        bsz, seq_len = input.size()
        base_vocab = self.embedding.num_embeddings

        # Route each id to the table that owns it.
        is_base = input < base_vocab
        base_ids = torch.masked_select(input, is_base)
        extra_ids = torch.masked_select(input, ~is_base) - base_vocab

        base_vecs = self.embedding(base_ids)  # [num_base_tokens x embed_dim]
        extra_vecs = self.fusion_embedding(extra_ids)  # [num_fusion_tokens x embed_dim]

        table = self.embedding.weight
        out = torch.empty(
            bsz, seq_len, self.dim, device=table.device, dtype=table.dtype
        )
        # Both selections and scatters walk positions in the same row-major
        # order, so every vector lands back on the token it was looked up for.
        scatter_mask = is_base.unsqueeze(-1).expand(bsz, seq_len, self.dim)
        out.masked_scatter_(scatter_mask, base_vecs)
        out.masked_scatter_(~scatter_mask, extra_vecs)
        return out
class MultimodalDecoder(nn.Module):
    """Decoder multimodal model for Llama 3.2.

    A stack of llama3-like self-attention blocks in which every
    ``fusion_interval``-th block is wrapped in a ``FusionLayer`` together with a
    cross-attention block that reads the encoder embeddings.

    Args:
        model_args (ModelArgs): configs for the decoder. Reads
            ``decoder_num_layers``, ``fusion_interval``, ``vocab_size``,
            ``num_special_tokens``, ``decoder_embed_dim``, ``decoder_num_heads``,
            ``max_seq_len`` and ``rope_theta``.
    """

    def __init__(self, model_args: ModelArgs):
        super().__init__()

        # TODO persistent should be set to false, since this buffer can be recomputed.
        # however, we set it to true for 2 reasons. (1) due to pytorch/pytorch#123411,
        # compile or pipeline-tracer will not correctly handle non-persistent buffers,
        # so we need to fix that. (2) if we initialize pipeline-parallel models from
        # a seed checkpoint rather than calling init_weights, we need freqs_cis to be
        # initialized by the checkpoint, or we need to add a separate initializer for
        # just the non-persistent buffers that is called after loading checkpoints.
        self.register_buffer(
            "freqs_cis", self._precompute_freqs_cis(model_args), persistent=True
        )

        # nn.ModuleList (not a plain list) so the layers' parameters are
        # registered: without it they are invisible to parameters(),
        # state_dict() and .to(device).
        self.layers = nn.ModuleList()
        for idx in range(1, model_args.decoder_num_layers + 1):
            # define a llama3-like decoder layer, we don't train this part.
            decoder_layer = DecoderTransformerSelfAttnBlock(model_args)
            # cross attention layers, mixing text and vision,
            # placed every `fusion_interval` layers
            if idx % model_args.fusion_interval == 0:
                cross_attn_layer = DecoderTransformerCrossAttnBlock(model_args)
                self.layers.append(
                    FusionLayer(layer=decoder_layer, fusion_layer=cross_attn_layer)
                )
            else:
                self.layers.append(decoder_layer)

        self.tok_embeddings = FusionEmbedding(
            model_args.vocab_size,
            model_args.num_special_tokens,
            model_args.decoder_embed_dim,
        )
        self.norm = nn.RMSNorm(model_args.decoder_embed_dim, eps=1e-05)
        self.output = nn.Linear(
            model_args.decoder_embed_dim, model_args.vocab_size, bias=False
        )

    def _precompute_freqs_cis(self, model_args) -> torch.Tensor:
        """Build the rotary-embedding table for the per-head dimension."""
        return precompute_freqs_cis(
            model_args.decoder_embed_dim // model_args.decoder_num_heads,
            # Need to compute until at least the max token limit for generation
            # (use 2x max sequence length to be safe)
            model_args.max_seq_len * 2,
            model_args.rope_theta,
        )

    def forward(
        self,
        tokens: torch.Tensor,
        *,
        encoder_input: Optional[torch.Tensor] = None,
        encoder_mask: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """
        Args:
            tokens (torch.Tensor): input tensor with shape ``[b x s]``
            encoder_input (Optional[torch.Tensor]): Optional input embeds from the
                encoder. Shape ``[b x s_e x d_e]``
            encoder_mask (Optional[torch.Tensor]): Boolean tensor defining a
                relational matrix between tokens and encoder embeddings. A True
                value at position ``i,j`` means token ``i`` can attend to
                embedding ``j`` in the decoder. Mask has shape ``[b x s x s_e]``.
                Default is None, but this is required during inference if the
                model has been setup with any layers which use encoder embeddings
                and caches have been setup.

        Returns:
            torch.Tensor: float32 logits with a trailing dimension of ``vocab_size``.
        """
        # shape: [b, s, d]
        h = self.tok_embeddings(tokens)

        # Every layer receives the same keyword arguments; each one picks the
        # ones it needs and ignores the rest.
        for layer in self.layers:
            # shape: [b, s, d]
            h = layer(
                h,
                freqs_cis=self.freqs_cis,
                encoder_input=encoder_input,
                encoder_mask=encoder_mask,
            )

        # shape: [b, s, d]
        h = self.norm(h)
        return self.output(h).float()
class CLIPTransform:
    """
    Dynamically resize, pad, normalize and tile an image of any size, based on
    its aspect ratio and the number of tiles allowed.

    The algorithm will NOT distort the image to fit a certain aspect ratio,
    because that leads to a significant degradation in image quality. Whether
    upscaling is allowed is controlled by ``resize_to_max_canvas``.

    For example, for an input image of size 300x800 with at most 16 tiles of
    side 224px:

    If ``resize_to_max_canvas=False``:
        best_resolution = (448, 896) -> smallest canvas, up to 16 tiles, that
        doesn't require downscaling
        image is NOT resized
        image is padded (300, 800) -> (448, 896)
        image is tiled 2x4, for a final output shape of (8, 3, 224, 224)

    If ``resize_to_max_canvas=True``:
        best_resolution = (448, 1344) -> canvas that allows maximum upscaling,
        with minimum padding, up to 16 tiles
        image is resized without distortion (300, 800) -> (448, 1194); 448 is
        the limiting side for the resize
        image is padded (448, 1194) -> (448, 1344)
        image is tiled 2x6, for a final output shape of (12, 3, 224, 224)

    Args:
        image_mean (Optional[List[float]]): Mean values of each channel, used for
            normalization. Should be the same used for the pre-trained model.
            If None, no normalization is performed. Default None.
        image_std (Optional[List[float]]): Standard deviation values of each
            channel, used for normalization. Must be given together with
            ``image_mean``. Default None.
        possible_resolutions (Optional[List[Tuple[int, int]]]): Candidate canvases
            as (height, width) tuples. If None (or empty), they are generated from
            ``max_num_tiles`` and ``tile_size``. Default None.
        tile_size (int): Size of the tiles to divide the image into. Default 224.
        max_num_tiles (Optional[int]): Only used if ``possible_resolutions`` is
            NOT given. Maximum number of tiles to break an image into, e.g.
            [(224, 224), (224, 448), (448, 224)] if max_num_tiles = 2 and
            tile_size = 224. Default 4.
        dtype (torch.dtype): Data type of the output image. Default torch.bfloat16.
        resample (str): Name of a ``torchvision.transforms.InterpolationMode``
            member (case-insensitive), e.g. "nearest", "nearest_exact",
            "bilinear", "bicubic". Default 'bilinear'.
        resize_to_max_canvas (bool): If True, the image is upscaled without
            distortion to fit the largest possible resolution from
            ``possible_resolutions``. If False, the resolution that minimizes
            downscaling is picked and ``tile_size`` is passed as the upscaling
            limit. Default False.

    Examples:
        >>> image_transform = CLIPTransform(
        ...     tile_size=224,
        ...     max_num_tiles=4,
        ...     resample="bilinear",
        ...     resize_to_max_canvas=True,
        ... )
        >>> image = torch.randint(0, 256, (3, 100, 200), dtype=torch.uint8)
        >>> output = image_transform(image)
        >>> sorted(output)
        ['aspect_ratio', 'image']
    """

    def __init__(
        self,
        *,
        image_mean: Optional[List[float]] = None,
        image_std: Optional[List[float]] = None,
        possible_resolutions: Optional[List[Tuple[int, int]]] = None,
        tile_size: int = 224,
        max_num_tiles: Optional[int] = 4,
        dtype: torch.dtype = torch.bfloat16,
        resample: str = "bilinear",
        resize_to_max_canvas: bool = False,
    ) -> None:
        # --- settings consumed by get_canvas_best_fit ---
        assert (
            possible_resolutions is not None or max_num_tiles is not None
        ), f"Either possible_resolutions or max_num_tiles must be given. Got {possible_resolutions} and {max_num_tiles}"

        # Derive the candidate canvases only when none were supplied.
        if not possible_resolutions and max_num_tiles:
            possible_resolutions = find_supported_resolutions(
                max_num_tiles=max_num_tiles, tile_size=tile_size
            )

        self.possible_resolutions = torch.tensor(possible_resolutions).reshape(-1, 2)
        logger.debug(
            f"Found possible_resolutions: {self.possible_resolutions}. Will fit the images into the canvas with best fit."
        )

        self.resize_to_max_canvas = resize_to_max_canvas

        # --- settings consumed by normalize ---
        mean_missing = image_mean is None
        std_missing = image_std is None
        assert (
            mean_missing == std_missing
        ), f"Need to provide both or none of image_mean and image_std. Got {image_mean=} and {image_std=}"
        self.mean = image_mean
        self.std = image_std

        # --- settings consumed by resize_with_pad ---
        self.max_size = None if resize_to_max_canvas else tile_size
        self.dtype = dtype
        self.resample = torchvision.transforms.InterpolationMode[resample.upper()]

        # --- settings consumed by tile_crop ---
        self.tile_size = tile_size

    def __call__(self, image: torch.Tensor) -> Mapping[str, Any]:
        """
        Convert, resize, pad, optionally normalize and tile a single image.

        Args:
            image (torch.Tensor): The image to transform.

        Returns:
            Mapping[str, Any]: A dict with the tiled image under ``"image"`` and
            the chosen canvas size in units of tiles under ``"aspect_ratio"``.
        """
        assert isinstance(image, torch.Tensor), "Input image must be a torch.Tensor."

        image = F.to_image(image)
        image = F.grayscale_to_rgb_image(image)
        image = F.to_dtype(image, dtype=self.dtype, scale=True)

        # Find the best canvas to fit the image without distortion
        canvas = get_canvas_best_fit(
            image=image,
            possible_resolutions=self.possible_resolutions,
            resize_to_max_canvas=self.resize_to_max_canvas,
        )

        # resize without distortion + pad to fit the canvas
        image = resize_with_pad(
            image=image,
            target_size=canvas,
            resample=self.resample,
            max_size=self.max_size,
        )

        # Normalization is optional and only runs when a mean was configured.
        if self.mean:
            image = F.normalize(image, mean=self.mean, std=self.std)

        # Divide the image into equally sized tiles
        tiles = tile_crop(image=image, tile_size=self.tile_size)

        # Canvas size expressed in tiles along each side.
        tiles_per_side = torch.tensor(canvas).reshape(-1) // self.tile_size

        return {
            "image": tiles,
            "aspect_ratio": tiles_per_side,
        }
__all__ = [
    "parallelize_llama",
    "pipeline_llama",
    "TransformerModelArgs",
    "Transformer",
    "llama3_configs",
]


# Model flavor name -> hyperparameters. Entries are added in the same order
# as before: debugmodel, 8B, 70B, 405B.
llama3_configs = {}
llama3_configs["debugmodel"] = TransformerModelArgs(
    dim=256, n_layers=8, n_heads=16, rope_theta=500000
)
llama3_configs["8B"] = TransformerModelArgs(
    dim=4096,
    n_layers=32,
    n_heads=32,
    n_kv_heads=8,
    ffn_dim_multiplier=1.3,
    multiple_of=1024,
    rope_theta=500000,
)
llama3_configs["70B"] = TransformerModelArgs(
    dim=8192,
    n_layers=80,
    n_heads=64,
    n_kv_heads=8,
    ffn_dim_multiplier=1.3,
    multiple_of=4096,
    rope_theta=500000,
)
llama3_configs["405B"] = TransformerModelArgs(
    dim=16384,
    n_layers=126,
    n_heads=128,
    n_kv_heads=8,
    ffn_dim_multiplier=1.2,
    multiple_of=4096,
    rope_theta=500000,
)


# Registering at import time makes the "llama3" spec available as soon as this
# package is imported.
register_train_spec(
    TrainSpec(
        name="llama3",
        cls=Transformer,
        config=llama3_configs,
        parallelize_fn=parallelize_llama,
        pipelining_fn=pipeline_llama,
        build_optimizers_fn=build_optimizers,
        build_lr_schedulers_fn=build_lr_schedulers,
        build_dataloader_fn=build_hf_dataloader,
        build_tokenizer_fn=build_tiktoken_tokenizer,
        build_loss_fn=build_cross_entropy_loss,
    )
)
# This file applies the PT-D pipeline parallelism to the Llama model.

import copy

import torch.nn as nn
from torch.distributed import DeviceMesh
from torch.distributed.pipelining import PipelineStage
from torch.distributed.pipelining.schedules import (
    _PipelineSchedule,
    get_schedule_class,
    ScheduleZBVZeroBubble,
)

from torchtitan.components.loss import LossFunction
from torchtitan.config_manager import JobConfig
from torchtitan.distributed import ParallelDims
from torchtitan.distributed.pipeline import (
    build_pipeline_schedule,
    generate_split_points,
    stage_ids_this_rank,
)
from torchtitan.protocols.train_spec import DeviceType, ParallelizeFunction
from torchtitan.tools.logging import logger

from .model import TransformerModelArgs


def pipeline_llama(
    model: nn.Module,
    world_mesh: DeviceMesh,
    parallel_dims: ParallelDims,
    job_config: JobConfig,
    device: DeviceType,
    model_config: TransformerModelArgs,
    parallelize_fn: ParallelizeFunction,
    loss_fn: LossFunction,
) -> tuple[_PipelineSchedule, list[nn.Module], bool, bool]:
    """Split ``model`` into pipeline stages and build the pipeline schedule.

    Args:
        model: The whole model; it is deep-copied once per local stage by
            ``pipeline_llama_manual_split``.
        world_mesh: Device mesh; its ``"pp"`` sub-mesh is used for pipelining.
        parallel_dims: Parallelism degrees, forwarded to the split helper and
            to ``parallelize_fn``.
        job_config: Job configuration, forwarded to the split helper,
            ``parallelize_fn`` and ``build_pipeline_schedule``.
        device: Device handed to each ``PipelineStage``.
        model_config: Model arguments (``n_layers`` is read when split points
            have to be generated).
        parallelize_fn: Callable applied to every local model chunk; its return
            value replaces the chunk.
        loss_fn: Loss function handed to ``build_pipeline_schedule``.

    Returns:
        ``(pp_schedule, model_parts, has_first_stage, has_last_stage)`` where
        the two flags tell whether any stage held by this rank is the first /
        last stage of the pipeline.
    """
    pp_mesh = world_mesh["pp"]

    stages, model_parts = pipeline_llama_manual_split(
        model, pp_mesh, parallel_dims, job_config, device, model_config
    )

    # For PP with looped schedules, each item in model_parts is one stage-model-chunk.
    # We need to iterate through model_parts to apply SPMD parallelisms, compilation,
    # optimizer, and checkpointing
    for i, m in enumerate(model_parts):
        # apply SPMD-style PT-D techniques
        m = parallelize_fn(m, world_mesh, parallel_dims, job_config)
        model_parts[i] = m
        # NOTE: this is to update the model in the stage
        # in case the model is modified e.g. by torch.compile
        stages[i].submod = m

    pp_schedule = build_pipeline_schedule(job_config, stages, loss_fn)

    # This is used in the train loop to determine whether to pass in the input_ids and labels
    has_first_stage = any(stage.is_first for stage in stages)
    has_last_stage = any(stage.is_last for stage in stages)

    return pp_schedule, model_parts, has_first_stage, has_last_stage


def pipeline_llama_manual_split(
    whole_model: nn.Module,
    pp_mesh: DeviceMesh,
    parallel_dims: ParallelDims,
    job_config: JobConfig,
    device: DeviceType,
    model_config: TransformerModelArgs,
) -> tuple[list[PipelineStage], list[nn.Module]]:
    """Build the pipeline stages (and their model chunks) owned by this rank.

    For every stage index assigned to this PP rank, the whole model is
    deep-copied and trimmed down to the layers of that stage; the trimmed
    chunk is then wrapped in a ``PipelineStage``.

    The stage objects are used to create a pipeline schedule, and the model
    chunks can be used for applying SPMD parallelism.

    Args:
        whole_model: The full model. It is never modified; each stage works on
            its own deep copy.
        pp_mesh: Pipeline-parallel device mesh (provides local rank, size and
            the process group).
        parallel_dims: Parallelism degrees; ``parallel_dims.pp`` is passed to
            ``generate_split_points``.
        job_config: Job configuration; only ``job_config.parallelism`` is read.
        device: Device handed to each ``PipelineStage``.
        model_config: Model arguments; ``n_layers`` is passed to
            ``generate_split_points``.

    Returns:
        ``(stages, models)``: two parallel lists with one entry per stage
        owned by this rank.
    """
    pp_rank = pp_mesh.get_local_rank()
    pp_size = pp_mesh.size()
    parallelism_config = job_config.parallelism

    # Explicit split points win; otherwise they are derived from the schedule,
    # the layers-per-stage setting, the PP degree and the layer count.
    splits = parallelism_config.pipeline_parallel_split_points or generate_split_points(
        parallelism_config.pipeline_parallel_schedule,
        parallelism_config.pipeline_parallel_layers_per_stage,
        parallel_dims.pp,
        model_config.n_layers,
    )
    # N split points cut the model into N + 1 stages. Computed before the
    # helper below is defined because the helper reads it.
    num_stages = len(splits) + 1

    def _build_stage(
        stage_idx: int,
        start_layer: str | None,
        stop_layer: str | None,
        is_first: bool = False,
        is_last: bool = False,
    ) -> tuple[PipelineStage, nn.Module]:
        """Return the ``PipelineStage`` and trimmed model copy for one stage.

        ``start_layer`` / ``stop_layer`` are compared against
        ``f"layers.{name}"`` for every key of ``model.layers``; ``None`` means
        "from the beginning" / "to the end" respectively.
        """
        model = copy.deepcopy(whole_model)
        if not is_first:
            model.tok_embeddings = None

        # With no start layer we keep from the very first layer onwards;
        # otherwise everything is dropped until the start layer is reached.
        drop_layers = start_layer is not None
        # Snapshot the keys: entries are deleted while iterating.
        for name in list(model.layers.keys()):
            # we keep layers in a contiguous region between start (inclusive) and stop (exclusive)
            if f"layers.{name}" == start_layer:
                drop_layers = False
            if f"layers.{name}" == stop_layer:
                drop_layers = True
            if drop_layers:
                del model.layers[name]

        if not is_last:
            model.norm = None
            model.output = None

        stage = PipelineStage(
            model,
            stage_idx,
            num_stages,
            device,
            group=pp_mesh.get_group("pp"),
        )
        return stage, model

    stages = []
    models = []

    # ScheduleZBVZeroBubble uses the "v" stage-to-rank assignment; every other
    # schedule uses "loop".
    schedule_class = get_schedule_class(parallelism_config.pipeline_parallel_schedule)
    style = "v" if schedule_class == ScheduleZBVZeroBubble else "loop"

    for stage_idx in stage_ids_this_rank(pp_rank, pp_size, num_stages, style=style):
        # Stage i spans [splits[i - 1], splits[i]); the first and last stages
        # are open-ended on their outer side.
        start_layer = splits[stage_idx - 1] if stage_idx > 0 else None
        stop_layer = splits[stage_idx] if stage_idx < num_stages - 1 else None
        stage, model_chunk = _build_stage(
            stage_idx,
            start_layer,
            stop_layer,
            is_first=stage_idx == 0,
            is_last=stage_idx == num_stages - 1,
        )
        logger.info(
            f"PP rank {pp_rank} is building stage_idx {stage_idx}"
            f" with start_layer {start_layer}, stop_layer {stop_layer}"
        )
        stages.append(stage)
        models.append(model_chunk)
    return stages, models
a/torchtitan/models/llama3/train_configs/llama3_70b.toml b/torchtitan/models/llama3/train_configs/llama3_70b.toml new file mode 100644 index 0000000000000000000000000000000000000000..64ef62ebfe96a6a8fbdbbc0aaa1849992769a1b8 --- /dev/null +++ b/torchtitan/models/llama3/train_configs/llama3_70b.toml @@ -0,0 +1,62 @@ +# torchtitan Config.toml +# NOTE: this toml config is a preset for 64 A100 GPUs. + +[job] +dump_folder = "./outputs" +description = "Llama 3 70B training" + +[profiling] +enable_profiling = true +save_traces_folder = "profile_trace" +profile_freq = 100 + +[metrics] +log_freq = 10 +enable_tensorboard = true +save_tb_folder = "tb" + +[model] +name = "llama3" +flavor = "70B" +norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm +tokenizer_path = "./assets/tokenizer/original/tokenizer.model" +# converters = "float8" + +[optimizer] +name = "AdamW" +lr = 1.5e-4 +eps = 1e-8 + +[lr_scheduler] +warmup_steps = 200 # lr scheduler warm up, normally 20% of the train steps + +[training] +batch_size = 8 +seq_len = 8192 +max_norm = 1.0 # grad norm clipping +steps = 1000 +compile = false +dataset = "c4" + +[parallelism] +data_parallel_replicate_degree = 1 +data_parallel_shard_degree = -1 +tensor_parallel_degree = 8 # 8-way TP +pipeline_parallel_degree = 1 +context_parallel_degree = 1 + +[checkpoint] +enable_checkpoint = false +folder = "checkpoint" +interval = 500 +model_weights_only = false +export_dtype = "float32" +async_mode = "disabled" # ["disabled", "async", "async_with_pinned_mem"] + +[activation_checkpoint] +mode = 'full' + +[float8] +enable_fsdp_float8_all_gather = false +precompute_float8_dynamic_scale_for_fsdp = false +filter_fqns = "output" diff --git a/torchtitan/models/llama3/train_configs/llama3_8b.toml b/torchtitan/models/llama3/train_configs/llama3_8b.toml new file mode 100644 index 0000000000000000000000000000000000000000..5594b0fe33763fd289ab0e5a298858ec1869b23b --- /dev/null +++ b/torchtitan/models/llama3/train_configs/llama3_8b.toml @@ -0,0 
+1,63 @@ +# torchtitan Config.toml +# NOTE: this toml config is a preset for 64 A100 GPUs. + +[job] +dump_folder = "./outputs" +description = "Llama 3 8B training" + +[profiling] +enable_profiling = true +save_traces_folder = "profile_trace" +profile_freq = 100 + +[metrics] +log_freq = 10 +enable_tensorboard = true +save_tb_folder = "tb" + +[model] +name = "llama3" +flavor = "8B" +norm_type = "rmsnorm" # layernorm / np_layernorm / rmsnorm +tokenizer_path = "./assets/tokenizer/original/tokenizer.model" +# converters = "float8" + +[optimizer] +name = "AdamW" +lr = 3e-4 +eps = 1e-8 + +[lr_scheduler] +warmup_steps = 200 # lr scheduler warm up + +[training] +batch_size = 1 +seq_len = 8192 +max_norm = 1.0 # grad norm clipping +steps = 1000 +compile = false +dataset = "c4" + +[parallelism] +data_parallel_replicate_degree = 1 +data_parallel_shard_degree = -1 +tensor_parallel_degree = 1 +pipeline_parallel_degree = 1 +context_parallel_degree = 1 + +[checkpoint] +enable_checkpoint = false +folder = "checkpoint" +interval = 500 +model_weights_only = false +export_dtype = "float32" +async_mode = "disabled" # ["disabled", "async", "async_with_pinned_mem"] + +[activation_checkpoint] +mode = 'selective' # ['none', 'selective', 'full'] +selective_ac_option = 'op' # 'int' = ac every positive int layer or 'op', ac based on ops policy + +[float8] +enable_fsdp_float8_all_gather = false +precompute_float8_dynamic_scale_for_fsdp = false +filter_fqns = "output" diff --git a/torchtitan/protocols/__pycache__/model_converter.cpython-312.pyc b/torchtitan/protocols/__pycache__/model_converter.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab8bdd6dd8474656d2ca6dcad690dc39dce2b71e Binary files /dev/null and b/torchtitan/protocols/__pycache__/model_converter.cpython-312.pyc differ